@@ -296,6 +296,15 @@ __read_mostly int scheduler_running;
  */
 int sysctl_sched_rt_runtime = 950000;
 
+/*
+ * Maximum bandwidth available for all -deadline tasks and groups
+ * (if group scheduling is configured) on each CPU.
+ *
+ * default: 5%
+ */
+unsigned int sysctl_sched_dl_period = 1000000;
+int sysctl_sched_dl_runtime = 50000;
+
 /*
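As a quick sanity check on the defaults above: both sysctls are in microseconds, so the cap works out to 50000 / 1000000 = 5% of each CPU. A tiny stand-alone C sketch (not part of the patch) that prints the fraction:

#include <stdio.h>

int main(void)
{
	unsigned int dl_period_us = 1000000;	/* sysctl_sched_dl_period  */
	int dl_runtime_us = 50000;		/* sysctl_sched_dl_runtime */

	/* 100.0 * 50000 / 1000000 == 5.0 */
	printf("-deadline cap per CPU: %.1f%%\n",
	       100.0 * dl_runtime_us / dl_period_us);
	return 0;
}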
@@ -1856,6 +1865,111 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	return 0;
 }
 
+unsigned long to_ratio(u64 period, u64 runtime)
+{
+	if (runtime == RUNTIME_INF)
+		return 1ULL << 20;
+
+	/*
+	 * Doing this here saves a lot of checks in all
+	 * the calling paths, and returning zero seems
+	 * safe for them anyway.
+	 */
+	if (period == 0)
+		return 0;
+
+	return div64_u64(runtime << 20, period);
+}
+
+#ifdef CONFIG_SMP
+inline struct dl_bw *dl_bw_of(int i)
+{
+	return &cpu_rq(i)->rd->dl_bw;
+}
+
+static inline int __dl_span_weight(struct rq *rq)
+{
+	return cpumask_weight(rq->rd->span);
+}
+#else
+inline struct dl_bw *dl_bw_of(int i)
+{
+	return &cpu_rq(i)->dl.dl_bw;
+}
+
+static inline int __dl_span_weight(struct rq *rq)
+{
+	return 1;
+}
+#endif
+
+static inline
+void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw)
+{
+	dl_b->total_bw -= tsk_bw;
+}
+
+static inline
+void __dl_add(struct dl_bw *dl_b, u64 tsk_bw)
+{
+	dl_b->total_bw += tsk_bw;
+}
+
+static inline
+bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
+{
+	return dl_b->bw != -1 &&
+	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
+}
+
+/*
+ * We must be sure that accepting a new task (or allowing changing the
+ * parameters of an existing one) is consistent with the bandwidth
+ * constraints. If so, this function also updates the currently
+ * allocated bandwidth to reflect the new situation.
+ *
+ * This function is called while holding p's rq->lock.
+ */
+static int dl_overflow(struct task_struct *p, int policy,
+		       const struct sched_attr *attr)
+{
+
+	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+	u64 period = attr->sched_period;
+	u64 runtime = attr->sched_runtime;
+	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
+	int cpus = __dl_span_weight(task_rq(p));
+	int err = -1;
+
+	if (new_bw == p->dl.dl_bw)
+		return 0;
+
+	/*
+	 * Whether a task enters, leaves, or stays -deadline but changes
+	 * its parameters, we may need to update the total allocated
+	 * bandwidth of its container accordingly.
+	 */
+	raw_spin_lock(&dl_b->lock);
+	if (dl_policy(policy) && !task_has_dl_policy(p) &&
+	    !__dl_overflow(dl_b, cpus, 0, new_bw)) {
+		__dl_add(dl_b, new_bw);
+		err = 0;
+	} else if (dl_policy(policy) && task_has_dl_policy(p) &&
+		   !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+		__dl_clear(dl_b, p->dl.dl_bw);
+		__dl_add(dl_b, new_bw);
+		err = 0;
+	} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
+		__dl_clear(dl_b, p->dl.dl_bw);
+		err = 0;
+	}
+	raw_spin_unlock(&dl_b->lock);
+
+	return err;
+}
+
+extern void init_dl_bw(struct dl_bw *dl_b);
+
 /*
  * wake_up_new_task - wake up a newly created task for the first time.
  *
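The bookkeeping above is plain fixed-point arithmetic: to_ratio() expresses runtime/period as a fraction scaled by 2^20, so 1 << 20 stands for 100% of one CPU, and __dl_overflow() admits a task only while the summed fractions stay below bw * cpus. A stand-alone sketch of the same accounting (hypothetical helper names, not the kernel's dl_bw API):

#include <stdio.h>
#include <stdint.h>

#define BW_SHIFT	20
#define BW_UNIT		(1ULL << BW_SHIFT)	/* 100% of one CPU */

/* runtime/period as a Q20 fixed-point fraction, like to_ratio() */
static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return period ? (runtime << BW_SHIFT) / period : 0;
}

/* mimic __dl_overflow(): would admitting new_bw exceed cap * cpus? */
static int would_overflow(uint64_t cap, int cpus, uint64_t total,
			  uint64_t old_bw, uint64_t new_bw)
{
	return cap * cpus < total - old_bw + new_bw;
}

int main(void)
{
	uint64_t cap = to_ratio(1000000, 50000);	/* 5% per CPU */
	uint64_t total = 0;
	uint64_t task_bw = to_ratio(100000, 3000);	/* 3ms every 100ms */
	int cpus = 4;

	if (!would_overflow(cap, cpus, total, 0, task_bw)) {
		total += task_bw;
		printf("admitted: total now %llu of %llu\n",
		       (unsigned long long)total,
		       (unsigned long long)(cap * cpus));
	}
	return 0;
}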
@@ -3053,6 +3167,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_deadline = attr->sched_deadline;
 	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
 	dl_se->flags = attr->sched_flags;
+	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
 	dl_se->dl_throttled = 0;
 	dl_se->dl_new = 1;
 }
@@ -3101,7 +3216,9 @@ __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  * This function validates the new parameters of a -deadline task.
  * We ask for the deadline not being zero, and greater or equal
  * than the runtime, as well as the period of being zero or
- * greater than deadline.
+ * greater than deadline. Furthermore, we have to be sure that
+ * user parameters are above the internal resolution (1us); we
+ * check sched_runtime only since it is always the smaller one.
  */
 static bool
 __checkparam_dl(const struct sched_attr *attr)
@@ -3109,7 +3226,8 @@ __checkparam_dl(const struct sched_attr *attr)
 	return attr && attr->sched_deadline != 0 &&
 		(attr->sched_period == 0 ||
 		(s64)(attr->sched_period - attr->sched_deadline) >= 0) &&
-		(s64)(attr->sched_deadline - attr->sched_runtime ) >= 0;
+		(s64)(attr->sched_deadline - attr->sched_runtime ) >= 0 &&
+		attr->sched_runtime >= (2 << (DL_SCALE - 1));
 }
 
 /*
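In plain terms, __checkparam_dl() enforces runtime <= deadline <= period (a zero period means "use the deadline") plus a minimum granularity on sched_runtime. A user-space sketch of those rules follows; DL_SCALE is assumed to be 10 here (roughly 1us), the real value lives elsewhere in the scheduler headers:

#include <stdbool.h>
#include <stdint.h>

#define DL_SCALE 10	/* assumption for illustration only */

static bool checkparam_dl(uint64_t runtime, uint64_t deadline, uint64_t period)
{
	if (deadline == 0)
		return false;
	if (period != 0 && period < deadline)
		return false;		/* deadline must not exceed period   */
	if (deadline < runtime)
		return false;		/* runtime must not exceed deadline  */
	/* reject runtimes below the scheduler's internal resolution (~1us) */
	return runtime >= (2ULL << (DL_SCALE - 1));
}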
@@ -3250,8 +3368,8 @@ recheck:
 	}
 change:
 
-#ifdef CONFIG_RT_GROUP_SCHED
 	if (user) {
+#ifdef CONFIG_RT_GROUP_SCHED
 		/*
 		 * Do not allow realtime tasks into groups that have no runtime
 		 * assigned.
@@ -3262,8 +3380,33 @@ change:
 			task_rq_unlock(rq, p, &flags);
 			return -EPERM;
 		}
-	}
 #endif
+#ifdef CONFIG_SMP
+		if (dl_bandwidth_enabled() && dl_policy(policy)) {
+			cpumask_t *span = rq->rd->span;
+			cpumask_t act_affinity;
+
+			/*
+			 * cpus_allowed mask is statically initialized with
+			 * CPU_MASK_ALL, span is instead dynamic. Here we
+			 * compute the "dynamic" affinity of a task.
+			 */
+			cpumask_and(&act_affinity, &p->cpus_allowed,
+				    cpu_active_mask);
+
+			/*
+			 * Don't allow tasks with an affinity mask smaller than
+			 * the entire root_domain to become SCHED_DEADLINE. We
+			 * will also fail if there's no bandwidth available.
+			 */
+			if (!cpumask_equal(&act_affinity, span) ||
+			    rq->rd->dl_bw.bw == 0) {
+				task_rq_unlock(rq, p, &flags);
+				return -EPERM;
+			}
+		}
+#endif
+	}
 
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
@@ -3271,6 +3414,18 @@ change:
 		task_rq_unlock(rq, p, &flags);
 		goto recheck;
 	}
+
+	/*
+	 * If setscheduling to SCHED_DEADLINE (or changing the parameters
+	 * of a SCHED_DEADLINE task) we need to check if enough bandwidth
+	 * is available.
+	 */
+	if ((dl_policy(policy) || dl_task(p)) &&
+	    dl_overflow(p, policy, attr)) {
+		task_rq_unlock(rq, p, &flags);
+		return -EBUSY;
+	}
+
 	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
@@ -3705,6 +3860,24 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	if (retval)
 		goto out_unlock;
 
+	/*
+	 * Since bandwidth control happens on a root_domain basis, if
+	 * the admission test is enabled we only admit -deadline tasks
+	 * that are allowed to run on all the CPUs in their
+	 * root_domain.
+	 */
+#ifdef CONFIG_SMP
+	if (task_has_dl_policy(p)) {
+		const struct cpumask *span = task_rq(p)->rd->span;
+
+		if (dl_bandwidth_enabled() &&
+		    !cpumask_equal(in_mask, span)) {
+			retval = -EBUSY;
+			goto out_unlock;
+		}
+	}
+#endif
+
 	cpuset_cpus_allowed(p, cpus_allowed);
 	cpumask_and(new_mask, in_mask, cpus_allowed);
 again:
@@ -4358,6 +4531,42 @@ out:
 }
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
+/*
+ * When dealing with a -deadline task, we have to check if moving it to
+ * a new CPU is possible or not. In fact, this is only possible if there
+ * is enough bandwidth available on that CPU; otherwise we want the
+ * whole migration procedure to fail.
+ */
+static inline
+bool set_task_cpu_dl(struct task_struct *p, unsigned int cpu)
+{
+	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+	struct dl_bw *cpu_b = dl_bw_of(cpu);
+	int ret = 1;
+	u64 bw;
+
+	if (dl_b == cpu_b)
+		return 1;
+
+	raw_spin_lock(&dl_b->lock);
+	raw_spin_lock(&cpu_b->lock);
+
+	bw = cpu_b->bw * cpumask_weight(cpu_rq(cpu)->rd->span);
+	if (dl_bandwidth_enabled() &&
+	    bw < cpu_b->total_bw + p->dl.dl_bw) {
+		ret = 0;
+		goto unlock;
+	}
+	dl_b->total_bw -= p->dl.dl_bw;
+	cpu_b->total_bw += p->dl.dl_bw;
+
+unlock:
+	raw_spin_unlock(&cpu_b->lock);
+	raw_spin_unlock(&dl_b->lock);
+
+	return ret;
+}
+
 /*
  * Move (not current) task off this cpu, onto dest cpu. We're doing
  * this because either it can't run here any more (set_cpus_allowed()
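set_task_cpu_dl() is essentially a guarded transfer of the task's utilization between two per-root_domain accumulators. A stand-alone sketch of that pattern, with a hypothetical struct standing in for the kernel's struct dl_bw:

#include <stdbool.h>
#include <stdint.h>

struct bw_pool {
	uint64_t cap;	/* admissible bandwidth, already scaled by CPU count */
	uint64_t total;	/* bandwidth currently allocated to tasks            */
};

/* Move task_bw from src to dst, refusing if dst would be over-committed. */
static bool transfer_bw(struct bw_pool *src, struct bw_pool *dst,
			uint64_t task_bw)
{
	if (src == dst)
		return true;	/* same domain: nothing to account for */
	if (dst->cap < dst->total + task_bw)
		return false;	/* destination full: fail the migration */

	src->total -= task_bw;
	dst->total += task_bw;
	return true;
}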
@@ -4389,6 +4598,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
 		goto fail;
 
+	/*
+	 * If p is -deadline, proceed only if there is enough
+	 * bandwidth available on dest_cpu
+	 */
+	if (unlikely(dl_task(p)) && !set_task_cpu_dl(p, dest_cpu))
+		goto fail;
+
 	/*
 	 * If we're not on a rq, the next wake-up will ensure we're
 	 * placed properly.
@@ -5128,6 +5344,8 @@ static int init_rootdomain(struct root_domain *rd)
 	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
 		goto free_dlo_mask;
 
+	init_dl_bw(&rd->dl_bw);
+
 	if (cpupri_init(&rd->cpupri) != 0)
 		goto free_rto_mask;
 	return 0;
@@ -6557,13 +6775,15 @@ void __init sched_init(void)
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 	}
 
+	init_rt_bandwidth(&def_rt_bandwidth,
+			global_rt_period(), global_rt_runtime());
+	init_dl_bandwidth(&def_dl_bandwidth,
+			global_dl_period(), global_dl_runtime());
+
 #ifdef CONFIG_SMP
 	init_defrootdomain();
 #endif
 
-	init_rt_bandwidth(&def_rt_bandwidth,
-			global_rt_period(), global_rt_runtime());
-
 #ifdef CONFIG_RT_GROUP_SCHED
 	init_rt_bandwidth(&root_task_group.rt_bandwidth,
 			global_rt_period(), global_rt_runtime());
@@ -6966,16 +7186,6 @@ void sched_move_task(struct task_struct *tsk)
 }
 #endif /* CONFIG_CGROUP_SCHED */
 
-#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
-static unsigned long to_ratio(u64 period, u64 runtime)
-{
-	if (runtime == RUNTIME_INF)
-		return 1ULL << 20;
-
-	return div64_u64(runtime << 20, period);
-}
-#endif
-
 #ifdef CONFIG_RT_GROUP_SCHED
 /*
  * Ensure that the real time constraints are schedulable.
@@ -7149,10 +7359,48 @@ static long sched_group_rt_period(struct task_group *tg)
 	do_div(rt_period_us, NSEC_PER_USEC);
 	return rt_period_us;
 }
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+/*
+ * Coupling of -rt and -deadline bandwidth.
+ *
+ * Here we check if the new -rt bandwidth value is consistent
+ * with the system settings for the bandwidth available
+ * to -deadline tasks.
+ *
+ * IOW, we want to enforce that
+ *
+ *   rt_bandwidth + dl_bandwidth <= 100%
+ *
+ * is always true.
+ */
+static bool __sched_rt_dl_global_constraints(u64 rt_bw)
+{
+	unsigned long flags;
+	u64 dl_bw;
+	bool ret;
+
+	raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock, flags);
+	if (global_rt_runtime() == RUNTIME_INF ||
+	    global_dl_runtime() == RUNTIME_INF) {
+		ret = true;
+		goto unlock;
+	}
+
+	dl_bw = to_ratio(def_dl_bandwidth.dl_period,
+			 def_dl_bandwidth.dl_runtime);
+
+	ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
+unlock:
+	raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock, flags);
+
+	return ret;
+}
+
+#ifdef CONFIG_RT_GROUP_SCHED
 static int sched_rt_global_constraints(void)
 {
-	u64 runtime, period;
+	u64 runtime, period, bw;
 	int ret = 0;
 
 	if (sysctl_sched_rt_period <= 0)
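With the default knobs (-rt: 950000/1000000, -dl: 50000/1000000) the coupling constraint is just barely satisfied, since both fractions are truncated to Q20 fixed point. A stand-alone sketch of the check (not the kernel code paths above):

#include <stdint.h>
#include <stdio.h>

/* Q20 fixed point, as used by to_ratio(): 1 << 20 == 100% of a CPU */
#define BW_UNIT (1ULL << 20)

static uint64_t to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << 20) / period;
}

int main(void)
{
	/* defaults: -rt gets 950ms per 1s, -dl gets 50ms per 1s */
	uint64_t rt_bw = to_ratio(1000000, 950000);
	uint64_t dl_bw = to_ratio(1000000, 50000);

	printf("rt + dl = %llu, unit = %llu -> %s\n",
	       (unsigned long long)(rt_bw + dl_bw),
	       (unsigned long long)BW_UNIT,
	       rt_bw + dl_bw <= BW_UNIT ? "admissible" : "rejected");
	return 0;
}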
@@ -7167,6 +7415,10 @@ static int sched_rt_global_constraints(void)
 	if (runtime > period && runtime != RUNTIME_INF)
 		return -EINVAL;
 
+	bw = to_ratio(period, runtime);
+	if (!__sched_rt_dl_global_constraints(bw))
+		return -EINVAL;
+
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
 	ret = __rt_schedulable(NULL, 0, 0);
@@ -7189,19 +7441,19 @@ static int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
 static int sched_rt_global_constraints(void)
 {
 	unsigned long flags;
-	int i;
+	int i, ret = 0;
+	u64 bw;
 
 	if (sysctl_sched_rt_period <= 0)
 		return -EINVAL;
 
-	/*
-	 * There's always some RT tasks in the root group
-	 * -- migration, kstopmachine etc..
-	 */
-	if (sysctl_sched_rt_runtime == 0)
-		return -EBUSY;
-
 	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
+	bw = to_ratio(global_rt_period(), global_rt_runtime());
+	if (!__sched_rt_dl_global_constraints(bw)) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
 
@@ -7209,12 +7461,93 @@ static int sched_rt_global_constraints(void)
 		rt_rq->rt_runtime = global_rt_runtime();
 		raw_spin_unlock(&rt_rq->rt_runtime_lock);
 	}
+unlock:
 	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 
-	return 0;
+	return ret;
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+/*
+ * Coupling of -dl and -rt bandwidth.
+ *
+ * Here we check, while setting the system wide bandwidth available
+ * for -dl tasks and groups, if the new values are consistent with
+ * the system settings for the bandwidth available to -rt entities.
+ *
+ * IOW, we want to enforce that
+ *
+ *   rt_bandwidth + dl_bandwidth <= 100%
+ *
+ * is always true.
+ */
+static bool __sched_dl_rt_global_constraints(u64 dl_bw)
+{
+	u64 rt_bw;
+	bool ret;
+
+	raw_spin_lock(&def_rt_bandwidth.rt_runtime_lock);
+	if (global_dl_runtime() == RUNTIME_INF ||
+	    global_rt_runtime() == RUNTIME_INF) {
+		ret = true;
+		goto unlock;
+	}
+
+	rt_bw = to_ratio(ktime_to_ns(def_rt_bandwidth.rt_period),
+			 def_rt_bandwidth.rt_runtime);
+
+	ret = rt_bw + dl_bw <= to_ratio(RUNTIME_INF, RUNTIME_INF);
+unlock:
+	raw_spin_unlock(&def_rt_bandwidth.rt_runtime_lock);
+
+	return ret;
+}
+
+static int __sched_dl_global_constraints(u64 runtime, u64 period)
+{
+	if (!period || (runtime != RUNTIME_INF && runtime > period))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int sched_dl_global_constraints(void)
+{
+	u64 runtime = global_dl_runtime();
+	u64 period = global_dl_period();
+	u64 new_bw = to_ratio(period, runtime);
+	int ret, i;
+
+	ret = __sched_dl_global_constraints(runtime, period);
+	if (ret)
+		return ret;
+
+	if (!__sched_dl_rt_global_constraints(new_bw))
+		return -EINVAL;
+
+	/*
+	 * Here we want to check that the new bandwidth is not smaller
+	 * than the bandwidth currently allocated in any of the
+	 * root_domains.
+	 *
+	 * FIXME: Cycling on all the CPUs is overdoing it, but simpler than
+	 * cycling on root_domains... Discussion on different/better
+	 * solutions is welcome!
+	 */
+	for_each_possible_cpu(i) {
+		struct dl_bw *dl_b = dl_bw_of(i);
+
+		raw_spin_lock(&dl_b->lock);
+		if (new_bw < dl_b->total_bw) {
+			raw_spin_unlock(&dl_b->lock);
+			return -EBUSY;
+		}
+		raw_spin_unlock(&dl_b->lock);
+	}
+
+	return 0;
+}
+
 int sched_rr_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
@@ -7264,6 +7597,60 @@ int sched_rt_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
+int sched_dl_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	int old_period, old_runtime;
+	static DEFINE_MUTEX(mutex);
+	unsigned long flags;
+
+	mutex_lock(&mutex);
+	old_period = sysctl_sched_dl_period;
+	old_runtime = sysctl_sched_dl_runtime;
+
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	if (!ret && write) {
+		raw_spin_lock_irqsave(&def_dl_bandwidth.dl_runtime_lock,
+				      flags);
+
+		ret = sched_dl_global_constraints();
+		if (ret) {
+			sysctl_sched_dl_period = old_period;
+			sysctl_sched_dl_runtime = old_runtime;
+		} else {
+			u64 new_bw;
+			int i;
+
+			def_dl_bandwidth.dl_period = global_dl_period();
+			def_dl_bandwidth.dl_runtime = global_dl_runtime();
+			if (global_dl_runtime() == RUNTIME_INF)
+				new_bw = -1;
+			else
+				new_bw = to_ratio(global_dl_period(),
+						  global_dl_runtime());
+			/*
+			 * FIXME: As above...
+			 */
+			for_each_possible_cpu(i) {
+				struct dl_bw *dl_b = dl_bw_of(i);
+
+				raw_spin_lock(&dl_b->lock);
+				dl_b->bw = new_bw;
+				raw_spin_unlock(&dl_b->lock);
+			}
+		}
+
+		raw_spin_unlock_irqrestore(&def_dl_bandwidth.dl_runtime_lock,
+					   flags);
+	}
+	mutex_unlock(&mutex);
+
+	return ret;
+}
+
 #ifdef CONFIG_CGROUP_SCHED
 
 static inline struct task_group *css_tg(struct cgroup_subsys_state *css)