当A进程唤醒B进程时,假设二者都是普通进程(即由CFS公平调度类管理的进程),那么调用路径为:try_to_wake_up()->select_task_rq()->select_task_rq_fair()。
- /* * sched_balance_self: balance the current task (running on cpu) in domains * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and * SD_BALANCE_EXEC. * * Balance, ie. select the least loaded group. * * Returns the target CPU number, or the same CPU if no balancing is needed. * * preempt must be disabled. * * NOTE(review): this header names sched_balance_self while the function below is select_task_rq_fair; presumably a leftover from an earlier name. */
- /* Task A selects a CPU for itself or for task B (p) to run on. Cases, distinguished by sd_flag: * 1: A wakes B (SD_BALANCE_WAKE) * 2: A fork()s B and B is about to run (SD_BALANCE_FORK) * 3: A calls execve() and re-selects the CPU it will run on (SD_BALANCE_EXEC) */
- static int
- select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
- {
- struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
- int cpu = smp_processor_id();
- int new_cpu = cpu;
- int want_affine = 0;
- int sync = wake_flags & WF_SYNC;
-
- /* Wakeup case: sd_flag has SD_BALANCE_WAKE set (the caller, try_to_wake_up(), is not shown in this excerpt). */
- if (sd_flag & SD_BALANCE_WAKE) {
- /* On wakeup, B would like to run as close as possible to the CPU A is running on; that is only considered when the current cpu is in p's allowed mask. */
- if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
- want_affine = 1;
- new_cpu = prev_cpu; /* not guarded by the if above (no braces): every wakeup defaults to prev_cpu */
- record_wakee(p);
- }
-
- rcu_read_lock();
- /* * Wakeup case: find the first domain (assuming for_each_domain() walks from the lowest level upward) that has SD_WAKE_AFFINE and spans both cpu (A's CPU) and prev_cpu (where B ran before sleeping). A and B most likely exchange data, so keeping their CPUs close should perform best. * Otherwise: remember the last, i.e. highest, domain that has sd_flag set. */
- for_each_domain(cpu, tmp) {
- if (!(tmp->flags & SD_LOAD_BALANCE))
- continue;
-
- /* * If both cpu and prev_cpu are part of this domain, * cpu is a valid SD_WAKE_AFFINE target. */
- if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
- cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
- affine_sd = tmp;
- break;
- }
-
- if (tmp->flags & sd_flag)
- sd = tmp;
- }
-
- /* Wakeup case with an affine domain found: choose a CPU inside the domain that contains both cpu and prev_cpu. */
- if (affine_sd) {
- /* * wake_affine() (defined elsewhere) decides whether B should be moved next to A; per the original annotation it compares the load of A's CPU and B's previous CPU, which cannot be verified from this excerpt. * When it returns true, the idle-sibling search below starts from cpu instead of prev_cpu. */
- if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
- prev_cpu = cpu;
-
- /* Look for an idle CPU that shares the LLC with prev_cpu; select_idle_sibling() returns prev_cpu itself if none is found. * This settles which CPU B runs on after the wakeup. */
- new_cpu = select_idle_sibling(p, prev_cpu);
- goto unlock;
- }
-
- /* Reaching here, no affine domain was selected, so cache affinity between A and B is not taken into account; descend from sd looking for the idlest group and CPU. */
- while (sd) {
- int load_idx = sd->forkexec_idx;
- struct sched_group *group;
- int weight;
-
- if (!(sd->flags & sd_flag)) {
- sd = sd->child;
- continue;
- }
-
- if (sd_flag & SD_BALANCE_WAKE)
- load_idx = sd->wake_idx;
-
- group = find_idlest_group(sd, p, cpu, load_idx);
- if (!group) {
- sd = sd->child;
- continue;
- }
-
- new_cpu = find_idlest_cpu(group, p, cpu);
- if (new_cpu == -1 || new_cpu == cpu) {
- /* Now try balancing at a lower domain level of cpu */
- sd = sd->child;
- continue;
- }
-
- /* Now try balancing at a lower domain level of new_cpu */
- cpu = new_cpu;
- weight = sd->span_weight;
- sd = NULL;
- for_each_domain(cpu, tmp) {
- if (weight <= tmp->span_weight)
- break;
- if (tmp->flags & sd_flag)
- sd = tmp;
- }
- /* while loop will break here if sd == NULL */
- }
- unlock:
- rcu_read_unlock();
-
- return new_cpu;
- }
- /* * Try and locate an idle CPU in the sched_domain. */
- /* Look for an idle CPU as close as possible (in cache/memory distance) to the target CPU; returns target itself when no better candidate is found. */
- static int select_idle_sibling(struct task_struct *p, int target)
- {
- struct sched_domain *sd;
- struct sched_group *sg;
- int i = task_cpu(p); /* starts as p's current CPU; reused as the loop cursor below */
-
- /* target itself is idle: nothing can be closer than that. */
- if (idle_cpu(target))
- return target;
-
- /* * If the previous cpu is cache affine and idle, don't be stupid. */
- /* * p's current CPU shares cache with target and is idle, so use it; sharing cache means the two are very close. * (Original note: only the SMT and MC domain levels have this cache-sharing relation.) */
- if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
- return i;
-
- /* * Otherwise, iterate the domains and find an eligible idle cpu. */
- /* * Search for an idle CPU in the domains that share the LLC with target. Original note: on x86 only the SMT and MC domain levels share cache. */
- sd = rcu_dereference(per_cpu(sd_llc, target));
- /* Original author's note: on their machine the walk order is MC, then SMT. */
- for_each_lower_domain(sd) {
- sg = sd->groups;
- do {
- if (!cpumask_intersects(sched_group_cpus(sg),
- tsk_cpus_allowed(p)))
- goto next;
-
- /* A group is chosen only if every CPU in it is idle and it does not contain target. */
- for_each_cpu(i, sched_group_cpus(sg)) {
- if (i == target || !idle_cpu(i))
- goto next;
- }
-
- /* Pick the first CPU of the fully idle group that p is allowed to run on. */
- target = cpumask_first_and(sched_group_cpus(sg),
- tsk_cpus_allowed(p));
- goto done;
- next:
- sg = sg->next;
- } while (sg != sd->groups);
- }
- done:
- return target;
- }
调用execve()系统调用时
- /* * sched_exec - execve() is a valuable balancing opportunity, because at * this point the task has the smallest effective memory and cache footprint. */
- void sched_exec(void)
- {
- struct task_struct *p = current;
- unsigned long flags;
- int dest_cpu;
-
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- /* Pick the most suitable CPU. After execve() the task's previous cache contents are meaningless, so cache distance need not be considered when choosing the destination. */
- dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), SD_BALANCE_EXEC, 0);
- if (dest_cpu == smp_processor_id()) /* already running on the chosen CPU: nothing to migrate */
- goto unlock;
-
- if (likely(cpu_active(dest_cpu))) {
- struct migration_arg arg = { p, dest_cpu };
-
- /* pi_lock is released before the migration is handed to stop_one_cpu(). */
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
- stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
- return;
- }
- unlock:
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
- }
fork的子进程第一次被调度运行时
- do_fork()->wake_up_new_task()
-
- /* * wake_up_new_task - wake up a newly created task for the first time. * * This function will do some initial scheduler statistics housekeeping * that must be done for every newly created context, then puts the task * on the runqueue and wakes it. */
- void wake_up_new_task(struct task_struct *p)
- {
- unsigned long flags;
- struct rq *rq;
-
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- #ifdef CONFIG_SMP
- /* * Fork balancing, do it here and not earlier because: * - cpus_allowed can change in the fork path * - any previously selected cpu might disappear through hotplug */
- /* Pick the CPU the newly forked task will first run on (SD_BALANCE_FORK) and assign the task to it. */
- set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
- #endif
-
- /* Initialize new task's runnable average */
- init_task_runnable_average(p);
- rq = __task_rq_lock(p);
- activate_task(rq, p, 0);
- p->on_rq = TASK_ON_RQ_QUEUED;
- trace_sched_wakeup_new(p, true);
- check_preempt_curr(rq, p, WF_FORK);
- #ifdef CONFIG_SMP
- /* task_woken is an optional sched_class hook; call it only when the class provides one. */
- if (p->sched_class->task_woken)
- p->sched_class->task_woken(rq, p);
- #endif
- task_rq_unlock(rq, p, &flags);
- }
SMP负载均衡模型的配置
可以在/proc/sys/kernel/sched_domain/cpuX/目录下,对指定CPU所在的各层调度域分别进行设置。 (编辑:晋中站长网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意中侵犯到您的权利,请及时联系站长删除相关内容!
|