Lines Matching +full:cpu +full:- +full:capacity

1 // SPDX-License-Identifier: GPL-2.0
43 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, in sched_domain_debug_one() argument
46 struct sched_group *group = sd->groups; in sched_domain_debug_one()
47 unsigned long flags = sd->flags; in sched_domain_debug_one()
52 printk(KERN_DEBUG "%*s domain-%d: ", level, "", level); in sched_domain_debug_one()
54 cpumask_pr_args(sched_domain_span(sd)), sd->name); in sched_domain_debug_one()
56 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { in sched_domain_debug_one()
57 printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); in sched_domain_debug_one()
59 if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) { in sched_domain_debug_one()
60 printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); in sched_domain_debug_one()
67 if ((meta_flags & SDF_SHARED_CHILD) && sd->child && in sched_domain_debug_one()
68 !(sd->child->flags & flag)) in sched_domain_debug_one()
72 if ((meta_flags & SDF_SHARED_PARENT) && sd->parent && in sched_domain_debug_one()
73 !(sd->parent->flags & flag)) in sched_domain_debug_one()
92 if (!(sd->flags & SD_OVERLAP) && in sched_domain_debug_one()
102 group->sgc->id, in sched_domain_debug_one()
105 if ((sd->flags & SD_OVERLAP) && in sched_domain_debug_one()
111 if (group->sgc->capacity != SCHED_CAPACITY_SCALE) in sched_domain_debug_one()
112 printk(KERN_CONT " cap=%lu", group->sgc->capacity); in sched_domain_debug_one()
114 if (group == sd->groups && sd->child && in sched_domain_debug_one()
115 !cpumask_equal(sched_domain_span(sd->child), in sched_domain_debug_one()
117 printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n"); in sched_domain_debug_one()
122 group = group->next; in sched_domain_debug_one()
124 if (group != sd->groups) in sched_domain_debug_one()
127 } while (group != sd->groups); in sched_domain_debug_one()
131 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); in sched_domain_debug_one()
133 if (sd->parent && in sched_domain_debug_one()
134 !cpumask_subset(groupmask, sched_domain_span(sd->parent))) in sched_domain_debug_one()
135 printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); in sched_domain_debug_one()
139 static void sched_domain_debug(struct sched_domain *sd, int cpu) in sched_domain_debug() argument
147 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); in sched_domain_debug()
151 printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu); in sched_domain_debug()
154 if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask)) in sched_domain_debug()
157 sd = sd->parent; in sched_domain_debug()
165 # define sched_domain_debug(sd, cpu) do { } while (0) argument
185 if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) && in sd_degenerate()
186 (sd->groups != sd->groups->next)) in sd_degenerate()
190 if (sd->flags & (SD_WAKE_AFFINE)) in sd_degenerate()
199 unsigned long cflags = sd->flags, pflags = parent->flags; in sd_parent_degenerate()
208 if (parent->groups == parent->groups->next) in sd_parent_degenerate()
230 /* EAS is enabled for asymmetric CPU capacity topologies. */ in sched_is_eas_possible()
256 pr_info("rd %*pbl: Checking EAS: frequency-invariant load tracking not yet supported", in sched_is_eas_possible()
267 pr_info("rd %*pbl: Checking EAS, cpufreq policy not set for CPU: %d", in sched_is_eas_possible()
272 gov = policy->governor; in sched_is_eas_possible()
302 return -EPERM; in sched_energy_aware_handler()
306 return -EOPNOTSUPP; in sched_energy_aware_handler()
349 tmp = pd->next; in free_pd()
355 static struct perf_domain *find_pd(struct perf_domain *pd, int cpu) in find_pd() argument
358 if (cpumask_test_cpu(cpu, perf_domain_span(pd))) in find_pd()
360 pd = pd->next; in find_pd()
366 static struct perf_domain *pd_init(int cpu) in pd_init() argument
368 struct em_perf_domain *obj = em_cpu_get(cpu); in pd_init()
373 pr_info("%s: no EM found for CPU%d\n", __func__, cpu); in pd_init()
380 pd->em_pd = obj; in pd_init()
397 em_pd_nr_perf_states(pd->em_pd)); in perf_domain_debug()
398 pd = pd->next; in perf_domain_debug()
437 int cpu = cpumask_first(cpu_map); in build_perf_domains() local
438 struct root_domain *rd = cpu_rq(cpu)->rd; in build_perf_domains()
455 tmp->next = pd; in build_perf_domains()
462 tmp = rd->pd; in build_perf_domains()
463 rcu_assign_pointer(rd->pd, pd); in build_perf_domains()
465 call_rcu(&tmp->rcu, destroy_perf_domain_rcu); in build_perf_domains()
471 tmp = rd->pd; in build_perf_domains()
472 rcu_assign_pointer(rd->pd, NULL); in build_perf_domains()
474 call_rcu(&tmp->rcu, destroy_perf_domain_rcu); in build_perf_domains()
486 cpupri_cleanup(&rd->cpupri); in free_rootdomain()
487 cpudl_cleanup(&rd->cpudl); in free_rootdomain()
488 free_cpumask_var(rd->dlo_mask); in free_rootdomain()
489 free_cpumask_var(rd->rto_mask); in free_rootdomain()
490 free_cpumask_var(rd->online); in free_rootdomain()
491 free_cpumask_var(rd->span); in free_rootdomain()
492 free_pd(rd->pd); in free_rootdomain()
503 if (rq->rd) { in rq_attach_root()
504 old_rd = rq->rd; in rq_attach_root()
506 if (cpumask_test_cpu(rq->cpu, old_rd->online)) in rq_attach_root()
509 cpumask_clear_cpu(rq->cpu, old_rd->span); in rq_attach_root()
516 if (!atomic_dec_and_test(&old_rd->refcount)) in rq_attach_root()
520 atomic_inc(&rd->refcount); in rq_attach_root()
521 rq->rd = rd; in rq_attach_root()
523 cpumask_set_cpu(rq->cpu, rd->span); in rq_attach_root()
524 if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) in rq_attach_root()
532 if (rq->fair_server.dl_server) in rq_attach_root()
533 __dl_server_attach_root(&rq->fair_server, rq); in rq_attach_root()
538 call_rcu(&old_rd->rcu, free_rootdomain); in rq_attach_root()
543 atomic_inc(&rd->refcount); in sched_get_rd()
548 if (!atomic_dec_and_test(&rd->refcount)) in sched_put_rd()
551 call_rcu(&rd->rcu, free_rootdomain); in sched_put_rd()
556 if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL)) in init_rootdomain()
558 if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL)) in init_rootdomain()
560 if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) in init_rootdomain()
562 if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) in init_rootdomain()
566 rd->rto_cpu = -1; in init_rootdomain()
567 raw_spin_lock_init(&rd->rto_lock); in init_rootdomain()
568 rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func); in init_rootdomain()
571 rd->visit_cookie = 0; in init_rootdomain()
572 init_dl_bw(&rd->dl_bw); in init_rootdomain()
573 if (cpudl_init(&rd->cpudl) != 0) in init_rootdomain()
576 if (cpupri_init(&rd->cpupri) != 0) in init_rootdomain()
581 cpudl_cleanup(&rd->cpudl); in init_rootdomain()
583 free_cpumask_var(rd->rto_mask); in init_rootdomain()
585 free_cpumask_var(rd->dlo_mask); in init_rootdomain()
587 free_cpumask_var(rd->online); in init_rootdomain()
589 free_cpumask_var(rd->span); in init_rootdomain()
591 return -ENOMEM; in init_rootdomain()
595 * By default the system creates a single root-domain with all CPUs as
632 tmp = sg->next; in free_sched_groups()
634 if (free_sgc && atomic_dec_and_test(&sg->sgc->ref)) in free_sched_groups()
635 kfree(sg->sgc); in free_sched_groups()
637 if (atomic_dec_and_test(&sg->ref)) in free_sched_groups()
648 * dropping group/capacity references, freeing where none remain. in destroy_sched_domain()
650 free_sched_groups(sd->groups, 1); in destroy_sched_domain()
652 if (sd->shared && atomic_dec_and_test(&sd->shared->ref)) in destroy_sched_domain()
653 kfree(sd->shared); in destroy_sched_domain()
662 struct sched_domain *parent = sd->parent; in destroy_sched_domains_rcu()
671 call_rcu(&sd->rcu, destroy_sched_domains_rcu); in destroy_sched_domains()
679 * Also keep a unique ID per domain (we use the first CPU number in the cpumask
695 static void update_top_cache_domain(int cpu) in update_top_cache_domain() argument
699 int id = cpu; in update_top_cache_domain()
702 sd = highest_flag_domain(cpu, SD_SHARE_LLC); in update_top_cache_domain()
706 sds = sd->shared; in update_top_cache_domain()
709 rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); in update_top_cache_domain()
710 per_cpu(sd_llc_size, cpu) = size; in update_top_cache_domain()
711 per_cpu(sd_llc_id, cpu) = id; in update_top_cache_domain()
712 rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds); in update_top_cache_domain()
714 sd = lowest_flag_domain(cpu, SD_CLUSTER); in update_top_cache_domain()
721 * but equals the LLC id on non-Cluster machines. in update_top_cache_domain()
723 per_cpu(sd_share_id, cpu) = id; in update_top_cache_domain()
725 sd = lowest_flag_domain(cpu, SD_NUMA); in update_top_cache_domain()
726 rcu_assign_pointer(per_cpu(sd_numa, cpu), sd); in update_top_cache_domain()
728 sd = highest_flag_domain(cpu, SD_ASYM_PACKING); in update_top_cache_domain()
729 rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd); in update_top_cache_domain()
731 sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL); in update_top_cache_domain()
732 rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd); in update_top_cache_domain()
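
As an aside, the per-CPU caching above relies on walking the ->parent chain and remembering the last domain that still carries a given flag. A minimal userspace sketch of that walk follows; the struct layout, flag value and domain names are assumptions of this sketch, not the kernel's highest_flag_domain() implementation.

/* Illustrative parent-chain walk; not the kernel's highest_flag_domain(). */
#include <stdio.h>

struct dom {
	const char *name;
	unsigned int flags;
	struct dom *parent;
};

#define FLAG_SHARE_LLC	0x1

/* Highest (closest to the root) domain that still has @flag set. */
static struct dom *highest_flag_dom(struct dom *sd, unsigned int flag)
{
	struct dom *found = NULL;

	for (; sd; sd = sd->parent) {
		if (sd->flags & flag)
			found = sd;
		/* the kernel version can also stop early for child-shared flags */
	}
	return found;
}

int main(void)
{
	struct dom pkg = { "PKG", 0,              NULL };
	struct dom mc  = { "MC",  FLAG_SHARE_LLC, &pkg };
	struct dom smt = { "SMT", FLAG_SHARE_LLC, &mc  };
	struct dom *llc = highest_flag_dom(&smt, FLAG_SHARE_LLC);

	printf("LLC domain: %s\n", llc ? llc->name : "none");
	return 0;
}
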
736 * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
740 cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) in cpu_attach_domain() argument
742 struct rq *rq = cpu_rq(cpu); in cpu_attach_domain()
747 struct sched_domain *parent = tmp->parent; in cpu_attach_domain()
752 tmp->parent = parent->parent; in cpu_attach_domain()
754 if (parent->parent) { in cpu_attach_domain()
755 parent->parent->child = tmp; in cpu_attach_domain()
756 parent->parent->groups->flags = tmp->flags; in cpu_attach_domain()
764 if (parent->flags & SD_PREFER_SIBLING) in cpu_attach_domain()
765 tmp->flags |= SD_PREFER_SIBLING; in cpu_attach_domain()
768 tmp = tmp->parent; in cpu_attach_domain()
773 sd = sd->parent; in cpu_attach_domain()
776 struct sched_group *sg = sd->groups; in cpu_attach_domain()
784 sg->flags = 0; in cpu_attach_domain()
785 } while (sg != sd->groups); in cpu_attach_domain()
787 sd->child = NULL; in cpu_attach_domain()
791 sched_domain_debug(sd, cpu); in cpu_attach_domain()
794 tmp = rq->sd; in cpu_attach_domain()
795 rcu_assign_pointer(rq->sd, sd); in cpu_attach_domain()
796 dirty_sched_domain_sysctl(cpu); in cpu_attach_domain()
799 update_top_cache_domain(cpu); in cpu_attach_domain()
815 * Return the canonical balance CPU for this group; this is the first CPU
832 * Given a node-distance table, for example:
842 * 0 ----- 1
846 * 3 ----- 2
854 * NUMA-2 0-3 0-3 0-3 0-3
855 * groups: {0-1,3},{1-3} {0-2},{0,2-3} {1-3},{0-1,3} {0,2-3},{0-2}
857 * NUMA-1 0-1,3 0-2 1-3 0,2-3
860 * NUMA-0 0 1 2 3
865 * represented multiple times -- hence the "overlap" naming for this part of
869 * domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
873 * - the first group of each domain is its child domain; this
874 * gets us the first 0-1,3
875 * - the only uncovered node is 2, whose child domain is 1-3.
877 * However, because of the overlap, computing a unique CPU for each group is
878 * more complicated. Consider for instance the groups of NODE-1 NUMA-2, both
879 * groups include the CPUs of Node-0, while those CPUs would not in fact ever
880 * end up at those groups (they would end up in group: 0-1,3).
903 * 0 ----- 1
907 * 2 ----- 3
913 * not of the same number for each CPU. Consider:
915 * NUMA-2 0-3 0-3
916 * groups: {0-2},{1-3} {1-3},{0-2}
918 * NUMA-1 0-2 0-3 0-3 1-3
920 * NUMA-0 0 1 2 3
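
The per-level NUMA spans sketched above come down to "all nodes within distance d of this node". A small userspace model of that idea is shown below; the 4-node ring distance table, the node_mask_t bitmask type and the nodes_within() helper are illustrative assumptions only, while the kernel builds the equivalent masks in sched_init_numa().

/* Simplified userspace model of "nodes within distance d of node n".
 * Not kernel code; node masks are plain bitmasks.
 */
#include <stdio.h>

#define NR_NODES 4
typedef unsigned int node_mask_t;		/* bit i == node i */

/* Distance table for the 0 -- 1 / 3 -- 2 ring example above. */
static const int distance[NR_NODES][NR_NODES] = {
	{ 10, 20, 30, 20 },
	{ 20, 10, 20, 30 },
	{ 30, 20, 10, 20 },
	{ 20, 30, 20, 10 },
};

/* Mask of nodes whose distance from @node does not exceed @max_dist. */
static node_mask_t nodes_within(int node, int max_dist)
{
	node_mask_t mask = 0;
	int i;

	for (i = 0; i < NR_NODES; i++)
		if (distance[node][i] <= max_dist)
			mask |= 1u << i;
	return mask;
}

int main(void)
{
	int node;

	/* dist <= 20 reproduces the NUMA-1 spans: 0-1,3  0-2  1-3  0,2-3 */
	for (node = 0; node < NR_NODES; node++)
		printf("node %d, dist<=20: 0x%x\n", node, nodes_within(node, 20));
	return 0;
}
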
938 struct sd_data *sdd = sd->private; in build_balance_mask()
945 sibling = *per_cpu_ptr(sdd->sd, i); in build_balance_mask()
952 if (!sibling->child) in build_balance_mask()
956 if (!cpumask_equal(sg_span, sched_domain_span(sibling->child))) in build_balance_mask()
967 * XXX: This creates per-node group entries; since the load-balancer will
968 * immediately access remote memory to construct this group's load-balance
972 build_group_from_child_sched_domain(struct sched_domain *sd, int cpu) in build_group_from_child_sched_domain() argument
978 GFP_KERNEL, cpu_to_node(cpu)); in build_group_from_child_sched_domain()
984 if (sd->child) { in build_group_from_child_sched_domain()
985 cpumask_copy(sg_span, sched_domain_span(sd->child)); in build_group_from_child_sched_domain()
986 sg->flags = sd->child->flags; in build_group_from_child_sched_domain()
991 atomic_inc(&sg->ref); in build_group_from_child_sched_domain()
999 struct sd_data *sdd = sd->private; in init_overlap_sched_group()
1001 int cpu; in init_overlap_sched_group() local
1004 cpu = cpumask_first(mask); in init_overlap_sched_group()
1006 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); in init_overlap_sched_group()
1007 if (atomic_inc_return(&sg->sgc->ref) == 1) in init_overlap_sched_group()
1013 * Initialize sgc->capacity such that even if we mess up the in init_overlap_sched_group()
1018 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); in init_overlap_sched_group()
1019 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE; in init_overlap_sched_group()
1020 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; in init_overlap_sched_group()
1030 while (sibling->child && in find_descended_sibling()
1031 !cpumask_subset(sched_domain_span(sibling->child), in find_descended_sibling()
1033 sibling = sibling->child; in find_descended_sibling()
1040 while (sibling->child && in find_descended_sibling()
1041 cpumask_equal(sched_domain_span(sibling->child), in find_descended_sibling()
1043 sibling = sibling->child; in find_descended_sibling()
1049 build_overlap_sched_groups(struct sched_domain *sd, int cpu) in build_overlap_sched_groups() argument
1054 struct sd_data *sdd = sd->private; in build_overlap_sched_groups()
1060 for_each_cpu_wrap(i, span, cpu) { in build_overlap_sched_groups()
1066 sibling = *per_cpu_ptr(sdd->sd, i); in build_overlap_sched_groups()
1075 * Domains should always include the CPU they're built on, so in build_overlap_sched_groups()
1096 * 0 --- 1 --- 2 --- 3 in build_overlap_sched_groups()
1098 * NUMA-3 0-3 N/A N/A 0-3 in build_overlap_sched_groups()
1099 * groups: {0-2},{1-3} {1-3},{0-2} in build_overlap_sched_groups()
1101 * NUMA-2 0-2 0-3 0-3 1-3 in build_overlap_sched_groups()
1102 * groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2} in build_overlap_sched_groups()
1104 * NUMA-1 0-1 0-2 1-3 2-3 in build_overlap_sched_groups()
1107 * NUMA-0 0 1 2 3 in build_overlap_sched_groups()
1109 * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the in build_overlap_sched_groups()
1112 if (sibling->child && in build_overlap_sched_groups()
1113 !cpumask_subset(sched_domain_span(sibling->child), span)) in build_overlap_sched_groups()
1116 sg = build_group_from_child_sched_domain(sibling, cpu); in build_overlap_sched_groups()
1128 last->next = sg; in build_overlap_sched_groups()
1130 last->next = first; in build_overlap_sched_groups()
1132 sd->groups = first; in build_overlap_sched_groups()
1139 return -ENOMEM; in build_overlap_sched_groups()
1144 * Package topology (also see the load-balance blurb in fair.c)
1149 * - Simultaneous multithreading (SMT)
1150 * - Multi-Core Cache (MC)
1151 * - Package (PKG)
1157 * sched_domain -> sched_group -> sched_group_capacity
1159 * `-' `-'
1161 * The sched_domains are per-CPU and have a two way link (parent & child) and
1167 * CPU of that sched_domain [*].
1171 * CPU 0 1 2 3 4 5 6 7
1177 * - or -
1179 * PKG 0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
1180 * MC 0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
1181 * SMT 0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
1183 * CPU 0 1 2 3 4 5 6 7
1191 * There are two related construction problems, both require a CPU that
1194 * - The first is the balance_cpu (see should_we_balance() and the
1195 * load-balance blurb in fair.c); for each group we only want 1 CPU to
1198 * - The second is the sched_group_capacity; we want all identical groups
1204 * for each CPU in the hierarchy.
1206 * Therefore computing a unique CPU for each group is trivial (the iteration
1208 * group), we can simply pick the first CPU in each group.
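
A hedged sketch of the structure just described: each domain keeps a circular, singly linked list of groups, and the unique balance CPU of a group is simply the first CPU in its span. The struct names and the bitmask cpumask model below are assumptions of this sketch, not the kernel's sched_group layout.

/* Toy model of the sched_domain -> sched_group ring described above.
 * Not kernel code: cpumasks are plain bitmasks, allocation is static.
 */
#include <stdio.h>

struct group {
	unsigned long span;	/* bit i == CPU i */
	struct group *next;	/* circular list */
};

/* First set bit == "canonical" balance CPU of the group. */
static int group_first_cpu(const struct group *g)
{
	return __builtin_ctzl(g->span);
}

int main(void)
{
	/* MC level of the 8-CPU, SMT-2 example: groups 0-3 and 4-7. */
	struct group g0 = { .span = 0x0f }, g1 = { .span = 0xf0 };
	struct group *sd_groups = &g0, *g;

	g0.next = &g1;
	g1.next = &g0;

	g = sd_groups;
	do {
		printf("group span=0x%02lx balance_cpu=%d\n",
		       g->span, group_first_cpu(g));
		g = g->next;
	} while (g != sd_groups);

	return 0;
}
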
1214 static struct sched_group *get_group(int cpu, struct sd_data *sdd) in get_group() argument
1216 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); in get_group()
1217 struct sched_domain *child = sd->child; in get_group()
1222 cpu = cpumask_first(sched_domain_span(child)); in get_group()
1224 sg = *per_cpu_ptr(sdd->sg, cpu); in get_group()
1225 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); in get_group()
1228 already_visited = atomic_inc_return(&sg->ref) > 1; in get_group()
1230 WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1)); in get_group()
1239 sg->flags = child->flags; in get_group()
1241 cpumask_set_cpu(cpu, sched_group_span(sg)); in get_group()
1242 cpumask_set_cpu(cpu, group_balance_mask(sg)); in get_group()
1245 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg)); in get_group()
1246 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE; in get_group()
1247 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; in get_group()
1254 * covered by the given span, will set each group's ->cpumask correctly,
1255 * and will initialize their ->sgc.
1260 build_sched_groups(struct sched_domain *sd, int cpu) in build_sched_groups() argument
1263 struct sd_data *sdd = sd->private; in build_sched_groups()
1273 for_each_cpu_wrap(i, span, cpu) { in build_sched_groups()
1286 last->next = sg; in build_sched_groups()
1289 last->next = first; in build_sched_groups()
1290 sd->groups = first; in build_sched_groups()
1298 * cpu_capacity indicates the capacity of a sched group, which is used while
1305 static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) in init_sched_groups_capacity() argument
1307 struct sched_group *sg = sd->groups; in init_sched_groups_capacity()
1313 int cpu, cores = 0, max_cpu = -1; in init_sched_groups_capacity() local
1315 sg->group_weight = cpumask_weight(sched_group_span(sg)); in init_sched_groups_capacity()
1318 for_each_cpu(cpu, mask) { in init_sched_groups_capacity()
1321 cpumask_andnot(mask, mask, cpu_smt_mask(cpu)); in init_sched_groups_capacity()
1324 sg->cores = cores; in init_sched_groups_capacity()
1326 if (!(sd->flags & SD_ASYM_PACKING)) in init_sched_groups_capacity()
1329 for_each_cpu(cpu, sched_group_span(sg)) { in init_sched_groups_capacity()
1331 max_cpu = cpu; in init_sched_groups_capacity()
1332 else if (sched_asym_prefer(cpu, max_cpu)) in init_sched_groups_capacity()
1333 max_cpu = cpu; in init_sched_groups_capacity()
1335 sg->asym_prefer_cpu = max_cpu; in init_sched_groups_capacity()
1338 sg = sg->next; in init_sched_groups_capacity()
1339 } while (sg != sd->groups); in init_sched_groups_capacity()
1341 if (cpu != group_balance_cpu(sg)) in init_sched_groups_capacity()
1344 update_group_capacity(sd, cpu); in init_sched_groups_capacity()
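
The cores-counting loop above (sg->cores) can be modelled in a few lines: walk the group span and, for every CPU seen, clear its whole SMT sibling mask so each core is counted once. The bitmask cpumask model and the fixed SMT-2 pairing below are assumptions of this sketch.

/* Count "cores" in a group span by collapsing SMT siblings.
 * Userspace model only; masks are plain bitmasks.
 */
#include <stdio.h>

/* SMT-2 pairing from the topology example: (0,1) (2,3) (4,5) (6,7). */
static unsigned long smt_mask(int cpu)
{
	return 3ul << (cpu & ~1);
}

static int count_cores(unsigned long span)
{
	unsigned long mask = span;
	int cores = 0;

	while (mask) {
		int cpu = __builtin_ctzl(mask);

		cores++;
		mask &= ~smt_mask(cpu);	/* drop all siblings of this core */
	}
	return cores;
}

int main(void)
{
	printf("cores in 0x0f: %d\n", count_cores(0x0f)); /* CPUs 0-3 -> 2 */
	printf("cores in 0xff: %d\n", count_cores(0xff)); /* CPUs 0-7 -> 4 */
	return 0;
}
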
1349 * Each list entry contains a CPU mask reflecting CPUs that share the same
1350 * capacity.
1356 * Verify whether there is any CPU capacity asymmetry in a given sched domain.
1367 * Count how many unique CPU capacities this domain spans across in asym_cpu_capacity_classify()
1384 /* Some of the available CPU capacity values have not been detected */ in asym_cpu_capacity_classify()
1399 static inline void asym_cpu_capacity_update_data(int cpu) in asym_cpu_capacity_update_data() argument
1401 unsigned long capacity = arch_scale_cpu_capacity(cpu); in asym_cpu_capacity_update_data() local
1406 * Search if capacity already exists. If not, track the entry in asym_cpu_capacity_update_data()
1410 if (capacity == entry->capacity) in asym_cpu_capacity_update_data()
1412 else if (!insert_entry && capacity > entry->capacity) in asym_cpu_capacity_update_data()
1419 entry->capacity = capacity; in asym_cpu_capacity_update_data()
1421 /* If NULL then the new capacity is the smallest, add last. */ in asym_cpu_capacity_update_data()
1423 list_add_tail_rcu(&entry->link, &asym_cap_list); in asym_cpu_capacity_update_data()
1425 list_add_rcu(&entry->link, &insert_entry->link); in asym_cpu_capacity_update_data()
1427 __cpumask_set_cpu(cpu, cpu_capacity_span(entry)); in asym_cpu_capacity_update_data()
1431 * Build-up/update list of CPUs grouped by their capacities
1433 * with state indicating CPU topology changes.
1438 int cpu; in asym_cpu_capacity_scan() local
1443 for_each_cpu_and(cpu, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) in asym_cpu_capacity_scan()
1444 asym_cpu_capacity_update_data(cpu); in asym_cpu_capacity_scan()
1448 list_del_rcu(&entry->link); in asym_cpu_capacity_scan()
1449 call_rcu(&entry->rcu, free_asym_cap_entry); in asym_cpu_capacity_scan()
1454 * Only one capacity value has been detected, i.e. this system is symmetric. in asym_cpu_capacity_scan()
1459 list_del_rcu(&entry->link); in asym_cpu_capacity_scan()
1460 call_rcu(&entry->rcu, free_asym_cap_entry); in asym_cpu_capacity_scan()
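
A sketch of the list maintenance described above: one entry per distinct capacity value, kept in descending order, each holding the mask of CPUs with that capacity. The array-backed storage, entry layout and example capacity values are assumptions of this sketch; the kernel keeps the equivalent data in the RCU-protected asym_cap_list.

/* Userspace model of grouping CPUs by capacity, largest capacity first.
 * Not the kernel's RCU list; plain arrays and bitmasks instead.
 */
#include <stdio.h>

#define NR_CPUS		8
#define MAX_ENTRIES	NR_CPUS

struct cap_entry {
	unsigned long capacity;
	unsigned long cpus;	/* bitmask of CPUs with this capacity */
};

static struct cap_entry list[MAX_ENTRIES];
static int nr_entries;

static void update_data(int cpu, unsigned long capacity)
{
	int i, pos = nr_entries;

	for (i = 0; i < nr_entries; i++) {
		if (list[i].capacity == capacity) {	/* already tracked */
			list[i].cpus |= 1ul << cpu;
			return;
		}
		if (capacity > list[i].capacity) {	/* keep descending order */
			pos = i;
			break;
		}
	}

	for (i = nr_entries; i > pos; i--)		/* make room at pos */
		list[i] = list[i - 1];
	list[pos].capacity = capacity;
	list[pos].cpus = 1ul << cpu;
	nr_entries++;
}

int main(void)
{
	/* Hypothetical big.LITTLE: 4 CPUs of capacity 446, 4 of 1024. */
	static const unsigned long cap[NR_CPUS] = {
		446, 446, 446, 446, 1024, 1024, 1024, 1024
	};
	int cpu, i;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		update_data(cpu, cap[cpu]);

	for (i = 0; i < nr_entries; i++)
		printf("capacity %4lu: cpus 0x%02lx\n",
		       list[i].capacity, list[i].cpus);
	return 0;
}
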
1466 * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
1469 static int default_relax_domain_level = -1;
1486 if (!attr || attr->relax_domain_level < 0) { in set_domain_attribute()
1491 request = attr->relax_domain_level; in set_domain_attribute()
1493 if (sd->level >= request) { in set_domain_attribute()
1495 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); in set_domain_attribute()
1507 if (!atomic_read(&d->rd->refcount)) in __free_domain_allocs()
1508 free_rootdomain(&d->rd->rcu); in __free_domain_allocs()
1511 free_percpu(d->sd); in __free_domain_allocs()
1528 d->sd = alloc_percpu(struct sched_domain *); in __visit_domain_allocation_hell()
1529 if (!d->sd) in __visit_domain_allocation_hell()
1531 d->rd = alloc_rootdomain(); in __visit_domain_allocation_hell()
1532 if (!d->rd) in __visit_domain_allocation_hell()
1543 static void claim_allocations(int cpu, struct sched_domain *sd) in claim_allocations() argument
1545 struct sd_data *sdd = sd->private; in claim_allocations()
1547 WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); in claim_allocations()
1548 *per_cpu_ptr(sdd->sd, cpu) = NULL; in claim_allocations()
1550 if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref)) in claim_allocations()
1551 *per_cpu_ptr(sdd->sds, cpu) = NULL; in claim_allocations()
1553 if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) in claim_allocations()
1554 *per_cpu_ptr(sdd->sg, cpu) = NULL; in claim_allocations()
1556 if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref)) in claim_allocations()
1557 *per_cpu_ptr(sdd->sgc, cpu) = NULL; in claim_allocations()
1586 * SD_ASYM_PACKING - describes SMT quirks
1598 struct sched_domain *child, int cpu) in sd_init() argument
1600 struct sd_data *sdd = &tl->data; in sd_init()
1601 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); in sd_init()
1609 sched_domains_curr_level = tl->numa_level; in sd_init()
1612 sd_weight = cpumask_weight(tl->mask(cpu)); in sd_init()
1614 if (tl->sd_flags) in sd_init()
1615 sd_flags = (*tl->sd_flags)(); in sd_init()
1646 .name = tl->name, in sd_init()
1650 cpumask_and(sd_span, cpu_map, tl->mask(cpu)); in sd_init()
1653 sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map); in sd_init()
1655 WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) == in sd_init()
1657 "CPU capacity asymmetry not supported on SMT\n"); in sd_init()
1663 if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child) in sd_init()
1664 sd->child->flags &= ~SD_PREFER_SIBLING; in sd_init()
1666 if (sd->flags & SD_SHARE_CPUCAPACITY) { in sd_init()
1667 sd->imbalance_pct = 110; in sd_init()
1669 } else if (sd->flags & SD_SHARE_LLC) { in sd_init()
1670 sd->imbalance_pct = 117; in sd_init()
1671 sd->cache_nice_tries = 1; in sd_init()
1674 } else if (sd->flags & SD_NUMA) { in sd_init()
1675 sd->cache_nice_tries = 2; in sd_init()
1677 sd->flags &= ~SD_PREFER_SIBLING; in sd_init()
1678 sd->flags |= SD_SERIALIZE; in sd_init()
1679 if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) { in sd_init()
1680 sd->flags &= ~(SD_BALANCE_EXEC | in sd_init()
1687 sd->cache_nice_tries = 1; in sd_init()
1694 if (sd->flags & SD_SHARE_LLC) { in sd_init()
1695 sd->shared = *per_cpu_ptr(sdd->sds, sd_id); in sd_init()
1696 atomic_inc(&sd->shared->ref); in sd_init()
1697 atomic_set(&sd->shared->nr_busy_cpus, sd_weight); in sd_init()
1700 sd->private = sdd; in sd_init()
1706 * Topology list, bottom-up.
1729 for (tl = sched_domain_topology; tl->mask; tl++)
1742 static const struct cpumask *sd_numa_mask(int cpu) in sd_numa_mask() argument
1744 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; in sd_numa_mask()
1815 * - If the maximum distance between any nodes is 1 hop, the system
1817 * - If for two nodes A and B, located N > 1 hops away from each other,
1870 * O(nr_nodes^2) de-duplicating selection sort -- in order to find the in sched_init_numa()
1953 sched_numa_warn("Node-distance not symmetric"); in sched_init_numa()
2004 WRITE_ONCE(sched_max_numa_distance, sched_domains_numa_distance[nr_levels - 1]); in sched_init_numa()
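
The "O(nr_nodes^2) de-duplicating selection sort" mentioned above boils the node-distance table down to its unique, ascending distance levels. A simplified stand-in is shown below; the 4-node ring table is just an example and this is not the sched_init_numa() code itself.

/* Derive the unique, ascending NUMA distance levels from a distance table. */
#include <stdio.h>
#include <stdbool.h>

#define NR_NODES 4

static const int distance[NR_NODES][NR_NODES] = {
	{ 10, 20, 30, 20 },
	{ 20, 10, 20, 30 },
	{ 30, 20, 10, 20 },
	{ 20, 30, 20, 10 },
};

int main(void)
{
	int levels[NR_NODES * NR_NODES];
	int nr_levels = 0;
	int i, j, k;

	for (i = 0; i < NR_NODES; i++) {
		for (j = 0; j < NR_NODES; j++) {
			int d = distance[i][j];
			bool seen = false;
			int pos = nr_levels;

			for (k = 0; k < nr_levels; k++) {
				if (levels[k] == d)
					seen = true;
				else if (levels[k] > d && pos == nr_levels)
					pos = k;
			}
			if (seen)
				continue;
			for (k = nr_levels; k > pos; k--)	/* keep ascending */
				levels[k] = levels[k - 1];
			levels[pos] = d;
			nr_levels++;
		}
	}

	printf("%d distance levels:", nr_levels);
	for (k = 0; k < nr_levels; k++)
		printf(" %d", levels[k]);
	printf("\n");	/* expect: 3 distance levels: 10 20 30 */
	return 0;
}
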
2047 void sched_update_numa(int cpu, bool online) in sched_update_numa() argument
2051 node = cpu_to_node(cpu); in sched_update_numa()
2053 * Scheduler NUMA topology is updated when the first CPU of a in sched_update_numa()
2054 * node is onlined or the last CPU of a node is offlined. in sched_update_numa()
2063 void sched_domains_numa_masks_set(unsigned int cpu) in sched_domains_numa_masks_set() argument
2065 int node = cpu_to_node(cpu); in sched_domains_numa_masks_set()
2075 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); in sched_domains_numa_masks_set()
2080 void sched_domains_numa_masks_clear(unsigned int cpu) in sched_domains_numa_masks_clear() argument
2087 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); in sched_domains_numa_masks_clear()
2093 * sched_numa_find_closest() - given the NUMA topology, find the cpu
2094 * closest to @cpu from @cpus.
2095 * @cpus: cpumask to find a CPU from
2096 * @cpu: CPU to be close to
2098 * Return: cpu, or nr_cpu_ids when nothing found.
2100 int sched_numa_find_closest(const struct cpumask *cpus, int cpu) in sched_numa_find_closest() argument
2102 int i, j = cpu_to_node(cpu), found = nr_cpu_ids; in sched_numa_find_closest()
2112 cpu = cpumask_any_and(cpus, masks[i][j]); in sched_numa_find_closest()
2113 if (cpu < nr_cpu_ids) { in sched_numa_find_closest()
2114 found = cpu; in sched_numa_find_closest()
2128 int cpu; member
2137 if (cpumask_weight_and(k->cpus, cur_hop[k->node]) <= k->cpu) in hop_cmp()
2140 if (b == k->masks) { in hop_cmp()
2141 k->w = 0; in hop_cmp()
2145 prev_hop = *((struct cpumask ***)b - 1); in hop_cmp()
2146 k->w = cpumask_weight_and(k->cpus, prev_hop[k->node]); in hop_cmp()
2147 if (k->w <= k->cpu) in hop_cmp()
2150 return -1; in hop_cmp()
2154 * sched_numa_find_nth_cpu() - given the NUMA topology, find the Nth closest CPU
2155 * from @cpus to @cpu, taking into account distance
2157 * @cpus: cpumask to find a cpu from
2158 * @cpu: CPU to start searching
2161 * Return: cpu, or nr_cpu_ids when nothing found.
2163 int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) in sched_numa_find_nth_cpu() argument
2165 struct __cmp_key k = { .cpus = cpus, .cpu = cpu }; in sched_numa_find_nth_cpu()
2170 return cpumask_nth_and(cpu, cpus, cpu_online_mask); in sched_numa_find_nth_cpu()
2174 /* CPU-less node entries are uninitialized in sched_domains_numa_masks */ in sched_numa_find_nth_cpu()
2183 hop = hop_masks - k.masks; in sched_numa_find_nth_cpu()
2186 cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node], k.masks[hop-1][node]) : in sched_numa_find_nth_cpu()
2187 cpumask_nth_and(cpu, cpus, k.masks[0][node]); in sched_numa_find_nth_cpu()
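
The bsearch over hop masks above can be pictured with a plain linear walk: the per-hop masks are cumulative, so the Nth closest CPU lives in the first hop whose intersection with the candidate mask has more than N bits, and within that hop the CPUs already counted at the previous hop are skipped. The mask values, single-node focus and helper names below are assumptions of this sketch, not the kernel's sched_domains_numa_masks layout.

/* Find the Nth closest CPU to a node using cumulative per-hop CPU masks. */
#include <stdio.h>

#define NR_HOPS 3

/* Cumulative masks for one node: hop 0 is a subset of hop 1, and so on. */
static const unsigned long hop_mask[NR_HOPS] = {
	0x03,	/* hop 0: CPUs 0-1 (local node)   */
	0x0f,	/* hop 1: CPUs 0-3                */
	0xff,	/* hop 2: CPUs 0-7 (whole system) */
};

static int nth_set_bit(unsigned long mask, int n)
{
	int cpu;

	for (cpu = 0; mask; cpu++, mask >>= 1)
		if ((mask & 1) && n-- == 0)
			return cpu;
	return -1;
}

/* Nth (0-based) CPU of @cpus, ordered by hop distance from the node. */
static int find_nth_cpu(unsigned long cpus, int n)
{
	unsigned long prev = 0;
	int hop;

	for (hop = 0; hop < NR_HOPS; hop++) {
		unsigned long cur = cpus & hop_mask[hop];

		if (n < __builtin_popcountl(cur))
			return nth_set_bit(cur & ~prev,
					   n - __builtin_popcountl(prev));
		prev = cur;
	}
	return -1;	/* nothing found */
}

int main(void)
{
	unsigned long cpus = 0xaa;	/* candidate CPUs: 1, 3, 5, 7 */
	int n;

	for (n = 0; n < 4; n++)
		printf("n=%d -> CPU %d\n", n, find_nth_cpu(cpus, n));
	return 0;
}
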
2195 * sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from
2204 * read-side section, copy it if required beyond that.
2216 return ERR_PTR(-EINVAL); in sched_numa_hop_mask()
2220 return ERR_PTR(-EBUSY); in sched_numa_hop_mask()
2234 struct sd_data *sdd = &tl->data; in __sdt_alloc()
2236 sdd->sd = alloc_percpu(struct sched_domain *); in __sdt_alloc()
2237 if (!sdd->sd) in __sdt_alloc()
2238 return -ENOMEM; in __sdt_alloc()
2240 sdd->sds = alloc_percpu(struct sched_domain_shared *); in __sdt_alloc()
2241 if (!sdd->sds) in __sdt_alloc()
2242 return -ENOMEM; in __sdt_alloc()
2244 sdd->sg = alloc_percpu(struct sched_group *); in __sdt_alloc()
2245 if (!sdd->sg) in __sdt_alloc()
2246 return -ENOMEM; in __sdt_alloc()
2248 sdd->sgc = alloc_percpu(struct sched_group_capacity *); in __sdt_alloc()
2249 if (!sdd->sgc) in __sdt_alloc()
2250 return -ENOMEM; in __sdt_alloc()
2261 return -ENOMEM; in __sdt_alloc()
2263 *per_cpu_ptr(sdd->sd, j) = sd; in __sdt_alloc()
2268 return -ENOMEM; in __sdt_alloc()
2270 *per_cpu_ptr(sdd->sds, j) = sds; in __sdt_alloc()
2275 return -ENOMEM; in __sdt_alloc()
2277 sg->next = sg; in __sdt_alloc()
2279 *per_cpu_ptr(sdd->sg, j) = sg; in __sdt_alloc()
2284 return -ENOMEM; in __sdt_alloc()
2287 sgc->id = j; in __sdt_alloc()
2290 *per_cpu_ptr(sdd->sgc, j) = sgc; in __sdt_alloc()
2303 struct sd_data *sdd = &tl->data; in __sdt_free()
2308 if (sdd->sd) { in __sdt_free()
2309 sd = *per_cpu_ptr(sdd->sd, j); in __sdt_free()
2310 if (sd && (sd->flags & SD_OVERLAP)) in __sdt_free()
2311 free_sched_groups(sd->groups, 0); in __sdt_free()
2312 kfree(*per_cpu_ptr(sdd->sd, j)); in __sdt_free()
2315 if (sdd->sds) in __sdt_free()
2316 kfree(*per_cpu_ptr(sdd->sds, j)); in __sdt_free()
2317 if (sdd->sg) in __sdt_free()
2318 kfree(*per_cpu_ptr(sdd->sg, j)); in __sdt_free()
2319 if (sdd->sgc) in __sdt_free()
2320 kfree(*per_cpu_ptr(sdd->sgc, j)); in __sdt_free()
2322 free_percpu(sdd->sd); in __sdt_free()
2323 sdd->sd = NULL; in __sdt_free()
2324 free_percpu(sdd->sds); in __sdt_free()
2325 sdd->sds = NULL; in __sdt_free()
2326 free_percpu(sdd->sg); in __sdt_free()
2327 sdd->sg = NULL; in __sdt_free()
2328 free_percpu(sdd->sgc); in __sdt_free()
2329 sdd->sgc = NULL; in __sdt_free()
2335 struct sched_domain *child, int cpu) in build_sched_domain() argument
2337 struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu); in build_sched_domain()
2340 sd->level = child->level + 1; in build_sched_domain()
2341 sched_domain_level_max = max(sched_domain_level_max, sd->level); in build_sched_domain()
2342 child->parent = sd; in build_sched_domain()
2348 child->name, sd->name); in build_sched_domain()
2363 * any two given CPUs at this (non-NUMA) topology level.
2366 const struct cpumask *cpu_map, int cpu) in topology_span_sane() argument
2368 int i = cpu + 1; in topology_span_sane()
2371 if (tl->flags & SDTL_OVERLAP) in topology_span_sane()
2375 * Non-NUMA levels cannot partially overlap - they must be either in topology_span_sane()
2377 * breaking the sched_group lists - i.e. a later get_group() pass in topology_span_sane()
2387 if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) && in topology_span_sane()
2388 cpumask_intersects(tl->mask(cpu), tl->mask(i))) in topology_span_sane()
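
The sanity rule enforced above (masks of the same non-NUMA level must be identical or disjoint) fits in a few lines once cpumasks are modelled as bitmasks; the masks used below are hypothetical.

/* Two CPU masks of the same (non-NUMA) topology level are sane only if
 * they are either identical or do not intersect at all.  Userspace sketch.
 */
#include <stdio.h>
#include <stdbool.h>

static bool spans_sane(unsigned long a, unsigned long b)
{
	return a == b || (a & b) == 0;
}

int main(void)
{
	/* identical: OK, disjoint: OK, partial overlap: broken. */
	printf("%d %d %d\n",
	       spans_sane(0x0f, 0x0f),
	       spans_sane(0x0f, 0xf0),
	       spans_sane(0x0f, 0x3c));
	return 0;
}
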
2406 int i, ret = -ENOMEM; in build_sched_domains()
2429 has_asym |= sd->flags & SD_ASYM_CPUCAPACITY; in build_sched_domains()
2433 if (tl->flags & SDTL_OVERLAP) in build_sched_domains()
2434 sd->flags |= SD_OVERLAP; in build_sched_domains()
2442 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2443 sd->span_weight = cpumask_weight(sched_domain_span(sd)); in build_sched_domains()
2444 if (sd->flags & SD_OVERLAP) { in build_sched_domains()
2462 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2463 struct sched_domain *child = sd->child; in build_sched_domains()
2465 if (!(sd->flags & SD_SHARE_LLC) && child && in build_sched_domains()
2466 (child->flags & SD_SHARE_LLC)) { in build_sched_domains()
2473 * arbitrary cutoff based on two factors -- SMT and in build_sched_domains()
2474 * memory channels. For SMT-2, the intent is to in build_sched_domains()
2476 * SMT-4 or SMT-8 *may* benefit from a different in build_sched_domains()
2490 nr_llcs = sd->span_weight / child->span_weight; in build_sched_domains()
2492 imb = sd->span_weight >> 3; in build_sched_domains()
2496 sd->imb_numa_nr = imb; in build_sched_domains()
2499 top_p = sd->parent; in build_sched_domains()
2500 while (top_p && !(top_p->flags & SD_NUMA)) { in build_sched_domains()
2501 top_p = top_p->parent; in build_sched_domains()
2503 imb_span = top_p ? top_p->span_weight : sd->span_weight; in build_sched_domains()
2505 int factor = max(1U, (sd->span_weight / imb_span)); in build_sched_domains()
2507 sd->imb_numa_nr = imb * factor; in build_sched_domains()
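
A worked-out version of the arithmetic above, under an assumed 2-socket machine with 64 CPUs per socket: with one LLC per socket the allowed imbalance is 12.5% of the node (64 >> 3 = 8), while with several LLCs per socket it is one task per LLC. The domain weights and helper below are illustrative assumptions, not the kernel's domain tree.

/* Worked example of the imb_numa_nr arithmetic above.  Not kernel code. */
#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

static void imb_numa(unsigned int pkg_weight, unsigned int llc_weight,
		     unsigned int numa_weight)
{
	unsigned int nr_llcs = pkg_weight / llc_weight;
	unsigned int imb, imb_span, factor;

	/* Lowest non-LLC domain with an LLC child (the "package" here). */
	if (nr_llcs == 1)
		imb = pkg_weight >> 3;		/* 12.5% of the node */
	else
		imb = nr_llcs;			/* one task per LLC */
	imb = max_u(1, imb);
	imb_span = numa_weight;			/* first NUMA parent's span */

	/* NUMA domains above it scale by how much wider they are. */
	factor = max_u(1, numa_weight / imb_span);

	printf("llc=%2u pkg=%u: pkg imb_numa_nr=%u, numa imb_numa_nr=%u\n",
	       llc_weight, pkg_weight, imb, imb * factor);
}

int main(void)
{
	imb_numa(64, 64, 128);	/* one LLC per socket   -> 8 */
	imb_numa(64, 16, 128);	/* four LLCs per socket -> 4 */
	return 0;
}
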
2512 /* Calculate CPU capacity for physical packages and nodes */ in build_sched_domains()
2513 for (i = nr_cpumask_bits-1; i >= 0; i--) { in build_sched_domains()
2517 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2570 * CPU core maps. It is supposed to return 1 if the topology changed
2633 unsigned int cpu = cpumask_any(cpu_map); in detach_destroy_domains() local
2636 if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu))) in detach_destroy_domains()
2700 /* Let the architecture update CPU core mappings: */ in partition_sched_domains_locked()
2702 /* Trigger rebuilding CPU capacity asymmetry data */ in partition_sched_domains_locked()
2728 * its dl_bw->total_bw needs to be cleared. in partition_sched_domains_locked()
2734 rd = cpu_rq(cpumask_any(doms_cur[i]))->rd; in partition_sched_domains_locked()
2739 /* No match - a current sched domain not in new doms_new[] */ in partition_sched_domains_locked()
2760 /* No match - add a new doms_new */ in partition_sched_domains_locked()
2771 cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) { in partition_sched_domains_locked()
2776 /* No match - add perf domains for a new rd */ in partition_sched_domains_locked()