Lines Matching +full:cpu +full:- +full:capacity

1 // SPDX-License-Identifier: GPL-2.0
43 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, in sched_domain_debug_one() argument
46 struct sched_group *group = sd->groups; in sched_domain_debug_one()
47 unsigned long flags = sd->flags; in sched_domain_debug_one()
52 printk(KERN_DEBUG "%*s domain-%d: ", level, "", level); in sched_domain_debug_one()
54 cpumask_pr_args(sched_domain_span(sd)), sd->name); in sched_domain_debug_one()
56 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { in sched_domain_debug_one()
57 printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu); in sched_domain_debug_one()
59 if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) { in sched_domain_debug_one()
60 printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu); in sched_domain_debug_one()
67 if ((meta_flags & SDF_SHARED_CHILD) && sd->child && in sched_domain_debug_one()
68 !(sd->child->flags & flag)) in sched_domain_debug_one()
72 if ((meta_flags & SDF_SHARED_PARENT) && sd->parent && in sched_domain_debug_one()
73 !(sd->parent->flags & flag)) in sched_domain_debug_one()
92 if (!(sd->flags & SD_OVERLAP) && in sched_domain_debug_one()
102 group->sgc->id, in sched_domain_debug_one()
105 if ((sd->flags & SD_OVERLAP) && in sched_domain_debug_one()
111 if (group->sgc->capacity != SCHED_CAPACITY_SCALE) in sched_domain_debug_one()
112 printk(KERN_CONT " cap=%lu", group->sgc->capacity); in sched_domain_debug_one()
114 if (group == sd->groups && sd->child && in sched_domain_debug_one()
115 !cpumask_equal(sched_domain_span(sd->child), in sched_domain_debug_one()
117 printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n"); in sched_domain_debug_one()
122 group = group->next; in sched_domain_debug_one()
124 if (group != sd->groups) in sched_domain_debug_one()
127 } while (group != sd->groups); in sched_domain_debug_one()
131 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); in sched_domain_debug_one()
133 if (sd->parent && in sched_domain_debug_one()
134 !cpumask_subset(groupmask, sched_domain_span(sd->parent))) in sched_domain_debug_one()
135 printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n"); in sched_domain_debug_one()
139 static void sched_domain_debug(struct sched_domain *sd, int cpu) in sched_domain_debug() argument
147 printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); in sched_domain_debug()
151 printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu); in sched_domain_debug()
154 if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask)) in sched_domain_debug()
157 sd = sd->parent; in sched_domain_debug()
165 # define sched_domain_debug(sd, cpu) do { } while (0) argument
185 if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) && in sd_degenerate()
186 (sd->groups != sd->groups->next)) in sd_degenerate()
190 if (sd->flags & (SD_WAKE_AFFINE)) in sd_degenerate()
199 unsigned long cflags = sd->flags, pflags = parent->flags; in sd_parent_degenerate()
208 if (parent->groups == parent->groups->next) in sd_parent_degenerate()
230 /* EAS is enabled for asymmetric CPU capacity topologies. */ in sched_is_eas_possible()
256 pr_info("rd %*pbl: Checking EAS: frequency-invariant load tracking not yet supported", in sched_is_eas_possible()
267 pr_info("rd %*pbl: Checking EAS, cpufreq policy not set for CPU: %d", in sched_is_eas_possible()
272 gov = policy->governor; in sched_is_eas_possible()
302 return -EPERM; in sched_energy_aware_handler()
306 return -EOPNOTSUPP; in sched_energy_aware_handler()
349 tmp = pd->next; in free_pd()
355 static struct perf_domain *find_pd(struct perf_domain *pd, int cpu) in find_pd() argument
358 if (cpumask_test_cpu(cpu, perf_domain_span(pd))) in find_pd()
360 pd = pd->next; in find_pd()
366 static struct perf_domain *pd_init(int cpu) in pd_init() argument
368 struct em_perf_domain *obj = em_cpu_get(cpu); in pd_init()
373 pr_info("%s: no EM found for CPU%d\n", __func__, cpu); in pd_init()
380 pd->em_pd = obj; in pd_init()
397 em_pd_nr_perf_states(pd->em_pd)); in perf_domain_debug()
398 pd = pd->next; in perf_domain_debug()
437 int cpu = cpumask_first(cpu_map); in build_perf_domains() local
438 struct root_domain *rd = cpu_rq(cpu)->rd; in build_perf_domains()
455 tmp->next = pd; in build_perf_domains()
462 tmp = rd->pd; in build_perf_domains()
463 rcu_assign_pointer(rd->pd, pd); in build_perf_domains()
465 call_rcu(&tmp->rcu, destroy_perf_domain_rcu); in build_perf_domains()
471 tmp = rd->pd; in build_perf_domains()
472 rcu_assign_pointer(rd->pd, NULL); in build_perf_domains()
474 call_rcu(&tmp->rcu, destroy_perf_domain_rcu); in build_perf_domains()
486 cpupri_cleanup(&rd->cpupri); in free_rootdomain()
487 cpudl_cleanup(&rd->cpudl); in free_rootdomain()
488 free_cpumask_var(rd->dlo_mask); in free_rootdomain()
489 free_cpumask_var(rd->rto_mask); in free_rootdomain()
490 free_cpumask_var(rd->online); in free_rootdomain()
491 free_cpumask_var(rd->span); in free_rootdomain()
492 free_pd(rd->pd); in free_rootdomain()
503 if (rq->rd) { in rq_attach_root()
504 old_rd = rq->rd; in rq_attach_root()
506 if (cpumask_test_cpu(rq->cpu, old_rd->online)) in rq_attach_root()
509 cpumask_clear_cpu(rq->cpu, old_rd->span); in rq_attach_root()
516 if (!atomic_dec_and_test(&old_rd->refcount)) in rq_attach_root()
520 atomic_inc(&rd->refcount); in rq_attach_root()
521 rq->rd = rd; in rq_attach_root()
523 cpumask_set_cpu(rq->cpu, rd->span); in rq_attach_root()
524 if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) in rq_attach_root()
532 if (rq->fair_server.dl_server) in rq_attach_root()
533 __dl_server_attach_root(&rq->fair_server, rq); in rq_attach_root()
538 call_rcu(&old_rd->rcu, free_rootdomain); in rq_attach_root()
543 atomic_inc(&rd->refcount); in sched_get_rd()
548 if (!atomic_dec_and_test(&rd->refcount)) in sched_put_rd()
551 call_rcu(&rd->rcu, free_rootdomain); in sched_put_rd()
556 if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL)) in init_rootdomain()
558 if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL)) in init_rootdomain()
560 if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) in init_rootdomain()
562 if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) in init_rootdomain()
566 rd->rto_cpu = -1; in init_rootdomain()
567 raw_spin_lock_init(&rd->rto_lock); in init_rootdomain()
568 rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func); in init_rootdomain()
571 rd->visit_cookie = 0; in init_rootdomain()
572 init_dl_bw(&rd->dl_bw); in init_rootdomain()
573 if (cpudl_init(&rd->cpudl) != 0) in init_rootdomain()
576 if (cpupri_init(&rd->cpupri) != 0) in init_rootdomain()
581 cpudl_cleanup(&rd->cpudl); in init_rootdomain()
583 free_cpumask_var(rd->rto_mask); in init_rootdomain()
585 free_cpumask_var(rd->dlo_mask); in init_rootdomain()
587 free_cpumask_var(rd->online); in init_rootdomain()
589 free_cpumask_var(rd->span); in init_rootdomain()
591 return -ENOMEM; in init_rootdomain()
595 * By default the system creates a single root-domain with all CPUs as
632 tmp = sg->next; in free_sched_groups()
634 if (free_sgc && atomic_dec_and_test(&sg->sgc->ref)) in free_sched_groups()
635 kfree(sg->sgc); in free_sched_groups()
637 if (atomic_dec_and_test(&sg->ref)) in free_sched_groups()
648 * dropping group/capacity references, freeing where none remain. in destroy_sched_domain()
650 free_sched_groups(sd->groups, 1); in destroy_sched_domain()
652 if (sd->shared && atomic_dec_and_test(&sd->shared->ref)) in destroy_sched_domain()
653 kfree(sd->shared); in destroy_sched_domain()
662 struct sched_domain *parent = sd->parent; in destroy_sched_domains_rcu()
671 call_rcu(&sd->rcu, destroy_sched_domains_rcu); in destroy_sched_domains()
679 * Also keep a unique ID per domain (we use the first CPU number in the cpumask
695 static void update_top_cache_domain(int cpu) in update_top_cache_domain() argument
699 int id = cpu; in update_top_cache_domain()
702 sd = highest_flag_domain(cpu, SD_SHARE_LLC); in update_top_cache_domain()
706 sds = sd->shared; in update_top_cache_domain()
709 rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); in update_top_cache_domain()
710 per_cpu(sd_llc_size, cpu) = size; in update_top_cache_domain()
711 per_cpu(sd_llc_id, cpu) = id; in update_top_cache_domain()
712 rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds); in update_top_cache_domain()
714 sd = lowest_flag_domain(cpu, SD_CLUSTER); in update_top_cache_domain()
721 * but equals the LLC id on non-Cluster machines. in update_top_cache_domain()
723 per_cpu(sd_share_id, cpu) = id; in update_top_cache_domain()
725 sd = lowest_flag_domain(cpu, SD_NUMA); in update_top_cache_domain()
726 rcu_assign_pointer(per_cpu(sd_numa, cpu), sd); in update_top_cache_domain()
728 sd = highest_flag_domain(cpu, SD_ASYM_PACKING); in update_top_cache_domain()
729 rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd); in update_top_cache_domain()
731 sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL); in update_top_cache_domain()
732 rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd); in update_top_cache_domain()
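
As an aside, the per-CPU caching above relies on walking the ->parent chain and remembering the last domain that still carries a given flag. A minimal userspace sketch of that walk follows; the struct layout, flag value and domain names are assumptions of this sketch, not the kernel's highest_flag_domain() implementation.

/* Illustrative parent-chain walk; not the kernel's highest_flag_domain(). */
#include <stdio.h>

struct dom {
	const char *name;
	unsigned int flags;
	struct dom *parent;
};

#define FLAG_SHARE_LLC	0x1

/* Highest (closest to the root) domain that still has @flag set. */
static struct dom *highest_flag_dom(struct dom *sd, unsigned int flag)
{
	struct dom *found = NULL;

	for (; sd; sd = sd->parent) {
		if (sd->flags & flag)
			found = sd;
		/* the kernel version can also stop early for child-shared flags */
	}
	return found;
}

int main(void)
{
	struct dom pkg = { "PKG", 0,              NULL };
	struct dom mc  = { "MC",  FLAG_SHARE_LLC, &pkg };
	struct dom smt = { "SMT", FLAG_SHARE_LLC, &mc  };
	struct dom *llc = highest_flag_dom(&smt, FLAG_SHARE_LLC);

	printf("LLC domain: %s\n", llc ? llc->name : "none");
	return 0;
}
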
736 * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
740 cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) in cpu_attach_domain() argument
742 struct rq *rq = cpu_rq(cpu); in cpu_attach_domain()
747 struct sched_domain *parent = tmp->parent; in cpu_attach_domain()
752 tmp->parent = parent->parent; in cpu_attach_domain()
754 if (parent->parent) { in cpu_attach_domain()
755 parent->parent->child = tmp; in cpu_attach_domain()
756 parent->parent->groups->flags = tmp->flags; in cpu_attach_domain()
764 if (parent->flags & SD_PREFER_SIBLING) in cpu_attach_domain()
765 tmp->flags |= SD_PREFER_SIBLING; in cpu_attach_domain()
768 tmp = tmp->parent; in cpu_attach_domain()
773 sd = sd->parent; in cpu_attach_domain()
776 struct sched_group *sg = sd->groups; in cpu_attach_domain()
784 sg->flags = 0; in cpu_attach_domain()
785 } while (sg != sd->groups); in cpu_attach_domain()
787 sd->child = NULL; in cpu_attach_domain()
791 sched_domain_debug(sd, cpu); in cpu_attach_domain()
794 tmp = rq->sd; in cpu_attach_domain()
795 rcu_assign_pointer(rq->sd, sd); in cpu_attach_domain()
796 dirty_sched_domain_sysctl(cpu); in cpu_attach_domain()
799 update_top_cache_domain(cpu); in cpu_attach_domain()
815 * Return the canonical balance CPU for this group; this is the first CPU
832 * Given a node-distance table, for example:
842 * 0 ----- 1
846 * 3 ----- 2
854 * NUMA-2 0-3 0-3 0-3 0-3
855 * groups: {0-1,3},{1-3} {0-2},{0,2-3} {1-3},{0-1,3} {0,2-3},{0-2}
857 * NUMA-1 0-1,3 0-2 1-3 0,2-3
860 * NUMA-0 0 1 2 3
865 * represented multiple times -- hence the "overlap" naming for this part of
869 * domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
873 * - the first group of each domain is its child domain; this
874 * gets us the first 0-1,3
875 * - the only uncovered node is 2, whose child domain is 1-3.
877 * However, because of the overlap, computing a unique CPU for each group is
878 * more complicated. Consider for instance the groups of NODE-1 NUMA-2, both
879 * groups include the CPUs of Node-0, while those CPUs would not in fact ever
880 * end up at those groups (they would end up in group: 0-1,3).
903 * 0 ----- 1
907 * 2 ----- 3
913 * not of the same number for each CPU. Consider:
915 * NUMA-2 0-3 0-3
916 * groups: {0-2},{1-3} {1-3},{0-2}
918 * NUMA-1 0-2 0-3 0-3 1-3
920 * NUMA-0 0 1 2 3
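
The per-level NUMA spans sketched above come down to "all nodes within distance d of this node". A small userspace model of that idea is shown below; the 4-node ring distance table, the node_mask_t bitmask type and the nodes_within() helper are illustrative assumptions only, while the kernel builds the equivalent masks in sched_init_numa().

/* Simplified userspace model of "nodes within distance d of node n".
 * Not kernel code; node masks are plain bitmasks.
 */
#include <stdio.h>

#define NR_NODES 4
typedef unsigned int node_mask_t;		/* bit i == node i */

/* Distance table for the 0 -- 1 / 3 -- 2 ring example above. */
static const int distance[NR_NODES][NR_NODES] = {
	{ 10, 20, 30, 20 },
	{ 20, 10, 20, 30 },
	{ 30, 20, 10, 20 },
	{ 20, 30, 20, 10 },
};

/* Mask of nodes whose distance from @node does not exceed @max_dist. */
static node_mask_t nodes_within(int node, int max_dist)
{
	node_mask_t mask = 0;
	int i;

	for (i = 0; i < NR_NODES; i++)
		if (distance[node][i] <= max_dist)
			mask |= 1u << i;
	return mask;
}

int main(void)
{
	int node;

	/* dist <= 20 reproduces the NUMA-1 spans: 0-1,3  0-2  1-3  0,2-3 */
	for (node = 0; node < NR_NODES; node++)
		printf("node %d, dist<=20: 0x%x\n", node, nodes_within(node, 20));
	return 0;
}
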
938 struct sd_data *sdd = sd->private; in build_balance_mask()
945 sibling = *per_cpu_ptr(sdd->sd, i); in build_balance_mask()
952 if (!sibling->child) in build_balance_mask()
956 if (!cpumask_equal(sg_span, sched_domain_span(sibling->child))) in build_balance_mask()
967 * XXX: This creates per-node group entries; since the load-balancer will
968 * immediately access remote memory to construct this group's load-balance
972 build_group_from_child_sched_domain(struct sched_domain *sd, int cpu) in build_group_from_child_sched_domain() argument
978 GFP_KERNEL, cpu_to_node(cpu)); in build_group_from_child_sched_domain()
984 if (sd->child) { in build_group_from_child_sched_domain()
985 cpumask_copy(sg_span, sched_domain_span(sd->child)); in build_group_from_child_sched_domain()
986 sg->flags = sd->child->flags; in build_group_from_child_sched_domain()
991 atomic_inc(&sg->ref); in build_group_from_child_sched_domain()
999 struct sd_data *sdd = sd->private; in init_overlap_sched_group()
1001 int cpu; in init_overlap_sched_group() local
1004 cpu = cpumask_first(mask); in init_overlap_sched_group()
1006 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); in init_overlap_sched_group()
1007 if (atomic_inc_return(&sg->sgc->ref) == 1) in init_overlap_sched_group()
1013 * Initialize sgc->capacity such that even if we mess up the in init_overlap_sched_group()
1018 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); in init_overlap_sched_group()
1019 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE; in init_overlap_sched_group()
1020 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; in init_overlap_sched_group()
1030 while (sibling->child && in find_descended_sibling()
1031 !cpumask_subset(sched_domain_span(sibling->child), in find_descended_sibling()
1033 sibling = sibling->child; in find_descended_sibling()
1040 while (sibling->child && in find_descended_sibling()
1041 cpumask_equal(sched_domain_span(sibling->child), in find_descended_sibling()
1043 sibling = sibling->child; in find_descended_sibling()
1049 build_overlap_sched_groups(struct sched_domain *sd, int cpu) in build_overlap_sched_groups() argument
1054 struct sd_data *sdd = sd->private; in build_overlap_sched_groups()
1060 for_each_cpu_wrap(i, span, cpu) { in build_overlap_sched_groups()
1066 sibling = *per_cpu_ptr(sdd->sd, i); in build_overlap_sched_groups()
1075 * Domains should always include the CPU they're built on, so in build_overlap_sched_groups()
1096 * 0 --- 1 --- 2 --- 3 in build_overlap_sched_groups()
1098 * NUMA-3 0-3 N/A N/A 0-3 in build_overlap_sched_groups()
1099 * groups: {0-2},{1-3} {1-3},{0-2} in build_overlap_sched_groups()
1101 * NUMA-2 0-2 0-3 0-3 1-3 in build_overlap_sched_groups()
1102 * groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2} in build_overlap_sched_groups()
1104 * NUMA-1 0-1 0-2 1-3 2-3 in build_overlap_sched_groups()
1107 * NUMA-0 0 1 2 3 in build_overlap_sched_groups()
1109 * The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the in build_overlap_sched_groups()
1112 if (sibling->child && in build_overlap_sched_groups()
1113 !cpumask_subset(sched_domain_span(sibling->child), span)) in build_overlap_sched_groups()
1116 sg = build_group_from_child_sched_domain(sibling, cpu); in build_overlap_sched_groups()
1128 last->next = sg; in build_overlap_sched_groups()
1130 last->next = first; in build_overlap_sched_groups()
1132 sd->groups = first; in build_overlap_sched_groups()
1139 return -ENOMEM; in build_overlap_sched_groups()
1144 * Package topology (also see the load-balance blurb in fair.c)
1149 * - Simultaneous multithreading (SMT)
1150 * - Multi-Core Cache (MC)
1151 * - Package (PKG)
1157 * sched_domain -> sched_group -> sched_group_capacity
1159 * `-' `-'
1161 * The sched_domains are per-CPU and have a two way link (parent & child) and
1167 * CPU of that sched_domain [*].
1171 * CPU 0 1 2 3 4 5 6 7
1177 * - or -
1179 * PKG 0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
1180 * MC 0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
1181 * SMT 0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
1183 * CPU 0 1 2 3 4 5 6 7
1191 * There are two related construction problems, both require a CPU that
1194 * - The first is the balance_cpu (see should_we_balance() and the
1195 * load-balance blurb in fair.c); for each group we only want 1 CPU to
1198 * - The second is the sched_group_capacity; we want all identical groups
1204 * for each CPU in the hierarchy.
1206 * Therefore computing a unique CPU for each group is trivial (the iteration
1208 * group), we can simply pick the first CPU in each group.
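
A hedged sketch of the structure just described: each domain keeps a circular, singly linked list of groups, and the unique balance CPU of a group is simply the first CPU in its span. The struct names and the bitmask cpumask model below are assumptions of this sketch, not the kernel's sched_group layout.

/* Toy model of the sched_domain -> sched_group ring described above.
 * Not kernel code: cpumasks are plain bitmasks, allocation is static.
 */
#include <stdio.h>

struct group {
	unsigned long span;	/* bit i == CPU i */
	struct group *next;	/* circular list */
};

/* First set bit == "canonical" balance CPU of the group. */
static int group_first_cpu(const struct group *g)
{
	return __builtin_ctzl(g->span);
}

int main(void)
{
	/* MC level of the 8-CPU, SMT-2 example: groups 0-3 and 4-7. */
	struct group g0 = { .span = 0x0f }, g1 = { .span = 0xf0 };
	struct group *sd_groups = &g0, *g;

	g0.next = &g1;
	g1.next = &g0;

	g = sd_groups;
	do {
		printf("group span=0x%02lx balance_cpu=%d\n",
		       g->span, group_first_cpu(g));
		g = g->next;
	} while (g != sd_groups);

	return 0;
}
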
1214 static struct sched_group *get_group(int cpu, struct sd_data *sdd) in get_group() argument
1216 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); in get_group()
1217 struct sched_domain *child = sd->child; in get_group()
1222 cpu = cpumask_first(sched_domain_span(child)); in get_group()
1224 sg = *per_cpu_ptr(sdd->sg, cpu); in get_group()
1225 sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); in get_group()
1228 already_visited = atomic_inc_return(&sg->ref) > 1; in get_group()
1230 WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1)); in get_group()
1239 sg->flags = child->flags; in get_group()
1241 cpumask_set_cpu(cpu, sched_group_span(sg)); in get_group()
1242 cpumask_set_cpu(cpu, group_balance_mask(sg)); in get_group()
1245 sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg)); in get_group()
1246 sg->sgc->min_capacity = SCHED_CAPACITY_SCALE; in get_group()
1247 sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; in get_group()
1254 * covered by the given span, will set each group's ->cpumask correctly,
1255 * and will initialize their ->sgc.
1260 build_sched_groups(struct sched_domain *sd, int cpu) in build_sched_groups() argument
1263 struct sd_data *sdd = sd->private; in build_sched_groups()
1273 for_each_cpu_wrap(i, span, cpu) { in build_sched_groups()
1286 last->next = sg; in build_sched_groups()
1289 last->next = first; in build_sched_groups()
1290 sd->groups = first; in build_sched_groups()
1298 * cpu_capacity indicates the capacity of a sched group, which is used while
1305 static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) in init_sched_groups_capacity() argument
1307 struct sched_group *sg = sd->groups; in init_sched_groups_capacity()
1313 int cpu, cores = 0, max_cpu = -1; in init_sched_groups_capacity() local
1315 sg->group_weight = cpumask_weight(sched_group_span(sg)); in init_sched_groups_capacity()
1318 for_each_cpu(cpu, mask) { in init_sched_groups_capacity()
1321 cpumask_andnot(mask, mask, cpu_smt_mask(cpu)); in init_sched_groups_capacity()
1324 sg->cores = cores; in init_sched_groups_capacity()
1326 if (!(sd->flags & SD_ASYM_PACKING)) in init_sched_groups_capacity()
1329 for_each_cpu(cpu, sched_group_span(sg)) { in init_sched_groups_capacity()
1331 max_cpu = cpu; in init_sched_groups_capacity()
1332 else if (sched_asym_prefer(cpu, max_cpu)) in init_sched_groups_capacity()
1333 max_cpu = cpu; in init_sched_groups_capacity()
1335 sg->asym_prefer_cpu = max_cpu; in init_sched_groups_capacity()
1338 sg = sg->next; in init_sched_groups_capacity()
1339 } while (sg != sd->groups); in init_sched_groups_capacity()
1341 if (cpu != group_balance_cpu(sg)) in init_sched_groups_capacity()
1344 update_group_capacity(sd, cpu); in init_sched_groups_capacity()
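
The cores-counting loop above (sg->cores) can be modelled in a few lines: walk the group span and, for every CPU seen, clear its whole SMT sibling mask so each core is counted once. The bitmask cpumask model and the fixed SMT-2 pairing below are assumptions of this sketch.

/* Count "cores" in a group span by collapsing SMT siblings.
 * Userspace model only; masks are plain bitmasks.
 */
#include <stdio.h>

/* SMT-2 pairing from the topology example: (0,1) (2,3) (4,5) (6,7). */
static unsigned long smt_mask(int cpu)
{
	return 3ul << (cpu & ~1);
}

static int count_cores(unsigned long span)
{
	unsigned long mask = span;
	int cores = 0;

	while (mask) {
		int cpu = __builtin_ctzl(mask);

		cores++;
		mask &= ~smt_mask(cpu);	/* drop all siblings of this core */
	}
	return cores;
}

int main(void)
{
	printf("cores in 0x0f: %d\n", count_cores(0x0f)); /* CPUs 0-3 -> 2 */
	printf("cores in 0xff: %d\n", count_cores(0xff)); /* CPUs 0-7 -> 4 */
	return 0;
}
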
1349 * Each list entry contains a CPU mask reflecting CPUs that share the same
1350 * capacity.
1356 * Verify whether there is any CPU capacity asymmetry in a given sched domain.
1367 * Count how many unique CPU capacities this domain spans across in asym_cpu_capacity_classify()
1384 /* Some of the available CPU capacity values have not been detected */ in asym_cpu_capacity_classify()
1399 static inline void asym_cpu_capacity_update_data(int cpu) in asym_cpu_capacity_update_data() argument
1401 unsigned long capacity = arch_scale_cpu_capacity(cpu); in asym_cpu_capacity_update_data() local
1406 * Search if capacity already exists. If not, track the entry in asym_cpu_capacity_update_data()
1410 if (capacity == entry->capacity) in asym_cpu_capacity_update_data()
1412 else if (!insert_entry && capacity > entry->capacity) in asym_cpu_capacity_update_data()
1419 entry->capacity = capacity; in asym_cpu_capacity_update_data()
1421 /* If NULL then the new capacity is the smallest, add last. */ in asym_cpu_capacity_update_data()
1423 list_add_tail_rcu(&entry->link, &asym_cap_list); in asym_cpu_capacity_update_data()
1425 list_add_rcu(&entry->link, &insert_entry->link); in asym_cpu_capacity_update_data()
1427 __cpumask_set_cpu(cpu, cpu_capacity_span(entry)); in asym_cpu_capacity_update_data()
1431 * Build-up/update list of CPUs grouped by their capacities
1433 * with state indicating CPU topology changes.
1438 int cpu; in asym_cpu_capacity_scan() local
1443 for_each_cpu_and(cpu, cpu_possible_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) in asym_cpu_capacity_scan()
1444 asym_cpu_capacity_update_data(cpu); in asym_cpu_capacity_scan()
1448 list_del_rcu(&entry->link); in asym_cpu_capacity_scan()
1449 call_rcu(&entry->rcu, free_asym_cap_entry); in asym_cpu_capacity_scan()
1454 * Only one capacity value has been detected, i.e. this system is symmetric. in asym_cpu_capacity_scan()
1459 list_del_rcu(&entry->link); in asym_cpu_capacity_scan()
1460 call_rcu(&entry->rcu, free_asym_cap_entry); in asym_cpu_capacity_scan()
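
A sketch of the list maintenance described above: one entry per distinct capacity value, kept in descending order, each holding the mask of CPUs with that capacity. The array-backed storage, entry layout and example capacity values are assumptions of this sketch; the kernel keeps the equivalent data in the RCU-protected asym_cap_list.

/* Userspace model of grouping CPUs by capacity, largest capacity first.
 * Not the kernel's RCU list; plain arrays and bitmasks instead.
 */
#include <stdio.h>

#define NR_CPUS		8
#define MAX_ENTRIES	NR_CPUS

struct cap_entry {
	unsigned long capacity;
	unsigned long cpus;	/* bitmask of CPUs with this capacity */
};

static struct cap_entry list[MAX_ENTRIES];
static int nr_entries;

static void update_data(int cpu, unsigned long capacity)
{
	int i, pos = nr_entries;

	for (i = 0; i < nr_entries; i++) {
		if (list[i].capacity == capacity) {	/* already tracked */
			list[i].cpus |= 1ul << cpu;
			return;
		}
		if (capacity > list[i].capacity) {	/* keep descending order */
			pos = i;
			break;
		}
	}

	for (i = nr_entries; i > pos; i--)		/* make room at pos */
		list[i] = list[i - 1];
	list[pos].capacity = capacity;
	list[pos].cpus = 1ul << cpu;
	nr_entries++;
}

int main(void)
{
	/* Hypothetical big.LITTLE: 4 CPUs of capacity 446, 4 of 1024. */
	static const unsigned long cap[NR_CPUS] = {
		446, 446, 446, 446, 1024, 1024, 1024, 1024
	};
	int cpu, i;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		update_data(cpu, cap[cpu]);

	for (i = 0; i < nr_entries; i++)
		printf("capacity %4lu: cpus 0x%02lx\n",
		       list[i].capacity, list[i].cpus);
	return 0;
}
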
1466 * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
1469 static int default_relax_domain_level = -1;
1486 if (!attr || attr->relax_domain_level < 0) { in set_domain_attribute()
1491 request = attr->relax_domain_level; in set_domain_attribute()
1493 if (sd->level >= request) { in set_domain_attribute()
1495 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); in set_domain_attribute()
1507 if (!atomic_read(&d->rd->refcount)) in __free_domain_allocs()
1508 free_rootdomain(&d->rd->rcu); in __free_domain_allocs()
1511 free_percpu(d->sd); in __free_domain_allocs()
1528 d->sd = alloc_percpu(struct sched_domain *); in __visit_domain_allocation_hell()
1529 if (!d->sd) in __visit_domain_allocation_hell()
1531 d->rd = alloc_rootdomain(); in __visit_domain_allocation_hell()
1532 if (!d->rd) in __visit_domain_allocation_hell()
1543 static void claim_allocations(int cpu, struct sched_domain *sd) in claim_allocations() argument
1545 struct sd_data *sdd = sd->private; in claim_allocations()
1547 WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); in claim_allocations()
1548 *per_cpu_ptr(sdd->sd, cpu) = NULL; in claim_allocations()
1550 if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref)) in claim_allocations()
1551 *per_cpu_ptr(sdd->sds, cpu) = NULL; in claim_allocations()
1553 if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) in claim_allocations()
1554 *per_cpu_ptr(sdd->sg, cpu) = NULL; in claim_allocations()
1556 if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref)) in claim_allocations()
1557 *per_cpu_ptr(sdd->sgc, cpu) = NULL; in claim_allocations()
1586 * SD_ASYM_PACKING - describes SMT quirks
1598 struct sched_domain *child, int cpu) in sd_init() argument
1600 struct sd_data *sdd = &tl->data; in sd_init()
1601 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); in sd_init()
1609 sched_domains_curr_level = tl->numa_level; in sd_init()
1612 sd_weight = cpumask_weight(tl->mask(cpu)); in sd_init()
1614 if (tl->sd_flags) in sd_init()
1615 sd_flags = (*tl->sd_flags)(); in sd_init()
1646 .name = tl->name, in sd_init()
1650 cpumask_and(sd_span, cpu_map, tl->mask(cpu)); in sd_init()
1653 sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map); in sd_init()
1655 WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) == in sd_init()
1657 "CPU capacity asymmetry not supported on SMT\n"); in sd_init()
1663 if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child) in sd_init()
1664 sd->child->flags &= ~SD_PREFER_SIBLING; in sd_init()
1666 if (sd->flags & SD_SHARE_CPUCAPACITY) { in sd_init()
1667 sd->imbalance_pct = 110; in sd_init()
1669 } else if (sd->flags & SD_SHARE_LLC) { in sd_init()
1670 sd->imbalance_pct = 117; in sd_init()
1671 sd->cache_nice_tries = 1; in sd_init()
1674 } else if (sd->flags & SD_NUMA) { in sd_init()
1675 sd->cache_nice_tries = 2; in sd_init()
1677 sd->flags &= ~SD_PREFER_SIBLING; in sd_init()
1678 sd->flags |= SD_SERIALIZE; in sd_init()
1679 if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) { in sd_init()
1680 sd->flags &= ~(SD_BALANCE_EXEC | in sd_init()
1687 sd->cache_nice_tries = 1; in sd_init()
1694 if (sd->flags & SD_SHARE_LLC) { in sd_init()
1695 sd->shared = *per_cpu_ptr(sdd->sds, sd_id); in sd_init()
1696 atomic_inc(&sd->shared->ref); in sd_init()
1697 atomic_set(&sd->shared->nr_busy_cpus, sd_weight); in sd_init()
1700 sd->private = sdd; in sd_init()
1706 * Topology list, bottom-up.
1729 for (tl = sched_domain_topology; tl->mask; tl++)
1742 static const struct cpumask *sd_numa_mask(int cpu) in sd_numa_mask() argument
1744 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; in sd_numa_mask()
1815 * - If the maximum distance between any nodes is 1 hop, the system
1817 * - If for two nodes A and B, located N > 1 hops away from each other,
1870 * O(nr_nodes^2) de-duplicating selection sort -- in order to find the in sched_init_numa()
1953 sched_numa_warn("Node-distance not symmetric"); in sched_init_numa()
2004 WRITE_ONCE(sched_max_numa_distance, sched_domains_numa_distance[nr_levels - 1]); in sched_init_numa()
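
The "O(nr_nodes^2) de-duplicating selection sort" mentioned above boils the node-distance table down to its unique, ascending distance levels. A simplified stand-in is shown below; the 4-node ring table is just an example and this is not the sched_init_numa() code itself.

/* Derive the unique, ascending NUMA distance levels from a distance table. */
#include <stdio.h>
#include <stdbool.h>

#define NR_NODES 4

static const int distance[NR_NODES][NR_NODES] = {
	{ 10, 20, 30, 20 },
	{ 20, 10, 20, 30 },
	{ 30, 20, 10, 20 },
	{ 20, 30, 20, 10 },
};

int main(void)
{
	int levels[NR_NODES * NR_NODES];
	int nr_levels = 0;
	int i, j, k;

	for (i = 0; i < NR_NODES; i++) {
		for (j = 0; j < NR_NODES; j++) {
			int d = distance[i][j];
			bool seen = false;
			int pos = nr_levels;

			for (k = 0; k < nr_levels; k++) {
				if (levels[k] == d)
					seen = true;
				else if (levels[k] > d && pos == nr_levels)
					pos = k;
			}
			if (seen)
				continue;
			for (k = nr_levels; k > pos; k--)	/* keep ascending */
				levels[k] = levels[k - 1];
			levels[pos] = d;
			nr_levels++;
		}
	}

	printf("%d distance levels:", nr_levels);
	for (k = 0; k < nr_levels; k++)
		printf(" %d", levels[k]);
	printf("\n");	/* expect: 3 distance levels: 10 20 30 */
	return 0;
}
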
2047 void sched_update_numa(int cpu, bool online) in sched_update_numa() argument
2051 node = cpu_to_node(cpu); in sched_update_numa()
2053 * Scheduler NUMA topology is updated when the first CPU of a in sched_update_numa()
2054 * node is onlined or the last CPU of a node is offlined. in sched_update_numa()
2063 void sched_domains_numa_masks_set(unsigned int cpu) in sched_domains_numa_masks_set() argument
2065 int node = cpu_to_node(cpu); in sched_domains_numa_masks_set()
2075 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); in sched_domains_numa_masks_set()
2080 void sched_domains_numa_masks_clear(unsigned int cpu) in sched_domains_numa_masks_clear() argument
2087 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); in sched_domains_numa_masks_clear()
2093 * sched_numa_find_closest() - given the NUMA topology, find the cpu
2094 * closest to @cpu from @cpus.
2095 * @cpus: cpumask to find a CPU from
2096 * @cpu: CPU to be close to
2098 * Return: cpu, or nr_cpu_ids when nothing found.
2100 int sched_numa_find_closest(const struct cpumask *cpus, int cpu) in sched_numa_find_closest() argument
2102 int i, j = cpu_to_node(cpu), found = nr_cpu_ids; in sched_numa_find_closest()
2112 cpu = cpumask_any_and(cpus, masks[i][j]); in sched_numa_find_closest()
2113 if (cpu < nr_cpu_ids) { in sched_numa_find_closest()
2114 found = cpu; in sched_numa_find_closest()
2128 int cpu; member
2137 if (cpumask_weight_and(k->cpus, cur_hop[k->node]) <= k->cpu) in hop_cmp()
2140 if (b == k->masks) { in hop_cmp()
2141 k->w = 0; in hop_cmp()
2145 prev_hop = *((struct cpumask ***)b - 1); in hop_cmp()
2146 k->w = cpumask_weight_and(k->cpus, prev_hop[k->node]); in hop_cmp()
2147 if (k->w <= k->cpu) in hop_cmp()
2150 return -1; in hop_cmp()
2154 * sched_numa_find_nth_cpu() - given the NUMA topology, find the Nth closest CPU
2155 * from @cpus to @cpu, taking into account distance
2157 * @cpus: cpumask to find a cpu from
2158 * @cpu: CPU to start searching
2161 * Return: cpu, or nr_cpu_ids when nothing found.
2163 int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node) in sched_numa_find_nth_cpu() argument
2165 struct __cmp_key k = { .cpus = cpus, .cpu = cpu }; in sched_numa_find_nth_cpu()
2170 return cpumask_nth_and(cpu, cpus, cpu_online_mask); in sched_numa_find_nth_cpu()
2174 /* CPU-less node entries are uninitialized in sched_domains_numa_masks */ in sched_numa_find_nth_cpu()
2183 hop = hop_masks - k.masks; in sched_numa_find_nth_cpu()
2186 cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node], k.masks[hop-1][node]) : in sched_numa_find_nth_cpu()
2187 cpumask_nth_and(cpu, cpus, k.masks[0][node]); in sched_numa_find_nth_cpu()
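
The bsearch over hop masks above can be pictured with a plain linear walk: the per-hop masks are cumulative, so the Nth closest CPU lives in the first hop whose intersection with the candidate mask has more than N bits, and within that hop the CPUs already counted at the previous hop are skipped. The mask values, single-node focus and helper names below are assumptions of this sketch, not the kernel's sched_domains_numa_masks layout.

/* Find the Nth closest CPU to a node using cumulative per-hop CPU masks. */
#include <stdio.h>

#define NR_HOPS 3

/* Cumulative masks for one node: hop 0 is a subset of hop 1, and so on. */
static const unsigned long hop_mask[NR_HOPS] = {
	0x03,	/* hop 0: CPUs 0-1 (local node)   */
	0x0f,	/* hop 1: CPUs 0-3                */
	0xff,	/* hop 2: CPUs 0-7 (whole system) */
};

static int nth_set_bit(unsigned long mask, int n)
{
	int cpu;

	for (cpu = 0; mask; cpu++, mask >>= 1)
		if ((mask & 1) && n-- == 0)
			return cpu;
	return -1;
}

/* Nth (0-based) CPU of @cpus, ordered by hop distance from the node. */
static int find_nth_cpu(unsigned long cpus, int n)
{
	unsigned long prev = 0;
	int hop;

	for (hop = 0; hop < NR_HOPS; hop++) {
		unsigned long cur = cpus & hop_mask[hop];

		if (n < __builtin_popcountl(cur))
			return nth_set_bit(cur & ~prev,
					   n - __builtin_popcountl(prev));
		prev = cur;
	}
	return -1;	/* nothing found */
}

int main(void)
{
	unsigned long cpus = 0xaa;	/* candidate CPUs: 1, 3, 5, 7 */
	int n;

	for (n = 0; n < 4; n++)
		printf("n=%d -> CPU %d\n", n, find_nth_cpu(cpus, n));
	return 0;
}
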
2195 * sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from
2204 * read-side section, copy it if required beyond that.
2216 return ERR_PTR(-EINVAL); in sched_numa_hop_mask()
2220 return ERR_PTR(-EBUSY); in sched_numa_hop_mask()
2234 struct sd_data *sdd = &tl->data; in __sdt_alloc()
2236 sdd->sd = alloc_percpu(struct sched_domain *); in __sdt_alloc()
2237 if (!sdd->sd) in __sdt_alloc()
2238 return -ENOMEM; in __sdt_alloc()
2240 sdd->sds = alloc_percpu(struct sched_domain_shared *); in __sdt_alloc()
2241 if (!sdd->sds) in __sdt_alloc()
2242 return -ENOMEM; in __sdt_alloc()
2244 sdd->sg = alloc_percpu(struct sched_group *); in __sdt_alloc()
2245 if (!sdd->sg) in __sdt_alloc()
2246 return -ENOMEM; in __sdt_alloc()
2248 sdd->sgc = alloc_percpu(struct sched_group_capacity *); in __sdt_alloc()
2249 if (!sdd->sgc) in __sdt_alloc()
2250 return -ENOMEM; in __sdt_alloc()
2261 return -ENOMEM; in __sdt_alloc()
2263 *per_cpu_ptr(sdd->sd, j) = sd; in __sdt_alloc()
2268 return -ENOMEM; in __sdt_alloc()
2270 *per_cpu_ptr(sdd->sds, j) = sds; in __sdt_alloc()
2275 return -ENOMEM; in __sdt_alloc()
2277 sg->next = sg; in __sdt_alloc()
2279 *per_cpu_ptr(sdd->sg, j) = sg; in __sdt_alloc()
2284 return -ENOMEM; in __sdt_alloc()
2287 sgc->id = j; in __sdt_alloc()
2290 *per_cpu_ptr(sdd->sgc, j) = sgc; in __sdt_alloc()
2303 struct sd_data *sdd = &tl->data; in __sdt_free()
2308 if (sdd->sd) { in __sdt_free()
2309 sd = *per_cpu_ptr(sdd->sd, j); in __sdt_free()
2310 if (sd && (sd->flags & SD_OVERLAP)) in __sdt_free()
2311 free_sched_groups(sd->groups, 0); in __sdt_free()
2312 kfree(*per_cpu_ptr(sdd->sd, j)); in __sdt_free()
2315 if (sdd->sds) in __sdt_free()
2316 kfree(*per_cpu_ptr(sdd->sds, j)); in __sdt_free()
2317 if (sdd->sg) in __sdt_free()
2318 kfree(*per_cpu_ptr(sdd->sg, j)); in __sdt_free()
2319 if (sdd->sgc) in __sdt_free()
2320 kfree(*per_cpu_ptr(sdd->sgc, j)); in __sdt_free()
2322 free_percpu(sdd->sd); in __sdt_free()
2323 sdd->sd = NULL; in __sdt_free()
2324 free_percpu(sdd->sds); in __sdt_free()
2325 sdd->sds = NULL; in __sdt_free()
2326 free_percpu(sdd->sg); in __sdt_free()
2327 sdd->sg = NULL; in __sdt_free()
2328 free_percpu(sdd->sgc); in __sdt_free()
2329 sdd->sgc = NULL; in __sdt_free()
2335 struct sched_domain *child, int cpu) in build_sched_domain() argument
2337 struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu); in build_sched_domain()
2340 sd->level = child->level + 1; in build_sched_domain()
2341 sched_domain_level_max = max(sched_domain_level_max, sd->level); in build_sched_domain()
2342 child->parent = sd; in build_sched_domain()
2348 child->name, sd->name); in build_sched_domain()
2363 * any two given CPUs at this (non-NUMA) topology level.
2366 const struct cpumask *cpu_map, int cpu) in topology_span_sane() argument
2368 int i = cpu + 1; in topology_span_sane()
2371 if (tl->flags & SDTL_OVERLAP) in topology_span_sane()
2375 * Non-NUMA levels cannot partially overlap - they must be either in topology_span_sane()
2377 * breaking the sched_group lists - i.e. a later get_group() pass in topology_span_sane()
2387 if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) && in topology_span_sane()
2388 cpumask_intersects(tl->mask(cpu), tl->mask(i))) in topology_span_sane()
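
The sanity rule enforced above (masks of the same non-NUMA level must be identical or disjoint) fits in a few lines once cpumasks are modelled as bitmasks; the masks used below are hypothetical.

/* Two CPU masks of the same (non-NUMA) topology level are sane only if
 * they are either identical or do not intersect at all.  Userspace sketch.
 */
#include <stdio.h>
#include <stdbool.h>

static bool spans_sane(unsigned long a, unsigned long b)
{
	return a == b || (a & b) == 0;
}

int main(void)
{
	/* identical: OK, disjoint: OK, partial overlap: broken. */
	printf("%d %d %d\n",
	       spans_sane(0x0f, 0x0f),
	       spans_sane(0x0f, 0xf0),
	       spans_sane(0x0f, 0x3c));
	return 0;
}
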
2406 int i, ret = -ENOMEM; in build_sched_domains()
2429 has_asym |= sd->flags & SD_ASYM_CPUCAPACITY; in build_sched_domains()
2433 if (tl->flags & SDTL_OVERLAP) in build_sched_domains()
2434 sd->flags |= SD_OVERLAP; in build_sched_domains()
2442 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2443 sd->span_weight = cpumask_weight(sched_domain_span(sd)); in build_sched_domains()
2444 if (sd->flags & SD_OVERLAP) { in build_sched_domains()
2462 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2463 struct sched_domain *child = sd->child; in build_sched_domains()
2465 if (!(sd->flags & SD_SHARE_LLC) && child && in build_sched_domains()
2466 (child->flags & SD_SHARE_LLC)) { in build_sched_domains()
2473 * arbitrary cutoff based on two factors -- SMT and in build_sched_domains()
2474 * memory channels. For SMT-2, the intent is to in build_sched_domains()
2476 * SMT-4 or SMT-8 *may* benefit from a different in build_sched_domains()
2490 nr_llcs = sd->span_weight / child->span_weight; in build_sched_domains()
2492 imb = sd->span_weight >> 3; in build_sched_domains()
2496 sd->imb_numa_nr = imb; in build_sched_domains()
2499 top_p = sd->parent; in build_sched_domains()
2500 while (top_p && !(top_p->flags & SD_NUMA)) { in build_sched_domains()
2501 top_p = top_p->parent; in build_sched_domains()
2503 imb_span = top_p ? top_p->span_weight : sd->span_weight; in build_sched_domains()
2505 int factor = max(1U, (sd->span_weight / imb_span)); in build_sched_domains()
2507 sd->imb_numa_nr = imb * factor; in build_sched_domains()
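
A worked-out version of the arithmetic above, under an assumed 2-socket machine with 64 CPUs per socket: with one LLC per socket the allowed imbalance is 12.5% of the node (64 >> 3 = 8), while with several LLCs per socket it is one task per LLC. The domain weights and helper below are illustrative assumptions, not the kernel's domain tree.

/* Worked example of the imb_numa_nr arithmetic above.  Not kernel code. */
#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

static void imb_numa(unsigned int pkg_weight, unsigned int llc_weight,
		     unsigned int numa_weight)
{
	unsigned int nr_llcs = pkg_weight / llc_weight;
	unsigned int imb, imb_span, factor;

	/* Lowest non-LLC domain with an LLC child (the "package" here). */
	if (nr_llcs == 1)
		imb = pkg_weight >> 3;		/* 12.5% of the node */
	else
		imb = nr_llcs;			/* one task per LLC */
	imb = max_u(1, imb);
	imb_span = numa_weight;			/* first NUMA parent's span */

	/* NUMA domains above it scale by how much wider they are. */
	factor = max_u(1, numa_weight / imb_span);

	printf("llc=%2u pkg=%u: pkg imb_numa_nr=%u, numa imb_numa_nr=%u\n",
	       llc_weight, pkg_weight, imb, imb * factor);
}

int main(void)
{
	imb_numa(64, 64, 128);	/* one LLC per socket   -> 8 */
	imb_numa(64, 16, 128);	/* four LLCs per socket -> 4 */
	return 0;
}
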
2512 /* Calculate CPU capacity for physical packages and nodes */ in build_sched_domains()
2513 for (i = nr_cpumask_bits-1; i >= 0; i--) { in build_sched_domains()
2517 for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { in build_sched_domains()
2570 * CPU core maps. It is supposed to return 1 if the topology changed
2633 unsigned int cpu = cpumask_any(cpu_map); in detach_destroy_domains() local
2636 if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu))) in detach_destroy_domains()
2700 /* Let the architecture update CPU core mappings: */ in partition_sched_domains_locked()
2702 /* Trigger rebuilding CPU capacity asymmetry data */ in partition_sched_domains_locked()
2728 * its dl_bw->total_bw needs to be cleared. in partition_sched_domains_locked()
2734 rd = cpu_rq(cpumask_any(doms_cur[i]))->rd; in partition_sched_domains_locked()
2739 /* No match - a current sched domain not in new doms_new[] */ in partition_sched_domains_locked()
2760 /* No match - add a new doms_new */ in partition_sched_domains_locked()
2771 cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) { in partition_sched_domains_locked()
2776 /* No match - add perf domains for a new rd */ in partition_sched_domains_locked()