
1 // SPDX-License-Identifier: GPL-2.0-only
15 #include "tick-internal.h"
21 * The timer migration mechanism is built on a hierarchy of groups. The
22 * lowest level group contains CPUs, the next level groups of CPU groups
23 * and so forth. The CPU groups are kept per node so for the normal case
25 * CPUs per node even the next level might be kept as groups of CPU groups
34 * GRP0:0 - GRP0:2 GRP0:3 - GRP0:5
37 * CPUS 0-7 8-15 16-23 24-31 32-39 40-47
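The CPU-to-group mapping shown in the diagram above can be sketched as a tiny
standalone program. The fanout of 8 children per group and the sequential
assignment of CPUs to LVL0 groups are assumptions taken from the diagram only;
the real code assigns child slots while building the hierarchy (see
tmigr_setup_groups() further down), so treat this purely as an illustration.

	/* Standalone sketch, not kernel code. */
	#include <stdio.h>

	#define CHILDREN_PER_GROUP	8	/* assumed fanout, as in the diagram */

	int main(void)
	{
		unsigned int cpu = 17;
		unsigned int grp0 = cpu / CHILDREN_PER_GROUP;	/* -> GRP0:2 in the diagram */
		unsigned int slot = cpu % CHILDREN_PER_GROUP;	/* child slot inside the group */

		printf("CPU %u -> GRP0:%u, child slot %u\n", cpu, grp0, slot);
		return 0;
	}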
39 * The groups hold a timer queue of events sorted by expiry time. These
43 * Each group has a designated migrator CPU/group as long as a CPU/group is
 48  * When a CPU is awake, it checks in its own timer tick the group
50 * no CPU is active, it also checks the groups where no migrator is set
54 * from the idle CPU and runs the timer function. After that it updates the
57 * CPUs which go idle arm their CPU local timer hardware for the next local
58 * (pinned) timer event. If the next migratable timer expires after the
59 * next local timer or the CPU has no migratable timer pending then the
60 * CPU does not queue an event in the LVL0 group. If the next migratable
61 * timer expires before the next local timer then the CPU queues that timer
62 * in the LVL0 group. In both cases the CPU marks itself idle in the LVL0
 65  * When a CPU comes out of idle and when a group has at least a single active
68 * timer queue. It will be removed when touching the timer queue the next
73 * If the CPU is the migrator of the group then it delegates that role to
74 * the next active CPU in the group or sets migrator to TMIGR_NONE when
75 * there is no active CPU in the group. This delegation needs to be
79 * When the last CPU in the system goes idle, then it drops all migrator
 81  * then has to make sure that it arms its own local hardware timer for
86 * ---------------
96 * --------------
100 * includes the per CPU locks in struct tmigr_cpu. For updating the migrator and
101 * active CPU/group information atomic_try_cmpxchg() is used instead and only
102 * the per CPU tmigr_cpu->lock is held.
107 * When @timer_base->lock as well as tmigr related locks are required, the lock
108 * ordering is: first @timer_base->lock, afterwards tmigr related locks.
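A minimal, self-contained sketch of that documented ordering, with plain
pthread mutexes standing in for @timer_base->lock and a tmigr group lock.
The names and the helper are hypothetical; only the acquisition order reflects
the rule stated above.

	#include <pthread.h>

	static pthread_mutex_t timer_base_lock  = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for timer_base->lock */
	static pthread_mutex_t tmigr_group_lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for a tmigr group lock */

	static void with_both_locks(void (*fn)(void))
	{
		/* Documented order: timer_base lock first, tmigr lock afterwards. */
		pthread_mutex_lock(&timer_base_lock);
		pthread_mutex_lock(&tmigr_group_lock);
		fn();
		pthread_mutex_unlock(&tmigr_group_lock);
		pthread_mutex_unlock(&timer_base_lock);
	}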
112 * ------------------------------------------------
144 * --> migrator = TMIGR_NONE migrator = CPU2
145 * --> active = active = CPU2
148 * --> idle idle active idle
161 * --> migrator = CPU1 migrator = CPU2
162 * --> active = CPU1 active = CPU2
165 * idle --> active active idle
173 * --> migrator = GRP0:1
174 * --> active = GRP0:0, GRP0:1
186 * --> migrator = GRP0:1
187 * --> active = GRP0:1
202 * expected value (compare-and-exchange).
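The kernel packs the migrator and the active information into one 32bit word
and updates it with atomic_try_cmpxchg() as described above. The C11 sketch
below mirrors only the retry loop; the field layout, the meaning of 0 as
"no migrator" and the helper name are illustrative, not the kernel's
union tmigr_state.

	#include <stdatomic.h>
	#include <stdint.h>

	union state {
		uint32_t word;
		struct {
			uint8_t  migrator;	/* child bit of the migrator, 0 stands in for TMIGR_NONE */
			uint8_t  active;	/* mask of active children */
			uint16_t seq;		/* bumped on every update */
		};
	};

	static _Atomic uint32_t group_state;

	/* Mark @childbit active; claim the migrator role if the group had none. */
	static void mark_child_active(uint8_t childbit)
	{
		union state cur = { .word = atomic_load(&group_state) };
		union state new;

		do {
			new = cur;
			new.active |= childbit;
			if (!new.migrator)
				new.migrator = childbit;
			new.seq++;
		} while (!atomic_compare_exchange_weak(&group_state, &cur.word, new.word));
	}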
209 * Prevent race between new event and last CPU going inactive
210 * ----------------------------------------------------------
212 * When the last CPU is going idle and there is a concurrent update of a new
213 * first global timer of an idle CPU, the group and child states have to be read
 232  * 2. Now CPU 2 goes idle (and has no global timer that has to be handled) and
241 * migrator = TMIGR_NONE --> migrator = TMIGR_NONE
242 * active = --> active =
246 * idle idle --> idle idle
254 * --> migrator = TMIGR_NONE
255 * --> active =
264 * idle idle --> idle idle
266 * 4. CPU 0 has a new timer queued from idle and it expires at TIMER0. CPU0
277 * --> next_expiry = TIMER0 next_expiry = KTIME_MAX
282 * 5. GRP0:0 is not active, so the new timer has to be propagated to
284 * (from step 2) is read, the timer is enqueued into GRP1:0, but nothing is
291 * --> next_expiry = TIMER0
302 * timer has to be propagated from idle path)::
305 * -------------------------- ---------------------------
307 * cmpxchg(&GRP1:0->state);
309 * spin_lock(&GRP1:0->lock);
312 * spin_unlock(&GRP1:0->lock);
316 * spin_lock(&GRP1:0->lock)
318 * group_state = atomic_read(&GRP1:0->state)
321 * spin_unlock(&GRP1:0->lock) <3>
326 * When CPU0 grabs the lock directly after cmpxchg, the first timer is reported
327 * back to CPU0 and also later on to CPU2. So no timer is missed. A concurrent
328 * update of the group state from active path is no problem, as the upcoming CPU
332 * -----------------------------------------------------------
334 * After expiring timers of a remote CPU, a walk through the hierarchy and
336 * is a 'new' global timer but also if there is no new global timer but the
337 * remote CPU is still idle.
 339  * 1. CPU0 and CPU1 are idle and both have a global timer expiring at the same
341 * also idle and has no global timer pending. CPU2 is the only active CPU and
347 * --> timerqueue = evt-GRP0:0
353 * groupevt.cpu = CPU0 groupevt.cpu =
354 * timerqueue = evt-CPU0, timerqueue =
355 * evt-CPU1
363 * looks at tmigr_event::cpu struct member and expires the pending timer(s)
369 * --> timerqueue =
375 * --> groupevt.cpu = CPU0 groupevt.cpu =
376 * timerqueue = evt-CPU0, timerqueue =
377 * evt-CPU1
383 * here, then CPU1's pending global timer(s) will not expire in time and the
 387  * timerqueue because it has no pending timer. If CPU0 had a timer
 388  * pending, it would have to expire after CPU1's first timer because all timers
390 * in GRP0:0's timerqueue and therefore set in the CPU field of the group
397 * --> timerqueue = evt-GRP0:0
403 * --> groupevt.cpu = CPU1 groupevt.cpu =
404 * --> timerqueue = evt-CPU1 timerqueue =
410 * timer(s) of CPU1.
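The rule this walkthrough arrives at can be condensed into a small standalone
sketch: once the first queued child event has been handled, the group level
event must be refilled from the next queued child event (CPU1's here),
otherwise that timer would be lost. The data layout and names below are made
up for illustration only.

	#include <stdio.h>
	#include <stdint.h>

	struct child_evt {
		uint64_t expires;
		int cpu;
	};

	struct group_evt {
		uint64_t expires;
		int cpu;			/* CPU whose timer expires first */
	};

	/* Refill the group event from a sorted child queue after @handled events. */
	static void refill_group_evt(struct group_evt *ge,
				     const struct child_evt *q, int qlen, int handled)
	{
		if (handled < qlen) {
			ge->expires = q[handled].expires;
			ge->cpu = q[handled].cpu;
		} else {
			ge->expires = UINT64_MAX;	/* nothing queued: KTIME_MAX equivalent */
			ge->cpu = -1;
		}
	}

	int main(void)
	{
		const struct child_evt q[] = { { 100, 0 }, { 200, 1 } };	/* evt-CPU0, evt-CPU1 */
		struct group_evt ge;

		refill_group_evt(&ge, q, 2, 1);		/* CPU0's event was just handled */
		printf("group event now: expires=%llu cpu=%d\n",
		       (unsigned long long)ge.expires, ge.cpu);
		return 0;
	}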
413 * CPU of GRP0:0 is active again. The CPU will mark GRP0:0 active and take care
430 return !(tmc->tmgroup && tmc->online); in tmigr_is_not_available()
435 * group is not active - so no migrator is set.
441 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator()
455 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator_and_lonely()
471 s.state = atomic_read(&group->migr_state); in tmigr_check_lonely()
479 * struct tmigr_walk - data required for walking the hierarchy
480 * @nextexp: Next CPU event expiry information which is handed into
481 * the timer migration code by the timer code
 484  *			hierarchy is completely idle. When the CPU itself was the
 485  *			last one going idle, this information makes sure that the CPU will
 487  *			expiry case, firstexp is stored in the per CPU tmigr_cpu
 488  *			struct of the CPU which expires remote timers. It is updated
495 * outcome is a CPU which might wake up a little early.
499 * @remote: Is set, when the new timer path is executed in
501 * @basej: timer base in jiffies
502 * @now: timer base monotonic
 505  * @tmc_active:	this flag indicates whether the CPU which triggers
506 * the hierarchy walk is !idle in the timer migration
507 * hierarchy. When the CPU is idle and the whole hierarchy is
528 struct tmigr_group *child = NULL, *group = tmc->tmgroup; in __walk_groups()
531 WARN_ON_ONCE(group->level >= tmigr_hierarchy_levels); in __walk_groups()
541 group = READ_ONCE(group->parent); in __walk_groups()
542 data->childmask = child->groupmask; in __walk_groups()
543 WARN_ON_ONCE(!data->childmask); in __walk_groups()
549 lockdep_assert_held(&tmc->lock); in walk_groups()
555 * Returns the next event of the timerqueue @group->events
565 lockdep_assert_held(&group->lock); in tmigr_next_groupevt()
567 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_next_groupevt()
569 while ((node = timerqueue_getnext(&group->events))) { in tmigr_next_groupevt()
572 if (!READ_ONCE(evt->ignore)) { in tmigr_next_groupevt()
573 WRITE_ONCE(group->next_expiry, evt->nextevt.expires); in tmigr_next_groupevt()
581 if (!timerqueue_del(&group->events, node)) in tmigr_next_groupevt()
598 if (!evt || now < evt->nextevt.expires) in tmigr_next_expired_groupevt()
604 timerqueue_del(&group->events, &evt->nextevt); in tmigr_next_expired_groupevt()
619 return evt->nextevt.expires; in tmigr_next_groupevt_expires()
630 childmask = data->childmask; in tmigr_active_up()
636 curstate.state = atomic_read(&group->migr_state); in tmigr_active_up()
652 } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); in tmigr_active_up()
668 WRITE_ONCE(group->groupevt.ignore, true); in tmigr_active_up()
677 data.childmask = tmc->groupmask; in __tmigr_cpu_activate()
681 tmc->cpuevt.ignore = true; in __tmigr_cpu_activate()
682 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in __tmigr_cpu_activate()
688 * tmigr_cpu_activate() - set this CPU active in timer migration hierarchy
699 if (WARN_ON_ONCE(!tmc->idle)) in tmigr_cpu_activate()
702 raw_spin_lock(&tmc->lock); in tmigr_cpu_activate()
703 tmc->idle = false; in tmigr_cpu_activate()
705 raw_spin_unlock(&tmc->lock); in tmigr_cpu_activate()
 711  * @data->firstexp is set to the expiry of the first global event of the (top level of
715 * against a concurrent tmigr_inactive_up() run when the last CPU goes idle. See
716 * also section "Prevent race between new event and last CPU going inactive" in
727 bool remote = data->remote; in tmigr_update_events()
733 raw_spin_lock(&child->lock); in tmigr_update_events()
734 raw_spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); in tmigr_update_events()
736 childstate.state = atomic_read(&child->migr_state); in tmigr_update_events()
737 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
745 nextexp = child->next_expiry; in tmigr_update_events()
746 evt = &child->groupevt; in tmigr_update_events()
755 WRITE_ONCE(evt->ignore, ignore); in tmigr_update_events()
757 nextexp = data->nextexp; in tmigr_update_events()
759 first_childevt = evt = data->evt; in tmigr_update_events()
760 ignore = evt->ignore; in tmigr_update_events()
772 * - When entering this path by tmigr_new_timer(), @evt->ignore in tmigr_update_events()
774 * - tmigr_inactive_up() takes care of the propagation by in tmigr_update_events()
786 if (ignore && !remote && group->parent) in tmigr_update_events()
789 raw_spin_lock(&group->lock); in tmigr_update_events()
792 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
799 if (timerqueue_node_queued(&evt->nextevt)) { in tmigr_update_events()
800 if ((evt->nextevt.expires == nextexp) && !ignore) { in tmigr_update_events()
801 /* Make sure not to miss a new CPU event with the same expiry */ in tmigr_update_events()
802 evt->cpu = first_childevt->cpu; in tmigr_update_events()
806 if (!timerqueue_del(&group->events, &evt->nextevt)) in tmigr_update_events()
807 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_update_events()
813 * KTIME_MAX) and there was no remote timer handling before or in tmigr_update_events()
818 * if a remote timer handling was executed before and the group in tmigr_update_events()
820 * an enqueued timer in the non active group. The enqueued timer in tmigr_update_events()
827 evt->nextevt.expires = nextexp; in tmigr_update_events()
828 evt->cpu = first_childevt->cpu; in tmigr_update_events()
830 if (timerqueue_add(&group->events, &evt->nextevt)) in tmigr_update_events()
831 WRITE_ONCE(group->next_expiry, nextexp); in tmigr_update_events()
835 if (!group->parent && (groupstate.migrator == TMIGR_NONE)) { in tmigr_update_events()
839 * Nothing to do when update was done during remote timer in tmigr_update_events()
840 * handling. First timer in top level group which needs to be in tmigr_update_events()
851 * arming it on the CPU if the new event is earlier. Not sure if in tmigr_update_events()
854 data->firstexp = tmigr_next_groupevt_expires(group); in tmigr_update_events()
861 raw_spin_unlock(&group->lock); in tmigr_update_events()
864 raw_spin_unlock(&child->lock); in tmigr_update_events()
877 * Returns the expiry of the next timer that needs to be handled. KTIME_MAX is
878 * returned, if an active CPU will handle all the timer migration hierarchy
885 .evt = &tmc->cpuevt }; in tmigr_new_timer()
887 lockdep_assert_held(&tmc->lock); in tmigr_new_timer()
889 if (tmc->remote) in tmigr_new_timer()
894 tmc->cpuevt.ignore = false; in tmigr_new_timer()
903 static void tmigr_handle_remote_cpu(unsigned int cpu, u64 now, in tmigr_handle_remote_cpu() argument
910 tmc = per_cpu_ptr(&tmigr_cpu, cpu); in tmigr_handle_remote_cpu()
912 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
915 * If the remote CPU is offline then the timers have been migrated to in tmigr_handle_remote_cpu()
916 * another CPU. in tmigr_handle_remote_cpu()
918 * If tmigr_cpu::remote is set, at the moment another CPU already in tmigr_handle_remote_cpu()
919 * expires the timers of the remote CPU. in tmigr_handle_remote_cpu()
921 * If tmigr_event::ignore is set, then the CPU returns from idle and in tmigr_handle_remote_cpu()
925 * updated and there are no timers to expire right now. The CPU which in tmigr_handle_remote_cpu()
929 if (!tmc->online || tmc->remote || tmc->cpuevt.ignore || in tmigr_handle_remote_cpu()
930 now < tmc->cpuevt.nextevt.expires) { in tmigr_handle_remote_cpu()
931 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
937 tmc->remote = true; in tmigr_handle_remote_cpu()
938 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_handle_remote_cpu()
940 /* Drop the lock to allow the remote CPU to exit idle */ in tmigr_handle_remote_cpu()
941 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
943 if (cpu != smp_processor_id()) in tmigr_handle_remote_cpu()
944 timer_expire_remote(cpu); in tmigr_handle_remote_cpu()
947 * Lock ordering needs to be preserved - timer_base locks before tmigr in tmigr_handle_remote_cpu()
949 * the top). During fetching the next timer interrupt, also tmc->lock in tmigr_handle_remote_cpu()
951 * the CPU itself when it comes out of idle, updates the first timer in in tmigr_handle_remote_cpu()
954 * timer base locks are dropped as fast as possible: After checking in tmigr_handle_remote_cpu()
955 * whether the remote CPU went offline in the meantime and after in tmigr_handle_remote_cpu()
956 * fetching the next remote timer interrupt. Dropping the locks as fast in tmigr_handle_remote_cpu()
962 timer_lock_remote_bases(cpu); in tmigr_handle_remote_cpu()
963 raw_spin_lock(&tmc->lock); in tmigr_handle_remote_cpu()
966 * When the CPU went offline in the meantime, no hierarchy walk has to in tmigr_handle_remote_cpu()
968 * already done during marking the CPU offline in the hierarchy. in tmigr_handle_remote_cpu()
970 * When the CPU is no longer idle, the CPU takes care of the timers and in tmigr_handle_remote_cpu()
976 if (!tmc->online || !tmc->idle) { in tmigr_handle_remote_cpu()
977 timer_unlock_remote_bases(cpu); in tmigr_handle_remote_cpu()
981 /* next event of CPU */ in tmigr_handle_remote_cpu()
982 fetch_next_timer_interrupt_remote(jif, now, &tevt, cpu); in tmigr_handle_remote_cpu()
983 timer_unlock_remote_bases(cpu); in tmigr_handle_remote_cpu()
987 data.evt = &tmc->cpuevt; in tmigr_handle_remote_cpu()
991 * The update is done even when there is no 'new' global timer pending in tmigr_handle_remote_cpu()
992 * on the remote CPU (see section "Required event and timerqueue update in tmigr_handle_remote_cpu()
998 tmc->remote = false; in tmigr_handle_remote_cpu()
999 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
1011 jif = data->basej; in tmigr_handle_remote_up()
1012 now = data->now; in tmigr_handle_remote_up()
1014 childmask = data->childmask; in tmigr_handle_remote_up()
1026 raw_spin_lock_irq(&group->lock); in tmigr_handle_remote_up()
1031 unsigned int remote_cpu = evt->cpu; in tmigr_handle_remote_up()
1033 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1043 * (group->next_expiry was updated by tmigr_next_expired_groupevt(), in tmigr_handle_remote_up()
1046 data->firstexp = group->next_expiry; in tmigr_handle_remote_up()
1048 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1054 * tmigr_handle_remote() - Handle global timers of remote idle CPUs
1056 * Called from the timer soft interrupt with interrupts enabled.
1066 data.childmask = tmc->groupmask; in tmigr_handle_remote()
1074 if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) { in tmigr_handle_remote()
1076 * If this CPU was an idle migrator, make sure to clear its wakeup in tmigr_handle_remote()
1080 if (READ_ONCE(tmc->wakeup) == KTIME_MAX) in tmigr_handle_remote()
1087 * Update @tmc->wakeup only at the end and do not reset @tmc->wakeup to in tmigr_handle_remote()
1088 * KTIME_MAX. Even if tmc->lock is not held during the whole remote in tmigr_handle_remote()
1089 * handling, tmc->wakeup is fine to be stale as it is called in in tmigr_handle_remote()
1096 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote()
1097 WRITE_ONCE(tmc->wakeup, data.firstexp); in tmigr_handle_remote()
1098 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote()
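The kernel-doc above notes that tmigr_handle_remote() runs from the timer soft
interrupt with interrupts enabled, while tmigr_requires_handle_remote() (below)
is the cheap check done beforehand. The standalone sketch here only illustrates
that split, with stubs in place of the real functions; the actual call sites
live in the timer core and are not shown in this listing.

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-in stubs; only the call order mirrors the documented contract. */
	static bool tmigr_requires_handle_remote_stub(void) { return true; }
	static void tmigr_handle_remote_stub(void) { puts("expire remote timers"); }
	static void raise_timer_softirq_stub(void) { puts("raise TIMER_SOFTIRQ"); }

	/* Tick side: cheap check, interrupts are disabled here. */
	static void tick_side(void)
	{
		if (tmigr_requires_handle_remote_stub())
			raise_timer_softirq_stub();
	}

	/* Softirq side: interrupts enabled, may take locks and walk the hierarchy. */
	static void softirq_side(void)
	{
		tmigr_handle_remote_stub();
	}

	int main(void)
	{
		tick_side();
		softirq_side();
		return 0;
	}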
1107 childmask = data->childmask; in tmigr_requires_handle_remote_up()
1118 * When there is a parent group and the CPU which triggered the in tmigr_requires_handle_remote_up()
1122 if (group->parent && !data->tmc_active) in tmigr_requires_handle_remote_up()
1132 data->firstexp = READ_ONCE(group->next_expiry); in tmigr_requires_handle_remote_up()
1133 if (data->now >= data->firstexp) { in tmigr_requires_handle_remote_up()
1134 data->check = true; in tmigr_requires_handle_remote_up()
1138 raw_spin_lock(&group->lock); in tmigr_requires_handle_remote_up()
1139 data->firstexp = group->next_expiry; in tmigr_requires_handle_remote_up()
1140 if (data->now >= group->next_expiry) { in tmigr_requires_handle_remote_up()
1141 data->check = true; in tmigr_requires_handle_remote_up()
1142 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1145 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1152 * tmigr_requires_handle_remote() - Check the need of remote timer handling
1167 data.childmask = tmc->groupmask; in tmigr_requires_handle_remote()
1169 data.tmc_active = !tmc->idle; in tmigr_requires_handle_remote()
1173 * If the CPU is active, walk the hierarchy to check whether a remote in tmigr_requires_handle_remote()
1176 * Check is done lockless as interrupts are disabled and @tmc->idle is in tmigr_requires_handle_remote()
1177 * set only by the local CPU. in tmigr_requires_handle_remote()
1179 if (!tmc->idle) { in tmigr_requires_handle_remote()
1186 * When the CPU is idle, compare @tmc->wakeup with @data.now. The lock in tmigr_requires_handle_remote()
1192 if (data.now >= READ_ONCE(tmc->wakeup)) in tmigr_requires_handle_remote()
1195 raw_spin_lock(&tmc->lock); in tmigr_requires_handle_remote()
1196 if (data.now >= tmc->wakeup) in tmigr_requires_handle_remote()
1198 raw_spin_unlock(&tmc->lock); in tmigr_requires_handle_remote()
1205 * tmigr_cpu_new_timer() - enqueue next global timer into hierarchy (idle tmc)
1206 * @nextexp: Next expiry of global timer (or KTIME_MAX if not)
1208 * The CPU is already deactivated in the timer migration
1210 * and thereby the timer idle path is executed once more. @tmc->wakeup
1211 * holds the first timer, when the timer migration hierarchy is
1214 * Returns the first timer that needs to be handled by this CPU or KTIME_MAX if
1225 raw_spin_lock(&tmc->lock); in tmigr_cpu_new_timer()
1227 ret = READ_ONCE(tmc->wakeup); in tmigr_cpu_new_timer()
1229 if (nextexp != tmc->cpuevt.nextevt.expires || in tmigr_cpu_new_timer()
1230 tmc->cpuevt.ignore) { in tmigr_cpu_new_timer()
1236 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_new_timer()
1240 raw_spin_unlock(&tmc->lock); in tmigr_cpu_new_timer()
1252 childmask = data->childmask; in tmigr_inactive_up()
1261 curstate.state = atomic_read_acquire(&group->migr_state); in tmigr_inactive_up()
1265 childstate.state = atomic_read(&child->migr_state); in tmigr_inactive_up()
1299 if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) { in tmigr_inactive_up()
1313 data->remote = false; in tmigr_inactive_up()
1325 .evt = &tmc->cpuevt, in __tmigr_cpu_deactivate()
1326 .childmask = tmc->groupmask }; in __tmigr_cpu_deactivate()
1329 * If nextexp is KTIME_MAX, the CPU event will be ignored because the in __tmigr_cpu_deactivate()
1330 * local timer expires before the global timer, no global timer is set in __tmigr_cpu_deactivate()
1331 * or CPU goes offline. in __tmigr_cpu_deactivate()
1334 tmc->cpuevt.ignore = false; in __tmigr_cpu_deactivate()
1341 * tmigr_cpu_deactivate() - Put current CPU into inactive state
1342 * @nextexp: The next global timer expiry of the current CPU
1346 * Return: the next event expiry of the current CPU or the next event expiry
1347 * from the hierarchy if this CPU is the top level migrator or the hierarchy is
1358 raw_spin_lock(&tmc->lock); in tmigr_cpu_deactivate()
1362 tmc->idle = true; in tmigr_cpu_deactivate()
1368 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_deactivate()
1371 raw_spin_unlock(&tmc->lock); in tmigr_cpu_deactivate()
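A rough caller-side sketch of the contract described above: hand the first
global expiry to the hierarchy only if it is earlier than the first local
(pinned) one, then program the CPU-local timer hardware no later than both the
local expiry and the value returned for the hierarchy. A stub stands in for
tmigr_cpu_deactivate(); this is not the timer code's actual idle path.

	#include <stdint.h>
	#include <stdio.h>

	#define KTIME_MAX	INT64_MAX

	typedef int64_t ktime;

	/* Stub standing in for tmigr_cpu_deactivate(); returns the hierarchy wakeup. */
	static ktime stub_tmigr_cpu_deactivate(ktime nextexp)
	{
		return nextexp;		/* pretend this CPU stays responsible */
	}

	static ktime min_ktime(ktime a, ktime b)
	{
		return a < b ? a : b;
	}

	int main(void)
	{
		ktime next_local = 5000, next_global = 3000;

		/* Pass KTIME_MAX when the local timer fires first, so the CPU event is ignored. */
		ktime nextexp = next_global < next_local ? next_global : KTIME_MAX;
		ktime hier = stub_tmigr_cpu_deactivate(nextexp);

		printf("program wakeup at %lld\n", (long long)min_ktime(next_local, hier));
		return 0;
	}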
1376 * tmigr_quick_check() - Quick forecast of next tmigr event when CPU wants to
1378 * @nextevt: The next global timer expiry of the current CPU
1381 * * KTIME_MAX - when it is probable that nothing has to be done (not
1385 * * nextevt - when CPU is offline and has to handle timer on its own
1389 * * next_expiry - value of lowest expiry encountered while walking groups
1396 struct tmigr_group *group = tmc->tmgroup; in tmigr_quick_check()
1401 if (WARN_ON_ONCE(tmc->idle)) in tmigr_quick_check()
1404 if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask)) in tmigr_quick_check()
1412 * Since current CPU is active, events may not be sorted in tmigr_quick_check()
1413 * from bottom to the top because the CPU's event is ignored in tmigr_quick_check()
1417 nextevt = min_t(u64, nextevt, READ_ONCE(group->next_expiry)); in tmigr_quick_check()
1418 if (!group->parent) in tmigr_quick_check()
1421 group = group->parent; in tmigr_quick_check()
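A small sketch of how the documented return values could be interpreted by a
caller that is about to stop its tick: KTIME_MAX means probably nothing to do,
anything else bounds the planned wakeup. The stub below is hypothetical and
only mimics the "lowest expiry encountered while walking groups" case.

	#include <stdint.h>
	#include <stdio.h>

	#define KTIME_MAX	INT64_MAX

	/* Stub for tmigr_quick_check(): pretend the walk found an earlier group event. */
	static int64_t stub_tmigr_quick_check(int64_t nextevt)
	{
		int64_t lowest_group_expiry = 2500;	/* made-up value */

		return nextevt < lowest_group_expiry ? nextevt : lowest_group_expiry;
	}

	int main(void)
	{
		int64_t next_global = 4000;
		int64_t forecast = stub_tmigr_quick_check(next_global);

		if (forecast == KTIME_MAX)
			puts("probably nothing to do before going idle");
		else
			printf("plan the wakeup no later than %lld\n", (long long)forecast);
		return 0;
	}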
1428 * tmigr_trigger_active() - trigger a CPU to become active again
1430 * This function is executed on a CPU which is part of cpu_online_mask, when the
1431 * last active CPU in the hierarchy is offlining. With this, it is ensured that
1432 * the other CPU is active and takes over the migrator duty.
1438 WARN_ON_ONCE(!tmc->online || tmc->idle); in tmigr_trigger_active()
1443 static int tmigr_cpu_offline(unsigned int cpu) in tmigr_cpu_offline() argument
1449 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_offline()
1450 tmc->online = false; in tmigr_cpu_offline()
1451 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_offline()
 1454  	 * The CPU has to handle the local events on its own, when on the way to  in tmigr_cpu_offline()
1459 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_offline()
1462 migrator = cpumask_any_but(cpu_online_mask, cpu); in tmigr_cpu_offline()
1469 static int tmigr_cpu_online(unsigned int cpu) in tmigr_cpu_online() argument
1473 /* Check whether CPU data was successfully initialized */ in tmigr_cpu_online()
1474 if (WARN_ON_ONCE(!tmc->tmgroup)) in tmigr_cpu_online()
1475 return -EINVAL; in tmigr_cpu_online()
1477 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_online()
1479 tmc->idle = timer_base_is_idle(); in tmigr_cpu_online()
1480 if (!tmc->idle) in tmigr_cpu_online()
1482 tmc->online = true; in tmigr_cpu_online()
1483 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_online()
1492 raw_spin_lock_init(&group->lock); in tmigr_init_group()
1494 group->level = lvl; in tmigr_init_group()
1495 group->numa_node = lvl < tmigr_crossnode_level ? node : NUMA_NO_NODE; in tmigr_init_group()
1497 group->num_children = 0; in tmigr_init_group()
1502 atomic_set(&group->migr_state, s.state); in tmigr_init_group()
1505 * If this is a new top-level, prepare its groupmask in advance. in tmigr_init_group()
1506 * This avoids accidents where yet another new top-level is in tmigr_init_group()
1510 group->groupmask = BIT(0); in tmigr_init_group()
1516 group->num_children = 1; in tmigr_init_group()
1519 timerqueue_init_head(&group->events); in tmigr_init_group()
1520 timerqueue_init(&group->groupevt.nextevt); in tmigr_init_group()
1521 group->groupevt.nextevt.expires = KTIME_MAX; in tmigr_init_group()
1522 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_init_group()
1523 group->groupevt.ignore = true; in tmigr_init_group()
1526 static struct tmigr_group *tmigr_get_group(unsigned int cpu, int node, in tmigr_get_group() argument
1539 if (lvl < tmigr_crossnode_level && tmp->numa_node != node) in tmigr_get_group()
1543 if (tmp->num_children >= TMIGR_CHILDREN_PER_GROUP) in tmigr_get_group()
1547 * TODO: A possible further improvement: Make sure that all CPU in tmigr_get_group()
1563 return ERR_PTR(-ENOMEM); in tmigr_get_group()
1568 list_add(&group->list, &tmigr_level_list[lvl]); in tmigr_get_group()
1579 raw_spin_lock_irq(&child->lock); in tmigr_connect_child_parent()
1580 raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); in tmigr_connect_child_parent()
1585 * case groupmask is pre-initialized and @child already in tmigr_connect_child_parent()
1587 * CPU going up. in tmigr_connect_child_parent()
1589 WARN_ON_ONCE(child->groupmask != BIT(0) || parent->num_children != 2); in tmigr_connect_child_parent()
1591 /* Adding @child for the CPU going up to @parent. */ in tmigr_connect_child_parent()
1592 child->groupmask = BIT(parent->num_children++); in tmigr_connect_child_parent()
1597 * racing CPU entering/exiting idle. This RELEASE barrier enforces an in tmigr_connect_child_parent()
1600 smp_store_release(&child->parent, parent); in tmigr_connect_child_parent()
1602 raw_spin_unlock(&parent->lock); in tmigr_connect_child_parent()
1603 raw_spin_unlock_irq(&child->lock); in tmigr_connect_child_parent()
 1618  	 * the new online CPU becomes active.  in tmigr_connect_child_parent()
1628 * already connected and !idle CPU. Even if all other CPUs go idle, in tmigr_connect_child_parent()
1629 * the CPU executing the setup will be responsible up to current top in tmigr_connect_child_parent()
1634 data.childmask = child->groupmask; in tmigr_connect_child_parent()
1642 WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); in tmigr_connect_child_parent()
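The publication pattern used above (initialize the parent, then publish the
pointer with smp_store_release()) can be illustrated with C11 atomics. The
acquire load on the reader side is a conservative stand-in for the kernel's
READ_ONCE() plus the ordering discussed in the comment; the types and helpers
are illustrative only.

	#include <stdatomic.h>
	#include <stddef.h>

	struct group {
		int level;
		struct group * _Atomic parent;
	};

	/* All prior writes to *parent must be visible to anyone who sees the pointer. */
	static void connect(struct group *child, struct group *parent)
	{
		atomic_store_explicit(&child->parent, parent, memory_order_release);
	}

	/* Reader walking up the hierarchy observes a fully initialized parent or NULL. */
	static struct group *walk_up(struct group *g)
	{
		return atomic_load_explicit(&g->parent, memory_order_acquire);
	}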
1645 static int tmigr_setup_groups(unsigned int cpu, unsigned int node) in tmigr_setup_groups() argument
1653 return -ENOMEM; in tmigr_setup_groups()
1656 group = tmigr_get_group(cpu, node, i); in tmigr_setup_groups()
1673 if (group->parent || list_is_singular(&tmigr_level_list[i - 1])) in tmigr_setup_groups()
1679 WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top])); in tmigr_setup_groups()
1682 group = stack[--i]; in tmigr_setup_groups()
1685 list_del(&group->list); in tmigr_setup_groups()
1690 WARN_ON_ONCE(i != group->level); in tmigr_setup_groups()
1693 * Update tmc -> group / child -> group connection in tmigr_setup_groups()
1696 struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); in tmigr_setup_groups()
1698 raw_spin_lock_irq(&group->lock); in tmigr_setup_groups()
1700 tmc->tmgroup = group; in tmigr_setup_groups()
1701 tmc->groupmask = BIT(group->num_children++); in tmigr_setup_groups()
1703 raw_spin_unlock_irq(&group->lock); in tmigr_setup_groups()
1710 child = stack[i - 1]; in tmigr_setup_groups()
1725 * CPU's child group and pre-accounted the old root. in tmigr_setup_groups()
1727 if (group->num_children == 2 && list_is_singular(lvllist)) { in tmigr_setup_groups()
1729 * The target CPU must never do the prepare work, except in tmigr_setup_groups()
1730 * on early boot when the boot CPU is the target. Otherwise in tmigr_setup_groups()
1735 WARN_ON_ONCE(cpu == raw_smp_processor_id()); in tmigr_setup_groups()
1737 lvllist = &tmigr_level_list[top - 1]; in tmigr_setup_groups()
1739 if (child->parent) in tmigr_setup_groups()
1752 static int tmigr_add_cpu(unsigned int cpu) in tmigr_add_cpu() argument
1754 int node = cpu_to_node(cpu); in tmigr_add_cpu()
1758 ret = tmigr_setup_groups(cpu, node); in tmigr_add_cpu()
1764 static int tmigr_cpu_prepare(unsigned int cpu) in tmigr_cpu_prepare() argument
1766 struct tmigr_cpu *tmc = per_cpu_ptr(&tmigr_cpu, cpu); in tmigr_cpu_prepare()
1770 if (tmc->tmgroup) in tmigr_cpu_prepare()
1773 raw_spin_lock_init(&tmc->lock); in tmigr_cpu_prepare()
1774 timerqueue_init(&tmc->cpuevt.nextevt); in tmigr_cpu_prepare()
1775 tmc->cpuevt.nextevt.expires = KTIME_MAX; in tmigr_cpu_prepare()
1776 tmc->cpuevt.ignore = true; in tmigr_cpu_prepare()
1777 tmc->cpuevt.cpu = cpu; in tmigr_cpu_prepare()
1778 tmc->remote = false; in tmigr_cpu_prepare()
1779 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_prepare()
1781 ret = tmigr_add_cpu(cpu); in tmigr_cpu_prepare()
1785 if (tmc->groupmask == 0) in tmigr_cpu_prepare()
1786 return -EINVAL; in tmigr_cpu_prepare()
1796 int ret = -ENOMEM; in tmigr_init()
1811 * nodes. We cannot rely on cpumask_of_node() because it only works for in tmigr_init()
1827 * If a NUMA node spawns more than one CPU level group then the next in tmigr_init()
1828 * level(s) of the hierarchy contains groups which handle all CPU groups in tmigr_init()
1842 pr_info("Timer migration: %d hierarchy levels; %d children per group;" in tmigr_init()
1860 pr_err("Timer migration setup failed\n"); in tmigr_init()
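A hedged sketch of the level split described above: enough fanout-8 levels to
cover the CPUs of one node, then enough additional levels to cover the nodes.
The arithmetic is illustrative and not a copy of tmigr_init(); for the 48-CPU,
two-node example in the diagram at the top it yields the same three levels.

	#include <stdio.h>

	#define FANOUT	8	/* assumed children per group, as in the diagram at the top */

	/* Number of fanout-8 group levels needed to cover @n children (at least one). */
	static unsigned int levels_for(unsigned int n)
	{
		unsigned int lvls = 0, cap = 1;

		while (cap < n) {
			cap *= FANOUT;
			lvls++;
		}
		return lvls ? lvls : 1;
	}

	int main(void)
	{
		unsigned int cpus_per_node = 24, nodes = 2;
		unsigned int crossnode = levels_for(cpus_per_node);	/* levels below the crossnode level */
		unsigned int total = crossnode + levels_for(nodes);

		printf("per-node levels: %u, total levels: %u\n", crossnode, total);
		return 0;
	}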