Lines Matching +full:early +full:- +full:to +full:- +full:mid

1 // SPDX-License-Identifier: GPL-2.0-only
2 #include "cgroup-internal.h"
36 * pidlist destructions need to be flushed on cgroup destruction. Use a
41 /* protects cgroup_subsys->release_agent_path */
51 /* Check also dfl_cftypes for file-less controllers, i.e. perf_event */ in cgroup1_subsys_absent()
52 return ss->legacy_cftypes == NULL && ss->dfl_cftypes; in cgroup1_subsys_absent()
56 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
57 * @from: attach to all cgroups of a given task
58 * @tsk: the task to be attached
88 * cgroup_transfer_tasks - move tasks from one cgroup to another
89 * @to: cgroup to which the tasks will be moved
94 * is guaranteed to be either visible in the source cgroup after the
100 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) in cgroup_transfer_tasks() argument
108 if (cgroup_on_dfl(to)) in cgroup_transfer_tasks()
109 return -EINVAL; in cgroup_transfer_tasks()
111 ret = cgroup_migrate_vet_dst(to); in cgroup_transfer_tasks()
121 list_for_each_entry(link, &from->cset_links, cset_link) in cgroup_transfer_tasks()
122 cgroup_migrate_add_src(link->cset, to, &mgctx); in cgroup_transfer_tasks()
130 * Migrate tasks one-by-one until @from is empty. This fails iff in cgroup_transfer_tasks()
131 * ->can_attach() fails. in cgroup_transfer_tasks()
134 css_task_iter_start(&from->self, 0, &it); in cgroup_transfer_tasks()
138 } while (task && (task->flags & PF_EXITING)); in cgroup_transfer_tasks()
147 TRACE_CGROUP_PATH(transfer_tasks, to, task, false); in cgroup_transfer_tasks()
162 * *lots* of attached tasks. So it may need several calls to read(),
178 * to the cgroup.
182 * used to find which pidlist is wanted. doesn't change as long as
192 /* pointer to the cgroup we belong to, for list removal purposes */
199 * Used to destroy all pidlists lingering waiting for destroy timer. None
206 mutex_lock(&cgrp->pidlist_mutex); in cgroup1_pidlist_destroy_all()
207 list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links) in cgroup1_pidlist_destroy_all()
208 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0); in cgroup1_pidlist_destroy_all()
209 mutex_unlock(&cgrp->pidlist_mutex); in cgroup1_pidlist_destroy_all()
212 BUG_ON(!list_empty(&cgrp->pidlists)); in cgroup1_pidlist_destroy_all()
222 mutex_lock(&l->owner->pidlist_mutex); in cgroup_pidlist_destroy_work_fn()
229 list_del(&l->links); in cgroup_pidlist_destroy_work_fn()
230 kvfree(l->list); in cgroup_pidlist_destroy_work_fn()
231 put_pid_ns(l->key.ns); in cgroup_pidlist_destroy_work_fn()
235 mutex_unlock(&l->owner->pidlist_mutex); in cgroup_pidlist_destroy_work_fn()
240 * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
249 * edge cases first; no work needs to be done for either in pidlist_uniq()
256 while (list[src] == list[src-1]) { in pidlist_uniq()
261 /* dest always points to where the next unique element goes */ in pidlist_uniq()
270 * The two pid files - task and cgroup.procs - guaranteed that the result
273 * making it impossible to use, for example, single rbtree of member tasks
275 * per open file is dangerous, so cgroup had to implement shared pool of
280 return *(pid_t *)a - *(pid_t *)b; in cmppid()
290 lockdep_assert_held(&cgrp->pidlist_mutex); in cgroup_pidlist_find()
292 list_for_each_entry(l, &cgrp->pidlists, links) in cgroup_pidlist_find()
293 if (l->key.type == type && l->key.ns == ns) in cgroup_pidlist_find()
309 lockdep_assert_held(&cgrp->pidlist_mutex); in cgroup_pidlist_find_create()
320 INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn); in cgroup_pidlist_find_create()
321 l->key.type = type; in cgroup_pidlist_find_create()
323 l->key.ns = get_pid_ns(task_active_pid_ns(current)); in cgroup_pidlist_find_create()
324 l->owner = cgrp; in cgroup_pidlist_find_create()
325 list_add(&l->links, &cgrp->pidlists); in cgroup_pidlist_find_create()
342 lockdep_assert_held(&cgrp->pidlist_mutex); in pidlist_array_load()
346 * enough space - tough. This race is indistinguishable to the in pidlist_array_load()
353 return -ENOMEM; in pidlist_array_load()
355 css_task_iter_start(&cgrp->self, 0, &it); in pidlist_array_load()
364 if (pid > 0) /* make sure to only use valid results */ in pidlist_array_load()
376 return -ENOMEM; in pidlist_array_load()
380 kvfree(l->list); in pidlist_array_load()
381 l->list = array; in pidlist_array_load()
382 l->length = length; in pidlist_array_load()
389 * next pid to display; the seq_file iterator is a pointer to the pid
390 * in the cgroup->l->list array.
396 * Initially we receive a position value that corresponds to in cgroup_pidlist_start()
398 * after a seek to the start). Use a binary-search to find the in cgroup_pidlist_start()
399 * next pid to display, if any in cgroup_pidlist_start()
401 struct kernfs_open_file *of = s->private; in cgroup_pidlist_start()
402 struct cgroup_file_ctx *ctx = of->priv; in cgroup_pidlist_start()
403 struct cgroup *cgrp = seq_css(s)->cgroup; in cgroup_pidlist_start()
405 enum cgroup_filetype type = seq_cft(s)->private; in cgroup_pidlist_start()
409 mutex_lock(&cgrp->pidlist_mutex); in cgroup_pidlist_start()
412 * !NULL @ctx->procs1.pidlist indicates that this isn't the first in cgroup_pidlist_start()
414 * that. Look for it. Note that @ctx->procs1.pidlist can't be used in cgroup_pidlist_start()
417 if (ctx->procs1.pidlist) in cgroup_pidlist_start()
418 ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); in cgroup_pidlist_start()
424 if (!ctx->procs1.pidlist) { in cgroup_pidlist_start()
425 ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); in cgroup_pidlist_start()
429 l = ctx->procs1.pidlist; in cgroup_pidlist_start()
432 int end = l->length; in cgroup_pidlist_start()
435 int mid = (index + end) / 2; in cgroup_pidlist_start() local
436 if (l->list[mid] == pid) { in cgroup_pidlist_start()
437 index = mid; in cgroup_pidlist_start()
439 } else if (l->list[mid] < pid) in cgroup_pidlist_start()
440 index = mid + 1; in cgroup_pidlist_start()
442 end = mid; in cgroup_pidlist_start()
446 if (index >= l->length) in cgroup_pidlist_start()
448 /* Update the abstract position to be the actual pid that we found */ in cgroup_pidlist_start()
449 iter = l->list + index; in cgroup_pidlist_start()
456 struct kernfs_open_file *of = s->private; in cgroup_pidlist_stop()
457 struct cgroup_file_ctx *ctx = of->priv; in cgroup_pidlist_stop()
458 struct cgroup_pidlist *l = ctx->procs1.pidlist; in cgroup_pidlist_stop()
461 mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, in cgroup_pidlist_stop()
463 mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex); in cgroup_pidlist_stop()
468 struct kernfs_open_file *of = s->private; in cgroup_pidlist_next()
469 struct cgroup_file_ctx *ctx = of->priv; in cgroup_pidlist_next()
470 struct cgroup_pidlist *l = ctx->procs1.pidlist; in cgroup_pidlist_next()
472 pid_t *end = l->list + l->length; in cgroup_pidlist_next()
474 * Advance to the next pid in the array. If this goes off the in cgroup_pidlist_next()
504 cgrp = cgroup_kn_lock_live(of->kn, false); in __cgroup1_procs_write()
506 return -ENODEV; in __cgroup1_procs_write()
515 * to check permissions on one of them. Check permissions using the in __cgroup1_procs_write()
516 * credentials from file open to protect against inherited fd attacks. in __cgroup1_procs_write()
518 cred = of->file->f_cred; in __cgroup1_procs_write()
520 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && in __cgroup1_procs_write()
521 !uid_eq(cred->euid, tcred->uid) && in __cgroup1_procs_write()
522 !uid_eq(cred->euid, tcred->suid)) in __cgroup1_procs_write()
523 ret = -EACCES; in __cgroup1_procs_write()
533 cgroup_kn_unlock(of->kn); in __cgroup1_procs_write()
556 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); in cgroup_release_agent_write()
560 * require capabilities to set release agent. in cgroup_release_agent_write()
562 ctx = of->priv; in cgroup_release_agent_write()
563 if ((ctx->ns->user_ns != &init_user_ns) || in cgroup_release_agent_write()
564 !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN)) in cgroup_release_agent_write()
565 return -EPERM; in cgroup_release_agent_write()
567 cgrp = cgroup_kn_lock_live(of->kn, false); in cgroup_release_agent_write()
569 return -ENODEV; in cgroup_release_agent_write()
571 strscpy(cgrp->root->release_agent_path, strstrip(buf), in cgroup_release_agent_write()
572 sizeof(cgrp->root->release_agent_path)); in cgroup_release_agent_write()
574 cgroup_kn_unlock(of->kn); in cgroup_release_agent_write()
580 struct cgroup *cgrp = seq_css(seq)->cgroup; in cgroup_release_agent_show()
583 seq_puts(seq, cgrp->root->release_agent_path); in cgroup_release_agent_show()
598 return notify_on_release(css->cgroup); in cgroup_read_notify_on_release()
605 set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); in cgroup_write_notify_on_release()
607 clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags); in cgroup_write_notify_on_release()
614 return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); in cgroup_clone_children_read()
621 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); in cgroup_clone_children_write()
623 clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags); in cgroup_clone_children_write()
667 .max_write_len = PATH_MAX - 1,
680 * Grab the subsystems state racily. No need to add avenue to in proc_cgroupstats_show()
688 ss->legacy_name, ss->root->hierarchy_id, in proc_cgroupstats_show()
689 atomic_read(&ss->root->nr_cgrps), in proc_cgroupstats_show()
697 * cgroupstats_build - build and fill cgroupstats
698 * @stats: cgroupstats to fill information into
699 * @dentry: A dentry entry belonging to the cgroup for which stats have
702 * Build and fill cgroupstats so that taskstats can export it to user
714 /* it should be kernfs_node belonging to cgroupfs and is a directory */ in cgroupstats_build()
715 if (dentry->d_sb->s_type != &cgroup_fs_type || !kn || in cgroupstats_build()
717 return -EINVAL; in cgroupstats_build()
721 * @kn->priv's validity. For this and css_tryget_online_from_dir(), in cgroupstats_build()
722 * @kn->priv is RCU safe. Let's do the RCU dancing. in cgroupstats_build()
725 cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); in cgroupstats_build()
728 return -ENOENT; in cgroupstats_build()
732 css_task_iter_start(&cgrp->self, 0, &it); in cgroupstats_build()
734 switch (READ_ONCE(tsk->__state)) { in cgroupstats_build()
736 stats->nr_running++; in cgroupstats_build()
739 stats->nr_sleeping++; in cgroupstats_build()
742 stats->nr_uninterruptible++; in cgroupstats_build()
745 stats->nr_stopped++; in cgroupstats_build()
748 if (tsk->in_iowait) in cgroupstats_build()
749 stats->nr_io_wait++; in cgroupstats_build()
762 !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp)) in cgroup1_check_for_release()
763 schedule_work(&cgrp->release_agent_work); in cgroup1_check_for_release()
769 * relative to the root of cgroup file system) as the argument.
771 * Most likely, this user command will try to rmdir this cgroup.
774 * attached to this cgroup before it is removed, or that some other
778 * to continue to serve a useful existence. Next time it's released,
781 * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
785 * release agent task. We don't bother to wait because the caller of
797 /* snoop agent path and exit early if empty */ in cgroup1_release_agent()
798 if (!cgrp->root->release_agent_path[0]) in cgroup1_release_agent()
808 strscpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX); in cgroup1_release_agent()
833 * cgroup_rename - Only allow simple rename of directories in place.
838 struct cgroup *cgrp = kn->priv; in cgroup1_rename()
841 /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */ in cgroup1_rename()
843 return -EINVAL; in cgroup1_rename()
846 return -ENOTDIR; in cgroup1_rename()
847 if (kn->parent != new_parent) in cgroup1_rename()
848 return -EIO; in cgroup1_rename()
878 if (root->subsys_mask & (1 << ssid)) in cgroup1_show_options()
879 seq_show_option(seq, ss->legacy_name, NULL); in cgroup1_show_options()
880 if (root->flags & CGRP_ROOT_NOPREFIX) in cgroup1_show_options()
882 if (root->flags & CGRP_ROOT_XATTR) in cgroup1_show_options()
884 if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) in cgroup1_show_options()
886 if (root->flags & CGRP_ROOT_FAVOR_DYNMODS) in cgroup1_show_options()
890 if (strlen(root->release_agent_path)) in cgroup1_show_options()
892 root->release_agent_path); in cgroup1_show_options()
895 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags)) in cgroup1_show_options()
897 if (strlen(root->name)) in cgroup1_show_options()
898 seq_show_option(seq, "name", root->name); in cgroup1_show_options()
937 if (opt == -ENOPARAM) { in cgroup1_parse_param()
941 if (ret != -ENOPARAM) in cgroup1_parse_param()
944 if (strcmp(param->key, ss->legacy_name) || in cgroup1_parse_param()
949 param->key); in cgroup1_parse_param()
950 ctx->subsys_mask |= (1 << i); in cgroup1_parse_param()
953 return invalfc(fc, "Unknown subsys name '%s'", param->key); in cgroup1_parse_param()
961 ctx->none = true; in cgroup1_parse_param()
964 ctx->all_ss = true; in cgroup1_parse_param()
967 ctx->flags |= CGRP_ROOT_NOPREFIX; in cgroup1_parse_param()
970 ctx->cpuset_clone_children = true; in cgroup1_parse_param()
973 ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; in cgroup1_parse_param()
976 ctx->flags |= CGRP_ROOT_XATTR; in cgroup1_parse_param()
979 ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS; in cgroup1_parse_param()
982 ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS; in cgroup1_parse_param()
986 if (ctx->release_agent) in cgroup1_parse_param()
990 * require capabilities to set release agent. in cgroup1_parse_param()
992 if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) in cgroup1_parse_param()
994 ctx->release_agent = param->string; in cgroup1_parse_param()
995 param->string = NULL; in cgroup1_parse_param()
1000 return -ENOENT; in cgroup1_parse_param()
1002 if (!param->size) in cgroup1_parse_param()
1004 if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) in cgroup1_parse_param()
1006 /* Must match [\w.-]+ */ in cgroup1_parse_param()
1007 for (i = 0; i < param->size; i++) { in cgroup1_parse_param()
1008 char c = param->string[i]; in cgroup1_parse_param()
1011 if ((c == '.') || (c == '-') || (c == '_')) in cgroup1_parse_param()
1016 if (ctx->name) in cgroup1_parse_param()
1018 ctx->name = param->string; in cgroup1_parse_param()
1019 param->string = NULL; in cgroup1_parse_param()
1041 ctx->subsys_mask &= enabled; in check_cgroupfs_options()
1045 * let's default to 'all'. in check_cgroupfs_options()
1047 if (!ctx->subsys_mask && !ctx->none && !ctx->name) in check_cgroupfs_options()
1048 ctx->all_ss = true; in check_cgroupfs_options()
1050 if (ctx->all_ss) { in check_cgroupfs_options()
1052 if (ctx->subsys_mask) in check_cgroupfs_options()
1055 ctx->subsys_mask = enabled; in check_cgroupfs_options()
1059 * We either have to specify by name or by subsystems. (So all in check_cgroupfs_options()
1062 if (!ctx->subsys_mask && !ctx->name) in check_cgroupfs_options()
1070 if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) in check_cgroupfs_options()
1074 if (ctx->subsys_mask && ctx->none) in check_cgroupfs_options()
1083 struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); in cgroup1_reconfigure()
1095 if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent) in cgroup1_reconfigure()
1097 task_tgid_nr(current), current->comm); in cgroup1_reconfigure()
1099 added_mask = ctx->subsys_mask & ~root->subsys_mask; in cgroup1_reconfigure()
1100 removed_mask = root->subsys_mask & ~ctx->subsys_mask; in cgroup1_reconfigure()
1102 /* Don't allow flags or name to change at remount */ in cgroup1_reconfigure()
1103 if ((ctx->flags ^ root->flags) || in cgroup1_reconfigure()
1104 (ctx->name && strcmp(ctx->name, root->name))) { in cgroup1_reconfigure()
1106 ctx->flags, ctx->name ?: "", root->flags, root->name); in cgroup1_reconfigure()
1107 ret = -EINVAL; in cgroup1_reconfigure()
1112 if (!list_empty(&root->cgrp.self.children)) { in cgroup1_reconfigure()
1113 ret = -EBUSY; in cgroup1_reconfigure()
1123 if (ctx->release_agent) { in cgroup1_reconfigure()
1125 strcpy(root->release_agent_path, ctx->release_agent); in cgroup1_reconfigure()
1145 * The guts of cgroup1 mount - find or create cgroup_root to use.
1146 * Called with cgroup_mutex held; returns 0 on success, -E... on
1147 * error and positive - in case when the candidate is busy dying.
1148 * On success it stashes a reference to cgroup_root into given
1167 * dying subsystems. We just need to ensure that the ones in cgroup1_root_to_use()
1172 if (!(ctx->subsys_mask & (1 << i)) || in cgroup1_root_to_use()
1173 ss->root == &cgrp_dfl_root) in cgroup1_root_to_use()
1176 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) in cgroup1_root_to_use()
1178 cgroup_put(&ss->root->cgrp); in cgroup1_root_to_use()
1192 if (ctx->name) { in cgroup1_root_to_use()
1193 if (strcmp(ctx->name, root->name)) in cgroup1_root_to_use()
1202 if ((ctx->subsys_mask || ctx->none) && in cgroup1_root_to_use()
1203 (ctx->subsys_mask != root->subsys_mask)) { in cgroup1_root_to_use()
1206 return -EBUSY; in cgroup1_root_to_use()
1209 if (root->flags ^ ctx->flags) in cgroup1_root_to_use()
1212 ctx->root = root; in cgroup1_root_to_use()
1221 if (!ctx->subsys_mask && !ctx->none) in cgroup1_root_to_use()
1225 if (ctx->ns != &init_cgroup_ns) in cgroup1_root_to_use()
1226 return -EPERM; in cgroup1_root_to_use()
1230 return -ENOMEM; in cgroup1_root_to_use()
1232 ctx->root = root; in cgroup1_root_to_use()
1235 ret = cgroup_setup_root(root, ctx->subsys_mask); in cgroup1_root_to_use()
1237 cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS); in cgroup1_root_to_use()
1249 /* Check if the caller has permission to mount. */ in cgroup1_get_tree()
1250 if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN)) in cgroup1_get_tree()
1251 return -EPERM; in cgroup1_get_tree()
1256 if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) in cgroup1_get_tree()
1264 if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { in cgroup1_get_tree()
1277 * task_get_cgroup1 - Acquires the associated cgroup of a task within a
1284 * We limit it to cgroup1 only.
1288 struct cgroup *cgrp = ERR_PTR(-ENOENT); in task_get_cgroup1()
1297 if (root->hierarchy_id != hierarchy_id) in task_get_cgroup1()
1302 cgrp = ERR_PTR(-ENOENT); in task_get_cgroup1()
1313 * Used to destroy pidlists and separate to serve as flush domain. in cgroup1_wq_init()
1314 * Cap @max_active to 1 too. in cgroup1_wq_init()
1344 if (strcmp(token, ss->name) && in cgroup_no_v1()
1345 strcmp(token, ss->legacy_name)) in cgroup_no_v1()