xref: /aosp_15_r20/system/core/libprocessgroup/processgroup.cpp (revision 00c7fec1bb09f3284aad6a6f96d2f63dfc3650ad)
1 /*
2  *  Copyright 2014 Google, Inc
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *  Unless required by applicable law or agreed to in writing, software
11  *  distributed under the License is distributed on an "AS IS" BASIS,
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "libprocessgroup"
19 
20 #include <assert.h>
21 #include <dirent.h>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <poll.h>
26 #include <signal.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <sys/stat.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32 
33 #include <chrono>
34 #include <cstring>
35 #include <map>
36 #include <memory>
37 #include <mutex>
38 #include <set>
39 #include <string>
40 #include <string_view>
41 #include <thread>
42 
43 #include <android-base/file.h>
44 #include <android-base/logging.h>
45 #include <android-base/properties.h>
46 #include <android-base/stringprintf.h>
47 #include <cutils/android_filesystem_config.h>
48 #include <processgroup/processgroup.h>
49 #include <task_profiles.h>
50 
51 using android::base::GetBoolProperty;
52 using android::base::StringPrintf;
53 using android::base::WriteStringToFile;
54 
55 using namespace std::chrono_literals;
56 
57 #define PROCESSGROUP_CGROUP_PROCS_FILE "cgroup.procs"
58 #define PROCESSGROUP_CGROUP_KILL_FILE "cgroup.kill"
59 #define PROCESSGROUP_CGROUP_EVENTS_FILE "cgroup.events"
60 
// Reports whether /proc/cgroups exists. The access() probe runs only on the first call; later
// calls return the memoized answer.
bool CgroupsAvailable() {
    static const bool has_proc_cgroups = (0 == access("/proc/cgroups", F_OK));
    return has_proc_cgroups;
}
65 
CgroupGetControllerPath(const std::string & cgroup_name,std::string * path)66 bool CgroupGetControllerPath(const std::string& cgroup_name, std::string* path) {
67     auto controller = CgroupMap::GetInstance().FindController(cgroup_name);
68 
69     if (!controller.HasValue()) {
70         return false;
71     }
72 
73     if (path) {
74         *path = controller.path();
75     }
76 
77     return true;
78 }
79 
CgroupKillAvailable()80 static bool CgroupKillAvailable() {
81     static std::once_flag f;
82     static bool cgroup_kill_available = false;
83     std::call_once(f, []() {
84         std::string cg_kill;
85         CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &cg_kill);
86         // cgroup.kill is not on the root cgroup, so check a non-root cgroup that should always
87         // exist
88         cg_kill = ConvertUidToPath(cg_kill.c_str(), AID_ROOT) + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
89         cgroup_kill_available = access(cg_kill.c_str(), F_OK) == 0;
90     });
91 
92     return cgroup_kill_available;
93 }
94 
CgroupGetMemcgAppsPath(std::string * path)95 static bool CgroupGetMemcgAppsPath(std::string* path) {
96     CgroupControllerWrapper controller = CgroupMap::GetInstance().FindController("memory");
97 
98     if (!controller.HasValue()) {
99         return false;
100     }
101 
102     if (path) {
103         *path = controller.path();
104         if (controller.version() == 1) {
105             *path += "/apps";
106         }
107     }
108 
109     return true;
110 }
111 
CgroupGetControllerFromPath(const std::string & path,std::string * cgroup_name)112 bool CgroupGetControllerFromPath(const std::string& path, std::string* cgroup_name) {
113     auto controller = CgroupMap::GetInstance().FindControllerByPath(path);
114 
115     if (!controller.HasValue()) {
116         return false;
117     }
118 
119     if (cgroup_name) {
120         *cgroup_name = controller.name();
121     }
122 
123     return true;
124 }
125 
CgroupGetAttributePath(const std::string & attr_name,std::string * path)126 bool CgroupGetAttributePath(const std::string& attr_name, std::string* path) {
127     const TaskProfiles& tp = TaskProfiles::GetInstance();
128     const IProfileAttribute* attr = tp.GetAttribute(attr_name);
129 
130     if (attr == nullptr) {
131         return false;
132     }
133 
134     if (path) {
135         *path = StringPrintf("%s/%s", attr->controller()->path(), attr->file_name().c_str());
136     }
137 
138     return true;
139 }
140 
CgroupGetAttributePathForTask(const std::string & attr_name,pid_t tid,std::string * path)141 bool CgroupGetAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path) {
142     const TaskProfiles& tp = TaskProfiles::GetInstance();
143     const IProfileAttribute* attr = tp.GetAttribute(attr_name);
144 
145     if (attr == nullptr) {
146         return false;
147     }
148 
149     if (!attr->GetPathForTask(tid, path)) {
150         LOG(ERROR) << "Failed to find cgroup for tid " << tid;
151         return false;
152     }
153 
154     return true;
155 }
156 
UsePerAppMemcg()157 bool UsePerAppMemcg() {
158     bool low_ram_device = GetBoolProperty("ro.config.low_ram", false);
159     return GetBoolProperty("ro.config.per_app_memcg", low_ram_device);
160 }
161 
isMemoryCgroupSupported()162 static bool isMemoryCgroupSupported() {
163     static bool memcg_supported = CgroupMap::GetInstance().FindController("memory").IsUsable();
164 
165     return memcg_supported;
166 }
167 
DropTaskProfilesResourceCaching()168 void DropTaskProfilesResourceCaching() {
169     TaskProfiles::GetInstance().DropResourceCaching(ProfileAction::RCT_TASK);
170     TaskProfiles::GetInstance().DropResourceCaching(ProfileAction::RCT_PROCESS);
171 }
172 
SetProcessProfiles(uid_t uid,pid_t pid,const std::vector<std::string> & profiles)173 bool SetProcessProfiles(uid_t uid, pid_t pid, const std::vector<std::string>& profiles) {
174     return TaskProfiles::GetInstance().SetProcessProfiles(
175             uid, pid, std::span<const std::string>(profiles), false);
176 }
177 
SetProcessProfiles(uid_t uid,pid_t pid,std::initializer_list<std::string_view> profiles)178 bool SetProcessProfiles(uid_t uid, pid_t pid, std::initializer_list<std::string_view> profiles) {
179     return TaskProfiles::GetInstance().SetProcessProfiles(
180             uid, pid, std::span<const std::string_view>(profiles), false);
181 }
182 
SetProcessProfiles(uid_t uid,pid_t pid,std::span<const std::string_view> profiles)183 bool SetProcessProfiles(uid_t uid, pid_t pid, std::span<const std::string_view> profiles) {
184     return TaskProfiles::GetInstance().SetProcessProfiles(uid, pid, profiles, false);
185 }
186 
SetProcessProfilesCached(uid_t uid,pid_t pid,const std::vector<std::string> & profiles)187 bool SetProcessProfilesCached(uid_t uid, pid_t pid, const std::vector<std::string>& profiles) {
188     return TaskProfiles::GetInstance().SetProcessProfiles(
189             uid, pid, std::span<const std::string>(profiles), true);
190 }
191 
SetTaskProfiles(pid_t tid,const std::vector<std::string> & profiles,bool use_fd_cache)192 bool SetTaskProfiles(pid_t tid, const std::vector<std::string>& profiles, bool use_fd_cache) {
193     return TaskProfiles::GetInstance().SetTaskProfiles(tid, std::span<const std::string>(profiles),
194                                                        use_fd_cache);
195 }
196 
SetTaskProfiles(pid_t tid,std::initializer_list<std::string_view> profiles,bool use_fd_cache)197 bool SetTaskProfiles(pid_t tid, std::initializer_list<std::string_view> profiles,
198                      bool use_fd_cache) {
199     return TaskProfiles::GetInstance().SetTaskProfiles(
200             tid, std::span<const std::string_view>(profiles), use_fd_cache);
201 }
202 
SetTaskProfiles(pid_t tid,std::span<const std::string_view> profiles,bool use_fd_cache)203 bool SetTaskProfiles(pid_t tid, std::span<const std::string_view> profiles, bool use_fd_cache) {
204     return TaskProfiles::GetInstance().SetTaskProfiles(tid, profiles, use_fd_cache);
205 }
206 
207 // C wrapper for SetProcessProfiles.
208 // No need to have this in the header file because this function is specifically for crosvm. Crosvm
209 // which is written in Rust has its own declaration of this foreign function and doesn't rely on the
210 // header. See
211 // https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3574427/5/src/linux/android.rs#12
android_set_process_profiles(uid_t uid,pid_t pid,size_t num_profiles,const char * profiles[])212 extern "C" bool android_set_process_profiles(uid_t uid, pid_t pid, size_t num_profiles,
213                                              const char* profiles[]) {
214     std::vector<std::string_view> profiles_;
215     profiles_.reserve(num_profiles);
216     for (size_t i = 0; i < num_profiles; i++) {
217         profiles_.emplace_back(profiles[i]);
218     }
219     return SetProcessProfiles(uid, pid, std::span<const std::string_view>(profiles_));
220 }
221 
SetUserProfiles(uid_t uid,const std::vector<std::string> & profiles)222 bool SetUserProfiles(uid_t uid, const std::vector<std::string>& profiles) {
223     return TaskProfiles::GetInstance().SetUserProfiles(uid, std::span<const std::string>(profiles),
224                                                        false);
225 }
226 
RemoveCgroup(const char * cgroup,uid_t uid,pid_t pid)227 static int RemoveCgroup(const char* cgroup, uid_t uid, pid_t pid) {
228     auto path = ConvertUidPidToPath(cgroup, uid, pid);
229     int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
230 
231     if (!ret && uid >= AID_ISOLATED_START && uid <= AID_ISOLATED_END) {
232         // Isolated UIDs are unlikely to be reused soon after removal,
233         // so free up the kernel resources for the UID level cgroup.
234         path = ConvertUidToPath(cgroup, uid);
235         ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
236     }
237 
238     if (ret < 0 && errno == ENOENT) {
239         // This function is idempoetent, but still warn here.
240         LOG(WARNING) << "RemoveCgroup: " << path << " does not exist.";
241         ret = 0;
242     }
243 
244     return ret;
245 }
246 
RemoveEmptyUidCgroups(const std::string & uid_path)247 static bool RemoveEmptyUidCgroups(const std::string& uid_path) {
248     std::unique_ptr<DIR, decltype(&closedir)> uid(opendir(uid_path.c_str()), closedir);
249     bool empty = true;
250     if (uid != NULL) {
251         dirent* dir;
252         while ((dir = readdir(uid.get())) != nullptr) {
253             if (dir->d_type != DT_DIR) {
254                 continue;
255             }
256 
257             if (!std::string_view(dir->d_name).starts_with("pid_")) {
258                 continue;
259             }
260 
261             auto path = StringPrintf("%s/%s", uid_path.c_str(), dir->d_name);
262             LOG(VERBOSE) << "Removing " << path;
263             if (rmdir(path.c_str()) == -1) {
264                 if (errno != EBUSY) {
265                     PLOG(WARNING) << "Failed to remove " << path;
266                 }
267                 empty = false;
268             }
269         }
270     }
271     return empty;
272 }
273 
removeAllEmptyProcessGroups()274 void removeAllEmptyProcessGroups() {
275     LOG(VERBOSE) << "removeAllEmptyProcessGroups()";
276 
277     std::vector<std::string> cgroups;
278     std::string path, memcg_apps_path;
279 
280     if (CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &path)) {
281         cgroups.push_back(path);
282     }
283     if (CgroupGetMemcgAppsPath(&memcg_apps_path) && memcg_apps_path != path) {
284         cgroups.push_back(memcg_apps_path);
285     }
286 
287     for (std::string cgroup_root_path : cgroups) {
288         std::unique_ptr<DIR, decltype(&closedir)> root(opendir(cgroup_root_path.c_str()), closedir);
289         if (root == NULL) {
290             PLOG(ERROR) << __func__ << " failed to open " << cgroup_root_path;
291         } else {
292             dirent* dir;
293             while ((dir = readdir(root.get())) != nullptr) {
294                 if (dir->d_type != DT_DIR) {
295                     continue;
296                 }
297 
298                 if (!std::string_view(dir->d_name).starts_with("uid_")) {
299                     continue;
300                 }
301 
302                 auto path = StringPrintf("%s/%s", cgroup_root_path.c_str(), dir->d_name);
303                 if (!RemoveEmptyUidCgroups(path)) {
304                     LOG(VERBOSE) << "Skip removing " << path;
305                     continue;
306                 }
307                 LOG(VERBOSE) << "Removing " << path;
308                 if (rmdir(path.c_str()) == -1 && errno != EBUSY) {
309                     PLOG(WARNING) << "Failed to remove " << path;
310                 }
311             }
312         }
313     }
314 }
315 
316 /**
317  * Process groups are primarily created by the Zygote, meaning that uid/pid groups are created by
318  * the user root. Ownership for the newly created cgroup and all of its files must thus be
319  * transferred for the user/group passed as uid/gid before system_server can properly access them.
320  */
MkdirAndChown(const std::string & path,mode_t mode,uid_t uid,gid_t gid)321 static bool MkdirAndChown(const std::string& path, mode_t mode, uid_t uid, gid_t gid) {
322     if (mkdir(path.c_str(), mode) == -1) {
323         if (errno == EEXIST) {
324             // Directory already exists and permissions have been set at the time it was created
325             return true;
326         }
327         return false;
328     }
329 
330     auto dir = std::unique_ptr<DIR, decltype(&closedir)>(opendir(path.c_str()), closedir);
331 
332     if (dir == NULL) {
333         PLOG(ERROR) << "opendir failed for " << path;
334         goto err;
335     }
336 
337     struct dirent* dir_entry;
338     while ((dir_entry = readdir(dir.get()))) {
339         if (!strcmp("..", dir_entry->d_name)) {
340             continue;
341         }
342 
343         std::string file_path = path + "/" + dir_entry->d_name;
344 
345         if (lchown(file_path.c_str(), uid, gid) < 0) {
346             PLOG(ERROR) << "lchown failed for " << file_path;
347             goto err;
348         }
349 
350         if (fchmodat(AT_FDCWD, file_path.c_str(), mode, AT_SYMLINK_NOFOLLOW) != 0) {
351             PLOG(ERROR) << "fchmodat failed for " << file_path;
352             goto err;
353         }
354     }
355 
356     return true;
357 err:
358     int saved_errno = errno;
359     rmdir(path.c_str());
360     errno = saved_errno;
361 
362     return false;
363 }
364 
sendSignalToProcessGroup(uid_t uid,pid_t initialPid,int signal)365 bool sendSignalToProcessGroup(uid_t uid, pid_t initialPid, int signal) {
366     std::set<pid_t> pgids, pids;
367 
368     if (CgroupsAvailable()) {
369         std::string hierarchy_root_path, cgroup_v2_path;
370         CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
371         cgroup_v2_path = ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
372 
373         if (signal == SIGKILL && CgroupKillAvailable()) {
374             LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_KILL_FILE << " to SIGKILL "
375                          << cgroup_v2_path;
376 
377             // We need to kill the process group in addition to the cgroup. For normal apps they
378             // should completely overlap, but system_server kills depend on process group kills to
379             // take down apps which are in their own cgroups and not individually targeted.
380             if (kill(-initialPid, signal) == -1 && errno != ESRCH) {
381                 PLOG(WARNING) << "kill(" << -initialPid << ", " << signal << ") failed";
382             }
383 
384             const std::string killfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
385             if (WriteStringToFile("1", killfilepath)) {
386                 return true;
387             } else {
388                 PLOG(ERROR) << "Failed to write 1 to " << killfilepath;
389                 // Fallback to cgroup.procs below
390             }
391         }
392 
393         // Since cgroup.kill only sends SIGKILLs, we read cgroup.procs to find each process to
394         // signal individually. This is more costly than using cgroup.kill for SIGKILLs.
395         LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_PROCS_FILE << " to signal (" << signal
396                      << ") " << cgroup_v2_path;
397 
398         // We separate all of the pids in the cgroup into those pids that are also the leaders of
399         // process groups (stored in the pgids set) and those that are not (stored in the pids set).
400         const auto procsfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
401         std::unique_ptr<FILE, decltype(&fclose)> fp(fopen(procsfilepath.c_str(), "re"), fclose);
402         if (!fp) {
403             // This should only happen if the cgroup has already been removed with a successful call
404             // to killProcessGroup. Callers should only retry sendSignalToProcessGroup or
405             // killProcessGroup calls if they fail without ENOENT.
406             PLOG(ERROR) << "Failed to open " << procsfilepath;
407             kill(-initialPid, signal);
408             return false;
409         }
410 
411         pid_t pid;
412         bool file_is_empty = true;
413         while (fscanf(fp.get(), "%d\n", &pid) == 1 && pid >= 0) {
414             file_is_empty = false;
415             if (pid == 0) {
416                 // Should never happen...  but if it does, trying to kill this
417                 // will boomerang right back and kill us!  Let's not let that happen.
418                 LOG(WARNING)
419                         << "Yikes, we've been told to kill pid 0!  How about we don't do that?";
420                 continue;
421             }
422             pid_t pgid = getpgid(pid);
423             if (pgid == -1) PLOG(ERROR) << "getpgid(" << pid << ") failed";
424             if (pgid == pid) {
425                 pgids.emplace(pid);
426             } else {
427                 pids.emplace(pid);
428             }
429         }
430         if (!file_is_empty) {
431             // Erase all pids that will be killed when we kill the process groups.
432             for (auto it = pids.begin(); it != pids.end();) {
433                 pid_t pgid = getpgid(*it);
434                 if (pgids.count(pgid) == 1) {
435                     it = pids.erase(it);
436                 } else {
437                     ++it;
438                 }
439             }
440         }
441     }
442 
443     pgids.emplace(initialPid);
444 
445     // Kill all process groups.
446     for (const auto pgid : pgids) {
447         LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
448                      << " as part of process cgroup " << initialPid;
449 
450         if (kill(-pgid, signal) == -1 && errno != ESRCH) {
451             PLOG(WARNING) << "kill(" << -pgid << ", " << signal << ") failed";
452         }
453     }
454 
455     // Kill remaining pids.
456     for (const auto pid : pids) {
457         LOG(VERBOSE) << "Killing pid " << pid << " in uid " << uid << " as part of process cgroup "
458                      << initialPid;
459 
460         if (kill(pid, signal) == -1 && errno != ESRCH) {
461             PLOG(WARNING) << "kill(" << pid << ", " << signal << ") failed";
462         }
463     }
464 
465     return true;
466 }
467 
// Converts any std::chrono duration to whole milliseconds using duration_cast (the fractional
// part is truncated).
template <typename T>
static std::chrono::milliseconds toMillisec(T&& duration) {
    using std::chrono::milliseconds;
    return std::chrono::duration_cast<milliseconds>(duration);
}
472 
// Outcome of querying the "populated" key of a cgroup.events file.
enum class populated_status {
    populated,      // the key was read and its value is '1'
    not_populated,  // the key was read and its value is anything else
    error           // the file could not be read or the key could not be parsed
};
479 
cgroupIsPopulated(int events_fd)480 static populated_status cgroupIsPopulated(int events_fd) {
481     const std::string POPULATED_KEY("populated ");
482     const std::string::size_type MAX_EVENTS_FILE_SIZE = 32;
483 
484     std::string buf;
485     buf.resize(MAX_EVENTS_FILE_SIZE);
486     ssize_t len = TEMP_FAILURE_RETRY(pread(events_fd, buf.data(), buf.size(), 0));
487     if (len == -1) {
488         PLOG(ERROR) << "Could not read cgroup.events: ";
489         // Potentially ENODEV if the cgroup has been removed since we opened this file, but that
490         // shouldn't have happened yet.
491         return populated_status::error;
492     }
493 
494     if (len == 0) {
495         LOG(ERROR) << "cgroup.events EOF";
496         return populated_status::error;
497     }
498 
499     buf.resize(len);
500 
501     const std::string::size_type pos = buf.find(POPULATED_KEY);
502     if (pos == std::string::npos) {
503         LOG(ERROR) << "Could not find populated key in cgroup.events";
504         return populated_status::error;
505     }
506 
507     if (pos + POPULATED_KEY.size() + 1 > len) {
508         LOG(ERROR) << "Partial read of cgroup.events";
509         return populated_status::error;
510     }
511 
512     return buf[pos + POPULATED_KEY.size()] == '1' ?
513         populated_status::populated : populated_status::not_populated;
514 }
515 
516 // The default timeout of 2200ms comes from the default number of retries in a previous
517 // implementation of this function. The default retry value was 40 for killing and 400 for cgroup
518 // removal with 5ms sleeps between each retry.
KillProcessGroup(uid_t uid,pid_t initialPid,int signal,bool once=false,std::chrono::steady_clock::time_point until=std::chrono::steady_clock::now ()+2200ms)519 static int KillProcessGroup(
520         uid_t uid, pid_t initialPid, int signal, bool once = false,
521         std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) {
522     if (uid < 0) {
523         LOG(ERROR) << __func__ << ": invalid UID " << uid;
524         return -1;
525     }
526     if (initialPid <= 0) {
527         LOG(ERROR) << __func__ << ": invalid PID " << initialPid;
528         return -1;
529     }
530 
531     // Always attempt to send a kill signal to at least the initialPid, at least once, regardless of
532     // whether its cgroup exists or not. This should only be necessary if a bug results in the
533     // migration of the targeted process out of its cgroup, which we will also attempt to kill.
534     const bool signal_ret = sendSignalToProcessGroup(uid, initialPid, signal);
535 
536     if (!CgroupsAvailable() || !signal_ret) return signal_ret ? 0 : -1;
537 
538     std::string hierarchy_root_path;
539     CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
540 
541     const std::string cgroup_v2_path =
542             ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
543 
544     const std::string eventsfile = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_EVENTS_FILE;
545     android::base::unique_fd events_fd(open(eventsfile.c_str(), O_RDONLY));
546     if (events_fd.get() == -1) {
547         PLOG(WARNING) << "Error opening " << eventsfile << " for KillProcessGroup";
548         return -1;
549     }
550 
551     struct pollfd fds = {
552         .fd = events_fd,
553         .events = POLLPRI,
554     };
555 
556     const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
557 
558     // The primary reason to loop here is to capture any new forks or migrations that could occur
559     // after we send signals to the original set of processes, but before all of those processes
560     // exit and the cgroup becomes unpopulated, or before we remove the cgroup. We try hard to
561     // ensure this completes successfully to avoid permanent memory leaks, but we still place a
562     // large default upper bound on the amount of time we spend in this loop. The amount of CPU
563     // contention, and the amount of work that needs to be done in do_exit for each process
564     // determines how long this will take.
565     int ret;
566     do {
567         populated_status populated;
568         while ((populated = cgroupIsPopulated(events_fd.get())) == populated_status::populated &&
569                std::chrono::steady_clock::now() < until) {
570 
571             sendSignalToProcessGroup(uid, initialPid, signal);
572             if (once) {
573                 populated = cgroupIsPopulated(events_fd.get());
574                 break;
575             }
576 
577             const std::chrono::steady_clock::time_point poll_start =
578                     std::chrono::steady_clock::now();
579 
580             if (poll_start < until)
581                 ret = TEMP_FAILURE_RETRY(poll(&fds, 1, toMillisec(until - poll_start).count()));
582 
583             if (ret == -1) {
584                 // Fallback to 5ms sleeps if poll fails
585                 PLOG(ERROR) << "Poll on " << eventsfile << "failed";
586                 const std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
587                 if (now < until)
588                     std::this_thread::sleep_for(std::min(5ms, toMillisec(until - now)));
589             }
590 
591             LOG(VERBOSE) << "Waited "
592                          << toMillisec(std::chrono::steady_clock::now() - poll_start).count()
593                          << " ms for " << eventsfile << " poll";
594         }
595 
596         const std::chrono::milliseconds kill_duration =
597                 toMillisec(std::chrono::steady_clock::now() - start);
598 
599         if (populated == populated_status::populated) {
600             LOG(WARNING) << "Still waiting on process(es) to exit for cgroup " << cgroup_v2_path
601                          << " after " << kill_duration.count() << " ms";
602             // We'll still try the cgroup removal below which we expect to log an error.
603         } else if (populated == populated_status::not_populated) {
604             LOG(VERBOSE) << "Killed all processes under cgroup " << cgroup_v2_path
605                          << " after " << kill_duration.count() << " ms";
606         }
607 
608         ret = RemoveCgroup(hierarchy_root_path.c_str(), uid, initialPid);
609         if (ret)
610             PLOG(ERROR) << "Unable to remove cgroup " << cgroup_v2_path;
611         else
612             LOG(INFO) << "Removed cgroup " << cgroup_v2_path;
613 
614         if (isMemoryCgroupSupported() && UsePerAppMemcg()) {
615             // This per-application memcg v1 case should eventually be removed after migration to
616             // memcg v2.
617             std::string memcg_apps_path;
618             if (CgroupGetMemcgAppsPath(&memcg_apps_path) &&
619                 (ret = RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid)) < 0) {
620                 const auto memcg_v1_cgroup_path =
621                         ConvertUidPidToPath(memcg_apps_path.c_str(), uid, initialPid);
622                 PLOG(ERROR) << "Unable to remove memcg v1 cgroup " << memcg_v1_cgroup_path;
623             }
624         }
625 
626         if (once) break;
627         if (std::chrono::steady_clock::now() >= until) break;
628     } while (ret && errno == EBUSY);
629 
630     return ret;
631 }
632 
killProcessGroup(uid_t uid,pid_t initialPid,int signal)633 int killProcessGroup(uid_t uid, pid_t initialPid, int signal) {
634     return KillProcessGroup(uid, initialPid, signal);
635 }
636 
killProcessGroupOnce(uid_t uid,pid_t initialPid,int signal)637 int killProcessGroupOnce(uid_t uid, pid_t initialPid, int signal) {
638     return KillProcessGroup(uid, initialPid, signal, true);
639 }
640 
createProcessGroupInternal(uid_t uid,pid_t initialPid,std::string cgroup,bool activate_controllers)641 static int createProcessGroupInternal(uid_t uid, pid_t initialPid, std::string cgroup,
642                                       bool activate_controllers) {
643     auto uid_path = ConvertUidToPath(cgroup.c_str(), uid);
644 
645     struct stat cgroup_stat;
646     mode_t cgroup_mode = 0750;
647     uid_t cgroup_uid = AID_SYSTEM;
648     gid_t cgroup_gid = AID_SYSTEM;
649     int ret = 0;
650 
651     if (stat(cgroup.c_str(), &cgroup_stat) < 0) {
652         PLOG(ERROR) << "Failed to get stats for " << cgroup;
653     } else {
654         cgroup_mode = cgroup_stat.st_mode;
655         cgroup_uid = cgroup_stat.st_uid;
656         cgroup_gid = cgroup_stat.st_gid;
657     }
658 
659     if (!MkdirAndChown(uid_path, cgroup_mode, cgroup_uid, cgroup_gid)) {
660         PLOG(ERROR) << "Failed to make and chown " << uid_path;
661         return -errno;
662     }
663     if (activate_controllers) {
664         if (!CgroupMap::GetInstance().ActivateControllers(uid_path)) {
665             PLOG(ERROR) << "Failed to activate controllers in " << uid_path;
666             return -errno;
667         }
668     }
669 
670     auto uid_pid_path = ConvertUidPidToPath(cgroup.c_str(), uid, initialPid);
671 
672     if (!MkdirAndChown(uid_pid_path, cgroup_mode, cgroup_uid, cgroup_gid)) {
673         PLOG(ERROR) << "Failed to make and chown " << uid_pid_path;
674         return -errno;
675     }
676 
677     auto uid_pid_procs_file = uid_pid_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
678 
679     if (!WriteStringToFile(std::to_string(initialPid), uid_pid_procs_file)) {
680         ret = -errno;
681         PLOG(ERROR) << "Failed to write '" << initialPid << "' to " << uid_pid_procs_file;
682     }
683 
684     return ret;
685 }
686 
createProcessGroup(uid_t uid,pid_t initialPid,bool memControl)687 int createProcessGroup(uid_t uid, pid_t initialPid, bool memControl) {
688     if (uid < 0) {
689         LOG(ERROR) << __func__ << ": invalid UID " << uid;
690         return -1;
691     }
692     if (initialPid <= 0) {
693         LOG(ERROR) << __func__ << ": invalid PID " << initialPid;
694         return -1;
695     }
696 
697     if (memControl && !UsePerAppMemcg()) {
698         LOG(ERROR) << "service memory controls are used without per-process memory cgroup support";
699         return -EINVAL;
700     }
701 
702     if (std::string memcg_apps_path;
703         isMemoryCgroupSupported() && UsePerAppMemcg() && CgroupGetMemcgAppsPath(&memcg_apps_path)) {
704         // Note by bvanassche: passing 'false' as fourth argument below implies that the v1
705         // hierarchy is used. It is not clear to me whether the above conditions guarantee that the
706         // v1 hierarchy is used.
707         int ret = createProcessGroupInternal(uid, initialPid, memcg_apps_path, false);
708         if (ret != 0) {
709             return ret;
710         }
711     }
712 
713     std::string cgroup;
714     CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &cgroup);
715     return createProcessGroupInternal(uid, initialPid, cgroup, true);
716 }
717 
SetProcessGroupValue(pid_t tid,const std::string & attr_name,int64_t value)718 static bool SetProcessGroupValue(pid_t tid, const std::string& attr_name, int64_t value) {
719     if (!isMemoryCgroupSupported()) {
720         LOG(ERROR) << "Memcg is not mounted.";
721         return false;
722     }
723 
724     std::string path;
725     if (!CgroupGetAttributePathForTask(attr_name, tid, &path)) {
726         LOG(ERROR) << "Failed to find attribute '" << attr_name << "'";
727         return false;
728     }
729 
730     if (!WriteStringToFile(std::to_string(value), path)) {
731         PLOG(ERROR) << "Failed to write '" << value << "' to " << path;
732         return false;
733     }
734     return true;
735 }
736 
setProcessGroupSwappiness(uid_t,pid_t pid,int swappiness)737 bool setProcessGroupSwappiness(uid_t, pid_t pid, int swappiness) {
738     return SetProcessGroupValue(pid, "MemSwappiness", swappiness);
739 }
740 
setProcessGroupSoftLimit(uid_t,pid_t pid,int64_t soft_limit_in_bytes)741 bool setProcessGroupSoftLimit(uid_t, pid_t pid, int64_t soft_limit_in_bytes) {
742     return SetProcessGroupValue(pid, "MemSoftLimit", soft_limit_in_bytes);
743 }
744 
setProcessGroupLimit(uid_t,pid_t pid,int64_t limit_in_bytes)745 bool setProcessGroupLimit(uid_t, pid_t pid, int64_t limit_in_bytes) {
746     return SetProcessGroupValue(pid, "MemLimit", limit_in_bytes);
747 }
748 
getAttributePathForTask(const std::string & attr_name,pid_t tid,std::string * path)749 bool getAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path) {
750     return CgroupGetAttributePathForTask(attr_name, tid, path);
751 }
752 
isProfileValidForProcess(const std::string & profile_name,uid_t uid,pid_t pid)753 bool isProfileValidForProcess(const std::string& profile_name, uid_t uid, pid_t pid) {
754     const TaskProfile* tp = TaskProfiles::GetInstance().GetProfile(profile_name);
755 
756     if (tp == nullptr) {
757         return false;
758     }
759 
760     return tp->IsValidForProcess(uid, pid);
761 }
762