// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2014 Intel Mobile Communications GmbH
 * Copyright(c) 2015 Intel Deutschland GmbH
 *
 * Author: Johannes Berg <[email protected]>
 */
#include <linux/module.h>
#include <linux/device.h>
#include <linux/devcoredump.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/workqueue.h>

static struct class devcd_class;

/* global disable flag, for security purposes */
static bool devcd_disabled;

struct devcd_entry {
	struct device devcd_dev;
	void *data;
	size_t datalen;
	/*
	 * The mutex serializes the del_wk work between kernel and user
	 * space: device_add() sends a uevent to user space, user space
	 * reads it and writes to the devcd data file, and the resulting
	 * devcd_data_write() may try to modify a work item that
	 * devcoredump has not even initialized/queued yet:
	 *
	 *        cpu0(X)                                cpu1(Y)
	 *
	 *        dev_coredump() uevent sent to user space
	 *        device_add()  ======================> user space process Y reads the
	 *                                              uevents and writes to the devcd fd,
	 *                                              which results in a call to
	 *
	 *                                              devcd_data_write()
	 *                                                mod_delayed_work()
	 *                                                  try_to_grab_pending()
	 *                                                    del_timer()
	 *                                                      debug_assert_init()
	 *       INIT_DELAYED_WORK()
	 *       schedule_delayed_work()
	 *
	 *
	 * The mutex alone is also not enough to prevent the del_wk work
	 * from being scheduled again after it has been flushed by a call
	 * to devcd_free(), as shown below:
	 *
	 *       disabled_store()
	 *         devcd_free()
	 *           mutex_lock()             devcd_data_write()
	 *           flush_delayed_work()
	 *           mutex_unlock()
	 *                                      mutex_lock()
	 *                                      mod_delayed_work()
	 *                                      mutex_unlock()
	 *
	 * Hence the delete_work flag is required as well.
	 */
	struct mutex mutex;
	bool delete_work;
	struct module *owner;
	ssize_t (*read)(char *buffer, loff_t offset, size_t count,
			void *data, size_t datalen);
	void (*free)(void *data);
	struct delayed_work del_wk;
	struct device *failing_dev;
};

static struct devcd_entry *dev_to_devcd(struct device *dev)
{
	return container_of(dev, struct devcd_entry, devcd_dev);
}

static void devcd_dev_release(struct device *dev)
{
	struct devcd_entry *devcd = dev_to_devcd(dev);

	devcd->free(devcd->data);
	module_put(devcd->owner);

	/*
	 * this seems racy, but I don't see a notifier or such on
	 * a struct device to know when it goes away?
	 */
	if (devcd->failing_dev->kobj.sd)
		sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj,
				  "devcoredump");

	put_device(devcd->failing_dev);
	kfree(devcd);
}

static void devcd_del(struct work_struct *wk)
{
	struct devcd_entry *devcd;

	devcd = container_of(wk, struct devcd_entry, del_wk.work);

	device_del(&devcd->devcd_dev);
	put_device(&devcd->devcd_dev);
}

static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj,
			       const struct bin_attribute *bin_attr,
			       char *buffer, loff_t offset, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct devcd_entry *devcd = dev_to_devcd(dev);

	return devcd->read(buffer, offset, count, devcd->data, devcd->datalen);
}

static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj,
				const struct bin_attribute *bin_attr,
				char *buffer, loff_t offset, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct devcd_entry *devcd = dev_to_devcd(dev);

	mutex_lock(&devcd->mutex);
	if (!devcd->delete_work) {
		devcd->delete_work = true;
		mod_delayed_work(system_wq, &devcd->del_wk, 0);
	}
	mutex_unlock(&devcd->mutex);

	return count;
}
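
/*
 * User-space interaction sketch (illustrative only; "devcd1" is just an
 * example instance name): reading the "data" attribute returns the dump
 * via devcd_data_read(), while writing anything to it lands in
 * devcd_data_write() above and dismisses the dump immediately instead of
 * waiting for the timeout:
 *
 *	int fd = open("/sys/class/devcoredump/devcd1/data", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */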

static const struct bin_attribute devcd_attr_data =
	__BIN_ATTR(data, 0600, devcd_data_read, devcd_data_write, 0);

static const struct bin_attribute *const devcd_dev_bin_attrs[] = {
	&devcd_attr_data, NULL,
};

static const struct attribute_group devcd_dev_group = {
	.bin_attrs_new = devcd_dev_bin_attrs,
};

static const struct attribute_group *devcd_dev_groups[] = {
	&devcd_dev_group, NULL,
};

static int devcd_free(struct device *dev, void *data)
{
	struct devcd_entry *devcd = dev_to_devcd(dev);

	mutex_lock(&devcd->mutex);
	if (!devcd->delete_work)
		devcd->delete_work = true;

	flush_delayed_work(&devcd->del_wk);
	mutex_unlock(&devcd->mutex);
	return 0;
}

static ssize_t disabled_show(const struct class *class, const struct class_attribute *attr,
			     char *buf)
{
	return sysfs_emit(buf, "%d\n", devcd_disabled);
}

/*
 *
 *    disabled_store()                                worker()
 *     class_for_each_device(&devcd_class,
 *		NULL, NULL, devcd_free)
 *         ...
 *         ...
 *	   while ((dev = class_dev_iter_next(&iter))
 *                                                    devcd_del()
 *                                                      device_del()
 *                                                      put_device() <- last reference
 *             error = fn(dev, data)                  devcd_dev_release()
 *                        devcd_free(dev, data)         kfree(devcd)
 *                        mutex_lock(&devcd->mutex);
 *
 *
 * In the diagram above, disabled_store() appears to race with a concurrently
 * running devcd_del() and to cause a use-after-free when acquiring
 * devcd->mutex, since the mutex is taken after the devcd memory has been
 * kfree'd once its last reference is dropped by put_device(). However, this
 * cannot happen: fn(dev, data) runs while the class iterator holds its own
 * reference to the device via its klist_node, so the reference dropped in
 * devcd_del() is never the last one.
 */

static ssize_t disabled_store(const struct class *class, const struct class_attribute *attr,
			      const char *buf, size_t count)
{
	long tmp = simple_strtol(buf, NULL, 10);

	/*
	 * This essentially makes the attribute write-once, since you can't
	 * go back to not having it disabled. This is intentional, it serves
	 * as a system lockdown feature.
	 */
	if (tmp != 1)
		return -EINVAL;

	devcd_disabled = true;

	class_for_each_device(&devcd_class, NULL, NULL, devcd_free);

	return count;
}
static CLASS_ATTR_RW(disabled);
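
/*
 * Lockdown sketch (illustrative only): from user space, an administrator
 * disables the facility for good by writing 1 to the class attribute,
 * which flushes all pending dumps and rejects new ones:
 *
 *	int fd = open("/sys/class/devcoredump/disabled", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 */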

static struct attribute *devcd_class_attrs[] = {
	&class_attr_disabled.attr,
	NULL,
};
ATTRIBUTE_GROUPS(devcd_class);

static struct class devcd_class = {
	.name		= "devcoredump",
	.dev_release	= devcd_dev_release,
	.dev_groups	= devcd_dev_groups,
	.class_groups	= devcd_class_groups,
};

static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count,
			   void *data, size_t datalen)
{
	return memory_read_from_buffer(buffer, count, &offset, data, datalen);
}

static void devcd_freev(void *data)
{
	vfree(data);
}

/**
 * dev_coredumpv - create device coredump with vmalloc data
 * @dev: the struct device for the crashed device
 * @data: vmalloc data containing the device coredump
 * @datalen: length of the data
 * @gfp: allocation flags
 *
 * This function takes ownership of the vmalloc'ed data and will free
 * it when it is no longer used. See dev_coredumpm() for more information.
 */
void dev_coredumpv(struct device *dev, void *data, size_t datalen,
		   gfp_t gfp)
{
	dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, devcd_freev);
}
EXPORT_SYMBOL_GPL(dev_coredumpv);
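
/*
 * Usage sketch (hypothetical driver, illustrative only): a crash handler
 * snapshots device state into a vmalloc'ed buffer and passes ownership to
 * dev_coredumpv(), which will free it through devcd_freev() once the dump
 * has been read or has timed out:
 *
 *	static void mydrv_report_crash(struct mydrv *drv)
 *	{
 *		size_t len = drv->fw_state_size;
 *		void *buf = vmalloc(len);
 *
 *		if (!buf)
 *			return;
 *		mydrv_snapshot_fw(drv, buf, len);
 *		dev_coredumpv(drv->dev, buf, len, GFP_KERNEL);
 *	}
 */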

static int devcd_match_failing(struct device *dev, const void *failing)
{
	struct devcd_entry *devcd = dev_to_devcd(dev);

	return devcd->failing_dev == failing;
}

/**
 * devcd_free_sgtable - free all the memory of the given scatterlist table
 * (i.e. both pages and scatterlist instances)
 * NOTE: if two tables were allocated with devcd_alloc_sgtable and then
 * chained using sg_chain, this function should be called only once, on
 * the chained table
 * @data: pointer to sg_table to free
 */
static void devcd_free_sgtable(void *data)
{
	_devcd_free_sgtable(data);
}

/**
 * devcd_read_from_sgtable - copy data from sg_table to a given buffer
 * and return the number of bytes read
 * @buffer: the buffer to copy the data to
 * @offset: start copying @offset bytes from the head of the data
 *	in the given scatterlist
 * @buf_len: the length of the buffer
 * @data: the scatterlist table to copy from
 * @data_len: the length of the data in the sg_table
 *
 * Returns: the number of bytes copied
 */
static ssize_t devcd_read_from_sgtable(char *buffer, loff_t offset,
				       size_t buf_len, void *data,
				       size_t data_len)
{
	struct scatterlist *table = data;

	if (offset > data_len)
		return -EINVAL;

	if (offset + buf_len > data_len)
		buf_len = data_len - offset;
	return sg_pcopy_to_buffer(table, sg_nents(table), buffer, buf_len,
				  offset);
}

/**
 * dev_coredump_put - remove device coredump
 * @dev: the struct device for the crashed device
 *
 * dev_coredump_put() removes the coredump for the given device from the
 * file system, if one exists, and frees its associated data; otherwise
 * it does nothing.
 *
 * It is useful for modules that do not want to keep the coredump
 * available after they are unloaded.
 */
void dev_coredump_put(struct device *dev)
{
	struct device *existing;

	existing = class_find_device(&devcd_class, NULL, dev,
				     devcd_match_failing);
	if (existing) {
		devcd_free(existing, NULL);
		put_device(existing);
	}
}
EXPORT_SYMBOL_GPL(dev_coredump_put);
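
/*
 * Usage sketch (hypothetical driver, illustrative only): a driver that must
 * not leave its dump behind can drop any pending one when it is removed:
 *
 *	static void mydrv_remove(struct platform_device *pdev)
 *	{
 *		dev_coredump_put(&pdev->dev);
 *		mydrv_teardown(pdev);
 *	}
 */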

/**
 * dev_coredumpm_timeout - create device coredump with read/free methods with a
 * custom timeout.
 * @dev: the struct device for the crashed device
 * @owner: the module that contains the read/free functions, use %THIS_MODULE
 * @data: data cookie for the @read/@free functions
 * @datalen: length of the data
 * @gfp: allocation flags
 * @read: function to read from the given buffer
 * @free: function to free the given buffer
 * @timeout: time in jiffies to remove coredump
 *
 * Creates a new device coredump for the given device. If a previous one hasn't
 * been read yet, the new coredump is discarded. The data lifetime is determined
 * by the device coredump framework and when it is no longer needed the @free
 * function will be called to free the data.
 */
void dev_coredumpm_timeout(struct device *dev, struct module *owner,
			   void *data, size_t datalen, gfp_t gfp,
			   ssize_t (*read)(char *buffer, loff_t offset,
					   size_t count, void *data,
					   size_t datalen),
			   void (*free)(void *data),
			   unsigned long timeout)
{
	static atomic_t devcd_count = ATOMIC_INIT(0);
	struct devcd_entry *devcd;
	struct device *existing;

	if (devcd_disabled)
		goto free;

	existing = class_find_device(&devcd_class, NULL, dev,
				     devcd_match_failing);
	if (existing) {
		put_device(existing);
		goto free;
	}

	if (!try_module_get(owner))
		goto free;

	devcd = kzalloc(sizeof(*devcd), gfp);
	if (!devcd)
		goto put_module;

	devcd->owner = owner;
	devcd->data = data;
	devcd->datalen = datalen;
	devcd->read = read;
	devcd->free = free;
	devcd->failing_dev = get_device(dev);
	devcd->delete_work = false;

	mutex_init(&devcd->mutex);
	device_initialize(&devcd->devcd_dev);

	dev_set_name(&devcd->devcd_dev, "devcd%d",
		     atomic_inc_return(&devcd_count));
	devcd->devcd_dev.class = &devcd_class;

	mutex_lock(&devcd->mutex);
	dev_set_uevent_suppress(&devcd->devcd_dev, true);
	if (device_add(&devcd->devcd_dev))
		goto put_device;

	/*
	 * These should normally not fail, but there is no problem
	 * continuing without the links, so just warn instead of
	 * failing.
	 */
	if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,
			      "failing_device") ||
	    sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,
			      "devcoredump"))
		dev_warn(dev, "devcoredump create_link failed\n");

	dev_set_uevent_suppress(&devcd->devcd_dev, false);
	kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD);
	INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
	schedule_delayed_work(&devcd->del_wk, timeout);
	mutex_unlock(&devcd->mutex);
	return;
 put_device:
	put_device(&devcd->devcd_dev);
	mutex_unlock(&devcd->mutex);
 put_module:
	module_put(owner);
 free:
	free(data);
}
EXPORT_SYMBOL_GPL(dev_coredumpm_timeout);
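
/*
 * Usage sketch (hypothetical driver, illustrative only): custom @read/@free
 * callbacks let a driver expose a kmalloc'ed dump it formats itself, here
 * with a one-minute timeout instead of the framework default:
 *
 *	static ssize_t mydrv_dump_read(char *buffer, loff_t offset,
 *				       size_t count, void *data,
 *				       size_t datalen)
 *	{
 *		return memory_read_from_buffer(buffer, count, &offset,
 *					       data, datalen);
 *	}
 *
 *	static void mydrv_dump_free(void *data)
 *	{
 *		kfree(data);
 *	}
 *
 *	dev_coredumpm_timeout(dev, THIS_MODULE, dump, dump_len, GFP_KERNEL,
 *			      mydrv_dump_read, mydrv_dump_free, 60 * HZ);
 */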

/**
 * dev_coredumpsg - create device coredump that uses scatterlist as data
 * parameter
 * @dev: the struct device for the crashed device
 * @table: the dump data
 * @datalen: length of the data
 * @gfp: allocation flags
 *
 * Creates a new device coredump for the given device. If a previous one hasn't
 * been read yet, the new coredump is discarded. The data lifetime is determined
 * by the device coredump framework and when it is no longer needed
 * it will free the data.
 */
void dev_coredumpsg(struct device *dev, struct scatterlist *table,
		    size_t datalen, gfp_t gfp)
{
	dev_coredumpm(dev, NULL, table, datalen, gfp, devcd_read_from_sgtable,
		      devcd_free_sgtable);
}
EXPORT_SYMBOL_GPL(dev_coredumpsg);
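
/*
 * Usage sketch (hypothetical driver, illustrative only): a driver that
 * gathers its dump into a (possibly chained) scatterlist hands the whole
 * table over; the framework reads it via devcd_read_from_sgtable() and
 * releases the pages and tables via devcd_free_sgtable():
 *
 *	struct scatterlist *table = mydrv_collect_dump_sg(drv, &dump_len);
 *
 *	if (table)
 *		dev_coredumpsg(drv->dev, table, dump_len, GFP_KERNEL);
 */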

static int __init devcoredump_init(void)
{
	return class_register(&devcd_class);
}
__initcall(devcoredump_init);

static void __exit devcoredump_exit(void)
{
	class_for_each_device(&devcd_class, NULL, NULL, devcd_free);
	class_unregister(&devcd_class);
}
__exitcall(devcoredump_exit);