// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/target.h"
#include "util/cgroup.h"
#include "util/callchain.h"
#include "util/lock-contention.h"
#include "util/bpf_skel/lock_data.h"

#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/tracepoint.h"

#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
#include "util/util.h"

#include <stdio.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <math.h>
#include <limits.h>
#include <ctype.h>

#include <linux/list.h>
#include <linux/hash.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <linux/stringify.h>

static struct perf_session *session;
static struct target target;

static struct rb_root thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool show_lock_cgroups;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
static const char *output_name = NULL;
static FILE *lock_output;

static struct lock_filter filters;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;

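/*
 * Per-thread statistics live in an rbtree (thread_stats) keyed by tid;
 * each node carries a list of in-flight lock sequences (seq_list).
 */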
static struct thread_stat *thread_stat_find(u32 tid)
{
        struct rb_node *node;
        struct thread_stat *st;

        node = thread_stats.rb_node;
        while (node) {
                st = container_of(node, struct thread_stat, rb);
                if (st->tid == tid)
                        return st;
                else if (tid < st->tid)
                        node = node->rb_left;
                else
                        node = node->rb_right;
        }

        return NULL;
}

static void thread_stat_insert(struct thread_stat *new)
{
        struct rb_node **rb = &thread_stats.rb_node;
        struct rb_node *parent = NULL;
        struct thread_stat *p;

        while (*rb) {
                p = container_of(*rb, struct thread_stat, rb);
                parent = *rb;

                if (new->tid < p->tid)
                        rb = &(*rb)->rb_left;
                else if (new->tid > p->tid)
                        rb = &(*rb)->rb_right;
                else
                        BUG_ON("inserting invalid thread_stat\n");
        }

        rb_link_node(&new->rb, parent, rb);
        rb_insert_color(&new->rb, &thread_stats);
}

static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
{
        struct thread_stat *st;

        st = thread_stat_find(tid);
        if (st)
                return st;

        st = zalloc(sizeof(struct thread_stat));
        if (!st) {
                pr_err("memory allocation failed\n");
                return NULL;
        }

        st->tid = tid;
        INIT_LIST_HEAD(&st->seq_list);

        thread_stat_insert(st);

        return st;
}

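/*
 * thread_stat_findnew starts out pointing at the _first variant, which
 * inserts the initial node without searching the (empty) tree and then
 * redirects the pointer to the general findnew path for all later calls.
 */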
static struct thread_stat *thread_stat_findnew_first(u32 tid);
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
        thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
        struct thread_stat *st;

        st = zalloc(sizeof(struct thread_stat));
        if (!st) {
                pr_err("memory allocation failed\n");
                return NULL;
        }
        st->tid = tid;
        INIT_LIST_HEAD(&st->seq_list);

        rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
        rb_insert_color(&st->rb, &thread_stats);

        thread_stat_findnew = thread_stat_findnew_after_first;
        return st;
}

/* build a simple key comparison function: returns whether one is bigger than two */
#define SINGLE_KEY(member)                                              \
        static int lock_stat_key_ ## member(struct lock_stat *one,     \
                                            struct lock_stat *two)     \
        {                                                               \
                return one->member > two->member;                       \
        }

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)

static int lock_stat_key_wait_time_min(struct lock_stat *one,
                                       struct lock_stat *two)
{
        u64 s1 = one->wait_time_min;
        u64 s2 = two->wait_time_min;
        if (s1 == ULLONG_MAX)
                s1 = 0;
        if (s2 == ULLONG_MAX)
                s2 = 0;
        return s1 > s2;
}

struct lock_key {
        /*
         * name: the value specified by the user;
         * this should be simpler than the raw name of the member,
         * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
         */
        const char *name;
        /* header: the string printed on the header line */
        const char *header;
        /* len: the printing width of the field */
        int len;
        /* key: a pointer to a function to compare two lock stats for sorting */
        int (*key)(struct lock_stat*, struct lock_stat*);
        /* print: a pointer to a function to print a given lock stat */
        void (*print)(struct lock_key*, struct lock_stat*);
        /* list: list entry to link this */
        struct list_head list;
};

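/*
 * Print a duration using the largest unit it fits in, from hours down to
 * nanoseconds; a zero field width selects raw nanoseconds for CSV output.
 */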
static void lock_stat_key_print_time(unsigned long long nsec, int len)
{
        static const struct {
                float base;
                const char *unit;
        } table[] = {
                { 1e9 * 3600, "h " },
                { 1e9 * 60, "m " },
                { 1e9, "s " },
                { 1e6, "ms" },
                { 1e3, "us" },
                { 0, NULL },
        };

        /* for CSV output */
        if (len == 0) {
                fprintf(lock_output, "%llu", nsec);
                return;
        }

        for (int i = 0; table[i].unit; i++) {
                if (nsec < table[i].base)
                        continue;

                fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
                return;
        }

        fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
}

#define PRINT_KEY(member)                                               \
        static void lock_stat_key_print_ ## member(struct lock_key *key, \
                                                   struct lock_stat *ls) \
        {                                                               \
                fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member); \
        }

#define PRINT_TIME(member)                                              \
        static void lock_stat_key_print_ ## member(struct lock_key *key, \
                                                   struct lock_stat *ls) \
        {                                                               \
                lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
        }

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)

static void lock_stat_key_print_wait_time_min(struct lock_key *key,
                                              struct lock_stat *ls)
{
        u64 wait_time = ls->wait_time_min;

        if (wait_time == ULLONG_MAX)
                wait_time = 0;

        lock_stat_key_print_time(wait_time, key->len);
}

static const char *sort_key = "acquired";

static int (*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root sorted; /* place to store intermediate data */
static struct rb_root result; /* place to store sorted data */

static LIST_HEAD(lock_keys);
static const char *output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len) \
        { #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
static struct lock_key report_keys[] = {
        DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
        DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
        DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
        DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
        DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
        DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

        /* more complicated comparisons should go here */
        { }
};

static struct lock_key contention_keys[] = {
        DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
        DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
        DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
        DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
        DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

        /* more complicated comparisons should go here */
        { }
};

static int select_key(bool contention)
{
        int i;
        struct lock_key *keys = report_keys;

        if (contention)
                keys = contention_keys;

        for (i = 0; keys[i].name; i++) {
                if (!strcmp(keys[i].name, sort_key)) {
                        compare = keys[i].key;

                        /* selected key should be in the output fields */
                        if (list_empty(&keys[i].list))
                                list_add_tail(&keys[i].list, &lock_keys);

                        return 0;
                }
        }

        pr_err("Unknown compare key: %s\n", sort_key);
        return -1;
}

static int add_output_field(bool contention, char *name)
{
        int i;
        struct lock_key *keys = report_keys;

        if (contention)
                keys = contention_keys;

        for (i = 0; keys[i].name; i++) {
                if (strcmp(keys[i].name, name))
                        continue;

                /* prevent double link */
                if (list_empty(&keys[i].list))
                        list_add_tail(&keys[i].list, &lock_keys);

                return 0;
        }

        pr_err("Unknown output field: %s\n", name);
        return -1;
}

static int setup_output_field(bool contention, const char *str)
{
        char *tok, *tmp, *orig;
        int i, ret = 0;
        struct lock_key *keys = report_keys;

        if (contention)
                keys = contention_keys;

        /* no output field given: use all of them */
        if (str == NULL) {
                for (i = 0; keys[i].name; i++)
                        list_add_tail(&keys[i].list, &lock_keys);
                return 0;
        }

        for (i = 0; keys[i].name; i++)
                INIT_LIST_HEAD(&keys[i].list);

        orig = tmp = strdup(str);
        if (orig == NULL)
                return -ENOMEM;

        while ((tok = strsep(&tmp, ",")) != NULL) {
                ret = add_output_field(contention, tok);
                if (ret < 0)
                        break;
        }
        free(orig);

        return ret;
}

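/*
 * With --combine-locks, merge stats of locks that share a name into a
 * single entry in the 'sorted' tree; merged sources are marked combined
 * so that insert_to_result() skips them later.
 */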
static void combine_lock_stats(struct lock_stat *st)
{
        struct rb_node **rb = &sorted.rb_node;
        struct rb_node *parent = NULL;
        struct lock_stat *p;
        int ret;

        while (*rb) {
                p = container_of(*rb, struct lock_stat, rb);
                parent = *rb;

                if (st->name && p->name)
                        ret = strcmp(st->name, p->name);
                else
                        ret = !!st->name - !!p->name;

                if (ret == 0) {
                        p->nr_acquired += st->nr_acquired;
                        p->nr_contended += st->nr_contended;
                        p->wait_time_total += st->wait_time_total;

                        if (p->nr_contended)
                                p->avg_wait_time = p->wait_time_total / p->nr_contended;

                        if (p->wait_time_min > st->wait_time_min)
                                p->wait_time_min = st->wait_time_min;
                        if (p->wait_time_max < st->wait_time_max)
                                p->wait_time_max = st->wait_time_max;

                        p->broken |= st->broken;
                        st->combined = 1;
                        return;
                }

                if (ret < 0)
                        rb = &(*rb)->rb_left;
                else
                        rb = &(*rb)->rb_right;
        }

        rb_link_node(&st->rb, parent, rb);
        rb_insert_color(&st->rb, &sorted);
}

static void insert_to_result(struct lock_stat *st,
                             int (*bigger)(struct lock_stat *, struct lock_stat *))
{
        struct rb_node **rb = &result.rb_node;
        struct rb_node *parent = NULL;
        struct lock_stat *p;

        if (combine_locks && st->combined)
                return;

        while (*rb) {
                p = container_of(*rb, struct lock_stat, rb);
                parent = *rb;

                if (bigger(st, p))
                        rb = &(*rb)->rb_left;
                else
                        rb = &(*rb)->rb_right;
        }

        rb_link_node(&st->rb, parent, rb);
        rb_insert_color(&st->rb, &result);
}

/* return the leftmost element of result, and erase it */
static struct lock_stat *pop_from_result(void)
{
        struct rb_node *node = result.rb_node;

        if (!node)
                return NULL;

        while (node->rb_left)
                node = node->rb_left;

        rb_erase(node, &result);
        return container_of(node, struct lock_stat, rb);
}

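/*
 * A set of per-tracepoint callbacks; which ones are populated depends on
 * the subcommand (see report_lock_ops and contention_lock_ops below).
 */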
struct trace_lock_handler {
        /* used when CONFIG_LOCKDEP is enabled */
        int (*acquire_event)(struct evsel *evsel,
                             struct perf_sample *sample);

        /* used when CONFIG_LOCKDEP && CONFIG_LOCK_STAT are enabled */
        int (*acquired_event)(struct evsel *evsel,
                              struct perf_sample *sample);

        /* used when CONFIG_LOCKDEP && CONFIG_LOCK_STAT are enabled */
        int (*contended_event)(struct evsel *evsel,
                               struct perf_sample *sample);

        /* used when CONFIG_LOCKDEP is enabled */
        int (*release_event)(struct evsel *evsel,
                             struct perf_sample *sample);

        /* used when CONFIG_LOCKDEP is off */
        int (*contention_begin_event)(struct evsel *evsel,
                                      struct perf_sample *sample);

        /* used when CONFIG_LOCKDEP is off */
        int (*contention_end_event)(struct evsel *evsel,
                                    struct perf_sample *sample);
};

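/* find the in-flight lock sequence for 'addr' on this thread, creating it on first use */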
static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
{
        struct lock_seq_stat *seq;

        list_for_each_entry(seq, &ts->seq_list, list) {
                if (seq->addr == addr)
                        return seq;
        }

        seq = zalloc(sizeof(struct lock_seq_stat));
        if (!seq) {
                pr_err("memory allocation failed\n");
                return NULL;
        }
        seq->state = SEQ_STATE_UNINITIALIZED;
        seq->addr = addr;

        list_add(&seq->list, &ts->seq_list);
        return seq;
}

enum broken_state {
        BROKEN_ACQUIRE,
        BROKEN_ACQUIRED,
        BROKEN_CONTENDED,
        BROKEN_RELEASE,
        BROKEN_MAX,
};

static int bad_hist[BROKEN_MAX];

enum acquire_flags {
        TRY_LOCK = 1,
        READ_LOCK = 2,
};

static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
{
        switch (aggr_mode) {
        case LOCK_AGGR_ADDR:
                *key = addr;
                break;
        case LOCK_AGGR_TASK:
                *key = tid;
                break;
        case LOCK_AGGR_CALLER:
        case LOCK_AGGR_CGROUP:
        default:
                pr_err("Invalid aggregation mode: %d\n", aggr_mode);
                return -EINVAL;
        }
        return 0;
}

static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
                                struct perf_sample *sample)
{
        if (aggr_mode == LOCK_AGGR_CALLER) {
                *key = callchain_id(evsel, sample);
                return 0;
        }
        return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}

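/*
 * The report_lock_*_event handlers drive a per-lock, per-thread state
 * machine (SEQ_STATE_*). An event arriving in an impossible state marks
 * the lock as broken, bumps bad_hist[] and drops the sequence.
 */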
static int report_lock_acquire_event(struct evsel *evsel,
                                     struct perf_sample *sample)
{
        struct lock_stat *ls;
        struct thread_stat *ts;
        struct lock_seq_stat *seq;
        const char *name = evsel__strval(evsel, sample, "name");
        u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
        int flag = evsel__intval(evsel, sample, "flags");
        u64 key;
        int ret;

        ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
        if (ret < 0)
                return ret;

        ls = lock_stat_findnew(key, name, 0);
        if (!ls)
                return -ENOMEM;

        ts = thread_stat_findnew(sample->tid);
        if (!ts)
                return -ENOMEM;

        seq = get_seq(ts, addr);
        if (!seq)
                return -ENOMEM;

        switch (seq->state) {
        case SEQ_STATE_UNINITIALIZED:
        case SEQ_STATE_RELEASED:
                if (!flag) {
                        seq->state = SEQ_STATE_ACQUIRING;
                } else {
                        if (flag & TRY_LOCK)
                                ls->nr_trylock++;
                        if (flag & READ_LOCK)
                                ls->nr_readlock++;
                        seq->state = SEQ_STATE_READ_ACQUIRED;
                        seq->read_count = 1;
                        ls->nr_acquired++;
                }
                break;
        case SEQ_STATE_READ_ACQUIRED:
                if (flag & READ_LOCK) {
                        seq->read_count++;
                        ls->nr_acquired++;
                        goto end;
                } else {
                        goto broken;
                }
                break;
        case SEQ_STATE_ACQUIRED:
        case SEQ_STATE_ACQUIRING:
        case SEQ_STATE_CONTENDED:
broken:
                /* broken lock sequence */
                if (!ls->broken) {
                        ls->broken = 1;
                        bad_hist[BROKEN_ACQUIRE]++;
                }
                list_del_init(&seq->list);
                free(seq);
                goto end;
        default:
                BUG_ON("Unknown state of lock sequence found!\n");
                break;
        }

        ls->nr_acquire++;
        seq->prev_event_time = sample->time;
end:
        return 0;
}

static int report_lock_acquired_event(struct evsel *evsel,
                                      struct perf_sample *sample)
{
        struct lock_stat *ls;
        struct thread_stat *ts;
        struct lock_seq_stat *seq;
        u64 contended_term;
        const char *name = evsel__strval(evsel, sample, "name");
        u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
        u64 key;
        int ret;

        ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
        if (ret < 0)
                return ret;

        ls = lock_stat_findnew(key, name, 0);
        if (!ls)
                return -ENOMEM;

        ts = thread_stat_findnew(sample->tid);
        if (!ts)
                return -ENOMEM;

        seq = get_seq(ts, addr);
        if (!seq)
                return -ENOMEM;

        switch (seq->state) {
        case SEQ_STATE_UNINITIALIZED:
                /* orphan event, do nothing */
                return 0;
        case SEQ_STATE_ACQUIRING:
                break;
        case SEQ_STATE_CONTENDED:
                contended_term = sample->time - seq->prev_event_time;
                ls->wait_time_total += contended_term;
                if (contended_term < ls->wait_time_min)
                        ls->wait_time_min = contended_term;
                if (ls->wait_time_max < contended_term)
                        ls->wait_time_max = contended_term;
                break;
        case SEQ_STATE_RELEASED:
        case SEQ_STATE_ACQUIRED:
        case SEQ_STATE_READ_ACQUIRED:
                /* broken lock sequence */
                if (!ls->broken) {
                        ls->broken = 1;
                        bad_hist[BROKEN_ACQUIRED]++;
                }
                list_del_init(&seq->list);
                free(seq);
                goto end;
        default:
                BUG_ON("Unknown state of lock sequence found!\n");
                break;
        }

        seq->state = SEQ_STATE_ACQUIRED;
        ls->nr_acquired++;
        ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
        seq->prev_event_time = sample->time;
end:
        return 0;
}

static int report_lock_contended_event(struct evsel *evsel,
                                       struct perf_sample *sample)
{
        struct lock_stat *ls;
        struct thread_stat *ts;
        struct lock_seq_stat *seq;
        const char *name = evsel__strval(evsel, sample, "name");
        u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
        u64 key;
        int ret;

        ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
        if (ret < 0)
                return ret;

        ls = lock_stat_findnew(key, name, 0);
        if (!ls)
                return -ENOMEM;

        ts = thread_stat_findnew(sample->tid);
        if (!ts)
                return -ENOMEM;

        seq = get_seq(ts, addr);
        if (!seq)
                return -ENOMEM;

        switch (seq->state) {
        case SEQ_STATE_UNINITIALIZED:
                /* orphan event, do nothing */
                return 0;
        case SEQ_STATE_ACQUIRING:
                break;
        case SEQ_STATE_RELEASED:
        case SEQ_STATE_ACQUIRED:
        case SEQ_STATE_READ_ACQUIRED:
        case SEQ_STATE_CONTENDED:
                /* broken lock sequence */
                if (!ls->broken) {
                        ls->broken = 1;
                        bad_hist[BROKEN_CONTENDED]++;
                }
                list_del_init(&seq->list);
                free(seq);
                goto end;
        default:
                BUG_ON("Unknown state of lock sequence found!\n");
                break;
        }

        seq->state = SEQ_STATE_CONTENDED;
        ls->nr_contended++;
        ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
        seq->prev_event_time = sample->time;
end:
        return 0;
}

static int report_lock_release_event(struct evsel *evsel,
                                     struct perf_sample *sample)
{
        struct lock_stat *ls;
        struct thread_stat *ts;
        struct lock_seq_stat *seq;
        const char *name = evsel__strval(evsel, sample, "name");
        u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
        u64 key;
        int ret;

        ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
        if (ret < 0)
                return ret;

        ls = lock_stat_findnew(key, name, 0);
        if (!ls)
                return -ENOMEM;

        ts = thread_stat_findnew(sample->tid);
        if (!ts)
                return -ENOMEM;

        seq = get_seq(ts, addr);
        if (!seq)
                return -ENOMEM;

        switch (seq->state) {
        case SEQ_STATE_UNINITIALIZED:
                goto end;
        case SEQ_STATE_ACQUIRED:
                break;
        case SEQ_STATE_READ_ACQUIRED:
                seq->read_count--;
                BUG_ON(seq->read_count < 0);
                if (seq->read_count) {
                        ls->nr_release++;
                        goto end;
                }
                break;
        case SEQ_STATE_ACQUIRING:
        case SEQ_STATE_CONTENDED:
        case SEQ_STATE_RELEASED:
                /* broken lock sequence */
                if (!ls->broken) {
                        ls->broken = 1;
                        bad_hist[BROKEN_RELEASE]++;
                }
                goto free_seq;
        default:
                BUG_ON("Unknown state of lock sequence found!\n");
                break;
        }

        ls->nr_release++;
free_seq:
        list_del_init(&seq->list);
        free(seq);
end:
        return 0;
}

static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
                                  char *buf, int size)
{
        u64 offset;

        if (map == NULL || sym == NULL) {
                buf[0] = '\0';
                return 0;
        }

        offset = map__map_ip(map, ip) - sym->start;

        if (offset)
                return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
        else
                return strlcpy(buf, sym->name, size);
}

static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
                                  char *buf, int size)
{
        struct thread *thread;
        struct callchain_cursor *cursor;
        struct machine *machine = &session->machines.host;
        struct symbol *sym;
        int skip = 0;
        int ret;

        /* lock names will be replaced with task names later */
        if (show_thread_stats)
                return -1;

        thread = machine__findnew_thread(machine, -1, sample->pid);
        if (thread == NULL)
                return -1;

        cursor = get_tls_callchain_cursor();

        /* use the caller function name from the callchain */
        ret = thread__resolve_callchain(thread, cursor, evsel, sample,
                                        NULL, NULL, max_stack_depth);
        if (ret != 0) {
                thread__put(thread);
                return -1;
        }

        callchain_cursor_commit(cursor);
        thread__put(thread);

        while (true) {
                struct callchain_cursor_node *node;

                node = callchain_cursor_current(cursor);
                if (node == NULL)
                        break;

                /* skip first few entries - for lock functions */
                if (++skip <= stack_skip)
                        goto next;

                sym = node->ms.sym;
                if (sym && !machine__is_lock_function(machine, node->ip)) {
                        get_symbol_name_offset(node->ms.map, sym, node->ip,
                                               buf, size);
                        return 0;
                }

next:
                callchain_cursor_advance(cursor);
        }
        return -1;
}

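/*
 * Build a stable aggregation key for LOCK_AGGR_CALLER mode by XOR-hashing
 * the instruction pointers of the callchain, after skipping the lock
 * internals at the top of the stack.
 */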
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
        struct callchain_cursor *cursor;
        struct machine *machine = &session->machines.host;
        struct thread *thread;
        u64 hash = 0;
        int skip = 0;
        int ret;

        thread = machine__findnew_thread(machine, -1, sample->pid);
        if (thread == NULL)
                return -1;

        cursor = get_tls_callchain_cursor();
        /* use the caller function name from the callchain */
        ret = thread__resolve_callchain(thread, cursor, evsel, sample,
                                        NULL, NULL, max_stack_depth);
        thread__put(thread);

        if (ret != 0)
                return -1;

        callchain_cursor_commit(cursor);

        while (true) {
                struct callchain_cursor_node *node;

                node = callchain_cursor_current(cursor);
                if (node == NULL)
                        break;

                /* skip first few entries - for lock functions */
                if (++skip <= stack_skip)
                        goto next;

                if (node->ms.sym && machine__is_lock_function(machine, node->ip))
                        goto next;

                hash ^= hash_long((unsigned long)node->ip, 64);

next:
                callchain_cursor_advance(cursor);
        }
        return hash;
}

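/* copy up to max_stack real instruction pointers, dropping PERF_CONTEXT_* markers */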
static u64 *get_callstack(struct perf_sample *sample, int max_stack)
{
        u64 *callstack;
        u64 i;
        int c;

        callstack = calloc(max_stack, sizeof(*callstack));
        if (callstack == NULL)
                return NULL;

        for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
                u64 ip = sample->callchain->ips[i];

                if (ip >= PERF_CONTEXT_MAX)
                        continue;

                callstack[c++] = ip;
        }
        return callstack;
}

static int report_lock_contention_begin_event(struct evsel *evsel,
                                              struct perf_sample *sample)
{
        struct lock_stat *ls;
        struct thread_stat *ts;
        struct lock_seq_stat *seq;
        u64 addr = evsel__intval(evsel, sample, "lock_addr");
        unsigned int flags = evsel__intval(evsel, sample, "flags");
        u64 key;
        int i, ret;
        static bool kmap_loaded;
        struct machine *machine = &session->machines.host;
        struct map *kmap;
        struct symbol *sym;

        ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
        if (ret < 0)
                return ret;

        if (!kmap_loaded) {
                unsigned long *addrs;

                /* make sure it loads the kernel map to find lock symbols */
                map__load(machine__kernel_map(machine));
                kmap_loaded = true;

                /* convert (kernel) symbols to addresses */
                for (i = 0; i < filters.nr_syms; i++) {
                        sym = machine__find_kernel_symbol_by_name(machine,
                                                                  filters.syms[i],
                                                                  &kmap);
                        if (sym == NULL) {
                                pr_warning("ignore unknown symbol: %s\n",
                                           filters.syms[i]);
                                continue;
                        }

                        addrs = realloc(filters.addrs,
                                        (filters.nr_addrs + 1) * sizeof(*addrs));
                        if (addrs == NULL) {
                                pr_warning("memory allocation failure\n");
                                return -ENOMEM;
                        }

                        addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
                        filters.addrs = addrs;
                }
        }

        ls = lock_stat_find(key);
        if (!ls) {
                char buf[128];
                const char *name = "";

                switch (aggr_mode) {
                case LOCK_AGGR_ADDR:
                        sym = machine__find_kernel_symbol(machine, key, &kmap);
                        if (sym)
                                name = sym->name;
                        break;
                case LOCK_AGGR_CALLER:
                        name = buf;
                        if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
                                name = "Unknown";
                        break;
                case LOCK_AGGR_CGROUP:
                case LOCK_AGGR_TASK:
                default:
                        break;
                }

                ls = lock_stat_findnew(key, name, flags);
                if (!ls)
                        return -ENOMEM;
        }

        if (filters.nr_types) {
                bool found = false;

                for (i = 0; i < filters.nr_types; i++) {
                        if (flags == filters.types[i]) {
                                found = true;
                                break;
                        }
                }

                if (!found)
                        return 0;
        }

        if (filters.nr_addrs) {
                bool found = false;

                for (i = 0; i < filters.nr_addrs; i++) {
                        if (addr == filters.addrs[i]) {
                                found = true;
                                break;
                        }
                }

                if (!found)
                        return 0;
        }

        if (needs_callstack()) {
                u64 *callstack = get_callstack(sample, max_stack_depth);

                if (callstack == NULL)
                        return -ENOMEM;

                if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
                        free(callstack);
                        return 0;
                }

                if (ls->callstack == NULL)
                        ls->callstack = callstack;
                else
                        free(callstack);
        }

        ts = thread_stat_findnew(sample->tid);
        if (!ts)
                return -ENOMEM;

        seq = get_seq(ts, addr);
        if (!seq)
                return -ENOMEM;

        switch (seq->state) {
        case SEQ_STATE_UNINITIALIZED:
        case SEQ_STATE_ACQUIRED:
                break;
        case SEQ_STATE_CONTENDED:
                /*
                 * A nested contention begin can happen with mutex spinning;
                 * keep the original contention begin event and ignore the
                 * second one.
                 */
                goto end;
        case SEQ_STATE_ACQUIRING:
        case SEQ_STATE_READ_ACQUIRED:
        case SEQ_STATE_RELEASED:
                /* broken lock sequence */
                if (!ls->broken) {
                        ls->broken = 1;
                        bad_hist[BROKEN_CONTENDED]++;
                }
                list_del_init(&seq->list);
                free(seq);
                goto end;
        default:
                BUG_ON("Unknown state of lock sequence found!\n");
                break;
        }

        if (seq->state != SEQ_STATE_CONTENDED) {
                seq->state = SEQ_STATE_CONTENDED;
                seq->prev_event_time = sample->time;
                ls->nr_contended++;
        }
end:
        return 0;
}

static int report_lock_contention_end_event(struct evsel *evsel,
                                            struct perf_sample *sample)
{
        struct lock_stat *ls;
        struct thread_stat *ts;
        struct lock_seq_stat *seq;
        u64 contended_term;
        u64 addr = evsel__intval(evsel, sample, "lock_addr");
        u64 key;
        int ret;

        ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
        if (ret < 0)
                return ret;

        ls = lock_stat_find(key);
        if (!ls)
                return 0;

        ts = thread_stat_find(sample->tid);
        if (!ts)
                return 0;

        seq = get_seq(ts, addr);
        if (!seq)
                return -ENOMEM;

        switch (seq->state) {
        case SEQ_STATE_UNINITIALIZED:
                goto end;
        case SEQ_STATE_CONTENDED:
                contended_term = sample->time - seq->prev_event_time;
                ls->wait_time_total += contended_term;
                if (contended_term < ls->wait_time_min)
                        ls->wait_time_min = contended_term;
                if (ls->wait_time_max < contended_term)
                        ls->wait_time_max = contended_term;
                break;
        case SEQ_STATE_ACQUIRING:
        case SEQ_STATE_ACQUIRED:
        case SEQ_STATE_READ_ACQUIRED:
        case SEQ_STATE_RELEASED:
                /* broken lock sequence */
                if (!ls->broken) {
                        ls->broken = 1;
                        bad_hist[BROKEN_ACQUIRED]++;
                }
                list_del_init(&seq->list);
                free(seq);
                goto end;
        default:
                BUG_ON("Unknown state of lock sequence found!\n");
                break;
        }

        seq->state = SEQ_STATE_ACQUIRED;
        ls->nr_acquired++;
        ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
        return 0;
}

/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops = {
        .acquire_event          = report_lock_acquire_event,
        .acquired_event         = report_lock_acquired_event,
        .contended_event        = report_lock_contended_event,
        .release_event          = report_lock_release_event,
        .contention_begin_event = report_lock_contention_begin_event,
        .contention_end_event   = report_lock_contention_end_event,
};

static struct trace_lock_handler contention_lock_ops = {
        .contention_begin_event = report_lock_contention_begin_event,
        .contention_end_event   = report_lock_contention_end_event,
};

static struct trace_lock_handler *trace_handler;

static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
        if (trace_handler->acquire_event)
                return trace_handler->acquire_event(evsel, sample);
        return 0;
}

static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
        if (trace_handler->acquired_event)
                return trace_handler->acquired_event(evsel, sample);
        return 0;
}

static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
        if (trace_handler->contended_event)
                return trace_handler->contended_event(evsel, sample);
        return 0;
}

static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
        if (trace_handler->release_event)
                return trace_handler->release_event(evsel, sample);
        return 0;
}

static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
{
        if (trace_handler->contention_begin_event)
                return trace_handler->contention_begin_event(evsel, sample);
        return 0;
}

static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
{
        if (trace_handler->contention_end_event)
                return trace_handler->contention_end_event(evsel, sample);
        return 0;
}

static void print_bad_events(int bad, int total)
{
        /* Output for debugging; this has to be removed */
        int i;
        int broken = 0;
        const char *name[4] =
                { "acquire", "acquired", "contended", "release" };

        for (i = 0; i < BROKEN_MAX; i++)
                broken += bad_hist[i];

        if (quiet || total == 0 || (broken == 0 && verbose <= 0))
                return;

        fprintf(lock_output, "\n=== output for debug ===\n\n");
        fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
        fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
        fprintf(lock_output, "histogram of events caused bad sequence\n");
        for (i = 0; i < BROKEN_MAX; i++)
                fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
}

/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
        struct lock_stat *st;
        struct lock_key *key;
        char cut_name[20];
        int bad, total, printed;

        if (!quiet) {
                fprintf(lock_output, "%20s ", "Name");
                list_for_each_entry(key, &lock_keys, list)
                        fprintf(lock_output, "%*s ", key->len, key->header);
                fprintf(lock_output, "\n\n");
        }

        bad = total = printed = 0;
        while ((st = pop_from_result())) {
                total++;
                if (st->broken)
                        bad++;
                if (!st->nr_acquired)
                        continue;

                bzero(cut_name, 20);

                if (strlen(st->name) < 20) {
                        /* output raw name */
                        const char *name = st->name;

                        if (show_thread_stats) {
                                struct thread *t;

                                /* st->addr contains the tid of the thread */
                                t = perf_session__findnew(session, st->addr);
                                name = thread__comm_str(t);
                        }

                        fprintf(lock_output, "%20s ", name);
                } else {
                        strncpy(cut_name, st->name, 16);
                        cut_name[16] = '.';
                        cut_name[17] = '.';
                        cut_name[18] = '.';
                        cut_name[19] = '\0';
                        /* cut off the name to keep the output aligned */
                        fprintf(lock_output, "%20s ", cut_name);
                }

                list_for_each_entry(key, &lock_keys, list) {
                        key->print(key, st);
                        fprintf(lock_output, " ");
                }
                fprintf(lock_output, "\n");

                if (++printed >= print_nr_entries)
                        break;
        }

        print_bad_events(bad, total);
}

static bool info_threads, info_map;

static void dump_threads(void)
{
        struct thread_stat *st;
        struct rb_node *node;
        struct thread *t;

        fprintf(lock_output, "%10s: comm\n", "Thread ID");

        node = rb_first(&thread_stats);
        while (node) {
                st = container_of(node, struct thread_stat, rb);
                t = perf_session__findnew(session, st->tid);
                fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
                node = rb_next(node);
                thread__put(t);
        }
}

static int compare_maps(struct lock_stat *a, struct lock_stat *b)
{
        int ret;

        if (a->name && b->name)
                ret = strcmp(a->name, b->name);
        else
                ret = !!a->name - !!b->name;

        if (!ret)
                return a->addr < b->addr;
        else
                return ret < 0;
}

static void dump_map(void)
{
        unsigned int i;
        struct lock_stat *st;

        fprintf(lock_output, "Address of instance: name of class\n");
        for (i = 0; i < LOCKHASH_SIZE; i++) {
                hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
                        insert_to_result(st, compare_maps);
                }
        }

        while ((st = pop_from_result()))
                fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
}

static void dump_info(void)
{
        if (info_threads)
                dump_threads();

        if (info_map) {
                if (info_threads)
                        fputc('\n', lock_output);
                dump_map();
        }
}

static const struct evsel_str_handler lock_tracepoints[] = {
        { "lock:lock_acquire",   evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
        { "lock:lock_acquired",  evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
        { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
        { "lock:lock_release",   evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

static const struct evsel_str_handler contention_tracepoints[] = {
        { "lock:contention_begin", evsel__process_contention_begin, },
        { "lock:contention_end",   evsel__process_contention_end,   },
};

static int process_event_update(const struct perf_tool *tool,
                                union perf_event *event,
                                struct evlist **pevlist)
{
        int ret;

        ret = perf_event__process_event_update(tool, event, pevlist);
        if (ret < 0)
                return ret;

        /* this can return -EEXIST since we call it for each evsel */
        perf_session__set_tracepoints_handlers(session, lock_tracepoints);
        perf_session__set_tracepoints_handlers(session, contention_tracepoints);
        return 0;
}

typedef int (*tracepoint_handler)(struct evsel *evsel,
                                  struct perf_sample *sample);

static int process_sample_event(const struct perf_tool *tool __maybe_unused,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct evsel *evsel,
                                struct machine *machine)
{
        int err = 0;
        struct thread *thread = machine__findnew_thread(machine, sample->pid,
                                                        sample->tid);

        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
                return -1;
        }

        if (evsel->handler != NULL) {
                tracepoint_handler f = evsel->handler;
                err = f(evsel, sample);
        }

        thread__put(thread);

        return err;
}

static void combine_result(void)
{
        unsigned int i;
        struct lock_stat *st;

        if (!combine_locks)
                return;

        for (i = 0; i < LOCKHASH_SIZE; i++) {
                hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
                        combine_lock_stats(st);
                }
        }
}

static void sort_result(void)
{
        unsigned int i;
        struct lock_stat *st;

        for (i = 0; i < LOCKHASH_SIZE; i++) {
                hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
                        insert_to_result(st, compare);
                }
        }
}

static const struct {
        unsigned int flags;
        /*
         * Name of the lock flags (access), with delimiter ':'.
         * For example, rwsem:R or rwsem:W.
         */
        const char *flags_name;
        /* Name of the lock (type), for example, rwlock or rwsem. */
        const char *lock_name;
} lock_type_table[] = {
        { 0,                            "semaphore",   "semaphore" },
        { LCB_F_SPIN,                   "spinlock",    "spinlock" },
        { LCB_F_SPIN | LCB_F_READ,      "rwlock:R",    "rwlock" },
        { LCB_F_SPIN | LCB_F_WRITE,     "rwlock:W",    "rwlock" },
        { LCB_F_READ,                   "rwsem:R",     "rwsem" },
        { LCB_F_WRITE,                  "rwsem:W",     "rwsem" },
        { LCB_F_RT,                     "rt-mutex",    "rt-mutex" },
        { LCB_F_RT | LCB_F_READ,        "rwlock-rt:R", "rwlock-rt" },
        { LCB_F_RT | LCB_F_WRITE,       "rwlock-rt:W", "rwlock-rt" },
        { LCB_F_PERCPU | LCB_F_READ,    "pcpu-sem:R",  "percpu-rwsem" },
        { LCB_F_PERCPU | LCB_F_WRITE,   "pcpu-sem:W",  "percpu-rwsem" },
        { LCB_F_MUTEX,                  "mutex",       "mutex" },
        { LCB_F_MUTEX | LCB_F_SPIN,     "mutex",       "mutex" },
        /* alias for optimistic spinning only */
        { LCB_F_MUTEX | LCB_F_SPIN,     "mutex:spin",  "mutex-spin" },
};

static const char *get_type_flags_name(unsigned int flags)
{
        flags &= LCB_F_TYPE_MASK;

        for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
                if (lock_type_table[i].flags == flags)
                        return lock_type_table[i].flags_name;
        }
        return "unknown";
}

static const char *get_type_lock_name(unsigned int flags)
{
        flags &= LCB_F_TYPE_MASK;

        for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
                if (lock_type_table[i].flags == flags)
                        return lock_type_table[i].lock_name;
        }
        return "unknown";
}

static void lock_filter_finish(void)
{
        zfree(&filters.types);
        filters.nr_types = 0;

        zfree(&filters.addrs);
        filters.nr_addrs = 0;

        for (int i = 0; i < filters.nr_syms; i++)
                free(filters.syms[i]);

        zfree(&filters.syms);
        filters.nr_syms = 0;

        zfree(&filters.cgrps);
        filters.nr_cgrps = 0;

        for (int i = 0; i < filters.nr_slabs; i++)
                free(filters.slabs[i]);

        zfree(&filters.slabs);
        filters.nr_slabs = 0;
}

static void sort_contention_result(void)
{
        sort_result();
}

static void print_header_stdio(void)
{
        struct lock_key *key;

        list_for_each_entry(key, &lock_keys, list)
                fprintf(lock_output, "%*s ", key->len, key->header);

        switch (aggr_mode) {
        case LOCK_AGGR_TASK:
                fprintf(lock_output, " %10s %s\n\n", "pid",
                        show_lock_owner ? "owner" : "comm");
                break;
        case LOCK_AGGR_CALLER:
                fprintf(lock_output, " %10s %s\n\n", "type", "caller");
                break;
        case LOCK_AGGR_ADDR:
                fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
                break;
        case LOCK_AGGR_CGROUP:
                fprintf(lock_output, " %s\n\n", "cgroup");
                break;
        default:
                break;
        }
}

static void print_header_csv(const char *sep)
{
        struct lock_key *key;

        fprintf(lock_output, "# output: ");
        list_for_each_entry(key, &lock_keys, list)
                fprintf(lock_output, "%s%s ", key->header, sep);

        switch (aggr_mode) {
        case LOCK_AGGR_TASK:
                fprintf(lock_output, "%s%s %s\n", "pid", sep,
                        show_lock_owner ? "owner" : "comm");
                break;
        case LOCK_AGGR_CALLER:
                fprintf(lock_output, "%s%s %s", "type", sep, "caller");
                if (verbose > 0)
                        fprintf(lock_output, "%s %s", sep, "stacktrace");
                fprintf(lock_output, "\n");
                break;
        case LOCK_AGGR_ADDR:
                fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
                break;
        case LOCK_AGGR_CGROUP:
                fprintf(lock_output, "%s\n", "cgroup");
                break;
        default:
                break;
        }
}

static void print_header(void)
{
        if (!quiet) {
                if (symbol_conf.field_sep)
                        print_header_csv(symbol_conf.field_sep);
                else
                        print_header_stdio();
        }
}

static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
{
        struct lock_key *key;
        struct thread *t;
        int pid;

        list_for_each_entry(key, &lock_keys, list) {
                key->print(key, st);
                fprintf(lock_output, " ");
        }

        switch (aggr_mode) {
        case LOCK_AGGR_CALLER:
                fprintf(lock_output, " %10s %s\n", get_type_flags_name(st->flags), st->name);
                break;
        case LOCK_AGGR_TASK:
                pid = st->addr;
                t = perf_session__findnew(session, pid);
                fprintf(lock_output, " %10d %s\n",
                        pid, pid == -1 ? "Unknown" : thread__comm_str(t));
                break;
        case LOCK_AGGR_ADDR:
                fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr,
                        st->name, get_type_lock_name(st->flags));
                break;
        case LOCK_AGGR_CGROUP:
                fprintf(lock_output, " %s\n", st->name);
                break;
        default:
                break;
        }

        if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
                struct map *kmap;
                struct symbol *sym;
                char buf[128];
                u64 ip;

                for (int i = 0; i < max_stack_depth; i++) {
                        if (!st->callstack || !st->callstack[i])
                                break;

                        ip = st->callstack[i];
                        sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
                        get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
                        fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf);
                }
        }
}

static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
                                const char *sep)
{
        struct lock_key *key;
        struct thread *t;
        int pid;

        list_for_each_entry(key, &lock_keys, list) {
                key->print(key, st);
                fprintf(lock_output, "%s ", sep);
        }

        switch (aggr_mode) {
        case LOCK_AGGR_CALLER:
                fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
                if (verbose <= 0)
                        fprintf(lock_output, "\n");
                break;
        case LOCK_AGGR_TASK:
                pid = st->addr;
                t = perf_session__findnew(session, pid);
                fprintf(lock_output, "%d%s %s\n", pid, sep,
                        pid == -1 ? "Unknown" : thread__comm_str(t));
                break;
        case LOCK_AGGR_ADDR:
                fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
                        st->name, sep, get_type_lock_name(st->flags));
                break;
        case LOCK_AGGR_CGROUP:
                fprintf(lock_output, "%s\n", st->name);
                break;
        default:
                break;
        }

        if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
                struct map *kmap;
                struct symbol *sym;
                char buf[128];
                u64 ip;

                for (int i = 0; i < max_stack_depth; i++) {
                        if (!st->callstack || !st->callstack[i])
                                break;

                        ip = st->callstack[i];
                        sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
                        get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
                        fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long)ip, buf);
                }
                fprintf(lock_output, "\n");
        }
}

static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
{
        if (symbol_conf.field_sep)
                print_lock_stat_csv(con, st, symbol_conf.field_sep);
        else
                print_lock_stat_stdio(con, st);
}

static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
{
        /* Output for debugging; this has to be removed */
        int broken = fails->task + fails->stack + fails->time + fails->data;

        if (!use_bpf)
                print_bad_events(bad, total);

        if (quiet || total == 0 || (broken == 0 && verbose <= 0))
                return;

        total += broken;
        fprintf(lock_output, "\n=== output for debug ===\n\n");
        fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
        fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);

        fprintf(lock_output, "histogram of failure reasons\n");
        fprintf(lock_output, " %10s: %d\n", "task", fails->task);
        fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
        fprintf(lock_output, " %10s: %d\n", "time", fails->time);
        fprintf(lock_output, " %10s: %d\n", "data", fails->data);
}

static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
                             const char *sep)
{
        /* Output for debugging; this has to be removed */
        if (use_bpf)
                bad = fails->task + fails->stack + fails->time + fails->data;

        if (quiet || total == 0 || (bad == 0 && verbose <= 0))
                return;

        total += bad;
        fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);

        if (use_bpf) {
                fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
                fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
                fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
                fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
        } else {
                int i;
                const char *name[4] = { "acquire", "acquired", "contended", "release" };

                for (i = 0; i < BROKEN_MAX; i++)
                        fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
        }
        fprintf(lock_output, "\n");
}

static void print_footer(int total, int bad, struct lock_contention_fails *fails)
{
        if (symbol_conf.field_sep)
                print_footer_csv(total, bad, fails, symbol_conf.field_sep);
        else
                print_footer_stdio(total, bad, fails);
}

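/*
 * In BPF mode each lock_stat already aggregates nr_contended events and
 * contributes that count to the total; in perf.data mode every entry
 * counts once.
 */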
static void print_contention_result(struct lock_contention *con)
{
        struct lock_stat *st;
        int bad, total, printed;

        if (!quiet)
                print_header();

        bad = total = printed = 0;

        while ((st = pop_from_result())) {
                total += use_bpf ? st->nr_contended : 1;
                if (st->broken)
                        bad++;

                if (!st->wait_time_total)
                        continue;

                print_lock_stat(con, st);

                if (++printed >= print_nr_entries)
                        break;
        }

        if (print_nr_entries) {
                /* update the total/bad stats */
                while ((st = pop_from_result())) {
                        total += use_bpf ? st->nr_contended : 1;
                        if (st->broken)
                                bad++;
                }
        }
        /* some entries are collected but hidden by the callstack filter */
        total += con->nr_filtered;

        print_footer(total, bad, &con->fails);
}

static bool force;

static int __cmd_report(bool display_info)
{
        int err = -EINVAL;
        struct perf_tool eops;
        struct perf_data data = {
                .path = input_name,
                .mode = PERF_DATA_MODE_READ,
                .force = force,
        };

        perf_tool__init(&eops, /*ordered_events=*/true);
        eops.attr = perf_event__process_attr;
        eops.event_update = process_event_update;
        eops.sample = process_sample_event;
        eops.comm = perf_event__process_comm;
        eops.mmap = perf_event__process_mmap;
        eops.namespaces = perf_event__process_namespaces;
        eops.tracing_data = perf_event__process_tracing_data;
        session = perf_session__new(&data, &eops);
        if (IS_ERR(session)) {
                pr_err("Initializing perf session failed\n");
                return PTR_ERR(session);
        }

        symbol_conf.allow_aliases = true;
        symbol__init(&session->header.env);

        if (!data.is_pipe) {
                if (!perf_session__has_traces(session, "lock record"))
                        goto out_delete;

                if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
                        pr_err("Initializing perf session tracepoint handlers failed\n");
                        goto out_delete;
                }

                if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
                        pr_err("Initializing perf session tracepoint handlers failed\n");
                        goto out_delete;
                }
        }

        if (setup_output_field(false, output_fields))
                goto out_delete;

        if (select_key(false))
                goto out_delete;

        if (show_thread_stats)
                aggr_mode = LOCK_AGGR_TASK;

        err = perf_session__process_events(session);
        if (err)
                goto out_delete;

        setup_pager();
        if (display_info) /* used for the info subcommand */
                dump_info();
        else {
                combine_result();
                sort_result();
                print_result();
        }

out_delete:
        perf_session__delete(session);
        return err;
}

static void sighandler(int sig __maybe_unused)
{
}

static int check_lock_contention_options(const struct option *options,
                                         const char * const *usage)
{
        if (show_thread_stats && show_lock_addrs) {
                pr_err("Cannot use thread and addr mode together\n");
                parse_options_usage(usage, options, "threads", 0);
                parse_options_usage(NULL, options, "lock-addr", 0);
                return -1;
        }

        if (show_lock_owner && !use_bpf) {
                pr_err("Lock owners are available only with BPF\n");
                parse_options_usage(usage, options, "lock-owner", 0);
                parse_options_usage(NULL, options, "use-bpf", 0);
                return -1;
        }

        if (show_lock_owner && show_lock_addrs) {
                pr_err("Cannot use owner and addr mode together\n");
                parse_options_usage(usage, options, "lock-owner", 0);
                parse_options_usage(NULL, options, "lock-addr", 0);
                return -1;
        }

        if (show_lock_cgroups && !use_bpf) {
                pr_err("Cgroups are available only with BPF\n");
                parse_options_usage(usage, options, "lock-cgroup", 0);
                parse_options_usage(NULL, options, "use-bpf", 0);
                return -1;
        }

        if (show_lock_cgroups && show_lock_addrs) {
                pr_err("Cannot use cgroup and addr mode together\n");
                parse_options_usage(usage, options, "lock-cgroup", 0);
                parse_options_usage(NULL, options, "lock-addr", 0);
                return -1;
        }

        if (show_lock_cgroups && show_thread_stats) {
                pr_err("Cannot use cgroup and thread mode together\n");
                parse_options_usage(usage, options, "lock-cgroup", 0);
                parse_options_usage(NULL, options, "threads", 0);
                return -1;
        }

        if (symbol_conf.field_sep) {
                if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
                    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
                    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
                        pr_err("Cannot use a separator that already appears in the output\n");
                        parse_options_usage(usage, options, "x", 1);
                        return -1;
                }
        }

        if (show_lock_owner)
                show_thread_stats = true;

        return 0;
}

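/*
 * Two collection paths: with --use-bpf, contention data is aggregated in
 * kernel space by BPF and read back after the workload finishes;
 * otherwise the tracepoint samples from a perf.data file are replayed.
 */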
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops;
	struct perf_data data = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.filters = &filters,
		.save_callstack = needs_callstack(),
		.owner = show_lock_owner,
		.cgroups = RB_ROOT,
	};

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	con.result = &lockhash_table[0];

	perf_tool__init(&eops, /*ordered_events=*/true);
	eops.attr = perf_event__process_attr;
	eops.event_update = process_event_update;
	eops.sample = process_sample_event;
	eops.comm = perf_event__process_comm;
	eops.mmap = perf_event__process_mmap;
	eops.tracing_data = perf_event__process_tracing_data;

	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		err = PTR_ERR(session);
		session = NULL;
		goto out_delete;
	}

	con.machine = &session->machines.host;

	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
		show_lock_addrs ? LOCK_AGGR_ADDR :
		show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;

	if (con.aggr_mode == LOCK_AGGR_CALLER)
		con.save_callstack = true;

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		err = lock_contention_prepare(&con);
		if (err < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	err = setup_output_field(true, output_fields);
	if (err) {
		pr_err("Failed to setup output field\n");
		goto out_delete;
	}

	err = select_key(true);
	if (err)
		goto out_delete;

	if (symbol_conf.field_sep) {
		int i;
		struct lock_key *keys = contention_keys;

		/* do not align output in CSV format */
		for (i = 0; keys[i].name; i++)
			keys[i].len = 0;
	}

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish(&con);
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}

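/*
 * Entry point of 'perf lock record': builds an argument vector for
 * 'perf record'.  If the lockdep/lock_stat tracepoints are available
 * they are used directly; otherwise it falls back to the lock
 * contention tracepoints and enables frame-pointer call graphs so
 * callers can be identified later.
 */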
static int __cmd_record(int argc, const char **argv)
{
	const char *record_args[] = {
		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
	};
	const char *callgraph_args[] = {
		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
	};
	unsigned int rec_argc, i, j, ret;
	unsigned int nr_tracepoints;
	unsigned int nr_callgraph_args = 0;
	const char **rec_argv;
	bool has_lock_stat = true;

	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
			pr_debug("tracepoint %s is not enabled. "
				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
				 lock_tracepoints[i].name);
			has_lock_stat = false;
			break;
		}
	}

	if (has_lock_stat)
		goto setup_args;

	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
			pr_err("tracepoint %s is not enabled.\n",
			       contention_tracepoints[i].name);
			return 1;
		}
	}

	nr_callgraph_args = ARRAY_SIZE(callgraph_args);

setup_args:
	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;

	if (has_lock_stat)
		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
	else
		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);

	/* factor of 2 is for -e in front of each tracepoint */
	rec_argc += 2 * nr_tracepoints;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < nr_tracepoints; j++) {
		rec_argv[i++] = "-e";
		rec_argv[i++] = has_lock_stat
			? lock_tracepoints[j].name
			: contention_tracepoints[j].name;
	}

	for (j = 0; j < nr_callgraph_args; j++, i++)
		rec_argv[i] = callgraph_args[j];

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	ret = cmd_record(i, rec_argv);
	free(rec_argv);
	return ret;
}

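/*
 * OPT_CALLBACK handler for -M/--map-nr-entries: parses the requested
 * number of BPF map entries with strtoul(), rejecting trailing garbage
 * and out-of-range values.
 */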
static int parse_map_entry(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	unsigned long *len = (unsigned long *)opt->value;
	unsigned long val;
	char *endptr;

	errno = 0;
	val = strtoul(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid BPF map length: %s\n", str);
		return -1;
	}

	*len = val;
	return 0;
}

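/*
 * OPT_CALLBACK handler for --max-stack: like parse_map_entry() but for
 * a signed value that must lie within [0, sysctl__max_stack()].
 */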
static int parse_max_stack(const struct option *opt, const char *str,
			   int unset __maybe_unused)
{
	int *len = (int *)opt->value;	/* opt->value points at the int max_stack_depth */
	long val;
	char *endptr;

	errno = 0;
	val = strtol(str, &endptr, 0);
	if (*endptr != '\0' || errno != 0) {
		pr_err("invalid max stack depth: %s\n", str);
		return -1;
	}

	if (val < 0 || val > sysctl__max_stack()) {
		pr_err("invalid max stack depth: %ld\n", val);
		return -1;
	}

	*len = val;
	return 0;
}

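/*
 * Appends one flags value to the growable filters.types array used by
 * -Y/--type-filter.  Returns false on allocation failure.
 */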
static bool add_lock_type(unsigned int flags)
{
	unsigned int *tmp;

	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
	if (tmp == NULL)
		return false;

	tmp[filters.nr_types++] = flags;
	filters.types = tmp;
	return true;
}

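/*
 * OPT_CALLBACK handler for -Y/--type-filter: splits the argument on
 * commas and spaces, then matches each token against lock_type_table,
 * either by flags name (tokens containing ':') or by lock name.
 */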
static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		bool found = false;

		/* `tok` is a flags name if it contains ':'. */
		if (strchr(tok, ':')) {
			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
				if (!strcmp(lock_type_table[i].flags_name, tok) &&
				    add_lock_type(lock_type_table[i].flags)) {
					found = true;
					break;
				}
			}

			if (!found) {
				pr_err("Unknown lock flags name: %s\n", tok);
				free(s);
				return -1;
			}

			continue;
		}

		/*
		 * Otherwise `tok` is a lock name.
		 * A single lock name can map to multiple flags.
		 * Replace the alias `pcpu-sem` with the actual name `percpu-rwsem`.
		 */
		if (!strcmp(tok, "pcpu-sem"))
			tok = (char *)"percpu-rwsem";
		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
			if (!strcmp(lock_type_table[i].lock_name, tok)) {
				if (add_lock_type(lock_type_table[i].flags)) {
					found = true;
				} else {
					free(s);
					return -1;
				}
			}
		}

		if (!found) {
			pr_err("Unknown lock name: %s\n", tok);
			free(s);
			return -1;
		}
	}

	free(s);
	return 0;
}

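/*
 * The next three helpers feed the -L/--lock-filter option: each appends
 * one entry to the matching growable array in 'filters' (raw addresses,
 * symbol names to be resolved later, or slab cache names).
 */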
static bool add_lock_addr(unsigned long addr)
{
	unsigned long *tmp;

	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp[filters.nr_addrs++] = addr;
	filters.addrs = tmp;
	return true;
}

static bool add_lock_sym(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);
		return false;
	}

	tmp[filters.nr_syms++] = sym;
	filters.syms = tmp;
	return true;
}

static bool add_lock_slab(char *name)
{
	char **tmp;
	char *sym = strdup(name);

	if (sym == NULL) {
		pr_err("Memory allocation failure\n");
		return false;
	}

	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		free(sym);	/* do not leak the duplicated name */
		return false;
	}

	tmp[filters.nr_slabs++] = sym;
	filters.slabs = tmp;
	return true;
}

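/*
 * OPT_CALLBACK handler for -L/--lock-filter: each comma/space separated
 * token is a hex address, a '&'-prefixed slab cache name, or a symbol
 * name that will be resolved once kernel symbols are loaded.
 */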
static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
			   int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;
	u64 addr;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		char *end;

		addr = strtoul(tok, &end, 16);
		if (*end == '\0') {
			if (!add_lock_addr(addr)) {
				ret = -1;
				break;
			}
			continue;
		}

		if (*tok == '&') {
			if (!add_lock_slab(tok + 1)) {
				ret = -1;
				break;
			}
			continue;
		}

		/*
		 * At this moment, we don't have kernel symbols. Save the symbols
		 * in a separate list and resolve them to addresses later.
		 */
		if (!add_lock_sym(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

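/*
 * OPT_CALLBACK handler for --output: opens the named file and redirects
 * all result printing (lock_output, which defaults to stderr) to it.
 */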
static int parse_output(const struct option *opt, const char *str,
			int unset __maybe_unused)
{
	const char **name = (const char **)opt->value;

	if (str == NULL)
		return -1;

	lock_output = fopen(str, "w");
	if (lock_output == NULL) {
		pr_err("Cannot open %s\n", str);
		return -1;
	}

	*name = str;
	return 0;
}

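/*
 * Resolves a cgroup name to its id and appends the id to the growable
 * filters.cgrps array; the cgroup object is released once the id has
 * been copied out.
 */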
static bool add_lock_cgroup(char *name)
{
	u64 *tmp;
	struct cgroup *cgrp;

	cgrp = cgroup__new(name, /*do_open=*/false);
	if (cgrp == NULL) {
		pr_err("Failed to create cgroup: %s\n", name);
		return false;
	}

	if (read_cgroup_id(cgrp) < 0) {
		pr_err("Failed to read cgroup id for %s\n", name);
		cgroup__put(cgrp);
		return false;
	}

	tmp = realloc(filters.cgrps, (filters.nr_cgrps + 1) * sizeof(*filters.cgrps));
	if (tmp == NULL) {
		pr_err("Memory allocation failure\n");
		cgroup__put(cgrp);	/* drop the reference on this error path too */
		return false;
	}

	tmp[filters.nr_cgrps++] = cgrp->id;
	filters.cgrps = tmp;
	cgroup__put(cgrp);
	return true;
}

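/*
 * OPT_CALLBACK handler for -G/--cgroup-filter: splits the argument on
 * commas and spaces and registers each named cgroup.
 */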
static int parse_cgroup_filter(const struct option *opt __maybe_unused, const char *str,
			       int unset __maybe_unused)
{
	char *s, *tmp, *tok;
	int ret = 0;

	s = strdup(str);
	if (s == NULL)
		return -1;

	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
		if (!add_lock_cgroup(tok)) {
			ret = -1;
			break;
		}
	}

	free(s);
	return ret;
}

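/*
 * Top-level entry point of 'perf lock'.  Parses the common options and
 * dispatches to record/report/script/info/contention.  Subcommand names
 * may be abbreviated to any prefix of at least three characters, e.g.:
 *
 *   perf lock record -- sleep 1         # record lock events for a workload
 *   perf lock report -k wait_total      # sort the report by total wait time
 *   perf lock con -a -b -E 10 sleep 1   # top 10 contended locks, via BPF
 *
 * (Illustrative invocations only; see the option tables below for the
 * full set of flags.)
 */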
int cmd_lock(int argc, const char **argv)
{
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump the thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "dump the map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		   "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		   "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		   "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		   "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		   "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
		     "Set the maximum stack depth when collecting lock contention, "
		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
	OPT_INTEGER(0, "stack-skip", &stack_skip,
		    "Set the number of stack depth to skip when finding a lock caller, "
		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
		     "Filter specific type of locks", parse_lock_type),
	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
		     "Filter specific address/symbol of locks", parse_lock_addr),
	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
		     "Filter specific function in the callstack", parse_call_stack),
	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
		   "print result in CSV format with custom separator"),
	OPT_BOOLEAN(0, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
	OPT_CALLBACK('G', "cgroup-filter", NULL, "CGROUPS",
		     "Filter specific cgroups", parse_cgroup_filter),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char * const lock_subcommands[] = { "record", "report", "script",
						  "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	lock_output = stderr;
	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		rc = cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}

		/* If neither threads nor map requested, display both */
		if (!info_threads && !info_map) {
			info_threads = true;
			info_map = true;
		}

		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}

		if (check_lock_contention_options(contention_options,
						  contention_usage) < 0)
			return -1;

		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	/* free usage string allocated by parse_options_subcommand */
	free((void *)lock_usage[0]);

	zfree(&lockhash_table);
	return rc;
}