#!/usr/bin/env python3
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
"""Convert directories of JSON events to C code."""
import argparse
import csv
from functools import lru_cache
import json
import metric
import os
import sys
from typing import (Callable, Dict, Optional, Sequence, Set, Tuple)
import collections

# Global command line arguments.
_args = None
# List of regular event tables.
_event_tables = []
# List of event tables generated from "/sys" directories.
_sys_event_tables = []
# List of regular metric tables.
_metric_tables = []
# List of metric tables generated from "/sys" directories.
_sys_metric_tables = []
# Mapping between sys event table names and sys metric table names.
_sys_event_table_to_metric_table_mapping = {}
# Map from an event name to an architecture standard
# JsonEvent. Architecture standard events are in json files in the top
# f'{_args.starting_dir}/{_args.arch}' directory.
_arch_std_events = {}
# Events to write out when the table is closed
_pending_events = []
# Name of events table to be written out
_pending_events_tblname = None
# Metrics to write out when the table is closed
_pending_metrics = []
# Name of metrics table to be written out
_pending_metrics_tblname = None
# Global BigCString shared by all structures.
_bcs = None
# Map from the name of a metric group to a description of the group.
_metricgroups = {}
# Order specific JsonEvent attributes will be visited.
_json_event_attributes = [
    # cmp_sevent related attributes.
    'name', 'topic', 'desc',
    # Seems useful, put it early.
    'event',
    # Short things in alphabetical order.
    'compat', 'deprecated', 'perpkg', 'unit',
    # Longer things (the last won't be iterated over during decompress).
    'long_desc'
]

# Attributes that are in pmu_metric rather than pmu_event.
_json_metric_attributes = [
    'metric_name', 'metric_group', 'metric_expr', 'metric_threshold',
    'desc', 'long_desc', 'unit', 'compat', 'metricgroup_no_group',
    'default_metricgroup_name', 'aggr_mode', 'event_grouping'
]
# Attributes that are bools or enum int values, encoded as '0', '1',...
_json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg']

def removesuffix(s: str, suffix: str) -> str:
  """Remove the suffix from a string

  The removesuffix function is added to str in Python 3.9. We aim for 3.6
  compatibility and so provide our own function here.
  """
  return s[0:-len(suffix)] if s.endswith(suffix) else s


def file_name_to_table_name(prefix: str, parents: Sequence[str],
                            dirname: str) -> str:
  """Generate a C table name from directory names."""
  tblname = prefix
  for p in parents:
    tblname += '_' + p
  tblname += '_' + dirname
  return tblname.replace('-', '_')
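# Example (illustrative): file_name_to_table_name('pmu_events_', ['x86'], 'skylake-x')
# returns 'pmu_events__x86_skylake_x'; '-' is replaced so the result is a valid C
# identifier.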


def c_len(s: str) -> int:
  """Return the length of s as a C string

  This doesn't handle all escape characters properly. It first assumes
  all \\ are for escaping, then adjusts for having over counted
  \\. The code uses \000 rather than \0 as a terminator as an adjacent
  number would be folded into a string of \0 (i.e. "\0" + "5" doesn't
  equal a terminator followed by the number 5 but the escape of
  \05). The code adjusts for \000 but not properly for all octal, hex
  or unicode values.
  """
  try:
    utf = s.encode(encoding='utf-8', errors='strict')
  except:
    print(f'broken string {s}')
    raise
  return len(utf) - utf.count(b'\\') + utf.count(b'\\\\') - (utf.count(b'\\000') * 2)
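# Worked examples for c_len() (illustrative; the values follow from the
# arithmetic above, with string literals written as they would appear in source):
#   c_len('event=0x3c\\000') == 11   # 'event=0x3c' plus the C terminator
#   c_len('a\\\\b') == 3             # C "a\\b" is the 3 bytes 'a', '\', 'b'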

class BigCString:
  """A class to hold many strings concatenated together.

  Generating a large number of stand-alone C strings creates a large
  number of relocations in position independent code. The BigCString
  is a helper for this case. It builds a single string that contains
  all the other C strings (to avoid memory issues the string
  itself is held as a list of strings). The offsets within the big
  string are recorded and when stored to disk these don't need
  relocation. To reduce the size of the string further, identical
  strings are merged. If a longer string ends with the same value as a
  shorter string, these entries are also merged.
  """
  strings: Set[str]
  big_string: Sequence[str]
  offsets: Dict[str, int]
  insert_number: int
  insert_point: Dict[str, int]
  metrics: Set[str]

  def __init__(self):
    self.strings = set()
    self.insert_number = 0
    self.insert_point = {}
    self.metrics = set()

  def add(self, s: str, metric: bool) -> None:
    """Called to add to the big string."""
    if s not in self.strings:
      self.strings.add(s)
      self.insert_point[s] = self.insert_number
      self.insert_number += 1
      if metric:
        self.metrics.add(s)

  def compute(self) -> None:
    """Called once all strings are added to compute the string and offsets."""

    folded_strings = {}
    # Determine if two strings can be folded, i.e. let one string use
    # the end of another. First reverse all strings and sort them.
    sorted_reversed_strings = sorted([x[::-1] for x in self.strings])

    # Strings 'xyz' and 'yz' will now be [ 'zy', 'zyx' ]. Scan forward
    # for each string to see if there is a better candidate to fold it
    # into; in the example, rather than using 'yz' we can use 'xyz' at
    # an offset of 1. We record which string can be folded into which
    # in folded_strings, we don't need to record the offset as it is
    # trivially computed from the string lengths.
    for pos, s in enumerate(sorted_reversed_strings):
      best_pos = pos
      for check_pos in range(pos + 1, len(sorted_reversed_strings)):
        if sorted_reversed_strings[check_pos].startswith(s):
          best_pos = check_pos
        else:
          break
      if pos != best_pos:
        folded_strings[s[::-1]] = sorted_reversed_strings[best_pos][::-1]

    # Compute reverse mappings for debugging.
    fold_into_strings = collections.defaultdict(set)
    for key, val in folded_strings.items():
      if key != val:
        fold_into_strings[val].add(key)

    # big_string_offset is the current location within the C string
    # being appended to; comments, etc. don't count. big_string is
    # the string contents represented as a list. Strings are immutable
    # in Python and so appending to one causes memory issues, while
    # lists are mutable.
    big_string_offset = 0
    self.big_string = []
    self.offsets = {}

    def string_cmp_key(s: str) -> Tuple[bool, int, str]:
      return (s in self.metrics, self.insert_point[s], s)

    # Emit all strings that aren't folded in a sorted manner.
    for s in sorted(self.strings, key=string_cmp_key):
      if s not in folded_strings:
        self.offsets[s] = big_string_offset
        self.big_string.append(f'/* offset={big_string_offset} */ "')
        self.big_string.append(s)
        self.big_string.append('"')
        if s in fold_into_strings:
          self.big_string.append(' /* also: ' + ', '.join(fold_into_strings[s]) + ' */')
        self.big_string.append('\n')
        big_string_offset += c_len(s)
        continue

    # Compute the offsets of the folded strings.
    for s in folded_strings.keys():
      assert s not in self.offsets
      folded_s = folded_strings[s]
      self.offsets[s] = self.offsets[folded_s] + c_len(folded_s) - c_len(s)
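    # Offset example (illustrative, hypothetical offset): if 'l2_cache.miss\000'
    # is emitted at offset 100 and 'cache.miss\000' folds into it, then the
    # folded string's offset is
    #   100 + c_len('l2_cache.miss\000') - c_len('cache.miss\000') = 100 + 14 - 11 = 103,
    # i.e. it points at the 'cache.miss\000' tail of the longer string.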

_bcs = BigCString()

class JsonEvent:
  """Representation of an event loaded from a json file dictionary."""

  def __init__(self, jd: dict):
    """Constructor passed the dictionary of parsed json values."""

    def llx(x: int) -> str:
      """Convert an int to a string similar to a printf modifier of %#llx."""
      return str(x) if x >= 0 and x < 10 else hex(x)

    def fixdesc(s: str) -> str:
      """Fix formatting issues for the desc string."""
      if s is None:
        return None
      return removesuffix(removesuffix(removesuffix(s, '.  '),
                                       '. '), '.').replace('\n', '\\n').replace(
                                           '\"', '\\"').replace('\r', '\\r')

    def convert_aggr_mode(aggr_mode: str) -> Optional[str]:
      """Returns the aggr_mode_class enum value associated with the JSON string."""
      if not aggr_mode:
        return None
      aggr_mode_to_enum = {
          'PerChip': '1',
          'PerCore': '2',
      }
      return aggr_mode_to_enum[aggr_mode]

    def convert_metric_constraint(metric_constraint: str) -> Optional[str]:
      """Returns the metric_event_groups enum value associated with the JSON string."""
      if not metric_constraint:
        return None
      metric_constraint_to_enum = {
          'NO_GROUP_EVENTS': '1',
          'NO_GROUP_EVENTS_NMI': '2',
          'NO_NMI_WATCHDOG': '2',
          'NO_GROUP_EVENTS_SMT': '3',
      }
      return metric_constraint_to_enum[metric_constraint]

    def lookup_msr(num: str) -> Optional[str]:
      """Convert the MSR number, or the first in a comma-separated list, to the appropriate event field."""
      if not num:
        return None
      msrmap = {
          0x3F6: 'ldlat=',
          0x1A6: 'offcore_rsp=',
          0x1A7: 'offcore_rsp=',
          0x3F7: 'frontend=',
      }
      return msrmap[int(num.split(',', 1)[0], 0)]

    def real_event(name: str, event: str) -> Optional[str]:
      """Convert well-known event names to an event string, otherwise use the event argument."""
      fixed = {
          'inst_retired.any': 'event=0xc0,period=2000003',
          'inst_retired.any_p': 'event=0xc0,period=2000003',
          'cpu_clk_unhalted.ref': 'event=0x0,umask=0x03,period=2000003',
          'cpu_clk_unhalted.thread': 'event=0x3c,period=2000003',
          'cpu_clk_unhalted.core': 'event=0x3c,period=2000003',
          'cpu_clk_unhalted.thread_any': 'event=0x3c,any=1,period=2000003',
      }
      if not name:
        return None
      if name.lower() in fixed:
        return fixed[name.lower()]
      return event
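    # Example (illustrative): real_event('inst_retired.any', 'event=0xc0,umask=0x0')
    # returns the fixed encoding 'event=0xc0,period=2000003'; an unknown name
    # simply returns the event argument unchanged, and a missing name returns None.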

    def unit_to_pmu(unit: str) -> Optional[str]:
      """Convert a JSON Unit to Linux PMU name."""
      if not unit:
        return 'default_core'
      # Comment brought over from jevents.c:
      # it's not realistic to keep adding these, we need something more scalable ...
      table = {
          'CBO': 'uncore_cbox',
          'QPI LL': 'uncore_qpi',
          'SBO': 'uncore_sbox',
          'iMPH-U': 'uncore_arb',
          'CPU-M-CF': 'cpum_cf',
          'CPU-M-SF': 'cpum_sf',
          'PAI-CRYPTO' : 'pai_crypto',
          'PAI-EXT' : 'pai_ext',
          'UPI LL': 'uncore_upi',
          'hisi_sicl,cpa': 'hisi_sicl,cpa',
          'hisi_sccl,ddrc': 'hisi_sccl,ddrc',
          'hisi_sccl,hha': 'hisi_sccl,hha',
          'hisi_sccl,l3c': 'hisi_sccl,l3c',
          'imx8_ddr': 'imx8_ddr',
          'imx9_ddr': 'imx9_ddr',
          'L3PMC': 'amd_l3',
          'DFPMC': 'amd_df',
          'UMCPMC': 'amd_umc',
          'cpu_core': 'cpu_core',
          'cpu_atom': 'cpu_atom',
          'ali_drw': 'ali_drw',
          'arm_cmn': 'arm_cmn',
          'tool': 'tool',
      }
      return table[unit] if unit in table else f'uncore_{unit.lower()}'
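    # Examples (illustrative): unit_to_pmu('CBO') == 'uncore_cbox',
    # unit_to_pmu('QPI LL') == 'uncore_qpi', an unlisted unit such as 'MyUnit'
    # falls through to 'uncore_myunit', and a missing Unit means 'default_core'.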

    def is_zero(val: str) -> bool:
      try:
        if val.startswith('0x'):
          return int(val, 16) == 0
        else:
          return int(val) == 0
      except ValueError:
        return False

    def canonicalize_value(val: str) -> str:
      try:
        if val.startswith('0x'):
          return llx(int(val, 16))
        return str(int(val))
      except ValueError:
        return val
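    # Examples (illustrative): canonicalize_value('0x0F') == '0xf',
    # canonicalize_value('7') == '7', and a non-numeric value such as
    # 'config1=3' is returned unchanged.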

    eventcode = 0
    if 'EventCode' in jd:
      eventcode = int(jd['EventCode'].split(',', 1)[0], 0)
    if 'ExtSel' in jd:
      eventcode |= int(jd['ExtSel']) << 8
    configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None
    eventidcode = int(jd['EventidCode'], 0) if 'EventidCode' in jd else None
    self.name = jd['EventName'].lower() if 'EventName' in jd else None
    self.topic = ''
    self.compat = jd.get('Compat')
    self.desc = fixdesc(jd.get('BriefDescription'))
    self.long_desc = fixdesc(jd.get('PublicDescription'))
    precise = jd.get('PEBS')
    msr = lookup_msr(jd.get('MSRIndex'))
    msrval = jd.get('MSRValue')
    extra_desc = ''
    if 'Data_LA' in jd:
      extra_desc += '  Supports address when precise'
      if 'Errata' in jd:
        extra_desc += '.'
    if 'Errata' in jd:
      extra_desc += '  Spec update: ' + jd['Errata']
    self.pmu = unit_to_pmu(jd.get('Unit'))
    filter = jd.get('Filter')
    self.unit = jd.get('ScaleUnit')
    self.perpkg = jd.get('PerPkg')
    self.aggr_mode = convert_aggr_mode(jd.get('AggregationMode'))
    self.deprecated = jd.get('Deprecated')
    self.metric_name = jd.get('MetricName')
    self.metric_group = jd.get('MetricGroup')
    self.metricgroup_no_group = jd.get('MetricgroupNoGroup')
    self.default_metricgroup_name = jd.get('DefaultMetricgroupName')
    self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint'))
    self.metric_expr = None
    if 'MetricExpr' in jd:
      self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
    # Note, the metric formula for the threshold isn't parsed as the &
    # and > have incorrect precedence.
    self.metric_threshold = jd.get('MetricThreshold')

    arch_std = jd.get('ArchStdEvent')
    if precise and self.desc and '(Precise Event)' not in self.desc:
      extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise '
                                                                 'event)')
    event = None
    if configcode is not None:
      event = f'config={llx(configcode)}'
    elif eventidcode is not None:
      event = f'eventid={llx(eventidcode)}'
    else:
      event = f'event={llx(eventcode)}'
    event_fields = [
        ('AnyThread', 'any='),
        ('PortMask', 'ch_mask='),
        ('CounterMask', 'cmask='),
        ('EdgeDetect', 'edge='),
        ('FCMask', 'fc_mask='),
        ('Invert', 'inv='),
        ('SampleAfterValue', 'period='),
        ('UMask', 'umask='),
        ('NodeType', 'type='),
        ('RdWrMask', 'rdwrmask='),
        ('EnAllCores', 'enallcores='),
        ('EnAllSlices', 'enallslices='),
        ('SliceId', 'sliceid='),
        ('ThreadMask', 'threadmask='),
    ]
    for key, value in event_fields:
      if key in jd and not is_zero(jd[key]):
        event += f',{value}{canonicalize_value(jd[key])}'
    if filter:
      event += f',{filter}'
    if msr:
      event += f',{msr}{msrval}'
    if self.desc and extra_desc:
      self.desc += extra_desc
    if self.long_desc and extra_desc:
      self.long_desc += extra_desc
    if arch_std:
      if arch_std.lower() in _arch_std_events:
        event = _arch_std_events[arch_std.lower()].event
        # Copy from the architecture standard event to self for undefined fields.
        for attr, value in _arch_std_events[arch_std.lower()].__dict__.items():
          if hasattr(self, attr) and not getattr(self, attr):
            setattr(self, attr, value)
      else:
        raise argparse.ArgumentTypeError(f'Cannot find arch std event: {arch_std}')

    self.event = real_event(self.name, event)
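    # Putting it together (illustrative, hypothetical JSON): an entry such as
    # {"EventName": "EXAMPLE.EVENT", "EventCode": "0xc0", "UMask": "0x1",
    #  "SampleAfterValue": "2000003"}
    # yields self.event == 'event=0xc0,period=2000003,umask=1' (fields are
    # appended in event_fields order and values are canonicalized).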

  def __repr__(self) -> str:
    """String representation primarily for debugging."""
    s = '{\n'
    for attr, value in self.__dict__.items():
      if value:
        s += f'\t{attr} = {value},\n'
    return s + '}'

  def build_c_string(self, metric: bool) -> str:
    s = ''
    for attr in _json_metric_attributes if metric else _json_event_attributes:
      x = getattr(self, attr)
      if metric and x and attr == 'metric_expr':
        # Convert parsed metric expressions into a string. Slashes
        # must be doubled in the file.
        x = x.ToPerfJson().replace('\\', '\\\\')
      if metric and x and attr == 'metric_threshold':
        x = x.replace('\\', '\\\\')
      if attr in _json_enum_attributes:
        s += x if x else '0'
      else:
        s += f'{x}\\000' if x else '\\000'
    return s
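  # build_c_string() encoding sketch (illustrative, hypothetical event): the
  # attributes in _json_event_attributes are concatenated in order, string
  # fields are terminated by '\000' and enum fields become a single digit, e.g.
  #   'example.event\000cache\000Example desc\000event=0xc0,period=2000003,umask=1\000\00000\000\000'
  # for name/topic/desc/event set, compat/unit/long_desc empty and
  # deprecated/perpkg defaulting to '0'.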

  def to_c_string(self, metric: bool) -> str:
    """Representation of the event as a C struct initializer."""

    def fix_comment(s: str) -> str:
      return s.replace('*/', r'\*\/')

    s = self.build_c_string(metric)
    return f'{{ { _bcs.offsets[s] } }}, /* {fix_comment(s)} */\n'


@lru_cache(maxsize=None)
def read_json_events(path: str, topic: str) -> Sequence[JsonEvent]:
  """Read json events from the specified file."""
  try:
    events = json.load(open(path), object_hook=JsonEvent)
  except BaseException:
    print(f"Exception processing {path}")
    raise
  metrics: list[Tuple[str, str, metric.Expression]] = []
  for event in events:
    event.topic = topic
    if event.metric_name and '-' not in event.metric_name:
      metrics.append((event.pmu, event.metric_name, event.metric_expr))
  updates = metric.RewriteMetricsInTermsOfOthers(metrics)
  if updates:
    for event in events:
      if event.metric_name in updates:
        # print(f'Updated {event.metric_name} from\n"{event.metric_expr}"\n'
        #       f'to\n"{updates[event.metric_name]}"')
        event.metric_expr = updates[event.metric_name]

  return events

def preprocess_arch_std_files(archpath: str) -> None:
  """Read in all architecture standard events."""
  global _arch_std_events
  for item in os.scandir(archpath):
    if not item.is_file() or not item.name.endswith('.json'):
      continue
    try:
      for event in read_json_events(item.path, topic=''):
        if event.name:
          _arch_std_events[event.name.lower()] = event
        if event.metric_name:
          _arch_std_events[event.metric_name.lower()] = event
    except Exception as e:
      raise RuntimeError(f'Failure processing \'{item.name}\' in \'{archpath}\'') from e


def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
  """Add contents of file to _pending_events table."""
  for e in read_json_events(item.path, topic):
    if e.name:
      _pending_events.append(e)
    if e.metric_name:
      _pending_metrics.append(e)


def print_pending_events() -> None:
  """Optionally close events table."""

  def event_cmp_key(j: JsonEvent) -> Tuple[str, str, bool, str, str]:
    def fix_none(s: Optional[str]) -> str:
      if s is None:
        return ''
      return s

    return (fix_none(j.pmu).replace(',','_'), fix_none(j.name), j.desc is not None, fix_none(j.topic),
            fix_none(j.metric_name))

  global _pending_events
  if not _pending_events:
    return

  global _pending_events_tblname
  if _pending_events_tblname.endswith('_sys'):
    global _sys_event_tables
    _sys_event_tables.append(_pending_events_tblname)
  else:
    global _event_tables
    _event_tables.append(_pending_events_tblname)

  first = True
  last_pmu = None
  last_name = None
  pmus = set()
  for event in sorted(_pending_events, key=event_cmp_key):
    if last_pmu and last_pmu == event.pmu:
      assert event.name != last_name, f"Duplicate event: {last_pmu}/{last_name}/ in {_pending_events_tblname}"
    if event.pmu != last_pmu:
      if not first:
        _args.output_file.write('};\n')
      pmu_name = event.pmu.replace(',', '_')
      _args.output_file.write(
          f'static const struct compact_pmu_event {_pending_events_tblname}_{pmu_name}[] = {{\n')
      first = False
      last_pmu = event.pmu
      pmus.add((event.pmu, pmu_name))

    _args.output_file.write(event.to_c_string(metric=False))
    last_name = event.name
  _pending_events = []

  _args.output_file.write(f"""
}};

const struct pmu_table_entry {_pending_events_tblname}[] = {{
""")
  for (pmu, tbl_pmu) in sorted(pmus):
    pmu_name = f"{pmu}\\000"
    _args.output_file.write(f"""{{
     .entries = {_pending_events_tblname}_{tbl_pmu},
     .num_entries = ARRAY_SIZE({_pending_events_tblname}_{tbl_pmu}),
     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
}},
""")
  _args.output_file.write('};\n\n')
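# Sketch of the C emitted above (illustrative, with hypothetical names and
# offsets): one sorted array of string offsets per PMU, followed by an index
# table naming each PMU, roughly:
#   static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = {
#        { 100 }, /* encoded event at offset 100 of big_c_string */
#   };
#   const struct pmu_table_entry pmu_events__test_soc_cpu[] = {
#   {
#        .entries = pmu_events__test_soc_cpu_default_core,
#        .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core),
#        .pmu_name = { 42 /* default_core\000 */ },
#   },
#   };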

def print_pending_metrics() -> None:
  """Optionally close metrics table."""

  def metric_cmp_key(j: JsonEvent) -> Tuple[bool, str, str]:
    def fix_none(s: Optional[str]) -> str:
      if s is None:
        return ''
      return s

    return (j.desc is not None, fix_none(j.pmu), fix_none(j.metric_name))

  global _pending_metrics
  if not _pending_metrics:
    return

  global _pending_metrics_tblname
  if _pending_metrics_tblname.endswith('_sys'):
    global _sys_metric_tables
    _sys_metric_tables.append(_pending_metrics_tblname)
  else:
    global _metric_tables
    _metric_tables.append(_pending_metrics_tblname)

  first = True
  last_pmu = None
  pmus = set()
  for metric in sorted(_pending_metrics, key=metric_cmp_key):
    if metric.pmu != last_pmu:
      if not first:
        _args.output_file.write('};\n')
      pmu_name = metric.pmu.replace(',', '_')
      _args.output_file.write(
          f'static const struct compact_pmu_event {_pending_metrics_tblname}_{pmu_name}[] = {{\n')
      first = False
      last_pmu = metric.pmu
      pmus.add((metric.pmu, pmu_name))

    _args.output_file.write(metric.to_c_string(metric=True))
  _pending_metrics = []

  _args.output_file.write(f"""
}};

const struct pmu_table_entry {_pending_metrics_tblname}[] = {{
""")
  for (pmu, tbl_pmu) in sorted(pmus):
    pmu_name = f"{pmu}\\000"
    _args.output_file.write(f"""{{
     .entries = {_pending_metrics_tblname}_{tbl_pmu},
     .num_entries = ARRAY_SIZE({_pending_metrics_tblname}_{tbl_pmu}),
     .pmu_name = {{ {_bcs.offsets[pmu_name]} /* {pmu_name} */ }},
}},
""")
  _args.output_file.write('};\n\n')

def get_topic(topic: str) -> str:
  if topic.endswith('metrics.json'):
    return 'metrics'
  return removesuffix(topic, '.json').replace('-', ' ')
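# Examples (illustrative): get_topic('frontend.json') == 'frontend',
# get_topic('uncore-memory.json') == 'uncore memory', and any file name ending
# in 'metrics.json' maps to the 'metrics' topic.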

def preprocess_one_file(parents: Sequence[str], item: os.DirEntry) -> None:

  if item.is_dir():
    return

  # base dir or too deep
  level = len(parents)
  if level == 0 or level > 4:
    return

  # Ignore other directories. If the file name does not have a .json
  # extension, ignore it. It could be a readme.txt for instance.
  if not item.is_file() or not item.name.endswith('.json'):
    return

  if item.name == 'metricgroups.json':
    metricgroup_descriptions = json.load(open(item.path))
    for mgroup in metricgroup_descriptions:
      assert len(mgroup) > 1, parents
      description = f"{metricgroup_descriptions[mgroup]}\\000"
      mgroup = f"{mgroup}\\000"
      _bcs.add(mgroup, metric=True)
      _bcs.add(description, metric=True)
      _metricgroups[mgroup] = description
    return

  topic = get_topic(item.name)
  for event in read_json_events(item.path, topic):
    pmu_name = f"{event.pmu}\\000"
    if event.name:
      _bcs.add(pmu_name, metric=False)
      _bcs.add(event.build_c_string(metric=False), metric=False)
    if event.metric_name:
      _bcs.add(pmu_name, metric=True)
      _bcs.add(event.build_c_string(metric=True), metric=True)

def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None:
  """Process a JSON file during the main walk."""
  def is_leaf_dir_ignoring_sys(path: str) -> bool:
    for item in os.scandir(path):
      if item.is_dir() and item.name != 'sys':
        return False
    return True

  # Model directories are leaves (ignoring possible sys
  # directories). The FTW will walk into the directory next. Flush
  # pending events and metrics and update the table names for the new
  # model directory.
  if item.is_dir() and is_leaf_dir_ignoring_sys(item.path):
    print_pending_events()
    print_pending_metrics()

    global _pending_events_tblname
    _pending_events_tblname = file_name_to_table_name('pmu_events_', parents, item.name)
    global _pending_metrics_tblname
    _pending_metrics_tblname = file_name_to_table_name('pmu_metrics_', parents, item.name)

    if item.name == 'sys':
      _sys_event_table_to_metric_table_mapping[_pending_events_tblname] = _pending_metrics_tblname
    return

  # base dir or too deep
  level = len(parents)
  if level == 0 or level > 4:
    return

  # Ignore other directories. If the file name does not have a .json
  # extension, ignore it. It could be a readme.txt for instance.
  if not item.is_file() or not item.name.endswith('.json') or item.name == 'metricgroups.json':
    return

  add_events_table_entries(item, get_topic(item.name))


def print_mapping_table(archs: Sequence[str]) -> None:
  """Read the mapfile and generate the struct from cpuid string to event table."""
  _args.output_file.write("""
/* Struct used to make the PMU event table implementation opaque to callers. */
struct pmu_events_table {
        const struct pmu_table_entry *pmus;
        uint32_t num_pmus;
};

/* Struct used to make the PMU metric table implementation opaque to callers. */
struct pmu_metrics_table {
        const struct pmu_table_entry *pmus;
        uint32_t num_pmus;
};

/*
 * Map a CPU to its table of PMU events. The CPU is identified by the
 * cpuid field, which is an arch-specific identifier for the CPU.
 * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
 * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c.
 *
 * The cpuid can contain any character other than the comma.
 */
struct pmu_events_map {
        const char *arch;
        const char *cpuid;
        struct pmu_events_table event_table;
        struct pmu_metrics_table metric_table;
};

/*
 * Global table mapping each known CPU for the architecture to its
 * table of PMU events.
 */
const struct pmu_events_map pmu_events_map[] = {
""")
  for arch in archs:
    if arch == 'test':
      _args.output_file.write("""{
\t.arch = "testarch",
\t.cpuid = "testcpu",
\t.event_table = {
\t\t.pmus = pmu_events__test_soc_cpu,
\t\t.num_pmus = ARRAY_SIZE(pmu_events__test_soc_cpu),
\t},
\t.metric_table = {
\t\t.pmus = pmu_metrics__test_soc_cpu,
\t\t.num_pmus = ARRAY_SIZE(pmu_metrics__test_soc_cpu),
\t}
},
""")
    elif arch == 'common':
      _args.output_file.write("""{
\t.arch = "common",
\t.cpuid = "common",
\t.event_table = {
\t\t.pmus = pmu_events__common,
\t\t.num_pmus = ARRAY_SIZE(pmu_events__common),
\t},
\t.metric_table = {},
},
""")
    else:
      with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile:
        table = csv.reader(csvfile)
        first = True
        for row in table:
          # Skip the first row or any row beginning with #.
          if not first and len(row) > 0 and not row[0].startswith('#'):
            event_tblname = file_name_to_table_name('pmu_events_', [], row[2].replace('/', '_'))
            if event_tblname in _event_tables:
              event_size = f'ARRAY_SIZE({event_tblname})'
            else:
              event_tblname = 'NULL'
              event_size = '0'
            metric_tblname = file_name_to_table_name('pmu_metrics_', [], row[2].replace('/', '_'))
            if metric_tblname in _metric_tables:
              metric_size = f'ARRAY_SIZE({metric_tblname})'
            else:
              metric_tblname = 'NULL'
              metric_size = '0'
            if event_size == '0' and metric_size == '0':
              continue
            cpuid = row[0].replace('\\', '\\\\')
            _args.output_file.write(f"""{{
\t.arch = "{arch}",
\t.cpuid = "{cpuid}",
\t.event_table = {{
\t\t.pmus = {event_tblname},
\t\t.num_pmus = {event_size}
\t}},
\t.metric_table = {{
\t\t.pmus = {metric_tblname},
\t\t.num_pmus = {metric_size}
\t}}
}},
""")
          first = False

  _args.output_file.write("""{
\t.arch = 0,
\t.cpuid = 0,
\t.event_table = { 0, 0 },
\t.metric_table = { 0, 0 },
}
};
""")


def print_system_mapping_table() -> None:
  """C struct mapping table array for tables from /sys directories."""
  _args.output_file.write("""
struct pmu_sys_events {
\tconst char *name;
\tstruct pmu_events_table event_table;
\tstruct pmu_metrics_table metric_table;
};

static const struct pmu_sys_events pmu_sys_event_tables[] = {
""")
  printed_metric_tables = []
  for tblname in _sys_event_tables:
    _args.output_file.write(f"""\t{{
\t\t.event_table = {{
\t\t\t.pmus = {tblname},
\t\t\t.num_pmus = ARRAY_SIZE({tblname})
\t\t}},""")
    metric_tblname = _sys_event_table_to_metric_table_mapping[tblname]
    if metric_tblname in _sys_metric_tables:
      _args.output_file.write(f"""
\t\t.metric_table = {{
\t\t\t.pmus = {metric_tblname},
\t\t\t.num_pmus = ARRAY_SIZE({metric_tblname})
\t\t}},""")
      printed_metric_tables.append(metric_tblname)
    _args.output_file.write(f"""
\t\t.name = \"{tblname}\",
\t}},
""")
  for tblname in _sys_metric_tables:
    if tblname in printed_metric_tables:
      continue
    _args.output_file.write(f"""\t{{
\t\t.metric_table = {{
\t\t\t.pmus = {tblname},
\t\t\t.num_pmus = ARRAY_SIZE({tblname})
\t\t}},
\t\t.name = \"{tblname}\",
\t}},
""")
  _args.output_file.write("""\t{
\t\t.event_table = { 0, 0 },
\t\t.metric_table = { 0, 0 },
\t},
};

static void decompress_event(int offset, struct pmu_event *pe)
{
\tconst char *p = &big_c_string[offset];
""")
  for attr in _json_event_attributes:
    _args.output_file.write(f'\n\tpe->{attr} = ')
    if attr in _json_enum_attributes:
      _args.output_file.write("*p - '0';\n")
    else:
      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
    if attr == _json_event_attributes[-1]:
      continue
    if attr in _json_enum_attributes:
      _args.output_file.write('\tp++;')
    else:
      _args.output_file.write('\twhile (*p++);')
  _args.output_file.write("""}

static void decompress_metric(int offset, struct pmu_metric *pm)
{
\tconst char *p = &big_c_string[offset];
""")
  for attr in _json_metric_attributes:
    _args.output_file.write(f'\n\tpm->{attr} = ')
    if attr in _json_enum_attributes:
      _args.output_file.write("*p - '0';\n")
    else:
      _args.output_file.write("(*p == '\\0' ? NULL : p);\n")
    if attr == _json_metric_attributes[-1]:
      continue
    if attr in _json_enum_attributes:
      _args.output_file.write('\tp++;')
    else:
      _args.output_file.write('\twhile (*p++);')
  _args.output_file.write("""}

static int pmu_events_table__for_each_event_pmu(const struct pmu_events_table *table,
                                                const struct pmu_table_entry *pmu,
                                                pmu_event_iter_fn fn,
                                                void *data)
{
        int ret;
        struct pmu_event pe = {
                .pmu = &big_c_string[pmu->pmu_name.offset],
        };

        for (uint32_t i = 0; i < pmu->num_entries; i++) {
                decompress_event(pmu->entries[i].offset, &pe);
                if (!pe.name)
                        continue;
                ret = fn(&pe, table, data);
                if (ret)
                        return ret;
        }
        return 0;
}

static int pmu_events_table__find_event_pmu(const struct pmu_events_table *table,
                                            const struct pmu_table_entry *pmu,
                                            const char *name,
                                            pmu_event_iter_fn fn,
                                            void *data)
{
        struct pmu_event pe = {
                .pmu = &big_c_string[pmu->pmu_name.offset],
        };
        int low = 0, high = pmu->num_entries - 1;

        while (low <= high) {
                int cmp, mid = (low + high) / 2;

                decompress_event(pmu->entries[mid].offset, &pe);

                if (!pe.name && !name)
                        goto do_call;

                if (!pe.name && name) {
                        low = mid + 1;
                        continue;
                }
                if (pe.name && !name) {
                        high = mid - 1;
                        continue;
                }

                cmp = strcasecmp(pe.name, name);
                if (cmp < 0) {
                        low = mid + 1;
                        continue;
                }
                if (cmp > 0) {
                        high = mid - 1;
                        continue;
                }
  do_call:
                return fn ? fn(&pe, table, data) : 0;
        }
        return PMU_EVENTS__NOT_FOUND;
}

int pmu_events_table__for_each_event(const struct pmu_events_table *table,
                                    struct perf_pmu *pmu,
                                    pmu_event_iter_fn fn,
                                    void *data)
{
        for (size_t i = 0; i < table->num_pmus; i++) {
                const struct pmu_table_entry *table_pmu = &table->pmus[i];
                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
                int ret;

                if (pmu && !perf_pmu__name_wildcard_match(pmu, pmu_name))
                        continue;

                ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data);
                if (ret)
                        return ret;
        }
        return 0;
}

int pmu_events_table__find_event(const struct pmu_events_table *table,
                                 struct perf_pmu *pmu,
                                 const char *name,
                                 pmu_event_iter_fn fn,
                                 void *data)
{
        for (size_t i = 0; i < table->num_pmus; i++) {
                const struct pmu_table_entry *table_pmu = &table->pmus[i];
                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
                int ret;

                if (!perf_pmu__name_wildcard_match(pmu, pmu_name))
                        continue;

                ret = pmu_events_table__find_event_pmu(table, table_pmu, name, fn, data);
                if (ret != PMU_EVENTS__NOT_FOUND)
                        return ret;
        }
        return PMU_EVENTS__NOT_FOUND;
}

size_t pmu_events_table__num_events(const struct pmu_events_table *table,
                                    struct perf_pmu *pmu)
{
        size_t count = 0;

        for (size_t i = 0; i < table->num_pmus; i++) {
                const struct pmu_table_entry *table_pmu = &table->pmus[i];
                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];

                if (perf_pmu__name_wildcard_match(pmu, pmu_name))
                        count += table_pmu->num_entries;
        }
        return count;
}

static int pmu_metrics_table__for_each_metric_pmu(const struct pmu_metrics_table *table,
                                                const struct pmu_table_entry *pmu,
                                                pmu_metric_iter_fn fn,
                                                void *data)
{
        int ret;
        struct pmu_metric pm = {
                .pmu = &big_c_string[pmu->pmu_name.offset],
        };

        for (uint32_t i = 0; i < pmu->num_entries; i++) {
                decompress_metric(pmu->entries[i].offset, &pm);
                if (!pm.metric_expr)
                        continue;
                ret = fn(&pm, table, data);
                if (ret)
                        return ret;
        }
        return 0;
}

int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table,
                                     pmu_metric_iter_fn fn,
                                     void *data)
{
        for (size_t i = 0; i < table->num_pmus; i++) {
                int ret = pmu_metrics_table__for_each_metric_pmu(table, &table->pmus[i],
                                                                 fn, data);

                if (ret)
                        return ret;
        }
        return 0;
}

static const struct pmu_events_map *map_for_cpu(struct perf_cpu cpu)
{
        static struct {
                const struct pmu_events_map *map;
                struct perf_cpu cpu;
        } last_result;
        static struct {
                const struct pmu_events_map *map;
                char *cpuid;
        } last_map_search;
        static bool has_last_result, has_last_map_search;
        const struct pmu_events_map *map = NULL;
        char *cpuid = NULL;
        size_t i;

        if (has_last_result && last_result.cpu.cpu == cpu.cpu)
                return last_result.map;

        cpuid = get_cpuid_allow_env_override(cpu);

        /*
         * On some platforms which use the cpus map, cpuid can be NULL for
         * PMUs other than CORE PMUs.
         */
        if (!cpuid)
                goto out_update_last_result;

        if (has_last_map_search && !strcmp(last_map_search.cpuid, cpuid)) {
                map = last_map_search.map;
                free(cpuid);
        } else {
                i = 0;
                for (;;) {
                        map = &pmu_events_map[i++];

                        if (!map->arch) {
                                map = NULL;
                                break;
                        }

                        if (!strcmp_cpuid_str(map->cpuid, cpuid))
                                break;
                }
                free(last_map_search.cpuid);
                last_map_search.cpuid = cpuid;
                last_map_search.map = map;
                has_last_map_search = true;
        }
out_update_last_result:
        last_result.cpu = cpu;
        last_result.map = map;
        has_last_result = true;
        return map;
}

static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu)
{
        struct perf_cpu cpu = {-1};

        if (pmu)
                cpu = perf_cpu_map__min(pmu->cpus);
        return map_for_cpu(cpu);
}

const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu)
{
        const struct pmu_events_map *map = map_for_pmu(pmu);

        if (!map)
                return NULL;

        if (!pmu)
                return &map->event_table;

        for (size_t i = 0; i < map->event_table.num_pmus; i++) {
                const struct pmu_table_entry *table_pmu = &map->event_table.pmus[i];
                const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];

                if (perf_pmu__name_wildcard_match(pmu, pmu_name))
                        return &map->event_table;
        }
        return NULL;
}

const struct pmu_metrics_table *pmu_metrics_table__find(void)
{
        struct perf_cpu cpu = {-1};
        const struct pmu_events_map *map = map_for_cpu(cpu);

        return map ? &map->metric_table : NULL;
}

const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
{
        for (const struct pmu_events_map *tables = &pmu_events_map[0];
             tables->arch;
             tables++) {
                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
                        return &tables->event_table;
        }
        return NULL;
}

const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid)
{
        for (const struct pmu_events_map *tables = &pmu_events_map[0];
             tables->arch;
             tables++) {
                if (!strcmp(tables->arch, arch) && !strcmp_cpuid_str(tables->cpuid, cpuid))
                        return &tables->metric_table;
        }
        return NULL;
}

int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data)
{
        for (const struct pmu_events_map *tables = &pmu_events_map[0];
             tables->arch;
             tables++) {
                int ret = pmu_events_table__for_each_event(&tables->event_table,
                                                           /*pmu=*/ NULL, fn, data);

                if (ret)
                        return ret;
        }
        return 0;
}

int pmu_for_each_core_metric(pmu_metric_iter_fn fn, void *data)
{
        for (const struct pmu_events_map *tables = &pmu_events_map[0];
             tables->arch;
             tables++) {
                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);

                if (ret)
                        return ret;
        }
        return 0;
}

const struct pmu_events_table *find_sys_events_table(const char *name)
{
        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
             tables->name;
             tables++) {
                if (!strcmp(tables->name, name))
                        return &tables->event_table;
        }
        return NULL;
}

int pmu_for_each_sys_event(pmu_event_iter_fn fn, void *data)
{
        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
             tables->name;
             tables++) {
                int ret = pmu_events_table__for_each_event(&tables->event_table,
                                                           /*pmu=*/ NULL, fn, data);

                if (ret)
                        return ret;
        }
        return 0;
}

int pmu_for_each_sys_metric(pmu_metric_iter_fn fn, void *data)
{
        for (const struct pmu_sys_events *tables = &pmu_sys_event_tables[0];
             tables->name;
             tables++) {
                int ret = pmu_metrics_table__for_each_metric(&tables->metric_table, fn, data);

                if (ret)
                        return ret;
        }
        return 0;
}
""")

def print_metricgroups() -> None:
  _args.output_file.write("""
static const int metricgroups[][2] = {
""")
  for mgroup in sorted(_metricgroups):
    description = _metricgroups[mgroup]
    _args.output_file.write(
        f'\t{{ {_bcs.offsets[mgroup]}, {_bcs.offsets[description]} }}, /* {mgroup} => {description} */\n'
    )
  _args.output_file.write("""
};

const char *describe_metricgroup(const char *group)
{
        int low = 0, high = (int)ARRAY_SIZE(metricgroups) - 1;

        while (low <= high) {
                int mid = (low + high) / 2;
                const char *mgroup = &big_c_string[metricgroups[mid][0]];
                int cmp = strcmp(mgroup, group);

                if (cmp == 0) {
                        return &big_c_string[metricgroups[mid][1]];
                } else if (cmp < 0) {
                        low = mid + 1;
                } else {
                        high = mid - 1;
                }
        }
        return NULL;
}
""")

def main() -> None:
  global _args

  def dir_path(path: str) -> str:
    """Validate path is a directory for argparse."""
    if os.path.isdir(path):
      return path
    raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory')

  def ftw(path: str, parents: Sequence[str],
          action: Callable[[Sequence[str], os.DirEntry], None]) -> None:
    """Replicate the directory/file walking behavior of C's file tree walk."""
    for item in sorted(os.scandir(path), key=lambda e: e.name):
      if _args.model != 'all' and item.is_dir():
        # Check if the model matches one in _args.model.
        if len(parents) == _args.model.split(',')[0].count('/'):
          # We're testing the correct directory.
          item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name
          if 'test' not in item_path and 'common' not in item_path and item_path not in _args.model.split(','):
            continue
      try:
        action(parents, item)
      except Exception as e:
        raise RuntimeError(f'Action failure for \'{item.name}\' in {parents}') from e
      if item.is_dir():
        ftw(item.path, parents + [item.name], action)

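  # How the 'model' argument filters the walk (illustrative): with
  # model='arm/cortex-a34' only the 'cortex-a34' directory under the 'arm'
  # vendor directory is descended into (plus any 'test' or 'common' paths),
  # because the directory depth being tested must equal the number of '/'
  # separators in the first model listed.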
  ap = argparse.ArgumentParser()
  ap.add_argument('arch', help='Architecture name like x86')
  ap.add_argument('model', help='''Select a model such as skylake to
reduce the code size.  Normally set to "all". For architectures like
ARM64 with an implementor/model, the model must include the implementor
such as "arm/cortex-a34".''',
                  default='all')
  ap.add_argument(
      'starting_dir',
      type=dir_path,
      help='Root of tree containing architecture directories containing json files'
  )
  ap.add_argument(
      'output_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=sys.stdout)
  _args = ap.parse_args()

  _args.output_file.write(f"""
/* SPDX-License-Identifier: GPL-2.0 */
/* THIS FILE WAS AUTOGENERATED BY jevents.py arch={_args.arch} model={_args.model} ! */
""")
  _args.output_file.write("""
#include <pmu-events/pmu-events.h>
#include "util/header.h"
#include "util/pmu.h"
#include <string.h>
#include <stddef.h>

struct compact_pmu_event {
        int offset;
};

struct pmu_table_entry {
        const struct compact_pmu_event *entries;
        uint32_t num_entries;
        struct compact_pmu_event pmu_name;
};

""")
  archs = []
  for item in os.scandir(_args.starting_dir):
    if not item.is_dir():
      continue
    if item.name == _args.arch or _args.arch == 'all' or item.name == 'test' or item.name == 'common':
      archs.append(item.name)

  if len(archs) < 2 and _args.arch != 'none':
    raise IOError(f'Missing architecture directory \'{_args.arch}\'')

  archs.sort()
  for arch in archs:
    arch_path = f'{_args.starting_dir}/{arch}'
    preprocess_arch_std_files(arch_path)
    ftw(arch_path, [], preprocess_one_file)

  _bcs.compute()
  _args.output_file.write('static const char *const big_c_string =\n')
  for s in _bcs.big_string:
    _args.output_file.write(s)
  _args.output_file.write(';\n\n')
  for arch in archs:
    arch_path = f'{_args.starting_dir}/{arch}'
    ftw(arch_path, [], process_one_file)
    print_pending_events()
    print_pending_metrics()

  print_mapping_table(archs)
  print_system_mapping_table()
  print_metricgroups()

if __name__ == '__main__':
  main()
