xref: /aosp_15_r20/external/mesa3d/src/intel/perf/gen_perf.py (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1# Copyright (c) 2015-2017 Intel Corporation
2#
3# Permission is hereby granted, free of charge, to any person obtaining a
4# copy of this software and associated documentation files (the "Software"),
5# to deal in the Software without restriction, including without limitation
6# the rights to use, copy, modify, merge, publish, distribute, sublicense,
7# and/or sell copies of the Software, and to permit persons to whom the
8# Software is furnished to do so, subject to the following conditions:
9#
10# The above copyright notice and this permission notice (including the next
11# paragraph) shall be included in all copies or substantial portions of the
12# Software.
13#
14# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20# IN THE SOFTWARE.
21
22import argparse
23import builtins
24import collections
25import os
26import re
27import sys
28import textwrap
29
30import xml.etree.ElementTree as et
31
# Maps an expanded equation hash to the C symbol of the function already
# emitted for it, so identical equations can share one implementation.
hashed_funcs = dict()

# Output stream for the generated C file; assigned in main().
c_file = None
# Number of columns prepended to each line written by the C emitters.
_c_indent = 0
36
def c(*args):
    """Write args, joined by single spaces, to the C output line by line.

    Each line is prefixed with the current C indentation, stripped of
    trailing whitespace and terminated with a newline.
    """
    joined = ' '.join(str(arg) for arg in args)
    for raw_line in joined.splitlines():
        indented = ' ' * _c_indent + raw_line
        c_file.write(indented.rstrip() + "\n")
42
def c_line_start(code):
    """Write code at the current C indentation without a trailing newline."""
    c_file.write(' ' * _c_indent + code)


def c_raw(code):
    """Write code to the C output verbatim (no indentation, no newline)."""
    c_file.write(code)
48
def c_indent(n):
    """Increase the indentation of subsequent C output by n columns."""
    global _c_indent
    _c_indent += n


def c_outdent(n):
    """Decrease the indentation of subsequent C output by n columns."""
    global _c_indent
    _c_indent -= n
55
# Output stream for the generated header file; assigned in main().
header_file = None
# Number of columns prepended to each line written by h().
_h_indent = 0


def h(*args):
    """Write args, joined by single spaces, to the header output line by line.

    Each line is prefixed with the current header indentation, stripped of
    trailing whitespace and terminated with a newline.
    """
    code = ' '.join(map(str, args))
    for line in code.splitlines():
        text = ''.rjust(_h_indent) + line
        header_file.write(text.rstrip() + "\n")


def h_indent(n):
    """Increase the indentation of subsequent header output by n columns."""
    # The global declaration must name the header's own counter; declaring
    # `_c_indent` here would leave `_h_indent` as an unbound local and make
    # every call raise UnboundLocalError.
    global _h_indent
    _h_indent = _h_indent + n


def h_outdent(n):
    """Decrease the indentation of subsequent header output by n columns."""
    global _h_indent
    _h_indent = _h_indent - n
71
72
def emit_fadd(tmp_id, args):
    """Emit `double tmp<N> = args[1] + args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"double tmp{tmp_id} = {lhs} + {rhs};")
    return tmp_id + 1
76
def emit_fdiv(tmp_id, args):
    """Emit a double division args[1] / args[0] guarded against divide by zero.

    Three temporaries are emitted (numerator, denominator, result); the
    result is 0 when the denominator is zero. Returns the next free id.
    """
    numerator_id = tmp_id
    denominator_id = tmp_id + 1
    result_id = tmp_id + 2
    c(f"double tmp{numerator_id} = {args[1]};")
    c(f"double tmp{denominator_id} = {args[0]};")
    c(f"double tmp{result_id} = tmp{denominator_id} ? tmp{numerator_id} / tmp{denominator_id} : 0;")
    return tmp_id + 3
83
def emit_fmax(tmp_id, args):
    """Emit `MAX(args[1], args[0])` over doubles using three temporaries.

    Returns the next free temporary id.
    """
    first_id = tmp_id
    second_id = tmp_id + 1
    result_id = tmp_id + 2
    c(f"double tmp{first_id} = {args[1]};")
    c(f"double tmp{second_id} = {args[0]};")
    c(f"double tmp{result_id} = MAX(tmp{first_id}, tmp{second_id});")
    return tmp_id + 3
89
def emit_fmul(tmp_id, args):
    """Emit `double tmp<N> = args[1] * args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"double tmp{tmp_id} = {lhs} * {rhs};")
    return tmp_id + 1
93
def emit_fsub(tmp_id, args):
    """Emit `double tmp<N> = args[1] - args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"double tmp{tmp_id} = {lhs} - {rhs};")
    return tmp_id + 1
97
def emit_read(tmp_id, args):
    """Emit a read of one accumulator slot into a uint64_t temporary.

    args[1], lower-cased, selects the `query-><name>_offset` field and
    args[0] is the index added to it. Returns the next free temporary id.
    """
    section = args[1].lower()
    index = args[0]
    c(f"uint64_t tmp{tmp_id} = results->accumulator[query->{section}_offset + {index}];")
    return tmp_id + 1
102
def emit_uadd(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] + args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} + {rhs};")
    return tmp_id + 1
106
def emit_udiv(tmp_id, args):
    """Emit a uint64_t division args[1] / args[0] using three temporaries.

    A literal divisor is asserted to be positive and divided by directly;
    any other divisor is guarded at run time so that a zero denominator
    yields 0. Returns the next free temporary id.
    """
    divisor = args[0]
    numerator_id = tmp_id
    denominator_id = tmp_id + 1
    result_id = tmp_id + 2

    c(f"uint64_t tmp{numerator_id} = {args[1]};")
    c(f"uint64_t tmp{denominator_id} = {divisor};")

    if not divisor.isdigit():
        # Divisor only known at run time: check for divide by zero in C.
        c(f"uint64_t tmp{result_id} = tmp{denominator_id} ? tmp{numerator_id} / tmp{denominator_id} : 0;")
    else:
        assert int(divisor) > 0
        c(f"uint64_t tmp{result_id} = tmp{numerator_id} / tmp{denominator_id};")
    return tmp_id + 3
117
def emit_umul(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] * args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} * {rhs};")
    return tmp_id + 1
121
def emit_usub(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] - args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} - {rhs};")
    return tmp_id + 1
125
def emit_umin(tmp_id, args):
    """Emit `uint64_t tmp<N> = MIN(args[1], args[0]);` and return the next free id."""
    second, first = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = MIN({first}, {second});")
    return tmp_id + 1
129
def emit_lshft(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] << args[0];` and return the next free id."""
    shift, value = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {value} << {shift};")
    return tmp_id + 1
133
def emit_rshft(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] >> args[0];` and return the next free id."""
    shift, value = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {value} >> {shift};")
    return tmp_id + 1
137
def emit_and(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] & args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} & {rhs};")
    return tmp_id + 1
141
def emit_ulte(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] <= args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} <= {rhs};")
    return tmp_id + 1
145
def emit_ult(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] < args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} < {rhs};")
    return tmp_id + 1
149
def emit_ugte(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] >= args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} >= {rhs};")
    return tmp_id + 1
153
def emit_ugt(tmp_id, args):
    """Emit `uint64_t tmp<N> = args[1] > args[0];` and return the next free id."""
    rhs, lhs = args[0], args[1]
    c(f"uint64_t tmp{tmp_id} = {lhs} > {rhs};")
    return tmp_id + 1
157
# RPN operators usable in counter equations.
# Each entry maps the operator token to (n operands, emitter).
ops = {
    "FADD": (2, emit_fadd),
    "FDIV": (2, emit_fdiv),
    "FMAX": (2, emit_fmax),
    "FMUL": (2, emit_fmul),
    "FSUB": (2, emit_fsub),
    "READ": (2, emit_read),
    "UADD": (2, emit_uadd),
    "UDIV": (2, emit_udiv),
    "UMUL": (2, emit_umul),
    "USUB": (2, emit_usub),
    "UMIN": (2, emit_umin),
    "<<": (2, emit_lshft),
    ">>": (2, emit_rshft),
    "AND": (2, emit_and),
    "UGTE": (2, emit_ugte),
    "UGT": (2, emit_ugt),
    "ULTE": (2, emit_ulte),
    "ULT": (2, emit_ult),
}
178
179
def brkt(subexp):
    """Wrap subexp in parentheses when it contains a space, else return it as is."""
    return "(" + subexp + ")" if " " in subexp else subexp
185
def splice_bitwise_and(args):
    """Return the C expression `args[1] & args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} & {brkt(args[0])}"
188
def splice_bitwise_or(args):
    """Return the C expression `args[1] | args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} | {brkt(args[0])}"
191
def splice_logical_and(args):
    """Return the C expression `args[1] && args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} && {brkt(args[0])}"
194
def splice_umul(args):
    """Return the C expression `args[1] * args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} * {brkt(args[0])}"
197
def splice_ult(args):
    """Return the C expression `args[1] < args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} < {brkt(args[0])}"
200
def splice_ugte(args):
    """Return the C expression `args[1] >= args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} >= {brkt(args[0])}"
203
def splice_ulte(args):
    """Return the C expression `args[1] <= args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} <= {brkt(args[0])}"
206
def splice_ugt(args):
    """Return the C expression `args[1] > args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} > {brkt(args[0])}"
209
def splice_lshft(args):
    """Return the C expression `args[1] << args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} << {brkt(args[0])}"
212
def splice_equal(args):
    """Return the C expression `args[1] == args[0]`, bracketing compound operands."""
    return f"{brkt(args[1])} == {brkt(args[0])}"
215
# RPN operators usable in availability expressions.
# Each entry maps the operator token to (n operands, splicer).
# NOTE(review): splice_ulte and splice_ugt are defined in this file but are
# not registered here - confirm whether "ULTE"/"UGT" are intentionally
# unsupported in availability expressions.
exp_ops = {
    "AND": (2, splice_bitwise_and),
    "OR": (2, splice_bitwise_or),
    "UGTE": (2, splice_ugte),
    "ULT": (2, splice_ult),
    "&&": (2, splice_logical_and),
    "UMUL": (2, splice_umul),
    "<<": (2, splice_lshft),
    "==": (2, splice_equal),
}
226
227
# Maps each hardware variable usable in equations/expressions to the C
# expression that reads its value.
hw_vars = {
    "$EuCoresTotalCount": "perf->sys_vars.n_eus",
    "$VectorEngineTotalCount": "perf->sys_vars.n_eus",
    "$EuSlicesTotalCount": "perf->sys_vars.n_eu_slices",
    "$EuSubslicesTotalCount": "perf->sys_vars.n_eu_sub_slices",
    "$XeCoreTotalCount": "perf->sys_vars.n_eu_sub_slices",
    "$EuDualSubslicesTotalCount": "perf->sys_vars.n_eu_sub_slices",
    "$EuDualSubslicesSlice0123Count": "perf->sys_vars.n_eu_slice0123",
    "$EuThreadsCount": "perf->devinfo->num_thread_per_eu",
    "$VectorEngineThreadsCount": "perf->devinfo->num_thread_per_eu",
    "$SliceMask": "perf->sys_vars.slice_mask",
    "$SliceTotalCount": "perf->sys_vars.n_eu_slices",
    # subslice_mask is interchangeable with subslice/dual-subslice since
    # Gfx12+ only has dual subslices which can be assimilated with 16EUs
    # subslices.
    "$SubsliceMask": "perf->sys_vars.subslice_mask",
    "$DualSubsliceMask": "perf->sys_vars.subslice_mask",
    "$XeCoreMask": "perf->sys_vars.subslice_mask",
    "$GpuTimestampFrequency": "perf->devinfo->timestamp_frequency",
    "$GpuMinFrequency": "perf->sys_vars.gt_min_freq",
    "$GpuMaxFrequency": "perf->sys_vars.gt_max_freq",
    "$SkuRevisionId": "perf->devinfo->revision",
    "$QueryMode": "perf->sys_vars.query_mode",
    "$ComputeEngineTotalCount": "perf->devinfo->engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE]",
    "$CopyEngineTotalCount": "perf->devinfo->engine_class_supported_count[INTEL_ENGINE_CLASS_COPY]",
}
252
def resolve_variable(name, set, allow_counters):
    """Translate an equation variable into the C expression that evaluates it.

    Candidates are tried in this order: the fixed hw_vars table, a
    `$GtSlice<N>` slice-availability query, a `$GtSlice<N>XeCore<M>`
    subslice-availability query and, only when allow_counters is true,
    another counter of `set` (turned into a call to its read function).

    Returns the C expression as a string, or None when name is unknown.
    """
    if name in hw_vars:
        return hw_vars[name]

    slice_match = re.search(r'\$GtSlice([0-9]+)$', name)
    if slice_match:
        return f'intel_device_info_slice_available(perf->devinfo, {slice_match.group(1)})'

    xecore_match = re.search(r'\$GtSlice([0-9]+)XeCore([0-9]+)$', name)
    if xecore_match:
        slice_id, xecore_id = xecore_match.groups()
        return f'intel_device_info_subslice_available(perf->devinfo, {slice_id}, {xecore_id})'

    if allow_counters and name in set.counter_vars:
        # Strip the leading "$" to obtain the counter's symbol name.
        return set.read_funcs[name[1:]] + "(perf, query, results)"

    return None
265
def output_rpn_equation_code(set, counter, equation):
    """Emit the C statements evaluating an RPN counter equation.

    The equation is split on whitespace and evaluated with a stack: whenever
    the top of the stack is a key of `ops`, the operator's operands are
    popped, `$variables` among them are resolved (counters allowed), and the
    operator's emitter writes C code whose result temporary is pushed back.
    A final `return <value>;` is emitted for the single remaining entry.

    Raises:
        Exception: if a variable cannot be resolved, or if the stack does not
            hold exactly one entry once all tokens are consumed.
    """
    c("/* RPN equation: " + equation + " */")
    stack = []
    tmp_id = 0

    def resolve(token):
        # Only "$"-prefixed tokens are variables; literals and temporaries
        # are passed through untouched.
        if token[0] != "$":
            return token
        resolved_variable = resolve_variable(token, set, True)
        if resolved_variable is None:
            raise Exception("Failed to resolve variable " + token + " in equation " + equation + " for " + set.name + " :: " + counter.get('name'))
        return resolved_variable

    for token in equation.split():
        stack.append(token)
        while stack and stack[-1] in ops:
            op = stack.pop()
            argc, callback = ops[op]
            # Operands come off the stack in reverse order: args[0] is the
            # right-most operand of the operator.
            args = [resolve(stack.pop()) for _ in range(argc)]

            tmp_id = callback(tmp_id, args)

            # The emitter's result lives in the last temporary it allocated.
            stack.append("tmp{0}".format(tmp_id - 1))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn code for " + set.name + " :: " +
                counter.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
                equation + "\"")

    # An equation may consist of a single variable with no operator at all;
    # the failure message must then name that value (no operand was ever
    # popped in this case).
    value = resolve(stack[-1])

    c("\nreturn " + value + ";")
307
def splice_rpn_expression(set, counter_name, expression):
    """Convert an RPN availability expression into an infix C expression.

    The expression is split on whitespace and evaluated with a stack:
    whenever the top of the stack is a key of `exp_ops`, the operator's
    operands are popped, `$variables` among them are resolved (counters are
    not allowed), and the spliced sub-expression is pushed back.

    Returns:
        The C expression string left on the stack.

    Raises:
        Exception: if a variable cannot be resolved, or if the stack does not
            hold exactly one entry once all tokens are consumed.
    """
    stack = []

    def resolve(token):
        # Only "$"-prefixed tokens are variables; everything else is passed
        # through untouched.
        if token[0] != "$":
            return token
        resolved_variable = resolve_variable(token, set, False)
        if resolved_variable is None:
            raise Exception("Failed to resolve variable " + token + " in expression " + expression + " for " + set.name + " :: " + counter_name)
        return resolved_variable

    for token in expression.split():
        stack.append(token)
        while stack and stack[-1] in exp_ops:
            op = stack.pop()
            argc, callback = exp_ops[op]
            # Operands come off the stack in reverse order: args[0] is the
            # right-most operand of the operator.
            args = [resolve(stack.pop()) for _ in range(argc)]
            stack.append(callback(args))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn expression for " + set.name + " :: " +
                counter_name + ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
                expression + "\"")

    # An expression may be a single variable with no operator; the failure
    # message must then name that value (no operand was ever popped).
    return resolve(stack[-1])
345
def output_counter_read(gen, set, counter):
    """Emit the C read function of a counter, or an alias to an existing one.

    When a function was already emitted for the same expanded equation hash,
    only a `#define` mapping this counter's read symbol onto it is written;
    otherwise a new static function is generated and recorded in
    hashed_funcs. The `gen` argument is not used.
    """
    c("\n")
    c(f"/* {set.name} :: {counter.get('name')} */")

    read_sym = counter.read_sym

    if counter.read_hash in hashed_funcs:
        c(f"#define {read_sym} \\")
        c_indent(3)
        c(hashed_funcs[counter.read_hash])
        c_outdent(3)
        return

    ret_type = counter.get('data_type')
    if ret_type == "uint64":
        ret_type = "uint64_t"
    read_eq = counter.get('equation')

    # Continuation parameters are aligned under the first one.
    param_indent = len(read_sym) + 1

    c("static " + ret_type)
    c(read_sym + "(UNUSED struct intel_perf_config *perf,\n")
    c_indent(param_indent)
    c("const struct intel_perf_query_info *query,\n")
    c("const struct intel_perf_query_result *results)\n")
    c_outdent(param_indent)

    c("{")
    c_indent(3)
    output_rpn_equation_code(set, counter, read_eq)
    c_outdent(3)
    c("}")

    hashed_funcs[counter.read_hash] = read_sym
376
377
def output_counter_max(gen, set, counter):
    """Emit the C max function of a counter, or an alias to an existing one.

    Nothing is written unless counter.has_custom_max_func() is true. When a
    function was already emitted for the same expanded max-equation hash,
    only a `#define` mapping this counter's max symbol onto it is written;
    otherwise a new static function is generated and recorded in
    hashed_funcs. The `gen` argument is not used.
    """
    max_eq = counter.get('max_equation')

    if not counter.has_custom_max_func():
        return

    c("\n")
    c("/* {0} :: {1} */".format(set.name, counter.get('name')))

    if counter.max_hash in hashed_funcs:
        c("#define %s \\" % counter.max_sym)
        c_indent(3)
        c("%s" % hashed_funcs[counter.max_hash])
        c_outdent(3)
        return

    ret_type = counter.get('data_type')
    if ret_type == "uint64":
        ret_type = "uint64_t"

    # Align continuation parameters under the first one. The width must be
    # that of the symbol actually emitted (max_sym); using read_sym's length
    # leaves the generated parameter list misaligned.
    param_indent = len(counter.max_sym) + 1

    c("static " + ret_type)
    c(counter.max_sym + "(struct intel_perf_config *perf,\n")
    c_indent(param_indent)
    c("const struct intel_perf_query_info *query,\n")
    c("const struct intel_perf_query_result *results)\n")
    c_outdent(param_indent)
    c("{")
    c_indent(3)
    output_rpn_equation_code(set, counter, max_eq)
    c_outdent(3)
    c("}")

    hashed_funcs[counter.max_hash] = counter.max_sym
410
411
# Size in bytes assumed for each C type a counter value can have.
c_type_sizes = {
    "uint32_t": 4,
    "uint64_t": 8,
    "float": 4,
    "double": 8,
    "bool": 4,
}


def sizeof(c_type):
    """Return the size in bytes recorded for c_type; KeyError if unknown."""
    return c_type_sizes[c_type]
415
def pot_align(base, pot_alignment):
    """Round base up to the next multiple of pot_alignment.

    The bit-mask arithmetic is only meaningful when pot_alignment is a power
    of two.
    """
    mask = pot_alignment - 1
    return (base + mask) & ~mask
418
# Semantic types from the XML that are emitted under a different name;
# types not listed here are used unchanged.
semantic_type_map = dict(
    duration="raw",
    ratio="event",
)
423
def output_availability(set, availability, counter_name):
    """Emit the opening `if (...) {` for an RPN availability expression.

    The spliced C expression is broken at every ' && ' so that each clause
    is written on its own line, continuation lines being indented by four
    extra columns. The matching closing brace is left to the caller.
    """
    clauses = splice_rpn_expression(set, counter_name, availability).split(' && ')
    first, rest = clauses[0], clauses[1:]

    if not rest:
        c("if (" + first + ") {")
        return

    c("if (" + first + " &&")
    c_indent(4)
    for clause in rest[:-1]:
        c(clause + " &&")
    c(rest[-1] + ") {")
    c_outdent(4)
437
438
def output_units(unit):
    """Return unit upper-cased with every space replaced by an underscore."""
    return "_".join(unit.split(' ')).upper()
441
442
# Known counter units. The value tells whether the unit should be visible
# in the counter description.
units_map = {
    "bytes": True,
    "cycles": True,
    "eu atomic requests to l3 cache lines": False,
    "eu bytes per l3 cache line": False,
    "eu requests to l3 cache lines": False,
    "eu sends to l3 cache lines": False,
    "events": True,
    "hz": True,
    "messages": True,
    "ns": True,
    "number": False,
    "percent": True,
    "pixels": True,
    "texels": True,
    "threads": True,
    "us": True,
    "utilization": False,
    "gbps": True,
}
464
465
def desc_units(unit):
    """Return the "Unit: <unit>." suffix appended to a counter description.

    Units flagged as not visible in units_map yield an empty string, and
    'hz' is displayed as 'Hz'.

    Raises:
        Exception: if unit is not a key of units_map.
    """
    visible = units_map.get(unit)
    if visible is None:
        raise Exception("Unknown unit: " + unit)
    if not visible:
        return ""
    label = 'Hz' if unit == 'hz' else unit
    return "Unit: " + label + "."
475
476
# Fields identifying one entry of the generated counter table; counters
# with identical values for all of them share a single entry.
counter_key_tuple = collections.namedtuple(
    'counter_key',
    ('name', 'description', 'symbol_name', 'mdapi_group',
     'semantic_type', 'data_type', 'units'),
)
489
490
def counter_key(counter):
    """Build the counter_key_tuple of a counter from its attributes of the same names."""
    return counter_key_tuple(*(counter.get(field) for field in counter_key_tuple._fields))
493
494
def output_counter_struct(set, counter, idx,
                          name_to_idx, desc_to_idx,
                          symbol_name_to_idx, category_to_idx):
    """Emit one designated initializer (`[idx] = { ... },`) of the counter table.

    `counter` is accessed through attributes (name, description,
    symbol_name, mdapi_group, semantic_type, data_type, units). The *_to_idx
    mappings provide the offset of each string inside its string table. The
    `set` argument is not used.
    """
    data_type_uc = counter.data_type.upper()
    # Some semantic types are emitted under a different name.
    semantic_type_uc = semantic_type_map.get(counter.semantic_type,
                                             counter.semantic_type).upper()

    c(f"[{idx}] = {{\n")
    c_indent(3)
    c(f".name_idx = {name_to_idx[counter.name]},\n")
    # Descriptions are stored with their unit suffix appended.
    full_desc = counter.description + " " + desc_units(counter.units)
    c(f".desc_idx = {desc_to_idx[full_desc]},\n")
    c(f".symbol_name_idx = {symbol_name_to_idx[counter.symbol_name]},\n")
    c(f".category_idx = {category_to_idx[counter.mdapi_group]},\n")
    c(f".type = INTEL_PERF_COUNTER_TYPE_{semantic_type_uc},\n")
    c(f".data_type = INTEL_PERF_COUNTER_DATA_TYPE_{data_type_uc},\n")
    c(f".units = INTEL_PERF_COUNTER_UNITS_{output_units(counter.units)},\n")
    c_outdent(3)
    c("},\n")
518
519
def output_counter_report(set, counter, counter_to_idx, current_offset):
    """Emit the C call registering a counter in the query being built.

    The call is wrapped in an `if (...) { }` block when the counter has an
    availability expression. current_offset is first aligned to the size of
    the counter's C type.

    Args:
        set: the Set owning the counter; provides the max/read symbols.
        counter: the counter to register.
        counter_to_idx: maps a counter_key to its index in the counter table.
        current_offset: offset of the first free byte before this counter.

    Returns:
        The offset of the first free byte after this counter.
    """
    data_type = counter.get('data_type')
    # XML "uintNN" types correspond to the C "uintNN_t" types.
    c_type = data_type + "_t" if "uint" in data_type else data_type

    c("\n")

    availability = counter.get('availability')
    if availability:
        output_availability(set, availability, counter.get('name'))
        c_indent(3)

    idx = str(counter_to_idx[counter_key(counter)])

    current_offset = pot_align(current_offset, sizeof(c_type))

    # The uint64 and float variants take identical arguments; only the name
    # of the helper differs.
    if data_type == 'uint64':
        add_counter = "intel_perf_query_add_counter_uint64"
    else:
        add_counter = "intel_perf_query_add_counter_float"

    symbol_name = counter.get('symbol_name')
    c(add_counter + "(query, " + idx + ", " +
      str(current_offset) + ", " +
      set.max_funcs[symbol_name] + "," +
      set.read_funcs[symbol_name] + ");\n")

    if availability:
        c_outdent(3)
        c("}")

    return current_offset + sizeof(c_type)
563
564
def str_to_idx_table(strs):
    """Assign each string its offset inside a NUL-separated string table.

    The strings are laid out in sorted order, each followed by one
    terminator, so a string's offset is the sum of `len(s) + 1` over all the
    strings sorted before it. An empty input yields an empty table.

    Offsets are counted in characters.
    NOTE(review): they only match byte offsets in the generated C array if
    every string encodes to one byte per character - confirm inputs are
    ASCII.

    Returns:
        An OrderedDict mapping each string to its offset, in sorted order.
    """
    str_to_idx = collections.OrderedDict()
    offset = 0
    for s in sorted(strs):
        str_to_idx[s] = offset
        offset += len(s) + 1  # +1 for the "\0" terminator
    return str_to_idx
577
578
def output_str_table(name: str, str_to_idx):
    """Emit a `static const char <name>[]` array holding a string table.

    Each key of str_to_idx becomes one "\\0"-terminated string literal on
    its own line, preceded by a comment carrying its offset.
    """
    c("\n")
    c("static const char " + name + "[] = {\n")
    c_indent(3)
    for text, offset in str_to_idx.items():
        c(f"/* {offset} */ \"{text}\\0\"")
    c_outdent(3)
    c("};\n")
586
587
# Maps the `type` attribute of a <register_config> element to the name of
# the matching register array in the generated query configuration.
register_types = dict(
    FLEX='flex_regs',
    NOA='mux_regs',
    OA='b_counter_regs',
)
593
def compute_register_lengths(set):
    """Count the <register> elements of a set per register array name.

    Returns a dict mapping each register array name (see register_types) to
    the total number of registers over all the <register_config> elements of
    that type; types absent from the set have no entry.
    """
    lengths = {}
    for config in set.findall('register_config'):
        array_name = register_types[config.get('type')]
        lengths[array_name] = lengths.get(array_name, 0) + len(config.findall('register'))
    return lengths
605
606
def generate_register_configs(set):
    """Emit the register programming tables of every <register_config> of a set.

    For each configuration a static array of { .reg, .val } pairs is
    written and hooked into `query->config`. Configurations carrying an
    availability expression are wrapped in the corresponding `if` block.
    """
    for register_config in set.findall('register_config'):
        config_type = register_config.get('type')
        array_name = register_types[config_type]

        availability = register_config.get('availability')
        if availability:
            output_availability(set, availability, config_type + ' register config')
            c_indent(3)

        c(f"static const struct intel_perf_query_register_prog {array_name}[] = {{")
        c_indent(3)
        for register in register_config.findall('register'):
            c(f"{{ .reg = {register.get('address')}, .val = {register.get('value')} }},")
        c_outdent(3)
        c("};")
        c(f"query->config.{array_name} = {array_name};")
        c(f"query->config.n_{array_name} = ARRAY_SIZE({array_name});")

        if availability:
            c_outdent(3)
            c("}")
        c("\n")
632
633
634# Wraps a <counter> element from the oa-*.xml files.
class Counter:
    """A <counter> XML element together with its generated C symbol names.

    read_sym and max_sym are the names of the C functions reading the
    counter and its maximum; read_hash and max_hash are the fully expanded
    equations, filled in by compute_hashes().
    """

    def __init__(self, set, xml):
        self.xml = xml
        self.set = set
        # Both stay None until compute_hashes() has run.
        self.read_hash = None
        self.max_hash = None

        self.read_sym = self._symbol("read")
        self.max_sym = self.build_max_sym()

    def _symbol(self, suffix):
        """Return `<chipset>__<set>__<counter>__<suffix>`."""
        return f"{self.set.gen.chipset}__{self.set.underscore_name}__{self.xml.get('underscore_name')}__{suffix}"

    @staticmethod
    def _is_plain_percentage(max_eq):
        """Tell whether max_eq is a plain number equal to 100."""
        try:
            return float(max_eq) == 100
        except ValueError:
            return False

    def get(self, prop):
        """Return the XML attribute prop of the counter (None when absent)."""
        return self.xml.get(prop)

    def compute_hashes(self):
        """Compute read_hash (and max_hash when there is a max equation).

        A hash is the equation in which every token naming another counter
        of the set has been replaced by that counter's own read hash, so the
        result also covers all the sub-equations it depends on. Does nothing
        when the read hash was already computed.
        """
        if self.read_hash is not None:
            return

        def expand(token):
            dependency = None
            if token[0] == "$":
                dependency = self.set.counter_vars.get(token)
            if dependency is None:
                # Literal, operator or non-counter variable: kept verbatim.
                return token
            dependency.compute_hashes()
            return dependency.read_hash

        read_tokens = self.xml.get('equation').split()
        self.read_hash = ' '.join([expand(token) for token in read_tokens])

        max_eq = self.xml.get('max_equation')
        if max_eq:
            self.max_hash = ' '.join([expand(token) for token in max_eq.split()])

    def has_custom_max_func(self):
        """Tell whether a dedicated C max function is needed for this counter.

        That is the case when there is a max equation, it is not the plain
        constant 100, and all of its variables can be resolved.
        """
        max_eq = self.xml.get('max_equation')
        if not max_eq:
            return False
        if self._is_plain_percentage(max_eq):
            return False

        for token in max_eq.split():
            if token[0] != '$':
                continue
            if resolve_variable(token, self.set, True) is None:
                print("unresolved token " + token)
                return False
        return True

    def build_max_sym(self):
        """Return the C symbol providing this counter's maximum value.

        "NULL" without a max equation, one of the shared percentage helpers
        when the equation is the constant 100, else the counter's own
        `..__max` symbol.
        """
        max_eq = self.xml.get('max_equation')
        if not max_eq:
            return "NULL"

        if self._is_plain_percentage(max_eq):
            if self.xml.get('data_type') == 'uint64':
                return "percentage_max_uint64"
            return "percentage_max_float"

        assert self.has_custom_max_func()
        return self._symbol("max")
708
709
710# Wraps a <set> element from the oa-*.xml files.
class Set:
    """A <set> XML element together with the Counter objects it contains.

    counter_vars maps "$<symbol_name>" to each Counter, while read_funcs and
    max_funcs map the bare symbol name to the C symbols used to read the
    counter and its maximum.
    """

    def __init__(self, gen, xml):
        self.gen = gen
        self.xml = xml

        self.counter_vars = {}
        self.max_funcs = {}
        self.read_funcs = {}
        self.counters = []

        # The lookup tables are filled while the counters are created, one
        # counter at a time.
        for counter_xml in self.xml.findall("counter"):
            counter = Counter(self, counter_xml)
            symbol = counter.get('symbol_name')
            self.counters.append(counter)
            self.counter_vars['$' + symbol] = counter
            self.read_funcs[symbol] = counter.read_sym
            self.max_funcs[symbol] = counter.max_sym

        # Hashes can only be computed once every counter is registered.
        for counter in self.counters:
            counter.compute_hashes()

    @property
    def hw_config_guid(self):
        """The `hw_config_guid` XML attribute of the set."""
        return self.xml.get('hw_config_guid')

    @property
    def name(self):
        """The `name` XML attribute of the set."""
        return self.xml.get('name')

    @property
    def symbol_name(self):
        """The `symbol_name` XML attribute of the set."""
        return self.xml.get('symbol_name')

    @property
    def underscore_name(self):
        """The `underscore_name` XML attribute of the set."""
        return self.xml.get('underscore_name')

    def findall(self, path):
        """Return all the sub-elements of the set matching path."""
        return self.xml.findall(path)

    def find(self, path):
        """Return the first sub-element of the set matching path, or None."""
        return self.xml.find(path)
753
754
755# Wraps an entire oa-*.xml file.
class Gen:
    """A parsed metrics XML file and the Set objects it contains.

    chipset is the lower-cased `chipset` attribute of the first <set>
    element found in the file.
    """

    def __init__(self, filename):
        self.filename = filename
        self.xml = et.parse(self.filename)

        first_set = self.xml.find('.//set')
        self.chipset = first_set.get('chipset').lower()

        self.sets = []
        for set_xml in self.xml.findall(".//set"):
            wrapped = Set(self, set_xml)
            self.sets.append(wrapped)
765
766
767def main():
768    global c_file
769    global header_file
770
771    parser = argparse.ArgumentParser()
772    parser.add_argument("--header", help="Header file to write", required=True)
773    parser.add_argument("--code", help="C file to write", required=True)
774    parser.add_argument("xml_files", nargs='+', help="List of xml metrics files to process")
775
776    args = parser.parse_args()
777
778    c_file = open(args.code, 'w')
779    header_file = open(args.header, 'w')
780
781    gens = []
782    for xml_file in args.xml_files:
783        gens.append(Gen(xml_file))
784
785
786    copyright = textwrap.dedent("""\
787        /* Autogenerated file, DO NOT EDIT manually! generated by {}
788         *
789         * Copyright (c) 2015 Intel Corporation
790         *
791         * Permission is hereby granted, free of charge, to any person obtaining a
792         * copy of this software and associated documentation files (the "Software"),
793         * to deal in the Software without restriction, including without limitation
794         * the rights to use, copy, modify, merge, publish, distribute, sublicense,
795         * and/or sell copies of the Software, and to permit persons to whom the
796         * Software is furnished to do so, subject to the following conditions:
797         *
798         * The above copyright notice and this permission notice (including the next
799         * paragraph) shall be included in all copies or substantial portions of the
800         * Software.
801         *
802         * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
803         * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
804         * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
805         * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
806         * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
807         * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
808         * DEALINGS IN THE SOFTWARE.
809         */
810
811        """).format(os.path.basename(__file__))
812
813    h(copyright)
814    h(textwrap.dedent("""\
815        #pragma once
816
817        struct intel_perf_config;
818
819        """))
820
821    c(copyright)
822    c(textwrap.dedent("""\
823        #include <stdint.h>
824        #include <stdbool.h>
825
826        #include "util/hash_table.h"
827        #include "util/ralloc.h"
828
829        """))
830
831    c("#include \"" + os.path.basename(args.header) + "\"")
832
833    c(textwrap.dedent("""\
834        #include "perf/intel_perf.h"
835        #include "perf/intel_perf_setup.h"
836        """))
837
838    names = builtins.set()
839    descs = builtins.set()
840    symbol_names = builtins.set()
841    categories = builtins.set()
842    for gen in gens:
843        for set in gen.sets:
844            for counter in set.counters:
845                names.add(counter.get('name'))
846                symbol_names.add(counter.get('symbol_name'))
847                descs.add(counter.get('description') + " " + desc_units(counter.get('units')))
848                categories.add(counter.get('mdapi_group'))
849
850    name_to_idx = str_to_idx_table(names)
851    output_str_table("name", name_to_idx)
852
853    desc_to_idx = str_to_idx_table(descs)
854    output_str_table("desc", desc_to_idx)
855
856    symbol_name_to_idx = str_to_idx_table(symbol_names)
857    output_str_table("symbol_name", symbol_name_to_idx)
858
859    category_to_idx = str_to_idx_table(categories)
860    output_str_table("category", category_to_idx)
861
862    # Print out all equation functions.
863    for gen in gens:
864        for set in gen.sets:
865            for counter in set.counters:
866                output_counter_read(gen, set, counter)
867                output_counter_max(gen, set, counter)
868
869    c("\n")
870    c("static const struct intel_perf_query_counter_data counters[] = {\n")
871    c_indent(3)
872
873    counter_to_idx = collections.OrderedDict()
874    idx = 0
875    for gen in gens:
876        for set in gen.sets:
877            for counter in set.counters:
878                key = counter_key(counter)
879                if key not in counter_to_idx:
880                    counter_to_idx[key] = idx
881                    output_counter_struct(set, key, idx,
882                                          name_to_idx,
883                                          desc_to_idx,
884                                          symbol_name_to_idx,
885                                          category_to_idx)
886                    idx += 1
887
888    c_outdent(3)
889    c("};\n\n")
890
891    c(textwrap.dedent("""\
892        static void ATTRIBUTE_NOINLINE
893        intel_perf_query_add_counter_uint64(struct intel_perf_query_info *query,
894                                            int counter_idx, size_t offset,
895                                            intel_counter_read_uint64_t oa_counter_max,
896                                            intel_counter_read_uint64_t oa_counter_read)
897        {
898           struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
899           const struct intel_perf_query_counter_data *counter = &counters[counter_idx];
900
901           dest->name = &name[counter->name_idx];
902           dest->desc = &desc[counter->desc_idx];
903           dest->symbol_name = &symbol_name[counter->symbol_name_idx];
904           dest->category = &category[counter->category_idx];
905
906           dest->offset = offset;
907           dest->type = counter->type;
908           dest->data_type = counter->data_type;
909           dest->units = counter->units;
910           dest->oa_counter_max_uint64 = oa_counter_max;
911           dest->oa_counter_read_uint64 = oa_counter_read;
912        }
913
914        static void ATTRIBUTE_NOINLINE
915        intel_perf_query_add_counter_float(struct intel_perf_query_info *query,
916                                           int counter_idx, size_t offset,
917                                           intel_counter_read_float_t oa_counter_max,
918                                           intel_counter_read_float_t oa_counter_read)
919        {
920           struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
921           const struct intel_perf_query_counter_data *counter = &counters[counter_idx];
922
923           dest->name = &name[counter->name_idx];
924           dest->desc = &desc[counter->desc_idx];
925           dest->symbol_name = &symbol_name[counter->symbol_name_idx];
926           dest->category = &category[counter->category_idx];
927
928           dest->offset = offset;
929           dest->type = counter->type;
930           dest->data_type = counter->data_type;
931           dest->units = counter->units;
932           dest->oa_counter_max_float = oa_counter_max;
933           dest->oa_counter_read_float = oa_counter_read;
934        }
935
936        static float ATTRIBUTE_NOINLINE
937        percentage_max_float(struct intel_perf_config *perf,
938                             const struct intel_perf_query_info *query,
939                             const struct intel_perf_query_result *results)
940        {
941           return 100;
942        }
943
944        static uint64_t ATTRIBUTE_NOINLINE
945        percentage_max_uint64(struct intel_perf_config *perf,
946                              const struct intel_perf_query_info *query,
947                              const struct intel_perf_query_result *results)
948        {
949           return 100;
950        }
951        """))
952
953    # Print out all metric sets registration functions for each set in each
954    # generation.
955    for gen in gens:
956        for set in gen.sets:
957            counters = set.counters
958
959            c("\n")
960            c("\nstatic void\n")
961            c("{0}_register_{1}_counter_query(struct intel_perf_config *perf)\n".format(gen.chipset, set.underscore_name))
962            c("{\n")
963            c_indent(3)
964
965            c("struct intel_perf_query_info *query = intel_query_alloc(perf, %u);\n" % len(counters))
966            c("\n")
967            c("query->name = \"" + set.name + "\";\n")
968            c("query->symbol_name = \"" + set.symbol_name + "\";\n")
969            c("query->guid = \"" + set.hw_config_guid + "\";\n")
970
971            c("\n")
972            c("struct intel_perf_query_counter *counter = query->counters;\n")
973
974            c("\n")
975            c("/* Note: we're assuming there can't be any variation in the definition ")
976            c(" * of a query between contexts so it's ok to describe a query within a ")
977            c(" * global variable which only needs to be initialized once... */")
978            c("\nif (!query->data_size) {")
979            c_indent(3)
980
981            generate_register_configs(set)
982
983            offset = 0
984            for counter in counters:
985                offset = output_counter_report(set, counter, counter_to_idx, offset)
986
987
988            c("\ncounter = &query->counters[query->n_counters - 1];\n")
989            c("query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);\n")
990
991            c_outdent(3)
992            c("}");
993
994            c("\n_mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);")
995
996            c_outdent(3)
997            c("}\n")
998
999        h("void intel_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf);\n")
1000
1001        c("\nvoid")
1002        c("intel_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf)")
1003        c("{")
1004        c_indent(3)
1005
1006        for set in gen.sets:
1007            c("{0}_register_{1}_counter_query(perf);".format(gen.chipset, set.underscore_name))
1008
1009        c_outdent(3)
1010        c("}")
1011
1012
# Script entry point: call main() only when this file is executed directly,
# so that importing it as a module does not trigger the generation run.
if __name__ == "__main__":
    main()
1015