#!/usr/bin/env python3
# Copyright © 2019, 2022 Intel Corporation
# SPDX-License-Identifier: MIT

from __future__ import annotations
from collections import OrderedDict
import copy
import io
import pathlib
import os.path
import re
import xml.etree.ElementTree as et
import typing

if typing.TYPE_CHECKING:
    # Structural type for the argparse namespace used by the genxml tools.
    class Args(typing.Protocol):

        files: typing.List[pathlib.Path]
        validate: bool
        quiet: bool


def get_filename(element: et.Element) -> str:
    """Return the 'filename' attribute of `element`."""
    return element.attrib['filename']

def get_name(element: et.Element) -> str:
    """Return the 'name' attribute of `element`."""
    return element.attrib['name']

def get_value(element: et.Element) -> int:
    """Return the 'value' attribute as an int (base prefix honored, e.g. 0x10)."""
    return int(element.attrib['value'], 0)

def get_start(element: et.Element) -> int:
    """Return the 'start' attribute as an int (base prefix honored)."""
    return int(element.attrib['start'], 0)


# Scalar field types built into genxml; anything else named as a field
# type is a reference to a struct or enum defined elsewhere.
BASE_TYPES = {
    'address',
    'offset',
    'int',
    'uint',
    'bool',
    'float',
    'mbz',
    'mbo',
}

# Fixed-point types, e.g. "u4.8" (unsigned) or "s2.14" (signed).
FIXED_PATTERN = re.compile(r"(s|u)(\d+)\.(\d+)")

def is_base_type(name: str) -> bool:
    """Return True if `name` is a built-in scalar or fixed-point type."""
    return name in BASE_TYPES or FIXED_PATTERN.match(name) is not None

def add_struct_refs(items: typing.OrderedDict[str, bool], node: et.Element) -> None:
    """Recursively collect into `items` the non-base type names referenced
    by the fields of `node` (a <struct>, <group>, or <field> element).

    `items` is used as an ordered set; the mapped value is always True.
    """
    if node.tag == 'field':
        if 'type' in node.attrib and not is_base_type(node.attrib['type']):
            t = node.attrib['type']
            items[t] = True
        return
    if node.tag not in {'struct', 'group'}:
        return
    for c in node:
        add_struct_refs(items, c)


class Struct(object):
    """Wraps a <struct> element and tracks the other structs it references,
    so structs can be emitted dependencies-first."""

    def __init__(self, xml: et.Element):
        self.xml = xml
        self.name = xml.attrib['name']
        # name -> Struct for every struct this one references.
        self.deps: typing.OrderedDict[str, Struct] = OrderedDict()

    def find_deps(self, struct_dict, enum_dict) -> None:
        """Populate self.deps with the structs referenced by this struct's
        fields (references to enums or unknown names are ignored)."""
        deps: typing.OrderedDict[str, bool] = OrderedDict()
        add_struct_refs(deps, self.xml)
        for d in deps.keys():
            if d in struct_dict:
                self.deps[d] = struct_dict[d]

    def add_xml(self, items: typing.OrderedDict[str, et.Element]) -> None:
        """Add this struct's XML element to `items`, dependencies first.

        Re-adding an existing key leaves its position unchanged, so each
        struct appears exactly once, after everything it depends on.
        """
        for d in self.deps.values():
            d.add_xml(items)
        items[self.name] = self.xml


# ordering of the various tag attributes
GENXML_DESC = {
    'genxml'      : [ 'name', 'gen', ],
    'import'      : [ 'name', ],
    'exclude'     : [ 'name', ],
    'enum'        : [ 'name', 'value', 'prefix', ],
    'struct'      : [ 'name', 'length', ],
    'field'       : [ 'name', 'start', 'end', 'type', 'default', 'prefix', 'nonzero' ],
    'instruction' : [ 'name', 'bias', 'length', 'engine', ],
    'value'       : [ 'name', 'value', 'dont_use', ],
    'group'       : [ 'count', 'start', 'size', ],
    'register'    : [ 'name', 'length', 'num', ],
}


def node_validator(old: et.Element, new: et.Element) -> bool:
    """Compare two ElementTree Element nodes.

    There is no builtin equality method, so calling `et.Element == et.Element` is
    equivalent to calling `et.Element is et.Element`. We instead want to compare
    that the contents are the same, including the order of children and attributes
    """
    return (
        # Check that the attributes are the same
        old.tag == new.tag and
        old.text == new.text and
        (old.tail or "").strip() == (new.tail or "").strip() and
        list(old.attrib.items()) == list(new.attrib.items()) and
        len(old) == len(new) and

        # check that there are no unexpected attributes
        set(new.attrib).issubset(GENXML_DESC[new.tag]) and

        # check that the attributes are sorted
        list(new.attrib) == list(old.attrib) and
        all(node_validator(f, s) for f, s in zip(old, new))
    )


def process_attribs(elem: et.Element) -> None:
    """Recursively drop attributes not listed in GENXML_DESC for the tag,
    and reorder the survivors into the canonical order."""
    valid = GENXML_DESC[elem.tag]
    # sort and prune attributes
    elem.attrib = OrderedDict(sorted(((k, v) for k, v in elem.attrib.items() if k in valid),
                                     key=lambda x: valid.index(x[0])))
    for e in elem:
        process_attribs(e)


def sort_xml(xml: et.ElementTree) -> None:
    """Canonically sort a genxml tree in place.

    Top-level order becomes: imports, enums (by name), structs (by name,
    dependencies first), instructions (by name), registers (by name).
    Children of enums are sorted by value; children of structs,
    instructions, and registers by bit start; attributes are pruned and
    reordered via process_attribs.
    """
    genxml = xml.getroot()

    imports = xml.findall('import')

    enums = sorted(xml.findall('enum'), key=get_name)
    enum_dict: typing.Dict[str, et.Element] = {}
    for e in enums:
        e[:] = sorted(e, key=get_value)
        enum_dict[e.attrib['name']] = e

    # Structs are a bit annoying because they can refer to each other. We sort
    # them alphabetically and then build a graph of dependencies. Finally we go
    # through the alphabetically sorted list and print out dependencies first.
    structs = sorted(xml.findall('./struct'), key=get_name)
    wrapped_struct_dict: typing.Dict[str, Struct] = {}
    for s in structs:
        s[:] = sorted(s, key=get_start)
        ws = Struct(s)
        wrapped_struct_dict[ws.name] = ws

    for ws in wrapped_struct_dict.values():
        ws.find_deps(wrapped_struct_dict, enum_dict)

    sorted_structs: typing.OrderedDict[str, et.Element] = OrderedDict()
    for s in structs:
        _s = wrapped_struct_dict[s.attrib['name']]
        _s.add_xml(sorted_structs)

    instructions = sorted(xml.findall('./instruction'), key=get_name)
    for i in instructions:
        i[:] = sorted(i, key=get_start)

    registers = sorted(xml.findall('./register'), key=get_name)
    for r in registers:
        r[:] = sorted(r, key=get_start)

    new_elems = (imports + enums + list(sorted_structs.values()) +
                 instructions + registers)
    for n in new_elems:
        process_attribs(n)
    genxml[:] = new_elems


# `default_imports` documents which files should be imported for our
# genxml files. This is only useful if a genxml file does not already
# include imports.
#
# Basically, this allows the genxml_import.py tool used with the
# --import switch to know which files should be added as an import.
# (genxml_import.py uses GenXml.add_xml_imports, which relies on
# `default_imports`.)
default_imports = OrderedDict([
    ('gen4.xml', ()),
    ('gen45.xml', ('gen4.xml',)),
    ('gen5.xml', ('gen45.xml',)),
    ('gen6.xml', ('gen5.xml',)),
    ('gen7.xml', ('gen6.xml',)),
    ('gen75.xml', ('gen7.xml',)),
    ('gen8.xml', ('gen75.xml',)),
    ('gen9.xml', ('gen8.xml',)),
    ('gen11.xml', ('gen9.xml',)),
    ('gen12.xml', ('gen11.xml',)),
    ('gen125.xml', ('gen12.xml',)),
    ('gen20.xml', ('gen125.xml',)),
    ('gen20_rt.xml', ('gen125_rt.xml',)),
    ])
known_genxml_files = list(default_imports.keys())


def genxml_path_to_key(path):
    """Sort key ordering known genxml files by hardware generation.

    Unknown filenames sort after all known ones.
    """
    try:
        return known_genxml_files.index(path.name)
    except ValueError:
        return len(known_genxml_files)


def sort_genxml_files(files):
    """Sort a list of genxml paths in place into generation order."""
    files.sort(key=genxml_path_to_key)

class GenXml(object):
    """A parsed genxml file, with support for processing genxml <import>
    tags and filtering the tree by engine or symbol name."""

    def __init__(self, filename, import_xml=False, files=None):
        # `files` is the set of genxml paths already loaded during a
        # (possibly recursive) import, used to detect import cycles.
        if files is not None:
            self.files = files
        else:
            self.files = set()
        self.filename = pathlib.Path(filename)

        # Assert that the file hasn't already been loaded which would
        # indicate a loop in genxml imports, and lead to infinite
        # recursion.
        assert self.filename not in self.files

        self.files.add(self.filename)
        self.et = et.parse(self.filename)
        if import_xml:
            self.merge_imported()

    def process_imported(self, merge=False, drop_dupes=False):
        """Processes imported genxml files.

        This helper function scans imported genxml files and has two
        mutually exclusive operating modes.

        If `merge` is True, then items will be merged into the
        `self.et` data structure.

        If `drop_dupes` is True, then any item that is a duplicate to
        an item imported will be dropped from the `self.et` data
        structure. This is used by `self.optimize_xml_import` to
        shrink the size of the genxml file by reducing duplications.

        """
        assert merge != drop_dupes
        orig_elements = set(self.et.getroot())
        name_and_obj = lambda i: (get_name(i), i)
        filter_ty = lambda s: filter(lambda i: i.tag == s, orig_elements)
        filter_ty_item = lambda s: dict(map(name_and_obj, filter_ty(s)))

        # orig_by_tag stores items defined directly in the genxml
        # file. If a genxml item is defined in the genxml directly,
        # then any imported items of the same name are ignored.
        orig_by_tag = {
            'enum': filter_ty_item('enum'),
            'struct': filter_ty_item('struct'),
            'instruction': filter_ty_item('instruction'),
            'register': filter_ty_item('register'),
        }

        for item in orig_elements:
            if item.tag == 'import':
                assert 'name' in item.attrib
                filename = os.path.split(item.attrib['name'])
                exceptions = set()
                for e in item:
                    assert e.tag == 'exclude'
                    exceptions.add(e.attrib['name'])
                # We should be careful to restrict loaded files to
                # those under the source or build trees. For now, only
                # allow siblings of the current xml file.
                assert filename[0] == '', 'Directories not allowed with import'
                filename = os.path.join(os.path.dirname(self.filename),
                                        filename[1])
                # Name the *imported* file in the failure message; it is
                # the one that is missing, not self.filename.
                assert os.path.exists(filename), f'{filename} does not exist'

                # Here we load the imported genxml file. We set
                # `import_xml` to true so that any imports in the
                # imported genxml will be merged during the loading
                # process.
                #
                # The `files` parameter is a set of files that have
                # been loaded, and it is used to prevent any cycles
                # (infinite recursion) while loading imported genxml
                # files.
                genxml = GenXml(filename, import_xml=True, files=self.files)
                imported_elements = set(genxml.et.getroot())

                # `to_add` is a set of items that were imported and
                # should be merged into the `self.et` data structure.
                # This is only used when the `merge` parameter is
                # True.
                to_add = set()
                # `to_remove` is a set of items that can safely be
                # imported since the item is equivalent. This is only
                # used when the `drop_dupes` parameter is True.
                to_remove = set()
                for i in imported_elements:
                    if i.tag not in orig_by_tag:
                        continue
                    if i.attrib['name'] in exceptions:
                        continue
                    if i.attrib['name'] in orig_by_tag[i.tag]:
                        if merge:
                            # An item with this same name was defined
                            # in the genxml directly. Then we should
                            # ignore (not merge) the imported item.
                            continue
                    else:
                        if drop_dupes:
                            # Since this item is not in the imported
                            # genxml, we can't consider dropping it.
                            continue
                    if merge:
                        to_add.add(i)
                    else:
                        assert drop_dupes
                        orig_element = orig_by_tag[i.tag][i.attrib['name']]
                        if not node_validator(i, orig_element):
                            continue
                        to_remove.add(orig_element)

                if len(to_add) > 0:
                    # Now that we have scanned through all the items
                    # in the imported genxml file, if any items were
                    # found which should be merged, we add them into
                    # our `self.et` data structure. After this it will
                    # be as if the items had been directly present in
                    # the genxml file.
                    assert len(to_remove) == 0
                    self.et.getroot().extend(list(to_add))
                    sort_xml(self.et)
                elif len(to_remove) > 0:
                    self.et.getroot()[:] = list(orig_elements - to_remove)
                    sort_xml(self.et)

    def merge_imported(self):
        """Merge imported items from genxml imports.

        Genxml <import> tags specify that elements should be brought
        in from another genxml source file. After this function is
        called, these elements will become part of the `self.et` data
        structure as if the elements had been directly included in the
        genxml directly.

        Items from imported genxml files will be completely ignored if
        an item with the same name is already defined in the genxml
        file.

        """
        self.process_imported(merge=True)

    def flatten_imported(self):
        """Flattens the genxml to not include any imports

        Essentially this helper will put the `self.et` into a state
        that includes all imported items directly, and does not
        contain any <import> tags. This is used by the
        genxml_import.py with the --flatten switch to "undo" any
        genxml imports.

        """
        self.merge_imported()
        root = self.et.getroot()
        imports = root.findall('import')
        for i in imports:
            root.remove(i)

    def add_xml_imports(self):
        """Adds imports to the genxml file.

        Using the `default_imports` structure, we add imports to the
        genxml file.

        """
        # `imports` is a set of filenames currently imported by the
        # genxml.
        imports = self.et.findall('import')
        imports = set(map(lambda el: el.attrib['name'], imports))
        new_elements = []
        self_flattened = copy.deepcopy(self)
        self_flattened.flatten_imported()
        old_names = { el.attrib['name'] for el in self_flattened.et.getroot() }
        for import_xml in default_imports.get(self.filename.name, tuple()):
            if import_xml in imports:
                # This genxml is already imported, so we don't need to
                # add it as an import.
                continue
            el = et.Element('import', {'name': import_xml})
            import_path = self.filename.with_name(import_xml)
            imported_genxml = GenXml(import_path, import_xml=True)
            imported_names = { el.attrib['name']
                               for el in imported_genxml.et.getroot()
                               if el.tag != 'import' }
            # Importing this genxml could add some new items. When
            # adding a genxml import, we don't want to add new items,
            # unless they were already in the current genxml. So, we
            # put them into a list of items to exclude when importing
            # the genxml.
            exclude_names = imported_names - old_names
            for n in sorted(exclude_names):
                el.append(et.Element('exclude', {'name': n}))
            new_elements.append(el)
        if len(new_elements) > 0:
            self.et.getroot().extend(new_elements)
            sort_xml(self.et)

    def optimize_xml_import(self):
        """Optimizes the genxml by dropping items that can be imported

        Scans genxml <import> tags, and loads the imported file. If
        any item in the imported file is a duplicate to an item in the
        genxml file, then it will be dropped from the `self.et` data
        structure.

        """
        self.process_imported(drop_dupes=True)

    def filter_engines(self, engines):
        """Drop <instruction> items not tagged for any of `engines`.

        `engines` is a set of engine names (e.g. {'render', 'video'}).
        """
        changed = False
        items = []
        for item in self.et.getroot():
            # When an instruction doesn't have the engine specified,
            # it is considered to be for all engines. Otherwise, we
            # check to see if it's tagged for the engines requested.
            if item.tag == 'instruction' and 'engine' in item.attrib:
                i_engines = set(item.attrib["engine"].split('|'))
                if not (i_engines & engines):
                    # Drop this instruction because it doesn't support
                    # the requested engine types.
                    changed = True
                    continue
            items.append(item)
        if changed:
            self.et.getroot()[:] = items

    def filter_symbols(self, symbol_list):
        """Keep only instructions/structs/registers named in `symbol_list`.

        Enums and imports are always kept.
        """
        # Membership is all we need; a set is the idiomatic container.
        symbols_allowed = set(symbol_list)

        changed = False
        items = []
        for item in self.et.getroot():
            if item.tag in ('instruction', 'struct', 'register') and \
               item.attrib['name'] not in symbols_allowed:
                # Drop the item from the tree
                changed = True
                continue
            items.append(item)
        if changed:
            self.et.getroot()[:] = items

    def sort(self):
        """Canonically sort the tree in place (see sort_xml)."""
        sort_xml(self.et)

    def sorted_copy(self):
        """Return a deep-copied, canonically sorted clone of this GenXml."""
        clone = copy.deepcopy(self)
        clone.sort()
        return clone

    def is_equivalent_xml(self, other):
        """Return True if both trees have equal content, order included."""
        if len(self.et.getroot()) != len(other.et.getroot()):
            return False
        return all(node_validator(old, new)
                   for old, new in zip(self.et.getroot(), other.et.getroot()))

    def write_file(self):
        """Write the tree back to self.filename.

        Skips the write when the on-disk file is already equivalent, and
        writes via a temporary sibling file + rename so the update is
        atomic.
        """
        try:
            old_genxml = GenXml(self.filename)
            if self.is_equivalent_xml(old_genxml):
                return
        except Exception:
            # Best effort: if the existing file is unreadable or invalid,
            # fall through and (re)write it.
            pass

        b_io = io.BytesIO()
        et.indent(self.et, space='  ')
        self.et.write(b_io, encoding="utf-8", xml_declaration=True)
        b_io.write(b'\n')

        tmp = self.filename.with_suffix(f'{self.filename.suffix}.tmp')
        tmp.write_bytes(b_io.getvalue())
        tmp.replace(self.filename)