# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import division

import array
import difflib
import filecmp
import io
import operator
import os
import posixpath
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import uuid

from functools import reduce


def _read_bytes(path):
  """Returns the entire contents of |path| as bytes and closes the file."""
  with open(path, 'rb') as f:
    return f.read()


def _write_bytes(path, data):
  """Overwrites |path| with the bytes-like |data| and closes the file."""
  with open(path, 'wb') as f:
    f.write(data)


def ZapTimestamp(filename):
  """Rewrites a midl.exe output file in place to make it reproducible.

  For '.tlb' files the file-level custom() data (creation string and MIDL
  version) is overwritten with fixed values; the asserts check that the file
  has the layout described below. Every other file is treated as generated
  text: the "created by" banner and the compiler version strings are replaced
  with fixed text.

  Args:
    filename: path of the file to rewrite.
  """
  contents = _read_bytes(filename)
  # midl.exe writes timestamp 2147483647 (2^31 - 1) as creation date into its
  # outputs, but using the local timezone.  To make the output timezone-
  # independent, replace that date with a fixed string of the same length.
  # Also blank out the minor version number.
  if filename.endswith('.tlb'):
    # See https://chromium-review.googlesource.com/c/chromium/src/+/693223 for
    # a fairly complete description of the .tlb binary format.
    # TLB files start with a 0x54 byte header. Offset 0x20 stores how many
    # types are defined in the file, and the header is followed by that many
    # uint32s. After that, 15 section headers appear.  Each section header is
    # 16 bytes, starting with offset and length uint32s.
    # Section 12 in the file contains custom() data.  custom() data has a type
    # (int, string, etc).  Each custom data chunk starts with a uint16_t
    # describing its type.  Type 8 is string data, consisting of a uint32_t
    # len, followed by that many data bytes, followed by 'W' bytes to pad to a
    # 4 byte boundary.  Type 0x13 is uint32 data, followed by 4 data bytes,
    # followed by two 'W' to pad to a 4 byte boundary.
    # The custom block always starts with one string containing "Created by
    # MIDL version 8...", followed by one uint32 containing 0x7fffffff,
    # followed by another uint32 containing the MIDL compiler version (e.g.
    # 0x0801026e for v8.1.622 -- 0x26e == 622).  These 3 fields take 0x54
    # bytes.
    # There might be more custom data after that, but these 3 blocks are always
    # there for file-level metadata.
    # All data is little-endian in the file.
    assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
    ntypes, = struct.unpack_from('<I', contents, 0x20)
    custom_off, custom_len = struct.unpack_from(
        '<II', contents, 0x54 + 4*ntypes + 11*16)
    assert custom_len >= 0x54
    # First: Type string (0x8), followed by 0x3e characters.
    string_off = custom_off + 6
    string_end = string_off + 0x3e
    assert contents[custom_off:string_off] == b'\x08\x00\x3e\x00\x00\x00'
    assert re.match(
        br'Created by MIDL version 8\.\d\d\.\d{4} '
        br'at ... Jan 1. ..:..:.. 2038\n',
        contents[string_off:string_end])
    # Second: Type uint32 (0x13) storing 0x7fffffff (followed by WW / 0x57 pad)
    assert contents[string_end:string_end + 8] == \
        b'\x13\x00\xff\xff\xff\x7f\x57\x57'
    # Third: Type uint32 (0x13) storing MIDL compiler version.
    assert contents[string_end + 8:string_end + 8 + 2] == b'\x13\x00'
    # Replace "Created by" string with fixed string, and fixed MIDL version with
    # 8.1.622 always.
    contents = (
        contents[0:string_off] +
        b'Created by MIDL version 8.xx.xxxx at a redacted point in time\n' +
        # uint32 (0x13) val 0x7fffffff, WW, uint32 (0x13), val 0x0801026e, WW
        b'\x13\x00\xff\xff\xff\x7f\x57\x57\x13\x00\x6e\x02\x01\x08\x57\x57' +
        contents[custom_off + 0x54:])
  else:
    contents = re.sub(
        br'File created by MIDL compiler version 8\.\d\d\.\d{4} \*/\r\n'
        br'/\* at ... Jan 1. ..:..:.. 2038',
        br'File created by MIDL compiler version 8.xx.xxxx */\r\n'
        br'/* at a redacted point in time', contents)
    contents = re.sub(
        br' Oicf, W1, Zp8, env=(.....) \(32b run\), '
        br'target_arch=(AMD64|X86) 8\.\d\d\.\d{4}',
        br' Oicf, W1, Zp8, env=\1 (32b run), target_arch=\2 8.xx.xxxx',
        contents)
    # TODO(thakis): If we need more hacks than these, try to verify checked-in
    # outputs when we're using the hermetic toolchain.
    # midl.exe older than 8.1.622 omit '//' after #endif, fix that:
    contents = contents.replace(b'#endif !_MIDL_USE_GUIDDEF_',
                                b'#endif // !_MIDL_USE_GUIDDEF_')
    # midl.exe puts the midl version into code in one place.  To have
    # predictable output, lie about the midl version if it's not 8.1.622.
    # This is unfortunate, but remember that there's beauty too in imperfection.
    contents = contents.replace(b'0x801026c, /* MIDL Version 8.1.620 */',
                                b'0x801026e, /* MIDL Version 8.1.622 */')
  _write_bytes(filename, contents)


def get_tlb_contents(tlb_file):
  """Reads |tlb_file| and locates the sections needed for guid patching.

  Returns:
    A tuple (contents, ntypes, type_off, guid_off, guid_len) where |contents|
    is a mutable array.array('B') holding the whole file, |ntypes| is the
    uint32 at header offset 0x20, |type_off| is the offset stored in the first
    section header, and |guid_off|/|guid_len| are the offset and length stored
    in the section header at index 5 (the guid section; 24 bytes per entry).
  """
  # See ZapTimestamp() for a short overview of the .tlb format.
  contents = _read_bytes(tlb_file)
  assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
  ntypes, = struct.unpack_from('<I', contents, 0x20)
  sections_off = 0x54 + 4*ntypes
  # Only the offset of the first section is returned; its length is not used.
  type_off, _ = struct.unpack_from('<II', contents, sections_off)

  guid_off, guid_len = struct.unpack_from('<II', contents, sections_off + 5*16)
  assert guid_len % 24 == 0

  # struct.pack_into() needs a writable buffer, so hand back a mutable copy.
  contents = array.array('B', contents)

  return contents, ntypes, type_off, guid_off, guid_len


def recreate_guid_hashtable(contents, ntypes, guid_off, guid_len):
  """Rebuilds the guid hash table of the .tlb image in |contents| in place.

  Args:
    contents: writable buffer holding the whole .tlb file.
    ntypes: number of types, as returned by get_tlb_contents().
    guid_off: byte offset of the guid section inside |contents|.
    guid_len: byte length of the guid section (24 bytes per entry).
  """
  # This function is called after changing guids in section 6 (the "guid"
  # section). This function recreates the GUID hashtable in section 5. Since the
  # hash table uses chaining, it's easiest to recompute it from scratch rather
  # than trying to patch it up.
  nbuckets = 0x80 // 4
  hashtab = [0xffffffff] * nbuckets
  for guidind in range(guid_off, guid_off + guid_len, 24):
    # The guid occupies the first 16 bytes of the entry; view it as 8 uint16s.
    words = struct.unpack_from('<8H', contents, guidind)
    # midl seems to use the following simple hash function for GUIDs:
    guidhash = reduce(operator.xor, words) % nbuckets
    # Chain this entry in front of whatever the bucket pointed to before; the
    # "next" link lives at offset 0x14 inside the entry.
    struct.pack_into('<I', contents, guidind + 0x14, hashtab[guidhash])
    hashtab[guidhash] = guidind - guid_off
  # Only the offset of the hash section is needed; its stored length is unused.
  hash_off, _ = struct.unpack_from('<II', contents, 0x54 + 4*ntypes + 4*16)
  for i, hashval in enumerate(hashtab):
    struct.pack_into('<I', contents, hash_off + 4*i, hashval)


def overwrite_guids_h(h_file, dynamic_guids):
  """Replaces guids in the generated header |h_file| in place.

  Args:
    h_file: path of the .h file.
    dynamic_guids: dict mapping guid text (bytes) to replacement guid text
        (bytes). Keys are used as case-insensitive regular expressions.
  """
  contents = _read_bytes(h_file)
  for old_guid, new_guid in dynamic_guids.items():
    contents = re.sub(old_guid, new_guid, contents, flags=re.I)
  _write_bytes(h_file, contents)


def get_uuid_format(guid, prefix):
  """Formats a textual guid as the '0x..' list used in generated C files.

  Args:
    guid: guid as bytes, e.g. b'D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83'.
    prefix: bytes inserted just before the first of the eight trailing
        single-byte values (b'' or b'{').

  Returns:
    bytes such as
    b'0xD0E1CACC,0xC63C,0x4192,<prefix>0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83'.
  """
  head = b'0x%s,0x%s,0x%s,' % (guid[0:8], guid[9:13], guid[14:18])
  clock_seq = b'%s0x%s,0x%s' % (prefix, guid[19:21], guid[21:23])
  node = b''.join(b',0x' + guid[i:i + 2] for i in range(24, len(guid), 2))
  return head + clock_seq + node


def get_uuid_format_iid_file(guid):
  """Returns |guid| in the form used by the generated _i.c file."""
  # Convert from "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to
  # 0xD0E1CACC,0xC63C,0x4192,0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83.
  return get_uuid_format(guid, b'')


def overwrite_guids_iid(iid_file, dynamic_guids):
  """Replaces guids in the generated _i.c file |iid_file| in place.

  Args:
    iid_file: path of the _i.c file.
    dynamic_guids: dict mapping guid text (bytes) to replacement guid text
        (bytes). Matching is case-insensitive.
  """
  contents = _read_bytes(iid_file)
  for old_guid, new_guid in dynamic_guids.items():
    contents = re.sub(get_uuid_format_iid_file(old_guid),
                      get_uuid_format_iid_file(new_guid),
                      contents,
                      flags=re.I)
  _write_bytes(iid_file, contents)


def get_uuid_format_proxy_file(guid):
  """Returns |guid| in the form used by the generated _p.c file.

  The returned text is the inside of the initializer; the outermost '{' and
  the closing '}}' shown below are not part of it.
  """
  # Convert from "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to the text between
  # the outer braces of
  # {0xD0E1CACC,0xC63C,0x4192,{0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83}}.
  return get_uuid_format(guid, b'{')


def overwrite_guids_proxy(proxy_file, dynamic_guids):
  """Replaces guids in the generated _p.c file |proxy_file| in place.

  Args:
    proxy_file: path of the _p.c file.
    dynamic_guids: dict mapping guid text (bytes) to replacement guid text
        (bytes). Matching is case-insensitive.
  """
  contents = _read_bytes(proxy_file)
  for old_guid, new_guid in dynamic_guids.items():
    contents = re.sub(get_uuid_format_proxy_file(old_guid),
                      get_uuid_format_proxy_file(new_guid),
                      contents,
                      flags=re.I)
  _write_bytes(proxy_file, contents)


def getguid(contents, offset):
  """Reads the 16-byte binary guid at |offset| in |contents|.

  Returns:
    The guid as upper-case bytes of the form
    b'D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83'.
  """
  g0, g1, g2, g3 = struct.unpack_from('<IHH8s', contents, offset)
  g3 = b''.join(b'%02X' % g for g in bytearray(g3))
  return b'%08X-%04X-%04X-%s-%s' % (g0, g1, g2, g3[0:4], g3[4:])


def setguid(contents, offset, guid):
  """Writes |guid| in 16-byte binary form at |offset| in |contents|.

  Args:
    contents: writable buffer.
    offset: byte offset at which the guid is stored.
    guid: guid text as UTF-8 bytes, in any form accepted by uuid.UUID().
  """
  guid = uuid.UUID(guid.decode('utf-8'))
  # The first three fields are stored little-endian; the last 8 bytes are
  # stored as-is.
  struct.pack_into('<IHH8s', contents, offset,
                   *(guid.fields[0:3] + (guid.bytes[8:], )))


def overwrite_guids_tlb(tlb_file, dynamic_guids):
  """Replaces guids in the type library |tlb_file| in place.

  Every entry of the guid section whose guid matches a key of |dynamic_guids|
  (case-insensitively) is overwritten with the mapped guid, and the guid hash
  table is then rebuilt.

  Args:
    tlb_file: path of the .tlb file.
    dynamic_guids: dict mapping guid text (bytes) to replacement guid text
        (bytes).
  """
  contents, ntypes, type_off, guid_off, guid_len = get_tlb_contents(tlb_file)

  # Case-fold the keys once instead of rescanning every key for every guid.
  replacements = {key.lower(): new for key, new in dynamic_guids.items()}
  for i in range(0, guid_len, 24):
    new_guid = replacements.get(getguid(contents, guid_off + i).lower())
    if new_guid is not None:
      setguid(contents, guid_off + i, new_guid)

  recreate_guid_hashtable(contents, ntypes, guid_off, guid_len)
  _write_bytes(tlb_file, contents)


# Handle multiple guid substitutions, where |dynamic_guids| is of the form
# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791="
# "3d852661-c795-4d20-9b95-5561e9a1d2d9,"
# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B="
# "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
#
# Before specifying |dynamic_guids| in the build, the IDL file is first compiled
# with "158428a4-6014-4978-83ba-9fad0dabe791" and
# "63B8FFB1-5314-48C9-9C57-93EC8BC6184B". These are the "replaceable" guids,
# i.e., guids that can be replaced in future builds. The resulting MIDL outputs
# are copied over to src\third_party\win_build_output\.
#
# Then, in the future, any changes to these guids can be accomplished by
# providing |dynamic_guids| of the format above in the build file. These
# "dynamic" guid changes by themselves will not require the MIDL compiler and
# therefore will not require copying output over to
# src\third_party\win_build_output\.
#
# The pre-generated src\third_party\win_build_output\ files are used for
# cross-compiling on other platforms, since the MIDL compiler is Windows-only.
def overwrite_guids(h_file, iid_file, proxy_file, tlb_file, dynamic_guids):
  """Applies |dynamic_guids| to the given MIDL output files in place.

  Args:
    h_file: path of the generated .h file.
    iid_file: path of the generated _i.c file.
    proxy_file: path of the generated _p.c file, or None if the project has
        no proxy file.
    tlb_file: path of the generated .tlb file, or None if the project has no
        type library.
    dynamic_guids: dict mapping guid text (bytes) to replacement guid text
        (bytes).
  """
  # Fix up GUIDs in .h, _i.c, _p.c, and .tlb.
  overwrite_guids_h(h_file, dynamic_guids)
  overwrite_guids_iid(iid_file, dynamic_guids)
  # Not all projects use proxy or tlb files; callers pass None for those.
  if proxy_file:
    overwrite_guids_proxy(proxy_file, dynamic_guids)
  if tlb_file:
    overwrite_guids_tlb(tlb_file, dynamic_guids)


# This function removes all occurrences of 'PLACEHOLDER-GUID-' from the
# template, and if |dynamic_guids| is specified, also replaces the guids within
# the file. Finally, it writes the resultant output to the |idl| file.
def generate_idl_from_template(idl_template, dynamic_guids, idl):
  """Writes |idl| from |idl_template|, see the comment above.

  Args:
    idl_template: path of the template IDL file to read.
    dynamic_guids: dict mapping guid text (bytes) to replacement guid text
        (bytes), or None/empty to only strip the placeholder prefix. Matching
        is case-insensitive.
    idl: path of the IDL file to write.
  """
  with open(idl_template, 'rb') as template:
    contents = template.read()
  contents = re.sub(b'PLACEHOLDER-GUID-', b'', contents, flags=re.I)
  if dynamic_guids:
    for old_guid, new_guid in dynamic_guids.items():
      contents = re.sub(old_guid, new_guid, contents, flags=re.I)
  with open(idl, 'wb') as output:
    output.write(contents)


# This function runs the MIDL compiler with the provided arguments. It creates
# and returns a tuple of |0,midl_output_dir| on success.
def run_midl(args, env_dict):
  """Runs the command |args| with '/out <fresh temp dir>' appended.

  Args:
    args: command line as a list of strings.
    env_dict: environment for the child process.

  Returns:
    (0, midl_output_dir) on success; the directory is kept and every file in
    it has been passed through ZapTimestamp(). If the command exits with a
    non-zero code, (returncode, midl_output_dir) is returned and the directory
    has already been deleted. The directory is also deleted if an exception
    propagates out of this function.
  """
  midl_output_dir = tempfile.mkdtemp()
  delete_midl_output_dir = True

  try:
    popen = subprocess.Popen(args + ['/out', midl_output_dir],
                             shell=True,
                             universal_newlines=True,
                             env=env_dict,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
    out, _ = popen.communicate()

    # Filter junk out of stdout, and write filtered versions. Output we want
    # to filter is pairs of lines that look like this:
    # Processing C:\Program Files (x86)\Microsoft SDKs\...\include\objidl.idl
    # objidl.idl
    lines = out.splitlines()
    prefixes = ('Processing ', '64 bit Processing ')
    processing = set(
        os.path.basename(x) for x in lines if x.startswith(prefixes))
    for line in lines:
      if not line.startswith(prefixes) and line not in processing:
        print(line)

    if popen.returncode != 0:
      return popen.returncode, midl_output_dir

    for f in os.listdir(midl_output_dir):
      ZapTimestamp(os.path.join(midl_output_dir, f))

    # Everything succeeded: hand the directory over to the caller.
    delete_midl_output_dir = False
  finally:
    if os.path.exists(midl_output_dir) and delete_midl_output_dir:
      shutil.rmtree(midl_output_dir)

  return 0, midl_output_dir


# This function adds support for dynamic generation of guids: when values are
# specified as 'uuid5:name', this function will substitute the values with
# generated dynamic guids using the uuid5 function. The uuid5 function generates
# a guid based on the SHA-1 hash of a namespace identifier (which is the guid
# that comes after 'PLACEHOLDER-GUID-') and a name (which is a string, such as a
# version string "87.1.2.3").
#
# For instance, when |dynamic_guids| is of the form:
# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791=uuid5:88.0.4307.0
# ,"
# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B=uuid5:88.0.4307.0
# "
#
# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791" would be substituted
# with uuid5("158428a4-6014-4978-83ba-9fad0dabe791", "88.0.4307.0"), which is
# "64700170-AD80-5DE3-924E-2F39D862CFD5". And
# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B" would be
# substituted with uuid5("63B8FFB1-5314-48C9-9C57-93EC8BC6184B", "88.0.4307.0"),
# which is "7B6E7538-3C38-5565-BC92-42BCEE268D76".
def uuid5_substitutions(dynamic_guids):
  """Replaces 'uuid5:name' values of |dynamic_guids| with generated guids.

  For every entry whose value starts with 'uuid5:', the value is replaced in
  place by the upper-cased uuid5 of the key (used as the namespace guid) and
  the text following 'uuid5:'. Other entries are left untouched.

  Args:
    dynamic_guids: dict mapping guid text to either a guid or 'uuid5:<name>'.
  """
  marker = 'uuid5:'
  # Iterate over a snapshot because the dict is updated inside the loop.
  for key, value in list(dynamic_guids.items()):
    if not value.startswith(marker):
      continue
    name = value[len(marker):]
    assert name
    dynamic_guids[key] = str(uuid.uuid5(uuid.UUID(key), name)).upper()


def _read_lines(path):
  """Returns the text lines of |path| and closes the file."""
  with io.open(path) as f:
    return f.readlines()


def main(arch, gendir, outdir, dynamic_guids, tlb, h, dlldata, iid, proxy,
         clang, idl, *flags):
  """Copies checked-in MIDL outputs to |outdir| and, on Windows, verifies them.

  Args:
    arch: path of the environment block file; the text after its first '.'
        names the architecture subdirectory (e.g. 'x86' or 'x64').
    gendir: directory holding the checked-in MIDL outputs.
    outdir: directory the outputs are copied to.
    dynamic_guids: 'none', or a comma-separated list of
        'PLACEHOLDER-GUID-<guid>=<guid or uuid5:name>' items, optionally
        prefixed with 'ignore_proxy_stub,'.
    tlb: name of the .tlb output, or 'none'.
    h: name of the .h output.
    dlldata: name of the dlldata output, or 'none'.
    iid: name of the _i.c output.
    proxy: name of the _p.c output, or 'none'.
    clang: preprocessor command passed to midl via /cpp_cmd.
    idl: path of the IDL file (a template when |dynamic_guids| is given).
    *flags: additional flags passed to midl.

  Returns:
    0 on success. 1 if checked-in outputs are missing on a non-Windows host or
    if the midl output differs from the checked-in outputs. The midl exit code
    if midl itself fails.
  """
  # Copy checked-in outputs to final location.
  source = gendir
  if os.path.isdir(os.path.join(source, os.path.basename(idl))):
    source = os.path.join(source, os.path.basename(idl))
  source = os.path.join(source, arch.split('.')[1])  # Append 'x86' or 'x64'.
  source = os.path.normpath(source)

  source_exists = True
  if not os.path.isdir(source):
    source_exists = False
    if sys.platform != 'win32':
      print('Directory %s needs to be populated from Windows first' % source)
      return 1

    # This is a brand new IDL file that does not have outputs under
    # third_party\win_build_output\midl. We create an empty directory for now.
    os.makedirs(source)

  common_files = [h, iid]
  if tlb != 'none':
    # Not all projects use tlb files.
    common_files += [tlb]
  else:
    tlb = None

  if dlldata != 'none':
    # Not all projects use dlldata files.
    common_files += [dlldata]
  else:
    dlldata = None

  if proxy != 'none':
    # Not all projects use proxy files.
    common_files += [proxy]
  else:
    proxy = None

  for source_file in common_files:
    file_path = os.path.join(source, source_file)
    if not os.path.isfile(file_path):
      source_exists = False
      if sys.platform != 'win32':
        print('File %s needs to be generated from Windows first' % file_path)
        return 1

      # Either this is a brand new IDL file that does not have outputs under
      # third_party\win_build_output\midl or the file is (unexpectedly) missing.
      # We create an empty file for now. The rest of the machinery below will
      # then generate the correctly populated file using the MIDL compiler and
      # instruct the developer to copy that file under
      # third_party\win_build_output\midl.
      open(file_path, 'wb').close()
    shutil.copy(file_path, outdir)

  if dynamic_guids != 'none':
    assert '=' in dynamic_guids
    if dynamic_guids.startswith("ignore_proxy_stub,"):
      # TODO(ganesh): The custom proxy/stub file ("_p.c") is not generated
      # correctly for dynamic IIDs (but correctly if there are only dynamic
      # CLSIDs). The proxy/stub lookup functions generated by MIDL.exe within
      # "_p.c" rely on a sorted set of vtable lists, which we are not currently
      # regenerating. At the moment, no project in Chromium that uses dynamic
      # IIDs is relying on the custom proxy/stub file. So for now, if
      # |dynamic_guids| is prefixed with "ignore_proxy_stub,", we exclude the
      # custom proxy/stub file from the directory comparisons.
      # |proxy| is None (and not in the list) for projects without a proxy
      # file, in which case there is nothing to exclude.
      if proxy in common_files:
        common_files.remove(proxy)
      dynamic_guids = dynamic_guids.split("ignore_proxy_stub,", 1)[1]
    dynamic_guids = re.sub('PLACEHOLDER-GUID-', '', dynamic_guids, flags=re.I)
    dynamic_guids = dynamic_guids.split(',')
    # Split on the first '=' only so that a value may itself contain '='.
    dynamic_guids = dict(s.split('=', 1) for s in dynamic_guids)
    uuid5_substitutions(dynamic_guids)
    dynamic_guids_bytes = {
        k.encode('utf-8'): v.encode('utf-8')
        for k, v in dynamic_guids.items()
    }
    if source_exists:
      overwrite_guids(*(os.path.join(outdir, name) if name else None
                        for name in [h, iid, proxy, tlb]),
                      dynamic_guids=dynamic_guids_bytes)
  else:
    dynamic_guids = None

  # On non-Windows, that's all we can do.
  if sys.platform != 'win32':
    return 0

  idl_template = None
  if dynamic_guids:
    idl_template = idl

    # posixpath is used here to keep the MIDL-generated files with a uniform
    # separator of '/' instead of mixed '/' and '\\'.
    idl = posixpath.join(
        outdir,
        os.path.splitext(os.path.basename(idl_template))[0] + '.idl')

    # |idl_template| can contain one or more occurrences of guids that are
    # substituted with |dynamic_guids|, and then MIDL is run on the substituted
    # IDL file.
    generate_idl_from_template(idl_template, dynamic_guids_bytes, idl)

  # On Windows, run midl.exe on the input and check that its outputs are
  # identical to the checked-in outputs (after replacing guids if
  # |dynamic_guids| is specified).

  # Read the environment block from the file.  This is stored in the format used
  # by CreateProcess. Drop last 2 NULs, one for list terminator, one for
  # trailing vs. separator.
  with open(arch) as env_file:
    env_pairs = env_file.read()[:-2].split('\0')
  env_dict = dict([item.split('=', 1) for item in env_pairs])

  # Extract the /D options and send them to the preprocessor.
  preprocessor_options = '-E -nologo -Wno-nonportable-include-path'
  preprocessor_options += ''.join(
      [' ' + flag for flag in flags if flag.startswith('/D')])
  args = ['midl', '/nologo'] + list(flags)
  if tlb:
    args += ['/tlb', tlb]
  args += ['/h', h]
  if dlldata:
    args += ['/dlldata', dlldata]
  args += ['/iid', iid]
  if proxy:
    args += ['/proxy', proxy]
  args += ['/cpp_cmd', clang, '/cpp_opt', preprocessor_options, idl]

  returncode, midl_output_dir = run_midl(args, env_dict)
  if returncode != 0:
    return returncode

  # Now compare the output in midl_output_dir to the copied-over outputs.
  _, mismatch, errors = filecmp.cmpfiles(midl_output_dir, outdir, common_files)
  assert not errors

  if mismatch:
    print('midl.exe output different from files in %s, see %s' %
          (outdir, midl_output_dir))
    for f in mismatch:
      if f.endswith('.tlb'): continue
      fromfile = os.path.join(outdir, f)
      tofile = os.path.join(midl_output_dir, f)
      print(''.join(
          difflib.unified_diff(
              _read_lines(fromfile), _read_lines(tofile), fromfile, tofile)))

    if dynamic_guids:
      # |idl_template| can contain one or more occurrences of guids prefixed
      # with 'PLACEHOLDER-GUID-'. We first remove the extraneous
      # 'PLACEHOLDER-GUID-' prefix and then run MIDL on the substituted IDL
      # file.
      # No guid substitutions are done at this point, because we want to compile
      # with the placeholder guids and then instruct the user to copy the output
      # over to |source| which is typically src\third_party\win_build_output\.
      # In future runs, the placeholder guids in |source| are replaced with the
      # guids specified in |dynamic_guids|.
      generate_idl_from_template(idl_template, None, idl)
      returncode, midl_output_dir = run_midl(args, env_dict)
      if returncode != 0:
        return returncode

    print('To rebaseline:')
    print(r' copy /y %s\* %s' % (midl_output_dir, source))
    return 1

  # The outputs matched, so the temporary midl output is no longer needed.
  # Cleanup is best-effort: a failure here must not fail the build step.
  shutil.rmtree(midl_output_dir, ignore_errors=True)
  return 0


if __name__ == '__main__':
  sys.exit(main(*sys.argv[1:]))