xref: /aosp_15_r20/external/cronet/build/toolchain/win/midl.py (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1# Copyright 2017 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from __future__ import division
6
7import array
8import difflib
9import filecmp
10import io
11import operator
12import os
13import posixpath
14import re
15import shutil
16import struct
17import subprocess
18import sys
19import tempfile
20import uuid
21
22from functools import reduce
23
24
def ZapTimestamp(filename):
  """Rewrites |filename| in place with midl.exe's timestamp/version redacted.

  Files whose name ends in '.tlb' are patched as binary type libraries; every
  other file is treated as generated C/C++ text and patched with regular
  expressions and literal byte replacements.

  Args:
    filename: Path of a midl.exe output file. It is read fully, transformed
      and written back to the same path.

  Raises:
    AssertionError: If a '.tlb' file does not have the layout described
      below. The file is left untouched in that case because nothing is
      written before the checks pass.
  """
  with open(filename, 'rb') as f:
    contents = f.read()
  # midl.exe writes timestamp 2147483647 (2^31 - 1) as creation date into its
  # outputs, but using the local timezone.  To make the output timezone-
  # independent, replace that date with a fixed string of the same length.
  # Also blank out the minor version number.
  if filename.endswith('.tlb'):
    # See https://chromium-review.googlesource.com/c/chromium/src/+/693223 for
    # a fairly complete description of the .tlb binary format.
    # TLB files start with a 0x54 byte header. Offset 0x20 stores how many
    # types are defined in the file, and the header is followed by that many
    # uint32s.
    # After that, 15 section headers appear.  Each section header is 16 bytes,
    # starting with offset and length uint32s.
    # Section 12 in the file contains custom() data. custom() data has a type
    # (int, string, etc).  Each custom data chunk starts with a uint16_t
    # describing its type.  Type 8 is string data, consisting of a uint32_t
    # len, followed by that many data bytes, followed by 'W' bytes to pad to a
    # 4 byte boundary.  Type 0x13 is uint32 data, followed by 4 data bytes,
    # followed by two 'W' to pad to a 4 byte boundary.
    # The custom block always starts with one string containing "Created by
    # MIDL version 8...", followed by one uint32 containing 0x7fffffff,
    # followed by another uint32 containing the MIDL compiler version (e.g.
    # 0x0801026e for v8.1.622 -- 0x26e == 622).  These 3 fields take 0x54 bytes.
    # There might be more custom data after that, but these 3 blocks are always
    # there for file-level metadata.
    # All data is little-endian in the file.
    assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
    ntypes, = struct.unpack_from('<I', contents, 0x20)
    custom_off, custom_len = struct.unpack_from(
        '<II', contents, 0x54 + 4*ntypes + 11*16)
    assert custom_len >= 0x54

    # Offsets of the three leading custom-data fields.
    created_off = custom_off + 6         # Start of the "Created by" text.
    timestamp_off = created_off + 0x3e   # uint32 chunk holding 0x7fffffff.
    version_off = timestamp_off + 8      # uint32 chunk holding MIDL version.

    # First: Type string (0x8), followed by 0x3e characters.
    assert contents[custom_off:created_off] == b'\x08\x00\x3e\x00\x00\x00'
    assert re.match(
        br'Created by MIDL version 8\.\d\d\.\d{4} '
        br'at ... Jan 1. ..:..:.. 2038\n',
        contents[created_off:timestamp_off])
    # Second: Type uint32 (0x13) storing 0x7fffffff (followed by WW / 0x57 pad)
    assert contents[timestamp_off:version_off] == \
        b'\x13\x00\xff\xff\xff\x7f\x57\x57'
    # Third: Type uint32 (0x13) storing MIDL compiler version.
    assert contents[version_off:version_off + 2] == b'\x13\x00'
    # Replace "Created by" string with fixed string, and fixed MIDL version with
    # 8.1.622 always.
    contents = (
        contents[0:created_off] +
        b'Created by MIDL version 8.xx.xxxx at a redacted point in time\n' +
        # uint32 (0x13) val 0x7fffffff, WW, uint32 (0x13), val 0x0801026e, WW
        b'\x13\x00\xff\xff\xff\x7f\x57\x57\x13\x00\x6e\x02\x01\x08\x57\x57' +
        contents[custom_off + 0x54:])
  else:
    contents = re.sub(
        br'File created by MIDL compiler version 8\.\d\d\.\d{4} \*/\r\n'
        br'/\* at ... Jan 1. ..:..:.. 2038',
        br'File created by MIDL compiler version 8.xx.xxxx */\r\n'
        br'/* at a redacted point in time', contents)
    contents = re.sub(
        br'    Oicf, W1, Zp8, env=(.....) \(32b run\), '
        br'target_arch=(AMD64|X86) 8\.\d\d\.\d{4}',
        br'    Oicf, W1, Zp8, env=\1 (32b run), target_arch=\2 8.xx.xxxx',
        contents)
    # TODO(thakis): If we need more hacks than these, try to verify checked-in
    # outputs when we're using the hermetic toolchain.
    # midl.exe older than 8.1.622 omit '//' after #endif, fix that:
    contents = contents.replace(b'#endif !_MIDL_USE_GUIDDEF_',
                                b'#endif // !_MIDL_USE_GUIDDEF_')
    # midl.exe puts the midl version into code in one place.  To have
    # predictable output, lie about the midl version if it's not 8.1.622.
    # This is unfortunate, but remember that there's beauty too in imperfection.
    contents = contents.replace(b'0x801026c, /* MIDL Version 8.1.620 */',
                                b'0x801026e, /* MIDL Version 8.1.622 */')
  with open(filename, 'wb') as f:
    f.write(contents)
98
99
def get_tlb_contents(tlb_file):
  """Reads a .tlb file and locates its type and GUID sections.

  See ZapTimestamp() for a short overview of the .tlb format.

  Args:
    tlb_file: Path of the type library to read.

  Returns:
    A tuple (contents, ntypes, type_off, guid_off, guid_len) where |contents|
    is a mutable array.array('B') holding the whole file, |ntypes| is the
    uint32 stored at offset 0x20, |type_off| is the offset stored in the first
    section header, and |guid_off|/|guid_len| are the offset and length stored
    in the sixth section header.

  Raises:
    AssertionError: If the file does not start with the expected magic bytes
      or the GUID section length is not a multiple of 24.
  """
  with open(tlb_file, 'rb') as f:
    contents = f.read()
  assert contents[0:8] == b'MSFT\x02\x00\x01\x00'
  ntypes, = struct.unpack_from('<I', contents, 0x20)

  # Section headers (16 bytes each) follow the 0x54 byte file header and the
  # |ntypes| uint32s; each one starts with an (offset, length) uint32 pair.
  section_headers_off = 0x54 + 4*ntypes
  type_off, _ = struct.unpack_from('<II', contents, section_headers_off)
  guid_off, guid_len = struct.unpack_from(
      '<II', contents, section_headers_off + 5*16)
  # Each GUID entry occupies 24 bytes.
  assert guid_len % 24 == 0

  # Hand back a mutable buffer so callers can patch the file in memory.
  return array.array('B', contents), ntypes, type_off, guid_off, guid_len
114
115
def recreate_guid_hashtable(contents, ntypes, guid_off, guid_len):
  """Rebuilds the GUID hash table of an in-memory .tlb image in place.

  Meant to run after GUIDs in the "guid" section were modified. Because the
  table uses chaining, every chain is recomputed from scratch instead of
  being patched.

  Args:
    contents: Writable buffer holding the whole .tlb file.
    ntypes: Number of uint32s between the file header and section headers.
    guid_off: Offset of the GUID section inside |contents|.
    guid_len: Length of the GUID section in bytes (24 bytes per entry).
  """
  bucket_count = 0x80 // 4
  # 0xffffffff marks an empty bucket / the end of a chain.
  chain_heads = [0xffffffff] * bucket_count

  for entry_off in range(guid_off, guid_off + guid_len, 24):
    # Each entry is a 16 byte GUID followed by two uint32s; only the GUID
    # bytes are needed here.
    raw_guid, _, _ = struct.unpack_from('<16sII', contents, entry_off)
    # midl seems to hash a GUID by XOR-ing its eight 16-bit words.
    halfwords = struct.unpack('<8H', raw_guid)
    bucket = reduce(operator.xor, halfwords) % bucket_count
    # Link this entry in front of the bucket's current chain.
    struct.pack_into('<I', contents, entry_off + 0x14, chain_heads[bucket])
    chain_heads[bucket] = entry_off - guid_off

  # The hash table's location comes from the fifth section header.
  hash_off, _ = struct.unpack_from('<II', contents, 0x54 + 4*ntypes + 4*16)
  for slot, head in enumerate(chain_heads):
    struct.pack_into('<I', contents, hash_off + 4*slot, head)
135
136
def overwrite_guids_h(h_file, dynamic_guids):
  """Replaces GUIDs inside the generated header |h_file| in place.

  Args:
    h_file: Path of the header file to rewrite.
    dynamic_guids: Dict mapping bytes patterns to bytes replacements. Each key
      is used as a case-insensitive regular expression and every match is
      replaced with the corresponding value.
  """
  with open(h_file, 'rb') as f:
    contents = f.read()
  for old_guid, new_guid in dynamic_guids.items():
    contents = re.sub(old_guid, new_guid, contents, flags=re.I)
  with open(h_file, 'wb') as f:
    f.write(contents)
142
143
def get_uuid_format(guid, prefix):
  """Formats a textual GUID as a comma-separated list of hex literals.

  Args:
    guid: GUID as bytes, e.g. b"D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
    prefix: Bytes inserted right before the literal built from guid[19:21].

  Returns:
    Bytes such as b"0xD0E1CACC,0xC63C,0x4192,<prefix>0x94,0xAB,...,0x83".
  """
  # The first three groups keep their full width.
  leading = [guid[0:8], guid[9:13], guid[14:18]]
  # The remainder is emitted one byte (two hex digits) at a time; index 23 is
  # the dash separating the last two groups and is skipped.
  trailing = [guid[19:21], guid[21:23]]
  trailing.extend(guid[pos:pos + 2] for pos in range(24, len(guid), 2))
  return (b'0x' + b',0x'.join(leading) + b',' + prefix + b'0x' +
          b',0x'.join(trailing))
150
151
def get_uuid_format_iid_file(guid):
  """Returns |guid| formatted the way it is matched in the IID ("_i.c") file.

  Converts b"D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to
  b"0xD0E1CACC,0xC63C,0x4192,0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83".
  """
  # This format has no extra token before the byte-wise part.
  no_prefix = b''
  return get_uuid_format(guid, no_prefix)
156
157
def overwrite_guids_iid(iid_file, dynamic_guids):
  """Replaces GUIDs inside the generated IID file |iid_file| in place.

  Args:
    iid_file: Path of the IID ("_i.c") file to rewrite.
    dynamic_guids: Dict mapping textual GUIDs (bytes) to their replacement
      GUIDs (bytes). Both sides are converted with get_uuid_format_iid_file()
      before a case-insensitive regular-expression substitution.
  """
  with open(iid_file, 'rb') as f:
    contents = f.read()
  for old_guid, new_guid in dynamic_guids.items():
    contents = re.sub(get_uuid_format_iid_file(old_guid),
                      get_uuid_format_iid_file(new_guid),
                      contents,
                      flags=re.I)
  with open(iid_file, 'wb') as f:
    f.write(contents)
166
167
def get_uuid_format_proxy_file(guid):
  """Returns |guid| formatted the way it is matched in the proxy ("_p.c") file.

  Converts b"D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83" to
  b"0xD0E1CACC,0xC63C,0x4192,{0x94,0xAB,0xBF,0x8E,0xAD,0x0E,0x3B,0x83".
  Only the inner opening brace is emitted; the result carries no outer or
  closing braces.
  """
  inner_brace = b'{'
  return get_uuid_format(guid, inner_brace)
172
173
def overwrite_guids_proxy(proxy_file, dynamic_guids):
  """Replaces GUIDs inside the generated proxy file |proxy_file| in place.

  Args:
    proxy_file: Path of the proxy/stub ("_p.c") file to rewrite.
    dynamic_guids: Dict mapping textual GUIDs (bytes) to their replacement
      GUIDs (bytes). Both sides are converted with
      get_uuid_format_proxy_file() before a case-insensitive
      regular-expression substitution.
  """
  with open(proxy_file, 'rb') as f:
    contents = f.read()
  for old_guid, new_guid in dynamic_guids.items():
    contents = re.sub(get_uuid_format_proxy_file(old_guid),
                      get_uuid_format_proxy_file(new_guid),
                      contents,
                      flags=re.I)
  with open(proxy_file, 'wb') as f:
    f.write(contents)
182
183
def getguid(contents, offset):
  """Reads the binary GUID stored at |offset| in |contents|.

  Returns:
    The GUID as upper-case bytes of the form
    b"D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
  """
  data1, data2, data3, data4 = struct.unpack_from('<IHH8s', contents, offset)
  # The trailing eight bytes are printed individually: two go into the fourth
  # group and the remaining six into the last one.
  octets = tuple(bytearray(data4))
  layout = b'%08X-%04X-%04X-%02X%02X-' + b'%02X' * 6
  return layout % ((data1, data2, data3) + octets)
189
190
def setguid(contents, offset, guid):
  """Writes |guid| in binary form into |contents| at |offset|.

  Args:
    contents: Writable buffer that receives the 16 GUID bytes.
    offset: Position in |contents| where the GUID starts.
    guid: Textual GUID as UTF-8 bytes, in any form uuid.UUID() accepts.
  """
  parsed = uuid.UUID(guid.decode('utf-8'))
  # The first three fields are stored little-endian; the last eight bytes are
  # copied verbatim.
  data1, data2, data3 = parsed.fields[0:3]
  data4 = parsed.bytes[8:]
  struct.pack_into('<IHH8s', contents, offset, data1, data2, data3, data4)
195
196
def overwrite_guids_tlb(tlb_file, dynamic_guids):
  """Replaces GUIDs inside the type library |tlb_file| in place.

  Args:
    tlb_file: Path of the .tlb file to rewrite.
    dynamic_guids: Dict mapping textual GUIDs (bytes) to their replacement
      GUIDs (bytes). Keys are compared case-insensitively against the GUIDs
      stored in the file.
  """
  contents, ntypes, _, guid_off, guid_len = get_tlb_contents(tlb_file)

  # Index the replacements by lower-cased GUID once, so each entry needs a
  # single lookup instead of a scan over every key.
  replacements = {key.lower(): value for key, value in dynamic_guids.items()}
  for entry_off in range(guid_off, guid_off + guid_len, 24):
    replacement = replacements.get(getguid(contents, entry_off).lower())
    if replacement is not None:
      setguid(contents, entry_off, replacement)

  # The hash table is keyed on GUID bytes, so it must be rebuilt after edits.
  recreate_guid_hashtable(contents, ntypes, guid_off, guid_len)
  with open(tlb_file, 'wb') as f:
    f.write(contents)
208
209
210# Handle multiple guid substitutions, where |dynamic_guids| is of the form
211# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791="
212# "3d852661-c795-4d20-9b95-5561e9a1d2d9,"
213# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B="
214# "D0E1CACC-C63C-4192-94AB-BF8EAD0E3B83".
215#
216# Before specifying |dynamic_guids| in the build, the IDL file is first compiled
217# with "158428a4-6014-4978-83ba-9fad0dabe791" and
218# "63B8FFB1-5314-48C9-9C57-93EC8BC6184B". These are the "replaceable" guids,
219# i.e., guids that can be replaced in future builds. The resulting MIDL outputs
220# are copied over to src\third_party\win_build_output\.
221#
222# Then, in the future, any changes to these guids can be accomplished by
223# providing |dynamic_guids| of the format above in the build file. These
224# "dynamic" guid changes by themselves will not require the MIDL compiler and
225# therefore will not require copying output over to
226# src\third_party\win_build_output\.
227#
228# The pre-generated src\third_party\win_build_output\ files are used for
229# cross-compiling on other platforms, since the MIDL compiler is Windows-only.
def overwrite_guids(h_file, iid_file, proxy_file, tlb_file, dynamic_guids):
  """Applies |dynamic_guids| to every generated MIDL output file.

  Args:
    h_file: Path of the generated header.
    iid_file: Path of the generated IID ("_i.c") file.
    proxy_file: Path of the generated proxy ("_p.c") file, or None when the
      project has no proxy file.
    tlb_file: Path of the generated type library, or None when the project
      has no .tlb file.
    dynamic_guids: Dict mapping placeholder GUIDs (bytes) to their
      replacement GUIDs (bytes).
  """
  # Fix up GUIDs in .h, _i.c, _p.c, and .tlb.
  overwrite_guids_h(h_file, dynamic_guids)
  overwrite_guids_iid(iid_file, dynamic_guids)
  # Proxy and tlb outputs are optional; skip whichever one is absent instead
  # of trying to open None.
  if proxy_file:
    overwrite_guids_proxy(proxy_file, dynamic_guids)
  if tlb_file:
    overwrite_guids_tlb(tlb_file, dynamic_guids)
237
238
239# This function removes all occurrences of 'PLACEHOLDER-GUID-' from the
240# template, and if |dynamic_guids| is specified, also replaces the guids within
241# the file. Finally, it writes the resultant output to the |idl| file.
def generate_idl_from_template(idl_template, dynamic_guids, idl):
  """Writes |idl| from |idl_template| with placeholder markers resolved.

  Every occurrence of 'PLACEHOLDER-GUID-' (matched case-insensitively) is
  removed from the template. If |dynamic_guids| is given, the GUIDs that are
  left are then substituted as well.

  Args:
    idl_template: Path of the template IDL file to read.
    dynamic_guids: None/empty, or a dict mapping bytes patterns to bytes
      replacements. Each key is used as a case-insensitive regular expression.
    idl: Path of the IDL file to write.
  """
  with open(idl_template, 'rb') as f:
    contents = f.read()
  contents = re.sub(b'PLACEHOLDER-GUID-', b'', contents, flags=re.I)
  if dynamic_guids:
    for old_guid, new_guid in dynamic_guids.items():
      contents = re.sub(old_guid, new_guid, contents, flags=re.I)
  with open(idl, 'wb') as f:
    f.write(contents)
249
250
251# This function runs the MIDL compiler with the provided arguments. It creates
252# and returns a tuple of |0,midl_output_dir| on success.
def run_midl(args, env_dict):
  """Runs the MIDL command |args| into a fresh temporary output directory.

  Args:
    args: Command line as a list; '/out <temporary directory>' is appended.
    env_dict: Environment passed to the child process.

  Returns:
    (0, midl_output_dir) on success. If the command exits with a non-zero
    code, (returncode, midl_output_dir) is returned and the directory has
    already been removed.
  """
  midl_output_dir = tempfile.mkdtemp()
  keep_output_dir = False

  try:
    command = args + ['/out', midl_output_dir]
    midl = subprocess.Popen(command,
                            shell=True,
                            universal_newlines=True,
                            env=env_dict,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    output = midl.communicate()[0]

    # Filter junk out of stdout, and write filtered versions. Output we want
    # to filter is pairs of lines that look like this:
    # Processing C:\Program Files (x86)\Microsoft SDKs\...\include\objidl.idl
    # objidl.idl
    noise_prefixes = ('Processing ', '64 bit Processing ')
    output_lines = output.splitlines()
    processed_names = {
        os.path.basename(text)
        for text in output_lines
        if text.startswith(noise_prefixes)
    }
    for text in output_lines:
      if text.startswith(noise_prefixes) or text in processed_names:
        continue
      print(text)

    if midl.returncode != 0:
      return midl.returncode, midl_output_dir

    # Make every generated file independent of build time and midl version.
    for generated in os.listdir(midl_output_dir):
      ZapTimestamp(os.path.join(midl_output_dir, generated))

    keep_output_dir = True
  finally:
    # Anything short of full success discards the temporary directory.
    if not keep_output_dir and os.path.exists(midl_output_dir):
      shutil.rmtree(midl_output_dir)

  return 0, midl_output_dir
290
291
292# This function adds support for dynamic generation of guids: when values are
293# specified as 'uuid5:name', this function will substitute the values with
294# generated dynamic guids using the uuid5 function. The uuid5 function generates
295# a guid based on the SHA-1 hash of a namespace identifier (which is the guid
296# that comes after 'PLACEHOLDER-GUID-') and a name (which is a string, such as a
297# version string "87.1.2.3").
298#
# For instance, when |dynamic_guids| is of the form:
# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791=uuid5:88.0.4307.0,"
# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B=uuid5:88.0.4307.0"
304#
305# "PLACEHOLDER-GUID-158428a4-6014-4978-83ba-9fad0dabe791" would be substituted
306# with uuid5("158428a4-6014-4978-83ba-9fad0dabe791", "88.0.4307.0"), which is
307# "64700170-AD80-5DE3-924E-2F39D862CFD5". And
308# "PLACEHOLDER-GUID-63B8FFB1-5314-48C9-9C57-93EC8BC6184B" would be
309# substituted with uuid5("63B8FFB1-5314-48C9-9C57-93EC8BC6184B", "88.0.4307.0"),
310# which is "7B6E7538-3C38-5565-BC92-42BCEE268D76".
def uuid5_substitutions(dynamic_guids):
  """Expands 'uuid5:<name>' values of |dynamic_guids| in place.

  For every value that starts with 'uuid5:', the entry is replaced by the
  upper-cased uuid5 of the key (used as namespace GUID) and the name that
  follows the marker. All other entries are left untouched.

  Args:
    dynamic_guids: Dict mapping GUID strings to replacement strings.
  """
  marker = 'uuid5:'
  for namespace, spec in dynamic_guids.items():
    if not spec.startswith(marker):
      continue
    name = spec[len(marker):]
    assert name
    generated = uuid.uuid5(uuid.UUID(namespace), name)
    dynamic_guids[namespace] = str(generated).upper()
317
318
def main(arch, gendir, outdir, dynamic_guids, tlb, h, dlldata, iid, proxy,
         clang, idl, *flags):
  """Copies checked-in MIDL outputs to |outdir| and verifies them on Windows.

  Args:
    arch: Path of a file holding the environment block for midl.exe. The
      second dot-separated component of this string ('x86' or 'x64') selects
      the subdirectory of checked-in outputs.
    gendir: Directory holding the checked-in MIDL outputs.
    outdir: Directory that receives the output files.
    dynamic_guids: 'none', or a comma-separated list of
      'PLACEHOLDER-GUID-<guid>=<replacement>' pairs, optionally prefixed with
      'ignore_proxy_stub,'.
    tlb: Name of the .tlb output, or 'none'.
    h: Name of the header output.
    dlldata: Name of the dlldata output, or 'none'.
    iid: Name of the IID output.
    proxy: Name of the proxy output, or 'none'.
    clang: Preprocessor command handed to midl via /cpp_cmd.
    idl: Path of the IDL file (used as a template when |dynamic_guids| is
      given).
    *flags: Extra midl flags; those starting with '/D' are also forwarded to
      the preprocessor.

  Returns:
    0 on success. 1 if checked-in outputs are missing on a non-Windows host
    or if midl.exe output differs from the checked-in files. Otherwise the
    non-zero exit code of midl.exe.
  """
  # Copy checked-in outputs to final location.
  source = gendir
  if os.path.isdir(os.path.join(source, os.path.basename(idl))):
    source = os.path.join(source, os.path.basename(idl))
  source = os.path.join(source, arch.split('.')[1])  # Append 'x86' or 'x64'.
  source = os.path.normpath(source)

  source_exists = True
  if not os.path.isdir(source):
    source_exists = False
    if sys.platform != 'win32':
      print('Directory %s needs to be populated from Windows first' % source)
      return 1

    # This is a brand new IDL file that does not have outputs under
    # third_party\win_build_output\midl. We create an empty directory for now.
    os.makedirs(source)

  common_files = [h, iid]
  if tlb != 'none':
    # Not all projects use tlb files.
    common_files += [tlb]
  else:
    tlb = None

  if dlldata != 'none':
    # Not all projects use dlldata files.
    common_files += [dlldata]
  else:
    dlldata = None

  if proxy != 'none':
    # Not all projects use proxy files.
    common_files += [proxy]
  else:
    proxy = None

  for source_file in common_files:
    file_path = os.path.join(source, source_file)
    if not os.path.isfile(file_path):
      source_exists = False
      if sys.platform != 'win32':
        print('File %s needs to be generated from Windows first' % file_path)
        return 1

      # Either this is a brand new IDL file that does not have outputs under
      # third_party\win_build_output\midl or the file is (unexpectedly) missing.
      # We create an empty file for now. The rest of the machinery below will
      # then generate the correctly populated file using the MIDL compiler and
      # instruct the developer to copy that file under
      # third_party\win_build_output\midl.
      open(file_path, 'wb').close()
    shutil.copy(file_path, outdir)

  if dynamic_guids != 'none':
    assert '=' in dynamic_guids
    if dynamic_guids.startswith("ignore_proxy_stub,"):
      # TODO(ganesh): The custom proxy/stub file ("_p.c") is not generated
      # correctly for dynamic IIDs (but correctly if there are only dynamic
      # CLSIDs). The proxy/stub lookup functions generated by MIDL.exe within
      # "_p.c" rely on a sorted set of vtable lists, which we are not currently
      # regenerating. At the moment, no project in Chromium that uses dynamic
      # IIDs is relying on the custom proxy/stub file. So for now, if
      # |dynamic_guids| is prefixed with "ignore_proxy_stub,", we exclude the
      # custom proxy/stub file from the directory comparisons.
      # The proxy file is only listed when the project has one, so guard the
      # removal instead of failing on a missing entry.
      if proxy in common_files:
        common_files.remove(proxy)
      dynamic_guids = dynamic_guids.split("ignore_proxy_stub,", 1)[1]
    dynamic_guids = re.sub('PLACEHOLDER-GUID-', '', dynamic_guids, flags=re.I)
    dynamic_guids = dynamic_guids.split(',')
    dynamic_guids = dict(s.split('=') for s in dynamic_guids)
    uuid5_substitutions(dynamic_guids)
    # The output files are patched as bytes, so encode both sides once here.
    dynamic_guids_bytes = {
        k.encode('utf-8'): v.encode('utf-8')
        for k, v in dynamic_guids.items()
    }
    if source_exists:
      overwrite_guids(*(os.path.join(outdir, name) if name else None
                        for name in [h, iid, proxy, tlb]),
                      dynamic_guids=dynamic_guids_bytes)
  else:
    dynamic_guids = None

  # On non-Windows, that's all we can do.
  if sys.platform != 'win32':
    return 0

  idl_template = None
  if dynamic_guids:
    idl_template = idl

    # posixpath is used here to keep the MIDL-generated files with a uniform
    # separator of '/' instead of mixed '/' and '\\'.
    idl = posixpath.join(
        outdir,
        os.path.splitext(os.path.basename(idl_template))[0] + '.idl')

    # |idl_template| can contain one or more occurrences of guids that are
    # substituted with |dynamic_guids|, and then MIDL is run on the substituted
    # IDL file.
    generate_idl_from_template(idl_template, dynamic_guids_bytes, idl)

  # On Windows, run midl.exe on the input and check that its outputs are
  # identical to the checked-in outputs (after replacing guids if
  # |dynamic_guids| is specified).

  # Read the environment block from the file. This is stored in the format used
  # by CreateProcess. Drop last 2 NULs, one for list terminator, one for
  # trailing vs. separator.
  with open(arch) as env_file:
    env_pairs = env_file.read()[:-2].split('\0')
  env_dict = dict(item.split('=', 1) for item in env_pairs)

  # Extract the /D options and send them to the preprocessor.
  preprocessor_options = '-E -nologo -Wno-nonportable-include-path'
  preprocessor_options += ''.join(
      [' ' + flag for flag in flags if flag.startswith('/D')])

  # Optional outputs are only requested when the project uses them.
  args = ['midl', '/nologo'] + list(flags)
  if tlb:
    args += ['/tlb', tlb]
  args += ['/h', h]
  if dlldata:
    args += ['/dlldata', dlldata]
  args += ['/iid', iid]
  if proxy:
    args += ['/proxy', proxy]
  args += ['/cpp_cmd', clang, '/cpp_opt', preprocessor_options, idl]

  returncode, midl_output_dir = run_midl(args, env_dict)
  if returncode != 0:
    return returncode

  # Now compare the output in midl_output_dir to the copied-over outputs.
  _, mismatch, errors = filecmp.cmpfiles(midl_output_dir, outdir, common_files)
  assert not errors

  if mismatch:
    print('midl.exe output different from files in %s, see %s' %
          (outdir, midl_output_dir))
    for f in mismatch:
      # Type libraries are binary, so a textual diff is not useful.
      if f.endswith('.tlb'):
        continue
      fromfile = os.path.join(outdir, f)
      tofile = os.path.join(midl_output_dir, f)
      with io.open(fromfile) as old_file, io.open(tofile) as new_file:
        print(''.join(
            difflib.unified_diff(old_file.readlines(), new_file.readlines(),
                                 fromfile, tofile)))

    if dynamic_guids:
      # |idl_template| can contain one or more occurrences of guids prefixed
      # with 'PLACEHOLDER-GUID-'. We first remove the extraneous
      # 'PLACEHOLDER-GUID-' prefix and then run MIDL on the substituted IDL
      # file.
      # No guid substitutions are done at this point, because we want to compile
      # with the placeholder guids and then instruct the user to copy the output
      # over to |source| which is typically src\third_party\win_build_output\.
      # In future runs, the placeholder guids in |source| are replaced with the
      # guids specified in |dynamic_guids|.
      generate_idl_from_template(idl_template, None, idl)
      returncode, midl_output_dir = run_midl(args, env_dict)
      if returncode != 0:
        return returncode

    print('To rebaseline:')
    print(r'  copy /y %s\* %s' % (midl_output_dir, source))
    return 1

  return 0
483
484
# Script entry point: forward the command-line arguments to main() and use
# its return value as the process exit status.
if __name__ == '__main__':
  exit_status = main(*sys.argv[1:])
  sys.exit(exit_status)
487