# xref: /aosp_15_r20/external/angle/build/toolchain/win/ml.py (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
#!/usr/bin/env python3
# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wraps ml.exe or ml64.exe and postprocesses the output to be deterministic.
Sets timestamp in .obj file to 0, hence incompatible with link.exe /incremental.

Use by prefixing the ml(64).exe invocation with this script:
    python ml.py ml.exe [args...]"""
10
import array
import collections
import struct
import subprocess
import sys
16
17
class Struct(object):
  """A thin wrapper around the struct module that returns a namedtuple.

  Attributes:
    fmt: The struct-module format string used for packing and unpacking.
    type: The namedtuple class that unpack_from() instantiates.
  """

  def __init__(self, name, *args):
    """Pass the name of the return type, and then an interleaved list of
    format strings as used by the struct module and of field names.

    Example: Struct('REL', 'I', 'VirtualAddress', 'H', 'Type') describes a
    4-byte unsigned field followed by a 2-byte unsigned field.
    """
    # The '<' prefix selects little-endian byte order with standard sizes and
    # no alignment padding, so size() is exactly the sum of the field sizes.
    self.fmt = '<' + ''.join(args[0::2])
    self.type = collections.namedtuple(name, args[1::2])

  def pack_into(self, buffer, offset, data):
    """Writes the fields of `data` (any sequence, in field order) into the
    writable `buffer` starting at byte `offset`."""
    return struct.pack_into(self.fmt, buffer, offset, *data)

  def unpack_from(self, buffer, offset=0):
    """Reads one record from `buffer` at byte `offset` and returns it as an
    instance of the namedtuple type."""
    return self.type(*struct.unpack_from(self.fmt, buffer, offset))

  def size(self):
    """Returns the number of bytes one packed record occupies."""
    return struct.calcsize(self.fmt)
35
36
def Subtract(nt, **kwargs):
  """Subtract(nt, f=2) returns a new namedtuple with 2 subtracted from nt.f.

  Several fields can be adjusted in one call by passing several keyword
  arguments. `nt` itself is not modified; fields that are not named keep
  their value.
  """
  return nt._replace(**{k: getattr(nt, k) - v for k, v in kwargs.items()})
40
41
def MakeDeterministic(objdata):
  """Returns a deterministic copy of a COFF object produced by ml(64).exe.

  Takes data produced by ml(64).exe (without any special flags) and
  1. Sets the timestamp to 0
  2. Strips the .debug$S section (which contains an unwanted absolute path):
     its section header, its raw data and its symbol table entry (plus that
     entry's one aux record) are removed, and every file offset, section
     number and symbol table index that shifts as a result is patched.

  Args:
    objdata: The raw contents of the .obj file.

  Returns:
    The rewritten object file contents as bytes.

  The layout assumptions listed below are verified with assert statements,
  so a violation raises AssertionError (unless Python runs with -O, which
  strips asserts).
  """
  # This makes several assumptions about ml's output:
  # - Section data is in the same order as the corresponding section headers:
  #   section headers preceding the .debug$S section header have their data
  #   preceding the .debug$S section data; likewise for section headers
  #   following the .debug$S section.
  # - The .debug$S section contains only the absolute path to the obj file and
  #   nothing else, in particular there's only a single entry in the symbol
  #   table referring to the .debug$S section.
  # - There are no COFF line number entries.
  # - There's no IMAGE_SYM_CLASS_CLR_TOKEN symbol.
  # These seem to hold in practice; if they stop holding this script needs to
  # become smarter.

  objdata = array.array('b', objdata)  # Writable, e.g. via struct.pack_into.

  # Read coff header.
  COFFHEADER = Struct('COFFHEADER', 'H', 'Machine', 'H', 'NumberOfSections',
                      'I', 'TimeDateStamp', 'I', 'PointerToSymbolTable', 'I',
                      'NumberOfSymbols', 'H', 'SizeOfOptionalHeader', 'H',
                      'Characteristics')
  coff_header = COFFHEADER.unpack_from(objdata)
  assert coff_header.SizeOfOptionalHeader == 0  # Only set for binaries.

  # Read section headers following coff header.
  SECTIONHEADER = Struct('SECTIONHEADER', '8s', 'Name', 'I', 'VirtualSize', 'I',
                         'VirtualAddress', 'I', 'SizeOfRawData', 'I',
                         'PointerToRawData', 'I', 'PointerToRelocations', 'I',
                         'PointerToLineNumbers', 'H', 'NumberOfRelocations',
                         'H', 'NumberOfLineNumbers', 'I', 'Characteristics')
  section_headers = []
  debug_section_index = -1
  for i in range(0, coff_header.NumberOfSections):
    section_header = SECTIONHEADER.unpack_from(objdata,
                                               offset=COFFHEADER.size() +
                                               i * SECTIONHEADER.size())
    assert not section_header[0].startswith(b'/')  # Support short names only.
    section_headers.append(section_header)

    if section_header.Name == b'.debug$S':
      assert debug_section_index == -1
      debug_section_index = i
  assert debug_section_index != -1

  data_start = COFFHEADER.size() + len(section_headers) * SECTIONHEADER.size()

  # Verify the .debug$S section looks like we expect.
  assert section_headers[debug_section_index].Name == b'.debug$S'
  assert section_headers[debug_section_index].VirtualSize == 0
  assert section_headers[debug_section_index].VirtualAddress == 0
  debug_size = section_headers[debug_section_index].SizeOfRawData
  debug_offset = section_headers[debug_section_index].PointerToRawData
  assert section_headers[debug_section_index].PointerToRelocations == 0
  assert section_headers[debug_section_index].PointerToLineNumbers == 0
  assert section_headers[debug_section_index].NumberOfRelocations == 0
  assert section_headers[debug_section_index].NumberOfLineNumbers == 0

  # Make sure sections in front of .debug$S have their data preceding it.
  for header in section_headers[:debug_section_index]:
    assert header.PointerToRawData < debug_offset
    assert header.PointerToRelocations < debug_offset
    assert header.PointerToLineNumbers < debug_offset

  # Make sure sections after .debug$S have their data following it.
  for header in section_headers[debug_section_index + 1:]:
    # Make sure the .debug$S data is at the very end of section data:
    assert header.PointerToRawData > debug_offset
    assert header.PointerToRelocations == 0
    assert header.PointerToLineNumbers == 0

  # Make sure the first non-empty section's data starts right after the section
  # headers.
  for section_header in section_headers:
    if section_header.PointerToRawData == 0:
      assert section_header.PointerToRelocations == 0
      assert section_header.PointerToLineNumbers == 0
      continue
    assert section_header.PointerToRawData == data_start
    break

  # Make sure the symbol table (and hence, string table) appear after the last
  # section:
  assert (
      coff_header.PointerToSymbolTable >=
      section_headers[-1].PointerToRawData + section_headers[-1].SizeOfRawData)

  # The symbol table contains a symbol for the no-longer-present .debug$S
  # section. If we leave it there, lld-link will complain:
  #
  #    lld-link: error: .debug$S should not refer to non-existent section 5
  #
  # so we need to remove that symbol table entry as well. This shifts symbol
  # entries around and we need to update symbol table indices in:
  # - relocations
  # - line number records (never present)
  # - one aux symbol entry (IMAGE_SYM_CLASS_CLR_TOKEN; not present in ml output)
  SYM = Struct(
      'SYM',
      '8s',
      'Name',
      'I',
      'Value',
      'h',
      'SectionNumber',  # Note: Signed!
      'H',
      'Type',
      'B',
      'StorageClass',
      'B',
      'NumberOfAuxSymbols')
  i = 0
  debug_sym = -1
  while i < coff_header.NumberOfSymbols:
    sym_offset = coff_header.PointerToSymbolTable + i * SYM.size()
    sym = SYM.unpack_from(objdata, sym_offset)

    # 107 is IMAGE_SYM_CLASS_CLR_TOKEN, which has aux entry "CLR Token
    # Definition", which contains a symbol index. Check it's never present.
    assert sym.StorageClass != 107

    # Note: sym.SectionNumber is 1-based, debug_section_index is 0-based.
    if sym.SectionNumber - 1 == debug_section_index:
      assert debug_sym == -1, 'more than one .debug$S symbol found'
      debug_sym = i
      # Make sure the .debug$S symbol looks like we expect.
      # In particular, it should have exactly one aux symbol.
      assert sym.Name == b'.debug$S'
      assert sym.Value == 0
      assert sym.Type == 0
      assert sym.StorageClass == 3
      assert sym.NumberOfAuxSymbols == 1
    elif sym.SectionNumber - 1 > debug_section_index:
      # The symbol lives in a section after .debug$S, whose number drops by
      # one once the .debug$S header is gone. Zero and negative section
      # numbers never take this branch because debug_section_index >= 0.
      sym = Subtract(sym, SectionNumber=1)
      SYM.pack_into(objdata, sym_offset, sym)
    # Aux records occupy symbol table slots too; step over them so they are
    # not misread as symbols.
    i += 1 + sym.NumberOfAuxSymbols
  assert debug_sym != -1, '.debug$S symbol not found'

  # Note: Usually the .debug$S section is the last, but for files saying
  # `includelib foo.lib`, like safe_terminate_process.asm in 32-bit builds,
  # this isn't true: .drectve is after .debug$S.

  # Update symbol table indices in relocations.
  # There are a few processor types that have one or two relocation types
  # where SymbolTableIndex has a different meaning, but not for x86.
  # Only sections before .debug$S are visited: the sections after it were
  # asserted above to have PointerToRelocations == 0.
  REL = Struct('REL', 'I', 'VirtualAddress', 'I', 'SymbolTableIndex', 'H',
               'Type')
  for header in section_headers[0:debug_section_index]:
    for j in range(0, header.NumberOfRelocations):
      rel_offset = header.PointerToRelocations + j * REL.size()
      rel = REL.unpack_from(objdata, rel_offset)
      assert rel.SymbolTableIndex != debug_sym
      if rel.SymbolTableIndex > debug_sym:
        # Two slots disappear: the .debug$S symbol and its aux record.
        rel = Subtract(rel, SymbolTableIndex=2)
        REL.pack_into(objdata, rel_offset, rel)

  # Update symbol table indices in line numbers -- just check they don't exist.
  for header in section_headers:
    assert header.NumberOfLineNumbers == 0

  # From here on, bytes are deleted from the highest file offset to the
  # lowest (symbol table entry, then .debug$S data, then the section header),
  # so the offsets used by the later deletions are still the original ones.

  # Now that all indices are updated, remove the symbol table entry referring to
  # .debug$S and its aux entry.
  del objdata[coff_header.PointerToSymbolTable +
              debug_sym * SYM.size():coff_header.PointerToSymbolTable +
              (debug_sym + 2) * SYM.size()]

  # Now we know that it's safe to write out the input data, with just the
  # timestamp overwritten to 0, the .debug$S section header cut out (and the
  # offsets of all other section headers decremented by the size of that
  # one section header), and the .debug$S section's data cut out. The symbol
  # table offset needs to be reduced by one section header and the size of
  # the missing section.
  # (The COFF spec only requires on-disk sections to be aligned in image files,
  # for obj files it's not required. If that wasn't the case, deleting slices
  # of data would not generally be safe.)

  # Update section offsets and remove .debug$S section data.
  # Data of sections in front of .debug$S only moves up by the one removed
  # section header.
  for i in range(0, debug_section_index):
    header = section_headers[i]
    # A section made up of uninitialized data only has a non-zero
    # SizeOfRawData but a zero PointerToRawData (it has no bytes in the file).
    # Leave such a zero pointer alone: subtracting from it would produce a
    # negative value that cannot be packed into the unsigned header field.
    if header.SizeOfRawData and header.PointerToRawData:
      header = Subtract(header, PointerToRawData=SECTIONHEADER.size())
    if header.NumberOfRelocations:
      header = Subtract(header, PointerToRelocations=SECTIONHEADER.size())
    if header.NumberOfLineNumbers:
      header = Subtract(header, PointerToLineNumbers=SECTIONHEADER.size())
    SECTIONHEADER.pack_into(objdata,
                            COFFHEADER.size() + i * SECTIONHEADER.size(),
                            header)
  # Data of sections behind .debug$S additionally moves up by the size of the
  # removed .debug$S data.
  shift = SECTIONHEADER.size() + debug_size
  for i in range(debug_section_index + 1, len(section_headers)):
    header = section_headers[i]
    if header.SizeOfRawData and header.PointerToRawData:
      header = Subtract(header, PointerToRawData=shift)
    if header.NumberOfRelocations:
      header = Subtract(header, PointerToRelocations=shift)
    if header.NumberOfLineNumbers:
      header = Subtract(header, PointerToLineNumbers=shift)
    SECTIONHEADER.pack_into(objdata,
                            COFFHEADER.size() + i * SECTIONHEADER.size(),
                            header)

  del objdata[debug_offset:debug_offset + debug_size]

  # Finally, remove .debug$S section header and update coff header.
  coff_header = coff_header._replace(TimeDateStamp=0)
  coff_header = Subtract(coff_header,
                         NumberOfSections=1,
                         PointerToSymbolTable=SECTIONHEADER.size() + debug_size,
                         NumberOfSymbols=2)
  COFFHEADER.pack_into(objdata, 0, coff_header)

  del objdata[COFFHEADER.size() +
              debug_section_index * SECTIONHEADER.size():COFFHEADER.size() +
              (debug_section_index + 1) * SECTIONHEADER.size()]

  # All done!
  if sys.version_info.major == 2:
    return objdata.tostring()
  else:
    return objdata.tobytes()
265
266
def main():
  """Runs the wrapped ml(64).exe command, then rewrites its output object.

  The command to run is sys.argv[1:] (the assembler executable followed by
  its arguments). If it succeeds, the object file named by its /Fo option is
  read, passed through MakeDeterministic() and written back in place.

  Returns:
    The assembler's exit code if it is non-zero, otherwise 0.

  Raises:
    AssertionError: If no output file name can be found on the command line.
  """
  ml_command = sys.argv[1:]
  ml_result = subprocess.call(ml_command)
  if ml_result != 0:
    return ml_result

  # Find the object file name. Both the attached form (/Fofoo.obj) and the
  # form with the name in the following argument (/Fo foo.obj) are handled,
  # with either '/' or '-' as the option prefix. If the option occurs more
  # than once, the last occurrence wins.
  objfile = None
  for index, arg in enumerate(ml_command):
    if arg.startswith(('/Fo', '-Fo')):
      objfile = arg[len('/Fo'):]
      if not objfile and index + 1 < len(ml_command):
        objfile = ml_command[index + 1]
  # Raised explicitly rather than asserted so the check survives `python -O`.
  if not objfile:
    raise AssertionError('failed to find ml output')

  with open(objfile, 'rb') as f:
    objdata = f.read()
  objdata = MakeDeterministic(objdata)
  with open(objfile, 'wb') as f:
    f.write(objdata)
  return 0
283
284
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == '__main__':
  sys.exit(main())
287