xref: /aosp_15_r20/external/boringssl/src/util/bot/extract.py (revision 8fb009dc861624b67b6cdb62ea21f0f22d0c584b)
1# Copyright (c) 2015, Google Inc.
2#
3# Permission to use, copy, modify, and/or distribute this software for any
4# purpose with or without fee is hereby granted, provided that the above
5# copyright notice and this permission notice appear in all copies.
6#
7# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15"""Extracts archives."""
16
17
18import hashlib
19import optparse
20import os
21import os.path
22import tarfile
23import shutil
24import sys
25import zipfile
26
27
def CheckedJoin(output, path):
  """
  CheckedJoin returns os.path.join(output, path). It does sanity checks to
  ensure the resulting path is under output, but shouldn't be used on untrusted
  input.

  path is normalized first, so interior '..' components are collapsed before
  the checks run. ValueError is raised, with the normalized path as its
  argument, if the path is absolute, names a drive, normalizes to the current
  directory ('.'), or climbs out of output through a leading '..' component.
  Names that merely begin with a dot, such as '.gitignore', are accepted.
  """
  path = os.path.normpath(path)
  # A drive-qualified path (e.g. 'C:foo' on Windows) is not absolute, but
  # os.path.join would still let it replace output entirely.
  if os.path.isabs(path) or os.path.splitdrive(path)[0]:
    raise ValueError(path)
  # After normalization any remaining '..' components sit at the front, so
  # looking at the first component is enough to detect an escape. Comparing
  # whole components, rather than a leading '.', keeps dotfiles extractable.
  first_component = path.split(os.sep, 1)[0]
  if first_component in (os.curdir, os.pardir):
    raise ValueError(path)
  return os.path.join(output, path)
38
39
class FileEntry(object):
  """
  FileEntry describes one regular file read from an archive: path is its name
  inside the archive, mode is the permission bits to apply after extraction (or
  None to leave them alone), and fileobj is a readable file-like object holding
  the file's contents.
  """
  def __init__(self, path, mode, fileobj):
    self.path, self.mode, self.fileobj = path, mode, fileobj
45
46
class SymlinkEntry(object):
  """
  SymlinkEntry describes one symbolic link read from an archive: path is the
  link's name inside the archive, mode is the permission bits to apply after
  extraction (or None to leave them alone), and target is the path the link
  points at.
  """
  def __init__(self, path, mode, target):
    self.path, self.mode, self.target = path, mode, target
52
53
def IterateZip(path):
  """
  IterateZip is a generator over the files stored in the zip archive at path.
  It yields one FileEntry per member, skipping members whose name ends in '/'
  (directories). Every yielded entry has a mode of None. The archive stays open
  for as long as the generator is being iterated.
  """
  with zipfile.ZipFile(path, 'r') as archive:
    for member in archive.infolist():
      is_directory = member.filename.endswith('/')
      if not is_directory:
        yield FileEntry(member.filename, None, archive.open(member))
64
65
def IterateTar(path, compression):
  """
  IterateTar is a generator over the members of the compressed tar archive at
  path. compression is the tarfile mode suffix naming the compression scheme
  (the archive is opened with mode 'r:' + compression).

  Directories are skipped, symlinks are yielded as SymlinkEntry objects with a
  mode of None, and regular files are yielded as FileEntry objects carrying the
  mode recorded in the archive. Any other member type raises ValueError.
  """
  with tarfile.open(path, 'r:' + compression) as archive:
    for member in archive:
      if member.isdir():
        continue
      if member.issym():
        yield SymlinkEntry(member.name, None, member.linkname)
        continue
      if not member.isfile():
        raise ValueError('Unknown entry type "%s"' % (member.name, ))
      yield FileEntry(member.name, member.mode, archive.extractfile(member))
81
82
def main(args):
  """
  main extracts an archive into an output directory and returns the process
  exit code.

  args is the list of command-line arguments without the program name: an
  optional --no-prefix flag followed by ARCHIVE and OUTPUT. Unless --no-prefix
  is given, every entry must live under one common top-level directory, which
  is stripped during extraction.

  Returns 1 after printing usage if the argument count is wrong. Otherwise
  returns 0: when ARCHIVE does not exist, when OUTPUT already holds a stamp file
  matching ARCHIVE's SHA-256 digest, or after a successful extraction. Any
  existing OUTPUT directory is deleted before extracting.

  Raises ValueError for an unsupported archive extension, for an entry path
  that contains a backslash or starts with '/', for an entry that has no
  top-level directory or a different one from the other entries (when the
  prefix is being stripped), and for any path CheckedJoin rejects.
  """
  parser = optparse.OptionParser(usage='Usage: %prog ARCHIVE OUTPUT')
  parser.add_option('--no-prefix', dest='no_prefix', action='store_true',
                    help='Do not remove a prefix from paths in the archive.')
  options, args = parser.parse_args(args)

  if len(args) != 2:
    parser.print_help()
    return 1

  archive, output = args

  if not os.path.exists(archive):
    # Skip archives that weren't downloaded.
    return 0

  # Hash the archive in 1 MiB chunks so it is never held in memory all at once.
  sha256 = hashlib.sha256()
  with open(archive, 'rb') as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b''):
      sha256.update(chunk)
  digest = sha256.hexdigest()

  # The stamp file records the digest of the archive last extracted into
  # output. If it matches, the existing tree is already current.
  stamp_path = os.path.join(output, ".boringssl_archive_digest")
  if os.path.exists(stamp_path):
    with open(stamp_path) as f:
      if f.read().strip() == digest:
        print("Already up-to-date.")
        return 0

  if archive.endswith('.zip'):
    entries = IterateZip(archive)
  elif archive.endswith('.tar.gz'):
    entries = IterateTar(archive, 'gz')
  elif archive.endswith('.tar.bz2'):
    entries = IterateTar(archive, 'bz2')
  elif archive.endswith('.tar.xz'):
    entries = IterateTar(archive, 'xz')
  else:
    raise ValueError(archive)

  try:
    if os.path.exists(output):
      print("Removing %s" % (output, ))
      shutil.rmtree(output)

    print("Extracting %s to %s" % (archive, output))
    prefix = None
    num_extracted = 0
    for entry in entries:
      # Even on Windows, zip files must always use forward slashes.
      if '\\' in entry.path or entry.path.startswith('/'):
        raise ValueError(entry.path)

      if not options.no_prefix:
        new_prefix, separator, rest = entry.path.partition('/')
        if not separator:
          # The entry sits at the top level of the archive, so there is no
          # prefix directory to strip. Report the offending path instead of
          # failing with an opaque unpacking error.
          raise ValueError(entry.path)

        # Ensure the archive is consistent.
        if prefix is None:
          prefix = new_prefix
        if prefix != new_prefix:
          raise ValueError((prefix, new_prefix))
      else:
        rest = entry.path

      # Extract the file into the output directory.
      fixed_path = CheckedJoin(output, rest)
      parent_dir = os.path.dirname(fixed_path)
      if not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)
      if isinstance(entry, FileEntry):
        # Close the per-entry reader as soon as it has been copied so handles
        # do not pile up while walking a large archive.
        try:
          with open(fixed_path, 'wb') as out:
            shutil.copyfileobj(entry.fileobj, out)
        finally:
          entry.fileobj.close()
      elif isinstance(entry, SymlinkEntry):
        os.symlink(entry.target, fixed_path)
      else:
        raise TypeError('unknown entry type')

      # Fix up permissions if needbe.
      # TODO(davidben): To be extra tidy, this should only track the execute bit
      # as in git.
      if entry.mode is not None:
        os.chmod(fixed_path, entry.mode)

      # Print every 100 files, so bots do not time out on large archives.
      num_extracted += 1
      if num_extracted % 100 == 0:
        print("Extracted %d files..." % (num_extracted,))
  finally:
    entries.close()

  # An archive with no file entries never creates the output directory in the
  # loop above, so make sure there is somewhere to put the stamp.
  stamp_dir = os.path.dirname(stamp_path)
  if stamp_dir and not os.path.isdir(stamp_dir):
    os.makedirs(stamp_dir)

  with open(stamp_path, 'w') as f:
    f.write(digest)

  print("Done. Extracted %d files." % (num_extracted,))
  return 0
180
181
if __name__ == '__main__':
  # Run as a script: hand main the command-line arguments without the program
  # name, and use its return value as the process exit status.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
184