xref: /aosp_15_r20/external/bazelbuild-rules_python/python/private/python_bootstrap_template.txt (revision 60517a1edbc8ecf509223e9af94a7adec7d736b8)
1%shebang%
2
3# This script must retain compatibility with a wide variety of Python versions
4# since it is run for every py_binary target. Currently we guarantee support
5# going back to Python 2.7, and try to support even Python 2.6 on a best-effort
6# basis. We might abandon 2.6 support once users have the ability to control the
7# above shebang string via the Python toolchain (#8685).
8
9from __future__ import absolute_import
10from __future__ import division
11from __future__ import print_function
12
13import sys
14
15# The Python interpreter unconditionally prepends the directory containing this
16# script (following symlinks) to the import path. This is the cause of #9239,
17# and is a special case of #7091. We therefore explicitly delete that entry.
18# TODO(#7091): Remove this hack when no longer necessary.
19del sys.path[0]
20
21import os
22import subprocess
23import uuid
24
def IsRunningFromZip():
  """Reports whether this bootstrap script is executing from a zip archive.

  The returned expression is a template placeholder that is filled in when the
  script is generated from this template.
  """
  # NOTE(review): presumably substituted with a Python boolean literal --
  # confirm against the code that expands this template.
  return %is_zipfile%
27
28if IsRunningFromZip():
29  import shutil
30  import tempfile
31  import zipfile
32else:
33  import re
34
def IsWindows():
  """Returns True if running on Windows, i.e. when os.name is 'nt'."""
  running_on_nt = (os.name == 'nt')
  return running_on_nt
38
def _ParseDottedVersion(version):
  """Converts a dotted version string such as '10.0.14393' to a tuple of ints.

  Parsing stops at the first component that is not a plain integer, so an
  empty or malformed string yields a shorter (possibly empty) tuple.
  """
  numbers = []
  for component in version.split('.'):
    try:
      numbers.append(int(component))
    except ValueError:
      break
  return tuple(numbers)

def GetWindowsPathWithUNCPrefix(path):
  r"""Adds the \\?\ prefix to a normalized absolute Windows path when needed.

  Surrounding whitespace is always stripped from `path`. Beyond that, the path
  is returned unchanged on non-Windows platforms, under Python 2, on Windows
  10 version 1607 (build 14393) or later, on mingw, and when the path already
  carries the prefix.

  Args:
    path: (str) The path to adjust.

  Returns:
    The stripped path, prefixed and made absolute if a prefix was required.
  """
  path = path.strip()

  # No need to add prefix for non-Windows platforms.
  # And \\?\ doesn't work in python 2 or on mingw
  if not IsWindows() or sys.version_info[0] < 3:
    return path

  # Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been
  # removed from common Win32 file and directory functions.
  # Related doc: https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later
  import platform
  # Compare numerically: a plain string comparison orders '6.1.7601' after
  # '10.0.14393', which would wrongly treat older Windows releases as new.
  if _ParseDottedVersion(platform.win32_ver()[1]) >= (10, 0, 14393):
    return path

  # import sysconfig only now to maintain python 2.6 compatibility
  import sysconfig
  if sysconfig.get_platform() == 'mingw':
    return path

  # Nothing to do when the caller already supplied a prefixed path.
  unicode_prefix = '\\\\?\\'
  if path.startswith(unicode_prefix):
    return path

  # os.path.abspath returns a normalized absolute path
  return unicode_prefix + os.path.abspath(path)
70
def HasWindowsExecutableExtension(path):
  """Returns True if `path` ends with a Windows executable extension.

  Recognizes '.exe', '.com' and '.bat'. The comparison ignores case because
  Windows file names are case-insensitive, so 'PYTHON.EXE' is accepted too.

  Args:
    path: (str) The file path or name to inspect.
  """
  return path.lower().endswith(('.exe', '.com', '.bat'))
73
# Interpreter path filled into this template. On Windows, '.exe' is appended
# when the configured value has none of the recognized executable extensions.
PYTHON_BINARY = '%python_binary%'
if IsWindows():
  if not HasWindowsExecutableExtension(PYTHON_BINARY):
    PYTHON_BINARY += '.exe'
77
def SearchPath(name):
  """Looks up an executable file called `name` in the PATH directories.

  Uses the PATH environment variable, or os.defpath when PATH is unset. Empty
  entries are skipped.

  Returns:
    The first matching path that is a regular file and executable, or None
    when no directory contains one.
  """
  path_value = os.getenv('PATH', os.defpath)
  for entry in path_value.split(os.pathsep):
    if not entry:
      continue
    candidate = os.path.join(entry, name)
    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
      return candidate
  return None
87
def FindPythonBinary(module_space):
  """Resolves PYTHON_BINARY to a usable interpreter path via FindBinary.

  Args:
    module_space: (str) Runfiles directory used for repo-relative paths.

  Returns:
    Whatever FindBinary returns for PYTHON_BINARY (a path, or None).
  """
  interpreter_path = FindBinary(module_space, PYTHON_BINARY)
  return interpreter_path
91
def PrintVerbose(*args):
  """Prints `args` to stderr, prefixed with "bootstrap:", when verbose.

  Output is produced only if RULES_PYTHON_BOOTSTRAP_VERBOSE is non-empty in
  the environment; stderr is flushed after each message.
  """
  if not os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE"):
    return
  print("bootstrap:", *args, file=sys.stderr, flush=True)
95
def PrintVerboseCoverage(*args):
  """Writes `args` to stderr, but only when VERBOSE_COVERAGE is non-empty."""
  if not os.environ.get("VERBOSE_COVERAGE"):
    return
  print(*args, file=sys.stderr)
100
def IsVerboseCoverage():
  """Returns True if VERBOSE_COVERAGE is non-empty in the environment.

  Returns:
    (bool) True when the variable is set to a non-empty string, otherwise
    False. The value is converted explicitly so callers get a real boolean
    rather than the raw environment string or None.
  """
  return bool(os.environ.get("VERBOSE_COVERAGE"))
104
def FindCoverageEntryPoint(module_space):
  """Locates the coverage tool entry point, if one is configured.

  The value substituted into this template takes precedence; when it is empty
  the PYTHON_COVERAGE environment variable is consulted instead.

  Args:
    module_space: (str) Runfiles directory used for repo-relative paths.

  Returns:
    The result of FindBinary for the configured tool, or None when neither
    source names a tool.
  """
  toolchain_tool = '%coverage_tool%'
  if toolchain_tool:
    PrintVerboseCoverage('Using toolchain coverage_tool %r' % toolchain_tool)
    return FindBinary(module_space, toolchain_tool)

  env_tool = os.environ.get('PYTHON_COVERAGE')
  if not env_tool:
    return None
  PrintVerboseCoverage('PYTHON_COVERAGE: %r' % env_tool)
  return FindBinary(module_space, env_tool)
116
def FindBinary(module_space, bin_name):
  """Resolves `bin_name` to the location of the real binary.

  Args:
    module_space: (str) Runfiles directory that repo-relative names are
        joined onto.
    bin_name: (str) A label, an absolute path, a repo-relative path, or a bare
        program name.

  Returns:
    None for an empty name; the name itself when absolute; the name joined
    onto `module_space` when it contains a path separator; otherwise the
    result of looking the name up on the search path.

  Raises:
    AssertionError: if `bin_name` starts with "//" (labels are unsupported).
  """
  if not bin_name:
    return None

  # Labels are recognized but cannot be executed yet.
  if bin_name.startswith("//"):
    raise AssertionError(
        "Bazel does not support execution of Python interpreters via labels yet"
    )

  # An absolute path is used verbatim.
  if os.path.isabs(bin_name):
    return bin_name

  # normpath() converts slashes to os.sep on Windows, so a separator here
  # means the name is a path relative to the repo root.
  if os.sep in os.path.normpath(bin_name):
    return os.path.join(module_space, bin_name)

  # A bare name has to be found on the search path.
  return SearchPath(bin_name)
136
def CreatePythonPathEntries(python_imports, module_space):
  """Builds the import path entries rooted at the module space.

  Args:
    python_imports: (str) Colon-separated import directories, each relative
        to `module_space`.
    module_space: (str) Path of the runfiles directory.

  Returns:
    A list starting with `module_space`, followed by one
    '<module_space>/<import>' entry per colon-separated component.
  """
  entries = [module_space]
  for import_dir in python_imports.split(':'):
    entries.append('%s/%s' % (module_space, import_dir))
  return entries
140
def FindModuleSpace(main_rel_path):
  """Locates the runfiles tree for this program.

  Args:
    main_rel_path: (str) Runfiles-relative path of the main entry point, used
        to validate a directory named by the environment.

  Returns:
    The path of the runfiles directory.

  Raises:
    AssertionError: if no runfiles directory can be found.
  """
  # The caller may have resolved this stub through the runfiles manifest, in
  # which case argv[0] need not point inside the runfiles directory. Honor
  # RUNFILES_DIR and RUNFILES_MANIFEST_FILE when the caller provides them.
  candidate = os.environ.get('RUNFILES_DIR')
  if not candidate:
    manifest = os.environ.get('RUNFILES_MANIFEST_FILE', '')
    if manifest.endswith(('.runfiles_manifest', '.runfiles/MANIFEST')):
      # Both '_manifest' and '/MANIFEST' are 9 characters long; dropping them
      # leaves the path ending in '.runfiles'.
      candidate = manifest[:-9]
  # Only trust the environment if the directory really holds our entry point.
  if candidate and os.path.exists(os.path.join(candidate, main_rel_path)):
    return candidate

  stub = sys.argv[0]
  if not os.path.isabs(stub):
    stub = os.path.join(os.getcwd(), stub)

  on_windows = IsWindows()
  adjacent_suffix = ('.exe' if on_windows else '') + '.runfiles'
  inside_runfiles = r'(.*\.runfiles)' + (r'\\' if on_windows else '/') + '.*'

  while True:
    # A runfiles directory sitting next to the stub.
    adjacent = stub + adjacent_suffix
    if os.path.isdir(adjacent):
      return adjacent

    # The stub itself lives somewhere below a runfiles directory.
    found = re.match(inside_runfiles, stub)
    if found:
      return found.group(1)

    # Otherwise follow one level of symlink and try again.
    if not os.path.islink(stub):
      break
    link_target = os.readlink(stub)
    if not os.path.isabs(link_target):
      link_target = os.path.join(os.path.dirname(stub), link_target)
    stub = link_target

  raise AssertionError('Cannot find .runfiles directory for %s' % sys.argv[0])
182
def ExtractZip(zip_path, dest_dir):
  """Unpacks a zip archive while restoring the unix mode bits of each entry.

  The zipfile module does not apply the stored permission bits (notably the
  executable bit) on extraction, see https://bugs.python.org/issue15795, so
  they are applied here with chmod after each member is extracted.

  Args:
      zip_path: The path to the zip file to extract
      dest_dir: The path to the destination directory
  """
  zip_path = GetWindowsPathWithUNCPrefix(zip_path)
  dest_dir = GetWindowsPathWithUNCPrefix(dest_dir)
  with zipfile.ZipFile(zip_path) as archive:
    for member in archive.infolist():
      archive.extract(member, dest_dir)
      # UNC-prefixed paths must be absolute/normalized. See
      # https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file#maximum-path-length-limitation
      extracted = os.path.abspath(os.path.join(dest_dir, member.filename))
      # external_attr keeps the Unix st_mode (see "man 7 inode") in its upper
      # 16 bits. Only the low 12 of those, the file mode bits, are applied,
      # because chmod cannot change the file type bits.
      unix_mode = member.external_attr >> 16
      if unix_mode == 0:
        # Rumor has it these can be 0 for zips created on Windows.
        continue
      os.chmod(extracted, unix_mode & 0o7777)
209
def CreateModuleSpace():
  """Creates the runfiles tree by extracting the zip file.

  The archive located at the directory name of this script's __file__ is
  unpacked into a fresh temporary directory.

  Returns:
    The path of the 'runfiles' directory inside that temporary directory.
  """
  extraction_root = tempfile.mkdtemp(suffix='', prefix='Bazel.runfiles_')
  archive_path = os.path.dirname(__file__)
  ExtractZip(archive_path, extraction_root)
  # IMPORTANT: Later code does `rm -fr` on dirname(module_space) -- it's
  # important that deletion code be in sync with this directory structure
  return os.path.join(extraction_root, 'runfiles')
217
def GetRepositoriesImports(module_space, import_all):
  """Returns repository roots to add to the import path.

  Args:
    module_space: (str) Path of the runfiles directory.
    import_all: When true, every directory directly under `module_space` is
        returned; otherwise only the main workspace directory.

  Returns:
    A list of paths. With `import_all`, the entries are sorted and limited to
    directories.
  """
  if not import_all:
    return [os.path.join(module_space, '%workspace_name%')]

  children = sorted(
      os.path.join(module_space, name) for name in os.listdir(module_space))
  return [child for child in children if os.path.isdir(child)]
225
def RunfilesEnvvar(module_space):
  """Picks the environment variable that tells child processes about runfiles.

  Args:
    module_space: (str) Path of the runfiles directory.

  Returns:
    A tuple of (var_name, var_value) where var_name is either 'RUNFILES_DIR' or
    'RUNFILES_MANIFEST_FILE' and var_value is the path to that directory or
    file, or (None, None) if runfiles couldn't be found.
  """
  # When this binary is a data-dependency of another one, that other binary
  # sets RUNFILES_MANIFEST_FILE or RUNFILES_DIR for us; pass the value along.
  for var_name in ('RUNFILES_MANIFEST_FILE', 'RUNFILES_DIR'):
    inherited = os.environ.get(var_name, None)
    if inherited:
      return (var_name, inherited)

  # If running from a zip, there's no manifest file.
  if IsRunningFromZip():
    return ('RUNFILES_DIR', module_space)

  # First the runfiles "output" manifest, argv[0] + ".runfiles_manifest", then
  # the "input" manifest, argv[0] + ".runfiles/MANIFEST". Normally both are
  # present, but the former will be missing for zip-based builds or if someone
  # copies the runfiles tree elsewhere.
  manifest_candidates = (
      module_space + '_manifest',
      os.path.join(module_space, 'MANIFEST'),
  )
  for manifest in manifest_candidates:
    if os.path.exists(manifest):
      return ('RUNFILES_MANIFEST_FILE', manifest)

  # If running in a sandbox and no environment variables are set, then
  # look for the runfiles next to the binary.
  if module_space.endswith('.runfiles') and os.path.isdir(module_space):
    return ('RUNFILES_DIR', module_space)

  return (None, None)
267
def Deduplicate(items):
  """Yields each distinct item once, in order of first appearance.

  Args:
    items: An iterable of hashable values.
  """
  already_yielded = set()
  for item in items:
    if item in already_yielded:
      continue
    already_yielded.add(item)
    yield item
275
def InstrumentedFilePaths():
  """Yields (realpath, listed path) pairs for instrumented files.

  File names are read, one per line, from the manifest named by the
  COVERAGE_MANIFEST environment variable. Blank lines are skipped, and a pair
  is produced only when the resolved path differs from the listed one.
  Nothing is yielded when COVERAGE_MANIFEST is unset or empty.
  """
  manifest_path = os.environ.get('COVERAGE_MANIFEST')
  if not manifest_path:
    return
  with open(manifest_path, "r") as manifest_file:
    for raw_line in manifest_file:
      listed = raw_line.strip()
      if listed:
        try:
          resolved = os.path.realpath(listed)
        except OSError:
          print(
            "Could not find instrumented file {}".format(listed),
            file=sys.stderr)
          continue
        if resolved != listed:
          PrintVerboseCoverage("Fixing up {} -> {}".format(resolved, listed))
          yield (resolved, listed)
296
def UnresolveSymlinks(output_filename):
  # type: (str) -> None
  """Rewrites resolved file paths in the lcov output back to the listed paths.

  Although coveragepy is asked to use relative file names, it currently
  ignores that setting when generating the lcov report (and other reports
  which are not the XML report), so the report is fixed up afterwards.

  This is a workaround; once the issue is fixed upstream and the updated
  version is widely in use, this function should be removed.

  See https://github.com/nedbat/coveragepy/issues/963.

  Args:
    output_filename: (str) Path of the lcov file, rewritten in place. The file
        is left untouched when there are no paths to substitute.
  """
  replacements = list(InstrumentedFilePaths())
  if not replacements:
    return

  # Move the report aside, then regenerate it line by line at the old name.
  backup = output_filename + '.tmp'
  os.rename(output_filename, backup)
  with open(backup, "r") as source:
    with open(output_filename, "w") as sink:
      for record in source:
        # Only the 'SF:' records are rewritten.
        if record.startswith('SF:'):
          for resolved, listed in replacements:
            record = record.replace(resolved, listed)
        sink.write(record)
  os.unlink(backup)
322
def ExecuteFile(python_program, main_filename, args, env, module_space,
                coverage_entrypoint, workspace, delete_module_space):
  # type: (str, str, list[str], dict[str, str], str, str|None, str|None, bool) -> ...
  """Executes the given Python file using the various environment settings.

  This will not return, and acts much like os.execv, except is much
  more restricted, and handles Bazel-related edge cases. Control either passes
  to the new program via _RunExecv, or this process exits via sys.exit() with
  the child's return code.

  Args:
    python_program: (str) Path to the Python binary to use for execution
    main_filename: (str) The Python file to execute
    args: (list[str]) Additional args to pass to the Python file
    env: (dict[str, str]) A dict of environment variables to set for the execution
    module_space: (str) Path to the module space/runfiles tree directory
    coverage_entrypoint: (str|None) Path to the coverage tool entry point file.
    workspace: (str|None) Name of the workspace to execute in. This is expected to be a
        directory under the runfiles tree.
    delete_module_space: (bool), True if the parent directory of the module
        space should be deleted once the program has finished running, False
        if not. The deletion happens regardless of the program's exit code.
  """
  # We want to use os.execv instead of subprocess.call, which causes
  # problems with signal passing (making it difficult to kill
  # Bazel). However, these conditions force us to run via
  # subprocess.call instead:
  #
  # - On Windows, os.execv doesn't handle arguments with spaces
  #   correctly, and it actually starts a subprocess just like
  #   subprocess.call.
  # - When running in a workspace or zip file, we need to clean up the
  #   workspace after the process finishes so control must return here.
  # - If we may need to emit a host config warning after execution, we
  #   can't execv because we need control to return here. This only
  #   happens for targets built in the host config.
  # - For coverage targets, at least coveragepy requires running in
  #   two invocations, which also requires control to return here.
  #
  if not (IsWindows() or workspace or coverage_entrypoint or delete_module_space):
    # os.execv replaces this process, so nothing below runs on this path.
    _RunExecv(python_program, main_filename, args, env)

  if coverage_entrypoint is not None:
    ret_code = _RunForCoverage(python_program, main_filename, args, env,
                               coverage_entrypoint, workspace)
  else:
    ret_code = subprocess.call(
      [python_program, main_filename] + args,
      env=env,
      cwd=workspace
    )

  if delete_module_space:
    # NOTE: dirname() is called because CreateModuleSpace() creates a
    # sub-directory within a temporary directory, and we want to remove the
    # whole temporary directory. The second argument (ignore_errors=True)
    # makes the cleanup best-effort.
    shutil.rmtree(os.path.dirname(module_space), True)
  sys.exit(ret_code)
378
def _RunExecv(python_program, main_filename, args, env):
  # type: (str, str, list[str], dict[str, str]) -> ...
  """Replaces this process with the interpreter running the main file.

  The entries of `env` are first merged into os.environ, which is what the
  new program inherits through os.execv.

  Args:
    python_program: (str) Path to the Python binary to execute.
    main_filename: (str) The Python file to run.
    args: (list[str]) Additional args to pass to the Python file.
    env: (dict[str, str]) Environment variables to set for the execution.
  """
  os.environ.update(env)
  PrintVerbose("RunExecv: environ:", os.environ)
  command_line = [python_program, main_filename] + args
  PrintVerbose("RunExecv: argv:", python_program, command_line)
  os.execv(python_program, command_line)
387
def _RunForCoverage(python_program, main_filename, args, env,
                    coverage_entrypoint, workspace):
  # type: (str, str, list[str], dict[str, str], str, str|None) -> int
  """Collects coverage information for the given Python file.

  Runs the program under the coverage tool, then runs the tool a second time
  to export the collected data as lcov to 'pylcov.dat' in COVERAGE_DIR.
  COVERAGE_DIR must be present in os.environ.

  Args:
    python_program: (str) Path to the Python binary to use for execution
    main_filename: (str) The Python file to execute
    args: (list[str]) Additional args to pass to the Python file
    env: (dict[str, str]) A dict of environment variables to set for the execution
    coverage_entrypoint: (str|None) Path to the coverage entry point to execute with.
    workspace: (str|None) Directory passed as the working directory (cwd) of
        both coverage subprocesses.

  Returns:
    The exit code of the lcov conversion if it is non-zero, otherwise the exit
    code of the program run.
  """
  # We need for coveragepy to use relative paths.  This can only be configured
  # via an rc file, so write a uniquely named temporary one into COVERAGE_DIR.
  unique_id = uuid.uuid4()
  rcfile_name = os.path.join(os.environ['COVERAGE_DIR'], ".coveragerc_{}".format(unique_id))
  with open(rcfile_name, "w") as rcfile:
    rcfile.write('''[run]
relative_files = True
''')
  PrintVerboseCoverage('Coverage entrypoint:', coverage_entrypoint)
  # First run the target Python file via coveragepy to create a .coverage
  # database file, from which we can later export lcov.
  ret_code = subprocess.call(
    [
      python_program,
      coverage_entrypoint,
      "run",
      "--rcfile=" + rcfile_name,
      "--append",
      "--branch",
      main_filename
    ] + args,
    env=env,
    cwd=workspace
  )
  output_filename = os.path.join(os.environ['COVERAGE_DIR'], 'pylcov.dat')

  PrintVerboseCoverage('Converting coveragepy database to lcov:', output_filename)
  # Run coveragepy again to convert its .coverage database file into lcov.
  # Under normal conditions running lcov outputs to stdout/stderr, which causes problems for `coverage`.
  params = [python_program, coverage_entrypoint, "lcov", "--rcfile=" + rcfile_name, "-o", output_filename, "--quiet"]
  kparams = {"env": env, "cwd": workspace, "stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
  if IsVerboseCoverage():
    # reconnect stdout/stderr to lcov generation.  Should be useful for debugging `coverage` issues.
    params.remove("--quiet")
    kparams['stdout'] = sys.stderr
    kparams['stderr'] = sys.stderr

  # A non-zero exit code from the conversion takes precedence; otherwise the
  # exit code of the program run above is kept.
  ret_code = subprocess.call(
    params,
    **kparams
  ) or ret_code

  try:
    os.unlink(rcfile_name)
  except OSError as err:
    # It's possible that the profiled program might execute another Python
    # binary through a wrapper that would then delete the rcfile.  Not much
    # we can do about that, besides ignore the failure here.
    PrintVerboseCoverage('Error removing temporary coverage rc file:', err)
  # Only post-process the report if the conversion actually produced one.
  if os.path.isfile(output_filename):
    UnresolveSymlinks(output_filename)
  return ret_code
454
def Main():
  """Prepares the environment and runs the main Python file of this binary.

  Determines the module space (runfiles tree), builds PYTHONPATH and the
  runfiles environment variable, locates the Python interpreter and, when
  COVERAGE_DIR is set, the coverage tool, and finally hands over to
  ExecuteFile, which does not return.

  Raises:
    AssertionError: if the main file is missing or unreadable, or if the
        Python binary cannot be found.
    EnvironmentError: if the configured coverage tool does not exist, or if
        executing the program fails.
  """
  args = sys.argv[1:]

  new_env = {}

  # The main Python source file.
  # The magic string percent-main-percent is replaced with the runfiles-relative
  # filename of the main file of the Python binary in BazelPythonSemantics.java.
  main_rel_path = '%main%'
  if IsWindows():
    main_rel_path = main_rel_path.replace('/', os.sep)

  # When running from a zip the runfiles tree is extracted to a temporary
  # directory, which has to be deleted again after the program has run.
  if IsRunningFromZip():
    module_space = CreateModuleSpace()
    delete_module_space = True
  else:
    module_space = FindModuleSpace(main_rel_path)
    delete_module_space = False

  python_imports = '%imports%'
  python_path_entries = CreatePythonPathEntries(python_imports, module_space)
  python_path_entries += GetRepositoriesImports(module_space, %import_all%)
  # Apply the Windows path prefix handling to every entry (a no-op apart from
  # whitespace stripping wherever no prefix is needed).
  python_path_entries = [
    GetWindowsPathWithUNCPrefix(d)
    for d in python_path_entries
  ]

  # Entries from an inherited PYTHONPATH are kept, after our own entries.
  old_python_path = os.environ.get('PYTHONPATH')
  if old_python_path:
    python_path_entries += old_python_path.split(os.pathsep)

  # Remove duplicates to avoid overly long PYTHONPATH (#10977). Preserve order,
  # keep first occurrence only.
  python_path = os.pathsep.join(Deduplicate(python_path_entries))

  if IsWindows():
    python_path = python_path.replace('/', os.sep)

  new_env['PYTHONPATH'] = python_path
  runfiles_envkey, runfiles_envvalue = RunfilesEnvvar(module_space)
  if runfiles_envkey:
    new_env[runfiles_envkey] = runfiles_envvalue

  # Don't prepend a potentially unsafe path to sys.path
  # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
  new_env['PYTHONSAFEPATH'] = '1'

  main_filename = os.path.join(module_space, main_rel_path)
  main_filename = GetWindowsPathWithUNCPrefix(main_filename)
  # NOTE(review): assert statements are skipped when Python runs with -O, so
  # these checks are not guaranteed to execute.
  assert os.path.exists(main_filename), \
         'Cannot exec() %r: file not found.' % main_filename
  assert os.access(main_filename, os.R_OK), \
         'Cannot exec() %r: file not readable.' % main_filename

  # `program` is kept separately for the error report at the end of Main.
  program = python_program = FindPythonBinary(module_space)
  if python_program is None:
    raise AssertionError('Could not find python binary: ' + PYTHON_BINARY)

  # COVERAGE_DIR is set if coverage is enabled and instrumentation is configured
  # for something, though it could be another program executing this one or
  # one executed by this one (e.g. an extension module).
  if os.environ.get('COVERAGE_DIR'):
    cov_tool = FindCoverageEntryPoint(module_space)
    if cov_tool is None:
      PrintVerboseCoverage('Coverage was enabled, but python coverage tool was not configured.')
    else:
      # Inhibit infinite recursion:
      if 'PYTHON_COVERAGE' in os.environ:
        del os.environ['PYTHON_COVERAGE']

      if not os.path.exists(cov_tool):
        raise EnvironmentError(
          'Python coverage tool %r not found. '
          'Try running with VERBOSE_COVERAGE=1 to collect more information.'
          % cov_tool
        )

      # coverage library expects sys.path[0] to contain the library, and replaces
      # it with the directory of the program it starts. Our actual sys.path[0] is
      # the runfiles directory, which must not be replaced.
      # CoverageScript.do_execute() undoes this sys.path[0] setting.
      #
      # Update sys.path such that python finds the coverage package. The coverage
      # entry point is coverage.coverage_main, so we need to do twice the dirname.
      python_path_entries = new_env['PYTHONPATH'].split(os.pathsep)
      python_path_entries.append(os.path.dirname(os.path.dirname(cov_tool)))
      new_env['PYTHONPATH'] = os.pathsep.join(Deduplicate(python_path_entries))
  else:
    cov_tool = None

  # Some older Python versions on macOS (namely Python 3.7) may unintentionally
  # leave this environment variable set after starting the interpreter, which
  # causes problems with Python subprocesses correctly locating sys.executable,
  # which subsequently causes failure to launch on Python 3.11 and later.
  if '__PYVENV_LAUNCHER__' in os.environ:
    del os.environ['__PYVENV_LAUNCHER__']

  # Inherit the rest of the current environment; values computed above win.
  new_env.update((key, val) for key, val in os.environ.items() if key not in new_env)

  workspace = None
  if IsRunningFromZip():
    # If RUN_UNDER_RUNFILES equals 1, it means we need to
    # change directory to the right runfiles directory.
    # (So that the data files are accessible)
    if os.environ.get('RUN_UNDER_RUNFILES') == '1':
      workspace = os.path.join(module_space, '%workspace_name%')

  try:
    sys.stdout.flush()
    # NOTE: ExecuteFile may call os.execv() and lines after this will never run.
    ExecuteFile(
      python_program, main_filename, args, new_env, module_space,
      cov_tool, workspace,
      delete_module_space = delete_module_space,
    )

  except EnvironmentError:
    # This works from Python 2.4 all the way to 3.x.
    e = sys.exc_info()[1]
    # This exception occurs when os.execv() fails for some reason.
    if not getattr(e, 'filename', None):
      e.filename = program  # Add info to error message
    raise
578
579if __name__ == '__main__':
580  Main()
581