1#!/usr/bin/env python
2"""Create a WASM asset bundle directory structure.
3
The WASM asset bundles are pre-loaded by the final WASM build. The bundle
contains:

- a stripped-down, pyc-only stdlib zip file, e.g. {PREFIX}/lib/python311.zip
- os.py as a marker module, e.g. {PREFIX}/lib/python3.11/os.py
- an empty lib-dynload directory; a ".empty" marker file makes sure it is copied into the bundle, e.g. {PREFIX}/lib/python3.11/lib-dynload/.empty
10"""
11
12import argparse
13import pathlib
14import shutil
15import sys
16import sysconfig
17import zipfile
18
# Source directory: two levels above the directory that holds this script.
SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute()
SRCDIR_LIB = SRCDIR.joinpath("Lib")


# Bundle layout, relative to $(prefix). The version numbers are taken from
# the interpreter that runs this script.
WASM_LIB = pathlib.PurePath("lib")
WASM_STDLIB_ZIP = WASM_LIB.joinpath(
    "python{0.major}{0.minor}.zip".format(sys.version_info)
)
WASM_STDLIB = WASM_LIB.joinpath(
    "python{0.major}.{0.minor}".format(sys.version_info)
)
WASM_DYNLOAD = WASM_STDLIB.joinpath("lib-dynload")
33
34
# Files and packages that are always left out of the bundle because they
# are large or not particularly useful at the moment. Entries ending in
# "/" are directories; main() joins every entry onto the Lib directory.
OMIT_FILES = (
    # --- regression tests
    "test/",
    # --- package management
    "ensurepip/",
    "venv/",
    # --- build system
    "distutils/",
    "lib2to3/",
    # --- deprecated modules
    "asyncore.py",
    "asynchat.py",
    "uu.py",
    "xdrlib.py",
    # --- support code for other platforms
    "_aix_support.py",
    "_bootsubprocess.py",
    "_osx_support.py",
    # --- webbrowser and friends
    "antigravity.py",
    "webbrowser.py",
    # --- pure Python implementations of C extensions
    "_pydecimal.py",
    "_pyio.py",
    # --- thread based concurrency
    "concurrent/futures/thread.py",
    # --- miscellaneous unused or large files
    "pydoc_data/",
    "msilib/",
)
67
# Networking modules, additionally omitted by main() when the platform
# name starts with "emscripten". Synchronous network I/O and protocols are
# not supported there; for example, socket.create_connection() raises
# "BlockingIOError: [Errno 26] Operation in progress".
OMIT_NETWORKING_FILES = (
    "cgi.py",
    "cgitb.py",
    "email/",
    "ftplib.py",
    "http/",
    "imaplib.py",
    "mailbox.py",
    "mailcap.py",
    "nntplib.py",
    "poplib.py",
    "smtpd.py",
    "smtplib.py",
    "socketserver.py",
    "telnetlib.py",
    # urllib.parse is deliberately kept: pydoc needs it.
    "urllib/error.py",
    "urllib/request.py",
    "urllib/response.py",
    "urllib/robotparser.py",
    "wsgiref/",
)
93
# Extension module name -> stdlib files / packages that depend on it.
# main() omits the listed entries whenever the extension module is not
# reported as available by detect_extension_modules().
OMIT_MODULE_FILES = {
    "_asyncio": ["asyncio/"],
    "audioop": ["aifc.py", "sunau.py", "wave.py"],
    "_crypt": ["crypt.py"],
    "_curses": ["curses/"],
    "_ctypes": ["ctypes/"],
    "_decimal": ["decimal.py"],
    "_dbm": ["dbm/ndbm.py"],
    "_gdbm": ["dbm/gnu.py"],
    "_json": ["json/"],
    "_multiprocessing": [
        "concurrent/futures/process.py",
        "multiprocessing/",
    ],
    "pyexpat": ["xml/", "xmlrpc/"],
    "readline": ["rlcompleter.py"],
    "_sqlite3": ["sqlite3/"],
    "_ssl": ["ssl.py"],
    "_tkinter": [
        "idlelib/",
        "tkinter/",
        "turtle.py",
        "turtledemo/",
    ],
    "_zoneinfo": ["zoneinfo/"],
}
112
# Regression test sub directories that are nested inside other packages.
OMIT_SUBDIRS = (
    "ctypes/test/",
    "tkinter/test/",
    "unittest/test/",
)
119
# Accepted prefixes for the sysconfigdata module name; get_sysconfigdata()
# rejects any name that does not start with one of them. Each name is
# listed exactly once.
SYSCONFIG_NAMES = (
    "_sysconfigdata__emscripten_wasm32-emscripten",
    "_sysconfigdata__wasi_wasm32-wasi",
    "_sysconfigdata__wasi_wasm64-wasi",
)
126
127
def get_builddir(args: argparse.Namespace) -> pathlib.Path:
    """Get builddir path from pybuilddir.txt

    The file is read from the current working directory. *args* is
    currently unused.

    Surrounding whitespace is stripped, so a trailing newline in the file
    does not become part of the returned path.
    """
    with open("pybuilddir.txt", encoding="utf-8") as f:
        builddir = f.read().strip()
    return pathlib.Path(builddir)
133
134
def get_sysconfigdata(args: argparse.Namespace) -> pathlib.Path:
    """Return the sysconfigdata source file path inside ``args.builddir``.

    The module name comes from ``sysconfig._get_sysconfigdata_name()`` and
    must start with one of ``SYSCONFIG_NAMES``.

    Raises:
        ValueError: if the name has none of the accepted prefixes.
    """
    name = sysconfig._get_sysconfigdata_name()
    if name.startswith(SYSCONFIG_NAMES):
        return args.builddir / f"{name}.py"
    raise ValueError(f"Invalid sysconfig data name '{name}'.", SYSCONFIG_NAMES)
144
145
def create_stdlib_zip(
    args: argparse.Namespace,
    *,
    optimize: int = 0,
) -> None:
    """Write the stripped down stdlib archive to ``args.wasm_stdlib_zip``.

    The archive receives the sysconfigdata module followed by every
    top-level ``.py`` file and directory of ``args.srcdir_lib``, in sorted
    order. Any path contained in ``args.omit_files_absolute`` is skipped.

    *optimize* is forwarded to ``zipfile.PyZipFile``.
    """
    omitted = args.omit_files_absolute

    def filterfunc(filename: str) -> bool:
        # Compare resolved paths; the omit set holds resolved paths too.
        return pathlib.Path(filename).resolve() not in omitted

    archive = zipfile.PyZipFile(
        args.wasm_stdlib_zip,
        mode="w",
        compression=args.compression,
        optimize=optimize,
    )
    with archive as pzf:
        level = args.compresslevel
        if level is not None:
            pzf.compresslevel = level
        pzf.writepy(args.sysconfig_data)
        for child in sorted(args.srcdir_lib.iterdir()):
            resolved = child.resolve()
            name = resolved.name
            if name == "__pycache__":
                continue
            if not (name.endswith(".py") or resolved.is_dir()):
                continue
            # writepy() writes .pyc files (bytecode).
            pzf.writepy(resolved, filterfunc=filterfunc)
171
172
def detect_extension_modules(args: argparse.Namespace) -> dict:
    """Map extension module names to whether they are available.

    Two sources are consulted:

    * ``MODDISABLED_NAMES=`` in ``args.buildroot / "Makefile"``: every
      listed module is recorded as ``False``.
    * ``MODULE_<NAME>_STATE`` entries of ``build_time_vars`` in
      ``args.sysconfig_data``: a module is ``True`` only for the state
      ``"yes"``. Modules already recorded from the Makefile keep their value.

    Raises:
        ValueError: if a ``MODULE_*_STATE`` entry has an unknown value.
    """
    modules = {}

    # disabled by Modules/Setup.local ?
    # surrogateescape: only an ASCII variable is parsed, so undecodable
    # bytes elsewhere in the Makefile must not abort the run.
    with open(
        args.buildroot / "Makefile",
        encoding="utf-8",
        errors="surrogateescape",
    ) as f:
        for line in f:
            if line.startswith("MODDISABLED_NAMES="):
                for modname in line.split("=", 1)[1].split():
                    modules[modname] = False
                break

    # disabled by configure?
    with open(args.sysconfig_data, encoding="utf-8") as f:
        data = f.read()
    # NOTE: exec() runs the build-generated sysconfigdata file; never point
    # this at untrusted input. An isolated namespace keeps the executed
    # code away from this script's own globals.
    namespace = {}
    exec(data, namespace)

    prefix = "MODULE_"
    suffix = "_STATE"
    valid_states = {"yes", "disabled", "missing", "n/a"}
    for key, value in namespace["build_time_vars"].items():
        if not key.startswith(prefix) or not key.endswith(suffix):
            continue
        if value not in valid_states:
            raise ValueError(f"Unsupported value '{value}' for {key}")

        # MODULE__SSL_STATE -> "_ssl"
        modname = key[len(prefix):-len(suffix)].lower()
        if modname not in modules:
            modules[modname] = value == "yes"
    return modules
201
202
def path(val: str) -> pathlib.Path:
    """Turn *val* into an absolute ``pathlib.Path`` (argparse ``type=``)."""
    candidate = pathlib.Path(val)
    return candidate.absolute()
205
206
# Command line interface. Values given on the command line are converted
# to absolute paths by path(); the defaults are used as written.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--buildroot",
    type=path,
    default=pathlib.Path.cwd(),
    help="absolute path to build root",
)
parser.add_argument(
    "--prefix",
    type=path,
    default=pathlib.Path("/usr/local"),
    help="install prefix",
)
220
221
def main():
    """Create the WASM asset bundle below ``<buildroot>/<prefix>``.

    Steps:

    1. Derive the bundle paths from the parsed command line options.
    2. Locate the sysconfigdata file and detect which extension modules
       are available.
    3. Collect the files to omit: the always-omitted files, the nested
       regression test directories, networking modules on emscripten, and
       modules whose extension module is unavailable.
    4. Create the lib-dynload marker, copy os.py and write the stdlib zip.

    Finishes through ``parser.exit(0, ...)`` with a summary message.

    Raises:
        ValueError: if the sysconfigdata file does not exist.
    """
    args = parser.parse_args()

    relative_prefix = args.prefix.relative_to(pathlib.Path("/"))
    args.srcdir = SRCDIR
    args.srcdir_lib = SRCDIR_LIB
    args.wasm_root = args.buildroot / relative_prefix
    args.wasm_stdlib_zip = args.wasm_root / WASM_STDLIB_ZIP
    args.wasm_stdlib = args.wasm_root / WASM_STDLIB
    args.wasm_dynload = args.wasm_root / WASM_DYNLOAD

    # bpo-17004: zipimport supports only zlib compression.
    # Emscripten ZIP_STORED + -sLZ4=1 linker flags results in larger file.
    args.compression = zipfile.ZIP_DEFLATED
    args.compresslevel = 9

    args.builddir = get_builddir(args)
    args.sysconfig_data = get_sysconfigdata(args)
    if not args.sysconfig_data.is_file():
        raise ValueError(f"sysconfigdata file {args.sysconfig_data} missing.")

    extmods = detect_extension_modules(args)
    omit_files = list(OMIT_FILES)
    # Regression test packages nested inside otherwise shipped packages.
    omit_files.extend(OMIT_SUBDIRS)
    if sysconfig.get_platform().startswith("emscripten"):
        omit_files.extend(OMIT_NETWORKING_FILES)
    for modname, modfiles in OMIT_MODULE_FILES.items():
        # Missing from extmods counts as unavailable.
        if not extmods.get(modname):
            omit_files.extend(modfiles)

    # Resolved paths, to match what the zip filter function compares against.
    args.omit_files_absolute = {
        (args.srcdir_lib / name).resolve() for name in omit_files
    }

    # Empty, unused directory for dynamic libs, but required for site initialization.
    # parents=True also creates the stdlib directory that os.py is copied to.
    args.wasm_dynload.mkdir(parents=True, exist_ok=True)
    marker = args.wasm_dynload / ".empty"
    marker.touch()
    # os.py is a marker for finding the correct lib directory.
    shutil.copy(args.srcdir_lib / "os.py", args.wasm_stdlib)
    # The rest of stdlib that's useful in a WASM context.
    create_stdlib_zip(args)
    size = round(args.wasm_stdlib_zip.stat().st_size / 1024**2, 2)
    parser.exit(0, f"Created {args.wasm_stdlib_zip} ({size} MiB)\n")


if __name__ == "__main__":
    main()
269