|
|
|
|
|
""" |
|
Freeze Python packages. |
|
|
|
Freezing makes it possible to ship arbitrary Python modules as part of a C++ |
|
library. The Python source of the module is compiled to bytecode and written |
|
to `.c` files, to be imported by Python's built-in FrozenImporter. |
|
|
|
In a normal Python installation, FrozenImporter is only used to bootstrap the |
|
initialization of the import machinery. Python's importers are defined in |
|
Python (see `_bootstrap.py` and `_bootstrap_external.py`) but need to be |
|
retrieved before any importers are available. Freezing the module bytecode |
|
resolves this circular dependency. |
|
|
|
This script will freeze the Python standard library. It produces two things: |
|
- Bytecode files: A set of `.c` files that define C variables containing Python bytecode.
|
- Main file: A `main.c` file listing all of these modules in the right form to be |
|
consumed by FrozenImporter. |
|
|
|
A library that wishes to use these modules makes them available to the local
Python instance by extending `PyImport_FrozenModules` appropriately (see
https://docs.python.org/3/c-api/import.html#c.PyImport_FrozenModules).
|
""" |
|
|
|
import argparse
import contextlib
import functools
import itertools
import marshal
import os
import tokenize
import types
from dataclasses import dataclass
from pathlib import Path
|
|
|
|
|
# Placeholder passed to `compile()` as the source filename of every frozen
# module (see `Freezer.compile_string`).
PATH_MARKER = "<Generated by torch::deploy>"

# Emitted verbatim at the top of the generated `main.c`.
MAIN_INCLUDES = """#include <Python.h>

"""

# Opening of a `struct _frozen` table in `main.c`; the `{}` placeholder
# receives the name of the array symbol.
MAIN_PREFIX_TEMPLATE = """
// Compiled standard library modules. These should be appended to the existing
// `PyImport_FrozenModules` that ships with CPython.
struct _frozen {}[] = {{
"""

# Table opening for the extra `_PyImport_FrozenModules` array that
# `Freezer.write_main` appends when its `oss` flag is set.
FAKE_PREFIX = MAIN_PREFIX_TEMPLATE.format("_PyImport_FrozenModules")

# Sentinel entry and closing brace that terminate a `struct _frozen` table.
MAIN_SUFFIX = """\
{0, 0, 0} /* sentinel */
};
"""

# Directory and file names that are never frozen. Entries are compared against
# `path.name` in both `Freezer.compile_package` (directories) and
# `Freezer.compile_file` (files), so file entries include the `.py` suffix.
DENY_LIST = [
    # Standard-library packages that are skipped wholesale.
    "dbm",
    "curses",
    "tkinter",
    # Test suites and test fixtures.
    "test",
    "tests",
    "idle_test",
    "__phello__.foo.py",
    # importlib bootstrap modules.
    # NOTE(review): presumably excluded because CPython already ships these
    # frozen (see the module docstring) -- confirm.
    "_bootstrap.py",
    "_bootstrap_external.py",
]

# Number of `bytecode_<i>.c` files that `Freezer.write_bytecode` spreads the
# frozen modules across.
NUM_BYTECODE_FILES = 5
|
|
|
|
|
def indent_msg(fn):
    """Decorate a method so its calls are nested one indent level deeper.

    The wrapped callable's first positional argument (normally `self`) must
    expose an integer `indent` attribute. It is incremented for the duration
    of the call and restored afterwards, including when `fn` raises, so one
    failing call cannot permanently skew later log indentation.
    """

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        owner = args[0]
        owner.indent += 1
        try:
            return fn(*args, **kwargs)
        finally:
            # Undo the increment on both the normal and the exceptional path.
            owner.indent -= 1

    return wrapper
|
|
|
|
|
@dataclass
class FrozenModule:
    """Record describing one module frozen by `Freezer.compile_file`."""

    # Dotted, fully qualified module name (qualname parts joined with ".").
    module_name: str

    # Name of the C array holding the bytecode: "M_" followed by the qualname
    # parts joined with "__".
    c_name: str

    # Length of `bytecode` in bytes, negated when the source file is an
    # `__init__.py` (i.e. the module is a package).
    size: int

    # The marshalled code object produced from the module's source.
    bytecode: bytes
|
|
|
|
|
class Freezer:
    """Compile Python sources to bytecode and emit them as C source files.

    Call `compile_path` for every path to freeze, then `write_bytecode` and
    `write_main` to generate the `.c` files.
    """

    def __init__(self, verbose: bool):
        # Modules compiled so far, in the order they were visited.
        self.frozen_modules: list[FrozenModule] = []
        # Current nesting depth; bumped by `indent_msg`, consumed by `msg`.
        self.indent: int = 0
        self.verbose: bool = verbose

    def msg(self, path: Path, code: str):
        """Print a one-line progress record when verbose output is enabled.

        `code` is the single-letter status passed by the compile methods:
          P: package directory being frozen
          F: Python source file being frozen
          X: directory or file skipped because its name is in DENY_LIST
          S: directory skipped because it contains no `__init__.py`
          N: file skipped because it is not a `.py` file
        """
        if not self.verbose:
            return
        print(" " * self.indent + f"{code} {path}")

    def write_bytecode(self, install_root):
        """
        Write the `.c` files containing the frozen bytecode.

        Shard frozen modules evenly across the files.
        """
        file_names = [f"bytecode_{i}.c" for i in range(NUM_BYTECODE_FILES)]
        # ExitStack guarantees that every file opened so far is closed even if
        # a later open() or write fails part-way through.
        with contextlib.ExitStack() as stack:
            bytecode_files = [
                stack.enter_context(open(os.path.join(install_root, name), "w"))
                for name in file_names
            ]
            # Round-robin assignment: module k goes to file k % NUM_BYTECODE_FILES.
            targets = itertools.cycle(bytecode_files)
            for m in self.frozen_modules:
                self.write_frozen(m, next(targets))

    def write_main(self, install_root, oss, symbol_name):
        """Write the `main.c` file containing a table enumerating all the frozen modules.

        `symbol_name` names the generated `struct _frozen` array. When `oss`
        is true, an additional empty `_PyImport_FrozenModules` table is
        appended after it.
        """
        with open(os.path.join(install_root, "main.c"), "w") as outfp:
            outfp.write(MAIN_INCLUDES)
            # Forward declarations for the arrays defined in bytecode_<i>.c.
            for m in self.frozen_modules:
                outfp.write(f"extern unsigned char {m.c_name}[];\n")

            outfp.write(MAIN_PREFIX_TEMPLATE.format(symbol_name))
            for m in self.frozen_modules:
                outfp.write(f'\t{{"{m.module_name}", {m.c_name}, {m.size}}},\n')
            outfp.write(MAIN_SUFFIX)
            if oss:
                outfp.write(FAKE_PREFIX)
                outfp.write(MAIN_SUFFIX)

    def write_frozen(self, m: FrozenModule, outfp):
        """Write a single frozen module's bytecode out to a C variable.

        The bytes are emitted as decimal literals, 16 per line.
        """
        outfp.write(f"unsigned char {m.c_name}[] = {{")
        for i in range(0, len(m.bytecode), 16):
            row = bytes(m.bytecode[i : i + 16])
            # One write per row instead of one per byte; output is identical.
            outfp.write("\n\t" + "".join(f"{c:d}," for c in row))
        outfp.write("\n};\n")

    def compile_path(self, path: Path, top_package_path: Path):
        """Entry point for compiling a Path object."""
        if path.is_dir():
            self.compile_package(path, top_package_path)
        else:
            self.compile_file(path, top_package_path)

    @indent_msg
    def compile_package(self, path: Path, top_package_path: Path):
        """Compile all the files within a Python package dir."""
        assert path.is_dir()
        if path.name in DENY_LIST:
            self.msg(path, "X")
            return

        # List the directory once, sorted, so modules are visited (and hence
        # emitted) in a reproducible order regardless of the filesystem.
        children = sorted(path.iterdir())

        # Only directories that contain an `__init__.py` are treated as packages.
        is_package_dir = any(child.name == "__init__.py" for child in children)
        if not is_package_dir:
            self.msg(path, "S")
            return

        self.msg(path, "P")

        for child in children:
            self.compile_path(child, top_package_path)

    def get_module_qualname(self, file_path: Path, top_package_path: Path) -> list[str]:
        """Return the module's qualified name as a list of components.

        The name is derived from `file_path` relative to the parent of
        `top_package_path`; e.g. with a top package `Lib/foo`, the file
        `Lib/foo/bar/baz.py` yields `["foo", "bar", "baz"]`.
        """
        normalized_path = file_path.relative_to(top_package_path.parent)

        if normalized_path.name == "__init__.py":
            # An `__init__.py` stands for its containing directory: for
            # 'foo/bar/baz/__init__.py' the module is 'baz' and its parent
            # components are ('foo', 'bar').
            module_basename = normalized_path.parent.name
            module_parent = normalized_path.parent.parent.parts
        else:
            module_basename = normalized_path.stem
            module_parent = normalized_path.parent.parts
        return [*module_parent, module_basename]

    def compile_string(self, file_content: str) -> types.CodeType:
        """Compile Python source text to a module-level code object.

        `PATH_MARKER` is passed as the filename instead of a real path, so the
        build-time location of the source is not recorded in the code object.
        """
        return compile(file_content, PATH_MARKER, "exec")

    @indent_msg
    def compile_file(self, path: Path, top_package_path: Path):
        """
        Compile a Python source file to frozen bytecode.

        Append the result to `self.frozen_modules`.
        """
        assert path.is_file()
        if path.suffix != ".py":
            self.msg(path, "N")
            return

        if path.name in DENY_LIST:
            self.msg(path, "X")
            return

        self.msg(path, "F")
        module_qualname = self.get_module_qualname(path, top_package_path)
        module_mangled_name = "__".join(module_qualname)
        c_name = "M_" + module_mangled_name

        # tokenize.open() decodes the file using its PEP 263 coding cookie or
        # BOM (UTF-8 by default) instead of the locale's preferred encoding.
        with tokenize.open(path) as src_file:
            co = self.compile_string(src_file.read())

        bytecode = marshal.dumps(co)
        size = len(bytecode)
        if path.name == "__init__.py":
            # A negated size flags the entry as a package in the frozen table.
            size = -size
        self.frozen_modules.append(
            FrozenModule(".".join(module_qualname), c_name, size, bytecode)
        )
|
|
|
|
|
def main(argv=None) -> None:
    """Command-line entry point: freeze the given paths into C source files.

    Args:
        argv: Argument list to parse. Defaults to `None`, in which case
            argparse reads `sys.argv[1:]`.

    `--install-dir` is mandatory: every output file is written beneath it, so
    a missing value is rejected during argument parsing instead of failing
    later, after all sources have already been compiled.
    """
    parser = argparse.ArgumentParser(description="Compile py source")
    parser.add_argument("paths", nargs="*", help="Paths to freeze.")
    parser.add_argument("--verbose", action="store_true", help="Print debug logs")
    parser.add_argument(
        "--install-dir",
        "--install_dir",
        required=True,
        help="Root directory for all output files",
    )
    parser.add_argument(
        "--oss",
        action="store_true",
        help="If it's OSS build, add a fake _PyImport_FrozenModules",
    )
    parser.add_argument(
        "--symbol-name",
        "--symbol_name",
        help="The name of the frozen module array symbol to generate",
        default="_PyImport_FrozenModules_torch",
    )

    args = parser.parse_args(argv)

    f = Freezer(args.verbose)

    for p in args.paths:
        path = Path(p)
        if path.is_dir() and not (path / "__init__.py").exists():
            # `path` is a plain directory of modules rather than a package:
            # freeze each entry as its own top-level package or module.
            # Sorted so the output order is deterministic.
            for mod in sorted(path.glob("*")):
                f.compile_path(mod, mod)
        else:
            f.compile_path(path, path)

    f.write_bytecode(args.install_dir)
    f.write_main(args.install_dir, args.oss, args.symbol_name)


if __name__ == "__main__":
    main()
|
|