Remove hardcoded libpython binaries and add debug step
All checks were successful
build / build-linux (push) Successful in 16s
445
venv/lib/python3.12/site-packages/PyInstaller/archive/writers.py
Executable file
@@ -0,0 +1,445 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Utilities to create data structures for embedding Python modules and additional files into the executable.
"""

import marshal
import os
import shutil
import struct
import sys
import zlib

from PyInstaller.building.utils import get_code_object, replace_filename_in_code_object
from PyInstaller.compat import BYTECODE_MAGIC, is_win, strict_collect_mode
from PyInstaller.loader.pyimod01_archive import PYZ_ITEM_MODULE, PYZ_ITEM_NSPKG, PYZ_ITEM_PKG

class ZlibArchiveWriter:
    """
    Writer for PyInstaller's PYZ (ZlibArchive) archive. The archive is used to store collected byte-compiled Python
    modules, as individually-compressed entries.
    """
    _PYZ_MAGIC_PATTERN = b'PYZ\0'
    _HEADER_LENGTH = 12 + 5
    _COMPRESSION_LEVEL = 6  # zlib compression level

    def __init__(self, filename, entries, code_dict=None):
        """
        filename
            Target filename of the archive.
        entries
            An iterable containing entries in the form of tuples: (name, src_path, typecode), where `name` is the
            name under which the resource is stored (e.g., python module name, without suffix), `src_path` is the
            name of the file from which the resource is read, and `typecode` is the Analysis-level TOC typecode
            (`PYMODULE`).
        code_dict
            Optional code dictionary containing code objects for analyzed/collected python modules.
        """
        code_dict = code_dict or {}

        with open(filename, "wb") as fp:
            # Reserve space for the header.
            fp.write(b'\0' * self._HEADER_LENGTH)

            # Write entries' data and collect TOC entries.
            toc = []
            for entry in entries:
                toc_entry = self._write_entry(fp, entry, code_dict)
                toc.append(toc_entry)

            # Write TOC.
            toc_offset = fp.tell()
            toc_data = marshal.dumps(toc)
            fp.write(toc_data)

            # Write header:
            #  - PYZ magic pattern (4 bytes)
            #  - python bytecode magic pattern (4 bytes)
            #  - TOC offset (32-bit int, 4 bytes)
            #  - 4 unused bytes
            fp.seek(0, os.SEEK_SET)

            fp.write(self._PYZ_MAGIC_PATTERN)
            fp.write(BYTECODE_MAGIC)
            fp.write(struct.pack('!i', toc_offset))

    @classmethod
    def _write_entry(cls, fp, entry, code_dict):
        name, src_path, typecode = entry
        assert typecode in {'PYMODULE', 'PYMODULE-1', 'PYMODULE-2'}

        if src_path in {'-', None}:
            # PEP-420 namespace package; these do not have code objects, but we still need an entry in the PYZ to
            # inform our run-time module finder/loader of the package's existence. So create a TOC entry for a
            # 0-byte data blob, and write no data.
            return (name, (PYZ_ITEM_NSPKG, fp.tell(), 0))

        code_object = code_dict[name]

        src_basename, _ = os.path.splitext(os.path.basename(src_path))
        if src_basename == '__init__':
            typecode = PYZ_ITEM_PKG
            co_filename = os.path.join(*name.split('.'), '__init__.py')
        else:
            typecode = PYZ_ITEM_MODULE
            co_filename = os.path.join(*name.split('.')) + '.py'

        # Replace co_filename on the code object with an anonymized version that does not contain the absolute path
        # to the module.
        code_object = replace_filename_in_code_object(code_object, co_filename)

        # Serialize the code object.
        data = marshal.dumps(code_object)

        # Compress the marshalled data.
        obj = zlib.compress(data, cls._COMPRESSION_LEVEL)

        # Create TOC entry.
        toc_entry = (name, (typecode, fp.tell(), len(obj)))

        # Write data blob.
        fp.write(obj)

        return toc_entry


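# Illustrative sketch only (not part of PyInstaller's API): how an archive written by ZlibArchiveWriter above could
# be read back for debugging. It relies solely on the layout produced by the writer (17-byte header, followed by
# zlib-compressed marshalled code objects, followed by a marshalled TOC of (name, (typecode, offset, length))
# tuples). The function name is hypothetical.
def _read_pyz_entry(filename, name):
    with open(filename, "rb") as fp:
        assert fp.read(4) == ZlibArchiveWriter._PYZ_MAGIC_PATTERN
        assert fp.read(4) == BYTECODE_MAGIC  # Bytecode magic must match the running python version.
        toc_offset = struct.unpack('!i', fp.read(4))[0]
        fp.seek(toc_offset, os.SEEK_SET)
        toc = dict(marshal.load(fp))  # {name: (typecode, offset, length)}
        typecode, offset, length = toc[name]
        if typecode == PYZ_ITEM_NSPKG:
            return None  # Namespace packages are stored as 0-byte blobs without a code object.
        fp.seek(offset, os.SEEK_SET)
        return marshal.loads(zlib.decompress(fp.read(length)))  # The module's code object.

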
class CArchiveWriter:
    """
    Writer for PyInstaller's CArchive (PKG) archive.

    This archive contains all files that are bundled within an executable; a PYZ (ZlibArchive), DLLs, Python C
    extensions, and other data files that are bundled in onefile mode.

    The archive can be read from either C (bootloader code at application's run-time) or Python (for debug purposes).
    """
    _COOKIE_MAGIC_PATTERN = b'MEI\014\013\012\013\016'

    # For cookie and TOC entry structure, see `PyInstaller.archive.readers.CArchiveReader`.
    _COOKIE_FORMAT = '!8sIIII64s'
    _COOKIE_LENGTH = struct.calcsize(_COOKIE_FORMAT)

    _TOC_ENTRY_FORMAT = '!IIIIBc'
    _TOC_ENTRY_LENGTH = struct.calcsize(_TOC_ENTRY_FORMAT)

    _COMPRESSION_LEVEL = 9  # zlib compression level

    def __init__(self, filename, entries, pylib_name):
        """
        filename
            Target filename of the archive.
        entries
            An iterable containing entries in the form of tuples: (dest_name, src_name, compress, typecode), where
            `dest_name` is the name under which the resource is stored in the archive (and the name under which it
            is extracted at runtime), `src_name` is the name of the file from which the resource is read, `compress`
            is a boolean compression flag, and `typecode` is the Analysis-level TOC typecode.
        pylib_name
            Name of the python shared library.
        """
        self._collected_names = set()  # Track collected names for strict collect mode.

        with open(filename, "wb") as fp:
            # Write entries' data and collect TOC entries.
            toc = []
            for entry in entries:
                toc_entry = self._write_entry(fp, entry)
                toc.append(toc_entry)

            # Write TOC.
            toc_offset = fp.tell()
            toc_data = self._serialize_toc(toc)
            toc_length = len(toc_data)

            fp.write(toc_data)

            # Write cookie.
            archive_length = toc_offset + toc_length + self._COOKIE_LENGTH
            pyvers = sys.version_info[0] * 100 + sys.version_info[1]
            cookie_data = struct.pack(
                self._COOKIE_FORMAT,
                self._COOKIE_MAGIC_PATTERN,
                archive_length,
                toc_offset,
                toc_length,
                pyvers,
                pylib_name.encode('ascii'),
            )

            fp.write(cookie_data)

    def _write_entry(self, fp, entry):
        dest_name, src_name, compress, typecode = entry

        # Write OPTION entries as-is, without normalizing them. This also exempts them from the duplication check,
        # allowing them to be specified multiple times.
        if typecode == 'o':
            return self._write_blob(fp, b"", dest_name, typecode)

        # On Windows, ensure that forward slashes in paths are converted to back slashes '\\', as the bootloader
        # works only with back slashes.
        dest_name = os.path.normpath(dest_name)
        if is_win and os.path.sep == '/':
            # When building under MSYS, the above path normalization uses Unix-style separators, so replace them
            # manually.
            dest_name = dest_name.replace(os.path.sep, '\\')

        # For symbolic link entries, also ensure that the symlink target path (stored in src_name) uses
        # Windows-style back slash separators.
        if typecode == 'n':
            src_name = src_name.replace(os.path.sep, '\\')

        # Strict pack/collect mode: keep track of the destination names, and raise an error if we try to add a
        # duplicate (a file with the same destination name, subject to OS case normalization rules).
        if strict_collect_mode:
            normalized_dest = None
            if typecode in {'s', 's1', 's2', 'm', 'M'}:
                # Exempt python source scripts and modules from the check.
                pass
            else:
                # Everything else; normalize the case.
                normalized_dest = os.path.normcase(dest_name)
            # Check for an existing entry, if applicable.
            if normalized_dest:
                if normalized_dest in self._collected_names:
                    raise ValueError(
                        f"Attempting to collect a duplicated file into CArchive: {normalized_dest} (type: {typecode})"
                    )
                self._collected_names.add(normalized_dest)

        if typecode == 'd':
            # Dependency; merge src_name (= reference path prefix) and dest_name (= name) into the single-string
            # format that is parsed by the bootloader.
            return self._write_blob(fp, b"", f"{src_name}:{dest_name}", typecode)
        elif typecode in {'s', 's1', 's2'}:
            # If it is a source code file, compile it to a code object and marshal the object, so it can be
            # unmarshalled by the bootloader. For that, we need to know the target optimization level, which is
            # encoded in the typecode.
            optim_level = {'s': 0, 's1': 1, 's2': 2}[typecode]
            code = get_code_object(dest_name, src_name, optimize=optim_level)
            # Construct the new `co_filename` by taking the destination name and replacing its suffix with the one
            # from the code object's co_filename; this should cover all of the following cases:
            #  - run-time hook script: the source name has a suffix (that is also present in `co_filename` produced
            #    by `get_code_object`), the destination name has no suffix.
            #  - entry-point script with a suffix: both source name and destination name have the same suffix (and
            #    the same suffix is also in `co_filename` produced by `get_code_object`).
            #  - entry-point script without a suffix: neither source name nor destination name have a suffix, but
            #    `get_code_object` adds a .py suffix to `co_filename` to mitigate potential issues with POSIX
            #    executables and the `traceback` module; we want to preserve this behavior.
            co_filename = os.path.splitext(dest_name)[0] + os.path.splitext(code.co_filename)[1]
            code = replace_filename_in_code_object(code, co_filename)
            return self._write_blob(fp, marshal.dumps(code), dest_name, 's', compress=compress)
        elif typecode in ('m', 'M'):
            # Read the PYC file. We do not perform compilation here (in contrast to script files in the above
            # branch), so the typecode does not contain optimization level information.
            with open(src_name, "rb") as in_fp:
                data = in_fp.read()
            assert data[:4] == BYTECODE_MAGIC
            # Skip the PYC header, load the code object.
            code = marshal.loads(data[16:])
            co_filename = dest_name + '.py'  # Use dest name with added .py suffix.
            code = replace_filename_in_code_object(code, co_filename)
            # These module entries are loaded and executed within the bootloader, which requires only the code
            # object, without the PYC header.
            return self._write_blob(fp, marshal.dumps(code), dest_name, typecode, compress=compress)
        elif typecode == 'n':
            # Symbolic link; store the target name (as a NULL-terminated string).
            data = src_name.encode('utf-8') + b'\x00'
            return self._write_blob(fp, data, dest_name, typecode, compress=compress)
        else:
            return self._write_file(fp, src_name, dest_name, typecode, compress=compress)

    def _write_blob(self, out_fp, blob: bytes, dest_name, typecode, compress=False):
        """
        Write the binary contents (**blob**) of a small file to the archive and return the corresponding CArchive
        TOC entry.
        """
        data_offset = out_fp.tell()
        data_length = len(blob)
        if compress:
            blob = zlib.compress(blob, level=self._COMPRESSION_LEVEL)
        out_fp.write(blob)

        return (data_offset, len(blob), data_length, int(compress), typecode, dest_name)

    def _write_file(self, out_fp, src_name, dest_name, typecode, compress=False):
        """
        Stream-copy a large file into the archive and return the corresponding CArchive TOC entry.
        """
        data_offset = out_fp.tell()
        data_length = os.stat(src_name).st_size
        with open(src_name, 'rb') as in_fp:
            if compress:
                tmp_buffer = bytearray(16 * 1024)
                compressor = zlib.compressobj(self._COMPRESSION_LEVEL)
                while True:
                    num_read = in_fp.readinto(tmp_buffer)
                    if not num_read:
                        break
                    out_fp.write(compressor.compress(tmp_buffer[:num_read]))
                out_fp.write(compressor.flush())
            else:
                shutil.copyfileobj(in_fp, out_fp)

        return (data_offset, out_fp.tell() - data_offset, data_length, int(compress), typecode, dest_name)

    @classmethod
    def _serialize_toc(cls, toc):
        serialized_toc = []
        for toc_entry in toc:
            data_offset, compressed_length, data_length, compress, typecode, name = toc_entry

            # Encode names as UTF-8. This should be safe, as standard python module names contain only ASCII
            # characters (and the same should hold for standard shared libraries), so the C code can still handle
            # them correctly.
            name = name.encode('utf-8')
            name_length = len(name) + 1  # Add 1 for the string-terminating zero byte.

            # Ensure TOC entries are aligned on a 16-byte boundary, so they can be read by the bootloader (C code)
            # on platforms with strict data alignment requirements (for example, linux on `armhf`/`armv7`, such as
            # 32-bit Debian Buster on Raspberry Pi).
            entry_length = cls._TOC_ENTRY_LENGTH + name_length
            if entry_length % 16 != 0:
                padding_length = 16 - (entry_length % 16)
                name_length += padding_length
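            # Worked example (hypothetical values, for illustration only): the fixed part of the entry ('!IIIIBc')
            # is 18 bytes; with the name b'foo.pyd\0' (8 bytes, including the terminating zero byte), entry_length
            # is 26, so 6 bytes of padding extend the entry to 32 bytes, the next multiple of 16.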

            # Serialize
            serialized_entry = struct.pack(
                cls._TOC_ENTRY_FORMAT + f"{name_length}s",  # "Ns" format automatically pads the string with zero bytes.
                cls._TOC_ENTRY_LENGTH + name_length,
                data_offset,
                compressed_length,
                data_length,
                compress,
                typecode.encode('ascii'),
                name,
            )
            serialized_toc.append(serialized_entry)

        return b''.join(serialized_toc)


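# Illustrative sketch only (not part of PyInstaller's API; see `PyInstaller.archive.readers.CArchiveReader` for the
# real reader): how the cookie written by CArchiveWriter above could be located and parsed. It assumes the cookie
# occupies the last _COOKIE_LENGTH bytes of the file, which holds for archives produced by this writer. The
# function name is hypothetical.
def _read_carchive_cookie(filename):
    with open(filename, "rb") as fp:
        fp.seek(-CArchiveWriter._COOKIE_LENGTH, os.SEEK_END)
        cookie = struct.unpack(CArchiveWriter._COOKIE_FORMAT, fp.read(CArchiveWriter._COOKIE_LENGTH))
    magic, archive_length, toc_offset, toc_length, pyvers, pylib_name = cookie
    assert magic == CArchiveWriter._COOKIE_MAGIC_PATTERN
    # `pyvers` encodes the python version (e.g., 312 for python 3.12); `pylib_name` is a zero-padded 64-byte field.
    return archive_length, toc_offset, toc_length, pyvers, pylib_name.rstrip(b'\0').decode('ascii')

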
class SplashWriter:
    """
    Writer for the splash screen resources archive.

    The resulting archive is added as an entry into the CArchive with the typecode PKG_ITEM_SPLASH.
    """
    # This struct describes the splash resources as they are laid out in a buffer inside the bootloader. All
    # necessary parts are bundled; the *_len and *_offset fields describe the data beyond this header definition.
    # Whereas the script and image fields are binary data, the requirements field describes an array of strings.
    # Each string is null-terminated, so that the list can easily be iterated over from within C.
    #
    # typedef struct _splash_data_header
    # {
    #     char tcl_libname[32];
    #     char tk_libname[32];
    #     char tk_lib[16];
    #
    #     uint32_t script_len;
    #     uint32_t script_offset;
    #
    #     uint32_t image_len;
    #     uint32_t image_offset;
    #
    #     uint32_t requirements_len;
    #     uint32_t requirements_offset;
    # } SPLASH_DATA_HEADER;
    #
    _HEADER_FORMAT = '!32s 32s 16s II II II'
    _HEADER_LENGTH = struct.calcsize(_HEADER_FORMAT)

    # The created archive is compressed by the CArchive, so there is no need to compress the data here.

    def __init__(self, filename, name_list, tcl_libname, tk_libname, tklib, image, script):
        """
        Writer for splash screen resources that are bundled into the CArchive as a single archive/entry.

        :param filename: The filename of the archive to create.
        :param name_list: List of filenames for the requirements array.
        :param str tcl_libname: Name of the tcl shared library file.
        :param str tk_libname: Name of the tk shared library file.
        :param str tklib: Root of the tk library (e.g., tk/).
        :param Union[str, bytes] image: Image-like object.
        :param str script: The tcl/tk script to execute to create the screen.
        """

        # On Windows, ensure that forward slashes in dependency names are converted to back slashes '\\', as the
        # bootloader works only with back slashes.
        def _normalize_filename(filename):
            filename = os.path.normpath(filename)
            if is_win and os.path.sep == '/':
                # When building under MSYS, the above path normalization uses Unix-style separators, so replace them
                # manually.
                filename = filename.replace(os.path.sep, '\\')
            return filename

        name_list = [_normalize_filename(name) for name in name_list]

        with open(filename, "wb") as fp:
            # Reserve space for the header.
            fp.write(b'\0' * self._HEADER_LENGTH)

            # Serialize the requirements list. This list (really an array) contains the names of all files the
            # bootloader needs to extract before the splash screen can be started. Every name is terminated with a
            # null byte, which keeps the list compact in memory and makes it easy to iterate over from C.
            requirements_len = 0
            requirements_offset = fp.tell()
            for name in name_list:
                name = name.encode('utf-8') + b'\0'
                fp.write(name)
                requirements_len += len(name)

            # Write the splash script.
            script_offset = fp.tell()
            script_len = len(script)
            fp.write(script.encode("utf-8"))

            # Write the splash image. If the image is a bytes buffer, it is written directly into the archive.
            # Otherwise, it is assumed to be a path and the file is copied into the archive.
            image_offset = fp.tell()
            if isinstance(image, bytes):
                # Image was converted by PIL/Pillow and is already in a buffer.
                image_len = len(image)
                fp.write(image)
            else:
                # Read the image into a buffer.
                with open(image, 'rb') as image_fp:
                    image_data = image_fp.read()
                image_len = len(image_data)
                fp.write(image_data)
                del image_data

            # The following strings are written to fixed-size fields with zero-padding, which means that we need to
            # ensure that their encoded length is strictly below the field size (if it filled the field exactly, the
            # field would have no terminating NULL character!).
            def _encode_str(value, field_name, limit):
                enc_value = value.encode("utf-8")
                if len(enc_value) >= limit:
                    raise ValueError(
                        f"Length of the encoded field {field_name!r} ({len(enc_value)}) is greater than or equal "
                        f"to the limit of {limit} characters!"
                    )

                return enc_value

            # Write the header.
            header_data = struct.pack(
                self._HEADER_FORMAT,
                _encode_str(tcl_libname, 'tcl_libname', 32),
                _encode_str(tk_libname, 'tk_libname', 32),
                _encode_str(tklib, 'tklib', 16),
                script_len,
                script_offset,
                image_len,
                image_offset,
                requirements_len,
                requirements_offset,
            )

            fp.seek(0, os.SEEK_SET)
            fp.write(header_data)
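

# Illustrative sketch only (not part of PyInstaller's API): how the header written by SplashWriter above could be
# parsed back for debugging; the field order follows _HEADER_FORMAT. The function name is hypothetical.
def _read_splash_header(filename):
    with open(filename, "rb") as fp:
        fields = struct.unpack(SplashWriter._HEADER_FORMAT, fp.read(SplashWriter._HEADER_LENGTH))
    (tcl_libname, tk_libname, tk_lib, script_len, script_offset, image_len, image_offset, requirements_len,
     requirements_offset) = fields
    # String fields are zero-padded to their fixed size; strip the padding before decoding.
    return {
        'tcl_libname': tcl_libname.rstrip(b'\0').decode('utf-8'),
        'tk_libname': tk_libname.rstrip(b'\0').decode('utf-8'),
        'tk_lib': tk_lib.rstrip(b'\0').decode('utf-8'),
        'script': (script_offset, script_len),
        'image': (image_offset, image_len),
        'requirements': (requirements_offset, requirements_len),
    }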