Files
gotify-tray-customized/venv/lib/python3.12/site-packages/PyInstaller/depend/bindepend.py
kdusek 6a1fe63684
All checks were successful
build / build-linux (push) Successful in 16s
Remove hardcoded libpython binaries and add debug step
2025-12-07 23:15:18 +01:00

1132 lines
53 KiB
Python
Executable File

#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Find external dependencies of binary libraries.
"""
import ctypes.util
import functools
import os
import pathlib
import re
import sys
import sysconfig
import subprocess
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.depend import dylib, utils
from PyInstaller.utils.win32 import winutils
from PyInstaller.exceptions import PythonLibraryNotFoundError
# The macOS helper module is imported only when running on macOS.
if compat.is_darwin:
    import PyInstaller.utils.osx as osxutils

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Machine type of the running python executable, as reported by `winutils.get_pe_file_machine_type`. It is determined
# only on Windows and stays None everywhere else. `_resolve_library_path_in_search_paths` compares it against the
# machine type of candidate DLLs.
_exe_machine_type = None
if compat.is_win:
    _exe_machine_type = winutils.get_pe_file_machine_type(compat.python_executable)
#- High-level binary dependency analysis
def _get_paths_for_parent_directory_preservation():
"""
Return list of paths that serve as prefixes for parent-directory preservation of collected binaries and/or
shared libraries. If a binary is collected from a location that starts with a path from this list, the relative
directory structure is preserved within the frozen application bundle; otherwise, the binary is collected to the
frozen application's top-level directory.
"""
# Use only site-packages paths. We have no control over contents of `sys.path`, so using all paths from that may
# lead to unintended behavior in corner cases. For example, if `sys.path` contained the drive root (see #7028),
# all paths that do not match some other sub-path rooted in that drive will end up recognized as relative to the
# drive root. In such case, any DLL collected from `c:\Windows\system32` will be collected into `Windows\system32`
# sub-directory; ucrt DLLs collected from MSVC or Windows SDK installed in `c:\Program Files\...` will end up
# collected into `Program Files\...` subdirectory; etc.
#
# On the other hand, the DLL parent directory preservation is primarily aimed at packages installed via PyPI
# wheels, which are typically installed into site-packages. Therefore, limiting the directory preservation for
# shared libraries collected from site-packages should do the trick, and should be reasonably safe.
import site
orig_paths = site.getsitepackages()
orig_paths.append(site.getusersitepackages())
# Explicitly excluded paths. `site.getsitepackages` seems to include `sys.prefix`, which we need to exclude, to
# avoid issue swith DLLs in its sub-directories. We need both resolved and unresolved variant to handle cases
# where `base_prefix` itself is a symbolic link (e.g., `scoop`-installed python on Windows, see #8023).
excluded_paths = {
pathlib.Path(sys.base_prefix),
pathlib.Path(sys.base_prefix).resolve(),
pathlib.Path(sys.prefix),
pathlib.Path(sys.prefix).resolve(),
}
# For each path in orig_paths, append a resolved variant. This helps with linux venv where we need to consider
# both `venv/lib/python3.11/site-packages` and `venv/lib/python3.11/site-packages` and `lib64` is a symlink
# to `lib`.
orig_paths += [pathlib.Path(path).resolve() for path in orig_paths]
paths = set()
for path in orig_paths:
if not path:
continue
path = pathlib.Path(path)
# Filter out non-directories (e.g., /path/to/python3x.zip) or non-existent paths
if not path.is_dir():
continue
# Filter out explicitly excluded paths
if path in excluded_paths:
continue
paths.add(path)
# Sort by length (in term of path components) to ensure match against the longest common prefix (for example, match
# /path/to/venv/lib/site-packages instead of /path/to/venv when both paths are in site paths).
paths = sorted(paths, key=lambda x: len(x.parents), reverse=True)
return paths
def _select_destination_directory(src_filename, parent_dir_preservation_paths):
# Check parent directory preservation paths
for parent_dir_preservation_path in parent_dir_preservation_paths:
if parent_dir_preservation_path in src_filename.parents:
# Collect into corresponding sub-directory.
return src_filename.relative_to(parent_dir_preservation_path)
# Collect into top-level directory.
return src_filename.name
def binary_dependency_analysis(binaries, search_paths=None, symlink_suppression_patterns=None):
    """
    Perform binary dependency analysis on the given TOC list of collected binaries, by recursively scanning each binary
    for linked dependencies (shared library imports). Returns new TOC list that contains both original entries and their
    binary dependencies.

    Additional search paths for dependencies' full path resolution may be supplied via optional `search_paths`
    argument.

    The optional `symlink_suppression_patterns` argument is an iterable of path patterns (matched against the
    dependency's source path using `pathlib` path matching); for dependencies whose source path matches any of the
    patterns, no symbolic link to the top-level application directory is generated. If omitted (None), no symbolic
    links are suppressed.
    """
    # Normalize the optional patterns argument, so that it can be safely iterated over below.
    symlink_suppression_patterns = symlink_suppression_patterns or []

    # Get all path prefixes for binaries' parent-directory preservation. For binaries collected from packages in (for
    # example) site-packages directory, we should try to preserve the parent directory structure.
    parent_dir_preservation_paths = _get_paths_for_parent_directory_preservation()

    # Keep track of processed binaries and processed dependencies.
    processed_binaries = set()
    processed_dependencies = set()

    # Keep track of unresolved dependencies, in order to defer the missing-library warnings until after everything has
    # been processed. This allows us to suppress warnings for dependencies that end up being collected anyway; for
    # details, see the end of this function.
    missing_dependencies = []

    # Populate output TOC with input binaries - this also serves as TODO list, as we iterate over it while appending
    # new entries at the end.
    output_toc = binaries[:]
    for dest_name, src_name, typecode in output_toc:
        # Do not process symbolic links (already present in input TOC list, or added during analysis below).
        if typecode == 'SYMLINK':
            continue

        # Keep track of processed binaries, to avoid unnecessarily repeating analysis of the same file. Use pathlib.Path
        # to avoid having to worry about case normalization.
        src_path = pathlib.Path(src_name)
        if src_path in processed_binaries:
            continue
        processed_binaries.add(src_path)

        logger.debug("Analyzing binary %r", src_name)

        # Analyze imports (linked dependencies)
        for dep_name, dep_src_path in get_imports(src_name, search_paths):
            logger.debug("Processing dependency, name: %r, resolved path: %r", dep_name, dep_src_path)

            # Skip unresolved dependencies. Defer the missing-library warnings until after binary dependency analysis
            # is complete.
            if not dep_src_path:
                missing_dependencies.append((dep_name, src_name))
                continue

            # Compare resolved dependency against global inclusion/exclusion rules.
            if not dylib.include_library(dep_src_path):
                logger.debug("Skipping dependency %r due to global exclusion rules.", dep_src_path)
                continue

            dep_src_path = pathlib.Path(dep_src_path)  # Turn into pathlib.Path for subsequent processing

            # Avoid processing this dependency if we have already processed it.
            if dep_src_path in processed_dependencies:
                logger.debug("Skipping dependency %r due to prior processing.", str(dep_src_path))
                continue
            processed_dependencies.add(dep_src_path)

            # Try to preserve parent directory structure, if applicable.
            # NOTE: do not resolve the source path, because on macOS and linux, it may be a versioned .so (e.g.,
            # libsomething.so.1, pointing at libsomething.so.1.2.3), and we need to collect it under original name!
            dep_dest_path = _select_destination_directory(dep_src_path, parent_dir_preservation_paths)
            dep_dest_path = pathlib.PurePath(dep_dest_path)  # Might be a str() if it is just a basename...

            # If we are collecting library into top-level directory on macOS, check whether it comes from a
            # .framework bundle. If it does, re-create the .framework bundle in the top-level directory
            # instead.
            if compat.is_darwin and dep_dest_path.parent == pathlib.PurePath('.'):
                if osxutils.is_framework_bundle_lib(dep_src_path):
                    # dep_src_path is parent_path/Name.framework/Versions/Current/Name
                    framework_parent_path = dep_src_path.parent.parent.parent.parent
                    dep_dest_path = pathlib.PurePath(dep_src_path.relative_to(framework_parent_path))

            logger.debug("Collecting dependency %r as %r.", str(dep_src_path), str(dep_dest_path))
            output_toc.append((str(dep_dest_path), str(dep_src_path), 'BINARY'))

            # On non-Windows, if we are not collecting the binary into application's top-level directory ('.'),
            # add a symbolic link from top-level directory to the actual location. This is to accommodate
            # LD_LIBRARY_PATH being set to the top-level application directory on linux (although library search
            # should be mostly done via rpaths, so this might be redundant) and to accommodate library path
            # rewriting on macOS, which assumes that the library was collected into top-level directory.
            if compat.is_win:
                # We do not use symlinks on Windows.
                pass
            elif dep_dest_path.parent == pathlib.PurePath('.'):
                # The shared library itself is being collected into top-level application directory.
                pass
            elif any(dep_src_path.match(pattern) for pattern in symlink_suppression_patterns):
                # Honor symlink suppression patterns specified by hooks.
                logger.debug(
                    "Skipping symbolic link from %r to top-level application directory due to source path matching one "
                    "of symlink suppression path patterns.", str(dep_dest_path)
                )
            else:
                logger.debug("Adding symbolic link from %r to top-level application directory.", str(dep_dest_path))
                output_toc.append((str(dep_dest_path.name), str(dep_dest_path), 'SYMLINK'))

    # Handle missing dependencies: display warnings, add missing symbolic links to top-level application directory, etc.
    # Map (case-normalized) basenames of all collected non-symlink entries to their TOC entries...
    seen_binaries = {
        os.path.normcase(os.path.basename(src_name)): (dest_name, src_name, typecode)
        for dest_name, src_name, typecode in output_toc if typecode != 'SYMLINK'
    }
    # ... and gather destination names of all symbolic links that have already been generated.
    existing_symlinks = {dest_name for dest_name, _, typecode in output_toc if typecode == 'SYMLINK'}

    for dependency_name, referring_binary in missing_dependencies:
        # Ignore libraries that we would not collect in the first place.
        if not dylib.include_library(dependency_name):
            continue

        # If the binary with a matching basename happens to be among the discovered binaries, suppress the message as
        # well. This might happen either because the library was collected by some other mechanism (for example, via
        # hook, or supplied by the user), or because it was discovered during the analysis of another binary (which,
        # for example, had properly set run-paths on Linux/macOS or was located next to that other analyzed binary on
        # Windows).
        #
        # On non-Windows, also check if symbolic link to the discovered binary already exists in the top-level
        # application directory, and if not, create it. This is important especially on macOS, where our library path
        # rewriting assumes that all dependent libraries are available in the top-level application directory, or
        # linked into it.
        dependency_basename = os.path.normcase(os.path.basename(dependency_name))
        dependency_toc_entry = seen_binaries.get(dependency_basename, None)
        if dependency_toc_entry is None:
            # Not found, emit a warning (subject to global warning suppression rules).
            if not dylib.warn_missing_lib(dependency_name):
                continue
            logger.warning(
                "Library not found: could not resolve %r, dependency of %r.", dependency_name, referring_binary
            )
        elif not compat.is_win:
            # Found; generate symbolic link if necessary.
            dependency_dest_path = pathlib.PurePath(dependency_toc_entry[0])
            dependency_src_path = pathlib.Path(dependency_toc_entry[1])

            if dependency_dest_path.parent == pathlib.PurePath('.'):
                # The binary is collected into top-level application directory.
                continue
            elif dependency_basename in existing_symlinks:
                # The symbolic link already exists.
                continue

            # Keep honoring symlink suppression patterns specified by hooks (same as in main binary dependency analysis
            # loop).
            if any(dependency_src_path.match(pattern) for pattern in symlink_suppression_patterns):
                logger.info(
                    "Missing dependency handling: skipping symbolic link from %r to top-level application directory "
                    "due to source path matching one of symlink suppression path patterns.", str(dependency_dest_path)
                )
                continue

            # Create the symbolic link
            logger.info(
                "Missing dependency handling: adding symbolic link from %r to top-level application directory.",
                str(dependency_dest_path)
            )
            output_toc.append((dependency_basename, str(dependency_dest_path), 'SYMLINK'))
            existing_symlinks.add(dependency_basename)

    return output_toc
#- Low-level import analysis
def get_imports(filename, search_paths=None):
    """
    Inspect the given binary file (shared library or executable) and determine which shared libraries it imports
    (i.e., its link-time dependencies). The work is delegated to the platform-specific helper.

    The result is a collection of (name, fullpath) tuples. The name element is the name as referenced by the binary
    (on macOS, this is not necessarily a plain base name), and the fullpath element is None when the library's full
    path could not be resolved.

    `search_paths` is an optional list of additional directories, used as a fall-back when the platform-specific
    resolution mechanism is unable to resolve a library's full path.
    """
    if compat.is_win:
        # Manifest files are not binaries; they have no imports.
        is_manifest = str(filename).lower().endswith(".manifest")
        return [] if is_manifest else _get_imports_pefile(filename, search_paths)

    if compat.is_darwin:
        return _get_imports_macholib(filename, search_paths)

    return _get_imports_ldd(filename, search_paths)
def _get_imports_pefile(filename, search_paths):
    """
    Windows-specific helper for `get_imports`, which uses the `pefile` library to walk through PE header.

    Collects names of DLLs listed in the import table, as well as DLLs that are targets of forwarded exports, and
    attempts to resolve each name to a full path via `resolve_library_path`. Returns a set of (name, fullpath) tuples.
    """
    import pefile

    output = set()

    # By default, pefile library parses all PE information. We are only interested in the list of dependent dlls.
    # Performance is improved by reading only needed information. https://code.google.com/p/pefile/wiki/UsageExamples
    pe = pefile.PE(filename, fast_load=True)
    try:
        pe.parse_data_directories(
            directories=[
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
                pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            ],
            forwarded_exports_only=True,
            import_dllnames_only=True,
        )

        # If a library has no binary dependencies, pe.DIRECTORY_ENTRY_IMPORT does not exist.
        for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
            dll_str = entry.dll.decode('utf-8')
            output.add(dll_str)

        # We must also read the exports table to find forwarded symbols:
        # http://blogs.msdn.com/b/oldnewthing/archive/2006/07/19/671238.aspx
        exported_symbols = getattr(pe, 'DIRECTORY_ENTRY_EXPORT', None)
        if exported_symbols:
            for symbol in exported_symbols.symbols:
                if symbol.forwarder is not None:
                    # symbol.forwarder is a bytes object. Convert it to a string.
                    forwarder = symbol.forwarder.decode('utf-8')
                    # symbol.forwarder is for example 'KERNEL32.EnterCriticalSection'
                    dll = forwarder.split('.')[0]
                    output.add(dll + ".dll")
    finally:
        # Always release the PE object, even if parsing or decoding above fails.
        pe.close()

    # Attempt to resolve full paths to referenced DLLs. Always add the input binary's parent directory to the search
    # paths. The caller-supplied search paths are copied into a list, so that any sequence type is accepted.
    search_paths = [os.path.dirname(filename)] + list(search_paths or [])
    output = {(lib, resolve_library_path(lib, search_paths)) for lib in output}

    return output
def _get_imports_ldd(filename, search_paths):
    """
    Helper for `get_imports`, which uses `ldd` to analyze shared libraries. Used on Linux and other POSIX-like platforms
    (with exception of macOS).

    Runs `ldd` on the (symlink-resolved) input file, logs any `ldd` warnings that are not known to be harmless, and
    parses the standard output line by line using a platform-specific pattern. Libraries whose reported path does not
    exist, or which are reported as "not found", are looked up in `search_paths` as a fall-back.

    Returns a set of (name, lib) tuples, where name is the referenced name and lib is the normalized resolved path, or
    None if the library could not be resolved.
    """

    output = set()

    # Output of ldd varies between platforms; select the line-matching pattern accordingly.
    if compat.is_aix:
        # Match libs of the form
        #   'archivelib.a(objectmember.so/.o)'
        # or
        #   'sharedlib.so'
        # Will not match the fake lib '/unix'
        LDD_PATTERN = re.compile(r"^\s*(((?P<libarchive>(.*\.a))(?P<objectmember>\(.*\)))|((?P<libshared>(.*\.so))))$")
    elif compat.is_hpux:
        # Match libs of the form
        #   'sharedlib.so => full-path-to-lib
        # e.g.
        #   'libpython2.7.so => /usr/local/lib/hpux32/libpython2.7.so'
        LDD_PATTERN = re.compile(r"^\s+(.*)\s+=>\s+(.*)$")
    elif compat.is_solar:
        # Match libs of the form
        #   'sharedlib.so => full-path-to-lib
        # e.g.
        #   'libpython2.7.so.1.0 => /usr/local/lib/libpython2.7.so.1.0'
        # Will not match the platform specific libs starting with '/platform'
        LDD_PATTERN = re.compile(r"^\s+(.*)\s+=>\s+(.*)$")
    elif compat.is_linux:
        # Match libs of the form
        #   libpython3.13.so.1.0 => /home/brenainn/.pyenv/versions/3.13.0/lib/libpython3.13.so.1.0 (0x00007a9e15800000)
        # or
        #   /tmp/python/install/bin/../lib/libpython3.13.so.1.0 (0x00007b9489c82000)
        # In the second form, the "name =>" part is absent, so the first group is None.
        LDD_PATTERN = re.compile(r"^\s*(?:(.*?)\s+=>\s+)?(.*?)\s+\(.*\)")
    else:
        # Generic fall-back: 'name => path (address)'.
        LDD_PATTERN = re.compile(r"\s*(.*?)\s+=>\s+(.*?)\s+\(.*\)")

    # Resolve symlinks since GNU ldd contains a bug in processing a symlink to a binary
    # using $ORIGIN: https://sourceware.org/bugzilla/show_bug.cgi?id=25263
    p = subprocess.run(
        ['ldd', os.path.realpath(filename)],
        stdin=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        encoding='utf-8',
    )

    # Filter the messages that ldd wrote to stderr; only the ones not recognized as harmless are reported.
    ldd_warnings = []
    for line in p.stderr.splitlines():
        if not line:
            continue
        # Python extensions (including stdlib ones) are not linked against python.so but rely on Python's symbols having
        # already been loaded into symbol space at runtime. musl's ldd issues a series of harmless warnings to stderr
        # telling us that those symbols are unfindable. These should be suppressed.
        elif line.startswith("Error relocating ") and line.endswith(" symbol not found"):
            continue
        # Shared libraries should have the executable bits set; however, this is not the case for shared libraries
        # shipped in PyPI wheels, which cause ldd to emit `ldd: warning: you do not have execution permission for ...`
        # warnings. Suppress these.
        elif line.startswith("ldd: warning: you do not have execution permission for "):
            continue
        # When `ldd` is ran against a file that is not a dynamic binary (i.e., is not a binary at all, or is a static
        # binary), it emits a "not a dynamic executable" warning. Suppress it.
        elif "not a dynamic executable" in line:
            continue

        # Propagate any other warnings it might have.
        ldd_warnings.append(line)
    if ldd_warnings:
        logger.warning("ldd warnings for %r:\n%s", filename, "\n".join(ldd_warnings))

    # Parse the list of libraries that ldd wrote to stdout.
    for line in p.stdout.splitlines():
        name = None  # Referenced name
        lib = None  # Resolved library path

        m = LDD_PATTERN.search(line)
        if m:
            if compat.is_aix:
                libarchive = m.group('libarchive')
                if libarchive:
                    # We matched an archive lib with a request for a particular embedded shared object.
                    #   'archivelib.a(objectmember.so/.o)'
                    lib = libarchive
                    name = os.path.basename(lib) + m.group('objectmember')
                else:
                    # We matched a stand-alone shared library.
                    #   'sharedlib.so'
                    lib = m.group('libshared')
                    name = os.path.basename(lib)
            elif compat.is_hpux:
                name, lib = m.group(1), m.group(2)
            else:
                name, lib = m.group(1), m.group(2)
                # The name group is optional in the linux pattern; fall back to the basename of the reported path.
                name = name or os.path.basename(lib)
                if compat.is_linux:
                    # Skip all ld variants listed https://sourceware.org/glibc/wiki/ABIList
                    # plus musl's ld-musl-*.so.*.
                    if re.fullmatch(r"ld(64)?(-linux|-musl)?(-.+)?\.so(\..+)?", os.path.basename(lib)):
                        continue
            if name[:10] in ('linux-gate', 'linux-vdso'):
                # linux-gate is a fake library which does not exist and should be ignored. See also:
                # http://www.trilithium.com/johan/2005/08/linux-gate/
                continue

            if compat.is_cygwin:
                # exclude Windows system library
                if lib.lower().startswith('/cygdrive/c/windows/system'):
                    continue

            # Reset library path if it does not exist
            if not os.path.exists(lib):
                lib = None
        elif line.endswith("not found"):
            # On glibc-based linux distributions, missing libraries are marked with name.so => not found
            tokens = line.split('=>')
            if len(tokens) != 2:
                continue
            name = tokens[0].strip()
            lib = None
        else:
            # TODO: should we warn about unprocessed lines?
            continue

        # Fall back to searching the supplied search paths, if any.
        if not lib:
            lib = _resolve_library_path_in_search_paths(
                os.path.basename(name),  # Search for basename of the referenced name.
                search_paths,
            )

        # Normalize the resolved path, to remove any extraneous "../" elements.
        if lib:
            lib = os.path.normpath(lib)

        # Return referenced name as-is instead of computing a basename, to provide additional context when library
        # cannot be resolved.
        output.add((name, lib))

    return output
def _get_imports_macholib(filename, search_paths):
    """
    macOS-specific helper for `get_imports`, which uses `macholib` to analyze library load commands in Mach-O headers.

    References that start with `@rpath` are resolved against the run paths collected from the analyzed binary, from
    the python executable, and from the `lib` directory under `compat.base_prefix`. References that start with
    `@loader_path` or `@executable_path` are resolved against the directory of the analyzed binary and, failing that,
    the directory of the python executable. Anything that remains unresolved is looked up in `search_paths`.

    Returns a set of (referenced_lib, resolved_lib) tuples, where resolved_lib is the normalized resolved path, or
    None if the library could not be resolved.
    """
    from macholib.dyld import dyld_find
    from macholib.mach_o import LC_RPATH
    from macholib.MachO import MachO

    try:
        from macholib.dyld import _dyld_shared_cache_contains_path
    except ImportError:
        _dyld_shared_cache_contains_path = None

    output = set()

    # Parent directory of the input binary and parent directory of python executable, used to substitute @loader_path
    # and @executable_path. The macOS dylib loader (dyld) fully resolves the symbolic links when using @loader_path
    # and @executable_path references, so we need to do the same using `os.path.realpath`.
    bin_path = os.path.dirname(os.path.realpath(filename))
    python_bin = os.path.realpath(sys.executable)
    python_bin_path = os.path.dirname(python_bin)

    def _get_referenced_libs(m):
        # Collect referenced libraries from MachO object.
        referenced_libs = set()
        for header in m.headers:
            for idx, name, lib in header.walkRelocatables():
                referenced_libs.add(lib)
        return referenced_libs

    def _get_run_paths(m):
        # Find LC_RPATH commands to collect rpaths from MachO object.
        # macholib does not handle @rpath, so we need to handle run paths ourselves.
        run_paths = []
        for header in m.headers:
            for command in header.commands:
                # A command is a tuple like:
                #   (<macholib.mach_o.load_command object at 0x>,
                #    <macholib.mach_o.rpath_command object at 0x>,
                #    '../lib\x00\x00')
                cmd_type = command[0].cmd
                if cmd_type == LC_RPATH:
                    rpath = command[2].decode('utf-8')
                    # Remove trailing '\x00' characters. E.g., '../lib\x00\x00'
                    rpath = rpath.rstrip('\x00')
                    # If run path starts with @, ensure it starts with either @loader_path or @executable_path.
                    # We cannot process anything else.
                    if rpath.startswith("@") and not rpath.startswith(("@executable_path", "@loader_path")):
                        logger.warning("Unsupported rpath format %r found in binary %r - ignoring...", rpath, filename)
                        continue
                    run_paths.append(rpath)
        return run_paths

    @functools.lru_cache
    def get_run_paths_and_referenced_libs(filename):
        # Walk through Mach-O headers, and collect all referenced libraries and run paths.
        m = MachO(filename)
        return _get_referenced_libs(m), _get_run_paths(m)

    @functools.lru_cache
    def get_run_paths(filename):
        # Walk through Mach-O headers, and collect only run paths.
        return _get_run_paths(MachO(filename))

    # Collect referenced libraries and run paths from the input binary.
    referenced_libs, run_paths = get_run_paths_and_referenced_libs(filename)

    # On macOS, run paths (rpaths) are inherited from the executable that loads the given shared library (or from the
    # shared library that loads the given shared library). This means that shared libraries and python binary extensions
    # can reference other shared libraries using @rpath without having set any run paths themselves.
    #
    # In order to simulate the run path inheritance that happens in unfrozen python programs, we need to augment the
    # run paths from the given binary with those set by the python interpreter executable (`sys.executable`). Anaconda
    # python, for example, sets the run path on the python executable to `@loader_path/../lib`, which allows python
    # extensions to reference shared libraries in the Anaconda environment's `lib` directory via only `@rpath`
    # (for example, the `_ssl` extension can reference the OpenSSL library as `@rpath/libssl.3.dylib`). In another
    # example, python executable has its run path set to the top-level directory of its .framework bundle; in this
    # case the `ssl` extension references the OpenSSL library as `@rpath/Versions/3.10/lib/libssl.1.1.dylib`.
    #
    # Build a new list instead of extending in place, so that the list held by the cached helper is left untouched.
    run_paths = run_paths + get_run_paths(python_bin)

    # This fallback should be fully superseded by the above recovery of run paths from python executable; but for now,
    # keep it around in case of unforeseen corner cases.
    run_paths.append(os.path.join(compat.base_prefix, 'lib'))

    # De-duplicate run_paths while preserving their order.
    run_paths = list(dict.fromkeys(run_paths))

    def _resolve_using_path(lib):
        # Absolute paths should not be resolved; we should just check whether the library exists or not. This used to
        # be done using macholib's dyld_find() as well (as it properly handles system libraries that are hidden on
        # Big Sur and later), but it turns out that even if given an absolute path, it gives precedence to search paths
        # from DYLD_LIBRARY_PATH. This leads to confusing errors when directory in DYLD_LIBRARY_PATH contains a file
        # (shared library or data file) that happens to have the same name as a library from a system framework.
        if os.path.isabs(lib):
            if _dyld_shared_cache_contains_path is not None and _dyld_shared_cache_contains_path(lib):
                return lib
            if os.path.isfile(lib):
                return lib
            return None

        try:
            return dyld_find(lib)
        except ValueError:
            return None

    def _resolve_using_loader_path(lib, bin_path, python_bin_path):
        # Strictly speaking, @loader_path should be anchored to parent directory of analyzed binary (`bin_path`), while
        # @executable_path should be anchored to the parent directory of the process' executable. Typically, this would
        # be python executable (`python_bin_path`). Unless we are analyzing a collected 3rd party executable; in that
        # case, `bin_path` is correct option. So we first try resolving using `bin_path`, and then fall back to
        # `python_bin_path`. This does not account for transitive run paths of higher-order dependencies, but there is
        # only so much we can do here...
        #
        # NOTE: do not use macholib's `dyld_find`, because its fallback search locations might end up resolving wrong
        # instance of the library! For example, if our `bin_path` and `python_bin_path` are anchored in an Anaconda
        # python environment and the candidate library path does not exist (because we are calling this function when
        # trying to resolve @rpath with multiple candidate run paths), we do not want to fall back to eponymous library
        # that happens to be present in the Homebrew python environment...
        if lib.startswith('@loader_path/'):
            lib = lib[len('@loader_path/'):]
        elif lib.startswith('@executable_path/'):
            lib = lib[len('@executable_path/'):]

        # Try resolving with binary's path first...
        resolved_lib = _resolve_using_path(os.path.join(bin_path, lib))
        if resolved_lib is not None:
            return resolved_lib

        # ... and fall-back to resolving with python executable's path
        return _resolve_using_path(os.path.join(python_bin_path, lib))

    # Try to resolve full path of the referenced libraries.
    for referenced_lib in referenced_libs:
        resolved_lib = None

        # If path starts with @rpath, we have to handle it ourselves.
        if referenced_lib.startswith('@rpath'):
            lib = os.path.join(*referenced_lib.split(os.sep)[1:])  # Remove the @rpath/ prefix

            # Try all run paths.
            for run_path in run_paths:
                # Join the path.
                lib_path = os.path.join(run_path, lib)

                if lib_path.startswith(("@executable_path", "@loader_path")):
                    # Run path starts with @executable_path or @loader_path.
                    lib_path = _resolve_using_loader_path(lib_path, bin_path, python_bin_path)
                else:
                    # If run path was relative, anchor it to binary's location. The joined path must be assigned
                    # back; otherwise, the relative path would be passed on unchanged.
                    if not os.path.isabs(lib_path):
                        lib_path = os.path.join(bin_path, lib_path)
                    lib_path = _resolve_using_path(lib_path)

                if lib_path and os.path.exists(lib_path):
                    resolved_lib = lib_path
                    break
        else:
            if referenced_lib.startswith(("@executable_path", "@loader_path")):
                resolved_lib = _resolve_using_loader_path(referenced_lib, bin_path, python_bin_path)
            else:
                resolved_lib = _resolve_using_path(referenced_lib)

        # Fall back to searching the supplied search paths, if any.
        if not resolved_lib:
            resolved_lib = _resolve_library_path_in_search_paths(
                os.path.basename(referenced_lib),  # Search for basename of the referenced name.
                search_paths,
            )

        # Normalize the resolved path, to remove any extraneous "../" elements.
        if resolved_lib:
            resolved_lib = os.path.normpath(resolved_lib)

        # Return referenced library name as-is instead of computing a basename. Full referenced name carries additional
        # information that might be useful for the caller to determine how to deal with unresolved library (e.g., ignore
        # unresolved libraries that are supposed to be located in system-wide directories).
        output.add((referenced_lib, resolved_lib))

    return output
#- Library full path resolution
def resolve_library_path(name, search_paths=None):
    """
    Try to resolve the full path of the library with the given name. The lookup uses a platform-specific mechanism
    in combination with the optional list of `search_paths`. Returns None if the library cannot be resolved. If the
    given library name is already an absolute path, it is returned as-is, without any processing.
    """
    # Absolute path: nothing to resolve.
    if os.path.isabs(name):
        return name

    if compat.is_unix:
        # The platform-specific helper takes precedence; the supplied search paths, if any, serve as a fall-back.
        return _resolve_library_path_unix(name) or _resolve_library_path_in_search_paths(name, search_paths)

    if compat.is_win:
        # The caller-supplied search paths, if any, take precedence.
        resolved = _resolve_library_path_in_search_paths(name, search_paths)
        if not resolved:
            # Fall back to default Windows search paths, using the PATH environment variable (which should also
            # include the system paths, such as c:\windows and c:\windows\system32)
            env_search_paths = [entry for entry in compat.getenv('PATH', '').split(os.pathsep) if entry]
            resolved = _resolve_library_path_in_search_paths(name, env_search_paths)
        return resolved

    # Any other platform.
    return ctypes.util.find_library(name)
# Compatibility aliases for hooks from contributed hooks repository. Both names are bound to the same function object
# as the high-level `resolve_library_path`, so they accept the same arguments and return the same results.
findLibrary = resolve_library_path
findSystemLibrary = resolve_library_path
def _resolve_library_path_in_search_paths(name, search_paths=None):
"""
Low-level helper for resolving given library name to full path in given list of search paths.
"""
for search_path in search_paths or []:
fullpath = os.path.join(search_path, name)
if not os.path.isfile(fullpath):
continue
# On Windows, ensure that architecture matches that of running python interpreter.
if compat.is_win:
try:
dll_machine_type = winutils.get_pe_file_machine_type(fullpath)
except Exception:
# A search path might contain a DLL that we cannot analyze; for example, a stub file. Skip over.
continue
if dll_machine_type != _exe_machine_type:
continue
return os.path.normpath(fullpath)
return None
def _resolve_library_path_unix(name):
    """
    UNIX-specific helper for resolving library path.

    Emulates the algorithm used by dlopen. `name` must include the prefix, e.g., ``libpython2.4.so``. The library is
    looked up, in order, in the directories from the platform's library path environment variable, in the ldconfig
    cache, and in a list of well-known library directories. Returns the resolved path, or None if not found.
    """
    assert compat.is_unix, "Current implementation for Unix only (Linux, Solaris, AIX, FreeBSD)"

    # A full library name (one that includes the suffix) calls for an exact match, which is what
    # `_resolve_library_path_in_search_paths` provides. A name without the suffix is searched for using
    # `_which_library`, which looks for a library with matching basename.
    has_suffix = name.endswith('.so') or '.so.' in name
    lib_search_func = _resolve_library_path_in_search_paths if has_suffix else _which_library

    # Step 1: the LD_LIBRARY_PATH equivalent for the platform.
    if compat.is_aix:
        env_var = 'LIBPATH'
    elif compat.is_darwin:
        env_var = 'DYLD_LIBRARY_PATH'
    else:
        env_var = 'LD_LIBRARY_PATH'
    env_dirs = filter(None, compat.getenv(env_var, '').split(os.pathsep))
    lib = lib_search_func(name, env_dirs)
    if lib is not None:
        return lib

    # Step 2: /etc/ld.so.cache
    # Solaris does not have /sbin/ldconfig. Just check if this file exists.
    utils.load_ldconfig_cache()
    lib = utils.LDCONFIG_CACHE.get(name)
    if lib:
        assert os.path.isfile(lib)
    if lib is not None:
        return lib

    # Step 3: the known safe paths.
    is_32bit = compat.architecture == '32bit'

    # Architecture independent locations, followed by architecture dependent ones.
    paths = ['/lib', '/usr/lib']
    paths += ['/lib32', '/usr/lib32'] if is_32bit else ['/lib64', '/usr/lib64']

    # Machine dependent locations.
    if compat.machine == 'intel':
        paths.append('/usr/lib/i386-linux-gnu' if is_32bit else '/usr/lib/x86_64-linux-gnu')

    # On Debian/Ubuntu /usr/bin/python is linked statically with libpython. Newer Debian/Ubuntu with multiarch
    # support puts the libpythonX.Y.so in paths like /usr/lib/i386-linux-gnu/. Try to query the arch-specific
    # sub-directory, if available.
    multiarch_subdir = sysconfig.get_config_var('multiarchsubdir')
    if multiarch_subdir:
        paths.append(os.path.join('/usr/lib', os.path.basename(multiarch_subdir)))
    else:
        logger.debug('Multiarch directory not detected.')

    # Termux (a Ubuntu like subsystem for Android) has an additional libraries directory.
    termux_lib_dir = '/data/data/com.termux/files/usr/lib'
    if os.path.isdir(termux_lib_dir):
        paths.append(termux_lib_dir)

    # Additional platform-specific locations.
    if compat.is_aix:
        paths.append('/opt/freeware/lib')
    elif compat.is_hpux:
        paths.append('/usr/local/lib/hpux32' if is_32bit else '/usr/local/lib/hpux64')
    elif compat.is_freebsd or compat.is_openbsd:
        paths.append('/usr/local/lib')

    return lib_search_func(name, paths)
def _which_library(name, dirs):
    """
    Search for a shared library in a list of directories.

    Args:
        name:
            The library name including the `lib` prefix but excluding any `.so` suffix.
        dirs:
            An iterable of folders to search in. Entries that are not existing directories, or that cannot be
            listed, are skipped.

    Returns:
        The path to the library if found or None otherwise. Directories are searched in the given order, and within
        a directory the first matching entry (in `os.listdir()` order) is returned.
    """
    matcher = _library_matcher(name)
    for path in dirs:
        # Search paths typically originate from environment variables (e.g., LD_LIBRARY_PATH), so they may contain
        # stale entries or entries that point to regular files. Only directories can be listed.
        if not os.path.isdir(path):
            continue
        try:
            entries = os.listdir(path)
        except OSError as e:
            # Unreadable directory (e.g., insufficient permissions); keep searching the remaining locations instead
            # of aborting the whole search.
            logger.debug("Skipping library search directory %r: %s", path, e)
            continue
        for entry in entries:
            if matcher(entry):
                return os.path.join(path, entry)
    return None
def _library_matcher(name):
    """
    Create a callable that matches libraries if **name** is a valid library prefix for input library full names.

    The returned callable takes a file name and returns a match object if the file name starts with **name**,
    optionally followed by digits, and then a dot (for example, `libfoo` matches `libfoo.so` and `libfoo1.so.2`);
    otherwise, it returns None.

    The **name** is matched literally: it is escaped before being embedded into the regular expression, so that
    library names containing regex metacharacters (for example, `libstdc++`) neither raise `re.error` nor match
    unrelated file names.
    """
    return re.compile(re.escape(name) + r"[0-9]*\.").match
#- Python shared library search
def get_python_library_path():
    """
    Locate the Python shared library that belongs to the running interpreter.

    Returns the full path to the Python shared library; if the library cannot be located (or the interpreter was
    built without one), `PythonLibraryNotFoundError` is raised.

    The shared library must be collected by PyInstaller so that the bootloader can load it, import the Python C API
    symbols from it, and use them to set up the embedded Python interpreter.

    The library name is typically fixed (`python3.X.dll` on Windows, libpython3.X.so on Unix systems, and
    `libpython3.X.dylib` on macOS for shared library builds and `Python.framework/Python` for framework build), and
    its location can usually be inferred from the interpreter executable when the latter is dynamically linked
    against it.

    Some setups need extra handling, though; for example, Debian-based linux distributions link the interpreter
    executable statically against the Python library, while also shipping a shared library variant for external
    users.
    """
    # Windows: `sys.dllhandle` is a handle to the loaded Python DLL, and its path can be resolved via
    # `GetModuleFileName()` from win32 API. This applies to python.org Windows builds, Anaconda on Windows, and
    # MSYS2 Python.
    if compat.is_win:
        if not hasattr(sys, 'dllhandle'):
            raise PythonLibraryNotFoundError(
                "Python was built without a shared library, which is required by PyInstaller."
            )
        import _winapi
        return _winapi.GetModuleFileName(sys.dllhandle)

    # Other (POSIX) platforms: the `sysconfig` module exposes the name of the Python shared library in the
    # `INSTSONAME` variable. (The `LDLIBRARY` variable also exists, but it points to the unversioned .so symbolic
    # link intended for linking, whereas we want the actual, fully-versioned soname.) This should cover the naming
    # schemes of different platforms and build options (debug build, free-threaded build, etc.).
    #
    # `INSTSONAME` names the shared library only when shared library is enabled; in static-library builds it names
    # the static library, which is useless to us. Whether Python was built with `--enable-shared` can be checked via
    # the `Py_ENABLE_SHARED` variable (1 for shared-library build, 0 for static-library build). On macOS, builds made
    # with `--enable-framework` have `Py_ENABLE_SHARED` set to 0, but have `PYTHONFRAMEWORK` set to a non-empty
    # string.
    #
    # Additionally, some Python builds link the `python` executable against the static Python library, and build the
    # shared library separately for development and for embedders (such as PyInstaller); presumably for performance
    # reasons. This is possible because on POSIX, Python extensions do not need the referenced Python symbols
    # resolved at link-time; they can be resolved at run-time from the running Python process (i.e., they are
    # effectively provided by the `python` executable). There are two variants of such builds. In the first one,
    # `Py_ENABLE_SHARED` is 0 and `INSTSONAME` names the static library (e.g., Anaconda Python). In the second one,
    # `Py_ENABLE_SHARED` is 1 and `INSTSONAME` names the shared library, but the `python` executable is not linked
    # against it (e.g., Debian-packaged Python and `astral-sh/python-build-standalone` Python).
    #
    # Resulting strategy: when shared library is enabled (`Py_ENABLE_SHARED` on all platforms and/or
    # `PYTHONFRAMEWORK` on macOS), use the name from `INSTSONAME`; look for it among binary dependencies of the
    # `python` executable first (regular shared-library-enabled build), then in standard search locations (second
    # variant of static-executable-with-separate-shared-library). When `Py_ENABLE_SHARED` is 0, guess the library
    # name from version and feature flags, but search only `sys.base_prefix` and its `lib` sub-directory; if the
    # library is not there, treat it as unavailable and raise an error. This accommodates Anaconda python (and corner
    # cases when Anaconda python cannot be reliably identified - see #9273) while preventing accidental bundling of
    # a system-wide Python shared library when a custom Python build without shared library is used.

    # Locations with the closest ties to the current Python process.
    prefix_dirs = (compat.base_prefix, os.path.join(compat.base_prefix, 'lib'))

    def _search_prefix_dirs(lib_name):
        # Return the resolved path of `lib_name` from the first prefix directory that contains it, or None.
        for directory in prefix_dirs:
            candidate = os.path.join(directory, lib_name)
            if not os.path.exists(candidate):
                continue
            # Resolve symbolic links for consistency with linker-based search; e.g., on POSIX systems, the linker
            # resolves unversioned library names (python3.X.so) to versioned ones (libpython3.X.so.1.0), because the
            # former are symbolic links to the latter. See #6831.
            resolved = os.path.realpath(candidate)
            if os.path.exists(resolved):
                return resolved
        return None

    # `--enable-shared` builds have `Py_ENABLE_SHARED` set to 1; this holds even for Debian-packaged Python, whose
    # `python` executable is statically linked against the Python library.
    shared_enabled = sysconfig.get_config_var("Py_ENABLE_SHARED")
    if not shared_enabled and compat.is_darwin:
        # macOS `--enable-framework` builds have `Py_ENABLE_SHARED` set to 0, but `PYTHONFRAMEWORK` is set to
        # a non-empty string.
        shared_enabled = sysconfig.get_config_var("PYTHONFRAMEWORK")

    if not shared_enabled:
        # Anaconda Python; this codepath used to be guarded by `compat.is_conda`, but Anaconda Python may also come
        # without `conda-meta` directory (see #9273). Alternatively, this is some other Python build that provides
        # a shared library in spite of `Py_ENABLE_SHARED` being 0.
        major, minor = sys.version_info[:2]
        abi_flag = "t" if compat.is_nogil else ""  # TODO: does Anaconda provide debug builds with "d" suffix?
        # `.dylib` on macOS; on Linux (and, by assumption, any other POSIX build), the versioned `.so.1.0` name.
        extension = "dylib" if compat.is_darwin else "so.1.0"
        guessed_name = f"libpython{major}.{minor}{abi_flag}.{extension}"

        # Accept the library only from `sys.base_prefix` or its `lib` sub-directory, so that an unrelated copy of the
        # shared library that happens to be in the standard search path is not picked up when an error should be
        # raised instead. (True static-library builds usually turn Python's own extension modules into built-ins;
        # an unrelated shared library of the same version expects those to be external extension modules, so using
        # it results in run-time errors due to missing extensions.)
        located = _search_prefix_dirs(guessed_name)
        if located:
            return located

        if compat.is_darwin:
            option_str = "either the `--enable-shared` or the `--enable-framework` option"
        else:
            option_str = "the `--enable-shared` option"
        raise PythonLibraryNotFoundError(
            "Python was built without a shared library, which is required by PyInstaller. "
            f"If you built Python from source, rebuild it with {option_str}."
        )

    # Shared library is enabled; take its name from `INSTSONAME`.
    expected_name = sysconfig.get_config_var('INSTSONAME')

    # Cygwin builds (and MSYS2 python, although the Windows-specific codepath should have handled that) provide
    # INSTSONAME with a ".dll.a" suffix; drop the trailing ".a".
    needs_suffix_fix = compat.is_win or compat.is_cygwin
    if needs_suffix_fix and os.path.normcase(expected_name).endswith('.dll.a'):
        expected_name = expected_name[:-2]

    # NOTE: with macOS .framework bundle build, INSTSONAME holds the full name of the .framework library, for example
    # `Python.framework/Versions/3.13/Python`; comparisons that use only the basename need it pre-computed.
    expected_basename = os.path.normcase(os.path.basename(expected_name))

    # Step 1: look for the expected name among the libraries the Python executable is linked against. This finds
    # nothing if the executable is statically linked against the library (Debian-packaged Python,
    # `astral-sh/python-build-standalone` Python).
    for _, linked_path in get_imports(compat.python_executable):  # (name, fullpath) tuples
        # Unresolved imports have no path and are skipped; the rest are compared by basename.
        if linked_path is not None and os.path.normcase(os.path.basename(linked_path)) == expected_basename:
            return linked_path

    # Step 2: fall back to `sys.base_prefix` and its `lib` sub-directory, as these have the closest ties to the
    # current Python process. This caters to builds such as `astral-sh/python-build-standalone` Python. The full
    # name is used here.
    located = _search_prefix_dirs(expected_name)
    if located:
        return located

    # Step 3: search the configured library search locations, using the basename. This is deliberately the last
    # resort; it primarily caters to Debian-packaged Python, but must not end up collecting the shared library of
    # system-installed Python when the current interpreter is a different build (for example,
    # `astral-sh/python-build-standalone` Python, handled by the preceding step).
    located = resolve_library_path(expected_basename)
    if located:
        return located

    # Nothing found; raise an error, with an installation hint on Debian-based systems.
    error_parts = [f"ERROR: Python shared library ({expected_name!r}) was not found!"]
    if compat.is_linux and os.path.isfile('/etc/debian_version'):
        # The `libpython3.x` package provides the shared library (the full `python3-dev` is not required).
        pkg_name = f"libpython3.{sys.version_info.minor}"
        error_parts.append(
            " If you are using system python on Debian/Ubuntu, you might need to install a separate package by running "
            f"`apt install {pkg_name}`."
        )
    raise PythonLibraryNotFoundError("".join(error_parts))
#- Binary vs data (re)classification
def classify_binary_vs_data(filename):
    """
    Determine whether the given file is a BINARY or a DATA file, using the platform-specific classifier.

    Returns the string 'BINARY' or 'DATA', or None when the file cannot be classified (the file does not exist,
    a required tool is missing, or another error occurred during classification).
    """
    # Only existing regular files are handed over to the platform-specific implementation.
    if os.path.isfile(filename):
        return _classify_binary_vs_data(filename)

    # Non-existent files cannot be classified.
    return None
if compat.is_linux:

    def _classify_binary_vs_data(filename):
        """
        Linux implementation: 'BINARY' for files with ELF signature that `objdump` accepts, 'DATA' otherwise, and
        None if the file cannot be read or `objdump` cannot be run.
        """
        # Peek at the signature first; this avoids the potentially costly `objdump` call for every data file.
        try:
            with open(filename, 'rb') as stream:
                magic = stream.read(4)
        except Exception:
            return None

        if magic != b"\x7FELF":
            return "DATA"

        # The signature check establishes that this is an ELF file; letting `objdump` process it establishes that
        # it is a valid one. In the future, we could also check that the architecture matches the running platform.
        try:
            completed = subprocess.run(
                ['objdump', '-a', filename],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.DEVNULL,
                encoding='utf8',
            )
        except Exception:
            return None  # Failed to run `objdump` or `objdump` unavailable.

        if completed.returncode == 0:
            return 'BINARY'
        return 'DATA'

elif compat.is_win:

    @functools.lru_cache()
    def _no_op_pefile_gc():
        """
        Replace the `gc` module referenced by `pefile` with a copy whose `collect()` does nothing.
        """
        # Disable pefile's redundant and very slow call to gc.collect(). See #8762.
        import types
        import gc

        import pefile

        # Copy of the real `gc` module namespace, with only `collect` turned into a no-op.
        gc_stub = types.ModuleType("gc")
        vars(gc_stub).update(vars(gc))
        gc_stub.collect = lambda *_, **__: None
        pefile.gc = gc_stub

    def _classify_binary_vs_data(filename):
        """
        Windows implementation: 'BINARY' for files with MZ signature that `pefile` can open, 'DATA' for files
        without the signature or with invalid PE format, and None on any other error.
        """
        import pefile

        _no_op_pefile_gc()

        # The MZ signature check quickly classifies the majority of data files.
        try:
            with open(filename, 'rb') as stream:
                magic = stream.read(2)
        except Exception:
            return None

        if magic != b"MZ":
            return "DATA"

        # See whether `pefile` is able to open the file.
        try:
            with pefile.PE(filename, fast_load=True):
                pass
        except pefile.PEFormatError:
            return 'DATA'
        except Exception:
            # Any other error means that the classification could not be performed.
            return None
        return 'BINARY'

elif compat.is_darwin:

    def _classify_binary_vs_data(filename):
        """
        macOS implementation: 'BINARY' if `macholib` can open the file, 'DATA' otherwise.
        """
        import macholib.MachO

        # See whether `macholib` is able to open the file.
        try:
            macholib.MachO.MachO(filename)
        except Exception:
            # TODO: catch only `ValueError`?
            return 'DATA'
        return 'BINARY'

else:

    def _classify_binary_vs_data(filename):
        """
        Fallback for platforms without an implementation: classification is never performed.
        """
        # Classification not implemented for the platform.
        return None