Remove hardcoded libpython binaries and add debug step
All checks were successful
build / build-linux (push) Successful in 16s

This commit is contained in:
kdusek
2025-12-07 23:15:18 +01:00
parent 308ce7768e
commit 6a1fe63684
1807 changed files with 172293 additions and 1 deletions

View File

@@ -0,0 +1 @@
#

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,366 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2021-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Tools for searching bytecode for key statements that indicate the need for additional resources, such as data files
and package metadata.
By *bytecode* I mean the ``code`` object given by ``compile()``, accessible from the ``__code__`` attribute of any
non-builtin function or, in PyInstallerLand, the ``PyiModuleGraph.node("some.module").code`` attribute. The best
guide for bytecode format I have found is the disassembler reference: https://docs.python.org/3/library/dis.html
This parser implementation aims to combine the flexibility and speed of regex with the clarity of the output of
``dis.dis(code)``. It has not achieved the 2nd, but C'est la vie...
The biggest clarity killer here is the ``EXTENDED_ARG`` opcode which can appear almost anywhere and therefore needs
to be tiptoed around at every step. If this code needs to expand significantly, I would recommend an upgrade to a
regex-based grammar parsing library such as Reparse. This way, little steps like unpacking ``EXTENDED_ARGS`` can be
defined once then simply referenced, forming a nice hierarchy, rather than copied everywhere it is needed.
"""
import dis
import re
from types import CodeType
from typing import Pattern
from PyInstaller import compat
# Mapping of opcode names to opcode numbers.
# Python 3.11 introduced specialized opcodes that are missing from opcode.opmap (and its alias dis.opmap); the dis
# module keeps a private map of all opcodes, _all_opmap, which is preferred whenever it exists.
opmap = dis._all_opmap if hasattr(dis, '_all_opmap') else dis.opmap


def _instruction_to_regex(x: str):
    """
    Translate a human-readable opcode name into the regex-escaped single byte that encodes it.
    """
    opcode_byte = bytes((opmap[x],))
    return re.escape(opcode_byte)
def bytecode_regex(pattern: bytes, flags=re.VERBOSE | re.DOTALL):
    """
    Compile a regular expression that matches raw Python bytecode.

    This is a very thin wrapper around :func:`re.compile`:

    * Every opcode name wrapped in backticks is replaced by the (regex-escaped) byte of that opcode.
    * Unless other flags are given, the pattern is compiled in VERBOSE mode, so whitespace and comments may be used
      to lay the pattern out in a way that resembles the output of :func:`dis.dis`.
    """
    assert isinstance(pattern, bytes)

    def _opcode_for(match):
        # Group 1 is the opcode name found between a pair of backticks.
        return _instruction_to_regex(match.group(1).decode())

    expanded = re.sub(rb"`(\w+)`", _opcode_for, pattern)
    return re.compile(expanded, flags=flags)
def finditer(pattern: Pattern, string: bytes):
    """
    Yield the matches of ``pattern`` in ``string`` that begin on an even byte offset.

    Bytecode is a sequence of (opcode, argument) byte pairs, so a match starting on an odd offset has mistaken an
    argument for an opcode and is a false positive. The bytecode is passed through ``_cleanup_bytecode_string()``
    before it is scanned.
    """
    assert isinstance(string, bytes)
    bytecode = _cleanup_bytecode_string(string)
    scan = pattern.finditer(bytecode)
    while True:
        candidate = next(scan, None)
        if candidate is None:
            # The current scan ran to completion; nothing more to report.
            return
        if candidate.start() % 2:
            # False positive starting on an argument byte. A genuine match might overlap it and, because
            # re.finditer() never returns overlapping matches, would be lost; so restart the scan from the next
            # (even) byte.
            scan = pattern.finditer(bytecode, candidate.start() + 1)
        else:
            # The match starts on an opcode.
            yield candidate
# Opcodes involved in function calls with constant arguments. The differences between python versions are handled by
# variables below, which are then used to construct the _call_function_bytecode regex.
# NOTE1: the _OPCODES_* entries are typically used in (non-capturing) groups that match the opcode plus an arbitrary
# argument. But because the entries themselves may contain more than one opcode (with OR operator between them), they
# themselves need to be enclosed in another (non-capturing) group. E.g., "(?:(?:_OPCODES_FUNCTION_GLOBAL).)".
# NOTE2: _OPCODES_EXTENDED_ARG2 is an exception, as it is used as a list of opcodes to exclude, i.e.,
# "[^_OPCODES_EXTENDED_ARG2]". Therefore, multiple opcodes are not separated by the OR operator.
if not compat.is_py311:
    # Before Python 3.11: calls are made with CALL_FUNCTION/CALL_METHOD (LOAD_METHOD and CALL_METHOD were introduced
    # in Python 3.7).
    _OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`"
    _OPCODES_EXTENDED_ARG2 = _OPCODES_EXTENDED_ARG
    _OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
    _OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`|`LOAD_METHOD`"
    _OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
    _OPCODES_FUNCTION_CALL = rb"`CALL_FUNCTION`|`CALL_METHOD`|`CALL_FUNCTION_EX`"

    def _cleanup_bytecode_string(bytecode):
        # These versions need no pre-processing.
        return bytecode

elif not compat.is_py312:
    # Python 3.11 replaced CALL_FUNCTION and CALL_METHOD with a PRECALL + CALL sequence. Both instructions carry the
    # same parameter (the argument count), so matching up to the PRECALL suffices. CALL_FUNCTION_EX still exists.
    # From Python 3.11b1 on, there is also the EXTENDED_ARG_QUICK specialization opcode.
    _OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`|`EXTENDED_ARG_QUICK`"
    # Used inside a negated character class, hence no OR operator between the opcodes.
    _OPCODES_EXTENDED_ARG2 = rb"`EXTENDED_ARG``EXTENDED_ARG_QUICK`"
    _OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
    _OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`|`LOAD_METHOD`"
    _OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"
    _OPCODES_FUNCTION_CALL = rb"`PRECALL`|`CALL_FUNCTION_EX`"

    # Starting with Python 3.11, the bytecode is interleaved with CACHE instructions (which the dis module hides
    # unless show_caches=True is used). Handling them in the regex rules would make those unreadable, so the
    # bytecode is pre-processed instead: CACHE pairs are dropped and every other pair is kept.
    _cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(..)")

    def _cleanup_bytecode_string(bytecode):
        return _cache_instruction_filter.sub(rb"\2", bytecode)

else:
    # Python 3.12 merged EXTENDED_ARG_QUICK back into EXTENDED_ARG and LOAD_METHOD into LOAD_ATTR; PRECALL is no
    # longer a valid opcode name.
    _OPCODES_EXTENDED_ARG = rb"`EXTENDED_ARG`"
    _OPCODES_EXTENDED_ARG2 = _OPCODES_EXTENDED_ARG
    _OPCODES_FUNCTION_LOAD = rb"`LOAD_ATTR`"
    _OPCODES_FUNCTION_CALL = rb"`CALL`|`CALL_FUNCTION_EX`"

    if compat.is_py314:
        # Python 3.14.0a7 added LOAD_FAST_BORROW.
        _OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`|`LOAD_FAST_BORROW`"
        # Python 3.14.0a2 split LOAD_CONST into LOAD_CONST, LOAD_CONST_IMMORTAL, and LOAD_SMALL_INT.
        # https://github.com/python/cpython/commit/faa3272fb8d63d481a136cc0467a0cba6ed7b264
        _OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`|`LOAD_SMALL_INT`|`LOAD_CONST_IMMORTAL`"
    else:
        _OPCODES_FUNCTION_GLOBAL = rb"`LOAD_NAME`|`LOAD_GLOBAL`|`LOAD_FAST`"
        _OPCODES_FUNCTION_ARGS = rb"`LOAD_CONST`"

    # In Python 3.13, the PUSH_NULL opcode is emitted after the LOAD_NAME (and after the LOAD_ATTR opcode(s), if
    # applicable). In Python 3.11 and 3.12 it was emitted before the LOAD_NAME and therefore fell outside of the
    # regex match. Rather than extending the matching rules and the post-processing, PUSH_NULL pairs are filtered
    # out together with the CACHE pairs, which leaves the rest of the processing untouched.
    if compat.is_py313:
        _cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(`PUSH_NULL`.)|(..)")

        def _cleanup_bytecode_string(bytecode):
            return _cache_instruction_filter.sub(rb"\3", bytecode)

    else:
        _cache_instruction_filter = bytecode_regex(rb"(`CACHE`.)|(..)")

        def _cleanup_bytecode_string(bytecode):
            return _cache_instruction_filter.sub(rb"\2", bytecode)
# Compiled pattern for a call whose arguments are all constants. It exposes four capture groups, which are unpacked
# by function_calls(): (1) the load of the root name, (2) any chained attribute/method loads, (3) the loads of the
# constant arguments, and (4) the call instruction itself. Every instruction may carry EXTENDED_ARG prefixes.
# language=PythonVerboseRegExp
_call_function_bytecode = bytecode_regex(
rb"""
# Matches `global_function('some', 'constant', 'arguments')`.
# Load the global function. In code with >256 of names, this may require extended name references.
(
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:(?:""" + _OPCODES_FUNCTION_GLOBAL + rb""").)
)
# For foo.bar.whizz(), the above is the 'foo', below is the 'bar.whizz' (one opcode per name component, each
# possibly preceded by name reference extension).
(
(?:
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_LOAD + rb""").
)*
)
# Load however many arguments it takes. These (for now) must all be constants.
# Again, code with >256 constants may need extended enumeration.
(
(?:
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_ARGS + rb""").
)*
)
# Call the function. If opcode is CALL_FUNCTION_EX, the parameter are flags. For other opcodes, the parameter
# is the argument count (which may be > 256).
(
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + _OPCODES_FUNCTION_CALL + rb""").
)
"""
)
# Compiled pattern matching exactly one instruction together with all of its EXTENDED_ARG prefixes. loads() applies
# it with findall() to split a run of consecutive instructions into single (extended) instructions.
# language=PythonVerboseRegExp
_extended_arg_bytecode = bytecode_regex(
rb"""(
# Arbitrary number of EXTENDED_ARG pairs.
(?:(?:""" + _OPCODES_EXTENDED_ARG + rb""").)*
# Followed by some other instruction (usually a LOAD).
[^""" + _OPCODES_EXTENDED_ARG2 + rb"""].
)"""
)
def extended_arguments(extended_args: bytes):
    """
    Decode the (possibly extended) integer argument that references a name or a constant.

    The input is expected to be a bytecode snippet of the following form::

        EXTENDED_ARG    ?      # Zero or more of these prefixes.
        LOAD_xxx        ?      # Any of LOAD_NAME/LOAD_CONST/LOAD_METHOD/...

    The ``?`` bytes (i.e., every second byte), read together as one big-endian number, give the value we want.
    """
    argument_bytes = extended_args[1::2]
    return int.from_bytes(argument_bytes, byteorder="big")
def load(raw: bytes, code: CodeType) -> str:
    """
    Resolve a single (possibly extended) LOAD_xxx instruction to the value it loads.

    ``raw`` holds the instruction with any EXTENDED_ARG prefixes; its second-to-last byte is the actual opcode.
    Depending on that opcode, the decoded argument is looked up in ``code.co_varnames``, ``code.co_consts`` or
    ``code.co_names``, or (for LOAD_SMALL_INT) returned as-is.
    """
    index = extended_arguments(raw)
    opcode = raw[-2]

    if opcode == opmap["LOAD_FAST"]:
        # A local variable.
        return code.co_varnames[index]
    if opcode == opmap["LOAD_CONST"]:
        # A literal.
        return code.co_consts[index]

    # Python 3.11 (LOAD_GLOBAL) and Python 3.12 (LOAD_ATTR): the name index is stored shifted left by one bit.
    if compat.is_py311 and opcode == opmap["LOAD_GLOBAL"]:
        return code.co_names[index >> 1]
    if compat.is_py312 and opcode == opmap["LOAD_ATTR"]:
        return code.co_names[index >> 1]

    if compat.is_py314:
        if opcode == opmap["LOAD_SMALL_INT"]:
            # LOAD_SMALL_INT pushes its own argument (an int value < 256) on the stack.
            return index
        if opcode == opmap["LOAD_CONST_IMMORTAL"]:
            # A variant of LOAD_CONST for constants that are known to be immortal; pushes co_consts[consti].
            return code.co_consts[index]
        if opcode == opmap["LOAD_FAST_BORROW"]:
            # Pushes a borrowed reference to the local co_varnames[var_num].
            return code.co_varnames[index]

    # Anything else is treated as a global name.
    return code.co_names[index]
def loads(raw: bytes, code: CodeType) -> list:
    """
    Resolve a run of consecutive LOAD_xxx instructions, i.e., apply load() to each instruction in turn.

    Useful for unpacking a function's parameters or a chain of nested attributes ``(foo.bar.pop.whack)``.
    """
    instructions = _extended_arg_bytecode.findall(raw)
    return [load(instruction, code) for instruction in instructions]
def function_calls(code: CodeType) -> list:
    """
    Collect all function calls made with constant arguments in the given code object.

    Returns a list of ``(dotted_function_name, argument_list)`` tuples.
    """
    results = []
    for found in finditer(_call_function_bytecode, code.co_code):
        raw_root, raw_attributes, raw_args, raw_call = found.groups()

        # For foo(), the root is 'foo' and there are no attributes. For foo.bar.whizz(), the root is 'foo' and the
        # attributes hold the remaining name components.
        name_parts = [load(raw_root, code)]
        name_parts += loads(raw_attributes, code)
        dotted_name = ".".join(name_parts)

        arguments = loads(raw_args, code)
        # Flags for CALL_FUNCTION_EX; argument count for every other call opcode.
        call_parameter = extended_arguments(raw_call)

        if raw_call[0] == opmap['CALL_FUNCTION_EX']:
            if call_parameter != 0:
                # Keyword arguments present. Unhandled at the moment.
                continue
            # In calls with const arguments, the arguments are packed into a single tuple holding all values.
            if len(arguments) != 1 or not isinstance(arguments[0], tuple):
                continue
            arguments = list(arguments[0])
        elif call_parameter != len(arguments):
            # This happens if there are variable or keyword arguments. Bail out in either case.
            continue

        results.append((dotted_name, arguments))

    return results
def search_recursively(search: callable, code: CodeType, _memo=None) -> dict:
    """
    Run ``search`` on a code object and on every code object nested in its constants (function definitions).

    Returns a dictionary mapping each visited code object to the result of ``search`` for it. ``_memo`` is that
    dictionary, threaded through the recursive calls; a code object already present in it is not searched again.
    """
    results = {} if _memo is None else _memo
    if code in results:
        return results

    results[code] = search(code)
    for child in (const for const in code.co_consts if isinstance(const, CodeType)):
        search_recursively(search, child, results)
    return results
def recursive_function_calls(code: CodeType) -> dict:
    """
    Collect function calls with constant arguments from a code object and from all code objects nested within it
    (function definitions and bodies of comprehension loops).
    """
    return search_recursively(search=function_calls, code=code)
def any_alias(full_name: str):
    """Generate the names under which a fully qualified Python name might be referenced.

    >>> list(any_alias("foo.bar.wizz"))
    ['foo.bar.wizz', 'bar.wizz', 'wizz']

    This crudely allows us to capture uses of wizz() under any of
    ::
        import foo
        foo.bar.wizz()
    ::
        from foo import bar
        bar.wizz()
    ::
        from foo.bar import wizz
        wizz()

    However, it will fail for any form of aliases and quite likely find false matches.
    """
    components = full_name.split('.')
    # Drop one more leading component on each step: the full name first, the bare last component last.
    for start in range(len(components)):
        yield ".".join(components[start:])

View File

@@ -0,0 +1,378 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2013-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Utilities for manipulating dynamic libraries.
"""
import os
import pathlib
import re
from PyInstaller import compat
import PyInstaller.log as logging
logger = logging.getLogger(__name__)
# Regex patterns of libraries that are not collected; the set is extended with platform-specific patterns further
# below. Ignoring some system libraries speeds up packaging process
_excludes = {
    # Ignore annoying warnings with Windows system DLLs.
    #
    # 'W: library kernel32.dll required via ctypes not found'
    # 'W: library coredll.dll required via ctypes not found'
    #
    # These DLLs have to be ignored on all operating systems, because they might be resolved when scanning code for
    # ctypes dependencies.
    r'advapi32\.dll',
    r'ws2_32\.dll',
    r'gdi32\.dll',
    r'oleaut32\.dll',
    r'shell32\.dll',
    r'ole32\.dll',
    r'coredll\.dll',
    r'crypt32\.dll',
    r'kernel32',
    r'kernel32\.dll',
    r'msvcrt\.dll',
    r'rpcrt4\.dll',
    r'user32\.dll',
    # Some modules try to import the Python library, e.g. pyreadline.console.console.
    # NOTE(review): this pattern is not %-formatted anywhere in the visible code, so as written it matches the
    # literal text "python%s%s" -- confirm whether a versioned library name was intended.
    r'python\%s\%s',
}
# Regex includes - overrides excludes. Include list is used only to override specific libraries from exclude list.
_includes = set()
# Regex patterns of Windows DLLs that are collected even if they would otherwise be excluded.
_win_includes = {
    # We need to allow collection of Visual Studio C++ (VC) runtime DLLs from system directories in order to avoid
    # missing DLL errors when the frozen application is run on a system that does not have the corresponding VC
    # runtime installed. The VC runtime DLLs may be dependencies of python shared library itself or of extension
    # modules provided by 3rd party packages.

    # Visual Studio 2010 (VC10) runtime
    # http://msdn.microsoft.com/en-us/library/8kche8ah(v=vs.100).aspx
    r'atl100\.dll',
    r'msvcr100\.dll',
    r'msvcp100\.dll',
    r'mfc100\.dll',
    r'mfc100u\.dll',
    r'mfcmifc80\.dll',
    r'mfcm100\.dll',
    r'mfcm100u\.dll',

    # Visual Studio 2012 (VC11) runtime
    # https://docs.microsoft.com/en-us/visualstudio/releases/2013/2012-redistribution-vs
    #
    # VC110.ATL
    r'atl110\.dll',
    # VC110.CRT
    r'msvcp110\.dll',
    r'msvcr110\.dll',
    r'vccorlib110\.dll',
    # VC110.CXXAMP
    r'vcamp110\.dll',
    # VC110.MFC
    r'mfc110\.dll',
    r'mfc110u\.dll',
    r'mfcm110\.dll',
    r'mfcm110u\.dll',
    # VC110.MFCLOC
    r'mfc110chs\.dll',
    r'mfc110cht\.dll',
    r'mfc110enu\.dll',
    r'mfc110esn\.dll',
    r'mfc110deu\.dll',
    r'mfc110fra\.dll',
    r'mfc110ita\.dll',
    r'mfc110jpn\.dll',
    r'mfc110kor\.dll',
    r'mfc110rus\.dll',
    # VC110.OpenMP
    r'vcomp110\.dll',
    # DIA SDK
    r'msdia110\.dll',

    # Visual Studio 2013 (VC12) runtime
    # https://docs.microsoft.com/en-us/visualstudio/releases/2013/2013-redistribution-vs
    #
    # VC120.CRT
    r'msvcp120\.dll',
    r'msvcr120\.dll',
    r'vccorlib120\.dll',
    # VC120.CXXAMP
    r'vcamp120\.dll',
    # VC120.MFC
    r'mfc120\.dll',
    r'mfc120u\.dll',
    r'mfcm120\.dll',
    r'mfcm120u\.dll',
    # VC120.MFCLOC
    r'mfc120chs\.dll',
    r'mfc120cht\.dll',
    r'mfc120deu\.dll',
    r'mfc120enu\.dll',
    r'mfc120esn\.dll',
    r'mfc120fra\.dll',
    r'mfc120ita\.dll',
    r'mfc120jpn\.dll',
    r'mfc120kor\.dll',
    r'mfc120rus\.dll',
    # VC120.OPENMP
    r'vcomp120\.dll',
    # DIA SDK
    r'msdia120\.dll',
    # Cpp REST Windows SDK (the dot before "winrt" is escaped so that it matches only a literal dot)
    r'casablanca120\.winrt\.dll',
    # Mobile Services Cpp Client (dot escaped, as above)
    r'zumosdk120\.winrt\.dll',
    # Cpp REST SDK
    r'casablanca120\.dll',

    # Universal C Runtime Library (since Visual Studio 2015)
    #
    # NOTE: these should be put under a switch, as they need not to be bundled if deployment target is Windows 10
    # and later, as "UCRT is now a system component in Windows 10 and later, managed by Windows Update".
    # (https://docs.microsoft.com/en-us/cpp/windows/determining-which-dlls-to-redistribute?view=msvc-170)
    # And as discovered in #6326, Windows prefers system-installed version over the bundled one, anyway
    # (see https://docs.microsoft.com/en-us/cpp/windows/universal-crt-deployment?view=msvc-170#local-deployment).
    r'api-ms-win-core.*',
    r'api-ms-win-crt.*',
    r'ucrtbase\.dll',

    # Visual Studio 2015/2017/2019/2022 (VC14) runtime
    # https://docs.microsoft.com/en-us/visualstudio/releases/2022/redistribution
    #
    # VC141.CRT/VC142.CRT/VC143.CRT
    r'concrt140\.dll',
    r'msvcp140\.dll',
    r'msvcp140_1\.dll',
    r'msvcp140_2\.dll',
    r'msvcp140_atomic_wait\.dll',
    r'msvcp140_codecvt_ids\.dll',
    r'vccorlib140\.dll',
    r'vcruntime140\.dll',
    r'vcruntime140_1\.dll',
    # VC141.CXXAMP/VC142.CXXAMP/VC143.CXXAMP
    r'vcamp140\.dll',
    # VC141.OpenMP/VC142.OpenMP/VC143.OpenMP
    r'vcomp140\.dll',
    # DIA SDK
    r'msdia140\.dll',

    # Allow pythonNN.dll, pythoncomNN.dll, pywintypesNN.dll
    r'py(?:thon(?:com(?:loader)?)?|wintypes)\d+\.dll',
}
# Additional exclude patterns that apply on Windows.
_win_excludes = {
    # MS assembly excludes
    r'Microsoft\.Windows\.Common-Controls',
    # Only .dll files can be loaded on Windows, so shared libraries of other platforms are of no use.
    r'.*\.dylib',
    r'.*\.so',
}
# Exclude patterns shared by the *nix platforms.
_unix_excludes = {
    # Basic system libraries.
    r'libc\.so(\..*)?',
    r'libdl\.so(\..*)?',
    r'libm\.so(\..*)?',
    r'libpthread\.so(\..*)?',
    r'librt\.so(\..*)?',
    r'libthread_db\.so(\..*)?',

    # glibc regex excludes.
    r'ld-linux\.so(\..*)?',
    r'libBrokenLocale\.so(\..*)?',
    r'libanl\.so(\..*)?',
    r'libcidn\.so(\..*)?',
    r'libcrypt\.so(\..*)?',
    r'libnsl\.so(\..*)?',
    r'libnss_compat.*\.so(\..*)?',
    r'libnss_dns.*\.so(\..*)?',
    r'libnss_files.*\.so(\..*)?',
    r'libnss_hesiod.*\.so(\..*)?',
    r'libnss_nis.*\.so(\..*)?',
    r'libnss_nisplus.*\.so(\..*)?',
    r'libresolv\.so(\..*)?',
    r'libutil\.so(\..*)?',

    # Graphical interface libraries come with the graphical stack (see libglvnd).
    r'libE?(Open)?GLX?(ESv1_CM|ESv2)?(dispatch)?\.so(\..*)?',
    r'libdrm\.so(\..*)?',

    # A subset of the libraries included as part of the Nvidia Linux Graphics Driver as of 520.56.06:
    # https://download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/installedcomponents.html
    r'nvidia_drv\.so',
    r'libglxserver_nvidia\.so(\..*)?',
    r'libnvidia-egl-(gbm|wayland)\.so(\..*)?',
    r'libnvidia-(cfg|compiler|e?glcore|glsi|glvkspirv|rtcore|allocator|tls|ml)\.so(\..*)?',
    r'lib(EGL|GLX)_nvidia\.so(\..*)?',
    # libcuda.so, libcuda.so.1, and libcuda.so.{version} are the run-time part of the NVIDIA driver and should not
    # be collected, as they need to match the rest of the driver components on the target system.
    r'libcuda\.so(\..*)?',
    r'libcudadebugger\.so(\..*)?',

    # libxcb-dri changes ABI frequently (e.g.: between Ubuntu LTS releases) and is usually installed as a dependency
    # of the graphics stack anyway. No need to bundle it.
    r'libxcb\.so(\..*)?',
    r'libxcb-dri.*\.so(\..*)?',

    # A system running a Wayland compositor should already have these libraries, in versions that should not
    # conflict with system drivers, unlike bundled ones.
    r'libwayland.*\.so(\..*)?',
}
# Exclude patterns for AIX, where the listed system libraries are archive (.a) files.
_aix_excludes = {
    r'libbz2\.a',
    r'libc\.a',
    r'libC\.a',
    r'libcrypt\.a',
    r'libdl\.a',
    r'libintl\.a',
    r'libpthreads\.a',
    # A single backslash escapes the dot. The doubled backslash used previously made the regex require a literal
    # backslash in the file name, so 'librt.a' was never matched.
    r'librt\.a',
    r'librtl\.a',
    r'libz\.a',
}
# Exclude pattern specific to Solaris.
_solaris_excludes = {r'libsocket\.so(\..*)?'}
# Exclude pattern specific to Cygwin.
_cygwin_excludes = {r'cygwin1\.dll'}
# Extend the global include/exclude sets (in place) with the patterns of the platform we are running on.
if compat.is_win:
    _includes.update(_win_includes)
    _excludes.update(_win_excludes)
elif compat.is_cygwin:
    _excludes.update(_cygwin_excludes)
elif compat.is_aix:
    # AIX has its own exclude list, which differs from the one used on the other *nix platforms.
    _excludes.update(_aix_excludes)
elif compat.is_solar:
    # Solaris uses its own exclude list in addition to the common *nix one.
    _excludes.update(_solaris_excludes, _unix_excludes)
elif compat.is_unix:
    # Common excludes for the remaining *nix platforms.
    _excludes.update(_unix_excludes)
class MatchList:
    """
    A collection of regex patterns, combined into a single case-insensitive expression that is matched against the
    basename of a library.
    """
    def __init__(self, entries):
        # An empty collection of patterns is represented by the absence of a compiled expression.
        if entries:
            self._regex = re.compile('|'.join(entries), re.I)
        else:
            self._regex = None

    def check_library(self, libname):
        """
        Match the basename of ``libname`` against the patterns. Returns the match object (or None if nothing
        matched), or False if the list contains no patterns.
        """
        if not self._regex:
            return False
        return self._regex.match(os.path.basename(libname))
# Build the exclude list in its platform-specific flavor.
if compat.is_darwin:
    import macholib.util

    class MacExcludeList(MatchList):
        """
        Exclude list for macOS: the global patterns, plus everything located in a system path.
        """
        def check_library(self, libname):
            # The global exclude list takes precedence; libraries that it does not match are excluded if macholib
            # reports them as residing in a system path.
            return super().check_library(libname) or macholib.util.in_system_path(libname)

    exclude_list = MacExcludeList(_excludes)

elif compat.is_win:
    from PyInstaller.utils.win32 import winutils

    class WinExcludeList(MatchList):
        """
        Exclude list for Windows: the global patterns, plus everything located under the Windows directory.
        """
        def __init__(self, entries):
            super().__init__(entries)

            self._windows_dir = pathlib.Path(winutils.get_windows_dir()).resolve()

            # When running as SYSTEM user, the home directory is `%WINDIR%\system32\config\systemprofile`.
            self._home_dir = pathlib.Path.home().resolve()
            self._system_home = self._windows_dir in self._home_dir.parents

        def check_library(self, libname):
            # The name is lower-cased before it is checked against the global exclude list.
            matched = super().check_library(libname.lower())
            if matched:
                return matched

            ancestors = pathlib.Path(libname).resolve().parents
            if self._windows_dir not in ancestors:
                return False

            # Everything under the Windows directory is excluded by default, except for the contents of the user's
            # home directory if that happens to be rooted under the Windows directory (e.g., when running
            # PyInstaller as SYSTEM user).
            return not (self._system_home and self._home_dir in ancestors)

    exclude_list = WinExcludeList(_excludes)

else:
    exclude_list = MatchList(_excludes)

# The include list is a plain pattern list on every platform.
include_list = MatchList(_includes)
# Wine built-in DLLs for which include_library() has already emitted a warning.
_seen_wine_dlls = set()


def include_library(libname):
    """
    Decide whether the given dynamic library should be collected with the application.

    Returns False for libraries that are excluded (and not re-enabled by the include list) and for Wine built-in
    DLLs; True otherwise.
    """
    # An exclude is final unless the include list overrides it.
    excluded = exclude_list.check_library(libname)
    if excluded and not include_list.check_library(libname):
        return False

    # Wine built-in DLLs are always excluded when running under Wine. Excluding a DLL normally leads to an
    # incomplete bundle and run-time errors on target systems that lack the DLL. Collecting Wine built-in DLLs is
    # even more detrimental, though: they usually provide Wine's own implementation of low-level functionality and
    # cannot be used on actual Windows. This concerns system libraries from the C:\Windows\system32 directory that
    # might end up collected due to the ``_win_includes`` list; a prominent example are the VC runtime DLLs, for
    # which Wine provides its own implementation unless the user explicitly installs Microsoft's VC
    # redistributable package in their Wine environment. Excluding them improves the chances of the bundle running
    # on Windows, or at least turns the issue into the "standard" missing DLL problem, which is easier to debug.
    # The bundle's ability to run under Wine itself should not be affected, as the excluded DLLs are available
    # there.
    if not (compat.is_win_wine and compat.is_wine_dll(libname)):
        return True

    # Warn only once per DLL. Note that this point is reached only if the DLL were to be included in the first
    # place.
    if libname not in _seen_wine_dlls:
        logger.warning("Excluding Wine built-in DLL: %s", libname)
        _seen_wine_dlls.add(libname)
    return False
# Patterns of missing dynamically linked libraries about which no warning is displayed.
_warning_suppressions = []

if compat.is_linux:
    # On some systems (e.g., openwrt), libc.so might point to ldd. Suppress warnings about it.
    _warning_suppressions += [r'ldd']

if compat.is_win_10 or compat.is_win_11:
    # Suppress warnings about unresolvable UCRT DLLs (see issue #1566) on Windows 10 and 11.
    _warning_suppressions += [r'api-ms-win-.*\.dll']

missing_lib_warning_suppression_list = MatchList(_warning_suppressions)
def warn_missing_lib(libname):
    """
    Tell whether a missing-library warning should be displayed for the given library name (or full path), i.e.,
    whether the library is not matched by the warning suppression list.
    """
    suppressed = missing_lib_warning_suppression_list.check_library(libname)
    return not suppressed

View File

@@ -0,0 +1,582 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Code related to processing of import hooks.
"""
import glob
import os.path
import sys
import weakref
import re
from PyInstaller import log as logging
from PyInstaller.building.utils import format_binaries_and_datas
from PyInstaller.compat import importlib_load_source
from PyInstaller.depend.imphookapi import PostGraphAPI
from PyInstaller.exceptions import ImportErrorWhenRunningHook
logger = logging.getLogger(__name__)
class ModuleHookCache(dict):
    """
    Cache of lazily loadable hook script objects.

    This cache is a `dict` subclass that maps the fully-qualified name of each module with at least one hook script
    to the `ModuleHook` instance selected for that module (when several hook scripts exist for the same module, the
    one with the highest priority is kept). As a `dict` subclass, all cached module names and hooks are accessible
    via standard dictionary operations.

    Attributes
    ----------
    module_graph : ModuleGraph
        Current module graph (stored as a weak proxy).
    _hook_module_name_prefix : str
        String prefixing the names of all in-memory modules lazily loaded from cached hook scripts. It is passed on
        as the `hook_module_name_prefix` argument to `ModuleHook`.
    """

    # 0-based identifier unique to the next `ModuleHookCache` to be instantiated.
    #
    # It is incremented on each instantiation of a new `ModuleHookCache` and embedded in that cache's module name
    # prefix, so that the in-memory modules of lazily loaded hook scripts of each cache live in a cache-specific
    # namespace and do not collide with those of other caches.
    _cache_id_next = 0

    def __init__(self, module_graph, hook_dirs):
        """
        Cache all hook scripts found in the passed directories.

        Parameters
        ----------
        module_graph : ModuleGraph
            Current module graph.
        hook_dirs : list
            List of `(hook_dir, default_priority)` 2-tuples, where `hook_dir` is the absolute or relative path of a
            directory containing **hook scripts** (i.e., Python scripts with filenames matching
            `hook-{module_name}.py`, where `{module_name}` is the module hooked by that script), and
            `default_priority` is the default priority handed to the hooks created from that directory.

        Raises
        ------
        FileNotFoundError
            If any of the hook directories does not exist.
        """
        super().__init__()

        # Keep only a weak reference to the graph, to avoid circular references and hence increased memory
        # consumption. This relies on the graph outliving this cache.
        self.module_graph = weakref.proxy(module_graph)

        # Cache-specific prefix for the names of all in-memory modules lazily loaded from cached hook scripts.
        cache_id = ModuleHookCache._cache_id_next
        ModuleHookCache._cache_id_next += 1
        self._hook_module_name_prefix = '__PyInstaller_hooks_{}_'.format(cache_id)

        self._cache_hook_dirs(hook_dirs)

    def _cache_hook_dirs(self, hook_dirs):
        """
        Cache all hook scripts found in the passed directories.

        Parameters
        ----------
        hook_dirs : list
            List of `(hook_dir, default_priority)` 2-tuples; see `__init__()`.
        """
        for hook_dir, default_priority in hook_dirs:
            # Canonicalize the path of this directory and make sure that it exists.
            hook_dir = os.path.abspath(hook_dir)
            if not os.path.isdir(hook_dir):
                raise FileNotFoundError('Hook directory "{}" not found.'.format(hook_dir))

            for hook_filename in glob.glob(os.path.join(hook_dir, 'hook-*.py')):
                # The name of the hooked module is the file name without the "hook-" prefix and ".py" suffix.
                module_name = os.path.basename(hook_filename)[5:-3]

                # Lazily loadable hook object, collected in the list of hooks of its module.
                module_hook = ModuleHook(
                    module_graph=self.module_graph,
                    module_name=module_name,
                    hook_filename=hook_filename,
                    hook_module_name_prefix=self._hook_module_name_prefix,
                    default_priority=default_priority,
                )
                self.setdefault(module_name, []).append(module_hook)

        # Post-processing: only one hook is kept per module. If a module has several hooks, the one with the
        # highest priority value wins; among hooks of equal priority, the one that was cached first is kept (the
        # sort is stable).
        for module_name, hooks in self.items():
            if len(hooks) == 1:
                selected_hook = hooks[0]
            else:
                selected_hook = sorted(hooks, key=lambda hook: hook.priority, reverse=True)[0]
            self[module_name] = selected_hook

    def remove_modules(self, *module_names):
        """
        Remove the passed modules, along with the hook scripts cached for them, from this cache.

        Parameters
        ----------
        module_names : list
            Fully-qualified names of all modules to be removed. Names that are not cached are ignored.
        """
        for module_name in module_names:
            # Drop our own reference, if there is one.
            module_hook = self.pop(module_name, None)
            if module_hook is None:
                continue
            # Unload the hook script module from memory. These are top-level pure-Python modules cached only in the
            # "sys.modules" dictionary, so popping them from there suffices for them to be garbage collected.
            sys.modules.pop(module_hook.hook_module_name, None)
def _module_collection_mode_sanitizer(value):
if isinstance(value, dict):
# Hook set a dictionary; use it as-is
return value
elif isinstance(value, str):
# Hook set a mode string; convert to a dictionary and assign the string to `None` (= the hooked module).
return {None: value}
raise ValueError(f"Invalid module collection mode setting value: {value!r}")
def _bindepend_symlink_suppression_sanitizer(value):
if isinstance(value, (list, set)):
# Hook set a list or a set; use it as-is
return set(value)
elif isinstance(value, str):
# Hook set a string; create a set with single element.
return set([value])
raise ValueError(f"Invalid value for bindepend_symlink_suppression: {value!r}")
# Dictionary mapping the names of magic attributes required by the "ModuleHook" class to 2-tuples "(default_type,
# sanitizer_func)", where:
#
# * "default_type" is a callable (usually a type) invoked with no arguments to produce the default value of that
#   attribute when the lazily loaded hook script does not define it, or defines it as "None".
# * "sanitizer_func" is the callable sanitizing the original value of that attribute defined by that hook into a
#   safer value consumable by "ModuleHook" callers if any or "None" if the original value requires no sanitization.
#   The sanitizer is applied only to values defined by the hook script, never to the default value.
#
# To avoid subtleties in the ModuleHook.__getattr__() method, this dictionary is declared as a module rather than a
# class attribute. If declared as a class attribute and then undefined (...for whatever reason), attempting to access
# this attribute from that method would produce infinite recursion.
_MAGIC_MODULE_HOOK_ATTRS = {
# Collections in which order is insignificant. This includes:
#
# * "datas", sanitized from hook-style 2-tuple lists defined by hooks into TOC-style 2-tuple sets consumable by
#   "ModuleHook" callers.
# * "binaries", sanitized in the same way.
# * "excludedimports", which has no sanitizer: the value defined by the hook is exposed as is, and only the default
#   value is guaranteed to be a set.
'datas': (set, format_binaries_and_datas),
'binaries': (set, format_binaries_and_datas),
'excludedimports': (set, None),
# Collections in which order is significant. This includes:
#
# * "hiddenimports", as order of importation is significant. On module importation, hook scripts are loaded and hook
#   functions declared by these scripts are called. As these scripts and functions can have side effects dependent
#   on module importation order, module importation itself can have side effects dependent on this order!
'hiddenimports': (list, None),
# Flags. The default is produced by a lambda because the default value is obtained by calling the first tuple item;
# a value defined by the hook is coerced with bool().
'warn_on_missing_hiddenimports': (lambda: True, bool),
# Package/module collection mode dictionary.
'module_collection_mode': (dict, _module_collection_mode_sanitizer),
# Path patterns for suppression of symbolic links created by binary dependency analysis.
'bindepend_symlink_suppression': (set, _bindepend_symlink_suppression_sanitizer),
}
class ModuleHook:
    """
    Cached object encapsulating a lazily loadable hook script.

    This object exposes public attributes (e.g., `datas`) of the underlying hook script as attributes of the same
    name of this object. On the first access of any such attribute, this hook script is lazily loaded into an
    in-memory private module reused on subsequent accesses. These dynamic attributes are referred to as "magic." All
    other static attributes of this object (e.g., `hook_module_name`) are referred to as "non-magic."

    Attributes (Magic)
    ----------
    datas : set
        Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external non-executable files required by
        the module being hooked, converted from the `datas` list of hook-style 2-tuples `(source_dir_or_glob,
        target_dir)` defined by this hook script.
    binaries : set
        Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external executable files required by the
        module being hooked, converted from the `binaries` list of hook-style 2-tuples `(source_dir_or_glob,
        target_dir)` defined by this hook script.
    excludedimports : set
        Fully-qualified names of all modules imported by the module being hooked to be ignored rather than imported
        from that module. No conversion is applied: the value defined by this hook script is exposed as is, and an
        empty set is used when the script defines none. These modules will only be "locally" rather than "globally"
        ignored. These modules will remain importable from all modules other than the module being hooked.
    hiddenimports : list
        Ordered list of the fully-qualified names of all modules imported by the module being hooked that are _not_
        automatically detectable by PyInstaller (usually due to being dynamically imported in that module), as
        defined by the `hiddenimports` attribute of this hook script. Order is preserved, as importation order may
        be significant.
    warn_on_missing_hiddenimports : bool
        Boolean flag indicating whether missing hidden imports from the hook should generate warnings or not. This
        behavior is enabled by default, but individual hooks can opt out of it.
    module_collection_mode : dict
        A dictionary of package/module names and their corresponding collection mode strings ('pyz', 'pyc', 'py',
        'pyz+py', 'py+pyz').
    bindepend_symlink_suppression : set
        A set of paths or path patterns corresponding to shared libraries for which binary dependency analysis should
        not create symbolic links into top-level application directory.

    Attributes (Non-magic)
    ----------
    module_graph : ModuleGraph
        Current module graph.
    module_name : str
        Name of the module hooked by this hook script.
    hook_filename : str
        Absolute or relative path of this hook script.
    hook_module_name : str
        Name of the in-memory module of this hook script's interpreted contents.
    _hook_module : module
        In-memory module of this hook script's interpreted contents, lazily loaded on the first call to the
        `_load_hook_module()` method _or_ `None` if this method has yet to be accessed.
    _default_priority : int
        Default (location-based) priority for this hook.
    priority : int
        Actual priority for this hook. Might be different from `_default_priority` if hook file specifies the hook
        priority override.
    """

    #-- Magic --

    def __init__(self, module_graph, module_name, hook_filename, hook_module_name_prefix, default_priority):
        """
        Initialize this metadata.

        Parameters
        ----------
        module_graph : ModuleGraph
            Current module graph. Must be passed as a weak reference proxy.
        module_name : str
            Name of the module hooked by this hook script.
        hook_filename : str
            Absolute or relative path of this hook script.
        hook_module_name_prefix : str
            String prefixing the name of the in-memory module for this hook script. To avoid namespace clashes with
            similar modules created by other `ModuleHook` objects in other `ModuleHookCache` containers, this string
            _must_ be unique to the `ModuleHookCache` container containing this `ModuleHook` object. If this string
            is non-unique, an existing in-memory module will be erroneously reused when lazily loading this hook
            script, thus erroneously resanitizing previously sanitized hook script attributes (e.g., `datas`) with
            the `format_binaries_and_datas()` helper.
        default_priority : int
            Default, location-based priority for this hook. Used to select active hook when multiple hooks are defined
            for the same module.
        """
        # Note that the passed module graph is already a weak reference, avoiding circular reference issues. See
        # ModuleHookCache.__init__().
        assert isinstance(module_graph, weakref.ProxyTypes), (
            "module_graph must be a weak reference proxy (see weakref.proxy()), got {!r}".format(type(module_graph))
        )
        self.module_graph = module_graph
        self.module_name = module_name
        self.hook_filename = hook_filename

        # Default priority; used as fall-back for dynamic `hook_priority` attribute.
        self._default_priority = default_priority

        # Name of the in-memory module fabricated to refer to this hook script.
        self.hook_module_name = hook_module_name_prefix + self.module_name.replace('.', '_')

        # Attributes subsequently defined by the _load_hook_module() method.
        self._loaded = False
        self._has_hook_function = False
        self._hook_module = None

    def __getattr__(self, attr_name):
        """
        Get the magic attribute with the passed name (e.g., `datas`) from this lazily loaded hook script if any _or_
        raise `AttributeError` otherwise.

        This special method is called only for attributes _not_ already defined by this object. This includes
        undefined attributes and the first attempt to access magic attributes.

        This special method is _not_ called for subsequent attempts to access magic attributes. The first attempt to
        access magic attributes defines corresponding instance variables accessible via the `self.__dict__` instance
        dictionary (e.g., as `self.datas`) without calling this method. This approach also allows magic attributes to
        be deleted from this object _without_ defining the `__delattr__()` special method.

        See Also
        ----------
        Class docstring for supported magic attributes.
        """
        if attr_name == 'priority':
            # If attribute is part of hook metadata, read metadata from hook script and return the attribute value.
            # Only the hook file's text is scanned here; the hook script itself is not executed.
            self._load_hook_metadata()
            return getattr(self, attr_name)
        if attr_name in _MAGIC_MODULE_HOOK_ATTRS and not self._loaded:
            # If attribute is hook's magic attribute, load and run the hook script, and return the attribute value.
            self._load_hook_module()
            return getattr(self, attr_name)
        else:
            # This is an undefined attribute. Raise an exception.
            raise AttributeError(attr_name)

    def __setattr__(self, attr_name, attr_value):
        """
        Set the attribute with the passed name to the passed value.

        If this is a magic attribute, this hook script will be lazily loaded before setting this attribute. Unlike
        `__getattr__()`, this special method is called to set _any_ attribute -- including magic, non-magic,
        and undefined attributes.

        See Also
        ----------
        Class docstring for supported magic attributes.
        """
        # If this is a magic attribute, initialize this attribute by lazy loading this hook script before overwriting
        # this attribute. When invoked from within _load_hook_module() itself, the nested call returns immediately,
        # because the hook is already flagged as loaded by then.
        if attr_name in _MAGIC_MODULE_HOOK_ATTRS:
            self._load_hook_module()

        # Set this attribute to the passed value. To avoid recursion, the superclass method rather than setattr() is
        # called.
        return super().__setattr__(attr_name, attr_value)

    #-- Loading --

    def _load_hook_metadata(self):
        """
        Load hook metadata from its source file.

        Sets the `priority` attribute to the default priority and then scans the hook file for the first priority
        override directive; a directive whose value is not a valid integer is reported with a warning and ignored.
        """
        self.priority = self._default_priority

        # Priority override pattern: `# $PyInstaller-Hook-Priority: <value>`
        priority_pattern = re.compile(r"^\s*#\s*\$PyInstaller-Hook-Priority:\s*(?P<value>[\S]+)")

        with open(self.hook_filename, "r", encoding="utf-8") as f:
            for line in f:
                # Attempt to match and parse hook priority directive
                m = priority_pattern.match(line)
                if m is None:
                    continue
                try:
                    self.priority = int(m.group('value'))
                except ValueError:
                    # int() on a string fails only with ValueError; keep the default priority in that case.
                    logger.warning(
                        "Failed to parse hook priority value string: %r!", m.group('value'), exc_info=True
                    )
                # Currently, this is our only line of interest, so we can stop the search here.
                return

    def _load_hook_module(self, keep_module_ref=False):
        """
        Lazily load this hook script into an in-memory private module.

        This method (and, indeed, this class) preserves all attributes and functions defined by this hook script as
        is, ensuring sane behaviour in hook functions _not_ expecting unplanned external modification. Instead,
        this method copies public attributes defined by this hook script (e.g., `binaries`) into private attributes
        of this object, which the special `__getattr__()` and `__setattr__()` methods safely expose to external
        callers. For public attributes _not_ defined by this hook script, the corresponding private attributes will
        be assigned sane defaults. For some public attributes defined by this hook script, the corresponding private
        attributes will be transformed into objects more readily and safely consumed elsewhere by external callers.

        Parameters
        ----------
        keep_module_ref : bool
            Whether to retain the reference to the loaded hook module in `_hook_module` after loading. If the hook
            has been loaded before but its module reference was released, passing `True` loads the script again.

        Raises
        ------
        ImportErrorWhenRunningHook
            If loading the hook script raises `ImportError`.

        See Also
        ----------
        Class docstring for supported attributes.
        """
        # If this hook script module has already been loaded, noop. A repeated load is needed only when the caller
        # wants the module reference and it has been released in the meantime.
        if self._loaded and (self._hook_module is not None or not keep_module_ref):
            return

        # Load and execute the hook script. Even if mechanisms from the import machinery are used, this does not import
        # the hook as the module.
        hook_path, hook_basename = os.path.split(self.hook_filename)
        logger.info('Processing standard module hook %r from %r', hook_basename, hook_path)
        try:
            self._hook_module = importlib_load_source(self.hook_module_name, self.hook_filename)
        except ImportError as exc:
            logger.debug("Hook failed with:", exc_info=True)
            raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename) from exc

        # Mark as loaded. This must happen before the magic attributes are assigned below, since each assignment
        # re-enters this method via __setattr__().
        self._loaded = True

        # Check if module has hook() function.
        self._has_hook_function = hasattr(self._hook_module, 'hook')

        # Copy hook script attributes into magic attributes exposed as instance variables of the current "ModuleHook"
        # instance.
        for attr_name, (default_type, sanitizer_func) in _MAGIC_MODULE_HOOK_ATTRS.items():
            # Unsanitized value of this attribute.
            attr_value = getattr(self._hook_module, attr_name, None)

            # If this attribute is undefined, expose a sane default instead.
            if attr_value is None:
                attr_value = default_type()
            # Else if this attribute requires sanitization, do so.
            elif sanitizer_func is not None:
                attr_value = sanitizer_func(attr_value)
            # Else, expose the unsanitized value of this attribute.

            # Expose this attribute as an instance variable of the same name.
            setattr(self, attr_name, attr_value)

        # If module_collection_mode has an entry with None key, reassign it to the hooked module's name.
        self.module_collection_mode = {
            key if key is not None else self.module_name: value
            for key, value in self.module_collection_mode.items()
        }

        # Release the module if we do not need the reference. This is the case when hook is loaded during the analysis
        # rather as part of the post-graph operations.
        if not keep_module_ref:
            self._hook_module = None

    #-- Hooks --

    def post_graph(self, analysis):
        """
        Call the **post-graph hook** (i.e., `hook()` function) defined by this hook script, if any.

        Parameters
        ----------
        analysis: build_main.Analysis
            Analysis that calls the hook

        This method is intended to be called _after_ the module graph for this application is constructed.
        """
        # Lazily load this hook script into an in-memory module.
        # The script might have been loaded before during modulegraph analysis; in that case, it needs to be reloaded
        # only if it provides a hook() function.
        if not self._loaded or self._has_hook_function:
            # Keep module reference when loading the hook, so we can call its hook function!
            self._load_hook_module(keep_module_ref=True)

        # Call this hook script's hook() function, which modifies attributes accessed by subsequent methods and
        # hence must be called first.
        self._process_hook_func(analysis)

        # Order is insignificant here.
        self._process_hidden_imports()

    def _process_hook_func(self, analysis):
        """
        Call this hook's `hook()` function if defined.

        Parameters
        ----------
        analysis: build_main.Analysis
            Analysis that calls the hook

        Raises
        ------
        ImportErrorWhenRunningHook
            If the `hook()` function raises `ImportError`.
        """
        # If this hook script defines no hook() function, noop. This also covers the case where no module reference
        # is held (`_hook_module` is `None`).
        if not hasattr(self._hook_module, 'hook'):
            return

        # Call this hook() function.
        hook_api = PostGraphAPI(module_name=self.module_name, module_graph=self.module_graph, analysis=analysis)
        try:
            self._hook_module.hook(hook_api)
        except ImportError as exc:
            logger.debug("Hook failed with:", exc_info=True)
            raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename) from exc

        # Update all magic attributes modified by the prior call.
        self.datas.update(hook_api._added_datas)
        self.binaries.update(hook_api._added_binaries)
        self.hiddenimports.extend(hook_api._added_imports)
        self.module_collection_mode.update(hook_api._module_collection_mode)
        self.bindepend_symlink_suppression.update(hook_api._bindepend_symlink_suppression)

        # FIXME: `hook_api._deleted_imports` should be appended to `self.excludedimports` and used to suppress module
        # import during the modulegraph construction rather than handled here. However, for that to work, the `hook()`
        # function needs to be ran during modulegraph construction instead of in post-processing (and this in turn
        # requires additional code refactoring in order to be able to pass `analysis` to `PostGraphAPI` object at
        # that point). So once the modulegraph rewrite is complete, remove the code block below.
        for deleted_module_name in hook_api._deleted_imports:
            # Remove the graph link between the hooked module and item. This removes the 'item' node from the graph if
            # no other links go to it (no other modules import it)
            self.module_graph.removeReference(hook_api.node, deleted_module_name)

    def _process_hidden_imports(self):
        """
        Add all imports listed in this hook script's `hiddenimports` attribute to the module graph as if directly
        imported by this hooked module.

        These imports are typically _not_ implicitly detectable by PyInstaller and hence must be explicitly defined
        by hook scripts.
        """
        # For each hidden import required by the module being hooked...
        for import_module_name in self.hiddenimports:
            try:
                # Graph node for this module. Do not implicitly create namespace packages for non-existent packages.
                caller = self.module_graph.find_node(self.module_name, create_nspkg=False)

                # Manually import this hidden import from this module.
                self.module_graph.import_hook(import_module_name, caller)
            # If this hidden import is unimportable, print a non-fatal warning. Hidden imports often become
            # desynchronized from upstream packages and hence are only "soft" recommendations.
            except ImportError:
                if self.warn_on_missing_hiddenimports:
                    logger.warning('Hidden import "%s" not found!', import_module_name)
class AdditionalFilesCache:
    """
    Per-module record of the binaries and datas contributed while import hooks were being processed.
    """

    def __init__(self):
        # Both dictionaries map a module name to the list of entries collected for that module.
        self._binaries = {}
        self._datas = {}

    def add(self, modname, binaries, datas):
        """
        Append the passed binaries and datas to the entries recorded for the given module name.

        A falsy argument (e.g., `None`) contributes no entries, but the module is registered nonetheless.
        """
        for store, entries in ((self._binaries, binaries), (self._datas, datas)):
            store.setdefault(modname, []).extend(entries or [])

    def __contains__(self, name):
        """
        Tell whether anything has been recorded for the given module name.
        """
        return any(name in store for store in (self._binaries, self._datas))

    def binaries(self, modname):
        """
        Return list of binaries recorded for the given module name (empty list if there are none).
        """
        return self._binaries.get(modname, [])

    def datas(self, modname):
        """
        Return list of datas recorded for the given module name (empty list if there are none).
        """
        return self._datas.get(modname, [])

View File

@@ -0,0 +1,486 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Classes facilitating communication between PyInstaller and import hooks.
PyInstaller passes instances of classes defined by this module to corresponding functions defined by external import
hooks, which commonly modify the contents of these instances before returning. PyInstaller then detects and converts
these modifications into appropriate operations on the current `PyiModuleGraph` instance, thus modifying which
modules will be frozen into the executable.
"""
from PyInstaller.building.utils import format_binaries_and_datas
from PyInstaller.lib.modulegraph.modulegraph import (RuntimeModule, RuntimePackage)
class PreSafeImportModuleAPI:
    """
    Metadata communicating changes made by the current **pre-safe import module hook** back to PyInstaller. Such a
    hook is run immediately _before_ a call to `ModuleGraph._safe_import_module()` recursively adding the hooked
    module, package, or C extension and all transitive imports thereof to the module graph.

    Pre-safe import module hooks _must_ define a `pre_safe_import_module()` function accepting an instance of this
    class, whose attributes describe the subsequent `ModuleGraph._safe_import_module()` call creating the hooked
    module's graph node.

    Each pre-safe import module hook is run _only_ on the first attempt to create the hooked module's graph node and
    then subsequently ignored. If this hook successfully creates that graph node, the subsequent
    `ModuleGraph._safe_import_module()` call will observe this fact and silently return without attempting to
    recreate that graph node.

    Pre-safe import module hooks are typically used to create graph nodes for **runtime modules** (i.e., modules
    dynamically defined at runtime). Most modules are physically defined in external `.py`-suffixed scripts. Some
    modules, however, are dynamically defined at runtime (e.g., `six.moves`, dynamically defined by the physically
    defined `six.py` module). However, `ModuleGraph` only parses `import` statements residing in external scripts.
    `ModuleGraph` is _not_ a full-fledged, Turing-complete Python interpreter and hence has no means of parsing
    `import` statements performed by runtime modules existing only in-memory.

    'With great power comes great responsibility.'

    Attributes (Immutable)
    ----------------------------
    The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
    _will_ result in a raised exception:

    module_graph : PyiModuleGraph
        Current module graph.
    parent_package : Package
        Graph node for the package providing this module _or_ `None` if this module is a top-level module.

    Attributes (Mutable)
    -----------------------------
    The following attributes are editable.

    module_basename : str
        Unqualified name of the module to be imported (e.g., `text`).
    module_name : str
        Fully-qualified name of this module (e.g., `email.mime.text`).
    """

    def __init__(self, module_graph, module_basename, module_name, parent_package):
        # Read-only state, exposed through the properties below.
        self._module_graph = module_graph
        self._parent_package = parent_package
        # Editable state.
        self.module_basename = module_basename
        self.module_name = module_name

    # Immutable properties. No corresponding setters are defined.

    @property
    def module_graph(self):
        """
        Current module graph.
        """
        return self._module_graph

    @property
    def parent_package(self):
        """
        Parent Package of this node.
        """
        return self._parent_package

    def add_runtime_module(self, module_name):
        """
        Register a graph node for a non-package Python module that is dynamically defined at runtime.

        Most modules are statically defined on-disk as standard Python files. Some modules, however, are dynamically
        defined in-memory at runtime (e.g., `gi.repository.Gst`, dynamically defined by the statically defined
        `gi.repository.__init__` module).

        Since the module registered here is _not_ a package, all attempts to import submodules from it in
        `from`-style import statements (e.g., the `queue` submodule in `from six.moves import queue`) will be
        silently ignored. To circumvent this, simply call `add_runtime_package()` instead.

        Parameters
        ----------
        module_name : str
            Fully-qualified name of this module (e.g., `gi.repository.Gst`).

        Examples
        ----------
        This method is typically called by `pre_safe_import_module()` hooks, e.g.:

            def pre_safe_import_module(api):
                api.add_runtime_module(api.module_name)
        """
        runtime_node = RuntimeModule(module_name)
        self._module_graph.add_module(runtime_node)

    def add_runtime_package(self, package_name):
        """
        Register a graph node for a non-namespace Python package that is dynamically defined at runtime.

        Most packages are statically defined on-disk as standard subdirectories containing `__init__.py` files. Some
        packages, however, are dynamically defined in-memory at runtime (e.g., `six.moves`, dynamically defined by
        the statically defined `six` module).

        All attributes imported from the package registered here in `from`-style import statements that are
        submodules of this package (e.g., the `queue` submodule in `from six.moves import queue`) will be imported
        rather than ignored.

        Parameters
        ----------
        package_name : str
            Fully-qualified name of this package (e.g., `six.moves`).

        Examples
        ----------
        This method is typically called by `pre_safe_import_module()` hooks, e.g.:

            def pre_safe_import_module(api):
                api.add_runtime_package(api.module_name)
        """
        runtime_node = RuntimePackage(package_name)
        self._module_graph.add_module(runtime_node)

    def add_alias_module(self, real_module_name, alias_module_name):
        """
        Make the alias module name refer to the real module with the passed names.

        This method ensures that the next call to findNode() given the alias module name will resolve this alias.
        This includes importing and adding a graph node for the real module if needed as well as adding a reference
        from the alias to the real module.

        Parameters
        ----------
        real_module_name : str
            Fully-qualified name of the **existing module** (i.e., the module being aliased).
        alias_module_name : str
            Fully-qualified name of the **non-existent module** (i.e., the alias to be created).
        """
        self._module_graph.alias_module(real_module_name, alias_module_name)

    def append_package_path(self, directory):
        """
        Register an extra directory for the `__path__` of the package named by `module_name`.

        Modulegraph does a good job at simulating Python's import machinery, but it cannot handle the `__path__`
        modifications that packages make at runtime. This method is the mechanism for registering such extra paths
        for a package, so that they are honored.

        Parameters
        ----------
        directory : str
            Absolute or relative path of the directory to be appended to this package's `__path__` attribute.
        """
        self._module_graph.append_package_path(self.module_name, directory)
class PreFindModulePathAPI:
    """
    Metadata communicating changes made by the current **pre-find module path hook** back to PyInstaller. Such a
    hook is run immediately _before_ a call to `ModuleGraph._find_module_path()` finding the hooked module's
    absolute path.

    Pre-find module path hooks _must_ define a `pre_find_module_path()` function accepting an instance of this class,
    whose attributes describe the subsequent `ModuleGraph._find_module_path()` call to be performed.

    Pre-find module path hooks are typically used to change the absolute path from which a module will be
    subsequently imported and thus frozen into the executable. To do so, hooks may overwrite the default
    `search_dirs` list of the absolute paths of all directories to be searched for that module: e.g.,

        def pre_find_module_path(api):
            api.search_dirs = ['/the/one/true/package/providing/this/module']

    Each pre-find module path hook is run _only_ on the first call to `ModuleGraph._find_module_path()` for the
    corresponding module.

    Attributes
    ----------
    The following attributes are **mutable** (i.e., modifiable). All changes to these attributes will be immediately
    respected by PyInstaller:

    search_dirs : list
        List of the absolute paths of all directories to be searched for this module (in order). Searching will halt
        at the first directory containing this module.

    Attributes (Immutable)
    ----------
    The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
    _will_ result in a raised exception:

    module_name : str
        Fully-qualified name of this module.
    module_graph : PyiModuleGraph
        Current module graph. Only the attribute binding is read-only; the graph object itself can still be
        mutated. To preserve graph integrity, it should nonetheless _never_ be modified. While read-only
        `PyiModuleGraph` methods (e.g., `findNode()`) are safely callable from within pre-find module path hooks,
        methods modifying the graph are _not_. If graph modifications are required, consider an alternative type of
        hook (e.g., pre-import module hooks).
    """

    def __init__(
        self,
        module_graph,
        module_name,
        search_dirs,
    ):
        # Read-only state, exposed through the properties below.
        self._module_name = module_name
        self._module_graph = module_graph
        # Editable state.
        self.search_dirs = search_dirs

    # Immutable properties. No corresponding setters are defined.

    @property
    def module_graph(self):
        """
        Current module graph.
        """
        return self._module_graph

    @property
    def module_name(self):
        """
        Fully-qualified name of this module.
        """
        return self._module_name
class PostGraphAPI:
"""
Metadata communicating changes made by the current **post-graph hook** (i.e., hook run for a specific module
transitively imported by the current application _after_ the module graph of all `import` statements performed by
this application has been constructed) back to PyInstaller.
Post-graph hooks may optionally define a `hook()` function accepting an instance of this class,
whose attributes describe the current state of the module graph and the hooked module's graph node.
Attributes (Mutable)
----------
The following attributes are **mutable** (i.e., modifiable). All changes to these attributes will be immediately
respected by PyInstaller:
module_graph : PyiModuleGraph
Current module graph.
module : Node
Graph node for the currently hooked module.
'With great power comes great responsibility.'
Attributes (Immutable)
----------
The following attributes are **immutable** (i.e., read-only). For safety, any attempts to change these attributes
_will_ result in a raised exception:
__name__ : str
Fully-qualified name of this module (e.g., `six.moves.tkinter`).
__file__ : str
Absolute path of this module. If this module is:
* A standard (rather than namespace) package, this is the absolute path of this package's directory.
* A namespace (rather than standard) package, this is the abstract placeholder `-`. (Don't ask. Don't tell.)
* A non-package module or C extension, this is the absolute path of the corresponding file.
__path__ : list
List of the absolute paths of all directories comprising this package if this module is a package _or_ `None`
otherwise. If this module is a standard (rather than namespace) package, this list contains only the absolute
path of this package's directory.
co : code
Code object compiled from the contents of `__file__` (e.g., via the `compile()` builtin).
analysis: build_main.Analysis
The Analysis that loads the hook.
Attributes (Private)
----------
The following attributes are technically mutable but private, and hence should _never_ be externally accessed or
modified by hooks. Call the corresponding public methods instead:
_added_datas : list
List of the `(name, path)` 2-tuples or TOC objects of all external data files required by the current hook,
defaulting to the empty list. This is equivalent to the global `datas` hook attribute.
_added_imports : list
List of the fully-qualified names of all modules imported by the current hook, defaulting to the empty list.
This is equivalent to the global `hiddenimports` hook attribute.
_added_binaries : list
List of the `(name, path)` 2-tuples or TOC objects of all external C extensions imported by the current hook,
defaulting to the empty list. This is equivalent to the global `binaries` hook attribute.
_module_collection_mode : dict
Dictionary of package/module names and their corresponding collection mode strings. This is equivalent to the
global `module_collection_mode` hook attribute.
_bindepend_symlink_suppression : set
A set of paths or path patterns corresponding to shared libraries for which binary dependency analysis should
not generate symbolic links into top-level application directory.
"""
def __init__(self, module_name, module_graph, analysis):
    """
    Look up the hooked module's graph node and initialize the state exposed to the hook.

    Parameters
    ----------
    module_name : str
        Fully-qualified name of the hooked module; its node must already exist in the graph.
    module_graph : PyiModuleGraph
        Current module graph.
    analysis : build_main.Analysis
        Analysis that calls the hook.
    """
    # Mutable attributes.
    self.module_graph = module_graph
    hooked_node = module_graph.find_node(module_name)
    self.module = hooked_node
    assert hooked_node is not None  # should not occur

    # Immutable attributes, exposed through read-only properties of this class.
    self.___name__ = module_name
    self.___file__ = hooked_node.filename
    self._co = hooked_node.code
    self._analysis = analysis

    # To enforce immutability, convert this module's package path if any into an immutable tuple.
    package_path = hooked_node.packagepath
    self.___path__ = None if package_path is None else tuple(package_path)

    #FIXME: Refactor "_added_datas", "_added_binaries", and "_deleted_imports" into sets. Since order of
    #import is important, "_added_imports" must remain a list.

    # Private attributes, filled in by the public methods of this class.
    self._added_binaries = []
    self._added_datas = []
    self._added_imports = []
    self._deleted_imports = []
    self._module_collection_mode = {}
    self._bindepend_symlink_suppression = set()
# Immutable properties. No corresponding setters are defined.
@property
def __file__(self):
    """
    Read-only file name of the hooked module, as recorded on its graph node at construction time.
    """
    return self.___file__
@property
def __path__(self):
    """
    Read-only package path of the hooked module: an immutable tuple of the directories comprising this package
    if this module is a package _or_ `None` otherwise. If this module is a standard (rather than namespace)
    package, it contains only the absolute path of this package's directory.
    """
    return self.___path__
@property
def __name__(self):
    """
    Read-only fully-qualified name of the hooked module (e.g., `six.moves.tkinter`).
    """
    return self.___name__
@property
def co(self):
    """
    Code object compiled from the contents of `__file__` (e.g., via the `compile()` builtin).

    Read-only; this is the `code` attribute of the hooked module's graph node as captured by the constructor.
    """
    return self._co
@property
def analysis(self):
    """
    build_main.Analysis that calls the hook.

    Read-only; this is the `analysis` value that was passed to the constructor, returned unchanged.
    """
    return self._analysis
# Obsolete immutable properties provided to preserve backward compatibility.
@property
def name(self):
    """
    Fully-qualified name of this module (e.g., `six.moves.tkinter`).

    **This property has been deprecated by the `__name__` property.** Both return the same stored value.
    """
    return self.___name__
@property
def graph(self):
    """
    Current module graph.

    **This property has been deprecated by the `module_graph` attribute.** It returns that attribute unchanged.
    """
    return self.module_graph
@property
def node(self):
    """
    Graph node for the currently hooked module.

    **This property has been deprecated by the `module` attribute.** It returns that attribute unchanged.
    """
    return self.module
# TODO: This incorrectly returns the list of the graph nodes of all modules *TRANSITIVELY* (rather than directly)
# imported by this module. Unfortunately, this implies that most uses of this property are currently broken
# (e.g., "hook-PIL.SpiderImagePlugin.py"). We only require this for the aforementioned hook, so contemplate
# alternative approaches.
@property
def imports(self):
    """
    Graph nodes produced by `module_graph.iter_graph(start=self.module)`.

    The intent is to expose the modules *directly* imported by this module. As the TODO above records, however, the
    result currently covers modules imported *transitively* as well, so callers must not assume direct imports only.
    """
    return self.module_graph.iter_graph(start=self.module)
def add_imports(self, *module_names):
    """
    Register the given fully-qualified module names as "hidden imports" of the current module.

    Names are recorded in the order given, after any names registered by earlier calls. The effect is the same as
    appending these names to the hook-specific `hiddenimports` attribute.
    """
    # Order of imports matters, so keep appending to the existing list one name at a time.
    for hidden_name in module_names:
        self._added_imports.append(hidden_name)
def del_imports(self, *module_names):
    """
    Register the given fully-qualified module names for removal from the imports (hidden or visible) of the
    current module.

    The effect is the same as appending these names to the hook-specific `excludedimports` attribute.
    """
    excluded = self._deleted_imports
    for excluded_name in module_names:
        excluded.append(excluded_name)
def add_binaries(self, binaries):
    """
    Register external dynamic libraries as dependencies of the current module.

    `binaries` is a list of `(src_name, dest_name)` 2-tuples, exactly as accepted by the global `binaries` hook
    attribute. For convenience, a list of TOC-style 3-tuples `(dest_name, src_name, typecode)` is accepted as well;
    the format is detected by looking at the length of the first entry only.
    """
    if not binaries or len(binaries[0]) != 3:
        # Hook-style input. NOTE: `format_binaries_and_datas` changes tuples from input format
        # `(src_name, dest_name)` to output format `(dest_name, src_name)`.
        self._added_binaries.extend(format_binaries_and_datas(binaries))
        return

    # TOC-style input is already in `(dest_name, src_name, ...)` order; just drop the typecode.
    for toc_entry in binaries:
        self._added_binaries.append(toc_entry[:2])
def add_datas(self, datas):
    """
    Register external data files as dependencies of the current module.

    `datas` is a list of `(src_name, dest_name)` 2-tuples, exactly as accepted by the global `datas` hook attribute.
    For convenience, a list of TOC-style 3-tuples `(dest_name, src_name, typecode)` is accepted as well; the format
    is detected by looking at the length of the first entry only.
    """
    looks_like_toc = bool(datas) and len(datas[0]) == 3
    if looks_like_toc:
        # TOC-style input is already in `(dest_name, src_name, ...)` order; just drop the typecode.
        new_entries = (toc_entry[:2] for toc_entry in datas)
    else:
        # Hook-style input. NOTE: `format_binaries_and_datas` changes tuples from input format
        # `(src_name, dest_name)` to output format `(dest_name, src_name)`.
        new_entries = format_binaries_and_datas(datas)
    self._added_datas.extend(new_entries)
def set_module_collection_mode(self, name, mode):
    """
    Set the package/module collection mode for the specified module name. If `name` is `None`, the hooked
    module/package name is used. `mode` can be one of valid mode strings (`'pyz'`, `'pyc'`, `'py'`, `'pyz+py'`,
    `'py+pyz'`) or `None`, which clears the setting for the module/package - but only within this hook's context!

    Clearing a name for which no mode has been set in this hook is a no-op.
    """
    if name is None:
        name = self.__name__
    if mode is None:
        # Supply a default so that clearing an unset entry does not raise KeyError.
        self._module_collection_mode.pop(name, None)
    else:
        self._module_collection_mode[name] = mode
def add_bindepend_symlink_suppression_pattern(self, pattern):
    """
    Register a path or path pattern of shared libraries for which binary dependency analysis must not create a
    symbolic link in the top-level application directory.

    Patterns are kept in a set, so registering the same pattern twice has no further effect.
    """
    suppressed_patterns = self._bindepend_symlink_suppression
    suppressed_patterns.add(pattern)

View File

@@ -0,0 +1,344 @@
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2023, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
"""
Utility functions related to analyzing/bundling dependencies.
"""
import ctypes.util
import os
import re
import shutil
from types import CodeType
from PyInstaller import compat
from PyInstaller import log as logging
from PyInstaller.depend import bytecode
from PyInstaller.depend.dylib import include_library
from PyInstaller.exceptions import ExecCommandFailed
logger = logging.getLogger(__name__)
def scan_code_for_ctypes(co):
    """
    Determine the shared libraries that the code object `co` loads via ctypes.

    The library names reported by `__recursively_scan_code_objects_for_ctypes()` are de-duplicated and filtered:
    empty/`None` names are dropped silently, and names that are not plain basenames are dropped with a warning. The
    remaining names are passed to `_resolveCtypesImports()`, whose result is returned.
    """
    candidates = set(__recursively_scan_code_objects_for_ctypes(co))

    rejected = []
    for libname in candidates:
        if not libname:
            # The name may be None in some cases. Some Python modules (e.g., PyObjC.objc._bridgesupport) contain
            # code like `dll = ctypes.CDLL(None)`; such values have to be removed too.
            rejected.append(libname)
            continue
        if libname == os.path.basename(libname):
            continue

        # The library has been requested with something other than its basename. Drop the entry and warn the user -
        # PyInstaller would need to patch the compiled pyc file to make it work correctly!
        # TODO make these warnings show up somewhere.
        try:
            origin = co.co_filename
        except Exception:
            origin = 'UNKNOWN'
        logger.warning(
            "Ignoring %s imported from %s - only basenames are supported with ctypes imports!", libname, origin
        )
        rejected.append(libname)

    candidates.difference_update(rejected)
    return _resolveCtypesImports(candidates)
def __recursively_scan_code_objects_for_ctypes(code: CodeType):
    """
    Detect ctypes dependencies of `code`, using reasonable heuristics that should cover most common ctypes usages.

    Three usage patterns are recognised: library-loading calls (`ctypes.CDLL()`, `ctypes.cdll.LoadLibrary()` and
    their Win/Ole/Py counterparts), `ctypes.util.find_library()` calls, and attribute access on the ctypes loader
    objects (e.g. `ctypes.cdll.name`). Returns a list containing the names of the binaries detected as dependencies;
    the list may contain duplicates.
    """
    from PyInstaller.depend.bytecode import any_alias, search_recursively

    loader_names = set()
    for dotted_name in (
        "ctypes.CDLL",
        "ctypes.cdll.LoadLibrary",
        "ctypes.WinDLL",
        "ctypes.windll.LoadLibrary",
        "ctypes.OleDLL",
        "ctypes.oledll.LoadLibrary",
        "ctypes.PyDLL",
        "ctypes.pydll.LoadLibrary",
    ):
        loader_names.update(any_alias(dotted_name))
    finder_names = set(any_alias("ctypes.util.find_library"))

    found = []
    for call_list in bytecode.recursive_function_calls(code).values():
        for func_name, call_args in call_list:
            # Only calls made with exactly one string argument are considered.
            if len(call_args) != 1 or not isinstance(call_args[0], str):
                continue
            requested = call_args[0]

            if func_name in loader_names:
                # ctypes.*DLL() or ctypes.*dll.LoadLibrary()
                found.append(requested)
                continue
            if func_name not in finder_names or not requested:
                continue

            # ctypes.util.find_library() needs to be handled separately, because we need to resolve the library base
            # name given as the argument (without prefix and suffix, e.g. 'gs') into corresponding full name (e.g.,
            # 'libgs.so.9').
            try:  # this try was inserted due to the ctypes bug https://github.com/python/cpython/issues/93094
                resolved = ctypes.util.find_library(requested)
            except FileNotFoundError:
                resolved = None
                logger.warning(
                    'ctypes.util.find_library raised a FileNotFoundError. '
                    'Supressing and assuming no lib with the name "%s" was found.', requested
                )
            if resolved:
                # On Windows, `find_library` may return a full pathname. See issue #1934.
                found.append(os.path.basename(resolved))

    # The above handles any flavour of function/class call. We still need to capture the (albeit rarely used) case of
    # loading libraries with ctypes.cdll's getattr.
    for attr_hits in search_recursively(_scan_code_for_ctypes_getattr, code).values():
        found.extend(attr_hits)

    return found
# Bytecode pattern used by `_scan_code_for_ctypes_getattr()` to find attribute-access chains such as `foo.bar` or
# `foo.bar.whizz`. Group 1 captures the instruction(s) loading the base name, group 2 the instruction(s) loading the
# one or more attribute names; every opcode may be preceded by any number of EXTENDED_ARG instructions.
_ctypes_getattr_regex = bytecode.bytecode_regex(
rb"""
# Matches 'foo.bar' or 'foo.bar.whizz'.
# Load the 'foo'.
(
(?:(?:""" + bytecode._OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + bytecode._OPCODES_FUNCTION_GLOBAL + rb""").
)
# Load the 'bar.whizz' (one opcode per name component, each possibly preceded by name reference extension).
(
(?:
(?:(?:""" + bytecode._OPCODES_EXTENDED_ARG + rb""").)*
(?:""" + bytecode._OPCODES_FUNCTION_LOAD + rb""").
)+
)
"""
)
def _scan_code_for_ctypes_getattr(code: CodeType):
    """
    Yield the DLL names implied by attribute access on the ctypes loader objects within `code`.

    A use of ``ctypes.cdll.library_name`` (or of ``cdll.library_name`` after ``from ctypes import cdll``; likewise
    for ``oledll``, ``pydll`` and ``windll``) implies that ``library_name.dll`` should be collected.
    """
    loader_attrs = ("cdll", "oledll", "pydll", "windll")

    for hit in bytecode.finditer(_ctypes_getattr_regex, code.co_code):
        raw_base, raw_chain = hit.groups()
        base = bytecode.load(raw_base, code)
        chain = bytecode.loads(raw_chain, code)

        # An attribute chain ending in `LoadLibrary` is a method lookup, not a library name.
        if chain and chain[-1] == "LoadLibrary":
            continue

        depth = len(chain)
        if depth == 1 and base in loader_attrs:
            # Capture `from ctypes import ole; ole.dll_name`.
            yield chain[0] + ".dll"
        elif depth == 2 and base == "ctypes" and chain[0] in loader_attrs:
            # Capture `import ctypes; ctypes.ole.dll_name`.
            yield chain[1] + ".dll"
# TODO: reuse this code with modulegraph implementation.
def _resolveCtypesImports(cbinaries):
    """
    Complete ctypes BINARY entries for modules with their full path.

    Input is an iterable of C library names (as collected by `scan_code_for_ctypes`). Output is a list of
    ``(name, full_path, 'BINARY')`` tuples ready to be appended to the ``binaries`` of a module. Libraries that cannot
    be located, or that `include_library()` rejects, are left out of the result.

    This function temporarily extends PATH, LD_LIBRARY_PATH or DYLD_LIBRARY_PATH (depending on the platform) by
    CONF['pathex'] so that shared libraries are searched for there, too. The original value of the variable is
    restored before returning, even if resolving one of the libraries raises an exception.

    Example:
    >>> _resolveCtypesImports(['libgs.so'])
    [('libgs.so', '/usr/lib/libgs.so', 'BINARY')]
    """
    from ctypes.util import find_library

    from PyInstaller.config import CONF

    if compat.is_unix:
        envvar = "LD_LIBRARY_PATH"
    elif compat.is_darwin:
        envvar = "DYLD_LIBRARY_PATH"
    else:
        envvar = "PATH"

    def _setPaths():
        # Prepend CONF['pathex'] to the search-path variable and return its previous value (None if it was unset).
        path = os.pathsep.join(CONF['pathex'])
        old = compat.getenv(envvar)
        if old is not None:
            path = os.pathsep.join((path, old))
        compat.setenv(envvar, path)
        return old

    def _restorePaths(old):
        # Undo `_setPaths()`: restore the previous value, or unset the variable if it did not exist before.
        if old is None:
            compat.unsetenv(envvar)
        else:
            compat.setenv(envvar, old)

    ret = []

    # Try to locate the shared library on the disk. This is done by calling ctypes.util.find_library with
    # ImportTracker's local paths temporarily prepended to the library search paths (and restored after the call).
    old = _setPaths()
    try:
        for cbin in cbinaries:
            try:
                # There is an issue with find_library() where it can run into errors trying to locate the library.
                # See #5734.
                cpath = find_library(os.path.splitext(cbin)[0])
            except FileNotFoundError:
                # In these cases, find_library() should return None.
                cpath = None
            if compat.is_unix or compat.is_cygwin:
                # CAVEAT: find_library() is not the correct function. ctype's documentation says that it is meant to
                # resolve only the filename (as a *compiler* does) not the full path. Anyway, it works well enough on
                # Windows and macOS. On Linux, we need to implement more code to find out the full path.
                if cpath is None:
                    cpath = cbin
                # "man ld.so" says that we should first search LD_LIBRARY_PATH and then the ldcache.
                for d in compat.getenv(envvar, '').split(os.pathsep):
                    if os.path.isfile(os.path.join(d, cpath)):
                        cpath = os.path.join(d, cpath)
                        break
                else:
                    # Not found in any search-path directory; fall back to the ldconfig cache.
                    if LDCONFIG_CACHE is None:
                        load_ldconfig_cache()
                    if cpath in LDCONFIG_CACHE:
                        cpath = LDCONFIG_CACHE[cpath]
                        assert os.path.isfile(cpath)
                    else:
                        cpath = None
            if cpath is None:
                # Skip warning message if cbin (basename of library) is ignored. This prevents messages like:
                # 'W: library kernel32.dll required via ctypes not found'
                if not include_library(cbin):
                    continue
                # On non-Windows, automatically ignore all ctypes-based references to DLL files. This complements the
                # above check, which might not match potential case variations (e.g., `KERNEL32.dll`, instead of
                # `kernel32.dll`) due to case-sensitivity of the matching that is in effect on non-Windows platforms.
                if (not compat.is_win and not compat.is_cygwin) and cbin.lower().endswith('.dll'):
                    continue
                logger.warning("Library %s required via ctypes not found", cbin)
            else:
                if not include_library(cpath):
                    continue
                ret.append((cbin, cpath, "BINARY"))
    finally:
        # Always undo the environment change, so that a failure while resolving a library does not leave the
        # modified search path behind for the rest of the process.
        _restorePaths(old)
    return ret
# Mapping of library name to library path, filled in by `load_ldconfig_cache()`. It stays `None` until that function
# has run; afterwards it is always a dict, which is empty if `ldconfig` is unavailable or fails.
LDCONFIG_CACHE = None # cache the output of `/sbin/ldconfig -p`
def load_ldconfig_cache():
    """
    Populate the module-level `LDCONFIG_CACHE` from the output of `ldconfig`, doing the work at most once.

    The output contains thousands of libraries and running the command for every dylib would be expensive, hence the
    cache. After this function returns, `LDCONFIG_CACHE` is always a dict mapping library names to library paths; it
    is left empty when `ldconfig` is not applicable (Cygwin, musl), cannot be found, or fails to run.
    """
    global LDCONFIG_CACHE

    # Already loaded (possibly as an empty dict) - nothing to do.
    if LDCONFIG_CACHE is not None:
        return

    # Cygwin: ldconfig is not available, but we might be re-using general POSIX codepaths and end up here.
    # Musl: deliberately does not use ldconfig. The ldconfig executable either doesn't exist or it's a functionless
    # executable which, on calling with any arguments, simply tells you that those arguments are invalid.
    if compat.is_cygwin or compat.is_musl:
        LDCONFIG_CACHE = {}
        return

    ldconfig = shutil.which('ldconfig')
    if ldconfig is None:
        # If `ldconfig` is not found in $PATH, search for it in some fixed directories. Simply use a second call
        # instead of fiddling around with checks for empty env-vars and string-concat.
        ldconfig = shutil.which('ldconfig', path='/usr/sbin:/sbin:/usr/bin:/bin')
    if ldconfig is None:
        # We still could not find the 'ldconfig' command...
        LDCONFIG_CACHE = {}
        return

    bsd_format = compat.is_freebsd or compat.is_openbsd
    if bsd_format:
        # This has a quite different format than other Unixes:
        # [vagrant@freebsd-10 ~]$ ldconfig -r
        # /var/run/ld-elf.so.hints:
        #     search directories: /lib:/usr/lib:/usr/lib/compat:...
        #     0:-lgeom.5 => /lib/libgeom.so.5
        #     184:-lpython2.7.1 => /usr/local/lib/libpython2.7.so.1
        ldconfig_arg = '-r'
        header_lines = 2
        entry_regex = re.compile(r'^\s+\d+:-l(\S+)(\s.*)? => (\S+)')
    else:
        # Skip first line of the library list because it is just an informative line and might contain localized
        # characters. Example of first line with locale set to cs_CZ.UTF-8:
        #$ /sbin/ldconfig -p
        #V keši „/etc/ld.so.cache“ nalezeno knihoven: 2799
        #      libzvbi.so.0 (libc6,x86-64) => /lib64/libzvbi.so.0
        #      libzvbi-chains.so.0 (libc6,x86-64) => /lib64/libzvbi-chains.so.0
        ldconfig_arg = '-p'
        header_lines = 1
        entry_regex = re.compile(r'^\s+(\S+)(\s.*)? => (\S+)')

    try:
        output = compat.exec_command(ldconfig, ldconfig_arg)
    except ExecCommandFailed:
        logger.warning("Failed to execute ldconfig. Disabling LD cache.")
        LDCONFIG_CACHE = {}
        return

    entries = output.strip().splitlines()[header_lines:]

    LDCONFIG_CACHE = {}
    for line in entries:
        # :fixme: this assumes library names do not contain whitespace
        entry = entry_regex.match(line)

        # Sanitize away any abnormal lines of output: warn about them (except for the harmless
        # "Cache generated by:" line, see #5540) and move on to the next line.
        if entry is None:
            if "Cache generated by:" not in line:
                logger.warning("Unrecognised line of output %r from ldconfig", line)
            continue

        soname, _, lib_path = entry.groups()
        if bsd_format:
            # Insert `.so` at the end of the lib's basename. soname and filename may have (different) trailing
            # versions. We assume the `.so` in the filename to mark the end of the lib's basename.
            stem = os.path.basename(lib_path).split('.so', 1)[0]
            soname = 'lib' + soname
            assert soname.startswith(stem)
            soname = stem + '.so' + soname[len(stem):]

        # ldconfig may know about several versions of the same lib, e.g., different arch, different libc, etc.
        # Use the first entry.
        LDCONFIG_CACHE.setdefault(soname, lib_path)