diff --git a/.gitattributes b/.gitattributes index 2f5a030981fb94..5682b9150a36e2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,6 +10,7 @@ *.ico binary *.jpg binary *.pck binary +*.pdf binary *.png binary *.psd binary *.tar binary @@ -67,6 +68,7 @@ PCbuild/readme.txt dos **/clinic/*.cpp.h generated **/clinic/*.h.h generated *_db.h generated +Doc/c-api/lifecycle.dot.svg generated Doc/data/stable_abi.dat generated Doc/library/token-list.inc generated Include/internal/pycore_ast.h generated diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 16331b65e52979..79e18f258d5ca4 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -281,9 +281,13 @@ Doc/howto/clinic.rst @erlend-aasland # Subinterpreters **/*interpreteridobject.* @ericsnowcurrently **/*crossinterp* @ericsnowcurrently -Lib/test/support/interpreters/ @ericsnowcurrently Modules/_interp*module.c @ericsnowcurrently +Lib/test/test__interp*.py @ericsnowcurrently +Lib/concurrent/interpreters/ @ericsnowcurrently +Lib/test/support/channels.py @ericsnowcurrently +Doc/library/concurrent.interpreters.rst @ericsnowcurrently Lib/test/test_interpreters/ @ericsnowcurrently +Lib/concurrent/futures/interpreter.py @ericsnowcurrently # Android **/*Android* @mhsmith @freakboy3742 @@ -298,7 +302,12 @@ Lib/test/test_interpreters/ @ericsnowcurrently **/*-ios* @freakboy3742 # WebAssembly -/Tools/wasm/ @brettcannon @freakboy3742 +Tools/wasm/config.site-wasm32-emscripten @freakboy3742 +/Tools/wasm/README.md @brettcannon @freakboy3742 +/Tools/wasm/wasi-env @brettcannon +/Tools/wasm/wasi_build.py @brettcannon +/Tools/wasm/emscripten @freakboy3742 +/Tools/wasm/wasi @brettcannon # SBOM /Misc/externals.spdx.json @sethmlarson @@ -326,3 +335,8 @@ Modules/_xxtestfuzz/ @ammaraskar **/*templateobject* @lysnikolaou **/*templatelib* @lysnikolaou **/*tstring* @lysnikolaou + +# Remote debugging +Python/remote_debug.h @pablogsal +Python/remote_debugging.c @pablogsal +Modules/_remote_debugging_module.c @pablogsal @ambv @1st1 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b192508c78685c..e26433786f74e4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,13 @@ permissions: contents: read concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}-reusable + # https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#concurrency + # 'group' must be a key uniquely representing a PR or push event. + # github.workflow is the workflow name + # github.actor is the user invoking the workflow + # github.head_ref is the source branch of the PR or otherwise blank + # github.run_id is a unique number for the current run + group: ${{ github.workflow }}-${{ github.actor }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: @@ -43,6 +49,53 @@ jobs: if: fromJSON(needs.build-context.outputs.run-docs) uses: ./.github/workflows/reusable-docs.yml + check-abi: + name: 'Check if the ABI has changed' + runs-on: ubuntu-22.04 # 24.04 causes spurious errors + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + sudo ./.github/workflows/posix-deps-apt.sh + sudo apt-get install -yq abigail-tools + - name: Build CPython + env: + CFLAGS: -g3 -O0 + run: | + # Build Python with the libpython dynamic library + ./configure --enable-shared + make -j4 + - name: Check for changes in the ABI + id: check + run: | + if ! make check-abidump; then + echo "Generated ABI file is not up to date." + echo "Please add the release manager of this branch as a reviewer of this PR." + echo "" + echo "The up to date ABI file should be attached to this build as an artifact." + echo "" + echo "To learn more about this check: https://devguide.python.org/getting-started/setup-building/index.html#regenerate-the-abi-dump" + echo "" + exit 1 + fi + - name: Generate updated ABI files + if: ${{ failure() && steps.check.conclusion == 'failure' }} + run: | + make regen-abidump + - uses: actions/upload-artifact@v4 + name: Publish updated ABI files + if: ${{ failure() && steps.check.conclusion == 'failure' }} + with: + name: abi-data + path: ./Doc/data/*.abi + check-autoconf-regen: name: 'Check if Autoconf files are up to date' # Don't use ubuntu-latest but a specific version to make the job diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 908daaf3a6019a..9dbdd606fff472 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -13,7 +13,13 @@ on: - "Lib/test/libregrtest/**" - "Lib/tomllib/**" - "Misc/mypy/**" + - "Tools/build/compute-changes.py" + - "Tools/build/deepfreeze.py" - "Tools/build/generate_sbom.py" + - "Tools/build/generate-build-details.py" + - "Tools/build/verify_ensurepip_wheels.py" + - "Tools/build/update_file.py" + - "Tools/build/umarshal.py" - "Tools/cases_generator/**" - "Tools/clinic/**" - "Tools/jit/**" diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index d5538cd9367ec6..7773222af5d26f 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -17,6 +17,7 @@ apt-get -yq install \ libreadline6-dev \ libsqlite3-dev \ libssl-dev \ + libzstd-dev \ lzma \ lzma-dev \ strace \ diff --git a/.github/workflows/reusable-context.yml b/.github/workflows/reusable-context.yml index 73dc254edc5fbc..d2668ddcac1a3d 100644 --- a/.github/workflows/reusable-context.yml +++ b/.github/workflows/reusable-context.yml @@ -97,6 +97,7 @@ jobs: run: python Tools/build/compute-changes.py env: GITHUB_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GITHUB_EVENT_NAME: ${{ github.event_name }} CCF_TARGET_REF: ${{ github.base_ref || github.event.repository.default_branch }} CCF_HEAD_REF: ${{ github.event.pull_request.head.sha || github.sha }} diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 6beb91e66d4027..4ad8add0bebc9e 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -64,9 +64,9 @@ jobs: # (Make sure to keep the key in sync with the other config.cache step below.) key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ env.WASI_SDK_VERSION }}-${{ env.WASMTIME_VERSION }}-${{ inputs.config_hash }}-${{ hashFiles('Tools/wasm/wasi.py') }}-${{ env.pythonLocation }} - name: "Configure build Python" - run: python3 Tools/wasm/wasi.py configure-build-python -- --config-cache --with-pydebug + run: python3 Tools/wasm/wasi configure-build-python -- --config-cache --with-pydebug - name: "Make build Python" - run: python3 Tools/wasm/wasi.py make-build-python + run: python3 Tools/wasm/wasi make-build-python - name: "Restore host config.cache" uses: actions/cache@v4 with: @@ -75,9 +75,9 @@ jobs: key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ env.WASI_SDK_VERSION }}-${{ env.WASMTIME_VERSION }}-${{ inputs.config_hash }}-${{ hashFiles('Tools/wasm/wasi.py') }}-${{ env.pythonLocation }} - name: "Configure host" # `--with-pydebug` inferred from configure-build-python - run: python3 Tools/wasm/wasi.py configure-host -- --config-cache + run: python3 Tools/wasm/wasi configure-host -- --config-cache - name: "Make host" - run: python3 Tools/wasm/wasi.py make-host + run: python3 Tools/wasm/wasi make-host - name: "Display build info" run: make --directory "${CROSS_BUILD_WASI}" pythoninfo - name: "Test" diff --git a/.github/workflows/tail-call.yml b/.github/workflows/tail-call.yml index 4636372e26c41b..e32cbf0aaa3c3e 100644 --- a/.github/workflows/tail-call.yml +++ b/.github/workflows/tail-call.yml @@ -137,4 +137,3 @@ jobs: CC=clang-20 ./configure --with-tail-call-interp --disable-gil make all --jobs 4 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - diff --git a/.gitignore b/.gitignore index 2a6f249275c32e..d4a9700f4647a2 100644 --- a/.gitignore +++ b/.gitignore @@ -171,5 +171,6 @@ Python/frozen_modules/MANIFEST /python !/Python/ -# main branch only: ABI files are not checked/maintained. -Doc/data/python*.abi +# People's custom https://docs.anthropic.com/en/docs/claude-code/memory configs. +/.claude/ +CLAUDE.local.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7ad829c94d50f3..86410c46d1d707 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,13 @@ repos: name: Run Black on Tools/jit/ files: ^Tools/jit/ + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.5 + hooks: + - id: remove-tabs + types: [python] + exclude: ^Tools/c-analyzer/cpython/_parser.py + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: @@ -43,10 +50,14 @@ repos: exclude: ^Lib/test/test_tomllib/ - id: check-yaml - id: end-of-file-fixer - types: [python] + types_or: [python, yaml] exclude: Lib/test/tokenizedata/coding20731.py + - id: end-of-file-fixer + files: '^\.github/CODEOWNERS$' + - id: trailing-whitespace + types_or: [c, inc, python, rst, yaml] - id: trailing-whitespace - types_or: [c, inc, python, rst] + files: '^\.github/CODEOWNERS|\.(gram)$' - repo: https://github.com/python-jsonschema/check-jsonschema rev: 0.33.0 diff --git a/.readthedocs.yml b/.readthedocs.yml index a57de00544e4e3..0a2c3f8345367f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -32,4 +32,3 @@ build: - make -C Doc venv html - mkdir _readthedocs - mv Doc/build/html _readthedocs/html - diff --git a/Android/README.md b/Android/README.md index 6cabd6ba5d6844..c42eb627006e6a 100644 --- a/Android/README.md +++ b/Android/README.md @@ -156,6 +156,10 @@ repository's `Lib` directory will be picked up immediately. Changes in C files, and architecture-specific files such as sysconfigdata, will not take effect until you re-run `android.py make-host` or `build`. +The testbed app can also be used to test third-party packages. For more details, +run `android.py test --help`, paying attention to the options `--site-packages`, +`--cwd`, `-c` and `-m`. + ## Using in your own app diff --git a/Android/android-env.sh b/Android/android-env.sh index bab4130c9e92d0..7b381a013cf0ba 100644 --- a/Android/android-env.sh +++ b/Android/android-env.sh @@ -3,7 +3,7 @@ : "${HOST:?}" # GNU target triplet # You may also override the following: -: "${api_level:=24}" # Minimum Android API level the build will run on +: "${ANDROID_API_LEVEL:=24}" # Minimum Android API level the build will run on : "${PREFIX:-}" # Path in which to find required libraries @@ -24,7 +24,7 @@ fail() { # * https://android.googlesource.com/platform/ndk/+/ndk-rXX-release/docs/BuildSystemMaintainers.md # where XX is the NDK version. Do a diff against the version you're upgrading from, e.g.: # https://android.googlesource.com/platform/ndk/+/ndk-r25-release..ndk-r26-release/docs/BuildSystemMaintainers.md -ndk_version=27.1.12297006 +ndk_version=27.2.12479018 ndk=$ANDROID_HOME/ndk/$ndk_version if ! [ -e "$ndk" ]; then @@ -43,7 +43,7 @@ fi toolchain=$(echo "$ndk"/toolchains/llvm/prebuilt/*) export AR="$toolchain/bin/llvm-ar" export AS="$toolchain/bin/llvm-as" -export CC="$toolchain/bin/${clang_triplet}${api_level}-clang" +export CC="$toolchain/bin/${clang_triplet}${ANDROID_API_LEVEL}-clang" export CXX="${CC}++" export LD="$toolchain/bin/ld" export NM="$toolchain/bin/llvm-nm" diff --git a/Android/android.py b/Android/android.py index 3f48b42aa17571..551168fc4b2f5a 100755 --- a/Android/android.py +++ b/Android/android.py @@ -14,7 +14,7 @@ from contextlib import asynccontextmanager from datetime import datetime, timezone from glob import glob -from os.path import basename, relpath +from os.path import abspath, basename, relpath from pathlib import Path from subprocess import CalledProcessError from tempfile import TemporaryDirectory @@ -22,9 +22,13 @@ SCRIPT_NAME = Path(__file__).name ANDROID_DIR = Path(__file__).resolve().parent -CHECKOUT = ANDROID_DIR.parent +PYTHON_DIR = ANDROID_DIR.parent +in_source_tree = ( + ANDROID_DIR.name == "Android" and (PYTHON_DIR / "pyconfig.h.in").exists() +) + TESTBED_DIR = ANDROID_DIR / "testbed" -CROSS_BUILD_DIR = CHECKOUT / "cross-build" +CROSS_BUILD_DIR = PYTHON_DIR / "cross-build" HOSTS = ["aarch64-linux-android", "x86_64-linux-android"] APP_ID = "org.python.testbed" @@ -76,39 +80,68 @@ def run(command, *, host=None, env=None, log=True, **kwargs): kwargs.setdefault("check", True) if env is None: env = os.environ.copy() - original_env = env.copy() if host: - env_script = ANDROID_DIR / "android-env.sh" - env_output = subprocess.run( - f"set -eu; " - f"HOST={host}; " - f"PREFIX={subdir(host)}/prefix; " - f". {env_script}; " - f"export", - check=True, shell=True, text=True, stdout=subprocess.PIPE - ).stdout - - for line in env_output.splitlines(): - # We don't require every line to match, as there may be some other - # output from installing the NDK. - if match := re.search( - "^(declare -x |export )?(\\w+)=['\"]?(.*?)['\"]?$", line - ): - key, value = match[2], match[3] - if env.get(key) != value: - print(line) - env[key] = value - - if env == original_env: - raise ValueError(f"Found no variables in {env_script.name} output:\n" - + env_output) + host_env = android_env(host) + print_env(host_env) + env.update(host_env) if log: - print(">", " ".join(map(str, command))) + print(">", join_command(command)) return subprocess.run(command, env=env, **kwargs) +# Format a command so it can be copied into a shell. Like shlex.join, but also +# accepts arguments which are Paths, or a single string/Path outside of a list. +def join_command(args): + if isinstance(args, (str, Path)): + return str(args) + else: + return shlex.join(map(str, args)) + + +# Format the environment so it can be pasted into a shell. +def print_env(env): + for key, value in sorted(env.items()): + print(f"export {key}={shlex.quote(value)}") + + +def android_env(host): + if host: + prefix = subdir(host) / "prefix" + else: + prefix = ANDROID_DIR / "prefix" + sysconfig_files = prefix.glob("lib/python*/_sysconfigdata__android_*.py") + sysconfig_filename = next(sysconfig_files).name + host = re.fullmatch(r"_sysconfigdata__android_(.+).py", sysconfig_filename)[1] + + env_script = ANDROID_DIR / "android-env.sh" + env_output = subprocess.run( + f"set -eu; " + f"export HOST={host}; " + f"PREFIX={prefix}; " + f". {env_script}; " + f"export", + check=True, shell=True, capture_output=True, encoding='utf-8', + ).stdout + + env = {} + for line in env_output.splitlines(): + # We don't require every line to match, as there may be some other + # output from installing the NDK. + if match := re.search( + "^(declare -x |export )?(\\w+)=['\"]?(.*?)['\"]?$", line + ): + key, value = match[2], match[3] + if os.environ.get(key) != value: + env[key] = value + + if not env: + raise ValueError(f"Found no variables in {env_script.name} output:\n" + + env_output) + return env + + def build_python_path(): """The path to the build Python binary.""" build_dir = subdir("build") @@ -127,7 +160,7 @@ def configure_build_python(context): clean("build") os.chdir(subdir("build", create=True)) - command = [relpath(CHECKOUT / "configure")] + command = [relpath(PYTHON_DIR / "configure")] if context.args: command.extend(context.args) run(command) @@ -139,12 +172,13 @@ def make_build_python(context): def unpack_deps(host, prefix_dir): + os.chdir(prefix_dir) deps_url = "https://github.com/beeware/cpython-android-source-deps/releases/download" - for name_ver in ["bzip2-1.0.8-2", "libffi-3.4.4-3", "openssl-3.0.15-4", + for name_ver in ["bzip2-1.0.8-3", "libffi-3.4.4-3", "openssl-3.0.15-4", "sqlite-3.49.1-0", "xz-5.4.6-1"]: filename = f"{name_ver}-{host}.tar.gz" download(f"{deps_url}/{name_ver}/{filename}") - shutil.unpack_archive(filename, prefix_dir) + shutil.unpack_archive(filename) os.remove(filename) @@ -167,7 +201,7 @@ def configure_host_python(context): os.chdir(host_dir) command = [ # Basic cross-compiling configuration - relpath(CHECKOUT / "configure"), + relpath(PYTHON_DIR / "configure"), f"--host={context.host}", f"--build={sysconfig.get_config_var('BUILD_GNU_TYPE')}", f"--with-build-python={build_python_path()}", @@ -196,9 +230,12 @@ def make_host_python(context): for pattern in ("include/python*", "lib/libpython*", "lib/python*"): delete_glob(f"{prefix_dir}/{pattern}") + # The Android environment variables were already captured in the Makefile by + # `configure`, and passing them again when running `make` may cause some + # flags to be duplicated. So we don't use the `host` argument here. os.chdir(host_dir) - run(["make", "-j", str(os.cpu_count())], host=context.host) - run(["make", "install", f"prefix={prefix_dir}"], host=context.host) + run(["make", "-j", str(os.cpu_count())]) + run(["make", "install", f"prefix={prefix_dir}"]) def build_all(context): @@ -228,7 +265,12 @@ def setup_sdk(): if not all((android_home / "licenses" / path).exists() for path in [ "android-sdk-arm-dbt-license", "android-sdk-license" ]): - run([sdkmanager, "--licenses"], text=True, input="y\n" * 100) + run( + [sdkmanager, "--licenses"], + text=True, + capture_output=True, + input="y\n" * 100, + ) # Gradle may install this automatically, but we can't rely on that because # we need to run adb within the logcat task. @@ -474,24 +516,49 @@ async def gradle_task(context): task_prefix = "connected" env["ANDROID_SERIAL"] = context.connected + hidden_output = [] + + def log(line): + # Gradle may take several minutes to install SDK packages, so it's worth + # showing those messages even in non-verbose mode. + if context.verbose or line.startswith('Preparing "Install'): + sys.stdout.write(line) + else: + hidden_output.append(line) + + if context.command: + mode = "-c" + module = context.command + else: + mode = "-m" + module = context.module or "test" + args = [ gradlew, "--console", "plain", f"{task_prefix}DebugAndroidTest", - "-Pandroid.testInstrumentationRunnerArguments.pythonArgs=" - + shlex.join(context.args), + ] + [ + # Build-time properties + f"-Ppython.{name}={value}" + for name, value in [ + ("sitePackages", context.site_packages), ("cwd", context.cwd) + ] if value + ] + [ + # Runtime properties + f"-Pandroid.testInstrumentationRunnerArguments.python{name}={value}" + for name, value in [ + ("Mode", mode), ("Module", module), ("Args", join_command(context.args)) + ] if value ] - hidden_output = [] + if context.verbose >= 2: + args.append("--info") + log("> " + join_command(args)) + try: async with async_process( *args, cwd=TESTBED_DIR, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) as process: while line := (await process.stdout.readline()).decode(*DECODE_ARGS): - # Gradle may take several minutes to install SDK packages, so - # it's worth showing those messages even in non-verbose mode. - if context.verbose or line.startswith('Preparing "Install'): - sys.stdout.write(line) - else: - hidden_output.append(line) + log(line) status = await wait_for(process.wait(), timeout=1) if status == 0: @@ -604,6 +671,10 @@ def package(context): print(f"Wrote {package_path}") +def env(context): + print_env(android_env(getattr(context, "host", None))) + + # Handle SIGTERM the same way as SIGINT. This ensures that if we're terminated # by the buildbot worker, we'll make an attempt to clean up our subprocesses. def install_signal_handler(): @@ -615,36 +686,41 @@ def signal_handler(*args): def parse_args(): parser = argparse.ArgumentParser() - subcommands = parser.add_subparsers(dest="subcommand") + subcommands = parser.add_subparsers(dest="subcommand", required=True) # Subcommands - build = subcommands.add_parser("build", help="Build everything") - configure_build = subcommands.add_parser("configure-build", - help="Run `configure` for the " - "build Python") - make_build = subcommands.add_parser("make-build", - help="Run `make` for the build Python") - configure_host = subcommands.add_parser("configure-host", - help="Run `configure` for Android") - make_host = subcommands.add_parser("make-host", - help="Run `make` for Android") + build = subcommands.add_parser( + "build", help="Run configure-build, make-build, configure-host and " + "make-host") + configure_build = subcommands.add_parser( + "configure-build", help="Run `configure` for the build Python") subcommands.add_parser( - "clean", help="Delete all build and prefix directories") - subcommands.add_parser( - "build-testbed", help="Build the testbed app") - test = subcommands.add_parser( - "test", help="Run the test suite") + "make-build", help="Run `make` for the build Python") + configure_host = subcommands.add_parser( + "configure-host", help="Run `configure` for Android") + make_host = subcommands.add_parser( + "make-host", help="Run `make` for Android") + + subcommands.add_parser("clean", help="Delete all build directories") + subcommands.add_parser("build-testbed", help="Build the testbed app") + test = subcommands.add_parser("test", help="Run the testbed app") package = subcommands.add_parser("package", help="Make a release package") + env = subcommands.add_parser("env", help="Print environment variables") # Common arguments for subcommand in build, configure_build, configure_host: subcommand.add_argument( "--clean", action="store_true", default=False, dest="clean", - help="Delete the relevant build and prefix directories first") - for subcommand in [build, configure_host, make_host, package]: + help="Delete the relevant build directories first") + + host_commands = [build, configure_host, make_host, package] + if in_source_tree: + host_commands.append(env) + for subcommand in host_commands: subcommand.add_argument( "host", metavar="HOST", choices=HOSTS, help="Host triplet: choices=[%(choices)s]") + for subcommand in build, configure_build, configure_host: subcommand.add_argument("args", nargs="*", help="Extra arguments to pass to `configure`") @@ -654,6 +730,7 @@ def parse_args(): "-v", "--verbose", action="count", default=0, help="Show Gradle output, and non-Python logcat messages. " "Use twice to include high-volume messages which are rarely useful.") + device_group = test.add_mutually_exclusive_group(required=True) device_group.add_argument( "--connected", metavar="SERIAL", help="Run on a connected device. " @@ -661,8 +738,24 @@ def parse_args(): device_group.add_argument( "--managed", metavar="NAME", help="Run on a Gradle-managed device. " "These are defined in `managedDevices` in testbed/app/build.gradle.kts.") + + test.add_argument( + "--site-packages", metavar="DIR", type=abspath, + help="Directory to copy as the app's site-packages.") test.add_argument( - "args", nargs="*", help=f"Arguments for `python -m test`. " + "--cwd", metavar="DIR", type=abspath, + help="Directory to copy as the app's working directory.") + + mode_group = test.add_mutually_exclusive_group() + mode_group.add_argument( + "-c", dest="command", help="Execute the given Python code.") + mode_group.add_argument( + "-m", dest="module", help="Execute the module with the given name.") + test.epilog = ( + "If neither -c nor -m are passed, the default is '-m test', which will " + "run Python's own test suite.") + test.add_argument( + "args", nargs="*", help=f"Arguments to add to sys.argv. " f"Separate them from {SCRIPT_NAME}'s own arguments with `--`.") return parser.parse_args() @@ -688,6 +781,7 @@ def main(): "build-testbed": build_testbed, "test": run_testbed, "package": package, + "env": env, } try: @@ -708,14 +802,9 @@ def print_called_process_error(e): if not content.endswith("\n"): stream.write("\n") - # Format the command so it can be copied into a shell. shlex uses single - # quotes, so we surround the whole command with double quotes. - args_joined = ( - e.cmd if isinstance(e.cmd, str) - else " ".join(shlex.quote(str(arg)) for arg in e.cmd) - ) + # shlex uses single quotes, so we surround the command with double quotes. print( - f'Command "{args_joined}" returned exit status {e.returncode}' + f'Command "{join_command(e.cmd)}" returned exit status {e.returncode}' ) diff --git a/Android/testbed/app/build.gradle.kts b/Android/testbed/app/build.gradle.kts index c627cb1b0e0b22..92cffd61f86876 100644 --- a/Android/testbed/app/build.gradle.kts +++ b/Android/testbed/app/build.gradle.kts @@ -85,7 +85,7 @@ android { minSdk = androidEnvFile.useLines { for (line in it) { - """api_level:=(\d+)""".toRegex().find(line)?.let { + """ANDROID_API_LEVEL:=(\d+)""".toRegex().find(line)?.let { return@useLines it.groupValues[1].toInt() } } @@ -205,11 +205,29 @@ androidComponents.onVariants { variant -> into("site-packages") { from("$projectDir/src/main/python") + + val sitePackages = findProperty("python.sitePackages") as String? + if (!sitePackages.isNullOrEmpty()) { + if (!file(sitePackages).exists()) { + throw GradleException("$sitePackages does not exist") + } + from(sitePackages) + } } duplicatesStrategy = DuplicatesStrategy.EXCLUDE exclude("**/__pycache__") } + + into("cwd") { + val cwd = findProperty("python.cwd") as String? + if (!cwd.isNullOrEmpty()) { + if (!file(cwd).exists()) { + throw GradleException("$cwd does not exist") + } + from(cwd) + } + } } } diff --git a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt index 0e888ab71d87da..94be52dd2dc870 100644 --- a/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt +++ b/Android/testbed/app/src/androidTest/java/org/python/testbed/PythonSuite.kt @@ -17,11 +17,11 @@ class PythonSuite { fun testPython() { val start = System.currentTimeMillis() try { - val context = + val status = PythonTestRunner( InstrumentationRegistry.getInstrumentation().targetContext - val args = - InstrumentationRegistry.getArguments().getString("pythonArgs", "") - val status = PythonTestRunner(context).run(args) + ).run( + InstrumentationRegistry.getArguments() + ) assertEquals(0, status) } finally { // Make sure the process lives long enough for the test script to diff --git a/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt b/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt index c4bf6cbe83d8cd..ef28948486fb52 100644 --- a/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt +++ b/Android/testbed/app/src/main/java/org/python/testbed/MainActivity.kt @@ -15,17 +15,29 @@ class MainActivity : AppCompatActivity() { override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) setContentView(R.layout.activity_main) - val status = PythonTestRunner(this).run("-W -uall") + val status = PythonTestRunner(this).run("-m", "test", "-W -uall") findViewById(R.id.tvHello).text = "Exit status $status" } } class PythonTestRunner(val context: Context) { - /** @param args Extra arguments for `python -m test`. - * @return The Python exit status: zero if the tests passed, nonzero if - * they failed. */ - fun run(args: String = "") : Int { + fun run(instrumentationArgs: Bundle) = run( + instrumentationArgs.getString("pythonMode")!!, + instrumentationArgs.getString("pythonModule")!!, + instrumentationArgs.getString("pythonArgs") ?: "", + ) + + /** Run Python. + * + * @param mode Either "-c" or "-m". + * @param module Python statements for "-c" mode, or a module name for + * "-m" mode. + * @param args Arguments to add to sys.argv. Will be parsed by `shlex.split`. + * @return The Python exit status: zero on success, nonzero on failure. */ + fun run(mode: String, module: String, args: String) : Int { + Os.setenv("PYTHON_MODE", mode, true) + Os.setenv("PYTHON_MODULE", module, true) Os.setenv("PYTHON_ARGS", args, true) // Python needs this variable to help it find the temporary directory, @@ -36,8 +48,9 @@ class PythonTestRunner(val context: Context) { System.loadLibrary("main_activity") redirectStdioToLogcat() - // The main module is in src/main/python/main.py. - return runPython(pythonHome.toString(), "main") + // The main module is in src/main/python. We don't simply call it + // "main", as that could clash with third-party test code. + return runPython(pythonHome.toString(), "android_testbed_main") } private fun extractAssets() : File { diff --git a/Android/testbed/app/src/main/python/main.py b/Android/testbed/app/src/main/python/android_testbed_main.py similarity index 68% rename from Android/testbed/app/src/main/python/main.py rename to Android/testbed/app/src/main/python/android_testbed_main.py index d6941b14412fcc..31b8e5343a8449 100644 --- a/Android/testbed/app/src/main/python/main.py +++ b/Android/testbed/app/src/main/python/android_testbed_main.py @@ -26,7 +26,23 @@ # test_signals in test_threadsignals.py. signal.pthread_sigmask(signal.SIG_UNBLOCK, [signal.SIGUSR1]) +mode = os.environ["PYTHON_MODE"] +module = os.environ["PYTHON_MODULE"] sys.argv[1:] = shlex.split(os.environ["PYTHON_ARGS"]) -# The test module will call sys.exit to indicate whether the tests passed. -runpy.run_module("test") +cwd = f"{sys.prefix}/cwd" +if not os.path.exists(cwd): + # Empty directories are lost in the asset packing/unpacking process. + os.mkdir(cwd) +os.chdir(cwd) + +if mode == "-c": + # In -c mode, sys.path starts with an empty string, which means whatever the current + # working directory is at the moment of each import. + sys.path.insert(0, "") + exec(module, {}) +elif mode == "-m": + sys.path.insert(0, os.getcwd()) + runpy.run_module(module, run_name="__main__", alter_sys=True) +else: + raise ValueError(f"unknown mode: {mode}") diff --git a/Android/testbed/build.gradle.kts b/Android/testbed/build.gradle.kts index 4d1d6f87594da3..451517b3f1aeab 100644 --- a/Android/testbed/build.gradle.kts +++ b/Android/testbed/build.gradle.kts @@ -1,5 +1,5 @@ // Top-level build file where you can add configuration options common to all sub-projects/modules. plugins { - id("com.android.application") version "8.6.1" apply false + id("com.android.application") version "8.10.0" apply false id("org.jetbrains.kotlin.android") version "1.9.22" apply false } diff --git a/Android/testbed/gradle/wrapper/gradle-wrapper.properties b/Android/testbed/gradle/wrapper/gradle-wrapper.properties index 36529c896426b0..5d42fbae084da1 100644 --- a/Android/testbed/gradle/wrapper/gradle-wrapper.properties +++ b/Android/testbed/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ #Mon Feb 19 20:29:06 GMT 2024 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/Doc/c-api/allocation.rst b/Doc/c-api/allocation.rst index 7cbc99ad145ad4..59d913a0462382 100644 --- a/Doc/c-api/allocation.rst +++ b/Doc/c-api/allocation.rst @@ -16,7 +16,20 @@ Allocating Objects on the Heap Initialize a newly allocated object *op* with its type and initial reference. Returns the initialized object. Other fields of the object are - not affected. + not initialized. Despite its name, this function is unrelated to the + object's :meth:`~object.__init__` method (:c:member:`~PyTypeObject.tp_init` + slot). Specifically, this function does **not** call the object's + :meth:`!__init__` method. + + In general, consider this function to be a low-level routine. Use + :c:member:`~PyTypeObject.tp_alloc` where possible. + For implementing :c:member:`!tp_alloc` for your type, prefer + :c:func:`PyType_GenericAlloc` or :c:func:`PyObject_New`. + + .. note:: + + This function only initializes the object's memory corresponding to the + initial :c:type:`PyObject` structure. It does not zero the rest. .. c:function:: PyVarObject* PyObject_InitVar(PyVarObject *op, PyTypeObject *type, Py_ssize_t size) @@ -24,38 +37,107 @@ Allocating Objects on the Heap This does everything :c:func:`PyObject_Init` does, and also initializes the length information for a variable-size object. + .. note:: + + This function only initializes some of the object's memory. It does not + zero the rest. + .. c:macro:: PyObject_New(TYPE, typeobj) - Allocate a new Python object using the C structure type *TYPE* - and the Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header are not initialized. - The caller will own the only reference to the object - (i.e. its reference count will be one). - The size of the memory allocation is determined from the - :c:member:`~PyTypeObject.tp_basicsize` field of the type object. + Allocates a new Python object using the C structure type *TYPE* and the + Python type object *typeobj* (``PyTypeObject*``) by calling + :c:func:`PyObject_Malloc` to allocate memory and initializing it like + :c:func:`PyObject_Init`. The caller will own the only reference to the + object (i.e. its reference count will be one). + + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This macro does not call :c:member:`~PyTypeObject.tp_alloc`, + :c:member:`~PyTypeObject.tp_new` (:meth:`~object.__new__`), or + :c:member:`~PyTypeObject.tp_init` (:meth:`~object.__init__`). + + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_New` instead. + + Memory allocated by this macro must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + + .. note:: + + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_New` instead. + PyObject *foo = PyObject_CallNoArgs((PyObject *)&PyFoo_Type); + + .. seealso:: + + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` .. c:macro:: PyObject_NewVar(TYPE, typeobj, size) - Allocate a new Python object using the C structure type *TYPE* and the - Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header - are not initialized. The allocated memory allows for the *TYPE* structure - plus *size* (``Py_ssize_t``) fields of the size - given by the :c:member:`~PyTypeObject.tp_itemsize` field of - *typeobj*. This is useful for implementing objects like tuples, which are - able to determine their size at construction time. Embedding the array of - fields into the same allocation decreases the number of allocations, - improving the memory management efficiency. + Like :c:macro:`PyObject_New` except: + + * It allocates enough memory for the *TYPE* structure plus *size* + (``Py_ssize_t``) fields of the size given by the + :c:member:`~PyTypeObject.tp_itemsize` field of *typeobj*. + * The memory is initialized like :c:func:`PyObject_InitVar`. + + This is useful for implementing objects like tuples, which are able to + determine their size at construction time. Embedding the array of fields + into the same allocation decreases the number of allocations, improving the + memory management efficiency. + + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_NewVar` + instead. + + Memory allocated by this function must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + + .. note:: + + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: + + PyObject *list_instance = PyObject_CallNoArgs((PyObject *)&PyList_Type); + + .. seealso:: - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_NewVar` instead. + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` .. c:function:: void PyObject_Del(void *op) @@ -71,6 +153,6 @@ Allocating Objects on the Heap .. seealso:: - :c:func:`PyModule_Create` + :ref:`moduleobjects` To allocate and create extension modules. diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 3bbc990b6329c0..ab9f9c4539ae9a 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -685,6 +685,13 @@ Building values ``p`` (:class:`bool`) [int] Convert a C :c:expr:`int` to a Python :class:`bool` object. + + Be aware that this format requires an ``int`` argument. + Unlike most other contexts in C, variadic arguments are not coerced to + a suitable type automatically. + You can convert another type (for example, a pointer or a float) to a + suitable ``int`` value using ``(x) ? 1 : 0`` or ``!!x``. + .. versionadded:: 3.14 ``c`` (:class:`bytes` of length 1) [char] diff --git a/Doc/c-api/capsule.rst b/Doc/c-api/capsule.rst index cdb8aa33e9fd32..64dc4f5275b512 100644 --- a/Doc/c-api/capsule.rst +++ b/Doc/c-api/capsule.rst @@ -105,9 +105,19 @@ Refer to :ref:`using-capsules` for more information on using these objects. ``module.attribute``. The *name* stored in the capsule must match this string exactly. + This function splits *name* on the ``.`` character, and imports the first + element. It then processes further elements using attribute lookups. + Return the capsule's internal *pointer* on success. On failure, set an exception and return ``NULL``. + .. note:: + + If *name* points to an attribute of some submodule or subpackage, this + submodule or subpackage must be previously imported using other means + (for example, by using :c:func:`PyImport_ImportModule`) for the + attribute lookups to succeed. + .. versionchanged:: 3.3 *no_block* has no effect anymore. diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index 6eae24b38fae48..717b0da8f87c7f 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -182,7 +182,7 @@ bound into a function. Type of a code object watcher callback function. If *event* is ``PY_CODE_EVENT_CREATE``, then the callback is invoked - after `co` has been fully initialized. Otherwise, the callback is invoked + after *co* has been fully initialized. Otherwise, the callback is invoked before the destruction of *co* takes place, so the prior state of *co* can be inspected. @@ -211,6 +211,71 @@ bound into a function. .. versionadded:: 3.12 +.. _c_codeobject_flags: + +Code Object Flags +----------------- + +Code objects contain a bit-field of flags, which can be retrieved as the +:attr:`~codeobject.co_flags` Python attribute (for example using +:c:func:`PyObject_GetAttrString`), and set using a *flags* argument to +:c:func:`PyUnstable_Code_New` and similar functions. + +Flags whose names start with ``CO_FUTURE_`` correspond to features normally +selectable by :ref:`future statements `. These flags can be used in +:c:member:`PyCompilerFlags.cf_flags`. +Note that many ``CO_FUTURE_`` flags are mandatory in current versions of +Python, and setting them has no effect. + +The following flags are available. +For their meaning, see the linked documentation of their Python equivalents. + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * Flag + * Meaning + * * .. c:macro:: CO_OPTIMIZED + * :py:data:`inspect.CO_OPTIMIZED` + * * .. c:macro:: CO_NEWLOCALS + * :py:data:`inspect.CO_NEWLOCALS` + * * .. c:macro:: CO_VARARGS + * :py:data:`inspect.CO_VARARGS` + * * .. c:macro:: CO_VARKEYWORDS + * :py:data:`inspect.CO_VARKEYWORDS` + * * .. c:macro:: CO_NESTED + * :py:data:`inspect.CO_NESTED` + * * .. c:macro:: CO_GENERATOR + * :py:data:`inspect.CO_GENERATOR` + * * .. c:macro:: CO_COROUTINE + * :py:data:`inspect.CO_COROUTINE` + * * .. c:macro:: CO_ITERABLE_COROUTINE + * :py:data:`inspect.CO_ITERABLE_COROUTINE` + * * .. c:macro:: CO_ASYNC_GENERATOR + * :py:data:`inspect.CO_ASYNC_GENERATOR` + * * .. c:macro:: CO_HAS_DOCSTRING + * :py:data:`inspect.CO_HAS_DOCSTRING` + * * .. c:macro:: CO_METHOD + * :py:data:`inspect.CO_METHOD` + + * * .. c:macro:: CO_FUTURE_DIVISION + * no effect (:py:data:`__future__.division`) + * * .. c:macro:: CO_FUTURE_ABSOLUTE_IMPORT + * no effect (:py:data:`__future__.absolute_import`) + * * .. c:macro:: CO_FUTURE_WITH_STATEMENT + * no effect (:py:data:`__future__.with_statement`) + * * .. c:macro:: CO_FUTURE_PRINT_FUNCTION + * no effect (:py:data:`__future__.print_function`) + * * .. c:macro:: CO_FUTURE_UNICODE_LITERALS + * no effect (:py:data:`__future__.unicode_literals`) + * * .. c:macro:: CO_FUTURE_GENERATOR_STOP + * no effect (:py:data:`__future__.generator_stop`) + * * .. c:macro:: CO_FUTURE_ANNOTATIONS + * :py:data:`__future__.annotations` + + Extra information ----------------- diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index c8e1b5c2461738..a750cda3e2d474 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -749,6 +749,16 @@ Exception Classes .. versionadded:: 3.2 +.. c:function:: int PyExceptionClass_Check(PyObject *ob) + + Return non-zero if *ob* is an exception class, zero otherwise. This function always succeeds. + + +.. c:function:: const char *PyExceptionClass_Name(PyObject *ob) + + Return :c:member:`~PyTypeObject.tp_name` of the exception class *ob*. + + Exception Objects ================= @@ -982,6 +992,7 @@ the variables: .. index:: single: PyExc_BaseException (C var) + single: PyExc_BaseExceptionGroup (C var) single: PyExc_Exception (C var) single: PyExc_ArithmeticError (C var) single: PyExc_AssertionError (C var) @@ -1041,6 +1052,8 @@ the variables: +=========================================+=================================+==========+ | :c:data:`PyExc_BaseException` | :exc:`BaseException` | [1]_ | +-----------------------------------------+---------------------------------+----------+ +| :c:data:`PyExc_BaseExceptionGroup` | :exc:`BaseExceptionGroup` | [1]_ | ++-----------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_Exception` | :exc:`Exception` | [1]_ | +-----------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_ArithmeticError` | :exc:`ArithmeticError` | [1]_ | @@ -1164,6 +1177,9 @@ the variables: .. versionadded:: 3.6 :c:data:`PyExc_ModuleNotFoundError`. +.. versionadded:: 3.11 + :c:data:`PyExc_BaseExceptionGroup`. + These are compatibility aliases to :c:data:`PyExc_OSError`: .. index:: @@ -1207,6 +1223,7 @@ the variables: single: PyExc_Warning (C var) single: PyExc_BytesWarning (C var) single: PyExc_DeprecationWarning (C var) + single: PyExc_EncodingWarning (C var) single: PyExc_FutureWarning (C var) single: PyExc_ImportWarning (C var) single: PyExc_PendingDeprecationWarning (C var) @@ -1225,6 +1242,8 @@ the variables: +------------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_DeprecationWarning` | :exc:`DeprecationWarning` | | +------------------------------------------+---------------------------------+----------+ +| :c:data:`PyExc_EncodingWarning` | :exc:`EncodingWarning` | | ++------------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_FutureWarning` | :exc:`FutureWarning` | | +------------------------------------------+---------------------------------+----------+ | :c:data:`PyExc_ImportWarning` | :exc:`ImportWarning` | | @@ -1245,6 +1264,9 @@ the variables: .. versionadded:: 3.2 :c:data:`PyExc_ResourceWarning`. +.. versionadded:: 3.10 + :c:data:`PyExc_EncodingWarning`. + Notes: .. [3] diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst new file mode 100644 index 00000000000000..3d331e6ec12f76 --- /dev/null +++ b/Doc/c-api/extension-modules.rst @@ -0,0 +1,247 @@ +.. highlight:: c + +.. _extension-modules: + +Defining extension modules +-------------------------- + +A C extension for CPython is a shared library (for example, a ``.so`` file +on Linux, ``.pyd`` DLL on Windows), which is loadable into the Python process +(for example, it is compiled with compatible compiler settings), and which +exports an :ref:`initialization function `. + +To be importable by default (that is, by +:py:class:`importlib.machinery.ExtensionFileLoader`), +the shared library must be available on :py:attr:`sys.path`, +and must be named after the module name plus an extension listed in +:py:attr:`importlib.machinery.EXTENSION_SUFFIXES`. + +.. note:: + + Building, packaging and distributing extension modules is best done with + third-party tools, and is out of scope of this document. + One suitable tool is Setuptools, whose documentation can be found at + https://setuptools.pypa.io/en/latest/setuptools.html. + +Normally, the initialization function returns a module definition initialized +using :c:func:`PyModuleDef_Init`. +This allows splitting the creation process into several phases: + +- Before any substantial code is executed, Python can determine which + capabilities the module supports, and it can adjust the environment or + refuse loading an incompatible extension. +- By default, Python itself creates the module object -- that is, it does + the equivalent of :py:meth:`object.__new__` for classes. + It also sets initial attributes like :attr:`~module.__package__` and + :attr:`~module.__loader__`. +- Afterwards, the module object is initialized using extension-specific + code -- the equivalent of :py:meth:`~object.__init__` on classes. + +This is called *multi-phase initialization* to distinguish it from the legacy +(but still supported) *single-phase initialization* scheme, +where the initialization function returns a fully constructed module. +See the :ref:`single-phase-initialization section below ` +for details. + +.. versionchanged:: 3.5 + + Added support for multi-phase initialization (:pep:`489`). + + +Multiple module instances +......................... + +By default, extension modules are not singletons. +For example, if the :py:attr:`sys.modules` entry is removed and the module +is re-imported, a new module object is created, and typically populated with +fresh method and type objects. +The old module is subject to normal garbage collection. +This mirrors the behavior of pure-Python modules. + +Additional module instances may be created in +:ref:`sub-interpreters ` +or after Python runtime reinitialization +(:c:func:`Py_Finalize` and :c:func:`Py_Initialize`). +In these cases, sharing Python objects between module instances would likely +cause crashes or undefined behavior. + +To avoid such issues, each instance of an extension module should +be *isolated*: changes to one instance should not implicitly affect the others, +and all state owned by the module, including references to Python objects, +should be specific to a particular module instance. +See :ref:`isolating-extensions-howto` for more details and a practical guide. + +A simpler way to avoid these issues is +:ref:`raising an error on repeated initialization `. + +All modules are expected to support +:ref:`sub-interpreters `, or otherwise explicitly +signal a lack of support. +This is usually achieved by isolation or blocking repeated initialization, +as above. +A module may also be limited to the main interpreter using +the :c:data:`Py_mod_multiple_interpreters` slot. + + +.. _extension-export-hook: + +Initialization function +....................... + +The initialization function defined by an extension module has the +following signature: + +.. c:function:: PyObject* PyInit_modulename(void) + +Its name should be :samp:`PyInit_{}`, with ```` replaced by the +name of the module. + +For modules with ASCII-only names, the function must instead be named +:samp:`PyInit_{}`, with ```` replaced by the name of the module. +When using :ref:`multi-phase-initialization`, non-ASCII module names +are allowed. In this case, the initialization function name is +:samp:`PyInitU_{}`, with ```` encoded using Python's +*punycode* encoding with hyphens replaced by underscores. In Python: + +.. code-block:: python + + def initfunc_name(name): + try: + suffix = b'_' + name.encode('ascii') + except UnicodeEncodeError: + suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') + return b'PyInit' + suffix + +It is recommended to define the initialization function using a helper macro: + +.. c:macro:: PyMODINIT_FUNC + + Declare an extension module initialization function. + This macro: + + * specifies the :c:expr:`PyObject*` return type, + * adds any special linkage declarations required by the platform, and + * for C++, declares the function as ``extern "C"``. + +For example, a module called ``spam`` would be defined like this:: + + static struct PyModuleDef spam_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "spam", + ... + }; + + PyMODINIT_FUNC + PyInit_spam(void) + { + return PyModuleDef_Init(&spam_module); + } + +It is possible to export multiple modules from a single shared library by +defining multiple initialization functions. However, importing them requires +using symbolic links or a custom importer, because by default only the +function corresponding to the filename is found. +See the `Multiple modules in one library `__ +section in :pep:`489` for details. + +The initialization function is typically the only non-\ ``static`` +item defined in the module's C source. + + +.. _multi-phase-initialization: + +Multi-phase initialization +.......................... + +Normally, the :ref:`initialization function ` +(``PyInit_modulename``) returns a :c:type:`PyModuleDef` instance with +non-``NULL`` :c:member:`~PyModuleDef.m_slots`. +Before it is returned, the ``PyModuleDef`` instance must be initialized +using the following function: + + +.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) + + Ensure a module definition is a properly initialized Python object that + correctly reports its type and a reference count. + + Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. + + Calling this function is required for :ref:`multi-phase-initialization`. + It should not be used in other contexts. + + Note that Python assumes that ``PyModuleDef`` structures are statically + allocated. + This function may return either a new reference or a borrowed one; + this reference must not be released. + + .. versionadded:: 3.5 + + +.. _single-phase-initialization: + +Legacy single-phase initialization +.................................. + +.. attention:: + Single-phase initialization is a legacy mechanism to initialize extension + modules, with known drawbacks and design flaws. Extension module authors + are encouraged to use multi-phase initialization instead. + +In single-phase initialization, the +:ref:`initialization function ` (``PyInit_modulename``) +should create, populate and return a module object. +This is typically done using :c:func:`PyModule_Create` and functions like +:c:func:`PyModule_AddObjectRef`. + +Single-phase initialization differs from the :ref:`default ` +in the following ways: + +* Single-phase modules are, or rather *contain*, “singletons”. + + When the module is first initialized, Python saves the contents of + the module's ``__dict__`` (that is, typically, the module's functions and + types). + + For subsequent imports, Python does not call the initialization function + again. + Instead, it creates a new module object with a new ``__dict__``, and copies + the saved contents to it. + For example, given a single-phase module ``_testsinglephase`` + [#testsinglephase]_ that defines a function ``sum`` and an exception class + ``error``: + + .. code-block:: python + + >>> import sys + >>> import _testsinglephase as one + >>> del sys.modules['_testsinglephase'] + >>> import _testsinglephase as two + >>> one is two + False + >>> one.__dict__ is two.__dict__ + False + >>> one.sum is two.sum + True + >>> one.error is two.error + True + + The exact behavior should be considered a CPython implementation detail. + +* To work around the fact that ``PyInit_modulename`` does not take a *spec* + argument, some state of the import machinery is saved and applied to the + first suitable module created during the ``PyInit_modulename`` call. + Specifically, when a sub-module is imported, this mechanism prepends the + parent package name to the name of the module. + + A single-phase ``PyInit_modulename`` function should create “its” module + object as soon as possible, before any other module objects can be created. + +* Non-ASCII module names (``PyInitU_modulename``) are not supported. + +* Single-phase modules support module lookup functions like + :c:func:`PyState_FindModule`. + +.. [#testsinglephase] ``_testsinglephase`` is an internal module used + in CPython's self-test suite; your installation may or may not + include it. diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index 58792edeed25e3..5fb8567ef8c95f 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -95,6 +95,13 @@ There are a few functions specific to Python functions. .. versionadded:: 3.12 + +.. c:function:: PyObject* PyFunction_GetKwDefaults(PyObject *op) + + Return the keyword-only argument default values of the function object *op*. This can be a + dictionary of arguments or ``NULL``. + + .. c:function:: PyObject* PyFunction_GetClosure(PyObject *op) Return the closure associated with the function object *op*. This can be ``NULL`` @@ -123,6 +130,19 @@ There are a few functions specific to Python functions. Raises :exc:`SystemError` and returns ``-1`` on failure. +.. c:function:: PyObject *PyFunction_GET_CODE(PyObject *op) + PyObject *PyFunction_GET_GLOBALS(PyObject *op) + PyObject *PyFunction_GET_MODULE(PyObject *op) + PyObject *PyFunction_GET_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_KW_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_CLOSURE(PyObject *op) + PyObject *PyFunction_GET_ANNOTATIONS(PyObject *op) + + These functions are similar to their ``PyFunction_Get*`` counterparts, but + do not do type checking. Passing anything other than an instance of + :c:data:`PyFunction_Type` is undefined behavior. + + .. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback) Register *callback* as a function watcher for the current interpreter. @@ -169,7 +189,7 @@ There are a few functions specific to Python functions. unpredictable effects, including infinite recursion. If *event* is ``PyFunction_EVENT_CREATE``, then the callback is invoked - after `func` has been fully initialized. Otherwise, the callback is invoked + after *func* has been fully initialized. Otherwise, the callback is invoked before the modification to *func* takes place, so the prior state of *func* can be inspected. The runtime is permitted to optimize away the creation of function objects when possible. In such cases no event will be emitted. diff --git a/Doc/c-api/gcsupport.rst b/Doc/c-api/gcsupport.rst index d1f0982b818931..f6fa52b36c5ab3 100644 --- a/Doc/c-api/gcsupport.rst +++ b/Doc/c-api/gcsupport.rst @@ -57,11 +57,49 @@ rules: Analogous to :c:macro:`PyObject_New` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set. + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:macro:: PyObject_GC_NewVar(TYPE, typeobj, size) Analogous to :c:macro:`PyObject_NewVar` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set. + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:function:: PyObject* PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *type, size_t extra_size) Analogous to :c:macro:`PyObject_GC_New` but allocates *extra_size* @@ -73,6 +111,10 @@ rules: The extra data will be deallocated with the object, but otherwise it is not managed by Python. + Memory allocated by this function must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + .. warning:: The function is marked as unstable because the final mechanism for reserving extra data after an instance is not yet decided. @@ -136,6 +178,21 @@ rules: Releases memory allocated to an object using :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_New`, + :c:macro:`PyObject_NewVar`, or related allocation functions; use + :c:func:`PyObject_Free` instead. + + .. seealso:: + + * :c:func:`PyObject_Free` is the non-GC equivalent of this function. + * :c:macro:`PyObject_GC_New` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. c:function:: void PyObject_GC_UnTrack(void *op) @@ -180,9 +237,9 @@ provided. In order to use this macro, the :c:member:`~PyTypeObject.tp_traverse` must name its arguments exactly *visit* and *arg*: -.. c:function:: void Py_VISIT(PyObject *o) +.. c:macro:: Py_VISIT(o) - If *o* is not ``NULL``, call the *visit* callback, with arguments *o* + If the :c:expr:`PyObject *` *o* is not ``NULL``, call the *visit* callback, with arguments *o* and *arg*. If *visit* returns a non-zero value, then return it. Using this macro, :c:member:`~PyTypeObject.tp_traverse` handlers look like:: diff --git a/Doc/c-api/index.rst b/Doc/c-api/index.rst index ba56b03c6ac8e7..e9df2a304d975b 100644 --- a/Doc/c-api/index.rst +++ b/Doc/c-api/index.rst @@ -17,6 +17,7 @@ document the API functions in detail. veryhigh.rst refcounting.rst exceptions.rst + extension-modules.rst utilities.rst abstract.rst concrete.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 52f64a61006b74..dc96ed7f719fcf 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -203,7 +203,7 @@ to 1 and ``-bb`` sets :c:data:`Py_BytesWarningFlag` to 2. Set by the :option:`-i` option. - .. deprecated:: 3.12 + .. deprecated-removed:: 3.12 3.15 .. c:var:: int Py_IsolatedFlag @@ -498,17 +498,8 @@ Initializing and finalizing the interpreter strings other than those passed in (however, the contents of the strings pointed to by the argument list are not modified). - The return value will be ``0`` if the interpreter exits normally (i.e., - without an exception), ``1`` if the interpreter exits due to an exception, - or ``2`` if the argument list does not represent a valid Python command - line. - - Note that if an otherwise unhandled :exc:`SystemExit` is raised, this - function will not return ``1``, but exit the process, as long as - ``Py_InspectFlag`` is not set. If ``Py_InspectFlag`` is set, execution will - drop into the interactive Python prompt, at which point a second otherwise - unhandled :exc:`SystemExit` will still exit the process, while any other - means of exiting will set the return value as described above. + The return value is ``2`` if the argument list does not represent a valid + Python command line, and otherwise the same as :c:func:`Py_RunMain`. In terms of the CPython runtime configuration APIs documented in the :ref:`runtime configuration ` section (and without accounting @@ -545,23 +536,18 @@ Initializing and finalizing the interpreter If :c:member:`PyConfig.inspect` is not set (the default), the return value will be ``0`` if the interpreter exits normally (that is, without raising - an exception), or ``1`` if the interpreter exits due to an exception. If an - otherwise unhandled :exc:`SystemExit` is raised, the function will immediately - exit the process instead of returning ``1``. + an exception), the exit status of an unhandled :exc:`SystemExit`, or ``1`` + for any other unhandled exception. If :c:member:`PyConfig.inspect` is set (such as when the :option:`-i` option is used), rather than returning when the interpreter exits, execution will instead resume in an interactive Python prompt (REPL) using the ``__main__`` module's global namespace. If the interpreter exited with an exception, it is immediately raised in the REPL session. The function return value is - then determined by the way the *REPL session* terminates: returning ``0`` - if the session terminates without raising an unhandled exception, exiting - immediately for an unhandled :exc:`SystemExit`, and returning ``1`` for - any other unhandled exception. + then determined by the way the *REPL session* terminates: ``0``, ``1``, or + the status of a :exc:`SystemExit`, as specified above. - This function always finalizes the Python interpreter regardless of whether - it returns a value or immediately exits the process due to an unhandled - :exc:`SystemExit` exception. + This function always finalizes the Python interpreter before it returns. See :ref:`Python Configuration ` for an example of a customized Python that always runs in isolated mode using @@ -1083,8 +1069,36 @@ Note that the ``PyGILState_*`` functions assume there is only one global interpreter (created automatically by :c:func:`Py_Initialize`). Python supports the creation of additional interpreters (using :c:func:`Py_NewInterpreter`), but mixing multiple interpreters and the -``PyGILState_*`` API is unsupported. +``PyGILState_*`` API is unsupported. This is because :c:func:`PyGILState_Ensure` +and similar functions default to :term:`attaching ` a +:term:`thread state` for the main interpreter, meaning that the thread can't safely +interact with the calling subinterpreter. + +Supporting subinterpreters in non-Python threads +------------------------------------------------ + +If you would like to support subinterpreters with non-Python created threads, you +must use the ``PyThreadState_*`` API instead of the traditional ``PyGILState_*`` +API. + +In particular, you must store the interpreter state from the calling +function and pass it to :c:func:`PyThreadState_New`, which will ensure that +the :term:`thread state` is targeting the correct interpreter:: + + /* The return value of PyInterpreterState_Get() from the + function that created this thread. */ + PyInterpreterState *interp = ThreadData->interp; + PyThreadState *tstate = PyThreadState_New(interp); + PyThreadState_Swap(tstate); + + /* GIL of the subinterpreter is now held. + Perform Python actions here. */ + result = CallSomeFunction(); + /* evaluate result or handle exception */ + /* Destroy the thread state. No Python API allowed beyond this point. */ + PyThreadState_Clear(tstate); + PyThreadState_DeleteCurrent(); .. _fork-and-threads: @@ -1261,6 +1275,10 @@ code, or when embedding the Python interpreter: .. seealso: :c:func:`PyEval_ReleaseThread` + .. note:: + Similar to :c:func:`PyGILState_Ensure`, this function will hang the + thread if the runtime is finalizing. + The following functions use thread-local storage, and are not compatible with sub-interpreters: @@ -1287,10 +1305,10 @@ with sub-interpreters: When the function returns, there will be an :term:`attached thread state` and the thread will be able to call arbitrary Python code. Failure is a fatal error. - .. note:: - Calling this function from a thread when the runtime is finalizing will - hang the thread until the program exits, even if the thread was not - created by Python. Refer to + .. warning:: + Calling this function when the runtime is finalizing is unsafe. Doing + so will either hang the thread until the program ends, or fully crash + the interpreter in rare cases. Refer to :ref:`cautions-regarding-runtime-finalization` for more details. .. versionchanged:: 3.14 @@ -1307,7 +1325,6 @@ with sub-interpreters: Every call to :c:func:`PyGILState_Ensure` must be matched by a call to :c:func:`PyGILState_Release` on the same thread. - .. c:function:: PyThreadState* PyGILState_GetThisThreadState() Get the :term:`attached thread state` for this thread. May return ``NULL`` if no @@ -1315,20 +1332,30 @@ with sub-interpreters: always has such a thread-state, even if no auto-thread-state call has been made on the main thread. This is mainly a helper/diagnostic function. - .. seealso: :c:func:`PyThreadState_Get`` + .. note:: + This function does not account for :term:`thread states ` created + by something other than :c:func:`PyGILState_Ensure` (such as :c:func:`PyThreadState_New`). + Prefer :c:func:`PyThreadState_Get` or :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. seealso: :c:func:`PyThreadState_Get`` .. c:function:: int PyGILState_Check() Return ``1`` if the current thread is holding the :term:`GIL` and ``0`` otherwise. This function can be called from any thread at any time. - Only if it has had its Python thread state initialized and currently is - holding the :term:`GIL` will it return ``1``. + Only if it has had its :term:`thread state ` initialized + via :c:func:`PyGILState_Ensure` will it return ``1``. This is mainly a helper/diagnostic function. It can be useful for example in callback contexts or memory allocation functions when knowing that the :term:`GIL` is locked can allow the caller to perform sensitive actions or otherwise behave differently. + .. note:: + If the current Python process has ever created a subinterpreter, this + function will *always* return ``1``. Prefer :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. versionadded:: 3.4 @@ -1387,7 +1414,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. c:function:: void PyInterpreterState_Clear(PyInterpreterState *interp) Reset all information in an interpreter state object. There must be - an :term:`attached thread state` for the the interpreter. + an :term:`attached thread state` for the interpreter. .. audit-event:: cpython.PyInterpreterState_Clear "" c.PyInterpreterState_Clear diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index c8c60eb9f48f45..36bb8d7438a4d7 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -111,33 +111,11 @@ Useful macros ============= Several useful macros are defined in the Python header files. Many are -defined closer to where they are useful (e.g. :c:macro:`Py_RETURN_NONE`). +defined closer to where they are useful (for example, :c:macro:`Py_RETURN_NONE`, +:c:macro:`PyMODINIT_FUNC`). Others of a more general utility are defined here. This is not necessarily a complete listing. -.. c:macro:: PyMODINIT_FUNC - - Declare an extension module ``PyInit`` initialization function. The function - return type is :c:expr:`PyObject*`. The macro declares any special linkage - declarations required by the platform, and for C++ declares the function as - ``extern "C"``. - - The initialization function must be named :samp:`PyInit_{name}`, where - *name* is the name of the module, and should be the only non-\ ``static`` - item defined in the module file. Example:: - - static struct PyModuleDef spam_module = { - PyModuleDef_HEAD_INIT, - .m_name = "spam", - ... - }; - - PyMODINIT_FUNC - PyInit_spam(void) - { - return PyModule_Create(&spam_module); - } - .. c:macro:: Py_ABS(x) @@ -844,3 +822,41 @@ after every statement run by the interpreter.) Please refer to :file:`Misc/SpecialBuilds.txt` in the Python source distribution for more detailed information. + + +.. _c-api-tools: + +Recommended third party tools +============================= + +The following third party tools offer both simpler and more sophisticated +approaches to creating C, C++ and Rust extensions for Python: + +* `Cython `_ +* `cffi `_ +* `HPy `_ +* `nanobind `_ (C++) +* `Numba `_ +* `pybind11 `_ (C++) +* `PyO3 `_ (Rust) +* `SWIG `_ + +Using tools such as these can help avoid writing code that is tightly bound to +a particular version of CPython, avoid reference counting errors, and focus +more on your own code than on using the CPython API. In general, new versions +of Python can be supported by updating the tool, and your code will often use +newer and more efficient APIs automatically. Some tools also support compiling +for other implementations of Python from a single set of sources. + +These projects are not supported by the same people who maintain Python, and +issues need to be raised with the projects directly. Remember to check that the +project is still maintained and supported, as the list above may become +outdated. + +.. seealso:: + + `Python Packaging User Guide: Binary Extensions `_ + The Python Packaging User Guide not only covers several available + tools that simplify the creation of binary extensions, but also + discusses the various reasons why creating an extension module may be + desirable in the first place. diff --git a/Doc/c-api/lifecycle.dot b/Doc/c-api/lifecycle.dot new file mode 100644 index 00000000000000..dca9f87e9e0aca --- /dev/null +++ b/Doc/c-api/lifecycle.dot @@ -0,0 +1,156 @@ +digraph "Life Events" { + graph [ + fontnames="svg" + fontsize=12.0 + id="life_events_graph" + layout="dot" + margin="0,0" + ranksep=0.25 + stylesheet="lifecycle.dot.css" + ] + node [ + fontname="Courier" + fontsize=12.0 + ] + edge [ + fontname="Times-Italic" + fontsize=12.0 + ] + + "start" [fontname="Times-Italic" shape=plain label=< start > style=invis] + { + rank="same" + "tp_new" [href="typeobj.html#c.PyTypeObject.tp_new" target="_top"] + "tp_alloc" [href="typeobj.html#c.PyTypeObject.tp_alloc" target="_top"] + } + "tp_init" [href="typeobj.html#c.PyTypeObject.tp_init" target="_top"] + "reachable" [fontname="Times-Italic" shape=box] + "tp_traverse" [ + href="typeobj.html#c.PyTypeObject.tp_traverse" + ordering="in" + target="_top" + ] + "finalized?" [ + fontname="Times-Italic" + label=finalized?> + ordering="in" + shape=diamond + tooltip="marked as finalized?" + ] + "tp_finalize" [ + href="typeobj.html#c.PyTypeObject.tp_finalize" + ordering="in" + target="_top" + ] + "tp_clear" [href="typeobj.html#c.PyTypeObject.tp_clear" target="_top"] + "uncollectable" [ + fontname="Times-Italic" + label=(leaked)> + shape=box + tooltip="uncollectable (leaked)" + ] + "tp_dealloc" [ + href="typeobj.html#c.PyTypeObject.tp_dealloc" + ordering="in" + target="_top" + ] + "tp_free" [href="typeobj.html#c.PyTypeObject.tp_free" target="_top"] + + "start" -> "tp_new" [ + label=< type call > + ] + "tp_new" -> "tp_alloc" [ + label=< direct call > arrowhead=empty + labeltooltip="tp_new to tp_alloc: direct call" + tooltip="tp_new to tp_alloc: direct call" + ] + "tp_new" -> "tp_init" [tooltip="tp_new to tp_init"] + "tp_init" -> "reachable" [tooltip="tp_init to reachable"] + "reachable" -> "tp_traverse" [ + dir="back" + label=< not in a
cyclic
isolate > + labeltooltip="tp_traverse to reachable: not in a cyclic isolate" + tooltip="tp_traverse to reachable: not in a cyclic isolate" + ] + "reachable" -> "tp_traverse" [ + label=< periodic
cyclic isolate
detection > + labeltooltip="reachable to tp_traverse: periodic cyclic isolate detection" + tooltip="reachable to tp_traverse: periodic cyclic isolate detection" + ] + "reachable" -> "tp_init" [tooltip="reachable to tp_init"] + "reachable" -> "tp_finalize" [ + dir="back" + label=< resurrected
(maybe remove
finalized mark) > + labeltooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + tooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + ] + "tp_traverse" -> "finalized?" [ + label=< cyclic
isolate > + labeltooltip="tp_traverse to finalized?: cyclic isolate" + tooltip="tp_traverse to finalized?: cyclic isolate" + ] + "reachable" -> "finalized?" [ + label=< no refs > + labeltooltip="reachable to finalized?: no refs" + tooltip="reachable to finalized?: no refs" + ] + "finalized?" -> "tp_finalize" [ + label=< no (mark
as finalized) > + labeltooltip="finalized? to tp_finalize: no (mark as finalized)" + tooltip="finalized? to tp_finalize: no (mark as finalized)" + ] + "finalized?" -> "tp_clear" [ + label=< yes > + labeltooltip="finalized? to tp_clear: yes" + tooltip="finalized? to tp_clear: yes" + ] + "tp_finalize" -> "tp_clear" [ + label=< no refs or
cyclic isolate > + labeltooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + tooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + ] + "tp_finalize" -> "tp_dealloc" [ + arrowtail=empty + dir="back" + href="lifecycle.html#c.PyObject_CallFinalizerFromDealloc" + style=dashed + label=< recommended
call (see
explanation)> + labeltooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + target="_top" + tooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + ] + "tp_finalize" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_finalize to tp_dealloc: no refs" + tooltip="tp_finalize to tp_dealloc: no refs" + ] + "tp_clear" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_clear to tp_dealloc: no refs" + tooltip="tp_clear to tp_dealloc: no refs" + ] + "tp_clear" -> "uncollectable" [ + label=< cyclic
isolate > + labeltooltip="tp_clear to uncollectable: cyclic isolate" + tooltip="tp_clear to uncollectable: cyclic isolate" + ] + "uncollectable" -> "tp_dealloc" [ + style=invis + tooltip="uncollectable to tp_dealloc" + ] + "reachable" -> "uncollectable" [ + label=< cyclic
isolate
(no GC
support) > + labeltooltip="reachable to uncollectable: cyclic isolate (no GC support)" + tooltip="reachable to uncollectable: cyclic isolate (no GC support)" + ] + "reachable" -> "tp_dealloc" [ + label=< no refs> + labeltooltip="reachable to tp_dealloc: no refs" + ] + "tp_dealloc" -> "tp_free" [ + arrowhead=empty + label=< direct call > + labeltooltip="tp_dealloc to tp_free: direct call" + tooltip="tp_dealloc to tp_free: direct call" + ] +} diff --git a/Doc/c-api/lifecycle.dot.css b/Doc/c-api/lifecycle.dot.css new file mode 100644 index 00000000000000..3abf95b74da6ba --- /dev/null +++ b/Doc/c-api/lifecycle.dot.css @@ -0,0 +1,21 @@ +#life_events_graph { + --svg-fgcolor: currentcolor; + --svg-bgcolor: transparent; +} +#life_events_graph a { + color: inherit; +} +#life_events_graph [stroke="black"] { + stroke: var(--svg-fgcolor); +} +#life_events_graph text, +#life_events_graph [fill="black"] { + fill: var(--svg-fgcolor); +} +#life_events_graph [fill="white"] { + fill: var(--svg-bgcolor); +} +#life_events_graph [fill="none"] { + /* On links, setting fill will make the entire shape clickable */ + fill: var(--svg-bgcolor); +} diff --git a/Doc/c-api/lifecycle.dot.pdf b/Doc/c-api/lifecycle.dot.pdf new file mode 100644 index 00000000000000..ed5b5039c83e2c Binary files /dev/null and b/Doc/c-api/lifecycle.dot.pdf differ diff --git a/Doc/c-api/lifecycle.dot.svg b/Doc/c-api/lifecycle.dot.svg new file mode 100644 index 00000000000000..7ace27dfcba113 --- /dev/null +++ b/Doc/c-api/lifecycle.dot.svg @@ -0,0 +1,374 @@ + + + + + + + +Life Events + + + + +tp_new + + +tp_new + + + + + +start->tp_new + + + + + + +    type call   + + + + + +tp_alloc + + +tp_alloc + + + + + +tp_new->tp_alloc + + + + + + +  direct call   + + + + + +tp_init + + +tp_init + + + + + +tp_new->tp_init + + + + + + + + +reachable + +reachable + + + +tp_init->reachable + + + + + + + + +reachable->tp_init + + + + + + + + +tp_traverse + + +tp_traverse + + + + + +reachable->tp_traverse + + + + + + +  not in a   +  cyclic   +  isolate   + + + + + +reachable->tp_traverse + + + + + + +  periodic   +  cyclic isolate    +  detection   + + + + + +finalized? + + +marked as +finalized? + + + + + +reachable->finalized? + + + + + + +  no refs   + + + + + +tp_finalize + + +tp_finalize + + + + + +reachable->tp_finalize + + + + + + +  resurrected   +  (maybe remove   +  finalized mark)   + + + + + +uncollectable + + +uncollectable +(leaked) + + + + + +reachable->uncollectable + + + + + + +  cyclic   +  isolate   +  (no GC   +  support)   + + + + + +tp_dealloc + + +tp_dealloc + + + + + +reachable->tp_dealloc + + + +  no refs + + + + + +tp_traverse->finalized? + + + + + + +  cyclic   +  isolate   + + + + + +finalized?->tp_finalize + + + + + + +  no (mark   +  as finalized)   + + + + + +tp_clear + + +tp_clear + + + + + +finalized?->tp_clear + + + + + + +  yes   + + + + + +tp_finalize->tp_clear + + + + + + +  no refs or    +  cyclic isolate   + + + + + +tp_finalize->tp_dealloc + + + + + + +  recommended +  call (see +  explanation) + + + + + +tp_finalize->tp_dealloc + + + + + + +   no refs   + + + + + +tp_clear->uncollectable + + + + + + +  cyclic   +  isolate   + + + + + +tp_clear->tp_dealloc + + + + + + +  no refs   + + + + + + +tp_free + + +tp_free + + + + + +tp_dealloc->tp_free + + + + + + +    direct call   + + + + + diff --git a/Doc/c-api/lifecycle.rst b/Doc/c-api/lifecycle.rst new file mode 100644 index 00000000000000..5a170862a26f44 --- /dev/null +++ b/Doc/c-api/lifecycle.rst @@ -0,0 +1,271 @@ +.. highlight:: c + +.. _life-cycle: + +Object Life Cycle +================= + +This section explains how a type's slots relate to each other throughout the +life of an object. It is not intended to be a complete canonical reference for +the slots; instead, refer to the slot-specific documentation in +:ref:`type-structs` for details about a particular slot. + + +Life Events +----------- + +The figure below illustrates the order of events that can occur throughout an +object's life. An arrow from *A* to *B* indicates that event *B* can occur +after event *A* has occurred, with the arrow's label indicating the condition +that must be true for *B* to occur after *A*. + +.. only:: html and not epub + + .. raw:: html + + + + .. raw:: html + :file: lifecycle.dot.svg + + .. raw:: html + + + +.. only:: epub or not (html or latex) + + .. image:: lifecycle.dot.svg + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail below. + +.. only:: latex + + .. image:: lifecycle.dot.pdf + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail below. + +.. container:: + :name: life-events-graph-description + + Explanation: + + * When a new object is constructed by calling its type: + + #. :c:member:`~PyTypeObject.tp_new` is called to create a new object. + #. :c:member:`~PyTypeObject.tp_alloc` is directly called by + :c:member:`~PyTypeObject.tp_new` to allocate the memory for the new + object. + #. :c:member:`~PyTypeObject.tp_init` initializes the newly created object. + :c:member:`!tp_init` can be called again to re-initialize an object, if + desired. The :c:member:`!tp_init` call can also be skipped entirely, + for example by Python code calling :py:meth:`~object.__new__`. + + * After :c:member:`!tp_init` completes, the object is ready to use. + * Some time after the last reference to an object is removed: + + #. If an object is not marked as *finalized*, it might be finalized by + marking it as *finalized* and calling its + :c:member:`~PyTypeObject.tp_finalize` function. Python does + *not* finalize an object when the last reference to it is deleted; use + :c:func:`PyObject_CallFinalizerFromDealloc` to ensure that + :c:member:`~PyTypeObject.tp_finalize` is always called. + #. If the object is marked as finalized, + :c:member:`~PyTypeObject.tp_clear` might be called by the garbage collector + to clear references held by the object. It is *not* called when the + object's reference count reaches zero. + #. :c:member:`~PyTypeObject.tp_dealloc` is called to destroy the object. + To avoid code duplication, :c:member:`~PyTypeObject.tp_dealloc` typically + calls into :c:member:`~PyTypeObject.tp_clear` to free up the object's + references. + #. When :c:member:`~PyTypeObject.tp_dealloc` finishes object destruction, + it directly calls :c:member:`~PyTypeObject.tp_free` (usually set to + :c:func:`PyObject_Free` or :c:func:`PyObject_GC_Del` automatically as + appropriate for the type) to deallocate the memory. + + * The :c:member:`~PyTypeObject.tp_finalize` function is permitted to add a + reference to the object if desired. If it does, the object is + *resurrected*, preventing its pending destruction. (Only + :c:member:`!tp_finalize` is allowed to resurrect an object; + :c:member:`~PyTypeObject.tp_clear` and + :c:member:`~PyTypeObject.tp_dealloc` cannot without calling into + :c:member:`!tp_finalize`.) Resurrecting an object may + or may not cause the object's *finalized* mark to be removed. Currently, + Python does not remove the *finalized* mark from a resurrected object if + it supports garbage collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set) but does remove the mark if the object does not support + garbage collection; either or both of these behaviors may change in the + future. + * :c:member:`~PyTypeObject.tp_dealloc` can optionally call + :c:member:`~PyTypeObject.tp_finalize` via + :c:func:`PyObject_CallFinalizerFromDealloc` if it wishes to reuse that + code to help with object destruction. This is recommended because it + guarantees that :c:member:`!tp_finalize` is always called before + destruction. See the :c:member:`~PyTypeObject.tp_dealloc` documentation + for example code. + * If the object is a member of a :term:`cyclic isolate` and either + :c:member:`~PyTypeObject.tp_clear` fails to break the reference cycle or + the cyclic isolate is not detected (perhaps :func:`gc.disable` was called, + or the :c:macro:`Py_TPFLAGS_HAVE_GC` flag was erroneously omitted in one + of the involved types), the objects remain indefinitely uncollectable + (they "leak"). See :data:`gc.garbage`. + + If the object is marked as supporting garbage collection (the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`), the following events are also possible: + + * The garbage collector occasionally calls + :c:member:`~PyTypeObject.tp_traverse` to identify :term:`cyclic isolates + `. + * When the garbage collector discovers a :term:`cyclic isolate`, it + finalizes one of the objects in the group by marking it as *finalized* and + calling its :c:member:`~PyTypeObject.tp_finalize` function, if it has one. + This repeats until the cyclic isolate doesn't exist or all of the objects + have been finalized. + * :c:member:`~PyTypeObject.tp_finalize` is permitted to resurrect the object + by adding a reference from outside the :term:`cyclic isolate`. The new + reference causes the group of objects to no longer form a cyclic isolate + (the reference cycle may still exist, but if it does the objects are no + longer isolated). + * When the garbage collector discovers a :term:`cyclic isolate` and all of + the objects in the group have already been marked as *finalized*, the + garbage collector clears one or more of the uncleared objects in the group + (possibly concurrently) by calling each's + :c:member:`~PyTypeObject.tp_clear` function. This repeats as long as the + cyclic isolate still exists and not all of the objects have been cleared. + + +Cyclic Isolate Destruction +-------------------------- + +Listed below are the stages of life of a hypothetical :term:`cyclic isolate` +that continues to exist after each member object is finalized or cleared. It +is a memory leak if a cyclic isolate progresses through all of these stages; it should +vanish once all objects are cleared, if not sooner. A cyclic isolate can +vanish either because the reference cycle is broken or because the objects are +no longer isolated due to finalizer resurrection (see +:c:member:`~PyTypeObject.tp_finalize`). + +0. **Reachable** (not yet a cyclic isolate): All objects are in their normal, + reachable state. A reference cycle could exist, but an external reference + means the objects are not yet isolated. +#. **Unreachable but consistent:** The final reference from outside the cyclic + group of objects has been removed, causing the objects to become isolated + (thus a cyclic isolate is born). None of the group's objects have been + finalized or cleared yet. The cyclic isolate remains at this stage until + some future run of the garbage collector (not necessarily the next run + because the next run might not scan every object). +#. **Mix of finalized and not finalized:** Objects in a cyclic isolate are + finalized one at a time, which means that there is a period of time when the + cyclic isolate is composed of a mix of finalized and non-finalized objects. + Finalization order is unspecified, so it can appear random. A finalized + object must behave in a sane manner when non-finalized objects interact with + it, and a non-finalized object must be able to tolerate the finalization of + an arbitrary subset of its referents. +#. **All finalized:** All objects in a cyclic isolate are finalized before any + of them are cleared. +#. **Mix of finalized and cleared:** The objects can be cleared serially or + concurrently (but with the :term:`GIL` held); either way, some will finish + before others. A finalized object must be able to tolerate the clearing of + a subset of its referents. :pep:`442` calls this stage "cyclic trash". +#. **Leaked:** If a cyclic isolate still exists after all objects in the group + have been finalized and cleared, then the objects remain indefinitely + uncollectable (see :data:`gc.garbage`). It is a bug if a cyclic isolate + reaches this stage---it means the :c:member:`~PyTypeObject.tp_clear` methods + of the participating objects have failed to break the reference cycle as + required. + +If :c:member:`~PyTypeObject.tp_clear` did not exist, then Python would have no +way to safely break a reference cycle. Simply destroying an object in a cyclic +isolate would result in a dangling pointer, triggering undefined behavior when +an object referencing the destroyed object is itself destroyed. The clearing +step makes object destruction a two-phase process: first +:c:member:`~PyTypeObject.tp_clear` is called to partially destroy the objects +enough to detangle them from each other, then +:c:member:`~PyTypeObject.tp_dealloc` is called to complete the destruction. + +Unlike clearing, finalization is not a phase of destruction. A finalized +object must still behave properly by continuing to fulfill its design +contracts. An object's finalizer is allowed to execute arbitrary Python code, +and is even allowed to prevent the impending destruction by adding a reference. +The finalizer is only related to destruction by call order---if it runs, it runs +before destruction, which starts with :c:member:`~PyTypeObject.tp_clear` (if +called) and concludes with :c:member:`~PyTypeObject.tp_dealloc`. + +The finalization step is not necessary to safely reclaim the objects in a +cyclic isolate, but its existence makes it easier to design types that behave +in a sane manner when objects are cleared. Clearing an object might +necessarily leave it in a broken, partially destroyed state---it might be +unsafe to call any of the cleared object's methods or access any of its +attributes. With finalization, only finalized objects can possibly interact +with cleared objects; non-finalized objects are guaranteed to interact with +only non-cleared (but potentially finalized) objects. + +To summarize the possible interactions: + +* A non-finalized object might have references to or from non-finalized and + finalized objects, but not to or from cleared objects. +* A finalized object might have references to or from non-finalized, finalized, + and cleared objects. +* A cleared object might have references to or from finalized and cleared + objects, but not to or from non-finalized objects. + +Without any reference cycles, an object can be simply destroyed once its last +reference is deleted; the finalization and clearing steps are not necessary to +safely reclaim unused objects. However, it can be useful to automatically call +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` +before destruction anyway because type design is simplified when all objects +always experience the same series of events regardless of whether they +participated in a cyclic isolate. Python currently only calls +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` as +needed to destroy a cyclic isolate; this may change in a future version. + + +Functions +--------- + +To allocate and free memory, see :ref:`allocating-objects`. + + +.. c:function:: void PyObject_CallFinalizer(PyObject *op) + + Finalizes the object as described in :c:member:`~PyTypeObject.tp_finalize`. + Call this function (or :c:func:`PyObject_CallFinalizerFromDealloc`) instead + of calling :c:member:`~PyTypeObject.tp_finalize` directly because this + function may deduplicate multiple calls to :c:member:`!tp_finalize`. + Currently, calls are only deduplicated if the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set); this may + change in the future. + + +.. c:function:: int PyObject_CallFinalizerFromDealloc(PyObject *op) + + Same as :c:func:`PyObject_CallFinalizer` but meant to be called at the + beginning of the object's destructor (:c:member:`~PyTypeObject.tp_dealloc`). + There must not be any references to the object. If the object's finalizer + resurrects the object, this function returns -1; no further destruction + should happen. Otherwise, this function returns 0 and destruction can + continue normally. + + .. seealso:: + + :c:member:`~PyTypeObject.tp_dealloc` for example code. diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 25d9e62e387279..2d0bda76697e81 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -439,7 +439,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. All *n_bytes* of the buffer are written: large buffers are padded with zeroes. - If the returned value is greater than than *n_bytes*, the value was + If the returned value is greater than *n_bytes*, the value was truncated: as many of the lowest bits of the value as could fit are written, and the higher bits are ignored. This matches the typical behavior of a C-style downcast. diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index 64ae35daa703b8..61fa49f8681cce 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -376,6 +376,24 @@ The :ref:`default object allocator ` uses the If *p* is ``NULL``, no operation is performed. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_GC_New` or + :c:macro:`PyObject_GC_NewVar`; use :c:func:`PyObject_GC_Del` instead. + + .. seealso:: + + * :c:func:`PyObject_GC_Del` is the equivalent of this function for memory + allocated by types that support garbage collection. + * :c:func:`PyObject_Malloc` + * :c:func:`PyObject_Realloc` + * :c:func:`PyObject_Calloc` + * :c:macro:`PyObject_New` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. _default-memory-allocators: diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index f7f4d37d4c721f..c8edcecc5b419f 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -127,25 +127,36 @@ Module Objects unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead. -.. _initializing-modules: +.. _pymoduledef: -Initializing C modules -^^^^^^^^^^^^^^^^^^^^^^ +Module definitions +------------------ -Modules objects are usually created from extension modules (shared libraries -which export an initialization function), or compiled-in modules -(where the initialization function is added using :c:func:`PyImport_AppendInittab`). -See :ref:`building` or :ref:`extending-with-embedding` for details. +The functions in the previous section work on any module object, including +modules imported from Python code. -The initialization function can either pass a module definition instance -to :c:func:`PyModule_Create`, and return the resulting module object, -or request "multi-phase initialization" by returning the definition struct itself. +Modules defined using the C API typically use a *module definition*, +:c:type:`PyModuleDef` -- a statically allocated, constant “description" of +how a module should be created. + +The definition is usually used to define an extension's “main” module object +(see :ref:`extension-modules` for details). +It is also used to +:ref:`create extension modules dynamically `. + +Unlike :c:func:`PyModule_New`, the definition allows management of +*module state* -- a piece of memory that is allocated and cleared together +with the module object. +Unlike the module's Python attributes, Python code cannot replace or delete +data stored in module state. .. c:type:: PyModuleDef The module definition struct, which holds all information needed to create - a module object. There is usually only one statically initialized variable - of this type for each module. + a module object. + This structure must be statically allocated (or be otherwise guaranteed + to be valid while any modules created from it exist). + Usually, there is only one variable of this type for each extension module. .. c:member:: PyModuleDef_Base m_base @@ -170,13 +181,15 @@ or request "multi-phase initialization" by returning the definition struct itsel and freed when the module object is deallocated, after the :c:member:`~PyModuleDef.m_free` function has been called, if present. - Setting ``m_size`` to ``-1`` means that the module does not support - sub-interpreters, because it has global state. - Setting it to a non-negative value means that the module can be re-initialized and specifies the additional amount of memory it requires - for its state. Non-negative ``m_size`` is required for multi-phase - initialization. + for its state. + + Setting ``m_size`` to ``-1`` means that the module does not support + sub-interpreters, because it has global state. + Negative ``m_size`` is only allowed when using + :ref:`legacy single-phase initialization ` + or when :ref:`creating modules dynamically `. See :PEP:`3121` for more details. @@ -189,7 +202,7 @@ or request "multi-phase initialization" by returning the definition struct itsel An array of slot definitions for multi-phase initialization, terminated by a ``{0, NULL}`` entry. - When using single-phase initialization, *m_slots* must be ``NULL``. + When using legacy single-phase initialization, *m_slots* must be ``NULL``. .. versionchanged:: 3.5 @@ -249,78 +262,9 @@ or request "multi-phase initialization" by returning the definition struct itsel .. versionchanged:: 3.9 No longer called before the module state is allocated. -Single-phase initialization -........................... - -The module initialization function may create and return the module object -directly. This is referred to as "single-phase initialization", and uses one -of the following two module creation functions: - -.. c:function:: PyObject* PyModule_Create(PyModuleDef *def) - - Create a new module object, given the definition in *def*. This behaves - like :c:func:`PyModule_Create2` with *module_api_version* set to - :c:macro:`PYTHON_API_VERSION`. - -.. c:function:: PyObject* PyModule_Create2(PyModuleDef *def, int module_api_version) - - Create a new module object, given the definition in *def*, assuming the - API version *module_api_version*. If that version does not match the version - of the running interpreter, a :exc:`RuntimeWarning` is emitted. - - Return ``NULL`` with an exception set on error. - - .. note:: - - Most uses of this function should be using :c:func:`PyModule_Create` - instead; only use this if you are sure you need it. - -Before it is returned from in the initialization function, the resulting module -object is typically populated using functions like :c:func:`PyModule_AddObjectRef`. - -.. _multi-phase-initialization: - -Multi-phase initialization -.......................... - -An alternate way to specify extensions is to request "multi-phase initialization". -Extension modules created this way behave more like Python modules: the -initialization is split between the *creation phase*, when the module object -is created, and the *execution phase*, when it is populated. -The distinction is similar to the :py:meth:`!__new__` and :py:meth:`!__init__` methods -of classes. - -Unlike modules created using single-phase initialization, these modules are not -singletons: if the *sys.modules* entry is removed and the module is re-imported, -a new module object is created, and the old module is subject to normal garbage -collection -- as with Python modules. -By default, multiple modules created from the same definition should be -independent: changes to one should not affect the others. -This means that all state should be specific to the module object (using e.g. -using :c:func:`PyModule_GetState`), or its contents (such as the module's -:attr:`~object.__dict__` or individual classes created with :c:func:`PyType_FromSpec`). - -All modules created using multi-phase initialization are expected to support -:ref:`sub-interpreters `. Making sure multiple modules -are independent is typically enough to achieve this. - -To request multi-phase initialization, the initialization function -(PyInit_modulename) returns a :c:type:`PyModuleDef` instance with non-empty -:c:member:`~PyModuleDef.m_slots`. Before it is returned, the ``PyModuleDef`` -instance must be initialized with the following function: - -.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) - - Ensures a module definition is a properly initialized Python object that - correctly reports its type and reference count. - - Returns *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. - - .. versionadded:: 3.5 - -The *m_slots* member of the module definition must point to an array of -``PyModuleDef_Slot`` structures: +Module slots +............ .. c:type:: PyModuleDef_Slot @@ -334,8 +278,6 @@ The *m_slots* member of the module definition must point to an array of .. versionadded:: 3.5 -The *m_slots* array must be terminated by a slot with id 0. - The available slot types are: .. c:macro:: Py_mod_create @@ -446,21 +388,48 @@ The available slot types are: .. versionadded:: 3.13 -See :PEP:`489` for more details on multi-phase initialization. -Low-level module creation functions -................................... +.. _moduledef-dynamic: -The following functions are called under the hood when using multi-phase -initialization. They can be used directly, for example when creating module -objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and -``PyModule_ExecDef`` must be called to fully initialize a module. +Creating extension modules dynamically +-------------------------------------- + +The following functions may be used to create a module outside of an +extension's :ref:`initialization function `. +They are also used in +:ref:`single-phase initialization `. + +.. c:function:: PyObject* PyModule_Create(PyModuleDef *def) + + Create a new module object, given the definition in *def*. + This is a macro that calls :c:func:`PyModule_Create2` with + *module_api_version* set to :c:macro:`PYTHON_API_VERSION`, or + to :c:macro:`PYTHON_ABI_VERSION` if using the + :ref:`limited API `. + +.. c:function:: PyObject* PyModule_Create2(PyModuleDef *def, int module_api_version) + + Create a new module object, given the definition in *def*, assuming the + API version *module_api_version*. If that version does not match the version + of the running interpreter, a :exc:`RuntimeWarning` is emitted. + + Return ``NULL`` with an exception set on error. + + This function does not support slots. + The :c:member:`~PyModuleDef.m_slots` member of *def* must be ``NULL``. + + + .. note:: + + Most uses of this function should be using :c:func:`PyModule_Create` + instead; only use this if you are sure you need it. .. c:function:: PyObject * PyModule_FromDefAndSpec(PyModuleDef *def, PyObject *spec) - Create a new module object, given the definition in *def* and the - ModuleSpec *spec*. This behaves like :c:func:`PyModule_FromDefAndSpec2` - with *module_api_version* set to :c:macro:`PYTHON_API_VERSION`. + This macro calls :c:func:`PyModule_FromDefAndSpec2` with + *module_api_version* set to :c:macro:`PYTHON_API_VERSION`, or + to :c:macro:`PYTHON_ABI_VERSION` if using the + :ref:`limited API `. .. versionadded:: 3.5 @@ -473,6 +442,10 @@ objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and Return ``NULL`` with an exception set on error. + Note that this does not process execution slots (:c:data:`Py_mod_exec`). + Both ``PyModule_FromDefAndSpec`` and ``PyModule_ExecDef`` must be called + to fully initialize a module. + .. note:: Most uses of this function should be using :c:func:`PyModule_FromDefAndSpec` @@ -486,35 +459,29 @@ objects dynamically. Note that both ``PyModule_FromDefAndSpec`` and .. versionadded:: 3.5 -.. c:function:: int PyModule_SetDocString(PyObject *module, const char *docstring) +.. c:macro:: PYTHON_API_VERSION - Set the docstring for *module* to *docstring*. - This function is called automatically when creating a module from - ``PyModuleDef``, using either ``PyModule_Create`` or - ``PyModule_FromDefAndSpec``. + The C API version. Defined for backwards compatibility. - .. versionadded:: 3.5 + Currently, this constant is not updated in new Python versions, and is not + useful for versioning. This may change in the future. -.. c:function:: int PyModule_AddFunctions(PyObject *module, PyMethodDef *functions) +.. c:macro:: PYTHON_ABI_VERSION - Add the functions from the ``NULL`` terminated *functions* array to *module*. - Refer to the :c:type:`PyMethodDef` documentation for details on individual - entries (due to the lack of a shared module namespace, module level - "functions" implemented in C typically receive the module as their first - parameter, making them similar to instance methods on Python classes). - This function is called automatically when creating a module from - ``PyModuleDef``, using either ``PyModule_Create`` or - ``PyModule_FromDefAndSpec``. + Defined as ``3`` for backwards compatibility. + + Currently, this constant is not updated in new Python versions, and is not + useful for versioning. This may change in the future. - .. versionadded:: 3.5 Support functions -................. +----------------- -The module initialization function (if using single phase initialization) or -a function called from a module execution slot (if using multi-phase -initialization), can use the following functions to help initialize the module -state: +The following functions are provided to help initialize a module +state. +They are intended for a module's execution slots (:c:data:`Py_mod_exec`), +the initialization function for legacy :ref:`single-phase initialization `, +or code that creates modules dynamically. .. c:function:: int PyModule_AddObjectRef(PyObject *module, const char *name, PyObject *value) @@ -663,12 +630,39 @@ state: .. versionadded:: 3.9 +.. c:function:: int PyModule_AddFunctions(PyObject *module, PyMethodDef *functions) + + Add the functions from the ``NULL`` terminated *functions* array to *module*. + Refer to the :c:type:`PyMethodDef` documentation for details on individual + entries (due to the lack of a shared module namespace, module level + "functions" implemented in C typically receive the module as their first + parameter, making them similar to instance methods on Python classes). + + This function is called automatically when creating a module from + ``PyModuleDef`` (such as when using :ref:`multi-phase-initialization`, + ``PyModule_Create``, or ``PyModule_FromDefAndSpec``). + Some module authors may prefer defining functions in multiple + :c:type:`PyMethodDef` arrays; in that case they should call this function + directly. + + .. versionadded:: 3.5 + +.. c:function:: int PyModule_SetDocString(PyObject *module, const char *docstring) + + Set the docstring for *module* to *docstring*. + This function is called automatically when creating a module from + ``PyModuleDef`` (such as when using :ref:`multi-phase-initialization`, + ``PyModule_Create``, or ``PyModule_FromDefAndSpec``). + + .. versionadded:: 3.5 + .. c:function:: int PyUnstable_Module_SetGIL(PyObject *module, void *gil) Indicate that *module* does or does not support running without the global interpreter lock (GIL), using one of the values from :c:macro:`Py_mod_gil`. It must be called during *module*'s initialization - function. If this function is not called during module initialization, the + function when using :ref:`single-phase-initialization`. + If this function is not called during module initialization, the import machinery assumes the module does not support running without the GIL. This function is only available in Python builds configured with :option:`--disable-gil`. @@ -677,10 +671,11 @@ state: .. versionadded:: 3.13 -Module lookup -^^^^^^^^^^^^^ +Module lookup (single-phase initialization) +........................................... -Single-phase initialization creates singleton modules that can be looked up +The legacy :ref:`single-phase initialization ` +initialization scheme creates singleton modules that can be looked up in the context of the current interpreter. This allows the module object to be retrieved later with only a reference to the module definition. @@ -701,7 +696,8 @@ since multiple such modules can be created from a single definition. Only effective on modules created using single-phase initialization. - Python calls ``PyState_AddModule`` automatically after importing a module, + Python calls ``PyState_AddModule`` automatically after importing a module + that uses :ref:`single-phase initialization `, so it is unnecessary (but harmless) to call it from module initialization code. An explicit call is needed only if the module's own init code subsequently calls ``PyState_FindModule``. @@ -709,6 +705,9 @@ since multiple such modules can be created from a single definition. mechanisms (either by calling it directly, or by referring to its implementation for details of the required state updates). + If a module was attached previously using the same *def*, it is replaced + by the new *module*. + The caller must have an :term:`attached thread state`. Return ``-1`` with an exception set on error, ``0`` on success. diff --git a/Doc/c-api/objimpl.rst b/Doc/c-api/objimpl.rst index 8bd8c107c98bdf..83de4248039949 100644 --- a/Doc/c-api/objimpl.rst +++ b/Doc/c-api/objimpl.rst @@ -12,6 +12,7 @@ object types. .. toctree:: allocation.rst + lifecycle.rst structures.rst typeobj.rst gcsupport.rst diff --git a/Doc/c-api/refcounting.rst b/Doc/c-api/refcounting.rst index b23f016f9b0a06..57a0728d4e9af4 100644 --- a/Doc/c-api/refcounting.rst +++ b/Doc/c-api/refcounting.rst @@ -210,7 +210,7 @@ of Python objects. Py_SETREF(dst, src); - That arranges to set *dst* to *src* _before_ releasing the reference + That arranges to set *dst* to *src* *before* releasing the reference to the old value of *dst*, so that any code triggered as a side-effect of *dst* getting torn down no longer believes *dst* points to a valid object. diff --git a/Doc/c-api/stable.rst b/Doc/c-api/stable.rst index 124e58cf950b7a..9b65e0b8d23d93 100644 --- a/Doc/c-api/stable.rst +++ b/Doc/c-api/stable.rst @@ -51,6 +51,7 @@ It is generally intended for specialized, low-level tools like debuggers. Projects that use this API are expected to follow CPython development and spend extra effort adjusting to changes. +.. _stable-application-binary-interface: Stable Application Binary Interface =================================== diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 987bc167c68d6c..58dd915e04f619 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -28,18 +28,52 @@ under :ref:`reference counting `. object. In a normal "release" build, it contains only the object's reference count and a pointer to the corresponding type object. Nothing is actually declared to be a :c:type:`PyObject`, but every pointer - to a Python object can be cast to a :c:expr:`PyObject*`. Access to the - members must be done by using the macros :c:macro:`Py_REFCNT` and - :c:macro:`Py_TYPE`. + to a Python object can be cast to a :c:expr:`PyObject*`. + + The members must not be accessed directly; instead use macros such as + :c:macro:`Py_REFCNT` and :c:macro:`Py_TYPE`. + + .. c:member:: Py_ssize_t ob_refcnt + + The object's reference count, as returned by :c:macro:`Py_REFCNT`. + Do not use this field directly; instead use functions and macros such as + :c:macro:`!Py_REFCNT`, :c:func:`Py_INCREF` and :c:func:`Py_DecRef`. + + The field type may be different from ``Py_ssize_t``, depending on + build configuration and platform. + + .. c:member:: PyTypeObject* ob_type + + The object's type. + Do not use this field directly; use :c:macro:`Py_TYPE` and + :c:func:`Py_SET_TYPE` instead. .. c:type:: PyVarObject - This is an extension of :c:type:`PyObject` that adds the :c:member:`~PyVarObject.ob_size` - field. This is only used for objects that have some notion of *length*. - This type does not often appear in the Python/C API. - Access to the members must be done by using the macros - :c:macro:`Py_REFCNT`, :c:macro:`Py_TYPE`, and :c:macro:`Py_SIZE`. + An extension of :c:type:`PyObject` that adds the + :c:member:`~PyVarObject.ob_size` field. + This is intended for objects that have some notion of *length*. + + As with :c:type:`!PyObject`, the members must not be accessed directly; + instead use macros such as :c:macro:`Py_SIZE`, :c:macro:`Py_REFCNT` and + :c:macro:`Py_TYPE`. + + .. c:member:: Py_ssize_t ob_size + + A size field, whose contents should be considered an object's internal + implementation detail. + + Do not use this field directly; use :c:macro:`Py_SIZE` instead. + + Object creation functions such as :c:func:`PyObject_NewVar` will + generally set this field to the requested size (number of items). + After creation, arbitrary values can be stored in :c:member:`!ob_size` + using :c:macro:`Py_SET_SIZE`. + + To get an object's publicly exposed length, as returned by + the Python function :py:func:`len`, use :c:func:`PyObject_Length` + instead. .. c:macro:: PyObject_HEAD @@ -103,9 +137,8 @@ under :ref:`reference counting `. Get the type of the Python object *o*. - Return a :term:`borrowed reference`. - - Use the :c:func:`Py_SET_TYPE` function to set an object type. + The returned reference is :term:`borrowed ` from *o*. + Do not release it with :c:func:`Py_DECREF` or similar. .. versionchanged:: 3.11 :c:func:`Py_TYPE()` is changed to an inline static function. @@ -122,16 +155,26 @@ under :ref:`reference counting `. .. c:function:: void Py_SET_TYPE(PyObject *o, PyTypeObject *type) - Set the object *o* type to *type*. + Set the type of object *o* to *type*, without any checking or reference + counting. + + This is a very low-level operation. + Consider instead setting the Python attribute :attr:`~object.__class__` + using :c:func:`PyObject_SetAttrString` or similar. + + Note that assigning an incompatible type can lead to undefined behavior. + + If *type* is a :ref:`heap type `, the caller must create a + new reference to it. + Similarly, if the old type of *o* is a heap type, the caller must release + a reference to that type. .. versionadded:: 3.9 .. c:function:: Py_ssize_t Py_SIZE(PyVarObject *o) - Get the size of the Python object *o*. - - Use the :c:func:`Py_SET_SIZE` function to set an object size. + Get the :c:member:`~PyVarObject.ob_size` field of *o*. .. versionchanged:: 3.11 :c:func:`Py_SIZE()` is changed to an inline static function. @@ -140,7 +183,7 @@ under :ref:`reference counting `. .. c:function:: void Py_SET_SIZE(PyVarObject *o, Py_ssize_t size) - Set the object *o* size to *size*. + Set the :c:member:`~PyVarObject.ob_size` field of *o* to *size*. .. versionadded:: 3.9 diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index ec2867b0ce09ba..5bdbff4e0ad990 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -151,14 +151,29 @@ Type Objects .. c:function:: PyObject* PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) - Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type object. Use - Python's default memory allocation mechanism to allocate a new instance and - initialize all its contents to ``NULL``. + Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type + object. Uses Python's default memory allocation mechanism to allocate memory + for a new instance, zeros the memory, then initializes the memory as if by + calling :c:func:`PyObject_Init` or :c:func:`PyObject_InitVar`. + + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + For types that support garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set), this function behaves like + :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar` (except the + memory is guaranteed to be zeroed before initialization), and should be + paired with :c:func:`PyObject_GC_Del` in :c:member:`~PyTypeObject.tp_free`. + Otherwise, it behaves like :c:macro:`PyObject_New` or + :c:macro:`PyObject_NewVar` (except the memory is guaranteed to be zeroed + before initialization) and should be paired with :c:func:`PyObject_Free` in + :c:member:`~PyTypeObject.tp_free`. .. c:function:: PyObject* PyType_GenericNew(PyTypeObject *type, PyObject *args, PyObject *kwds) - Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type object. Create a - new instance using the type's :c:member:`~PyTypeObject.tp_alloc` slot. + Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type + object. Creates a new instance using the type's + :c:member:`~PyTypeObject.tp_alloc` slot and returns the resulting object. .. c:function:: int PyType_Ready(PyTypeObject *type) @@ -267,6 +282,10 @@ Type Objects and other places where a method's defining class cannot be passed using the :c:type:`PyCMethod` calling convention. + The returned reference is :term:`borrowed ` from *type*, + and will be valid as long as you hold a reference to *type*. + Do not release it with :c:func:`Py_DECREF` or similar. + .. versionadded:: 3.11 .. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 3b9f07778d5ace..060d6f60174b41 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -79,7 +79,7 @@ Quick Reference | :c:member:`~PyTypeObject.tp_setattro` | :c:type:`setattrofunc` | __setattr__, | X | X | | G | | | | __delattr__ | | | | | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ - | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | | | | | % | + | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | :ref:`sub-slots` | | | | % | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ | :c:member:`~PyTypeObject.tp_flags` | unsigned long | | X | X | | ? | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ @@ -325,9 +325,10 @@ sub-slots +---------------------------------------------------------+-----------------------------------+---------------+ | | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | | + | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | __buffer__ | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | | + | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | __release_\ | + | | | buffer\__ | +---------------------------------------------------------+-----------------------------------+---------------+ .. _slot-typedefs-table: @@ -491,9 +492,9 @@ metatype) initializes :c:member:`~PyTypeObject.tp_itemsize`, which means that it type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. -.. c:member:: Py_ssize_t PyObject.ob_refcnt +:c:member:`PyObject.ob_refcnt` - This is the type object's reference count, initialized to ``1`` by the + The type object's reference count is initialized to ``1`` by the ``PyObject_HEAD_INIT`` macro. Note that for :ref:`statically allocated type objects `, the type's instances (objects whose :c:member:`~PyObject.ob_type` points back to the type) do *not* count as references. But for @@ -505,7 +506,7 @@ type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. This field is not inherited by subtypes. -.. c:member:: PyTypeObject* PyObject.ob_type +:c:member:`PyObject.ob_type` This is the type's type, in other words its metatype. It is initialized by the argument to the ``PyObject_HEAD_INIT`` macro, and its value should normally be @@ -531,14 +532,13 @@ type objects) *must* have the :c:member:`~PyVarObject.ob_size` field. PyVarObject Slots ----------------- -.. c:member:: Py_ssize_t PyVarObject.ob_size +:c:member:`PyVarObject.ob_size` For :ref:`statically allocated type objects `, this should be initialized to zero. For :ref:`dynamically allocated type objects `, this field has a special internal meaning. - This field should be accessed using the :c:func:`Py_SIZE()` and - :c:func:`Py_SET_SIZE()` macros. + This field should be accessed using the :c:func:`Py_SIZE()` macro. **Inheritance:** @@ -676,77 +676,142 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_dealloc - A pointer to the instance destructor function. This function must be defined - unless the type guarantees that its instances will never be deallocated (as is - the case for the singletons ``None`` and ``Ellipsis``). The function signature is:: + A pointer to the instance destructor function. The function signature is:: void tp_dealloc(PyObject *self); - The destructor function is called by the :c:func:`Py_DECREF` and - :c:func:`Py_XDECREF` macros when the new reference count is zero. At this point, - the instance is still in existence, but there are no references to it. The - destructor function should free all references which the instance owns, free all - memory buffers owned by the instance (using the freeing function corresponding - to the allocation function used to allocate the buffer), and call the type's - :c:member:`~PyTypeObject.tp_free` function. If the type is not subtypable - (doesn't have the :c:macro:`Py_TPFLAGS_BASETYPE` flag bit set), it is - permissible to call the object deallocator directly instead of via - :c:member:`~PyTypeObject.tp_free`. The object deallocator should be the one used to allocate the - instance; this is normally :c:func:`PyObject_Free` if the instance was allocated - using :c:macro:`PyObject_New` or :c:macro:`PyObject_NewVar`, or - :c:func:`PyObject_GC_Del` if the instance was allocated using - :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. - - If the type supports garbage collection (has the :c:macro:`Py_TPFLAGS_HAVE_GC` - flag bit set), the destructor should call :c:func:`PyObject_GC_UnTrack` - before clearing any member fields. + The destructor function should remove all references which the instance owns + (e.g., call :c:func:`Py_CLEAR`), free all memory buffers owned by the + instance, and call the type's :c:member:`~PyTypeObject.tp_free` function to + free the object itself. + + If you may call functions that may set the error indicator, you must use + :c:func:`PyErr_GetRaisedException` and :c:func:`PyErr_SetRaisedException` + to ensure you don't clobber a preexisting error indicator (the deallocation + could have occurred while processing a different error): .. code-block:: c static void - foo_dealloc(PyObject *op) + foo_dealloc(foo_object *self) { + PyObject *et, *ev, *etb; + PyObject *exc = PyErr_GetRaisedException(); + ... + PyErr_SetRaisedException(exc); + } + + The dealloc handler itself must not raise an exception; if it hits an error + case it should call :c:func:`PyErr_FormatUnraisable` to log (and clear) an + unraisable exception. + + No guarantees are made about when an object is destroyed, except: + + * Python will destroy an object immediately or some time after the final + reference to the object is deleted, unless its finalizer + (:c:member:`~PyTypeObject.tp_finalize`) subsequently resurrects the + object. + * An object will not be destroyed while it is being automatically finalized + (:c:member:`~PyTypeObject.tp_finalize`) or automatically cleared + (:c:member:`~PyTypeObject.tp_clear`). + + CPython currently destroys an object immediately from :c:func:`Py_DECREF` + when the new reference count is zero, but this may change in a future + version. + + It is recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is always + finalized before destruction. + + If the type supports garbage collection (the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set), the destructor should call :c:func:`PyObject_GC_UnTrack` + before clearing any member fields. + + It is permissible to call :c:member:`~PyTypeObject.tp_clear` from + :c:member:`!tp_dealloc` to reduce code duplication and to guarantee that the + object is always cleared before destruction. Beware that + :c:member:`!tp_clear` might have already been called. + + If the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the + deallocator should release the owned reference to its type object (via + :c:func:`Py_DECREF`) after calling the type deallocator. See the example + code below.:: + + static void + foo_dealloc(PyObject *op) + { foo_object *self = (foo_object *) op; PyObject_GC_UnTrack(self); Py_CLEAR(self->ref); Py_TYPE(self)->tp_free(self); - } + } - Finally, if the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the - deallocator should release the owned reference to its type object - (via :c:func:`Py_DECREF`) after - calling the type deallocator. In order to avoid dangling pointers, the - recommended way to achieve this is: + :c:member:`!tp_dealloc` must leave the exception status unchanged. If it + needs to call something that might raise an exception, the exception state + must be backed up first and restored later (after logging any exceptions + with :c:func:`PyErr_WriteUnraisable`). - .. code-block:: c + Example:: - static void - foo_dealloc(PyObject *op) - { - PyTypeObject *tp = Py_TYPE(op); - // free references and buffers here - tp->tp_free(op); - Py_DECREF(tp); - } + static void + foo_dealloc(PyObject *self) + { + PyObject *exc = PyErr_GetRaisedException(); - .. warning:: + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + // self was resurrected. + goto done; + } + + PyTypeObject *tp = Py_TYPE(self); + + if (tp->tp_flags & Py_TPFLAGS_HAVE_GC) { + PyObject_GC_UnTrack(self); + } + + // Optional, but convenient to avoid code duplication. + if (tp->tp_clear && tp->tp_clear(self) < 0) { + PyErr_WriteUnraisable(self); + } - In a garbage collected Python, :c:member:`!tp_dealloc` may be called from - any Python thread, not just the thread which created the object (if the - object becomes part of a refcount cycle, that cycle might be collected by - a garbage collection on any thread). This is not a problem for Python - API calls, since the thread on which :c:member:`!tp_dealloc` is called - with an :term:`attached thread state`. However, if the object being - destroyed in turn destroys objects from some other C or C++ library, care - should be taken to ensure that destroying those objects on the thread - which called :c:member:`!tp_dealloc` will not violate any assumptions of - the library. + // Any additional destruction goes here. + + tp->tp_free(self); + self = NULL; // In case PyErr_WriteUnraisable() is called below. + + if (tp->tp_flags & Py_TPFLAGS_HEAPTYPE) { + Py_CLEAR(tp); + } + + done: + // Optional, if something was called that might have raised an + // exception. + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(self); + } + PyErr_SetRaisedException(exc); + } + + :c:member:`!tp_dealloc` may be called from + any Python thread, not just the thread which created the object (if the + object becomes part of a refcount cycle, that cycle might be collected by + a garbage collection on any thread). This is not a problem for Python + API calls, since the thread on which :c:member:`!tp_dealloc` is called + with an :term:`attached thread state`. However, if the object being + destroyed in turn destroys objects from some other C library, care + should be taken to ensure that destroying those objects on the thread + which called :c:member:`!tp_dealloc` will not violate any assumptions of + the library. **Inheritance:** This field is inherited by subtypes. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: Py_ssize_t PyTypeObject.tp_vectorcall_offset @@ -1137,11 +1202,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_HAVE_GC This bit is set when the object supports garbage collection. If this bit - is set, instances must be created using :c:macro:`PyObject_GC_New` and - destroyed using :c:func:`PyObject_GC_Del`. More information in section - :ref:`supporting-cycle-detection`. This bit also implies that the - GC-related fields :c:member:`~PyTypeObject.tp_traverse` and :c:member:`~PyTypeObject.tp_clear` are present in - the type object. + is set, memory for new instances (see :c:member:`~PyTypeObject.tp_alloc`) + must be allocated using :c:macro:`PyObject_GC_New` or + :c:func:`PyType_GenericAlloc` and deallocated (see + :c:member:`~PyTypeObject.tp_free`) using :c:func:`PyObject_GC_Del`. More + information in section :ref:`supporting-cycle-detection`. **Inheritance:** @@ -1192,7 +1257,7 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_MANAGED_DICT - This bit indicates that instances of the class have a `~object.__dict__` + This bit indicates that instances of the class have a :attr:`~object.__dict__` attribute, and that the space for the dictionary is managed by the VM. If this flag is set, :c:macro:`Py_TPFLAGS_HAVE_GC` should also be set. @@ -1478,6 +1543,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) heap-allocated superclass). If they do not, the type object may not be garbage-collected. + .. note:: + + The :c:member:`~PyTypeObject.tp_traverse` function can be called from any + thread. + .. versionchanged:: 3.9 Heap-allocated types are expected to visit ``Py_TYPE(self)`` in @@ -1497,20 +1567,101 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: inquiry PyTypeObject.tp_clear - An optional pointer to a clear function for the garbage collector. This is only - used if the :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit is set. The signature is:: + An optional pointer to a clear function. The signature is:: int tp_clear(PyObject *); - The :c:member:`~PyTypeObject.tp_clear` member function is used to break reference cycles in cyclic - garbage detected by the garbage collector. Taken together, all :c:member:`~PyTypeObject.tp_clear` - functions in the system must combine to break all reference cycles. This is - subtle, and if in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. For example, - the tuple type does not implement a :c:member:`~PyTypeObject.tp_clear` function, because it's - possible to prove that no reference cycle can be composed entirely of tuples. - Therefore the :c:member:`~PyTypeObject.tp_clear` functions of other types must be sufficient to - break any cycle containing a tuple. This isn't immediately obvious, and there's - rarely a good reason to avoid implementing :c:member:`~PyTypeObject.tp_clear`. + The purpose of this function is to break reference cycles that are causing a + :term:`cyclic isolate` so that the objects can be safely destroyed. A + cleared object is a partially destroyed object; the object is not obligated + to satisfy design invariants held during normal use. + + :c:member:`!tp_clear` does not need to delete references to objects that + can't participate in reference cycles, such as Python strings or Python + integers. However, it may be convenient to clear all references, and write + the type's :c:member:`~PyTypeObject.tp_dealloc` function to invoke + :c:member:`!tp_clear` to avoid code duplication. (Beware that + :c:member:`!tp_clear` might have already been called. Prefer calling + idempotent functions like :c:func:`Py_CLEAR`.) + + Any non-trivial cleanup should be performed in + :c:member:`~PyTypeObject.tp_finalize` instead of :c:member:`!tp_clear`. + + .. note:: + + If :c:member:`!tp_clear` fails to break a reference cycle then the + objects in the :term:`cyclic isolate` may remain indefinitely + uncollectable ("leak"). See :data:`gc.garbage`. + + .. note:: + + Referents (direct and indirect) might have already been cleared; they are + not guaranteed to be in a consistent state. + + .. note:: + + The :c:member:`~PyTypeObject.tp_clear` function can be called from any + thread. + + .. note:: + + An object is not guaranteed to be automatically cleared before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. + + This function differs from the destructor + (:c:member:`~PyTypeObject.tp_dealloc`) in the following ways: + + * The purpose of clearing an object is to remove references to other objects + that might participate in a reference cycle. The purpose of the + destructor, on the other hand, is a superset: it must release *all* + resources it owns, including references to objects that cannot participate + in a reference cycle (e.g., integers) as well as the object's own memory + (by calling :c:member:`~PyTypeObject.tp_free`). + * When :c:member:`!tp_clear` is called, other objects might still hold + references to the object being cleared. Because of this, + :c:member:`!tp_clear` must not deallocate the object's own memory + (:c:member:`~PyTypeObject.tp_free`). The destructor, on the other hand, + is only called when no (strong) references exist, and as such, must + safely destroy the object itself by deallocating it. + * :c:member:`!tp_clear` might never be automatically called. An object's + destructor, on the other hand, will be automatically called some time + after the object becomes unreachable (i.e., either there are no references + to the object or the object is a member of a :term:`cyclic isolate`). + + No guarantees are made about when, if, or how often Python automatically + clears an object, except: + + * Python will not automatically clear an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically clear an object if it has not been + automatically finalized (see :c:member:`~PyTypeObject.tp_finalize`). (If + the finalizer resurrected the object, the object may or may not be + automatically finalized again before it is cleared.) + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically clear it if any member of the cyclic isolate has not yet + been automatically finalized (:c:member:`~PyTypeObject.tp_finalize`). + * Python will not destroy an object until after any automatic calls to its + :c:member:`!tp_clear` function have returned. This ensures that the act + of breaking a reference cycle does not invalidate the ``self`` pointer + while :c:member:`!tp_clear` is still executing. + * Python will not automatically call :c:member:`!tp_clear` multiple times + concurrently. + + CPython currently only automatically clears objects as needed to break + reference cycles in a :term:`cyclic isolate`, but future versions might + clear objects regularly before their destruction. + + Taken together, all :c:member:`~PyTypeObject.tp_clear` functions in the + system must combine to break all reference cycles. This is subtle, and if + in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. For + example, the tuple type does not implement a + :c:member:`~PyTypeObject.tp_clear` function, because it's possible to prove + that no reference cycle can be composed entirely of tuples. Therefore the + :c:member:`~PyTypeObject.tp_clear` functions of other types are responsible + for breaking any cycle containing a tuple. This isn't immediately obvious, + and there's rarely a good reason to avoid implementing + :c:member:`~PyTypeObject.tp_clear`. Implementations of :c:member:`~PyTypeObject.tp_clear` should drop the instance's references to those of its members that may be Python objects, and set its pointers to those @@ -1545,18 +1696,6 @@ and :c:data:`PyType_Type` effectively act as defaults.) PyObject_ClearManagedDict((PyObject*)self); - Note that :c:member:`~PyTypeObject.tp_clear` is not *always* called - before an instance is deallocated. For example, when reference counting - is enough to determine that an object is no longer used, the cyclic garbage - collector is not involved and :c:member:`~PyTypeObject.tp_dealloc` is - called directly. - - Because the goal of :c:member:`~PyTypeObject.tp_clear` functions is to break reference cycles, - it's not necessary to clear contained objects like Python strings or Python - integers, which can't participate in reference cycles. On the other hand, it may - be convenient to clear all contained Python objects, and write the type's - :c:member:`~PyTypeObject.tp_dealloc` function to invoke :c:member:`~PyTypeObject.tp_clear`. - More information about Python's garbage collection scheme can be found in section :ref:`supporting-cycle-detection`. @@ -1569,6 +1708,10 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:member:`~PyTypeObject.tp_clear` are all inherited from the base type if they are all zero in the subtype. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: richcmpfunc PyTypeObject.tp_richcompare @@ -1945,18 +2088,17 @@ and :c:data:`PyType_Type` effectively act as defaults.) **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement). + Static subtypes inherit this slot, which will be + :c:func:`PyType_GenericAlloc` if inherited from :class:`object`. + + :ref:`Heap subtypes ` do not inherit this slot. **Default:** - For dynamic subtypes, this field is always set to - :c:func:`PyType_GenericAlloc`, to force a standard heap - allocation strategy. + For heap subtypes, this field is always set to + :c:func:`PyType_GenericAlloc`. - For static subtypes, :c:data:`PyBaseObject_Type` uses - :c:func:`PyType_GenericAlloc`. That is the recommended value - for all statically defined types. + For static subtypes, this slot is inherited (see above). .. c:member:: newfunc PyTypeObject.tp_new @@ -2004,20 +2146,27 @@ and :c:data:`PyType_Type` effectively act as defaults.) void tp_free(void *self); - An initializer that is compatible with this signature is :c:func:`PyObject_Free`. + This function must free the memory allocated by + :c:member:`~PyTypeObject.tp_alloc`. **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement) + Static subtypes inherit this slot, which will be :c:func:`PyObject_Free` if + inherited from :class:`object`. Exception: If the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and it would inherit + :c:func:`PyObject_Free`, then this slot is not inherited but instead defaults + to :c:func:`PyObject_GC_Del`. + + :ref:`Heap subtypes ` do not inherit this slot. **Default:** - In dynamic subtypes, this field is set to a deallocator suitable to - match :c:func:`PyType_GenericAlloc` and the value of the - :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit. + For :ref:`heap subtypes `, this slot defaults to a deallocator suitable to match + :c:func:`PyType_GenericAlloc` and the value of the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag. - For static subtypes, :c:data:`PyBaseObject_Type` uses :c:func:`PyObject_Free`. + For static subtypes, this slot is inherited (see above). .. c:member:: inquiry PyTypeObject.tp_is_gc @@ -2144,29 +2293,138 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_finalize - An optional pointer to an instance finalization function. Its signature is:: + An optional pointer to an instance finalization function. This is the C + implementation of the :meth:`~object.__del__` special method. Its signature + is:: void tp_finalize(PyObject *self); - If :c:member:`~PyTypeObject.tp_finalize` is set, the interpreter calls it once when - finalizing an instance. It is called either from the garbage - collector (if the instance is part of an isolated reference cycle) or - just before the object is deallocated. Either way, it is guaranteed - to be called before attempting to break reference cycles, ensuring - that it finds the object in a sane state. + The primary purpose of finalization is to perform any non-trivial cleanup + that must be performed before the object is destroyed, while the object and + any other objects it directly or indirectly references are still in a + consistent state. The finalizer is allowed to execute + arbitrary Python code. + + Before Python automatically finalizes an object, some of the object's direct + or indirect referents might have themselves been automatically finalized. + However, none of the referents will have been automatically cleared + (:c:member:`~PyTypeObject.tp_clear`) yet. - :c:member:`~PyTypeObject.tp_finalize` should not mutate the current exception status; - therefore, a recommended way to write a non-trivial finalizer is:: + Other non-finalized objects might still be using a finalized object, so the + finalizer must leave the object in a sane state (e.g., invariants are still + met). + + .. note:: + + After Python automatically finalizes an object, Python might start + automatically clearing (:c:member:`~PyTypeObject.tp_clear`) the object + and its referents (direct and indirect). Cleared objects are not + guaranteed to be in a consistent state; a finalized object must be able + to tolerate cleared referents. + + .. note:: + + An object is not guaranteed to be automatically finalized before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. It is + recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is + always finalized before destruction. + + .. note:: + + The :c:member:`~PyTypeObject.tp_finalize` function can be called from any + thread, although the :term:`GIL` will be held. + + .. note:: + + The :c:member:`!tp_finalize` function can be called during shutdown, + after some global variables have been deleted. See the documentation of + the :meth:`~object.__del__` method for details. + + When Python finalizes an object, it behaves like the following algorithm: + + #. Python might mark the object as *finalized*. Currently, Python always + marks objects whose type supports garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and never marks other types of + objects; this might change in a future version. + #. If the object is not marked as *finalized* and its + :c:member:`!tp_finalize` finalizer function is non-``NULL``, the + finalizer function is called. + #. If the finalizer function was called and the finalizer made the object + reachable (i.e., there is a reference to the object and it is not a + member of a :term:`cyclic isolate`), then the finalizer is said to have + *resurrected* the object. It is unspecified whether the finalizer can + also resurrect the object by adding a new reference to the object that + does not make it reachable, i.e., the object is (still) a member of a + cyclic isolate. + #. If the finalizer resurrected the object, the object's pending destruction + is canceled and the object's *finalized* mark might be removed if + present. Currently, Python never removes the *finalized* mark; this + might change in a future version. + + *Automatic finalization* refers to any finalization performed by Python + except via calls to :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. No guarantees are made about + when, if, or how often an object is automatically finalized, except: + + * Python will not automatically finalize an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically finalize an object if finalizing it would + not mark the object as *finalized*. Currently, this applies to objects + whose type does not support garbage collection, i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is not set. Such objects can still be + manually finalized by calling :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. + * Python will not automatically finalize any two members of a :term:`cyclic + isolate` concurrently. + * Python will not automatically finalize an object after it has + automatically cleared (:c:member:`~PyTypeObject.tp_clear`) the object. + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically finalize it after automatically clearing (see + :c:member:`~PyTypeObject.tp_clear`) any other member. + * Python will automatically finalize every member of a :term:`cyclic + isolate` before it automatically clears (see + :c:member:`~PyTypeObject.tp_clear`) any of them. + * If Python is going to automatically clear an object + (:c:member:`~PyTypeObject.tp_clear`), it will automatically finalize the + object first. + + Python currently only automatically finalizes objects that are members of a + :term:`cyclic isolate`, but future versions might finalize objects regularly + before their destruction. + + To manually finalize an object, do not call this function directly; call + :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc` instead. + + :c:member:`~PyTypeObject.tp_finalize` should leave the current exception + status unchanged. The recommended way to write a non-trivial finalizer is + to back up the exception at the beginning by calling + :c:func:`PyErr_GetRaisedException` and restore the exception at the end by + calling :c:func:`PyErr_SetRaisedException`. If an exception is encountered + in the middle of the finalizer, log and clear it with + :c:func:`PyErr_WriteUnraisable` or :c:func:`PyErr_FormatUnraisable`. For + example:: static void - local_finalize(PyObject *self) + foo_finalize(PyObject *self) { - /* Save the current exception, if any. */ + // Save the current exception, if any. PyObject *exc = PyErr_GetRaisedException(); - /* ... */ + // ... - /* Restore the saved exception. */ + if (do_something_that_might_raise() != success_indicator) { + PyErr_WriteUnraisable(self); + goto done; + } + + done: + // Restore the saved exception. This silently discards any exception + // raised above, so be sure to call PyErr_WriteUnraisable first if + // necessary. PyErr_SetRaisedException(exc); } @@ -2182,7 +2440,13 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:macro:`Py_TPFLAGS_HAVE_FINALIZE` flags bit in order for this field to be used. This is no longer required. - .. seealso:: "Safe object finalization" (:pep:`442`) + .. seealso:: + + * :pep:`442`: "Safe object finalization" + * :ref:`life-cycle` for details about how this slot relates to other + slots. + * :c:func:`PyObject_CallFinalizer` + * :c:func:`PyObject_CallFinalizerFromDealloc` .. c:member:: vectorcallfunc PyTypeObject.tp_vectorcall diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 95987e872ce639..e8bdd8c6dee0a0 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -645,6 +645,17 @@ APIs: difference being that it decrements the reference count of *right* by one. +.. c:function:: PyObject* PyUnicode_BuildEncodingMap(PyObject* string) + + Return a mapping suitable for decoding a custom single-byte encoding. + Given a Unicode string *string* of up to 256 characters representing an encoding + table, returns either a compact internal mapping object or a dictionary + mapping character ordinals to byte values. Raises a :exc:`TypeError` and + return ``NULL`` on invalid input. + + .. versionadded:: 3.2 + + .. c:function:: const char* PyUnicode_GetDefaultEncoding(void) Return the name of the default string encoding, ``"utf-8"``. @@ -1450,10 +1461,6 @@ the user settings on the machine running the codec. .. versionadded:: 3.3 -Methods & Slots -""""""""""""""" - - .. _unicodemethodsandslots: Methods and Slot Functions @@ -1715,10 +1722,6 @@ They all return ``NULL`` or ``-1`` if an exception occurs. from user input, prefer calling :c:func:`PyUnicode_FromString` and :c:func:`PyUnicode_InternInPlace` directly. - .. impl-detail:: - - Strings interned this way are made :term:`immortal`. - .. c:function:: unsigned int PyUnicode_CHECK_INTERNED(PyObject *str) @@ -1795,9 +1798,24 @@ object. See also :c:func:`PyUnicodeWriter_DecodeUTF8Stateful`. +.. c:function:: int PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, const char *str, Py_ssize_t size) + + Write the ASCII string *str* into *writer*. + + *size* is the string length in bytes. If *size* is equal to ``-1``, call + ``strlen(str)`` to get the string length. + + *str* must only contain ASCII characters. The behavior is undefined if + *str* contains non-ASCII characters. + + On success, return ``0``. + On error, set an exception, leave the writer unchanged, and return ``-1``. + + .. versionadded:: 3.14 + .. c:function:: int PyUnicodeWriter_WriteWideChar(PyUnicodeWriter *writer, const wchar_t *str, Py_ssize_t size) - Writer the wide string *str* into *writer*. + Write the wide string *str* into *writer*. *size* is a number of wide characters. If *size* is equal to ``-1``, call ``wcslen(str)`` to get the string length. diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index 1ef4181d52eb10..fb07fec7effce8 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -361,7 +361,7 @@ the same library that the Python runtime is using. :py:mod:`!ast` Python module, which exports these constants under the same names. - .. c:var:: int CO_FUTURE_DIVISION - - This bit can be set in *flags* to cause division operator ``/`` to be - interpreted as "true division" according to :pep:`238`. + The "``PyCF``" flags above can be combined with "``CO_FUTURE``" flags such + as :c:macro:`CO_FUTURE_ANNOTATIONS` to enable features normally + selectable using :ref:`future statements `. + See :ref:`c_codeobject_flags` for a complete list. diff --git a/Doc/conf.py b/Doc/conf.py index 467961dd5e2bff..8b2a8f20fcc558 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -79,6 +79,10 @@ rst_epilog = f""" .. |python_version_literal| replace:: ``Python {version}`` .. |python_x_dot_y_literal| replace:: ``python{version}`` +.. |python_x_dot_y_t_literal| replace:: ``python{version}t`` +.. |python_x_dot_y_t_literal_config| replace:: ``python{version}t-config`` +.. |x_dot_y_b2_literal| replace:: ``{version}.0b2`` +.. |applications_python_version_literal| replace:: ``/Applications/Python {version}/`` .. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` .. Apparently this how you hack together a formatted link: @@ -234,6 +238,7 @@ ('c:data', 'PyExc_AssertionError'), ('c:data', 'PyExc_AttributeError'), ('c:data', 'PyExc_BaseException'), + ('c:data', 'PyExc_BaseExceptionGroup'), ('c:data', 'PyExc_BlockingIOError'), ('c:data', 'PyExc_BrokenPipeError'), ('c:data', 'PyExc_BufferError'), @@ -287,6 +292,7 @@ # C API: Standard Python warning classes ('c:data', 'PyExc_BytesWarning'), ('c:data', 'PyExc_DeprecationWarning'), + ('c:data', 'PyExc_EncodingWarning'), ('c:data', 'PyExc_FutureWarning'), ('c:data', 'PyExc_ImportWarning'), ('c:data', 'PyExc_PendingDeprecationWarning'), @@ -308,7 +314,6 @@ ('py:attr', '__annotations__'), ('py:meth', '__missing__'), ('py:attr', '__wrapped__'), - ('py:attr', 'decimal.Context.clamp'), ('py:meth', 'index'), # list.index, tuple.index, etc. ] @@ -630,13 +635,14 @@ 'image': '_static/og-image.png', 'line_color': '#3776ab', } -ogp_custom_meta_tags = [ - '', -] -if 'create-social-cards' not in tags: # noqa: F821 - # Define a static preview image when not creating social cards - ogp_image = '_static/og-image.png' - ogp_custom_meta_tags += [ - '', - '', +if 'builder_html' in tags: # noqa: F821 + ogp_custom_meta_tags = [ + '', ] + if 'create-social-cards' not in tags: # noqa: F821 + # Define a static preview image when not creating social cards + ogp_image = '_static/og-image.png' + ogp_custom_meta_tags += [ + '', + '', + ] diff --git a/Doc/data/python3.14.abi b/Doc/data/python3.14.abi new file mode 100644 index 00000000000000..e2cb3cbe30da47 --- /dev/null +++ b/Doc/data/python3.14.abi @@ -0,0 +1,31558 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index ca99b9e6d37141..a6f1fc4e937c94 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -963,21 +963,45 @@ PyFunction_Check:PyObject*:o:0: PyFunction_GetAnnotations:PyObject*::0: PyFunction_GetAnnotations:PyObject*:op:0: +PyFunction_GET_ANNOTATIONS:PyObject*::0: +PyFunction_GET_ANNOTATIONS:PyObject*:op:0: + PyFunction_GetClosure:PyObject*::0: PyFunction_GetClosure:PyObject*:op:0: +PyFunction_GET_CLOSURE:PyObject*::0: +PyFunction_GET_CLOSURE:PyObject*:op:0: + PyFunction_GetCode:PyObject*::0: PyFunction_GetCode:PyObject*:op:0: +PyFunction_GET_CODE:PyObject*::0: +PyFunction_GET_CODE:PyObject*:op:0: + PyFunction_GetDefaults:PyObject*::0: PyFunction_GetDefaults:PyObject*:op:0: +PyFunction_GET_DEFAULTS:PyObject*::0: +PyFunction_GET_DEFAULTS:PyObject*:op:0: + +PyFunction_GetKwDefaults:PyObject*::0: +PyFunction_GetKwDefaults:PyObject*:op:0: + +PyFunction_GET_KW_DEFAULTS:PyObject*::0: +PyFunction_GET_KW_DEFAULTS:PyObject*:op:0: + PyFunction_GetGlobals:PyObject*::0: PyFunction_GetGlobals:PyObject*:op:0: +PyFunction_GET_GLOBALS:PyObject*::0: +PyFunction_GET_GLOBALS:PyObject*:op:0: + PyFunction_GetModule:PyObject*::0: PyFunction_GetModule:PyObject*:op:0: +PyFunction_GET_MODULE:PyObject*::0: +PyFunction_GET_MODULE:PyObject*:op:0: + PyFunction_New:PyObject*::+1: PyFunction_New:PyObject*:code:+1: PyFunction_New:PyObject*:globals:+1: @@ -1492,9 +1516,6 @@ PyModule_SetDocString:int::: PyModule_SetDocString:PyObject*:module:0: PyModule_SetDocString:const char*:docstring:: -PyModuleDef_Init:PyObject*::0: -PyModuleDef_Init:PyModuleDef*:def:: - PyNumber_Absolute:PyObject*::+1: PyNumber_Absolute:PyObject*:o:0: @@ -2391,6 +2412,10 @@ PyType_GetFlags:PyTypeObject*:type:0: PyType_GetName:PyObject*::+1: PyType_GetName:PyTypeObject*:type:0: +PyType_GetModuleByDef:PyObject*::0: +PyType_GetModuleByDef:PyTypeObject*:type:0: +PyType_GetModuleByDef:PyModuleDef*:def:: + PyType_GetQualName:PyObject*::+1: PyType_GetQualName:PyTypeObject*:type:0: @@ -2781,6 +2806,9 @@ PyUnicode_AppendAndDel:void::: PyUnicode_AppendAndDel:PyObject**:p_left:0: PyUnicode_AppendAndDel:PyObject*:right:-1: +PyUnicode_BuildEncodingMap:PyObject*::+1: +PyUnicode_BuildEncodingMap:PyObject*:string::: + PyUnicode_GetDefaultEncoding:const char*::: PyUnicode_GetDefaultEncoding::void:: diff --git a/Doc/deprecations/c-api-pending-removal-in-3.15.rst b/Doc/deprecations/c-api-pending-removal-in-3.15.rst index a5cc8f1d5b3475..bac80289cdfb3d 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.15.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.15.rst @@ -1,7 +1,6 @@ Pending removal in Python 3.15 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -* The bundled copy of ``libmpdecimal``. * The :c:func:`PyImport_ImportModuleNoBlock`: Use :c:func:`PyImport_ImportModule` instead. * :c:func:`PyWeakref_GetObject` and :c:func:`PyWeakref_GET_OBJECT`: diff --git a/Doc/deprecations/c-api-pending-removal-in-3.16.rst b/Doc/deprecations/c-api-pending-removal-in-3.16.rst new file mode 100644 index 00000000000000..9453f83799c43d --- /dev/null +++ b/Doc/deprecations/c-api-pending-removal-in-3.16.rst @@ -0,0 +1,4 @@ +Pending removal in Python 3.16 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* The bundled copy of ``libmpdec``. diff --git a/Doc/deprecations/pending-removal-in-3.14.rst b/Doc/deprecations/pending-removal-in-3.14.rst index 6159fa48848285..9aac10840a663f 100644 --- a/Doc/deprecations/pending-removal-in-3.14.rst +++ b/Doc/deprecations/pending-removal-in-3.14.rst @@ -78,7 +78,7 @@ Pending removal in Python 3.14 :meth:`~pathlib.PurePath.relative_to`: passing additional arguments is deprecated. -* :mod:`pkgutil`: :func:`!pkgutil.find_loader` and :func:!pkgutil.get_loader` +* :mod:`pkgutil`: :func:`!pkgutil.find_loader` and :func:`!pkgutil.get_loader` now raise :exc:`DeprecationWarning`; use :func:`importlib.util.find_spec` instead. (Contributed by Nikita Sobolev in :gh:`97850`.) diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index 7b32275ad86760..c80588b27b635a 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -85,6 +85,13 @@ Pending removal in Python 3.15 has been deprecated since Python 3.13. Use the class-based syntax or the functional syntax instead. + * When using the functional syntax of :class:`~typing.TypedDict`\s, failing + to pass a value to the *fields* parameter (``TD = TypedDict("TD")``) or + passing ``None`` (``TD = TypedDict("TD", None)``) has been deprecated + since Python 3.13. + Use ``class TD(TypedDict): pass`` or ``TD = TypedDict("TD", {})`` + to create a TypedDict with zero field. + * The :func:`typing.no_type_check_decorator` decorator function has been deprecated since Python 3.13. After eight years in the :mod:`typing` module, diff --git a/Doc/extending/building.rst b/Doc/extending/building.rst index ddde567f6f3efa..098dde39ea597e 100644 --- a/Doc/extending/building.rst +++ b/Doc/extending/building.rst @@ -6,41 +6,10 @@ Building C and C++ Extensions ***************************** -A C extension for CPython is a shared library (e.g. a ``.so`` file on Linux, -``.pyd`` on Windows), which exports an *initialization function*. +A C extension for CPython is a shared library (for example, a ``.so`` file on +Linux, ``.pyd`` on Windows), which exports an *initialization function*. -To be importable, the shared library must be available on :envvar:`PYTHONPATH`, -and must be named after the module name, with an appropriate extension. -When using setuptools, the correct filename is generated automatically. - -The initialization function has the signature: - -.. c:function:: PyObject* PyInit_modulename(void) - -It returns either a fully initialized module, or a :c:type:`PyModuleDef` -instance. See :ref:`initializing-modules` for details. - -.. highlight:: python - -For modules with ASCII-only names, the function must be named -``PyInit_``, with ```` replaced by the name of the -module. When using :ref:`multi-phase-initialization`, non-ASCII module names -are allowed. In this case, the initialization function name is -``PyInitU_``, with ```` encoded using Python's -*punycode* encoding with hyphens replaced by underscores. In Python:: - - def initfunc_name(name): - try: - suffix = b'_' + name.encode('ascii') - except UnicodeEncodeError: - suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') - return b'PyInit' + suffix - -It is possible to export multiple modules from a single shared library by -defining multiple initialization functions. However, importing them requires -using symbolic links or a custom importer, because by default only the -function corresponding to the filename is found. -See the *"Multiple modules in one library"* section in :pep:`489` for details. +See :ref:`extension-modules` for details. .. highlight:: c @@ -51,7 +20,11 @@ See the *"Multiple modules in one library"* section in :pep:`489` for details. Building C and C++ Extensions with setuptools ============================================= -Python 3.12 and newer no longer come with distutils. Please refer to the -``setuptools`` documentation at -https://setuptools.readthedocs.io/en/latest/setuptools.html -to learn more about how build and distribute C/C++ extensions with setuptools. + +Building, packaging and distributing extension modules is best done with +third-party tools, and is out of scope of this document. +One suitable tool is Setuptools, whose documentation can be found at +https://setuptools.pypa.io/en/latest/setuptools.html. + +The :mod:`distutils` module, which was included in the standard library +until Python 3.12, is now maintained as part of Setuptools. diff --git a/Doc/extending/embedding.rst b/Doc/extending/embedding.rst index b777862da79f14..cb41889437c8b0 100644 --- a/Doc/extending/embedding.rst +++ b/Doc/extending/embedding.rst @@ -245,21 +245,23 @@ Python extension. For example:: return PyLong_FromLong(numargs); } - static PyMethodDef EmbMethods[] = { + static PyMethodDef emb_module_methods[] = { {"numargs", emb_numargs, METH_VARARGS, "Return the number of arguments received by the process."}, {NULL, NULL, 0, NULL} }; - static PyModuleDef EmbModule = { - PyModuleDef_HEAD_INIT, "emb", NULL, -1, EmbMethods, - NULL, NULL, NULL, NULL + static struct PyModuleDef emb_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "emb", + .m_size = 0, + .m_methods = emb_module_methods, }; static PyObject* PyInit_emb(void) { - return PyModule_Create(&EmbModule); + return PyModuleDef_Init(&emb_module); } Insert the above code just above the :c:func:`main` function. Also, insert the diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index b0493bed75b151..fd63495674651b 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -203,31 +203,57 @@ function usually raises :c:data:`PyExc_TypeError`. If you have an argument whos value must be in a particular range or must satisfy other conditions, :c:data:`PyExc_ValueError` is appropriate. -You can also define a new exception that is unique to your module. For this, you -usually declare a static object variable at the beginning of your file:: +You can also define a new exception that is unique to your module. +The simplest way to do this is to declare a static global object variable at +the beginning of the file:: - static PyObject *SpamError; + static PyObject *SpamError = NULL; -and initialize it in your module's initialization function (:c:func:`!PyInit_spam`) -with an exception object:: +and initialize it by calling :c:func:`PyErr_NewException` in the module's +:c:data:`Py_mod_exec` function (:c:func:`!spam_module_exec`):: - PyMODINIT_FUNC - PyInit_spam(void) - { - PyObject *m; + SpamError = PyErr_NewException("spam.error", NULL, NULL); - m = PyModule_Create(&spammodule); - if (m == NULL) - return NULL; +Since :c:data:`!SpamError` is a global variable, it will be overwitten every time +the module is reinitialized, when the :c:data:`Py_mod_exec` function is called. + +For now, let's avoid the issue: we will block repeated initialization by raising an +:py:exc:`ImportError`:: + static PyObject *SpamError = NULL; + + static int + spam_module_exec(PyObject *m) + { + if (SpamError != NULL) { + PyErr_SetString(PyExc_ImportError, + "cannot initialize spam module more than once"); + return -1; + } SpamError = PyErr_NewException("spam.error", NULL, NULL); - if (PyModule_AddObjectRef(m, "error", SpamError) < 0) { - Py_CLEAR(SpamError); - Py_DECREF(m); - return NULL; + if (PyModule_AddObjectRef(m, "SpamError", SpamError) < 0) { + return -1; } - return m; + return 0; + } + + static PyModuleDef_Slot spam_module_slots[] = { + {Py_mod_exec, spam_module_exec}, + {0, NULL} + }; + + static struct PyModuleDef spam_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "spam", + .m_size = 0, // non-negative + .m_slots = spam_module_slots, + }; + + PyMODINIT_FUNC + PyInit_spam(void) + { + return PyModuleDef_Init(&spam_module); } Note that the Python name for the exception object is :exc:`!spam.error`. The @@ -242,6 +268,11 @@ needed to ensure that it will not be discarded, causing :c:data:`!SpamError` to become a dangling pointer. Should it become a dangling pointer, C code which raises the exception could cause a core dump or other unintended side effects. +For now, the :c:func:`Py_DECREF` call to remove this reference is missing. +Even when the Python interpreter shuts down, the global :c:data:`!SpamError` +variable will not be garbage-collected. It will "leak". +We did, however, ensure that this will happen at most once per process. + We discuss the use of :c:macro:`PyMODINIT_FUNC` as a function return type later in this sample. @@ -318,7 +349,7 @@ The Module's Method Table and Initialization Function I promised to show how :c:func:`!spam_system` is called from Python programs. First, we need to list its name and address in a "method table":: - static PyMethodDef SpamMethods[] = { + static PyMethodDef spam_methods[] = { ... {"system", spam_system, METH_VARARGS, "Execute a shell command."}, @@ -343,13 +374,10 @@ function. The method table must be referenced in the module definition structure:: - static struct PyModuleDef spammodule = { - PyModuleDef_HEAD_INIT, - "spam", /* name of module */ - spam_doc, /* module documentation, may be NULL */ - -1, /* size of per-interpreter state of the module, - or -1 if the module keeps state in global variables. */ - SpamMethods + static struct PyModuleDef spam_module = { + ... + .m_methods = spam_methods, + ... }; This structure, in turn, must be passed to the interpreter in the module's @@ -360,23 +388,17 @@ only non-\ ``static`` item defined in the module file:: PyMODINIT_FUNC PyInit_spam(void) { - return PyModule_Create(&spammodule); + return PyModuleDef_Init(&spam_module); } Note that :c:macro:`PyMODINIT_FUNC` declares the function as ``PyObject *`` return type, declares any special linkage declarations required by the platform, and for C++ declares the function as ``extern "C"``. -When the Python program imports module :mod:`!spam` for the first time, -:c:func:`!PyInit_spam` is called. (See below for comments about embedding Python.) -It calls :c:func:`PyModule_Create`, which returns a module object, and -inserts built-in function objects into the newly created module based upon the -table (an array of :c:type:`PyMethodDef` structures) found in the module definition. -:c:func:`PyModule_Create` returns a pointer to the module object -that it creates. It may abort with a fatal error for -certain errors, or return ``NULL`` if the module could not be initialized -satisfactorily. The init function must return the module object to its caller, -so that it then gets inserted into ``sys.modules``. +:c:func:`!PyInit_spam` is called when each interpreter imports its module +:mod:`!spam` for the first time. (See below for comments about embedding Python.) +A pointer to the module definition must be returned via :c:func:`PyModuleDef_Init`, +so that the import machinery can create the module and store it in ``sys.modules``. When embedding Python, the :c:func:`!PyInit_spam` function is not called automatically unless there's an entry in the :c:data:`PyImport_Inittab` table. @@ -433,23 +455,19 @@ optionally followed by an import of the module:: .. note:: - Removing entries from ``sys.modules`` or importing compiled modules into - multiple interpreters within a process (or following a :c:func:`fork` without an - intervening :c:func:`exec`) can create problems for some extension modules. - Extension module authors should exercise caution when initializing internal data - structures. + If you declare a global variable or a local static one, the module may + experience unintended side-effects on re-initialisation, for example when + removing entries from ``sys.modules`` or importing compiled modules into + multiple interpreters within a process + (or following a :c:func:`fork` without an intervening :c:func:`exec`). + If module state is not yet fully :ref:`isolated `, + authors should consider marking the module as having no support for subinterpreters + (via :c:macro:`Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED`). A more substantial example module is included in the Python source distribution -as :file:`Modules/xxmodule.c`. This file may be used as a template or simply +as :file:`Modules/xxlimited.c`. This file may be used as a template or simply read as an example. -.. note:: - - Unlike our ``spam`` example, ``xxmodule`` uses *multi-phase initialization* - (new in Python 3.5), where a PyModuleDef structure is returned from - ``PyInit_spam``, and creation of the module is left to the import machinery. - For details on multi-phase initialization, see :PEP:`489`. - .. _compilation: @@ -790,18 +808,17 @@ Philbrick (philbrick@hks.com):: {NULL, NULL, 0, NULL} /* sentinel */ }; - static struct PyModuleDef keywdargmodule = { - PyModuleDef_HEAD_INIT, - "keywdarg", - NULL, - -1, - keywdarg_methods + static struct PyModuleDef keywdarg_module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "keywdarg", + .m_size = 0, + .m_methods = keywdarg_methods, }; PyMODINIT_FUNC PyInit_keywdarg(void) { - return PyModule_Create(&keywdargmodule); + return PyModuleDef_Init(&keywdarg_module); } @@ -1072,8 +1089,9 @@ why his :meth:`!__del__` methods would fail... The second case of problems with a borrowed reference is a variant involving threads. Normally, multiple threads in the Python interpreter can't get in each -other's way, because there is a global lock protecting Python's entire object -space. However, it is possible to temporarily release this lock using the macro +other's way, because there is a :term:`global lock ` +protecting Python's entire object space. +However, it is possible to temporarily release this lock using the macro :c:macro:`Py_BEGIN_ALLOW_THREADS`, and to re-acquire it using :c:macro:`Py_END_ALLOW_THREADS`. This is common around blocking I/O calls, to let other threads use the processor while waiting for the I/O to complete. @@ -1259,20 +1277,15 @@ two more lines must be added:: #include "spammodule.h" The ``#define`` is used to tell the header file that it is being included in the -exporting module, not a client module. Finally, the module's initialization -function must take care of initializing the C API pointer array:: +exporting module, not a client module. Finally, the module's :c:data:`mod_exec +` function must take care of initializing the C API pointer array:: - PyMODINIT_FUNC - PyInit_spam(void) + static int + spam_module_exec(PyObject *m) { - PyObject *m; static void *PySpam_API[PySpam_API_pointers]; PyObject *c_api_object; - m = PyModule_Create(&spammodule); - if (m == NULL) - return NULL; - /* Initialize the C API pointer array */ PySpam_API[PySpam_System_NUM] = (void *)PySpam_System; @@ -1280,11 +1293,10 @@ function must take care of initializing the C API pointer array:: c_api_object = PyCapsule_New((void *)PySpam_API, "spam._C_API", NULL); if (PyModule_Add(m, "_C_API", c_api_object) < 0) { - Py_DECREF(m); - return NULL; + return -1; } - return m; + return 0; } Note that ``PySpam_API`` is declared ``static``; otherwise the pointer @@ -1343,20 +1355,16 @@ like this:: All that a client module must do in order to have access to the function :c:func:`!PySpam_System` is to call the function (or rather macro) -:c:func:`!import_spam` in its initialization function:: +:c:func:`!import_spam` in its :c:data:`mod_exec ` function:: - PyMODINIT_FUNC - PyInit_client(void) + static int + client_module_exec(PyObject *m) { - PyObject *m; - - m = PyModule_Create(&clientmodule); - if (m == NULL) - return NULL; - if (import_spam() < 0) - return NULL; + if (import_spam() < 0) { + return -1; + } /* additional initialization can happen here */ - return m; + return 0; } The main disadvantage of this approach is that the file :file:`spammodule.h` is diff --git a/Doc/extending/index.rst b/Doc/extending/index.rst index 01b4df6d44acff..4cc2c96d8d5b47 100644 --- a/Doc/extending/index.rst +++ b/Doc/extending/index.rst @@ -26,19 +26,9 @@ Recommended third party tools ============================= This guide only covers the basic tools for creating extensions provided -as part of this version of CPython. Third party tools like -`Cython `_, `cffi `_, -`SWIG `_ and `Numba `_ -offer both simpler and more sophisticated approaches to creating C and C++ -extensions for Python. - -.. seealso:: - - `Python Packaging User Guide: Binary Extensions `_ - The Python Packaging User Guide not only covers several available - tools that simplify the creation of binary extensions, but also - discusses the various reasons why creating an extension module may be - desirable in the first place. +as part of this version of CPython. Some :ref:`third party tools +` offer both simpler and more sophisticated approaches to creating +C and C++ extensions for Python. Creating extensions without third party tools @@ -49,6 +39,10 @@ assistance from third party tools. It is intended primarily for creators of those tools, rather than being a recommended way to create your own C extensions. +.. seealso:: + + :pep:`489` -- Multi-phase extension module initialization + .. toctree:: :maxdepth: 2 :numbered: diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst index 3fc91841416d71..3bbee33bd50698 100644 --- a/Doc/extending/newtypes_tutorial.rst +++ b/Doc/extending/newtypes_tutorial.rst @@ -55,8 +55,10 @@ from the previous chapter. This file defines three things: #. How the :class:`!Custom` **type** behaves: this is the ``CustomType`` struct, which defines a set of flags and function pointers that the interpreter inspects when specific operations are requested. -#. How to initialize the :mod:`!custom` module: this is the ``PyInit_custom`` - function and the associated ``custommodule`` struct. +#. How to define and execute the :mod:`!custom` module: this is the + ``PyInit_custom`` function and the associated ``custom_module`` struct for + defining the module, and the ``custom_module_exec`` function to set up + a fresh module object. The first bit is:: @@ -171,18 +173,18 @@ implementation provided by the API function :c:func:`PyType_GenericNew`. :: .tp_new = PyType_GenericNew, Everything else in the file should be familiar, except for some code in -:c:func:`!PyInit_custom`:: +:c:func:`!custom_module_exec`:: - if (PyType_Ready(&CustomType) < 0) - return; + if (PyType_Ready(&CustomType) < 0) { + return -1; + } This initializes the :class:`!Custom` type, filling in a number of members to the appropriate default values, including :c:member:`~PyObject.ob_type` that we initially set to ``NULL``. :: if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; + return -1; } This adds the type to the module dictionary. This allows us to create @@ -275,7 +277,7 @@ be an instance of a subclass. The explicit cast to ``CustomObject *`` above is needed because we defined ``Custom_dealloc`` to take a ``PyObject *`` argument, as the ``tp_dealloc`` function pointer expects to receive a ``PyObject *`` argument. - By assigning to the the ``tp_dealloc`` slot of a type, we declare + By assigning to the ``tp_dealloc`` slot of a type, we declare that it can only be called with instances of our ``CustomObject`` class, so the cast to ``(CustomObject *)`` is safe. This is object-oriented polymorphism, in C! @@ -875,27 +877,22 @@ but let the base class handle it by calling its own :c:member:`~PyTypeObject.tp_ The :c:type:`PyTypeObject` struct supports a :c:member:`~PyTypeObject.tp_base` specifying the type's concrete base class. Due to cross-platform compiler issues, you can't fill that field directly with a reference to -:c:type:`PyList_Type`; it should be done later in the module initialization +:c:type:`PyList_Type`; it should be done in the :c:data:`Py_mod_exec` function:: - PyMODINIT_FUNC - PyInit_sublist(void) + static int + sublist_module_exec(PyObject *m) { - PyObject* m; SubListType.tp_base = &PyList_Type; - if (PyType_Ready(&SubListType) < 0) - return NULL; - - m = PyModule_Create(&sublistmodule); - if (m == NULL) - return NULL; + if (PyType_Ready(&SubListType) < 0) { + return -1; + } if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { - Py_DECREF(m); - return NULL; + return -1; } - return m; + return 0; } Before calling :c:func:`PyType_Ready`, the type structure must have the diff --git a/Doc/extending/windows.rst b/Doc/extending/windows.rst index 56aa44e4e58c83..a97c6182553c30 100644 --- a/Doc/extending/windows.rst +++ b/Doc/extending/windows.rst @@ -121,7 +121,7 @@ When creating DLLs in Windows, you can use the CPython library in two ways: :file:`Python.h` triggers an implicit, configure-aware link with the library. The header file chooses :file:`pythonXY_d.lib` for Debug, :file:`pythonXY.lib` for Release, and :file:`pythonX.lib` for Release with - the `Limited API `_ enabled. + the :ref:`Limited API ` enabled. To build two DLLs, spam and ni (which uses C functions found in spam), you could use these commands:: diff --git a/Doc/faq/extending.rst b/Doc/faq/extending.rst index 3147fda7c37124..1d5abed2317b0c 100644 --- a/Doc/faq/extending.rst +++ b/Doc/faq/extending.rst @@ -37,24 +37,9 @@ Writing C is hard; are there any alternatives? ---------------------------------------------- There are a number of alternatives to writing your own C extensions, depending -on what you're trying to do. - -.. XXX make sure these all work - -`Cython `_ and its relative `Pyrex -`_ are compilers -that accept a slightly modified form of Python and generate the corresponding -C code. Cython and Pyrex make it possible to write an extension without having -to learn Python's C API. - -If you need to interface to some C or C++ library for which no Python extension -currently exists, you can try wrapping the library's data types and functions -with a tool such as `SWIG `_. `SIP -`__, `CXX -`_ `Boost -`_, or `Weave -`_ are also -alternatives for wrapping C++ libraries. +on what you're trying to do. :ref:`Recommended third party tools ` +offer both simpler and more sophisticated approaches to creating C and C++ +extensions for Python. How can I execute arbitrary Python statements from C? diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 0b26e18efd7f1b..705b0a9279c6d4 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -355,6 +355,12 @@ Glossary tasks (see :mod:`asyncio`) associate each task with a context which becomes the current context whenever the task starts or resumes execution. + cyclic isolate + A subgroup of one or more objects that reference each other in a reference + cycle, but are not referenced by objects outside the group. The goal of + the :term:`cyclic garbage collector ` is to identify these groups and break the reference + cycles so that the memory can be reclaimed. + decorator A function returning another function, usually applied as a function transformation using the ``@wrapper`` syntax. Common examples for @@ -1274,6 +1280,16 @@ Glossary and ending with double underscores. Special methods are documented in :ref:`specialnames`. + standard library + The collection of :term:`packages `, :term:`modules ` + and :term:`extension modules ` distributed as a part + of the official Python interpreter package. The exact membership of the + collection may vary based on platform, available system libraries, or + other criteria. Documentation can be found at :ref:`library-index`. + + See also :data:`sys.stdlib_module_names` for a list of all possible + standard library module names. + statement A statement is part of a suite (a "block" of code). A statement is either an :term:`expression` or one of several constructs with a keyword, such @@ -1284,6 +1300,9 @@ Glossary issues such as incorrect types. See also :term:`type hints ` and the :mod:`typing` module. + stdlib + An abbreviation of :term:`standard library`. + strong reference In Python's C API, a strong reference is a reference to an object which is owned by the code holding the reference. The strong diff --git a/Doc/howto/annotations.rst b/Doc/howto/annotations.rst index 78f3704ba5d000..d7deb6c6bc1768 100644 --- a/Doc/howto/annotations.rst +++ b/Doc/howto/annotations.rst @@ -248,4 +248,9 @@ quirks by using :func:`annotationlib.get_annotations` on Python 3.14+ or :func:`inspect.get_annotations` on Python 3.10+. On earlier versions of Python, you can avoid these bugs by accessing the annotations from the class's :attr:`~type.__dict__` -(e.g., ``cls.__dict__.get('__annotations__', None)``). +(for example, ``cls.__dict__.get('__annotations__', None)``). + +In some versions of Python, instances of classes may have an ``__annotations__`` +attribute. However, this is not supported functionality. If you need the +annotations of an instance, you can use :func:`type` to access its class +(for example, ``annotationlib.get_annotations(type(myinstance))`` on Python 3.14+). diff --git a/Doc/howto/cporting.rst b/Doc/howto/cporting.rst index 7773620b40b973..cf857aed0425ec 100644 --- a/Doc/howto/cporting.rst +++ b/Doc/howto/cporting.rst @@ -14,13 +14,11 @@ We recommend the following resources for porting extension modules to Python 3: module. * The `Porting guide`_ from the *py3c* project provides opinionated suggestions with supporting code. -* The `Cython`_ and `CFFI`_ libraries offer abstractions over - Python's C API. +* :ref:`Recommended third party tools ` offer abstractions over + the Python's C API. Extensions generally need to be re-written to use one of them, but the library then handles differences between various Python versions and implementations. .. _Migrating C extensions: http://python3porting.com/cextensions.html .. _Porting guide: https://py3c.readthedocs.io/en/latest/guide.html -.. _Cython: https://cython.org/ -.. _CFFI: https://cffi.readthedocs.io/en/latest/ diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 3f6ee517050bd8..02b45879ccfaca 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -6,8 +6,8 @@ C API Extension Support for Free Threading ****************************************** -Starting with the 3.13 release, CPython has experimental support for running -with the :term:`global interpreter lock` (GIL) disabled in a configuration +Starting with the 3.13 release, CPython has support for running with +the :term:`global interpreter lock` (GIL) disabled in a configuration called :term:`free threading`. This document describes how to adapt C API extensions to support free threading. @@ -23,6 +23,14 @@ You can use it to enable code that only runs under the free-threaded build:: /* code that only runs in the free-threaded build */ #endif +.. note:: + + On Windows, this macro is not defined automatically, but must be specified + to the compiler when building. The :func:`sysconfig.get_config_var` function + can be used to determine whether the current running interpreter had the + macro defined. + + Module Initialization ===================== @@ -388,7 +396,7 @@ The wheels, shared libraries, and binaries are indicated by a ``t`` suffix. free-threaded build, with the ``t`` suffix, such as ``python3.13t``. * `pypa/cibuildwheel `_ supports the free-threaded build if you set - `CIBW_FREE_THREADED_SUPPORT `_. + `CIBW_ENABLE to cpython-freethreading `_. Limited C API and Stable ABI ............................ diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst index f7a894ac2cd78e..24069617c47ae1 100644 --- a/Doc/howto/free-threading-python.rst +++ b/Doc/howto/free-threading-python.rst @@ -1,18 +1,21 @@ .. _freethreading-python-howto: -********************************************** -Python experimental support for free threading -********************************************** +********************************* +Python support for free threading +********************************* -Starting with the 3.13 release, CPython has experimental support for a build of +Starting with the 3.13 release, CPython has support for a build of Python called :term:`free threading` where the :term:`global interpreter lock` (GIL) is disabled. Free-threaded execution allows for full utilization of the available processing power by running threads in parallel on available CPU cores. While not all software will benefit from this automatically, programs designed with threading in mind will run faster on multi-core hardware. -**The free-threaded mode is experimental** and work is ongoing to improve it: -expect some bugs and a substantial single-threaded performance hit. +The free-threaded mode is working and continues to be improved, but +there is some additional overhead in single-threaded workloads compared +to the regular build. Additionally, third-party packages, in particular ones +with an :term:`extension module`, may not be ready for use in a +free-threaded build, and will re-enable the :term:`GIL`. This document describes the implications of free threading for Python code. See :ref:`freethreading-extensions-howto` for information on @@ -32,7 +35,7 @@ optionally support installing free-threaded Python binaries. The installers are available at https://www.python.org/downloads/. For information on other platforms, see the `Installing a Free-Threaded Python -`_, a +`_, a community-maintained installation guide for installing free-threaded Python. When building CPython from source, the :option:`--disable-gil` configure option @@ -43,7 +46,7 @@ Identifying free-threaded Python ================================ To check if the current interpreter supports free-threading, :option:`python -VV <-V>` -and :data:`sys.version` contain "experimental free-threading build". +and :data:`sys.version` contain "free-threading build". The new :func:`sys._is_gil_enabled` function can be used to check whether the GIL is actually disabled in the running process. diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 1f0608fb0fc53f..b0b2414a1a02c8 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -602,7 +602,7 @@ generators: raise an exception inside the generator; the exception is raised by the ``yield`` expression where the generator's execution is paused. -* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the +* :meth:`~generator.close` sends a :exc:`GeneratorExit` exception to the generator to terminate the iteration. On receiving this exception, the generator's code must either raise :exc:`GeneratorExit` or :exc:`StopIteration`; catching the exception and doing anything else is @@ -1217,7 +1217,7 @@ flow inside a program. The book uses Scheme for its examples, but many of the design approaches described in these chapters are applicable to functional-style Python code. -https://www.defmacro.org/ramblings/fp.html: A general introduction to functional +https://defmacro.org/2006/06/19/fp.html: A general introduction to functional programming that uses Java examples and has a lengthy historical introduction. https://en.wikipedia.org/wiki/Functional_programming: General Wikipedia entry diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index a636e06bda8344..7da6dc8a39795e 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -168,7 +168,7 @@ possible, consider explicit locking. If it is necessary to use process-global state, the simplest way to avoid issues with multiple interpreters is to explicitly prevent a module from being loaded more than once per process—see -`Opt-Out: Limiting to One Module Object per Process`_. +:ref:`isolating-extensions-optout`. Managing Per-Module State @@ -207,6 +207,8 @@ An example of a module with per-module state is currently available as example module initialization shown at the bottom of the file. +.. _isolating-extensions-optout: + Opt-Out: Limiting to One Module Object per Process -------------------------------------------------- @@ -215,21 +217,36 @@ multiple interpreters correctly. If this is not yet the case for your module, you can explicitly make your module loadable only once per process. For example:: + // A process-wide flag static int loaded = 0; + // Mutex to provide thread safety (only needed for free-threaded Python) + static PyMutex modinit_mutex = {0}; + static int exec_module(PyObject* module) { + PyMutex_Lock(&modinit_mutex); if (loaded) { + PyMutex_Unlock(&modinit_mutex); PyErr_SetString(PyExc_ImportError, "cannot load module more than once per process"); return -1; } loaded = 1; + PyMutex_Unlock(&modinit_mutex); // ... rest of initialization } +If your module's :c:member:`PyModuleDef.m_clear` function is able to prepare +for future re-initialization, it should clear the ``loaded`` flag. +In this case, your module won't support multiple instances existing +*concurrently*, but it will, for example, support being loaded after +Python runtime shutdown (:c:func:`Py_FinalizeEx`) and re-initialization +(:c:func:`Py_Initialize`). + + Module State Access from Functions ---------------------------------- @@ -436,7 +453,7 @@ Avoiding ``PyObject_New`` GC-tracked objects need to be allocated using GC-aware functions. -If you use use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: +If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: - Get and call type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible. That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with:: @@ -609,8 +626,7 @@ Open Issues Several issues around per-module state and heap types are still open. -Discussions about improving the situation are best held on the `capi-sig -mailing list `__. +Discussions about improving the situation are best held on the `discuss forum under c-api tag `__. Per-Class Scope diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index 7d64a02358adb3..52537a91df542c 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -4078,6 +4078,104 @@ lines. With this approach, you get better output: WARNING:demo: 1/0 WARNING:demo:ZeroDivisionError: division by zero +How to uniformly handle newlines in logging output +-------------------------------------------------- + +Usually, messages that are logged (say to console or file) consist of a single +line of text. However, sometimes there is a need to handle messages with +multiple lines - whether because a logging format string contains newlines, or +logged data contains newlines. If you want to handle such messages uniformly, so +that each line in the logged message appears uniformly formatted as if it was +logged separately, you can do this using a handler mixin, as in the following +snippet: + +.. code-block:: python + + # Assume this is in a module mymixins.py + import copy + + class MultilineMixin: + def emit(self, record): + s = record.getMessage() + if '\n' not in s: + super().emit(record) + else: + lines = s.splitlines() + rec = copy.copy(record) + rec.args = None + for line in lines: + rec.msg = line + super().emit(rec) + +You can use the mixin as in the following script: + +.. code-block:: python + + import logging + + from mymixins import MultilineMixin + + logger = logging.getLogger(__name__) + + class StreamHandler(MultilineMixin, logging.StreamHandler): + pass + + if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)-9s %(message)s', + handlers = [StreamHandler()]) + logger.debug('Single line') + logger.debug('Multiple lines:\nfool me once ...') + logger.debug('Another single line') + logger.debug('Multiple lines:\n%s', 'fool me ...\ncan\'t get fooled again') + +The script, when run, prints something like: + +.. code-block:: text + + 2025-07-02 13:54:47,234 DEBUG Single line + 2025-07-02 13:54:47,234 DEBUG Multiple lines: + 2025-07-02 13:54:47,234 DEBUG fool me once ... + 2025-07-02 13:54:47,234 DEBUG Another single line + 2025-07-02 13:54:47,234 DEBUG Multiple lines: + 2025-07-02 13:54:47,234 DEBUG fool me ... + 2025-07-02 13:54:47,234 DEBUG can't get fooled again + +If, on the other hand, you are concerned about `log injection +`_, you can use a +formatter which escapes newlines, as per the following example: + +.. code-block:: python + + import logging + + logger = logging.getLogger(__name__) + + class EscapingFormatter(logging.Formatter): + def format(self, record): + s = super().format(record) + return s.replace('\n', r'\n') + + if __name__ == '__main__': + h = logging.StreamHandler() + h.setFormatter(EscapingFormatter('%(asctime)s %(levelname)-9s %(message)s')) + logging.basicConfig(level=logging.DEBUG, handlers = [h]) + logger.debug('Single line') + logger.debug('Multiple lines:\nfool me once ...') + logger.debug('Another single line') + logger.debug('Multiple lines:\n%s', 'fool me ...\ncan\'t get fooled again') + +You can, of course, use whatever escaping scheme makes the most sense for you. +The script, when run, should produce output like this: + +.. code-block:: text + + 2025-07-09 06:47:33,783 DEBUG Single line + 2025-07-09 06:47:33,783 DEBUG Multiple lines:\nfool me once ... + 2025-07-09 06:47:33,783 DEBUG Another single line + 2025-07-09 06:47:33,783 DEBUG Multiple lines:\nfool me ...\ncan't get fooled again + +Escaping behaviour can't be the stdlib default , as it would break backwards +compatibility. .. patterns-to-avoid: diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index e543f6d5657d79..7486a378dbb06f 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -1016,7 +1016,9 @@ extension. This regular expression matches ``foo.bar`` and Now, consider complicating the problem a bit; what if you want to match filenames where the extension is not ``bat``? Some incorrect attempts: -``.*[.][^b].*$`` The first attempt above tries to exclude ``bat`` by requiring +``.*[.][^b].*$`` + +The first attempt above tries to exclude ``bat`` by requiring that the first character of the extension is not a ``b``. This is wrong, because the pattern also doesn't match ``foo.bar``. @@ -1043,7 +1045,9 @@ confusing. A negative lookahead cuts through all this confusion: -``.*[.](?!bat$)[^.]*$`` The negative lookahead means: if the expression ``bat`` +``.*[.](?!bat$)[^.]*$`` + +The negative lookahead means: if the expression ``bat`` doesn't match at this point, try the rest of the pattern; if ``bat$`` does match, the whole pattern will fail. The trailing ``$`` is required to ensure that something like ``sample.batch``, where the extension only starts with diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index 33a2a7ea89ea07..d79d1abe8d0577 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -245,75 +245,27 @@ codes in the 100--299 range indicate success, you will usually only see error codes in the 400--599 range. :attr:`http.server.BaseHTTPRequestHandler.responses` is a useful dictionary of -response codes in that shows all the response codes used by :rfc:`2616`. The -dictionary is reproduced here for convenience :: +response codes that shows all the response codes used by :rfc:`2616`. +An excerpt from the dictionary is shown below :: - # Table mapping response codes to messages; entries have the - # form {code: (shortmessage, longmessage)}. responses = { - 100: ('Continue', 'Request received, please continue'), - 101: ('Switching Protocols', - 'Switching to new protocol; obey Upgrade header'), - - 200: ('OK', 'Request fulfilled, document follows'), - 201: ('Created', 'Document created, URL follows'), - 202: ('Accepted', - 'Request accepted, processing continues off-line'), - 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), - 204: ('No Content', 'Request fulfilled, nothing follows'), - 205: ('Reset Content', 'Clear input form for further input.'), - 206: ('Partial Content', 'Partial content follows.'), - - 300: ('Multiple Choices', - 'Object has several resources -- see URI list'), - 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), - 302: ('Found', 'Object moved temporarily -- see URI list'), - 303: ('See Other', 'Object moved -- see Method and URL list'), - 304: ('Not Modified', - 'Document has not changed since given time'), - 305: ('Use Proxy', - 'You must use proxy specified in Location to access this ' - 'resource.'), - 307: ('Temporary Redirect', - 'Object moved temporarily -- see URI list'), - - 400: ('Bad Request', - 'Bad request syntax or unsupported method'), - 401: ('Unauthorized', - 'No permission -- see authorization schemes'), - 402: ('Payment Required', - 'No payment -- see charging schemes'), - 403: ('Forbidden', - 'Request forbidden -- authorization will not help'), - 404: ('Not Found', 'Nothing matches the given URI'), - 405: ('Method Not Allowed', - 'Specified method is invalid for this server.'), - 406: ('Not Acceptable', 'URI not available in preferred format.'), - 407: ('Proxy Authentication Required', 'You must authenticate with ' - 'this proxy before proceeding.'), - 408: ('Request Timeout', 'Request timed out; try again later.'), - 409: ('Conflict', 'Request conflict.'), - 410: ('Gone', - 'URI no longer exists and has been permanently removed.'), - 411: ('Length Required', 'Client must specify Content-Length.'), - 412: ('Precondition Failed', 'Precondition in headers is false.'), - 413: ('Request Entity Too Large', 'Entity is too large.'), - 414: ('Request-URI Too Long', 'URI is too long.'), - 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), - 416: ('Requested Range Not Satisfiable', - 'Cannot satisfy request range.'), - 417: ('Expectation Failed', - 'Expect condition could not be satisfied.'), - - 500: ('Internal Server Error', 'Server got itself in trouble'), - 501: ('Not Implemented', - 'Server does not support this operation'), - 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), - 503: ('Service Unavailable', - 'The server cannot process the request due to a high load'), - 504: ('Gateway Timeout', - 'The gateway server did not receive a timely response'), - 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + ... + : ('OK', 'Request fulfilled, document follows'), + ... + : ('Forbidden', + 'Request forbidden -- authorization will ' + 'not help'), + : ('Not Found', + 'Nothing matches the given URI'), + ... + : ("I'm a Teapot", + 'Server refuses to brew coffee because ' + 'it is a teapot'), + ... + : ('Service Unavailable', + 'The server cannot process the ' + 'request due to a high load'), + ... } When an error is raised the server responds by returning an HTTP error code diff --git a/Doc/includes/newtypes/custom.c b/Doc/includes/newtypes/custom.c index 5253f879360210..039a1a7219349c 100644 --- a/Doc/includes/newtypes/custom.c +++ b/Doc/includes/newtypes/custom.c @@ -16,28 +16,37 @@ static PyTypeObject CustomType = { .tp_new = PyType_GenericNew, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + // Just use this while using static types + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom2.c b/Doc/includes/newtypes/custom2.c index a87917583ca495..1ff8e707d1b0a0 100644 --- a/Doc/includes/newtypes/custom2.c +++ b/Doc/includes/newtypes/custom2.c @@ -106,28 +106,36 @@ static PyTypeObject CustomType = { .tp_methods = Custom_methods, }; -static PyModuleDef custommodule = { - .m_base =PyModuleDef_HEAD_INIT, +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom2", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom2(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom3.c b/Doc/includes/newtypes/custom3.c index 854034d4066d20..22f50eb0e1de89 100644 --- a/Doc/includes/newtypes/custom3.c +++ b/Doc/includes/newtypes/custom3.c @@ -151,28 +151,36 @@ static PyTypeObject CustomType = { .tp_getset = Custom_getsetters, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom3", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom3(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/custom4.c b/Doc/includes/newtypes/custom4.c index a0a1eeb289190b..07585aff5987f3 100644 --- a/Doc/includes/newtypes/custom4.c +++ b/Doc/includes/newtypes/custom4.c @@ -170,28 +170,36 @@ static PyTypeObject CustomType = { .tp_getset = Custom_getsetters, }; -static PyModuleDef custommodule = { +static int +custom_module_exec(PyObject *m) +{ + if (PyType_Ready(&CustomType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot custom_module_slots[] = { + {Py_mod_exec, custom_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef custom_module = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "custom4", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = custom_module_slots, }; PyMODINIT_FUNC PyInit_custom4(void) { - PyObject *m; - if (PyType_Ready(&CustomType) < 0) - return NULL; - - m = PyModule_Create(&custommodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "Custom", (PyObject *) &CustomType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&custom_module); } diff --git a/Doc/includes/newtypes/sublist.c b/Doc/includes/newtypes/sublist.c index 00664f3454156f..b784456a4ef667 100644 --- a/Doc/includes/newtypes/sublist.c +++ b/Doc/includes/newtypes/sublist.c @@ -31,7 +31,7 @@ SubList_init(PyObject *op, PyObject *args, PyObject *kwds) } static PyTypeObject SubListType = { - PyVarObject_HEAD_INIT(NULL, 0) + .ob_base = PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "sublist.SubList", .tp_doc = PyDoc_STR("SubList objects"), .tp_basicsize = sizeof(SubListObject), @@ -41,29 +41,37 @@ static PyTypeObject SubListType = { .tp_methods = SubList_methods, }; -static PyModuleDef sublistmodule = { - PyModuleDef_HEAD_INIT, +static int +sublist_module_exec(PyObject *m) +{ + SubListType.tp_base = &PyList_Type; + if (PyType_Ready(&SubListType) < 0) { + return -1; + } + + if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { + return -1; + } + + return 0; +} + +static PyModuleDef_Slot sublist_module_slots[] = { + {Py_mod_exec, sublist_module_exec}, + {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {0, NULL} +}; + +static PyModuleDef sublist_module = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "sublist", .m_doc = "Example module that creates an extension type.", - .m_size = -1, + .m_size = 0, + .m_slots = sublist_module_slots, }; PyMODINIT_FUNC PyInit_sublist(void) { - PyObject *m; - SubListType.tp_base = &PyList_Type; - if (PyType_Ready(&SubListType) < 0) - return NULL; - - m = PyModule_Create(&sublistmodule); - if (m == NULL) - return NULL; - - if (PyModule_AddObjectRef(m, "SubList", (PyObject *) &SubListType) < 0) { - Py_DECREF(m); - return NULL; - } - - return m; + return PyModuleDef_Init(&sublist_module); } diff --git a/Doc/installing/index.rst b/Doc/installing/index.rst index a46c1caefe4d8a..3a485a43a5a751 100644 --- a/Doc/installing/index.rst +++ b/Doc/installing/index.rst @@ -188,7 +188,7 @@ switch:: Once the Development & Deployment part of PPUG is fleshed out, some of those sections should be linked from new questions here (most notably, we should have a question about avoiding depending on PyPI that links to - https://packaging.python.org/en/latest/mirrors/) + https://packaging.python.org/en/latest/guides/index-mirrors-and-caches/) Common installation issues diff --git a/Doc/library/__future__.rst b/Doc/library/__future__.rst index 4f3b663006fb28..5d916b30112d3c 100644 --- a/Doc/library/__future__.rst +++ b/Doc/library/__future__.rst @@ -37,38 +37,52 @@ No feature description will ever be deleted from :mod:`__future__`. Since its introduction in Python 2.1 the following features have found their way into the language using this mechanism: -+------------------+-------------+--------------+---------------------------------------------+ -| feature | optional in | mandatory in | effect | -+==================+=============+==============+=============================================+ -| nested_scopes | 2.1.0b1 | 2.2 | :pep:`227`: | -| | | | *Statically Nested Scopes* | -+------------------+-------------+--------------+---------------------------------------------+ -| generators | 2.2.0a1 | 2.3 | :pep:`255`: | -| | | | *Simple Generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| division | 2.2.0a2 | 3.0 | :pep:`238`: | -| | | | *Changing the Division Operator* | -+------------------+-------------+--------------+---------------------------------------------+ -| absolute_import | 2.5.0a1 | 3.0 | :pep:`328`: | -| | | | *Imports: Multi-Line and Absolute/Relative* | -+------------------+-------------+--------------+---------------------------------------------+ -| with_statement | 2.5.0a1 | 2.6 | :pep:`343`: | -| | | | *The "with" Statement* | -+------------------+-------------+--------------+---------------------------------------------+ -| print_function | 2.6.0a2 | 3.0 | :pep:`3105`: | -| | | | *Make print a function* | -+------------------+-------------+--------------+---------------------------------------------+ -| unicode_literals | 2.6.0a2 | 3.0 | :pep:`3112`: | -| | | | *Bytes literals in Python 3000* | -+------------------+-------------+--------------+---------------------------------------------+ -| generator_stop | 3.5.0b1 | 3.7 | :pep:`479`: | -| | | | *StopIteration handling inside generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| annotations | 3.7.0b1 | Never [1]_ | :pep:`563`: | -| | | | *Postponed evaluation of annotations*, | -| | | | :pep:`649`: *Deferred evaluation of | -| | | | annotations using descriptors* | -+------------------+-------------+--------------+---------------------------------------------+ + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * feature + * optional in + * mandatory in + * effect + * * .. data:: nested_scopes + * 2.1.0b1 + * 2.2 + * :pep:`227`: *Statically Nested Scopes* + * * .. data:: generators + * 2.2.0a1 + * 2.3 + * :pep:`255`: *Simple Generators* + * * .. data:: division + * 2.2.0a2 + * 3.0 + * :pep:`238`: *Changing the Division Operator* + * * .. data:: absolute_import + * 2.5.0a1 + * 3.0 + * :pep:`328`: *Imports: Multi-Line and Absolute/Relative* + * * .. data:: with_statement + * 2.5.0a1 + * 2.6 + * :pep:`343`: *The “with” Statement* + * * .. data:: print_function + * 2.6.0a2 + * 3.0 + * :pep:`3105`: *Make print a function* + * * .. data:: unicode_literals + * 2.6.0a2 + * 3.0 + * :pep:`3112`: *Bytes literals in Python 3000* + * * .. data:: generator_stop + * 3.5.0b1 + * 3.7 + * :pep:`479`: *StopIteration handling inside generators* + * * .. data:: annotations + * 3.7.0b1 + * Never [1]_ + * :pep:`563`: *Postponed evaluation of annotations*, + :pep:`649`: *Deferred evaluation of annotations using descriptors* .. XXX Adding a new entry? Remember to update simple_stmts.rst, too. diff --git a/Doc/library/annotationlib.rst b/Doc/library/annotationlib.rst index ff9578b6088f28..7dfc11449a6cbc 100644 --- a/Doc/library/annotationlib.rst +++ b/Doc/library/annotationlib.rst @@ -127,16 +127,27 @@ Classes Values are the result of evaluating the annotation expressions. - .. attribute:: FORWARDREF + .. attribute:: VALUE_WITH_FAKE_GLOBALS :value: 2 + Special value used to signal that an annotate function is being + evaluated in a special environment with fake globals. When passed this + value, annotate functions should either return the same value as for + the :attr:`Format.VALUE` format, or raise :exc:`NotImplementedError` + to signal that they do not support execution in this environment. + This format is only used internally and should not be passed to + the functions in this module. + + .. attribute:: FORWARDREF + :value: 3 + Values are real annotation values (as per :attr:`Format.VALUE` format) for defined values, and :class:`ForwardRef` proxies for undefined values. Real objects may contain references to :class:`ForwardRef` proxy objects. .. attribute:: STRING - :value: 3 + :value: 4 Values are the text string of the annotation as it appears in the source code, up to modifications including, but not restricted to, @@ -144,17 +155,6 @@ Classes The exact values of these strings may change in future versions of Python. - .. attribute:: VALUE_WITH_FAKE_GLOBALS - :value: 4 - - Special value used to signal that an annotate function is being - evaluated in a special environment with fake globals. When passed this - value, annotate functions should either return the same value as for - the :attr:`Format.VALUE` format, or raise :exc:`NotImplementedError` - to signal that they do not support execution in this environment. - This format is only used internally and should not be passed to - the functions in this module. - .. versionadded:: 3.14 .. class:: ForwardRef @@ -211,6 +211,10 @@ Classes means may not have any information about their scope, so passing arguments to this method may be necessary to evaluate them successfully. + If no *owner*, *globals*, *locals*, or *type_params* are provided and the + :class:`~ForwardRef` does not contain information about its origin, + empty globals and locals dictionaries are used. + .. versionadded:: 3.14 @@ -485,3 +489,117 @@ annotations from the class and puts them in a separate attribute: typ.classvars = classvars # Store the ClassVars in a separate attribute return typ + +Limitations of the ``STRING`` format +------------------------------------ + +The :attr:`~Format.STRING` format is meant to approximate the source code +of the annotation, but the implementation strategy used means that it is not +always possible to recover the exact source code. + +First, the stringifier of course cannot recover any information that is not present in +the compiled code, including comments, whitespace, parenthesization, and operations that +get simplified by the compiler. + +Second, the stringifier can intercept almost all operations that involve names looked +up in some scope, but it cannot intercept operations that operate fully on constants. +As a corollary, this also means it is not safe to request the ``STRING`` format on +untrusted code: Python is powerful enough that it is possible to achieve arbitrary +code execution even with no access to any globals or builtins. For example: + +.. code-block:: pycon + + >>> def f(x: (1).__class__.__base__.__subclasses__()[-1].__init__.__builtins__["print"]("Hello world")): pass + ... + >>> annotationlib.get_annotations(f, format=annotationlib.Format.SOURCE) + Hello world + {'x': 'None'} + +.. note:: + This particular example works as of the time of writing, but it relies on + implementation details and is not guaranteed to work in the future. + +Among the different kinds of expressions that exist in Python, +as represented by the :mod:`ast` module, some expressions are supported, +meaning that the ``STRING`` format can generally recover the original source code; +others are unsupported, meaning that they may result in incorrect output or an error. + +The following are supported (sometimes with caveats): + +* :class:`ast.BinOp` +* :class:`ast.UnaryOp` + + * :class:`ast.Invert` (``~``), :class:`ast.UAdd` (``+``), and :class:`ast.USub` (``-``) are supported + * :class:`ast.Not` (``not``) is not supported + +* :class:`ast.Dict` (except when using ``**`` unpacking) +* :class:`ast.Set` +* :class:`ast.Compare` + + * :class:`ast.Eq` and :class:`ast.NotEq` are supported + * :class:`ast.Lt`, :class:`ast.LtE`, :class:`ast.Gt`, and :class:`ast.GtE` are supported, but the operand may be flipped + * :class:`ast.Is`, :class:`ast.IsNot`, :class:`ast.In`, and :class:`ast.NotIn` are not supported + +* :class:`ast.Call` (except when using ``**`` unpacking) +* :class:`ast.Constant` (though not the exact representation of the constant; for example, escape + sequences in strings are lost; hexadecimal numbers are converted to decimal) +* :class:`ast.Attribute` (assuming the value is not a constant) +* :class:`ast.Subscript` (assuming the value is not a constant) +* :class:`ast.Starred` (``*`` unpacking) +* :class:`ast.Name` +* :class:`ast.List` +* :class:`ast.Tuple` +* :class:`ast.Slice` + +The following are unsupported, but throw an informative error when encountered by the +stringifier: + +* :class:`ast.FormattedValue` (f-strings; error is not detected if conversion specifiers like ``!r`` + are used) +* :class:`ast.JoinedStr` (f-strings) + +The following are unsupported and result in incorrect output: + +* :class:`ast.BoolOp` (``and`` and ``or``) +* :class:`ast.IfExp` +* :class:`ast.Lambda` +* :class:`ast.ListComp` +* :class:`ast.SetComp` +* :class:`ast.DictComp` +* :class:`ast.GeneratorExp` + +The following are disallowed in annotation scopes and therefore not relevant: + +* :class:`ast.NamedExpr` (``:=``) +* :class:`ast.Await` +* :class:`ast.Yield` +* :class:`ast.YieldFrom` + + +Limitations of the ``FORWARDREF`` format +---------------------------------------- + +The :attr:`~Format.FORWARDREF` format aims to produce real values as much +as possible, with anything that cannot be resolved replaced with +:class:`ForwardRef` objects. It is affected by broadly the same Limitations +as the :attr:`~Format.STRING` format: annotations that perform operations on +literals or that use unsupported expression types may raise exceptions when +evaluated using the :attr:`~Format.FORWARDREF` format. + +Below are a few examples of the behavior with unsupported expressions: + +.. code-block:: pycon + + >>> from annotationlib import get_annotations, Format + >>> def zerodiv(x: 1 / 0): ... + >>> get_annotations(zerodiv, format=Format.STRING) + Traceback (most recent call last): + ... + ZeroDivisionError: division by zero + >>> get_annotations(zerodiv, format=Format.FORWARDREF) + Traceback (most recent call last): + ... + ZeroDivisionError: division by zero + >>> def ifexp(x: 1 if y else 0): ... + >>> get_annotations(ifexp, format=Format.STRING) + {'x': '1'} diff --git a/Doc/library/archiving.rst b/Doc/library/archiving.rst index c9284949af4972..da0b3f8c3e7693 100644 --- a/Doc/library/archiving.rst +++ b/Doc/library/archiving.rst @@ -5,13 +5,15 @@ Data Compression and Archiving ****************************** The modules described in this chapter support data compression with the zlib, -gzip, bzip2 and lzma algorithms, and the creation of ZIP- and tar-format +gzip, bzip2, lzma, and zstd algorithms, and the creation of ZIP- and tar-format archives. See also :ref:`archiving-operations` provided by the :mod:`shutil` module. .. toctree:: + compression.rst + compression.zstd.rst zlib.rst gzip.rst bz2.rst diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 29396c7a0366a1..f189f6b8fa8953 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -839,23 +839,11 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args(['--version']) PROG 2.0 -Only actions that consume command-line arguments (e.g. ``'store'``, -``'append'`` or ``'extend'``) can be used with positional arguments. - -.. class:: BooleanOptionalAction - - You may also specify an arbitrary action by passing an :class:`Action` subclass or - other object that implements the same interface. The :class:`!BooleanOptionalAction` - is available in :mod:`!argparse` and adds support for boolean actions such as - ``--foo`` and ``--no-foo``:: - - >>> import argparse - >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('--foo', action=argparse.BooleanOptionalAction) - >>> parser.parse_args(['--no-foo']) - Namespace(foo=False) - - .. versionadded:: 3.9 +You may also specify an arbitrary action by passing an :class:`Action` subclass +(e.g. :class:`BooleanOptionalAction`) or other object that implements the same +interface. Only actions that consume command-line arguments (e.g. ``'store'``, +``'append'``, ``'extend'``, or custom actions with non-zero ``nargs``) can be used +with positional arguments. The recommended way to create a custom action is to extend :class:`Action`, overriding the :meth:`!__call__` method and optionally the :meth:`!__init__` and @@ -955,7 +943,7 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: .. index:: single: + (plus); in argparse module -* ``'+'``. Just like ``'*'``, all command-line args present are gathered into a +* ``'+'``. Just like ``'*'``, all command-line arguments present are gathered into a list. Additionally, an error message will be generated if there wasn't at least one command-line argument present. For example:: @@ -1429,6 +1417,21 @@ this API may be passed as the ``action`` parameter to and return a string which will be used when printing the usage of the program. If such method is not provided, a sensible default will be used. +.. class:: BooleanOptionalAction + + A subclass of :class:`Action` for handling boolean flags with positive + and negative options. Adding a single argument such as ``--foo`` automatically + creates both ``--foo`` and ``--no-foo`` options, storing ``True`` and ``False`` + respectively:: + + >>> import argparse + >>> parser = argparse.ArgumentParser() + >>> parser.add_argument('--foo', action=argparse.BooleanOptionalAction) + >>> parser.parse_args(['--no-foo']) + Namespace(foo=False) + + .. versionadded:: 3.9 + The parse_args() method ----------------------- @@ -2122,12 +2125,15 @@ Partial parsing .. method:: ArgumentParser.parse_known_args(args=None, namespace=None) - Sometimes a script may only parse a few of the command-line arguments, passing - the remaining arguments on to another script or program. In these cases, the - :meth:`~ArgumentParser.parse_known_args` method can be useful. It works much like - :meth:`~ArgumentParser.parse_args` except that it does not produce an error when - extra arguments are present. Instead, it returns a two item tuple containing - the populated namespace and the list of remaining argument strings. + Sometimes a script only needs to handle a specific set of command-line + arguments, leaving any unrecognized arguments for another script or program. + In these cases, the :meth:`~ArgumentParser.parse_known_args` method can be + useful. + + This method works similarly to :meth:`~ArgumentParser.parse_args`, but it does + not raise an error for extra, unrecognized arguments. Instead, it parses the + known arguments and returns a two item tuple that contains the populated + namespace and the list of any unrecognized arguments. :: diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index ca9a6b0712c9a2..ca0654acb33689 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -268,9 +268,9 @@ Literals .. class:: Constant(value) A constant value. The ``value`` attribute of the ``Constant`` literal contains the - Python object it represents. The values represented can be simple types - such as a number, string or ``None``, but also immutable container types - (tuples and frozensets) if all of their elements are constant. + Python object it represents. The values represented can be instances of :class:`str`, + :class:`bytes`, :class:`int`, :class:`float`, :class:`complex`, and :class:`bool`, + and the constants :data:`None` and :data:`Ellipsis`. .. doctest:: @@ -2445,8 +2445,9 @@ and classes for traversing abstract syntax trees: indents that many spaces per level. If *indent* is a string (such as ``"\t"``), that string is used to indent each level. - If *show_empty* is ``False`` (the default), empty lists and fields that are ``None`` - will be omitted from the output. + If *show_empty* is false (the default), optional empty lists will be + omitted from the output. + Optional ``None`` values are always omitted. .. versionchanged:: 3.9 Added the *indent* option. diff --git a/Doc/library/asyncio-dev.rst b/Doc/library/asyncio-dev.rst index 44b507a9811116..7831b613bd4a60 100644 --- a/Doc/library/asyncio-dev.rst +++ b/Doc/library/asyncio-dev.rst @@ -46,10 +46,6 @@ In addition to enabling the debug mode, consider also: When the debug mode is enabled: -* asyncio checks for :ref:`coroutines that were not awaited - ` and logs them; this mitigates - the "forgotten await" pitfall. - * Many non-threadsafe asyncio APIs (such as :meth:`loop.call_soon` and :meth:`loop.call_at` methods) raise an exception if they are called from a wrong thread. diff --git a/Doc/library/asyncio-eventloop.rst b/Doc/library/asyncio-eventloop.rst index 21f7d0547af1dd..91970c282391f7 100644 --- a/Doc/library/asyncio-eventloop.rst +++ b/Doc/library/asyncio-eventloop.rst @@ -361,7 +361,7 @@ Creating Futures and Tasks .. versionadded:: 3.5.2 -.. method:: loop.create_task(coro, *, name=None, context=None, eager_start=None) +.. method:: loop.create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Schedule the execution of :ref:`coroutine ` *coro*. Return a :class:`Task` object. @@ -370,6 +370,10 @@ Creating Futures and Tasks for interoperability. In this case, the result type is a subclass of :class:`Task`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. + If the *name* argument is provided and not ``None``, it is set as the name of the task using :meth:`Task.set_name`. @@ -388,8 +392,15 @@ Creating Futures and Tasks .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.13.3 + Added ``kwargs`` which passes on arbitrary extra parameters, including ``name`` and ``context``. + + .. versionchanged:: 3.13.4 + Rolled back the change that passes on *name* and *context* (if it is None), + while still passing on other arbitrary keyword arguments (to avoid breaking backwards compatibility with 3.13.3). + .. versionchanged:: 3.14 - Added the *eager_start* parameter. + All *kwargs* are now passed on. The *eager_start* parameter works with eager task factories. .. method:: loop.set_task_factory(factory) @@ -402,6 +413,16 @@ Creating Futures and Tasks event loop, and *coro* is a coroutine object. The callable must pass on all *kwargs*, and return a :class:`asyncio.Task`-compatible object. + .. versionchanged:: 3.13.3 + Required that all *kwargs* are passed on to :class:`asyncio.Task`. + + .. versionchanged:: 3.13.4 + *name* is no longer passed to task factories. *context* is no longer passed + to task factories if it is ``None``. + + .. versionchanged:: 3.14 + *name* and *context* are now unconditionally passed on to task factories again. + .. method:: loop.get_task_factory() Return a task factory or ``None`` if the default one is in use. diff --git a/Doc/library/asyncio-stream.rst b/Doc/library/asyncio-stream.rst index c56166cabb9093..90c90862ca1ed3 100644 --- a/Doc/library/asyncio-stream.rst +++ b/Doc/library/asyncio-stream.rst @@ -171,13 +171,17 @@ and work with streams: .. function:: start_unix_server(client_connected_cb, path=None, \ *, limit=None, sock=None, backlog=100, ssl=None, \ ssl_handshake_timeout=None, \ - ssl_shutdown_timeout=None, start_serving=True) + ssl_shutdown_timeout=None, start_serving=True, cleanup_socket=True) :async: Start a Unix socket server. Similar to :func:`start_server` but works with Unix sockets. + If *cleanup_socket* is true then the Unix socket will automatically + be removed from the filesystem when the server is closed, unless the + socket has been replaced after the server has been created. + See also the documentation of :meth:`loop.create_unix_server`. .. note:: @@ -198,6 +202,9 @@ and work with streams: .. versionchanged:: 3.11 Added the *ssl_shutdown_timeout* parameter. + .. versionchanged:: 3.13 + Added the *cleanup_socket* parameter. + StreamReader ============ diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index 59acce1990ae04..b19ffa8213a971 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -238,18 +238,24 @@ Creating Tasks ----------------------------------------------- -.. function:: create_task(coro, *, name=None, context=None) +.. function:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Wrap the *coro* :ref:`coroutine ` into a :class:`Task` and schedule its execution. Return the Task object. - If *name* is not ``None``, it is set as the name of the task using - :meth:`Task.set_name`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. An optional keyword-only *context* argument allows specifying a custom :class:`contextvars.Context` for the *coro* to run in. The current context copy is created when no *context* is provided. + An optional keyword-only *eager_start* argument allows specifying + if the task should execute eagerly during the call to create_task, + or be scheduled later. If *eager_start* is not passed the mode set + by :meth:`loop.set_task_factory` will be used. + The task is executed in the loop returned by :func:`get_running_loop`, :exc:`RuntimeError` is raised if there is no running loop in current thread. @@ -290,6 +296,9 @@ Creating Tasks .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.14 + Added the *eager_start* parameter by passing on all *kwargs*. + Task Cancellation ================= @@ -330,7 +339,7 @@ and reliable way to wait for all tasks in the group to finish. .. versionadded:: 3.11 - .. method:: create_task(coro, *, name=None, context=None) + .. method:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Create a task in this task group. The signature matches that of :func:`asyncio.create_task`. @@ -342,6 +351,10 @@ and reliable way to wait for all tasks in the group to finish. Close the given coroutine if the task group is not active. + .. versionchanged:: 3.14 + + Passes on all *kwargs* to :meth:`loop.create_task` + Example:: async def main(): diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 834ab2536e6c14..529a7242443820 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -15,14 +15,9 @@ This module provides functions for encoding binary data to printable ASCII characters and decoding such encodings back to binary data. -It provides encoding and decoding functions for the encodings specified in -:rfc:`4648`, which defines the Base16, Base32, and Base64 algorithms, -and for the de-facto standard Ascii85 and Base85 encodings. - -The :rfc:`4648` encodings are suitable for encoding binary data so that it can be -safely sent by email, used as parts of URLs, or included as part of an HTTP -POST request. The encoding algorithm is not the same as the -:program:`uuencode` program. +This includes the :ref:`encodings specified in ` +:rfc:`4648` (Base64, Base32 and Base16) +and the non-standard :ref:`Base85 encodings `. There are two interfaces provided by this module. The modern interface supports encoding :term:`bytes-like objects ` to ASCII @@ -30,7 +25,7 @@ supports encoding :term:`bytes-like objects ` to ASCII strings containing ASCII to :class:`bytes`. Both base-64 alphabets defined in :rfc:`4648` (normal, and URL- and filesystem-safe) are supported. -The legacy interface does not support decoding from strings, but it does +The :ref:`legacy interface ` does not support decoding from strings, but it does provide functions for encoding and decoding to and from :term:`file objects `. It only supports the Base64 standard alphabet, and it adds newlines every 76 characters as per :rfc:`2045`. Note that if you are looking @@ -46,7 +41,15 @@ package instead. Any :term:`bytes-like objects ` are now accepted by all encoding and decoding functions in this module. Ascii85/Base85 support added. -The modern interface provides: + +.. _base64-rfc-4648: + +RFC 4648 Encodings +------------------ + +The :rfc:`4648` encodings are suitable for encoding binary data so that it can be +safely sent by email, used as parts of URLs, or included as part of an HTTP +POST request. .. function:: b64encode(s, altchars=None) @@ -181,6 +184,26 @@ The modern interface provides: incorrectly padded or if there are non-alphabet characters present in the input. +.. _base64-base-85: + +Base85 Encodings +----------------- + +Base85 encoding is not formally specified but rather a de facto standard, +thus different systems perform the encoding differently. + +The :func:`a85encode` and :func:`b85encode` functions in this module are two implementations of +the de facto standard. You should call the function with the Base85 +implementation used by the software you intend to work with. + +The two functions present in this module differ in how they handle the following: + +* Whether to include enclosing ``<~`` and ``~>`` markers +* Whether to include newline characters +* The set of ASCII characters used for encoding +* Handling of null bytes + +Refer to the documentation of the individual functions for more information. .. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -262,7 +285,10 @@ The modern interface provides: .. versionadded:: 3.13 -The legacy interface: +.. _base64-legacy: + +Legacy Interface +---------------- .. function:: decode(input, output) diff --git a/Doc/library/code.rst b/Doc/library/code.rst index 8f7692df9fb22d..52587c4dd8f8e8 100644 --- a/Doc/library/code.rst +++ b/Doc/library/code.rst @@ -22,6 +22,12 @@ build applications which provide an interactive interpreter prompt. it defaults to a newly created dictionary with key ``'__name__'`` set to ``'__console__'`` and key ``'__doc__'`` set to ``None``. + Note that functions and classes objects created under an + :class:`!InteractiveInterpreter` instance will belong to the namespace + specified by *locals*. + They are only pickleable if *locals* is the namespace of an existing + module. + .. class:: InteractiveConsole(locals=None, filename="", local_exit=False) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 86511602fa5a60..0e84f18dd4d5d5 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -53,6 +53,14 @@ any codec: :exc:`UnicodeDecodeError`). Refer to :ref:`codec-base-classes` for more information on codec error handling. +.. function:: charmap_build(string) + + Return a mapping suitable for encoding with a custom single-byte encoding. + Given a :class:`str` *string* of up to 256 characters representing a + decoding table, returns either a compact internal mapping object + ``EncodingMap`` or a :class:`dictionary ` mapping character ordinals + to byte values. Raises a :exc:`TypeError` on invalid input. + The full details for each codec can also be looked up directly: .. function:: lookup(encoding, /) @@ -257,6 +265,20 @@ wider range of codecs when working with binary files: :func:`iterencode`. +.. function:: readbuffer_encode(buffer, errors=None, /) + + Return a :class:`tuple` containing the raw bytes of *buffer*, a + :ref:`buffer-compatible object ` or :class:`str` + (encoded to UTF-8 before processing), and their length in bytes. + + The *errors* argument is ignored. + + .. code-block:: pycon + + >>> codecs.readbuffer_encode(b"Zito") + (b'Zito', 4) + + The module also provides the following constants which are useful for reading and writing to platform dependent files: @@ -1476,6 +1498,66 @@ mapping. It is not supported by :meth:`str.encode` (which only produces Restoration of the ``rot13`` alias. +:mod:`encodings` --- Encodings package +-------------------------------------- + +.. module:: encodings + :synopsis: Encodings package + +This module implements the following functions: + +.. function:: normalize_encoding(encoding) + + Normalize encoding name *encoding*. + + Normalization works as follows: all non-alphanumeric characters except the + dot used for Python package names are collapsed and replaced with a single + underscore, leading and trailing underscores are removed. + For example, ``' -;#'`` becomes ``'_'``. + + Note that *encoding* should be ASCII only. + + +.. note:: + The following functions should not be used directly, except for testing + purposes; :func:`codecs.lookup` should be used instead. + + +.. function:: search_function(encoding) + + Search for the codec module corresponding to the given encoding name + *encoding*. + + This function first normalizes the *encoding* using + :func:`normalize_encoding`, then looks for a corresponding alias. + It attempts to import a codec module from the encodings package using either + the alias or the normalized name. If the module is found and defines a valid + ``getregentry()`` function that returns a :class:`codecs.CodecInfo` object, + the codec is cached and returned. + + If the codec module defines a ``getaliases()`` function any returned aliases + are registered for future use. + + +.. function:: win32_code_page_search_function(encoding) + + Search for a Windows code page encoding *encoding* of the form ``cpXXXX``. + + If the code page is valid and supported, return a :class:`codecs.CodecInfo` + object for it. + + .. availability:: Windows. + + .. versionadded:: 3.14 + + +This module implements the following exception: + +.. exception:: CodecRegistryError + + Raised when a codec is invalid or incompatible. + + :mod:`encodings.idna` --- Internationalized Domain Names in Applications ------------------------------------------------------------------------ diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst index c42288419c4d2d..ebbbf857e717a4 100644 --- a/Doc/library/compileall.rst +++ b/Doc/library/compileall.rst @@ -56,11 +56,18 @@ compile Python sources. executed. .. option:: -s strip_prefix + + Remove the given prefix from paths recorded in the ``.pyc`` files. + Paths are made relative to the prefix. + + This option can be used with ``-p`` but not with ``-d``. + .. option:: -p prepend_prefix - Remove (``-s``) or append (``-p``) the given prefix of paths - recorded in the ``.pyc`` files. - Cannot be combined with ``-d``. + Prepend the given prefix to paths recorded in the ``.pyc`` files. + Use ``-p /`` to make the paths absolute. + + This option can be used with ``-s`` but not with ``-d``. .. option:: -x regex diff --git a/Doc/library/compression.rst b/Doc/library/compression.rst new file mode 100644 index 00000000000000..618b4a3c2bd170 --- /dev/null +++ b/Doc/library/compression.rst @@ -0,0 +1,18 @@ +The :mod:`!compression` package +=============================== + +.. versionadded:: 3.14 + +The :mod:`!compression` package contains the canonical compression modules +containing interfaces to several different compression algorithms. Some of +these modules have historically been available as separate modules; those will +continue to be available under their original names for compatibility reasons, +and will not be removed without a deprecation cycle. The use of modules in +:mod:`!compression` is encouraged where practical. + +* :mod:`!compression.bz2` -- Re-exports :mod:`bz2` +* :mod:`!compression.gzip` -- Re-exports :mod:`gzip` +* :mod:`!compression.lzma` -- Re-exports :mod:`lzma` +* :mod:`!compression.zlib` -- Re-exports :mod:`zlib` +* :mod:`compression.zstd` -- Wrapper for the Zstandard compression library + diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst new file mode 100644 index 00000000000000..a901403621b84f --- /dev/null +++ b/Doc/library/compression.zstd.rst @@ -0,0 +1,897 @@ +:mod:`!compression.zstd` --- Compression compatible with the Zstandard format +============================================================================= + +.. module:: compression.zstd + :synopsis: Low-level interface to compression and decompression routines in + the zstd library. + +.. versionadded:: 3.14 + +**Source code:** :source:`Lib/compression/zstd/__init__.py` + +-------------- + +This module provides classes and functions for compressing and decompressing +data using the Zstandard (or *zstd*) compression algorithm. The +`zstd manual `__ +describes Zstandard as "a fast lossless compression algorithm, targeting +real-time compression scenarios at zlib-level and better compression ratios." +Also included is a file interface that supports reading and writing the +contents of ``.zst`` files created by the :program:`zstd` utility, as well as +raw zstd compressed streams. + +The :mod:`!compression.zstd` module contains: + +* The :func:`.open` function and :class:`ZstdFile` class for reading and + writing compressed files. +* The :class:`ZstdCompressor` and :class:`ZstdDecompressor` classes for + incremental (de)compression. +* The :func:`compress` and :func:`decompress` functions for one-shot + (de)compression. +* The :func:`train_dict` and :func:`finalize_dict` functions and the + :class:`ZstdDict` class to train and manage Zstandard dictionaries. +* The :class:`CompressionParameter`, :class:`DecompressionParameter`, and + :class:`Strategy` classes for setting advanced (de)compression parameters. + + +Exceptions +---------- + +.. exception:: ZstdError + + This exception is raised when an error occurs during compression or + decompression, or while initializing the (de)compressor state. + + +Reading and writing compressed files +------------------------------------ + +.. function:: open(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None, encoding=None, errors=None, newline=None) + + Open a Zstandard-compressed file in binary or text mode, returning a + :term:`file object`. + + The *file* argument can be either a file name (given as a + :class:`str`, :class:`bytes` or :term:`path-like ` + object), in which case the named file is opened, or it can be an existing + file object to read from or write to. + + The mode argument can be either ``'rb'`` for reading (default), ``'wb'`` for + overwriting, ``'ab'`` for appending, or ``'xb'`` for exclusive creation. + These can equivalently be given as ``'r'``, ``'w'``, ``'a'``, and ``'x'`` + respectively. You may also open in text mode with ``'rt'``, ``'wt'``, + ``'at'``, and ``'xt'`` respectively. + + When reading, the *options* argument can be a dictionary providing advanced + decompression parameters; see :class:`DecompressionParameter` for detailed + information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be non-None. + The *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + In binary mode, this function is equivalent to the :class:`ZstdFile` + constructor: ``ZstdFile(file, mode, ...)``. In this case, the + *encoding*, *errors*, and *newline* parameters must not be provided. + + In text mode, a :class:`ZstdFile` object is created, and wrapped in an + :class:`io.TextIOWrapper` instance with the specified encoding, error + handling behavior, and line endings. + + +.. class:: ZstdFile(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None) + + Open a Zstandard-compressed file in binary mode. + + A :class:`ZstdFile` can wrap an already-open :term:`file object`, or operate + directly on a named file. The *file* argument specifies either the file + object to wrap, or the name of the file to open (as a :class:`str`, + :class:`bytes` or :term:`path-like ` object). If + wrapping an existing file object, the wrapped file will not be closed when + the :class:`ZstdFile` is closed. + + The *mode* argument can be either ``'rb'`` for reading (default), ``'wb'`` + for overwriting, ``'xb'`` for exclusive creation, or ``'ab'`` for appending. + These can equivalently be given as ``'r'``, ``'w'``, ``'x'`` and ``'a'`` + respectively. + + If *file* is a file object (rather than an actual file name), a mode of + ``'w'`` does not truncate the file, and is instead equivalent to ``'a'``. + + When reading, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`DecompressionParameter` for detailed information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be passed. The + *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + :class:`!ZstdFile` supports all the members specified by + :class:`io.BufferedIOBase`, except for :meth:`~io.BufferedIOBase.detach` + and :meth:`~io.IOBase.truncate`. + Iteration and the :keyword:`with` statement are supported. + + The following method and attributes are also provided: + + .. method:: peek(size=-1) + + Return buffered data without advancing the file position. At least one + byte of data will be returned, unless EOF has been reached. The exact + number of bytes returned is unspecified (the *size* argument is ignored). + + .. note:: While calling :meth:`peek` does not change the file position of + the :class:`ZstdFile`, it may change the position of the underlying + file object (for example, if the :class:`ZstdFile` was constructed by + passing a file object for *file*). + + .. attribute:: mode + + ``'rb'`` for reading and ``'wb'`` for writing. + + .. attribute:: name + + The name of the Zstandard file. Equivalent to the :attr:`~io.FileIO.name` + attribute of the underlying :term:`file object`. + + +Compressing and decompressing data in memory +-------------------------------------------- + +.. function:: compress(data, level=None, options=None, zstd_dict=None) + + Compress *data* (a :term:`bytes-like object`), returning the compressed + data as a :class:`bytes` object. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + +.. function:: decompress(data, zstd_dict=None, options=None) + + Decompress *data* (a :term:`bytes-like object`), returning the uncompressed + data as a :class:`bytes` object. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for compression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + If *data* is the concatenation of multiple distinct compressed frames, + decompress all of these frames, and return the concatenation of the results. + + +.. class:: ZstdCompressor(level=None, options=None, zstd_dict=None) + + Create a compressor object, which can be used to compress data + incrementally. + + For a more convenient way of compressing a single chunk of data, see the + module-level function :func:`compress`. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an optional instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + + .. method:: compress(data, mode=ZstdCompressor.CONTINUE) + + Compress *data* (a :term:`bytes-like object`), returning a :class:`bytes` + object with compressed data if possible, or otherwise an empty + :class:`!bytes` object. Some of *data* may be buffered internally, for + use in later calls to :meth:`!compress` and :meth:`~.flush`. The returned + data should be concatenated with the output of any previous calls to + :meth:`~.compress`. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.CONTINUE`, :attr:`~.FLUSH_BLOCK`, + or :attr:`~.FLUSH_FRAME`. + + When all data has been provided to the compressor, call the + :meth:`~.flush` method to finish the compression process. If + :meth:`~.compress` is called with *mode* set to :attr:`~.FLUSH_FRAME`, + :meth:`~.flush` should not be called, as it would write out a new empty + frame. + + .. method:: flush(mode=ZstdCompressor.FLUSH_FRAME) + + Finish the compression process, returning a :class:`bytes` object + containing any data stored in the compressor's internal buffers. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.FLUSH_BLOCK`, or :attr:`~.FLUSH_FRAME`. + + .. method:: set_pledged_input_size(size) + + Specify the amount of uncompressed data *size* that will be provided for + the next frame. *size* will be written into the frame header of the next + frame unless :attr:`CompressionParameter.content_size_flag` is ``False`` + or ``0``. A size of ``0`` means that the frame is empty. If *size* is + ``None``, the frame header will omit the frame size. Frames that include + the uncompressed data size require less memory to decompress, especially + at higher compression levels. + + If :attr:`last_mode` is not :attr:`FLUSH_FRAME`, a + :exc:`ValueError` is raised as the compressor is not at the start of + a frame. If the pledged size does not match the actual size of data + provided to :meth:`.compress`, future calls to :meth:`!compress` or + :meth:`flush` may raise :exc:`ZstdError` and the last chunk of data may + be lost. + + After :meth:`flush` or :meth:`.compress` are called with mode + :attr:`FLUSH_FRAME`, the next frame will not include the frame size into + the header unless :meth:`!set_pledged_input_size` is called again. + + .. attribute:: CONTINUE + + Collect more data for compression, which may or may not generate output + immediately. This mode optimizes the compression ratio by maximizing the + amount of data per block and frame. + + .. attribute:: FLUSH_BLOCK + + Complete and write a block to the data stream. The data returned so far + can be immediately decompressed. Past data can still be referenced in + future blocks generated by calls to :meth:`~.compress`, + improving compression. + + .. attribute:: FLUSH_FRAME + + Complete and write out a frame. Future data provided to + :meth:`~.compress` will be written into a new frame and + *cannot* reference past data. + + .. attribute:: last_mode + + The last mode passed to either :meth:`~.compress` or :meth:`~.flush`. + The value can be one of :attr:`~.CONTINUE`, :attr:`~.FLUSH_BLOCK`, or + :attr:`~.FLUSH_FRAME`. The initial value is :attr:`~.FLUSH_FRAME`, + signifying that the compressor is at the start of a new frame. + + +.. class:: ZstdDecompressor(zstd_dict=None, options=None) + + Create a decompressor object, which can be used to decompress data + incrementally. + + For a more convenient way of decompressing an entire compressed stream at + once, see the module-level function :func:`decompress`. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for compression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + .. note:: + This class does not transparently handle inputs containing multiple + compressed frames, unlike the :func:`decompress` function and + :class:`ZstdFile` class. To decompress a multi-frame input, you should + use :func:`decompress`, :class:`ZstdFile` if working with a + :term:`file object`, or multiple :class:`!ZstdDecompressor` instances. + + .. method:: decompress(data, max_length=-1) + + Decompress *data* (a :term:`bytes-like object`), returning + uncompressed data as bytes. Some of *data* may be buffered + internally, for use in later calls to :meth:`!decompress`. + The returned data should be concatenated with the output of any previous + calls to :meth:`!decompress`. + + If *max_length* is non-negative, the method returns at most *max_length* + bytes of decompressed data. If this limit is reached and further + output can be produced, the :attr:`~.needs_input` attribute will + be set to ``False``. In this case, the next call to + :meth:`~.decompress` may provide *data* as ``b''`` to obtain + more of the output. + + If all of the input data was decompressed and returned (either + because this was less than *max_length* bytes, or because + *max_length* was negative), the :attr:`~.needs_input` attribute + will be set to ``True``. + + Attempting to decompress data after the end of a frame will raise a + :exc:`ZstdError`. Any data found after the end of the frame is ignored + and saved in the :attr:`~.unused_data` attribute. + + .. attribute:: eof + + ``True`` if the end-of-stream marker has been reached. + + .. attribute:: unused_data + + Data found after the end of the compressed stream. + + Before the end of the stream is reached, this will be ``b''``. + + .. attribute:: needs_input + + ``False`` if the :meth:`.decompress` method can provide more + decompressed data before requiring new compressed input. + + +Zstandard dictionaries +---------------------- + + +.. function:: train_dict(samples, dict_size) + + Train a Zstandard dictionary, returning a :class:`ZstdDict` instance. + Zstandard dictionaries enable more efficient compression of smaller sizes + of data, which is traditionally difficult to compress due to less + repetition. If you are compressing multiple similar groups of data (such as + similar files), Zstandard dictionaries can improve compression ratios and + speed significantly. + + The *samples* argument (an iterable of :class:`bytes` objects), is the + population of samples used to train the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. The Zstandard documentation suggests an + absolute maximum of no more than 100 KB, but the maximum can often be smaller + depending on the data. Larger dictionaries generally slow down compression, + but improve compression ratios. Smaller dictionaries lead to faster + compression, but reduce the compression ratio. + + +.. function:: finalize_dict(zstd_dict, /, samples, dict_size, level) + + An advanced function for converting a "raw content" Zstandard dictionary into + a regular Zstandard dictionary. "Raw content" dictionaries are a sequence of + bytes that do not need to follow the structure of a normal Zstandard + dictionary. + + The *zstd_dict* argument is a :class:`ZstdDict` instance with + the :attr:`~ZstdDict.dict_content` containing the raw dictionary contents. + + The *samples* argument (an iterable of :class:`bytes` objects), contains + sample data for generating the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. See :func:`train_dict` for + suggestions on the maximum dictionary size. + + The *level* argument (an integer) is the compression level expected to be + passed to the compressors using this dictionary. The dictionary information + varies for each compression level, so tuning for the proper compression + level can make compression more efficient. + + +.. class:: ZstdDict(dict_content, /, *, is_raw=False) + + A wrapper around Zstandard dictionaries. Dictionaries can be used to improve + the compression of many small chunks of data. Use :func:`train_dict` if you + need to train a new dictionary from sample data. + + The *dict_content* argument (a :term:`bytes-like object`), is the already + trained dictionary information. + + The *is_raw* argument, a boolean, is an advanced parameter controlling the + meaning of *dict_content*. ``True`` means *dict_content* is a "raw content" + dictionary, without any format restrictions. ``False`` means *dict_content* + is an ordinary Zstandard dictionary, created from Zstandard functions, + for example, :func:`train_dict` or the external :program:`zstd` CLI. + + When passing a :class:`!ZstdDict` to a function, the + :attr:`!as_digested_dict` and :attr:`!as_undigested_dict` attributes can + control how the dictionary is loaded by passing them as the ``zstd_dict`` + argument, for example, ``compress(data, zstd_dict=zd.as_digested_dict)``. + Digesting a dictionary is a costly operation that occurs when loading a + Zstandard dictionary. When making multiple calls to compression or + decompression, passing a digested dictionary will reduce the overhead of + loading the dictionary. + + .. list-table:: Difference for compression + :widths: 10 14 10 + :header-rows: 1 + + * - + - Digested dictionary + - Undigested dictionary + * - Advanced parameters of the compressor which may be overridden by + the dictionary's parameters + - ``window_log``, ``hash_log``, ``chain_log``, ``search_log``, + ``min_match``, ``target_length``, ``strategy``, + ``enable_long_distance_matching``, ``ldm_hash_log``, + ``ldm_min_match``, ``ldm_bucket_size_log``, ``ldm_hash_rate_log``, + and some non-public parameters. + - None + * - :class:`!ZstdDict` internally caches the dictionary + - Yes. It's faster when loading a digested dictionary again with the + same compression level. + - No. If you wish to load an undigested dictionary multiple times, + consider reusing a compressor object. + + If passing a :class:`!ZstdDict` without any attribute, an undigested + dictionary is passed by default when compressing and a digested dictionary + is generated if necessary and passed by default when decompressing. + + .. attribute:: dict_content + + The content of the Zstandard dictionary, a ``bytes`` object. It's the + same as the *dict_content* argument in the ``__init__`` method. It can + be used with other programs, such as the ``zstd`` CLI program. + + .. attribute:: dict_id + + Identifier of the Zstandard dictionary, a non-negative int value. + + Non-zero means the dictionary is ordinary, created by Zstandard + functions and following the Zstandard format. + + ``0`` means a "raw content" dictionary, free of any format restriction, + used for advanced users. + + .. note:: + + The meaning of ``0`` for :attr:`!ZstdDict.dict_id` is different + from the ``dictionary_id`` attribute to the :func:`get_frame_info` + function. + + .. attribute:: as_digested_dict + + Load as a digested dictionary. + + .. attribute:: as_undigested_dict + + Load as an undigested dictionary. + + +Advanced parameter control +-------------------------- + +.. class:: CompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced compression parameter + keys that can be used when compressing data. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Parameters are optional; any omitted parameter will have it's value selected + automatically. + + Example getting the lower and upper bound of :attr:`~.compression_level`:: + + lower, upper = CompressionParameter.compression_level.bounds() + + Example setting the :attr:`~.window_log` to the maximum size:: + + _lower, upper = CompressionParameter.window_log.bounds() + options = {CompressionParameter.window_log: upper} + compress(b'venezuelan beaver cheese', options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a compression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. For example, to get the valid values for + :attr:`~.compression_level`, one may check the result of + ``CompressionParameter.compression_level.bounds()``. + + Both the lower and upper bounds are inclusive. + + .. attribute:: compression_level + + A high-level means of setting other compression parameters that affect + the speed and ratio of compressing data. + + Regular compression levels are greater than ``0``. Values greater than + ``20`` are considered "ultra" compression and require more memory than + other levels. Negative values can be used to trade off faster compression + for worse compression ratios. + + Setting the level to zero uses :attr:`COMPRESSION_LEVEL_DEFAULT`. + + .. attribute:: window_log + + Maximum allowed back-reference distance the compressor can use when + compressing data, expressed as power of two, ``1 << window_log`` bytes. + This parameter greatly influences the memory usage of compression. Higher + values require more memory but gain better compression values. + + A value of zero causes the value to be selected automatically. + + .. attribute:: hash_log + + Size of the initial probe table, as a power of two. The resulting memory + usage is ``1 << (hash_log+2)`` bytes. Larger tables improve compression + ratio of strategies <= :attr:`~Strategy.dfast`, and improve compression + speed of strategies > :attr:`~Strategy.dfast`. + + A value of zero causes the value to be selected automatically. + + .. attribute:: chain_log + + Size of the multi-probe search table, as a power of two. The resulting + memory usage is ``1 << (chain_log+2)`` bytes. Larger tables result in + better and slower compression. This parameter has no effect for the + :attr:`~Strategy.fast` strategy. It's still useful when using + :attr:`~Strategy.dfast` strategy, in which case it defines a secondary + probe table. + + A value of zero causes the value to be selected automatically. + + .. attribute:: search_log + + Number of search attempts, as a power of two. More attempts result in + better and slower compression. This parameter is useless for + :attr:`~Strategy.fast` and :attr:`~Strategy.dfast` strategies. + + A value of zero causes the value to be selected automatically. + + .. attribute:: min_match + + Minimum size of searched matches. Larger values increase compression and + decompression speed, but decrease ratio. Note that Zstandard can still + find matches of smaller size, it just tweaks its search algorithm to look + for this size and larger. For all strategies < :attr:`~Strategy.btopt`, + the effective minimum is ``4``; for all strategies + > :attr:`~Strategy.fast`, the effective maximum is ``6``. + + A value of zero causes the value to be selected automatically. + + .. attribute:: target_length + + The impact of this field depends on the selected :class:`Strategy`. + + For strategies :attr:`~Strategy.btopt`, :attr:`~Strategy.btultra` and + :attr:`~Strategy.btultra2`, the value is the length of a match + considered "good enough" to stop searching. Larger values make + compression ratios better, but compresses slower. + + For strategy :attr:`~Strategy.fast`, it is the distance between match + sampling. Larger values make compression faster, but with a worse + compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: strategy + + The higher the value of selected strategy, the more complex the + compression technique used by zstd, resulting in higher compression + ratios but slower compression. + + .. seealso:: :class:`Strategy` + + .. attribute:: enable_long_distance_matching + + Long distance matching can be used to improve compression for large + inputs by finding large matches at greater distances. It increases memory + usage and window size. + + ``True`` or ``1`` enable long distance matching while ``False`` or ``0`` + disable it. + + Enabling this parameter increases default + :attr:`~CompressionParameter.window_log` to 128 MiB except when expressly + set to a different value. This setting is enabled by default if + :attr:`!window_log` >= 128 MiB and the compression + strategy >= :attr:`~Strategy.btopt` (compression level 16+). + + .. attribute:: ldm_hash_log + + Size of the table for long distance matching, as a power of two. Larger + values increase memory usage and compression ratio, but decrease + compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_min_match + + Minimum match size for long distance matcher. Larger or too small values + can often decrease the compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_bucket_size_log + + Log size of each bucket in the long distance matcher hash table for + collision resolution. Larger values improve collision resolution but + decrease compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_hash_rate_log + + Frequency of inserting/looking up entries into the long distance matcher + hash table. Larger values improve compression speed. Deviating far from + the default value will likely result in a compression ratio decrease. + + A value of zero causes the value to be selected automatically. + + .. attribute:: content_size_flag + + Write the size of the data to be compressed into the Zstandard frame + header when known prior to compressing. + + This flag only takes effect under the following scenarios: + + * Calling :func:`compress` for one-shot compression + * Providing all of the data to be compressed in the frame in a single + :meth:`ZstdCompressor.compress` call, with the + :attr:`ZstdCompressor.FLUSH_FRAME` mode. + * Calling :meth:`ZstdCompressor.set_pledged_input_size` with the exact + amount of data that will be provided to the compressor prior to any + calls to :meth:`ZstdCompressor.compress` for the current frame. + :meth:`!ZstdCompressor.set_pledged_input_size` must be called for each + new frame. + + All other compression calls may not write the size information into the + frame header. + + ``True`` or ``1`` enable the content size flag while ``False`` or ``0`` + disable it. + + .. attribute:: checksum_flag + + A four-byte checksum using XXHash64 of the uncompressed content is + written at the end of each frame. Zstandard's decompression code verifies + the checksum. If there is a mismatch a :class:`ZstdError` exception is + raised. + + ``True`` or ``1`` enable checksum generation while ``False`` or ``0`` + disable it. + + .. attribute:: dict_id_flag + + When compressing with a :class:`ZstdDict`, the dictionary's ID is written + into the frame header. + + ``True`` or ``1`` enable storing the dictionary ID while ``False`` or + ``0`` disable it. + + .. attribute:: nb_workers + + Select how many threads will be spawned to compress in parallel. When + :attr:`!nb_workers` > 0, enables multi-threaded compression, a value of + ``1`` means "one-thread multi-threaded mode". More workers improve speed, + but also increase memory usage and slightly reduce compression ratio. + + A value of zero disables multi-threading. + + .. attribute:: job_size + + Size of a compression job, in bytes. This value is enforced only when + :attr:`~CompressionParameter.nb_workers` >= 1. Each compression job is + completed in parallel, so this value can indirectly impact the number of + active threads. + + A value of zero causes the value to be selected automatically. + + .. attribute:: overlap_log + + Sets how much data is reloaded from previous jobs (threads) for new jobs + to be used by the look behind window during compression. This value is + only used when :attr:`~CompressionParameter.nb_workers` >= 1. Acceptable + values vary from 0 to 9. + + * 0 means dynamically set the overlap amount + * 1 means no overlap + * 9 means use a full window size from the previous job + + Each increment halves/doubles the overlap size. "8" means an overlap of + ``window_size/2``, "7" means an overlap of ``window_size/4``, etc. + +.. class:: DecompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced decompression parameter + keys that can be used when decompressing data. Parameters are optional; any + omitted parameter will have it's value selected automatically. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Example setting the :attr:`~.window_log_max` to the maximum size:: + + data = compress(b'Some very long buffer of bytes...') + + _lower, upper = DecompressionParameter.window_log_max.bounds() + + options = {DecompressionParameter.window_log_max: upper} + decompress(data, options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a decompression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. + + Both the lower and upper bounds are inclusive. + + .. attribute:: window_log_max + + The base-two logarithm of the maximum size of the window used during + decompression. This can be useful to limit the amount of memory used when + decompressing data. A larger maximum window size leads to faster + decompression. + + A value of zero causes the value to be selected automatically. + + +.. class:: Strategy() + + An :class:`~enum.IntEnum` containing strategies for compression. + Higher-numbered strategies correspond to more complex and slower + compression. + + .. note:: + + The values of attributes of :class:`!Strategy` are not necessarily stable + across zstd versions. Only the ordering of the attributes may be relied + upon. The attributes are listed below in order. + + The following strategies are available: + + .. attribute:: fast + + .. attribute:: dfast + + .. attribute:: greedy + + .. attribute:: lazy + + .. attribute:: lazy2 + + .. attribute:: btlazy2 + + .. attribute:: btopt + + .. attribute:: btultra + + .. attribute:: btultra2 + + +Miscellaneous +------------- + +.. function:: get_frame_info(frame_buffer) + + Retrieve a :class:`FrameInfo` object containing metadata about a Zstandard + frame. Frames contain metadata related to the compressed data they hold. + + +.. class:: FrameInfo + + Metadata related to a Zstandard frame. + + .. attribute:: decompressed_size + + The size of the decompressed contents of the frame. + + .. attribute:: dictionary_id + + An integer representing the Zstandard dictionary ID needed for + decompressing the frame. ``0`` means the dictionary ID was not + recorded in the frame header. This may mean that a Zstandard dictionary + is not needed, or that the ID of a required dictionary was not recorded. + + +.. attribute:: COMPRESSION_LEVEL_DEFAULT + + The default compression level for Zstandard: ``3``. + + +.. attribute:: zstd_version_info + + Version number of the runtime zstd library as a tuple of integers + (major, minor, release). + + +Examples +-------- + +Reading in a compressed file: + +.. code-block:: python + + from compression import zstd + + with zstd.open("file.zst") as f: + file_content = f.read() + +Creating a compressed file: + +.. code-block:: python + + from compression import zstd + + data = b"Insert Data Here" + with zstd.open("file.zst", "w") as f: + f.write(data) + +Compressing data in memory: + +.. code-block:: python + + from compression import zstd + + data_in = b"Insert Data Here" + data_out = zstd.compress(data_in) + +Incremental compression: + +.. code-block:: python + + from compression import zstd + + comp = zstd.ZstdCompressor() + out1 = comp.compress(b"Some data\n") + out2 = comp.compress(b"Another piece of data\n") + out3 = comp.compress(b"Even more data\n") + out4 = comp.flush() + # Concatenate all the partial results: + result = b"".join([out1, out2, out3, out4]) + +Writing compressed data to an already-open file: + +.. code-block:: python + + from compression import zstd + + with open("myfile", "wb") as f: + f.write(b"This data will not be compressed\n") + with zstd.open(f, "w") as zstf: + zstf.write(b"This *will* be compressed\n") + f.write(b"Not compressed\n") + +Creating a compressed file using compression parameters: + +.. code-block:: python + + from compression import zstd + + options = { + zstd.CompressionParameter.checksum_flag: 1 + } + with zstd.open("file.zst", "w", options=options) as f: + f.write(b"Mind if I squeeze in?") diff --git a/Doc/library/concurrency.rst b/Doc/library/concurrency.rst index 5be1a1106b09a0..18f9443cbfea20 100644 --- a/Doc/library/concurrency.rst +++ b/Doc/library/concurrency.rst @@ -18,6 +18,7 @@ multitasking). Here's an overview: multiprocessing.shared_memory.rst concurrent.rst concurrent.futures.rst + concurrent.interpreters.rst subprocess.rst sched.rst queue.rst diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 7efae9e628b828..dd92765038c4f7 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -6,8 +6,9 @@ .. versionadded:: 3.2 -**Source code:** :source:`Lib/concurrent/futures/thread.py` -and :source:`Lib/concurrent/futures/process.py` +**Source code:** :source:`Lib/concurrent/futures/thread.py`, +:source:`Lib/concurrent/futures/process.py`, +and :source:`Lib/concurrent/futures/interpreter.py` -------------- @@ -264,7 +265,7 @@ Each worker's interpreter is isolated from all the other interpreters. "Isolated" means each interpreter has its own runtime state and operates completely independently. For example, if you redirect :data:`sys.stdout` in one interpreter, it will not be automatically -redirected any other interpreter. If you import a module in one +redirected to any other interpreter. If you import a module in one interpreter, it is not automatically imported in any other. You would need to import the module separately in interpreter where you need it. In fact, each module imported in an interpreter is @@ -286,7 +287,7 @@ efficient alternative is to serialize with :mod:`pickle` and then send the bytes over a shared :mod:`socket ` or :func:`pipe `. -.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=(), shared=None) +.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=()) A :class:`ThreadPoolExecutor` subclass that executes calls asynchronously using a pool of at most *max_workers* threads. Each thread runs @@ -303,21 +304,10 @@ the bytes over a shared :mod:`socket ` or and *initargs* using :mod:`pickle` when sending them to the worker's interpreter. - .. note:: - Functions defined in the ``__main__`` module cannot be pickled - and thus cannot be used. - .. note:: The executor may replace uncaught exceptions from *initializer* with :class:`~concurrent.futures.interpreter.ExecutionFailed`. - The optional *shared* argument is a :class:`dict` of objects that all - interpreters in the pool share. The *shared* items are added to each - interpreter's ``__main__`` module. Not all objects are shareable. - Shareable objects include the builtin singletons, :class:`str` - and :class:`bytes`, and :class:`memoryview`. See :pep:`734` - for more info. - Other caveats from parent :class:`ThreadPoolExecutor` apply here. :meth:`~Executor.submit` and :meth:`~Executor.map` work like normal, @@ -325,10 +315,6 @@ except the worker serializes the callable and arguments using :mod:`pickle` when sending them to its interpreter. The worker likewise serializes the return value when sending it back. -.. note:: - Functions defined in the ``__main__`` module cannot be pickled - and thus cannot be used. - When a worker's current task raises an uncaught exception, the worker always tries to preserve the exception as-is. If that is successful then it also sets the ``__cause__`` to a corresponding diff --git a/Doc/library/concurrent.interpreters.rst b/Doc/library/concurrent.interpreters.rst new file mode 100644 index 00000000000000..524d505bcf144f --- /dev/null +++ b/Doc/library/concurrent.interpreters.rst @@ -0,0 +1,387 @@ +:mod:`!concurrent.interpreters` --- Multiple interpreters in the same process +============================================================================= + +.. module:: concurrent.interpreters + :synopsis: Multiple interpreters in the same process + +.. moduleauthor:: Eric Snow +.. sectionauthor:: Eric Snow + +.. versionadded:: 3.14 + +**Source code:** :source:`Lib/concurrent/interpreters.py` + +-------------- + +The :mod:`!concurrent.interpreters` module constructs higher-level +interfaces on top of the lower level :mod:`!_interpreters` module. + +The module is primarily meant to provide a basic API for managing +interpreters (AKA "subinterpreters") and running things in them. +Running mostly involves switching to an interpreter (in the current +thread) and calling a function in that execution context. + +For concurrency, interpreters themselves (and this module) don't +provide much more than isolation, which on its own isn't useful. +Actual concurrency is available separately through +:mod:`threads ` See `below `_ + +.. seealso:: + + :class:`~concurrent.futures.InterpreterPoolExecutor` + combines threads with interpreters in a familiar interface. + + .. XXX Add references to the upcoming HOWTO docs in the seealso block. + + :ref:`isolating-extensions-howto` + how to update an extension module to support multiple interpreters + + :pep:`554` + + :pep:`734` + + :pep:`684` + +.. XXX Why do we disallow multiple interpreters on WASM? + +.. include:: ../includes/wasm-notavail.rst + + +Key details +----------- + +Before we dive in further, there are a small number of details +to keep in mind about using multiple interpreters: + +* `isolated `_, by default +* no implicit threads +* not all PyPI packages support use in multiple interpreters yet + +.. XXX Are there other relevant details to list? + + +.. _interpreters-intro: + +Introduction +------------ + +An "interpreter" is effectively the execution context of the Python +runtime. It contains all of the state the runtime needs to execute +a program. This includes things like the import state and builtins. +(Each thread, even if there's only the main thread, has some extra +runtime state, in addition to the current interpreter, related to +the current exception and the bytecode eval loop.) + +The concept and functionality of the interpreter have been a part of +Python since version 2.2, but the feature was only available through +the C-API and not well known, and the `isolation `_ +was relatively incomplete until version 3.12. + +.. _interp-isolation: + +Multiple Interpreters and Isolation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A Python implementation may support using multiple interpreters in the +same process. CPython has this support. Each interpreter is +effectively isolated from the others (with a limited number of +carefully managed process-global exceptions to the rule). + +That isolation is primarily useful as a strong separation between +distinct logical components of a program, where you want to have +careful control of how those components interact. + +.. note:: + + Interpreters in the same process can technically never be strictly + isolated from one another since there are few restrictions on memory + access within the same process. The Python runtime makes a best + effort at isolation but extension modules may easily violate that. + Therefore, do not use multiple interpreters in security-sensitive + situations, where they shouldn't have access to each other's data. + +Running in an Interpreter +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Running in a different interpreter involves switching to it in the +current thread and then calling some function. The runtime will +execute the function using the current interpreter's state. The +:mod:`!concurrent.interpreters` module provides a basic API for +creating and managing interpreters, as well as the switch-and-call +operation. + +No other threads are automatically started for the operation. +There is `a helper `_ for that though. +There is another dedicated helper for calling the builtin +:func:`exec` in an interpreter. + +When :func:`exec` (or :func:`eval`) are called in an interpreter, +they run using the interpreter's :mod:`!__main__` module as the +"globals" namespace. The same is true for functions that aren't +associated with any module. This is the same as how scripts invoked +from the command-line run in the :mod:`!__main__` module. + + +.. _interp-concurrency: + +Concurrency and Parallelism +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As noted earlier, interpreters do not provide any concurrency +on their own. They strictly represent the isolated execution +context the runtime will use *in the current thread*. That isolation +makes them similar to processes, but they still enjoy in-process +efficiency, like threads. + +All that said, interpreters do naturally support certain flavors of +concurrency, as a powerful side effect of that isolation. +There's a powerful side effect of that isolation. It enables a +different approach to concurrency than you can take with async or +threads. It's a similar concurrency model to CSP or the actor model, +a model which is relatively easy to reason about. + +You can take advantage of that concurrency model in a single thread, +switching back and forth between interpreters, Stackless-style. +However, this model is more useful when you combine interpreters +with multiple threads. This mostly involves starting a new thread, +where you switch to another interpreter and run what you want there. + +Each actual thread in Python, even if you're only running in the main +thread, has its own *current* execution context. Multiple threads can +use the same interpreter or different ones. + +At a high level, you can think of the combination of threads and +interpreters as threads with opt-in sharing. + +As a significant bonus, interpreters are sufficiently isolated that +they do not share the :term:`GIL`, which means combining threads with +multiple interpreters enables full multi-core parallelism. +(This has been the case since Python 3.12.) + +Communication Between Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In practice, multiple interpreters are useful only if we have a way +to communicate between them. This usually involves some form of +message passing, but can even mean sharing data in some carefully +managed way. + +With this in mind, the :mod:`!concurrent.interpreters` module provides +a :class:`queue.Queue` implementation, available through +:func:`create_queue`. + +.. _interp-object-sharing: + +"Sharing" Objects +^^^^^^^^^^^^^^^^^ + +Any data actually shared between interpreters loses the thread-safety +provided by the :term:`GIL`. There are various options for dealing with +this in extension modules. However, from Python code the lack of +thread-safety means objects can't actually be shared, with a few +exceptions. Instead, a copy must be created, which means mutable +objects won't stay in sync. + +By default, most objects are copied with :mod:`pickle` when they are +passed to another interpreter. Nearly all of the immutable builtin +objects are either directly shared or copied efficiently. For example: + +* :const:`None` +* :class:`bool` (:const:`True` and :const:`False`) +* :class:`bytes` +* :class:`str` +* :class:`int` +* :class:`float` +* :class:`tuple` (of similarly supported objects) + +There is a small number of Python types that actually share mutable +data between interpreters: + +* :class:`memoryview` +* :class:`Queue` + + +Reference +--------- + +This module defines the following functions: + +.. function:: list_all() + + Return a :class:`list` of :class:`Interpreter` objects, + one for each existing interpreter. + +.. function:: get_current() + + Return an :class:`Interpreter` object for the currently running + interpreter. + +.. function:: get_main() + + Return an :class:`Interpreter` object for the main interpreter. + This is the interpreter the runtime created to run the :term:`REPL` + or the script given at the command-line. It is usually the only one. + +.. function:: create() + + Initialize a new (idle) Python interpreter + and return a :class:`Interpreter` object for it. + +.. function:: create_queue() + + Initialize a new cross-interpreter queue and return a :class:`Queue` + object for it. + + +Interpreter objects +^^^^^^^^^^^^^^^^^^^ + +.. class:: Interpreter(id) + + A single interpreter in the current process. + + Generally, :class:`Interpreter` shouldn't be called directly. + Instead, use :func:`create` or one of the other module functions. + + .. attribute:: id + + (read-only) + + The underlying interpreter's ID. + + .. attribute:: whence + + (read-only) + + A string describing where the interpreter came from. + + .. method:: is_running() + + Return ``True`` if the interpreter is currently executing code + in its :mod:`!__main__` module and ``False`` otherwise. + + .. method:: close() + + Finalize and destroy the interpreter. + + .. method:: prepare_main(ns=None, **kwargs) + + Bind objects in the interpreter's :mod:`!__main__` module. + + Some objects are actually shared and some are copied efficiently, + but most are copied via :mod:`pickle`. See :ref:`interp-object-sharing`. + + .. method:: exec(code, /, dedent=True) + + Run the given source code in the interpreter (in the current thread). + + .. method:: call(callable, /, *args, **kwargs) + + Return the result of calling running the given function in the + interpreter (in the current thread). + + .. _interp-call-in-thread: + + .. method:: call_in_thread(callable, /, *args, **kwargs) + + Run the given function in the interpreter (in a new thread). + +Exceptions +^^^^^^^^^^ + +.. exception:: InterpreterError + + This exception, a subclass of :exc:`Exception`, is raised when + an interpreter-related error happens. + +.. exception:: InterpreterNotFoundError + + This exception, a subclass of :exc:`InterpreterError`, is raised when + the targeted interpreter no longer exists. + +.. exception:: ExecutionFailed + + This exception, a subclass of :exc:`InterpreterError`, is raised when + the running code raised an uncaught exception. + + .. attribute:: excinfo + + A basic snapshot of the exception raised in the other interpreter. + +.. XXX Document the excinfoattrs? + +.. exception:: NotShareableError + + This exception, a subclass of :exc:`TypeError`, is raised when + an object cannot be sent to another interpreter. + + +Communicating Between Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. class:: Queue(id) + + A wrapper around a low-level, cross-interpreter queue, which + implements the :class:`queue.Queue` interface. The underlying queue + can only be created through :func:`create_queue`. + + Some objects are actually shared and some are copied efficiently, + but most are copied via :mod:`pickle`. See :ref:`interp-object-sharing`. + + .. attribute:: id + + (read-only) + + The queue's ID. + + +.. exception:: QueueEmptyError + + This exception, a subclass of :exc:`queue.Empty`, is raised from + :meth:`!Queue.get` and :meth:`!Queue.get_nowait` when the queue + is empty. + +.. exception:: QueueFullError + + This exception, a subclass of :exc:`queue.Full`, is raised from + :meth:`!Queue.put` and :meth:`!Queue.put_nowait` when the queue + is full. + + +Basic usage +----------- + +Creating an interpreter and running code in it:: + + from concurrent import interpreters + + interp = interpreters.create() + + # Run in the current OS thread. + + interp.exec('print("spam!")') + + interp.exec("""if True: + print('spam!') + """) + + from textwrap import dedent + interp.exec(dedent(""" + print('spam!') + """)) + + def run(arg): + return arg + + res = interp.call(run, 'spam!') + print(res) + + def run(): + print('spam!') + + interp.call(run) + + # Run in new OS thread. + + t = interp.call_in_thread(run) + t.join() diff --git a/Doc/library/concurrent.rst b/Doc/library/concurrent.rst index 8caea78bbb57e8..748c72c733bba2 100644 --- a/Doc/library/concurrent.rst +++ b/Doc/library/concurrent.rst @@ -1,6 +1,7 @@ The :mod:`!concurrent` package ============================== -Currently, there is only one module in this package: +This package contains the following modules: * :mod:`concurrent.futures` -- Launching parallel tasks +* :mod:`concurrent.interpreters` -- Multiple interpreters in the same process diff --git a/Doc/library/copy.rst b/Doc/library/copy.rst index 95b41f988a035b..210ad7188003e6 100644 --- a/Doc/library/copy.rst +++ b/Doc/library/copy.rst @@ -122,6 +122,8 @@ and only supports named tuples created by :func:`~collections.namedtuple`, This method should create a new object of the same type, replacing fields with values from *changes*. + .. versionadded:: 3.13 + .. seealso:: diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 533cdf13974be6..d39c4ca4a5838b 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -53,7 +53,7 @@ The :mod:`csv` module defines the following functions: .. index:: single: universal newlines; csv.reader function -.. function:: reader(csvfile, dialect='excel', **fmtparams) +.. function:: reader(csvfile, /, dialect='excel', **fmtparams) Return a :ref:`reader object ` that will process lines from the given *csvfile*. A csvfile must be an iterable of @@ -70,7 +70,7 @@ The :mod:`csv` module defines the following functions: section :ref:`csv-fmt-params`. Each row read from the csv file is returned as a list of strings. No - automatic data type conversion is performed unless the ``QUOTE_NONNUMERIC`` format + automatic data type conversion is performed unless the :data:`QUOTE_NONNUMERIC` format option is specified (in which case unquoted fields are transformed into floats). A short usage example:: @@ -84,7 +84,7 @@ The :mod:`csv` module defines the following functions: Spam, Lovely Spam, Wonderful Spam -.. function:: writer(csvfile, dialect='excel', **fmtparams) +.. function:: writer(csvfile, /, dialect='excel', **fmtparams) Return a writer object responsible for converting the user's data into delimited strings on the given file-like object. *csvfile* can be any object with a @@ -323,23 +323,32 @@ The :mod:`csv` module defines the following constants: .. data:: QUOTE_MINIMAL Instructs :class:`writer` objects to only quote those fields which contain - special characters such as *delimiter*, *quotechar* or any of the characters in - *lineterminator*. + special characters such as *delimiter*, *quotechar*, ``'\r'``, ``'\n'`` + or any of the characters in *lineterminator*. .. data:: QUOTE_NONNUMERIC Instructs :class:`writer` objects to quote all non-numeric fields. - Instructs :class:`reader` objects to convert all non-quoted fields to type *float*. + Instructs :class:`reader` objects to convert all non-quoted fields to type :class:`float`. + .. note:: + Some numeric types, such as :class:`bool`, :class:`~fractions.Fraction`, + or :class:`~enum.IntEnum`, have a string representation that cannot be + converted to :class:`float`. + They cannot be read in the :data:`QUOTE_NONNUMERIC` and + :data:`QUOTE_STRINGS` modes. .. data:: QUOTE_NONE - Instructs :class:`writer` objects to never quote fields. When the current - *delimiter* occurs in output data it is preceded by the current *escapechar* - character. If *escapechar* is not set, the writer will raise :exc:`Error` if + Instructs :class:`writer` objects to never quote fields. + When the current *delimiter*, *quotechar*, *escapechar*, ``'\r'``, ``'\n'`` + or any of the characters in *lineterminator* occurs in output data + it is preceded by the current *escapechar* character. + If *escapechar* is not set, the writer will raise :exc:`Error` if any characters that require escaping are encountered. + Set *quotechar* to ``None`` to prevent its escaping. Instructs :class:`reader` objects to perform no special processing of quote characters. @@ -408,9 +417,16 @@ Dialects support the following attributes: .. attribute:: Dialect.escapechar - A one-character string used by the writer to escape the *delimiter* if *quoting* - is set to :const:`QUOTE_NONE` and the *quotechar* if *doublequote* is - :const:`False`. On reading, the *escapechar* removes any special meaning from + A one-character string used by the writer to escape characters that + require escaping: + + * the *delimiter*, the *quotechar*, ``'\r'``, ``'\n'`` and any of the + characters in *lineterminator* are escaped if *quoting* is set to + :const:`QUOTE_NONE`; + * the *quotechar* is escaped if *doublequote* is :const:`False`; + * the *escapechar* itself. + + On reading, the *escapechar* removes any special meaning from the following character. It defaults to :const:`None`, which disables escaping. .. versionchanged:: 3.11 @@ -430,9 +446,12 @@ Dialects support the following attributes: .. attribute:: Dialect.quotechar - A one-character string used to quote fields containing special characters, such - as the *delimiter* or *quotechar*, or which contain new-line characters. It - defaults to ``'"'``. + A one-character string used to quote fields containing special characters, + such as the *delimiter* or the *quotechar*, or which contain new-line + characters (``'\r'``, ``'\n'`` or any of the characters in *lineterminator*). + It defaults to ``'"'``. + Can be set to ``None`` to prevent escaping ``'"'`` if *quoting* is set + to :const:`QUOTE_NONE`. .. versionchanged:: 3.11 An empty *quotechar* is not allowed. @@ -441,7 +460,8 @@ Dialects support the following attributes: Controls when quotes should be generated by the writer and recognised by the reader. It can take on any of the :ref:`QUOTE_\* constants ` - and defaults to :const:`QUOTE_MINIMAL`. + and defaults to :const:`QUOTE_MINIMAL` if *quotechar* is not ``None``, + and :const:`QUOTE_NONE` otherwise. .. attribute:: Dialect.skipinitialspace @@ -603,7 +623,7 @@ A slightly more advanced use of the reader --- catching and reporting errors:: for row in reader: print(row) except csv.Error as e: - sys.exit('file {}, line {}: {}'.format(filename, reader.line_num, e)) + sys.exit(f'file {filename}, line {reader.line_num}: {e}') And while the module doesn't directly support parsing strings, it can easily be done:: diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index 5b733d5321e907..846cece3761858 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -714,10 +714,16 @@ item in the :attr:`~Structure._fields_` tuples:: ... ("second_16", c_int, 16)] ... >>> print(Int.first_16) - + >>> print(Int.second_16) - - >>> + + +It is important to note that bit field allocation and layout in memory are not +defined as a C standard; their implementation is compiler-specific. +By default, Python will attempt to match the behavior of a "native" compiler +for the current platform. +See the :attr:`~Structure._layout_` attribute for details on the default +behavior and how to change it. .. _ctypes-arrays: @@ -876,7 +882,7 @@ invalid non-\ ``NULL`` pointers would crash Python):: Thread safety without the GIL ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In Python 3.13, the :term:`GIL` may be disabled on :term:`experimental free threaded ` builds. +From Python 3.13 onward, the :term:`GIL` can be disabled on :term:`free threaded ` builds. In ctypes, reads and writes to a single object concurrently is safe, but not across multiple objects: .. code-block:: pycon @@ -2031,35 +2037,55 @@ Utility functions pointer. -.. function:: create_string_buffer(init_or_size, size=None) +.. function:: create_string_buffer(init, size=None) + create_string_buffer(size) This function creates a mutable character buffer. The returned object is a ctypes array of :class:`c_char`. - *init_or_size* must be an integer which specifies the size of the array, or a - bytes object which will be used to initialize the array items. + If *size* is given (and not ``None``), it must be an :class:`int`. + It specifies the size of the returned array. + + If the *init* argument is given, it must be :class:`bytes`. It is used + to initialize the array items. Bytes not initialized this way are + set to zero (NUL). + + If *size* is not given (or if it is ``None``), the buffer is made one element + larger than *init*, effectively adding a NUL terminator. + + If both arguments are given, *size* must not be less than ``len(init)``. - If a bytes object is specified as first argument, the buffer is made one item - larger than its length so that the last element in the array is a NUL - termination character. An integer can be passed as second argument which allows - specifying the size of the array if the length of the bytes should not be used. + .. warning:: + + If *size* is equal to ``len(init)``, a NUL terminator is + not added. Do not treat such a buffer as a C string. + + For example:: + + >>> bytes(create_string_buffer(2)) + b'\x00\x00' + >>> bytes(create_string_buffer(b'ab')) + b'ab\x00' + >>> bytes(create_string_buffer(b'ab', 2)) + b'ab' + >>> bytes(create_string_buffer(b'ab', 4)) + b'ab\x00\x00' + >>> bytes(create_string_buffer(b'abcdef', 2)) + Traceback (most recent call last): + ... + ValueError: byte string too long .. audit-event:: ctypes.create_string_buffer init,size ctypes.create_string_buffer -.. function:: create_unicode_buffer(init_or_size, size=None) +.. function:: create_unicode_buffer(init, size=None) + create_unicode_buffer(size) This function creates a mutable unicode character buffer. The returned object is a ctypes array of :class:`c_wchar`. - *init_or_size* must be an integer which specifies the size of the array, or a - string which will be used to initialize the array items. - - If a string is specified as first argument, the buffer is made one item - larger than the length of the string so that the last element in the array is a - NUL termination character. An integer can be passed as second argument which - allows specifying the size of the array if the length of the string should not - be used. + The function takes the same arguments as :func:`~create_string_buffer` except + *init* must be a string and *size* counts :class:`c_wchar`. .. audit-event:: ctypes.create_unicode_buffer init,size ctypes.create_unicode_buffer @@ -2939,7 +2965,7 @@ fields, or any other data types containing pointer type fields. .. attribute:: is_anonymous True if this field is anonymous, that is, it contains nested sub-fields - that should be be merged into a containing structure or union. + that should be merged into a containing structure or union. .. _ctypes-arrays-pointers: diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 137504c51b4358..0b13c559295f3c 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -68,7 +68,7 @@ The module :mod:`curses` defines the following exception: The module :mod:`curses` defines the following functions: -.. function:: assume_default_colors(fg, bg) +.. function:: assume_default_colors(fg, bg, /) Allow use of default values for colors on terminals supporting this feature. Use this to support transparency in your application. @@ -988,6 +988,10 @@ the following methods and attributes: window.getstr(y, x, n) Read a bytes object from the user, with primitive line editing capacity. + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.getyx() @@ -1079,6 +1083,10 @@ the following methods and attributes: current cursor position, or at *y*, *x* if specified. Attributes are stripped from the characters. If *n* is specified, :meth:`instr` returns a string at most *n* characters long (exclusive of the trailing NUL). + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.is_linetouched(line) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index 72612211b43d5e..299c8aa399c25c 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -121,8 +121,11 @@ Module contents :meth:`!__le__`, :meth:`!__gt__`, or :meth:`!__ge__`, then :exc:`TypeError` is raised. - - *unsafe_hash*: If ``False`` (the default), a :meth:`~object.__hash__` method - is generated according to how *eq* and *frozen* are set. + - *unsafe_hash*: If true, force ``dataclasses`` to create a + :meth:`~object.__hash__` method, even though it may not be safe to do so. + Otherwise, generate a :meth:`~object.__hash__` method according to how + *eq* and *frozen* are set. + The default value is ``False``. :meth:`!__hash__` is used by built-in :meth:`hash`, and when objects are added to hashed collections such as dictionaries and sets. Having a @@ -304,9 +307,9 @@ Module contents .. versionadded:: 3.10 - - ``doc``: optional docstring for this field. + - *doc*: optional docstring for this field. - .. versionadded:: 3.13 + .. versionadded:: 3.14 If the default value of a field is specified by a call to :func:`!field`, then the class attribute for this field will be diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 1ce2013f05da2e..3470f42a6c622d 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -261,6 +261,22 @@ A :class:`timedelta` object represents a duration, the difference between two >>> (d.days, d.seconds, d.microseconds) (-1, 86399, 999999) + Since the string representation of :class:`!timedelta` objects can be confusing, + use the following recipe to produce a more readable format: + + .. code-block:: pycon + + >>> def pretty_timedelta(td): + ... if td.days >= 0: + ... return str(td) + ... return f'-({-td!s})' + ... + >>> d = timedelta(hours=-1) + >>> str(d) # not human-friendly + '-1 day, 23:00:00' + >>> pretty_timedelta(d) + '-(1:00:00)' + Class attributes: diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index f53c1f3467034e..10ddfa02b43156 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -2,7 +2,7 @@ ===================================================================== .. module:: decimal - :synopsis: Implementation of the General Decimal Arithmetic Specification. + :synopsis: Implementation of the General Decimal Arithmetic Specification. .. moduleauthor:: Eric Price .. moduleauthor:: Facundo Batista @@ -121,7 +121,7 @@ reset them before monitoring a calculation. .. _decimal-tutorial: -Quick-start Tutorial +Quick-start tutorial -------------------- The usual start to using decimals is importing the module, viewing the current @@ -1096,40 +1096,52 @@ In addition to the three supplied contexts, new contexts can be created with the default values are copied from the :const:`DefaultContext`. If the *flags* field is not specified or is :const:`None`, all flags are cleared. - *prec* is an integer in the range [``1``, :const:`MAX_PREC`] that sets - the precision for arithmetic operations in the context. + .. attribute:: prec - The *rounding* option is one of the constants listed in the section - `Rounding Modes`_. + An integer in the range [``1``, :const:`MAX_PREC`] that sets + the precision for arithmetic operations in the context. - The *traps* and *flags* fields list any signals to be set. Generally, new - contexts should only set traps and leave the flags clear. + .. attribute:: rounding - The *Emin* and *Emax* fields are integers specifying the outer limits allowable - for exponents. *Emin* must be in the range [:const:`MIN_EMIN`, ``0``], - *Emax* in the range [``0``, :const:`MAX_EMAX`]. + One of the constants listed in the section `Rounding Modes`_. - The *capitals* field is either ``0`` or ``1`` (the default). If set to - ``1``, exponents are printed with a capital ``E``; otherwise, a - lowercase ``e`` is used: ``Decimal('6.02e+23')``. + .. attribute:: traps + flags - The *clamp* field is either ``0`` (the default) or ``1``. - If set to ``1``, the exponent ``e`` of a :class:`Decimal` - instance representable in this context is strictly limited to the - range ``Emin - prec + 1 <= e <= Emax - prec + 1``. If *clamp* is - ``0`` then a weaker condition holds: the adjusted exponent of - the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is - ``1``, a large normal number will, where possible, have its - exponent reduced and a corresponding number of zeros added to its - coefficient, in order to fit the exponent constraints; this - preserves the value of the number but loses information about - significant trailing zeros. For example:: + Lists of any signals to be set. Generally, new contexts should only set + traps and leave the flags clear. - >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') - Decimal('1.23000E+999') + .. attribute:: Emin + Emax - A *clamp* value of ``1`` allows compatibility with the - fixed-width decimal interchange formats specified in IEEE 754. + Integers specifying the outer limits allowable for exponents. *Emin* must + be in the range [:const:`MIN_EMIN`, ``0``], *Emax* in the range + [``0``, :const:`MAX_EMAX`]. + + .. attribute:: capitals + + Either ``0`` or ``1`` (the default). If set to + ``1``, exponents are printed with a capital ``E``; otherwise, a + lowercase ``e`` is used: ``Decimal('6.02e+23')``. + + .. attribute:: clamp + + Either ``0`` (the default) or ``1``. If set to ``1``, the exponent ``e`` + of a :class:`Decimal` instance representable in this context is strictly + limited to the range ``Emin - prec + 1 <= e <= Emax - prec + 1``. + If *clamp* is ``0`` then a weaker condition holds: the adjusted exponent of + the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is + ``1``, a large normal number will, where possible, have its + exponent reduced and a corresponding number of zeros added to its + coefficient, in order to fit the exponent constraints; this + preserves the value of the number but loses information about + significant trailing zeros. For example:: + + >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') + Decimal('1.23000E+999') + + A *clamp* value of ``1`` allows compatibility with the + fixed-width decimal interchange formats specified in IEEE 754. The :class:`Context` class defines several general purpose methods as well as a large number of methods for doing arithmetic directly in a given context. @@ -1769,7 +1781,7 @@ The following table summarizes the hierarchy of signals:: .. _decimal-notes: -Floating-Point Notes +Floating-point notes -------------------- diff --git a/Doc/library/dialog.rst b/Doc/library/dialog.rst index 191e0da12103fa..e0693e8eb6ed22 100644 --- a/Doc/library/dialog.rst +++ b/Doc/library/dialog.rst @@ -220,7 +220,7 @@ is the base class for dialogs defined in other supporting modules. .. class:: Dialog(master=None, **options) - .. method:: show(color=None, **options) + .. method:: show(**options) Render the Dialog window. diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 44767b5dd2d5c9..11685a32f48e4f 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1094,14 +1094,6 @@ iterations of the loop. .. versionadded:: 3.14 -.. opcode:: LOAD_CONST_IMMORTAL (consti) - - Pushes ``co_consts[consti]`` onto the stack. - Can be used when the constant value is known to be immortal. - - .. versionadded:: 3.14 - - .. opcode:: LOAD_NAME (namei) Pushes the value associated with ``co_names[namei]`` onto the stack. diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index b86fef9fd6f310..8236d703fc1e45 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -174,7 +174,7 @@ with assorted summaries at the end. You can force verbose mode by passing ``verbose=True`` to :func:`testmod`, or prohibit it by passing ``verbose=False``. In either of those cases, -``sys.argv`` is not examined by :func:`testmod` (so passing ``-v`` or not +:data:`sys.argv` is not examined by :func:`testmod` (so passing ``-v`` or not has no effect). There is also a command line shortcut for running :func:`testmod`, see section @@ -231,7 +231,7 @@ documentation:: As with :func:`testmod`, :func:`testfile` won't display anything unless an example fails. If an example does fail, then the failing example(s) and the cause(s) of the failure(s) are printed to stdout, using the same format as -:func:`testmod`. +:func:`!testmod`. By default, :func:`testfile` looks for files in the calling module's directory. See section :ref:`doctest-basic-api` for a description of the optional arguments @@ -311,6 +311,9 @@ Which Docstrings Are Examined? The module docstring, and all function, class and method docstrings are searched. Objects imported into the module are not searched. +.. attribute:: module.__test__ + :no-typesetting: + In addition, there are cases when you want tests to be part of a module but not part of the help text, which requires that the tests not be included in the docstring. Doctest looks for a module-level variable called ``__test__`` and uses it to locate other @@ -533,7 +536,7 @@ Some details you should read once, but won't need to remember: * The interactive shell omits the traceback header line for some :exc:`SyntaxError`\ s. But doctest uses the traceback header line to distinguish exceptions from non-exceptions. So in the rare case where you need - to test a :exc:`SyntaxError` that omits the traceback header, you will need to + to test a :exc:`!SyntaxError` that omits the traceback header, you will need to manually add the traceback header line to your test example. .. index:: single: ^ (caret); marker @@ -860,15 +863,15 @@ The :const:`ELLIPSIS` directive gives a nice approach for the last example: Floating-point numbers are also subject to small output variations across -platforms, because Python defers to the platform C library for float formatting, -and C libraries vary widely in quality here. :: +platforms, because Python defers to the platform C library for some +floating-point calculations, and C libraries vary widely in quality here. :: - >>> 1./7 # risky - 0.14285714285714285 - >>> print(1./7) # safer - 0.142857142857 - >>> print(round(1./7, 6)) # much safer - 0.142857 + >>> 1000**0.1 # risky + 1.9952623149688797 + >>> round(1000**0.1, 9) # safer + 1.995262315 + >>> print(f'{1000**0.1:.4f}') # much safer + 1.9953 Numbers of the form ``I/2.**J`` are safe across all platforms, and I often contrive doctest examples to produce numbers of that form:: @@ -938,13 +941,13 @@ and :ref:`doctest-simple-testfile`. Optional argument *verbose* prints lots of stuff if true, and prints only failures if false; by default, or if ``None``, it's true if and only if ``'-v'`` - is in ``sys.argv``. + is in :data:`sys.argv`. Optional argument *report* prints a summary at the end when true, else prints nothing at the end. In verbose mode, the summary is detailed, else the summary is very brief (in fact, empty if all tests passed). - Optional argument *optionflags* (default value 0) takes the + Optional argument *optionflags* (default value ``0``) takes the :ref:`bitwise OR ` of option flags. See section :ref:`doctest-options`. @@ -1045,7 +1048,7 @@ from text files and modules with doctests: The returned :class:`unittest.TestSuite` is to be run by the unittest framework and runs the interactive examples in each file. If an example in any file - fails, then the synthesized unit test fails, and a :exc:`failureException` + fails, then the synthesized unit test fails, and a :exc:`~unittest.TestCase.failureException` exception is raised showing the name of the file containing the test and a (sometimes approximate) line number. If all the examples in a file are skipped, then the synthesized unit test is also marked as skipped. @@ -1078,13 +1081,14 @@ from text files and modules with doctests: Optional argument *setUp* specifies a set-up function for the test suite. This is called before running the tests in each file. The *setUp* function - will be passed a :class:`DocTest` object. The setUp function can access the - test globals as the *globs* attribute of the test passed. + will be passed a :class:`DocTest` object. The *setUp* function can access the + test globals as the :attr:`~DocTest.globs` attribute of the test passed. Optional argument *tearDown* specifies a tear-down function for the test suite. This is called after running the tests in each file. The *tearDown* - function will be passed a :class:`DocTest` object. The setUp function can - access the test globals as the *globs* attribute of the test passed. + function will be passed a :class:`DocTest` object. The *tearDown* function can + access the test globals as the :attr:`~DocTest.globs` attribute of the test + passed. Optional argument *globs* is a dictionary containing the initial global variables for the tests. A new copy of this dictionary is created for each @@ -1111,11 +1115,12 @@ from text files and modules with doctests: Convert doctest tests for a module to a :class:`unittest.TestSuite`. The returned :class:`unittest.TestSuite` is to be run by the unittest framework - and runs each doctest in the module. If any of the doctests fail, then the - synthesized unit test fails, and a :exc:`failureException` exception is raised + and runs each doctest in the module. + Each docstring is run as a separate unit test. + If any of the doctests fail, then the synthesized unit test fails, + and a :exc:`unittest.TestCase.failureException` exception is raised showing the name of the file containing the test and a (sometimes approximate) line number. If all the examples in a docstring are skipped, then the - synthesized unit test is also marked as skipped. Optional argument *module* provides the module to be tested. It can be a module object or a (possibly dotted) module name. If not specified, the module calling @@ -1123,7 +1128,7 @@ from text files and modules with doctests: Optional argument *globs* is a dictionary containing the initial global variables for the tests. A new copy of this dictionary is created for each - test. By default, *globs* is a new empty dictionary. + test. By default, *globs* is the module's :attr:`~module.__dict__`. Optional argument *extraglobs* specifies an extra set of global variables, which is merged into *globs*. By default, no extra globals are used. @@ -1132,7 +1137,7 @@ from text files and modules with doctests: drop-in replacement) that is used to extract doctests from the module. Optional arguments *setUp*, *tearDown*, and *optionflags* are the same as for - function :func:`DocFileSuite` above. + function :func:`DocFileSuite` above, but they are called for each docstring. This function uses the same search technique as :func:`testmod`. @@ -1140,12 +1145,6 @@ from text files and modules with doctests: :func:`DocTestSuite` returns an empty :class:`unittest.TestSuite` if *module* contains no docstrings instead of raising :exc:`ValueError`. -.. exception:: failureException - - When doctests which have been converted to unit tests by :func:`DocFileSuite` - or :func:`DocTestSuite` fail, this exception is raised showing the name of - the file containing the test and a (sometimes approximate) line number. - Under the covers, :func:`DocTestSuite` creates a :class:`unittest.TestSuite` out of :class:`!doctest.DocTestCase` instances, and :class:`!DocTestCase` is a subclass of :class:`unittest.TestCase`. :class:`!DocTestCase` isn't documented @@ -1158,15 +1157,15 @@ of :class:`!DocTestCase`. So both ways of creating a :class:`unittest.TestSuite` run instances of :class:`!DocTestCase`. This is important for a subtle reason: when you run -:mod:`doctest` functions yourself, you can control the :mod:`doctest` options in -use directly, by passing option flags to :mod:`doctest` functions. However, if -you're writing a :mod:`unittest` framework, :mod:`unittest` ultimately controls +:mod:`doctest` functions yourself, you can control the :mod:`!doctest` options in +use directly, by passing option flags to :mod:`!doctest` functions. However, if +you're writing a :mod:`unittest` framework, :mod:`!unittest` ultimately controls when and how tests get run. The framework author typically wants to control -:mod:`doctest` reporting options (perhaps, e.g., specified by command line -options), but there's no way to pass options through :mod:`unittest` to -:mod:`doctest` test runners. +:mod:`!doctest` reporting options (perhaps, e.g., specified by command line +options), but there's no way to pass options through :mod:`!unittest` to +:mod:`!doctest` test runners. -For this reason, :mod:`doctest` also supports a notion of :mod:`doctest` +For this reason, :mod:`doctest` also supports a notion of :mod:`!doctest` reporting flags specific to :mod:`unittest` support, via this function: @@ -1181,12 +1180,12 @@ reporting flags specific to :mod:`unittest` support, via this function: :mod:`unittest`: the :meth:`!runTest` method of :class:`!DocTestCase` looks at the option flags specified for the test case when the :class:`!DocTestCase` instance was constructed. If no reporting flags were specified (which is the - typical and expected case), :mod:`!doctest`'s :mod:`unittest` reporting flags are + typical and expected case), :mod:`!doctest`'s :mod:`!unittest` reporting flags are :ref:`bitwise ORed ` into the option flags, and the option flags so augmented are passed to the :class:`DocTestRunner` instance created to run the doctest. If any reporting flags were specified when the :class:`!DocTestCase` instance was constructed, :mod:`!doctest`'s - :mod:`unittest` reporting flags are ignored. + :mod:`!unittest` reporting flags are ignored. The value of the :mod:`unittest` reporting flags in effect before the function was called is returned by the function. @@ -1279,7 +1278,7 @@ DocTest Objects .. attribute:: filename The name of the file that this :class:`DocTest` was extracted from; or - ``None`` if the filename is unknown, or if the :class:`DocTest` was not + ``None`` if the filename is unknown, or if the :class:`!DocTest` was not extracted from a file. @@ -1419,10 +1418,10 @@ DocTestFinder objects The globals for each :class:`DocTest` is formed by combining *globs* and *extraglobs* (bindings in *extraglobs* override bindings in *globs*). A new - shallow copy of the globals dictionary is created for each :class:`DocTest`. - If *globs* is not specified, then it defaults to the module's *__dict__*, if - specified, or ``{}`` otherwise. If *extraglobs* is not specified, then it - defaults to ``{}``. + shallow copy of the globals dictionary is created for each :class:`!DocTest`. + If *globs* is not specified, then it defaults to the module's + :attr:`~module.__dict__`, if specified, or ``{}`` otherwise. + If *extraglobs* is not specified, then it defaults to ``{}``. .. _doctest-doctestparser: @@ -1446,7 +1445,7 @@ DocTestParser objects :class:`DocTest` object. *globs*, *name*, *filename*, and *lineno* are attributes for the new - :class:`DocTest` object. See the documentation for :class:`DocTest` for more + :class:`!DocTest` object. See the documentation for :class:`DocTest` for more information. @@ -1461,7 +1460,7 @@ DocTestParser objects Divide the given string into examples and intervening text, and return them as a list of alternating :class:`Example`\ s and strings. Line numbers for the - :class:`Example`\ s are 0-based. The optional argument *name* is a name + :class:`!Example`\ s are 0-based. The optional argument *name* is a name identifying this string, and is only used for error messages. @@ -1501,7 +1500,7 @@ DocTestRunner objects :class:`OutputChecker`. This comparison may be customized with a number of option flags; see section :ref:`doctest-options` for more information. If the option flags are insufficient, then the comparison may also be customized by - passing a subclass of :class:`OutputChecker` to the constructor. + passing a subclass of :class:`!OutputChecker` to the constructor. The test runner's display output can be controlled in two ways. First, an output function can be passed to :meth:`run`; this function will be called @@ -1540,7 +1539,7 @@ DocTestRunner objects output; it should not be called directly. *example* is the example about to be processed. *test* is the test - *containing example*. *out* is the output function that was passed to + containing *example*. *out* is the output function that was passed to :meth:`DocTestRunner.run`. @@ -1940,7 +1939,7 @@ several options for organizing tests: containing test cases for the named topics. These functions can be included in the same file as the module, or separated out into a separate test file. -* Define a ``__test__`` dictionary mapping from regression test topics to +* Define a :attr:`~module.__test__` dictionary mapping from regression test topics to docstrings containing test cases. When you have placed your tests in a module, the module can itself be the test diff --git a/Doc/library/email.compat32-message.rst b/Doc/library/email.compat32-message.rst index 4285c436e8da80..5754c2b65b239f 100644 --- a/Doc/library/email.compat32-message.rst +++ b/Doc/library/email.compat32-message.rst @@ -181,7 +181,7 @@ Here are the methods of the :class:`Message` class: :meth:`set_payload` instead. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.set_content` and the related ``make`` and ``add`` methods. @@ -224,7 +224,7 @@ Here are the methods of the :class:`Message` class: ASCII charset. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.get_content` and :meth:`~email.message.EmailMessage.iter_parts`. @@ -236,7 +236,7 @@ Here are the methods of the :class:`Message` class: the message's default character set; see :meth:`set_charset` for details. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by :meth:`~email.message.EmailMessage.set_content`. @@ -265,9 +265,9 @@ Here are the methods of the :class:`Message` class: using that :mailheader:`Content-Transfer-Encoding` and is not modified. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *charset* parameter of the - :meth:`email.emailmessage.EmailMessage.set_content` method. + :meth:`email.message.EmailMessage.set_content` method. .. method:: get_charset() @@ -276,7 +276,7 @@ Here are the methods of the :class:`Message` class: message's payload. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class it always returns + :class:`~email.message.EmailMessage` class it always returns ``None``. @@ -486,7 +486,7 @@ Here are the methods of the :class:`Message` class: search instead of :mailheader:`Content-Type`. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *params* property of the individual header objects returned by the header access methods. @@ -524,7 +524,7 @@ Here are the methods of the :class:`Message` class: to ``False``. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the *params* property of the individual header objects returned by the header access methods. @@ -579,7 +579,7 @@ Here are the methods of the :class:`Message` class: header is also added. This is a legacy method. On the - :class:`~email.emailmessage.EmailMessage` class its functionality is + :class:`~email.message.EmailMessage` class its functionality is replaced by the ``make_`` and ``add_`` methods. diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index 219fad0d2f6745..f49885b8785235 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -178,16 +178,36 @@ The :mod:`email.header` module also provides the following convenient functions. Decode a message header value without converting the character set. The header value is in *header*. - This function returns a list of ``(decoded_string, charset)`` pairs containing - each of the decoded parts of the header. *charset* is ``None`` for non-encoded - parts of the header, otherwise a lower case string containing the name of the - character set specified in the encoded string. + For historical reasons, this function may return either: - Here's an example:: + 1. A list of pairs containing each of the decoded parts of the header, + ``(decoded_bytes, charset)``, where *decoded_bytes* is always an instance of + :class:`bytes`, and *charset* is either: + + - A lower case string containing the name of the character set specified. + + - ``None`` for non-encoded parts of the header. + + 2. A list of length 1 containing a pair ``(string, None)``, where + *string* is always an instance of :class:`str`. + + An :exc:`email.errors.HeaderParseError` may be raised when certain decoding + errors occur (e.g. a base64 decoding exception). + + Here are examples: >>> from email.header import decode_header >>> decode_header('=?iso-8859-1?q?p=F6stal?=') [(b'p\xf6stal', 'iso-8859-1')] + >>> decode_header('unencoded_string') + [('unencoded_string', None)] + >>> decode_header('bar =?utf-8?B?ZsOzbw==?=') + [(b'bar ', None), (b'f\xc3\xb3o', 'utf-8')] + + .. note:: + + This function exists for backwards compatibility only. For + new code, we recommend using :class:`email.headerregistry.HeaderRegistry`. .. function:: make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' ') @@ -203,3 +223,7 @@ The :mod:`email.header` module also provides the following convenient functions. :class:`Header` instance. Optional *maxlinelen*, *header_name*, and *continuation_ws* are as in the :class:`Header` constructor. + .. note:: + + This function exists for backwards compatibility only, and is + not recommended for use in new code. diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index bb72032891ea98..8cc887b8ceb378 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -204,10 +204,16 @@ The following exceptions are the exceptions that are usually raised. assignment fails. (When an object does not support attribute references or attribute assignments at all, :exc:`TypeError` is raised.) - The :attr:`name` and :attr:`obj` attributes can be set using keyword-only - arguments to the constructor. When set they represent the name of the attribute - that was attempted to be accessed and the object that was accessed for said - attribute, respectively. + The optional *name* and *obj* keyword-only arguments + set the corresponding attributes: + + .. attribute:: name + + The name of the attribute that was attempted to be accessed. + + .. attribute:: obj + + The object that was accessed for the named attribute. .. versionchanged:: 3.10 Added the :attr:`name` and :attr:`obj` attributes. @@ -215,7 +221,7 @@ The following exceptions are the exceptions that are usually raised. .. exception:: EOFError Raised when the :func:`input` function hits an end-of-file condition (EOF) - without reading any data. (N.B.: the :meth:`io.IOBase.read` and + without reading any data. (Note: the :meth:`!io.IOBase.read` and :meth:`io.IOBase.readline` methods return an empty string when they hit EOF.) @@ -312,9 +318,11 @@ The following exceptions are the exceptions that are usually raised. unqualified names. The associated value is an error message that includes the name that could not be found. - The :attr:`name` attribute can be set using a keyword-only argument to the - constructor. When set it represent the name of the variable that was attempted - to be accessed. + The optional *name* keyword-only argument sets the attribute: + + .. attribute:: name + + The name of the variable that was attempted to be accessed. .. versionchanged:: 3.10 Added the :attr:`name` attribute. @@ -382,7 +390,7 @@ The following exceptions are the exceptions that are usually raised. The corresponding error message, as provided by the operating system. It is formatted by the C - functions :c:func:`perror` under POSIX, and :c:func:`FormatMessage` + functions :c:func:`!perror` under POSIX, and :c:func:`!FormatMessage` under Windows. .. attribute:: filename @@ -398,7 +406,7 @@ The following exceptions are the exceptions that are usually raised. .. versionchanged:: 3.3 :exc:`EnvironmentError`, :exc:`IOError`, :exc:`WindowsError`, :exc:`socket.error`, :exc:`select.error` and - :exc:`mmap.error` have been merged into :exc:`OSError`, and the + :exc:`!mmap.error` have been merged into :exc:`OSError`, and the constructor may return a subclass. .. versionchanged:: 3.4 @@ -590,7 +598,7 @@ The following exceptions are the exceptions that are usually raised. handled, the Python interpreter exits; no stack traceback is printed. The constructor accepts the same optional argument passed to :func:`sys.exit`. If the value is an integer, it specifies the system exit status (passed to - C's :c:func:`exit` function); if it is ``None``, the exit status is zero; if + C's :c:func:`!exit` function); if it is ``None``, the exit status is zero; if it has another type (such as a string), the object's value is printed and the exit status is one. @@ -1048,7 +1056,7 @@ their subgroups based on the types of the contained exceptions. subclasses that need a different constructor signature need to override that rather than :meth:`~object.__init__`. For example, the following defines an exception group subclass which accepts an exit_code and - and constructs the group's message from it. :: + constructs the group's message from it. :: class Errors(ExceptionGroup): def __new__(cls, errors, exit_code): diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst index 5058b85bffb15c..f81e6bf5810f86 100644 --- a/Doc/library/faulthandler.rst +++ b/Doc/library/faulthandler.rst @@ -90,7 +90,7 @@ An error will be printed instead of the stack. Additionally, some compilers do not support :term:`CPython's ` implementation of C stack dumps. As a result, a different error may be printed -instead of the stack, even if the the operating system supports dumping stacks. +instead of the stack, even if the operating system supports dumping stacks. .. note:: @@ -228,6 +228,41 @@ handler: Fatal Python error: Segmentation fault Current thread 0x00007fb899f39700 (most recent call first): - File "/home/python/cpython/Lib/ctypes/__init__.py", line 486 in string_at + File "/opt/python/Lib/ctypes/__init__.py", line 486 in string_at File "", line 1 in + + Current thread's C stack trace (most recent call first): + Binary file "/opt/python/python", at _Py_DumpStack+0x42 [0x5b27f7d7147e] + Binary file "/opt/python/python", at +0x32dcbd [0x5b27f7d85cbd] + Binary file "/opt/python/python", at +0x32df8a [0x5b27f7d85f8a] + Binary file "/usr/lib/libc.so.6", at +0x3def0 [0x77b73226bef0] + Binary file "/usr/lib/libc.so.6", at +0x17ef9c [0x77b7323acf9c] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0xcdf6 [0x77b7315dddf6] + Binary file "/usr/lib/libffi.so.8", at +0x7976 [0x77b73158f976] + Binary file "/usr/lib/libffi.so.8", at +0x413c [0x77b73158c13c] + Binary file "/usr/lib/libffi.so.8", at ffi_call+0x12e [0x77b73158ef0e] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0x15a33 [0x77b7315e6a33] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0x164fa [0x77b7315e74fa] + Binary file "/opt/python/build/lib.linux-x86_64-3.14/_ctypes.cpython-314d-x86_64-linux-gnu.so", at +0xc624 [0x77b7315dd624] + Binary file "/opt/python/python", at _PyObject_MakeTpCall+0xce [0x5b27f7b73883] + Binary file "/opt/python/python", at +0x11bab6 [0x5b27f7b73ab6] + Binary file "/opt/python/python", at PyObject_Vectorcall+0x23 [0x5b27f7b73b04] + Binary file "/opt/python/python", at _PyEval_EvalFrameDefault+0x490c [0x5b27f7cbb302] + Binary file "/opt/python/python", at +0x2818e6 [0x5b27f7cd98e6] + Binary file "/opt/python/python", at +0x281aab [0x5b27f7cd9aab] + Binary file "/opt/python/python", at PyEval_EvalCode+0xc5 [0x5b27f7cd9ba3] + Binary file "/opt/python/python", at +0x255957 [0x5b27f7cad957] + Binary file "/opt/python/python", at +0x255ab4 [0x5b27f7cadab4] + Binary file "/opt/python/python", at _PyEval_EvalFrameDefault+0x6c3e [0x5b27f7cbd634] + Binary file "/opt/python/python", at +0x2818e6 [0x5b27f7cd98e6] + Binary file "/opt/python/python", at +0x281aab [0x5b27f7cd9aab] + Binary file "/opt/python/python", at +0x11b6e1 [0x5b27f7b736e1] + Binary file "/opt/python/python", at +0x11d348 [0x5b27f7b75348] + Binary file "/opt/python/python", at +0x11d626 [0x5b27f7b75626] + Binary file "/opt/python/python", at PyObject_Call+0x20 [0x5b27f7b7565e] + Binary file "/opt/python/python", at +0x32a67a [0x5b27f7d8267a] + Binary file "/opt/python/python", at +0x32a7f8 [0x5b27f7d827f8] + Binary file "/opt/python/python", at +0x32ac1b [0x5b27f7d82c1b] + Binary file "/opt/python/python", at Py_RunMain+0x31 [0x5b27f7d82ebe] + Segmentation fault diff --git a/Doc/library/fractions.rst b/Doc/library/fractions.rst index fc7f9a6301a915..392b6d40e861fb 100644 --- a/Doc/library/fractions.rst +++ b/Doc/library/fractions.rst @@ -142,7 +142,7 @@ another rational number, or from a string. .. versionadded:: 3.12 - .. classmethod:: from_float(flt) + .. classmethod:: from_float(f) Alternative constructor which only accepts instances of :class:`float` or :class:`numbers.Integral`. Beware that diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7e367a0f2b6b25..80bd1275973f8d 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1154,44 +1154,44 @@ are always available. They are listed here in alphabetical order. .. function:: locals() - Return a mapping object representing the current local symbol table, with - variable names as the keys, and their currently bound references as the - values. - - At module scope, as well as when using :func:`exec` or :func:`eval` with - a single namespace, this function returns the same namespace as - :func:`globals`. - - At class scope, it returns the namespace that will be passed to the - metaclass constructor. - - When using ``exec()`` or ``eval()`` with separate local and global - arguments, it returns the local namespace passed in to the function call. - - In all of the above cases, each call to ``locals()`` in a given frame of - execution will return the *same* mapping object. Changes made through - the mapping object returned from ``locals()`` will be visible as assigned, - reassigned, or deleted local variables, and assigning, reassigning, or - deleting local variables will immediately affect the contents of the - returned mapping object. - - In an :term:`optimized scope` (including functions, generators, and - coroutines), each call to ``locals()`` instead returns a fresh dictionary - containing the current bindings of the function's local variables and any - nonlocal cell references. In this case, name binding changes made via the - returned dict are *not* written back to the corresponding local variables - or nonlocal cell references, and assigning, reassigning, or deleting local - variables and nonlocal cell references does *not* affect the contents - of previously returned dictionaries. - - Calling ``locals()`` as part of a comprehension in a function, generator, or - coroutine is equivalent to calling it in the containing scope, except that - the comprehension's initialised iteration variables will be included. In - other scopes, it behaves as if the comprehension were running as a nested - function. - - Calling ``locals()`` as part of a generator expression is equivalent to - calling it in a nested generator function. + Return a mapping object representing the current local symbol table, with + variable names as the keys, and their currently bound references as the + values. + + At module scope, as well as when using :func:`exec` or :func:`eval` with + a single namespace, this function returns the same namespace as + :func:`globals`. + + At class scope, it returns the namespace that will be passed to the + metaclass constructor. + + When using ``exec()`` or ``eval()`` with separate local and global + arguments, it returns the local namespace passed in to the function call. + + In all of the above cases, each call to ``locals()`` in a given frame of + execution will return the *same* mapping object. Changes made through + the mapping object returned from ``locals()`` will be visible as assigned, + reassigned, or deleted local variables, and assigning, reassigning, or + deleting local variables will immediately affect the contents of the + returned mapping object. + + In an :term:`optimized scope` (including functions, generators, and + coroutines), each call to ``locals()`` instead returns a fresh dictionary + containing the current bindings of the function's local variables and any + nonlocal cell references. In this case, name binding changes made via the + returned dict are *not* written back to the corresponding local variables + or nonlocal cell references, and assigning, reassigning, or deleting local + variables and nonlocal cell references does *not* affect the contents + of previously returned dictionaries. + + Calling ``locals()`` as part of a comprehension in a function, generator, or + coroutine is equivalent to calling it in the containing scope, except that + the comprehension's initialised iteration variables will be included. In + other scopes, it behaves as if the comprehension were running as a nested + function. + + Calling ``locals()`` as part of a generator expression is equivalent to + calling it in a nested generator function. .. versionchanged:: 3.12 The behaviour of ``locals()`` in a comprehension has been updated as @@ -1839,15 +1839,15 @@ are always available. They are listed here in alphabetical order. ``range(start, stop, step)``. The *start* and *step* arguments default to ``None``. + Slice objects have read-only data attributes :attr:`!start`, + :attr:`!stop`, and :attr:`!step` which merely return the argument + values (or their default). They have no other explicit functionality; + however, they are used by NumPy and other third-party packages. + .. attribute:: slice.start .. attribute:: slice.stop .. attribute:: slice.step - Slice objects have read-only data attributes :attr:`!start`, - :attr:`!stop`, and :attr:`!step` which merely return the argument - values (or their default). They have no other explicit functionality; - however, they are used by NumPy and other third-party packages. - Slice objects are also generated when extended indexing syntax is used. For example: ``a[start:stop:step]`` or ``a[start:stop, i]``. See :func:`itertools.islice` for an alternate version that returns an diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 3a933dff057bbb..3e75621be6dad3 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -403,8 +403,7 @@ The :mod:`functools` module defines the following functions: >>> remove_first_dear(message) 'Hello, dear world!' - :data:`!Placeholder` has no special treatment when used in a keyword - argument to :func:`!partial`. + :data:`!Placeholder` cannot be passed to :func:`!partial` as a keyword argument. .. versionchanged:: 3.14 Added support for :data:`Placeholder` in positional arguments. diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index bb2d2fad23bdb8..4818a4944a512a 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -284,7 +284,7 @@ a file or file-like object. Example: >>> import io, hashlib, hmac - >>> with open(hashlib.__file__, "rb") as f: + >>> with open("library/hashlib.rst", "rb") as f: ... digest = hashlib.file_digest(f, "sha256") ... >>> digest.hexdigest() # doctest: +ELLIPSIS diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst index 6d433b5a04fc4a..dd67fc34e856f1 100644 --- a/Doc/library/html.parser.rst +++ b/Doc/library/html.parser.rst @@ -43,7 +43,9 @@ Example HTML Parser Application As a basic example, below is a simple HTML parser that uses the :class:`HTMLParser` class to print out start tags, end tags, and data -as they are encountered:: +as they are encountered: + +.. testcode:: from html.parser import HTMLParser @@ -63,7 +65,7 @@ as they are encountered:: The output will then be: -.. code-block:: none +.. testoutput:: Encountered a start tag: html Encountered a start tag: head @@ -230,7 +232,9 @@ Examples -------- The following class implements a parser that will be used to illustrate more -examples:: +examples: + +.. testcode:: from html.parser import HTMLParser from html.entities import name2codepoint @@ -266,13 +270,17 @@ examples:: parser = MyHTMLParser() -Parsing a doctype:: +Parsing a doctype: + +.. doctest:: >>> parser.feed('') Decl : DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" -Parsing an element with a few attributes and a title:: +Parsing an element with a few attributes and a title: + +.. doctest:: >>> parser.feed('The Python logo') Start tag: img @@ -285,7 +293,9 @@ Parsing an element with a few attributes and a title:: End tag : h1 The content of ``script`` and ``style`` elements is returned as is, without -further parsing:: +further parsing: + +.. doctest:: >>> parser.feed('') Start tag: style @@ -300,16 +310,25 @@ further parsing:: Data : alert("hello!"); End tag : script -Parsing comments:: +Parsing comments: + +.. doctest:: - >>> parser.feed('' + >>> parser.feed('' ... '') - Comment : a comment + Comment : a comment Comment : [if IE 9]>IE-specific content'``):: +correct char (note: these 3 references are all equivalent to ``'>'``): +.. doctest:: + + >>> parser = MyHTMLParser() + >>> parser.feed('>>>') + Data : >>> + + >>> parser = MyHTMLParser(convert_charrefs=False) >>> parser.feed('>>>') Named ent: > Num ent : > @@ -317,18 +336,22 @@ correct char (note: these 3 references are all equivalent to ``'>'``):: Feeding incomplete chunks to :meth:`~HTMLParser.feed` works, but :meth:`~HTMLParser.handle_data` might be called more than once -(unless *convert_charrefs* is set to ``True``):: +(unless *convert_charrefs* is set to ``True``): - >>> for chunk in ['buff', 'ered ', 'text']: +.. doctest:: + + >>> for chunk in ['buff', 'ered', ' text']: ... parser.feed(chunk) ... Start tag: span Data : buff Data : ered - Data : text + Data : text End tag : span -Parsing invalid HTML (e.g. unquoted attributes) also works:: +Parsing invalid HTML (e.g. unquoted attributes) also works: + +.. doctest:: >>> parser.feed('

tag soup

') Start tag: p diff --git a/Doc/library/http.cookiejar.rst b/Doc/library/http.cookiejar.rst index 23ddecf873876d..251aea891c3f98 100644 --- a/Doc/library/http.cookiejar.rst +++ b/Doc/library/http.cookiejar.rst @@ -570,7 +570,7 @@ Netscape protocol strictness switches: Don't allow setting cookies whose path doesn't path-match request URI. -:attr:`strict_ns_domain` is a collection of flags. Its value is constructed by +:attr:`~DefaultCookiePolicy.strict_ns_domain` is a collection of flags. Its value is constructed by or-ing together (for example, ``DomainStrictNoDots|DomainStrictNonDomain`` means both flags are set). diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 54df4a7e804d50..02016c789b24b4 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -429,8 +429,7 @@ instantiation, of which this module provides three different variants: ``'Last-Modified:'`` header with the file's modification time. Then follows a blank line signifying the end of the headers, and then the - contents of the file are output. If the file's MIME type starts with - ``text/`` the file is opened in text mode; otherwise binary mode is used. + contents of the file are output. For example usage, see the implementation of the ``test`` function in :source:`Lib/http/server.py`. diff --git a/Doc/library/importlib.resources.abc.rst b/Doc/library/importlib.resources.abc.rst index 7a77466bcbaf27..8253a33f591a0b 100644 --- a/Doc/library/importlib.resources.abc.rst +++ b/Doc/library/importlib.resources.abc.rst @@ -49,44 +49,44 @@ .. method:: open_resource(resource) :abstractmethod: - Returns an opened, :term:`file-like object` for binary reading - of the *resource*. + Returns an opened, :term:`file-like object` for binary reading + of the *resource*. - If the resource cannot be found, :exc:`FileNotFoundError` is - raised. + If the resource cannot be found, :exc:`FileNotFoundError` is + raised. .. method:: resource_path(resource) :abstractmethod: - Returns the file system path to the *resource*. + Returns the file system path to the *resource*. - If the resource does not concretely exist on the file system, - raise :exc:`FileNotFoundError`. + If the resource does not concretely exist on the file system, + raise :exc:`FileNotFoundError`. .. method:: is_resource(name) :abstractmethod: - Returns ``True`` if the named *name* is considered a resource. - :exc:`FileNotFoundError` is raised if *name* does not exist. + Returns ``True`` if the named *name* is considered a resource. + :exc:`FileNotFoundError` is raised if *name* does not exist. .. method:: contents() :abstractmethod: - Returns an :term:`iterable` of strings over the contents of - the package. Do note that it is not required that all names - returned by the iterator be actual resources, e.g. it is - acceptable to return names for which :meth:`is_resource` would - be false. - - Allowing non-resource names to be returned is to allow for - situations where how a package and its resources are stored - are known a priori and the non-resource names would be useful. - For instance, returning subdirectory names is allowed so that - when it is known that the package and resources are stored on - the file system then those subdirectory names can be used - directly. - - The abstract method returns an iterable of no items. + Returns an :term:`iterable` of strings over the contents of + the package. Do note that it is not required that all names + returned by the iterator be actual resources, e.g. it is + acceptable to return names for which :meth:`is_resource` would + be false. + + Allowing non-resource names to be returned is to allow for + situations where how a package and its resources are stored + are known a priori and the non-resource names would be useful. + For instance, returning subdirectory names is allowed so that + when it is known that the package and resources are stored on + the file system then those subdirectory names can be used + directly. + + The abstract method returns an iterable of no items. .. class:: Traversable diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 3aa2f35f05eba0..de5cab5aee649f 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -528,14 +528,13 @@ I/O Base Classes It inherits from :class:`IOBase`. The main difference with :class:`RawIOBase` is that methods :meth:`read`, - :meth:`readinto` and :meth:`write` will try (respectively) to read as much - input as requested or to consume all given output, at the expense of - making perhaps more than one system call. + :meth:`readinto` and :meth:`write` will try (respectively) to read + as much input as requested or to emit all provided data. - In addition, those methods can raise :exc:`BlockingIOError` if the - underlying raw stream is in non-blocking mode and cannot take or give - enough data; unlike their :class:`RawIOBase` counterparts, they will - never return ``None``. + In addition, if the underlying raw stream is in non-blocking mode, when the + system returns would block :meth:`write` will raise :exc:`BlockingIOError` + with :attr:`BlockingIOError.characters_written` and :meth:`read` will return + data read so far or ``None`` if no data is available. Besides, the :meth:`read` method does not have a default implementation that defers to :meth:`readinto`. @@ -568,29 +567,40 @@ I/O Base Classes .. method:: read(size=-1, /) - Read and return up to *size* bytes. If the argument is omitted, ``None``, - or negative, data is read and returned until EOF is reached. An empty - :class:`bytes` object is returned if the stream is already at EOF. + Read and return up to *size* bytes. If the argument is omitted, ``None``, + or negative read as much as possible. - If the argument is positive, and the underlying raw stream is not - interactive, multiple raw reads may be issued to satisfy the byte count - (unless EOF is reached first). But for interactive raw streams, at most - one raw read will be issued, and a short result does not imply that EOF is - imminent. + Fewer bytes may be returned than requested. An empty :class:`bytes` object + is returned if the stream is already at EOF. More than one read may be + made and calls may be retried if specific errors are encountered, see + :meth:`os.read` and :pep:`475` for more details. Less than size bytes + being returned does not imply that EOF is imminent. - A :exc:`BlockingIOError` is raised if the underlying raw stream is in - non blocking-mode, and has no data available at the moment. + When reading as much as possible the default implementation will use + ``raw.readall`` if available (which should implement + :meth:`RawIOBase.readall`), otherwise will read in a loop until read + returns ``None``, an empty :class:`bytes`, or a non-retryable error. For + most streams this is to EOF, but for non-blocking streams more data may + become available. + + .. note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: read1(size=-1, /) - Read and return up to *size* bytes, with at most one call to the - underlying raw stream's :meth:`~RawIOBase.read` (or - :meth:`~RawIOBase.readinto`) method. This can be useful if you are - implementing your own buffering on top of a :class:`BufferedIOBase` - object. + Read and return up to *size* bytes, calling :meth:`~RawIOBase.readinto` + which may retry if :py:const:`~errno.EINTR` is encountered per + :pep:`475`. If *size* is ``-1`` or not provided, the implementation will + choose an arbitrary value for *size*. - If *size* is ``-1`` (the default), an arbitrary number of bytes are - returned (more than zero unless EOF is reached). + .. note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: readinto(b, /) @@ -767,34 +777,21 @@ than raw I/O does. .. method:: peek(size=0, /) - Return bytes from the stream without advancing the position. At most one - single read on the raw stream is done to satisfy the call. The number of - bytes returned may be less or more than requested. + Return bytes from the stream without advancing the position. The number of + bytes returned may be less or more than requested. If the underlying raw + stream is non-blocking and the operation would block, returns empty bytes. .. method:: read(size=-1, /) - Read and return *size* bytes, or if *size* is not given or negative, until - EOF or if the read call would block in non-blocking mode. - - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. + In :class:`BufferedReader` this is the same as :meth:`io.BufferedIOBase.read` .. method:: read1(size=-1, /) - Read and return up to *size* bytes with only one call on the raw stream. - If at least one byte is buffered, only buffered bytes are returned. - Otherwise, one raw stream read call is made. + In :class:`BufferedReader` this is the same as :meth:`io.BufferedIOBase.read1` .. versionchanged:: 3.7 The *size* argument is now optional. - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. - .. class:: BufferedWriter(raw, buffer_size=DEFAULT_BUFFER_SIZE) A buffered binary stream providing higher-level access to a writeable, non @@ -826,8 +823,8 @@ than raw I/O does. Write the :term:`bytes-like object`, *b*, and return the number of bytes written. When in non-blocking mode, a - :exc:`BlockingIOError` is raised if the buffer needs to be written out but - the raw stream blocks. + :exc:`BlockingIOError` with :attr:`BlockingIOError.characters_written` set + is raised if the buffer needs to be written out but the raw stream blocks. .. class:: BufferedRandom(raw, buffer_size=DEFAULT_BUFFER_SIZE) @@ -894,9 +891,10 @@ Text I/O .. attribute:: buffer - The underlying binary buffer (a :class:`BufferedIOBase` instance) that - :class:`TextIOBase` deals with. This is not part of the - :class:`TextIOBase` API and may not exist in some implementations. + The underlying binary buffer (a :class:`BufferedIOBase` + or :class:`RawIOBase` instance) that :class:`TextIOBase` deals with. + This is not part of the :class:`TextIOBase` API and may not exist + in some implementations. .. method:: detach() diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 26579ec6328860..12a5a96a3c56f3 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -18,12 +18,17 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. note:: + The term "object" in the context of JSON processing in Python can be + ambiguous. All values in Python are objects. In JSON, an object refers to + any data wrapped in curly braces, similar to a Python dictionary. + .. warning:: Be cautious when parsing JSON data from untrusted sources. A malicious JSON string may cause the decoder to consume considerable CPU and memory resources. Limiting the size of data to be parsed is recommended. -:mod:`json` exposes an API familiar to users of the standard library +This module exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. Encoding basic Python object hierarchies:: @@ -60,7 +65,7 @@ Pretty printing:: "6": 7 } -Specializing JSON object encoding:: +Customizing JSON object encoding:: >>> import json >>> def custom_json(obj): @@ -83,7 +88,7 @@ Decoding JSON:: >>> json.load(io) ['streaming API'] -Specializing JSON object decoding:: +Customizing JSON object decoding:: >>> import json >>> def as_complex(dct): @@ -279,7 +284,7 @@ Basic Usage :param object_hook: If set, a function that is called with the result of - any object literal decoded (a :class:`dict`). + any JSON object literal decoded (a :class:`dict`). The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders, @@ -289,7 +294,7 @@ Basic Usage :param object_pairs_hook: If set, a function that is called with the result of - any object literal decoded with an ordered list of pairs. + any JSON object literal decoded with an ordered list of pairs. The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders. diff --git a/Doc/library/logging.config.rst b/Doc/library/logging.config.rst index 0e9dc33ae2123a..96cca3073fec7e 100644 --- a/Doc/library/logging.config.rst +++ b/Doc/library/logging.config.rst @@ -548,7 +548,7 @@ mnemonic that the corresponding value is a callable. The ``filters`` member of ``handlers`` and ``loggers`` can take filter instances in addition to ids. -You can also specify a special key ``'.'`` whose value is a dictionary is a +You can also specify a special key ``'.'`` whose value is a mapping of attribute names to values. If found, the specified attributes will be set on the user-defined object before it is returned. Thus, with the following configuration:: @@ -586,7 +586,7 @@ configuration dictionary for the handler named ``foo``, and later (once that handler has been configured) it points to the configured handler instance. Thus, ``cfg://handlers.foo`` could resolve to either a dictionary or a handler instance. In general, it is wise to name handlers in a way such that dependent -handlers are configured _after_ any handlers they depend on; that allows +handlers are configured *after* any handlers they depend on; that allows something like ``cfg://handlers.foo`` to be used in configuring a handler that depends on handler ``foo``. If that dependent handler were named ``bar``, problems would result, because the configuration of ``bar`` would be attempted diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 63ef533e82c658..d74ef73ee28497 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -352,6 +352,10 @@ module, supports rotation of disk log files. Outputs the record to the file, catering for rollover as described previously. + .. method:: shouldRollover(record) + + See if the supplied record would cause the file to exceed the configured size limit. + .. _timed-rotating-file-handler: TimedRotatingFileHandler @@ -459,7 +463,11 @@ timed intervals. .. method:: getFilesToDelete() Returns a list of filenames which should be deleted as part of rollover. These - are the absolute paths of the oldest backup log files written by the handler. + + .. method:: shouldRollover(record) + + See if enough time has passed for a rollover to occur and if it has, compute + the next rollover time. .. _socket-handler: @@ -1051,6 +1059,15 @@ possible, while any potentially slow operations (such as sending an email via .. note:: If you are using :mod:`multiprocessing`, you should avoid using :class:`~queue.SimpleQueue` and instead use :class:`multiprocessing.Queue`. + .. warning:: + + The :mod:`multiprocessing` module uses an internal logger created and + accessed via :meth:`~multiprocessing.get_logger`. + :class:`multiprocessing.Queue` will log ``DEBUG`` level messages upon + items being queued. If those log messages are processed by a + :class:`QueueHandler` using the same :class:`multiprocessing.Queue` instance, + it will cause a deadlock or infinite recursion. + .. method:: emit(record) Enqueues the result of preparing the LogRecord. Should an exception diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 0749367045dfa9..8dad20bc83bcf3 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -10,8 +10,8 @@ -------------- -This module provides access to the mathematical functions defined by the C -standard. +This module provides access to common mathematical functions and constants, +including those defined by the C standard. These functions cannot be used with complex numbers; use the functions of the same name from the :mod:`cmath` module if you require support for complex @@ -144,8 +144,7 @@ Number-theoretic functions .. function:: factorial(n) - Return *n* factorial as an integer. Raises :exc:`ValueError` if *n* is not integral or - is negative. + Return factorial of the nonnegative integer *n*. .. versionchanged:: 3.10 Floats with integral values (like ``5.0``) are no longer accepted. @@ -775,7 +774,7 @@ Constants The mathematical constant *τ* = 6.283185..., to available precision. Tau is a circle constant equal to 2\ *π*, the ratio of a circle's circumference to its radius. To learn more about Tau, check out Vi Hart's video `Pi is (still) - Wrong `_, and start celebrating + Wrong `_, and start celebrating `Tau day `_ by eating twice as much pie! .. versionadded:: 3.6 diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst index 4e20c07331a220..8fca79b23e4e15 100644 --- a/Doc/library/mmap.rst +++ b/Doc/library/mmap.rst @@ -269,7 +269,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length Resizing a map created with *access* of :const:`ACCESS_READ` or :const:`ACCESS_COPY`, will raise a :exc:`TypeError` exception. - Resizing a map created with with *trackfd* set to ``False``, + Resizing a map created with *trackfd* set to ``False``, will raise a :exc:`ValueError` exception. **On Windows**: Resizing the map will raise an :exc:`OSError` if there are other diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 6c43d5fe166e2f..b41953d21442c9 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -1081,7 +1081,7 @@ Miscellaneous .. function:: freeze_support() Add support for when a program which uses :mod:`multiprocessing` has been - frozen to produce a Windows executable. (Has been tested with **py2exe**, + frozen to produce an executable. (Has been tested with **py2exe**, **PyInstaller** and **cx_Freeze**.) One needs to call this function straight after the ``if __name__ == @@ -1099,10 +1099,10 @@ Miscellaneous If the ``freeze_support()`` line is omitted then trying to run the frozen executable will raise :exc:`RuntimeError`. - Calling ``freeze_support()`` has no effect when invoked on any operating - system other than Windows. In addition, if the module is being run - normally by the Python interpreter on Windows (the program has not been - frozen), then ``freeze_support()`` has no effect. + Calling ``freeze_support()`` has no effect when the start method is not + *spawn*. In addition, if the module is being run normally by the Python + interpreter (the program has not been frozen), then ``freeze_support()`` + has no effect. .. function:: get_all_start_methods() @@ -1118,7 +1118,9 @@ Miscellaneous Return a context object which has the same attributes as the :mod:`multiprocessing` module. - If *method* is ``None`` then the default context is returned. + If *method* is ``None`` then the default context is returned. Note that if + the global start method has not been set, this will set it to the + default method. Otherwise *method* should be ``'fork'``, ``'spawn'``, ``'forkserver'``. :exc:`ValueError` is raised if the specified start method is not available. See :ref:`multiprocessing-start-methods`. @@ -1129,10 +1131,10 @@ Miscellaneous Return the name of start method used for starting processes. - If the start method has not been fixed and *allow_none* is false, - then the start method is fixed to the default and the name is - returned. If the start method has not been fixed and *allow_none* - is true then ``None`` is returned. + If the global start method has not been set and *allow_none* is + ``False``, then the start method is set to the default and the name + is returned. If the start method has not been set and *allow_none* is + ``True`` then ``None`` is returned. The return value can be ``'fork'``, ``'spawn'``, ``'forkserver'`` or ``None``. See :ref:`multiprocessing-start-methods`. diff --git a/Doc/library/netrc.rst b/Doc/library/netrc.rst index f6260383b2b057..74c97e8c9a9759 100644 --- a/Doc/library/netrc.rst +++ b/Doc/library/netrc.rst @@ -24,12 +24,14 @@ the Unix :program:`ftp` program and other FTP clients. a :exc:`FileNotFoundError` exception will be raised. Parse errors will raise :exc:`NetrcParseError` with diagnostic information including the file name, line number, and terminating token. + If no argument is specified on a POSIX system, the presence of passwords in the :file:`.netrc` file will raise a :exc:`NetrcParseError` if the file ownership or permissions are insecure (owned by a user other than the user running the process, or accessible for read or write by any other user). This implements security behavior equivalent to that of ftp and other - programs that use :file:`.netrc`. + programs that use :file:`.netrc`. Such security checks are not available + on platforms that do not support :func:`os.getuid`. .. versionchanged:: 3.4 Added the POSIX permission check. diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index ecbbc1d7605f9f..32a2970d2d3a2c 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -298,9 +298,10 @@ the :mod:`glob` module.) device than *path*, or whether :file:`{path}/..` and *path* point to the same i-node on the same device --- this should detect mount points for all Unix and POSIX variants. It is not able to reliably detect bind mounts on the - same filesystem. On Windows, a drive letter root and a share UNC are - always mount points, and for any other path ``GetVolumePathName`` is called - to see if it is different from the input path. + same filesystem. On Linux systems, it will always return ``True`` for btrfs + subvolumes, even if they aren't mount points. On Windows, a drive letter root + and a share UNC are always mount points, and for any other path + ``GetVolumePathName`` is called to see if it is different from the input path. .. versionchanged:: 3.4 Added support for detecting non-root mount points on Windows. @@ -408,9 +409,26 @@ the :mod:`glob` module.) system). On Windows, this function will also resolve MS-DOS (also called 8.3) style names such as ``C:\\PROGRA~1`` to ``C:\\Program Files``. - If a path doesn't exist or a symlink loop is encountered, and *strict* is - ``True``, :exc:`OSError` is raised. If *strict* is ``False`` these errors - are ignored, and so the result might be missing or otherwise inaccessible. + By default, the path is evaluated up to the first component that does not + exist, is a symlink loop, or whose evaluation raises :exc:`OSError`. + All such components are appended unchanged to the existing part of the path. + + Some errors that are handled this way include "access denied", "not a + directory", or "bad argument to internal function". Thus, the + resulting path may be missing or inaccessible, may still contain + links or loops, and may traverse non-directories. + + This behavior can be modified by keyword arguments: + + If *strict* is ``True``, the first error encountered when evaluating the path is + re-raised. + In particular, :exc:`FileNotFoundError` is raised if *path* does not exist, + or another :exc:`OSError` if it is otherwise inaccessible. + + If *strict* is :py:data:`os.path.ALLOW_MISSING`, errors other than + :exc:`FileNotFoundError` are re-raised (as with ``strict=True``). + Thus, the returned path will not contain any symbolic links, but the named + file and some of its parent directories may be missing. .. note:: This function emulates the operating system's procedure for making a path @@ -429,6 +447,15 @@ the :mod:`glob` module.) .. versionchanged:: 3.10 The *strict* parameter was added. + .. versionchanged:: 3.14 + The :py:data:`~os.path.ALLOW_MISSING` value for the *strict* parameter + was added. + +.. data:: ALLOW_MISSING + + Special value used for the *strict* argument in :func:`realpath`. + + .. versionadded:: 3.14 .. function:: relpath(path, start=os.curdir) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 7d7692dea5c38c..47986a2d9602ee 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1781,9 +1781,12 @@ The following wildcards are supported in patterns for ``?`` Matches one non-separator character. ``[seq]`` - Matches one character in *seq*. + Matches one character in *seq*, where *seq* is a sequence of characters. + Range expressions are supported; for example, ``[a-z]`` matches any lowercase ASCII letter. + Multiple ranges can be combined: ``[a-zA-Z0-9_]`` matches any ASCII letter, digit, or underscore. + ``[!seq]`` - Matches one character not in *seq*. + Matches one character not in *seq*, where *seq* follows the same rules as above. For a literal match, wrap the meta-characters in brackets. For example, ``"[?]"`` matches the character ``"?"``. @@ -1982,7 +1985,7 @@ The :mod:`pathlib.types` module provides types for static type checking. If *follow_symlinks* is ``False``, return ``True`` only if the path is a file (without following symlinks); return ``False`` if the path - is a directory or other other non-file, or if it doesn't exist. + is a directory or other non-file, or if it doesn't exist. .. method:: is_symlink() diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index a0304edddf6478..f4b51664545be5 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -80,7 +80,7 @@ The debugger's prompt is ``(Pdb)``, which is the indicator that you are in debug You can also invoke :mod:`pdb` from the command line to debug other scripts. For example:: - python -m pdb [-c command] (-m module | pyfile) [args ...] + python -m pdb [-c command] (-m module | -p pid | pyfile) [args ...] When invoked as a module, pdb will automatically enter post-mortem debugging if the program being debugged exits abnormally. After post-mortem debugging (or @@ -104,6 +104,24 @@ useful than quitting the debugger upon program's exit. .. versionchanged:: 3.7 Added the ``-m`` option. +.. option:: -p, --pid + + Attach to the process with the specified PID. + + .. versionadded:: 3.14 + + +To attach to a running Python process for remote debugging, use the ``-p`` or +``--pid`` option with the target process's PID:: + + python -m pdb -p 1234 + +.. note:: + + Attaching to a process that is blocked in a system call or waiting for I/O + will only work once the next bytecode instruction is executed or when the + process receives a signal. + Typical usage to execute a statement under control of the debugger is:: >>> import pdb diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index 20b8f6bcf19117..47d24b6f7d06bb 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -69,8 +69,8 @@ support. Yield :term:`finder` objects for the given module name. - If fullname contains a ``'.'``, the finders will be for the package - containing fullname, otherwise they will be all registered top level + If *fullname* contains a ``'.'``, the finders will be for the package + containing *fullname*, otherwise they will be all registered top level finders (i.e. those on both :data:`sys.meta_path` and :data:`sys.path_hooks`). If the named module is in a package, that package is imported as a side diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index 2d57cff10a9278..5506ac828e5abe 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -16,11 +16,10 @@ references to these attributes should be marked using the :member: role. -.. warning:: +.. note:: - The :mod:`pyexpat` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. index:: single: Expat diff --git a/Doc/library/python.rst b/Doc/library/python.rst index c2c231af7c3033..c5c762e11b99e5 100644 --- a/Doc/library/python.rst +++ b/Doc/library/python.rst @@ -27,3 +27,8 @@ overview: inspect.rst annotationlib.rst site.rst + +.. seealso:: + + * See the :mod:`concurrent.interpreters` module, which similarly + exposes core runtime functionality. diff --git a/Doc/library/random.rst b/Doc/library/random.rst index ef0cfb0e76cef6..b1120b3a4d8eb4 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -447,6 +447,11 @@ Alternative Generator Override this method in subclasses to customise the :meth:`~random.getrandbits` behaviour of :class:`!Random` instances. + .. method:: Random.randbytes(n) + + Override this method in subclasses to customise the + :meth:`~random.randbytes` behaviour of :class:`!Random` instances. + .. class:: SystemRandom([seed]) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index eb3b1e5549cc98..75ebbf11c8e47c 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -991,8 +991,8 @@ Functions That way, separator components are always found at the same relative indices within the result list. - Empty matches for the pattern split the string only when not adjacent - to a previous empty match. + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. .. code:: pycon @@ -1095,9 +1095,12 @@ Functions The optional argument *count* is the maximum number of pattern occurrences to be replaced; *count* must be a non-negative integer. If omitted or zero, all - occurrences will be replaced. Empty matches for the pattern are replaced only - when not adjacent to a previous empty match, so ``sub('x*', '-', 'abxd')`` returns - ``'-a-b--d-'``. + occurrences will be replaced. + + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. + As a result, ``sub('x*', '-', 'abxd')`` returns ``'-a-b--d-'`` + instead of ``'-a-b-d-'``. .. index:: single: \g; in regular expressions @@ -1128,8 +1131,7 @@ Functions .. versionchanged:: 3.7 Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter now are errors. - Empty matches for the pattern are replaced when adjacent to a previous - non-empty match. + An empty match can occur immediately after a non-empty match. .. versionchanged:: 3.12 Group *id* can only contain ASCII digits. diff --git a/Doc/library/security_warnings.rst b/Doc/library/security_warnings.rst index a573c98f73eb0a..70c359cc1c0fc3 100644 --- a/Doc/library/security_warnings.rst +++ b/Doc/library/security_warnings.rst @@ -28,7 +28,7 @@ The following modules have specific security considerations: ` * :mod:`tempfile`: :ref:`mktemp is deprecated due to vulnerability to race conditions ` -* :mod:`xml`: :ref:`XML vulnerabilities ` +* :mod:`xml`: :ref:`XML security ` * :mod:`zipfile`: :ref:`maliciously prepared .zip files can cause disk volume exhaustion ` diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 2cbf95bcf535e4..dde38498206c46 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -47,6 +47,13 @@ Directory and files operations 0, only the contents from the current file position to the end of the file will be copied. + :func:`copyfileobj` will *not* guarantee that the destination stream has + been flushed on completion of the copy. If you want to read from the + destination at the completion of the copy operation (for example, reading + the contents of a temporary file that has been copied from a HTTP stream), + you must ensure that you have called :func:`~io.IOBase.flush` or + :func:`~io.IOBase.close` on the file-like object before attempting to read + the destination file. .. function:: copyfile(src, dst, *, follow_symlinks=True) @@ -327,6 +334,10 @@ Directory and files operations The deprecated *onerror* is similar to *onexc*, except that the third parameter it receives is the tuple returned from :func:`sys.exc_info`. + .. seealso:: + :ref:`shutil-rmtree-example` for an example of handling the removal + of a directory tree that contains read-only files. + .. audit-event:: shutil.rmtree path,dir_fd shutil.rmtree .. versionchanged:: 3.3 @@ -454,6 +465,10 @@ Directory and files operations :envvar:`PATH` environment variable is read from :data:`os.environ`, falling back to :data:`os.defpath` if it is not set. + If *cmd* contains a directory component, :func:`!which` only checks the + specified path directly and does not search the directories listed in + *path* or in the system's :envvar:`PATH` environment variable. + On Windows, the current directory is prepended to the *path* if *mode* does not include ``os.X_OK``. When the *mode* does include ``os.X_OK``, the Windows API ``NeedCurrentDirectoryForExePathW`` will be consulted to @@ -603,7 +618,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *format* is the archive format: one of "zip" (if the :mod:`zlib` module is available), "tar", "gztar" (if the :mod:`zlib` module is available), "bztar" (if the :mod:`bz2` module is - available), or "xztar" (if the :mod:`lzma` module is available). + available), "xztar" (if the :mod:`lzma` module is available), or "zstdtar" + (if the :mod:`compression.zstd` module is available). *root_dir* is a directory that will be the root directory of the archive, all paths in the archive will be relative to it; for example, @@ -658,6 +674,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). + - *zstdtar*: Zstandard compressed tar-file (if the :mod:`compression.zstd` + module is available). You can register new formats or provide your own archiver for any existing formats, by using :func:`register_archive_format`. @@ -701,8 +719,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. *extract_dir* is the name of the target directory where the archive is unpacked. If not provided, the current working directory is used. - *format* is the archive format: one of "zip", "tar", "gztar", "bztar", or - "xztar". Or any other format registered with + *format* is the archive format: one of "zip", "tar", "gztar", "bztar", + "xztar", or "zstdtar". Or any other format registered with :func:`register_unpack_format`. If not provided, :func:`unpack_archive` will use the archive file name extension and see if an unpacker was registered for that extension. In case none is found, @@ -774,6 +792,8 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules. - *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available). - *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available). - *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available). + - *zstdtar*: Zstandard compressed tar-file (if the :mod:`compression.zstd` + module is available). You can register new formats or provide your own unpacker for any existing formats, by using :func:`register_unpack_format`. diff --git a/Doc/library/signal.rst b/Doc/library/signal.rst index c28841dbb8cfc8..b0307d3dea1170 100644 --- a/Doc/library/signal.rst +++ b/Doc/library/signal.rst @@ -211,8 +211,8 @@ The variables defined in the :mod:`signal` module are: .. data:: SIGSTKFLT - Stack fault on coprocessor. The Linux kernel does not raise this signal: it - can only be raised in user space. + Stack fault on coprocessor. The Linux kernel does not raise this signal: it + can only be raised in user space. .. availability:: Linux. diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 8fd5187e3a4a36..bc89a3228f0ed9 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -362,10 +362,10 @@ Exceptions Constants ^^^^^^^^^ - The AF_* and SOCK_* constants are now :class:`AddressFamily` and - :class:`SocketKind` :class:`.IntEnum` collections. +The AF_* and SOCK_* constants are now :class:`AddressFamily` and +:class:`SocketKind` :class:`.IntEnum` collections. - .. versionadded:: 3.4 +.. versionadded:: 3.4 .. data:: AF_UNIX AF_INET @@ -773,9 +773,9 @@ Constants Constant to optimize CPU locality, to be used in conjunction with :data:`SO_REUSEPORT`. - .. versionadded:: 3.11 + .. versionadded:: 3.11 - .. availability:: Linux >= 3.9 + .. availability:: Linux >= 3.9 .. data:: SO_REUSEPORT_LB @@ -1492,7 +1492,7 @@ The :mod:`socket` module also offers various network-related services: The *fds* parameter is a sequence of file descriptors. Consult :meth:`~socket.sendmsg` for the documentation of these parameters. - .. availability:: Unix, Windows, not WASI. + .. availability:: Unix, not WASI. Unix platforms supporting :meth:`~socket.sendmsg` and :const:`SCM_RIGHTS` mechanism. @@ -1506,9 +1506,9 @@ The :mod:`socket` module also offers various network-related services: Return ``(msg, list(fds), flags, addr)``. Consult :meth:`~socket.recvmsg` for the documentation of these parameters. - .. availability:: Unix, Windows, not WASI. + .. availability:: Unix, not WASI. - Unix platforms supporting :meth:`~socket.sendmsg` + Unix platforms supporting :meth:`~socket.recvmsg` and :const:`SCM_RIGHTS` mechanism. .. versionadded:: 3.9 diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 59cfa136a3b7da..b048eb71ebfbfb 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -541,7 +541,7 @@ objects that simplify communication by providing the standard file interface):: The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the -the first handler had to use a ``recv()`` loop to accumulate data until a +first handler had to use a ``recv()`` loop to accumulate data until a newline itself. If it had just used a single ``recv()`` without the loop it would just have returned what has been received so far from the client. TCP is stream based: data arrives in the order it was sent, but there no diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index c615650b622f7f..e939e61a9676ee 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -1492,7 +1492,9 @@ Cursor objects :type parameters: :class:`dict` | :term:`sequence` :raises ProgrammingError: - If *sql* contains more than one SQL statement. + When *sql* contains more than one SQL statement. + When :ref:`named placeholders ` are used + and *parameters* is a sequence instead of a :class:`dict`. If :attr:`~Connection.autocommit` is :data:`LEGACY_TRANSACTION_CONTROL`, @@ -1501,13 +1503,11 @@ Cursor objects and there is no open transaction, a transaction is implicitly opened before executing *sql*. - .. deprecated-removed:: 3.12 3.14 + .. versionchanged:: 3.14 - :exc:`DeprecationWarning` is emitted if + :exc:`ProgrammingError` is emitted if :ref:`named placeholders ` are used and *parameters* is a sequence instead of a :class:`dict`. - Starting with Python 3.14, :exc:`ProgrammingError` will - be raised instead. Use :meth:`executescript` to execute multiple SQL statements. @@ -1529,8 +1529,10 @@ Cursor objects :type parameters: :term:`iterable` :raises ProgrammingError: - If *sql* contains more than one SQL statement, - or is not a DML statement. + When *sql* contains more than one SQL statement + or is not a DML statement, + When :ref:`named placeholders ` are used + and the items in *parameters* are sequences instead of :class:`dict`\s. Example: @@ -1554,14 +1556,12 @@ Cursor objects .. _RETURNING clauses: https://www.sqlite.org/lang_returning.html - .. deprecated-removed:: 3.12 3.14 + .. versionchanged:: 3.14 - :exc:`DeprecationWarning` is emitted if + :exc:`ProgrammingError` is emitted if :ref:`named placeholders ` are used and the items in *parameters* are sequences instead of :class:`dict`\s. - Starting with Python 3.14, :exc:`ProgrammingError` will - be raised instead. .. method:: executescript(sql_script, /) @@ -2289,7 +2289,7 @@ This section shows recipes for common adapters and converters. def adapt_datetime_iso(val): """Adapt datetime.datetime to timezone-naive ISO 8601 date.""" - return val.isoformat() + return val.replace(tzinfo=None).isoformat() def adapt_datetime_epoch(val): """Adapt datetime.datetime to Unix timestamp.""" diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 39aaa5da0786f8..394c302fd354b9 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1018,7 +1018,7 @@ operations have the same priority as the corresponding numeric operations. [3]_ | ``s * n`` or | equivalent to adding *s* to | (2)(7) | | ``n * s`` | itself *n* times | | +--------------------------+--------------------------------+----------+ -| ``s[i]`` | *i*\ th item of *s*, origin 0 | \(3) | +| ``s[i]`` | *i*\ th item of *s*, origin 0 | (3)(9) | +--------------------------+--------------------------------+----------+ | ``s[i:j]`` | slice of *s* from *i* to *j* | (3)(4) | +--------------------------+--------------------------------+----------+ @@ -1150,6 +1150,9 @@ Notes: without copying any data and with the returned index being relative to the start of the sequence rather than the start of the slice. +(9) + An :exc:`IndexError` is raised if *i* is outside the sequence range. + .. _typesseq-immutable: @@ -1214,6 +1217,8 @@ accepts integers that meet the value restriction ``0 <= x <= 255``). | ``s[i] = x`` | item *i* of *s* is replaced by | | | | *x* | | +------------------------------+--------------------------------+---------------------+ +| ``del s[i]`` | removes item *i* of *s* | | ++------------------------------+--------------------------------+---------------------+ | ``s[i:j] = t`` | slice of *s* from *i* to *j* | | | | is replaced by the contents of | | | | the iterable *t* | | @@ -1788,8 +1793,14 @@ expression support in the :mod:`re` module). Return centered in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The original string is - returned if *width* is less than or equal to ``len(s)``. + returned if *width* is less than or equal to ``len(s)``. For example:: + >>> 'Python'.center(10) + ' Python ' + >>> 'Python'.center(10, '-') + '--Python--' + >>> 'Python'.center(4) + 'Python' .. method:: str.count(sub[, start[, end]]) @@ -1799,8 +1810,18 @@ expression support in the :mod:`re` module). interpreted as in slice notation. If *sub* is empty, returns the number of empty strings between characters - which is the length of the string plus one. - + which is the length of the string plus one. For example:: + + >>> 'spam, spam, spam'.count('spam') + 3 + >>> 'spam, spam, spam'.count('spam', 5) + 2 + >>> 'spam, spam, spam'.count('spam', 5, 10) + 1 + >>> 'spam, spam, spam'.count('eggs') + 0 + >>> 'spam, spam, spam'.count('') + 17 .. method:: str.encode(encoding="utf-8", errors="strict") @@ -1820,6 +1841,14 @@ expression support in the :mod:`re` module). unless an encoding error actually occurs, :ref:`devmode` is enabled or a :ref:`debug build ` is used. + For example:: + + >>> encoded_str_to_bytes = 'Python'.encode() + >>> type(encoded_str_to_bytes) + + >>> encoded_str_to_bytes + b'Python' + .. versionchanged:: 3.1 Added support for keyword arguments. @@ -1834,7 +1863,19 @@ expression support in the :mod:`re` module). Return ``True`` if the string ends with the specified *suffix*, otherwise return ``False``. *suffix* can also be a tuple of suffixes to look for. With optional *start*, test beginning at that position. With optional *end*, stop comparing - at that position. + at that position. Using *start* and *end* is equivalent to + ``str[start:end].endswith(suffix)``. For example:: + + >>> 'Python'.endswith('on') + True + >>> 'a tuple of suffixes'.endswith(('at', 'in')) + False + >>> 'a tuple of suffixes'.endswith(('at', 'es')) + True + >>> 'Python is amazing'.endswith('is', 0, 9) + True + + See also :meth:`startswith` and :meth:`removesuffix`. .. method:: str.expandtabs(tabsize=8) @@ -1850,12 +1891,15 @@ expression support in the :mod:`re` module). (``\n``) or return (``\r``), it is copied and the current column is reset to zero. Any other character is copied unchanged and the current column is incremented by one regardless of how the character is represented when - printed. + printed. For example:: >>> '01\t012\t0123\t01234'.expandtabs() '01 012 0123 01234' >>> '01\t012\t0123\t01234'.expandtabs(4) '01 012 0123 01234' + >>> print('01\t012\n0123\t01234'.expandtabs(4)) + 01 012 + 0123 01234 .. method:: str.find(sub[, start[, end]]) @@ -2012,7 +2056,7 @@ expression support in the :mod:`re` module). .. method:: str.isprintable() - Return true if all characters in the string are printable, false if it + Return ``True`` if all characters in the string are printable, ``False`` if it contains at least one non-printable character. Here "printable" means the character is suitable for :func:`repr` to use in @@ -2269,6 +2313,18 @@ expression support in the :mod:`re` module). >>> ' 1 2 3 '.split() ['1', '2', '3'] + If *sep* is not specified or is ``None`` and *maxsplit* is ``0``, only + leading runs of consecutive whitespace are considered. + + For example:: + + >>> "".split(None, 0) + [] + >>> " ".split(None, 0) + [] + >>> " foo ".split(maxsplit=0) + ['foo '] + .. index:: single: universal newlines; str.splitlines method @@ -4823,7 +4879,13 @@ can be used interchangeably to index the same dictionary entry. being added is already present, the value from the keyword argument replaces the value from the positional argument. - To illustrate, the following examples all return a dictionary equal to + Providing keyword arguments as in the first example only works for keys that + are valid Python identifiers. Otherwise, any valid keys can be used. + + Dictionaries compare equal if and only if they have the same ``(key, + value)`` pairs (regardless of ordering). Order comparisons ('<', '<=', '>=', '>') raise + :exc:`TypeError`. To illustrate dictionary creation and equality, + the following examples all return a dictionary equal to ``{"one": 1, "two": 2, "three": 3}``:: >>> a = dict(one=1, two=2, three=3) @@ -4838,6 +4900,27 @@ can be used interchangeably to index the same dictionary entry. Providing keyword arguments as in the first example only works for keys that are valid Python identifiers. Otherwise, any valid keys can be used. + Dictionaries preserve insertion order. Note that updating a key does not + affect the order. Keys added after deletion are inserted at the end. :: + + >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} + >>> d + {'one': 1, 'two': 2, 'three': 3, 'four': 4} + >>> list(d) + ['one', 'two', 'three', 'four'] + >>> list(d.values()) + [1, 2, 3, 4] + >>> d["one"] = 42 + >>> d + {'one': 42, 'two': 2, 'three': 3, 'four': 4} + >>> del d["two"] + >>> d["two"] = None + >>> d + {'one': 42, 'three': 3, 'four': 4, 'two': None} + + .. versionchanged:: 3.7 + Dictionary order is guaranteed to be insertion order. This behavior was + an implementation detail of CPython from 3.6. These are the operations that dictionaries support (and therefore, custom mapping types should support too): @@ -5008,32 +5091,6 @@ can be used interchangeably to index the same dictionary entry. .. versionadded:: 3.9 - Dictionaries compare equal if and only if they have the same ``(key, - value)`` pairs (regardless of ordering). Order comparisons ('<', '<=', '>=', '>') raise - :exc:`TypeError`. - - Dictionaries preserve insertion order. Note that updating a key does not - affect the order. Keys added after deletion are inserted at the end. :: - - >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} - >>> d - {'one': 1, 'two': 2, 'three': 3, 'four': 4} - >>> list(d) - ['one', 'two', 'three', 'four'] - >>> list(d.values()) - [1, 2, 3, 4] - >>> d["one"] = 42 - >>> d - {'one': 42, 'two': 2, 'three': 3, 'four': 4} - >>> del d["two"] - >>> d["two"] = None - >>> d - {'one': 42, 'three': 3, 'four': 4, 'two': None} - - .. versionchanged:: 3.7 - Dictionary order is guaranteed to be insertion order. This behavior was - an implementation detail of CPython from 3.6. - Dictionaries and dictionary views are reversible. :: >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} diff --git a/Doc/library/string.rst b/Doc/library/string.rst index b44d98819b6998..23e15780075435 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -328,7 +328,7 @@ The general form of a *standard format specifier* is: sign: "+" | "-" | " " width_and_precision: [`width_with_grouping`][`precision_with_grouping`] width_with_grouping: [`width`][`grouping`] - precision_with_grouping: "." [`precision`][`grouping`] + precision_with_grouping: "." [`precision`][`grouping`] | "." `grouping` width: `~python-grammar:digit`+ precision: `~python-grammar:digit`+ grouping: "," | "_" @@ -858,7 +858,7 @@ these rules. The methods of :class:`Template` are: .. method:: is_valid() - Returns false if the template has invalid placeholders that will cause + Returns ``False`` if the template has invalid placeholders that will cause :meth:`substitute` to raise :exc:`ValueError`. .. versionadded:: 3.11 diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 0674074b8c0df6..f62a4011e4144b 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -137,7 +137,8 @@ The following events are supported: .. monitoring-event:: PY_UNWIND - Exit from a Python function during exception unwinding. + Exit from a Python function during exception unwinding. This includes exceptions raised directly within the + function and that are allowed to continue to propagate. .. monitoring-event:: PY_YIELD @@ -171,7 +172,7 @@ events, use the expression ``PY_RETURN | PY_START``. if get_events(DEBUGGER_ID) == NO_EVENTS: ... -Events are divided into three groups: + Setting this event deactivates all events. .. _monitoring-event-local: @@ -243,20 +244,23 @@ raise an exception unless it would be visible to other code. To allow tools to monitor for real exceptions without slowing down generators and coroutines, the :monitoring-event:`STOP_ITERATION` event is provided. -:monitoring-event:`STOP_ITERATION` can be locally disabled, unlike :monitoring-event:`RAISE`. +:monitoring-event:`STOP_ITERATION` can be locally disabled, unlike +:monitoring-event:`RAISE`. -Note that the :monitoring-event:`STOP_ITERATION` event and the :monitoring-event:`RAISE` -event for a :exc:`StopIteration` exception are equivalent, and are treated as interchangeable -when generating events. Implementations will favor :monitoring-event:`STOP_ITERATION` for -performance reasons, but may generate a :monitoring-event:`RAISE` event with a :exc:`StopIteration`. +Note that the :monitoring-event:`STOP_ITERATION` event and the +:monitoring-event:`RAISE` event for a :exc:`StopIteration` exception are +equivalent, and are treated as interchangeable when generating events. +Implementations will favor :monitoring-event:`STOP_ITERATION` for performance +reasons, but may generate a :monitoring-event:`RAISE` event with a +:exc:`StopIteration`. Turning events on and off ------------------------- In order to monitor an event, it must be turned on and a corresponding callback -must be registered. -Events can be turned on or off by setting the events either globally or -for a particular code object. +must be registered. Events can be turned on or off by setting the events either +globally and/or for a particular code object. An event will trigger only once, +even if it is turned on both globally and locally. Setting events globally @@ -292,10 +296,6 @@ in Python (see :ref:`c-api-monitoring`). Activates all the local events for *code* which are set in *event_set*. Raises a :exc:`ValueError` if *tool_id* is not in use. -Local events add to global events, but do not mask them. -In other words, all global events will trigger for a code object, -regardless of the local events. - Disabling events '''''''''''''''' @@ -325,8 +325,6 @@ except for a few breakpoints. Registering callback functions ------------------------------ -To register a callable for events call - .. function:: register_callback(tool_id: int, event: int, func: Callable | None, /) -> Callable | None Registers the callable *func* for the *event* with the given *tool_id* @@ -335,12 +333,16 @@ To register a callable for events call it is unregistered and returned. Otherwise :func:`register_callback` returns ``None``. - Functions can be unregistered by calling ``sys.monitoring.register_callback(tool_id, event, None)``. Callback functions can be registered and unregistered at any time. +Callbacks are called only once regardless if the event is turned on both +globally and locally. As such, if an event could be turned on for both global +and local events by your code then the callback needs to be written to handle +either trigger. + Registering or unregistering a callback function will generate a :func:`sys.audit` event. @@ -353,37 +355,46 @@ Callback function arguments that there are no arguments to the call. When an active event occurs, the registered callback function is called. +Callback functions returning an object other than :data:`DISABLE` will have no effect. Different events will provide the callback function with different arguments, as follows: * :monitoring-event:`PY_START` and :monitoring-event:`PY_RESUME`:: - func(code: CodeType, instruction_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int) -> object * :monitoring-event:`PY_RETURN` and :monitoring-event:`PY_YIELD`:: - func(code: CodeType, instruction_offset: int, retval: object) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, retval: object) -> object -* :monitoring-event:`CALL`, :monitoring-event:`C_RAISE` and :monitoring-event:`C_RETURN`:: +* :monitoring-event:`CALL`, :monitoring-event:`C_RAISE` and :monitoring-event:`C_RETURN` + (*arg0* can be :data:`MISSING` specifically):: - func(code: CodeType, instruction_offset: int, callable: object, arg0: object | MISSING) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, callable: object, arg0: object) -> object + *code* represents the code object where the call is being made, while + *callable* is the object that is about to be called (and thus + triggered the event). If there are no arguments, *arg0* is set to :data:`sys.monitoring.MISSING`. + For instance methods, *callable* will be the function object as found on the + class with *arg0* set to the instance (i.e. the ``self`` argument to the + method). + * :monitoring-event:`RAISE`, :monitoring-event:`RERAISE`, :monitoring-event:`EXCEPTION_HANDLED`, :monitoring-event:`PY_UNWIND`, :monitoring-event:`PY_THROW` and :monitoring-event:`STOP_ITERATION`:: - func(code: CodeType, instruction_offset: int, exception: BaseException) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, exception: BaseException) -> object * :monitoring-event:`LINE`:: - func(code: CodeType, line_number: int) -> DISABLE | Any + func(code: CodeType, line_number: int) -> object * :monitoring-event:`BRANCH_LEFT`, :monitoring-event:`BRANCH_RIGHT` and :monitoring-event:`JUMP`:: - func(code: CodeType, instruction_offset: int, destination_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int, destination_offset: int) -> object Note that the *destination_offset* is where the code will next execute. * :monitoring-event:`INSTRUCTION`:: - func(code: CodeType, instruction_offset: int) -> DISABLE | Any + func(code: CodeType, instruction_offset: int) -> object diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 55e442b20ff877..05bc7cfb9dc089 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -953,6 +953,8 @@ always available. Unless explicitly noted otherwise, all variables are read-only This function should be used for internal and specialized purposes only. It is not guaranteed to exist in all implementations of Python. + .. versionadded:: 3.12 + .. function:: getobjects(limit[, type]) @@ -1185,6 +1187,15 @@ always available. Unless explicitly noted otherwise, all variables are read-only ``cache_tag`` is set to ``None``, it indicates that module caching should be disabled. + *supports_isolated_interpreters* is a boolean value, whether + this implementation supports multiple isolated interpreters. + It is ``True`` for CPython on most platforms. Platforms with + this support implement the low-level :mod:`!_interpreters` module. + + .. seealso:: + + :pep:`684`, :pep:`734`, and :mod:`concurrent.interpreters`. + :data:`sys.implementation` may contain additional attributes specific to the Python implementation. These non-standard attributes must start with an underscore, and are not described here. Regardless of its contents, @@ -1194,6 +1205,9 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. versionadded:: 3.3 + .. versionchanged:: 3.14 + Added ``supports_isolated_interpreters`` field. + .. note:: The addition of new required attributes must go through the normal PEP @@ -1933,6 +1947,22 @@ always available. Unless explicitly noted otherwise, all variables are read-only interpreter is pre-release (alpha, beta, or release candidate) then the local and remote interpreters must be the same exact version. + .. audit-event:: sys.remote_exec pid script_path + + When the code is executed in the remote process, an + :ref:`auditing event ` ``sys.remote_exec`` is raised with + the *pid* and the path to the script file. + This event is raised in the process that called :func:`sys.remote_exec`. + + .. audit-event:: cpython.remote_debugger_script script_path + + When the script is executed in the remote process, an + :ref:`auditing event ` + ``cpython.remote_debugger_script`` is raised + with the path in the remote process. + This event is raised in the remote process, not the one + that called :func:`sys.remote_exec`. + .. availability:: Unix, Windows. .. versionadded:: 3.14 diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index f9cb5495e60cd2..8d10db8f2c2921 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -18,8 +18,8 @@ higher-level functions in :ref:`shutil `. Some facts and figures: -* reads and writes :mod:`gzip`, :mod:`bz2` and :mod:`lzma` compressed archives - if the respective modules are available. +* reads and writes :mod:`gzip`, :mod:`bz2`, :mod:`compression.zstd`, and + :mod:`lzma` compressed archives if the respective modules are available. * read/write support for the POSIX.1-1988 (ustar) format. @@ -47,6 +47,10 @@ Some facts and figures: or paths outside of the destination. Previously, the filter strategy was equivalent to :func:`fully_trusted `. +.. versionchanged:: 3.14 + + Added support for Zstandard compression using :mod:`compression.zstd`. + .. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs) Return a :class:`TarFile` object for the pathname *name*. For detailed @@ -59,8 +63,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | mode | action | +==================+=============================================+ - | ``'r' or 'r:*'`` | Open for reading with transparent | - | | compression (recommended). | + | ``'r'`` or | Open for reading with transparent | + | ``'r:*'`` | compression (recommended). | +------------------+---------------------------------------------+ | ``'r:'`` | Open for reading exclusively without | | | compression. | @@ -71,6 +75,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | ``'r:xz'`` | Open for reading with lzma compression. | +------------------+---------------------------------------------+ + | ``'r:zst'`` | Open for reading with Zstandard compression.| + +------------------+---------------------------------------------+ | ``'x'`` or | Create a tarfile exclusively without | | ``'x:'`` | compression. | | | Raise a :exc:`FileExistsError` exception | @@ -88,10 +94,15 @@ Some facts and figures: | | Raise a :exc:`FileExistsError` exception | | | if it already exists. | +------------------+---------------------------------------------+ - | ``'a' or 'a:'`` | Open for appending with no compression. The | - | | file is created if it does not exist. | + | ``'x:zst'`` | Create a tarfile with Zstandard compression.| + | | Raise a :exc:`FileExistsError` exception | + | | if it already exists. | + +------------------+---------------------------------------------+ + | ``'a'`` or | Open for appending with no compression. The | + | ``'a:'`` | file is created if it does not exist. | +------------------+---------------------------------------------+ - | ``'w' or 'w:'`` | Open for uncompressed writing. | + | ``'w'`` or | Open for uncompressed writing. | + | ``'w:'`` | | +------------------+---------------------------------------------+ | ``'w:gz'`` | Open for gzip compressed writing. | +------------------+---------------------------------------------+ @@ -99,6 +110,8 @@ Some facts and figures: +------------------+---------------------------------------------+ | ``'w:xz'`` | Open for lzma compressed writing. | +------------------+---------------------------------------------+ + | ``'w:zst'`` | Open for Zstandard compressed writing. | + +------------------+---------------------------------------------+ Note that ``'a:gz'``, ``'a:bz2'`` or ``'a:xz'`` is not possible. If *mode* is not suitable to open a certain (compressed) file for reading, @@ -115,6 +128,15 @@ Some facts and figures: For modes ``'w:xz'``, ``'x:xz'`` and ``'w|xz'``, :func:`tarfile.open` accepts the keyword argument *preset* to specify the compression level of the file. + For modes ``'w:zst'``, ``'x:zst'`` and ``'w|zst'``, :func:`tarfile.open` + accepts the keyword argument *level* to specify the compression level of + the file. The keyword argument *options* may also be passed, providing + advanced Zstandard compression parameters described by + :class:`~compression.zstd.CompressionParameter`. The keyword argument + *zstd_dict* can be passed to provide a :class:`~compression.zstd.ZstdDict`, + a Zstandard dictionary used to improve compression of smaller amounts of + data. + For special purposes, there is a second format for *mode*: ``'filemode|[compression]'``. :func:`tarfile.open` will return a :class:`TarFile` object that processes its data as a stream of blocks. No random seeking will @@ -146,6 +168,9 @@ Some facts and figures: | ``'r|xz'`` | Open an lzma compressed *stream* for | | | reading. | +-------------+--------------------------------------------+ + | ``'r|zst'`` | Open a Zstandard compressed *stream* for | + | | reading. | + +-------------+--------------------------------------------+ | ``'w|'`` | Open an uncompressed *stream* for writing. | +-------------+--------------------------------------------+ | ``'w|gz'`` | Open a gzip compressed *stream* for | @@ -157,6 +182,9 @@ Some facts and figures: | ``'w|xz'`` | Open an lzma compressed *stream* for | | | writing. | +-------------+--------------------------------------------+ + | ``'w|zst'`` | Open a Zstandard compressed *stream* for | + | | writing. | + +-------------+--------------------------------------------+ .. versionchanged:: 3.5 The ``'x'`` (exclusive creation) mode was added. @@ -255,6 +283,15 @@ The :mod:`tarfile` module defines the following exceptions: Raised to refuse extracting a symbolic link pointing outside the destination directory. +.. exception:: LinkFallbackError + + Raised to refuse emulating a link (hard or symbolic) by extracting another + archive member, when that member would be rejected by the filter location. + The exception that was raised to reject the replacement member is available + as :attr:`!BaseException.__context__`. + + .. versionadded:: 3.14 + The following constants are available at the module level: @@ -1068,6 +1105,12 @@ reused in custom filters: Implements the ``'data'`` filter. In addition to what ``tar_filter`` does: + - Normalize link targets (:attr:`TarInfo.linkname`) using + :func:`os.path.normpath`. + Note that this removes internal ``..`` components, which may change the + meaning of the link if the path in :attr:`!TarInfo.linkname` traverses + symbolic links. + - :ref:`Refuse ` to extract links (hard or soft) that link to absolute paths, or ones that link outside the destination. @@ -1099,6 +1142,10 @@ reused in custom filters: Note that this filter does not block *all* dangerous archive features. See :ref:`tarfile-further-verification` for details. + .. versionchanged:: 3.14 + + Link targets are now normalized. + .. _tarfile-extraction-refuse: @@ -1127,6 +1174,7 @@ Here is an incomplete list of things to consider: * Extract to a :func:`new temporary directory ` to prevent e.g. exploiting pre-existing links, and to make it easier to clean up after a failed extraction. +* Disallow symbolic links if you do not need the functionality. * When working with untrusted data, use external (e.g. OS-level) limits on disk, memory and CPU usage. * Check filenames against an allow-list of characters diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 249c0a5cb035c3..cabb41442f8419 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -11,6 +11,52 @@ This module constructs higher-level threading interfaces on top of the lower level :mod:`_thread` module. +.. include:: ../includes/wasm-notavail.rst + +Introduction +------------ + +The :mod:`!threading` module provides a way to run multiple `threads +`_ (smaller +units of a process) concurrently within a single process. It allows for the +creation and management of threads, making it possible to execute tasks in +parallel, sharing memory space. Threads are particularly useful when tasks are +I/O bound, such as file operations or making network requests, +where much of the time is spent waiting for external resources. + +A typical use case for :mod:`!threading` includes managing a pool of worker +threads that can process multiple tasks concurrently. Here's a basic example of +creating and starting threads using :class:`~threading.Thread`:: + + import threading + import time + + def crawl(link, delay=3): + print(f"crawl started for {link}") + time.sleep(delay) # Blocking I/O (simulating a network request) + print(f"crawl ended for {link}") + + links = [ + "https://python.org", + "https://docs.python.org", + "https://peps.python.org", + ] + + # Start threads for each link + threads = [] + for link in links: + # Using `args` to pass positional arguments and `kwargs` for keyword arguments + t = threading.Thread(target=crawl, args=(link,), kwargs={"delay": 2}) + threads.append(t) + + # Start each thread + for t in threads: + t.start() + + # Wait for all threads to finish + for t in threads: + t.join() + .. versionchanged:: 3.7 This module used to be optional, it is now always available. @@ -45,7 +91,25 @@ level :mod:`_thread` module. However, threading is still an appropriate model if you want to run multiple I/O-bound tasks simultaneously. -.. include:: ../includes/wasm-notavail.rst +GIL and performance considerations +---------------------------------- + +Unlike the :mod:`multiprocessing` module, which uses separate processes to +bypass the :term:`global interpreter lock` (GIL), the threading module operates +within a single process, meaning that all threads share the same memory space. +However, the GIL limits the performance gains of threading when it comes to +CPU-bound tasks, as only one thread can execute Python bytecode at a time. +Despite this, threads remain a useful tool for achieving concurrency in many +scenarios. + +As of Python 3.13, :term:`free-threaded ` builds +can disable the GIL, enabling true parallel execution of threads, but this +feature is not available by default (see :pep:`703`). + +.. TODO: At some point this feature will become available by default. + +Reference +--------- This module defines the following functions: @@ -62,7 +126,7 @@ This module defines the following functions: Return the current :class:`Thread` object, corresponding to the caller's thread of control. If the caller's thread of control was not created through the - :mod:`threading` module, a dummy thread object with limited functionality is + :mod:`!threading` module, a dummy thread object with limited functionality is returned. The function ``currentThread`` is a deprecated alias for this function. @@ -157,13 +221,13 @@ This module defines the following functions: .. index:: single: trace function - Set a trace function for all threads started from the :mod:`threading` module. + Set a trace function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.settrace` for each thread, before its :meth:`~Thread.run` method is called. .. function:: settrace_all_threads(func) - Set a trace function for all threads started from the :mod:`threading` module + Set a trace function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. The *func* will be passed to :func:`sys.settrace` for each thread, before its @@ -186,13 +250,13 @@ This module defines the following functions: .. index:: single: profile function - Set a profile function for all threads started from the :mod:`threading` module. + Set a profile function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.setprofile` for each thread, before its :meth:`~Thread.run` method is called. .. function:: setprofile_all_threads(func) - Set a profile function for all threads started from the :mod:`threading` module + Set a profile function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. The *func* will be passed to :func:`sys.setprofile` for each thread, before its @@ -257,8 +321,8 @@ when implemented, are mapped to module-level functions. All of the methods described below are executed atomically. -Thread-Local Data ------------------ +Thread-local data +^^^^^^^^^^^^^^^^^ Thread-local data is data whose values are thread specific. If you have data that you want to be local to a thread, create a @@ -389,8 +453,8 @@ affects what we see:: .. _thread-objects: -Thread Objects --------------- +Thread objects +^^^^^^^^^^^^^^ The :class:`Thread` class represents an activity that is run in a separate thread of control. There are two ways to specify the activity: by passing a @@ -557,7 +621,7 @@ since it is impossible to detect the termination of alien threads. an error to :meth:`~Thread.join` a thread before it has been started and attempts to do so raise the same exception. - If an attempt is made to join a running daemonic thread in in late stages + If an attempt is made to join a running daemonic thread in late stages of :term:`Python finalization ` :meth:`!join` raises a :exc:`PythonFinalizationError`. @@ -645,8 +709,8 @@ since it is impossible to detect the termination of alien threads. .. _lock-objects: -Lock Objects ------------- +Lock objects +^^^^^^^^^^^^ A primitive lock is a synchronization primitive that is not owned by a particular thread when locked. In Python, it is currently the lowest level @@ -738,8 +802,8 @@ All methods are executed atomically. .. _rlock-objects: -RLock Objects -------------- +RLock objects +^^^^^^^^^^^^^ A reentrant lock is a synchronization primitive that may be acquired multiple times by the same thread. Internally, it uses the concepts of "owning thread" @@ -848,8 +912,8 @@ call release as many times the lock has been acquired can lead to deadlock. .. _condition-objects: -Condition Objects ------------------ +Condition objects +^^^^^^^^^^^^^^^^^ A condition variable is always associated with some kind of lock; this can be passed in or one will be created by default. Passing one in is useful when @@ -1026,8 +1090,8 @@ item to the buffer only needs to wake up one consumer thread. .. _semaphore-objects: -Semaphore Objects ------------------ +Semaphore objects +^^^^^^^^^^^^^^^^^ This is one of the oldest synchronization primitives in the history of computer science, invented by the early Dutch computer scientist Edsger W. Dijkstra (he @@ -1107,7 +1171,7 @@ Semaphores also support the :ref:`context management protocol `. .. _semaphore-examples: -:class:`Semaphore` Example +:class:`Semaphore` example ^^^^^^^^^^^^^^^^^^^^^^^^^^ Semaphores are often used to guard resources with limited capacity, for example, @@ -1135,8 +1199,8 @@ causes the semaphore to be released more than it's acquired will go undetected. .. _event-objects: -Event Objects -------------- +Event objects +^^^^^^^^^^^^^ This is one of the simplest mechanisms for communication between threads: one thread signals an event and other threads wait for it. @@ -1192,8 +1256,8 @@ method. The :meth:`~Event.wait` method blocks until the flag is true. .. _timer-objects: -Timer Objects -------------- +Timer objects +^^^^^^^^^^^^^ This class represents an action that should be run only after a certain amount of time has passed --- a timer. :class:`Timer` is a subclass of :class:`Thread` @@ -1230,8 +1294,8 @@ For example:: only work if the timer is still in its waiting stage. -Barrier Objects ---------------- +Barrier objects +^^^^^^^^^^^^^^^ .. versionadded:: 3.2 diff --git a/Doc/library/time.rst b/Doc/library/time.rst index 542493a82af94d..df9be68bf4f69d 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -306,10 +306,11 @@ Functions .. versionadded:: 3.3 .. versionchanged:: 3.5 - The function is now always available and always system-wide. + The function is now always available and the clock is now the same for + all processes. .. versionchanged:: 3.10 - On macOS, the function is now system-wide. + On macOS, the clock is now the same for all processes. .. function:: monotonic_ns() -> int @@ -325,7 +326,8 @@ Functions Return the value (in fractional seconds) of a performance counter, i.e. a clock with the highest available resolution to measure a short duration. It - does include time elapsed during sleep and is system-wide. The reference + does include time elapsed during sleep. The clock is the same for all + processes. The reference point of the returned value is undefined, so that only the difference between the results of two calls is valid. @@ -340,7 +342,7 @@ Functions .. versionadded:: 3.3 .. versionchanged:: 3.10 - On Windows, the function is now system-wide. + On Windows, the clock is now the same for all processes. .. versionchanged:: 3.13 Use the same clock as :func:`time.monotonic`. @@ -712,13 +714,18 @@ Functions Clock: - * On Windows, call ``GetSystemTimeAsFileTime()``. + * On Windows, call ``GetSystemTimePreciseAsFileTime()``. * Call ``clock_gettime(CLOCK_REALTIME)`` if available. * Otherwise, call ``gettimeofday()``. Use :func:`time_ns` to avoid the precision loss caused by the :class:`float` type. +.. versionchanged:: 3.13 + + On Windows, calls ``GetSystemTimePreciseAsFileTime()`` instead of + ``GetSystemTimeAsFileTime()``. + .. function:: time_ns() -> int @@ -982,8 +989,8 @@ The following constant is the only parameter that can be sent to .. data:: CLOCK_REALTIME - System-wide real-time clock. Setting this clock requires appropriate - privileges. + Real-time clock. Setting this clock requires appropriate privileges. + The clock is the same for all processes. .. availability:: Unix. diff --git a/Doc/library/token.rst b/Doc/library/token.rst index 1f92b5df4302bf..c228006d4c1e1d 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -51,7 +51,7 @@ The token constants are: .. data:: NAME Token value that indicates an :ref:`identifier `. - Note that keywords are also initially tokenized an ``NAME`` tokens. + Note that keywords are also initially tokenized as ``NAME`` tokens. .. data:: NUMBER diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index 54cc3ea3311adf..69df09c779592a 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -3500,20 +3500,11 @@ Introspection helpers Evaluate an :class:`annotationlib.ForwardRef` as a :term:`type hint`. This is similar to calling :meth:`annotationlib.ForwardRef.evaluate`, - but unlike that method, :func:`!evaluate_forward_ref` also: - - * Recursively evaluates forward references nested within the type hint. - * Raises :exc:`TypeError` when it encounters certain objects that are - not valid type hints. - * Replaces type hints that evaluate to :const:`!None` with - :class:`types.NoneType`. - * Supports the :attr:`~annotationlib.Format.FORWARDREF` and - :attr:`~annotationlib.Format.STRING` formats. + but unlike that method, :func:`!evaluate_forward_ref` also + recursively evaluates forward references nested within the type hint. See the documentation for :meth:`annotationlib.ForwardRef.evaluate` for - the meaning of the *owner*, *globals*, *locals*, and *type_params* parameters. - *format* specifies the format of the annotation and is a member of - the :class:`annotationlib.Format` enum. + the meaning of the *owner*, *globals*, *locals*, *type_params*, and *format* parameters. .. versionadded:: 3.14 @@ -3539,28 +3530,32 @@ Constant .. data:: TYPE_CHECKING A special constant that is assumed to be ``True`` by 3rd party static - type checkers. It is ``False`` at runtime. + type checkers. It's ``False`` at runtime. + + A module which is expensive to import, and which only contain types + used for typing annotations, can be safely imported inside an + ``if TYPE_CHECKING:`` block. This prevents the module from actually + being imported at runtime; annotations aren't eagerly evaluated + (see :pep:`649`) so using undefined symbols in annotations is + harmless--as long as you don't later examine them. + Your static type analysis tool will set ``TYPE_CHECKING`` to + ``True`` during static type analysis, which means the module will + be imported and the types will be checked properly during such analysis. Usage:: if TYPE_CHECKING: import expensive_mod - def fun(arg: 'expensive_mod.SomeType') -> None: + def fun(arg: expensive_mod.SomeType) -> None: local_var: expensive_mod.AnotherType = other_fun() - The first type annotation must be enclosed in quotes, making it a - "forward reference", to hide the ``expensive_mod`` reference from the - interpreter runtime. Type annotations for local variables are not - evaluated, so the second annotation does not need to be enclosed in quotes. - - .. note:: - - If ``from __future__ import annotations`` is used, - annotations are not evaluated at function definition time. - Instead, they are stored as strings in ``__annotations__``. - This makes it unnecessary to use quotes around the annotation - (see :pep:`563`). + If you occasionally need to examine type annotations at runtime + which may contain undefined symbols, use + :meth:`annotationlib.get_annotations` with a ``format`` parameter + of :attr:`annotationlib.Format.STRING` or + :attr:`annotationlib.Format.FORWARDREF` to safely retrieve the + annotations without raising :exc:`NameError`. .. versionadded:: 3.5.2 diff --git a/Doc/library/unittest.mock.rst b/Doc/library/unittest.mock.rst index 27c169dde72780..091562cc9aef98 100644 --- a/Doc/library/unittest.mock.rst +++ b/Doc/library/unittest.mock.rst @@ -2654,9 +2654,9 @@ with any methods on the mock: .. code-block:: pycon - >>> mock.has_data() + >>> mock.header_items() - >>> mock.has_data.assret_called_with() # Intentional typo! + >>> mock.header_items.assret_called_with() # Intentional typo! Auto-speccing solves this problem. You can either pass ``autospec=True`` to :func:`patch` / :func:`patch.object` or use the :func:`create_autospec` function to create a diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 61022fe052ca80..f8028075047cc3 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -109,7 +109,7 @@ Here is a short script to test three string methods:: unittest.main() -A testcase is created by subclassing :class:`unittest.TestCase`. The three +A test case is created by subclassing :class:`unittest.TestCase`. The three individual tests are defined with methods whose names start with the letters ``test``. This naming convention informs the test runner about which methods represent tests. @@ -2556,7 +2556,7 @@ To add cleanup code that must be run even in the case of an exception, use .. versionadded:: 3.8 -.. classmethod:: enterModuleContext(cm) +.. function:: enterModuleContext(cm) Enter the supplied :term:`context manager`. If successful, also add its :meth:`~object.__exit__` method as a cleanup function by diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index b0f26724d0c78e..58bd111b5cc374 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -1121,7 +1121,7 @@ HTTPHandler Objects .. method:: HTTPHandler.http_open(req) Send an HTTP request, which can be either GET or POST, depending on - ``req.has_data()``. + ``req.data``. .. _https-handler-objects: @@ -1133,7 +1133,7 @@ HTTPSHandler Objects .. method:: HTTPSHandler.https_open(req) Send an HTTPS request, which can be either GET or POST, depending on - ``req.has_data()``. + ``req.data``. .. _file-handler-objects: diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst index 8cce6b98cbcdb3..6698e6d3f43c43 100644 --- a/Doc/library/uuid.rst +++ b/Doc/library/uuid.rst @@ -193,43 +193,52 @@ The :mod:`uuid` module defines the following functions: .. function:: uuid1(node=None, clock_seq=None) - Generate a UUID from a host ID, sequence number, and the current time. If *node* - is not given, :func:`getnode` is used to obtain the hardware address. If - *clock_seq* is given, it is used as the sequence number; otherwise a random - 14-bit sequence number is chosen. + Generate a UUID from a host ID, sequence number, and the current time + according to :rfc:`RFC 9562, §5.1 <9562#section-5.1>`. + + When *node* is not specified, :func:`getnode` is used to obtain the hardware + address as a 48-bit positive integer. When a sequence number *clock_seq* is + not specified, a pseudo-random 14-bit positive integer is generated. + + If *node* or *clock_seq* exceed their expected bit count, + only their least significant bits are kept. .. function:: uuid3(namespace, name) Generate a UUID based on the MD5 hash of a namespace identifier (which is a UUID) and a name (which is a :class:`bytes` object or a string - that will be encoded using UTF-8). + that will be encoded using UTF-8) + according to :rfc:`RFC 9562, §5.3 <9562#section-5.3>`. .. function:: uuid4() - Generate a random UUID. + Generate a random UUID in a cryptographically-secure method + according to :rfc:`RFC 9562, §5.4 <9562#section-5.4>`. .. function:: uuid5(namespace, name) Generate a UUID based on the SHA-1 hash of a namespace identifier (which is a UUID) and a name (which is a :class:`bytes` object or a string - that will be encoded using UTF-8). + that will be encoded using UTF-8) + according to :rfc:`RFC 9562, §5.5 <9562#section-5.5>`. .. function:: uuid6(node=None, clock_seq=None) Generate a UUID from a sequence number and the current time according to - :rfc:`9562`. + :rfc:`RFC 9562, §5.6 <9562#section-5.6>`. + This is an alternative to :func:`uuid1` to improve database locality. When *node* is not specified, :func:`getnode` is used to obtain the hardware address as a 48-bit positive integer. When a sequence number *clock_seq* is not specified, a pseudo-random 14-bit positive integer is generated. - If *node* or *clock_seq* exceed their expected bit count, only their least - significant bits are kept. + If *node* or *clock_seq* exceed their expected bit count, + only their least significant bits are kept. .. versionadded:: 3.14 @@ -257,6 +266,10 @@ The :mod:`uuid` module defines the following functions: non-specified arguments are substituted for a pseudo-random integer of appropriate size. + By default, *a*, *b* and *c* are not generated by a cryptographically + secure pseudo-random number generator (CSPRNG). Use :func:`uuid4` when + a UUID needs to be used in a security-sensitive context. + .. versionadded:: 3.14 diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst index 00a18751207e7a..9ffedf7366a7b8 100644 --- a/Doc/library/xml.dom.minidom.rst +++ b/Doc/library/xml.dom.minidom.rst @@ -19,11 +19,10 @@ not already proficient with the DOM should consider using the :mod:`xml.etree.ElementTree` module for their XML processing instead. -.. warning:: +.. note:: - The :mod:`xml.dom.minidom` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. DOM applications typically start by parsing some XML into a DOM. With diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index fd96765cbe3c96..8bceeecd46393e 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -19,11 +19,10 @@ responsible for explicitly pulling events from the stream, looping over those events until either processing is finished or an error condition occurs. -.. warning:: +.. note:: - The :mod:`xml.dom.pulldom` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. versionchanged:: 3.7.1 diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 1daf6628013bf0..00075ac2a23e6b 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -20,11 +20,10 @@ for parsing and creating XML data. The :mod:`!xml.etree.cElementTree` module is deprecated. -.. warning:: +.. note:: - The :mod:`xml.etree.ElementTree` module is not secure against - maliciously constructed data. If you need to parse untrusted or - unauthenticated data see :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. Tutorial -------- diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index d495995398959d..28465219a1ac18 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -15,12 +15,10 @@ XML Processing Modules Python's interfaces for processing XML are grouped in the ``xml`` package. -.. warning:: +.. note:: - The XML modules are not secure against erroneous or maliciously - constructed data. If you need to parse untrusted or - unauthenticated data see the :ref:`xml-vulnerabilities` and - :ref:`defusedxml-package` sections. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. It is important to note that modules in the :mod:`xml` package require that there be at least one SAX-compliant XML parser available. The Expat parser is @@ -47,46 +45,22 @@ The XML handling submodules are: * :mod:`xml.parsers.expat`: the Expat parser binding +.. _xml-security: .. _xml-vulnerabilities: -XML vulnerabilities -------------------- +XML security +------------ -The XML processing modules are not secure against maliciously constructed data. An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or circumvent firewalls. -The following table gives an overview of the known attacks and whether -the various modules are vulnerable to them. - -========================= ================== ================== ================== ================== ================== -kind sax etree minidom pulldom xmlrpc -========================= ================== ================== ================== ================== ================== -billion laughs **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) -quadratic blowup **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) **Vulnerable** (1) -external entity expansion Safe (5) Safe (2) Safe (3) Safe (5) Safe (4) -`DTD`_ retrieval Safe (5) Safe Safe Safe (5) Safe -decompression bomb Safe Safe Safe Safe **Vulnerable** -large tokens **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) **Vulnerable** (6) -========================= ================== ================== ================== ================== ================== - -1. Expat 2.4.1 and newer is not vulnerable to the "billion laughs" and - "quadratic blowup" vulnerabilities. Items still listed as vulnerable due to - potential reliance on system-provided libraries. Check - :const:`!pyexpat.EXPAT_VERSION`. -2. :mod:`xml.etree.ElementTree` doesn't expand external entities and raises a - :exc:`~xml.etree.ElementTree.ParseError` when an entity occurs. -3. :mod:`xml.dom.minidom` doesn't expand external entities and simply returns - the unexpanded entity verbatim. -4. :mod:`xmlrpc.client` doesn't expand external entities and omits them. -5. Since Python 3.7.1, external general entities are no longer processed by - default. -6. Expat 2.6.0 and newer is not vulnerable to denial of service - through quadratic runtime caused by parsing large tokens. - Items still listed as vulnerable due to - potential reliance on system-provided libraries. Check - :const:`!pyexpat.EXPAT_VERSION`. +Expat versions lower that 2.6.0 may be vulnerable to "billion laughs", +"quadratic blowup" and "large tokens". Python may be vulnerable if it uses such +older versions of Expat as a system-provided library. +Check :const:`!pyexpat.EXPAT_VERSION`. + +:mod:`xmlrpc` is **vulnerable** to the "decompression bomb" attack. billion laughs / exponential entity expansion @@ -103,16 +77,6 @@ quadratic blowup entity expansion efficient as the exponential case but it avoids triggering parser countermeasures that forbid deeply nested entities. -external entity expansion - Entity declarations can contain more than just text for replacement. They can - also point to external resources or local files. The XML - parser accesses the resource and embeds the content into the XML document. - -`DTD`_ retrieval - Some XML libraries like Python's :mod:`xml.dom.pulldom` retrieve document type - definitions from remote or local locations. The feature has similar - implications as the external entity expansion issue. - decompression bomb Decompression bombs (aka `ZIP bomb`_) apply to all XML libraries that can parse compressed XML streams such as gzipped HTTP streams or @@ -126,21 +90,5 @@ large tokens be used to cause denial of service in the application parsing XML. The issue is known as :cve:`2023-52425`. -The documentation for :pypi:`defusedxml` on PyPI has further information about -all known attack vectors with examples and references. - -.. _defusedxml-package: - -The :mod:`!defusedxml` Package ------------------------------- - -:pypi:`defusedxml` is a pure Python package with modified subclasses of all stdlib -XML parsers that prevent any potentially malicious operation. Use of this -package is recommended for any server code that parses untrusted XML data. The -package also ships with example exploits and extended documentation on more -XML exploits such as XPath injection. - - .. _Billion Laughs: https://en.wikipedia.org/wiki/Billion_laughs .. _ZIP bomb: https://en.wikipedia.org/wiki/Zip_bomb -.. _DTD: https://en.wikipedia.org/wiki/Document_type_definition diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst index c60e9e505f7544..5fa92645a440ce 100644 --- a/Doc/library/xml.sax.rst +++ b/Doc/library/xml.sax.rst @@ -18,11 +18,10 @@ SAX exceptions and the convenience functions which will be most used by users of the SAX API. -.. warning:: +.. note:: - The :mod:`xml.sax` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + If you need to parse untrusted or unauthenticated data, see + :ref:`xml-security`. .. versionchanged:: 3.7.1 diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index 654154cb43d6e5..547cb50be78a78 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -24,8 +24,8 @@ between conformable Python objects and XML on the wire. .. warning:: The :mod:`xmlrpc.client` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + constructed data. If you need to parse untrusted or unauthenticated data, + see :ref:`xml-security`. .. versionchanged:: 3.5 diff --git a/Doc/library/xmlrpc.server.rst b/Doc/library/xmlrpc.server.rst index 06169c7eca8b0c..2a8f6f8d5fc0de 100644 --- a/Doc/library/xmlrpc.server.rst +++ b/Doc/library/xmlrpc.server.rst @@ -20,8 +20,8 @@ servers written in Python. Servers can either be free standing, using .. warning:: The :mod:`xmlrpc.server` module is not secure against maliciously - constructed data. If you need to parse untrusted or unauthenticated data see - :ref:`xml-vulnerabilities`. + constructed data. If you need to parse untrusted or unauthenticated data, + see :ref:`xml-security`. .. include:: ../includes/wasm-notavail.rst diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 6a4fa67332e179..bf9136a2139112 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -129,14 +129,28 @@ The module defines the following items: .. versionadded:: 3.3 +.. data:: ZIP_ZSTANDARD + + The numeric constant for Zstandard compression. This requires the + :mod:`compression.zstd` module. + .. note:: - The ZIP file format specification has included support for bzip2 compression - since 2001, and for LZMA compression since 2006. However, some tools - (including older Python releases) do not support these compression - methods, and may either refuse to process the ZIP file altogether, - or fail to extract individual files. + In APPNOTE 6.3.7, the method ID ``20`` was assigned to Zstandard + compression. This was changed in APPNOTE 6.3.8 to method ID ``93`` to + avoid conflicts, with method ID ``20`` being deprecated. For + compatibility, the :mod:`!zipfile` module reads both method IDs but will + only write data with method ID ``93``. + + .. versionadded:: 3.14 + +.. note:: + The ZIP file format specification has included support for bzip2 compression + since 2001, for LZMA compression since 2006, and Zstandard compression since + 2020. However, some tools (including older Python releases) do not support + these compression methods, and may either refuse to process the ZIP file + altogether, or fail to extract individual files. .. seealso:: @@ -176,10 +190,11 @@ ZipFile Objects *compression* is the ZIP compression method to use when writing the archive, and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, - :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized - values will cause :exc:`NotImplementedError` to be raised. If - :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified - but the corresponding module (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not + :const:`ZIP_BZIP2`, :const:`ZIP_LZMA`, or :const:`ZIP_ZSTANDARD`; + unrecognized values will cause :exc:`NotImplementedError` to be raised. If + :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2`, :const:`ZIP_LZMA`, or + :const:`ZIP_ZSTANDARD` is specified but the corresponding module + (:mod:`zlib`, :mod:`bz2`, :mod:`lzma`, or :mod:`compression.zstd`) is not available, :exc:`RuntimeError` is raised. The default is :const:`ZIP_STORED`. If *allowZip64* is ``True`` (the default) zipfile will create ZIP files that @@ -194,6 +209,10 @@ ZipFile Objects (see :class:`zlib ` for more information). When using :const:`ZIP_BZIP2` integers ``1`` through ``9`` are accepted (see :class:`bz2 ` for more information). + When using :const:`ZIP_ZSTANDARD` integers ``-131072`` through ``22`` are + commonly accepted (see + :attr:`CompressionParameter.compression_level ` + for more on retrieving valid values and their meaning). The *strict_timestamps* argument, when set to ``False``, allows to zip files older than 1980-01-01 at the cost of setting the @@ -415,9 +434,10 @@ ZipFile Objects read or append. *pwd* is the password used for encrypted files as a :class:`bytes` object and, if specified, overrides the default password set with :meth:`setpassword`. Calling :meth:`read` on a ZipFile that uses a compression method other than - :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2` or - :const:`ZIP_LZMA` will raise a :exc:`NotImplementedError`. An error will also - be raised if the corresponding compression module is not available. + :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2`, + :const:`ZIP_LZMA`, or :const:`ZIP_ZSTANDARD` will raise a + :exc:`NotImplementedError`. An error will also be raised if the + corresponding compression module is not available. .. versionchanged:: 3.6 Calling :meth:`read` on a closed ZipFile will raise a :exc:`ValueError`. diff --git a/Doc/library/zoneinfo.rst b/Doc/library/zoneinfo.rst index a57f3b8b3e858c..53d8e2598ec1c7 100644 --- a/Doc/library/zoneinfo.rst +++ b/Doc/library/zoneinfo.rst @@ -195,7 +195,7 @@ The ``ZoneInfo`` class The ``ZoneInfo`` class has two alternate constructors: -.. classmethod:: ZoneInfo.from_file(fobj, /, key=None) +.. classmethod:: ZoneInfo.from_file(file_obj, /, key=None) Constructs a ``ZoneInfo`` object from a file-like object returning bytes (e.g. a file opened in binary mode or an :class:`io.BytesIO` object). @@ -325,7 +325,7 @@ The behavior of a ``ZoneInfo`` file depends on how it was constructed: >>> a is b False -3. ``ZoneInfo.from_file(fobj, /, key=None)``: When constructed from a file, the +3. ``ZoneInfo.from_file(file_obj, /, key=None)``: When constructed from a file, the ``ZoneInfo`` object raises an exception on pickling. If an end user wants to pickle a ``ZoneInfo`` constructed from a file, it is recommended that they use a wrapper type or a custom serialization function: either serializing by diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst index f36ed3e122f2bc..e95fa3a6424e23 100644 --- a/Doc/reference/compound_stmts.rst +++ b/Doc/reference/compound_stmts.rst @@ -154,15 +154,15 @@ The :keyword:`for` statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: .. productionlist:: python-grammar - for_stmt: "for" `target_list` "in" `starred_list` ":" `suite` + for_stmt: "for" `target_list` "in" `starred_expression_list` ":" `suite` : ["else" ":" `suite`] -The ``starred_list`` expression is evaluated once; it should yield an -:term:`iterable` object. An :term:`iterator` is created for that iterable. -The first item provided -by the iterator is then assigned to the target list using the standard -rules for assignments (see :ref:`assignment`), and the suite is executed. This -repeats for each item provided by the iterator. When the iterator is exhausted, +The :token:`~python-grammar:starred_expression_list` expression is evaluated +once; it should yield an :term:`iterable` object. An :term:`iterator` is +created for that iterable. The first item provided by the iterator is then +assigned to the target list using the standard rules for assignments +(see :ref:`assignment`), and the suite is executed. This repeats for each +item provided by the iterator. When the iterator is exhausted, the suite in the :keyword:`!else` clause, if present, is executed, and the loop terminates. @@ -1885,7 +1885,7 @@ expressions. The presence of annotations does not change the runtime semantics o the code, except if some mechanism is used that introspects and uses the annotations (such as :mod:`dataclasses` or :func:`functools.singledispatch`). -By default, annotations are lazily evaluated in a :ref:`annotation scope `. +By default, annotations are lazily evaluated in an :ref:`annotation scope `. This means that they are not evaluated when the code containing the annotation is evaluated. Instead, the interpreter saves information that can be used to evaluate the annotation later if requested. The :mod:`annotationlib` module provides tools for evaluating annotations. @@ -1898,6 +1898,12 @@ all annotations are instead stored as strings:: >>> f.__annotations__ {'param': 'annotation'} +This future statement will be deprecated and removed in a future version of Python, +but not before Python 3.13 reaches its end of life (see :pep:`749`). +When it is used, introspection tools like +:func:`annotationlib.get_annotations` and :func:`typing.get_type_hints` are +less likely to be able to resolve annotations at runtime. + .. rubric:: Footnotes diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 005a768f684e2c..4a099e81daccb3 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -262,6 +262,8 @@ Booleans (:class:`bool`) a string, the strings ``"False"`` or ``"True"`` are returned, respectively. +.. _datamodel-float: + :class:`numbers.Real` (:class:`float`) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1228,10 +1230,22 @@ Special attributes :attr:`__annotations__ attributes `. For best practices on working with :attr:`~object.__annotations__`, - please see :mod:`annotationlib`. Where possible, use + please see :mod:`annotationlib`. Use :func:`annotationlib.get_annotations` instead of accessing this attribute directly. + .. warning:: + + Accessing the :attr:`!__annotations__` attribute directly + on a class object may return annotations for the wrong class, specifically + in certain cases where the class, its base class, or a metaclass + is defined under ``from __future__ import annotations``. + See :pep:`749 <749#pep749-metaclasses>` for details. + + This attribute does not exist on certain builtin classes. On + user-defined classes without ``__annotations__``, it is an + empty dictionary. + .. versionchanged:: 3.14 Annotations are now :ref:`lazily evaluated `. See :pep:`649`. diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 8837344e5ddca1..24544a055c3ed2 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -134,8 +134,7 @@ Literals Python supports string and bytes literals and various numeric literals: .. productionlist:: python-grammar - literal: `stringliteral` | `bytesliteral` - : | `integer` | `floatnumber` | `imagnumber` + literal: `stringliteral` | `bytesliteral` | `NUMBER` Evaluation of a literal yields an object of the given type (string, bytes, integer, floating-point number, complex number) with the given value. The value @@ -406,8 +405,9 @@ brackets or curly braces. Variables used in the generator expression are evaluated lazily when the :meth:`~generator.__next__` method is called for the generator object (in the same fashion as normal generators). However, the iterable expression in the -leftmost :keyword:`!for` clause is immediately evaluated, so that an error -produced by it will be emitted at the point where the generator expression +leftmost :keyword:`!for` clause is immediately evaluated, and the +:term:`iterator` is immediately created for that iterable, so that an error +produced while creating the iterator will be emitted at the point where the generator expression is defined, rather than at the point where the first value is retrieved. Subsequent :keyword:`!for` clauses and any filter condition in the leftmost :keyword:`!for` clause cannot be evaluated in the enclosing scope as they may @@ -625,8 +625,10 @@ is already executing raises a :exc:`ValueError` exception. .. method:: generator.close() - Raises a :exc:`GeneratorExit` at the point where the generator function was - paused. If the generator function catches the exception and returns a + Raises a :exc:`GeneratorExit` exception at the point where the generator + function was paused (equivalent to calling ``throw(GeneratorExit)``). + The exception is raised by the yield expression where the generator was paused. + If the generator function catches the exception and returns a value, this value is returned from :meth:`close`. If the generator function is already closed, or raises :exc:`GeneratorExit` (by not catching the exception), :meth:`close` returns :const:`None`. If the generator yields a @@ -1023,7 +1025,7 @@ series of :term:`arguments `: : ["," `keywords_arguments`] : | `starred_and_keywords` ["," `keywords_arguments`] : | `keywords_arguments` - positional_arguments: positional_item ("," positional_item)* + positional_arguments: `positional_item` ("," `positional_item`)* positional_item: `assignment_expression` | "*" `expression` starred_and_keywords: ("*" `expression` | `keyword_item`) : ("," "*" `expression` | "," `keyword_item`)* @@ -1928,7 +1930,7 @@ Expression lists single: , (comma); expression list .. productionlist:: python-grammar - starred_expression: ["*"] `or_expr` + starred_expression: "*" `or_expr` | `expression` flexible_expression: `assignment_expression` | `starred_expression` flexible_expression_list: `flexible_expression` ("," `flexible_expression`)* [","] starred_expression_list: `starred_expression` ("," `starred_expression`)* [","] diff --git a/Doc/reference/grammar.rst b/Doc/reference/grammar.rst index b9cca4444c9141..55c148801d8559 100644 --- a/Doc/reference/grammar.rst +++ b/Doc/reference/grammar.rst @@ -8,15 +8,15 @@ used to generate the CPython parser (see :source:`Grammar/python.gram`). The version here omits details related to code generation and error recovery. -The notation is a mixture of `EBNF -`_ -and `PEG `_. -In particular, ``&`` followed by a symbol, token or parenthesized -group indicates a positive lookahead (i.e., is required to match but -not consumed), while ``!`` indicates a negative lookahead (i.e., is -required *not* to match). We use the ``|`` separator to mean PEG's -"ordered choice" (written as ``/`` in traditional PEG grammars). See -:pep:`617` for more details on the grammar's syntax. +The notation used here is the same as in the preceding docs, +and is described in the :ref:`notation ` section, +except for a few extra complications: + +* ``&e``: a positive lookahead (that is, ``e`` is required to match but + not consumed) +* ``!e``: a negative lookahead (that is, ``e`` is required *not* to match) +* ``~`` ("cut"): commit to the current alternative and fail the rule + even if this fails to parse .. literalinclude:: ../../Grammar/python.gram :language: peg diff --git a/Doc/reference/introduction.rst b/Doc/reference/introduction.rst index b7b70e6be5a5b7..444acac374a690 100644 --- a/Doc/reference/introduction.rst +++ b/Doc/reference/introduction.rst @@ -90,44 +90,122 @@ Notation .. index:: BNF, grammar, syntax, notation -The descriptions of lexical analysis and syntax use a modified -`Backus–Naur form (BNF) `_ grammar -notation. This uses the following style of definition: - -.. productionlist:: notation - name: `lc_letter` (`lc_letter` | "_")* - lc_letter: "a"..."z" - -The first line says that a ``name`` is an ``lc_letter`` followed by a sequence -of zero or more ``lc_letter``\ s and underscores. An ``lc_letter`` in turn is -any of the single characters ``'a'`` through ``'z'``. (This rule is actually -adhered to for the names defined in lexical and grammar rules in this document.) - -Each rule begins with a name (which is the name defined by the rule) and -``::=``. A vertical bar (``|``) is used to separate alternatives; it is the -least binding operator in this notation. A star (``*``) means zero or more -repetitions of the preceding item; likewise, a plus (``+``) means one or more -repetitions, and a phrase enclosed in square brackets (``[ ]``) means zero or -one occurrences (in other words, the enclosed phrase is optional). The ``*`` -and ``+`` operators bind as tightly as possible; parentheses are used for -grouping. Literal strings are enclosed in quotes. White space is only -meaningful to separate tokens. Rules are normally contained on a single line; -rules with many alternatives may be formatted alternatively with each line after -the first beginning with a vertical bar. - -.. index:: lexical definitions, ASCII - -In lexical definitions (as the example above), two more conventions are used: -Two literal characters separated by three dots mean a choice of any single -character in the given (inclusive) range of ASCII characters. A phrase between -angular brackets (``<...>``) gives an informal description of the symbol -defined; e.g., this could be used to describe the notion of 'control character' -if needed. - -Even though the notation used is almost the same, there is a big difference -between the meaning of lexical and syntactic definitions: a lexical definition -operates on the individual characters of the input source, while a syntax -definition operates on the stream of tokens generated by the lexical analysis. -All uses of BNF in the next chapter ("Lexical Analysis") are lexical -definitions; uses in subsequent chapters are syntactic definitions. - +The descriptions of lexical analysis and syntax use a grammar notation that +is a mixture of +`EBNF `_ +and `PEG `_. +For example: + +.. grammar-snippet:: + :group: notation + + name: `letter` (`letter` | `digit` | "_")* + letter: "a"..."z" | "A"..."Z" + digit: "0"..."9" + +In this example, the first line says that a ``name`` is a ``letter`` followed +by a sequence of zero or more ``letter``\ s, ``digit``\ s, and underscores. +A ``letter`` in turn is any of the single characters ``'a'`` through +``'z'`` and ``A`` through ``Z``; a ``digit`` is a single character from ``0`` +to ``9``. + +Each rule begins with a name (which identifies the rule that's being defined) +followed by a colon, ``:``. +The definition to the right of the colon uses the following syntax elements: + +* ``name``: A name refers to another rule. + Where possible, it is a link to the rule's definition. + + * ``TOKEN``: An uppercase name refers to a :term:`token`. + For the purposes of grammar definitions, tokens are the same as rules. + +* ``"text"``, ``'text'``: Text in single or double quotes must match literally + (without the quotes). The type of quote is chosen according to the meaning + of ``text``: + + * ``'if'``: A name in single quotes denotes a :ref:`keyword `. + * ``"case"``: A name in double quotes denotes a + :ref:`soft-keyword `. + * ``'@'``: A non-letter symbol in single quotes denotes an + :py:data:`~token.OP` token, that is, a :ref:`delimiter ` or + :ref:`operator `. + +* ``e1 e2``: Items separated only by whitespace denote a sequence. + Here, ``e1`` must be followed by ``e2``. +* ``e1 | e2``: A vertical bar is used to separate alternatives. + It denotes PEG's "ordered choice": if ``e1`` matches, ``e2`` is + not considered. + In traditional PEG grammars, this is written as a slash, ``/``, rather than + a vertical bar. + See :pep:`617` for more background and details. +* ``e*``: A star means zero or more repetitions of the preceding item. +* ``e+``: Likewise, a plus means one or more repetitions. +* ``[e]``: A phrase enclosed in square brackets means zero or + one occurrences. In other words, the enclosed phrase is optional. +* ``e?``: A question mark has exactly the same meaning as square brackets: + the preceding item is optional. +* ``(e)``: Parentheses are used for grouping. +* ``"a"..."z"``: Two literal characters separated by three dots mean a choice + of any single character in the given (inclusive) range of ASCII characters. + This notation is only used in + :ref:`lexical definitions `. +* ``<...>``: A phrase between angular brackets gives an informal description + of the matched symbol (for example, ````), + or an abbreviation that is defined in nearby text (for example, ````). + This notation is only used in + :ref:`lexical definitions `. + +The unary operators (``*``, ``+``, ``?``) bind as tightly as possible; +the vertical bar (``|``) binds most loosely. + +White space is only meaningful to separate tokens. + +Rules are normally contained on a single line, but rules that are too long +may be wrapped: + +.. grammar-snippet:: + :group: notation + + literal: stringliteral | bytesliteral + | integer | floatnumber | imagnumber + +Alternatively, rules may be formatted with the first line ending at the colon, +and each alternative beginning with a vertical bar on a new line. +For example: + + +.. grammar-snippet:: + :group: notation-alt + + literal: + | stringliteral + | bytesliteral + | integer + | floatnumber + | imagnumber + +This does *not* mean that there is an empty first alternative. + +.. index:: lexical definitions + +.. _notation-lexical-vs-syntactic: + +Lexical and Syntactic definitions +--------------------------------- + +There is some difference between *lexical* and *syntactic* analysis: +the :term:`lexical analyzer` operates on the individual characters of the +input source, while the *parser* (syntactic analyzer) operates on the stream +of :term:`tokens ` generated by the lexical analysis. +However, in some cases the exact boundary between the two phases is a +CPython implementation detail. + +The practical difference between the two is that in *lexical* definitions, +all whitespace is significant. +The lexical analyzer :ref:`discards ` all whitespace that is not +converted to tokens like :data:`token.INDENT` or :data:`~token.NEWLINE`. +*Syntactic* definitions then use these tokens, rather than source characters. + +This documentation uses the same BNF grammar for both styles of definitions. +All uses of BNF in the next chapter (:ref:`lexical`) are lexical definitions; +uses in subsequent chapters are syntactic definitions. diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ff801a7d4fc494..567c70111c20ec 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -35,11 +35,11 @@ Logical lines .. index:: logical line, physical line, line joining, NEWLINE token -The end of a logical line is represented by the token NEWLINE. Statements -cannot cross logical line boundaries except where NEWLINE is allowed by the -syntax (e.g., between statements in compound statements). A logical line is -constructed from one or more *physical lines* by following the explicit or -implicit *line joining* rules. +The end of a logical line is represented by the token :data:`~token.NEWLINE`. +Statements cannot cross logical line boundaries except where :data:`!NEWLINE` +is allowed by the syntax (e.g., between statements in compound statements). +A logical line is constructed from one or more *physical lines* by following +the explicit or implicit *line joining* rules. .. _physical-lines: @@ -99,7 +99,7 @@ which is recognized by Bram Moolenaar's VIM. If no encoding declaration is found, the default encoding is UTF-8. If the implicit or explicit encoding of a file is UTF-8, an initial UTF-8 byte-order -mark (b'\xef\xbb\xbf') is ignored rather than being a syntax error. +mark (``b'\xef\xbb\xbf'``) is ignored rather than being a syntax error. If an encoding is declared, the encoding name must be recognized by Python (see :ref:`standard-encodings`). The @@ -160,11 +160,12 @@ Blank lines .. index:: single: blank line A logical line that contains only spaces, tabs, formfeeds and possibly a -comment, is ignored (i.e., no NEWLINE token is generated). During interactive -input of statements, handling of a blank line may differ depending on the -implementation of the read-eval-print loop. In the standard interactive -interpreter, an entirely blank logical line (i.e. one containing not even -whitespace or a comment) terminates a multi-line statement. +comment, is ignored (i.e., no :data:`~token.NEWLINE` token is generated). +During interactive input of statements, handling of a blank line may differ +depending on the implementation of the read-eval-print loop. +In the standard interactive interpreter, an entirely blank logical line (that +is, one containing not even whitespace or a comment) terminates a multi-line +statement. .. _indentation: @@ -202,19 +203,20 @@ the space count to zero). .. index:: INDENT token, DEDENT token -The indentation levels of consecutive lines are used to generate INDENT and -DEDENT tokens, using a stack, as follows. +The indentation levels of consecutive lines are used to generate +:data:`~token.INDENT` and :data:`~token.DEDENT` tokens, using a stack, +as follows. Before the first line of the file is read, a single zero is pushed on the stack; this will never be popped off again. The numbers pushed on the stack will always be strictly increasing from bottom to top. At the beginning of each logical line, the line's indentation level is compared to the top of the stack. If it is equal, nothing happens. If it is larger, it is pushed on the stack, and -one INDENT token is generated. If it is smaller, it *must* be one of the +one :data:`!INDENT` token is generated. If it is smaller, it *must* be one of the numbers occurring on the stack; all numbers on the stack that are larger are -popped off, and for each number popped off a DEDENT token is generated. At the -end of the file, a DEDENT token is generated for each number remaining on the -stack that is larger than zero. +popped off, and for each number popped off a :data:`!DEDENT` token is generated. +At the end of the file, a :data:`!DEDENT` token is generated for each number +remaining on the stack that is larger than zero. Here is an example of a correctly (though confusingly) indented piece of Python code:: @@ -254,8 +256,18 @@ Whitespace between tokens Except at the beginning of a logical line or in string literals, the whitespace characters space, tab and formfeed can be used interchangeably to separate tokens. Whitespace is needed between two tokens only if their concatenation -could otherwise be interpreted as a different token (e.g., ab is one token, but -a b is two tokens). +could otherwise be interpreted as a different token. For example, ``ab`` is one +token, but ``a b`` is two tokens. However, ``+a`` and ``+ a`` both produce +two tokens, ``+`` and ``a``, as ``+a`` is not a valid token. + + +.. _endmarker-token: + +End marker +---------- + +At the end of non-interactive input, the lexical analyzer generates an +:data:`~token.ENDMARKER` token. .. _other-tokens: @@ -263,67 +275,94 @@ a b is two tokens). Other tokens ============ -Besides NEWLINE, INDENT and DEDENT, the following categories of tokens exist: -*identifiers*, *keywords*, *literals*, *operators*, and *delimiters*. Whitespace -characters (other than line terminators, discussed earlier) are not tokens, but -serve to delimit tokens. Where ambiguity exists, a token comprises the longest -possible string that forms a legal token, when read from left to right. +Besides :data:`~token.NEWLINE`, :data:`~token.INDENT` and :data:`~token.DEDENT`, +the following categories of tokens exist: +*identifiers* and *keywords* (:data:`~token.NAME`), *literals* (such as +:data:`~token.NUMBER` and :data:`~token.STRING`), and other symbols +(*operators* and *delimiters*, :data:`~token.OP`). +Whitespace characters (other than logical line terminators, discussed earlier) +are not tokens, but serve to delimit tokens. +Where ambiguity exists, a token comprises the longest possible string that +forms a legal token, when read from left to right. .. _identifiers: -Identifiers and keywords -======================== +Names (identifiers and keywords) +================================ .. index:: identifier, name -Identifiers (also referred to as *names*) are described by the following lexical -definitions. +:data:`~token.NAME` tokens represent *identifiers*, *keywords*, and +*soft keywords*. -The syntax of identifiers in Python is based on the Unicode standard annex -UAX-31, with elaboration and changes as defined below; see also :pep:`3131` for -further details. - -Within the ASCII range (U+0001..U+007F), the valid characters for identifiers -include the uppercase and lowercase letters ``A`` through -``Z``, the underscore ``_`` and, except for the first character, the digits +Within the ASCII range (U+0001..U+007F), the valid characters for names +include the uppercase and lowercase letters (``A-Z`` and ``a-z``), +the underscore ``_`` and, except for the first character, the digits ``0`` through ``9``. -Python 3.0 introduced additional characters from outside the ASCII range (see -:pep:`3131`). For these characters, the classification uses the version of the -Unicode Character Database as included in the :mod:`unicodedata` module. -Identifiers are unlimited in length. Case is significant. +Names must contain at least one character, but have no upper length limit. +Case is significant. -.. productionlist:: python-grammar - identifier: `xid_start` `xid_continue`* - id_start: - id_continue: - xid_start: - xid_continue: - -The Unicode category codes mentioned above stand for: - -* *Lu* - uppercase letters -* *Ll* - lowercase letters -* *Lt* - titlecase letters -* *Lm* - modifier letters -* *Lo* - other letters -* *Nl* - letter numbers -* *Mn* - nonspacing marks -* *Mc* - spacing combining marks -* *Nd* - decimal numbers -* *Pc* - connector punctuations -* *Other_ID_Start* - explicit list of characters in `PropList.txt - `_ to support backwards - compatibility -* *Other_ID_Continue* - likewise - -All identifiers are converted into the normal form NFKC while parsing; comparison -of identifiers is based on NFKC. - -A non-normative HTML file listing all valid identifier characters for Unicode -16.0.0 can be found at -https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +Besides ``A-Z``, ``a-z``, ``_`` and ``0-9``, names can also use "letter-like" +and "number-like" characters from outside the ASCII range, as detailed below. + +All identifiers are converted into the `normalization form`_ NFKC while +parsing; comparison of identifiers is based on NFKC. + +Formally, the first character of a normalized identifier must belong to the +set ``id_start``, which is the union of: + +* Unicode category ```` - uppercase letters (includes ``A`` to ``Z``) +* Unicode category ```` - lowercase letters (includes ``a`` to ``z``) +* Unicode category ```` - titlecase letters +* Unicode category ```` - modifier letters +* Unicode category ```` - other letters +* Unicode category ```` - letter numbers +* {``"_"``} - the underscore +* ```` - an explicit set of characters in `PropList.txt`_ + to support backwards compatibility + +The remaining characters must belong to the set ``id_continue``, which is the +union of: + +* all characters in ``id_start`` +* Unicode category ```` - decimal numbers (includes ``0`` to ``9``) +* Unicode category ```` - connector punctuations +* Unicode category ```` - nonspacing marks +* Unicode category ```` - spacing combining marks +* ```` - another explicit set of characters in + `PropList.txt`_ to support backwards compatibility + +Unicode categories use the version of the Unicode Character Database as +included in the :mod:`unicodedata` module. + +These sets are based on the Unicode standard annex `UAX-31`_. +See also :pep:`3131` for further details. + +Even more formally, names are described by the following lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + NAME: `xid_start` `xid_continue`* + id_start: | | | | | | "_" | + id_continue: `id_start` | | | | | + xid_start: + xid_continue: + identifier: <`NAME`, except keywords> + +A non-normative listing of all valid identifier characters as defined by +Unicode is available in the `DerivedCoreProperties.txt`_ file in the Unicode +Character Database. + + +.. _UAX-31: https://www.unicode.org/reports/tr31/ +.. _PropList.txt: https://www.unicode.org/Public/16.0.0/ucd/PropList.txt +.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms .. _keywords: @@ -335,7 +374,7 @@ Keywords single: keyword single: reserved word -The following identifiers are used as reserved words, or *keywords* of the +The following names are used as reserved words, or *keywords* of the language, and cannot be used as ordinary identifiers. They must be spelled exactly as written here: @@ -359,18 +398,19 @@ Soft Keywords .. versionadded:: 3.10 -Some identifiers are only reserved under specific contexts. These are known as -*soft keywords*. The identifiers ``match``, ``case``, ``type`` and ``_`` can -syntactically act as keywords in certain contexts, +Some names are only reserved under specific contexts. These are known as +*soft keywords*: + +- ``match``, ``case``, and ``_``, when used in the :keyword:`match` statement. +- ``type``, when used in the :keyword:`type` statement. + +These syntactically act as keywords in their specific contexts, but this distinction is done at the parser level, not when tokenizing. As soft keywords, their use in the grammar is possible while still preserving compatibility with existing code that uses these names as identifier names. -``match``, ``case``, and ``_`` are used in the :keyword:`match` statement. -``type`` is used in the :keyword:`type` statement. - .. versionchanged:: 3.12 ``type`` is now a soft keyword. @@ -449,8 +489,9 @@ String literals are described by the following lexical definitions: .. productionlist:: python-grammar stringliteral: [`stringprefix`](`shortstring` | `longstring`) - stringprefix: "r" | "u" | "R" | "U" | "f" | "F" + stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T" : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" + : | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT" shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"' longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""' shortstringitem: `shortstringchar` | `stringescapeseq` @@ -881,11 +922,20 @@ Numeric literals floating-point literal, hexadecimal literal octal literal, binary literal, decimal literal, imaginary literal, complex literal -There are three types of numeric literals: integers, floating-point numbers, and -imaginary numbers. There are no complex literals (complex numbers can be formed -by adding a real number and an imaginary number). +:data:`~token.NUMBER` tokens represent numeric literals, of which there are +three types: integers, floating-point numbers, and imaginary numbers. + +.. grammar-snippet:: + :group: python-grammar + + NUMBER: `integer` | `floatnumber` | `imagnumber` -Note that numeric literals do not include a sign; a phrase like ``-1`` is +The numeric value of a numeric literal is the same as if it were passed as a +string to the :class:`int`, :class:`float` or :class:`complex` class +constructor, respectively. +Note that not all valid inputs for those constructors are also valid literals. + +Numeric literals do not include a sign; a phrase like ``-1`` is actually an expression composed of the unary operator '``-``' and the literal ``1``. @@ -899,38 +949,67 @@ actually an expression composed of the unary operator '``-``' and the literal .. _integers: Integer literals ----------------- +^^^^^^^^^^^^^^^^ -Integer literals are described by the following lexical definitions: +Integer literals denote whole numbers. For example:: -.. productionlist:: python-grammar - integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger` - decinteger: `nonzerodigit` (["_"] `digit`)* | "0"+ (["_"] "0")* - bininteger: "0" ("b" | "B") (["_"] `bindigit`)+ - octinteger: "0" ("o" | "O") (["_"] `octdigit`)+ - hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+ - nonzerodigit: "1"..."9" - digit: "0"..."9" - bindigit: "0" | "1" - octdigit: "0"..."7" - hexdigit: `digit` | "a"..."f" | "A"..."F" + 7 + 3 + 2147483647 There is no limit for the length of integer literals apart from what can be -stored in available memory. +stored in available memory:: + + 7922816251426433759354395033679228162514264337593543950336 + +Underscores can be used to group digits for enhanced readability, +and are ignored for determining the numeric value of the literal. +For example, the following literals are equivalent:: + + 100_000_000_000 + 100000000000 + 1_00_00_00_00_000 -Underscores are ignored for determining the numeric value of the literal. They -can be used to group digits for enhanced readability. One underscore can occur -between digits, and after base specifiers like ``0x``. +Underscores can only occur between digits. +For example, ``_123``, ``321_``, and ``123__321`` are *not* valid literals. -Note that leading zeros in a non-zero decimal number are not allowed. This is -for disambiguation with C-style octal literals, which Python used before version -3.0. +Integers can be specified in binary (base 2), octal (base 8), or hexadecimal +(base 16) using the prefixes ``0b``, ``0o`` and ``0x``, respectively. +Hexadecimal digits 10 through 15 are represented by letters ``A``-``F``, +case-insensitive. For example:: -Some examples of integer literals:: + 0b100110111 + 0b_1110_0101 + 0o177 + 0o377 + 0xdeadbeef + 0xDead_Beef - 7 2147483647 0o177 0b100110111 - 3 79228162514264337593543950336 0o377 0xdeadbeef - 100_000_000_000 0b_1110_0101 +An underscore can follow the base specifier. +For example, ``0x_1f`` is a valid literal, but ``0_x1f`` and ``0x__1f`` are +not. + +Leading zeros in a non-zero decimal number are not allowed. +For example, ``0123`` is not a valid literal. +This is for disambiguation with C-style octal literals, which Python used +before version 3.0. + +Formally, integer literals are described by the following lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + integer: `decinteger` | `bininteger` | `octinteger` | `hexinteger` | `zerointeger` + decinteger: `nonzerodigit` (["_"] `digit`)* + bininteger: "0" ("b" | "B") (["_"] `bindigit`)+ + octinteger: "0" ("o" | "O") (["_"] `octdigit`)+ + hexinteger: "0" ("x" | "X") (["_"] `hexdigit`)+ + zerointeger: "0"+ (["_"] "0")* + nonzerodigit: "1"..."9" + digit: "0"..."9" + bindigit: "0" | "1" + octdigit: "0"..."7" + hexdigit: `digit` | "a"..."f" | "A"..."F" .. versionchanged:: 3.6 Underscores are now allowed for grouping purposes in literals. @@ -943,26 +1022,58 @@ Some examples of integer literals:: .. _floating: Floating-point literals ------------------------ +^^^^^^^^^^^^^^^^^^^^^^^ -Floating-point literals are described by the following lexical definitions: +Floating-point (float) literals, such as ``3.14`` or ``1.5``, denote +:ref:`approximations of real numbers `. -.. productionlist:: python-grammar - floatnumber: `pointfloat` | `exponentfloat` - pointfloat: [`digitpart`] `fraction` | `digitpart` "." - exponentfloat: (`digitpart` | `pointfloat`) `exponent` - digitpart: `digit` (["_"] `digit`)* - fraction: "." `digitpart` - exponent: ("e" | "E") ["+" | "-"] `digitpart` +They consist of *integer* and *fraction* parts, each composed of decimal digits. +The parts are separated by a decimal point, ``.``:: + + 2.71828 + 4.0 + +Unlike in integer literals, leading zeros are allowed in the numeric parts. +For example, ``077.010`` is legal, and denotes the same number as ``77.10``. + +As in integer literals, single underscores may occur between digits to help +readability:: + + 96_485.332_123 + 3.14_15_93 + +Either of these parts, but not both, can be empty. For example:: + + 10. # (equivalent to 10.0) + .001 # (equivalent to 0.001) + +Optionally, the integer and fraction may be followed by an *exponent*: +the letter ``e`` or ``E``, followed by an optional sign, ``+`` or ``-``, +and a number in the same format as the integer and fraction parts. +The ``e`` or ``E`` represents "times ten raised to the power of":: + + 1.0e3 # (represents 1.0×10³, or 1000.0) + 1.166e-5 # (represents 1.166×10⁻⁵, or 0.00001166) + 6.02214076e+23 # (represents 6.02214076×10²³, or 602214076000000000000000.) + +In floats with only integer and exponent parts, the decimal point may be +omitted:: -Note that the integer and exponent parts are always interpreted using radix 10. -For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The -allowed range of floating-point literals is implementation-dependent. As in -integer literals, underscores are supported for digit grouping. + 1e3 # (equivalent to 1.e3 and 1.0e3) + 0e0 # (equivalent to 0.) -Some examples of floating-point literals:: +Formally, floating-point literals are described by the following +lexical definitions: - 3.14 10. .001 1e100 3.14e-10 0e0 3.14_15_93 +.. grammar-snippet:: + :group: python-grammar + + floatnumber: + | `digitpart` "." [`digitpart`] [`exponent`] + | "." `digitpart` [`exponent`] + | `digitpart` `exponent` + digitpart: `digit` (["_"] `digit`)* + exponent: ("e" | "E") ["+" | "-"] `digitpart` .. versionchanged:: 3.6 Underscores are now allowed for grouping purposes in literals. @@ -973,20 +1084,62 @@ Some examples of floating-point literals:: .. _imaginary: Imaginary literals ------------------- +^^^^^^^^^^^^^^^^^^ -Imaginary literals are described by the following lexical definitions: +Python has :ref:`complex number ` objects, but no complex +literals. +Instead, *imaginary literals* denote complex numbers with a zero +real part. -.. productionlist:: python-grammar - imagnumber: (`floatnumber` | `digitpart`) ("j" | "J") +For example, in math, the complex number 3+4.2\ *i* is written +as the real number 3 added to the imaginary number 4.2\ *i*. +Python uses a similar syntax, except the imaginary unit is written as ``j`` +rather than *i*:: + + 3+4.2j + +This is an expression composed +of the :ref:`integer literal ` ``3``, +the :ref:`operator ` '``+``', +and the :ref:`imaginary literal ` ``4.2j``. +Since these are three separate tokens, whitespace is allowed between them:: + + 3 + 4.2j + +No whitespace is allowed *within* each token. +In particular, the ``j`` suffix, may not be separated from the number +before it. -An imaginary literal yields a complex number with a real part of 0.0. Complex -numbers are represented as a pair of floating-point numbers and have the same -restrictions on their range. To create a complex number with a nonzero real -part, add a floating-point number to it, e.g., ``(3+4j)``. Some examples of -imaginary literals:: +The number before the ``j`` has the same syntax as a floating-point literal. +Thus, the following are valid imaginary literals:: - 3.14j 10.j 10j .001j 1e100j 3.14e-10j 3.14_15_93j + 4.2j + 3.14j + 10.j + .001j + 1e100j + 3.14e-10j + 3.14_15_93j + +Unlike in a floating-point literal the decimal point can be omitted if the +imaginary number only has an integer part. +The number is still evaluated as a floating-point number, not an integer:: + + 10j + 0j + 1000000000000000000000000j # equivalent to 1e+24j + +The ``j`` suffix is case-insensitive. +That means you can use ``J`` instead:: + + 3.14J # equivalent to 3.14j + +Formally, imaginary literals are described by the following lexical definition: + +.. grammar-snippet:: + :group: python-grammar + + imagnumber: (`floatnumber` | `digitpart`) ("j" | "J") .. _operators: diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 926d2cd42bdf02..4f5396857f3024 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -14,11 +14,8 @@ Doc/c-api/typeobj.rst Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst -Doc/library/decimal.rst Doc/library/email.charset.rst -Doc/library/email.compat32-message.rst Doc/library/email.parser.rst -Doc/library/exceptions.rst Doc/library/functools.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst @@ -74,6 +71,4 @@ Doc/whatsnew/3.5.rst Doc/whatsnew/3.6.rst Doc/whatsnew/3.7.rst Doc/whatsnew/3.8.rst -Doc/whatsnew/3.9.rst Doc/whatsnew/3.10.rst -Doc/whatsnew/3.11.rst diff --git a/Doc/tools/extensions/audit_events.py b/Doc/tools/extensions/audit_events.py index 23d82c0f4414bf..385a58b2145446 100644 --- a/Doc/tools/extensions/audit_events.py +++ b/Doc/tools/extensions/audit_events.py @@ -13,7 +13,7 @@ from sphinx.util.docutils import SphinxDirective if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Iterator, Set from sphinx.application import Sphinx from sphinx.builders import Builder @@ -33,7 +33,7 @@ class AuditEvents: def __init__(self) -> None: self.events: dict[str, list[str]] = {} - self.sources: dict[str, list[tuple[str, str]]] = {} + self.sources: dict[str, set[tuple[str, str]]] = {} def __iter__(self) -> Iterator[tuple[str, list[str], tuple[str, str]]]: for name, args in self.events.items(): @@ -47,7 +47,7 @@ def add_event( self._check_args_match(name, args) else: self.events[name] = args - self.sources.setdefault(name, []).append(source) + self.sources.setdefault(name, set()).add(source) def _check_args_match(self, name: str, args: list[str]) -> None: current_args = self.events[name] @@ -69,11 +69,11 @@ def _check_args_match(self, name: str, args: list[str]) -> None: return def id_for(self, name) -> str: - source_count = len(self.sources.get(name, ())) + source_count = len(self.sources.get(name, set())) name_clean = re.sub(r"\W", "_", name) return f"audit_event_{name_clean}_{source_count}" - def rows(self) -> Iterator[tuple[str, list[str], list[tuple[str, str]]]]: + def rows(self) -> Iterator[tuple[str, list[str], Set[tuple[str, str]]]]: for name in sorted(self.events.keys()): yield name, self.events[name], self.sources[name] @@ -218,7 +218,7 @@ def _make_row( docname: str, name: str, args: list[str], - sources: list[tuple[str, str]], + sources: Set[tuple[str, str]], ) -> nodes.row: row = nodes.row() name_node = nodes.paragraph("", nodes.Text(name)) @@ -233,7 +233,7 @@ def _make_row( row += nodes.entry("", args_node) backlinks_node = nodes.paragraph() - backlinks = enumerate(sorted(set(sources)), start=1) + backlinks = enumerate(sorted(sources), start=1) for i, (doc, label) in backlinks: if isinstance(label, str): ref = nodes.reference("", f"[{i}]", internal=True) @@ -258,7 +258,7 @@ def setup(app: Sphinx): app.connect("env-purge-doc", audit_events_purge) app.connect("env-merge-info", audit_events_merge) return { - "version": "1.0", + "version": "2.0", "parallel_read_safe": True, "parallel_write_safe": True, } diff --git a/Doc/tools/templates/customsourcelink.html b/Doc/tools/templates/customsourcelink.html index eb9db9e341bd75..43d3a7a892a880 100644 --- a/Doc/tools/templates/customsourcelink.html +++ b/Doc/tools/templates/customsourcelink.html @@ -1,11 +1,11 @@ {%- if show_source and has_source and sourcename %}
-

{{ _('This Page') }}

+

{{ _('This page') }}

diff --git a/Doc/tools/templates/download.html b/Doc/tools/templates/download.html index 4645f7d394e29e..47a57eb111ba50 100644 --- a/Doc/tools/templates/download.html +++ b/Doc/tools/templates/download.html @@ -27,7 +27,7 @@ {%- endblock -%} {% block body %} -

{% trans %}Download Python {{ dl_version }} Documentation{% endtrans %}

+

{% trans %}Download Python {{ dl_version }} documentation{% endtrans %}

{% if last_updated %}

{% trans %}Last updated on: {{ last_updated }}.{% endtrans %}

{% endif %} diff --git a/Doc/tools/templates/indexcontent.html b/Doc/tools/templates/indexcontent.html index 06a4223643a05a..544cc4234f441e 100644 --- a/Doc/tools/templates/indexcontent.html +++ b/Doc/tools/templates/indexcontent.html @@ -72,7 +72,7 @@

{{ docstitle|e }}

- + diff --git a/Doc/tools/templates/indexsidebar.html b/Doc/tools/templates/indexsidebar.html index eea29e2449a9cf..086f15662cf87b 100644 --- a/Doc/tools/templates/indexsidebar.html +++ b/Doc/tools/templates/indexsidebar.html @@ -9,9 +9,9 @@

{% trans %}Docs by version{% endtrans %}

{% trans %}Other resources{% endtrans %}

diff --git a/Doc/tools/templates/layout.html b/Doc/tools/templates/layout.html index 56023ebf962703..1cb0200822d9fe 100644 --- a/Doc/tools/templates/layout.html +++ b/Doc/tools/templates/layout.html @@ -26,11 +26,11 @@ {% endblock %} {% block extrahead %} - {% if builder == "html" and enable_analytics %} + {% if builder == "html" %} + {% if enable_analytics %} - {% endif %} - - {% if builder != "htmlhelp" %} + {% endif %} + {% if pagename == 'whatsnew/changelog' and not embedded %} {% endif %} {% endif %} diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index 95939242fb7d44..5c0e8f34bf82f4 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -999,7 +999,8 @@ scope:: 43 The above example uses a lambda expression to return a function. Another use -is to pass a small function as an argument:: +is to pass a small function as an argument. For instance, :meth:`list.sort` +takes a sorting key function *key* which can be a lambda function:: >>> pairs = [(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four')] >>> pairs.sort(key=lambda pair: pair[1]) @@ -1055,7 +1056,7 @@ Here is an example of a multi-line docstring:: >>> print(my_function.__doc__) Do nothing, but document it. - No, really, it doesn't do anything. + No, really, it doesn't do anything. .. _tut-annotations: diff --git a/Doc/tutorial/index.rst b/Doc/tutorial/index.rst index 96791f88c867ab..d0bf77dc40d0a1 100644 --- a/Doc/tutorial/index.rst +++ b/Doc/tutorial/index.rst @@ -4,6 +4,10 @@ The Python Tutorial ###################### +.. Tip:: This tutorial is designed for + *programmers* that are new to the Python language, + **not** *beginners* who are new to programming. + Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. Python's elegant syntax and dynamic typing, @@ -21,7 +25,8 @@ implemented in C or C++ (or other languages callable from C). Python is also suitable as an extension language for customizable applications. This tutorial introduces the reader informally to the basic concepts and -features of the Python language and system. It helps to have a Python +features of the Python language and system. Be aware that it expects you to +have a basic understanding of programming in general. It helps to have a Python interpreter handy for hands-on experience, but all examples are self-contained, so the tutorial can be read off-line as well. diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst index bec5da8fd759ac..9e06e03991bc96 100644 --- a/Doc/tutorial/introduction.rst +++ b/Doc/tutorial/introduction.rst @@ -13,10 +13,9 @@ end a multi-line command. .. only:: html - You can toggle the display of prompts and output by clicking on ``>>>`` - in the upper-right corner of an example box. If you hide the prompts - and output for an example, then you can easily copy and paste the input - lines into your interpreter. + You can use the "Copy" button (it appears in the upper-right corner + when hovering over or tapping a code example), which strips prompts + and omits output, to copy and paste the input lines into your interpreter. .. index:: single: # (hash); comment @@ -147,6 +146,8 @@ Python can manipulate text (represented by type :class:`str`, so-called "``Yay! :)``". They can be enclosed in single quotes (``'...'``) or double quotes (``"..."``) with the same result [#]_. +.. code-block:: pycon + >>> 'spam eggs' # single quotes 'spam eggs' >>> "Paris rabbit got your back :)! Yay!" # double quotes diff --git a/Doc/tutorial/modules.rst b/Doc/tutorial/modules.rst index de7aa0e2342946..47bf7547b4ae1d 100644 --- a/Doc/tutorial/modules.rst +++ b/Doc/tutorial/modules.rst @@ -27,14 +27,16 @@ called :file:`fibo.py` in the current directory with the following contents:: # Fibonacci numbers module - def fib(n): # write Fibonacci series up to n + def fib(n): + """Write Fibonacci series up to n.""" a, b = 0, 1 while a < n: print(a, end=' ') a, b = b, a+b print() - def fib2(n): # return Fibonacci series up to n + def fib2(n): + """Return Fibonacci series up to n.""" result = [] a, b = 0, 1 while a < n: diff --git a/Doc/using/android.rst b/Doc/using/android.rst index 65bf23dc994856..cb762310328f1c 100644 --- a/Doc/using/android.rst +++ b/Doc/using/android.rst @@ -63,3 +63,12 @@ link to the relevant file. * Add code to your app to :source:`start Python in embedded mode `. This will need to be C code called via JNI. + +Building a Python package for Android +------------------------------------- + +Python packages can be built for Android as wheels and released on PyPI. The +recommended tool for doing this is `cibuildwheel +`__, which automates +all the details of setting up a cross-compilation environment, building the +wheel, and testing it on an emulator. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 40a46a62031ef7..cad49e2deeb46f 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -653,7 +653,7 @@ Miscellaneous options .. versionadded:: 3.13 * :samp:`-X thread_inherit_context={0,1}` causes :class:`~threading.Thread` - to, by default, use a copy of context of of the caller of + to, by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, threads will start with an empty context. If unset, the value of this option defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also @@ -669,6 +669,13 @@ Miscellaneous options .. versionadded:: 3.14 + * :samp:`-X tlbc={0,1}` enables (1, the default) or disables (0) thread-local + bytecode in builds configured with :option:`--disable-gil`. When disabled, + this also disables the specializing interpreter. See also + :envvar:`PYTHON_TLBC`. + + .. versionadded:: 3.14 + It also allows passing arbitrary values and retrieving them through the :data:`sys._xoptions` dictionary. @@ -1277,7 +1284,7 @@ conflict. .. envvar:: PYTHON_THREAD_INHERIT_CONTEXT If this variable is set to ``1`` then :class:`~threading.Thread` will, - by default, use a copy of context of of the caller of ``Thread.start()`` + by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, new threads will start with an empty context. If unset, this variable defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also :option:`-X thread_inherit_context<-X>`. @@ -1302,6 +1309,16 @@ conflict. .. versionadded:: 3.13 +.. envvar:: PYTHON_TLBC + + If set to ``1`` enables thread-local bytecode. If set to ``0`` thread-local + bytecode and the specializing interpreter are disabled. Only applies to + builds configured with :option:`--disable-gil`. + + See also the :option:`-X tlbc <-X>` command-line option. + + .. versionadded:: 3.14 + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index b914d3397b6dda..e5fe3c72b1b26e 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -29,6 +29,9 @@ Features and minimum versions required to build CPython: * Tcl/Tk 8.5.12 for the :mod:`tkinter` module. +* `libmpdec `_ 2.5.0 + for the :mod:`decimal` module. + * Autoconf 2.72 and aclocal 1.16.5 are required to regenerate the :file:`configure` script. @@ -290,8 +293,8 @@ General Options .. option:: --disable-gil - Enables **experimental** support for running Python without the - :term:`global interpreter lock` (GIL): free threading build. + Enables support for running Python without the :term:`global interpreter + lock` (GIL): free threading build. Defines the ``Py_GIL_DISABLED`` macro and adds ``"t"`` to :data:`sys.abiflags`. @@ -445,6 +448,14 @@ Options for third-party dependencies C compiler and linker flags for ``libuuid``, used by :mod:`uuid` module, overriding ``pkg-config``. +.. option:: LIBZSTD_CFLAGS +.. option:: LIBZSTD_LIBS + + C compiler and linker flags for ``libzstd``, used by :mod:`compression.zstd` module, + overriding ``pkg-config``. + + .. versionadded:: 3.14 + .. option:: PANEL_CFLAGS .. option:: PANEL_LIBS @@ -675,6 +686,13 @@ also be used to improve performance. not compiled. This includes both the functionality to schedule code to be executed and the functionality to receive code to be executed. + .. c:macro:: Py_REMOTE_DEBUG + + This macro is defined by default, unless Python is configured with + :option:`--without-remote-debug`. + + Note that even if the macro is defined, remote debugging may not be + available (for example, on an incompatible platform). .. versionadded:: 3.14 diff --git a/Doc/using/ios.rst b/Doc/using/ios.rst index 7d5c6331bef5ce..0fb28f8c866b02 100644 --- a/Doc/using/ios.rst +++ b/Doc/using/ios.rst @@ -298,9 +298,9 @@ To add Python to an iOS Xcode project: * Signal handlers (:c:member:`PyConfig.install_signal_handlers`) are *enabled*; * System logging (:c:member:`PyConfig.use_system_logger`) is *enabled* (optional, but strongly recommended; this is enabled by default); - * ``PYTHONHOME`` for the interpreter is configured to point at the + * :envvar:`PYTHONHOME` for the interpreter is configured to point at the ``python`` subfolder of your app's bundle; and - * The ``PYTHONPATH`` for the interpreter includes: + * The :envvar:`PYTHONPATH` for the interpreter includes: - the ``python/lib/python3.X`` subfolder of your app's bundle, - the ``python/lib/python3.X/lib-dynload`` subfolder of your app's bundle, and @@ -324,7 +324,12 @@ modules in your app, some additional steps will be required: the ``lib-dynload`` folder can be copied and adapted for this purpose. * If you're using a separate folder for third-party packages, ensure that folder - is included as part of the ``PYTHONPATH`` configuration in step 10. + is included as part of the :envvar:`PYTHONPATH` configuration in step 10. + +* If any of the folders that contain third-party packages will contain ``.pth`` + files, you should add that folder as a *site directory* (using + :meth:`site.addsitedir`), rather than adding to :envvar:`PYTHONPATH` or + :attr:`sys.path` directly. Testing a Python package ------------------------ diff --git a/Doc/using/mac.rst b/Doc/using/mac.rst index 4b6c884f3d4f25..f88f3c2e0785e4 100644 --- a/Doc/using/mac.rst +++ b/Doc/using/mac.rst @@ -20,13 +20,6 @@ the Pythons provided by the CPython release team for download from the `python.org website `_. See :ref:`alternative_bundles` for some other options. -.. |usemac_x_dot_y| replace:: 3.13 -.. |usemac_python_x_dot_y_literal| replace:: ``python3.13`` -.. |usemac_python_x_dot_y_t_literal| replace:: ``python3.13t`` -.. |usemac_python_x_dot_y_t_literal_config| replace:: ``python3.13t-config`` -.. |usemac_applications_folder_name| replace:: ``Python 3.13`` -.. |usemac_applications_folder_version| replace:: ``/Applications/Python 3.13/`` - .. _getting-osx: .. _getting-and-installing-macpython: @@ -64,7 +57,7 @@ Clicking on the **Continue** button brings up the **Read Me** for this installer Besides other important information, the **Read Me** documents which Python version is going to be installed and on what versions of macOS it is supported. You may need to scroll through to read the whole file. By default, this **Read Me** will also be -installed in |usemac_applications_folder_version| and available to read anytime. +installed in |applications_python_version_literal| and available to read anytime. .. image:: mac_installer_02_readme.png @@ -83,7 +76,7 @@ display. For most uses, the standard set of installation operations is appropria By pressing the **Customize** button, you can choose to omit or select certain package components of the installer. Click on each package name to see a description of what it installs. -To also install support for the optional experimental free-threaded feature, +To also install support for the optional free-threaded feature, see :ref:`install-freethreaded-macos`. .. image:: mac_installer_05_custom_install.png @@ -97,7 +90,7 @@ When the installation is complete, the **Summary** window will appear. .. image:: mac_installer_06_summary.png Double-click on the :command:`Install Certificates.command` -icon or file in the |usemac_applications_folder_version| window to complete the +icon or file in the |applications_python_version_literal| window to complete the installation. .. image:: mac_installer_07_applications.png @@ -114,7 +107,7 @@ Close this terminal window and the installer window. A default install will include: -* A |usemac_applications_folder_name| folder in your :file:`Applications` folder. In here +* A |python_version_literal| folder in your :file:`Applications` folder. In here you find :program:`IDLE`, the development environment that is a standard part of official Python distributions; and :program:`Python Launcher`, which handles double-clicking Python scripts from the macOS `Finder `_. @@ -141,7 +134,7 @@ How to run a Python script There are two ways to invoke the Python interpreter. If you are familiar with using a Unix shell in a terminal -window, you can invoke |usemac_python_x_dot_y_literal| or ``python3`` optionally +window, you can invoke |python_x_dot_y_literal| or ``python3`` optionally followed by one or more command line options (described in :ref:`using-on-general`). The Python tutorial also has a useful section on :ref:`using Python interactively from a shell `. @@ -160,7 +153,7 @@ for more information. To run a Python script file from the terminal window, you can invoke the interpreter with the name of the script file: - |usemac_python_x_dot_y_literal| ``myscript.py`` + |python_x_dot_y_literal| ``myscript.py`` To run your script from the Finder, you can either: @@ -259,20 +252,20 @@ Advanced Topics Installing Free-threaded Binaries --------------------------------- -.. versionadded:: 3.13 (Experimental) - -.. note:: - - Everything described in this section is considered experimental, - and should be expected to change in future releases. +.. versionadded:: 3.13 The ``python.org`` :ref:`Python for macOS ` installer package can optionally install an additional build of -Python |usemac_x_dot_y| that supports :pep:`703`, the experimental free-threading feature +Python |version| that supports :pep:`703`, the free-threading feature (running with the :term:`global interpreter lock` disabled). Check the release page on ``python.org`` for possible updated information. -Because this feature is still considered experimental, the support for it +The free-threaded mode is working and continues to be improved, but +there is some additional overhead in single-threaded workloads compared +to the regular build. Additionally, third-party packages, in particular ones +with an :term:`extension module`, may not be ready for use in a +free-threaded build, and will re-enable the :term:`GIL`. +Therefore, the support for free-threading is not installed by default. It is packaged as a separate install option, available by clicking the **Customize** button on the **Installation Type** step of the installer as described above. @@ -282,46 +275,54 @@ step of the installer as described above. If the box next to the **Free-threaded Python** package name is checked, a separate :file:`PythonT.framework` will also be installed alongside the normal :file:`Python.framework` in :file:`/Library/Frameworks`. -This configuration allows a free-threaded Python |usemac_x_dot_y| build to co-exist -on your system with a traditional (GIL only) Python |usemac_x_dot_y| build with -minimal risk while installing or testing. This installation layout is itself -experimental and is subject to change in future releases. +This configuration allows a free-threaded Python |version| build to co-exist +on your system with a traditional (GIL only) Python |version| build with +minimal risk while installing or testing. This installation layout may +change in future releases. Known cautions and limitations: - The **UNIX command-line tools** package, which is selected by default, - will install links in :file:`/usr/local/bin` for |usemac_python_x_dot_y_t_literal|, - the free-threaded interpreter, and |usemac_python_x_dot_y_t_literal_config|, + will install links in :file:`/usr/local/bin` for |python_x_dot_y_t_literal|, + the free-threaded interpreter, and |python_x_dot_y_t_literal_config|, a configuration utility which may be useful for package builders. Since :file:`/usr/local/bin` is typically included in your shell ``PATH``, in most cases no changes to your ``PATH`` environment variables should - be needed to use |usemac_python_x_dot_y_t_literal|. + be needed to use |python_x_dot_y_t_literal|. - For this release, the **Shell profile updater** package and the - :file:`Update Shell Profile.command` in |usemac_applications_folder_version| + :file:`Update Shell Profile.command` in |applications_python_version_literal| do not support the free-threaded package. - The free-threaded build and the traditional build have separate search paths and separate :file:`site-packages` directories so, by default, if you need a package available in both builds, it may need to be installed in both. The free-threaded package will install a separate instance of :program:`pip` for use - with |usemac_python_x_dot_y_t_literal|. + with |python_x_dot_y_t_literal|. - To install a package using :command:`pip` without a :command:`venv`: - |usemac_python_x_dot_y_t_literal| ``-m pip install `` + .. parsed-literal:: + + python\ |version|\ t -m pip install - When working with multiple Python environments, it is usually safest and easiest to :ref:`create and use virtual environments `. This can avoid possible command name conflicts and confusion about which Python is in use: - |usemac_python_x_dot_y_t_literal| ``-m venv `` + .. parsed-literal:: + + python\ |version|\ t -m venv + then :command:`activate`. - To run a free-threaded version of IDLE: - |usemac_python_x_dot_y_t_literal| ``-m idlelib`` + .. parsed-literal:: + + python\ |version|\ t -m idlelib + - The interpreters in both builds respond to the same :ref:`PYTHON environment variables ` @@ -337,28 +338,28 @@ Known cautions and limitations: thus it only needs to be run once. - If you cannot depend on the link in ``/usr/local/bin`` pointing to the - ``python.org`` free-threaded |usemac_python_x_dot_y_t_literal| (for example, if you want + ``python.org`` free-threaded |python_x_dot_y_t_literal| (for example, if you want to install your own version there or some other distribution does), you can explicitly set your shell ``PATH`` environment variable to include the ``PythonT`` framework ``bin`` directory: - .. code-block:: sh + .. parsed-literal:: - export PATH="/Library/Frameworks/PythonT.framework/Versions/3.13/bin":"$PATH" + export PATH="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin":"$PATH" The traditional framework installation by default does something similar, except for :file:`Python.framework`. Be aware that having both framework ``bin`` directories in ``PATH`` can lead to confusion if there are duplicate names - like ``python3.13`` in both; which one is actually used depends on the order + like |python_x_dot_y_literal| in both; which one is actually used depends on the order they appear in ``PATH``. The ``which python3.x`` or ``which python3.xt`` commands can show which path is being used. Using virtual environments can help avoid such ambiguities. Another option might be to create a shell :command:`alias` to the desired interpreter, like: - .. code-block:: sh + .. parsed-literal:: - alias py3.13="/Library/Frameworks/Python.framework/Versions/3.13/bin/python3.13" - alias py3.13t="/Library/Frameworks/PythonT.framework/Versions/3.13/bin/python3.13t" + alias py\ |version|\ ="/Library/Frameworks/Python.framework/Versions/\ |version|\ /bin/python\ |version|\ " + alias py\ |version|\ t="/Library/Frameworks/PythonT.framework/Versions/\ |version|\ /bin/python\ |version|\ t" Installing using the command line --------------------------------- @@ -369,22 +370,22 @@ the macOS command line :command:`installer` utility lets you select non-default options, too. If you are not familiar with :command:`installer`, it can be somewhat cryptic (see :command:`man installer` for more information). As an example, the following shell snippet shows one way to do it, -using the ``3.13.0b2`` release and selecting the free-threaded interpreter +using the |x_dot_y_b2_literal| release and selecting the free-threaded interpreter option: -.. code-block:: sh +.. parsed-literal:: - RELEASE="python-3.13.0b2-macos11.pkg" + RELEASE="python-\ |version|\ 0b2-macos11.pkg" # download installer pkg - curl -O https://www.python.org/ftp/python/3.13.0/${RELEASE} + curl -O \https://www.python.org/ftp/python/\ |version|\ .0/${RELEASE} # create installer choicechanges to customize the install: - # enable the PythonTFramework-3.13 package + # enable the PythonTFramework-\ |version|\ package # while accepting the other defaults (install all other packages) cat > ./choicechanges.plist < - + @@ -393,7 +394,7 @@ option: choiceAttribute selected choiceIdentifier - org.python.Python.PythonTFramework-3.13 + org.python.Python.PythonTFramework-\ |version|\ @@ -404,19 +405,19 @@ option: You can then test that both installer builds are now available with something like: -.. code-block:: console +.. parsed-literal:: $ # test that the free-threaded interpreter was installed if the Unix Command Tools package was enabled - $ /usr/local/bin/python3.13t -VV - Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + $ /usr/local/bin/python\ |version|\ t -VV + Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] $ # and the traditional interpreter - $ /usr/local/bin/python3.13 -VV - Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] + $ /usr/local/bin/python\ |version|\ -VV + Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] $ # test that they are also available without the prefix if /usr/local/bin is on $PATH - $ python3.13t -VV - Python 3.13.0b2 experimental free-threading build (v3.13.0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] - $ python3.13 -VV - Python 3.13.0b2 (v3.13.0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] + $ python\ |version|\ t -VV + Python \ |version|\ .0b2 free-threading build (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:57:31) [Clang 15.0.0 (clang-1500.3.9.4)] + $ python\ |version|\ -VV + Python \ |version|\ .0b2 (v\ |version|\ .0b2:3a83b172af, Jun 5 2024, 12:50:24) [Clang 15.0.0 (clang-1500.3.9.4)] .. note:: diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 74d6db5d7d1a98..9628da3d2f6b12 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -77,31 +77,30 @@ To install the file downloaded from python.org, either double-click and select "Install", or run ``Add-AppxPackage `` in Windows Powershell. After installation, the ``python``, ``py``, and ``pymanager`` commands should be -available. If they are not, click Start and search for "Manage app execution -aliases". This settings page will let you enable the relevant commands. They -will be labelled "Python (default)", "Python (default windowed)", and "Python -install manager". - -If you have existing installations of Python, or you have modified your -:envvar:`PATH` variable, you may need to remove them or undo the modifications -in order for the commands to work. Old versions of Python can be reinstalled -using the Python install manager. +available. If you have existing installations of Python, or you have modified +your :envvar:`PATH` variable, you may need to remove them or undo the +modifications. See :ref:`pymanager-troubleshoot` for more help with fixing +non-working commands. When you first install a runtime, you will likely be prompted to add a directory to your :envvar:`PATH`. This is optional, if you prefer to use the ``py`` command, but is offered for those who prefer the full range of aliases (such as ``python3.14.exe``) to be available. The directory will be -:file:`%LocalAppData%\Python\bin` by default, but may be customized by an +:file:`%LocalAppData%\\Python\\bin` by default, but may be customized by an administrator. Click Start and search for "Edit environment variables for your account" for the system settings page to add the path. +Each Python runtime you install will have its own directory for scripts. These +also need to be added to :envvar:`PATH` if you want to use them. + The Python install manager will be automatically updated to new releases. This does not affect any installs of Python runtimes. Uninstalling the Python install manager does not uninstall any Python runtimes. If you are not able to install an MSIX in your context, for example, you are -using automated deployment software that does not support it, please see -:ref:`pymanager-advancedinstall` below for more information. +using automated deployment software that does not support it, or are targeting +Windows Server 2019, please see :ref:`pymanager-advancedinstall` below for more +information. Basic Use @@ -147,6 +146,10 @@ want to be passed to the runtime (such as script files or the module to launch): $> py -m this ... +The default runtime can be overridden with the :envvar:`PYTHON_MANAGER_DEFAULT` +environment variable, or a configuration file. See :ref:`pymanager-config` for +information about configuration settings. + To launch a specific runtime, the ``py`` command accepts a ``-V:`` option. This option must be specified before any others. The tag is part or all of the identifier for the runtime; for those from the CPython team, it looks like the @@ -469,6 +472,10 @@ directory (which you may have added to your :envvar:`PATH` environment variable) can be used in a shebang, even if it is not on your :envvar:`PATH`. This allows the use of shebangs like ``/usr/bin/python3.12`` to select a particular runtime. +If no runtimes are installed, or if automatic installation is enabled, the +requested runtime will be installed if necessary. See :ref:`pymanager-config` +for information about configuration settings. + The ``/usr/bin/env`` form of shebang line will also search the :envvar:`PATH` environment variable for unrecognized commands. This corresponds to the behaviour of the Unix ``env`` program, which performs the same search, but @@ -497,6 +504,14 @@ configuration option. installing and uninstalling. +.. _Add-AppxPackage: https://learn.microsoft.com/powershell/module/appx/add-appxpackage + +.. _Remove-AppxPackage: https://learn.microsoft.com/powershell/module/appx/remove-appxpackage + +.. _Add-AppxProvisionedPackage: https://learn.microsoft.com/powershell/module/dism/add-appxprovisionedpackage + +.. _PackageManager: https://learn.microsoft.com/uwp/api/windows.management.deployment.packagemanager + .. _pymanager-advancedinstall: Advanced Installation @@ -509,6 +524,11 @@ per-machine installs to its default location in Program Files. It will attempt to modify the system :envvar:`PATH` environment variable to include this install location, but be sure to validate this on your configuration. +.. note:: + + Windows Server 2019 is the only version of Windows that CPython supports that + does not support MSIX. For Windows Server 2019, you should use the MSI. + Be aware that the MSI package does not bundle any runtimes, and so is not suitable for installs into offline environments without also creating an offline install index. See :ref:`pymanager-offline` and :ref:`pymanager-admin-config` @@ -529,25 +549,62 @@ depending on whether it was installed from python.org or through the Windows Store. Attempting to run the executable directly from Program Files is not recommended. -To programmatically install or uninstall the MSIX without using your -distribution platform's native support, the `Add-AppxPackage -`_ and -`Remove-AppxPackage `_ -PowerShell cmdlets are simplest to use: +To programmatically install the Python install manager, it is easiest to use +WinGet, which is included with all supported versions of Windows: -.. code:: +.. code-block:: powershell + + $> winget install 9NQ7512CXL7T -e --accept-package-agreements --disable-interactivity + + # Optionally run the configuration checker and accept all changes + $> py install --configure -y + +To download the Python install manager and install on another machine, the +following WinGet command will download the required files from the Store to your +Downloads directory (add ``-d `` to customize the output location). +This also generates a YAML file that appears to be unnecessary, as the +downloaded MSIX can be installed by launching or using the commands below. + +.. code-block:: powershell + + $> winget download 9NQ7512CXL7T -e --skip-license --accept-package-agreements --accept-source-agreements + +To programmatically install or uninstall an MSIX using only PowerShell, the +`Add-AppxPackage`_ and `Remove-AppxPackage`_ PowerShell cmdlets are recommended: + +.. code-block:: powershell $> Add-AppxPackage C:\Downloads\python-manager-25.0.msix ... $> Get-AppxPackage PythonSoftwareFoundation.PythonManager | Remove-AppxPackage -The native APIs for package management may be found on the Windows -`PackageManager `_ -class. The :func:`!AddPackageAsync` method installs for the current user, or use -:func:`!StagePackageAsync` followed by :func:`!ProvisionPackageForAllUsersAsync` -to install the Python install manager for all users from the MSIX package. Users -will still need to install their own copies of Python itself, as there is no way -to trigger those installs without being a logged in user. +The latest release can be downloaded and installed by Windows by passing the +AppInstaller file to the Add-AppxPackage command. This installs using the MSIX +on python.org, and is only recommended for cases where installing via the Store +(interactively or using WinGet) is not possible. + +.. code-block:: powershell + + $> Add-AppxPackage -AppInstallerFile https://www.python.org/ftp/python/pymanager/pymanager.appinstaller + +Other tools and APIs may also be used to provision an MSIX package for all users +on a machine, but Python does not consider this a supported scenario. We suggest +looking into the PowerShell `Add-AppxProvisionedPackage`_ cmdlet, the native +Windows `PackageManager`_ class, or the documentation and support for your +deployment tool. + +Regardless of the install method, users will still need to install their own +copies of Python itself, as there is no way to trigger those installs without +being a logged in user. When using the MSIX, the latest version of Python will +be available for all users to install without network access. + +Note that the MSIX downloadable from the Store and from the Python website are +subtly different and cannot be installed at the same time. Wherever possible, +we suggest using the above WinGet commands to download the package from the +Store to reduce the risk of setting up conflicting installs. There are no +licensing restrictions on the Python install manager that would prevent using +the Store package in this way. + .. _pymanager-admin-config: @@ -713,6 +770,16 @@ default). your ``pythonw.exe`` and ``pyw.exe`` aliases are consistent with your others. " + "``pip`` gives me a ""command not found"" error when I type it in my + terminal.","Have you activated a virtual environment? Run the + ``.venv\Scripts\activate`` script in your terminal to activate. + " + "","The package may be available but missing the generated executable. + We recommend using the ``python -m pip`` command instead, or alternatively + the ``python -m pip install --force pip`` command will recreate the + executables and show you the path to add to :envvar:`PATH`. These scripts are + separated for each runtime, and so you may need to add multiple paths. + " .. _windows-embeddable: diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 3c815721a92f8c..f8df802768a945 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -551,11 +551,12 @@ Patterns and classes If you are using classes to structure your data, you can use as a pattern the class name followed by an argument list resembling a constructor. This -pattern has the ability to capture class attributes into variables:: +pattern has the ability to capture instance attributes into variables:: class Point: - x: int - y: int + def __init__(self, x, y): + self.x = x + self.y = y def location(point): match point: diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 2dd205dd2b8831..abf9677fd9cac5 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -1292,7 +1292,7 @@ This section covers specific optimizations independent of the (Contributed by Stefan Behnel in :gh:`68264`.) * Resizing lists is streamlined for the common case, - speeding up :meth:`list.append` by ≈15% + speeding up :meth:`!list.append` by ≈15% and simple :term:`list comprehension`\s by up to 20-30% (Contributed by Dennis Sweeney in :gh:`91165`.) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index a65f59c0a72315..7cfdc287b7fad7 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -2233,6 +2233,8 @@ Deprecated .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-future.rst Removed diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index e20e49325c01d5..77de60f423b4bf 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -730,6 +730,22 @@ asyncio never awaited). (Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.) +* The function and methods named ``create_task`` have received a new + ``**kwargs`` argument that is passed through to the task constructor. + This change was accidentally added in 3.13.3, + and broke the API contract for custom task factories. + Several third-party task factories implemented workarounds for this. + In 3.13.4 and later releases the old factory contract is honored + once again (until 3.14). + To keep the workarounds working, the extra ``**kwargs`` argument still + allows passing additional keyword arguments to :class:`~asyncio.Task` + and to custom task factories. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + (Contributed by Thomas Grainger in :gh:`128307`.) base64 ------ @@ -1980,7 +1996,7 @@ New Deprecations (Contributed by Alex Waygood in :gh:`105566` and :gh:`105570`.) * Deprecate the :func:`typing.no_type_check_decorator` decorator function, - to be removed in in Python 3.15. + to be removed in Python 3.15. After eight years in the :mod:`typing` module, it has yet to be supported by any major type checker. (Contributed by Alex Waygood in :gh:`106309`.) @@ -2531,6 +2547,8 @@ Deprecated C APIs .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-3.18.rst .. include:: ../deprecations/c-api-pending-removal-in-future.rst @@ -2577,7 +2595,7 @@ Build Changes * The :file:`configure` option :option:`--with-system-libmpdec` now defaults to ``yes``. - The bundled copy of ``libmpdecimal`` will be removed in Python 3.15. + The bundled copy of ``libmpdec`` will be removed in Python 3.16. * Python built with :file:`configure` :option:`--with-trace-refs` (tracing references) is now ABI compatible with the Python release build diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 894f011ec86a30..a6fb3e953eae58 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -74,7 +74,7 @@ deferred evaluation of annotations (:pep:`649`), and a new type of interpreter that uses tail calls. The library changes include the addition of a new :mod:`!annotationlib` module -for introspecting and wrapping annotations (:pep:`649`), +for introspecting and wrapping annotations (:pep:`749`), a new :mod:`!compression.zstd` module for Zstandard support (:pep:`784`), plus syntax highlighting in the REPL, as well as the usual deprecations and removals, @@ -82,12 +82,15 @@ and improvements in user-friendliness and correctness. .. PEP-sized items next. -* :ref:`PEP 649: deferred evaluation of annotations ` -* :ref:`PEP 741: Python Configuration C API ` +* :ref:`PEP 779: Free-threaded Python is officially supported ` +* :ref:`PEP 649 and 749: deferred evaluation of annotations ` +* :ref:`PEP 734: Multiple interpreters in the stdlib ` +* :ref:`PEP 741: Python configuration C API ` * :ref:`PEP 750: Template strings ` * :ref:`PEP 758: Allow except and except* expressions without parentheses ` * :ref:`PEP 761: Discontinuation of PGP signatures ` * :ref:`PEP 765: Disallow return/break/continue that exit a finally block ` +* :ref:`Free-threaded mode improvements ` * :ref:`PEP 768: Safe external debugger interface for CPython ` * :ref:`PEP 784: Adding Zstandard to the standard library ` * :ref:`A new type of interpreter ` @@ -122,6 +125,127 @@ of Python. See :ref:`below ` for details. New features ============ +.. _whatsnew314-pep779: + +PEP 779: Free-threaded Python is officially supported +----------------------------------------------------- + +The free-threaded build of Python is now supported and no longer experimental. +This is the start of phase II where free-threaded Python is officially supported +but still optional. + +We are confident that the project is on the right path, and we appreciate the +continued dedication from everyone working to make free-threading ready for +broader adoption across the Python community. + +With these recommendations and the acceptance of this PEP, we as the Python +developer community should broadly advertise that free-threading is a supported +Python build option now and into the future, and that it will not be removed +without a proper deprecation schedule. + +Any decision to transition to phase III, with free-threading as the default or +sole build of Python is still undecided, and dependent on many factors both +within CPython itself and the community. This decision is for the future. + +.. seealso:: + :pep:`779` and its `acceptance + `__. + +.. _whatsnew314-pep734: + +PEP 734: Multiple interpreters in the stdlib +-------------------------------------------- + +The CPython runtime supports running multiple copies of Python in the +same process simultaneously and has done so for over 20 years. +Each of these separate copies is called an "interpreter". +However, the feature had been available only through the C-API. + +That limitation is removed in the 3.14 release, +with the new :mod:`concurrent.interpreters` module. + +There are at least two notable reasons why using multiple interpreters +is worth considering: + +* they support a new (to Python), human-friendly concurrency model +* true multi-core parallelism + +For some use cases, concurrency in software enables efficiency and +can simplify software, at a high level. At the same time, implementing +and maintaining all but the simplest concurrency is often a struggle +for the human brain. That especially applies to plain threads +(for example, :mod:`threading`), where all memory is shared between all threads. + +With multiple isolated interpreters, you can take advantage of a class +of concurrency models, like CSP or the actor model, that have found +success in other programming languages, like Smalltalk, Erlang, +Haskell, and Go. Think of multiple interpreters like threads +but with opt-in sharing. + +Regarding multi-core parallelism: as of the 3.12 release, interpreters +are now sufficiently isolated from one another to be used in parallel. +(See :pep:`684`.) This unlocks a variety of CPU-intensive use cases +for Python that were limited by the :term:`GIL`. + +Using multiple interpreters is similar in many ways to +:mod:`multiprocessing`, in that they both provide isolated logical +"processes" that can run in parallel, with no sharing by default. +However, when using multiple interpreters, an application will use +fewer system resources and will operate more efficiently (since it +stays within the same process). Think of multiple interpreters as +having the isolation of processes with the efficiency of threads. + +.. XXX Add an example or two. +.. XXX Link to the not-yet-added HOWTO doc. + +While the feature has been around for decades, multiple interpreters +have not been used widely, due to low awareness and the lack of a stdlib +module. Consequently, they currently have several notable limitations, +which will improve significantly now that the feature is finally +going mainstream. + +Current limitations: + +* starting each interpreter has not been optimized yet +* each interpreter uses more memory than necessary + (we will be working next on extensive internal sharing between + interpreters) +* there aren't many options *yet* for truly sharing objects or other + data between interpreters (other than :type:`memoryview`) +* many extension modules on PyPI are not compatible with multiple + interpreters yet (stdlib extension modules *are* compatible) +* the approach to writing applications that use multiple isolated + interpreters is mostly unfamiliar to Python users, for now + +The impact of these limitations will depend on future CPython +improvements, how interpreters are used, and what the community solves +through PyPI packages. Depending on the use case, the limitations may +not have much impact, so try it out! + +Furthermore, future CPython releases will reduce or eliminate overhead +and provide utilities that are less appropriate on PyPI. In the +meantime, most of the limitations can also be addressed through +extension modules, meaning PyPI packages can fill any gap for 3.14, and +even back to 3.12 where interpreters were finally properly isolated and +stopped sharing the :term:`GIL`. Likewise, we expect to slowly see +libraries on PyPI for high-level abstractions on top of interpreters. + +Regarding extension modules, work is in progress to update some PyPI +projects, as well as tools like Cython, pybind11, nanobind, and PyO3. +The steps for isolating an extension module are found at +:ref:`isolating-extensions-howto`. Isolating a module has a lot of +overlap with what is required to support +:ref:`free-threading `, +so the ongoing work in the community in that area will help accelerate +support for multiple interpreters. + +Also added in 3.14: :ref:`concurrent.futures.InterpreterPoolExecutor +`. + +.. seealso:: + :pep:`734`. + + .. _whatsnew314-pep750: PEP 750: Template strings @@ -153,10 +277,10 @@ As another example, generating HTML attributes from data: .. code-block:: python attributes = {"src": "shrubbery.jpg", "alt": "looks nice"} - template = t"" - assert html(template) == 'looks nice' + template = t"" + assert html(template) == 'looks nice' -Unlike f-strings, the ``html`` function has access to template attributes +Compared to using an f-string, the ``html`` function has access to template attributes containing the original information: static strings, interpolations, and values from the original scope. Unlike existing templating approaches, t-strings build from the well-known f-string syntax and rules. Template systems thus benefit @@ -295,7 +419,7 @@ As can be seen, the API is similar to the APIs of the :mod:`!lzma` and :mod:`!bz2` modules. (Contributed by Emma Harper Smith, Adam Turner, Gregory P. Smith, Tomas Roun, -Victor Stinner, and Rogdham in :gh:`132983`) +Victor Stinner, and Rogdham in :gh:`132983`.) .. seealso:: :pep:`784`. @@ -341,15 +465,16 @@ For example the following expressions are now valid: .. code-block:: python try: - release_new_sleep_token_album() - except AlbumNotFound, SongsTooGoodToBeReleased: - print("Sorry, no new album this year.") + connect_to_server() + except TimeoutError, ConnectionRefusedError: + print("Network issue encountered.") # The same applies to except* (for exception groups): + try: - release_new_sleep_token_album() - except* AlbumNotFound, SongsTooGoodToBeReleased: - print("Sorry, no new album this year.") + connect_to_server() + except* TimeoutError, ConnectionRefusedError: + print("Network issue encountered.") Check :pep:`758` for more details. @@ -361,18 +486,19 @@ Check :pep:`758` for more details. .. _whatsnew314-pep649: -PEP 649: deferred evaluation of annotations -------------------------------------------- +PEP 649 and 749: deferred evaluation of annotations +--------------------------------------------------- The :term:`annotations ` on functions, classes, and modules are no longer evaluated eagerly. Instead, annotations are stored in special-purpose :term:`annotate functions ` and evaluated only when -necessary. This is specified in :pep:`649` and :pep:`749`. +necessary (except if ``from __future__ import annotations`` is used). +This is specified in :pep:`649` and :pep:`749`. This change is designed to make annotations in Python more performant and more usable in most circumstances. The runtime cost for defining annotations is minimized, but it remains possible to introspect annotations at runtime. -It is usually no longer necessary to enclose annotations in strings if they +It is no longer necessary to enclose annotations in strings if they contain forward references. The new :mod:`annotationlib` module provides tools for inspecting deferred @@ -408,7 +534,8 @@ writing annotations the same way you did with previous versions of Python. You will likely be able to remove quoted strings in annotations, which are frequently used for forward references. Similarly, if you use ``from __future__ import annotations`` to avoid having to write strings in annotations, you may well be able to -remove that import. However, if you rely on third-party libraries that read annotations, +remove that import once you support only Python 3.14 and newer. +However, if you rely on third-party libraries that read annotations, those libraries may need changes to support unquoted annotations before they work as expected. @@ -421,6 +548,11 @@ annotations. For example, you may want to use :func:`annotationlib.get_annotatio with the :attr:`~annotationlib.Format.FORWARDREF` format, as the :mod:`dataclasses` module now does. +The external :pypi:`typing_extensions` package provides partial backports of some of the +functionality of the :mod:`annotationlib` module, such as the :class:`~annotationlib.Format` +enum and the :func:`~annotationlib.get_annotations` function. These can be used to +write cross-version code that takes advantage of the new behavior in Python 3.14. + Related changes ^^^^^^^^^^^^^^^ @@ -432,6 +564,14 @@ functions in the standard library, there are many ways in which your code may not work in Python 3.14. To safeguard your code against future changes, use only the documented functionality of the :mod:`annotationlib` module. +In particular, do not read annotations directly from the namespace dictionary +attribute of type objects. Use :func:`annotationlib.get_annotate_from_class_namespace` +during class construction and :func:`annotationlib.get_annotations` afterwards. + +In previous releases, it was sometimes possible to access class annotations from +an instance of an annotated class. This behavior was undocumented and accidental, +and will no longer work in Python 3.14. + ``from __future__ import annotations`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -443,6 +583,11 @@ Python without deferred evaluation of annotations, reaches its end of life in 20 In Python 3.14, the behavior of code using ``from __future__ import annotations`` is unchanged. +(Contributed by Jelle Zijlstra in :gh:`119180`; :pep:`649` was written by Larry Hastings.) + +.. seealso:: + :pep:`649` and :pep:`749`. + Improved error messages ----------------------- @@ -584,13 +729,32 @@ Improved error messages ^^^^^^ SyntaxError: cannot use subscript as import target -.. seealso:: - :pep:`649`. +* Improved error message when trying to add an instance of an unhashable type to + a :class:`dict` or :class:`set`. (Contributed by CF Bolz-Tereick and Victor Stinner + in :gh:`132828`.) + + .. code-block:: pycon + + >>> s = set() + >>> s.add({'pages': 12, 'grade': 'A'}) + Traceback (most recent call last): + File "", line 1, in + s.add({'pages': 12, 'grade': 'A'}) + ~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + TypeError: cannot use 'dict' as a set element (unhashable type: 'dict') + >>> d = {} + >>> l = [1, 2, 3] + >>> d[l] = 12 + Traceback (most recent call last): + File "", line 1, in + d[l] = 12 + ~^^^ + TypeError: cannot use 'list' as a dict key (unhashable type: 'list') .. _whatsnew314-pep741: -PEP 741: Python Configuration C API +PEP 741: Python configuration C API ----------------------------------- Add a :ref:`PyInitConfig C API ` to configure the Python @@ -679,43 +843,58 @@ Executing the new tool on the running process will yield a table like this: python -m asyncio ps 12345 - tid task id task name coroutine chain awaiter name awaiter id - --------------------------------------------------------------------------------------------------------------------------------------- - 8138752 0x564bd3d0210 Task-1 0x0 - 8138752 0x564bd3d0410 Sundowning _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 - 8138752 0x564bd3d0610 TMBTE _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 - 8138752 0x564bd3d0810 TNDNBTG _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 - 8138752 0x564bd3d0a10 Levitate _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 - 8138752 0x564bd3e0550 DYWTYLM _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 - 8138752 0x564bd3e0710 Aqua Regia _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 + tid task id task name coroutine stack awaiter chain awaiter name awaiter id + ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + 1935500 0x7fc930c18050 Task-1 TaskGroup._aexit -> TaskGroup.__aexit__ -> main 0x0 + 1935500 0x7fc930c18230 Sundowning TaskGroup._aexit -> TaskGroup.__aexit__ -> album TaskGroup._aexit -> TaskGroup.__aexit__ -> main Task-1 0x7fc930c18050 + 1935500 0x7fc93173fa50 TMBTE TaskGroup._aexit -> TaskGroup.__aexit__ -> album TaskGroup._aexit -> TaskGroup.__aexit__ -> main Task-1 0x7fc930c18050 + 1935500 0x7fc93173fdf0 TNDNBTG sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album Sundowning 0x7fc930c18230 + 1935500 0x7fc930d32510 Levitate sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album Sundowning 0x7fc930c18230 + 1935500 0x7fc930d32890 DYWTYLM sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album TMBTE 0x7fc93173fa50 + 1935500 0x7fc93161ec30 Aqua Regia sleep -> play TaskGroup._aexit -> TaskGroup.__aexit__ -> album TMBTE 0x7fc93173fa50 - -or: +or a tree like this: .. code-block:: bash python -m asyncio pstree 12345 └── (T) Task-1 - └── main - └── __aexit__ - └── _aexit + └── main example.py:13 + └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 ├── (T) Sundowning - │ └── album - │ └── __aexit__ - │ └── _aexit + │ └── album example.py:8 + │ └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + │ └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 │ ├── (T) TNDNBTG + │ │ └── play example.py:4 + │ │ └── sleep Lib/asyncio/tasks.py:702 │ └── (T) Levitate + │ └── play example.py:4 + │ └── sleep Lib/asyncio/tasks.py:702 └── (T) TMBTE - └── album - └── __aexit__ - └── _aexit + └── album example.py:8 + └── TaskGroup.__aexit__ Lib/asyncio/taskgroups.py:72 + └── TaskGroup._aexit Lib/asyncio/taskgroups.py:121 ├── (T) DYWTYLM + │ └── play example.py:4 + │ └── sleep Lib/asyncio/tasks.py:702 └── (T) Aqua Regia + └── play example.py:4 + └── sleep Lib/asyncio/tasks.py:702 If a cycle is detected in the async await graph (which could indicate a programming issue), the tool raises an error and lists the cycle paths that -prevent tree construction. +prevent tree construction: + +.. code-block:: bash + + python -m asyncio pstree 12345 + + ERROR: await-graph contains cycles - cannot print a tree! + + cycle: Task-2 → Task-3 → Task-2 (Contributed by Pablo Galindo, Łukasz Langa, Yury Selivanov, and Marta Gomez Macias in :gh:`91048`.) @@ -772,6 +951,50 @@ For further information on how to build Python, see (Contributed by Ken Jin in :gh:`128563`, with ideas on how to implement this in CPython by Mark Shannon, Garrett Gu, Haoran Xu, and Josh Haberman.) +.. _whatsnew314-free-threaded-cpython: + +Free-threaded mode +------------------ + +Free-threaded mode (:pep:`703`), initially added in 3.13, has been significantly improved. +The implementation described in PEP 703 was finished, including C API changes, +and temporary workarounds in the interpreter were replaced with more permanent solutions. +The specializing adaptive interpreter (:pep:`659`) is now enabled in free-threaded mode, +which along with many other optimizations greatly improves its performance. +The performance penalty on single-threaded code in free-threaded mode is now roughly 5-10%, +depending on platform and C compiler used. + +This work was done by many contributors: Sam Gross, Matt Page, Neil Schemenauer, +Thomas Wouters, Donghee Na, Kirill Podoprigora, Ken Jin, Itamar Oren, +Brett Simmers, Dino Viehland, Nathan Goldbaum, Ralf Gommers, Lysandros Nikolaou, +Kumar Aditya, Edgar Margffoy, and many others. + +Some of these contributors are employed by Meta, which has continued to provide +significant engineering resources to support this project. + +From 3.14, when compiling extension modules for the free-threaded build of +CPython on Windows, the preprocessor variable ``Py_GIL_DISABLED`` now needs to +be specified by the build backend, as it will no longer be determined +automatically by the C compiler. For a running interpreter, the setting that +was used at compile time can be found using :func:`sysconfig.get_config_var`. + +A new flag has been added, :data:`~sys.flags.context_aware_warnings`. This +flag defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then the :class:`warnings.catch_warnings` context +manager uses a context variable for warning filters. This makes the context +manager behave predicably when used with multiple threads or asynchronous +tasks. + +A new flag has been added, :data:`~sys.flags.thread_inherit_context`. This flag +defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then threads created with :class:`threading.Thread` +start with a copy of the :class:`~contextvars.Context()` of the caller of +:meth:`~threading.Thread.start`. Most significantly, this makes the warning +filtering context established by :class:`~warnings.catch_warnings` be +"inherited" by threads (or asyncio tasks) started within that context. It also +affects other modules that use context variables, such as the :mod:`decimal` +context manager. + .. _whatsnew314-pyrepl-highlighting: @@ -822,6 +1045,18 @@ Please report any bugs or major performance regressions that you encounter! .. seealso:: :pep:`744` +Concurrent safe warnings control +-------------------------------- + +The :class:`warnings.catch_warnings` context manager will now optionally +use a context variable for warning filters. This is enabled by setting +the :data:`~sys.flags.context_aware_warnings` flag, either with the ``-X`` +command-line option or an environment variable. This gives predicable +warnings control when using :class:`~warnings.catch_warnings` combined with +multiple threads or asynchronous tasks. The flag defaults to true for the +free-threaded build and false for the GIL-enabled build. + +(Contributed by Neil Schemenauer and Kumar Aditya in :gh:`130010`.) Other language changes ====================== @@ -1001,6 +1236,24 @@ ast (Contributed by Semyon Moroz in :gh:`133367`.) +asyncio +------- + +* The function and methods named :func:`!create_task` now take an arbitrary + list of keyword arguments. All keyword arguments are passed to the + :class:`~asyncio.Task` constructor or the custom task factory. + (See :meth:`~asyncio.loop.set_task_factory` for details.) + The ``name`` and ``context`` keyword arguments are no longer special; + the name should now be set using the ``name`` keyword argument of the factory, + and ``context`` may be ``None``. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + (Contributed by Thomas Grainger in :gh:`128307`.) + + bdb --- @@ -1023,6 +1276,8 @@ calendar concurrent.futures ------------------ +.. _whatsnew314-concurrent-futures-interp-pool: + * Add :class:`~concurrent.futures.InterpreterPoolExecutor`, which exposes "subinterpreters" (multiple Python interpreters in the same process) to Python code. This is separate from the proposed API @@ -1060,6 +1315,14 @@ concurrent.futures buffer. (Contributed by Enzo Bonnal and Josh Rosenberg in :gh:`74028`.) +configparser +------------ + +* Security fix: will no longer write config files it cannot read. Attempting + to :meth:`configparser.ConfigParser.write` keys containing delimiters or + beginning with the section header pattern will raise a + :class:`configparser.InvalidWriteError`. + (Contributed by Jacob Lincoln in :gh:`129270`.) contextvars ----------- @@ -1119,6 +1382,9 @@ ctypes making it a :term:`generic type`. (Contributed by Brian Schubert in :gh:`132168`.) +* :mod:`ctypes` now supports :term:`free-threading builds `. + (Contributed by Kumar Aditya and Peter Bierma in :gh:`127945`.) + curses ------ @@ -1520,6 +1786,16 @@ os (Contributed by Cody Maloney in :gh:`129205`.) +os.path +------- + +* The *strict* parameter to :func:`os.path.realpath` accepts a new value, + :data:`os.path.ALLOW_MISSING`. + If used, errors other than :exc:`FileNotFoundError` will be re-raised; + the resulting path can be missing but it will be free of symlinks. + (Contributed by Petr Viktorin for :cve:`2025-4517`.) + + pathlib ------- @@ -1708,6 +1984,28 @@ sysconfig (Contributed by Xuehai Pan in :gh:`131799`.) +tarfile +------- + +* :func:`~tarfile.data_filter` now normalizes symbolic link targets in order to + avoid path traversal attacks. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2025-4138`.) +* :func:`~tarfile.TarFile.extractall` now skips fixing up directory attributes + when a directory was removed or replaced by another kind of file. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + now (re-)apply the extraction filter when substituting a link (hard or + symbolic) with a copy of another archive member, and when fixing up + directory attributes. + The former raises a new exception, :exc:`~tarfile.LinkFallbackError`. + (Contributed by Petr Viktorin for :cve:`2025-4330` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + no longer extract rejected members when + :func:`~tarfile.TarFile.errorlevel` is zero. + (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` + and :cve:`2025-4435`.) + + threading --------- @@ -1916,11 +2214,19 @@ Optimizations asyncio ------- -* :mod:`asyncio` now uses double linked list implementation for native tasks - which speeds up execution by 10% on standard pyperformance benchmarks and - reduces memory usage. +* :mod:`asyncio` has a new per-thread double linked list implementation internally for + :class:`native tasks ` which speeds up execution by 10-20% on standard + pyperformance benchmarks and reduces memory usage. + This enables external introspection tools such as + :ref:`python -m asyncio pstree ` + to introspect the call graph of asyncio tasks running in all threads. (Contributed by Kumar Aditya in :gh:`107803`.) +* :mod:`asyncio` has first class support for :term:`free-threading builds `. + This enables parallel execution of multiple event loops across different threads and scales + linearly with the number of threads. + (Contributed by Kumar Aditya in :gh:`128002`.) + * :mod:`asyncio` has new utility functions for introspecting and printing the program's call graph: :func:`asyncio.capture_call_graph` and :func:`asyncio.print_call_graph`. @@ -2002,7 +2308,6 @@ Deprecated * :class:`asyncio.WindowsProactorEventLoopPolicy` * :func:`asyncio.get_event_loop_policy` * :func:`asyncio.set_event_loop_policy` - * :func:`asyncio.set_event_loop` Users should use :func:`asyncio.run` or :class:`asyncio.Runner` with *loop_factory* to use the desired event loop implementation. @@ -2335,7 +2640,9 @@ pty sqlite3 ------- -* Remove :data:`!version` and :data:`!version_info` from :mod:`sqlite3`. +* Remove :data:`!version` and :data:`!version_info` from :mod:`sqlite3`; + use :data:`~sqlite3.sqlite_version` and :data:`~sqlite3.sqlite_version_info` + for the actual version number of the runtime SQLite library. (Contributed by Hugo van Kemenade in :gh:`118924`.) * Disallow using a sequence of parameters with named placeholders. @@ -2410,6 +2717,11 @@ Changes in the Python API See :ref:`above ` for more details. (Contributed by Jelle Zijlstra in :gh:`105499`.) +* The runtime behavior of annotations has changed in various ways; see + :ref:`above ` for details. While most code that interacts + with annotations should continue to work, some undocumented details may behave + differently. + Build changes ============= @@ -2451,6 +2763,7 @@ New features * :c:func:`PyUnicodeWriter_Discard` * :c:func:`PyUnicodeWriter_Finish` * :c:func:`PyUnicodeWriter_Format` + * :c:func:`PyUnicodeWriter_WriteASCII` * :c:func:`PyUnicodeWriter_WriteChar` * :c:func:`PyUnicodeWriter_WriteRepr` * :c:func:`PyUnicodeWriter_WriteStr` @@ -2533,8 +2846,8 @@ New features * Add :c:func:`PyType_GetBaseByToken` and :c:data:`Py_tp_token` slot for easier superclass identification, which attempts to resolve the `type checking issue - `__ mentioned in :pep:`630` - (:gh:`124153`). + `__ mentioned in :pep:`630`. + (Contributed in :gh:`124153`.) * Add :c:func:`PyUnicode_Equal` function to the limited C API: test if two strings are equal. @@ -2727,7 +3040,7 @@ Deprecated :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) `. * :c:func:`!_PyUnicodeWriter_WriteASCIIString`: replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with - :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. + :c:func:`PyUnicodeWriter_WriteASCII(writer, str) `. * :c:func:`!_PyUnicodeWriter_WriteLatin1String`: replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. @@ -2740,6 +3053,8 @@ Deprecated .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.16.rst + .. include:: ../deprecations/c-api-pending-removal-in-3.18.rst .. include:: ../deprecations/c-api-pending-removal-in-future.rst diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 896e8f4a489649..40d4a27bff9fee 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -423,8 +423,8 @@ digests. It skips MD5 on platforms that block MD5 digest. fcntl ----- -Added constants :const:`~fcntl.F_OFD_GETLK`, :const:`~fcntl.F_OFD_SETLK` -and :const:`~fcntl.F_OFD_SETLKW`. +Added constants :const:`!fcntl.F_OFD_GETLK`, :const:`!fcntl.F_OFD_SETLK` +and :const:`!fcntl.F_OFD_SETLKW`. (Contributed by Donghee Na in :issue:`38602`.) ftplib @@ -644,7 +644,7 @@ attribute. random ------ -Added a new :attr:`random.Random.randbytes` method: generate random bytes. +Added a new :meth:`random.Random.randbytes` method: generate random bytes. (Contributed by Victor Stinner in :issue:`40286`.) signal @@ -776,7 +776,7 @@ Optimizations :pep:`590` vectorcall protocol. (Contributed by Donghee Na, Mark Shannon, Jeroen Demeyer and Petr Viktorin in :issue:`37207`.) -* Optimized :func:`~set.difference_update` for the case when the other set +* Optimized :meth:`!set.difference_update` for the case when the other set is much larger than the base set. (Suggested by Evgeny Kapun with code contributed by Michele Orrù in :issue:`8425`.) diff --git a/Grammar/Tokens b/Grammar/Tokens index e40a4437afb009..0547e6ed08f79a 100644 --- a/Grammar/Tokens +++ b/Grammar/Tokens @@ -1,3 +1,8 @@ +# When adding new tokens, remember to update the PEG generator in +# Tools/peg_generator/pegen/parser_generator.py +# This will ensure that older versions of Python can generate a Python parser +# using "python -m pegen python ". + ENDMARKER NAME NUMBER diff --git a/Grammar/python.gram b/Grammar/python.gram index f3ef990923eec3..8b15cf29fa02e2 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -96,12 +96,12 @@ func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMA statements[asdl_stmt_seq*]: a=statement+ { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a)) } -statement[asdl_stmt_seq*]: - | a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } +statement[asdl_stmt_seq*]: + | a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | a[asdl_stmt_seq*]=simple_stmts { a } single_compound_stmt[asdl_stmt_seq*]: - | a=compound_stmt { + | a=compound_stmt { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a)) } statement_newline[asdl_stmt_seq*]: @@ -449,9 +449,9 @@ except_block[excepthandler_ty]: _PyAST_ExceptHandler(e, ((expr_ty) t)->v.Name.id, b, EXTRA) } | 'except' e=expressions ':' b=block { CHECK_VERSION( - excepthandler_ty, - 14, - "except expressions without parentheses are", + excepthandler_ty, + 14, + "except expressions without parentheses are", _PyAST_ExceptHandler(e, NULL, b, EXTRA)) } | 'except' ':' b=block { _PyAST_ExceptHandler(NULL, NULL, b, EXTRA) } | invalid_except_stmt @@ -463,9 +463,9 @@ except_star_block[excepthandler_ty]: _PyAST_ExceptHandler(e, ((expr_ty) t)->v.Name.id, b, EXTRA) } | 'except' '*' e=expressions ':' b=block { CHECK_VERSION( - excepthandler_ty, - 14, - "except expressions without parentheses are", + excepthandler_ty, + 14, + "except expressions without parentheses are", _PyAST_ExceptHandler(e, NULL, b, EXTRA)) } | invalid_except_star_stmt finally_block[asdl_stmt_seq*]: @@ -977,11 +977,11 @@ tstring_middle[expr_ty]: | tstring_replacement_field | t=TSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) } tstring[expr_ty] (memo): - | a=TSTRING_START b=tstring_middle* c=TSTRING_END { + | a=TSTRING_START b=tstring_middle* c=TSTRING_END { CHECK_VERSION( - expr_ty, - 14, - "t-strings are", + expr_ty, + 14, + "t-strings are", _PyPegen_template_str(p, a, (asdl_expr_seq*)b, c)) } string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_string(p, s) } @@ -1383,11 +1383,11 @@ invalid_import: RAISE_SYNTAX_ERROR_STARTING_FROM(token, "Expected one or more names after 'import'") } invalid_dotted_as_name: | dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) a=expression { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use %s as import target", _PyPegen_get_expr_name(a)) } invalid_import_from_as_name: | NAME 'as' !(NAME (',' | ')' | NEWLINE)) a=expression { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot use %s as import target", _PyPegen_get_expr_name(a)) } invalid_import_from_targets: @@ -1418,7 +1418,7 @@ invalid_except_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } - | 'except' expression 'as' a=expression { + | 'except' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except statement with %s", _PyPegen_get_expr_name(a)) } invalid_except_star_stmt: @@ -1426,7 +1426,7 @@ invalid_except_star_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' '*' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' '*' (NEWLINE | ':') { RAISE_SYNTAX_ERROR("expected one or more exception types") } - | 'except' '*' expression 'as' a=expression { + | 'except' '*' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except* statement with %s", _PyPegen_get_expr_name(a)) } invalid_finally_stmt: diff --git a/Include/Python.h b/Include/Python.h index 64be80145890a3..f34d581f0b4c91 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -59,6 +59,14 @@ # include // __readgsqword() #endif +// Suppress known warnings in Python header files. +#if defined(_MSC_VER) +// Warning that alignas behaviour has changed. Doesn't affect us, because we +// never relied on the old behaviour. +#pragma warning(push) +#pragma warning(disable: 5274) +#endif + // Include Python header files #include "pyport.h" #include "pymacro.h" @@ -138,4 +146,9 @@ #include "cpython/pyfpe.h" #include "cpython/tracemalloc.h" +// Restore warning filter +#ifdef _MSC_VER +#pragma warning(pop) +#endif + #endif /* !Py_PYTHON_H */ diff --git a/Include/abstract.h b/Include/abstract.h index b9199fc03a399a..80f3298701d249 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -138,7 +138,12 @@ extern "C" { Delete attribute named attr_name, for object o. Returns -1 on failure. - This is the equivalent of the Python statement: del o.attr_name. */ + This is the equivalent of the Python statement: del o.attr_name. + + Implemented as a macro in the limited C API 3.12 and older. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttrString(O, A) PyObject_SetAttrString((O), (A), NULL) +#endif /* Implemented elsewhere: @@ -147,7 +152,12 @@ extern "C" { Delete attribute named attr_name, for object o. Returns -1 on failure. This is the equivalent of the Python - statement: del o.attr_name. */ + statement: del o.attr_name. + + Implemented as a macro in the limited C API 3.12 and older. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttr(O, A) PyObject_SetAttr((O), (A), NULL) +#endif /* Implemented elsewhere: diff --git a/Include/audit.h b/Include/audit.h index 793b7077e1027b..9be54ad4411096 100644 --- a/Include/audit.h +++ b/Include/audit.h @@ -1,5 +1,5 @@ -#ifndef Py_AUDIT_H -#define Py_AUDIT_H +#ifndef _Py_AUDIT_H +#define _Py_AUDIT_H #ifdef __cplusplus extern "C" { #endif @@ -18,13 +18,13 @@ PyAPI_FUNC(int) PySys_AuditTuple( #ifndef Py_LIMITED_API -# define Py_CPYTHON_AUDIT_H +# define _Py_CPYTHON_AUDIT_H # include "cpython/audit.h" -# undef Py_CPYTHON_AUDIT_H +# undef _Py_CPYTHON_AUDIT_H #endif #ifdef __cplusplus } #endif -#endif /* !Py_AUDIT_H */ +#endif /* !_Py_AUDIT_H */ diff --git a/Include/boolobject.h b/Include/boolobject.h index 3037e61bbf6d0c..b56e2baecaa36c 100644 --- a/Include/boolobject.h +++ b/Include/boolobject.h @@ -34,9 +34,16 @@ PyAPI_FUNC(int) Py_IsTrue(PyObject *x); PyAPI_FUNC(int) Py_IsFalse(PyObject *x); #define Py_IsFalse(x) Py_Is((x), Py_False) -/* Macros for returning Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE return Py_True -#define Py_RETURN_FALSE return Py_False +/* Macros for returning Py_True or Py_False, respectively. + * Only treat Py_True and Py_False as immortal in the limited C API 3.12 + * and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_TRUE return Py_NewRef(Py_True) +# define Py_RETURN_FALSE return Py_NewRef(Py_False) +#else +# define Py_RETURN_TRUE return Py_True +# define Py_RETURN_FALSE return Py_False +#endif /* Function to return a bool from a C long */ PyAPI_FUNC(PyObject *) PyBool_FromLong(long); diff --git a/Include/ceval.h b/Include/ceval.h index 32ab38972e548f..e9df8684996e23 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -133,13 +133,6 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); #define FVS_MASK 0x4 #define FVS_HAVE_SPEC 0x4 -/* Special methods used by LOAD_SPECIAL */ -#define SPECIAL___ENTER__ 0 -#define SPECIAL___EXIT__ 1 -#define SPECIAL___AENTER__ 2 -#define SPECIAL___AEXIT__ 3 -#define SPECIAL_MAX 3 - #ifndef Py_LIMITED_API # define Py_CPYTHON_CEVAL_H # include "cpython/ceval.h" diff --git a/Include/cpython/audit.h b/Include/cpython/audit.h index 3c5c7a8c06091d..536f9248632097 100644 --- a/Include/cpython/audit.h +++ b/Include/cpython/audit.h @@ -1,4 +1,4 @@ -#ifndef Py_CPYTHON_AUDIT_H +#ifndef _Py_CPYTHON_AUDIT_H # error "this header file must not be included directly" #endif diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 18249b95befe65..598cd330bc9ca9 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -97,11 +97,6 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) { } #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func)) -static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) { - return _PyFunction_CAST(func)->func_builtins; -} -#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func)) - static inline PyObject* PyFunction_GET_MODULE(PyObject *func) { return _PyFunction_CAST(func)->func_module; } diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 7f1bc363861ddf..be582122118e44 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -28,10 +28,10 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_OPCODE 7 /* Remote debugger support */ -#define MAX_SCRIPT_PATH_SIZE 512 -typedef struct _remote_debugger_support { +#define Py_MAX_SCRIPT_PATH_SIZE 512 +typedef struct { int32_t debugger_pending_call; - char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; + char debugger_script_path[Py_MAX_SCRIPT_PATH_SIZE]; } _PyRemoteDebuggerSupport; typedef struct _err_stackitem { @@ -61,6 +61,8 @@ typedef struct _stack_chunk { PyObject * data[1]; /* Variable sized */ } _PyStackChunk; +/* Minimum size of data stack chunk */ +#define _PY_DATA_STACK_CHUNK_SIZE (16*1024) struct _ts { /* See Python/ceval.c for comments explaining most fields */ diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 136f5d5c5f8425..3d0414f5291fe4 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -478,6 +478,10 @@ PyAPI_FUNC(int) PyUnicodeWriter_WriteUTF8( PyUnicodeWriter *writer, const char *str, Py_ssize_t size); +PyAPI_FUNC(int) PyUnicodeWriter_WriteASCII( + PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size); PyAPI_FUNC(int) PyUnicodeWriter_WriteWideChar( PyUnicodeWriter *writer, const wchar_t *str, diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index d97f51b8eefbe5..a7daa3a40a4c0b 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -634,10 +634,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl mi_track_mem_defined(block,sizeof(mi_block_t)); mi_block_t* next; #ifdef MI_ENCODE_FREELIST - next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); + next = (mi_block_t*)mi_ptr_decode(null, mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next), keys); #else MI_UNUSED(keys); MI_UNUSED(null); - next = (mi_block_t*)block->next; + next = (mi_block_t*)mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); return next; @@ -646,10 +646,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { mi_track_mem_undefined(block,sizeof(mi_block_t)); #ifdef MI_ENCODE_FREELIST - block->next = mi_ptr_encode(null, next, keys); + mi_atomic_store_relaxed(&block->next, mi_ptr_encode(null, next, keys)); #else MI_UNUSED(keys); MI_UNUSED(null); - block->next = (mi_encoded_t)next; + mi_atomic_store_relaxed(&block->next, (mi_encoded_t)next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); } diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index 354839ba955b36..70c600e920bad1 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -235,7 +235,7 @@ typedef size_t mi_threadid_t; // free lists contain blocks typedef struct mi_block_s { - mi_encoded_t next; + _Atomic(mi_encoded_t) next; } mi_block_t; @@ -678,7 +678,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); // Thread Local data // ------------------------------------------------------ -// A "span" is is an available range of slices. The span queues keep +// A "span" is an available range of slices. The span queues keep // track of slice spans of at most the given `slice_count` (but more than the previous size class). typedef struct mi_span_queue_s { mi_slice_t* first; diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 300e7f4896a39e..8ea9b3ebb88454 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -20,8 +20,9 @@ extern PyObject* _PyBytes_FromHex( // Helper for PyBytes_DecodeEscape that detects invalid escape chars. // Export for test_peg_generator. -PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t, - const char *, const char **); +PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, + const char *, + int *, const char **); // Substring Search. diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 3d8247df31ce32..528154c93348d2 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -239,6 +239,16 @@ static inline void _Py_LeaveRecursiveCall(void) { extern _PyInterpreterFrame* _PyEval_GetFrame(void); +extern PyObject * _PyEval_GetGlobalsFromRunningMain(PyThreadState *); +extern int _PyEval_EnsureBuiltins( + PyThreadState *, + PyObject *, + PyObject **p_builtins); +extern int _PyEval_EnsureBuiltinsWithModule( + PyThreadState *, + PyObject *, + PyObject **p_builtins); + PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func); /* Handle signals, pending calls, GIL drop request @@ -353,6 +363,13 @@ PyAPI_FUNC(_PyStackRef) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyS extern int _PyRunRemoteDebugger(PyThreadState *tstate); #endif +/* Special methods used by LOAD_SPECIAL */ +#define SPECIAL___ENTER__ 0 +#define SPECIAL___EXIT__ 1 +#define SPECIAL___AENTER__ 2 +#define SPECIAL___AEXIT__ 3 +#define SPECIAL_MAX 3 + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index b135e30b7ad62a..37a747aa4e3e46 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -614,6 +614,47 @@ PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts( PyObject *globalsns, PyObject *builtinsns); + +/* "Stateless" code is a function or code object which does not rely on + * external state or internal state. It may rely on arguments and + * builtins, but not globals or a closure. Thus it does not rely + * on __globals__ or __closure__, and a stateless function + * is equivalent to its code object. + * + * Stateless code also does not keep any persistent state + * of its own, so it can't have any executors, monitoring, + * instrumentation, or "extras" (i.e. co_extra). + * + * Stateless code may create nested functions, including closures. + * However, nested functions must themselves be stateless, except they + * *can* close on the enclosing locals. + * + * Stateless code may return any value, including nested functions and closures. + * + * Stateless code that takes no arguments and doesn't return anything + * may be treated like a script. + * + * We consider stateless code to be "portable" if it does not return + * any object that holds a reference to any of the code's locals. Thus + * generators and coroutines are not portable. Likewise a function + * that returns a closure is not portable. The concept of + * portability is useful in cases where the code is run + * in a different execution context than where + * the return value will be used. */ + +PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **); +PyAPI_FUNC(int) _PyCode_CheckNoExternalState( + PyCodeObject *, + _PyCode_var_counts_t *, + const char **); +PyAPI_FUNC(int) _PyCode_VerifyStateless( + PyThreadState *, + PyCodeObject *, + PyObject *globalnames, + PyObject *globalsns, + PyObject *builtinsns); + +PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **); PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *); diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index 9de61ef54125d5..81faffac194171 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -131,7 +131,23 @@ PyAPI_FUNC(void) _PyXIData_Clear(PyInterpreterState *, _PyXIData_t *); /* getting cross-interpreter data */ -typedef int (*xidatafunc)(PyThreadState *tstate, PyObject *, _PyXIData_t *); +typedef int xidata_fallback_t; +#define _PyXIDATA_XIDATA_ONLY (0) +#define _PyXIDATA_FULL_FALLBACK (1) + +// Technically, we don't need two different function types; +// we could go with just the fallback one. However, only container +// types like tuple need it, so always having the extra arg would be +// a bit unfortunate. It's also nice to be able to clearly distinguish +// between types that might call _PyObject_GetXIData() and those that won't. +// +typedef int (*xidatafunc)(PyThreadState *, PyObject *, _PyXIData_t *); +typedef int (*xidatafbfunc)( + PyThreadState *, PyObject *, xidata_fallback_t, _PyXIData_t *); +typedef struct { + xidatafunc basic; + xidatafbfunc fallback; +} _PyXIData_getdata_t; PyAPI_FUNC(PyObject *) _PyXIData_GetNotShareableErrorType(PyThreadState *); PyAPI_FUNC(void) _PyXIData_SetNotShareableError(PyThreadState *, const char *); @@ -140,16 +156,21 @@ PyAPI_FUNC(void) _PyXIData_FormatNotShareableError( const char *, ...); -PyAPI_FUNC(xidatafunc) _PyXIData_Lookup( +PyAPI_FUNC(_PyXIData_getdata_t) _PyXIData_Lookup( PyThreadState *, PyObject *); PyAPI_FUNC(int) _PyObject_CheckXIData( PyThreadState *, PyObject *); +PyAPI_FUNC(int) _PyObject_GetXIDataNoFallback( + PyThreadState *, + PyObject *, + _PyXIData_t *); PyAPI_FUNC(int) _PyObject_GetXIData( PyThreadState *, PyObject *, + xidata_fallback_t, _PyXIData_t *); // _PyObject_GetXIData() for bytes @@ -191,6 +212,21 @@ PyAPI_FUNC(int) _PyCode_GetXIData( PyThreadState *, PyObject *, _PyXIData_t *); +PyAPI_FUNC(int) _PyCode_GetScriptXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); +PyAPI_FUNC(int) _PyCode_GetPureScriptXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); + +// _PyObject_GetXIData() for functions +PyAPI_FUNC(PyObject *) _PyFunction_FromXIData(_PyXIData_t *); +PyAPI_FUNC(int) _PyFunction_GetXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); /* using cross-interpreter data */ @@ -267,10 +303,10 @@ typedef struct _excinfo { const char *errdisplay; } _PyXI_excinfo; -PyAPI_FUNC(int) _PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc); +PyAPI_FUNC(_PyXI_excinfo *) _PyXI_NewExcInfo(PyObject *exc); +PyAPI_FUNC(void) _PyXI_FreeExcInfo(_PyXI_excinfo *info); PyAPI_FUNC(PyObject *) _PyXI_FormatExcInfo(_PyXI_excinfo *info); PyAPI_FUNC(PyObject *) _PyXI_ExcInfoAsObject(_PyXI_excinfo *info); -PyAPI_FUNC(void) _PyXI_ClearExcInfo(_PyXI_excinfo *info); typedef enum error_code { @@ -281,42 +317,30 @@ typedef enum error_code { _PyXI_ERR_ALREADY_RUNNING = -4, _PyXI_ERR_MAIN_NS_FAILURE = -5, _PyXI_ERR_APPLY_NS_FAILURE = -6, - _PyXI_ERR_NOT_SHAREABLE = -7, + _PyXI_ERR_PRESERVE_FAILURE = -7, + _PyXI_ERR_EXC_PROPAGATION_FAILURE = -8, + _PyXI_ERR_NOT_SHAREABLE = -9, } _PyXI_errcode; +typedef struct xi_failure _PyXI_failure; -typedef struct _sharedexception { - // The originating interpreter. - PyInterpreterState *interp; - // The kind of error to propagate. - _PyXI_errcode code; - // The exception information to propagate, if applicable. - // This is populated only for some error codes, - // but always for _PyXI_ERR_UNCAUGHT_EXCEPTION. - _PyXI_excinfo uncaught; -} _PyXI_error; - -PyAPI_FUNC(PyObject *) _PyXI_ApplyError(_PyXI_error *err); - - -typedef struct xi_session _PyXI_session; -typedef struct _sharedns _PyXI_namespace; +PyAPI_FUNC(_PyXI_failure *) _PyXI_NewFailure(void); +PyAPI_FUNC(void) _PyXI_FreeFailure(_PyXI_failure *); +PyAPI_FUNC(_PyXI_errcode) _PyXI_GetFailureCode(_PyXI_failure *); +PyAPI_FUNC(int) _PyXI_InitFailure(_PyXI_failure *, _PyXI_errcode, PyObject *); +PyAPI_FUNC(void) _PyXI_InitFailureUTF8( + _PyXI_failure *, + _PyXI_errcode, + const char *); -PyAPI_FUNC(void) _PyXI_FreeNamespace(_PyXI_namespace *ns); -PyAPI_FUNC(_PyXI_namespace *) _PyXI_NamespaceFromNames(PyObject *names); -PyAPI_FUNC(int) _PyXI_FillNamespaceFromDict( - _PyXI_namespace *ns, - PyObject *nsobj, - _PyXI_session *session); -PyAPI_FUNC(int) _PyXI_ApplyNamespace( - _PyXI_namespace *ns, - PyObject *nsobj, - PyObject *dflt); +PyAPI_FUNC(int) _PyXI_UnwrapNotShareableError( + PyThreadState *, + _PyXI_failure *); // A cross-interpreter session involves entering an interpreter -// (_PyXI_Enter()), doing some work with it, and finally exiting -// that interpreter (_PyXI_Exit()). +// with _PyXI_Enter(), doing some work with it, and finally exiting +// that interpreter with _PyXI_Exit(). // // At the boundaries of the session, both entering and exiting, // data may be exchanged between the previous interpreter and the @@ -324,48 +348,40 @@ PyAPI_FUNC(int) _PyXI_ApplyNamespace( // isolation between interpreters. This includes setting objects // in the target's __main__ module on the way in, and capturing // uncaught exceptions on the way out. -struct xi_session { - // Once a session has been entered, this is the tstate that was - // current before the session. If it is different from cur_tstate - // then we must have switched interpreters. Either way, this will - // be the current tstate once we exit the session. - PyThreadState *prev_tstate; - // Once a session has been entered, this is the current tstate. - // It must be current when the session exits. - PyThreadState *init_tstate; - // This is true if init_tstate needs cleanup during exit. - int own_init_tstate; - - // This is true if, while entering the session, init_thread took - // "ownership" of the interpreter's __main__ module. This means - // it is the only thread that is allowed to run code there. - // (Caveat: for now, users may still run exec() against the - // __main__ module's dict, though that isn't advisable.) - int running; - // This is a cached reference to the __dict__ of the entered - // interpreter's __main__ module. It is looked up when at the - // beginning of the session as a convenience. - PyObject *main_ns; - - // This is set if the interpreter is entered and raised an exception - // that needs to be handled in some special way during exit. - _PyXI_errcode *error_override; - // This is set if exit captured an exception to propagate. - _PyXI_error *error; - - // -- pre-allocated memory -- - _PyXI_error _error; - _PyXI_errcode _error_override; -}; +typedef struct xi_session _PyXI_session; + +PyAPI_FUNC(_PyXI_session *) _PyXI_NewSession(void); +PyAPI_FUNC(void) _PyXI_FreeSession(_PyXI_session *); + +typedef struct { + PyObject *preserved; + PyObject *excinfo; + _PyXI_errcode errcode; +} _PyXI_session_result; +PyAPI_FUNC(void) _PyXI_ClearResult(_PyXI_session_result *); PyAPI_FUNC(int) _PyXI_Enter( _PyXI_session *session, PyInterpreterState *interp, - PyObject *nsupdates); -PyAPI_FUNC(void) _PyXI_Exit(_PyXI_session *session); - -PyAPI_FUNC(PyObject *) _PyXI_ApplyCapturedException(_PyXI_session *session); -PyAPI_FUNC(int) _PyXI_HasCapturedException(_PyXI_session *session); + PyObject *nsupdates, + _PyXI_session_result *); +PyAPI_FUNC(int) _PyXI_Exit( + _PyXI_session *, + _PyXI_failure *, + _PyXI_session_result *); + +PyAPI_FUNC(PyObject *) _PyXI_GetMainNamespace( + _PyXI_session *, + _PyXI_failure *); + +PyAPI_FUNC(int) _PyXI_Preserve( + _PyXI_session *, + const char *, + PyObject *, + _PyXI_failure *); +PyAPI_FUNC(PyObject *) _PyXI_GetPreserved( + _PyXI_session_result *, + const char *); /*************/ diff --git a/Include/internal/pycore_crossinterp_data_registry.h b/Include/internal/pycore_crossinterp_data_registry.h index 8f4bcb948e5a45..fbb4cad5cac32e 100644 --- a/Include/internal/pycore_crossinterp_data_registry.h +++ b/Include/internal/pycore_crossinterp_data_registry.h @@ -17,7 +17,7 @@ typedef struct _xid_regitem { /* This is NULL for builtin types. */ PyObject *weakref; size_t refcount; - xidatafunc getdata; + _PyXIData_getdata_t getdata; } _PyXIData_regitem_t; typedef struct { @@ -30,7 +30,7 @@ typedef struct { PyAPI_FUNC(int) _PyXIData_RegisterClass( PyThreadState *, PyTypeObject *, - xidatafunc); + _PyXIData_getdata_t); PyAPI_FUNC(int) _PyXIData_UnregisterClass( PyThreadState *, PyTypeObject *); diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 59d2c9d5377953..1b59fa2ef60014 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -52,9 +52,15 @@ extern "C" { #ifdef Py_GIL_DISABLED # define _Py_Debug_gilruntimestate_enabled offsetof(struct _gil_runtime_state, enabled) # define _Py_Debug_Free_Threaded 1 +# define _Py_Debug_code_object_co_tlbc offsetof(PyCodeObject, co_tlbc) +# define _Py_Debug_interpreter_frame_tlbc_index offsetof(_PyInterpreterFrame, tlbc_index) +# define _Py_Debug_interpreter_state_tlbc_generation offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) #else # define _Py_Debug_gilruntimestate_enabled 0 # define _Py_Debug_Free_Threaded 0 +# define _Py_Debug_code_object_co_tlbc 0 +# define _Py_Debug_interpreter_frame_tlbc_index 0 +# define _Py_Debug_interpreter_state_tlbc_generation 0 #endif @@ -85,6 +91,8 @@ typedef struct _Py_DebugOffsets { uint64_t gil_runtime_state_enabled; uint64_t gil_runtime_state_locked; uint64_t gil_runtime_state_holder; + uint64_t code_object_generation; + uint64_t tlbc_generation; } interpreter_state; // Thread state offset; @@ -109,6 +117,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplus; uint64_t owner; uint64_t stackpointer; + uint64_t tlbc_index; } interpreter_frame; // Code object offset; @@ -123,6 +132,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplusnames; uint64_t localspluskinds; uint64_t co_code_adaptive; + uint64_t co_tlbc; } code_object; // PyObject offset; @@ -210,6 +220,11 @@ typedef struct _Py_DebugOffsets { uint64_t gi_frame_state; } gen_object; + struct _llist_node { + uint64_t next; + uint64_t prev; + } llist_node; + struct _debugger_support { uint64_t eval_breaker; uint64_t remote_debugger_support; @@ -245,6 +260,8 @@ typedef struct _Py_DebugOffsets { .gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \ .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \ .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \ + .code_object_generation = offsetof(PyInterpreterState, _code_object_generation), \ + .tlbc_generation = _Py_Debug_interpreter_state_tlbc_generation, \ }, \ .thread_state = { \ .size = sizeof(PyThreadState), \ @@ -265,6 +282,7 @@ typedef struct _Py_DebugOffsets { .localsplus = offsetof(_PyInterpreterFrame, localsplus), \ .owner = offsetof(_PyInterpreterFrame, owner), \ .stackpointer = offsetof(_PyInterpreterFrame, stackpointer), \ + .tlbc_index = _Py_Debug_interpreter_frame_tlbc_index, \ }, \ .code_object = { \ .size = sizeof(PyCodeObject), \ @@ -277,6 +295,7 @@ typedef struct _Py_DebugOffsets { .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \ .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \ .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \ + .co_tlbc = _Py_Debug_code_object_co_tlbc, \ }, \ .pyobject = { \ .size = sizeof(PyObject), \ @@ -339,13 +358,17 @@ typedef struct _Py_DebugOffsets { .gi_iframe = offsetof(PyGenObject, gi_iframe), \ .gi_frame_state = offsetof(PyGenObject, gi_frame_state), \ }, \ + .llist_node = { \ + .next = offsetof(struct llist_node, next), \ + .prev = offsetof(struct llist_node, prev), \ + }, \ .debugger_support = { \ .eval_breaker = offsetof(PyThreadState, eval_breaker), \ .remote_debugger_support = offsetof(PyThreadState, remote_debugger_support), \ .remote_debugging_enabled = offsetof(PyInterpreterState, config.remote_debug), \ .debugger_pending_call = offsetof(_PyRemoteDebuggerSupport, debugger_pending_call), \ .debugger_script_path = offsetof(_PyRemoteDebuggerSupport, debugger_script_path), \ - .debugger_script_path_size = MAX_SCRIPT_PATH_SIZE, \ + .debugger_script_path_size = Py_MAX_SCRIPT_PATH_SIZE, \ }, \ } diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 754eb88a85c33c..25bb224921aa8b 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -150,6 +150,8 @@ extern int _PyDict_Pop_KnownHash( Py_hash_t hash, PyObject **result); +extern void _PyDict_Clear_LockHeld(PyObject *op); + #ifdef Py_GIL_DISABLED PyAPI_FUNC(void) _PyDict_EnsureSharedOnRead(PyDictObject *mp); #endif diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 209252b2ddc0de..6e1209659565a3 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -35,6 +35,18 @@ PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_cod extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); + +/* See pycore_code.h for explanation about what "stateless" means. */ + +PyAPI_FUNC(int) +_PyFunction_VerifyStateless(PyThreadState *, PyObject *); + +static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) { + return _PyFunction_CAST(func)->func_builtins; +} +#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func)) + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 9bde4faaf5a040..5997858378cfe0 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -796,6 +796,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all_threads)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(any)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append)); @@ -893,6 +894,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(data)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(database)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(day)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(debug)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(default)); @@ -904,7 +906,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(deterministic)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(device)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict_content)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dictcomp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digest)); @@ -1139,6 +1140,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(offset_src)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(on_type_read)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(onceregistry)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_active_thread)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_keys)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(oparg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opcode)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 3a83fd6b6042e2..bab5da70f3400c 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -287,6 +287,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(alias) STRUCT_FOR_ID(align) STRUCT_FOR_ID(all) + STRUCT_FOR_ID(all_threads) STRUCT_FOR_ID(allow_code) STRUCT_FOR_ID(any) STRUCT_FOR_ID(append) @@ -384,6 +385,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(data) STRUCT_FOR_ID(database) STRUCT_FOR_ID(day) + STRUCT_FOR_ID(debug) STRUCT_FOR_ID(decode) STRUCT_FOR_ID(decoder) STRUCT_FOR_ID(default) @@ -395,7 +397,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(deterministic) STRUCT_FOR_ID(device) STRUCT_FOR_ID(dict) - STRUCT_FOR_ID(dict_content) STRUCT_FOR_ID(dictcomp) STRUCT_FOR_ID(difference_update) STRUCT_FOR_ID(digest) @@ -630,6 +631,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(offset_src) STRUCT_FOR_ID(on_type_read) STRUCT_FOR_ID(onceregistry) + STRUCT_FOR_ID(only_active_thread) STRUCT_FOR_ID(only_keys) STRUCT_FOR_ID(oparg) STRUCT_FOR_ID(opcode) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index c3e6c77405bfe7..8a29c533b99058 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -726,6 +726,10 @@ typedef struct _PyIndexPool { // Next index to allocate if no free indices are available int32_t next_index; + + // Generation counter incremented on thread creation/destruction + // Used for TLBC cache invalidation in remote debugging + uint32_t tlbc_generation; } _PyIndexPool; typedef union _Py_unique_id_entry { @@ -843,6 +847,8 @@ struct _is { /* The per-interpreter GIL, which might not be used. */ struct _gil_runtime_state _gil; + uint64_t _code_object_generation; + /* ---------- IMPORTANT --------------------------- The fields above this line are declared as early as possible to facilitate out-of-process observability diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ed6c435316708e..3196d1b82084b9 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -158,6 +158,11 @@ PyAPI_FUNC(int) _PyLong_UnsignedLongLong_Converter(PyObject *, void *); // Export for '_testclinic' shared extension (Argument Clinic code) PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt8_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt16_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt32_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt64_Converter(PyObject *, void *); + /* Long value tag bits: * 0-1: Sign bits value = (1-sign), ie. negative=2, positive=0, zero=1. * 2: Set to 1 for the small ints diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index 1cc897c5a69469..95754025a4aa05 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -277,6 +277,7 @@ Known values: Python 3.14a7 3622 (Store annotations in different class dict keys) Python 3.14a7 3623 (Add BUILD_INTERPOLATION & BUILD_TEMPLATE opcodes) Python 3.14b1 3624 (Don't optimize LOAD_FAST when local is killed by DELETE_FAST) + Python 3.14b3 3625 (Fix handling of opcodes that may leave operands on the stack when optimizing LOAD_FAST) Python 3.15 will start with 3650 @@ -289,7 +290,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3624 +#define PYC_MAGIC_NUMBER 3625 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index b7e162c8abcabf..12c5614834f565 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -614,7 +614,7 @@ static inline PyObject * _Py_XGetRef(PyObject **ptr) { for (;;) { - PyObject *value = _Py_atomic_load_ptr(ptr); + PyObject *value = _PyObject_CAST(_Py_atomic_load_ptr(ptr)); if (value == NULL) { return value; } @@ -629,7 +629,7 @@ _Py_XGetRef(PyObject **ptr) static inline PyObject * _Py_TryXGetRef(PyObject **ptr) { - PyObject *value = _Py_atomic_load_ptr(ptr); + PyObject *value = _PyObject_CAST(_Py_atomic_load_ptr(ptr)); if (value == NULL) { return value; } @@ -1007,6 +1007,22 @@ enum _PyAnnotateFormat { _Py_ANNOTATE_FORMAT_STRING = 4, }; +int _PyObject_SetDict(PyObject *obj, PyObject *value); + +#ifndef Py_GIL_DISABLED +static inline Py_ALWAYS_INLINE void _Py_INCREF_MORTAL(PyObject *op) +{ + assert(!_Py_IsStaticImmortal(op)); + op->ob_refcnt++; + _Py_INCREF_STAT_INC(); +#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) + if (!_Py_IsImmortal(op)) { + _Py_INCREF_IncRefTotal(); + } +#endif +} +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 0e34074f1600a7..8d92e29c435056 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1787,6 +1787,35 @@ const uint8_t _PyOpcode_Caches[256] = { extern const uint8_t _PyOpcode_Deopt[256]; #ifdef NEED_OPCODE_METADATA const uint8_t _PyOpcode_Deopt[256] = { + [121] = 121, + [122] = 122, + [123] = 123, + [124] = 124, + [125] = 125, + [126] = 126, + [127] = 127, + [212] = 212, + [213] = 213, + [214] = 214, + [215] = 215, + [216] = 216, + [217] = 217, + [218] = 218, + [219] = 219, + [220] = 220, + [221] = 221, + [222] = 222, + [223] = 223, + [224] = 224, + [225] = 225, + [226] = 226, + [227] = 227, + [228] = 228, + [229] = 229, + [230] = 230, + [231] = 231, + [232] = 232, + [233] = 233, [BINARY_OP] = BINARY_OP, [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h index 62af06dc01c125..79a1a242556a52 100644 --- a/Include/internal/pycore_opcode_utils.h +++ b/Include/internal/pycore_opcode_utils.h @@ -56,6 +56,8 @@ extern "C" { #define IS_RETURN_OPCODE(opcode) \ (opcode == RETURN_VALUE) +#define IS_RAISE_OPCODE(opcode) \ + (opcode == RAISE_VARARGS || opcode == RERAISE) /* Flags used in the oparg for MAKE_FUNCTION */ diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index f357b88e220e6e..2c2048f7e1272a 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -94,13 +94,13 @@ extern void _PyErr_Fetch( PyObject **value, PyObject **traceback); -extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate); +PyAPI_FUNC(PyObject*) _PyErr_GetRaisedException(PyThreadState *tstate); PyAPI_FUNC(int) _PyErr_ExceptionMatches( PyThreadState *tstate, PyObject *exc); -extern void _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); +PyAPI_FUNC(void) _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); extern void _PyErr_Restore( PyThreadState *tstate, diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 02537bdfef8598..3e12084b82ab26 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str); extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. -extern void _PyMem_FreeDelayed(void *ptr); +extern void _PyMem_FreeDelayed(void *ptr, size_t size); // Enqueue an object to be freed possibly after some delay #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 633e5cf77db918..ea3dfbd2eef9c1 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -8,6 +8,7 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_pythonrun.h" // _PyOS_STACK_MARGIN_SHIFT #include "pycore_typedefs.h" // _PyRuntimeState #include "pycore_tstate.h" @@ -325,7 +326,7 @@ _Py_RecursionLimit_GetMargin(PyThreadState *tstate) _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; assert(_tstate->c_stack_hard_limit != 0); intptr_t here_addr = _Py_get_machine_stack_pointer(); - return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, PYOS_STACK_MARGIN_SHIFT); + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, _PyOS_STACK_MARGIN_SHIFT); } #ifdef __cplusplus diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h index 0bfc5704dc4c59..c2832098ddb3e7 100644 --- a/Include/internal/pycore_pythonrun.h +++ b/Include/internal/pycore_pythonrun.h @@ -25,6 +25,7 @@ extern int _PyRun_InteractiveLoopObject( PyObject *filename, PyCompilerFlags *flags); +extern int _PyObject_SupportedAsScript(PyObject *); extern const char* _Py_SourceAsString( PyObject *cmd, const char *funcname, @@ -32,6 +33,28 @@ extern const char* _Py_SourceAsString( PyCompilerFlags *cf, PyObject **cmd_copy); + +/* Stack size, in "pointers". This must be large enough, so + * no two calls to check recursion depth are more than this far + * apart. In practice, that means it must be larger than the C + * stack consumption of PyEval_EvalDefault */ +#if defined(_Py_ADDRESS_SANITIZER) || defined(_Py_THREAD_SANITIZER) +# define _PyOS_LOG2_STACK_MARGIN 12 +#elif defined(Py_DEBUG) && defined(WIN32) +# define _PyOS_LOG2_STACK_MARGIN 12 +#else +# define _PyOS_LOG2_STACK_MARGIN 11 +#endif +#define _PyOS_STACK_MARGIN (1 << _PyOS_LOG2_STACK_MARGIN) +#define _PyOS_STACK_MARGIN_BYTES (_PyOS_STACK_MARGIN * sizeof(void *)) + +#if SIZEOF_VOID_P == 8 +# define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 3) +#else +# define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 2) +#endif + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index b835c3abaf5d0b..1f9b3fcf777493 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -48,8 +48,21 @@ struct _qsbr_thread_state { // Thread state (or NULL) PyThreadState *tstate; - // Used to defer advancing write sequence a fixed number of times - int deferrals; + // Number of held items added by this thread since the last write sequence + // advance + int deferred_count; + + // Estimate for the amount of memory that is held by this thread since + // the last write sequence advance + size_t deferred_memory; + + // Amount of memory in mimalloc pages deferred from collection. When + // deferred, they are prevented from being used for a different size class + // and in a different thread. + size_t deferred_page_memory; + + // True if the deferred memory frees should be processed. + bool should_process; // Is this thread state allocated? bool allocated; @@ -109,11 +122,17 @@ _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal) extern uint64_t _Py_qsbr_advance(struct _qsbr_shared *shared); -// Batches requests to advance the write sequence. This advances the write -// sequence every N calls, which reduces overhead but increases time to -// reclamation. Returns the new goal. +// Return the next value for the write sequence (current plus the increment). extern uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr); +_Py_qsbr_shared_next(struct _qsbr_shared *shared); + +// Return true if deferred memory frees held by QSBR should be processed to +// determine if they can be safely freed. +static inline bool +_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr) +{ + return qsbr->should_process; +} // Have the read sequences advanced to the given goal? If this returns true, // it safe to reclaim any memory tagged with the goal (or earlier goal). diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 4a34ffa559e124..e642a42bbfc3e4 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -794,6 +794,7 @@ extern "C" { INIT_ID(alias), \ INIT_ID(align), \ INIT_ID(all), \ + INIT_ID(all_threads), \ INIT_ID(allow_code), \ INIT_ID(any), \ INIT_ID(append), \ @@ -891,6 +892,7 @@ extern "C" { INIT_ID(data), \ INIT_ID(database), \ INIT_ID(day), \ + INIT_ID(debug), \ INIT_ID(decode), \ INIT_ID(decoder), \ INIT_ID(default), \ @@ -902,7 +904,6 @@ extern "C" { INIT_ID(deterministic), \ INIT_ID(device), \ INIT_ID(dict), \ - INIT_ID(dict_content), \ INIT_ID(dictcomp), \ INIT_ID(difference_update), \ INIT_ID(digest), \ @@ -1137,6 +1138,7 @@ extern "C" { INIT_ID(offset_src), \ INIT_ID(on_type_read), \ INIT_ID(onceregistry), \ + INIT_ID(only_active_thread), \ INIT_ID(only_keys), \ INIT_ID(oparg), \ INIT_ID(opcode), \ diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index a2acf311ff4c65..0ce759fc743d82 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -239,6 +239,7 @@ PyStackRef_IsNullOrInt(_PyStackRef ref); #else #define Py_INT_TAG 3 +#define Py_TAG_REFCNT 1 static inline bool PyStackRef_IsTaggedInt(_PyStackRef i) @@ -264,7 +265,7 @@ PyStackRef_UntagInt(_PyStackRef i) #ifdef Py_GIL_DISABLED -#define Py_TAG_DEFERRED (1) +#define Py_TAG_DEFERRED Py_TAG_REFCNT #define Py_TAG_PTR ((uintptr_t)0) #define Py_TAG_BITS ((uintptr_t)1) @@ -442,14 +443,13 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) /* References to immortal objects always have their tag bit set to Py_TAG_REFCNT * as they can (must) have their reclamation deferred */ -#define Py_TAG_BITS 1 -#define Py_TAG_REFCNT 1 +#define Py_TAG_BITS 3 #if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT # error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" #endif #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) -#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) +#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_REFCNT))) #define PyStackRef_NULL_BITS Py_TAG_REFCNT static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; @@ -529,7 +529,7 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); #if SIZEOF_VOID_P > 4 - unsigned int tag = obj->ob_flags & Py_TAG_BITS; + unsigned int tag = obj->ob_flags & Py_TAG_REFCNT; #else unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_REFCNT : 0; #endif @@ -548,12 +548,6 @@ PyStackRef_FromPyObjectStealMortal(PyObject *obj) return ref; } -// Check if a stackref is exactly the same as another stackref, including the -// the deferred bit. This can only be used safely if you know that the deferred -// bits of `a` and `b` match. -#define PyStackRef_IsExactly(a, b) \ - (assert(((a).bits & Py_TAG_BITS) == ((b).bits & Py_TAG_BITS)), (a).bits == (b).bits) - static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { @@ -561,7 +555,7 @@ _PyStackRef_FromPyObjectNew(PyObject *obj) if (_Py_IsImmortal(obj)) { return (_PyStackRef){ .bits = ((uintptr_t)obj) | Py_TAG_REFCNT}; } - Py_INCREF_MORTAL(obj); + _Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; PyStackRef_CheckValid(ref); return ref; @@ -572,7 +566,7 @@ static inline _PyStackRef _PyStackRef_FromPyObjectNewMortal(PyObject *obj) { assert(obj != NULL); - Py_INCREF_MORTAL(obj); + _Py_INCREF_MORTAL(obj); _PyStackRef ref = (_PyStackRef){ .bits = (uintptr_t)obj }; PyStackRef_CheckValid(ref); return ref; @@ -590,14 +584,14 @@ PyStackRef_FromPyObjectImmortal(PyObject *obj) /* WARNING: This macro evaluates its argument more than once */ #ifdef _WIN32 #define PyStackRef_DUP(REF) \ - (PyStackRef_RefcountOnObject(REF) ? (Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) + (PyStackRef_RefcountOnObject(REF) ? (_Py_INCREF_MORTAL(BITS_TO_PTR(REF)), (REF)) : (REF)) #else static inline _PyStackRef PyStackRef_DUP(_PyStackRef ref) { assert(!PyStackRef_IsNull(ref)); if (PyStackRef_RefcountOnObject(ref)) { - Py_INCREF_MORTAL(BITS_TO_PTR(ref)); + _Py_INCREF_MORTAL(BITS_TO_PTR(ref)); } return ref; } @@ -606,7 +600,7 @@ PyStackRef_DUP(_PyStackRef ref) static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return (ref.bits & Py_TAG_BITS) == 0 || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); + return (ref.bits & Py_TAG_BITS) != Py_TAG_REFCNT || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); } static inline _PyStackRef @@ -681,7 +675,7 @@ PyStackRef_XCLOSE(_PyStackRef ref) // Note: this is a macro because MSVC (Windows) has trouble inlining it. -#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) +#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_REFCNT)) == ((b).bits & (~Py_TAG_REFCNT))) #endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) @@ -771,7 +765,7 @@ _Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out) static inline int _Py_TryXGetStackRef(PyObject **src, _PyStackRef *out) { - PyObject *op = _Py_atomic_load_ptr_relaxed(src); + PyObject *op = _PyObject_CAST(_Py_atomic_load_ptr_relaxed(src)); if (op == NULL) { *out = PyStackRef_NULL; return 1; diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index c85d53b89accdb..3791b913c17546 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -139,14 +139,18 @@ extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful( // Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape // chars. // Export for test_peg_generator. -PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2( const char *string, /* Unicode-Escape encoded string */ Py_ssize_t length, /* size of string */ const char *errors, /* error handling */ Py_ssize_t *consumed, /* bytes consumed */ - const char **first_invalid_escape); /* on return, points to first - invalid escaped char in - string. */ + int *first_invalid_escape_char, /* on return, if not -1, contain the first + invalid escaped char (<= 0xff) or invalid + octal escape (> 0xff) in string. */ + const char **first_invalid_escape_ptr); /* on return, if not NULL, may + point to the first invalid escaped + char in string. + May be NULL if errors is not NULL. */ /* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index fefacef77c89ee..c3ecdf64c6ee00 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -936,6 +936,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(all_threads); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(allow_code); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1324,6 +1328,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(debug); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(decode); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1368,10 +1376,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(dict_content); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(dictcomp); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2308,6 +2312,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(only_active_thread); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(only_keys); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h index 950aa0af290056..4ed8928c0b92a8 100644 --- a/Include/internal/pycore_weakref.h +++ b/Include/internal/pycore_weakref.h @@ -29,6 +29,12 @@ extern "C" { PyMutex_LockFlags(wr->weakrefs_lock, _Py_LOCK_DONT_DETACH) #define UNLOCK_WEAKREFS_FOR_WR(wr) PyMutex_Unlock(wr->weakrefs_lock) +#define FT_CLEAR_WEAKREFS(obj, weakref_list) \ + do { \ + assert(Py_REFCNT(obj) == 0); \ + PyObject_ClearWeakRefs(obj); \ + } while (0) + #else #define LOCK_WEAKREFS(obj) @@ -37,6 +43,14 @@ extern "C" { #define LOCK_WEAKREFS_FOR_WR(wr) #define UNLOCK_WEAKREFS_FOR_WR(wr) +#define FT_CLEAR_WEAKREFS(obj, weakref_list) \ + do { \ + assert(Py_REFCNT(obj) == 0); \ + if (weakref_list != NULL) { \ + PyObject_ClearWeakRefs(obj); \ + } \ + } while (0) + #endif static inline int _is_dead(PyObject *obj) diff --git a/Include/object.h b/Include/object.h index 8cc83abb8574e3..9901815bd64604 100644 --- a/Include/object.h +++ b/Include/object.h @@ -654,8 +654,13 @@ PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ PyAPI_FUNC(int) Py_IsNone(PyObject *x); #define Py_IsNone(x) Py_Is((x), Py_None) -/* Macro for returning Py_None from a function */ -#define Py_RETURN_NONE return Py_None +/* Macro for returning Py_None from a function. + * Only treat Py_None as immortal in the limited C API 3.12 and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_NONE return Py_NewRef(Py_None) +#else +# define Py_RETURN_NONE return Py_None +#endif /* Py_NotImplemented is a singleton used to signal that an operation is diff --git a/Include/patchlevel.h b/Include/patchlevel.h index c9dd659046a367..77fc8aeb918cf6 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -21,10 +21,10 @@ #define PY_MINOR_VERSION 14 #define PY_MICRO_VERSION 0 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_BETA -#define PY_RELEASE_SERIAL 1 +#define PY_RELEASE_SERIAL 4 /* Version as a string */ -#define PY_VERSION "3.14.0b1" +#define PY_VERSION "3.14.0b4+" /*--end constants--*/ diff --git a/Include/pythonrun.h b/Include/pythonrun.h index fad2b3c77476e4..92b50aa807bf70 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -21,39 +21,15 @@ PyAPI_FUNC(void) PyErr_DisplayException(PyObject *); /* Stuff with no proper home (yet) */ PyAPI_DATA(int) (*PyOS_InputHook)(void); -/* Stack size, in "pointers". This must be large enough, so - * no two calls to check recursion depth are more than this far - * apart. In practice, that means it must be larger than the C - * stack consumption of PyEval_EvalDefault */ -#if defined(_Py_ADDRESS_SANITIZER) || defined(_Py_THREAD_SANITIZER) -# define PYOS_LOG2_STACK_MARGIN 12 -#elif defined(Py_DEBUG) && defined(WIN32) -# define PYOS_LOG2_STACK_MARGIN 12 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really a size */ -# define PYOS_LOG2_STACK_MARGIN 9 -#else -# define PYOS_LOG2_STACK_MARGIN 11 -#endif -#define PYOS_STACK_MARGIN (1 << PYOS_LOG2_STACK_MARGIN) -#define PYOS_STACK_MARGIN_BYTES (PYOS_STACK_MARGIN * sizeof(void *)) - -#if SIZEOF_VOID_P == 8 -#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 3) -#else -#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 2) -#endif - - #if defined(WIN32) -#define USE_STACKCHECK +# define USE_STACKCHECK #endif - #ifdef USE_STACKCHECK /* Check that we aren't overflowing our stack */ PyAPI_FUNC(int) PyOS_CheckStack(void); #endif + #ifndef Py_LIMITED_API # define Py_CPYTHON_PYTHONRUN_H # include "cpython/pythonrun.h" diff --git a/Include/refcount.h b/Include/refcount.h index 177bbdaf0c5977..1ecba5ee78d685 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -1,5 +1,5 @@ -#ifndef Py_REFCOUNT_H -#define Py_REFCOUNT_H +#ifndef _Py_REFCOUNT_H +#define _Py_REFCOUNT_H #ifdef __cplusplus extern "C" { #endif @@ -33,7 +33,7 @@ increase and decrease the objects reference count. In order to offer sufficient resilience to C extensions using the stable ABI compiled against 3.11 or earlier, we set the initial value near the -middle of the range (2**31, 2**32). That way the the refcount can be +middle of the range (2**31, 2**32). That way the refcount can be off by ~1 billion without affecting immortality. Reference count increases will use saturated arithmetic, taking advantage of @@ -247,20 +247,6 @@ PyAPI_FUNC(void) Py_DecRef(PyObject *); PyAPI_FUNC(void) _Py_IncRef(PyObject *); PyAPI_FUNC(void) _Py_DecRef(PyObject *); -#ifndef Py_GIL_DISABLED -static inline Py_ALWAYS_INLINE void Py_INCREF_MORTAL(PyObject *op) -{ - assert(!_Py_IsStaticImmortal(op)); - op->ob_refcnt++; - _Py_INCREF_STAT_INC(); -#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API) - if (!_Py_IsImmortal(op)) { - _Py_INCREF_IncRefTotal(); - } -#endif -} -#endif - static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) { #if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG)) @@ -564,4 +550,4 @@ static inline PyObject* _Py_XNewRef(PyObject *obj) #ifdef __cplusplus } #endif -#endif // !Py_REFCOUNT_H +#endif // !_Py_REFCOUNT_H diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md index 28589787e1fad7..9e38da4c862f16 100644 --- a/InternalDocs/exception_handling.md +++ b/InternalDocs/exception_handling.md @@ -8,7 +8,7 @@ The cost of raising an exception is increased, but not by much. The following code: -``` +```python try: g(0) except: @@ -18,7 +18,7 @@ except: compiles into intermediate code like the following: -``` +```python RESUME 0 1 SETUP_FINALLY 8 (to L1) @@ -118,13 +118,13 @@ All offsets and lengths are in code units, not bytes. We want the format to be compact, but quickly searchable. For it to be compact, it needs to have variable sized entries so that we can store common (small) offsets compactly, but handle large offsets if needed. -For it to be searchable quickly, we need to support binary search giving us log(n) performance in all cases. +For it to be searchable quickly, we need to support binary search giving us `log(n)` performance in all cases. Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: `start, size, target, depth, push-lasti`. -Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each code unit takes 2 bytes. +Also, sizes are limited to `2**30` as the code length cannot exceed `2**31` and each code unit takes 2 bytes. It also happens that depth is generally quite small. So, we need to encode: @@ -140,7 +140,7 @@ lasti (1 bit) We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. Since the most significant bit is reserved for marking the start of an entry, we have 7 bits per byte to encode offsets. Encoding uses a standard varint encoding, but with only 7 bits instead of the usual 8. -The 8 bits of a byte are (msb left) SXdddddd where S is the start bit. X is the extend bit meaning that the next byte is required to extend the offset. +The 8 bits of a byte are (msb left) `SXdddddd` where `S` is the start bit. `X` is the extend bit meaning that the next byte is required to extend the offset. In addition, we combine `depth` and `lasti` into a single value, `((depth<<1)+lasti)`, before encoding. diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 4da6cd47dc859e..9c35684c945b3e 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -286,7 +286,7 @@ object, the GC does not process it twice. Notice that an object that was marked as "tentatively unreachable" and was later moved back to the reachable list will be visited again by the garbage collector -as now all the references that that object has need to be processed as well. This +as now all the references that the object has need to be processed as well. This process is really a breadth first search over the object graph. Once all the objects are scanned, the GC knows that all container objects in the tentatively unreachable list are really unreachable and can thus be garbage collected. diff --git a/Lib/_ast_unparse.py b/Lib/_ast_unparse.py index 0b669edb2ffec6..c25066eb107de1 100644 --- a/Lib/_ast_unparse.py +++ b/Lib/_ast_unparse.py @@ -627,6 +627,9 @@ def _write_ftstring(self, values, prefix): self._ftstring_helper(fstring_parts) def _tstring_helper(self, node): + if not node.values: + self._write_ftstring([], "t") + return last_idx = 0 for i, value in enumerate(node.values): # This can happen if we have an implicit concat of a t-string @@ -679,9 +682,12 @@ def _unparse_interpolation_value(self, inner): unparser.set_precedence(_Precedence.TEST.next(), inner) return unparser.visit(inner) - def _write_interpolation(self, node): + def _write_interpolation(self, node, is_interpolation=False): with self.delimit("{", "}"): - expr = self._unparse_interpolation_value(node.value) + if is_interpolation: + expr = node.str + else: + expr = self._unparse_interpolation_value(node.value) if expr.startswith("{"): # Separate pair of opening brackets as "{ {" self.write(" ") @@ -696,7 +702,7 @@ def visit_FormattedValue(self, node): self._write_interpolation(node) def visit_Interpolation(self, node): - self._write_interpolation(node) + self._write_interpolation(node, is_interpolation=True) def visit_Name(self, node): self.write(node.id) diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index e3db1b52b1159b..cfcd35827a275a 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -1127,8 +1127,8 @@ def isoformat(self): This is 'YYYY-MM-DD'. References: - - http://www.w3.org/TR/NOTE-datetime - - http://www.cl.cam.ac.uk/~mgk25/iso-time.html + - https://www.w3.org/TR/NOTE-datetime + - https://www.cl.cam.ac.uk/~mgk25/iso-time.html """ return "%04d-%02d-%02d" % (self._year, self._month, self._day) @@ -1262,7 +1262,7 @@ def isocalendar(self): The first week is 1; Monday is 1 ... Sunday is 7. ISO calendar algorithm taken from - http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + https://www.phys.uu.nl/~vgent/calendar/isocalendar.htm (used with permission) """ year = self._year @@ -2139,7 +2139,7 @@ def isoformat(self, sep='T', timespec='auto'): By default, the fractional part is omitted if self.microsecond == 0. If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. Optional argument sep specifies the separator between date and time, default 'T'. diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 46fa9ffcb1e056..9b8e42a2342536 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -6120,9 +6120,13 @@ def _convert_for_comparison(self, other, equality_op=False): (?Pz)? (?P\#)? (?P0)? -(?P(?!0)\d+)? +(?P\d+)? (?P[,_])? -(?:\.(?P0|(?!0)\d+))? +(?:\. + (?=[\d,_]) # lookahead for digit or separator + (?P\d+)? + (?P[,_])? +)? (?P[eEfFgGn%])? \z """, re.VERBOSE|re.DOTALL) @@ -6215,6 +6219,9 @@ def _parse_format_specifier(format_spec, _localeconv=None): format_dict['grouping'] = [3, 0] format_dict['decimal_point'] = '.' + if format_dict['frac_separators'] is None: + format_dict['frac_separators'] = '' + return format_dict def _format_align(sign, body, spec): @@ -6334,6 +6341,11 @@ def _format_number(is_negative, intpart, fracpart, exp, spec): sign = _format_sign(is_negative, spec) + frac_sep = spec['frac_separators'] + if fracpart and frac_sep: + fracpart = frac_sep.join(fracpart[pos:pos + 3] + for pos in range(0, len(fracpart), 3)) + if fracpart or spec['alt']: fracpart = spec['decimal_point'] + fracpart diff --git a/Lib/_pyio.py b/Lib/_pyio.py index a870de5b532542..fb2a6d049caab6 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -407,6 +407,9 @@ def __del__(self): if closed: return + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + # If close() fails, the caller logs the exception with # sys.unraisablehook. close() must be called at the end at __del__(). self.close() @@ -645,8 +648,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): @@ -853,6 +854,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -1563,7 +1568,8 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) @@ -1600,12 +1606,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1780,7 +1785,7 @@ def close(self): if not self.closed: self._stat_atopen = None try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -2689,6 +2694,10 @@ def readline(self, size=None): def newlines(self): return self._decoder.newlines if self._decoder else None + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py index 347f05607c75c5..1e9462a42156d4 100644 --- a/Lib/_pyrepl/_module_completer.py +++ b/Lib/_pyrepl/_module_completer.py @@ -17,8 +17,8 @@ def make_default_module_completer() -> ModuleCompleter: - # Inside pyrepl, __package__ is set to '_pyrepl' - return ModuleCompleter(namespace={'__package__': '_pyrepl'}) + # Inside pyrepl, __package__ is set to None by default + return ModuleCompleter(namespace={'__package__': None}) class ModuleCompleter: @@ -42,11 +42,11 @@ def __init__(self, namespace: Mapping[str, Any] | None = None) -> None: self._global_cache: list[pkgutil.ModuleInfo] = [] self._curr_sys_path: list[str] = sys.path[:] - def get_completions(self, line: str) -> list[str]: + def get_completions(self, line: str) -> list[str] | None: """Return the next possible import completions for 'line'.""" result = ImportParser(line).parse() if not result: - return [] + return None try: return self.complete(*result) except Exception: @@ -81,8 +81,11 @@ def find_modules(self, path: str, prefix: str) -> list[str]: def _find_modules(self, path: str, prefix: str) -> list[str]: if not path: # Top-level import (e.g. `import foo`` or `from foo`)` - return [name for _, name, _ in self.global_cache - if name.startswith(prefix)] + builtin_modules = [name for name in sys.builtin_module_names + if self.is_suggestion_match(name, prefix)] + third_party_modules = [module.name for module in self.global_cache + if self.is_suggestion_match(module.name, prefix)] + return sorted(builtin_modules + third_party_modules) if path.startswith('.'): # Convert relative path to absolute path @@ -97,7 +100,14 @@ def _find_modules(self, path: str, prefix: str) -> list[str]: if mod_info.ispkg and mod_info.name == segment] modules = self.iter_submodules(modules) return [module.name for module in modules - if module.name.startswith(prefix)] + if self.is_suggestion_match(module.name, prefix)] + + def is_suggestion_match(self, module_name: str, prefix: str) -> bool: + if prefix: + return module_name.startswith(prefix) + # For consistency with attribute completion, which + # does not suggest private attributes unless requested. + return not module_name.startswith("_") def iter_submodules(self, parent_modules: list[pkgutil.ModuleInfo]) -> Iterator[pkgutil.ModuleInfo]: """Iterate over all submodules of the given parent modules.""" diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py index 842599bd1877fb..0589a0f437ec7c 100644 --- a/Lib/_pyrepl/base_eventqueue.py +++ b/Lib/_pyrepl/base_eventqueue.py @@ -87,7 +87,7 @@ def push(self, char: int | bytes) -> None: if isinstance(k, dict): self.keymap = k else: - self.insert(Event('key', k, self.flush_buf())) + self.insert(Event('key', k, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap elif self.buf and self.buf[0] == 27: # escape @@ -96,7 +96,7 @@ def push(self, char: int | bytes) -> None: # the docstring in keymap.py trace('unrecognized escape sequence, propagating...') self.keymap = self.compiled_keymap - self.insert(Event('key', '\033', bytearray(b'\033'))) + self.insert(Event('key', '\033', b'\033')) for _c in self.flush_buf()[1:]: self.push(_c) @@ -106,5 +106,5 @@ def push(self, char: int | bytes) -> None: except UnicodeError: return else: - self.insert(Event('key', decoded, self.flush_buf())) + self.insert(Event('key', decoded, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 2354fbb2ec2c1e..50c824995d85b8 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -370,6 +370,13 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) + if r.paste_mode: + data = "" + ev = r.console.getpending() + data += ev.data + if data: + r.insert(data) + r.last_refresh_cache.invalidated = True class insert_nl(EditCommand): @@ -484,7 +491,6 @@ def do(self) -> None: data = "" start = time.time() while done not in data: - self.reader.console.wait(100) ev = self.reader.console.getpending() data += ev.data trace( diff --git a/Lib/_pyrepl/main.py b/Lib/_pyrepl/main.py index a6f824dcc4ad14..447eb1e551e774 100644 --- a/Lib/_pyrepl/main.py +++ b/Lib/_pyrepl/main.py @@ -1,6 +1,7 @@ import errno import os import sys +import types CAN_USE_PYREPL: bool @@ -29,12 +30,10 @@ def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): print(FAIL_REASON, file=sys.stderr) return sys._baserepl() - if mainmodule: - namespace = mainmodule.__dict__ - else: - import __main__ - namespace = __main__.__dict__ - namespace.pop("__pyrepl_interactive_console", None) + if not mainmodule: + mainmodule = types.ModuleType("__main__") + + namespace = mainmodule.__dict__ # sys._baserepl() above does this internally, we do it here startup_path = os.getenv("PYTHONSTARTUP") diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index 560a9db192169e..9560ae779abfea 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -134,7 +134,8 @@ def get_stem(self) -> str: return "".join(b[p + 1 : self.pos]) def get_completions(self, stem: str) -> list[str]: - if module_completions := self.get_module_completions(): + module_completions = self.get_module_completions() + if module_completions is not None: return module_completions if len(stem) == 0 and self.more_lines is not None: b = self.buffer @@ -165,7 +166,7 @@ def get_completions(self, stem: str) -> list[str]: result.sort() return result - def get_module_completions(self) -> list[str]: + def get_module_completions(self) -> list[str] | None: line = self.get_line() return self.config.module_completer.get_completions(line) @@ -606,6 +607,7 @@ def _setup(namespace: Mapping[str, Any]) -> None: # set up namespace in rlcompleter, which requires it to be a bona fide dict if not isinstance(namespace, dict): namespace = dict(namespace) + _wrapper.config.module_completer = ModuleCompleter(namespace) _wrapper.config.readline_completer = RLCompleter(namespace).complete # this is not really what readline.c does. Better than nothing I guess diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index b3848833e14208..965b853c34b392 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -31,6 +31,7 @@ import sys import code import warnings +import errno from .readline import _get_reader, multiline_input, append_history_file @@ -110,6 +111,10 @@ def run_multiline_interactive_console( more_lines = functools.partial(_more_lines, console) input_n = 0 + _is_x_showrefcount_set = sys._xoptions.get("showrefcount") + _is_pydebug_build = hasattr(sys, "gettotalrefcount") + show_ref_count = _is_x_showrefcount_set and _is_pydebug_build + def maybe_run_command(statement: str) -> bool: statement = statement.strip() if statement in console.locals or statement not in REPL_COMMANDS: @@ -149,6 +154,7 @@ def maybe_run_command(statement: str) -> bool: append_history_file() except (FileNotFoundError, PermissionError, OSError) as e: warnings.warn(f"failed to open the history file for writing: {e}") + input_n += 1 except KeyboardInterrupt: r = _get_reader() @@ -167,3 +173,8 @@ def maybe_run_command(statement: str) -> bool: except: console.showtraceback() console.resetbuffer() + if show_ref_count: + console.write( + f"[{sys.gettotalrefcount()} refs," + f" {sys.getallocatedblocks()} blocks]\n" + ) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 38cf6b5a08e892..e04fbdc6c8a5c4 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -41,9 +41,15 @@ def from_re(cls, m: Match[str], group: int | str) -> Self: @classmethod def from_token(cls, token: TI, line_len: list[int]) -> Self: + end_offset = -1 + if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} + and token.string.endswith(("{", "}"))): + # gh-134158: a visible trailing brace comes from a double brace in input + end_offset += 1 + return cls( line_len[token.start[0] - 1] + token.start[1], - line_len[token.end[0] - 1] + token.end[1] - 1, + line_len[token.end[0] - 1] + token.end[1] + end_offset, ) @@ -102,6 +108,8 @@ def gen_colors(buffer: str) -> Iterator[ColorSpan]: for color in gen_colors_from_token_stream(gen, line_lengths): yield color last_emitted = color + except SyntaxError: + return except tokenize.TokenError as te: yield from recover_unterminated_string( te, line_lengths, last_emitted, buffer diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 95749198b3b2f9..c56dcd6d7dd434 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -419,10 +419,7 @@ def _getscrollbacksize(self) -> int: return info.srWindow.Bottom # type: ignore[no-any-return] - def _read_input(self, block: bool = True) -> INPUT_RECORD | None: - if not block and not self.wait(timeout=0): - return None - + def _read_input(self) -> INPUT_RECORD | None: rec = INPUT_RECORD() read = DWORD() if not ReadConsoleInput(InHandle, rec, 1, read): @@ -431,14 +428,10 @@ def _read_input(self, block: bool = True) -> INPUT_RECORD | None: return rec def _read_input_bulk( - self, block: bool, n: int + self, n: int ) -> tuple[ctypes.Array[INPUT_RECORD], int]: rec = (n * INPUT_RECORD)() read = DWORD() - - if not block and not self.wait(timeout=0): - return rec, 0 - if not ReadConsoleInput(InHandle, rec, n, read): raise WinError(GetLastError()) @@ -449,8 +442,11 @@ def get_event(self, block: bool = True) -> Event | None: and there is no event pending, otherwise waits for the completion of an event.""" + if not block and not self.wait(timeout=0): + return None + while self.event_queue.empty(): - rec = self._read_input(block) + rec = self._read_input() if rec is None: return None @@ -551,12 +547,20 @@ def getpending(self) -> Event: if e2: e.data += e2.data - recs, rec_count = self._read_input_bulk(False, 1024) + recs, rec_count = self._read_input_bulk(1024) for i in range(rec_count): rec = recs[i] + # In case of a legacy console, we do not only receive a keydown + # event, but also a keyup event - and for uppercase letters + # an additional SHIFT_PRESSED event. if rec and rec.EventType == KEY_EVENT: key_event = rec.Event.KeyEvent + if not key_event.bKeyDown: + continue ch = key_event.uChar.UnicodeChar + if ch == "\x00": + # ignore SHIFT_PRESSED and special keys + continue if ch == "\r": ch += "\n" e.data += ch diff --git a/Lib/_strptime.py b/Lib/_strptime.py index aa63933a49d16d..fc7e369c3d1e26 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -14,6 +14,7 @@ import time import locale import calendar +import re from re import compile as re_compile from re import sub as re_sub from re import IGNORECASE @@ -41,6 +42,29 @@ def _findall(haystack, needle): yield i i += len(needle) +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -84,6 +108,7 @@ def __init__(self): self.__calc_weekday() self.__calc_month() self.__calc_am_pm() + self.__calc_alt_digits() self.__calc_timezone() self.__calc_date_time() if _getlang() != self.lang: @@ -119,9 +144,43 @@ def __calc_am_pm(self): am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. + self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. + if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of # overloaded numbers is minimized. The order in which searches for @@ -129,26 +188,32 @@ def __calc_date_time(self): # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) - replacement_pairs = [ + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I'), - # Non-ASCII digits - ('\u0661\u0669\u0669\u0669', '%Y'), - ('\u0669\u0669', '%Oy'), - ('\u0662\u0662', '%OH'), - ('\u0664\u0664', '%OM'), - ('\u0665\u0665', '%OS'), - ('\u0661\u0667', '%Od'), - ('\u0660\u0663', '%Om'), - ('\u0663', '%Om'), - ('\u0662', '%Ow'), - ('\u0661\u0660', '%OI'), ] + date_time = [] - for directive in ('%c', '%x', '%X'): + for directive in ('%c', '%x', '%X', '%r'): current_format = time.strftime(directive, time_tuple).lower() current_format = current_format.replace('%', '%%') # The month and the day of the week formats are treated specially @@ -172,9 +237,10 @@ def __calc_date_time(self): if tz: current_format = current_format.replace(tz, "%Z") # Transform all non-ASCII digits to digits in range U+0660 to U+0669. - current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'H': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'k': r"(?P2[0-3]|[0-1]\d|\d| \d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", + 'l': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -305,23 +374,56 @@ def __init__(self, locale_time=None): 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'), + 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), 'Z'), '%': '%'} - for d in 'dmyHIMS': - mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d - mapping['Ow'] = r'(?P\d)' + if self.locale_time.LC_alt_digits is None: + for d in 'dmyCHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + else: + mapping.update({ + 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd', + '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'), + 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm', + '1[0-2]|0[1-9]|[1-9]'), + 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w', + '[0-6]'), + 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y', + '[0-9][0-9]'), + 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C', + '[0-9][0-9]'), + 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H', + '2[0-3]|[0-1][0-9]|[0-9]'), + 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I', + '1[0-2]|0[1-9]|[1-9]'), + 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M', + '[0-5][0-9]|[0-9]'), + 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S', + '6[0-1]|[0-5][0-9]|[0-9]'), + }) + mapping.update({ + 'e': mapping['d'], + 'Oe': mapping['Od'], + 'P': mapping['p'], + 'Op': mapping['p'], + 'W': mapping['U'].replace('U', 'W'), + }) mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) + base.__setitem__('T', self.pattern('%H:%M:%S')) + base.__setitem__('R', self.pattern('%H:%M')) + base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) base.__setitem__('x', self.pattern(self.locale_time.LC_date)) base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - def __seqToRE(self, to_convert, directive): + def __seqToRE(self, to_convert, directive, altregex=None): """Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This @@ -337,8 +439,9 @@ def __seqToRE(self, to_convert, directive): else: return '' regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex + if altregex is not None: + regex += '|' + altregex + return '(?P<%s>%s)' % (directive, regex) def pattern(self, format): """Return regex pattern for the format string. @@ -365,7 +468,7 @@ def repl(m): nonlocal day_of_month_in_format day_of_month_in_format = True return self[format_char] - format = re_sub(r'%([OE]?\\?.?)', repl, format) + format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format) if day_of_month_in_format and not year_in_format: import warnings warnings.warn("""\ @@ -467,6 +570,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # values weekday = julian = None found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + for group_key in found_dict.keys(): # Directives not explicitly handled below: # c, x, X @@ -474,30 +586,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 else: - year += 1900 + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 elif group_key == 'Y': year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) elif group_key == 'm': - month = int(found_dict['m']) + month = parse_int(found_dict['m']) elif group_key == 'B': month = locale_time.f_month.index(found_dict['B'].lower()) elif group_key == 'b': month = locale_time.a_month.index(found_dict['b'].lower()) elif group_key == 'd': - day = int(found_dict['d']) + day = parse_int(found_dict['d']) elif group_key == 'H': - hour = int(found_dict['H']) + hour = parse_int(found_dict['H']) elif group_key == 'I': - hour = int(found_dict['I']) + hour = parse_int(found_dict['I']) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -513,9 +629,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): if hour != 12: hour += 12 elif group_key == 'M': - minute = int(found_dict['M']) + minute = parse_int(found_dict['M']) elif group_key == 'S': - second = int(found_dict['S']) + second = parse_int(found_dict['S']) elif group_key == 'f': s = found_dict['f'] # Pad to always return microseconds. diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index c0b1d4395d14ed..c83a1573ccd3d1 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -27,6 +27,9 @@ class Format(enum.IntEnum): _sentinel = object() +# Following `NAME_ERROR_MSG` in `ceval_macros.h`: +_NAME_ERROR_MSG = "name '{name:.200}' is not defined" + # Slots shared by ForwardRef and _Stringifier. The __forward__ names must be # preserved for compatibility with the old typing.ForwardRef class. The remaining @@ -184,7 +187,7 @@ def evaluate( elif is_forwardref_format: return self else: - raise NameError(arg) + raise NameError(_NAME_ERROR_MSG.format(name=arg), name=arg) else: code = self.__forward_code__ try: @@ -305,6 +308,9 @@ def __repr__(self): return f"ForwardRef({self.__forward_arg__!r}{''.join(extra)})" +_Template = type(t"") + + class _Stringifier: # Must match the slots on ForwardRef, so we can turn an instance of one into an # instance of the other in place. @@ -341,6 +347,8 @@ def __convert_to_ast(self, other): if isinstance(other.__ast_node__, str): return ast.Name(id=other.__ast_node__), other.__extra_names__ return other.__ast_node__, other.__extra_names__ + elif type(other) is _Template: + return _template_to_ast(other), None elif ( # In STRING format we don't bother with the create_unique_name() dance; # it's better to emit the repr() of the object instead of an opaque name. @@ -560,6 +568,32 @@ def unary_op(self): del _make_unary_op +def _template_to_ast(template): + values = [] + for part in template: + match part: + case str(): + values.append(ast.Constant(value=part)) + # Interpolation, but we don't want to import the string module + case _: + interp = ast.Interpolation( + str=part.expression, + value=ast.parse(part.expression), + conversion=( + ord(part.conversion) + if part.conversion is not None + else -1 + ), + format_spec=( + ast.Constant(value=part.format_spec) + if part.format_spec != "" + else None + ), + ) + values.append(interp) + return ast.TemplateStr(values=values) + + class _StringifierDict(dict): def __init__(self, namespace, *, globals=None, owner=None, is_class=False, format): super().__init__(namespace) @@ -784,6 +818,8 @@ def _stringify_single(anno): # We have to handle str specially to support PEP 563 stringified annotations. elif isinstance(anno, str): return anno + elif isinstance(anno, _Template): + return ast.unparse(_template_to_ast(anno)) else: return repr(anno) @@ -906,48 +942,49 @@ def get_annotations( if not eval_str: return dict(ann) - if isinstance(obj, type): - # class - obj_globals = None - module_name = getattr(obj, "__module__", None) - if module_name: - module = sys.modules.get(module_name, None) - if module: - obj_globals = getattr(module, "__dict__", None) - obj_locals = dict(vars(obj)) - unwrap = obj - elif isinstance(obj, types.ModuleType): - # module - obj_globals = getattr(obj, "__dict__") - obj_locals = None - unwrap = None - elif callable(obj): - # this includes types.Function, types.BuiltinFunctionType, - # types.BuiltinMethodType, functools.partial, functools.singledispatch, - # "class funclike" from Lib/test/test_inspect... on and on it goes. - obj_globals = getattr(obj, "__globals__", None) - obj_locals = None - unwrap = obj - else: - obj_globals = obj_locals = unwrap = None - - if unwrap is not None: - while True: - if hasattr(unwrap, "__wrapped__"): - unwrap = unwrap.__wrapped__ - continue - if functools := sys.modules.get("functools"): - if isinstance(unwrap, functools.partial): - unwrap = unwrap.func + if globals is None or locals is None: + if isinstance(obj, type): + # class + obj_globals = None + module_name = getattr(obj, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + obj_globals = getattr(module, "__dict__", None) + obj_locals = dict(vars(obj)) + unwrap = obj + elif isinstance(obj, types.ModuleType): + # module + obj_globals = getattr(obj, "__dict__") + obj_locals = None + unwrap = None + elif callable(obj): + # this includes types.Function, types.BuiltinFunctionType, + # types.BuiltinMethodType, functools.partial, functools.singledispatch, + # "class funclike" from Lib/test/test_inspect... on and on it goes. + obj_globals = getattr(obj, "__globals__", None) + obj_locals = None + unwrap = obj + else: + obj_globals = obj_locals = unwrap = None + + if unwrap is not None: + while True: + if hasattr(unwrap, "__wrapped__"): + unwrap = unwrap.__wrapped__ continue - break - if hasattr(unwrap, "__globals__"): - obj_globals = unwrap.__globals__ + if functools := sys.modules.get("functools"): + if isinstance(unwrap, functools.partial): + unwrap = unwrap.func + continue + break + if hasattr(unwrap, "__globals__"): + obj_globals = unwrap.__globals__ - if globals is None: - globals = obj_globals - if locals is None: - locals = obj_locals + if globals is None: + globals = obj_globals + if locals is None: + locals = obj_locals # "Inject" type parameters into the local namespace # (unless they are shadowed by assignments *in* the local namespace), @@ -976,6 +1013,9 @@ def type_repr(value): if value.__module__ == "builtins": return value.__qualname__ return f"{value.__module__}.{value.__qualname__}" + elif isinstance(value, _Template): + tree = _template_to_ast(value) + return ast.unparse(tree) if value is ...: return "..." return repr(value) @@ -1006,14 +1046,27 @@ def _get_and_call_annotate(obj, format): return None +_BASE_GET_ANNOTATIONS = type.__dict__["__annotations__"].__get__ + + def _get_dunder_annotations(obj): """Return the annotations for an object, checking that it is a dictionary. Does not return a fresh dictionary. """ - ann = getattr(obj, "__annotations__", None) - if ann is None: - return None + # This special case is needed to support types defined under + # from __future__ import annotations, where accessing the __annotations__ + # attribute directly might return annotations for the wrong class. + if isinstance(obj, type): + try: + ann = _BASE_GET_ANNOTATIONS(obj) + except AttributeError: + # For static types, the descriptor raises AttributeError. + return None + else: + ann = getattr(obj, "__annotations__", None) + if ann is None: + return None if not isinstance(ann, dict): raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None") diff --git a/Lib/argparse.py b/Lib/argparse.py index f13ac82dbc50b3..83258cf3e0f37d 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -167,7 +167,6 @@ def __init__( indent_increment=2, max_help_position=24, width=None, - prefix_chars='-', color=False, ): # default setting for width @@ -176,16 +175,7 @@ def __init__( width = shutil.get_terminal_size().columns width -= 2 - from _colorize import can_colorize, decolor, get_theme - - if color and can_colorize(): - self._theme = get_theme(force_color=True).argparse - self._decolor = decolor - else: - self._theme = get_theme(force_no_color=True).argparse - self._decolor = lambda text: text - - self._prefix_chars = prefix_chars + self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -202,9 +192,20 @@ def __init__( self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') + def _set_color(self, color): + from _colorize import can_colorize, decolor, get_theme + + if color and can_colorize(): + self._theme = get_theme(force_color=True).argparse + self._decolor = decolor + else: + self._theme = get_theme(force_no_color=True).argparse + self._decolor = lambda text: text + # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -256,6 +257,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -299,6 +301,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -415,14 +418,7 @@ def _format_actions_usage(self, actions, groups): return ' '.join(self._get_actions_usage_parts(actions, groups)) def _is_long_option(self, string): - return len(string) >= 2 and string[1] in self._prefix_chars - - def _is_short_option(self, string): - return ( - not self._is_long_option(string) - and len(string) >= 1 - and string[0] in self._prefix_chars - ) + return len(string) > 2 def _get_actions_usage_parts(self, actions, groups): # find group indices and identify actions in groups @@ -471,25 +467,22 @@ def _get_actions_usage_parts(self, actions, groups): # produce the first way to invoke the option in brackets else: option_string = action.option_strings[0] + if self._is_long_option(option_string): + option_color = t.summary_long_option + else: + option_color = t.summary_short_option # if the Optional doesn't take a value, format is: # -s or --long if action.nargs == 0: part = action.format_usage() - if self._is_long_option(part): - part = f"{t.summary_long_option}{part}{t.reset}" - elif self._is_short_option(part): - part = f"{t.summary_short_option}{part}{t.reset}" + part = f"{option_color}{part}{t.reset}" # if the Optional takes a value, format is: # -s ARGS or --long ARGS else: default = self._get_default_metavar_for_optional(action) args_string = self._format_args(action, default) - if self._is_long_option(option_string): - option_color = t.summary_long_option - elif self._is_short_option(option_string): - option_color = t.summary_short_option part = ( f"{option_color}{option_string} " f"{t.summary_label}{args_string}{t.reset}" @@ -606,10 +599,8 @@ def color_option_strings(strings): for s in strings: if self._is_long_option(s): parts.append(f"{t.long_option}{s}{t.reset}") - elif self._is_short_option(s): - parts.append(f"{t.short_option}{s}{t.reset}") else: - parts.append(s) + parts.append(f"{t.short_option}{s}{t.reset}") return parts # if the Optional doesn't take a value, format is: @@ -1479,6 +1470,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1489,6 +1481,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1508,6 +1501,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) @@ -1540,7 +1534,7 @@ def add_argument(self, *args, **kwargs): action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): - raise ValueError('unknown action {action_class!r}') + raise ValueError(f'unknown action {action_class!r}') action = action_class(**kwargs) # raise an error if action for positional argument does not @@ -1933,6 +1927,7 @@ def identity(string): # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1947,6 +1942,7 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: raise ValueError('cannot have multiple subparser arguments') @@ -2000,6 +1996,7 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: @@ -2594,6 +2591,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # ======================== # Value conversion methods # ======================== + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: @@ -2693,6 +2691,7 @@ def _check_value(self, action, value): # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2723,20 +2722,14 @@ def format_help(self): return formatter.format_help() def _get_formatter(self): - if isinstance(self.formatter_class, type) and issubclass( - self.formatter_class, HelpFormatter - ): - return self.formatter_class( - prog=self.prog, - prefix_chars=self.prefix_chars, - color=self.color, - ) - else: - return self.formatter_class(prog=self.prog) + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(self.color) + return formatter # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2758,6 +2751,7 @@ def _print_message(self, message, file=None): # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) diff --git a/Lib/ast.py b/Lib/ast.py index b9791bf52d3e08..2f11683ecf7c68 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -147,18 +147,16 @@ def _format(node, level=0): if value is None and getattr(cls, name, ...) is None: keywords = True continue - if ( - not show_empty - and (value is None or value == []) - # Special cases: - # `Constant(value=None)` and `MatchSingleton(value=None)` - and not isinstance(node, (Constant, MatchSingleton)) - ): - args_buffer.append(repr(value)) - continue - elif not keywords: - args.extend(args_buffer) - args_buffer = [] + if not show_empty: + if value == []: + field_type = cls._field_types.get(name, object) + if getattr(field_type, '__origin__', ...) is list: + if not keywords: + args_buffer.append(repr(value)) + continue + if not keywords: + args.extend(args_buffer) + args_buffer = [] value, simple = _format(value, level) allsimple = allsimple and simple if keywords: diff --git a/Lib/asyncio/__init__.py b/Lib/asyncio/__init__.py index 4be7112fa017d4..32a5dbae03af21 100644 --- a/Lib/asyncio/__init__.py +++ b/Lib/asyncio/__init__.py @@ -51,15 +51,24 @@ def __getattr__(name: str): import warnings - deprecated = { - "AbstractEventLoopPolicy", - "DefaultEventLoopPolicy", - "WindowsSelectorEventLoopPolicy", - "WindowsProactorEventLoopPolicy", - } - if name in deprecated: - warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) - # deprecated things have underscores in front of them - return globals()["_" + name] + match name: + case "AbstractEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return events._AbstractEventLoopPolicy + case "DefaultEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + if sys.platform == 'win32': + return windows_events._DefaultEventLoopPolicy + return unix_events._DefaultEventLoopPolicy + case "WindowsSelectorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsSelectorEventLoopPolicy + # Else fall through to the AttributeError below. + case "WindowsProactorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsProactorEventLoopPolicy + # Else fall through to the AttributeError below. raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/asyncio/__main__.py b/Lib/asyncio/__main__.py index 21ca5c5f62ae83..ff3a69d1e17297 100644 --- a/Lib/asyncio/__main__.py +++ b/Lib/asyncio/__main__.py @@ -64,7 +64,7 @@ def callback(): except BaseException as exc: future.set_exception(exc) - loop.call_soon_threadsafe(callback, context=self.context) + self.loop.call_soon_threadsafe(callback, context=self.context) try: return future.result() diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 04fb961e9985e4..520d4b398545bf 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -1016,38 +1016,43 @@ async def _connect_sock(self, exceptions, addr_info, local_addr_infos=None): family, type_, proto, _, address = addr_info sock = None try: - sock = socket.socket(family=family, type=type_, proto=proto) - sock.setblocking(False) - if local_addr_infos is not None: - for lfamily, _, _, _, laddr in local_addr_infos: - # skip local addresses of different family - if lfamily != family: - continue - try: - sock.bind(laddr) - break - except OSError as exc: - msg = ( - f'error while attempting to bind on ' - f'address {laddr!r}: {str(exc).lower()}' - ) - exc = OSError(exc.errno, msg) - my_exceptions.append(exc) - else: # all bind attempts failed - if my_exceptions: - raise my_exceptions.pop() - else: - raise OSError(f"no matching local address with {family=} found") - await self.sock_connect(sock, address) - return sock - except OSError as exc: - my_exceptions.append(exc) - if sock is not None: - sock.close() - raise + try: + sock = socket.socket(family=family, type=type_, proto=proto) + sock.setblocking(False) + if local_addr_infos is not None: + for lfamily, _, _, _, laddr in local_addr_infos: + # skip local addresses of different family + if lfamily != family: + continue + try: + sock.bind(laddr) + break + except OSError as exc: + msg = ( + f'error while attempting to bind on ' + f'address {laddr!r}: {str(exc).lower()}' + ) + exc = OSError(exc.errno, msg) + my_exceptions.append(exc) + else: # all bind attempts failed + if my_exceptions: + raise my_exceptions.pop() + else: + raise OSError(f"no matching local address with {family=} found") + await self.sock_connect(sock, address) + return sock + except OSError as exc: + my_exceptions.append(exc) + raise except: if sock is not None: - sock.close() + try: + sock.close() + except OSError: + # An error when closing a newly created socket is + # not important, but it can overwrite more important + # non-OSError error. So ignore it. + pass raise finally: exceptions = my_exceptions = None @@ -1161,7 +1166,7 @@ async def create_connection( raise ExceptionGroup("create_connection failed", exceptions) if len(exceptions) == 1: raise exceptions[0] - else: + elif exceptions: # If they all have the same str(), raise one. model = str(exceptions[0]) if all(str(exc) == model for exc in exceptions): @@ -1170,6 +1175,9 @@ async def create_connection( # the various error messages. raise OSError('Multiple exceptions: {}'.format( ', '.join(str(exc) for exc in exceptions))) + else: + # No exceptions were collected, raise a timeout error + raise TimeoutError('create_connection failed') finally: exceptions = None diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index 9c2ba679ce2bf1..d40af422e614c1 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -104,7 +104,12 @@ def close(self): for proto in self._pipes.values(): if proto is None: continue - proto.pipe.close() + # See gh-114177 + # skip closing the pipe if loop is already closed + # this can happen e.g. when loop is closed immediately after + # process is killed + if self._loop and not self._loop.is_closed(): + proto.pipe.close() if (self._proc is not None and # has the child process finished? diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index 2913f901dca65f..a7fb55982abe9c 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -5,14 +5,11 @@ # SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. http://magic.io __all__ = ( - "_AbstractEventLoopPolicy", "AbstractEventLoop", "AbstractServer", "Handle", "TimerHandle", - "_get_event_loop_policy", "get_event_loop_policy", - "_set_event_loop_policy", "set_event_loop_policy", "get_event_loop", "set_event_loop", @@ -791,7 +788,10 @@ def _init_event_loop_policy(): global _event_loop_policy with _lock: if _event_loop_policy is None: # pragma: no branch - from . import _DefaultEventLoopPolicy + if sys.platform == 'win32': + from .windows_events import _DefaultEventLoopPolicy + else: + from .unix_events import _DefaultEventLoopPolicy _event_loop_policy = _DefaultEventLoopPolicy() diff --git a/Lib/asyncio/graph.py b/Lib/asyncio/graph.py index d8df7c9919abbf..b5bfeb1630a159 100644 --- a/Lib/asyncio/graph.py +++ b/Lib/asyncio/graph.py @@ -1,6 +1,7 @@ """Introspection utils for tasks call graphs.""" import dataclasses +import io import sys import types @@ -16,9 +17,6 @@ 'FutureCallGraph', ) -if False: # for type checkers - from typing import TextIO - # Sadly, we can't re-use the traceback module's datastructures as those # are tailored for error reporting, whereas we need to represent an # async call graph. @@ -270,7 +268,7 @@ def print_call_graph( future: futures.Future | None = None, /, *, - file: TextIO | None = None, + file: io.Writer[str] | None = None, depth: int = 1, limit: int | None = None, ) -> None: diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index 22147451fa7ebd..6ad84044adf146 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -173,7 +173,7 @@ def _accept_connection( # listening socket has triggered an EVENT_READ. There may be multiple # connections waiting for an .accept() so it is called in a loop. # See https://bugs.python.org/issue27906 for more details. - for _ in range(backlog): + for _ in range(backlog + 1): try: conn, addr = sock.accept() if self._debug: diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 888615f8e5e1b3..fbd5c39a7c56ac 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -908,6 +908,25 @@ def _done_callback(fut, cur_task=cur_task): return outer +def _log_on_exception(fut): + if fut.cancelled(): + return + + exc = fut.exception() + if exc is None: + return + + context = { + 'message': + f'{exc.__class__.__name__} exception in shielded future', + 'exception': exc, + 'future': fut, + } + if fut._source_traceback: + context['source_traceback'] = fut._source_traceback + fut._loop.call_exception_handler(context) + + def shield(arg): """Wait for a future, shielding it from cancellation. @@ -953,14 +972,11 @@ def shield(arg): else: cur_task = None - def _inner_done_callback(inner, cur_task=cur_task): - if cur_task is not None: - futures.future_discard_from_awaited_by(inner, cur_task) + def _clear_awaited_by_callback(inner): + futures.future_discard_from_awaited_by(inner, cur_task) + def _inner_done_callback(inner): if outer.cancelled(): - if not inner.cancelled(): - # Mark inner's result as retrieved. - inner.exception() return if inner.cancelled(): @@ -972,10 +988,16 @@ def _inner_done_callback(inner, cur_task=cur_task): else: outer.set_result(inner.result()) - def _outer_done_callback(outer): if not inner.done(): inner.remove_done_callback(_inner_done_callback) + # Keep only one callback to log on cancel + inner.remove_done_callback(_log_on_exception) + inner.add_done_callback(_log_on_exception) + + if cur_task is not None: + inner.add_done_callback(_clear_awaited_by_callback) + inner.add_done_callback(_inner_done_callback) outer.add_done_callback(_outer_done_callback) diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index bf1cb5e64cbfbe..2683f34cc7113b 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,71 +1,99 @@ """Tools to analyze tasks running in asyncio programs.""" -from dataclasses import dataclass -from collections import defaultdict +from collections import defaultdict, namedtuple from itertools import count from enum import Enum import sys -from _remote_debugging import get_all_awaited_by - +from _remote_debugging import RemoteUnwinder, FrameInfo class NodeType(Enum): COROUTINE = 1 TASK = 2 -@dataclass(frozen=True) class CycleFoundException(Exception): """Raised when there is a cycle when drawing the call tree.""" - cycles: list[list[int]] - id2name: dict[int, str] + def __init__( + self, + cycles: list[list[int]], + id2name: dict[int, str], + ) -> None: + super().__init__(cycles, id2name) + self.cycles = cycles + self.id2name = id2name -# ─── indexing helpers ─────────────────────────────────────────── -def _format_stack_entry(elem: tuple[str, str, int] | str) -> str: - if isinstance(elem, tuple): - fqname, path, line_no = elem - return f"{fqname} {path}:{line_no}" +# ─── indexing helpers ─────────────────────────────────────────── +def _format_stack_entry(elem: str|FrameInfo) -> str: + if not isinstance(elem, str): + if elem.lineno == 0 and elem.filename == "": + return f"{elem.funcname}" + else: + return f"{elem.funcname} {elem.filename}:{elem.lineno}" return elem def _index(result): - id2name, awaits = {}, [] - for _thr_id, tasks in result: - for tid, tname, awaited in tasks: - id2name[tid] = tname - for stack, parent_id in awaited: - stack = [_format_stack_entry(elem) for elem in stack] - awaits.append((parent_id, stack, tid)) - return id2name, awaits - - -def _build_tree(id2name, awaits): + id2name, awaits, task_stacks = {}, [], {} + for awaited_info in result: + for task_info in awaited_info.awaited_by: + task_id = task_info.task_id + task_name = task_info.task_name + id2name[task_id] = task_name + + # Store the internal coroutine stack for this task + if task_info.coroutine_stack: + for coro_info in task_info.coroutine_stack: + call_stack = coro_info.call_stack + internal_stack = [_format_stack_entry(frame) for frame in call_stack] + task_stacks[task_id] = internal_stack + + # Add the awaited_by relationships (external dependencies) + if task_info.awaited_by: + for coro_info in task_info.awaited_by: + call_stack = coro_info.call_stack + parent_task_id = coro_info.task_name + stack = [_format_stack_entry(frame) for frame in call_stack] + awaits.append((parent_task_id, stack, task_id)) + return id2name, awaits, task_stacks + + +def _build_tree(id2name, awaits, task_stacks): id2label = {(NodeType.TASK, tid): name for tid, name in id2name.items()} children = defaultdict(list) - cor_names = defaultdict(dict) # (parent) -> {frame: node} - cor_id_seq = count(1) - - def _cor_node(parent_key, frame_name): - """Return an existing or new (NodeType.COROUTINE, …) node under *parent_key*.""" - bucket = cor_names[parent_key] - if frame_name in bucket: - return bucket[frame_name] - node_key = (NodeType.COROUTINE, f"c{next(cor_id_seq)}") - id2label[node_key] = frame_name - children[parent_key].append(node_key) - bucket[frame_name] = node_key + cor_nodes = defaultdict(dict) # Maps parent -> {frame_name: node_key} + next_cor_id = count(1) + + def get_or_create_cor_node(parent, frame): + """Get existing coroutine node or create new one under parent""" + if frame in cor_nodes[parent]: + return cor_nodes[parent][frame] + + node_key = (NodeType.COROUTINE, f"c{next(next_cor_id)}") + id2label[node_key] = frame + children[parent].append(node_key) + cor_nodes[parent][frame] = node_key return node_key - # lay down parent ➜ …frames… ➜ child paths + # Build task dependency tree with coroutine frames for parent_id, stack, child_id in awaits: cur = (NodeType.TASK, parent_id) - for frame in reversed(stack): # outer-most → inner-most - cur = _cor_node(cur, frame) + for frame in reversed(stack): + cur = get_or_create_cor_node(cur, frame) + child_key = (NodeType.TASK, child_id) if child_key not in children[cur]: children[cur].append(child_key) + # Add coroutine stacks for leaf tasks + awaiting_tasks = {parent_id for parent_id, _, _ in awaits} + for task_id in id2name: + if task_id not in awaiting_tasks and task_id in task_stacks: + cur = (NodeType.TASK, task_id) + for frame in reversed(task_stacks[task_id]): + cur = get_or_create_cor_node(cur, frame) + return id2label, children @@ -112,6 +140,11 @@ def dfs(v): # ─── PRINT TREE FUNCTION ─────────────────────────────────────── +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + def build_async_tree(result, task_emoji="(T)", cor_emoji=""): """ Build a list of strings for pretty-print an async call tree. @@ -119,12 +152,12 @@ def build_async_tree(result, task_emoji="(T)", cor_emoji=""): The call tree is produced by `get_all_async_stacks()`, prefixing tasks with `task_emoji` and coroutine frames with `cor_emoji`. """ - id2name, awaits = _index(result) + id2name, awaits, task_stacks = _index(result) g = _task_graph(awaits) cycles = _find_cycles(g) if cycles: raise CycleFoundException(cycles, id2name) - labels, children = _build_tree(id2name, awaits) + labels, children = _build_tree(id2name, awaits, task_stacks) def pretty(node): flag = task_emoji if node[0] == NodeType.TASK else cor_emoji @@ -144,35 +177,40 @@ def render(node, prefix="", last=True, buf=None): def build_task_table(result): - id2name, awaits = _index(result) + id2name, _, _ = _index(result) table = [] - for tid, tasks in result: - for task_id, task_name, awaited in tasks: - if not awaited: - table.append( - [ - tid, - hex(task_id), - task_name, - "", - "", - "0x0" - ] - ) - for stack, awaiter_id in awaited: - stack = [elem[0] if isinstance(elem, tuple) else elem for elem in stack] - coroutine_chain = " -> ".join(stack) - awaiter_name = id2name.get(awaiter_id, "Unknown") - table.append( - [ - tid, - hex(task_id), - task_name, - coroutine_chain, - awaiter_name, - hex(awaiter_id), - ] - ) + + for awaited_info in result: + thread_id = awaited_info.thread_id + for task_info in awaited_info.awaited_by: + # Get task info + task_id = task_info.task_id + task_name = task_info.task_name + + # Build coroutine stack string + frames = [frame for coro in task_info.coroutine_stack + for frame in coro.call_stack] + coro_stack = " -> ".join(_format_stack_entry(x).split(" ")[0] + for x in frames) + + # Handle tasks with no awaiters + if not task_info.awaited_by: + table.append([thread_id, hex(task_id), task_name, coro_stack, + "", "", "0x0"]) + continue + + # Handle tasks with awaiters + for coro_info in task_info.awaited_by: + parent_id = coro_info.task_name + awaiter_frames = [_format_stack_entry(x).split(" ")[0] + for x in coro_info.call_stack] + awaiter_chain = " -> ".join(awaiter_frames) + awaiter_name = id2name.get(parent_id, "Unknown") + parent_id_str = (hex(parent_id) if isinstance(parent_id, int) + else str(parent_id)) + + table.append([thread_id, hex(task_id), task_name, coro_stack, + awaiter_chain, awaiter_name, parent_id_str]) return table @@ -201,11 +239,11 @@ def display_awaited_by_tasks_table(pid: int) -> None: table = build_task_table(tasks) # Print the table in a simple tabular format print( - f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine chain':<50} {'awaiter name':<20} {'awaiter id':<15}" + f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine stack':<50} {'awaiter chain':<50} {'awaiter name':<15} {'awaiter id':<15}" ) - print("-" * 135) + print("-" * 180) for row in table: - print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<20} {row[5]:<15}") + print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<50} {row[5]:<15} {row[6]:<15}") def display_awaited_by_tasks_tree(pid: int) -> None: diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py index f69c6a64c39ae6..1c1458127db5ac 100644 --- a/Lib/asyncio/unix_events.py +++ b/Lib/asyncio/unix_events.py @@ -28,7 +28,6 @@ __all__ = ( 'SelectorEventLoop', - '_DefaultEventLoopPolicy', 'EventLoop', ) diff --git a/Lib/compression/bz2/__init__.py b/Lib/compression/bz2.py similarity index 100% rename from Lib/compression/bz2/__init__.py rename to Lib/compression/bz2.py diff --git a/Lib/compression/gzip/__init__.py b/Lib/compression/gzip.py similarity index 100% rename from Lib/compression/gzip/__init__.py rename to Lib/compression/gzip.py diff --git a/Lib/compression/lzma/__init__.py b/Lib/compression/lzma.py similarity index 100% rename from Lib/compression/lzma/__init__.py rename to Lib/compression/lzma.py diff --git a/Lib/compression/zlib/__init__.py b/Lib/compression/zlib.py similarity index 100% rename from Lib/compression/zlib/__init__.py rename to Lib/compression/zlib.py diff --git a/Lib/compression/zstd/__init__.py b/Lib/compression/zstd/__init__.py index 4f734eb07b00e3..84b25914b0aa93 100644 --- a/Lib/compression/zstd/__init__.py +++ b/Lib/compression/zstd/__init__.py @@ -2,41 +2,48 @@ __all__ = ( # compression.zstd - "COMPRESSION_LEVEL_DEFAULT", - "compress", - "CompressionParameter", - "decompress", - "DecompressionParameter", - "finalize_dict", - "get_frame_info", - "Strategy", - "train_dict", + 'COMPRESSION_LEVEL_DEFAULT', + 'compress', + 'CompressionParameter', + 'decompress', + 'DecompressionParameter', + 'finalize_dict', + 'get_frame_info', + 'Strategy', + 'train_dict', # compression.zstd._zstdfile - "open", - "ZstdFile", + 'open', + 'ZstdFile', # _zstd - "get_frame_size", - "zstd_version", - "zstd_version_info", - "ZstdCompressor", - "ZstdDecompressor", - "ZstdDict", - "ZstdError", + 'get_frame_size', + 'zstd_version', + 'zstd_version_info', + 'ZstdCompressor', + 'ZstdDecompressor', + 'ZstdDict', + 'ZstdError', ) import _zstd import enum -from _zstd import * +from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, + get_frame_size, zstd_version) from compression.zstd._zstdfile import ZstdFile, open, _nbytes -COMPRESSION_LEVEL_DEFAULT = _zstd._compressionLevel_values[0] +# zstd_version_number is (MAJOR * 100 * 100 + MINOR * 100 + RELEASE) +zstd_version_info = (*divmod(_zstd.zstd_version_number // 100, 100), + _zstd.zstd_version_number % 100) +"""Version number of the runtime zstd library as a tuple of integers.""" + +COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT """The default compression level for Zstandard, currently '3'.""" class FrameInfo: """Information about a Zstandard frame.""" + __slots__ = 'decompressed_size', 'dictionary_id' def __init__(self, decompressed_size, dictionary_id): @@ -65,7 +72,7 @@ def get_frame_info(frame_buffer): the frame may or may not need a dictionary to be decoded, and the ID of such a dictionary is not specified. """ - return FrameInfo(*_zstd._get_frame_info(frame_buffer)) + return FrameInfo(*_zstd.get_frame_info(frame_buffer)) def train_dict(samples, dict_size): @@ -85,7 +92,7 @@ def train_dict(samples, dict_size): chunk_sizes = tuple(_nbytes(sample) for sample in samples) if not chunks: raise ValueError("samples contained no data; can't train dictionary.") - dict_content = _zstd._train_dict(chunks, chunk_sizes, dict_size) + dict_content = _zstd.train_dict(chunks, chunk_sizes, dict_size) return ZstdDict(dict_content) @@ -119,13 +126,13 @@ def finalize_dict(zstd_dict, /, samples, dict_size, level): chunks = b''.join(samples) chunk_sizes = tuple(_nbytes(sample) for sample in samples) if not chunks: - raise ValueError("The samples are empty content, can't finalize the" + raise ValueError("The samples are empty content, can't finalize the " "dictionary.") - dict_content = _zstd._finalize_dict(zstd_dict.dict_content, - chunks, chunk_sizes, - dict_size, level) + dict_content = _zstd.finalize_dict(zstd_dict.dict_content, chunks, + chunk_sizes, dict_size, level) return ZstdDict(dict_content) + def compress(data, level=None, options=None, zstd_dict=None): """Return Zstandard compressed *data* as bytes. @@ -141,6 +148,7 @@ def compress(data, level=None, options=None, zstd_dict=None): comp = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict) return comp.compress(data, mode=ZstdCompressor.FLUSH_FRAME) + def decompress(data, zstd_dict=None, options=None): """Decompress one or more frames of Zstandard compressed *data*. @@ -156,59 +164,59 @@ def decompress(data, zstd_dict=None, options=None): decomp = ZstdDecompressor(options=options, zstd_dict=zstd_dict) results.append(decomp.decompress(data)) if not decomp.eof: - raise ZstdError("Compressed data ended before the " - "end-of-stream marker was reached") + raise ZstdError('Compressed data ended before the ' + 'end-of-stream marker was reached') data = decomp.unused_data if not data: break - return b"".join(results) + return b''.join(results) class CompressionParameter(enum.IntEnum): """Compression parameters.""" - compression_level = _zstd._ZSTD_c_compressionLevel - window_log = _zstd._ZSTD_c_windowLog - hash_log = _zstd._ZSTD_c_hashLog - chain_log = _zstd._ZSTD_c_chainLog - search_log = _zstd._ZSTD_c_searchLog - min_match = _zstd._ZSTD_c_minMatch - target_length = _zstd._ZSTD_c_targetLength - strategy = _zstd._ZSTD_c_strategy - - enable_long_distance_matching = _zstd._ZSTD_c_enableLongDistanceMatching - ldm_hash_log = _zstd._ZSTD_c_ldmHashLog - ldm_min_match = _zstd._ZSTD_c_ldmMinMatch - ldm_bucket_size_log = _zstd._ZSTD_c_ldmBucketSizeLog - ldm_hash_rate_log = _zstd._ZSTD_c_ldmHashRateLog - - content_size_flag = _zstd._ZSTD_c_contentSizeFlag - checksum_flag = _zstd._ZSTD_c_checksumFlag - dict_id_flag = _zstd._ZSTD_c_dictIDFlag - - nb_workers = _zstd._ZSTD_c_nbWorkers - job_size = _zstd._ZSTD_c_jobSize - overlap_log = _zstd._ZSTD_c_overlapLog + compression_level = _zstd.ZSTD_c_compressionLevel + window_log = _zstd.ZSTD_c_windowLog + hash_log = _zstd.ZSTD_c_hashLog + chain_log = _zstd.ZSTD_c_chainLog + search_log = _zstd.ZSTD_c_searchLog + min_match = _zstd.ZSTD_c_minMatch + target_length = _zstd.ZSTD_c_targetLength + strategy = _zstd.ZSTD_c_strategy + + enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching + ldm_hash_log = _zstd.ZSTD_c_ldmHashLog + ldm_min_match = _zstd.ZSTD_c_ldmMinMatch + ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog + ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog + + content_size_flag = _zstd.ZSTD_c_contentSizeFlag + checksum_flag = _zstd.ZSTD_c_checksumFlag + dict_id_flag = _zstd.ZSTD_c_dictIDFlag + + nb_workers = _zstd.ZSTD_c_nbWorkers + job_size = _zstd.ZSTD_c_jobSize + overlap_log = _zstd.ZSTD_c_overlapLog def bounds(self): """Return the (lower, upper) int bounds of a compression parameter. Both the lower and upper bounds are inclusive. """ - return _zstd._get_param_bounds(self.value, is_compress=True) + return _zstd.get_param_bounds(self.value, is_compress=True) class DecompressionParameter(enum.IntEnum): """Decompression parameters.""" - window_log_max = _zstd._ZSTD_d_windowLogMax + window_log_max = _zstd.ZSTD_d_windowLogMax def bounds(self): """Return the (lower, upper) int bounds of a decompression parameter. Both the lower and upper bounds are inclusive. """ - return _zstd._get_param_bounds(self.value, is_compress=False) + return _zstd.get_param_bounds(self.value, is_compress=False) class Strategy(enum.IntEnum): @@ -219,16 +227,16 @@ class Strategy(enum.IntEnum): the numeric value might change. """ - fast = _zstd._ZSTD_fast - dfast = _zstd._ZSTD_dfast - greedy = _zstd._ZSTD_greedy - lazy = _zstd._ZSTD_lazy - lazy2 = _zstd._ZSTD_lazy2 - btlazy2 = _zstd._ZSTD_btlazy2 - btopt = _zstd._ZSTD_btopt - btultra = _zstd._ZSTD_btultra - btultra2 = _zstd._ZSTD_btultra2 + fast = _zstd.ZSTD_fast + dfast = _zstd.ZSTD_dfast + greedy = _zstd.ZSTD_greedy + lazy = _zstd.ZSTD_lazy + lazy2 = _zstd.ZSTD_lazy2 + btlazy2 = _zstd.ZSTD_btlazy2 + btopt = _zstd.ZSTD_btopt + btultra = _zstd.ZSTD_btultra + btultra2 = _zstd.ZSTD_btultra2 # Check validity of the CompressionParameter & DecompressionParameter types -_zstd._set_parameter_types(CompressionParameter, DecompressionParameter) +_zstd.set_parameter_types(CompressionParameter, DecompressionParameter) diff --git a/Lib/compression/zstd/_zstdfile.py b/Lib/compression/zstd/_zstdfile.py index fbc9e02a733626..d709f5efc658fa 100644 --- a/Lib/compression/zstd/_zstdfile.py +++ b/Lib/compression/zstd/_zstdfile.py @@ -1,12 +1,9 @@ import io from os import PathLike -from _zstd import (ZstdCompressor, ZstdDecompressor, _ZSTD_DStreamSizes, - ZstdError) +from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize from compression._common import _streams -__all__ = ("ZstdFile", "open") - -_ZSTD_DStreamOutSize = _ZSTD_DStreamSizes[1] +__all__ = ('ZstdFile', 'open') _MODE_CLOSED = 0 _MODE_READ = 1 @@ -33,15 +30,15 @@ class ZstdFile(_streams.BaseStream): FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME - def __init__(self, file, /, mode="r", *, + def __init__(self, file, /, mode='r', *, level=None, options=None, zstd_dict=None): """Open a Zstandard compressed file in binary mode. *file* can be either an file-like object, or a file name to open. - *mode* can be "r" for reading (default), "w" for (over)writing, "x" for - creating exclusively, or "a" for appending. These can equivalently be - given as "rb", "wb", "xb" and "ab" respectively. + *mode* can be 'r' for reading (default), 'w' for (over)writing, 'x' for + creating exclusively, or 'a' for appending. These can equivalently be + given as 'rb', 'wb', 'xb' and 'ab' respectively. *level* is an optional int specifying the compression level to use, or COMPRESSION_LEVEL_DEFAULT if not given. @@ -59,39 +56,38 @@ def __init__(self, file, /, mode="r", *, self._buffer = None if not isinstance(mode, str): - raise ValueError("mode must be a str") + raise ValueError('mode must be a str') if options is not None and not isinstance(options, dict): - raise TypeError("options must be a dict or None") - mode = mode.removesuffix("b") # handle rb, wb, xb, ab - if mode == "r": + raise TypeError('options must be a dict or None') + mode = mode.removesuffix('b') # handle rb, wb, xb, ab + if mode == 'r': if level is not None: - raise TypeError("level is illegal in read mode") + raise TypeError('level is illegal in read mode') self._mode = _MODE_READ - elif mode in {"w", "a", "x"}: + elif mode in {'w', 'a', 'x'}: if level is not None and not isinstance(level, int): - raise TypeError("level must be int or None") + raise TypeError('level must be int or None') self._mode = _MODE_WRITE self._compressor = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict) self._pos = 0 else: - raise ValueError(f"Invalid mode: {mode!r}") + raise ValueError(f'Invalid mode: {mode!r}') if isinstance(file, (str, bytes, PathLike)): self._fp = io.open(file, f'{mode}b') self._close_fp = True - elif ((mode == 'r' and hasattr(file, "read")) - or (mode != 'r' and hasattr(file, "write"))): + elif ((mode == 'r' and hasattr(file, 'read')) + or (mode != 'r' and hasattr(file, 'write'))): self._fp = file else: - raise TypeError("file must be a file-like object " - "or a str, bytes, or PathLike object") + raise TypeError('file must be a file-like object ' + 'or a str, bytes, or PathLike object') if self._mode == _MODE_READ: raw = _streams.DecompressReader( self._fp, ZstdDecompressor, - trailing_error=ZstdError, zstd_dict=zstd_dict, options=options, ) @@ -154,22 +150,22 @@ def flush(self, mode=FLUSH_BLOCK): return self._check_not_closed() if mode not in {self.FLUSH_BLOCK, self.FLUSH_FRAME}: - raise ValueError("Invalid mode argument, expected either " - "ZstdFile.FLUSH_FRAME or " - "ZstdFile.FLUSH_BLOCK") + raise ValueError('Invalid mode argument, expected either ' + 'ZstdFile.FLUSH_FRAME or ' + 'ZstdFile.FLUSH_BLOCK') if self._compressor.last_mode == mode: return # Flush zstd block/frame, and write. data = self._compressor.flush(mode) self._fp.write(data) - if hasattr(self._fp, "flush"): + if hasattr(self._fp, 'flush'): self._fp.flush() def read(self, size=-1): """Read up to size uncompressed bytes from the file. If size is negative or omitted, read until EOF is reached. - Returns b"" if the file is already at EOF. + Returns b'' if the file is already at EOF. """ if size is None: size = -1 @@ -181,14 +177,14 @@ def read1(self, size=-1): making multiple reads from the underlying stream. Reads up to a buffer's worth of data if size is negative. - Returns b"" if the file is at EOF. + Returns b'' if the file is at EOF. """ self._check_can_read() if size < 0: # Note this should *not* be io.DEFAULT_BUFFER_SIZE. # ZSTD_DStreamOutSize is the minimum amount to read guaranteeing # a full block is read. - size = _ZSTD_DStreamOutSize + size = ZSTD_DStreamOutSize return self._buffer.read1(size) def readinto(self, b): @@ -296,7 +292,7 @@ def writable(self): return self._mode == _MODE_WRITE -def open(file, /, mode="rb", *, level=None, options=None, zstd_dict=None, +def open(file, /, mode='rb', *, level=None, options=None, zstd_dict=None, encoding=None, errors=None, newline=None): """Open a Zstandard compressed file in binary or text mode. @@ -304,8 +300,8 @@ def open(file, /, mode="rb", *, level=None, options=None, zstd_dict=None, in which case the named file is opened, or it can be an existing file object to read from or write to. - The mode parameter can be "r", "rb" (default), "w", "wb", "x", "xb", "a", - "ab" for binary mode, or "rt", "wt", "xt", "at" for text mode. + The mode parameter can be 'r', 'rb' (default), 'w', 'wb', 'x', 'xb', 'a', + 'ab' for binary mode, or 'rt', 'wt', 'xt', 'at' for text mode. The level, options, and zstd_dict parameters specify the settings the same as ZstdFile. @@ -326,19 +322,19 @@ def open(file, /, mode="rb", *, level=None, options=None, zstd_dict=None, behavior, and line ending(s). """ - text_mode = "t" in mode - mode = mode.replace("t", "") + text_mode = 't' in mode + mode = mode.replace('t', '') if text_mode: - if "b" in mode: - raise ValueError(f"Invalid mode: {mode!r}") + if 'b' in mode: + raise ValueError(f'Invalid mode: {mode!r}') else: if encoding is not None: - raise ValueError("Argument 'encoding' not supported in binary mode") + raise ValueError('Argument "encoding" not supported in binary mode') if errors is not None: - raise ValueError("Argument 'errors' not supported in binary mode") + raise ValueError('Argument "errors" not supported in binary mode') if newline is not None: - raise ValueError("Argument 'newline' not supported in binary mode") + raise ValueError('Argument "newline" not supported in binary mode') binary_file = ZstdFile(file, mode, level=level, options=options, zstd_dict=zstd_dict) diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index 7ada7431c1ab8c..e717222cf98b32 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -17,7 +17,7 @@ wait, as_completed) -__all__ = ( +__all__ = [ 'FIRST_COMPLETED', 'FIRST_EXCEPTION', 'ALL_COMPLETED', @@ -29,10 +29,18 @@ 'Executor', 'wait', 'as_completed', - 'InterpreterPoolExecutor', 'ProcessPoolExecutor', 'ThreadPoolExecutor', -) +] + + +try: + import _interpreters +except ImportError: + _interpreters = None + +if _interpreters: + __all__.append('InterpreterPoolExecutor') def __dir__(): @@ -43,22 +51,15 @@ def __getattr__(name): global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor if name == 'ProcessPoolExecutor': - from .process import ProcessPoolExecutor as pe - ProcessPoolExecutor = pe - return pe + from .process import ProcessPoolExecutor + return ProcessPoolExecutor if name == 'ThreadPoolExecutor': - from .thread import ThreadPoolExecutor as te - ThreadPoolExecutor = te - return te + from .thread import ThreadPoolExecutor + return ThreadPoolExecutor - if name == 'InterpreterPoolExecutor': - try: - from .interpreter import InterpreterPoolExecutor as ie - except ModuleNotFoundError: - ie = InterpreterPoolExecutor = None - else: - InterpreterPoolExecutor = ie - return ie + if _interpreters and name == 'InterpreterPoolExecutor': + from .interpreter import InterpreterPoolExecutor + return InterpreterPoolExecutor raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index d17688dc9d7346..cbb60ce80c1813 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -1,69 +1,39 @@ """Implements InterpreterPoolExecutor.""" -import contextlib -import pickle +from concurrent import interpreters +import sys import textwrap from . import thread as _thread -import _interpreters -import _interpqueues +import traceback -class ExecutionFailed(_interpreters.InterpreterError): - """An unhandled exception happened during execution.""" - - def __init__(self, excinfo): - msg = excinfo.formatted - if not msg: - if excinfo.type and excinfo.msg: - msg = f'{excinfo.type.__name__}: {excinfo.msg}' - else: - msg = excinfo.type.__name__ or excinfo.msg - super().__init__(msg) - self.excinfo = excinfo - - def __str__(self): +def do_call(results, func, args, kwargs): + try: + return func(*args, **kwargs) + except BaseException as exc: + # Send the captured exception out on the results queue, + # but still leave it unhandled for the interpreter to handle. try: - formatted = self.excinfo.errdisplay - except Exception: - return super().__str__() - else: - return textwrap.dedent(f""" -{super().__str__()} - -Uncaught in the interpreter: - -{formatted} - """.strip()) - - -UNBOUND = 2 # error; this should not happen. + results.put(exc) + except interpreters.NotShareableError: + # The exception is not shareable. + print('exception is not shareable:', file=sys.stderr) + traceback.print_exception(exc) + results.put(None) + raise # re-raise class WorkerContext(_thread.WorkerContext): @classmethod - def prepare(cls, initializer, initargs, shared): + def prepare(cls, initializer, initargs): def resolve_task(fn, args, kwargs): if isinstance(fn, str): # XXX Circle back to this later. raise TypeError('scripts not supported') - if args or kwargs: - raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}') - data = textwrap.dedent(fn) - kind = 'script' - # Make sure the script compiles. - # Ideally we wouldn't throw away the resulting code - # object. However, there isn't much to be done until - # code objects are shareable and/or we do a better job - # of supporting code objects in _interpreters.exec(). - compile(data, '', 'exec') else: - # Functions defined in the __main__ module can't be pickled, - # so they can't be used here. In the future, we could possibly - # borrow from multiprocessing to work around this. - data = pickle.dumps((fn, args, kwargs)) - kind = 'function' - return (data, kind) + task = (fn, args, kwargs) + return task if initializer is not None: try: @@ -75,73 +45,24 @@ def resolve_task(fn, args, kwargs): else: initdata = None def create_context(): - return cls(initdata, shared) + return cls(initdata) return create_context, resolve_task - @classmethod - @contextlib.contextmanager - def _capture_exc(cls, resultsid): - try: - yield - except BaseException as exc: - # Send the captured exception out on the results queue, - # but still leave it unhandled for the interpreter to handle. - err = pickle.dumps(exc) - _interpqueues.put(resultsid, (None, err), 1, UNBOUND) - raise # re-raise - - @classmethod - def _send_script_result(cls, resultsid): - _interpqueues.put(resultsid, (None, None), 0, UNBOUND) - - @classmethod - def _call(cls, func, args, kwargs, resultsid): - with cls._capture_exc(resultsid): - res = func(*args or (), **kwargs or {}) - # Send the result back. - try: - _interpqueues.put(resultsid, (res, None), 0, UNBOUND) - except _interpreters.NotShareableError: - res = pickle.dumps(res) - _interpqueues.put(resultsid, (res, None), 1, UNBOUND) - - @classmethod - def _call_pickled(cls, pickled, resultsid): - with cls._capture_exc(resultsid): - fn, args, kwargs = pickle.loads(pickled) - cls._call(fn, args, kwargs, resultsid) - - def __init__(self, initdata, shared=None): + def __init__(self, initdata): self.initdata = initdata - self.shared = dict(shared) if shared else None - self.interpid = None - self.resultsid = None + self.interp = None + self.results = None def __del__(self): - if self.interpid is not None: + if self.interp is not None: self.finalize() - def _exec(self, script): - assert self.interpid is not None - excinfo = _interpreters.exec(self.interpid, script, restrict=True) - if excinfo is not None: - raise ExecutionFailed(excinfo) - def initialize(self): - assert self.interpid is None, self.interpid - self.interpid = _interpreters.create(reqrefs=True) + assert self.interp is None, self.interp + self.interp = interpreters.create() try: - _interpreters.incref(self.interpid) - maxsize = 0 - fmt = 0 - self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND) - - self._exec(f'from {__name__} import WorkerContext') - - if self.shared: - _interpreters.set___main___attrs( - self.interpid, self.shared, restrict=True) + self.results = interpreters.create_queue(maxsize) if self.initdata: self.run(self.initdata) @@ -150,64 +71,25 @@ def initialize(self): raise # re-raise def finalize(self): - interpid = self.interpid - resultsid = self.resultsid - self.resultsid = None - self.interpid = None - if resultsid is not None: - try: - _interpqueues.destroy(resultsid) - except _interpqueues.QueueNotFoundError: - pass - if interpid is not None: - try: - _interpreters.decref(interpid) - except _interpreters.InterpreterNotFoundError: - pass + interp = self.interp + results = self.results + self.results = None + self.interp = None + if results is not None: + del results + if interp is not None: + interp.close() def run(self, task): - data, kind = task - if kind == 'script': - raise NotImplementedError('script kind disabled') - script = f""" -with WorkerContext._capture_exc({self.resultsid}): -{textwrap.indent(data, ' ')} -WorkerContext._send_script_result({self.resultsid})""" - elif kind == 'function': - script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})' - else: - raise NotImplementedError(kind) - try: - self._exec(script) - except ExecutionFailed as exc: - exc_wrapper = exc - else: - exc_wrapper = None - - # Return the result, or raise the exception. - while True: - try: - obj = _interpqueues.get(self.resultsid) - except _interpqueues.QueueNotFoundError: + return self.interp.call(do_call, self.results, *task) + except interpreters.ExecutionFailed as wrapper: + # Wait for the exception data to show up. + exc = self.results.get() + if exc is None: + # The exception must have been not shareable. raise # re-raise - except _interpqueues.QueueError: - continue - except ModuleNotFoundError: - # interpreters.queues doesn't exist, which means - # QueueEmpty doesn't. Act as though it does. - continue - else: - break - (res, excdata), pickled, unboundop = obj - assert unboundop is None, unboundop - if excdata is not None: - assert res is None, res - assert pickled - assert exc_wrapper is not None - exc = pickle.loads(excdata) - raise exc from exc_wrapper - return pickle.loads(res) if pickled else res + raise exc from wrapper class BrokenInterpreterPool(_thread.BrokenThreadPool): @@ -221,11 +103,11 @@ class InterpreterPoolExecutor(_thread.ThreadPoolExecutor): BROKEN = BrokenInterpreterPool @classmethod - def prepare_context(cls, initializer, initargs, shared): - return WorkerContext.prepare(initializer, initargs, shared) + def prepare_context(cls, initializer, initargs): + return WorkerContext.prepare(initializer, initargs) def __init__(self, max_workers=None, thread_name_prefix='', - initializer=None, initargs=(), shared=None): + initializer=None, initargs=()): """Initializes a new InterpreterPoolExecutor instance. Args: @@ -235,8 +117,6 @@ def __init__(self, max_workers=None, thread_name_prefix='', initializer: A callable or script used to initialize each worker interpreter. initargs: A tuple of arguments to pass to the initializer. - shared: A mapping of shareabled objects to be inserted into - each worker interpreter. """ super().__init__(max_workers, thread_name_prefix, - initializer, initargs, shared=shared) + initializer, initargs) diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index 76b7b2abe836d8..a14650bf5fa47c 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -755,6 +755,11 @@ def _start_executor_manager_thread(self): self._executor_manager_thread_wakeup def _adjust_process_count(self): + # gh-132969: avoid error when state is reset and executor is still running, + # which will happen when shutdown(wait=False) is called. + if self._processes is None: + return + # if there's an idle process, we don't need to spawn a new one. if self._idle_worker_semaphore.acquire(blocking=False): return diff --git a/Lib/test/support/interpreters/__init__.py b/Lib/concurrent/interpreters/__init__.py similarity index 85% rename from Lib/test/support/interpreters/__init__.py rename to Lib/concurrent/interpreters/__init__.py index e067f259364d2a..aa46a2b37a48d5 100644 --- a/Lib/test/support/interpreters/__init__.py +++ b/Lib/concurrent/interpreters/__init__.py @@ -9,6 +9,10 @@ InterpreterError, InterpreterNotFoundError, NotShareableError, is_shareable, ) +from ._queues import ( + create as create_queue, + Queue, QueueEmpty, QueueFull, +) __all__ = [ @@ -20,21 +24,6 @@ ] -_queuemod = None - -def __getattr__(name): - if name in ('Queue', 'QueueEmpty', 'QueueFull', 'create_queue'): - global create_queue, Queue, QueueEmpty, QueueFull - ns = globals() - from .queues import ( - create as create_queue, - Queue, QueueEmpty, QueueFull, - ) - return ns[name] - else: - raise AttributeError(name) - - _EXEC_FAILURE_STR = """ {superstr} @@ -157,12 +146,8 @@ def __del__(self): self._decref() # for pickling: - def __getnewargs__(self): - return (self._id,) - - # for pickling: - def __getstate__(self): - return None + def __reduce__(self): + return (type(self), (self._id,)) def _decref(self): if not self._ownsref: @@ -226,33 +211,32 @@ def exec(self, code, /): if excinfo is not None: raise ExecutionFailed(excinfo) - def call(self, callable, /): - """Call the object in the interpreter with given args/kwargs. + def _call(self, callable, args, kwargs): + res, excinfo = _interpreters.call(self._id, callable, args, kwargs, restrict=True) + if excinfo is not None: + raise ExecutionFailed(excinfo) + return res - Only functions that take no arguments and have no closure - are supported. + def call(self, callable, /, *args, **kwargs): + """Call the object in the interpreter with given args/kwargs. - The return value is discarded. + Nearly all callables, args, kwargs, and return values are + supported. All "shareable" objects are supported, as are + "stateless" functions (meaning non-closures that do not use + any globals). This method will fall back to pickle. If the callable raises an exception then the error display - (including full traceback) is send back between the interpreters + (including full traceback) is sent back between the interpreters and an ExecutionFailed exception is raised, much like what happens with Interpreter.exec(). """ - # XXX Support args and kwargs. - # XXX Support arbitrary callables. - # XXX Support returning the return value (e.g. via pickle). - excinfo = _interpreters.call(self._id, callable, restrict=True) - if excinfo is not None: - raise ExecutionFailed(excinfo) + return self._call(callable, args, kwargs) - def call_in_thread(self, callable, /): + def call_in_thread(self, callable, /, *args, **kwargs): """Return a new thread that calls the object in the interpreter. The return value and any raised exception are discarded. """ - def task(): - self.call(callable) - t = threading.Thread(target=task) + t = threading.Thread(target=self._call, args=(callable, args, kwargs)) t.start() return t diff --git a/Lib/test/support/interpreters/_crossinterp.py b/Lib/concurrent/interpreters/_crossinterp.py similarity index 98% rename from Lib/test/support/interpreters/_crossinterp.py rename to Lib/concurrent/interpreters/_crossinterp.py index 544e197ba4c028..f47eb693ac861c 100644 --- a/Lib/test/support/interpreters/_crossinterp.py +++ b/Lib/concurrent/interpreters/_crossinterp.py @@ -61,7 +61,7 @@ def __new__(cls): def __repr__(self): return f'{self._MODULE}.{self._NAME}' -# return f'interpreters.queues.UNBOUND' +# return f'interpreters._queues.UNBOUND' UNBOUND = object.__new__(UnboundItem) diff --git a/Lib/test/support/interpreters/queues.py b/Lib/concurrent/interpreters/_queues.py similarity index 60% rename from Lib/test/support/interpreters/queues.py rename to Lib/concurrent/interpreters/_queues.py index deb8e8613af731..ee830973f2a36e 100644 --- a/Lib/test/support/interpreters/queues.py +++ b/Lib/concurrent/interpreters/_queues.py @@ -63,29 +63,34 @@ def _resolve_unbound(flag): return resolved -def create(maxsize=0, *, syncobj=False, unbounditems=UNBOUND): +def create(maxsize=0, *, unbounditems=UNBOUND): """Return a new cross-interpreter queue. The queue may be used to pass data safely between interpreters. - "syncobj" sets the default for Queue.put() - and Queue.put_nowait(). - - "unbounditems" likewise sets the default. See Queue.put() for + "unbounditems" sets the default for Queue.put(); see that method for supported values. The default value is UNBOUND, which replaces the unbound item. """ - fmt = _SHARED_ONLY if syncobj else _PICKLED unbound = _serialize_unbound(unbounditems) unboundop, = unbound - qid = _queues.create(maxsize, fmt, unboundop) - return Queue(qid, _fmt=fmt, _unbound=unbound) + qid = _queues.create(maxsize, unboundop, -1) + self = Queue(qid) + self._set_unbound(unboundop, unbounditems) + return self def list_all(): """Return a list of all open queues.""" - return [Queue(qid, _fmt=fmt, _unbound=(unboundop,)) - for qid, fmt, unboundop in _queues.list_all()] + queues = [] + for qid, unboundop, _ in _queues.list_all(): + self = Queue(qid) + if not hasattr(self, '_unbound'): + self._set_unbound(unboundop) + else: + assert self._unbound[0] == unboundop + queues.append(self) + return queues _known_queues = weakref.WeakValueDictionary() @@ -93,28 +98,17 @@ def list_all(): class Queue: """A cross-interpreter queue.""" - def __new__(cls, id, /, *, _fmt=None, _unbound=None): + def __new__(cls, id, /): # There is only one instance for any given ID. if isinstance(id, int): id = int(id) else: raise TypeError(f'id must be an int, got {id!r}') - if _fmt is None: - if _unbound is None: - _fmt, op = _queues.get_queue_defaults(id) - _unbound = (op,) - else: - _fmt, _ = _queues.get_queue_defaults(id) - elif _unbound is None: - _, op = _queues.get_queue_defaults(id) - _unbound = (op,) try: self = _known_queues[id] except KeyError: self = super().__new__(cls) self._id = id - self._fmt = _fmt - self._unbound = _unbound _known_queues[id] = self _queues.bind(id) return self @@ -136,17 +130,30 @@ def __hash__(self): return hash(self._id) # for pickling: - def __getnewargs__(self): - return (self._id,) + def __reduce__(self): + return (type(self), (self._id,)) - # for pickling: - def __getstate__(self): - return None + def _set_unbound(self, op, items=None): + assert not hasattr(self, '_unbound') + if items is None: + items = _resolve_unbound(op) + unbound = (op, items) + self._unbound = unbound + return unbound @property def id(self): return self._id + @property + def unbounditems(self): + try: + _, items = self._unbound + except AttributeError: + op, _ = _queues.get_queue_defaults(self._id) + _, items = self._set_unbound(op) + return items + @property def maxsize(self): try: @@ -165,77 +172,56 @@ def qsize(self): return _queues.get_count(self._id) def put(self, obj, timeout=None, *, - syncobj=None, - unbound=None, + unbounditems=None, _delay=10 / 1000, # 10 milliseconds ): """Add the object to the queue. This blocks while the queue is full. - If "syncobj" is None (the default) then it uses the - queue's default, set with create_queue(). - - If "syncobj" is false then all objects are supported, - at the expense of worse performance. - - If "syncobj" is true then the object must be "shareable". - Examples of "shareable" objects include the builtin singletons, - str, and memoryview. One benefit is that such objects are - passed through the queue efficiently. - - The key difference, though, is conceptual: the corresponding - object returned from Queue.get() will be strictly equivalent - to the given obj. In other words, the two objects will be - effectively indistinguishable from each other, even if the - object is mutable. The received object may actually be the - same object, or a copy (immutable values only), or a proxy. - Regardless, the received object should be treated as though - the original has been shared directly, whether or not it - actually is. That's a slightly different and stronger promise - than just (initial) equality, which is all "syncobj=False" - can promise. - - "unbound" controls the behavior of Queue.get() for the given + For most objects, the object received through Queue.get() will + be a new one, equivalent to the original and not sharing any + actual underlying data. The notable exceptions include + cross-interpreter types (like Queue) and memoryview, where the + underlying data is actually shared. Furthermore, some types + can be sent through a queue more efficiently than others. This + group includes various immutable types like int, str, bytes, and + tuple (if the items are likewise efficiently shareable). See interpreters.is_shareable(). + + "unbounditems" controls the behavior of Queue.get() for the given object if the current interpreter (calling put()) is later destroyed. - If "unbound" is None (the default) then it uses the + If "unbounditems" is None (the default) then it uses the queue's default, set with create_queue(), which is usually UNBOUND. - If "unbound" is UNBOUND_ERROR then get() will raise an + If "unbounditems" is UNBOUND_ERROR then get() will raise an ItemInterpreterDestroyed exception if the original interpreter has been destroyed. This does not otherwise affect the queue; the next call to put() will work like normal, returning the next item in the queue. - If "unbound" is UNBOUND_REMOVE then the item will be removed + If "unbounditems" is UNBOUND_REMOVE then the item will be removed from the queue as soon as the original interpreter is destroyed. Be aware that this will introduce an imbalance between put() and get() calls. - If "unbound" is UNBOUND then it is returned by get() in place + If "unbounditems" is UNBOUND then it is returned by get() in place of the unbound item. """ - if syncobj is None: - fmt = self._fmt - else: - fmt = _SHARED_ONLY if syncobj else _PICKLED - if unbound is None: - unboundop, = self._unbound + if unbounditems is None: + unboundop = -1 else: - unboundop, = _serialize_unbound(unbound) + unboundop, = _serialize_unbound(unbounditems) if timeout is not None: timeout = int(timeout) if timeout < 0: raise ValueError(f'timeout value must be non-negative') end = time.time() + timeout - if fmt is _PICKLED: - obj = pickle.dumps(obj) while True: try: - _queues.put(self._id, obj, fmt, unboundop) + _queues.put(self._id, obj, unboundop) except QueueFull as exc: if timeout is not None and time.time() >= end: raise # re-raise @@ -243,18 +229,12 @@ def put(self, obj, timeout=None, *, else: break - def put_nowait(self, obj, *, syncobj=None, unbound=None): - if syncobj is None: - fmt = self._fmt + def put_nowait(self, obj, *, unbounditems=None): + if unbounditems is None: + unboundop = -1 else: - fmt = _SHARED_ONLY if syncobj else _PICKLED - if unbound is None: - unboundop, = self._unbound - else: - unboundop, = _serialize_unbound(unbound) - if fmt is _PICKLED: - obj = pickle.dumps(obj) - _queues.put(self._id, obj, fmt, unboundop) + unboundop, = _serialize_unbound(unbounditems) + _queues.put(self._id, obj, unboundop) def get(self, timeout=None, *, _delay=10 / 1000, # 10 milliseconds @@ -265,7 +245,7 @@ def get(self, timeout=None, *, If the next item's original interpreter has been destroyed then the "next object" is determined by the value of the - "unbound" argument to put(). + "unbounditems" argument to put(). """ if timeout is not None: timeout = int(timeout) @@ -274,7 +254,7 @@ def get(self, timeout=None, *, end = time.time() + timeout while True: try: - obj, fmt, unboundop = _queues.get(self._id) + obj, unboundop = _queues.get(self._id) except QueueEmpty as exc: if timeout is not None and time.time() >= end: raise # re-raise @@ -284,10 +264,6 @@ def get(self, timeout=None, *, if unboundop is not None: assert obj is None, repr(obj) return _resolve_unbound(unboundop) - if fmt == _PICKLED: - obj = pickle.loads(obj) - else: - assert fmt == _SHARED_ONLY return obj def get_nowait(self): @@ -297,16 +273,12 @@ def get_nowait(self): is the same as get(). """ try: - obj, fmt, unboundop = _queues.get(self._id) + obj, unboundop = _queues.get(self._id) except QueueEmpty as exc: raise # re-raise if unboundop is not None: assert obj is None, repr(obj) return _resolve_unbound(unboundop) - if fmt == _PICKLED: - obj = pickle.loads(obj) - else: - assert fmt == _SHARED_ONLY return obj diff --git a/Lib/configparser.py b/Lib/configparser.py index 239fda60a02ca0..18af1eadaad111 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -1218,11 +1218,14 @@ def _convert_to_boolean(self, value): def _validate_key_contents(self, key): """Raises an InvalidWriteError for any keys containing - delimiters or that match the section header pattern""" + delimiters or that begins with the section header pattern""" if re.match(self.SECTCRE, key): - raise InvalidWriteError("Cannot write keys matching section pattern") - if any(delim in key for delim in self._delimiters): - raise InvalidWriteError("Cannot write key that contains delimiters") + raise InvalidWriteError( + f"Cannot write key {key}; begins with section pattern") + for delim in self._delimiters: + if delim in key: + raise InvalidWriteError( + f"Cannot write key {key}; contains delimiter {delim}") def _validate_value_types(self, *, section="", option="", value=""): """Raises a TypeError for illegal non-string values. diff --git a/Lib/difflib.py b/Lib/difflib.py index f1f4e62514a7bd..ac1ba4a6e4ee94 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -78,8 +78,8 @@ class SequenceMatcher: sequences. As a rule of thumb, a .ratio() value over 0.6 means the sequences are close matches: - >>> print(round(s.ratio(), 3)) - 0.866 + >>> print(round(s.ratio(), 2)) + 0.87 >>> If you're only interested in where the sequences match, diff --git a/Lib/doctest.py b/Lib/doctest.py index 2acb6cb79f394d..dec10a345165da 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -108,6 +108,8 @@ def _test(): from _colorize import ANSIColors, can_colorize +__unittest = True + class TestResults(namedtuple('TestResults', 'failed attempted')): def __new__(cls, failed, attempted, *, skipped=0): results = super().__new__(cls, failed, attempted) @@ -1395,11 +1397,11 @@ def __run(self, test, compileflags, out): exec(compile(example.source, filename, "single", compileflags, True), test.globs) self.debugger.set_continue() # ==== Example Finished ==== - exception = None + exc_info = None except KeyboardInterrupt: raise - except: - exception = sys.exc_info() + except BaseException as exc: + exc_info = type(exc), exc, exc.__traceback__.tb_next self.debugger.set_continue() # ==== Example Finished ==== got = self._fakeout.getvalue() # the actual output @@ -1408,21 +1410,21 @@ def __run(self, test, compileflags, out): # If the example executed without raising any exceptions, # verify its output. - if exception is None: + if exc_info is None: if check(example.want, got, self.optionflags): outcome = SUCCESS # The example raised an exception: check if it was expected. else: - formatted_ex = traceback.format_exception_only(*exception[:2]) - if issubclass(exception[0], SyntaxError): + formatted_ex = traceback.format_exception_only(*exc_info[:2]) + if issubclass(exc_info[0], SyntaxError): # SyntaxError / IndentationError is special: # we don't care about the carets / suggestions / etc # We only care about the error message and notes. # They start with `SyntaxError:` (or any other class name) exception_line_prefixes = ( - f"{exception[0].__qualname__}:", - f"{exception[0].__module__}.{exception[0].__qualname__}:", + f"{exc_info[0].__qualname__}:", + f"{exc_info[0].__module__}.{exc_info[0].__qualname__}:", ) exc_msg_index = next( index @@ -1433,7 +1435,7 @@ def __run(self, test, compileflags, out): exc_msg = "".join(formatted_ex) if not quiet: - got += _exception_traceback(exception) + got += _exception_traceback(exc_info) # If `example.exc_msg` is None, then we weren't expecting # an exception. @@ -1462,7 +1464,7 @@ def __run(self, test, compileflags, out): elif outcome is BOOM: if not quiet: self.report_unexpected_exception(out, test, example, - exception) + exc_info) failures += 1 else: assert False, ("unknown outcome", outcome) @@ -2324,7 +2326,7 @@ def runTest(self): sys.stdout = old if results.failed: - raise self.failureException(self.format_failure(new.getvalue())) + raise self.failureException(self.format_failure(new.getvalue().rstrip('\n'))) def format_failure(self, err): test = self._dt_test diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 9a51b9437333db..91243378dc0441 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1020,6 +1020,8 @@ def _get_ptext_to_endchars(value, endchars): a flag that is True iff there were any quoted printables decoded. """ + if not value: + return '', '', False fragment, *remainder = _wsp_splitter(value, 1) vchars = [] escape = False @@ -1573,7 +1575,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True @@ -1592,9 +1594,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) diff --git a/Lib/email/header.py b/Lib/email/header.py index 113a81f41314ec..220a84a7454b21 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -59,16 +59,22 @@ def decode_header(header): """Decode a message header value without converting charset. - Returns a list of (string, charset) pairs containing each of the decoded - parts of the header. Charset is None for non-encoded parts of the header, - otherwise a lower-case string containing the name of the character set - specified in the encoded string. + For historical reasons, this function may return either: + + 1. A list of length 1 containing a pair (str, None). + 2. A list of (bytes, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). + + This function exists for backwards compatibility only. For new code, we + recommend using email.headerregistry.HeaderRegistry instead. """ # If it is a Header object, we can just return the encoded chunks. if hasattr(header, '_chunks'): @@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, This function takes one of those sequence of pairs and returns a Header instance. Optional maxlinelen, header_name, and continuation_ws are as in the Header constructor. + + This function exists for backwards compatibility only, and is not + recommended for use in new code. """ h = Header(maxlinelen=maxlinelen, header_name=header_name, continuation_ws=continuation_ws) diff --git a/Lib/email/message.py b/Lib/email/message.py index 87fcab68868d46..41fcc2b9778798 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -564,7 +564,7 @@ def add_header(self, _name, _value, **_params): msg.add_header('content-disposition', 'attachment', filename='bud.gif') msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', Fußballer.ppt')) + filename=('utf-8', '', 'Fußballer.ppt')) msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 7eab74dc0db9df..3de1f0d24a15b0 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -417,8 +417,14 @@ def decode_params(params): for name, continuations in rfc2231_params.items(): value = [] extended = False - # Sort by number - continuations.sort() + # Sort by number, treating None as 0 if there is no 0, + # and ignore it if there is already a 0. + has_zero = any(x[0] == 0 for x in continuations) + if has_zero: + continuations = [x for x in continuations if x[0] is not None] + else: + continuations = [(x[0] or 0, x[1], x[2]) for x in continuations] + continuations.sort(key=lambda x: x[0]) # And now append all values in numerical order, converting # %-encodings for the encoded segments. If any of the # continuation names ends in a *, then the entire string, after diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index a94bb270671e37..4ecb6b6e297a13 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -405,6 +405,8 @@ 'iso_8859_8' : 'iso8859_8', 'iso_8859_8_1988' : 'iso8859_8', 'iso_ir_138' : 'iso8859_8', + 'iso_8859_8_i' : 'iso8859_8', + 'iso_8859_8_e' : 'iso8859_8', # iso8859_9 codec 'csisolatin5' : 'iso8859_9', diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 60a8d5eb227f82..0c90b4c9fe18fa 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -316,7 +316,7 @@ def _buffer_encode(self, input, errors, final): class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): if errors != 'strict': - raise UnicodeError("Unsupported error handling: {errors}") + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return ("", 0) diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py index c506d654523496..df164ca5b9549c 100644 --- a/Lib/encodings/palmos.py +++ b/Lib/encodings/palmos.py @@ -201,7 +201,7 @@ def getregentry(): '\u02dc' # 0x98 -> SMALL TILDE '\u2122' # 0x99 -> TRADE MARK SIGN '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - '\x9b' # 0x9B -> + '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE '\x9d' # 0x9D -> '\x9e' # 0x9E -> diff --git a/Lib/fractions.py b/Lib/fractions.py index 8163e3bb594f6b..a497ee19935345 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -168,9 +168,13 @@ def _round_to_figures(n, d, figures): # A '0' that's *not* followed by another digit is parsed as a minimum width # rather than a zeropad flag. (?P0(?=[0-9]))? - (?P0|[1-9][0-9]*)? + (?P[0-9]+)? (?P[,_])? - (?:\.(?P0|[1-9][0-9]*))? + (?:\. + (?=[,_0-9]) # lookahead for digit or separator + (?P[0-9]+)? + (?P[,_])? + )? (?P[eEfFgG%]) """, re.DOTALL | re.VERBOSE).fullmatch @@ -499,11 +503,15 @@ def _format_float_style(self, match): minimumwidth = int(match["minimumwidth"] or "0") thousands_sep = match["thousands_sep"] precision = int(match["precision"] or "6") + frac_sep = match["frac_separators"] or "" presentation_type = match["presentation_type"] trim_zeros = presentation_type in "gG" and not alternate_form trim_point = not alternate_form exponent_indicator = "E" if presentation_type in "EFG" else "e" + if align == '=' and fill == '0': + zeropad = True + # Round to get the digits we need, figure out where to place the point, # and decide whether to use scientific notation. 'point_pos' is the # relative to the _end_ of the digit string: that is, it's the number @@ -552,6 +560,9 @@ def _format_float_style(self, match): if trim_zeros: frac_part = frac_part.rstrip("0") separator = "" if trim_point and not frac_part else "." + if frac_sep: + frac_part = frac_sep.join(frac_part[pos:pos + 3] + for pos in range(0, len(frac_part), 3)) trailing = separator + frac_part + suffix # Do zero padding if required. diff --git a/Lib/functools.py b/Lib/functools.py index 714070c6ac9460..7f0eac3f650209 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -323,6 +323,9 @@ def _partial_new(cls, func, /, *args, **keywords): "or a descriptor") if args and args[-1] is Placeholder: raise TypeError("trailing Placeholders are not allowed") + for value in keywords.values(): + if value is Placeholder: + raise TypeError("Placeholder cannot be passed as a keyword argument") if isinstance(func, base_cls): pto_phcount = func._phcount tot_args = func.args diff --git a/Lib/genericpath.py b/Lib/genericpath.py index ba7b0a13c7f81d..9363f564aab7a6 100644 --- a/Lib/genericpath.py +++ b/Lib/genericpath.py @@ -8,7 +8,7 @@ __all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', 'getsize', 'isdevdrive', 'isdir', 'isfile', 'isjunction', 'islink', - 'lexists', 'samefile', 'sameopenfile', 'samestat'] + 'lexists', 'samefile', 'sameopenfile', 'samestat', 'ALLOW_MISSING'] # Does a path exist? @@ -189,3 +189,12 @@ def _check_arg_types(funcname, *args): f'os.PathLike object, not {s.__class__.__name__!r}') from None if hasstr and hasbytes: raise TypeError("Can't mix strings and bytes in path components") from None + +# A singleton with a true boolean value. +@object.__new__ +class ALLOW_MISSING: + """Special value for use in realpath().""" + def __repr__(self): + return 'os.path.ALLOW_MISSING' + def __reduce__(self): + return self.__class__.__name__ diff --git a/Lib/getpass.py b/Lib/getpass.py index f571425e54178a..1dd40e25e09068 100644 --- a/Lib/getpass.py +++ b/Lib/getpass.py @@ -119,9 +119,9 @@ def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): raise KeyboardInterrupt if c == '\b': if echo_char and pw: - msvcrt.putch('\b') - msvcrt.putch(' ') - msvcrt.putch('\b') + msvcrt.putwch('\b') + msvcrt.putwch(' ') + msvcrt.putwch('\b') pw = pw[:-1] else: pw = pw + c @@ -132,14 +132,15 @@ def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): return pw -def fallback_getpass(prompt='Password: ', stream=None): +def fallback_getpass(prompt='Password: ', stream=None, *, echo_char=None): + _check_echo_char(echo_char) import warnings warnings.warn("Can not control echo on the terminal.", GetPassWarning, stacklevel=2) if not stream: stream = sys.stderr print("Warning: Password input may be echoed.", file=stream) - return _raw_input(prompt, stream) + return _raw_input(prompt, stream, echo_char=echo_char) def _check_echo_char(echo_char): diff --git a/Lib/hashlib.py b/Lib/hashlib.py index abacac22ea0106..0e9bd98aa1fc31 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -141,29 +141,29 @@ def __get_openssl_constructor(name): return __get_builtin_constructor(name) -def __py_new(name, data=b'', **kwargs): +def __py_new(name, *args, **kwargs): """new(name, data=b'', **kwargs) - Return a new hashing object using the named algorithm; optionally initialized with data (which must be a bytes-like object). """ - return __get_builtin_constructor(name)(data, **kwargs) + return __get_builtin_constructor(name)(*args, **kwargs) -def __hash_new(name, data=b'', **kwargs): +def __hash_new(name, *args, **kwargs): """new(name, data=b'') - Return a new hashing object using the named algorithm; optionally initialized with data (which must be a bytes-like object). """ if name in __block_openssl_constructor: # Prefer our builtin blake2 implementation. - return __get_builtin_constructor(name)(data, **kwargs) + return __get_builtin_constructor(name)(*args, **kwargs) try: - return _hashlib.new(name, data, **kwargs) + return _hashlib.new(name, *args, **kwargs) except ValueError: # If the _hashlib module (OpenSSL) doesn't support the named # hash, try using our builtin implementations. # This allows for SHA224/256 and SHA384/512 support even though # the OpenSSL library prior to 0.9.8 doesn't provide them. - return __get_builtin_constructor(name)(data) + return __get_builtin_constructor(name)(*args, **kwargs) try: diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 13c95c34e505c8..9b4f09599134bd 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -12,6 +12,7 @@ import _markupbase from html import unescape +from html.entities import html5 as html5_entities __all__ = ['HTMLParser'] @@ -23,20 +24,51 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') +endtagopen = re.compile('') -commentclose = re.compile(r'--\s*>') +commentclose = re.compile(r'--!?>') +commentabruptclose = re.compile(r'-?>') # Note: -# 1) if you change tagfind/attrfind remember to update locatestarttagend too; -# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will +# 1) if you change tagfind/attrfind remember to update locatetagend too; +# 2) if you change tagfind/attrfind and/or locatetagend the parser will # explode, so don't do it. -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') +# see the HTML5 specs section "13.2.5.6 Tag open state", +# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state". +# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state +# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state +# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state +tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*') +attrfind_tolerant = re.compile(r""" + ( + (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + ) + (= # value indicator + ('[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space +""", re.VERBOSE) +locatetagend = re.compile(r""" + [a-zA-Z][^\t\n\r\f />]* # tag name + [\t\n\r\f /]* # optional whitespace before attribute name + (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + (?:= # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + [\t\n\r\f /]* # possibly followed by a space + )* + >? +""", re.VERBOSE) +# The following variables are not used, but are temporarily left for +# backward compatibility. locatestarttagend_tolerant = re.compile(r""" <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name @@ -53,10 +85,24 @@ \s* # trailing whitespace """, re.VERBOSE) endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# ') +# Character reference processing logic specific to attribute values +# See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state +def _replace_attr_charref(match): + ref = match.group(0) + # Numeric / hex char refs must always be unescaped + if ref.startswith('&#'): + return unescape(ref) + # Named character / entity references must only be unescaped + # if they are an exact match, and they are not followed by an equals sign + if not ref.endswith('=') and ref[1:] in html5_entities: + return unescape(ref) + # Otherwise do not unescape + return ref + +def _unescape_attrvalue(s): + return attr_charref.sub(_replace_attr_charref, s) class HTMLParser(_markupbase.ParserBase): @@ -122,7 +168,8 @@ def get_starttag_text(self): def set_cdata_mode(self, elem): self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) + self.interesting = re.compile(r'])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) def clear_cdata_mode(self): self.interesting = interesting_normal @@ -147,7 +194,7 @@ def goahead(self, end): # & near the end and see if it's followed by a space or ;. amppos = rawdata.rfind('&', max(i, n-34)) if (amppos >= 0 and - not re.compile(r'[\s;]').search(rawdata, amppos)): + not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)): break # wait till we get all the text j = n else: @@ -177,7 +224,7 @@ def goahead(self, end): k = self.parse_pi(i) elif startswith("', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 - else: - k += 1 - if self.convert_charrefs and not self.cdata_elem: - self.handle_data(unescape(rawdata[i:k])) + if starttagopen.match(rawdata, i): # < + letter + pass + elif startswith(" @@ -272,12 +337,27 @@ def parse_html_declaration(self, i): else: return self.parse_bogus_comment(i) + # Internal -- parse comment, return length or -1 if not terminated + # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state + def parse_comment(self, i, report=True): + rawdata = self.rawdata + assert rawdata.startswith(' ¬-an-entity-ref;', "", '

', @@ -278,60 +285,127 @@ def test_cdata_content(self): 'src="http://www.example.org/r=\'+new ' 'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'), '\n\n', - 'foo = "";', '', - # these two should be invalid according to the HTML 5 spec, - # section 8.1.2.2 - #'foo = ', - #'foo = ', - ] - elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style'] - for content in contents: - for element in elements: - element_lower = element.lower() - s = '<{element}>{content}'.format(element=element, - content=content) - self._run_check(s, [("starttag", element_lower, []), - ("data", content), - ("endtag", element_lower)]) - - def test_cdata_with_closing_tags(self): + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + ]) + def test_script_content(self, content): + s = f'' + self._run_check(s, [("starttag", "script", []), + ("data", content), + ("endtag", "script")]) + + @support.subTests('content', [ + 'a::before { content: ""; }', + 'a::before { content: "¬-an-entity-ref;"; }', + 'a::before { content: ""; }', + 'a::before { content: "\u2603"; }', + 'a::before { content: "< /style>"; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + ]) + def test_style_content(self, content): + s = f'' + self._run_check(s, [("starttag", "style", []), + ("data", content), + ("endtag", "style")]) + + @support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n', + 'script/', 'script foo=bar', 'script foo=">"']) + def test_script_closing_tag(self, endtag): # see issue #13358 # make sure that HTMLParser calls handle_data only once for each CDATA. - # The normal event collector normalizes the events in get_events, - # so we override it to return the original list of events. - class Collector(EventCollector): - def get_events(self): - return self.events - content = """ ¬-an-entity-ref;

''""" - for element in [' script', 'script ', ' script ', - '\nscript', 'script\n', '\nscript\n']: - element_lower = element.lower().strip() - s = '{content}{tail}' + self._run_check(s, [("starttag", "script", []), + ("data", content if end else content + tail)], + collector=EventCollectorNoNormalize(convert_charrefs=False)) def test_comments(self): html = ("" '' '' + '' '' + # abrupt-closing-of-empty-comment + '' + '' '' '' - '') + '' + '' + '' + '' + '' + '' + '' + # nested-comment + ' -->' + '' + '' + ) expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), ('comment', '--'), + ('comment', '-'), + ('comment', ''), + ('comment', ''), ('comment', ''), ('comment', '--I have many hyphens--'), ('comment', ' I have a > in the middle '), - ('comment', ' and I have -- in the middle! ')] + ('comment', ' and I have -- in the middle! '), + ('comment', 'incorrectly-closed-comment'), + ('comment', ''), + ('comment', '--!'), + ('comment', '-- >'), + ('comment', '-!>'), + ('comment', '!>'), + ('comment', ' '), + ('comment', 'a{0}z'.format(charref), + self._run_check('a{0}z'.format(charref), expected, collector=collector()) - # check charrefs at the beginning/end of the text/attributes - expected = [('data', '"'), - ('starttag', 'a', [('x', '"'), ('y', '"X'), ('z', 'X"')]), + # check charrefs at the beginning/end of the text + expected = [('data', '"'), ('starttag', 'a', []), ('data', '"'), ('endtag', 'a'), ('data', '"')] for charref in charrefs: - self._run_check('{0}' + self._run_check('{0}' '{0}{0}'.format(charref), expected, collector=collector()) # check charrefs in ``. + +* Multiple slashes and whitespaces between the last attribute and closing ``>`` + are now ignored in both start and end tags. E.g. ````. + +* Multiple ``=`` between attribute name and value are no longer collapsed. + E.g. ```` produces attribute "foo" with value "=bar". + +* Whitespaces between the ``=`` separator and attribute name or value are no + longer ignored. E.g. ```` produces two attributes "foo" and + "=bar", both with value None; ```` produces two attributes: + "foo" with value "" and "bar" with value None. + +.. + +.. date: 2025-06-18-13-28-08 +.. gh-issue: 102555 +.. nonce: nADrzJ +.. section: Security + +Fix comment parsing in :class:`html.parser.HTMLParser` according to the +HTML5 standard. ``--!>`` now ends the comment. ``-- >`` no longer ends the +comment. Support abnormally ended empty comments ``<-->`` and ``<--->``. + +.. + +.. date: 2025-07-05-09-45-04 +.. gh-issue: 136286 +.. nonce: N67Amr +.. section: Library + +Fix pickling failures for protocols 0 and 1 for many objects realted to +subinterpreters. + +.. + +.. date: 2025-07-05-06-56-16 +.. gh-issue: 136316 +.. nonce: 3zj_Do +.. section: Library + +Improve support for evaluating nested forward references in +:func:`typing.evaluate_forward_ref`. + +.. + +.. date: 2025-06-30-11-12-24 +.. gh-issue: 85702 +.. nonce: 0Lrbwu +.. section: Library + +If ``zoneinfo._common.load_tzdata`` is given a package without a resource a +:exc:`zoneinfo.ZoneInfoNotFoundError` is raised rather than a +:exc:`PermissionError`. Patch by Victor Stinner. + +.. + +.. date: 2025-06-27-13-34-28 +.. gh-issue: 136028 +.. nonce: RY727g +.. section: Library + +Fix parsing month names containing "İ" (U+0130, LATIN CAPITAL LETTER I WITH +DOT ABOVE) in :func:`time.strptime`. This affects locales az_AZ, ber_DZ, +ber_MA and crh_UA. + +.. + +.. date: 2025-06-26-17-28-49 +.. gh-issue: 135995 +.. nonce: pPrDCt +.. section: Library + +In the palmos encoding, make byte ``0x9b`` decode to ``›`` (U+203A - SINGLE +RIGHT-POINTING ANGLE QUOTATION MARK). + +.. + +.. date: 2025-06-26-11-52-40 +.. gh-issue: 53203 +.. nonce: TMigBr +.. section: Library + +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats on locales byn_ER, +wal_ET and lzh_TW, and for ``%X`` format on locales ar_SA, bg_BG and lzh_TW. + +.. + +.. date: 2025-06-25-17-25-53 +.. gh-issue: 91555 +.. nonce: xUpTLD +.. section: Library + +An earlier change, which was introduced in 3.14.0b2, has been reverted. It +disabled logging for a logger during handling of log messages for that +logger. Since the reversion, the behaviour should be as it was before +3.14.0b2. + +.. + +.. date: 2025-06-24-14-43-24 +.. gh-issue: 135878 +.. nonce: Db4roX +.. section: Library + +Fixes a crash of :class:`types.SimpleNamespace` on :term:`free threading` +builds, when several threads were calling its :meth:`~object.__repr__` +method at the same time. + +.. + +.. date: 2025-06-24-10-52-35 +.. gh-issue: 135836 +.. nonce: s37351 +.. section: Library + +Fix :exc:`IndexError` in :meth:`asyncio.loop.create_connection` that could +occur when non-\ :exc:`OSError` exception is raised during connection and +socket's ``close()`` raises :exc:`!OSError`. + +.. + +.. date: 2025-06-23-11-04-25 +.. gh-issue: 135836 +.. nonce: -C-c4v +.. section: Library + +Fix :exc:`IndexError` in :meth:`asyncio.loop.create_connection` that could +occur when the Happy Eyeballs algorithm resulted in an empty exceptions list +during connection attempts. + +.. + +.. date: 2025-06-23-10-19-11 +.. gh-issue: 135855 +.. nonce: -J0AGF +.. section: Library + +Raise :exc:`TypeError` instead of :exc:`SystemError` when +:func:`!_interpreters.set___main___attrs` is passed a non-dict object. Patch +by Brian Schubert. + +.. + +.. date: 2025-06-22-16-23-44 +.. gh-issue: 135815 +.. nonce: 0DandH +.. section: Library + +:mod:`netrc`: skip security checks if :func:`os.getuid` is missing. Patch by +Bénédikt Tran. + +.. + +.. date: 2025-06-22-02-16-17 +.. gh-issue: 135640 +.. nonce: FXyFL6 +.. section: Library + +Address bug where it was possible to call +:func:`xml.etree.ElementTree.ElementTree.write` on an ElementTree object +with an invalid root element. This behavior blanked the file passed to +``write`` if it already existed. + +.. + +.. date: 2025-06-18-13-58-13 +.. gh-issue: 135645 +.. nonce: 109nff +.. section: Library + +Added ``supports_isolated_interpreters`` field to +:data:`sys.implementation`. + +.. + +.. date: 2025-06-18-11-43-17 +.. gh-issue: 135646 +.. nonce: r7ekEn +.. section: Library + +Raise consistent :exc:`NameError` exceptions in +:func:`annotationlib.ForwardRef.evaluate` + +.. + +.. date: 2025-06-17-23-13-56 +.. gh-issue: 135557 +.. nonce: Bfcy4v +.. section: Library + +Fix races on :mod:`heapq` updates and :class:`list` reads on the :term:`free +threaded ` build. + +.. + +.. date: 2025-06-17-22-44-19 +.. gh-issue: 119180 +.. nonce: Ogv8Nj +.. section: Library + +Only fetch globals and locals if necessary in +:func:`annotationlib.get_annotations` + +.. + +.. date: 2025-06-16-15-03-03 +.. gh-issue: 135561 +.. nonce: mJCN8D +.. section: Library + +Fix a crash on DEBUG builds when an HACL* HMAC routine fails. Patch by +Bénédikt Tran. + +.. + +.. date: 2025-06-14-12-06-55 +.. gh-issue: 135487 +.. nonce: KdVFff +.. section: Library + +Fix :meth:`!reprlib.Repr.repr_int` when given integers with more than +:func:`sys.get_int_max_str_digits` digits. Patch by Bénédikt Tran. + +.. + +.. date: 2025-06-10-21-42-04 +.. gh-issue: 135335 +.. nonce: WnUqb_ +.. section: Library + +:mod:`multiprocessing`: Flush ``stdout`` and ``stderr`` after preloading +modules in the ``forkserver``. + +.. + +.. date: 2025-06-03-12-59-17 +.. gh-issue: 135069 +.. nonce: xop30V +.. section: Library + +Fix the "Invalid error handling" exception in +:class:`!encodings.idna.IncrementalDecoder` to correctly replace the +'errors' parameter. + +.. + +.. date: 2025-06-02-14-36-28 +.. gh-issue: 130662 +.. nonce: Gpr2GB +.. section: Library + ++Accept leading zeros in precision and width fields for ++:class:`~decimal.Decimal` formatting, for example ``format(Decimal(1.25), +'.016f')``. + +.. + +.. date: 2025-06-02-14-28-30 +.. gh-issue: 130662 +.. nonce: EIgIR8 +.. section: Library + +Accept leading zeros in precision and width fields for +:class:`~fractions.Fraction` formatting, for example ``format(Fraction(1, +3), '.016f')``. + +.. + +.. date: 2025-04-07-10-20-16 +.. gh-issue: 87790 +.. nonce: X2SjJe +.. section: Library + +Support underscore and comma as thousands separators in the fractional part +for :class:`~fractions.Fraction`'s formatting. Patch by Sergey B Kirpichev. + +.. + +.. date: 2025-04-07-09-53-54 +.. gh-issue: 87790 +.. nonce: 6nj3zQ +.. section: Library + +Support underscore and comma as thousands separators in the fractional part +for :class:`~decimal.Decimal`'s formatting. Patch by Sergey B Kirpichev. + +.. + +.. date: 2025-03-11-05-24-14 +.. gh-issue: 130664 +.. nonce: g0yNMm +.. section: Library + +Handle corner-case for :class:`~fractions.Fraction`'s formatting: treat +zero-padding (preceding the width field by a zero (``'0'``) character) as an +equivalent to a fill character of ``'0'`` with an alignment type of ``'='``, +just as in case of :class:`float`'s. + +.. + +.. date: 2025-07-01-21-04-47 +.. gh-issue: 136155 +.. nonce: ufmH4Q +.. section: Documentation + +EPUB builds are fixed by excluding non-XHTML-compatible tags. + +.. + +.. date: 2025-07-06-14-53-19 +.. gh-issue: 109700 +.. nonce: KVNQQi +.. section: Core and Builtins + +Fix memory error handling in :c:func:`PyDict_SetDefault`. + +.. + +.. date: 2025-06-26-15-25-51 +.. gh-issue: 78465 +.. nonce: MbDN8X +.. section: Core and Builtins + +Fix error message for ``cls.__new__(cls, ...)`` where ``cls`` is not +instantiable builtin or extension type (with ``tp_new`` set to ``NULL``). + +.. + +.. date: 2025-06-24-06-41-47 +.. gh-issue: 129958 +.. nonce: EaJuS0 +.. section: Core and Builtins + +Differentiate between t-strings and f-strings in syntax error for newlines +in format specifiers of single-quoted interpolated strings. + +.. + +.. date: 2025-06-23-18-08-32 +.. gh-issue: 135871 +.. nonce: 50C528 +.. section: Core and Builtins + +Non-blocking mutex lock attempts now return immediately when the lock is +busy instead of briefly spinning in the :term:`free threading` build. + +.. + +.. date: 2025-06-18-16-45-36 +.. gh-issue: 135106 +.. nonce: cpl6Aq +.. section: Core and Builtins + +Restrict the trashcan mechanism to GC'ed objects and untrack them while in +the trashcan to prevent the GC and trashcan mechanisms conflicting. + +.. + +.. date: 2025-06-17-22-34-58 +.. gh-issue: 135607 +.. nonce: ucsLVu +.. section: Core and Builtins + +Fix potential :mod:`weakref` races in an object's destructor on the +:term:`free threaded ` build. + +.. + +.. date: 2025-06-17-12-50-48 +.. gh-issue: 135608 +.. nonce: PnHckD +.. section: Core and Builtins + +Fix a crash in the JIT involving attributes of modules. + +.. + +.. date: 2025-06-16-02-31-42 +.. gh-issue: 135543 +.. nonce: 6b0HOF +.. section: Core and Builtins + +Emit ``sys.remote_exec`` audit event when :func:`sys.remote_exec` is called +and migrate ``remote_debugger_script`` to +``cpython.remote_debugger_script``. + +.. + +.. date: 2025-05-31-19-24-54 +.. gh-issue: 134280 +.. nonce: NDVbzY +.. section: Core and Builtins + +Disable constant folding for ``~`` with a boolean argument. This moves the +deprecation warning from compile time to runtime. + +.. + +.. date: 2025-06-25-01-03-10 +.. gh-issue: 135906 +.. nonce: UBrCWq +.. section: C API + +Fix compilation errors when compiling the internal headers with a C++ +compiler. + +.. + +.. date: 2025-05-19-18-09-20 +.. gh-issue: 134273 +.. nonce: ZAliyy +.. section: Build + +Add support for configuring compiler flags for the JIT with ``CFLAGS_JIT`` diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst new file mode 100644 index 00000000000000..a9501c13c95b3a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst @@ -0,0 +1,2 @@ +Limit excess memory usage in the :term:`free threading` build when a +large dictionary or list is resized and accessed by multiple threads. diff --git a/Misc/NEWS.d/next/Library/2025-05-25-11-02-05.gh-issue-134657.3YFhR9.rst b/Misc/NEWS.d/next/Library/2025-05-25-11-02-05.gh-issue-134657.3YFhR9.rst new file mode 100644 index 00000000000000..1bf8ee504ef180 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-25-11-02-05.gh-issue-134657.3YFhR9.rst @@ -0,0 +1 @@ +:mod:`asyncio`: Remove some private names from ``asyncio.__all__``. diff --git a/Misc/NEWS.d/next/Library/2025-07-07-22-12-32.gh-issue-136380.1b_nXl.rst b/Misc/NEWS.d/next/Library/2025-07-07-22-12-32.gh-issue-136380.1b_nXl.rst new file mode 100644 index 00000000000000..4ac04b9c0a6c5c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-07-22-12-32.gh-issue-136380.1b_nXl.rst @@ -0,0 +1,3 @@ +Raises :exc:`AttributeError` when accessing +:class:`concurrent.futures.InterpreterPoolExecutor` and subinterpreters are +not available. diff --git a/Misc/NEWS.d/next/Library/2025-07-09-20-29-30.gh-issue-136476.HyLLzh.rst b/Misc/NEWS.d/next/Library/2025-07-09-20-29-30.gh-issue-136476.HyLLzh.rst new file mode 100644 index 00000000000000..7634bd3be9346d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-09-20-29-30.gh-issue-136476.HyLLzh.rst @@ -0,0 +1,2 @@ +Fix a bug that was causing the ``get_async_stack_trace`` function to miss +some frames in the stack trace. diff --git a/Misc/python.man b/Misc/python.man index 15174b62d5fea4..612e2bbf56800e 100644 --- a/Misc/python.man +++ b/Misc/python.man @@ -529,11 +529,11 @@ See also the \fB\-X frozen_modules\fR option. If this variable is set to 1, the global interpreter lock (GIL) will be forced on. Setting it to 0 forces the GIL off. Only available in builds configured with \fB\-\-disable\-gil\fP. +.IP +This is equivalent to the \fB\-X gil\fR option. .IP PYTHON_HISTORY This environment variable can be used to set the location of a history file (on Unix, it is \fI~/.python_history\fP by default). -.IP -This is equivalent to the \fB\-X gil\fR option. .IP PYTHONNODEBUGRANGES If this variable is set, it disables the inclusion of the tables mapping extra location information (end line, start column offset and end column diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 4a697d047ca6e4..738b33908857a8 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -314,11 +314,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4b6e7696e8d84f322fb24b1fbb08ccb9b0e7d51b" + "checksumValue": "808af7ff8a2cb2b4ef3a9ce3dbfef58d90828c9f" }, { "algorithm": "SHA256", - "checksumValue": "50a65a34a7a7569eedf7fa864a7892eeee5840a7fdf6fa8f1e87d42c65f6c877" + "checksumValue": "6a492aa586f2d10b1b300ce8ce4c72c976ff7548fee667aded2253f99969ac87" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b.c" @@ -342,11 +342,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0f75e44a42775247a46acc2beaa6bae8f199a3d9" + "checksumValue": "1bb072f2be9e5d194274fdcc87825cb094e4b32e" }, { "algorithm": "SHA256", - "checksumValue": "03b612c24193464ed6848aeebbf44f9266b78ec6eed2486056211cde8992c49a" + "checksumValue": "9eb22953ce60dde9dc970fec9dfce9d94235f4b7ccd8f0151cad4707dc835d1d" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c" @@ -384,11 +384,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "65bf44140691b046dcfed3ab1576dbf8bbf96dc5" + "checksumValue": "cdd6e9ca86dbede92d1a47b9224d2af70c599a71" }, { "algorithm": "SHA256", - "checksumValue": "0f98959dafffce039ade9d296f7a05bed151c9c512498f48e4b326a5523a240b" + "checksumValue": "f3204f3e60734d811b6630f879b69ce54eaf367f3fca5889c1026e7a1f3ee1e4" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s.c" @@ -412,11 +412,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "0da9782455923aede8d8dce9dfdc38f4fc1de572" + "checksumValue": "4056bb6e3ed184400d1610bdfd4260b3fd05ccbb" }, { "algorithm": "SHA256", - "checksumValue": "2d17ae768fd3d7d6decddd8b4aaf23ce02a809ee62bb98da32c8a7f54acf92d0" + "checksumValue": "c93746df2f219cbb1634ee6fb0ab1c4cbd381d1f36c637114c68346c9935326d" } ], "fileName": "Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c" @@ -454,11 +454,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "38e8d96ef1879480780494058a93cec181f8d6d7" + "checksumValue": "61f678cd9234c6eab5d4409ae66f470b068b959b" }, { "algorithm": "SHA256", - "checksumValue": "61e77d2063cf60c96e9ce06af215efe5d42c43026833bffed5732326fe97ed1e" + "checksumValue": "5b91ed0339074e2e546119833398e2cdb7190c33a59c405bf43b2417c789547d" } ], "fileName": "Modules/_hacl/Hacl_Hash_MD5.c" @@ -482,11 +482,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "986dd5ba0b34d15f3e5e5c656979aea1b502e8aa" + "checksumValue": "c7fc5c9721caf37c5a24c9beff27b0ac2ed68cc9" }, { "algorithm": "SHA256", - "checksumValue": "38d5f1f2e67a0eb30789f81fc56c07a6e7246e2b1be6c65485bcca1dcd0e0806" + "checksumValue": "ce08721d491f3b8a9bd4cde6c27bfcc8fc01471512ccca4bd3c0b764cb551d29" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA1.c" @@ -510,11 +510,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "f732a6710fe3e13cd28130f0f20504e347d1c412" + "checksumValue": "fffe8c4f67669ac8ccd87c2e0f95db2427481df1" }, { "algorithm": "SHA256", - "checksumValue": "86cf32e4d1f3ba93a94108271923fdafe2204447792a918acf4a2250f352dbde" + "checksumValue": "f8af382de7e29a73c726f9c70770498ddd99e2c4702489ed6e634f0b68597c95" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA2.c" @@ -538,11 +538,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "50f75337b31f509b5bfcc7ebb3d066b82a0f1b33" + "checksumValue": "77d3d879dfa5949030bca0e8ee75d3d369ec54a7" }, { "algorithm": "SHA256", - "checksumValue": "c9e1442899e5b902fa39f413f1a3131f7ab5c2283d5100dc8ac675a7d5ebbdf1" + "checksumValue": "8744f5b6e054c3e5c44f413a60f9154b76dd7230135dcee26fd063755ec64be1" } ], "fileName": "Modules/_hacl/Hacl_Hash_SHA3.c" @@ -566,11 +566,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "8140310f505bb2619a749777a91487d666237bcf" + "checksumValue": "417e68ac8498cb2f93a06a19003ea1cc3f0f6753" }, { "algorithm": "SHA256", - "checksumValue": "9d95e6a651c22185d9b7c38f363d30159f810e6fcdc2208f29492837ed891e82" + "checksumValue": "843db4bba78a476d4d53dabe4eed5c9235f490ccc9fdaf19e22af488af858920" } ], "fileName": "Modules/_hacl/Hacl_Streaming_HMAC.c" @@ -608,11 +608,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "c9651ef21479c4d8a3b04c5baa1902866dbb1cdf" + "checksumValue": "0a0b7f3714167ad45ddf5a6a48d76f525a119c9c" }, { "algorithm": "SHA256", - "checksumValue": "e039c82ba670606ca111573942baad800f75da467abbc74cd7d1fe175ebcdfaf" + "checksumValue": "135d4afb4812468885c963c9c87a55ba5fae9181df4431af5fbad08588dda229" } ], "fileName": "Modules/_hacl/Lib_Memzero0.c" @@ -622,11 +622,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "eaa543c778300238dc23034aafeada0951154af1" + "checksumValue": "911c97a0f24067635b164fbca49da76055f192cd" }, { "algorithm": "SHA256", - "checksumValue": "3fd2552d527a23110d61ad2811c774810efb1eaee008f136c2a0d609daa77f5b" + "checksumValue": "972b5111ebada8e11dd60df3119da1af505fd3e0b6c782ead6cab7f1daf134f1" } ], "fileName": "Modules/_hacl/include/krml/FStar_UInt128_Verified.h" @@ -930,11 +930,11 @@ "checksums": [ { "algorithm": "SHA1", - "checksumValue": "4a0bdb9496d49bbfa3ad50bb7854d8f099e84891" + "checksumValue": "682b069d3888f3e8f8cc90f1a49bac9d1a5903d2" }, { "algorithm": "SHA256", - "checksumValue": "b1a45149239ee7af7de769a3e9339950d47c199bb9eaa10edce8a00fde603b12" + "checksumValue": "8551d2c3fde03b92c6fab5febde00347bd9184ea0085077976863c7836e9669d" } ], "fileName": "Modules/_hacl/python_hacl_namespaces.h" @@ -1752,14 +1752,14 @@ "checksums": [ { "algorithm": "SHA256", - "checksumValue": "02dfcf0c79d488b120d7f2c2a0f9206301c7927ed5106545e0b6f2aef88da76a" + "checksumValue": "39f6fd4f2fe98aecc995a2fd980fae0e0835cef6e563ebbf25f69d3d3102bafd" } ], - "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/7720f6d4fc0468a99d5ea6120976bcc271e42727.zip", + "downloadLocation": "https://github.com/hacl-star/hacl-star/archive/4ef25b547b377dcef855db4289c6a00580e7221c.zip", "externalRefs": [ { "referenceCategory": "SECURITY", - "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:7720f6d4fc0468a99d5ea6120976bcc271e42727:*:*:*:*:*:*:*", + "referenceLocator": "cpe:2.3:a:hacl-star:hacl-star:4ef25b547b377dcef855db4289c6a00580e7221c:*:*:*:*:*:*:*", "referenceType": "cpe23Type" } ], @@ -1767,7 +1767,7 @@ "name": "hacl-star", "originator": "Organization: HACL* Developers", "primaryPackagePurpose": "SOURCE", - "versionInfo": "7720f6d4fc0468a99d5ea6120976bcc271e42727" + "versionInfo": "4ef25b547b377dcef855db4289c6a00580e7221c" }, { "SPDXID": "SPDXRef-PACKAGE-macholib", diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index ad670293ec5b6a..3ba48d5d9d3c64 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -5,6 +5,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_pyatomic_ft_wrappers.h" #include "pycore_typeobject.h" // _PyType_GetModuleState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include @@ -1532,9 +1533,7 @@ deque_dealloc(PyObject *self) Py_ssize_t i; PyObject_GC_UnTrack(deque); - if (deque->weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, deque->weakreflist); if (deque->leftblock != NULL) { (void)deque_clear(self); assert(deque->leftblock != NULL); diff --git a/Modules/_csv.c b/Modules/_csv.c index e5ae853590bf2c..2e04136e0ac657 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -237,7 +237,7 @@ _set_int(const char *name, int *target, PyObject *src, int dflt) int value; if (!PyLong_CheckExact(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be an integer", name); + "\"%s\" must be an integer, not %T", name, src); return -1; } value = PyLong_AsInt(src); @@ -255,27 +255,29 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt if (src == NULL) { *target = dflt; } - else { + else if (src == Py_None) { *target = NOT_SET; - if (src != Py_None) { - if (!PyUnicode_Check(src)) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be string or None, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; - } - Py_ssize_t len = PyUnicode_GetLength(src); - if (len < 0) { - return -1; - } - if (len != 1) { - PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); - return -1; - } - *target = PyUnicode_READ_CHAR(src, 0); + } + else { + // similar to PyArg_Parse("C?") + if (!PyUnicode_Check(src)) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, not %T", + name, src); + return -1; + } + Py_ssize_t len = PyUnicode_GetLength(src); + if (len < 0) { + return -1; } + if (len != 1) { + PyErr_Format(PyExc_TypeError, + "\"%s\" must be a unicode character or None, " + "not a string of length %zd", + name, len); + return -1; + } + *target = PyUnicode_READ_CHAR(src, 0); } return 0; } @@ -287,11 +289,12 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) *target = dflt; } else { + // similar to PyArg_Parse("C") if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be string, not %.200s", name, - Py_TYPE(src)->tp_name); - return -1; + "\"%s\" must be a unicode character, not %T", + name, src); + return -1; } Py_ssize_t len = PyUnicode_GetLength(src); if (len < 0) { @@ -299,8 +302,9 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) } if (len != 1) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a 1-character string", - name); + "\"%s\" must be a unicode character, " + "not a string of length %zd", + name, len); return -1; } *target = PyUnicode_READ_CHAR(src, 0); @@ -314,16 +318,12 @@ _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) if (src == NULL) *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL); else { - if (src == Py_None) - *target = NULL; - else if (!PyUnicode_Check(src)) { + if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, - "\"%s\" must be a string", name); + "\"%s\" must be a string, not %T", name, src); return -1; } - else { - Py_XSETREF(*target, Py_NewRef(src)); - } + Py_XSETREF(*target, Py_NewRef(src)); } return 0; } @@ -533,11 +533,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) /* validate options */ if (dialect_check_quoting(self->quoting)) goto err; - if (self->delimiter == NOT_SET) { - PyErr_SetString(PyExc_TypeError, - "\"delimiter\" must be a 1-character string"); - goto err; - } if (quotechar == Py_None && quoting == NULL) self->quoting = QUOTE_NONE; if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) { @@ -545,10 +540,6 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "quotechar must be set if quoting enabled"); goto err; } - if (self->lineterminator == NULL) { - PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); - goto err; - } if (dialect_check_char("delimiter", self->delimiter, self, true) || dialect_check_char("escapechar", self->escapechar, self, !self->skipinitialspace) || diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 1bb65e0a64920d..55a446d02b27f3 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -3591,6 +3591,45 @@ generic_pycdata_new(ctypes_state *st, PyCFuncPtr_Type */ +static inline void +atomic_xsetref(PyObject **field, PyObject *value) +{ +#ifdef Py_GIL_DISABLED + PyObject *old = *field; + _Py_atomic_store_ptr(field, value); + Py_XDECREF(old); +#else + Py_XSETREF(*field, value); +#endif +} +/* + This function atomically loads the reference from *field, and + tries to get a new reference to it. If the incref fails, + it acquires critical section of obj and returns a new reference to the *field. + In the general case, this avoids contention on acquiring the critical section. +*/ +static inline PyObject * +atomic_xgetref(PyObject *obj, PyObject **field) +{ +#ifdef Py_GIL_DISABLED + PyObject *value = _Py_atomic_load_ptr(field); + if (value == NULL) { + return NULL; + } + if (_Py_TryIncrefCompare(field, value)) { + return value; + } + Py_BEGIN_CRITICAL_SECTION(obj); + value = Py_XNewRef(*field); + Py_END_CRITICAL_SECTION(); + return value; +#else + return Py_XNewRef(*field); +#endif +} + + + /*[clinic input] @critical_section @setter @@ -3607,7 +3646,7 @@ _ctypes_CFuncPtr_errcheck_set_impl(PyCFuncPtrObject *self, PyObject *value) return -1; } Py_XINCREF(value); - Py_XSETREF(self->errcheck, value); + atomic_xsetref(&self->errcheck, value); return 0; } @@ -3639,12 +3678,10 @@ static int _ctypes_CFuncPtr_restype_set_impl(PyCFuncPtrObject *self, PyObject *value) /*[clinic end generated code: output=0be0a086abbabf18 input=683c3bef4562ccc6]*/ { - PyObject *checker, *oldchecker; + PyObject *checker; if (value == NULL) { - oldchecker = self->checker; - self->checker = NULL; - Py_CLEAR(self->restype); - Py_XDECREF(oldchecker); + atomic_xsetref(&self->restype, NULL); + atomic_xsetref(&self->checker, NULL); return 0; } ctypes_state *st = get_module_state_by_def(Py_TYPE(Py_TYPE(self))); @@ -3660,11 +3697,9 @@ _ctypes_CFuncPtr_restype_set_impl(PyCFuncPtrObject *self, PyObject *value) if (PyObject_GetOptionalAttr(value, &_Py_ID(_check_retval_), &checker) < 0) { return -1; } - oldchecker = self->checker; - self->checker = checker; Py_INCREF(value); - Py_XSETREF(self->restype, value); - Py_XDECREF(oldchecker); + atomic_xsetref(&self->checker, checker); + atomic_xsetref(&self->restype, value); return 0; } @@ -3709,16 +3744,16 @@ _ctypes_CFuncPtr_argtypes_set_impl(PyCFuncPtrObject *self, PyObject *value) PyObject *converters; if (value == NULL || value == Py_None) { - Py_CLEAR(self->converters); - Py_CLEAR(self->argtypes); + atomic_xsetref(&self->argtypes, NULL); + atomic_xsetref(&self->converters, NULL); } else { ctypes_state *st = get_module_state_by_def(Py_TYPE(Py_TYPE(self))); converters = converters_from_argtypes(st, value); if (!converters) return -1; - Py_XSETREF(self->converters, converters); + atomic_xsetref(&self->converters, converters); Py_INCREF(value); - Py_XSETREF(self->argtypes, value); + atomic_xsetref(&self->argtypes, value); } return 0; } @@ -4514,16 +4549,11 @@ _build_result(PyObject *result, PyObject *callargs, } static PyObject * -PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) +PyCFuncPtr_call(PyObject *op, PyObject *inargs, PyObject *kwds) { - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); - PyObject *restype; - PyObject *converters; - PyObject *checker; - PyObject *argtypes; - PyObject *result; - PyObject *callargs; - PyObject *errcheck; + PyObject *result = NULL; + PyObject *callargs = NULL; + PyObject *ret = NULL; #ifdef MS_WIN32 IUnknown *piunk = NULL; #endif @@ -4541,13 +4571,24 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) } assert(info); /* Cannot be NULL for PyCFuncPtrObject instances */ - restype = self->restype ? self->restype : info->restype; - converters = self->converters ? self->converters : info->converters; - checker = self->checker ? self->checker : info->checker; - argtypes = self->argtypes ? self->argtypes : info->argtypes; -/* later, we probably want to have an errcheck field in stginfo */ - errcheck = self->errcheck /* ? self->errcheck : info->errcheck */; - + PyObject *restype = atomic_xgetref(op, &self->restype); + if (restype == NULL) { + restype = Py_XNewRef(info->restype); + } + PyObject *converters = atomic_xgetref(op, &self->converters); + if (converters == NULL) { + converters = Py_XNewRef(info->converters); + } + PyObject *checker = atomic_xgetref(op, &self->checker); + if (checker == NULL) { + checker = Py_XNewRef(info->checker); + } + PyObject *argtypes = atomic_xgetref(op, &self->argtypes); + if (argtypes == NULL) { + argtypes = Py_XNewRef(info->argtypes); + } + /* later, we probably want to have an errcheck field in stginfo */ + PyObject *errcheck = atomic_xgetref(op, &self->errcheck); pProc = *(void **)self->b_ptr; #ifdef MS_WIN32 @@ -4558,25 +4599,25 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) if (!this) { PyErr_SetString(PyExc_ValueError, "native com method call without 'this' parameter"); - return NULL; + goto finally; } if (!CDataObject_Check(st, this)) { PyErr_SetString(PyExc_TypeError, "Expected a COM this pointer as first argument"); - return NULL; + goto finally; } /* there should be more checks? No, in Python */ /* First arg is a pointer to an interface instance */ if (!this->b_ptr || *(void **)this->b_ptr == NULL) { PyErr_SetString(PyExc_ValueError, "NULL COM pointer access"); - return NULL; + goto finally; } piunk = *(IUnknown **)this->b_ptr; if (NULL == piunk->lpVtbl) { PyErr_SetString(PyExc_ValueError, "COM method call without VTable"); - return NULL; + goto finally; } pProc = ((void **)piunk->lpVtbl)[self->index - 0x1000]; } @@ -4584,8 +4625,9 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) callargs = _build_callargs(st, self, argtypes, inargs, kwds, &outmask, &inoutmask, &numretvals); - if (callargs == NULL) - return NULL; + if (callargs == NULL) { + goto finally; + } if (converters) { int required = Py_SAFE_DOWNCAST(PyTuple_GET_SIZE(converters), @@ -4604,7 +4646,7 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) required, required == 1 ? "" : "s", actual); - return NULL; + goto finally; } } else if (required != actual) { Py_DECREF(callargs); @@ -4613,7 +4655,7 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) required, required == 1 ? "" : "s", actual); - return NULL; + goto finally; } } @@ -4644,23 +4686,19 @@ PyCFuncPtr_call_lock_held(PyObject *op, PyObject *inargs, PyObject *kwds) if (v == NULL || v != callargs) { Py_DECREF(result); Py_DECREF(callargs); - return v; + ret = v; + goto finally; } Py_DECREF(v); } - - return _build_result(result, callargs, - outmask, inoutmask, numretvals); -} - -static PyObject * -PyCFuncPtr_call(PyObject *op, PyObject *inargs, PyObject *kwds) -{ - PyObject *result; - Py_BEGIN_CRITICAL_SECTION(op); - result = PyCFuncPtr_call_lock_held(op, inargs, kwds); - Py_END_CRITICAL_SECTION(); - return result; + ret = _build_result(result, callargs, outmask, inoutmask, numretvals); +finally: + Py_XDECREF(restype); + Py_XDECREF(converters); + Py_XDECREF(checker); + Py_XDECREF(argtypes); + Py_XDECREF(errcheck); + return ret; } static int diff --git a/Modules/_ctypes/_ctypes_test.c b/Modules/_ctypes/_ctypes_test.c index d28e5708b44933..66338805007853 100644 --- a/Modules/_ctypes/_ctypes_test.c +++ b/Modules/_ctypes/_ctypes_test.c @@ -23,7 +23,7 @@ # define _Py_thread_local __thread #endif -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # include // csqrt() # undef I // for _ctypes_test_generated.c.h #endif @@ -457,7 +457,7 @@ EXPORT(double) my_sqrt(double a) return sqrt(a); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) EXPORT(double complex) my_csqrt(double complex a) { return csqrt(a); diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 86bcc805360a3f..662b57a5b825e2 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -759,7 +759,7 @@ d_get(void *ptr, Py_ssize_t size) return PyFloat_FromDouble(val); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) /* We don't use _Complex types here, using arrays instead, as the C11+ standard says: "Each complex type has the same representation and alignment @@ -1599,7 +1599,7 @@ for base_code, base_c_type in [ /////////////////////////////////////////////////////////////////////////// TABLE_ENTRY_SW(d, &ffi_type_double); -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) if (Py_FFI_COMPLEX_AVAILABLE) { TABLE_ENTRY(D, &ffi_type_complex_double); TABLE_ENTRY(F, &ffi_type_complex_float); diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h index 2d859ed63e1758..03fde30c2e0944 100644 --- a/Modules/_ctypes/ctypes.h +++ b/Modules/_ctypes/ctypes.h @@ -11,7 +11,7 @@ // Do we support C99 complex types in ffi? // For Apple's libffi, this must be determined at runtime (see gh-128156). -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # if USING_APPLE_OS_LIBFFI && defined(__has_builtin) # if __has_builtin(__builtin_available) # define Py_FFI_COMPLEX_AVAILABLE __builtin_available(macOS 10.15, *) diff --git a/Modules/_cursesmodule.c b/Modules/_cursesmodule.c index cd185bc2b02ea5..706aa8ea72122b 100644 --- a/Modules/_cursesmodule.c +++ b/Modules/_cursesmodule.c @@ -1312,32 +1312,27 @@ int py_mvwdelch(WINDOW *w, int y, int x) /* chgat, added by Fabian Kreutz */ #ifdef HAVE_CURSES_WCHGAT -/*[-clinic input] -_curses.window.chgat - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - - n: int = -1 - Number of characters. - - attr: long - Attributes for characters. - / - -Set the attributes of characters. +PyDoc_STRVAR(_curses_window_chgat__doc__, +"chgat([y, x,] [n=-1,] attr)\n" +"Set the attributes of characters.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Number of characters.\n" +" attr\n" +" Attributes for characters.\n" +"\n" +"Set the attributes of num characters at the current cursor position, or at\n" +"position (y, x) if supplied. If no value of num is given or num = -1, the\n" +"attribute will be set on all the characters to the end of the line. This\n" +"function does not move the cursor. The changed line will be touched using\n" +"the touchline() method so that the contents will be redisplayed by the next\n" +"window refresh."); -Set the attributes of num characters at the current cursor position, or at -position (y, x) if supplied. If no value of num is given or num = -1, the -attribute will be set on all the characters to the end of the line. This -function does not move the cursor. The changed line will be touched using -the touchline() method so that the contents will be redisplayed by the next -window refresh. -[-clinic start generated code]*/ static PyObject * PyCursesWindow_ChgAt(PyObject *op, PyObject *args) { @@ -1363,19 +1358,20 @@ PyCursesWindow_ChgAt(PyObject *op, PyObject *args) attr = lattr; break; case 3: - if (!PyArg_ParseTuple(args,"iil;int,int,attr", &y, &x, &lattr)) + if (!PyArg_ParseTuple(args,"iil;y,x,attr", &y, &x, &lattr)) return NULL; attr = lattr; use_xy = TRUE; break; case 4: - if (!PyArg_ParseTuple(args,"iiil;int,int,n,attr", &y, &x, &num, &lattr)) + if (!PyArg_ParseTuple(args,"iiil;y,x,n,attr", &y, &x, &num, &lattr)) return NULL; attr = lattr; use_xy = TRUE; break; default: - PyErr_SetString(PyExc_TypeError, "chgat requires 1 to 4 arguments"); + PyErr_SetString(PyExc_TypeError, + "_curses.window.chgat requires 1 to 4 arguments"); return NULL; } @@ -1533,7 +1529,7 @@ _curses_window_getbkgd_impl(PyCursesWindowObject *self) } /*[clinic input] -_curses.window.getch -> int +_curses.window.getch [ y: int @@ -1550,10 +1546,10 @@ keypad keys and so on return numbers higher than 256. In no-delay mode, -1 is returned if there is no input, else getch() waits until a key is pressed. [clinic start generated code]*/ -static int +static PyObject * _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x) -/*[clinic end generated code: output=980aa6af0c0ca387 input=bb24ebfb379f991f]*/ +/*[clinic end generated code: output=e1639e87d545e676 input=73f350336b1ee8c8]*/ { int rtn; @@ -1566,7 +1562,17 @@ _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, } Py_END_ALLOW_THREADS - return rtn; + if (rtn == ERR) { + // We suppress ERR returned by wgetch() in nodelay mode + // after we handled possible interruption signals. + if (PyErr_CheckSignals()) { + return NULL; + } + // ERR is an implementation detail, so to be on the safe side, + // we forcibly set the return value to -1 as documented above. + rtn = -1; + } + return PyLong_FromLong(rtn); } /*[clinic input] @@ -1678,84 +1684,102 @@ _curses_window_get_wch_impl(PyCursesWindowObject *self, int group_right_1, } #endif -/*[-clinic input] -_curses.window.getstr - - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - n: int = 1023 - Maximal number of characters. - / +/* + * Helper function for parsing parameters from getstr() and instr(). + * This function is necessary because Argument Clinic does not know + * how to handle nested optional groups with default values inside. + * + * Return 1 on success and 0 on failure, similar to PyArg_ParseTuple(). + */ +static int +curses_clinic_parse_optional_xy_n(PyObject *args, + int *y, int *x, unsigned int *n, int *use_xy, + const char *qualname) +{ + switch (PyTuple_GET_SIZE(args)) { + case 0: { + *use_xy = 0; + return 1; + } + case 1: { + *use_xy = 0; + return PyArg_ParseTuple(args, "O&;n", + _PyLong_UnsignedInt_Converter, n); + } + case 2: { + *use_xy = 1; + return PyArg_ParseTuple(args, "ii;y,x", y, x); + } + case 3: { + *use_xy = 1; + return PyArg_ParseTuple(args, "iiO&;y,x,n", y, x, + _PyLong_UnsignedInt_Converter, n); + } + default: { + *use_xy = 0; + PyErr_Format(PyExc_TypeError, "%s requires 0 to 3 arguments", + qualname); + return 0; + } + } +} -Read a string from the user, with primitive line editing capacity. -[-clinic start generated code]*/ +PyDoc_STRVAR(_curses_window_getstr__doc__, +"getstr([[y, x,] n=2047])\n" +"Read a string from the user, with primitive line editing capacity.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Maximal number of characters."); static PyObject * -PyCursesWindow_GetStr(PyObject *op, PyObject *args) +PyCursesWindow_getstr(PyObject *op, PyObject *args) { PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); + int rtn, use_xy = 0, y = 0, x = 0; + unsigned int max_buf_size = 2048; + unsigned int n = max_buf_size - 1; + PyObject *res; - int x, y, n; - char rtn[1024]; /* This should be big enough.. I hope */ - int rtn2; + if (!curses_clinic_parse_optional_xy_n(args, &y, &x, &n, &use_xy, + "_curses.window.instr")) + { + return NULL; + } - switch (PyTuple_Size(args)) { - case 0: - Py_BEGIN_ALLOW_THREADS - rtn2 = wgetnstr(self->win,rtn, 1023); - Py_END_ALLOW_THREADS - break; - case 1: - if (!PyArg_ParseTuple(args,"i;n", &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } - Py_BEGIN_ALLOW_THREADS - rtn2 = wgetnstr(self->win, rtn, Py_MIN(n, 1023)); - Py_END_ALLOW_THREADS - break; - case 2: - if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - return NULL; + n = Py_MIN(n, max_buf_size - 1); + res = PyBytes_FromStringAndSize(NULL, n + 1); + if (res == NULL) { + return NULL; + } + char *buf = PyBytes_AS_STRING(res); + + if (use_xy) { Py_BEGIN_ALLOW_THREADS #ifdef STRICT_SYSV_CURSES - rtn2 = wmove(self->win,y,x)==ERR ? ERR : wgetnstr(self->win, rtn, 1023); + rtn = wmove(self->win, y, x) == ERR + ? ERR + : wgetnstr(self->win, buf, n); #else - rtn2 = mvwgetnstr(self->win,y,x,rtn, 1023); + rtn = mvwgetnstr(self->win, y, x, buf, n); #endif Py_END_ALLOW_THREADS - break; - case 3: - if (!PyArg_ParseTuple(args,"iii;y,x,n", &y, &x, &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } -#ifdef STRICT_SYSV_CURSES - Py_BEGIN_ALLOW_THREADS - rtn2 = wmove(self->win,y,x)==ERR ? ERR : - wgetnstr(self->win, rtn, Py_MIN(n, 1023)); - Py_END_ALLOW_THREADS -#else + } + else { Py_BEGIN_ALLOW_THREADS - rtn2 = mvwgetnstr(self->win, y, x, rtn, Py_MIN(n, 1023)); + rtn = wgetnstr(self->win, buf, n); Py_END_ALLOW_THREADS -#endif - break; - default: - PyErr_SetString(PyExc_TypeError, "getstr requires 0 to 3 arguments"); - return NULL; } - if (rtn2 == ERR) - rtn[0] = 0; - return PyBytes_FromString(rtn); + + if (rtn == ERR) { + Py_DECREF(res); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + _PyBytes_Resize(&res, strlen(buf)); // 'res' is set to NULL on failure + return res; } /*[clinic input] @@ -1880,69 +1904,57 @@ _curses_window_inch_impl(PyCursesWindowObject *self, int group_right_1, return rtn; } -/*[-clinic input] -_curses.window.instr - - [ - y: int - Y-coordinate. - x: int - X-coordinate. - ] - n: int = 1023 - Maximal number of characters. - / - -Return a string of characters, extracted from the window. +PyDoc_STRVAR(_curses_window_instr__doc__, +"instr([y, x,] n=2047)\n" +"Return a string of characters, extracted from the window.\n" +"\n" +" y\n" +" Y-coordinate.\n" +" x\n" +" X-coordinate.\n" +" n\n" +" Maximal number of characters.\n" +"\n" +"Return a string of characters, extracted from the window starting at the\n" +"current cursor position, or at y, x if specified. Attributes are stripped\n" +"from the characters. If n is specified, instr() returns a string at most\n" +"n characters long (exclusive of the trailing NUL)."); -Return a string of characters, extracted from the window starting at the -current cursor position, or at y, x if specified. Attributes are stripped -from the characters. If n is specified, instr() returns a string at most -n characters long (exclusive of the trailing NUL). -[-clinic start generated code]*/ static PyObject * -PyCursesWindow_InStr(PyObject *op, PyObject *args) +PyCursesWindow_instr(PyObject *op, PyObject *args) { PyCursesWindowObject *self = _PyCursesWindowObject_CAST(op); + int rtn, use_xy = 0, y = 0, x = 0; + unsigned int max_buf_size = 2048; + unsigned int n = max_buf_size - 1; + PyObject *res; - int x, y, n; - char rtn[1024]; /* This should be big enough.. I hope */ - int rtn2; + if (!curses_clinic_parse_optional_xy_n(args, &y, &x, &n, &use_xy, + "_curses.window.instr")) + { + return NULL; + } - switch (PyTuple_Size(args)) { - case 0: - rtn2 = winnstr(self->win,rtn, 1023); - break; - case 1: - if (!PyArg_ParseTuple(args,"i;n", &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } - rtn2 = winnstr(self->win, rtn, Py_MIN(n, 1023)); - break; - case 2: - if (!PyArg_ParseTuple(args,"ii;y,x",&y,&x)) - return NULL; - rtn2 = mvwinnstr(self->win,y,x,rtn,1023); - break; - case 3: - if (!PyArg_ParseTuple(args, "iii;y,x,n", &y, &x, &n)) - return NULL; - if (n < 0) { - PyErr_SetString(PyExc_ValueError, "'n' must be nonnegative"); - return NULL; - } - rtn2 = mvwinnstr(self->win, y, x, rtn, Py_MIN(n,1023)); - break; - default: - PyErr_SetString(PyExc_TypeError, "instr requires 0 or 3 arguments"); + n = Py_MIN(n, max_buf_size - 1); + res = PyBytes_FromStringAndSize(NULL, n + 1); + if (res == NULL) { return NULL; } - if (rtn2 == ERR) - rtn[0] = 0; - return PyBytes_FromString(rtn); + char *buf = PyBytes_AS_STRING(res); + + if (use_xy) { + rtn = mvwinnstr(self->win, y, x, buf, n); + } + else { + rtn = winnstr(self->win, buf, n); + } + + if (rtn == ERR) { + Py_DECREF(res); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); + } + _PyBytes_Resize(&res, strlen(buf)); // 'res' is set to NULL on failure + return res; } /*[clinic input] @@ -2653,7 +2665,10 @@ static PyMethodDef PyCursesWindow_methods[] = { _CURSES_WINDOW_ATTRSET_METHODDEF _CURSES_WINDOW_BKGD_METHODDEF #ifdef HAVE_CURSES_WCHGAT - {"chgat", PyCursesWindow_ChgAt, METH_VARARGS}, + { + "chgat", PyCursesWindow_ChgAt, METH_VARARGS, + _curses_window_chgat__doc__ + }, #endif _CURSES_WINDOW_BKGDSET_METHODDEF _CURSES_WINDOW_BORDER_METHODDEF @@ -2676,7 +2691,10 @@ static PyMethodDef PyCursesWindow_methods[] = { _CURSES_WINDOW_GET_WCH_METHODDEF {"getmaxyx", PyCursesWindow_getmaxyx, METH_NOARGS}, {"getparyx", PyCursesWindow_getparyx, METH_NOARGS}, - {"getstr", PyCursesWindow_GetStr, METH_VARARGS}, + { + "getstr", PyCursesWindow_getstr, METH_VARARGS, + _curses_window_getstr__doc__ + }, {"getyx", PyCursesWindow_getyx, METH_NOARGS}, _CURSES_WINDOW_HLINE_METHODDEF {"idcok", PyCursesWindow_idcok, METH_VARARGS}, @@ -2690,7 +2708,10 @@ static PyMethodDef PyCursesWindow_methods[] = { {"insertln", PyCursesWindow_winsertln, METH_NOARGS}, _CURSES_WINDOW_INSNSTR_METHODDEF _CURSES_WINDOW_INSSTR_METHODDEF - {"instr", PyCursesWindow_InStr, METH_VARARGS}, + { + "instr", PyCursesWindow_instr, METH_VARARGS, + _curses_window_instr__doc__ + }, _CURSES_WINDOW_IS_LINETOUCHED_METHODDEF {"is_wintouched", PyCursesWindow_is_wintouched, METH_NOARGS}, {"keypad", PyCursesWindow_keypad, METH_VARARGS}, diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 9bba0e3354b26b..f5a9712b64e4de 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1928,7 +1928,7 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, } replacement = freplacement; } -#ifdef Py_NORMALIZE_CENTURY +#ifdef _Py_NORMALIZE_CENTURY else if (ch == 'Y' || ch == 'G' || ch == 'F' || ch == 'C' ) { diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 8c3efa36353e24..b9e12ab2026f65 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -17,6 +17,7 @@ #include "Python.h" #include "pycore_pyhash.h" // _Py_HashSecret +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include // offsetof() #include "expat.h" @@ -690,8 +691,7 @@ element_dealloc(PyObject *op) /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); /* element_gc_clear clears all references and deallocates extra */ @@ -811,6 +811,8 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) PyTypeObject *tp = Py_TYPE(self); elementtreestate *st = get_elementtree_state_by_type(tp); + // The deepcopy() helper takes care of incrementing the refcount + // of the object to copy so to avoid use-after-frees. tag = deepcopy(st, self->tag, memo); if (!tag) return NULL; @@ -845,11 +847,13 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) assert(!element->extra || !element->extra->length); if (self->extra) { - if (element_resize(element, self->extra->length) < 0) + Py_ssize_t expected_count = self->extra->length; + if (element_resize(element, expected_count) < 0) { + assert(!element->extra->length); goto error; + } - // TODO(picnixz): check for an evil child's __deepcopy__ on 'self' - for (i = 0; i < self->extra->length; i++) { + for (i = 0; self->extra && i < self->extra->length; i++) { PyObject* child = deepcopy(st, self->extra->children[i], memo); if (!child || !Element_Check(st, child)) { if (child) { @@ -859,11 +863,24 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) element->extra->length = i; goto error; } + if (self->extra && expected_count != self->extra->length) { + // 'self->extra' got mutated and 'element' may not have + // sufficient space to hold the next iteration's item. + expected_count = self->extra->length; + if (element_resize(element, expected_count) < 0) { + Py_DECREF(child); + element->extra->length = i; + goto error; + } + } element->extra->children[i] = child; } assert(!element->extra->length); - element->extra->length = self->extra->length; + // The original 'self->extra' may be gone at this point if deepcopy() + // has side-effects. However, 'i' is the number of copied items that + // we were able to successfully copy. + element->extra->length = i; } /* add object to memo dictionary (so deepcopy won't visit it again) */ @@ -906,13 +923,20 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) break; } } - if (simple) + if (simple) { return PyDict_Copy(object); + } /* Fall through to general case */ } else if (Element_CheckExact(st, object)) { - return _elementtree_Element___deepcopy___impl( + // The __deepcopy__() call may call arbitrary code even if the + // object to copy is a built-in XML element (one of its children + // any of its parents in its own __deepcopy__() implementation). + Py_INCREF(object); + PyObject *res = _elementtree_Element___deepcopy___impl( (ElementObject *)object, memo); + Py_DECREF(object); + return res; } } @@ -923,8 +947,11 @@ deepcopy(elementtreestate *st, PyObject *object, PyObject *memo) return NULL; } + Py_INCREF(object); PyObject *args[2] = {object, memo}; - return PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL); + PyObject *res = PyObject_Vectorcall(st->deepcopy_obj, args, 2, NULL); + Py_DECREF(object); + return res; } diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index e6c454faf4b16f..d3dabd58b89c70 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -7,6 +7,7 @@ #include "pycore_pyatomic_ft_wrappers.h" #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_tuple.h" // _PyTuple_ITEMS() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/_functoolsmodule.c.h" @@ -196,6 +197,19 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw) return NULL; } + /* keyword Placeholder prohibition */ + if (kw != NULL) { + PyObject *key, *val; + Py_ssize_t pos = 0; + while (PyDict_Next(kw, &pos, &key, &val)) { + if (val == phold) { + PyErr_SetString(PyExc_TypeError, + "Placeholder cannot be passed as a keyword argument"); + return NULL; + } + } + } + /* check wrapped function / object */ pto_args = pto_kw = NULL; int res = PyObject_TypeCheck(func, state->partial_type); @@ -338,9 +352,7 @@ partial_dealloc(PyObject *self) PyTypeObject *tp = Py_TYPE(self); /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(self); - if (partialobject_CAST(self)->weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, partialobject_CAST(self)->weakreflist); (void)partial_clear(self); tp->tp_free(self); Py_DECREF(tp); @@ -1370,8 +1382,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self, this same key, then this setitem call will update the cache dict with this new link, leaving the old link as an orphan (i.e. not having a cache dict entry that refers to it). */ - if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, - hash) < 0) { + if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key, + (PyObject *)link, hash) < 0) { Py_DECREF(link); return NULL; } @@ -1440,8 +1452,8 @@ bounded_lru_cache_update_lock_held(lru_cache_object *self, for successful insertion in the cache dict before adding the link to the linked list. Otherwise, the potentially reentrant __eq__ call could cause the then orphan link to be visited. */ - if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, - hash) < 0) { + if (_PyDict_SetItem_KnownHash_LockHeld((PyDictObject *)self->cache, key, + (PyObject *)link, hash) < 0) { /* Somehow the cache dict update failed. We no longer can restore the old link. Let the error propagate upward and leave the cache short one link. */ @@ -1608,9 +1620,7 @@ lru_cache_dealloc(PyObject *op) PyTypeObject *tp = Py_TYPE(obj); /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(obj); - if (obj->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, obj->weakreflist); (void)lru_cache_tp_clear(op); tp->tp_free(obj); @@ -1676,7 +1686,13 @@ _functools__lru_cache_wrapper_cache_clear_impl(PyObject *self) lru_list_elem *list = lru_cache_unlink_list(_self); FT_ATOMIC_STORE_SSIZE_RELAXED(_self->hits, 0); FT_ATOMIC_STORE_SSIZE_RELAXED(_self->misses, 0); - PyDict_Clear(_self->cache); + if (_self->wrapper == bounded_lru_cache_wrapper) { + /* The critical section on the lru cache itself protects the dictionary + for bounded_lru_cache instances. */ + _PyDict_Clear_LockHeld(_self->cache); + } else { + PyDict_Clear(_self->cache); + } lru_cache_clear_list(list); Py_RETURN_NONE; } diff --git a/Modules/_hacl/Hacl_Hash_Blake2b.c b/Modules/_hacl/Hacl_Hash_Blake2b.c index 21ab2b88c799a6..a5b75d61798949 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b.c @@ -544,11 +544,9 @@ void Hacl_Hash_Blake2b_init(uint64_t *hash, uint32_t kk, uint32_t nn) uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk1 - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk1 << 8U ^ + ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); tmp[1U] = p.node_offset; tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; tmp[3U] = 0ULL; @@ -860,14 +858,10 @@ static Hacl_Hash_Blake2b_state_t uint64_t x = r4; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ - ((uint64_t)pv.fanout - << 16U - ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ + ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1059,11 +1053,9 @@ static void reset_raw(Hacl_Hash_Blake2b_state_t *state, Hacl_Hash_Blake2b_params uint64_t x = r; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1200,8 +1192,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1265,8 +1256,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1296,8 +1286,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state10, @@ -1359,8 +1348,7 @@ Hacl_Hash_Blake2b_update(Hacl_Hash_Blake2b_state_t *state, uint8_t *chunk, uint3 nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_state_t){ .block_state = block_state1, @@ -1690,14 +1678,10 @@ Hacl_Hash_Blake2b_hash_with_key_and_params( uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ - ((uint64_t)params.fanout - << 16U - ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + (uint64_t)nn ^ + ((uint64_t)kk << 8U ^ + ((uint64_t)params.fanout << 16U ^ + ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); tmp[1U] = params.node_offset; tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; tmp[3U] = 0ULL; diff --git a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c index c4d9b4a689d28f..f955f1c6115b1e 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c +++ b/Modules/_hacl/Hacl_Hash_Blake2b_Simd256.c @@ -274,11 +274,9 @@ Hacl_Hash_Blake2b_Simd256_init(Lib_IntVector_Intrinsics_vec256 *hash, uint32_t k uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk1 - << 8U - ^ ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk1 << 8U ^ + ((uint64_t)p.fanout << 16U ^ ((uint64_t)p.depth << 24U ^ (uint64_t)p.leaf_length << 32U))); tmp[1U] = p.node_offset; tmp[2U] = (uint64_t)p.node_depth ^ (uint64_t)p.inner_length << 8U; tmp[3U] = 0ULL; @@ -746,14 +744,10 @@ static Hacl_Hash_Blake2b_Simd256_state_t uint64_t x = r4; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ - ((uint64_t)pv.fanout - << 16U - ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ + ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -936,11 +930,9 @@ reset_raw(Hacl_Hash_Blake2b_Simd256_state_t *state, Hacl_Hash_Blake2b_params_and uint64_t x = r; os[i0] = x;); tmp[0U] = - (uint64_t)nn1 - ^ - ((uint64_t)kk2 - << 8U - ^ ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); + (uint64_t)nn1 ^ + ((uint64_t)kk2 << 8U ^ + ((uint64_t)pv.fanout << 16U ^ ((uint64_t)pv.depth << 24U ^ (uint64_t)pv.leaf_length << 32U))); tmp[1U] = pv.node_offset; tmp[2U] = (uint64_t)pv.node_depth ^ (uint64_t)pv.inner_length << 8U; tmp[3U] = 0ULL; @@ -1075,8 +1067,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1140,8 +1131,7 @@ Hacl_Hash_Blake2b_Simd256_update( nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1171,8 +1161,7 @@ Hacl_Hash_Blake2b_Simd256_update( uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state10, @@ -1234,8 +1223,7 @@ Hacl_Hash_Blake2b_Simd256_update( nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2b_Simd256_state_t){ .block_state = block_state1, @@ -1578,14 +1566,10 @@ Hacl_Hash_Blake2b_Simd256_hash_with_key_and_params( uint64_t x = r; os[i] = x;); tmp[0U] = - (uint64_t)nn - ^ - ((uint64_t)kk - << 8U - ^ - ((uint64_t)params.fanout - << 16U - ^ ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); + (uint64_t)nn ^ + ((uint64_t)kk << 8U ^ + ((uint64_t)params.fanout << 16U ^ + ((uint64_t)params.depth << 24U ^ (uint64_t)params.leaf_length << 32U))); tmp[1U] = params.node_offset; tmp[2U] = (uint64_t)params.node_depth ^ (uint64_t)params.inner_length << 8U; tmp[3U] = 0ULL; diff --git a/Modules/_hacl/Hacl_Hash_Blake2s.c b/Modules/_hacl/Hacl_Hash_Blake2s.c index 730ba135afb2fb..0d4fcc7f3951fc 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s.c @@ -543,13 +543,13 @@ void Hacl_Hash_Blake2s_init(uint32_t *hash, uint32_t kk, uint32_t nn) uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)(uint8_t)nn - ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + (uint32_t)(uint8_t)nn ^ + ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); tmp[1U] = p.leaf_length; tmp[2U] = (uint32_t)p.node_offset; tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + (uint32_t)(p.node_offset >> 32U) ^ + ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -846,16 +846,14 @@ static Hacl_Hash_Blake2s_state_t uint32_t x = r4; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ - ((uint32_t)pv.key_length - << 8U - ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ + ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1042,13 +1040,13 @@ static void reset_raw(Hacl_Hash_Blake2s_state_t *state, Hacl_Hash_Blake2b_params uint32_t x = r; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1182,8 +1180,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1237,8 +1234,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1268,8 +1264,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state10, @@ -1321,8 +1316,7 @@ Hacl_Hash_Blake2s_update(Hacl_Hash_Blake2s_state_t *state, uint8_t *chunk, uint3 Hacl_Hash_Blake2s_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_state_t){ .block_state = block_state1, @@ -1639,16 +1633,14 @@ Hacl_Hash_Blake2s_hash_with_key_and_params( uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)params.digest_length - ^ - ((uint32_t)params.key_length - << 8U - ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + (uint32_t)params.digest_length ^ + ((uint32_t)params.key_length << 8U ^ + ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); tmp[1U] = params.leaf_length; tmp[2U] = (uint32_t)params.node_offset; tmp[3U] = - (uint32_t)(params.node_offset >> 32U) - ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + (uint32_t)(params.node_offset >> 32U) ^ + ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; diff --git a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c index 7e9cd79544f8f1..fa46be045f3441 100644 --- a/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c +++ b/Modules/_hacl/Hacl_Hash_Blake2s_Simd128.c @@ -271,13 +271,13 @@ Hacl_Hash_Blake2s_Simd128_init(Lib_IntVector_Intrinsics_vec128 *hash, uint32_t k uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)(uint8_t)nn - ^ ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); + (uint32_t)(uint8_t)nn ^ + ((uint32_t)(uint8_t)kk << 8U ^ ((uint32_t)p.fanout << 16U ^ (uint32_t)p.depth << 24U)); tmp[1U] = p.leaf_length; tmp[2U] = (uint32_t)p.node_offset; tmp[3U] = - (uint32_t)(p.node_offset >> 32U) - ^ ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); + (uint32_t)(p.node_offset >> 32U) ^ + ((uint32_t)p.node_depth << 16U ^ (uint32_t)p.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -736,16 +736,14 @@ static Hacl_Hash_Blake2s_Simd128_state_t uint32_t x = r4; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ - ((uint32_t)pv.key_length - << 8U - ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ + ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -923,13 +921,13 @@ reset_raw(Hacl_Hash_Blake2s_Simd128_state_t *state, Hacl_Hash_Blake2b_params_and uint32_t x = r; os[i0] = x;); tmp[0U] = - (uint32_t)pv.digest_length - ^ ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); + (uint32_t)pv.digest_length ^ + ((uint32_t)pv.key_length << 8U ^ ((uint32_t)pv.fanout << 16U ^ (uint32_t)pv.depth << 24U)); tmp[1U] = pv.leaf_length; tmp[2U] = (uint32_t)pv.node_offset; tmp[3U] = - (uint32_t)(pv.node_offset >> 32U) - ^ ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); + (uint32_t)(pv.node_offset >> 32U) ^ + ((uint32_t)pv.node_depth << 16U ^ (uint32_t)pv.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; @@ -1061,8 +1059,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1116,8 +1113,7 @@ Hacl_Hash_Blake2s_Simd128_update( Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1147,8 +1143,7 @@ Hacl_Hash_Blake2s_Simd128_update( uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state10, @@ -1200,8 +1195,7 @@ Hacl_Hash_Blake2s_Simd128_update( Hacl_Hash_Blake2s_Simd128_update_multi(data1_len, wv, hash, total_len1, data1, nb); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_Blake2s_Simd128_state_t){ .block_state = block_state1, @@ -1531,16 +1525,14 @@ Hacl_Hash_Blake2s_Simd128_hash_with_key_and_params( uint32_t x = r; os[i] = x;); tmp[0U] = - (uint32_t)params.digest_length - ^ - ((uint32_t)params.key_length - << 8U - ^ ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); + (uint32_t)params.digest_length ^ + ((uint32_t)params.key_length << 8U ^ + ((uint32_t)params.fanout << 16U ^ (uint32_t)params.depth << 24U)); tmp[1U] = params.leaf_length; tmp[2U] = (uint32_t)params.node_offset; tmp[3U] = - (uint32_t)(params.node_offset >> 32U) - ^ ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); + (uint32_t)(params.node_offset >> 32U) ^ + ((uint32_t)params.node_depth << 16U ^ (uint32_t)params.inner_length << 24U); uint32_t tmp0 = tmp[0U]; uint32_t tmp1 = tmp[1U]; uint32_t tmp2 = tmp[2U]; diff --git a/Modules/_hacl/Hacl_Hash_MD5.c b/Modules/_hacl/Hacl_Hash_MD5.c index 75ce8d2926e6e1..305c75483c06ea 100644 --- a/Modules/_hacl/Hacl_Hash_MD5.c +++ b/Modules/_hacl/Hacl_Hash_MD5.c @@ -66,11 +66,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti0 = _t[0U]; uint32_t v = - vb0 - + - ((va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) - << 7U - | (va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) >> 25U); + vb0 + + ((va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) << 7U | + (va + ((vb0 & vc0) | (~vb0 & vd0)) + xk + ti0) >> 25U); abcd[0U] = v; uint32_t va0 = abcd[3U]; uint32_t vb1 = abcd[0U]; @@ -82,11 +80,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti1 = _t[1U]; uint32_t v0 = - vb1 - + - ((va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) - << 12U - | (va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) >> 20U); + vb1 + + ((va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) << 12U | + (va0 + ((vb1 & vc1) | (~vb1 & vd1)) + xk0 + ti1) >> 20U); abcd[3U] = v0; uint32_t va1 = abcd[2U]; uint32_t vb2 = abcd[3U]; @@ -98,11 +94,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti2 = _t[2U]; uint32_t v1 = - vb2 - + - ((va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) - << 17U - | (va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) >> 15U); + vb2 + + ((va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) << 17U | + (va1 + ((vb2 & vc2) | (~vb2 & vd2)) + xk1 + ti2) >> 15U); abcd[2U] = v1; uint32_t va2 = abcd[1U]; uint32_t vb3 = abcd[2U]; @@ -114,11 +108,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti3 = _t[3U]; uint32_t v2 = - vb3 - + - ((va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) - << 22U - | (va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) >> 10U); + vb3 + + ((va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) << 22U | + (va2 + ((vb3 & vc3) | (~vb3 & vd3)) + xk2 + ti3) >> 10U); abcd[1U] = v2; uint32_t va3 = abcd[0U]; uint32_t vb4 = abcd[1U]; @@ -130,11 +122,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti4 = _t[4U]; uint32_t v3 = - vb4 - + - ((va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) - << 7U - | (va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) >> 25U); + vb4 + + ((va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) << 7U | + (va3 + ((vb4 & vc4) | (~vb4 & vd4)) + xk3 + ti4) >> 25U); abcd[0U] = v3; uint32_t va4 = abcd[3U]; uint32_t vb5 = abcd[0U]; @@ -146,11 +136,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti5 = _t[5U]; uint32_t v4 = - vb5 - + - ((va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) - << 12U - | (va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) >> 20U); + vb5 + + ((va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) << 12U | + (va4 + ((vb5 & vc5) | (~vb5 & vd5)) + xk4 + ti5) >> 20U); abcd[3U] = v4; uint32_t va5 = abcd[2U]; uint32_t vb6 = abcd[3U]; @@ -162,11 +150,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti6 = _t[6U]; uint32_t v5 = - vb6 - + - ((va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) - << 17U - | (va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) >> 15U); + vb6 + + ((va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) << 17U | + (va5 + ((vb6 & vc6) | (~vb6 & vd6)) + xk5 + ti6) >> 15U); abcd[2U] = v5; uint32_t va6 = abcd[1U]; uint32_t vb7 = abcd[2U]; @@ -178,11 +164,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti7 = _t[7U]; uint32_t v6 = - vb7 - + - ((va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) - << 22U - | (va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) >> 10U); + vb7 + + ((va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) << 22U | + (va6 + ((vb7 & vc7) | (~vb7 & vd7)) + xk6 + ti7) >> 10U); abcd[1U] = v6; uint32_t va7 = abcd[0U]; uint32_t vb8 = abcd[1U]; @@ -194,11 +178,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti8 = _t[8U]; uint32_t v7 = - vb8 - + - ((va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) - << 7U - | (va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) >> 25U); + vb8 + + ((va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) << 7U | + (va7 + ((vb8 & vc8) | (~vb8 & vd8)) + xk7 + ti8) >> 25U); abcd[0U] = v7; uint32_t va8 = abcd[3U]; uint32_t vb9 = abcd[0U]; @@ -210,11 +192,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti9 = _t[9U]; uint32_t v8 = - vb9 - + - ((va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) - << 12U - | (va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) >> 20U); + vb9 + + ((va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) << 12U | + (va8 + ((vb9 & vc9) | (~vb9 & vd9)) + xk8 + ti9) >> 20U); abcd[3U] = v8; uint32_t va9 = abcd[2U]; uint32_t vb10 = abcd[3U]; @@ -226,11 +206,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti10 = _t[10U]; uint32_t v9 = - vb10 - + - ((va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) - << 17U - | (va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) >> 15U); + vb10 + + ((va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) << 17U | + (va9 + ((vb10 & vc10) | (~vb10 & vd10)) + xk9 + ti10) >> 15U); abcd[2U] = v9; uint32_t va10 = abcd[1U]; uint32_t vb11 = abcd[2U]; @@ -242,11 +220,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti11 = _t[11U]; uint32_t v10 = - vb11 - + - ((va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) - << 22U - | (va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) >> 10U); + vb11 + + ((va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) << 22U | + (va10 + ((vb11 & vc11) | (~vb11 & vd11)) + xk10 + ti11) >> 10U); abcd[1U] = v10; uint32_t va11 = abcd[0U]; uint32_t vb12 = abcd[1U]; @@ -258,11 +234,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti12 = _t[12U]; uint32_t v11 = - vb12 - + - ((va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) - << 7U - | (va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) >> 25U); + vb12 + + ((va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) << 7U | + (va11 + ((vb12 & vc12) | (~vb12 & vd12)) + xk11 + ti12) >> 25U); abcd[0U] = v11; uint32_t va12 = abcd[3U]; uint32_t vb13 = abcd[0U]; @@ -274,11 +248,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti13 = _t[13U]; uint32_t v12 = - vb13 - + - ((va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) - << 12U - | (va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) >> 20U); + vb13 + + ((va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) << 12U | + (va12 + ((vb13 & vc13) | (~vb13 & vd13)) + xk12 + ti13) >> 20U); abcd[3U] = v12; uint32_t va13 = abcd[2U]; uint32_t vb14 = abcd[3U]; @@ -290,11 +262,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti14 = _t[14U]; uint32_t v13 = - vb14 - + - ((va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) - << 17U - | (va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) >> 15U); + vb14 + + ((va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) << 17U | + (va13 + ((vb14 & vc14) | (~vb14 & vd14)) + xk13 + ti14) >> 15U); abcd[2U] = v13; uint32_t va14 = abcd[1U]; uint32_t vb15 = abcd[2U]; @@ -306,11 +276,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti15 = _t[15U]; uint32_t v14 = - vb15 - + - ((va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) - << 22U - | (va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) >> 10U); + vb15 + + ((va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) << 22U | + (va14 + ((vb15 & vc15) | (~vb15 & vd15)) + xk14 + ti15) >> 10U); abcd[1U] = v14; uint32_t va15 = abcd[0U]; uint32_t vb16 = abcd[1U]; @@ -322,11 +290,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti16 = _t[16U]; uint32_t v15 = - vb16 - + - ((va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) - << 5U - | (va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) >> 27U); + vb16 + + ((va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) << 5U | + (va15 + ((vb16 & vd16) | (vc16 & ~vd16)) + xk15 + ti16) >> 27U); abcd[0U] = v15; uint32_t va16 = abcd[3U]; uint32_t vb17 = abcd[0U]; @@ -338,11 +304,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti17 = _t[17U]; uint32_t v16 = - vb17 - + - ((va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) - << 9U - | (va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) >> 23U); + vb17 + + ((va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) << 9U | + (va16 + ((vb17 & vd17) | (vc17 & ~vd17)) + xk16 + ti17) >> 23U); abcd[3U] = v16; uint32_t va17 = abcd[2U]; uint32_t vb18 = abcd[3U]; @@ -354,11 +318,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti18 = _t[18U]; uint32_t v17 = - vb18 - + - ((va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) - << 14U - | (va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) >> 18U); + vb18 + + ((va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) << 14U | + (va17 + ((vb18 & vd18) | (vc18 & ~vd18)) + xk17 + ti18) >> 18U); abcd[2U] = v17; uint32_t va18 = abcd[1U]; uint32_t vb19 = abcd[2U]; @@ -370,11 +332,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti19 = _t[19U]; uint32_t v18 = - vb19 - + - ((va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) - << 20U - | (va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) >> 12U); + vb19 + + ((va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) << 20U | + (va18 + ((vb19 & vd19) | (vc19 & ~vd19)) + xk18 + ti19) >> 12U); abcd[1U] = v18; uint32_t va19 = abcd[0U]; uint32_t vb20 = abcd[1U]; @@ -386,11 +346,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti20 = _t[20U]; uint32_t v19 = - vb20 - + - ((va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) - << 5U - | (va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) >> 27U); + vb20 + + ((va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) << 5U | + (va19 + ((vb20 & vd20) | (vc20 & ~vd20)) + xk19 + ti20) >> 27U); abcd[0U] = v19; uint32_t va20 = abcd[3U]; uint32_t vb21 = abcd[0U]; @@ -402,11 +360,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti21 = _t[21U]; uint32_t v20 = - vb21 - + - ((va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) - << 9U - | (va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) >> 23U); + vb21 + + ((va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) << 9U | + (va20 + ((vb21 & vd21) | (vc21 & ~vd21)) + xk20 + ti21) >> 23U); abcd[3U] = v20; uint32_t va21 = abcd[2U]; uint32_t vb22 = abcd[3U]; @@ -418,11 +374,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti22 = _t[22U]; uint32_t v21 = - vb22 - + - ((va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) - << 14U - | (va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) >> 18U); + vb22 + + ((va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) << 14U | + (va21 + ((vb22 & vd22) | (vc22 & ~vd22)) + xk21 + ti22) >> 18U); abcd[2U] = v21; uint32_t va22 = abcd[1U]; uint32_t vb23 = abcd[2U]; @@ -434,11 +388,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti23 = _t[23U]; uint32_t v22 = - vb23 - + - ((va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) - << 20U - | (va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) >> 12U); + vb23 + + ((va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) << 20U | + (va22 + ((vb23 & vd23) | (vc23 & ~vd23)) + xk22 + ti23) >> 12U); abcd[1U] = v22; uint32_t va23 = abcd[0U]; uint32_t vb24 = abcd[1U]; @@ -450,11 +402,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti24 = _t[24U]; uint32_t v23 = - vb24 - + - ((va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) - << 5U - | (va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) >> 27U); + vb24 + + ((va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) << 5U | + (va23 + ((vb24 & vd24) | (vc24 & ~vd24)) + xk23 + ti24) >> 27U); abcd[0U] = v23; uint32_t va24 = abcd[3U]; uint32_t vb25 = abcd[0U]; @@ -466,11 +416,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti25 = _t[25U]; uint32_t v24 = - vb25 - + - ((va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) - << 9U - | (va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) >> 23U); + vb25 + + ((va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) << 9U | + (va24 + ((vb25 & vd25) | (vc25 & ~vd25)) + xk24 + ti25) >> 23U); abcd[3U] = v24; uint32_t va25 = abcd[2U]; uint32_t vb26 = abcd[3U]; @@ -482,11 +430,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti26 = _t[26U]; uint32_t v25 = - vb26 - + - ((va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) - << 14U - | (va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) >> 18U); + vb26 + + ((va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) << 14U | + (va25 + ((vb26 & vd26) | (vc26 & ~vd26)) + xk25 + ti26) >> 18U); abcd[2U] = v25; uint32_t va26 = abcd[1U]; uint32_t vb27 = abcd[2U]; @@ -498,11 +444,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti27 = _t[27U]; uint32_t v26 = - vb27 - + - ((va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) - << 20U - | (va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) >> 12U); + vb27 + + ((va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) << 20U | + (va26 + ((vb27 & vd27) | (vc27 & ~vd27)) + xk26 + ti27) >> 12U); abcd[1U] = v26; uint32_t va27 = abcd[0U]; uint32_t vb28 = abcd[1U]; @@ -514,11 +458,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti28 = _t[28U]; uint32_t v27 = - vb28 - + - ((va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) - << 5U - | (va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) >> 27U); + vb28 + + ((va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) << 5U | + (va27 + ((vb28 & vd28) | (vc28 & ~vd28)) + xk27 + ti28) >> 27U); abcd[0U] = v27; uint32_t va28 = abcd[3U]; uint32_t vb29 = abcd[0U]; @@ -530,11 +472,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti29 = _t[29U]; uint32_t v28 = - vb29 - + - ((va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) - << 9U - | (va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) >> 23U); + vb29 + + ((va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) << 9U | + (va28 + ((vb29 & vd29) | (vc29 & ~vd29)) + xk28 + ti29) >> 23U); abcd[3U] = v28; uint32_t va29 = abcd[2U]; uint32_t vb30 = abcd[3U]; @@ -546,11 +486,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti30 = _t[30U]; uint32_t v29 = - vb30 - + - ((va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) - << 14U - | (va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) >> 18U); + vb30 + + ((va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) << 14U | + (va29 + ((vb30 & vd30) | (vc30 & ~vd30)) + xk29 + ti30) >> 18U); abcd[2U] = v29; uint32_t va30 = abcd[1U]; uint32_t vb31 = abcd[2U]; @@ -562,11 +500,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti31 = _t[31U]; uint32_t v30 = - vb31 - + - ((va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) - << 20U - | (va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) >> 12U); + vb31 + + ((va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) << 20U | + (va30 + ((vb31 & vd31) | (vc31 & ~vd31)) + xk30 + ti31) >> 12U); abcd[1U] = v30; uint32_t va31 = abcd[0U]; uint32_t vb32 = abcd[1U]; @@ -578,11 +514,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti32 = _t[32U]; uint32_t v31 = - vb32 - + - ((va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) - << 4U - | (va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) >> 28U); + vb32 + + ((va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) << 4U | + (va31 + (vb32 ^ (vc32 ^ vd32)) + xk31 + ti32) >> 28U); abcd[0U] = v31; uint32_t va32 = abcd[3U]; uint32_t vb33 = abcd[0U]; @@ -594,11 +528,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti33 = _t[33U]; uint32_t v32 = - vb33 - + - ((va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) - << 11U - | (va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) >> 21U); + vb33 + + ((va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) << 11U | + (va32 + (vb33 ^ (vc33 ^ vd33)) + xk32 + ti33) >> 21U); abcd[3U] = v32; uint32_t va33 = abcd[2U]; uint32_t vb34 = abcd[3U]; @@ -610,11 +542,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti34 = _t[34U]; uint32_t v33 = - vb34 - + - ((va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) - << 16U - | (va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) >> 16U); + vb34 + + ((va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) << 16U | + (va33 + (vb34 ^ (vc34 ^ vd34)) + xk33 + ti34) >> 16U); abcd[2U] = v33; uint32_t va34 = abcd[1U]; uint32_t vb35 = abcd[2U]; @@ -626,11 +556,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti35 = _t[35U]; uint32_t v34 = - vb35 - + - ((va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) - << 23U - | (va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) >> 9U); + vb35 + + ((va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) << 23U | + (va34 + (vb35 ^ (vc35 ^ vd35)) + xk34 + ti35) >> 9U); abcd[1U] = v34; uint32_t va35 = abcd[0U]; uint32_t vb36 = abcd[1U]; @@ -642,11 +570,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti36 = _t[36U]; uint32_t v35 = - vb36 - + - ((va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) - << 4U - | (va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) >> 28U); + vb36 + + ((va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) << 4U | + (va35 + (vb36 ^ (vc36 ^ vd36)) + xk35 + ti36) >> 28U); abcd[0U] = v35; uint32_t va36 = abcd[3U]; uint32_t vb37 = abcd[0U]; @@ -658,11 +584,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti37 = _t[37U]; uint32_t v36 = - vb37 - + - ((va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) - << 11U - | (va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) >> 21U); + vb37 + + ((va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) << 11U | + (va36 + (vb37 ^ (vc37 ^ vd37)) + xk36 + ti37) >> 21U); abcd[3U] = v36; uint32_t va37 = abcd[2U]; uint32_t vb38 = abcd[3U]; @@ -674,11 +598,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti38 = _t[38U]; uint32_t v37 = - vb38 - + - ((va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) - << 16U - | (va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) >> 16U); + vb38 + + ((va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) << 16U | + (va37 + (vb38 ^ (vc38 ^ vd38)) + xk37 + ti38) >> 16U); abcd[2U] = v37; uint32_t va38 = abcd[1U]; uint32_t vb39 = abcd[2U]; @@ -690,11 +612,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti39 = _t[39U]; uint32_t v38 = - vb39 - + - ((va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) - << 23U - | (va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) >> 9U); + vb39 + + ((va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) << 23U | + (va38 + (vb39 ^ (vc39 ^ vd39)) + xk38 + ti39) >> 9U); abcd[1U] = v38; uint32_t va39 = abcd[0U]; uint32_t vb40 = abcd[1U]; @@ -706,11 +626,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti40 = _t[40U]; uint32_t v39 = - vb40 - + - ((va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) - << 4U - | (va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) >> 28U); + vb40 + + ((va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) << 4U | + (va39 + (vb40 ^ (vc40 ^ vd40)) + xk39 + ti40) >> 28U); abcd[0U] = v39; uint32_t va40 = abcd[3U]; uint32_t vb41 = abcd[0U]; @@ -722,11 +640,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti41 = _t[41U]; uint32_t v40 = - vb41 - + - ((va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) - << 11U - | (va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) >> 21U); + vb41 + + ((va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) << 11U | + (va40 + (vb41 ^ (vc41 ^ vd41)) + xk40 + ti41) >> 21U); abcd[3U] = v40; uint32_t va41 = abcd[2U]; uint32_t vb42 = abcd[3U]; @@ -738,11 +654,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti42 = _t[42U]; uint32_t v41 = - vb42 - + - ((va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) - << 16U - | (va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) >> 16U); + vb42 + + ((va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) << 16U | + (va41 + (vb42 ^ (vc42 ^ vd42)) + xk41 + ti42) >> 16U); abcd[2U] = v41; uint32_t va42 = abcd[1U]; uint32_t vb43 = abcd[2U]; @@ -754,11 +668,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti43 = _t[43U]; uint32_t v42 = - vb43 - + - ((va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) - << 23U - | (va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) >> 9U); + vb43 + + ((va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) << 23U | + (va42 + (vb43 ^ (vc43 ^ vd43)) + xk42 + ti43) >> 9U); abcd[1U] = v42; uint32_t va43 = abcd[0U]; uint32_t vb44 = abcd[1U]; @@ -770,11 +682,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti44 = _t[44U]; uint32_t v43 = - vb44 - + - ((va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) - << 4U - | (va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) >> 28U); + vb44 + + ((va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) << 4U | + (va43 + (vb44 ^ (vc44 ^ vd44)) + xk43 + ti44) >> 28U); abcd[0U] = v43; uint32_t va44 = abcd[3U]; uint32_t vb45 = abcd[0U]; @@ -786,11 +696,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti45 = _t[45U]; uint32_t v44 = - vb45 - + - ((va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) - << 11U - | (va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) >> 21U); + vb45 + + ((va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) << 11U | + (va44 + (vb45 ^ (vc45 ^ vd45)) + xk44 + ti45) >> 21U); abcd[3U] = v44; uint32_t va45 = abcd[2U]; uint32_t vb46 = abcd[3U]; @@ -802,11 +710,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti46 = _t[46U]; uint32_t v45 = - vb46 - + - ((va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) - << 16U - | (va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) >> 16U); + vb46 + + ((va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) << 16U | + (va45 + (vb46 ^ (vc46 ^ vd46)) + xk45 + ti46) >> 16U); abcd[2U] = v45; uint32_t va46 = abcd[1U]; uint32_t vb47 = abcd[2U]; @@ -818,11 +724,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti47 = _t[47U]; uint32_t v46 = - vb47 - + - ((va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) - << 23U - | (va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) >> 9U); + vb47 + + ((va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) << 23U | + (va46 + (vb47 ^ (vc47 ^ vd47)) + xk46 + ti47) >> 9U); abcd[1U] = v46; uint32_t va47 = abcd[0U]; uint32_t vb48 = abcd[1U]; @@ -834,11 +738,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti48 = _t[48U]; uint32_t v47 = - vb48 - + - ((va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) - << 6U - | (va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) >> 26U); + vb48 + + ((va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) << 6U | + (va47 + (vc48 ^ (vb48 | ~vd48)) + xk47 + ti48) >> 26U); abcd[0U] = v47; uint32_t va48 = abcd[3U]; uint32_t vb49 = abcd[0U]; @@ -850,11 +752,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti49 = _t[49U]; uint32_t v48 = - vb49 - + - ((va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) - << 10U - | (va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) >> 22U); + vb49 + + ((va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) << 10U | + (va48 + (vc49 ^ (vb49 | ~vd49)) + xk48 + ti49) >> 22U); abcd[3U] = v48; uint32_t va49 = abcd[2U]; uint32_t vb50 = abcd[3U]; @@ -866,11 +766,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti50 = _t[50U]; uint32_t v49 = - vb50 - + - ((va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) - << 15U - | (va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) >> 17U); + vb50 + + ((va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) << 15U | + (va49 + (vc50 ^ (vb50 | ~vd50)) + xk49 + ti50) >> 17U); abcd[2U] = v49; uint32_t va50 = abcd[1U]; uint32_t vb51 = abcd[2U]; @@ -882,11 +780,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti51 = _t[51U]; uint32_t v50 = - vb51 - + - ((va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) - << 21U - | (va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) >> 11U); + vb51 + + ((va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) << 21U | + (va50 + (vc51 ^ (vb51 | ~vd51)) + xk50 + ti51) >> 11U); abcd[1U] = v50; uint32_t va51 = abcd[0U]; uint32_t vb52 = abcd[1U]; @@ -898,11 +794,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti52 = _t[52U]; uint32_t v51 = - vb52 - + - ((va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) - << 6U - | (va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) >> 26U); + vb52 + + ((va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) << 6U | + (va51 + (vc52 ^ (vb52 | ~vd52)) + xk51 + ti52) >> 26U); abcd[0U] = v51; uint32_t va52 = abcd[3U]; uint32_t vb53 = abcd[0U]; @@ -914,11 +808,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti53 = _t[53U]; uint32_t v52 = - vb53 - + - ((va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) - << 10U - | (va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) >> 22U); + vb53 + + ((va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) << 10U | + (va52 + (vc53 ^ (vb53 | ~vd53)) + xk52 + ti53) >> 22U); abcd[3U] = v52; uint32_t va53 = abcd[2U]; uint32_t vb54 = abcd[3U]; @@ -930,11 +822,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti54 = _t[54U]; uint32_t v53 = - vb54 - + - ((va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) - << 15U - | (va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) >> 17U); + vb54 + + ((va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) << 15U | + (va53 + (vc54 ^ (vb54 | ~vd54)) + xk53 + ti54) >> 17U); abcd[2U] = v53; uint32_t va54 = abcd[1U]; uint32_t vb55 = abcd[2U]; @@ -946,11 +836,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti55 = _t[55U]; uint32_t v54 = - vb55 - + - ((va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) - << 21U - | (va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) >> 11U); + vb55 + + ((va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) << 21U | + (va54 + (vc55 ^ (vb55 | ~vd55)) + xk54 + ti55) >> 11U); abcd[1U] = v54; uint32_t va55 = abcd[0U]; uint32_t vb56 = abcd[1U]; @@ -962,11 +850,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti56 = _t[56U]; uint32_t v55 = - vb56 - + - ((va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) - << 6U - | (va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) >> 26U); + vb56 + + ((va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) << 6U | + (va55 + (vc56 ^ (vb56 | ~vd56)) + xk55 + ti56) >> 26U); abcd[0U] = v55; uint32_t va56 = abcd[3U]; uint32_t vb57 = abcd[0U]; @@ -978,11 +864,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti57 = _t[57U]; uint32_t v56 = - vb57 - + - ((va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) - << 10U - | (va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) >> 22U); + vb57 + + ((va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) << 10U | + (va56 + (vc57 ^ (vb57 | ~vd57)) + xk56 + ti57) >> 22U); abcd[3U] = v56; uint32_t va57 = abcd[2U]; uint32_t vb58 = abcd[3U]; @@ -994,11 +878,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti58 = _t[58U]; uint32_t v57 = - vb58 - + - ((va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) - << 15U - | (va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) >> 17U); + vb58 + + ((va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) << 15U | + (va57 + (vc58 ^ (vb58 | ~vd58)) + xk57 + ti58) >> 17U); abcd[2U] = v57; uint32_t va58 = abcd[1U]; uint32_t vb59 = abcd[2U]; @@ -1010,11 +892,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti59 = _t[59U]; uint32_t v58 = - vb59 - + - ((va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) - << 21U - | (va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) >> 11U); + vb59 + + ((va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) << 21U | + (va58 + (vc59 ^ (vb59 | ~vd59)) + xk58 + ti59) >> 11U); abcd[1U] = v58; uint32_t va59 = abcd[0U]; uint32_t vb60 = abcd[1U]; @@ -1026,11 +906,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti60 = _t[60U]; uint32_t v59 = - vb60 - + - ((va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) - << 6U - | (va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) >> 26U); + vb60 + + ((va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) << 6U | + (va59 + (vc60 ^ (vb60 | ~vd60)) + xk59 + ti60) >> 26U); abcd[0U] = v59; uint32_t va60 = abcd[3U]; uint32_t vb61 = abcd[0U]; @@ -1042,11 +920,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti61 = _t[61U]; uint32_t v60 = - vb61 - + - ((va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) - << 10U - | (va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) >> 22U); + vb61 + + ((va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) << 10U | + (va60 + (vc61 ^ (vb61 | ~vd61)) + xk60 + ti61) >> 22U); abcd[3U] = v60; uint32_t va61 = abcd[2U]; uint32_t vb62 = abcd[3U]; @@ -1058,11 +934,9 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti62 = _t[62U]; uint32_t v61 = - vb62 - + - ((va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) - << 15U - | (va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) >> 17U); + vb62 + + ((va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) << 15U | + (va61 + (vc62 ^ (vb62 | ~vd62)) + xk61 + ti62) >> 17U); abcd[2U] = v61; uint32_t va62 = abcd[1U]; uint32_t vb = abcd[2U]; @@ -1074,11 +948,8 @@ static void update(uint32_t *abcd, uint8_t *x) uint32_t ti = _t[63U]; uint32_t v62 = - vb - + - ((va62 + (vc ^ (vb | ~vd)) + xk62 + ti) - << 21U - | (va62 + (vc ^ (vb | ~vd)) + xk62 + ti) >> 11U); + vb + + ((va62 + (vc ^ (vb | ~vd)) + xk62 + ti) << 21U | (va62 + (vc ^ (vb | ~vd)) + xk62 + ti) >> 11U); abcd[1U] = v62; uint32_t a = abcd[0U]; uint32_t b = abcd[1U]; @@ -1282,8 +1153,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1328,8 +1198,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1359,8 +1228,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -1403,8 +1271,7 @@ Hacl_Hash_MD5_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t Hacl_Hash_MD5_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA1.c b/Modules/_hacl/Hacl_Hash_SHA1.c index 508e447bf275da..97bd4f2204cf24 100644 --- a/Modules/_hacl/Hacl_Hash_SHA1.c +++ b/Modules/_hacl/Hacl_Hash_SHA1.c @@ -315,8 +315,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -361,8 +360,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -392,8 +390,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -436,8 +433,7 @@ Hacl_Hash_SHA1_update(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_ Hacl_Hash_SHA1_update_multi(block_state1, data1, data1_len / 64U); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA2.c b/Modules/_hacl/Hacl_Hash_SHA2.c index d612bafa72cdc4..d2ee0c9ef51721 100644 --- a/Modules/_hacl/Hacl_Hash_SHA2.c +++ b/Modules/_hacl/Hacl_Hash_SHA2.c @@ -100,15 +100,14 @@ static inline void sha256_update(uint8_t *b, uint32_t *hash) uint32_t k_e_t = k_t; uint32_t t1 = - h02 - + ((e0 << 26U | e0 >> 6U) ^ ((e0 << 21U | e0 >> 11U) ^ (e0 << 7U | e0 >> 25U))) - + ((e0 & f0) ^ (~e0 & g0)) + h02 + ((e0 << 26U | e0 >> 6U) ^ ((e0 << 21U | e0 >> 11U) ^ (e0 << 7U | e0 >> 25U))) + + ((e0 & f0) ^ (~e0 & g0)) + k_e_t + ws_t; uint32_t t2 = - ((a0 << 30U | a0 >> 2U) ^ ((a0 << 19U | a0 >> 13U) ^ (a0 << 10U | a0 >> 22U))) - + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); + ((a0 << 30U | a0 >> 2U) ^ ((a0 << 19U | a0 >> 13U) ^ (a0 << 10U | a0 >> 22U))) + + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); uint32_t a1 = t1 + t2; uint32_t b1 = a0; uint32_t c1 = b0; @@ -301,15 +300,14 @@ static inline void sha512_update(uint8_t *b, uint64_t *hash) uint64_t k_e_t = k_t; uint64_t t1 = - h02 - + ((e0 << 50U | e0 >> 14U) ^ ((e0 << 46U | e0 >> 18U) ^ (e0 << 23U | e0 >> 41U))) - + ((e0 & f0) ^ (~e0 & g0)) + h02 + ((e0 << 50U | e0 >> 14U) ^ ((e0 << 46U | e0 >> 18U) ^ (e0 << 23U | e0 >> 41U))) + + ((e0 & f0) ^ (~e0 & g0)) + k_e_t + ws_t; uint64_t t2 = - ((a0 << 36U | a0 >> 28U) ^ ((a0 << 30U | a0 >> 34U) ^ (a0 << 25U | a0 >> 39U))) - + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); + ((a0 << 36U | a0 >> 28U) ^ ((a0 << 30U | a0 >> 34U) ^ (a0 << 25U | a0 >> 39U))) + + ((a0 & b0) ^ ((a0 & c0) ^ (b0 & c0))); uint64_t a1 = t1 + t2; uint64_t b1 = a0; uint64_t c1 = b0; @@ -639,8 +637,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -685,8 +682,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -716,8 +712,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state10, @@ -760,8 +755,7 @@ update_224_256(Hacl_Streaming_MD_state_32 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha256_update_nblocks(data1_len / 64U * 64U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_32){ .block_state = block_state1, @@ -1205,8 +1199,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, @@ -1251,8 +1244,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, @@ -1282,8 +1274,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state10, @@ -1326,8 +1317,7 @@ update_384_512(Hacl_Streaming_MD_state_64 *state, uint8_t *chunk, uint32_t chunk Hacl_Hash_SHA2_sha512_update_nblocks(data1_len / 128U * 128U, data1, block_state1); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_MD_state_64){ .block_state = block_state1, diff --git a/Modules/_hacl/Hacl_Hash_SHA3.c b/Modules/_hacl/Hacl_Hash_SHA3.c index 87638df9549fbb..466d2b96c0cdfa 100644 --- a/Modules/_hacl/Hacl_Hash_SHA3.c +++ b/Modules/_hacl/Hacl_Hash_SHA3.c @@ -866,8 +866,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ((Hacl_Hash_SHA3_state_t){ .block_state = block_state1, .buf = buf, .total_len = total_len2 }); } else if (sz == 0U) @@ -910,8 +909,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state1, @@ -941,8 +939,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state10, @@ -972,10 +969,8 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch uint32_t ite; if ( - (uint64_t)(chunk_len - diff) - % (uint64_t)block_len(i) - == 0ULL - && (uint64_t)(chunk_len - diff) > 0ULL + (uint64_t)(chunk_len - diff) % (uint64_t)block_len(i) == 0ULL && + (uint64_t)(chunk_len - diff) > 0ULL ) { ite = block_len(i); @@ -994,8 +989,7 @@ Hacl_Hash_SHA3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *chunk, uint32_t ch Hacl_Hash_SHA3_update_multi_sha3(a1, s2, data1, data1_len / block_len(a1)); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Hash_SHA3_state_t){ .block_state = block_state1, @@ -2422,9 +2416,7 @@ Hacl_Hash_SHA3_shake128_squeeze_nblocks( 5U, 1U, _C[i] = - state[i - + 0U] - ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); KRML_MAYBE_FOR5(i2, 0U, 5U, diff --git a/Modules/_hacl/Hacl_Streaming_HMAC.c b/Modules/_hacl/Hacl_Streaming_HMAC.c index d28b39792af576..8dd7e2c0bf3e71 100644 --- a/Modules/_hacl/Hacl_Streaming_HMAC.c +++ b/Modules/_hacl/Hacl_Streaming_HMAC.c @@ -198,8 +198,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_MD5_a, { .case_MD5_a = s1 } }); + st[0U] = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_MD5_a, { .case_MD5_a = s1 } }); } if (st == NULL) { @@ -220,8 +219,8 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA1_a, { .case_SHA1_a = s1 } }); + st[0U] = + ((Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA1_a, { .case_SHA1_a = s1 } }); } if (st == NULL) { @@ -242,8 +241,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_224_a, @@ -270,8 +268,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_256_a, @@ -298,8 +295,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_384_a, @@ -326,8 +322,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA2_512_a, @@ -354,8 +349,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_224_a, @@ -382,8 +376,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_256_a, @@ -410,8 +403,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_384_a, @@ -438,8 +430,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_SHA3_512_a, @@ -466,8 +457,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2S_a, @@ -495,8 +485,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2S_128_a, @@ -531,8 +520,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2B_a, @@ -560,8 +548,7 @@ static Hacl_Agile_Hash_state_s *malloc_(Hacl_Agile_Hash_impl a) *st = (Hacl_Agile_Hash_state_s *)KRML_HOST_MALLOC(sizeof (Hacl_Agile_Hash_state_s)); if (st != NULL) { - st[0U] - = + st[0U] = ( (Hacl_Agile_Hash_state_s){ .tag = Hacl_Agile_Hash_Blake2B_256_a, @@ -2059,8 +2046,8 @@ Hacl_Streaming_HMAC_update( Hacl_Streaming_HMAC_Definitions_index i1 = Hacl_Streaming_HMAC_index_of_state(block_state); if ( - (uint64_t)chunk_len - > max_input_len64(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - total_len + (uint64_t)chunk_len > + max_input_len64(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - total_len ) { return Hacl_Streaming_Types_MaximumLengthExceeded; @@ -2068,9 +2055,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz; if ( - total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len > 0ULL ) { @@ -2079,8 +2064,8 @@ Hacl_Streaming_HMAC_update( else { sz = - (uint32_t)(total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (chunk_len <= block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - sz) { @@ -2091,9 +2076,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2102,14 +2085,13 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf2 = buf + sz1; memcpy(buf2, chunk, chunk_len * sizeof (uint8_t)); uint64_t total_len2 = total_len1 + (uint64_t)chunk_len; - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2127,9 +2109,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2138,8 +2118,8 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (!(sz1 == 0U)) { @@ -2153,8 +2133,8 @@ Hacl_Streaming_HMAC_update( uint32_t ite; if ( - (uint64_t)chunk_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) + (uint64_t)chunk_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && (uint64_t)chunk_len > 0ULL ) @@ -2164,8 +2144,8 @@ Hacl_Streaming_HMAC_update( else { ite = - (uint32_t)((uint64_t)chunk_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)((uint64_t)chunk_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint32_t n_blocks = (chunk_len - ite) / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); @@ -2178,8 +2158,7 @@ Hacl_Streaming_HMAC_update( update_multi(s11, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2200,9 +2179,8 @@ Hacl_Streaming_HMAC_update( uint32_t sz10; if ( - total_len10 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len10 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == + 0ULL && total_len10 > 0ULL ) { @@ -2211,14 +2189,13 @@ Hacl_Streaming_HMAC_update( else { sz10 = - (uint32_t)(total_len10 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len10 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf2 = buf0 + sz10; memcpy(buf2, chunk1, diff * sizeof (uint8_t)); uint64_t total_len2 = total_len10 + (uint64_t)diff; - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state10, @@ -2233,9 +2210,7 @@ Hacl_Streaming_HMAC_update( uint32_t sz1; if ( - total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len1 % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len1 > 0ULL ) { @@ -2244,8 +2219,8 @@ Hacl_Streaming_HMAC_update( else { sz1 = - (uint32_t)(total_len1 - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len1 % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } if (!(sz1 == 0U)) { @@ -2259,8 +2234,8 @@ Hacl_Streaming_HMAC_update( uint32_t ite; if ( - (uint64_t)(chunk_len - diff) - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) + (uint64_t)(chunk_len - diff) % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && (uint64_t)(chunk_len - diff) > 0ULL ) @@ -2270,13 +2245,12 @@ Hacl_Streaming_HMAC_update( else { ite = - (uint32_t)((uint64_t)(chunk_len - diff) - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)((uint64_t)(chunk_len - diff) % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint32_t n_blocks = - (chunk_len - diff - ite) - / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); + (chunk_len - diff - ite) / block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); uint32_t data1_len = n_blocks * block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))); uint32_t data2_len = chunk_len - diff - data1_len; @@ -2286,8 +2260,7 @@ Hacl_Streaming_HMAC_update( update_multi(s11, total_len1, data1, data1_len); uint8_t *dst = buf; memcpy(dst, data2, data2_len * sizeof (uint8_t)); - *state - = + *state = ( (Hacl_Streaming_HMAC_agile_state){ .block_state = block_state1, @@ -2324,9 +2297,7 @@ Hacl_Streaming_HMAC_digest( uint32_t r; if ( - total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) - == 0ULL + total_len % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1))) == 0ULL && total_len > 0ULL ) { @@ -2335,8 +2306,8 @@ Hacl_Streaming_HMAC_digest( else { r = - (uint32_t)(total_len - % (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); + (uint32_t)(total_len % + (uint64_t)block_len(alg_of_impl(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)))); } uint8_t *buf_1 = buf_; Hacl_Agile_Hash_state_s *s110 = malloc_(dfst__Hacl_Agile_Hash_impl_uint32_t(i1)); diff --git a/Modules/_hacl/Lib_Memzero0.c b/Modules/_hacl/Lib_Memzero0.c index 28abd1aa4e2d54..f94e0e2254a912 100644 --- a/Modules/_hacl/Lib_Memzero0.c +++ b/Modules/_hacl/Lib_Memzero0.c @@ -11,18 +11,18 @@ #if defined(__APPLE__) && defined(__MACH__) #include // memset_s is available from macOS 10.9, iOS 7, watchOS 2, and on all tvOS and visionOS versions. -# if (defined(MAC_OS_X_VERSION_MIN_REQUIRED) && (MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9)) -# define APPLE_HAS_MEMSET_S 1 -# elif (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && (__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0)) -# define APPLE_HAS_MEMSET_S 1 +# if (defined(MAC_OS_X_VERSION_MIN_REQUIRED) && defined(MAC_OS_X_VERSION_10_9) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_9)) +# define APPLE_HAS_MEMSET_S +# elif (defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && defined(__IPHONE_7_0) && (__IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0)) +# define APPLE_HAS_MEMSET_S # elif (defined(TARGET_OS_TV) && TARGET_OS_TV) -# define APPLE_HAS_MEMSET_S 1 -# elif (defined(__WATCH_OS_VERSION_MIN_REQUIRED) && (__WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_2_0)) -# define APPLE_HAS_MEMSET_S 1 +# define APPLE_HAS_MEMSET_S +# elif (defined(__WATCH_OS_VERSION_MIN_REQUIRED) && defined(__WATCHOS_2_0) && (__WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_2_0)) +# define APPLE_HAS_MEMSET_S # elif (defined(TARGET_OS_VISION) && TARGET_OS_VISION) -# define APPLE_HAS_MEMSET_S 1 +# define APPLE_HAS_MEMSET_S # else -# define APPLE_HAS_MEMSET_S 0 +# undef APPLE_HAS_MEMSET_S # endif #endif @@ -55,7 +55,7 @@ void Lib_Memzero0_memzero0(void *dst, uint64_t len) { #ifdef _WIN32 SecureZeroMemory(dst, len_); - #elif defined(__APPLE__) && defined(__MACH__) && APPLE_HAS_MEMSET_S + #elif defined(__APPLE__) && defined(__MACH__) && defined(APPLE_HAS_MEMSET_S) memset_s(dst, len_, 0, len_); #elif (defined(__linux__) && !defined(LINUX_NO_EXPLICIT_BZERO)) || defined(__FreeBSD__) || defined(__OpenBSD__) explicit_bzero(dst, len_); diff --git a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h index d4a90220beafb5..f85982f33735d3 100644 --- a/Modules/_hacl/include/krml/FStar_UInt128_Verified.h +++ b/Modules/_hacl/include/krml/FStar_UInt128_Verified.h @@ -257,11 +257,11 @@ FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { FStar_UInt128_uint128 lit; lit.low = - (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) - | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | + (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); lit.high = - (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) - | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | + (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); return lit; } @@ -294,14 +294,12 @@ static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y) { FStar_UInt128_uint128 lit; lit.low = - FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) - * (uint64_t)y - + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); lit.high = - ((x >> FStar_UInt128_u32_32) - * (uint64_t)y - + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) + ((x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32; return lit; } @@ -315,28 +313,19 @@ static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t { FStar_UInt128_uint128 lit; lit.low = - FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) - * (y >> FStar_UInt128_u32_32) - + - FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); lit.high = - (x >> FStar_UInt128_u32_32) - * (y >> FStar_UInt128_u32_32) - + - (((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) + (x >> FStar_UInt128_u32_32) * (y >> FStar_UInt128_u32_32) + + (((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32) + - ((FStar_UInt128_u64_mod_32(x) - * (y >> FStar_UInt128_u32_32) - + - FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) - * FStar_UInt128_u64_mod_32(y) - + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) + ((FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) >> FStar_UInt128_u32_32); return lit; } diff --git a/Modules/_hacl/python_hacl_namespaces.h b/Modules/_hacl/python_hacl_namespaces.h index 1c2f7fea5c837a..d0b4500395e7c3 100644 --- a/Modules/_hacl/python_hacl_namespaces.h +++ b/Modules/_hacl/python_hacl_namespaces.h @@ -2,95 +2,43 @@ #define _PYTHON_HACL_NAMESPACES_H /* - * C's excuse for namespaces: Use globally unique names to avoid linkage - * conflicts with builds linking or dynamically loading other code potentially - * using HACL* libraries. + * Use globally unique names to avoid linkage conflicts with builds linking + * or dynamically loading other code potentially using HACL* libraries. * - * Something like this to generate new entries for the list: - * - * nm *.o | grep Hacl | cut -c 20- | sort | uniq | grep -v _Py_LibHacl_ | egrep ^_ | sed 's/_\(.*\)/#define \1 _Py_LibHacl_\1/g' - */ + * Assuming that the current working directory is Modules/_hacl, + * use the following command to generate a list of candidates: -#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0 + nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \ + | sed -e 's/^_Py_LibHacl_//g' \ + | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' \ + | sort -u -#define Hacl_Hash_SHA2_state_sha2_224_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224_s -#define Hacl_Hash_SHA2_state_sha2_224 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_224 -#define Hacl_Hash_SHA2_state_sha2_256 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_256 -#define Hacl_Hash_SHA2_state_sha2_384_s _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384_s -#define Hacl_Hash_SHA2_state_sha2_384 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_384 -#define Hacl_Hash_SHA2_state_sha2_512 _Py_LibHacl_Hacl_Hash_SHA2_state_sha2_512 -#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256 -#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224 -#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512 -#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384 -#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256 -#define Hacl_Hash_SHA2_copy_224 _Py_LibHacl_Hacl_Hash_SHA2_copy_224 -#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512 -#define Hacl_Hash_SHA2_copy_384 _Py_LibHacl_Hacl_Hash_SHA2_copy_384 -#define Hacl_Hash_SHA2_init_256 _Py_LibHacl_Hacl_Hash_SHA2_init_256 -#define Hacl_Hash_SHA2_init_224 _Py_LibHacl_Hacl_Hash_SHA2_init_224 -#define Hacl_Hash_SHA2_init_512 _Py_LibHacl_Hacl_Hash_SHA2_init_512 -#define Hacl_Hash_SHA2_init_384 _Py_LibHacl_Hacl_Hash_SHA2_init_384 -#define Hacl_SHA2_Scalar32_sha512_init _Py_LibHacl_Hacl_SHA2_Scalar32_sha512_init -#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256 -#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224 -#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512 -#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384 -#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256 -#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224 -#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512 -#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384 -#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256 -#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224 -#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512 -#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384 -#define Hacl_Hash_SHA2_sha256 _Py_LibHacl_Hacl_Hash_SHA2_sha256 -#define Hacl_Hash_SHA2_sha224 _Py_LibHacl_Hacl_Hash_SHA2_sha224 -#define Hacl_Hash_SHA2_sha512 _Py_LibHacl_Hacl_Hash_SHA2_sha512 -#define Hacl_Hash_SHA2_sha384 _Py_LibHacl_Hacl_Hash_SHA2_sha384 + * Compare the entries to add as follows: -#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc -#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init -#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update -#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest -#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free -#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy -#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash - -#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc -#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init -#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update -#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest -#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free -#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy -#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash - -#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3 -#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3 -#define Hacl_Impl_SHA3_absorb_inner _Py_LibHacl_Hacl_Impl_SHA3_absorb_inner -#define Hacl_Impl_SHA3_keccak _Py_LibHacl_Hacl_Impl_SHA3_keccak -#define Hacl_Impl_SHA3_loadState _Py_LibHacl_Hacl_Impl_SHA3_loadState -#define Hacl_Impl_SHA3_squeeze _Py_LibHacl_Hacl_Impl_SHA3_squeeze -#define Hacl_Impl_SHA3_state_permute _Py_LibHacl_Hacl_Impl_SHA3_state_permute -#define Hacl_SHA3_sha3_224 _Py_LibHacl_Hacl_SHA3_sha3_224 -#define Hacl_SHA3_sha3_256 _Py_LibHacl_Hacl_SHA3_sha3_256 -#define Hacl_SHA3_sha3_384 _Py_LibHacl_Hacl_SHA3_sha3_384 -#define Hacl_SHA3_sha3_512 _Py_LibHacl_Hacl_SHA3_sha3_512 -#define Hacl_SHA3_shake128_hacl _Py_LibHacl_Hacl_SHA3_shake128_hacl -#define Hacl_SHA3_shake256_hacl _Py_LibHacl_Hacl_SHA3_shake256_hacl -#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len -#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy -#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest -#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free -#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg -#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len -#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake -#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_ -#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc -#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset -#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update -#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze + diff -y --suppress-common-lines \ + <(grep -P '^#define (?!_PY.+_H)' python_hacl_namespaces.h | sort -u) \ + <(nm -j *.o | grep -i hacl | grep -P '^[a-zA-Z_][a-zA-Z0-9_]+' \ + | sed -e 's/^_Py_LibHacl_//g' \ + | sed 's/\(.*\)/#define \1 _Py_LibHacl_\1/g' | sort -u) + */ +// --- Utils ------------------------------------------------------------------ +#define Lib_Memzero0_memzero0 _Py_LibHacl_Lib_Memzero0_memzero0 +// --- HASH-BLAKE-2b ---------------------------------------------------------- +#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy +#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest +#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish +#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free +#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key +#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params +#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info +#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init +#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc +#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key +#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key +#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset +#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key +#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params #define Hacl_Hash_Blake2b_Simd256_copy _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy #define Hacl_Hash_Blake2b_Simd256_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_copy_internal_state #define Hacl_Hash_Blake2b_Simd256_digest _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_digest @@ -104,7 +52,6 @@ #define Hacl_Hash_Blake2b_Simd256_malloc _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc #define Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_internal_state_with_key #define Hacl_Hash_Blake2b_Simd256_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key -#define Hacl_Hash_Blake2b_Simd256_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_key0 #define Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key #define Hacl_Hash_Blake2b_Simd256_reset _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset #define Hacl_Hash_Blake2b_Simd256_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_reset_with_key @@ -115,23 +62,24 @@ #define Hacl_Hash_Blake2b_Simd256_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_last_no_inline #define Hacl_Hash_Blake2b_Simd256_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi #define Hacl_Hash_Blake2b_Simd256_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2b_Simd256_update_multi_no_inline -#define Hacl_Hash_Blake2b_copy _Py_LibHacl_Hacl_Hash_Blake2b_copy -#define Hacl_Hash_Blake2b_digest _Py_LibHacl_Hacl_Hash_Blake2b_digest -#define Hacl_Hash_Blake2b_finish _Py_LibHacl_Hacl_Hash_Blake2b_finish -#define Hacl_Hash_Blake2b_free _Py_LibHacl_Hacl_Hash_Blake2b_free -#define Hacl_Hash_Blake2b_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key -#define Hacl_Hash_Blake2b_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_hash_with_key_and_params -#define Hacl_Hash_Blake2b_info _Py_LibHacl_Hacl_Hash_Blake2b_info -#define Hacl_Hash_Blake2b_init _Py_LibHacl_Hacl_Hash_Blake2b_init -#define Hacl_Hash_Blake2b_malloc _Py_LibHacl_Hacl_Hash_Blake2b_malloc -#define Hacl_Hash_Blake2b_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_key -#define Hacl_Hash_Blake2b_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2b_malloc_with_params_and_key -#define Hacl_Hash_Blake2b_reset _Py_LibHacl_Hacl_Hash_Blake2b_reset -#define Hacl_Hash_Blake2b_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key -#define Hacl_Hash_Blake2b_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2b_reset_with_key_and_params #define Hacl_Hash_Blake2b_update _Py_LibHacl_Hacl_Hash_Blake2b_update #define Hacl_Hash_Blake2b_update_last _Py_LibHacl_Hacl_Hash_Blake2b_update_last #define Hacl_Hash_Blake2b_update_multi _Py_LibHacl_Hacl_Hash_Blake2b_update_multi +// --- HASH-BLAKE-2s ---------------------------------------------------------- +#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy +#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest +#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish +#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free +#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key +#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params +#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info +#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init +#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc +#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key +#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key +#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset +#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key +#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params #define Hacl_Hash_Blake2s_Simd128_copy _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy #define Hacl_Hash_Blake2s_Simd128_copy_internal_state _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_copy_internal_state #define Hacl_Hash_Blake2s_Simd128_digest _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_digest @@ -145,7 +93,6 @@ #define Hacl_Hash_Blake2s_Simd128_malloc _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc #define Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_internal_state_with_key #define Hacl_Hash_Blake2s_Simd128_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key -#define Hacl_Hash_Blake2s_Simd128_malloc_with_key0 _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_key0 #define Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key #define Hacl_Hash_Blake2s_Simd128_reset _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset #define Hacl_Hash_Blake2s_Simd128_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_reset_with_key @@ -156,37 +103,54 @@ #define Hacl_Hash_Blake2s_Simd128_update_last_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_last_no_inline #define Hacl_Hash_Blake2s_Simd128_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi #define Hacl_Hash_Blake2s_Simd128_update_multi_no_inline _Py_LibHacl_Hacl_Hash_Blake2s_Simd128_update_multi_no_inline -#define Hacl_Hash_Blake2s_copy _Py_LibHacl_Hacl_Hash_Blake2s_copy -#define Hacl_Hash_Blake2s_digest _Py_LibHacl_Hacl_Hash_Blake2s_digest -#define Hacl_Hash_Blake2s_finish _Py_LibHacl_Hacl_Hash_Blake2s_finish -#define Hacl_Hash_Blake2s_free _Py_LibHacl_Hacl_Hash_Blake2s_free -#define Hacl_Hash_Blake2s_hash_with_key _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key -#define Hacl_Hash_Blake2s_hash_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_hash_with_key_and_params -#define Hacl_Hash_Blake2s_info _Py_LibHacl_Hacl_Hash_Blake2s_info -#define Hacl_Hash_Blake2s_init _Py_LibHacl_Hacl_Hash_Blake2s_init -#define Hacl_Hash_Blake2s_malloc _Py_LibHacl_Hacl_Hash_Blake2s_malloc -#define Hacl_Hash_Blake2s_malloc_with_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_key -#define Hacl_Hash_Blake2s_malloc_with_params_and_key _Py_LibHacl_Hacl_Hash_Blake2s_malloc_with_params_and_key -#define Hacl_Hash_Blake2s_reset _Py_LibHacl_Hacl_Hash_Blake2s_reset -#define Hacl_Hash_Blake2s_reset_with_key _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key -#define Hacl_Hash_Blake2s_reset_with_key_and_params _Py_LibHacl_Hacl_Hash_Blake2s_reset_with_key_and_params #define Hacl_Hash_Blake2s_update _Py_LibHacl_Hacl_Hash_Blake2s_update #define Hacl_Hash_Blake2s_update_last _Py_LibHacl_Hacl_Hash_Blake2s_update_last #define Hacl_Hash_Blake2s_update_multi _Py_LibHacl_Hacl_Hash_Blake2s_update_multi +// --- HASH-MD5 --------------------------------------------------------------- +#define Hacl_Hash_MD5_copy _Py_LibHacl_Hacl_Hash_MD5_copy +#define Hacl_Hash_MD5_digest _Py_LibHacl_Hacl_Hash_MD5_digest #define Hacl_Hash_MD5_finish _Py_LibHacl_Hacl_Hash_MD5_finish +#define Hacl_Hash_MD5_free _Py_LibHacl_Hacl_Hash_MD5_free +#define Hacl_Hash_MD5_hash _Py_LibHacl_Hacl_Hash_MD5_hash #define Hacl_Hash_MD5_hash_oneshot _Py_LibHacl_Hacl_Hash_MD5_hash_oneshot +#define Hacl_Hash_MD5_init _Py_LibHacl_Hacl_Hash_MD5_init +#define Hacl_Hash_MD5_malloc _Py_LibHacl_Hacl_Hash_MD5_malloc #define Hacl_Hash_MD5_reset _Py_LibHacl_Hacl_Hash_MD5_reset +#define Hacl_Hash_MD5_update _Py_LibHacl_Hacl_Hash_MD5_update #define Hacl_Hash_MD5_update_last _Py_LibHacl_Hacl_Hash_MD5_update_last #define Hacl_Hash_MD5_update_multi _Py_LibHacl_Hacl_Hash_MD5_update_multi +// --- HASH-SHA-1 ------------------------------------------------------------- +#define Hacl_Hash_SHA1_copy _Py_LibHacl_Hacl_Hash_SHA1_copy +#define Hacl_Hash_SHA1_digest _Py_LibHacl_Hacl_Hash_SHA1_digest #define Hacl_Hash_SHA1_finish _Py_LibHacl_Hacl_Hash_SHA1_finish +#define Hacl_Hash_SHA1_free _Py_LibHacl_Hacl_Hash_SHA1_free +#define Hacl_Hash_SHA1_hash _Py_LibHacl_Hacl_Hash_SHA1_hash #define Hacl_Hash_SHA1_hash_oneshot _Py_LibHacl_Hacl_Hash_SHA1_hash_oneshot +#define Hacl_Hash_SHA1_init _Py_LibHacl_Hacl_Hash_SHA1_init +#define Hacl_Hash_SHA1_malloc _Py_LibHacl_Hacl_Hash_SHA1_malloc #define Hacl_Hash_SHA1_reset _Py_LibHacl_Hacl_Hash_SHA1_reset +#define Hacl_Hash_SHA1_update _Py_LibHacl_Hacl_Hash_SHA1_update #define Hacl_Hash_SHA1_update_last _Py_LibHacl_Hacl_Hash_SHA1_update_last #define Hacl_Hash_SHA1_update_multi _Py_LibHacl_Hacl_Hash_SHA1_update_multi +// --- HASH-SHA-2 ------------------------------------------------------------- +#define Hacl_Hash_SHA2_copy_256 _Py_LibHacl_Hacl_Hash_SHA2_copy_256 +#define Hacl_Hash_SHA2_copy_512 _Py_LibHacl_Hacl_Hash_SHA2_copy_512 +#define Hacl_Hash_SHA2_digest_224 _Py_LibHacl_Hacl_Hash_SHA2_digest_224 +#define Hacl_Hash_SHA2_digest_256 _Py_LibHacl_Hacl_Hash_SHA2_digest_256 +#define Hacl_Hash_SHA2_digest_384 _Py_LibHacl_Hacl_Hash_SHA2_digest_384 +#define Hacl_Hash_SHA2_digest_512 _Py_LibHacl_Hacl_Hash_SHA2_digest_512 +#define Hacl_Hash_SHA2_free_224 _Py_LibHacl_Hacl_Hash_SHA2_free_224 +#define Hacl_Hash_SHA2_free_256 _Py_LibHacl_Hacl_Hash_SHA2_free_256 +#define Hacl_Hash_SHA2_free_384 _Py_LibHacl_Hacl_Hash_SHA2_free_384 +#define Hacl_Hash_SHA2_free_512 _Py_LibHacl_Hacl_Hash_SHA2_free_512 #define Hacl_Hash_SHA2_hash_224 _Py_LibHacl_Hacl_Hash_SHA2_hash_224 #define Hacl_Hash_SHA2_hash_256 _Py_LibHacl_Hacl_Hash_SHA2_hash_256 #define Hacl_Hash_SHA2_hash_384 _Py_LibHacl_Hacl_Hash_SHA2_hash_384 #define Hacl_Hash_SHA2_hash_512 _Py_LibHacl_Hacl_Hash_SHA2_hash_512 +#define Hacl_Hash_SHA2_malloc_224 _Py_LibHacl_Hacl_Hash_SHA2_malloc_224 +#define Hacl_Hash_SHA2_malloc_256 _Py_LibHacl_Hacl_Hash_SHA2_malloc_256 +#define Hacl_Hash_SHA2_malloc_384 _Py_LibHacl_Hacl_Hash_SHA2_malloc_384 +#define Hacl_Hash_SHA2_malloc_512 _Py_LibHacl_Hacl_Hash_SHA2_malloc_512 #define Hacl_Hash_SHA2_reset_224 _Py_LibHacl_Hacl_Hash_SHA2_reset_224 #define Hacl_Hash_SHA2_reset_256 _Py_LibHacl_Hacl_Hash_SHA2_reset_256 #define Hacl_Hash_SHA2_reset_384 _Py_LibHacl_Hacl_Hash_SHA2_reset_384 @@ -207,10 +171,25 @@ #define Hacl_Hash_SHA2_sha512_init _Py_LibHacl_Hacl_Hash_SHA2_sha512_init #define Hacl_Hash_SHA2_sha512_update_last _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_last #define Hacl_Hash_SHA2_sha512_update_nblocks _Py_LibHacl_Hacl_Hash_SHA2_sha512_update_nblocks +#define Hacl_Hash_SHA2_update_224 _Py_LibHacl_Hacl_Hash_SHA2_update_224 +#define Hacl_Hash_SHA2_update_256 _Py_LibHacl_Hacl_Hash_SHA2_update_256 +#define Hacl_Hash_SHA2_update_384 _Py_LibHacl_Hacl_Hash_SHA2_update_384 +#define Hacl_Hash_SHA2_update_512 _Py_LibHacl_Hacl_Hash_SHA2_update_512 +// --- HASH-SHA-3 ------------------------------------------------------------- #define Hacl_Hash_SHA3_absorb_inner_32 _Py_LibHacl_Hacl_Hash_SHA3_absorb_inner_32 +#define Hacl_Hash_SHA3_block_len _Py_LibHacl_Hacl_Hash_SHA3_block_len +#define Hacl_Hash_SHA3_copy _Py_LibHacl_Hacl_Hash_SHA3_copy +#define Hacl_Hash_SHA3_digest _Py_LibHacl_Hacl_Hash_SHA3_digest +#define Hacl_Hash_SHA3_free _Py_LibHacl_Hacl_Hash_SHA3_free +#define Hacl_Hash_SHA3_get_alg _Py_LibHacl_Hacl_Hash_SHA3_get_alg +#define Hacl_Hash_SHA3_hash_len _Py_LibHacl_Hacl_Hash_SHA3_hash_len +#define Hacl_Hash_SHA3_init_ _Py_LibHacl_Hacl_Hash_SHA3_init_ +#define Hacl_Hash_SHA3_is_shake _Py_LibHacl_Hacl_Hash_SHA3_is_shake #define Hacl_Hash_SHA3_keccak_piln _Py_LibHacl_Hacl_Hash_SHA3_keccak_piln #define Hacl_Hash_SHA3_keccak_rndc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rndc #define Hacl_Hash_SHA3_keccak_rotc _Py_LibHacl_Hacl_Hash_SHA3_keccak_rotc +#define Hacl_Hash_SHA3_malloc _Py_LibHacl_Hacl_Hash_SHA3_malloc +#define Hacl_Hash_SHA3_reset _Py_LibHacl_Hacl_Hash_SHA3_reset #define Hacl_Hash_SHA3_sha3_224 _Py_LibHacl_Hacl_Hash_SHA3_sha3_224 #define Hacl_Hash_SHA3_sha3_256 _Py_LibHacl_Hacl_Hash_SHA3_sha3_256 #define Hacl_Hash_SHA3_sha3_384 _Py_LibHacl_Hacl_Hash_SHA3_sha3_384 @@ -220,37 +199,39 @@ #define Hacl_Hash_SHA3_shake128_absorb_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_absorb_nblocks #define Hacl_Hash_SHA3_shake128_squeeze_nblocks _Py_LibHacl_Hacl_Hash_SHA3_shake128_squeeze_nblocks #define Hacl_Hash_SHA3_shake256 _Py_LibHacl_Hacl_Hash_SHA3_shake256 +#define Hacl_Hash_SHA3_squeeze _Py_LibHacl_Hacl_Hash_SHA3_squeeze #define Hacl_Hash_SHA3_state_free _Py_LibHacl_Hacl_Hash_SHA3_state_free #define Hacl_Hash_SHA3_state_malloc _Py_LibHacl_Hacl_Hash_SHA3_state_malloc - -// Streaming HMAC +#define Hacl_Hash_SHA3_update _Py_LibHacl_Hacl_Hash_SHA3_update +#define Hacl_Hash_SHA3_update_last_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_last_sha3 +#define Hacl_Hash_SHA3_update_multi_sha3 _Py_LibHacl_Hacl_Hash_SHA3_update_multi_sha3 +// --- STREAMING-MAC ---------------------------------------------------------- +#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy +#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest +#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free +#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl #define Hacl_Streaming_HMAC_index_of_state _Py_LibHacl_Hacl_Streaming_HMAC_index_of_state #define Hacl_Streaming_HMAC_malloc_ _Py_LibHacl_Hacl_Streaming_HMAC_malloc_ -#define Hacl_Streaming_HMAC_get_impl _Py_LibHacl_Hacl_Streaming_HMAC_get_impl #define Hacl_Streaming_HMAC_reset _Py_LibHacl_Hacl_Streaming_HMAC_reset -#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update -#define Hacl_Streaming_HMAC_digest _Py_LibHacl_Hacl_Streaming_HMAC_digest -#define Hacl_Streaming_HMAC_copy _Py_LibHacl_Hacl_Streaming_HMAC_copy -#define Hacl_Streaming_HMAC_free _Py_LibHacl_Hacl_Streaming_HMAC_free #define Hacl_Streaming_HMAC_s1 _Py_LibHacl_Hacl_Streaming_HMAC_s1 #define Hacl_Streaming_HMAC_s2 _Py_LibHacl_Hacl_Streaming_HMAC_s2 - -// HMAC-MD5 +#define Hacl_Streaming_HMAC_update _Py_LibHacl_Hacl_Streaming_HMAC_update +// --- HMAC-MD5 --------------------------------------------------------------- #define Hacl_HMAC_compute_md5 _Py_LibHacl_Hacl_HMAC_compute_md5 -// HMAC-SHA-1 +// --- HMAC-SHA-1 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha1 _Py_LibHacl_Hacl_HMAC_compute_sha1 -// HMAC-SHA-2 +// --- HMAC-SHA-2 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha2_224 _Py_LibHacl_Hacl_HMAC_compute_sha2_224 #define Hacl_HMAC_compute_sha2_256 _Py_LibHacl_Hacl_HMAC_compute_sha2_256 #define Hacl_HMAC_compute_sha2_384 _Py_LibHacl_Hacl_HMAC_compute_sha2_384 #define Hacl_HMAC_compute_sha2_512 _Py_LibHacl_Hacl_HMAC_compute_sha2_512 -// HMAC-SHA-3 +// --- HMAC-SHA-3 ------------------------------------------------------------- #define Hacl_HMAC_compute_sha3_224 _Py_LibHacl_Hacl_HMAC_compute_sha3_224 #define Hacl_HMAC_compute_sha3_256 _Py_LibHacl_Hacl_HMAC_compute_sha3_256 #define Hacl_HMAC_compute_sha3_384 _Py_LibHacl_Hacl_HMAC_compute_sha3_384 #define Hacl_HMAC_compute_sha3_512 _Py_LibHacl_Hacl_HMAC_compute_sha3_512 -// HMAC-BLAKE -#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32 +// --- HMAC-BLAKE-2 ----------------------------------------------------------- #define Hacl_HMAC_compute_blake2b_32 _Py_LibHacl_Hacl_HMAC_compute_blake2b_32 +#define Hacl_HMAC_compute_blake2s_32 _Py_LibHacl_Hacl_HMAC_compute_blake2s_32 #endif // _PYTHON_HACL_NAMESPACES_H diff --git a/Modules/_hacl/refresh.sh b/Modules/_hacl/refresh.sh index d91650b44bb4e7..a6776282423b95 100755 --- a/Modules/_hacl/refresh.sh +++ b/Modules/_hacl/refresh.sh @@ -22,7 +22,7 @@ fi # Update this when updating to a new version after verifying that the changes # the update brings in are good. -expected_hacl_star_rev=7720f6d4fc0468a99d5ea6120976bcc271e42727 +expected_hacl_star_rev=4ef25b547b377dcef855db4289c6a00580e7221c hacl_dir="$(realpath "$1")" cd "$(dirname "$0")" diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 48eed5eac975ed..c8a76e14990751 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -943,9 +943,9 @@ static PyType_Spec EVPXOFtype_spec = { #endif -static PyObject* -py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, - int usedforsecurity) +static PyObject * +_hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, + int usedforsecurity) { Py_buffer view = { 0 }; PY_EVP_MD *digest = NULL; @@ -1017,16 +1017,25 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj, return (PyObject *)self; } +#define CALL_HASHLIB_NEW(MODULE, NAME, DATA, STRING, USEDFORSECURITY) \ + do { \ + PyObject *data_obj; \ + if (_Py_hashlib_data_argument(&data_obj, DATA, STRING) < 0) { \ + return NULL; \ + } \ + return _hashlib_HASH(MODULE, NAME, data_obj, USEDFORSECURITY); \ + } while (0) /* The module-level function: new() */ /*[clinic input] -_hashlib.new as EVP_new +_hashlib.new - name as name_obj: object - string as data_obj: object(c_default="NULL") = b'' + name: str + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new hash object using the named algorithm. @@ -1037,136 +1046,137 @@ The MD5 and SHA1 algorithms are always supported. [clinic start generated code]*/ static PyObject * -EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=ddd5053f92dffe90 input=c24554d0337be1b0]*/ +_hashlib_new_impl(PyObject *module, const char *name, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=c01feb4ad6a6303d input=f5ec9bf1fa749d07]*/ { - char *name; - if (!PyArg_Parse(name_obj, "s", &name)) { - PyErr_SetString(PyExc_TypeError, "name must be a string"); - return NULL; - } - return py_evp_fromname(module, name, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, name, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_md5 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a md5 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_md5_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=87b0186440a44f8c input=990e36d5e689b16e]*/ +_hashlib_openssl_md5_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=ca8cf184d90f7432 input=e7c0adbd6a867db1]*/ { - return py_evp_fromname(module, Py_hash_md5, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_md5, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha1 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha1 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=6813024cf690670d input=948f2f4b6deabc10]*/ +_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=1736fb7b310d64be input=f7e5bb1711e952d8]*/ { - return py_evp_fromname(module, Py_hash_sha1, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha1, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha224 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha224 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=a2dfe7cc4eb14ebb input=f9272821fadca505]*/ +_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=0d6ff57be5e5c140 input=3820fff7ed3a53b8]*/ { - return py_evp_fromname(module, Py_hash_sha224, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha224, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha256 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=1f874a34870f0a68 input=549fad9d2930d4c5]*/ +_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=412ea7111555b6e7 input=9a2f115cf1f7e0eb]*/ { - return py_evp_fromname(module, Py_hash_sha256, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha256, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha384 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha384 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=58529eff9ca457b2 input=48601a6e3bf14ad7]*/ +_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=2e0dc395b59ed726 input=1ea48f6f01e77cfb]*/ { - return py_evp_fromname(module, Py_hash_sha384, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha384, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha512 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha512 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=2c744c9e4a40d5f6 input=c5c46a2a817aa98f]*/ +_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=4bdd760388dbfc0f input=3cf56903e07d1f5c]*/ { - return py_evp_fromname(module, Py_hash_sha512, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha512, data, string, usedforsecurity); } @@ -1175,77 +1185,81 @@ _hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, /*[clinic input] _hashlib.openssl_sha3_224 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-224 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=144641c1d144b974 input=e3a01b2888916157]*/ +_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=6d8dc2a924f3ba35 input=7f14f16a9f6a3158]*/ { - return py_evp_fromname(module, Py_hash_sha3_224, data_obj, usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_224, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-256 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=c61f1ab772d06668 input=e2908126c1b6deed]*/ +_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=9e520f537b3a4622 input=7987150939d5e352]*/ { - return py_evp_fromname(module, Py_hash_sha3_256, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_256, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_384 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-384 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=f68e4846858cf0ee input=ec0edf5c792f8252]*/ +_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=d239ba0463fd6138 input=fc943401f67e3b81]*/ { - return py_evp_fromname(module, Py_hash_sha3_384, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_384, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_sha3_512 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a sha3-512 hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=2eede478c159354a input=64e2cc0c094d56f4]*/ +_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=17662f21038c2278 input=6601ddd2c6c1516d]*/ { - return py_evp_fromname(module, Py_hash_sha3_512, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_sha3_512, data, string, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHA3 */ @@ -1253,42 +1267,46 @@ _hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, /*[clinic input] _hashlib.openssl_shake_128 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a shake-128 variable hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=bc49cdd8ada1fa97 input=6c9d67440eb33ec8]*/ +_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=4e6afed8d18980ad input=373c3f1c93d87b37]*/ { - return py_evp_fromname(module, Py_hash_shake_128, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_shake_128, data, string, usedforsecurity); } /*[clinic input] _hashlib.openssl_shake_256 - string as data_obj: object(py_default="b''") = NULL + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Returns a shake-256 variable hash object; optionally initialized with a string [clinic start generated code]*/ static PyObject * -_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity) -/*[clinic end generated code: output=358d213be8852df7 input=479cbe9fefd4a9f8]*/ +_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string) +/*[clinic end generated code: output=62481bce4a77d16c input=101c139ea2ddfcbf]*/ { - return py_evp_fromname(module, Py_hash_shake_256, data_obj , usedforsecurity); + CALL_HASHLIB_NEW(module, Py_hash_shake_256, data, string, usedforsecurity); } #endif /* PY_OPENSSL_HAS_SHAKE */ +#undef CALL_HASHLIB_NEW + /*[clinic input] _hashlib.pbkdf2_hmac as pbkdf2_hmac @@ -2134,7 +2152,7 @@ _hashlib_compare_digest_impl(PyObject *module, PyObject *a, PyObject *b) /* List of functions exported by this module */ static struct PyMethodDef EVP_functions[] = { - EVP_NEW_METHODDEF + _HASHLIB_NEW_METHODDEF PBKDF2_HMAC_METHODDEF _HASHLIB_SCRYPT_METHODDEF _HASHLIB_GET_FIPS_MODE_METHODDEF diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c index 095866eec7d75a..05d01acd77109b 100644 --- a/Modules/_heapqmodule.c +++ b/Modules/_heapqmodule.c @@ -11,7 +11,8 @@ annotated by François Pinard, and converted to C by Raymond Hettinger. #endif #include "Python.h" -#include "pycore_list.h" // _PyList_ITEMS() +#include "pycore_list.h" // _PyList_ITEMS(), _PyList_AppendTakeRef() +#include "pycore_pyatomic_ft_wrappers.h" #include "clinic/_heapqmodule.c.h" @@ -59,8 +60,8 @@ siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr = _PyList_ITEMS(heap); parent = arr[parentpos]; newitem = arr[pos]; - arr[parentpos] = newitem; - arr[pos] = parent; + FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent); pos = parentpos; } return 0; @@ -108,8 +109,8 @@ siftup(PyListObject *heap, Py_ssize_t pos) /* Move the smaller child up. */ tmp1 = arr[childpos]; tmp2 = arr[pos]; - arr[childpos] = tmp2; - arr[pos] = tmp1; + FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1); pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ @@ -117,6 +118,7 @@ siftup(PyListObject *heap, Py_ssize_t pos) } /*[clinic input] +@critical_section heap _heapq.heappush heap: object(subclass_of='&PyList_Type') @@ -128,13 +130,22 @@ Push item onto heap, maintaining the heap invariant. static PyObject * _heapq_heappush_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=912c094f47663935 input=7c69611f3698aceb]*/ +/*[clinic end generated code: output=912c094f47663935 input=f7a4f03ef8d52e67]*/ { - if (PyList_Append(heap, item)) + if (item == NULL) { + PyErr_BadInternalCall(); return NULL; + } + + // In a free-threaded build, the heap is locked at this point. + // Therefore, calling _PyList_AppendTakeRef() is safe and no overhead. + if (_PyList_AppendTakeRef((PyListObject *)heap, Py_NewRef(item))) { + return NULL; + } - if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) + if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1)) { return NULL; + } Py_RETURN_NONE; } @@ -162,8 +173,9 @@ heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) if (!n) return lastelt; returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, lastelt); - if (siftup_func((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], lastelt); + if (siftup_func(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -171,6 +183,7 @@ heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) } /*[clinic input] +@critical_section heap _heapq.heappop heap: object(subclass_of='&PyList_Type') @@ -181,7 +194,7 @@ Pop the smallest item off the heap, maintaining the heap invariant. static PyObject * _heapq_heappop_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=96dfe82d37d9af76 input=91487987a583c856]*/ +/*[clinic end generated code: output=96dfe82d37d9af76 input=ed396461b153dd51]*/ { return heappop_internal(heap, siftup); } @@ -197,8 +210,9 @@ heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObjec } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup_func((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup_func(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -207,6 +221,7 @@ heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObjec /*[clinic input] +@critical_section heap _heapq.heapreplace heap: object(subclass_of='&PyList_Type') @@ -226,12 +241,13 @@ this routine unless written as part of a conditional replacement: static PyObject * _heapq_heapreplace_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=82ea55be8fbe24b4 input=719202ac02ba10c8]*/ +/*[clinic end generated code: output=82ea55be8fbe24b4 input=9be1678b817ef1a9]*/ { return heapreplace_internal(heap, item, siftup); } /*[clinic input] +@critical_section heap _heapq.heappushpop heap: object(subclass_of='&PyList_Type') @@ -246,7 +262,7 @@ a separate call to heappop(). static PyObject * _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=67231dc98ed5774f input=5dc701f1eb4a4aa7]*/ +/*[clinic end generated code: output=67231dc98ed5774f input=db05c81b1dd92c44]*/ { PyObject *returnitem; int cmp; @@ -271,8 +287,9 @@ _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item) } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup((PyListObject *)heap, 0)) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup(list, 0)) { Py_DECREF(returnitem); return NULL; } @@ -371,6 +388,7 @@ heapify_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t)) } /*[clinic input] +@critical_section heap _heapq.heapify heap: object(subclass_of='&PyList_Type') @@ -381,7 +399,7 @@ Transform list into a heap, in-place, in O(len(heap)) time. static PyObject * _heapq_heapify_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=e63a636fcf83d6d0 input=53bb7a2166febb73]*/ +/*[clinic end generated code: output=e63a636fcf83d6d0 input=aaaaa028b9b6af08]*/ { return heapify_internal(heap, siftup); } @@ -423,8 +441,8 @@ siftdown_max(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos) arr = _PyList_ITEMS(heap); parent = arr[parentpos]; newitem = arr[pos]; - arr[parentpos] = newitem; - arr[pos] = parent; + FT_ATOMIC_STORE_PTR_RELAXED(arr[parentpos], newitem); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], parent); pos = parentpos; } return 0; @@ -445,11 +463,11 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) return -1; } - /* Bubble up the smaller child until hitting a leaf. */ + /* Bubble up the larger child until hitting a leaf. */ arr = _PyList_ITEMS(heap); limit = endpos >> 1; /* smallest pos that has no child */ while (pos < limit) { - /* Set childpos to index of smaller child. */ + /* Set childpos to index of larger child. */ childpos = 2*pos + 1; /* leftmost child position */ if (childpos + 1 < endpos) { PyObject* a = arr[childpos + 1]; @@ -469,11 +487,11 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) return -1; } } - /* Move the smaller child up. */ + /* Move the larger child up. */ tmp1 = arr[childpos]; tmp2 = arr[pos]; - arr[childpos] = tmp2; - arr[pos] = tmp1; + FT_ATOMIC_STORE_PTR_RELAXED(arr[childpos], tmp2); + FT_ATOMIC_STORE_PTR_RELAXED(arr[pos], tmp1); pos = childpos; } /* Bubble it up to its final resting place (by sifting its parents down). */ @@ -481,6 +499,7 @@ siftup_max(PyListObject *heap, Py_ssize_t pos) } /*[clinic input] +@critical_section heap _heapq.heappush_max heap: object(subclass_of='&PyList_Type') @@ -492,9 +511,16 @@ Push item onto max heap, maintaining the heap invariant. static PyObject * _heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=c869d5f9deb08277 input=4743d7db137b6e2b]*/ +/*[clinic end generated code: output=c869d5f9deb08277 input=c437e3d1ff8dcb70]*/ { - if (PyList_Append(heap, item)) { + if (item == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + // In a free-threaded build, the heap is locked at this point. + // Therefore, calling _PyList_AppendTakeRef() is safe and no overhead. + if (_PyList_AppendTakeRef((PyListObject *)heap, Py_NewRef(item))) { return NULL; } @@ -506,6 +532,7 @@ _heapq_heappush_max_impl(PyObject *module, PyObject *heap, PyObject *item) } /*[clinic input] +@critical_section heap _heapq.heappop_max heap: object(subclass_of='&PyList_Type') @@ -516,12 +543,13 @@ Maxheap variant of heappop. static PyObject * _heapq_heappop_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=2f051195ab404b77 input=e62b14016a5a26de]*/ +/*[clinic end generated code: output=2f051195ab404b77 input=5d70c997798aec64]*/ { return heappop_internal(heap, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heapreplace_max heap: object(subclass_of='&PyList_Type') @@ -533,12 +561,13 @@ Maxheap variant of heapreplace. static PyObject * _heapq_heapreplace_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=8770778b5a9cbe9b input=21a3d28d757c881c]*/ +/*[clinic end generated code: output=8770778b5a9cbe9b input=fe70175356e4a649]*/ { return heapreplace_internal(heap, item, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heapify_max heap: object(subclass_of='&PyList_Type') @@ -549,12 +578,13 @@ Maxheap variant of heapify. static PyObject * _heapq_heapify_max_impl(PyObject *module, PyObject *heap) -/*[clinic end generated code: output=8401af3856529807 input=edda4255728c431e]*/ +/*[clinic end generated code: output=8401af3856529807 input=4eee63231e7d1573]*/ { return heapify_internal(heap, siftup_max); } /*[clinic input] +@critical_section heap _heapq.heappushpop_max heap: object(subclass_of='&PyList_Type') @@ -569,7 +599,7 @@ a separate call to heappop_max(). static PyObject * _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) -/*[clinic end generated code: output=ff0019f0941aca0d input=525a843013cbd6c0]*/ +/*[clinic end generated code: output=ff0019f0941aca0d input=24d0defa6fd6df4a]*/ { PyObject *returnitem; int cmp; @@ -595,8 +625,9 @@ _heapq_heappushpop_max_impl(PyObject *module, PyObject *heap, PyObject *item) } returnitem = PyList_GET_ITEM(heap, 0); - PyList_SET_ITEM(heap, 0, Py_NewRef(item)); - if (siftup_max((PyListObject *)heap, 0) < 0) { + PyListObject *list = _PyList_CAST(heap); + FT_ATOMIC_STORE_PTR_RELAXED(list->ob_item[0], Py_NewRef(item)); + if (siftup_max(list, 0) < 0) { Py_DECREF(returnitem); return NULL; } diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index 172cebcaa4884f..9c1f8615161275 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -20,9 +20,11 @@ #endif #define REGISTERS_HEAP_TYPES +#define HAS_FALLBACK #define HAS_UNBOUND_ITEMS #include "_interpreters_common.h" #undef HAS_UNBOUND_ITEMS +#undef HAS_FALLBACK #undef REGISTERS_HEAP_TYPES @@ -218,6 +220,22 @@ wait_for_lock(PyThread_type_lock mutex, PY_TIMEOUT_T timeout) return 0; } +static int +ensure_highlevel_module_loaded(void) +{ + PyObject *highlevel = + PyImport_ImportModule("concurrent.interpreters._channels"); + if (highlevel == NULL) { + PyErr_Clear(); + highlevel = PyImport_ImportModule("test.support.channels"); + if (highlevel == NULL) { + return -1; + } + } + Py_DECREF(highlevel); + return 0; +} + /* module state *************************************************************/ @@ -252,10 +270,10 @@ _get_current_module_state(void) { PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } module_state *state = get_module_state(mod); Py_DECREF(mod); @@ -523,7 +541,7 @@ typedef struct _channelitem { int64_t interpid; _PyXIData_t *data; _waiting_t *waiting; - int unboundop; + unboundop_t unboundop; struct _channelitem *next; } _channelitem; @@ -536,7 +554,7 @@ _channelitem_ID(_channelitem *item) static void _channelitem_init(_channelitem *item, int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { if (interpid < 0) { interpid = _get_interpid(data); @@ -583,7 +601,7 @@ _channelitem_clear(_channelitem *item) static _channelitem * _channelitem_new(int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { _channelitem *item = GLOBAL_MALLOC(_channelitem); if (item == NULL) { @@ -694,7 +712,7 @@ _channelqueue_free(_channelqueue *queue) static int _channelqueue_put(_channelqueue *queue, int64_t interpid, _PyXIData_t *data, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop) { _channelitem *item = _channelitem_new(interpid, data, waiting, unboundop); if (item == NULL) { @@ -798,7 +816,7 @@ _channelqueue_remove(_channelqueue *queue, _channelitem_id_t itemid, } queue->count -= 1; - int unboundop; + unboundop_t unboundop; _channelitem_popped(item, p_data, p_waiting, &unboundop); } @@ -1083,16 +1101,18 @@ typedef struct _channel { PyThread_type_lock mutex; _channelqueue *queue; _channelends *ends; - struct { - int unboundop; + struct _channeldefaults { + unboundop_t unboundop; + xidata_fallback_t fallback; } defaults; int open; struct _channel_closing *closing; } _channel_state; static _channel_state * -_channel_new(PyThread_type_lock mutex, int unboundop) +_channel_new(PyThread_type_lock mutex, struct _channeldefaults defaults) { + assert(check_unbound(defaults.unboundop)); _channel_state *chan = GLOBAL_MALLOC(_channel_state); if (chan == NULL) { return NULL; @@ -1109,7 +1129,7 @@ _channel_new(PyThread_type_lock mutex, int unboundop) GLOBAL_FREE(chan); return NULL; } - chan->defaults.unboundop = unboundop; + chan->defaults = defaults; chan->open = 1; chan->closing = NULL; return chan; @@ -1130,7 +1150,7 @@ _channel_free(_channel_state *chan) static int _channel_add(_channel_state *chan, int64_t interpid, - _PyXIData_t *data, _waiting_t *waiting, int unboundop) + _PyXIData_t *data, _waiting_t *waiting, unboundop_t unboundop) { int res = -1; PyThread_acquire_lock(chan->mutex, WAIT_LOCK); @@ -1611,7 +1631,7 @@ _channels_release_cid_object(_channels *channels, int64_t cid) struct channel_id_and_info { int64_t id; - int unboundop; + struct _channeldefaults defaults; }; static struct channel_id_and_info * @@ -1628,7 +1648,7 @@ _channels_list_all(_channels *channels, int64_t *count) for (int64_t i=0; ref != NULL; ref = ref->next, i++) { ids[i] = (struct channel_id_and_info){ .id = ref->cid, - .unboundop = ref->chan->defaults.unboundop, + .defaults = ref->chan->defaults, }; } *count = channels->numopen; @@ -1714,13 +1734,13 @@ _channel_finish_closing(_channel_state *chan) { // Create a new channel. static int64_t -channel_create(_channels *channels, int unboundop) +channel_create(_channels *channels, struct _channeldefaults defaults) { PyThread_type_lock mutex = PyThread_allocate_lock(); if (mutex == NULL) { return ERR_CHANNEL_MUTEX_INIT; } - _channel_state *chan = _channel_new(mutex, unboundop); + _channel_state *chan = _channel_new(mutex, defaults); if (chan == NULL) { PyThread_free_lock(mutex); return -1; @@ -1752,7 +1772,7 @@ channel_destroy(_channels *channels, int64_t cid) // Optionally request to be notified when it is received. static int channel_send(_channels *channels, int64_t cid, PyObject *obj, - _waiting_t *waiting, int unboundop) + _waiting_t *waiting, unboundop_t unboundop, xidata_fallback_t fallback) { PyThreadState *tstate = _PyThreadState_GET(); PyInterpreterState *interp = tstate->interp; @@ -1779,7 +1799,7 @@ channel_send(_channels *channels, int64_t cid, PyObject *obj, PyThread_release_lock(mutex); return -1; } - if (_PyObject_GetXIData(tstate, obj, data) != 0) { + if (_PyObject_GetXIData(tstate, obj, fallback, data) != 0) { PyThread_release_lock(mutex); GLOBAL_FREE(data); return -1; @@ -1823,7 +1843,8 @@ channel_clear_sent(_channels *channels, int64_t cid, _waiting_t *waiting) // Like channel_send(), but strictly wait for the object to be received. static int channel_send_wait(_channels *channels, int64_t cid, PyObject *obj, - int unboundop, PY_TIMEOUT_T timeout) + unboundop_t unboundop, PY_TIMEOUT_T timeout, + xidata_fallback_t fallback) { // We use a stack variable here, so we must ensure that &waiting // is not held by any channel item at the point this function exits. @@ -1834,7 +1855,7 @@ channel_send_wait(_channels *channels, int64_t cid, PyObject *obj, } /* Queue up the object. */ - int res = channel_send(channels, cid, obj, &waiting, unboundop); + int res = channel_send(channels, cid, obj, &waiting, unboundop, fallback); if (res < 0) { assert(waiting.status == WAITING_NO_STATUS); goto finally; @@ -2005,6 +2026,20 @@ channel_is_associated(_channels *channels, int64_t cid, int64_t interpid, return (end != NULL && end->open); } +static int +channel_get_defaults(_channels *channels, int64_t cid, struct _channeldefaults *defaults) +{ + PyThread_type_lock mutex = NULL; + _channel_state *channel = NULL; + int err = _channels_lookup(channels, cid, &mutex, &channel); + if (err != 0) { + return err; + } + *defaults = channel->defaults; + PyThread_release_lock(mutex); + return 0; +} + static int _channel_get_count(_channels *channels, int64_t cid, Py_ssize_t *p_count) { @@ -2694,7 +2729,7 @@ add_channelid_type(PyObject *mod) Py_DECREF(cls); return NULL; } - if (ensure_xid_class(cls, _channelid_shared) < 0) { + if (ensure_xid_class(cls, GETDATA(_channelid_shared)) < 0) { Py_DECREF(cls); return NULL; } @@ -2723,15 +2758,9 @@ _get_current_channelend_type(int end) } if (cls == NULL) { // Force the module to be loaded, to register the type. - PyObject *highlevel = PyImport_ImportModule("interpreters.channels"); - if (highlevel == NULL) { - PyErr_Clear(); - highlevel = PyImport_ImportModule("test.support.interpreters.channels"); - if (highlevel == NULL) { - return NULL; - } + if (ensure_highlevel_module_loaded() < 0) { + return NULL; } - Py_DECREF(highlevel); if (end == CHANNEL_SEND) { cls = state->send_channel_type; } @@ -2797,12 +2826,12 @@ set_channelend_types(PyObject *mod, PyTypeObject *send, PyTypeObject *recv) // Add and register the types. state->send_channel_type = (PyTypeObject *)Py_NewRef(send); state->recv_channel_type = (PyTypeObject *)Py_NewRef(recv); - if (ensure_xid_class(send, _channelend_shared) < 0) { + if (ensure_xid_class(send, GETDATA(_channelend_shared)) < 0) { Py_CLEAR(state->send_channel_type); Py_CLEAR(state->recv_channel_type); return -1; } - if (ensure_xid_class(recv, _channelend_shared) < 0) { + if (ensure_xid_class(recv, GETDATA(_channelend_shared)) < 0) { (void)clear_xid_class(state->send_channel_type); Py_CLEAR(state->send_channel_type); Py_CLEAR(state->recv_channel_type); @@ -2881,20 +2910,27 @@ clear_interpreter(void *data) static PyObject * channelsmod_create(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"unboundop", NULL}; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "i:create", kwlist, - &unboundop)) + static char *kwlist[] = {"unboundop", "fallback", NULL}; + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ii:create", kwlist, + &unboundarg, &fallbackarg)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _channeldefaults defaults = {0}; + if (resolve_unboundop(unboundarg, UNBOUND_REPLACE, + &defaults.unboundop) < 0) + { + return NULL; + } + if (resolve_fallback(fallbackarg, _PyXIDATA_FULL_FALLBACK, + &defaults.fallback) < 0) + { return NULL; } - int64_t cid = channel_create(&_globals.channels, unboundop); + int64_t cid = channel_create(&_globals.channels, defaults); if (cid < 0) { (void)handle_channel_error(-1, self, cid); return NULL; @@ -2987,7 +3023,9 @@ channelsmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } assert(cidobj != NULL); - PyObject *item = Py_BuildValue("Oi", cidobj, cur->unboundop); + PyObject *item = Py_BuildValue("Oii", cidobj, + cur->defaults.unboundop, + cur->defaults.fallback); Py_DECREF(cidobj); if (item == NULL) { Py_SETREF(ids, NULL); @@ -3075,40 +3113,54 @@ receive end."); static PyObject * channelsmod_send(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"cid", "obj", "unboundop", "blocking", "timeout", - NULL}; + static char *kwlist[] = {"cid", "obj", "unboundop", "fallback", + "blocking", "timeout", NULL}; struct channel_id_converter_data cid_data = { .module = self, }; PyObject *obj; - int unboundop = UNBOUND_REPLACE; + int unboundarg = -1; + int fallbackarg = -1; int blocking = 1; PyObject *timeout_obj = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O|i$pO:channel_send", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O&O|ii$pO:channel_send", kwlist, channel_id_converter, &cid_data, &obj, - &unboundop, &blocking, &timeout_obj)) + &unboundarg, &fallbackarg, + &blocking, &timeout_obj)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); - return NULL; - } - int64_t cid = cid_data.cid; PY_TIMEOUT_T timeout; if (PyThread_ParseTimeoutArg(timeout_obj, blocking, &timeout) < 0) { return NULL; } + struct _channeldefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = channel_get_defaults(&_globals.channels, cid, &defaults); + if (handle_channel_error(err, self, cid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { + return NULL; + } /* Queue up the object. */ int err = 0; if (blocking) { - err = channel_send_wait(&_globals.channels, cid, obj, unboundop, timeout); + err = channel_send_wait( + &_globals.channels, cid, obj, unboundop, timeout, fallback); } else { - err = channel_send(&_globals.channels, cid, obj, NULL, unboundop); + err = channel_send( + &_globals.channels, cid, obj, NULL, unboundop, fallback); } if (handle_channel_error(err, self, cid)) { return NULL; @@ -3126,32 +3178,44 @@ By default this waits for the object to be received."); static PyObject * channelsmod_send_buffer(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"cid", "obj", "unboundop", "blocking", "timeout", - NULL}; + static char *kwlist[] = {"cid", "obj", "unboundop", "fallback", + "blocking", "timeout", NULL}; struct channel_id_converter_data cid_data = { .module = self, }; PyObject *obj; - int unboundop = UNBOUND_REPLACE; - int blocking = 1; + int unboundarg = -1; + int fallbackarg = -1; + int blocking = -1; PyObject *timeout_obj = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "O&O|i$pO:channel_send_buffer", kwlist, + "O&O|ii$pO:channel_send_buffer", kwlist, channel_id_converter, &cid_data, &obj, - &unboundop, &blocking, &timeout_obj)) { - return NULL; - } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + &unboundarg, &fallbackarg, + &blocking, &timeout_obj)) + { return NULL; } - int64_t cid = cid_data.cid; PY_TIMEOUT_T timeout; if (PyThread_ParseTimeoutArg(timeout_obj, blocking, &timeout) < 0) { return NULL; } + struct _channeldefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = channel_get_defaults(&_globals.channels, cid, &defaults); + if (handle_channel_error(err, self, cid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { + return NULL; + } PyObject *tempobj = PyMemoryView_FromObject(obj); if (tempobj == NULL) { @@ -3162,10 +3226,11 @@ channelsmod_send_buffer(PyObject *self, PyObject *args, PyObject *kwds) int err = 0; if (blocking) { err = channel_send_wait( - &_globals.channels, cid, tempobj, unboundop, timeout); + &_globals.channels, cid, tempobj, unboundop, timeout, fallback); } else { - err = channel_send(&_globals.channels, cid, tempobj, NULL, unboundop); + err = channel_send( + &_globals.channels, cid, tempobj, NULL, unboundop, fallback); } Py_DECREF(tempobj); if (handle_channel_error(err, self, cid)) { @@ -3197,7 +3262,7 @@ channelsmod_recv(PyObject *self, PyObject *args, PyObject *kwds) cid = cid_data.cid; PyObject *obj = NULL; - int unboundop = 0; + unboundop_t unboundop = 0; int err = channel_recv(&_globals.channels, cid, &obj, &unboundop); if (err == ERR_CHANNEL_EMPTY && dflt != NULL) { // Use the default. @@ -3388,17 +3453,14 @@ channelsmod_get_channel_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t cid = cid_data.cid; - PyThread_type_lock mutex = NULL; - _channel_state *channel = NULL; - int err = _channels_lookup(&_globals.channels, cid, &mutex, &channel); + struct _channeldefaults defaults = {0}; + int err = channel_get_defaults(&_globals.channels, cid, &defaults); if (handle_channel_error(err, self, cid)) { return NULL; } - int unboundop = channel->defaults.unboundop; - PyThread_release_lock(mutex); - PyObject *defaults = Py_BuildValue("i", unboundop); - return defaults; + PyObject *res = Py_BuildValue("ii", defaults.unboundop, defaults.fallback); + return res; } PyDoc_STRVAR(channelsmod_get_channel_defaults_doc, @@ -3552,8 +3614,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -3563,8 +3624,7 @@ module_clear(PyObject *mod) assert(state != NULL); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 526249a0e1aec3..e5afe746f90bdc 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -9,9 +9,11 @@ #include "pycore_crossinterp.h" // _PyXIData_t #define REGISTERS_HEAP_TYPES +#define HAS_FALLBACK #define HAS_UNBOUND_ITEMS #include "_interpreters_common.h" #undef HAS_UNBOUND_ITEMS +#undef HAS_FALLBACK #undef REGISTERS_HEAP_TYPES @@ -134,13 +136,10 @@ idarg_int64_converter(PyObject *arg, void *ptr) static int ensure_highlevel_module_loaded(void) { - PyObject *highlevel = PyImport_ImportModule("interpreters.queues"); + PyObject *highlevel = + PyImport_ImportModule("concurrent.interpreters._queues"); if (highlevel == NULL) { - PyErr_Clear(); - highlevel = PyImport_ImportModule("test.support.interpreters.queues"); - if (highlevel == NULL) { - return -1; - } + return -1; } Py_DECREF(highlevel); return 0; @@ -297,7 +296,7 @@ add_QueueError(PyObject *mod) { module_state *state = get_module_state(mod); -#define PREFIX "test.support.interpreters." +#define PREFIX "concurrent.interpreters." #define ADD_EXCTYPE(NAME, BASE, DOC) \ assert(state->NAME == NULL); \ if (add_exctype(mod, &state->NAME, PREFIX #NAME, DOC, BASE) < 0) { \ @@ -401,14 +400,13 @@ typedef struct _queueitem { meaning the interpreter has been destroyed. */ int64_t interpid; _PyXIData_t *data; - int fmt; - int unboundop; + unboundop_t unboundop; struct _queueitem *next; } _queueitem; static void _queueitem_init(_queueitem *item, - int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) + int64_t interpid, _PyXIData_t *data, unboundop_t unboundop) { if (interpid < 0) { interpid = _get_interpid(data); @@ -422,7 +420,6 @@ _queueitem_init(_queueitem *item, *item = (_queueitem){ .interpid = interpid, .data = data, - .fmt = fmt, .unboundop = unboundop, }; } @@ -446,14 +443,14 @@ _queueitem_clear(_queueitem *item) } static _queueitem * -_queueitem_new(int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) +_queueitem_new(int64_t interpid, _PyXIData_t *data, int unboundop) { _queueitem *item = GLOBAL_MALLOC(_queueitem); if (item == NULL) { PyErr_NoMemory(); return NULL; } - _queueitem_init(item, interpid, data, fmt, unboundop); + _queueitem_init(item, interpid, data, unboundop); return item; } @@ -476,10 +473,9 @@ _queueitem_free_all(_queueitem *item) static void _queueitem_popped(_queueitem *item, - _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) + _PyXIData_t **p_data, unboundop_t *p_unboundop) { *p_data = item->data; - *p_fmt = item->fmt; *p_unboundop = item->unboundop; // We clear them here, so they won't be released in _queueitem_clear(). item->data = NULL; @@ -527,16 +523,16 @@ typedef struct _queue { _queueitem *first; _queueitem *last; } items; - struct { - int fmt; + struct _queuedefaults { + xidata_fallback_t fallback; int unboundop; } defaults; } _queue; static int -_queue_init(_queue *queue, Py_ssize_t maxsize, int fmt, int unboundop) +_queue_init(_queue *queue, Py_ssize_t maxsize, struct _queuedefaults defaults) { - assert(check_unbound(unboundop)); + assert(check_unbound(defaults.unboundop)); PyThread_type_lock mutex = PyThread_allocate_lock(); if (mutex == NULL) { return ERR_QUEUE_ALLOC; @@ -547,10 +543,7 @@ _queue_init(_queue *queue, Py_ssize_t maxsize, int fmt, int unboundop) .items = { .maxsize = maxsize, }, - .defaults = { - .fmt = fmt, - .unboundop = unboundop, - }, + .defaults = defaults, }; return 0; } @@ -631,8 +624,7 @@ _queue_unlock(_queue *queue) } static int -_queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, - int fmt, int unboundop) +_queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, int unboundop) { int err = _queue_lock(queue); if (err < 0) { @@ -648,7 +640,7 @@ _queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, return ERR_QUEUE_FULL; } - _queueitem *item = _queueitem_new(interpid, data, fmt, unboundop); + _queueitem *item = _queueitem_new(interpid, data, unboundop); if (item == NULL) { _queue_unlock(queue); return -1; @@ -668,8 +660,7 @@ _queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, } static int -_queue_next(_queue *queue, - _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) +_queue_next(_queue *queue, _PyXIData_t **p_data, int *p_unboundop) { int err = _queue_lock(queue); if (err < 0) { @@ -688,7 +679,7 @@ _queue_next(_queue *queue, } queue->items.count -= 1; - _queueitem_popped(item, p_data, p_fmt, p_unboundop); + _queueitem_popped(item, p_data, p_unboundop); _queue_unlock(queue); return 0; @@ -716,8 +707,11 @@ _queue_is_full(_queue *queue, int *p_is_full) return err; } - assert(queue->items.count <= queue->items.maxsize); - *p_is_full = queue->items.count == queue->items.maxsize; + assert(queue->items.maxsize <= 0 + || queue->items.count <= queue->items.maxsize); + *p_is_full = queue->items.maxsize > 0 + ? queue->items.count == queue->items.maxsize + : 0; _queue_unlock(queue); return 0; @@ -1035,8 +1029,7 @@ _queues_decref(_queues *queues, int64_t qid) struct queue_id_and_info { int64_t id; - int fmt; - int unboundop; + struct _queuedefaults defaults; }; static struct queue_id_and_info * @@ -1053,8 +1046,7 @@ _queues_list_all(_queues *queues, int64_t *p_count) for (int64_t i=0; ref != NULL; ref = ref->next, i++) { ids[i].id = ref->qid; assert(ref->queue != NULL); - ids[i].fmt = ref->queue->defaults.fmt; - ids[i].unboundop = ref->queue->defaults.unboundop; + ids[i].defaults = ref->queue->defaults; } *p_count = queues->count; @@ -1090,13 +1082,14 @@ _queue_free(_queue *queue) // Create a new queue. static int64_t -queue_create(_queues *queues, Py_ssize_t maxsize, int fmt, int unboundop) +queue_create(_queues *queues, Py_ssize_t maxsize, + struct _queuedefaults defaults) { _queue *queue = GLOBAL_MALLOC(_queue); if (queue == NULL) { return ERR_QUEUE_ALLOC; } - int err = _queue_init(queue, maxsize, fmt, unboundop); + int err = _queue_init(queue, maxsize, defaults); if (err < 0) { GLOBAL_FREE(queue); return (int64_t)err; @@ -1125,7 +1118,8 @@ queue_destroy(_queues *queues, int64_t qid) // Push an object onto the queue. static int -queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) +queue_put(_queues *queues, int64_t qid, PyObject *obj, unboundop_t unboundop, + xidata_fallback_t fallback) { PyThreadState *tstate = PyThreadState_Get(); @@ -1138,27 +1132,27 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) assert(queue != NULL); // Convert the object to cross-interpreter data. - _PyXIData_t *data = _PyXIData_New(); - if (data == NULL) { + _PyXIData_t *xidata = _PyXIData_New(); + if (xidata == NULL) { _queue_unmark_waiter(queue, queues->mutex); return -1; } - if (_PyObject_GetXIData(tstate, obj, data) != 0) { + if (_PyObject_GetXIData(tstate, obj, fallback, xidata) != 0) { _queue_unmark_waiter(queue, queues->mutex); - GLOBAL_FREE(data); + GLOBAL_FREE(xidata); return -1; } - assert(_PyXIData_INTERPID(data) == + assert(_PyXIData_INTERPID(xidata) == PyInterpreterState_GetID(tstate->interp)); // Add the data to the queue. int64_t interpid = -1; // _queueitem_init() will set it. - int res = _queue_add(queue, interpid, data, fmt, unboundop); + int res = _queue_add(queue, interpid, xidata, unboundop); _queue_unmark_waiter(queue, queues->mutex); if (res != 0) { // We may chain an exception here: - (void)_release_xid_data(data, 0); - GLOBAL_FREE(data); + (void)_release_xid_data(xidata, 0); + GLOBAL_FREE(xidata); return res; } @@ -1169,7 +1163,7 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) // XXX Support a "wait" mutex? static int queue_get(_queues *queues, int64_t qid, - PyObject **res, int *p_fmt, int *p_unboundop) + PyObject **res, int *p_unboundop) { int err; *res = NULL; @@ -1185,7 +1179,7 @@ queue_get(_queues *queues, int64_t qid, // Pop off the next item from the queue. _PyXIData_t *data = NULL; - err = _queue_next(queue, &data, p_fmt, p_unboundop); + err = _queue_next(queue, &data, p_unboundop); _queue_unmark_waiter(queue, queues->mutex); if (err != 0) { return err; @@ -1216,6 +1210,20 @@ queue_get(_queues *queues, int64_t qid, return 0; } +static int +queue_get_defaults(_queues *queues, int64_t qid, + struct _queuedefaults *p_defaults) +{ + _queue *queue = NULL; + int err = _queues_lookup(queues, qid, &queue); + if (err != 0) { + return err; + } + *p_defaults = queue->defaults; + _queue_unmark_waiter(queue, queues->mutex); + return 0; +} + static int queue_get_maxsize(_queues *queues, int64_t qid, Py_ssize_t *p_maxsize) { @@ -1270,7 +1278,7 @@ set_external_queue_type(module_state *state, PyTypeObject *queue_type) } // Add and register the new type. - if (ensure_xid_class(queue_type, _queueobj_shared) < 0) { + if (ensure_xid_class(queue_type, GETDATA(_queueobj_shared)) < 0) { return -1; } state->queue_type = (PyTypeObject *)Py_NewRef(queue_type); @@ -1348,10 +1356,10 @@ _queueobj_from_xid(_PyXIData_t *data) PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } PyTypeObject *cls = get_external_queue_type(mod); @@ -1474,22 +1482,28 @@ qidarg_converter(PyObject *arg, void *ptr) static PyObject * queuesmod_create(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"maxsize", "fmt", "unboundop", NULL}; + static char *kwlist[] = {"maxsize", "unboundop", "fallback", NULL}; Py_ssize_t maxsize; - int fmt; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "nii:create", kwlist, - &maxsize, &fmt, &unboundop)) + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "n|ii:create", kwlist, + &maxsize, &unboundarg, &fallbackarg)) { return NULL; } - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _queuedefaults defaults = {0}; + if (resolve_unboundop(unboundarg, UNBOUND_REPLACE, + &defaults.unboundop) < 0) + { + return NULL; + } + if (resolve_fallback(fallbackarg, _PyXIDATA_FULL_FALLBACK, + &defaults.fallback) < 0) + { return NULL; } - int64_t qid = queue_create(&_globals.queues, maxsize, fmt, unboundop); + int64_t qid = queue_create(&_globals.queues, maxsize, defaults); if (qid < 0) { (void)handle_queue_error((int)qid, self, qid); return NULL; @@ -1511,7 +1525,7 @@ queuesmod_create(PyObject *self, PyObject *args, PyObject *kwds) } PyDoc_STRVAR(queuesmod_create_doc, -"create(maxsize, fmt, unboundop) -> qid\n\ +"create(maxsize, unboundop, fallback) -> qid\n\ \n\ Create a new cross-interpreter queue and return its unique generated ID.\n\ It is a new reference as though bind() had been called on the queue.\n\ @@ -1560,8 +1574,9 @@ queuesmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } struct queue_id_and_info *cur = qids; for (int64_t i=0; i < count; cur++, i++) { - PyObject *item = Py_BuildValue("Lii", cur->id, cur->fmt, - cur->unboundop); + PyObject *item = Py_BuildValue("Lii", cur->id, + cur->defaults.unboundop, + cur->defaults.fallback); if (item == NULL) { Py_SETREF(ids, NULL); break; @@ -1575,34 +1590,44 @@ queuesmod_list_all(PyObject *self, PyObject *Py_UNUSED(ignored)) } PyDoc_STRVAR(queuesmod_list_all_doc, -"list_all() -> [(qid, fmt)]\n\ +"list_all() -> [(qid, unboundop, fallback)]\n\ \n\ Return the list of IDs for all queues.\n\ -Each corresponding default format is also included."); +Each corresponding default unbound op and fallback is also included."); static PyObject * queuesmod_put(PyObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"qid", "obj", "fmt", "unboundop", NULL}; + static char *kwlist[] = {"qid", "obj", "unboundop", "fallback", NULL}; qidarg_converter_data qidarg = {0}; PyObject *obj; - int fmt; - int unboundop; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&Oii:put", kwlist, - qidarg_converter, &qidarg, &obj, &fmt, - &unboundop)) + int unboundarg = -1; + int fallbackarg = -1; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O|ii$p:put", kwlist, + qidarg_converter, &qidarg, &obj, + &unboundarg, &fallbackarg)) { return NULL; } int64_t qid = qidarg.id; - if (!check_unbound(unboundop)) { - PyErr_Format(PyExc_ValueError, - "unsupported unboundop %d", unboundop); + struct _queuedefaults defaults = {-1, -1}; + if (unboundarg < 0 || fallbackarg < 0) { + int err = queue_get_defaults(&_globals.queues, qid, &defaults); + if (handle_queue_error(err, self, qid)) { + return NULL; + } + } + unboundop_t unboundop; + if (resolve_unboundop(unboundarg, defaults.unboundop, &unboundop) < 0) { + return NULL; + } + xidata_fallback_t fallback; + if (resolve_fallback(fallbackarg, defaults.fallback, &fallback) < 0) { return NULL; } /* Queue up the object. */ - int err = queue_put(&_globals.queues, qid, obj, fmt, unboundop); + int err = queue_put(&_globals.queues, qid, obj, unboundop, fallback); // This is the only place that raises QueueFull. if (handle_queue_error(err, self, qid)) { return NULL; @@ -1612,7 +1637,7 @@ queuesmod_put(PyObject *self, PyObject *args, PyObject *kwds) } PyDoc_STRVAR(queuesmod_put_doc, -"put(qid, obj, fmt)\n\ +"put(qid, obj)\n\ \n\ Add the object's data to the queue."); @@ -1628,27 +1653,26 @@ queuesmod_get(PyObject *self, PyObject *args, PyObject *kwds) int64_t qid = qidarg.id; PyObject *obj = NULL; - int fmt = 0; int unboundop = 0; - int err = queue_get(&_globals.queues, qid, &obj, &fmt, &unboundop); + int err = queue_get(&_globals.queues, qid, &obj, &unboundop); // This is the only place that raises QueueEmpty. if (handle_queue_error(err, self, qid)) { return NULL; } if (obj == NULL) { - return Py_BuildValue("Oii", Py_None, fmt, unboundop); + return Py_BuildValue("Oi", Py_None, unboundop); } - PyObject *res = Py_BuildValue("OiO", obj, fmt, Py_None); + PyObject *res = Py_BuildValue("OO", obj, Py_None); Py_DECREF(obj); return res; } PyDoc_STRVAR(queuesmod_get_doc, -"get(qid) -> (obj, fmt)\n\ +"get(qid) -> (obj, unboundop)\n\ \n\ Return a new object from the data at the front of the queue.\n\ -The object's format is also returned.\n\ +The unbound op is also returned.\n\ \n\ If there is nothing to receive then raise QueueEmpty."); @@ -1748,17 +1772,14 @@ queuesmod_get_queue_defaults(PyObject *self, PyObject *args, PyObject *kwds) } int64_t qid = qidarg.id; - _queue *queue = NULL; - int err = _queues_lookup(&_globals.queues, qid, &queue); + struct _queuedefaults defaults = {0}; + int err = queue_get_defaults(&_globals.queues, qid, &defaults); if (handle_queue_error(err, self, qid)) { return NULL; } - int fmt = queue->defaults.fmt; - int unboundop = queue->defaults.unboundop; - _queue_unmark_waiter(queue, _globals.queues.mutex); - PyObject *defaults = Py_BuildValue("ii", fmt, unboundop); - return defaults; + PyObject *res = Py_BuildValue("ii", defaults.unboundop, defaults.fallback); + return res; } PyDoc_STRVAR(queuesmod_get_queue_defaults_doc, @@ -1931,8 +1952,7 @@ static int module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1941,8 +1961,7 @@ module_clear(PyObject *mod) module_state *state = get_module_state(mod); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpreters_common.h b/Modules/_interpreters_common.h index edd65577284a20..40fd51d752e324 100644 --- a/Modules/_interpreters_common.h +++ b/Modules/_interpreters_common.h @@ -5,8 +5,10 @@ _RESOLVE_MODINIT_FUNC_NAME(NAME) +#define GETDATA(FUNC) ((_PyXIData_getdata_t){.basic=FUNC}) + static int -ensure_xid_class(PyTypeObject *cls, xidatafunc getdata) +ensure_xid_class(PyTypeObject *cls, _PyXIData_getdata_t getdata) { PyThreadState *tstate = PyThreadState_Get(); return _PyXIData_RegisterClass(tstate, cls, getdata); @@ -37,10 +39,37 @@ _get_interpid(_PyXIData_t *data) } +#ifdef HAS_FALLBACK +static int +resolve_fallback(int arg, xidata_fallback_t dflt, + xidata_fallback_t *p_fallback) +{ + if (arg < 0) { + *p_fallback = dflt; + return 0; + } + xidata_fallback_t fallback; + if (arg == _PyXIDATA_XIDATA_ONLY) { + fallback =_PyXIDATA_XIDATA_ONLY; + } + else if (arg == _PyXIDATA_FULL_FALLBACK) { + fallback = _PyXIDATA_FULL_FALLBACK; + } + else { + PyErr_Format(PyExc_ValueError, "unsupported fallback %d", arg); + return -1; + } + *p_fallback = fallback; + return 0; +} +#endif + + /* unbound items ************************************************************/ #ifdef HAS_UNBOUND_ITEMS +typedef int unboundop_t; #define UNBOUND_REMOVE 1 #define UNBOUND_ERROR 2 #define UNBOUND_REPLACE 3 @@ -51,6 +80,7 @@ _get_interpid(_PyXIData_t *data) // object is released but the underlying data is copied (with the "raw" // allocator) and used when the item is popped off the queue. +#ifndef NDEBUG static int check_unbound(int unboundop) { @@ -63,5 +93,31 @@ check_unbound(int unboundop) return 0; } } +#endif + +static int +resolve_unboundop(int arg, unboundop_t dflt, unboundop_t *p_unboundop) +{ + if (arg < 0) { + *p_unboundop = dflt; + return 0; + } + unboundop_t op; + if (arg == UNBOUND_REMOVE) { + op = UNBOUND_REMOVE; + } + else if (arg == UNBOUND_ERROR) { + op = UNBOUND_ERROR; + } + else if (arg == UNBOUND_REPLACE) { + op = UNBOUND_REPLACE; + } + else { + PyErr_Format(PyExc_ValueError, "unsupported unboundop %d", arg); + return -1; + } + *p_unboundop = op; + return 0; +} #endif diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 77678f7c126005..e7feaa7f186aee 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -8,6 +8,7 @@ #include "Python.h" #include "pycore_code.h" // _PyCode_HAS_EXECUTORS() #include "pycore_crossinterp.h" // _PyXIData_t +#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() #include "pycore_interp.h" // _PyInterpreterState_IDIncref() #include "pycore_modsupport.h" // _PyArg_BadArgument() #include "pycore_namespace.h" // _PyNamespace_New() @@ -71,6 +72,22 @@ is_running_main(PyInterpreterState *interp) } +static inline int +is_notshareable_raised(PyThreadState *tstate) +{ + PyObject *exctype = _PyXIData_GetNotShareableErrorType(tstate); + return _PyErr_ExceptionMatches(tstate, exctype); +} + +static void +unwrap_not_shareable(PyThreadState *tstate, _PyXI_failure *failure) +{ + if (_PyXI_UnwrapNotShareableError(tstate, failure) < 0) { + _PyErr_Clear(tstate); + } +} + + /* Cross-interpreter Buffer Views *******************************************/ /* When a memoryview object is "shared" between interpreters, @@ -286,7 +303,7 @@ register_memoryview_xid(PyObject *mod, PyTypeObject **p_state) *p_state = cls; // Register XID for the builtin memoryview type. - if (ensure_xid_class(&PyMemoryView_Type, _pybuffer_shared) < 0) { + if (ensure_xid_class(&PyMemoryView_Type, GETDATA(_pybuffer_shared)) < 0) { return -1; } // We don't ever bother un-registering memoryview. @@ -319,10 +336,10 @@ _get_current_module_state(void) { PyObject *mod = _get_current_module(); if (mod == NULL) { - // XXX import it? - PyErr_SetString(PyExc_RuntimeError, - MODULE_NAME_STR " module not imported yet"); - return NULL; + mod = PyImport_ImportModule(MODULE_NAME_STR); + if (mod == NULL) { + return NULL; + } } module_state *state = get_module_state(mod); Py_DECREF(mod); @@ -359,96 +376,6 @@ _get_current_xibufferview_type(void) } -/* Python code **************************************************************/ - -static const char * -check_code_str(PyUnicodeObject *text) -{ - assert(text != NULL); - if (PyUnicode_GET_LENGTH(text) == 0) { - return "too short"; - } - - // XXX Verify that it parses? - - return NULL; -} - -static const char * -check_code_object(PyCodeObject *code) -{ - assert(code != NULL); - if (code->co_argcount > 0 - || code->co_posonlyargcount > 0 - || code->co_kwonlyargcount > 0 - || code->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) - { - return "arguments not supported"; - } - if (code->co_ncellvars > 0) { - return "closures not supported"; - } - // We trust that no code objects under co_consts have unbound cell vars. - - if (_PyCode_HAS_EXECUTORS(code) || _PyCode_HAS_INSTRUMENTATION(code)) { - return "only basic functions are supported"; - } - if (code->_co_monitoring != NULL) { - return "only basic functions are supported"; - } - if (code->co_extra != NULL) { - return "only basic functions are supported"; - } - - return NULL; -} - -#define RUN_TEXT 1 -#define RUN_CODE 2 - -static const char * -get_code_str(PyObject *arg, Py_ssize_t *len_p, PyObject **bytes_p, int *flags_p) -{ - const char *codestr = NULL; - Py_ssize_t len = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - - if (PyUnicode_Check(arg)) { - assert(PyUnicode_Check(arg) - && (check_code_str((PyUnicodeObject *)arg) == NULL)); - codestr = PyUnicode_AsUTF8AndSize(arg, &len); - if (codestr == NULL) { - return NULL; - } - if (strlen(codestr) != (size_t)len) { - PyErr_SetString(PyExc_ValueError, - "source code string cannot contain null bytes"); - return NULL; - } - flags = RUN_TEXT; - } - else { - assert(PyCode_Check(arg) - && (check_code_object((PyCodeObject *)arg) == NULL)); - flags = RUN_CODE; - - // Serialize the code object. - bytes_obj = PyMarshal_WriteObjectToString(arg, Py_MARSHAL_VERSION); - if (bytes_obj == NULL) { - return NULL; - } - codestr = PyBytes_AS_STRING(bytes_obj); - len = PyBytes_GET_SIZE(bytes_obj); - } - - *flags_p = flags; - *bytes_p = bytes_obj; - *len_p = len; - return codestr; -} - - /* interpreter-specific code ************************************************/ static int @@ -511,73 +438,290 @@ config_from_object(PyObject *configobj, PyInterpreterConfig *config) } +struct interp_call { + _PyXIData_t *func; + _PyXIData_t *args; + _PyXIData_t *kwargs; + struct { + _PyXIData_t func; + _PyXIData_t args; + _PyXIData_t kwargs; + } _preallocated; +}; + +static void +_interp_call_clear(struct interp_call *call) +{ + if (call->func != NULL) { + _PyXIData_Clear(NULL, call->func); + } + if (call->args != NULL) { + _PyXIData_Clear(NULL, call->args); + } + if (call->kwargs != NULL) { + _PyXIData_Clear(NULL, call->kwargs); + } + *call = (struct interp_call){0}; +} + static int -_run_script(PyObject *ns, const char *codestr, Py_ssize_t codestrlen, int flags) +_interp_call_pack(PyThreadState *tstate, struct interp_call *call, + PyObject *func, PyObject *args, PyObject *kwargs) { - PyObject *result = NULL; - if (flags & RUN_TEXT) { - result = PyRun_StringFlags(codestr, Py_file_input, ns, ns, NULL); - } - else if (flags & RUN_CODE) { - PyObject *code = PyMarshal_ReadObjectFromString(codestr, codestrlen); - if (code != NULL) { - result = PyEval_EvalCode(code, ns, ns); - Py_DECREF(code); + xidata_fallback_t fallback = _PyXIDATA_FULL_FALLBACK; + assert(call->func == NULL); + assert(call->args == NULL); + assert(call->kwargs == NULL); + // Handle the func. + if (!PyCallable_Check(func)) { + _PyErr_Format(tstate, PyExc_TypeError, + "expected a callable, got %R", func); + return -1; + } + if (_PyFunction_GetXIData(tstate, func, &call->_preallocated.func) < 0) { + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (_PyPickle_GetXIData(tstate, func, &call->_preallocated.func) < 0) { + _PyErr_SetRaisedException(tstate, exc); + return -1; } + Py_DECREF(exc); + } + call->func = &call->_preallocated.func; + // Handle the args. + if (args == NULL || args == Py_None) { + // Leave it empty. } else { - Py_UNREACHABLE(); + assert(PyTuple_Check(args)); + if (PyTuple_GET_SIZE(args) > 0) { + if (_PyObject_GetXIData( + tstate, args, fallback, &call->_preallocated.args) < 0) + { + _interp_call_clear(call); + return -1; + } + call->args = &call->_preallocated.args; + } } - if (result == NULL) { - return -1; + // Handle the kwargs. + if (kwargs == NULL || kwargs == Py_None) { + // Leave it empty. + } + else { + assert(PyDict_Check(kwargs)); + if (PyDict_GET_SIZE(kwargs) > 0) { + if (_PyObject_GetXIData( + tstate, kwargs, fallback, &call->_preallocated.kwargs) < 0) + { + _interp_call_clear(call); + return -1; + } + call->kwargs = &call->_preallocated.kwargs; + } } - Py_DECREF(result); // We throw away the result. return 0; } +static void +wrap_notshareable(PyThreadState *tstate, const char *label) +{ + if (!is_notshareable_raised(tstate)) { + return; + } + assert(label != NULL && strlen(label) > 0); + PyObject *cause = _PyErr_GetRaisedException(tstate); + _PyXIData_FormatNotShareableError(tstate, "%s not shareable", label); + PyObject *exc = _PyErr_GetRaisedException(tstate); + PyException_SetCause(exc, cause); + _PyErr_SetRaisedException(tstate, exc); +} + static int -_run_in_interpreter(PyInterpreterState *interp, - const char *codestr, Py_ssize_t codestrlen, - PyObject *shareables, int flags, - PyObject **p_excinfo) +_interp_call_unpack(struct interp_call *call, + PyObject **p_func, PyObject **p_args, PyObject **p_kwargs) { - assert(!PyErr_Occurred()); - _PyXI_session session = {0}; + PyThreadState *tstate = PyThreadState_Get(); - // Prep and switch interpreters. - if (_PyXI_Enter(&session, interp, shareables) < 0) { - if (PyErr_Occurred()) { - // If an error occured at this step, it means that interp - // was not prepared and switched. + // Unpack the func. + PyObject *func = _PyXIData_NewObject(call->func); + if (func == NULL) { + wrap_notshareable(tstate, "func"); + return -1; + } + // Unpack the args. + PyObject *args; + if (call->args == NULL) { + args = PyTuple_New(0); + if (args == NULL) { + Py_DECREF(func); return -1; } - // Now, apply the error from another interpreter: - PyObject *excinfo = _PyXI_ApplyError(session.error); - if (excinfo != NULL) { - *p_excinfo = excinfo; + } + else { + args = _PyXIData_NewObject(call->args); + if (args == NULL) { + wrap_notshareable(tstate, "args"); + Py_DECREF(func); + return -1; } - assert(PyErr_Occurred()); + assert(PyTuple_Check(args)); + } + // Unpack the kwargs. + PyObject *kwargs = NULL; + if (call->kwargs != NULL) { + kwargs = _PyXIData_NewObject(call->kwargs); + if (kwargs == NULL) { + wrap_notshareable(tstate, "kwargs"); + Py_DECREF(func); + Py_DECREF(args); + return -1; + } + assert(PyDict_Check(kwargs)); + } + *p_func = func; + *p_args = args; + *p_kwargs = kwargs; + return 0; +} + +static int +_make_call(struct interp_call *call, + PyObject **p_result, _PyXI_failure *failure) +{ + assert(call != NULL && call->func != NULL); + PyThreadState *tstate = _PyThreadState_GET(); + + // Get the func and args. + PyObject *func = NULL, *args = NULL, *kwargs = NULL; + if (_interp_call_unpack(call, &func, &args, &kwargs) < 0) { + assert(func == NULL); + assert(args == NULL); + assert(kwargs == NULL); + _PyXI_InitFailure(failure, _PyXI_ERR_OTHER, NULL); + unwrap_not_shareable(tstate, failure); return -1; } + assert(!_PyErr_Occurred(tstate)); - // Run the script. - int res = _run_script(session.main_ns, codestr, codestrlen, flags); + // Make the call. + PyObject *resobj = PyObject_Call(func, args, kwargs); + Py_DECREF(func); + Py_XDECREF(args); + Py_XDECREF(kwargs); + if (resobj == NULL) { + return -1; + } + *p_result = resobj; + return 0; +} - // Clean up and switch back. - _PyXI_Exit(&session); +static int +_run_script(_PyXIData_t *script, PyObject *ns, _PyXI_failure *failure) +{ + PyObject *code = _PyXIData_NewObject(script); + if (code == NULL) { + _PyXI_InitFailure(failure, _PyXI_ERR_NOT_SHAREABLE, NULL); + return -1; + } + PyObject *result = PyEval_EvalCode(code, ns, ns); + Py_DECREF(code); + if (result == NULL) { + _PyXI_InitFailure(failure, _PyXI_ERR_UNCAUGHT_EXCEPTION, NULL); + return -1; + } + assert(result == Py_None); + Py_DECREF(result); // We throw away the result. + return 0; +} - // Propagate any exception out to the caller. - assert(!PyErr_Occurred()); - if (res < 0) { - PyObject *excinfo = _PyXI_ApplyCapturedException(&session); - if (excinfo != NULL) { - *p_excinfo = excinfo; +struct run_result { + PyObject *result; + PyObject *excinfo; +}; + +static void +_run_result_clear(struct run_result *runres) +{ + Py_CLEAR(runres->result); + Py_CLEAR(runres->excinfo); +} + +static int +_run_in_interpreter(PyThreadState *tstate, PyInterpreterState *interp, + _PyXIData_t *script, struct interp_call *call, + PyObject *shareables, struct run_result *runres) +{ + assert(!_PyErr_Occurred(tstate)); + int res = -1; + _PyXI_failure *failure = _PyXI_NewFailure(); + if (failure == NULL) { + return -1; + } + _PyXI_session *session = _PyXI_NewSession(); + if (session == NULL) { + _PyXI_FreeFailure(failure); + return -1; + } + _PyXI_session_result result = {0}; + + // Prep and switch interpreters. + if (_PyXI_Enter(session, interp, shareables, &result) < 0) { + // If an error occured at this step, it means that interp + // was not prepared and switched. + _PyXI_FreeSession(session); + _PyXI_FreeFailure(failure); + assert(result.excinfo == NULL); + return -1; + } + + // Run in the interpreter. + if (script != NULL) { + assert(call == NULL); + PyObject *mainns = _PyXI_GetMainNamespace(session, failure); + if (mainns == NULL) { + goto finally; } + res = _run_script(script, mainns, failure); } else { - assert(!_PyXI_HasCapturedException(&session)); + assert(call != NULL); + PyObject *resobj; + res = _make_call(call, &resobj, failure); + if (res == 0) { + res = _PyXI_Preserve(session, "resobj", resobj, failure); + Py_DECREF(resobj); + if (res < 0) { + goto finally; + } + } } +finally: + // Clean up and switch back. + (void)res; + int exitres = _PyXI_Exit(session, failure, &result); + assert(res == 0 || exitres != 0); + _PyXI_FreeSession(session); + _PyXI_FreeFailure(failure); + + res = exitres; + if (_PyErr_Occurred(tstate)) { + // It's a directly propagated exception. + assert(res < 0); + } + else if (res < 0) { + assert(result.excinfo != NULL); + runres->excinfo = Py_NewRef(result.excinfo); + res = -1; + } + else { + assert(result.excinfo == NULL); + runres->result = _PyXI_GetPreserved(&result, "resobj"); + if (_PyErr_Occurred(tstate)) { + res = -1; + } + } + _PyXI_ClearResult(&result); return res; } @@ -895,8 +1039,8 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) PyObject *id, *updates; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OO|$p:" MODULE_NAME_STR ".set___main___attrs", - kwlist, &id, &updates, &restricted)) + "OO!|$p:" MODULE_NAME_STR ".set___main___attrs", + kwlist, &id, &PyDict_Type, &updates, &restricted)) { return NULL; } @@ -910,34 +1054,39 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) } // Check the updates. - if (updates != Py_None) { - Py_ssize_t size = PyObject_Size(updates); - if (size < 0) { - return NULL; - } - if (size == 0) { - PyErr_SetString(PyExc_ValueError, - "arg 2 must be a non-empty mapping"); - return NULL; - } + Py_ssize_t size = PyDict_Size(updates); + if (size < 0) { + return NULL; + } + if (size == 0) { + PyErr_SetString(PyExc_ValueError, + "arg 2 must be a non-empty dict"); + return NULL; } - _PyXI_session session = {0}; + _PyXI_session *session = _PyXI_NewSession(); + if (session == NULL) { + return NULL; + } // Prep and switch interpreters, including apply the updates. - if (_PyXI_Enter(&session, interp, updates) < 0) { - if (!PyErr_Occurred()) { - _PyXI_ApplyCapturedException(&session); - assert(PyErr_Occurred()); - } - else { - assert(!_PyXI_HasCapturedException(&session)); - } + if (_PyXI_Enter(session, interp, updates, NULL) < 0) { + _PyXI_FreeSession(session); return NULL; } // Clean up and switch back. - _PyXI_Exit(&session); + assert(!PyErr_Occurred()); + int res = _PyXI_Exit(session, NULL, NULL); + _PyXI_FreeSession(session); + assert(res == 0); + if (res < 0) { + // unreachable + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, "unresolved error"); + } + return NULL; + } Py_RETURN_NONE; } @@ -948,122 +1097,31 @@ PyDoc_STRVAR(set___main___attrs_doc, Bind the given attributes in the interpreter's __main__ module."); -static PyUnicodeObject * -convert_script_arg(PyObject *arg, const char *fname, const char *displayname, - const char *expected) -{ - PyUnicodeObject *str = NULL; - if (PyUnicode_CheckExact(arg)) { - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else if (PyUnicode_Check(arg)) { - // XXX str = PyUnicode_FromObject(arg); - str = (PyUnicodeObject *)Py_NewRef(arg); - } - else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; - } - - const char *err = check_code_str(str); - if (err != NULL) { - Py_DECREF(str); - PyErr_Format(PyExc_ValueError, - "%.200s(): bad script text (%s)", fname, err); - return NULL; - } - - return str; -} - -static PyCodeObject * -convert_code_arg(PyObject *arg, const char *fname, const char *displayname, - const char *expected) +static PyObject * +_handle_script_error(struct run_result *runres) { - const char *kind = NULL; - PyCodeObject *code = NULL; - if (PyFunction_Check(arg)) { - if (PyFunction_GetClosure(arg) != NULL) { - PyErr_Format(PyExc_ValueError, - "%.200s(): closures not supported", fname); - return NULL; - } - code = (PyCodeObject *)PyFunction_GetCode(arg); - if (code == NULL) { - if (PyErr_Occurred()) { - // This chains. - PyErr_Format(PyExc_ValueError, - "%.200s(): bad func", fname); - } - else { - PyErr_Format(PyExc_ValueError, - "%.200s(): func.__code__ missing", fname); - } - return NULL; - } - Py_INCREF(code); - kind = "func"; - } - else if (PyCode_Check(arg)) { - code = (PyCodeObject *)Py_NewRef(arg); - kind = "code object"; - } - else { - _PyArg_BadArgument(fname, displayname, expected, arg); - return NULL; - } - - const char *err = check_code_object(code); - if (err != NULL) { - Py_DECREF(code); - PyErr_Format(PyExc_ValueError, - "%.200s(): bad %s (%s)", fname, kind, err); + assert(runres->result == NULL); + if (runres->excinfo == NULL) { + assert(PyErr_Occurred()); return NULL; } - - return code; -} - -static int -_interp_exec(PyObject *self, PyInterpreterState *interp, - PyObject *code_arg, PyObject *shared_arg, PyObject **p_excinfo) -{ - if (shared_arg != NULL && !PyDict_CheckExact(shared_arg)) { - PyErr_SetString(PyExc_TypeError, "expected 'shared' to be a dict"); - return -1; - } - - // Extract code. - Py_ssize_t codestrlen = -1; - PyObject *bytes_obj = NULL; - int flags = 0; - const char *codestr = get_code_str(code_arg, - &codestrlen, &bytes_obj, &flags); - if (codestr == NULL) { - return -1; - } - - // Run the code in the interpreter. - int res = _run_in_interpreter(interp, codestr, codestrlen, - shared_arg, flags, p_excinfo); - Py_XDECREF(bytes_obj); - if (res < 0) { - return -1; - } - - return 0; + assert(!PyErr_Occurred()); + return runres->excinfo; } static PyObject * interp_exec(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".exec" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "code", "shared", "restrict", NULL}; PyObject *id, *code; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" MODULE_NAME_STR ".exec", kwlist, - &id, &code, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &code, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1075,27 +1133,24 @@ interp_exec(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - const char *expected = "a string, a function, or a code object"; - if (PyUnicode_Check(code)) { - code = (PyObject *)convert_script_arg(code, MODULE_NAME_STR ".exec", - "argument 2", expected); - } - else { - code = (PyObject *)convert_code_arg(code, MODULE_NAME_STR ".exec", - "argument 2", expected); - } - if (code == NULL) { + // We don't need the script to be "pure", which means it can use + // global variables. They will be resolved against __main__. + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate, NULL); return NULL; } - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, shared, &excinfo); - Py_DECREF(code); + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); + _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(exec_doc, @@ -1118,13 +1173,16 @@ is ignored, including its __globals__ dict."); static PyObject * interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".run_string" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "script", "shared", "restrict", NULL}; PyObject *id, *script; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OU|O$p:" MODULE_NAME_STR ".run_string", - kwlist, &id, &script, &shared, &restricted)) + "OU|O!$p:" FUNCNAME, kwlist, + &id, &script, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1136,20 +1194,27 @@ interp_run_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - script = (PyObject *)convert_script_arg(script, MODULE_NAME_STR ".run_string", - "argument 2", "a string"); - if (script == NULL) { + if (PyFunction_Check(script) || PyCode_Check(script)) { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a string", script); + return NULL; + } + + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, script, &xidata) < 0) { + unwrap_not_shareable(tstate, NULL); return NULL; } - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, script, shared, &excinfo); - Py_DECREF(script); + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); + _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(run_string_doc, @@ -1162,13 +1227,16 @@ Execute the provided string in the identified interpreter.\n\ static PyObject * interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".run_func" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "func", "shared", "restrict", NULL}; PyObject *id, *func; PyObject *shared = NULL; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|O$p:" MODULE_NAME_STR ".run_func", - kwlist, &id, &func, &shared, &restricted)) + "OO|O!$p:" FUNCNAME, kwlist, + &id, &func, &PyDict_Type, &shared, + &restricted)) { return NULL; } @@ -1180,21 +1248,36 @@ interp_run_func(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyCodeObject *code = convert_code_arg(func, MODULE_NAME_STR ".exec", - "argument 2", - "a function or a code object"); - if (code == NULL) { + // We don't worry about checking globals. They will be resolved + // against __main__. + PyObject *code; + if (PyFunction_Check(func)) { + code = PyFunction_GET_CODE(func); + } + else if (PyCode_Check(func)) { + code = func; + } + else { + _PyArg_BadArgument(FUNCNAME, "argument 2", "a function", func); return NULL; } - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, (PyObject *)code, shared, &excinfo); - Py_DECREF(code); + _PyXIData_t xidata = {0}; + if (_PyCode_GetScriptXIData(tstate, code, &xidata) < 0) { + unwrap_not_shareable(tstate, NULL); + return NULL; + } + + struct run_result runres = {0}; + int res = _run_in_interpreter( + tstate, interp, &xidata, NULL, shared, &runres); + _PyXIData_Release(&xidata); if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; + return _handle_script_error(&runres); } + assert(runres.result == NULL); Py_RETURN_NONE; +#undef FUNCNAME } PyDoc_STRVAR(run_func_doc, @@ -1209,16 +1292,21 @@ are not supported. Methods and other callables are not supported either.\n\ static PyObject * interp_call(PyObject *self, PyObject *args, PyObject *kwds) { +#define FUNCNAME MODULE_NAME_STR ".call" + PyThreadState *tstate = _PyThreadState_GET(); static char *kwlist[] = {"id", "callable", "args", "kwargs", - "restrict", NULL}; + "preserve_exc", "restrict", NULL}; PyObject *id, *callable; PyObject *args_obj = NULL; PyObject *kwargs_obj = NULL; + int preserve_exc = 0; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, - "OO|OO$p:" MODULE_NAME_STR ".call", kwlist, - &id, &callable, &args_obj, &kwargs_obj, - &restricted)) + "OO|O!O!$pp:" FUNCNAME, kwlist, + &id, &callable, + &PyTuple_Type, &args_obj, + &PyDict_Type, &kwargs_obj, + &preserve_exc, &restricted)) { return NULL; } @@ -1230,42 +1318,37 @@ interp_call(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (args_obj != NULL) { - PyErr_SetString(PyExc_ValueError, "got unexpected args"); - return NULL; - } - if (kwargs_obj != NULL) { - PyErr_SetString(PyExc_ValueError, "got unexpected kwargs"); + struct interp_call call = {0}; + if (_interp_call_pack(tstate, &call, callable, args_obj, kwargs_obj) < 0) { return NULL; } - PyObject *code = (PyObject *)convert_code_arg(callable, MODULE_NAME_STR ".call", - "argument 2", "a function"); - if (code == NULL) { - return NULL; + PyObject *res_and_exc = NULL; + struct run_result runres = {0}; + if (_run_in_interpreter(tstate, interp, NULL, &call, NULL, &runres) < 0) { + if (runres.excinfo == NULL) { + assert(_PyErr_Occurred(tstate)); + goto finally; + } + assert(!_PyErr_Occurred(tstate)); } + assert(runres.result == NULL || runres.excinfo == NULL); + res_and_exc = Py_BuildValue("OO", + (runres.result ? runres.result : Py_None), + (runres.excinfo ? runres.excinfo : Py_None)); - PyObject *excinfo = NULL; - int res = _interp_exec(self, interp, code, NULL, &excinfo); - Py_DECREF(code); - if (res < 0) { - assert((excinfo == NULL) != (PyErr_Occurred() == NULL)); - return excinfo; - } - Py_RETURN_NONE; +finally: + _interp_call_clear(&call); + _run_result_clear(&runres); + return res_and_exc; +#undef FUNCNAME } PyDoc_STRVAR(call_doc, "call(id, callable, args=None, kwargs=None, *, restrict=False)\n\ \n\ Call the provided object in the identified interpreter.\n\ -Pass the given args and kwargs, if possible.\n\ -\n\ -\"callable\" may be a plain function with no free vars that takes\n\ -no arguments.\n\ -\n\ -The function's code object is used and all its state\n\ -is ignored, including its __globals__ dict."); +Pass the given args and kwargs, if possible."); static PyObject * @@ -1472,16 +1555,16 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) } PyObject *captured = NULL; - _PyXI_excinfo info = {0}; - if (_PyXI_InitExcInfo(&info, exc) < 0) { + _PyXI_excinfo *info = _PyXI_NewExcInfo(exc); + if (info == NULL) { goto finally; } - captured = _PyXI_ExcInfoAsObject(&info); + captured = _PyXI_ExcInfoAsObject(info); if (captured == NULL) { goto finally; } - PyObject *formatted = _PyXI_FormatExcInfo(&info); + PyObject *formatted = _PyXI_FormatExcInfo(info); if (formatted == NULL) { Py_CLEAR(captured); goto finally; @@ -1494,7 +1577,7 @@ capture_exception(PyObject *self, PyObject *args, PyObject *kwds) } finally: - _PyXI_ClearExcInfo(&info); + _PyXI_FreeExcInfo(info); if (exc != exc_arg) { if (PyErr_Occurred()) { PyErr_SetRaisedException(exc); @@ -1623,8 +1706,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1632,8 +1714,7 @@ module_clear(PyObject *mod) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c index 4724e97982f349..25c8bf8b3d508b 100644 --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -13,6 +13,7 @@ #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _Py_FatalErrorFormat() #include "pycore_pylifecycle.h" // _Py_IsInterpreterFinalizing() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "_iomodule.h" @@ -421,8 +422,7 @@ buffered_dealloc(PyObject *op) return; _PyObject_GC_UNTRACK(self); self->ok = 0; - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); if (self->buffer) { PyMem_Free(self->buffer); self->buffer = NULL; @@ -2312,8 +2312,7 @@ bufferedrwpair_dealloc(PyObject *op) rwpair *self = rwpair_CAST(op); PyTypeObject *tp = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); (void)bufferedrwpair_clear(op); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c index e45a2d1a16dcba..400edbce55c4c8 100644 --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -1,6 +1,7 @@ #include "Python.h" #include "pycore_object.h" #include "pycore_sysmodule.h" // _PySys_GetSizeOf() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include // offsetof() #include "_iomodule.h" @@ -583,9 +584,9 @@ _io_BytesIO_readinto_impl(bytesio *self, Py_buffer *buffer) len = 0; } - memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len); assert(self->pos + len < PY_SSIZE_T_MAX); assert(len >= 0); + memcpy(buffer->buf, PyBytes_AS_STRING(self->buf) + self->pos, len); self->pos += len; return PyLong_FromSsize_t(len); @@ -903,8 +904,7 @@ bytesio_dealloc(PyObject *op) } Py_CLEAR(self->buf); Py_CLEAR(self->dict); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); tp->tp_free(self); Py_DECREF(tp); } diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index 8fcb27049d6c7c..26537fc6395e9f 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -4,6 +4,7 @@ #include "pycore_fileutils.h" // _Py_BEGIN_SUPPRESS_IPH #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include // bool #ifdef HAVE_UNISTD_H @@ -570,9 +571,7 @@ fileio_dealloc(PyObject *op) PyMem_Free(self->stat_atopen); self->stat_atopen = NULL; } - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); (void)fileio_clear(op); PyTypeObject *tp = Py_TYPE(op); diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index cd4c7e7cead277..044f6b7803c571 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -14,6 +14,7 @@ #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_object.h" // _PyType_HasFeature() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include // offsetof() #include "_iomodule.h" @@ -383,8 +384,7 @@ iobase_dealloc(PyObject *op) } PyTypeObject *tp = Py_TYPE(self); _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); Py_CLEAR(self->dict); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_io/stringio.c b/Modules/_io/stringio.c index 9d1bfa3ea05cea..b073200bf21204 100644 --- a/Modules/_io/stringio.c +++ b/Modules/_io/stringio.c @@ -1,6 +1,7 @@ #include "Python.h" #include // offsetof() #include "pycore_object.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "_iomodule.h" /* Implementation note: the buffer is always at least one character longer @@ -628,9 +629,7 @@ stringio_dealloc(PyObject *op) } PyUnicodeWriter_Discard(self->writer); (void)stringio_clear(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); tp->tp_free(self); Py_DECREF(tp); } diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 86328e46a7b131..5354cf63442599 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -16,6 +16,7 @@ #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "_iomodule.h" @@ -1469,8 +1470,7 @@ textiowrapper_dealloc(PyObject *op) return; self->ok = 0; _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); (void)textiowrapper_clear(op); tp->tp_free(self); Py_DECREF(tp); @@ -1578,6 +1578,8 @@ _io_TextIOWrapper_detach_impl(textio *self) static int _textiowrapper_writeflush(textio *self) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + if (self->pending_bytes == NULL) return 0; @@ -3173,8 +3175,9 @@ _io_TextIOWrapper_close_impl(textio *self) } static PyObject * -textiowrapper_iternext(PyObject *op) +textiowrapper_iternext_lock_held(PyObject *op) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); PyObject *line; textio *self = textio_CAST(op); @@ -3210,6 +3213,16 @@ textiowrapper_iternext(PyObject *op) return line; } +static PyObject * +textiowrapper_iternext(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = textiowrapper_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + /*[clinic input] @critical_section @getter diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c index 3e783b9da45652..950b7fe241ccde 100644 --- a/Modules/_io/winconsoleio.c +++ b/Modules/_io/winconsoleio.c @@ -10,6 +10,7 @@ #include "pycore_fileutils.h" // _Py_BEGIN_SUPPRESS_IPH #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #ifdef HAVE_WINDOWS_CONSOLE_IO @@ -518,8 +519,7 @@ winconsoleio_dealloc(PyObject *op) if (_PyIOBase_finalize(op) < 0) return; _PyObject_GC_UNTRACK(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, self->weakreflist); Py_CLEAR(self->dict); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_json.c b/Modules/_json.c index 89b0a41dd10acb..6c381fab449db2 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -1476,13 +1476,13 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return PyUnicodeWriter_WriteUTF8(writer, "null", 4); + return PyUnicodeWriter_WriteASCII(writer, "null", 4); } else if (obj == Py_True) { - return PyUnicodeWriter_WriteUTF8(writer, "true", 4); + return PyUnicodeWriter_WriteASCII(writer, "true", 4); } else if (obj == Py_False) { - return PyUnicodeWriter_WriteUTF8(writer, "false", 5); + return PyUnicodeWriter_WriteASCII(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1609,6 +1609,12 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs if (*first) { *first = false; + if (s->indent != Py_None) { + if (write_newline_indent(writer, indent_level, indent_cache) < 0) { + Py_DECREF(keystr); + return -1; + } + } } else { if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { @@ -1649,7 +1655,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (PyDict_GET_SIZE(dct) == 0) { /* Fast path */ - return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + return PyUnicodeWriter_WriteASCII(writer, "{}", 2); } if (s->markers != Py_None) { @@ -1676,11 +1682,8 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, if (s->indent != Py_None) { indent_level++; separator = get_item_separator(s, indent_level, indent_cache); - if (separator == NULL || - write_newline_indent(writer, indent_level, indent_cache) < 0) - { + if (separator == NULL) goto bail; - } } if (s->sort_keys || !PyDict_CheckExact(dct)) { @@ -1720,7 +1723,7 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, goto bail; Py_CLEAR(ident); } - if (s->indent != Py_None) { + if (s->indent != Py_None && !first) { indent_level--; if (write_newline_indent(writer, indent_level, indent_cache) < 0) { goto bail; @@ -1753,7 +1756,7 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); + return PyUnicodeWriter_WriteASCII(writer, "[]", 2); } if (s->markers != Py_None) { diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index ad618398d5b824..41e6d48b1dbd9b 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -692,7 +692,17 @@ _locale_nl_langinfo_impl(PyObject *module, int item) result = result != NULL ? result : ""; char *oldloc = NULL; if (langinfo_constants[i].category != LC_CTYPE - && !is_all_ascii(result) + && *result && ( +#ifdef __GLIBC__ + // gh-133740: Always change the locale for ALT_DIGITS and ERA +# ifdef ALT_DIGITS + item == ALT_DIGITS || +# endif +# ifdef ERA + item == ERA || +# endif +#endif + !is_all_ascii(result)) && change_locale(langinfo_constants[i].category, &oldloc) < 0) { return NULL; diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c index 626c176715bdac..f82b446aff2e87 100644 --- a/Modules/_lsprof.c +++ b/Modules/_lsprof.c @@ -631,6 +631,27 @@ _lsprof_Profiler__pystart_callback_impl(ProfilerObject *self, PyObject *code, Py_RETURN_NONE; } +/*[clinic input] +_lsprof.Profiler._pythrow_callback + + code: object + instruction_offset: object + exception: object + / + +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler__pythrow_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *exception) +/*[clinic end generated code: output=0a32988919dfb94c input=fd728fc2c074f5e6]*/ +{ + ptrace_enter_call((PyObject*)self, (void *)code, code); + + Py_RETURN_NONE; +} + /*[clinic input] _lsprof.Profiler._pyreturn_callback @@ -747,7 +768,7 @@ static const struct { } callback_table[] = { {PY_MONITORING_EVENT_PY_START, "_pystart_callback"}, {PY_MONITORING_EVENT_PY_RESUME, "_pystart_callback"}, - {PY_MONITORING_EVENT_PY_THROW, "_pystart_callback"}, + {PY_MONITORING_EVENT_PY_THROW, "_pythrow_callback"}, {PY_MONITORING_EVENT_PY_RETURN, "_pyreturn_callback"}, {PY_MONITORING_EVENT_PY_YIELD, "_pyreturn_callback"}, {PY_MONITORING_EVENT_PY_UNWIND, "_pyreturn_callback"}, @@ -1002,6 +1023,7 @@ static PyMethodDef profiler_methods[] = { _LSPROF_PROFILER_DISABLE_METHODDEF _LSPROF_PROFILER_CLEAR_METHODDEF _LSPROF_PROFILER__PYSTART_CALLBACK_METHODDEF + _LSPROF_PROFILER__PYTHROW_CALLBACK_METHODDEF _LSPROF_PROFILER__PYRETURN_CALLBACK_METHODDEF _LSPROF_PROFILER__CCALL_CALLBACK_METHODDEF _LSPROF_PROFILER__CRETURN_CALLBACK_METHODDEF diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c index f9b4c2a170e9c5..462c2181fa6036 100644 --- a/Modules/_lzmamodule.c +++ b/Modules/_lzmamodule.c @@ -17,6 +17,7 @@ #include +#include "pycore_long.h" // _PyLong_UInt32_Converter() // Blocks output buffer wrappers #include "pycore_blocks_output_buffer.h" @@ -223,8 +224,6 @@ FUNCNAME(PyObject *obj, void *ptr) \ return 1; \ } -INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter) -INT_TYPE_CONVERTER_FUNC(uint64_t, uint64_converter) INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter) INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter) INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter) @@ -254,7 +253,7 @@ parse_filter_spec_lzma(_lzma_state *state, PyObject *spec) return NULL; } if (preset_obj != NULL) { - int ok = uint32_converter(preset_obj, &preset); + int ok = _PyLong_UInt32_Converter(preset_obj, &preset); Py_DECREF(preset_obj); if (!ok) { return NULL; @@ -275,14 +274,14 @@ parse_filter_spec_lzma(_lzma_state *state, PyObject *spec) if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OOO&O&O&O&O&O&O&O&", optnames, &id, &preset_obj, - uint32_converter, &options->dict_size, - uint32_converter, &options->lc, - uint32_converter, &options->lp, - uint32_converter, &options->pb, + _PyLong_UInt32_Converter, &options->dict_size, + _PyLong_UInt32_Converter, &options->lc, + _PyLong_UInt32_Converter, &options->lp, + _PyLong_UInt32_Converter, &options->pb, lzma_mode_converter, &options->mode, - uint32_converter, &options->nice_len, + _PyLong_UInt32_Converter, &options->nice_len, lzma_mf_converter, &options->mf, - uint32_converter, &options->depth)) { + _PyLong_UInt32_Converter, &options->depth)) { PyErr_SetString(PyExc_ValueError, "Invalid filter specifier for LZMA filter"); PyMem_Free(options); @@ -301,7 +300,7 @@ parse_filter_spec_delta(_lzma_state *state, PyObject *spec) lzma_options_delta *options; if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames, - &id, uint32_converter, &dist)) { + &id, _PyLong_UInt32_Converter, &dist)) { PyErr_SetString(PyExc_ValueError, "Invalid filter specifier for delta filter"); return NULL; @@ -325,7 +324,7 @@ parse_filter_spec_bcj(_lzma_state *state, PyObject *spec) lzma_options_bcj *options; if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames, - &id, uint32_converter, &start_offset)) { + &id, _PyLong_UInt32_Converter, &start_offset)) { PyErr_SetString(PyExc_ValueError, "Invalid filter specifier for BCJ filter"); return NULL; @@ -806,7 +805,7 @@ Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) return NULL; } - if (preset_obj != Py_None && !uint32_converter(preset_obj, &preset)) { + if (preset_obj != Py_None && !_PyLong_UInt32_Converter(preset_obj, &preset)) { return NULL; } @@ -1226,7 +1225,7 @@ _lzma_LZMADecompressor_impl(PyTypeObject *type, int format, "Cannot specify memory limit with FORMAT_RAW"); return NULL; } - if (!uint64_converter(memlimit, &memlimit_)) { + if (!_PyLong_UInt64_Converter(memlimit, &memlimit_)) { return NULL; } } diff --git a/Modules/_opcode.c b/Modules/_opcode.c index c295f7b3152577..ef271b6ef566b9 100644 --- a/Modules/_opcode.c +++ b/Modules/_opcode.c @@ -5,7 +5,7 @@ #include "Python.h" #include "compile.h" #include "opcode.h" -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL_MAX #include "pycore_code.h" #include "pycore_compile.h" #include "pycore_intrinsics.h" diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d260f1a68f8c70..2e74d5688629ff 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -5539,17 +5539,16 @@ static int load_counted_binstring(PickleState *st, UnpicklerObject *self, int nbytes) { PyObject *obj; - Py_ssize_t size; + long size; char *s; if (_Unpickler_Read(self, st, &s, nbytes) < 0) return -1; - size = calc_binsize(s, nbytes); + size = calc_binint(s, nbytes); if (size < 0) { - PyErr_Format(st->UnpicklingError, - "BINSTRING exceeds system's maximum size of %zd bytes", - PY_SSIZE_T_MAX); + PyErr_SetString(st->UnpicklingError, + "BINSTRING pickle has negative byte count"); return -1; } diff --git a/Modules/_queuemodule.c b/Modules/_queuemodule.c index 3ee14b61b821d6..01235c77bd7db8 100644 --- a/Modules/_queuemodule.c +++ b/Modules/_queuemodule.c @@ -7,6 +7,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_parking_lot.h" #include "pycore_time.h" // _PyTime_FromSecondsObject() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include #include // offsetof() @@ -221,9 +222,7 @@ simplequeue_dealloc(PyObject *op) PyObject_GC_UnTrack(self); (void)simplequeue_clear(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); tp->tp_free(self); Py_DECREF(tp); } diff --git a/Modules/_randommodule.c b/Modules/_randommodule.c index d5bac2f5b78120..2f4f388ce1161a 100644 --- a/Modules/_randommodule.c +++ b/Modules/_randommodule.c @@ -497,34 +497,32 @@ _random_Random_setstate_impl(RandomObject *self, PyObject *state) _random.Random.getrandbits self: self(type="RandomObject *") - k: int + k: uint64 / getrandbits(k) -> x. Generates an int with k random bits. [clinic start generated code]*/ static PyObject * -_random_Random_getrandbits_impl(RandomObject *self, int k) -/*[clinic end generated code: output=b402f82a2158887f input=87603cd60f79f730]*/ +_random_Random_getrandbits_impl(RandomObject *self, uint64_t k) +/*[clinic end generated code: output=c30ef8435f3433cf input=64226ac13bb4d2a3]*/ { - int i, words; + Py_ssize_t i, words; uint32_t r; uint32_t *wordarray; PyObject *result; - if (k < 0) { - PyErr_SetString(PyExc_ValueError, - "number of bits must be non-negative"); - return NULL; - } - if (k == 0) return PyLong_FromLong(0); if (k <= 32) /* Fast path */ return PyLong_FromUnsignedLong(genrand_uint32(self) >> (32 - k)); - words = (k - 1) / 32 + 1; + if ((k - 1u) / 32u + 1u > PY_SSIZE_T_MAX / 4u) { + PyErr_NoMemory(); + return NULL; + } + words = (k - 1u) / 32u + 1u; wordarray = (uint32_t *)PyMem_Malloc(words * 4); if (wordarray == NULL) { PyErr_NoMemory(); diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 9314ddd9bed5d7..0dc797a9da82a0 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -1,5 +1,16 @@ + /****************************************************************************** + * Python Remote Debugging Module + * + * This module provides functionality to debug Python processes remotely by + * reading their memory and reconstructing stack traces and asyncio task states. + ******************************************************************************/ + #define _GNU_SOURCE +/* ============================================================================ + * HEADERS AND INCLUDES + * ============================================================================ */ + #include #include #include @@ -23,6 +34,51 @@ # define HAVE_PROCESS_VM_READV 0 #endif +/* ============================================================================ + * TYPE DEFINITIONS AND STRUCTURES + * ============================================================================ */ + +#define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset))) +#define CLEAR_PTR_TAG(ptr) (((uintptr_t)(ptr) & ~Py_TAG_BITS)) +#define GET_MEMBER_NO_TAG(type, obj, offset) (type)(CLEAR_PTR_TAG(*(type*)((char*)(obj) + (offset)))) + +/* Size macros for opaque buffers */ +#define SIZEOF_BYTES_OBJ sizeof(PyBytesObject) +#define SIZEOF_CODE_OBJ sizeof(PyCodeObject) +#define SIZEOF_GEN_OBJ sizeof(PyGenObject) +#define SIZEOF_INTERP_FRAME sizeof(_PyInterpreterFrame) +#define SIZEOF_LLIST_NODE sizeof(struct llist_node) +#define SIZEOF_PAGE_CACHE_ENTRY sizeof(page_cache_entry_t) +#define SIZEOF_PYOBJECT sizeof(PyObject) +#define SIZEOF_SET_OBJ sizeof(PySetObject) +#define SIZEOF_TASK_OBJ 4096 +#define SIZEOF_THREAD_STATE sizeof(PyThreadState) +#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject) +#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject) +#define SIZEOF_LONG_OBJ sizeof(PyLongObject) + +// Calculate the minimum buffer size needed to read interpreter state fields +// We need to read code_object_generation and potentially tlbc_generation +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifdef Py_GIL_DISABLED +#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)) +#else +#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)) +#endif +#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) + + + +// Copied from Modules/_asynciomodule.c because it's not exported + struct _Py_AsyncioModuleDebugOffsets { struct _asyncio_task_object { uint64_t size; @@ -45,147 +101,539 @@ struct _Py_AsyncioModuleDebugOffsets { } asyncio_thread_state; }; -// Helper to chain exceptions and avoid repetitions +/* ============================================================================ + * STRUCTSEQ TYPE DEFINITIONS + * ============================================================================ */ + +// TaskInfo structseq type - replaces 4-tuple (task_id, task_name, coroutine_stack, awaited_by) +static PyStructSequence_Field TaskInfo_fields[] = { + {"task_id", "Task ID (memory address)"}, + {"task_name", "Task name"}, + {"coroutine_stack", "Coroutine call stack"}, + {"awaited_by", "Tasks awaiting this task"}, + {NULL} +}; + +static PyStructSequence_Desc TaskInfo_desc = { + "_remote_debugging.TaskInfo", + "Information about an asyncio task", + TaskInfo_fields, + 4 +}; + +// FrameInfo structseq type - replaces 3-tuple (filename, lineno, funcname) +static PyStructSequence_Field FrameInfo_fields[] = { + {"filename", "Source code filename"}, + {"lineno", "Line number"}, + {"funcname", "Function name"}, + {NULL} +}; + +static PyStructSequence_Desc FrameInfo_desc = { + "_remote_debugging.FrameInfo", + "Information about a frame", + FrameInfo_fields, + 3 +}; + +// CoroInfo structseq type - replaces 2-tuple (call_stack, task_name) +static PyStructSequence_Field CoroInfo_fields[] = { + {"call_stack", "Coroutine call stack"}, + {"task_name", "Task name"}, + {NULL} +}; + +static PyStructSequence_Desc CoroInfo_desc = { + "_remote_debugging.CoroInfo", + "Information about a coroutine", + CoroInfo_fields, + 2 +}; + +// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info) +static PyStructSequence_Field ThreadInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"frame_info", "Frame information"}, + {NULL} +}; + +static PyStructSequence_Desc ThreadInfo_desc = { + "_remote_debugging.ThreadInfo", + "Information about a thread", + ThreadInfo_fields, + 2 +}; + +// AwaitedInfo structseq type - replaces 2-tuple (tid, awaited_by_list) +static PyStructSequence_Field AwaitedInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"awaited_by", "List of tasks awaited by this thread"}, + {NULL} +}; + +static PyStructSequence_Desc AwaitedInfo_desc = { + "_remote_debugging.AwaitedInfo", + "Information about what a thread is awaiting", + AwaitedInfo_fields, + 2 +}; + +typedef struct { + PyObject *func_name; + PyObject *file_name; + int first_lineno; + PyObject *linetable; // bytes + uintptr_t addr_code_adaptive; +} CachedCodeMetadata; + +typedef struct { + /* Types */ + PyTypeObject *RemoteDebugging_Type; + PyTypeObject *TaskInfo_Type; + PyTypeObject *FrameInfo_Type; + PyTypeObject *CoroInfo_Type; + PyTypeObject *ThreadInfo_Type; + PyTypeObject *AwaitedInfo_Type; +} RemoteDebuggingState; + +typedef struct { + PyObject_HEAD + proc_handle_t handle; + uintptr_t runtime_start_address; + struct _Py_DebugOffsets debug_offsets; + int async_debug_offsets_available; + struct _Py_AsyncioModuleDebugOffsets async_debug_offsets; + uintptr_t interpreter_addr; + uintptr_t tstate_addr; + uint64_t code_object_generation; + _Py_hashtable_t *code_object_cache; + int debug; + int only_active_thread; + RemoteDebuggingState *cached_state; // Cached module state +#ifdef Py_GIL_DISABLED + // TLBC cache invalidation tracking + uint32_t tlbc_generation; // Track TLBC index pool changes + _Py_hashtable_t *tlbc_cache; // Cache of TLBC arrays by code object address +#endif +} RemoteUnwinderObject; + +#define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op)) + +typedef struct +{ + int lineno; + int end_lineno; + int column; + int end_column; +} LocationInfo; + +typedef struct { + uintptr_t remote_addr; + size_t size; + void *local_copy; +} StackChunkInfo; + +typedef struct { + StackChunkInfo *chunks; + size_t count; +} StackChunkList; + +#include "clinic/_remote_debugging_module.c.h" + +/*[clinic input] +module _remote_debugging +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=5f507d5b2e76a7f7]*/ + + +/* ============================================================================ + * FORWARD DECLARATIONS + * ============================================================================ */ + +static inline int +is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr +); + +typedef int (*thread_processor_func)( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +); + +typedef int (*set_entry_processor_func)( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +); + + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to +); + +static int +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +); + +/* Forward declarations for task parsing functions */ +static int parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* address_of_code_object, + uintptr_t* previous_frame +); + +static int +parse_async_frame_chain( + RemoteUnwinderObject *unwinder, + PyObject *calls, + uintptr_t address_of_thread, + uintptr_t running_task_code_obj +); + +static int read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr); +static int read_Py_ssize_t(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t *size); + +static int process_task_and_waiters(RemoteUnwinderObject *unwinder, uintptr_t task_addr, PyObject *result); +static int process_task_awaited_by(RemoteUnwinderObject *unwinder, uintptr_t task_address, set_entry_processor_func processor, void *context); +static int find_running_task_in_thread(RemoteUnwinderObject *unwinder, uintptr_t thread_state_addr, uintptr_t *running_task_addr); +static int get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr); +static int append_awaited_by(RemoteUnwinderObject *unwinder, unsigned long tid, uintptr_t head_addr, PyObject *result); + +/* ============================================================================ + * UTILITY FUNCTIONS AND HELPERS + * ============================================================================ */ + +#define set_exception_cause(unwinder, exc_type, message) \ + if (unwinder->debug) { \ + _set_debug_exception_cause(exc_type, message); \ + } + static void -chain_exceptions(PyObject *exception, const char *string) +cached_code_metadata_destroy(void *ptr) { - PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(exception, string); - _PyErr_ChainExceptions1(exc); + CachedCodeMetadata *meta = (CachedCodeMetadata *)ptr; + Py_DECREF(meta->func_name); + Py_DECREF(meta->file_name); + Py_DECREF(meta->linetable); + PyMem_RawFree(meta); } -// Get the PyAsyncioDebug section address for any platform -static uintptr_t -_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) +static inline RemoteDebuggingState * +RemoteDebugging_GetState(PyObject *module) { - uintptr_t address; - -#ifdef MS_WINDOWS - // On Windows, search for asyncio debug in executable or DLL - address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); -#elif defined(__linux__) - // On Linux, search for asyncio debug in executable or DLL - address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); -#elif defined(__APPLE__) && TARGET_OS_OSX - // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - if (address == 0) { - PyErr_Clear(); - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - } -#else - Py_UNREACHABLE(); -#endif + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (RemoteDebuggingState *)state; +} - return address; +static inline RemoteDebuggingState * +RemoteDebugging_GetStateFromType(PyTypeObject *type) +{ + PyObject *module = PyType_GetModule(type); + assert(module != NULL); + return RemoteDebugging_GetState(module); } -static inline int -read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) +static inline RemoteDebuggingState * +RemoteDebugging_GetStateFromObject(PyObject *obj) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); - if (result < 0) { - return -1; + RemoteUnwinderObject *unwinder = (RemoteUnwinderObject *)obj; + if (unwinder->cached_state == NULL) { + unwinder->cached_state = RemoteDebugging_GetStateFromType(Py_TYPE(obj)); } - return 0; + return unwinder->cached_state; } static inline int -read_Py_ssize_t(proc_handle_t *handle, uintptr_t address, Py_ssize_t *size) +RemoteDebugging_InitState(RemoteDebuggingState *st) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); - if (result < 0) { - return -1; - } return 0; } static int -read_py_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) +is_prerelease_version(uint64_t version) { - if (read_ptr(handle, address, ptr_addr)) { - return -1; - } - *ptr_addr &= ~Py_TAG_BITS; - return 0; + return (version & 0xF0) != 0xF0; } -static int -read_char(proc_handle_t *handle, uintptr_t address, char *result) +static inline int +validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets) { - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(char), result); - if (res < 0) { + if (memcmp(debug_offsets->cookie, _Py_Debug_Cookie, sizeof(debug_offsets->cookie)) != 0) { + // The remote is probably running a Python version predating debug offsets. + PyErr_SetString( + PyExc_RuntimeError, + "Can't determine the Python version of the remote process"); + return -1; + } + + // Assume debug offsets could change from one pre-release version to another, + // or one minor version to another, but are stable across patch versions. + if (is_prerelease_version(Py_Version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach from a pre-release Python interpreter" + " to a process running a different Python version"); + return -1; + } + + if (is_prerelease_version(debug_offsets->version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach to a pre-release Python interpreter" + " from a process running a different Python version"); + return -1; + } + + unsigned int remote_major = (debug_offsets->version >> 24) & 0xFF; + unsigned int remote_minor = (debug_offsets->version >> 16) & 0xFF; + + if (PY_MAJOR_VERSION != remote_major || PY_MINOR_VERSION != remote_minor) { + PyErr_Format( + PyExc_RuntimeError, + "Can't attach from a Python %d.%d process to a Python %d.%d process", + PY_MAJOR_VERSION, PY_MINOR_VERSION, remote_major, remote_minor); + return -1; + } + + // The debug offsets differ between free threaded and non-free threaded builds. + if (_Py_Debug_Free_Threaded && !debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach from a free-threaded Python process" + " to a process running a non-free-threaded version"); + return -1; + } + + if (!_Py_Debug_Free_Threaded && debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach to a free-threaded Python process" + " from a process running a non-free-threaded version"); return -1; } + return 0; } +// Generic function to iterate through all threads static int -read_sized_int(proc_handle_t *handle, uintptr_t address, void *result, size_t size) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, size, result); - if (res < 0) { +iterate_threads( + RemoteUnwinderObject *unwinder, + thread_processor_func processor, + void *context +) { + uintptr_t thread_state_addr; + unsigned long tid = 0; + + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + unwinder->interpreter_addr + unwinder->debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state"); return -1; } + + while (thread_state_addr != 0) { + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + unwinder->debug_offsets.thread_state.native_thread_id, + sizeof(tid), + &tid)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread ID"); + return -1; + } + + // Call the processor function for this thread + if (processor(unwinder, thread_state_addr, tid, context) < 0) { + return -1; + } + + // Move to next thread + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + unwinder->debug_offsets.thread_state.next, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next thread state"); + return -1; + } + } + return 0; } +// Generic function to iterate through set entries static int -read_unsigned_long(proc_handle_t *handle, uintptr_t address, unsigned long *result) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(unsigned long), result); - if (res < 0) { +iterate_set_entries( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + set_entry_processor_func processor, + void *context +) { + char set_object[SIZEOF_SET_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, set_addr, + SIZEOF_SET_OBJ, set_object) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set object"); return -1; } + + Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); + Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; + uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + + Py_ssize_t i = 0; + Py_ssize_t els = 0; + while (i < set_len && els < num_els) { + uintptr_t key_addr; + if (read_py_ptr(unwinder, table_ptr, &key_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry key"); + return -1; + } + + if ((void*)key_addr != NULL) { + Py_ssize_t ref_cnt; + if (read_Py_ssize_t(unwinder, table_ptr, &ref_cnt) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry ref count"); + return -1; + } + + if (ref_cnt) { + // Process this valid set entry + if (processor(unwinder, key_addr, context) < 0) { + return -1; + } + els++; + } + } + table_ptr += sizeof(void*) * 2; + i++; + } + return 0; } +// Processor function for task waiters +static int +process_waiter_task( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *result = (PyObject *)context; + return process_task_and_waiters(unwinder, key_addr, result); +} + +// Processor function for parsing tasks in sets +static int +process_task_parser( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *awaited_by = (PyObject *)context; + return parse_task(unwinder, key_addr, awaited_by); +} + +/* ============================================================================ + * MEMORY READING FUNCTIONS + * ============================================================================ */ + +#define DEFINE_MEMORY_READER(type_name, c_type, error_msg) \ +static inline int \ +read_##type_name(RemoteUnwinderObject *unwinder, uintptr_t address, c_type *result) \ +{ \ + int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(c_type), result); \ + if (res < 0) { \ + set_exception_cause(unwinder, PyExc_RuntimeError, error_msg); \ + return -1; \ + } \ + return 0; \ +} + +DEFINE_MEMORY_READER(ptr, uintptr_t, "Failed to read pointer from remote memory") +DEFINE_MEMORY_READER(Py_ssize_t, Py_ssize_t, "Failed to read Py_ssize_t from remote memory") +DEFINE_MEMORY_READER(char, char, "Failed to read char from remote memory") + static int -read_pyobj(proc_handle_t *handle, uintptr_t address, PyObject *ptr_addr) +read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) { - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(PyObject), ptr_addr); - if (res < 0) { + if (read_ptr(unwinder, address, ptr_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Python pointer"); return -1; } + *ptr_addr &= ~Py_TAG_BITS; return 0; } +/* ============================================================================ + * PYTHON OBJECT READING FUNCTIONS + * ============================================================================ */ + static PyObject * read_py_str( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, + RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->unicode_object.length, - sizeof(Py_ssize_t), - &len + // Read the entire PyUnicodeObject at once + char unicode_obj[SIZEOF_UNICODE_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_UNICODE_OBJ, + unicode_obj ); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyUnicodeObject"); goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid string length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid string length in remote Unicode object"); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for string reading"); return NULL; } - size_t offset = debug_offsets->unicode_object.asciiobject_size; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = unwinder->debug_offsets.unicode_object.asciiobject_size; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read string data from remote memory"); goto err; } buf[len] = '\0'; result = PyUnicode_FromStringAndSize(buf, len); if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyUnicode from remote string data"); goto err; } @@ -202,39 +650,52 @@ read_py_str( static PyObject * read_py_bytes( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, - uintptr_t address + RemoteUnwinderObject *unwinder, + uintptr_t address, + Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->bytes_object.ob_size, - sizeof(Py_ssize_t), - &len + // Read the entire PyBytesObject at once + char bytes_obj[SIZEOF_BYTES_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_BYTES_OBJ, + bytes_obj ); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyBytesObject"); goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid bytes length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid bytes length in remote bytes object"); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for bytes reading"); return NULL; } - size_t offset = debug_offsets->bytes_object.ob_sval; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = unwinder->debug_offsets.bytes_object.ob_sval; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read bytes data from remote memory"); goto err; } buf[len] = '\0'; result = PyBytes_FromStringAndSize(buf, len); if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyBytes from remote bytes data"); goto err; } @@ -249,45 +710,64 @@ read_py_bytes( return NULL; } - - static long -read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address) +read_py_long( + RemoteUnwinderObject *unwinder, + uintptr_t address +) { unsigned int shift = PYLONG_BITS_IN_DIGIT; - Py_ssize_t size; - uintptr_t lv_tag; - - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->long_object.lv_tag, - sizeof(uintptr_t), - &lv_tag); + // Read the entire PyLongObject at once + char long_obj[SIZEOF_LONG_OBJ]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + unwinder->debug_offsets.long_object.size, + long_obj); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLongObject"); return -1; } + uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag); int negative = (lv_tag & 3) == 2; - size = lv_tag >> 3; + Py_ssize_t size = lv_tag >> 3; if (size == 0) { return 0; } - digit *digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); - if (!digits) { - PyErr_NoMemory(); - return -1; - } + // If the long object has inline digits, use them directly + digit *digits; + if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { + // For small integers, digits are inline in the long_value.ob_digit array + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); + return -1; + } + memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); + } else { + // For larger integers, we need to read the digits separately + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for large PyLong"); + return -1; + } - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->long_object.ob_digit, - sizeof(digit) * size, - digits - ); - if (bytes_read < 0) { - goto error; + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address + unwinder->debug_offsets.long_object.ob_digit, + sizeof(digit) * size, + digits + ); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLong digits from remote memory"); + goto error; + } } long long value = 0; @@ -310,485 +790,840 @@ read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address return -1; } -static PyObject * -parse_task_name( - proc_handle_t *handle, - _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address -) { - uintptr_t task_name_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_name, - &task_name_addr); - if (err) { - return NULL; - } - - // The task name can be a long or a string so we need to check the type +/* ============================================================================ + * ASYNCIO DEBUG FUNCTIONS + * ============================================================================ */ - PyObject task_name_obj; - err = read_pyobj( - handle, - task_name_addr, - &task_name_obj); - if (err) { - return NULL; - } +// Get the PyAsyncioDebug section address for any platform +static uintptr_t +_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) +{ + uintptr_t address; - unsigned long flags; - err = read_unsigned_long( - handle, - (uintptr_t)task_name_obj.ob_type + offsets->type_object.tp_flags, - &flags); - if (err) { - return NULL; +#ifdef MS_WINDOWS + // On Windows, search for asyncio debug in executable or DLL + address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); } - - if ((flags & Py_TPFLAGS_LONG_SUBCLASS)) { - long res = read_py_long(handle, offsets, task_name_addr); +#elif defined(__linux__) + // On Linux, search for asyncio debug in executable or DLL + address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__APPLE__) && TARGET_OS_OSX + // On macOS, try libpython first, then fall back to python + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + PyErr_Clear(); + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + } + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#else + Py_UNREACHABLE(); +#endif + + return address; +} + +static int +read_async_debug( + RemoteUnwinderObject *unwinder +) { + uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle); + if (!async_debug_addr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to get AsyncioDebug address"); + return -1; + } + + size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets); + if (result < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read AsyncioDebug offsets"); + } + return result; +} + +/* ============================================================================ + * ASYNCIO TASK PARSING FUNCTIONS + * ============================================================================ */ + +static PyObject * +parse_task_name( + RemoteUnwinderObject *unwinder, + uintptr_t task_address +) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); + return NULL; + } + + uintptr_t task_name_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); + + // The task name can be a long or a string so we need to check the type + char task_name_obj[SIZEOF_PYOBJECT]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_name_addr, + SIZEOF_PYOBJECT, + task_name_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name object"); + return NULL; + } + + // Now read the type object to get the flags + char type_obj[SIZEOF_TYPE_OBJ]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + GET_MEMBER(uintptr_t, task_name_obj, unwinder->debug_offsets.pyobject.ob_type), + SIZEOF_TYPE_OBJ, + type_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name type object"); + return NULL; + } + + if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) { + long res = read_py_long(unwinder, task_name_addr); if (res == -1) { - chain_exceptions(PyExc_RuntimeError, "Failed to get task name"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed"); return NULL; } return PyUnicode_FromFormat("Task-%d", res); } - if(!(flags & Py_TPFLAGS_UNICODE_SUBCLASS)) { + if(!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) { PyErr_SetString(PyExc_RuntimeError, "Invalid task name object"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name object is neither long nor unicode"); return NULL; } return read_py_str( - handle, - offsets, + unwinder, task_name_addr, 255 ); } -static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame -); +static int parse_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *awaited_by +) { + return process_task_awaited_by(unwinder, task_address, process_task_parser, awaited_by); +} static int -parse_coro_chain( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t coro_address, +handle_yield_from_frame( + RemoteUnwinderObject *unwinder, + uintptr_t gi_iframe_addr, + uintptr_t gen_type_addr, PyObject *render_to ) { - assert((void*)coro_address != NULL); - - uintptr_t gen_type_addr; - int err = read_ptr( - handle, - coro_address + offsets->pyobject.ob_type, - &gen_type_addr); - if (err) { - return -1; - } - - PyObject* name = NULL; - uintptr_t prev_frame; - if (parse_frame_object( - handle, - &name, - offsets, - coro_address + offsets->gen_object.gi_iframe, - &prev_frame) - < 0) - { + // Read the entire interpreter frame at once + char iframe[SIZEOF_INTERP_FRAME]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + gi_iframe_addr, + SIZEOF_INTERP_FRAME, + iframe); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame in yield_from handler"); return -1; } - if (PyList_Append(render_to, name)) { - Py_DECREF(name); - return -1; - } - Py_DECREF(name); - - int8_t gi_frame_state; - err = read_sized_int( - handle, - coro_address + offsets->gen_object.gi_frame_state, - &gi_frame_state, - sizeof(int8_t) - ); - if (err) { + if (GET_MEMBER(char, iframe, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR) { + PyErr_SetString( + PyExc_RuntimeError, + "generator doesn't own its frame \\_o_/"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame ownership mismatch in yield_from"); return -1; } - if (gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { - char owner; - err = read_char( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.owner, - &owner - ); - if (err) { - return -1; - } - if (owner != FRAME_OWNED_BY_GENERATOR) { - PyErr_SetString( - PyExc_RuntimeError, - "generator doesn't own its frame \\_o_/"); - return -1; - } + uintptr_t stackpointer_addr = GET_MEMBER_NO_TAG(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); - uintptr_t stackpointer_addr; + if ((void*)stackpointer_addr != NULL) { + uintptr_t gi_await_addr; err = read_py_ptr( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.stackpointer, - &stackpointer_addr); + unwinder, + stackpointer_addr - sizeof(void*), + &gi_await_addr); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await address"); return -1; } - if ((void*)stackpointer_addr != NULL) { - uintptr_t gi_await_addr; - err = read_py_ptr( - handle, - stackpointer_addr - sizeof(void*), - &gi_await_addr); + if ((void*)gi_await_addr != NULL) { + uintptr_t gi_await_addr_type_addr; + err = read_ptr( + unwinder, + gi_await_addr + unwinder->debug_offsets.pyobject.ob_type, + &gi_await_addr_type_addr); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await type address"); return -1; } - if ((void*)gi_await_addr != NULL) { - uintptr_t gi_await_addr_type_addr; - int err = read_ptr( - handle, - gi_await_addr + offsets->pyobject.ob_type, - &gi_await_addr_type_addr); + if (gen_type_addr == gi_await_addr_type_addr) { + /* This needs an explanation. We always start with parsing + native coroutine / generator frames. Ultimately they + are awaiting on something. That something can be + a native coroutine frame or... an iterator. + If it's the latter -- we can't continue building + our chain. So the condition to bail out of this is + to do that when the type of the current coroutine + doesn't match the type of whatever it points to + in its cr_await. + */ + err = parse_coro_chain(unwinder, gi_await_addr, render_to); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain in yield_from"); return -1; } - - if (gen_type_addr == gi_await_addr_type_addr) { - /* This needs an explanation. We always start with parsing - native coroutine / generator frames. Ultimately they - are awaiting on something. That something can be - a native coroutine frame or... an iterator. - If it's the latter -- we can't continue building - our chain. So the condition to bail out of this is - to do that when the type of the current coroutine - doesn't match the type of whatever it points to - in its cr_await. - */ - err = parse_coro_chain( - handle, - offsets, - async_offsets, - gi_await_addr, - render_to - ); - if (err) { - return -1; - } - } } } - } return 0; } - static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *awaited_by, - int recurse_task -); +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +) { + assert((void*)coro_address != NULL); + // Read the entire generator object at once + char gen_object[SIZEOF_GEN_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + coro_address, + SIZEOF_GEN_OBJ, + gen_object); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read generator object in coro chain"); + return -1; + } -static int -parse_task( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *render_to, - int recurse_task -) { - char is_task; - int err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_is_task, - &is_task); - if (err) { + int8_t frame_state = GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state); + if (frame_state == FRAME_CLEARED) { + return 0; + } + + uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type); + + PyObject* name = NULL; + + // Parse the previous frame using the gi_iframe from local copy + uintptr_t prev_frame; + uintptr_t gi_iframe_addr = coro_address + unwinder->debug_offsets.gen_object.gi_iframe; + uintptr_t address_of_code_object = 0; + if (parse_frame_object(unwinder, &name, gi_iframe_addr, &address_of_code_object, &prev_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in coro chain"); return -1; } - PyObject* result = PyList_New(0); - if (result == NULL) { + if (!name) { + return 0; + } + + if (PyList_Append(render_to, name)) { + Py_DECREF(name); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to coro chain"); return -1; } + Py_DECREF(name); + + if (frame_state == FRAME_SUSPENDED_YIELD_FROM) { + return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to); + } + + return 0; +} + +static PyObject* +create_task_result( + RemoteUnwinderObject *unwinder, + uintptr_t task_address +) { + PyObject* result = NULL; + PyObject *call_stack = NULL; + PyObject *tn = NULL; + char task_obj[SIZEOF_TASK_OBJ]; + uintptr_t coro_addr; - PyObject *call_stack = PyList_New(0); + // Create call_stack first since it's the first tuple element + call_stack = PyList_New(0); if (call_stack == NULL) { - goto err; + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create call stack list"); + goto error; } - if (PyList_Append(result, call_stack)) { - Py_DECREF(call_stack); - goto err; + + // Create task name/address for second tuple element + tn = PyLong_FromUnsignedLongLong(task_address); + if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name/address"); + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ - Py_DECREF(call_stack); - if (is_task) { - PyObject *tn = NULL; - if (recurse_task) { - tn = parse_task_name( - handle, offsets, async_offsets, task_address); - } else { - tn = PyLong_FromUnsignedLongLong(task_address); - } - if (tn == NULL) { - goto err; - } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); - goto err; - } - Py_DECREF(tn); + // Parse coroutine chain + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object for coro chain"); + goto error; + } - uintptr_t coro_addr; - err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_coro, - &coro_addr); - if (err) { - goto err; - } + coro_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); - if ((void*)coro_addr != NULL) { - err = parse_coro_chain( - handle, - offsets, - async_offsets, - coro_addr, - call_stack - ); - if (err) { - goto err; - } + if ((void*)coro_addr != NULL) { + if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain"); + goto error; + } - if (PyList_Reverse(call_stack)) { - goto err; - } + if (PyList_Reverse(call_stack)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reverse call stack"); + goto error; } } - if (PyList_Append(render_to, result)) { - goto err; + // Create final CoroInfo result + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create CoroInfo"); + goto error; + } + + // PyStructSequence_SetItem steals references, so we don't need to DECREF on success + PyStructSequence_SetItem(result, 0, call_stack); // This steals the reference + PyStructSequence_SetItem(result, 1, tn); // This steals the reference + + return result; + +error: + Py_XDECREF(result); + Py_XDECREF(call_stack); + Py_XDECREF(tn); + return NULL; +} + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to +) { + char is_task; + PyObject* result = NULL; + int err; + + err = read_char( + unwinder, + task_address + unwinder->async_debug_offsets.asyncio_task_object.task_is_task, + &is_task); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read is_task flag"); + goto error; } - if (recurse_task) { - PyObject *awaited_by = PyList_New(0); - if (awaited_by == NULL) { - goto err; + if (is_task) { + result = create_task_result(unwinder, task_address); + if (!result) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task result"); + goto error; } - if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto err; + } else { + // Create an empty CoroInfo for non-task objects + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty CoroInfo"); + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ - Py_DECREF(awaited_by); - - if (parse_task_awaited_by(handle, offsets, async_offsets, - task_address, awaited_by, 1) - ) { - goto err; + PyObject *empty_list = PyList_New(0); + if (empty_list == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty list"); + goto error; } + PyObject *task_name = PyLong_FromUnsignedLongLong(task_address); + if (task_name == NULL) { + Py_DECREF(empty_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name"); + goto error; + } + PyStructSequence_SetItem(result, 0, empty_list); // This steals the reference + PyStructSequence_SetItem(result, 1, task_name); // This steals the reference + } + if (PyList_Append(render_to, result)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); + goto error; } - Py_DECREF(result); + Py_DECREF(result); return 0; -err: - Py_DECREF(result); +error: + Py_XDECREF(result); return -1; } static int -parse_tasks_in_set( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t set_addr, - PyObject *awaited_by, - int recurse_task +process_single_task_node( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject **task_info, + PyObject *result ) { - uintptr_t set_obj; - if (read_py_ptr( - handle, - set_addr, - &set_obj) - ) { - return -1; + PyObject *tn = NULL; + PyObject *current_awaited_by = NULL; + PyObject *task_id = NULL; + PyObject *result_item = NULL; + PyObject *coroutine_stack = NULL; + + tn = parse_task_name(unwinder, task_addr); + if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name in single task node"); + goto error; } - Py_ssize_t num_els; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.used, - &num_els) - ) { - return -1; + current_awaited_by = PyList_New(0); + if (current_awaited_by == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list in single task node"); + goto error; } - Py_ssize_t set_len; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.mask, - &set_len) - ) { - return -1; + // Extract the coroutine stack for this task + coroutine_stack = PyList_New(0); + if (coroutine_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coroutine stack list in single task node"); + goto error; } - set_len++; // The set contains the `mask+1` element slots. - uintptr_t table_ptr; - if (read_ptr( - handle, - set_obj + offsets->set_object.table, - &table_ptr) - ) { - return -1; + if (parse_task(unwinder, task_addr, coroutine_stack) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task coroutine stack in single task node"); + goto error; } - Py_ssize_t i = 0; - Py_ssize_t els = 0; - while (i < set_len) { - uintptr_t key_addr; - if (read_py_ptr(handle, table_ptr, &key_addr)) { - return -1; - } + task_id = PyLong_FromUnsignedLongLong(task_addr); + if (task_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); + goto error; + } - if ((void*)key_addr != NULL) { - Py_ssize_t ref_cnt; - if (read_Py_ssize_t(handle, table_ptr, &ref_cnt)) { - return -1; - } + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result_item = PyStructSequence_New(state->TaskInfo_Type); + if (result_item == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create TaskInfo in single task node"); + goto error; + } - if (ref_cnt) { - // if 'ref_cnt=0' it's a set dummy marker - - if (parse_task( - handle, - offsets, - async_offsets, - key_addr, - awaited_by, - recurse_task - ) - ) { - return -1; - } + PyStructSequence_SetItem(result_item, 0, task_id); // steals ref + PyStructSequence_SetItem(result_item, 1, tn); // steals ref + PyStructSequence_SetItem(result_item, 2, coroutine_stack); // steals ref + PyStructSequence_SetItem(result_item, 3, current_awaited_by); // steals ref - if (++els == num_els) { - break; - } - } - } + // References transferred to tuple + task_id = NULL; + tn = NULL; + coroutine_stack = NULL; + current_awaited_by = NULL; - table_ptr += sizeof(void*) * 2; - i++; + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append result item in single task node"); + return -1; + } + if (task_info != NULL) { + *task_info = result_item; + } + Py_DECREF(result_item); + + // Get back current_awaited_by reference for parse_task_awaited_by + current_awaited_by = PyStructSequence_GetItem(result_item, 3); + if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); + // No cleanup needed here since all references were transferred to result_item + // and result_item was already added to result list and decreffed + return -1; } + return 0; + +error: + Py_XDECREF(tn); + Py_XDECREF(current_awaited_by); + Py_XDECREF(task_id); + Py_XDECREF(result_item); + Py_XDECREF(coroutine_stack); + return -1; } +// Thread processor for get_all_awaited_by +static int +process_thread_for_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + uintptr_t head_addr = thread_state_addr + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; + return append_awaited_by(unwinder, tid, head_addr, result); +} +// Generic function to process task awaited_by static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, +process_task_awaited_by( + RemoteUnwinderObject *unwinder, uintptr_t task_address, - PyObject *awaited_by, - int recurse_task + set_entry_processor_func processor, + void *context ) { - uintptr_t task_ab_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &task_ab_addr); - if (err) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); return -1; } + uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); if ((void*)task_ab_addr == NULL) { - return 0; + return 0; // No tasks waiting for this one } - char awaited_by_is_a_set; - err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by_is_set, - &awaited_by_is_a_set); - if (err) { - return -1; - } + char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); if (awaited_by_is_a_set) { - if (parse_tasks_in_set( - handle, - offsets, - async_offsets, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - awaited_by, - recurse_task - ) - ) { - return -1; - } + return iterate_set_entries(unwinder, task_ab_addr, processor, context); } else { - uintptr_t sub_task; - if (read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &sub_task) - ) { - return -1; - } - - if (parse_task( - handle, - offsets, - async_offsets, - sub_task, - awaited_by, - recurse_task - ) - ) { - return -1; - } + // Single task waiting + return processor(unwinder, task_ab_addr, context); } - - return 0; } -typedef struct +static int +process_running_task_chain( + RemoteUnwinderObject *unwinder, + uintptr_t running_task_addr, + uintptr_t thread_state_addr, + PyObject *result +) { + uintptr_t running_task_code_obj = 0; + if(get_task_code_object(unwinder, running_task_addr, &running_task_code_obj) < 0) { + return -1; + } + + // First, add this task to the result + PyObject *task_info = NULL; + if (process_single_task_node(unwinder, running_task_addr, &task_info, result) < 0) { + return -1; + } + + // Get the chain from the current frame to this task + PyObject *coro_chain = PyStructSequence_GET_ITEM(task_info, 2); + assert(coro_chain != NULL); + if (PyList_GET_SIZE(coro_chain) != 1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Coro chain is not a single item"); + return -1; + } + PyObject *coro_info = PyList_GET_ITEM(coro_chain, 0); + assert(coro_info != NULL); + PyObject *frame_chain = PyStructSequence_GET_ITEM(coro_info, 0); + assert(frame_chain != NULL); + + // Clear the coro_chain + if (PyList_Clear(frame_chain) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to clear coroutine chain"); + return -1; + } + + // Add the chain from the current frame to this task + if (parse_async_frame_chain(unwinder, frame_chain, thread_state_addr, running_task_code_obj) < 0) { + return -1; + } + + // Now find all tasks that are waiting for this task and process them + if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, result) < 0) { + return -1; + } + + return 0; +} + +// Thread processor for get_async_stack_trace +static int +process_thread_for_async_stack_trace( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + + // Find running task in this thread + uintptr_t running_task_addr; + if (find_running_task_in_thread(unwinder, thread_state_addr, &running_task_addr) < 0) { + return 0; + } + + // If we found a running task, process it and its waiters + if ((void*)running_task_addr != NULL) { + PyObject *task_list = PyList_New(0); + if (task_list == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create task list for thread"); + return -1; + } + + if (process_running_task_chain(unwinder, running_task_addr, thread_state_addr, task_list) < 0) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process running task chain"); + return -1; + } + + // Create AwaitedInfo structure for this thread + PyObject *tid_py = PyLong_FromUnsignedLong(tid); + if (tid_py == NULL) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); + return -1; + } + + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *awaited_info = PyStructSequence_New(state->AwaitedInfo_Type); + if (awaited_info == NULL) { + Py_DECREF(tid_py); + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); + return -1; + } + + PyStructSequence_SetItem(awaited_info, 0, tid_py); // steals ref + PyStructSequence_SetItem(awaited_info, 1, task_list); // steals ref + + if (PyList_Append(result, awaited_info)) { + Py_DECREF(awaited_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append AwaitedInfo to result"); + return -1; + } + Py_DECREF(awaited_info); + } + + return 0; +} + +static int +process_task_and_waiters( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +) { + // First, add this task to the result + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { + return -1; + } + + // Now find all tasks that are waiting for this task and process them + return process_task_awaited_by(unwinder, task_addr, process_waiter_task, result); +} + +static int +find_running_task_in_thread( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + uintptr_t *running_task_addr +) { + *running_task_addr = (uintptr_t)NULL; + + uintptr_t address_of_running_loop; + int bytes_read = read_py_ptr( + unwinder, + thread_state_addr + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, + &address_of_running_loop); + if (bytes_read == -1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running loop address"); + return -1; + } + + // no asyncio loop is now running + if ((void*)address_of_running_loop == NULL) { + return 0; + } + + int err = read_ptr( + unwinder, + thread_state_addr + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, + running_task_addr); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task address"); + return -1; + } + + return 0; +} + +static int +get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr) { + uintptr_t running_coro_addr = 0; + + if(read_py_ptr( + unwinder, + task_addr + unwinder->async_debug_offsets.asyncio_task_object.task_coro, + &running_coro_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro read failed"); + return -1; + } + + if (running_coro_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro address is NULL"); + return -1; + } + + // note: genobject's gi_iframe is an embedded struct so the address to + // the offset leads directly to its first field: f_executable + if (read_py_ptr( + unwinder, + running_coro_addr + unwinder->debug_offsets.gen_object.gi_iframe, code_obj_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task code object"); + return -1; + } + + if (*code_obj_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task code object address is NULL"); + return -1; + } + + return 0; +} + +/* ============================================================================ + * TLBC CACHING FUNCTIONS + * ============================================================================ */ + +#ifdef Py_GIL_DISABLED + +typedef struct { + void *tlbc_array; // Local copy of the TLBC array + Py_ssize_t tlbc_array_size; // Size of the TLBC array + uint32_t generation; // Generation when this was cached +} TLBCCacheEntry; + +static void +tlbc_cache_entry_destroy(void *ptr) { - int lineno; - int end_lineno; - int column; - int end_column; -} LocationInfo; + TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr; + if (entry->tlbc_array) { + PyMem_RawFree(entry->tlbc_array); + } + PyMem_RawFree(entry); +} + +static TLBCCacheEntry * +get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation) +{ + void *key = (void *)code_addr; + TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key); + + if (entry && entry->generation != current_generation) { + // Entry is stale, remove it by setting to NULL + _Py_hashtable_set(self->tlbc_cache, key, NULL); + entry = NULL; + } + + return entry; +} + +static int +cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation) +{ + uintptr_t tlbc_array_ptr; + void *tlbc_array = NULL; + TLBCCacheEntry *entry = NULL; + + // Read the TLBC array pointer + if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0 || tlbc_array_ptr == 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer"); + return 0; // No TLBC array + } + + // Read the TLBC array size + Py_ssize_t tlbc_size; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0 || tlbc_size <= 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size"); + return 0; // Invalid size + } + + // Allocate and read the entire TLBC array + size_t array_data_size = tlbc_size * sizeof(void*); + tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size); + if (!tlbc_array) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC array"); + return 0; // Memory error + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { + PyMem_RawFree(tlbc_array); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array data"); + return 0; // Read error + } + + // Create cache entry + entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry)); + if (!entry) { + PyMem_RawFree(tlbc_array); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC cache entry"); + return 0; // Memory error + } + + entry->tlbc_array = tlbc_array; + entry->tlbc_array_size = tlbc_size; + entry->generation = generation; + + // Store in cache + void *key = (void *)code_addr; + if (_Py_hashtable_set(unwinder->tlbc_cache, key, entry) < 0) { + tlbc_cache_entry_destroy(entry); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to store TLBC entry in cache"); + return 0; // Cache error + } + + return 1; // Success +} + + + +#endif + +/* ============================================================================ + * LINE TABLE PARSING FUNCTIONS + * ============================================================================ */ static int scan_varint(const uint8_t **ptr) @@ -818,7 +1653,6 @@ scan_signed_varint(const uint8_t **ptr) } } - static bool parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) { @@ -863,7 +1697,9 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L } default: { uint8_t second_byte = *(ptr++); - assert((second_byte & 128) == 0); + if ((second_byte & 128) != 0) { + return false; + } info->column = code << 3 | (second_byte >> 4); info->end_column = info->column + (second_byte & 15); break; @@ -877,270 +1713,410 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L return false; } +/* ============================================================================ + * CODE OBJECT AND FRAME PARSING FUNCTIONS + * ============================================================================ */ + static int -read_remote_pointer(proc_handle_t *handle, uintptr_t address, uintptr_t *out_ptr, const char *error_message) +parse_code_object(RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t instruction_pointer, + uintptr_t *previous_frame, + int32_t tlbc_index) { - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void *), out_ptr); - if (bytes_read < 0) { - return -1; - } + void *key = (void *)address; + CachedCodeMetadata *meta = NULL; + PyObject *func = NULL; + PyObject *file = NULL; + PyObject *linetable = NULL; + PyObject *lineno = NULL; + PyObject *tuple = NULL; - if ((void *)(*out_ptr) == NULL) { - PyErr_SetString(PyExc_RuntimeError, error_message); - return -1; +#ifdef Py_GIL_DISABLED + // In free threading builds, code object addresses might have the low bit set + // as a flag, so we need to mask it off to get the real address + uintptr_t real_address = address & (~1); +#else + uintptr_t real_address = address; +#endif + + if (unwinder && unwinder->code_object_cache != NULL) { + meta = _Py_hashtable_get(unwinder->code_object_cache, key); } - return 0; -} + if (meta == NULL) { + char code_object[SIZEOF_CODE_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read code object"); + goto error; + } -static int -read_instruction_ptr(proc_handle_t *handle, struct _Py_DebugOffsets *offsets, - uintptr_t current_frame, uintptr_t *instruction_ptr) -{ - return read_remote_pointer( - handle, - current_frame + offsets->interpreter_frame.instr_ptr, - instruction_ptr, - "No instruction ptr found" - ); -} + func = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024); + if (!func) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read function name from code object"); + goto error; + } -static int -parse_code_object(proc_handle_t *handle, - PyObject **result, - struct _Py_DebugOffsets *offsets, - uintptr_t address, - uintptr_t current_frame, - uintptr_t *previous_frame) -{ - uintptr_t addr_func_name, addr_file_name, addr_linetable, instruction_ptr; + file = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024); + if (!file) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read filename from code object"); + goto error; + } - if (read_remote_pointer(handle, address + offsets->code_object.qualname, &addr_func_name, "No function name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.filename, &addr_file_name, "No file name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.linetable, &addr_linetable, "No linetable found") < 0 || - read_instruction_ptr(handle, offsets, current_frame, &instruction_ptr) < 0) { - return -1; - } + linetable = read_py_bytes(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096); + if (!linetable) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read linetable from code object"); + goto error; + } - int firstlineno; - if (_Py_RemoteDebug_ReadRemoteMemory(handle, - address + offsets->code_object.firstlineno, - sizeof(int), - &firstlineno) < 0) { - return -1; + meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata)); + if (!meta) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate cached code metadata"); + goto error; + } + + meta->func_name = func; + meta->file_name = file; + meta->linetable = linetable; + meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno); + meta->addr_code_adaptive = real_address + unwinder->debug_offsets.code_object.co_code_adaptive; + + if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) { + cached_code_metadata_destroy(meta); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache code metadata"); + goto error; + } + + // Ownership transferred to meta + func = NULL; + file = NULL; + linetable = NULL; } - PyObject *py_linetable = read_py_bytes(handle, offsets, addr_linetable); - if (!py_linetable) { - return -1; + uintptr_t ip = instruction_pointer; + ptrdiff_t addrq; + +#ifdef Py_GIL_DISABLED + // Handle thread-local bytecode (TLBC) in free threading builds + if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) { + // No TLBC or no unwinder - use main bytecode directly + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + goto done_tlbc; } - uintptr_t addr_code_adaptive = address + offsets->code_object.co_code_adaptive; - ptrdiff_t addrq = (uint16_t *)instruction_ptr - (uint16_t *)addr_code_adaptive; + // Try to get TLBC data from cache (we'll get generation from the caller) + TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + + if (!tlbc_entry) { + // Cache miss - try to read and cache TLBC array + if (!cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache TLBC array"); + goto error; + } + tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + } - LocationInfo info; - parse_linetable(addrq, PyBytes_AS_STRING(py_linetable), firstlineno, &info); - Py_DECREF(py_linetable); // Done with linetable + if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { + // Use cached TLBC data + uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); + uintptr_t tlbc_bytecode_addr = entries[tlbc_index]; - PyObject *py_line = PyLong_FromLong(info.lineno); - if (!py_line) { - return -1; + if (tlbc_bytecode_addr != 0) { + // Calculate offset from TLBC bytecode + addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr; + goto done_tlbc; + } } - PyObject *py_func_name = read_py_str(handle, offsets, addr_func_name, 256); - if (!py_func_name) { - Py_DECREF(py_line); - return -1; + // Fall back to main bytecode + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + +done_tlbc: +#else + // Non-free-threaded build, always use the main bytecode + (void)tlbc_index; // Suppress unused parameter warning + (void)unwinder; // Suppress unused parameter warning + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; +#endif + ; // Empty statement to avoid C23 extension warning + LocationInfo info = {0}; + bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), + meta->first_lineno, &info); + if (!ok) { + info.lineno = -1; } - PyObject *py_file_name = read_py_str(handle, offsets, addr_file_name, 256); - if (!py_file_name) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - return -1; + lineno = PyLong_FromLong(info.lineno); + if (!lineno) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); + goto error; } - PyObject *result_tuple = PyTuple_New(3); - if (!result_tuple) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - Py_DECREF(py_file_name); - return -1; + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + tuple = PyStructSequence_New(state->FrameInfo_Type); + if (!tuple) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo for code object"); + goto error; } - PyTuple_SET_ITEM(result_tuple, 0, py_func_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 1, py_file_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 2, py_line); // steals ref + Py_INCREF(meta->func_name); + Py_INCREF(meta->file_name); + PyStructSequence_SetItem(tuple, 0, meta->file_name); + PyStructSequence_SetItem(tuple, 1, lineno); + PyStructSequence_SetItem(tuple, 2, meta->func_name); - *result = result_tuple; + *result = tuple; return 0; + +error: + Py_XDECREF(func); + Py_XDECREF(file); + Py_XDECREF(linetable); + Py_XDECREF(lineno); + Py_XDECREF(tuple); + return -1; +} + +/* ============================================================================ + * STACK CHUNK MANAGEMENT FUNCTIONS + * ============================================================================ */ + +static void +cleanup_stack_chunks(StackChunkList *chunks) +{ + for (size_t i = 0; i < chunks->count; ++i) { + PyMem_RawFree(chunks->chunks[i].local_copy); + } + PyMem_RawFree(chunks->chunks); } static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame +process_single_stack_chunk( + RemoteUnwinderObject *unwinder, + uintptr_t chunk_addr, + StackChunkInfo *chunk_info ) { - int err; + // Start with default size assumption + size_t current_size = _PY_DATA_STACK_CHUNK_SIZE; - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { + char *this_chunk = PyMem_RawMalloc(current_size); + if (!this_chunk) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunk buffer"); return -1; } - char owner; - if (read_char(handle, address + offsets->interpreter_frame.owner, &owner)) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, current_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read stack chunk"); return -1; } - if (owner >= FRAME_OWNED_BY_INTERPRETER) { - return 0; - } - - uintptr_t address_of_code_object; - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - &address_of_code_object - ); - if (err) { - return -1; - } + // Check actual size and reread if necessary + size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); + if (actual_size != current_size) { + this_chunk = PyMem_RawRealloc(this_chunk, actual_size); + if (!this_chunk) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to reallocate stack chunk buffer"); + return -1; + } - if ((void*)address_of_code_object == NULL) { - return 0; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, actual_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reread stack chunk with correct size"); + return -1; + } + current_size = actual_size; } - return parse_code_object( - handle, result, offsets, address_of_code_object, address, previous_frame); + chunk_info->remote_addr = chunk_addr; + chunk_info->size = current_size; + chunk_info->local_copy = this_chunk; + return 0; } static int -parse_async_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame, - uintptr_t* code_object -) { - int err; +copy_stack_chunks(RemoteUnwinderObject *unwinder, + uintptr_t tstate_addr, + StackChunkList *out_chunks) +{ + uintptr_t chunk_addr; + StackChunkInfo *chunks = NULL; + size_t count = 0; + size_t max_chunks = 16; - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { + if (read_ptr(unwinder, tstate_addr + unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address"); return -1; } - char owner; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->interpreter_frame.owner, sizeof(char), &owner); - if (bytes_read < 0) { + chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo)); + if (!chunks) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array"); return -1; } - if (owner == FRAME_OWNED_BY_CSTACK || owner == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame - } + while (chunk_addr != 0) { + // Grow array if needed + if (count >= max_chunks) { + max_chunks *= 2; + StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo)); + if (!new_chunks) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array"); + goto error; + } + chunks = new_chunks; + } - if (owner != FRAME_OWNED_BY_GENERATOR - && owner != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner); - return -1; - } + // Process this chunk + if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk"); + goto error; + } - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - code_object - ); - if (err) { - return -1; + // Get next chunk address and increment count + chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous)); + count++; } - assert(code_object != NULL); - if ((void*)*code_object == NULL) { - return 0; - } + out_chunks->chunks = chunks; + out_chunks->count = count; + return 0; - if (parse_code_object( - handle, result, offsets, *code_object, address, previous_frame)) { - return -1; +error: + for (size_t i = 0; i < count; ++i) { + PyMem_RawFree(chunks[i].local_copy); } - - return 1; + PyMem_RawFree(chunks); + return -1; +} + +static void * +find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) +{ + for (size_t i = 0; i < chunks->count; ++i) { + uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); + size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); + + if (remote_ptr >= base && remote_ptr < base + payload) { + return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr); + } + } + return NULL; } static int -read_async_debug( - proc_handle_t *handle, - struct _Py_AsyncioModuleDebugOffsets* async_debug +parse_frame_from_chunks( + RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t *previous_frame, + StackChunkList *chunks ) { - uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(handle); - if (!async_debug_addr) { + void *frame_ptr = find_frame_in_chunks(chunks, address); + if (!frame_ptr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame not found in stack chunks"); return -1; } - size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, async_debug_addr, size, async_debug); - return result; + char *frame = (char *)frame_ptr; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); + if (frame_valid != 1) { + return frame_valid; + } + + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); + } +#endif + + return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); } +/* ============================================================================ + * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS + * ============================================================================ */ + static int -find_running_frame( - proc_handle_t *handle, +populate_initial_state_data( + int all_threads, + RemoteUnwinderObject *unwinder, uintptr_t runtime_start_address, - _Py_DebugOffsets* local_debug_offsets, - uintptr_t *frame + uintptr_t *interpreter_state, + uintptr_t *tstate ) { uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + unwinder->debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state address"); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL"); return -1; } - uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_main, + *interpreter_state = address_of_interpreter_state; + + if (all_threads) { + *tstate = 0; + return 0; + } + + uintptr_t address_of_thread = address_of_interpreter_state + + unwinder->debug_offsets.interpreter_state.threads_main; + + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address_of_thread, sizeof(void*), - &address_of_thread); - if (bytes_read < 0) { + tstate) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state address"); return -1; } - // No Python frames are available for us (can happen at tear-down). + return 0; +} + + +static int +find_running_frame( + RemoteUnwinderObject *unwinder, + uintptr_t address_of_thread, + uintptr_t *frame +) { if ((void*)address_of_thread != NULL) { int err = read_ptr( - handle, - address_of_thread + local_debug_offsets->thread_state.current_frame, + unwinder, + address_of_thread + unwinder->debug_offsets.thread_state.current_frame, frame); if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read current frame pointer"); return -1; } return 0; @@ -1150,166 +2126,185 @@ find_running_frame( return 0; } -static int -find_running_task( - proc_handle_t *handle, - uintptr_t runtime_start_address, - _Py_DebugOffsets *local_debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, - uintptr_t *running_task_addr +/* ============================================================================ + * FRAME PARSING FUNCTIONS + * ============================================================================ */ + +static inline int +is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr ) { - *running_task_addr = (uintptr_t)NULL; + if ((void*)code_object_addr == NULL) { + return 0; + } - uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + void* frame = (void*)frame_addr; - uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_address + interpreter_state_list_head, - sizeof(void*), - &address_of_interpreter_state); - if (bytes_read < 0) { - return -1; + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { + return 0; // C frame } - if (address_of_interpreter_state == 0) { - PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR + && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); return -1; } + return 1; +} - uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_head, - sizeof(void*), - &address_of_thread); +static int +parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* address_of_code_object, + uintptr_t* previous_frame +) { + char frame[SIZEOF_INTERP_FRAME]; + *address_of_code_object = 0; + + Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_INTERP_FRAME, + frame + ); if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame"); return -1; } - uintptr_t address_of_running_loop; - // No Python frames are available for us (can happen at tear-down). - if ((void*)address_of_thread == NULL) { - return 0; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); + if (frame_valid != 1) { + return frame_valid; } - bytes_read = read_py_ptr( - handle, - address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_loop, - &address_of_running_loop); - if (bytes_read == -1) { - return -1; + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif - // no asyncio loop is now running - if ((void*)address_of_running_loop == NULL) { - return 0; + *address_of_code_object = code_object; + return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); +} + +static int + parse_async_frame_chain( + RemoteUnwinderObject *unwinder, + PyObject *calls, + uintptr_t address_of_thread, + uintptr_t running_task_code_obj +) { + uintptr_t address_of_current_frame; + if (find_running_frame(unwinder, address_of_thread, &address_of_current_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running frame search failed in async chain"); + return -1; } - int err = read_ptr( - handle, - address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_task, - running_task_addr); - if (err) { + PyObject *frames = PyList_New(0); + if (frames == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frames list"); return -1; } + while ((void*)address_of_current_frame != NULL) { + PyObject* frame_info = NULL; + uintptr_t address_of_code_object; + int res = parse_frame_object( + unwinder, + &frame_info, + address_of_current_frame, + &address_of_code_object, + &address_of_current_frame + ); + + if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Async frame object parsing failed in chain"); + return -1; + } + + if (!frame_info) { + continue; + } + + if (PyList_Append(calls, frame_info) == -1) { + Py_DECREF(frame_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame info to async chain"); + return -1; + } + + Py_DECREF(frame_info); + + if (address_of_code_object == running_task_code_obj) { + break; + } + } + return 0; } +/* ============================================================================ + * AWAITED BY PARSING FUNCTIONS + * ============================================================================ */ + static int append_awaited_by_for_thread( - proc_handle_t *handle, + RemoteUnwinderObject *unwinder, uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, PyObject *result ) { - struct llist_node task_node; + char task_node[SIZEOF_LLIST_NODE]; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - head_addr, - sizeof(task_node), - &task_node)) - { + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr, + sizeof(task_node), task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task node head"); return -1; } size_t iteration_count = 0; const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound - while ((uintptr_t)task_node.next != head_addr) { + + while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) { if (++iteration_count > MAX_ITERATIONS) { PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task list iteration limit exceeded"); return -1; } - if (task_node.next == NULL) { - PyErr_SetString( - PyExc_RuntimeError, - "Invalid linked list structure reading remote memory"); - return -1; - } - - uintptr_t task_addr = (uintptr_t)task_node.next - - async_offsets->asyncio_task_object.task_node; - - PyObject *tn = parse_task_name( - handle, - debug_offsets, - async_offsets, - task_addr); - if (tn == NULL) { + if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid linked list structure reading remote memory"); + set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list"); return -1; } - PyObject *current_awaited_by = PyList_New(0); - if (current_awaited_by == NULL) { - Py_DECREF(tn); - return -1; - } - - PyObject* task_id = PyLong_FromUnsignedLongLong(task_addr); - if (task_id == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - return -1; - } + uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) + - unwinder->async_debug_offsets.asyncio_task_object.task_node; - PyObject *result_item = PyTuple_New(3); - if (result_item == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - Py_DECREF(task_id); + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); return -1; } - PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref - PyTuple_SET_ITEM(result_item, 1, tn); // steals ref - PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); - return -1; - } - Py_DECREF(result_item); - - if (parse_task_awaited_by(handle, debug_offsets, async_offsets, - task_addr, current_awaited_by, 0)) - { - return -1; - } - - // onto the next one... - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - (uintptr_t)task_node.next, - sizeof(task_node), - &task_node)) - { + // Read next node + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), + sizeof(task_node), + task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by"); return -1; } } @@ -1319,482 +2314,798 @@ append_awaited_by_for_thread( static int append_awaited_by( - proc_handle_t *handle, + RemoteUnwinderObject *unwinder, unsigned long tid, uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, PyObject *result) { PyObject *tid_py = PyLong_FromUnsignedLong(tid); if (tid_py == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID object"); return -1; } - PyObject *result_item = PyTuple_New(2); - if (result_item == NULL) { + PyObject* awaited_by_for_thread = PyList_New(0); + if (awaited_by_for_thread == NULL) { Py_DECREF(tid_py); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by thread list"); return -1; } - PyObject* awaited_by_for_thread = PyList_New(0); - if (awaited_by_for_thread == NULL) { + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *result_item = PyStructSequence_New(state->AwaitedInfo_Type); + if (result_item == NULL) { Py_DECREF(tid_py); - Py_DECREF(result_item); + Py_DECREF(awaited_by_for_thread); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); return -1; } - PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref - PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref + PyStructSequence_SetItem(result_item, 0, tid_py); // steals ref + PyStructSequence_SetItem(result_item, 1, awaited_by_for_thread); // steals ref if (PyList_Append(result, result_item)) { Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by result item"); return -1; } Py_DECREF(result_item); - if (append_awaited_by_for_thread( - handle, - head_addr, - debug_offsets, - async_offsets, - awaited_by_for_thread)) + if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by for thread"); return -1; } return 0; } -static PyObject* -get_all_awaited_by(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_all_awaited_by is not implemented on this platform"); - return NULL; -#endif +/* ============================================================================ + * STACK UNWINDING FUNCTIONS + * ============================================================================ */ - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; - } +static int +process_frame_chain( + RemoteUnwinderObject *unwinder, + uintptr_t initial_frame_addr, + StackChunkList *chunks, + PyObject *frame_info +) { + uintptr_t frame_addr = initial_frame_addr; + uintptr_t prev_frame_addr = 0; + const size_t MAX_FRAMES = 1024; + size_t frame_count = 0; + + while ((void*)frame_addr != NULL) { + PyObject *frame = NULL; + uintptr_t next_frame_addr = 0; + + if (++frame_count > MAX_FRAMES) { + PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded"); + return -1; + } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; - } + // Try chunks first, fallback to direct memory read + if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) { + PyErr_Clear(); + uintptr_t address_of_code_object = 0; + if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain"); + return -1; + } + } - PyObject *result = NULL; + if (!frame) { + break; + } - uintptr_t runtime_start_addr = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_addr == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); + if (prev_frame_addr && frame_addr != prev_frame_addr) { + PyErr_Format(PyExc_RuntimeError, + "Broken frame chain: expected frame at 0x%lx, got 0x%lx", + prev_frame_addr, frame_addr); + Py_DECREF(frame); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed"); + return -1; } - goto result_err; - } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_addr, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; - } + if (PyList_Append(frame_info, frame) == -1) { + Py_DECREF(frame); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to frame info list"); + return -1; + } + Py_DECREF(frame); - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; + prev_frame_addr = next_frame_addr; + frame_addr = next_frame_addr; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; - } + return 0; +} - uint64_t interpreter_state_list_head = - local_debug_offsets.runtime_state.interpreters_head; +static PyObject* +unwind_stack_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t *current_tstate +) { + PyObject *frame_info = NULL; + PyObject *thread_id = NULL; + PyObject *result = NULL; + StackChunkList chunks = {0}; - uintptr_t interpreter_state_addr; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_addr + interpreter_state_list_head, - sizeof(void*), - &interpreter_state_addr)) - { - goto result_err; + char ts[SIZEOF_THREAD_STATE]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, *current_tstate, unwinder->debug_offsets.thread_state.size, ts); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread state"); + goto error; } - uintptr_t thread_state_addr; - unsigned long tid = 0; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - interpreter_state_addr - + local_debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; + uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); + + frame_info = PyList_New(0); + if (!frame_info) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frame info list"); + goto error; } - uintptr_t head_addr; - while (thread_state_addr != 0) { - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr - + local_debug_offsets.thread_state.native_thread_id, - sizeof(tid), - &tid)) - { - goto result_err; - } + if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks"); + goto error; + } - head_addr = thread_state_addr - + local_async_debug.asyncio_thread_state.asyncio_tasks_head; + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); + goto error; + } - if (append_awaited_by(handle, tid, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; - } + *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr + local_debug_offsets.thread_state.next, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; - } + thread_id = PyLong_FromLongLong( + GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id)); + if (thread_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); + goto error; } - head_addr = interpreter_state_addr - + local_async_debug.asyncio_interpreter_state.asyncio_tasks_head; - - // On top of a per-thread task lists used by default by asyncio to avoid - // contention, there is also a fallback per-interpreter list of tasks; - // any tasks still pending when a thread is destroyed will be moved to the - // per-interpreter task list. It's unlikely we'll find anything here, but - // interesting for debugging. - if (append_awaited_by(handle, 0, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->ThreadInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create ThreadInfo"); + goto error; } - _Py_RemoteDebug_CleanupProcHandle(handle); + PyStructSequence_SetItem(result, 0, thread_id); // Steals reference + PyStructSequence_SetItem(result, 1, frame_info); // Steals reference + + cleanup_stack_chunks(&chunks); return result; -result_err: +error: + Py_XDECREF(frame_info); + Py_XDECREF(thread_id); Py_XDECREF(result); - _Py_RemoteDebug_CleanupProcHandle(handle); + cleanup_stack_chunks(&chunks); return NULL; } -static PyObject* -get_stack_trace(PyObject* self, PyObject* args) + +/* ============================================================================ + * REMOTEUNWINDER CLASS IMPLEMENTATION + * ============================================================================ */ + +/*[clinic input] +class _remote_debugging.RemoteUnwinder "RemoteUnwinderObject *" "&RemoteUnwinder_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=55f164d8803318be]*/ + +/*[clinic input] +_remote_debugging.RemoteUnwinder.__init__ + pid: int + * + all_threads: bool = False + only_active_thread: bool = False + debug: bool = False + +Initialize a new RemoteUnwinder object for debugging a remote Python process. + +Args: + pid: Process ID of the target Python process to debug + all_threads: If True, initialize state for all threads in the process. + If False, only initialize for the main thread. + only_active_thread: If True, only sample the thread holding the GIL. + Cannot be used together with all_threads=True. + debug: If True, chain exceptions to explain the sequence of events that + lead to the exception. + +The RemoteUnwinder provides functionality to inspect and debug a running Python +process, including examining thread states, stack frames and other runtime data. + +Raises: + PermissionError: If access to the target process is denied + OSError: If unable to attach to the target process or access its memory + RuntimeError: If unable to read debug information from the target process + ValueError: If both all_threads and only_active_thread are True +[clinic start generated code]*/ + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads, + int only_active_thread, + int debug) +/*[clinic end generated code: output=13ba77598ecdcbe1 input=8f8f12504e17da04]*/ { -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; + // Validate that all_threads and only_active_thread are not both True + if (all_threads && only_active_thread) { + PyErr_SetString(PyExc_ValueError, + "all_threads and only_active_thread cannot both be True"); + return -1; + } + +#ifdef Py_GIL_DISABLED + if (only_active_thread) { + PyErr_SetString(PyExc_ValueError, + "only_active_thread is not supported when Py_GIL_DISABLED is not defined"); + return -1; + } #endif - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; + self->debug = debug; + self->only_active_thread = only_active_thread; + self->cached_state = NULL; + if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); + return -1; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; + self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle); + if (self->runtime_start_address == 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to get Python runtime address"); + return -1; } - PyObject* result = NULL; + if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle, + &self->runtime_start_address, + &self->debug_offsets) < 0) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read debug offsets"); + return -1; + } - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; + // Validate that the debug offsets are valid + if(validate_debug_offsets(&self->debug_offsets) == -1) { + set_exception_cause(self, PyExc_RuntimeError, "Invalid debug offsets found"); + return -1; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + // Try to read async debug offsets, but don't fail if they're not available + self->async_debug_offsets_available = 1; + if (read_async_debug(self) < 0) { + PyErr_Clear(); + memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets)); + self->async_debug_offsets_available = 0; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - goto result_err; + if (populate_initial_state_data(all_threads, self, self->runtime_start_address, + &self->interpreter_addr ,&self->tstate_addr) < 0) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to populate initial state data"); + return -1; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; + self->code_object_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + cached_code_metadata_destroy, + NULL + ); + if (self->code_object_cache == NULL) { + PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create code object cache"); + return -1; } - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - if (parse_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame) - < 0) - { - Py_DECREF(result); - goto result_err; +#ifdef Py_GIL_DISABLED + // Initialize TLBC cache + self->tlbc_generation = 0; + self->tlbc_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + tlbc_cache_entry_destroy, + NULL + ); + if (self->tlbc_cache == NULL) { + _Py_hashtable_destroy(self->code_object_cache); + PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create TLBC cache"); + return -1; + } +#endif + + return 0; +} + +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_stack_trace + +Returns a list of stack traces for threads in the target process. + +Each element in the returned list is a tuple of (thread_id, frame_list), where: +- thread_id is the OS thread identifier +- frame_list is a list of tuples (function_name, filename, line_number) representing + the Python stack frames for that thread, ordered from most recent to oldest + +The threads returned depend on the initialization parameters: +- If only_active_thread was True: returns only the thread holding the GIL +- If all_threads was True: returns all threads +- Otherwise: returns only the main thread + +Example: + [ + (1234, [ + ('process_data', 'worker.py', 127), + ('run_worker', 'worker.py', 45), + ('main', 'app.py', 23) + ]), + (1235, [ + ('handle_request', 'server.py', 89), + ('serve_forever', 'server.py', 52) + ]) + ] + +Raises: + RuntimeError: If there is an error copying memory from the target process + OSError: If there is an error accessing the target process + PermissionError: If access to the target process is denied + UnicodeDecodeError: If there is an error decoding strings from the target process + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=666192b90c69d567 input=f756f341206f9116]*/ +{ + PyObject* result = NULL; + // Read interpreter state into opaque buffer + char interp_state_buffer[INTERP_STATE_BUFFER_SIZE]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + self->interpreter_addr, + INTERP_STATE_BUFFER_SIZE, + interp_state_buffer) < 0) { + set_exception_cause(self, PyExc_RuntimeError, "Failed to read interpreter state buffer"); + goto exit; + } + + // Get code object generation from buffer + uint64_t code_object_generation = GET_MEMBER(uint64_t, interp_state_buffer, + self->debug_offsets.interpreter_state.code_object_generation); + + if (code_object_generation != self->code_object_generation) { + self->code_object_generation = code_object_generation; + _Py_hashtable_clear(self->code_object_cache); + } + + // If only_active_thread is true, we need to determine which thread holds the GIL + PyThreadState* gil_holder = NULL; + if (self->only_active_thread) { + // The GIL state is already in interp_state_buffer, just read from there + // Check if GIL is locked + int gil_locked = GET_MEMBER(int, interp_state_buffer, + self->debug_offsets.interpreter_state.gil_runtime_state_locked); + + if (gil_locked) { + // Get the last holder (current holder when GIL is locked) + gil_holder = GET_MEMBER(PyThreadState*, interp_state_buffer, + self->debug_offsets.interpreter_state.gil_runtime_state_holder); + } else { + // GIL is not locked, return empty list + result = PyList_New(0); + if (!result) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create empty result list"); + } + goto exit; } + } + +#ifdef Py_GIL_DISABLED + // Check TLBC generation and invalidate cache if needed + uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer, + self->debug_offsets.interpreter_state.tlbc_generation); + if (current_tlbc_generation != self->tlbc_generation) { + self->tlbc_generation = current_tlbc_generation; + _Py_hashtable_clear(self->tlbc_cache); + } +#endif + + uintptr_t current_tstate; + if (self->only_active_thread && gil_holder != NULL) { + // We have the GIL holder, process only that thread + current_tstate = (uintptr_t)gil_holder; + } else if (self->tstate_addr == 0) { + // Get threads head from buffer + current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer, + self->debug_offsets.interpreter_state.threads_head); + } else { + current_tstate = self->tstate_addr; + } + result = PyList_New(0); + if (!result) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create stack trace result list"); + goto exit; + } + + while (current_tstate != 0) { + PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate); if (!frame_info) { - continue; + Py_CLEAR(result); + set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread"); + goto exit; } if (PyList_Append(result, frame_info) == -1) { - Py_DECREF(result); - goto result_err; + Py_DECREF(frame_info); + Py_CLEAR(result); + set_exception_cause(self, PyExc_RuntimeError, "Failed to append thread frame info"); + goto exit; } - Py_DECREF(frame_info); - frame_info = NULL; + // We are targeting a single tstate, break here + if (self->tstate_addr) { + break; + } + + // If we're only processing the GIL holder, we're done after one iteration + if (self->only_active_thread && gil_holder != NULL) { + break; + } } -result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); +exit: + _Py_RemoteDebug_ClearCache(&self->handle); return result; } -static PyObject* -get_async_stack_trace(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; -#endif - int pid; +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_all_awaited_by + +Get all tasks and their awaited_by relationships from the remote process. + +This provides a tree structure showing which tasks are waiting for other tasks. + +For each task, returns: +1. The call stack frames leading to where the task is currently executing +2. The name of the task +3. A list of tasks that this task is waiting for, with their own frames/names/etc + +Returns a list of [frames, task_name, subtasks] where: +- frames: List of (func_name, filename, lineno) showing the call stack +- task_name: String identifier for the task +- subtasks: List of tasks being awaited by this task, in same format + +Raises: + RuntimeError: If AsyncioDebug section is not available in the remote process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output: +[ + # Task c2_root waiting for two subtasks + [ + # Call stack of c2_root + [("c5", "script.py", 10), ("c4", "script.py", 14)], + "c2_root", + [ + # First subtask (sub_main_2) and what it's waiting for + [ + [("c1", "script.py", 23)], + "sub_main_2", + [...] + ], + # Second subtask and its waiters + [...] + ] + ] +] +[clinic start generated code]*/ - if (!PyArg_ParseTuple(args, "i", &pid)) { +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6a49cd345e8aec53 input=a452c652bb00701a]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_all_awaited_by"); return NULL; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; - } - - PyObject *result = NULL; - - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } + PyObject *result = PyList_New(0); + if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create awaited_by result list"); goto result_err; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); + // Process all threads + if (iterate_threads(self, process_thread_for_awaited_by, result) < 0) { goto result_err; } - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; - } + uintptr_t head_addr = self->interpreter_addr + + self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head; - result = PyList_New(1); - if (result == NULL) { - goto result_err; - } - PyObject* calls = PyList_New(0); - if (calls == NULL) { - goto result_err; - } - if (PyList_SetItem(result, 0, calls)) { /* steals ref to 'calls' */ - Py_DECREF(calls); + // On top of a per-thread task lists used by default by asyncio to avoid + // contention, there is also a fallback per-interpreter list of tasks; + // any tasks still pending when a thread is destroyed will be moved to the + // per-interpreter task list. It's unlikely we'll find anything here, but + // interesting for debugging. + if (append_awaited_by(self, 0, head_addr, result)) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to append interpreter awaited_by in get_all_awaited_by"); goto result_err; } - uintptr_t running_task_addr = (uintptr_t)NULL; - if (find_running_task( - handle, runtime_start_address, &local_debug_offsets, &local_async_debug, - &running_task_addr) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running task"); - goto result_err; - } + _Py_RemoteDebug_ClearCache(&self->handle); + return result; - if ((void*)running_task_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "No running task found"); - goto result_err; - } +result_err: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} - uintptr_t running_coro_addr; - if (read_py_ptr( - handle, - running_task_addr + local_async_debug.asyncio_task_object.task_coro, - &running_coro_addr - )) { - chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro"); - goto result_err; - } +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_async_stack_trace + +Get the currently running async tasks and their dependency graphs from the remote process. + +This returns information about running tasks and all tasks that are waiting for them, +forming a complete dependency graph for each thread's active task. + +For each thread with a running task, returns the running task plus all tasks that +transitively depend on it (tasks waiting for the running task, tasks waiting for +those tasks, etc.). + +Returns a list of per-thread results, where each thread result contains: +- Thread ID +- List of task information for the running task and all its waiters + +Each task info contains: +- Task ID (memory address) +- Task name +- Call stack frames: List of (func_name, filename, lineno) +- List of tasks waiting for this task (recursive structure) + +Raises: + RuntimeError: If AsyncioDebug section is not available in the target process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output (similar structure to get_all_awaited_by but only for running tasks): +[ + # Thread 140234 results + (140234, [ + # Running task and its complete waiter dependency graph + (4345585712, 'main_task', + [("run_server", "server.py", 127), ("main", "app.py", 23)], + [ + # Tasks waiting for main_task + (4345585800, 'worker_1', [...], [...]), + (4345585900, 'worker_2', [...], [...]) + ]) + ]) +] + +[clinic start generated code]*/ - if ((void*)running_coro_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); - goto result_err; +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6433d52b55e87bbe input=8744b47c9ec2220a]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_async_stack_trace"); + return NULL; } - // note: genobject's gi_iframe is an embedded struct so the address to - // the offset leads directly to its first field: f_executable - uintptr_t address_of_running_task_code_obj; - if (read_py_ptr( - handle, - running_coro_addr + local_debug_offsets.gen_object.gi_iframe, - &address_of_running_task_code_obj - )) { - goto result_err; + PyObject *result = PyList_New(0); + if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create result list in get_async_stack_trace"); + return NULL; } - if ((void*)address_of_running_task_code_obj == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); + // Process all threads + if (iterate_threads(self, process_thread_for_async_stack_trace, result) < 0) { goto result_err; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running frame"); - goto result_err; + _Py_RemoteDebug_ClearCache(&self->handle); + return result; +result_err: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} + +static PyMethodDef RemoteUnwinder_methods[] = { + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF + {NULL, NULL} +}; + +static void +RemoteUnwinder_dealloc(PyObject *op) +{ + RemoteUnwinderObject *self = RemoteUnwinder_CAST(op); + PyTypeObject *tp = Py_TYPE(self); + if (self->code_object_cache) { + _Py_hashtable_destroy(self->code_object_cache); + } +#ifdef Py_GIL_DISABLED + if (self->tlbc_cache) { + _Py_hashtable_destroy(self->tlbc_cache); } +#endif + if (self->handle.pid != 0) { + _Py_RemoteDebug_ClearCache(&self->handle); + _Py_RemoteDebug_CleanupProcHandle(&self->handle); + } + PyObject_Del(self); + Py_DECREF(tp); +} - uintptr_t address_of_code_object; - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - int res = parse_async_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame, - &address_of_code_object - ); +static PyType_Slot RemoteUnwinder_slots[] = { + {Py_tp_doc, (void *)"RemoteUnwinder(pid): Inspect stack of a remote Python process."}, + {Py_tp_methods, RemoteUnwinder_methods}, + {Py_tp_init, _remote_debugging_RemoteUnwinder___init__}, + {Py_tp_dealloc, RemoteUnwinder_dealloc}, + {0, NULL} +}; - if (res < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object"); - goto result_err; - } +static PyType_Spec RemoteUnwinder_spec = { + .name = "_remote_debugging.RemoteUnwinder", + .basicsize = sizeof(RemoteUnwinderObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = RemoteUnwinder_slots, +}; - if (!frame_info) { - continue; - } +/* ============================================================================ + * MODULE INITIALIZATION + * ============================================================================ */ - if (PyList_Append(calls, frame_info) == -1) { - Py_DECREF(calls); - goto result_err; - } +static int +_remote_debugging_exec(PyObject *m) +{ + RemoteDebuggingState *st = RemoteDebugging_GetState(m); +#define CREATE_TYPE(mod, type, spec) \ + do { \ + type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \ + if (type == NULL) { \ + return -1; \ + } \ + } while (0) + + CREATE_TYPE(m, st->RemoteDebugging_Type, &RemoteUnwinder_spec); + + if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) { + return -1; + } - Py_DECREF(frame_info); - frame_info = NULL; + // Initialize structseq types + st->TaskInfo_Type = PyStructSequence_NewType(&TaskInfo_desc); + if (st->TaskInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->TaskInfo_Type) < 0) { + return -1; + } - if (address_of_code_object == address_of_running_task_code_obj) { - break; - } + st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); + if (st->FrameInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->FrameInfo_Type) < 0) { + return -1; } - PyObject *tn = parse_task_name( - handle, &local_debug_offsets, &local_async_debug, running_task_addr); - if (tn == NULL) { - goto result_err; + st->CoroInfo_Type = PyStructSequence_NewType(&CoroInfo_desc); + if (st->CoroInfo_Type == NULL) { + return -1; } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); - goto result_err; + if (PyModule_AddType(m, st->CoroInfo_Type) < 0) { + return -1; } - Py_DECREF(tn); - PyObject* awaited_by = PyList_New(0); - if (awaited_by == NULL) { - goto result_err; + st->ThreadInfo_Type = PyStructSequence_NewType(&ThreadInfo_desc); + if (st->ThreadInfo_Type == NULL) { + return -1; } - if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto result_err; + if (PyModule_AddType(m, st->ThreadInfo_Type) < 0) { + return -1; } - Py_DECREF(awaited_by); - if (parse_task_awaited_by( - handle, &local_debug_offsets, &local_async_debug, - running_task_addr, awaited_by, 1) - ) { - goto result_err; + st->AwaitedInfo_Type = PyStructSequence_NewType(&AwaitedInfo_desc); + if (st->AwaitedInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->AwaitedInfo_Type) < 0) { + return -1; } +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); +#endif + int rc = PyModule_AddIntConstant(m, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); + if (rc < 0) { + return -1; + } + if (RemoteDebugging_InitState(st) < 0) { + return -1; + } + return 0; +} - _Py_RemoteDebug_CleanupProcHandle(handle); - return result; +static int +remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_VISIT(state->RemoteDebugging_Type); + Py_VISIT(state->TaskInfo_Type); + Py_VISIT(state->FrameInfo_Type); + Py_VISIT(state->CoroInfo_Type); + Py_VISIT(state->ThreadInfo_Type); + Py_VISIT(state->AwaitedInfo_Type); + return 0; +} -result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); - Py_XDECREF(result); - return NULL; +static int +remote_debugging_clear(PyObject *mod) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_CLEAR(state->RemoteDebugging_Type); + Py_CLEAR(state->TaskInfo_Type); + Py_CLEAR(state->FrameInfo_Type); + Py_CLEAR(state->CoroInfo_Type); + Py_CLEAR(state->ThreadInfo_Type); + Py_CLEAR(state->AwaitedInfo_Type); + return 0; } +static void +remote_debugging_free(void *mod) +{ + (void)remote_debugging_clear((PyObject *)mod); +} -static PyMethodDef methods[] = { - {"get_stack_trace", get_stack_trace, METH_VARARGS, - "Get the Python stack from a given pod"}, - {"get_async_stack_trace", get_async_stack_trace, METH_VARARGS, - "Get the asyncio stack from a given pid"}, - {"get_all_awaited_by", get_all_awaited_by, METH_VARARGS, - "Get all tasks and their awaited_by from a given pid"}, +static PyModuleDef_Slot remote_debugging_slots[] = { + {Py_mod_exec, _remote_debugging_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static PyMethodDef remote_debugging_methods[] = { {NULL, NULL, 0, NULL}, }; -static struct PyModuleDef module = { - .m_base = PyModuleDef_HEAD_INIT, +static struct PyModuleDef remote_debugging_module = { + PyModuleDef_HEAD_INIT, .m_name = "_remote_debugging", - .m_size = -1, - .m_methods = methods, + .m_size = sizeof(RemoteDebuggingState), + .m_methods = remote_debugging_methods, + .m_slots = remote_debugging_slots, + .m_traverse = remote_debugging_traverse, + .m_clear = remote_debugging_clear, + .m_free = remote_debugging_free, }; PyMODINIT_FUNC PyInit__remote_debugging(void) { - PyObject* mod = PyModule_Create(&module); - if (mod == NULL) { - return NULL; - } -#ifdef Py_GIL_DISABLED - PyUnstable_Module_SetGIL(mod, Py_MOD_GIL_NOT_USED); -#endif - int rc = PyModule_AddIntConstant( - mod, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); - if (rc < 0) { - Py_DECREF(mod); - return NULL; - } - return mod; + return PyModuleDef_Init(&remote_debugging_module); } diff --git a/Modules/_sqlite/blob.c b/Modules/_sqlite/blob.c index 35d090e3ca2dce..aafefbf316e03d 100644 --- a/Modules/_sqlite/blob.c +++ b/Modules/_sqlite/blob.c @@ -4,6 +4,7 @@ #include "blob.h" #include "util.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #define clinic_state() (pysqlite_get_state_by_type(Py_TYPE(self))) #include "clinic/blob.c.h" @@ -56,9 +57,7 @@ blob_dealloc(PyObject *op) close_blob(self); - if (self->in_weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->in_weakreflist); (void)tp->tp_clear(op); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_sqlite/cursor.c b/Modules/_sqlite/cursor.c index 7943bfcca3679d..0c3f43d0e50b43 100644 --- a/Modules/_sqlite/cursor.c +++ b/Modules/_sqlite/cursor.c @@ -31,6 +31,7 @@ #include "util.h" #include "pycore_pyerrors.h" // _PyErr_FormatFromCause() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() typedef enum { TYPE_LONG, @@ -185,9 +186,7 @@ cursor_dealloc(PyObject *op) pysqlite_Cursor *self = _pysqlite_Cursor_CAST(op); PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - if (self->in_weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->in_weakreflist); (void)tp->tp_clear(op); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 602d0ab8588f62..e8943920043906 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -44,6 +44,7 @@ static const char copyright[] = #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_unicodeobject.h" // _PyUnicode_Copy +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "sre.h" // SRE_CODE @@ -736,10 +737,7 @@ pattern_dealloc(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - PatternObject *obj = _PatternObject_CAST(self); - if (obj->weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, _PatternObject_CAST(self)->weakreflist); (void)pattern_clear(self); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 97a29f4d0e1830..1c1e15eb0e528a 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -563,7 +563,7 @@ fill_and_set_sslerror(_sslmodulestate *state, goto fail; } } - if (PyUnicodeWriter_WriteUTF8(writer, "] ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "] ", 2) < 0) { goto fail; } } diff --git a/Modules/_stat.c b/Modules/_stat.c index f11ca7d23b440d..1dabf2f6d5b07f 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -57,7 +57,7 @@ typedef unsigned short mode_t; * Only the names are defined by POSIX but not their value. All common file * types seems to have the same numeric value on all platforms, though. * - * pyport.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK + * fileutils.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK */ #ifndef S_IFBLK @@ -86,7 +86,7 @@ typedef unsigned short mode_t; /* S_ISXXX() - * pyport.h defines S_ISDIR(), S_ISREG() and S_ISCHR() + * fileutils.h defines S_ISDIR(), S_ISREG() and S_ISCHR() */ #ifndef S_ISBLK diff --git a/Modules/_struct.c b/Modules/_struct.c index c36079f1eb8886..3fad35a8c94ee2 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -11,6 +11,7 @@ #include "pycore_bytesobject.h" // _PyBytesWriter #include "pycore_long.h" // _PyLong_AsByteArray() #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include // offsetof() @@ -1794,9 +1795,7 @@ s_dealloc(PyObject *op) PyStructObject *s = PyStructObject_CAST(op); PyTypeObject *tp = Py_TYPE(s); PyObject_GC_UnTrack(s); - if (s->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, s->weakreflist); if (s->s_codes != NULL) { PyMem_Free(s->s_codes); } diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 42243023a45768..6313abf5485fff 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -228,7 +228,7 @@ pylongwriter_create(PyObject *module, PyObject *args) goto error; } - if (num < 0 || num >= PyLong_BASE) { + if (num < 0 || num >= (long)PyLong_BASE) { PyErr_SetString(PyExc_ValueError, "digit doesn't fit into digit"); goto error; } diff --git a/Modules/_testcapi/unicode.c b/Modules/_testcapi/unicode.c index b8ecf53f4f8b9c..e70f5c68bc3b69 100644 --- a/Modules/_testcapi/unicode.c +++ b/Modules/_testcapi/unicode.c @@ -332,6 +332,27 @@ writer_write_utf8(PyObject *self_raw, PyObject *args) } +static PyObject* +writer_write_ascii(PyObject *self_raw, PyObject *args) +{ + WriterObject *self = (WriterObject *)self_raw; + if (writer_check(self) < 0) { + return NULL; + } + + char *str; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "yn", &str, &size)) { + return NULL; + } + + if (PyUnicodeWriter_WriteASCII(self->writer, str, size) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + + static PyObject* writer_write_widechar(PyObject *self_raw, PyObject *args) { @@ -513,6 +534,7 @@ writer_finish(PyObject *self_raw, PyObject *Py_UNUSED(args)) static PyMethodDef writer_methods[] = { {"write_char", _PyCFunction_CAST(writer_write_char), METH_VARARGS}, {"write_utf8", _PyCFunction_CAST(writer_write_utf8), METH_VARARGS}, + {"write_ascii", _PyCFunction_CAST(writer_write_ascii), METH_VARARGS}, {"write_widechar", _PyCFunction_CAST(writer_write_widechar), METH_VARARGS}, {"write_ucs4", _PyCFunction_CAST(writer_write_ucs4), METH_VARARGS}, {"write_str", _PyCFunction_CAST(writer_write_str), METH_VARARGS}, diff --git a/Modules/_testcapi/vectorcall.c b/Modules/_testcapi/vectorcall.c index 03aaacb328e0b6..f89dcb6c4cf03c 100644 --- a/Modules/_testcapi/vectorcall.c +++ b/Modules/_testcapi/vectorcall.c @@ -179,14 +179,14 @@ _testcapi_VectorCallClass_set_vectorcall_impl(PyObject *self, if (!PyObject_TypeCheck(self, type)) { return PyErr_Format( PyExc_TypeError, - "expected %s instance", - PyType_GetName(type)); + "expected %N instance", + type); } if (!type->tp_vectorcall_offset) { return PyErr_Format( PyExc_TypeError, - "type %s has no vectorcall offset", - PyType_GetName(type)); + "type %N has no vectorcall offset", + type); } *(vectorcallfunc*)((char*)self + type->tp_vectorcall_offset) = ( VectorCallClass_vectorcall); diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 3aa6e4c9e43a26..71fffedee146fa 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -2424,7 +2424,7 @@ test_critical_sections(PyObject *module, PyObject *Py_UNUSED(args)) // Used by `finalize_thread_hang`. -#ifdef _POSIX_THREADS +#if defined(_POSIX_THREADS) && !defined(__wasi__) static void finalize_thread_hang_cleanup_callback(void *Py_UNUSED(arg)) { // Should not reach here. Py_FatalError("pthread thread termination was triggered unexpectedly"); @@ -3175,6 +3175,48 @@ create_manual_heap_type(void) return (PyObject *)type; } +typedef struct { + PyObject_VAR_HEAD +} ManagedDictObject; + +int ManagedDict_traverse(PyObject *self, visitproc visit, void *arg) { + PyObject_VisitManagedDict(self, visit, arg); + Py_VISIT(Py_TYPE(self)); + return 0; +} + +int ManagedDict_clear(PyObject *self) { + PyObject_ClearManagedDict(self); + return 0; +} + +static PyGetSetDef ManagedDict_getset[] = { + {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict, NULL, NULL}, + {NULL, NULL, NULL, NULL, NULL}, +}; + +static PyType_Slot ManagedDict_slots[] = { + {Py_tp_new, (void *)PyType_GenericNew}, + {Py_tp_getset, (void *)ManagedDict_getset}, + {Py_tp_traverse, (void *)ManagedDict_traverse}, + {Py_tp_clear, (void *)ManagedDict_clear}, + {0} +}; + +static PyType_Spec ManagedDict_spec = { + "_testcapi.ManagedDictType", + sizeof(ManagedDictObject), + 0, // itemsize + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_MANAGED_DICT | Py_TPFLAGS_HEAPTYPE | Py_TPFLAGS_HAVE_GC, + ManagedDict_slots +}; + +static PyObject * +create_managed_dict_type(void) +{ + return PyType_FromSpec(&ManagedDict_spec); +} + static struct PyModuleDef _testcapimodule = { PyModuleDef_HEAD_INIT, .m_name = "_testcapi", @@ -3315,6 +3357,13 @@ PyInit__testcapi(void) return NULL; } + PyObject *managed_dict_type = create_managed_dict_type(); + if (managed_dict_type == NULL) { + return NULL; + } + if (PyModule_Add(m, "ManagedDictType", managed_dict_type) < 0) { + return NULL; + } /* Include tests from the _testcapi/ directory */ if (_PyTestCapi_Init_Vectorcall(m) < 0) { diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index ae060c95fd5a01..8027f0015c7409 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -21,6 +21,7 @@ #include "pycore_fileutils.h" // _Py_normpath() #include "pycore_flowgraph.h" // _PyCompile_OptimizeCfg() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_function.h" // _PyFunction_GET_BUILTINS #include "pycore_gc.h" // PyGC_Head #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_import.h" // _PyImport_ClearExtension() @@ -120,7 +121,7 @@ get_c_recursion_remaining(PyObject *self, PyObject *Py_UNUSED(args)) PyThreadState *tstate = _PyThreadState_GET(); uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - int remaining = (int)((here_addr - _tstate->c_stack_soft_limit)/PYOS_STACK_MARGIN_BYTES * 50); + int remaining = (int)((here_addr - _tstate->c_stack_soft_limit) / _PyOS_STACK_MARGIN_BYTES * 50); return PyLong_FromLong(remaining); } @@ -1022,7 +1023,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } @@ -1045,6 +1046,9 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) #define SET_COUNT(DICT, STRUCT, NAME) \ do { \ PyObject *count = PyLong_FromLong(STRUCT.NAME); \ + if (count == NULL) { \ + goto error; \ + } \ int res = PyDict_SetItemString(DICT, #NAME, count); \ Py_DECREF(count); \ if (res < 0) { \ @@ -1165,6 +1169,47 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) return NULL; } +static PyObject * +verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs) +{ + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *codearg; + PyObject *globalnames = NULL; + PyObject *globalsns = NULL; + PyObject *builtinsns = NULL; + static char *kwlist[] = {"code", "globalnames", + "globalsns", "builtinsns", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "O|O!O!O!:get_code_var_counts", kwlist, + &codearg, &PySet_Type, &globalnames, + &PyDict_Type, &globalsns, &PyDict_Type, &builtinsns)) + { + return NULL; + } + if (PyFunction_Check(codearg)) { + if (globalsns == NULL) { + globalsns = PyFunction_GET_GLOBALS(codearg); + } + if (builtinsns == NULL) { + builtinsns = _PyFunction_GET_BUILTINS(codearg); + } + codearg = PyFunction_GET_CODE(codearg); + } + else if (!PyCode_Check(codearg)) { + PyErr_SetString(PyExc_TypeError, + "argument must be a code object or a function"); + return NULL; + } + PyCodeObject *code = (PyCodeObject *)codearg; + + if (_PyCode_VerifyStateless( + tstate, code, globalnames, globalsns, builtinsns) < 0) + { + return NULL; + } + Py_RETURN_NONE; +} + #ifdef _Py_TIER2 static PyObject * @@ -1649,11 +1694,12 @@ create_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) static PyObject * destroy_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = {"id", NULL}; + static char *kwlist[] = {"id", "basic", NULL}; PyObject *idobj = NULL; + int basic = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "O:destroy_interpreter", kwlist, - &idobj)) + "O|p:destroy_interpreter", kwlist, + &idobj, &basic)) { return NULL; } @@ -1663,7 +1709,27 @@ destroy_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } - _PyXI_EndInterpreter(interp, NULL, NULL); + if (basic) + { + // Test the basic Py_EndInterpreter with weird out of order thread states + PyThreadState *t1, *t2; + PyThreadState *prev; + t1 = interp->threads.head; + if (t1 == NULL) { + t1 = PyThreadState_New(interp); + } + t2 = PyThreadState_New(interp); + prev = PyThreadState_Swap(t2); + PyThreadState_Clear(t1); + PyThreadState_Delete(t1); + Py_EndInterpreter(t2); + PyThreadState_Swap(prev); + } + else + { + // use the cross interpreter _PyXI_EndInterpreter normally + _PyXI_EndInterpreter(interp, NULL, NULL); + } Py_RETURN_NONE; } @@ -1723,9 +1789,9 @@ exec_interpreter(PyObject *self, PyObject *args, PyObject *kwargs) /* To run some code in a sub-interpreter. -Generally you can use test.support.interpreters, +Generally you can use the interpreters module, but we keep this helper as a distinct implementation. -That's especially important for testing test.support.interpreters. +That's especially important for testing the interpreters module. */ static PyObject * run_in_subinterp_with_config(PyObject *self, PyObject *args, PyObject *kwargs) @@ -1929,7 +1995,14 @@ get_crossinterp_data(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } if (strcmp(mode, "xidata") == 0) { - if (_PyObject_GetXIData(tstate, obj, xidata) != 0) { + if (_PyObject_GetXIDataNoFallback(tstate, obj, xidata) != 0) { + goto error; + } + } + else if (strcmp(mode, "fallback") == 0) { + xidata_fallback_t fallback = _PyXIDATA_FULL_FALLBACK; + if (_PyObject_GetXIData(tstate, obj, fallback, xidata) != 0) + { goto error; } } @@ -1948,6 +2021,21 @@ get_crossinterp_data(PyObject *self, PyObject *args, PyObject *kwargs) goto error; } } + else if (strcmp(mode, "func") == 0) { + if (_PyFunction_GetXIData(tstate, obj, xidata) != 0) { + goto error; + } + } + else if (strcmp(mode, "script") == 0) { + if (_PyCode_GetScriptXIData(tstate, obj, xidata) != 0) { + goto error; + } + } + else if (strcmp(mode, "script-pure") == 0) { + if (_PyCode_GetPureScriptXIData(tstate, obj, xidata) != 0) { + goto error; + } + } else { PyErr_Format(PyExc_ValueError, "unsupported mode %R", modeobj); goto error; @@ -2292,6 +2380,8 @@ static PyMethodDef module_functions[] = { {"get_co_localskinds", get_co_localskinds, METH_O, NULL}, {"get_code_var_counts", _PyCFunction_CAST(get_code_var_counts), METH_VARARGS | METH_KEYWORDS, NULL}, + {"verify_stateless_code", _PyCFunction_CAST(verify_stateless_code), + METH_VARARGS | METH_KEYWORDS, NULL}, #ifdef _Py_TIER2 {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, {"invalidate_executors", invalidate_executors, METH_O, NULL}, diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index 8d678412fe7179..8d8cb992b0e07f 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -57,7 +57,10 @@ lock_thread(void *arg) _Py_atomic_store_int(&test_data->started, 1); PyMutex_Lock(m); - assert(m->_bits == 1); + // gh-135641: in rare cases the lock may still have `_Py_HAS_PARKED` set + // (m->_bits == 3) due to bucket collisions in the parking lot hash table + // between this mutex and the `test_data.done` event. + assert(m->_bits == 1 || m->_bits == 3); PyMutex_Unlock(m); assert(m->_bits == 0); diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 9776a32755db68..73dfb943845095 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -281,6 +281,12 @@ _PyThread_AfterFork(struct _pythread_runtime_state *state) continue; } + // Keep handles for threads that have not been started yet. They are + // safe to start in the child process. + if (handle->state == THREAD_HANDLE_NOT_STARTED) { + continue; + } + // Mark all threads as done. Any attempts to join or detach the // underlying OS thread (if any) could crash. We are the only thread; // it's safe to set this non-atomically. @@ -1011,6 +1017,11 @@ rlock_traverse(PyObject *self, visitproc visit, void *arg) return 0; } +static int +rlock_locked_impl(rlockobject *self) +{ + return PyMutex_IsLocked(&self->lock.mutex); +} static void rlock_dealloc(PyObject *self) @@ -1100,7 +1111,7 @@ static PyObject * rlock_locked(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = rlockobject_CAST(op); - int is_locked = _PyRecursiveMutex_IsLockedByCurrentThread(&self->lock); + int is_locked = rlock_locked_impl(self); return PyBool_FromLong(is_locked); } @@ -1202,10 +1213,17 @@ rlock_repr(PyObject *op) { rlockobject *self = rlockobject_CAST(op); PyThread_ident_t owner = self->lock.thread; - size_t count = self->lock.level + 1; + int locked = rlock_locked_impl(self); + size_t count; + if (locked) { + count = self->lock.level + 1; + } + else { + count = 0; + } return PyUnicode_FromFormat( "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%zu at %p>", - owner ? "locked" : "unlocked", + locked ? "locked" : "unlocked", Py_TYPE(self)->tp_name, owner, count, self); } @@ -1345,9 +1363,7 @@ static void localdummy_dealloc(PyObject *op) { localdummyobject *self = localdummyobject_CAST(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); PyTypeObject *tp = Py_TYPE(self); tp->tp_free(self); Py_DECREF(tp); diff --git a/Modules/_uuidmodule.c b/Modules/_uuidmodule.c index c5e78b1510b5e3..c31a7e8fea5608 100644 --- a/Modules/_uuidmodule.c +++ b/Modules/_uuidmodule.c @@ -78,23 +78,47 @@ py_UuidCreate(PyObject *Py_UNUSED(context), return NULL; } +static int +py_windows_has_stable_node(void) +{ + UUID uuid; + RPC_STATUS res; + Py_BEGIN_ALLOW_THREADS + res = UuidCreateSequential(&uuid); + Py_END_ALLOW_THREADS + return res == RPC_S_OK; +} #endif /* MS_WINDOWS */ static int -uuid_exec(PyObject *module) { +uuid_exec(PyObject *module) +{ +#define ADD_INT(NAME, VALUE) \ + do { \ + if (PyModule_AddIntConstant(module, (NAME), (VALUE)) < 0) { \ + return -1; \ + } \ + } while (0) + assert(sizeof(uuid_t) == 16); #if defined(MS_WINDOWS) - int has_uuid_generate_time_safe = 0; + ADD_INT("has_uuid_generate_time_safe", 0); #elif defined(HAVE_UUID_GENERATE_TIME_SAFE) - int has_uuid_generate_time_safe = 1; + ADD_INT("has_uuid_generate_time_safe", 1); #else - int has_uuid_generate_time_safe = 0; + ADD_INT("has_uuid_generate_time_safe", 0); #endif - if (PyModule_AddIntConstant(module, "has_uuid_generate_time_safe", - has_uuid_generate_time_safe) < 0) { - return -1; - } + +#if defined(MS_WINDOWS) + ADD_INT("has_stable_extractable_node", py_windows_has_stable_node()); +#elif defined(HAVE_UUID_GENERATE_TIME_SAFE_STABLE_MAC) + ADD_INT("has_stable_extractable_node", 1); +#else + ADD_INT("has_stable_extractable_node", 0); +#endif + +#undef ADD_INT return 0; } diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index abd53436b21b29..5c5383d260a040 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -7,6 +7,7 @@ #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_typeobject.h" // _PyType_GetModuleState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "datetime.h" // PyDateTime_TZInfo @@ -375,9 +376,7 @@ zoneinfo_dealloc(PyObject *obj_self) PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(obj_self); - } + FT_CLEAR_WEAKREFS(obj_self, self->weakreflist); if (self->trans_list_utc != NULL) { PyMem_Free(self->trans_list_utc); diff --git a/Modules/_zstd/_zstdmodule.c b/Modules/_zstd/_zstdmodule.c index 4d046859a1540e..d75c0779474a82 100644 --- a/Modules/_zstd/_zstdmodule.c +++ b/Modules/_zstd/_zstdmodule.c @@ -1,14 +1,16 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif +#include "Python.h" + #include "_zstdmodule.h" +#include // ZSTD_*() +#include // ZDICT_*() + /*[clinic input] module _zstd @@ -17,52 +19,91 @@ module _zstd #include "clinic/_zstdmodule.c.h" +ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, PyObject *dict, int *ptype) +{ + if (state == NULL) { + return NULL; + } + + /* Check ZstdDict */ + if (PyObject_TypeCheck(dict, state->ZstdDict_type)) { + return (ZstdDict*)dict; + } + + /* Check (ZstdDict, type) */ + if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2 + && PyObject_TypeCheck(PyTuple_GET_ITEM(dict, 0), state->ZstdDict_type) + && PyLong_Check(PyTuple_GET_ITEM(dict, 1))) + { + int type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); + if (type == -1 && PyErr_Occurred()) { + return NULL; + } + if (type == DICT_TYPE_DIGESTED + || type == DICT_TYPE_UNDIGESTED + || type == DICT_TYPE_PREFIX) + { + *ptype = type; + return (ZstdDict*)PyTuple_GET_ITEM(dict, 0); + } + } + + /* Wrong type */ + PyErr_SetString(PyExc_TypeError, + "zstd_dict argument should be a ZstdDict object."); + return NULL; +} + /* Format error message and set ZstdError. */ void -set_zstd_error(const _zstd_state* const state, - error_type type, size_t zstd_ret) +set_zstd_error(const _zstd_state *state, error_type type, size_t zstd_ret) { - char *msg; + const char *msg; assert(ZSTD_isError(zstd_ret)); - switch (type) - { - case ERR_DECOMPRESS: - msg = "Unable to decompress zstd data: %s"; - break; - case ERR_COMPRESS: - msg = "Unable to compress zstd data: %s"; - break; - case ERR_SET_PLEDGED_INPUT_SIZE: - msg = "Unable to set pledged uncompressed content size: %s"; - break; - - case ERR_LOAD_D_DICT: - msg = "Unable to load zstd dictionary or prefix for decompression: %s"; - break; - case ERR_LOAD_C_DICT: - msg = "Unable to load zstd dictionary or prefix for compression: %s"; - break; - - case ERR_GET_C_BOUNDS: - msg = "Unable to get zstd compression parameter bounds: %s"; - break; - case ERR_GET_D_BOUNDS: - msg = "Unable to get zstd decompression parameter bounds: %s"; - break; - case ERR_SET_C_LEVEL: - msg = "Unable to set zstd compression level: %s"; - break; - - case ERR_TRAIN_DICT: - msg = "Unable to train zstd dictionary: %s"; - break; - case ERR_FINALIZE_DICT: - msg = "Unable to finalize zstd dictionary: %s"; - break; - - default: - Py_UNREACHABLE(); + if (state == NULL) { + return; + } + switch (type) { + case ERR_DECOMPRESS: + msg = "Unable to decompress Zstandard data: %s"; + break; + case ERR_COMPRESS: + msg = "Unable to compress Zstandard data: %s"; + break; + case ERR_SET_PLEDGED_INPUT_SIZE: + msg = "Unable to set pledged uncompressed content size: %s"; + break; + + case ERR_LOAD_D_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "decompression: %s"; + break; + case ERR_LOAD_C_DICT: + msg = "Unable to load Zstandard dictionary or prefix for " + "compression: %s"; + break; + + case ERR_GET_C_BOUNDS: + msg = "Unable to get zstd compression parameter bounds: %s"; + break; + case ERR_GET_D_BOUNDS: + msg = "Unable to get zstd decompression parameter bounds: %s"; + break; + case ERR_SET_C_LEVEL: + msg = "Unable to set zstd compression level: %s"; + break; + + case ERR_TRAIN_DICT: + msg = "Unable to train the Zstandard dictionary: %s"; + break; + case ERR_FINALIZE_DICT: + msg = "Unable to finalize the Zstandard dictionary: %s"; + break; + + default: + Py_UNREACHABLE(); } PyErr_Format(state->ZstdError, msg, ZSTD_getErrorName(zstd_ret)); } @@ -72,8 +113,7 @@ typedef struct { char parameter_name[32]; } ParameterInfo; -static const ParameterInfo cp_list[] = -{ +static const ParameterInfo cp_list[] = { {ZSTD_c_compressionLevel, "compression_level"}, {ZSTD_c_windowLog, "window_log"}, {ZSTD_c_hashLog, "hash_log"}, @@ -98,22 +138,18 @@ static const ParameterInfo cp_list[] = {ZSTD_c_overlapLog, "overlap_log"} }; -static const ParameterInfo dp_list[] = -{ +static const ParameterInfo dp_list[] = { {ZSTD_d_windowLogMax, "window_log_max"} }; void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v) +set_parameter_error(int is_compress, int key_v, int value_v) { ParameterInfo const *list; int list_size; - char const *name; char *type; ZSTD_bounds bounds; - int i; - char pos_msg[128]; + char pos_msg[64]; if (is_compress) { list = cp_list; @@ -127,8 +163,8 @@ set_parameter_error(const _zstd_state* const state, int is_compress, } /* Find parameter's name */ - name = NULL; - for (i = 0; i < list_size; i++) { + char const *name = NULL; + for (int i = 0; i < list_size; i++) { if (key_v == (list+i)->parameter) { name = (list+i)->parameter_name; break; @@ -150,20 +186,16 @@ set_parameter_error(const _zstd_state* const state, int is_compress, bounds = ZSTD_dParam_getBounds(key_v); } if (ZSTD_isError(bounds.error)) { - PyErr_Format(state->ZstdError, - "Zstd %s parameter \"%s\" is invalid. (zstd v%s)", - type, name, ZSTD_versionString()); + PyErr_Format(PyExc_ValueError, "invalid %s parameter '%s'", + type, name); return; } /* Error message */ - PyErr_Format(state->ZstdError, - "Error when setting zstd %s parameter \"%s\", it " - "should %d <= value <= %d, provided value is %d. " - "(zstd v%s, %d-bit build)", - type, name, - bounds.lowerBound, bounds.upperBound, value_v, - ZSTD_versionString(), 8*(int)sizeof(Py_ssize_t)); + PyErr_Format(PyExc_ValueError, + "%s parameter '%s' received an illegal value %d; " + "the valid range is [%d, %d]", + type, name, value_v, bounds.lowerBound, bounds.upperBound); } static inline _zstd_state* @@ -174,9 +206,57 @@ get_zstd_state(PyObject *module) return (_zstd_state *)state; } +static Py_ssize_t +calculate_samples_stats(PyBytesObject *samples_bytes, PyObject *samples_sizes, + size_t **chunk_sizes) +{ + Py_ssize_t chunks_number; + Py_ssize_t sizes_sum; + Py_ssize_t i; + + chunks_number = PyTuple_GET_SIZE(samples_sizes); + if ((size_t) chunks_number > UINT32_MAX) { + PyErr_Format(PyExc_ValueError, + "The number of samples should be <= %u.", UINT32_MAX); + return -1; + } + + /* Prepare chunk_sizes */ + *chunk_sizes = PyMem_New(size_t, chunks_number); + if (*chunk_sizes == NULL) { + PyErr_NoMemory(); + return -1; + } + + sizes_sum = PyBytes_GET_SIZE(samples_bytes); + for (i = 0; i < chunks_number; i++) { + size_t size = PyLong_AsSize_t(PyTuple_GET_ITEM(samples_sizes, i)); + (*chunk_sizes)[i] = size; + if (size == (size_t)-1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + goto sum_error; + } + return -1; + } + if ((size_t)sizes_sum < size) { + goto sum_error; + } + sizes_sum -= size; + } + + if (sizes_sum != 0) { +sum_error: + PyErr_SetString(PyExc_ValueError, + "The samples size tuple doesn't match the " + "concatenation's size."); + return -1; + } + return chunks_number; +} + /*[clinic input] -_zstd._train_dict +_zstd.train_dict samples_bytes: PyBytesObject Concatenation of samples. @@ -186,59 +266,30 @@ _zstd._train_dict The size of the dictionary. / -Internal function, train a zstd dictionary on sample data. +Train a Zstandard dictionary on sample data. [clinic start generated code]*/ static PyObject * -_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size) -/*[clinic end generated code: output=b5b4f36347c0addd input=2dce5b57d63923e2]*/ +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size) +/*[clinic end generated code: output=8e87fe43935e8f77 input=d20dedb21c72cb62]*/ { - // TODO(emmatyping): The preamble and suffix to this function and _finalize_dict - // are pretty similar. We should see if we can refactor them to share that code. - Py_ssize_t chunks_number; - size_t *chunk_sizes = NULL; PyObject *dst_dict_bytes = NULL; + size_t *chunk_sizes = NULL; + Py_ssize_t chunks_number; size_t zstd_ret; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { goto error; } @@ -250,16 +301,16 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, /* Train the dictionary */ char *dst_dict_buffer = PyBytes_AS_STRING(dst_dict_bytes); - char *samples_buffer = PyBytes_AS_STRING(samples_bytes); + const char *samples_buffer = PyBytes_AS_STRING(samples_bytes); Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_trainFromBuffer(dst_dict_buffer, dict_size, samples_buffer, chunk_sizes, (uint32_t)chunks_number); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_TRAIN_DICT, zstd_ret); goto error; } @@ -280,7 +331,7 @@ _zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, } /*[clinic input] -_zstd._finalize_dict +_zstd.finalize_dict custom_dict_bytes: PyBytesObject Custom dictionary content. @@ -291,63 +342,36 @@ _zstd._finalize_dict dict_size: Py_ssize_t The size of the dictionary. compression_level: int - Optimize for a specific zstd compression level, 0 means default. + Optimize for a specific Zstandard compression level, 0 means default. / -Internal function, finalize a zstd dictionary. +Finalize a Zstandard dictionary. [clinic start generated code]*/ static PyObject * -_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, - PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size, - int compression_level) -/*[clinic end generated code: output=5dc5b520fddba37f input=8afd42a249078460]*/ +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level) +/*[clinic end generated code: output=f91821ba5ae85bda input=3c7e2480aa08fb56]*/ { Py_ssize_t chunks_number; size_t *chunk_sizes = NULL; PyObject *dst_dict_bytes = NULL; size_t zstd_ret; ZDICT_params_t params; - Py_ssize_t sizes_sum; - Py_ssize_t i; /* Check arguments */ if (dict_size <= 0) { - PyErr_SetString(PyExc_ValueError, "dict_size argument should be positive number."); - return NULL; - } - - chunks_number = Py_SIZE(samples_sizes); - if ((size_t) chunks_number > UINT32_MAX) { - PyErr_Format(PyExc_ValueError, - "The number of samples should be <= %u.", UINT32_MAX); + PyErr_SetString(PyExc_ValueError, + "dict_size argument should be positive number."); return NULL; } - /* Prepare chunk_sizes */ - chunk_sizes = PyMem_New(size_t, chunks_number); - if (chunk_sizes == NULL) { - PyErr_NoMemory(); - goto error; - } - - sizes_sum = 0; - for (i = 0; i < chunks_number; i++) { - PyObject *size = PyTuple_GetItem(samples_sizes, i); - chunk_sizes[i] = PyLong_AsSize_t(size); - if (chunk_sizes[i] == (size_t)-1 && PyErr_Occurred()) { - PyErr_Format(PyExc_ValueError, - "Items in samples_sizes should be an int " - "object, with a value between 0 and %u.", SIZE_MAX); - goto error; - } - sizes_sum += chunk_sizes[i]; - } - - if (sizes_sum != Py_SIZE(samples_bytes)) { - PyErr_SetString(PyExc_ValueError, - "The samples size tuple doesn't match the concatenation's size."); + /* Check that the samples are valid and get their sizes */ + chunks_number = calculate_samples_stats(samples_bytes, samples_sizes, + &chunk_sizes); + if (chunks_number < 0) { goto error; } @@ -359,7 +383,7 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, /* Parameters */ - /* Optimize for a specific zstd compression level, 0 means default. */ + /* Optimize for a specific Zstandard compression level, 0 means default. */ params.compressionLevel = compression_level; /* Write log to stderr, 0 = none. */ params.notificationLevel = 0; @@ -370,14 +394,15 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, Py_BEGIN_ALLOW_THREADS zstd_ret = ZDICT_finalizeDictionary( PyBytes_AS_STRING(dst_dict_bytes), dict_size, - PyBytes_AS_STRING(custom_dict_bytes), Py_SIZE(custom_dict_bytes), + PyBytes_AS_STRING(custom_dict_bytes), + Py_SIZE(custom_dict_bytes), PyBytes_AS_STRING(samples_bytes), chunk_sizes, (uint32_t)chunks_number, params); Py_END_ALLOW_THREADS - /* Check zstd dict error */ + /* Check Zstandard dict error */ if (ZDICT_isError(zstd_ret)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_FINALIZE_DICT, zstd_ret); goto error; } @@ -399,26 +424,25 @@ _zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, /*[clinic input] -_zstd._get_param_bounds +_zstd.get_param_bounds parameter: int The parameter to get bounds. is_compress: bool True for CompressionParameter, False for DecompressionParameter. -Internal function, get CompressionParameter/DecompressionParameter bounds. +Get CompressionParameter/DecompressionParameter bounds. [clinic start generated code]*/ static PyObject * -_zstd__get_param_bounds_impl(PyObject *module, int parameter, - int is_compress) -/*[clinic end generated code: output=9892cd822f937e79 input=884cd1a01125267d]*/ +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress) +/*[clinic end generated code: output=4acf5a876f0620ca input=45742ef0a3531b65]*/ { ZSTD_bounds bound; if (is_compress) { bound = ZSTD_cParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_C_BOUNDS, bound.error); return NULL; } @@ -426,7 +450,7 @@ _zstd__get_param_bounds_impl(PyObject *module, int parameter, else { bound = ZSTD_dParam_getBounds(parameter); if (ZSTD_isError(bound.error)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); set_zstd_error(mod_state, ERR_GET_D_BOUNDS, bound.error); return NULL; } @@ -442,24 +466,23 @@ _zstd.get_frame_size A bytes-like object, it should start from the beginning of a frame, and contains at least one complete frame. -Get the size of a zstd frame, including frame header and 4-byte checksum if it has one. - -It will iterate all blocks' headers within a frame, to accumulate the frame size. +Get the size of a Zstandard frame, including the header and optional checksum. [clinic start generated code]*/ static PyObject * _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=a7384c2f8780f442 input=7d3ad24311893bf3]*/ +/*[clinic end generated code: output=a7384c2f8780f442 input=3b9f73f8c8129d38]*/ { size_t frame_size; - frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, frame_buffer->len); + frame_size = ZSTD_findFrameCompressedSize(frame_buffer->buf, + frame_buffer->len); if (ZSTD_isError(frame_size)) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_Format(mod_state->ZstdError, - "Error when finding the compressed size of a zstd frame. " - "Make sure the frame_buffer argument starts from the " - "beginning of a frame, and its length not less than this " + "Error when finding the compressed size of a Zstandard frame. " + "Ensure the frame_buffer argument starts from the " + "beginning of a frame, and its length is not less than this " "complete frame. Zstd error message: %s.", ZSTD_getErrorName(frame_size)); return NULL; @@ -469,17 +492,17 @@ _zstd_get_frame_size_impl(PyObject *module, Py_buffer *frame_buffer) } /*[clinic input] -_zstd._get_frame_info +_zstd.get_frame_info frame_buffer: Py_buffer - A bytes-like object, containing the header of a zstd frame. + A bytes-like object, containing the header of a Zstandard frame. -Internal function, get zstd frame infomation from a frame header. +Get Zstandard frame infomation from a frame header. [clinic start generated code]*/ static PyObject * -_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) -/*[clinic end generated code: output=5462855464ecdf81 input=67f1f8e4b7b89c4d]*/ +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) +/*[clinic end generated code: output=56e033cf48001929 input=94b240583ae22ca5]*/ { uint64_t decompressed_size; uint32_t dict_id; @@ -491,12 +514,12 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) /* #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) */ if (decompressed_size == ZSTD_CONTENTSIZE_ERROR) { - _zstd_state* const mod_state = get_zstd_state(module); + _zstd_state* mod_state = get_zstd_state(module); PyErr_SetString(mod_state->ZstdError, "Error when getting information from the header of " - "a zstd frame. Make sure the frame_buffer argument " + "a Zstandard frame. Ensure the frame_buffer argument " "starts from the beginning of a frame, and its length " - "not less than the frame header (6~18 bytes)."); + "is not less than the frame header (6~18 bytes)."); return NULL; } @@ -511,325 +534,169 @@ _zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer) } /*[clinic input] -_zstd._set_parameter_types +_zstd.set_parameter_types c_parameter_type: object(subclass_of='&PyType_Type') CompressionParameter IntEnum type object d_parameter_type: object(subclass_of='&PyType_Type') DecompressionParameter IntEnum type object -Internal function, set CompressionParameter/DecompressionParameter types for validity check. +Set CompressionParameter and DecompressionParameter types for validity check. [clinic start generated code]*/ static PyObject * -_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, - PyObject *d_parameter_type) -/*[clinic end generated code: output=a13d4890ccbd2873 input=4535545d903853d3]*/ +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type) +/*[clinic end generated code: output=f3313b1294f19502 input=75d7a953580fae5f]*/ { - _zstd_state* const mod_state = get_zstd_state(module); - - if (!PyType_Check(c_parameter_type) || !PyType_Check(d_parameter_type)) { - PyErr_SetString(PyExc_ValueError, - "The two arguments should be CompressionParameter and " - "DecompressionParameter types."); - return NULL; - } + _zstd_state* mod_state = get_zstd_state(module); - Py_XDECREF(mod_state->CParameter_type); Py_INCREF(c_parameter_type); - mod_state->CParameter_type = (PyTypeObject*)c_parameter_type; - - Py_XDECREF(mod_state->DParameter_type); + Py_XSETREF(mod_state->CParameter_type, (PyTypeObject*)c_parameter_type); Py_INCREF(d_parameter_type); - mod_state->DParameter_type = (PyTypeObject*)d_parameter_type; + Py_XSETREF(mod_state->DParameter_type, (PyTypeObject*)d_parameter_type); Py_RETURN_NONE; } static PyMethodDef _zstd_methods[] = { - _ZSTD__TRAIN_DICT_METHODDEF - _ZSTD__FINALIZE_DICT_METHODDEF - _ZSTD__GET_PARAM_BOUNDS_METHODDEF + _ZSTD_TRAIN_DICT_METHODDEF + _ZSTD_FINALIZE_DICT_METHODDEF + _ZSTD_GET_PARAM_BOUNDS_METHODDEF _ZSTD_GET_FRAME_SIZE_METHODDEF - _ZSTD__GET_FRAME_INFO_METHODDEF - _ZSTD__SET_PARAMETER_TYPES_METHODDEF - - {0} + _ZSTD_GET_FRAME_INFO_METHODDEF + _ZSTD_SET_PARAMETER_TYPES_METHODDEF + {NULL, NULL} }; - -#define ADD_INT_PREFIX_MACRO(module, macro) \ - do { \ - if (PyModule_AddIntConstant(module, "_" #macro, macro) < 0) { \ - return -1; \ - } \ - } while(0) - static int -add_parameters(PyObject *module) -{ - /* If add new parameters, please also add to cp_list/dp_list above. */ - - /* Compression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_c_compressionLevel); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_windowLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_hashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_chainLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_searchLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_minMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_targetLength); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_strategy); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_enableLongDistanceMatching); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmMinMatch); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmBucketSizeLog); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_ldmHashRateLog); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_contentSizeFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_checksumFlag); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_dictIDFlag); - - ADD_INT_PREFIX_MACRO(module, ZSTD_c_nbWorkers); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_jobSize); - ADD_INT_PREFIX_MACRO(module, ZSTD_c_overlapLog); - - /* Decompression parameters */ - ADD_INT_PREFIX_MACRO(module, ZSTD_d_windowLogMax); - - /* ZSTD_strategy enum */ - ADD_INT_PREFIX_MACRO(module, ZSTD_fast); - ADD_INT_PREFIX_MACRO(module, ZSTD_dfast); - ADD_INT_PREFIX_MACRO(module, ZSTD_greedy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy); - ADD_INT_PREFIX_MACRO(module, ZSTD_lazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btlazy2); - ADD_INT_PREFIX_MACRO(module, ZSTD_btopt); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra); - ADD_INT_PREFIX_MACRO(module, ZSTD_btultra2); - - return 0; -} - -static inline PyObject * -get_zstd_version_info(void) +_zstd_exec(PyObject *m) { - uint32_t ver = ZSTD_versionNumber(); - uint32_t major, minor, release; - - major = ver / 10000; - minor = (ver / 100) % 100; - release = ver % 100; - - return Py_BuildValue("III", major, minor, release); -} - -static inline int -add_vars_to_module(PyObject *module) -{ - PyObject *obj; - - /* zstd_version, a str. */ - if (PyModule_AddStringConstant(module, "zstd_version", - ZSTD_versionString()) < 0) { - return -1; - } - - /* zstd_version_info, a tuple. */ - obj = get_zstd_version_info(); - if (PyModule_AddObjectRef(module, "zstd_version_info", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* Add zstd parameters */ - if (add_parameters(module) < 0) { - return -1; - } - - /* _compressionLevel_values: (default, min, max) - ZSTD_defaultCLevel() was added in zstd v1.5.0 */ - obj = Py_BuildValue("iii", -#if ZSTD_VERSION_NUMBER < 10500 - ZSTD_CLEVEL_DEFAULT, -#else - ZSTD_defaultCLevel(), -#endif - ZSTD_minCLevel(), - ZSTD_maxCLevel()); - if (PyModule_AddObjectRef(module, - "_compressionLevel_values", - obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_CStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_CStreamInSize(), - (uint32_t)ZSTD_CStreamOutSize()); - if (PyModule_AddObjectRef(module, "_ZSTD_CStreamSizes", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_DStreamSizes */ - obj = Py_BuildValue("II", - (uint32_t)ZSTD_DStreamInSize(), - (uint32_t)ZSTD_DStreamOutSize()); - if (PyModule_AddObjectRef(module, "_ZSTD_DStreamSizes", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - /* _ZSTD_CONFIG */ - obj = Py_BuildValue("isOOO", 8*(int)sizeof(Py_ssize_t), "c", - Py_False, - Py_True, -/* User mremap output buffer */ -#if defined(HAVE_MREMAP) - Py_True -#else - Py_False -#endif - ); - if (PyModule_AddObjectRef(module, "_ZSTD_CONFIG", obj) < 0) { - Py_XDECREF(obj); - return -1; - } - Py_DECREF(obj); - - return 0; -} - -#define ADD_STR_TO_STATE_MACRO(STR) \ - do { \ - mod_state->str_##STR = PyUnicode_FromString(#STR); \ - if (mod_state->str_##STR == NULL) { \ - return -1; \ - } \ - } while(0) - -static inline int -add_type_to_module(PyObject *module, const char *name, - PyType_Spec *type_spec, PyTypeObject **dest) -{ - PyObject *temp = PyType_FromModuleAndSpec(module, type_spec, NULL); - - if (PyModule_AddObjectRef(module, name, temp) < 0) { - Py_XDECREF(temp); - return -1; - } - - *dest = (PyTypeObject*) temp; - - return 0; -} - -static inline int -add_constant_to_type(PyTypeObject *type, const char *name, long value) -{ - PyObject *temp; - - temp = PyLong_FromLong(value); - if (temp == NULL) { - return -1; - } - - int rc = PyObject_SetAttrString((PyObject*) type, name, temp); - Py_DECREF(temp); - return rc; -} - -static int _zstd_exec(PyObject *module) { - _zstd_state* const mod_state = get_zstd_state(module); +#define ADD_TYPE(TYPE, SPEC) \ +do { \ + TYPE = (PyTypeObject *)PyType_FromModuleAndSpec(m, &(SPEC), NULL); \ + if (TYPE == NULL) { \ + return -1; \ + } \ + if (PyModule_AddType(m, TYPE) < 0) { \ + return -1; \ + } \ +} while (0) + +#define ADD_INT_MACRO(MACRO) \ + if (PyModule_AddIntConstant((m), #MACRO, (MACRO)) < 0) { \ + return -1; \ + } + +#define ADD_INT_CONST_TO_TYPE(TYPE, NAME, VALUE) \ +do { \ + PyObject *v = PyLong_FromLong((VALUE)); \ + if (v == NULL || PyObject_SetAttrString((PyObject *)(TYPE), \ + (NAME), v) < 0) { \ + Py_XDECREF(v); \ + return -1; \ + } \ + Py_DECREF(v); \ +} while (0) + + _zstd_state* mod_state = get_zstd_state(m); /* Reusable objects & variables */ - mod_state->empty_bytes = PyBytes_FromStringAndSize(NULL, 0); - if (mod_state->empty_bytes == NULL) { - return -1; - } - - mod_state->empty_readonly_memoryview = - PyMemoryView_FromMemory((char*)mod_state, 0, PyBUF_READ); - if (mod_state->empty_readonly_memoryview == NULL) { - return -1; - } - - /* Add str to module state */ - ADD_STR_TO_STATE_MACRO(read); - ADD_STR_TO_STATE_MACRO(readinto); - ADD_STR_TO_STATE_MACRO(write); - ADD_STR_TO_STATE_MACRO(flush); - mod_state->CParameter_type = NULL; mod_state->DParameter_type = NULL; - /* Add variables to module */ - if (add_vars_to_module(module) < 0) { - return -1; - } - - /* ZstdError */ + /* Create and add heap types */ + ADD_TYPE(mod_state->ZstdDict_type, zstd_dict_type_spec); + ADD_TYPE(mod_state->ZstdCompressor_type, zstd_compressor_type_spec); + ADD_TYPE(mod_state->ZstdDecompressor_type, zstd_decompressor_type_spec); mod_state->ZstdError = PyErr_NewExceptionWithDoc( - "_zstd.ZstdError", - "Call to the underlying zstd library failed.", - NULL, NULL); + "compression.zstd.ZstdError", + "An error occurred in the zstd library.", + NULL, NULL); if (mod_state->ZstdError == NULL) { return -1; } - - if (PyModule_AddObjectRef(module, "ZstdError", mod_state->ZstdError) < 0) { - Py_DECREF(mod_state->ZstdError); + if (PyModule_AddType(m, (PyTypeObject *)mod_state->ZstdError) < 0) { return -1; } - /* ZstdDict */ - if (add_type_to_module(module, - "ZstdDict", - &zstddict_type_spec, - &mod_state->ZstdDict_type) < 0) { + /* Add constants */ + if (PyModule_AddIntConstant(m, "zstd_version_number", + ZSTD_versionNumber()) < 0) { return -1; } - // ZstdCompressor - if (add_type_to_module(module, - "ZstdCompressor", - &zstdcompressor_type_spec, - &mod_state->ZstdCompressor_type) < 0) { + if (PyModule_AddStringConstant(m, "zstd_version", + ZSTD_versionString()) < 0) { return -1; } - // Add EndDirective enum to ZstdCompressor - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "CONTINUE", - ZSTD_e_continue) < 0) { +#if ZSTD_VERSION_NUMBER >= 10500 + if (PyModule_AddIntConstant(m, "ZSTD_CLEVEL_DEFAULT", + ZSTD_defaultCLevel()) < 0) { return -1; } +#else + ADD_INT_MACRO(ZSTD_CLEVEL_DEFAULT); +#endif - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "FLUSH_BLOCK", - ZSTD_e_flush) < 0) { + if (PyModule_Add(m, "ZSTD_DStreamOutSize", + PyLong_FromSize_t(ZSTD_DStreamOutSize())) < 0) { return -1; } - if (add_constant_to_type(mod_state->ZstdCompressor_type, - "FLUSH_FRAME", - ZSTD_e_end) < 0) { - return -1; - } - - // ZstdDecompressor - if (add_type_to_module(module, - "ZstdDecompressor", - &ZstdDecompressor_type_spec, - &mod_state->ZstdDecompressor_type) < 0) { - return -1; - } + /* Add zstd compression parameters. All should also be in cp_list. */ + ADD_INT_MACRO(ZSTD_c_compressionLevel); + ADD_INT_MACRO(ZSTD_c_windowLog); + ADD_INT_MACRO(ZSTD_c_hashLog); + ADD_INT_MACRO(ZSTD_c_chainLog); + ADD_INT_MACRO(ZSTD_c_searchLog); + ADD_INT_MACRO(ZSTD_c_minMatch); + ADD_INT_MACRO(ZSTD_c_targetLength); + ADD_INT_MACRO(ZSTD_c_strategy); + + ADD_INT_MACRO(ZSTD_c_enableLongDistanceMatching); + ADD_INT_MACRO(ZSTD_c_ldmHashLog); + ADD_INT_MACRO(ZSTD_c_ldmMinMatch); + ADD_INT_MACRO(ZSTD_c_ldmBucketSizeLog); + ADD_INT_MACRO(ZSTD_c_ldmHashRateLog); + + ADD_INT_MACRO(ZSTD_c_contentSizeFlag); + ADD_INT_MACRO(ZSTD_c_checksumFlag); + ADD_INT_MACRO(ZSTD_c_dictIDFlag); + + ADD_INT_MACRO(ZSTD_c_nbWorkers); + ADD_INT_MACRO(ZSTD_c_jobSize); + ADD_INT_MACRO(ZSTD_c_overlapLog); + + /* Add zstd decompression parameters. All should also be in dp_list. */ + ADD_INT_MACRO(ZSTD_d_windowLogMax); + + /* Add ZSTD_strategy enum members */ + ADD_INT_MACRO(ZSTD_fast); + ADD_INT_MACRO(ZSTD_dfast); + ADD_INT_MACRO(ZSTD_greedy); + ADD_INT_MACRO(ZSTD_lazy); + ADD_INT_MACRO(ZSTD_lazy2); + ADD_INT_MACRO(ZSTD_btlazy2); + ADD_INT_MACRO(ZSTD_btopt); + ADD_INT_MACRO(ZSTD_btultra); + ADD_INT_MACRO(ZSTD_btultra2); + + /* Add ZSTD_EndDirective enum members to ZstdCompressor */ + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "CONTINUE", ZSTD_e_continue); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_BLOCK", ZSTD_e_flush); + ADD_INT_CONST_TO_TYPE(mod_state->ZstdCompressor_type, + "FLUSH_FRAME", ZSTD_e_end); + + /* Make ZstdCompressor immutable (set Py_TPFLAGS_IMMUTABLETYPE) */ + PyType_Freeze(mod_state->ZstdCompressor_type); + +#undef ADD_TYPE +#undef ADD_INT_MACRO +#undef ADD_ZSTD_COMPRESSOR_INT_CONST return 0; } @@ -837,14 +704,7 @@ static int _zstd_exec(PyObject *module) { static int _zstd_traverse(PyObject *module, visitproc visit, void *arg) { - _zstd_state* const mod_state = get_zstd_state(module); - - Py_VISIT(mod_state->empty_bytes); - Py_VISIT(mod_state->empty_readonly_memoryview); - Py_VISIT(mod_state->str_read); - Py_VISIT(mod_state->str_readinto); - Py_VISIT(mod_state->str_write); - Py_VISIT(mod_state->str_flush); + _zstd_state* mod_state = get_zstd_state(module); Py_VISIT(mod_state->ZstdDict_type); Py_VISIT(mod_state->ZstdCompressor_type); @@ -861,14 +721,7 @@ _zstd_traverse(PyObject *module, visitproc visit, void *arg) static int _zstd_clear(PyObject *module) { - _zstd_state* const mod_state = get_zstd_state(module); - - Py_CLEAR(mod_state->empty_bytes); - Py_CLEAR(mod_state->empty_readonly_memoryview); - Py_CLEAR(mod_state->str_read); - Py_CLEAR(mod_state->str_readinto); - Py_CLEAR(mod_state->str_write); - Py_CLEAR(mod_state->str_flush); + _zstd_state* mod_state = get_zstd_state(module); Py_CLEAR(mod_state->ZstdDict_type); Py_CLEAR(mod_state->ZstdCompressor_type); @@ -890,20 +743,21 @@ _zstd_free(void *module) static struct PyModuleDef_Slot _zstd_slots[] = { {Py_mod_exec, _zstd_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, - - {0} + {0, NULL}, }; -struct PyModuleDef _zstdmodule = { - PyModuleDef_HEAD_INIT, +static struct PyModuleDef _zstdmodule = { + .m_base = PyModuleDef_HEAD_INIT, .m_name = "_zstd", + .m_doc = "Implementation module for Zstandard compression.", .m_size = sizeof(_zstd_state), .m_slots = _zstd_slots, .m_methods = _zstd_methods, .m_traverse = _zstd_traverse, .m_clear = _zstd_clear, - .m_free = _zstd_free + .m_free = _zstd_free, }; PyMODINIT_FUNC diff --git a/Modules/_zstd/_zstdmodule.h b/Modules/_zstd/_zstdmodule.h index 9322ee259c5124..4e8f708f2232c7 100644 --- a/Modules/_zstd/_zstdmodule.h +++ b/Modules/_zstd/_zstdmodule.h @@ -1,138 +1,28 @@ -#pragma once -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* Declarations shared between different parts of the _zstd module*/ -#include "Python.h" +#ifndef ZSTD_MODULE_H +#define ZSTD_MODULE_H -#include "zstd.h" -#include "zdict.h" +#include "zstddict.h" +/* Type specs */ +extern PyType_Spec zstd_dict_type_spec; +extern PyType_Spec zstd_compressor_type_spec; +extern PyType_Spec zstd_decompressor_type_spec; -/* Forward declaration of module state */ -typedef struct _zstd_state _zstd_state; - -/* Forward reference of module def */ -extern PyModuleDef _zstdmodule; - -/* For clinic type calculations */ -static inline _zstd_state * -get_zstd_state_from_type(PyTypeObject *type) { - PyObject *module = PyType_GetModuleByDef(type, &_zstdmodule); - if (module == NULL) { - return NULL; - } - void *state = PyModule_GetState(module); - assert(state != NULL); - return (_zstd_state *)state; -} - -extern PyType_Spec zstddict_type_spec; -extern PyType_Spec zstdcompressor_type_spec; -extern PyType_Spec ZstdDecompressor_type_spec; - -struct _zstd_state { - PyObject *empty_bytes; - PyObject *empty_readonly_memoryview; - PyObject *str_read; - PyObject *str_readinto; - PyObject *str_write; - PyObject *str_flush; - +typedef struct { + /* Module heap types. */ PyTypeObject *ZstdDict_type; PyTypeObject *ZstdCompressor_type; PyTypeObject *ZstdDecompressor_type; PyObject *ZstdError; + /* enum types set by set_parameter_types. */ PyTypeObject *CParameter_type; PyTypeObject *DParameter_type; -}; - -typedef struct { - PyObject_HEAD - - /* Reusable compress/decompress dictionary, they are created once and - can be shared by multiple threads concurrently, since its usage is - read-only. - c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ - ZSTD_DDict *d_dict; - PyObject *c_dicts; - - /* Content of the dictionary, bytes object. */ - PyObject *dict_content; - /* Dictionary id */ - uint32_t dict_id; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdDict; - -typedef struct { - PyObject_HEAD - - /* Compression context */ - ZSTD_CCtx *cctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Last mode, initialized to ZSTD_e_end */ - int last_mode; - - /* (nbWorker >= 1) ? 1 : 0 */ - int use_multithread; - - /* Compression level */ - int compression_level; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdCompressor; - -typedef struct { - PyObject_HEAD - - /* Decompression context */ - ZSTD_DCtx *dctx; - - /* ZstdDict object in use */ - PyObject *dict; - - /* Unconsumed input data */ - char *input_buffer; - size_t input_buffer_size; - size_t in_begin, in_end; - - /* Unused data */ - PyObject *unused_data; - - /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ - char needs_input; - - /* For decompress(), 0 or 1. - 1 when both input and output streams are at a frame edge, means a - frame is completely decoded and fully flushed, or the decompressor - just be initialized. */ - char at_frame_edge; - - /* For ZstdDecompressor, 0 or 1. - 1 means the end of the first frame has been reached. */ - char eof; - - /* Used for fast reset above three variables */ - char _unused_char_for_align; - - /* __init__ has been called, 0 or 1. */ - int inited; -} ZstdDecompressor; - -typedef enum { - TYPE_DECOMPRESSOR, // , ZstdDecompressor class - TYPE_ENDLESS_DECOMPRESSOR, // , decompress() function -} decompress_type; +} _zstd_state; typedef enum { ERR_DECOMPRESS, @@ -147,7 +37,7 @@ typedef enum { ERR_SET_C_LEVEL, ERR_TRAIN_DICT, - ERR_FINALIZE_DICT + ERR_FINALIZE_DICT, } error_type; typedef enum { @@ -156,41 +46,16 @@ typedef enum { DICT_TYPE_PREFIX = 2 } dictionary_type; -static inline int -mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) { - return in->size == in->pos && out->size != out->pos; -} +extern ZstdDict * +_Py_parse_zstd_dict(const _zstd_state *state, + PyObject *dict, int *type); /* Format error message and set ZstdError. */ extern void -set_zstd_error(const _zstd_state* const state, - const error_type type, size_t zstd_ret); +set_zstd_error(const _zstd_state *state, + error_type type, size_t zstd_ret); extern void -set_parameter_error(const _zstd_state* const state, int is_compress, - int key_v, int value_v); - -static const char init_twice_msg[] = "__init__ method is called twice."; - -extern int -_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict); - -extern int -_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict); - -extern int -_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, - const char *arg_name, const char *arg_type); - -extern int -_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options); - -extern PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length, - Py_ssize_t initial_size, - decompress_type type); +set_parameter_error(int is_compress, int key_v, int value_v); -extern PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - ZSTD_EndDirective end_directive); +#endif // !ZSTD_MODULE_H diff --git a/Modules/_zstd/buffer.h b/Modules/_zstd/buffer.h index 319b1214833fcf..4c885fa0d720fd 100644 --- a/Modules/_zstd/buffer.h +++ b/Modules/_zstd/buffer.h @@ -1,11 +1,12 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +#ifndef ZSTD_BUFFER_H +#define ZSTD_BUFFER_H -#include "_zstdmodule.h" #include "pycore_blocks_output_buffer.h" +#include // ZSTD_outBuffer + /* Blocks output buffer wrapper code */ /* Initialize the buffer, and grow the buffer. @@ -18,7 +19,8 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, /* Ensure .list was set to NULL */ assert(buffer->list == NULL); - Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length, + &ob->dst); if (res < 0) { return -1; } @@ -33,8 +35,7 @@ _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, Return -1 on failure */ static inline int _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, - Py_ssize_t max_length, - Py_ssize_t init_size) + Py_ssize_t max_length, Py_ssize_t init_size) { Py_ssize_t block_size; @@ -49,7 +50,8 @@ _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob, block_size = init_size; } - Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, &ob->dst); + Py_ssize_t res = _BlocksOutputBuffer_InitWithSize(buffer, block_size, + &ob->dst); if (res < 0) { return -1; } @@ -102,3 +104,5 @@ _OutputBuffer_ReachedMaxLength(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob) return buffer->allocated == buffer->max_length; } + +#endif // !ZSTD_BUFFER_H diff --git a/Modules/_zstd/clinic/_zstdmodule.c.h b/Modules/_zstd/clinic/_zstdmodule.c.h index 2f8225389b7aea..766e1cfa776767 100644 --- a/Modules/_zstd/clinic/_zstdmodule.c.h +++ b/Modules/_zstd/clinic/_zstdmodule.c.h @@ -9,11 +9,11 @@ preserve #include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() -PyDoc_STRVAR(_zstd__train_dict__doc__, -"_train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" +PyDoc_STRVAR(_zstd_train_dict__doc__, +"train_dict($module, samples_bytes, samples_sizes, dict_size, /)\n" "--\n" "\n" -"Internal function, train a zstd dictionary on sample data.\n" +"Train a Zstandard dictionary on sample data.\n" "\n" " samples_bytes\n" " Concatenation of samples.\n" @@ -22,31 +22,31 @@ PyDoc_STRVAR(_zstd__train_dict__doc__, " dict_size\n" " The size of the dictionary."); -#define _ZSTD__TRAIN_DICT_METHODDEF \ - {"_train_dict", _PyCFunction_CAST(_zstd__train_dict), METH_FASTCALL, _zstd__train_dict__doc__}, +#define _ZSTD_TRAIN_DICT_METHODDEF \ + {"train_dict", _PyCFunction_CAST(_zstd_train_dict), METH_FASTCALL, _zstd_train_dict__doc__}, static PyObject * -_zstd__train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size); +_zstd_train_dict_impl(PyObject *module, PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size); static PyObject * -_zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_zstd_train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyBytesObject *samples_bytes; PyObject *samples_sizes; Py_ssize_t dict_size; - if (!_PyArg_CheckPositional("_train_dict", nargs, 3, 3)) { + if (!_PyArg_CheckPositional("train_dict", nargs, 3, 3)) { goto exit; } if (!PyBytes_Check(args[0])) { - _PyArg_BadArgument("_train_dict", "argument 1", "bytes", args[0]); + _PyArg_BadArgument("train_dict", "argument 1", "bytes", args[0]); goto exit; } samples_bytes = (PyBytesObject *)args[0]; if (!PyTuple_Check(args[1])) { - _PyArg_BadArgument("_train_dict", "argument 2", "tuple", args[1]); + _PyArg_BadArgument("train_dict", "argument 2", "tuple", args[1]); goto exit; } samples_sizes = args[1]; @@ -62,18 +62,18 @@ _zstd__train_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } dict_size = ival; } - return_value = _zstd__train_dict_impl(module, samples_bytes, samples_sizes, dict_size); + return_value = _zstd_train_dict_impl(module, samples_bytes, samples_sizes, dict_size); exit: return return_value; } -PyDoc_STRVAR(_zstd__finalize_dict__doc__, -"_finalize_dict($module, custom_dict_bytes, samples_bytes,\n" -" samples_sizes, dict_size, compression_level, /)\n" +PyDoc_STRVAR(_zstd_finalize_dict__doc__, +"finalize_dict($module, custom_dict_bytes, samples_bytes, samples_sizes,\n" +" dict_size, compression_level, /)\n" "--\n" "\n" -"Internal function, finalize a zstd dictionary.\n" +"Finalize a Zstandard dictionary.\n" "\n" " custom_dict_bytes\n" " Custom dictionary content.\n" @@ -84,19 +84,19 @@ PyDoc_STRVAR(_zstd__finalize_dict__doc__, " dict_size\n" " The size of the dictionary.\n" " compression_level\n" -" Optimize for a specific zstd compression level, 0 means default."); +" Optimize for a specific Zstandard compression level, 0 means default."); -#define _ZSTD__FINALIZE_DICT_METHODDEF \ - {"_finalize_dict", _PyCFunction_CAST(_zstd__finalize_dict), METH_FASTCALL, _zstd__finalize_dict__doc__}, +#define _ZSTD_FINALIZE_DICT_METHODDEF \ + {"finalize_dict", _PyCFunction_CAST(_zstd_finalize_dict), METH_FASTCALL, _zstd_finalize_dict__doc__}, static PyObject * -_zstd__finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, - PyBytesObject *samples_bytes, - PyObject *samples_sizes, Py_ssize_t dict_size, - int compression_level); +_zstd_finalize_dict_impl(PyObject *module, PyBytesObject *custom_dict_bytes, + PyBytesObject *samples_bytes, + PyObject *samples_sizes, Py_ssize_t dict_size, + int compression_level); static PyObject * -_zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +_zstd_finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyBytesObject *custom_dict_bytes; @@ -105,21 +105,21 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) Py_ssize_t dict_size; int compression_level; - if (!_PyArg_CheckPositional("_finalize_dict", nargs, 5, 5)) { + if (!_PyArg_CheckPositional("finalize_dict", nargs, 5, 5)) { goto exit; } if (!PyBytes_Check(args[0])) { - _PyArg_BadArgument("_finalize_dict", "argument 1", "bytes", args[0]); + _PyArg_BadArgument("finalize_dict", "argument 1", "bytes", args[0]); goto exit; } custom_dict_bytes = (PyBytesObject *)args[0]; if (!PyBytes_Check(args[1])) { - _PyArg_BadArgument("_finalize_dict", "argument 2", "bytes", args[1]); + _PyArg_BadArgument("finalize_dict", "argument 2", "bytes", args[1]); goto exit; } samples_bytes = (PyBytesObject *)args[1]; if (!PyTuple_Check(args[2])) { - _PyArg_BadArgument("_finalize_dict", "argument 3", "tuple", args[2]); + _PyArg_BadArgument("finalize_dict", "argument 3", "tuple", args[2]); goto exit; } samples_sizes = args[2]; @@ -139,32 +139,31 @@ _zstd__finalize_dict(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (compression_level == -1 && PyErr_Occurred()) { goto exit; } - return_value = _zstd__finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level); + return_value = _zstd_finalize_dict_impl(module, custom_dict_bytes, samples_bytes, samples_sizes, dict_size, compression_level); exit: return return_value; } -PyDoc_STRVAR(_zstd__get_param_bounds__doc__, -"_get_param_bounds($module, /, parameter, is_compress)\n" +PyDoc_STRVAR(_zstd_get_param_bounds__doc__, +"get_param_bounds($module, /, parameter, is_compress)\n" "--\n" "\n" -"Internal function, get CompressionParameter/DecompressionParameter bounds.\n" +"Get CompressionParameter/DecompressionParameter bounds.\n" "\n" " parameter\n" " The parameter to get bounds.\n" " is_compress\n" " True for CompressionParameter, False for DecompressionParameter."); -#define _ZSTD__GET_PARAM_BOUNDS_METHODDEF \ - {"_get_param_bounds", _PyCFunction_CAST(_zstd__get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd__get_param_bounds__doc__}, +#define _ZSTD_GET_PARAM_BOUNDS_METHODDEF \ + {"get_param_bounds", _PyCFunction_CAST(_zstd_get_param_bounds), METH_FASTCALL|METH_KEYWORDS, _zstd_get_param_bounds__doc__}, static PyObject * -_zstd__get_param_bounds_impl(PyObject *module, int parameter, - int is_compress); +_zstd_get_param_bounds_impl(PyObject *module, int parameter, int is_compress); static PyObject * -_zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -190,7 +189,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg static const char * const _keywords[] = {"parameter", "is_compress", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_get_param_bounds", + .fname = "get_param_bounds", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -211,7 +210,7 @@ _zstd__get_param_bounds(PyObject *module, PyObject *const *args, Py_ssize_t narg if (is_compress < 0) { goto exit; } - return_value = _zstd__get_param_bounds_impl(module, parameter, is_compress); + return_value = _zstd_get_param_bounds_impl(module, parameter, is_compress); exit: return return_value; @@ -221,13 +220,11 @@ PyDoc_STRVAR(_zstd_get_frame_size__doc__, "get_frame_size($module, /, frame_buffer)\n" "--\n" "\n" -"Get the size of a zstd frame, including frame header and 4-byte checksum if it has one.\n" +"Get the size of a Zstandard frame, including the header and optional checksum.\n" "\n" " frame_buffer\n" " A bytes-like object, it should start from the beginning of a frame,\n" -" and contains at least one complete frame.\n" -"\n" -"It will iterate all blocks\' headers within a frame, to accumulate the frame size."); +" and contains at least one complete frame."); #define _ZSTD_GET_FRAME_SIZE_METHODDEF \ {"get_frame_size", _PyCFunction_CAST(_zstd_get_frame_size), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_size__doc__}, @@ -288,23 +285,23 @@ _zstd_get_frame_size(PyObject *module, PyObject *const *args, Py_ssize_t nargs, return return_value; } -PyDoc_STRVAR(_zstd__get_frame_info__doc__, -"_get_frame_info($module, /, frame_buffer)\n" +PyDoc_STRVAR(_zstd_get_frame_info__doc__, +"get_frame_info($module, /, frame_buffer)\n" "--\n" "\n" -"Internal function, get zstd frame infomation from a frame header.\n" +"Get Zstandard frame infomation from a frame header.\n" "\n" " frame_buffer\n" -" A bytes-like object, containing the header of a zstd frame."); +" A bytes-like object, containing the header of a Zstandard frame."); -#define _ZSTD__GET_FRAME_INFO_METHODDEF \ - {"_get_frame_info", _PyCFunction_CAST(_zstd__get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd__get_frame_info__doc__}, +#define _ZSTD_GET_FRAME_INFO_METHODDEF \ + {"get_frame_info", _PyCFunction_CAST(_zstd_get_frame_info), METH_FASTCALL|METH_KEYWORDS, _zstd_get_frame_info__doc__}, static PyObject * -_zstd__get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); +_zstd_get_frame_info_impl(PyObject *module, Py_buffer *frame_buffer); static PyObject * -_zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -330,7 +327,7 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, static const char * const _keywords[] = {"frame_buffer", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_get_frame_info", + .fname = "get_frame_info", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -345,7 +342,7 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (PyObject_GetBuffer(args[0], &frame_buffer, PyBUF_SIMPLE) != 0) { goto exit; } - return_value = _zstd__get_frame_info_impl(module, &frame_buffer); + return_value = _zstd_get_frame_info_impl(module, &frame_buffer); exit: /* Cleanup for frame_buffer */ @@ -356,26 +353,26 @@ _zstd__get_frame_info(PyObject *module, PyObject *const *args, Py_ssize_t nargs, return return_value; } -PyDoc_STRVAR(_zstd__set_parameter_types__doc__, -"_set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" +PyDoc_STRVAR(_zstd_set_parameter_types__doc__, +"set_parameter_types($module, /, c_parameter_type, d_parameter_type)\n" "--\n" "\n" -"Internal function, set CompressionParameter/DecompressionParameter types for validity check.\n" +"Set CompressionParameter and DecompressionParameter types for validity check.\n" "\n" " c_parameter_type\n" " CompressionParameter IntEnum type object\n" " d_parameter_type\n" " DecompressionParameter IntEnum type object"); -#define _ZSTD__SET_PARAMETER_TYPES_METHODDEF \ - {"_set_parameter_types", _PyCFunction_CAST(_zstd__set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd__set_parameter_types__doc__}, +#define _ZSTD_SET_PARAMETER_TYPES_METHODDEF \ + {"set_parameter_types", _PyCFunction_CAST(_zstd_set_parameter_types), METH_FASTCALL|METH_KEYWORDS, _zstd_set_parameter_types__doc__}, static PyObject * -_zstd__set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, - PyObject *d_parameter_type); +_zstd_set_parameter_types_impl(PyObject *module, PyObject *c_parameter_type, + PyObject *d_parameter_type); static PyObject * -_zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_zstd_set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -401,7 +398,7 @@ _zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t n static const char * const _keywords[] = {"c_parameter_type", "d_parameter_type", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_set_parameter_types", + .fname = "set_parameter_types", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -415,18 +412,18 @@ _zstd__set_parameter_types(PyObject *module, PyObject *const *args, Py_ssize_t n goto exit; } if (!PyObject_TypeCheck(args[0], &PyType_Type)) { - _PyArg_BadArgument("_set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); + _PyArg_BadArgument("set_parameter_types", "argument 'c_parameter_type'", (&PyType_Type)->tp_name, args[0]); goto exit; } c_parameter_type = args[0]; if (!PyObject_TypeCheck(args[1], &PyType_Type)) { - _PyArg_BadArgument("_set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); + _PyArg_BadArgument("set_parameter_types", "argument 'd_parameter_type'", (&PyType_Type)->tp_name, args[1]); goto exit; } d_parameter_type = args[1]; - return_value = _zstd__set_parameter_types_impl(module, c_parameter_type, d_parameter_type); + return_value = _zstd_set_parameter_types_impl(module, c_parameter_type, d_parameter_type); exit: return return_value; } -/*[clinic end generated code: output=189c462236a7096c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=437b084f149e68e5 input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/compressor.c.h b/Modules/_zstd/clinic/compressor.c.h index d7909cdf89fcd1..4f8d93fd9e867c 100644 --- a/Modules/_zstd/clinic/compressor.c.h +++ b/Modules/_zstd/clinic/compressor.c.h @@ -8,30 +8,30 @@ preserve #endif #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdCompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdCompressor_new__doc__, "ZstdCompressor(level=None, options=None, zstd_dict=None)\n" "--\n" "\n" "Create a compressor object for compressing data incrementally.\n" "\n" " level\n" -" The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT.\n" +" The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT.\n" " options\n" " A dict object that contains advanced compression parameters.\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" "\n" "Thread-safe at method level. For one-shot compression, use the compress()\n" "function instead."); -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict); +static PyObject * +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict); -static int -_zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 3 @@ -89,7 +89,7 @@ _zstd_ZstdCompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) } zstd_dict = fastargs[2]; skip_optional_pos: - return_value = _zstd_ZstdCompressor___init___impl((ZstdCompressor *)self, level, options, zstd_dict); + return_value = _zstd_ZstdCompressor_new_impl(type, level, options, zstd_dict); exit: return return_value; @@ -189,9 +189,9 @@ PyDoc_STRVAR(_zstd_ZstdCompressor_flush__doc__, " Can be these 2 values ZstdCompressor.FLUSH_FRAME,\n" " ZstdCompressor.FLUSH_BLOCK\n" "\n" -"Flush any remaining data left in internal buffers. Since zstd data consists\n" -"of one or more independent frames, the compressor object can still be used\n" -"after this method is called."); +"Flush any remaining data left in internal buffers. Since Zstandard data\n" +"consists of one or more independent frames, the compressor object can still\n" +"be used after this method is called."); #define _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF \ {"flush", _PyCFunction_CAST(_zstd_ZstdCompressor_flush), METH_FASTCALL|METH_KEYWORDS, _zstd_ZstdCompressor_flush__doc__}, @@ -252,4 +252,43 @@ _zstd_ZstdCompressor_flush(PyObject *self, PyObject *const *args, Py_ssize_t nar exit: return return_value; } -/*[clinic end generated code: output=ef69eab155be39f6 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_zstd_ZstdCompressor_set_pledged_input_size__doc__, +"set_pledged_input_size($self, size, /)\n" +"--\n" +"\n" +"Set the uncompressed content size to be written into the frame header.\n" +"\n" +" size\n" +" The size of the uncompressed data to be provided to the compressor.\n" +"\n" +"This method can be used to ensure the header of the frame about to be written\n" +"includes the size of the data, unless the CompressionParameter.content_size_flag\n" +"is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised.\n" +"\n" +"It is important to ensure that the pledged data size matches the actual data\n" +"size. If they do not match the compressed output data may be corrupted and the\n" +"final chunk written may be lost."); + +#define _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF \ + {"set_pledged_input_size", (PyCFunction)_zstd_ZstdCompressor_set_pledged_input_size, METH_O, _zstd_ZstdCompressor_set_pledged_input_size__doc__}, + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size); + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size(PyObject *self, PyObject *arg) +{ + PyObject *return_value = NULL; + unsigned long long size; + + if (!zstd_contentsize_converter(arg, &size)) { + goto exit; + } + return_value = _zstd_ZstdCompressor_set_pledged_input_size_impl((ZstdCompressor *)self, size); + +exit: + return return_value; +} +/*[clinic end generated code: output=c1d5c2cf06a8becd input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/decompressor.c.h b/Modules/_zstd/clinic/decompressor.c.h index 9359c637203f8f..c6fdae74ab0447 100644 --- a/Modules/_zstd/clinic/decompressor.c.h +++ b/Modules/_zstd/clinic/decompressor.c.h @@ -7,31 +7,30 @@ preserve # include "pycore_runtime.h" // _Py_ID() #endif #include "pycore_abstract.h" // _PyNumber_Index() -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDecompressor___init____doc__, +PyDoc_STRVAR(_zstd_ZstdDecompressor_new__doc__, "ZstdDecompressor(zstd_dict=None, options=None)\n" "--\n" "\n" "Create a decompressor object for decompressing data incrementally.\n" "\n" " zstd_dict\n" -" A ZstdDict object, a pre-trained zstd dictionary.\n" +" A ZstdDict object, a pre-trained Zstandard dictionary.\n" " options\n" " A dict object that contains advanced decompression parameters.\n" "\n" "Thread-safe at method level. For one-shot decompression, use the decompress()\n" "function instead."); -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options); +static PyObject * +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options); -static int -_zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #define NUM_KEYWORDS 2 @@ -82,7 +81,7 @@ _zstd_ZstdDecompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs } options = fastargs[1]; skip_optional_pos: - return_value = _zstd_ZstdDecompressor___init___impl((ZstdDecompressor *)self, zstd_dict, options); + return_value = _zstd_ZstdDecompressor_new_impl(type, zstd_dict, options); exit: return return_value; @@ -114,13 +113,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self); static PyObject * _zstd_ZstdDecompressor_unused_data_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDecompressor_unused_data_get_impl((ZstdDecompressor *)self); } PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, @@ -130,7 +123,7 @@ PyDoc_STRVAR(_zstd_ZstdDecompressor_decompress__doc__, "Decompress *data*, returning uncompressed bytes if possible, or b\'\' otherwise.\n" "\n" " data\n" -" A bytes-like object, zstd data to be decompressed.\n" +" A bytes-like object, Zstandard data to be decompressed.\n" " max_length\n" " Maximum size of returned data. When it is negative, the size of\n" " output buffer is unlimited. When it is nonnegative, returns at\n" @@ -227,4 +220,4 @@ _zstd_ZstdDecompressor_decompress(PyObject *self, PyObject *const *args, Py_ssiz return return_value; } -/*[clinic end generated code: output=ae703f0465a2906d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=30c12ef047027ede input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/clinic/zstddict.c.h b/Modules/_zstd/clinic/zstddict.c.h index 4e0f7b64172a74..79db85405d6e6b 100644 --- a/Modules/_zstd/clinic/zstddict.c.h +++ b/Modules/_zstd/clinic/zstddict.c.h @@ -6,38 +6,35 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() -PyDoc_STRVAR(_zstd_ZstdDict___init____doc__, -"ZstdDict(dict_content, is_raw=False)\n" +PyDoc_STRVAR(_zstd_ZstdDict_new__doc__, +"ZstdDict(dict_content, /, *, is_raw=False)\n" "--\n" "\n" -"Represents a zstd dictionary, which can be used for compression/decompression.\n" +"Represents a Zstandard dictionary.\n" "\n" " dict_content\n" -" A bytes-like object, dictionary\'s content.\n" +" The content of a Zstandard dictionary as a bytes-like object.\n" " is_raw\n" -" This parameter is for advanced user. True means dict_content\n" -" argument is a \"raw content\" dictionary, free of any format\n" -" restriction. False means dict_content argument is an ordinary\n" -" zstd dictionary, was created by zstd functions, follow a\n" -" specified format.\n" +" If true, perform no checks on *dict_content*, useful for some\n" +" advanced cases. Otherwise, check that the content represents\n" +" a Zstandard dictionary created by the zstd library or CLI.\n" "\n" -"It\'s thread-safe, and can be shared by multiple ZstdCompressor /\n" -"ZstdDecompressor objects."); +"The dictionary can be used for compression or decompression, and can be shared\n" +"by multiple ZstdCompressor or ZstdDecompressor objects."); -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw); +static PyObject * +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw); -static int -_zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) +static PyObject * +_zstd_ZstdDict_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - int return_value = -1; + PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 1 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -46,7 +43,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(dict_content), &_Py_ID(is_raw), }, + .ob_item = { &_Py_ID(is_raw), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -55,7 +52,7 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"dict_content", "is_raw", NULL}; + static const char * const _keywords[] = {"", "is_raw", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "ZstdDict", @@ -66,33 +63,68 @@ _zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs) PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; - PyObject *dict_content; + Py_buffer dict_content = {NULL, NULL}; int is_raw = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, - /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - dict_content = fastargs[0]; + if (PyObject_GetBuffer(fastargs[0], &dict_content, PyBUF_SIMPLE) != 0) { + goto exit; + } if (!noptargs) { - goto skip_optional_pos; + goto skip_optional_kwonly; } is_raw = PyObject_IsTrue(fastargs[1]); if (is_raw < 0) { goto exit; } -skip_optional_pos: - return_value = _zstd_ZstdDict___init___impl((ZstdDict *)self, dict_content, is_raw); +skip_optional_kwonly: + return_value = _zstd_ZstdDict_new_impl(type, &dict_content, is_raw); exit: + /* Cleanup for dict_content */ + if (dict_content.obj) { + PyBuffer_Release(&dict_content); + } + return return_value; } +PyDoc_STRVAR(_zstd_ZstdDict_dict_content__doc__, +"The content of a Zstandard dictionary, as a bytes object."); +#if defined(_zstd_ZstdDict_dict_content_DOCSTR) +# undef _zstd_ZstdDict_dict_content_DOCSTR +#endif +#define _zstd_ZstdDict_dict_content_DOCSTR _zstd_ZstdDict_dict_content__doc__ + +#if !defined(_zstd_ZstdDict_dict_content_DOCSTR) +# define _zstd_ZstdDict_dict_content_DOCSTR NULL +#endif +#if defined(_ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF) +# undef _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, (setter)_zstd_ZstdDict_dict_content_set, _zstd_ZstdDict_dict_content_DOCSTR}, +#else +# define _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF {"dict_content", (getter)_zstd_ZstdDict_dict_content_get, NULL, _zstd_ZstdDict_dict_content_DOCSTR}, +#endif + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self); + +static PyObject * +_zstd_ZstdDict_dict_content_get(PyObject *self, void *Py_UNUSED(context)) +{ + return _zstd_ZstdDict_dict_content_get_impl((ZstdDict *)self); +} + PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__, "Load as a digested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_digested_dict)\n" +"\n" "1. Some advanced compression parameters of compressor may be overridden\n" " by parameters of digested dictionary.\n" "2. ZstdDict has a digested dictionaries cache for each compression level.\n" @@ -120,19 +152,15 @@ _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__, "Load as an undigested dictionary to compressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_undigested_dict)\n" +"\n" "1. The advanced compression parameters of compressor will not be overridden.\n" "2. Loading an undigested dictionary is costly. If load an undigested dictionary\n" " multiple times, consider reusing a compressor object.\n" @@ -158,19 +186,15 @@ _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self); } PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__, "Load as a prefix to compressor/decompressor.\n" "\n" -"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n" +"Pass this attribute as zstd_dict argument:\n" +"compress(dat, zstd_dict=zd.as_prefix)\n" +"\n" "1. Prefix is compatible with long distance matching, while dictionary is not.\n" "2. It only works for the first frame, then the compressor/decompressor will\n" " return to no prefix state.\n" @@ -196,12 +220,6 @@ _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self); static PyObject * _zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context)) { - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(self); - return_value = _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); - Py_END_CRITICAL_SECTION(); - - return return_value; + return _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self); } -/*[clinic end generated code: output=59257c053f74eda7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=4696cbc722e5fdfc input=a9049054013a1b77]*/ diff --git a/Modules/_zstd/compressor.c b/Modules/_zstd/compressor.c index b735981e7476d5..bc9e6eff89af68 100644 --- a/Modules/_zstd/compressor.c +++ b/Modules/_zstd/compressor.c @@ -1,123 +1,196 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdCompressor class definitions */ /*[clinic input] module _zstd -class _zstd.ZstdCompressor "ZstdCompressor *" "clinic_state()->ZstdCompressor_type" +class _zstd.ZstdCompressor "ZstdCompressor *" "&zstd_compressor_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=875bf614798f80cb]*/ - +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7166021db1ef7df8]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif -#include "_zstdmodule.h" +#include "Python.h" +#include "_zstdmodule.h" #include "buffer.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked #include // offsetof() +#include // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Compression context */ + ZSTD_CCtx *cctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Last mode, initialized to ZSTD_e_end */ + int last_mode; + /* (nbWorker >= 1) ? 1 : 0 */ + int use_multithread; + + /* Compression level */ + int compression_level; + + /* Lock to protect the compression context */ + PyMutex lock; +} ZstdCompressor; #define ZstdCompressor_CAST(op) ((ZstdCompressor *)op) -int -_PyZstd_set_c_parameters(ZstdCompressor *self, PyObject *level_or_options, - const char *arg_name, const char* arg_type) +/*[python input] + +class zstd_contentsize_converter(CConverter): + type = 'unsigned long long' + converter = 'zstd_contentsize_converter' + +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=0932c350d633c7de]*/ + + +static int +zstd_contentsize_converter(PyObject *size, unsigned long long *p) { - size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; + // None means the user indicates the size is unknown. + if (size == Py_None) { + *p = ZSTD_CONTENTSIZE_UNKNOWN; } - - /* Integer compression level */ - if (PyLong_Check(level_or_options)) { - int level = PyLong_AsInt(level_or_options); - if (level == -1 && PyErr_Occurred()) { + else { + /* ZSTD_CONTENTSIZE_UNKNOWN is 0ULL - 1 + ZSTD_CONTENTSIZE_ERROR is 0ULL - 2 + Users should only pass values < ZSTD_CONTENTSIZE_ERROR */ + unsigned long long pledged_size = PyLong_AsUnsignedLongLong(size); + /* Here we check for (unsigned long long)-1 as a sign of an error in + PyLong_AsUnsignedLongLong */ + if (pledged_size == (unsigned long long)-1 && PyErr_Occurred()) { + *p = ZSTD_CONTENTSIZE_ERROR; + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; + } + return 0; + } + if (pledged_size >= ZSTD_CONTENTSIZE_ERROR) { + *p = ZSTD_CONTENTSIZE_ERROR; PyErr_Format(PyExc_ValueError, - "Compression level should be an int value between %d and %d.", - ZSTD_minCLevel(), ZSTD_maxCLevel()); - return -1; + "size argument should be a positive int less " + "than %ull", ZSTD_CONTENTSIZE_ERROR); + return 0; } + *p = pledged_size; + } + return 1; +} - /* Save for generating ZSTD_CDICT */ - self->compression_level = level; +#include "clinic/compressor.c.h" - /* Set compressionLevel to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, - ZSTD_c_compressionLevel, - level); +static int +_zstd_set_c_level(ZstdCompressor *self, int level) +{ + /* Set integer compression level */ + int min_level = ZSTD_minCLevel(); + int max_level = ZSTD_maxCLevel(); + if (level < min_level || level > max_level) { + PyErr_Format(PyExc_ValueError, + "illegal compression level %d; the valid range is [%d, %d]", + level, min_level, max_level); + return -1; + } - /* Check error */ - if (ZSTD_isError(zstd_ret)) { - set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + /* Save for generating ZSTD_CDICT */ + self->compression_level = level; + + /* Set compressionLevel to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter( + self->cctx, ZSTD_c_compressionLevel, level); + + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_C_LEVEL, zstd_ret); + return -1; + } + return 0; +} + +static int +_zstd_set_c_parameters(ZstdCompressor *self, PyObject *options) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + if (mod_state == NULL) { + return -1; + } + + if (!PyDict_Check(options)) { + PyErr_Format(PyExc_TypeError, + "ZstdCompressor() argument 'options' must be dict, not %T", + options); + return -1; + } + + Py_ssize_t pos = 0; + PyObject *key, *value; + while (PyDict_Next(options, &pos, &key, &value)) { + /* Check key type */ + if (Py_TYPE(key) == mod_state->DParameter_type) { + PyErr_SetString(PyExc_TypeError, + "compression options dictionary key must not be a " + "DecompressionParameter attribute"); return -1; } - return 0; - } - /* Options dict */ - if (PyDict_Check(level_or_options)) { - PyObject *key, *value; - Py_ssize_t pos = 0; + Py_INCREF(key); + Py_INCREF(value); + int key_v = PyLong_AsInt(key); + Py_DECREF(key); + if (key_v == -1 && PyErr_Occurred()) { + Py_DECREF(value); + return -1; + } - while (PyDict_Next(level_or_options, &pos, &key, &value)) { - /* Check key type */ - if (Py_TYPE(key) == mod_state->DParameter_type) { - PyErr_SetString(PyExc_TypeError, - "Key of compression option dict should " - "NOT be DecompressionParameter."); - return -1; - } + int value_v = PyLong_AsInt(value); + Py_DECREF(value); + if (value_v == -1 && PyErr_Occurred()) { + return -1; + } - int key_v = PyLong_AsInt(key); - if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a CompressionParameter attribute."); + if (key_v == ZSTD_c_compressionLevel) { + if (_zstd_set_c_level(self, value_v) < 0) { return -1; } - - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? - int value_v = PyLong_AsInt(value); - if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of option dict should be an int."); - return -1; + continue; + } + if (key_v == ZSTD_c_nbWorkers) { + /* From the zstd library docs: + 1. When nbWorkers >= 1, triggers asynchronous mode when + used with ZSTD_compressStream2(). + 2, Default value is `0`, aka "single-threaded mode" : no + worker is spawned, compression is performed inside + caller's thread, all invocations are blocking. */ + if (value_v != 0) { + self->use_multithread = 1; } + } - if (key_v == ZSTD_c_compressionLevel) { - /* Save for generating ZSTD_CDICT */ - self->compression_level = value_v; - } - else if (key_v == ZSTD_c_nbWorkers) { - /* From zstd library doc: - 1. When nbWorkers >= 1, triggers asynchronous mode when - used with ZSTD_compressStream2(). - 2, Default value is `0`, aka "single-threaded mode" : no - worker is spawned, compression is performed inside - caller's thread, all invocations are blocking. */ - if (value_v != 0) { - self->use_multithread = 1; - } - } + /* Set parameter to compression context */ + size_t zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - /* Set parameter to compression context */ - zstd_ret = ZSTD_CCtx_setParameter(self->cctx, key_v, value_v); - if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 1, key_v, value_v); - return -1; - } + /* Check error */ + if (ZSTD_isError(zstd_ret)) { + set_parameter_error(1, key_v, value_v); + return -1; } - return 0; } - PyErr_Format(PyExc_TypeError, "Invalid type for %s. Expected %s", arg_name, arg_type); - return -1; + return 0; } static void @@ -130,12 +203,12 @@ capsule_free_cdict(PyObject *capsule) ZSTD_CDict * _get_CDict(ZstdDict *self, int compressionLevel) { + assert(PyMutex_IsLocked(&self->lock)); PyObject *level = NULL; - PyObject *capsule; + PyObject *capsule = NULL; ZSTD_CDict *cdict; + int ret; - // TODO(emmatyping): refactor critical section code into a lock_held function - Py_BEGIN_CRITICAL_SECTION(self); /* int level object */ level = PyLong_FromLong(compressionLevel); @@ -144,27 +217,23 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Get PyCapsule object from self->c_dicts */ - capsule = PyDict_GetItemWithError(self->c_dicts, level); + ret = PyDict_GetItemRef(self->c_dicts, level, &capsule); + if (ret < 0) { + goto error; + } if (capsule == NULL) { - if (PyErr_Occurred()) { - goto error; - } - /* Create ZSTD_CDict instance */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - cdict = ZSTD_createCDict(dict_buffer, - dict_len, + cdict = ZSTD_createCDict(self->dict_buffer, self->dict_len, compressionLevel); Py_END_ALLOW_THREADS if (cdict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_CDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_CDict instance from " + "Zstandard dictionary content."); } goto error; } @@ -177,11 +246,10 @@ _get_CDict(ZstdDict *self, int compressionLevel) } /* Add PyCapsule object to self->c_dicts */ - if (PyDict_SetItem(self->c_dicts, level, capsule) < 0) { - Py_DECREF(capsule); + ret = PyDict_SetItem(self->c_dicts, level, capsule); + if (ret < 0) { goto error; } - Py_DECREF(capsule); } else { /* ZSTD_CDict instance already exists */ @@ -193,61 +261,15 @@ _get_CDict(ZstdDict *self, int compressionLevel) cdict = NULL; success: Py_XDECREF(level); - Py_END_CRITICAL_SECTION(); + Py_XDECREF(capsule); return cdict; } -int -_PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { - +static int +_zstd_load_impl(ZstdCompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) +{ size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* When compressing, use undigested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_UNDIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: if (type == DICT_TYPE_DIGESTED) { /* Get ZSTD_CDict */ ZSTD_CDict *c_dict = _get_CDict(zd, self->compression_level); @@ -256,28 +278,18 @@ _PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { } /* Reference a prepared dictionary. It overrides some compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION(self); zstd_ret = ZSTD_CCtx_refCDict(self->cctx, c_dict); - Py_END_CRITICAL_SECTION(); } else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary. It doesn't override compression context's parameters. */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_loadDictionary( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_CCtx_loadDictionary(self->cctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_CCtx_refPrefix( - self->cctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_CCtx_refPrefix(self->cctx, zd->dict_buffer, + zd->dict_len); } else { Py_UNREACHABLE(); @@ -291,28 +303,57 @@ _PyZstd_load_c_dict(ZstdCompressor *self, PyObject *dict) { return 0; } -#define clinic_state() (get_zstd_state_from_type(type)) -#include "clinic/compressor.c.h" -#undef clinic_state +static int +_zstd_load_c_dict(ZstdCompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When compressing, use undigested dictionary by default. */ + int type = DICT_TYPE_UNDIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} + +/*[clinic input] +@classmethod +_zstd.ZstdCompressor.__new__ as _zstd_ZstdCompressor_new + level: object = None + The compression level to use. Defaults to COMPRESSION_LEVEL_DEFAULT. + options: object = None + A dict object that contains advanced compression parameters. + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + +Create a compressor object for compressing data incrementally. + +Thread-safe at method level. For one-shot compression, use the compress() +function instead. +[clinic start generated code]*/ static PyObject * -_zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdCompressor_new_impl(PyTypeObject *type, PyObject *level, + PyObject *options, PyObject *zstd_dict) +/*[clinic end generated code: output=cdef61eafecac3d7 input=92de0211ae20ffdc]*/ { - ZstdCompressor *self; - self = PyObject_GC_New(ZstdCompressor, type); + ZstdCompressor* self = PyObject_GC_New(ZstdCompressor, type); if (self == NULL) { goto error; } - self->inited = 0; - self->dict = NULL; self->use_multithread = 0; - + self->dict = NULL; + self->lock = (PyMutex){0}; /* Compression context */ self->cctx = ZSTD_createCCtx(); if (self->cctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_CCtx instance."); @@ -323,12 +364,56 @@ _zstd_ZstdCompressor_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject /* Last mode */ self->last_mode = ZSTD_e_end; + if (level != Py_None && options != Py_None) { + PyErr_SetString(PyExc_TypeError, + "Only one of level or options should be used."); + goto error; + } + + /* Set compression level */ + if (level != Py_None) { + if (!PyLong_Check(level)) { + PyErr_SetString(PyExc_TypeError, + "invalid type for level, expected int"); + goto error; + } + int level_v = PyLong_AsInt(level); + if (level_v == -1 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Format(PyExc_ValueError, + "illegal compression level; the valid range is [%d, %d]", + ZSTD_minCLevel(), ZSTD_maxCLevel()); + } + goto error; + } + if (_zstd_set_c_level(self, level_v) < 0) { + goto error; + } + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_c_parameters(self, options) < 0) { + goto error; + } + } + + /* Load Zstandard dictionary to compression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_c_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + Py_XDECREF(self); return NULL; } @@ -340,7 +425,11 @@ ZstdCompressor_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free compression context */ - ZSTD_freeCCtx(self->cctx); + if (self->cctx) { + ZSTD_freeCCtx(self->cctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); /* Py_XDECREF the dict after free the compression context */ Py_CLEAR(self->dict); @@ -350,72 +439,11 @@ ZstdCompressor_dealloc(PyObject *ob) Py_DECREF(tp); } -/*[clinic input] -_zstd.ZstdCompressor.__init__ - - level: object = None - The compression level to use, defaults to ZSTD_CLEVEL_DEFAULT. - options: object = None - A dict object that contains advanced compression parameters. - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - -Create a compressor object for compressing data incrementally. - -Thread-safe at method level. For one-shot compression, use the compress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdCompressor___init___impl(ZstdCompressor *self, PyObject *level, - PyObject *options, PyObject *zstd_dict) -/*[clinic end generated code: output=215e6c4342732f96 input=9f79b0d8d34c8ef0]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - if (level != Py_None && options != Py_None) { - PyErr_SetString(PyExc_RuntimeError, "Only one of level or options should be used."); - return -1; - } - - /* Set compressLevel/options to compression context */ - if (level != Py_None) { - if (_PyZstd_set_c_parameters(self, level, "level", "int") < 0) { - return -1; - } - } - - if (options != Py_None) { - if (_PyZstd_set_c_parameters(self, options, "options", "dict") < 0) { - return -1; - } - } - - /* Load dictionary to compression context */ - if (zstd_dict != Py_None) { - if (_PyZstd_load_c_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - // We can only start tracking self with the GC once self->dict is set. - PyObject_GC_Track(self); - return 0; -} - -PyObject * -compress_impl(ZstdCompressor *self, Py_buffer *data, - ZSTD_EndDirective end_directive) +static PyObject * +compress_lock_held(ZstdCompressor *self, Py_buffer *data, + ZSTD_EndDirective end_directive) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -442,12 +470,12 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, } if (_OutputBuffer_InitWithSize(&buffer, &out, -1, - (Py_ssize_t) output_buffer_size) < 0) { + (Py_ssize_t) output_buffer_size) < 0) { goto error; } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, end_directive); @@ -455,10 +483,8 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } @@ -487,9 +513,18 @@ compress_impl(ZstdCompressor *self, Py_buffer *data, return NULL; } +#ifndef NDEBUG +static inline int +mt_continue_should_break(ZSTD_inBuffer *in, ZSTD_outBuffer *out) +{ + return in->size == in->pos && out->size != out->pos; +} +#endif + static PyObject * -compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) +compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; @@ -505,24 +540,25 @@ compress_mt_continue_impl(ZstdCompressor *self, Py_buffer *data) goto error; } - /* zstd stream compress */ + /* Zstandard stream compress */ while (1) { Py_BEGIN_ALLOW_THREADS do { - zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, ZSTD_e_continue); - } while (out.pos != out.size && in.pos != in.size && !ZSTD_isError(zstd_ret)); + zstd_ret = ZSTD_compressStream2(self->cctx, &out, &in, + ZSTD_e_continue); + } while (out.pos != out.size + && in.pos != in.size + && !ZSTD_isError(zstd_ret)); Py_END_ALLOW_THREADS /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_COMPRESS, zstd_ret); goto error; } - /* Like compress_impl(), output as much as possible. */ + /* Like compress_lock_held(), output as much as possible. */ if (out.pos == out.size) { if (_OutputBuffer_Grow(&buffer, &out) < 0) { goto error; @@ -581,14 +617,14 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); + PyMutex_Lock(&self->lock); /* Compress */ if (self->use_multithread && mode == ZSTD_e_continue) { - ret = compress_mt_continue_impl(self, data); + ret = compress_mt_continue_lock_held(self, data); } else { - ret = compress_impl(self, data, mode); + ret = compress_lock_held(self, data, mode); } if (ret) { @@ -600,7 +636,7 @@ _zstd_ZstdCompressor_compress_impl(ZstdCompressor *self, Py_buffer *data, /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } @@ -614,14 +650,14 @@ _zstd.ZstdCompressor.flush Finish the compression process. -Flush any remaining data left in internal buffers. Since zstd data consists -of one or more independent frames, the compressor object can still be used -after this method is called. +Flush any remaining data left in internal buffers. Since Zstandard data +consists of one or more independent frames, the compressor object can still +be used after this method is called. [clinic start generated code]*/ static PyObject * _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) -/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=a766870301932b85]*/ +/*[clinic end generated code: output=b7cf2c8d64dcf2e3 input=0ab19627f323cdbc]*/ { PyObject *ret; @@ -635,8 +671,9 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) } /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - ret = compress_impl(self, NULL, mode); + PyMutex_Lock(&self->lock); + + ret = compress_lock_held(self, NULL, mode); if (ret) { self->last_mode = mode; @@ -647,28 +684,79 @@ _zstd_ZstdCompressor_flush_impl(ZstdCompressor *self, int mode) /* Resetting cctx's session never fail */ ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); } - Py_END_CRITICAL_SECTION(); + PyMutex_Unlock(&self->lock); return ret; } + +/*[clinic input] +_zstd.ZstdCompressor.set_pledged_input_size + + size: zstd_contentsize + The size of the uncompressed data to be provided to the compressor. + / + +Set the uncompressed content size to be written into the frame header. + +This method can be used to ensure the header of the frame about to be written +includes the size of the data, unless the CompressionParameter.content_size_flag +is set to False. If last_mode != FLUSH_FRAME, then a RuntimeError is raised. + +It is important to ensure that the pledged data size matches the actual data +size. If they do not match the compressed output data may be corrupted and the +final chunk written may be lost. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdCompressor_set_pledged_input_size_impl(ZstdCompressor *self, + unsigned long long size) +/*[clinic end generated code: output=3a09e55cc0e3b4f9 input=afd8a7d78cff2eb5]*/ +{ + // Error occured while converting argument, should be unreachable + assert(size != ZSTD_CONTENTSIZE_ERROR); + + /* Thread-safe code */ + PyMutex_Lock(&self->lock); + + /* Check the current mode */ + if (self->last_mode != ZSTD_e_end) { + PyErr_SetString(PyExc_ValueError, + "set_pledged_input_size() method must be called " + "when last_mode == FLUSH_FRAME"); + PyMutex_Unlock(&self->lock); + return NULL; + } + + /* Set pledged content size */ + size_t zstd_ret = ZSTD_CCtx_setPledgedSrcSize(self->cctx, size); + PyMutex_Unlock(&self->lock); + if (ZSTD_isError(zstd_ret)) { + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_SET_PLEDGED_INPUT_SIZE, zstd_ret); + return NULL; + } + + Py_RETURN_NONE; +} + static PyMethodDef ZstdCompressor_methods[] = { _ZSTD_ZSTDCOMPRESSOR_COMPRESS_METHODDEF _ZSTD_ZSTDCOMPRESSOR_FLUSH_METHODDEF - - {0} + _ZSTD_ZSTDCOMPRESSOR_SET_PLEDGED_INPUT_SIZE_METHODDEF + {NULL, NULL} }; PyDoc_STRVAR(ZstdCompressor_last_mode_doc, "The last mode used to this compressor object, its value can be .CONTINUE,\n" ".FLUSH_BLOCK, .FLUSH_FRAME. Initialized to .FLUSH_FRAME.\n\n" -"It can be used to get the current state of a compressor, such as, data flushed,\n" -"a frame ended."); +"It can be used to get the current state of a compressor, such as, data\n" +"flushed, or a frame ended."); static PyMemberDef ZstdCompressor_members[] = { {"last_mode", Py_T_INT, offsetof(ZstdCompressor, last_mode), - Py_READONLY, ZstdCompressor_last_mode_doc}, - {0} + Py_READONLY, ZstdCompressor_last_mode_doc}, + {NULL} }; static int @@ -690,18 +778,20 @@ ZstdCompressor_clear(PyObject *ob) static PyType_Slot zstdcompressor_slots[] = { {Py_tp_new, _zstd_ZstdCompressor_new}, {Py_tp_dealloc, ZstdCompressor_dealloc}, - {Py_tp_init, _zstd_ZstdCompressor___init__}, {Py_tp_methods, ZstdCompressor_methods}, {Py_tp_members, ZstdCompressor_members}, - {Py_tp_doc, (char*)_zstd_ZstdCompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdCompressor_new__doc__}, {Py_tp_traverse, ZstdCompressor_traverse}, {Py_tp_clear, ZstdCompressor_clear}, - {0} + {0, 0} }; -PyType_Spec zstdcompressor_type_spec = { - .name = "_zstd.ZstdCompressor", +PyType_Spec zstd_compressor_type_spec = { + .name = "compression.zstd.ZstdCompressor", .basicsize = sizeof(ZstdCompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + // Py_TPFLAGS_IMMUTABLETYPE is not used here as several + // associated constants need to be added to the type. + // PyType_Freeze is called later to set the flag. + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, .slots = zstdcompressor_slots, }; diff --git a/Modules/_zstd/decompressor.c b/Modules/_zstd/decompressor.c index a4be180c0088fc..c53d6e4cb05cf0 100644 --- a/Modules/_zstd/decompressor.c +++ b/Modules/_zstd/decompressor.c @@ -1,176 +1,142 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdDecompressor class definition */ /*[clinic input] module _zstd -class _zstd.ZstdDecompressor "ZstdDecompressor *" "clinic_state()->ZstdDecompressor_type" +class _zstd.ZstdDecompressor "ZstdDecompressor *" "&zstd_decompressor_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4e6eae327c0c0c76]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e2969ddf48a203e0]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif -#include "_zstdmodule.h" +#include "Python.h" +#include "_zstdmodule.h" #include "buffer.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked +#include // bool #include // offsetof() +#include // ZSTD_*() + +typedef struct { + PyObject_HEAD + + /* Decompression context */ + ZSTD_DCtx *dctx; + + /* ZstdDict object in use */ + PyObject *dict; + + /* Unconsumed input data */ + char *input_buffer; + size_t input_buffer_size; + size_t in_begin, in_end; + + /* Unused data */ + PyObject *unused_data; + + /* 0 if decompressor has (or may has) unconsumed input data, 0 or 1. */ + bool needs_input; + + /* For ZstdDecompressor, 0 or 1. + 1 means the end of the first frame has been reached. */ + bool eof; + + /* Lock to protect the decompression context */ + PyMutex lock; +} ZstdDecompressor; #define ZstdDecompressor_CAST(op) ((ZstdDecompressor *)op) +#include "clinic/decompressor.c.h" + static inline ZSTD_DDict * _get_DDict(ZstdDict *self) { + assert(PyMutex_IsLocked(&self->lock)); ZSTD_DDict *ret; - /* Already created */ - if (self->d_dict != NULL) { - return self->d_dict; - } - - Py_BEGIN_CRITICAL_SECTION(self); if (self->d_dict == NULL) { /* Create ZSTD_DDict instance from dictionary content */ - char *dict_buffer = PyBytes_AS_STRING(self->dict_content); - Py_ssize_t dict_len = Py_SIZE(self->dict_content); Py_BEGIN_ALLOW_THREADS - self->d_dict = ZSTD_createDDict(dict_buffer, - dict_len); + ret = ZSTD_createDDict(self->dict_buffer, self->dict_len); Py_END_ALLOW_THREADS + self->d_dict = ret; if (self->d_dict == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, - "Failed to create ZSTD_DDict instance from zstd " - "dictionary content. Maybe the content is corrupted."); + "Failed to create a ZSTD_DDict instance from " + "Zstandard dictionary content."); } } } - /* Don't lose any exception */ - ret = self->d_dict; - Py_END_CRITICAL_SECTION(); - - return ret; + return self->d_dict; } -/* Set decompression parameters to decompression context */ -int -_PyZstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) +static int +_zstd_set_d_parameters(ZstdDecompressor *self, PyObject *options) { - size_t zstd_ret; - PyObject *key, *value; - Py_ssize_t pos; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state == NULL) { return -1; } if (!PyDict_Check(options)) { - PyErr_SetString(PyExc_TypeError, - "options argument should be dict object."); + PyErr_Format(PyExc_TypeError, + "ZstdDecompressor() argument 'options' must be dict, not %T", + options); return -1; } - pos = 0; + Py_ssize_t pos = 0; + PyObject *key, *value; while (PyDict_Next(options, &pos, &key, &value)) { /* Check key type */ if (Py_TYPE(key) == mod_state->CParameter_type) { PyErr_SetString(PyExc_TypeError, - "Key of decompression options dict should " - "NOT be CompressionParameter."); + "compression options dictionary key must not be a " + "CompressionParameter attribute"); return -1; } - /* Both key & value should be 32-bit signed int */ + Py_INCREF(key); + Py_INCREF(value); int key_v = PyLong_AsInt(key); + Py_DECREF(key); if (key_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Key of options dict should be a DecompressionParameter attribute."); return -1; } - // TODO(emmatyping): check bounds when there is a value error here for better - // error message? int value_v = PyLong_AsInt(value); + Py_DECREF(value); if (value_v == -1 && PyErr_Occurred()) { - PyErr_SetString(PyExc_ValueError, - "Value of options dict should be an int."); return -1; } /* Set parameter to compression context */ - Py_BEGIN_CRITICAL_SECTION(self); - zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); - Py_END_CRITICAL_SECTION(); + size_t zstd_ret = ZSTD_DCtx_setParameter(self->dctx, key_v, value_v); /* Check error */ if (ZSTD_isError(zstd_ret)) { - set_parameter_error(mod_state, 0, key_v, value_v); + set_parameter_error(0, key_v, value_v); return -1; } } return 0; } -/* Load dictionary or prefix to decompression context */ -int -_PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +static int +_zstd_load_impl(ZstdDecompressor *self, ZstdDict *zd, + _zstd_state *mod_state, int type) { size_t zstd_ret; - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return -1; - } - ZstdDict *zd; - int type, ret; - - /* Check ZstdDict */ - ret = PyObject_IsInstance(dict, (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* When decompressing, use digested dictionary by default. */ - zd = (ZstdDict*)dict; - type = DICT_TYPE_DIGESTED; - goto load; - } - - /* Check (ZstdDict, type) */ - if (PyTuple_CheckExact(dict) && PyTuple_GET_SIZE(dict) == 2) { - /* Check ZstdDict */ - ret = PyObject_IsInstance(PyTuple_GET_ITEM(dict, 0), - (PyObject*)mod_state->ZstdDict_type); - if (ret < 0) { - return -1; - } - else if (ret > 0) { - /* type == -1 may indicate an error. */ - type = PyLong_AsInt(PyTuple_GET_ITEM(dict, 1)); - if (type == DICT_TYPE_DIGESTED || - type == DICT_TYPE_UNDIGESTED || - type == DICT_TYPE_PREFIX) - { - assert(type >= 0); - zd = (ZstdDict*)PyTuple_GET_ITEM(dict, 0); - goto load; - } - } - } - - /* Wrong type */ - PyErr_SetString(PyExc_TypeError, - "zstd_dict argument should be ZstdDict object."); - return -1; - -load: if (type == DICT_TYPE_DIGESTED) { /* Get ZSTD_DDict */ ZSTD_DDict *d_dict = _get_DDict(zd); @@ -178,27 +144,17 @@ _PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) return -1; } /* Reference a prepared dictionary */ - Py_BEGIN_CRITICAL_SECTION(self); zstd_ret = ZSTD_DCtx_refDDict(self->dctx, d_dict); - Py_END_CRITICAL_SECTION(); } else if (type == DICT_TYPE_UNDIGESTED) { /* Load a dictionary */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_loadDictionary( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_DCtx_loadDictionary(self->dctx, zd->dict_buffer, + zd->dict_len); } else if (type == DICT_TYPE_PREFIX) { /* Load a prefix */ - Py_BEGIN_CRITICAL_SECTION2(self, zd); - zstd_ret = ZSTD_DCtx_refPrefix( - self->dctx, - PyBytes_AS_STRING(zd->dict_content), - Py_SIZE(zd->dict_content)); - Py_END_CRITICAL_SECTION2(); + zstd_ret = ZSTD_DCtx_refPrefix(self->dctx, zd->dict_buffer, + zd->dict_len); } else { /* Impossible code path */ @@ -215,22 +171,31 @@ _PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) return 0; } - +/* Load dictionary or prefix to decompression context */ +static int +_zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) +{ + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + /* When decompressing, use digested dictionary by default. */ + int type = DICT_TYPE_DIGESTED; + ZstdDict *zd = _Py_parse_zstd_dict(mod_state, dict, &type); + if (zd == NULL) { + return -1; + } + int ret; + PyMutex_Lock(&zd->lock); + ret = _zstd_load_impl(self, zd, mod_state, type); + PyMutex_Unlock(&zd->lock); + return ret; +} /* - Given the two types of decompressors (defined in _zstdmodule.h): - - typedef enum { - TYPE_DECOMPRESSOR, // , ZstdDecompressor class - TYPE_ENDLESS_DECOMPRESSOR, // , decompress() function - } decompress_type; - - Decompress implementation for , , pseudo code: + Decompress implementation in pseudo code: initialize_output_buffer while True: decompress_data - set_object_flag # .eof for , .at_frame_edge for . + set_object_flag # .eof if output_buffer_exhausted: if output_buffer_reached_max_length: @@ -240,76 +205,26 @@ _PyZstd_load_d_dict(ZstdDecompressor *self, PyObject *dict) finish ZSTD_decompressStream()'s size_t return value: - - 0 when a frame is completely decoded and fully flushed, zstd's internal - buffer has no data. + - 0 when a frame is completely decoded and fully flushed, + zstd's internal buffer has no data. - An error code, which can be tested using ZSTD_isError(). - Or any other value > 0, which means there is still some decoding or flushing to do to complete current frame. Note, decompressing "an empty input" in any case will make it > 0. - - supports multiple frames, has an .at_frame_edge flag, it means both the - input and output streams are at a frame edge. The flag can be set by this - statement: - - .at_frame_edge = (zstd_ret == 0) ? 1 : 0 - - But if decompressing "an empty input" at "a frame edge", zstd_ret will be - non-zero, then .at_frame_edge will be wrongly set to false. To solve this - problem, two AFE checks are needed to ensure that: when at "a frame edge", - empty input will not be decompressed. - - // AFE check - if (self->at_frame_edge && in->pos == in->size) { - finish - } - - In , if .at_frame_edge is eventually set to true, but input stream has - unconsumed data (in->pos < in->size), then the outer function - stream_decompress() will set .at_frame_edge to false. In this case, - although the output stream is at a frame edge, for the caller, the input - stream is not at a frame edge, see below diagram. This behavior does not - affect the next AFE check, since (in->pos < in->size). - - input stream: --------------|--- - ^ - output stream: ====================| - ^ */ -PyObject * -decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, - Py_ssize_t max_length, - Py_ssize_t initial_size, - decompress_type type) +static PyObject * +decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in, + Py_ssize_t max_length) { size_t zstd_ret; ZSTD_outBuffer out; _BlocksOutputBuffer buffer = {.list = NULL}; PyObject *ret; - /* The first AFE check for setting .at_frame_edge flag */ - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (self->at_frame_edge && in->pos == in->size) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return NULL; - } - ret = mod_state->empty_bytes; - Py_INCREF(ret); - return ret; - } - } - /* Initialize the output buffer */ - if (initial_size >= 0) { - if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) { - goto error; - } - } - else { - if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { - goto error; - } + if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) { + goto error; } assert(out.pos == 0); @@ -321,33 +236,20 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, /* Check error */ if (ZSTD_isError(zstd_ret)) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state != NULL) { - set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); - } + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); + set_zstd_error(mod_state, ERR_DECOMPRESS, zstd_ret); goto error; } - /* Set .eof/.af_frame_edge flag */ - if (type == TYPE_DECOMPRESSOR) { - /* ZstdDecompressor class stops when a frame is decompressed */ - if (zstd_ret == 0) { - self->eof = 1; - break; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* decompress() function supports multiple frames */ - self->at_frame_edge = (zstd_ret == 0) ? 1 : 0; - - /* The second AFE check for setting .at_frame_edge flag */ - if (self->at_frame_edge && in->pos == in->size) { - break; - } + /* Set .eof flag */ + if (zstd_ret == 0) { + /* Stop when a frame is decompressed */ + self->eof = 1; + break; } /* Need to check out before in. Maybe zstd's internal buffer still has - a few bytes can be output, grow the buffer and continue. */ + a few bytes that can be output, grow the buffer and continue. */ if (out.pos == out.size) { /* Output buffer exhausted */ @@ -380,67 +282,40 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in, return NULL; } -void -decompressor_reset_session(ZstdDecompressor *self, - decompress_type type) +static void +decompressor_reset_session_lock_held(ZstdDecompressor *self) { - // TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here - // and ensure lock is always held + assert(PyMutex_IsLocked(&self->lock)); /* Reset variables */ self->in_begin = 0; self->in_end = 0; - if (type == TYPE_DECOMPRESSOR) { - Py_CLEAR(self->unused_data); - } + Py_CLEAR(self->unused_data); /* Reset variables in one operation */ self->needs_input = 1; - self->at_frame_edge = 1; self->eof = 0; - self->_unused_char_for_align = 0; - /* Resetting session never fail */ + /* Resetting session is guaranteed to never fail */ ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only); } -PyObject * -stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length, - decompress_type type) +static PyObject * +stream_decompress_lock_held(ZstdDecompressor *self, Py_buffer *data, + Py_ssize_t max_length) { - Py_ssize_t initial_buffer_size = -1; + assert(PyMutex_IsLocked(&self->lock)); ZSTD_inBuffer in; PyObject *ret = NULL; int use_input_buffer; - if (type == TYPE_DECOMPRESSOR) { - /* Check .eof flag */ - if (self->eof) { - PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame."); - assert(ret == NULL); - goto success; - } - } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - /* Fast path for the first frame */ - if (self->at_frame_edge && self->in_begin == self->in_end) { - /* Read decompressed size */ - uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len); - - /* These two zstd constants always > PY_SSIZE_T_MAX: - ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1) - ZSTD_CONTENTSIZE_ERROR is (0ULL - 2) - - Use ZSTD_findFrameCompressedSize() to check complete frame, - prevent allocating too much memory for small input chunk. */ - - if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX && - !ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) ) - { - initial_buffer_size = (Py_ssize_t) decompressed_size; - } - } + /* Check .eof flag */ + if (self->eof) { + PyErr_SetString(PyExc_EOFError, + "Already at the end of a Zstandard frame."); + assert(ret == NULL); + return NULL; } /* Prepare input buffer w/wo unconsumed data */ @@ -527,30 +402,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length assert(in.pos == 0); /* Decompress */ - ret = decompress_impl(self, &in, - max_length, initial_buffer_size, - type); + ret = decompress_lock_held(self, &in, max_length); if (ret == NULL) { goto error; } /* Unconsumed input data */ if (in.pos == in.size) { - if (type == TYPE_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length || self->eof) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + if (Py_SIZE(ret) == max_length || self->eof) { + self->needs_input = 0; } - else if (type == TYPE_ENDLESS_DECOMPRESSOR) { - if (Py_SIZE(ret) == max_length && !self->at_frame_edge) { - self->needs_input = 0; - } - else { - self->needs_input = 1; - } + else { + self->needs_input = 1; } if (use_input_buffer) { @@ -564,15 +427,11 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length self->needs_input = 0; - if (type == TYPE_ENDLESS_DECOMPRESSOR) { - self->at_frame_edge = 0; - } - if (!use_input_buffer) { /* Discard buffer if it's too small (resizing it may needlessly copy the current contents) */ - if (self->input_buffer != NULL && - self->input_buffer_size < data_size) + if (self->input_buffer != NULL + && self->input_buffer_size < data_size) { PyMem_Free(self->input_buffer); self->input_buffer = NULL; @@ -600,47 +459,57 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length } } - goto success; + return ret; error: /* Reset decompressor's states/session */ - decompressor_reset_session(self, type); + decompressor_reset_session_lock_held(self); Py_CLEAR(ret); -success: - - return ret; + return NULL; } +/*[clinic input] +@classmethod +_zstd.ZstdDecompressor.__new__ as _zstd_ZstdDecompressor_new + zstd_dict: object = None + A ZstdDict object, a pre-trained Zstandard dictionary. + options: object = None + A dict object that contains advanced decompression parameters. + +Create a decompressor object for decompressing data incrementally. + +Thread-safe at method level. For one-shot decompression, use the decompress() +function instead. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +_zstd_ZstdDecompressor_new_impl(PyTypeObject *type, PyObject *zstd_dict, + PyObject *options) +/*[clinic end generated code: output=590ca65c1102ff4a input=213daa57e3ea4062]*/ { - ZstdDecompressor *self; - self = PyObject_GC_New(ZstdDecompressor, type); + ZstdDecompressor* self = PyObject_GC_New(ZstdDecompressor, type); if (self == NULL) { goto error; } - self->inited = 0; - self->dict = NULL; self->input_buffer = NULL; self->input_buffer_size = 0; self->in_begin = -1; self->in_end = -1; self->unused_data = NULL; self->eof = 0; + self->dict = NULL; + self->lock = (PyMutex){0}; /* needs_input flag */ self->needs_input = 1; - /* at_frame_edge flag */ - self->at_frame_edge = 1; - /* Decompression context */ self->dctx = ZSTD_createDCtx(); if (self->dctx == NULL) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); + _zstd_state* mod_state = PyType_GetModuleState(Py_TYPE(self)); if (mod_state != NULL) { PyErr_SetString(mod_state->ZstdError, "Unable to create ZSTD_DCtx instance."); @@ -648,12 +517,29 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto error; } + /* Load Zstandard dictionary to decompression context */ + if (zstd_dict != Py_None) { + if (_zstd_load_d_dict(self, zstd_dict) < 0) { + goto error; + } + Py_INCREF(zstd_dict); + self->dict = zstd_dict; + } + + /* Set options dictionary */ + if (options != Py_None) { + if (_zstd_set_d_parameters(self, options) < 0) { + goto error; + } + } + + // We can only start GC tracking once self->dict is set. + PyObject_GC_Track(self); + return (PyObject*)self; error: - if (self != NULL) { - PyObject_GC_Del(self); - } + Py_XDECREF(self); return NULL; } @@ -665,7 +551,11 @@ ZstdDecompressor_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free decompression context */ - ZSTD_freeDCtx(self->dctx); + if (self->dctx) { + ZSTD_freeDCtx(self->dctx); + } + + assert(!PyMutex_IsLocked(&self->lock)); /* Py_CLEAR the dict after free decompression context */ Py_CLEAR(self->dict); @@ -682,56 +572,6 @@ ZstdDecompressor_dealloc(PyObject *ob) } /*[clinic input] -_zstd.ZstdDecompressor.__init__ - - zstd_dict: object = None - A ZstdDict object, a pre-trained zstd dictionary. - options: object = None - A dict object that contains advanced decompression parameters. - -Create a decompressor object for decompressing data incrementally. - -Thread-safe at method level. For one-shot decompression, use the decompress() -function instead. -[clinic start generated code]*/ - -static int -_zstd_ZstdDecompressor___init___impl(ZstdDecompressor *self, - PyObject *zstd_dict, PyObject *options) -/*[clinic end generated code: output=703af2f1ec226642 input=8fd72999acc1a146]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Load dictionary to decompression context */ - if (zstd_dict != Py_None) { - if (_PyZstd_load_d_dict(self, zstd_dict) < 0) { - return -1; - } - - /* Py_INCREF the dict */ - Py_INCREF(zstd_dict); - self->dict = zstd_dict; - } - - /* Set option to decompression context */ - if (options != Py_None) { - if (_PyZstd_set_d_parameters(self, options) < 0) { - return -1; - } - } - - // We can only start tracking self with the GC once self->dict is set. - PyObject_GC_Track(self); - return 0; -} - -/*[clinic input] -@critical_section @getter _zstd.ZstdDecompressor.unused_data @@ -743,20 +583,15 @@ decompressed, unused input data after the frame. Otherwise this will be b''. static PyObject * _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) -/*[clinic end generated code: output=f3a20940f11b6b09 input=5233800bef00df04]*/ +/*[clinic end generated code: output=f3a20940f11b6b09 input=54d41ecd681a3444]*/ { PyObject *ret; - /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); + PyMutex_Lock(&self->lock); if (!self->eof) { - _zstd_state* const mod_state = PyType_GetModuleState(Py_TYPE(self)); - if (mod_state == NULL) { - return NULL; - } - ret = mod_state->empty_bytes; - Py_INCREF(ret); + PyMutex_Unlock(&self->lock); + return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); } else { if (self->unused_data == NULL) { @@ -772,8 +607,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) } } - Py_END_CRITICAL_SECTION(); - + PyMutex_Unlock(&self->lock); return ret; } @@ -781,7 +615,7 @@ _zstd_ZstdDecompressor_unused_data_get_impl(ZstdDecompressor *self) _zstd.ZstdDecompressor.decompress data: Py_buffer - A bytes-like object, zstd data to be decompressed. + A bytes-like object, Zstandard data to be decompressed. max_length: Py_ssize_t = -1 Maximum size of returned data. When it is negative, the size of output buffer is unlimited. When it is nonnegative, returns at @@ -807,25 +641,19 @@ static PyObject * _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length) -/*[clinic end generated code: output=a4302b3c940dbec6 input=830e455bc9a50b6e]*/ +/*[clinic end generated code: output=a4302b3c940dbec6 input=6463dfdf98091caa]*/ { PyObject *ret; /* Thread-safe code */ - Py_BEGIN_CRITICAL_SECTION(self); - - ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR); - Py_END_CRITICAL_SECTION(); + PyMutex_Lock(&self->lock); + ret = stream_decompress_lock_held(self, data, max_length); + PyMutex_Unlock(&self->lock); return ret; } -#define clinic_state() (get_zstd_state_from_type(type)) -#include "clinic/decompressor.c.h" -#undef clinic_state - static PyMethodDef ZstdDecompressor_methods[] = { _ZSTD_ZSTDDECOMPRESSOR_DECOMPRESS_METHODDEF - - {0} + {NULL, NULL} }; PyDoc_STRVAR(ZstdDecompressor_eof_doc, @@ -833,24 +661,22 @@ PyDoc_STRVAR(ZstdDecompressor_eof_doc, "after that, an EOFError exception will be raised."); PyDoc_STRVAR(ZstdDecompressor_needs_input_doc, -"If the max_length output limit in .decompress() method has been reached, and\n" -"the decompressor has (or may has) unconsumed input data, it will be set to\n" -"False. In this case, pass b'' to .decompress() method may output further data."); +"If the max_length output limit in .decompress() method has been reached,\n" +"and the decompressor has (or may has) unconsumed input data, it will be set\n" +"to False. In this case, passing b'' to the .decompress() method may output\n" +"further data."); static PyMemberDef ZstdDecompressor_members[] = { {"eof", Py_T_BOOL, offsetof(ZstdDecompressor, eof), Py_READONLY, ZstdDecompressor_eof_doc}, - {"needs_input", Py_T_BOOL, offsetof(ZstdDecompressor, needs_input), Py_READONLY, ZstdDecompressor_needs_input_doc}, - - {0} + {NULL} }; static PyGetSetDef ZstdDecompressor_getset[] = { _ZSTD_ZSTDDECOMPRESSOR_UNUSED_DATA_GETSETDEF - - {0} + {NULL} }; static int @@ -873,19 +699,19 @@ ZstdDecompressor_clear(PyObject *ob) static PyType_Slot ZstdDecompressor_slots[] = { {Py_tp_new, _zstd_ZstdDecompressor_new}, {Py_tp_dealloc, ZstdDecompressor_dealloc}, - {Py_tp_init, _zstd_ZstdDecompressor___init__}, {Py_tp_methods, ZstdDecompressor_methods}, {Py_tp_members, ZstdDecompressor_members}, {Py_tp_getset, ZstdDecompressor_getset}, - {Py_tp_doc, (char*)_zstd_ZstdDecompressor___init____doc__}, + {Py_tp_doc, (void *)_zstd_ZstdDecompressor_new__doc__}, {Py_tp_traverse, ZstdDecompressor_traverse}, {Py_tp_clear, ZstdDecompressor_clear}, - {0} + {0, 0} }; -PyType_Spec ZstdDecompressor_type_spec = { - .name = "_zstd.ZstdDecompressor", +PyType_Spec zstd_decompressor_type_spec = { + .name = "compression.zstd.ZstdDecompressor", .basicsize = sizeof(ZstdDecompressor), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, .slots = ZstdDecompressor_slots, }; diff --git a/Modules/_zstd/zstddict.c b/Modules/_zstd/zstddict.c index a19224c4a6403b..14f74aaed46ec5 100644 --- a/Modules/_zstd/zstddict.c +++ b/Modules/_zstd/zstddict.c @@ -1,38 +1,67 @@ -/* -Low level interface to Meta's zstd library for use in the compression.zstd -Python module. -*/ +/* Low level interface to the Zstandard algorthm & the zstd library. */ /* ZstdDict class definitions */ /*[clinic input] module _zstd -class _zstd.ZstdDict "ZstdDict *" "clinic_state()->ZstdDict_type" +class _zstd.ZstdDict "ZstdDict *" "&zstd_dict_type_spec" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a5d1254c497e52ba]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dcc175ec974f81c]*/ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif +#include "Python.h" + #include "_zstdmodule.h" +#include "clinic/zstddict.c.h" +#include "internal/pycore_lock.h" // PyMutex_IsLocked -#include // offsetof() +#include // ZSTD_freeDDict(), ZSTD_getDictID_fromDict() #define ZstdDict_CAST(op) ((ZstdDict *)op) +/*[clinic input] +@classmethod +_zstd.ZstdDict.__new__ as _zstd_ZstdDict_new + dict_content: Py_buffer + The content of a Zstandard dictionary as a bytes-like object. + / + * + is_raw: bool = False + If true, perform no checks on *dict_content*, useful for some + advanced cases. Otherwise, check that the content represents + a Zstandard dictionary created by the zstd library or CLI. + +Represents a Zstandard dictionary. + +The dictionary can be used for compression or decompression, and can be shared +by multiple ZstdCompressor or ZstdDecompressor objects. +[clinic start generated code]*/ + static PyObject * -_zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) +_zstd_ZstdDict_new_impl(PyTypeObject *type, Py_buffer *dict_content, + int is_raw) +/*[clinic end generated code: output=685b7406a48b0949 input=9e8c493e31c98383]*/ { - ZstdDict *self; - self = PyObject_GC_New(ZstdDict, type); + /* All dictionaries must be at least 8 bytes */ + if (dict_content->len < 8) { + PyErr_SetString(PyExc_ValueError, + "Zstandard dictionary content too short " + "(must have at least eight bytes)"); + return NULL; + } + + ZstdDict* self = PyObject_GC_New(ZstdDict, type); if (self == NULL) { - goto error; + return NULL; } - self->dict_content = NULL; - self->inited = 0; self->d_dict = NULL; + self->dict_buffer = NULL; + self->dict_id = 0; + self->lock = (PyMutex){0}; /* ZSTD_CDict dict */ self->c_dicts = PyDict_New(); @@ -40,12 +69,29 @@ _zstd_ZstdDict_new(PyTypeObject *type, PyObject *Py_UNUSED(args), PyObject *Py_U goto error; } - return (PyObject*)self; + self->dict_buffer = PyMem_Malloc(dict_content->len); + if (!self->dict_buffer) { + PyErr_NoMemory(); + goto error; + } + memcpy(self->dict_buffer, dict_content->buf, dict_content->len); + self->dict_len = dict_content->len; -error: - if (self != NULL) { - PyObject_GC_Del(self); + /* Get dict_id, 0 means "raw content" dictionary. */ + self->dict_id = ZSTD_getDictID_fromDict(self->dict_buffer, self->dict_len); + + /* Check validity for ordinary dictionary */ + if (!is_raw && self->dict_id == 0) { + PyErr_SetString(PyExc_ValueError, "invalid Zstandard dictionary"); + goto error; } + + PyObject_GC_Track(self); + + return (PyObject *)self; + +error: + Py_XDECREF(self); return NULL; } @@ -57,121 +103,64 @@ ZstdDict_dealloc(PyObject *ob) PyObject_GC_UnTrack(self); /* Free ZSTD_DDict instance */ - ZSTD_freeDDict(self->d_dict); + if (self->d_dict) { + ZSTD_freeDDict(self->d_dict); + } + + assert(!PyMutex_IsLocked(&self->lock)); - /* Release dict_content after Free ZSTD_CDict/ZSTD_DDict instances */ - Py_CLEAR(self->dict_content); + /* Release dict_buffer after freeing ZSTD_CDict/ZSTD_DDict instances */ + PyMem_Free(self->dict_buffer); Py_CLEAR(self->c_dicts); PyTypeObject *tp = Py_TYPE(self); - PyObject_GC_Del(ob); + tp->tp_free(self); Py_DECREF(tp); } -/*[clinic input] -_zstd.ZstdDict.__init__ - - dict_content: object - A bytes-like object, dictionary's content. - is_raw: bool = False - This parameter is for advanced user. True means dict_content - argument is a "raw content" dictionary, free of any format - restriction. False means dict_content argument is an ordinary - zstd dictionary, was created by zstd functions, follow a - specified format. - -Represents a zstd dictionary, which can be used for compression/decompression. - -It's thread-safe, and can be shared by multiple ZstdCompressor / -ZstdDecompressor objects. -[clinic start generated code]*/ - -static int -_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content, - int is_raw) -/*[clinic end generated code: output=c5f5a0d8377d037c input=e6750f62a513b3ee]*/ -{ - /* Only called once */ - if (self->inited) { - PyErr_SetString(PyExc_RuntimeError, init_twice_msg); - return -1; - } - self->inited = 1; - - /* Check dict_content's type */ - self->dict_content = PyBytes_FromObject(dict_content); - if (self->dict_content == NULL) { - PyErr_SetString(PyExc_TypeError, - "dict_content argument should be bytes-like object."); - return -1; - } - - /* Both ordinary dictionary and "raw content" dictionary should - at least 8 bytes */ - if (Py_SIZE(self->dict_content) < 8) { - PyErr_SetString(PyExc_ValueError, - "Zstd dictionary content should at least 8 bytes."); - return -1; - } - - /* Get dict_id, 0 means "raw content" dictionary. */ - self->dict_id = ZSTD_getDictID_fromDict(PyBytes_AS_STRING(self->dict_content), - Py_SIZE(self->dict_content)); - - /* Check validity for ordinary dictionary */ - if (!is_raw && self->dict_id == 0) { - char *msg = "The dict_content argument is not a valid zstd " - "dictionary. The first 4 bytes of a valid zstd dictionary " - "should be a magic number: b'\\x37\\xA4\\x30\\xEC'.\n" - "If you are an advanced user, and can be sure that " - "dict_content argument is a \"raw content\" zstd " - "dictionary, set is_raw parameter to True."; - PyErr_SetString(PyExc_ValueError, msg); - return -1; - } - - // Can only track self once self->dict_content is included - PyObject_GC_Track(self); - return 0; -} - -#define clinic_state() (get_zstd_state(type)) -#include "clinic/zstddict.c.h" -#undef clinic_state - PyDoc_STRVAR(ZstdDict_dictid_doc, -"ID of zstd dictionary, a 32-bit unsigned int value.\n\n" -"Non-zero means ordinary dictionary, was created by zstd functions, follow\n" -"a specified format.\n\n" -"0 means a \"raw content\" dictionary, free of any format restriction, used\n" -"for advanced user."); - -PyDoc_STRVAR(ZstdDict_dictcontent_doc, -"The content of zstd dictionary, a bytes object, it's the same as dict_content\n" -"argument in ZstdDict.__init__() method. It can be used with other programs."); +"the Zstandard dictionary, an int between 0 and 2**32.\n\n" +"A non-zero value represents an ordinary Zstandard dictionary, " +"conforming to the standardised format.\n\n" +"The special value '0' means a 'raw content' dictionary," +"without any restrictions on format or content."); static PyObject * -ZstdDict_str(PyObject *ob) +ZstdDict_repr(PyObject *ob) { ZstdDict *dict = ZstdDict_CAST(ob); return PyUnicode_FromFormat("", - dict->dict_id, Py_SIZE(dict->dict_content)); + (unsigned int)dict->dict_id, dict->dict_len); } static PyMemberDef ZstdDict_members[] = { {"dict_id", Py_T_UINT, offsetof(ZstdDict, dict_id), Py_READONLY, ZstdDict_dictid_doc}, - {"dict_content", Py_T_OBJECT_EX, offsetof(ZstdDict, dict_content), Py_READONLY, ZstdDict_dictcontent_doc}, - {0} + {NULL} }; /*[clinic input] -@critical_section +@getter +_zstd.ZstdDict.dict_content + +The content of a Zstandard dictionary, as a bytes object. +[clinic start generated code]*/ + +static PyObject * +_zstd_ZstdDict_dict_content_get_impl(ZstdDict *self) +/*[clinic end generated code: output=0d05caa5b550eabb input=4ed526d1c151c596]*/ +{ + return PyBytes_FromStringAndSize(self->dict_buffer, self->dict_len); +} + +/*[clinic input] @getter _zstd.ZstdDict.as_digested_dict Load as a digested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_digested_dict) + 1. Some advanced compression parameters of compressor may be overridden by parameters of digested dictionary. 2. ZstdDict has a digested dictionaries cache for each compression level. @@ -182,19 +171,20 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digeste static PyObject * _zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=09b086e7a7320dbb input=585448c79f31f74a]*/ +/*[clinic end generated code: output=09b086e7a7320dbb input=ee45e1b4a48f6f2c]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_DIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_undigested_dict Load as an undigested dictionary to compressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_undigested_dict) + 1. The advanced compression parameters of compressor will not be overridden. 2. Loading an undigested dictionary is costly. If load an undigested dictionary multiple times, consider reusing a compressor object. @@ -203,19 +193,20 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undiges static PyObject * _zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self) -/*[clinic end generated code: output=43c7a989e6d4253a input=022b0829ffb1c220]*/ +/*[clinic end generated code: output=43c7a989e6d4253a input=d39210eedec76fed]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_UNDIGESTED); } /*[clinic input] -@critical_section @getter _zstd.ZstdDict.as_prefix Load as a prefix to compressor/decompressor. -Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) +Pass this attribute as zstd_dict argument: +compress(dat, zstd_dict=zd.as_prefix) + 1. Prefix is compatible with long distance matching, while dictionary is not. 2. It only works for the first frame, then the compressor/decompressor will return to no prefix state. @@ -224,27 +215,24 @@ Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix) static PyObject * _zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self) -/*[clinic end generated code: output=6f7130c356595a16 input=09fb82a6a5407e87]*/ +/*[clinic end generated code: output=6f7130c356595a16 input=d59757b0b5a9551a]*/ { return Py_BuildValue("Oi", self, DICT_TYPE_PREFIX); } static PyGetSetDef ZstdDict_getset[] = { + _ZSTD_ZSTDDICT_DICT_CONTENT_GETSETDEF _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF - _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF - _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF - - {0} + {NULL} }; static Py_ssize_t ZstdDict_length(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - assert(PyBytes_Check(self->dict_content)); - return Py_SIZE(self->dict_content); + return self->dict_len; } static int @@ -252,7 +240,6 @@ ZstdDict_traverse(PyObject *ob, visitproc visit, void *arg) { ZstdDict *self = ZstdDict_CAST(ob); Py_VISIT(self->c_dicts); - Py_VISIT(self->dict_content); return 0; } @@ -260,7 +247,7 @@ static int ZstdDict_clear(PyObject *ob) { ZstdDict *self = ZstdDict_CAST(ob); - Py_CLEAR(self->dict_content); + Py_CLEAR(self->c_dicts); return 0; } @@ -269,18 +256,18 @@ static PyType_Slot zstddict_slots[] = { {Py_tp_getset, ZstdDict_getset}, {Py_tp_new, _zstd_ZstdDict_new}, {Py_tp_dealloc, ZstdDict_dealloc}, - {Py_tp_init, _zstd_ZstdDict___init__}, - {Py_tp_str, ZstdDict_str}, - {Py_tp_doc, (char*)_zstd_ZstdDict___init____doc__}, + {Py_tp_repr, ZstdDict_repr}, + {Py_tp_doc, (void *)_zstd_ZstdDict_new__doc__}, {Py_sq_length, ZstdDict_length}, {Py_tp_traverse, ZstdDict_traverse}, {Py_tp_clear, ZstdDict_clear}, - {0} + {0, 0} }; -PyType_Spec zstddict_type_spec = { - .name = "_zstd.ZstdDict", +PyType_Spec zstd_dict_type_spec = { + .name = "compression.zstd.ZstdDict", .basicsize = sizeof(ZstdDict), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE + | Py_TPFLAGS_HAVE_GC, .slots = zstddict_slots, }; diff --git a/Modules/_zstd/zstddict.h b/Modules/_zstd/zstddict.h new file mode 100644 index 00000000000000..4a403416dbd4a3 --- /dev/null +++ b/Modules/_zstd/zstddict.h @@ -0,0 +1,29 @@ +/* Low level interface to the Zstandard algorthm & the zstd library. */ + +#ifndef ZSTD_DICT_H +#define ZSTD_DICT_H + +#include // ZSTD_DDict + +typedef struct { + PyObject_HEAD + + /* Reusable compress/decompress dictionary, they are created once and + can be shared by multiple threads concurrently, since its usage is + read-only. + c_dicts is a dict, int(compressionLevel):PyCapsule(ZSTD_CDict*) */ + ZSTD_DDict *d_dict; + PyObject *c_dicts; + + /* Dictionary content. */ + char *dict_buffer; + Py_ssize_t dict_len; + + /* Dictionary id */ + uint32_t dict_id; + + /* Lock to protect the digested dictionaries */ + PyMutex lock; +} ZstdDict; + +#endif // !ZSTD_DICT_H diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c index 401a3a7072b846..5d07de2fba9526 100644 --- a/Modules/arraymodule.c +++ b/Modules/arraymodule.c @@ -13,6 +13,7 @@ #include "pycore_ceval.h" // _PyEval_GetBuiltin() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include // offsetof() #include @@ -728,9 +729,7 @@ array_dealloc(PyObject *op) PyObject_GC_UnTrack(op); arrayobject *self = arrayobject_CAST(op); - if (self->weakreflist != NULL) { - PyObject_ClearWeakRefs(op); - } + FT_CLEAR_WEAKREFS(op, self->weakreflist); if (self->ob_item != NULL) { PyMem_Free(self->ob_item); } diff --git a/Modules/blake2module.c b/Modules/blake2module.c index f9acc57f1b2fa3..ae37e2d3383f9b 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -43,12 +43,11 @@ // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't // great; but when compiling a universal2 binary, autoconf will set -// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be -// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor -// symbols. +// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64. +// If we're on macOS ARM64, we however disable these preprocessor symbols. #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif // ECX @@ -114,31 +113,32 @@ void detect_cpu_features(cpu_flags *flags) { } } -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 static inline bool has_simd128(cpu_flags *flags) { - // For now this is Intel-only, could conceivably be #ifdef'd to something + // For now this is Intel-only, could conceivably be if'd to something // else. return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov; } #endif -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 static inline bool has_simd256(cpu_flags *flags) { return flags->avx && flags->avx2; } #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b.h" #include "_hacl/Hacl_Hash_Blake2s.h" -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b_Simd256.h" #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h" #endif @@ -165,7 +165,7 @@ blake2_get_state(PyObject *module) return (Blake2State *)state; } -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) static inline Blake2State* blake2_get_state_from_type(PyTypeObject *module) { @@ -329,18 +329,18 @@ static inline bool is_blake2s(blake2_impl impl) { } static inline blake2_impl type_to_impl(PyTypeObject *type) { -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) Blake2State* st = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (has_simd256(&st->flags)) return Blake2b_256; else #endif return Blake2b; } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (has_simd128(&st->flags)) return Blake2s_128; else @@ -356,10 +356,10 @@ typedef struct { union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif -#ifdef HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; #endif }; @@ -425,14 +425,14 @@ static void update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { - // These need to be ifdef'd out otherwise it's an unresolved symbol at - // link-time. -#ifdef HACL_CAN_COMPILE_SIMD256 + // blake2b_256_state and blake2s_128_state must be if'd since + // otherwise this results in an unresolved symbol at link-time. +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update,self->blake2b_256_state, buf, len); return; #endif -#ifdef HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update,self->blake2s_128_state, buf, len); return; @@ -468,12 +468,12 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, // Ensure that the states are NULL-initialized in case of an error. // See: py_blake2_clear() for more details. switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: self->blake2b_256_state = NULL; break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: self->blake2s_128_state = NULL; break; @@ -591,7 +591,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, }; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: { self->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_malloc_with_params_and_key(¶ms, last_node, key->buf); if (self->blake2b_256_state == NULL) { @@ -601,7 +601,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, break; } #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: { self->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_malloc_with_params_and_key(¶ms, last_node, key->buf); if (self->blake2s_128_state == NULL) { @@ -655,8 +655,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, /*[clinic input] @classmethod _blake2.blake2b.__new__ as py_blake2b_new - data: object(c_default="NULL") = b'' - / + data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2B_OUT_BYTES") = _blake2.blake2b.MAX_DIGEST_SIZE key: Py_buffer(c_default="NULL", py_default="b''") = None @@ -670,26 +669,31 @@ _blake2.blake2b.__new__ as py_blake2b_new inner_size: int = 0 last_node: bool = False usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new BLAKE2b hash object. [clinic start generated code]*/ static PyObject * -py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=32bfd8f043c6896f input=8fee2b7b11428b2d]*/ + int inner_size, int last_node, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=de64bd850606b6a0 input=78cf60a2922d2f90]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); } /*[clinic input] @classmethod _blake2.blake2s.__new__ as py_blake2s_new - data: object(c_default="NULL") = b'' - / + data as data_obj: object(c_default="NULL") = b'' * digest_size: int(c_default="HACL_HASH_BLAKE2S_OUT_BYTES") = _blake2.blake2s.MAX_DIGEST_SIZE key: Py_buffer(c_default="NULL", py_default="b''") = None @@ -703,18 +707,24 @@ _blake2.blake2s.__new__ as py_blake2s_new inner_size: int = 0 last_node: bool = False usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new BLAKE2s hash object. [clinic start generated code]*/ static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity) -/*[clinic end generated code: output=556181f73905c686 input=8165a11980eac7f3]*/ + int inner_size, int last_node, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=582a0c4295cc3a3c input=6843d6332eefd295]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } return py_blake2b_or_s_new(type, data, digest_size, key, salt, person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); } @@ -723,7 +733,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) { assert(cpy != NULL); switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: { cpy->blake2b_256_state = Hacl_Hash_Blake2b_Simd256_copy(self->blake2b_256_state); if (cpy->blake2b_256_state == NULL) { @@ -732,7 +742,7 @@ blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) break; } #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: { cpy->blake2s_128_state = Hacl_Hash_Blake2s_Simd128_copy(self->blake2s_128_state); if (cpy->blake2s_128_state == NULL) { @@ -843,12 +853,12 @@ _blake2_blake2b_digest_impl(Blake2Object *self) ENTER_HASHLIB(self); uint8_t digest_length = 0; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); break; @@ -881,12 +891,12 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self) ENTER_HASHLIB(self); uint8_t digest_length = 0; switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: digest_length = Hacl_Hash_Blake2b_Simd256_digest(self->blake2b_256_state, digest); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: digest_length = Hacl_Hash_Blake2s_Simd128_digest(self->blake2s_128_state, digest); break; @@ -937,11 +947,11 @@ py_blake2b_get_digest_size(PyObject *op, void *Py_UNUSED(closure)) { Blake2Object *self = _Blake2Object_CAST(op); switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return PyLong_FromLong(Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state).digest_length); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return PyLong_FromLong(Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state).digest_length); #endif @@ -972,7 +982,7 @@ py_blake2_clear(PyObject *op) // it. If an error occurs in the constructor, we should only free // states that were allocated (i.e. that are not NULL). switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: if (self->blake2b_256_state != NULL) { Hacl_Hash_Blake2b_Simd256_free(self->blake2b_256_state); @@ -980,7 +990,7 @@ py_blake2_clear(PyObject *op) } break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: if (self->blake2s_128_state != NULL) { Hacl_Hash_Blake2s_Simd128_free(self->blake2s_128_state); diff --git a/Modules/clinic/_cursesmodule.c.h b/Modules/clinic/_cursesmodule.c.h index 552360eb80a545..a898a7e17cf8d1 100644 --- a/Modules/clinic/_cursesmodule.c.h +++ b/Modules/clinic/_cursesmodule.c.h @@ -768,7 +768,7 @@ PyDoc_STRVAR(_curses_window_getch__doc__, #define _CURSES_WINDOW_GETCH_METHODDEF \ {"getch", (PyCFunction)_curses_window_getch, METH_VARARGS, _curses_window_getch__doc__}, -static int +static PyObject * _curses_window_getch_impl(PyCursesWindowObject *self, int group_right_1, int y, int x); @@ -779,7 +779,6 @@ _curses_window_getch(PyObject *self, PyObject *args) int group_right_1 = 0; int y = 0; int x = 0; - int _return_value; switch (PyTuple_GET_SIZE(args)) { case 0: @@ -794,11 +793,7 @@ _curses_window_getch(PyObject *self, PyObject *args) PyErr_SetString(PyExc_TypeError, "_curses.window.getch requires 0 to 2 arguments"); goto exit; } - _return_value = _curses_window_getch_impl((PyCursesWindowObject *)self, group_right_1, y, x); - if ((_return_value == -1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromLong((long)_return_value); + return_value = _curses_window_getch_impl((PyCursesWindowObject *)self, group_right_1, y, x); exit: return return_value; @@ -4440,4 +4435,4 @@ _curses_has_extended_color_support(PyObject *module, PyObject *Py_UNUSED(ignored #ifndef _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #define _CURSES_ASSUME_DEFAULT_COLORS_METHODDEF #endif /* !defined(_CURSES_ASSUME_DEFAULT_COLORS_METHODDEF) */ -/*[clinic end generated code: output=42b2923d88c8d0f6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7753612d7613903c input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h index 59ab46ca3f0978..6d55f4617a8b08 100644 --- a/Modules/clinic/_hashopenssl.c.h +++ b/Modules/clinic/_hashopenssl.c.h @@ -232,8 +232,8 @@ EVPXOF_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje #endif /* defined(PY_OPENSSL_HAS_SHAKE) */ -PyDoc_STRVAR(EVP_new__doc__, -"new($module, /, name, string=b\'\', *, usedforsecurity=True)\n" +PyDoc_STRVAR(_hashlib_new__doc__, +"new($module, /, name, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new hash object using the named algorithm.\n" @@ -243,20 +243,20 @@ PyDoc_STRVAR(EVP_new__doc__, "\n" "The MD5 and SHA1 algorithms are always supported."); -#define EVP_NEW_METHODDEF \ - {"new", _PyCFunction_CAST(EVP_new), METH_FASTCALL|METH_KEYWORDS, EVP_new__doc__}, +#define _HASHLIB_NEW_METHODDEF \ + {"new", _PyCFunction_CAST(_hashlib_new), METH_FASTCALL|METH_KEYWORDS, _hashlib_new__doc__}, static PyObject * -EVP_new_impl(PyObject *module, PyObject *name_obj, PyObject *data_obj, - int usedforsecurity); +_hashlib_new_impl(PyObject *module, const char *name, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * -EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_hashlib_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -265,7 +265,7 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(name), &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(name), &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -274,30 +274,43 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"name", "string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"name", "data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "new", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; - PyObject *name_obj; - PyObject *data_obj = NULL; + const char *name; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!args) { goto exit; } - name_obj = args[0]; + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("new", "argument 'name'", "str", args[0]); + goto exit; + } + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); + if (name == NULL) { + goto exit; + } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!noptargs) { goto skip_optional_pos; } if (args[1]) { - data_obj = args[1]; + data = args[1]; if (!--noptargs) { goto skip_optional_pos; } @@ -306,19 +319,25 @@ EVP_new(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwn if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[2]); - if (usedforsecurity < 0) { - goto exit; + if (args[2]) { + usedforsecurity = PyObject_IsTrue(args[2]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[3]; skip_optional_kwonly: - return_value = EVP_new_impl(module, name_obj, data_obj, usedforsecurity); + return_value = _hashlib_new_impl(module, name, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_md5__doc__, -"openssl_md5($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_md5($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Returns a md5 hash object; optionally initialized with a string"); @@ -327,8 +346,8 @@ PyDoc_STRVAR(_hashlib_openssl_md5__doc__, {"openssl_md5", _PyCFunction_CAST(_hashlib_openssl_md5), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_md5__doc__}, static PyObject * -_hashlib_openssl_md5_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_md5_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -336,7 +355,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -345,7 +364,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -354,17 +373,18 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_md5", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -375,7 +395,7 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -384,19 +404,25 @@ _hashlib_openssl_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_md5_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_md5_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha1__doc__, -"openssl_sha1($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha1($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Returns a sha1 hash object; optionally initialized with a string"); @@ -405,8 +431,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha1__doc__, {"openssl_sha1", _PyCFunction_CAST(_hashlib_openssl_sha1), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha1__doc__}, static PyObject * -_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha1_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -414,7 +440,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -423,7 +449,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -432,17 +458,18 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha1", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -453,7 +480,7 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -462,19 +489,26 @@ _hashlib_openssl_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha1_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha1_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha224__doc__, -"openssl_sha224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha224($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha224 hash object; optionally initialized with a string"); @@ -483,8 +517,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha224__doc__, {"openssl_sha224", _PyCFunction_CAST(_hashlib_openssl_sha224), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha224__doc__}, static PyObject * -_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -492,7 +526,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -501,7 +535,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -510,17 +544,18 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -531,7 +566,7 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -540,19 +575,26 @@ _hashlib_openssl_sha224(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha224_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha224_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha256__doc__, -"openssl_sha256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha256 hash object; optionally initialized with a string"); @@ -561,8 +603,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha256__doc__, {"openssl_sha256", _PyCFunction_CAST(_hashlib_openssl_sha256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha256__doc__}, static PyObject * -_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -570,7 +612,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -579,7 +621,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -588,17 +630,18 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -609,7 +652,7 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -618,19 +661,26 @@ _hashlib_openssl_sha256(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha256_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha384__doc__, -"openssl_sha384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha384($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha384 hash object; optionally initialized with a string"); @@ -639,8 +689,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha384__doc__, {"openssl_sha384", _PyCFunction_CAST(_hashlib_openssl_sha384), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha384__doc__}, static PyObject * -_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -648,7 +698,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -657,7 +707,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -666,17 +716,18 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -687,7 +738,7 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -696,19 +747,26 @@ _hashlib_openssl_sha384(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha384_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha384_impl(module, data, usedforsecurity, string); exit: return return_value; } PyDoc_STRVAR(_hashlib_openssl_sha512__doc__, -"openssl_sha512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha512($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha512 hash object; optionally initialized with a string"); @@ -717,8 +775,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha512__doc__, {"openssl_sha512", _PyCFunction_CAST(_hashlib_openssl_sha512), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha512__doc__}, static PyObject * -_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -726,7 +784,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -735,7 +793,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -744,17 +802,18 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -765,7 +824,7 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -774,12 +833,18 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha512_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha512_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -788,7 +853,8 @@ _hashlib_openssl_sha512(PyObject *module, PyObject *const *args, Py_ssize_t narg #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_224__doc__, -"openssl_sha3_224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_224($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-224 hash object; optionally initialized with a string"); @@ -797,8 +863,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_224__doc__, {"openssl_sha3_224", _PyCFunction_CAST(_hashlib_openssl_sha3_224), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_224__doc__}, static PyObject * -_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_224_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -806,7 +872,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -815,7 +881,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -824,17 +890,18 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -845,7 +912,7 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -854,12 +921,18 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_224_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_224_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -870,7 +943,8 @@ _hashlib_openssl_sha3_224(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_256__doc__, -"openssl_sha3_256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-256 hash object; optionally initialized with a string"); @@ -879,8 +953,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_256__doc__, {"openssl_sha3_256", _PyCFunction_CAST(_hashlib_openssl_sha3_256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_256__doc__}, static PyObject * -_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -888,7 +962,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -897,7 +971,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -906,17 +980,18 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -927,7 +1002,7 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -936,12 +1011,18 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_256_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -952,7 +1033,8 @@ _hashlib_openssl_sha3_256(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_384__doc__, -"openssl_sha3_384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_384($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-384 hash object; optionally initialized with a string"); @@ -961,8 +1043,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_384__doc__, {"openssl_sha3_384", _PyCFunction_CAST(_hashlib_openssl_sha3_384), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_384__doc__}, static PyObject * -_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_384_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -970,7 +1052,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -979,7 +1061,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -988,17 +1070,18 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1009,7 +1092,7 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1018,12 +1101,18 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_384_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_384_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1034,7 +1123,8 @@ _hashlib_openssl_sha3_384(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHA3) PyDoc_STRVAR(_hashlib_openssl_sha3_512__doc__, -"openssl_sha3_512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_sha3_512($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a sha3-512 hash object; optionally initialized with a string"); @@ -1043,8 +1133,8 @@ PyDoc_STRVAR(_hashlib_openssl_sha3_512__doc__, {"openssl_sha3_512", _PyCFunction_CAST(_hashlib_openssl_sha3_512), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_sha3_512__doc__}, static PyObject * -_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_sha3_512_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1052,7 +1142,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1061,7 +1151,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1070,17 +1160,18 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_sha3_512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1091,7 +1182,7 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1100,12 +1191,18 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_sha3_512_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_sha3_512_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1116,7 +1213,8 @@ _hashlib_openssl_sha3_512(PyObject *module, PyObject *const *args, Py_ssize_t na #if defined(PY_OPENSSL_HAS_SHAKE) PyDoc_STRVAR(_hashlib_openssl_shake_128__doc__, -"openssl_shake_128($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_shake_128($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a shake-128 variable hash object; optionally initialized with a string"); @@ -1125,8 +1223,8 @@ PyDoc_STRVAR(_hashlib_openssl_shake_128__doc__, {"openssl_shake_128", _PyCFunction_CAST(_hashlib_openssl_shake_128), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_shake_128__doc__}, static PyObject * -_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_shake_128_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1134,7 +1232,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1143,7 +1241,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1152,17 +1250,18 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_shake_128", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1173,7 +1272,7 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1182,12 +1281,18 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_shake_128_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_shake_128_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1198,7 +1303,8 @@ _hashlib_openssl_shake_128(PyObject *module, PyObject *const *args, Py_ssize_t n #if defined(PY_OPENSSL_HAS_SHAKE) PyDoc_STRVAR(_hashlib_openssl_shake_256__doc__, -"openssl_shake_256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"openssl_shake_256($module, /, data=b\'\', *, usedforsecurity=True,\n" +" string=None)\n" "--\n" "\n" "Returns a shake-256 variable hash object; optionally initialized with a string"); @@ -1207,8 +1313,8 @@ PyDoc_STRVAR(_hashlib_openssl_shake_256__doc__, {"openssl_shake_256", _PyCFunction_CAST(_hashlib_openssl_shake_256), METH_FASTCALL|METH_KEYWORDS, _hashlib_openssl_shake_256__doc__}, static PyObject * -_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data_obj, - int usedforsecurity); +_hashlib_openssl_shake_256_impl(PyObject *module, PyObject *data, + int usedforsecurity, PyObject *string); static PyObject * _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -1216,7 +1322,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -1225,7 +1331,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -1234,17 +1340,18 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "openssl_shake_256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *data_obj = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -1255,7 +1362,7 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n goto skip_optional_pos; } if (args[0]) { - data_obj = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -1264,12 +1371,18 @@ _hashlib_openssl_shake_256(PyObject *module, PyObject *const *args, Py_ssize_t n if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = args[2]; skip_optional_kwonly: - return_value = _hashlib_openssl_shake_256_impl(module, data_obj, usedforsecurity); + return_value = _hashlib_openssl_shake_256_impl(module, data, usedforsecurity, string); exit: return return_value; @@ -1871,4 +1984,4 @@ _hashlib_compare_digest(PyObject *module, PyObject *const *args, Py_ssize_t narg #ifndef _HASHLIB_SCRYPT_METHODDEF #define _HASHLIB_SCRYPT_METHODDEF #endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */ -/*[clinic end generated code: output=2c78822e38be64a8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a863ec4166ed2fbb input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_heapqmodule.c.h b/Modules/clinic/_heapqmodule.c.h index 81d108627265ab..b43155b6c24e3c 100644 --- a/Modules/clinic/_heapqmodule.c.h +++ b/Modules/clinic/_heapqmodule.c.h @@ -2,6 +2,7 @@ preserve [clinic start generated code]*/ +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_heapq_heappush__doc__, @@ -32,7 +33,9 @@ _heapq_heappush(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappush_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -61,7 +64,9 @@ _heapq_heappop(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappop_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -103,7 +108,9 @@ _heapq_heapreplace(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapreplace_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -140,7 +147,9 @@ _heapq_heappushpop(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappushpop_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -169,7 +178,9 @@ _heapq_heapify(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapify_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -203,7 +214,9 @@ _heapq_heappush_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs) } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappush_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -232,7 +245,9 @@ _heapq_heappop_max(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappop_max_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -266,7 +281,9 @@ _heapq_heapreplace_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapreplace_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -295,7 +312,9 @@ _heapq_heapify_max(PyObject *module, PyObject *arg) goto exit; } heap = arg; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heapify_max_impl(module, heap); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -332,9 +351,11 @@ _heapq_heappushpop_max(PyObject *module, PyObject *const *args, Py_ssize_t nargs } heap = args[0]; item = args[1]; + Py_BEGIN_CRITICAL_SECTION(heap); return_value = _heapq_heappushpop_max_impl(module, heap, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; } -/*[clinic end generated code: output=f55d8595ce150c76 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e83d50002c29a96d input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_lsprof.c.h b/Modules/clinic/_lsprof.c.h index 2918a6bc7abe74..c426cd6fe02f39 100644 --- a/Modules/clinic/_lsprof.c.h +++ b/Modules/clinic/_lsprof.c.h @@ -82,6 +82,39 @@ _lsprof_Profiler__pystart_callback(PyObject *self, PyObject *const *args, Py_ssi return return_value; } +PyDoc_STRVAR(_lsprof_Profiler__pythrow_callback__doc__, +"_pythrow_callback($self, code, instruction_offset, exception, /)\n" +"--\n" +"\n"); + +#define _LSPROF_PROFILER__PYTHROW_CALLBACK_METHODDEF \ + {"_pythrow_callback", _PyCFunction_CAST(_lsprof_Profiler__pythrow_callback), METH_FASTCALL, _lsprof_Profiler__pythrow_callback__doc__}, + +static PyObject * +_lsprof_Profiler__pythrow_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *exception); + +static PyObject * +_lsprof_Profiler__pythrow_callback(PyObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *code; + PyObject *instruction_offset; + PyObject *exception; + + if (!_PyArg_CheckPositional("_pythrow_callback", nargs, 3, 3)) { + goto exit; + } + code = args[0]; + instruction_offset = args[1]; + exception = args[2]; + return_value = _lsprof_Profiler__pythrow_callback_impl((ProfilerObject *)self, code, instruction_offset, exception); + +exit: + return return_value; +} + PyDoc_STRVAR(_lsprof_Profiler__pyreturn_callback__doc__, "_pyreturn_callback($self, code, instruction_offset, retval, /)\n" "--\n" @@ -411,4 +444,4 @@ profiler_init(PyObject *self, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=fe231309776df7a7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9e46985561166c37 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_randommodule.c.h b/Modules/clinic/_randommodule.c.h index 1e989e970c9de5..2563a16aea0b6f 100644 --- a/Modules/clinic/_randommodule.c.h +++ b/Modules/clinic/_randommodule.c.h @@ -3,6 +3,7 @@ preserve [clinic start generated code]*/ #include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_long.h" // _PyLong_UInt64_Converter() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_random_Random_random__doc__, @@ -124,16 +125,15 @@ PyDoc_STRVAR(_random_Random_getrandbits__doc__, {"getrandbits", (PyCFunction)_random_Random_getrandbits, METH_O, _random_Random_getrandbits__doc__}, static PyObject * -_random_Random_getrandbits_impl(RandomObject *self, int k); +_random_Random_getrandbits_impl(RandomObject *self, uint64_t k); static PyObject * _random_Random_getrandbits(PyObject *self, PyObject *arg) { PyObject *return_value = NULL; - int k; + uint64_t k; - k = PyLong_AsInt(arg); - if (k == -1 && PyErr_Occurred()) { + if (!_PyLong_UInt64_Converter(arg, &k)) { goto exit; } Py_BEGIN_CRITICAL_SECTION(self); @@ -143,4 +143,4 @@ _random_Random_getrandbits(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=4458b5a69201ebea input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7ce97b2194eecaf7 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h new file mode 100644 index 00000000000000..f6a51cdba6b401 --- /dev/null +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -0,0 +1,291 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, +"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" +" debug=False)\n" +"--\n" +"\n" +"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" +"\n" +"Args:\n" +" pid: Process ID of the target Python process to debug\n" +" all_threads: If True, initialize state for all threads in the process.\n" +" If False, only initialize for the main thread.\n" +" only_active_thread: If True, only sample the thread holding the GIL.\n" +" Cannot be used together with all_threads=True.\n" +" debug: If True, chain exceptions to explain the sequence of events that\n" +" lead to the exception.\n" +"\n" +"The RemoteUnwinder provides functionality to inspect and debug a running Python\n" +"process, including examining thread states, stack frames and other runtime data.\n" +"\n" +"Raises:\n" +" PermissionError: If access to the target process is denied\n" +" OSError: If unable to attach to the target process or access its memory\n" +" RuntimeError: If unable to read debug information from the target process\n" +" ValueError: If both all_threads and only_active_thread are True"); + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads, + int only_active_thread, + int debug); + +static int +_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(debug), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "debug", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "RemoteUnwinder", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; + int pid; + int all_threads = 0; + int only_active_thread = 0; + int debug = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + pid = PyLong_AsInt(fastargs[0]); + if (pid == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (fastargs[1]) { + all_threads = PyObject_IsTrue(fastargs[1]); + if (all_threads < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[2]) { + only_active_thread = PyObject_IsTrue(fastargs[2]); + if (only_active_thread < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + debug = PyObject_IsTrue(fastargs[3]); + if (debug < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, debug); + +exit: + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_stack_trace__doc__, +"get_stack_trace($self, /)\n" +"--\n" +"\n" +"Returns a list of stack traces for threads in the target process.\n" +"\n" +"Each element in the returned list is a tuple of (thread_id, frame_list), where:\n" +"- thread_id is the OS thread identifier\n" +"- frame_list is a list of tuples (function_name, filename, line_number) representing\n" +" the Python stack frames for that thread, ordered from most recent to oldest\n" +"\n" +"The threads returned depend on the initialization parameters:\n" +"- If only_active_thread was True: returns only the thread holding the GIL\n" +"- If all_threads was True: returns all threads\n" +"- Otherwise: returns only the main thread\n" +"\n" +"Example:\n" +" [\n" +" (1234, [\n" +" (\'process_data\', \'worker.py\', 127),\n" +" (\'run_worker\', \'worker.py\', 45),\n" +" (\'main\', \'app.py\', 23)\n" +" ]),\n" +" (1235, [\n" +" (\'handle_request\', \'server.py\', 89),\n" +" (\'serve_forever\', \'server.py\', 52)\n" +" ])\n" +" ]\n" +"\n" +"Raises:\n" +" RuntimeError: If there is an error copying memory from the target process\n" +" OSError: If there is an error accessing the target process\n" +" PermissionError: If access to the target process is denied\n" +" UnicodeDecodeError: If there is an error decoding strings from the target process"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF \ + {"get_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__, +"get_all_awaited_by($self, /)\n" +"--\n" +"\n" +"Get all tasks and their awaited_by relationships from the remote process.\n" +"\n" +"This provides a tree structure showing which tasks are waiting for other tasks.\n" +"\n" +"For each task, returns:\n" +"1. The call stack frames leading to where the task is currently executing\n" +"2. The name of the task\n" +"3. A list of tasks that this task is waiting for, with their own frames/names/etc\n" +"\n" +"Returns a list of [frames, task_name, subtasks] where:\n" +"- frames: List of (func_name, filename, lineno) showing the call stack\n" +"- task_name: String identifier for the task\n" +"- subtasks: List of tasks being awaited by this task, in same format\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the remote process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output:\n" +"[\n" +" [\n" +" [(\"c5\", \"script.py\", 10), (\"c4\", \"script.py\", 14)],\n" +" \"c2_root\",\n" +" [\n" +" [\n" +" [(\"c1\", \"script.py\", 23)],\n" +" \"sub_main_2\",\n" +" [...]\n" +" ],\n" +" [...]\n" +" ]\n" +" ]\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF \ + {"get_all_awaited_by", (PyCFunction)_remote_debugging_RemoteUnwinder_get_all_awaited_by, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__, +"get_async_stack_trace($self, /)\n" +"--\n" +"\n" +"Get the currently running async tasks and their dependency graphs from the remote process.\n" +"\n" +"This returns information about running tasks and all tasks that are waiting for them,\n" +"forming a complete dependency graph for each thread\'s active task.\n" +"\n" +"For each thread with a running task, returns the running task plus all tasks that\n" +"transitively depend on it (tasks waiting for the running task, tasks waiting for\n" +"those tasks, etc.).\n" +"\n" +"Returns a list of per-thread results, where each thread result contains:\n" +"- Thread ID\n" +"- List of task information for the running task and all its waiters\n" +"\n" +"Each task info contains:\n" +"- Task ID (memory address)\n" +"- Task name\n" +"- Call stack frames: List of (func_name, filename, lineno)\n" +"- List of tasks waiting for this task (recursive structure)\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the target process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output (similar structure to get_all_awaited_by but only for running tasks):\n" +"[\n" +" (140234, [\n" +" (4345585712, \'main_task\',\n" +" [(\"run_server\", \"server.py\", 127), (\"main\", \"app.py\", 23)],\n" +" [\n" +" (4345585800, \'worker_1\', [...], [...]),\n" +" (4345585900, \'worker_2\', [...], [...])\n" +" ])\n" +" ])\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF \ + {"get_async_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_async_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} +/*[clinic end generated code: output=0dd1e6e8bab2a8b1 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h index bb2e308574a50a..9e9cd56e569b24 100644 --- a/Modules/clinic/blake2module.c.h +++ b/Modules/clinic/blake2module.c.h @@ -10,20 +10,21 @@ preserve #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_blake2b_new__doc__, -"blake2b(data=b\'\', /, *, digest_size=_blake2.blake2b.MAX_DIGEST_SIZE,\n" +"blake2b(data=b\'\', *, digest_size=_blake2.blake2b.MAX_DIGEST_SIZE,\n" " key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" " node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" -" usedforsecurity=True)\n" +" usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new BLAKE2b hash object."); static PyObject * -py_blake2b_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2b_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity); + int inner_size, int last_node, int usedforsecurity, + PyObject *string); static PyObject * py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -31,7 +32,7 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 12 + #define NUM_KEYWORDS 14 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -40,7 +41,7 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -49,18 +50,18 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "blake2b", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[13]; + PyObject *argsbuf[14]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2B_OUT_BYTES; Py_buffer key = {NULL, NULL}; Py_buffer salt = {NULL, NULL}; @@ -73,18 +74,23 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) int inner_size = 0; int last_node = 0; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } @@ -182,12 +188,18 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_kwonly; } } - usedforsecurity = PyObject_IsTrue(fastargs[12]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[12]) { + usedforsecurity = PyObject_IsTrue(fastargs[12]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[13]; skip_optional_kwonly: - return_value = py_blake2b_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return_value = py_blake2b_new_impl(type, data_obj, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity, string); exit: /* Cleanup for key */ @@ -207,20 +219,21 @@ py_blake2b_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } PyDoc_STRVAR(py_blake2s_new__doc__, -"blake2s(data=b\'\', /, *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" +"blake2s(data=b\'\', *, digest_size=_blake2.blake2s.MAX_DIGEST_SIZE,\n" " key=b\'\', salt=b\'\', person=b\'\', fanout=1, depth=1, leaf_size=0,\n" " node_offset=0, node_depth=0, inner_size=0, last_node=False,\n" -" usedforsecurity=True)\n" +" usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new BLAKE2s hash object."); static PyObject * -py_blake2s_new_impl(PyTypeObject *type, PyObject *data, int digest_size, +py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, Py_buffer *key, Py_buffer *salt, Py_buffer *person, int fanout, int depth, unsigned long leaf_size, unsigned long long node_offset, int node_depth, - int inner_size, int last_node, int usedforsecurity); + int inner_size, int last_node, int usedforsecurity, + PyObject *string); static PyObject * py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -228,7 +241,7 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 12 + #define NUM_KEYWORDS 14 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -237,7 +250,7 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(digest_size), &_Py_ID(key), &_Py_ID(salt), &_Py_ID(person), &_Py_ID(fanout), &_Py_ID(depth), &_Py_ID(leaf_size), &_Py_ID(node_offset), &_Py_ID(node_depth), &_Py_ID(inner_size), &_Py_ID(last_node), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -246,18 +259,18 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "digest_size", "key", "salt", "person", "fanout", "depth", "leaf_size", "node_offset", "node_depth", "inner_size", "last_node", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "blake2s", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[13]; + PyObject *argsbuf[14]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int digest_size = HACL_HASH_BLAKE2S_OUT_BYTES; Py_buffer key = {NULL, NULL}; Py_buffer salt = {NULL, NULL}; @@ -270,18 +283,23 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) int inner_size = 0; int last_node = 0; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } @@ -379,12 +397,18 @@ py_blake2s_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto skip_optional_kwonly; } } - usedforsecurity = PyObject_IsTrue(fastargs[12]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[12]) { + usedforsecurity = PyObject_IsTrue(fastargs[12]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[13]; skip_optional_kwonly: - return_value = py_blake2s_new_impl(type, data, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity); + return_value = py_blake2s_new_impl(type, data_obj, digest_size, &key, &salt, &person, fanout, depth, leaf_size, node_offset, node_depth, inner_size, last_node, usedforsecurity, string); exit: /* Cleanup for key */ @@ -478,4 +502,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_hexdigest_impl((Blake2Object *)self); } -/*[clinic end generated code: output=d30e8293bd8e2950 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=eed18dcfaf6f7731 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index 4c2c8acd8f69d8..9df73b187bb827 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -108,9 +108,7 @@ PyDoc_STRVAR(math_factorial__doc__, "factorial($module, n, /)\n" "--\n" "\n" -"Find n!.\n" -"\n" -"Raise a ValueError if x is negative or non-integral."); +"Find n!."); #define MATH_FACTORIAL_METHODDEF \ {"factorial", (PyCFunction)math_factorial, METH_O, math_factorial__doc__}, @@ -1112,4 +1110,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=634773bd18cd3f93 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=77e7b8c161c39843 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/md5module.c.h b/Modules/clinic/md5module.c.h index 9ca4f6528ce8f5..f76902586dddb2 100644 --- a/Modules/clinic/md5module.c.h +++ b/Modules/clinic/md5module.c.h @@ -89,7 +89,7 @@ MD5Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_md5_md5__doc__, -"md5($module, /, string=b\'\', *, usedforsecurity=True)\n" +"md5($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new MD5 hash object; optionally initialized with a string."); @@ -98,7 +98,8 @@ PyDoc_STRVAR(_md5_md5__doc__, {"md5", _PyCFunction_CAST(_md5_md5), METH_FASTCALL|METH_KEYWORDS, _md5_md5__doc__}, static PyObject * -_md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity); +_md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -106,7 +107,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -115,7 +116,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -124,17 +125,18 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "md5", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -145,7 +147,7 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -154,14 +156,20 @@ _md5_md5(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _md5_md5_impl(module, string, usedforsecurity); + return_value = _md5_md5_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=73f4d2034d9fcc63 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=920fe54b9ed06f92 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha1module.c.h b/Modules/clinic/sha1module.c.h index 3e5fd1a41ce21f..4a58d0cd9b82a4 100644 --- a/Modules/clinic/sha1module.c.h +++ b/Modules/clinic/sha1module.c.h @@ -89,7 +89,7 @@ SHA1Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_sha1_sha1__doc__, -"sha1($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha1($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA1 hash object; optionally initialized with a string."); @@ -98,7 +98,8 @@ PyDoc_STRVAR(_sha1_sha1__doc__, {"sha1", _PyCFunction_CAST(_sha1_sha1), METH_FASTCALL|METH_KEYWORDS, _sha1_sha1__doc__}, static PyObject * -_sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -106,7 +107,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -115,7 +116,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -124,17 +125,18 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha1", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -145,7 +147,7 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -154,14 +156,20 @@ _sha1_sha1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject * if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha1_sha1_impl(module, string, usedforsecurity); + return_value = _sha1_sha1_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=06161e87e2d645d4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fd5a917404b68c4f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha2module.c.h b/Modules/clinic/sha2module.c.h index 26612125e75df9..07be91e4f6c68f 100644 --- a/Modules/clinic/sha2module.c.h +++ b/Modules/clinic/sha2module.c.h @@ -169,7 +169,7 @@ SHA512Type_update(PyObject *self, PyObject *obj) } PyDoc_STRVAR(_sha2_sha256__doc__, -"sha256($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha256($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-256 hash object; optionally initialized with a string."); @@ -178,7 +178,8 @@ PyDoc_STRVAR(_sha2_sha256__doc__, {"sha256", _PyCFunction_CAST(_sha2_sha256), METH_FASTCALL|METH_KEYWORDS, _sha2_sha256__doc__}, static PyObject * -_sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -186,7 +187,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -195,7 +196,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -204,17 +205,18 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha256", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -225,7 +227,7 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -234,19 +236,25 @@ _sha2_sha256(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha256_impl(module, string, usedforsecurity); + return_value = _sha2_sha256_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha224__doc__, -"sha224($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha224($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-224 hash object; optionally initialized with a string."); @@ -255,7 +263,8 @@ PyDoc_STRVAR(_sha2_sha224__doc__, {"sha224", _PyCFunction_CAST(_sha2_sha224), METH_FASTCALL|METH_KEYWORDS, _sha2_sha224__doc__}, static PyObject * -_sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -263,7 +272,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -272,7 +281,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -281,17 +290,18 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -302,7 +312,7 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -311,19 +321,25 @@ _sha2_sha224(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha224_impl(module, string, usedforsecurity); + return_value = _sha2_sha224_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha512__doc__, -"sha512($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha512($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-512 hash object; optionally initialized with a string."); @@ -332,7 +348,8 @@ PyDoc_STRVAR(_sha2_sha512__doc__, {"sha512", _PyCFunction_CAST(_sha2_sha512), METH_FASTCALL|METH_KEYWORDS, _sha2_sha512__doc__}, static PyObject * -_sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -340,7 +357,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -349,7 +366,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -358,17 +375,18 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha512", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -379,7 +397,7 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -388,19 +406,25 @@ _sha2_sha512(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha512_impl(module, string, usedforsecurity); + return_value = _sha2_sha512_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } PyDoc_STRVAR(_sha2_sha384__doc__, -"sha384($module, /, string=b\'\', *, usedforsecurity=True)\n" +"sha384($module, /, data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA-384 hash object; optionally initialized with a string."); @@ -409,7 +433,8 @@ PyDoc_STRVAR(_sha2_sha384__doc__, {"sha384", _PyCFunction_CAST(_sha2_sha384), METH_FASTCALL|METH_KEYWORDS, _sha2_sha384__doc__}, static PyObject * -_sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity); +_sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj); static PyObject * _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -417,7 +442,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -426,7 +451,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(string), &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -435,17 +460,18 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"string", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha384", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; - PyObject *string = NULL; + PyObject *data = NULL; int usedforsecurity = 1; + PyObject *string_obj = NULL; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -456,7 +482,7 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject goto skip_optional_pos; } if (args[0]) { - string = args[0]; + data = args[0]; if (!--noptargs) { goto skip_optional_pos; } @@ -465,14 +491,20 @@ _sha2_sha384(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(args[1]); - if (usedforsecurity < 0) { - goto exit; + if (args[1]) { + usedforsecurity = PyObject_IsTrue(args[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string_obj = args[2]; skip_optional_kwonly: - return_value = _sha2_sha384_impl(module, string, usedforsecurity); + return_value = _sha2_sha384_impl(module, data, usedforsecurity, string_obj); exit: return return_value; } -/*[clinic end generated code: output=af11090855b7c85a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=90625b237c774a9f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h index 25f72b74f801db..121be2c0758695 100644 --- a/Modules/clinic/sha3module.c.h +++ b/Modules/clinic/sha3module.c.h @@ -10,13 +10,14 @@ preserve #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(py_sha3_new__doc__, -"sha3_224(data=b\'\', /, *, usedforsecurity=True)\n" +"sha3_224(data=b\'\', *, usedforsecurity=True, string=None)\n" "--\n" "\n" "Return a new SHA3 hash object."); static PyObject * -py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity); +py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, + PyObject *string); static PyObject * py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) @@ -24,7 +25,7 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 1 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -33,7 +34,7 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(usedforsecurity), }, + .ob_item = { &_Py_ID(data), &_Py_ID(usedforsecurity), &_Py_ID(string), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -42,40 +43,51 @@ py_sha3_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "usedforsecurity", NULL}; + static const char * const _keywords[] = {"data", "usedforsecurity", "string", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "sha3_224", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0; - PyObject *data = NULL; + PyObject *data_obj = NULL; int usedforsecurity = 1; + PyObject *string = NULL; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 0, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); if (!fastargs) { goto exit; } - if (nargs < 1) { - goto skip_optional_posonly; + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + data_obj = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } } - noptargs--; - data = fastargs[0]; -skip_optional_posonly: +skip_optional_pos: if (!noptargs) { goto skip_optional_kwonly; } - usedforsecurity = PyObject_IsTrue(fastargs[1]); - if (usedforsecurity < 0) { - goto exit; + if (fastargs[1]) { + usedforsecurity = PyObject_IsTrue(fastargs[1]); + if (usedforsecurity < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } } + string = fastargs[2]; skip_optional_kwonly: - return_value = py_sha3_new_impl(type, data, usedforsecurity); + return_value = py_sha3_new_impl(type, data_obj, usedforsecurity, string); exit: return return_value; @@ -158,24 +170,57 @@ _sha3_sha3_224_update(PyObject *self, PyObject *data) } PyDoc_STRVAR(_sha3_shake_128_digest__doc__, -"digest($self, length, /)\n" +"digest($self, /, length)\n" "--\n" "\n" "Return the digest value as a bytes object."); #define _SHA3_SHAKE_128_DIGEST_METHODDEF \ - {"digest", (PyCFunction)_sha3_shake_128_digest, METH_O, _sha3_shake_128_digest__doc__}, + {"digest", _PyCFunction_CAST(_sha3_shake_128_digest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_digest__doc__}, static PyObject * _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length); static PyObject * -_sha3_shake_128_digest(PyObject *self, PyObject *arg) +_sha3_shake_128_digest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "digest", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; unsigned long length; - if (!_PyLong_UnsignedLong_Converter(arg, &length)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { goto exit; } return_value = _sha3_shake_128_digest_impl((SHA3object *)self, length); @@ -185,24 +230,57 @@ _sha3_shake_128_digest(PyObject *self, PyObject *arg) } PyDoc_STRVAR(_sha3_shake_128_hexdigest__doc__, -"hexdigest($self, length, /)\n" +"hexdigest($self, /, length)\n" "--\n" "\n" "Return the digest value as a string of hexadecimal digits."); #define _SHA3_SHAKE_128_HEXDIGEST_METHODDEF \ - {"hexdigest", (PyCFunction)_sha3_shake_128_hexdigest, METH_O, _sha3_shake_128_hexdigest__doc__}, + {"hexdigest", _PyCFunction_CAST(_sha3_shake_128_hexdigest), METH_FASTCALL|METH_KEYWORDS, _sha3_shake_128_hexdigest__doc__}, static PyObject * _sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length); static PyObject * -_sha3_shake_128_hexdigest(PyObject *self, PyObject *arg) +_sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(length), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"length", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "hexdigest", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; unsigned long length; - if (!_PyLong_UnsignedLong_Converter(arg, &length)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_UnsignedLong_Converter(args[0], &length)) { goto exit; } return_value = _sha3_shake_128_hexdigest_impl((SHA3object *)self, length); @@ -210,4 +288,4 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=5b3ac1c06c6899ea input=a9049054013a1b77]*/ +/*[clinic end generated code: output=65e437799472b89f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/socketmodule.c.h b/Modules/clinic/socketmodule.c.h index 70ebbaa876b32b..573903be87e6ea 100644 --- a/Modules/clinic/socketmodule.c.h +++ b/Modules/clinic/socketmodule.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_long.h" // _PyLong_UInt16_Converter() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() PyDoc_STRVAR(_socket_socket_close__doc__, @@ -369,4 +370,4 @@ _socket_if_indextoname(PyObject *module, PyObject *arg) #ifndef _SOCKET_IF_INDEXTONAME_METHODDEF #define _SOCKET_IF_INDEXTONAME_METHODDEF #endif /* !defined(_SOCKET_IF_INDEXTONAME_METHODDEF) */ -/*[clinic end generated code: output=c971b79d2193b426 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=07776dd21d1e3b56 input=a9049054013a1b77]*/ diff --git a/Modules/getpath.py b/Modules/getpath.py index be2210345afbda..b89d7427e3febd 100644 --- a/Modules/getpath.py +++ b/Modules/getpath.py @@ -364,10 +364,9 @@ def search_up(prefix, *landmarks, test=isfile): venv_prefix = None pyvenvcfg = [] - # Search for the 'home' key in pyvenv.cfg. Currently, we don't consider the - # presence of a pyvenv.cfg file without a 'home' key to signify the - # existence of a virtual environment — we quietly ignore them. - # XXX: If we don't find a 'home' key, we don't look for another pyvenv.cfg! + # Search for the 'home' key in pyvenv.cfg. If a home key isn't found, + # then it means a venv is active and home is based on the venv's + # executable (if its a symlink, home is where the symlink points). for line in pyvenvcfg: key, had_equ, value = line.partition('=') if had_equ and key.strip().lower() == 'home': @@ -412,10 +411,8 @@ def search_up(prefix, *landmarks, test=isfile): if isfile(candidate): base_executable = candidate break + # home key found; stop iterating over lines break - else: - # We didn't find a 'home' key in pyvenv.cfg (no break), reset venv_prefix. - venv_prefix = None # ****************************************************************************** diff --git a/Modules/hashlib.h b/Modules/hashlib.h index 7105e68af7b806..a80b195a765792 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -76,3 +76,32 @@ * to allow the user to optimize based on the platform they're using. */ #define HASHLIB_GIL_MINSIZE 2048 +static inline int +_Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) +{ + if (data != NULL && string == NULL) { + // called as H(data) or H(data=...) + *res = data; + return 1; + } + else if (data == NULL && string != NULL) { + // called as H(string=...) + *res = string; + return 1; + } + else if (data == NULL && string == NULL) { + // fast path when no data is given + assert(!PyErr_Occurred()); + *res = NULL; + return 0; + } + else { + // called as H(data=..., string) + *res = NULL; + PyErr_SetString(PyExc_TypeError, + "'data' and 'string' are mutually exclusive " + "and support for 'string' keyword parameter " + "is slated for removal in a future version."); + return -1; + } +} diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index c7b49d4dee3d0a..8cd470f4f80b3a 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -31,14 +31,15 @@ #endif #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_HMAC.h" #include "_hacl/Hacl_Streaming_HMAC.h" // Hacl_Agile_Hash_* identifiers @@ -234,24 +235,24 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, uint32_t, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, uint32_t, (C statements)) */ #ifndef NDEBUG #define Py_HMAC_HACL_UPDATE_ONCE( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ Py_CHECK_HACL_UINT32_T_LENGTH(LEN); \ hacl_errno_t code = Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN); \ - if (_hacl_convert_errno(code, (ALGORITHM)) < 0) { \ + if (_hacl_convert_errno(code) < 0) { \ ERRACTION; \ } \ } while (0) #else #define Py_HMAC_HACL_UPDATE_ONCE( \ HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ + _ERRACTION \ ) \ do { \ (void)Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, (LEN)); \ @@ -274,17 +275,17 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) */ #ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 #define Py_HMAC_HACL_UPDATE_LOOP( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ while ((Py_ssize_t)LEN > UINT32_MAX_AS_SSIZE_T) { \ Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, UINT32_MAX, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ BUF += UINT32_MAX; \ LEN -= UINT32_MAX; \ } \ @@ -292,7 +293,7 @@ typedef struct py_hmac_hacl_api { #else #define Py_HMAC_HACL_UPDATE_LOOP( \ HACL_STATE, BUF, LEN, \ - _ALGORITHM, _ERRACTION \ + _ERRACTION \ ) #endif @@ -301,17 +302,17 @@ typedef struct py_hmac_hacl_api { * * The formal signature of this macro is: * - * (HACL_HMAC_state *, uint8_t *, C integer, PyObject *, (C statements)) + * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) */ #define Py_HMAC_HACL_UPDATE( \ HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION \ + ERRACTION \ ) \ do { \ Py_HMAC_HACL_UPDATE_LOOP(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, LEN, \ - ALGORITHM, ERRACTION); \ + ERRACTION); \ } while (0) /* @@ -464,7 +465,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) { switch (kind) { case Py_hmac_kind_hmac_blake2s_32: { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (state->can_run_simd128) { return Py_hmac_kind_hmac_vectorized_blake2s_32; } @@ -472,7 +473,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) return kind; } case Py_hmac_kind_hmac_blake2b_32: { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (state->can_run_simd256) { return Py_hmac_kind_hmac_vectorized_blake2b_32; } @@ -491,38 +492,40 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) * Otherwise, this sets an appropriate exception and returns -1. */ static int -_hacl_convert_errno(hacl_errno_t code, PyObject *algorithm) +_hacl_convert_errno(hacl_errno_t code) { + assert(PyGILState_GetThisThreadState() != NULL); + if (code == Hacl_Streaming_Types_Success) { + return 0; + } + + PyGILState_STATE gstate = PyGILState_Ensure(); switch (code) { - case Hacl_Streaming_Types_Success: { - return 0; - } case Hacl_Streaming_Types_InvalidAlgorithm: { - // only makes sense if an algorithm is known at call time - assert(algorithm != NULL); - assert(PyUnicode_CheckExact(algorithm)); - PyErr_Format(PyExc_ValueError, "invalid algorithm: %U", algorithm); - return -1; + PyErr_SetString(PyExc_ValueError, "invalid HACL* algorithm"); + break; } case Hacl_Streaming_Types_InvalidLength: { PyErr_SetString(PyExc_ValueError, "invalid length"); - return -1; + break; } case Hacl_Streaming_Types_MaximumLengthExceeded: { PyErr_SetString(PyExc_OverflowError, "maximum length exceeded"); - return -1; + break; } case Hacl_Streaming_Types_OutOfMemory: { PyErr_NoMemory(); - return -1; + break; } default: { PyErr_Format(PyExc_RuntimeError, - "HACL* internal routine failed with error code: %d", + "HACL* internal routine failed with error code: %u", code); - return -1; + break; } } + PyGILState_Release(gstate); + return -1; } /* @@ -536,7 +539,7 @@ _hacl_hmac_state_new(HMAC_Hash_Kind kind, uint8_t *key, uint32_t len) assert(kind != Py_hmac_kind_hash_unknown); HACL_HMAC_state *state = NULL; hacl_errno_t retcode = Hacl_Streaming_HMAC_malloc_(kind, key, len, &state); - if (_hacl_convert_errno(retcode, NULL) < 0) { + if (_hacl_convert_errno(retcode) < 0) { assert(state == NULL); return NULL; } @@ -804,13 +807,13 @@ hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) } if (len < HASHLIB_GIL_MINSIZE) { - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, return -1); + Py_HMAC_HACL_UPDATE(self->state, msg, len, return -1); return 0; } int res = 0; Py_BEGIN_ALLOW_THREADS - Py_HMAC_HACL_UPDATE(self->state, msg, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, msg, len, goto error); goto done; #ifndef NDEBUG error: @@ -978,7 +981,7 @@ hmac_update_state_with_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) int res = 0; Py_BEGIN_ALLOW_THREADS PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); goto done; #ifndef NDEBUG error: @@ -1005,7 +1008,7 @@ static int hmac_update_state_cond_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) { ENTER_HASHLIB(self); // conditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, self->name, goto error); + Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); LEAVE_HASHLIB(self); return 0; @@ -1076,7 +1079,7 @@ hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest) rc == Hacl_Streaming_Types_Success || rc == Hacl_Streaming_Types_OutOfMemory ); - return _hacl_convert_errno(rc, NULL); + return _hacl_convert_errno(rc); } /*[clinic input] @@ -1759,7 +1762,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -1769,7 +1772,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else diff --git a/Modules/main.c b/Modules/main.c index ea1239ecc57f00..9b2ee103d52662 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -269,13 +269,14 @@ pymain_run_command(wchar_t *command) static int -pymain_start_pyrepl_no_main(void) +pymain_start_pyrepl(int pythonstartup) { int res = 0; PyObject *console = NULL; PyObject *empty_tuple = NULL; PyObject *kwargs = NULL; PyObject *console_result = NULL; + PyObject *main_module = NULL; PyObject *pyrepl = PyImport_ImportModule("_pyrepl.main"); if (pyrepl == NULL) { @@ -299,7 +300,13 @@ pymain_start_pyrepl_no_main(void) res = pymain_exit_err_print(); goto done; } - if (!PyDict_SetItemString(kwargs, "pythonstartup", _PyLong_GetOne())) { + main_module = PyImport_AddModuleRef("__main__"); + if (main_module == NULL) { + res = pymain_exit_err_print(); + goto done; + } + if (!PyDict_SetItemString(kwargs, "mainmodule", main_module) + && !PyDict_SetItemString(kwargs, "pythonstartup", pythonstartup ? Py_True : Py_False)) { console_result = PyObject_Call(console, empty_tuple, kwargs); if (console_result == NULL) { res = pymain_exit_err_print(); @@ -311,6 +318,7 @@ pymain_start_pyrepl_no_main(void) Py_XDECREF(empty_tuple); Py_XDECREF(console); Py_XDECREF(pyrepl); + Py_XDECREF(main_module); return res; } @@ -562,7 +570,7 @@ pymain_run_stdin(PyConfig *config) int run = PyRun_AnyFileExFlags(stdin, "", 0, &cf); return (run != 0); } - return pymain_run_module(L"_pyrepl", 0); + return pymain_start_pyrepl(0); } @@ -595,7 +603,7 @@ pymain_repl(PyConfig *config, int *exitcode) *exitcode = (run != 0); return; } - int run = pymain_start_pyrepl_no_main(); + int run = pymain_start_pyrepl(1); *exitcode = (run != 0); return; } diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 11d9b7418a25a2..71d9c1387f5780 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2008,13 +2008,11 @@ math.factorial / Find n!. - -Raise a ValueError if x is negative or non-integral. [clinic start generated code]*/ static PyObject * math_factorial(PyObject *module, PyObject *arg) -/*[clinic end generated code: output=6686f26fae00e9ca input=713fb771677e8c31]*/ +/*[clinic end generated code: output=6686f26fae00e9ca input=366cc321df3d4773]*/ { long x, two_valuation; int overflow; @@ -2163,6 +2161,27 @@ math_ldexp_impl(PyObject *module, double x, PyObject *i) } else { errno = 0; r = ldexp(x, (int)exp); +#ifdef _MSC_VER + if (DBL_MIN > r && r > -DBL_MIN) { + /* Denormal (or zero) results can be incorrectly rounded here (rather, + truncated). Fixed in newer versions of the C runtime, included + with Windows 11. */ + int original_exp; + frexp(x, &original_exp); + if (original_exp > DBL_MIN_EXP) { + /* Shift down to the smallest normal binade. No bits lost. */ + int shift = DBL_MIN_EXP - original_exp; + x = ldexp(x, shift); + exp -= shift; + } + /* Multiplying by 2**exp finishes the job, and the HW will round as + appropriate. Note: if exp < -DBL_MANT_DIG, all of x is shifted + to be < 0.5ULP of smallest denorm, so should be thrown away. If + exp is so very negative that ldexp underflows to 0, that's fine; + no need to check in advance. */ + r = x*ldexp(1.0, (int)exp); + } +#endif if (isinf(r)) errno = ERANGE; } diff --git a/Modules/md5module.c b/Modules/md5module.c index c36eb41d4d201e..9b5ea2d6e02605 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -276,17 +276,24 @@ static PyType_Spec md5_type_spec = { /*[clinic input] _md5.md5 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new MD5 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=587071f76254a4ac input=7a144a1905636985]*/ +_md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=d45e187d3d16f3a8 input=7ea5c5366dbb44bf]*/ { + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } + MD5object *new; Py_buffer buf; diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 6a385562845849..d9e8c60209dae3 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -25,6 +25,7 @@ #include #include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_fileutils.h" // _Py_stat_struct +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include // offsetof() #ifndef MS_WINDOWS @@ -163,8 +164,7 @@ mmap_object_dealloc(PyObject *op) Py_END_ALLOW_THREADS #endif /* UNIX */ - if (m_obj->weakreflist != NULL) - PyObject_ClearWeakRefs(op); + FT_CLEAR_WEAKREFS(op, m_obj->weakreflist); tp->tp_free(m_obj); Py_DECREF(tp); diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 922694fa367ac3..27352f0c20dd48 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -685,7 +685,8 @@ static void reset_remotedebug_data(PyThreadState *tstate) { tstate->remote_debugger_support.debugger_pending_call = 0; - memset(tstate->remote_debugger_support.debugger_script_path, 0, MAX_SCRIPT_PATH_SIZE); + memset(tstate->remote_debugger_support.debugger_script_path, 0, + Py_MAX_SCRIPT_PATH_SIZE); } @@ -8806,14 +8807,14 @@ os_ptsname_impl(PyObject *module, int fd) #if defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) || defined(HAVE_LOGIN_TTY) || defined(HAVE_DEV_PTMX) #ifdef HAVE_PTY_H #include -#ifdef HAVE_UTMP_H -#include -#endif /* HAVE_UTMP_H */ #elif defined(HAVE_LIBUTIL_H) #include #elif defined(HAVE_UTIL_H) #include #endif /* HAVE_PTY_H */ +#ifdef HAVE_UTMP_H +#include +#endif /* HAVE_UTMP_H */ #ifdef HAVE_STROPTS_H #include #endif @@ -9548,6 +9549,24 @@ os_getlogin_impl(PyObject *module) } else result = PyErr_SetFromWindowsErr(GetLastError()); +#elif defined (HAVE_GETLOGIN_R) +# if defined (HAVE_MAXLOGNAME) + char name[MAXLOGNAME + 1]; +# elif defined (HAVE_UT_NAMESIZE) + char name[UT_NAMESIZE + 1]; +# else + char name[256]; +# endif + int err = getlogin_r(name, sizeof(name)); + if (err) { + int old_errno = errno; + errno = -err; + posix_error(); + errno = old_errno; + } + else { + result = PyUnicode_DecodeFSDefault(name); + } #else char *name; int old_errno = errno; diff --git a/Modules/sha1module.c b/Modules/sha1module.c index f4a00cdb422156..a746bf74f8d4c1 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -272,19 +272,25 @@ static PyType_Spec sha1_type_spec = { /*[clinic input] _sha1.sha1 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA1 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=6f8b3af05126e18e input=bd54b68e2bf36a8a]*/ +_sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=0d453775924f88a7 input=807f25264e0ac656]*/ { SHA1object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); diff --git a/Modules/sha2module.c b/Modules/sha2module.c index e88d7cb2d456bf..72931910c5d720 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -594,18 +594,24 @@ static PyType_Spec sha512_type_spec = { /*[clinic input] _sha2.sha256 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-256 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=243c9dd289931f87 input=6249da1de607280a]*/ +_sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=49828a7bcd418f45 input=9ce1d70e669abc14]*/ { Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); @@ -651,18 +657,25 @@ _sha2_sha256_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha224 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-224 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=68191f232e4a3843 input=c42bcba47fd7d2b7]*/ +_sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=2163cb03b6cf6157 input=612f7682a889bc2a]*/ { Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } + if (string) { GET_BUFFER_VIEW_OR_ERROUT(string, &buf); } @@ -706,19 +719,25 @@ _sha2_sha224_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha512 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-512 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=d55c8996eca214d7 input=0576ae2a6ebfad25]*/ +_sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=cc3fcfce001a4538 input=19c9f2c06d59563a]*/ { SHA512object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } sha2_state *state = sha2_get_state(module); @@ -763,19 +782,25 @@ _sha2_sha512_impl(PyObject *module, PyObject *string, int usedforsecurity) /*[clinic input] _sha2.sha384 - string: object(c_default="NULL") = b'' + data: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string as string_obj: object(c_default="NULL") = None Return a new SHA-384 hash object; optionally initialized with a string. [clinic start generated code]*/ static PyObject * -_sha2_sha384_impl(PyObject *module, PyObject *string, int usedforsecurity) -/*[clinic end generated code: output=b29a0d81d51d1368 input=4e9199d8de0d2f9b]*/ +_sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, + PyObject *string_obj) +/*[clinic end generated code: output=b6e3db593b5a0330 input=9fd50c942ad9e0bf]*/ { SHA512object *new; Py_buffer buf; + PyObject *string; + if (_Py_hashlib_data_argument(&string, data, string_obj) < 0) { + return NULL; + } sha2_state *state = sha2_get_state(module); diff --git a/Modules/sha3module.c b/Modules/sha3module.c index a7edf5c66a1e76..cfbf0cbcc042c5 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -105,18 +105,25 @@ sha3_update(Hacl_Hash_SHA3_state_t *state, uint8_t *buf, Py_ssize_t len) /*[clinic input] @classmethod _sha3.sha3_224.__new__ as py_sha3_new - data: object(c_default="NULL") = b'' - / + + data as data_obj: object(c_default="NULL") = b'' * usedforsecurity: bool = True + string: object(c_default="NULL") = None Return a new SHA3 hash object. [clinic start generated code]*/ static PyObject * -py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity) -/*[clinic end generated code: output=90409addc5d5e8b0 input=637e5f8f6a93982a]*/ +py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, + PyObject *string) +/*[clinic end generated code: output=dcec1eca20395f2a input=c106e0b4e2d67d58]*/ { + PyObject *data; + if (_Py_hashlib_data_argument(&data, data_obj, string) < 0) { + return NULL; + } + Py_buffer buf = {NULL, NULL}; SHA3State *state = _PyType_GetModuleState(type); SHA3object *self = newSHA3object(type); @@ -503,14 +510,13 @@ _SHAKE_digest(PyObject *op, unsigned long digestlen, int hex) _sha3.shake_128.digest length: unsigned_long - / Return the digest value as a bytes object. [clinic start generated code]*/ static PyObject * _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=2313605e2f87bb8f input=418ef6a36d2e6082]*/ +/*[clinic end generated code: output=2313605e2f87bb8f input=93d6d6ff32904f18]*/ { return _SHAKE_digest((PyObject *)self, length, 0); } @@ -520,14 +526,13 @@ _sha3_shake_128_digest_impl(SHA3object *self, unsigned long length) _sha3.shake_128.hexdigest length: unsigned_long - / Return the digest value as a string of hexadecimal digits. [clinic start generated code]*/ static PyObject * _sha3_shake_128_hexdigest_impl(SHA3object *self, unsigned long length) -/*[clinic end generated code: output=bf8e2f1e490944a8 input=69fb29b0926ae321]*/ +/*[clinic end generated code: output=bf8e2f1e490944a8 input=562d74e7060b56ab]*/ { return _SHAKE_digest((PyObject *)self, length, 1); } diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 47958379263793..92c9aa8b510dca 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -111,7 +111,6 @@ Local naming conventions: #include "pycore_moduleobject.h" // _PyModule_GetState #include "pycore_time.h" // _PyTime_AsMilliseconds() #include "pycore_pystate.h" // _Py_AssertHoldsTstate() -#include "pycore_pyatomic_ft_wrappers.h" #ifdef _Py_MEMORY_SANITIZER # include @@ -565,7 +564,6 @@ static int sock_cloexec_works = -1; static inline void set_sock_fd(PySocketSockObject *s, SOCKET_T fd) { -#ifdef Py_GIL_DISABLED #if SIZEOF_SOCKET_T == SIZEOF_INT _Py_atomic_store_int_relaxed((int *)&s->sock_fd, (int)fd); #elif SIZEOF_SOCKET_T == SIZEOF_LONG @@ -575,15 +573,11 @@ set_sock_fd(PySocketSockObject *s, SOCKET_T fd) #else #error "Unsupported SIZEOF_SOCKET_T" #endif -#else - s->sock_fd = fd; -#endif } static inline SOCKET_T get_sock_fd(PySocketSockObject *s) { -#ifdef Py_GIL_DISABLED #if SIZEOF_SOCKET_T == SIZEOF_INT return (SOCKET_T)_Py_atomic_load_int_relaxed((int *)&s->sock_fd); #elif SIZEOF_SOCKET_T == SIZEOF_LONG @@ -593,9 +587,6 @@ get_sock_fd(PySocketSockObject *s) #else #error "Unsupported SIZEOF_SOCKET_T" #endif -#else - return s->sock_fd; -#endif } #define _PySocketSockObject_CAST(op) ((PySocketSockObject *)(op)) @@ -638,33 +629,22 @@ _PyLong_##NAME##_Converter(PyObject *obj, void *ptr) \ return 1; \ } -UNSIGNED_INT_CONVERTER(UInt16, uint16_t) -UNSIGNED_INT_CONVERTER(UInt32, uint32_t) - #if defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS) # ifdef MS_WINDOWS UNSIGNED_INT_CONVERTER(NetIfindex, NET_IFINDEX) # else - UNSIGNED_INT_CONVERTER(NetIfindex, unsigned int) +# define _PyLong_NetIfindex_Converter _PyLong_UnsignedInt_Converter # define NET_IFINDEX unsigned int # endif #endif // defined(HAVE_IF_NAMEINDEX) || defined(MS_WINDOWS) /*[python input] -class uint16_converter(CConverter): - type = "uint16_t" - converter = '_PyLong_UInt16_Converter' - -class uint32_converter(CConverter): - type = "uint32_t" - converter = '_PyLong_UInt32_Converter' - class NET_IFINDEX_converter(CConverter): type = "NET_IFINDEX" converter = '_PyLong_NetIfindex_Converter' [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=3de2e4a03fbf83b8]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=1cf809c40a407c34]*/ /*[clinic input] module _socket diff --git a/Modules/xxlimited.c b/Modules/xxlimited.c index 26ac35734fb060..0480fb0849876b 100644 --- a/Modules/xxlimited.c +++ b/Modules/xxlimited.c @@ -424,6 +424,13 @@ xx_clear(PyObject *module) return 0; } +static void +xx_free(void *module) +{ + // allow xx_modexec to omit calling xx_clear on error + (void)xx_clear((PyObject *)module); +} + static struct PyModuleDef xxmodule = { PyModuleDef_HEAD_INIT, .m_name = "xxlimited", @@ -433,9 +440,7 @@ static struct PyModuleDef xxmodule = { .m_slots = xx_slots, .m_traverse = xx_traverse, .m_clear = xx_clear, - /* m_free is not necessary here: xx_clear clears all references, - * and the module state is deallocated along with the module. - */ + .m_free = xx_free, }; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index fc407ec6bf99d6..87ea1162e03513 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -1075,10 +1075,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len, } /* Unescape a backslash-escaped string. */ -PyObject *_PyBytes_DecodeEscape(const char *s, +PyObject *_PyBytes_DecodeEscape2(const char *s, Py_ssize_t len, const char *errors, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { int c; char *p; @@ -1092,7 +1093,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s, return NULL; writer.overallocate = 1; - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; end = s + len; while (s < end) { @@ -1130,9 +1132,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, c = (c<<3) + *s++ - '0'; } if (c > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; } } *p++ = c; @@ -1173,9 +1176,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s, break; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = (unsigned char)s[-1]; + /* Back up one char, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 1; } *p++ = '\\'; s--; @@ -1195,18 +1199,19 @@ PyObject *PyBytes_DecodeEscape(const char *s, Py_ssize_t Py_UNUSED(unicode), const char *Py_UNUSED(recode_encoding)) { - const char* first_invalid_escape; - PyObject *result = _PyBytes_DecodeEscape(s, len, errors, - &first_invalid_escape); + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyBytes_DecodeEscape2(s, len, errors, + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "b\"\\%.3s\" is an invalid octal escape sequence. " + "b\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -1216,7 +1221,7 @@ PyObject *PyBytes_DecodeEscape(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "b\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. ", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; diff --git a/Objects/classobject.c b/Objects/classobject.c index 58e1d17977322e..e71f301f2efd77 100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -7,6 +7,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/classobject.c.h" @@ -245,8 +246,7 @@ method_dealloc(PyObject *self) { PyMethodObject *im = _PyMethodObject_CAST(self); _PyObject_GC_UNTRACK(im); - if (im->im_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *)im); + FT_CLEAR_WEAKREFS(self, im->im_weakreflist); Py_DECREF(im->im_func); Py_XDECREF(im->im_self); assert(Py_IS_TYPE(self, &PyMethod_Type)); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 92eaf5e00bc7dd..ba178abc0c071e 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -17,6 +17,7 @@ #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_unicodeobject.h" // _PyUnicode_InternImmortal() #include "pycore_uniqueid.h" // _PyObject_AssignUniqueId() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/codeobject.c.h" #include @@ -1714,7 +1715,7 @@ static int identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, PyObject *globalnames, PyObject *attrnames, PyObject *globalsns, PyObject *builtinsns, - struct co_unbound_counts *counts) + struct co_unbound_counts *counts, int *p_numdupes) { // This function is inspired by inspect.getclosurevars(). // It would be nicer if we had something similar to co_localspluskinds, @@ -1729,6 +1730,7 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, assert(builtinsns == NULL || PyDict_Check(builtinsns)); assert(counts == NULL || counts->total == 0); struct co_unbound_counts unbound = {0}; + int numdupes = 0; Py_ssize_t len = Py_SIZE(co); for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); @@ -1747,6 +1749,12 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, if (PySet_Add(attrnames, name) < 0) { return -1; } + if (PySet_Contains(globalnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } } else if (inst.op.code == LOAD_GLOBAL) { int oparg = GET_OPARG(co, i, inst.op.arg); @@ -1778,11 +1786,20 @@ identify_unbound_names(PyThreadState *tstate, PyCodeObject *co, if (PySet_Add(globalnames, name) < 0) { return -1; } + if (PySet_Contains(attrnames, name)) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + numdupes += 1; + } } } if (counts != NULL) { *counts = unbound; } + if (p_numdupes != NULL) { + *p_numdupes = numdupes; + } return 0; } @@ -1932,20 +1949,24 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, // Fill in unbound.globals and unbound.numattrs. struct co_unbound_counts unbound = {0}; + int numdupes = 0; Py_BEGIN_CRITICAL_SECTION(co); res = identify_unbound_names( tstate, co, globalnames, attrnames, globalsns, builtinsns, - &unbound); + &unbound, &numdupes); Py_END_CRITICAL_SECTION(); if (res < 0) { goto finally; } assert(unbound.numunknown == 0); - assert(unbound.total <= counts->unbound.total); + assert(unbound.total - numdupes <= counts->unbound.total); assert(counts->unbound.numunknown == counts->unbound.total); - unbound.numunknown = counts->unbound.total - unbound.total; - unbound.total = counts->unbound.total; + // There may be a name that is both a global and an attr. + int totalunbound = counts->unbound.total + numdupes; + unbound.numunknown = totalunbound - unbound.total; + unbound.total = totalunbound; counts->unbound = unbound; + counts->total += numdupes; res = 0; finally: @@ -1955,12 +1976,129 @@ _PyCode_SetUnboundVarCounts(PyThreadState *tstate, } +int +_PyCode_CheckNoInternalState(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + // We don't worry about co_executors, co_instrumentation, + // or co_monitoring. They are essentially ephemeral. + if (co->co_extra != NULL) { + errmsg = "only basic code objects are supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_CheckNoExternalState(PyCodeObject *co, _PyCode_var_counts_t *counts, + const char **p_errmsg) +{ + const char *errmsg = NULL; + if (counts->numfree > 0) { // It's a closure. + errmsg = "closures not supported"; + } + else if (counts->unbound.globals.numglobal > 0) { + errmsg = "globals not supported"; + } + else if (counts->unbound.globals.numbuiltin > 0 + && counts->unbound.globals.numunknown > 0) + { + errmsg = "globals not supported"; + } + // Otherwise we don't check counts.unbound.globals.numunknown since we can't + // distinguish beween globals and builtins here. + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + +int +_PyCode_VerifyStateless(PyThreadState *tstate, + PyCodeObject *co, PyObject *globalnames, + PyObject *globalsns, PyObject *builtinsns) +{ + const char *errmsg; + _PyCode_var_counts_t counts = {0}; + _PyCode_GetVarCounts(co, &counts); + if (_PyCode_SetUnboundVarCounts( + tstate, co, &counts, globalnames, NULL, + globalsns, builtinsns) < 0) + { + return -1; + } + // We may consider relaxing the internal state constraints + // if it becomes a problem. + if (!_PyCode_CheckNoInternalState(co, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + if (builtinsns != NULL) { + // Make sure the next check will fail for globals, + // even if there aren't any builtins. + counts.unbound.globals.numbuiltin += 1; + } + if (!_PyCode_CheckNoExternalState(co, &counts, &errmsg)) { + _PyErr_SetString(tstate, PyExc_ValueError, errmsg); + return -1; + } + // Note that we don't check co->co_flags & CO_NESTED for anything here. + return 0; +} + + +int +_PyCode_CheckPureFunction(PyCodeObject *co, const char **p_errmsg) +{ + const char *errmsg = NULL; + if (co->co_flags & CO_GENERATOR) { + errmsg = "generators not supported"; + } + else if (co->co_flags & CO_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ITERABLE_COROUTINE) { + errmsg = "coroutines not supported"; + } + else if (co->co_flags & CO_ASYNC_GENERATOR) { + errmsg = "generators not supported"; + } + + if (errmsg != NULL) { + if (p_errmsg != NULL) { + *p_errmsg = errmsg; + } + return 0; + } + return 1; +} + /* Here "value" means a non-None value, since a bare return is identical * to returning None explicitly. Likewise a missing return statement * at the end of the function is turned into "return None". */ static int code_returns_only_none(PyCodeObject *co) { + if (!_PyCode_CheckPureFunction(co, NULL)) { + return 0; + } + int len = (int)Py_SIZE(co); + assert(len > 0); + + // The last instruction either returns or raises. We can take advantage + // of that for a quick exit. + _Py_CODEUNIT final = _Py_GetBaseCodeUnit(co, len-1); + // Look up None in co_consts. Py_ssize_t nconsts = PyTuple_Size(co->co_consts); int none_index = 0; @@ -1971,26 +2109,42 @@ code_returns_only_none(PyCodeObject *co) } if (none_index == nconsts) { // None wasn't there, which means there was no implicit return, - // "return", or "return None". That means there must be - // an explicit return (non-None). - return 0; - } + // "return", or "return None". - // Walk the bytecode, looking for RETURN_VALUE. - Py_ssize_t len = Py_SIZE(co); - for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { - _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); - if (IS_RETURN_OPCODE(inst.op.code)) { - assert(i != 0); - // Ignore it if it returns None. - _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); - if (prev.op.code == LOAD_CONST) { - // We don't worry about EXTENDED_ARG for now. - if (prev.op.arg == none_index) { - continue; + // That means there must be + // an explicit return (non-None), or it only raises. + if (IS_RETURN_OPCODE(final.op.code)) { + // It was an explicit return (non-None). + return 0; + } + // It must end with a raise then. We still have to walk the + // bytecode to see if there's any explicit return (non-None). + assert(IS_RAISE_OPCODE(final.op.code)); + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + // We alraedy know it isn't returning None. + return 0; + } + } + // It must only raise. + } + else { + // Walk the bytecode, looking for RETURN_VALUE. + for (int i = 0; i < len; i += _PyInstruction_GetLength(co, i)) { + _Py_CODEUNIT inst = _Py_GetBaseCodeUnit(co, i); + if (IS_RETURN_OPCODE(inst.op.code)) { + assert(i != 0); + // Ignore it if it returns None. + _Py_CODEUNIT prev = _Py_GetBaseCodeUnit(co, i-1); + if (prev.op.code == LOAD_CONST) { + // We don't worry about EXTENDED_ARG for now. + if (prev.op.arg == none_index) { + continue; + } } + return 0; } - return 0; } } return 1; @@ -2230,6 +2384,8 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyObject *self) { + PyThreadState *tstate = PyThreadState_GET(); + _Py_atomic_add_uint64(&tstate->interp->_code_object_generation, 1); PyCodeObject *co = _PyCodeObject_CAST(self); _PyObject_ResurrectStart(self); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); @@ -2281,9 +2437,7 @@ code_dealloc(PyObject *self) Py_XDECREF(co->_co_cached->_co_varnames); PyMem_Free(co->_co_cached); } - if (co->co_weakreflist != NULL) { - PyObject_ClearWeakRefs(self); - } + FT_CLEAR_WEAKREFS(self, co->co_weakreflist); free_monitoring_data(co->_co_monitoring); #ifdef Py_GIL_DISABLED // The first element always points to the mutable bytecode at the end of @@ -3214,7 +3368,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 59b0cf1ce7d422..be62ae5eefd00d 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -547,13 +547,13 @@ static inline uint8_t calculate_log2_keysize(Py_ssize_t minsize) { #if SIZEOF_LONG == SIZEOF_SIZE_T - minsize = (minsize | PyDict_MINSIZE) - 1; - return _Py_bit_length(minsize | (PyDict_MINSIZE-1)); + minsize = Py_MAX(minsize, PyDict_MINSIZE); + return _Py_bit_length(minsize - 1); #elif defined(_MSC_VER) - // On 64bit Windows, sizeof(long) == 4. - minsize = (minsize | PyDict_MINSIZE) - 1; + // On 64bit Windows, sizeof(long) == 4. We cannot use _Py_bit_length. + minsize = Py_MAX(minsize, PyDict_MINSIZE); unsigned long msb; - _BitScanReverse64(&msb, (uint64_t)minsize); + _BitScanReverse64(&msb, (uint64_t)minsize - 1); return (uint8_t)(msb + 1); #else uint8_t log2_size; @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif @@ -2915,6 +2915,11 @@ clear_lock_held(PyObject *op) ASSERT_CONSISTENT(mp); } +void +_PyDict_Clear_LockHeld(PyObject *op) { + clear_lock_held(op); +} + void PyDict_Clear(PyObject *op) { @@ -3178,9 +3183,10 @@ dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp, Py_ssize_t pos = 0; PyObject *key; Py_hash_t hash; - - if (dictresize(interp, mp, - estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) { + uint8_t new_size = Py_MAX( + estimate_log2_keysize(PySet_GET_SIZE(iterable)), + DK_LOG_SIZE(mp->ma_keys)); + if (dictresize(interp, mp, new_size, 0)) { Py_DECREF(mp); return NULL; } @@ -3852,7 +3858,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe } } - Py_ssize_t orig_size = other->ma_keys->dk_nentries; + Py_ssize_t orig_size = other->ma_used; Py_ssize_t pos = 0; Py_hash_t hash; PyObject *key, *value; @@ -3886,7 +3892,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe if (err != 0) return -1; - if (orig_size != other->ma_keys->dk_nentries) { + if (orig_size != other->ma_used) { PyErr_SetString(PyExc_RuntimeError, "dict mutated during update"); return -1; @@ -4405,6 +4411,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu if (result) { *result = NULL; } + return -1; } STORE_USED(mp, mp->ma_used + 1); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 56df5730db0c55..9532c21fc7082e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1,13 +1,16 @@ /* Function object implementation */ #include "Python.h" +#include "pycore_code.h" // _PyCode_VerifyStateless() #include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_function.h" // _PyFunction_Vectorcall #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_pyerrors.h" // _PyErr_Occurred() +#include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_stats.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() static const char * @@ -1146,9 +1149,7 @@ func_dealloc(PyObject *self) return; } _PyObject_GC_UNTRACK(op); - if (op->func_weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *) op); - } + FT_CLEAR_WEAKREFS(self, op->func_weakreflist); (void)func_clear((PyObject*)op); // These aren't cleared by func_clear(). _Py_DECREF_CODE((PyCodeObject *)op->func_code); @@ -1240,6 +1241,64 @@ PyTypeObject PyFunction_Type = { }; +int +_PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func) +{ + assert(!PyErr_Occurred()); + assert(PyFunction_Check(func)); + + // Check the globals. + PyObject *globalsns = PyFunction_GET_GLOBALS(func); + if (globalsns != NULL && !PyDict_Check(globalsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported globals %R", globalsns); + return -1; + } + // Check the builtins. + PyObject *builtinsns = _PyFunction_GET_BUILTINS(func); + if (builtinsns != NULL && !PyDict_Check(builtinsns)) { + _PyErr_Format(tstate, PyExc_TypeError, + "unsupported builtins %R", builtinsns); + return -1; + } + // Disallow __defaults__. + PyObject *defaults = PyFunction_GET_DEFAULTS(func); + if (defaults != NULL) { + assert(PyTuple_Check(defaults)); // per PyFunction_New() + if (PyTuple_GET_SIZE(defaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "defaults not supported"); + return -1; + } + } + // Disallow __kwdefaults__. + PyObject *kwdefaults = PyFunction_GET_KW_DEFAULTS(func); + if (kwdefaults != NULL) { + assert(PyDict_Check(kwdefaults)); // per PyFunction_New() + if (PyDict_Size(kwdefaults) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, + "keyword defaults not supported"); + return -1; + } + } + // Disallow __closure__. + PyObject *closure = PyFunction_GET_CLOSURE(func); + if (closure != NULL) { + assert(PyTuple_Check(closure)); // per PyFunction_New() + if (PyTuple_GET_SIZE(closure) > 0) { + _PyErr_SetString(tstate, PyExc_ValueError, "closures not supported"); + return -1; + } + } + // Check the code. + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + if (_PyCode_VerifyStateless(tstate, co, NULL, globalsns, builtinsns) < 0) { + return -1; + } + return 0; +} + + static int functools_copy_attr(PyObject *wrapper, PyObject *wrapped, PyObject *name) { diff --git a/Objects/genericaliasobject.c b/Objects/genericaliasobject.c index ec3d01f00a3c3c..3bb961aa2b619d 100644 --- a/Objects/genericaliasobject.c +++ b/Objects/genericaliasobject.c @@ -7,6 +7,7 @@ #include "pycore_typevarobject.h" // _Py_typing_type_repr #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() #include "pycore_unionobject.h" // _Py_union_type_or, _PyGenericAlias_Check +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include @@ -33,9 +34,7 @@ ga_dealloc(PyObject *self) gaobject *alias = (gaobject *)self; _PyObject_GC_UNTRACK(self); - if (alias->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *)alias); - } + FT_CLEAR_WEAKREFS(self, alias->weakreflist); Py_XDECREF(alias->origin); Py_XDECREF(alias->args); Py_XDECREF(alias->parameters); @@ -65,7 +64,7 @@ ga_repr_items_list(PyUnicodeWriter *writer, PyObject *p) for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { return -1; } } @@ -109,7 +108,7 @@ ga_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -126,7 +125,7 @@ ga_repr(PyObject *self) } if (len == 0) { // for something like tuple[()] we should print a "()" - if (PyUnicodeWriter_WriteUTF8(writer, "()", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, "()", 2) < 0) { goto error; } } diff --git a/Objects/genobject.c b/Objects/genobject.c index 98b2c5004df8ac..88814c3578af6e 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -17,6 +17,7 @@ #include "pycore_pyerrors.h" // _PyErr_ClearExcState() #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_warnings.h" // _PyErr_WarnUnawaitedCoroutine() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "opcode_ids.h" // RESUME, etc @@ -161,8 +162,7 @@ gen_dealloc(PyObject *self) _PyObject_GC_UNTRACK(gen); - if (gen->gi_weakreflist != NULL) - PyObject_ClearWeakRefs(self); + FT_CLEAR_WEAKREFS(self, gen->gi_weakreflist); _PyObject_GC_TRACK(self); diff --git a/Objects/interpolationobject.c b/Objects/interpolationobject.c index aaea3b8c0670c9..a5d407a7b0e296 100644 --- a/Objects/interpolationobject.c +++ b/Objects/interpolationobject.c @@ -137,6 +137,8 @@ interpolation_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) static PyMethodDef interpolation_methods[] = { {"__reduce__", interpolation_reduce, METH_NOARGS, PyDoc_STR("__reduce__() -> (cls, state)")}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL}, }; diff --git a/Objects/listobject.c b/Objects/listobject.c index c5895645a2dd12..23d3472b6d4153 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); diff --git a/Objects/longobject.c b/Objects/longobject.c index 40d90ecf4fa068..2b533312fee673 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -971,16 +971,9 @@ _PyLong_FromByteArray(const unsigned char* bytes, size_t n, ++numsignificantbytes; } - /* How many Python int digits do we need? We have - 8*numsignificantbytes bits, and each Python int digit has - PyLong_SHIFT bits, so it's the ceiling of the quotient. */ - /* catch overflow before it happens */ - if (numsignificantbytes > (PY_SSIZE_T_MAX - PyLong_SHIFT) / 8) { - PyErr_SetString(PyExc_OverflowError, - "byte array too long to convert to int"); - return NULL; - } - ndigits = (numsignificantbytes * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; + /* avoid integer overflow */ + ndigits = numsignificantbytes / PyLong_SHIFT * 8 + + (numsignificantbytes % PyLong_SHIFT * 8 + PyLong_SHIFT - 1) / PyLong_SHIFT; v = long_alloc(ndigits); if (v == NULL) return NULL; @@ -1760,6 +1753,10 @@ UNSIGNED_INT_CONVERTER(UnsignedInt, unsigned int) UNSIGNED_INT_CONVERTER(UnsignedLong, unsigned long) UNSIGNED_INT_CONVERTER(UnsignedLongLong, unsigned long long) UNSIGNED_INT_CONVERTER(Size_t, size_t) +UNSIGNED_INT_CONVERTER(UInt8, uint8_t) +UNSIGNED_INT_CONVERTER(UInt16, uint16_t) +UNSIGNED_INT_CONVERTER(UInt32, uint32_t) +UNSIGNED_INT_CONVERTER(UInt64, uint64_t) #define CHECK_BINOP(v,w) \ diff --git a/Objects/methodobject.c b/Objects/methodobject.c index c3dcd09ad1cdb6..e6e469ca270ac9 100644 --- a/Objects/methodobject.c +++ b/Objects/methodobject.c @@ -8,6 +8,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() /* undefine macro trampoline to PyCFunction_NewEx */ @@ -167,9 +168,7 @@ meth_dealloc(PyObject *self) { PyCFunctionObject *m = _PyCFunctionObject_CAST(self); PyObject_GC_UnTrack(m); - if (m->m_weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject*) m); - } + FT_CLEAR_WEAKREFS(self, m->m_weakreflist); // We need to access ml_flags here rather than later. // `m->m_ml` might have the same lifetime // as `m_self` when it's dynamically allocated. diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index f363ef173cbd46..b68584b5dd571d 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -14,6 +14,7 @@ #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_sysmodule.h" // _PySys_GetOptionalAttrString() #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "osdefs.h" // MAXPATHLEN @@ -827,8 +828,7 @@ module_dealloc(PyObject *self) if (verbose && m->md_name) { PySys_FormatStderr("# destroy %U\n", m->md_name); } - if (m->md_weaklist != NULL) - PyObject_ClearWeakRefs((PyObject *) m); + FT_CLEAR_WEAKREFS(self, m->md_weaklist); /* bpo-39824: Don't call m_free() if m_size > 0 and md_state=NULL */ if (m->md_def && m->md_def->m_free diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index caebe6bf543567..0fc2bcea4cb06e 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -124,9 +124,10 @@ namespace_repr(PyObject *ns) if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) { PyObject *value, *item; - value = PyDict_GetItemWithError(d, key); - if (value != NULL) { + int has_key = PyDict_GetItemRef(d, key, &value); + if (has_key == 1) { item = PyUnicode_FromFormat("%U=%R", key, value); + Py_DECREF(value); if (item == NULL) { loop_error = 1; } @@ -135,7 +136,7 @@ namespace_repr(PyObject *ns) Py_DECREF(item); } } - else if (PyErr_Occurred()) { + else if (has_key < 0) { loop_error = 1; } } diff --git a/Objects/object.c b/Objects/object.c index 723b0427e69251..8dc3ab9643cbb2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1906,34 +1906,11 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value) int PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr = _PyObject_GetDictPtr(obj); - if (dictptr == NULL) { - if (_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_INLINE_VALUES) && - _PyObject_GetManagedDict(obj) == NULL - ) { - /* Was unable to convert to dict */ - PyErr_NoMemory(); - } - else { - PyErr_SetString(PyExc_AttributeError, - "This object has no __dict__"); - } - return -1; - } if (value == NULL) { PyErr_SetString(PyExc_TypeError, "cannot delete __dict__"); return -1; } - if (!PyDict_Check(value)) { - PyErr_Format(PyExc_TypeError, - "__dict__ must be set to a dictionary, " - "not a '%.200s'", Py_TYPE(value)->tp_name); - return -1; - } - Py_BEGIN_CRITICAL_SECTION(obj); - Py_XSETREF(*dictptr, Py_NewRef(value)); - Py_END_CRITICAL_SECTION(); - return 0; + return _PyObject_SetDict(obj, value); } @@ -1996,9 +1973,25 @@ _dir_locals(void) PyObject *names; PyObject *locals; - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (!_PyErr_Occurred(tstate)) { + locals = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(locals); + } + } + if (locals == NULL) { return NULL; + } names = PyMapping_Keys(locals); Py_DECREF(locals); @@ -2930,57 +2923,28 @@ Py_ReprLeave(PyObject *obj) /* Trashcan support. */ -#ifndef Py_GIL_DISABLED -/* We need to store a pointer in the refcount field of - * an object. It is important that we never store 0 (NULL). -* It is also important to not make the object appear immortal, -* or it might be untracked by the cycle GC. */ -static uintptr_t -pointer_to_safe_refcount(void *ptr) -{ - uintptr_t full = (uintptr_t)ptr; - assert((full & 3) == 0); -#if SIZEOF_VOID_P > 4 - uint32_t refcnt = (uint32_t)full; - if (refcnt >= (uint32_t)_Py_IMMORTAL_MINIMUM_REFCNT) { - full = full - ((uintptr_t)_Py_IMMORTAL_MINIMUM_REFCNT) + 1; - } - return full + 2; -#else - // Make the top two bits 0, so it appears mortal. - return (full >> 2) + 1; -#endif -} - -static void * -safe_refcount_to_pointer(uintptr_t refcnt) -{ -#if SIZEOF_VOID_P > 4 - if (refcnt & 1) { - refcnt += _Py_IMMORTAL_MINIMUM_REFCNT - 1; - } - return (void *)(refcnt - 2); -#else - return (void *)((refcnt -1) << 2); -#endif -} -#endif - /* Add op to the gcstate->trash_delete_later list. Called when the current - * call-stack depth gets large. op must be a currently untracked gc'ed - * object, with refcount 0. Py_DECREF must already have been called on it. + * call-stack depth gets large. op must be a gc'ed object, with refcount 0. + * Py_DECREF must already have been called on it. */ void _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { _PyObject_ASSERT(op, Py_REFCNT(op) == 0); + PyTypeObject *tp = Py_TYPE(op); + assert(tp->tp_flags & Py_TPFLAGS_HAVE_GC); + int tracked = 0; + if (tp->tp_is_gc == NULL || tp->tp_is_gc(op)) { + tracked = _PyObject_GC_IS_TRACKED(op); + if (tracked) { + _PyObject_GC_UNTRACK(op); + } + } + uintptr_t tagged_ptr = ((uintptr_t)tstate->delete_later) | tracked; #ifdef Py_GIL_DISABLED - op->ob_tid = (uintptr_t)tstate->delete_later; + op->ob_tid = tagged_ptr; #else - /* Store the delete_later pointer in the refcnt field. */ - uintptr_t refcnt = pointer_to_safe_refcount(tstate->delete_later); - *((uintptr_t*)op) = refcnt; - assert(!_Py_IsImmortal(op)); + _Py_AS_GC(op)->_gc_next = tagged_ptr; #endif tstate->delete_later = op; } @@ -2995,17 +2959,17 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate) destructor dealloc = Py_TYPE(op)->tp_dealloc; #ifdef Py_GIL_DISABLED - tstate->delete_later = (PyObject*) op->ob_tid; + uintptr_t tagged_ptr = op->ob_tid; op->ob_tid = 0; _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, _Py_REF_MERGED); #else - /* Get the delete_later pointer from the refcnt field. - * See _PyTrash_thread_deposit_object(). */ - uintptr_t refcnt = *((uintptr_t*)op); - tstate->delete_later = safe_refcount_to_pointer(refcnt); - op->ob_refcnt = 0; + uintptr_t tagged_ptr = _Py_AS_GC(op)->_gc_next; + _Py_AS_GC(op)->_gc_next = 0; #endif - + tstate->delete_later = (PyObject *)(tagged_ptr & ~1); + if (tagged_ptr & 1) { + _PyObject_GC_TRACK(op); + } /* Call the deallocator directly. This used to try to * fool Py_DECREF into calling it indirectly, but * Py_DECREF was already called on this object, and in @@ -3079,10 +3043,11 @@ void _Py_Dealloc(PyObject *op) { PyTypeObject *type = Py_TYPE(op); + unsigned long gc_flag = type->tp_flags & Py_TPFLAGS_HAVE_GC; destructor dealloc = type->tp_dealloc; PyThreadState *tstate = _PyThreadState_GET(); intptr_t margin = _Py_RecursionLimit_GetMargin(tstate); - if (margin < 2) { + if (margin < 2 && gc_flag) { _PyTrash_thread_deposit_object(tstate, (PyObject *)op); return; } @@ -3128,7 +3093,7 @@ _Py_Dealloc(PyObject *op) Py_XDECREF(old_exc); Py_DECREF(type); #endif - if (tstate->delete_later && margin >= 4) { + if (tstate->delete_later && margin >= 4 && gc_flag) { _PyTrash_thread_destroy_chain(tstate); } } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index b209808da902da..092be84d2b9954 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page) } +#ifdef Py_GIL_DISABLED + +// If we are deferring collection of more than this amount of memory for +// mimalloc pages, advance the write sequence. Advancing allows these +// pages to be re-used in a different thread or for a different size class. +#define QSBR_PAGE_MEM_LIMIT 4096*20 + +// Return true if the global write sequence should be advanced for a mimalloc +// page that is deferred from collection. +static bool +should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page) +{ + size_t bsize = mi_page_block_size(page); + size_t page_size = page->capacity*bsize; + if (page_size > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + qsbr->deferred_page_memory += page_size; + if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + return false; +} +#endif + static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) { @@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) _PyMem_mi_page_clear_qsbr(page); page->retire_expire = 0; - page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr); + + if (should_advance_qsbr_for_page(tstate->qsbr, page)) { + page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared); + } + llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node); return false; } @@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } + +#ifdef Py_GIL_DISABLED + +// For deferred advance on free: the number of deferred items before advancing +// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally +// want to process a chunk before it overflows. +#define QSBR_DEFERRED_LIMIT 127 + +// If the deferred memory exceeds 1 MiB, advance the write sequence. This +// helps limit memory usage due to QSBR delaying frees too long. +#define QSBR_FREE_MEM_LIMIT 1024*1024 + +// Return true if the global write sequence should be advanced for a deferred +// memory free. +static bool +should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size) +{ + if (size > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + qsbr->deferred_count++; + qsbr->deferred_memory += size; + if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT || + qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + return false; +} +#endif + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + if (should_advance_qsbr_for_free(tstate->qsbr, size)) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_shared_next(tstate->qsbr->shared); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + // Normally the processing of delayed items is done from the eval + // breaker. Processing here is a safety measure to ensure too much + // work does not accumulate. _PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1305,10 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + // We use 0 as the size since we don't have an easy way to know the + // actual size. If we are freeing many objects, the write sequence + // will be advanced due to QSBR_DEFERRED_LIMIT. + free_delayed(((uintptr_t)ptr)|0x01, 0); } } #endif @@ -1238,7 +1320,7 @@ work_queue_first(struct llist_node *head) } static void -process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, +process_queue(struct llist_node *head, _PyThreadStateImpl *tstate, bool keep_empty, delayed_dealloc_cb cb, void *state) { while (!llist_empty(head)) { @@ -1246,7 +1328,7 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, if (buf->rd_idx < buf->wr_idx) { struct _mem_work_item *item = &buf->array[buf->rd_idx]; - if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) { + if (!_Py_qsbr_poll(tstate->qsbr, item->qsbr_goal)) { return; } @@ -1270,11 +1352,11 @@ process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr, static void process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { assert(PyMutex_IsLocked(&queue->mutex)); - process_queue(&queue->head, qsbr, false, cb, state); + process_queue(&queue->head, tstate, false, cb, state); int more_work = !llist_empty(&queue->head); _Py_atomic_store_int_relaxed(&queue->has_work, more_work); @@ -1282,7 +1364,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue, static void maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, - struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + _PyThreadStateImpl *tstate, delayed_dealloc_cb cb, void *state) { if (!_Py_atomic_load_int_relaxed(&queue->has_work)) { @@ -1291,7 +1373,7 @@ maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, // Try to acquire the lock, but don't block if it's already held. if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) { - process_interp_queue(queue, qsbr, cb, state); + process_interp_queue(queue, tstate, cb, state); PyMutex_Unlock(&queue->mutex); } } @@ -1302,11 +1384,13 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate_impl->qsbr->should_process = false; + // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, NULL, NULL); } void @@ -1316,10 +1400,10 @@ _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, voi _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; // Process thread-local work - process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state); + process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, cb, state); // Process shared interpreter work - maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl, cb, state); } void @@ -1348,7 +1432,7 @@ _PyMem_AbandonDelayed(PyThreadState *tstate) // Process the merged queue now (see gh-130794). _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); - process_interp_queue(&interp->mem_free_queue, this_tstate->qsbr, NULL, NULL); + process_interp_queue(&interp->mem_free_queue, this_tstate, NULL, NULL); PyMutex_Unlock(&interp->mem_free_queue.mutex); diff --git a/Objects/odictobject.c b/Objects/odictobject.c index 891f6197401503..02fcbbaa0d4536 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -473,6 +473,7 @@ Potential Optimizations #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_tuple.h" // _PyTuple_Recycle() #include // offsetof() +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "clinic/odictobject.c.h" @@ -1391,8 +1392,7 @@ odict_dealloc(PyObject *op) PyObject_GC_UnTrack(self); Py_XDECREF(self->od_inst_dict); - if (self->od_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *)self); + FT_CLEAR_WEAKREFS(op, self->od_weakreflist); _odict_clear_nodes(self); PyDict_Type.tp_dealloc((PyObject *)self); diff --git a/Objects/picklebufobject.c b/Objects/picklebufobject.c index 3ce800de04c208..50f17687bc4365 100644 --- a/Objects/picklebufobject.c +++ b/Objects/picklebufobject.c @@ -1,6 +1,7 @@ /* PickleBuffer object implementation */ #include "Python.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include typedef struct { @@ -111,8 +112,7 @@ picklebuf_dealloc(PyObject *op) { PyPickleBufferObject *self = (PyPickleBufferObject*)op; PyObject_GC_UnTrack(self); - if (self->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) self); + FT_CLEAR_WEAKREFS(op, self->weakreflist); PyBuffer_Release(&self->view); Py_TYPE(self)->tp_free((PyObject *) self); } diff --git a/Objects/setobject.c b/Objects/setobject.c index 8aa6b0d180907b..6e4fc5957cad7f 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -40,6 +40,7 @@ #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_RELAXED() #include "pycore_pyerrors.h" // _PyErr_SetKeyError() #include "pycore_setobject.h" // _PySet_NextEntry() definition +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() #include "stringlib/eq.h" // unicode_eq() #include // offsetof() @@ -536,8 +537,7 @@ set_dealloc(PyObject *self) /* bpo-31095: UnTrack is needed before calling any callbacks */ PyObject_GC_UnTrack(so); - if (so->weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) so); + FT_CLEAR_WEAKREFS(self, so->weakreflist); for (entry = so->table; used > 0; entry++) { if (entry->key && entry->key != dummy) { diff --git a/Objects/templateobject.c b/Objects/templateobject.c index 7d356980b56cbb..4293a311c440f7 100644 --- a/Objects/templateobject.c +++ b/Objects/templateobject.c @@ -23,6 +23,9 @@ templateiter_next(PyObject *op) if (self->from_strings) { item = PyIter_Next(self->stringsiter); self->from_strings = 0; + if (item == NULL) { + return NULL; + } if (PyUnicode_GET_LENGTH(item) == 0) { Py_SETREF(item, PyIter_Next(self->interpolationsiter)); self->from_strings = 1; @@ -444,6 +447,8 @@ template_reduce(PyObject *op, PyObject *Py_UNUSED(dummy)) static PyMethodDef template_methods[] = { {"__reduce__", template_reduce, METH_NOARGS, NULL}, + {"__class_getitem__", Py_GenericAlias, + METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL}, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a7ab69fef4c721..a348ea1d531c93 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -54,7 +54,6 @@ class object "PyObject *" "&PyBaseObject_Type" PyUnicode_CheckExact(name) && \ (PyUnicode_GET_LENGTH(name) <= MCACHE_MAX_ATTR_SIZE) -#define NEXT_GLOBAL_VERSION_TAG _PyRuntime.types.next_version_tag #define NEXT_VERSION_TAG(interp) \ (interp)->types.next_version_tag @@ -157,8 +156,8 @@ static_ext_type_lookup(PyInterpreterState *interp, size_t index, assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); size_t full_index = index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; - int64_t interp_count = - _PyRuntime.types.managed_static.types[full_index].interp_count; + int64_t interp_count = _Py_atomic_load_int64( + &_PyRuntime.types.managed_static.types[full_index].interp_count); assert((interp_count == 0) == (_PyRuntime.types.managed_static.types[full_index].type == NULL)); *p_interp_count = interp_count; @@ -235,7 +234,7 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self, : index + _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; assert((initial == 1) == - (_PyRuntime.types.managed_static.types[full_index].interp_count == 0)); + (_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count) == 0)); (void)_Py_atomic_add_int64( &_PyRuntime.types.managed_static.types[full_index].interp_count, 1); @@ -284,7 +283,7 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, : &(interp->types.for_extensions.initialized[index]); assert(state != NULL); - assert(_PyRuntime.types.managed_static.types[full_index].interp_count > 0); + assert(_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count) > 0); assert(_PyRuntime.types.managed_static.types[full_index].type == state->type); assert(state->type != NULL); @@ -294,7 +293,7 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, (void)_Py_atomic_add_int64( &_PyRuntime.types.managed_static.types[full_index].interp_count, -1); if (final) { - assert(!_PyRuntime.types.managed_static.types[full_index].interp_count); + assert(!_Py_atomic_load_int64(&_PyRuntime.types.managed_static.types[full_index].interp_count)); _PyRuntime.types.managed_static.types[full_index].type = NULL; managed_static_type_index_clear(self); @@ -1258,6 +1257,19 @@ _PyType_GetVersionForCurrentState(PyTypeObject *tp) #error "_Py_ATTR_CACHE_UNUSED must be bigger than max" #endif +static inline unsigned int +next_global_version_tag(void) +{ + unsigned int old; + do { + old = _Py_atomic_load_uint_relaxed(&_PyRuntime.types.next_version_tag); + if (old >= _Py_MAX_GLOBAL_TYPE_VERSION_TAG) { + return 0; + } + } while (!_Py_atomic_compare_exchange_uint(&_PyRuntime.types.next_version_tag, &old, old + 1)); + return old + 1; +} + static int assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) { @@ -1288,11 +1300,12 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) } if (type->tp_flags & Py_TPFLAGS_IMMUTABLETYPE) { /* static types */ - if (NEXT_GLOBAL_VERSION_TAG > _Py_MAX_GLOBAL_TYPE_VERSION_TAG) { + unsigned int next_version_tag = next_global_version_tag(); + if (next_version_tag == 0) { /* We have run out of version numbers */ return 0; } - set_version_unlocked(type, NEXT_GLOBAL_VERSION_TAG++); + set_version_unlocked(type, next_version_tag); assert (type->tp_version_tag <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); } else { @@ -2065,19 +2078,46 @@ type_set_annotations(PyObject *tp, PyObject *value, void *Py_UNUSED(closure)) return -1; } - int result; PyObject *dict = PyType_GetDict(type); - if (value != NULL) { - /* set */ - result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); - } else { - /* delete */ - result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); - if (result == 0) { - PyErr_SetString(PyExc_AttributeError, "__annotations__"); + int result = PyDict_ContainsString(dict, "__annotations__"); + if (result < 0) { + Py_DECREF(dict); + return -1; + } + if (result) { + // If __annotations__ is currently in the dict, we update it, + if (value != NULL) { + result = PyDict_SetItem(dict, &_Py_ID(__annotations__), value); + } else { + result = PyDict_Pop(dict, &_Py_ID(__annotations__), NULL); + if (result == 0) { + // Somebody else just deleted it? + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } + if (result < 0) { Py_DECREF(dict); return -1; } + // Also clear __annotations_cache__ just in case. + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + } + else { + // Else we update only __annotations_cache__. + if (value != NULL) { + /* set */ + result = PyDict_SetItem(dict, &_Py_ID(__annotations_cache__), value); + } else { + /* delete */ + result = PyDict_Pop(dict, &_Py_ID(__annotations_cache__), NULL); + if (result == 0) { + PyErr_SetString(PyExc_AttributeError, "__annotations__"); + Py_DECREF(dict); + return -1; + } + } } if (result < 0) { Py_DECREF(dict); @@ -3649,10 +3689,39 @@ subtype_dict(PyObject *obj, void *context) return PyObject_GenericGetDict(obj, context); } +int +_PyObject_SetDict(PyObject *obj, PyObject *value) +{ + if (value != NULL && !PyDict_Check(value)) { + PyErr_Format(PyExc_TypeError, + "__dict__ must be set to a dictionary, " + "not a '%.200s'", Py_TYPE(value)->tp_name); + return -1; + } + if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { + return _PyObject_SetManagedDict(obj, value); + } + PyObject **dictptr = _PyObject_ComputedDictPointer(obj); + if (dictptr == NULL) { + PyErr_SetString(PyExc_AttributeError, + "This object has no __dict__"); + return -1; + } + Py_BEGIN_CRITICAL_SECTION(obj); + PyObject *olddict = *dictptr; + FT_ATOMIC_STORE_PTR_RELEASE(*dictptr, Py_NewRef(value)); +#ifdef Py_GIL_DISABLED + _PyObject_XDecRefDelayed(olddict); +#else + Py_XDECREF(olddict); +#endif + Py_END_CRITICAL_SECTION(); + return 0; +} + static int subtype_setdict(PyObject *obj, PyObject *value, void *context) { - PyObject **dictptr; PyTypeObject *base; base = get_builtin_base_with_dict(Py_TYPE(obj)); @@ -3670,28 +3739,7 @@ subtype_setdict(PyObject *obj, PyObject *value, void *context) } return func(descr, obj, value); } - /* Almost like PyObject_GenericSetDict, but allow __dict__ to be deleted. */ - if (value != NULL && !PyDict_Check(value)) { - PyErr_Format(PyExc_TypeError, - "__dict__ must be set to a dictionary, " - "not a '%.200s'", Py_TYPE(value)->tp_name); - return -1; - } - - if (Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT) { - return _PyObject_SetManagedDict(obj, value); - } - else { - dictptr = _PyObject_ComputedDictPointer(obj); - if (dictptr == NULL) { - PyErr_SetString(PyExc_AttributeError, - "This object has no __dict__"); - return -1; - } - Py_CLEAR(*dictptr); - *dictptr = Py_XNewRef(value); - } - return 0; + return _PyObject_SetDict(obj, value); } static PyObject * @@ -8669,7 +8717,11 @@ type_ready_set_new(PyTypeObject *type, int initial) && base == &PyBaseObject_Type && !(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { - type_add_flags(type, Py_TPFLAGS_DISALLOW_INSTANTIATION); + if (initial) { + type_add_flags(type, Py_TPFLAGS_DISALLOW_INSTANTIATION); + } else { + assert(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION); + } } if (!(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION)) { @@ -8683,13 +8735,17 @@ type_ready_set_new(PyTypeObject *type, int initial) } } else { - // tp_new is NULL: inherit tp_new from base - type->tp_new = base->tp_new; + if (initial) { + // tp_new is NULL: inherit tp_new from base + type->tp_new = base->tp_new; + } } } else { // Py_TPFLAGS_DISALLOW_INSTANTIATION sets tp_new to NULL - type->tp_new = NULL; + if (initial) { + type->tp_new = NULL; + } } return 0; } @@ -8822,7 +8878,12 @@ type_ready(PyTypeObject *type, int initial) } /* All done -- set the ready flag */ - type_add_flags(type, Py_TPFLAGS_READY); + if (initial) { + type_add_flags(type, Py_TPFLAGS_READY); + } else { + assert(type->tp_flags & Py_TPFLAGS_READY); + } + stop_readying(type); assert(_PyType_CheckConsistency(type)); @@ -8871,15 +8932,16 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_DICT)); assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_WEAKREF)); - if ((self->tp_flags & Py_TPFLAGS_READY) == 0) { - assert(initial); + if (initial) { + assert((self->tp_flags & Py_TPFLAGS_READY) == 0); type_add_flags(self, _Py_TPFLAGS_STATIC_BUILTIN); type_add_flags(self, Py_TPFLAGS_IMMUTABLETYPE); - assert(NEXT_GLOBAL_VERSION_TAG <= _Py_MAX_GLOBAL_TYPE_VERSION_TAG); if (self->tp_version_tag == 0) { - _PyType_SetVersion(self, NEXT_GLOBAL_VERSION_TAG++); + unsigned int next_version_tag = next_global_version_tag(); + assert(next_version_tag != 0); + _PyType_SetVersion(self, next_version_tag); } } else { @@ -9682,6 +9744,11 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds) /* If staticbase is NULL now, it is a really weird type. In the spirit of backwards compatibility (?), just shut up. */ if (staticbase && staticbase->tp_new != type->tp_new) { + if (staticbase->tp_new == NULL) { + PyErr_Format(PyExc_TypeError, + "cannot create '%s' instances", subtype->tp_name); + return NULL; + } PyErr_Format(PyExc_TypeError, "%s.__new__(%s) is not safe, use %s.__new__()", type->tp_name, diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 6c199a52aa0ae6..cead6e69af5451 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -192,7 +192,7 @@ constevaluator_call(PyObject *self, PyObject *args, PyObject *kwargs) for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(value); i++) { PyObject *item = PyTuple_GET_ITEM(value, i); if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { PyUnicodeWriter_Discard(writer); return NULL; } @@ -273,7 +273,7 @@ _Py_typing_type_repr(PyUnicodeWriter *writer, PyObject *p) } if (p == (PyObject *)&_PyNone_Type) { - return PyUnicodeWriter_WriteUTF8(writer, "None", 4); + return PyUnicodeWriter_WriteASCII(writer, "None", 4); } if ((rc = PyObject_HasAttrWithError(p, &_Py_ID(__origin__))) > 0 && diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb3e1c48fd4050..1d01dad9e33e29 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6621,13 +6621,15 @@ _PyUnicode_GetNameCAPI(void) /* --- Unicode Escape Codec ----------------------------------------------- */ PyObject * -_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, +_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed, - const char **first_invalid_escape) + int *first_invalid_escape_char, + const char **first_invalid_escape_ptr) { const char *starts = s; + const char *initial_starts = starts; _PyUnicodeWriter writer; const char *end; PyObject *errorHandler = NULL; @@ -6635,7 +6637,8 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, _PyUnicode_Name_CAPI *ucnhash_capi; // so we can remember if we've seen an invalid escape char or not - *first_invalid_escape = NULL; + *first_invalid_escape_char = -1; + *first_invalid_escape_ptr = NULL; if (size == 0) { if (consumed) { @@ -6723,9 +6726,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, } } if (ch > 0377) { - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-3; /* Back up 3 chars, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = ch; + if (starts == initial_starts) { + /* Back up 3 chars, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 3; + } } } WRITE_CHAR(ch); @@ -6820,9 +6826,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s, goto error; default: - if (*first_invalid_escape == NULL) { - *first_invalid_escape = s-1; /* Back up one char, since we've - already incremented s. */ + if (*first_invalid_escape_char == -1) { + *first_invalid_escape_char = c; + if (starts == initial_starts) { + /* Back up one char, since we've already incremented s. */ + *first_invalid_escape_ptr = s - 1; + } } WRITE_ASCII_CHAR('\\'); WRITE_CHAR(c); @@ -6867,19 +6876,20 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, const char *errors, Py_ssize_t *consumed) { - const char *first_invalid_escape; - PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size, errors, consumed, - &first_invalid_escape); + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) return NULL; - if (first_invalid_escape != NULL) { - unsigned char c = *first_invalid_escape; - if ('4' <= c && c <= '7') { + if (first_invalid_escape_char != -1) { + if (first_invalid_escape_char > 0xff) { if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, - "\"\\%.3s\" is an invalid octal escape sequence. " + "\"\\%o\" is an invalid octal escape sequence. " "Such sequences will not work in the future. ", - first_invalid_escape) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -6889,7 +6899,7 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s, if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "\"\\%c\" is an invalid escape sequence. " "Such sequences will not work in the future. ", - c) < 0) + first_invalid_escape_char) < 0) { Py_DECREF(result); return NULL; @@ -13944,7 +13954,12 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { - if (Py_TYPE(obj) == &PyLong_Type) { + PyTypeObject *type = Py_TYPE(obj); + if (type == &PyUnicode_Type) { + return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj); + } + + if (type == &PyLong_Type) { return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); } @@ -14093,6 +14108,20 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, return 0; } + +int +PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer, + const char *str, + Py_ssize_t size) +{ + assert(writer != NULL); + _Py_AssertHoldsTstate(); + + _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer; + return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size); +} + + int PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer, const char *str, diff --git a/Objects/unionobject.c b/Objects/unionobject.c index 66435924b6c6c3..2206ed80ef03fd 100644 --- a/Objects/unionobject.c +++ b/Objects/unionobject.c @@ -4,6 +4,7 @@ #include "pycore_typevarobject.h" // _PyTypeAlias_Type, _Py_typing_type_repr #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString #include "pycore_unionobject.h" +#include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() typedef struct { @@ -21,9 +22,7 @@ unionobject_dealloc(PyObject *self) unionobject *alias = (unionobject *)self; _PyObject_GC_UNTRACK(self); - if (alias->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *)alias); - } + FT_CLEAR_WEAKREFS(self, alias->weakreflist); Py_XDECREF(alias->args); Py_XDECREF(alias->hashable_args); @@ -290,7 +289,7 @@ union_repr(PyObject *self) } for (Py_ssize_t i = 0; i < len; i++) { - if (i > 0 && PyUnicodeWriter_WriteUTF8(writer, " | ", 3) < 0) { + if (i > 0 && PyUnicodeWriter_WriteASCII(writer, " | ", 3) < 0) { goto error; } PyObject *p = PyTuple_GET_ITEM(alias->args, i); @@ -300,12 +299,12 @@ union_repr(PyObject *self) } #if 0 - PyUnicodeWriter_WriteUTF8(writer, "|args=", 6); + PyUnicodeWriter_WriteASCII(writer, "|args=", 6); PyUnicodeWriter_WriteRepr(writer, alias->args); - PyUnicodeWriter_WriteUTF8(writer, "|h=", 3); + PyUnicodeWriter_WriteASCII(writer, "|h=", 3); PyUnicodeWriter_WriteRepr(writer, alias->hashable_args); if (alias->unhashable_args) { - PyUnicodeWriter_WriteUTF8(writer, "|u=", 3); + PyUnicodeWriter_WriteASCII(writer, "|u=", 3); PyUnicodeWriter_WriteRepr(writer, alias->unhashable_args); } #endif diff --git a/PC/_wmimodule.cpp b/PC/_wmimodule.cpp index b6efb3e4a207b4..30d61c86587fbe 100644 --- a/PC/_wmimodule.cpp +++ b/PC/_wmimodule.cpp @@ -57,11 +57,11 @@ _query_thread(LPVOID param) IEnumWbemClassObject* enumerator = NULL; HRESULT hr = S_OK; BSTR bstrQuery = NULL; - struct _query_data *data = (struct _query_data*)param; + _query_data data = *(struct _query_data*)param; // gh-125315: Copy the query string first, so that if the main thread gives // up on waiting we aren't left with a dangling pointer (and a likely crash) - bstrQuery = SysAllocString(data->query); + bstrQuery = SysAllocString(data.query); if (!bstrQuery) { hr = HRESULT_FROM_WIN32(ERROR_NOT_ENOUGH_MEMORY); } @@ -71,7 +71,7 @@ _query_thread(LPVOID param) } if (FAILED(hr)) { - CloseHandle(data->writePipe); + CloseHandle(data.writePipe); if (bstrQuery) { SysFreeString(bstrQuery); } @@ -96,7 +96,7 @@ _query_thread(LPVOID param) IID_IWbemLocator, (LPVOID *)&locator ); } - if (SUCCEEDED(hr) && !SetEvent(data->initEvent)) { + if (SUCCEEDED(hr) && !SetEvent(data.initEvent)) { hr = HRESULT_FROM_WIN32(GetLastError()); } if (SUCCEEDED(hr)) { @@ -105,7 +105,7 @@ _query_thread(LPVOID param) NULL, NULL, 0, NULL, 0, 0, &services ); } - if (SUCCEEDED(hr) && !SetEvent(data->connectEvent)) { + if (SUCCEEDED(hr) && !SetEvent(data.connectEvent)) { hr = HRESULT_FROM_WIN32(GetLastError()); } if (SUCCEEDED(hr)) { @@ -143,7 +143,7 @@ _query_thread(LPVOID param) if (FAILED(hr) || got != 1 || !value) { continue; } - if (!startOfEnum && !WriteFile(data->writePipe, (LPVOID)L"\0", 2, &written, NULL)) { + if (!startOfEnum && !WriteFile(data.writePipe, (LPVOID)L"\0", 2, &written, NULL)) { hr = HRESULT_FROM_WIN32(GetLastError()); break; } @@ -171,10 +171,10 @@ _query_thread(LPVOID param) DWORD cbStr1, cbStr2; cbStr1 = (DWORD)(wcslen(propName) * sizeof(propName[0])); cbStr2 = (DWORD)(wcslen(propStr) * sizeof(propStr[0])); - if (!WriteFile(data->writePipe, propName, cbStr1, &written, NULL) || - !WriteFile(data->writePipe, (LPVOID)L"=", 2, &written, NULL) || - !WriteFile(data->writePipe, propStr, cbStr2, &written, NULL) || - !WriteFile(data->writePipe, (LPVOID)L"\0", 2, &written, NULL) + if (!WriteFile(data.writePipe, propName, cbStr1, &written, NULL) || + !WriteFile(data.writePipe, (LPVOID)L"=", 2, &written, NULL) || + !WriteFile(data.writePipe, propStr, cbStr2, &written, NULL) || + !WriteFile(data.writePipe, (LPVOID)L"\0", 2, &written, NULL) ) { hr = HRESULT_FROM_WIN32(GetLastError()); } @@ -200,7 +200,7 @@ _query_thread(LPVOID param) locator->Release(); } CoUninitialize(); - CloseHandle(data->writePipe); + CloseHandle(data.writePipe); return (DWORD)hr; } diff --git a/PC/layout/main.py b/PC/layout/main.py index 7324a135133b66..8543e7c56e1c41 100644 --- a/PC/layout/main.py +++ b/PC/layout/main.py @@ -247,9 +247,15 @@ def in_build(f, dest="", new_name=None, no_lib=False): if ns.include_freethreaded: yield from in_build("venvlaunchert.exe", "Lib/venv/scripts/nt/") yield from in_build("venvwlaunchert.exe", "Lib/venv/scripts/nt/") - else: + elif (VER_MAJOR, VER_MINOR) > (3, 12): yield from in_build("venvlauncher.exe", "Lib/venv/scripts/nt/") yield from in_build("venvwlauncher.exe", "Lib/venv/scripts/nt/") + else: + # Older versions of venv expected the scripts to be named 'python' + # and they were renamed at this stage. We need to replicate that + # when packaging older versions. + yield from in_build("venvlauncher.exe", "Lib/venv/scripts/nt/", "python") + yield from in_build("venvwlauncher.exe", "Lib/venv/scripts/nt/", "pythonw") if ns.include_tools: @@ -652,15 +658,6 @@ def main(): ns.doc_build = (Path.cwd() / ns.doc_build).resolve() if ns.include_cat and not ns.include_cat.is_absolute(): ns.include_cat = (Path.cwd() / ns.include_cat).resolve() - if not ns.arch: - # TODO: Calculate arch from files in ns.build instead - if sys.winver.endswith("-arm64"): - ns.arch = "arm64" - elif sys.winver.endswith("-32"): - ns.arch = "win32" - else: - ns.arch = "amd64" - if ns.zip and not ns.zip.is_absolute(): ns.zip = (Path.cwd() / ns.zip).resolve() if ns.catalog and not ns.catalog.is_absolute(): @@ -668,6 +665,17 @@ def main(): configure_logger(ns) + if not ns.arch: + from .support.arch import calculate_from_build_dir + ns.arch = calculate_from_build_dir(ns.build) + + expect = f"{VER_MAJOR}.{VER_MINOR}.{VER_MICRO}{VER_SUFFIX}" + actual = check_patchlevel_version(ns.source) + if actual and actual != expect: + log_error(f"Inferred version {expect} does not match {actual} from patchlevel.h. " + "You should set %PYTHONINCLUDE% or %PYTHON_HEXVERSION% before launching.") + return 5 + log_info( """OPTIONS Source: {ns.source} diff --git a/PC/layout/support/arch.py b/PC/layout/support/arch.py new file mode 100644 index 00000000000000..daf4efbc7ab674 --- /dev/null +++ b/PC/layout/support/arch.py @@ -0,0 +1,34 @@ +from struct import unpack +from .constants import * +from .logging import * + +def calculate_from_build_dir(root): + candidates = [ + root / PYTHON_DLL_NAME, + root / FREETHREADED_PYTHON_DLL_NAME, + *root.glob("*.dll"), + *root.glob("*.pyd"), + # Check EXE last because it's easier to have cross-platform EXE + *root.glob("*.exe"), + ] + + ARCHS = { + b"PE\0\0\x4c\x01": "win32", + b"PE\0\0\x64\x86": "amd64", + b"PE\0\0\x64\xAA": "arm64" + } + + first_exc = None + for pe in candidates: + try: + # Read the PE header to grab the machine type + with open(pe, "rb") as f: + f.seek(0x3C) + offset = int.from_bytes(f.read(4), "little") + f.seek(offset) + arch = ARCHS[f.read(6)] + except (FileNotFoundError, PermissionError, LookupError) as ex: + log_debug("Failed to open {}: {}", pe, ex) + continue + log_info("Inferred architecture {} from {}", arch, pe) + return arch diff --git a/PC/layout/support/constants.py b/PC/layout/support/constants.py index ae22aa16ebfa5d..6b8c915e519743 100644 --- a/PC/layout/support/constants.py +++ b/PC/layout/support/constants.py @@ -6,6 +6,8 @@ __version__ = "3.8" import os +import pathlib +import re import struct import sys @@ -13,9 +15,15 @@ def _unpack_hexversion(): try: hexversion = int(os.getenv("PYTHON_HEXVERSION"), 16) + return struct.pack(">i", hexversion) except (TypeError, ValueError): - hexversion = sys.hexversion - return struct.pack(">i", hexversion) + pass + if os.getenv("PYTHONINCLUDE"): + try: + return _read_patchlevel_version(pathlib.Path(os.getenv("PYTHONINCLUDE"))) + except OSError: + pass + return struct.pack(">i", sys.hexversion) def _get_suffix(field4): @@ -26,6 +34,39 @@ def _get_suffix(field4): return "" +def _read_patchlevel_version(sources): + if not sources.match("Include"): + sources /= "Include" + values = {} + with open(sources / "patchlevel.h", "r", encoding="utf-8") as f: + for line in f: + m = re.match(r'#\s*define\s+(PY_\S+?)\s+(\S+)', line.strip(), re.I) + if m and m.group(2): + v = m.group(2) + if v.startswith('"'): + v = v[1:-1] + else: + v = values.get(v, v) + if isinstance(v, str): + try: + v = int(v, 16 if v.startswith("0x") else 10) + except ValueError: + pass + values[m.group(1)] = v + return ( + values["PY_MAJOR_VERSION"], + values["PY_MINOR_VERSION"], + values["PY_MICRO_VERSION"], + values["PY_RELEASE_LEVEL"] << 4 | values["PY_RELEASE_SERIAL"], + ) + + +def check_patchlevel_version(sources): + got = _read_patchlevel_version(sources) + if got != (VER_MAJOR, VER_MINOR, VER_MICRO, VER_FIELD4): + return f"{got[0]}.{got[1]}.{got[2]}{_get_suffix(got[3])}" + + VER_MAJOR, VER_MINOR, VER_MICRO, VER_FIELD4 = _unpack_hexversion() VER_SUFFIX = _get_suffix(VER_FIELD4) VER_FIELD3 = VER_MICRO << 8 | VER_FIELD4 diff --git a/PC/pyconfig.h.in b/PC/pyconfig.h similarity index 97% rename from PC/pyconfig.h.in rename to PC/pyconfig.h index 9e70303868e5de..710a737ebcc081 100644 --- a/PC/pyconfig.h.in +++ b/PC/pyconfig.h @@ -94,12 +94,17 @@ WIN32 is still required for the locale module. #endif #endif /* Py_BUILD_CORE || Py_BUILD_CORE_BUILTIN || Py_BUILD_CORE_MODULE */ -/* Define to 1 if you want to disable the GIL */ -/* Uncomment the definition for free-threaded builds, or define it manually - * when compiling extension modules. Note that we test with #ifdef, so - * defining as 0 will still disable the GIL. */ -#ifndef Py_GIL_DISABLED -/* #define Py_GIL_DISABLED 1 */ +/* Define to 1 when compiling for experimental free-threaded builds */ +#ifdef Py_GIL_DISABLED +/* We undefine if it was set to zero because all later checks are #ifdef. + * Note that non-Windows builds do not do this, and so every effort should + * be made to avoid defining the variable at all when not desired. However, + * sysconfig.get_config_var always returns a 1 or a 0, and so it seems likely + * that a build backend will define it with the value. + */ +#if Py_GIL_DISABLED == 0 +#undef Py_GIL_DISABLED +#endif #endif /* Compiler specific defines */ diff --git a/PC/winreg.c b/PC/winreg.c index c0de5c1353a349..c342be92b98f08 100644 --- a/PC/winreg.c +++ b/PC/winreg.c @@ -426,7 +426,9 @@ PyHKEY_Close(winreg_state *st, PyObject *ob_handle) if (PyHKEY_Check(st, ob_handle)) { ((PyHKEYObject*)ob_handle)->hkey = 0; } + Py_BEGIN_ALLOW_THREADS rc = key ? RegCloseKey(key) : ERROR_SUCCESS; + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); return rc == ERROR_SUCCESS; @@ -499,14 +501,21 @@ PyWinObject_CloseHKEY(winreg_state *st, PyObject *obHandle) } #if SIZEOF_LONG >= SIZEOF_HKEY else if (PyLong_Check(obHandle)) { - long rc = RegCloseKey((HKEY)PyLong_AsLong(obHandle)); + long rc; + Py_BEGIN_ALLOW_THREADS + rc = RegCloseKey((HKEY)PyLong_AsLong(obHandle)); + Py_END_ALLOW_THREADS ok = (rc == ERROR_SUCCESS); if (!ok) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); } #else else if (PyLong_Check(obHandle)) { - long rc = RegCloseKey((HKEY)PyLong_AsVoidPtr(obHandle)); + long rc; + HKEY hkey = (HKEY)PyLong_AsVoidPtr(obHandle); + Py_BEGIN_ALLOW_THREADS + rc = RegCloseKey(hkey); + Py_END_ALLOW_THREADS ok = (rc == ERROR_SUCCESS); if (!ok) PyErr_SetFromWindowsErrWithFunction(rc, "RegCloseKey"); @@ -924,7 +933,9 @@ winreg_CreateKey_impl(PyObject *module, HKEY key, const wchar_t *sub_key) (Py_ssize_t)KEY_WRITE) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegCreateKeyW(key, sub_key, &retKey); + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) { PyErr_SetFromWindowsErrWithFunction(rc, "CreateKey"); return NULL; @@ -973,8 +984,10 @@ winreg_CreateKeyEx_impl(PyObject *module, HKEY key, const wchar_t *sub_key, (Py_ssize_t)access) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegCreateKeyExW(key, sub_key, reserved, NULL, 0, access, NULL, &retKey, NULL); + Py_END_ALLOW_THREADS if (rc != ERROR_SUCCESS) { PyErr_SetFromWindowsErrWithFunction(rc, "CreateKeyEx"); return NULL; @@ -1187,10 +1200,12 @@ winreg_EnumValue_impl(PyObject *module, HKEY key, int index) (Py_ssize_t)key, index) < 0) { return NULL; } - if ((rc = RegQueryInfoKeyW(key, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, - &retValueSize, &retDataSize, NULL, NULL)) - != ERROR_SUCCESS) + + Py_BEGIN_ALLOW_THREADS + rc = RegQueryInfoKeyW(key, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + &retValueSize, &retDataSize, NULL, NULL); + Py_END_ALLOW_THREADS + if (rc != ERROR_SUCCESS) return PyErr_SetFromWindowsErrWithFunction(rc, "RegQueryInfoKey"); ++retValueSize; /* include null terminators */ @@ -1477,9 +1492,11 @@ winreg_QueryInfoKey_impl(PyObject *module, HKEY key) if (PySys_Audit("winreg.QueryInfoKey", "n", (Py_ssize_t)key) < 0) { return NULL; } - if ((rc = RegQueryInfoKeyW(key, NULL, NULL, 0, &nSubKeys, NULL, NULL, - &nValues, NULL, NULL, NULL, &ft)) - != ERROR_SUCCESS) { + Py_BEGIN_ALLOW_THREADS + rc = RegQueryInfoKeyW(key, NULL, NULL, 0, &nSubKeys, NULL, NULL, + &nValues, NULL, NULL, NULL, &ft); + Py_END_ALLOW_THREADS + if (rc != ERROR_SUCCESS) { return PyErr_SetFromWindowsErrWithFunction(rc, "RegQueryInfoKey"); } li.LowPart = ft.dwLowDateTime; @@ -1587,7 +1604,9 @@ winreg_QueryValue_impl(PyObject *module, HKEY key, const wchar_t *sub_key) PyMem_Free(pbuf); } if (childKey != key) { + Py_BEGIN_ALLOW_THREADS RegCloseKey(childKey); + Py_END_ALLOW_THREADS } return result; } @@ -1625,7 +1644,9 @@ winreg_QueryValueEx_impl(PyObject *module, HKEY key, const wchar_t *name) (Py_ssize_t)key, NULL, name) < 0) { return NULL; } + Py_BEGIN_ALLOW_THREADS rc = RegQueryValueExW(key, name, NULL, NULL, NULL, &bufSize); + Py_END_ALLOW_THREADS if (rc == ERROR_MORE_DATA) bufSize = 256; else if (rc != ERROR_SUCCESS) @@ -1637,8 +1658,10 @@ winreg_QueryValueEx_impl(PyObject *module, HKEY key, const wchar_t *name) while (1) { retSize = bufSize; + Py_BEGIN_ALLOW_THREADS rc = RegQueryValueExW(key, name, NULL, &typ, (BYTE *)retBuf, &retSize); + Py_END_ALLOW_THREADS if (rc != ERROR_MORE_DATA) break; diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 8c161835af9a8c..efff6a58d895cb 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -276,7 +276,7 @@ - + @@ -436,31 +436,6 @@ - - - - - - - - - - @(PyConfigH->'%(FullPath)', ';') - $([System.IO.File]::ReadAllText($(PyConfigH))) - $([System.IO.File]::ReadAllText('$(IntDir)pyconfig.h')) - - - $(PyConfigHText.Replace('/* #define Py_GIL_DISABLED 1 */', '#define Py_GIL_DISABLED 1')) - - - - - diff --git a/PCbuild/_testclinic_limited.vcxproj b/PCbuild/_testclinic_limited.vcxproj index 183a55080e8693..95c205309b1f30 100644 --- a/PCbuild/_testclinic_limited.vcxproj +++ b/PCbuild/_testclinic_limited.vcxproj @@ -70,6 +70,7 @@ {01FDF29A-40A1-46DF-84F5-85EBBD2A2410} _testclinic_limited Win32Proj + false diff --git a/PCbuild/_zstd.vcxproj b/PCbuild/_zstd.vcxproj index b53f93a6af8d11..6f91b8d05cca20 100644 --- a/PCbuild/_zstd.vcxproj +++ b/PCbuild/_zstd.vcxproj @@ -137,6 +137,7 @@ + diff --git a/PCbuild/_zstd.vcxproj.filters b/PCbuild/_zstd.vcxproj.filters index d4d36063c85d09..eec666e5eaf439 100644 --- a/PCbuild/_zstd.vcxproj.filters +++ b/PCbuild/_zstd.vcxproj.filters @@ -128,6 +128,9 @@ Header Files + + Header Files + Header Files\zstd diff --git a/PCbuild/build.bat b/PCbuild/build.bat index 2f358991e484ce..602357048867d6 100644 --- a/PCbuild/build.bat +++ b/PCbuild/build.bat @@ -33,7 +33,7 @@ echo. -k Attempt to kill any running Pythons before building (usually done echo. automatically by the pythoncore project) echo. --pgo Build with Profile-Guided Optimization. This flag echo. overrides -c and -d -echo. --disable-gil Enable experimental support for running without the GIL. +echo. --disable-gil Enable support for running without the GIL. echo. --test-marker Enable the test marker within the build. echo. --regen Regenerate all opcodes, grammar and tokens. echo. --experimental-jit Enable the experimental just-in-time compiler. diff --git a/PCbuild/get_external.py b/PCbuild/get_external.py index 4ecc8925349c93..a78aa6a23041ad 100755 --- a/PCbuild/get_external.py +++ b/PCbuild/get_external.py @@ -5,8 +5,28 @@ import pathlib import sys import time +import urllib.error +import urllib.request import zipfile -from urllib.request import urlretrieve + + +def retrieve_with_retries(download_location, output_path, reporthook, + max_retries=7): + """Download a file with exponential backoff retry and save to disk.""" + for attempt in range(max_retries + 1): + try: + resp = urllib.request.urlretrieve( + download_location, + output_path, + reporthook=reporthook, + ) + except (urllib.error.URLError, ConnectionError) as ex: + if attempt == max_retries: + msg = f"Download from {download_location} failed." + raise OSError(msg) from ex + time.sleep(2.25**attempt) + else: + return resp def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): @@ -16,10 +36,10 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): if verbose: reporthook = print zip_dir.mkdir(parents=True, exist_ok=True) - filename, headers = urlretrieve( + filename, _headers = retrieve_with_retries( url, zip_dir / f'{commit_hash}.zip', - reporthook=reporthook, + reporthook ) return filename diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props index 4e414dc913b9d5..cf35e705f355a7 100644 --- a/PCbuild/pyproject.props +++ b/PCbuild/pyproject.props @@ -10,10 +10,9 @@ $(MSBuildThisFileDirectory)obj\ $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\$(ProjectName)\ $(IntDir.Replace(`\\`, `\`)) - - $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\pythoncore\ $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_frozen\ $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\zlib-ng\ + $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration) $(ProjectName) $(TargetName)$(PyDebugExt) false @@ -49,11 +48,12 @@ <_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64'">_WIN64; <_PlatformPreprocessorDefinition Condition="$(Platform) == 'x64' and $(PlatformToolset) != 'ClangCL'">_M_X64;$(_PlatformPreprocessorDefinition) <_Py3NamePreprocessorDefinition>PY3_DLLNAME=L"$(Py3DllName)$(PyDebugExt)"; + <_FreeThreadedPreprocessorDefinition Condition="$(DisableGil) == 'true'">Py_GIL_DISABLED=1; - $(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(GeneratedPyConfigDir);$(PySourcePath)PC;%(AdditionalIncludeDirectories) - WIN32;$(_Py3NamePreprocessorDefinition);$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)%(PreprocessorDefinitions) + $(PySourcePath)Include;$(PySourcePath)Include\internal;$(PySourcePath)Include\internal\mimalloc;$(PySourcePath)PC;%(AdditionalIncludeDirectories) + WIN32;$(_Py3NamePreprocessorDefinition)$(_PlatformPreprocessorDefinition)$(_DebugPreprocessorDefinition)$(_PyStatsPreprocessorDefinition)$(_PydPreprocessorDefinition)$(_FreeThreadedPreprocessorDefinition)%(PreprocessorDefinitions) _Py_USING_PGO=1;%(PreprocessorDefinitions) MaxSpeed @@ -96,19 +96,16 @@ MachineX64 MachineARM MachineARM64 - $(OutDir)$(TargetName).pgd - UseLinkTimeCodeGeneration - PGInstrument - PGUpdate + UseLinkTimeCodeGeneration advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;%(AdditionalDependencies) /OPT:REF,NOICF %(AdditionalOptions) -d2:-pattern-opt-disable:-932189325 %(AdditionalOptions) + /GENPROFILE %(AdditionalOptions) + /USEPROFILE %(AdditionalOptions) false - true - true - true + true $(PySourcePath)PC;$(PySourcePath)Include;$(IntDir);%(AdditionalIncludeDirectories) diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 549d6284972afc..b911c9385634d7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -102,6 +102,7 @@ /Zm200 %(AdditionalOptions) $(PySourcePath)Modules\_hacl;$(PySourcePath)Modules\_hacl\include;$(PySourcePath)Python;%(AdditionalIncludeDirectories) $(zlibNgDir);$(GeneratedZlibNgDir);%(AdditionalIncludeDirectories) + $(GeneratedJitStencilsDir);%(AdditionalIncludeDirectories) _USRDLL;Py_BUILD_CORE;Py_BUILD_CORE_BUILTIN;Py_ENABLE_SHARED;MS_DLL_ID="$(SysWinVer)";ZLIB_COMPAT;%(PreprocessorDefinitions) _Py_HAVE_ZLIB;%(PreprocessorDefinitions) _Py_JIT;%(PreprocessorDefinitions) @@ -409,7 +410,7 @@ - + @@ -418,8 +419,12 @@ - HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions) - HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions) + + _Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions) + + + _Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions) + @@ -688,34 +693,6 @@ - - - - - - - - - @(PyConfigH->'%(FullPath)', ';') - $([System.IO.File]::ReadAllText($(PyConfigH))) - $([System.IO.File]::ReadAllText('$(IntDir)pyconfig.h')) - - - $(PyConfigHText.Replace('/* #define Py_GIL_DISABLED 1 */', '#define Py_GIL_DISABLED 1')) - - - - - - - - - - - git diff --git a/PCbuild/regen.targets b/PCbuild/regen.targets index 3ad17737807235..742597f5cb5ebd 100644 --- a/PCbuild/regen.targets +++ b/PCbuild/regen.targets @@ -29,12 +29,12 @@ <_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" /> <_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" /> - <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(GeneratedPyConfigDir)pyconfig.h;$(PySourcePath)Tools\jit\**"/> + <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(PySourcePath)PC\pyconfig.h;$(PySourcePath)Tools\jit\**"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils.h"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/> - <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils.h"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/> + <_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/> <_CasesSources Include="$(PySourcePath)Python\bytecodes.c;$(PySourcePath)Python\optimizer_bytecodes.c;"/> <_CasesOutputs Include="$(PySourcePath)Python\generated_cases.c.h;$(PySourcePath)Include\opcode_ids.h;$(PySourcePath)Include\internal\pycore_uop_ids.h;$(PySourcePath)Python\opcode_targets.h;$(PySourcePath)Include\internal\pycore_opcode_metadata.h;$(PySourcePath)Include\internal\pycore_uop_metadata.h;$(PySourcePath)Python\optimizer_cases.c.h;$(PySourcePath)Lib\_opcode_metadata.py"/> <_SbomSources Include="$(PySourcePath)PCbuild\get_externals.bat" /> @@ -116,7 +116,7 @@ @@ -125,8 +125,7 @@ x86_64-pc-windows-msvc $(JITArgs) --debug - + diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 3bcc0870882a29..0d362bf7a9111a 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -965,7 +965,7 @@ _PyPegen_check_fstring_conversion(Parser *p, Token* conv_token, expr_ty conv) if (conv_token->lineno != conv->lineno || conv_token->end_col_offset != conv->col_offset) { return RAISE_SYNTAX_ERROR_KNOWN_RANGE( conv_token, conv, - "%c-string: conversion type must come right after the exclamanation mark", + "%c-string: conversion type must come right after the exclamation mark", TOK_GET_STRING_PREFIX(p->tok) ); } diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index 09e014534fbabc..dba20226c3283a 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -1244,6 +1244,32 @@ def visitModule(self, mod): Py_DECREF(unused); } } + + // Discard fields from 'expecting' that default to None + PyObject *field_types = NULL; + if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), + &_Py_ID(_field_types), + &field_types) < 0) + { + Py_DECREF(expecting); + return -1; + } + if (field_types != NULL) { + Py_ssize_t pos = 0; + PyObject *field_name, *field_type; + while (PyDict_Next(field_types, &pos, &field_name, &field_type)) { + if (_PyUnion_Check(field_type)) { + // optional field + if (PySet_Discard(expecting, field_name) < 0) { + Py_DECREF(expecting); + Py_DECREF(field_types); + return -1; + } + } + } + Py_DECREF(field_types); + } + // Now 'expecting' contains the fields or attributes // that would not be filled inside ast_type_replace(). Py_ssize_t m = PySet_GET_SIZE(expecting); @@ -1486,7 +1512,7 @@ def visitModule(self, mod): for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -1510,7 +1536,7 @@ def visitModule(self, mod): } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -1614,7 +1640,7 @@ def visitModule(self, mod): } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 4d10bccf0a53f2..0a078dd594148c 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -1421,7 +1421,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct return MAKE_TOKEN( _PyTokenizer_syntaxerror( tok, - "f-string: newlines are not allowed in format specifiers for single quoted f-strings" + "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings", + TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok) ) ); } diff --git a/Parser/parser.c b/Parser/parser.c index 509fac7df6e371..932ad0f594124a 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -14,7 +14,7 @@ # define MAXSTACK 4000 # endif #else -# define MAXSTACK 4000 +# define MAXSTACK 6000 #endif static const int n_keyword_lists = 9; static KeywordToken *reserved_keywords[] = { @@ -1677,7 +1677,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('import' | 'from') import_stmt")); stmt_ty import_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_5_rule, p) + _PyPegen_lookahead(1, _tmp_5_rule, p) && (import_stmt_var = import_stmt_rule(p)) // import_stmt ) @@ -1915,7 +1915,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('def' | '@' | 'async') function_def")); stmt_ty function_def_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_6_rule, p) + _PyPegen_lookahead(1, _tmp_6_rule, p) && (function_def_var = function_def_rule(p)) // function_def ) @@ -1957,7 +1957,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('class' | '@') class_def")); stmt_ty class_def_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_7_rule, p) + _PyPegen_lookahead(1, _tmp_7_rule, p) && (class_def_var = class_def_rule(p)) // class_def ) @@ -1978,7 +1978,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('with' | 'async') with_stmt")); stmt_ty with_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_8_rule, p) + _PyPegen_lookahead(1, _tmp_8_rule, p) && (with_stmt_var = with_stmt_rule(p)) // with_stmt ) @@ -1999,7 +1999,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&('for' | 'async') for_stmt")); stmt_ty for_stmt_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_9_rule, p) + _PyPegen_lookahead(1, _tmp_9_rule, p) && (for_stmt_var = for_stmt_rule(p)) // for_stmt ) @@ -3213,7 +3213,7 @@ del_stmt_rule(Parser *p) && (a = del_targets_rule(p)) // del_targets && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_16_rule, p) + _PyPegen_lookahead(1, _tmp_16_rule, p) ) { D(fprintf(stderr, "%*c+ del_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'del' del_targets &(';' | NEWLINE)")); @@ -6856,7 +6856,7 @@ with_item_rule(Parser *p) && (t = star_target_rule(p)) // star_target && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_36_rule, p) + _PyPegen_lookahead(1, _tmp_36_rule, p) ) { D(fprintf(stderr, "%*c+ with_item[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression 'as' star_target &(',' | ')' | ':')")); @@ -8445,7 +8445,7 @@ literal_pattern_rule(Parser *p) if ( (value = signed_number_rule(p)) // signed_number && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_42_rule, p) + _PyPegen_lookahead(0, _tmp_42_rule, p) ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "signed_number !('+' | '-')")); @@ -8679,7 +8679,7 @@ literal_expr_rule(Parser *p) if ( (signed_number_var = signed_number_rule(p)) // signed_number && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_42_rule, p) + _PyPegen_lookahead(0, _tmp_42_rule, p) ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "signed_number !('+' | '-')")); @@ -8717,7 +8717,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&(STRING | FSTRING_START | TSTRING_START) strings")); expr_ty strings_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_43_rule, p) + _PyPegen_lookahead(1, _tmp_43_rule, p) && (strings_var = strings_rule(p)) // strings ) @@ -9281,7 +9281,7 @@ pattern_capture_target_rule(Parser *p) && (name = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_44_rule, p) + _PyPegen_lookahead(0, _tmp_44_rule, p) ) { D(fprintf(stderr, "%*c+ pattern_capture_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!\"_\" NAME !('.' | '(' | '=')")); @@ -9396,7 +9396,7 @@ value_pattern_rule(Parser *p) if ( (attr = attr_rule(p)) // attr && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_44_rule, p) + _PyPegen_lookahead(0, _tmp_44_rule, p) ) { D(fprintf(stderr, "%*c+ value_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "attr !('.' | '(' | '=')")); @@ -15049,7 +15049,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&(STRING | FSTRING_START | TSTRING_START) strings")); expr_ty strings_var; if ( - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_43_rule, p) + _PyPegen_lookahead(1, _tmp_43_rule, p) && (strings_var = strings_rule(p)) // strings ) @@ -19078,7 +19078,7 @@ target_with_star_atom_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ target_with_star_atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -19122,7 +19122,7 @@ target_with_star_atom_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ target_with_star_atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -19469,7 +19469,7 @@ single_subscript_attribute_target_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ single_subscript_attribute_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -19513,7 +19513,7 @@ single_subscript_attribute_target_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ single_subscript_attribute_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -19623,7 +19623,7 @@ t_primary_raw(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME &t_lookahead")); @@ -19667,7 +19667,7 @@ t_primary_raw(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' &t_lookahead")); @@ -19705,7 +19705,7 @@ t_primary_raw(Parser *p) && (b = genexp_rule(p)) // genexp && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary genexp &t_lookahead")); @@ -19749,7 +19749,7 @@ t_primary_raw(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 8)) // token=')' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '(' arguments? ')' &t_lookahead")); @@ -19784,7 +19784,7 @@ t_primary_raw(Parser *p) if ( (a = atom_rule(p)) // atom && - _PyPegen_lookahead(1, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(1, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ t_primary[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "atom &t_lookahead")); @@ -19974,7 +19974,7 @@ del_target_rule(Parser *p) && (b = _PyPegen_name_token(p)) // NAME && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ del_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '.' NAME !t_lookahead")); @@ -20018,7 +20018,7 @@ del_target_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 10)) // token=']' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) t_lookahead_rule, p) + _PyPegen_lookahead(0, t_lookahead_rule, p) ) { D(fprintf(stderr, "%*c+ del_target[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "t_primary '[' slices ']' !t_lookahead")); @@ -20506,7 +20506,7 @@ func_type_comment_rule(Parser *p) && (t = _PyPegen_expect_token(p, TYPE_COMMENT)) // token='TYPE_COMMENT' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_104_rule, p) + _PyPegen_lookahead(1, _tmp_104_rule, p) ) { D(fprintf(stderr, "%*c+ func_type_comment[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NEWLINE TYPE_COMMENT &(NEWLINE INDENT)")); @@ -20700,7 +20700,7 @@ invalid_arguments_rule(Parser *p) && (b = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_110_rule, p) + _PyPegen_lookahead(1, _tmp_110_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "[(args ',')] NAME '=' &(',' | ')')")); @@ -20898,7 +20898,7 @@ invalid_kwarg_rule(Parser *p) expr_ty a; Token * b; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_112_rule, p) + _PyPegen_lookahead(0, _tmp_112_rule, p) && (a = expression_rule(p)) // expression && @@ -21273,7 +21273,7 @@ invalid_expression_rule(Parser *p) expr_ty a; expr_ty b; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_114_rule, p) + _PyPegen_lookahead(0, _tmp_114_rule, p) && (a = disjunction_rule(p)) // disjunction && @@ -21309,7 +21309,7 @@ invalid_expression_rule(Parser *p) && (b = disjunction_rule(p)) // disjunction && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_115_rule, p) + _PyPegen_lookahead(0, _tmp_115_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "disjunction 'if' disjunction !('else' | ':')")); @@ -21344,7 +21344,7 @@ invalid_expression_rule(Parser *p) && (_keyword_1 = _PyPegen_expect_token(p, 686)) // token='else' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) expression_rule, p) + _PyPegen_lookahead_for_expr(0, expression_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "disjunction 'if' disjunction 'else' !expression")); @@ -21534,7 +21534,7 @@ invalid_named_expression_rule(Parser *p) && (b = bitwise_or_rule(p)) // bitwise_or && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_117_rule, p) + _PyPegen_lookahead(0, _tmp_117_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_named_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME '=' bitwise_or !('=' | ':=')")); @@ -21560,7 +21560,7 @@ invalid_named_expression_rule(Parser *p) Token * b; expr_ty bitwise_or_var; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_118_rule, p) + _PyPegen_lookahead(0, _tmp_118_rule, p) && (a = bitwise_or_rule(p)) // bitwise_or && @@ -21568,7 +21568,7 @@ invalid_named_expression_rule(Parser *p) && (bitwise_or_var = bitwise_or_rule(p)) // bitwise_or && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_117_rule, p) + _PyPegen_lookahead(0, _tmp_117_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_named_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!(list | tuple | genexp | 'True' | 'None' | 'False') bitwise_or '=' bitwise_or !('=' | ':=')")); @@ -22402,7 +22402,7 @@ invalid_default_rule(Parser *p) if ( (a = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_125_rule, p) + _PyPegen_lookahead(1, _tmp_125_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'=' &(')' | ',')")); @@ -23351,7 +23351,7 @@ invalid_with_item_rule(Parser *p) && (a = expression_rule(p)) // expression && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_36_rule, p) + _PyPegen_lookahead(1, _tmp_36_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_with_item[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression 'as' expression &(',' | ')' | ':')")); @@ -23663,7 +23663,7 @@ invalid_dotted_as_name_rule(Parser *p) && (_keyword = _PyPegen_expect_token(p, 680)) // token='as' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) + _PyPegen_lookahead(0, _tmp_138_rule, p) && (a = expression_rule(p)) // expression ) @@ -23714,7 +23714,7 @@ invalid_import_from_as_name_rule(Parser *p) && (_keyword = _PyPegen_expect_token(p, 680)) // token='as' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_138_rule, p) + _PyPegen_lookahead(0, _tmp_138_rule, p) && (a = expression_rule(p)) // expression ) @@ -24084,7 +24084,7 @@ invalid_try_stmt_rule(Parser *p) && (block_var = block_rule(p)) // block && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_143_rule, p) + _PyPegen_lookahead(0, _tmp_143_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_try_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'try' ':' block !('except' | 'finally')")); @@ -24202,7 +24202,7 @@ invalid_try_stmt_rule(Parser *p) // | 'except' expression ',' expressions 'as' NAME ':' // | 'except' expression ['as' NAME] NEWLINE // | 'except' NEWLINE -// | 'except' expression 'as' expression +// | 'except' expression 'as' expression ':' block static void * invalid_except_stmt_rule(Parser *p) { @@ -24318,15 +24318,17 @@ invalid_except_stmt_rule(Parser *p) D(fprintf(stderr, "%*c%s invalid_except_stmt[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' NEWLINE")); } - { // 'except' expression 'as' expression + { // 'except' expression 'as' expression ':' block if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_except_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression")); + D(fprintf(stderr, "%*c> invalid_except_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression ':' block")); Token * _keyword; Token * _keyword_1; + Token * _literal; expr_ty a; + asdl_stmt_seq* block_var; expr_ty expression_var; if ( (_keyword = _PyPegen_expect_token(p, 677)) // token='except' @@ -24336,9 +24338,13 @@ invalid_except_stmt_rule(Parser *p) (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' && (a = expression_rule(p)) // expression + && + (_literal = _PyPegen_expect_token(p, 11)) // token=':' + && + (block_var = block_rule(p)) // block ) { - D(fprintf(stderr, "%*c+ invalid_except_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression")); + D(fprintf(stderr, "%*c+ invalid_except_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' expression 'as' expression ':' block")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use except statement with %s" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -24349,7 +24355,7 @@ invalid_except_stmt_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_except_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' expression 'as' expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' expression 'as' expression ':' block")); } _res = NULL; done: @@ -24361,7 +24367,7 @@ invalid_except_stmt_rule(Parser *p) // | 'except' '*' expression ',' expressions 'as' NAME ':' // | 'except' '*' expression ['as' NAME] NEWLINE // | 'except' '*' (NEWLINE | ':') -// | 'except' '*' expression 'as' expression +// | 'except' '*' expression 'as' expression ':' block static void * invalid_except_star_stmt_rule(Parser *p) { @@ -24486,16 +24492,18 @@ invalid_except_star_stmt_rule(Parser *p) D(fprintf(stderr, "%*c%s invalid_except_star_stmt[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' (NEWLINE | ':')")); } - { // 'except' '*' expression 'as' expression + { // 'except' '*' expression 'as' expression ':' block if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> invalid_except_star_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression")); + D(fprintf(stderr, "%*c> invalid_except_star_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); Token * _keyword; Token * _keyword_1; Token * _literal; + Token * _literal_1; expr_ty a; + asdl_stmt_seq* block_var; expr_ty expression_var; if ( (_keyword = _PyPegen_expect_token(p, 677)) // token='except' @@ -24507,9 +24515,13 @@ invalid_except_star_stmt_rule(Parser *p) (_keyword_1 = _PyPegen_expect_token(p, 680)) // token='as' && (a = expression_rule(p)) // expression + && + (_literal_1 = _PyPegen_expect_token(p, 11)) // token=':' + && + (block_var = block_rule(p)) // block ) { - D(fprintf(stderr, "%*c+ invalid_except_star_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression")); + D(fprintf(stderr, "%*c+ invalid_except_star_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "cannot use except* statement with %s" , _PyPegen_get_expr_name ( a ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -24520,7 +24532,7 @@ invalid_except_star_stmt_rule(Parser *p) } p->mark = _mark; D(fprintf(stderr, "%*c%s invalid_except_star_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' expression 'as' expression")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'except' '*' expression 'as' expression ':' block")); } _res = NULL; done: @@ -25864,7 +25876,7 @@ invalid_double_starred_kvpairs_rule(Parser *p) && (a = _PyPegen_expect_token(p, 11)) // token=':' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_148_rule, p) + _PyPegen_lookahead(1, _tmp_148_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_double_starred_kvpairs[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ':' &('}' | ',')")); @@ -25974,7 +25986,7 @@ invalid_kvpair_rule(Parser *p) && (a = _PyPegen_expect_token(p, 11)) // token=':' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_148_rule, p) + _PyPegen_lookahead(1, _tmp_148_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_kvpair[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression ':' &('}' | ',')")); @@ -26233,7 +26245,7 @@ invalid_fstring_replacement_field_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) annotated_rhs_rule, p) + _PyPegen_lookahead_for_expr(0, annotated_rhs_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' !annotated_rhs")); @@ -26262,7 +26274,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (annotated_rhs_var = annotated_rhs_rule(p)) // annotated_rhs && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_149_rule, p) + _PyPegen_lookahead(0, _tmp_149_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs !('=' | '!' | ':' | '}')")); @@ -26294,7 +26306,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_150_rule, p) + _PyPegen_lookahead(0, _tmp_150_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '=' !('!' | ':' | '}')")); @@ -26360,7 +26372,7 @@ invalid_fstring_replacement_field_rule(Parser *p) && (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(0, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '='? ['!' NAME] !(':' | '}')")); @@ -26485,7 +26497,7 @@ invalid_fstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(1, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' &(':' | '}')")); @@ -26511,7 +26523,7 @@ invalid_fstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) + _PyPegen_lookahead_for_expr(0, _PyPegen_name_token, p) ) { D(fprintf(stderr, "%*c+ invalid_fstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' !NAME")); @@ -26675,7 +26687,7 @@ invalid_tstring_replacement_field_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 25)) // token='{' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) annotated_rhs_rule, p) + _PyPegen_lookahead_for_expr(0, annotated_rhs_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' !annotated_rhs")); @@ -26704,7 +26716,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (annotated_rhs_var = annotated_rhs_rule(p)) // annotated_rhs && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_149_rule, p) + _PyPegen_lookahead(0, _tmp_149_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs !('=' | '!' | ':' | '}')")); @@ -26736,7 +26748,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_literal_1 = _PyPegen_expect_token(p, 22)) // token='=' && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_150_rule, p) + _PyPegen_lookahead(0, _tmp_150_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '=' !('!' | ':' | '}')")); @@ -26802,7 +26814,7 @@ invalid_tstring_replacement_field_rule(Parser *p) && (_opt_var_1 = _tmp_151_rule(p), !p->error_indicator) // ['!' NAME] && - _PyPegen_lookahead(0, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(0, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_replacement_field[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'{' annotated_rhs '='? ['!' NAME] !(':' | '}')")); @@ -26927,7 +26939,7 @@ invalid_tstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead(1, (void *(*)(Parser *)) _tmp_152_rule, p) + _PyPegen_lookahead(1, _tmp_152_rule, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' &(':' | '}')")); @@ -26953,7 +26965,7 @@ invalid_tstring_conversion_character_rule(Parser *p) if ( (_literal = _PyPegen_expect_token(p, 54)) // token='!' && - _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) + _PyPegen_lookahead_for_expr(0, _PyPegen_name_token, p) ) { D(fprintf(stderr, "%*c+ invalid_tstring_conversion_character[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'!' !NAME")); @@ -37392,7 +37404,7 @@ _tmp_168_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_168[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "!STRING expression_without_invalid")); expr_ty expression_without_invalid_var; if ( - _PyPegen_lookahead(0, (void *(*)(Parser *)) _PyPegen_string_token, p) + _PyPegen_lookahead(0, _PyPegen_string_token, p) && (expression_without_invalid_var = expression_without_invalid_rule(p)) // expression_without_invalid ) diff --git a/Parser/pegen.c b/Parser/pegen.c index 3efeba78450d1a..50641de27d37fd 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -379,44 +379,34 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres) return 0; } -int -_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p) -{ - int mark = p->mark; - void *res = func(p); - p->mark = mark; - return (res != NULL) == positive; -} - -int -_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg) -{ - int mark = p->mark; - void *res = func(p, arg); - p->mark = mark; - return (res != NULL) == positive; -} - -int -_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) -{ - int mark = p->mark; - void *res = func(p, arg); - p->mark = mark; - return (res != NULL) == positive; -} - -// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED to disable sanitizer checks on -// undefined behavior (UBsan) in this function, rather than changing 'func' -// callback API. -int _Py_NO_SANITIZE_UNDEFINED -_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p) -{ - int mark = p->mark; - void *res = func(p); - p->mark = mark; - return (res != NULL) == positive; -} +#define LOOKAHEAD1(NAME, RES_TYPE) \ + int \ + NAME (int positive, RES_TYPE (func)(Parser *), Parser *p) \ + { \ + int mark = p->mark; \ + void *res = func(p); \ + p->mark = mark; \ + return (res != NULL) == positive; \ + } + +LOOKAHEAD1(_PyPegen_lookahead, void *) +LOOKAHEAD1(_PyPegen_lookahead_for_expr, expr_ty) +LOOKAHEAD1(_PyPegen_lookahead_for_stmt, stmt_ty) +#undef LOOKAHEAD1 + +#define LOOKAHEAD2(NAME, RES_TYPE, T) \ + int \ + NAME (int positive, RES_TYPE (func)(Parser *, T), Parser *p, T arg) \ + { \ + int mark = p->mark; \ + void *res = func(p, arg); \ + p->mark = mark; \ + return (res != NULL) == positive; \ + } + +LOOKAHEAD2(_PyPegen_lookahead_with_int, Token *, int) +LOOKAHEAD2(_PyPegen_lookahead_with_string, expr_ty, const char *) +#undef LOOKAHEAD2 Token * _PyPegen_expect_token(Parser *p, int type) @@ -549,6 +539,21 @@ _PyPegen_new_identifier(Parser *p, const char *n) } id = id2; } + static const char * const forbidden[] = { + "None", + "True", + "False", + NULL + }; + for (int i = 0; forbidden[i] != NULL; i++) { + if (_PyUnicode_EqualToASCIIString(id, forbidden[i])) { + PyErr_Format(PyExc_ValueError, + "identifier field can't represent '%s' constant", + forbidden[i]); + Py_DECREF(id); + goto error; + } + } PyInterpreterState *interp = _PyInterpreterState_GET(); _PyUnicode_InternImmortal(interp, &id); if (_PyArena_AddPyObject(p->arena, id) < 0) @@ -605,7 +610,8 @@ expr_ty _PyPegen_soft_keyword_token(Parser *p) { Py_ssize_t size; PyBytes_AsStringAndSize(t->bytes, &the_token, &size); for (char **keyword = p->soft_keywords; *keyword != NULL; keyword++) { - if (strncmp(*keyword, the_token, (size_t)size) == 0) { + if (strlen(*keyword) == (size_t)size && + strncmp(*keyword, the_token, (size_t)size) == 0) { return _PyPegen_name_from_token(p, t); } } diff --git a/Parser/pegen.h b/Parser/pegen.h index 1862fd7407ec3c..804f931871aec8 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -145,10 +145,11 @@ int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node); int _PyPegen_update_memo(Parser *p, int mark, int type, void *node); int _PyPegen_is_memoized(Parser *p, int type, void *pres); -int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *); -int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); -int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*); int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); +int _PyPegen_lookahead_for_expr(int, expr_ty (func)(Parser *), Parser *); +int _PyPegen_lookahead_for_stmt(int, stmt_ty (func)(Parser *), Parser *); +int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); +int _PyPegen_lookahead_with_string(int, expr_ty (func)(Parser *, const char*), Parser *, const char*); Token *_PyPegen_expect_token(Parser *p, int type); void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected); diff --git a/Parser/string_parser.c b/Parser/string_parser.c index d3631b114c5a3c..ebe68989d1af58 100644 --- a/Parser/string_parser.c +++ b/Parser/string_parser.c @@ -196,15 +196,18 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) len = (size_t)(p - buf); s = buf; - const char *first_invalid_escape; - v = _PyUnicode_DecodeUnicodeEscapeInternal(s, (Py_ssize_t)len, NULL, NULL, &first_invalid_escape); + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL, NULL, + &first_invalid_escape_char, + &first_invalid_escape_ptr); // HACK: later we can simply pass the line no, since we don't preserve the tokens // when we are decoding the string but we preserve the line numbers. - if (v != NULL && first_invalid_escape != NULL && t != NULL) { - if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) { - /* We have not decref u before because first_invalid_escape points - inside u. */ + if (v != NULL && first_invalid_escape_ptr != NULL && t != NULL) { + if (warn_invalid_escape_sequence(parser, s, first_invalid_escape_ptr, t) < 0) { + /* We have not decref u before because first_invalid_escape_ptr + points inside u. */ Py_XDECREF(u); Py_DECREF(v); return NULL; @@ -217,14 +220,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) static PyObject * decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) { - const char *first_invalid_escape; - PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape); + int first_invalid_escape_char; + const char *first_invalid_escape_ptr; + PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL, + &first_invalid_escape_char, + &first_invalid_escape_ptr); if (result == NULL) { return NULL; } - if (first_invalid_escape != NULL) { - if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { + if (first_invalid_escape_ptr != NULL) { + if (warn_invalid_escape_sequence(p, s, first_invalid_escape_ptr, t) < 0) { Py_DECREF(result); return NULL; } diff --git a/Python/Python-ast.c b/Python/Python-ast.c index df035568f84be1..660bc598a4862c 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -5528,6 +5528,32 @@ ast_type_replace_check(PyObject *self, Py_DECREF(unused); } } + + // Discard fields from 'expecting' that default to None + PyObject *field_types = NULL; + if (PyObject_GetOptionalAttr((PyObject*)Py_TYPE(self), + &_Py_ID(_field_types), + &field_types) < 0) + { + Py_DECREF(expecting); + return -1; + } + if (field_types != NULL) { + Py_ssize_t pos = 0; + PyObject *field_name, *field_type; + while (PyDict_Next(field_types, &pos, &field_name, &field_type)) { + if (_PyUnion_Check(field_type)) { + // optional field + if (PySet_Discard(expecting, field_name) < 0) { + Py_DECREF(expecting); + Py_DECREF(field_types); + return -1; + } + } + } + Py_DECREF(field_types); + } + // Now 'expecting' contains the fields or attributes // that would not be filled inside ast_type_replace(). Py_ssize_t m = PySet_GET_SIZE(expecting); @@ -5770,7 +5796,7 @@ ast_repr_list(PyObject *list, int depth) for (Py_ssize_t i = 0; i < Py_MIN(length, 2); i++) { if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { goto error; } } @@ -5794,7 +5820,7 @@ ast_repr_list(PyObject *list, int depth) } if (i == 0 && length > 2) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ...", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ...", 5) < 0) { goto error; } } @@ -5898,7 +5924,7 @@ ast_repr_max_depth(AST_object *self, int depth) } if (i > 0) { - if (PyUnicodeWriter_WriteUTF8(writer, ", ", 2) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, ", ", 2) < 0) { Py_DECREF(name); Py_DECREF(value_repr); goto error; diff --git a/Python/asm_trampoline.S b/Python/asm_trampoline.S index 0a3265dfeee204..616752459ba4d9 100644 --- a/Python/asm_trampoline.S +++ b/Python/asm_trampoline.S @@ -9,6 +9,9 @@ # } _Py_trampoline_func_start: #ifdef __x86_64__ +#if defined(__CET__) && (__CET__ & 1) + endbr64 +#endif sub $8, %rsp call *%rcx add $8, %rsp @@ -34,3 +37,22 @@ _Py_trampoline_func_start: .globl _Py_trampoline_func_end _Py_trampoline_func_end: .section .note.GNU-stack,"",@progbits +# Note for indicating the assembly code supports CET +#if defined(__x86_64__) && defined(__CET__) && (__CET__ & 1) + .section .note.gnu.property,"a" + .align 8 + .long 1f - 0f + .long 4f - 1f + .long 5 +0: + .string "GNU" +1: + .align 8 + .long 0xc0000002 + .long 3f - 2f +2: + .long 0x3 +3: + .align 8 +4: +#endif // __x86_64__ diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index c121ec096aebf4..557c12cfda61ff 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -702,6 +702,13 @@ append_templatestr(PyUnicodeWriter *writer, expr_ty e) Py_ssize_t last_idx = 0; Py_ssize_t len = asdl_seq_LEN(e->v.TemplateStr.values); + if (len == 0) { + int result = _write_values_subarray(writer, e->v.TemplateStr.values, + 0, len - 1, 't', arena); + _PyArena_Free(arena); + return result; + } + for (Py_ssize_t i = 0; i < len; i++) { expr_ty value = asdl_seq_GET(e->v.TemplateStr.values, i); @@ -742,6 +749,7 @@ append_templatestr(PyUnicodeWriter *writer, expr_ty e) goto error; } } + _PyArena_Free(arena); return 0; @@ -774,32 +782,37 @@ append_joinedstr(PyUnicodeWriter *writer, expr_ty e, bool is_format_spec) } static int -append_interpolation_value(PyUnicodeWriter *writer, expr_ty e) +append_interpolation_str(PyUnicodeWriter *writer, PyObject *str) { const char *outer_brace = "{"; - /* Grammar allows PR_TUPLE, but use >PR_TEST for adding parenthesis - around a lambda with ':' */ - PyObject *temp_fv_str = expr_as_unicode(e, PR_TEST + 1); - if (!temp_fv_str) { - return -1; - } - if (PyUnicode_Find(temp_fv_str, _Py_LATIN1_CHR('{'), 0, 1, 1) == 0) { + if (PyUnicode_Find(str, _Py_LATIN1_CHR('{'), 0, 1, 1) == 0) { /* Expression starts with a brace, split it with a space from the outer one. */ outer_brace = "{ "; } if (-1 == append_charp(writer, outer_brace)) { - Py_DECREF(temp_fv_str); return -1; } - if (-1 == PyUnicodeWriter_WriteStr(writer, temp_fv_str)) { - Py_DECREF(temp_fv_str); + if (-1 == PyUnicodeWriter_WriteStr(writer, str)) { return -1; } - Py_DECREF(temp_fv_str); return 0; } +static int +append_interpolation_value(PyUnicodeWriter *writer, expr_ty e) +{ + /* Grammar allows PR_TUPLE, but use >PR_TEST for adding parenthesis + around a lambda with ':' */ + PyObject *temp_fv_str = expr_as_unicode(e, PR_TEST + 1); + if (!temp_fv_str) { + return -1; + } + int result = append_interpolation_str(writer, temp_fv_str); + Py_DECREF(temp_fv_str); + return result; +} + static int append_interpolation_conversion(PyUnicodeWriter *writer, int conversion) { @@ -843,7 +856,7 @@ append_interpolation_format_spec(PyUnicodeWriter *writer, expr_ty e) static int append_interpolation(PyUnicodeWriter *writer, expr_ty e) { - if (-1 == append_interpolation_value(writer, e->v.Interpolation.value)) { + if (-1 == append_interpolation_str(writer, e->v.Interpolation.str)) { return -1; } diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 3d0295ee3883f2..09b6c975383883 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -958,6 +958,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals) /*[clinic end generated code: output=0a0824aa70093116 input=7c7bce5299a89062]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *result = NULL, *source_copy; const char *str; @@ -971,35 +972,46 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, : "globals must be a dict"); return NULL; } - if (globals == Py_None) { + + int fromframe = 0; + if (globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); - } + assert(globals != NULL); + Py_INCREF(globals); } - else if (locals == Py_None) - locals = Py_NewRef(globals); else { - Py_INCREF(locals); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_TypeError, + "eval must be given globals and locals " + "when called without a frame"); + } + return NULL; + } + Py_INCREF(globals); } - if (globals == NULL || locals == NULL) { - PyErr_SetString(PyExc_TypeError, - "eval must be given globals and locals " - "when called without a frame"); - goto error; + if (locals != Py_None) { + Py_INCREF(locals); } - - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); + return NULL; + } + } + else { + locals = Py_NewRef(globals); } - if (r < 0) { + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1040,6 +1052,7 @@ builtin_eval_impl(PyObject *module, PyObject *source, PyObject *globals, } error: + Py_XDECREF(globals); Py_XDECREF(locals); return result; } @@ -1070,29 +1083,44 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, PyObject *locals, PyObject *closure) /*[clinic end generated code: output=7579eb4e7646743d input=25e989b6d87a3a21]*/ { + PyThreadState *tstate = _PyThreadState_GET(); PyObject *v; - if (globals == Py_None) { + int fromframe = 0; + if (globals != Py_None) { + Py_INCREF(globals); + } + else if (_PyEval_GetFrame() != NULL) { + fromframe = 1; globals = PyEval_GetGlobals(); - if (locals == Py_None) { - locals = _PyEval_GetFrameLocals(); - if (locals == NULL) - return NULL; - } - else { - Py_INCREF(locals); + assert(globals != NULL); + Py_INCREF(globals); + } + else { + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (!_PyErr_Occurred(tstate)) { + PyErr_SetString(PyExc_SystemError, + "globals and locals cannot be NULL"); + } + goto error; } - if (!globals || !locals) { - PyErr_SetString(PyExc_SystemError, - "globals and locals cannot be NULL"); + Py_INCREF(globals); + } + + if (locals != Py_None) { + Py_INCREF(locals); + } + else if (fromframe) { + locals = _PyEval_GetFrameLocals(); + if (locals == NULL) { + assert(PyErr_Occurred()); + Py_DECREF(globals); return NULL; } } - else if (locals == Py_None) { - locals = Py_NewRef(globals); - } else { - Py_INCREF(locals); + locals = Py_NewRef(globals); } if (!PyDict_Check(globals)) { @@ -1106,11 +1134,8 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, Py_TYPE(locals)->tp_name); goto error; } - int r = PyDict_Contains(globals, &_Py_ID(__builtins__)); - if (r == 0) { - r = PyDict_SetItem(globals, &_Py_ID(__builtins__), PyEval_GetBuiltins()); - } - if (r < 0) { + + if (_PyEval_EnsureBuiltins(tstate, globals, NULL) < 0) { goto error; } @@ -1187,11 +1212,13 @@ builtin_exec_impl(PyObject *module, PyObject *source, PyObject *globals, } if (v == NULL) goto error; + Py_DECREF(globals); Py_DECREF(locals); Py_DECREF(v); Py_RETURN_NONE; error: + Py_XDECREF(globals); Py_XDECREF(locals); return NULL; } @@ -1241,10 +1268,21 @@ static PyObject * builtin_globals_impl(PyObject *module) /*[clinic end generated code: output=e5dd1527067b94d2 input=9327576f92bb48ba]*/ { - PyObject *d; - - d = PyEval_GetGlobals(); - return Py_XNewRef(d); + PyObject *globals; + if (_PyEval_GetFrame() != NULL) { + globals = PyEval_GetGlobals(); + assert(globals != NULL); + return Py_NewRef(globals); + } + PyThreadState *tstate = _PyThreadState_GET(); + globals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (globals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(globals); } @@ -1888,7 +1926,21 @@ static PyObject * builtin_locals_impl(PyObject *module) /*[clinic end generated code: output=b46c94015ce11448 input=7874018d478d5c4b]*/ { - return _PyEval_GetFrameLocals(); + PyObject *locals; + if (_PyEval_GetFrame() != NULL) { + locals = _PyEval_GetFrameLocals(); + assert(locals != NULL || PyErr_Occurred()); + return locals; + } + PyThreadState *tstate = _PyThreadState_GET(); + locals = _PyEval_GetGlobalsFromRunningMain(tstate); + if (locals == NULL) { + if (_PyErr_Occurred(tstate)) { + return NULL; + } + Py_RETURN_NONE; + } + return Py_NewRef(locals); } @@ -2624,7 +2676,22 @@ builtin_vars(PyObject *self, PyObject *args) if (!PyArg_UnpackTuple(args, "vars", 0, 1, &v)) return NULL; if (v == NULL) { - d = _PyEval_GetFrameLocals(); + if (_PyEval_GetFrame() != NULL) { + d = _PyEval_GetFrameLocals(); + } + else { + PyThreadState *tstate = _PyThreadState_GET(); + d = _PyEval_GetGlobalsFromRunningMain(tstate); + if (d == NULL) { + if (!_PyErr_Occurred(tstate)) { + d = _PyEval_GetFrameLocals(); + assert(_PyErr_Occurred(tstate)); + } + } + else { + Py_INCREF(d); + } + } } else { if (PyObject_GetOptionalAttr(v, &_Py_ID(__dict__), &d) == 0) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6a0766626402b0..568f61a9f5da1b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3155,7 +3155,14 @@ dummy_func( replaced op(_FOR_ITER, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + ERROR_NO_POP(); + } + PyObject *next_o = func(iter_o); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); @@ -3179,7 +3186,14 @@ dummy_func( op(_FOR_ITER_TIER_TWO, (iter -- iter, next)) { /* before: [iter]; after: [iter, iter()] *or* [] (and jump over END_FOR.) */ PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + ERROR_NO_POP(); + } + PyObject *next_o = func(iter_o); if (next_o == NULL) { if (_PyErr_Occurred(tstate)) { int matches = _PyErr_ExceptionMatches(tstate, PyExc_StopIteration); @@ -3202,7 +3216,14 @@ dummy_func( inst(INSTRUMENTED_FOR_ITER, (unused/1, iter -- iter, next)) { PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter); - PyObject *next_o = (*Py_TYPE(iter_o)->tp_iternext)(iter_o); + iternextfunc func = Py_TYPE(iter_o)->tp_iternext; + if (func == NULL) { + _PyErr_Format(tstate, PyExc_TypeError, + "'%.100s' object is not an iterator", + Py_TYPE(iter_o)->tp_name); + ERROR_NO_POP(); + } + PyObject *next_o = func(iter_o); if (next_o != NULL) { next = PyStackRef_FromPyObjectSteal(next_o); INSTRUMENTED_JUMP(this_instr, next_instr, PY_MONITORING_EVENT_BRANCH_LEFT); diff --git a/Python/ceval.c b/Python/ceval.c index 490b653f132a6a..42080f77f2a046 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -8,7 +8,7 @@ #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL___ENTER__ #include "pycore_code.h" #include "pycore_dict.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS @@ -333,13 +333,13 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - if (here_addr > _tstate->c_stack_soft_limit + margin_count * PYOS_STACK_MARGIN_BYTES) { + if (here_addr > _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES) { return 0; } if (_tstate->c_stack_hard_limit == 0) { _Py_InitializeRecursionLimits(tstate); } - return here_addr <= _tstate->c_stack_soft_limit + margin_count * PYOS_STACK_MARGIN_BYTES; + return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES; } void @@ -360,9 +360,6 @@ _Py_EnterRecursiveCallUnchecked(PyThreadState *tstate) # define Py_C_STACK_SIZE 1200000 #elif defined(__sparc__) # define Py_C_STACK_SIZE 1600000 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really the stack depth */ -# define Py_C_STACK_SIZE 131072 // wasi-libc DEFAULT_STACK_SIZE #elif defined(__hppa__) || defined(__powerpc64__) # define Py_C_STACK_SIZE 2000000 #else @@ -438,8 +435,8 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) _tstate->c_stack_top = (uintptr_t)high; ULONG guarantee = 0; SetThreadStackGuarantee(&guarantee); - _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + PYOS_STACK_MARGIN_BYTES; - _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + PYOS_STACK_MARGIN_BYTES; + _tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES; + _tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES; #else uintptr_t here_addr = _Py_get_machine_stack_pointer(); # if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && !defined(__NetBSD__) @@ -459,9 +456,9 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) // Thread sanitizer crashes if we use a bit more than half the stack. _tstate->c_stack_soft_limit = base + (stack_size / 2); #else - _tstate->c_stack_soft_limit = base + PYOS_STACK_MARGIN_BYTES * 2; + _tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2; #endif - _tstate->c_stack_hard_limit = base + PYOS_STACK_MARGIN_BYTES; + _tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES; assert(_tstate->c_stack_soft_limit < here_addr); assert(here_addr < _tstate->c_stack_top); return; @@ -469,7 +466,7 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate) # endif _tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096); _tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE; - _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + PYOS_STACK_MARGIN_BYTES); + _tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES); #endif } @@ -2735,10 +2732,9 @@ _PyEval_GetFrameLocals(void) return locals; } -PyObject * -PyEval_GetGlobals(void) +static PyObject * +_PyEval_GetGlobals(PyThreadState *tstate) { - PyThreadState *tstate = _PyThreadState_GET(); _PyInterpreterFrame *current_frame = _PyThreadState_GetFrame(tstate); if (current_frame == NULL) { return NULL; @@ -2746,6 +2742,120 @@ PyEval_GetGlobals(void) return current_frame->f_globals; } +PyObject * +PyEval_GetGlobals(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + return _PyEval_GetGlobals(tstate); +} + +PyObject * +_PyEval_GetGlobalsFromRunningMain(PyThreadState *tstate) +{ + if (!_PyInterpreterState_IsRunningMain(tstate->interp)) { + return NULL; + } + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + Py_XDECREF(mod); + return NULL; + } + PyObject *globals = PyModule_GetDict(mod); // borrowed + Py_DECREF(mod); + return globals; +} + +static PyObject * +get_globals_builtins(PyObject *globals) +{ + PyObject *builtins = NULL; + if (PyDict_Check(globals)) { + if (PyDict_GetItemRef(globals, &_Py_ID(__builtins__), &builtins) < 0) { + return NULL; + } + } + else { + if (PyMapping_GetOptionalItem( + globals, &_Py_ID(__builtins__), &builtins) < 0) + { + return NULL; + } + } + return builtins; +} + +static int +set_globals_builtins(PyObject *globals, PyObject *builtins) +{ + if (PyDict_Check(globals)) { + if (PyDict_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + else { + if (PyObject_SetItem(globals, &_Py_ID(__builtins__), builtins) < 0) { + return -1; + } + } + return 0; +} + +int +_PyEval_EnsureBuiltins(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyEval_GetBuiltins(); // borrowed + if (builtins == NULL) { + assert(_PyErr_Occurred(tstate)); + return -1; + } + Py_INCREF(builtins); + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { + *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + +int +_PyEval_EnsureBuiltinsWithModule(PyThreadState *tstate, PyObject *globals, + PyObject **p_builtins) +{ + PyObject *builtins = get_globals_builtins(globals); + if (builtins == NULL) { + if (_PyErr_Occurred(tstate)) { + return -1; + } + builtins = PyImport_ImportModuleLevel("builtins", NULL, NULL, NULL, 0); + if (builtins == NULL) { + return -1; + } + if (set_globals_builtins(globals, builtins) < 0) { + Py_DECREF(builtins); + return -1; + } + } + if (p_builtins != NULL) { + *p_builtins = builtins; + } + else { + Py_DECREF(builtins); + } + return 0; +} + PyObject* PyEval_GetFrameLocals(void) { diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 5b5018a63731ab..aa68371ac8febf 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1218,30 +1218,30 @@ static inline int run_remote_debugger_source(PyObject *source) // Note that this function is inline to avoid creating a PLT entry // that would be an easy target for a ROP gadget. -static inline void run_remote_debugger_script(const char *path) +static inline void run_remote_debugger_script(PyObject *path) { - if (0 != PySys_Audit("remote_debugger_script", "s", path)) { + if (0 != PySys_Audit("cpython.remote_debugger_script", "O", path)) { PyErr_FormatUnraisable( - "Audit hook failed for remote debugger script %s", path); + "Audit hook failed for remote debugger script %U", path); return; } // Open the debugger script with the open code hook, and reopen the // resulting file object to get a C FILE* object. - PyObject* fileobj = PyFile_OpenCode(path); + PyObject* fileobj = PyFile_OpenCodeObject(path); if (!fileobj) { - PyErr_FormatUnraisable("Can't open debugger script %s", path); + PyErr_FormatUnraisable("Can't open debugger script %U", path); return; } PyObject* source = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(read)); if (!source) { - PyErr_FormatUnraisable("Error reading debugger script %s", path); + PyErr_FormatUnraisable("Error reading debugger script %U", path); } PyObject* res = PyObject_CallMethodNoArgs(fileobj, &_Py_ID(close)); if (!res) { - PyErr_FormatUnraisable("Error closing debugger script %s", path); + PyErr_FormatUnraisable("Error closing debugger script %U", path); } else { Py_DECREF(res); } @@ -1249,7 +1249,7 @@ static inline void run_remote_debugger_script(const char *path) if (source) { if (0 != run_remote_debugger_source(source)) { - PyErr_FormatUnraisable("Error executing debugger script %s", path); + PyErr_FormatUnraisable("Error executing debugger script %U", path); } Py_DECREF(source); } @@ -1278,7 +1278,14 @@ int _PyRunRemoteDebugger(PyThreadState *tstate) pathsz); path[pathsz - 1] = '\0'; if (*path) { - run_remote_debugger_script(path); + PyObject *path_obj = PyUnicode_DecodeFSDefault(path); + if (path_obj == NULL) { + PyErr_FormatUnraisable("Can't decode debugger script"); + } + else { + run_remote_debugger_script(path_obj); + Py_DECREF(path_obj); + } } PyMem_Free(path); } @@ -1380,6 +1387,10 @@ _Py_HandlePending(PyThreadState *tstate) _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); _Py_brc_merge_refcounts(tstate); } + /* Process deferred memory frees held by QSBR */ + if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) { + _PyMem_ProcessDelayed(tstate); + } #endif /* GC scheduled to run */ diff --git a/Python/codegen.c b/Python/codegen.c index 683601103ec99d..b3dbed7096489b 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -28,6 +28,7 @@ #include "pycore_pystate.h" // _Py_GetConfig() #include "pycore_symtable.h" // PySTEntryObject #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString +#include "pycore_ceval.h" // SPECIAL___ENTER__ #define NEED_OPCODE_METADATA #include "pycore_opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed @@ -4411,7 +4412,6 @@ codegen_sync_comprehension_generator(compiler *c, location loc, comprehension_ty gen = (comprehension_ty)asdl_seq_GET(generators, gen_index); - if (!iter_on_stack) { if (gen_index == 0) { assert(METADATA(c)->u_argcount == 1); @@ -4449,7 +4449,6 @@ codegen_sync_comprehension_generator(compiler *c, location loc, if (IS_JUMP_TARGET_LABEL(start)) { depth++; - ADDOP(c, LOC(gen->iter), GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor); } @@ -4865,7 +4864,10 @@ codegen_comprehension(compiler *c, expr_ty e, int type, } Py_CLEAR(co); - VISIT(c, expr, outermost->iter); + if (codegen_comprehension_iter(c, outermost)) { + goto error; + } + ADDOP_I(c, loc, CALL, 0); if (is_async_comprehension && type != COMP_GENEXP) { diff --git a/Python/context.c b/Python/context.c index dceaae9b42979d..9927cab915cae7 100644 --- a/Python/context.c +++ b/Python/context.c @@ -979,7 +979,7 @@ contextvar_tp_repr(PyObject *op) return NULL; } - if (PyUnicodeWriter_WriteUTF8(writer, "tok_used) { - if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " used", 5) < 0) { goto error; } } - if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteASCII(writer, " var=", 5) < 0) { goto error; } if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 74ce02f1a26401..16a23f0351cd26 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -6,8 +6,14 @@ #include "osdefs.h" // MAXPATHLEN #include "pycore_ceval.h" // _Py_simple_func #include "pycore_crossinterp.h" // _PyXIData_t +#include "pycore_function.h" // _PyFunction_VerifyStateless() +#include "pycore_global_strings.h" // _Py_ID() +#include "pycore_import.h" // _PyImport_SetModule() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_namespace.h" // _PyNamespace_New() +#include "pycore_pythonrun.h" // _Py_SourceAsString() +#include "pycore_runtime.h" // _PyRuntime +#include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_typeobject.h" // _PyStaticType_InitBuiltin() @@ -19,6 +25,7 @@ _Py_GetMainfile(char *buffer, size_t maxlen) PyThreadState *tstate = _PyThreadState_GET(); PyObject *module = _Py_GetMainModule(tstate); if (_Py_CheckMainModule(module) < 0) { + Py_XDECREF(module); return -1; } Py_ssize_t size = _PyModule_GetFilenameUTF8(module, buffer, maxlen); @@ -27,27 +34,6 @@ _Py_GetMainfile(char *buffer, size_t maxlen) } -static PyObject * -import_get_module(PyThreadState *tstate, const char *modname) -{ - PyObject *module = NULL; - if (strcmp(modname, "__main__") == 0) { - module = _Py_GetMainModule(tstate); - if (_Py_CheckMainModule(module) < 0) { - assert(_PyErr_Occurred(tstate)); - return NULL; - } - } - else { - module = PyImport_ImportModule(modname); - if (module == NULL) { - return NULL; - } - } - return module; -} - - static PyObject * runpy_run_path(const char *filename, const char *modname) { @@ -67,97 +53,192 @@ runpy_run_path(const char *filename, const char *modname) } -static PyObject * -pyerr_get_message(PyObject *exc) +static void +set_exc_with_cause(PyObject *exctype, const char *msg) { - assert(!PyErr_Occurred()); - PyObject *args = PyException_GetArgs(exc); - if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { - return NULL; - } - if (PyUnicode_Check(args)) { - return args; - } - PyObject *msg = PySequence_GetItem(args, 0); - Py_DECREF(args); - if (msg == NULL) { - PyErr_Clear(); - return NULL; - } - if (!PyUnicode_Check(msg)) { - Py_DECREF(msg); - return NULL; - } - return msg; + PyObject *cause = PyErr_GetRaisedException(); + PyErr_SetString(exctype, msg); + PyObject *exc = PyErr_GetRaisedException(); + PyException_SetCause(exc, cause); + PyErr_SetRaisedException(exc); } -#define MAX_MODNAME (255) -#define MAX_ATTRNAME (255) -struct attributeerror_info { - char modname[MAX_MODNAME+1]; - char attrname[MAX_ATTRNAME+1]; +/****************************/ +/* module duplication utils */ +/****************************/ + +struct sync_module_result { + PyObject *module; + PyObject *loaded; + PyObject *failed; +}; + +struct sync_module { + const char *filename; + char _filename[MAXPATHLEN+1]; + struct sync_module_result cached; }; +static void +sync_module_clear(struct sync_module *data) +{ + data->filename = NULL; + Py_CLEAR(data->cached.module); + Py_CLEAR(data->cached.loaded); + Py_CLEAR(data->cached.failed); +} + +static void +sync_module_capture_exc(PyThreadState *tstate, struct sync_module *data) +{ + assert(_PyErr_Occurred(tstate)); + PyObject *context = data->cached.failed; + PyObject *exc = _PyErr_GetRaisedException(tstate); + _PyErr_SetRaisedException(tstate, Py_NewRef(exc)); + if (context != NULL) { + PyException_SetContext(exc, context); + } + data->cached.failed = exc; +} + + static int -_parse_attributeerror(PyObject *exc, struct attributeerror_info *info) +ensure_isolated_main(PyThreadState *tstate, struct sync_module *main) { - assert(exc != NULL); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - int res = -1; + // Load the module from the original file (or from a cache). - PyObject *msgobj = pyerr_get_message(exc); - if (msgobj == NULL) { + // First try the local cache. + if (main->cached.failed != NULL) { + // We'll deal with this in apply_isolated_main(). + assert(main->cached.module == NULL); + assert(main->cached.loaded == NULL); + return 0; + } + else if (main->cached.loaded != NULL) { + assert(main->cached.module != NULL); + return 0; + } + assert(main->cached.module == NULL); + + if (main->filename == NULL) { + _PyErr_SetString(tstate, PyExc_NotImplementedError, ""); return -1; } - const char *err = PyUnicode_AsUTF8(msgobj); - if (strncmp(err, "module '", 8) != 0) { - goto finally; + // It wasn't in the local cache so we'll need to populate it. + PyObject *mod = _Py_GetMainModule(tstate); + if (_Py_CheckMainModule(mod) < 0) { + // This is probably unrecoverable, so don't bother caching the error. + assert(_PyErr_Occurred(tstate)); + Py_XDECREF(mod); + return -1; } - err += 8; + PyObject *loaded = NULL; - const char *matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; + // Try the per-interpreter cache for the loaded module. + // XXX Store it in sys.modules? + PyObject *interpns = PyInterpreterState_GetDict(tstate->interp); + assert(interpns != NULL); + PyObject *key = PyUnicode_FromString("CACHED_MODULE_NS___main__"); + if (key == NULL) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + return -1; } - Py_ssize_t len = matched - err; - if (len > MAX_MODNAME) { - goto finally; + else if (PyDict_GetItemRef(interpns, key, &loaded) < 0) { + // It's probably unrecoverable, so don't bother caching the error. + Py_DECREF(mod); + Py_DECREF(key); + return -1; } - (void)strncpy(info->modname, err, len); - info->modname[len] = '\0'; - err = matched; + else if (loaded == NULL) { + // It wasn't already loaded from file. + loaded = PyModule_NewObject(&_Py_ID(__main__)); + if (loaded == NULL) { + goto error; + } + PyObject *ns = _PyModule_GetDict(loaded); - if (strncmp(err, "' has no attribute '", 20) != 0) { - goto finally; - } - err += 20; + // We don't want to trigger "if __name__ == '__main__':", + // so we use a bogus module name. + PyObject *loaded_ns = + runpy_run_path(main->filename, ""); + if (loaded_ns == NULL) { + goto error; + } + int res = PyDict_Update(ns, loaded_ns); + Py_DECREF(loaded_ns); + if (res < 0) { + goto error; + } - matched = strchr(err, '\''); - if (matched == NULL) { - goto finally; - } - len = matched - err; - if (len > MAX_ATTRNAME) { - goto finally; + // Set the per-interpreter cache entry. + if (PyDict_SetItem(interpns, key, loaded) < 0) { + goto error; + } } - (void)strncpy(info->attrname, err, len); - info->attrname[len] = '\0'; - err = matched + 1; - if (strlen(err) > 0) { - goto finally; + Py_DECREF(key); + main->cached = (struct sync_module_result){ + .module = mod, + .loaded = loaded, + }; + return 0; + +error: + sync_module_capture_exc(tstate, main); + Py_XDECREF(loaded); + Py_DECREF(mod); + Py_XDECREF(key); + return -1; +} + +#ifndef NDEBUG +static int +main_mod_matches(PyObject *expected) +{ + PyObject *mod = PyImport_GetModule(&_Py_ID(__main__)); + Py_XDECREF(mod); + return mod == expected; +} +#endif + +static int +apply_isolated_main(PyThreadState *tstate, struct sync_module *main) +{ + assert((main->cached.loaded == NULL) == (main->cached.loaded == NULL)); + if (main->cached.failed != NULL) { + // It must have failed previously. + assert(main->cached.loaded == NULL); + _PyErr_SetRaisedException(tstate, main->cached.failed); + return -1; } - res = 0; + assert(main->cached.loaded != NULL); -finally: - Py_DECREF(msgobj); - return res; + assert(main_mod_matches(main->cached.module)); + if (_PyImport_SetModule(&_Py_ID(__main__), main->cached.loaded) < 0) { + sync_module_capture_exc(tstate, main); + return -1; + } + return 0; } -#undef MAX_MODNAME -#undef MAX_ATTRNAME +static void +restore_main(PyThreadState *tstate, struct sync_module *main) +{ + assert(main->cached.failed == NULL); + assert(main->cached.module != NULL); + assert(main->cached.loaded != NULL); + PyObject *exc = _PyErr_GetRaisedException(tstate); + assert(main_mod_matches(main->cached.loaded)); + int res = _PyImport_SetModule(&_Py_ID(__main__), main->cached.module); + assert(res == 0); + if (res < 0) { + PyErr_FormatUnraisable("Exception ignored while restoring __main__"); + } + _PyErr_SetRaisedException(tstate, exc); +} /**************/ @@ -207,16 +288,16 @@ _Py_CallInInterpreterAndRawFree(PyInterpreterState *interp, /* cross-interpreter data */ /**************************/ -/* registry of {type -> xidatafunc} */ +/* registry of {type -> _PyXIData_getdata_t} */ -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - xidatafunc. It would be simpler and more efficient. */ +/* For now we use a global registry of shareable classes. + An alternative would be to add a tp_* slot for a class's + _PyXIData_getdata_t. It would be simpler and more efficient. */ static void xid_lookup_init(_PyXIData_lookup_t *); static void xid_lookup_fini(_PyXIData_lookup_t *); struct _dlcontext; -static xidatafunc lookup_getdata(struct _dlcontext *, PyObject *); +static _PyXIData_getdata_t lookup_getdata(struct _dlcontext *, PyObject *); #include "crossinterp_data_lookup.h" @@ -340,7 +421,7 @@ _set_xid_lookup_failure(PyThreadState *tstate, PyObject *obj, const char *msg, set_notshareableerror(tstate, cause, 0, msg); } else { - msg = "%S does not support cross-interpreter data"; + msg = "%R does not support cross-interpreter data"; format_notshareableerror(tstate, cause, 0, msg, obj); } } @@ -353,8 +434,8 @@ _PyObject_CheckXIData(PyThreadState *tstate, PyObject *obj) if (get_lookup_context(tstate, &ctx) < 0) { return -1; } - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (!_PyErr_Occurred(tstate)) { _set_xid_lookup_failure(tstate, obj, NULL, NULL); } @@ -385,9 +466,9 @@ _check_xidata(PyThreadState *tstate, _PyXIData_t *xidata) return 0; } -int -_PyObject_GetXIData(PyThreadState *tstate, - PyObject *obj, _PyXIData_t *xidata) +static int +_get_xidata(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, _PyXIData_t *xidata) { PyInterpreterState *interp = tstate->interp; @@ -395,6 +476,7 @@ _PyObject_GetXIData(PyThreadState *tstate, assert(xidata->obj == NULL); if (xidata->data != NULL || xidata->obj != NULL) { _PyErr_SetString(tstate, PyExc_ValueError, "xidata not cleared"); + return -1; } // Call the "getdata" func for the object. @@ -403,8 +485,8 @@ _PyObject_GetXIData(PyThreadState *tstate, return -1; } Py_INCREF(obj); - xidatafunc getdata = lookup_getdata(&ctx, obj); - if (getdata == NULL) { + _PyXIData_getdata_t getdata = lookup_getdata(&ctx, obj); + if (getdata.basic == NULL && getdata.fallback == NULL) { if (PyErr_Occurred()) { Py_DECREF(obj); return -1; @@ -416,7 +498,9 @@ _PyObject_GetXIData(PyThreadState *tstate, } return -1; } - int res = getdata(tstate, obj, xidata); + int res = getdata.basic != NULL + ? getdata.basic(tstate, obj, xidata) + : getdata.fallback(tstate, obj, fallback, xidata); Py_DECREF(obj); if (res != 0) { PyObject *cause = _PyErr_GetRaisedException(tstate); @@ -436,6 +520,51 @@ _PyObject_GetXIData(PyThreadState *tstate, return 0; } +int +_PyObject_GetXIDataNoFallback(PyThreadState *tstate, + PyObject *obj, _PyXIData_t *xidata) +{ + return _get_xidata(tstate, obj, _PyXIDATA_XIDATA_ONLY, xidata); +} + +int +_PyObject_GetXIData(PyThreadState *tstate, + PyObject *obj, xidata_fallback_t fallback, + _PyXIData_t *xidata) +{ + switch (fallback) { + case _PyXIDATA_XIDATA_ONLY: + return _get_xidata(tstate, obj, fallback, xidata); + case _PyXIDATA_FULL_FALLBACK: + if (_get_xidata(tstate, obj, fallback, xidata) == 0) { + return 0; + } + PyObject *exc = _PyErr_GetRaisedException(tstate); + if (PyFunction_Check(obj)) { + if (_PyFunction_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + _PyErr_Clear(tstate); + } + // We could try _PyMarshal_GetXIData() but we won't for now. + if (_PyPickle_GetXIData(tstate, obj, xidata) == 0) { + Py_DECREF(exc); + return 0; + } + // Raise the original exception. + _PyErr_SetRaisedException(tstate, exc); + return -1; + default: +#ifdef Py_DEBUG + Py_FatalError("unsupported xidata fallback option"); +#endif + _PyErr_SetString(tstate, PyExc_SystemError, + "unsupported xidata fallback option"); + return -1; + } +} + /* pickle C-API */ @@ -456,28 +585,6 @@ _PyPickle_Dumps(struct _pickle_context *ctx, PyObject *obj) } -struct sync_module_result { - PyObject *module; - PyObject *loaded; - PyObject *failed; -}; - -struct sync_module { - const char *filename; - char _filename[MAXPATHLEN+1]; - struct sync_module_result cached; -}; - -static void -sync_module_clear(struct sync_module *data) -{ - data->filename = NULL; - Py_CLEAR(data->cached.module); - Py_CLEAR(data->cached.loaded); - Py_CLEAR(data->cached.failed); -} - - struct _unpickle_context { PyThreadState *tstate; // We only special-case the __main__ module, @@ -491,142 +598,88 @@ _unpickle_context_clear(struct _unpickle_context *ctx) sync_module_clear(&ctx->main); } -static struct sync_module_result -_unpickle_context_get_module(struct _unpickle_context *ctx, - const char *modname) +static int +check_missing___main___attr(PyObject *exc) { - if (strcmp(modname, "__main__") == 0) { - return ctx->main.cached; + assert(!PyErr_Occurred()); + if (!PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)) { + return 0; } - else { - return (struct sync_module_result){ - .failed = PyExc_NotImplementedError, - }; + + // Get the error message. + PyObject *args = PyException_GetArgs(exc); + if (args == NULL || args == Py_None || PyObject_Size(args) < 1) { + assert(!PyErr_Occurred()); + return 0; + } + PyObject *msgobj = args; + if (!PyUnicode_Check(msgobj)) { + msgobj = PySequence_GetItem(args, 0); + Py_DECREF(args); + if (msgobj == NULL) { + PyErr_Clear(); + return 0; + } } + const char *err = PyUnicode_AsUTF8(msgobj); + + // Check if it's a missing __main__ attr. + int cmp = strncmp(err, "module '__main__' has no attribute '", 36); + Py_DECREF(msgobj); + return cmp == 0; } -static struct sync_module_result -_unpickle_context_set_module(struct _unpickle_context *ctx, - const char *modname) +static PyObject * +_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) { - struct sync_module_result res = {0}; - struct sync_module_result *cached = NULL; - const char *filename = NULL; - const char *run_modname = modname; - if (strcmp(modname, "__main__") == 0) { - cached = &ctx->main.cached; - filename = ctx->main.filename; - // We don't want to trigger "if __name__ == '__main__':". - run_modname = ""; - } - else { - res.failed = PyExc_NotImplementedError; - goto finally; - } + PyThreadState *tstate = ctx->tstate; - res.module = import_get_module(ctx->tstate, modname); - if (res.module == NULL) { - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); - goto finally; + PyObject *exc = NULL; + PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); + if (loads == NULL) { + return NULL; } - if (filename == NULL) { - Py_CLEAR(res.module); - res.failed = PyExc_NotImplementedError; + // Make an initial attempt to unpickle. + PyObject *obj = PyObject_CallOneArg(loads, pickled); + if (obj != NULL) { goto finally; } - res.loaded = runpy_run_path(filename, run_modname); - if (res.loaded == NULL) { - Py_CLEAR(res.module); - res.failed = _PyErr_GetRaisedException(ctx->tstate); - assert(res.failed != NULL); + assert(_PyErr_Occurred(tstate)); + if (ctx == NULL) { goto finally; } - -finally: - if (cached != NULL) { - assert(cached->module == NULL); - assert(cached->loaded == NULL); - assert(cached->failed == NULL); - *cached = res; - } - return res; -} - - -static int -_handle_unpickle_missing_attr(struct _unpickle_context *ctx, PyObject *exc) -{ - // The caller must check if an exception is set or not when -1 is returned. - assert(!_PyErr_Occurred(ctx->tstate)); - assert(PyErr_GivenExceptionMatches(exc, PyExc_AttributeError)); - struct attributeerror_info info; - if (_parse_attributeerror(exc, &info) < 0) { - return -1; + exc = _PyErr_GetRaisedException(tstate); + if (!check_missing___main___attr(exc)) { + goto finally; } - // Get the module. - struct sync_module_result mod = _unpickle_context_get_module(ctx, info.modname); - if (mod.failed != NULL) { - // It must have failed previously. - return -1; - } - if (mod.module == NULL) { - mod = _unpickle_context_set_module(ctx, info.modname); - if (mod.failed != NULL) { - return -1; - } - assert(mod.module != NULL); + // Temporarily swap in a fake __main__ loaded from the original + // file and cached. Note that functions will use the cached ns + // for __globals__, // not the actual module. + if (ensure_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - - // Bail out if it is unexpectedly set already. - if (PyObject_HasAttrString(mod.module, info.attrname)) { - return -1; + if (apply_isolated_main(tstate, &ctx->main) < 0) { + goto finally; } - // Try setting the attribute. - PyObject *value = NULL; - if (PyDict_GetItemStringRef(mod.loaded, info.attrname, &value) <= 0) { - return -1; - } - assert(value != NULL); - int res = PyObject_SetAttrString(mod.module, info.attrname, value); - Py_DECREF(value); - if (res < 0) { - return -1; + // Try to unpickle once more. + obj = PyObject_CallOneArg(loads, pickled); + restore_main(tstate, &ctx->main); + if (obj == NULL) { + goto finally; } + Py_CLEAR(exc); - return 0; -} - -static PyObject * -_PyPickle_Loads(struct _unpickle_context *ctx, PyObject *pickled) -{ - PyObject *loads = PyImport_ImportModuleAttrString("pickle", "loads"); - if (loads == NULL) { - return NULL; - } - PyObject *obj = PyObject_CallOneArg(loads, pickled); - if (ctx != NULL) { - while (obj == NULL) { - assert(_PyErr_Occurred(ctx->tstate)); - if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { - // We leave other failures unhandled. - break; - } - // Try setting the attr if not set. - PyObject *exc = _PyErr_GetRaisedException(ctx->tstate); - if (_handle_unpickle_missing_attr(ctx, exc) < 0) { - // Any resulting exceptions are ignored - // in favor of the original. - _PyErr_SetRaisedException(ctx->tstate, exc); - break; - } - Py_CLEAR(exc); - // Retry with the attribute set. - obj = PyObject_CallOneArg(loads, pickled); +finally: + if (exc != NULL) { + if (_PyErr_Occurred(tstate)) { + sync_module_capture_exc(tstate, &ctx->main); } + // We restore the original exception. + // It might make sense to chain it (__context__). + _PyErr_SetRaisedException(tstate, exc); } Py_DECREF(loads); return obj; @@ -784,35 +837,167 @@ _PyMarshal_GetXIData(PyThreadState *tstate, PyObject *obj, _PyXIData_t *xidata) } -/* using cross-interpreter data */ - -PyObject * -_PyXIData_NewObject(_PyXIData_t *xidata) -{ - return xidata->new_object(xidata); -} +/* script wrapper */ static int -_call_clear_xidata(void *data) +verify_script(PyThreadState *tstate, PyCodeObject *co, int checked, int pure) { - _xidata_clear((_PyXIData_t *)data); + // Make sure it isn't a closure and (optionally) doesn't use globals. + PyObject *builtins = NULL; + if (pure) { + builtins = _PyEval_GetBuiltins(tstate); + assert(builtins != NULL); + } + if (checked) { + assert(_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) == 0); + } + else if (_PyCode_VerifyStateless(tstate, co, NULL, NULL, builtins) < 0) { + return -1; + } + // Make sure it doesn't have args. + if (co->co_argcount > 0 + || co->co_posonlyargcount > 0 + || co->co_kwonlyargcount > 0 + || co->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) + { + _PyErr_SetString(tstate, PyExc_ValueError, + "code with args not supported"); + return -1; + } + // Make sure it doesn't return anything. + if (!_PyCode_ReturnsOnlyNone(co)) { + _PyErr_SetString(tstate, PyExc_ValueError, + "code that returns a value is not a script"); + return -1; + } return 0; } static int -_xidata_release(_PyXIData_t *xidata, int rawfree) -{ - if ((xidata->data == NULL || xidata->free == NULL) && xidata->obj == NULL) { - // Nothing to release! - if (rawfree) { - PyMem_RawFree(xidata); - } - else { - xidata->data = NULL; +get_script_xidata(PyThreadState *tstate, PyObject *obj, int pure, + _PyXIData_t *xidata) +{ + // Get the corresponding code object. + PyObject *code = NULL; + int checked = 0; + if (PyCode_Check(obj)) { + code = obj; + Py_INCREF(code); + } + else if (PyFunction_Check(obj)) { + code = PyFunction_GET_CODE(obj); + assert(code != NULL); + Py_INCREF(code); + if (pure) { + if (_PyFunction_VerifyStateless(tstate, obj) < 0) { + goto error; + } + checked = 1; } - return 0; } - + else { + const char *filename = "