diff --git a/.github/workflows/cache_libs.yml b/.github/workflows/cache_libs.yml index e53dc4fdd..ebd1f6c28 100644 --- a/.github/workflows/cache_libs.yml +++ b/.github/workflows/cache_libs.yml @@ -19,6 +19,22 @@ on: default: "1.1.43" required: false type: string + WIN_ZLIB_VERSION: + default: "1.3.1" + required: false + type: string + WIN_LIBICONV_VERSION: + default: "1.17.1" + required: false + type: string + WIN_LIBXML2_VERSION: + default: "2.11.9" + required: false + type: string + WIN_LIBXSLT_VERSION: + default: "1.1.39" + required: false + type: string outputs: ZLIB_VERSION: @@ -29,6 +45,14 @@ on: value: ${{ inputs.LIBXML2_VERSION }} LIBXSLT_VERSION: value: ${{ inputs.LIBXSLT_VERSION }} + WIN_ZLIB_VERSION: + value: ${{ inputs.WIN_ZLIB_VERSION }} + WIN_LIBICONV_VERSION: + value: ${{ inputs.WIN_LIBICONV_VERSION }} + WIN_LIBXML2_VERSION: + value: ${{ inputs.WIN_LIBXML2_VERSION }} + WIN_LIBXSLT_VERSION: + value: ${{ inputs.WIN_LIBXSLT_VERSION }} jobs: @@ -46,10 +70,10 @@ jobs: runs-on: ${{ matrix.os }} env: - ZLIB_VERSION: ${{ inputs.ZLIB_VERSION }} - LIBICONV_VERSION: ${{ inputs.LIBICONV_VERSION }} - LIBXML2_VERSION: ${{ inputs.LIBXML2_VERSION }} - LIBXSLT_VERSION: ${{ inputs.LIBXSLT_VERSION }} + ZLIB_VERSION: ${{ contains(matrix.os, 'windows-') && inputs.WIN_ZLIB_VERSION || inputs.ZLIB_VERSION }} + LIBICONV_VERSION: ${{ contains(matrix.os, 'windows-') && inputs.WIN_LIBICONV_VERSION || inputs.LIBICONV_VERSION }} + LIBXML2_VERSION: ${{ contains(matrix.os, 'windows-') && inputs.WIN_LIBXML2_VERSION || inputs.LIBXML2_VERSION }} + LIBXSLT_VERSION: ${{ contains(matrix.os, 'windows-') && inputs.WIN_LIBXSLT_VERSION || inputs.LIBXSLT_VERSION }} steps: - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 @@ -67,3 +91,12 @@ jobs: env: GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: python3 buildlibxml.py --download-only + + - name: Check Windows library versions + if: ${{ contains(matrix.os, 'windows-') }} + run: | + bash -c ' + for file in libs/zlib-${{ inputs.WIN_ZLIB_VERSION }}.*.zip libs/iconv-${{ inputs.WIN_LIBICONV_VERSION }}.*.zip libs/libxml2-${{ inputs.WIN_LIBXML2_VERSION }}.*.zip libs/libxslt-${{ inputs.WIN_LIBXSLT_VERSION }}.*.zip; do + [[ -f "$file" ]] || { echo "MISSING: $file" ; exit 1; } + done + ' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cd4fb5492..4a723558c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,10 +41,10 @@ jobs: CCACHE_SLOPPINESS: "pch_defines,time_macros" CCACHE_COMPRESS: 1 CCACHE_MAXSIZE: "100M" - ZLIB_VERSION: ${{ needs.cache_libs.outputs.ZLIB_VERSION }} - LIBICONV_VERSION: ${{ needs.cache_libs.outputs.LIBICONV_VERSION }} - LIBXML2_VERSION: ${{ needs.cache_libs.outputs.LIBXML2_VERSION }} - LIBXSLT_VERSION: ${{ needs.cache_libs.outputs.LIBXSLT_VERSION }} + ZLIB_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_ZLIB_VERSION || needs.cache_libs.outputs.ZLIB_VERSION }} + LIBICONV_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_LIBICONV_VERSION || needs.cache_libs.outputs.LIBICONV_VERSION }} + LIBXML2_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_LIBXML2_VERSION || needs.cache_libs.outputs.LIBXML2_VERSION }} + LIBXSLT_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_LIBXSLT_VERSION || needs.cache_libs.outputs.LIBXSLT_VERSION }} strategy: # Allows for matrix sub-jobs to fail without canceling the rest diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 7e459766e..7a75a797c 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -118,10 +118,10 @@ jobs: runs-on: ${{ matrix.os }} env: - ZLIB_VERSION: ${{ needs.cache_libs.outputs.ZLIB_VERSION }} - LIBICONV_VERSION: ${{ needs.cache_libs.outputs.LIBICONV_VERSION }} - LIBXML2_VERSION: ${{ needs.cache_libs.outputs.LIBXML2_VERSION }} - LIBXSLT_VERSION: ${{ needs.cache_libs.outputs.LIBXSLT_VERSION }} + ZLIB_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_ZLIB_VERSION || needs.cache_libs.outputs.ZLIB_VERSION }} + LIBICONV_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_LIBICONV_VERSION || needs.cache_libs.outputs.LIBICONV_VERSION }} + LIBXML2_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_LIBXML2_VERSION || needs.cache_libs.outputs.LIBXML2_VERSION }} + LIBXSLT_VERSION: ${{ contains(matrix.os, 'windows-') && needs.cache_libs.outputs.WIN_LIBXSLT_VERSION || needs.cache_libs.outputs.LIBXSLT_VERSION }} strategy: fail-fast: false diff --git a/CHANGES.txt b/CHANGES.txt index 35ee04694..caa2646c7 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -32,7 +32,7 @@ Other changes * Built using Cython 3.2.2. -6.0.3 (2025-??-??) +6.0.3 (2026-??-??) ================== Bugs fixed @@ -42,6 +42,10 @@ Bugs fixed * LP#2125399: Some failing tests were fixed or disabled in PyPy. +* LP#2138421: Memory leak in error cases when setting the ``public_id`` or ``system_url`` of a document. + +* Memory leak in case of a memory allocation failure when copying document subtrees. + 6.0.2 (2025-09-21) ================== diff --git a/buildlibxml.py b/buildlibxml.py index e2cdcd751..b576fb3a3 100644 --- a/buildlibxml.py +++ b/buildlibxml.py @@ -6,7 +6,7 @@ from ftplib import FTP import urllib.error -from urllib.parse import urljoin, unquote, urlparse +from urllib.parse import urljoin, quote as urlquote, unquote, urlparse from urllib.request import urlretrieve, urlopen, Request multi_make_options = [] @@ -44,25 +44,6 @@ def read_file_digest(file): def download_and_extract_windows_binaries(destdir): - url = "https://api.github.com/repos/lxml/libxml2-win-binaries/releases?per_page=5" - releases, _ = read_url( - url, - accept="application/vnd.github+json", - as_json=True, - github_api_token=os.environ.get("GITHUB_API_TOKEN"), - ) - - max_release = {'tag_name': ''} - for release in releases: - if max_release['tag_name'] < release.get('tag_name', ''): - max_release = release - - url = "https://github.com/lxml/libxml2-win-binaries/releases/download/%s/" % max_release['tag_name'] - asset_files = { - asset['name']: (asset['size'], asset['digest']) - for asset in max_release.get('assets', ()) - } - # Check for native ARM64 build or the environment variable that is set by # Visual Studio for cross-compilation (same variable as setuptools uses) if platform.machine() == 'ARM64' or os.getenv('VSCMD_ARG_TGT_ARCH') == 'arm64': @@ -72,47 +53,86 @@ def download_and_extract_windows_binaries(destdir): else: arch = "win32" - arch_part = '.' + arch + '.' - asset_files = { - filename: details - for filename, details in asset_files.items() - if arch_part in filename - } + def build_libzip_name(libname, version): + return f"{libname}-{version}.{arch}.zip" - libs = {} - for libname in ['libxml2', 'libxslt', 'zlib', 'iconv']: - libs[libname] = "%s-%s.%s.zip" % ( - libname, - find_max_version(libname, list(asset_files)), - arch, + def read_latest_release(): + url = "https://api.github.com/repos/lxml/libxml2-win-binaries/releases?per_page=5" + releases, _ = read_url( + url, + accept="application/vnd.github+json", + as_json=True, + github_api_token=os.environ.get("GITHUB_API_TOKEN"), ) + max_release = {'tag_name': ''} + for release in releases: + if max_release['tag_name'] < release.get('tag_name', ''): + max_release = release + + return max_release + + def find_local_lib(libname, version): + if not version: + return None + libfn = build_libzip_name(libname, version) + destfile = os.path.join(destdir, libfn) + return libfn if os.path.exists(destfile) else None + if not os.path.exists(destdir): os.makedirs(destdir) - for libname, libfn in libs.items(): - srcfile = urljoin(url, libfn) - destfile = os.path.join(destdir, libfn) - if os.path.exists(destfile): - file_size, file_digest = asset_files.get(libfn, (None, None)) - if file_size and os.path.getsize(destfile) == file_size and read_file_digest(destfile) == file_digest: - print('Using local copy of "{}"'.format(srcfile)) + libs = {} + for libname in ['libxml2', 'libxslt', 'zlib', 'iconv']: + version = os.environ.get('LIBICONV_VERSION' if libname == 'iconv' else f"{libname.upper()}_VERSION") + libfn = find_local_lib(libname, version) + if libfn: + print(f'Using local copy of "{libfn}"') + libs[libname] = libfn + + if None in libs.values(): + # Need to gather version and download URL from winlibs release. + latest_release = read_latest_release() + arch_part = f'.{arch}.' + asset_files = { + asset['name']: (asset['size'], asset['digest']) + for asset in latest_release.get('assets', ()) + if arch_part in asset['name'] + } + release_tag = latest_release['tag_name'] + download_url = f"https://github.com/lxml/libxml2-win-binaries/releases/download/{urlquote(release_tag)}/" + + lib_file_names = list(asset_files) + for libname, libfn in libs.items(): + if libfn: + continue + version = find_max_version(libname, lib_file_names) + libfn = find_local_lib(libname, version) + if libfn: + libs[libname] = libfn + srcfile = urljoin(download_url, libfn) + print(f'Using local copy of "{srcfile}"') continue - print('Retrieving "%s" to "%s"' % (srcfile, destfile)) - urlretrieve(srcfile, destfile) + # Need to download lib. + libfn = build_libzip_name(libname, version) + srcfile = urljoin(download_url, libfn) + destfile = os.path.join(destdir, libfn) - for libname, libfn in libs.items(): - destfile = os.path.join(destdir, libfn) - d = unpack_zipfile(destfile, destdir) - libs[libname] = d + print(f'Retrieving "{srcfile}" to "{destfile}"') + urlretrieve(srcfile, destfile) + libs[libname] = libfn - return libs + lib_dirs = { + libname: unpack_zipfile(os.path.join(destdir, libfn), destdir) + for libname, libfn in libs.items() + } + return lib_dirs def find_top_dir_of_zipfile(zipfile): topdir = None - files = (f.filename for f in zipfile.filelist) + files = [f.filename for f in zipfile.filelist] dirs = [d for d in files if d.endswith('/')] if dirs: dirs.sort(key=len) @@ -328,12 +348,14 @@ def find_max_version(libname, filenames, version_re=None): match = version_re.search(fn) if match: version_string = match.group(1) - versions.append((tuple(map(tryint, version_string.replace("-", ".-").split('.'))), - version_string)) + versions.append(( + tuple(map(tryint, version_string.replace("-", ".-").split('.'))), + version_string, + )) if not versions: raise Exception( "Could not find the most current version of %s from the files: %s" % ( - libname, filenames)) + libname, list(filenames))) versions.sort() version_string = versions[-1][-1] print('Latest version of %s is %s' % (libname, version_string)) @@ -656,7 +678,7 @@ def main(with_zlib=True, download_only=False, platform=None): if platform is None: platform = sys_platform - if sys_platform.startswith('win'): + if platform.startswith('win'): return get_prebuilt_libxml2xslt( download_dir, static_include_dirs, static_library_dirs) diff --git a/pyproject.toml b/pyproject.toml index 993337b32..978e75f2b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ skip = [ ] #test-command = "python {package}/test.py -vv" #test-command = "python -c 'import lxml.etree, lxml.objectify, lxml.html'" -test-command = "python {package}/test.py --no-src -vv" +#test-command = "python {package}/test.py --no-src -vv" [tool.cibuildwheel.linux] #archs = ["x86_64", "aarch64", "i686", "ppc64le", "s390x", "armv7l"] diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx index 2ed3d1e15..3e7dca047 100644 --- a/src/lxml/etree.pyx +++ b/src/lxml/etree.pyx @@ -668,7 +668,7 @@ cdef class DocInfo: return root_name @cython.final - cdef tree.xmlDtd* _get_c_dtd(self): + cdef tree.xmlDtd* _get_c_dtd(self) noexcept: """"Return the DTD. Create it if it does not yet exist.""" cdef xmlDoc* c_doc = self._doc._c_doc cdef xmlNode* c_root_node diff --git a/src/lxml/html/defs.py b/src/lxml/html/defs.py index b70b443cf..18ff1aeae 100644 --- a/src/lxml/html/defs.py +++ b/src/lxml/html/defs.py @@ -47,7 +47,23 @@ 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', - 'type', 'usemap', 'valign', 'value', 'vspace', 'width']) + 'type', 'usemap', 'valign', 'value', 'vspace', 'width', + # ARIA attributes from https://www.w3.org/TR/wai-aria-1.3/ + 'aria-activedescendant', 'aria-atomic', 'aria-autocomplete', + 'aria-braillelabel', 'aria-brailleroledescription', 'aria-busy', + 'aria-checked', 'aria-colcount', 'aria-colindex', 'aria-colindextext', + 'aria-colspan', 'aria-controls', 'aria-current', 'aria-describedby', + 'aria-description', 'aria-details', 'aria-disabled', 'aria-dropeffect', + 'aria-errormessage', 'aria-expanded', 'aria-flowto', 'aria-grabbed', + 'aria-haspopup', 'aria-hidden', 'aria-invalid', 'aria-keyshortcuts', + 'aria-label', 'aria-labelledby', 'aria-level', 'aria-live', 'aria-modal', + 'aria-multiline', 'aria-multiselectable', 'aria-orientation', 'aria-owns', + 'aria-placeholder', 'aria-posinset', 'aria-pressed', 'aria-readonly', + 'aria-relevant', 'aria-required', 'aria-roledescription', 'aria-rowcount', + 'aria-rowindex', 'aria-rowindextext', 'aria-rowspan', 'aria-selected', + 'aria-setsize', 'aria-sort', 'aria-valuemax', 'aria-valuemin', + 'aria-valuenow', 'aria-valuetext', 'role', 'tabindex', + ]) # From http://htmlhelp.com/reference/html40/olist.html top_level_tags = frozenset([