diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c023788..348a2b4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,9 +8,10 @@ on: jobs: mix_test: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 env: MIX_ENV: test + HTML5EVER_BUILD: "true" name: Elixir ${{ matrix.pair.elixir }} / OTP ${{ matrix.pair.otp }} @@ -19,25 +20,22 @@ jobs: matrix: include: - pair: - elixir: 1.11.2 - otp: 23.1.4 + elixir: 1.13.4 + otp: "24.3" - pair: - elixir: 1.12.3 - otp: 24.1.1 + elixir: 1.16.1 + otp: "26.2" lint: lint steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: erlef/setup-beam@v1 with: - otp-version: ${{matrix.pair.otp}} - elixir-version: ${{matrix.pair.elixir}} + otp-version: ${{ matrix.pair.otp }} + elixir-version: ${{ matrix.pair.elixir }} - name: Install minimal stable Rust toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable + uses: dtolnay/rust-toolchain@stable - name: Install Dependencies run: mix deps.get @@ -48,9 +46,9 @@ jobs: - run: mix deps.unlock --check-unused if: ${{ matrix.lint }} - - run: HTML5EVER_BUILD=1 mix deps.compile + - run: mix deps.compile - - run: HTML5EVER_BUILD=1 mix compile --warnings-as-errors + - run: mix compile --warnings-as-errors if: ${{ matrix.lint }} - - run: HTML5EVER_BUILD=1 mix test + - run: mix test diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 98c1708..52c0cd0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,176 +1,95 @@ name: Build precompiled NIFs -env: - NIF_DIRECTORY: "native/html5ever_nif" +permissions: + id-token: write + attestations: write + contents: write on: push: branches: - main - master + paths: + # Just run on main branch if "native" path changed. + - "native/**" + # Also run if this file changes. + - ".github/workflows/release.yml" tags: - - '*' - -defaults: - run: - # Sets the working dir for "run" scripts. 
- # Note that this won't change the directory for actions (tasks with "uses"). - working-directory: "./native/html5ever_nif" + # Tags will always run. + - "*" + pull_request: + paths: + # In PRs we only run if this file changes. + - ".github/workflows/release.yml" + workflow_dispatch: jobs: build_release: - name: NIF ${{ matrix.job.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }}) + name: NIF ${{ matrix.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }}) runs-on: ${{ matrix.job.os }} strategy: fail-fast: false matrix: + nif: ["2.15"] job: - # NIF version 2.16 - - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , nif: "2.16", use-cross: true } - - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.16", use-cross: true } - - { target: aarch64-apple-darwin , os: macos-11 , nif: "2.16" } - - { target: x86_64-apple-darwin , os: macos-11 , nif: "2.16" } - - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.16" } - - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , nif: "2.16", use-cross: true } - - { target: x86_64-pc-windows-gnu , os: windows-2019 , nif: "2.16" } - - { target: x86_64-pc-windows-msvc , os: windows-2019 , nif: "2.16" } - # NIF version 2.15 - - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , nif: "2.15", use-cross: true } - - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.15", use-cross: true } - - { target: aarch64-apple-darwin , os: macos-11 , nif: "2.15" } - - { target: x86_64-apple-darwin , os: macos-11 , nif: "2.15" } - - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.15" } - - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , nif: "2.15", use-cross: true } - - { target: x86_64-pc-windows-gnu , os: windows-2019 , nif: "2.15" } - - { target: x86_64-pc-windows-msvc , os: windows-2019 , nif: "2.15" } - # NIF version 2.14 - - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , nif: "2.14", use-cross: true } - - { target: 
aarch64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.14", use-cross: true } - - { target: aarch64-apple-darwin , os: macos-11 , nif: "2.14" } - - { target: x86_64-apple-darwin , os: macos-11 , nif: "2.14" } - - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.14" } - - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , nif: "2.14", use-cross: true } - - { target: x86_64-pc-windows-gnu , os: windows-2019 , nif: "2.14" } - - { target: x86_64-pc-windows-msvc , os: windows-2019 , nif: "2.14" } + - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , use-cross: true } + - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , use-cross: true } + - { target: aarch64-unknown-linux-musl , os: ubuntu-20.04 , use-cross: true } + - { target: aarch64-apple-darwin , os: macos-13 } + - { target: riscv64gc-unknown-linux-gnu , os: ubuntu-20.04 , use-cross: true } + - { target: x86_64-apple-darwin , os: macos-13 } + - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 } + - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , use-cross: true } + - { target: x86_64-pc-windows-gnu , os: windows-2019 } + - { target: x86_64-pc-windows-msvc , os: windows-2019 } - env: - RUSTLER_NIF_VERSION: ${{ matrix.job.nif }} steps: - name: Checkout source code - uses: actions/checkout@v2 - - - name: Install prerequisites - shell: bash - run: | - case ${{ matrix.job.target }} in - arm-unknown-linux-*) sudo apt-get -y update ; sudo apt-get -y install gcc-arm-linux-gnueabihf ;; - aarch64-unknown-linux-gnu) sudo apt-get -y update ; sudo apt-get -y install gcc-aarch64-linux-gnu ;; - esac + uses: actions/checkout@v4 - - name: Extract crate information + - name: Extract project version shell: bash run: | - echo "PROJECT_NAME=$(sed -n 's/^name = "\(.*\)"/\1/p' Cargo.toml | head -n1)" >> $GITHUB_ENV # Get the project version from mix.exs - echo "PROJECT_VERSION=$(sed -n 's/^ @version "\(.*\)"/\1/p' ../../mix.exs | head -n1)" >> $GITHUB_ENV + echo "PROJECT_VERSION=$(sed 
-n 's/^ @version "\(.*\)"/\1/p' mix.exs | head -n1)" >> $GITHUB_ENV - name: Install Rust toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@stable with: toolchain: stable target: ${{ matrix.job.target }} - override: true - profile: minimal - - name: Show version information (Rust, cargo, GCC) - shell: bash - run: | - gcc --version || true - rustup -V - rustup toolchain list - rustup default - cargo -V - rustc -V - rustc --print=cfg + - name: Build the project + id: build-crate + uses: philss/rustler-precompiled-action@v1.1.4 + with: + project-name: html5ever_nif + project-version: ${{ env.PROJECT_VERSION }} + target: ${{ matrix.job.target }} + nif-version: ${{ matrix.nif }} + use-cross: ${{ matrix.job.use-cross }} + project-dir: "native/html5ever_nif" - - name: Download cross from GitHub releases - uses: giantswarm/install-binary-action@v1.0.0 - if: ${{ matrix.job.use-cross }} + - name: Artifact attestation + uses: actions/attest-build-provenance@v1 with: - binary: "cross" - version: "v0.2.1" - download_url: "https://github.com/rust-embedded/cross/releases/download/${version}/cross-${version}-x86_64-unknown-linux-gnu.tar.gz" - tarball_binary_path: "${binary}" - smoke_test: "${binary} --version" + subject-path: ${{ steps.build-crate.outputs.file-path }} - - name: Build - shell: bash - run: | - if [ "${{ matrix.job.use-cross }}" == "true" ]; then - cross build --release --target=${{ matrix.job.target }} - else - cargo build --release --target=${{ matrix.job.target }} - fi + - name: Artifact upload + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.build-crate.outputs.file-name }} + path: ${{ steps.build-crate.outputs.file-path }} - - name: Rename lib to the final name - id: rename - shell: bash + - name: Write SHA256 to the summary run: | - LIB_PREFIX="lib" - case ${{ matrix.job.target }} in - *-pc-windows-*) LIB_PREFIX="" ;; - esac; - - # Figure out suffix of lib - # See: https://doc.rust-lang.org/reference/linkage.html - 
LIB_SUFFIX=".so" - case ${{ matrix.job.target }} in - *-apple-darwin) LIB_SUFFIX=".dylib" ;; - *-pc-windows-*) LIB_SUFFIX=".dll" ;; - esac; - - CICD_INTERMEDIATES_DIR=$(mktemp -d) - - # Setup paths - LIB_DIR="${CICD_INTERMEDIATES_DIR}/released-lib" - mkdir -p "${LIB_DIR}" - LIB_NAME="${LIB_PREFIX}${{ env.PROJECT_NAME }}${LIB_SUFFIX}" - LIB_PATH="${LIB_DIR}/${LIB_NAME}" - - # Copy the release build lib to the result location - cp "target/${{ matrix.job.target }}/release/${LIB_NAME}" "${LIB_DIR}" - - # Final paths - # In the end we use ".so" for MacOS in the final build - # See: https://www.erlang.org/doc/man/erlang.html#load_nif-2 - LIB_FINAL_SUFFIX="${LIB_SUFFIX}" - case ${{ matrix.job.target }} in - *-apple-darwin) LIB_FINAL_SUFFIX=".so" ;; - esac; - - LIB_FINAL_NAME="${LIB_PREFIX}${PROJECT_NAME}-v${PROJECT_VERSION}-nif-${RUSTLER_NIF_VERSION}-${{ matrix.job.target }}${LIB_FINAL_SUFFIX}" - - # Copy lib to final name on this directory - cp "${LIB_PATH}" "${LIB_FINAL_NAME}" - - tar -cvzf "${LIB_FINAL_NAME}.tar.gz" "${LIB_FINAL_NAME}" - - # Passes the path relative to the root of the project. 
- LIB_FINAL_PATH="${NIF_DIRECTORY}/${LIB_FINAL_NAME}.tar.gz" - - # Let subsequent steps know where to find the lib - echo ::set-output name=LIB_FINAL_PATH::${LIB_FINAL_PATH} - echo ::set-output name=LIB_FINAL_NAME::${LIB_FINAL_NAME}.tar.gz - - - name: "Artifact upload" - uses: actions/upload-artifact@v2 - with: - name: ${{ steps.rename.outputs.LIB_FINAL_NAME }} - path: ${{ steps.rename.outputs.LIB_FINAL_PATH }} + echo "SHA256 for this artifact:" >> $GITHUB_STEP_SUMMARY + echo "${{ steps.build-crate.outputs.file-sha256 }} ${{ steps.build-crate.outputs.file-name }}" >> $GITHUB_STEP_SUMMARY - name: Publish archives and packages - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 with: files: | - ${{ steps.rename.outputs.LIB_FINAL_PATH }} + ${{ steps.build-crate.outputs.file-path }} if: startsWith(github.ref, 'refs/tags/') diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml new file mode 100644 index 0000000..fb66725 --- /dev/null +++ b/.github/workflows/rust-ci.yml @@ -0,0 +1,38 @@ +name: Rust CI +on: + push: + branches: + - master + paths: + - "native/**" + pull_request: + paths: + - "native/**" + workflow_dispatch: + +jobs: + lint-rust: + name: Lint Rust + runs-on: ubuntu-22.04 + strategy: + matrix: + manifest: + - native/html5ever_nif/Cargo.toml + + steps: + - uses: actions/checkout@v4 + + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - uses: Swatinem/rust-cache@v2 + with: + workspaces: | + native/html5ever_nif + + - name: run rustfmt + run: cargo fmt --manifest-path=${{ matrix.manifest }} --all -- --check + + - name: run clippy + run: cargo clippy --manifest-path=${{ matrix.manifest }} -- -Dwarnings diff --git a/CHANGELOG.md b/CHANGELOG.md index b770844..63dd139 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,92 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.16.1] - 2024-05-15 + +### Fixed + +- Fix parsing of HTML 
documents that may start with a comment or an XML doc tag. + +## [0.16.0] - 2024-03-25 + +### Fixed + +- Fix parsing of comments in `parse/1`. +- Avoid panic when parsing content with the "template" tag. + +### Removed + +- Drop support for Elixir 1.12. + +## [0.15.0] - 2023-06-16 + +### Added + +- Add two new functions to parse documents: + + * `parse_with_attributes_as_maps/1` + * `flat_parse_with_attributes_as_maps/1` + + And as the names suggest, they return a document tree with attributes as maps, + instead of lists of pairs. These functions are useful to match node attributes, + since the order of attributes does not matter most of the time. + +### Fixed + +- Use dirty CPU scheduler for all functions. For some reason we were using a + normal scheduler, but this could cause instability. + + With a dirty scheduler we can parse medium to big files without worrying about + lengthy work. Please read https://www.erlang.org/doc/man/erl_nif.html#lengthy_work + for further information. + +### Removed + +- Remove support for Elixir 1.11. + +## [0.14.3] - 2023-05-26 + +### Added + +- Add precompilation target for Linux running on RISC-V 64-bit machines. + This is useful for projects using Nerves. + + Note that this is going to require `rustler_precompiled` v0.6 or above, since + that version includes RISC-V on Linux by default. + +- Add support for OTP 26 by updating the `rustler-sys` package. + +## [0.14.2] - 2023-05-20 + +### Added + +- Add precompilation target for Linux running on ARM64 machines (both musl and gnu ABI). + This is useful for projects using Nerves. + + Note that this is going to require `rustler_precompiled` v0.6 or above, since + that version includes ARM64 on Linux by default. + +## [0.14.1] - 2023-05-20 + +### Added + +- Add support for `rustler_precompiled` v0.6. + +### Changed + +- Update Rustler version in the crate from `v0.26` to `v0.28`. 
+ This shouldn't break anything, but would require the installation of rustler `v0.28` + if needed in the Elixir side. + +- Change the Rust edition to 2021 (it was 2018). This shouldn't change any behaviour. + +## [0.14.0] - 2022-11-04 + +### Changed + +- Require `rustler_precompiled` equal or above `v0.5.2` - thanks [@Benjamin-Philip](https://github.com/Benjamin-Philip). +- Use `Application.compile_env/3` instead of `Application.get_env/3` in the native module. + ## [0.13.1] - 2022-06-24 ### Fixed @@ -91,7 +177,14 @@ is not needed for most of people using this project. - Add support for OTP 24. This was achieved by updating Rustler to v0.22. -[Unreleased]: https://github.com/rusterlium/html5ever_elixir/compare/v0.13.1...HEAD +[Unreleased]: https://github.com/rusterlium/html5ever_elixir/compare/v0.16.1...HEAD +[0.16.1]: https://github.com/rusterlium/html5ever_elixir/compare/v0.16.0...v0.16.1 +[0.16.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.15.0...v0.16.0 +[0.15.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.3...v0.15.0 +[0.14.3]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.2...v0.14.3 +[0.14.2]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.1...v0.14.2 +[0.14.1]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.0...v0.14.1 +[0.14.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.13.1...v0.14.0 [0.13.1]: https://github.com/rusterlium/html5ever_elixir/compare/v0.13.0...v0.13.1 [0.13.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.12.0...v0.13.0 [0.12.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.11.0...v0.12.0 diff --git a/README.md b/README.md index d608439..60db471 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,16 @@ The package can be installed by adding `html5ever` to your list of dependencies ```elixir def deps do - [{:html5ever, "~> 0.13.0"}] + [{:html5ever, "~> 0.16.0"}] end ``` +Or with 
[`Mix.install/1`](https://hexdocs.pm/mix/Mix.html#install/2): + +```elixir +Mix.install([:html5ever]) +``` + ## Forcing compilation By default **you don't need Rust installed** because the lib will try to download @@ -33,7 +39,7 @@ the compilation: ```elixir def deps do [ - {:html5ever, "~> 0.13.0"}, + {:html5ever, "~> 0.16.0"}, {:rustler, ">= 0.0.0", optional: true} ] end diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md index f7a97b4..7e1fdb2 100644 --- a/RELEASE_CHECKLIST.md +++ b/RELEASE_CHECKLIST.md @@ -7,10 +7,11 @@ In order to release a new version to Hex.pm we first need to: 3. commit and create a tag for that version 4. push the changes to the repository with: `git push origin master --tags` 5. wait the CI to build all release files -6. run `mix rustler.download Html5ever.Native --all --print` +6. run `HTML5EVER_BUILD=1 mix rustler_precompiled.download Html5ever.Native --all --print` 7. copy the output of the mix task and add to the release notes 8. run `mix hex.publish` and **make sure the checksum file is present** -in the list of files to be published. +in the list of files to be published. Also make sure that the `target` +directory of `native/html5ever_nif` is **NOT** present. It's important to ensure that we publish the checksum file with the package because otherwise the users won't be able to use the lib diff --git a/lib/html5ever.ex b/lib/html5ever.ex index 8d52fc9..2824a7f 100644 --- a/lib/html5ever.ex +++ b/lib/html5ever.ex @@ -37,8 +37,30 @@ defmodule Html5ever do ]} """ - def parse(html) do - parse_dirty(html) + def parse(html) when is_binary(html) do + Html5ever.Native.parse(html, false) + end + + @doc """ + Same as `parse/1`, but with attributes as maps. + + This is going to remove duplicated attributes, keeping the ones + that appear first. + + ## Example + + iex> Html5ever.parse_with_attributes_as_maps( + ...> "

Hello world

" + ...> ) + {:ok, + [ + {:doctype, "html", "", ""}, + {"html", %{}, [{"head", %{}, []}, {"body", %{}, [{"h1", %{"class" => "title"}, ["Hello world"]}]}]} + ]} + + """ + def parse_with_attributes_as_maps(html) when is_binary(html) do + Html5ever.Native.parse(html, true) end @doc """ @@ -92,27 +114,17 @@ defmodule Html5ever do }} """ - def flat_parse(html) do - flat_parse_dirty(html) + def flat_parse(html) when is_binary(html) do + Html5ever.Native.flat_parse(html, false) end - defp parse_dirty(html) do - case Html5ever.Native.parse_sync(html) do - {:html5ever_nif_result, :ok, result} -> - {:ok, result} - - {:html5ever_nif_result, :error, err} -> - {:error, err} - end - end - - defp flat_parse_dirty(html) do - case Html5ever.Native.flat_parse_sync(html) do - {:html5ever_nif_result, :ok, result} -> - {:ok, result} + @doc """ + Same as `flat_parse/1`, but with attributes as maps. - {:html5ever_nif_result, :error, err} -> - {:error, err} - end + This is going to remove duplicated attributes, keeping the ones + that appear first. + """ + def flat_parse_with_attributes_as_maps(html) when is_binary(html) do + Html5ever.Native.flat_parse(html, true) end end diff --git a/lib/html5ever/native.ex b/lib/html5ever/native.ex index 3036538..d4731bc 100644 --- a/lib/html5ever/native.ex +++ b/lib/html5ever/native.ex @@ -6,7 +6,7 @@ defmodule Html5ever.Native do version = mix_config[:version] github_url = mix_config[:package][:links]["GitHub"] - env_config = Application.get_env(:html5ever, Html5ever, []) + env_config = Application.compile_env(:html5ever, Html5ever, []) # This module will be replaced by the NIF module after # loaded. It throws an error in case the NIF can't be loaded. 
@@ -19,10 +19,8 @@ defmodule Html5ever.Native do System.get_env("HTML5EVER_BUILD") in ["1", "true"] or env_config[:build_from_source], version: version - def parse_sync(_binary), do: err() - def parse_dirty(_binary), do: err() - def flat_parse_sync(_binary), do: err() - def flat_parse_dirty(_binary), do: err() + def parse(_binary, _attrs_as_maps), do: err() + def flat_parse(_binary, _attrs_as_maps), do: err() defp err, do: :erlang.nif_error(:nif_not_loaded) end diff --git a/mix.exs b/mix.exs index 5ee65fe..016a0aa 100644 --- a/mix.exs +++ b/mix.exs @@ -1,14 +1,14 @@ defmodule Html5ever.Mixfile do use Mix.Project - @version "0.13.1" + @version "0.16.1-dev" @repo_url "https://github.com/rusterlium/html5ever_elixir" def project do [ app: :html5ever, version: @version, - elixir: "~> 1.11", + elixir: "~> 1.13", build_embedded: Mix.env() == :prod, start_permanent: Mix.env() == :prod, deps: deps(), @@ -24,8 +24,8 @@ defmodule Html5ever.Mixfile do defp deps do [ - {:rustler_precompiled, "~> 0.4"}, - {:rustler, ">= 0.0.0", optional: true}, + {:rustler_precompiled, "~> 0.8.0"}, + {:rustler, "~> 0.36.0", optional: true}, {:ex_doc, ">= 0.0.0", only: :dev} ] end diff --git a/mix.lock b/mix.lock index dec2ab0..35304b9 100644 --- a/mix.lock +++ b/mix.lock @@ -1,13 +1,21 @@ %{ - "castore": {:hex, :castore, "0.1.17", "ba672681de4e51ed8ec1f74ed624d104c0db72742ea1a5e74edbc770c815182f", [:mix], [], "hexpm", "d9844227ed52d26e7519224525cb6868650c272d4a3d327ce3ca5570c12163f9"}, - "earmark_parser": {:hex, :earmark_parser, "1.4.25", "2024618731c55ebfcc5439d756852ec4e85978a39d0d58593763924d9a15916f", [:mix], [], "hexpm", "56749c5e1c59447f7b7a23ddb235e4b3defe276afc220a6227237f3efe83f51e"}, - "ex_doc": {:hex, :ex_doc, "0.28.4", "001a0ea6beac2f810f1abc3dbf4b123e9593eaa5f00dd13ded024eae7c523298", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 
0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "bf85d003dd34911d89c8ddb8bda1a958af3471a274a4c2150a9c01c78ac3f8ed"}, - "jason": {:hex, :jason, "1.3.0", "fa6b82a934feb176263ad2df0dbd91bf633d4a46ebfdffea0c8ae82953714946", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "53fc1f51255390e0ec7e50f9cb41e751c260d065dcba2bf0d08dc51a4002c2ac"}, - "makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.16.0", "f8c570a0d33f8039513fbccaf7108c5d750f47d8defd44088371191b76492b0b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "28b2cbdc13960a46ae9a8858c4bebdec3c9a6d7b4b9e7f4ed1502f8159f338e7"}, - "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, - "nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"}, - "rustler": {:hex, :rustler, "0.25.0", "32526b51af7e58a740f61941bf923486ce6415a91c3934cc16c281aa201a2240", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6b43a11a37fe79c6234d88c4102ab5dfede7a6a764dc5c7b539956cfa02f3cf4"}, - "rustler_precompiled": {:hex, :rustler_precompiled, "0.5.1", 
"93df423bd7b14b67dcacf994443d132d300623f80756974cac4febeab40af74a", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "3f8cbc8e92eef4e1a71bf441b568b868b16a3730f63f5b803c68073017e30b13"}, - "toml": {:hex, :toml, "0.6.2", "38f445df384a17e5d382befe30e3489112a48d3ba4c459e543f748c2f25dd4d1", [:mix], [], "hexpm", "d013e45126d74c0c26a38d31f5e8e9b83ea19fc752470feb9a86071ca5a672fa"}, + "castore": {:hex, :castore, "1.0.11", "4bbd584741601eb658007339ea730b082cc61f3554cf2e8f39bf693a11b49073", [:mix], [], "hexpm", "e03990b4db988df56262852f20de0f659871c35154691427a5047f4967a16a62"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.43", "34b2f401fe473080e39ff2b90feb8ddfeef7639f8ee0bbf71bb41911831d77c5", [:mix], [], "hexpm", "970a3cd19503f5e8e527a190662be2cee5d98eed1ff72ed9b3d1a3d466692de8"}, + "ex_doc": {:hex, :ex_doc, "0.37.1", "65ca30d242082b95aa852b3b73c9d9914279fff56db5dc7b3859be5504417980", [:mix], [{:earmark_parser, "~> 1.4.42", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "6774f75477733ea88ce861476db031f9399c110640752ca2b400dbbb50491224"}, + "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: 
:telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"}, + "hpax": {:hex, :hpax, "1.0.2", "762df951b0c399ff67cc57c3995ec3cf46d696e41f0bba17da0518d94acd4aac", [:mix], [], "hexpm", "2f09b4c1074e0abd846747329eaa26d535be0eb3d189fa69d812bfb8bfefd32f"}, + "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, + "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, + "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, + "makeup_erlang": {:hex, :makeup_erlang, "1.0.2", "03e1804074b3aa64d5fad7aa64601ed0fb395337b982d9bcf04029d68d51b6a7", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "af33ff7ef368d5893e4a267933e7744e46ce3cf1f61e2dccf53a111ed3aa3727"}, + "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"}, + "mint": {:hex, :mint, "1.6.2", "af6d97a4051eee4f05b5500671d47c3a67dac7386045d87a904126fd4bbcea2e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", 
"5ee441dffc1892f1ae59127f74afe8fd82fda6587794278d924e4d90ea3d63f9"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, + "req": {:hex, :req, "0.5.8", "50d8d65279d6e343a5e46980ac2a70e97136182950833a1968b371e753f6a662", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "d7fc5898a566477e174f26887821a3c5082b243885520ee4b45555f5d53f40ef"}, + "rustler": {:hex, :rustler, "0.36.0", "1decf059c60ec75911241325517c391717a9ad07d43e9a5ffda9d5c9ddd12936", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "03808c7d289da01da29d8d2fe19d07cae9f3d2f05ebaed87f0820a4dcfabe9d5"}, + "rustler_precompiled": {:hex, :rustler_precompiled, "0.8.2", "5f25cbe220a8fac3e7ad62e6f950fcdca5a5a5f8501835d2823e8c74bf4268d5", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 
0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "63d1bd5f8e23096d1ff851839923162096364bac8656a4a3c00d1fff8e83ee0a"}, + "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, + "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, } diff --git a/native/html5ever_nif/.cargo/config b/native/html5ever_nif/.cargo/config.toml similarity index 55% rename from native/html5ever_nif/.cargo/config rename to native/html5ever_nif/.cargo/config.toml index 705291c..89a707d 100644 --- a/native/html5ever_nif/.cargo/config +++ b/native/html5ever_nif/.cargo/config.toml @@ -1,18 +1,6 @@ [profile.release] lto = true -[target.x86_64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - -[target.aarch64-apple-darwin] -rustflags = [ - "-C", "link-arg=-undefined", - "-C", "link-arg=dynamic_lookup", -] - [target.arm-unknown-linux-gnueabihf] linker = "arm-linux-gnueabihf-gcc" @@ -21,3 +9,9 @@ linker = "arm-linux-gnueabihf-gcc" rustflags = [ "-C", "target-feature=-crt-static" ] + +# Same as above +[target.aarch64-unknown-linux-musl] +rustflags = [ + "-C", "target-feature=-crt-static" +] diff --git a/native/html5ever_nif/Cargo.lock b/native/html5ever_nif/Cargo.lock index 99419b7..bf4cd41 100644 --- a/native/html5ever_nif/Cargo.lock +++ b/native/html5ever_nif/Cargo.lock @@ -1,27 +1,30 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] -name = "aho-corasick" -version = "0.7.18" +name = "autocfg" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" -dependencies = [ - "memchr", -] +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] -name = "cfg-if" -version = "1.0.0" +name = "bitflags" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] -name = "crossbeam" -version = "0.2.12" +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd66663db5a988098a89599d4857919b3acf7f61402e61365acfd3919857b9be" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "futf" @@ -35,37 +38,26 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.2.6" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", - "wasi 0.10.2+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "heck" -version = "0.4.0" +version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "html5ever" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" dependencies = [ "log", "mac", @@ -83,31 +75,57 @@ dependencies = [ "lazy_static", "markup5ever", "rustler", - "scoped-pool", "tendril", + "thiserror", +] + +[[package]] +name = "inventory" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b31349d02fe60f80bbbab1a9402364cad7460626d6030494b08ac4a2075bf81" +dependencies = [ + "rustversion", ] [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.122" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] -name = "log" -version = "0.4.14" +name = "libloading" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", + "windows-targets", +] + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", ] +[[package]] +name = "log" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" + [[package]] name = "mac" version = "0.1.1" @@ -116,9 +134,9 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" [[package]] name = "markup5ever" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" dependencies = [ "log", "phf", @@ -129,44 +147,57 @@ dependencies = [ ] [[package]] -name = "memchr" -version = "2.4.1" +name = "new_debug_unreachable" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" [[package]] -name = "new_debug_unreachable" -version = "1.0.4" +name = "once_cell" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] -name = "phf" -version = "0.10.1" +name = "parking_lot" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ - "phf_shared 0.10.0", + "lock_api", + "parking_lot_core", ] [[package]] -name = "phf_codegen" -version = "0.10.0" +name = "parking_lot_core" +version = "0.9.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ - "phf_generator 0.10.0", - "phf_shared 0.10.0", + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", ] [[package]] -name = "phf_generator" -version = "0.8.0" +name = "phf" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "phf_shared 0.8.0", - "rand 0.7.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", ] [[package]] @@ -176,16 +207,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" dependencies = [ "phf_shared 0.10.0", - "rand 0.8.5", + "rand", ] [[package]] -name = "phf_shared" -version = "0.8.0" +name = "phf_generator" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "siphasher", + "phf_shared 0.11.3", + "rand", ] [[package]] @@ -194,14 +226,26 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" dependencies = [ - "siphasher", + "siphasher 0.3.11", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher 1.0.1", ] [[package]] name = "ppv-lite86" -version = "0.2.10" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "precomputed-hash" @@ -211,36 +255,22 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro2" -version = "1.0.29" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] name = "quote" -version = "1.0.9" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", - "rand_pcg", -] - [[package]] name = "rand" version = "0.8.5" @@ -248,18 +278,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", + "rand_chacha", + "rand_core", ] [[package]] @@ -269,158 +289,143 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core 0.6.3", + "rand_core", ] [[package]] name = "rand_core" -version = "0.5.1" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.1.16", + "getrandom", ] [[package]] -name = "rand_core" -version = "0.6.3" +name = "redox_syscall" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "getrandom 0.2.6", + "bitflags", ] [[package]] -name = "rand_hc" -version = "0.2.0" +name = "regex-lite" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "rand_pcg" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "regex" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = 
"regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] name = "rustler" -version = "0.25.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e6617fa86bacfb2de792c12e261e0f456bb9ff15038498ae421715bf4128c5" +checksum = "1f7b219d7473cf473409665a4898d66688b34736e51bb5791098b0d3390e4c98" dependencies = [ - "lazy_static", + "inventory", + "libloading", + "regex-lite", "rustler_codegen", - "rustler_sys", ] [[package]] name = "rustler_codegen" -version = "0.25.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05cda738bc4260019ee078a699fac55ce3577fe2db736b2cc64a4d6696950fa6" +checksum = "743ec5267bd5f18fd88d89f7e729c0f43b97d9c2539959915fa1f234300bb621" dependencies = [ "heck", + "inventory", "proc-macro2", "quote", "syn", ] [[package]] -name = "rustler_sys" -version = "2.2.0" +name = "rustversion" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" + +[[package]] +name = "scopeguard" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff26a42e62d538f82913dd34f60105ecfdffbdb25abdc3c3580b0c622285332" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ - "regex", - "unreachable", + "serde_derive", ] [[package]] -name = "scoped-pool" -version = "1.0.0" +name = "serde_derive" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"817a3a15e704545ce59ed2b5c60a5d32bda4d7869befb8b36667b658a6c00b43" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ - "crossbeam", - "scopeguard", - "variance", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "scopeguard" -version = "0.1.2" +name = "siphasher" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59a076157c1e2dc561d8de585151ee6965d910dd4dcb5dabb7ae3e83981a6c57" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] -name = "serde" -version = "1.0.130" +name = "siphasher" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] -name = "siphasher" -version = "0.3.7" +name = "smallvec" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "string_cache" -version = "0.8.1" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" dependencies = [ - "lazy_static", "new_debug_unreachable", - "phf_shared 0.8.0", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", "precomputed-hash", "serde", ] [[package]] name = "string_cache_codegen" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" dependencies = [ - "phf_generator 0.8.0", - 
"phf_shared 0.8.0", + "phf_generator 0.10.0", + "phf_shared 0.10.0", "proc-macro2", "quote", ] [[package]] name = "syn" -version = "1.0.77" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5239bc68e0fef57495900cfea4e8dc75596d9a319d7e16b1e0a440d24e6fe0a0" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", ] [[package]] @@ -435,20 +440,31 @@ dependencies = [ ] [[package]] -name = "unicode-xid" -version = "0.2.2" +name = "thiserror" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl", +] [[package]] -name = "unreachable" -version = "1.0.0" +name = "thiserror-impl" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ - "void", + "proc-macro2", + "quote", + "syn", ] +[[package]] +name = "unicode-ident" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11cd88e12b17c6494200a9c1b683a04fcac9573ed74cd1b62aeb2727c5592243" + [[package]] name = "utf-8" version = "0.7.6" @@ -456,25 +472,92 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" [[package]] -name = "variance" -version = "0.1.3" +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3abfc2be1fb59663871379ea884fd81de80c496f2274e021c01d6fe56cd77b05" +checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] -name = "void" -version = "1.0.2" +name = "windows-targets" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] [[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" +name = "windows_aarch64_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "zerocopy" +version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/native/html5ever_nif/Cargo.toml b/native/html5ever_nif/Cargo.toml index c9668e2..f2ef192 100644 --- a/native/html5ever_nif/Cargo.toml +++ b/native/html5ever_nif/Cargo.toml @@ -2,7 +2,7 @@ name = "html5ever_nif" version = "0.1.0" authors = ["HansiHE "] -edition = "2018" +edition = "2021" [lib] name = "html5ever_nif" @@ -10,12 +10,13 @@ path = "src/lib.rs" crate-type = ["cdylib"] [dependencies] -rustler = "0.25.0" +# See the Precompilation guide for details about the features: https://github.com/philss/rustler_precompiled/blob/main/PRECOMPILATION_GUIDE.md +rustler = { version = "0.36", default-features = false, features = ["nif_version_2_15"] } -html5ever = "0.26" -markup5ever = "0.11" +html5ever = "0.27" +markup5ever = "0.12" tendril = "0.4" -lazy_static = "1.4" -scoped-pool = "1.0" +lazy_static = "1.5" +thiserror = "2" diff --git a/native/html5ever_nif/Cross.toml 
b/native/html5ever_nif/Cross.toml deleted file mode 100644 index 8ba543c..0000000 --- a/native/html5ever_nif/Cross.toml +++ /dev/null @@ -1,4 +0,0 @@ -[build.env] -passthrough = [ - "RUSTLER_NIF_VERSION" -] diff --git a/native/html5ever_nif/src/common.rs b/native/html5ever_nif/src/common.rs index 0ff8d14..ac97c7e 100644 --- a/native/html5ever_nif/src/common.rs +++ b/native/html5ever_nif/src/common.rs @@ -7,18 +7,18 @@ use tendril::StrTendril; // Encoder for these externally defined types. // Unsure if this is a great way of doing it, but it's the way // that produced the cleanest and least noisy code. -pub struct QNW<'a>(pub &'a QualName); -pub struct STW<'a>(pub &'a StrTendril); +pub struct QualNameWrapper<'a>(pub &'a QualName); +pub struct StrTendrilWrapper<'a>(pub &'a StrTendril); -impl<'b> Encoder for QNW<'b> { +impl Encoder for QualNameWrapper<'_> { fn encode<'a>(&self, env: Env<'a>) -> Term<'a> { - let data: &str = &*self.0.local; + let data: &str = &self.0.local; data.encode(env) } } -impl<'b> Encoder for STW<'b> { +impl Encoder for StrTendrilWrapper<'_> { fn encode<'a>(&self, env: Env<'a>) -> Term<'a> { - let data: &str = &*self.0; + let data: &str = self.0; data.encode(env) } } diff --git a/native/html5ever_nif/src/flat_dom.rs b/native/html5ever_nif/src/flat_dom.rs index b1bf031..f1d0c8f 100644 --- a/native/html5ever_nif/src/flat_dom.rs +++ b/native/html5ever_nif/src/flat_dom.rs @@ -1,57 +1,47 @@ -use html5ever::{ QualName, Attribute }; -use html5ever::tree_builder::{ TreeSink, QuirksMode, NodeOrText, ElementFlags }; +use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; +use html5ever::{Attribute, QualName}; use markup5ever::ExpandedName; use tendril::StrTendril; use std::borrow::Cow; -use rustler::{ Env, Encoder, Term }; +use rustler::{Encoder, Env, Term}; -use crate::common::{ STW, QNW }; +use crate::common::{QualNameWrapper, StrTendrilWrapper}; +use crate::Html5everExError; #[derive(Copy, Clone, PartialEq, Debug)] pub struct 
NodeHandle(pub usize); pub enum PoolOrVec { - Pool { - head: usize, - len: usize, - }, - Vec { - vec: Vec, - } + Pool { head: usize, len: usize }, + Vec { vec: Vec }, } -impl PoolOrVec where T: Clone { - - pub fn new(pool: &Vec) -> Self { +impl PoolOrVec +where + T: Clone, +{ + pub fn new(pool: &[T]) -> Self { PoolOrVec::Pool { head: pool.len(), len: 0, } } - pub fn get<'a>(&'a self, idx: usize, pool: &'a Vec) -> Option<&'a T> { + pub fn get<'a>(&'a self, idx: usize, pool: &'a [T]) -> Option<&'a T> { match self { - PoolOrVec::Pool { head, len } if idx < *len => { - Some(&pool[*head + idx]) - }, - PoolOrVec::Vec { vec } => { - vec.get(idx) - }, + PoolOrVec::Pool { head, len } if idx < *len => Some(&pool[*head + idx]), + PoolOrVec::Vec { vec } => vec.get(idx), _ => None, } } - pub fn as_slice<'a>(&'a self, pool: &'a Vec) -> &'a [T] { + pub fn as_slice<'a>(&'a self, pool: &'a [T]) -> &'a [T] { match self { - PoolOrVec::Pool { head, len } => { - &pool[*head..(*head + *len)] - }, - PoolOrVec::Vec { vec } => { - &*vec - }, + PoolOrVec::Pool { head, len } => &pool[*head..(*head + *len)], + PoolOrVec::Vec { vec } => vec, } } @@ -60,25 +50,23 @@ impl PoolOrVec where T: Clone { PoolOrVec::Pool { head, len } if pool.len() == *head + *len => { pool.push(item); *len += 1; - }, + } val @ PoolOrVec::Pool { .. 
} => { if let PoolOrVec::Pool { head, len } = val { let mut vec = pool[*head..(*head + *len)].to_owned(); vec.push(item); - *val = PoolOrVec::Vec { - vec: vec, - }; + *val = PoolOrVec::Vec { vec }; } else { unreachable!() } - }, + } PoolOrVec::Vec { vec } => { vec.push(item); - }, + } } } - pub fn iter<'a>(&'a self, pool: &'a Vec) -> impl Iterator + 'a { + pub fn iter<'a>(&'a self, pool: &'a [T]) -> impl Iterator + 'a { self.as_slice(pool).iter() } @@ -96,14 +84,14 @@ impl PoolOrVec where T: Clone { vec }, }; - }, + } PoolOrVec::Vec { vec } => { vec.insert(index, item); - }, + } } } - pub fn remove(&mut self, index: usize, pool: &mut Vec) { + pub fn remove(&mut self, index: usize, pool: &mut [T]) { match self { val @ PoolOrVec::Pool { .. } => { *val = PoolOrVec::Vec { @@ -113,13 +101,12 @@ impl PoolOrVec where T: Clone { vec }, }; - }, + } PoolOrVec::Vec { vec } => { vec.remove(index); - }, + } } } - } pub struct Node { @@ -129,18 +116,18 @@ pub struct Node { data: NodeData, } impl Node { - fn new(id: usize, data: NodeData, pool: &Vec) -> Self { + fn new(id: usize, data: NodeData, pool: &[NodeHandle]) -> Self { Node { id: NodeHandle(id), parent: None, children: PoolOrVec::new(pool), - data: data, + data, } } } #[derive(Debug, PartialEq)] -pub enum NodeData{ +pub enum NodeData { Document, DocType { name: StrTendril, @@ -172,7 +159,6 @@ pub struct FlatSink { } impl FlatSink { - pub fn new() -> FlatSink { let mut sink = FlatSink { root: NodeHandle(0), @@ -181,7 +167,8 @@ impl FlatSink { }; // Element 0 is always root - sink.nodes.push(Node::new(0, NodeData::Document, &sink.pool)); + sink.nodes + .push(Node::new(0, NodeData::Document, &sink.pool)); sink } @@ -190,10 +177,10 @@ impl FlatSink { self.root } - pub fn node_mut<'a>(&'a mut self, handle: NodeHandle) -> &'a mut Node { + pub fn node_mut(&mut self, handle: NodeHandle) -> &mut Node { &mut self.nodes[handle.0] } - pub fn node<'a>(&'a self, handle: NodeHandle) -> &'a Node { + pub fn node(&self, handle: NodeHandle) 
-> &Node { &self.nodes[handle.0] } @@ -203,17 +190,12 @@ impl FlatSink { self.nodes.push(node); id } - } fn node_or_text_to_node(sink: &mut FlatSink, not: NodeOrText) -> NodeHandle { match not { NodeOrText::AppendNode(handle) => handle, - NodeOrText::AppendText(text) => { - sink.make_node(NodeData::Text { - contents: text, - }) - }, + NodeOrText::AppendText(text) => sink.make_node(NodeData::Text { contents: text }), } } @@ -229,12 +211,28 @@ impl TreeSink for FlatSink { fn parse_error(&mut self, _msg: Cow<'static, str>) {} fn set_quirks_mode(&mut self, _mode: QuirksMode) {} - fn get_document(&mut self) -> Self::Handle { NodeHandle(0) } - fn get_template_contents(&mut self, _target: &Self::Handle) -> Self::Handle { - panic!("Templates not supported"); + fn get_document(&mut self) -> Self::Handle { + NodeHandle(0) + } + fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle { + // Inspired in https://github.com/servo/html5ever/blob/1a62a39879a1def200dcb87b900265993e6c1c83/rcdom/lib.rs#L235 + // It is not getting the templates contents. But is printing the empty tag. + // TODO: print the contents as text. + let node = self.node(*target); + if let NodeData::Element { + ref template_contents, + .. 
+ } = node.data + { + *template_contents.as_ref().expect("not a template element!") + } else { + panic!("not a template element!") + } } - fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool { x == y } + fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool { + x == y + } fn elem_name(&self, target: &Self::Handle) -> ExpandedName { let node = self.node(*target); match node.data { @@ -243,7 +241,12 @@ impl TreeSink for FlatSink { } } - fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) -> Self::Handle { + fn create_element( + &mut self, + name: QualName, + attrs: Vec, + flags: ElementFlags, + ) -> Self::Handle { let template_contents = if flags.template { Some(self.make_node(NodeData::Document)) } else { @@ -251,30 +254,32 @@ impl TreeSink for FlatSink { }; self.make_node(NodeData::Element { - name: name, - attrs: attrs, + name, + attrs, mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, - template_contents: template_contents, + template_contents, }) } fn create_comment(&mut self, text: StrTendril) -> Self::Handle { - self.make_node(NodeData::Comment { - contents: text, - }) + self.make_node(NodeData::Comment { contents: text }) } fn append(&mut self, parent_id: &Self::Handle, child: NodeOrText) { let handle = node_or_text_to_node(self, child); - self.nodes[parent_id.0].children.push(handle, &mut self.pool); + self.nodes[parent_id.0] + .children + .push(handle, &mut self.pool); self.node_mut(handle).parent = Some(*parent_id); } - fn append_based_on_parent_node(&mut self, - element: &Self::Handle, - prev_element: &Self::Handle, - child: NodeOrText) { + fn append_based_on_parent_node( + &mut self, + element: &Self::Handle, + prev_element: &Self::Handle, + child: NodeOrText, + ) { let has_parent = self.node(*element).parent.is_some(); if has_parent { self.append_before_sibling(element, child); @@ -283,33 +288,53 @@ impl TreeSink for FlatSink { } } - fn append_before_sibling(&mut self, 
sibling: &Self::Handle, new_node: NodeOrText) { + fn append_before_sibling( + &mut self, + sibling: &Self::Handle, + new_node: NodeOrText, + ) { let new_node_handle = node_or_text_to_node(self, new_node); let parent = self.node(*sibling).parent.unwrap(); let parent_node = &mut self.nodes[parent.0]; - let sibling_index = parent_node.children.iter(&self.pool).enumerate() - .find(|&(_, node)| node == sibling).unwrap().0; - parent_node.children.insert(sibling_index, new_node_handle, &mut self.pool); - } - - fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril) { + let sibling_index = parent_node + .children + .iter(&self.pool) + .enumerate() + .find(|&(_, node)| node == sibling) + .unwrap() + .0; + parent_node + .children + .insert(sibling_index, new_node_handle, &mut self.pool); + } + + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ) { let doctype = self.make_node(NodeData::DocType { - name: name, - public_id: public_id, - system_id: system_id, + name, + public_id, + system_id, }); let root = self.root; self.nodes[root.0].children.push(doctype, &mut self.pool); self.node_mut(doctype).parent = Some(self.root); } - fn add_attrs_if_missing(&mut self, target_handle: &Self::Handle, mut add_attrs: Vec) { + fn add_attrs_if_missing( + &mut self, + target_handle: &Self::Handle, + mut add_attrs: Vec, + ) { let target = self.node_mut(*target_handle); match target.data { NodeData::Element { ref mut attrs, .. } => { for attr in add_attrs.drain(..) 
{ - if attrs.iter().find(|a| attr.name == a.name) == None { + if !attrs.iter().any(|a| attr.name == a.name) { attrs.push(attr); } } @@ -321,8 +346,13 @@ impl TreeSink for FlatSink { fn remove_from_parent(&mut self, target: &Self::Handle) { let parent = self.node(*target).parent.unwrap(); let parent_node = &mut self.nodes[parent.0]; - let sibling_index = parent_node.children.iter(&self.pool).enumerate() - .find(|&(_, node)| node == target).unwrap().0; + let sibling_index = parent_node + .children + .iter(&self.pool) + .enumerate() + .find(|&(_, node)| node == target) + .unwrap() + .0; parent_node.children.remove(sibling_index, &mut self.pool); } @@ -347,11 +377,10 @@ impl TreeSink for FlatSink { fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle { self.make_node(NodeData::ProcessingInstruction { - target: target, + target, contents: data, }) } - } impl Encoder for NodeHandle { @@ -360,42 +389,76 @@ impl Encoder for NodeHandle { } } -fn encode_node<'a>(node: &Node, env: Env<'a>, pool: &Vec) -> Term<'a> { - let map = ::rustler::types::map::map_new(env) - .map_put(self::atoms::id().encode(env), node.id.encode(env)).ok().unwrap() - .map_put(self::atoms::parent().encode(env), match node.parent { - Some(handle) => handle.encode(env), - None => self::atoms::nil().encode(env), - }).ok().unwrap(); +fn to_custom_error(_err: rustler::error::Error) -> Html5everExError { + Html5everExError::MapEntry +} + +fn encode_node<'a>( + node: &Node, + env: Env<'a>, + pool: &[NodeHandle], + attributes_as_maps: bool, +) -> Result, Html5everExError> { + let pairs: Vec<(Term, Term)> = vec![ + (atoms::id().encode(env), node.id.encode(env)), + ( + atoms::parent().encode(env), + match node.parent { + Some(handle) => handle.encode(env), + None => atoms::nil().encode(env), + }, + ), + ]; + + let mut map = Term::map_from_pairs(env, &pairs).map_err(to_custom_error)?; match node.data { - NodeData::Document => { - map - .map_put(self::atoms::type_().encode(env), 
self::atoms::document().encode(env)).ok().unwrap() - } - NodeData::Element { ref attrs, ref name, .. } => { - map - .map_put(self::atoms::type_().encode(env), self::atoms::element().encode(env)).ok().unwrap() - .map_put(self::atoms::children().encode(env), node.children.as_slice(pool).encode(env)).ok().unwrap() - .map_put(self::atoms::name().encode(env), QNW(name).encode(env)).ok().unwrap() - .map_put(self::atoms::attrs().encode(env), attrs.iter().map(|attr| { - (QNW(&attr.name), STW(&attr.value)) - }).collect::>().encode(env)).ok().unwrap() - } - NodeData::Text { ref contents } => { - map - .map_put(self::atoms::type_().encode(env), self::atoms::text().encode(env)).ok().unwrap() - .map_put(self::atoms::contents().encode(env), STW(contents).encode(env)).ok().unwrap() - } - NodeData::DocType { .. } => { - map - .map_put(self::atoms::type_().encode(env), self::atoms::doctype().encode(env)).ok().unwrap() - } - NodeData::Comment { ref contents } => { - map - .map_put(self::atoms::type_().encode(env), self::atoms::comment().encode(env)).ok().unwrap() - .map_put(self::atoms::contents().encode(env), STW(contents).encode(env)).ok().unwrap() + NodeData::Document => map + .map_put(atoms::type_().encode(env), atoms::document().encode(env)) + .map_err(to_custom_error), + NodeData::Element { + ref attrs, + ref name, + .. + } => { + let pairs: Vec<(Term, Term)> = vec![ + (atoms::type_().encode(env), atoms::element().encode(env)), + ( + atoms::children().encode(env), + node.children.as_slice(pool).encode(env), + ), + (atoms::name().encode(env), QualNameWrapper(name).encode(env)), + ( + atoms::attrs().encode(env), + attributes_to_term(env, attrs, attributes_as_maps), + ), + ]; + + for (key, value) in pairs { + map = map.map_put(key, value).map_err(to_custom_error)?; + } + + Ok(map) } + NodeData::Text { ref contents } => map + .map_put(atoms::type_().encode(env), atoms::text().encode(env)) + .map_err(to_custom_error)? 
+ .map_put( + atoms::contents().encode(env), + StrTendrilWrapper(contents).encode(env), + ) + .map_err(to_custom_error), + NodeData::DocType { .. } => map + .map_put(atoms::type_().encode(env), atoms::doctype().encode(env)) + .map_err(to_custom_error), + NodeData::Comment { ref contents } => map + .map_put(atoms::type_().encode(env), atoms::comment().encode(env)) + .map_err(to_custom_error)? + .map_put( + atoms::contents().encode(env), + StrTendrilWrapper(contents).encode(env), + ) + .map_err(to_custom_error), _ => unimplemented!(), } } @@ -422,15 +485,27 @@ mod atoms { } } -pub fn flat_sink_to_flat_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> { - let nodes = sink.nodes.iter() - .fold(rustler::types::map::map_new(env), |acc, node| { - acc.map_put(node.id.encode(env), encode_node(node, env, &sink.pool)).ok().unwrap() - }); +pub fn flat_sink_to_flat_term<'a>( + env: Env<'a>, + sink: &FlatSink, + attributes_as_maps: bool, +) -> Result, Html5everExError> { + let mut nodes_map = rustler::types::map::map_new(env); + + for node in sink.nodes.iter() { + nodes_map = nodes_map + .map_put( + node.id.encode(env), + encode_node(node, env, &sink.pool, attributes_as_maps)?, + ) + .map_err(to_custom_error)?; + } ::rustler::types::map::map_new(env) - .map_put(self::atoms::nodes().encode(env), nodes).ok().unwrap() - .map_put(self::atoms::root().encode(env), sink.root.encode(env)).ok().unwrap() + .map_put(atoms::nodes().encode(env), nodes_map) + .map_err(to_custom_error)? 
+ .map_put(atoms::root().encode(env), sink.root.encode(env)) + .map_err(to_custom_error) } struct RecState { @@ -439,16 +514,20 @@ struct RecState { child_base: usize, } -pub fn flat_sink_to_rec_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> { +pub fn flat_sink_to_rec_term<'a>( + env: Env<'a>, + sink: &FlatSink, + attributes_as_maps: bool, +) -> Result, Html5everExError> { let mut child_stack = vec![]; - let mut stack: Vec = vec![ - RecState { - node: sink.root(), - child_base: 0, - child_n: 0, - }, - ]; + let mut stack: Vec = vec![RecState { + node: sink.root(), + child_base: 0, + child_n: 0, + }]; + let mut comments_bf_doctype = 0usize; + let mut read_doctype = false; loop { let mut top = stack.pop().unwrap(); @@ -482,34 +561,51 @@ pub fn flat_sink_to_rec_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> { } assert_eq!(stack.len(), 0); - return term; - }, - NodeData::DocType { name, public_id, system_id } => { - assert!(stack.len() > 0); - assert!(child_stack.len() == 0); + return Ok(term); + } + NodeData::DocType { + name, + public_id, + system_id, + } => { + assert!(!stack.is_empty()); + assert!(child_stack.is_empty() || comments_bf_doctype == child_stack.len()); + + read_doctype = true; term = ( - self::atoms::doctype(), - STW(name), - STW(public_id), - STW(system_id), - ).encode(env); - }, + atoms::doctype(), + StrTendrilWrapper(name), + StrTendrilWrapper(public_id), + StrTendrilWrapper(system_id), + ) + .encode(env); + } NodeData::Element { attrs, name, .. 
} => { - assert!(stack.len() > 0); + assert!(!stack.is_empty()); + + let attribute_terms = attributes_to_term(env, attrs, attributes_as_maps); - let attribute_terms: Vec> = attrs.iter() - .map(|a| (QNW(&a.name), STW(&a.value)).encode(env)) - .collect(); - term = (QNW(name), attribute_terms, &child_stack[top.child_base..]).encode(env); + term = ( + QualNameWrapper(name), + attribute_terms, + &child_stack[top.child_base..], + ) + .encode(env); for _ in 0..(child_stack.len() - top.child_base) { child_stack.pop(); } - }, + } NodeData::Text { contents } => { - term = STW(contents).encode(env); - }, - NodeData::Comment { .. } => continue, + term = StrTendrilWrapper(contents).encode(env); + } + NodeData::Comment { contents } => { + if !read_doctype { + comments_bf_doctype += 1 + }; + + term = (atoms::comment(), StrTendrilWrapper(contents)).encode(env); + } _ => unimplemented!(""), } @@ -518,11 +614,19 @@ pub fn flat_sink_to_rec_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> { } } - - - - - - - - +fn attributes_to_term<'a>( + env: Env<'a>, + attributes: &[Attribute], + attributes_as_maps: bool, +) -> Term<'a> { + let pairs: Vec<(QualNameWrapper, StrTendrilWrapper)> = attributes + .iter() + .map(|a| (QualNameWrapper(&a.name), StrTendrilWrapper(&a.value))) + .collect(); + + if attributes_as_maps { + Term::map_from_pairs(env, &pairs).unwrap() + } else { + pairs.encode(env) + } +} diff --git a/native/html5ever_nif/src/lib.rs b/native/html5ever_nif/src/lib.rs index 4f23cf7..e488f09 100644 --- a/native/html5ever_nif/src/lib.rs +++ b/native/html5ever_nif/src/lib.rs @@ -1,101 +1,57 @@ +use flat_dom::FlatSink; use rustler::types::binary::Binary; -use rustler::{Decoder, Encoder, Env, Error, NifResult, Term}; +use rustler::{Env, Term}; -//use html5ever::rcdom::RcDom; use tendril::TendrilSink; +use thiserror::Error; mod common; mod flat_dom; -mod atoms { - rustler::atoms! 
{ - html5ever_nif_result, +#[derive(Error, Debug)] +pub enum Html5everExError { + #[error("cannot transform bytes from binary to a valid UTF8 string")] + BytesToUtf8(#[from] std::str::Utf8Error), - ok, - error, - nif_panic, - - doctype, - comment, - - none, - some, - all, - } + #[error("cannot insert entry in a map")] + MapEntry, } -#[derive(PartialEq, Eq)] -enum ErrorLevel { - None, - Some, - All, -} -impl<'a> Decoder<'a> for ErrorLevel { - fn decode(term: Term<'a>) -> NifResult { - if atoms::none() == term { - Ok(ErrorLevel::None) - } else if atoms::some() == term { - Ok(ErrorLevel::Some) - } else if atoms::all() == term { - Ok(ErrorLevel::All) - } else { - Err(Error::BadArg) - } +impl rustler::Encoder for Html5everExError { + fn encode<'a>(&self, env: Env<'a>) -> Term<'a> { + format!("{self}").encode(env) } } -#[rustler::nif] -fn parse_sync<'a>(env: Env<'a>, binary: Binary) -> Term<'a> { - parse(env, binary) -} - #[rustler::nif(schedule = "DirtyCpu")] -fn parse_dirty<'a>(env: Env<'a>, binary: Binary) -> Term<'a> { - parse(env, binary) -} - -fn parse<'a>(env: Env<'a>, binary: Binary) -> Term<'a> { - let sink = flat_dom::FlatSink::new(); - - let utf = std::str::from_utf8(binary.as_slice()).unwrap(); - - let parser = html5ever::parse_document(sink, Default::default()); - let result = parser.one(utf); - - let result_term = flat_dom::flat_sink_to_rec_term(env, &result); - - (atoms::html5ever_nif_result(), atoms::ok(), result_term).encode(env) -} +fn parse<'a>( + env: Env<'a>, + binary: Binary, + attributes_as_maps: bool, +) -> Result, Html5everExError> { + let flat_sink = build_flat_sink(binary.as_slice())?; -#[rustler::nif] -fn flat_parse_sync<'a>(env: Env<'a>, binary: Binary) -> Term<'a> { - flat_parse(env, binary) + flat_dom::flat_sink_to_rec_term(env, &flat_sink, attributes_as_maps) } #[rustler::nif(schedule = "DirtyCpu")] -fn flat_parse_dirty<'a>(env: Env<'a>, binary: Binary) -> Term<'a> { - flat_parse(env, binary) -} +fn flat_parse<'a>( + env: Env<'a>, + 
binary: Binary, + attributes_as_maps: bool, +) -> Result, Html5everExError> { + let flat_sink = build_flat_sink(binary.as_slice())?; -fn flat_parse<'a>(env: Env<'a>, binary: Binary) -> Term<'a> { - let sink = flat_dom::FlatSink::new(); + flat_dom::flat_sink_to_flat_term(env, &flat_sink, attributes_as_maps) +} - let utf = std::str::from_utf8(binary.as_slice()).unwrap(); +fn build_flat_sink(bin_slice: &[u8]) -> Result { + let utf8 = std::str::from_utf8(bin_slice)?; + let sink = flat_dom::FlatSink::new(); let parser = html5ever::parse_document(sink, Default::default()); - let result = parser.one(utf); - - let result_term = flat_dom::flat_sink_to_flat_term(env, &result); - (atoms::html5ever_nif_result(), atoms::ok(), result_term).encode(env) + Ok(parser.one(utf8)) } -rustler::init!( - "Elixir.Html5ever.Native", - [parse_sync, parse_dirty, flat_parse_sync, flat_parse_dirty], - load = on_load -); - -fn on_load<'a>(_env: Env<'a>, _load_info: Term<'a>) -> bool { - true -} +rustler::init!("Elixir.Html5ever.Native"); diff --git a/test/html5ever_test.exs b/test/html5ever_test.exs index d0bb592..44797dc 100644 --- a/test/html5ever_test.exs +++ b/test/html5ever_test.exs @@ -3,14 +3,31 @@ defmodule Html5everTest do doctest Html5ever def read_html(name) do - dir = to_string(:code.priv_dir(:html5ever)) <> "/test_data/" - File.read!(dir <> name) + path = Path.join([:code.priv_dir(:html5ever), "test_data", name]) + File.read!(path) end test "parse basic html" do - html = "" - ret = {:ok, [{"html", [], [{"head", [], []}, {"body", [], []}]}]} - assert Html5ever.parse(html) == ret + html = "

Hello

" + + assert Html5ever.parse(html) == + {:ok, + [ + {"html", [], + [ + {"head", [], []}, + {"body", [], [{"h1", [], ["Hello"]}, {:comment, " my comment "}]} + ]} + ]} + end + + test "does not parse with not valid UTF8 binary" do + invalid = + <<98, 29, 104, 122, 46, 145, 14, 37, 122, 155, 227, 121, 49, 120, 108, 209, 155, 113, 229, + 98, 90, 181, 146>> + + assert Html5ever.parse(invalid) == + {:error, "cannot transform bytes from binary to a valid UTF8 string"} end test "flat parse basic html" do @@ -38,6 +55,41 @@ defmodule Html5everTest do assert Html5ever.flat_parse(html) == ret end + test "does not flat parse with not valid UTF8 binary" do + invalid = + <<98, 29, 104, 122, 46, 145, 14, 37, 122, 155, 227, 121, 49, 120, 108, 209, 155, 113, 229, + 98, 90, 181, 146>> + + assert Html5ever.flat_parse(invalid) == + {:error, "cannot transform bytes from binary to a valid UTF8 string"} + end + + test "flat parse basic html with attributes as maps" do + # Duplicated attribute is removed. + html = "" + + ret = + {:ok, + %{ + nodes: %{ + 0 => %{id: 0, parent: nil, type: :document}, + 1 => %{children: [2, 3], id: 1, parent: 0, type: :element, attrs: %{}, name: "html"}, + 2 => %{children: [], id: 2, parent: 1, type: :element, attrs: %{}, name: "head"}, + 3 => %{ + children: [], + id: 3, + parent: 1, + type: :element, + attrs: %{"test" => "woo", "class" => "content"}, + name: "body" + } + }, + root: 0 + }} + + assert Html5ever.flat_parse_with_attributes_as_maps(html) == ret + end + test "parse example.com html" do html = read_html("example.html") assert {:ok, _} = Html5ever.parse(html) @@ -147,4 +199,155 @@ defmodule Html5everTest do ]} ]} = parsed end + + test "reasonably deep html with attributes as maps" do + html = """ + + + + Test + + +
+ +
+ + + very deep content + + +
+ +
+
+ + + """ + + parsed = Html5ever.parse_with_attributes_as_maps(html) + + assert {:ok, + [ + {:doctype, "html", "", ""}, + {"html", %{}, + [ + {"head", %{}, ["\n", " ", {"title", %{}, ["Test"]}, "\n", " "]}, + "\n", + " ", + {"body", %{}, + [ + "\n", + " ", + {"div", %{"class" => "content"}, + [ + "\n", + " ", + {"span", %{}, + [ + "\n", + " ", + {"div", %{}, + [ + "\n", + " ", + {"span", %{}, + [ + "\n", + " ", + {"small", %{}, + ["\n", " very deep content", "\n", " "]}, + "\n", + " " + ]}, + "\n", + " " + ]}, + "\n", + " ", + {"img", %{"src" => "file.jpg"}, []}, + "\n", + " " + ]}, + "\n", + " " + ]}, + "\n", + " ", + "\n", + "\n" + ]} + ]} + ]} = parsed + end + + test "parse html with a template tag ignores template content" do + html = """ + + + With template + +

Document

+ + + + """ + + assert Html5ever.parse(html) == + {:ok, + [ + {:doctype, "html", "", ""}, + {"html", [], + [ + {"head", [], [{"title", [], ["With template"]}]}, + "\n", + {"body", [], + ["\n", {"h1", [], ["Document"]}, "\n", {"template", [], []}, "\n", "\n", "\n"]} + ]} + ]} + end + + test "parse html starting with a XML tag" do + html = """ + + + + + Hello + + link + + + """ + + assert Html5ever.parse(html) == + {:ok, + [ + {:comment, "?xml version=\"1.0\" encoding=\"UTF-8\"?"}, + {:comment, " also a comment is allowed "}, + {:doctype, "html", "-//W3C//DTD XHTML 1.0 Strict//EN", + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"}, + { + "html", + [{"xmlns", "http://www.w3.org/1999/xhtml"}, {"xml:lang", "en"}, {"lang", "en"}], + [ + {"head", [], [{"title", [], ["Hello"]}]}, + "\n", + " ", + {"body", [], + [ + "\n", + " ", + {"a", [{"id", "anchor"}, {"href", "https://example.com"}], ["link"]}, + "\n", + " ", + "\n", + "\n" + ]} + ] + } + ]} + end end