diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c023788..348a2b4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,9 +8,10 @@ on:
jobs:
mix_test:
- runs-on: ubuntu-18.04
+ runs-on: ubuntu-22.04
env:
MIX_ENV: test
+ HTML5EVER_BUILD: "true"
name: Elixir ${{ matrix.pair.elixir }} / OTP ${{ matrix.pair.otp }}
@@ -19,25 +20,22 @@ jobs:
matrix:
include:
- pair:
- elixir: 1.11.2
- otp: 23.1.4
+ elixir: 1.13.4
+ otp: "24.3"
- pair:
- elixir: 1.12.3
- otp: 24.1.1
+ elixir: 1.16.1
+ otp: "26.2"
lint: lint
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4
- uses: erlef/setup-beam@v1
with:
- otp-version: ${{matrix.pair.otp}}
- elixir-version: ${{matrix.pair.elixir}}
+ otp-version: ${{ matrix.pair.otp }}
+ elixir-version: ${{ matrix.pair.elixir }}
- name: Install minimal stable Rust toolchain
- uses: actions-rs/toolchain@v1
- with:
- profile: minimal
- toolchain: stable
+ uses: dtolnay/rust-toolchain@stable
- name: Install Dependencies
run: mix deps.get
@@ -48,9 +46,9 @@ jobs:
- run: mix deps.unlock --check-unused
if: ${{ matrix.lint }}
- - run: HTML5EVER_BUILD=1 mix deps.compile
+ - run: mix deps.compile
- - run: HTML5EVER_BUILD=1 mix compile --warnings-as-errors
+ - run: mix compile --warnings-as-errors
if: ${{ matrix.lint }}
- - run: HTML5EVER_BUILD=1 mix test
+ - run: mix test
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 98c1708..52c0cd0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,176 +1,95 @@
name: Build precompiled NIFs
-env:
- NIF_DIRECTORY: "native/html5ever_nif"
+permissions:
+ id-token: write
+ attestations: write
+ contents: write
on:
push:
branches:
- main
- master
+ paths:
+ # Just run on main branch if "native" path changed.
+ - "native/**"
+ # Also run if this file changes.
+ - ".github/workflows/release.yml"
tags:
- - '*'
-
-defaults:
- run:
- # Sets the working dir for "run" scripts.
- # Note that this won't change the directory for actions (tasks with "uses").
- working-directory: "./native/html5ever_nif"
+ # Tags will always run.
+ - "*"
+ pull_request:
+ paths:
+ # In PRs we only run if this file changes.
+ - ".github/workflows/release.yml"
+ workflow_dispatch:
jobs:
build_release:
- name: NIF ${{ matrix.job.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }})
+ name: NIF ${{ matrix.nif }} - ${{ matrix.job.target }} (${{ matrix.job.os }})
runs-on: ${{ matrix.job.os }}
strategy:
fail-fast: false
matrix:
+ nif: ["2.15"]
job:
- # NIF version 2.16
- - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , nif: "2.16", use-cross: true }
- - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.16", use-cross: true }
- - { target: aarch64-apple-darwin , os: macos-11 , nif: "2.16" }
- - { target: x86_64-apple-darwin , os: macos-11 , nif: "2.16" }
- - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.16" }
- - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , nif: "2.16", use-cross: true }
- - { target: x86_64-pc-windows-gnu , os: windows-2019 , nif: "2.16" }
- - { target: x86_64-pc-windows-msvc , os: windows-2019 , nif: "2.16" }
- # NIF version 2.15
- - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , nif: "2.15", use-cross: true }
- - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.15", use-cross: true }
- - { target: aarch64-apple-darwin , os: macos-11 , nif: "2.15" }
- - { target: x86_64-apple-darwin , os: macos-11 , nif: "2.15" }
- - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.15" }
- - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , nif: "2.15", use-cross: true }
- - { target: x86_64-pc-windows-gnu , os: windows-2019 , nif: "2.15" }
- - { target: x86_64-pc-windows-msvc , os: windows-2019 , nif: "2.15" }
- # NIF version 2.14
- - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , nif: "2.14", use-cross: true }
- - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.14", use-cross: true }
- - { target: aarch64-apple-darwin , os: macos-11 , nif: "2.14" }
- - { target: x86_64-apple-darwin , os: macos-11 , nif: "2.14" }
- - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , nif: "2.14" }
- - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , nif: "2.14", use-cross: true }
- - { target: x86_64-pc-windows-gnu , os: windows-2019 , nif: "2.14" }
- - { target: x86_64-pc-windows-msvc , os: windows-2019 , nif: "2.14" }
+ - { target: arm-unknown-linux-gnueabihf , os: ubuntu-20.04 , use-cross: true }
+ - { target: aarch64-unknown-linux-gnu , os: ubuntu-20.04 , use-cross: true }
+ - { target: aarch64-unknown-linux-musl , os: ubuntu-20.04 , use-cross: true }
+ - { target: aarch64-apple-darwin , os: macos-13 }
+ - { target: riscv64gc-unknown-linux-gnu , os: ubuntu-20.04 , use-cross: true }
+ - { target: x86_64-apple-darwin , os: macos-13 }
+ - { target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 }
+ - { target: x86_64-unknown-linux-musl , os: ubuntu-20.04 , use-cross: true }
+ - { target: x86_64-pc-windows-gnu , os: windows-2019 }
+ - { target: x86_64-pc-windows-msvc , os: windows-2019 }
- env:
- RUSTLER_NIF_VERSION: ${{ matrix.job.nif }}
steps:
- name: Checkout source code
- uses: actions/checkout@v2
-
- - name: Install prerequisites
- shell: bash
- run: |
- case ${{ matrix.job.target }} in
- arm-unknown-linux-*) sudo apt-get -y update ; sudo apt-get -y install gcc-arm-linux-gnueabihf ;;
- aarch64-unknown-linux-gnu) sudo apt-get -y update ; sudo apt-get -y install gcc-aarch64-linux-gnu ;;
- esac
+ uses: actions/checkout@v4
- - name: Extract crate information
+ - name: Extract project version
shell: bash
run: |
- echo "PROJECT_NAME=$(sed -n 's/^name = "\(.*\)"/\1/p' Cargo.toml | head -n1)" >> $GITHUB_ENV
# Get the project version from mix.exs
- echo "PROJECT_VERSION=$(sed -n 's/^ @version "\(.*\)"/\1/p' ../../mix.exs | head -n1)" >> $GITHUB_ENV
+ echo "PROJECT_VERSION=$(sed -n 's/^ @version "\(.*\)"/\1/p' mix.exs | head -n1)" >> $GITHUB_ENV
- name: Install Rust toolchain
- uses: actions-rs/toolchain@v1
+ uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable
target: ${{ matrix.job.target }}
- override: true
- profile: minimal
- - name: Show version information (Rust, cargo, GCC)
- shell: bash
- run: |
- gcc --version || true
- rustup -V
- rustup toolchain list
- rustup default
- cargo -V
- rustc -V
- rustc --print=cfg
+ - name: Build the project
+ id: build-crate
+ uses: philss/rustler-precompiled-action@v1.1.4
+ with:
+ project-name: html5ever_nif
+ project-version: ${{ env.PROJECT_VERSION }}
+ target: ${{ matrix.job.target }}
+ nif-version: ${{ matrix.nif }}
+ use-cross: ${{ matrix.job.use-cross }}
+ project-dir: "native/html5ever_nif"
- - name: Download cross from GitHub releases
- uses: giantswarm/install-binary-action@v1.0.0
- if: ${{ matrix.job.use-cross }}
+ - name: Artifact attestation
+ uses: actions/attest-build-provenance@v1
with:
- binary: "cross"
- version: "v0.2.1"
- download_url: "https://github.com/rust-embedded/cross/releases/download/${version}/cross-${version}-x86_64-unknown-linux-gnu.tar.gz"
- tarball_binary_path: "${binary}"
- smoke_test: "${binary} --version"
+ subject-path: ${{ steps.build-crate.outputs.file-path }}
- - name: Build
- shell: bash
- run: |
- if [ "${{ matrix.job.use-cross }}" == "true" ]; then
- cross build --release --target=${{ matrix.job.target }}
- else
- cargo build --release --target=${{ matrix.job.target }}
- fi
+ - name: Artifact upload
+ uses: actions/upload-artifact@v4
+ with:
+ name: ${{ steps.build-crate.outputs.file-name }}
+ path: ${{ steps.build-crate.outputs.file-path }}
- - name: Rename lib to the final name
- id: rename
- shell: bash
+ - name: Write SHA256 to the summary
run: |
- LIB_PREFIX="lib"
- case ${{ matrix.job.target }} in
- *-pc-windows-*) LIB_PREFIX="" ;;
- esac;
-
- # Figure out suffix of lib
- # See: https://doc.rust-lang.org/reference/linkage.html
- LIB_SUFFIX=".so"
- case ${{ matrix.job.target }} in
- *-apple-darwin) LIB_SUFFIX=".dylib" ;;
- *-pc-windows-*) LIB_SUFFIX=".dll" ;;
- esac;
-
- CICD_INTERMEDIATES_DIR=$(mktemp -d)
-
- # Setup paths
- LIB_DIR="${CICD_INTERMEDIATES_DIR}/released-lib"
- mkdir -p "${LIB_DIR}"
- LIB_NAME="${LIB_PREFIX}${{ env.PROJECT_NAME }}${LIB_SUFFIX}"
- LIB_PATH="${LIB_DIR}/${LIB_NAME}"
-
- # Copy the release build lib to the result location
- cp "target/${{ matrix.job.target }}/release/${LIB_NAME}" "${LIB_DIR}"
-
- # Final paths
- # In the end we use ".so" for MacOS in the final build
- # See: https://www.erlang.org/doc/man/erlang.html#load_nif-2
- LIB_FINAL_SUFFIX="${LIB_SUFFIX}"
- case ${{ matrix.job.target }} in
- *-apple-darwin) LIB_FINAL_SUFFIX=".so" ;;
- esac;
-
- LIB_FINAL_NAME="${LIB_PREFIX}${PROJECT_NAME}-v${PROJECT_VERSION}-nif-${RUSTLER_NIF_VERSION}-${{ matrix.job.target }}${LIB_FINAL_SUFFIX}"
-
- # Copy lib to final name on this directory
- cp "${LIB_PATH}" "${LIB_FINAL_NAME}"
-
- tar -cvzf "${LIB_FINAL_NAME}.tar.gz" "${LIB_FINAL_NAME}"
-
- # Passes the path relative to the root of the project.
- LIB_FINAL_PATH="${NIF_DIRECTORY}/${LIB_FINAL_NAME}.tar.gz"
-
- # Let subsequent steps know where to find the lib
- echo ::set-output name=LIB_FINAL_PATH::${LIB_FINAL_PATH}
- echo ::set-output name=LIB_FINAL_NAME::${LIB_FINAL_NAME}.tar.gz
-
- - name: "Artifact upload"
- uses: actions/upload-artifact@v2
- with:
- name: ${{ steps.rename.outputs.LIB_FINAL_NAME }}
- path: ${{ steps.rename.outputs.LIB_FINAL_PATH }}
+ echo "SHA256 for this artifact:" >> $GITHUB_STEP_SUMMARY
+ echo "${{ steps.build-crate.outputs.file-sha256 }} ${{ steps.build-crate.outputs.file-name }}" >> $GITHUB_STEP_SUMMARY
- name: Publish archives and packages
- uses: softprops/action-gh-release@v1
+ uses: softprops/action-gh-release@v2
with:
files: |
- ${{ steps.rename.outputs.LIB_FINAL_PATH }}
+ ${{ steps.build-crate.outputs.file-path }}
if: startsWith(github.ref, 'refs/tags/')
diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml
new file mode 100644
index 0000000..fb66725
--- /dev/null
+++ b/.github/workflows/rust-ci.yml
@@ -0,0 +1,38 @@
+name: Rust CI
+on:
+ push:
+ branches:
+ - master
+ paths:
+ - "native/**"
+ pull_request:
+ paths:
+ - "native/**"
+ workflow_dispatch:
+
+jobs:
+ lint-rust:
+ name: Lint Rust
+ runs-on: ubuntu-22.04
+ strategy:
+ matrix:
+ manifest:
+ - native/html5ever_nif/Cargo.toml
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: dtolnay/rust-toolchain@stable
+ with:
+ components: rustfmt, clippy
+
+ - uses: Swatinem/rust-cache@v2
+ with:
+ workspaces: |
+ native/html5ever_nif
+
+ - name: run rustfmt
+ run: cargo fmt --manifest-path=${{ matrix.manifest }} --all -- --check
+
+ - name: run clippy
+ run: cargo clippy --manifest-path=${{ matrix.manifest }} -- -Dwarnings
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b770844..63dd139 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,92 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
+## [0.16.1] - 2024-05-15
+
+### Fixed
+
+- Fix parsing of HTML documents that may start with a comment or an XML doc tag.
+
+## [0.16.0] - 2024-03-25
+
+### Fixed
+
+- Fix parsing of comments in `parse/1`.
+- Avoid panic when parsing content with the "template" tag.
+
+### Removed
+
+- Drop support for Elixir 1.12
+
+## [0.15.0] - 2023-06-16
+
+### Added
+
+- Add two new functions to parse documents:
+
+ * `parse_with_attributes_as_maps/1`
+ * `flat_parse_with_attributes_as_maps/1`
+
+ And as the names suggest, it returns a document tree with attributes as maps,
+ instead of lists of pairs. These functions are useful to match node attributes,
+ since the order of attributes does not matter most of the time.
+
+### Fixed
+
+- Use dirty CPU scheduler for all functions. For some reason we were using a
+ normal scheduler, but this could cause instability.
+
+ With a dirty scheduler we can parse medium to big files without worrying about
+ lengthy work. Please read https://www.erlang.org/doc/man/erl_nif.html#lengthy_work
+ for further information.
+
+### Removed
+
+- Remove support for Elixir 1.11.
+
+## [0.14.3] - 2023-05-26
+
+### Added
+
+- Add precompilation target for Linux running on RISC-V 64 bits machines.
+ This is useful for projects using Nerves.
+
+ Note that this is going to require `rustler_precompiled` v0.6 or above, since
+ that version includes RISC-V on Linux among its default targets.
+
+- Add support for OTP 26 by updating the `rustler-sys` package.
+
+## [0.14.2] - 2023-05-20
+
+### Added
+
+- Add precompilation target for Linux running on ARM64 machines (both musl and gnu ABI).
+ This is useful for projects using Nerves.
+
+ Note that this is going to require `rustler_precompiled` v0.6 or above, since
+ that version includes ARM64 on Linux among its default targets.
+
+## [0.14.1] - 2023-05-20
+
+### Added
+
+- Add support for `rustler_precompiled` v0.6.
+
+### Changed
+
+- Update Rustler version in the crate from `v0.26` to `v0.28`.
+ This shouldn't break anything, but would require the installation of rustler `v0.28`
+ if needed in the Elixir side.
+
+- Change the Rust edition to 2021 (it was 2018). This shouldn't change any behaviour.
+
+## [0.14.0] - 2022-11-04
+
+### Changed
+
+- Require `rustler_precompiled` equal to or above `v0.5.2` - thanks [@Benjamin-Philip](https://github.com/Benjamin-Philip).
+- Use `Application.compile_env/3` instead of `Application.get_env/3` in the native module.
+
## [0.13.1] - 2022-06-24
### Fixed
@@ -91,7 +177,14 @@ is not needed for most of people using this project.
- Add support for OTP 24. This was achieved by updating Rustler to v0.22.
-[Unreleased]: https://github.com/rusterlium/html5ever_elixir/compare/v0.13.1...HEAD
+[Unreleased]: https://github.com/rusterlium/html5ever_elixir/compare/v0.16.1...HEAD
+[0.16.1]: https://github.com/rusterlium/html5ever_elixir/compare/v0.16.0...v0.16.1
+[0.16.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.15.0...v0.16.0
+[0.15.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.3...v0.15.0
+[0.14.3]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.2...v0.14.3
+[0.14.2]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.1...v0.14.2
+[0.14.1]: https://github.com/rusterlium/html5ever_elixir/compare/v0.14.0...v0.14.1
+[0.14.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.13.1...v0.14.0
[0.13.1]: https://github.com/rusterlium/html5ever_elixir/compare/v0.13.0...v0.13.1
[0.13.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.12.0...v0.13.0
[0.12.0]: https://github.com/rusterlium/html5ever_elixir/compare/v0.11.0...v0.12.0
diff --git a/README.md b/README.md
index d608439..60db471 100644
--- a/README.md
+++ b/README.md
@@ -12,10 +12,16 @@ The package can be installed by adding `html5ever` to your list of dependencies
```elixir
def deps do
- [{:html5ever, "~> 0.13.0"}]
+ [{:html5ever, "~> 0.16.0"}]
end
```
+Or with [`Mix.install/2`](https://hexdocs.pm/mix/Mix.html#install/2):
+
+```elixir
+Mix.install([:html5ever])
+```
+
## Forcing compilation
By default **you don't need Rust installed** because the lib will try to download
@@ -33,7 +39,7 @@ the compilation:
```elixir
def deps do
[
- {:html5ever, "~> 0.13.0"},
+ {:html5ever, "~> 0.16.0"},
{:rustler, ">= 0.0.0", optional: true}
]
end
diff --git a/RELEASE_CHECKLIST.md b/RELEASE_CHECKLIST.md
index f7a97b4..7e1fdb2 100644
--- a/RELEASE_CHECKLIST.md
+++ b/RELEASE_CHECKLIST.md
@@ -7,10 +7,11 @@ In order to release a new version to Hex.pm we first need to:
3. commit and create a tag for that version
4. push the changes to the repository with: `git push origin master --tags`
5. wait the CI to build all release files
-6. run `mix rustler.download Html5ever.Native --all --print`
+6. run `HTML5EVER_BUILD=1 mix rustler_precompiled.download Html5ever.Native --all --print`
7. copy the output of the mix task and add to the release notes
8. run `mix hex.publish` and **make sure the checksum file is present**
-in the list of files to be published.
+in the list of files to be published. Also make sure that the `target`
+directory of `native/html5ever_nif` is **NOT** present.
It's important to ensure that we publish the checksum file with the
package because otherwise the users won't be able to use the lib
diff --git a/lib/html5ever.ex b/lib/html5ever.ex
index 8d52fc9..2824a7f 100644
--- a/lib/html5ever.ex
+++ b/lib/html5ever.ex
@@ -37,8 +37,30 @@ defmodule Html5ever do
]}
"""
- def parse(html) do
- parse_dirty(html)
+ def parse(html) when is_binary(html) do
+ Html5ever.Native.parse(html, false)
+ end
+
+ @doc """
+ Same as `parse/1`, but with attributes as maps.
+
+ This is going to remove duplicated attributes, keeping the ones
+ that appear first.
+
+ ## Example
+
+ iex> Html5ever.parse_with_attributes_as_maps(
+ ...> "
Hello world
"
+ ...> )
+ {:ok,
+ [
+ {:doctype, "html", "", ""},
+ {"html", %{}, [{"head", %{}, []}, {"body", %{}, [{"h1", %{"class" => "title"}, ["Hello world"]}]}]}
+ ]}
+
+ """
+ def parse_with_attributes_as_maps(html) when is_binary(html) do
+ Html5ever.Native.parse(html, true)
end
@doc """
@@ -92,27 +114,17 @@ defmodule Html5ever do
}}
"""
- def flat_parse(html) do
- flat_parse_dirty(html)
+ def flat_parse(html) when is_binary(html) do
+ Html5ever.Native.flat_parse(html, false)
end
- defp parse_dirty(html) do
- case Html5ever.Native.parse_sync(html) do
- {:html5ever_nif_result, :ok, result} ->
- {:ok, result}
-
- {:html5ever_nif_result, :error, err} ->
- {:error, err}
- end
- end
-
- defp flat_parse_dirty(html) do
- case Html5ever.Native.flat_parse_sync(html) do
- {:html5ever_nif_result, :ok, result} ->
- {:ok, result}
+ @doc """
+ Same as `flat_parse/1`, but with attributes as maps.
- {:html5ever_nif_result, :error, err} ->
- {:error, err}
- end
+ This is going to remove duplicated attributes, keeping the ones
+ that appear first.
+ """
+ def flat_parse_with_attributes_as_maps(html) when is_binary(html) do
+ Html5ever.Native.flat_parse(html, true)
end
end
diff --git a/lib/html5ever/native.ex b/lib/html5ever/native.ex
index 3036538..d4731bc 100644
--- a/lib/html5ever/native.ex
+++ b/lib/html5ever/native.ex
@@ -6,7 +6,7 @@ defmodule Html5ever.Native do
version = mix_config[:version]
github_url = mix_config[:package][:links]["GitHub"]
- env_config = Application.get_env(:html5ever, Html5ever, [])
+ env_config = Application.compile_env(:html5ever, Html5ever, [])
# This module will be replaced by the NIF module after
# loaded. It throws an error in case the NIF can't be loaded.
@@ -19,10 +19,8 @@ defmodule Html5ever.Native do
System.get_env("HTML5EVER_BUILD") in ["1", "true"] or env_config[:build_from_source],
version: version
- def parse_sync(_binary), do: err()
- def parse_dirty(_binary), do: err()
- def flat_parse_sync(_binary), do: err()
- def flat_parse_dirty(_binary), do: err()
+ def parse(_binary, _attrs_as_maps), do: err()
+ def flat_parse(_binary, _attrs_as_maps), do: err()
defp err, do: :erlang.nif_error(:nif_not_loaded)
end
diff --git a/mix.exs b/mix.exs
index 5ee65fe..016a0aa 100644
--- a/mix.exs
+++ b/mix.exs
@@ -1,14 +1,14 @@
defmodule Html5ever.Mixfile do
use Mix.Project
- @version "0.13.1"
+ @version "0.16.1-dev"
@repo_url "https://github.com/rusterlium/html5ever_elixir"
def project do
[
app: :html5ever,
version: @version,
- elixir: "~> 1.11",
+ elixir: "~> 1.13",
build_embedded: Mix.env() == :prod,
start_permanent: Mix.env() == :prod,
deps: deps(),
@@ -24,8 +24,8 @@ defmodule Html5ever.Mixfile do
defp deps do
[
- {:rustler_precompiled, "~> 0.4"},
- {:rustler, ">= 0.0.0", optional: true},
+ {:rustler_precompiled, "~> 0.8.0"},
+ {:rustler, "~> 0.36.0", optional: true},
{:ex_doc, ">= 0.0.0", only: :dev}
]
end
diff --git a/mix.lock b/mix.lock
index dec2ab0..35304b9 100644
--- a/mix.lock
+++ b/mix.lock
@@ -1,13 +1,21 @@
%{
- "castore": {:hex, :castore, "0.1.17", "ba672681de4e51ed8ec1f74ed624d104c0db72742ea1a5e74edbc770c815182f", [:mix], [], "hexpm", "d9844227ed52d26e7519224525cb6868650c272d4a3d327ce3ca5570c12163f9"},
- "earmark_parser": {:hex, :earmark_parser, "1.4.25", "2024618731c55ebfcc5439d756852ec4e85978a39d0d58593763924d9a15916f", [:mix], [], "hexpm", "56749c5e1c59447f7b7a23ddb235e4b3defe276afc220a6227237f3efe83f51e"},
- "ex_doc": {:hex, :ex_doc, "0.28.4", "001a0ea6beac2f810f1abc3dbf4b123e9593eaa5f00dd13ded024eae7c523298", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "bf85d003dd34911d89c8ddb8bda1a958af3471a274a4c2150a9c01c78ac3f8ed"},
- "jason": {:hex, :jason, "1.3.0", "fa6b82a934feb176263ad2df0dbd91bf633d4a46ebfdffea0c8ae82953714946", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "53fc1f51255390e0ec7e50f9cb41e751c260d065dcba2bf0d08dc51a4002c2ac"},
- "makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"},
- "makeup_elixir": {:hex, :makeup_elixir, "0.16.0", "f8c570a0d33f8039513fbccaf7108c5d750f47d8defd44088371191b76492b0b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "28b2cbdc13960a46ae9a8858c4bebdec3c9a6d7b4b9e7f4ed1502f8159f338e7"},
- "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"},
- "nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"},
- "rustler": {:hex, :rustler, "0.25.0", "32526b51af7e58a740f61941bf923486ce6415a91c3934cc16c281aa201a2240", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "6b43a11a37fe79c6234d88c4102ab5dfede7a6a764dc5c7b539956cfa02f3cf4"},
- "rustler_precompiled": {:hex, :rustler_precompiled, "0.5.1", "93df423bd7b14b67dcacf994443d132d300623f80756974cac4febeab40af74a", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "3f8cbc8e92eef4e1a71bf441b568b868b16a3730f63f5b803c68073017e30b13"},
- "toml": {:hex, :toml, "0.6.2", "38f445df384a17e5d382befe30e3489112a48d3ba4c459e543f748c2f25dd4d1", [:mix], [], "hexpm", "d013e45126d74c0c26a38d31f5e8e9b83ea19fc752470feb9a86071ca5a672fa"},
+ "castore": {:hex, :castore, "1.0.11", "4bbd584741601eb658007339ea730b082cc61f3554cf2e8f39bf693a11b49073", [:mix], [], "hexpm", "e03990b4db988df56262852f20de0f659871c35154691427a5047f4967a16a62"},
+ "earmark_parser": {:hex, :earmark_parser, "1.4.43", "34b2f401fe473080e39ff2b90feb8ddfeef7639f8ee0bbf71bb41911831d77c5", [:mix], [], "hexpm", "970a3cd19503f5e8e527a190662be2cee5d98eed1ff72ed9b3d1a3d466692de8"},
+ "ex_doc": {:hex, :ex_doc, "0.37.1", "65ca30d242082b95aa852b3b73c9d9914279fff56db5dc7b3859be5504417980", [:mix], [{:earmark_parser, "~> 1.4.42", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "6774f75477733ea88ce861476db031f9399c110640752ca2b400dbbb50491224"},
+ "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"},
+ "hpax": {:hex, :hpax, "1.0.2", "762df951b0c399ff67cc57c3995ec3cf46d696e41f0bba17da0518d94acd4aac", [:mix], [], "hexpm", "2f09b4c1074e0abd846747329eaa26d535be0eb3d189fa69d812bfb8bfefd32f"},
+ "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
+ "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"},
+ "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"},
+ "makeup_erlang": {:hex, :makeup_erlang, "1.0.2", "03e1804074b3aa64d5fad7aa64601ed0fb395337b982d9bcf04029d68d51b6a7", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "af33ff7ef368d5893e4a267933e7744e46ce3cf1f61e2dccf53a111ed3aa3727"},
+ "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"},
+ "mint": {:hex, :mint, "1.6.2", "af6d97a4051eee4f05b5500671d47c3a67dac7386045d87a904126fd4bbcea2e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "5ee441dffc1892f1ae59127f74afe8fd82fda6587794278d924e4d90ea3d63f9"},
+ "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
+ "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"},
+ "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
+ "req": {:hex, :req, "0.5.8", "50d8d65279d6e343a5e46980ac2a70e97136182950833a1968b371e753f6a662", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "d7fc5898a566477e174f26887821a3c5082b243885520ee4b45555f5d53f40ef"},
+ "rustler": {:hex, :rustler, "0.36.0", "1decf059c60ec75911241325517c391717a9ad07d43e9a5ffda9d5c9ddd12936", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "03808c7d289da01da29d8d2fe19d07cae9f3d2f05ebaed87f0820a4dcfabe9d5"},
+ "rustler_precompiled": {:hex, :rustler_precompiled, "0.8.2", "5f25cbe220a8fac3e7ad62e6f950fcdca5a5a5f8501835d2823e8c74bf4268d5", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "63d1bd5f8e23096d1ff851839923162096364bac8656a4a3c00d1fff8e83ee0a"},
+ "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"},
+ "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"},
}
diff --git a/native/html5ever_nif/.cargo/config b/native/html5ever_nif/.cargo/config.toml
similarity index 55%
rename from native/html5ever_nif/.cargo/config
rename to native/html5ever_nif/.cargo/config.toml
index 705291c..89a707d 100644
--- a/native/html5ever_nif/.cargo/config
+++ b/native/html5ever_nif/.cargo/config.toml
@@ -1,18 +1,6 @@
[profile.release]
lto = true
-[target.x86_64-apple-darwin]
-rustflags = [
- "-C", "link-arg=-undefined",
- "-C", "link-arg=dynamic_lookup",
-]
-
-[target.aarch64-apple-darwin]
-rustflags = [
- "-C", "link-arg=-undefined",
- "-C", "link-arg=dynamic_lookup",
-]
-
[target.arm-unknown-linux-gnueabihf]
linker = "arm-linux-gnueabihf-gcc"
@@ -21,3 +9,9 @@ linker = "arm-linux-gnueabihf-gcc"
rustflags = [
"-C", "target-feature=-crt-static"
]
+
+# Same as above
+[target.aarch64-unknown-linux-musl]
+rustflags = [
+ "-C", "target-feature=-crt-static"
+]
diff --git a/native/html5ever_nif/Cargo.lock b/native/html5ever_nif/Cargo.lock
index 99419b7..bf4cd41 100644
--- a/native/html5ever_nif/Cargo.lock
+++ b/native/html5ever_nif/Cargo.lock
@@ -1,27 +1,30 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 3
+version = 4
[[package]]
-name = "aho-corasick"
-version = "0.7.18"
+name = "autocfg"
+version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
-dependencies = [
- "memchr",
-]
+checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
-name = "cfg-if"
-version = "1.0.0"
+name = "bitflags"
+version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
[[package]]
-name = "crossbeam"
-version = "0.2.12"
+name = "byteorder"
+version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd66663db5a988098a89599d4857919b3acf7f61402e61365acfd3919857b9be"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "futf"
@@ -35,37 +38,26 @@ dependencies = [
[[package]]
name = "getrandom"
-version = "0.1.16"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce"
-dependencies = [
- "cfg-if",
- "libc",
- "wasi 0.9.0+wasi-snapshot-preview1",
-]
-
-[[package]]
-name = "getrandom"
-version = "0.2.6"
+version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
+checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
- "wasi 0.10.2+wasi-snapshot-preview1",
+ "wasi",
]
[[package]]
name = "heck"
-version = "0.4.0"
+version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "html5ever"
-version = "0.26.0"
+version = "0.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bea68cab48b8459f17cf1c944c67ddc572d272d9f2b274140f223ecb1da4a3b7"
+checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
dependencies = [
"log",
"mac",
@@ -83,31 +75,57 @@ dependencies = [
"lazy_static",
"markup5ever",
"rustler",
- "scoped-pool",
"tendril",
+ "thiserror",
+]
+
+[[package]]
+name = "inventory"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b31349d02fe60f80bbbab1a9402364cad7460626d6030494b08ac4a2075bf81"
+dependencies = [
+ "rustversion",
]
[[package]]
name = "lazy_static"
-version = "1.4.0"
+version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
-version = "0.2.122"
+version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259"
+checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
-name = "log"
-version = "0.4.14"
+name = "libloading"
+version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
+checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34"
dependencies = [
"cfg-if",
+ "windows-targets",
+]
+
+[[package]]
+name = "lock_api"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
+dependencies = [
+ "autocfg",
+ "scopeguard",
]
+[[package]]
+name = "log"
+version = "0.4.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
+
[[package]]
name = "mac"
version = "0.1.1"
@@ -116,9 +134,9 @@ checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
-version = "0.11.0"
+version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a2629bb1404f3d34c2e921f21fd34ba00b206124c81f65c50b43b6aaefeb016"
+checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
dependencies = [
"log",
"phf",
@@ -129,44 +147,57 @@ dependencies = [
]
[[package]]
-name = "memchr"
-version = "2.4.1"
+name = "new_debug_unreachable"
+version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
-name = "new_debug_unreachable"
-version = "1.0.4"
+name = "once_cell"
+version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
+checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
-name = "phf"
-version = "0.10.1"
+name = "parking_lot"
+version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
+checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
- "phf_shared 0.10.0",
+ "lock_api",
+ "parking_lot_core",
]
[[package]]
-name = "phf_codegen"
-version = "0.10.0"
+name = "parking_lot_core"
+version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd"
+checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
- "phf_generator 0.10.0",
- "phf_shared 0.10.0",
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-targets",
]
[[package]]
-name = "phf_generator"
-version = "0.8.0"
+name = "phf"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
- "phf_shared 0.8.0",
- "rand 0.7.3",
+ "phf_shared 0.11.3",
+]
+
+[[package]]
+name = "phf_codegen"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
+dependencies = [
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
]
[[package]]
@@ -176,16 +207,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
- "rand 0.8.5",
+ "rand",
]
[[package]]
-name = "phf_shared"
-version = "0.8.0"
+name = "phf_generator"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
+checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
- "siphasher",
+ "phf_shared 0.11.3",
+ "rand",
]
[[package]]
@@ -194,14 +226,26 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
- "siphasher",
+ "siphasher 0.3.11",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
+dependencies = [
+ "siphasher 1.0.1",
]
[[package]]
name = "ppv-lite86"
-version = "0.2.10"
+version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
+checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
+dependencies = [
+ "zerocopy",
+]
[[package]]
name = "precomputed-hash"
@@ -211,36 +255,22 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro2"
-version = "1.0.29"
+version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d"
+checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
- "unicode-xid",
+ "unicode-ident",
]
[[package]]
name = "quote"
-version = "1.0.9"
+version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
+checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
-[[package]]
-name = "rand"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
-dependencies = [
- "getrandom 0.1.16",
- "libc",
- "rand_chacha 0.2.2",
- "rand_core 0.5.1",
- "rand_hc",
- "rand_pcg",
-]
-
[[package]]
name = "rand"
version = "0.8.5"
@@ -248,18 +278,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
- "rand_chacha 0.3.1",
- "rand_core 0.6.3",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.2.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.5.1",
+ "rand_chacha",
+ "rand_core",
]
[[package]]
@@ -269,158 +289,143 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
- "rand_core 0.6.3",
+ "rand_core",
]
[[package]]
name = "rand_core"
-version = "0.5.1"
+version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
- "getrandom 0.1.16",
+ "getrandom",
]
[[package]]
-name = "rand_core"
-version = "0.6.3"
+name = "redox_syscall"
+version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
+checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
dependencies = [
- "getrandom 0.2.6",
+ "bitflags",
]
[[package]]
-name = "rand_hc"
-version = "0.2.0"
+name = "regex-lite"
+version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
-dependencies = [
- "rand_core 0.5.1",
-]
-
-[[package]]
-name = "rand_pcg"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
-dependencies = [
- "rand_core 0.5.1",
-]
-
-[[package]]
-name = "regex"
-version = "1.5.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
-dependencies = [
- "aho-corasick",
- "memchr",
- "regex-syntax",
-]
-
-[[package]]
-name = "regex-syntax"
-version = "0.6.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a"
[[package]]
name = "rustler"
-version = "0.25.0"
+version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3e6617fa86bacfb2de792c12e261e0f456bb9ff15038498ae421715bf4128c5"
+checksum = "1f7b219d7473cf473409665a4898d66688b34736e51bb5791098b0d3390e4c98"
dependencies = [
- "lazy_static",
+ "inventory",
+ "libloading",
+ "regex-lite",
"rustler_codegen",
- "rustler_sys",
]
[[package]]
name = "rustler_codegen"
-version = "0.25.0"
+version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05cda738bc4260019ee078a699fac55ce3577fe2db736b2cc64a4d6696950fa6"
+checksum = "743ec5267bd5f18fd88d89f7e729c0f43b97d9c2539959915fa1f234300bb621"
dependencies = [
"heck",
+ "inventory",
"proc-macro2",
"quote",
"syn",
]
[[package]]
-name = "rustler_sys"
-version = "2.2.0"
+name = "rustversion"
+version = "1.0.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4"
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ff26a42e62d538f82913dd34f60105ecfdffbdb25abdc3c3580b0c622285332"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "serde"
+version = "1.0.217"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
- "regex",
- "unreachable",
+ "serde_derive",
]
[[package]]
-name = "scoped-pool"
-version = "1.0.0"
+name = "serde_derive"
+version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "817a3a15e704545ce59ed2b5c60a5d32bda4d7869befb8b36667b658a6c00b43"
+checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
- "crossbeam",
- "scopeguard",
- "variance",
+ "proc-macro2",
+ "quote",
+ "syn",
]
[[package]]
-name = "scopeguard"
-version = "0.1.2"
+name = "siphasher"
+version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59a076157c1e2dc561d8de585151ee6965d910dd4dcb5dabb7ae3e83981a6c57"
+checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d"
[[package]]
-name = "serde"
-version = "1.0.130"
+name = "siphasher"
+version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913"
+checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
-name = "siphasher"
-version = "0.3.7"
+name = "smallvec"
+version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b"
+checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "string_cache"
-version = "0.8.1"
+version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a"
+checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
dependencies = [
- "lazy_static",
"new_debug_unreachable",
- "phf_shared 0.8.0",
+ "once_cell",
+ "parking_lot",
+ "phf_shared 0.10.0",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
-version = "0.5.1"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97"
+checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
dependencies = [
- "phf_generator 0.8.0",
- "phf_shared 0.8.0",
+ "phf_generator 0.10.0",
+ "phf_shared 0.10.0",
"proc-macro2",
"quote",
]
[[package]]
name = "syn"
-version = "1.0.77"
+version = "2.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5239bc68e0fef57495900cfea4e8dc75596d9a319d7e16b1e0a440d24e6fe0a0"
+checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80"
dependencies = [
"proc-macro2",
"quote",
- "unicode-xid",
+ "unicode-ident",
]
[[package]]
@@ -435,20 +440,31 @@ dependencies = [
]
[[package]]
-name = "unicode-xid"
-version = "0.2.2"
+name = "thiserror"
+version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
+checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
+dependencies = [
+ "thiserror-impl",
+]
[[package]]
-name = "unreachable"
-version = "1.0.0"
+name = "thiserror-impl"
+version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
+checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
dependencies = [
- "void",
+ "proc-macro2",
+ "quote",
+ "syn",
]
+[[package]]
+name = "unicode-ident"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11cd88e12b17c6494200a9c1b683a04fcac9573ed74cd1b62aeb2727c5592243"
+
[[package]]
name = "utf-8"
version = "0.7.6"
@@ -456,25 +472,92 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
-name = "variance"
-version = "0.1.3"
+name = "wasi"
+version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3abfc2be1fb59663871379ea884fd81de80c496f2274e021c01d6fe56cd77b05"
+checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
-name = "void"
-version = "1.0.2"
+name = "windows-targets"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
+]
[[package]]
-name = "wasi"
-version = "0.9.0+wasi-snapshot-preview1"
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
-name = "wasi"
-version = "0.10.2+wasi-snapshot-preview1"
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "zerocopy"
+version = "0.7.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
+checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
+dependencies = [
+ "byteorder",
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.7.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
diff --git a/native/html5ever_nif/Cargo.toml b/native/html5ever_nif/Cargo.toml
index c9668e2..f2ef192 100644
--- a/native/html5ever_nif/Cargo.toml
+++ b/native/html5ever_nif/Cargo.toml
@@ -2,7 +2,7 @@
name = "html5ever_nif"
version = "0.1.0"
authors = ["HansiHE "]
-edition = "2018"
+edition = "2021"
[lib]
name = "html5ever_nif"
@@ -10,12 +10,13 @@ path = "src/lib.rs"
crate-type = ["cdylib"]
[dependencies]
-rustler = "0.25.0"
+# See the Precompilation guide for details about the features: https://github.com/philss/rustler_precompiled/blob/main/PRECOMPILATION_GUIDE.md
+rustler = { version = "0.36", default-features = false, features = ["nif_version_2_15"] }
-html5ever = "0.26"
-markup5ever = "0.11"
+html5ever = "0.27"
+markup5ever = "0.12"
tendril = "0.4"
-lazy_static = "1.4"
-scoped-pool = "1.0"
+lazy_static = "1.5"
+thiserror = "2"
diff --git a/native/html5ever_nif/Cross.toml b/native/html5ever_nif/Cross.toml
deleted file mode 100644
index 8ba543c..0000000
--- a/native/html5ever_nif/Cross.toml
+++ /dev/null
@@ -1,4 +0,0 @@
-[build.env]
-passthrough = [
- "RUSTLER_NIF_VERSION"
-]
diff --git a/native/html5ever_nif/src/common.rs b/native/html5ever_nif/src/common.rs
index 0ff8d14..ac97c7e 100644
--- a/native/html5ever_nif/src/common.rs
+++ b/native/html5ever_nif/src/common.rs
@@ -7,18 +7,18 @@ use tendril::StrTendril;
// Encoder for these externally defined types.
// Unsure if this is a great way of doing it, but it's the way
// that produced the cleanest and least noisy code.
-pub struct QNW<'a>(pub &'a QualName);
-pub struct STW<'a>(pub &'a StrTendril);
+pub struct QualNameWrapper<'a>(pub &'a QualName);
+pub struct StrTendrilWrapper<'a>(pub &'a StrTendril);
-impl<'b> Encoder for QNW<'b> {
+impl Encoder for QualNameWrapper<'_> {
fn encode<'a>(&self, env: Env<'a>) -> Term<'a> {
- let data: &str = &*self.0.local;
+ let data: &str = &self.0.local;
data.encode(env)
}
}
-impl<'b> Encoder for STW<'b> {
+impl Encoder for StrTendrilWrapper<'_> {
fn encode<'a>(&self, env: Env<'a>) -> Term<'a> {
- let data: &str = &*self.0;
+ let data: &str = self.0;
data.encode(env)
}
}
diff --git a/native/html5ever_nif/src/flat_dom.rs b/native/html5ever_nif/src/flat_dom.rs
index b1bf031..f1d0c8f 100644
--- a/native/html5ever_nif/src/flat_dom.rs
+++ b/native/html5ever_nif/src/flat_dom.rs
@@ -1,57 +1,47 @@
-use html5ever::{ QualName, Attribute };
-use html5ever::tree_builder::{ TreeSink, QuirksMode, NodeOrText, ElementFlags };
+use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
+use html5ever::{Attribute, QualName};
use markup5ever::ExpandedName;
use tendril::StrTendril;
use std::borrow::Cow;
-use rustler::{ Env, Encoder, Term };
+use rustler::{Encoder, Env, Term};
-use crate::common::{ STW, QNW };
+use crate::common::{QualNameWrapper, StrTendrilWrapper};
+use crate::Html5everExError;
#[derive(Copy, Clone, PartialEq, Debug)]
pub struct NodeHandle(pub usize);
pub enum PoolOrVec {
- Pool {
- head: usize,
- len: usize,
- },
- Vec {
- vec: Vec,
- }
+ Pool { head: usize, len: usize },
+ Vec { vec: Vec },
}
-impl PoolOrVec where T: Clone {
-
- pub fn new(pool: &Vec) -> Self {
+impl PoolOrVec
+where
+ T: Clone,
+{
+ pub fn new(pool: &[T]) -> Self {
PoolOrVec::Pool {
head: pool.len(),
len: 0,
}
}
- pub fn get<'a>(&'a self, idx: usize, pool: &'a Vec) -> Option<&'a T> {
+ pub fn get<'a>(&'a self, idx: usize, pool: &'a [T]) -> Option<&'a T> {
match self {
- PoolOrVec::Pool { head, len } if idx < *len => {
- Some(&pool[*head + idx])
- },
- PoolOrVec::Vec { vec } => {
- vec.get(idx)
- },
+ PoolOrVec::Pool { head, len } if idx < *len => Some(&pool[*head + idx]),
+ PoolOrVec::Vec { vec } => vec.get(idx),
_ => None,
}
}
- pub fn as_slice<'a>(&'a self, pool: &'a Vec) -> &'a [T] {
+ pub fn as_slice<'a>(&'a self, pool: &'a [T]) -> &'a [T] {
match self {
- PoolOrVec::Pool { head, len } => {
- &pool[*head..(*head + *len)]
- },
- PoolOrVec::Vec { vec } => {
- &*vec
- },
+ PoolOrVec::Pool { head, len } => &pool[*head..(*head + *len)],
+ PoolOrVec::Vec { vec } => vec,
}
}
@@ -60,25 +50,23 @@ impl PoolOrVec where T: Clone {
PoolOrVec::Pool { head, len } if pool.len() == *head + *len => {
pool.push(item);
*len += 1;
- },
+ }
val @ PoolOrVec::Pool { .. } => {
if let PoolOrVec::Pool { head, len } = val {
let mut vec = pool[*head..(*head + *len)].to_owned();
vec.push(item);
- *val = PoolOrVec::Vec {
- vec: vec,
- };
+ *val = PoolOrVec::Vec { vec };
} else {
unreachable!()
}
- },
+ }
PoolOrVec::Vec { vec } => {
vec.push(item);
- },
+ }
}
}
- pub fn iter<'a>(&'a self, pool: &'a Vec) -> impl Iterator- + 'a {
+ pub fn iter<'a>(&'a self, pool: &'a [T]) -> impl Iterator
- + 'a {
self.as_slice(pool).iter()
}
@@ -96,14 +84,14 @@ impl PoolOrVec where T: Clone {
vec
},
};
- },
+ }
PoolOrVec::Vec { vec } => {
vec.insert(index, item);
- },
+ }
}
}
- pub fn remove(&mut self, index: usize, pool: &mut Vec) {
+ pub fn remove(&mut self, index: usize, pool: &mut [T]) {
match self {
val @ PoolOrVec::Pool { .. } => {
*val = PoolOrVec::Vec {
@@ -113,13 +101,12 @@ impl PoolOrVec where T: Clone {
vec
},
};
- },
+ }
PoolOrVec::Vec { vec } => {
vec.remove(index);
- },
+ }
}
}
-
}
pub struct Node {
@@ -129,18 +116,18 @@ pub struct Node {
data: NodeData,
}
impl Node {
- fn new(id: usize, data: NodeData, pool: &Vec) -> Self {
+ fn new(id: usize, data: NodeData, pool: &[NodeHandle]) -> Self {
Node {
id: NodeHandle(id),
parent: None,
children: PoolOrVec::new(pool),
- data: data,
+ data,
}
}
}
#[derive(Debug, PartialEq)]
-pub enum NodeData{
+pub enum NodeData {
Document,
DocType {
name: StrTendril,
@@ -172,7 +159,6 @@ pub struct FlatSink {
}
impl FlatSink {
-
pub fn new() -> FlatSink {
let mut sink = FlatSink {
root: NodeHandle(0),
@@ -181,7 +167,8 @@ impl FlatSink {
};
// Element 0 is always root
- sink.nodes.push(Node::new(0, NodeData::Document, &sink.pool));
+ sink.nodes
+ .push(Node::new(0, NodeData::Document, &sink.pool));
sink
}
@@ -190,10 +177,10 @@ impl FlatSink {
self.root
}
- pub fn node_mut<'a>(&'a mut self, handle: NodeHandle) -> &'a mut Node {
+ pub fn node_mut(&mut self, handle: NodeHandle) -> &mut Node {
&mut self.nodes[handle.0]
}
- pub fn node<'a>(&'a self, handle: NodeHandle) -> &'a Node {
+ pub fn node(&self, handle: NodeHandle) -> &Node {
&self.nodes[handle.0]
}
@@ -203,17 +190,12 @@ impl FlatSink {
self.nodes.push(node);
id
}
-
}
fn node_or_text_to_node(sink: &mut FlatSink, not: NodeOrText) -> NodeHandle {
match not {
NodeOrText::AppendNode(handle) => handle,
- NodeOrText::AppendText(text) => {
- sink.make_node(NodeData::Text {
- contents: text,
- })
- },
+ NodeOrText::AppendText(text) => sink.make_node(NodeData::Text { contents: text }),
}
}
@@ -229,12 +211,28 @@ impl TreeSink for FlatSink {
fn parse_error(&mut self, _msg: Cow<'static, str>) {}
fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
- fn get_document(&mut self) -> Self::Handle { NodeHandle(0) }
- fn get_template_contents(&mut self, _target: &Self::Handle) -> Self::Handle {
- panic!("Templates not supported");
+ fn get_document(&mut self) -> Self::Handle {
+ NodeHandle(0)
+ }
+ fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle {
+ // Inspired by https://github.com/servo/html5ever/blob/1a62a39879a1def200dcb87b900265993e6c1c83/rcdom/lib.rs#L235
+ // It does not get the template's contents yet, but it does print the empty tag.
+ // TODO: print the contents as text.
+ let node = self.node(*target);
+ if let NodeData::Element {
+ ref template_contents,
+ ..
+ } = node.data
+ {
+ *template_contents.as_ref().expect("not a template element!")
+ } else {
+ panic!("not a template element!")
+ }
}
- fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool { x == y }
+ fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool {
+ x == y
+ }
fn elem_name(&self, target: &Self::Handle) -> ExpandedName {
let node = self.node(*target);
match node.data {
@@ -243,7 +241,12 @@ impl TreeSink for FlatSink {
}
}
- fn create_element(&mut self, name: QualName, attrs: Vec, flags: ElementFlags) -> Self::Handle {
+ fn create_element(
+ &mut self,
+ name: QualName,
+ attrs: Vec,
+ flags: ElementFlags,
+ ) -> Self::Handle {
let template_contents = if flags.template {
Some(self.make_node(NodeData::Document))
} else {
@@ -251,30 +254,32 @@ impl TreeSink for FlatSink {
};
self.make_node(NodeData::Element {
- name: name,
- attrs: attrs,
+ name,
+ attrs,
mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
- template_contents: template_contents,
+ template_contents,
})
}
fn create_comment(&mut self, text: StrTendril) -> Self::Handle {
- self.make_node(NodeData::Comment {
- contents: text,
- })
+ self.make_node(NodeData::Comment { contents: text })
}
fn append(&mut self, parent_id: &Self::Handle, child: NodeOrText) {
let handle = node_or_text_to_node(self, child);
- self.nodes[parent_id.0].children.push(handle, &mut self.pool);
+ self.nodes[parent_id.0]
+ .children
+ .push(handle, &mut self.pool);
self.node_mut(handle).parent = Some(*parent_id);
}
- fn append_based_on_parent_node(&mut self,
- element: &Self::Handle,
- prev_element: &Self::Handle,
- child: NodeOrText) {
+ fn append_based_on_parent_node(
+ &mut self,
+ element: &Self::Handle,
+ prev_element: &Self::Handle,
+ child: NodeOrText,
+ ) {
let has_parent = self.node(*element).parent.is_some();
if has_parent {
self.append_before_sibling(element, child);
@@ -283,33 +288,53 @@ impl TreeSink for FlatSink {
}
}
- fn append_before_sibling(&mut self, sibling: &Self::Handle, new_node: NodeOrText) {
+ fn append_before_sibling(
+ &mut self,
+ sibling: &Self::Handle,
+ new_node: NodeOrText,
+ ) {
let new_node_handle = node_or_text_to_node(self, new_node);
let parent = self.node(*sibling).parent.unwrap();
let parent_node = &mut self.nodes[parent.0];
- let sibling_index = parent_node.children.iter(&self.pool).enumerate()
- .find(|&(_, node)| node == sibling).unwrap().0;
- parent_node.children.insert(sibling_index, new_node_handle, &mut self.pool);
- }
-
- fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril) {
+ let sibling_index = parent_node
+ .children
+ .iter(&self.pool)
+ .enumerate()
+ .find(|&(_, node)| node == sibling)
+ .unwrap()
+ .0;
+ parent_node
+ .children
+ .insert(sibling_index, new_node_handle, &mut self.pool);
+ }
+
+ fn append_doctype_to_document(
+ &mut self,
+ name: StrTendril,
+ public_id: StrTendril,
+ system_id: StrTendril,
+ ) {
let doctype = self.make_node(NodeData::DocType {
- name: name,
- public_id: public_id,
- system_id: system_id,
+ name,
+ public_id,
+ system_id,
});
let root = self.root;
self.nodes[root.0].children.push(doctype, &mut self.pool);
self.node_mut(doctype).parent = Some(self.root);
}
- fn add_attrs_if_missing(&mut self, target_handle: &Self::Handle, mut add_attrs: Vec) {
+ fn add_attrs_if_missing(
+ &mut self,
+ target_handle: &Self::Handle,
+ mut add_attrs: Vec,
+ ) {
let target = self.node_mut(*target_handle);
match target.data {
NodeData::Element { ref mut attrs, .. } => {
for attr in add_attrs.drain(..) {
- if attrs.iter().find(|a| attr.name == a.name) == None {
+ if !attrs.iter().any(|a| attr.name == a.name) {
attrs.push(attr);
}
}
@@ -321,8 +346,13 @@ impl TreeSink for FlatSink {
fn remove_from_parent(&mut self, target: &Self::Handle) {
let parent = self.node(*target).parent.unwrap();
let parent_node = &mut self.nodes[parent.0];
- let sibling_index = parent_node.children.iter(&self.pool).enumerate()
- .find(|&(_, node)| node == target).unwrap().0;
+ let sibling_index = parent_node
+ .children
+ .iter(&self.pool)
+ .enumerate()
+ .find(|&(_, node)| node == target)
+ .unwrap()
+ .0;
parent_node.children.remove(sibling_index, &mut self.pool);
}
@@ -347,11 +377,10 @@ impl TreeSink for FlatSink {
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Self::Handle {
self.make_node(NodeData::ProcessingInstruction {
- target: target,
+ target,
contents: data,
})
}
-
}
impl Encoder for NodeHandle {
@@ -360,42 +389,76 @@ impl Encoder for NodeHandle {
}
}
-fn encode_node<'a>(node: &Node, env: Env<'a>, pool: &Vec) -> Term<'a> {
- let map = ::rustler::types::map::map_new(env)
- .map_put(self::atoms::id().encode(env), node.id.encode(env)).ok().unwrap()
- .map_put(self::atoms::parent().encode(env), match node.parent {
- Some(handle) => handle.encode(env),
- None => self::atoms::nil().encode(env),
- }).ok().unwrap();
+fn to_custom_error(_err: rustler::error::Error) -> Html5everExError {
+ Html5everExError::MapEntry
+}
+
+fn encode_node<'a>(
+ node: &Node,
+ env: Env<'a>,
+ pool: &[NodeHandle],
+ attributes_as_maps: bool,
+) -> Result, Html5everExError> {
+ let pairs: Vec<(Term, Term)> = vec![
+ (atoms::id().encode(env), node.id.encode(env)),
+ (
+ atoms::parent().encode(env),
+ match node.parent {
+ Some(handle) => handle.encode(env),
+ None => atoms::nil().encode(env),
+ },
+ ),
+ ];
+
+ let mut map = Term::map_from_pairs(env, &pairs).map_err(to_custom_error)?;
match node.data {
- NodeData::Document => {
- map
- .map_put(self::atoms::type_().encode(env), self::atoms::document().encode(env)).ok().unwrap()
- }
- NodeData::Element { ref attrs, ref name, .. } => {
- map
- .map_put(self::atoms::type_().encode(env), self::atoms::element().encode(env)).ok().unwrap()
- .map_put(self::atoms::children().encode(env), node.children.as_slice(pool).encode(env)).ok().unwrap()
- .map_put(self::atoms::name().encode(env), QNW(name).encode(env)).ok().unwrap()
- .map_put(self::atoms::attrs().encode(env), attrs.iter().map(|attr| {
- (QNW(&attr.name), STW(&attr.value))
- }).collect::>().encode(env)).ok().unwrap()
- }
- NodeData::Text { ref contents } => {
- map
- .map_put(self::atoms::type_().encode(env), self::atoms::text().encode(env)).ok().unwrap()
- .map_put(self::atoms::contents().encode(env), STW(contents).encode(env)).ok().unwrap()
- }
- NodeData::DocType { .. } => {
- map
- .map_put(self::atoms::type_().encode(env), self::atoms::doctype().encode(env)).ok().unwrap()
- }
- NodeData::Comment { ref contents } => {
- map
- .map_put(self::atoms::type_().encode(env), self::atoms::comment().encode(env)).ok().unwrap()
- .map_put(self::atoms::contents().encode(env), STW(contents).encode(env)).ok().unwrap()
+ NodeData::Document => map
+ .map_put(atoms::type_().encode(env), atoms::document().encode(env))
+ .map_err(to_custom_error),
+ NodeData::Element {
+ ref attrs,
+ ref name,
+ ..
+ } => {
+ let pairs: Vec<(Term, Term)> = vec![
+ (atoms::type_().encode(env), atoms::element().encode(env)),
+ (
+ atoms::children().encode(env),
+ node.children.as_slice(pool).encode(env),
+ ),
+ (atoms::name().encode(env), QualNameWrapper(name).encode(env)),
+ (
+ atoms::attrs().encode(env),
+ attributes_to_term(env, attrs, attributes_as_maps),
+ ),
+ ];
+
+ for (key, value) in pairs {
+ map = map.map_put(key, value).map_err(to_custom_error)?;
+ }
+
+ Ok(map)
}
+ NodeData::Text { ref contents } => map
+ .map_put(atoms::type_().encode(env), atoms::text().encode(env))
+ .map_err(to_custom_error)?
+ .map_put(
+ atoms::contents().encode(env),
+ StrTendrilWrapper(contents).encode(env),
+ )
+ .map_err(to_custom_error),
+ NodeData::DocType { .. } => map
+ .map_put(atoms::type_().encode(env), atoms::doctype().encode(env))
+ .map_err(to_custom_error),
+ NodeData::Comment { ref contents } => map
+ .map_put(atoms::type_().encode(env), atoms::comment().encode(env))
+ .map_err(to_custom_error)?
+ .map_put(
+ atoms::contents().encode(env),
+ StrTendrilWrapper(contents).encode(env),
+ )
+ .map_err(to_custom_error),
_ => unimplemented!(),
}
}
@@ -422,15 +485,27 @@ mod atoms {
}
}
-pub fn flat_sink_to_flat_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> {
- let nodes = sink.nodes.iter()
- .fold(rustler::types::map::map_new(env), |acc, node| {
- acc.map_put(node.id.encode(env), encode_node(node, env, &sink.pool)).ok().unwrap()
- });
+pub fn flat_sink_to_flat_term<'a>(
+ env: Env<'a>,
+ sink: &FlatSink,
+ attributes_as_maps: bool,
+) -> Result, Html5everExError> {
+ let mut nodes_map = rustler::types::map::map_new(env);
+
+ for node in sink.nodes.iter() {
+ nodes_map = nodes_map
+ .map_put(
+ node.id.encode(env),
+ encode_node(node, env, &sink.pool, attributes_as_maps)?,
+ )
+ .map_err(to_custom_error)?;
+ }
::rustler::types::map::map_new(env)
- .map_put(self::atoms::nodes().encode(env), nodes).ok().unwrap()
- .map_put(self::atoms::root().encode(env), sink.root.encode(env)).ok().unwrap()
+ .map_put(atoms::nodes().encode(env), nodes_map)
+ .map_err(to_custom_error)?
+ .map_put(atoms::root().encode(env), sink.root.encode(env))
+ .map_err(to_custom_error)
}
struct RecState {
@@ -439,16 +514,20 @@ struct RecState {
child_base: usize,
}
-pub fn flat_sink_to_rec_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> {
+pub fn flat_sink_to_rec_term<'a>(
+ env: Env<'a>,
+ sink: &FlatSink,
+ attributes_as_maps: bool,
+) -> Result, Html5everExError> {
let mut child_stack = vec![];
- let mut stack: Vec = vec![
- RecState {
- node: sink.root(),
- child_base: 0,
- child_n: 0,
- },
- ];
+ let mut stack: Vec = vec![RecState {
+ node: sink.root(),
+ child_base: 0,
+ child_n: 0,
+ }];
+ let mut comments_bf_doctype = 0usize;
+ let mut read_doctype = false;
loop {
let mut top = stack.pop().unwrap();
@@ -482,34 +561,51 @@ pub fn flat_sink_to_rec_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> {
}
assert_eq!(stack.len(), 0);
- return term;
- },
- NodeData::DocType { name, public_id, system_id } => {
- assert!(stack.len() > 0);
- assert!(child_stack.len() == 0);
+ return Ok(term);
+ }
+ NodeData::DocType {
+ name,
+ public_id,
+ system_id,
+ } => {
+ assert!(!stack.is_empty());
+ assert!(child_stack.is_empty() || comments_bf_doctype == child_stack.len());
+
+ read_doctype = true;
term = (
- self::atoms::doctype(),
- STW(name),
- STW(public_id),
- STW(system_id),
- ).encode(env);
- },
+ atoms::doctype(),
+ StrTendrilWrapper(name),
+ StrTendrilWrapper(public_id),
+ StrTendrilWrapper(system_id),
+ )
+ .encode(env);
+ }
NodeData::Element { attrs, name, .. } => {
- assert!(stack.len() > 0);
+ assert!(!stack.is_empty());
+
+ let attribute_terms = attributes_to_term(env, attrs, attributes_as_maps);
- let attribute_terms: Vec> = attrs.iter()
- .map(|a| (QNW(&a.name), STW(&a.value)).encode(env))
- .collect();
- term = (QNW(name), attribute_terms, &child_stack[top.child_base..]).encode(env);
+ term = (
+ QualNameWrapper(name),
+ attribute_terms,
+ &child_stack[top.child_base..],
+ )
+ .encode(env);
for _ in 0..(child_stack.len() - top.child_base) {
child_stack.pop();
}
- },
+ }
NodeData::Text { contents } => {
- term = STW(contents).encode(env);
- },
- NodeData::Comment { .. } => continue,
+ term = StrTendrilWrapper(contents).encode(env);
+ }
+ NodeData::Comment { contents } => {
+ if !read_doctype {
+ comments_bf_doctype += 1
+ };
+
+ term = (atoms::comment(), StrTendrilWrapper(contents)).encode(env);
+ }
_ => unimplemented!(""),
}
@@ -518,11 +614,19 @@ pub fn flat_sink_to_rec_term<'a>(env: Env<'a>, sink: &FlatSink) -> Term<'a> {
}
}
-
-
-
-
-
-
-
-
+fn attributes_to_term<'a>(
+ env: Env<'a>,
+ attributes: &[Attribute],
+ attributes_as_maps: bool,
+) -> Term<'a> {
+ let pairs: Vec<(QualNameWrapper, StrTendrilWrapper)> = attributes
+ .iter()
+ .map(|a| (QualNameWrapper(&a.name), StrTendrilWrapper(&a.value)))
+ .collect();
+
+ if attributes_as_maps {
+ Term::map_from_pairs(env, &pairs).unwrap()
+ } else {
+ pairs.encode(env)
+ }
+}
diff --git a/native/html5ever_nif/src/lib.rs b/native/html5ever_nif/src/lib.rs
index 4f23cf7..e488f09 100644
--- a/native/html5ever_nif/src/lib.rs
+++ b/native/html5ever_nif/src/lib.rs
@@ -1,101 +1,57 @@
+use flat_dom::FlatSink;
use rustler::types::binary::Binary;
-use rustler::{Decoder, Encoder, Env, Error, NifResult, Term};
+use rustler::{Env, Term};
-//use html5ever::rcdom::RcDom;
use tendril::TendrilSink;
+use thiserror::Error;
mod common;
mod flat_dom;
-mod atoms {
- rustler::atoms! {
- html5ever_nif_result,
+#[derive(Error, Debug)]
+pub enum Html5everExError {
+ #[error("cannot transform bytes from binary to a valid UTF8 string")]
+ BytesToUtf8(#[from] std::str::Utf8Error),
- ok,
- error,
- nif_panic,
-
- doctype,
- comment,
-
- none,
- some,
- all,
- }
+ #[error("cannot insert entry in a map")]
+ MapEntry,
}
-#[derive(PartialEq, Eq)]
-enum ErrorLevel {
- None,
- Some,
- All,
-}
-impl<'a> Decoder<'a> for ErrorLevel {
- fn decode(term: Term<'a>) -> NifResult {
- if atoms::none() == term {
- Ok(ErrorLevel::None)
- } else if atoms::some() == term {
- Ok(ErrorLevel::Some)
- } else if atoms::all() == term {
- Ok(ErrorLevel::All)
- } else {
- Err(Error::BadArg)
- }
+impl rustler::Encoder for Html5everExError {
+ fn encode<'a>(&self, env: Env<'a>) -> Term<'a> {
+ format!("{self}").encode(env)
}
}
-#[rustler::nif]
-fn parse_sync<'a>(env: Env<'a>, binary: Binary) -> Term<'a> {
- parse(env, binary)
-}
-
#[rustler::nif(schedule = "DirtyCpu")]
-fn parse_dirty<'a>(env: Env<'a>, binary: Binary) -> Term<'a> {
- parse(env, binary)
-}
-
-fn parse<'a>(env: Env<'a>, binary: Binary) -> Term<'a> {
- let sink = flat_dom::FlatSink::new();
-
- let utf = std::str::from_utf8(binary.as_slice()).unwrap();
-
- let parser = html5ever::parse_document(sink, Default::default());
- let result = parser.one(utf);
-
- let result_term = flat_dom::flat_sink_to_rec_term(env, &result);
-
- (atoms::html5ever_nif_result(), atoms::ok(), result_term).encode(env)
-}
+fn parse<'a>(
+ env: Env<'a>,
+ binary: Binary,
+ attributes_as_maps: bool,
+) -> Result, Html5everExError> {
+ let flat_sink = build_flat_sink(binary.as_slice())?;
-#[rustler::nif]
-fn flat_parse_sync<'a>(env: Env<'a>, binary: Binary) -> Term<'a> {
- flat_parse(env, binary)
+ flat_dom::flat_sink_to_rec_term(env, &flat_sink, attributes_as_maps)
}
#[rustler::nif(schedule = "DirtyCpu")]
-fn flat_parse_dirty<'a>(env: Env<'a>, binary: Binary) -> Term<'a> {
- flat_parse(env, binary)
-}
+fn flat_parse<'a>(
+ env: Env<'a>,
+ binary: Binary,
+ attributes_as_maps: bool,
+) -> Result, Html5everExError> {
+ let flat_sink = build_flat_sink(binary.as_slice())?;
-fn flat_parse<'a>(env: Env<'a>, binary: Binary) -> Term<'a> {
- let sink = flat_dom::FlatSink::new();
+ flat_dom::flat_sink_to_flat_term(env, &flat_sink, attributes_as_maps)
+}
- let utf = std::str::from_utf8(binary.as_slice()).unwrap();
+fn build_flat_sink(bin_slice: &[u8]) -> Result {
+ let utf8 = std::str::from_utf8(bin_slice)?;
+ let sink = flat_dom::FlatSink::new();
let parser = html5ever::parse_document(sink, Default::default());
- let result = parser.one(utf);
-
- let result_term = flat_dom::flat_sink_to_flat_term(env, &result);
- (atoms::html5ever_nif_result(), atoms::ok(), result_term).encode(env)
+ Ok(parser.one(utf8))
}
-rustler::init!(
- "Elixir.Html5ever.Native",
- [parse_sync, parse_dirty, flat_parse_sync, flat_parse_dirty],
- load = on_load
-);
-
-fn on_load<'a>(_env: Env<'a>, _load_info: Term<'a>) -> bool {
- true
-}
+rustler::init!("Elixir.Html5ever.Native");
diff --git a/test/html5ever_test.exs b/test/html5ever_test.exs
index d0bb592..44797dc 100644
--- a/test/html5ever_test.exs
+++ b/test/html5ever_test.exs
@@ -3,14 +3,31 @@ defmodule Html5everTest do
doctest Html5ever
def read_html(name) do
- dir = to_string(:code.priv_dir(:html5ever)) <> "/test_data/"
- File.read!(dir <> name)
+ path = Path.join([:code.priv_dir(:html5ever), "test_data", name])
+ File.read!(path)
end
test "parse basic html" do
- html = ""
- ret = {:ok, [{"html", [], [{"head", [], []}, {"body", [], []}]}]}
- assert Html5ever.parse(html) == ret
+ html = "
Hello
"
+
+ assert Html5ever.parse(html) ==
+ {:ok,
+ [
+ {"html", [],
+ [
+ {"head", [], []},
+ {"body", [], [{"h1", [], ["Hello"]}, {:comment, " my comment "}]}
+ ]}
+ ]}
+ end
+
+ test "does not parse with not valid UTF8 binary" do
+ invalid =
+ <<98, 29, 104, 122, 46, 145, 14, 37, 122, 155, 227, 121, 49, 120, 108, 209, 155, 113, 229,
+ 98, 90, 181, 146>>
+
+ assert Html5ever.parse(invalid) ==
+ {:error, "cannot transform bytes from binary to a valid UTF8 string"}
end
test "flat parse basic html" do
@@ -38,6 +55,41 @@ defmodule Html5everTest do
assert Html5ever.flat_parse(html) == ret
end
+ test "does not flat parse with not valid UTF8 binary" do
+ invalid =
+ <<98, 29, 104, 122, 46, 145, 14, 37, 122, 155, 227, 121, 49, 120, 108, 209, 155, 113, 229,
+ 98, 90, 181, 146>>
+
+ assert Html5ever.flat_parse(invalid) ==
+ {:error, "cannot transform bytes from binary to a valid UTF8 string"}
+ end
+
+ test "flat parse basic html with attributes as maps" do
+ # Duplicated attribute is removed.
+ html = ""
+
+ ret =
+ {:ok,
+ %{
+ nodes: %{
+ 0 => %{id: 0, parent: nil, type: :document},
+ 1 => %{children: [2, 3], id: 1, parent: 0, type: :element, attrs: %{}, name: "html"},
+ 2 => %{children: [], id: 2, parent: 1, type: :element, attrs: %{}, name: "head"},
+ 3 => %{
+ children: [],
+ id: 3,
+ parent: 1,
+ type: :element,
+ attrs: %{"test" => "woo", "class" => "content"},
+ name: "body"
+ }
+ },
+ root: 0
+ }}
+
+ assert Html5ever.flat_parse_with_attributes_as_maps(html) == ret
+ end
+
test "parse example.com html" do
html = read_html("example.html")
assert {:ok, _} = Html5ever.parse(html)
@@ -147,4 +199,155 @@ defmodule Html5everTest do
]}
]} = parsed
end
+
+ test "reasonably deep html with attributes as maps" do
+ html = """
+
+
+
+ Test
+
+
+
+
+
+
+
+ very deep content
+
+
+
+
+
+
+
+
+ """
+
+ parsed = Html5ever.parse_with_attributes_as_maps(html)
+
+ assert {:ok,
+ [
+ {:doctype, "html", "", ""},
+ {"html", %{},
+ [
+ {"head", %{}, ["\n", " ", {"title", %{}, ["Test"]}, "\n", " "]},
+ "\n",
+ " ",
+ {"body", %{},
+ [
+ "\n",
+ " ",
+ {"div", %{"class" => "content"},
+ [
+ "\n",
+ " ",
+ {"span", %{},
+ [
+ "\n",
+ " ",
+ {"div", %{},
+ [
+ "\n",
+ " ",
+ {"span", %{},
+ [
+ "\n",
+ " ",
+ {"small", %{},
+ ["\n", " very deep content", "\n", " "]},
+ "\n",
+ " "
+ ]},
+ "\n",
+ " "
+ ]},
+ "\n",
+ " ",
+ {"img", %{"src" => "file.jpg"}, []},
+ "\n",
+ " "
+ ]},
+ "\n",
+ " "
+ ]},
+ "\n",
+ " ",
+ "\n",
+ "\n"
+ ]}
+ ]}
+ ]} = parsed
+ end
+
+ test "parse html with a template tag ignores template content" do
+ html = """
+
+
+ With template
+
+ Document
+
+ Flower
+
+
+
+
+ """
+
+ assert Html5ever.parse(html) ==
+ {:ok,
+ [
+ {:doctype, "html", "", ""},
+ {"html", [],
+ [
+ {"head", [], [{"title", [], ["With template"]}]},
+ "\n",
+ {"body", [],
+ ["\n", {"h1", [], ["Document"]}, "\n", {"template", [], []}, "\n", "\n", "\n"]}
+ ]}
+ ]}
+ end
+
+ test "parse html starting with a XML tag" do
+ html = """
+
+
+
+
+ Hello
+
+ link
+
+
+ """
+
+ assert Html5ever.parse(html) ==
+ {:ok,
+ [
+ {:comment, "?xml version=\"1.0\" encoding=\"UTF-8\"?"},
+ {:comment, " also a comment is allowed "},
+ {:doctype, "html", "-//W3C//DTD XHTML 1.0 Strict//EN",
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"},
+ {
+ "html",
+ [{"xmlns", "http://www.w3.org/1999/xhtml"}, {"xml:lang", "en"}, {"lang", "en"}],
+ [
+ {"head", [], [{"title", [], ["Hello"]}]},
+ "\n",
+ " ",
+ {"body", [],
+ [
+ "\n",
+ " ",
+ {"a", [{"id", "anchor"}, {"href", "https://example.com"}], ["link"]},
+ "\n",
+ " ",
+ "\n",
+ "\n"
+ ]}
+ ]
+ }
+ ]}
+ end
end