diff --git a/.clang-format b/.clang-format index ee0513ae..101b23e3 100644 --- a/.clang-format +++ b/.clang-format @@ -25,4 +25,6 @@ BraceWrapping: AllowAllConstructorInitializersOnNextLine: true ConstructorInitializerAllOnOneLineOrOnePerLine: true AllowShortCaseLabelsOnASingleLine: true +IfMacros: + - RAPIDFUZZ_IF_CONSTEXPR IndentPPDirectives: AfterHash diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 6d1ac2d5..8d2392cc 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -6,37 +6,75 @@ env: BUILD_TYPE: Release jobs: - # clang builds are broken in github actions right now: https://github.com/actions/runner-images/issues/8659 - #build_linux_clang: - # runs-on: ubuntu-latest - # strategy: - # fail-fast: false - # matrix: - # BUILD_TYPE: [Release, Debug] - # - # steps: - # - uses: actions/checkout@v2 - # - # - name: Configure CMake - # run: cmake -B build -DCMAKE_BUILD_TYPE=${{matrix.BUILD_TYPE}} -DRAPIDFUZZ_BUILD_TESTING=1 -DRAPIDFUZZ_ENABLE_LINTERS=1 -DRAPIDFUZZ_BUILD_FUZZERS=1 -DCMAKE_CXX_COMPILER=clang++ - # - # - name: Build - # run: cmake --build build --config ${{matrix.BUILD_TYPE}} - # - # - name: Test - # working-directory: build - # run: ctest -C ${{matrix.BUILD_TYPE}} --rerun-failed --output-on-failure - # - # - name: Fuzz Test - # working-directory: build - # run: | - # fuzzing/fuzz_lcs_similarity -max_total_time=30 - # fuzzing/fuzz_levenshtein_distance -max_total_time=30 - # fuzzing/fuzz_levenshtein_editops -max_total_time=30 - # fuzzing/fuzz_indel_distance -max_total_time=30 - # fuzzing/fuzz_indel_editops -max_total_time=30 - # fuzzing/fuzz_osa_distance -max_total_time=30 - # fuzzing/fuzz_damerau_levenshtein_distance -max_total_time=30 + build_linux_clang: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + BUILD_TYPE: [Release, Debug] + + steps: + - uses: actions/checkout@v2 + + - name: Configure CMake + run: cmake -B build -DCMAKE_BUILD_TYPE=${{matrix.BUILD_TYPE}} 
-DRAPIDFUZZ_BUILD_TESTING=1 -DRAPIDFUZZ_ENABLE_LINTERS=1 -DRAPIDFUZZ_BUILD_FUZZERS=1 -DCMAKE_CXX_COMPILER=clang++ + + - name: Build + run: cmake --build build --config ${{matrix.BUILD_TYPE}} + + - name: Test + working-directory: build + run: ctest -C ${{matrix.BUILD_TYPE}} --rerun-failed --output-on-failure + + - name: Fuzz Test + working-directory: build + run: | + fuzzing/fuzz_lcs_similarity -max_total_time=30 + fuzzing/fuzz_levenshtein_distance -max_total_time=30 + fuzzing/fuzz_levenshtein_editops -max_total_time=30 + fuzzing/fuzz_indel_distance -max_total_time=30 + fuzzing/fuzz_indel_editops -max_total_time=30 + fuzzing/fuzz_osa_distance -max_total_time=30 + fuzzing/fuzz_damerau_levenshtein_distance -max_total_time=30 + + build_linux_clang_32: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + BUILD_TYPE: [Release, Debug] + env: + CXXFLAGS: -m32 + CFLAGS: -m32 + + steps: + - uses: actions/checkout@v2 + + - name: Install Dependencies + run: | + sudo apt update + sudo apt install -y libc6-dev-i386 g++-multilib + + - name: Configure CMake + run: cmake -B build -DCMAKE_BUILD_TYPE=${{matrix.BUILD_TYPE}} -DRAPIDFUZZ_BUILD_TESTING=1 -DRAPIDFUZZ_ENABLE_LINTERS=1 -DRAPIDFUZZ_BUILD_FUZZERS=1 -DCMAKE_CXX_COMPILER=clang++ + + - name: Build + run: cmake --build build --config ${{matrix.BUILD_TYPE}} + + - name: Test + working-directory: build + run: ctest -C ${{matrix.BUILD_TYPE}} --rerun-failed --output-on-failure + + - name: Fuzz Test + working-directory: build + run: | + fuzzing/fuzz_lcs_similarity -max_total_time=30 + fuzzing/fuzz_levenshtein_distance -max_total_time=30 + fuzzing/fuzz_levenshtein_editops -max_total_time=30 + fuzzing/fuzz_indel_distance -max_total_time=30 + fuzzing/fuzz_indel_editops -max_total_time=30 + fuzzing/fuzz_osa_distance -max_total_time=30 + fuzzing/fuzz_damerau_levenshtein_distance -max_total_time=30 build_linux_gcc: runs-on: ubuntu-latest diff --git a/.github/workflows/linux-simple.yml b/.github/workflows/linux-simple.yml new 
file mode 100644 index 00000000..1b95a775 --- /dev/null +++ b/.github/workflows/linux-simple.yml @@ -0,0 +1,134 @@ +name: Linux builds (basic) + +on: [push, pull_request] + +jobs: + build: + name: ${{matrix.cxx}}, C++${{matrix.std}}, ${{matrix.build_type}} + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + compiler: + - g++-5 + - g++-6 + - g++-7 + - g++-8 + - g++-9 + - g++-10 + - clang++-6.0 + - clang++-7 + - clang++-8 + - clang++-9 + - clang++-10 + build_type: [Debug, Release] + std: [11] + include: + - cxx: g++-5 + other_pkgs: g++-5 + - cxx: g++-6 + other_pkgs: g++-6 + - cxx: g++-7 + other_pkgs: g++-7 + - cxx: g++-8 + other_pkgs: g++-8 + - cxx: g++-9 + other_pkgs: g++-9 + - cxx: g++-10 + other_pkgs: g++-10 + - cxx: clang++-6.0 + other_pkgs: clang-6.0 + - cxx: clang++-7 + other_pkgs: clang-7 + - cxx: clang++-8 + other_pkgs: clang-8 + - cxx: clang++-9 + other_pkgs: clang-9 + - cxx: clang++-10 + other_pkgs: clang-10 + + - cxx: clang++-10 + other_pkgs: clang-10 + std: 14 + build_type: Debug + - cxx: clang++-10 + other_pkgs: clang-10 + std: 17 + build_type: Debug + - cxx: clang++-10 + other_pkgs: clang-10 + std: 20 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 14 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 17 + build_type: Debug + - cxx: g++-10 + other_pkgs: g++-10 + std: 20 + build_type: Debug + + - cxx: clang++-10 + other_pkgs: clang-10 + std: 14 + build_type: Release + - cxx: clang++-10 + other_pkgs: clang-10 + std: 17 + build_type: Release + - cxx: clang++-10 + other_pkgs: clang-10 + std: 20 + build_type: Release + - cxx: g++-10 + other_pkgs: g++-10 + std: 14 + build_type: Release + - cxx: g++-10 + other_pkgs: g++-10 + std: 17 + build_type: Release + - cxx: g++-10 + other_pkgs: g++-10 + std: 20 + build_type: Release + + steps: + - uses: actions/checkout@v4 + + - name: Add repositories for older GCC + run: | + sudo apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic main' + sudo 
apt-add-repository 'deb http://azure.archive.ubuntu.com/ubuntu/ bionic universe' + if: ${{ matrix.cxx == 'g++-5' || matrix.cxx == 'g++-6' }} + + - name: Prepare environment + run: | + sudo apt-get update + sudo apt-get install -y ninja-build ${{matrix.other_pkgs}} + + - name: Configure CMake + env: + CXX: ${{matrix.cxx}} + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} \ + -DCMAKE_CXX_STANDARD=${{matrix.std}} \ + -DCMAKE_CXX_STANDARD_REQUIRED=ON \ + -DCMAKE_CXX_EXTENSIONS=OFF \ + -DRAPIDFUZZ_BUILD_TESTING=1 \ + -DRAPIDFUZZ_ENABLE_LINTERS=1 \ + -G Ninja + + - name: Build + working-directory: build + run: ninja + + - name: Test + working-directory: build + run: ctest -C ${{matrix.build_type}} --rerun-failed --output-on-failure -j `nproc` + diff --git a/CHANGELOG.md b/CHANGELOG.md index f3c3e962..9f8cf7fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ ## Changelog +## [3.3.2] - 2025-02-11 +### Fixed +- fixed compilation with old msvc versions that don't properly support if constexpr + +## [3.3.1] - 2025-01-22 +### Fixed +- fixed tests not building with catch2 versions >= 3.0 + +## [3.3.0] - 2025-01-18 +### Changed +- add C++11 and C++14 support + +## [3.2.0] - 2024-12-17 +### Performance +- improve calculation of min score inside partial_ratio so it can skip more alignments + +## [3.1.1] - 2024-10-24 +### Fixed +- Fixed incorrect score calculation for SIMD implementations of Levenshtein and OSA on 32 bit systems + +## [3.1.0] - 2024-10-24 +### Changed +- split `editops_apply`/`opcodes_apply` into `*_apply_str` and `*_apply_vec`. This avoids the instantiation of + std::basic_string for unsupported types. + ## [3.0.5] - 2024-07-02 ### Fixed - the editops implementation didn't properly account for some cells in the Levenshtein matrix. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index fcdc0176..67cec073 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # Cmake config largely taken from catch2 -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.5..3.31) if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.24) cmake_policy(SET CMP0135 NEW) @@ -32,7 +32,7 @@ if (CMAKE_BINARY_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) message(FATAL_ERROR "Building in-source is not supported! Create a build dir and remove ${CMAKE_SOURCE_DIR}/CMakeCache.txt") endif() -project(rapidfuzz LANGUAGES CXX VERSION 3.0.5) +project(rapidfuzz LANGUAGES CXX VERSION 3.3.2) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake") include(GNUInstallDirs) @@ -50,7 +50,7 @@ add_library(rapidfuzz INTERFACE) # provide a namespaced alias for clients to 'link' against if RapidFuzz is included as a sub-project add_library(rapidfuzz::rapidfuzz ALIAS rapidfuzz) -target_compile_features(rapidfuzz INTERFACE cxx_std_17) +target_compile_features(rapidfuzz INTERFACE cxx_std_11) target_include_directories(rapidfuzz INTERFACE diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index e8978d0d..24c71695 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2024-07-02 16:47:26.932914 +// Generated: 2025-02-11 13:48:20.141647 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. 
@@ -20,7 +20,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* hashmap for integers which can only grow, but can't remove elements */ template @@ -213,7 +214,8 @@ struct HybridGrowingHashmap { std::array m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -221,15 +223,16 @@ struct HybridGrowingHashmap { #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct BitMatrixView { using value_type = T; using size_type = size_t; - using pointer = std::conditional_t; - using reference = std::conditional_t; + using pointer = typename std::conditional::type; + using reference = typename std::conditional::type; BitMatrixView(pointer vector, size_type cols) noexcept : m_vector(vector), m_cols(cols) {} @@ -389,12 +392,12 @@ struct ShiftedBitMatrix { return bool(m_matrix[row][col_word] & col_mask); } - auto operator[](size_t row) noexcept + BitMatrixView operator[](size_t row) noexcept { return m_matrix[row]; } - auto operator[](size_t row) const noexcept + BitMatrixView operator[](size_t row) const noexcept { return m_matrix[row]; } @@ -409,7 +412,8 @@ struct ShiftedBitMatrix { std::vector m_offsets; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -421,227 +425,34 @@ struct ShiftedBitMatrix { #include #include -namespace rapidfuzz::detail { +#include -static inline void assume(bool b) -{ -#if defined(__clang__) - __builtin_assume(b); -#elif defined(__GNUC__) || defined(__GNUG__) - if (!b) __builtin_unreachable(); -#elif defined(_MSC_VER) - __assume(b); +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES #endif -} - -template -CharT* to_begin(CharT* s) -{ - return s; -} - -template -auto to_begin(T& x) -{ - using std::begin; - return begin(x); -} - -template -CharT* to_end(CharT* s) -{ - 
assume(s != nullptr); - while (*s != 0) - ++s; - - return s; -} - -template -auto to_end(T& x) -{ - using std::end; - return end(x); -} - -template -class Range { - Iter _first; - Iter _last; - // todo we might not want to cache the size for iterators - // that can can retrieve the size in O(1) time - size_t _size; - -public: - using value_type = typename std::iterator_traits::value_type; - using iterator = Iter; - using reverse_iterator = std::reverse_iterator; - - constexpr Range(Iter first, Iter last) : _first(first), _last(last) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } - - constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) - {} - - template - constexpr Range(T& x) : _first(to_begin(x)), _last(to_end(x)) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } - - constexpr iterator begin() const noexcept - { - return _first; - } - constexpr iterator end() const noexcept - { - return _last; - } - - constexpr reverse_iterator rbegin() const noexcept - { - return reverse_iterator(end()); - } - constexpr reverse_iterator rend() const noexcept - { - return reverse_iterator(begin()); - } - - constexpr size_t size() const - { - return _size; - } - - constexpr bool empty() const - { - return size() == 0; - } - explicit constexpr operator bool() const - { - return !empty(); - } - template < - typename... 
Dummy, typename IterCopy = Iter, - typename = std::enable_if_t::iterator_category>>> - constexpr decltype(auto) operator[](size_t n) const - { - return _first[static_cast(n)]; - } - - constexpr void remove_prefix(size_t n) - { - if constexpr (std::is_base_of_v::iterator_category>) - _first += static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _first++; - - _size -= n; - } - constexpr void remove_suffix(size_t n) - { - if constexpr (std::is_base_of_v::iterator_category>) - _last -= static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _last--; - - _size -= n; - } - - constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) - { - if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); - - Range res = *this; - res.remove_prefix(pos); - if (count < res.size()) res.remove_suffix(res.size() - count); - - return res; - } - - constexpr decltype(auto) front() const - { - return *(_first); - } - - constexpr decltype(auto) back() const - { - return *(_last - 1); - } - - constexpr Range reversed() const - { - return {rbegin(), rend(), _size}; - } - - friend std::ostream& operator<<(std::ostream& os, const Range& seq) - { - os << "["; - for (auto x : seq) - os << static_cast(x) << ", "; - os << "]"; - return os; - } -}; - -template -Range(T& x) -> Range; - -template -inline bool operator==(const Range& a, const Range& b) -{ - return std::equal(a.begin(), a.end(), b.begin(), b.end()); -} - -template -inline bool operator!=(const Range& a, const Range& b) -{ - return !(a == b); -} - -template -inline bool operator<(const Range& a, const Range& b) -{ - return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); -} - -template -inline bool operator>(const Range& a, const Range& b) -{ - return b < a; -} - -template -inline bool operator<=(const Range& a, const Range& b) -{ - return !(b < a); -} - -template -inline bool operator>=(const Range& a, const Range& b) -{ - return !(a < b); -} - -template 
-using RangeVec = std::vector>; - -} // namespace rapidfuzz::detail - -#include +/* older versions of msvc have bugs in their if constexpr support + * see https://github.com/rapidfuzz/rapidfuzz-cpp/issues/122 + * since we don't know the exact version this was fixed in, use the earliest we could test + */ +#if defined(_MSC_VER) && _MSC_VER < 1920 +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +#elif ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 +# define RAPIDFUZZ_IF_CONSTEXPR if constexpr +#else +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +#endif -#include +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) || __cplusplus >= 201402L) +# define RAPIDFUZZ_CONSTEXPR_CXX14 constexpr +#else +# define RAPIDFUZZ_CONSTEXPR_CXX14 inline +#endif -#include #include #include #include @@ -974,13 +785,10 @@ class Editops : private std::vector { inline bool operator==(const Editops& lhs, const Editops& rhs) { - if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) { - return false; - } + if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) return false; + + if (lhs.size() != rhs.size()) return false; - if (lhs.size() != rhs.size()) { - return false; - } return std::equal(lhs.begin(), lhs.end(), rhs.begin()); } @@ -1256,31 +1064,265 @@ using char_type = decltype(detail::inner_type(std::declval())); template using iter_value_t = typename std::iterator_traits::value_type; -// taken from -// https://stackoverflow.com/questions/16893992/check-if-type-can-be-explicitly-converted -template -struct is_explicitly_convertible { - template - static void f(T); +// taken from +// https://stackoverflow.com/questions/16893992/check-if-type-can-be-explicitly-converted +template +struct is_explicitly_convertible { + template + static void f(T); + + 
template + static constexpr auto test(int /*unused*/) -> decltype(f(static_cast(std::declval())), true) + { + return true; + } + + template + static constexpr auto test(...) -> bool + { + return false; + } + + static bool const value = test(0); +}; + +template +using rf_enable_if_t = typename std::enable_if::type; + +} // namespace rapidfuzz + +namespace rapidfuzz { +namespace detail { + +static inline void assume(bool b) +{ +#if defined(__clang__) + __builtin_assume(b); +#elif defined(__GNUC__) || defined(__GNUG__) + if (!b) __builtin_unreachable(); +#elif defined(_MSC_VER) + __assume(b); +#endif +} + +namespace to_begin_detail { +using std::begin; + +template +CharT* to_begin(CharT* s) +{ + return s; +} + +template +auto to_begin(T& x) -> decltype(begin(x)) +{ + + return begin(x); +} +} // namespace to_begin_detail + +using to_begin_detail::to_begin; + +namespace to_end_detail { +using std::end; + +template +CharT* to_end(CharT* s) +{ + assume(s != nullptr); + while (*s != 0) + ++s; + + return s; +} + +template +auto to_end(T& x) -> decltype(end(x)) +{ + return end(x); +} +} // namespace to_end_detail + +using to_end_detail::to_end; + +template +class Range { + Iter _first; + Iter _last; + // todo we might not want to cache the size for iterators + // that can retrieve the size in O(1) time + size_t _size; + +public: + using value_type = typename std::iterator_traits::value_type; + using iterator = Iter; + using reverse_iterator = std::reverse_iterator; + + Range(Iter first, Iter last) : _first(first), _last(last) + { + assert(std::distance(_first, _last) >= 0); + _size = static_cast(std::distance(_first, _last)); + } + + Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) + {} + + template + Range(T& x) : Range(to_begin(x), to_end(x)) + {} + + iterator begin() const noexcept + { + return _first; + } + iterator end() const noexcept + { + return _last; + } + + reverse_iterator rbegin() const noexcept + { + return 
reverse_iterator(end()); + } + reverse_iterator rend() const noexcept + { + return reverse_iterator(begin()); + } + + size_t size() const + { + return _size; + } + + bool empty() const + { + return size() == 0; + } + explicit operator bool() const + { + return !empty(); + } + + template ::iterator_category>::value>> + auto operator[](size_t n) const -> decltype(*_first) + { + return _first[static_cast(n)]; + } + + void remove_prefix(size_t n) + { + std::advance(_first, static_cast(n)); + _size -= n; + } + + void remove_suffix(size_t n) + { + std::advance(_last, -static_cast(n)); + _size -= n; + } + + Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) + { + if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); + + Range res = *this; + res.remove_prefix(pos); + if (count < res.size()) res.remove_suffix(res.size() - count); + + return res; + } + + const value_type& front() const + { + return *_first; + } + + const value_type& back() const + { + return *(_last - 1); + } + + Range reversed() const + { + return {rbegin(), rend(), _size}; + } + + friend std::ostream& operator<<(std::ostream& os, const Range& seq) + { + os << "["; + for (auto x : seq) + os << static_cast(x) << ", "; + os << "]"; + return os; + } +}; + +template +auto make_range(Iter first, Iter last) -> Range +{ + return Range(first, last); +} + +template +auto make_range(T& x) -> Range +{ + return {to_begin(x), to_end(x)}; +} + +template +inline bool operator==(const Range& a, const Range& b) +{ + if (a.size() != b.size()) return false; + + return std::equal(a.begin(), a.end(), b.begin()); +} + +template +inline bool operator!=(const Range& a, const Range& b) +{ + return !(a == b); +} + +template +inline bool operator<(const Range& a, const Range& b) +{ + return (std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end())); +} + +template +inline bool operator>(const Range& a, const Range& b) +{ + return b < a; +} - template - static constexpr auto 
test(int /*unused*/) -> decltype(f(static_cast(std::declval())), true) - { - return true; - } +template +inline bool operator<=(const Range& a, const Range& b) +{ + return !(b < a); +} - template - static constexpr auto test(...) -> bool - { - return false; - } +template +inline bool operator>=(const Range& a, const Range& b) +{ + return !(a < b); +} - static bool const value = test(0); -}; +template +using RangeVec = std::vector>; +} // namespace detail } // namespace rapidfuzz -namespace rapidfuzz::detail { +#include + +#include + +namespace rapidfuzz { +namespace detail { template class SplittedSentenceView { @@ -1288,7 +1330,7 @@ class SplittedSentenceView { using CharT = iter_value_t; SplittedSentenceView(RangeVec sentence) noexcept( - std::is_nothrow_move_constructible_v>) + std::is_nothrow_move_constructible>::value) : m_sentence(std::move(sentence)) {} @@ -1360,7 +1402,8 @@ auto SplittedSentenceView::join() const -> std::vector return joined; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -1373,7 +1416,8 @@ auto SplittedSentenceView::join() const -> std::vector # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template T bit_mask_lsb(size_t n) @@ -1409,7 +1453,7 @@ constexpr uint64_t shl64(uint64_t a, U shift) return (shift < 64) ? 
a << shift : 0; } -constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) +RAPIDFUZZ_CONSTEXPR_CXX14 uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) { /* todo should use _addcarry_u64 when available */ a += carryin; @@ -1420,7 +1464,7 @@ constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* ca } template -constexpr T ceil_div(T a, U divisor) +RAPIDFUZZ_CONSTEXPR_CXX14 T ceil_div(T a, U divisor) { T _div = static_cast(divisor); return a / _div + static_cast(a % _div != 0); @@ -1456,7 +1500,7 @@ static inline size_t popcount(uint8_t x) } template -constexpr T rotl(T x, unsigned int n) +RAPIDFUZZ_CONSTEXPR_CXX14 T rotl(T x, unsigned int n) { unsigned int num_bits = std::numeric_limits::digits; assert(n < num_bits); @@ -1556,25 +1600,41 @@ static inline unsigned int countr_zero(uint8_t x) return countr_zero(static_cast(x)); } -template -constexpr void unroll_impl(std::integer_sequence, F&& f) -{ - (f(std::integral_constant{}), ...); -} +template +struct UnrollImpl; + +template +struct UnrollImpl { + template + static void call(F&& f) + { + f(Pos); + UnrollImpl::call(std::forward(f)); + } +}; + +template +struct UnrollImpl { + template + static void call(F&&) + {} +}; -template -constexpr void unroll(F&& f) +template +RAPIDFUZZ_CONSTEXPR_CXX14 void unroll(F&& f) { - unroll_impl(std::make_integer_sequence{}, std::forward(f)); + UnrollImpl::call(f); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #if defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES) # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct DecomposedSet { @@ -1594,6 +1654,13 @@ static inline size_t abs_diff(size_t a, size_t b) return a > b ? 
a - b : b - a; } +template +TO opt_static_cast(const FROM& value) +{ + /* calling the cast through this template function somehow avoids useless cast warnings */ + return static_cast(value); +} + /** * @defgroup Common Common * Common utilities shared among multiple functions @@ -1648,13 +1715,15 @@ static inline void rf_aligned_free(void* ptr) /**@}*/ -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template DecomposedSet set_decomposition(SplittedSentenceView a, @@ -1682,6 +1751,15 @@ DecomposedSet set_decomposition(SplittedSentenceVi return {difference_ab, difference_ba, intersection}; } +template +std::pair rf_mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +{ + while (first1 != last1 && first2 != last2 && *first1 == *first2) + ++first1, ++first2; + + return std::make_pair(first1, first2); +} + /** * Removes common prefix of two string views */ @@ -1690,7 +1768,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, std::mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, rf_mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -1702,9 +1780,9 @@ size_t remove_common_prefix(Range& s1, Range& s2) template size_t remove_common_suffix(Range& s1, Range& s2) { - auto rfirst1 = std::rbegin(s1); + auto rfirst1 = s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, std::rend(s1), std::rbegin(s2), std::rend(s2)).first)); + std::distance(rfirst1, rf_mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; @@ -1818,7 +1896,8 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) return 
SplittedSentenceView(splitted); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include @@ -2424,13 +2503,13 @@ static inline native_simd min32(const native_simd& a, const native_simd return _mm256_min_epu32(a, b); } -/* taken from https://stackoverflow.com/a/51807800/11335032 */ +/* taken from https://stackoverflow.com/a/51807800 */ static inline native_simd sllv(const native_simd& a, const native_simd& count_) noexcept { __m256i mask_hi = _mm256_set1_epi32(static_cast(0xFF00FF00)); - __m256i multiplier_lut = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, char(128), 64, 32, 16, 8, 4, 2, 1, 0, 0, - 0, 0, 0, 0, 0, 0, char(128), 64, 32, 16, 8, 4, 2, 1); + __m256i multiplier_lut = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, char(-128), 64, 32, 16, 8, 4, 2, 1, 0, 0, + 0, 0, 0, 0, 0, 0, char(-128), 64, 32, 16, 8, 4, 2, 1); __m256i count_sat = _mm256_min_epu8(count_, _mm256_set1_epi8(8)); /* AVX shift counts are not masked. So a_i << n_i = 0 @@ -2447,7 +2526,7 @@ static inline native_simd sllv(const native_simd& a, return x; } -/* taken from https://stackoverflow.com/a/51805592/11335032 */ +/* taken from https://stackoverflow.com/a/51805592 */ static inline native_simd sllv(const native_simd& a, const native_simd& count) noexcept { @@ -3080,42 +3159,43 @@ static inline native_simd operator<(const native_simd& a, const native_sim #endif #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... 
args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_distance(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_distance(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, + return _normalized_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_similarity(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, - score_hint); + return _normalized_similarity(make_range(s1), make_range(s2), std::forward(args)..., + score_cutoff, score_hint); } protected: @@ -3153,11 +3233,11 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return T::_distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3165,15 +3245,16 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _similarity(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3181,7 +3262,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -3208,11 +3290,11 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -3220,23 +3302,25 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return T::_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -3251,11 +3335,21 @@ struct SimilarityBase : public NormalizedMetricBase { (maximum >= score_hint) ? maximum - score_hint : static_cast(WorstSimilarity); ResType sim = T::_similarity(s1, s2, std::forward(args)..., cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? 
dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } SimilarityBase() @@ -3269,27 +3363,27 @@ struct CachedNormalizedMetricBase { double normalized_distance(InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(first2, last2), score_cutoff, score_hint); + return _normalized_distance(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_distance(const Sentence2& s2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(s2), score_cutoff, score_hint); + return _normalized_distance(make_range(s2), score_cutoff, score_hint); } template double normalized_similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(first2, last2), score_cutoff, score_hint); + return _normalized_similarity(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(s2), score_cutoff, score_hint); + return _normalized_similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3330,7 +3424,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(first2, last2), score_cutoff, score_hint); + return derived._distance(make_range(first2, last2), score_cutoff, score_hint); } template @@ -3338,7 +3432,7 @@ struct CachedDistanceBase : public 
CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(s2), score_cutoff, score_hint); + return derived._distance(make_range(s2), score_cutoff, score_hint); } template @@ -3346,14 +3440,14 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(first2, last2), score_cutoff, score_hint); + return _similarity(make_range(first2, last2), score_cutoff, score_hint); } template ResType similarity(const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(s2), score_cutoff, score_hint); + return _similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3384,14 +3478,14 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(first2, last2), score_cutoff, score_hint); + return _distance(make_range(first2, last2), score_cutoff, score_hint); } template ResType distance(const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(s2), score_cutoff, score_hint); + return _distance(make_range(s2), score_cutoff, score_hint); } template @@ -3400,7 +3494,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(first2, last2), score_cutoff, score_hint); + return derived._similarity(make_range(first2, last2), score_cutoff, score_hint); } template @@ -3408,7 +3502,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType 
score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(s2), score_cutoff, score_hint); + return derived._similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -3421,11 +3515,21 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType hint_similarity = (maximum > score_hint) ? maximum - score_hint : 0; ResType sim = derived._similarity(s2, cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : score_cutoff + 1; } CachedSimilarityBase() @@ -3439,28 +3543,28 @@ struct MultiNormalizedMetricBase { void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_distance(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(s2), score_cutoff); + _normalized_distance(scores, score_count, make_range(s2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(s2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3474,7 +3578,8 @@ struct MultiNormalizedMetricBase { // reinterpretation only works when the types have the same size ResType* scores_orig = nullptr; - if constexpr (sizeof(double) == sizeof(ResType)) + + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) == sizeof(ResType)) scores_orig = reinterpret_cast(scores); else scores_orig = new ResType[derived.result_count()]; @@ -3488,7 +3593,7 @@ struct MultiNormalizedMetricBase { scores[i] = (norm_dist <= score_cutoff) ? 
norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -3516,7 +3621,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(first2, last2), score_cutoff); + derived._distance(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -3524,21 +3629,21 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(s2), score_cutoff); + derived._distance(scores, score_count, make_range(s2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(first2, last2), score_cutoff); + _similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(s2), score_cutoff); + _similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3567,14 +3672,14 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { void distance(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(first2, last2), score_cutoff); + _distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void distance(ResType* scores, size_t score_count, const Sentence2& s2, - ResType score_cutoff = WorstDistance) const + ResType score_cutoff = 
static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(s2), score_cutoff); + _distance(scores, score_count, make_range(s2), score_cutoff); } template @@ -3582,7 +3687,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(first2, last2), score_cutoff); + derived._similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -3590,7 +3695,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(s2), score_cutoff); + derived._similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -3603,22 +3708,34 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { for (size_t i = 0; i < derived.get_input_count(); ++i) { ResType maximum = derived.maximum(i, s2); ResType dist = maximum - scores[i]; - - if constexpr (std::is_floating_point_v) - scores[i] = (dist <= score_cutoff) ? dist : 1.0; - else - scores[i] = (dist <= score_cutoff) ? dist : score_cutoff + 1; + scores[i] = _apply_distance_score_cutoff(dist, score_cutoff); } } + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : score_cutoff + 1; + } + MultiSimilarityBase() {} friend T; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct RowId { @@ -3671,10 +3788,10 @@ size_t damerau_levenshtein_distance_zhao(const Range& s1, const Range< auto iter_s2 = s2.begin(); for (IntType j = 1; j <= len2; j++) { - ptrdiff_t diag = R1[j - 1] + static_cast(*iter_s1 != *iter_s2); - ptrdiff_t left = R[j - 1] + 1; - ptrdiff_t up = R1[j] + 1; - ptrdiff_t temp = std::min({diag, left, up}); + int64_t diag = R1[j - 1] + static_cast(*iter_s1 != *iter_s2); + int64_t left = R[j - 1] + 1; + int64_t up = R1[j] + 1; + int64_t temp = std::min({diag, left, up}); if (*iter_s1 == *iter_s2) { last_col_id = j; // last occurence of s1_i @@ -3682,15 +3799,15 @@ size_t damerau_levenshtein_distance_zhao(const Range& s1, const Range< T = last_i2l1; // save H_i-2,l-1 } else { - ptrdiff_t k = last_row_id.get(static_cast(*iter_s2)).val; - ptrdiff_t l = last_col_id; + int64_t k = last_row_id.get(static_cast(*iter_s2)).val; + int64_t l = last_col_id; if ((j - l) == 1) { - ptrdiff_t transpose = FR[j] + (i - k); + int64_t transpose = FR[j] + (i - k); temp = std::min(temp, transpose); } else if ((i - k) == 1) { - ptrdiff_t transpose = T + (j - l); + int64_t transpose = T + (j - l); temp = std::min(temp, transpose); } } @@ -3737,14 +3854,14 @@ class DamerauLevenshtein } template - static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, size_t) { return damerau_levenshtein_distance(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { /* the API will require a change when adding custom weights */ @@ -3875,8 +3992,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase - 
size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { return rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, score_cutoff); } @@ -3884,11 +4000,13 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedDamerauLevenshtein(const Sentence1& s1_) -> CachedDamerauLevenshtein>; template CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevenshtein>; +#endif } // namespace experimental } // namespace rapidfuzz @@ -3897,7 +4015,8 @@ CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevens #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Hamming : public DistanceBase::max(), bool> { friend DistanceBase::max(), bool>; @@ -3911,7 +4030,7 @@ class Hamming : public DistanceBase static size_t _distance(const Range& s1, const Range& s2, bool pad, - size_t score_cutoff, [[maybe_unused]] size_t score_hint) + size_t score_cutoff, size_t) { if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length."); @@ -3948,7 +4067,8 @@ Editops hamming_editops(const Range& s1, const Range& s2, bo return ops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -4022,7 +4142,7 @@ template Editops hamming_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(first1, last1), detail::Range(first2, last2), pad_, + return detail::hamming_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), pad_, score_hint); } @@ -4030,7 +4150,7 @@ template Editops hamming_editops(const Sentence1& s1, const Sentence2& s2, bool pad_ = true, size_t 
score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(s1), detail::Range(s2), pad_, score_hint); + return detail::hamming_editops(detail::make_range(s1), detail::make_range(s2), pad_, score_hint); } /** @@ -4095,8 +4215,7 @@ struct CachedHamming : public detail::CachedDistanceBase, } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t score_hint) const { return detail::Hamming::distance(s1, s2, pad, score_cutoff, score_hint); } @@ -4105,11 +4224,13 @@ struct CachedHamming : public detail::CachedDistanceBase, bool pad; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedHamming(const Sentence1& s1_, bool pad_ = true) -> CachedHamming>; template CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHamming>; +#endif /**@}*/ @@ -4121,7 +4242,8 @@ CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHammin #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct BitvectorHashmap { BitvectorHashmap() : m_map() @@ -4329,14 +4451,16 @@ struct BlockPatternMatchVector { BitMatrix m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct LCSseqResult; @@ -4353,6 +4477,20 @@ struct LCSseqResult { size_t sim; }; +template +LCSseqResult& getMatrixRef(LCSseqResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + /* * An encoded mbleven model table. 
* @@ -4466,12 +4604,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](size_t j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](size_t j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -4479,10 +4617,10 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](size_t j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { - *score_iter = (counts[i] >= score_cutoff) ? counts[i] : 0; + unroll([&](size_t i) { + *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); }); @@ -4493,7 +4631,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -4501,8 +4639,8 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { - *score_iter = (counts[i] >= score_cutoff) ? counts[i] : 0; + unroll([&](size_t i) { + *score_iter = (counts[i] >= score_cutoff) ? 
static_cast(counts[i]) : 0; score_iter++; }); } @@ -4518,7 +4656,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& unroll([&](size_t i) { S[i] = ~UINT64_C(0); }); LCSseqResult res; - if constexpr (RecordMatrix) res.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + } auto iter_s2 = s2.begin(); for (size_t i = 0; i < s2.size(); ++i) { @@ -4533,7 +4674,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); } @@ -4544,7 +4688,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); iter_s2++; @@ -4579,10 +4726,11 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range res; - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = band_width_left + 1 + band_width_right; size_t full_band_words = std::min(words, full_band / word_size + 2); - res.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); } /* first_block is the index of the first block in Ukkonen band. 
*/ @@ -4593,7 +4741,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S.set_offset(row, static_cast(first_block * word_size)); + } for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -4604,7 +4755,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range band_width_right) first_block = (row - band_width_right) / word_size; @@ -4676,8 +4830,7 @@ size_t lcs_seq_similarity(const BlockPatternMatchVector& block, Range size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -4709,8 +4862,7 @@ size_t lcs_seq_similarity(Range s1, Range s2, size_t score_c size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? 
len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -4744,7 +4896,9 @@ Editops recover_alignment(const Range& s1, const Range& s2, if (dist == 0) return editops; - [[maybe_unused]] size_t band_width_right = s2.size() - matrix.sim; +#ifndef NDEBUG + size_t band_width_right = s2.size() - matrix.sim; +#endif auto col = len1; auto row = len2; @@ -4843,13 +4997,14 @@ class LCSseq : public SimilarityBase static size_t _similarity(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + size_t) { return lcs_seq_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include #include @@ -4912,13 +5067,13 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d template Editops lcs_seq_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { - return detail::lcs_seq_editops(detail::Range(first1, last1), detail::Range(first2, last2)); + return detail::lcs_seq_editops(detail::make_range(first1, last1), detail::make_range(first2, last2)); } template Editops lcs_seq_editops(const Sentence1& s1, const Sentence2& s2) { - return detail::lcs_seq_editops(detail::Range(s1), detail::Range(s2)); + return detail::lcs_seq_editops(detail::make_range(s1), detail::make_range(s2)); } #ifdef RAPIDFUZZ_SIMD @@ -4930,26 +5085,26 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz friend detail::MultiSimilarityBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr 
(MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -5011,14 +5166,14 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::lcs_simd(scores_, PM, s2, score_cutoff); } @@ -5049,7 +5204,7 @@ struct CachedLCSseq {} template - CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -5063,25 +5218,27 @@ struct CachedLCSseq } template - size_t _similarity(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(const detail::Range& s2, size_t score_cutoff, size_t) const { - return detail::lcs_seq_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } 
std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedLCSseq(const Sentence1& s1_) -> CachedLCSseq>; template CachedLCSseq(InputIt1 first1, InputIt1 last1) -> CachedLCSseq>; +#endif } // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template size_t indel_distance(const BlockPatternMatchVector& block, const Range& s1, @@ -5138,7 +5295,8 @@ class Indel : public DistanceBase } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -5315,11 +5473,13 @@ struct CachedIndel CachedLCSseq scorer; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedIndel(const Sentence1& s1_) -> CachedIndel>; template CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel>; +#endif } // namespace rapidfuzz @@ -5327,7 +5487,8 @@ CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct FlaggedCharsWord { uint64_t P_flag; @@ -5410,7 +5571,11 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) template static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, - [[maybe_unused]] const Range& P, +#ifdef NDEBUG + const Range&, +#else + const Range& P, +#endif const Range& T, size_t Bound) { assert(P.size() <= 64); @@ -5478,7 +5643,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](size_t i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & (~flagged.P_flag[word + i]); }); @@ -5614,7 +5779,8 @@ static inline size_t count_transpositions_block(const BlockPatternMatchVector& P uint64_t PatternFlagMask = blsi(P_flag); - Transpositions += !(PM.get(PatternWord, T_first[countr_zero(T_flag)]) & PatternFlagMask); + Transpositions += 
!(PM.get(PatternWord, T_first[static_cast(countr_zero(T_flag))]) & + PatternFlagMask); T_flag = blsr(T_flag); P_flag ^= PatternFlagMask; @@ -5776,6 +5942,11 @@ struct JaroSimilaritySimdBounds { template static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -5783,7 +5954,9 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng using namespace simd_sse2; # endif - [[maybe_unused]] static constexpr size_t alignment = native_simd::alignment; +# ifndef RAPIDFUZZ_AVX2 + static constexpr size_t alignment = native_simd::alignment; +# endif static constexpr size_t vec_width = native_simd::size; assert(s2.size() <= sizeof(VecType) * 8); @@ -5823,7 +5996,7 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng // todo try to find a simd implementation for sse2 for (size_t i = 0; i < vec_width; ++i) { - size_t Bound = jaro_bounds(s1_lengths[i], s2.size()); + size_t Bound = jaro_bounds(static_cast(s1_lengths[i]), s2.size()); if (Bound > bounds.maxBound) bounds.maxBound = Bound; @@ -5835,7 +6008,7 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng bounds.boundMask = native_simd(reinterpret_cast(boundMask_.data())); # endif - size_t lastRelevantChar = maxLen + bounds.maxBound; + size_t lastRelevantChar = static_cast(maxLen) + bounds.maxBound; if (s2.size() > lastRelevantChar) s2.remove_suffix(s2.size() - lastRelevantChar); return bounds; @@ -5843,6 +6016,11 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng template static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; 
@@ -5865,7 +6043,7 @@ static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengt bounds.boundMaskSize = native_simd(bit_mask_lsb(2 * bounds.maxBound)); bounds.boundMask = native_simd(bit_mask_lsb(bounds.maxBound + 1)); - size_t lastRelevantChar = maxLen + bounds.maxBound; + size_t lastRelevantChar = static_cast(maxLen) + bounds.maxBound; if (s2.size() > lastRelevantChar) s2.remove_suffix(s2.size() - lastRelevantChar); return bounds; @@ -5932,7 +6110,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -5946,7 +6124,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -5966,8 +6144,10 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa T_flag[i].store(T_flags + i * vec_width); for (size_t i = 0; i < vec_width; ++i) { - VecType CommonChars = counts[i]; - if (!jaro_common_char_filter(s1_lengths[result_index], s2.size(), CommonChars, score_cutoff)) { + size_t CommonChars = static_cast(counts[i]); + if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, score_cutoff)) + { scores[result_index] = 0.0; result_index++; continue; @@ -6001,8 +6181,8 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa } } - double Sim = - jaro_calculate_similarity(s1_lengths[result_index], 
s2.size(), CommonChars, Transpositions); + double Sim = jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, Transpositions); scores[result_index] = (Sim >= score_cutoff) ? Sim : 0; result_index++; @@ -6047,7 +6227,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6060,7 +6240,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -6077,8 +6257,10 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM alignas(alignment) std::array T_flags; T_flag.store(T_flags.data()); for (size_t i = 0; i < vec_width; ++i) { - VecType CommonChars = counts[i]; - if (!jaro_common_char_filter(s1_lengths[result_index], s2.size(), CommonChars, score_cutoff)) { + size_t CommonChars = static_cast(counts[i]); + if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, score_cutoff)) + { scores[result_index] = 0.0; result_index++; continue; @@ -6101,8 +6283,8 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM P_flag_cur ^= PatternFlagMask; } - double Sim = - jaro_calculate_similarity(s1_lengths[result_index], s2.size(), CommonChars, Transpositions); + double Sim = jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, Transpositions); 
scores[result_index] = (Sim >= score_cutoff) ? Sim : 0; result_index++; @@ -6149,13 +6331,14 @@ class Jaro : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) + double) { return jaro_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include @@ -6222,12 +6405,13 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, friend detail::MultiSimilarityBase, double, 0, 1>; friend detail::MultiNormalizedMetricBase, double>; - static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); + static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64, "incorrect MaxLen used"); - using VecType = typename std::conditional_t< + using VecType = typename std::conditional< MaxLen == 8, uint8_t, - typename std::conditional_t>>; + typename std::conditional::type>::type>:: + type; constexpr static size_t get_vec_size() { @@ -6247,7 +6431,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, # endif } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -6318,12 +6502,12 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); detail::jaro_similarity_simd(scores_, PM, str_lens, str_lens_size, s2, score_cutoff); } template - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -6350,7 +6534,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub {} template - 
CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -6364,25 +6548,27 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub } template - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { - return detail::jaro_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedJaro(const Sentence1& s1_) -> CachedJaro>; template CachedJaro(InputIt1 first1, InputIt1 last1) -> CachedJaro>; +#endif } // namespace rapidfuzz -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template double jaro_winkler_similarity(const Range& P, const Range& T, double prefix_weight, @@ -6460,18 +6646,19 @@ class JaroWinkler : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double prefix_weight, - double score_cutoff, [[maybe_unused]] double score_hint) + double score_cutoff, double) { return jaro_winkler_similarity(s1, s2, prefix_weight, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6487,7 +6674,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, 
double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -6503,7 +6690,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -6519,7 +6706,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -6607,7 +6794,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -6636,7 +6823,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) - : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::Range(first1, last1)) + : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -6650,10 +6837,9 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { - return detail::jaro_winkler_similarity(PM, detail::Range(s1), s2, prefix_weight, score_cutoff); + return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } double prefix_weight; @@ -6661,6 +6847,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1) -> CachedJaroWinkler>; @@ 
-6668,6 +6855,7 @@ explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1) template CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) -> CachedJaroWinkler>; +#endif } // namespace rapidfuzz @@ -6678,7 +6866,8 @@ CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct LevenshteinRow { uint64_t VP; @@ -6717,6 +6906,34 @@ struct LevenshteinResult { size_t dist; }; +template +LevenshteinResult& getMatrixRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + +template +LevenshteinResult& getBitRowRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. 
+ // On C++17 this properly uses if constexpr + assert(RecordBitRow); + return reinterpret_cast&>(res); +#endif +} + template size_t generalized_levenshtein_wagner_fischer(const Range& s1, const Range& s2, LevenshteinWeightTable weights, size_t max) @@ -6904,9 +7121,10 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R LevenshteinResult res; res.dist = s1.size(); - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); } /* mask used when computing D[m,j] in the paper 10^(m-1) */ @@ -6935,19 +7153,21 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R VP = HN | ~(D0 | HP); VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } if (res.dist > max) res.dist = max + 1; - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = s2.size(); - res.vecs.emplace_back(VP, VN); + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = s2.size(); + res_.vecs.emplace_back(VP, VN); } return res; @@ -6979,12 +7199,12 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte native_simd VN(VecType(0)); alignas(alignment) std::array currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { 
+ unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -6995,7 +7215,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -7019,7 +7239,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -7027,7 +7247,9 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (!std::is_same_v) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -7148,14 +7370,15 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range res; res.dist = max; - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res_.VP.set_offset(i, start_offset + static_cast(i)); + res_.VN.set_offset(i, start_offset + static_cast(i)); } } @@ 
-7209,9 +7432,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -7251,9 +7475,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -7288,17 +7513,19 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range scores[words - 1] = s1.size(); - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = std::min(s1.size(), 2 * max + 1); size_t full_band_words = std::min(words, full_band / word_size + 2); - res.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); + res_.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); } - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = 0; + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = 0; } max = std::min(max, std::max(s1.size(), s2.size())); @@ -7315,9 +7542,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HP_carry = 1; uint64_t HN_carry = 0; - if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP.set_offset(row, 
static_cast(first_block * word_size)); + res_.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -7351,9 +7579,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[word].VP = HN | ~(D0 | HP); vecs[word].VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[row][word - first_block] = vecs[word].VP; - res.VN[row][word - first_block] = vecs[word].VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[row][word - first_block] = vecs[word].VP; + res_.VN[row][word - first_block] = vecs[word].VN; } return static_cast(HP_carry) - static_cast(HN_carry); @@ -7391,7 +7620,8 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[last_block].VN = 0; size_t chars_in_block = (last_block + 1 == words) ? ((s1.size() - 1) % word_size + 1) : 64; - scores[last_block] = scores[last_block - 1] + chars_in_block - HP_carry + HN_carry; + scores[last_block] = scores[last_block - 1] + chars_in_block - + opt_static_cast(HP_carry) + opt_static_cast(HN_carry); // todo probably wrong types scores[last_block] = static_cast(static_cast(scores[last_block]) + advance_block(last_block)); @@ -7438,26 +7668,27 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range return res; } - if constexpr (RecordBitRow) { + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { if (row == stop_row) { + auto& res_ = getBitRowRef(res); if (first_block == 0) - res.prev_score = stop_row + 1; + res_.prev_score = stop_row + 1; else { /* count backwards to find score at last position in previous block */ size_t relevant_bits = std::min((first_block + 1) * 64, s1.size()) % 64; uint64_t mask = ~UINT64_C(0); if (relevant_bits) mask >>= 64 - relevant_bits; - res.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - - popcount(vecs[first_block].VP & mask); + res_.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) 
- + popcount(vecs[first_block].VP & mask); } - res.first_block = first_block; - res.last_block = last_block; - res.vecs = std::move(vecs); + res_.first_block = first_block; + res_.last_block = last_block; + res_.vecs = std::move(vecs); /* unknown so make sure it is <= max */ - res.dist = 0; + res_.dist = 0; return res; } } @@ -7479,7 +7710,7 @@ size_t uniform_levenshtein_distance(const BlockPatternMatchVector& block, Range< if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; if (score_cutoff < abs_diff(s1.size(), s2.size())) return score_cutoff + 1; @@ -7537,7 +7768,7 @@ size_t uniform_levenshtein_distance(Range s1, Range s2, size if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; // at least length difference insertions/deletions required if (score_cutoff < (s1.size() - s2.size())) return score_cutoff + 1; @@ -7794,9 +8025,6 @@ HirschbergPos find_hirschberg_pos(const Range& s1, const Range= 0); - assert(hpos.right_score >= 0); - if (hpos.left_score + hpos.right_score > max) return find_hirschberg_pos(s1, s2, max * 2); else { @@ -7880,7 +8108,8 @@ Editops levenshtein_editops(const Range& s1, const Range& s2 return editops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -8159,7 +8388,7 @@ template Editops levenshtein_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(first1, last1), detail::Range(first2, last2), + return detail::levenshtein_editops(detail::make_range(first1, last1), detail::make_range(first2, 
last2), score_hint); } @@ -8167,7 +8396,7 @@ template Editops levenshtein_editops(const Sentence1& s1, const Sentence2& s2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(s1), detail::Range(s2), score_hint); + return detail::levenshtein_editops(detail::make_range(s1), detail::make_range(s2), score_hint); } #ifdef RAPIDFUZZ_SIMD @@ -8180,26 +8409,26 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -8263,14 +8492,14 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen 
== 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -8304,7 +8533,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) - : s1(first1, last1), PM(detail::Range(first1, last1)), weights(aWeights) + : s1(first1, last1), PM(detail::make_range(first1, last1)), weights(aWeights) {} private: @@ -8330,7 +8559,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase= weights.insert_cost + weights.delete_cost) { // max can make use of the common divisor of the three weights size_t new_max = detail::ceil_div(score_cutoff, weights.insert_cost); - size_t dist = detail::indel_distance(PM, detail::Range(s1), s2, new_max); + size_t dist = detail::indel_distance(PM, detail::make_range(s1), s2, new_max); dist *= weights.insert_cost; return (dist <= score_cutoff) ? 
dist : score_cutoff + 1; } } - return detail::generalized_levenshtein_distance(detail::Range(s1), s2, weights, score_cutoff); + return detail::generalized_levenshtein_distance(detail::make_range(s1), s2, weights, score_cutoff); } std::vector s1; @@ -8357,6 +8586,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein>; @@ -8364,6 +8594,7 @@ explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights template CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein>; +#endif } // namespace rapidfuzz @@ -8371,7 +8602,8 @@ CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeigh #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /** * @brief Bitparallel implementation of the OSA distance. @@ -8461,12 +8693,12 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV native_simd PM_j_old(VecType(0)); alignas(alignment) std::array currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -8477,7 +8709,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -8504,7 +8736,7 @@ void 
osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -8512,7 +8744,9 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (!std::is_same_v) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -8631,7 +8865,8 @@ class OSA : public DistanceBase::ma } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -8745,26 +8980,26 @@ struct MultiOSA friend detail::MultiDistanceBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -8825,14 +9060,14 
@@ struct MultiOSA if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -8863,7 +9098,7 @@ struct CachedOSA {} template - CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -8877,8 +9112,7 @@ struct CachedOSA } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { size_t res; if (s1.empty()) @@ -8886,9 +9120,9 @@ struct CachedOSA else if (s2.empty()) res = s1.size(); else if (s1.size() < 64) - res = detail::osa_hyrroe2003(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003(PM, detail::make_range(s1), s2, score_cutoff); else - res = detail::osa_hyrroe2003_block(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003_block(PM, detail::make_range(s1), s2, score_cutoff); return (res <= score_cutoff) ? 
res : score_cutoff + 1; } @@ -8897,18 +9131,21 @@ struct CachedOSA detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedOSA(const Sentence1& s1_) -> CachedOSA>; template CachedOSA(InputIt1 first1, InputIt1 last1) -> CachedOSA>; +#endif /**@}*/ } // namespace rapidfuzz #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Postfix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -8921,15 +9158,15 @@ class Postfix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_suffix(s1, s2); return (dist >= score_cutoff) ? dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -9009,8 +9246,7 @@ struct CachedPostfix : public detail::CachedSimilarityBase } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t score_hint) const { return detail::Postfix::similarity(s1, s2, score_cutoff, score_hint); } @@ -9018,19 +9254,21 @@ struct CachedPostfix : public detail::CachedSimilarityBase std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPostfix(const Sentence1& s1_) -> CachedPostfix>; template CachedPostfix(InputIt1 first1, InputIt1 last1) -> CachedPostfix>; - +#endif /**@}*/ } // namespace rapidfuzz #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Prefix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -9043,15 +9281,15 @@ class Prefix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) 
{ size_t dist = remove_common_prefix(s1, s2); return (dist >= score_cutoff) ? dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz namespace rapidfuzz { @@ -9130,8 +9368,7 @@ struct CachedPrefix : public detail::CachedSimilarityBase, } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t) const { return detail::Prefix::similarity(s1, s2, score_cutoff, score_cutoff); } @@ -9139,11 +9376,13 @@ struct CachedPrefix : public detail::CachedSimilarityBase, std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPrefix(const Sentence1& s1_) -> CachedPrefix>; template CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix>; +#endif /**@}*/ @@ -9151,14 +9390,15 @@ CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix -std::basic_string editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, - InputIt2 last2) +namespace detail { +template +ReturnType editops_apply_impl(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) { auto len1 = static_cast(std::distance(first1, last1)); auto len2 = static_cast(std::distance(first2, last2)); - std::basic_string res_str; + ReturnType res_str; res_str.resize(len1 + len2); size_t src_pos = 0; size_t dest_pos = 0; @@ -9166,7 +9406,8 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu for (const auto& op : ops) { /* matches between last and current editop */ while (src_pos < op.src_pos) { - res_str[dest_pos] = static_cast(first1[static_cast(src_pos)]); + res_str[dest_pos] = + static_cast(first1[static_cast(src_pos)]); src_pos++; dest_pos++; } @@ -9174,12 +9415,14 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu switch (op.type) { case EditType::None: case EditType::Replace: - res_str[dest_pos] = 
static_cast(first2[static_cast(op.dest_pos)]); + res_str[dest_pos] = + static_cast(first2[static_cast(op.dest_pos)]); src_pos++; dest_pos++; break; case EditType::Insert: - res_str[dest_pos] = static_cast(first2[static_cast(op.dest_pos)]); + res_str[dest_pos] = + static_cast(first2[static_cast(op.dest_pos)]); dest_pos++; break; case EditType::Delete: src_pos++; break; @@ -9188,7 +9431,8 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu /* matches after the last editop */ while (src_pos < len1) { - res_str[dest_pos] = static_cast(first1[static_cast(src_pos)]); + res_str[dest_pos] = + static_cast(first1[static_cast(src_pos)]); src_pos++; dest_pos++; } @@ -9197,21 +9441,14 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu return res_str; } -template -std::basic_string editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2) -{ - return editops_apply(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), - detail::to_end(s2)); -} - -template -std::basic_string opcodes_apply(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, - InputIt2 last2) +template +ReturnType opcodes_apply_impl(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) { auto len1 = static_cast(std::distance(first1, last1)); auto len2 = static_cast(std::distance(first2, last2)); - std::basic_string res_str; + ReturnType res_str; res_str.resize(len1 + len2); size_t dest_pos = 0; @@ -9219,13 +9456,15 @@ std::basic_string opcodes_apply(const Opcodes& ops, InputIt1 first1, Inpu switch (op.type) { case EditType::None: for (auto i = op.src_begin; i < op.src_end; ++i) { - res_str[dest_pos++] = static_cast(first1[static_cast(i)]); + res_str[dest_pos++] = + static_cast(first1[static_cast(i)]); } break; case EditType::Replace: case EditType::Insert: for (auto i = op.dest_begin; i < op.dest_end; ++i) { - res_str[dest_pos++] = static_cast(first2[static_cast(i)]); + 
res_str[dest_pos++] = + static_cast(first2[static_cast(i)]); } break; case EditType::Delete: break; @@ -9236,11 +9475,62 @@ std::basic_string opcodes_apply(const Opcodes& ops, InputIt1 first1, Inpu return res_str; } +} // namespace detail + +template +std::basic_string editops_apply_str(const Editops& ops, InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2) +{ + return detail::editops_apply_impl>(ops, first1, last1, first2, last2); +} + +template +std::basic_string editops_apply_str(const Editops& ops, const Sentence1& s1, const Sentence2& s2) +{ + return detail::editops_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); +} + +template +std::basic_string opcodes_apply_str(const Opcodes& ops, InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2) +{ + return detail::opcodes_apply_impl>(ops, first1, last1, first2, last2); +} + +template +std::basic_string opcodes_apply_str(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) +{ + return detail::opcodes_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); +} + +template +std::vector editops_apply_vec(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) +{ + return detail::editops_apply_impl>(ops, first1, last1, first2, last2); +} + +template +std::vector editops_apply_vec(const Editops& ops, const Sentence1& s1, const Sentence2& s2) +{ + return detail::editops_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); +} + +template +std::vector opcodes_apply_vec(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) +{ + return detail::opcodes_apply_impl>(ops, first1, last1, first2, last2); +} + template -std::basic_string opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) +std::vector opcodes_apply_vec(const Opcodes& ops, const Sentence1& 
s1, const Sentence2& s2) { - return opcodes_apply(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), - detail::to_end(s2)); + return detail::opcodes_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); } } // namespace rapidfuzz @@ -9252,10 +9542,11 @@ std::basic_string opcodes_apply(const Opcodes& ops, const Sentence1& s1, #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* - * taken from https://stackoverflow.com/a/17251989/11335032 + * taken from https://stackoverflow.com/a/17251989 */ template bool CanTypeFitValue(const U value) @@ -9314,9 +9605,11 @@ struct CharSet { } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /** * @defgroup Fuzz Fuzz @@ -9383,7 +9676,7 @@ struct MultiRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template @@ -9424,11 +9717,13 @@ struct CachedRatio { CachedIndel cached_indel; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedRatio(const Sentence1& s1) -> CachedRatio>; template CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio>; +#endif template ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2, @@ -9497,11 +9792,13 @@ struct CachedPartialRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialRatio(const Sentence1& s1) -> CachedPartialRatio>; template CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::ratio between @@ -9606,11 +9903,13 @@ struct CachedTokenSortRatio { CachedRatio 
cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSortRatio(const Sentence1& s1) -> CachedTokenSortRatio>; template CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::partial_ratio @@ -9664,6 +9963,7 @@ struct CachedPartialTokenSortRatio { CachedPartialRatio cached_partial_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSortRatio(const Sentence1& s1) -> CachedPartialTokenSortRatio>; @@ -9671,6 +9971,7 @@ explicit CachedPartialTokenSortRatio(const Sentence1& s1) template CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSortRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -9732,11 +10033,13 @@ struct CachedTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSetRatio(const Sentence1& s1) -> CachedTokenSetRatio>; template CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -9789,12 +10092,14 @@ struct CachedPartialTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio>; template CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenSetRatio>; +#endif /** * @brief Helper method that returns the maximum of fuzz::token_set_ratio and @@ -9851,11 +10156,13 @@ struct CachedTokenRatio { CachedRatio cached_ratio_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenRatio(const Sentence1& s1) -> CachedTokenRatio>; template CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio>; +#endif /** * @brief Helper method that returns 
the maximum of @@ -9912,11 +10219,13 @@ struct CachedPartialTokenRatio { std::vector s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenRatio(const Sentence1& s1) -> CachedPartialTokenRatio>; template CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio>; +#endif /** * @brief Calculates a weighted ratio based on the other ratio algorithms @@ -9972,11 +10281,13 @@ struct CachedWRatio { rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedWRatio(const Sentence1& s1) -> CachedWRatio>; template CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio>; +#endif /** * @brief Calculates a quick ratio between two strings using fuzz.ratio @@ -10035,13 +10346,13 @@ struct MultiQRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const { - rapidfuzz::detail::Range s2_(s2); + auto s2_ = detail::make_range(s2); if (s2_.empty()) { for (size_t i = 0; i < str_lens.size(); ++i) scores[i] = 0; @@ -10084,15 +10395,18 @@ struct CachedQRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedQRatio(const Sentence1& s1) -> CachedQRatio>; template CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio>; +#endif /**@}*/ -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz #include @@ -10102,7 +10416,8 @@ CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /********************************************** * ratio @@ -10111,7 +10426,7 @@ namespace 
rapidfuzz::fuzz { template double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff) { - return ratio(detail::Range(first1, last1), detail::Range(first2, last2), score_cutoff); + return ratio(detail::make_range(first1, last1), detail::make_range(first2, last2), score_cutoff); } template @@ -10125,7 +10440,7 @@ template double CachedRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double score_hint) const { - return similarity(detail::Range(first2, last2), score_cutoff, score_hint); + return similarity(detail::make_range(first2, last2), score_cutoff, score_hint); } template @@ -10141,7 +10456,7 @@ double CachedRatio::similarity(const Sentence2& s2, double score_cutoff, namespace fuzz_detail { -static constexpr double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) +static RAPIDFUZZ_CONSTEXPR_CXX14 double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) { double score = (lensum > 0) ? (100.0 - 100.0 * static_cast(dist) / static_cast(lensum)) : 100.0; @@ -10181,8 +10496,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(window.first); auto subseq2_first = s2.begin() + static_cast(window.second); - detail::Range subseq1(subseq1_first, subseq1_first + static_cast(len1)); - detail::Range subseq2(subseq2_first, subseq2_first + static_cast(len1)); + auto subseq1 = + detail::make_range(subseq1_first, subseq1_first + static_cast(len1)); + auto subseq2 = + detail::make_range(subseq2_first, subseq2_first + static_cast(len1)); if (scores[window.first] == std::numeric_limits::max()) { scores[window.first] = cached_ratio.cached_indel.distance(subseq1); @@ -10215,9 +10532,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range last */ size_t known_edits = detail::abs_diff(scores[window.first], scores[window.second]); /* half of the cells that are not needed for known_edits can lead to a better score */ + size_t max_score_improvement = (cell_diff - 
known_edits / 2) / 2 * 2; ptrdiff_t min_score = static_cast(std::min(scores[window.first], scores[window.second])) - - static_cast(cell_diff + known_edits / 2); + static_cast(max_score_improvement); if (min_score < static_cast(cutoff_dist)) { size_t center = cell_diff / 2; new_windows.emplace_back(window.first, window.first + center); @@ -10235,7 +10553,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i)); + auto subseq = rapidfuzz::detail::make_range(s2.begin(), s2.begin() + static_cast(i)); if (!s1_char_set.find(subseq.back())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -10248,7 +10566,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i), s2.end()); + auto subseq = rapidfuzz::detail::make_range(s2.begin() + static_cast(i), s2.end()); if (!s1_char_set.find(subseq.front())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -10297,8 +10615,8 @@ ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, if (!len1 || !len2) return ScoreAlignment(static_cast(len1 == len2) * 100.0, 0, len1, 0, len1); - auto s1 = detail::Range(first1, last1); - auto s2 = detail::Range(first2, last2); + auto s1 = detail::make_range(first1, last1); + auto s2 = detail::make_range(first2, last2); auto alignment = fuzz_detail::partial_ratio_impl(s1, s2, score_cutoff); if (alignment.score != 100 && s1.size() == s2.size()) { @@ -10345,7 +10663,7 @@ CachedPartialRatio::CachedPartialRatio(InputIt1 first1, InputIt1 last1) template template double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { size_t len1 = s1.size(); size_t len2 = static_cast(std::distance(first2, last2)); @@ -10357,8 +10675,8 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d if (!len1 || !len2) return static_cast(len1 == len2) * 100.0; - auto s1_ = detail::Range(s1); - auto s2 = 
detail::Range(first2, last2); + auto s1_ = detail::make_range(s1); + auto s2 = detail::make_range(first2, last2); double score = fuzz_detail::partial_ratio_impl(s1_, s2, cached_ratio, s1_char_set, score_cutoff).score; if (score != 100 && s1_.size() == s2.size()) { @@ -10372,8 +10690,7 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d template template -double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10400,7 +10717,7 @@ double token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_c template template double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10409,8 +10726,7 @@ double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10439,7 +10755,7 @@ double partial_token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10448,8 +10764,7 @@ double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double 
CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10528,7 +10843,7 @@ double token_set_ratio(const Sentence1& s1, const Sentence2& s2, double score_cu template template double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10537,8 +10852,7 @@ double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10587,7 +10901,7 @@ double partial_token_set_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -10596,8 +10910,7 @@ double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10739,8 +11052,9 @@ double token_ratio(const std::vector& s1_sorted, double result = 0; auto s2_sorted = tokens_b.join(); if (s1_sorted.size() < 65) { - double norm_sim = detail::indel_normalized_similarity(blockmap_s1_sorted, detail::Range(s1_sorted), - detail::Range(s2_sorted), score_cutoff / 100); + double norm_sim = + 
detail::indel_normalized_similarity(blockmap_s1_sorted, detail::make_range(s1_sorted), + detail::make_range(s2_sorted), score_cutoff / 100); result = norm_sim * 100; } else { @@ -10775,15 +11089,14 @@ double token_ratio(const std::vector& s1_sorted, template template double CachedTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::token_ratio(s1_tokens, cached_ratio_s1_sorted, first2, last2, score_cutoff); } template template -double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10861,15 +11174,14 @@ double partial_token_ratio(const std::vector& s1_sorted, template template double CachedPartialTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::partial_token_ratio(s1_sorted, tokens_s1, first2, last2, score_cutoff); } template template -double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -10927,13 +11239,12 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) cached_partial_ratio(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))), s1_sorted(tokens_s1.join()), - blockmap_s1_sorted(detail::Range(s1_sorted)) + blockmap_s1_sorted(detail::make_range(s1_sorted)) {} template template -double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double 
CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { if (score_cutoff > 100) return 0; @@ -10972,8 +11283,7 @@ double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -11004,8 +11314,7 @@ double QRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff) template template -double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { auto len2 = std::distance(first2, last2); @@ -11018,12 +11327,12 @@ double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz #endif // RAPIDFUZZ_AMALGAMATED_HPP_INCLUDED diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt index 64a8819b..2a71733d 100644 --- a/fuzzing/CMakeLists.txt +++ b/fuzzing/CMakeLists.txt @@ -1,6 +1,6 @@ function(create_fuzzer fuzzer) add_executable(fuzz_${fuzzer} fuzz_${fuzzer}.cpp) - target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_17) + target_compile_features(fuzz_${fuzzer} PUBLIC cxx_std_11) target_link_libraries(fuzz_${fuzzer} PRIVATE rapidfuzz::rapidfuzz) target_compile_options(fuzz_${fuzzer} PRIVATE -g -O1 -fsanitize=fuzzer,address -march=native) @@ -20,3 +20,5 @@ 
create_fuzzer(osa_distance) create_fuzzer(damerau_levenshtein_distance) create_fuzzer(jaro_similarity) + +create_fuzzer(partial_ratio) diff --git a/fuzzing/fuzz_damerau_levenshtein_distance.cpp b/fuzzing/fuzz_damerau_levenshtein_distance.cpp index b1066168..743cf74a 100644 --- a/fuzzing/fuzz_damerau_levenshtein_distance.cpp +++ b/fuzzing/fuzz_damerau_levenshtein_distance.cpp @@ -8,8 +8,8 @@ #include #include -void validate_distance(size_t reference_dist, const std::basic_string& s1, - const std::basic_string& s2, size_t score_cutoff) +void validate_distance(size_t reference_dist, const std::vector& s1, const std::vector& s2, + size_t score_cutoff) { if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1; @@ -26,7 +26,7 @@ void validate_distance(size_t reference_dist, const std::basic_string& extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) return 0; size_t reference_dist = rapidfuzz_reference::damerau_levenshtein_distance(s1, s2); @@ -40,8 +40,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) /* test long sequences */ for (unsigned int i = 2; i < 9; ++i) { - std::basic_string s1_ = str_multiply(s1, pow(2, i)); - std::basic_string s2_ = str_multiply(s2, pow(2, i)); + std::vector s1_ = vec_multiply(s1, pow(2, i)); + std::vector s2_ = vec_multiply(s2, pow(2, i)); if (s1_.size() > 10000 || s2_.size() > 10000) break; diff --git a/fuzzing/fuzz_indel_distance.cpp b/fuzzing/fuzz_indel_distance.cpp index 88b8ade1..546bcd14 100644 --- a/fuzzing/fuzz_indel_distance.cpp +++ b/fuzzing/fuzz_indel_distance.cpp @@ -8,8 +8,7 @@ #include #include -void validate_distance(const std::basic_string& s1, const std::basic_string& s2, - size_t score_cutoff) +void validate_distance(const std::vector& s1, const std::vector& s2, size_t score_cutoff) { auto dist = rapidfuzz::indel_distance(s1, s2, score_cutoff); auto reference_dist = 
rapidfuzz_reference::indel_distance(s1, s2, score_cutoff); @@ -25,7 +24,7 @@ void validate_distance(const std::basic_string& s1, const std::basic_st extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) return 0; validate_distance(s1, s2, 0); diff --git a/fuzzing/fuzz_indel_editops.cpp b/fuzzing/fuzz_indel_editops.cpp index 07cdf85e..4d99c300 100644 --- a/fuzzing/fuzz_indel_editops.cpp +++ b/fuzzing/fuzz_indel_editops.cpp @@ -9,13 +9,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) return 0; size_t score = rapidfuzz_reference::indel_distance(s1, s2); rapidfuzz::Editops ops = rapidfuzz::indel_editops(s1, s2); - if (ops.size() == score && s2 != rapidfuzz::editops_apply(ops, s1, s2)) + if (ops.size() == score && s2 != rapidfuzz::editops_apply_vec(ops, s1, s2)) throw std::logic_error("levenshtein_editops failed"); return 0; diff --git a/fuzzing/fuzz_jaro_similarity.cpp b/fuzzing/fuzz_jaro_similarity.cpp index 1ac257b2..6ba5a046 100644 --- a/fuzzing/fuzz_jaro_similarity.cpp +++ b/fuzzing/fuzz_jaro_similarity.cpp @@ -14,7 +14,7 @@ bool is_close(double a, double b, double epsilon) } template -void validate_simd(const std::basic_string& s1, const std::basic_string& s2) +void validate_simd(const std::vector& s1, const std::vector& s2) { #ifdef RAPIDFUZZ_SIMD size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0); @@ -22,7 +22,7 @@ void validate_simd(const std::basic_string& s1, const std::basic_string rapidfuzz::experimental::MultiJaro scorer(count); - std::vector> strings; + std::vector> strings; for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) { if (std::distance(it1, s1.end()) < static_cast(MaxLen)) { @@ -59,7 +59,7 @@ void validate_simd(const std::basic_string& s1, const std::basic_string #endif } -void 
validate_distance(const std::basic_string& s1, const std::basic_string& s2) +void validate_distance(const std::vector& s1, const std::vector& s2) { double reference_sim = rapidfuzz_reference::jaro_similarity(s1, s2); double sim = rapidfuzz::jaro_similarity(s1, s2); @@ -80,15 +80,15 @@ void validate_distance(const std::basic_string& s1, const std::basic_st extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) return 0; validate_distance(s1, s2); /* test long sequences */ for (unsigned int i = 2; i < 9; ++i) { - std::basic_string s1_ = str_multiply(s1, pow(2, i)); - std::basic_string s2_ = str_multiply(s2, pow(2, i)); + std::vector s1_ = vec_multiply(s1, pow(2, i)); + std::vector s2_ = vec_multiply(s2, pow(2, i)); if (s1_.size() > 10000 || s2_.size() > 10000) break; diff --git a/fuzzing/fuzz_lcs_similarity.cpp b/fuzzing/fuzz_lcs_similarity.cpp index 5a82120f..7f833c51 100644 --- a/fuzzing/fuzz_lcs_similarity.cpp +++ b/fuzzing/fuzz_lcs_similarity.cpp @@ -9,13 +9,13 @@ #include template -void validate_simd(const std::basic_string& s1, const std::basic_string& s2) +void validate_simd(const std::vector& s1, const std::vector& s2) { #ifdef RAPIDFUZZ_SIMD size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0); rapidfuzz::experimental::MultiLCSseq scorer(count); - std::vector> strings; + std::vector> strings; for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) { if (std::distance(it1, s1.end()) < static_cast(MaxLen)) { @@ -51,7 +51,7 @@ void validate_simd(const std::basic_string& s1, const std::basic_string extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) { return 0; } diff --git a/fuzzing/fuzz_levenshtein_distance.cpp b/fuzzing/fuzz_levenshtein_distance.cpp index a595b8b9..a577c608 100644 --- a/fuzzing/fuzz_levenshtein_distance.cpp 
+++ b/fuzzing/fuzz_levenshtein_distance.cpp @@ -9,7 +9,7 @@ #include template -void validate_simd(const std::basic_string& s1, const std::basic_string& s2) +void validate_simd(const std::vector& s1, const std::vector& s2) { #ifdef RAPIDFUZZ_SIMD size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0); @@ -17,7 +17,7 @@ void validate_simd(const std::basic_string& s1, const std::basic_string rapidfuzz::experimental::MultiLevenshtein scorer(count); - std::vector> strings; + std::vector> strings; for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) { if (std::distance(it1, s1.end()) < static_cast(MaxLen)) { @@ -52,8 +52,8 @@ void validate_simd(const std::basic_string& s1, const std::basic_string #endif } -void validate_distance(size_t reference_dist, const std::basic_string& s1, - const std::basic_string& s2, size_t score_cutoff) +void validate_distance(size_t reference_dist, const std::vector& s1, const std::vector& s2, + size_t score_cutoff) { if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1; @@ -75,7 +75,7 @@ void validate_distance(size_t reference_dist, const std::basic_string& extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) return 0; size_t reference_dist = rapidfuzz_reference::levenshtein_distance(s1, s2); diff --git a/fuzzing/fuzz_levenshtein_editops.cpp b/fuzzing/fuzz_levenshtein_editops.cpp index 596caaaa..7a540eae 100644 --- a/fuzzing/fuzz_levenshtein_editops.cpp +++ b/fuzzing/fuzz_levenshtein_editops.cpp @@ -7,17 +7,17 @@ #include #include -void validate_editops(const std::basic_string& s1, const std::basic_string& s2, - size_t score, size_t score_hint = std::numeric_limits::max()) +void validate_editops(const std::vector& s1, const std::vector& s2, size_t score, + size_t score_hint = std::numeric_limits::max()) { rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2, score_hint); - if (ops.size() 
== score && s2 != rapidfuzz::editops_apply(ops, s1, s2)) + if (ops.size() == score && s2 != rapidfuzz::editops_apply_vec(ops, s1, s2)) throw std::logic_error("levenshtein_editops failed"); } extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) return 0; /* hirschbergs algorithm is only used for very long sequences which are apparently not generated a lot by @@ -30,14 +30,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) validate_editops(s1, s2, score, score); if (s1.size() > 1 && s2.size() > 1) { - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); if (hpos.left_score + hpos.right_score != score) throw std::logic_error("find_hirschberg_pos failed"); } - s1 = str_multiply(s1, 2); - s2 = str_multiply(s2, 2); + s1 = vec_multiply(s1, 2); + s2 = vec_multiply(s2, 2); } return 0; diff --git a/fuzzing/fuzz_osa_distance.cpp b/fuzzing/fuzz_osa_distance.cpp index 0fa9f977..5cd75020 100644 --- a/fuzzing/fuzz_osa_distance.cpp +++ b/fuzzing/fuzz_osa_distance.cpp @@ -8,8 +8,8 @@ #include #include -void validate_distance(size_t reference_dist, const std::basic_string& s1, - const std::basic_string& s2, size_t score_cutoff) +void validate_distance(size_t reference_dist, const std::vector& s1, const std::vector& s2, + size_t score_cutoff) { if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1; @@ -26,7 +26,7 @@ void validate_distance(size_t reference_dist, const std::basic_string& extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - std::basic_string s1, s2; + std::vector s1, s2; if (!extract_strings(data, size, s1, s2)) return 0; size_t reference_dist = rapidfuzz_reference::osa_distance(s1, s2); @@ -40,8 +40,8 @@ extern 
"C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) /* test long sequences */ for (unsigned int i = 2; i < 9; ++i) { - std::basic_string s1_ = str_multiply(s1, pow(2, i)); - std::basic_string s2_ = str_multiply(s2, pow(2, i)); + std::vector s1_ = vec_multiply(s1, pow(2, i)); + std::vector s2_ = vec_multiply(s2, pow(2, i)); if (s1_.size() > 10000 || s2_.size() > 10000) break; diff --git a/fuzzing/fuzz_partial_ratio.cpp b/fuzzing/fuzz_partial_ratio.cpp new file mode 100644 index 00000000..c250ea2e --- /dev/null +++ b/fuzzing/fuzz_partial_ratio.cpp @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2021 Max Bachmann */ + +#include "../rapidfuzz_reference/fuzz.hpp" +#include "fuzzing.hpp" +#include +#include +#include +#include + +bool is_close(double a, double b, double epsilon) +{ + return fabs(a - b) <= ((fabs(a) < fabs(b) ? fabs(b) : fabs(a)) * epsilon); +} + +void validate_distance(const std::vector& s1, const std::vector& s2) +{ + auto sim = rapidfuzz::fuzz::partial_ratio(s1, s2); + auto reference_sim = rapidfuzz_reference::partial_ratio(s1, s2); + if (!is_close(sim, reference_sim, 0.0001)) { + print_seq("s1: ", s1); + print_seq("s2: ", s2); + throw std::logic_error(std::string("partial_ratio failed (reference_score = ") + + std::to_string(reference_sim) + std::string(", score = ") + + std::to_string(sim) + ")"); + } +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + std::vector s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + validate_distance(s1, s2); + validate_distance(s2, s1); + + /* test long sequences */ + for (unsigned int i = 2; i < 9; ++i) { + std::vector s1_ = vec_multiply(s1, pow(2, i)); + std::vector s2_ = vec_multiply(s2, pow(2, i)); + + if (s1_.size() > 10000 || s2_.size() > 10000) break; + + validate_distance(s1_, s2_); + validate_distance(s2_, s1_); + validate_distance(s1, s2_); + validate_distance(s2_, s1); + validate_distance(s1_, s2); + validate_distance(s2, s1_); 
+ } + + return 0; +} diff --git a/fuzzing/fuzzing.hpp b/fuzzing/fuzzing.hpp index e77a58b5..282baf98 100644 --- a/fuzzing/fuzzing.hpp +++ b/fuzzing/fuzzing.hpp @@ -1,10 +1,10 @@ #pragma once #include #include -#include +#include -static inline bool extract_strings(const uint8_t* data, size_t size, std::basic_string& s1, - std::basic_string& s2) +static inline bool extract_strings(const uint8_t* data, size_t size, std::vector& s1, + std::vector& s2) { if (size <= sizeof(uint32_t)) { return false; @@ -17,8 +17,8 @@ static inline bool extract_strings(const uint8_t* data, size_t size, std::basic_ data += sizeof(len1); size -= sizeof(len1); - s1 = std::basic_string(data, len1); - s2 = std::basic_string(data + len1, size - len1); + s1 = std::vector(data, data + len1); + s2 = std::vector(data + len1, data + size); return true; } @@ -36,17 +36,17 @@ static inline T pow(T x, unsigned int p) } template -std::basic_string str_multiply(std::basic_string a, size_t b) +std::vector vec_multiply(const std::vector& a, size_t b) { - std::basic_string output; + std::vector output; while (b--) - output += a; + output.insert(output.end(), a.begin(), a.end()); return output; } template -void print_seq(const std::string& name, const std::basic_string& seq) +void print_seq(const std::string& name, const std::vector& seq) { std::cout << name << " len: " << seq.size() << " content: "; for (const auto& ch : seq) diff --git a/rapidfuzz/details/CharSet.hpp b/rapidfuzz/details/CharSet.hpp index a00e3ee1..496deefa 100644 --- a/rapidfuzz/details/CharSet.hpp +++ b/rapidfuzz/details/CharSet.hpp @@ -9,10 +9,11 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* - * taken from https://stackoverflow.com/a/17251989/11335032 + * taken from https://stackoverflow.com/a/17251989 */ template bool CanTypeFitValue(const U value) @@ -71,4 +72,5 @@ struct CharSet { } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // 
namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/GrowingHashmap.hpp b/rapidfuzz/details/GrowingHashmap.hpp index ba0edebc..db63ea0b 100644 --- a/rapidfuzz/details/GrowingHashmap.hpp +++ b/rapidfuzz/details/GrowingHashmap.hpp @@ -7,7 +7,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /* hashmap for integers which can only grow, but can't remove elements */ template @@ -200,4 +201,5 @@ struct HybridGrowingHashmap { std::array m_extendedAscii; }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/Matrix.hpp b/rapidfuzz/details/Matrix.hpp index 7525f193..76c0c868 100644 --- a/rapidfuzz/details/Matrix.hpp +++ b/rapidfuzz/details/Matrix.hpp @@ -8,15 +8,16 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct BitMatrixView { using value_type = T; using size_type = size_t; - using pointer = std::conditional_t; - using reference = std::conditional_t; + using pointer = typename std::conditional::type; + using reference = typename std::conditional::type; BitMatrixView(pointer vector, size_type cols) noexcept : m_vector(vector), m_cols(cols) {} @@ -176,12 +177,12 @@ struct ShiftedBitMatrix { return bool(m_matrix[row][col_word] & col_mask); } - auto operator[](size_t row) noexcept + BitMatrixView operator[](size_t row) noexcept { return m_matrix[row]; } - auto operator[](size_t row) const noexcept + BitMatrixView operator[](size_t row) const noexcept { return m_matrix[row]; } @@ -196,4 +197,5 @@ struct ShiftedBitMatrix { std::vector m_offsets; }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/details/PatternMatchVector.hpp b/rapidfuzz/details/PatternMatchVector.hpp index 9c56a656..f60f2fa4 100644 --- 
a/rapidfuzz/details/PatternMatchVector.hpp +++ b/rapidfuzz/details/PatternMatchVector.hpp @@ -11,7 +11,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct BitvectorHashmap { BitvectorHashmap() : m_map() @@ -219,4 +220,5 @@ struct BlockPatternMatchVector { BitMatrix m_extendedAscii; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/Range.hpp b/rapidfuzz/details/Range.hpp index d8ac443e..f0c3b10c 100644 --- a/rapidfuzz/details/Range.hpp +++ b/rapidfuzz/details/Range.hpp @@ -13,7 +13,10 @@ #include #include -namespace rapidfuzz::detail { +#include + +namespace rapidfuzz { +namespace detail { static inline void assume(bool b) { @@ -26,6 +29,9 @@ static inline void assume(bool b) #endif } +namespace to_begin_detail { +using std::begin; + template CharT* to_begin(CharT* s) { @@ -33,11 +39,17 @@ CharT* to_begin(CharT* s) } template -auto to_begin(T& x) +auto to_begin(T& x) -> decltype(begin(x)) { - using std::begin; + return begin(x); } +} // namespace to_begin_detail + +using to_begin_detail::to_begin; + +namespace to_end_detail { +using std::end; template CharT* to_end(CharT* s) @@ -50,11 +62,13 @@ CharT* to_end(CharT* s) } template -auto to_end(T& x) +auto to_end(T& x) -> decltype(end(x)) { - using std::end; return end(x); } +} // namespace to_end_detail + +using to_end_detail::to_end; template class Range { @@ -69,87 +83,73 @@ class Range { using iterator = Iter; using reverse_iterator = std::reverse_iterator; - constexpr Range(Iter first, Iter last) : _first(first), _last(last) + Range(Iter first, Iter last) : _first(first), _last(last) { assert(std::distance(_first, _last) >= 0); _size = static_cast(std::distance(_first, _last)); } - constexpr Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) + Range(Iter first, Iter last, size_t size) : _first(first), _last(last), _size(size) {} template - constexpr Range(T& x) : 
_first(to_begin(x)), _last(to_end(x)) - { - assert(std::distance(_first, _last) >= 0); - _size = static_cast(std::distance(_first, _last)); - } + Range(T& x) : Range(to_begin(x), to_end(x)) + {} - constexpr iterator begin() const noexcept + iterator begin() const noexcept { return _first; } - constexpr iterator end() const noexcept + iterator end() const noexcept { return _last; } - constexpr reverse_iterator rbegin() const noexcept + reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } - constexpr reverse_iterator rend() const noexcept + reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } - constexpr size_t size() const + size_t size() const { return _size; } - constexpr bool empty() const + bool empty() const { return size() == 0; } - explicit constexpr operator bool() const + explicit operator bool() const { return !empty(); } - template < - typename... Dummy, typename IterCopy = Iter, - typename = std::enable_if_t::iterator_category>>> - constexpr decltype(auto) operator[](size_t n) const + template ::iterator_category>::value>> + auto operator[](size_t n) const -> decltype(*_first) { return _first[static_cast(n)]; } - constexpr void remove_prefix(size_t n) + void remove_prefix(size_t n) { - if constexpr (std::is_base_of_v::iterator_category>) - _first += static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _first++; - + std::advance(_first, static_cast(n)); _size -= n; } - constexpr void remove_suffix(size_t n) - { - if constexpr (std::is_base_of_v::iterator_category>) - _last -= static_cast(n); - else - for (size_t i = 0; i < n; ++i) - _last--; + void remove_suffix(size_t n) + { + std::advance(_last, -static_cast(n)); _size -= n; } - constexpr Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) + Range subseq(size_t pos = 0, size_t count = std::numeric_limits::max()) { if (pos > size()) throw std::out_of_range("Index out of range in Range::substr"); @@ -160,17 +160,17 @@ class Range { 
return res; } - constexpr decltype(auto) front() const + const value_type& front() const { - return *(_first); + return *_first; } - constexpr decltype(auto) back() const + const value_type& back() const { return *(_last - 1); } - constexpr Range reversed() const + Range reversed() const { return {rbegin(), rend(), _size}; } @@ -185,13 +185,24 @@ class Range { } }; +template +auto make_range(Iter first, Iter last) -> Range +{ + return Range(first, last); +} + template -Range(T& x) -> Range; +auto make_range(T& x) -> Range +{ + return {to_begin(x), to_end(x)}; +} template inline bool operator==(const Range& a, const Range& b) { - return std::equal(a.begin(), a.end(), b.begin(), b.end()); + if (a.size() != b.size()) return false; + + return std::equal(a.begin(), a.end(), b.begin()); } template @@ -227,4 +238,5 @@ inline bool operator>=(const Range& a, const Range& b) template using RangeVec = std::vector>; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/SplittedSentenceView.hpp b/rapidfuzz/details/SplittedSentenceView.hpp index a6b06955..52653a37 100644 --- a/rapidfuzz/details/SplittedSentenceView.hpp +++ b/rapidfuzz/details/SplittedSentenceView.hpp @@ -3,7 +3,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template class SplittedSentenceView { @@ -11,7 +12,7 @@ class SplittedSentenceView { using CharT = iter_value_t; SplittedSentenceView(RangeVec sentence) noexcept( - std::is_nothrow_move_constructible_v>) + std::is_nothrow_move_constructible>::value) : m_sentence(std::move(sentence)) {} @@ -83,4 +84,5 @@ auto SplittedSentenceView::join() const -> std::vector return joined; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/common.hpp b/rapidfuzz/details/common.hpp index 0db0d7d8..61733d8e 100644 --- a/rapidfuzz/details/common.hpp +++ b/rapidfuzz/details/common.hpp @@ -13,7 +13,8 @@ # 
include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct DecomposedSet { @@ -33,6 +34,13 @@ static inline size_t abs_diff(size_t a, size_t b) return a > b ? a - b : b - a; } +template +TO opt_static_cast(const FROM& value) +{ + /* calling the cast through this template function somehow avoids useless cast warnings */ + return static_cast(value); +} + /** * @defgroup Common Common * Common utilities shared among multiple functions @@ -87,6 +95,7 @@ static inline void rf_aligned_free(void* ptr) /**@}*/ -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz #include diff --git a/rapidfuzz/details/common_impl.hpp b/rapidfuzz/details/common_impl.hpp index 2d803442..7821c6d3 100644 --- a/rapidfuzz/details/common_impl.hpp +++ b/rapidfuzz/details/common_impl.hpp @@ -5,7 +5,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template DecomposedSet set_decomposition(SplittedSentenceView a, @@ -33,6 +34,15 @@ DecomposedSet set_decomposition(SplittedSentenceVi return {difference_ab, difference_ba, intersection}; } +template +std::pair rf_mismatch(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) +{ + while (first1 != last1 && first2 != last2 && *first1 == *first2) + ++first1, ++first2; + + return std::make_pair(first1, first2); +} + /** * Removes common prefix of two string views */ @@ -41,7 +51,7 @@ size_t remove_common_prefix(Range& s1, Range& s2) { auto first1 = std::begin(s1); size_t prefix = static_cast( - std::distance(first1, std::mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); + std::distance(first1, rf_mismatch(first1, std::end(s1), std::begin(s2), std::end(s2)).first)); s1.remove_prefix(prefix); s2.remove_prefix(prefix); return prefix; @@ -53,9 +63,9 @@ size_t remove_common_prefix(Range& s1, Range& s2) template size_t remove_common_suffix(Range& s1, Range& s2) { - auto rfirst1 = std::rbegin(s1); + auto rfirst1 
= s1.rbegin(); size_t suffix = static_cast( - std::distance(rfirst1, std::mismatch(rfirst1, std::rend(s1), std::rbegin(s2), std::rend(s2)).first)); + std::distance(rfirst1, rf_mismatch(rfirst1, s1.rend(), s2.rbegin(), s2.rend()).first)); s1.remove_suffix(suffix); s2.remove_suffix(suffix); return suffix; @@ -169,4 +179,5 @@ SplittedSentenceView sorted_split(InputIt first, InputIt last) return SplittedSentenceView(splitted); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/config.hpp b/rapidfuzz/details/config.hpp new file mode 100644 index 00000000..b17c4aac --- /dev/null +++ b/rapidfuzz/details/config.hpp @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2020 Max Bachmann */ + +#pragma once + +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +#endif + +/* older versions of msvc have bugs in their if constexpr support + * see https://github.com/rapidfuzz/rapidfuzz-cpp/issues/122 + * since we don't know the exact version this was fixed in, use the earliest we could test + */ +#if defined(_MSC_VER) && _MSC_VER < 1920 +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +#elif ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) +# define RAPIDFUZZ_DEDUCTION_GUIDES +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 1 +# define RAPIDFUZZ_IF_CONSTEXPR if constexpr +#else +# define RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE 0 +# define RAPIDFUZZ_IF_CONSTEXPR if +#endif + +#if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) || __cplusplus >= 201402L) +# define RAPIDFUZZ_CONSTEXPR_CXX14 constexpr +#else +# define RAPIDFUZZ_CONSTEXPR_CXX14 inline +#endif diff --git a/rapidfuzz/details/distance.hpp b/rapidfuzz/details/distance.hpp index d3c5fc91..fb87057d 100644 --- a/rapidfuzz/details/distance.hpp +++ b/rapidfuzz/details/distance.hpp @@ -9,42 +9,43 @@ #include #include -namespace 
rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_distance(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_distance(const Sentence1& s1, const Sentence2& s2, Args... args, double score_cutoff, double score_hint) { - return _normalized_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, + return _normalized_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static double normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return _normalized_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static double normalized_similarity(const Sentence1& s1, const Sentence2& s2, Args... 
args, double score_cutoff, double score_hint) { - return _normalized_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, - score_hint); + return _normalized_similarity(make_range(s1), make_range(s2), std::forward(args)..., + score_cutoff, score_hint); } protected: @@ -82,11 +83,11 @@ struct NormalizedMetricBase { template struct DistanceBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return T::_distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -94,15 +95,16 @@ struct DistanceBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _similarity(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -110,7 +112,8 @@ struct DistanceBase : public NormalizedMetricBase { static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return _similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -137,11 +140,11 @@ struct DistanceBase : public NormalizedMetricBase { template struct SimilarityBase : public NormalizedMetricBase { template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(first1, last1), Range(first2, last2), std::forward(args)..., + return _distance(make_range(first1, last1), make_range(first2, last2), std::forward(args)..., score_cutoff, score_hint); } @@ -149,23 +152,25 @@ struct SimilarityBase : public NormalizedMetricBase { static ResType distance(const Sentence1& s1, const Sentence2& s2, Args... args, ResType score_cutoff, ResType score_hint) { - return _distance(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return _distance(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } template >> + typename = rapidfuzz::rf_enable_if_t::value>> static ResType similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, Args... args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(first1, last1), Range(first2, last2), std::forward(args)..., - score_cutoff, score_hint); + return T::_similarity(make_range(first1, last1), make_range(first2, last2), + std::forward(args)..., score_cutoff, score_hint); } template static ResType similarity(const Sentence1& s1, const Sentence2& s2, Args... 
args, ResType score_cutoff, ResType score_hint) { - return T::_similarity(Range(s1), Range(s2), std::forward(args)..., score_cutoff, score_hint); + return T::_similarity(make_range(s1), make_range(s2), std::forward(args)..., score_cutoff, + score_hint); } protected: @@ -180,11 +185,21 @@ struct SimilarityBase : public NormalizedMetricBase { (maximum >= score_hint) ? maximum - score_hint : static_cast(WorstSimilarity); ResType sim = T::_similarity(s1, s2, std::forward(args)..., cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : score_cutoff + 1; } SimilarityBase() @@ -198,27 +213,27 @@ struct CachedNormalizedMetricBase { double normalized_distance(InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(first2, last2), score_cutoff, score_hint); + return _normalized_distance(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_distance(const Sentence2& s2, double score_cutoff = 1.0, double score_hint = 1.0) const { - return _normalized_distance(Range(s2), score_cutoff, score_hint); + return _normalized_distance(make_range(s2), score_cutoff, score_hint); } template double normalized_similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(first2, last2), score_cutoff, score_hint); + return _normalized_similarity(make_range(first2, last2), score_cutoff, score_hint); } template double normalized_similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const { - return _normalized_similarity(Range(s2), score_cutoff, score_hint); + return _normalized_similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -259,7 +274,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(first2, last2), score_cutoff, score_hint); + return derived._distance(make_range(first2, last2), score_cutoff, score_hint); } template @@ -267,7 +282,7 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - return derived._distance(Range(s2), score_cutoff, score_hint); + return derived._distance(make_range(s2), score_cutoff, score_hint); } template @@ -275,14 +290,14 @@ struct CachedDistanceBase : public CachedNormalizedMetricBase { ResType 
score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(first2, last2), score_cutoff, score_hint); + return _similarity(make_range(first2, last2), score_cutoff, score_hint); } template ResType similarity(const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity), ResType score_hint = static_cast(WorstSimilarity)) const { - return _similarity(Range(s2), score_cutoff, score_hint); + return _similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -313,14 +328,14 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(first2, last2), score_cutoff, score_hint); + return _distance(make_range(first2, last2), score_cutoff, score_hint); } template ResType distance(const Sentence2& s2, ResType score_cutoff = static_cast(WorstDistance), ResType score_hint = static_cast(WorstDistance)) const { - return _distance(Range(s2), score_cutoff, score_hint); + return _distance(make_range(s2), score_cutoff, score_hint); } template @@ -329,7 +344,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(first2, last2), score_cutoff, score_hint); + return derived._similarity(make_range(first2, last2), score_cutoff, score_hint); } template @@ -337,7 +352,7 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType score_hint = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - return derived._similarity(Range(s2), score_cutoff, score_hint); + return derived._similarity(make_range(s2), score_cutoff, score_hint); } protected: @@ -350,11 +365,21 @@ struct CachedSimilarityBase : public CachedNormalizedMetricBase { ResType hint_similarity = (maximum > 
score_hint) ? maximum - score_hint : 0; ResType sim = derived._similarity(s2, cutoff_similarity, hint_similarity); ResType dist = maximum - sim; + return _apply_distance_score_cutoff(dist, score_cutoff); + } - if constexpr (std::is_floating_point_v) - return (dist <= score_cutoff) ? dist : 1.0; - else - return (dist <= score_cutoff) ? dist : score_cutoff + 1; + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; } CachedSimilarityBase() @@ -368,28 +393,28 @@ struct MultiNormalizedMetricBase { void normalized_distance(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_distance(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 1.0) const { - _normalized_distance(scores, score_count, Range(s2), score_cutoff); + _normalized_distance(scores, score_count, make_range(s2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(first2, last2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void normalized_similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0.0) const { - _normalized_similarity(scores, score_count, Range(s2), score_cutoff); + _normalized_similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -403,7 +428,8 @@ struct 
MultiNormalizedMetricBase { // reinterpretation only works when the types have the same size ResType* scores_orig = nullptr; - if constexpr (sizeof(double) == sizeof(ResType)) + + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) == sizeof(ResType)) scores_orig = reinterpret_cast(scores); else scores_orig = new ResType[derived.result_count()]; @@ -417,7 +443,7 @@ struct MultiNormalizedMetricBase { scores[i] = (norm_dist <= score_cutoff) ? norm_dist : 1.0; } - if constexpr (sizeof(double) != sizeof(ResType)) delete[] scores_orig; + RAPIDFUZZ_IF_CONSTEXPR (sizeof(double) != sizeof(ResType)) delete[] scores_orig; } template @@ -445,7 +471,7 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(first2, last2), score_cutoff); + derived._distance(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -453,21 +479,21 @@ struct MultiDistanceBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstDistance)) const { const T& derived = static_cast(*this); - derived._distance(scores, score_count, Range(s2), score_cutoff); + derived._distance(scores, score_count, make_range(s2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(first2, last2), score_cutoff); + _similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template void similarity(ResType* scores, size_t score_count, const Sentence2& s2, ResType score_cutoff = static_cast(WorstSimilarity)) const { - _similarity(scores, score_count, Range(s2), score_cutoff); + _similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -496,14 +522,14 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { void distance(ResType* 
scores, size_t score_count, InputIt2 first2, InputIt2 last2, ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(first2, last2), score_cutoff); + _distance(scores, score_count, make_range(first2, last2), score_cutoff); } template void distance(ResType* scores, size_t score_count, const Sentence2& s2, - ResType score_cutoff = WorstDistance) const + ResType score_cutoff = static_cast(WorstDistance)) const { - _distance(scores, score_count, Range(s2), score_cutoff); + _distance(scores, score_count, make_range(s2), score_cutoff); } template @@ -511,7 +537,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(first2, last2), score_cutoff); + derived._similarity(scores, score_count, make_range(first2, last2), score_cutoff); } template @@ -519,7 +545,7 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { ResType score_cutoff = static_cast(WorstSimilarity)) const { const T& derived = static_cast(*this); - derived._similarity(scores, score_count, Range(s2), score_cutoff); + derived._similarity(scores, score_count, make_range(s2), score_cutoff); } protected: @@ -532,17 +558,28 @@ struct MultiSimilarityBase : public MultiNormalizedMetricBase { for (size_t i = 0; i < derived.get_input_count(); ++i) { ResType maximum = derived.maximum(i, s2); ResType dist = maximum - scores[i]; - - if constexpr (std::is_floating_point_v) - scores[i] = (dist <= score_cutoff) ? dist : 1.0; - else - scores[i] = (dist <= score_cutoff) ? dist : score_cutoff + 1; + scores[i] = _apply_distance_score_cutoff(dist, score_cutoff); } } + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? 
score : 1.0; + } + + template + static rapidfuzz::rf_enable_if_t::value, U> + _apply_distance_score_cutoff(U score, U score_cutoff) + { + return (score <= score_cutoff) ? score : score_cutoff + 1; + } + MultiSimilarityBase() {} friend T; }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/intrinsics.hpp b/rapidfuzz/details/intrinsics.hpp index d5bd0a14..7952759e 100644 --- a/rapidfuzz/details/intrinsics.hpp +++ b/rapidfuzz/details/intrinsics.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -14,7 +15,8 @@ # include #endif -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template T bit_mask_lsb(size_t n) @@ -50,7 +52,7 @@ constexpr uint64_t shl64(uint64_t a, U shift) return (shift < 64) ? a << shift : 0; } -constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) +RAPIDFUZZ_CONSTEXPR_CXX14 uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* carryout) { /* todo should use _addcarry_u64 when available */ a += carryin; @@ -61,7 +63,7 @@ constexpr uint64_t addc64(uint64_t a, uint64_t b, uint64_t carryin, uint64_t* ca } template -constexpr T ceil_div(T a, U divisor) +RAPIDFUZZ_CONSTEXPR_CXX14 T ceil_div(T a, U divisor) { T _div = static_cast(divisor); return a / _div + static_cast(a % _div != 0); @@ -97,7 +99,7 @@ static inline size_t popcount(uint8_t x) } template -constexpr T rotl(T x, unsigned int n) +RAPIDFUZZ_CONSTEXPR_CXX14 T rotl(T x, unsigned int n) { unsigned int num_bits = std::numeric_limits::digits; assert(n < num_bits); @@ -197,16 +199,31 @@ static inline unsigned int countr_zero(uint8_t x) return countr_zero(static_cast(x)); } -template -constexpr void unroll_impl(std::integer_sequence, F&& f) -{ - (f(std::integral_constant{}), ...); -} +template +struct UnrollImpl; + +template +struct UnrollImpl { + template + static void call(F&& f) + { + f(Pos); + UnrollImpl::call(std::forward(f)); + } +}; + 
+template +struct UnrollImpl { + template + static void call(F&&) + {} +}; -template -constexpr void unroll(F&& f) +template +RAPIDFUZZ_CONSTEXPR_CXX14 void unroll(F&& f) { - unroll_impl(std::make_integer_sequence{}, std::forward(f)); + UnrollImpl::call(f); } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/details/simd_avx2.hpp b/rapidfuzz/details/simd_avx2.hpp index 65a446cd..1b7ba0e7 100644 --- a/rapidfuzz/details/simd_avx2.hpp +++ b/rapidfuzz/details/simd_avx2.hpp @@ -597,13 +597,13 @@ static inline native_simd min32(const native_simd& a, const native_simd return _mm256_min_epu32(a, b); } -/* taken from https://stackoverflow.com/a/51807800/11335032 */ +/* taken from https://stackoverflow.com/a/51807800 */ static inline native_simd sllv(const native_simd& a, const native_simd& count_) noexcept { __m256i mask_hi = _mm256_set1_epi32(static_cast(0xFF00FF00)); - __m256i multiplier_lut = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, char(128), 64, 32, 16, 8, 4, 2, 1, 0, 0, - 0, 0, 0, 0, 0, 0, char(128), 64, 32, 16, 8, 4, 2, 1); + __m256i multiplier_lut = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, char(-128), 64, 32, 16, 8, 4, 2, 1, 0, 0, + 0, 0, 0, 0, 0, 0, char(-128), 64, 32, 16, 8, 4, 2, 1); __m256i count_sat = _mm256_min_epu8(count_, _mm256_set1_epi8(8)); /* AVX shift counts are not masked. 
So a_i << n_i = 0 @@ -620,7 +620,7 @@ static inline native_simd sllv(const native_simd& a, return x; } -/* taken from https://stackoverflow.com/a/51805592/11335032 */ +/* taken from https://stackoverflow.com/a/51805592 */ static inline native_simd sllv(const native_simd& a, const native_simd& count) noexcept { diff --git a/rapidfuzz/details/type_traits.hpp b/rapidfuzz/details/type_traits.hpp index 06b6b1e2..2f05d2b4 100644 --- a/rapidfuzz/details/type_traits.hpp +++ b/rapidfuzz/details/type_traits.hpp @@ -49,4 +49,7 @@ struct is_explicitly_convertible { static bool const value = test(0); }; +template +using rf_enable_if_t = typename std::enable_if::type; + } // namespace rapidfuzz diff --git a/rapidfuzz/details/types.hpp b/rapidfuzz/details/types.hpp index ac3c4559..b8ec393c 100644 --- a/rapidfuzz/details/types.hpp +++ b/rapidfuzz/details/types.hpp @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include #include @@ -336,13 +337,10 @@ class Editops : private std::vector { inline bool operator==(const Editops& lhs, const Editops& rhs) { - if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) { - return false; - } + if (lhs.get_src_len() != rhs.get_src_len() || lhs.get_dest_len() != rhs.get_dest_len()) return false; + + if (lhs.size() != rhs.size()) return false; - if (lhs.size() != rhs.size()) { - return false; - } return std::equal(lhs.begin(), lhs.end(), rhs.begin()); } diff --git a/rapidfuzz/distance.hpp b/rapidfuzz/distance.hpp index 75bdb732..da686f7b 100644 --- a/rapidfuzz/distance.hpp +++ b/rapidfuzz/distance.hpp @@ -15,14 +15,15 @@ namespace rapidfuzz { -template -std::basic_string editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, - InputIt2 last2) +namespace detail { +template +ReturnType editops_apply_impl(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) { auto len1 = static_cast(std::distance(first1, last1)); auto len2 = 
static_cast(std::distance(first2, last2)); - std::basic_string res_str; + ReturnType res_str; res_str.resize(len1 + len2); size_t src_pos = 0; size_t dest_pos = 0; @@ -30,7 +31,8 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu for (const auto& op : ops) { /* matches between last and current editop */ while (src_pos < op.src_pos) { - res_str[dest_pos] = static_cast(first1[static_cast(src_pos)]); + res_str[dest_pos] = + static_cast(first1[static_cast(src_pos)]); src_pos++; dest_pos++; } @@ -38,12 +40,14 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu switch (op.type) { case EditType::None: case EditType::Replace: - res_str[dest_pos] = static_cast(first2[static_cast(op.dest_pos)]); + res_str[dest_pos] = + static_cast(first2[static_cast(op.dest_pos)]); src_pos++; dest_pos++; break; case EditType::Insert: - res_str[dest_pos] = static_cast(first2[static_cast(op.dest_pos)]); + res_str[dest_pos] = + static_cast(first2[static_cast(op.dest_pos)]); dest_pos++; break; case EditType::Delete: src_pos++; break; @@ -52,7 +56,8 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu /* matches after the last editop */ while (src_pos < len1) { - res_str[dest_pos] = static_cast(first1[static_cast(src_pos)]); + res_str[dest_pos] = + static_cast(first1[static_cast(src_pos)]); src_pos++; dest_pos++; } @@ -61,21 +66,14 @@ std::basic_string editops_apply(const Editops& ops, InputIt1 first1, Inpu return res_str; } -template -std::basic_string editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2) -{ - return editops_apply(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), - detail::to_end(s2)); -} - -template -std::basic_string opcodes_apply(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, - InputIt2 last2) +template +ReturnType opcodes_apply_impl(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) { auto len1 = 
static_cast(std::distance(first1, last1)); auto len2 = static_cast(std::distance(first2, last2)); - std::basic_string res_str; + ReturnType res_str; res_str.resize(len1 + len2); size_t dest_pos = 0; @@ -83,13 +81,15 @@ std::basic_string opcodes_apply(const Opcodes& ops, InputIt1 first1, Inpu switch (op.type) { case EditType::None: for (auto i = op.src_begin; i < op.src_end; ++i) { - res_str[dest_pos++] = static_cast(first1[static_cast(i)]); + res_str[dest_pos++] = + static_cast(first1[static_cast(i)]); } break; case EditType::Replace: case EditType::Insert: for (auto i = op.dest_begin; i < op.dest_end; ++i) { - res_str[dest_pos++] = static_cast(first2[static_cast(i)]); + res_str[dest_pos++] = + static_cast(first2[static_cast(i)]); } break; case EditType::Delete: break; @@ -100,11 +100,62 @@ std::basic_string opcodes_apply(const Opcodes& ops, InputIt1 first1, Inpu return res_str; } +} // namespace detail + +template +std::basic_string editops_apply_str(const Editops& ops, InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2) +{ + return detail::editops_apply_impl>(ops, first1, last1, first2, last2); +} + +template +std::basic_string editops_apply_str(const Editops& ops, const Sentence1& s1, const Sentence2& s2) +{ + return detail::editops_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); +} + +template +std::basic_string opcodes_apply_str(const Opcodes& ops, InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2) +{ + return detail::opcodes_apply_impl>(ops, first1, last1, first2, last2); +} + +template +std::basic_string opcodes_apply_str(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) +{ + return detail::opcodes_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); +} + +template +std::vector editops_apply_vec(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) +{ + return 
detail::editops_apply_impl>(ops, first1, last1, first2, last2); +} + +template +std::vector editops_apply_vec(const Editops& ops, const Sentence1& s1, const Sentence2& s2) +{ + return detail::editops_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); +} + +template +std::vector opcodes_apply_vec(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, + InputIt2 last2) +{ + return detail::opcodes_apply_impl>(ops, first1, last1, first2, last2); +} + template -std::basic_string opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) +std::vector opcodes_apply_vec(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) { - return opcodes_apply(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), - detail::to_end(s2)); + return detail::opcodes_apply_impl>(ops, detail::to_begin(s1), detail::to_end(s1), + detail::to_begin(s2), detail::to_end(s2)); } } // namespace rapidfuzz diff --git a/rapidfuzz/distance/DamerauLevenshtein.hpp b/rapidfuzz/distance/DamerauLevenshtein.hpp index b1209ed7..98f3f2f1 100644 --- a/rapidfuzz/distance/DamerauLevenshtein.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein.hpp @@ -133,8 +133,7 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { return rapidfuzz::experimental::damerau_levenshtein_distance(s1, s2, score_cutoff); } @@ -142,11 +141,13 @@ struct CachedDamerauLevenshtein : public detail::CachedDistanceBase s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedDamerauLevenshtein(const Sentence1& s1_) -> CachedDamerauLevenshtein>; template CachedDamerauLevenshtein(InputIt1 first1, InputIt1 last1) -> CachedDamerauLevenshtein>; +#endif } // namespace experimental } // namespace rapidfuzz diff --git 
a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp index b452945f..5e34556d 100644 --- a/rapidfuzz/distance/DamerauLevenshtein_impl.hpp +++ b/rapidfuzz/distance/DamerauLevenshtein_impl.hpp @@ -11,7 +11,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct RowId { @@ -64,10 +65,10 @@ size_t damerau_levenshtein_distance_zhao(const Range& s1, const Range< auto iter_s2 = s2.begin(); for (IntType j = 1; j <= len2; j++) { - ptrdiff_t diag = R1[j - 1] + static_cast(*iter_s1 != *iter_s2); - ptrdiff_t left = R[j - 1] + 1; - ptrdiff_t up = R1[j] + 1; - ptrdiff_t temp = std::min({diag, left, up}); + int64_t diag = R1[j - 1] + static_cast(*iter_s1 != *iter_s2); + int64_t left = R[j - 1] + 1; + int64_t up = R1[j] + 1; + int64_t temp = std::min({diag, left, up}); if (*iter_s1 == *iter_s2) { last_col_id = j; // last occurence of s1_i @@ -75,15 +76,15 @@ size_t damerau_levenshtein_distance_zhao(const Range& s1, const Range< T = last_i2l1; // save H_i-2,l-1 } else { - ptrdiff_t k = last_row_id.get(static_cast(*iter_s2)).val; - ptrdiff_t l = last_col_id; + int64_t k = last_row_id.get(static_cast(*iter_s2)).val; + int64_t l = last_col_id; if ((j - l) == 1) { - ptrdiff_t transpose = FR[j] + (i - k); + int64_t transpose = FR[j] + (i - k); temp = std::min(temp, transpose); } else if ((i - k) == 1) { - ptrdiff_t transpose = T + (j - l); + int64_t transpose = T + (j - l); temp = std::min(temp, transpose); } } @@ -130,11 +131,11 @@ class DamerauLevenshtein } template - static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _distance(const Range& s1, const Range& s2, size_t score_cutoff, size_t) { return damerau_levenshtein_distance(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git 
a/rapidfuzz/distance/Hamming.hpp b/rapidfuzz/distance/Hamming.hpp index d5160722..3ce33b91 100644 --- a/rapidfuzz/distance/Hamming.hpp +++ b/rapidfuzz/distance/Hamming.hpp @@ -78,7 +78,7 @@ template Editops hamming_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(first1, last1), detail::Range(first2, last2), pad_, + return detail::hamming_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), pad_, score_hint); } @@ -86,7 +86,7 @@ template Editops hamming_editops(const Sentence1& s1, const Sentence2& s2, bool pad_ = true, size_t score_hint = std::numeric_limits::max()) { - return detail::hamming_editops(detail::Range(s1), detail::Range(s2), pad_, score_hint); + return detail::hamming_editops(detail::make_range(s1), detail::make_range(s2), pad_, score_hint); } /** @@ -151,8 +151,7 @@ struct CachedHamming : public detail::CachedDistanceBase, } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t score_hint) const { return detail::Hamming::distance(s1, s2, pad, score_cutoff, score_hint); } @@ -161,11 +160,13 @@ struct CachedHamming : public detail::CachedDistanceBase, bool pad; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedHamming(const Sentence1& s1_, bool pad_ = true) -> CachedHamming>; template CachedHamming(InputIt1 first1, InputIt1 last1, bool pad_ = true) -> CachedHamming>; +#endif /**@}*/ diff --git a/rapidfuzz/distance/Hamming_impl.hpp b/rapidfuzz/distance/Hamming_impl.hpp index 8389f902..34d286cd 100644 --- a/rapidfuzz/distance/Hamming_impl.hpp +++ b/rapidfuzz/distance/Hamming_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Hamming : public DistanceBase::max(), bool> { friend 
DistanceBase::max(), bool>; @@ -20,7 +21,7 @@ class Hamming : public DistanceBase static size_t _distance(const Range& s1, const Range& s2, bool pad, - size_t score_cutoff, [[maybe_unused]] size_t score_hint) + size_t score_cutoff, size_t) { if (!pad && s1.size() != s2.size()) throw std::invalid_argument("Sequences are not the same length."); @@ -57,4 +58,5 @@ Editops hamming_editops(const Range& s1, const Range& s2, bo return ops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Indel.hpp b/rapidfuzz/distance/Indel.hpp index 9cfa902b..16410075 100644 --- a/rapidfuzz/distance/Indel.hpp +++ b/rapidfuzz/distance/Indel.hpp @@ -182,10 +182,12 @@ struct CachedIndel CachedLCSseq scorer; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedIndel(const Sentence1& s1_) -> CachedIndel>; template CachedIndel(InputIt1 first1, InputIt1 last1) -> CachedIndel>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Indel_impl.hpp b/rapidfuzz/distance/Indel_impl.hpp index d0ab9d50..b3139fab 100644 --- a/rapidfuzz/distance/Indel_impl.hpp +++ b/rapidfuzz/distance/Indel_impl.hpp @@ -8,7 +8,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template size_t indel_distance(const BlockPatternMatchVector& block, const Range& s1, @@ -65,4 +66,5 @@ class Indel : public DistanceBase } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Jaro.hpp b/rapidfuzz/distance/Jaro.hpp index 764332cd..2948c784 100644 --- a/rapidfuzz/distance/Jaro.hpp +++ b/rapidfuzz/distance/Jaro.hpp @@ -70,12 +70,13 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, friend detail::MultiSimilarityBase, double, 0, 1>; friend detail::MultiNormalizedMetricBase, double>; - static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || MaxLen == 64); + static_assert(MaxLen == 8 || MaxLen == 16 || MaxLen == 32 || 
MaxLen == 64, "incorrect MaxLen used"); - using VecType = typename std::conditional_t< + using VecType = typename std::conditional< MaxLen == 8, uint8_t, - typename std::conditional_t>>; + typename std::conditional::type>::type>:: + type; constexpr static size_t get_vec_size() { @@ -95,7 +96,7 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, # endif } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -166,12 +167,12 @@ struct MultiJaro : public detail::MultiSimilarityBase, double, if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); + auto scores_ = detail::make_range(scores, scores + score_count); detail::jaro_similarity_simd(scores_, PM, str_lens, str_lens_size, s2, score_cutoff); } template - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -198,7 +199,7 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub {} template - CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedJaro(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -212,20 +213,21 @@ struct CachedJaro : public detail::CachedSimilarityBase, doub } template - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { - return detail::jaro_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::jaro_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit 
CachedJaro(const Sentence1& s1_) -> CachedJaro>; template CachedJaro(InputIt1 first1, InputIt1 last1) -> CachedJaro>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/JaroWinkler.hpp b/rapidfuzz/distance/JaroWinkler.hpp index d6dd0753..1bd2f082 100644 --- a/rapidfuzz/distance/JaroWinkler.hpp +++ b/rapidfuzz/distance/JaroWinkler.hpp @@ -9,7 +9,7 @@ namespace rapidfuzz { template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -25,7 +25,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -41,7 +41,7 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_distance(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 1.0) { @@ -57,7 +57,7 @@ double jaro_winkler_normalized_distance(const Sentence1& s1, const Sentence2& s2 } template >> + typename = rapidfuzz::rf_enable_if_t::value>> double jaro_winkler_normalized_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double prefix_weight = 0.1, double score_cutoff = 0.0) { @@ -145,7 +145,7 @@ struct MultiJaroWinkler : public detail::MultiSimilarityBase - double maximum([[maybe_unused]] size_t s1_idx, const detail::Range&) const + double maximum(size_t, const detail::Range&) const { return 1.0; } @@ -174,7 +174,7 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) - : prefix_weight(_prefix_weight), s1(first1, 
last1), PM(detail::Range(first1, last1)) + : prefix_weight(_prefix_weight), s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -188,10 +188,9 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase - double _similarity(const detail::Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double _similarity(const detail::Range& s2, double score_cutoff, double) const { - return detail::jaro_winkler_similarity(PM, detail::Range(s1), s2, prefix_weight, score_cutoff); + return detail::jaro_winkler_similarity(PM, detail::make_range(s1), s2, prefix_weight, score_cutoff); } double prefix_weight; @@ -199,12 +198,14 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase -explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1) - -> CachedJaroWinkler>; +explicit CachedJaroWinkler(const Sentence1& s1_, + double _prefix_weight = 0.1) -> CachedJaroWinkler>; template -CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1) - -> CachedJaroWinkler>; +CachedJaroWinkler(InputIt1 first1, InputIt1 last1, + double _prefix_weight = 0.1) -> CachedJaroWinkler>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/JaroWinkler_impl.hpp b/rapidfuzz/distance/JaroWinkler_impl.hpp index c8eb6575..a40f8af8 100644 --- a/rapidfuzz/distance/JaroWinkler_impl.hpp +++ b/rapidfuzz/distance/JaroWinkler_impl.hpp @@ -3,7 +3,8 @@ #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template double jaro_winkler_similarity(const Range& P, const Range& T, double prefix_weight, @@ -81,10 +82,11 @@ class JaroWinkler : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double prefix_weight, - double score_cutoff, [[maybe_unused]] double score_hint) + double score_cutoff, double) { return jaro_winkler_similarity(s1, s2, prefix_weight, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // 
namespace rapidfuzz diff --git a/rapidfuzz/distance/Jaro_impl.hpp b/rapidfuzz/distance/Jaro_impl.hpp index 4060a102..81bdf2f7 100644 --- a/rapidfuzz/distance/Jaro_impl.hpp +++ b/rapidfuzz/distance/Jaro_impl.hpp @@ -9,7 +9,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct FlaggedCharsWord { uint64_t P_flag; @@ -92,7 +93,11 @@ static inline size_t count_common_chars(const FlaggedCharsMultiword& flagged) template static inline FlaggedCharsWord flag_similar_characters_word(const PM_Vec& PM, - [[maybe_unused]] const Range& P, +#ifdef NDEBUG + const Range&, +#else + const Range& P, +#endif const Range& T, size_t Bound) { assert(P.size() <= 64); @@ -160,7 +165,7 @@ static inline void flag_similar_characters_step(const BlockPatternMatchVector& P if (T_j >= 0 && T_j < 256) { for (; word + 3 < last_word - 1; word += 4) { uint64_t PM_j[4]; - unroll([&](auto i) { + unroll([&](size_t i) { PM_j[i] = PM.get(word + i, static_cast(T_j)) & (~flagged.P_flag[word + i]); }); @@ -296,7 +301,8 @@ static inline size_t count_transpositions_block(const BlockPatternMatchVector& P uint64_t PatternFlagMask = blsi(P_flag); - Transpositions += !(PM.get(PatternWord, T_first[countr_zero(T_flag)]) & PatternFlagMask); + Transpositions += !(PM.get(PatternWord, T_first[static_cast(countr_zero(T_flag))]) & + PatternFlagMask); T_flag = blsr(T_flag); P_flag ^= PatternFlagMask; @@ -458,6 +464,11 @@ struct JaroSimilaritySimdBounds { template static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -465,7 +476,9 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng using namespace simd_sse2; # endif - [[maybe_unused]] static constexpr size_t alignment = native_simd::alignment; +# ifndef RAPIDFUZZ_AVX2 + static constexpr size_t 
alignment = native_simd::alignment; +# endif static constexpr size_t vec_width = native_simd::size; assert(s2.size() <= sizeof(VecType) * 8); @@ -505,7 +518,7 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng // todo try to find a simd implementation for sse2 for (size_t i = 0; i < vec_width; ++i) { - size_t Bound = jaro_bounds(s1_lengths[i], s2.size()); + size_t Bound = jaro_bounds(static_cast(s1_lengths[i]), s2.size()); if (Bound > bounds.maxBound) bounds.maxBound = Bound; @@ -517,7 +530,7 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng bounds.boundMask = native_simd(reinterpret_cast(boundMask_.data())); # endif - size_t lastRelevantChar = maxLen + bounds.maxBound; + size_t lastRelevantChar = static_cast(maxLen) + bounds.maxBound; if (s2.size() > lastRelevantChar) s2.remove_suffix(s2.size() - lastRelevantChar); return bounds; @@ -525,6 +538,11 @@ static inline auto jaro_similarity_prepare_bound_short_s2(const VecType* s1_leng template static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengths, Range& s2) +# ifdef RAPIDFUZZ_AVX2 + -> JaroSimilaritySimdBounds> +# else + -> JaroSimilaritySimdBounds> +# endif { # ifdef RAPIDFUZZ_AVX2 using namespace simd_avx2; @@ -547,7 +565,7 @@ static inline auto jaro_similarity_prepare_bound_long_s2(const VecType* s1_lengt bounds.boundMaskSize = native_simd(bit_mask_lsb(2 * bounds.maxBound)); bounds.boundMask = native_simd(bit_mask_lsb(bounds.maxBound + 1)); - size_t lastRelevantChar = maxLen + bounds.maxBound; + size_t lastRelevantChar = static_cast(maxLen) + bounds.maxBound; if (s2.size() > lastRelevantChar) s2.remove_suffix(s2.size() - lastRelevantChar); return bounds; @@ -614,7 +632,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, 
s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -628,7 +646,7 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -648,8 +666,10 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa T_flag[i].store(T_flags + i * vec_width); for (size_t i = 0; i < vec_width; ++i) { - VecType CommonChars = counts[i]; - if (!jaro_common_char_filter(s1_lengths[result_index], s2.size(), CommonChars, score_cutoff)) { + size_t CommonChars = static_cast(counts[i]); + if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, score_cutoff)) + { scores[result_index] = 0.0; result_index++; continue; @@ -683,8 +703,8 @@ jaro_similarity_simd_long_s2(Range scores, const detail::BlockPatternMa } } - double Sim = - jaro_calculate_similarity(s1_lengths[result_index], s2.size(), CommonChars, Transpositions); + double Sim = jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, Transpositions); scores[result_index] = (Sim >= score_cutoff) ? 
Sim : 0; result_index++; @@ -729,7 +749,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM size_t j = 0; for (; j < std::min(bounds.maxBound, s2_cur.size()); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -742,7 +762,7 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM for (; j < s2_cur.size(); ++j) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, s2_cur[j]); }); native_simd X(stored.data()); native_simd PM_j = andnot(X & bounds.boundMask, P_flag); @@ -759,8 +779,10 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM alignas(alignment) std::array T_flags; T_flag.store(T_flags.data()); for (size_t i = 0; i < vec_width; ++i) { - VecType CommonChars = counts[i]; - if (!jaro_common_char_filter(s1_lengths[result_index], s2.size(), CommonChars, score_cutoff)) { + size_t CommonChars = static_cast(counts[i]); + if (!jaro_common_char_filter(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, score_cutoff)) + { scores[result_index] = 0.0; result_index++; continue; @@ -783,8 +805,8 @@ jaro_similarity_simd_short_s2(Range scores, const detail::BlockPatternM P_flag_cur ^= PatternFlagMask; } - double Sim = - jaro_calculate_similarity(s1_lengths[result_index], s2.size(), CommonChars, Transpositions); + double Sim = jaro_calculate_similarity(static_cast(s1_lengths[result_index]), s2.size(), + CommonChars, Transpositions); scores[result_index] = (Sim >= score_cutoff) ? 
Sim : 0; result_index++; @@ -831,10 +853,11 @@ class Jaro : public SimilarityBase { template static double _similarity(const Range& s1, const Range& s2, double score_cutoff, - [[maybe_unused]] double score_hint) + double) { return jaro_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/LCSseq.hpp b/rapidfuzz/distance/LCSseq.hpp index 9082dc37..8891cf2c 100644 --- a/rapidfuzz/distance/LCSseq.hpp +++ b/rapidfuzz/distance/LCSseq.hpp @@ -65,13 +65,13 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d template Editops lcs_seq_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2) { - return detail::lcs_seq_editops(detail::Range(first1, last1), detail::Range(first2, last2)); + return detail::lcs_seq_editops(detail::make_range(first1, last1), detail::make_range(first2, last2)); } template Editops lcs_seq_editops(const Sentence1& s1, const Sentence2& s2) { - return detail::lcs_seq_editops(detail::Range(s1), detail::Range(s2)); + return detail::lcs_seq_editops(detail::make_range(s1), detail::make_range(s2)); } #ifdef RAPIDFUZZ_SIMD @@ -83,26 +83,26 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz friend detail::MultiSimilarityBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) 
return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -164,14 +164,14 @@ struct MultiLCSseq : public detail::MultiSimilarityBase, siz if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::lcs_simd(scores_, PM, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::lcs_simd(scores_, PM, s2, score_cutoff); } @@ -202,7 +202,7 @@ struct CachedLCSseq {} template - CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedLCSseq(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -216,20 +216,21 @@ struct CachedLCSseq } template - size_t _similarity(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(const detail::Range& s2, size_t score_cutoff, size_t) const { - return detail::lcs_seq_similarity(PM, detail::Range(s1), s2, score_cutoff); + return detail::lcs_seq_similarity(PM, detail::make_range(s1), s2, score_cutoff); } std::vector s1; detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedLCSseq(const Sentence1& s1_) -> CachedLCSseq>; template 
CachedLCSseq(InputIt1 first1, InputIt1 last1) -> CachedLCSseq>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index 75852f2a..87242db6 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -13,7 +13,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { template struct LCSseqResult; @@ -30,6 +31,20 @@ struct LCSseqResult { size_t sim; }; +template +LCSseqResult& getMatrixRef(LCSseqResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + /* * An encoded mbleven model table. * @@ -143,12 +158,12 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const size_t cur_vec = 0; for (; cur_vec + interleaveCount * vecs <= block.size(); cur_vec += interleaveCount * vecs) { std::array, interleaveCount> S; - unroll([&](auto j) { S[j] = static_cast(-1); }); + unroll([&](size_t j) { S[j] = static_cast(-1); }); for (const auto& ch : s2) { - unroll([&](auto j) { + unroll([&](size_t j) { alignas(32) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + j * vecs + i, ch); }); native_simd Matches(stored.data()); native_simd u = S[j] & Matches; @@ -156,10 +171,10 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const }); } - unroll([&](auto j) { + unroll([&](size_t j) { auto counts = popcount(~S[j]); - unroll([&](auto i) { - *score_iter = (counts[i] >= score_cutoff) ? counts[i] : 0; + unroll([&](size_t i) { + *score_iter = (counts[i] >= score_cutoff) ? 
static_cast(counts[i]) : 0; score_iter++; }); }); @@ -170,7 +185,7 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const for (const auto& ch : s2) { alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd Matches(stored.data()); native_simd u = S & Matches; @@ -178,8 +193,8 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const } auto counts = popcount(~S); - unroll([&](auto i) { - *score_iter = (counts[i] >= score_cutoff) ? counts[i] : 0; + unroll([&](size_t i) { + *score_iter = (counts[i] >= score_cutoff) ? static_cast(counts[i]) : 0; score_iter++; }); } @@ -188,14 +203,17 @@ void lcs_simd(Range scores, const BlockPatternMatchVector& block, const #endif template -auto lcs_unroll(const PMV& block, const Range&, const Range& s2, size_t score_cutoff = 0) - -> LCSseqResult +auto lcs_unroll(const PMV& block, const Range&, const Range& s2, + size_t score_cutoff = 0) -> LCSseqResult { uint64_t S[N]; unroll([&](size_t i) { S[i] = ~UINT64_C(0); }); LCSseqResult res; - if constexpr (RecordMatrix) res.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S = ShiftedBitMatrix(s2.size(), N, ~UINT64_C(0)); + } auto iter_s2 = s2.begin(); for (size_t i = 0; i < s2.size(); ++i) { @@ -210,7 +228,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); } @@ -221,7 +242,10 @@ auto lcs_unroll(const PMV& block, const Range&, const Range& uint64_t x = addc64(S[word], u, carry, &carry); S[word] = x | (S[word] - u); - if constexpr (RecordMatrix) res.S[i][word] = S[word]; + 
RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S[i][word] = S[word]; + } }); iter_s2++; @@ -256,10 +280,11 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range res; - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = band_width_left + 1 + band_width_right; size_t full_band_words = std::min(words, full_band / word_size + 2); - res.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.S = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); } /* first_block is the index of the first block in Ukkonen band. */ @@ -270,7 +295,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.S.set_offset(row, static_cast(first_block * word_size)); + } for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -281,7 +309,10 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range band_width_right) first_block = (row - band_width_right) / word_size; @@ -353,8 +384,7 @@ size_t lcs_seq_similarity(const BlockPatternMatchVector& block, Range size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -386,8 +416,7 @@ size_t lcs_seq_similarity(Range s1, Range s2, size_t score_c size_t max_misses = len1 + len2 - 2 * score_cutoff; /* no edits are allowed */ - if (max_misses == 0 || (max_misses == 1 && len1 == len2)) - return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) ? len1 : 0; + if (max_misses == 0 || (max_misses == 1 && len1 == len2)) return s1 == s2 ? 
len1 : 0; if (max_misses < abs_diff(len1, len2)) return 0; @@ -421,7 +450,9 @@ Editops recover_alignment(const Range& s1, const Range& s2, if (dist == 0) return editops; - [[maybe_unused]] size_t band_width_right = s2.size() - matrix.sim; +#ifndef NDEBUG + size_t band_width_right = s2.size() - matrix.sim; +#endif auto col = len1; auto row = len2; @@ -520,10 +551,11 @@ class LCSseq : public SimilarityBase static size_t _similarity(const Range& s1, const Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + size_t) { return lcs_seq_similarity(s1, s2, score_cutoff); } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Levenshtein.hpp b/rapidfuzz/distance/Levenshtein.hpp index c2b21bde..45245fea 100644 --- a/rapidfuzz/distance/Levenshtein.hpp +++ b/rapidfuzz/distance/Levenshtein.hpp @@ -283,7 +283,7 @@ template Editops levenshtein_editops(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(first1, last1), detail::Range(first2, last2), + return detail::levenshtein_editops(detail::make_range(first1, last1), detail::make_range(first2, last2), score_hint); } @@ -291,7 +291,7 @@ template Editops levenshtein_editops(const Sentence1& s1, const Sentence2& s2, size_t score_hint = std::numeric_limits::max()) { - return detail::levenshtein_editops(detail::Range(s1), detail::Range(s2), score_hint); + return detail::levenshtein_editops(detail::make_range(s1), detail::make_range(s2), score_hint); } #ifdef RAPIDFUZZ_SIMD @@ -304,26 +304,26 @@ struct MultiLevenshtein : public detail::MultiDistanceBase::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen 
<= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -387,14 +387,14 @@ struct MultiLevenshtein : public detail::MultiDistanceBase= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::levenshtein_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -428,7 +428,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) - : s1(first1, last1), PM(detail::Range(first1, last1)), weights(aWeights) + : s1(first1, last1), PM(detail::make_range(first1, last1)), weights(aWeights) {} private: @@ -454,7 +454,7 @@ struct CachedLevenshtein : public detail::CachedDistanceBase= weights.insert_cost + weights.delete_cost) 
{ // max can make use of the common divisor of the three weights size_t new_max = detail::ceil_div(score_cutoff, weights.insert_cost); - size_t dist = detail::indel_distance(PM, detail::Range(s1), s2, new_max); + size_t dist = detail::indel_distance(PM, detail::make_range(s1), s2, new_max); dist *= weights.insert_cost; return (dist <= score_cutoff) ? dist : score_cutoff + 1; } } - return detail::generalized_levenshtein_distance(detail::Range(s1), s2, weights, score_cutoff); + return detail::generalized_levenshtein_distance(detail::make_range(s1), s2, weights, score_cutoff); } std::vector s1; @@ -481,12 +481,14 @@ struct CachedLevenshtein : public detail::CachedDistanceBase -explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = {1, 1, 1}) - -> CachedLevenshtein>; +explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = { + 1, 1, 1}) -> CachedLevenshtein>; template -CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1}) - -> CachedLevenshtein>; +CachedLevenshtein(InputIt1 first1, InputIt1 last1, + LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein>; +#endif } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index 2fa07f35..991f10c9 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -14,7 +14,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { struct LevenshteinRow { uint64_t VP; @@ -53,6 +54,34 @@ struct LevenshteinResult { size_t dist; }; +template +LevenshteinResult& getMatrixRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. 
+ // On C++17 this properly uses if constexpr + assert(RecordMatrix); + return reinterpret_cast&>(res); +#endif +} + +template +LevenshteinResult& getBitRowRef(LevenshteinResult& res) +{ +#if RAPIDFUZZ_IF_CONSTEXPR_AVAILABLE + return res; +#else + // this is a hack since the compiler doesn't know early enough that + // this is never called when the types differ. + // On C++17 this properly uses if constexpr + assert(RecordBitRow); + return reinterpret_cast&>(res); +#endif +} + template size_t generalized_levenshtein_wagner_fischer(const Range& s1, const Range& s2, LevenshteinWeightTable weights, size_t max) @@ -240,9 +269,10 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R LevenshteinResult res; res.dist = s1.size(); - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); } /* mask used when computing D[m,j] in the paper 10^(m-1) */ @@ -271,19 +301,21 @@ auto levenshtein_hyrroe2003(const PM_Vec& PM, const Range& s1, const R VP = HN | ~(D0 | HP); VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } if (res.dist > max) res.dist = max + 1; - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = s2.size(); - res.vecs.emplace_back(VP, VN); + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = s2.size(); + res_.vecs.emplace_back(VP, VN); } return res; @@ -315,12 +347,12 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte native_simd VN(VecType(0)); alignas(alignment) std::array 
currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -331,7 +363,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd X(stored.data()); auto D0 = (((X & VP) + VP) ^ VP) | X | VN; @@ -355,7 +387,7 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ -363,7 +395,9 @@ void levenshtein_hyrroe2003_simd(Range scores, const detail::BlockPatte } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (!std::is_same_v) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -471,8 +505,8 @@ size_t levenshtein_hyrroe2003_small_band(const BlockPatternMatchVector& PM, cons } template -auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range& s2, size_t max) - -> LevenshteinResult +auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range& s2, + size_t max) -> LevenshteinResult { assert(max <= s1.size()); assert(max <= s2.size()); @@ -484,14 
+518,15 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range res; res.dist = max; - if constexpr (RecordMatrix) { - res.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), 1, 0); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP = ShiftedBitMatrix(s2.size(), 1, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), 1, 0); ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res_.VP.set_offset(i, start_offset + static_cast(i)); + res_.VN.set_offset(i, start_offset + static_cast(i)); } } @@ -545,9 +580,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -587,9 +623,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range> 1) | HP); VN = (D0 >> 1) & HP; - if constexpr (RecordMatrix) { - res.VP[i][0] = VP; - res.VN[i][0] = VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[i][0] = VP; + res_.VN[i][0] = VN; } } @@ -624,17 +661,19 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range scores[words - 1] = s1.size(); - if constexpr (RecordMatrix) { + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); size_t full_band = std::min(s1.size(), 2 * max + 1); size_t full_band_words = std::min(words, full_band / word_size + 2); - res.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); - res.VN = ShiftedBitMatrix(s2.size(), full_band_words, 0); + res_.VP = ShiftedBitMatrix(s2.size(), full_band_words, ~UINT64_C(0)); + res_.VN = ShiftedBitMatrix(s2.size(), full_band_words, 
0); } - if constexpr (RecordBitRow) { - res.first_block = 0; - res.last_block = 0; - res.prev_score = 0; + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { + auto& res_ = getBitRowRef(res); + res_.first_block = 0; + res_.last_block = 0; + res_.prev_score = 0; } max = std::min(max, std::max(s1.size(), s2.size())); @@ -651,9 +690,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HP_carry = 1; uint64_t HN_carry = 0; - if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP.set_offset(row, static_cast(first_block * word_size)); + res_.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -687,9 +727,10 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[word].VP = HN | ~(D0 | HP); vecs[word].VN = HP & D0; - if constexpr (RecordMatrix) { - res.VP[row][word - first_block] = vecs[word].VP; - res.VN[row][word - first_block] = vecs[word].VN; + RAPIDFUZZ_IF_CONSTEXPR (RecordMatrix) { + auto& res_ = getMatrixRef(res); + res_.VP[row][word - first_block] = vecs[word].VP; + res_.VN[row][word - first_block] = vecs[word].VN; } return static_cast(HP_carry) - static_cast(HN_carry); @@ -727,7 +768,8 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range vecs[last_block].VN = 0; size_t chars_in_block = (last_block + 1 == words) ? 
((s1.size() - 1) % word_size + 1) : 64; - scores[last_block] = scores[last_block - 1] + chars_in_block - HP_carry + HN_carry; + scores[last_block] = scores[last_block - 1] + chars_in_block - + opt_static_cast(HP_carry) + opt_static_cast(HN_carry); // todo probably wrong types scores[last_block] = static_cast(static_cast(scores[last_block]) + advance_block(last_block)); @@ -774,26 +816,27 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range return res; } - if constexpr (RecordBitRow) { + RAPIDFUZZ_IF_CONSTEXPR (RecordBitRow) { if (row == stop_row) { + auto& res_ = getBitRowRef(res); if (first_block == 0) - res.prev_score = stop_row + 1; + res_.prev_score = stop_row + 1; else { /* count backwards to find score at last position in previous block */ size_t relevant_bits = std::min((first_block + 1) * 64, s1.size()) % 64; uint64_t mask = ~UINT64_C(0); if (relevant_bits) mask >>= 64 - relevant_bits; - res.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - - popcount(vecs[first_block].VP & mask); + res_.prev_score = scores[first_block] + popcount(vecs[first_block].VN & mask) - + popcount(vecs[first_block].VP & mask); } - res.first_block = first_block; - res.last_block = last_block; - res.vecs = std::move(vecs); + res_.first_block = first_block; + res_.last_block = last_block; + res_.vecs = std::move(vecs); /* unknown so make sure it is <= max */ - res.dist = 0; + res_.dist = 0; return res; } } @@ -815,7 +858,7 @@ size_t uniform_levenshtein_distance(const BlockPatternMatchVector& block, Range< if (score_hint < 31) score_hint = 31; // when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; if (score_cutoff < abs_diff(s1.size(), s2.size())) return score_cutoff + 1; @@ -873,7 +916,7 @@ size_t uniform_levenshtein_distance(Range s1, Range s2, size if (score_hint < 31) score_hint = 31; 
// when no differences are allowed a direct comparision is sufficient - if (score_cutoff == 0) return !std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()); + if (score_cutoff == 0) return s1 != s2; // at least length difference insertions/deletions required if (score_cutoff < (s1.size() - s2.size())) return score_cutoff + 1; @@ -1130,9 +1173,6 @@ HirschbergPos find_hirschberg_pos(const Range& s1, const Range= 0); - assert(hpos.right_score >= 0); - if (hpos.left_score + hpos.right_score > max) return find_hirschberg_pos(s1, s2, max * 2); else { @@ -1216,4 +1256,5 @@ Editops levenshtein_editops(const Range& s1, const Range& s2 return editops; } -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/OSA.hpp b/rapidfuzz/distance/OSA.hpp index 0e56eadd..ed9d16d0 100644 --- a/rapidfuzz/distance/OSA.hpp +++ b/rapidfuzz/distance/OSA.hpp @@ -119,26 +119,26 @@ struct MultiOSA friend detail::MultiDistanceBase, size_t, 0, std::numeric_limits::max()>; friend detail::MultiNormalizedMetricBase, size_t>; - constexpr static size_t get_vec_size() + RAPIDFUZZ_CONSTEXPR_CXX14 static size_t get_vec_size() { # ifdef RAPIDFUZZ_AVX2 using namespace detail::simd_avx2; # else using namespace detail::simd_sse2; # endif - if constexpr (MaxLen <= 8) + RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 8) return native_simd::size; - else if constexpr (MaxLen <= 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 16) return native_simd::size; - else if constexpr (MaxLen <= 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 32) return native_simd::size; - else if constexpr (MaxLen <= 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen <= 64) return native_simd::size; - static_assert(MaxLen <= 64); + static_assert(MaxLen <= 64, "expected MaxLen <= 64"); } - constexpr static size_t find_block_count(size_t count) + static size_t find_block_count(size_t count) { size_t vec_size = get_vec_size(); size_t simd_vec_count = detail::ceil_div(count, vec_size); @@ -199,14 +199,14 @@ 
struct MultiOSA if (score_count < result_count()) throw std::invalid_argument("scores has to have >= result_count() elements"); - detail::Range scores_(scores, scores + score_count); - if constexpr (MaxLen == 8) + auto scores_ = detail::make_range(scores, scores + score_count); + RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 8) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 16) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 16) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 32) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 32) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); - else if constexpr (MaxLen == 64) + else RAPIDFUZZ_IF_CONSTEXPR (MaxLen == 64) detail::osa_hyrroe2003_simd(scores_, PM, str_lens, s2, score_cutoff); } @@ -237,7 +237,7 @@ struct CachedOSA {} template - CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::Range(first1, last1)) + CachedOSA(InputIt1 first1, InputIt1 last1) : s1(first1, last1), PM(detail::make_range(first1, last1)) {} private: @@ -251,8 +251,7 @@ struct CachedOSA } template - size_t _distance(const detail::Range& s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _distance(const detail::Range& s2, size_t score_cutoff, size_t) const { size_t res; if (s1.empty()) @@ -260,9 +259,9 @@ struct CachedOSA else if (s2.empty()) res = s1.size(); else if (s1.size() < 64) - res = detail::osa_hyrroe2003(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003(PM, detail::make_range(s1), s2, score_cutoff); else - res = detail::osa_hyrroe2003_block(PM, detail::Range(s1), s2, score_cutoff); + res = detail::osa_hyrroe2003_block(PM, detail::make_range(s1), s2, score_cutoff); return (res <= score_cutoff) ? 
res : score_cutoff + 1; } @@ -271,11 +270,13 @@ struct CachedOSA detail::BlockPatternMatchVector PM; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedOSA(const Sentence1& s1_) -> CachedOSA>; template CachedOSA(InputIt1 first1, InputIt1 last1) -> CachedOSA>; +#endif /**@}*/ } // namespace rapidfuzz diff --git a/rapidfuzz/distance/OSA_impl.hpp b/rapidfuzz/distance/OSA_impl.hpp index adc536da..8fa73289 100644 --- a/rapidfuzz/distance/OSA_impl.hpp +++ b/rapidfuzz/distance/OSA_impl.hpp @@ -10,7 +10,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { /** * @brief Bitparallel implementation of the OSA distance. @@ -100,12 +101,12 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV native_simd PM_j_old(VecType(0)); alignas(alignment) std::array currDist_; - unroll( - [&](auto i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); + unroll( + [&](size_t i) { currDist_[i] = static_cast(s1_lengths[result_index + i]); }); native_simd currDist(reinterpret_cast(currDist_.data())); /* mask used when computing D[m,j] in the paper 10^(m-1) */ alignas(alignment) std::array mask_; - unroll([&](auto i) { + unroll([&](size_t i) { if (s1_lengths[result_index + i] == 0) mask_[i] = 0; else @@ -116,7 +117,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV for (const auto& ch : s2) { /* Step 1: Computing D0 */ alignas(alignment) std::array stored; - unroll([&](auto i) { stored[i] = block.get(cur_vec + i, ch); }); + unroll([&](size_t i) { stored[i] = block.get(cur_vec + i, ch); }); native_simd PM_j(stored.data()); auto TR = (andnot(PM_j, D0) << 1) & PM_j_old; @@ -143,7 +144,7 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV alignas(alignment) std::array distances; currDist.store(distances.data()); - unroll([&](auto i) { + unroll([&](size_t i) { size_t score = 0; /* strings of length 0 are not handled correctly */ if (s1_lengths[result_index] == 0) { @@ 
-151,7 +152,9 @@ void osa_hyrroe2003_simd(Range scores, const detail::BlockPatternMatchV } /* calculate score under consideration of wraparounds in parallel counter */ else { - if constexpr (!std::is_same_v) { + RAPIDFUZZ_IF_CONSTEXPR (std::numeric_limits::max() < + std::numeric_limits::max()) + { size_t min_dist = abs_diff(s1_lengths[result_index], s2.size()); size_t wraparound_score = static_cast(std::numeric_limits::max()) + 1; @@ -270,4 +273,5 @@ class OSA : public DistanceBase::ma } }; -} // namespace rapidfuzz::detail \ No newline at end of file +} // namespace detail +} // namespace rapidfuzz \ No newline at end of file diff --git a/rapidfuzz/distance/Postfix.hpp b/rapidfuzz/distance/Postfix.hpp index 0da830f7..57320a5f 100644 --- a/rapidfuzz/distance/Postfix.hpp +++ b/rapidfuzz/distance/Postfix.hpp @@ -85,8 +85,7 @@ struct CachedPostfix : public detail::CachedSimilarityBase } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t score_hint) const { return detail::Postfix::similarity(s1, s2, score_cutoff, score_hint); } @@ -94,12 +93,13 @@ struct CachedPostfix : public detail::CachedSimilarityBase std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPostfix(const Sentence1& s1_) -> CachedPostfix>; template CachedPostfix(InputIt1 first1, InputIt1 last1) -> CachedPostfix>; - +#endif /**@}*/ } // namespace rapidfuzz diff --git a/rapidfuzz/distance/Postfix_impl.hpp b/rapidfuzz/distance/Postfix_impl.hpp index 0be3abf6..87eb6ad2 100644 --- a/rapidfuzz/distance/Postfix_impl.hpp +++ b/rapidfuzz/distance/Postfix_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Postfix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -19,12 +20,12 @@ class Postfix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t 
score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_suffix(s1, s2); return (dist >= score_cutoff) ? dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/distance/Prefix.hpp b/rapidfuzz/distance/Prefix.hpp index 64173dc7..d3123950 100644 --- a/rapidfuzz/distance/Prefix.hpp +++ b/rapidfuzz/distance/Prefix.hpp @@ -84,8 +84,7 @@ struct CachedPrefix : public detail::CachedSimilarityBase, } template - size_t _similarity(detail::Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) const + size_t _similarity(detail::Range s2, size_t score_cutoff, size_t) const { return detail::Prefix::similarity(s1, s2, score_cutoff, score_cutoff); } @@ -93,11 +92,13 @@ struct CachedPrefix : public detail::CachedSimilarityBase, std::vector s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPrefix(const Sentence1& s1_) -> CachedPrefix>; template CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix>; +#endif /**@}*/ diff --git a/rapidfuzz/distance/Prefix_impl.hpp b/rapidfuzz/distance/Prefix_impl.hpp index 41ab1f69..fac93c49 100644 --- a/rapidfuzz/distance/Prefix_impl.hpp +++ b/rapidfuzz/distance/Prefix_impl.hpp @@ -6,7 +6,8 @@ #include #include -namespace rapidfuzz::detail { +namespace rapidfuzz { +namespace detail { class Prefix : public SimilarityBase::max()> { friend SimilarityBase::max()>; @@ -19,12 +20,12 @@ class Prefix : public SimilarityBase - static size_t _similarity(Range s1, Range s2, size_t score_cutoff, - [[maybe_unused]] size_t score_hint) + static size_t _similarity(Range s1, Range s2, size_t score_cutoff, size_t) { size_t dist = remove_common_prefix(s1, s2); return (dist >= score_cutoff) ? 
dist : 0; } }; -} // namespace rapidfuzz::detail +} // namespace detail +} // namespace rapidfuzz diff --git a/rapidfuzz/fuzz.hpp b/rapidfuzz/fuzz.hpp index fbf7ae26..de49be9f 100644 --- a/rapidfuzz/fuzz.hpp +++ b/rapidfuzz/fuzz.hpp @@ -8,7 +8,8 @@ #include #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /** * @defgroup Fuzz Fuzz @@ -75,7 +76,7 @@ struct MultiRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template @@ -116,11 +117,13 @@ struct CachedRatio { CachedIndel cached_indel; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template CachedRatio(const Sentence1& s1) -> CachedRatio>; template CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio>; +#endif template ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2, @@ -189,11 +192,13 @@ struct CachedPartialRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialRatio(const Sentence1& s1) -> CachedPartialRatio>; template CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::ratio between @@ -298,11 +303,13 @@ struct CachedTokenSortRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSortRatio(const Sentence1& s1) -> CachedTokenSortRatio>; template CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio>; +#endif /** * @brief Sorts the words in the strings and calculates the fuzz::partial_ratio @@ -356,13 +363,15 @@ struct CachedPartialTokenSortRatio { CachedPartialRatio cached_partial_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSortRatio(const Sentence1& s1) -> 
CachedPartialTokenSortRatio>; template -CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1) - -> CachedPartialTokenSortRatio>; +CachedPartialTokenSortRatio(InputIt1 first1, + InputIt1 last1) -> CachedPartialTokenSortRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -424,11 +433,13 @@ struct CachedTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenSetRatio(const Sentence1& s1) -> CachedTokenSetRatio>; template CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio>; +#endif /** * @brief Compares the words in the strings based on unique and common words @@ -481,12 +492,14 @@ struct CachedPartialTokenSetRatio { detail::SplittedSentenceView::iterator> tokens_s1; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio>; template -CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1) - -> CachedPartialTokenSetRatio>; +CachedPartialTokenSetRatio(InputIt1 first1, + InputIt1 last1) -> CachedPartialTokenSetRatio>; +#endif /** * @brief Helper method that returns the maximum of fuzz::token_set_ratio and @@ -543,11 +556,13 @@ struct CachedTokenRatio { CachedRatio cached_ratio_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedTokenRatio(const Sentence1& s1) -> CachedTokenRatio>; template CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio>; +#endif /** * @brief Helper method that returns the maximum of @@ -604,11 +619,13 @@ struct CachedPartialTokenRatio { std::vector s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedPartialTokenRatio(const Sentence1& s1) -> CachedPartialTokenRatio>; template CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio>; +#endif /** * @brief Calculates a weighted ratio based on the other ratio algorithms @@ -664,11 
+681,13 @@ struct CachedWRatio { rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedWRatio(const Sentence1& s1) -> CachedWRatio>; template CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio>; +#endif /** * @brief Calculates a quick ratio between two strings using fuzz.ratio @@ -727,13 +746,13 @@ struct MultiQRatio { void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) const { - similarity(scores, score_count, detail::Range(first2, last2), score_cutoff); + similarity(scores, score_count, detail::make_range(first2, last2), score_cutoff); } template void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const { - rapidfuzz::detail::Range s2_(s2); + auto s2_ = detail::make_range(s2); if (s2_.empty()) { for (size_t i = 0; i < str_lens.size(); ++i) scores[i] = 0; @@ -776,14 +795,17 @@ struct CachedQRatio { CachedRatio cached_ratio; }; +#ifdef RAPIDFUZZ_DEDUCTION_GUIDES template explicit CachedQRatio(const Sentence1& s1) -> CachedQRatio>; template CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio>; +#endif /**@}*/ -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz #include diff --git a/rapidfuzz/fuzz_impl.hpp b/rapidfuzz/fuzz_impl.hpp index a2e874a7..84be44bf 100644 --- a/rapidfuzz/fuzz_impl.hpp +++ b/rapidfuzz/fuzz_impl.hpp @@ -2,6 +2,7 @@ /* Copyright © 2021-present Max Bachmann */ /* Copyright © 2011 Adam Cohen */ +#include "rapidfuzz/details/Range.hpp" #include #include @@ -11,7 +12,8 @@ #include #include -namespace rapidfuzz::fuzz { +namespace rapidfuzz { +namespace fuzz { /********************************************** * ratio @@ -20,7 +22,7 @@ namespace rapidfuzz::fuzz { template double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff) { - return ratio(detail::Range(first1, last1), 
detail::Range(first2, last2), score_cutoff); + return ratio(detail::make_range(first1, last1), detail::make_range(first2, last2), score_cutoff); } template @@ -34,7 +36,7 @@ template double CachedRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double score_hint) const { - return similarity(detail::Range(first2, last2), score_cutoff, score_hint); + return similarity(detail::make_range(first2, last2), score_cutoff, score_hint); } template @@ -50,7 +52,7 @@ double CachedRatio::similarity(const Sentence2& s2, double score_cutoff, namespace fuzz_detail { -static constexpr double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) +static RAPIDFUZZ_CONSTEXPR_CXX14 double norm_distance(size_t dist, size_t lensum, double score_cutoff = 0) { double score = (lensum > 0) ? (100.0 - 100.0 * static_cast(dist) / static_cast(lensum)) : 100.0; @@ -90,8 +92,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(window.first); auto subseq2_first = s2.begin() + static_cast(window.second); - detail::Range subseq1(subseq1_first, subseq1_first + static_cast(len1)); - detail::Range subseq2(subseq2_first, subseq2_first + static_cast(len1)); + auto subseq1 = + detail::make_range(subseq1_first, subseq1_first + static_cast(len1)); + auto subseq2 = + detail::make_range(subseq2_first, subseq2_first + static_cast(len1)); if (scores[window.first] == std::numeric_limits::max()) { scores[window.first] = cached_ratio.cached_indel.distance(subseq1); @@ -124,9 +128,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range last */ size_t known_edits = detail::abs_diff(scores[window.first], scores[window.second]); /* half of the cells that are not needed for known_edits can lead to a better score */ + size_t max_score_improvement = (cell_diff - known_edits / 2) / 2 * 2; ptrdiff_t min_score = static_cast(std::min(scores[window.first], scores[window.second])) - - static_cast(cell_diff + known_edits / 2); + 
static_cast(max_score_improvement); if (min_score < static_cast(cutoff_dist)) { size_t center = cell_diff / 2; new_windows.emplace_back(window.first, window.first + center); @@ -144,7 +149,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i)); + auto subseq = rapidfuzz::detail::make_range(s2.begin(), s2.begin() + static_cast(i)); if (!s1_char_set.find(subseq.back())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -157,7 +162,7 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range(i), s2.end()); + auto subseq = rapidfuzz::detail::make_range(s2.begin() + static_cast(i), s2.end()); if (!s1_char_set.find(subseq.front())) continue; double ls_ratio = cached_ratio.similarity(subseq, score_cutoff); @@ -206,8 +211,8 @@ ScoreAlignment partial_ratio_alignment(InputIt1 first1, InputIt1 last1, if (!len1 || !len2) return ScoreAlignment(static_cast(len1 == len2) * 100.0, 0, len1, 0, len1); - auto s1 = detail::Range(first1, last1); - auto s2 = detail::Range(first2, last2); + auto s1 = detail::make_range(first1, last1); + auto s2 = detail::make_range(first2, last2); auto alignment = fuzz_detail::partial_ratio_impl(s1, s2, score_cutoff); if (alignment.score != 100 && s1.size() == s2.size()) { @@ -254,7 +259,7 @@ CachedPartialRatio::CachedPartialRatio(InputIt1 first1, InputIt1 last1) template template double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { size_t len1 = s1.size(); size_t len2 = static_cast(std::distance(first2, last2)); @@ -266,8 +271,8 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d if (!len1 || !len2) return static_cast(len1 == len2) * 100.0; - auto s1_ = detail::Range(s1); - auto s2 = detail::Range(first2, last2); + auto s1_ = detail::make_range(s1); + auto s2 = detail::make_range(first2, last2); double score = fuzz_detail::partial_ratio_impl(s1_, s2, cached_ratio, s1_char_set, 
score_cutoff).score; if (score != 100 && s1_.size() == s2.size()) { @@ -281,8 +286,7 @@ double CachedPartialRatio::similarity(InputIt2 first2, InputIt2 last2, d template template -double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -309,7 +313,7 @@ double token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_c template template double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -318,8 +322,7 @@ double CachedTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -348,7 +351,7 @@ double partial_token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -357,8 +360,7 @@ double CachedPartialTokenSortRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSortRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -437,7 +439,7 @@ double token_set_ratio(const 
Sentence1& s1, const Sentence2& s2, double score_cu template template double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -446,8 +448,7 @@ double CachedTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, template template -double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -496,7 +497,7 @@ double partial_token_set_ratio(const Sentence1& s1, const Sentence2& s2, double template template double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { if (score_cutoff > 100) return 0; @@ -505,8 +506,7 @@ double CachedPartialTokenSetRatio::similarity(InputIt2 first2, InputIt2 template template -double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenSetRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -648,8 +648,9 @@ double token_ratio(const std::vector& s1_sorted, double result = 0; auto s2_sorted = tokens_b.join(); if (s1_sorted.size() < 65) { - double norm_sim = detail::indel_normalized_similarity(blockmap_s1_sorted, detail::Range(s1_sorted), - detail::Range(s2_sorted), score_cutoff / 100); + double norm_sim = + detail::indel_normalized_similarity(blockmap_s1_sorted, detail::make_range(s1_sorted), + detail::make_range(s2_sorted), score_cutoff / 100); result = norm_sim * 100; } else { @@ -684,15 +685,14 @@ double token_ratio(const std::vector& s1_sorted, template template 
double CachedTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::token_ratio(s1_tokens, cached_ratio_s1_sorted, first2, last2, score_cutoff); } template template -double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -770,15 +770,14 @@ double partial_token_ratio(const std::vector& s1_sorted, template template double CachedPartialTokenRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const + double) const { return fuzz_detail::partial_token_ratio(s1_sorted, tokens_s1, first2, last2, score_cutoff); } template template -double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedPartialTokenRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -836,13 +835,12 @@ CachedWRatio::CachedWRatio(InputIt1 first1, InputIt1 last1) cached_partial_ratio(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))), s1_sorted(tokens_s1.join()), - blockmap_s1_sorted(detail::Range(s1_sorted)) + blockmap_s1_sorted(detail::make_range(s1_sorted)) {} template template -double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { if (score_cutoff > 100) return 0; @@ -881,8 +879,7 @@ double CachedWRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedWRatio::similarity(const Sentence2& 
s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedWRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } @@ -913,8 +910,7 @@ double QRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff) template template -double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double score_cutoff, double) const { auto len2 = std::distance(first2, last2); @@ -927,10 +923,10 @@ double CachedQRatio::similarity(InputIt2 first2, InputIt2 last2, double template template -double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, - [[maybe_unused]] double score_hint) const +double CachedQRatio::similarity(const Sentence2& s2, double score_cutoff, double) const { return similarity(detail::to_begin(s2), detail::to_end(s2), score_cutoff); } -} // namespace rapidfuzz::fuzz +} // namespace fuzz +} // namespace rapidfuzz diff --git a/rapidfuzz_reference/Indel.hpp b/rapidfuzz_reference/Indel.hpp index cf2520ac..ec3bf3f6 100644 --- a/rapidfuzz_reference/Indel.hpp +++ b/rapidfuzz_reference/Indel.hpp @@ -22,4 +22,17 @@ size_t indel_distance(const Sentence1& s1, const Sentence2& s2, return levenshtein_distance(s1, s2, {1, 1, 2}, score_cutoff); } +template +double indel_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, + double score_cutoff = 0.0) +{ + return levenshtein_similarity(first1, last1, first2, last2, {1, 1, 2}, score_cutoff); +} + +template +double indel_similarity(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0.0) +{ + return levenshtein_similarity(s1, s2, {1, 1, 2}, score_cutoff); +} + } // namespace rapidfuzz_reference diff --git a/rapidfuzz_reference/JaroWinkler.hpp b/rapidfuzz_reference/JaroWinkler.hpp index 3b717d8e..b2447ebb 100644 --- 
a/rapidfuzz_reference/JaroWinkler.hpp +++ b/rapidfuzz_reference/JaroWinkler.hpp @@ -7,7 +7,7 @@ namespace rapidfuzz_reference { template >> + typename = typename std::enable_if::value>::type> double jaro_winkler_similarity(InputIt1 P_first, InputIt1 P_last, InputIt2 T_first, InputIt2 T_last, double prefix_weight = 0.1, double score_cutoff = 0.0) { diff --git a/rapidfuzz_reference/Levenshtein.hpp b/rapidfuzz_reference/Levenshtein.hpp index acfc9f74..90a9c56b 100644 --- a/rapidfuzz_reference/Levenshtein.hpp +++ b/rapidfuzz_reference/Levenshtein.hpp @@ -16,6 +16,18 @@ struct LevenshteinWeightTable { size_t replace_cost; }; +static inline size_t levenshtein_maximum(size_t len1, size_t len2, LevenshteinWeightTable weights) +{ + size_t max_dist = len1 * weights.delete_cost + len2 * weights.insert_cost; + + if (len1 >= len2) + max_dist = std::min(max_dist, len2 * weights.replace_cost + (len1 - len2) * weights.delete_cost); + else + max_dist = std::min(max_dist, len1 * weights.replace_cost + (len2 - len1) * weights.insert_cost); + + return max_dist; +} + template Matrix levenshtein_matrix(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, LevenshteinWeightTable weights = {1, 1, 1}) @@ -69,4 +81,24 @@ size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2, score_cutoff); } +template +double levenshtein_similarity(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, + LevenshteinWeightTable weights = {1, 1, 1}, double score_cutoff = 0.0) +{ + size_t len1 = static_cast(std::distance(first1, last1)); + size_t len2 = static_cast(std::distance(first2, last2)); + size_t dist = levenshtein_distance(first1, last1, first2, last2, weights); + size_t max = levenshtein_maximum(len1, len2, weights); + double sim = 1.0 - (double)dist / max; + return (sim >= score_cutoff) ? 
sim : 0.0; +} + +template +double levenshtein_similarity(const Sentence1& s1, const Sentence2& s2, + LevenshteinWeightTable weights = {1, 1, 1}, double score_cutoff = 0.0) +{ + return levenshtein_similarity(std::begin(s1), std::end(s1), std::begin(s2), std::end(s2), weights, + score_cutoff); +} + } // namespace rapidfuzz_reference diff --git a/rapidfuzz_reference/fuzz.hpp b/rapidfuzz_reference/fuzz.hpp new file mode 100644 index 00000000..693a6043 --- /dev/null +++ b/rapidfuzz_reference/fuzz.hpp @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2022-present Max Bachmann */ + +#pragma once + +#include "Indel.hpp" + +namespace rapidfuzz_reference { + +template +double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0) +{ + return indel_similarity(first1, last1, first2, last2, score_cutoff / 100.0) * 100; +} + +template +double ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0.0) +{ + return indel_similarity(s1, s2, score_cutoff / 100.0) * 100; +} + +template +double partial_ratio_impl(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, + double score_cutoff = 0.0) +{ + size_t len1 = static_cast(std::distance(first1, last1)); + size_t len2 = static_cast(std::distance(first2, last2)); + if (len1 == 0 && len2 == 0) return 100.0; + + if (len1 == 0 || len2 == 0) return 0.0; + + if (len1 > len2) return partial_ratio_impl(first2, last2, first1, last1, score_cutoff); + + double res = 0.0; + for (ptrdiff_t i = -1 * (ptrdiff_t)len1; i < (ptrdiff_t)len2; i++) { + ptrdiff_t start = std::max(ptrdiff_t(0), i); + ptrdiff_t end = std::min(ptrdiff_t(len2), i + ptrdiff_t(len1)); + InputIt2 first2_ = first2 + start; + InputIt2 last2_ = first2 + end; + res = std::max(res, ratio(first1, last1, first2_, last2_, score_cutoff)); + } + return res; +} + +template +double partial_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, + double score_cutoff = 0.0) +{ + size_t 
len1 = static_cast(std::distance(first1, last1)); + size_t len2 = static_cast(std::distance(first2, last2)); + if (len1 != len2) return partial_ratio_impl(first1, last1, first2, last2, score_cutoff); + + return std::max(partial_ratio_impl(first1, last1, first2, last2, score_cutoff), + partial_ratio_impl(first2, last2, first1, last1, score_cutoff)); +} + +template +double partial_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0.0) +{ + return partial_ratio(std::begin(s1), std::end(s1), std::begin(s2), std::end(s2), score_cutoff); +} + +} // namespace rapidfuzz_reference diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a7717030..b0f7eca1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -find_package(Catch2 3 QUIET) +find_package(Catch2 QUIET) if (Catch2_FOUND) message("Using system supplied version of Catch2") else() @@ -7,9 +7,10 @@ else() FetchContent_Declare( Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.4.0 + GIT_TAG v2.13.10 ) FetchContent_MakeAvailable(Catch2) + set(Catch2_VERSION "2.13.10") endif() if (RAPIDFUZZ_ENABLE_LINTERS) @@ -50,11 +51,19 @@ if (RAPIDFUZZ_ENABLE_LINTERS) endif() function(rapidfuzz_add_test test) - add_executable(test_${test} tests-${test}.cpp) - target_link_libraries(test_${test} ${PROJECT_NAME}) - target_link_libraries(test_${test} Catch2::Catch2WithMain) + if(Catch2_VERSION VERSION_LESS "3.0") + add_executable(test_${test} tests-main.cpp tests-${test}.cpp) + target_link_libraries(test_${test} PRIVATE Catch2::Catch2) + target_compile_definitions(test_${test} PRIVATE CATCH2_VERSION=2) + else() + add_executable(test_${test} tests-${test}.cpp) + target_link_libraries(test_${test} PRIVATE Catch2::Catch2WithMain) + target_compile_definitions(test_${test} PRIVATE CATCH2_VERSION=3) + endif() + + target_link_libraries(test_${test} PRIVATE ${PROJECT_NAME}) if (RAPIDFUZZ_ENABLE_LINTERS) - target_link_libraries(test_${test} project_warnings) + 
target_link_libraries(test_${test} PRIVATE project_warnings) endif() add_test(NAME ${test} COMMAND test_${test}) endfunction() diff --git a/test/common.hpp b/test/common.hpp index 238e2055..3e4d728c 100644 --- a/test/common.hpp +++ b/test/common.hpp @@ -49,7 +49,7 @@ class BidirectionalIterWrapper { return *this; } - const auto& operator*() const + const value_type& operator*() const { return *iter; } @@ -58,7 +58,13 @@ class BidirectionalIterWrapper { T iter; }; -template +template +constexpr auto make_bidir(Iter iter) -> BidirectionalIterWrapper +{ + return BidirectionalIterWrapper(iter); +} + +template ::value>> std::basic_string str_multiply(std::basic_string a, size_t b) { std::basic_string output; diff --git a/test/distance/CMakeLists.txt b/test/distance/CMakeLists.txt index c08c9d65..412d3daa 100644 --- a/test/distance/CMakeLists.txt +++ b/test/distance/CMakeLists.txt @@ -1,7 +1,15 @@ function(rapidfuzz_add_test test) - add_executable(test_${test} tests-${test}.cpp examples/ocr.cpp examples/pythonLevenshteinIssue9.cpp) + if(Catch2_VERSION VERSION_LESS "3.0") + add_executable(test_${test} ../tests-main.cpp tests-${test}.cpp examples/ocr.cpp examples/pythonLevenshteinIssue9.cpp) + target_link_libraries(test_${test} PRIVATE Catch2::Catch2) + target_compile_definitions(test_${test} PRIVATE CATCH2_VERSION=2) + else() + add_executable(test_${test} tests-${test}.cpp examples/ocr.cpp examples/pythonLevenshteinIssue9.cpp) + target_link_libraries(test_${test} PRIVATE Catch2::Catch2WithMain) + target_compile_definitions(test_${test} PRIVATE CATCH2_VERSION=3) + endif() + target_link_libraries(test_${test} PRIVATE ${PROJECT_NAME}) - target_link_libraries(test_${test} PRIVATE Catch2::Catch2WithMain) if (RAPIDFUZZ_ENABLE_LINTERS) target_link_libraries(test_${test} PRIVATE project_warnings) endif() diff --git a/test/distance/examples/ocr.cpp b/test/distance/examples/ocr.cpp index c1a024ad..96b9f5c2 100644 --- a/test/distance/examples/ocr.cpp +++ 
b/test/distance/examples/ocr.cpp @@ -1,6 +1,6 @@ #include "ocr.hpp" -std::basic_string ocr_example1 = { +std::vector ocr_example1 = { 22, 18, 27, 22, 8, 23, 23, 18, 29, 27, 8, 23, 28, 18, 29, 27, 8, 24, 18, 27, 31, 8, 24, 18, 29, 22, 8, 24, 24, 18, 31, 24, 8, 23, 24, 18, 25, 25, 8, 24, 26, 18, 30, 24, 8, 23, 26, 18, 25, 30, 8, 29, 11, 2, 22, 18, 27, 22, 8, 23, 23, 18, 29, @@ -5075,7 +5075,7 @@ std::basic_string ocr_example1 = { 27, 8, 29, 7, 8, 39, 61, 80, 8, 27, 28, 22, 21, 8, 65, 79, 68, 61, 72, 81, 65, 74, 2}; -std::basic_string ocr_example2 = { +std::vector ocr_example2 = { 22, 18, 27, 22, 8, 23, 23, 18, 29, 27, 8, 23, 28, 18, 29, 27, 8, 24, 18, 27, 31, 8, 24, 18, 29, 22, 8, 24, 24, 18, 31, 24, 8, 23, 24, 18, 25, 25, 8, 24, 26, 18, 30, 24, 8, 23, 26, 18, 25, 30, 11, 2, 22, 18, 27, 22, 8, 23, 23, 18, 29, 27, 8, diff --git a/test/distance/examples/ocr.hpp b/test/distance/examples/ocr.hpp index 77d4cc13..f277d54b 100644 --- a/test/distance/examples/ocr.hpp +++ b/test/distance/examples/ocr.hpp @@ -1,6 +1,6 @@ #pragma once #include -#include +#include -extern std::basic_string ocr_example1; -extern std::basic_string ocr_example2; +extern std::vector ocr_example1; +extern std::vector ocr_example2; diff --git a/test/distance/examples/pythonLevenshteinIssue9.cpp b/test/distance/examples/pythonLevenshteinIssue9.cpp index 171c58fa..fdd636f9 100644 --- a/test/distance/examples/pythonLevenshteinIssue9.cpp +++ b/test/distance/examples/pythonLevenshteinIssue9.cpp @@ -2,7 +2,7 @@ namespace pythonLevenshteinIssue9 { -std::basic_string example1 = { +std::vector example1 = { 8, 14, 4, 2, 3, 7, 15, 6, 4, 5, 8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 10, 11, 12, 13, 8, 2, 8, 14, 4, 2, 3, 7, 15, 6, 4, 5, 8, 6, 7, 16, 7, 13, 17, 2, 4, 16, 14, 7, 14, 18, 19, 8, 20, 14, 4, 21, 13, 20, 22, 8, 2, 3, 4, 5, 6, 20, 8, 9, 10, 2, 10, 11, 12, 13, 8, 18, 14, @@ -206,7 +206,7 @@ std::basic_string example1 = { 9, 8, 6, 7, 3, 7, 23, 4, 41, 7, 51, 8, 48, 69, 43, 6, 4, 9, 11, 20, 2, 13, 32, 5, 8, 18, 16}; 
-std::basic_string example2 = { +std::vector example2 = { 3, 4, 5, 6, 7, 8, 9, 10, 2, 10, 11, 12, 13, 8, 2, 8, 41, 7, 9, 7, 13, 3, 18, 10, 5, 2, 4, 16, 14, 7, 14, 18, 19, 8, 20, 14, 4, 21, 13, 20, 22, 8, 2, 3, 4, 5, 6, 20, 8, 9, 10, 2, 10, 11, 12, 13, 8, 18, 14, 10, 7, 23, 17, 13, 4, 8, 11, 4, 14, 8, 15, 7, 12, 8, 14, 18, 16, 7, diff --git a/test/distance/examples/pythonLevenshteinIssue9.hpp b/test/distance/examples/pythonLevenshteinIssue9.hpp index b1e78a1e..b6e0cd78 100644 --- a/test/distance/examples/pythonLevenshteinIssue9.hpp +++ b/test/distance/examples/pythonLevenshteinIssue9.hpp @@ -1,8 +1,8 @@ #pragma once #include -#include +#include namespace pythonLevenshteinIssue9 { -extern std::basic_string example1; -extern std::basic_string example2; +extern std::vector example1; +extern std::vector example2; } // namespace pythonLevenshteinIssue9 diff --git a/test/distance/tests-DamerauLevenshtein.cpp b/test/distance/tests-DamerauLevenshtein.cpp index f00547ca..37b78466 100644 --- a/test/distance/tests-DamerauLevenshtein.cpp +++ b/test/distance/tests-DamerauLevenshtein.cpp @@ -1,5 +1,10 @@ -#include -#include +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include #include #include @@ -8,6 +13,8 @@ #include "../common.hpp" +using Catch::Matchers::WithinAbs; + template size_t damerau_levenshtein_distance(const Sentence1& s1, const Sentence2& s2, size_t max = std::numeric_limits::max()) @@ -16,9 +23,8 @@ size_t damerau_levenshtein_distance(const Sentence1& s1, const Sentence2& s2, size_t res2 = rapidfuzz::experimental::damerau_levenshtein_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); size_t res3 = rapidfuzz::experimental::damerau_levenshtein_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), 
make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -35,9 +41,8 @@ size_t damerau_levenshtein_similarity(const Sentence1& s1, const Sentence2& s2, size_t res2 = rapidfuzz::experimental::damerau_levenshtein_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); size_t res3 = rapidfuzz::experimental::damerau_levenshtein_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -55,15 +60,15 @@ double damerau_levenshtein_normalized_distance(const Sentence1& s1, const Senten double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance( s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), + score_cutoff); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == 
Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -75,15 +80,15 @@ double damerau_levenshtein_normalized_similarity(const Sentence1& s1, const Sent double res2 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity( s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); double res3 = rapidfuzz::experimental::damerau_levenshtein_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::experimental::CachedDamerauLevenshtein scorer(s1); + make_bidir(s1.begin()), make_bidir(s1.end()), make_bidir(s2.begin()), make_bidir(s2.end()), + score_cutoff); + rapidfuzz::experimental::CachedDamerauLevenshtein> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -114,19 +119,15 @@ TEST_CASE("Levenshtein") SECTION("weighted levenshtein calculates correct ratios") { REQUIRE(damerau_levenshtein_normalized_similarity(test, test) == 1.0); - REQUIRE(damerau_levenshtein_normalized_similarity(test, no_suffix) == - Catch::Approx(0.75).epsilon(0.0001)); - 
REQUIRE(damerau_levenshtein_normalized_similarity(swapped1, swapped2) == - Catch::Approx(0.75).epsilon(0.0001)); - REQUIRE(damerau_levenshtein_normalized_similarity(test, no_suffix2) == - Catch::Approx(0.75).epsilon(0.0001)); + REQUIRE_THAT(damerau_levenshtein_normalized_similarity(test, no_suffix), WithinAbs(0.75, 0.0001)); + REQUIRE_THAT(damerau_levenshtein_normalized_similarity(swapped1, swapped2), WithinAbs(0.75, 0.0001)); + REQUIRE_THAT(damerau_levenshtein_normalized_similarity(test, no_suffix2), WithinAbs(0.75, 0.0001)); REQUIRE(damerau_levenshtein_normalized_similarity(test, replace_all) == 0.0); { std::string s1 = "CA"; std::string s2 = "ABC"; - REQUIRE(damerau_levenshtein_normalized_similarity(s1, s2) == - Catch::Approx(0.33333).epsilon(0.0001)); + REQUIRE_THAT(damerau_levenshtein_normalized_similarity(s1, s2), WithinAbs(0.33333, 0.0001)); } } } diff --git a/test/distance/tests-Hamming.cpp b/test/distance/tests-Hamming.cpp index 7fa7187f..5bf94e0f 100644 --- a/test/distance/tests-Hamming.cpp +++ b/test/distance/tests-Hamming.cpp @@ -1,10 +1,18 @@ -#include -#include +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include #include #include #include "../common.hpp" +#include "rapidfuzz/details/type_traits.hpp" + +using Catch::Matchers::WithinAbs; template size_t hamming_distance(const Sentence1& s1, const Sentence2& s2, @@ -12,10 +20,9 @@ size_t hamming_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::hamming_distance(s1, s2, max); size_t res2 = rapidfuzz::hamming_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::hamming_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedHamming scorer(s1); + size_t res3 = rapidfuzz::hamming_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + 
rapidfuzz::CachedHamming> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -30,10 +37,9 @@ size_t hamming_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = { size_t res1 = rapidfuzz::hamming_similarity(s1, s2, max); size_t res2 = rapidfuzz::hamming_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::hamming_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedHamming scorer(s1); + size_t res3 = rapidfuzz::hamming_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedHamming> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); REQUIRE(res1 == res2); @@ -49,16 +55,16 @@ double hamming_normalized_distance(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::hamming_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::hamming_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::hamming_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedHamming scorer(s1); + double res3 = + rapidfuzz::hamming_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedHamming> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == 
Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -68,16 +74,16 @@ double hamming_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d double res1 = rapidfuzz::hamming_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::hamming_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::hamming_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedHamming scorer(s1); + double res3 = + rapidfuzz::hamming_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedHamming> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -110,13 +116,13 @@ TEST_CASE("Hamming_editops") { rapidfuzz::Editops ops = rapidfuzz::hamming_editops(s, d); - REQUIRE(d == rapidfuzz::editops_apply(ops, s, d)); + REQUIRE(d == rapidfuzz::editops_apply_str(ops, s, d)); REQUIRE(ops.get_src_len() == s.size()); REQUIRE(ops.get_dest_len() == d.size()); } { rapidfuzz::Editops ops = rapidfuzz::hamming_editops(d, s); - 
REQUIRE(s == rapidfuzz::editops_apply(ops, d, s)); + REQUIRE(s == rapidfuzz::editops_apply_str(ops, d, s)); REQUIRE(ops.get_src_len() == d.size()); REQUIRE(ops.get_dest_len() == s.size()); } diff --git a/test/distance/tests-Indel.cpp b/test/distance/tests-Indel.cpp index 8b9aedc9..c8e49d14 100644 --- a/test/distance/tests-Indel.cpp +++ b/test/distance/tests-Indel.cpp @@ -1,5 +1,10 @@ -#include -#include +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include #include @@ -7,7 +12,7 @@ #include "../common.hpp" -using Catch::Approx; +using Catch::Matchers::WithinAbs; template size_t indel_distance(const Sentence1& s1, const Sentence2& s2, @@ -15,10 +20,9 @@ size_t indel_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::indel_distance(s1, s2, max); size_t res2 = rapidfuzz::indel_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::indel_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedIndel scorer(s1); + size_t res3 = rapidfuzz::indel_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedIndel> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -63,10 +67,9 @@ size_t indel_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = 0 { size_t res1 = rapidfuzz::indel_similarity(s1, s2, max); size_t res2 = rapidfuzz::indel_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::indel_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedIndel scorer(s1); + size_t res3 = rapidfuzz::indel_similarity(make_bidir(s1.begin()), 
make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedIndel> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -110,10 +113,10 @@ double indel_normalized_distance(const Sentence1& s1, const Sentence2& s2, doubl double res1 = rapidfuzz::indel_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::indel_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::indel_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedIndel scorer(s1); + double res3 = + rapidfuzz::indel_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedIndel> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); #ifdef RAPIDFUZZ_SIMD @@ -141,13 +144,13 @@ double indel_normalized_distance(const Sentence1& s1, const Sentence2& s2, doubl simd_scorer.normalized_distance(&results[0], results.size(), s2, score_cutoff); } - REQUIRE(res1 == Catch::Approx(results[0]).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.0001)); } #endif - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -157,10 +160,10 @@ double indel_normalized_similarity(const Sentence1& s1, const Sentence2& s2, dou double res1 = 
rapidfuzz::indel_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::indel_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::indel_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedIndel scorer(s1); + double res3 = + rapidfuzz::indel_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedIndel> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); #ifdef RAPIDFUZZ_SIMD @@ -188,13 +191,13 @@ double indel_normalized_similarity(const Sentence1& s1, const Sentence2& s2, dou simd_scorer.normalized_similarity(&results[0], results.size(), s2, score_cutoff); } - REQUIRE(res1 == Catch::Approx(results[0]).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.0001)); } #endif - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -247,8 +250,9 @@ TEST_CASE("Indel") { std::string a = "001"; std::string b = "220"; - REQUIRE(Approx(0.3333333) == rapidfuzz::indel_normalized_similarity(a, b)); - REQUIRE(Approx(0.3333333) == rapidfuzz::CachedIndel(a).normalized_similarity(b)); + REQUIRE_THAT(rapidfuzz::indel_normalized_similarity(a, b), WithinAbs(0.3333333, 0.000001)); + REQUIRE_THAT(rapidfuzz::CachedIndel(a).normalized_similarity(b), + WithinAbs(0.3333333, 0.000001)); } 
SECTION("test banded implementation") @@ -277,7 +281,7 @@ TEST_CASE("Indel") REQUIRE(indel_distance(s1, s2) == 231); rapidfuzz::Editops ops = rapidfuzz::indel_editops(s1, s2); - REQUIRE(s2 == rapidfuzz::editops_apply(ops, s1, s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_str(ops, s1, s2)); } } } diff --git a/test/distance/tests-Jaro.cpp b/test/distance/tests-Jaro.cpp index d9a69ddd..cc077ece 100644 --- a/test/distance/tests-Jaro.cpp +++ b/test/distance/tests-Jaro.cpp @@ -1,11 +1,16 @@ +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include "../../rapidfuzz_reference/Jaro.hpp" -#include -#include #include #include "../common.hpp" -using Catch::Approx; +using Catch::Matchers::WithinAbs; template double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0.0) @@ -17,10 +22,10 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu rapidfuzz::jaro_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); #if 0 // todo double res5 = rapidfuzz::jaro_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); + make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); #endif - rapidfuzz::CachedJaro scorer(s1); + rapidfuzz::CachedJaro> scorer(s1); double res6 = scorer.similarity(s2, score_cutoff); double res7 = scorer.similarity(s2.begin(), s2.end(), score_cutoff); double res8 = scorer.normalized_similarity(s2, score_cutoff); @@ -35,7 +40,7 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 32; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } if (s1.size() <= 16) { rapidfuzz::experimental::MultiJaro<16> simd_scorer(16); @@ 
-44,7 +49,7 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 16; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } if (s1.size() <= 32) { rapidfuzz::experimental::MultiJaro<32> simd_scorer(8); @@ -53,7 +58,7 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 8; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } if (s1.size() <= 64) { rapidfuzz::experimental::MultiJaro<64> simd_scorer(4); @@ -62,18 +67,18 @@ double jaro_similarity(const Sentence1& s1, const Sentence2& s2, double score_cu simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 4; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } #endif - REQUIRE(res1 == Approx(res2)); - REQUIRE(res1 == Approx(res3)); - REQUIRE(res1 == Approx(res4)); - // REQUIRE(res1 == Approx(res5)); - REQUIRE(res1 == Approx(res6)); - REQUIRE(res1 == Approx(res7)); - REQUIRE(res1 == Approx(res8)); - REQUIRE(res1 == Approx(res9)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.000001)); + // REQUIRE_THAT(res1, WithinAbs(res5, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res6, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res7, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res8, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res9, 0.000001)); return res1; } @@ -87,10 +92,10 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto rapidfuzz::jaro_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); #if 0 // todo double res5 = rapidfuzz::jaro_distance( - 
BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); + make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); #endif - rapidfuzz::CachedJaro scorer(s1); + rapidfuzz::CachedJaro> scorer(s1); double res6 = scorer.distance(s2, score_cutoff); double res7 = scorer.distance(s2.begin(), s2.end(), score_cutoff); double res8 = scorer.normalized_distance(s2, score_cutoff); @@ -105,7 +110,7 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 32; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } if (s1.size() <= 16) { rapidfuzz::experimental::MultiJaro<16> simd_scorer(16); @@ -114,7 +119,7 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 16; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } if (s1.size() <= 32) { rapidfuzz::experimental::MultiJaro<32> simd_scorer(8); @@ -123,7 +128,7 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 8; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } if (s1.size() <= 64) { rapidfuzz::experimental::MultiJaro<64> simd_scorer(4); @@ -132,18 +137,18 @@ double jaro_distance(const Sentence1& s1, const Sentence2& s2, double score_cuto simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); for (size_t i = 0; i < 4; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } #endif - REQUIRE(res1 == Approx(res2)); 
- REQUIRE(res1 == Approx(res3)); - REQUIRE(res1 == Approx(res4)); - // REQUIRE(res1 == Approx(res5)); - REQUIRE(res1 == Approx(res6)); - REQUIRE(res1 == Approx(res7)); - REQUIRE(res1 == Approx(res8)); - REQUIRE(res1 == Approx(res9)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.000001)); + // REQUIRE_THAT(res1, WithinAbs(res5, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res6, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res7, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res8, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res9, 0.000001)); return res1; } @@ -157,10 +162,10 @@ double jaro_sim_test(const Sentence1& s1, const Sentence2& s2, double score_cuto double Sim_bitparallel2 = jaro_similarity(s2, s1, score_cutoff); double Dist_bitparallel2 = jaro_distance(s2, s1, 1.0 - score_cutoff); - REQUIRE(Sim_original == Approx(Sim_bitparallel)); - REQUIRE((1.0 - Sim_original) == Approx(Dist_bitparallel)); - REQUIRE(Sim_original == Approx(Sim_bitparallel2)); - REQUIRE((1.0 - Sim_original) == Approx(Dist_bitparallel2)); + REQUIRE_THAT(Sim_original, WithinAbs(Sim_bitparallel, 0.000001)); + REQUIRE_THAT((1.0 - Sim_original), WithinAbs(Dist_bitparallel, 0.000001)); + REQUIRE_THAT(Sim_original, WithinAbs(Sim_bitparallel2, 0.000001)); + REQUIRE_THAT((1.0 - Sim_original), WithinAbs(Dist_bitparallel2, 0.000001)); return Sim_original; } @@ -183,43 +188,46 @@ TEST_CASE("JaroTest") SECTION("testEdgeCaseLengths") { - REQUIRE(jaro_sim_test(std::string(""), std::string("")) == Approx(1)); - REQUIRE(jaro_sim_test(std::string("0"), std::string("0")) == Approx(1)); - REQUIRE(jaro_sim_test(std::string("00"), std::string("00")) == Approx(1)); - REQUIRE(jaro_sim_test(std::string("0"), std::string("00")) == Approx(0.833333)); - - REQUIRE(jaro_sim_test(str_multiply(std::string("0"), 65), str_multiply(std::string("0"), 65)) == - Approx(1)); - REQUIRE(jaro_sim_test(str_multiply(std::string("0"), 64), 
str_multiply(std::string("0"), 65)) == - Approx(0.994872)); - REQUIRE(jaro_sim_test(str_multiply(std::string("0"), 63), str_multiply(std::string("0"), 65)) == - Approx(0.989744)); - - REQUIRE(jaro_sim_test(std::string("000000001"), std::string("0000010")) == Approx(0.8783068783)); + REQUIRE_THAT(jaro_sim_test(std::string(""), std::string("")), WithinAbs(1, 0.000001)); + REQUIRE_THAT(jaro_sim_test(std::string("0"), std::string("0")), WithinAbs(1, 0.000001)); + REQUIRE_THAT(jaro_sim_test(std::string("00"), std::string("00")), WithinAbs(1, 0.000001)); + REQUIRE_THAT(jaro_sim_test(std::string("0"), std::string("00")), WithinAbs(0.833333, 0.000001)); + + REQUIRE_THAT(jaro_sim_test(str_multiply(std::string("0"), 65), str_multiply(std::string("0"), 65)), + WithinAbs(1, 0.000001)); + REQUIRE_THAT(jaro_sim_test(str_multiply(std::string("0"), 64), str_multiply(std::string("0"), 65)), + WithinAbs(0.994872, 0.000001)); + REQUIRE_THAT(jaro_sim_test(str_multiply(std::string("0"), 63), str_multiply(std::string("0"), 65)), + WithinAbs(0.989744, 0.000001)); + + REQUIRE_THAT(jaro_sim_test(std::string("000000001"), std::string("0000010")), + WithinAbs(0.8783068783, 0.000001)); { std::string s1("01234567"); std::string s2 = str_multiply(std::string("0"), 170) + std::string("7654321") + str_multiply(std::string("0"), 200); - REQUIRE(jaro_sim_test(s1, s2) == Approx(0.5487400531)); + REQUIRE_THAT(jaro_sim_test(s1, s2), WithinAbs(0.5487400531, 0.000001)); } - REQUIRE(jaro_sim_test(std::string("01"), std::string("1111100000")) == Approx(0.53333333)); + REQUIRE_THAT(jaro_sim_test(std::string("01"), std::string("1111100000")), + WithinAbs(0.53333333, 0.000001)); - REQUIRE( + REQUIRE_THAT( jaro_sim_test(std::string("10000000000000000000000000000000000000000000000000000000000000020"), - std::string("00000000000000000000000000000000000000000000000000000000000000000")) == - Approx(0.979487)); - REQUIRE(jaro_sim_test( - 
std::string("00000000000000100000000000000000000000010000000000000000000000000"), - std::string( - "0000000000000000000000000000000000000000000000000000000000000000000000000000001")) == - Approx(0.922233)); - REQUIRE( + std::string("00000000000000000000000000000000000000000000000000000000000000000")), + WithinAbs(0.979487, 0.000001)); + REQUIRE_THAT( + jaro_sim_test( + std::string("00000000000000100000000000000000000000010000000000000000000000000"), + std::string( + "0000000000000000000000000000000000000000000000000000000000000000000000000000001")), + WithinAbs(0.922233, 0.000001)); + REQUIRE_THAT( jaro_sim_test(std::string("00000000000000000000000000000000000000000000000000000000000000000"), std::string("0100000000000000000000000000000000000000000000000000000000000000000000" - "0000000000000000000000000000000000000000000000000000000000")) == - Approx(0.8359375)); + "0000000000000000000000000000000000000000000000000000000000")), + WithinAbs(0.8359375, 0.000001)); } SECTION("testFuzzingRegressions") @@ -240,9 +248,9 @@ TEST_CASE("JaroTest") simd_scorer.similarity(&results[0], results.size(), s2); for (size_t i = 0; i < 32; ++i) - REQUIRE(results[i] == Approx(0.593750)); + REQUIRE_THAT(results[i], WithinAbs(0.593750, 0.000001)); - REQUIRE(results[32] == Approx(0.593750)); + REQUIRE_THAT(results[32], WithinAbs(0.593750, 0.000001)); } #endif } diff --git a/test/distance/tests-JaroWinkler.cpp b/test/distance/tests-JaroWinkler.cpp index 0838da51..277053df 100644 --- a/test/distance/tests-JaroWinkler.cpp +++ b/test/distance/tests-JaroWinkler.cpp @@ -1,11 +1,16 @@ +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include "../../rapidfuzz_reference/JaroWinkler.hpp" -#include -#include #include #include "../common.hpp" -using Catch::Approx; +using Catch::Matchers::WithinAbs; template double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double prefix_weight = 0.1, @@ -17,7 +22,7 @@ double jaro_winkler_similarity(const Sentence1& 
s1, const Sentence2& s2, double double res3 = rapidfuzz::jaro_winkler_normalized_similarity(s1, s2, prefix_weight, score_cutoff); double res4 = rapidfuzz::jaro_winkler_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), prefix_weight, score_cutoff); - rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight); + rapidfuzz::CachedJaroWinkler> scorer(s1, prefix_weight); double res5 = scorer.similarity(s2, score_cutoff); double res6 = scorer.similarity(s2.begin(), s2.end(), score_cutoff); double res7 = scorer.normalized_similarity(s2, score_cutoff); @@ -31,35 +36,35 @@ double jaro_winkler_similarity(const Sentence1& s1, const Sentence2& s2, double simd_scorer.insert(s1); simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); for (unsigned int i = 0; i < 32; ++i) - REQUIRE(res1 == Approx(results[i])); + REQUIRE_THAT(res1, WithinAbs(results[i], 0.000001)); } if (s1.size() <= 16) { rapidfuzz::experimental::MultiJaroWinkler<16> simd_scorer(1, prefix_weight); simd_scorer.insert(s1); simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.000001)); } if (s1.size() <= 32) { rapidfuzz::experimental::MultiJaroWinkler<32> simd_scorer(1, prefix_weight); simd_scorer.insert(s1); simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.000001)); } if (s1.size() <= 64) { rapidfuzz::experimental::MultiJaroWinkler<64> simd_scorer(1, prefix_weight); simd_scorer.insert(s1); simd_scorer.similarity(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.000001)); } #endif - REQUIRE(res1 == Approx(res2)); - REQUIRE(res1 == Approx(res3)); - REQUIRE(res1 == Approx(res4)); - REQUIRE(res1 == Approx(res5)); - REQUIRE(res1 == Approx(res6)); - REQUIRE(res1 == Approx(res7)); - REQUIRE(res1 == 
Approx(res8)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res6, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res7, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res8, 0.000001)); return res1; } @@ -73,7 +78,7 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr double res3 = rapidfuzz::jaro_winkler_normalized_distance(s1, s2, prefix_weight, score_cutoff); double res4 = rapidfuzz::jaro_winkler_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), prefix_weight, score_cutoff); - rapidfuzz::CachedJaroWinkler scorer(s1, prefix_weight); + rapidfuzz::CachedJaroWinkler> scorer(s1, prefix_weight); double res5 = scorer.distance(s2, score_cutoff); double res6 = scorer.distance(s2.begin(), s2.end(), score_cutoff); double res7 = scorer.normalized_distance(s2, score_cutoff); @@ -85,35 +90,35 @@ double jaro_winkler_distance(const Sentence1& s1, const Sentence2& s2, double pr rapidfuzz::experimental::MultiJaroWinkler<8> simd_scorer(1, prefix_weight); simd_scorer.insert(s1); simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.000001)); } if (s1.size() <= 16) { rapidfuzz::experimental::MultiJaroWinkler<16> simd_scorer(1, prefix_weight); simd_scorer.insert(s1); simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.000001)); } if (s1.size() <= 32) { rapidfuzz::experimental::MultiJaroWinkler<32> simd_scorer(1, prefix_weight); simd_scorer.insert(s1); simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.000001)); } if (s1.size() <= 64) { 
rapidfuzz::experimental::MultiJaroWinkler<64> simd_scorer(1, prefix_weight); simd_scorer.insert(s1); simd_scorer.distance(&results[0], results.size(), s2, score_cutoff); - REQUIRE(res1 == Approx(results[0])); + REQUIRE_THAT(res1, WithinAbs(results[0], 0.000001)); } #endif - REQUIRE(res1 == Approx(res2)); - REQUIRE(res1 == Approx(res3)); - REQUIRE(res1 == Approx(res4)); - REQUIRE(res1 == Approx(res5)); - REQUIRE(res1 == Approx(res6)); - REQUIRE(res1 == Approx(res7)); - REQUIRE(res1 == Approx(res8)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res6, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res7, 0.000001)); + REQUIRE_THAT(res1, WithinAbs(res8, 0.000001)); return res1; } @@ -127,10 +132,10 @@ double jaro_winkler_sim_test(const Sentence1& s1, const Sentence2& s2, double sc double Sim_bitparallel2 = jaro_winkler_similarity(s2, s1, 0.1, score_cutoff); double Dist_bitparallel2 = jaro_winkler_distance(s2, s1, 0.1, 1.0 - score_cutoff); - REQUIRE(Sim_original == Approx(Sim_bitparallel)); - REQUIRE((1.0 - Sim_original) == Approx(Dist_bitparallel)); - REQUIRE(Sim_original == Approx(Sim_bitparallel2)); - REQUIRE((1.0 - Sim_original) == Approx(Dist_bitparallel2)); + REQUIRE_THAT(Sim_original, WithinAbs(Sim_bitparallel, 0.000001)); + REQUIRE_THAT((1.0 - Sim_original), WithinAbs(Dist_bitparallel, 0.000001)); + REQUIRE_THAT(Sim_original, WithinAbs(Sim_bitparallel2, 0.000001)); + REQUIRE_THAT((1.0 - Sim_original), WithinAbs(Dist_bitparallel2, 0.000001)); return Sim_original; } @@ -171,35 +176,42 @@ TEST_CASE("JaroWinklerTest") SECTION("testEdgeCaseLengths") { - REQUIRE(jaro_winkler_sim_test(std::string(""), std::string("")) == Approx(1)); - REQUIRE(jaro_winkler_sim_test(std::string("0"), std::string("0")) == Approx(1)); - REQUIRE(jaro_winkler_sim_test(std::string("00"), std::string("00")) == 
Approx(1)); - REQUIRE(jaro_winkler_sim_test(std::string("0"), std::string("00")) == Approx(0.85)); - - REQUIRE(jaro_winkler_sim_test(str_multiply(std::string("0"), 65), - str_multiply(std::string("0"), 65)) == Approx(1)); - REQUIRE(jaro_winkler_sim_test(str_multiply(std::string("0"), 64), - str_multiply(std::string("0"), 65)) == Approx(0.996923)); - REQUIRE(jaro_winkler_sim_test(str_multiply(std::string("0"), 63), - str_multiply(std::string("0"), 65)) == Approx(0.993846)); - - REQUIRE(jaro_winkler_sim_test(std::string("000000001"), std::string("0000010")) == - Approx(0.926984127)); - - REQUIRE(jaro_winkler_sim_test(std::string("01"), std::string("1111100000")) == Approx(0.53333333)); - - REQUIRE(jaro_winkler_sim_test( - std::string("10000000000000000000000000000000000000000000000000000000000000020"), - std::string("00000000000000000000000000000000000000000000000000000000000000000")) == - Approx(0.979487)); - REQUIRE(jaro_winkler_sim_test( - std::string("00000000000000100000000000000000000000010000000000000000000000000"), - std::string( - "0000000000000000000000000000000000000000000000000000000000000000000000000000001")) == - Approx(0.95334)); - REQUIRE(jaro_winkler_sim_test( - std::string("00000000000000000000000000000000000000000000000000000000000000000"), - std::string("0100000000000000000000000000000000000000000000000000000000000000000000000000" - "0000000000000000000000000000000000000000000000000000")) == Approx(0.852344)); + REQUIRE_THAT(jaro_winkler_sim_test(std::string(""), std::string("")), WithinAbs(1, 0.000001)); + REQUIRE_THAT(jaro_winkler_sim_test(std::string("0"), std::string("0")), WithinAbs(1, 0.000001)); + REQUIRE_THAT(jaro_winkler_sim_test(std::string("00"), std::string("00")), WithinAbs(1, 0.000001)); + REQUIRE_THAT(jaro_winkler_sim_test(std::string("0"), std::string("00")), WithinAbs(0.85, 0.000001)); + + REQUIRE_THAT( + jaro_winkler_sim_test(str_multiply(std::string("0"), 65), str_multiply(std::string("0"), 65)), + WithinAbs(1, 0.000001)); + 
REQUIRE_THAT( + jaro_winkler_sim_test(str_multiply(std::string("0"), 64), str_multiply(std::string("0"), 65)), + WithinAbs(0.996923, 0.000001)); + REQUIRE_THAT( + jaro_winkler_sim_test(str_multiply(std::string("0"), 63), str_multiply(std::string("0"), 65)), + WithinAbs(0.993846, 0.000001)); + + REQUIRE_THAT(jaro_winkler_sim_test(std::string("000000001"), std::string("0000010")), + WithinAbs(0.926984127, 0.000001)); + + REQUIRE_THAT(jaro_winkler_sim_test(std::string("01"), std::string("1111100000")), + WithinAbs(0.53333333, 0.000001)); + + REQUIRE_THAT(jaro_winkler_sim_test( + std::string("10000000000000000000000000000000000000000000000000000000000000020"), + std::string("00000000000000000000000000000000000000000000000000000000000000000")), + WithinAbs(0.979487, 0.000001)); + REQUIRE_THAT( + jaro_winkler_sim_test( + std::string("00000000000000100000000000000000000000010000000000000000000000000"), + std::string( + "0000000000000000000000000000000000000000000000000000000000000000000000000000001")), + WithinAbs(0.95334, 0.000001)); + REQUIRE_THAT( + jaro_winkler_sim_test( + std::string("00000000000000000000000000000000000000000000000000000000000000000"), + std::string("0100000000000000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000")), + WithinAbs(0.852344, 0.000001)); } } \ No newline at end of file diff --git a/test/distance/tests-LCSseq.cpp b/test/distance/tests-LCSseq.cpp index 70740686..369a58f2 100644 --- a/test/distance/tests-LCSseq.cpp +++ b/test/distance/tests-LCSseq.cpp @@ -1,5 +1,10 @@ -#include -#include +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include #include @@ -7,16 +12,17 @@ #include "../common.hpp" +using Catch::Matchers::WithinAbs; + template size_t lcs_seq_distance(const Sentence1& s1, const Sentence2& s2, size_t max = std::numeric_limits::max()) { size_t res1 = rapidfuzz::lcs_seq_distance(s1, s2, max); size_t res2 = 
rapidfuzz::lcs_seq_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::lcs_seq_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedLCSseq scorer(s1); + size_t res3 = rapidfuzz::lcs_seq_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedLCSseq> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -61,10 +67,9 @@ size_t lcs_seq_similarity(const Sentence1& s1, const Sentence2& s2, size_t max = { size_t res1 = rapidfuzz::lcs_seq_similarity(s1, s2, max); size_t res2 = rapidfuzz::lcs_seq_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::lcs_seq_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedLCSseq scorer(s1); + size_t res3 = rapidfuzz::lcs_seq_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedLCSseq> scorer(s1); size_t res4 = scorer.similarity(s2, max); size_t res5 = scorer.similarity(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -108,16 +113,16 @@ double lcs_seq_normalized_distance(const Sentence1& s1, const Sentence2& s2, dou double res1 = rapidfuzz::lcs_seq_normalized_distance(s1, s2, score_cutoff); double res2 = rapidfuzz::lcs_seq_normalized_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::lcs_seq_normalized_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedLCSseq scorer(s1); + double res3 = + 
rapidfuzz::lcs_seq_normalized_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedLCSseq> scorer(s1); double res4 = scorer.normalized_distance(s2, score_cutoff); double res5 = scorer.normalized_distance(s2.begin(), s2.end(), score_cutoff); - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -127,16 +132,16 @@ double lcs_seq_normalized_similarity(const Sentence1& s1, const Sentence2& s2, d double res1 = rapidfuzz::lcs_seq_normalized_similarity(s1, s2, score_cutoff); double res2 = rapidfuzz::lcs_seq_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), score_cutoff); - double res3 = rapidfuzz::lcs_seq_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), score_cutoff); - rapidfuzz::CachedLCSseq scorer(s1); + double res3 = + rapidfuzz::lcs_seq_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), score_cutoff); + rapidfuzz::CachedLCSseq> scorer(s1); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, 
WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } diff --git a/test/distance/tests-Levenshtein.cpp b/test/distance/tests-Levenshtein.cpp index 38dea437..ae938786 100644 --- a/test/distance/tests-Levenshtein.cpp +++ b/test/distance/tests-Levenshtein.cpp @@ -1,5 +1,10 @@ -#include -#include +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include #include #include @@ -11,6 +16,8 @@ #include "../common.hpp" +using Catch::Matchers::WithinAbs; + template size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2, rapidfuzz::LevenshteinWeightTable weights = {1, 1, 1}, @@ -18,10 +25,9 @@ size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2, { size_t res1 = rapidfuzz::levenshtein_distance(s1, s2, weights, max); size_t res2 = rapidfuzz::levenshtein_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), weights, max); - size_t res3 = rapidfuzz::levenshtein_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), weights, max); - rapidfuzz::CachedLevenshtein scorer(s1, weights); + size_t res3 = rapidfuzz::levenshtein_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), weights, max); + rapidfuzz::CachedLevenshtein> scorer(s1, weights); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD @@ -63,9 +69,9 @@ size_t levenshtein_distance(const Sentence1& s1, const Sentence2& s2, } template -std::basic_string get_subsequence(const std::basic_string& s, ptrdiff_t pos, ptrdiff_t len) +std::vector get_subsequence(const std::vector& s, ptrdiff_t pos, ptrdiff_t len) { - return std::basic_string(std::begin(s) + pos, std::begin(s) + pos + len); + return std::vector(std::begin(s) + pos, std::begin(s) + pos + len); } template @@ -76,16 +82,16 @@ double 
levenshtein_normalized_similarity(const Sentence1& s1, const Sentence2& s double res1 = rapidfuzz::levenshtein_normalized_similarity(s1, s2, weights, score_cutoff); double res2 = rapidfuzz::levenshtein_normalized_similarity(s1.begin(), s1.end(), s2.begin(), s2.end(), weights, score_cutoff); - double res3 = rapidfuzz::levenshtein_normalized_similarity( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), weights, score_cutoff); - rapidfuzz::CachedLevenshtein scorer(s1, weights); + double res3 = rapidfuzz::levenshtein_normalized_similarity(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), + weights, score_cutoff); + rapidfuzz::CachedLevenshtein> scorer(s1, weights); double res4 = scorer.normalized_similarity(s2, score_cutoff); double res5 = scorer.normalized_similarity(s2.begin(), s2.end(), score_cutoff); - REQUIRE(res1 == Catch::Approx(res2).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res3).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res4).epsilon(0.0001)); - REQUIRE(res1 == Catch::Approx(res5).epsilon(0.0001)); + REQUIRE_THAT(res1, WithinAbs(res2, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res3, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res4, 0.0001)); + REQUIRE_THAT(res1, WithinAbs(res5, 0.0001)); return res1; } @@ -127,12 +133,11 @@ TEST_CASE("Levenshtein") SECTION("weighted levenshtein calculates correct ratios") { REQUIRE(levenshtein_normalized_similarity(test, test, {1, 1, 2}) == 1.0); - REQUIRE(levenshtein_normalized_similarity(test, no_suffix, {1, 1, 2}) == - Catch::Approx(0.8571).epsilon(0.0001)); - REQUIRE(levenshtein_normalized_similarity(swapped1, swapped2, {1, 1, 2}) == - Catch::Approx(0.75).epsilon(0.0001)); - REQUIRE(levenshtein_normalized_similarity(test, no_suffix2, {1, 1, 2}) == - Catch::Approx(0.75).epsilon(0.0001)); + REQUIRE_THAT(levenshtein_normalized_similarity(test, no_suffix, {1, 1, 2}), + 
WithinAbs(0.8571, 0.0001)); + REQUIRE_THAT(levenshtein_normalized_similarity(swapped1, swapped2, {1, 1, 2}), + WithinAbs(0.75, 0.0001)); + REQUIRE_THAT(levenshtein_normalized_similarity(test, no_suffix2, {1, 1, 2}), WithinAbs(0.75, 0.0001)); REQUIRE(levenshtein_normalized_similarity(test, replace_all, {1, 1, 2}) == 0.0); } @@ -233,7 +238,7 @@ TEST_CASE("Levenshtein_editops") std::string d = "XYZLorem ABC iPsum"; rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s, d); - REQUIRE(d == rapidfuzz::editops_apply(ops, s, d)); + REQUIRE(d == rapidfuzz::editops_apply_str(ops, s, d)); REQUIRE(ops.get_src_len() == s.size()); REQUIRE(ops.get_dest_len() == d.size()); } @@ -244,8 +249,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = str_multiply(std::string("abb"), 2); std::string s2 = str_multiply(std::string("ccccca"), 2); - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 5); REQUIRE(hpos.right_score == 6); REQUIRE(hpos.s2_mid == 6); @@ -256,8 +261,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = str_multiply(std::string("abb"), 8 * 64); std::string s2 = str_multiply(std::string("ccccca"), 8 * 64); - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 1280); REQUIRE(hpos.right_score == 1281); REQUIRE(hpos.s2_mid == 1536); @@ -268,8 +273,8 @@ TEST_CASE("Levenshtein_find_hirschberg_pos") std::string s1 = "aaaa"; std::string s2 = "bbbbbbaaaa"; - auto hpos = rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); + auto hpos = 
rapidfuzz::detail::find_hirschberg_pos(rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); REQUIRE(hpos.left_score == 5); REQUIRE(hpos.right_score == 1); REQUIRE(hpos.s2_mid == 5); @@ -293,21 +298,21 @@ TEST_CASE("Levenshtein_editops[fuzzing_regressions]") std::string s1 = "b"; std::string s2 = "aaaaaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2); - REQUIRE(s2 == rapidfuzz::editops_apply(ops, s1, s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_str(ops, s1, s2)); } { std::string s1 = "aa"; std::string s2 = "abb"; rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2); - REQUIRE(s2 == rapidfuzz::editops_apply(ops, s1, s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_str(ops, s1, s2)); } { std::string s1 = str_multiply(std::string("abb"), 8 * 64); std::string s2 = str_multiply(std::string("ccccca"), 8 * 64); rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2); - REQUIRE(s2 == rapidfuzz::editops_apply(ops, s1, s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_str(ops, s1, s2)); } } @@ -350,12 +355,12 @@ TEST_CASE("Levenshtein small band") "LOTJKTie3OINeOTeJKWeOSeCGOdccNKLYemunmeJKWk"; rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); - REQUIRE(s2 == rapidfuzz::editops_apply(ops1, s1, s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_str(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } @@ -398,12 +403,12 @@ TEST_CASE("Levenshtein small band") "HXUJGDGOhccZ"; rapidfuzz::Editops ops1; - 
rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); - REQUIRE(s2 == rapidfuzz::editops_apply(ops1, s1, s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_str(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } } @@ -416,21 +421,21 @@ TEST_CASE("Levenshtein large band (python-Levenshtein issue 9)") REQUIRE(example2.size() == 5569); { - std::basic_string s1 = get_subsequence(example1, 3718, 1509); - std::basic_string s2 = get_subsequence(example2, 2784, 2785); + std::vector s1 = get_subsequence(example1, 3718, 1509); + std::vector s2 = get_subsequence(example2, 2784, 2785); REQUIRE(rapidfuzz::levenshtein_distance(s1, s2) == 1587); rapidfuzz::Editops ops1 = rapidfuzz::levenshtein_editops(s1, s2); REQUIRE(ops1.size() == 1587); - REQUIRE(s2 == rapidfuzz::editops_apply(ops1, s1, s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_vec(ops1, s1, s2)); } { REQUIRE(rapidfuzz::levenshtein_distance(example1, example2) == 2590); rapidfuzz::Editops ops1 = rapidfuzz::levenshtein_editops(example1, example2); REQUIRE(ops1.size() == 2590); - REQUIRE(example2 == rapidfuzz::editops_apply(ops1, example1, example2)); + REQUIRE(example2 == rapidfuzz::editops_apply_vec(ops1, example1, example2)); } } @@ -440,16 +445,16 @@ TEST_CASE("Levenshtein large band (ocr example)") REQUIRE(ocr_example2.size() == 107244); { - std::basic_string s1 = get_subsequence(ocr_example1, 51, 6541); - std::basic_string s2 = get_subsequence(ocr_example2, 51, 6516); + std::vector s1 = get_subsequence(ocr_example1, 51, 6541); + std::vector s2 = get_subsequence(ocr_example2, 51, 6516); 
rapidfuzz::Editops ops1; - rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), - rapidfuzz::detail::Range(s2)); - REQUIRE(s2 == rapidfuzz::editops_apply(ops1, s1, s2)); + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2)); + REQUIRE(s2 == rapidfuzz::editops_apply_vec(ops1, s1, s2)); rapidfuzz::Editops ops2; - rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), - ops1.size()); + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::make_range(s1), + rapidfuzz::detail::make_range(s2), ops1.size()); REQUIRE(ops1 == ops2); } @@ -464,17 +469,17 @@ TEST_CASE("Levenshtein large band (ocr example)") { rapidfuzz::Editops ops1 = rapidfuzz::levenshtein_editops(ocr_example1, ocr_example2); REQUIRE(ops1.size() == 5278); - REQUIRE(ocr_example2 == rapidfuzz::editops_apply(ops1, ocr_example1, ocr_example2)); + REQUIRE(ocr_example2 == rapidfuzz::editops_apply_vec(ops1, ocr_example1, ocr_example2)); } { rapidfuzz::Editops ops1 = rapidfuzz::levenshtein_editops(ocr_example1, ocr_example2, 5278); REQUIRE(ops1.size() == 5278); - REQUIRE(ocr_example2 == rapidfuzz::editops_apply(ops1, ocr_example1, ocr_example2)); + REQUIRE(ocr_example2 == rapidfuzz::editops_apply_vec(ops1, ocr_example1, ocr_example2)); } { rapidfuzz::Editops ops1 = rapidfuzz::levenshtein_editops(ocr_example1, ocr_example2, 2000); REQUIRE(ops1.size() == 5278); - REQUIRE(ocr_example2 == rapidfuzz::editops_apply(ops1, ocr_example1, ocr_example2)); + REQUIRE(ocr_example2 == rapidfuzz::editops_apply_vec(ops1, ocr_example1, ocr_example2)); } } diff --git a/test/distance/tests-OSA.cpp b/test/distance/tests-OSA.cpp index 33b948f7..e9328cde 100644 --- a/test/distance/tests-OSA.cpp +++ b/test/distance/tests-OSA.cpp @@ -1,5 +1,10 @@ -#include -#include +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif + #include #include #include @@ -11,10 +16,9 @@ size_t 
osa_distance(const Sentence1& s1, const Sentence2& s2, size_t max = std:: { size_t res1 = rapidfuzz::osa_distance(s1, s2, max); size_t res2 = rapidfuzz::osa_distance(s1.begin(), s1.end(), s2.begin(), s2.end(), max); - size_t res3 = rapidfuzz::osa_distance( - BidirectionalIterWrapper(s1.begin()), BidirectionalIterWrapper(s1.end()), - BidirectionalIterWrapper(s2.begin()), BidirectionalIterWrapper(s2.end()), max); - rapidfuzz::CachedOSA scorer(s1); + size_t res3 = rapidfuzz::osa_distance(make_bidir(s1.begin()), make_bidir(s1.end()), + make_bidir(s2.begin()), make_bidir(s2.end()), max); + rapidfuzz::CachedOSA> scorer(s1); size_t res4 = scorer.distance(s2, max); size_t res5 = scorer.distance(s2.begin(), s2.end(), max); #ifdef RAPIDFUZZ_SIMD diff --git a/test/tests-common.cpp b/test/tests-common.cpp index bf561233..ab7dc762 100644 --- a/test/tests-common.cpp +++ b/test/tests-common.cpp @@ -1,4 +1,11 @@ -#include +#include "rapidfuzz/details/Range.hpp" + +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif #include @@ -8,28 +15,28 @@ TEST_CASE("remove affix") std::string s2 = "aaabbbbaaaaa"; { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); REQUIRE(rapidfuzz::detail::remove_common_prefix(s1_, s2_) == 2); - REQUIRE(s1_ == rapidfuzz::detail::Range("bbbbaaaa")); - REQUIRE(s2_ == rapidfuzz::detail::Range("abbbbaaaaa")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("bbbbaaaa")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("abbbbaaaaa")); } { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); REQUIRE(rapidfuzz::detail::remove_common_suffix(s1_, s2_) == 4); - REQUIRE(s1_ == rapidfuzz::detail::Range("aabbbb")); - REQUIRE(s2_ == rapidfuzz::detail::Range("aaabbbba")); + REQUIRE(s1_ == 
rapidfuzz::detail::make_range("aabbbb")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("aaabbbba")); } { - rapidfuzz::detail::Range s1_(s1); - rapidfuzz::detail::Range s2_(s2); + auto s1_ = rapidfuzz::detail::make_range(s1); + auto s2_ = rapidfuzz::detail::make_range(s2); auto affix = rapidfuzz::detail::remove_common_affix(s1_, s2_); REQUIRE(affix.prefix_len == 2); REQUIRE(affix.suffix_len == 4); - REQUIRE(s1_ == rapidfuzz::detail::Range("bbbb")); - REQUIRE(s2_ == rapidfuzz::detail::Range("abbbba")); + REQUIRE(s1_ == rapidfuzz::detail::make_range("bbbb")); + REQUIRE(s2_ == rapidfuzz::detail::make_range("abbbba")); } } diff --git a/test/tests-fuzz.cpp b/test/tests-fuzz.cpp index 47f739a7..797ba039 100644 --- a/test/tests-fuzz.cpp +++ b/test/tests-fuzz.cpp @@ -1,5 +1,9 @@ -#include -#include +#if CATCH2_VERSION == 2 +# include +#else +# include +# include +#endif #include diff --git a/test/tests-main.cpp b/test/tests-main.cpp new file mode 100644 index 00000000..e06520b3 --- /dev/null +++ b/test/tests-main.cpp @@ -0,0 +1,3 @@ +// test main file so catch2 does not has to be recompiled +#define CATCH_CONFIG_MAIN +#include \ No newline at end of file