Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5b6a846

Browse filesBrowse files
committed
Optimize std::for_each_n for segmented iterators
1 parent e9280a1 commit 5b6a846
Copy full SHA for 5b6a846

File tree

8 files changed

+318
-48
lines changed
Filter options

8 files changed

+318
-48
lines changed

‎libcxx/docs/ReleaseNotes/21.rst

Copy file name to clipboardExpand all lines: libcxx/docs/ReleaseNotes/21.rst
+3
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ Improvements and New Features
7070
- The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available
7171
in C++23 and later.
7272

73+
- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of
74+
up to 17.7x for ``std::deque<short>`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators.
75+
7376
Deprecations and Removals
7477
-------------------------
7578

‎libcxx/include/CMakeLists.txt

Copy file name to clipboardExpand all lines: libcxx/include/CMakeLists.txt
+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ set(files
2525
__algorithm/find_segment_if.h
2626
__algorithm/for_each.h
2727
__algorithm/for_each_n.h
28+
__algorithm/for_each_n_segment.h
2829
__algorithm/for_each_segment.h
2930
__algorithm/generate.h
3031
__algorithm/generate_n.h

‎libcxx/include/__algorithm/for_each.h

Copy file name to clipboardExpand all lines: libcxx/include/__algorithm/for_each.h
-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_H
12-
1312
#include <__algorithm/for_each_segment.h>
1413
#include <__config>
1514
#include <__iterator/segmented_iterator.h>

‎libcxx/include/__algorithm/for_each_n.h

Copy file name to clipboardExpand all lines: libcxx/include/__algorithm/for_each_n.h
+63-7
Original file line numberDiff line numberDiff line change
@@ -10,32 +10,88 @@
1010
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_H
1111
#define _LIBCPP___ALGORITHM_FOR_EACH_N_H
1212

13+
#include <__algorithm/for_each.h>
14+
#include <__algorithm/for_each_n_segment.h>
1315
#include <__config>
16+
#include <__iterator/iterator_traits.h>
17+
#include <__iterator/segmented_iterator.h>
18+
#include <__type_traits/enable_if.h>
1419
#include <__utility/convert_to_integral.h>
20+
#include <__utility/move.h>
1521

1622
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
1723
# pragma GCC system_header
1824
#endif
1925

20-
_LIBCPP_BEGIN_NAMESPACE_STD
26+
_LIBCPP_PUSH_MACROS
27+
#include <__undef_macros>
2128

22-
#if _LIBCPP_STD_VER >= 17
29+
_LIBCPP_BEGIN_NAMESPACE_STD
2330

24-
template <class _InputIterator, class _Size, class _Function>
25-
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
26-
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
31+
template <class _InputIterator,
32+
class _Size,
33+
class _Func,
34+
__enable_if_t<!__has_random_access_iterator_category<_InputIterator>::value &&
35+
(!__is_segmented_iterator<_InputIterator>::value
36+
// || !__has_random_access_iterator_category<
37+
// typename __segmented_iterator_traits<_InputIterator>::__local_iterator>::value
38+
), // TODO: __segmented_iterator_traits<_InputIterator> results in template instantiation
39+
// during SFINAE, which is a hard error to be fixed. Once fixed, we should uncomment.
40+
int> = 0>
41+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
42+
__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) {
2743
typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize;
2844
_IntegralSize __n = __orig_n;
2945
while (__n > 0) {
3046
__f(*__first);
3147
++__first;
3248
--__n;
3349
}
34-
return __first;
50+
return std::move(__first);
3551
}
3652

37-
#endif
53+
template <class _RandIter,
54+
class _Size,
55+
class _Func,
56+
__enable_if_t<__has_random_access_iterator_category<_RandIter>::value, int> = 0>
57+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter
58+
__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) {
59+
typename std::iterator_traits<_RandIter>::difference_type __n = __orig_n;
60+
auto __last = __first + __n;
61+
std::__for_each(__first, __last, __f);
62+
return std::move(__last);
63+
}
64+
65+
#ifndef _LIBCPP_CXX03_LANG
66+
template <class _SegmentedIterator,
67+
class _Size,
68+
class _Func,
69+
__enable_if_t<!__has_random_access_iterator_category<_SegmentedIterator>::value &&
70+
__is_segmented_iterator<_SegmentedIterator>::value &&
71+
__has_random_access_iterator_category<
72+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
73+
int> = 0>
74+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator
75+
__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) {
76+
using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator;
77+
return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) {
78+
std::__for_each(__lfirst, __llast, __f);
79+
});
80+
}
81+
#endif // !_LIBCPP_CXX03_LANG
82+
83+
#if _LIBCPP_STD_VER >= 17
84+
85+
template <class _InputIterator, class _Size, class _Function>
86+
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
87+
for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) {
88+
return std::__for_each_n(__first, __orig_n, __f);
89+
}
90+
91+
#endif // _LIBCPP_STD_VER >= 17
3892

3993
_LIBCPP_END_NAMESPACE_STD
4094

95+
_LIBCPP_POP_MACROS
96+
4197
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
10+
#define _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H
11+
12+
#include <__config>
13+
#include <__iterator/iterator_traits.h>
14+
#include <__iterator/segmented_iterator.h>
15+
16+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
17+
# pragma GCC system_header
18+
#endif
19+
20+
_LIBCPP_BEGIN_NAMESPACE_STD
21+
22+
// __for_each_n_segment optimizes linear iteration over segmented iterators. It processes a segmented
23+
// input range [__first, __first + __n) by applying the functor __func to each element within the segment.
24+
// The return value of __func is ignored, and the function returns an iterator pointing to one past the
25+
// last processed element in the input range.
26+
27+
template <class _SegmentedIterator, class _Size, class _Functor>
28+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
29+
__for_each_n_segment(_SegmentedIterator __first, _Size __orig_n, _Functor __func) {
30+
static_assert(__is_segmented_iterator<_SegmentedIterator>::value &&
31+
__has_random_access_iterator_category<
32+
typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value,
33+
"__for_each_n_segment only works with segmented iterators with random-access local iterators");
34+
if (__orig_n <= 0)
35+
return __first;
36+
37+
using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
38+
using __local_iter_t = typename _Traits::__local_iterator;
39+
using __difference_t = typename std::iterator_traits<__local_iter_t>::difference_type;
40+
__difference_t __n = __orig_n;
41+
auto __seg = _Traits::__segment(__first);
42+
auto __local_first = _Traits::__local(__first);
43+
__local_iter_t __local_last;
44+
45+
while (__n > 0) {
46+
__local_last = _Traits::__end(__seg);
47+
auto __seg_size = __local_last - __local_first;
48+
if (__n <= __seg_size) {
49+
__local_last = __local_first + __n;
50+
__func(__local_first, __local_last);
51+
break;
52+
}
53+
__func(__local_first, __local_last);
54+
__n -= __seg_size;
55+
__local_first = _Traits::__begin(++__seg);
56+
}
57+
58+
return _Traits::__compose(__seg, __local_last);
59+
}
60+
61+
_LIBCPP_END_NAMESPACE_STD
62+
63+
#endif // _LIBCPP___ALGORITHM_FOR_EACH_N_SEGMENT_H

‎libcxx/include/module.modulemap.in

Copy file name to clipboardExpand all lines: libcxx/include/module.modulemap.in
+1
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ module std [system] {
437437
module find_segment_if { header "__algorithm/find_segment_if.h" }
438438
module find { header "__algorithm/find.h" }
439439
module for_each_n { header "__algorithm/for_each_n.h" }
440+
module for_each_n_segment { header "__algorithm/for_each_n_segment.h" }
440441
module for_each_segment { header "__algorithm/for_each_segment.h" }
441442
module for_each { header "__algorithm/for_each.h" }
442443
module generate_n { header "__algorithm/generate_n.h" }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17
10+
11+
#include <algorithm>
12+
#include <cstddef>
13+
#include <deque>
14+
#include <list>
15+
#include <ranges>
16+
#include <string>
17+
#include <vector>
18+
19+
#include <benchmark/benchmark.h>
20+
21+
int main(int argc, char** argv) {
22+
auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); };
23+
24+
// std::for_each_n
25+
{
26+
auto bm = []<class Container>(std::string name, auto for_each_n) {
27+
using ElemType = typename Container::value_type;
28+
benchmark::RegisterBenchmark(
29+
name,
30+
[for_each_n](auto& st) {
31+
std::size_t const n = st.range(0);
32+
Container c(n, 1);
33+
auto first = c.begin();
34+
35+
for ([[maybe_unused]] auto _ : st) {
36+
benchmark::DoNotOptimize(c);
37+
auto result = for_each_n(first, n, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
38+
benchmark::DoNotOptimize(result);
39+
}
40+
})
41+
->Arg(8)
42+
->Arg(32)
43+
->Arg(50) // non power-of-two
44+
->Arg(1024)
45+
->Arg(4096)
46+
->Arg(8192)
47+
->Arg(1 << 14)
48+
->Arg(1 << 16)
49+
->Arg(1 << 18);
50+
};
51+
bm.operator()<std::vector<int>>("std::for_each_n(vector<int>)", std_for_each_n);
52+
bm.operator()<std::deque<int>>("std::for_each_n(deque<int>)", std_for_each_n);
53+
bm.operator()<std::list<int>>("std::for_each_n(list<int>)", std_for_each_n);
54+
}
55+
56+
// std::for_each_n for join_view
57+
{
58+
auto bm = []<class Container>(std::string name, auto for_each_n) {
59+
using C1 = typename Container::value_type;
60+
using ElemType = typename C1::value_type;
61+
benchmark::RegisterBenchmark(
62+
name,
63+
[for_each_n](auto& st) {
64+
std::size_t const size = st.range(0);
65+
std::size_t const seg_size = 256;
66+
std::size_t const segments = (size + seg_size - 1) / seg_size;
67+
Container c(segments);
68+
for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) {
69+
c[i].resize(std::min(seg_size, n), ElemType(1));
70+
}
71+
72+
auto view = c | std::views::join;
73+
auto first = view.begin();
74+
75+
for ([[maybe_unused]] auto _ : st) {
76+
benchmark::DoNotOptimize(c);
77+
auto result = for_each_n(first, size, [](ElemType& x) { x = std::clamp<ElemType>(x, 10, 100); });
78+
benchmark::DoNotOptimize(result);
79+
}
80+
})
81+
->Arg(8)
82+
->Arg(32)
83+
->Arg(50) // non power-of-two
84+
->Arg(1024)
85+
->Arg(4096)
86+
->Arg(8192)
87+
->Arg(1 << 14)
88+
->Arg(1 << 16)
89+
->Arg(1 << 18);
90+
};
91+
bm.operator()<std::vector<std::vector<int>>>("std::for_each_n(join_view(vector<vector<int>>))", std_for_each_n);
92+
}
93+
94+
benchmark::Initialize(&argc, argv);
95+
benchmark::RunSpecifiedBenchmarks();
96+
benchmark::Shutdown();
97+
return 0;
98+
}

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.