Skip to content

Commit 36459f4

Browse files
Rewrite relocation to be compliant with P1144 (cmuparlay#67)
[Relocation](https://quuxplusone.github.io/blog/2018/07/18/announcing-trivially-relocatable/) now mostly follows the API proposed in [P1144](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2023/p1144r9.html). Parlay will also defer the implementation of relocation operations to the compiler/library if they are present, which currently works on Arthur O'Dwyer's LLVM fork [here](https://github.com/Quuxplusone/llvm-project).
1 parent c655b8f commit 36459f4

25 files changed

Lines changed: 922 additions & 659 deletions

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
# -------------------------------------------------------------------
77

88
cmake_minimum_required(VERSION 3.14)
9-
project(PARLAY VERSION 2.2.4
9+
10+
project(PARLAY VERSION 2.3.1
1011
DESCRIPTION "A collection of parallel algorithms and other support for parallelism in C++"
1112
LANGUAGES CXX)
1213

benchmark/bench_sequence.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,61 @@ static void bench_short_subscript(benchmark::State& state) {
2727
}
2828
}
2929

30+
// Benchmark: cost of one reserve() call that asks for one element more than
// the current capacity of a parlay::sequence<int64_t>.
//
// Each iteration rebuilds the sequence with 10000000 elements while timing is
// paused, so only the reserve() call sits inside the timed region.
static void bench_grow_int64(benchmark::State& state) {
31+
parlay::sequence<int64_t> s;
32+
for (auto _ : state) {
33+
state.PauseTiming();
34+
s = parlay::sequence<int64_t>(10000000);
35+
state.ResumeTiming();
36+
s.reserve(s.capacity() + 1); // Trigger grow
37+
}
38+
}
39+
40+
// No annotation needed since this one should be detectable
// (the struct declares no move constructor or destructor of its own; its only
// data member is a std::unique_ptr<int>). Used as the element type of
// bench_grow_relocatable.
41+
struct Relocatable {
42+
std::unique_ptr<int> x;
43+
Relocatable() = default;
44+
Relocatable(int x_) : x(std::make_unique<int>(x_)) { }
45+
};
46+
47+
// Explicit opt-in, compiled only when PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE
// is defined.
// NOTE(review): presumably that macro is set when relocatability cannot be
// detected automatically on the current toolchain -- confirm against the
// header that defines it.
#if defined(PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE)
48+
namespace parlay {
49+
template<>
50+
PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(Relocatable);
51+
}
52+
#endif
53+
54+
// Compile-time check: with or without the opt-in above, Relocatable must be
// reported as trivially relocatable.
static_assert(parlay::is_trivially_relocatable_v<Relocatable>);
55+
56+
// Counterpart to Relocatable: same std::unique_ptr<int> member, but with a
// user-provided move constructor and a user-provided (empty) destructor.
// The static_assert after the struct requires that parlay does NOT report it
// as trivially relocatable. Used as the element type of
// bench_grow_nonrelocatable.
struct NotRelocatable {
57+
std::unique_ptr<int> x;
58+
NotRelocatable() = default;
59+
NotRelocatable(int x_) : x(std::make_unique<int>(x_)) { }
60+
// User-provided move: transfers ownership of the pointer from `other`.
NotRelocatable(NotRelocatable&& other) noexcept : x(std::move(other.x)) { }
61+
// User-provided destructor with an empty body.
~NotRelocatable() { }
62+
};
63+
static_assert(!parlay::is_trivially_relocatable_v<NotRelocatable>);
64+
65+
// Benchmark: cost of one reserve() call that asks for one element more than
// the current capacity of a parlay::sequence<Relocatable>.
//
// Each iteration rebuilds the sequence with 10000000 elements while timing is
// paused, so only the reserve() call sits inside the timed region.
static void bench_grow_relocatable(benchmark::State& state) {
66+
parlay::sequence<Relocatable> s;
67+
for (auto _ : state) {
68+
state.PauseTiming();
69+
s = parlay::sequence<Relocatable>(10000000);
70+
state.ResumeTiming();
71+
s.reserve(s.capacity() + 1); // Trigger grow
72+
}
73+
}
74+
75+
// Benchmark: cost of one reserve() call that asks for one element more than
// the current capacity of a parlay::sequence<NotRelocatable>.
//
// Each iteration rebuilds the sequence with 10000000 elements while timing is
// paused, so only the reserve() call sits inside the timed region.
static void bench_grow_nonrelocatable(benchmark::State& state) {
76+
parlay::sequence<NotRelocatable> s;
77+
for (auto _ : state) {
78+
state.PauseTiming();
79+
s = parlay::sequence<NotRelocatable>(10000000);
80+
state.ResumeTiming();
81+
s.reserve(s.capacity() + 1); // Trigger grow
82+
}
83+
}
84+
3085
// ------------------------- Registration -------------------------------
3186

3287
#define BENCH(NAME) BENCHMARK(bench_ ## NAME) \
@@ -35,3 +90,6 @@ static void bench_short_subscript(benchmark::State& state) {
3590

3691
BENCH(subscript);
3792
BENCH(short_subscript);
93+
BENCH(grow_int64);
94+
BENCH(grow_relocatable);
95+
BENCH(grow_nonrelocatable);

include/parlay/alloc.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,11 @@ struct allocator {
183183
template <class U> /* implicit */ constexpr allocator(const allocator<U>&) noexcept { }
184184
};
185185

186-
template<typename T>
187-
struct is_trivially_relocatable<allocator<T>> : std::true_type {};
186+
// Allocator should be trivially copyable since it is stateless and has no user-provided copy
187+
// constructor. This should guarantee that it is also trivially relocatable.
188+
static_assert(std::is_trivially_copyable_v<allocator<int>>);
189+
static_assert(is_trivially_relocatable_v<allocator<int>>);
190+
188191

189192
template <class T, class U>
190193
bool operator==(const allocator<T>&, const allocator<U>&) { return true; }

include/parlay/internal/bucket_sort.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void radix_step_(slice<InIterator, InIterator> A,
4242

4343
for (size_t j = n; j > 0; j--) {
4444
auto x = --counts[keys[j-1]];
45-
uninitialized_relocate(&B[x], &A[j-1]);
45+
relocate_at(&A[j - 1], &B[x]);
4646
}
4747
}
4848

@@ -128,7 +128,7 @@ void base_sort(slice<InIterator, InIterator> in,
128128
else {
129129
quicksort(in.begin(), in.size(), f);
130130
if (!inplace) {
131-
uninitialized_relocate_n(out.begin(), in.begin(), in.size());
131+
parlay::uninitialized_relocate(in.begin(), in.end(), out.begin());
132132
}
133133
}
134134
}

include/parlay/internal/collect_reduce.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ auto seq_collect_reduce_sparse(Slice A, Helper const &helper) {
271271
auto r = r_s.begin();
272272
size_t j = 0;
273273
for (size_t i = 0; i < table_size; i++)
274-
if (flags[i]) uninitialized_relocate(&r[j++], &table[i]);
274+
if (flags[i]) relocate_at(&table[i], &r[j++]);
275275
assert(j == count);
276276
return r_s;
277277
}

include/parlay/internal/counting_sort.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ auto count_sort_inplace(slice<InIterator, InIterator> In, KeyS const& Keys, size
320320
using value_type = typename slice<InIterator, InIterator>::value_type;
321321
auto Tmp = uninitialized_sequence<value_type>(In.size());
322322
auto a = count_sort<uninitialized_relocate_tag>(In, make_slice(Tmp), make_slice(Keys), num_buckets);
323-
uninitialized_relocate_n(In.begin(), Tmp.begin(), In.size());
323+
parlay::uninitialized_relocate(Tmp.begin(), Tmp.end(), In.begin());
324324
return a.first;
325325
}
326326

include/parlay/internal/delayed/filter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ struct block_delayed_filter_t :
9696
}
9797
}
9898
auto res = sequence<It>::uninitialized(n);
99-
uninitialized_relocate_n(res.begin(), temp.begin(), n);
99+
parlay::uninitialized_relocate_n(temp.begin(), n, res.begin());
100100
return res;
101101
}
102102

include/parlay/internal/delayed/filter_op.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ struct block_delayed_filter_op_t :
8585
}
8686
}
8787
auto res = sequence<result_type>::uninitialized(n);
88-
uninitialized_relocate_n(res.begin(), temp.begin(), n);
88+
parlay::uninitialized_relocate_n(temp.begin(), n, res.begin());
8989
return res;
9090
}
9191

include/parlay/internal/integer_sort.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,10 @@ void seq_radix_sort_(slice<InIterator, InIterator> In,
8080
}
8181

8282
if (swapped && inplace) {
83-
uninitialized_relocate_n(In.begin(), Out.begin(), In.size());
83+
parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin());
8484
}
8585
else if (!swapped && !inplace) {
86-
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
86+
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
8787
}
8888
}
8989

@@ -105,10 +105,10 @@ void seq_radix_sort(slice<InIterator, InIterator> In,
105105
size_t n = In.size();
106106
if (odd) {
107107
// We could just use assign_dispatch(Tmp[i], In[i]) for each i, but we
108-
// can optimize better by calling destructive_move_slice, since this
108+
// can optimize better by calling uninitialized_relocate, since this
109109
// has the ability to memcpy multiple elements at once
110110
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
111-
uninitialized_relocate_n(Tmp.begin(), In.begin(), Tmp.size());
111+
parlay::uninitialized_relocate(In.begin(), In.end(), Tmp.begin());
112112
}
113113
else {
114114
for (size_t i = 0; i < n; i++)
@@ -117,7 +117,7 @@ void seq_radix_sort(slice<InIterator, InIterator> In,
117117
seq_radix_sort_(Tmp, Out, g, key_bits, false);
118118
} else {
119119
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
120-
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
120+
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
121121
}
122122
else {
123123
for (size_t i = 0; i < n; i++)
@@ -219,7 +219,7 @@ sequence<size_t> integer_sort_r(slice<InIterator, InIterator> In,
219219
// uninitialized_relocate, which can memcpy multiple elements at a time
220220
// to save on performing every copy individually.
221221
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
222-
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
222+
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
223223
}
224224
else {
225225
parallel_for(0, In.size(), [&](size_t i) {
@@ -248,7 +248,7 @@ sequence<size_t> integer_sort_r(slice<InIterator, InIterator> In,
248248

249249
if constexpr (inplace_tag::value == true) {
250250
if (!one_bucket) {
251-
uninitialized_relocate_n(In.begin(), Out.begin(), In.size());
251+
parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin());
252252
}
253253
}
254254

include/parlay/internal/merge_sort.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ void merge_sort_(slice<InIterator, InIterator> In,
2828
insertion_sort(In.begin(), In.size(), f);
2929
if (!inplace) {
3030
for (size_t i = 0; i < In.size(); i++) {
31-
uninitialized_relocate(&Out[i], &In[i]);
31+
relocate_at(&In[i], &Out[i]);
3232
}
3333
}
3434
}

0 commit comments

Comments
 (0)