Skip to content

Commit 9d4611c

Browse files
Update step.h
1 parent 7649eaf commit 9d4611c

1 file changed

Lines changed: 20 additions & 43 deletions

File tree

src/gravity/step.h

Lines changed: 20 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,18 @@
2020
#include <mpi.h>
2121
#endif
2222

23-
/**
24-
* @brief Performs a complete Leapfrog Step (Kick-Drift-Kick) using SoA data.
25-
*/
2623
inline void Step(ParticleSystem &ps, real dt) {
2724
if (ps.size() == 0) return;
2825

2926
const real theta = real(0.5);
3027
const real half = dt * real(0.5);
3128
const int N = static_cast<int>(ps.size());
3229

33-
// ---------------------------------------------------------
34-
// MPI SETUP
35-
// ---------------------------------------------------------
3630
#ifdef NEXT_MPI
3731
int rank, size;
3832
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
3933
MPI_Comm_size(MPI_COMM_WORLD, &size);
4034

41-
// Select MPI datatype matching
4235
MPI_Datatype MPI_REAL_T;
4336
# ifdef NEXT_FP64
4437
MPI_REAL_T = MPI_DOUBLE;
@@ -52,14 +45,10 @@ inline void Step(ParticleSystem &ps, real dt) {
5245
int size = 1;
5346
#endif
5447

55-
// ---------------------------------------------------------
56-
// DOMAIN DECOMPOSITION
57-
// ---------------------------------------------------------
5848
const int start = (rank * N) / size;
5949
const int end = ((rank + 1) * N) / size;
6050

6151
#ifdef NEXT_MPI
62-
// Precompute counts and displacements for Allgatherv
6352
std::vector<int> counts(size), displs(size);
6453
for (int r = 0; r < size; ++r) {
6554
const int s = (r * N) / size;
@@ -69,9 +58,6 @@ inline void Step(ParticleSystem &ps, real dt) {
6958
}
7059
#endif
7160

72-
// ---------------------------------------------------------
73-
// TREE BUILDER
74-
// ---------------------------------------------------------
7561
auto buildTree = [&]() -> std::unique_ptr<Octree> {
7662
struct BBox { real minx, miny, minz, maxx, maxy, maxz; };
7763
BBox local{ real(1e30), real(1e30), real(1e30),
@@ -87,16 +73,13 @@ inline void Step(ParticleSystem &ps, real dt) {
8773
}
8874

8975
#ifdef NEXT_MPI
90-
BBox global;
9176
real mins[3] = {local.minx, local.miny, local.minz};
9277
real maxs[3] = {local.maxx, local.maxy, local.maxz};
9378

9479
MPI_Allreduce(MPI_IN_PLACE, mins, 3, MPI_REAL_T, MPI_MIN, MPI_COMM_WORLD);
9580
MPI_Allreduce(MPI_IN_PLACE, maxs, 3, MPI_REAL_T, MPI_MAX, MPI_COMM_WORLD);
9681

97-
global.minx = mins[0]; global.miny = mins[1]; global.minz = mins[2];
98-
global.maxx = maxs[0]; global.maxy = maxs[1]; global.maxz = maxs[2];
99-
82+
BBox global{mins[0], mins[1], mins[2], maxs[0], maxs[1], maxs[2]};
10083
#else
10184
BBox global = local;
10285
#endif
@@ -119,9 +102,7 @@ inline void Step(ParticleSystem &ps, real dt) {
119102
return root;
120103
};
121104

122-
// ---------------------------------------------------------
123105
// FIRST KICK
124-
// ---------------------------------------------------------
125106
{
126107
auto root = buildTree();
127108

@@ -137,22 +118,20 @@ inline void Step(ParticleSystem &ps, real dt) {
137118

138119
#ifdef NEXT_MPI
139120
MPI_Request reqs[3];
140-
MPI_Iallgatherv(ps.vx.data() + start, end - start, MPI_REAL_T,
121+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
141122
ps.vx.data(), counts.data(), displs.data(), MPI_REAL_T,
142123
MPI_COMM_WORLD, &reqs[0]);
143-
MPI_Iallgatherv(ps.vy.data() + start, end - start, MPI_REAL_T,
124+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
144125
ps.vy.data(), counts.data(), displs.data(), MPI_REAL_T,
145126
MPI_COMM_WORLD, &reqs[1]);
146-
MPI_Iallgatherv(ps.vz.data() + start, end - start, MPI_REAL_T,
127+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
147128
ps.vz.data(), counts.data(), displs.data(), MPI_REAL_T,
148129
MPI_COMM_WORLD, &reqs[2]);
149130
MPI_Waitall(3, reqs, MPI_STATUSES_IGNORE);
150131
#endif
151132
}
152133

153-
// ---------------------------------------------------------
154134
// DRIFT
155-
// ---------------------------------------------------------
156135
#pragma omp parallel for schedule(static)
157136
for (int i = start; i < end; ++i) {
158137
ps.x[i] += ps.vx[i] * dt;
@@ -161,22 +140,20 @@ inline void Step(ParticleSystem &ps, real dt) {
161140
}
162141

163142
#ifdef NEXT_MPI
164-
MPI_Request reqs[3];
165-
MPI_Iallgatherv(ps.x.data() + start, end - start, MPI_REAL_T,
143+
MPI_Request reqs3[3];
144+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
166145
ps.x.data(), counts.data(), displs.data(), MPI_REAL_T,
167-
MPI_COMM_WORLD, &reqs[0]);
168-
MPI_Iallgatherv(ps.y.data() + start, end - start, MPI_REAL_T,
146+
MPI_COMM_WORLD, &reqs3[0]);
147+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
169148
ps.y.data(), counts.data(), displs.data(), MPI_REAL_T,
170-
MPI_COMM_WORLD, &reqs[1]);
171-
MPI_Iallgatherv(ps.z.data() + start, end - start, MPI_REAL_T,
149+
MPI_COMM_WORLD, &reqs3[1]);
150+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
172151
ps.z.data(), counts.data(), displs.data(), MPI_REAL_T,
173-
MPI_COMM_WORLD, &reqs[2]);
174-
MPI_Waitall(3, reqs, MPI_STATUSES_IGNORE);
152+
MPI_COMM_WORLD, &reqs3[2]);
153+
MPI_Waitall(3, reqs3, MPI_STATUSES_IGNORE);
175154
#endif
176155

177-
// ---------------------------------------------------------
178156
// SECOND KICK
179-
// ---------------------------------------------------------
180157
{
181158
auto root = buildTree();
182159

@@ -191,17 +168,17 @@ inline void Step(ParticleSystem &ps, real dt) {
191168
}
192169

193170
#ifdef NEXT_MPI
194-
MPI_Request reqs2[3];
195-
MPI_Iallgatherv(ps.vx.data() + start, end - start, MPI_REAL_T,
171+
MPI_Request reqs4[3];
172+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
196173
ps.vx.data(), counts.data(), displs.data(), MPI_REAL_T,
197-
MPI_COMM_WORLD, &reqs2[0]);
198-
MPI_Iallgatherv(ps.vy.data() + start, end - start, MPI_REAL_T,
174+
MPI_COMM_WORLD, &reqs4[0]);
175+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
199176
ps.vy.data(), counts.data(), displs.data(), MPI_REAL_T,
200-
MPI_COMM_WORLD, &reqs2[1]);
201-
MPI_Iallgatherv(ps.vz.data() + start, end - start, MPI_REAL_T,
177+
MPI_COMM_WORLD, &reqs4[1]);
178+
MPI_Iallgatherv(MPI_IN_PLACE, 0, MPI_REAL_T,
202179
ps.vz.data(), counts.data(), displs.data(), MPI_REAL_T,
203-
MPI_COMM_WORLD, &reqs2[2]);
204-
MPI_Waitall(3, reqs2, MPI_STATUSES_IGNORE);
180+
MPI_COMM_WORLD, &reqs4[2]);
181+
MPI_Waitall(3, reqs4, MPI_STATUSES_IGNORE);
205182
#endif
206183
}
207184
}

0 commit comments

Comments
 (0)