Skip to content

Commit ddd6f5a

Browse files
author
JuliaRS
committed
fix imb-ext: correct timing and validation
- Measure only fence → Accumulate → fence (exclude init/validation/barriers).
- Move target initialization and CHK_DIFF outside the timed region.
- Normalize AGGREGATE time by r_cache_iter when CHECK is enabled.

Improves the accuracy of both AGGREGATE and NON-AGGREGATE results when built with -DCHECK.
1 parent d59ef1a commit ddd6f5a

1 file changed

Lines changed: 20 additions & 11 deletions

File tree

src_c/IMB_ones_accu.c

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ Output variables:
115115
int i;
116116

117117
#ifdef CHECK
118-
int asize = (int) sizeof(assign_type);
119-
int root = (c_info->rank == 0);
118+
const int asize = (int) sizeof(assign_type);
119+
const int root = (c_info->rank == 0);
120120
defect = 0;
121121
#endif
122122

@@ -134,6 +134,9 @@ Output variables:
134134
*time = 0.;
135135
else {
136136
if (!RUN_MODE->AGGREGATE) {
137+
/* Measure only the RMA critical section: fence → Accumulate → fence.
138+
* All target initialization and validation are performed outside timing. */
139+
double t_sum = 0.0;
137140

138141
*time = MPI_Wtime();
139142

@@ -142,7 +145,7 @@ Output variables:
142145
#ifdef CHECK
143146
/* Initialize the target buffer BEFORE the first RMA operation for this sample */
144147
{
145-
const int root = (c_info->rank == 0);
148+
root = (c_info->rank == 0);
146149
if (root) {
147150
char* tgt = (char*)c_info->r_buffer
148151
+ (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs;
@@ -152,6 +155,10 @@ Output variables:
152155
MPI_Barrier(c_info->communicator);
153156
}
154157
#endif
158+
159+
/* Time only the RMA epoch and operation(s). */
160+
double t0 = MPI_Wtime();
161+
155162
/* Start RMA epoch */
156163
MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN));
157164

@@ -168,9 +175,12 @@ Output variables:
168175
/* End RMA epoch and ensure completion */
169176
MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOSUCCEED, c_info->WIN));
170177

178+
double t1 = MPI_Wtime();
179+
t_sum += (t1 - t0);
180+
171181
#ifdef CHECK
172182
{
173-
const int root = (c_info->rank == 0);
183+
root = (c_info->rank == 0);
174184
if (root) {
175185
CHK_DIFF("Accumulate", c_info,
176186
(char*)c_info->r_buffer
@@ -187,23 +197,18 @@ Output variables:
187197
#endif
188198

189199
}
190-
*time = (MPI_Wtime() - *time) / ITERATIONS->n_sample;
200+
*time = t_sum / ITERATIONS->n_sample;
191201
}
192202

193203
if (RUN_MODE->AGGREGATE) {
194204

195205
for (i = 0; i < N_BARR; i++)
196206
MPI_Barrier(c_info->communicator);
197207

198-
*time = MPI_Wtime();
199-
200-
/* Start one large RMA epoch for all Accumulate operations */
201-
MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN));
202-
203208
#ifdef CHECK
204209
/* Initialize ALL target slots before starting the epoch */
205210
{
206-
const int root = (c_info->rank == 0);
211+
root = (c_info->rank == 0);
207212
if (root) {
208213
for (int k = 0; k < ITERATIONS->r_cache_iter; k++) {
209214
char* tgt = (char*)c_info->r_buffer + (MPI_Aint)k * ITERATIONS->r_offs;
@@ -213,6 +218,10 @@ Output variables:
213218
MPI_Barrier(c_info->communicator);
214219
}
215220
#endif
221+
*time = MPI_Wtime();
222+
/* Start one large RMA epoch for all Accumulate operations */
223+
MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN));
224+
216225

217226
#ifdef CHECK
218227
for (i = 0; i < ITERATIONS->r_cache_iter; i++)

0 commit comments

Comments
 (0)