Skip to content

Commit 4c9276c

Browse files
authored
Refactor Drivers (#6)
* refactor drivers
1 parent 0e5e5b5 commit 4c9276c

457 files changed

Lines changed: 9245 additions & 435 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "tpl/kokkos"]
2+
path = tpl/kokkos
3+
url = https://github.com/kokkos/kokkos

drivers/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,7 @@ of threads as the first command line argument i.e. `./a.out 4`.
7979

8080
Make sure you are running in a proper environment for the tests you want to run.
8181
For example, have a GPU for cuda tests or multiple nodes for MPI. Do not
82-
execute `run-all.py` on a login node without the `--dry` flag.
82+
execute `run-all.py` on a login node without the `--dry` flag.
83+
84+
MPI benchmarks require the correct result to be returned on rank 0. The initial
85+
data distribution varies by problem.

drivers/cpp/KokkosCMakeLists.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Builds one Kokkos benchmark executable (a.out) from a single driver source.
# Required cache variables (pass with -D):
#   DRIVER_PATH      - path to the drivers directory
#   DRIVER_SRC_FILE  - source file to compile, relative to DRIVER_PATH
cmake_minimum_required(VERSION 3.16)
project(KokkosBenchmark)

if (NOT DRIVER_PATH)
    message(FATAL_ERROR "DRIVER_PATH not set")
endif()

if (NOT DRIVER_SRC_FILE)
    message(FATAL_ERROR "DRIVER_SRC_FILE not set")
endif()

# Apply the default build type when the caller did not pick one. Previously
# default_build_type was defined but never used, so single-config generators
# built with an empty CMAKE_BUILD_TYPE (no optimization flags).
set(default_build_type "RelWithDebInfo")
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
    set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE)
endif()

# Kokkos is expected to be built in-tree under tpl/kokkos/build.
set(Kokkos_DIR ${DRIVER_PATH}/../tpl/kokkos/build)
message(STATUS "Kokkos_DIR: ${Kokkos_DIR}")
find_package(Kokkos REQUIRED)
add_compile_definitions(USE_KOKKOS)

add_executable(a.out ${DRIVER_PATH}/${DRIVER_SRC_FILE})
target_link_libraries(a.out Kokkos::kokkos)
# The shared Kokkos driver is linked as a prebuilt object file.
target_link_libraries(a.out ${DRIVER_PATH}/cpp/models/kokkos-driver.o)
target_include_directories(a.out PRIVATE ${DRIVER_PATH}/cpp)
target_include_directories(a.out PRIVATE ${DRIVER_PATH}/cpp/models)
target_include_directories(a.out PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

drivers/cpp/Makefile

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,39 @@
11
CXX = g++
22
MPICXX = mpicxx
33
CXX_FLAGS = -std=c++17 -O3
4+
Kokkos_PATH ?= ../../tpl/kokkos/build
45

6+
SERIAL_DRIVERS = $(patsubst %.cc,%.o, $(wildcard */*serial-driver.cc))
57
OMP_DRIVERS = $(patsubst %.cc,%.o, $(wildcard */*omp-driver.cc))
68
MPI_DRIVERS = $(patsubst %.cc,%.o, $(wildcard */*mpi-driver.cc))
9+
MPI_OMP_DRIVERS = $(patsubst %.cc,%.o, $(wildcard */*mpi-omp-driver.cc))
10+
CUDA_DRIVERS = $(patsubst %.cu,%.o, $(wildcard */*cuda-driver.cu))
11+
KOKKOS_DRIVERS = $(patsubst %.cc,%.o, $(wildcard */*kokkos-driver.cc))
712

8-
all: $(OMP_DRIVERS) $(MPI_DRIVERS)
13+
ALL_DRIVERS = $(SERIAL_DRIVERS) $(OMP_DRIVERS) $(MPI_DRIVERS) $(MPI_OMP_DRIVERS) $(CUDA_DRIVERS) $(KOKKOS_DRIVERS)
14+
15+
all: $(ALL_DRIVERS)
916

1017
%.o: %.cc
1118
$(CXX) $(CXX_FLAGS) -o $@ -c $<
1219

20+
%serial-driver.o: %serial-driver.cc
21+
$(CXX) $(CXX_FLAGS) -o $@ -c $<
22+
1323
%omp-driver.o: %omp-driver.cc
1424
$(CXX) $(CXX_FLAGS) -fopenmp -o $@ -c $<
1525

1626
%mpi-driver.o: %mpi-driver.cc
1727
$(MPICXX) $(CXX_FLAGS) -o $@ -c $<
1828

29+
%mpi-omp-driver.o: %mpi-omp-driver.cc
30+
$(MPICXX) $(CXX_FLAGS) -fopenmp -o $@ -c $<
31+
32+
%cuda-driver.o: %cuda-driver.cu
33+
nvcc -std=c++17 -O3 -o $@ -c $<
34+
35+
%kokkos-driver.o: %kokkos-driver.cc
36+
$(CXX) $(CXX_FLAGS) -I$(Kokkos_PATH)/include -L$(Kokkos_PATH)/lib64 -fopenmp -o $@ -c $<
37+
1938
clean:
20-
rm -f $(OMP_DRIVERS) $(MPI_DRIVERS)
39+
rm -f $(ALL_DRIVERS)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#pragma once
2+
3+
#include <numeric>
4+
#include <vector>
5+
6+
/* Reference implementation: compute the inclusive prefix sum array of the
   vector x and return the sum of that array. An empty x yields 0.0.

   Declared inline because the definition lives in a header; without it,
   including this header from more than one translation unit produces
   multiple-definition link errors.

   Example:

   input: [-7, 2, 1, 9, 4, 8]
   output: 15
*/
inline double correctSumOfPrefixSum(std::vector<double> const& x) {
    std::vector<double> prefixSum(x.size());
    std::inclusive_scan(x.begin(), x.end(), prefixSum.begin());
    return std::accumulate(prefixSum.begin(), prefixSum.end(), 0.0);
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Driver for 1_scan_sum_of_prefix_sum
2+
// /* Compute the prefix sum array of the vector x and return its sum.
3+
// Example:
4+
//
5+
// input: [-7, 2, 1, 9, 4, 8]
6+
// output: 15
7+
// */
8+
// double sumOfPrefixSum(std::vector<double> const& x) {
9+
10+
#include <algorithm>
11+
#include <numeric>
12+
#include <random>
13+
#include <vector>
14+
15+
#include "baseline.hpp"
16+
#include "utilities.hpp"
17+
#include "generated-code.hpp" // code generated by LLM
18+
19+
20+
// Benchmark state shared by the driver hooks in this file.
struct Context {
    // Input vector handed to sumOfPrefixSum / correctSumOfPrefixSum.
    std::vector<double> x;
};
23+
24+
// Refill the benchmark input in place via fillRand with bounds -100.0 and
// 100.0. The vector keeps its current size.
void reset(Context *ctx) {
    std::vector<double> &values = ctx->x;
    fillRand(values, -100.0, 100.0);
}
27+
28+
// Allocate the benchmark state, size the input to 2^20 elements and fill it
// through reset(). The returned pointer is released by destroy().
Context *init() {
    constexpr int kNumElements = 1 << 20;

    Context *state = new Context();
    state->x.resize(kNumElements);
    reset(state);
    return state;
}
34+
35+
// Run the generated implementation once on the benchmark input.
void compute(Context *ctx) {
    // Store the result in a volatile so the optimizer must actually produce
    // it; a plain `(void) val` lets an inlinable, side-effect-free call be
    // removed entirely, which would make the timing meaningless.
    volatile double sink = sumOfPrefixSum(ctx->x);
    (void) sink;
}
39+
40+
// Run the reference implementation once on the benchmark input.
void best(Context *ctx) {
    // Store the result in a volatile so the optimizer must actually produce
    // it; a plain `(void) val` lets the header-defined, side-effect-free
    // baseline be removed entirely, which would make the timing meaningless.
    volatile double sink = correctSumOfPrefixSum(ctx->x);
    (void) sink;
}
44+
45+
// Check the generated sumOfPrefixSum against correctSumOfPrefixSum on five
// freshly generated 2048-element inputs. Returns true only if every result is
// within an absolute tolerance of 1e-5 of the reference.
bool validate(Context *ctx) {
    (void) ctx;  // validation builds its own inputs; the benchmark state is unused

    const size_t numTries = 5;
    const double tolerance = 1e-5;
    for (size_t i = 0; i < numTries; i += 1) {
        std::vector<double> input(2048);
        fillRand(input, -100.0, 100.0);

        // compute correct result
        const double correctResult = correctSumOfPrefixSum(input);

        // compute test result
        const double testResult = sumOfPrefixSum(input);

        // Written as !(diff <= tol) so that a NaN result is rejected: every
        // ordered comparison with NaN is false, so `diff > tol` would accept it.
        if (!(std::fabs(correctResult - testResult) <= tolerance)) {
            return false;
        }
    }

    return true;
}
65+
66+
// Release the state allocated by init(). Passing nullptr is a no-op because
// deleting a null pointer is.
void destroy(Context *ctx) {
    delete ctx;
}
69+
70+
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Driver for 28_scan_sum_of_prefix_sum
2+
// /* Compute the prefix sum array of the vector x and return its sum.
3+
// Example:
4+
//
5+
// input: [-7, 2, 1, 9, 4, 8]
6+
// output: 15
7+
// */
8+
// double sumOfPrefixSum(std::vector<double> const& x) {
9+
10+
#include <algorithm>
11+
#include <numeric>
12+
#include <random>
13+
#include <vector>
14+
15+
#include "baseline.hpp"
16+
#include "utilities.hpp"
17+
#include "generated-code.hpp" // code generated by LLM
18+
19+
20+
struct Context {
21+
Kokkos::View<double*> x;
22+
std::vector<double> xVec;
23+
};
24+
25+
// Refill both benchmark inputs in place, each with its own fill call and the
// same bounds (-100.0, 100.0): the view first, then the vector.
void reset(Context *ctx) {
    const double lo = -100.0;
    const double hi = 100.0;

    fillRandKokkos(ctx->x, lo, hi);
    fillRand(ctx->xVec, lo, hi);
}
29+
30+
// Allocate the benchmark state, size the view and the vector to 2^20 elements
// each, and fill them through reset(). The returned pointer is released by
// destroy().
Context *init() {
    constexpr int kNumElements = 1 << 20;

    Context *state = new Context();
    state->x = Kokkos::View<double*>("x", kNumElements);
    state->xVec.resize(kNumElements);
    reset(state);
    return state;
}
37+
38+
// Run the generated Kokkos implementation once on the benchmark view.
void compute(Context *ctx) {
    // Store the result in a volatile so the returned value must actually be
    // produced; a plain `(void) val` gives the optimizer no reason to keep
    // work whose only purpose is computing the discarded result.
    volatile double sink = sumOfPrefixSum(ctx->x);
    (void) sink;
}
42+
43+
// Run the reference implementation once on the std::vector copy of the input.
void best(Context *ctx) {
    // Store the result in a volatile so the optimizer must actually produce
    // it; a plain `(void) val` lets the header-defined, side-effect-free
    // baseline be removed entirely, which would make the timing meaningless.
    volatile double sink = correctSumOfPrefixSum(ctx->xVec);
    (void) sink;
}
47+
48+
// Check the generated Kokkos sumOfPrefixSum against correctSumOfPrefixSum on
// five freshly generated 2048-element inputs. Each input is generated as a
// std::vector and copied into a view so both implementations see the same
// data. Returns true only if every result is within an absolute tolerance of
// 1e-5 of the reference.
bool validate(Context *ctx) {
    (void) ctx;  // validation builds its own inputs; the benchmark state is unused

    const size_t numTries = 5;
    const double tolerance = 1e-5;
    for (size_t i = 0; i < numTries; i += 1) {
        std::vector<double> input(2048);
        fillRand(input, -100.0, 100.0);

        Kokkos::View<double*> inputView("input", input.size());
        copyVectorToView(input, inputView);

        // compute correct result
        const double correctResult = correctSumOfPrefixSum(input);

        // compute test result
        const double testResult = sumOfPrefixSum(inputView);

        // Written as !(diff <= tol) so that a NaN result is rejected: every
        // ordered comparison with NaN is false, so `diff > tol` would accept it.
        if (!(std::fabs(correctResult - testResult) <= tolerance)) {
            return false;
        }
    }

    return true;
}
71+
72+
// Release the state allocated by init(). Passing nullptr is a no-op because
// deleting a null pointer is.
void destroy(Context *ctx) {
    delete ctx;
}
75+
76+
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#pragma once
2+
3+
#include <algorithm>
4+
#include <vector>
5+
6+
7+
/* Reference implementation: sort the non-zero elements of x in ascending
   order, in place, leaving every zero at its original index.

   Declared inline because the definition lives in a header; without it,
   including this header from more than one translation unit produces
   multiple-definition link errors.

   Example:

   input:  [8, 4, 0, 9, 8, 0, 1, -1, 7]
   output: [-1, 1, 0, 4, 7, 0, 8, 8, 9]
*/
inline void correctSortIgnoreZero(std::vector<int> &x) {
    // Gather the non-zero values in their original order.
    std::vector<int> nonZeroElements;
    for (int num : x) {
        if (num != 0) {
            nonZeroElements.push_back(num);
        }
    }

    std::sort(nonZeroElements.begin(), nonZeroElements.end());

    // Write the sorted values back into the non-zero slots; zero slots are
    // skipped, so the iterator advances exactly once per non-zero element.
    auto nextSorted = nonZeroElements.cbegin();
    for (int &value : x) {
        if (value != 0) {
            value = *nextSorted;
            ++nextSorted;
        }
    }
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// Driver for 0_sort_non-zero
2+
// /* Sort the vector x in ascending order ignoring elements with value 0.
3+
// Leave zero valued elements in-place.
4+
// Example:
5+
//
6+
// input: [8, 4, 0, 9, 8, 0, 1, -1, 7]
7+
// output: [-1, 1, 0, 4, 7, 0, 8, 8, 9]
8+
// */
9+
// void sortIgnoreZero(std::vector<int> &x) {
10+
11+
#include <algorithm>
12+
#include <numeric>
13+
#include <random>
14+
#include <vector>
15+
16+
#include "baseline.hpp"
17+
#include "utilities.hpp"
18+
#include "generated-code.hpp" // code generated by LLM
19+
20+
21+
// Benchmark state shared by the driver hooks in this file.
struct Context {
    // Vector sorted in place by sortIgnoreZero / correctSortIgnoreZero.
    std::vector<int> x;
};
24+
25+
// Overwrite every element of x with a rand() value, then replace it with zero
// whenever a second rand() draw is not a multiple of 5. Two rand() calls are
// made per element, in that order. The size of x is not changed.
void fillRandWithZeroes(std::vector<int> &x) {
    // Range-for avoids the signed/unsigned comparison of an int index
    // against x.size().
    for (int &value : x) {
        value = rand();
        // NOTE(review): `rand() % 5` is non-zero about 4 times out of 5, so
        // roughly 80% of the elements end up zero. If only "some" zeros were
        // intended this may be inverted (`rand() % 5 == 0`) -- confirm before
        // changing, since it alters the benchmark input distribution.
        if (rand() % 5) {
            value = 0;
        }
    }
}
34+
35+
// Refill the benchmark vector in place using fillRandWithZeroes. The vector
// keeps its current size.
void reset(Context *ctx) {
    std::vector<int> &values = ctx->x;
    fillRandWithZeroes(values);
}
38+
39+
// Allocate the benchmark state, size the vector to 100000 elements and fill
// it through reset(). The returned pointer is released by destroy().
Context *init() {
    constexpr int kNumElements = 100000;

    Context *state = new Context();
    state->x.resize(kNumElements);
    reset(state);
    return state;
}
45+
46+
// Run the generated implementation once; it sorts the benchmark vector in
// place.
void compute(Context *ctx) {
    std::vector<int> &data = ctx->x;
    sortIgnoreZero(data);
}
49+
50+
// Run the reference implementation once; it sorts the benchmark vector in
// place.
void best(Context *ctx) {
    std::vector<int> &data = ctx->x;
    correctSortIgnoreZero(data);
}
53+
54+
// Check the generated sortIgnoreZero against correctSortIgnoreZero on five
// freshly generated 1024-element inputs. Both implementations receive their
// own copy of the same input. Returns true only if every output matches the
// reference exactly, including its length.
bool validate(Context *ctx) {
    (void) ctx;  // validation builds its own inputs; the benchmark state is unused

    const size_t numTries = 5;
    for (size_t i = 0; i < numTries; i += 1) {
        std::vector<int> input(1024);
        fillRandWithZeroes(input);

        // compute correct result
        std::vector<int> correctResult = input;
        correctSortIgnoreZero(correctResult);

        // compute test result
        std::vector<int> testResult = input;
        sortIgnoreZero(testResult);

        // Vector comparison checks the sizes first. The three-iterator
        // std::equal used before would read past the end of testResult if the
        // generated code shrank it, and would ignore extra trailing elements
        // if it grew it.
        if (correctResult != testResult) {
            return false;
        }
    }

    return true;
}
76+
77+
// Release the state allocated by init(). Passing nullptr is a no-op because
// deleting a null pointer is.
void destroy(Context *ctx) {
    delete ctx;
}
80+
81+

0 commit comments

Comments
 (0)