Skip to content

Commit bcc2e92

Browse files
committed
Initialize basic file structure
1 parent 51d7e6a commit bcc2e92

5 files changed

Lines changed: 181 additions & 0 deletions

File tree

examples/basic/CMakeLists.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Build script for the basic examples. It defines two executables that are
# built from a single CUDA source file each: `normal` and `annotated`.
cmake_minimum_required(VERSION 3.18)
project(ranges LANGUAGES CUDA)

# --- target: normal (built from normal.cu) -----------------------------------
set(_TARGET_NORMAL normal)

add_executable(${_TARGET_NORMAL})
target_sources(${_TARGET_NORMAL}
  PRIVATE
    normal.cu)
# CUDA_STANDARD is the property CMake evaluates for CUDA sources. The name
# CUDA_CXX_STANDARD used before is not a CMake property, so the requested
# C++17 level was silently ignored.
set_target_properties(${_TARGET_NORMAL} PROPERTIES
  CUDA_STANDARD 17
)

# --- target: annotated (built from annotated.cu) -----------------------------
set(_TARGET_ANNOTATED annotated)

add_executable(${_TARGET_ANNOTATED})
target_sources(${_TARGET_ANNOTATED}
  PRIVATE
    annotated.cu)
# See the note on the first target: CUDA_STANDARD selects the C++ level for .cu files.
set_target_properties(${_TARGET_ANNOTATED} PROPERTIES
  CUDA_STANDARD 17
)

examples/basic/annotated.cu

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#include <vector>
2+
#include <numeric>
3+
#include <iostream>
4+
#include <fstream>
5+
6+
// Checks the status returned by a CUDA runtime call.
//
// Nothing happens when `err` equals cudaSuccess. Otherwise the numeric error
// code and the text from cudaGetErrorString are written to std::cerr and the
// process is terminated with exit code 1.
inline void checkCudaError(cudaError_t err) {
    if (err == cudaSuccess) {
        return;
    }

    std::cerr << "\rCuda Error " << err << ": " << cudaGetErrorString(err) << std::endl;
    std::cerr << "Aborting..." << std::endl;
    exit(1);
}
13+
14+
// One record of the memory access log filled by profile_access.
struct MemAccessData {
    // Index stored by profile_access; 0 for a default-constructed record.
    int id{0};
};
17+
18+
// Logs the access to index `id` and returns that index unchanged, so the call
// can be used directly as an array subscript.
//
// id          index that is about to be accessed; also the slot of the log
//             entry that gets written
// mem_access  log array; must have an element at position `id`
// returns     `id`
__device__ int profile_access(int id, MemAccessData * mem_access){
    MemAccessData & entry = mem_access[id];
    entry.id = id;
    return id;
}
22+
23+
// int * const
24+
// mem_access<int * const>
25+
// Copies input[id] to output[id], where `id` is the thread's global 1D index,
// and records the read through profile_access.
//
// prob_size   number of elements to copy; threads with id >= prob_size do nothing
// input       source array
// output      destination array
// mem_access  access log, written at position `id` by profile_access
__global__ void kernel(int prob_size, int * const input, int * output, MemAccessData * mem_access){
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    if(id >= prob_size){
        return;
    }

    // profile_access returns the index it was given, so this is the same copy
    // as output[id] = input[id] with the access logged on the way.
    const int read_index = profile_access(id, mem_access);
    output[id] = input[read_index];
}
32+
33+
// for 1D and 2D: common image format (in best case without extra library)
34+
// or HTML
35+
// Writes the visualization output file "visu.txt".
//
// At the moment only the fixed line "data" is written; `mem_accs` is accepted
// but not read yet.
void visualize(std::vector<MemAccessData> const & mem_accs){
    std::ofstream out_file;
    out_file.open("visu.txt");
    out_file << "data\n";
    out_file.close();
}
40+
41+
// Host driver of the annotated example.
//
// Fills a host vector with 0..prob_size-1, copies it to the device, runs
// `kernel` to copy it into a second device buffer while logging every access,
// copies the result and the access log back, verifies that output equals
// input, and passes the access log to visualize().
//
// Returns 0 on success. The process exits with code 1 on any CUDA error
// (through checkCudaError) or on the first mismatching element.
int main(){
    constexpr int prob_size = 100;

    // Input data on host and device.
    std::vector<int> h_input(prob_size);
    std::iota(h_input.begin(), h_input.end(), 0);
    int * d_input = nullptr;
    checkCudaError(cudaMalloc((void**) &d_input, sizeof(int)*prob_size));

    // Output buffer on host and device.
    std::vector<int> h_output(prob_size, 0);
    int * d_output = nullptr;
    checkCudaError(cudaMalloc((void**) &d_output, sizeof(int)*prob_size));

    checkCudaError(cudaMemcpy(d_input, h_input.data(), sizeof(int)* prob_size, cudaMemcpyHostToDevice));

    // Access log: one record per element, written by the kernel.
    std::vector<MemAccessData> h_mem_access(prob_size);
    MemAccessData * d_mem_access = nullptr;
    checkCudaError(cudaMalloc((void**) &d_mem_access, sizeof(MemAccessData)*prob_size));

    constexpr int threads = 32;
    // Ceiling division: just enough blocks to cover prob_size, without an extra
    // idle block when prob_size is a multiple of the block size.
    constexpr int blocks = (prob_size + threads - 1) / threads;

    kernel<<<blocks, threads>>>(prob_size, d_input, d_output, d_mem_access);
    // cudaGetLastError reports launch configuration errors; the synchronize
    // call reports faults that occur while the kernel is executing.
    checkCudaError(cudaGetLastError());
    checkCudaError(cudaDeviceSynchronize());

    checkCudaError(cudaMemcpy(h_output.data(), d_output, sizeof(int)*prob_size, cudaMemcpyDeviceToHost));
    checkCudaError(cudaMemcpy(h_mem_access.data(), d_mem_access, sizeof(MemAccessData)*prob_size, cudaMemcpyDeviceToHost));

    // Verify the copy. The index uses the vector's own size type to avoid a
    // signed/unsigned comparison against size().
    for(std::vector<int>::size_type i = 0; i < h_input.size(); ++i){
        if(h_input[i] != h_output[i]){
            std::cerr << "Element at position " << i << " is not equal (input - output): " << h_input[i] << " != " << h_output[i] << std::endl;
            std::exit(1);
        }
    }

    visualize(h_mem_access);

    checkCudaError(cudaFree(d_input));
    checkCudaError(cudaFree(d_output));
    checkCudaError(cudaFree(d_mem_access));

    std::cout << "kernel finished successful" << std::endl;
    return 0;
}

examples/basic/normal.cu

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#include <vector>
2+
#include <numeric>
3+
#include <iostream>
4+
5+
// Aborts the program when a CUDA runtime call did not succeed.
//
// For any `err` other than cudaSuccess, the error code and its description
// from cudaGetErrorString are printed to std::cerr, followed by a call to
// exit(1). A successful status returns immediately.
inline void checkCudaError(cudaError_t err) {
    const bool failed = (err != cudaSuccess);
    if (!failed) {
        return;
    }

    std::cerr << "\rCuda Error " << err << ": " << cudaGetErrorString(err) << std::endl;
    std::cerr << "Aborting..." << std::endl;
    exit(1);
}
12+
13+
14+
// Copies input[id] to output[id], where `id` is the thread's global 1D index.
//
// prob_size  number of elements to copy; threads with id >= prob_size do nothing
// input      source array
// output     destination array
__global__ void kernel(int prob_size, int * const input, int * output){
    const int id = blockIdx.x * blockDim.x + threadIdx.x;
    if(id >= prob_size){
        return;
    }

    output[id] = input[id];
}
20+
21+
// Host driver of the plain example.
//
// Fills a host vector with 0..prob_size-1, copies it to the device, runs
// `kernel` to copy it into a second device buffer, copies the result back and
// verifies that output equals input.
//
// Returns 0 on success. The process exits with code 1 on any CUDA error
// (through checkCudaError) or on the first mismatching element.
int main(){
    constexpr int prob_size = 100;

    // Input data on host and device.
    std::vector<int> h_input(prob_size);
    std::iota(h_input.begin(), h_input.end(), 0);
    int * d_input = nullptr;
    checkCudaError(cudaMalloc((void**) &d_input, sizeof(int)*prob_size));

    // Output buffer on host and device.
    std::vector<int> h_output(prob_size, 0);
    int * d_output = nullptr;
    checkCudaError(cudaMalloc((void**) &d_output, sizeof(int)*prob_size));

    checkCudaError(cudaMemcpy(d_input, h_input.data(), sizeof(int)* prob_size, cudaMemcpyHostToDevice));

    constexpr int threads = 32;
    // Ceiling division: just enough blocks to cover prob_size, without an extra
    // idle block when prob_size is a multiple of the block size.
    constexpr int blocks = (prob_size + threads - 1) / threads;

    kernel<<<blocks, threads>>>(prob_size, d_input, d_output);
    // cudaGetLastError reports launch configuration errors; the synchronize
    // call reports faults that occur while the kernel is executing.
    checkCudaError(cudaGetLastError());
    checkCudaError(cudaDeviceSynchronize());

    checkCudaError(cudaMemcpy(h_output.data(), d_output, sizeof(int)*prob_size, cudaMemcpyDeviceToHost));

    // Verify the copy. The index uses the vector's own size type to avoid a
    // signed/unsigned comparison against size().
    for(std::vector<int>::size_type i = 0; i < h_input.size(); ++i){
        if(h_input[i] != h_output[i]){
            std::cerr << "Element at position " << i << " is not equal (input - output): " << h_input[i] << " != " << h_output[i] << std::endl;
            std::exit(1);
        }
    }

    checkCudaError(cudaFree(d_input));
    checkCudaError(cudaFree(d_output));

    std::cout << "kernel finished successful" << std::endl;
    return 0;
}

html/basic_template.html

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!DOCTYPE html>
<!--
Page skeleton for the memory visualization.
The HTML_TEMPLATE comment and the JS_TEMPLATE line are placeholders;
presumably a generator substitutes them with the real markup and script
(confirm against the code that consumes this template).
-->
<html>
<head>
<meta charset="utf-8">
<title>Memory Visualization</title>
</head>

<body>

<!-- HTML_TEMPLATE -->

<!-- The script is the last child of <body>: a <script> placed after </body> is
     a parse error, and at this position the markup above is already parsed
     when the script runs. -->
<script>

// JS_TEMPLATE

</script>

</body>
</html>

src/cuda_mav.cuh

Whitespace-only changes.

0 commit comments

Comments
 (0)