Initialize basic file structure

NetroScript · NetroScript · commit bcc2e92bb26d · 2022-12-19T18:12:46.000+01:00
diff --git a/examples/basic/CMakeLists.txt b/examples/basic/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.18)
+project(ranges LANGUAGES CUDA)
+# Two stand-alone example executables are built from this directory.
+set(_TARGET_NORMAL normal)
+# normal: the example built from normal.cu.
+add_executable(${_TARGET_NORMAL})
+target_sources(${_TARGET_NORMAL}
+   PRIVATE
+   normal.cu)
+set_target_properties(${_TARGET_NORMAL} PROPERTIES
+  CUDA_STANDARD 17  # the CMake property is CUDA_STANDARD; CUDA_CXX_STANDARD is unknown to CMake and had no effect
+)
+
+
+set(_TARGET_ANNOTATED annotated)
+# annotated: the example built from annotated.cu.
+add_executable(${_TARGET_ANNOTATED})
+target_sources(${_TARGET_ANNOTATED}
+   PRIVATE
+   annotated.cu)
+set_target_properties(${_TARGET_ANNOTATED} PROPERTIES
+  CUDA_STANDARD 17  # the CMake property is CUDA_STANDARD; CUDA_CXX_STANDARD is unknown to CMake and had no effect
+)
diff --git a/examples/basic/annotated.cu b/examples/basic/annotated.cu
@@ -0,0 +1,84 @@
+#include <vector>
+#include <numeric>
+#include <iostream>
+#include <fstream>
+
+// Terminates the program with exit status 1 when `err` is not cudaSuccess, after printing the code and message to stderr.
+inline void checkCudaError(cudaError_t err) {
+    if (err == cudaSuccess) return;  // success: nothing to report
+    std::cerr << "\rCuda Error " << err << ": " << cudaGetErrorString(err) << std::endl;
+    std::cerr << "Aborting..." << std::endl;
+    exit(1);
+}
+
+struct MemAccessData {  // per-element record filled in by profile_access()
+    int id{0};  // index stored by profile_access(); zero on default-constructed objects
+};
+
+// Stores `id` in mem_access[id].id and returns `id`, so the call can wrap an index expression.
+__device__ int profile_access(int id, MemAccessData * mem_access){
+    return mem_access[id].id = id;  // no bounds check here: callers must pass a valid index
+}
+
+// Instrumented copy kernel: every thread with id < prob_size copies input[id] to output[id] and records id via profile_access().
+// NOTE(review): original author notes here read `int * const` and `mem_access<int * const>` - presumably a planned wrapper type; confirm.
+__global__ void kernel(int prob_size, int * const input, int * output, MemAccessData * mem_access){
+    const int id = blockIdx.x * blockDim.x + threadIdx.x;
+    if(id >= prob_size){
+        return;  // threads past the end of the data have nothing to do
+    }
+    output[id] = input[profile_access(id, mem_access)];  // uninstrumented form: output[id] = input[id];
+}
+
+// Placeholder output stage: writes the single line "data" to visu.txt; mem_accs is not used yet.
+// Author's plan for the format: for 1D and 2D a common image format (in the best case without an extra library), or HTML.
+void visualize(std::vector<MemAccessData> const & mem_accs){
+    std::ofstream out_file{"visu.txt"};
+    out_file << "data\n";
+    out_file.close();
+}
+
+int main(){
+    constexpr int prob_size = 100;
+    // Input: the values 0 .. prob_size-1 on the host, plus a device buffer of the same size.
+    std::vector<int> h_input(prob_size);
+    std::iota(h_input.begin(), h_input.end(), 0);
+    int * d_input = nullptr;
+    checkCudaError(cudaMalloc((void**) &d_input, sizeof(int)*prob_size));
+    // Output: zero-initialised on the host; the device result is copied over it after the launch.
+    std::vector<int> h_output(prob_size, 0);
+    int * d_output = nullptr;
+    checkCudaError(cudaMalloc((void**) &d_output, sizeof(int)*prob_size));
+
+    checkCudaError(cudaMemcpy(d_input, h_input.data(), sizeof(int)* prob_size, cudaMemcpyHostToDevice));
+    // One MemAccessData record per element; the kernel fills the device copy.
+    std::vector<MemAccessData> h_mem_access(prob_size);
+    MemAccessData * d_mem_access = nullptr;
+    checkCudaError(cudaMalloc((void**) &d_mem_access, sizeof(MemAccessData)*prob_size));
+    // Round the block count up so every element gets a thread without launching a spare block.
+    constexpr int threads = 32;
+    constexpr int blocks = (prob_size + threads - 1) / threads;
+
+    kernel<<<blocks, threads>>>(prob_size, d_input, d_output, d_mem_access);
+    checkCudaError(cudaGetLastError());  // reports launch-configuration errors
+    // cudaMemcpy blocks until the copy is done, so the host vectors are complete afterwards.
+    checkCudaError(cudaMemcpy(h_output.data(), d_output, sizeof(int)*prob_size, cudaMemcpyDeviceToHost));
+    checkCudaError(cudaMemcpy(h_mem_access.data(), d_mem_access, sizeof(MemAccessData)*prob_size, cudaMemcpyDeviceToHost));
+
+    // The kernel is a plain copy: any difference between input and output is a failure.
+    for(std::vector<int>::size_type i = 0; i < h_input.size(); ++i){
+        if(h_input[i] != h_output[i]){
+            std::cerr << "Element at position " << i << " is not equal (input - output): " << h_input[i] << " != " << h_output[i] << std::endl;
+            std::exit(1);
+        }
+    }
+
+    visualize(h_mem_access);
+
+    checkCudaError(cudaFree(d_input));
+    checkCudaError(cudaFree(d_output));
+    checkCudaError(cudaFree(d_mem_access));
+
+    std::cout << "kernel finished successful" << std::endl;
+    return 0;
+}
diff --git a/examples/basic/normal.cu b/examples/basic/normal.cu
@@ -0,0 +1,55 @@
+#include <vector>
+#include <numeric>
+#include <iostream>
+
+// Prints the CUDA error code and its message to stderr and exits with status 1 unless `err` is cudaSuccess.
+inline void checkCudaError(cudaError_t err) {
+    if (err == cudaSuccess) return;  // success: nothing to report
+    std::cerr << "\rCuda Error " << err << ": " << cudaGetErrorString(err) << std::endl;
+    std::cerr << "Aborting..." << std::endl;
+    exit(1);
+}
+
+
+// Element-wise copy: every thread with id < prob_size writes input[id] to output[id].
+__global__ void kernel(int prob_size, int * const input, int * output){
+    const int id = blockIdx.x * blockDim.x + threadIdx.x;
+    if(id >= prob_size) return;  // threads past the end of the data have nothing to do
+    output[id] = input[id];
+}
+
+int main(){
+    constexpr int prob_size = 100;
+    // Input: the values 0 .. prob_size-1 on the host, plus a device buffer of the same size.
+    std::vector<int> h_input(prob_size);
+    std::iota(h_input.begin(), h_input.end(), 0);
+    int * d_input = nullptr;
+    checkCudaError(cudaMalloc((void**) &d_input, sizeof(int)*prob_size));
+    // Output: zero-initialised on the host; the device result is copied over it after the launch.
+    std::vector<int> h_output(prob_size, 0);
+    int * d_output = nullptr;
+    checkCudaError(cudaMalloc((void**) &d_output, sizeof(int)*prob_size));
+
+    checkCudaError(cudaMemcpy(d_input, h_input.data(), sizeof(int)* prob_size, cudaMemcpyHostToDevice));
+    // Round the block count up so every element gets a thread without launching a spare block.
+    constexpr int threads = 32;
+    constexpr int blocks = (prob_size + threads - 1) / threads;
+
+    kernel<<<blocks, threads>>>(prob_size, d_input, d_output);
+    checkCudaError(cudaGetLastError());  // reports launch-configuration errors
+    // cudaMemcpy blocks until the copy is done, so h_output is complete afterwards.
+    checkCudaError(cudaMemcpy(h_output.data(), d_output, sizeof(int)*prob_size, cudaMemcpyDeviceToHost));
+    // The kernel is a plain copy: any difference between input and output is a failure.
+    for(std::vector<int>::size_type i = 0; i < h_input.size(); ++i){
+        if(h_input[i] != h_output[i]){
+            std::cerr << "Element at position " << i << " is not equal (input - output): " << h_input[i] << " != " << h_output[i] << std::endl;
+            std::exit(1);
+        }
+    }
+
+    checkCudaError(cudaFree(d_input));
+    checkCudaError(cudaFree(d_output));
+
+    std::cout << "kernel finished successful" << std::endl;
+    return 0;
+}
diff --git a/html/basic_template.html b/html/basic_template.html
@@ -0,0 +1,19 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>Memory Visualization</title>
+</head>
+
+<body>
+
+    <!-- HTML_TEMPLATE -->
+
+    <!-- Script kept as the last child of body: HTML does not allow a script element after the closing body tag. -->
+    <script>
+
+        // JS_TEMPLATE
+
+    </script>
+</body>
+</html>
diff --git a/src/cuda_mav.cuh b/src/cuda_mav.cuh