ci(build): add Actions workflow and fix profiling/arch configuration

Olajide-Badejo · Olajide-Badejo · commit 9acde3d6013d · 2026-04-14T00:01:51.000+02:00
- add .github/workflows/ci.yml with CUDA build, CMake configure, and cppcheck jobs

- remove -G from PROFILE mode in Makefile to preserve optimized profiling binaries

- switch CMake CUDA architecture accumulation to idiomatic list(APPEND); the resulting list is the same as with the old self-referencing set()
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,111 @@
+# ─────────────────────────────────────────────────────────────────────────────
+# CI — GPU-Based Matrix Operations
+#
+# Triggers : push / PR to main
+# Jobs     :
+#   1. CUDA Compile     – full nvcc build inside nvidia/cuda Docker (sm_86)
+#   2. CMake Configure  – validates CMake config for Release and Debug
+#   3. Static Analysis  – cppcheck on listed sources (CPU blocking, CUDA advisory)
+#
+# Email notifications:
+#   GitHub sends an email automatically when a run fails.
+#   To also receive emails on success:
+#   github.com → Settings → Notifications → GitHub Actions →
+#   enable "Send notifications for successful workflow runs".
+# ─────────────────────────────────────────────────────────────────────────────
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+
+  # ── 1. Full CUDA compile ───────────────────────────────────────────────────
+  cuda-compile:
+    name: CUDA Compile (sm_86)
+    runs-on: ubuntu-latest
+    container:
+      image: nvidia/cuda:12.6.0-devel-ubuntu22.04
+
+    steps:
+      - name: Install build tools
+        run: |
+          apt-get update -qq
+          apt-get install -y --no-install-recommends \
+            git cmake g++ ca-certificates
+
+      - uses: actions/checkout@v4
+
+      - name: CMake configure
+        run: |
+          cmake -B build \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCUDA_ARCH="86"
+
+      - name: Build
+        run: cmake --build build --config Release -j$(nproc)
+
+      - name: Verify binary exists
+        run: test -f build/bin/gpu_matrix_ops && echo "Binary OK"
+
+  # ── 2. CMake configure check (Release + Debug) ────────────────────────────
+  cmake-configure:
+    name: CMake Configure
+    runs-on: ubuntu-latest
+    container:
+      image: nvidia/cuda:12.6.0-devel-ubuntu22.04
+
+    steps:
+      - name: Install build tools
+        run: |
+          apt-get update -qq
+          apt-get install -y --no-install-recommends \
+            git cmake g++ ca-certificates
+
+      - uses: actions/checkout@v4
+
+      - name: Configure Release
+        run: cmake -B build-release -DCMAKE_BUILD_TYPE=Release -DCUDA_ARCH="86"
+
+      - name: Configure Debug
+        run: cmake -B build-debug -DCMAKE_BUILD_TYPE=Debug -DCUDA_ARCH="86"
+
+  # ── 3. Static analysis ────────────────────────────────────────────────────
+  static-analysis:
+    name: Static Analysis (cppcheck)
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install cppcheck
+        run: sudo apt-get update -qq && sudo apt-get install -y cppcheck
+
+      - name: Analyse CPU sources (blocking)
+        run: |
+          cppcheck \
+            --enable=warning,performance,portability \
+            --error-exitcode=1 \
+            --suppress=missingIncludeSystem \
+            --suppress=unusedFunction \
+            -I include \
+            src/cpu_ops.cpp \
+            include/cpu_ops.h
+
+      - name: Analyse CUDA sources (non-blocking)
+        # CUDA-specific constructs can produce false positives in cppcheck;
+        # report warnings but do not fail the job.
+        run: |
+          cppcheck \
+            --enable=warning,performance \
+            --suppress=missingIncludeSystem \
+            --suppress=unusedFunction \
+            -I include \
+            src/matvec_kernels.cu \
+            src/matmul_kernels.cu \
+            src/main.cu \
+            include/matrix_ops.cuh \
+            || true
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,4 +1,5 @@
 cmake_minimum_required(VERSION 3.18)
+
 project(gpu_matrix_ops LANGUAGES CXX CUDA)
 
 # ── C++ / CUDA standards ──────────────────────────────────────────────────────
@@ -8,12 +9,16 @@ set(CMAKE_CUDA_STANDARD 17)
 # ── CUDA architecture – auto-detect or override via -DCUDA_ARCH=86 etc. ───────
 if(NOT DEFINED CUDA_ARCH)
     # Common targets: 60=Pascal, 70=Volta, 75=Turing, 80=Ampere, 86=RTX30xx,
-    #                 89=Ada (RTX40xx), 90=Hopper
+    # 89=Ada (RTX40xx), 90=Hopper
     set(CUDA_ARCH "60;70;75;80;86;89" CACHE STRING "CUDA gencode arch list")
 endif()
 
+# Append each requested architecture with list(APPEND …), the idiomatic
+# equivalent of set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} ${ARCH}).
+# NOTE(review): both forms extend whatever value is already set; project()
+# normally seeds this variable with a default arch — confirm that is intended.
 foreach(ARCH ${CUDA_ARCH})
-    set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} ${ARCH})
+    list(APPEND CMAKE_CUDA_ARCHITECTURES ${ARCH})
 endforeach()
 
 # ── Compiler flags ────────────────────────────────────────────────────────────
@@ -47,8 +52,8 @@ set_target_properties(gpu_matrix_ops PROPERTIES
 
 target_compile_options(gpu_matrix_ops PRIVATE
     $<$<COMPILE_LANGUAGE:CUDA>:
-        --ptxas-options=-v          # verbose register/smem usage
-        -lineinfo                   # line info for Nsight profiling
+        --ptxas-options=-v   # verbose register/smem usage
+        -lineinfo            # line info for Nsight profiling
     >
 )
 
diff --git a/Makefile b/Makefile
@@ -1,59 +1,65 @@
 # ─────────────────────────────────────────────────────────────────────────────
-#  Makefile  —  GPU Matrix Ops
-#  Usage:
-#    make            # build with optimisations
-#    make DEBUG=1    # build with debug info / no optimisation
-#    make run        # build + execute
-#    make clean      # remove build artefacts
-#    make profile    # build with Nsight-profiling flags
+# Makefile — GPU Matrix Ops
+# Usage:
+#   make            # build with optimisations
+#   make DEBUG=1    # build with debug info / no optimisation
+#   make run        # build + execute
+#   make clean      # remove build artefacts
+#   make profile    # build with Nsight-profiling flags
 # ─────────────────────────────────────────────────────────────────────────────
 
-NVCC       := nvcc
-CXX        := g++
+NVCC := nvcc
+CXX  := g++
 
 # Detect SM version; fallback to 86 (Ampere RTX 30-series)
-SM         ?= 86
+SM ?= 86
 
 # ── Directories ───────────────────────────────────────────────────────────────
-SRCDIR     := src
-INCDIR     := include
-BINDIR     := bin
-OBJDIR     := obj
+SRCDIR := src
+INCDIR := include
+BINDIR := bin
+OBJDIR := obj
 
 # ── Sources ───────────────────────────────────────────────────────────────────
-CU_SRCS    := $(SRCDIR)/main.cu \
-              $(SRCDIR)/matvec_kernels.cu \
-              $(SRCDIR)/matmul_kernels.cu
-CPP_SRCS   := $(SRCDIR)/cpu_ops.cpp
+CU_SRCS  := $(SRCDIR)/main.cu \
+             $(SRCDIR)/matvec_kernels.cu \
+             $(SRCDIR)/matmul_kernels.cu
 
-CU_OBJS    := $(patsubst $(SRCDIR)/%.cu,  $(OBJDIR)/%.cu.o,  $(CU_SRCS))
-CPP_OBJS   := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.cpp.o, $(CPP_SRCS))
+CPP_SRCS := $(SRCDIR)/cpu_ops.cpp
 
-TARGET     := $(BINDIR)/gpu_matrix_ops
+CU_OBJS  := $(patsubst $(SRCDIR)/%.cu,  $(OBJDIR)/%.cu.o,  $(CU_SRCS))
+CPP_OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.cpp.o, $(CPP_SRCS))
+
+TARGET := $(BINDIR)/gpu_matrix_ops
 
 # ── Flags ─────────────────────────────────────────────────────────────────────
 COMMON_FLAGS := -I$(INCDIR)
 
 ifdef DEBUG
-  NVCC_FLAGS  := -g -G -O0 -arch=sm_$(SM) $(COMMON_FLAGS)
-  CXX_FLAGS   := -g -O0 $(COMMON_FLAGS)
+NVCC_FLAGS := -g -G -O0 -arch=sm_$(SM) $(COMMON_FLAGS)
+CXX_FLAGS  := -g -O0 $(COMMON_FLAGS)
 else
-  NVCC_FLAGS  := -O3 --use_fast_math -arch=sm_$(SM) \
-                 --expt-relaxed-constexpr \
-                 -lineinfo \
-                 --ptxas-options=-v \
-                 $(COMMON_FLAGS)
-  CXX_FLAGS   := -O3 -march=native $(COMMON_FLAGS)
+NVCC_FLAGS := -O3 --use_fast_math -arch=sm_$(SM) \
+              --expt-relaxed-constexpr \
+              -lineinfo \
+              --ptxas-options=-v \
+              $(COMMON_FLAGS)
+CXX_FLAGS  := -O3 -march=native $(COMMON_FLAGS)
 endif
 
+# PROFILE adds -lineinfo only, never -G: -G compiles device code with debug
+# instrumentation and disables GPU optimisations, so timings from such a binary
+# do not reflect real kernel performance; -lineinfo keeps optimisations intact.
+# NOTE(review): ifdef is evaluated at parse time, so the target-specific
+# `profile: PROFILE=1` cannot enable it; e.g. `make PROFILE=1` does — confirm.
 ifdef PROFILE
-  NVCC_FLAGS  += -lineinfo -G
+NVCC_FLAGS += -lineinfo
 endif
 
 LINK_FLAGS := -lcudart
 
-# ── Default target ─────────────────────────────────────────────────────────────
-.PHONY: all run clean profile
+# ── Default target ────────────────────────────────────────────────────────────
+.PHONY: all run clean profile info
 
 all: $(TARGET)
 
@@ -79,13 +85,12 @@ profile: PROFILE=1
 profile: all
 	@echo "  Binary built with profiling flags."
 	@echo "  Run with: ncu --set full ./$(TARGET)"
-	@echo "         or: nvprof ./$(TARGET)"
+	@echo "  or      : nsys profile ./$(TARGET)  (nvprof cannot profile sm_80+)"
 
 clean:
 	rm -rf $(OBJDIR) $(BINDIR)
 	@echo "  Cleaned."
 
-# Show which SM version is being compiled for
 info:
 	@echo "  Target SM : sm_$(SM)"
 	@echo "  NVCC      : $(shell which $(NVCC))"