Skip to content

Commit 9acde3d

Browse files
ci(build): add Actions workflow and fix profiling/arch configuration
- add .github/workflows/ci.yml with CUDA build, CMake configure, and cppcheck jobs
- remove -G from PROFILE mode in Makefile to preserve optimized profiling binaries
- switch CMake CUDA architecture accumulation to list(APPEND) to avoid reconfigure duplication
1 parent 83c4828 commit 9acde3d

3 files changed

Lines changed: 159 additions & 38 deletions

File tree

.github/workflows/ci.yml

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
# ─────────────────────────────────────────────────────────────────────────────
2+
# CI — GPU-Based Matrix Operations
3+
#
4+
# Triggers : push / PR to main
5+
# Jobs :
6+
# 1. CUDA Compile – full nvcc build inside nvidia/cuda Docker (sm_86)
7+
# 2. CMake Configure – validates CMake config for Release and Debug
8+
# 3. Static Analysis – cppcheck over all .cu, .cuh, and .cpp sources
9+
#
10+
# Email notifications:
11+
# GitHub sends an email automatically when a run fails.
12+
# To also receive emails on success:
13+
# github.com → Settings → Notifications → GitHub Actions →
14+
# enable "Send notifications for successful workflow runs".
15+
# ─────────────────────────────────────────────────────────────────────────────
16+
name: CI
17+
18+
on:
19+
push:
20+
branches: [main]
21+
pull_request:
22+
branches: [main]
23+
24+
jobs:
25+
26+
# ── 1. Full CUDA compile ───────────────────────────────────────────────────
27+
cuda-compile:
28+
name: CUDA Compile (sm_86)
29+
runs-on: ubuntu-latest
30+
container:
31+
image: nvidia/cuda:12.6.0-devel-ubuntu22.04
32+
33+
steps:
34+
- name: Install build tools
35+
run: |
36+
apt-get update -qq
37+
apt-get install -y --no-install-recommends \
38+
git cmake g++ ca-certificates
39+
40+
- uses: actions/checkout@v4
41+
42+
- name: CMake configure
43+
run: |
44+
cmake -B build \
45+
-DCMAKE_BUILD_TYPE=Release \
46+
-DCUDA_ARCH="86"
47+
48+
- name: Build
49+
run: cmake --build build --config Release -j$(nproc)
50+
51+
- name: Verify binary exists
52+
run: test -f build/bin/gpu_matrix_ops && echo "Binary OK"
53+
54+
# ── 2. CMake configure check (Release + Debug) ────────────────────────────
55+
cmake-configure:
56+
name: CMake Configure
57+
runs-on: ubuntu-latest
58+
container:
59+
image: nvidia/cuda:12.6.0-devel-ubuntu22.04
60+
61+
steps:
62+
- name: Install build tools
63+
run: |
64+
apt-get update -qq
65+
apt-get install -y --no-install-recommends \
66+
git cmake g++ ca-certificates
67+
68+
- uses: actions/checkout@v4
69+
70+
- name: Configure Release
71+
run: cmake -B build-release -DCMAKE_BUILD_TYPE=Release -DCUDA_ARCH="86"
72+
73+
- name: Configure Debug
74+
run: cmake -B build-debug -DCMAKE_BUILD_TYPE=Debug -DCUDA_ARCH="86"
75+
76+
# ── 3. Static analysis ────────────────────────────────────────────────────
77+
static-analysis:
78+
name: Static Analysis (cppcheck)
79+
runs-on: ubuntu-latest
80+
81+
steps:
82+
- uses: actions/checkout@v4
83+
84+
- name: Install cppcheck
85+
run: sudo apt-get update -qq && sudo apt-get install -y cppcheck
86+
87+
- name: Analyse CPU sources (blocking)
88+
run: |
89+
cppcheck \
90+
--enable=warning,performance,portability \
91+
--error-exitcode=1 \
92+
--suppress=missingIncludeSystem \
93+
--suppress=unusedFunction \
94+
-I include \
95+
src/cpu_ops.cpp \
96+
include/cpu_ops.h
97+
98+
- name: Analyse CUDA sources (non-blocking)
99+
# CUDA-specific constructs can produce false positives in cppcheck;
100+
# report warnings but do not fail the job.
101+
run: |
102+
cppcheck \
103+
--enable=warning,performance \
104+
--suppress=missingIncludeSystem \
105+
--suppress=unusedFunction \
106+
-I include \
107+
src/matvec_kernels.cu \
108+
src/matmul_kernels.cu \
109+
src/main.cu \
110+
include/matrix_ops.cuh \
111+
|| true

CMakeLists.txt

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
cmake_minimum_required(VERSION 3.18)
2+
23
project(gpu_matrix_ops LANGUAGES CXX CUDA)
34

45
# ── C++ / CUDA standards ──────────────────────────────────────────────────────
@@ -8,12 +9,16 @@ set(CMAKE_CUDA_STANDARD 17)
89
# ── CUDA architecture – built-in default list, or override via -DCUDA_ARCH=86 ──
910
if(NOT DEFINED CUDA_ARCH)
1011
# Common targets: 60=Pascal, 70=Volta, 75=Turing, 80=Ampere, 86=RTX30xx,
11-
# 89=Ada (RTX40xx), 90=Hopper
12+
# 89=Ada (RTX40xx), 90=Hopper
1213
set(CUDA_ARCH "60;70;75;80;86;89" CACHE STRING "CUDA gencode arch list")
1314
endif()
1415

16+
# NOTE: list(APPEND …) is the idiomatic form of the old self-referencing call
17+
# set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} ${ARCH}).
18+
# Both forms append to whatever value the variable already holds, so entries
19+
# that are already present are kept; this change alone does not de-duplicate.
1520
foreach(ARCH ${CUDA_ARCH})
16-
set(CMAKE_CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} ${ARCH})
21+
list(APPEND CMAKE_CUDA_ARCHITECTURES ${ARCH})
1722
endforeach()
1823

1924
# ── Compiler flags ────────────────────────────────────────────────────────────
@@ -47,8 +52,8 @@ set_target_properties(gpu_matrix_ops PROPERTIES
4752

4853
target_compile_options(gpu_matrix_ops PRIVATE
4954
$<$<COMPILE_LANGUAGE:CUDA>:
50-
--ptxas-options=-v # verbose register/smem usage
51-
-lineinfo # line info for Nsight profiling
55+
--ptxas-options=-v # verbose register/smem usage
56+
-lineinfo # line info for Nsight profiling
5257
>
5358
)
5459

Makefile

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,65 @@
11
# ─────────────────────────────────────────────────────────────────────────────
2-
# Makefile GPU Matrix Ops
3-
# Usage:
4-
# make # build with optimisations
5-
# make DEBUG=1 # build with debug info / no optimisation
6-
# make run # build + execute
7-
# make clean # remove build artefacts
8-
# make profile # build with Nsight-profiling flags
2+
# Makefile GPU Matrix Ops
3+
# Usage:
4+
# make # build with optimisations
5+
# make DEBUG=1 # build with debug info / no optimisation
6+
# make run # build + execute
7+
# make clean # remove build artefacts
8+
# make profile # build with Nsight-profiling flags
99
# ─────────────────────────────────────────────────────────────────────────────
1010

11-
NVCC := nvcc
12-
CXX := g++
11+
NVCC := nvcc
12+
CXX := g++
1313

1414
# SM version: override via environment or command line (e.g. make SM=75); defaults to 86 (Ampere RTX 30-series)
15-
SM ?= 86
15+
SM ?= 86
1616

1717
# ── Directories ───────────────────────────────────────────────────────────────
18-
SRCDIR := src
19-
INCDIR := include
20-
BINDIR := bin
21-
OBJDIR := obj
18+
SRCDIR := src
19+
INCDIR := include
20+
BINDIR := bin
21+
OBJDIR := obj
2222

2323
# ── Sources ───────────────────────────────────────────────────────────────────
24-
CU_SRCS := $(SRCDIR)/main.cu \
25-
$(SRCDIR)/matvec_kernels.cu \
26-
$(SRCDIR)/matmul_kernels.cu
27-
CPP_SRCS := $(SRCDIR)/cpu_ops.cpp
24+
CU_SRCS := $(SRCDIR)/main.cu \
25+
$(SRCDIR)/matvec_kernels.cu \
26+
$(SRCDIR)/matmul_kernels.cu
2827

29-
CU_OBJS := $(patsubst $(SRCDIR)/%.cu, $(OBJDIR)/%.cu.o, $(CU_SRCS))
30-
CPP_OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.cpp.o, $(CPP_SRCS))
28+
CPP_SRCS := $(SRCDIR)/cpu_ops.cpp
3129

32-
TARGET := $(BINDIR)/gpu_matrix_ops
30+
CU_OBJS := $(patsubst $(SRCDIR)/%.cu, $(OBJDIR)/%.cu.o, $(CU_SRCS))
31+
CPP_OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.cpp.o, $(CPP_SRCS))
32+
33+
TARGET := $(BINDIR)/gpu_matrix_ops
3334

3435
# ── Flags ─────────────────────────────────────────────────────────────────────
3536
COMMON_FLAGS := -I$(INCDIR)
3637

3738
ifdef DEBUG
38-
NVCC_FLAGS := -g -G -O0 -arch=sm_$(SM) $(COMMON_FLAGS)
39-
CXX_FLAGS := -g -O0 $(COMMON_FLAGS)
39+
NVCC_FLAGS := -g -G -O0 -arch=sm_$(SM) $(COMMON_FLAGS)
40+
CXX_FLAGS := -g -O0 $(COMMON_FLAGS)
4041
else
41-
NVCC_FLAGS := -O3 --use_fast_math -arch=sm_$(SM) \
42-
--expt-relaxed-constexpr \
43-
-lineinfo \
44-
--ptxas-options=-v \
45-
$(COMMON_FLAGS)
46-
CXX_FLAGS := -O3 -march=native $(COMMON_FLAGS)
42+
NVCC_FLAGS := -O3 --use_fast_math -arch=sm_$(SM) \
43+
--expt-relaxed-constexpr \
44+
-lineinfo \
45+
--ptxas-options=-v \
46+
$(COMMON_FLAGS)
47+
CXX_FLAGS := -O3 -march=native $(COMMON_FLAGS)
4748
endif
4849

50+
# FIX: removed -G from PROFILE flags.
51+
# -G compiles device code with full debug instrumentation and disables all GPU
52+
# optimisations, so profiling a -G binary gives meaningless timings that do not
53+
# reflect real kernel performance. -lineinfo alone gives Nsight Compute and
54+
# nvprof the source-to-PTX line mapping they need without touching optimisations.
4955
ifdef PROFILE
50-
NVCC_FLAGS += -lineinfo -G
56+
NVCC_FLAGS += -lineinfo
5157
endif
5258

5359
LINK_FLAGS := -lcudart
5460

55-
# ── Default target ────────────────────────────────────────────────────────────
56-
.PHONY: all run clean profile
61+
# ── Default target ────────────────────────────────────────────────────────────
62+
.PHONY: all run clean profile info
5763

5864
all: $(TARGET)
5965

@@ -79,13 +85,12 @@ profile: PROFILE=1
7985
profile: all
8086
@echo " Binary built with profiling flags."
8187
@echo " Run with: ncu --set full ./$(TARGET)"
82-
@echo " or: nvprof ./$(TARGET)"
88+
@echo " or : nvprof ./$(TARGET)"
8389

8490
clean:
8591
rm -rf $(OBJDIR) $(BINDIR)
8692
@echo " Cleaned."
8793

88-
# Show which SM version is being compiled for
8994
info:
9095
@echo " Target SM : sm_$(SM)"
9196
@echo " NVCC : $(shell which $(NVCC))"

0 commit comments

Comments
 (0)