|
| 1 | +# ───────────────────────────────────────────────────────────────────────────── |
| 2 | +# Makefile — GPU Matrix Ops |
| 3 | +# Usage: |
| 4 | +# make # build with optimisations |
| 5 | +# make DEBUG=1 # build with debug info / no optimisation |
| 6 | +# make run # build + execute |
| 7 | +# make clean # remove build artefacts |
| 8 | +# make profile # build with Nsight-profiling flags |
| 9 | +# ───────────────────────────────────────────────────────────────────────────── |
| 10 | + |
# Toolchain. Either tool can be replaced on the command line,
# e.g. `make NVCC=/usr/local/cuda/bin/nvcc CXX=clang++`.
NVCC := nvcc
CXX  := g++

# Target GPU architecture, passed to nvcc as -arch=sm_$(SM).
# Nothing is auto-detected: this is only a default of 86 (Ampere, RTX
# 30-series). Set it for other GPUs, e.g. `make SM=80`, or via the environment.
SM ?= 86
| 16 | + |
| 17 | +# ── Directories ─────────────────────────────────────────────────────────────── |
# Project layout: sources and headers are inputs, obj/ and bin/ are outputs.
SRCDIR := src
INCDIR := include
BINDIR := bin
OBJDIR := obj

# ── Sources ───────────────────────────────────────────────────────────────────
# Device (.cu) and host (.cpp) translation units, listed by file name.
CU_SRCS  := $(addprefix $(SRCDIR)/,main.cu matvec_kernels.cu matmul_kernels.cu)
CPP_SRCS := $(addprefix $(SRCDIR)/,cpu_ops.cpp)

# Each source maps to $(OBJDIR)/<name>.<ext>.o; keeping the source extension
# in the object name keeps .cu and .cpp objects with the same stem apart.
CU_OBJS  := $(CU_SRCS:$(SRCDIR)/%.cu=$(OBJDIR)/%.cu.o)
CPP_OBJS := $(CPP_SRCS:$(SRCDIR)/%.cpp=$(OBJDIR)/%.cpp.o)

# Final executable.
TARGET := $(BINDIR)/gpu_matrix_ops
| 33 | + |
| 34 | +# ── Flags ───────────────────────────────────────────────────────────────────── |
# Include path shared by the host and device compilers.
COMMON_FLAGS := -I$(INCDIR)

# Setting DEBUG to any non-empty value (e.g. `make DEBUG=1`) selects the
# unoptimised debug flag set; otherwise the optimised set below is used.
ifndef DEBUG
  # Optimised build.
  NVCC_FLAGS := -O3 --use_fast_math -arch=sm_$(SM)
  NVCC_FLAGS += --expt-relaxed-constexpr
  NVCC_FLAGS += -lineinfo
  NVCC_FLAGS += --ptxas-options=-v
  NVCC_FLAGS += $(COMMON_FLAGS)
  CXX_FLAGS  := -O3 -march=native $(COMMON_FLAGS)
else
  # Debug build: host (-g) and device (-G) debug info, no optimisation.
  NVCC_FLAGS := -g -G -O0 -arch=sm_$(SM) $(COMMON_FLAGS)
  CXX_FLAGS  := -g -O0 $(COMMON_FLAGS)
endif
| 48 | + |
# PROFILE=<non-empty> (e.g. `make PROFILE=1`) requests source-line correlation
# for the profiler. Only -lineinfo is added: nvcc's -G switches off device-code
# optimisation and overrides -lineinfo, so a -G build would profile
# unoptimised kernels.
ifdef PROFILE
  NVCC_FLAGS += -lineinfo
endif

# Libraries appended to the link command: the CUDA runtime.
LINK_FLAGS := -lcudart
| 54 | + |
| 55 | +# ── Default target ───────────────────────────────────────────────────────────── |
# Targets that name actions rather than files. `info` is listed too, so a
# stray file called "info" can never make that target look up to date.
.PHONY: all run clean profile info

# Default goal: build the executable.
all: $(TARGET)
| 59 | + |
# Link all device and host objects into the executable, using nvcc as the
# link driver. The output directory is created on demand.
$(TARGET): $(CU_OBJS) $(CPP_OBJS)
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_FLAGS) $^ -o $@ $(LINK_FLAGS)
	@echo "" && echo " Built: $@"
| 65 | + |
# Each compile also writes a sibling .d file next to the object, listing the
# non-system headers that translation unit included (-MMD). Editing a header
# under $(INCDIR) then rebuilds exactly the objects that use it.
# -MP emits an empty target per header, so deleting or renaming a header does
# not abort the next build. Recursive (=) because it refers to $@.
DEPFLAGS = -MMD -MP -MF $(@:.o=.d)

# Device translation units, compiled as relocatable device code (-dc).
$(OBJDIR)/%.cu.o: $(SRCDIR)/%.cu
	@mkdir -p $(@D)
	$(NVCC) $(NVCC_FLAGS) $(DEPFLAGS) -dc -c $< -o $@

# Host-only translation units.
$(OBJDIR)/%.cpp.o: $(SRCDIR)/%.cpp
	@mkdir -p $(@D)
	$(CXX) $(CXX_FLAGS) $(DEPFLAGS) -c $< -o $@

# Pull in the generated header dependencies. The leading '-' keeps make quiet
# when the files do not exist yet (first build, or after `make clean`).
-include $(CU_OBJS:.o=.d) $(CPP_OBJS:.o=.d)
| 73 | + |
# Build (if necessary) and execute the program.
run: all
	@echo ""
	./$(TARGET)

# Build with profiling flags and print how to launch the profiler.
# The flag is attached as a target-specific variable, which make passes down
# to every prerequisite of `profile` (all -> $(TARGET) -> objects).
# Setting PROFILE=1 here instead has no effect: `ifdef PROFILE` is evaluated
# while the makefile is read, before any target-specific value exists.
# Only -lineinfo is added; -G would switch off device-code optimisation.
# Objects that are already up to date are not recompiled; run `make clean`
# first when switching from a DEBUG build.
profile: NVCC_FLAGS += -lineinfo
profile: all
	@echo " Binary built with profiling flags."
	@echo " Run with: ncu --set full ./$(TARGET)"
	@echo " or: nvprof ./$(TARGET)"
| 83 | + |
# Delete both output directories (executable and objects).
clean:
	rm -r -f $(BINDIR) $(OBJDIR)
	@echo " Cleaned."
| 87 | + |
# Print the target architecture, the resolved path of the CUDA compiler and
# the last line of its version banner.
# The lookups are shell command substitutions ($$(...)) rather than
# $(shell ...): make would paste that output into the double-quoted string,
# where the shell would parse it again. `command -v` is the POSIX equivalent
# of `which`.
info:
	@echo " Target SM : sm_$(SM)"
	@echo " NVCC : $$(command -v $(NVCC))"
	@echo " CUDA ver : $$($(NVCC) --version | tail -1)"
0 commit comments