Skip to content

Commit d0df313

Browse files
committed
Release v0.3.1: Introduce runtime profiling infrastructure
1 parent 03a4c55 commit d0df313

14 files changed

Lines changed: 611 additions & 15 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,4 @@ cython_debug/
220220
marimo/_static/
221221
marimo/_lsp/
222222
__marimo__/
223+
.pgo-profile/

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [0.3.1] - 2025-12-08
9+
10+
### Added
11+
12+
- **Profiling**: Runtime profiling infrastructure for C++ hot paths (`shortest_paths_core`, `place_demand`, `place_on_dag`).
13+
- Enable via `NGRAPH_CORE_PROFILE=1` environment variable.
14+
- Python API: `profiling_enabled()`, `profiling_dump()`, `profiling_reset()`.
15+
- Minimal overhead when disabled (single static bool check per instrumented scope).
16+
- ~2% overhead when enabled.
17+
18+
### Changed
19+
20+
- **Build**: Default builds now enable LTO (when the toolchain supports it), loop unrolling, and `-fno-math-errno`/`-fno-trapping-math`. Added `make install-native` for CPU-specific builds.
21+
822
## [0.3.0] - 2025-12-06
923

1024
### Changed

CMakeLists.txt

Lines changed: 100 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,33 +105,125 @@ if(APPLE)
105105
target_compile_definitions(_netgraph_core PRIVATE _LIBCPP_DISABLE_AVAILABILITY=1 _LIBCPP_ABI_VERSION=1)
106106
endif()
107107

108-
# Optional coverage instrumentation for GCC/Clang
108+
# =============================================================================
109+
# Performance optimizations (disabled when coverage/sanitizers are enabled)
110+
# =============================================================================
111+
109112
option(NETGRAPH_CORE_COVERAGE "Enable C++ coverage instrumentation" OFF)
113+
option(NETGRAPH_CORE_SANITIZE "Enable Address/Undefined sanitizers" OFF)
114+
option(NETGRAPH_CORE_NATIVE "Optimize for current CPU (not portable)" OFF)
115+
option(NETGRAPH_CORE_FAST_MATH "Enable fast math (may affect precision)" OFF)
116+
option(NETGRAPH_CORE_PGO_GENERATE "Instrument for PGO profile collection" OFF)
117+
option(NETGRAPH_CORE_PGO_USE "Use PGO profile data (set NETGRAPH_CORE_PGO_DIR)" OFF)
118+
set(NETGRAPH_CORE_PGO_DIR "${CMAKE_BINARY_DIR}/pgo" CACHE PATH "Directory for PGO profile data")
119+
120+
# Only apply optimizations when not in debug/instrumentation mode
121+
if(NOT NETGRAPH_CORE_COVERAGE AND NOT NETGRAPH_CORE_SANITIZE)
122+
# LTO (Link Time Optimization) - enables cross-module inlining
123+
include(CheckIPOSupported)
124+
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_error)
125+
if(ipo_supported)
126+
message(STATUS "Enabling LTO (Link Time Optimization)")
127+
set_target_properties(netgraph_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
128+
set_target_properties(_netgraph_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
129+
else()
130+
message(STATUS "LTO not supported: ${ipo_error}")
131+
endif()
132+
133+
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
134+
# GCC-only: allow aggressive inlining by promising symbols won't be interposed
135+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
136+
target_compile_options(netgraph_core PRIVATE -fno-semantic-interposition)
137+
target_compile_options(_netgraph_core PRIVATE -fno-semantic-interposition)
138+
endif()
139+
140+
# Safe floating-point optimizations (we don't use errno, FP exceptions, or change rounding mode)
141+
target_compile_options(netgraph_core PRIVATE -fno-math-errno -fno-trapping-math)
142+
target_compile_options(_netgraph_core PRIVATE -fno-math-errno -fno-trapping-math)
143+
144+
# Aggressive loop optimizations
145+
target_compile_options(netgraph_core PRIVATE -funroll-loops)
146+
target_compile_options(_netgraph_core PRIVATE -funroll-loops)
147+
148+
# Native CPU instructions (AVX2, etc.) - only for local builds
149+
if(NETGRAPH_CORE_NATIVE)
150+
message(STATUS "Enabling native CPU optimizations (-march=native)")
151+
target_compile_options(netgraph_core PRIVATE -march=native)
152+
target_compile_options(_netgraph_core PRIVATE -march=native)
153+
endif()
154+
155+
# Fast math - trades IEEE compliance for speed
156+
if(NETGRAPH_CORE_FAST_MATH)
157+
message(STATUS "Enabling fast math optimizations")
158+
target_compile_options(netgraph_core PRIVATE -ffast-math)
159+
target_compile_options(_netgraph_core PRIVATE -ffast-math)
160+
endif()
161+
162+
# PGO (Profile-Guided Optimization) - uses runtime profile to guide inlining decisions
163+
# GCC: -fprofile-generate/-fprofile-use, Clang: -fprofile-instr-*
164+
if(NETGRAPH_CORE_PGO_GENERATE)
165+
message(STATUS "PGO: Instrumenting for profile generation -> ${NETGRAPH_CORE_PGO_DIR}")
166+
file(MAKE_DIRECTORY ${NETGRAPH_CORE_PGO_DIR})
167+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
168+
target_compile_options(netgraph_core PRIVATE -fprofile-generate=${NETGRAPH_CORE_PGO_DIR})
169+
target_compile_options(_netgraph_core PRIVATE -fprofile-generate=${NETGRAPH_CORE_PGO_DIR})
170+
target_link_options(netgraph_core PRIVATE -fprofile-generate=${NETGRAPH_CORE_PGO_DIR})
171+
target_link_options(_netgraph_core PRIVATE -fprofile-generate=${NETGRAPH_CORE_PGO_DIR})
172+
else()
173+
# Clang/AppleClang: use LLVM instrumentation PGO
174+
target_compile_options(netgraph_core PRIVATE -fprofile-instr-generate=${NETGRAPH_CORE_PGO_DIR}/default.profraw)
175+
target_compile_options(_netgraph_core PRIVATE -fprofile-instr-generate=${NETGRAPH_CORE_PGO_DIR}/default.profraw)
176+
target_link_options(netgraph_core PRIVATE -fprofile-instr-generate=${NETGRAPH_CORE_PGO_DIR}/default.profraw)
177+
target_link_options(_netgraph_core PRIVATE -fprofile-instr-generate=${NETGRAPH_CORE_PGO_DIR}/default.profraw)
178+
endif()
179+
elseif(NETGRAPH_CORE_PGO_USE)
180+
message(STATUS "PGO: Using profile data from ${NETGRAPH_CORE_PGO_DIR}")
181+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
182+
target_compile_options(netgraph_core PRIVATE -fprofile-use=${NETGRAPH_CORE_PGO_DIR} -fprofile-correction)
183+
target_compile_options(_netgraph_core PRIVATE -fprofile-use=${NETGRAPH_CORE_PGO_DIR} -fprofile-correction)
184+
else()
185+
# Clang/AppleClang: use merged .profdata file
186+
target_compile_options(netgraph_core PRIVATE -fprofile-instr-use=${NETGRAPH_CORE_PGO_DIR}/default.profdata)
187+
target_compile_options(_netgraph_core PRIVATE -fprofile-instr-use=${NETGRAPH_CORE_PGO_DIR}/default.profdata)
188+
endif()
189+
endif()
190+
elseif(MSVC)
191+
# /O2: full optimization, /GL: whole program optimization (MSVC's LTO)
192+
target_compile_options(netgraph_core PRIVATE /O2 /GL)
193+
target_compile_options(_netgraph_core PRIVATE /O2 /GL)
194+
if(NETGRAPH_CORE_FAST_MATH)
195+
target_compile_options(netgraph_core PRIVATE /fp:fast)
196+
target_compile_options(_netgraph_core PRIVATE /fp:fast)
197+
endif()
198+
endif()
199+
endif()
200+
201+
# =============================================================================
202+
# Debug/instrumentation options (mutually exclusive with optimizations above)
203+
# =============================================================================
204+
110205
if(NETGRAPH_CORE_COVERAGE)
111-
message(STATUS "Enabling coverage instrumentation for C++ targets")
206+
message(STATUS "Enabling coverage instrumentation (disables optimizations)")
112207
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
113208
foreach(tgt netgraph_core _netgraph_core)
114-
# Use no optimizations and include debug info for accurate line mapping
115209
target_compile_options(${tgt} PRIVATE -O0 -g --coverage)
116-
# Link coverage runtime
117210
target_link_options(${tgt} PRIVATE --coverage)
118211
endforeach()
119212
else()
120-
message(WARNING "NETGRAPH_CORE_COVERAGE is set but compiler '${CMAKE_CXX_COMPILER_ID}' is not supported")
213+
message(WARNING "NETGRAPH_CORE_COVERAGE requires GCC or Clang")
121214
endif()
122215
endif()
123216

124-
# Optional sanitizers for debug testing
125-
option(NETGRAPH_CORE_SANITIZE "Enable Address/Undefined sanitizers" OFF)
126217
if(NETGRAPH_CORE_SANITIZE)
218+
message(STATUS "Enabling sanitizers (disables optimizations)")
127219
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
128220
set(SAN_FLAGS "-fsanitize=address,undefined -fno-omit-frame-pointer")
129221
foreach(tgt netgraph_core _netgraph_core)
130222
target_compile_options(${tgt} PRIVATE ${SAN_FLAGS})
131223
target_link_options(${tgt} PRIVATE ${SAN_FLAGS})
132224
endforeach()
133225
else()
134-
message(WARNING "NETGRAPH_CORE_SANITIZE is set but compiler '${CMAKE_CXX_COMPILER_ID}' is not supported")
226+
message(WARNING "NETGRAPH_CORE_SANITIZE requires GCC or Clang")
135227
endif()
136228
endif()
137229

Makefile

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# NetGraph-Core Development Makefile
22

3-
.PHONY: help venv clean-venv dev install check check-ci lint format test qt build clean check-dist publish-test publish info hooks check-python cpp-test cov sanitize-test rebuild
3+
.PHONY: help venv clean-venv dev install install-native install-pgo check check-ci lint format test qt build clean check-dist publish-test publish info hooks check-python cpp-test cov sanitize-test rebuild
44

55
.DEFAULT_GOAL := help
66

@@ -41,7 +41,9 @@ help:
4141
@echo "Setup & Installation:"
4242
@echo " make venv - Create a local virtualenv (./venv)"
4343
@echo " make dev - Full development environment (package + dev deps + hooks)"
44-
@echo " make install - Install package for usage (no dev dependencies)"
44+
@echo " make install - Install package (default optimizations: LTO, loop unrolling)"
45+
@echo " make install-native - Install with CPU-specific optimizations (faster, not portable)"
46+
@echo " make install-pgo - Profile-guided optimization (two-phase build, experimental)"
4547
@echo " make clean-venv - Remove virtual environment"
4648
@echo " make rebuild - Clean and rebuild (respects CMAKE_ARGS)"
4749
@echo ""
@@ -115,9 +117,39 @@ clean-venv:
115117
@rm -rf venv/
116118

117119
install:
118-
@echo "📦 Installing package (editable)"
120+
@echo "📦 Installing package (editable, default optimizations)"
119121
@$(DEV_ENV) $(PIP) install -e .
120122

123+
install-native:
124+
@echo "📦 Installing with native CPU optimizations (-march=native)"
125+
@echo " Note: Binary will only work on this CPU architecture"
126+
@$(ENV_MACOS) $(ENV_CC) $(ENV_CXX) CMAKE_ARGS="$(strip $(CMAKE_ARGS) -DNETGRAPH_CORE_NATIVE=ON)" $(PIP) install -e .
127+
128+
# PGO profile data is stored outside build/ so that it survives the `rm -rf build/` between the instrumented build and the final rebuild
129+
PGO_DIR := $(PWD)/.pgo-profile
130+
131+
install-pgo:
132+
@echo "📦 PGO Build Phase 1/3: Instrumenting..."
133+
@rm -rf build/ $(PGO_DIR)
134+
@mkdir -p $(PGO_DIR)
135+
@$(ENV_MACOS) $(ENV_CC) $(ENV_CXX) CMAKE_ARGS="$(strip $(CMAKE_ARGS) -DNETGRAPH_CORE_PGO_GENERATE=ON -DNETGRAPH_CORE_PGO_DIR=$(PGO_DIR) -DNETGRAPH_CORE_NATIVE=ON)" $(PIP) install -e .
136+
@echo "📦 PGO Build Phase 2/3: Collecting profile (running benchmark)..."
137+
@$(PYTHON) dev/benchmark_profiling_overhead.py --mesh-size 25 --spf-iters 1000 --flow-iters 200 >/dev/null 2>&1
138+
@# Clang: merge raw profiles into .profdata (profile may be in cwd or PGO_DIR)
139+
@if ls $(PGO_DIR)/*.profraw >/dev/null 2>&1; then \
140+
echo " Merging Clang profile data..."; \
141+
xcrun llvm-profdata merge -output=$(PGO_DIR)/default.profdata $(PGO_DIR)/*.profraw 2>/dev/null || \
142+
llvm-profdata merge -output=$(PGO_DIR)/default.profdata $(PGO_DIR)/*.profraw; \
143+
elif ls *.profraw >/dev/null 2>&1; then \
144+
mv *.profraw $(PGO_DIR)/; \
145+
xcrun llvm-profdata merge -output=$(PGO_DIR)/default.profdata $(PGO_DIR)/*.profraw 2>/dev/null || \
146+
llvm-profdata merge -output=$(PGO_DIR)/default.profdata $(PGO_DIR)/*.profraw; \
147+
fi
148+
@echo "📦 PGO Build Phase 3/3: Rebuilding with profile data..."
149+
@rm -rf build/
150+
@$(ENV_MACOS) $(ENV_CC) $(ENV_CXX) CMAKE_ARGS="$(strip $(CMAKE_ARGS) -DNETGRAPH_CORE_PGO_USE=ON -DNETGRAPH_CORE_PGO_DIR=$(PGO_DIR) -DNETGRAPH_CORE_NATIVE=ON)" $(PIP) install -e .
151+
@echo "✅ PGO build complete"
152+
121153
check:
122154
@PYTHON=$(PYTHON) bash dev/run-checks.sh
123155

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ Or from source:
6464
pip install -e .
6565
```
6666

67+
### Build Optimizations
68+
69+
Default builds enable LTO and loop unrolling. For local development, you can additionally enable CPU-specific optimizations:
70+
71+
```bash
72+
make install-native # CPU-specific optimizations (not portable)
73+
```
74+
6775
## Repository Structure
6876

6977
```
@@ -81,9 +89,9 @@ tests/py/ # Python tests (pytest)
8189
make dev # Setup: venv, dependencies, pre-commit hooks
8290
make check # Run all tests and linting (auto-fix formatting)
8391
make check-ci # Strict checks without auto-fix (for CI)
92+
make test # Python tests with coverage
8493
make cpp-test # C++ tests only
85-
make py-test # Python tests only
86-
make cov # Coverage report (C++ + Python)
94+
make cov # Combined coverage report (C++ + Python)
8795
```
8896

8997
## Requirements

bindings/python/module.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "netgraph/core/backend.hpp"
2424
#include "netgraph/core/algorithms.hpp"
2525
#include "netgraph/core/options.hpp"
26+
#include "netgraph/core/profiling.hpp"
2627

2728
namespace py = pybind11;
2829
using namespace netgraph::core;
@@ -604,4 +605,12 @@ PYBIND11_MODULE(_netgraph_core, m) {
604605
py::arg("flow_graph"), py::arg("src"), py::arg("dst"), py::arg("flowClass"), py::arg("target"))
605606
.def("remove_demand", [](FlowPolicy& p, FlowGraph& fg){ py::gil_scoped_release rel; p.remove_demand(fg); py::gil_scoped_acquire acq; })
606607
.def_property_readonly("flows", [](const FlowPolicy& p){ py::dict out; for (auto const& kv : p.flows()) { const auto& idx = kv.first; const auto& f = kv.second; out[py::make_tuple(idx.src, idx.dst, idx.flowClass, idx.flowId)] = py::make_tuple(f.src, f.dst, f.cost, f.placed_flow); } return out; });
608+
609+
// Profiling functions (enabled via NGRAPH_CORE_PROFILE=1 environment variable)
610+
m.def("profiling_enabled", &profiling_enabled,
611+
"Return True if profiling is enabled (NGRAPH_CORE_PROFILE=1).");
612+
m.def("profiling_dump", []{ ProfilingStats::instance().dump(); },
613+
"Print profiling statistics to stderr.");
614+
m.def("profiling_reset", []{ ProfilingStats::instance().reset(); },
615+
"Clear all profiling statistics.");
607616
}

0 commit comments

Comments
 (0)