diff --git a/Benchmarking/CMakeLists.txt b/Benchmarking/CMakeLists.txt
index d1326c13..f17b926d 100644
--- a/Benchmarking/CMakeLists.txt
+++ b/Benchmarking/CMakeLists.txt
@@ -28,7 +28,11 @@ endif()
file(GLOB_RECURSE HELPER_SRCS "src/*Helpers.cpp")
add_library(benchmarkHelpers ${HELPER_SRCS})
target_link_libraries(benchmarkHelpers ${PROJECT_NAME} ${PINOCCHIO_LIBRARIES} console_bridge gtest)
+target_compile_definitions(benchmarkHelpers PRIVATE
+ BOOST_MPL_CFG_NO_PREPROCESSED_HEADERS
+ BOOST_MPL_LIMIT_LIST_SIZE=30)
add_executable(pinocchio_benchmark src/pinocchioBenchmark.cpp)
target_link_libraries(pinocchio_benchmark benchmarkHelpers)
target_link_libraries(pinocchio_benchmark gtest gmock_main)
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size11.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size11.urdf
new file mode 100644
index 00000000..a1145c0a
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size11.urdf
@@ -0,0 +1,288 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size15.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size15.urdf
new file mode 100644
index 00000000..38f17bfa
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size15.urdf
@@ -0,0 +1,288 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size19.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size19.urdf
new file mode 100644
index 00000000..98396c66
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size19.urdf
@@ -0,0 +1,288 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size7.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size7.urdf
new file mode 100644
index 00000000..2f98badc
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/approx_loop_size7.urdf
@@ -0,0 +1,288 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size11.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size11.urdf
new file mode 100644
index 00000000..63ca7935
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size11.urdf
@@ -0,0 +1,311 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size15.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size15.urdf
new file mode 100644
index 00000000..4b3ace30
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size15.urdf
@@ -0,0 +1,311 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size19.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size19.urdf
new file mode 100644
index 00000000..473a8f0c
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size19.urdf
@@ -0,0 +1,311 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size7.urdf b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size7.urdf
new file mode 100644
index 00000000..fdc6b135
--- /dev/null
+++ b/Benchmarking/urdfs/parallel_chains/Implicit/depth10/loop_size7.urdf
@@ -0,0 +1,311 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 30f7744d..3f09a645 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,12 @@ cmake_minimum_required(VERSION 3.13)
project(grbda VERSION 2.1.0)
+if(CMAKE_BUILD_TYPE MATCHES Debug)
+ message(STATUS "Debug build")
+ add_compile_options(-O0 -g3)
+ # add_compile_definitions(EIGEN_INITIALIZE_MATRICES_BY_NAN)
+endif()
+
message(STATUS "============= !Generalized Rigid-Body Dynamics Algorithms! =============")
################################################################################
@@ -18,13 +24,20 @@ option(ASAN "set compile options for Address Sanitizer" OFF)
################################################################################
message(STATUS "======> Find Dependencies")
-find_package(Eigen3 REQUIRED)
-if(Eigen3_FOUND)
- message(STATUS "Eigen3 found")
- include_directories(${EIGEN3_INCLUDE_DIR})
- include_directories(${EIGEN3_INCLUDE_DIR}/..)
+
+set(EIGEN_LOCAL_PATH "${PROJECT_SOURCE_DIR}/grbda-dependencies/eigen-3.4.0")
+if(EXISTS "${EIGEN_LOCAL_PATH}/Eigen/StdVector")
+ message(STATUS "Using local Eigen at ${EIGEN_LOCAL_PATH}")
+ include_directories(${EIGEN_LOCAL_PATH})
else()
- message(FATAL_ERROR "Eigen3 not found. Please install it or specify its location.")
+ find_package(Eigen3 REQUIRED)
+ if(Eigen3_FOUND)
+ message(STATUS "Eigen3 found via find_package")
+ include_directories(${EIGEN3_INCLUDE_DIR})
+ include_directories(${EIGEN3_INCLUDE_DIR}/..)
+ else()
+ message(FATAL_ERROR "Eigen3 not found. Please install it or specify its location.")
+ endif()
endif()
find_package(casadi REQUIRED)
@@ -72,7 +85,11 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}/cmake")
set(CMAKE_PREFIX_PATH "${CMAKE_PREFIX_PATH};${PROJECT_SOURCE_DIR}/cmake")
-set(CMAKE_BUILD_TYPE Release)
+# Set default build type if not specified
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
add_definitions(-DDEBUG_MODE)
endif()
@@ -87,14 +104,23 @@ elseif(MARCH_NATIVE)
endif()
set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -ggdb -Wall \
+
+# Base warning flags for all builds
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -Wall \
-Wextra -Wcast-align -Wdisabled-optimization -Wformat=2 \
-Winit-self -Wmissing-include-dirs -Woverloaded-virtual \
-Wshadow -Wsign-promo -Wno-sign-compare -Wno-unused-const-variable \
-Wno-unused-parameter -Wno-unused-variable -Wno-uninitialized")
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -ggdb -std=gnu99 -I.")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ggdb -std=gnu99 -I.")
+
+# Optimization flags per build type (don't override Debug's -O0!)
+if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3")
+endif()
if(ASAN)
+ message(WARNING "ASAN option is deprecated. Use -DCMAKE_BUILD_TYPE=Debug instead.")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
endif()
@@ -171,3 +197,6 @@ if(BUILD_BENCHMARKS)
message(STATUS "======> Setup Benchmarks ")
add_subdirectory(Benchmarking)
endif()
+
+message(STATUS "======> Setup RAL 2026 Benchmarks ")
+add_subdirectory(RAL_2026_Benchmarking)
\ No newline at end of file
diff --git a/RAL_2026_Benchmarking/CMakeLists.txt b/RAL_2026_Benchmarking/CMakeLists.txt
new file mode 100644
index 00000000..4b9fc5c2
--- /dev/null
+++ b/RAL_2026_Benchmarking/CMakeLists.txt
@@ -0,0 +1,7 @@
+file(GLOB BENCH_SRCS "*.cpp")
+
+foreach(bench_source ${BENCH_SRCS})
+ get_filename_component(bench_name ${bench_source} NAME_WE)
+ add_executable(${bench_name} ${bench_source})
+ target_link_libraries(${bench_name} PRIVATE ${PROJECT_NAME})
+endforeach()
diff --git a/RAL_2026_Benchmarking/README.md b/RAL_2026_Benchmarking/README.md
new file mode 100644
index 00000000..f4036fd7
--- /dev/null
+++ b/RAL_2026_Benchmarking/README.md
@@ -0,0 +1,95 @@
+# Inverse Dynamics Derivatives Benchmarks
+
+Performance benchmarks for `firstOrderInverseDynamicsDerivatives` and related algorithms in GRBDA. Each executable is self-contained with its own `main()`, links only against the `grbda` library, and is built when `BUILD_BENCHMARKS=ON`.
+
+---
+
+## Benchmarks
+
+### `benchmarkIDDerivatives`
+
+Measures the average execution time of `firstOrderInverseDynamicsDerivatives` across a representative set of robots. Runs 1000 timed iterations after a warmup call (which also triggers CasADi JIT compilation for configuration-dependent joints). Reports average time in microseconds and exports results to `benchmark_figures/data/robot_performance.csv`.
+
+**Robots covered:** MiniCheetah (with/without rotors), MIT Humanoid (with/without rotors), Tello factorial design (`-R/-M`, `+R/-M`, `+R/+M`), TelloWithArms, KUKA LWR 4+, Cassie.
+
+---
+
+### `benchmarkIDDerivativesComparison`
+
+Compares `firstOrderInverseDynamicsDerivatives` (standard, body-frame) against `firstOrderInverseDynamicsDerivativesWorldFrame` side-by-side. For each model, both variants are timed at 5000 iterations and a correctness check is performed (relative error threshold 1e-6 for most robots, 0.1 for floating-base robots with implicit constraints). Reports speedup as `standard_time / world_frame_time`.
+
+**Models covered:** RevoluteChain (1-body clusters, N=4–20), RevolutePairChain (2-body clusters, N=2–8), RevoluteTripleChain (3-body clusters, N=3–9), Tello variants, TeleopArm, MiniCheetah, MIT Humanoid, and several URDF-parsed robots (JVRC1, Kuka LWR 4+).
+
+---
+
+### `benchmarkIDDerivativesScaling`
+
+Measures how computation time and numerical accuracy scale with the number of links, for serial and binary-tree topologies. Validates analytical derivatives against central-difference finite differences (h=1e-7), using the constraint Jacobian G to project perturbations onto the constraint manifold for implicit joints. Exports results to CSV.
+
+**Topologies:** Serial chains (RevoluteChainWithRotor, RevolutePairChainWithRotor, RevoluteTripleChainWithRotor) and binary-branching trees, swept across increasing link counts. Also tests specific robots (Tello, MiniCheetah, MIT Humanoid).
+
+---
+
+### `benchmarkIDDerivativesBreakdown`
+
+Fine-grained profiling that breaks total cost into six buckets:
+
+| Bucket | What it measures |
+|---|---|
+| `fwd_kin` | `forwardAccelerationKinematics` |
+| `fwd_casadi` | `evalSTimesVec_dq` (×2) + `getSdotqd_q` per cluster (config-dependent S joints only) |
+| `fwd_other` | Remaining forward pass: Ψ̇, Ψ̈, Υ̇, M_cup, B_cup, F |
+| `bwd_casadi` | `evalSTTimesVec_dq` on the diagonal (config-dependent S joints only) |
+| `bwd_other` | t1–t4 setup + walk-to-root block fills |
+| `bwd_prop` | `accumulateBlockDiagonalPair` + `inverseTransformForceVector` to propagate to parent cluster |
+
+Calls `enableIDDerivativesProfiling()` / `getIDDerivativesProfilingData()` / `resetIDDerivativesProfiling()` from the grbda profiling API. Exports per-robot breakdown to `Benchmarking/data/fig4_performance_breakdown_current.csv` (used for figure generation).
+
+**Robots covered:** KUKA LWR 4+, MiniCheetah (±R), MIT Humanoid (±R), Tello (-R/-M, +R/-M, +R/+M), TelloWithArms, Cassie.
+
+---
+
+### `benchmarkComplexJointChains`
+
+Investigates how per-cluster joint complexity interacts with the number of clusters. Compares RevoluteChainWithRotor (1 DOF/cluster) against RevolutePairChainWithRotor (2 DOF/cluster) at equal DOF counts (6 and 12 DOF). Reports per-cluster cost and fits an empirical time-complexity exponent O(n^α) from the smallest-to-largest chain ratio.
+
+Also validates analytical ID derivatives against central-difference finite differences, with constraint-manifold-aware perturbations for implicit joints.
+
+---
+
+### `benchmarkCRBAComparison`
+
+Parallel to `benchmarkIDDerivativesComparison` but for the Composite Rigid-Body Algorithm (CRBA). Compares `runStandardCRBA` against `runWorldFrameCRBA` at 10000 iterations. Verifies that both produce the same mass matrix H (relative error threshold 1e-10). Reports DOF, body count, times, and speedup.
+
+**Models covered:** Same sweep as `benchmarkIDDerivativesComparison` — RevoluteChain, RevolutePairChain, RevoluteTripleChain, Tello variants, TeleopArm, MiniCheetah, MIT Humanoid, JVRC1, Kuka LWR 4+.
+
+---
+
+### `benchmarkParallelChainDepth`
+
+Measures how ID derivative cost varies with loop size in A-shaped parallel chain topologies. Loads pre-built URDF files from `Benchmarking/urdfs/parallel_chains/Implicit/` (depths 5, 10, 20, 40; loop sizes determined by connection depth via `loop_size = 2 * connection_depth + 1`). Each configuration is compared against a no-loop baseline. Uses a fixed random seed (42) for reproducibility.
+
+---
+
+## Build
+
+- Create a build directory: `mkdir build && cd build`
+- Run CMake: `cmake ..`
+- Build: `make`
+- Each benchmark can be run individually with `./bin/`, e.g. `./bin/benchmarkIDDerivatives`
+
+## Output
+
+All benchmarks print a formatted table to stdout. Four of them additionally write CSV files:
+
+- `benchmarkIDDerivatives` → `benchmark_figures/data/robot_performance.csv`
+ Columns: `robot_name, label, dof, time_us`
+
+- `benchmarkIDDerivativesScaling` → `benchmark_figures/data/serial_chain_scaling.csv`, `binary_tree_scaling.csv`, `complex_joint_scaling.csv`
+ Columns: `topology, joint_type, num_links, dof, time_us, max_err_dq, max_err_dqd`
+
+- `benchmarkIDDerivativesBreakdown` → `Benchmarking/data/fig4_performance_breakdown_current.csv`
+ Columns: `robot_name, label, dof, bodies, fwd_kin_us, fwd_casadi_us, fwd_other_us, bwd_casadi_us, bwd_other_us, bwd_prop_us, total_us`
+
+- `benchmarkParallelChainDepth` → `Benchmarking/data/parallel_chain_depth.csv` and `loop_depth_sweep.csv`
+ Columns: `chain_depth, loop_size, connection_depth, dof, num_bodies, is_baseline, ...` (per-pass timing and profiling breakdown)
diff --git a/RAL_2026_Benchmarking/benchmarkCRBAComparison.cpp b/RAL_2026_Benchmarking/benchmarkCRBAComparison.cpp
new file mode 100644
index 00000000..87a7bd2f
--- /dev/null
+++ b/RAL_2026_Benchmarking/benchmarkCRBAComparison.cpp
@@ -0,0 +1,321 @@
+#include
+#include
+#include
+#include
+#include
+#include "grbda/Dynamics/ClusterTreeModel.h"
+#include "grbda/Robots/RobotTypes.h"
+#include "config.h"
+
+using namespace grbda;
+
+struct BenchmarkResult {
+ std::string name;
+ int dof;
+ int num_bodies;
+ double standard_crba_us;
+ double world_frame_crba_us;
+ double speedup;
+ int iterations;
+};
+
+template
+BenchmarkResult benchmarkModel(ModelType& model, const std::string& name, int iterations = 10000) {
+ const int nDOF = model.getNumDegreesOfFreedom();
+ const int nBodies = model.getNumBodies();
+
+ // Set random state
+ ModelState model_state;
+ for (const auto& cluster : model.clusters()) {
+ model_state.push_back(cluster->joint_->randomJointState());
+ }
+ model.setState(model_state);
+
+ // Warmup
+ for (int i = 0; i < 100; ++i) {
+ model.runStandardCRBA();
+ model.runWorldFrameCRBA();
+ }
+
+ // Benchmark standard CRBA
+ auto start_std = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.runStandardCRBA();
+ }
+ auto end_std = std::chrono::high_resolution_clock::now();
+ double std_time_us = std::chrono::duration(end_std - start_std).count() / iterations;
+
+ DMat H_standard = model.getH();
+
+ // Benchmark world-frame CRBA
+ auto start_wf = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.runWorldFrameCRBA();
+ }
+ auto end_wf = std::chrono::high_resolution_clock::now();
+ double wf_time_us = std::chrono::duration(end_wf - start_wf).count() / iterations;
+
+ DMat H_world = model.getH();
+
+ // Verify correctness
+ double error = (H_standard - H_world).norm() / H_standard.norm();
+ if (error > 1e-10) {
+ std::cout << "\n WARNING: " << name << " H matrix mismatch! Relative error: " << error << "\n";
+ }
+
+ return {name, nDOF, nBodies, std_time_us, wf_time_us, std_time_us / wf_time_us, iterations};
+}
+
+BenchmarkResult benchmarkURDF(const std::string& urdf_path, const std::string& name, int iterations = 10000) {
+ ClusterTreeModel model;
+ model.buildModelFromURDF(urdf_path);
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRevoluteChain(int iterations = 10000) {
+ RevoluteChainWithRotor robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ std::string name = "RevoluteChain<" + std::to_string(N) + ">";
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRevolutePairChain(int iterations = 10000) {
+ RevolutePairChainWithRotor robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ std::string name = "RevolutePairChain<" + std::to_string(N) + ">";
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRevoluteTripleChain(int iterations = 10000) {
+ RevoluteTripleChainWithRotor robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ std::string name = "RevoluteTripleChain<" + std::to_string(N) + ">";
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRobot(const std::string& name, int iterations = 10000) {
+ RobotType robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ return benchmarkModel(model, name, iterations);
+}
+
+// Version that tries multiple times to set state (for robots with implicit constraints)
+template
+BenchmarkResult benchmarkRobotWithRetry(const std::string& name, int iterations = 10000, int max_retries = 100) {
+ RobotType robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+
+ const int nDOF = model.getNumDegreesOfFreedom();
+ const int nBodies = model.getNumBodies();
+
+ // Try to set a valid random state
+ bool state_set = false;
+ for (int retry = 0; retry < max_retries && !state_set; ++retry) {
+ try {
+ ModelState model_state;
+ for (const auto& cluster : model.clusters()) {
+ model_state.push_back(cluster->joint_->randomJointState());
+ }
+ model.setState(model_state);
+ state_set = true;
+ } catch (const std::exception& e) {
+ // Try again with different random state
+ }
+ }
+
+ if (!state_set) {
+ std::cout << "\n ERROR: Could not set valid state for " << name << " after " << max_retries << " attempts\n";
+ return {name, nDOF, nBodies, -1.0, -1.0, 0.0, 0};
+ }
+
+ // Warmup
+ for (int i = 0; i < 100; ++i) {
+ model.runStandardCRBA();
+ model.runWorldFrameCRBA();
+ }
+
+ // Benchmark standard CRBA
+ auto start_std = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.runStandardCRBA();
+ }
+ auto end_std = std::chrono::high_resolution_clock::now();
+ double std_time_us = std::chrono::duration(end_std - start_std).count() / iterations;
+
+ DMat H_standard = model.getH();
+
+ // Benchmark world-frame CRBA
+ auto start_wf = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.runWorldFrameCRBA();
+ }
+ auto end_wf = std::chrono::high_resolution_clock::now();
+ double wf_time_us = std::chrono::duration(end_wf - start_wf).count() / iterations;
+
+ DMat H_world = model.getH();
+
+ // Verify correctness
+ double error = (H_standard - H_world).norm() / H_standard.norm();
+ if (error > 1e-10) {
+ std::cout << "\n WARNING: " << name << " H matrix mismatch! Relative error: " << error << "\n";
+ }
+
+ return {name, nDOF, nBodies, std_time_us, wf_time_us, std_time_us / wf_time_us, iterations};
+}
+
+void printResults(const std::vector& results) {
+ std::cout << "\n";
+ std::cout << "============================================================================\n";
+ std::cout << " CRBA Benchmark Results\n";
+ std::cout << "============================================================================\n";
+ std::cout << std::left << std::setw(30) << "Model"
+ << std::right << std::setw(6) << "DOF"
+ << std::setw(8) << "Bodies"
+ << std::setw(14) << "Std (us)"
+ << std::setw(14) << "World (us)"
+ << std::setw(10) << "Speedup"
+ << "\n";
+ std::cout << "----------------------------------------------------------------------------\n";
+
+ for (const auto& r : results) {
+ std::cout << std::left << std::setw(30) << r.name
+ << std::right << std::setw(6) << r.dof
+ << std::setw(8) << r.num_bodies
+ << std::setw(14) << std::fixed << std::setprecision(2) << r.standard_crba_us
+ << std::setw(14) << std::fixed << std::setprecision(2) << r.world_frame_crba_us
+ << std::setw(10) << std::fixed << std::setprecision(3) << r.speedup
+ << "\n";
+ }
+ std::cout << "============================================================================\n";
+ std::cout << "Speedup > 1.0 means standard is faster than world-frame\n";
+ std::cout << "Speedup < 1.0 means world-frame is faster than standard\n\n";
+}
+
+int main() {
+ std::vector results;
+ const int ITERATIONS = 10000;
+ const std::string urdf_path = std::string(SOURCE_DIRECTORY) + "/robot-models";
+
+ std::cout << "\n=== CRBA Comparison Benchmark ===\n";
+ std::cout << "Comparing Standard CRBA vs World-Frame CRBA\n";
+ std::cout << "Iterations per test: " << ITERATIONS << "\n\n";
+
+ // Serial chains of different lengths
+ std::cout << "Benchmarking serial chains (single-body clusters)...\n";
+
+ std::cout << " RevoluteChain<4>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<4>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<8>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<8>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<12>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<12>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<16>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<16>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<20>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<20>(ITERATIONS));
+ std::cout << " done\n";
+
+ // RevolutePair chains (2-body clusters)
+ std::cout << "\nBenchmarking RevolutePair chains (2-body clusters)...\n";
+
+ std::cout << " RevolutePairChain<2>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<2>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevolutePairChain<4>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<4>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevolutePairChain<6>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<6>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevolutePairChain<8>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<8>(ITERATIONS));
+ std::cout << " done\n";
+
+ // RevoluteTriple chains (3-body clusters) - N must be divisible by 3
+ std::cout << "\nBenchmarking RevoluteTriple chains (3-body clusters)...\n";
+
+ std::cout << " RevoluteTripleChain<3>..." << std::flush;
+ results.push_back(benchmarkRevoluteTripleChain<3>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteTripleChain<6>..." << std::flush;
+ results.push_back(benchmarkRevoluteTripleChain<6>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteTripleChain<9>..." << std::flush;
+ results.push_back(benchmarkRevoluteTripleChain<9>(ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello robot variations
+ std::cout << "\nBenchmarking Tello robot variations...\n";
+
+ std::cout << " TelloRotorsNoConstraints..." << std::flush;
+ results.push_back(benchmarkRobot>("TelloRotorsNoConstraints", ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello with loop constraints (need retry logic)
+ std::cout << " Tello (with constraints)..." << std::flush;
+ results.push_back(benchmarkRobotWithRetry>("Tello", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " TelloWithArms..." << std::flush;
+ results.push_back(benchmarkRobotWithRetry>("TelloWithArms", ITERATIONS));
+ std::cout << " done\n";
+
+ // Other built-in robots
+ std::cout << "\nBenchmarking other built-in robots...\n";
+
+ std::cout << " TeleopArm..." << std::flush;
+ results.push_back(benchmarkRobot("TeleopArm", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " MiniCheetah (with rotors)..." << std::flush;
+ results.push_back(benchmarkRobot>("MiniCheetah (rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " MIT_Humanoid (with rotors)..." << std::flush;
+ results.push_back(benchmarkRobot>("MIT_Humanoid (rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ // URDF-based robots
+ std::cout << "\nBenchmarking URDF robots...\n";
+
+ std::cout << " mini_cheetah (no rotors)..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/mini_cheetah.urdf",
+ "MiniCheetah (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " MIT Humanoid (no rotors)..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/mit_humanoid.urdf",
+ "MIT_Humanoid (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " JVRC1 Humanoid..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/jvrc1_humanoid.urdf",
+ "JVRC1 Humanoid (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " Kuka LWR 4+..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/kuka_lwr_4plus.urdf",
+ "Kuka LWR 4+ (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ printResults(results);
+
+ return 0;
+}
diff --git a/RAL_2026_Benchmarking/benchmarkComplexJointChains.cpp b/RAL_2026_Benchmarking/benchmarkComplexJointChains.cpp
new file mode 100644
index 00000000..f458c3e9
--- /dev/null
+++ b/RAL_2026_Benchmarking/benchmarkComplexJointChains.cpp
@@ -0,0 +1,409 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include "grbda/Dynamics/ClusterTreeModel.h"
+#include "grbda/Robots/RobotTypes.h"
+#include "config.h"
+
+using namespace grbda;
+
+// ============================================================================
+// Complex Joint Chain Scaling Benchmark
+// ============================================================================
+// This benchmark compares scaling behavior across different joint types:
+// 1. RevoluteChainWithRotor - simple revolute joints with rotors (baseline)
+// 2. RevolutePairChainWithRotor - coupled pairs of revolute joints
+//
+// For each joint type, we test chains of increasing length to understand
+// how the joint complexity affects computational scaling.
+// ============================================================================
+
+struct ChainResult {
+ std::string joint_type;
+ int num_links;
+ int num_clusters;
+ int dof;
+ double avg_time_us;
+ double max_error_dq;
+ double max_error_dqdot;
+};
+
+// Helper to compute finite difference derivatives for validation
+template
+// For implicit constraints, we perturb in independent coordinate space and use G to map
+// to spanning coordinates. This ensures the perturbation stays on the constraint manifold.
+std::pair, DMat> computeFiniteDifferenceDerivatives(
+ ClusterTreeModel& model,
+ const DVec& q,
+ const DVec& qd,
+ const DVec& ydd,
+ double h = 1e-7)
+{
+ const int nDOF = model.getNumDegreesOfFreedom();
+ DMat dtau_dq_numerical(nDOF, nDOF);
+ DMat dtau_dqdot_numerical(nDOF, nDOF);
+
+ // Numerical dtau/dq
+ for (int j = 0; j < nDOF; ++j) {
+ DVec dy_plus = DVec::Zero(nDOF);
+ dy_plus(j) = h;
+ DVec dy_minus = DVec::Zero(nDOF);
+ dy_minus(j) = -h;
+
+ ModelState state_plus;
+ int pos_idx = 0, vel_idx = 0;
+ for (const auto& cluster : model.clusters()) {
+ const int nv_cluster = cluster->num_velocities_;
+ const int np_cluster = cluster->num_positions_;
+ DVec dy_cluster = dy_plus.segment(vel_idx, nv_cluster);
+ if (cluster->joint_->isImplicit()) {
+ const DMat& G = cluster->joint_->G();
+ DVec q_pert = q.segment(pos_idx, np_cluster) + G * dy_cluster;
+ state_plus.push_back(JointState(
+ JointCoordinate(q_pert, true),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false)));
+ } else {
+ state_plus.push_back(JointState(
+ JointCoordinate(q.segment(pos_idx, np_cluster) + dy_cluster, false),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false)));
+ }
+ pos_idx += np_cluster;
+ vel_idx += nv_cluster;
+ }
+ model.setState(state_plus);
+ DVec tau_plus = model.inverseDynamics(ydd);
+
+ ModelState state_minus;
+ pos_idx = 0; vel_idx = 0;
+ for (const auto& cluster : model.clusters()) {
+ const int nv_cluster = cluster->num_velocities_;
+ const int np_cluster = cluster->num_positions_;
+ DVec dy_cluster = dy_minus.segment(vel_idx, nv_cluster);
+ if (cluster->joint_->isImplicit()) {
+ const DMat& G = cluster->joint_->G();
+ DVec q_pert = q.segment(pos_idx, np_cluster) + G * dy_cluster;
+ state_minus.push_back(JointState(
+ JointCoordinate(q_pert, true),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false)));
+ } else {
+ state_minus.push_back(JointState(
+ JointCoordinate(q.segment(pos_idx, np_cluster) + dy_cluster, false),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false)));
+ }
+ pos_idx += np_cluster;
+ vel_idx += nv_cluster;
+ }
+ model.setState(state_minus);
+ DVec tau_minus = model.inverseDynamics(ydd);
+
+ dtau_dq_numerical.col(j) = (tau_plus - tau_minus) / (2.0 * h);
+ }
+
+ // Numerical dtau/dqdot
+ for (int j = 0; j < nDOF; ++j) {
+ DVec qd_plus = qd;
+ qd_plus(j) += h;
+ DVec qd_minus = qd;
+ qd_minus(j) -= h;
+
+ ModelState state_plus;
+ int pos_idx = 0, vel_idx = 0;
+ for (const auto& cluster : model.clusters()) {
+ const bool pos_is_spanning = cluster->joint_->isImplicit();
+ state_plus.push_back(JointState(
+ JointCoordinate(q.segment(pos_idx, cluster->num_positions_), pos_is_spanning),
+ JointCoordinate(qd_plus.segment(vel_idx, cluster->num_velocities_), false)));
+ pos_idx += cluster->num_positions_;
+ vel_idx += cluster->num_velocities_;
+ }
+ model.setState(state_plus);
+ DVec tau_plus = model.inverseDynamics(ydd);
+
+ ModelState state_minus;
+ pos_idx = 0; vel_idx = 0;
+ for (const auto& cluster : model.clusters()) {
+ const bool pos_is_spanning = cluster->joint_->isImplicit();
+ state_minus.push_back(JointState(
+ JointCoordinate(q.segment(pos_idx, cluster->num_positions_), pos_is_spanning),
+ JointCoordinate(qd_minus.segment(vel_idx, cluster->num_velocities_), false)));
+ pos_idx += cluster->num_positions_;
+ vel_idx += cluster->num_velocities_;
+ }
+ model.setState(state_minus);
+ DVec tau_minus = model.inverseDynamics(ydd);
+
+ dtau_dqdot_numerical.col(j) = (tau_plus - tau_minus) / (2.0 * h);
+ }
+
+ return {dtau_dq_numerical, dtau_dqdot_numerical};
+}
+
+template
+ChainResult testChain(const std::string& joint_type, int num_links) {
+ try {
+ ChainType chain;
+ ClusterTreeModel model = chain.buildClusterTreeModel();
+ int nDOF = model.getNumDegreesOfFreedom();
+ int num_clusters = static_cast(model.clusters().size());
+
+ if (nDOF == 0) {
+ throw std::runtime_error("Model has zero DOF");
+ }
+
+ // Set random state
+ ModelState model_state;
+ for (const auto& cluster : model.clusters()) {
+ model_state.push_back(cluster->joint_->randomJointState());
+ }
+ model.setState(model_state);
+ auto [q, qd] = model.getState();
+ DVec ydd = DVec::Random(nDOF);
+
+ // Single warmup call to trigger CasADi JIT compilation for any config-dependent S joints
+ {
+ auto [dtau_dq, dtau_dqdot] =
+ model.firstOrderInverseDynamicsDerivatives(ydd);
+ (void)dtau_dq;
+ (void)dtau_dqdot;
+ }
+
+ // Timed runs
+ const int num_iterations = 1000;
+ auto start = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < num_iterations; ++i) {
+ auto [dtau_dq, dtau_dqdot] =
+ model.firstOrderInverseDynamicsDerivatives(ydd);
+ (void)dtau_dq;
+ (void)dtau_dqdot;
+ }
+ auto end = std::chrono::high_resolution_clock::now();
+
+ double total_time_us = std::chrono::duration(end - start).count();
+ double avg_time_us = total_time_us / num_iterations;
+
+ // Compute analytical derivatives for error checking
+ auto [dtau_dq_analytical, dtau_dqdot_analytical] =
+ model.firstOrderInverseDynamicsDerivatives(ydd);
+
+ // Compute numerical derivatives
+ auto [dtau_dq_numerical, dtau_dqdot_numerical] =
+ computeFiniteDifferenceDerivatives(model, q, qd, ydd);
+
+ // Compute errors
+ DMat error_dq = (dtau_dq_analytical - dtau_dq_numerical).cwiseAbs();
+ DMat error_dqdot = (dtau_dqdot_analytical - dtau_dqdot_numerical).cwiseAbs();
+
+ return {joint_type, num_links, num_clusters, nDOF, avg_time_us,
+ error_dq.maxCoeff(), error_dqdot.maxCoeff()};
+
+ } catch (const std::exception& e) {
+ std::cerr << "Error testing " << joint_type << " with " << num_links << " links: " << e.what() << "\n";
+ return {joint_type + " (ERROR)", num_links, 0, -1, 0, 0, 0};
+ }
+}
+
+void printHeader() {
+ std::cout << std::left << std::setw(20) << "Joint Type"
+ << std::setw(8) << "Links"
+ << std::setw(10) << "Clusters"
+ << std::setw(6) << "DOF"
+ << std::setw(14) << "Time (us)"
+ << std::setw(14) << "Max Err dq"
+ << std::setw(14) << "Max Err dqd"
+ << "\n";
+ std::cout << std::string(86, '-') << "\n";
+}
+
+void printResult(const ChainResult& r) {
+ if (r.dof > 0) {
+ std::cout << std::left << std::setw(20) << r.joint_type
+ << std::setw(8) << r.num_links
+ << std::setw(10) << r.num_clusters
+ << std::setw(6) << r.dof
+ << std::setw(14) << std::fixed << std::setprecision(2) << r.avg_time_us
+ << std::setw(14) << std::scientific << std::setprecision(2) << r.max_error_dq
+ << std::setw(14) << r.max_error_dqdot
+ << "\n";
+ } else {
+ std::cout << std::left << std::setw(20) << r.joint_type
+ << std::setw(8) << r.num_links
+ << std::setw(10) << "N/A"
+ << std::setw(6) << "N/A"
+ << std::setw(14) << "FAILED"
+ << std::setw(14) << ""
+ << std::setw(14) << ""
+ << "\n";
+ }
+}
+
+void analyzeScaling(const std::vector& results, const std::string& name) {
+ // Filter out failed results
+ std::vector valid_results;
+ for (const auto& r : results) {
+ if (r.dof > 0) valid_results.push_back(r);
+ }
+
+ if (valid_results.size() < 2) return;
+
+ double log_ratio_time = std::log(valid_results.back().avg_time_us / valid_results[0].avg_time_us) /
+ std::log(static_cast(valid_results.back().num_links) / valid_results[0].num_links);
+
+ std::cout << name << " Scaling:\n";
+ std::cout << " Time complexity: O(n^" << std::fixed << std::setprecision(2) << log_ratio_time << ")\n";
+ std::cout << " Time per DOF (smallest): " << std::fixed << std::setprecision(2)
+ << valid_results[0].avg_time_us / valid_results[0].dof << " us/DOF\n";
+ std::cout << " Time per DOF (largest): " << std::fixed << std::setprecision(2)
+ << valid_results.back().avg_time_us / valid_results.back().dof << " us/DOF\n\n";
+}
+
+int main() {
+ std::cout << "\n===========================================================================\n";
+ std::cout << "Complex Joint Chain Scaling Benchmark\n";
+ std::cout << "===========================================================================\n\n";
+
+ // =========================================================================
+ // Test 1: RevoluteChainWithRotor (Baseline)
+ // =========================================================================
+ std::cout << "Test 1: RevoluteChainWithRotor - Baseline (1 DOF per cluster)\n";
+ printHeader();
+
+ std::vector simple_results;
+
+ auto s2 = testChain>("RevWithRotor", 2);
+ simple_results.push_back(s2);
+ printResult(s2);
+
+ auto s4 = testChain>("RevWithRotor", 4);
+ simple_results.push_back(s4);
+ printResult(s4);
+
+ auto s6 = testChain>("RevWithRotor", 6);
+ simple_results.push_back(s6);
+ printResult(s6);
+
+ auto s8 = testChain>("RevWithRotor", 8);
+ simple_results.push_back(s8);
+ printResult(s8);
+
+ auto s10 = testChain>("RevWithRotor", 10);
+ simple_results.push_back(s10);
+ printResult(s10);
+
+ auto s12 = testChain>("RevWithRotor", 12);
+ simple_results.push_back(s12);
+ printResult(s12);
+
+ std::cout << std::string(86, '-') << "\n\n";
+ analyzeScaling(simple_results, "RevoluteChainWithRotor");
+
+ // =========================================================================
+ // Test 2: RevolutePairChainWithRotor (2 DOF per cluster)
+ // =========================================================================
+ std::cout << "Test 2: RevolutePairChainWithRotor - Coupled Pairs (2 DOF per cluster)\n";
+ printHeader();
+
+ std::vector pair_results;
+
+ auto p2 = testChain>("RevPairWithRotor", 2);
+ pair_results.push_back(p2);
+ printResult(p2);
+
+ auto p4 = testChain>("RevPairWithRotor", 4);
+ pair_results.push_back(p4);
+ printResult(p4);
+
+ auto p6 = testChain>("RevPairWithRotor", 6);
+ pair_results.push_back(p6);
+ printResult(p6);
+
+ auto p8 = testChain>("RevPairWithRotor", 8);
+ pair_results.push_back(p8);
+ printResult(p8);
+
+ auto p10 = testChain>("RevPairWithRotor", 10);
+ pair_results.push_back(p10);
+ printResult(p10);
+
+ auto p12 = testChain>("RevPairWithRotor", 12);
+ pair_results.push_back(p12);
+ printResult(p12);
+
+ std::cout << std::string(86, '-') << "\n\n";
+ analyzeScaling(pair_results, "RevolutePairChainWithRotor");
+
+ // =========================================================================
+ // Comparison at Same DOF
+ // =========================================================================
+ std::cout << "===========================================================================\n";
+ std::cout << "Comparison: Performance at Same DOF Count\n";
+ std::cout << "===========================================================================\n\n";
+
+ // 6 DOF comparison
+ std::cout << "6 DOF Systems:\n";
+ std::cout << " RevWithRotor (6 links, 6 clusters): " << std::fixed << std::setprecision(2)
+ << s6.avg_time_us << " us";
+ if (s6.dof > 0 && p6.dof > 0) {
+ std::cout << " (baseline)\n";
+ } else {
+ std::cout << "\n";
+ }
+
+ std::cout << " RevPairWithRotor (6 links, 3 clusters): " << std::fixed << std::setprecision(2)
+ << p6.avg_time_us << " us";
+ if (s6.dof > 0 && p6.dof > 0 && s6.avg_time_us > 0) {
+ double ratio = p6.avg_time_us / s6.avg_time_us;
+ std::cout << " (" << std::fixed << std::setprecision(1) << ratio << "x baseline)\n";
+ } else {
+ std::cout << "\n";
+ }
+
+ // 12 DOF comparison
+ std::cout << "\n12 DOF Systems:\n";
+ std::cout << " RevWithRotor (12 links, 12 clusters): " << std::fixed << std::setprecision(2)
+ << s12.avg_time_us << " us";
+ if (s12.dof > 0) {
+ std::cout << " (baseline)\n";
+ } else {
+ std::cout << "\n";
+ }
+
+ std::cout << " RevPairWithRotor (12 links, 6 clusters): " << std::fixed << std::setprecision(2)
+ << p12.avg_time_us << " us";
+ if (s12.dof > 0 && p12.dof > 0 && s12.avg_time_us > 0) {
+ double ratio = p12.avg_time_us / s12.avg_time_us;
+ std::cout << " (" << std::fixed << std::setprecision(1) << ratio << "x baseline)\n";
+ } else {
+ std::cout << "\n";
+ }
+
+ // =========================================================================
+ // Cluster Overhead Analysis
+ // =========================================================================
+ std::cout << "\n===========================================================================\n";
+ std::cout << "Analysis: Cluster Complexity vs Number of Clusters\n";
+ std::cout << "===========================================================================\n\n";
+
+ std::cout << "Time per Cluster (6 DOF systems):\n";
+ if (s6.num_clusters > 0)
+ std::cout << " RevWithRotor: " << std::fixed << std::setprecision(2)
+ << s6.avg_time_us / s6.num_clusters << " us/cluster\n";
+ if (p6.num_clusters > 0)
+ std::cout << " RevPairWithRotor: " << std::fixed << std::setprecision(2)
+ << p6.avg_time_us / p6.num_clusters << " us/cluster\n";
+ std::cout << "\nObservations:\n";
+ std::cout << "1. RevolutePair mechanisms have higher per-cluster cost\n";
+ std::cout << " due to transmission modules and constraint Jacobian computations.\n";
+ std::cout << "2. Fewer clusters (more DOF per cluster) may reduce algorithm overhead\n";
+ std::cout << " but increases per-cluster complexity.\n";
+ std::cout << "3. The trade-off depends on the specific constraint structure and\n";
+ std::cout << " whether S_q caching is effective.\n\n";
+
+ std::cout << "===========================================================================\n";
+ std::cout << "Benchmark Complete\n";
+ std::cout << "===========================================================================\n";
+
+ return 0;
+}
diff --git a/RAL_2026_Benchmarking/benchmarkIDDerivatives.cpp b/RAL_2026_Benchmarking/benchmarkIDDerivatives.cpp
new file mode 100644
index 00000000..074461b2
--- /dev/null
+++ b/RAL_2026_Benchmarking/benchmarkIDDerivatives.cpp
@@ -0,0 +1,224 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include "grbda/Dynamics/ClusterTreeModel.h"
+#include "grbda/Robots/RobotTypes.h"
+#include "config.h"
+
+using namespace grbda;
+
+struct BenchmarkResult {
+ std::string name;
+ int dof;
+ double avg_time_us;
+ int iterations;
+};
+
+BenchmarkResult benchmarkModel(ClusterTreeModel& model, const std::string& name, int iterations) {
+ const int nDOF = model.getNumDegreesOfFreedom();
+
+ // Set random state
+ ModelState model_state;
+ for (const auto& cluster : model.clusters()) {
+ model_state.push_back(cluster->joint_->randomJointState());
+ }
+ model.setState(model_state);
+
+ DVec ydd = DVec::Random(nDOF);
+
+ // Single warmup call to trigger CasADi JIT compilation for any config-dependent S joints
+ {
+ auto [dtau_dq, dtau_dqdot] = model.firstOrderInverseDynamicsDerivatives(ydd);
+ (void)dtau_dq;
+ (void)dtau_dqdot;
+ }
+
+ // Timed iterations
+ auto start = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ auto [dtau_dq, dtau_dqdot] = model.firstOrderInverseDynamicsDerivatives(ydd);
+ (void)dtau_dq;
+ (void)dtau_dqdot;
+ }
+ auto end = std::chrono::high_resolution_clock::now();
+
+ double total_us = std::chrono::duration(end - start).count();
+
+ return {name, nDOF, total_us / iterations, iterations};
+}
+
+template
+BenchmarkResult benchmarkRobot(const std::string& name, int iterations = 1000) {
+ RobotType robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ return benchmarkModel(model, name, iterations);
+}
+
+BenchmarkResult benchmarkURDF(const std::string& urdf_path, const std::string& name, int iterations = 1000) {
+ ClusterTreeModel model;
+ model.buildModelFromURDF(urdf_path);
+ return benchmarkModel(model, name, iterations);
+}
+
+int main() {
+ std::vector results;
+ const int ITERATIONS = 1000;
+ const std::string urdf_path = std::string(SOURCE_DIRECTORY) + "/robot-models";
+
+ std::cout << "\nLoading and benchmarking robots...\n";
+ std::cout << "URDF path: " << urdf_path << "\n\n";
+
+ // Mini Cheetah with rotors
+ std::cout << " Benchmarking MiniCheetah (with rotors)..." << std::flush;
+ results.push_back(benchmarkRobot>(
+ "MiniCheetah (with rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ // Mini Cheetah without rotors (URDF)
+ std::cout << " Benchmarking MiniCheetah (no rotors)..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/mini_cheetah_approximate.urdf",
+ "MiniCheetah (no rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ // MIT Humanoid with rotors
+ std::cout << " Benchmarking MIT_Humanoid (with rotors)..." << std::flush;
+ results.push_back(benchmarkRobot>(
+ "MIT_Humanoid (with rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ // MIT Humanoid without rotors
+ std::cout << " Benchmarking MIT_Humanoid (no rotors)..." << std::flush;
+ results.push_back(benchmarkRobot>(
+ "MIT_Humanoid (no rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ // ========== Tello Factorial Design: Isolating Rotor Dynamics & Constraint Overhead ==========
+ // Factorial design for computation time analysis:
+ // - Factor 1: Rotors (real inertia vs. none)
+ // - Factor 2: Constraints (GenericImplicit/CasADi vs. linear vs. none)
+ // This decomposition enables isolation of computational costs.
+
+ // Baseline: no rotors, no constraints (plain tree structure)
+ std::cout << " Benchmarking Tello (-R,-M) [BASELINE]..." << std::flush;
+ results.push_back(benchmarkRobot>("Tello (-R,-M) [base]", ITERATIONS));
+ std::cout << " done\n";
+
+ // With rotors only (real inertia, no constraint coupling)
+ std::cout << " Benchmarking Tello (+R,-M) [rotor cost]..." << std::flush;
+ results.push_back(benchmarkRobot>("Tello (+R,-M) [rotors]", ITERATIONS));
+ std::cout << " done\n";
+
+ // Full model: rotors + CasADi constraints (realistic robot)
+ std::cout << " Benchmarking Tello (+R,+M) [FULL MODEL]..." << std::flush;
+ results.push_back(benchmarkRobot>("Tello (+R,+M) [full]", ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello with Arms
+ std::cout << " Benchmarking TelloWithArms..." << std::flush;
+ results.push_back(benchmarkRobot>("TelloWithArms", ITERATIONS));
+ std::cout << " done\n";
+
+ // KUKA LWR 4+ (7-DOF serial chain)
+ std::cout << " Benchmarking KUKA LWR 4+..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/kuka_lwr_4plus.urdf",
+ "KUKA LWR 4+", ITERATIONS));
+ std::cout << " done\n";
+
+ // ========== Closed-Loop Humanoid Robots ==========
+
+ // Cassie (closed-loop leg)
+ std::cout << " Benchmarking Cassie (closed-loop)..." << std::flush;
+ results.push_back(benchmarkRobot>("Cassie (closed-loop)", ITERATIONS));
+ std::cout << " done\n";
+
+ // Print results table
+ std::cout << "\n" << std::string(75, '=') << "\n";
+ std::cout << "First Order ID Derivatives Benchmark Results\n";
+ std::cout << "Iterations per robot: " << ITERATIONS << "\n";
+ std::cout << std::string(75, '=') << "\n\n";
+
+ std::cout << std::left << std::setw(35) << "Robot"
+ << std::right << std::setw(8) << "DOF"
+ << std::setw(18) << "Avg Time (us)"
+ << std::setw(14) << "Iterations" << "\n";
+ std::cout << std::string(75, '-') << "\n";
+
+ for (const auto& r : results) {
+ std::cout << std::left << std::setw(35) << r.name
+ << std::right << std::setw(8) << r.dof
+ << std::setw(18) << std::fixed << std::setprecision(2) << r.avg_time_us
+ << std::setw(14) << r.iterations << "\n";
+ }
+
+ std::cout << std::string(75, '-') << "\n";
+
+ // Print comparison summary
+ std::cout << "\n" << std::string(75, '=') << "\n";
+ std::cout << "Speedup Summary (with vs without rotors/mechanisms)\n";
+ std::cout << std::string(75, '=') << "\n\n";
+
+ // MiniCheetah comparison
+ if (results.size() >= 2) {
+ double speedup = results[0].avg_time_us / results[1].avg_time_us;
+ std::cout << "MiniCheetah: " << std::fixed << std::setprecision(2)
+ << results[0].avg_time_us << " us (rotors) vs "
+ << results[1].avg_time_us << " us (no rotors) -> "
+ << speedup << "x\n";
+ }
+
+ // MIT Humanoid comparison
+ if (results.size() >= 4) {
+ double speedup = results[2].avg_time_us / results[3].avg_time_us;
+ std::cout << "MIT_Humanoid: " << std::fixed << std::setprecision(2)
+ << results[2].avg_time_us << " us (rotors) vs "
+ << results[3].avg_time_us << " us (no rotors) -> "
+ << speedup << "x\n";
+ }
+
+ // Tello comparison (3 variants at indices 4, 5, 6)
+ // 4: -R,-M (TelloNoRotors) baseline
+ // 5: +R,-M (TelloRotorsNoConstraints)
+ // 6: +R,+M (full Tello)
+ if (results.size() >= 7) {
+ std::cout << "Tello:\n";
+ std::cout << " -R,-M: " << std::fixed << std::setprecision(2) << results[4].avg_time_us << " us\n";
+ std::cout << " +R,-M: " << std::fixed << std::setprecision(2) << results[5].avg_time_us << " us\n";
+ std::cout << " +R,+M: " << std::fixed << std::setprecision(2) << results[6].avg_time_us << " us\n";
+ std::cout << " Rotor overhead: " << std::fixed << std::setprecision(2)
+ << results[5].avg_time_us / results[4].avg_time_us << "x (+R,-M vs -R,-M)\n";
+ std::cout << " Mechanism overhead: " << std::fixed << std::setprecision(2)
+ << results[6].avg_time_us / results[5].avg_time_us << "x (+R,+M vs +R,-M)\n";
+ std::cout << " Total overhead: " << std::fixed << std::setprecision(2)
+ << results[6].avg_time_us / results[4].avg_time_us << "x (+R,+M vs -R,-M)\n";
+ }
+
+ std::cout << "\n";
+
+ // Export results to CSV
+ std::string csv_path = std::string(SOURCE_DIRECTORY) + "/../benchmark_figures/data/robot_performance.csv";
+ std::ofstream csv(csv_path);
+ if (csv.is_open()) {
+ csv << "robot_name,label,dof,time_us\n";
+ for (const auto& r : results) {
+ // Create a clean CSV name from the display name
+ std::string csv_name = r.name;
+ // Replace spaces and special chars for CSV compatibility
+ for (char& c : csv_name) {
+ if (c == ' ' || c == '(' || c == ')' || c == '/' || c == ',') c = '_';
+ }
+ csv << csv_name << ","
+ << r.name << ","
+ << r.dof << ","
+ << std::fixed << std::setprecision(2) << r.avg_time_us << "\n";
+ }
+ csv.close();
+ std::cout << "CSV written to: " << csv_path << "\n";
+ } else {
+ std::cerr << "Warning: Could not write CSV to " << csv_path << "\n";
+ }
+
+ return 0;
+}
diff --git a/RAL_2026_Benchmarking/benchmarkIDDerivativesBreakdown.cpp b/RAL_2026_Benchmarking/benchmarkIDDerivativesBreakdown.cpp
new file mode 100644
index 00000000..60f12282
--- /dev/null
+++ b/RAL_2026_Benchmarking/benchmarkIDDerivativesBreakdown.cpp
@@ -0,0 +1,238 @@
+/**
+ * @file benchmarkIDDerivativesBreakdown.cpp
+ * @brief Benchmark ID derivatives with detailed profiling breakdown for figure generation.
+ *
+ * Outputs CSV data suitable for plotting performance breakdown figures.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "grbda/Dynamics/ClusterTreeModel.h"
+#include "grbda/Robots/RobotTypes.h"
+#include "config.h"
+
+using namespace grbda;
+
+// Profiling bucket definitions (see firstOrderInverseDynamicsDerivatives in ClusterTreeDynamics.cpp):
+//
+// Forward pass:
+// fwd_kin — forwardAccelerationKinematics only
+// fwd_casadi — evalSTimesVec_dq (x2) + getSdotqd_q per cluster; only for config-dependent S joints
+// fwd_other — remaining forward pass: Psi_dot, Psi_ddot, Upsilon_dot, M_cup, B_cup, F computation
+//
+// Backward pass:
+// bwd_casadi — evalSTTimesVec_dq on the diagonal; only for config-dependent S joints
+// bwd_other — t1-t4 setup (blockDiagonalInertiaTimesMotionSubspace) + walk-to-root block fills
+// (these two sub-costs are combined into one bucket)
+// bwd_prop — accumulateBlockDiagonalPair + inverseTransformForceVector to propagate M_cup/B_cup/F
+// to parent cluster
+struct ProfilingResult {
+ std::string robot_name; // Internal name for CSV
+ std::string label; // Display label
+ int dof;
+ int bodies;
+ double fwd_kin_us;
+ double fwd_casadi_us;
+ double fwd_other_us;
+ double bwd_casadi_us;
+ double bwd_other_us;
+ double bwd_prop_us;
+ double total_us;
+};
+
+ProfilingResult profileModel(ClusterTreeModel& model,
+ const std::string& robot_name,
+ const std::string& label,
+ int bodies,
+ int iterations = 1000) {
+ const int nDOF = model.getNumDegreesOfFreedom();
+
+ // Set random state
+ ModelState model_state;
+ for (const auto& cluster : model.clusters()) {
+ model_state.push_back(cluster->joint_->randomJointState());
+ }
+ model.setState(model_state);
+
+ DVec ydd = DVec::Random(nDOF);
+
+ // Warmup (100 calls)
+ for (int i = 0; i < 100; ++i) {
+ auto [dtau_dq, dtau_dqdot] = model.firstOrderInverseDynamicsDerivatives(ydd);
+ (void)dtau_dq;
+ (void)dtau_dqdot;
+ }
+
+ // Profile iterations
+ enableIDDerivativesProfiling();
+ for (int i = 0; i < iterations; ++i) {
+ auto [dtau_dq, dtau_dqdot] = model.firstOrderInverseDynamicsDerivatives(ydd);
+ (void)dtau_dq;
+ (void)dtau_dqdot;
+ }
+
+ auto data = getIDDerivativesProfilingData();
+ resetIDDerivativesProfiling();
+
+ return {
+ robot_name,
+ label,
+ nDOF,
+ bodies,
+ data[0], // fwd_kin
+ data[1], // fwd_casadi
+ data[2], // fwd_other
+ data[3], // bwd_casadi
+ data[4], // bwd_other
+ data[5], // bwd_prop
+ data[6] // total
+ };
+}
+
+template
+ProfilingResult profileRobot(const std::string& robot_name,
+ const std::string& label,
+ int bodies,
+ int iterations = 1000) {
+ RobotType robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ return profileModel(model, robot_name, label, bodies, iterations);
+}
+
+ProfilingResult profileURDF(const std::string& urdf_path,
+ const std::string& robot_name,
+ const std::string& label,
+ int bodies,
+ int iterations = 1000) {
+ ClusterTreeModel model;
+ model.buildModelFromURDF(urdf_path);
+ return profileModel(model, robot_name, label, bodies, iterations);
+}
+
+int main(int argc, char** argv) {
+ std::vector results;
+ const int ITERATIONS = 1000;
+ const std::string urdf_path = std::string(SOURCE_DIRECTORY) + "/robot-models";
+
+ std::cout << "\nProfiling ID derivatives breakdown...\n\n";
+
+ // KUKA LWR 4+
+ std::cout << " KUKA LWR 4+..." << std::flush;
+ results.push_back(profileURDF(urdf_path + "/kuka_lwr_4plus.urdf",
+ "KUKA_LWR_4plus", "KUKA LWR 4+ (-R)", 8, ITERATIONS));
+ std::cout << " done\n";
+
+ // MiniCheetah with rotors
+ std::cout << " MiniCheetah (+R)..." << std::flush;
+ results.push_back(profileRobot>(
+ "MiniCheetah_rotors", "Mini Cheetah (+R)", 25, ITERATIONS));
+ std::cout << " done\n";
+
+ // MiniCheetah without rotors
+ std::cout << " MiniCheetah (-R)..." << std::flush;
+ results.push_back(profileURDF(urdf_path + "/mini_cheetah_approximate.urdf",
+ "MiniCheetah_no_rotors", "Mini Cheetah (-R)", 13, ITERATIONS));
+ std::cout << " done\n";
+
+ // MIT Humanoid with rotors
+ std::cout << " MIT_Humanoid (+R)..." << std::flush;
+ results.push_back(profileRobot>(
+ "MIT_Humanoid_rotors", "MIT Humanoid (+R)", 37, ITERATIONS));
+ std::cout << " done\n";
+
+ // MIT Humanoid without rotors
+ std::cout << " MIT_Humanoid (-R)..." << std::flush;
+ results.push_back(profileRobot>(
+ "MIT_Humanoid_no_rotors", "MIT Humanoid (-R)", 19, ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello (-R/-M) - no rotors, no mechanisms (baseline)
+ std::cout << " Tello (-R/-M)..." << std::flush;
+ results.push_back(profileRobot>(
+ "Tello_no_rotors_no_mech", "Tello (-R/-M)", 11, ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello (+R/-M) - rotors, no mechanisms
+ std::cout << " Tello (+R/-M)..." << std::flush;
+ results.push_back(profileRobot>(
+ "Tello_rotors_no_mech", "Tello (+R/-M)", 21, ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello (+R/+M) - rotors with mechanisms (CasADi)
+ std::cout << " Tello (+R/+M)..." << std::flush;
+ results.push_back(profileRobot>(
+ "Tello_rotors_mech", "Tello (+R/+M)", 21, ITERATIONS));
+ std::cout << " done\n";
+
+ // TelloWithArms
+ std::cout << " TelloWithArms..." << std::flush;
+ results.push_back(profileRobot>(
+ "TelloWithArms", "Tello with Arms (+R/+M)", 37, ITERATIONS));
+ std::cout << " done\n";
+
+ // Cassie (closed-loop biped)
+ std::cout << " Cassie (closed-loop)..." << std::flush;
+ results.push_back(profileRobot>(
+ "Cassie", "Cassie (closed-loop)", 22, ITERATIONS));
+ std::cout << " done\n";
+
+ // Print results table
+ std::cout << "\n" << std::string(120, '=') << "\n";
+ std::cout << "ID Derivatives Profiling Breakdown (us/call)\n";
+ std::cout << std::string(120, '=') << "\n\n";
+
+ std::cout << std::left << std::setw(28) << "Robot"
+ << std::right << std::setw(6) << "DOF"
+ << std::setw(10) << "FwdKin"
+ << std::setw(10) << "FwdCasADi"
+ << std::setw(10) << "FwdOther"
+ << std::setw(10) << "BwdCasADi"
+ << std::setw(10) << "BwdOther"
+ << std::setw(10) << "BwdProp"
+ << std::setw(10) << "Total" << "\n";
+ std::cout << std::string(120, '-') << "\n";
+
+ for (const auto& r : results) {
+ std::cout << std::left << std::setw(28) << r.label
+ << std::right << std::setw(6) << r.dof
+ << std::setw(10) << std::fixed << std::setprecision(2) << r.fwd_kin_us
+ << std::setw(10) << r.fwd_casadi_us
+ << std::setw(10) << r.fwd_other_us
+ << std::setw(10) << r.bwd_casadi_us
+ << std::setw(10) << r.bwd_other_us
+ << std::setw(10) << r.bwd_prop_us
+ << std::setw(10) << r.total_us << "\n";
+ }
+ std::cout << std::string(120, '-') << "\n";
+
+ // Write CSV
+ std::string csv_path = std::string(SOURCE_DIRECTORY) + "/Benchmarking/data/fig4_performance_breakdown_current.csv";
+ std::ofstream csv(csv_path);
+ if (csv.is_open()) {
+ csv << "robot_name,label,dof,bodies,fwd_kin_us,fwd_casadi_us,fwd_other_us,bwd_casadi_us,bwd_other_us,bwd_prop_us,total_us\n";
+ for (const auto& r : results) {
+ csv << r.robot_name << ","
+ << r.label << ","
+ << r.dof << ","
+ << r.bodies << ","
+ << std::fixed << std::setprecision(4)
+ << r.fwd_kin_us << ","
+ << r.fwd_casadi_us << ","
+ << r.fwd_other_us << ","
+ << r.bwd_casadi_us << ","
+ << r.bwd_other_us << ","
+ << r.bwd_prop_us << ","
+ << r.total_us << "\n";
+ }
+ csv.close();
+ std::cout << "\nCSV written to: " << csv_path << "\n";
+ } else {
+ std::cerr << "\nWarning: Could not write CSV to " << csv_path << "\n";
+ }
+
+ return 0;
+}
diff --git a/RAL_2026_Benchmarking/benchmarkIDDerivativesComparison.cpp b/RAL_2026_Benchmarking/benchmarkIDDerivativesComparison.cpp
new file mode 100644
index 00000000..b66c53bc
--- /dev/null
+++ b/RAL_2026_Benchmarking/benchmarkIDDerivativesComparison.cpp
@@ -0,0 +1,346 @@
+#include
+#include
+#include
+#include
+#include
+#include "grbda/Dynamics/ClusterTreeModel.h"
+#include "grbda/Robots/RobotTypes.h"
+#include "config.h"
+
+using namespace grbda;
+
+struct BenchmarkResult {
+ std::string name;
+ int dof;
+ int num_bodies;
+ double standard_us;
+ double world_frame_us;
+ double speedup;
+ int iterations;
+};
+
+template
+BenchmarkResult benchmarkModel(ModelType& model, const std::string& name, int iterations = 5000) {
+ const int nDOF = model.getNumDegreesOfFreedom();
+ const int nBodies = model.getNumBodies();
+
+ // Set random state
+ ModelState model_state;
+ for (const auto& cluster : model.clusters()) {
+ model_state.push_back(cluster->joint_->randomJointState());
+ }
+ model.setState(model_state);
+
+ // Random acceleration
+ DVec qdd = DVec::Random(nDOF);
+
+ // Warmup
+ for (int i = 0; i < 100; ++i) {
+ model.firstOrderInverseDynamicsDerivatives(qdd);
+ model.firstOrderInverseDynamicsDerivativesWorldFrame(qdd);
+ }
+
+ // Benchmark standard ID derivatives
+ auto start_std = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.firstOrderInverseDynamicsDerivatives(qdd);
+ }
+ auto end_std = std::chrono::high_resolution_clock::now();
+ double std_time_us = std::chrono::duration(end_std - start_std).count() / iterations;
+
+ auto [dtau_dq_standard, dtau_dqd_standard] = model.firstOrderInverseDynamicsDerivatives(qdd);
+
+ // Benchmark world-frame ID derivatives
+ auto start_wf = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.firstOrderInverseDynamicsDerivativesWorldFrame(qdd);
+ }
+ auto end_wf = std::chrono::high_resolution_clock::now();
+ double wf_time_us = std::chrono::duration(end_wf - start_wf).count() / iterations;
+
+ auto [dtau_dq_world, dtau_dqd_world] = model.firstOrderInverseDynamicsDerivativesWorldFrame(qdd);
+
+ // Verify correctness
+ double error_dq = (dtau_dq_standard - dtau_dq_world).norm() / (dtau_dq_standard.norm() + 1e-10);
+ double error_dqd = (dtau_dqd_standard - dtau_dqd_world).norm() / (dtau_dqd_standard.norm() + 1e-10);
+ bool has_nan_std = dtau_dq_standard.array().isNaN().any() || dtau_dqd_standard.array().isNaN().any();
+ bool has_nan_wf = dtau_dq_world.array().isNaN().any() || dtau_dqd_world.array().isNaN().any();
+ // Use 1e-6 tolerance for numerical precision
+ if (error_dq > 1e-6 || error_dqd > 1e-6) {
+ std::cout << "\n WARNING: " << name << " ID derivatives mismatch!"
+ << " dtau_dq error: " << error_dq
+ << " dtau_dqd error: " << error_dqd;
+ if (has_nan_std || has_nan_wf) {
+ std::cout << " (NaN in" << (has_nan_std ? " std" : "") << (has_nan_wf ? " wf" : "") << ")";
+ }
+ // Print norms for debugging
+ if (error_dq > 0.5 || error_dqd > 0.5) {
+ std::cout << " [std_norm=" << dtau_dq_standard.norm()
+ << ", wf_norm=" << dtau_dq_world.norm() << "]";
+ }
+ std::cout << "\n";
+ }
+
+ return {name, nDOF, nBodies, std_time_us, wf_time_us, std_time_us / wf_time_us, iterations};
+}
+
+BenchmarkResult benchmarkURDF(const std::string& urdf_path, const std::string& name, int iterations = 5000) {
+ ClusterTreeModel model;
+ model.buildModelFromURDF(urdf_path);
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRevoluteChain(int iterations = 5000) {
+ RevoluteChainWithRotor robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ std::string name = "RevoluteChain<" + std::to_string(N) + ">";
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRevolutePairChain(int iterations = 5000) {
+ RevolutePairChainWithRotor robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ std::string name = "RevolutePairChain<" + std::to_string(N) + ">";
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRevoluteTripleChain(int iterations = 5000) {
+ RevoluteTripleChainWithRotor robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ std::string name = "RevoluteTripleChain<" + std::to_string(N) + ">";
+ return benchmarkModel(model, name, iterations);
+}
+
+template
+BenchmarkResult benchmarkRobot(const std::string& name, int iterations = 5000) {
+ RobotType robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+ return benchmarkModel(model, name, iterations);
+}
+
+// Version that tries multiple times to set state (for robots with implicit constraints)
+template
+BenchmarkResult benchmarkRobotWithRetry(const std::string& name, int iterations = 5000, int max_retries = 100) {
+ RobotType robot;
+ ClusterTreeModel model = robot.buildClusterTreeModel();
+
+ const int nDOF = model.getNumDegreesOfFreedom();
+ const int nBodies = model.getNumBodies();
+
+ // Try to set a valid random state
+ bool state_set = false;
+ for (int retry = 0; retry < max_retries && !state_set; ++retry) {
+ try {
+ ModelState model_state;
+ for (const auto& cluster : model.clusters()) {
+ model_state.push_back(cluster->joint_->randomJointState());
+ }
+ model.setState(model_state);
+ state_set = true;
+ } catch (const std::exception& e) {
+ // Try again with different random state
+ }
+ }
+
+ if (!state_set) {
+ std::cout << "\n ERROR: Could not set valid state for " << name << " after " << max_retries << " attempts\n";
+ return {name, nDOF, nBodies, -1.0, -1.0, 0.0, 0};
+ }
+
+ // Random acceleration
+ DVec qdd = DVec::Random(nDOF);
+
+ // Warmup
+ for (int i = 0; i < 100; ++i) {
+ model.firstOrderInverseDynamicsDerivatives(qdd);
+ model.firstOrderInverseDynamicsDerivativesWorldFrame(qdd);
+ }
+
+ // Benchmark standard ID derivatives
+ auto start_std = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.firstOrderInverseDynamicsDerivatives(qdd);
+ }
+ auto end_std = std::chrono::high_resolution_clock::now();
+ double std_time_us = std::chrono::duration(end_std - start_std).count() / iterations;
+
+ auto [dtau_dq_standard, dtau_dqd_standard] = model.firstOrderInverseDynamicsDerivatives(qdd);
+
+ // Benchmark world-frame ID derivatives
+ auto start_wf = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ model.firstOrderInverseDynamicsDerivativesWorldFrame(qdd);
+ }
+ auto end_wf = std::chrono::high_resolution_clock::now();
+ double wf_time_us = std::chrono::duration(end_wf - start_wf).count() / iterations;
+
+ auto [dtau_dq_world, dtau_dqd_world] = model.firstOrderInverseDynamicsDerivativesWorldFrame(qdd);
+
+ // Verify correctness
+ double error_dq = (dtau_dq_standard - dtau_dq_world).norm() / (dtau_dq_standard.norm() + 1e-10);
+ double error_dqd = (dtau_dqd_standard - dtau_dqd_world).norm() / (dtau_dqd_standard.norm() + 1e-10);
+ // Use 0.1 (10%) tolerance - floating-base robots may have small discrepancies
+ if (error_dq > 0.1 || error_dqd > 0.1) {
+ std::cout << "\n WARNING: " << name << " ID derivatives mismatch!"
+ << " dtau_dq error: " << error_dq
+ << " dtau_dqd error: " << error_dqd << "\n";
+ }
+
+ return {name, nDOF, nBodies, std_time_us, wf_time_us, std_time_us / wf_time_us, iterations};
+}
+
+void printResults(const std::vector& results) {
+ std::cout << "\n";
+ std::cout << "============================================================================\n";
+ std::cout << " ID Derivatives Benchmark Results\n";
+ std::cout << "============================================================================\n";
+ std::cout << std::left << std::setw(30) << "Model"
+ << std::right << std::setw(6) << "DOF"
+ << std::setw(8) << "Bodies"
+ << std::setw(14) << "Std (us)"
+ << std::setw(14) << "World (us)"
+ << std::setw(10) << "Speedup"
+ << "\n";
+ std::cout << "----------------------------------------------------------------------------\n";
+
+ for (const auto& r : results) {
+ std::cout << std::left << std::setw(30) << r.name
+ << std::right << std::setw(6) << r.dof
+ << std::setw(8) << r.num_bodies
+ << std::setw(14) << std::fixed << std::setprecision(2) << r.standard_us
+ << std::setw(14) << std::fixed << std::setprecision(2) << r.world_frame_us
+ << std::setw(10) << std::fixed << std::setprecision(3) << r.speedup
+ << "\n";
+ }
+ std::cout << "============================================================================\n";
+ std::cout << "Speedup > 1.0 means standard is faster than world-frame\n";
+ std::cout << "Speedup < 1.0 means world-frame is faster than standard\n\n";
+}
+
+int main() {
+ std::vector results;
+ const int ITERATIONS = 5000;
+ const std::string urdf_path = std::string(SOURCE_DIRECTORY) + "/robot-models";
+
+ std::cout << "\n=== ID Derivatives Comparison Benchmark ===\n";
+ std::cout << "Comparing Standard vs World-Frame ID Derivatives\n";
+ std::cout << "Iterations per test: " << ITERATIONS << "\n\n";
+
+ // Serial chains of different lengths
+ std::cout << "Benchmarking serial chains (single-body clusters)...\n";
+
+ std::cout << " RevoluteChain<4>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<4>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<8>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<8>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<12>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<12>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<16>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<16>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteChain<20>..." << std::flush;
+ results.push_back(benchmarkRevoluteChain<20>(ITERATIONS));
+ std::cout << " done\n";
+
+ // RevolutePair chains (2-body clusters)
+ std::cout << "\nBenchmarking RevolutePair chains (2-body clusters)...\n";
+
+ std::cout << " RevolutePairChain<2>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<2>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevolutePairChain<4>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<4>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevolutePairChain<6>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<6>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevolutePairChain<8>..." << std::flush;
+ results.push_back(benchmarkRevolutePairChain<8>(ITERATIONS));
+ std::cout << " done\n";
+
+ // RevoluteTriple chains (3-body clusters) - N must be divisible by 3
+ std::cout << "\nBenchmarking RevoluteTriple chains (3-body clusters)...\n";
+
+ std::cout << " RevoluteTripleChain<3>..." << std::flush;
+ results.push_back(benchmarkRevoluteTripleChain<3>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteTripleChain<6>..." << std::flush;
+ results.push_back(benchmarkRevoluteTripleChain<6>(ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " RevoluteTripleChain<9>..." << std::flush;
+ results.push_back(benchmarkRevoluteTripleChain<9>(ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello robot variations
+ std::cout << "\nBenchmarking Tello robot variations...\n";
+
+ std::cout << " TelloRotorsNoConstraints..." << std::flush;
+ results.push_back(benchmarkRobot>("TelloRotorsNoConstraints", ITERATIONS));
+ std::cout << " done\n";
+
+ // Tello with loop constraints (need retry logic)
+ std::cout << " Tello (with constraints)..." << std::flush;
+ results.push_back(benchmarkRobotWithRetry>("Tello", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " TelloWithArms..." << std::flush;
+ results.push_back(benchmarkRobotWithRetry>("TelloWithArms", ITERATIONS));
+ std::cout << " done\n";
+
+ // Other built-in robots
+ std::cout << "\nBenchmarking other built-in robots...\n";
+
+ std::cout << " TeleopArm..." << std::flush;
+ results.push_back(benchmarkRobot("TeleopArm", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " MiniCheetah (with rotors)..." << std::flush;
+ results.push_back(benchmarkRobot>("MiniCheetah (rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " MIT_Humanoid (with rotors)..." << std::flush;
+ results.push_back(benchmarkRobot>("MIT_Humanoid (rotors)", ITERATIONS));
+ std::cout << " done\n";
+
+ // URDF-based robots
+ std::cout << "\nBenchmarking URDF robots...\n";
+
+ std::cout << " mini_cheetah (no rotors)..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/mini_cheetah.urdf",
+ "MiniCheetah (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " MIT Humanoid (no rotors)..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/mit_humanoid.urdf",
+ "MIT_Humanoid (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " JVRC1 Humanoid..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/jvrc1_humanoid.urdf",
+ "JVRC1 Humanoid (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ std::cout << " Kuka LWR 4+..." << std::flush;
+ results.push_back(benchmarkURDF(urdf_path + "/kuka_lwr_4plus.urdf",
+ "Kuka LWR 4+ (URDF)", ITERATIONS));
+ std::cout << " done\n";
+
+ printResults(results);
+
+ return 0;
+}
diff --git a/RAL_2026_Benchmarking/benchmarkIDDerivativesScaling.cpp b/RAL_2026_Benchmarking/benchmarkIDDerivativesScaling.cpp
new file mode 100644
index 00000000..fe8eca64
--- /dev/null
+++ b/RAL_2026_Benchmarking/benchmarkIDDerivativesScaling.cpp
@@ -0,0 +1,771 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "grbda/Dynamics/ClusterTreeModel.h"
+#include "grbda/Robots/RobotTypes.h"
+#include "config.h"
+
+using namespace grbda;
+
+// ============================================================================
+// Inverse Dynamics Derivatives Scaling Benchmark
+// ============================================================================
+// This benchmark tests how computational cost and error scale with:
+// 1. Number of links in serial chains (RevoluteChainWithRotor)
+// 2. Number of links in binary trees (branching kinematic structures)
+// 3. Different joint types: simple revolute, revolute pair, revolute triple
+// ============================================================================
+
+struct ScalingResult {
+ std::string topology;
+ std::string joint_type;
+ int num_links;
+ int dof;
+ double avg_time_us;
+ double max_error_dq;
+ double max_error_dqdot;
+};
+
+// Helper to compute finite difference derivatives for validation
+// For implicit constraints, we perturb in independent coordinate space and use G to map
+// to spanning coordinates. This ensures the perturbation stays on the constraint manifold.
+template
+std::pair, DMat> computeFiniteDifferenceDerivatives(
+ ClusterTreeModel& model,
+ const DVec& q,
+ const DVec& qd,
+ const DVec& ydd,
+ double h = 1e-7)
+{
+ const int nDOF = model.getNumDegreesOfFreedom();
+ DMat dtau_dq_numerical(nDOF, nDOF);
+ DMat dtau_dqdot_numerical(nDOF, nDOF);
+
+ // Numerical dtau/dq
+ for (int j = 0; j < nDOF; ++j) {
+ // Create perturbation in independent velocity space
+ DVec dy_plus = DVec::Zero(nDOF);
+ dy_plus(j) = h;
+ DVec dy_minus = DVec::Zero(nDOF);
+ dy_minus(j) = -h;
+
+ // Build perturbed states using G to map independent perturbations to spanning
+ ModelState state_plus;
+ int pos_idx = 0, vel_idx = 0;
+ for (const auto& cluster : model.clusters()) {
+ const int nv_cluster = cluster->num_velocities_;
+ const int np_cluster = cluster->num_positions_;
+
+ DVec dy_cluster = dy_plus.segment(vel_idx, nv_cluster);
+
+ if (cluster->joint_->isImplicit()) {
+ // Use G to map independent perturbation to spanning coordinates
+ const DMat& G = cluster->joint_->G();
+ DVec dq_span = G * dy_cluster;
+ DVec q_pert_cluster = q.segment(pos_idx, np_cluster) + dq_span;
+
+ JointState js(JointCoordinate(q_pert_cluster, true),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false));
+ state_plus.push_back(js);
+ } else {
+ DVec q_pert_cluster = q.segment(pos_idx, np_cluster) + dy_cluster;
+
+ JointState js(JointCoordinate(q_pert_cluster, false),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false));
+ state_plus.push_back(js);
+ }
+ pos_idx += np_cluster;
+ vel_idx += nv_cluster;
+ }
+ model.setState(state_plus);
+ DVec tau_plus = model.inverseDynamics(ydd);
+
+ ModelState state_minus;
+ pos_idx = 0; vel_idx = 0;
+ for (const auto& cluster : model.clusters()) {
+ const int nv_cluster = cluster->num_velocities_;
+ const int np_cluster = cluster->num_positions_;
+
+ DVec dy_cluster = dy_minus.segment(vel_idx, nv_cluster);
+
+ if (cluster->joint_->isImplicit()) {
+ const DMat& G = cluster->joint_->G();
+ DVec dq_span = G * dy_cluster;
+ DVec q_pert_cluster = q.segment(pos_idx, np_cluster) + dq_span;
+
+ JointState js(JointCoordinate(q_pert_cluster, true),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false));
+ state_minus.push_back(js);
+ } else {
+ DVec q_pert_cluster = q.segment(pos_idx, np_cluster) + dy_cluster;
+
+ JointState js(JointCoordinate(q_pert_cluster, false),
+ JointCoordinate(qd.segment(vel_idx, nv_cluster), false));
+ state_minus.push_back(js);
+ }
+ pos_idx += np_cluster;
+ vel_idx += nv_cluster;
+ }
+ model.setState(state_minus);
+ DVec tau_minus = model.inverseDynamics(ydd);
+
+ dtau_dq_numerical.col(j) = (tau_plus - tau_minus) / (2.0 * h);
+ }
+
+ // Numerical dtau/dqdot
+ for (int j = 0; j < nDOF; ++j) {
+ DVec qd_plus = qd;
+ qd_plus(j) += h;
+ DVec qd_minus = qd;
+ qd_minus(j) -= h;
+
+ ModelState state_plus;
+ int pos_idx = 0, vel_idx = 0;
+ for (const auto& cluster : model.clusters()) {
+ const bool pos_is_spanning = cluster->joint_->isImplicit();
+ JointState js(JointCoordinate(
+ q.segment(pos_idx, cluster->num_positions_), pos_is_spanning),
+ JointCoordinate(
+ qd_plus.segment(vel_idx, cluster->num_velocities_), false));
+ state_plus.push_back(js);
+ pos_idx += cluster->num_positions_;
+ vel_idx += cluster->num_velocities_;
+ }
+ model.setState(state_plus);
+ DVec tau_plus = model.inverseDynamics(ydd);
+
+ ModelState