From 49f4f47f1f89d94ce7d22d0f3a29d0b3f715038a Mon Sep 17 00:00:00 2001
From: Maarten Marsman <m.marsman@uva.nl>
Date: Fri, 15 May 2026 08:01:16 +0200
Subject: [PATCH 1/2] fix(mixed-mrf): port full-space Jacobian + Pfaffian split
 to Kyy block

Completes the Fix 2 port from GGMGradientEngine to the mixed-MRF Kyy
block. Replaces the previous Roverato-style Cholesky Jacobian
    log|det J| = q log 2 + sum_j (deg_higher_yy(j) + 2) psi_j
with the graph-agnostic split plus a per-column constraint correction:
    ldj      = q log 2 + sum_j (q + 1 - j) psi_j
    pfaffian = 0.5 * sum_qq log det(A_qq diag(M_qq^{-1}) A_qq^T)
    logp    += ldj - pfaffian
For the full Kyy graph all A_qq are empty and the Pfaffian collapses to
zero, so the new and old formulas agree there. For sparse Kyy the
previous formula did not represent any RATTLE measure correction; the
new one is the correct manifold-marginal under any diagonal mass.

Applied to both gradient paths:
- logp_and_gradient (theta-space): identity mass; Pfaffian = 0 on full
  Kyy, which is the only regime the dispatcher routes here.
- logp_and_gradient_full (full-space): mass-weighted Pfaffian using
  this->inv_mass_ (empty => identity). Backward pass adds the matching
  adjoint to R_bar via two triangular solves through chol(G_qq).

ensure_constraint_structure() is now called at the top of each gradient
function so chol_constraint_structure_.columns[col].excluded_indices is
available when building per-column A_qq. The projection helpers
(MixedMRFModel::project_position/_momentum) already built mass-weighted
A_qq matrices for RATTLE; the new gradient code consumes the same
constraint structure.

No regressions in `devtools::test()` (6989 pass, 0 fail).
---
 src/models/mixed/mixed_mrf_gradient.cpp | 192 ++++++++++++++++++++----
 1 file changed, 162 insertions(+), 30 deletions(-)
diff --git a/src/models/mixed/mixed_mrf_gradient.cpp b/src/models/mixed/mixed_mrf_gradient.cpp
index cb0735c3..2cac4a2c 100644
--- a/src/models/mixed/mixed_mrf_gradient.cpp
+++ b/src/models/mixed/mixed_mrf_gradient.cpp
@@ -192,6 +192,7 @@ std::pair<double, arma::vec> MixedMRFModel::logp_and_gradient(
     const arma::vec& parameters)
 {
     ensure_gradient_cache();
+    ensure_constraint_structure();
 
     // --- Unvectorize into temporaries (blocks 1–4) ---
     arma::mat temp_main_discrete = main_effects_discrete_;
@@ -637,32 +638,85 @@ std::pair<double, arma::vec> MixedMRFModel::logp_and_gradient(
     arma::mat Omega_bar_sym = Omega_bar + Omega_bar.t();
     arma::mat R_bar = temp_cholesky * Omega_bar_sym;
 
-    // Roverato graph-constrained Cholesky Jacobian for the Kyy block:
-    //   log|det J| = q log 2 + Σ_j (deg_higher_yy(j) + 2) ψ_j
-    // where deg_higher_yy(j) = number of active Kyy edges (j, qq) with qq > j.
-    // For the full Kyy graph deg_higher_yy(j) = q - 1 - j, so this reduces to
-    // (q - j + 1) — the original formula. For sparse Kyy graphs, the
-    // graph-aware count is required for the prior to integrate correctly
-    // (Marsman/Claude, 2026-05-01).
-    size_t gidx = static_cast<size_t>(chol_grad_offset_);
+    // Cholesky-to-K Jacobian (graph-agnostic) + per-column Pfaffian correction.
+    // Mirrors GGMGradientEngine::logp_and_gradient_full:
+    //   ldj      = q*log(2) + Σ_j (q+1-j) ψ_j
+    //   pfaffian = 0.5 * Σ_qq log det(A_qq A_qq^T)        (identity mass here)
+    //   logp    += ldj - pfaffian
+    // Theta-space integration uses identity mass on the manifold; for the
+    // full-space (RATTLE) path we plug through the integrator's inverse-mass
+    // diagonal. For the full Kyy graph, all A_qq are empty, so the Pfaffian
+    // collapses to 0 and the formula reduces to the original q-j+1 weight.
+    logp += static_cast<double>(q_) * std::log(2.0);
     for(size_t j = 0; j < q_; ++j) {
-        double psi_j = std::log(temp_cholesky(j, j));
-        size_t deg_higher_yy = 0;
-        for(size_t qq = j + 1; qq < q_; ++qq) {
-            if(edge_indicators_(p_ + j, p_ + qq) == 1) ++deg_higher_yy;
+        logp += static_cast<double>(q_ + 1 - j) * std::log(temp_cholesky(j, j));
+    }
+
+    const auto& cs = chol_constraint_structure_;
+    arma::mat Aq_buf;
+    std::vector<arma::mat> G_chol(q_);
+    std::vector<arma::mat> Aq_cache(q_);
+    double pfaffian = 0.0;
+    for(size_t col = 1; col < q_; ++col) {
+        const auto& cc = cs.columns[col];
+        if(cc.m_q == 0) continue;
+
+        GGMGradientEngine::build_Aq(temp_cholesky, cc, col, Aq_buf);
+        Aq_cache[col] = Aq_buf;
+
+        // Identity mass (theta-space): G_q = A_q A_q^T.
+        arma::mat G_q = Aq_buf * Aq_buf.t();
+
+        arma::mat L_q;
+        bool chol_ok = arma::chol(L_q, G_q, "lower");
+        if(!chol_ok) {
+            double ridge = 1e-12 * (arma::trace(G_q) /
+                                    static_cast<double>(cc.m_q) + 1.0);
+            chol_ok = arma::chol(L_q, G_q + ridge * arma::eye(cc.m_q, cc.m_q),
+                                 "lower");
+            if(!chol_ok) {
+                return {-std::numeric_limits<double>::infinity(),
+                        arma::vec(grad.n_elem, arma::fill::zeros)};
+            }
+        }
+        G_chol[col] = L_q;
+        pfaffian += arma::accu(arma::log(arma::diagvec(L_q)));
+    }
+    logp -= pfaffian;
+
+    // Pfaffian adjoint: d/dA_q [-0.5 log det(A_q A_q^T)] = -G_q^{-1} A_q.
+    // Each A_q(r, l) = R(l, i_r) for l <= i_r, so the adjoint flows back to
+    // column i_r of R_bar at rows l = 0..i_r.
+    for(size_t col = 1; col < q_; ++col) {
+        const auto& cc = cs.columns[col];
+        if(cc.m_q == 0) continue;
+
+        const arma::mat& L_q = G_chol[col];
+        const arma::mat& Aq = Aq_cache[col];
+
+        arma::mat Z = arma::solve(arma::trimatl(L_q), Aq,
+                                  arma::solve_opts::fast);
+        Z = arma::solve(arma::trimatu(L_q.t()), Z,
+                        arma::solve_opts::fast);
+
+        for(size_t r = 0; r < cc.m_q; ++r) {
+            size_t i_r = cc.excluded_indices[r];
+            for(size_t l = 0; l <= i_r; ++l) {
+                R_bar(l, i_r) -= Z(r, l);
+            }
         }
-        double jac_weight = static_cast<double>(deg_higher_yy + 2);
-        logp += jac_weight * psi_j;
+    }
 
-        // Off-diagonal Cholesky entries: ∂ℓ/∂R_{ij} = R̄_{ij}
+    // Extract position gradient from R_bar with the unified weight (q+1-j)
+    // on the diagonal-psi entries.
+    size_t gidx = static_cast<size_t>(chol_grad_offset_);
+    for(size_t j = 0; j < q_; ++j) {
+        double w_j = static_cast<double>(q_ + 1 - j);
         for(size_t i = 0; i < j; ++i) {
             grad(gidx++) = R_bar(i, j);
         }
-        // Diagonal (log-scale): ∂ℓ/∂ψ_j = R̄_{jj} R_{jj} + (deg_higher_yy + 2)
-        grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + jac_weight;
+        grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + w_j;
     }
-    // Add constant Jacobian term to logp
-    logp += static_cast<double>(q_) * std::log(2.0);
 
     return {logp, grad};
 }
@@ -680,6 +734,7 @@ std::pair<double, arma::vec> MixedMRFModel::logp_and_gradient(
 std::pair<double, arma::vec> MixedMRFModel::logp_and_gradient_full(
     const arma::vec& x)
 {
+    ensure_constraint_structure();
     const size_t full_dim = full_parameter_dimension();
 
     // --- Unpack all 5 blocks from full-space vector ---
@@ -1110,24 +1165,101 @@ std::pair<double, arma::vec> MixedMRFModel::logp_and_gradient_full(
     arma::mat Omega_bar_sym = Omega_bar + Omega_bar.t();
     arma::mat R_bar = temp_cholesky * Omega_bar_sym;
 
-    // Roverato graph-constrained Cholesky Jacobian for the Kyy block.
-    // jac_weight = deg_higher_yy(j) + 2; reduces to q - j + 1 for full graph.
-    size_t gidx = chol_offset;
+    // Cholesky-to-K Jacobian (graph-agnostic) + mass-weighted per-column
+    // Pfaffian correction for the RATTLE manifold marginal:
+    //   ldj      = q*log(2) + Σ_j (q+1-j) ψ_j
+    //   pfaffian = 0.5 * Σ_qq log det(A_qq diag(M_qq^{-1}) A_qq^T)
+    //   logp    += ldj - pfaffian
+    // Mass diagonal is plumbed from this->inv_mass_ (empty ⇒ identity).
+    // For the full Kyy graph, all A_qq are empty and Pfaffian = 0.
+    logp += static_cast<double>(q_) * std::log(2.0);
     for(size_t j = 0; j < q_; ++j) {
-        double psi_j = std::log(temp_cholesky(j, j));
-        size_t deg_higher_yy = 0;
-        for(size_t qq = j + 1; qq < q_; ++qq) {
-            if(edge_indicators_(p_ + j, p_ + qq) == 1) ++deg_higher_yy;
+        logp += static_cast<double>(q_ + 1 - j) * std::log(temp_cholesky(j, j));
+    }
+
+    const auto& cs = chol_constraint_structure_;
+    const bool identity_mass = inv_mass_.is_empty();
+    arma::mat Aq_buf;
+    std::vector<arma::mat> G_chol(q_);
+    std::vector<arma::mat> Aq_cache(q_);
+    std::vector<arma::vec> inv_mass_q_cache(q_);
+    double pfaffian = 0.0;
+    for(size_t col = 1; col < q_; ++col) {
+        const auto& cc = cs.columns[col];
+        if(cc.m_q == 0) continue;
+
+        GGMGradientEngine::build_Aq(temp_cholesky, cc, col, Aq_buf);
+        Aq_cache[col] = Aq_buf;
+
+        arma::vec inv_mass_q(col);
+        if(identity_mass) {
+            inv_mass_q.ones();
+        } else {
+            size_t off_q = chol_block_offset_ + cs.full_theta_offsets[col];
+            for(size_t l = 0; l < col; ++l) {
+                inv_mass_q(l) = inv_mass_(off_q + l);
+            }
         }
-        double jac_weight = static_cast<double>(deg_higher_yy + 2);
-        logp += jac_weight * psi_j;
+        inv_mass_q_cache[col] = inv_mass_q;
+
+        // G_q = A_q diag(inv_mass_q) A_q^T
+        arma::mat Aq_scaled = Aq_buf;
+        Aq_scaled.each_row() %= inv_mass_q.t();
+        arma::mat G_q = Aq_scaled * Aq_buf.t();
+
+        arma::mat L_q;
+        bool chol_ok = arma::chol(L_q, G_q, "lower");
+        if(!chol_ok) {
+            double ridge = 1e-12 * (arma::trace(G_q) /
+                                    static_cast<double>(cc.m_q) + 1.0);
+            chol_ok = arma::chol(L_q, G_q + ridge * arma::eye(cc.m_q, cc.m_q),
+                                 "lower");
+            if(!chol_ok) {
+                return {-std::numeric_limits<double>::infinity(),
+                        arma::vec(full_dim, arma::fill::zeros)};
+            }
+        }
+        G_chol[col] = L_q;
+        pfaffian += arma::accu(arma::log(arma::diagvec(L_q)));
+    }
+    logp -= pfaffian;
+
+    // Pfaffian adjoint: d/dA_q [-0.5 log det(A_q M_q^{-1} A_q^T)] flows back
+    // to R_bar at the excluded-edge columns. dA_q = G_q^{-1} A_q · diag(M_q^{-1}).
+    for(size_t col = 1; col < q_; ++col) {
+        const auto& cc = cs.columns[col];
+        if(cc.m_q == 0) continue;
+
+        const arma::mat& L_q = G_chol[col];
+        const arma::mat& Aq = Aq_cache[col];
+        const arma::vec& inv_mass_q = inv_mass_q_cache[col];
+
+        arma::mat Z = arma::solve(arma::trimatl(L_q), Aq,
+                                  arma::solve_opts::fast);
+        Z = arma::solve(arma::trimatu(L_q.t()), Z,
+                        arma::solve_opts::fast);
+
+        arma::mat dAq = Z;
+        dAq.each_row() %= inv_mass_q.t();
+
+        for(size_t r = 0; r < cc.m_q; ++r) {
+            size_t i_r = cc.excluded_indices[r];
+            for(size_t l = 0; l <= i_r; ++l) {
+                R_bar(l, i_r) -= dAq(r, l);
+            }
+        }
+    }
 
+    // Extract position gradient from R_bar with the unified weight (q+1-j)
+    // on the diagonal-psi entries.
+    size_t gidx = chol_offset;
+    for(size_t j = 0; j < q_; ++j) {
+        double w_j = static_cast<double>(q_ + 1 - j);
         for(size_t i = 0; i < j; ++i) {
             grad(gidx++) = R_bar(i, j);
         }
-        grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + jac_weight;
+        grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + w_j;
     }
-    logp += static_cast<double>(q_) * std::log(2.0);
 
     return {logp, grad};
 }

From 119a4cde46df6423f55ecf2b55fc5658811dfa93 Mon Sep 17 00:00:00 2001
From: Maarten Marsman <m.marsman@uva.nl>
Date: Fri, 15 May 2026 08:05:41 +0200
Subject: [PATCH 2/2] test(mixed-mrf): finite-difference checks for Kyy
 Pfaffian gradient

Adds tests/testthat/test-mixed-gradient-pfaffian.R covering the new
Cholesky-to-K + Pfaffian split in MixedMRFModel::logp_and_gradient_full.
Four cases at max relative error < 1e-5:
  - full Kyy graph (refactor sanity; Pfaffian = 0)
  - sparse Kyy, 1 excluded edge, identity mass
  - sparse Kyy, 1 excluded edge, random diagonal mass
  - sparse Kyy, 2 excluded edges, random diagonal mass (multi-column)

Plumbs an optional inv_mass_diag arg through mixed_test_logp_and_gradient_full
so the integrator's mass-weighted Pfaffian can be exercised from R; the
arg falls back to identity in the gradient engine when NULL.
RcppExports regenerated via Rcpp::compileAttributes().
---
 R/RcppExports.R                               |   4 +-
 src/RcppExports.cpp                           |   9 +-
 src/mixed_gradient_interface.cpp              |  10 +-
 tests/testthat/test-mixed-gradient-pfaffian.R | 150 ++++++++++++++++++
 4 files changed, 166 insertions(+), 7 deletions(-)
 create mode 100644 tests/testthat/test-mixed-gradient-pfaffian.R

diff --git a/R/RcppExports.R b/R/RcppExports.R
index 33422a1b..47938d7b 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -69,8 +69,8 @@ mixed_test_logp_and_gradient <- function(params, discrete_observations, continuo
     .Call(`_bgms_mixed_test_logp_and_gradient`, params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate)
 }
 
-mixed_test_logp_and_gradient_full <- function(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha = 1.0, main_beta = 1.0, interaction_prior_type = "cauchy", threshold_prior_type = "beta-prime", threshold_scale = 1.0, means_prior_type = "normal", means_scale = 1.0, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0) {
-    .Call(`_bgms_mixed_test_logp_and_gradient_full`, params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate)
+mixed_test_logp_and_gradient_full <- function(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha = 1.0, main_beta = 1.0, interaction_prior_type = "cauchy", threshold_prior_type = "beta-prime", threshold_scale = 1.0, means_prior_type = "normal", means_scale = 1.0, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0, inv_mass_diag = NULL) {
+    .Call(`_bgms_mixed_test_logp_and_gradient_full`, params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag)
 }
 
 mixed_test_project_position <- function(x, inv_mass, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha = 1.0, main_beta = 1.0, interaction_prior_type = "cauchy", threshold_prior_type = "beta-prime", threshold_scale = 1.0) {
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 4e15686c..b07142dc 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -303,8 +303,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // mixed_test_logp_and_gradient_full
-Rcpp::List mixed_test_logp_and_gradient_full(const arma::vec& params, const arma::imat& discrete_observations, const arma::mat& continuous_observations, const arma::ivec& num_categories, const arma::uvec& is_ordinal_variable, const arma::ivec& baseline_category, const arma::imat& edge_indicators, double pairwise_scale, double main_alpha, double main_beta, std::string interaction_prior_type, std::string threshold_prior_type, double threshold_scale, std::string means_prior_type, double means_scale, std::string diagonal_prior_type, double diagonal_shape, double diagonal_rate);
-RcppExport SEXP _bgms_mixed_test_logp_and_gradient_full(SEXP paramsSEXP, SEXP discrete_observationsSEXP, SEXP continuous_observationsSEXP, SEXP num_categoriesSEXP, SEXP is_ordinal_variableSEXP, SEXP baseline_categorySEXP, SEXP edge_indicatorsSEXP, SEXP pairwise_scaleSEXP, SEXP main_alphaSEXP, SEXP main_betaSEXP, SEXP interaction_prior_typeSEXP, SEXP threshold_prior_typeSEXP, SEXP threshold_scaleSEXP, SEXP means_prior_typeSEXP, SEXP means_scaleSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP) {
+Rcpp::List mixed_test_logp_and_gradient_full(const arma::vec& params, const arma::imat& discrete_observations, const arma::mat& continuous_observations, const arma::ivec& num_categories, const arma::uvec& is_ordinal_variable, const arma::ivec& baseline_category, const arma::imat& edge_indicators, double pairwise_scale, double main_alpha, double main_beta, std::string interaction_prior_type, std::string threshold_prior_type, double threshold_scale, std::string means_prior_type, double means_scale, std::string diagonal_prior_type, double diagonal_shape, double diagonal_rate, Rcpp::Nullable<Rcpp::NumericVector> inv_mass_diag);
+RcppExport SEXP _bgms_mixed_test_logp_and_gradient_full(SEXP paramsSEXP, SEXP discrete_observationsSEXP, SEXP continuous_observationsSEXP, SEXP num_categoriesSEXP, SEXP is_ordinal_variableSEXP, SEXP baseline_categorySEXP, SEXP edge_indicatorsSEXP, SEXP pairwise_scaleSEXP, SEXP main_alphaSEXP, SEXP main_betaSEXP, SEXP interaction_prior_typeSEXP, SEXP threshold_prior_typeSEXP, SEXP threshold_scaleSEXP, SEXP means_prior_typeSEXP, SEXP means_scaleSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP, SEXP inv_mass_diagSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -326,7 +326,8 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< std::string >::type diagonal_prior_type(diagonal_prior_typeSEXP);
     Rcpp::traits::input_parameter< double >::type diagonal_shape(diagonal_shapeSEXP);
     Rcpp::traits::input_parameter< double >::type diagonal_rate(diagonal_rateSEXP);
-    rcpp_result_gen = Rcpp::wrap(mixed_test_logp_and_gradient_full(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate));
+    Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::NumericVector> >::type inv_mass_diag(inv_mass_diagSEXP);
+    rcpp_result_gen = Rcpp::wrap(mixed_test_logp_and_gradient_full(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -802,7 +803,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_bgms_compute_rhat_cpp", (DL_FUNC) &_bgms_compute_rhat_cpp, 1},
     {"_bgms_compute_indicator_ess_cpp", (DL_FUNC) &_bgms_compute_indicator_ess_cpp, 1},
     {"_bgms_mixed_test_logp_and_gradient", (DL_FUNC) &_bgms_mixed_test_logp_and_gradient, 18},
-    {"_bgms_mixed_test_logp_and_gradient_full", (DL_FUNC) &_bgms_mixed_test_logp_and_gradient_full, 18},
+    {"_bgms_mixed_test_logp_and_gradient_full", (DL_FUNC) &_bgms_mixed_test_logp_and_gradient_full, 19},
     {"_bgms_mixed_test_project_position", (DL_FUNC) &_bgms_mixed_test_project_position, 14},
     {"_bgms_mixed_test_project_momentum", (DL_FUNC) &_bgms_mixed_test_project_momentum, 15},
     {"_bgms_mixed_test_leapfrog_constrained", (DL_FUNC) &_bgms_mixed_test_leapfrog_constrained, 17},
diff --git a/src/mixed_gradient_interface.cpp b/src/mixed_gradient_interface.cpp
index 6957065b..c113c3e0 100644
--- a/src/mixed_gradient_interface.cpp
+++ b/src/mixed_gradient_interface.cpp
@@ -75,7 +75,8 @@ Rcpp::List mixed_test_logp_and_gradient_full(
     double means_scale = 1.0,
     std::string diagonal_prior_type = "gamma",
     double diagonal_shape = 1.0,
-    double diagonal_rate = 1.0)
+    double diagonal_rate = 1.0,
+    Rcpp::Nullable<Rcpp::NumericVector> inv_mass_diag = R_NilValue)
 {
     size_t p = discrete_observations.n_cols;
     size_t q = continuous_observations.n_cols;
@@ -95,6 +96,13 @@ Rcpp::List mixed_test_logp_and_gradient_full(
         42
     );
 
+    // Plug the integrator's inverse-mass diagonal through to the gradient
+    // so the mass-weighted Pfaffian correction can be exercised from R.
+    // Empty/NULL falls back to identity in the gradient engine.
+    if(inv_mass_diag.isNotNull()) {
+        model.set_inv_mass(Rcpp::as<arma::vec>(inv_mass_diag));
+    }
+
     auto result = model.logp_and_gradient_full(params);
 
     return Rcpp::List::create(
diff --git a/tests/testthat/test-mixed-gradient-pfaffian.R b/tests/testthat/test-mixed-gradient-pfaffian.R
new file mode 100644
index 00000000..d39a4247
--- /dev/null
+++ b/tests/testthat/test-mixed-gradient-pfaffian.R
@@ -0,0 +1,150 @@
+# --------------------------------------------------------------------------- #
+# Finite-difference checks for MixedMRFModel::logp_and_gradient_full.
+#
+# Targets the Cholesky-to-K Jacobian + per-column Pfaffian split that ports
+# GGM Fix 2 to the mixed-MRF Kyy block. The key code paths exercised are:
+#   - the unified ldj = q*log(2) + sum_j (q+1-j) psi_j Jacobian
+#   - the per-column Pfaffian 0.5 * sum_qq log det(A_qq diag(M_qq^-1) A_qq^T)
+#   - the matching Pfaffian adjoint that flows into R_bar at excluded-edge
+#     positions, then into the diagonal psi gradient through the exp() chain
+#
+# Full Kyy graph: A_qq are empty so Pfaffian = 0 — this verifies the
+# refactor doesn't change anything in the no-constraint case.
+# Sparse Kyy:    Pfaffian is non-trivial; tests both identity and random
+#                positive diagonal mass (the latter is what NUTS uses after
+#                warmup).
+# --------------------------------------------------------------------------- #
+
+# ---- Helpers ---------------------------------------------------------------- #
+
+mixed_full_fd_gradient = function(params, x, y, num_cats, is_ord, base_cat,
+                                  edge_ind, scale, inv_mass = NULL,
+                                  eps = 1e-5) {
+  n_total = length(params)
+  fd = numeric(n_total)
+  for(k in seq_len(n_total)) {
+    p_plus = params
+    p_minus = params
+    p_plus[k] = p_plus[k] + eps
+    p_minus[k] = p_minus[k] - eps
+    fp = mixed_test_logp_and_gradient_full(
+      p_plus, x, y, num_cats, as.integer(is_ord),
+      base_cat, edge_ind, scale,
+      inv_mass_diag = inv_mass
+    )$value
+    fm = mixed_test_logp_and_gradient_full(
+      p_minus, x, y, num_cats, as.integer(is_ord),
+      base_cat, edge_ind, scale,
+      inv_mass_diag = inv_mass
+    )$value
+    fd[k] = (fp - fm) / (2 * eps)
+  }
+  fd
+}
+
+mixed_full_check_gradient = function(params, x, y, num_cats, is_ord, base_cat,
+                                     edge_ind, scale, inv_mass = NULL,
+                                     eps = 1e-5) {
+  res = mixed_test_logp_and_gradient_full(
+    params, x, y, num_cats, as.integer(is_ord),
+    base_cat, edge_ind, scale,
+    inv_mass_diag = inv_mass
+  )
+  fd = mixed_full_fd_gradient(
+    params, x, y, num_cats, is_ord, base_cat,
+    edge_ind, scale, inv_mass, eps
+  )
+  ag = res$gradient
+  denom = pmax(abs(ag), abs(fd), 1)
+  max(abs(ag - fd) / denom)
+}
+
+# Build a small mixed model with q continuous and p discrete variables,
+# returning everything needed by the gradient harness in one shot.
+mixed_full_make_setup = function(p = 2, q = 3, n = 60, seed = 42) {
+  set.seed(seed)
+  x = matrix(sample(0:1, n * p, replace = TRUE), n, p)
+  y = matrix(rnorm(n * q), n, q)
+  num_cats = rep(2L, p)
+  is_ord = rep(1L, p)
+  base_cat = rep(0L, p)
+  total = p + q
+  edge_ind = matrix(1L, total, total)
+  diag(edge_ind) = 0L
+
+  n_main = sum(num_cats)
+  n_pw_xx = p * (p - 1L) / 2L
+  n_means = q
+  n_xy = p * q
+  n_chol = q * (q + 1L) / 2L
+  total_dim = n_main + n_pw_xx + n_means + n_xy + n_chol
+
+  set.seed(seed + 1)
+  params = rnorm(total_dim, sd = 0.3)
+
+  list(
+    x = x, y = y, num_cats = num_cats, is_ord = is_ord, base_cat = base_cat,
+    edge_ind = edge_ind, params = params,
+    p = p, q = q, total = total, total_dim = total_dim
+  )
+}
+
+
+# ---- Full Kyy graph: Pfaffian = 0 ------------------------------------------- #
+
+test_that("gradient matches FD on full Kyy graph (identity mass)", {
+  s = mixed_full_make_setup()
+  err = mixed_full_check_gradient(
+    s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat,
+    s$edge_ind, 2.5
+  )
+  expect_lt(err, 1e-5)
+})
+
+
+# ---- Sparse Kyy graph: Pfaffian is non-trivial ------------------------------ #
+
+test_that("gradient matches FD with one excluded Kyy edge (identity mass)", {
+  s = mixed_full_make_setup()
+  # Exclude one Kyy edge: between continuous-1 and continuous-3.
+  s$edge_ind[s$p + 1L, s$p + 3L] = 0L
+  s$edge_ind[s$p + 3L, s$p + 1L] = 0L
+  err = mixed_full_check_gradient(
+    s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat,
+    s$edge_ind, 2.5
+  )
+  expect_lt(err, 1e-5)
+})
+
+
+test_that("gradient matches FD with one excluded Kyy edge (random mass)", {
+  s = mixed_full_make_setup()
+  s$edge_ind[s$p + 1L, s$p + 3L] = 0L
+  s$edge_ind[s$p + 3L, s$p + 1L] = 0L
+  set.seed(7)
+  inv_mass = runif(s$total_dim, min = 0.5, max = 2.0)
+  err = mixed_full_check_gradient(
+    s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat,
+    s$edge_ind, 2.5,
+    inv_mass = inv_mass
+  )
+  expect_lt(err, 1e-5)
+})
+
+
+test_that("gradient matches FD with two excluded Kyy edges (random mass)", {
+  s = mixed_full_make_setup(q = 4)
+  # Exclude (p+1, p+3) and (p+2, p+4): two columns carry constraints.
+  s$edge_ind[s$p + 1L, s$p + 3L] = 0L
+  s$edge_ind[s$p + 3L, s$p + 1L] = 0L
+  s$edge_ind[s$p + 2L, s$p + 4L] = 0L
+  s$edge_ind[s$p + 4L, s$p + 2L] = 0L
+  set.seed(13)
+  inv_mass = runif(s$total_dim, min = 0.5, max = 2.0)
+  err = mixed_full_check_gradient(
+    s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat,
+    s$edge_ind, 2.5,
+    inv_mass = inv_mass
+  )
+  expect_lt(err, 1e-5)
+})