Bayesian-Graphical-Modelling-Lab · MaartenMarsman · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -137,8 +137,8 @@ ggm_test_logp_and_gradient_prior <- function(theta, suf_stat, n, edge_indicators
     .Call(`_bgms_ggm_test_logp_and_gradient_prior`, theta, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate)
 }
 
-ggm_test_logp_and_gradient_full_prior <- function(x, suf_stat, n, edge_indicators, interaction_prior_type = "cauchy", interaction_scale = 1.0, interaction_alpha = 0.5, interaction_beta = 0.5, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0) {
-    .Call(`_bgms_ggm_test_logp_and_gradient_full_prior`, x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate)
+ggm_test_logp_and_gradient_full_prior <- function(x, suf_stat, n, edge_indicators, interaction_prior_type = "cauchy", interaction_scale = 1.0, interaction_alpha = 0.5, interaction_beta = 0.5, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0, inv_mass_diag = NULL) {
+    .Call(`_bgms_ggm_test_logp_and_gradient_full_prior`, x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag)
 }
 
 sample_ggm <- function(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback = NULL, edge_prior = "Bernoulli", beta_bernoulli_alpha = 1.0, beta_bernoulli_beta = 1.0, beta_bernoulli_alpha_between = 1.0, beta_bernoulli_beta_between = 1.0, dirichlet_alpha = 1.0, lambda = 1.0, target_acceptance = 0.8, max_tree_depth = 10L, na_impute = FALSE, missing_index_nullable = NULL) {

diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -647,8 +647,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // ggm_test_logp_and_gradient_full_prior
-Rcpp::List ggm_test_logp_and_gradient_full_prior(const arma::vec& x, const arma::mat& suf_stat, int n, const arma::imat& edge_indicators, const std::string& interaction_prior_type, double interaction_scale, double interaction_alpha, double interaction_beta, const std::string& diagonal_prior_type, double diagonal_shape, double diagonal_rate);
-RcppExport SEXP _bgms_ggm_test_logp_and_gradient_full_prior(SEXP xSEXP, SEXP suf_statSEXP, SEXP nSEXP, SEXP edge_indicatorsSEXP, SEXP interaction_prior_typeSEXP, SEXP interaction_scaleSEXP, SEXP interaction_alphaSEXP, SEXP interaction_betaSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP) {
+Rcpp::List ggm_test_logp_and_gradient_full_prior(const arma::vec& x, const arma::mat& suf_stat, int n, const arma::imat& edge_indicators, const std::string& interaction_prior_type, double interaction_scale, double interaction_alpha, double interaction_beta, const std::string& diagonal_prior_type, double diagonal_shape, double diagonal_rate, Rcpp::Nullable<Rcpp::NumericVector> inv_mass_diag);
+RcppExport SEXP _bgms_ggm_test_logp_and_gradient_full_prior(SEXP xSEXP, SEXP suf_statSEXP, SEXP nSEXP, SEXP edge_indicatorsSEXP, SEXP interaction_prior_typeSEXP, SEXP interaction_scaleSEXP, SEXP interaction_alphaSEXP, SEXP interaction_betaSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP, SEXP inv_mass_diagSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -663,7 +663,8 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const std::string& >::type diagonal_prior_type(diagonal_prior_typeSEXP);
     Rcpp::traits::input_parameter< double >::type diagonal_shape(diagonal_shapeSEXP);
     Rcpp::traits::input_parameter< double >::type diagonal_rate(diagonal_rateSEXP);
-    rcpp_result_gen = Rcpp::wrap(ggm_test_logp_and_gradient_full_prior(x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate));
+    Rcpp::traits::input_parameter< Rcpp::Nullable<Rcpp::NumericVector> >::type inv_mass_diag(inv_mass_diagSEXP);
+    rcpp_result_gen = Rcpp::wrap(ggm_test_logp_and_gradient_full_prior(x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -818,7 +819,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_bgms_test_parameter_prior", (DL_FUNC) &_bgms_test_parameter_prior, 6},
     {"_bgms_test_scale_prior", (DL_FUNC) &_bgms_test_scale_prior, 4},
     {"_bgms_ggm_test_logp_and_gradient_prior", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient_prior, 11},
-    {"_bgms_ggm_test_logp_and_gradient_full_prior", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient_full_prior, 11},
+    {"_bgms_ggm_test_logp_and_gradient_full_prior", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient_full_prior, 12},
     {"_bgms_sample_ggm", (DL_FUNC) &_bgms_sample_ggm, 23},
     {"_bgms_sample_mixed_mrf", (DL_FUNC) &_bgms_sample_mixed_mrf, 24},
     {"_bgms_sample_omrf", (DL_FUNC) &_bgms_sample_omrf, 24},

diff --git a/src/ggm_gradient_interface.cpp b/src/ggm_gradient_interface.cpp
@@ -350,13 +350,15 @@ Rcpp::List ggm_test_leapfrog_constrained_checked(
 // the likelihood vanishes and the sampler targets the prior:
 //   -K_ij/2 | graph ~ Cauchy(0, scale) or Normal(0, scale)       (included edges)
 //   K_ij = 0                                                      (excluded edges)
-//   K_ii ~ Gamma(gamma_shape, gamma_rate)                         (diagonal)
+//   K_ii/2 ~ Gamma(gamma_shape, gamma_rate)                       (diagonal)
 //
-// Note on convention: `interaction_prior` is applied on the association scale
-// K_yy_{ij} = -K_{ij}/2 (consistent with the mixed-MRF continuous block),
-// NOT on the precision entry K_{ij} directly. A `Normal(0, scale)` prior
-// therefore constrains K_yy off-diagonals with standard deviation `scale`,
-// equivalent to Normal(0, 2*scale) on K_{ij}.
+// Note on convention: both `interaction_prior` and `diagonal_prior` are
+// applied on the partial-association scale K_yy = -K/2 (consistent with the
+// mixed-MRF continuous block), NOT on the precision entries directly.
+// A `Normal(0, scale)` prior therefore constrains K_yy off-diagonals with
+// standard deviation `scale`, equivalent to Normal(0, 2*scale) on K_{ij}.
+// A `Gamma(shape, rate)` diagonal prior is applied to -K_yy_{ii} = K_{ii}/2,
+// so the implied prior on K_{ii} is Gamma(shape, rate/2).
 //
 // edge_indicators: p x p integer matrix with 1 = edge included, 0 = excluded.
 //   Defaults to all-ones (full graph). For edge selection SBC, pass the

diff --git a/src/models/ggm/ggm_gradient.cpp b/src/models/ggm/ggm_gradient.cpp
@@ -277,10 +277,12 @@ std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient(
         }
     }
 
-    // Prior on diagonal K entries
+    // Prior on the partial-association diagonal: -K_yy_{ii} = K_{ii}/2.
+    // Evaluated at 0.5 * K_{ii} so `diagonal_prior_` refers to the same
+    // quantity (the partial association) as `interaction_prior_`.
     double log_diag_prior = 0.0;
     for (size_t i = 0; i < p_; ++i) {
-        log_diag_prior += diagonal_prior_->logp(K(i, i));
+        log_diag_prior += diagonal_prior_->logp(0.5 * K(i, i));
     }
 
     double lp = log_lik + log_slab + log_diag_prior + fm.log_det_jacobian;
@@ -293,11 +295,13 @@ std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient(
 
     // Phase 1: Phi_bar from data + priors (direct Phi-space, no K^{-1})
     // Data:  d/dPhi [-0.5 tr(Phi^T Phi S)] = -Phi S = -P
-    // Diagonal prior: d/dPhi [log p(K_ii)] = grad(K_ii) * 2 Phi(:,i)
+    // Diagonal prior: d/dPhi [log p(K_ii/2)]
+    //   = (1/2) * grad(K_ii/2) * dK_ii/dPhi
+    //   = (1/2) * grad(K_ii/2) * 2 Phi(:,i) = grad(K_ii/2) * Phi(:,i).
     arma::mat Phi_bar = -P;
     for (size_t i = 0; i < p_; ++i) {
-        double dg = diagonal_prior_->grad(K(i, i));
-        Phi_bar.col(i).head(i + 1) += 2.0 * dg * Phi.col(i).head(i + 1);
+        double dg = diagonal_prior_->grad(0.5 * K(i, i));
+        Phi_bar.col(i).head(i + 1) += dg * Phi.col(i).head(i + 1);
     }
 
     // Interaction prior adjoint on included edges.
@@ -455,8 +459,11 @@ std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient(
 // from the previous K-space adjoint approach.
 
 std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient_full(
-    const arma::vec& x) const
+    const arma::vec& x,
+    const arma::vec& inv_mass_diag) const
 {
+    const bool use_identity_mass = inv_mass_diag.is_empty();
+
     // --- Forward pass: unpack x -> Phi ---
     arma::mat Phi(p_, p_, arma::fill::zeros);
     arma::vec psi(p_);
@@ -506,38 +513,85 @@ std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient_full(
         }
     }
 
-    // Prior on diagonal K entries
+    // Prior on the partial-association diagonal: -K_yy_{ii} = K_{ii}/2.
     double log_diag_prior = 0.0;
     for (size_t i = 0; i < p_; ++i) {
         double kii = arma::dot(
             Phi.col(i).head(i + 1),
             Phi.col(i).head(i + 1));
-        log_diag_prior += diagonal_prior_->logp(kii);
+        log_diag_prior += diagonal_prior_->logp(0.5 * kii);
     }
 
-    // Roverato graph-constrained Cholesky Jacobian:
-    //   log|det J(x_full -> K_active)| = p*log(2)
-    //                                  + sum_k (deg_higher(k) + 2) * psi_k
-    // where deg_higher(k) = number of active edges (k, q) with q > k.
-    // For the full graph deg_higher(k) = p - 1 - k, so this reduces to the
-    // earlier "p*log(2) + 2*sum(psi) + sum_{i<p-1}(p-1-i)*psi_i" form.
-    // RATTLE/SHAKE handles the surface measure of the constraint manifold;
-    // the prior's Jacobian must use the graph-aware degree-higher count
-    // so that the implied conditional density on the manifold matches the
-    // intended K | G prior.
+    // Full Cholesky Jacobian:
+    //   log|det d(K)/d(Phi)| = p*log(2) + sum_k (p+1-k) * psi_k
+    // (i.e. 2*sum(psi) + sum_{k<p-1} (p-1-k)*psi_k).  The graph-specific
+    // volume reduction is handled separately by the -0.5 log det G_q
+    // correction below; combining the two reproduces the theta-space
+    // Roverato Jacobian exactly. This is the SAME formula the theta-space
+    // path uses, so the two log-targets agree at corresponding (theta, x).
     double ldj = static_cast<double>(p_) * std::log(2.0);
-    arma::uvec deg_higher_vec(p_, arma::fill::zeros);
     for (size_t k = 0; k < p_; ++k) {
-        for (size_t q = k + 1; q < p_; ++q) {
-            const auto& inc = structure_->columns[q].included_indices;
-            if (std::find(inc.begin(), inc.end(), k) != inc.end()) {
-                ++deg_higher_vec(k);
+        ldj += 2.0 * psi(k);
+    }
+    for (size_t k = 0; k + 1 < p_; ++k) {
+        ldj += static_cast<double>(p_ - 1 - k) * psi(k);
+    }
+
+    // Graph-aware constraint Jacobian correction: -0.5 * sum_q log det G_q
+    // with G_q = A_q diag(inv_mass_q) A_q^T. For identity mass this matches
+    // the theta-space Roverato/QR Jacobian term -sum log R_diag; with the
+    // NUTS-adapted diagonal mass we use the mass-weighted determinant so the
+    // RATTLE manifold marginal targets the intended K | G prior.
+    // The factor is *subtracted* (mirroring the theta-space path); the
+    // per-column G_q form is exact here because the position-projection is
+    // applied column-by-column (Roverato ordering: each column-q constraint
+    // is linear in column q's off-diagonals given earlier columns).
+    arma::mat Aq_buf;
+    std::vector<arma::mat> G_chol(p_);   // lower Cholesky of G_q per column
+    std::vector<arma::mat> Aq_cache(p_); // cached A_q per column
+    std::vector<arma::vec> inv_mass_q_cache(p_);
+    double pfaffian = 0.0;
+    for (size_t q = 1; q < p_; ++q) {
+        const auto& col = structure_->columns[q];
+        if (col.m_q == 0) continue;
+
+        build_Aq(Phi, col, q, Aq_buf);
+        Aq_cache[q] = Aq_buf;
+
+        size_t off_q = structure_->full_theta_offsets[q];
+        arma::vec inv_mass_q(q);
+        if (use_identity_mass) {
+            inv_mass_q.ones();
+        } else {
+            for (size_t l = 0; l < q; ++l) {
+                inv_mass_q(l) = inv_mass_diag(off_q + l);
+            }
+        }
+        inv_mass_q_cache[q] = inv_mass_q;
+
+        // G_q = A_q diag(inv_mass_q) A_q^T  (m_q x m_q, symmetric PD)
+        arma::mat Aq_scaled = Aq_buf;
+        Aq_scaled.each_row() %= inv_mass_q.t();
+        arma::mat G_q = Aq_scaled * Aq_buf.t();
+
+        arma::mat L_q;
+        bool chol_ok = arma::chol(L_q, G_q, "lower");
+        if (!chol_ok) {
+            // Tiny ridge for numerical safety; harmless during normal NUTS use.
+            double ridge = 1e-12 * (arma::trace(G_q) /
+                                    static_cast<double>(col.m_q) + 1.0);
+            chol_ok = arma::chol(L_q, G_q + ridge * arma::eye(col.m_q, col.m_q),
+                                 "lower");
+            if (!chol_ok) {
+                return {-std::numeric_limits<double>::infinity(),
+                        arma::vec(x.n_elem, arma::fill::zeros)};
             }
         }
-        ldj += static_cast<double>(deg_higher_vec(k) + 2) * psi(k);
+        G_chol[q] = L_q;
+        pfaffian += arma::accu(arma::log(arma::diagvec(L_q)));
     }
 
-    double lp = log_lik + log_slab + log_diag_prior + ldj;
+    double lp = log_lik + log_slab + log_diag_prior + ldj - pfaffian;
 
     if (!std::isfinite(lp)) {
         return {lp, arma::vec(x.n_elem, arma::fill::zeros)};
@@ -550,13 +604,14 @@ std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient_full(
     P *= -1.0;
     // P now holds -Phi*S = data part of Phi_bar
 
-    // Diagonal prior adjoint: d/dK_ii[log p(K_ii)] → grad(K_ii) * 2 Phi(:,i)
+    // Diagonal prior adjoint: d/dPhi [log p(K_ii/2)]
+    //   = (1/2) * grad(K_ii/2) * 2 Phi(:,i) = grad(K_ii/2) * Phi(:,i).
     for (size_t i = 0; i < p_; ++i) {
         double kii = arma::dot(
             Phi.col(i).head(i + 1),
             Phi.col(i).head(i + 1));
-        double dg = diagonal_prior_->grad(kii);
-        P.col(i).head(i + 1) += 2.0 * dg * Phi.col(i).head(i + 1);
+        double dg = diagonal_prior_->grad(0.5 * kii);
+        P.col(i).head(i + 1) += dg * Phi.col(i).head(i + 1);
     }
 
     // Interaction prior adjoint on included edges (K_yy scale).
@@ -571,6 +626,37 @@ std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient_full(
         }
     }
 
+    // Pfaffian adjoint (subtracted in the forward pass, so subtract here too):
+    //   d/dA_q [-0.5 log det(A_q M_q A_q^T)] = -(A_q M_q A_q^T)^{-1} A_q M_q.
+    // Each A_q[r, l] = Phi[l, i_r] (l <= i_r), so the adjoint flows to
+    // column i_r of Phi_bar. Diagonal positions (l = i_r) are handled by
+    // the existing exp(psi) chain when extracting psi_bar from P(i,i).
+    for (size_t q = 1; q < p_; ++q) {
+        const auto& col = structure_->columns[q];
+        if (col.m_q == 0) continue;
+
+        const arma::mat& L_q = G_chol[q];
+        const arma::mat& Aq = Aq_cache[q];
+        const arma::vec& inv_mass_q = inv_mass_q_cache[q];
+
+        // Z = G_q^{-1} A_q via two triangular solves
+        arma::mat Z = arma::solve(arma::trimatl(L_q), Aq,
+                                  arma::solve_opts::fast);
+        Z = arma::solve(arma::trimatu(L_q.t()), Z,
+                        arma::solve_opts::fast);
+
+        // dAq = Z * diag(inv_mass_q)  (m_q x q)
+        arma::mat dAq = Z;
+        dAq.each_row() %= inv_mass_q.t();
+
+        for (size_t r = 0; r < col.m_q; ++r) {
+            size_t i_r = col.excluded_indices[r];
+            for (size_t l = 0; l <= i_r; ++l) {
+                P(l, i_r) -= dAq(r, l);
+            }
+        }
+    }
+
     // --- Extract gradient from Phi_bar (stored in P) ---
     arma::vec gradient(x.n_elem, arma::fill::none);
 
@@ -582,10 +668,14 @@ std::pair<double, arma::vec> GGMGradientEngine::logp_and_gradient_full(
             gradient(offset + i) = P(i, q);
         }
 
-        // Diagonal (psi_q): chain rule through exp + log-det + Jacobian
-        // (deg_higher(q) + 2) is the graph-aware Roverato Jacobian coefficient.
+        // Diagonal (psi_q): chain rule through exp + log-det + Jacobian.
+        // ldj contributes (p+1-q) on psi_q except at q = p-1 where it is 2;
+        // those two cases collapse to (p+1-q) since for q = p-1, p+1-q = 2.
         double psi_bar = P(q, q) * Phi(q, q);
-        psi_bar += n + static_cast<double>(deg_higher_vec(q) + 2);
+        psi_bar += n + 2.0;
+        if (q + 1 < p_) {
+            psi_bar += static_cast<double>(p_ - 1 - q);
+        }
         gradient(offset + q) = psi_bar;
     }
 

diff --git a/src/models/ggm/ggm_gradient.h b/src/models/ggm/ggm_gradient.h
@@ -111,18 +111,22 @@ class GGMGradientEngine {
      *
      * Operates on the full position vector x in R^{p(p+1)/2} —
      * the raw Cholesky entries (off-diagonal) and log-diagonal (psi).
-     * No null-space transformation or QR decomposition is needed:
-     * the gradient w.r.t. each x_{iq} is simply Phi_bar_{iq}.
      *
-     * The Jacobian includes only the Cholesky-to-K and log-diagonal
-     * terms. The QR Jacobian terms from the null-space basis are not
-     * needed because RATTLE preserves the correct measure on the
-     * constraint manifold.
+     * Includes the RATTLE constraint Pfaffian +0.5 log|det(J M^{-1} J^T)|
+     * so that the manifold marginal targets log p(K) + log|det dK/dx_manifold|.
+     * The Pfaffian factors across columns as a sum of per-column terms
+     * 0.5 log det(A_q diag(inv_mass_q) A_q^T), where A_q is the column-q
+     * excluded-edge constraint matrix.
      *
-     * @param x  Full position vector of dimension p(p+1)/2
+     * @param x              Full position vector of dimension p(p+1)/2
+     * @param inv_mass_diag  Diagonal of the inverse mass matrix used by the
+     *                       integrator; pass an empty vector to use identity
+     *                       (only correct when the integrator runs with M = I).
      * @return (log-posterior value, gradient vector of same dimension)
      */
-    std::pair<double, arma::vec> logp_and_gradient_full(const arma::vec& x) const;
+    std::pair<double, arma::vec> logp_and_gradient_full(
+        const arma::vec& x,
+        const arma::vec& inv_mass_diag = arma::vec()) const;
 
     /**
      * Givens QR of an n x m matrix M (n >= m).