From 49f4f47f1f89d94ce7d22d0f3a29d0b3f715038a Mon Sep 17 00:00:00 2001 From: Maarten Marsman Date: Fri, 15 May 2026 08:01:16 +0200 Subject: [PATCH 1/2] fix(mixed-mrf): port full-space Jacobian + Pfaffian split to Kyy block Completes the Fix 2 port from GGMGradientEngine to the mixed-MRF Kyy block. Replaces the previous Roverato-style Cholesky Jacobian log|det J| = q log 2 + sum_j (deg_higher_yy(j) + 2) psi_j with the graph-agnostic split plus a per-column constraint correction: ldj = q log 2 + sum_j (q + 1 - j) psi_j pfaffian = 0.5 * sum_qq log det(A_qq diag(M_qq^{-1}) A_qq^T) logp += ldj - pfaffian For the full Kyy graph all A_qq are empty and the Pfaffian collapses to zero, so the new and old formulas agree there. For sparse Kyy the previous formula did not represent any RATTLE measure correction; the new one is the correct manifold-marginal under any diagonal mass. Applied to both gradient paths: - logp_and_gradient (theta-space): identity mass; Pfaffian = 0 on full Kyy, which is the only regime the dispatcher routes here. - logp_and_gradient_full (full-space): mass-weighted Pfaffian using this->inv_mass_ (empty => identity). Backward pass adds the matching adjoint to R_bar via two triangular solves through chol(G_qq). ensure_constraint_structure() is now called at the top of each gradient function so chol_constraint_structure_.columns[col].excluded_indices is available when building per-column A_qq. The projection helpers (MixedMRFModel::project_position/_momentum) already built mass-weighted A_qq matrices for RATTLE; the new gradient code consumes the same constraint structure. No regressions in `devtools::test()` (6989 pass, 0 fail). --- src/models/mixed/mixed_mrf_gradient.cpp | 192 ++++++++++++++++++++---- 1 file changed, 162 insertions(+), 30 deletions(-) diff --git a/src/models/mixed/mixed_mrf_gradient.cpp b/src/models/mixed/mixed_mrf_gradient.cpp index cb0735c3..2cac4a2c 100644 --- a/src/models/mixed/mixed_mrf_gradient.cpp +++ b/src/models/mixed/mixed_mrf_gradient.cpp @@ -192,6 +192,7 @@ std::pair MixedMRFModel::logp_and_gradient( const arma::vec& parameters) { ensure_gradient_cache(); + ensure_constraint_structure(); // --- Unvectorize into temporaries (blocks 1–4) --- arma::mat temp_main_discrete = main_effects_discrete_; @@ -637,32 +638,85 @@ std::pair MixedMRFModel::logp_and_gradient( arma::mat Omega_bar_sym = Omega_bar + Omega_bar.t(); arma::mat R_bar = temp_cholesky * Omega_bar_sym; - // Roverato graph-constrained Cholesky Jacobian for the Kyy block: - // log|det J| = q log 2 + Σ_j (deg_higher_yy(j) + 2) ψ_j - // where deg_higher_yy(j) = number of active Kyy edges (j, qq) with qq > j. - // For the full Kyy graph deg_higher_yy(j) = q - 1 - j, so this reduces to - // (q - j + 1) — the original formula. For sparse Kyy graphs, the - // graph-aware count is required for the prior to integrate correctly - // (Marsman/Claude, 2026-05-01). - size_t gidx = static_cast(chol_grad_offset_); + // Cholesky-to-K Jacobian (graph-agnostic) + per-column Pfaffian correction. + // Mirrors GGMGradientEngine::logp_and_gradient_full: + // ldj = q*log(2) + Σ_j (q+1-j) ψ_j + // pfaffian = 0.5 * Σ_qq log det(A_qq A_qq^T) (identity mass here) + // logp += ldj - pfaffian + // Theta-space integration uses identity mass on the manifold; for the + // full-space (RATTLE) path we plug through the integrator's inverse-mass + // diagonal. For the full Kyy graph, all A_qq are empty, so the Pfaffian + // collapses to 0 and the formula reduces to the original q-j+1 weight. + logp += static_cast(q_) * std::log(2.0); for(size_t j = 0; j < q_; ++j) { - double psi_j = std::log(temp_cholesky(j, j)); - size_t deg_higher_yy = 0; - for(size_t qq = j + 1; qq < q_; ++qq) { - if(edge_indicators_(p_ + j, p_ + qq) == 1) ++deg_higher_yy; + logp += static_cast(q_ + 1 - j) * std::log(temp_cholesky(j, j)); + } + + const auto& cs = chol_constraint_structure_; + arma::mat Aq_buf; + std::vector G_chol(q_); + std::vector Aq_cache(q_); + double pfaffian = 0.0; + for(size_t col = 1; col < q_; ++col) { + const auto& cc = cs.columns[col]; + if(cc.m_q == 0) continue; + + GGMGradientEngine::build_Aq(temp_cholesky, cc, col, Aq_buf); + Aq_cache[col] = Aq_buf; + + // Identity mass (theta-space): G_q = A_q A_q^T. + arma::mat G_q = Aq_buf * Aq_buf.t(); + + arma::mat L_q; + bool chol_ok = arma::chol(L_q, G_q, "lower"); + if(!chol_ok) { + double ridge = 1e-12 * (arma::trace(G_q) / + static_cast(cc.m_q) + 1.0); + chol_ok = arma::chol(L_q, G_q + ridge * arma::eye(cc.m_q, cc.m_q), + "lower"); + if(!chol_ok) { + return {-std::numeric_limits::infinity(), + arma::vec(grad.n_elem, arma::fill::zeros)}; + } + } + G_chol[col] = L_q; + pfaffian += arma::accu(arma::log(arma::diagvec(L_q))); + } + logp -= pfaffian; + + // Pfaffian adjoint: d/dA_q [-0.5 log det(A_q A_q^T)] = -G_q^{-1} A_q. + // Each A_q(r, l) = R(l, i_r) for l <= i_r, so the adjoint flows back to + // column i_r of R_bar at rows l = 0..i_r. + for(size_t col = 1; col < q_; ++col) { + const auto& cc = cs.columns[col]; + if(cc.m_q == 0) continue; + + const arma::mat& L_q = G_chol[col]; + const arma::mat& Aq = Aq_cache[col]; + + arma::mat Z = arma::solve(arma::trimatl(L_q), Aq, + arma::solve_opts::fast); + Z = arma::solve(arma::trimatu(L_q.t()), Z, + arma::solve_opts::fast); + + for(size_t r = 0; r < cc.m_q; ++r) { + size_t i_r = cc.excluded_indices[r]; + for(size_t l = 0; l <= i_r; ++l) { + R_bar(l, i_r) -= Z(r, l); + } } - double jac_weight = static_cast(deg_higher_yy + 2); - logp += jac_weight * psi_j; + } - // Off-diagonal Cholesky entries: ∂ℓ/∂R_{ij} = R̄_{ij} + // Extract position gradient from R_bar with the unified weight (q+1-j) + // on the diagonal-psi entries. + size_t gidx = static_cast(chol_grad_offset_); + for(size_t j = 0; j < q_; ++j) { + double w_j = static_cast(q_ + 1 - j); for(size_t i = 0; i < j; ++i) { grad(gidx++) = R_bar(i, j); } - // Diagonal (log-scale): ∂ℓ/∂ψ_j = R̄_{jj} R_{jj} + (deg_higher_yy + 2) - grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + jac_weight; + grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + w_j; } - // Add constant Jacobian term to logp - logp += static_cast(q_) * std::log(2.0); return {logp, grad}; } @@ -680,6 +734,7 @@ std::pair MixedMRFModel::logp_and_gradient( std::pair MixedMRFModel::logp_and_gradient_full( const arma::vec& x) { + ensure_constraint_structure(); const size_t full_dim = full_parameter_dimension(); // --- Unpack all 5 blocks from full-space vector --- @@ -1110,24 +1165,101 @@ std::pair MixedMRFModel::logp_and_gradient_full( arma::mat Omega_bar_sym = Omega_bar + Omega_bar.t(); arma::mat R_bar = temp_cholesky * Omega_bar_sym; - // Roverato graph-constrained Cholesky Jacobian for the Kyy block. - // jac_weight = deg_higher_yy(j) + 2; reduces to q - j + 1 for full graph. - size_t gidx = chol_offset; + // Cholesky-to-K Jacobian (graph-agnostic) + mass-weighted per-column + // Pfaffian correction for the RATTLE manifold marginal: + // ldj = q*log(2) + Σ_j (q+1-j) ψ_j + // pfaffian = 0.5 * Σ_qq log det(A_qq diag(M_qq^{-1}) A_qq^T) + // logp += ldj - pfaffian + // Mass diagonal is plumbed from this->inv_mass_ (empty ⇒ identity). + // For the full Kyy graph, all A_qq are empty and Pfaffian = 0. + logp += static_cast(q_) * std::log(2.0); for(size_t j = 0; j < q_; ++j) { - double psi_j = std::log(temp_cholesky(j, j)); - size_t deg_higher_yy = 0; - for(size_t qq = j + 1; qq < q_; ++qq) { - if(edge_indicators_(p_ + j, p_ + qq) == 1) ++deg_higher_yy; + logp += static_cast(q_ + 1 - j) * std::log(temp_cholesky(j, j)); + } + + const auto& cs = chol_constraint_structure_; + const bool identity_mass = inv_mass_.is_empty(); + arma::mat Aq_buf; + std::vector G_chol(q_); + std::vector Aq_cache(q_); + std::vector inv_mass_q_cache(q_); + double pfaffian = 0.0; + for(size_t col = 1; col < q_; ++col) { + const auto& cc = cs.columns[col]; + if(cc.m_q == 0) continue; + + GGMGradientEngine::build_Aq(temp_cholesky, cc, col, Aq_buf); + Aq_cache[col] = Aq_buf; + + arma::vec inv_mass_q(col); + if(identity_mass) { + inv_mass_q.ones(); + } else { + size_t off_q = chol_block_offset_ + cs.full_theta_offsets[col]; + for(size_t l = 0; l < col; ++l) { + inv_mass_q(l) = inv_mass_(off_q + l); + } } - double jac_weight = static_cast(deg_higher_yy + 2); - logp += jac_weight * psi_j; + inv_mass_q_cache[col] = inv_mass_q; + + // G_q = A_q diag(inv_mass_q) A_q^T + arma::mat Aq_scaled = Aq_buf; + Aq_scaled.each_row() %= inv_mass_q.t(); + arma::mat G_q = Aq_scaled * Aq_buf.t(); + + arma::mat L_q; + bool chol_ok = arma::chol(L_q, G_q, "lower"); + if(!chol_ok) { + double ridge = 1e-12 * (arma::trace(G_q) / + static_cast(cc.m_q) + 1.0); + chol_ok = arma::chol(L_q, G_q + ridge * arma::eye(cc.m_q, cc.m_q), + "lower"); + if(!chol_ok) { + return {-std::numeric_limits::infinity(), + arma::vec(full_dim, arma::fill::zeros)}; + } + } + G_chol[col] = L_q; + pfaffian += arma::accu(arma::log(arma::diagvec(L_q))); + } + logp -= pfaffian; + + // Pfaffian adjoint: d/dA_q [-0.5 log det(A_q M_q^{-1} A_q^T)] flows back + // to R_bar at the excluded-edge columns. dA_q = G_q^{-1} A_q · diag(M_q^{-1}). + for(size_t col = 1; col < q_; ++col) { + const auto& cc = cs.columns[col]; + if(cc.m_q == 0) continue; + + const arma::mat& L_q = G_chol[col]; + const arma::mat& Aq = Aq_cache[col]; + const arma::vec& inv_mass_q = inv_mass_q_cache[col]; + + arma::mat Z = arma::solve(arma::trimatl(L_q), Aq, + arma::solve_opts::fast); + Z = arma::solve(arma::trimatu(L_q.t()), Z, + arma::solve_opts::fast); + + arma::mat dAq = Z; + dAq.each_row() %= inv_mass_q.t(); + + for(size_t r = 0; r < cc.m_q; ++r) { + size_t i_r = cc.excluded_indices[r]; + for(size_t l = 0; l <= i_r; ++l) { + R_bar(l, i_r) -= dAq(r, l); + } + } + } + // Extract position gradient from R_bar with the unified weight (q+1-j) + // on the diagonal-psi entries. + size_t gidx = chol_offset; + for(size_t j = 0; j < q_; ++j) { + double w_j = static_cast(q_ + 1 - j); for(size_t i = 0; i < j; ++i) { grad(gidx++) = R_bar(i, j); } - grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + jac_weight; + grad(gidx++) = R_bar(j, j) * temp_cholesky(j, j) + w_j; } - logp += static_cast(q_) * std::log(2.0); return {logp, grad}; } From 119a4cde46df6423f55ecf2b55fc5658811dfa93 Mon Sep 17 00:00:00 2001 From: Maarten Marsman Date: Fri, 15 May 2026 08:05:41 +0200 Subject: [PATCH 2/2] test(mixed-mrf): finite-difference checks for Kyy Pfaffian gradient Adds tests/testthat/test-mixed-gradient-pfaffian.R covering the new Cholesky-to-K + Pfaffian split in MixedMRFModel::logp_and_gradient_full. Four cases at max relative error < 1e-5: - full Kyy graph (refactor sanity; Pfaffian = 0) - sparse Kyy, 1 excluded edge, identity mass - sparse Kyy, 1 excluded edge, random diagonal mass - sparse Kyy, 2 excluded edges, random diagonal mass (multi-column) Plumbs an optional inv_mass_diag arg through mixed_test_logp_and_gradient_full so the integrator's mass-weighted Pfaffian can be exercised from R; the arg falls back to identity in the gradient engine when NULL. RcppExports regenerated via Rcpp::compileAttributes(). --- R/RcppExports.R | 4 +- src/RcppExports.cpp | 9 +- src/mixed_gradient_interface.cpp | 10 +- tests/testthat/test-mixed-gradient-pfaffian.R | 150 ++++++++++++++++++ 4 files changed, 166 insertions(+), 7 deletions(-) create mode 100644 tests/testthat/test-mixed-gradient-pfaffian.R diff --git a/R/RcppExports.R b/R/RcppExports.R index 33422a1b..47938d7b 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -69,8 +69,8 @@ mixed_test_logp_and_gradient <- function(params, discrete_observations, continuo .Call(`_bgms_mixed_test_logp_and_gradient`, params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate) } -mixed_test_logp_and_gradient_full <- function(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha = 1.0, main_beta = 1.0, interaction_prior_type = "cauchy", threshold_prior_type = "beta-prime", threshold_scale = 1.0, means_prior_type = "normal", means_scale = 1.0, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0) { - .Call(`_bgms_mixed_test_logp_and_gradient_full`, params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate) +mixed_test_logp_and_gradient_full <- function(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha = 1.0, main_beta = 1.0, interaction_prior_type = "cauchy", threshold_prior_type = "beta-prime", threshold_scale = 1.0, means_prior_type = "normal", means_scale = 1.0, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0, inv_mass_diag = NULL) { + .Call(`_bgms_mixed_test_logp_and_gradient_full`, params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag) } mixed_test_project_position <- function(x, inv_mass, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha = 1.0, main_beta = 1.0, interaction_prior_type = "cauchy", threshold_prior_type = "beta-prime", threshold_scale = 1.0) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 4e15686c..b07142dc 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -303,8 +303,8 @@ BEGIN_RCPP END_RCPP } // mixed_test_logp_and_gradient_full -Rcpp::List mixed_test_logp_and_gradient_full(const arma::vec& params, const arma::imat& discrete_observations, const arma::mat& continuous_observations, const arma::ivec& num_categories, const arma::uvec& is_ordinal_variable, const arma::ivec& baseline_category, const arma::imat& edge_indicators, double pairwise_scale, double main_alpha, double main_beta, std::string interaction_prior_type, std::string threshold_prior_type, double threshold_scale, std::string means_prior_type, double means_scale, std::string diagonal_prior_type, double diagonal_shape, double diagonal_rate); -RcppExport SEXP _bgms_mixed_test_logp_and_gradient_full(SEXP paramsSEXP, SEXP discrete_observationsSEXP, SEXP continuous_observationsSEXP, SEXP num_categoriesSEXP, SEXP is_ordinal_variableSEXP, SEXP baseline_categorySEXP, SEXP edge_indicatorsSEXP, SEXP pairwise_scaleSEXP, SEXP main_alphaSEXP, SEXP main_betaSEXP, SEXP interaction_prior_typeSEXP, SEXP threshold_prior_typeSEXP, SEXP threshold_scaleSEXP, SEXP means_prior_typeSEXP, SEXP means_scaleSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP) { +Rcpp::List mixed_test_logp_and_gradient_full(const arma::vec& params, const arma::imat& discrete_observations, const arma::mat& continuous_observations, const arma::ivec& num_categories, const arma::uvec& is_ordinal_variable, const arma::ivec& baseline_category, const arma::imat& edge_indicators, double pairwise_scale, double main_alpha, double main_beta, std::string interaction_prior_type, std::string threshold_prior_type, double threshold_scale, std::string means_prior_type, double means_scale, std::string diagonal_prior_type, double diagonal_shape, double diagonal_rate, Rcpp::Nullable inv_mass_diag); +RcppExport SEXP _bgms_mixed_test_logp_and_gradient_full(SEXP paramsSEXP, SEXP discrete_observationsSEXP, SEXP continuous_observationsSEXP, SEXP num_categoriesSEXP, SEXP is_ordinal_variableSEXP, SEXP baseline_categorySEXP, SEXP edge_indicatorsSEXP, SEXP pairwise_scaleSEXP, SEXP main_alphaSEXP, SEXP main_betaSEXP, SEXP interaction_prior_typeSEXP, SEXP threshold_prior_typeSEXP, SEXP threshold_scaleSEXP, SEXP means_prior_typeSEXP, SEXP means_scaleSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP, SEXP inv_mass_diagSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -326,7 +326,8 @@ BEGIN_RCPP Rcpp::traits::input_parameter< std::string >::type diagonal_prior_type(diagonal_prior_typeSEXP); Rcpp::traits::input_parameter< double >::type diagonal_shape(diagonal_shapeSEXP); Rcpp::traits::input_parameter< double >::type diagonal_rate(diagonal_rateSEXP); - rcpp_result_gen = Rcpp::wrap(mixed_test_logp_and_gradient_full(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate)); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type inv_mass_diag(inv_mass_diagSEXP); + rcpp_result_gen = Rcpp::wrap(mixed_test_logp_and_gradient_full(params, discrete_observations, continuous_observations, num_categories, is_ordinal_variable, baseline_category, edge_indicators, pairwise_scale, main_alpha, main_beta, interaction_prior_type, threshold_prior_type, threshold_scale, means_prior_type, means_scale, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag)); return rcpp_result_gen; END_RCPP } @@ -802,7 +803,7 @@ static const R_CallMethodDef CallEntries[] = { {"_bgms_compute_rhat_cpp", (DL_FUNC) &_bgms_compute_rhat_cpp, 1}, {"_bgms_compute_indicator_ess_cpp", (DL_FUNC) &_bgms_compute_indicator_ess_cpp, 1}, {"_bgms_mixed_test_logp_and_gradient", (DL_FUNC) &_bgms_mixed_test_logp_and_gradient, 18}, - {"_bgms_mixed_test_logp_and_gradient_full", (DL_FUNC) &_bgms_mixed_test_logp_and_gradient_full, 18}, + {"_bgms_mixed_test_logp_and_gradient_full", (DL_FUNC) &_bgms_mixed_test_logp_and_gradient_full, 19}, {"_bgms_mixed_test_project_position", (DL_FUNC) &_bgms_mixed_test_project_position, 14}, {"_bgms_mixed_test_project_momentum", (DL_FUNC) &_bgms_mixed_test_project_momentum, 15}, {"_bgms_mixed_test_leapfrog_constrained", (DL_FUNC) &_bgms_mixed_test_leapfrog_constrained, 17}, diff --git a/src/mixed_gradient_interface.cpp b/src/mixed_gradient_interface.cpp index 6957065b..c113c3e0 100644 --- a/src/mixed_gradient_interface.cpp +++ b/src/mixed_gradient_interface.cpp @@ -75,7 +75,8 @@ Rcpp::List mixed_test_logp_and_gradient_full( double means_scale = 1.0, std::string diagonal_prior_type = "gamma", double diagonal_shape = 1.0, - double diagonal_rate = 1.0) + double diagonal_rate = 1.0, + Rcpp::Nullable inv_mass_diag = R_NilValue) { size_t p = discrete_observations.n_cols; size_t q = continuous_observations.n_cols; @@ -95,6 +96,13 @@ Rcpp::List mixed_test_logp_and_gradient_full( 42 ); + // Plug the integrator's inverse-mass diagonal through to the gradient + // so the mass-weighted Pfaffian correction can be exercised from R. + // Empty/NULL falls back to identity in the gradient engine. + if(inv_mass_diag.isNotNull()) { + model.set_inv_mass(Rcpp::as(inv_mass_diag)); + } + auto result = model.logp_and_gradient_full(params); return Rcpp::List::create( diff --git a/tests/testthat/test-mixed-gradient-pfaffian.R b/tests/testthat/test-mixed-gradient-pfaffian.R new file mode 100644 index 00000000..d39a4247 --- /dev/null +++ b/tests/testthat/test-mixed-gradient-pfaffian.R @@ -0,0 +1,150 @@ +# --------------------------------------------------------------------------- # +# Finite-difference checks for MixedMRFModel::logp_and_gradient_full. +# +# Targets the Cholesky-to-K Jacobian + per-column Pfaffian split that ports +# GGM Fix 2 to the mixed-MRF Kyy block. The key code paths exercised are: +# - the unified ldj = q*log(2) + sum_j (q+1-j) psi_j Jacobian +# - the per-column Pfaffian 0.5 * sum_qq log det(A_qq diag(M_qq^-1) A_qq^T) +# - the matching Pfaffian adjoint that flows into R_bar at excluded-edge +# positions, then into the diagonal psi gradient through the exp() chain +# +# Full Kyy graph: A_qq are empty so Pfaffian = 0 — this verifies the +# refactor doesn't change anything in the no-constraint case. +# Sparse Kyy: Pfaffian is non-trivial; tests both identity and random +# positive diagonal mass (the latter is what NUTS uses after +# warmup). +# --------------------------------------------------------------------------- # + +# ---- Helpers ---------------------------------------------------------------- # + +mixed_full_fd_gradient = function(params, x, y, num_cats, is_ord, base_cat, + edge_ind, scale, inv_mass = NULL, + eps = 1e-5) { + n_total = length(params) + fd = numeric(n_total) + for(k in seq_len(n_total)) { + p_plus = params + p_minus = params + p_plus[k] = p_plus[k] + eps + p_minus[k] = p_minus[k] - eps + fp = mixed_test_logp_and_gradient_full( + p_plus, x, y, num_cats, as.integer(is_ord), + base_cat, edge_ind, scale, + inv_mass_diag = inv_mass + )$value + fm = mixed_test_logp_and_gradient_full( + p_minus, x, y, num_cats, as.integer(is_ord), + base_cat, edge_ind, scale, + inv_mass_diag = inv_mass + )$value + fd[k] = (fp - fm) / (2 * eps) + } + fd +} + +mixed_full_check_gradient = function(params, x, y, num_cats, is_ord, base_cat, + edge_ind, scale, inv_mass = NULL, + eps = 1e-5) { + res = mixed_test_logp_and_gradient_full( + params, x, y, num_cats, as.integer(is_ord), + base_cat, edge_ind, scale, + inv_mass_diag = inv_mass + ) + fd = mixed_full_fd_gradient( + params, x, y, num_cats, is_ord, base_cat, + edge_ind, scale, inv_mass, eps + ) + ag = res$gradient + denom = pmax(abs(ag), abs(fd), 1) + max(abs(ag - fd) / denom) +} + +# Build a small mixed model with q continuous and p discrete variables, +# returning everything needed by the gradient harness in one shot. +mixed_full_make_setup = function(p = 2, q = 3, n = 60, seed = 42) { + set.seed(seed) + x = matrix(sample(0:1, n * p, replace = TRUE), n, p) + y = matrix(rnorm(n * q), n, q) + num_cats = rep(2L, p) + is_ord = rep(1L, p) + base_cat = rep(0L, p) + total = p + q + edge_ind = matrix(1L, total, total) + diag(edge_ind) = 0L + + n_main = sum(num_cats) + n_pw_xx = p * (p - 1L) / 2L + n_means = q + n_xy = p * q + n_chol = q * (q + 1L) / 2L + total_dim = n_main + n_pw_xx + n_means + n_xy + n_chol + + set.seed(seed + 1) + params = rnorm(total_dim, sd = 0.3) + + list( + x = x, y = y, num_cats = num_cats, is_ord = is_ord, base_cat = base_cat, + edge_ind = edge_ind, params = params, + p = p, q = q, total = total, total_dim = total_dim + ) +} + + +# ---- Full Kyy graph: Pfaffian = 0 ------------------------------------------- # + +test_that("gradient matches FD on full Kyy graph (identity mass)", { + s = mixed_full_make_setup() + err = mixed_full_check_gradient( + s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat, + s$edge_ind, 2.5 + ) + expect_lt(err, 1e-5) +}) + + +# ---- Sparse Kyy graph: Pfaffian is non-trivial ------------------------------ # + +test_that("gradient matches FD with one excluded Kyy edge (identity mass)", { + s = mixed_full_make_setup() + # Exclude one Kyy edge: between continuous-1 and continuous-3. + s$edge_ind[s$p + 1L, s$p + 3L] = 0L + s$edge_ind[s$p + 3L, s$p + 1L] = 0L + err = mixed_full_check_gradient( + s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat, + s$edge_ind, 2.5 + ) + expect_lt(err, 1e-5) +}) + + +test_that("gradient matches FD with one excluded Kyy edge (random mass)", { + s = mixed_full_make_setup() + s$edge_ind[s$p + 1L, s$p + 3L] = 0L + s$edge_ind[s$p + 3L, s$p + 1L] = 0L + set.seed(7) + inv_mass = runif(s$total_dim, min = 0.5, max = 2.0) + err = mixed_full_check_gradient( + s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat, + s$edge_ind, 2.5, + inv_mass = inv_mass + ) + expect_lt(err, 1e-5) +}) + + +test_that("gradient matches FD with two excluded Kyy edges (random mass)", { + s = mixed_full_make_setup(q = 4) + # Exclude (p+1, p+3) and (p+2, p+4): two columns carry constraints. + s$edge_ind[s$p + 1L, s$p + 3L] = 0L + s$edge_ind[s$p + 3L, s$p + 1L] = 0L + s$edge_ind[s$p + 2L, s$p + 4L] = 0L + s$edge_ind[s$p + 4L, s$p + 2L] = 0L + set.seed(13) + inv_mass = runif(s$total_dim, min = 0.5, max = 2.0) + err = mixed_full_check_gradient( + s$params, s$x, s$y, s$num_cats, s$is_ord, s$base_cat, + s$edge_ind, 2.5, + inv_mass = inv_mass + ) + expect_lt(err, 1e-5) +})