diff --git a/R/RcppExports.R b/R/RcppExports.R index 7950a66d..33422a1b 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -137,8 +137,8 @@ ggm_test_logp_and_gradient_prior <- function(theta, suf_stat, n, edge_indicators .Call(`_bgms_ggm_test_logp_and_gradient_prior`, theta, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate) } -ggm_test_logp_and_gradient_full_prior <- function(x, suf_stat, n, edge_indicators, interaction_prior_type = "cauchy", interaction_scale = 1.0, interaction_alpha = 0.5, interaction_beta = 0.5, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0) { - .Call(`_bgms_ggm_test_logp_and_gradient_full_prior`, x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate) +ggm_test_logp_and_gradient_full_prior <- function(x, suf_stat, n, edge_indicators, interaction_prior_type = "cauchy", interaction_scale = 1.0, interaction_alpha = 0.5, interaction_beta = 0.5, diagonal_prior_type = "gamma", diagonal_shape = 1.0, diagonal_rate = 1.0, inv_mass_diag = NULL) { + .Call(`_bgms_ggm_test_logp_and_gradient_full_prior`, x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag) } sample_ggm <- function(inputFromR, prior_inclusion_prob, initial_edge_indicators, no_iter, no_warmup, no_chains, edge_selection, sampler_type, seed, no_threads, progress_type, progress_callback = NULL, edge_prior = "Bernoulli", beta_bernoulli_alpha = 1.0, beta_bernoulli_beta = 1.0, beta_bernoulli_alpha_between = 1.0, beta_bernoulli_beta_between = 1.0, dirichlet_alpha = 1.0, lambda = 1.0, target_acceptance = 0.8, max_tree_depth = 10L, na_impute = FALSE, missing_index_nullable = NULL) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 44a5fdd9..4e15686c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -647,8 +647,8 @@ BEGIN_RCPP END_RCPP } // ggm_test_logp_and_gradient_full_prior -Rcpp::List ggm_test_logp_and_gradient_full_prior(const arma::vec& x, const arma::mat& suf_stat, int n, const arma::imat& edge_indicators, const std::string& interaction_prior_type, double interaction_scale, double interaction_alpha, double interaction_beta, const std::string& diagonal_prior_type, double diagonal_shape, double diagonal_rate); -RcppExport SEXP _bgms_ggm_test_logp_and_gradient_full_prior(SEXP xSEXP, SEXP suf_statSEXP, SEXP nSEXP, SEXP edge_indicatorsSEXP, SEXP interaction_prior_typeSEXP, SEXP interaction_scaleSEXP, SEXP interaction_alphaSEXP, SEXP interaction_betaSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP) { +Rcpp::List ggm_test_logp_and_gradient_full_prior(const arma::vec& x, const arma::mat& suf_stat, int n, const arma::imat& edge_indicators, const std::string& interaction_prior_type, double interaction_scale, double interaction_alpha, double interaction_beta, const std::string& diagonal_prior_type, double diagonal_shape, double diagonal_rate, Rcpp::Nullable inv_mass_diag); +RcppExport SEXP _bgms_ggm_test_logp_and_gradient_full_prior(SEXP xSEXP, SEXP suf_statSEXP, SEXP nSEXP, SEXP edge_indicatorsSEXP, SEXP interaction_prior_typeSEXP, SEXP interaction_scaleSEXP, SEXP interaction_alphaSEXP, SEXP interaction_betaSEXP, SEXP diagonal_prior_typeSEXP, SEXP diagonal_shapeSEXP, SEXP diagonal_rateSEXP, SEXP inv_mass_diagSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -663,7 +663,8 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const std::string& >::type diagonal_prior_type(diagonal_prior_typeSEXP); Rcpp::traits::input_parameter< double >::type diagonal_shape(diagonal_shapeSEXP); Rcpp::traits::input_parameter< double >::type diagonal_rate(diagonal_rateSEXP); - rcpp_result_gen = Rcpp::wrap(ggm_test_logp_and_gradient_full_prior(x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate)); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type inv_mass_diag(inv_mass_diagSEXP); + rcpp_result_gen = Rcpp::wrap(ggm_test_logp_and_gradient_full_prior(x, suf_stat, n, edge_indicators, interaction_prior_type, interaction_scale, interaction_alpha, interaction_beta, diagonal_prior_type, diagonal_shape, diagonal_rate, inv_mass_diag)); return rcpp_result_gen; END_RCPP } @@ -818,7 +819,7 @@ static const R_CallMethodDef CallEntries[] = { {"_bgms_test_parameter_prior", (DL_FUNC) &_bgms_test_parameter_prior, 6}, {"_bgms_test_scale_prior", (DL_FUNC) &_bgms_test_scale_prior, 4}, {"_bgms_ggm_test_logp_and_gradient_prior", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient_prior, 11}, - {"_bgms_ggm_test_logp_and_gradient_full_prior", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient_full_prior, 11}, + {"_bgms_ggm_test_logp_and_gradient_full_prior", (DL_FUNC) &_bgms_ggm_test_logp_and_gradient_full_prior, 12}, {"_bgms_sample_ggm", (DL_FUNC) &_bgms_sample_ggm, 23}, {"_bgms_sample_mixed_mrf", (DL_FUNC) &_bgms_sample_mixed_mrf, 24}, {"_bgms_sample_omrf", (DL_FUNC) &_bgms_sample_omrf, 24}, diff --git a/src/ggm_gradient_interface.cpp b/src/ggm_gradient_interface.cpp index ca364e60..cff375da 100644 --- a/src/ggm_gradient_interface.cpp +++ b/src/ggm_gradient_interface.cpp @@ -350,13 +350,15 @@ Rcpp::List ggm_test_leapfrog_constrained_checked( // the likelihood vanishes and the sampler targets the prior: // -K_ij/2 | graph ~ Cauchy(0, scale) or Normal(0, scale) (included edges) // K_ij = 0 (excluded edges) -// K_ii ~ Gamma(gamma_shape, gamma_rate) (diagonal) +// K_ii/2 ~ Gamma(gamma_shape, gamma_rate) (diagonal) // -// Note on convention: `interaction_prior` is applied on the association scale -// K_yy_{ij} = -K_{ij}/2 (consistent with the mixed-MRF continuous block), -// NOT on the precision entry K_{ij} directly. A `Normal(0, scale)` prior -// therefore constrains K_yy off-diagonals with standard deviation `scale`, -// equivalent to Normal(0, 2*scale) on K_{ij}. +// Note on convention: both `interaction_prior` and `diagonal_prior` are +// applied on the partial-association scale K_yy = -K/2 (consistent with the +// mixed-MRF continuous block), NOT on the precision entries directly. +// A `Normal(0, scale)` prior therefore constrains K_yy off-diagonals with +// standard deviation `scale`, equivalent to Normal(0, 2*scale) on K_{ij}. +// A `Gamma(shape, rate)` diagonal prior is applied to -K_yy_{ii} = K_{ii}/2, +// so the implied prior on K_{ii} is Gamma(shape, rate/2). // // edge_indicators: p x p integer matrix with 1 = edge included, 0 = excluded. // Defaults to all-ones (full graph). For edge selection SBC, pass the diff --git a/src/models/ggm/ggm_gradient.cpp b/src/models/ggm/ggm_gradient.cpp index 0a5c5b70..09dec889 100644 --- a/src/models/ggm/ggm_gradient.cpp +++ b/src/models/ggm/ggm_gradient.cpp @@ -277,10 +277,12 @@ std::pair GGMGradientEngine::logp_and_gradient( } } - // Prior on diagonal K entries + // Prior on the partial-association diagonal: -K_yy_{ii} = K_{ii}/2. + // Evaluated at 0.5 * K_{ii} so `diagonal_prior_` refers to the same + // quantity (the partial association) as `interaction_prior_`. double log_diag_prior = 0.0; for (size_t i = 0; i < p_; ++i) { - log_diag_prior += diagonal_prior_->logp(K(i, i)); + log_diag_prior += diagonal_prior_->logp(0.5 * K(i, i)); } double lp = log_lik + log_slab + log_diag_prior + fm.log_det_jacobian; @@ -293,11 +295,13 @@ std::pair GGMGradientEngine::logp_and_gradient( // Phase 1: Phi_bar from data + priors (direct Phi-space, no K^{-1}) // Data: d/dPhi [-0.5 tr(Phi^T Phi S)] = -Phi S = -P - // Diagonal prior: d/dPhi [log p(K_ii)] = grad(K_ii) * 2 Phi(:,i) + // Diagonal prior: d/dPhi [log p(K_ii/2)] + // = (1/2) * grad(K_ii/2) * dK_ii/dPhi + // = (1/2) * grad(K_ii/2) * 2 Phi(:,i) = grad(K_ii/2) * Phi(:,i). arma::mat Phi_bar = -P; for (size_t i = 0; i < p_; ++i) { - double dg = diagonal_prior_->grad(K(i, i)); - Phi_bar.col(i).head(i + 1) += 2.0 * dg * Phi.col(i).head(i + 1); + double dg = diagonal_prior_->grad(0.5 * K(i, i)); + Phi_bar.col(i).head(i + 1) += dg * Phi.col(i).head(i + 1); } // Interaction prior adjoint on included edges. @@ -455,8 +459,11 @@ std::pair GGMGradientEngine::logp_and_gradient( // from the previous K-space adjoint approach. std::pair GGMGradientEngine::logp_and_gradient_full( - const arma::vec& x) const + const arma::vec& x, + const arma::vec& inv_mass_diag) const { + const bool use_identity_mass = inv_mass_diag.is_empty(); + // --- Forward pass: unpack x -> Phi --- arma::mat Phi(p_, p_, arma::fill::zeros); arma::vec psi(p_); @@ -506,38 +513,85 @@ std::pair GGMGradientEngine::logp_and_gradient_full( } } - // Prior on diagonal K entries + // Prior on the partial-association diagonal: -K_yy_{ii} = K_{ii}/2. double log_diag_prior = 0.0; for (size_t i = 0; i < p_; ++i) { double kii = arma::dot( Phi.col(i).head(i + 1), Phi.col(i).head(i + 1)); - log_diag_prior += diagonal_prior_->logp(kii); + log_diag_prior += diagonal_prior_->logp(0.5 * kii); } - // Roverato graph-constrained Cholesky Jacobian: - // log|det J(x_full -> K_active)| = p*log(2) - // + sum_k (deg_higher(k) + 2) * psi_k - // where deg_higher(k) = number of active edges (k, q) with q > k. - // For the full graph deg_higher(k) = p - 1 - k, so this reduces to the - // earlier "p*log(2) + 2*sum(psi) + sum_{i(p_) * std::log(2.0); - arma::uvec deg_higher_vec(p_, arma::fill::zeros); for (size_t k = 0; k < p_; ++k) { - for (size_t q = k + 1; q < p_; ++q) { - const auto& inc = structure_->columns[q].included_indices; - if (std::find(inc.begin(), inc.end(), k) != inc.end()) { - ++deg_higher_vec(k); + ldj += 2.0 * psi(k); + } + for (size_t k = 0; k + 1 < p_; ++k) { + ldj += static_cast(p_ - 1 - k) * psi(k); + } + + // Graph-aware constraint Jacobian correction: -0.5 * sum_q log det G_q + // with G_q = A_q diag(inv_mass_q) A_q^T. For identity mass this matches + // the theta-space Roverato/QR Jacobian term -sum log R_diag; with the + // NUTS-adapted diagonal mass we use the mass-weighted determinant so the + // RATTLE manifold marginal targets the intended K | G prior. + // The factor is *subtracted* (mirroring the theta-space path); the + // per-column G_q form is exact here because the position-projection is + // applied column-by-column (Roverato ordering: each column-q constraint + // is linear in column q's off-diagonals given earlier columns). + arma::mat Aq_buf; + std::vector G_chol(p_); // lower Cholesky of G_q per column + std::vector Aq_cache(p_); // cached A_q per column + std::vector inv_mass_q_cache(p_); + double pfaffian = 0.0; + for (size_t q = 1; q < p_; ++q) { + const auto& col = structure_->columns[q]; + if (col.m_q == 0) continue; + + build_Aq(Phi, col, q, Aq_buf); + Aq_cache[q] = Aq_buf; + + size_t off_q = structure_->full_theta_offsets[q]; + arma::vec inv_mass_q(q); + if (use_identity_mass) { + inv_mass_q.ones(); + } else { + for (size_t l = 0; l < q; ++l) { + inv_mass_q(l) = inv_mass_diag(off_q + l); + } + } + inv_mass_q_cache[q] = inv_mass_q; + + // G_q = A_q diag(inv_mass_q) A_q^T (m_q x m_q, symmetric PD) + arma::mat Aq_scaled = Aq_buf; + Aq_scaled.each_row() %= inv_mass_q.t(); + arma::mat G_q = Aq_scaled * Aq_buf.t(); + + arma::mat L_q; + bool chol_ok = arma::chol(L_q, G_q, "lower"); + if (!chol_ok) { + // Tiny ridge for numerical safety; harmless during normal NUTS use. + double ridge = 1e-12 * (arma::trace(G_q) / + static_cast(col.m_q) + 1.0); + chol_ok = arma::chol(L_q, G_q + ridge * arma::eye(col.m_q, col.m_q), + "lower"); + if (!chol_ok) { + return {-std::numeric_limits::infinity(), + arma::vec(x.n_elem, arma::fill::zeros)}; } } - ldj += static_cast(deg_higher_vec(k) + 2) * psi(k); + G_chol[q] = L_q; + pfaffian += arma::accu(arma::log(arma::diagvec(L_q))); } - double lp = log_lik + log_slab + log_diag_prior + ldj; + double lp = log_lik + log_slab + log_diag_prior + ldj - pfaffian; if (!std::isfinite(lp)) { return {lp, arma::vec(x.n_elem, arma::fill::zeros)}; @@ -550,13 +604,14 @@ std::pair GGMGradientEngine::logp_and_gradient_full( P *= -1.0; // P now holds -Phi*S = data part of Phi_bar - // Diagonal prior adjoint: d/dK_ii[log p(K_ii)] → grad(K_ii) * 2 Phi(:,i) + // Diagonal prior adjoint: d/dPhi [log p(K_ii/2)] + // = (1/2) * grad(K_ii/2) * 2 Phi(:,i) = grad(K_ii/2) * Phi(:,i). for (size_t i = 0; i < p_; ++i) { double kii = arma::dot( Phi.col(i).head(i + 1), Phi.col(i).head(i + 1)); - double dg = diagonal_prior_->grad(kii); - P.col(i).head(i + 1) += 2.0 * dg * Phi.col(i).head(i + 1); + double dg = diagonal_prior_->grad(0.5 * kii); + P.col(i).head(i + 1) += dg * Phi.col(i).head(i + 1); } // Interaction prior adjoint on included edges (K_yy scale). @@ -571,6 +626,37 @@ std::pair GGMGradientEngine::logp_and_gradient_full( } } + // Pfaffian adjoint (subtracted in the forward pass, so subtract here too): + // d/dA_q [-0.5 log det(A_q M_q A_q^T)] = -(A_q M_q A_q^T)^{-1} A_q M_q. + // Each A_q[r, l] = Phi[l, i_r] (l <= i_r), so the adjoint flows to + // column i_r of Phi_bar. Diagonal positions (l = i_r) are handled by + // the existing exp(psi) chain when extracting psi_bar from P(i,i). + for (size_t q = 1; q < p_; ++q) { + const auto& col = structure_->columns[q]; + if (col.m_q == 0) continue; + + const arma::mat& L_q = G_chol[q]; + const arma::mat& Aq = Aq_cache[q]; + const arma::vec& inv_mass_q = inv_mass_q_cache[q]; + + // Z = G_q^{-1} A_q via two triangular solves + arma::mat Z = arma::solve(arma::trimatl(L_q), Aq, + arma::solve_opts::fast); + Z = arma::solve(arma::trimatu(L_q.t()), Z, + arma::solve_opts::fast); + + // dAq = Z * diag(inv_mass_q) (m_q x q) + arma::mat dAq = Z; + dAq.each_row() %= inv_mass_q.t(); + + for (size_t r = 0; r < col.m_q; ++r) { + size_t i_r = col.excluded_indices[r]; + for (size_t l = 0; l <= i_r; ++l) { + P(l, i_r) -= dAq(r, l); + } + } + } + // --- Extract gradient from Phi_bar (stored in P) --- arma::vec gradient(x.n_elem, arma::fill::none); @@ -582,10 +668,14 @@ std::pair GGMGradientEngine::logp_and_gradient_full( gradient(offset + i) = P(i, q); } - // Diagonal (psi_q): chain rule through exp + log-det + Jacobian - // (deg_higher(q) + 2) is the graph-aware Roverato Jacobian coefficient. + // Diagonal (psi_q): chain rule through exp + log-det + Jacobian. + // ldj contributes (p+1-q) on psi_q except at q = p-1 where it is 2; + // those two cases collapse to (p+1-q) since for q = p-1, p+1-q = 2. double psi_bar = P(q, q) * Phi(q, q); - psi_bar += n + static_cast(deg_higher_vec(q) + 2); + psi_bar += n + 2.0; + if (q + 1 < p_) { + psi_bar += static_cast(p_ - 1 - q); + } gradient(offset + q) = psi_bar; } diff --git a/src/models/ggm/ggm_gradient.h b/src/models/ggm/ggm_gradient.h index e70693a8..d95d6b65 100644 --- a/src/models/ggm/ggm_gradient.h +++ b/src/models/ggm/ggm_gradient.h @@ -111,18 +111,22 @@ class GGMGradientEngine { * * Operates on the full position vector x in R^{p(p+1)/2} — * the raw Cholesky entries (off-diagonal) and log-diagonal (psi). - * No null-space transformation or QR decomposition is needed: - * the gradient w.r.t. each x_{iq} is simply Phi_bar_{iq}. * - * The Jacobian includes only the Cholesky-to-K and log-diagonal - * terms. The QR Jacobian terms from the null-space basis are not - * needed because RATTLE preserves the correct measure on the - * constraint manifold. + * Includes the RATTLE constraint Pfaffian +0.5 log|det(J M^{-1} J^T)| + * so that the manifold marginal targets log p(K) + log|det dK/dx_manifold|. + * The Pfaffian factors across columns as a sum of per-column terms + * 0.5 log det(A_q diag(inv_mass_q) A_q^T), where A_q is the column-q + * excluded-edge constraint matrix. * - * @param x Full position vector of dimension p(p+1)/2 + * @param x Full position vector of dimension p(p+1)/2 + * @param inv_mass_diag Diagonal of the inverse mass matrix used by the + * integrator; pass an empty vector to use identity + * (only correct when the integrator runs with M = I). * @return (log-posterior value, gradient vector of same dimension) */ - std::pair logp_and_gradient_full(const arma::vec& x) const; + std::pair logp_and_gradient_full( + const arma::vec& x, + const arma::vec& inv_mass_diag = arma::vec()) const; /** * Givens QR of an n x m matrix M (n >= m). diff --git a/src/models/ggm/ggm_model.cpp b/src/models/ggm/ggm_model.cpp index 4ed455b3..6e6cf667 100644 --- a/src/models/ggm/ggm_model.cpp +++ b/src/models/ggm/ggm_model.cpp @@ -144,7 +144,12 @@ std::pair GGMModel::logp_and_gradient_full( const arma::vec& x) { ensure_constraint_structure(); - return gradient_engine_.logp_and_gradient_full(x); + // Pass the current NUTS-adapted inverse mass diagonal so the gradient + // engine can use the correct mass-weighted Pfaffian + // +0.5 log det(A_q M_q^{-1} A_q^T) + // for the RATTLE manifold-marginal correction. Empty inv_mass_ falls + // back to identity in the engine. + return gradient_engine_.logp_and_gradient_full(x, inv_mass_); } arma::vec GGMModel::get_active_inv_mass() const { @@ -158,54 +163,23 @@ arma::vec GGMModel::get_active_inv_mass() const { return arma::ones(cs.active_dim); } - // inv_mass_ has full dimension (from stage 2, all edges on). - // Rotate into the current constrained basis using N_q. + // The theta-space (unconstrained) NUTS path is the only caller of this + // function. get_full_vectorized_parameters() scatters each active theta + // entry into a definite slot in the full-dim vector (included off-diag + // slot for f_q[k]; full_psi_offset for psi_q; excluded slots stay 0). + // Welford in NUTSAdaptationController therefore tracks the variance of + // the theta entry at that slot directly, so the active inverse mass is + // simply the included-slot subset — no N_q rotation needed. if (inv_mass_.n_elem == cs.full_dim) { arma::vec active(cs.active_dim); - arma::mat Aq_buf; - for (size_t q = 0; q < p_; ++q) { const auto& col = cs.columns[q]; size_t active_offset = cs.theta_offsets[q]; size_t full_offset = cs.full_theta_offsets[q]; - - if (q == 0 || col.d_q == 0) { - // psi_q only — pass through directly - active(cs.psi_offset(q)) = inv_mass_(cs.full_psi_offset(q)); - continue; - } - - // Gather per-Cholesky-entry variances for column q - arma::vec var_xq(q); - for (size_t j = 0; j < q; ++j) { - var_xq(j) = inv_mass_(full_offset + j); - } - - if (col.m_q == 0) { - // No constraints: N_q = I, so f_q = x_q and - // mass entries pass through directly. - for (size_t k = 0; k < col.d_q; ++k) { - active(active_offset + k) = var_xq(col.included_indices[k]); - } - } else { - // Build N_q and rotate: M^{-1}_{f_k} = sum_j N_{jk}^2 var(x_j) - arma::mat Q_tmp, R_tmp; - arma::vec R_diag; - std::vector rots_tmp; - GGMGradientEngine::build_Aq(cholesky_of_precision_, col, q, Aq_buf); - GGMGradientEngine::givens_qr(Aq_buf.t(), Q_tmp, R_tmp, R_diag, rots_tmp); - arma::mat Nq = Q_tmp.cols(col.m_q, q - 1); - - for (size_t k = 0; k < col.d_q; ++k) { - double mass_k = 0.0; - for (size_t j = 0; j < q; ++j) { - mass_k += Nq(j, k) * Nq(j, k) * var_xq(j); - } - active(active_offset + k) = mass_k; - } + for (size_t k = 0; k < col.d_q; ++k) { + active(active_offset + k) = + inv_mass_(full_offset + col.included_indices[k]); } - - // psi_q: pass through unchanged active(cs.psi_offset(q)) = inv_mass_(cs.full_psi_offset(q)); } return active; @@ -702,13 +676,11 @@ void GGMModel::update_edge_parameter(size_t i, size_t j, int iteration) { ln_alpha += interaction_prior_->logp(-0.5 * precision_proposal_(i, j)); ln_alpha -= interaction_prior_->logp(-0.5 * precision_matrix_(i, j)); - // Diagonal prior on K_jj. K_jj is a deterministic function of K_ij via - // constrained_diagonal, so its change must enter the MH ratio. This was - // previously claimed to "cancel under Gamma(1,1)", but that cancellation - // is incorrect for any non-Gamma(1,1) precision_scale_prior and shows up - // as edge-selection SBC failure. - ln_alpha += diagonal_prior_->logp(precision_proposal_(j, j)); - ln_alpha -= diagonal_prior_->logp(precision_matrix_(j, j)); + // Gamma(shape, rate) prior on changed diagonal K_jj. The Roverato move + // slaves K_jj = c_3 + phi_{q-1,q}^2, so K_jj moves with the off-diagonal + // and its prior must be re-evaluated. + ln_alpha += diagonal_prior_->logp(0.5 * precision_proposal_(j, j)); + ln_alpha -= diagonal_prior_->logp(0.5 * precision_matrix_(j, j)); if (MY_LOG(runif(rng_)) < ln_alpha) { double omega_ij_old = precision_matrix_(i, j); @@ -783,8 +755,8 @@ void GGMModel::update_diagonal_parameter(size_t i, int iteration) { double ln_alpha = log_density_impl_diag(i); - ln_alpha += diagonal_prior_->logp(precision_proposal_(i, i)); - ln_alpha -= diagonal_prior_->logp(precision_matrix_(i, i)); + ln_alpha += diagonal_prior_->logp(0.5 * precision_proposal_(i, i)); + ln_alpha -= diagonal_prior_->logp(0.5 * precision_matrix_(i, i)); ln_alpha += 2.0 * (theta_prop - theta_curr); // Jacobian: dK_ii/dtheta = 2*exp(2*theta) if (MY_LOG(runif(rng_)) < ln_alpha) { @@ -863,12 +835,11 @@ void GGMModel::update_edge_indicator_parameter_pair(size_t i, size_t j) { // Interaction prior on K_yy_{ij} = -0.5 * Omega_{ij} ln_alpha -= interaction_prior_->logp(-0.5 * precision_matrix_(i, j)); - // Diagonal prior on K_jj: changes from precision_matrix_(j, j) - // (edge ON) to precision_proposal_(j, j) = constrained_diagonal(0) - // (edge OFF). The earlier "cancels under Gamma(1,1)" claim is wrong - // for general precision_scale_prior. - ln_alpha += diagonal_prior_->logp(precision_proposal_(j, j)); - ln_alpha -= diagonal_prior_->logp(precision_matrix_(j, j)); + // Gamma(shape, rate) prior on changed diagonal K_jj. The Roverato move + // slaves K_jj = c_3 + phi_{q-1,q}^2, so K_jj moves with the off-diagonal + // and its prior must be re-evaluated. + ln_alpha += diagonal_prior_->logp(0.5 * precision_proposal_(j, j)); + ln_alpha -= diagonal_prior_->logp(0.5 * precision_matrix_(j, j)); if (MY_LOG(runif(rng_)) < ln_alpha) { @@ -921,12 +892,11 @@ void GGMModel::update_edge_indicator_parameter_pair(size_t i, size_t j) { // Prior change: add slab (interaction prior on K_yy_{ij} = -0.5 * Omega_{ij}) ln_alpha += interaction_prior_->logp(-0.5 * omega_prop_ij); - // Diagonal prior on K_jj: changes from constants_[5] (edge OFF) to - // omega_prop_jj (edge ON via constrained parameterization). The - // earlier "cancels under Gamma(1,1)" claim is wrong for general - // precision_scale_prior. - ln_alpha += diagonal_prior_->logp(omega_prop_jj); - ln_alpha -= diagonal_prior_->logp(precision_matrix_(j, j)); + // Gamma(shape, rate) prior on changed diagonal K_jj. The Roverato move + // slaves K_jj = c_3 + phi_{q-1,q}^2, so K_jj moves with the off-diagonal + // and its prior must be re-evaluated. + ln_alpha += diagonal_prior_->logp(0.5 * precision_proposal_(j, j)); + ln_alpha -= diagonal_prior_->logp(0.5 * precision_matrix_(j, j)); // Proposal term: proposed edge value given it was generated from truncated normal ln_alpha -= R::dnorm(omega_prop_ij / constants_[3], 0.0, proposal_sd, true) - MY_LOG(constants_[3]); @@ -1031,9 +1001,11 @@ void GGMModel::tune_proposal_sd(int iteration, const WarmupSchedule& schedule) { ln_alpha += interaction_prior_->logp(-0.5 * precision_proposal_(i, j)); ln_alpha -= interaction_prior_->logp(-0.5 * precision_matrix_(i, j)); - // Diagonal prior on changed K_jj (constrained-diagonal coupling) - ln_alpha += diagonal_prior_->logp(precision_proposal_(j, j)); - ln_alpha -= diagonal_prior_->logp(precision_matrix_(j, j)); + // Gamma(shape, rate) prior on changed diagonal K_jj. The Roverato move + // slaves K_jj = c_3 + phi_{q-1,q}^2, so K_jj moves with the off-diagonal + // and its prior must be re-evaluated. + ln_alpha += diagonal_prior_->logp(0.5 * precision_proposal_(j, j)); + ln_alpha -= diagonal_prior_->logp(0.5 * precision_matrix_(j, j)); if (MY_LOG(runif(rng_)) < ln_alpha) { double omega_ij_old = precision_matrix_(i, j); @@ -1066,8 +1038,8 @@ void GGMModel::tune_proposal_sd(int iteration, const WarmupSchedule& schedule) { + MY_EXP(theta_prop) * MY_EXP(theta_prop); double ln_alpha = log_density_impl_diag(i); - ln_alpha += diagonal_prior_->logp(precision_proposal_(i, i)); - ln_alpha -= diagonal_prior_->logp(precision_matrix_(i, i)); + ln_alpha += diagonal_prior_->logp(0.5 * precision_proposal_(i, i)); + ln_alpha -= diagonal_prior_->logp(0.5 * precision_matrix_(i, i)); ln_alpha += 2.0 * (theta_prop - theta_curr); // Jacobian: dK_ii/dtheta = 2*exp(2*theta) if (MY_LOG(runif(rng_)) < ln_alpha) { diff --git a/src/models/ggm/ggm_model.h b/src/models/ggm/ggm_model.h index e9f4f8d1..997c7b99 100644 --- a/src/models/ggm/ggm_model.h +++ b/src/models/ggm/ggm_model.h @@ -363,8 +363,17 @@ class GGMModel : public BaseModel { // RATTLE constrained integration // ----------------------------------------------------------------- - /** @return true when constraints exist (edge selection or sparse graph). */ - bool has_constraints() const override { return edge_selection_ || has_sparse_graph_; } + /** + * @return true only when edge selection is enabled. + * + * Fixed-sparse graphs (`edge_selection=false` with some edges excluded) + * are sampled via the theta-space (free-element Cholesky) NUTS path, + * not RATTLE. The constraint manifold is constant in that case and is + * parameterised directly by the null-space coordinates f_q, so RATTLE + * is unnecessary. RATTLE is still required when edge selection is on + * because the manifold changes whenever an edge indicator flips. + */ + bool has_constraints() const override { return edge_selection_; } /** * Pack the Cholesky factor into a full-dimension position vector. diff --git a/src/models/mixed/mixed_mrf_gradient.cpp b/src/models/mixed/mixed_mrf_gradient.cpp index 82501b6a..cb0735c3 100644 --- a/src/models/mixed/mixed_mrf_gradient.cpp +++ b/src/models/mixed/mixed_mrf_gradient.cpp @@ -607,18 +607,25 @@ std::pair MixedMRFModel::logp_and_gradient( * Theta_bar * temp_pairwise_cross * temp_covariance; // --- Phase 3: Priors on precision entries --- - // Diagonal prior on Ω_{jj} + // Prior on the partial-association diagonal: -K_yy_{jj} = Theta_{jj}/2. + // logp uses 0.5 * Theta_jj. + // d/dTheta_jj log p(Theta_jj/2) = 0.5 * grad(Theta_jj/2). for(size_t j = 0; j < q_; ++j) { - logp += diagonal_prior_->logp(temp_precision(j, j)); - Omega_bar(j, j) += diagonal_prior_->grad(temp_precision(j, j)); + double half_kjj = 0.5 * temp_precision(j, j); + logp += diagonal_prior_->logp(half_kjj); + Omega_bar(j, j) += 0.5 * diagonal_prior_->grad(half_kjj); } - // Interaction prior on off-diagonal Kyy_{ij} = -Ω_{ij}/2 (upper triangle only) + // Interaction prior on off-diagonal Kyy_{ij} = -Ω_{ij}/2 (upper triangle only). + // Gated on edge_indicators_: inactive edges have K_yy_{ij} = 0 (point mass) + // and contribute no slab density, matching the GGM convention at + // ggm_gradient.cpp where the slab is summed over included_indices only. // Only add to Omega_bar(i,j), not (j,i): the symmetrization // Ω̄ + Ω̄ᵀ in Phase 4 handles the lower triangle automatically. // The prior is on Kyy_{ij}, so we evaluate at -Ω_{ij}/2 and apply // chain rule: ∂logπ/∂Ω_{ij} = ∂logπ/∂Kyy_{ij} · (-1/2). for(size_t i = 0; i < q_ - 1; ++i) { for(size_t j = i + 1; j < q_; ++j) { + if(edge_indicators_(p_ + i, p_ + j) == 0) continue; double kyy_val = -0.5 * temp_precision(i, j); logp += interaction_prior_->logp(kyy_val); Omega_bar(i, j) += -0.5 * interaction_prior_->grad(kyy_val); @@ -1036,9 +1043,11 @@ std::pair MixedMRFModel::logp_and_gradient_full( } } - // Kxx priors + // Kxx priors. Gated on edge_indicators_: inactive edges have K_xx_{ij} = 0 + // (point mass) and contribute no slab density. Matches the GGM convention. for(size_t i = 0; i < p_ - 1; ++i) { for(size_t j = i + 1; j < p_; ++j) { + if(edge_indicators_(i, j) == 0) continue; double val = temp_pairwise_discrete(i, j); logp += interaction_prior_->logp(val); grad(kxx_idx(i, j)) += interaction_prior_->grad(val); @@ -1052,9 +1061,11 @@ std::pair MixedMRFModel::logp_and_gradient_full( grad(mean_offset + j) += means_prior_->grad(val); } - // Kxy priors + // Kxy priors. Gated on edge_indicators_: inactive edges have K_xy_{ij} = 0 + // (point mass) and contribute no slab density. for(size_t i = 0; i < p_; ++i) { for(size_t j = 0; j < q_; ++j) { + if(edge_indicators_(i, p_ + j) == 0) continue; double val = temp_pairwise_cross(i, j); logp += interaction_prior_->logp(val); grad(kxy_idx(i, j)) += interaction_prior_->grad(val); @@ -1074,15 +1085,21 @@ std::pair MixedMRFModel::logp_and_gradient_full( Omega_bar -= 2.0 * temp_covariance * temp_pairwise_cross.t() * Theta_bar * temp_pairwise_cross * temp_covariance; - // Diagonal prior on Ω_{jj} + // Prior on the partial-association diagonal: -K_yy_{jj} = Theta_{jj}/2. + // d/dTheta_jj log p(Theta_jj/2) = 0.5 * grad(Theta_jj/2). for(size_t j = 0; j < q_; ++j) { - logp += diagonal_prior_->logp(temp_precision(j, j)); - Omega_bar(j, j) += diagonal_prior_->grad(temp_precision(j, j)); + double half_kjj = 0.5 * temp_precision(j, j); + logp += diagonal_prior_->logp(half_kjj); + Omega_bar(j, j) += 0.5 * diagonal_prior_->grad(half_kjj); } - // Interaction prior on off-diagonal Kyy_{ij} = -Ω_{ij}/2 + // Interaction prior on off-diagonal Kyy_{ij} = -Ω_{ij}/2. + // Gated on edge_indicators_: inactive edges have K_yy_{ij} = 0 (point mass) + // and contribute no slab density, matching the GGM convention at + // ggm_gradient.cpp where the slab is summed over included_indices only. // Chain rule: ∂logπ/∂Ω_{ij} = ∂logπ/∂Kyy_{ij} · (-1/2) for(size_t i = 0; i < q_ - 1; ++i) { for(size_t j = i + 1; j < q_; ++j) { + if(edge_indicators_(p_ + i, p_ + j) == 0) continue; double kyy_val = -0.5 * temp_precision(i, j); logp += interaction_prior_->logp(kyy_val); Omega_bar(i, j) += -0.5 * interaction_prior_->grad(kyy_val); diff --git a/src/models/mixed/mixed_mrf_metropolis.cpp b/src/models/mixed/mixed_mrf_metropolis.cpp index e4850eb9..ede7fba2 100644 --- a/src/models/mixed/mixed_mrf_metropolis.cpp +++ b/src/models/mixed/mixed_mrf_metropolis.cpp @@ -187,7 +187,7 @@ double MixedMRFModel::precision_constrained_diagonal(double x) const { // an O(nq) rank-2 shortcut. // ============================================================================= -double MixedMRFModel::log_ggm_ratio_edge(int i, int j) const { +double MixedMRFModel::log_ggm_ratio_edge(int i, int j, arma::mat& cov_prop_out) const { size_t ui = static_cast(i); size_t uj = static_cast(j); @@ -250,6 +250,7 @@ double MixedMRFModel::log_ggm_ratio_edge(int i, int j) const { double quad_prop = arma::accu((D_prop * precision_proposal_) % D_prop); double n = static_cast(n_); + cov_prop_out = std::move(cov_prop); return n / 2.0 * logdet_ratio - (quad_prop - quad_curr) / 2.0; } @@ -261,7 +262,7 @@ double MixedMRFModel::log_ggm_ratio_edge(int i, int j) const { // Same structure as log_ggm_ratio_edge but simpler (Ui = 0). // ============================================================================= -double MixedMRFModel::log_ggm_ratio_diag(int i) const { +double MixedMRFModel::log_ggm_ratio_diag(int i, arma::mat& cov_prop_out) const { size_t ui = static_cast(i); // Current precision diagonal @@ -296,6 +297,7 @@ double MixedMRFModel::log_ggm_ratio_diag(int i) const { double quad_prop = arma::accu((D_prop * precision_proposal_) % D_prop); double n = static_cast(n_); + cov_prop_out = std::move(cov_prop); return n / 2.0 * logdet_ratio - (quad_prop - quad_curr) / 2.0; } @@ -398,22 +400,29 @@ void MixedMRFModel::update_pairwise_effects_continuous_offdiag(int i, int j, int precision_proposal_(j, i) = theta_prop_ij; precision_proposal_(j, j) = theta_prop_jj; - double ln_alpha = log_ggm_ratio_edge(i, j); + arma::mat cov_prop; + double ln_alpha = log_ggm_ratio_edge(i, j, cov_prop); - // OMRF ratio with proposed continuous interactions + // OMRF ratio with proposed continuous interactions. The marginal + // interactions M = A_xx + 2 A_xy Σ A_xy^T depend on Σ; when Kyy changes, + // Σ changes too, so we must use Σ' (cov_prop) for the proposed-state + // recomputation, not the cached covariance_continuous_. for(size_t s = 0; s < p_; ++s) ln_alpha -= log_marginal_omrf(s); arma::mat marginal_saved = marginal_interactions_; + arma::mat covariance_saved = covariance_continuous_; arma::mat pairwise_effects_continuous_saved = pairwise_effects_continuous_; - // Temporarily set continuous interactions to proposed value + // Temporarily set continuous interactions and covariance to proposed values. pairwise_effects_continuous_(i, j) = -0.5 * theta_prop_ij; pairwise_effects_continuous_(j, i) = -0.5 * theta_prop_ij; pairwise_effects_continuous_(j, j) = -0.5 * theta_prop_jj; + covariance_continuous_ = cov_prop; recompute_marginal_interactions(); for(size_t s = 0; s < p_; ++s) ln_alpha += log_marginal_omrf(s); pairwise_effects_continuous_ = pairwise_effects_continuous_saved; + covariance_continuous_ = std::move(covariance_saved); marginal_interactions_ = std::move(marginal_saved); // Prior ratio: @@ -426,8 +435,8 @@ void MixedMRFModel::update_pairwise_effects_continuous_offdiag(int i, int j, int ln_alpha -= interaction_prior_->logp(cont_curr_ij); // Gamma(1,1) prior on changed diagonal K_jj - ln_alpha += diagonal_prior_->logp(theta_prop_jj); - ln_alpha -= diagonal_prior_->logp(-2.0 * pairwise_effects_continuous_(j, j)); + ln_alpha += diagonal_prior_->logp(0.5 * theta_prop_jj); + ln_alpha -= diagonal_prior_->logp(0.5 * (-2.0 * pairwise_effects_continuous_(j, j))); if(MY_LOG(runif(rng_)) < ln_alpha) { // Pass old precision values to Cholesky update @@ -478,24 +487,29 @@ void MixedMRFModel::update_pairwise_effects_continuous_diag(int i, int iteration precision_proposal_ = -2.0 * pairwise_effects_continuous_; precision_proposal_(i, i) = theta_ii_prop; - double ln_alpha = log_ggm_ratio_diag(i); + arma::mat cov_prop; + double ln_alpha = log_ggm_ratio_diag(i, cov_prop); - // OMRF ratio with proposed continuous interactions + // OMRF ratio with proposed continuous interactions. Use Σ' (cov_prop) for + // the proposed-state marginal_interactions, not the cached Σ. for(size_t s = 0; s < p_; ++s) ln_alpha -= log_marginal_omrf(s); arma::mat marginal_saved = marginal_interactions_; + arma::mat covariance_saved = covariance_continuous_; double cont_saved = pairwise_effects_continuous_(i, i); pairwise_effects_continuous_(i, i) = -0.5 * theta_ii_prop; + covariance_continuous_ = cov_prop; recompute_marginal_interactions(); for(size_t s = 0; s < p_; ++s) ln_alpha += log_marginal_omrf(s); pairwise_effects_continuous_(i, i) = cont_saved; + covariance_continuous_ = std::move(covariance_saved); marginal_interactions_ = std::move(marginal_saved); // Prior ratio: Gamma(1,1) on K_ii (precision diagonal) - ln_alpha += diagonal_prior_->logp(theta_ii_prop); - ln_alpha -= diagonal_prior_->logp(theta_ii_curr); + ln_alpha += diagonal_prior_->logp(0.5 * theta_ii_prop); + ln_alpha -= diagonal_prior_->logp(0.5 * theta_ii_curr); // Jacobian: dK_ii/dtheta = 2*exp(2*theta) ln_alpha += 2.0 * (theta_prop - theta_curr); @@ -666,21 +680,27 @@ void MixedMRFModel::update_edge_indicator_continuous(int i, int j) { precision_proposal_(j, j) = theta_prop_jj; // --- Likelihood ratio --- - double ln_alpha = log_ggm_ratio_edge(i, j); + arma::mat cov_prop; + double ln_alpha = log_ggm_ratio_edge(i, j, cov_prop); + // OMRF ratio with proposed continuous interactions. Use Σ' (cov_prop) for + // the proposed-state marginal_interactions, not the cached Σ. for(size_t s = 0; s < p_; ++s) ln_alpha -= log_marginal_omrf(s); arma::mat marginal_saved = marginal_interactions_; + arma::mat covariance_saved = covariance_continuous_; arma::mat pairwise_effects_continuous_saved = pairwise_effects_continuous_; - // Temporarily set continuous interactions to proposed value + // Temporarily set continuous interactions and covariance to proposed values. pairwise_effects_continuous_(i, j) = -0.5 * theta_prop_ij; pairwise_effects_continuous_(j, i) = -0.5 * theta_prop_ij; pairwise_effects_continuous_(j, j) = -0.5 * theta_prop_jj; + covariance_continuous_ = cov_prop; recompute_marginal_interactions(); for(size_t s = 0; s < p_; ++s) ln_alpha += log_marginal_omrf(s); pairwise_effects_continuous_ = pairwise_effects_continuous_saved; + covariance_continuous_ = std::move(covariance_saved); marginal_interactions_ = std::move(marginal_saved); // --- Spike-and-slab terms --- @@ -689,8 +709,8 @@ void MixedMRFModel::update_edge_indicator_continuous(int i, int j) { double cont_curr_ij = pairwise_effects_continuous_(i, j); // Gamma(1,1) prior on changed diagonal K_jj (always present, not part of spike-and-slab) - ln_alpha += diagonal_prior_->logp(theta_prop_jj); - ln_alpha -= diagonal_prior_->logp(-2.0 * pairwise_effects_continuous_(j, j)); + ln_alpha += diagonal_prior_->logp(0.5 * theta_prop_jj); + ln_alpha -= diagonal_prior_->logp(0.5 * (-2.0 * pairwise_effects_continuous_(j, j))); // The proposal density is on the precision reparameterized scale: // theta_prop_ij = C[3] * epsilon, so epsilon = theta_prop_ij / C[3] diff --git a/src/models/mixed/mixed_mrf_model.h b/src/models/mixed/mixed_mrf_model.h index 7c8056d3..e5bfa78e 100644 --- a/src/models/mixed/mixed_mrf_model.h +++ b/src/models/mixed/mixed_mrf_model.h @@ -503,12 +503,16 @@ class MixedMRFModel : public BaseModel { double precision_constrained_diagonal(double x) const; // Log-likelihood ratio for a proposed off-diagonal precision change (rank-2). - // Assumes precision_proposal_ is already filled by the caller. - double log_ggm_ratio_edge(int i, int j) const; + // Assumes precision_proposal_ is already filled by the caller. Writes the + // proposed covariance Σ' (computed via Woodbury) to cov_prop_out so callers + // can use it to recompute marginal_interactions_ for the OMRF likelihood + // ratio at the proposed Kyy. + double log_ggm_ratio_edge(int i, int j, arma::mat& cov_prop_out) const; // Log-likelihood ratio for a proposed diagonal precision change (rank-1). - // Assumes precision_proposal_ is already filled by the caller. - double log_ggm_ratio_diag(int i) const; + // Assumes precision_proposal_ is already filled by the caller. Writes the + // proposed covariance Σ' (computed via Sherman-Morrison) to cov_prop_out. + double log_ggm_ratio_diag(int i, arma::mat& cov_prop_out) const; // Rank-1 Cholesky update after accepting an off-diagonal precision change. void cholesky_update_after_precision_edge(double old_ij, double old_jj, int i, int j); diff --git a/src/prior_test_interface.cpp b/src/prior_test_interface.cpp index 6b0b064d..f6b07781 100644 --- a/src/prior_test_interface.cpp +++ b/src/prior_test_interface.cpp @@ -90,7 +90,8 @@ Rcpp::List ggm_test_logp_and_gradient_full_prior( double interaction_beta = 0.5, const std::string& diagonal_prior_type = "gamma", double diagonal_shape = 1.0, - double diagonal_rate = 1.0 + double diagonal_rate = 1.0, + Rcpp::Nullable inv_mass_diag = R_NilValue ) { GraphConstraintStructure cs; cs.build(edge_indicators); @@ -103,7 +104,14 @@ Rcpp::List ggm_test_logp_and_gradient_full_prior( GGMGradientEngine engine; engine.rebuild(cs, static_cast(n), suf_stat, *ip, *dp); - auto result = engine.logp_and_gradient_full(x); + // Empty inv_mass_diag => identity mass. Otherwise plug through to the + // mass-weighted Pfaffian inside the engine. + arma::vec inv_mass; + if (inv_mass_diag.isNotNull()) { + inv_mass = Rcpp::as(inv_mass_diag); + } + + auto result = engine.logp_and_gradient_full(x, inv_mass); return Rcpp::List::create( Rcpp::Named("value") = result.first, diff --git a/tests/testthat/test-sample-precision-prior.R b/tests/testthat/test-sample-precision-prior.R index 6bd90172..d87f497e 100644 --- a/tests/testthat/test-sample-precision-prior.R +++ b/tests/testthat/test-sample-precision-prior.R @@ -72,11 +72,11 @@ test_that("excluded edges are exactly zero in K_offdiag", { # ---- Scale prior plumbing ---------------------------------------------------- -test_that("gamma_prior(shape, rate) shifts diagonal mean toward shape/rate", { - # Gamma(shape=4, rate=2) has mean 2.0; Gamma(1, 1) has mean 1.0. - # With p = 3 the prior on K_ii is exactly the supplied scale prior, so - # the empirical mean of the diagonal across draws should track shape/rate - # within Monte Carlo error. +test_that("gamma_prior(shape, rate) shifts diagonal mean toward 2*shape/rate", { + # The diagonal prior is on the partial-association diagonal + # -K_yy_{ii} = K_{ii}/2, so gamma_prior(shape, rate) implies + # K_ii ~ 2 * Gamma(shape, rate), mean = 2 * shape/rate. + # Gamma(4, 2) -> mean(K_ii) = 4.0; Gamma(1, 1) -> mean(K_ii) = 2.0. draws_default = short_run( p = 3L, n_samples = 400L, n_warmup = 200L ) @@ -86,7 +86,7 @@ test_that("gamma_prior(shape, rate) shifts diagonal mean toward shape/rate", { ) expect_lt(mean(draws_default$K_diag), mean(draws_heavy$K_diag)) - expect_equal(mean(draws_heavy$K_diag), 2.0, tolerance = 0.4) + expect_equal(mean(draws_heavy$K_diag), 4.0, tolerance = 0.6) }) diff --git a/tests/testthat/test-sbc-ggm.R b/tests/testthat/test-sbc-ggm.R index faf819bc..a8790375 100644 --- a/tests/testthat/test-sbc-ggm.R +++ b/tests/testthat/test-sbc-ggm.R @@ -33,26 +33,31 @@ skip_unless_slow_sbc = function() { # ---- Prior sampler ----------------------------------------------------------- -# Draw a symmetric PD precision matrix from the GGM prior (no edge -# selection): -# K_ij ~ Cauchy(0, scale) (off-diagonal, i < j, symmetrised) -# K_ii ~ Gamma(1, 1) -# Rejection-samples until the result is positive definite. +# Draw a symmetric PD precision matrix K from the GGM prior (no edge +# selection). The prior is specified on the partial-association scale +# Omega = -K/2 to match the bgms sampler convention: +# Omega_ij ~ Cauchy(0, scale) (off-diagonal, i < j, symmetrised) +# -Omega_ii ~ Gamma(1, 1) (diagonal, positive) +# Implies on K: +# K_ij = -2 * Omega_ij ~ Cauchy(0, 2*scale) +# K_ii = 2 * (-Omega_ii) ~ 2 * Gamma(1, 1) = Gamma(1, 1/2) +# Rejection-samples until K is positive definite. draw_prior_K = function(p, scale = 2.5, max_tries = 10000) { for(attempt in seq_len(max_tries)) { K = matrix(0, p, p) - # Off-diagonal (upper triangle) + # Off-diagonal (upper triangle): Omega ~ Cauchy(0, scale), K = -2 * Omega for(i in seq_len(p - 1)) { for(j in (i + 1):p) { - K[i, j] = rcauchy(1, 0, scale) + omega_ij = rcauchy(1, 0, scale) + K[i, j] = -2 * omega_ij K[j, i] = K[i, j] } } - # Diagonal + # Diagonal: -Omega_ii ~ Gamma(1, 1), K_ii = 2 * (-Omega_ii) for(i in seq_len(p)) { - K[i, i] = rgamma(1, shape = 1, rate = 1) + K[i, i] = 2 * rgamma(1, shape = 1, rate = 1) } # Check positive definiteness @@ -253,10 +258,14 @@ test_that("SBC: GGM MH produces uniform ranks (p=3, no edge selection)", { # ---- Prior sampler with edge selection --------------------------------------- -# Draw a precision matrix from the spike-and-slab GGM prior: +# Draw a precision matrix K from the spike-and-slab GGM prior, where the +# prior is specified on the partial-association scale Omega = -K/2: # gamma_ij ~ Bernoulli(0.5) -# K_ij | gamma_ij=1 ~ Cauchy(0, scale); K_ij | gamma_ij=0 = 0 -# K_ii ~ Gamma(1, 1) +# Omega_ij | gamma_ij=1 ~ Cauchy(0, scale); Omega_ij | gamma_ij=0 = 0 +# -Omega_ii ~ Gamma(1, 1) +# Implies on K: +# K_ij | gamma_ij=1 ~ Cauchy(0, 2*scale); K_ij | gamma_ij=0 = 0 +# K_ii ~ 2 * Gamma(1, 1) # Rejection-samples until K is positive definite. draw_prior_K_es = function(p, scale = 2.5, inclusion_prob = 0.5, max_tries = 50000) { @@ -269,14 +278,15 @@ draw_prior_K_es = function(p, scale = 2.5, inclusion_prob = 0.5, if(runif(1) < inclusion_prob) { gamma[i, j] = 1L gamma[j, i] = 1L - K[i, j] = rcauchy(1, 0, scale) + omega_ij = rcauchy(1, 0, scale) + K[i, j] = -2 * omega_ij K[j, i] = K[i, j] } } } for(i in seq_len(p)) { - K[i, i] = rgamma(1, shape = 1, rate = 1) + K[i, i] = 2 * rgamma(1, shape = 1, rate = 1) } ev = eigen(K, symmetric = TRUE, only.values = TRUE)$values