From f6052920346bf266403e0c0295f553c3ccf76d26 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Fri, 27 Jun 2025 17:35:57 +0200 Subject: [PATCH 01/21] Add option to change the number of datapoints --- include/data/dataset.h | 9 +++++++++ mcmpy/include/py_dataset.h | 3 +++ mcmpy/src/py_dataset.cpp | 4 +++- src/data/complexity.cpp | 2 +- src/data/dataset.cpp | 10 ++++++++++ src/data/evidence.cpp | 9 +++++---- src/data/likelihood.cpp | 5 +++-- src/search/mcm_search/annealing.cpp | 13 +++++++------ src/search/mcm_search/div_and_conq.cpp | 14 +++++++------- src/search/mcm_search/greedy.cpp | 13 +++++++------ src/utilities/partition.cpp | 26 -------------------------- 11 files changed, 55 insertions(+), 53 deletions(-) diff --git a/include/data/dataset.h b/include/data/dataset.h index 2c7a21c..653460a 100644 --- a/include/data/dataset.h +++ b/include/data/dataset.h @@ -31,6 +31,14 @@ class Data { */ Data(const std::vector, unsigned int>>& _dataset, int n_var, int n_states, int n_samples); + /** + * Change the number of assumed datapoints in the dataset. + * This can be changed to perform an analysis of the dataset under the assumption that it is larger or smaller. + * + * @param n_datapoints The new number of assumed datapoints in the dataset. + */ + void set_N_assumed(int n_datapoints); + /** * Calculate the entropy of the dataset. * @@ -128,6 +136,7 @@ class Data { int N; // Number of datapoints int N_unique; // Number of different datapoints int n_ints; // Number of 128bit integers necessary to represent the data + int N_assumed; // The assumed number of datapoints in the dataset std::vector<__uint128_t> pow_q; // Vector containing the first n powers of q used to speed up the calculation of the evidence }; diff --git a/mcmpy/include/py_dataset.h b/mcmpy/include/py_dataset.h index c4ca24f..f138340 100644 --- a/mcmpy/include/py_dataset.h +++ b/mcmpy/include/py_dataset.h @@ -65,9 +65,12 @@ class PyData { int get_n() {return this->data.n;}; int get_N() {return this->data.N;}; + int get_N_assumed() {return this->data.N_assumed;}; int get_N_unique() {return this->data.N_unique;}; int get_q() {return this->data.q;}; + void set_N_assumed(int n_datapoints) {this->data.set_N_assumed(n_datapoints);}; + Data data; }; diff --git a/mcmpy/src/py_dataset.cpp b/mcmpy/src/py_dataset.cpp index de74447..8ecaf11 100644 --- a/mcmpy/src/py_dataset.cpp +++ b/mcmpy/src/py_dataset.cpp @@ -187,8 +187,10 @@ void bind_data_class(py::module &m) { .def("entropy", &PyData::entropy, py::arg("base") = -1) .def("entropy_of_spin_operator", &PyData::entropy_of_spin_op, py::arg("spin_op")) + .def_property_readonly("n", &PyData::get_n) .def_property_readonly("q", &PyData::get_q) .def_property_readonly("N", &PyData::get_N) - .def_property_readonly("N_unique", &PyData::get_N_unique); + .def_property_readonly("N_unique", &PyData::get_N_unique) + .def_property("N_assumed", &PyData::get_N_assumed, &PyData::set_N_assumed); } diff --git a/src/data/complexity.cpp b/src/data/complexity.cpp index 26aad50..5562997 100644 --- a/src/data/complexity.cpp +++ b/src/data/complexity.cpp @@ -22,7 +22,7 @@ double Data::calc_param_complexity_icc(__uint128_t component){ // Determine the size of the component int r = bit_count(component); __uint128_t n_param = this->pow_q[r] - 1; - return (n_param / 2.) * log(this->N / (2 * M_PI)); + return (n_param / 2.) * log(this->N_assumed / (2 * M_PI)); } double Data::calc_param_complexity(std::vector<__uint128_t>& partition){ diff --git a/src/data/dataset.cpp b/src/data/dataset.cpp index 36158fb..b95dbb9 100644 --- a/src/data/dataset.cpp +++ b/src/data/dataset.cpp @@ -17,6 +17,7 @@ Data::Data(const std::string& filename, int n_var, int n_states){ this->n_ints = ceil(log2(q)); // Process the dataset this->N = processing(filename, n_var, n_ints, n_states, dataset); + this->N_assumed = this->N; this->N_unique = dataset.size(); // Precompute the powers of q to speed up the calculation of the evidence (q^r) @@ -34,6 +35,7 @@ Data::Data(const std::vector, unsigned int>>& this->q = n_states; this->dataset = _dataset; this->N = n_samples; + this->N_assumed = this->N; this->N_unique = _dataset.size(); // Calculate the number of integers necessary to represent the data @@ -48,6 +50,14 @@ Data::Data(const std::vector, unsigned int>>& } } +void Data::set_N_assumed(int n_datapoints){ + // Check if the given number of datapoints is valid + if (n_datapoints < 1){ + throw std::invalid_argument("The number of datapoints should be a positive number."); + } + this->N_assumed = n_datapoints; +} + double Data::entropy(int base){ // Default base is q if (base == -1){ diff --git a/src/data/evidence.cpp b/src/data/evidence.cpp index 73905d5..1d6aef4 100644 --- a/src/data/evidence.cpp +++ b/src/data/evidence.cpp @@ -5,21 +5,22 @@ double Data::calc_log_ev_icc(__uint128_t component){ double log_evidence = 0; // Determine the size of the component int r = bit_count(component); + double alpha = this->N_assumed / this->N; // Contributions from the datapoint frequencies std::unordered_map, unsigned int, HashVector128> counts = build_histogram(*this, component); std::unordered_map, unsigned int, HashVector128>::iterator count_iter = counts.begin(); while (count_iter != counts.end()){ - log_evidence += (lgamma(count_iter->second + 0.5) - 0.5 * log(M_PI)); + log_evidence += (lgamma(alpha * count_iter->second + 0.5) - 0.5 * log(M_PI)); ++count_iter; } // Calculate prefactor if (r > 25){ // Approximate for large components because lgamma overflows - log_evidence -= (r * log(this->q) * this->N); + log_evidence -= (r * log(this->q) * this->N_assumed); } else{ - log_evidence += lgamma(this->pow_q[r]/2.) - lgamma(this->N + pow_q[r]/2.); + log_evidence += lgamma(this->pow_q[r]/2.) - lgamma(this->N_assumed + pow_q[r]/2.); } return log_evidence; @@ -37,7 +38,7 @@ double Data::calc_log_ev(std::vector<__uint128_t>& partition){ } } // Contribution from variables not in the model - log_ev -= this->N * (this->n - r) * log(this->q); + log_ev -= this->N_assumed * (this->n - r) * log(this->q); return log_ev; } diff --git a/src/data/likelihood.cpp b/src/data/likelihood.cpp index e61d62c..e5b5e33 100644 --- a/src/data/likelihood.cpp +++ b/src/data/likelihood.cpp @@ -4,13 +4,14 @@ double Data::calc_log_likelihood_icc(__uint128_t component){ double log_likelihood = 0; double N_datapoints = this->N; + double alpha = this->N_assumed / this->N; // Determine the size of the component int r = bit_count(component); // Get the datapoint frequencies std::unordered_map, unsigned int, HashVector128> counts = build_histogram(*this, component); std::unordered_map, unsigned int, HashVector128>::iterator count_iter = counts.begin(); while (count_iter != counts.end()){ - log_likelihood += count_iter->second * log(count_iter->second / N_datapoints); + log_likelihood += alpha * count_iter->second * log(count_iter->second / N_datapoints); ++count_iter; } return log_likelihood; @@ -28,7 +29,7 @@ double Data::calc_log_likelihood(std::vector<__uint128_t>& partition){ } } // Contribution from variables not in the model - log_likelihood -= this->N * (this->n - r) * log(this->q); + log_likelihood -= this->N_assumed * (this->n - r) * log(this->q); return log_likelihood; } \ No newline at end of file diff --git a/src/search/mcm_search/annealing.cpp b/src/search/mcm_search/annealing.cpp index 80e4ba7..ddced3a 100644 --- a/src/search/mcm_search/annealing.cpp +++ b/src/search/mcm_search/annealing.cpp @@ -51,15 +51,16 @@ MCM MCMSearch::simulated_annealing(Data& data, MCM* init_mcm, std::string file_n *this->output_file << "Number of variables: " << data.n << "\n"; *this->output_file << "Number of states per variable: " << data.q << "\n"; *this->output_file << "Number of datapoints: " << data.N << "\n"; + *this->output_file << "Number of assumed datapoints: " << data.N_assumed << "\n"; *this->output_file << "Number of unique datapoint: " << data.N_unique << "\n"; *this->output_file << "Entropy of the data: " << data.entropy() << " q-its \n\n"; *this->output_file << "Initial partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << mcm_tmp.log_ev << " = " << mcm_tmp.log_ev / (data.N * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << mcm_tmp.log_ev << " = " << mcm_tmp.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, mcm_tmp, data.N, data.q); + print_partition_details_to_file(*this->output_file, mcm_tmp, data.N_assumed, data.q); *this->output_file << "\n"; *this->output_file << "Start annealing \n"; @@ -114,7 +115,7 @@ MCM MCMSearch::simulated_annealing(Data& data, MCM* init_mcm, std::string file_n // Write to output file if (this->output_file){ - *this->output_file << "Iteration " << i << " \t\t Temperature: " << settings.temp << " \t\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N * log(this->data->q)) << "\n"; + *this->output_file << "Iteration " << i << " \t\t Temperature: " << settings.temp << " \t\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N_assumed * log(this->data->q)) << "\n"; } } else{steps_since_improve++;} @@ -161,10 +162,10 @@ MCM MCMSearch::simulated_annealing(Data& data, MCM* init_mcm, std::string file_n *this->output_file << "\nFinal partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N * log(data.q)) << " q-its/datapoint \n"; - *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n"; + *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); *this->output_file << "\n"; this->output_file.reset(); } diff --git a/src/search/mcm_search/div_and_conq.cpp b/src/search/mcm_search/div_and_conq.cpp index 96966b8..5e5e441 100644 --- a/src/search/mcm_search/div_and_conq.cpp +++ b/src/search/mcm_search/div_and_conq.cpp @@ -50,15 +50,16 @@ MCM MCMSearch::divide_and_conquer(Data& data, MCM* init_mcm, std::string file_na *this->output_file << "Number of variables: " << data.n << "\n"; *this->output_file << "Number of states per variable: " << data.q << "\n"; *this->output_file << "Number of datapoints: " << data.N << "\n"; + *this->output_file << "Number of assumed datapoints: " << data.N_assumed << "\n"; *this->output_file << "Number of unique datapoint: " << data.N_unique << "\n"; *this->output_file << "Entropy of the data: " << data.entropy() << " q-its \n\n"; *this->output_file << "Initial partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); *this->output_file << "\n"; *this->output_file << "Start dividing \n"; @@ -83,10 +84,10 @@ MCM MCMSearch::divide_and_conquer(Data& data, MCM* init_mcm, std::string file_na *this->output_file << "\nFinal partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N * log(data.q)) << " q-its/datapoint \n"; - *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n"; + *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); *this->output_file << "\n"; this->output_file.reset(); } @@ -174,8 +175,7 @@ int MCMSearch::division(int move_from, int move_to){ this->log_evidence_trajectory.push_back(this->mcm_out.log_ev); if (this->output_file){ - std::cout << "Should only print with output file"; - *this->output_file << "Splitting component " << move_from << "\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N * log(this->data->q)) << "\n"; + *this->output_file << "Splitting component " << move_from << "\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N_assumed * log(this->data->q)) << "\n"; *this->output_file << "\t Component " << move_from << " : \t" + int_to_string(this->mcm_out.partition[move_from], this->mcm_out.n) << "\n"; *this->output_file << "\t Component " << move_to << " : \t" + int_to_string(this->mcm_out.partition[move_to], this->mcm_out.n) << "\n\n"; } diff --git a/src/search/mcm_search/greedy.cpp b/src/search/mcm_search/greedy.cpp index 6acb93f..2378003 100644 --- a/src/search/mcm_search/greedy.cpp +++ b/src/search/mcm_search/greedy.cpp @@ -49,15 +49,16 @@ MCM MCMSearch::greedy_search(Data& data, MCM* init_mcm, std::string file_name){ *this->output_file << "Number of variables: " << data.n << "\n"; *this->output_file << "Number of states per variable: " << data.q << "\n"; *this->output_file << "Number of datapoints: " << data.N << "\n"; + *this->output_file << "Number of assumed datapoints: " << data.N_assumed << "\n"; *this->output_file << "Number of unique datapoint: " << data.N_unique << "\n"; *this->output_file << "Entropy of the data: " << data.entropy() << " q-its \n\n"; *this->output_file << "Initial partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); *this->output_file << "\n"; } @@ -76,10 +77,10 @@ MCM MCMSearch::greedy_search(Data& data, MCM* init_mcm, std::string file_name){ *this->output_file << "\nFinal partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N * log(data.q)) << " q-its/datapoint \n"; - *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n"; + *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); *this->output_file << "\n"; this->output_file.reset(); } @@ -150,7 +151,7 @@ void MCMSearch::hierarchical_merging(){ // Write to the output file if (this->output_file){ - *this->output_file << "Merging components " << best_i << " and " << best_j << " \t Log-evidence (q-its/datapoint): "<< (this->mcm_out.log_ev) / (this->data->N * log(this->data->q)) << "\n"; + *this->output_file << "Merging components " << best_i << " and " << best_j << " \t Log-evidence (q-its/datapoint): "<< (this->mcm_out.log_ev) / (this->data->N_assumed * log(this->data->q)) << "\n"; } } } diff --git a/src/utilities/partition.cpp b/src/utilities/partition.cpp index 99e5569..675a29f 100644 --- a/src/utilities/partition.cpp +++ b/src/utilities/partition.cpp @@ -1,32 +1,6 @@ #include "utilities/partition.h" #include "utilities/miscellaneous.h" -/* -std::vector generate_random_partition(int n){ - // Check if the number of variables is a valid number - if (n > 128){ - throw std::domain_error("The maximum system size is 128 variables."); - } - if (n < 0){ - throw std::domain_error("The number of variables should be a positive number."); - } - - std::vector growth_string(n, 0); - - int index; - int max_index = 2; - for (int i = 1; i < n; i++){ - index = rand()/(RAND_MAX/max_index); - growth_string[i] = index; - - if (index == (max_index-1)){ - max_index += 1; - } - } - - return growth_string; -}*/ - std::vector<__uint128_t> generate_random_partition(int n){ // Check if the number of variables is a valid number if (n > 128){ From c96a71a881d6953ed62383f0faaf50bbbe86b8c5 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Fri, 27 Jun 2025 17:52:04 +0200 Subject: [PATCH 02/21] Update documentation --- docs/api/data.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/api/data.rst b/docs/api/data.rst index 85bbaee..28c8f2e 100644 --- a/docs/api/data.rst +++ b/docs/api/data.rst @@ -240,6 +240,13 @@ to infer either the best minimally complex (MCM) or the optimal basis representa The actual number of datapoints in the dataset (read-only). + .. py:attribute:: N_assumed + :type: int + + The assumed number of datapoints in the dataset. + + This attribute can be changed to perform an analysis of the dataset under the assumption that it is either larger or smaller. + .. py:attribute:: N_unique :type: int From 49b08534280c0cc4247bdf48bc2f32040d31ae27 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Fri, 27 Jun 2025 19:05:54 +0200 Subject: [PATCH 03/21] Change variable name --- docs/api/data.rst | 6 +++--- include/data/dataset.h | 10 +++++----- mcmpy/include/py_dataset.h | 4 ++-- mcmpy/src/py_dataset.cpp | 2 +- src/data/complexity.cpp | 2 +- src/data/dataset.cpp | 8 ++++---- src/data/evidence.cpp | 8 ++++---- src/data/likelihood.cpp | 4 ++-- src/search/mcm_search/annealing.cpp | 14 +++++++------- src/search/mcm_search/div_and_conq.cpp | 14 +++++++------- src/search/mcm_search/greedy.cpp | 14 +++++++------- 11 files changed, 43 insertions(+), 43 deletions(-) diff --git a/docs/api/data.rst b/docs/api/data.rst index 28c8f2e..2630757 100644 --- a/docs/api/data.rst +++ b/docs/api/data.rst @@ -240,12 +240,12 @@ to infer either the best minimally complex (MCM) or the optimal basis representa The actual number of datapoints in the dataset (read-only). - .. py:attribute:: N_assumed + .. py:attribute:: N_synthetic :type: int - The assumed number of datapoints in the dataset. + The synthetic number of datapoints in the dataset. - This attribute can be changed to perform an analysis of the dataset under the assumption that it is either larger or smaller. + This attribute can be changed to perform an analysis of the dataset as if it is either larger or smaller. .. py:attribute:: N_unique :type: int diff --git a/include/data/dataset.h b/include/data/dataset.h index 653460a..c8cd02b 100644 --- a/include/data/dataset.h +++ b/include/data/dataset.h @@ -32,12 +32,12 @@ class Data { Data(const std::vector, unsigned int>>& _dataset, int n_var, int n_states, int n_samples); /** - * Change the number of assumed datapoints in the dataset. - * This can be changed to perform an analysis of the dataset under the assumption that it is larger or smaller. + * Change the value for the number of datapoints in the dataset that is used for analysis. + * This can be changed to perform an analysis of the dataset as if it is larger or smaller. * - * @param n_datapoints The new number of assumed datapoints in the dataset. + * @param n_datapoints The synthetic number of datapoints in the dataset. */ - void set_N_assumed(int n_datapoints); + void set_N_synthetic(int n_datapoints); /** * Calculate the entropy of the dataset. @@ -136,7 +136,7 @@ class Data { int N; // Number of datapoints int N_unique; // Number of different datapoints int n_ints; // Number of 128bit integers necessary to represent the data - int N_assumed; // The assumed number of datapoints in the dataset + int N_synthetic; // The synthetic number of datapoints in the dataset std::vector<__uint128_t> pow_q; // Vector containing the first n powers of q used to speed up the calculation of the evidence }; diff --git a/mcmpy/include/py_dataset.h b/mcmpy/include/py_dataset.h index f138340..e3a5648 100644 --- a/mcmpy/include/py_dataset.h +++ b/mcmpy/include/py_dataset.h @@ -65,11 +65,11 @@ class PyData { int get_n() {return this->data.n;}; int get_N() {return this->data.N;}; - int get_N_assumed() {return this->data.N_assumed;}; + int get_N_synthetic() {return this->data.N_synthetic;}; int get_N_unique() {return this->data.N_unique;}; int get_q() {return this->data.q;}; - void set_N_assumed(int n_datapoints) {this->data.set_N_assumed(n_datapoints);}; + void set_N_synthetic(int n_datapoints) {this->data.set_N_synthetic(n_datapoints);}; Data data; }; diff --git a/mcmpy/src/py_dataset.cpp b/mcmpy/src/py_dataset.cpp index 8ecaf11..ade3dd7 100644 --- a/mcmpy/src/py_dataset.cpp +++ b/mcmpy/src/py_dataset.cpp @@ -192,5 +192,5 @@ void bind_data_class(py::module &m) { .def_property_readonly("q", &PyData::get_q) .def_property_readonly("N", &PyData::get_N) .def_property_readonly("N_unique", &PyData::get_N_unique) - .def_property("N_assumed", &PyData::get_N_assumed, &PyData::set_N_assumed); + .def_property("N_synthetic", &PyData::get_N_synthetic, &PyData::set_N_synthetic); } diff --git a/src/data/complexity.cpp b/src/data/complexity.cpp index 5562997..dbbf3f0 100644 --- a/src/data/complexity.cpp +++ b/src/data/complexity.cpp @@ -22,7 +22,7 @@ double Data::calc_param_complexity_icc(__uint128_t component){ // Determine the size of the component int r = bit_count(component); __uint128_t n_param = this->pow_q[r] - 1; - return (n_param / 2.) * log(this->N_assumed / (2 * M_PI)); + return (n_param / 2.) * log(this->N_synthetic / (2 * M_PI)); } double Data::calc_param_complexity(std::vector<__uint128_t>& partition){ diff --git a/src/data/dataset.cpp b/src/data/dataset.cpp index b95dbb9..37d35f4 100644 --- a/src/data/dataset.cpp +++ b/src/data/dataset.cpp @@ -17,7 +17,7 @@ Data::Data(const std::string& filename, int n_var, int n_states){ this->n_ints = ceil(log2(q)); // Process the dataset this->N = processing(filename, n_var, n_ints, n_states, dataset); - this->N_assumed = this->N; + this->N_synthetic = this->N; this->N_unique = dataset.size(); // Precompute the powers of q to speed up the calculation of the evidence (q^r) @@ -35,7 +35,7 @@ Data::Data(const std::vector, unsigned int>>& this->q = n_states; this->dataset = _dataset; this->N = n_samples; - this->N_assumed = this->N; + this->N_synthetic = this->N; this->N_unique = _dataset.size(); // Calculate the number of integers necessary to represent the data @@ -50,12 +50,12 @@ Data::Data(const std::vector, unsigned int>>& } } -void Data::set_N_assumed(int n_datapoints){ +void Data::set_N_synthetic(int n_datapoints){ // Check if the given number of datapoints is valid if (n_datapoints < 1){ throw std::invalid_argument("The number of datapoints should be a positive number."); } - this->N_assumed = n_datapoints; + this->N_synthetic = n_datapoints; } double Data::entropy(int base){ diff --git a/src/data/evidence.cpp b/src/data/evidence.cpp index 1d6aef4..2f16a65 100644 --- a/src/data/evidence.cpp +++ b/src/data/evidence.cpp @@ -5,7 +5,7 @@ double Data::calc_log_ev_icc(__uint128_t component){ double log_evidence = 0; // Determine the size of the component int r = bit_count(component); - double alpha = this->N_assumed / this->N; + double alpha = this->N_synthetic / this->N; // Contributions from the datapoint frequencies std::unordered_map, unsigned int, HashVector128> counts = build_histogram(*this, component); std::unordered_map, unsigned int, HashVector128>::iterator count_iter = counts.begin(); @@ -17,10 +17,10 @@ double Data::calc_log_ev_icc(__uint128_t component){ // Calculate prefactor if (r > 25){ // Approximate for large components because lgamma overflows - log_evidence -= (r * log(this->q) * this->N_assumed); + log_evidence -= (r * log(this->q) * this->N_synthetic); } else{ - log_evidence += lgamma(this->pow_q[r]/2.) - lgamma(this->N_assumed + pow_q[r]/2.); + log_evidence += lgamma(this->pow_q[r]/2.) - lgamma(this->N_synthetic + pow_q[r]/2.); } return log_evidence; @@ -38,7 +38,7 @@ double Data::calc_log_ev(std::vector<__uint128_t>& partition){ } } // Contribution from variables not in the model - log_ev -= this->N_assumed * (this->n - r) * log(this->q); + log_ev -= this->N_synthetic * (this->n - r) * log(this->q); return log_ev; } diff --git a/src/data/likelihood.cpp b/src/data/likelihood.cpp index e5b5e33..d1bfafb 100644 --- a/src/data/likelihood.cpp +++ b/src/data/likelihood.cpp @@ -4,7 +4,7 @@ double Data::calc_log_likelihood_icc(__uint128_t component){ double log_likelihood = 0; double N_datapoints = this->N; - double alpha = this->N_assumed / this->N; + double alpha = this->N_synthetic / this->N; // Determine the size of the component int r = bit_count(component); // Get the datapoint frequencies @@ -29,7 +29,7 @@ double Data::calc_log_likelihood(std::vector<__uint128_t>& partition){ } } // Contribution from variables not in the model - log_likelihood -= this->N_assumed * (this->n - r) * log(this->q); + log_likelihood -= this->N_synthetic * (this->n - r) * log(this->q); return log_likelihood; } \ No newline at end of file diff --git a/src/search/mcm_search/annealing.cpp b/src/search/mcm_search/annealing.cpp index ddced3a..26b669d 100644 --- a/src/search/mcm_search/annealing.cpp +++ b/src/search/mcm_search/annealing.cpp @@ -51,16 +51,16 @@ MCM MCMSearch::simulated_annealing(Data& data, MCM* init_mcm, std::string file_n *this->output_file << "Number of variables: " << data.n << "\n"; *this->output_file << "Number of states per variable: " << data.q << "\n"; *this->output_file << "Number of datapoints: " << data.N << "\n"; - *this->output_file << "Number of assumed datapoints: " << data.N_assumed << "\n"; + *this->output_file << "Number of synthetic datapoints: " << data.N_synthetic << "\n"; *this->output_file << "Number of unique datapoint: " << data.N_unique << "\n"; *this->output_file << "Entropy of the data: " << data.entropy() << " q-its \n\n"; *this->output_file << "Initial partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << mcm_tmp.log_ev << " = " << mcm_tmp.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << mcm_tmp.log_ev << " = " << mcm_tmp.log_ev / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, mcm_tmp, data.N_assumed, data.q); + print_partition_details_to_file(*this->output_file, mcm_tmp, data.N_synthetic, data.q); *this->output_file << "\n"; *this->output_file << "Start annealing \n"; @@ -115,7 +115,7 @@ MCM MCMSearch::simulated_annealing(Data& data, MCM* init_mcm, std::string file_n // Write to output file if (this->output_file){ - *this->output_file << "Iteration " << i << " \t\t Temperature: " << settings.temp << " \t\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N_assumed * log(this->data->q)) << "\n"; + *this->output_file << "Iteration " << i << " \t\t Temperature: " << settings.temp << " \t\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N_synthetic * log(this->data->q)) << "\n"; } } else{steps_since_improve++;} @@ -162,10 +162,10 @@ MCM MCMSearch::simulated_annealing(Data& data, MCM* init_mcm, std::string file_n *this->output_file << "\nFinal partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n"; - *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n"; + *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_synthetic, data.q); *this->output_file << "\n"; this->output_file.reset(); } diff --git a/src/search/mcm_search/div_and_conq.cpp b/src/search/mcm_search/div_and_conq.cpp index 5e5e441..361f987 100644 --- a/src/search/mcm_search/div_and_conq.cpp +++ b/src/search/mcm_search/div_and_conq.cpp @@ -50,16 +50,16 @@ MCM MCMSearch::divide_and_conquer(Data& data, MCM* init_mcm, std::string file_na *this->output_file << "Number of variables: " << data.n << "\n"; *this->output_file << "Number of states per variable: " << data.q << "\n"; *this->output_file << "Number of datapoints: " << data.N << "\n"; - *this->output_file << "Number of assumed datapoints: " << data.N_assumed << "\n"; + *this->output_file << "Number of synthetic datapoints: " << data.N_synthetic << "\n"; *this->output_file << "Number of unique datapoint: " << data.N_unique << "\n"; *this->output_file << "Entropy of the data: " << data.entropy() << " q-its \n\n"; *this->output_file << "Initial partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_synthetic, data.q); *this->output_file << "\n"; *this->output_file << "Start dividing \n"; @@ -84,10 +84,10 @@ MCM MCMSearch::divide_and_conquer(Data& data, MCM* init_mcm, std::string file_na *this->output_file << "\nFinal partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n"; - *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n"; + *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_synthetic, data.q); *this->output_file << "\n"; this->output_file.reset(); } @@ -175,7 +175,7 @@ int MCMSearch::division(int move_from, int move_to){ this->log_evidence_trajectory.push_back(this->mcm_out.log_ev); if (this->output_file){ - *this->output_file << "Splitting component " << move_from << "\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N_assumed * log(this->data->q)) << "\n"; + *this->output_file << "Splitting component " << move_from << "\t Log-evidence (q-its/datapoint): " << this->mcm_out.log_ev / (this->data->N_synthetic * log(this->data->q)) << "\n"; *this->output_file << "\t Component " << move_from << " : \t" + int_to_string(this->mcm_out.partition[move_from], this->mcm_out.n) << "\n"; *this->output_file << "\t Component " << move_to << " : \t" + int_to_string(this->mcm_out.partition[move_to], this->mcm_out.n) << "\n\n"; } diff --git a/src/search/mcm_search/greedy.cpp b/src/search/mcm_search/greedy.cpp index 2378003..a176b94 100644 --- a/src/search/mcm_search/greedy.cpp +++ b/src/search/mcm_search/greedy.cpp @@ -49,16 +49,16 @@ MCM MCMSearch::greedy_search(Data& data, MCM* init_mcm, std::string file_name){ *this->output_file << "Number of variables: " << data.n << "\n"; *this->output_file << "Number of states per variable: " << data.q << "\n"; *this->output_file << "Number of datapoints: " << data.N << "\n"; - *this->output_file << "Number of assumed datapoints: " << data.N_assumed << "\n"; + *this->output_file << "Number of synthetic datapoints: " << data.N_synthetic << "\n"; *this->output_file << "Number of unique datapoint: " << data.N_unique << "\n"; *this->output_file << "Entropy of the data: " << data.entropy() << " q-its \n\n"; *this->output_file << "Initial partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_synthetic, data.q); *this->output_file << "\n"; } @@ -77,10 +77,10 @@ MCM MCMSearch::greedy_search(Data& data, MCM* init_mcm, std::string file_name){ *this->output_file << "\nFinal partition \n"; *this->output_file << "----------------- \n\n"; - *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_assumed * log(data.q)) << " q-its/datapoint \n"; - *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_assumed * log(data.q)) << " q-its/datapoint \n\n"; + *this->output_file << "Log-evidence: " << this->mcm_out.log_ev << " = " << this->mcm_out.log_ev / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n"; + *this->output_file << "Max-Log-likelihood: " << max_log_likelihood << " = " << max_log_likelihood / (data.N_synthetic * log(data.q)) << " q-its/datapoint \n\n"; - print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_assumed, data.q); + print_partition_details_to_file(*this->output_file, this->mcm_out, data.N_synthetic, data.q); *this->output_file << "\n"; this->output_file.reset(); } @@ -151,7 +151,7 @@ void MCMSearch::hierarchical_merging(){ // Write to the output file if (this->output_file){ - *this->output_file << "Merging components " << best_i << " and " << best_j << " \t Log-evidence (q-its/datapoint): "<< (this->mcm_out.log_ev) / (this->data->N_assumed * log(this->data->q)) << "\n"; + *this->output_file << "Merging components " << best_i << " and " << best_j << " \t Log-evidence (q-its/datapoint): "<< (this->mcm_out.log_ev) / (this->data->N_synthetic * log(this->data->q)) << "\n"; } } } From 7397dd90fe25f4035aba6569725b8f65c8daad4b Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Fri, 27 Jun 2025 19:18:01 +0200 Subject: [PATCH 04/21] Fix calculation of alpha --- src/data/evidence.cpp | 3 ++- src/data/likelihood.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/data/evidence.cpp b/src/data/evidence.cpp index 2f16a65..87201c7 100644 --- a/src/data/evidence.cpp +++ b/src/data/evidence.cpp @@ -5,7 +5,8 @@ double Data::calc_log_ev_icc(__uint128_t component){ double log_evidence = 0; // Determine the size of the component int r = bit_count(component); - double alpha = this->N_synthetic / this->N; + double N_syn = this->N_synthetic; + double alpha = N_syn / this->N; // Contributions from the datapoint frequencies std::unordered_map, unsigned int, HashVector128> counts = build_histogram(*this, component); std::unordered_map, unsigned int, HashVector128>::iterator count_iter = counts.begin(); diff --git a/src/data/likelihood.cpp b/src/data/likelihood.cpp index d1bfafb..fa0187e 100644 --- a/src/data/likelihood.cpp +++ b/src/data/likelihood.cpp @@ -4,7 +4,7 @@ double Data::calc_log_likelihood_icc(__uint128_t component){ double log_likelihood = 0; double N_datapoints = this->N; - double alpha = this->N_synthetic / this->N; + double alpha = this->N_synthetic / N_datapoints; // Determine the size of the component int r = bit_count(component); // Get the datapoint frequencies From 19946e4094dd135ffc3f1e6e79ae2f7141a2e807 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 8 Jul 2025 20:39:31 +0200 Subject: [PATCH 05/21] Test pydata class --- mcmpy/include/py_dataset.h | 2 +- mcmpy/src/py_dataset.cpp | 10 +++---- mcmpy/src/py_partition.cpp | 3 ++ mcmpy/tests/test_data.py | 59 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 6 deletions(-) create mode 100644 mcmpy/tests/test_data.py diff --git a/mcmpy/include/py_dataset.h b/mcmpy/include/py_dataset.h index e3a5648..86093df 100644 --- a/mcmpy/include/py_dataset.h +++ b/mcmpy/include/py_dataset.h @@ -12,7 +12,7 @@ namespace py = pybind11; -std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t& spin_op, int q, int n_ints); +std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t& spin_op, int q, int n_ints, int n); class PyData { public: diff --git a/mcmpy/src/py_dataset.cpp b/mcmpy/src/py_dataset.cpp index ade3dd7..310ea82 100644 --- a/mcmpy/src/py_dataset.cpp +++ b/mcmpy/src/py_dataset.cpp @@ -1,6 +1,6 @@ #include "py_dataset.h" -std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t& spin_op, int q, int n_ints){ +std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t& spin_op, int q, int n_ints, int n){ py::buffer_info buff = spin_op.request(); // Check if there is only one dimension @@ -9,9 +9,9 @@ std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t& spi throw std::invalid_argument("The spin operator should be given as a 1D numpy array."); } // Check if the system size is valid - int n = buff.shape[0]; - if (n > 128){ - throw std::invalid_argument("The maximum system size is 128."); + int n_entries = buff.shape[0]; + if (n_entries != n){ + throw std::invalid_argument("The given spin operator doesn't contain n elements."); } std::vector conv_spin_op(n, 0); @@ -154,7 +154,7 @@ double PyData::entropy(int base){ } double PyData::entropy_of_spin_op(const py::array_t& op){ - std::vector<__uint128_t> spin_op = convert_spin_op_from_py(op, this->data.q, this->data.n_ints); + std::vector<__uint128_t> spin_op = convert_spin_op_from_py(op, this->data.q, this->data.n_ints, this->data.n); return calc_entropy_of_spin_op(this->data, spin_op); } diff --git a/mcmpy/src/py_partition.cpp b/mcmpy/src/py_partition.cpp index 399d531..3fdfd4e 100644 --- a/mcmpy/src/py_partition.cpp +++ b/mcmpy/src/py_partition.cpp @@ -76,6 +76,9 @@ std::vector<__uint128_t> convert_partition_from_py_2d_array(py::array_t& element = 1; for (int j = 0; j < n; j++){ if(ptr[i*n + j]){ + if(ptr[i*n + j] != 1){ + throw std::invalid_argument("Entries of the 2D array should be either 0 or 1."); + } partition[i] += element; } element <<= 1; diff --git a/mcmpy/tests/test_data.py b/mcmpy/tests/test_data.py new file mode 100644 index 0000000..3ac8e2b --- /dev/null +++ b/mcmpy/tests/test_data.py @@ -0,0 +1,59 @@ +import pytest +import numpy as np +from mcmpy import Data + +# Test information theoretic criteria against previous code +# (https://github.com/clelidm/MinCompSpin_ExhaustiveSearch) by C. de Mulatier; +# Results of the optimal MCM for the binary SCOTUS data in the original basis + +def test_log_evidence(scotus_data_q2, opt_mcm_scotus_q2): + assert np.isclose(scotus_data_q2.log_evidence(opt_mcm_scotus_q2), -3300.4) + +def test_log_likelihood(scotus_data_q2, opt_mcm_scotus_q2): + assert np.isclose(scotus_data_q2.log_likelihood(opt_mcm_scotus_q2), -3194.36) + +def test_parametric_complexity(scotus_data_q2, opt_mcm_scotus_q2): + assert np.isclose(scotus_data_q2.complexity_parametric(opt_mcm_scotus_q2), 114.056) + +def test_geometric_complexity(scotus_data_q2, opt_mcm_scotus_q2): + assert np.isclose(scotus_data_q2.complexity_geometric(opt_mcm_scotus_q2), -8.95092) + +def test_total_complexity(scotus_data_q2, opt_mcm_scotus_q2): + parametric_complexity = scotus_data_q2.complexity_parametric(opt_mcm_scotus_q2) + geometric_complexity = scotus_data_q2.complexity_geometric(opt_mcm_scotus_q2) + assert np.isclose(parametric_complexity + geometric_complexity , 105.105) + +def test_mdl(scotus_data_q2, opt_mcm_scotus_q2): + assert np.isclose(scotus_data_q2.minimum_description_length(opt_mcm_scotus_q2), -3299.46) + +def test_log_evidence_icc(scotus_data_q2, opt_mcm_scotus_q2): + assert np.all(np.isclose(scotus_data_q2.log_evidence_icc(opt_mcm_scotus_q2), [-1754.41, -1545.98])) + +def test_log_likelihood_icc(scotus_data_q2, opt_mcm_scotus_q2): + assert np.all(np.isclose(scotus_data_q2.log_likelihood_icc(opt_mcm_scotus_q2), [-1686.28, -1508.08])) + +def test_parametric_complexity_icc(scotus_data_q2, opt_mcm_scotus_q2): + assert np.all(np.isclose(scotus_data_q2.complexity_parametric_icc(opt_mcm_scotus_q2), [76.8637, 37.1921])) + +def test_geometric_complexity_icc(scotus_data_q2, opt_mcm_scotus_q2): + assert np.all(np.isclose(scotus_data_q2.complexity_geometric_icc(opt_mcm_scotus_q2), [-9.58359, 0.632678])) + + +# Input array test +def test_partition_input(scotus_data_q2): + with pytest.raises(ValueError, match="The partition should be a 1D or 2D array."): + scotus_data_q2.log_evidence(np.ones((2,2,2))) + + with pytest.raises(ValueError, match="Entries of the 2D array should be either 0 or 1."): + scotus_data_q2.log_evidence([[1,2,0], [0,0,1]]) + +def test_entropy_of_op_input(scotus_data_q2): + with pytest.raises(ValueError, match="The spin operator should be given as a 1D numpy array."): + scotus_data_q2.entropy_of_spin_operator(np.ones((2,2))) + + with pytest.raises(ValueError, match="The vector should only contain values between 0 and q-1."): + scotus_data_q2.entropy_of_spin_operator([1,2,0,1,0,1,0,1,1]) + + with pytest.raises(ValueError, match="The given spin operator doesn't contain n elements."): + scotus_data_q2.entropy_of_spin_operator([1,0,1,0,1,0,1]) + From dde949e571f0b90634e260c7c51160b73d84e108 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 8 Jul 2025 20:40:20 +0200 Subject: [PATCH 06/21] Test pymcm class --- mcmpy/tests/test_mcm.py | 127 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 mcmpy/tests/test_mcm.py diff --git a/mcmpy/tests/test_mcm.py b/mcmpy/tests/test_mcm.py new file mode 100644 index 0000000..d09371a --- /dev/null +++ b/mcmpy/tests/test_mcm.py @@ -0,0 +1,127 @@ +import pytest +import numpy as np +from mcmpy import MCM, Data + +@pytest.fixture +def model(): + return MCM(8, [0,1,2,0,1,-1,5]) + +# Test input with specific partition +def test_mcm_array(model): + assert np.all(model.array == [[1,0,0,1,0,0,0,0], + [0,1,0,0,1,0,0,0], + [0,0,1,0,0,0,0,0], + [0,0,0,0,0,0,1,0]]) + +def test_mcm_gray_code(model): + assert np.all(model.array_gray_code == [0,1,2,0,1,-1,3,-1]) + +def test_read_only_properties(model): + assert(model.n == 8) + assert(model.n_icc == 4) + assert(model.rank == 6) + assert(model.is_optimized is False) + +def test_move_in_invalid(model): + with pytest.raises(ValueError, match="The variable index should be between 0 and n-1."): + model.move_variable_in(8,5) + + with pytest.raises(ValueError, match="This variable is already present in the partition."): + model.move_variable_in(1,6) + +def test_move_in(model): + model.move_variable_in(5,2) + model.move_variable_in(7,7) + + # Check gray code + assert np.all(model.array_gray_code == [0,1,2,0,1,2,3,4]) + + # Check binary array + assert np.all(model.array == [[1,0,0,1,0,0,0,0], + [0,1,0,0,1,0,0,0], + [0,0,1,0,0,1,0,0], + [0,0,0,0,0,0,1,0], + [0,0,0,0,0,0,0,1]]) + + # Check properties + assert(model.n == 8) + assert(model.n_icc == 5) + assert(model.rank == 8) + assert(model.is_optimized is False) + +def test_move_out_invalid(model): + with pytest.raises(ValueError, match="The variable index should be between 0 and n-1."): + model.move_variable_out(8) + + with pytest.raises(ValueError, match="This variable is not present in the partition."): + model.move_variable_out(7) + +def test_move_out(model): + model.move_variable_out(2) + + # Check gray code + assert np.all(model.array_gray_code == [0,1,-1,0,1,-1,2,-1]) + + # Check binary array + assert np.all(model.array == [[1,0,0,1,0,0,0,0], + [0,1,0,0,1,0,0,0], + [0,0,0,0,0,0,1,0]]) + + # Check properties + assert(model.n == 8) + assert(model.n_icc == 3) + assert(model.rank == 5) + assert(model.is_optimized is False) + +def test_move(model): + model.move_variable(6,0) + + # Check gray code + assert np.all(model.array_gray_code == [0,1,2,0,1,-1,0,-1]) + + # Check binary array + assert np.all(model.array == [[1,0,0,1,0,0,1,0], + [0,1,0,0,1,0,0,0], + [0,0,1,0,0,0,0,0]]) + + # Check properties + assert(model.n == 8) + assert(model.n_icc == 3) + assert(model.rank == 6) + assert(model.is_optimized is False) + +def test_properties(): + # Independent model + model = MCM(12, "independent") + assert(model.n == 12) + assert(model.n_icc == 12) + assert(model.rank == 12) + assert(model.is_optimized is False) + + # Complete model + model = MCM(12, "complete") + assert(model.n == 12) + assert(model.n_icc == 1) + assert(model.rank == 12) + assert(model.is_optimized is False) + +def test_data_object(scotus_data_q2, opt_mcm_scotus_q2): + data = opt_mcm_scotus_q2.generate_data_object(10_000, scotus_data_q2) + + assert data.N == 10_000 + assert data.N_synthetic == 10_000 + assert data.q == scotus_data_q2.q + assert data.n == scotus_data_q2.n + +def test_data_file(scotus_data_q2, opt_mcm_scotus_q2, tmp_path): + filepath = str(tmp_path / "test.dat") + print(type(filepath)) + opt_mcm_scotus_q2.generate_data_file(10_000, scotus_data_q2, filepath) + # Create data object from file + data = Data(filepath, scotus_data_q2.n, scotus_data_q2.q) + + assert data.N == 10_000 + assert data.N_synthetic == 10_000 + assert data.q == scotus_data_q2.q + assert data.n == scotus_data_q2.n + From 54ce859382b4a5b453085c078706acb38ccd437e Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 8 Jul 2025 20:43:47 +0200 Subject: [PATCH 07/21] Add conftest file --- mcmpy/tests/conftest.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 mcmpy/tests/conftest.py diff --git a/mcmpy/tests/conftest.py b/mcmpy/tests/conftest.py new file mode 100644 index 0000000..a583bf9 --- /dev/null +++ b/mcmpy/tests/conftest.py @@ -0,0 +1,27 @@ +import pytest +from mcmpy import Data, MCM + +# Data + +@pytest.fixture +def scotus_data_q2(): + return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 2) + +@pytest.fixture +def scotus_data_q3(): + return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 3) + +@pytest.fixture +def scotus_data_q4(): + return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 4) + +@pytest.fixture +def scotus_data_q5(): + return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 5) + + +# MCM + +@pytest.fixture +def opt_mcm_scotus_q2(): + return MCM(9, [[1,0,1,1,1,0,1,0,0], [0,1,0,0,0,1,0,1,1]]) \ No newline at end of file From fbb4c529469a935c0e81bfda1414214a206b2847 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Sun, 13 Jul 2025 12:07:09 +0200 Subject: [PATCH 08/21] Test pymcmsearch class --- mcmpy/tests/conftest.py | 18 ++-- mcmpy/tests/test_mcm_search.py | 185 +++++++++++++++++++++++++++++++++ 2 files changed, 196 insertions(+), 7 deletions(-) create mode 100644 mcmpy/tests/test_mcm_search.py diff --git a/mcmpy/tests/conftest.py b/mcmpy/tests/conftest.py index a583bf9..2743275 100644 --- a/mcmpy/tests/conftest.py +++ b/mcmpy/tests/conftest.py @@ -1,27 +1,31 @@ import pytest -from mcmpy import Data, MCM +from mcmpy import Data, MCM, MCMSearch # Data -@pytest.fixture +@pytest.fixture(scope="module") def scotus_data_q2(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 2) -@pytest.fixture +@pytest.fixture(scope="module") def scotus_data_q3(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 3) -@pytest.fixture +@pytest.fixture(scope="module") def scotus_data_q4(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 4) -@pytest.fixture +@pytest.fixture(scope="module") def scotus_data_q5(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 5) # MCM -@pytest.fixture +@pytest.fixture(scope="module") def opt_mcm_scotus_q2(): - return MCM(9, [[1,0,1,1,1,0,1,0,0], [0,1,0,0,0,1,0,1,1]]) \ No newline at end of file + return MCM(9, [[1,0,1,1,1,0,1,0,0], [0,1,0,0,0,1,0,1,1]]) + +@pytest.fixture(scope="module") +def random_mcm(): + return MCM(9, "random") diff --git a/mcmpy/tests/test_mcm_search.py b/mcmpy/tests/test_mcm_search.py new file mode 100644 index 0000000..8bd4267 --- /dev/null +++ b/mcmpy/tests/test_mcm_search.py @@ -0,0 +1,185 @@ +import pytest +import numpy as np +from mcmpy import Data, MCM, MCMSearch + +@pytest.fixture +def mcm_searcher(): + return MCMSearch() + +## Exhaustive search + +# Test against previous code +# (https://github.com/clelidm/MinCompSpin_ExhaustiveSearch) by C. de Mulatier; +# (https://github.com/AaronDC60/MinCompSpin_discrete) by Aaron De Clercq. + +class TestMCMSearch: + + def setup_class(self): + scotus_data = Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 2) + self.searcher = MCMSearch() + + mcm_in_merging = MCM(9, [0,1,0,3,4,5,6,7,8]) + + self.opt_mcms = {} + self.mcms_out = {} + self.mcms_in = {} + + self.opt_mcms["simulated_annealing"] = self.searcher.simulated_annealing(scotus_data) + self.mcms_out["simulated_annealing"] = self.searcher.get_mcm_out() + self.mcms_in["simulated_annealing"] = self.searcher.get_mcm_in() + + self.opt_mcms["hierarchical_greedy_merging"] = self.searcher.hierarchical_greedy_merging(scotus_data, mcm_in_merging) + self.mcms_out["hierarchical_greedy_merging"] = self.searcher.get_mcm_out() + self.mcms_in["hierarchical_greedy_merging"] = self.searcher.get_mcm_in() + + self.opt_mcms["hierarchical_greedy_divisive"] = self.searcher.hierarchical_greedy_divisive(scotus_data) + self.mcms_out["hierarchical_greedy_divisive"] = self.searcher.get_mcm_out() + self.mcms_in["hierarchical_greedy_divisive"] = self.searcher.get_mcm_in() + + self.opt_mcms["exhaustive"] = self.searcher.exhaustive(scotus_data) + self.mcms_out["exhaustive"] = self.searcher.get_mcm_out() + + def test_log_ev_trajectory_exhaustive(self): + # Exhaustive search stores all parititons + # Number of partitions for n variables is given by Bell number of n + assert len(self.searcher.log_evidence_trajectory) == 21_147 + + @pytest.mark.parametrize("method", ["exhaustive", "simulated_annealing", "hierarchical_greedy_merging", "hierarchical_greedy_divisive"]) + def test_opt_mcms_structure(self, method): + assert self.opt_mcms[method].n == 9 + assert self.opt_mcms[method].n_icc == 2 + assert self.opt_mcms[method].rank == 9 + assert self.opt_mcms[method].is_optimized is True + + if method != "simulated_annealing": + # Simulated annealing inverts the order sometimes + assert np.all(self.opt_mcms[method].array == [[1,0,1,1,1,0,1,0,0], [0,1,0,0,0,1,0,1,1]]) + assert np.all(self.opt_mcms[method].array_gray_code == [0,1,0,0,0,1,0,1,1]) + else: + assert [1,0,1,1,1,0,1,0,0] in self.opt_mcms[method].array + assert [0,1,0,0,0,1,0,1,1] in self.opt_mcms[method].array + + @pytest.mark.parametrize("method", ["exhaustive", "simulated_annealing", "hierarchical_greedy_merging", "hierarchical_greedy_divisive"]) + def test_mcms_out_structure(self, method): + assert self.mcms_out[method].n == 9 + assert self.mcms_out[method].n_icc == 2 + assert self.mcms_out[method].rank == 9 + assert self.mcms_out[method].is_optimized is True + + if method != "simulated_annealing": + # Simulated annealing inverts the order sometimes + assert np.all(self.mcms_out[method].array == [[1,0,1,1,1,0,1,0,0], [0,1,0,0,0,1,0,1,1]]) + assert np.all(self.mcms_out[method].array_gray_code == [0,1,0,0,0,1,0,1,1]) + else: + assert [1,0,1,1,1,0,1,0,0] in self.mcms_out[method].array + assert [0,1,0,0,0,1,0,1,1] in self.mcms_out[method].array + + @pytest.mark.parametrize("method", ["exhaustive", "simulated_annealing", "hierarchical_greedy_merging", "hierarchical_greedy_divisive"]) + def test_opt_mcm_log_ev(self, method): + log_ev_per_icc = self.opt_mcms[method].get_best_log_evidence_icc() + log_ev = self.opt_mcms[method].get_best_log_evidence() + + assert np.isclose(log_ev, -3300.4) + assert np.allclose(log_ev_per_icc, [-1754.41, -1545.98]) or np.allclose(log_ev_per_icc, [-1545.98, -1754.41]) + + @pytest.mark.parametrize("method", ["exhaustive", "simulated_annealing", "hierarchical_greedy_merging", "hierarchical_greedy_divisive"]) + def test_opt_mcm_log_ev(self, method): + log_ev_per_icc = self.opt_mcms[method].get_best_log_evidence_icc() + log_ev = self.opt_mcms[method].get_best_log_evidence() + + assert np.isclose(log_ev, -3300.4) + assert np.allclose(log_ev_per_icc, [-1754.41, -1545.98]) or np.allclose(log_ev_per_icc, [-1545.98, -1754.41]) + + def test_mcm_in_invalid(self): + # Try before any search has been run + search = MCMSearch() + with pytest.raises(RuntimeError, match="No search has been ran yet."): + search.get_mcm_in() + + def test_mcm_in_exhaustive(self): + with pytest.raises(RuntimeError, match="Exhaustive search does not have an initial MCM."): + self.searcher.get_mcm_in() + + def test_mcm_in_greedy_divisive(self): + # Starts by default from the complete model + assert self.mcms_in["hierarchical_greedy_divisive"].n == 9 + assert self.mcms_in["hierarchical_greedy_divisive"].n_icc == 1 + assert self.mcms_in["hierarchical_greedy_divisive"].rank == 9 + assert self.mcms_in["hierarchical_greedy_divisive"].is_optimized is False + + def test_mcm_in_greedy_merging(self): + # mcm_in used is defined in the setup + assert self.mcms_in["hierarchical_greedy_merging"].n == 9 + assert self.mcms_in["hierarchical_greedy_merging"].n_icc == 8 + assert self.mcms_in["hierarchical_greedy_merging"].rank == 9 + assert self.mcms_in["hierarchical_greedy_merging"].is_optimized is False + + assert np.all(self.mcms_in["hierarchical_greedy_merging"].array_gray_code == [0,1,0,2,3,4,5,6,7]) + + def test_mcm_in_sim_annealing(self): + # Starts by default from the independent model + assert self.mcms_in["simulated_annealing"].n == 9 + assert self.mcms_in["simulated_annealing"].n_icc == 9 + assert self.mcms_in["simulated_annealing"].rank == 9 + assert self.mcms_in["simulated_annealing"].is_optimized is False + +# Check the optimal structure and log-evidence for the SCOTUS dataset with different values of q + +def test_scotus_q_3(mcm_searcher, scotus_data_q3): + opt_mcm = mcm_searcher.exhaustive(scotus_data_q3) + log_ev = opt_mcm.get_best_log_evidence() + + # MCM properties + assert opt_mcm.n == 9 + assert opt_mcm.n_icc == 2 + assert opt_mcm.rank == 9 + assert opt_mcm.is_optimized is True + + # MCM structure + assert np.all(opt_mcm.array == [[1,0,1,1,1,0,1,0,0], [0,1,0,0,0,1,0,1,1]]) + assert np.all(opt_mcm.array_gray_code == [0,1,0,0,0,1,0,1,1]) + + # MCM log-evidence + assert np.allclose(log_ev, -3714.65) + +def test_scotus_q_4(mcm_searcher, scotus_data_q4): + opt_mcm = mcm_searcher.exhaustive(scotus_data_q4) + log_ev = opt_mcm.get_best_log_evidence() + + # MCM properties + assert opt_mcm.n == 9 + assert opt_mcm.n_icc == 3 + assert opt_mcm.rank == 9 + assert opt_mcm.is_optimized is True + + # MCM structure + assert np.all(opt_mcm.array == [[1,0,1,0,1,0,0,0,0], + [0,1,0,0,0,1,0,1,1], + [0,0,0,1,0,0,1,0,0]]) + assert np.all(opt_mcm.array_gray_code == [0,1,0,2,0,1,2,1,1]) + + # MCM log-evidence + assert np.allclose(log_ev, -4033.14) + +def test_scotus_q_5(mcm_searcher, scotus_data_q5): + opt_mcm = mcm_searcher.exhaustive(scotus_data_q5) + log_ev = opt_mcm.get_best_log_evidence() + + # MCM properties + assert opt_mcm.n == 9 + assert opt_mcm.n_icc == 4 + assert opt_mcm.rank == 9 + assert opt_mcm.is_optimized is True + + # MCM structure + assert np.all(opt_mcm.array == [[1,0,1,0,1,0,0,0,0], + [0,1,0,0,0,0,0,0,0], + [0,0,0,1,0,0,1,0,0], + [0,0,0,0,0,1,0,1,1]]) + assert np.all(opt_mcm.array_gray_code == [0,1,0,2,0,3,2,3,3]) + + # MCM log-evidence + assert np.allclose(log_ev, -4261.89) + + + From f716307019637f75d36039060cc8b1baa94ed18d Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 21 Apr 2026 18:59:19 +0200 Subject: [PATCH 09/21] Fix basis reset --- mcmpy/tests/conftest.py | 14 +++++--------- mcmpy/tests/test_mcm_search.py | 3 --- src/basis/basis.cpp | 6 +++++- tests/basis/basis.cpp | 15 ++++++++++----- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/mcmpy/tests/conftest.py b/mcmpy/tests/conftest.py index 2743275..d77727d 100644 --- a/mcmpy/tests/conftest.py +++ b/mcmpy/tests/conftest.py @@ -3,29 +3,25 @@ # Data -@pytest.fixture(scope="module") +@pytest.fixture def scotus_data_q2(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 2) -@pytest.fixture(scope="module") +@pytest.fixture def scotus_data_q3(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 3) -@pytest.fixture(scope="module") +@pytest.fixture def scotus_data_q4(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 4) -@pytest.fixture(scope="module") +@pytest.fixture def scotus_data_q5(): return Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 5) # MCM -@pytest.fixture(scope="module") +@pytest.fixture def opt_mcm_scotus_q2(): return MCM(9, [[1,0,1,1,1,0,1,0,0], [0,1,0,0,0,1,0,1,1]]) - -@pytest.fixture(scope="module") -def random_mcm(): - return MCM(9, "random") diff --git a/mcmpy/tests/test_mcm_search.py b/mcmpy/tests/test_mcm_search.py index 8bd4267..fadbca8 100644 --- a/mcmpy/tests/test_mcm_search.py +++ b/mcmpy/tests/test_mcm_search.py @@ -180,6 +180,3 @@ def test_scotus_q_5(mcm_searcher, scotus_data_q5): # MCM log-evidence assert np.allclose(log_ev, -4261.89) - - - diff --git a/src/basis/basis.cpp b/src/basis/basis.cpp index 5083d74..6e8b937 100644 --- a/src/basis/basis.cpp +++ b/src/basis/basis.cpp @@ -156,6 +156,10 @@ void Basis::set_basis_default() { // Set the default base: s1, s2, ..., sn __uint128_t element = 1; for (int i = 0; i < this->n; i++){ + // Reset operator + std::fill(this->basis_ops[i].begin(), this->basis_ops[i].end(), 0); + std::fill(this->basis_ops_matrix[i].begin(), this->basis_ops_matrix[i].end(), 0); + // log2(q) 128 bit integer representation this->basis_ops[i][0] = element; element <<= 1; @@ -185,7 +189,7 @@ void Basis::print_details() { std::string op_string; for (std::vector<__uint128_t> op : this->basis_ops){ op_string = convert_128bit_vec_to_string(op, this->n); - std::cout << "Operator " << i << ": \t" << op_string << "\t Indices: "; + std::cout << "Operator " << i << ": \t" << op_string << "\t Indices of variables involved: "; // Find the indices of the variables present in the spin operator for (size_t j = 0; j < this->n; ++j){ if (op_string[j] != '0') {std::cout << j << "\t";} diff --git a/tests/basis/basis.cpp b/tests/basis/basis.cpp index 9bf0303..4c5c4c8 100644 --- a/tests/basis/basis.cpp +++ b/tests/basis/basis.cpp @@ -49,13 +49,10 @@ TEST(basis, init_n){ Basis basis(n, q); exp_spin_ops = {{1,0,0}, {2,0,0}, {4,0,0}}; - spin_ops = basis.get_basis(); - exp_spin_ops_array = {{1,0,0}, {0,1,0}, {0,0,1}}; - spin_ops_array = basis.get_basis_matrix(); - EXPECT_EQ(exp_spin_ops, spin_ops); - EXPECT_EQ(exp_spin_ops_array, spin_ops_array); + EXPECT_EQ(exp_spin_ops, basis.get_basis()); + EXPECT_EQ(exp_spin_ops_array, basis.get_basis_matrix()); EXPECT_EQ(basis.get_n(), n); EXPECT_EQ(basis.get_q(), q); EXPECT_EQ(basis.get_n_ints(), 3); @@ -186,6 +183,14 @@ TEST(basis, init_spin_ops){ catch(std::invalid_argument const & err) { EXPECT_EQ(err.what(), std::string("Given basis is not linearly independent.")); } + + // Reset to default basis + exp_spin_ops = {{1,0}, {2,0}, {4,0}, {8,0}}; + exp_matrix = {{1,0,0,0}, {0,1,0,0}, {0,0,1,0}, {0,0,0,1}}; + basis.set_basis_default(); + + EXPECT_EQ(basis.get_basis(), exp_spin_ops); + EXPECT_EQ(basis.get_basis_matrix(), exp_matrix); } TEST(basis, init_file){ From b40047b872bb3a45c85d796c7b53a428338bfff3 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 21 Apr 2026 20:55:20 +0200 Subject: [PATCH 10/21] Update mcm tests --- mcmpy/tests/test_mcm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mcmpy/tests/test_mcm.py b/mcmpy/tests/test_mcm.py index d09371a..8fa33bd 100644 --- a/mcmpy/tests/test_mcm.py +++ b/mcmpy/tests/test_mcm.py @@ -115,7 +115,6 @@ def test_data_object(scotus_data_q2, opt_mcm_scotus_q2): def test_data_file(scotus_data_q2, opt_mcm_scotus_q2, tmp_path): filepath = str(tmp_path / "test.dat") - print(type(filepath)) opt_mcm_scotus_q2.generate_data_file(10_000, scotus_data_q2, filepath) # Create data object from file data = Data(filepath, scotus_data_q2.n, scotus_data_q2.q) @@ -124,4 +123,3 @@ def test_data_file(scotus_data_q2, opt_mcm_scotus_q2, tmp_path): assert data.N_synthetic == 10_000 assert data.q == scotus_data_q2.q assert data.n == scotus_data_q2.n - From 914a935d449c05eba4e23c5abea909ae504b6b95 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 21 Apr 2026 20:55:54 +0200 Subject: [PATCH 11/21] Update mcm search tests --- mcmpy/tests/test_mcm_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mcmpy/tests/test_mcm_search.py b/mcmpy/tests/test_mcm_search.py index fadbca8..0e9ef7d 100644 --- a/mcmpy/tests/test_mcm_search.py +++ b/mcmpy/tests/test_mcm_search.py @@ -18,6 +18,7 @@ def setup_class(self): scotus_data = Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 2) self.searcher = MCMSearch() + # MCM used as starting partition in the hierarchical greedy merging algorithm mcm_in_merging = MCM(9, [0,1,0,3,4,5,6,7,8]) self.opt_mcms = {} @@ -108,7 +109,7 @@ def test_mcm_in_greedy_divisive(self): assert self.mcms_in["hierarchical_greedy_divisive"].is_optimized is False def test_mcm_in_greedy_merging(self): - # mcm_in used is defined in the setup + # mcm_in used is defined in the setup (mcm_in_merging) assert self.mcms_in["hierarchical_greedy_merging"].n == 9 assert self.mcms_in["hierarchical_greedy_merging"].n_icc == 8 assert self.mcms_in["hierarchical_greedy_merging"].rank == 9 From 60cacdee94068466e2fceaeb755f3be9f5304566 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 21 Apr 2026 20:56:49 +0200 Subject: [PATCH 12/21] Add tests for basis class --- mcmpy/tests/test_basis.py | 52 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 mcmpy/tests/test_basis.py diff --git a/mcmpy/tests/test_basis.py b/mcmpy/tests/test_basis.py new file mode 100644 index 0000000..6d1afcd --- /dev/null +++ b/mcmpy/tests/test_basis.py @@ -0,0 +1,52 @@ +import pytest +import numpy as np +from mcmpy import Basis + + +def test_init_default(): + n = 3 + q = 5 + basis = Basis(n,q) + + # Default basis should be the n one-body spin operators + assert np.all(basis.matrix == np.eye(n)) + assert(basis.n == n) + assert(basis.q == q) + +def test_init_array(): + n = 4 + q = 3 + array = np.array([[1,2,0,0],[2,2,0,0],[0,0,1,0],[0,0,2,1]]) + basis = Basis(n,q, array) + + # Check the matrix representation + # Should be equal to the transpose of the input given that columns represent operators + assert np.all(basis.matrix == array.T) + assert(basis.n == n) + assert(basis.q == q) + + # Check reset to default + basis.set_default() + assert np.all(basis.matrix == np.eye(n)) + +def test_init_file(): + n = 4 + q = 3 + basis = Basis(n,q, "../../tests/basis2.dat") + + # Check the matrix representation + assert np.all(basis.matrix == [[1,2,0,0],[2,2,0,0],[0,0,1,2],[0,0,0,1]]) + assert(basis.n == n) + assert(basis.q == q) + +def test_set_from_file(): + n = 4 + q = 3 + + basis = Basis(n,q) + basis.set_from_file("../../tests/basis2.dat") + + # Check the matrix representation + assert np.all(basis.matrix == [[1,2,0,0],[2,2,0,0],[0,0,1,2],[0,0,0,1]]) + assert(basis.n == n) + assert(basis.q == q) From 337ce1797332485e91fa1cba75be71c0a7f108b1 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 21 Apr 2026 20:57:08 +0200 Subject: [PATCH 13/21] Add tests for basis search class --- mcmpy/tests/test_basis_search.py | 92 ++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 mcmpy/tests/test_basis_search.py diff --git a/mcmpy/tests/test_basis_search.py b/mcmpy/tests/test_basis_search.py new file mode 100644 index 0000000..62ead4e --- /dev/null +++ b/mcmpy/tests/test_basis_search.py @@ -0,0 +1,92 @@ +import pytest +import numpy as np +from mcmpy import Data, MCM, MCMSearch, Basis, BasisSearch + +# Best basis search of supreme court + +# Test against previous code +# (https://github.com/clelidm/MinCompSpin_ExhaustiveSearch) by C. de Mulatier; +# (https://github.com/AaronDC60/MinCompSpin_discrete) by Aaron De Clercq. + +class TestBasisSearch: + + def setup_class(self): + self.searcher = BasisSearch() + self.mcmsearcher = MCMSearch() + + def test_exhaustive_search(self): + dataset = Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 2) + opt_basis_1 = self.searcher.exhaustive(dataset) + opt_basis_2 = self.searcher.get_basis() + assert np.all(opt_basis_1.matrix == [[0,0,0,1,1,1,0,0,0], + [0,0,0,0,0,0,1,0,0], + [0,0,0,0,0,1,0,1,0], + [1,0,0,0,0,0,0,0,0], + [0,0,0,1,0,0,0,0,0], + [0,1,0,0,0,0,0,0,0], + [1,0,0,0,1,0,0,0,1], + [0,1,1,0,0,0,1,0,0], + [0,0,1,0,0,0,0,1,0]]) + + assert np.all(opt_basis_1.matrix == opt_basis_2.matrix) + assert opt_basis_1.n == 9 + assert opt_basis_2.n == 9 + + assert opt_basis_1.q == 2 + assert opt_basis_2.q == 2 + + def get_best_mcm(self, q): + dataset = Data("../../input/US_SupremeCourt_n9_N895.dat", 9, q) + # Calculate the best basis + opt_basis = self.searcher.exhaustive(dataset) + # Transform data + transformed_data = opt_basis.gauge_transform_data(dataset) + # Get best mcm in the new basis + best_mcm = self.mcmsearcher.exhaustive(transformed_data) + + return best_mcm + + def test_mcm_opt_basis_q2(self): + best_mcm = self.get_best_mcm(2) + + # Check the mcm and log-evidence + assert np.all(best_mcm.array == [[1,0,0,0,0,0,0,0,0], + [0,1,1,0,0,0,1,0,0], + [0,0,0,1,1,1,0,1,1]]) + + assert np.isclose(best_mcm.get_best_log_evidence(), -3154.42) + + def test_mcm_opt_basis_q3(self): + best_mcm = self.get_best_mcm(3) + + # Check the mcm and log-evidence + assert np.all(best_mcm.array == [[1,0,0,0,1,0,0,1,0], + [0,1,0,1,0,0,1,0,0], + [0,0,1,0,0,1,0,0,1]]) + + assert np.isclose(best_mcm.get_best_log_evidence(), -3587.68) + + def test_mcm_opt_basis_q4(self): + best_mcm = self.get_best_mcm(4) + + # Check the mcm and log-evidence + assert np.all(best_mcm.array == [[1,0,0,0,0,0,0,0,0], + [0,1,0,0,0,0,1,0,0], + [0,0,1,0,0,1,0,0,0], + [0,0,0,1,0,0,0,0,1], + [0,0,0,0,1,0,0,1,0]]) + + assert np.isclose(best_mcm.get_best_log_evidence(), -3763.43) + + def test_mcm_opt_basis_q5(self): + best_mcm = self.get_best_mcm(5) + + # Check the mcm and log-evidence + assert np.all(best_mcm.array == [[1,0,0,0,0,0,0,0,0], + [0,1,0,0,0,0,0,0,0], + [0,0,1,0,0,1,0,0,0], + [0,0,0,1,0,0,1,0,0], + [0,0,0,0,1,0,0,1,0], + [0,0,0,0,0,0,0,0,1]]) + + assert np.isclose(best_mcm.get_best_log_evidence(), -3848.06) From 66441cdb17761e03eeb95f8f0ee9ef488fdc6c00 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 21 Apr 2026 21:00:17 +0200 Subject: [PATCH 14/21] Add python tests to the ci --- .github/workflows/ci.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e65d426..198675a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,4 +29,9 @@ jobs: - name : Test project run: | cd build - ctest --output-on-failure \ No newline at end of file + ctest --output-on-failure + + - name : Test python module + run: | + cd mcmpy/tests + pytest From 3d9c8ce5dfb101a70e3f346ac4bf9707a67e8680 Mon Sep 17 00:00:00 2001 From: AaronDC60 Date: Tue, 21 Apr 2026 21:04:46 +0200 Subject: [PATCH 15/21] Fix ci --- .github/workflows/ci.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 198675a..588d8df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,6 +3,9 @@ on: push: branches: - main + pull_request: + branches: + - main jobs: build: @@ -30,8 +33,8 @@ jobs: run: | cd build ctest --output-on-failure - - - name : Test python module - run: | + + - name : Test python module + run: | cd mcmpy/tests - pytest + pytest \ No newline at end of file From ecd1186ef794099dc754c28217792005f12def2c Mon Sep 17 00:00:00 2001 From: Aaron De Clercq <60502264+AaronDC60@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:14:07 +0200 Subject: [PATCH 16/21] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 811d746..8f7aa3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get install -y g++ cmake python3 + sudo apt-get install -y g++ cmake python3 pytest - name : Build project run: | From b4b23a47c75898d847d9498456d1077be13f4d83 Mon Sep 17 00:00:00 2001 From: Aaron De Clercq <60502264+AaronDC60@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:19:37 +0200 Subject: [PATCH 17/21] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8f7aa3a..e1b2c8b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get install -y g++ cmake python3 pytest + sudo apt-get install -y g++ cmake python3 python3-pytest - name : Build project run: | From d4bba6bac3505e90c58780c719ba20fdd32e3076 Mon Sep 17 00:00:00 2001 From: Aaron De Clercq <60502264+AaronDC60@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:23:39 +0200 Subject: [PATCH 18/21] Update ci.yml --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e1b2c8b..b0ad909 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,11 @@ jobs: run: | sudo apt-get update sudo apt-get install -y g++ cmake python3 python3-pytest + + - name: Install dependencies (macOS) + if: runner.os == 'macOS' + run: | + python3 -m pip install pytest - name : Build project run: | From 1b96a06e7ecb29af0396fe3ca8665a2eb917649f Mon Sep 17 00:00:00 2001 From: Aaron De Clercq <60502264+AaronDC60@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:42:22 +0200 Subject: [PATCH 19/21] Update ci.yml --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0ad909..58691d5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,6 +19,11 @@ jobs: with: submodules: recursive + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies (Linux) if: runner.os == 'Linux' run: | @@ -28,6 +33,7 @@ jobs: - name: Install dependencies (macOS) if: runner.os == 'macOS' run: | + python3 -m pip install --upgrade pip python3 -m pip install pytest - name : Build project From ef0ab8707d9e421d3407db8e57a0e3c436f33ea1 Mon Sep 17 00:00:00 2001 From: Aaron De Clercq <60502264+AaronDC60@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:46:29 +0200 Subject: [PATCH 20/21] Update ci.yml --- .github/workflows/ci.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 58691d5..ef51b4e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,11 +30,10 @@ jobs: sudo apt-get update sudo apt-get install -y g++ cmake python3 python3-pytest - - name: Install dependencies (macOS) - if: runner.os == 'macOS' + - name: Install dependencies run: | python3 -m pip install --upgrade pip - python3 -m pip install pytest + python3 -m pip install pytest numpy - name : Build project run: | From df36255a73097266dd3355997ec9c1795495dd81 Mon Sep 17 00:00:00 2001 From: Aaron De Clercq <60502264+AaronDC60@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:50:49 +0200 Subject: [PATCH 21/21] Update ci.yml --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ef51b4e..0cb2f88 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,6 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get install -y g++ cmake python3 python3-pytest - name: Install dependencies run: |