Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,20 @@ jobs:
with:
submodules: recursive

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install dependencies (Linux)
if: runner.os == 'Linux'
run: |
sudo apt-get update
sudo apt-get install -y g++ cmake python3

- name: Install dependencies
run: |
python3 -m pip install --upgrade pip
python3 -m pip install pytest numpy

- name : Build project
run: |
Expand All @@ -33,3 +42,8 @@ jobs:
run: |
cd build
ctest --output-on-failure

- name : Test python module
run: |
cd mcmpy/tests
pytest
7 changes: 7 additions & 0 deletions docs/api/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,13 @@ to infer either the best minimally complex (MCM) or the optimal basis representa

The actual number of datapoints in the dataset (read-only).

.. py:attribute:: N_synthetic
:type: int

The synthetic number of datapoints in the dataset.

This attribute can be changed to analyze the dataset as if it were either larger or smaller than its actual size.

.. py:attribute:: N_unique
:type: int

Expand Down
9 changes: 9 additions & 0 deletions include/data/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ class Data {
*/
Data(const std::vector<std::pair<std::vector<__uint128_t>, unsigned int>>& _dataset, int n_var, int n_states, int n_samples);

/**
* Change the value for the number of datapoints in the dataset that is used for analysis.
* This can be used to analyze the dataset as if it were larger or smaller than its actual size.
*
* @param n_datapoints The synthetic number of datapoints in the dataset.
*/
void set_N_synthetic(int n_datapoints);

/**
* Calculate the entropy of the dataset.
*
Expand Down Expand Up @@ -128,6 +136,7 @@ class Data {
int N; // Number of datapoints
int N_unique; // Number of different datapoints
int n_ints; // Number of 128bit integers necessary to represent the data
int N_synthetic; // The synthetic number of datapoints in the dataset

std::vector<__uint128_t> pow_q; // Vector containing the first n powers of q used to speed up the calculation of the evidence
};
5 changes: 4 additions & 1 deletion mcmpy/include/py_dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

namespace py = pybind11;

std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t<uint8_t>& spin_op, int q, int n_ints);
std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t<uint8_t>& spin_op, int q, int n_ints, int n);

class PyData {
public:
Expand Down Expand Up @@ -65,9 +65,12 @@ class PyData {

int get_n() {return this->data.n;};
int get_N() {return this->data.N;};
int get_N_synthetic() {return this->data.N_synthetic;};
int get_N_unique() {return this->data.N_unique;};
int get_q() {return this->data.q;};

void set_N_synthetic(int n_datapoints) {this->data.set_N_synthetic(n_datapoints);};

Data data;
};

Expand Down
14 changes: 8 additions & 6 deletions mcmpy/src/py_dataset.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "py_dataset.h"

std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t<uint8_t>& spin_op, int q, int n_ints){
std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t<uint8_t>& spin_op, int q, int n_ints, int n){
py::buffer_info buff = spin_op.request();

// Check if there is only one dimension
Expand All @@ -9,9 +9,9 @@ std::vector<__uint128_t> convert_spin_op_from_py(const py::array_t<uint8_t>& spi
throw std::invalid_argument("The spin operator should be given as a 1D numpy array.");
}
// Check if the system size is valid
int n = buff.shape[0];
if (n > 128){
throw std::invalid_argument("The maximum system size is 128.");
int n_entries = buff.shape[0];
if (n_entries != n){
throw std::invalid_argument("The given spin operator doesn't contain n elements.");
}

std::vector<uint8_t> conv_spin_op(n, 0);
Expand Down Expand Up @@ -154,7 +154,7 @@ double PyData::entropy(int base){
}

double PyData::entropy_of_spin_op(const py::array_t<int8_t>& op){
std::vector<__uint128_t> spin_op = convert_spin_op_from_py(op, this->data.q, this->data.n_ints);
std::vector<__uint128_t> spin_op = convert_spin_op_from_py(op, this->data.q, this->data.n_ints, this->data.n);
return calc_entropy_of_spin_op(this->data, spin_op);
}

Expand Down Expand Up @@ -187,8 +187,10 @@ void bind_data_class(py::module &m) {

.def("entropy", &PyData::entropy, py::arg("base") = -1)
.def("entropy_of_spin_operator", &PyData::entropy_of_spin_op, py::arg("spin_op"))

.def_property_readonly("n", &PyData::get_n)
.def_property_readonly("q", &PyData::get_q)
.def_property_readonly("N", &PyData::get_N)
.def_property_readonly("N_unique", &PyData::get_N_unique);
.def_property_readonly("N_unique", &PyData::get_N_unique)
.def_property("N_synthetic", &PyData::get_N_synthetic, &PyData::set_N_synthetic);
}
3 changes: 3 additions & 0 deletions mcmpy/src/py_partition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ std::vector<__uint128_t> convert_partition_from_py_2d_array(py::array_t<int8_t>&
element = 1;
for (int j = 0; j < n; j++){
if(ptr[i*n + j]){
if(ptr[i*n + j] != 1){
throw std::invalid_argument("Entries of the 2D array should be either 0 or 1.");
}
partition[i] += element;
}
element <<= 1;
Expand Down
27 changes: 27 additions & 0 deletions mcmpy/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest
from mcmpy import Data, MCM, MCMSearch

# Path to the binary SCOTUS voting dataset (9 variables, 895 samples),
# relative to the mcmpy/tests directory from which pytest is invoked.
_SCOTUS_PATH = "../../input/US_SupremeCourt_n9_N895.dat"


def _load_scotus(q):
    """Load the SCOTUS dataset as a Data object with q states per variable."""
    return Data(_SCOTUS_PATH, 9, q)


# Data fixtures: the same SCOTUS dataset interpreted with q = 2..5 states.

@pytest.fixture
def scotus_data_q2():
    return _load_scotus(2)

@pytest.fixture
def scotus_data_q3():
    return _load_scotus(3)

@pytest.fixture
def scotus_data_q4():
    return _load_scotus(4)

@pytest.fixture
def scotus_data_q5():
    return _load_scotus(5)


# MCM fixtures

@pytest.fixture
def opt_mcm_scotus_q2():
    # Two-component optimal MCM for the binary SCOTUS data in the original basis.
    return MCM(9, [[1, 0, 1, 1, 1, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0, 1, 1]])
52 changes: 52 additions & 0 deletions mcmpy/tests/test_basis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import pytest
import numpy as np
from mcmpy import Basis


def test_init_default():
    """A Basis built without an explicit matrix defaults to the n one-body operators."""
    num_vars, num_states = 3, 5
    default_basis = Basis(num_vars, num_states)

    # The default basis matrix is the identity: operator i touches variable i only.
    assert np.array_equal(default_basis.matrix, np.identity(num_vars))
    assert default_basis.n == num_vars
    assert default_basis.q == num_states

def test_init_array():
    """A Basis constructed from a 2D array stores the operators column-wise."""
    num_vars, num_states = 4, 3
    ops = np.array([[1, 2, 0, 0],
                    [2, 2, 0, 0],
                    [0, 0, 1, 0],
                    [0, 0, 2, 1]])
    basis = Basis(num_vars, num_states, ops)

    # Columns of the input represent operators, so the stored matrix
    # should equal the transpose of what was passed in.
    assert np.array_equal(basis.matrix, ops.T)
    assert basis.n == num_vars
    assert basis.q == num_states

    # Resetting should restore the default identity (one-body) basis.
    basis.set_default()
    assert np.array_equal(basis.matrix, np.identity(num_vars))

def test_init_file():
    """A Basis can be constructed directly from a basis definition file."""
    num_vars, num_states = 4, 3
    basis = Basis(num_vars, num_states, "../../tests/basis2.dat")

    # Matrix expected from the contents of tests/basis2.dat.
    expected = [[1, 2, 0, 0],
                [2, 2, 0, 0],
                [0, 0, 1, 2],
                [0, 0, 0, 1]]
    assert np.array_equal(basis.matrix, expected)
    assert basis.n == num_vars
    assert basis.q == num_states

def test_set_from_file():
    """An existing Basis can be overwritten from a basis definition file."""
    num_vars, num_states = 4, 3

    basis = Basis(num_vars, num_states)
    basis.set_from_file("../../tests/basis2.dat")

    # Matrix expected from the contents of tests/basis2.dat.
    expected = [[1, 2, 0, 0],
                [2, 2, 0, 0],
                [0, 0, 1, 2],
                [0, 0, 0, 1]]
    assert np.array_equal(basis.matrix, expected)
    assert basis.n == num_vars
    assert basis.q == num_states
92 changes: 92 additions & 0 deletions mcmpy/tests/test_basis_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import pytest
import numpy as np
from mcmpy import Data, MCM, MCMSearch, Basis, BasisSearch

# Best basis search of supreme court

# Test against previous code
# (https://github.com/clelidm/MinCompSpin_ExhaustiveSearch) by C. de Mulatier;
# (https://github.com/AaronDC60/MinCompSpin_discrete) by Aaron De Clercq.

class TestBasisSearch:
    """Regression tests for the exhaustive best-basis search on the SCOTUS dataset.

    Reference results were obtained with previous implementations:
    - https://github.com/clelidm/MinCompSpin_ExhaustiveSearch (C. de Mulatier)
    - https://github.com/AaronDC60/MinCompSpin_discrete (Aaron De Clercq)
    """

    def setup_class(self):
        # Shared search objects, constructed once for the whole test class.
        self.searcher = BasisSearch()
        self.mcmsearcher = MCMSearch()

    def test_exhaustive_search(self):
        # Run an exhaustive basis search on the binary (q=2) SCOTUS data.
        dataset = Data("../../input/US_SupremeCourt_n9_N895.dat", 9, 2)
        opt_basis_1 = self.searcher.exhaustive(dataset)
        # The searcher stores its result; get_basis() should return the same basis.
        opt_basis_2 = self.searcher.get_basis()
        # Expected optimal basis matrix from the reference implementations.
        assert np.all(opt_basis_1.matrix == [[0,0,0,1,1,1,0,0,0],
                                             [0,0,0,0,0,0,1,0,0],
                                             [0,0,0,0,0,1,0,1,0],
                                             [1,0,0,0,0,0,0,0,0],
                                             [0,0,0,1,0,0,0,0,0],
                                             [0,1,0,0,0,0,0,0,0],
                                             [1,0,0,0,1,0,0,0,1],
                                             [0,1,1,0,0,0,1,0,0],
                                             [0,0,1,0,0,0,0,1,0]])

        # Returned and stored results must agree.
        assert np.all(opt_basis_1.matrix == opt_basis_2.matrix)
        assert opt_basis_1.n == 9
        assert opt_basis_2.n == 9

        assert opt_basis_1.q == 2
        assert opt_basis_2.q == 2

    def get_best_mcm(self, q):
        # Helper (not a test): best MCM found in the optimal basis for a given q.
        dataset = Data("../../input/US_SupremeCourt_n9_N895.dat", 9, q)
        # Calculate the best basis
        opt_basis = self.searcher.exhaustive(dataset)
        # Transform data
        transformed_data = opt_basis.gauge_transform_data(dataset)
        # Get best mcm in the new basis
        best_mcm = self.mcmsearcher.exhaustive(transformed_data)

        return best_mcm

    def test_mcm_opt_basis_q2(self):
        best_mcm = self.get_best_mcm(2)

        # Check the mcm and log-evidence against the reference values
        assert np.all(best_mcm.array == [[1,0,0,0,0,0,0,0,0],
                                         [0,1,1,0,0,0,1,0,0],
                                         [0,0,0,1,1,1,0,1,1]])

        assert np.isclose(best_mcm.get_best_log_evidence(), -3154.42)

    def test_mcm_opt_basis_q3(self):
        best_mcm = self.get_best_mcm(3)

        # Check the mcm and log-evidence against the reference values
        assert np.all(best_mcm.array == [[1,0,0,0,1,0,0,1,0],
                                         [0,1,0,1,0,0,1,0,0],
                                         [0,0,1,0,0,1,0,0,1]])

        assert np.isclose(best_mcm.get_best_log_evidence(), -3587.68)

    def test_mcm_opt_basis_q4(self):
        best_mcm = self.get_best_mcm(4)

        # Check the mcm and log-evidence against the reference values
        assert np.all(best_mcm.array == [[1,0,0,0,0,0,0,0,0],
                                         [0,1,0,0,0,0,1,0,0],
                                         [0,0,1,0,0,1,0,0,0],
                                         [0,0,0,1,0,0,0,0,1],
                                         [0,0,0,0,1,0,0,1,0]])

        assert np.isclose(best_mcm.get_best_log_evidence(), -3763.43)

    def test_mcm_opt_basis_q5(self):
        best_mcm = self.get_best_mcm(5)

        # Check the mcm and log-evidence against the reference values
        assert np.all(best_mcm.array == [[1,0,0,0,0,0,0,0,0],
                                         [0,1,0,0,0,0,0,0,0],
                                         [0,0,1,0,0,1,0,0,0],
                                         [0,0,0,1,0,0,1,0,0],
                                         [0,0,0,0,1,0,0,1,0],
                                         [0,0,0,0,0,0,0,0,1]])

        assert np.isclose(best_mcm.get_best_log_evidence(), -3848.06)
59 changes: 59 additions & 0 deletions mcmpy/tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pytest
import numpy as np
from mcmpy import Data

# Test information theoretic criteria against previous code
# (https://github.com/clelidm/MinCompSpin_ExhaustiveSearch) by C. de Mulatier;
# Results of the optimal MCM for the binary SCOTUS data in the original basis

def test_log_evidence(scotus_data_q2, opt_mcm_scotus_q2):
    """Log-evidence of the optimal binary-SCOTUS MCM matches the reference value."""
    log_e = scotus_data_q2.log_evidence(opt_mcm_scotus_q2)
    assert np.isclose(log_e, -3300.4)

def test_log_likelihood(scotus_data_q2, opt_mcm_scotus_q2):
    """Maximum log-likelihood matches the reference value."""
    log_l = scotus_data_q2.log_likelihood(opt_mcm_scotus_q2)
    assert np.isclose(log_l, -3194.36)

def test_parametric_complexity(scotus_data_q2, opt_mcm_scotus_q2):
    """Parametric complexity term matches the reference value."""
    c_param = scotus_data_q2.complexity_parametric(opt_mcm_scotus_q2)
    assert np.isclose(c_param, 114.056)

def test_geometric_complexity(scotus_data_q2, opt_mcm_scotus_q2):
    """Geometric complexity term matches the reference value."""
    c_geom = scotus_data_q2.complexity_geometric(opt_mcm_scotus_q2)
    assert np.isclose(c_geom, -8.95092)

def test_total_complexity(scotus_data_q2, opt_mcm_scotus_q2):
    """Total complexity (parametric + geometric) matches the reference value."""
    total = (scotus_data_q2.complexity_parametric(opt_mcm_scotus_q2)
             + scotus_data_q2.complexity_geometric(opt_mcm_scotus_q2))
    assert np.isclose(total, 105.105)

def test_mdl(scotus_data_q2, opt_mcm_scotus_q2):
    """Minimum description length matches the reference value."""
    mdl = scotus_data_q2.minimum_description_length(opt_mcm_scotus_q2)
    assert np.isclose(mdl, -3299.46)

def test_log_evidence_icc(scotus_data_q2, opt_mcm_scotus_q2):
    """Per-component (ICC) log-evidence values match the reference values."""
    assert np.allclose(scotus_data_q2.log_evidence_icc(opt_mcm_scotus_q2),
                       [-1754.41, -1545.98])

def test_log_likelihood_icc(scotus_data_q2, opt_mcm_scotus_q2):
    """Per-component (ICC) log-likelihood values match the reference values."""
    assert np.allclose(scotus_data_q2.log_likelihood_icc(opt_mcm_scotus_q2),
                       [-1686.28, -1508.08])

def test_parametric_complexity_icc(scotus_data_q2, opt_mcm_scotus_q2):
    """Per-component (ICC) parametric complexities match the reference values."""
    assert np.allclose(scotus_data_q2.complexity_parametric_icc(opt_mcm_scotus_q2),
                       [76.8637, 37.1921])

def test_geometric_complexity_icc(scotus_data_q2, opt_mcm_scotus_q2):
    """Per-component (ICC) geometric complexities match the reference values."""
    assert np.allclose(scotus_data_q2.complexity_geometric_icc(opt_mcm_scotus_q2),
                       [-9.58359, 0.632678])


# Input array test
def test_partition_input(scotus_data_q2):
    """Malformed partition arguments raise ValueError with a descriptive message."""
    # A 3D array is neither a 1D labelling nor a 2D indicator matrix.
    with pytest.raises(ValueError, match="The partition should be a 1D or 2D array."):
        scotus_data_q2.log_evidence(np.ones((2, 2, 2)))

    # 2D indicator matrices must be binary; an entry of 2 is rejected.
    with pytest.raises(ValueError, match="Entries of the 2D array should be either 0 or 1."):
        scotus_data_q2.log_evidence([[1, 2, 0], [0, 0, 1]])


def test_entropy_of_op_input(scotus_data_q2):
    """Malformed spin-operator arguments raise ValueError with a descriptive message."""
    # Spin operators must be 1D, not 2D.
    with pytest.raises(ValueError, match="The spin operator should be given as a 1D numpy array."):
        scotus_data_q2.entropy_of_spin_operator(np.ones((2, 2)))

    # For q=2 data each entry must lie in [0, q-1]; a 2 is out of range.
    with pytest.raises(ValueError, match="The vector should only contain values between 0 and q-1."):
        scotus_data_q2.entropy_of_spin_operator([1, 2, 0, 1, 0, 1, 0, 1, 1])

    # The operator must have exactly n (= 9) entries; 7 entries are rejected.
    with pytest.raises(ValueError, match="The given spin operator doesn't contain n elements."):
        scotus_data_q2.entropy_of_spin_operator([1, 0, 1, 0, 1, 0, 1])

Loading
Loading