diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 1f4df4bd..609ebec0 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -16,7 +16,7 @@ jobs: - name: Install Python uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.12 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/deploy_pypi.yml b/.github/workflows/deploy_pypi.yml index e4b3f82d..26896fb2 100644 --- a/.github/workflows/deploy_pypi.yml +++ b/.github/workflows/deploy_pypi.yml @@ -17,7 +17,7 @@ jobs: - name: Install Python uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.12 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1352986a..58e3faa3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,7 +6,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python: ['3.8', '3.9', '3.10'] + python: ['3.11', '3.12', '3.13', '3.14'] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v2 @@ -20,7 +20,7 @@ jobs: - name: run tests run: python3 -m pytest --cov=tlviz - name: upload coverage to Codecov - if: ${{matrix.os == 'ubuntu-latest' && matrix.python == '3.10'}} + if: ${{matrix.os == 'ubuntu-latest' && matrix.python == '3.13'}} uses: codecov/codecov-action@v2 with: verbose: true diff --git a/tlviz/_xarray_wrapper.py b/tlviz/_xarray_wrapper.py index f1082ee8..e83fdd6c 100644 --- a/tlviz/_xarray_wrapper.py +++ b/tlviz/_xarray_wrapper.py @@ -56,13 +56,13 @@ def add_factor_metadata(cp_tensor, dataset): >>> bikes = load_oslo_city_bike() >>> bikes.coords Coordinates: - * End station name (End station name) object '7 Juni Plassen' ... 'Økernve... - lat (End station name) float64 59.92 59.93 ... 59.93 59.92 - lon (End station name) float64 10.73 10.75 ... 10.8 10.78 - * Hour (Hour) int32 0 1 2 3 4 5 6 7 8 ... 16 17 18 19 20 21 22 23 - * Month (Month) int32 1 2 3 4 5 6 7 8 9 10 11 12 - * Day of week (Day of week) int32 0 1 2 3 4 5 6 - * Year (Year) int32 2020 2021 + * End station name (End station name) object 2kB '7 Juni Plassen' ... 'Øke... + * Year (Year) int32 8B 2020 2021 + * Month (Month) int32 48B 1 2 3 4 5 6 7 8 9 10 11 12 + * Day of week (Day of week) int32 28B 0 1 2 3 4 5 6 + * Hour (Hour) int32 96B 0 1 2 3 4 5 6 7 ... 17 18 19 20 21 22 23 + lat (End station name) float64 2kB 59.92 59.93 ... 59.93 59.92 + lon (End station name) float64 2kB 10.73 10.75 ... 10.8 10.78 We see that the ``End station name`` dimension has two additional columns: ``lat`` and ``lon``. These contain metadata about the end station coordinates, and it can be useful to have these diff --git a/tlviz/model_evaluation.py b/tlviz/model_evaluation.py index 8e43b1b5..50fcd33a 100644 --- a/tlviz/model_evaluation.py +++ b/tlviz/model_evaluation.py @@ -194,7 +194,7 @@ def sse(cp_tensor, dataset): >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> sse(cp, X) - 18.948918157419186 + np.float64(18.948918157419186) """ X_hat = cp_to_tensor(cp_tensor) return np.sum((dataset - X_hat) ** 2) @@ -236,7 +236,7 @@ def relative_sse(cp_tensor, dataset, sum_squared_dataset=None): >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> relative_sse(cp, X) - 0.4817407254961442 + np.float64(0.4817407254961442) """ if sum_squared_dataset is None: sum_squared_x = np.sum(dataset**2) @@ -279,13 +279,13 @@ def fit(cp_tensor, dataset, sum_squared_dataset=None): >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> fit(cp, X) - 0.5182592745038558 + np.float64(0.5182592745038558) We can see that it is equal to 1 - relative SSE >>> from tlviz.model_evaluation import relative_sse >>> 1 - relative_sse(cp, X) - 0.5182592745038558 + np.float64(0.5182592745038558) """ return 1 - relative_sse(cp_tensor, dataset, sum_squared_dataset=sum_squared_dataset) diff --git a/tlviz/multimodel_evaluation.py b/tlviz/multimodel_evaluation.py index 720c2b4e..64d3fbd0 100644 --- a/tlviz/multimodel_evaluation.py +++ b/tlviz/multimodel_evaluation.py @@ -149,12 +149,12 @@ def get_model_with_lowest_error(cp_tensors, dataset, error_function=None, return And that it is the model that has the lowest error >>> errors[index] == min(errors) - True + np.True_ And finally that this error is equal to the relative SSE >>> errors[index] == relative_sse(model, dataset) - True + np.True_ """ if error_function is None: error_function = model_evaluation.relative_sse diff --git a/tlviz/visualisation.py b/tlviz/visualisation.py index 0c72e1c7..99048516 100644 --- a/tlviz/visualisation.py +++ b/tlviz/visualisation.py @@ -39,6 +39,21 @@ ] +def _get_next_style(ax): + """Get the next style of the matplotlib axes property cycler and increment its position. + + Before Matplotlib v3.8, we directly accessed the prop_cycler of ax._get_lines. However this attribute + was deleted to make pickling work correctly for Matplotlib axes, so now we need this workaround. + """ + lines = ax._get_lines + if hasattr(lines, "prop_cycler"): + return next(lines.prop_cycler) + else: + out = lines._cycler_items[lines._idx] + lines._idx = (lines._idx + 1) % len(lines._cycler_items) + return out + + def scree_plot(cp_tensors, dataset, errors=None, metric="Fit", ax=None): """Create scree plot for the given cp tensors. @@ -177,7 +192,7 @@ def histogram_of_residuals(cp_tensor, dataset, ax=None, standardised=True, **kwa >>> true_cp, X = simulated_random_cp_tensor((10, 20, 30), 3, seed=0) >>> est_cp = parafac(X, 3) >>> histogram_of_residuals(est_cp, X) - + >>> plt.show() """ estimated_dataset = cp_to_tensor(cp_tensor) @@ -244,7 +259,7 @@ def residual_qq(cp_tensor, dataset, ax=None, use_pingouin=False, **kwargs): >>> true_cp, X = simulated_random_cp_tensor((10, 20, 30), 3, seed=0) >>> est_cp = parafac(X, 3) >>> residual_qq(est_cp, X) - + >>> plt.show() """ estimated_dataset = cp_to_tensor(cp_tensor) @@ -337,7 +352,7 @@ def outlier_plot( >>> outlier_plot( ... cp, data, leverage_rules_of_thumb='p-value', residual_rules_of_thumb='p-value', p_value=[0.05, 0.01] ... ) - + >>> plt.show() We can also provide multiple types of rules of thumb @@ -360,7 +375,7 @@ def outlier_plot( >>> outlier_plot( ... cp, data, leverage_rules_of_thumb=['huber lower', 'hw higher'], residual_rules_of_thumb='two sigma' ... ) - + >>> plt.show() See Also @@ -438,7 +453,7 @@ def outlier_plot( # Draw the lines for key, value in leverage_thresholds.items(): - ax.axvline(value, label=key, **next(ax._get_lines.prop_cycler)) + ax.axvline(value, label=key, **_get_next_style(ax)) residual_thresholds = {} if residual_rules_of_thumb is not None: @@ -465,7 +480,7 @@ def outlier_plot( name = residual_rule_of_thumb residual_thresholds[name] = threshold for key, value in residual_thresholds.items(): - ax.axhline(value, label=key, **next(ax._get_lines.prop_cycler)) + ax.axhline(value, label=key, **_get_next_style(ax)) if len(leverage_thresholds) > 0 or len(residual_thresholds) > 0: ax.legend() @@ -521,7 +536,7 @@ def component_scatterplot(cp_tensor, mode, x_component=0, y_component=1, ax=None >>> import matplotlib.pyplot as plt >>> cp_tensor = random_cp(shape=(5,10,15), rank=2) >>> component_scatterplot(cp_tensor, mode=0) - + >>> plt.show() Eexample with PCA of a real stock dataset @@ -553,7 +568,7 @@ def component_scatterplot(cp_tensor, mode, x_component=0, y_component=1, ax=None >>> >>> # Visualise the components with components_plot >>> component_scatterplot(cp_tensor, mode=1) - + >>> plt.show() """ if ax is None: @@ -624,7 +639,7 @@ def core_element_plot(cp_tensor, dataset, normalised=False, ax=None): >>> true_cp, X = simulated_random_cp_tensor((10, 20, 30), 3, seed=42) >>> est_cp = parafac(X, 3) >>> core_element_plot(est_cp, X) - + >>> plt.show() """ weights, factors = cp_tensor @@ -1300,7 +1315,7 @@ def percentage_variation_plot( >>> import matplotlib.pyplot as plt >>> cp_tensor, dataset = simulated_random_cp_tensor(shape=(5,10,15), rank=3, noise_level=0.5, seed=0) >>> percentage_variation_plot(cp_tensor) - + >>> plt.show() We can also get the percentage of variation in the data that each component explains @@ -1314,7 +1329,7 @@ def percentage_variation_plot( >>> import matplotlib.pyplot as plt >>> cp_tensor, dataset = simulated_random_cp_tensor(shape=(5,10,15), rank=3, noise_level=0.5, seed=0) >>> percentage_variation_plot(cp_tensor, dataset, method="data") - + >>> plt.show() Or both the variation in the data and in the model @@ -1328,7 +1343,7 @@ def percentage_variation_plot( >>> import matplotlib.pyplot as plt >>> cp_tensor, dataset = simulated_random_cp_tensor(shape=(5,10,15), rank=3, noise_level=0.5, seed=0) >>> percentage_variation_plot(cp_tensor, dataset, method="both") - + >>> plt.show() """ if ax is None: