diff --git a/doc/release_notes.rst b/doc/release_notes.rst index f775b8cd..7738d1aa 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -55,7 +55,7 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y **Bug Fixes** * ``add_variables`` / ``add_constraints``: extends 0.7.0's coords-as-truth rule to ``lower``, ``upper`` and ``mask`` for every bound type and dim order. Pandas ``Series`` / ``DataFrame`` bounds or masks missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 `__); the variable's dimension order always follows ``coords`` (`#706 `__); bare-tuple coord entries (``coords=[(0, 1, 2)]``) now behave like lists. Mismatched values or extra dims raise ``ValueError`` with a labelled message; sparse-coord masks (formerly a v0.6.3 ``FutureWarning``, #580) raise ``ValueError``, and masks with dims not in the data raise ``ValueError`` instead of ``AssertionError``. -* Pandas inputs whose index names *levels* of a stacked-``MultiIndex`` ``coords`` dimension are now projected onto that dimension: a level subset broadcasts across the others, the full set aligns element-wise. This fixes PyPSA multi-investment arithmetic (e.g. an expression over a ``(period, timestep)`` ``snapshot`` MultiIndex times a ``period``-indexed weighting). In ``add_variables`` / ``add_constraints`` the input must cover every entry of the MultiIndex or a ``ValueError`` is raised. On the arithmetic path the same projections still work but now emit an ``EvolvingAPIWarning`` when they rely on an *implicit* broadcast (level subset) or NaN-fill (uncovered entries) — the upcoming v1 arithmetic convention will require these to be made explicit (e.g. ``.reindex`` onto the dimension, or a named ``.mul(..., join=...)``). Aligning the full level set with full coverage stays silent. 
+* Pandas inputs whose index names *levels* of a stacked-``MultiIndex`` ``coords`` dimension are now projected onto that dimension: a level subset broadcasts across the others, the full set aligns element-wise. This fixes PyPSA multi-investment arithmetic (e.g. an expression over a ``(period, timestep)`` ``snapshot`` MultiIndex times a ``period``-indexed weighting). In ``add_variables`` / ``add_constraints`` the input must provide a value for every level combination of the MultiIndex or a ``ValueError`` is raised (the error lists the missing combinations). **Implicit level projections are deprecated**: broadcasting a level subset emits an ``EvolvingAPIWarning`` everywhere — in arithmetic *and* in ``add_variables`` / ``add_constraints`` — and in arithmetic so does a projection that leaves level combinations without a value (they are NaN-filled); both will raise under the upcoming v1 convention. Project the input onto the dimension explicitly (select with the dimension's level values) to keep current behavior. Aligning the full level set with full coverage stays silent. * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes. * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 `__; pass ``reformulate_sos=True`` as a workaround. * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning. @@ -71,7 +71,7 @@ Most users should keep calling ``model.solve(...)``. 
If you want more control, y **Internal** -* ``linopy.common.as_dataarray`` is the single broadcasting primitive; strict subset-dim / coord-value checks live in ``validate_alignment`` (via ``align_to_coords`` in ``add_variables`` / ``add_constraints``). When ``coords`` is a mapping, extra keys beyond the positional ``dims`` are broadcast in rather than dropped. +* ``linopy.common`` provides two DataArray conversion helpers: ``as_dataarray`` (convert only) and ``broadcast_to_coords`` (convert and broadcast against ``coords``). The latter takes ``strict`` (default ``True``): any mismatch with ``coords`` raises, naming ``label`` in the error; ``strict=False`` passes mismatches through for downstream xarray alignment. * Each ``Solver`` subclass now overrides at most three hooks: ``_build_direct`` (build the native model), ``_run_direct`` (run it), and ``_run_file`` (run the solver on an LP/MPS file). File-only solvers (CBC, GLPK, CPLEX, SCIP, Knitro, COPT, MindOpt) only override ``_run_file``. * New ``ConstraintLabelIndex`` cached on ``Model.constraints`` (mirrors the existing ``Variables.label_index``); ``ConstraintBase`` gains ``active_labels()`` and a ``range`` property; ``CSRConstraint`` exposes ``coords``. * ``linopy.common`` gains ``values_to_lookup_array``; the legacy pandas-based helpers ``series_to_lookup_array`` and ``lookup_vals`` are removed. 
diff --git a/linopy/common.py b/linopy/common.py index 235b17c7..2ed979a3 100644 --- a/linopy/common.py +++ b/linopy/common.py @@ -12,7 +12,7 @@ from collections.abc import Callable, Generator, Hashable, Iterable, Mapping, Sequence from functools import cached_property, partial, reduce, wraps from pathlib import Path -from typing import TYPE_CHECKING, Any, Generic, TypeVar, overload +from typing import TYPE_CHECKING, Any, Generic, Literal, NamedTuple, TypeVar, overload from warnings import warn import numpy as np @@ -216,20 +216,37 @@ def numpy_to_dataarray( return DataArray(arr, coords=coords, dims=dims, **kwargs) -def _as_dataarray_lax( +def as_dataarray( arr: Any, coords: CoordsLike | None = None, dims: DimsLike | None = None, **kwargs: Any, ) -> DataArray: """ - Type-dispatched DataArray conversion without any coords validation. + Convert ``arr`` to a DataArray. + + Picks the right constructor for each supported input type (pandas, + polars, numpy, scalar, DataArray) and labels positional axes with + ``dims`` / ``coords``. The result is not reshaped against ``coords``: + dims are neither expanded, reordered, nor projected onto MultiIndex + dims. Use :func:`broadcast_to_coords` when + ``coords`` should govern the result's shape. - This is the conversion primitive used by ``as_dataarray``: it picks the - right constructor for each supported input type but does not check the - result against ``coords``. Callers that need ``coords`` to govern the - output (dim order, shared-dim values, missing-dim expansion) should use - ``as_dataarray`` instead. + Parameters + ---------- + arr + The input to convert. + coords + Coordinate values used to label positional axes. + dims + Dimension names used to label positional axes. + **kwargs + Forwarded to the underlying DataArray construction. + + Returns + ------- + DataArray + The converted input, dims and entries as ``arr`` provides them. 
""" if isinstance(arr, pd.Series | pd.DataFrame): arr = pandas_to_dataarray(arr, coords=coords, dims=dims, **kwargs) @@ -285,34 +302,38 @@ def _as_multiindex(coord_values: Any) -> pd.MultiIndex | None: return None +class _LevelProjection(NamedTuple): + """ + Record of one MultiIndex-level projection performed by ``_broadcast_to_coords``. + + Terminology: a stacked MultiIndex dim has *levels* (its component index + names, e.g. ``period`` / ``timestep``) and *level combinations* (its + elements — one tuple per position, e.g. ``(2030, 't1')``). + """ + + dim: Hashable + levels: list[Hashable] + is_partial: bool # input carried only a subset of the MI's levels + has_gap: bool # some level combinations of the MI dim got no value (NaN) + missing: list[Any] # the level combinations that got no value + + def _project_onto_multiindex_levels( arr: DataArray, expected: dict[Hashable, Any], - *, - enforce_coverage: bool, -) -> DataArray: +) -> tuple[DataArray, list[_LevelProjection]]: """ - Map ``arr`` dims that are levels of a stacked-MultiIndex coords dim onto it. + Map ``arr`` dims that name levels of a stacked-MultiIndex coords dim onto it. - A dim of ``arr`` that is not itself a coords dim but names a level of a - stacked-MultiIndex coords dim ``D`` is projected onto ``D`` by selecting, - for every entry of ``D``, the ``arr`` value at that entry's level values. - A subset of ``D``'s levels broadcasts across the remaining ones; the full - set aligns element-wise. ``arr`` is returned unchanged when it carries no - such level dims. + For every level combination of the MultiIndex dim, select the ``arr`` + value at that combination's level values. A subset of levels broadcasts + across the remaining ones; the full set aligns element-wise. ``arr`` is + returned unchanged when it carries no level dims. - Raises ``ValueError`` if a level name belongs to more than one MI dim - (ambiguous) or if a referenced level value is missing from ``arr``. 
When - ``enforce_coverage`` is set, also raises if the projection leaves entries - of ``D`` uncovered (the input did not span the full MultiIndex). - - On the non-enforcing (arithmetic) path, projections that the v1 - arithmetic convention will require the caller to make explicit emit an - :class:`~linopy.EvolvingAPIWarning`: aligning a *subset* of ``D``'s - levels (an implicit broadcast — future §9/§10) and aligning the full - level set when it leaves gaps (an implicit NaN-fill — future §5/§8). - Aligning the full level set with full coverage is convention-clean and - stays silent. + Raises ``ValueError`` only on structural errors: a level name owned by + two MI dims, or a level value missing from ``arr``. Partial projections + and coverage gaps are recorded in the returned ``_LevelProjection`` list; + the caller decides how to treat them. """ level_owner: dict[Hashable, Hashable] = {} owner_mi: dict[Hashable, pd.MultiIndex] = {} @@ -340,6 +361,7 @@ def _project_onto_multiindex_levels( if owner is not None: groups.setdefault(owner, []).append(d) + projections: list[_LevelProjection] = [] for dim, levels in groups.items(): mi = owner_mi[dim] selectors = { @@ -354,92 +376,43 @@ def _project_onto_multiindex_levels( f"{dim!r}: value {err} is missing." ) from err arr = arr.assign_coords(Coordinates.from_pandas_multiindex(mi, dim)) - is_partial = len(levels) < sum(name is not None for name in mi.names) - has_gap = bool(arr.isnull().any()) - if enforce_coverage: - if has_gap: - raise ValueError( - f"Input does not cover every entry of MultiIndex dimension " - f"{dim!r} (aligned from level(s) {levels})." - ) - elif is_partial or has_gap: - kind = ( - f"broadcasting level subset {levels}" - if is_partial - else f"filling uncovered entries with NaN (from level(s) {levels})" - ) - warn( - f"multiindex-projection: implicitly {kind} onto MultiIndex " - f"dimension {dim!r}. 
The v1 arithmetic convention will require " - f"this to be explicit; reindex onto the dimension or use a " - f"named method with `join=` to keep current behavior.", - EvolvingAPIWarning, - stacklevel=2, + # A level combination is "missing" when the projection gave it no + # value at any position of the other dims. + null_mask = arr.isnull() + other_dims = [d for d in arr.dims if d != dim] + if other_dims: + null_mask = null_mask.any(other_dims) + has_gap = bool(null_mask.any()) + missing = list(arr.indexes[dim][null_mask.values]) if has_gap else [] + projections.append( + _LevelProjection( + dim=dim, + levels=levels, + is_partial=len(levels) < sum(name is not None for name in mi.names), + has_gap=has_gap, + missing=missing, ) + ) - return arr + return arr, projections -def as_dataarray( +def _broadcast_to_coords( arr: Any, coords: CoordsLike | None = None, dims: DimsLike | None = None, - *, - enforce_level_coverage: bool = False, **kwargs: Any, -) -> DataArray: +) -> tuple[DataArray, list[_LevelProjection]]: """ - Convert ``arr`` to a DataArray and broadcast it against ``coords``. + Convert ``arr`` and broadcast it against ``coords`` (shared mechanics). - When ``coords`` carries named dimensions, the result is aligned with - those coords: - - - positional inputs (numpy, polars, unnamed pandas, scalar) are labeled - with the coord dim names by position; - - for every dim shared between ``arr`` and ``coords``, same-values- - different-order coordinates are reindexed to ``coords`` order; - - dims present in ``coords`` but not in ``arr`` are expanded to the - ``coords`` shape; - - dims of ``arr`` that name levels of a stacked-MultiIndex ``coords`` - dim are projected onto that dim (a subset of levels broadcasts, the - full set aligns element-wise); - - the result is transposed to ``coords`` order. - - Dimensions present in ``arr`` but not in ``coords`` are preserved so - standard xarray broadcasting keeps working. Disagreeing coord values - on a shared dim (i.e. 
value sets that are not equal as sets) are - passed through unchanged: downstream xarray alignment decides how to - combine them. To enforce that ``arr.dims`` ⊆ ``coords.dims`` and that - shared coord values match, use ``validate_alignment`` (called - automatically for ``lower``, ``upper``, and ``mask`` in - :meth:`~linopy.model.Model.add_variables` and for ``mask`` in - :meth:`~linopy.model.Model.add_constraints`). - - Parameters - ---------- - arr - Input scalar / list / numpy / polars / pandas / DataArray. - coords - Mapping of dim name → coord values, or a sequence of ``pd.Index`` - / unnamed sequences. ``None`` falls back to xarray's default - labeling (no broadcasting). - dims - Optional dim-names hint, used for positional inputs and to bias - pandas-axis interpretation. - enforce_level_coverage - When projecting onto a stacked-MultiIndex dim, raise if the input - leaves entries of that dim uncovered. Set by the strict callers - (``add_variables`` / ``add_constraints`` via ``align_to_coords``). - **kwargs - Forwarded to the underlying DataArray construction. - - Returns - ------- - DataArray - Broadcast against ``coords`` (extra dims preserved). + Returns the broadcast DataArray together with the MultiIndex-level + projections performed along the way, so the public entry points can + apply their own policy (warn or raise) to partial projections and + coverage gaps. 
""" if coords is None: - return _as_dataarray_lax(arr, coords, dims, **kwargs) + return as_dataarray(arr, coords, dims, **kwargs), [] if isinstance(coords, list | tuple) and any(isinstance(c, tuple) for c in coords): # xarray reads bare `(a, b)` as `(dim_name, values)`; normalize so a @@ -448,7 +421,7 @@ def as_dataarray( expected = _coords_to_dict(coords, dims=dims) if not expected: - return _as_dataarray_lax(arr, coords, dims, **kwargs) + return as_dataarray(arr, coords, dims, **kwargs), [] if isinstance(arr, pd.Series | pd.DataFrame): converted = _named_pandas_to_dataarray(arr) @@ -458,7 +431,7 @@ def as_dataarray( if not isinstance(arr, DataArray): # numpy/polars/unnamed-pandas inputs are positional — their only # meaningful information is the values; any axis labels are - # auto-generated. Default dims to coords' keys so the lax conversion + # auto-generated. Default dims to coords' keys so the conversion # labels axes correctly (instead of dim_0/dim_1), then re-assign # coords from expected so positional inputs align to coords by # position. A shape mismatch surfaces here as a clear xarray @@ -466,9 +439,9 @@ def as_dataarray( # "coordinates do not match" further down. if dims is None: dims = list(expected) - arr = _as_dataarray_lax(arr, coords, dims=dims, **kwargs) + arr = as_dataarray(arr, coords, dims=dims, **kwargs) # Skip MultiIndex dims — re-assigning a PandasMultiIndex coord emits - # a FutureWarning and isn't needed (the lax pass already used it). + # a FutureWarning and isn't needed (the conversion already used it). 
arr = arr.assign_coords( { d: expected[d] @@ -477,9 +450,7 @@ def as_dataarray( } ) - arr = _project_onto_multiindex_levels( - arr, expected, enforce_coverage=enforce_level_coverage - ) + arr, projections = _project_onto_multiindex_levels(arr, expected) for dim, coord_values in expected.items(): if dim not in arr.dims: @@ -491,10 +462,7 @@ def as_dataarray( if actual_idx.equals(expected_idx): continue # Same values, different order → reindex to match expected order. - # Different value sets are left alone: downstream xarray alignment - # (e.g. xr.align in arithmetic) handles them. Callers needing strict - # value matching (add_variables / add_constraints) should use - # ``validate_alignment`` after this call. + # Different value sets are left alone for downstream xarray alignment. if len(actual_idx) == len(expected_idx) and set(actual_idx) == set( expected_idx ): @@ -543,7 +511,159 @@ def as_dataarray( name=arr.name, ) - return arr + return arr, projections + + +@overload +def broadcast_to_coords( + arr: Any, + coords: CoordsLike | None = ..., + dims: DimsLike | None = ..., + *, + strict: Literal[True] = ..., + label: str, + **kwargs: Any, +) -> DataArray: ... + + +@overload +def broadcast_to_coords( + arr: Any, + coords: CoordsLike | None = ..., + dims: DimsLike | None = ..., + *, + strict: Literal[False], + label: None = ..., + **kwargs: Any, +) -> DataArray: ... + + +def broadcast_to_coords( + arr: Any, + coords: CoordsLike | None = None, + dims: DimsLike | None = None, + *, + strict: bool = True, + label: str | None = None, + **kwargs: Any, +) -> DataArray: + """ + Convert ``arr`` to a DataArray and broadcast it against ``coords``. 
+ + When ``coords`` carries named dimensions, the result is aligned with + them: positional inputs are labeled by position, shared dims with equal + values in a different order are reindexed, dims missing from ``arr`` + are expanded, dims naming levels of a stacked-MultiIndex coords dim are + projected onto it, and the result is transposed to ``coords`` order. + + ``strict`` decides what happens to anything broadcasting alone cannot + resolve — extra dims, disagreeing coord values, and MultiIndex coverage + gaps: + + - ``strict=True`` (default): raise, naming ``label`` in the error. + - ``strict=False``: pass through unchanged so downstream xarray + alignment can handle them. + + A stacked-MultiIndex dim of ``coords`` has *levels* (its component + index names, e.g. ``period`` / ``timestep``) and *level combinations* + (its elements — one tuple per position, e.g. ``(2030, 't1')``). Inputs + indexed by levels instead of the dim itself are implicitly projected + onto the dim's level combinations. These projections are deprecated in + both modes and emit an :class:`~linopy.EvolvingAPIWarning`; the v1 + convention will require them to be explicit. Two cases: + + - input misses a whole level → broadcasts across it; warns in both modes. + - input gives some level combinations no value (a *coverage gap*) → + warns under ``strict=False``, raises under ``strict=True`` (the error + lists the missing combinations). + + Parameters + ---------- + arr + The input to convert and broadcast. + coords + Coordinate values the result is broadcast against. ``None`` falls + back to plain conversion. + dims + Dimension names used to label positional axes. + strict + Check that the result stays within ``coords`` (raise on violation) + instead of passing violations through. + label + Name of the input in error messages (e.g. ``"lower bound"``). + Required when ``strict=True``, not accepted otherwise. + **kwargs + Forwarded to the underlying DataArray construction. 
+ + Returns + ------- + DataArray + Broadcast against ``coords``. + """ + if not strict: + da, projections = _broadcast_to_coords(arr, coords, dims, **kwargs) + _warn_implicit_projections(projections) + return da + + if label is None: + raise TypeError( + "broadcast_to_coords(strict=True) requires `label` to name the " + "input in error messages, e.g. label='lower bound'." + ) + subject = label + if coords is not None: + _coords_to_dict(coords, dims=dims) + try: + da, projections = _broadcast_to_coords(arr, coords, dims=dims, **kwargs) + except TypeError as err: + raise TypeError(f"{subject} could not be aligned to coords: {err}") from err + except (ValueError, CoordinateValidationError) as err: + raise ValueError(f"{subject} could not be aligned to coords: {err}") from err + for p in projections: + if p.has_gap: + preview = ", ".join(str(c) for c in p.missing[:5]) + if len(p.missing) > 5: + preview += f", … ({len(p.missing)} in total)" + raise ValueError( + f"{subject} could not be aligned to coords: no value for " + f"{len(p.missing)} level combination(s) of MultiIndex dimension " + f"{p.dim!r}: {preview}. The input is indexed by level(s) " + f"{p.levels} and must cover every combination." + ) + _warn_implicit_projections(projections) + validate_alignment(da, coords, dims=dims, label=label) + return da + + +def _warn_implicit_projections(projections: list[_LevelProjection]) -> None: + """ + Deprecation warnings for implicit MultiIndex-level projections. + + The same check in every mode (scenario B of the #732 / #737 discussion): + implicit projection is deprecated and raises under the v1 convention. The + strict path raises on coverage gaps before reaching here, so only partial + levels warn there; the non-strict path warns for both. + + TODO(#738): migrate to ``warn_legacy()`` / ``LinopySemanticsWarning`` + once the v1 semantics infrastructure (#717) lands. 
+ """ + for p in projections: + if p.is_partial or p.has_gap: + kind = ( + f"broadcasting level subset {p.levels}" + if p.is_partial + else f"filling uncovered level combinations with NaN " + f"(from level(s) {p.levels})" + ) + warn( + f"multiindex-projection: implicitly {kind} onto MultiIndex " + f"dimension {p.dim!r}. This is deprecated and will raise under " + f"the v1 convention; project the input onto the dimension " + f"explicitly (select with the dimension's level values) to " + f"keep current behavior.", + EvolvingAPIWarning, + stacklevel=3, + ) def validate_alignment( @@ -608,37 +728,6 @@ def validate_alignment( ) -def align_to_coords( - value: Any, - coords: CoordsLike | None, - *, - label: str, - dims: DimsLike | None = None, - **kwargs: Any, -) -> DataArray: - """ - Convert ``value`` with :func:`as_dataarray` and enforce the coords contract. - - Used by :meth:`~linopy.model.Model.add_variables` for ``lower``, ``upper``, - and ``mask``, and by :meth:`~linopy.model.Model.add_constraints` for - ``mask``. Raises :class:`ValueError` with a message that names ``label`` - when ``value`` cannot be aligned to ``coords``. Coords-parsing errors - propagate unchanged. 
- """ - if coords is not None: - _coords_to_dict(coords, dims=dims) - try: - da = as_dataarray( - value, coords, dims=dims, enforce_level_coverage=True, **kwargs - ) - except TypeError as err: - raise TypeError(f"{label} could not be aligned to coords: {err}") from err - except (ValueError, CoordinateValidationError) as err: - raise ValueError(f"{label} could not be aligned to coords: {err}") from err - validate_alignment(da, coords, dims=dims, label=label) - return da - - def _coords_to_dict( coords: Sequence[Sequence | pd.Index] | Mapping, dims: DimsLike | None = None, diff --git a/linopy/expressions.py b/linopy/expressions.py index 7342d22a..673eaba9 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -47,9 +47,9 @@ from linopy.common import ( EmptyDeprecationWrapper, LocIndexer, - _as_dataarray_lax, as_dataarray, assign_multiindex_safe, + broadcast_to_coords, check_common_keys_values, check_has_nulls, check_has_nulls_polars, @@ -583,7 +583,9 @@ def _add_constant( # so that missing data does not silently propagate through arithmetic. 
if np.isscalar(other) and join is None: return self.assign(const=self.const.fillna(0) + other) - da = as_dataarray(other, coords=self.coords, dims=self.coord_dims) + da = broadcast_to_coords( + other, coords=self.coords, dims=self.coord_dims, strict=False + ) self_const, da, needs_data_reindex = self._align_constant( da, fill_value=0, join=join ) @@ -612,7 +614,9 @@ def _apply_constant_op( - factor (other) is filled with fill_value (0 for mul, 1 for div) - coeffs and const are filled with 0 (additive identity) """ - factor = as_dataarray(other, coords=self.coords, dims=self.coord_dims) + factor = broadcast_to_coords( + other, coords=self.coords, dims=self.coord_dims, strict=False + ) self_const, factor, needs_data_reindex = self._align_constant( factor, fill_value=fill_value, join=join ) @@ -1104,7 +1108,9 @@ def to_constraint( ) if isinstance(rhs, CONSTANT_TYPES): - rhs = as_dataarray(rhs, coords=self.coords, dims=self.coord_dims) + rhs = broadcast_to_coords( + rhs, coords=self.coords, dims=self.coord_dims, strict=False + ) extra_dims = set(rhs.dims) - set(self.coord_dims) if extra_dims: @@ -1687,7 +1693,7 @@ def __matmul__( Matrix multiplication with other, similar to xarray dot. """ if not isinstance(other, LinearExpression | variables.Variable): - other = _as_dataarray_lax(other, coords=self.coords, dims=self.coord_dims) + other = as_dataarray(other, coords=self.coords, dims=self.coord_dims) common_dims = list(set(self.coord_dims).intersection(other.dims)) return (self * other).sum(dim=common_dims) @@ -2173,7 +2179,7 @@ def __matmul__( "Higher order non-linear expressions are not yet supported." 
) - other = _as_dataarray_lax(other, coords=self.coords, dims=self.coord_dims) + other = as_dataarray(other, coords=self.coords, dims=self.coord_dims) common_dims = list(set(self.coord_dims).intersection(other.dims)) return (self * other).sum(dim=common_dims) @@ -2291,7 +2297,7 @@ def as_expression( model : linopy.Model, optional Assigned model, by default None **kwargs : - Keyword arguments passed to `linopy.as_dataarray`. + Keyword arguments passed to `linopy.common.broadcast_to_coords`. Returns ------- @@ -2308,7 +2314,7 @@ def as_expression( return obj.to_linexpr() else: try: - obj = as_dataarray(obj, **kwargs) + obj = broadcast_to_coords(obj, strict=False, **kwargs) except ValueError as e: raise ValueError("Cannot convert to LinearExpression") from e return LinearExpression(obj, model) diff --git a/linopy/model.py b/linopy/model.py index e374c101..aa0e5d29 100644 --- a/linopy/model.py +++ b/linopy/model.py @@ -28,10 +28,10 @@ from linopy import solvers from linopy.common import ( - align_to_coords, as_dataarray, assign_multiindex_safe, best_int, + broadcast_to_coords, maybe_replace_signs, replace_by_map, to_path, @@ -774,8 +774,8 @@ def add_variables( "Semi-continuous variables require a positive scalar lower bound." 
) - lower_da = align_to_coords(lower, coords, label="lower bound", **kwargs) - upper_da = align_to_coords(upper, coords, label="upper bound", **kwargs) + lower_da = broadcast_to_coords(lower, coords, label="lower bound", **kwargs) + upper_da = broadcast_to_coords(upper, coords, label="upper bound", **kwargs) data = Dataset( { "lower": lower_da, @@ -788,7 +788,7 @@ def add_variables( self._check_valid_dim_names(data) if mask is not None: - mask = align_to_coords( + mask = broadcast_to_coords( mask, coords if coords is not None else data.coords, label="mask", @@ -1057,7 +1057,7 @@ def add_constraints( (data,) = xr.broadcast(data, exclude=[TERM_DIM]) if mask is not None: - mask = align_to_coords(mask, data.coords, label="mask").astype(bool) + mask = broadcast_to_coords(mask, data.coords, label="mask").astype(bool) # Auto-mask based on null expressions or NaN RHS (use numpy for speed) if self.auto_mask: diff --git a/linopy/variables.py b/linopy/variables.py index cbf2fb87..755a3afc 100644 --- a/linopy/variables.py +++ b/linopy/variables.py @@ -37,6 +37,7 @@ VariableLabelIndex, as_dataarray, assign_multiindex_safe, + broadcast_to_coords, check_has_nulls, check_has_nulls_polars, filter_nulls_polars, @@ -327,7 +328,9 @@ def to_linexpr( linopy.LinearExpression Linear expression with the variables and coefficients. 
""" - coefficient = as_dataarray(coefficient, coords=self.coords, dims=self.dims) + coefficient = broadcast_to_coords( + coefficient, coords=self.coords, dims=self.dims, strict=False + ) coefficient = coefficient.reindex_like(self.labels, fill_value=0) coefficient = coefficient.fillna(0) ds = Dataset({"coeffs": coefficient, "vars": self.labels}).expand_dims( diff --git a/test/test_common.py b/test/test_common.py index 61ae6f2d..86735547 100644 --- a/test/test_common.py +++ b/test/test_common.py @@ -20,10 +20,10 @@ from linopy import EvolvingAPIWarning, LinearExpression, Model, Variable from linopy.common import ( align, - align_to_coords, as_dataarray, assign_multiindex_safe, best_int, + broadcast_to_coords, get_dims_with_index_levels, is_constant, iterate_slices, @@ -349,11 +349,22 @@ def test_as_dataarray_with_ndarray_coords_dict_dims_aligned() -> None: def test_as_dataarray_with_ndarray_coords_dict_set_dims_not_aligned() -> None: - """Coords is source of truth: extra coord entries broadcast into the result.""" + """as_dataarray converts only: dims label the axes, extra coord entries are dropped.""" target_dims = ("dim_0", "dim_1") target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} arr = np.array([[1, 2], [3, 4]]) da = as_dataarray(arr, coords=target_coords, dims=target_dims) + assert da.dims == target_dims + assert list(da.coords["dim_0"].values) == ["a", "b"] + assert "dim_2" not in da.coords + + +def test_broadcast_to_coords_with_ndarray_coords_dict_set_dims_not_aligned() -> None: + """Coords is source of truth: extra coord entries broadcast into the result.""" + target_dims = ("dim_0", "dim_1") + target_coords = {"dim_0": ["a", "b"], "dim_2": ["A", "B"]} + arr = np.array([[1, 2], [3, 4]]) + da = broadcast_to_coords(arr, coords=target_coords, dims=target_dims, strict=False) # dims labels the positional axes; coords adds dim_2 by broadcast. 
assert set(da.dims) == {"dim_0", "dim_1", "dim_2"} assert list(da.coords["dim_0"].values) == ["a", "b"] @@ -489,7 +500,7 @@ def test_as_dataarray_with_unsupported_type() -> None: as_dataarray(lambda x: 1, dims=["dim1"], coords=[["a"]]) -def test_as_dataarray_preserves_extra_dims_for_broadcasting() -> None: +def test_broadcast_to_coords_preserves_extra_dims() -> None: """Extra dims in the input are not rejected — they broadcast downstream.""" arr = DataArray( [[1, 2], [3, 4], [5, 6]], @@ -497,21 +508,21 @@ def test_as_dataarray_preserves_extra_dims_for_broadcasting() -> None: coords={"a": [0, 1, 2], "t": [10, 20]}, ) coords = {"a": [0, 1, 2]} - da = as_dataarray(arr, coords=coords) + da = broadcast_to_coords(arr, coords=coords, strict=False) assert set(da.dims) == {"a", "t"} assert list(da.coords["t"].values) == [10, 20] -def test_as_dataarray_keeps_disjoint_shared_dim_values() -> None: +def test_broadcast_to_coords_keeps_disjoint_shared_dim_values() -> None: """Different value sets on a shared dim are passed through (xr.align handles).""" arr = DataArray([1, 2, 3, 4, 5], dims=["a"], coords={"a": [0, 1, 2, 3, 4]}) coords = {"a": [2, 3]} - da = as_dataarray(arr, coords=coords) + da = broadcast_to_coords(arr, coords=coords, strict=False) # No exception, no reindex; downstream alignment intersects. assert list(da.coords["a"].values) == [0, 1, 2, 3, 4] -def test_as_dataarray_expands_missing_multiindex_dim_keeps_levels() -> None: +def test_broadcast_to_coords_expands_missing_multiindex_dim_keeps_levels() -> None: """ Broadcasting a missing MultiIndex dim must keep its level coords intact. 
@@ -527,16 +538,17 @@ def test_as_dataarray_expands_missing_multiindex_dim_keeps_levels() -> None: labels = DataArray( [[1], [2], [3], [4]], coords={**sc, "name": ["1"]}, dims=["snapshot", "name"] ) - coeff = as_dataarray( + coeff = broadcast_to_coords( DataArray([1.0], coords={"name": ["1"]}, dims=["name"]), coords=labels.coords, dims=labels.dims, + strict=False, ) assert set(coeff.xindexes) == {"snapshot", "period", "timestep", "name"} coeff.reindex_like(labels, fill_value=0) -def test_as_dataarray_broadcasts_single_multiindex_level() -> None: +def test_broadcast_to_coords_broadcasts_single_multiindex_level() -> None: """ A constant indexed by one MultiIndex level broadcasts across the MI dim. @@ -550,7 +562,7 @@ def test_as_dataarray_broadcasts_single_multiindex_level() -> None: by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 2]}, dims=["level1"]) with pytest.warns(EvolvingAPIWarning, match=r"broadcasting level subset"): - da = as_dataarray(by_level1, coords=coords, dims=["dim_3"]) + da = broadcast_to_coords(by_level1, coords=coords, dims=["dim_3"], strict=False) assert da.dims == ("dim_3",) assert isinstance(da.indexes["dim_3"], pd.MultiIndex) @@ -560,13 +572,13 @@ def test_as_dataarray_broadcasts_single_multiindex_level() -> None: assert da.sel(dim_3=(2, "b")).item() == 20.0 -def test_as_dataarray_stacks_full_multiindex_levels() -> None: +def test_broadcast_to_coords_stacks_full_multiindex_levels() -> None: """ A constant indexed by all MI level names stacks element-wise into the MI dim. PyPSA's storage_weightings is a pandas Series over a (period, timestep) MultiIndex subset (the last snapshot of each period); it must align onto - the matching entries of the 'snapshot' MultiIndex. Entries the subset does + the matching level combinations of the 'snapshot' MultiIndex. Combinations the subset does not cover are left as NaN (broadcast path). 
""" idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) @@ -575,8 +587,10 @@ def test_as_dataarray_stacks_full_multiindex_levels() -> None: subset = pd.MultiIndex.from_tuples([(1, "a"), (2, "b")], names=["level1", "level2"]) weights = pd.Series([10.0, 20.0], index=subset) - with pytest.warns(EvolvingAPIWarning, match=r"filling uncovered entries with NaN"): - da = as_dataarray(weights, coords=coords, dims=["dim_3"]) + with pytest.warns( + EvolvingAPIWarning, match=r"filling uncovered level combinations" + ): + da = broadcast_to_coords(weights, coords=coords, dims=["dim_3"], strict=False) assert da.dims == ("dim_3",) assert isinstance(da.indexes["dim_3"], pd.MultiIndex) @@ -586,7 +600,7 @@ def test_as_dataarray_stacks_full_multiindex_levels() -> None: assert np.isnan(da.sel(dim_3=(2, "a")).item()) -def test_as_dataarray_full_multiindex_full_coverage_is_silent() -> None: +def test_broadcast_to_coords_full_multiindex_full_coverage_is_silent() -> None: """ Full-level, fully-covering alignment is convention-clean → no warning. 
@@ -601,13 +615,13 @@ def test_as_dataarray_full_multiindex_full_coverage_is_silent() -> None: with warnings.catch_warnings(): warnings.simplefilter("error", EvolvingAPIWarning) - da = as_dataarray(full, coords=coords, dims=["dim_3"]) + da = broadcast_to_coords(full, coords=coords, dims=["dim_3"], strict=False) assert da.dims == ("dim_3",) assert da.values.tolist() == [1.0, 2.0, 3.0, 4.0] -def test_as_dataarray_level_projection_ambiguous_raises() -> None: +def test_broadcast_to_coords_level_projection_ambiguous_raises() -> None: """A level name shared by two MI dims cannot be resolved.""" a = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("shared", "x")) b = pd.MultiIndex.from_product([[1, 2], ["c", "d"]], names=("shared", "y")) @@ -618,10 +632,10 @@ def test_as_dataarray_level_projection_ambiguous_raises() -> None: arr = DataArray([1.0, 2.0], coords={"shared": [1, 2]}, dims=["shared"]) with pytest.raises(ValueError, match=r"shared.*shared by MultiIndex"): - as_dataarray(arr, coords=coords) + broadcast_to_coords(arr, coords=coords, strict=False) -def test_as_dataarray_level_projection_missing_value_raises() -> None: +def test_broadcast_to_coords_level_projection_missing_value_raises() -> None: """A level value absent from the input cannot be broadcast.""" idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) idx.name = "dim_3" @@ -629,19 +643,87 @@ def test_as_dataarray_level_projection_missing_value_raises() -> None: by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 9]}, dims=["level1"]) with pytest.raises(ValueError, match=r"Cannot align level.*is missing"): - as_dataarray(by_level1, coords=coords, dims=["dim_3"]) + broadcast_to_coords(by_level1, coords=coords, dims=["dim_3"], strict=False) -def test_as_dataarray_unrelated_multiindex_series_still_unstacks() -> None: +def test_broadcast_to_coords_unrelated_multiindex_series_still_unstacks() -> None: """A MI Series whose levels match no coords MI dim keeps 
unstacking.""" sub = pd.MultiIndex.from_product([["p", "q"], [1, 2]], names=["foo", "bar"]) series = pd.Series([1.0, 2.0, 3.0, 4.0], index=sub) - da = as_dataarray(series, coords={"time": [0, 1, 2]}) + da = broadcast_to_coords(series, coords={"time": [0, 1, 2]}, strict=False) assert set(da.dims) == {"time", "foo", "bar"} +# --------------------------------------------------------------------------- +# Strictness: as_dataarray (convert) ⊂ broadcast_to_coords(strict=False) ⊂ broadcast_to_coords(strict=True) +# --------------------------------------------------------------------------- + + +def test_as_dataarray_does_not_expand_missing_coord_dims() -> None: + """as_dataarray converts; only broadcast_to_coords expands missing dims.""" + coords = {"a": [0, 1], "b": [10, 20]} + arr = np.array([1, 2]) + + converted = as_dataarray(arr, coords=coords, dims=["a"]) + assert converted.dims == ("a",) + + broadcast = broadcast_to_coords(arr, coords=coords, dims=["a"], strict=False) + assert broadcast.dims == ("a", "b") + + +def test_extra_dims_pass_broadcast_rung_fail_strict_rung() -> None: + """Extra dims pass through the broadcast rung but fail the strict rung.""" + arr = DataArray( + [[1, 2], [3, 4]], dims=["a", "t"], coords={"a": [0, 1], "t": [10, 20]} + ) + coords = {"a": [0, 1]} + + da = broadcast_to_coords(arr, coords=coords, strict=False) + assert set(da.dims) == {"a", "t"} + + with pytest.raises(ValueError, match=r"not declared in coords"): + broadcast_to_coords(arr, coords, label="lower bound") + + +def test_broadcast_to_coords_rejects_multiindex_coverage_gap() -> None: + """A coverage gap warns on the broadcast rung but raises on the strict rung.""" + idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) + idx.name = "dim_3" + coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") + subset = pd.MultiIndex.from_tuples([(1, "a"), (2, "b")], names=["level1", "level2"]) + weights = pd.Series([10.0, 20.0], index=subset) + + with 
pytest.warns( + EvolvingAPIWarning, match=r"filling uncovered level combinations" + ): + broadcast_to_coords(weights, coords=coords, dims=["dim_3"], strict=False) + + with pytest.raises(ValueError, match=r"no value for .* level combination"): + broadcast_to_coords(weights, coords, dims=["dim_3"], label="lower bound") + + +def test_broadcast_to_coords_strict_partial_level_warns() -> None: + """ + Per-level bounds broadcast across the MI dim, with the deprecation warning. + + Scenario B (#732 / #737 discussion): implicit MI-level projection is + deprecated everywhere, including the strict (bounds/mask) path, and will + raise under the v1 convention. + """ + idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) + idx.name = "dim_3" + coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") + by_level1 = DataArray([10.0, 20.0], coords={"level1": [1, 2]}, dims=["level1"]) + + with pytest.warns(EvolvingAPIWarning, match=r"broadcasting level subset"): + da = broadcast_to_coords(by_level1, coords, dims=["dim_3"], label="lower bound") + + assert da.sel(dim_3=(1, "b")).item() == 10.0 + assert da.sel(dim_3=(2, "a")).item() == 20.0 + + def test_validate_alignment_rejects_extra_dims() -> None: arr = DataArray( [[1, 2], [3, 4]], dims=["a", "b"], coords={"a": [0, 1], "b": [0, 1]} @@ -682,22 +764,28 @@ def test_validate_alignment_label_in_error() -> None: validate_alignment(arr, {"a": [0, 1]}, label="lower bound") -def test_align_to_coords_wraps_conversion_errors() -> None: +def test_broadcast_to_coords_strict_requires_label() -> None: + """strict=True without label raises: errors must name their subject.""" + with pytest.raises(TypeError, match=r"requires `label`"): + broadcast_to_coords(np.array([1, 2]), {"x": [0, 1]}) # type: ignore[call-overload] + + +def test_broadcast_to_coords_wraps_conversion_errors() -> None: with pytest.raises(ValueError, match=r"lower bound could not be aligned"): - align_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, 
label="lower bound") + broadcast_to_coords(np.array([1, 2]), {"x": [0, 1, 2]}, label="lower bound") -def test_align_to_coords_preserves_type_errors() -> None: +def test_broadcast_to_coords_preserves_type_errors() -> None: """Unsupported input types stay TypeError (don't become ValueError).""" with pytest.raises(TypeError, match=r"lower bound could not be aligned"): - align_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") + broadcast_to_coords(lambda x: x, {"x": [0, 1, 2]}, label="lower bound") -def test_align_to_coords_does_not_relabel_coords_errors() -> None: +def test_broadcast_to_coords_does_not_relabel_coords_errors() -> None: """Coords-side TypeError carries its own message, not the value label.""" mi = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["i", "j"]) with pytest.raises(TypeError, match=r"MultiIndex.*must have \.name set"): - align_to_coords(np.array([1, 2, 3, 4]), [mi], label="lower bound") + broadcast_to_coords(np.array([1, 2, 3, 4]), [mi], label="lower bound") class TestCoordsToDictRules: diff --git a/test/test_constraint.py b/test/test_constraint.py index a1b33d66..d3581de9 100644 --- a/test/test_constraint.py +++ b/test/test_constraint.py @@ -453,6 +453,45 @@ def test_constraint_rhs_setter_with_expression_and_constant( assert mc.lhs.nterm == 2 +def test_constraint_rhs_setter_broadcasts_missing_dim() -> None: + """Rhs assignment broadcasts against the constraint coords: missing dims expand.""" + m = Model() + x = m.add_variables( + coords=[pd.RangeIndex(2, name="i"), pd.RangeIndex(3, name="j")], name="x" + ) + con = m.add_constraints(1 * x >= 0, name="con") + + con.rhs = xr.DataArray([1.0, 2.0], dims=["i"], coords={"i": [0, 1]}) # type: ignore + + assert dict(con.rhs.sizes) == {"i": 2, "j": 3} + assert (con.rhs.sel(i=1) == 2.0).all() + + +def test_constraint_rhs_setter_projects_multiindex_level() -> None: + """ + Rhs indexed by one MultiIndex level is projected onto the stacked dim. 
+ + Regression: as_expression must convert constants with the broadcast rung + (broadcast_to_coords), not plain conversion — otherwise the level dim + collides with the MI level coord downstream (xarray AlignmentError). + """ + idx = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=("level1", "level2")) + idx.name = "dim_3" + coords = xr.Coordinates.from_pandas_multiindex(idx, "dim_3") + m = Model() + x = m.add_variables(coords=coords, name="x") + con = m.add_constraints(1 * x >= 0, name="con") + + rhs_by_level = xr.DataArray( + [10.0, 20.0], coords={"level1": [1, 2]}, dims=["level1"] + ) + with pytest.warns(linopy.EvolvingAPIWarning, match="broadcasting level subset"): + con.rhs = rhs_by_level # type: ignore + + assert con.rhs.sel(dim_3=(1, "b")).item() == 10.0 + assert con.rhs.sel(dim_3=(2, "a")).item() == 20.0 + + def test_constraint_labels_setter_invalid(c: linopy.constraints.CSRConstraint) -> None: # Test that assigning labels raises AttributeError (Constraint is frozen) with pytest.raises(AttributeError): diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index 1ea20b00..82aba70e 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -546,6 +546,47 @@ def test_matmul_expr_and_const(x: Variable, y: Variable) -> None: assert_linequal(expr.dot(const), target) +def test_matmul_contracts_only_shared_dims(z: Variable) -> None: + """ + A @ b contracts the genuinely shared dims and keeps the rest. + + ``z`` has dims (dim_0, dim_1); ``b`` has (dim_1, location). Only dim_1 + is shared, so the result must keep dim_0 and location. A conversion that + broadcast ``b`` to ``z``'s coords would expand dim_0 into ``b`` and + contract it away too — collapsing the result to (location,) only. 
+ """ + expr = 1 * z + b = xr.DataArray( + np.ones((3, 2)), + coords={"dim_1": expr.data.indexes["dim_1"], "location": ["L1", "L2"]}, + dims=["dim_1", "location"], + ) + + res = expr @ b + + assert set(res.coord_dims) == {"dim_0", "location"} + assert_linequal(res, (expr * b).sum("dim_1")) + + +def test_matmul_contracts_all_dims_when_const_covers_them(z: Variable) -> None: + """B covering all of a's dims (and more) contracts a's dims, keeping b's extras.""" + expr = 1 * z # dims (dim_0, dim_1) + b = xr.DataArray( + np.ones((2, 3, 2)), + coords={ + "dim_0": expr.data.indexes["dim_0"], + "dim_1": expr.data.indexes["dim_1"], + "location": ["L1", "L2"], + }, + dims=["dim_0", "dim_1", "location"], + ) + + res = expr @ b + + assert set(res.coord_dims) == {"location"} + assert_linequal(res, (expr * b).sum(["dim_0", "dim_1"])) + + def test_matmul_wrong_input(x: Variable, y: Variable, z: Variable) -> None: expr = 10 * x + y + z with pytest.raises(TypeError): diff --git a/test/test_variable.py b/test/test_variable.py index d3629c0e..c5e315bd 100644 --- a/test/test_variable.py +++ b/test/test_variable.py @@ -866,7 +866,12 @@ def test_single_level_bound_broadcasts( self, model: "Model", midx: pd.MultiIndex ) -> None: bound = DataArray([5, 6], dims=["l1"], coords={"l1": [0, 1]}) - var = model.add_variables(upper=bound, coords=[midx], name="x") + # Implicit level projection is deprecated (scenario B) — warns until + # the v1 convention makes it an error. 
+ with pytest.warns( + linopy.EvolvingAPIWarning, match=r"broadcasting level subset" + ): + var = model.add_variables(upper=bound, coords=[midx], name="x") assert var.dims == ("multi",) assert (var.data.upper == [5, 5, 6, 6]).all() @@ -875,5 +880,5 @@ def test_incomplete_level_bound_raises( ) -> None: subset = pd.MultiIndex.from_tuples([(0, "a"), (1, "b")], names=("l1", "l2")) bound = pd.Series([1, 2], index=subset) - with pytest.raises(ValueError, match="does not cover every entry"): + with pytest.raises(ValueError, match="no value for .* level combination"): model.add_variables(upper=bound, coords=[midx], name="x")