feat(aggregation): Add FairGrad (#688)

ValerianRey · web-flow · commit cb1c680d3ca0 · 2026-05-20T21:10:19.000+02:00
* Add `FairGrad`
* Add `FairGradWeighting`
* Add a `fairgrad` optional dependency group (`pip install
"torchjd[fairgrad]"`) backed by `scipy`
* Add changelog entry
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,11 @@ changelog does not include internal changes that do not affect the user.
 
 ## [Unreleased]
 
+### Added
+
+- Added `FairGrad` and `FairGradWeighting` from [Fair Resource Allocation in Multi-Task
+  Learning](https://arxiv.org/pdf/2402.15638).
+
 ### Changed
 
 - **BREAKING**: Removed `numpy`, `quadprog` and `qpsolvers` from the main dependencies of `torchjd`,
diff --git a/NOTICES b/NOTICES
@@ -59,6 +59,34 @@ SOFTWARE.
 
 -------------------------------------------------------------------------------
 
+Project: fairgrad
+Source:  https://github.com/OptMN-Lab/fairgrad
+Used in: src/torchjd/aggregation/_fairgrad.py
+
+MIT License
+
+Copyright (c) 2024 OptMN-Lab
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+-------------------------------------------------------------------------------
+
 Project: ConFIG
 Source:  https://github.com/tum-pbs/ConFIG/tree/main/conflictfree
 Used in: src/torchjd/aggregation/_config.py
diff --git a/docs/source/docs/aggregation/fairgrad.rst b/docs/source/docs/aggregation/fairgrad.rst
@@ -0,0 +1,10 @@
+:hide-toc:
+
+FairGrad
+========
+
+.. autoclass:: torchjd.aggregation.FairGrad
+    :members: __call__
+
+.. autoclass:: torchjd.aggregation.FairGradWeighting
+    :members: __call__
diff --git a/docs/source/docs/aggregation/index.rst b/docs/source/docs/aggregation/index.rst
@@ -37,6 +37,7 @@ Abstract base classes
     constant.rst
     cr_mogm.rst
     dualproj.rst
+    fairgrad.rst
     flattening.rst
     graddrop.rst
     gradvac.rst
diff --git a/pyproject.toml b/pyproject.toml
@@ -119,12 +119,17 @@ cagrad = [
     "numpy>=1.21.2",  # Does not work before 1.21. No python 3.10 wheel before 1.21.2.
     "cvxpy>=1.3.0",  # No Clarabel solver before 1.3.0
 ]
+fairgrad = [
+    "numpy>=1.21.2",  # Does not work before 1.21. No python 3.10 wheel before 1.21.2.
+    "scipy",
+]
 full = [
     "numpy>=1.21.2",  # Does not work before 1.21. No python 3.10 wheel before 1.21.2.
     "quadprog>=0.1.9, != 0.1.10",  # Doesn't work before 0.1.9, 0.1.10 is yanked
     "qpsolvers>=1.0.1",  # Does not work before 1.0.1
     "cvxpy>=1.3.0",  # No Clarabel solver before 1.3.0
     "ecos>=2.0.14",  # Does not work before 2.0.14
+    "scipy",
 ]
 
 [tool.pytest.ini_options]
diff --git a/src/torchjd/aggregation/__init__.py b/src/torchjd/aggregation/__init__.py
@@ -67,6 +67,7 @@
 from ._constant import Constant, ConstantWeighting
 from ._cr_mogm import CRMOGMWeighting
 from ._dualproj import DualProj, DualProjWeighting
+from ._fairgrad import FairGrad, FairGradWeighting
 from ._flattening import Flattening
 from ._graddrop import GradDrop
 from ._gradvac import GradVac, GradVacWeighting
@@ -95,6 +96,8 @@
     "CRMOGMWeighting",
     "DualProj",
     "DualProjWeighting",
+    "FairGrad",
+    "FairGradWeighting",
     "Flattening",
     "GeneralizedWeighting",
     "GradDrop",
diff --git a/src/torchjd/aggregation/_fairgrad.py b/src/torchjd/aggregation/_fairgrad.py
@@ -0,0 +1,123 @@
+# Partly adapted from https://github.com/OptMN-Lab/fairgrad/blob/main/methods/weight_methods.py#L811-L825 — MIT License, Copyright (c) 2024 OptMN-Lab.
+# See NOTICES for the full license text.
+
+from __future__ import annotations
+
+import contextlib
+
+import torch
+from torch import Tensor
+
+from torchjd._mixins import _WithOptionalDeps
+from torchjd.linalg import PSDMatrix
+
+from ._aggregator_bases import GramianWeightedAggregator
+from ._mixins import _NonDifferentiable
+from ._weighting_bases import _GramianWeighting
+
+with contextlib.suppress(ImportError):
+    import numpy as np
+    from scipy.optimize import least_squares
+
+
+# Non-differentiable: the scipy solver operates on numpy arrays, breaking the autograd graph.
+class FairGradWeighting(_WithOptionalDeps, _NonDifferentiable, _GramianWeighting):
+    r"""
+    :class:`~torchjd.aggregation.Weighting` [:class:`~torchjd.linalg.PSDMatrix`] giving the
+    weights of :class:`~torchjd.aggregation.FairGrad`, as defined in Equation 4 of `Fair Resource
+    Allocation in Multi-Task Learning <https://arxiv.org/pdf/2402.15638>`_.
+
+    :param alpha: The parameter controlling the type of fairness in the alpha-fairness
+        formulation.
+    :param max_iters: The maximum number of function evaluations of the solver (``max_nfev``). If
+        set to None, the default of ``scipy.optimize.least_squares`` (``100 * m``) will be used.
+
+    .. note::
+        This implementation was adapted from the `official implementation
+        <https://github.com/OptMN-Lab/fairgrad/blob/main/methods/weight_methods.py#L811-L825>`_.
+
+    .. note::
+        This weighting requires optional dependencies. When they are not installed, instantiating
+        it raises an :class:`ImportError` with installation instructions.
+        To install them, use ``pip install "torchjd[fairgrad]"``.
+    """
+
+    _REQUIRED_DEPS = ["numpy", "scipy"]
+    _INSTALL_HINT = 'Install it with: pip install "torchjd[fairgrad]"'
+
+    def __init__(self, alpha: float, max_iters: int | None = None) -> None:
+        super().__init__()
+        self.alpha = alpha
+        self.max_iters = max_iters
+
+    def forward(self, gramian: PSDMatrix, /) -> Tensor:
+        m = gramian.shape[0]
+        uniform = np.ones(m) / m
+
+        if self.alpha == 0:
+            # When alpha=0, the alpha-fairness formulation reduces to linear scalarization with
+            # uniform weights (see Section 3 of https://arxiv.org/pdf/2402.15638).
+            weight_array = uniform
+        else:
+            gramian_array = gramian.detach().cpu().numpy()
+
+            def objective(x: np.ndarray) -> np.ndarray:
+                return np.dot(gramian_array, x) - np.power(x, -1.0 / self.alpha)
+
+            res = least_squares(objective, uniform, bounds=(0, np.inf), max_nfev=self.max_iters)
+            weight_array = res.x
+
+        return torch.tensor(weight_array).to(device=gramian.device, dtype=gramian.dtype)
+
+    @property
+    def alpha(self) -> float:
+        return self._alpha
+
+    @alpha.setter
+    def alpha(self, value: float) -> None:
+        self._alpha = value
+
+
+class FairGrad(_NonDifferentiable, GramianWeightedAggregator):
+    r"""
+    :class:`~torchjd.aggregation.GramianWeightedAggregator` using the step decision of Algorithm 1
+    of `Fair Resource Allocation in Multi-Task Learning
+    <https://arxiv.org/pdf/2402.15638>`_.
+
+    :param alpha: The parameter controlling the type of fairness in the alpha-fairness
+        formulation.
+    :param max_iters: The maximum number of function evaluations of the solver (``max_nfev``). If
+        set to None, the default of ``scipy.optimize.least_squares`` (``100 * m``) will be used.
+
+    .. note::
+        This aggregator requires optional dependencies. When they are not installed, instantiating
+        it raises an :class:`ImportError` with installation instructions.
+        To install them, use ``pip install "torchjd[fairgrad]"``.
+    """
+
+    gramian_weighting: FairGradWeighting
+
+    def __init__(self, alpha: float, max_iters: int | None = None) -> None:
+        super().__init__(FairGradWeighting(alpha=alpha, max_iters=max_iters))
+
+    @property
+    def alpha(self) -> float:
+        return self.gramian_weighting.alpha
+
+    @alpha.setter
+    def alpha(self, value: float) -> None:
+        self.gramian_weighting.alpha = value
+
+    @property
+    def max_iters(self) -> int | None:
+        return self.gramian_weighting.max_iters
+
+    @max_iters.setter
+    def max_iters(self, value: int | None) -> None:
+        self.gramian_weighting.max_iters = value
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}(alpha={self.alpha}, max_iters={self.max_iters})"
+
+    def __str__(self) -> str:
+        return f"{self.alpha}-FairGrad"
diff --git a/tests/plots/interactive_plotter.py b/tests/plots/interactive_plotter.py
@@ -20,6 +20,7 @@
     CAGrad,
     ConFIG,
     DualProj,
+    FairGrad,
     GradDrop,
     GradVac,
     Mean,
@@ -63,6 +64,7 @@ def main() -> None:
         str(CAGrad(c=0.5)): lambda: CAGrad(c=0.5),
         str(ConFIG()): lambda: ConFIG(),
         str(DualProj()): lambda: DualProj(projector=QuadprogProjector(reg_eps=1e-7)),
+        str(FairGrad(alpha=1.0)): lambda: FairGrad(alpha=1.0),
         str(GradDrop()): lambda: GradDrop(),
         str(GradVac()): lambda: GradVac(),
         str(IMTLG()): lambda: IMTLG(),
diff --git a/tests/unit/aggregation/test_fairgrad.py b/tests/unit/aggregation/test_fairgrad.py
@@ -0,0 +1,63 @@
+from utils.optional_deps import skip_if_deps_not_installed
+
+from torchjd.aggregation import FairGrad, FairGradWeighting, Mean
+
+skip_if_deps_not_installed(FairGradWeighting)
+
+from pytest import mark
+from torch import Tensor
+from utils.tensors import ones_
+
+from ._asserts import assert_expected_structure, assert_non_conflicting, assert_non_differentiable
+from ._inputs import scaled_matrices, typical_matrices
+
+# max_iters=1 is enough to produce a finite output for structure tests.
+scaled_pairs = [(FairGrad(alpha=1.0, max_iters=1), matrix) for matrix in scaled_matrices]
+typical_pairs = [(FairGrad(alpha=1.0, max_iters=1), matrix) for matrix in typical_matrices]
+requires_grad_pairs = [(FairGrad(alpha=1.0, max_iters=1), ones_(3, 5, requires_grad=True))]
+# max_iters=100 is sufficient for convergence on the base matrices.
+non_conflicting_pairs = [
+    (FairGrad(alpha=0.1, max_iters=100), matrix) for matrix in typical_matrices
+]
+
+
+@mark.parametrize(["aggregator", "matrix"], scaled_pairs + typical_pairs)
+def test_expected_structure(aggregator: FairGrad, matrix: Tensor) -> None:
+    assert_expected_structure(aggregator, matrix)
+
+
+@mark.parametrize(["aggregator", "matrix"], requires_grad_pairs)
+def test_non_differentiable(aggregator: FairGrad, matrix: Tensor) -> None:
+    assert_non_differentiable(aggregator, matrix)
+
+
+@mark.parametrize(["aggregator", "matrix"], non_conflicting_pairs)
+def test_non_conflicting(aggregator: FairGrad, matrix: Tensor) -> None:
+    assert_non_conflicting(aggregator, matrix)
+
+
+def test_representations() -> None:
+    A = FairGrad(alpha=0.1, max_iters=None)
+    assert repr(A) == "FairGrad(alpha=0.1, max_iters=None)"
+    assert str(A) == "0.1-FairGrad"
+
+
+def test_alpha_setter_updates_value() -> None:
+    A = FairGrad(alpha=1.0)
+    A.alpha = 2.0
+    assert A.alpha == 2.0
+    assert A.gramian_weighting.alpha == 2.0
+
+
+def test_max_iters_setter_updates_value() -> None:
+    A = FairGrad(alpha=1.0)
+    A.max_iters = 50
+    assert A.max_iters == 50
+    assert A.gramian_weighting.max_iters == 50
+
+
+def test_alpha_zero_gives_uniform_weights() -> None:
+    aggregator = FairGrad(alpha=0.0)
+    mean = Mean()
+    for matrix in typical_matrices:
+        assert aggregator(matrix).allclose(mean(matrix))
diff --git a/tests/unit/aggregation/test_values.py b/tests/unit/aggregation/test_values.py
@@ -3,6 +3,7 @@
 from torch.testing import assert_close
 from utils.optional_deps import (
     IS_CAGRAD_AVAILABLE,
+    IS_FAIRGRAD_AVAILABLE,
     IS_NASH_MTL_AVAILABLE,
     IS_QUADPROG_PROJ_AVAILABLE,
 )
@@ -20,6 +21,8 @@
     ConstantWeighting,
     DualProj,
     DualProjWeighting,
+    FairGrad,
+    FairGradWeighting,
     GradDrop,
     GradVac,
     GradVacWeighting,
@@ -93,13 +96,20 @@
     (SumWeighting(), G_base, tensor([1.0, 1.0])),
 ]
 
-
 if IS_QUADPROG_PROJ_AVAILABLE:
     AGGREGATOR_PARAMETRIZATIONS.append((DualProj(), J_base, tensor([0.5563, 1.1109, 1.1109])))
     AGGREGATOR_PARAMETRIZATIONS.append((UPGrad(), J_base, tensor([0.2929, 1.9004, 1.9004])))
     WEIGHTING_PARAMETRIZATIONS.append((DualProjWeighting(), G_base, tensor([0.6109, 0.5000])))
     WEIGHTING_PARAMETRIZATIONS.append((UPGradWeighting(), G_base, tensor([1.1109, 0.7894])))
 
+if IS_FAIRGRAD_AVAILABLE:
+    AGGREGATOR_PARAMETRIZATIONS.append(
+        (FairGrad(alpha=1.0), J_base, tensor([0.0766, 0.9985, 0.9985]))
+    )
+    WEIGHTING_PARAMETRIZATIONS.append(
+        (FairGradWeighting(alpha=1.0), G_base, tensor([0.5915, 0.4071]))
+    )
+
 if IS_CAGRAD_AVAILABLE:
     AGGREGATOR_PARAMETRIZATIONS.append((CAGrad(c=0.5), J_base, tensor([0.1835, 1.2041, 1.2041])))
     WEIGHTING_PARAMETRIZATIONS.append((CAGradWeighting(c=0.5), G_base, tensor([0.7041, 0.5000])))
diff --git a/tests/utils/optional_deps.py b/tests/utils/optional_deps.py
@@ -6,6 +6,7 @@
 from torchjd.aggregation import (
     IMTLG,
     CAGradWeighting,
+    FairGradWeighting,
     GramianWeightedAggregator,
     IMTLGWeighting,
     UPGrad,
@@ -23,6 +24,7 @@ def deps_are_installed(cls: type[_WithOptionalDeps]) -> bool:
 
 
 IS_QUADPROG_PROJ_AVAILABLE = deps_are_installed(QuadprogProjector)
+IS_FAIRGRAD_AVAILABLE = deps_are_installed(FairGradWeighting)
 IS_CAGRAD_AVAILABLE = deps_are_installed(CAGradWeighting)
 IS_NASH_MTL_AVAILABLE = deps_are_installed(_NashMTLWeighting)