From deaa834f418e1829a11f2b25511099e1b8893253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Val=C3=A9rian=20Rey?= Date: Wed, 1 Jul 2026 13:26:49 +0200 Subject: [PATCH] docs: Replace OpenReview links with open-access alternatives Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 4 ++-- README.md | 2 +- src/torchjd/aggregation/_gradvac.py | 2 +- src/torchjd/scalarization/_imtl_l.py | 2 +- src/torchjd/scalarization/_stch.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4c835c6d..ccdb521c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,7 +59,7 @@ changelog does not include internal changes that do not affect the user. ### Added - Added `IMTL-L` (the loss-balancing variant of Impartial Multi-Task Learning) from [Towards - Impartial Multi-Task Learning](https://openreview.net/pdf?id=IMPnRXEWpvr) (ICLR 2021), a stateful + Impartial Multi-Task Learning](https://www.semanticscholar.org/paper/Towards-Impartial-Multi-task-Learning-Liu-Li/45c0828baec1dd53b81f1b2635788fdf27d0792d) (ICLR 2021), a stateful `Scalarizer` that learns a per-task scale `s_i` and combines the values as `Σ (exp(s_i) · L_i − s_i)`. - Added `UW` (Uncertainty Weighting) from [Multi-Task Learning Using Uncertainty to Weigh Losses @@ -74,7 +74,7 @@ changelog does not include internal changes that do not affect the user. ### Added - Added `STCH` from [Smooth Tchebycheff Scalarization for Multi-Objective - Optimization](https://openreview.net/pdf?id=m4dO5L6eCp), a `Scalarizer` that combines the input + Optimization](https://arxiv.org/abs/2402.19078), a `Scalarizer` that combines the input tensor of values into a smooth approximation of their (weighted, shifted) maximum. - Added `MoDoWeighting` from [Three-Way Trade-Off in Multi-Objective Learning: Optimization, Generalization and Conflict-Avoidance](https://www.jmlr.org/papers/volume25/23-1287/23-1287.pdf) (JMLR 2024). 
It is a stateful `Weighting` that maintains task weights across calls via a simplex-projected gradient step on a cross-batch matrix `G = J_1 @ J_2.T`, computed from two independent mini-batches using `autojac.jac`. - Added `GeometricMean` (also known as GLS) studied in [MultiNet++: Multi-Stream Feature diff --git a/README.md b/README.md index b794a3497..b8189307b 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,7 @@ TorchJD provides many existing aggregators from the literature, listed in the fo | [FairGrad](https://torchjd.org/stable/docs/aggregation/fairgrad#torchjd.aggregation.FairGrad) | [FairGradWeighting](https://torchjd.org/stable/docs/aggregation/fairgrad#torchjd.aggregation.FairGradWeighting) | [Fair Resource Allocation in Multi-Task Learning](https://arxiv.org/pdf/2402.15638) | | [GradDrop](https://torchjd.org/stable/docs/aggregation/graddrop#torchjd.aggregation.GradDrop) | - | [Just Pick a Sign: Optimizing Deep Multitask Models with Gradient Sign Dropout](https://arxiv.org/pdf/2010.06808) | | [GradVac](https://torchjd.org/stable/docs/aggregation/gradvac#torchjd.aggregation.GradVac) | [GradVacWeighting](https://torchjd.org/stable/docs/aggregation/gradvac#torchjd.aggregation.GradVacWeighting) | [Gradient Vaccine: Investigating and Improving Multi-task Optimization in Massively Multilingual Models](https://arxiv.org/pdf/2010.05874) | -| [IMTLG](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLG) | [IMTLGWeighting](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLGWeighting) | [Towards Impartial Multi-task Learning](https://discovery.ucl.ac.uk/id/eprint/10120667/) | +| [IMTLG](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLG) | [IMTLGWeighting](https://torchjd.org/stable/docs/aggregation/imtl_g#torchjd.aggregation.IMTLGWeighting) | [Towards Impartial Multi-task 
Learning](https://www.semanticscholar.org/paper/Towards-Impartial-Multi-task-Learning-Liu-Li/45c0828baec1dd53b81f1b2635788fdf27d0792d) | | [Krum](https://torchjd.org/stable/docs/aggregation/krum#torchjd.aggregation.Krum) | [KrumWeighting](https://torchjd.org/stable/docs/aggregation/krum#torchjd.aggregation.KrumWeighting) | [Machine Learning with Adversaries: Byzantine Tolerant Gradient Descent](https://proceedings.neurips.cc/paper/2017/file/f4b9ec30ad9f68f89b29639786cb62ef-Paper.pdf) | | [Mean](https://torchjd.org/stable/docs/aggregation/mean#torchjd.aggregation.Mean) | [MeanWeighting](https://torchjd.org/stable/docs/aggregation/mean#torchjd.aggregation.MeanWeighting) | - | | [MGDA](https://torchjd.org/stable/docs/aggregation/mgda#torchjd.aggregation.MGDA) | [MGDAWeighting](https://torchjd.org/stable/docs/aggregation/mgda#torchjd.aggregation.MGDAWeighting) | [Multiple-gradient descent algorithm (MGDA) for multiobjective optimization](https://comptes-rendus.academie-sciences.fr/mathematique/articles/10.1016/j.crma.2012.03.014/) | diff --git a/src/torchjd/aggregation/_gradvac.py b/src/torchjd/aggregation/_gradvac.py index 7e5191c01..73464c113 100644 --- a/src/torchjd/aggregation/_gradvac.py +++ b/src/torchjd/aggregation/_gradvac.py @@ -135,7 +135,7 @@ class GradVac(GramianWeightedAggregator, Stateful, _NonDifferentiable): :class:`~torchjd.aggregation.GramianWeightedAggregator` implementing the aggregation step of Gradient Vaccine (GradVac) from `Gradient Vaccine: Investigating and Improving Multi-task Optimization in Massively Multilingual Models (ICLR 2021 Spotlight) - `_. + `_. For each task :math:`i`, the order in which other tasks :math:`j` are visited is drawn at random. 
For each pair :math:`(i, j)`, the cosine similarity :math:`\phi_{ij}` between the diff --git a/src/torchjd/scalarization/_imtl_l.py b/src/torchjd/scalarization/_imtl_l.py index 91445f778..5eade40d8 100644 --- a/src/torchjd/scalarization/_imtl_l.py +++ b/src/torchjd/scalarization/_imtl_l.py @@ -14,7 +14,7 @@ class IMTLL(Scalarizer, Stateful): :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using learned per-task scales. ``IMTL-L`` is the loss-balancing variant of Impartial Multi-Task Learning, proposed in `Towards Impartial Multi-Task Learning - <https://openreview.net/pdf?id=IMPnRXEWpvr>`_. + <https://www.semanticscholar.org/paper/Towards-Impartial-Multi-task-Learning-Liu-Li/45c0828baec1dd53b81f1b2635788fdf27d0792d>`_. Each value :math:`L_i` is assigned a learnable scale parameter :math:`s_i`, and the values are combined as diff --git a/src/torchjd/scalarization/_stch.py b/src/torchjd/scalarization/_stch.py index 38a5dceaa..9df18ee15 100644 --- a/src/torchjd/scalarization/_stch.py +++ b/src/torchjd/scalarization/_stch.py @@ -8,7 +8,7 @@ class STCH(Scalarizer): r""" :class:`~torchjd.scalarization.Scalarizer` that combines the input tensor of values using smooth Tchebycheff scalarization, as defined in `Smooth Tchebycheff Scalarization for Multi-Objective - Optimization <https://openreview.net/pdf?id=m4dO5L6eCp>`_. + Optimization <https://arxiv.org/abs/2402.19078>`_. It returns