Skip to content

Commit a81933d

Browse files
authored
Fides 0.8.0 (#60)
* remove init_with_hess, add hess0
* fixup test
1 parent d085978 commit a81933d

6 files changed

Lines changed: 182 additions & 40 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
.idea/*
33
venv/*
4+
.venv/*
45
*.pyc
56
fides.egg-info/*
67
.DS_Store

fides/hessian_approximation.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,26 @@ class HessianApproximation:
1717
Abstract class from which Hessian update strategies should subclass
1818
"""
1919

20-
def __init__(self, init_with_hess: bool | None = False):
20+
def __init__(self):
2121
"""
2222
Create a Hessian update strategy instance
23-
24-
:param init_with_hess:
25-
Whether the hybrid update strategy should be initialized
26-
according to the user-provided objective function
2723
"""
2824
self._hess: np.ndarray = np.empty(0)
2925
self._diff: np.ndarray = np.empty(0)
30-
self.init_with_hess = init_with_hess
3126

32-
def init_mat(self, dim: int, hess: np.ndarray | None = None) -> None:
27+
def _init_mat(self, dim: int, hess: np.ndarray | None = None) -> None:
3328
"""
34-
Initializes this approximation instance and checks the dimensionality
29+
Initializes this approximation instance and checks the dimensionality.
30+
Note that this method is not intended to be called directly by the
31+
user.
3532
3633
:param dim:
3734
dimension of optimization variables
3835
3936
:param hess:
4037
user provided initialization
4138
"""
42-
if hess is None or not self.init_with_hess:
39+
if hess is None:
4340
self._hess = np.eye(dim)
4441
else:
4542
if hess.shape[0] != dim:
@@ -146,7 +143,6 @@ class Broyden(IterativeHessianApproximation):
146143
def __init__(
147144
self,
148145
phi: float,
149-
init_with_hess: bool | None = False,
150146
enforce_curv_cond: bool | None = True,
151147
):
152148
self.phi = phi
@@ -158,7 +154,7 @@ def __init__(
158154
'preserved during updating.',
159155
stacklevel=2,
160156
)
161-
super().__init__(init_with_hess)
157+
super().__init__()
162158

163159
def _compute_update(self, s: np.ndarray, y: np.ndarray):
164160
self._diff = broyden_class_update(
@@ -176,12 +172,10 @@ class BFGS(Broyden):
176172

177173
def __init__(
178174
self,
179-
init_with_hess: bool | None = False,
180175
enforce_curv_cond: bool | None = True,
181176
):
182177
super().__init__(
183178
phi=0.0,
184-
init_with_hess=init_with_hess,
185179
enforce_curv_cond=enforce_curv_cond,
186180
)
187181

@@ -196,12 +190,10 @@ class DFP(Broyden):
196190

197191
def __init__(
198192
self,
199-
init_with_hess: bool | None = False,
200193
enforce_curv_cond: bool | None = True,
201194
):
202195
super().__init__(
203196
phi=1.0,
204-
init_with_hess=init_with_hess,
205197
enforce_curv_cond=enforce_curv_cond,
206198
)
207199

@@ -273,9 +265,9 @@ def __init__(self, happ: IterativeHessianApproximation | None = None):
273265
self.hessian_update = happ if happ is not None else BFGS()
274266
super().__init__()
275267

276-
def init_mat(self, dim: int, hess: np.ndarray | None = None):
277-
self.hessian_update.init_mat(dim, hess)
278-
super().init_mat(dim, hess)
268+
def _init_mat(self, dim: int, hess: np.ndarray | None = None):
269+
self.hessian_update._init_mat(dim, hess)
270+
super()._init_mat(dim, hess)
279271

280272
def requires_hess(self):
281273
return True # pragma: no cover
@@ -460,12 +452,12 @@ def __init__(
460452
'preserved during updating.',
461453
stacklevel=2,
462454
)
463-
super().__init__(init_with_hess=True)
455+
super().__init__()
464456

465-
def init_mat(self, dim: int, hess: np.ndarray | None = None):
457+
def _init_mat(self, dim: int, hess: np.ndarray | None = None):
466458
self.A = np.eye(dim) * np.spacing(1)
467459
self._structured_diff = np.zeros_like(self.A)
468-
super().init_mat(dim, hess)
460+
super()._init_mat(dim, hess)
469461

470462
def update(
471463
self,

fides/minimize.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,6 @@ def __init__(
283283
self.grad_min = self.grad
284284

285285
self.hessian_update: HessianApproximation | None = hessian_update
286-
if not self.hessian_update.get_mat().empty():
287-
self.hess = self.hessian_update.get_mat()
288286
self.iterations_since_tr_update: int = 0
289287
self.n_intermediate_tr_radius: int = 0
290288

@@ -312,7 +310,12 @@ def _reset(self, start_id: str | None = None):
312310
self.start_id = start_id
313311
self.history = defaultdict(list)
314312

315-
def minimize(self, x0: np.ndarray, start_id: str | None = None):
313+
def minimize(
314+
self,
315+
x0: np.ndarray,
316+
start_id: str | None = None,
317+
hess0: np.ndarray | str | None = None,
318+
) -> tuple[float, np.ndarray, np.ndarray, np.ndarray]:
316319
"""
317320
Minimize the objective function using the interior trust-region
318321
reflective algorithm described by [ColemanLi1994] and [ColemanLi1996]
@@ -329,13 +332,25 @@ def minimize(self, x0: np.ndarray, start_id: str | None = None):
329332
options[`maxtime`] on the next iteration.
330333
331334
:param x0:
332-
initial guess
335+
initial guess for the optimization variables
336+
337+
:param start_id:
338+
optional identifier for this optimization run, used for history
339+
tracking
340+
341+
:param hess0:
342+
optional initial Hessian approximation. If a string 'hess' is
343+
provided, the initial Hessian from the objective function
344+
evaluation at x0 is used. Otherwise, a numpy array of shape
345+
(n,n) must be provided, where n is the number of optimization
346+
variables.
333347
334348
:returns:
335349
fval: final function value,
336350
x: final optimization variable values,
337351
grad: final gradient,
338352
hess: final Hessian (approximation)
353+
339354
"""
340355
self._reset(start_id)
341356

@@ -349,8 +364,11 @@ def minimize(self, x0: np.ndarray, start_id: str | None = None):
349364

350365
self.fval, self.grad = funout.fval, funout.grad
351366
if self.hessian_update is not None:
352-
if self.hessian_update.get_mat().empty():
353-
self.hessian_update.init_mat(len(self.x), funout.hess)
367+
if isinstance(hess0, str) and hess0 == 'hess':
368+
self.hessian_update._init_mat(len(self.x), funout.hess)
369+
else:
370+
self.hessian_update._init_mat(len(self.x), hess0)
371+
self.hess = self.hessian_update.get_mat().copy()
354372
else:
355373
self.hess = funout.hess.copy()
356374

fides/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.7.9'
1+
__version__ = '0.8.0'

tests/test_hessian_approximation.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@
55

66

77
def test_wrong_dim():
8-
h = BFGS(init_with_hess=True)
8+
h = BFGS()
99
with pytest.raises(ValueError):
10-
h.init_mat(dim=3, hess=np.ones((2, 2)))
10+
h._init_mat(dim=3, hess=np.ones((2, 2)))
1111

1212
h = BFGS()
13-
h.init_mat(dim=3)
13+
h._init_mat(dim=3)
1414
with pytest.raises(ValueError):
1515
h.set_mat(np.ones((2, 2)))
1616

1717

1818
def test_broyden():
1919
h = Broyden(phi=2)
20-
h.init_mat(dim=2)
20+
h._init_mat(dim=2)
2121
h.update(np.random.random((2, 1)), np.random.random((2, 1)))
2222

2323
h = Broyden(phi=-1)
24-
h.init_mat(dim=2)
24+
h._init_mat(dim=2)
2525
h.update(np.random.random((2,)), np.random.random((2,)))

tests/test_minimize.py

Lines changed: 138 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,17 +185,11 @@ def unbounded_and_init():
185185
(rosengrad, BB()), # 5
186186
(rosengrad, Broyden(0.5)), # 6
187187
(rosenboth, HybridFixed(BFGS())), # 7
188-
(rosenboth, HybridFixed(SR1())), # 8
189-
(rosenboth, HybridFixed(BFGS(init_with_hess=True))), # 9
190-
(rosenboth, HybridFixed(SR1(init_with_hess=True))), # 10
188+
(rosenboth, HybridFixed(SR1())), # 8 # 10
191189
(rosenboth, HybridFraction(BFGS())), # 11
192190
(rosenboth, HybridFraction(SR1())), # 12
193-
(rosenboth, HybridFraction(BFGS(init_with_hess=True))), # 13
194-
(rosenboth, HybridFraction(SR1(init_with_hess=True))), # 14
195191
(fletcher, FX(BFGS())), # 15
196192
(fletcher, FX(SR1())), # 16
197-
(fletcher, FX(BFGS(init_with_hess=True))), # 17
198-
(fletcher, FX(SR1(init_with_hess=True))), # 18
199193
(fletcher, SSM(0.0)), # 19
200194
(fletcher, SSM(0.5)), # 20
201195
(fletcher, SSM(1.0)), # 21
@@ -494,3 +488,140 @@ def test_wrong_options():
494488
verbose=logging.INFO,
495489
options={Options.SUBSPACE_DIM: '2D'},
496490
)
491+
492+
493+
def test_hess0_initialization():
494+
"""
495+
Test that hess0 parameter correctly initializes Hessian approximation.
496+
"""
497+
lb, ub, x0 = finite_bounds_include_optimum()
498+
fun = rosengrad
499+
fun_with_hess = rosenboth
500+
501+
# Test 1: Verify hess0 is used when provided with hessian_update
502+
custom_hess0 = np.eye(len(x0)) * 10.0
503+
opt_with_hess0 = Optimizer(
504+
fun,
505+
ub=ub,
506+
lb=lb,
507+
verbose=logging.WARNING,
508+
options={Options.MAXITER: 1}, # Only run one iteration
509+
hessian_update=BFGS(),
510+
)
511+
opt_with_hess0.minimize(x0, hess0=custom_hess0)
512+
assert opt_with_hess0.hess is not None
513+
514+
# Test 2: Verify default initialization when hess0 is not provided
515+
opt_without_hess0 = Optimizer(
516+
fun,
517+
ub=ub,
518+
lb=lb,
519+
verbose=logging.WARNING,
520+
options={Options.MAXITER: 1},
521+
hessian_update=BFGS(),
522+
)
523+
opt_without_hess0.minimize(x0)
524+
525+
# Test 3: Verify hess0 has correct dimensions
526+
wrong_dim_hess0 = np.eye(len(x0) + 1)
527+
opt_wrong_dim = Optimizer(
528+
fun,
529+
ub=ub,
530+
lb=lb,
531+
verbose=logging.WARNING,
532+
hessian_update=BFGS(),
533+
)
534+
with pytest.raises(ValueError):
535+
opt_wrong_dim.minimize(x0, hess0=wrong_dim_hess0)
536+
537+
# Test 4: Verify hess0 works with different update schemes
538+
for happ_class in [BFGS, DFP, SR1, Broyden]:
539+
happ = happ_class() if happ_class != Broyden else Broyden(phi=0.5)
540+
custom_hess = np.eye(len(x0)) * 5.0
541+
opt = Optimizer(
542+
fun,
543+
ub=ub,
544+
lb=lb,
545+
verbose=logging.WARNING,
546+
options={Options.MAXITER: 2, Options.FATOL: 0},
547+
hessian_update=happ,
548+
)
549+
opt.minimize(x0, hess0=custom_hess)
550+
assert opt.iteration >= 1, f'Failed for {happ_class.__name__}'
551+
552+
# Test 5: Verify hess0 is ignored when no hessian_update is provided
553+
opt_no_update = Optimizer(
554+
fun_with_hess,
555+
ub=ub,
556+
lb=lb,
557+
verbose=logging.WARNING,
558+
options={Options.MAXITER: 1},
559+
)
560+
hess0_ignored = np.eye(len(x0)) * 100.0
561+
opt_no_update.minimize(x0, hess0=hess0_ignored)
562+
563+
# Test 6: Test initialization with exact Hessian
564+
opt_hess_init = Optimizer(
565+
fun_with_hess,
566+
ub=ub,
567+
lb=lb,
568+
verbose=logging.WARNING,
569+
options={Options.MAXITER: 10, Options.FATOL: 1e-8},
570+
hessian_update=HybridFixed(BFGS()),
571+
)
572+
opt_hess_init.minimize(x0, hess0='hess')
573+
iterations_with_hess = opt_hess_init.iteration
574+
575+
# Compare with BFGS without using initial Hessian
576+
opt_no_hess_init = Optimizer(
577+
fun,
578+
ub=ub,
579+
lb=lb,
580+
verbose=logging.WARNING,
581+
options={Options.MAXITER: 10, Options.FATOL: 1e-8},
582+
hessian_update=BFGS(),
583+
)
584+
opt_no_hess_init.minimize(x0)
585+
iterations_without_hess = opt_no_hess_init.iteration
586+
587+
# Using exact Hessian for initialization should help convergence
588+
assert iterations_with_hess <= iterations_without_hess or (
589+
opt_hess_init.converged and opt_no_hess_init.converged
590+
), 'Hessian initialization should help convergence'
591+
592+
# Test 8: Verify hess0 affects convergence behavior
593+
true_hess_at_x0 = np.array(
594+
[
595+
[1200 * x0[0] ** 2 - 400 * x0[1] + 2, -400 * x0[0]],
596+
[-400 * x0[0], 200],
597+
]
598+
)
599+
600+
opt_good_init = Optimizer(
601+
fun,
602+
ub=ub,
603+
lb=lb,
604+
verbose=logging.WARNING,
605+
options={Options.MAXITER: 100, Options.FATOL: 1e-8},
606+
hessian_update=BFGS(),
607+
)
608+
opt_good_init.minimize(x0, hess0=true_hess_at_x0)
609+
iterations_good = opt_good_init.iteration
610+
611+
# Use a poor initial Hessian approximation
612+
poor_hess = np.eye(len(x0)) * 0.01
613+
opt_poor_init = Optimizer(
614+
fun,
615+
ub=ub,
616+
lb=lb,
617+
verbose=logging.WARNING,
618+
options={Options.MAXITER: 100, Options.FATOL: 1e-8},
619+
hessian_update=BFGS(),
620+
)
621+
opt_poor_init.minimize(x0, hess0=poor_hess)
622+
iterations_poor = opt_poor_init.iteration
623+
624+
# Good initialization should converge in fewer or equal iterations
625+
assert iterations_good <= iterations_poor or (
626+
opt_good_init.converged and opt_poor_init.converged
627+
), 'Good Hessian initialization should help convergence'

0 commit comments

Comments (0)