Commit 7dac0df

Add files via upload

1 parent b7fad2c commit 7dac0df

1 file changed: 327 additions & 0 deletions

@@ -0,0 +1,327 @@
import numpy as np
from .hessian_update import ModelHessianUpdate
from multioptpy.calc_tools import Calculationtools

class CubicNewton:
    def __init__(self, **config):
        """
        Cubic Regularized Newton Method with Global O(1/k²) Convergence

        References:
            [1] Mishchenko, K., "Regularized Newton Method with Global O(1/k²) Convergence",
                https://arxiv.org/abs/2112.02089
        """
        # Configuration parameters
        self.max_micro_cycles = config.get("max_micro_cycles", 40)
        self.hessian_update_method = config.get("method", "bfgs")
        self.small_eigval_thresh = config.get("small_eigval_thresh", 1e-6)

        # Line search parameters
        self.initial_step_length = config.get("initial_step_length", 0.1)
        self.lipschitz_scale_factor = config.get("lipschitz_scale_factor", 2.0)
        self.min_lipschitz = config.get("min_lipschitz", 1e-6)
        self.max_lipschitz = config.get("max_lipschitz", 1e6)

        # Convergence criteria
        self.energy_change_threshold = config.get("energy_change_threshold", 1e-8)
        self.gradient_norm_threshold = config.get("gradient_norm_threshold", 1e-6)
        self.step_norm_tolerance = config.get("step_norm_tolerance", 1e-5)

        # Debug and display settings
        self.debug_mode = config.get("debug_mode", False)
        self.display_flag = config.get("display_flag", True)

        # Initialize state variables
        self.Initialization = True
        self.hessian = None
        self.bias_hessian = None

        # For tracking the optimization
        self.prev_geometry = None
        self.prev_gradient = None
        self.prev_energy = None
        self.prev_lipschitz = None
        self.converged = False
        self.iteration = 0
        self.line_search_cycles = 0

        # Initialize the Hessian update module
        self.hessian_updater = ModelHessianUpdate()

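    # The regularized Newton step of [1] is
    #     x_{k+1} = x_k - (B_k + sqrt(H_k * ||g_k||) I)^(-1) g_k,
    # where B_k is the (approximate) Hessian, g_k the gradient, and H_k an
    # adaptive estimate of the Lipschitz constant of the Hessian; the
    # sqrt(H * ||g||) regularizer is what yields the global O(1/k²) rate.
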
    def log(self, message, force=False):
        """Print a message if the display flag is enabled and either force is True or debug mode is on"""
        if self.display_flag and (force or self.debug_mode):
            print(message)

    def filter_small_eigvals(self, eigvals, eigvecs, mask=False):
        """Remove small eigenvalues and the corresponding eigenvectors from the Hessian spectrum"""
        small_inds = np.abs(eigvals) < self.small_eigval_thresh
        small_num = np.sum(small_inds)

        if small_num > 0:
            self.log(f"Found {small_num} small eigenvalues in Hessian. Removed corresponding eigenvalues and eigenvectors.")

        filtered_eigvals = eigvals[~small_inds]
        filtered_eigvecs = eigvecs[:, ~small_inds]

        if small_num > 6:
            self.log(f"Warning: Found {small_num} small eigenvalues, which is more than expected. "
                     "This may indicate numerical issues. Proceeding with caution.", force=True)

        if mask:
            return filtered_eigvals, filtered_eigvecs, small_inds
        else:
            return filtered_eigvals, filtered_eigvecs

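    # Note: for a nonlinear molecule, exactly 6 near-zero eigenvalues are
    # expected (3 translations + 3 rotations; 5 for a linear molecule), which
    # is why finding more than 6 small eigenvalues triggers the warning above.
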
    def run(self, geom_num_list, B_g, pre_B_g=[], pre_geom=[], B_e=0.0, pre_B_e=0.0, pre_move_vector=[], initial_geom_num_list=[], g=[], pre_g=[]):
        """Execute one step of the Cubic Regularized Newton optimization"""
        # Print the iteration header
        self.log(f"\n{'='*50}\nCubic Newton Iteration {self.iteration}\n{'='*50}", force=True)

        # Initialize state on the first call
        if self.Initialization:
            self.prev_geometry = None
            self.prev_gradient = None
            self.prev_energy = None
            self.prev_lipschitz = None
            self.converged = False
            self.iteration = 0
            self.line_search_cycles = 0
            self.Initialization = False

        # Check that the Hessian has been set
        if self.hessian is None:
            raise ValueError("Hessian matrix must be set before running optimization")

        # Update the Hessian if previous geometry and gradient information is available
        if self.prev_geometry is not None and self.prev_gradient is not None and len(pre_B_g) > 0 and len(pre_geom) > 0:
            self.update_hessian(geom_num_list, B_g, pre_geom, pre_B_g)

        # Check for convergence based on the gradient norm
        gradient_norm = np.linalg.norm(B_g)
        self.log(f"Gradient norm: {gradient_norm:.6f}", force=True)

        if gradient_norm < self.gradient_norm_threshold:
            self.log(f"Converged: Gradient norm {gradient_norm:.6f} below threshold {self.gradient_norm_threshold:.6f}", force=True)
            self.converged = True
            return np.zeros_like(B_g).reshape(-1, 1)

        # Check for convergence based on the energy change
        if self.prev_energy is not None:
            energy_change = abs(B_e - self.prev_energy)
            if energy_change < self.energy_change_threshold:
                self.log(f"Converged: Energy change {energy_change:.6f} below threshold {self.energy_change_threshold:.6f}", force=True)
                self.converged = True
                return np.zeros_like(B_g).reshape(-1, 1)

        # Ensure the gradient is shaped as a 1D array
        gradient = np.asarray(B_g).ravel()

        # Use the effective Hessian (model Hessian plus optional bias)
        H = self.hessian
        if self.bias_hessian is not None:
            H = self.hessian + self.bias_hessian

        # Project out translations and rotations if geom_num_list is provided
        if len(geom_num_list) > 0:
            H = Calculationtools().project_out_hess_tr_and_rot_for_coord(
                H, geom_num_list.reshape(-1, 3), geom_num_list.reshape(-1, 3), False
            )

        # Get the cubic Newton step using the adaptive line search
        move_vector = -1 * self.get_cubic_newton_step(
            geom_num_list, gradient, H, B_e
        )

        # Store the current geometry, gradient, and energy for the next iteration
        self.prev_geometry = geom_num_list
        self.prev_gradient = B_g
        self.prev_energy = B_e

        # Increment the iteration counter
        self.iteration += 1

        return move_vector.reshape(-1, 1)

    def get_cubic_newton_step(self, geom_num_list, gradient, hessian, energy):
        """Compute the step using the Cubic Regularized Newton method with an adaptive line search"""
        gradient_norm = np.linalg.norm(gradient)

        # Initialize the Lipschitz constant estimate on the first iteration
        if self.prev_lipschitz is None:
            # Initial Lipschitz constant estimate; line 2 of Algorithm 2 in [1]
            trial_step_length = self.initial_step_length
            trial_step = trial_step_length * (-gradient / gradient_norm)

            # Coordinates of the trial point (evaluated externally by the caller)
            trial_coords = geom_num_list.ravel() + trial_step
            self.log(f"Estimating initial Lipschitz constant using step of length {trial_step_length:.4f}")

            # MultiOptPy relies on an external calculator for energies and
            # gradients, so we return this trial step and continue on the next
            # iteration with the results
            if self.prev_gradient is None:
                self.log("First iteration: returning initial step to estimate Lipschitz constant")
                return trial_step

            trial_gradient = self.prev_gradient.ravel()

            # Estimate the Hessian Lipschitz constant from the deviation of the
            # gradient change from the quadratic model prediction
            H = (
                np.linalg.norm(trial_gradient - gradient - hessian.dot(trial_step))
                / np.linalg.norm(trial_step) ** 2
            )
            # Ensure H is within reasonable bounds
            H = max(self.min_lipschitz, min(H, self.max_lipschitz))
        else:
            # Start with the previous Lipschitz constant divided by 4 (as in [1])
            H = self.prev_lipschitz / 4
            H = max(self.min_lipschitz, min(H, self.max_lipschitz))

        self.log(f"Starting Lipschitz constant in cycle {self.iteration}, H={H:.4f}")

        # Line search to find a good step
        best_step = None
        for i in range(self.max_micro_cycles):
            self.line_search_cycles += 1
            H *= self.lipschitz_scale_factor
            H = min(H, self.max_lipschitz)

            self.log(f"Adaptive Newton line search, cycle {i} using H={H:.4f}")

            # Regularization parameter λ = sqrt(H * ||g||)
            lambda_ = np.sqrt(H * gradient_norm)

            # Solve the regularized Newton system: (B + λI)p = -g
            try:
                trial_step = np.linalg.solve(
                    hessian + lambda_ * np.eye(gradient.size), -gradient
                )
            except np.linalg.LinAlgError:
                # Fall back to a steepest-descent step if the system is singular
                self.log("Linear system is singular, using gradient descent step")
                trial_step = -gradient / gradient_norm * self.initial_step_length

            trial_step_norm = np.linalg.norm(trial_step)
            self.log(f"Trial step norm: {trial_step_norm:.6f}")

            # The cubic Newton method would accept the step once two conditions hold:
            #   1. ||∇f(x+p)|| ≤ 2λ||p||
            #   2. f(x+p) ≤ f(x) - (2/3)λ||p||²
            #
            # In MultiOptPy's framework, we cannot check these conditions directly
            # because f(x+p) and ∇f(x+p) are not available yet. Instead, we return
            # the step and let the caller evaluate the new point. On the next
            # iteration, that information is used to adjust the Lipschitz constant.

            # For now, store this step as our best guess and stop the search
            best_step = trial_step
            break

        if best_step is None:
            # Fall back to gradient descent
            self.log("Line search failed, using gradient descent step", force=True)
            best_step = -gradient / gradient_norm * self.initial_step_length

        # Store the current Lipschitz constant for the next iteration
        self.prev_lipschitz = H

        return best_step

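    # Worked example of a single regularized step (illustrative values): with
    # gradient g = [1, 0], Hessian B = [[2, 0], [0, 1]], and Lipschitz estimate
    # H = 4, the regularizer is λ = sqrt(4 * ||g||) = 2, so the system
    # (B + 2I)p = -g gives p = [-0.25, 0]. A larger H shrinks the step toward
    # steepest descent; a smaller H recovers the pure Newton step.
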
    def update_hessian(self, current_geom, current_grad, previous_geom, previous_grad):
        """Update the Hessian using the specified update method"""
        # Calculate the displacement and gradient difference
        displacement = np.asarray(current_geom - previous_geom).reshape(-1, 1)
        delta_grad = np.asarray(current_grad - previous_grad).reshape(-1, 1)

        # Skip the update if the changes are too small
        disp_norm = np.linalg.norm(displacement)
        grad_diff_norm = np.linalg.norm(delta_grad)

        if disp_norm < 1e-10 or grad_diff_norm < 1e-10:
            self.log("Skipping Hessian update due to small changes")
            return

        # Check that the displacement and gradient difference are sufficiently aligned
        dot_product = np.dot(displacement.T, delta_grad)
        dot_product = dot_product[0, 0]  # Extract the scalar from the 1x1 matrix
        if dot_product <= 0:
            self.log("Skipping Hessian update due to poor alignment")
            return

        self.log(f"Hessian update: displacement norm={disp_norm:.6f}, gradient diff norm={grad_diff_norm:.6f}, dot product={dot_product:.6f}")

        # Apply the selected Hessian update method
        method = self.hessian_update_method.lower()
        if "flowchart" in method:
            self.log("Hessian update method: flowchart")
            delta_hess = self.hessian_updater.flowchart_hessian_update(
                self.hessian, displacement, delta_grad, "auto"
            )
        elif "bfgs" in method:
            self.log("Hessian update method: bfgs")
            delta_hess = self.hessian_updater.BFGS_hessian_update(
                self.hessian, displacement, delta_grad
            )
        elif "sr1" in method:
            self.log("Hessian update method: sr1")
            delta_hess = self.hessian_updater.SR1_hessian_update(
                self.hessian, displacement, delta_grad
            )
        elif "fsb" in method:
            self.log("Hessian update method: fsb")
            delta_hess = self.hessian_updater.FSB_hessian_update(
                self.hessian, displacement, delta_grad
            )
        elif "bofill" in method:
            self.log("Hessian update method: bofill")
            delta_hess = self.hessian_updater.Bofill_hessian_update(
                self.hessian, displacement, delta_grad
            )
        elif "psb" in method:
            self.log("Hessian update method: psb")
            delta_hess = self.hessian_updater.PSB_hessian_update(
                self.hessian, displacement, delta_grad
            )
        elif "msp" in method:
            self.log("Hessian update method: msp")
            delta_hess = self.hessian_updater.MSP_hessian_update(
                self.hessian, displacement, delta_grad
            )
        else:
            self.log(f"Unknown Hessian update method: {self.hessian_update_method}. Using BFGS.")
            delta_hess = self.hessian_updater.BFGS_hessian_update(
                self.hessian, displacement, delta_grad
            )

        # Apply the update
        self.hessian += delta_hess

        # Symmetrize to guard against numerical asymmetry
        self.hessian = 0.5 * (self.hessian + self.hessian.T)

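    # For reference, the standard BFGS update delta (the exact form implemented
    # in ModelHessianUpdate may differ) is
    #     ΔB = (y yᵀ) / (yᵀ s) - (B s sᵀ B) / (sᵀ B s),
    # where s is the displacement and y the gradient difference. The dot-product
    # check above (yᵀ s > 0) is the curvature condition that keeps B positive
    # definite under this update.
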
    def is_converged(self):
        """Check whether the optimization has converged"""
        return self.converged

    def set_hessian(self, hessian):
        """Set the Hessian matrix"""
        self.hessian = hessian

    def set_bias_hessian(self, bias_hessian):
        """Set the bias Hessian matrix"""
        self.bias_hessian = bias_hessian

    def get_hessian(self):
        """Get the current Hessian matrix"""
        return self.hessian

    def get_bias_hessian(self):
        """Get the current bias Hessian matrix"""
        return self.bias_hessian

    def get_line_search_cycles(self):
        """Get the number of line search cycles performed"""
        return self.line_search_cycles
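
For context, a minimal sketch of a driving loop for this optimizer on a toy quadratic surface. The import path is hypothetical, the move_vector sign convention (subtracting the returned vector) is inferred from the -1 * step in run(), and the translation/rotation projection via Calculationtools assumes molecular 3N coordinates, so this is a shape-level illustration rather than a tested recipe:

import numpy as np
from multioptpy.optimizer.cubic_newton import CubicNewton  # hypothetical import path

# Toy quadratic surface in 6 coordinates (two "atoms" x 3): f(x) = 0.5 x^T A x
A = np.diag([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])

opt = CubicNewton(method="bfgs", display_flag=False)
opt.set_hessian(np.eye(6))          # crude initial model Hessian

x = np.ones(6)
prev_x, prev_g, prev_e = [], [], 0.0
for _ in range(100):
    g = A @ x                       # analytic gradient of the toy surface
    e = 0.5 * x @ A @ x
    move = opt.run(x, g, pre_B_g=prev_g, pre_geom=prev_x, B_e=e, pre_B_e=prev_e)
    if opt.is_converged():
        break
    prev_x, prev_g, prev_e = x.copy(), g.copy(), e
    x = x - move.ravel()            # assumed convention: the move vector is subtracted
print("converged:", opt.is_converged())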
