1+ import numpy as np
2+ from numpy .linalg import norm
3+ from .hessian_update import ModelHessianUpdate
4+ from multioptpy .calc_tools import Calculationtools
5+
class CubicNewton:
    def __init__(self, **config):
        """
        Cubic Regularized Newton Method with Global O(1/k²) Convergence

        References:
            [1] Mishchenko, K., "Regularized Newton Method with Global O(1/k²) Convergence",
                https://arxiv.org/abs/2112.02089
        """
        get = config.get

        # --- configuration --------------------------------------------------
        self.max_micro_cycles = get("max_micro_cycles", 40)
        self.hessian_update_method = get("method", "bfgs")
        self.small_eigval_thresh = get("small_eigval_thresh", 1e-6)

        # --- line-search parameters -----------------------------------------
        self.initial_step_length = get("initial_step_length", 0.1)
        self.lipschitz_scale_factor = get("lipschitz_scale_factor", 2.0)
        self.min_lipschitz = get("min_lipschitz", 1e-6)
        self.max_lipschitz = get("max_lipschitz", 1e6)

        # --- convergence criteria -------------------------------------------
        self.energy_change_threshold = get("energy_change_threshold", 1e-8)
        self.gradient_norm_threshold = get("gradient_norm_threshold", 1e-6)
        self.step_norm_tolerance = get("step_norm_tolerance", 1e-5)

        # --- debug / display ------------------------------------------------
        self.debug_mode = get("debug_mode", False)
        self.display_flag = get("display_flag", True)

        # --- optimizer state (reset again on the first run() call) ----------
        self.Initialization = True   # triggers state reset in run()
        self.hessian = None          # model Hessian, set via set_hessian()
        self.bias_hessian = None     # optional additive bias, set_bias_hessian()

        self.prev_geometry = None
        self.prev_gradient = None
        self.prev_energy = None
        self.prev_lipschitz = None   # warm-start for the Lipschitz estimate
        self.converged = False
        self.iteration = 0
        self.line_search_cycles = 0

        # Quasi-Newton Hessian update engine (project-local helper).
        self.hessian_updater = ModelHessianUpdate()
51+
52+ def log (self , message , force = False ):
53+ """Print message if display flag is enabled and either force is True or in debug mode"""
54+ if self .display_flag and (force or self .debug_mode ):
55+ print (message )
56+
57+ def filter_small_eigvals (self , eigvals , eigvecs , mask = False ):
58+ """Remove small eigenvalues and corresponding eigenvectors from the Hessian"""
59+ small_inds = np .abs (eigvals ) < self .small_eigval_thresh
60+ small_num = np .sum (small_inds )
61+
62+ if small_num > 0 :
63+ self .log (f"Found { small_num } small eigenvalues in Hessian. Removed corresponding eigenvalues and eigenvectors." )
64+
65+ filtered_eigvals = eigvals [~ small_inds ]
66+ filtered_eigvecs = eigvecs [:, ~ small_inds ]
67+
68+ if small_num > 6 :
69+ self .log (f"Warning: Found { small_num } small eigenvalues, which is more than expected. "
70+ "This may indicate numerical issues. Proceeding with caution." , force = True )
71+
72+ if mask :
73+ return filtered_eigvals , filtered_eigvecs , small_inds
74+ else :
75+ return filtered_eigvals , filtered_eigvecs
76+
77+ def run (self , geom_num_list , B_g , pre_B_g = [], pre_geom = [], B_e = 0.0 , pre_B_e = 0.0 , pre_move_vector = [], initial_geom_num_list = [], g = [], pre_g = []):
78+ """Execute one step of Cubic Regularized Newton optimization"""
79+ # Print iteration header
80+ self .log (f"\n { '=' * 50 } \n Cubic Newton Iteration { self .iteration } \n { '=' * 50 } " , force = True )
81+
82+ # Initialize on first call
83+ if self .Initialization :
84+ self .prev_geometry = None
85+ self .prev_gradient = None
86+ self .prev_energy = None
87+ self .prev_lipschitz = None
88+ self .converged = False
89+ self .iteration = 0
90+ self .line_search_cycles = 0
91+ self .Initialization = False
92+
93+ # Check if hessian is set
94+ if self .hessian is None :
95+ raise ValueError ("Hessian matrix must be set before running optimization" )
96+
97+ # Update Hessian if we have previous geometry and gradient information
98+ if self .prev_geometry is not None and self .prev_gradient is not None and len (pre_B_g ) > 0 and len (pre_geom ) > 0 :
99+ self .update_hessian (geom_num_list , B_g , pre_geom , pre_B_g )
100+
101+ # Check for convergence based on gradient
102+ gradient_norm = np .linalg .norm (B_g )
103+ self .log (f"Gradient norm: { gradient_norm :.6f} " , force = True )
104+
105+ if gradient_norm < self .gradient_norm_threshold :
106+ self .log (f"Converged: Gradient norm { gradient_norm :.6f} below threshold { self .gradient_norm_threshold :.6f} " , force = True )
107+ self .converged = True
108+ return np .zeros_like (B_g ).reshape (- 1 , 1 )
109+
110+ # Check for convergence based on energy change
111+ if self .prev_energy is not None :
112+ energy_change = abs (B_e - self .prev_energy )
113+ if energy_change < self .energy_change_threshold :
114+ self .log (f"Converged: Energy change { energy_change :.6f} below threshold { self .energy_change_threshold :.6f} " , force = True )
115+ self .converged = True
116+ return np .zeros_like (B_g ).reshape (- 1 , 1 )
117+
118+ # Ensure gradient is properly shaped as a 1D array
119+ gradient = np .asarray (B_g ).ravel ()
120+
121+ # Use effective Hessian
122+ H = self .hessian
123+ if self .bias_hessian is not None :
124+ H = self .hessian + self .bias_hessian
125+
126+ # Project out translations and rotations if geom_num_list is provided
127+ if len (geom_num_list ) > 0 :
128+ H = Calculationtools ().project_out_hess_tr_and_rot_for_coord (
129+ H , geom_num_list .reshape (- 1 , 3 ), geom_num_list .reshape (- 1 , 3 ), False
130+ )
131+
132+ # Get the cubic Newton step using line search
133+ move_vector = - 1 * self .get_cubic_newton_step (
134+ geom_num_list , gradient , H , B_e
135+ )
136+
137+ # Store current geometry, gradient and energy for next iteration
138+ self .prev_geometry = geom_num_list
139+ self .prev_gradient = B_g
140+ self .prev_energy = B_e
141+
142+ # Increment iteration counter
143+ self .iteration += 1
144+
145+ return move_vector .reshape (- 1 , 1 )
146+
147+ def get_cubic_newton_step (self , geom_num_list , gradient , hessian , energy ):
148+ """Compute the step using Cubic Regularized Newton method with line search"""
149+ gradient_norm = np .linalg .norm (gradient )
150+
151+ # Initialize Lipschitz constant estimate if this is the first iteration
152+ if self .prev_lipschitz is None :
153+ # Initial Lipschitz constant estimate; line 2 in algorithm 2 in [1]
154+ trial_step_length = self .initial_step_length
155+ trial_step = trial_step_length * (- gradient / gradient_norm )
156+
157+ # Get energy and gradient at trial point
158+ trial_coords = geom_num_list .ravel () + trial_step
159+ self .log (f"Estimating initial Lipschitz constant using step of length { trial_step_length :.4f} " )
160+
161+ # For MultiOptPy, we need to call external calculator, so we'll return this trial step
162+ # and continue on the next iteration with the results
163+ if self .prev_gradient is None :
164+ self .log ("First iteration: returning initial step to estimate Lipschitz constant" )
165+ return trial_step
166+
167+ trial_gradient = self .prev_gradient .ravel ()
168+
169+ # Compute Lipschitz constant
170+ H = (
171+ np .linalg .norm (trial_gradient - gradient - hessian .dot (trial_step ))
172+ / np .linalg .norm (trial_step ) ** 2
173+ )
174+ # Ensure H is within reasonable bounds
175+ H = max (self .min_lipschitz , min (H , self .max_lipschitz ))
176+ else :
177+ # Start with previous Lipschitz constant divided by 4 (as in the paper)
178+ H = self .prev_lipschitz / 4
179+ H = max (self .min_lipschitz , min (H , self .max_lipschitz ))
180+
181+ self .log (f"Starting Lipschitz constant in cycle { self .iteration } , H={ H :.4f} " )
182+
183+ # Line search to find a good step
184+ best_step = None
185+ for i in range (self .max_micro_cycles ):
186+ self .line_search_cycles += 1
187+ H *= self .lipschitz_scale_factor
188+ H = min (H , self .max_lipschitz )
189+
190+ self .log (f"Adaptive Newton line search, cycle { i } using H={ H :.4f} " )
191+
192+ # Regularization parameter λ = sqrt(H * ||g||)
193+ lambda_ = np .sqrt (H * gradient_norm )
194+
195+ # Solve the regularized Newton system: (H + λI)p = -g
196+ try :
197+ trial_step = np .linalg .solve (
198+ hessian + lambda_ * np .eye (gradient .size ), - gradient
199+ )
200+ except np .linalg .LinAlgError :
201+ # Fallback if the system is singular
202+ self .log ("Linear system is singular, using gradient descent step" )
203+ trial_step = - gradient / gradient_norm * self .initial_step_length
204+
205+ trial_step_norm = np .linalg .norm (trial_step )
206+ self .log (f"Trial step norm: { trial_step_norm :.6f} " )
207+
208+ # For the cubic Newton method, we need to check two conditions:
209+ # 1. ||∇f(x+p)|| ≤ 2λ||p||
210+ # 2. f(x+p) ≤ f(x) - (2/3)λ||p||²
211+ #
212+ # In MultiOptPy's framework, we can't check these conditions directly because
213+ # we don't have f(x+p) and ∇f(x+p) yet. Instead, we return the step and let
214+ # the caller evaluate the new point. On the next iteration, we can use that
215+ # information to adjust our Lipschitz constant.
216+
217+ # For now, we'll store this step as our best guess
218+ best_step = trial_step
219+ break
220+
221+ if best_step is None :
222+ # Fallback to gradient descent
223+ self .log ("Line search failed, using gradient descent step" , force = True )
224+ best_step = - gradient / gradient_norm * self .initial_step_length
225+
226+ # Store current Lipschitz constant for next iteration
227+ self .prev_lipschitz = H
228+
229+ return best_step
230+
231+ def update_hessian (self , current_geom , current_grad , previous_geom , previous_grad ):
232+ """Update the Hessian using the specified update method"""
233+ # Calculate displacement and gradient difference
234+ displacement = np .asarray (current_geom - previous_geom ).reshape (- 1 , 1 )
235+ delta_grad = np .asarray (current_grad - previous_grad ).reshape (- 1 , 1 )
236+
237+ # Skip update if changes are too small
238+ disp_norm = np .linalg .norm (displacement )
239+ grad_diff_norm = np .linalg .norm (delta_grad )
240+
241+ if disp_norm < 1e-10 or grad_diff_norm < 1e-10 :
242+ self .log ("Skipping Hessian update due to small changes" )
243+ return
244+
245+ # Check if displacement and gradient difference are sufficiently aligned
246+ dot_product = np .dot (displacement .T , delta_grad )
247+ dot_product = dot_product [0 , 0 ] # Extract scalar value from 1x1 matrix
248+ if dot_product <= 0 :
249+ self .log ("Skipping Hessian update due to poor alignment" )
250+ return
251+
252+ self .log (f"Hessian update: displacement norm={ disp_norm :.6f} , gradient diff norm={ grad_diff_norm :.6f} , dot product={ dot_product :.6f} " )
253+
254+ # Apply the selected Hessian update method
255+ if "flowchart" in self .hessian_update_method .lower ():
256+ self .log (f"Hessian update method: flowchart" )
257+ delta_hess = self .hessian_updater .flowchart_hessian_update (
258+ self .hessian , displacement , delta_grad , "auto"
259+ )
260+ elif "bfgs" in self .hessian_update_method .lower ():
261+ self .log (f"Hessian update method: bfgs" )
262+ delta_hess = self .hessian_updater .BFGS_hessian_update (
263+ self .hessian , displacement , delta_grad
264+ )
265+ elif "sr1" in self .hessian_update_method .lower ():
266+ self .log (f"Hessian update method: sr1" )
267+ delta_hess = self .hessian_updater .SR1_hessian_update (
268+ self .hessian , displacement , delta_grad
269+ )
270+ elif "fsb" in self .hessian_update_method .lower ():
271+ self .log (f"Hessian update method: fsb" )
272+ delta_hess = self .hessian_updater .FSB_hessian_update (
273+ self .hessian , displacement , delta_grad
274+ )
275+ elif "bofill" in self .hessian_update_method .lower ():
276+ self .log (f"Hessian update method: bofill" )
277+ delta_hess = self .hessian_updater .Bofill_hessian_update (
278+ self .hessian , displacement , delta_grad
279+ )
280+ elif "psb" in self .hessian_update_method .lower ():
281+ self .log (f"Hessian update method: psb" )
282+ delta_hess = self .hessian_updater .PSB_hessian_update (
283+ self .hessian , displacement , delta_grad
284+ )
285+ elif "msp" in self .hessian_update_method .lower ():
286+ self .log (f"Hessian update method: msp" )
287+ delta_hess = self .hessian_updater .MSP_hessian_update (
288+ self .hessian , displacement , delta_grad
289+ )
290+ else :
291+ self .log (f"Unknown Hessian update method: { self .hessian_update_method } . Using BFGS." )
292+ delta_hess = self .hessian_updater .BFGS_hessian_update (
293+ self .hessian , displacement , delta_grad
294+ )
295+
296+ # Update the Hessian (in-place addition)
297+ self .hessian += delta_hess
298+
299+ # Ensure Hessian symmetry (numerical errors might cause slight asymmetry)
300+ # Use in-place operation for symmetrization
301+ self .hessian = 0.5 * (self .hessian + self .hessian .T )
302+
303+ def is_converged (self ):
304+ """Check if optimization has converged"""
305+ return self .converged
306+
307+ def set_hessian (self , hessian ):
308+ """Set the Hessian matrix"""
309+ self .hessian = hessian
310+ return
311+
312+ def set_bias_hessian (self , bias_hessian ):
313+ """Set the bias Hessian matrix"""
314+ self .bias_hessian = bias_hessian
315+ return
316+
317+ def get_hessian (self ):
318+ """Get the current Hessian matrix"""
319+ return self .hessian
320+
321+ def get_bias_hessian (self ):
322+ """Get the current bias Hessian matrix"""
323+ return self .bias_hessian
324+
325+ def get_line_search_cycles (self ):
326+ """Get the number of line search cycles performed"""
327+ return self .line_search_cycles
0 commit comments