-
Notifications
You must be signed in to change notification settings - Fork 83
Expand file tree
/
Copy pathThe-zig-zagging-of-exact-line-search.py
More file actions
136 lines (100 loc) · 4.48 KB
/
The-zig-zagging-of-exact-line-search.py
File metadata and controls
136 lines (100 loc) · 4.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 9 06:30:12 2021
@author: Sarat Moka
Python code for showing the zig-zagging property of exact line search
using a Rosenbrock function.
"""
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker, cm
# =============================================================================
# Rosenbrock function definition
# =============================================================================
f = lambda t: (1 - t[0])**2 + 100*(t[1] - t[0]**2)**2
# =============================================================================
# Gradient of the Rosenbrock function
# =============================================================================
def grad(t):
dx = -2*(1 - t[0]) - 4*100*t[0]*(t[1] - (t[0]**2))
dy = 2*100*(t[1] - (t[0]**2))
#print('dx, dy', dx, dy)
return np.array([dx, dy])
# =============================================================================
# Function to get values on the ray
# =============================================================================
def line(t, d, alpha):
return f(t + alpha*d)
# =============================================================================
# First order derivative with respect to alpha. Needed in line search.
# =============================================================================
def derivative(t, td, alpha):
gamma = t + alpha*td
deriv = - 2*td[0]*(1 - gamma[0]) + 200*(gamma[1] - gamma[0]**2)*(td[1] - 2*td[0]*gamma[0])
return deriv
# =============================================================================
# Second order derivative with respect to alpha. Needed in line search.
# =============================================================================
def second_derivative(t, td, alpha):
gamma = t + alpha*td
s_deriv = 2*(td[0]**2) + 200*((td[1] - 2*gamma[0]*td[0])**2 - 2*(td[0]**2)*(gamma[1] - gamma[0]**2))
return s_deriv
# =============================================================================
# Newton's method for finding an optimal alpha
# =============================================================================
def newton_method(t, td, alpha_init, niter):
alpha = alpha_init
for _ in range(niter):
deriv = derivative(t, td, alpha)
s_deriv = second_derivative(t, td, alpha)
d = - deriv/s_deriv
alpha = alpha + d
return alpha
# =============================================================================
# Gradient descent algorithm with exact line search in each iteration
# =============================================================================
def LineSearchGradientDescent(t, alpha_init, niter):
t_seq = [t]
td_seq = []
for _ in range(niter):
g = grad(t)
#norm = np.linalg.norm(g)
td = -g #/norm
# Theta update
alpha = newton_method(t, td, alpha_init, niter)
t = t + alpha*td
t_seq.append(t)
td_seq.append(td)
print('Final point of GD:', t)
t_seq = np.array(t_seq)
td_seq = np.array(td_seq)
return t_seq, td_seq
#%%
# =============================================================================
# Run this cell for calling the gradient descent algorithm
# with exact line search in each iteration
# =============================================================================
alpha_init = 0 # Initial alpha
t_init = np.array([0.4, -0.2]) # initial point
niter = 1000 # no of iteration
t_seq_gd, td_seq_gd = LineSearchGradientDescent(t_init, alpha_init, niter)
#%%
# =============================================================================
# Run this cell for plotting. Zoom in to see the zig-zagging property.
# =============================================================================
t1_range = np.arange(-0.3, 1.1, 0.001)
t2_range = np.arange(-0.3, 1.1, 0.001)
A = np.meshgrid(t1_range, t2_range)
Z = f(A)
plt.contour(t1_range, t2_range, Z, levels=np.exp(np.arange(-15, 7, 0.2)), locator=ticker.LogLocator(), cmap=cm.PuBu_r, linewidths=3)
plt.plot(t_seq_gd[:, 0], t_seq_gd[:, 1], '-g', linewidth=2.5)
plt.plot(1, 1, 'o')
plt.text(1-0.4,1, 'Optimal point', fontsize=25)
plt.plot(t_init[0], t_init[1], 'o', color='black')
plt.text(t_init[0]+0.02,t_init[1]-0.02, 'Initial point', fontsize=25)
plt.xlabel(r'$\theta_1$', fontsize=25)
plt.ylabel(r'$\theta_2$', fontsize=25)
#plt.legend(fontsize=20)
plt.rcParams["figure.figsize"] = (10,10)
plt.show()