-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest2.py
More file actions
74 lines (60 loc) · 2.27 KB
/
test2.py
File metadata and controls
74 lines (60 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
'''
------------------------------
10 ARM TESTBED WITH EPS-GREEDY
------------------------------
'''
import numpy as np
import matplotlib.pyplot as plt
import random
n_bandit = 2000  # number of independent bandit problems the curves are averaged over
k = 10  # number of arms in each bandit problem
n_pulls = 1000  # number of time steps simulated for each bandit
# True mean reward q*(a) of every arm; each row represents one bandit problem.
q_true = np.random.normal(0, 1, (n_bandit, k))
true_opt_arms = np.argmax(q_true, 1)  # the true optimal arm in each bandit
epsilon = [0, 0.01]  # exploration probabilities to compare
col = ['r', 'g', 'k', 'b', 'y']  # plot colours, cycled over the epsilon values


def _run_eps_greedy(true_means, eps, steps):
    """Simulate epsilon-greedy action selection on a batch of bandit problems.

    Args:
        true_means: 2-D array; row i holds the true mean reward of every arm
            of bandit i.
        eps: probability of picking a uniformly random arm instead of the
            greedy one at each step.
        steps: index of the last time step to simulate.

    Returns:
        A tuple ``(avg_rewards, pct_optimal)``.
        ``avg_rewards`` has one entry per step 0..steps: entry 0 is the
        placeholder 0, entry 1 is the mean reward of the initial pull of
        every arm, and the remaining entries are the reward averaged over all
        bandits at that step.
        ``pct_optimal`` has one entry per step 2..steps: the percentage of
        bandits whose chosen arm was the truly optimal one.
    """
    bandit_count, arm_count = true_means.shape
    best_arms = np.argmax(true_means, 1)
    rows = np.arange(bandit_count)

    # Step 1: pull every arm once.  The observed rewards seed the estimates so
    # that Q really is the sample average of the N (= 1) pulls recorded below.
    # Starting Q at zero with N = 1 would average in a phantom zero reward and
    # bias every estimate towards 0.
    Q = np.random.normal(true_means, 1)
    N = np.ones((bandit_count, arm_count))

    avg_rewards = [0, np.mean(Q)]
    pct_optimal = []
    for _ in range(2, steps + 1):
        # Choose one arm per bandit: random with probability eps, else greedy.
        explore = np.random.random(bandit_count) < eps
        random_arms = np.random.randint(arm_count, size=bandit_count)
        arms = np.where(explore, random_arms, np.argmax(Q, 1))

        rewards = np.random.normal(true_means[rows, arms], 1)

        # Incremental sample-average update.  Each row index appears exactly
        # once, so the fancy-indexed in-place updates cannot collide.
        N[rows, arms] += 1
        Q[rows, arms] += (rewards - Q[rows, arms]) / N[rows, arms]

        avg_rewards.append(np.mean(rewards))
        pct_optimal.append(100.0 * np.mean(arms == best_arms))
    return avg_rewards, pct_optimal


fig1 = plt.figure().add_subplot(111)  # axes for average reward vs. steps
fig2 = plt.figure().add_subplot(111)  # axes for % optimal action vs. steps

for idx, eps in enumerate(epsilon):
    print('Current epsilon : ', eps)
    R_eps, R_eps_opt = _run_eps_greedy(q_true, eps, n_pulls)
    colour = col[idx % len(col)]  # wrap around if there are more epsilons than colours
    fig1.plot(range(0, n_pulls + 1), R_eps, colour)
    fig2.plot(range(2, n_pulls + 1), R_eps_opt, colour)

# One legend entry per epsilon value, in plotting order.
legend_labels = [r"$\epsilon=$" + str(eps) for eps in epsilon]

fig1.title.set_text(r'$\epsilon$-greedy : Average Reward Vs Steps for 10 arms')
fig1.set_ylabel('Average Reward')
fig1.set_xlabel('Steps')
fig1.legend(legend_labels, loc='best')

fig2.title.set_text(r'$\epsilon$-greedy : $\%$ Optimal Action Vs Steps for 10 arms')
fig2.set_ylabel(r'$\%$ Optimal Action')
fig2.set_xlabel('Steps')
fig2.set_ylim(0, 100)
fig2.legend(legend_labels, loc='best')

plt.show()