@@ -5,6 +5,7 @@ globals [
55 speed-limit
66 speed-min
77 inputs
8+ loss
89]
910
1011turtles-own [
@@ -38,16 +39,17 @@ to setup
3839 py:set " state_dims" length inputs
3940 py:set " hl_size" 36
4041 py:set " num_actions" 3
41- py:set " memory_size" 10000
42- py:set " batch_size" 128
42+ py:set " memory_size" memory-size
43+ py:set " batch_size" batch-size
44+ py:set " lr" learning-rate
4345
4446 (py:run
4547 " model = Sequential()"
4648 " model.add(Dense(hl_size, input_shape=(state_dims,), activation='relu'))"
4749 " model.add(Dense(hl_size, activation='relu'))"
4850 " model.add(Dense(hl_size, activation='relu'))"
4951 " model.add(Dense(num_actions))"
50- " optimizer = optimizers.adam()"
52+ " optimizer = optimizers.adam(lr=lr )"
5153 " model.compile(optimizer, 'mse')"
5254 " model.summary()"
5355 " memory = []" )
@@ -107,10 +109,13 @@ to go
107109 if speed < speed-min [ set speed speed-min ]
108110 if speed > speed-limit [ set speed speed-limit ]
109111 fd speed
110- set reward (log (speed + 0.0000000000005 ) 2 )
112+ set reward (log (speed + 1e-8 ) 2 )
111113 ; set reward speed
112114 ]
113- if train? [ train ]
115+ if train? [
116+ remember
117+ train
118+ ]
114119 tick
115120end
116121
@@ -131,14 +136,18 @@ to select-actions
131136 ])
132137end
133138
134- to train
139+ to remember
135140 ask turtles [ set next-state map runresult inputs ]
136141 let data [ (list state action reward next-state) ] of turtles
137142 py:set " new_exp" data
138143 (py:run
139144 " memory.extend(new_exp)"
140145 " if len(memory) > memory_size:"
141- " memory = memory[-memory_size:]"
146+ " memory = memory[-memory_size:]" )
147+ end
148+
149+ to train
150+ (py:run
142151 " sample_ix = np.random.randint(len(memory), size = batch_size)"
143152 " inputs = np.array([memory[i][0] for i in sample_ix])"
144153 " actions = np.array([memory[i][1] for i in sample_ix])"
@@ -179,10 +188,10 @@ end
179188; See Info tab for full copyright and license.
180189@#$#@#$#@
181190GRAPHICS-WINDOW
182- 10
183- 405
184- 898
185- 569
191+ 15
192+ 425
193+ 903
194+ 589
186195-1
187196-1
18819717.255
@@ -207,9 +216,9 @@ ticks
207216
208217BUTTON
209218110
210- 55
219+ 160
211220182
212- 96
221+ 201
213222NIL
214223setup
215224NIL
224233
225234BUTTON
226235193
227- 56
236+ 161
228237264
229- 96
238+ 201
230239NIL
231240go
232241T
@@ -256,9 +265,9 @@ HORIZONTAL
256265
257266SLIDER
25826715
259- 135
268+ 240
260269265
261- 168
270+ 273
262271deceleration
263272deceleration
2642730
@@ -271,9 +280,9 @@ HORIZONTAL
271280
272281SLIDER
27328215
274- 100
283+ 205
275284265
276- 133
285+ 238
277286acceleration
278287acceleration
2792880
@@ -295,7 +304,7 @@ speed
2953040.0
296305300.0
2973060.0
298- 1.1
307+ 1.0
299308true
300309false
301310" " " "
306315
307316BUTTON
30831715
309- 55
318+ 160
31031997
311- 95
320+ 200
312321NIL
313322setup-tf
314323NIL
323332
324333SLIDER
32533415
326- 240
335+ 345
327336265
328- 273
337+ 378
329338discount
330339discount
3313400
@@ -338,14 +347,14 @@ HORIZONTAL
338347
339348SLIDER
34034915
341- 205
350+ 310
342351265
343- 238
352+ 343
344353exploration-rate
345354exploration-rate
3463550
3473561
348- 0.05
357+ 0.01
3493580.01
3503591
351360NIL
3623710.0
363372300.0
3643730.0
365- 10 .0
374+ 20 .0
366375true
367376false
368377" " " "
373382
374383SLIDER
37538415
376- 170
385+ 275
377386265
378- 203
387+ 308
379388stop-penalty
380389stop-penalty
3813900
@@ -388,15 +397,60 @@ HORIZONTAL
388397
389398SWITCH
39039915
391- 275
400+ 380
392401117
393- 308
402+ 413
394403train?
395404train?
3964050
3974061
398407-1000
399408
409+ SLIDER
410+ 15
411+ 50
412+ 265
413+ 83
414+ memory-size
415+ memory-size
416+ 0
417+ 100000
418+ 10000.0
419+ 1000
420+ 1
421+ NIL
422+ HORIZONTAL
423+
424+ SLIDER
425+ 15
426+ 85
427+ 265
428+ 118
429+ batch-size
430+ batch-size
431+ 0
432+ 1024
433+ 128.0
434+ 32
435+ 1
436+ NIL
437+ HORIZONTAL
438+
439+ SLIDER
440+ 15
441+ 120
442+ 265
443+ 153
444+ learning-rate
445+ learning-rate
446+ 0
447+ 0.01
448+ 0.001
449+ 0.0001
450+ 1
451+ NIL
452+ HORIZONTAL
453+
400454@#$#@#$#@
401455## WHAT IS IT?
402456
0 commit comments