We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 432643a commit 0f0c407Copy full SHA for 0f0c407
1 file changed
src/ppo/main.clj
@@ -342,3 +342,25 @@
342
'[torch.nn.functional :as F]
343
'[torch.optim :as optim]
344
'[torch.distributions :refer (Beta)])
345
+
346
347
348
349
350
+;; TODO
351
+;;
352
+;; $\hat{A}_{T-1} = -V(S_{T-1}) + r_{T-1} + \gamma V(S_T)$
353
354
+;; $\hat{A}_{T-2} = -V(S_{T-2}) + r_{T-2} + \gamma r_{T-1} + \gamma^2 V(S_T)$
355
356
+;; $\vdots$
357
358
+;; $\hat{A}_0 = -V(S_0) + r_0 + \gamma r_1 + \ldots + \gamma^T V(S_T)$
359
360
+;; $\hat{A}_t = -V(s_t) + r_t + \gamma r_{t+1} + \ldots + \gamma^{T-t-1} r_{T-1} + \gamma^{T-t} V(S_T)$
361
362
+;; $\hat{A}_t = \sum_{l=0}^{T-t-1} (\gamma \lambda)^l \delta_{t+l}$
363
364
+;; $\delta_t = r_t + \gamma V(s_{t+1}) - V(s_t)$
365
366
+;; $\hat{A}_t = \sum_{l=0}^{T-t-1} (\gamma \lambda)^l \left( r_{t+l} + \gamma V(s_{t+l+1}) - V(s_{t+l}) \right)$
0 commit comments