File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ [base]
2+ env_name = drmario
3+
4+ [vec]
5+ total_agents = 1536
6+ num_buffers = 4
7+ num_threads = 4
8+
9+ [env]
10+ n_rows = 16
11+ n_cols = 8
12+ n_init_viruses = 4
13+
14+ [policy]
15+ hidden_size = 128
16+ num_layers = 1
17+
18+ [legacy]
19+ torch_deterministic = 1
20+ cpu_offload = 0
21+ compile = 1
22+ compile_fullgraph = 0
23+
24+ [train]
25+ total_timesteps = 500_000_000
26+ learning_rate = 0.001
27+ gamma = 0.99
28+ gae_lambda = 0.95
29+ clip_coef = 0.2
30+ vf_coef = 0.5
31+ ent_coef = 0.01
32+ minibatch_size = 16384
33+ horizon = 128
34+ use_rnn = 0
35+
36+ # The remaining settings below (optimizer, replay, V-trace, prioritization) were copied from the Breakout config.
37+ beta1 = 0.9
38+ beta2 = 0.999
39+ eps = 1e-8
40+ max_grad_norm = 1.0
41+ replay_ratio = 2
42+ vtrace_rho_clip = 2.0
43+ vtrace_c_clip = 2.0
44+ prio_alpha = 0.6
45+ prio_beta0 = 0.9
Original file line number Diff line number Diff line change 1+ #include "drmario.h"
2+
3+ #define OBS_SIZE 133
4+ #define NUM_ATNS 1
5+ #define ACT_SIZES {7}
6+ #define OBS_TENSOR_T FloatTensor
7+
8+ #define Env DrMario
9+ #include "vecenv.h"
10+
11+ void my_init (Env * env , Dict * kwargs ) {
12+ env -> num_agents = 1 ;
13+ env -> n_rows = dict_get (kwargs , "n_rows" )-> value ;
14+ env -> n_cols = dict_get (kwargs , "n_cols" )-> value ;
15+ env -> n_init_viruses = dict_get (kwargs , "n_init_viruses" )-> value ;
16+ c_init (env );
17+ }
18+
19+ void my_log (Log * log , Dict * out ) {
20+ dict_set (out , "perf" , log -> perf );
21+ dict_set (out , "score" , log -> score );
22+ dict_set (out , "episode_return" , log -> episode_return );
23+ dict_set (out , "episode_length" , log -> episode_length );
24+ dict_set (out , "viruses_cleared" , log -> viruses_cleared );
25+ }
You can’t perform that action at this time.
0 commit comments