
Commit 4080ab6

updated gym to latest version. Gymnasium
1 parent 239f839 commit 4080ab6

4 files changed

Lines changed: 18 additions & 21 deletions

rl_studio/agents/pendulum/inference_ddpg.py

Lines changed: 5 additions & 10 deletions
@@ -2,7 +2,7 @@
 import time
 import random
 
-import gym
+import gymnasium as gym
 import matplotlib.pyplot as plt
 from torch.utils import tensorboard
 from tqdm import tqdm
@@ -68,11 +68,8 @@ def __init__(self, params):
         # # ,random_start_level=self.RANDOM_START_LEVEL, initial_pole_angle=self.INITIAL_POLE_ANGLE,
         # # non_recoverable_angle=non_recoverable_angle
         # ))
-        self.env = gym.make(self.env_name)
+        self.env = gym.make(self.env_name, render_mode="human")
         self.RUNS = self.environment_params["runs"]
-        self.SHOW_EVERY = self.environment_params[
-            "show_every"
-        ]
         self.UPDATE_EVERY = self.environment_params[
             "update_every"
         ]  # How often the current progress is recorded
@@ -129,7 +126,8 @@ def main(self):
         total_reward_in_epoch = 0
 
         for episode in tqdm(range(self.RUNS)):
-            state, done = self.env.reset(), False
+            state, _ = self.env.reset()
+            done = False
             episode_reward = 0
             step = 0
             while not done:
@@ -140,16 +138,13 @@
                 # logging.debug("perturbated in step {} with action {}".format(episode_rew, perturbation_action))
 
                 action = self.inferencer.inference(state)
-                new_state, reward, done, _ = self.env.step(action)
+                new_state, reward, _, done, _ = self.env.step(action)
                 state = new_state
                 episode_reward += reward
                 total_reward_in_epoch += reward
 
             w.add_scalar("reward/episode_reward", episode_reward, global_step=episode)
 
-            if episode % self.SHOW_EVERY == 0:
-                self.env.render()
-
             self.gather_statistics(step, episode_reward)
 
             # monitor progress
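
This file now follows Gymnasium's API: reset() returns an (observation, info) tuple, step() returns five values with the old done flag split into terminated and truncated, and "human" rendering is requested once via render_mode at construction instead of per-step env.render() calls (which is why SHOW_EVERY is gone). Note that the unpacking new_state, reward, _, done, _ binds done to the truncated flag and discards terminated; that is safe for Pendulum-v1, which only ends episodes by time-limit truncation, but the general test is terminated or truncated. A minimal sketch of the new loop, with a random policy standing in for the repo's inferencer:

import gymnasium as gym

# render_mode="human" replaces the old per-step env.render() calls
env = gym.make("Pendulum-v1", render_mode="human")

obs, info = env.reset(seed=42)          # reset() now returns (observation, info)
done = False
while not done:
    action = env.action_space.sample()  # stand-in for self.inferencer.inference(state)
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated      # Pendulum-v1 only ever truncates
env.close()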
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+gym==0.26.2
+gymnasium==0.27.0
+markdownTable==6.0.0
+matplotlib==3.3.2
+numpy==1.17.4
+torch==1.12.1
+tqdm==4.64.0
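
Both gym 0.26.2 and gymnasium 0.27.0 are pinned side by side, so code elsewhere in the repo can keep importing gym while the pendulum agents move to gymnasium. A quick runtime check of the pins (a hypothetical snippet, not part of the repo):

import importlib.metadata

# Verify the installed versions match the pins above
pins = {"gymnasium": "0.27.0", "torch": "1.12.1", "tqdm": "4.64.0"}
for pkg, want in pins.items():
    have = importlib.metadata.version(pkg)
    assert have == want, f"{pkg}: expected {want}, found {have}"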

rl_studio/agents/pendulum/train_ddpg.py

Lines changed: 5 additions & 10 deletions
@@ -2,7 +2,7 @@
 import time
 import random
 
-import gym
+import gymnasium as gym
 import matplotlib.pyplot as plt
 from torch.utils import tensorboard
 from tqdm import tqdm
@@ -69,9 +69,6 @@ def __init__(self, params):
         # ))
         self.env = gym.make(self.env_name)
         self.RUNS = self.environment_params["runs"]
-        self.SHOW_EVERY = self.environment_params[
-            "show_every"
-        ]
         self.UPDATE_EVERY = self.environment_params[
             "update_every"
         ]  # How often the current progress is recorded
@@ -184,7 +181,8 @@ def main(self):
         total_reward_in_epoch = 0
 
         for episode in tqdm(range(self.RUNS)):
-            state, done = self.env.reset(), False
+            state, _ = self.env.reset()
+            done = False
             self.actor.reset_noise()
             episode_reward = 0
             step = 0
@@ -196,7 +194,7 @@
                 # logging.debug("perturbated in step {} with action {}".format(episode_rew, perturbation_action))
 
                 action = self.actor.get_action(state, step)
-                new_state, reward, done, _ = self.env.step(action)
+                new_state, reward, _, done, _ = self.env.step(action)
                 self.memory.push(state, action, reward, new_state, done)
 
                 if len(self.memory) > self.batch_size:
@@ -210,9 +208,6 @@
             w.add_scalar("loss/actor_loss", actor_loss, global_step=episode)
             w.add_scalar("loss/critic_loss", critic_loss, global_step=episode)
 
-            if episode % self.SHOW_EVERY == 0:
-                self.env.render()
-
             self.gather_statistics(actor_loss, step, episode_reward)
 
             # monitor progress
@@ -229,7 +224,7 @@
             if self.config["save_model"] and last_average > self.max_avg:
                 self.max_avg = total_reward_in_epoch / self.UPDATE_EVERY
                 logging.info(f"Saving model . . .")
-                utils.save_ddpg_model(self.actor, start_time_format, last_average, self.params)
+                utils.save_ddpg_model(self.actor, start_time_format, last_average)
 
             if last_average >= self.OBJECTIVE_REWARD:
                 logging.info("Training objective reached!!")
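
The training loop stores transitions with self.memory.push(..., done), where done now carries Gymnasium's truncated flag. For Pendulum-v1 the two conventions coincide (episodes never terminate, only truncate), but for environments with real terminal states the usual practice is to store terminated in the buffer so the Bellman target only drops its bootstrap term at true terminal states. A sketch under those assumptions, with a random policy and a plain list standing in for the repo's actor and replay memory:

import gymnasium as gym

env = gym.make("Pendulum-v1")
memory = []  # stand-in for the DDPG replay buffer

obs, info = env.reset(seed=0)
done = False
while not done:
    action = env.action_space.sample()  # stand-in for self.actor.get_action(state, step)
    next_obs, reward, terminated, truncated, info = env.step(action)
    # Store terminated, not truncated: bootstrapping should only be cut
    # at true terminal states, never at time limits.
    memory.append((obs, action, reward, next_obs, terminated))
    done = terminated or truncated
    obs = next_obs
env.close()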

rl_studio/config/config_pendulum_ddpg.yaml

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ environments:
     full_experimentation_runs: 0
     update_every: 20
     show_every: 50
-    objective_reward: -400
+    objective_reward: -350
     # block_experience_batch: False
     block_experience_batch: False
     # random_start_level: 0.05
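
Pendulum-v1's per-step reward is -(theta^2 + 0.1*theta_dot^2 + 0.001*torque^2), so returns are never positive, and the default 200-step time limit bounds an episode's return between roughly -3255 and 0. Raising objective_reward from -400 to -350 therefore tightens the success criterion. A back-of-the-envelope check (constants taken from the Gymnasium Pendulum docs):

import math

# Worst-case single step: theta = pi, theta_dot = 8, torque = 2
worst_step = -(math.pi**2 + 0.1 * 8.0**2 + 0.001 * 2.0**2)
print(round(worst_step, 2))     # -16.27
print(round(worst_step * 200))  # -3255: worst-case 200-step return

# An average return of -350 therefore requires swinging up quickly and
# holding the pendulum near upright for most of the episode.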
