diff --git a/joyrl/algos/SAC/agent.py b/joyrl/algos/SAC/agent.py index 107f77b..6ffe0be 100644 --- a/joyrl/algos/SAC/agent.py +++ b/joyrl/algos/SAC/agent.py @@ -4,7 +4,9 @@ import torch.nn.functional as F from torch.optim import Adam from torch.distributions import Normal +from common.memories import ReplayBuffer import random +import math import numpy as np LOG_SIG_MAX = 2 @@ -32,6 +34,7 @@ def forward(self, state): x = F.relu(self.linear2(x)) x = self.linear3(x) return x + class QNetwork(nn.Module): def __init__(self, num_inputs, num_actions, hidden_dim): super(QNetwork, self).__init__() @@ -98,12 +101,16 @@ def sample(self, state): normal = Normal(mean, std) x_t = normal.rsample() # for reparameterization trick (mean + std * N(0,1)) y_t = torch.tanh(x_t) + action = y_t * self.action_scale + self.action_bias log_prob = normal.log_prob(x_t) # Enforcing Action Bound + # log_prob -= (2 * (math.log(2) - x_t - F.softplus(-2 * x_t))).sum(1, keepdim=True) + log_prob -= torch.log(self.action_scale * (1 - y_t.pow(2)) + epsilon) log_prob = log_prob.sum(1, keepdim=True) mean = torch.tanh(mean) * self.action_scale + self.action_bias + # print ("action = ", action) return action, log_prob, mean def to(self, device): @@ -151,25 +158,7 @@ def to(self, device): self.action_bias = self.action_bias.to(device) self.noise = self.noise.to(device) return super(DeterministicPolicy, self).to(device) -class ReplayMemory: - def __init__(self, capacity): - self.capacity = capacity - self.buffer = [] - self.position = 0 - - def push(self, state, action, reward, next_state, done): - if len(self.buffer) < self.capacity: - self.buffer.append(None) - self.buffer[self.position] = (state, action, reward, next_state, done) - self.position = (self.position + 1) % self.capacity - - def sample(self, batch_size): - batch = random.sample(self.buffer, batch_size) - state, action, reward, next_state, done = map(np.stack, zip(*batch)) - return state, action, reward, next_state, done - - def __len__(self): - return len(self.buffer) + class Agent: def __init__(self,cfg) -> None: self.n_states = cfg.n_states @@ -187,13 +176,14 @@ def __init__(self,cfg) -> None: self.target_update_fre = cfg.target_update_fre self.automatic_entropy_tuning = cfg.automatic_entropy_tuning self.batch_size = cfg.batch_size - self.memory = ReplayMemory(cfg.buffer_size) + self.memory = ReplayBuffer(cfg.buffer_size) self.device = torch.device(cfg.device) self.critic = QNetwork(cfg.n_states,cfg.n_actions, cfg.hidden_dim).to(device=self.device) self.critic_optim = Adam(self.critic.parameters(), lr=cfg.lr) self.critic_target = QNetwork(cfg.n_states, cfg.n_actions, cfg.hidden_dim).to(self.device) for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()): target_param.data.copy_(param.data) + if cfg.policy_type == "Gaussian": # Target Entropy = −dim(A) (e.g. , -6 for HalfCheetah-v2) as given in the paper if self.automatic_entropy_tuning is True: @@ -227,18 +217,20 @@ def update(self): return for i in range(self.n_epochs): self.update_count += 1 - state_batch, action_batch, reward_batch, next_state_batch, mask_batch = self.memory.sample(batch_size=self.batch_size) + state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(batch_size=self.batch_size) state_batch = torch.FloatTensor(state_batch).to(self.device) next_state_batch = torch.FloatTensor(next_state_batch).to(self.device) action_batch = torch.FloatTensor(action_batch).to(self.device) reward_batch = torch.FloatTensor(reward_batch).to(self.device).unsqueeze(1) - mask_batch = torch.FloatTensor(mask_batch).to(self.device).unsqueeze(1) + done_batch = torch.FloatTensor(done_batch).to(self.device).unsqueeze(1) + # print ("done_batch = ", done_batch) with torch.no_grad(): next_state_action, next_state_log_pi, _ = self.policy.sample(next_state_batch) qf1_next_target, qf2_next_target = self.critic_target(next_state_batch, next_state_action) min_qf_next_target = torch.min(qf1_next_target, qf2_next_target) - self.alpha * next_state_log_pi - next_q_value = reward_batch + mask_batch * self.gamma * (min_qf_next_target) + next_q_value = reward_batch + (1 - done_batch) * self.gamma * (min_qf_next_target) + qf1, qf2 = self.critic(state_batch, action_batch) # Two Q-functions to mitigate positive bias in the policy improvement step qf1_loss = F.mse_loss(qf1, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q1(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2] qf2_loss = F.mse_loss(qf2, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q1(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2] @@ -246,19 +238,23 @@ def update(self): self.critic_optim.zero_grad() qf_loss.backward() + for param in self.critic.parameters(): + param.grad.data.clamp_(-1, 1) self.critic_optim.step() - pi, log_pi, _ = self.policy.sample(state_batch) + pi, log_pi, _ = self.policy.sample(state_batch) qf1_pi, qf2_pi = self.critic(state_batch, pi) min_qf_pi = torch.min(qf1_pi, qf2_pi) - policy_loss = ((self.alpha * log_pi) - min_qf_pi).mean() # Jπ = 𝔼st∼D,εt∼N[α * logπ(f(εt;st)|st) − Q(st,f(εt;st))] - self.policy_optim.zero_grad() policy_loss.backward() + for param in self.policy.parameters(): + param.grad.data.clamp_(-1, 1) self.policy_optim.step() + + if self.automatic_entropy_tuning: alpha_loss = -(self.log_alpha * (log_pi + self.target_entropy).detach()).mean() @@ -289,7 +285,7 @@ def save_model(self, fpath): def load_model(self, fpath): - checkpoint = torch.load(fpath, map_location=self.device) + checkpoint = torch.load(f"{fpath}/checkpoint.pt", map_location=self.device) self.policy.load_state_dict(checkpoint['policy_state_dict']) self.critic.load_state_dict(checkpoint['critic_state_dict']) self.critic_target.load_state_dict(checkpoint['critic_target_state_dict']) diff --git a/joyrl/algos/SAC/config.py b/joyrl/algos/SAC/config.py index 4d8bd0e..2831171 100644 --- a/joyrl/algos/SAC/config.py +++ b/joyrl/algos/SAC/config.py @@ -1,13 +1,13 @@ class AlgoConfig: def __init__(self) -> None: self.policy_type = 'Gaussian' # policy type - self.lr = 3e-4 # learning rate + self.lr = 1e-3 # learning rate # 3e-4 self.gamma = 0.99 # discount factor self.tau = 0.005 # soft update factor - self.alpha = 0.2 # Temperature parameter α determines the relative importance of the entropy term against the reward + self.alpha = 0.1 # Temperature parameter α determines the relative importance of the entropy term against the reward # 0.1 self.automatic_entropy_tuning = False # automatically adjust α - self.batch_size = 256 # batch size - self.hidden_dim = 256 # hidden dimension + self.batch_size = 64 # batch size # 256 + self.hidden_dim = 64 # hidden dimension # 256 self.n_epochs = 1 # number of epochs self.start_steps = 10000 # number of random steps for exploration self.target_update_fre = 1 # interval for updating the target network diff --git a/joyrl/algos/SAC_D/agent.py b/joyrl/algos/SAC_D/agent.py new file mode 100644 index 0000000..48c6ab8 --- /dev/null +++ b/joyrl/algos/SAC_D/agent.py @@ -0,0 +1,201 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.optim import Adam +from torch.distributions import Normal +from common.memories import ReplayBuffer +import random +import math +import numpy as np + +# Initialize Policy weights +def weights_init_(m): + if isinstance(m, nn.Linear): + torch.nn.init.xavier_uniform_(m.weight, gain=1) + torch.nn.init.constant_(m.bias, 0) + +class QNetwork(nn.Module): + def __init__(self, num_inputs, num_actions, hidden_dim): + super(QNetwork, self).__init__() + + # Q1 architecture + self.linear1 = nn.Linear(num_inputs, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + self.linear3 = nn.Linear(hidden_dim, num_actions) + + # Q2 architecture + self.linear4 = nn.Linear(num_inputs, hidden_dim) + self.linear5 = nn.Linear(hidden_dim, hidden_dim) + self.linear6 = nn.Linear(hidden_dim, num_actions) + + self.apply(weights_init_) + + def forward(self, state): + xu = state + + x1 = F.relu(self.linear1(xu)) + x1 = F.relu(self.linear2(x1)) + x1 = self.linear3(x1) + + x2 = F.relu(self.linear4(xu)) + x2 = F.relu(self.linear5(x2)) + x2 = self.linear6(x2) + + return x1, x2 + + +class PolicyNet(nn.Module): + def __init__(self, num_inputs, num_actions, hidden_dim): + super(PolicyNet, self).__init__() + + self.linear1 = nn.Linear(num_inputs, hidden_dim) + self.linear2 = nn.Linear(hidden_dim, hidden_dim) + self.linear3 = nn.Linear(hidden_dim, num_actions) + + self.apply(weights_init_) + + + def forward(self, state): + x = F.relu(self.linear1(state)) + x = F.relu(self.linear2(x)) + x = self.linear3(x) + + probs = F.softmax(x, -1) + z = probs == 0.0 + z = z.float() * 1e-8 + return x, probs + z + + +class Agent: + def __init__(self,cfg) -> None: + self.n_states = cfg.n_states + self.n_actions = cfg.n_actions + self.action_space = cfg.action_space + self.sample_count = 0 + self.update_count = 0 + self.gamma = cfg.gamma + self.tau = cfg.tau + self.alpha = cfg.alpha + self.n_epochs = cfg.n_epochs + self.target_update = cfg.target_update + self.automatic_entropy_tuning = cfg.automatic_entropy_tuning + self.batch_size = cfg.batch_size + self.memory = ReplayBuffer(cfg.buffer_size) + self.device = torch.device(cfg.device) + self.critic = QNetwork(cfg.n_states,cfg.n_actions, cfg.hidden_dim).to(device=self.device) + self.critic_optim = Adam(self.critic.parameters(), lr=cfg.lr) + self.critic_target = QNetwork(cfg.n_states, cfg.n_actions, cfg.hidden_dim).to(self.device) + + self.target_entropy = 0.98 * (-np.log(1 / self.n_actions)) + self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device) + self.alpha = self.log_alpha.exp() + self.alpha_optim = Adam([self.log_alpha], lr=cfg.lr) + + self.epsilon = cfg.epsilon_start + self.epsilon_start = cfg.epsilon_start + self.epsilon_end = cfg.epsilon_end + self.epsilon_decay = cfg.epsilon_decay + + for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()): + target_param.data.copy_(param.data) + + self.policy = PolicyNet(cfg.n_states, cfg.n_actions, cfg.hidden_dim).to(self.device) + self.policy_optim = Adam(self.policy.parameters(), lr=cfg.lr) + + def sample_action(self,state): + self.sample_count+=1 + self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * \ + math.exp(-1. * self.sample_count / self.epsilon_decay) + if random.random() < self.epsilon: + action = random.randrange(self.n_actions) + else: + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(0) + q_values, _ = self.policy(state) + action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value + return action + + def predict_action(self,state): + state = torch.tensor(state, device=self.device, dtype=torch.float32).unsqueeze(0) + q_values, _ = self.policy(state) + action = q_values.max(1)[1].item() # choose action corresponding to the maximum q value + return action # .detach().cpu().numpy()[0] + def update(self): + if len(self.memory) < self.batch_size: # when transitions in memory donot meet a batch, not update + return + for i in range(self.n_epochs): + self.update_count += 1 + state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.memory.sample(batch_size=self.batch_size) + + state_batch = torch.tensor(state_batch, device=self.device, dtype=torch.float) + action_batch = torch.tensor(action_batch, device=self.device).unsqueeze(1) + reward_batch = torch.tensor(reward_batch, device=self.device, dtype=torch.float).unsqueeze(1) + next_state_batch = torch.tensor(next_state_batch, device=self.device, dtype=torch.float) + done_batch = torch.tensor(done_batch, device=self.device, dtype=torch.float).unsqueeze(1) + + with torch.no_grad(): + next_state_action, next_probs = self.policy(next_state_batch) + next_log_probs = torch.log(next_probs) + + qf1_next_target, qf2_next_target = self.critic_target(next_state_batch) + min_qf_next_target = (next_probs * (torch.min(qf1_next_target, qf2_next_target) - self.alpha * next_log_probs)).sum(-1).unsqueeze(-1) + next_q_value = reward_batch + (1 - done_batch) * self.gamma * (min_qf_next_target) + + qf1, qf2 = self.critic(state_batch) # Two Q-functions to mitigate positive bias in the policy improvement step + qf1 = qf1.gather(1, action_batch) ; qf2 = qf2.gather(1, action_batch) + + qf1_loss = F.mse_loss(qf1, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q1(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2] + qf2_loss = F.mse_loss(qf2, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q1(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2] + qf_loss = qf1_loss + qf2_loss + + self.critic_optim.zero_grad() + qf_loss.backward() + for param in self.critic.parameters(): + param.grad.data.clamp_(-1, 1) + self.critic_optim.step() + + + pi, probs = self.policy(state_batch) + log_probs = torch.log(probs) + with torch.no_grad(): + qf1_pi, qf2_pi = self.critic(state_batch) + min_qf_pi = torch.min(qf1_pi, qf2_pi) + policy_loss = (probs * ((self.alpha * log_probs) - min_qf_pi)).mean() # Jπ = 𝔼st∼D,εt∼N[α * logπ(f(εt;st)|st) − Q(st,f(εt;st))] + + self.policy_optim.zero_grad() + policy_loss.backward() + for param in self.policy.parameters(): + param.grad.data.clamp_(-1, 1) + self.policy_optim.step() + + log_probs = (probs * log_probs).sum(-1) + alpha_loss = -(self.log_alpha * (log_probs + self.target_entropy).detach()).mean() + self.alpha_optim.zero_grad() + alpha_loss.backward() + self.alpha_optim.step() + + self.alpha = self.log_alpha.exp() + + # hard update + if self.update_count % self.target_update == 0: + for target_param, param in zip(self.critic_target.parameters(), self.critic.parameters()): + target_param.data.copy_( param.data ) + + def save_model(self, fpath): + from pathlib import Path + # create path + Path(fpath).mkdir(parents=True, exist_ok=True) + + torch.save({'policy_state_dict': self.policy.state_dict(), + 'critic_state_dict': self.critic.state_dict(), + 'critic_target_state_dict': self.critic_target.state_dict(), + 'critic_optimizer_state_dict': self.critic_optim.state_dict(), + 'policy_optimizer_state_dict': self.policy_optim.state_dict()}, f"{fpath}/checkpoint.pt") + + + def load_model(self, fpath): + checkpoint = torch.load(f"{fpath}/checkpoint.pt", map_location=self.device) + self.policy.load_state_dict(checkpoint['policy_state_dict']) + self.critic.load_state_dict(checkpoint['critic_state_dict']) + self.critic_target.load_state_dict(checkpoint['critic_target_state_dict']) + self.critic_optim.load_state_dict(checkpoint['critic_optimizer_state_dict']) + self.policy_optim.load_state_dict(checkpoint['policy_optimizer_state_dict']) \ No newline at end of file diff --git a/joyrl/algos/SAC_D/config.py b/joyrl/algos/SAC_D/config.py new file mode 100644 index 0000000..dd29ebb --- /dev/null +++ b/joyrl/algos/SAC_D/config.py @@ -0,0 +1,15 @@ +class AlgoConfig: + def __init__(self) -> None: + self.epsilon_start = 0.95 # epsilon start value + self.epsilon_end = 0.01 # epsilon end value + self.epsilon_decay = 500 # epsilon decay rate + self.lr = 1e-3 # learning rate + self.gamma = 0.99 # discount factor + self.tau = 0.005 # soft update factor + self.alpha = 0.1 # Temperature parameter α determines the relative importance of the entropy term against the reward # 0.1 + self.automatic_entropy_tuning = False # automatically adjust α + self.batch_size = 64 # batch size # 256 + self.hidden_dim = 256 # hidden dimension # 256 + self.n_epochs = 1 # number of epochs + self.target_update = 1 # interval for updating the target network + self.buffer_size = 1000000 # replay buffer size \ No newline at end of file diff --git a/joyrl/algos/SAC_D/trainer.py b/joyrl/algos/SAC_D/trainer.py new file mode 100644 index 0000000..57df82f --- /dev/null +++ b/joyrl/algos/SAC_D/trainer.py @@ -0,0 +1,31 @@ +class Trainer: + def __init__(self) -> None: + pass + def train_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + ep_step += 1 + action = agent.sample_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + agent.memory.push(state, action, reward,next_state, terminated) # save transitions + agent.update() # update agent + state = next_state # update next state for env + ep_reward += reward # + if terminated: + break + return agent,ep_reward,ep_step + def test_one_episode(self, env, agent, cfg): + ep_reward = 0 # reward per episode + ep_step = 0 + state = env.reset() # reset and obtain initial state + for _ in range(cfg.max_steps): + ep_step += 1 + action = agent.predict_action(state) # sample action + next_state, reward, terminated, truncated , info = env.step(action) # update env and return transitions under new_step_api of OpenAI Gym + state = next_state # update next state for env + ep_reward += reward # + if terminated: + break + return agent,ep_reward,ep_step \ No newline at end of file diff --git a/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/config.yaml b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/config.yaml new file mode 100644 index 0000000..f0beb45 --- /dev/null +++ b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/config.yaml @@ -0,0 +1,32 @@ +general_cfg: + algo_name: SAC_D + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: true + load_path: Train_CartPole-v1_SAC_D_20230302-175416 + max_steps: 200 + mode: test + new_step_api: true + render: false + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 200 + wrapper: null +algo_cfg: + alpha: 0.2 + automatic_entropy_tuning: false + batch_size: 64 + buffer_size: 100000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + hidden_dim: 256 + lr: 0.0001 + n_epochs: 1 + target_update: 1 + tau: 0.005 diff --git a/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/logs/log.txt b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/logs/log.txt new file mode 100644 index 0000000..9c415a3 --- /dev/null +++ b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/logs/log.txt @@ -0,0 +1,53 @@ +2023-03-02 17:57:53 - r - INFO: - Hyperparameters: +2023-03-02 17:57:53 - r - INFO: - ================================================================================ +2023-03-02 17:57:53 - r - INFO: - Name Value Type +2023-03-02 17:57:53 - r - INFO: - env_name CartPole-v1 +2023-03-02 17:57:53 - r - INFO: - new_step_api 1 +2023-03-02 17:57:53 - r - INFO: - wrapper None +2023-03-02 17:57:53 - r - INFO: - render 0 +2023-03-02 17:57:53 - r - INFO: - algo_name SAC_D +2023-03-02 17:57:53 - r - INFO: - mode test +2023-03-02 17:57:53 - r - INFO: - seed 1 +2023-03-02 17:57:53 - r - INFO: - device cuda +2023-03-02 17:57:53 - r - INFO: - train_eps 200 +2023-03-02 17:57:53 - r - INFO: - test_eps 10 +2023-03-02 17:57:53 - r - INFO: - eval_eps 10 +2023-03-02 17:57:53 - r - INFO: - eval_per_episode 5 +2023-03-02 17:57:53 - r - INFO: - max_steps 200 +2023-03-02 17:57:53 - r - INFO: - load_checkpoint 1 +2023-03-02 17:57:53 - r - INFO: - load_path Train_CartPole-v1_SAC_D_20230302-175416 +2023-03-02 17:57:53 - r - INFO: - show_fig 0 +2023-03-02 17:57:53 - r - INFO: - save_fig 1 +2023-03-02 17:57:53 - r - INFO: - epsilon_start 0.95 +2023-03-02 17:57:53 - r - INFO: - epsilon_end 0.01 +2023-03-02 17:57:53 - r - INFO: - epsilon_decay 500 +2023-03-02 17:57:53 - r - INFO: - lr 0.0001 +2023-03-02 17:57:53 - r - INFO: - gamma 0.95 +2023-03-02 17:57:53 - r - INFO: - tau 0.005 +2023-03-02 17:57:53 - r - INFO: - alpha 0.2 +2023-03-02 17:57:53 - r - INFO: - automatic_entropy_tuning 0 +2023-03-02 17:57:53 - r - INFO: - batch_size 64 +2023-03-02 17:57:53 - r - INFO: - hidden_dim 256 +2023-03-02 17:57:53 - r - INFO: - n_epochs 1 +2023-03-02 17:57:53 - r - INFO: - target_update 1 +2023-03-02 17:57:53 - r - INFO: - buffer_size 100000 +2023-03-02 17:57:53 - r - INFO: - task_dir /home/dingli/rl-tutorials/joyrl/tasks/Test_CartPole-v1_SAC_D_20230302-175753 +2023-03-02 17:57:53 - r - INFO: - model_dir /home/dingli/rl-tutorials/joyrl/tasks/Test_CartPole-v1_SAC_D_20230302-175753/models +2023-03-02 17:57:53 - r - INFO: - res_dir /home/dingli/rl-tutorials/joyrl/tasks/Test_CartPole-v1_SAC_D_20230302-175753/results +2023-03-02 17:57:53 - r - INFO: - log_dir /home/dingli/rl-tutorials/joyrl/tasks/Test_CartPole-v1_SAC_D_20230302-175753/logs +2023-03-02 17:57:53 - r - INFO: - traj_dir /home/dingli/rl-tutorials/joyrl/tasks/Test_CartPole-v1_SAC_D_20230302-175753/traj +2023-03-02 17:57:53 - r - INFO: - ================================================================================ +2023-03-02 17:57:53 - r - INFO: - n_states: 4, n_actions: 2 +2023-03-02 17:57:55 - r - INFO: - Start testing! +2023-03-02 17:57:55 - r - INFO: - Env: CartPole-v1, Algorithm: SAC_D, Device: cuda +2023-03-02 17:57:56 - r - INFO: - Episode: 1/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 2/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 3/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 4/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 5/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 6/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 7/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 8/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 9/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Episode: 10/10, Reward: 200.000, Step: 200 +2023-03-02 17:57:56 - r - INFO: - Finish testing! diff --git a/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/models/checkpoint.pt b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/models/checkpoint.pt new file mode 100644 index 0000000..af3ab3a Binary files /dev/null and b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/models/checkpoint.pt differ diff --git a/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/results/learning_curve.png b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/results/learning_curve.png new file mode 100644 index 0000000..90d54a2 Binary files /dev/null and b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/results/learning_curve.png differ diff --git a/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/results/res.csv b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/results/res.csv new file mode 100644 index 0000000..cbbcf2e --- /dev/null +++ b/joyrl/benchmarks/Test_CartPole-v1_SAC_D_20230302-175753/results/res.csv @@ -0,0 +1,11 @@ +episodes,rewards,steps +0,200.0,200 +1,200.0,200 +2,200.0,200 +3,200.0,200 +4,200.0,200 +5,200.0,200 +6,200.0,200 +7,200.0,200 +8,200.0,200 +9,200.0,200 diff --git a/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/config.yaml b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/config.yaml new file mode 100644 index 0000000..ab366c6 --- /dev/null +++ b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/config.yaml @@ -0,0 +1,32 @@ +general_cfg: + algo_name: SAC_D + device: cuda + env_name: CartPole-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + mode: train + new_step_api: true + render: false + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 200 + wrapper: null +algo_cfg: + alpha: 0.2 + automatic_entropy_tuning: false + batch_size: 64 + buffer_size: 1000000 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + hidden_dim: 256 + lr: 0.0001 + n_epochs: 1 + target_update: 1 + tau: 0.005 diff --git a/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/logs/log.txt b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/logs/log.txt new file mode 100644 index 0000000..6d6962a --- /dev/null +++ b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/logs/log.txt @@ -0,0 +1,269 @@ +2023-03-02 17:54:16 - r - INFO: - Hyperparameters: +2023-03-02 17:54:16 - r - INFO: - ================================================================================ +2023-03-02 17:54:16 - r - INFO: - Name Value Type +2023-03-02 17:54:16 - r - INFO: - env_name CartPole-v1 +2023-03-02 17:54:16 - r - INFO: - new_step_api 1 +2023-03-02 17:54:16 - r - INFO: - wrapper None +2023-03-02 17:54:16 - r - INFO: - render 0 +2023-03-02 17:54:16 - r - INFO: - algo_name SAC_D +2023-03-02 17:54:16 - r - INFO: - mode train +2023-03-02 17:54:16 - r - INFO: - seed 1 +2023-03-02 17:54:16 - r - INFO: - device cuda +2023-03-02 17:54:16 - r - INFO: - train_eps 200 +2023-03-02 17:54:16 - r - INFO: - test_eps 10 +2023-03-02 17:54:16 - r - INFO: - eval_eps 10 +2023-03-02 17:54:16 - r - INFO: - eval_per_episode 5 +2023-03-02 17:54:16 - r - INFO: - max_steps 200 +2023-03-02 17:54:16 - r - INFO: - load_checkpoint 0 +2023-03-02 17:54:16 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 +2023-03-02 17:54:16 - r - INFO: - show_fig 0 +2023-03-02 17:54:16 - r - INFO: - save_fig 1 +2023-03-02 17:54:16 - r - INFO: - epsilon_start 0.95 +2023-03-02 17:54:16 - r - INFO: - epsilon_end 0.01 +2023-03-02 17:54:16 - r - INFO: - epsilon_decay 500 +2023-03-02 17:54:16 - r - INFO: - lr 0.0001 +2023-03-02 17:54:16 - r - INFO: - gamma 0.95 +2023-03-02 17:54:16 - r - INFO: - tau 0.005 +2023-03-02 17:54:16 - r - INFO: - alpha 0.2 +2023-03-02 17:54:16 - r - INFO: - automatic_entropy_tuning 0 +2023-03-02 17:54:16 - r - INFO: - batch_size 64 +2023-03-02 17:54:16 - r - INFO: - hidden_dim 256 +2023-03-02 17:54:16 - r - INFO: - n_epochs 1 +2023-03-02 17:54:16 - r - INFO: - target_update 1 +2023-03-02 17:54:16 - r - INFO: - buffer_size 1000000 +2023-03-02 17:54:16 - r - INFO: - task_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_CartPole-v1_SAC_D_20230302-175416 +2023-03-02 17:54:16 - r - INFO: - model_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_CartPole-v1_SAC_D_20230302-175416/models +2023-03-02 17:54:16 - r - INFO: - res_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_CartPole-v1_SAC_D_20230302-175416/results +2023-03-02 17:54:16 - r - INFO: - log_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_CartPole-v1_SAC_D_20230302-175416/logs +2023-03-02 17:54:16 - r - INFO: - traj_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_CartPole-v1_SAC_D_20230302-175416/traj +2023-03-02 17:54:16 - r - INFO: - ================================================================================ +2023-03-02 17:54:16 - r - INFO: - n_states: 4, n_actions: 2 +2023-03-02 17:54:18 - r - INFO: - Start training! +2023-03-02 17:54:18 - r - INFO: - Env: CartPole-v1, Algorithm: SAC_D, Device: cuda +2023-03-02 17:54:18 - r - INFO: - Episode: 1/200, Reward: 17.000, Step: 17 +2023-03-02 17:54:18 - r - INFO: - Episode: 2/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:18 - r - INFO: - Episode: 3/200, Reward: 37.000, Step: 37 +2023-03-02 17:54:18 - r - INFO: - Episode: 4/200, Reward: 23.000, Step: 23 +2023-03-02 17:54:18 - r - INFO: - Episode: 5/200, Reward: 15.000, Step: 15 +2023-03-02 17:54:18 - r - INFO: - Current episode 5 has the best eval reward: 9.500 +2023-03-02 17:54:18 - r - INFO: - Episode: 6/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:18 - r - INFO: - Episode: 7/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:19 - r - INFO: - Episode: 8/200, Reward: 13.000, Step: 13 +2023-03-02 17:54:19 - r - INFO: - Episode: 9/200, Reward: 13.000, Step: 13 +2023-03-02 17:54:19 - r - INFO: - Episode: 10/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:19 - r - INFO: - Episode: 11/200, Reward: 14.000, Step: 14 +2023-03-02 17:54:19 - r - INFO: - Episode: 12/200, Reward: 17.000, Step: 17 +2023-03-02 17:54:19 - r - INFO: - Episode: 13/200, Reward: 14.000, Step: 14 +2023-03-02 17:54:19 - r - INFO: - Episode: 14/200, Reward: 25.000, Step: 25 +2023-03-02 17:54:19 - r - INFO: - Episode: 15/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:19 - r - INFO: - Episode: 16/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:19 - r - INFO: - Episode: 17/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:20 - r - INFO: - Episode: 18/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:20 - r - INFO: - Episode: 19/200, Reward: 15.000, Step: 15 +2023-03-02 17:54:20 - r - INFO: - Episode: 20/200, Reward: 15.000, Step: 15 +2023-03-02 17:54:20 - r - INFO: - Current episode 20 has the best eval reward: 9.500 +2023-03-02 17:54:20 - r - INFO: - Episode: 21/200, Reward: 16.000, Step: 16 +2023-03-02 17:54:20 - r - INFO: - Episode: 22/200, Reward: 14.000, Step: 14 +2023-03-02 17:54:20 - r - INFO: - Episode: 23/200, Reward: 22.000, Step: 22 +2023-03-02 17:54:20 - r - INFO: - Episode: 24/200, Reward: 14.000, Step: 14 +2023-03-02 17:54:20 - r - INFO: - Episode: 25/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:20 - r - INFO: - Episode: 26/200, Reward: 13.000, Step: 13 +2023-03-02 17:54:21 - r - INFO: - Episode: 27/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:21 - r - INFO: - Episode: 28/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:21 - r - INFO: - Episode: 29/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:21 - r - INFO: - Episode: 30/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:21 - r - INFO: - Episode: 31/200, Reward: 13.000, Step: 13 +2023-03-02 17:54:21 - r - INFO: - Episode: 32/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:21 - r - INFO: - Episode: 33/200, Reward: 14.000, Step: 14 +2023-03-02 17:54:21 - r - INFO: - Episode: 34/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:21 - r - INFO: - Episode: 35/200, Reward: 13.000, Step: 13 +2023-03-02 17:54:21 - r - INFO: - Current episode 35 has the best eval reward: 9.500 +2023-03-02 17:54:21 - r - INFO: - Episode: 36/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:21 - r - INFO: - Episode: 37/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:21 - r - INFO: - Episode: 38/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:22 - r - INFO: - Episode: 39/200, Reward: 8.000, Step: 8 +2023-03-02 17:54:22 - r - INFO: - Episode: 40/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:22 - r - INFO: - Current episode 40 has the best eval reward: 9.600 +2023-03-02 17:54:22 - r - INFO: - Episode: 41/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:22 - r - INFO: - Episode: 42/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:22 - r - INFO: - Episode: 43/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:22 - r - INFO: - Episode: 44/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:22 - r - INFO: - Episode: 45/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:22 - r - INFO: - Episode: 46/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:22 - r - INFO: - Episode: 47/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:22 - r - INFO: - Episode: 48/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:22 - r - INFO: - Episode: 49/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:23 - r - INFO: - Episode: 50/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:23 - r - INFO: - Current episode 50 has the best eval reward: 9.700 +2023-03-02 17:54:23 - r - INFO: - Episode: 51/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:23 - r - INFO: - Episode: 52/200, Reward: 8.000, Step: 8 +2023-03-02 17:54:23 - r - INFO: - Episode: 53/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:23 - r - INFO: - Episode: 54/200, Reward: 8.000, Step: 8 +2023-03-02 17:54:23 - r - INFO: - Episode: 55/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:23 - r - INFO: - Current episode 55 has the best eval reward: 11.200 +2023-03-02 17:54:23 - r - INFO: - Episode: 56/200, Reward: 14.000, Step: 14 +2023-03-02 17:54:23 - r - INFO: - Episode: 57/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:23 - r - INFO: - Episode: 58/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:23 - r - INFO: - Episode: 59/200, Reward: 13.000, Step: 13 +2023-03-02 17:54:23 - r - INFO: - Episode: 60/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:24 - r - INFO: - Episode: 61/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:24 - r - INFO: - Episode: 62/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:24 - r - INFO: - Episode: 63/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:24 - r - INFO: - Episode: 64/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:24 - r - INFO: - Episode: 65/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:24 - r - INFO: - Episode: 66/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:24 - r - INFO: - Episode: 67/200, Reward: 8.000, Step: 8 +2023-03-02 17:54:24 - r - INFO: - Episode: 68/200, Reward: 15.000, Step: 15 +2023-03-02 17:54:24 - r - INFO: - Episode: 69/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:24 - r - INFO: - Episode: 70/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:24 - r - INFO: - Episode: 71/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:24 - r - INFO: - Episode: 72/200, Reward: 11.000, Step: 11 +2023-03-02 17:54:25 - r - INFO: - Episode: 73/200, Reward: 12.000, Step: 12 +2023-03-02 17:54:25 - r - INFO: - Episode: 74/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:25 - r - INFO: - Episode: 75/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:25 - r - INFO: - Episode: 76/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:25 - r - INFO: - Episode: 77/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:25 - r - INFO: - Episode: 78/200, Reward: 10.000, Step: 10 +2023-03-02 17:54:25 - r - INFO: - Episode: 79/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:25 - r - INFO: - Episode: 80/200, Reward: 9.000, Step: 9 +2023-03-02 17:54:25 - r - INFO: - Current episode 80 has the best eval reward: 19.200 +2023-03-02 17:54:25 - r - INFO: - Episode: 81/200, Reward: 25.000, Step: 25 +2023-03-02 17:54:25 - r - INFO: - Episode: 82/200, Reward: 21.000, Step: 21 +2023-03-02 17:54:26 - r - INFO: - Episode: 83/200, Reward: 26.000, Step: 26 +2023-03-02 17:54:26 - r - INFO: - Episode: 84/200, Reward: 21.000, Step: 21 +2023-03-02 17:54:26 - r - INFO: - Episode: 85/200, Reward: 15.000, Step: 15 +2023-03-02 17:54:26 - r - INFO: - Episode: 86/200, Reward: 21.000, Step: 21 +2023-03-02 17:54:26 - r - INFO: - Episode: 87/200, Reward: 17.000, Step: 17 +2023-03-02 17:54:27 - r - INFO: - Episode: 88/200, Reward: 28.000, Step: 28 +2023-03-02 17:54:27 - r - INFO: - Episode: 89/200, Reward: 16.000, Step: 16 +2023-03-02 17:54:27 - r - INFO: - Episode: 90/200, Reward: 19.000, Step: 19 +2023-03-02 17:54:27 - r - INFO: - Current episode 90 has the best eval reward: 20.400 +2023-03-02 17:54:27 - r - INFO: - Episode: 91/200, Reward: 19.000, Step: 19 +2023-03-02 17:54:27 - r - INFO: - Episode: 92/200, Reward: 18.000, Step: 18 +2023-03-02 17:54:27 - r - INFO: - Episode: 93/200, Reward: 24.000, Step: 24 +2023-03-02 17:54:28 - r - INFO: - Episode: 94/200, Reward: 61.000, Step: 61 +2023-03-02 17:54:28 - r - INFO: - Episode: 95/200, Reward: 33.000, Step: 33 +2023-03-02 17:54:28 - r - INFO: - Current episode 95 has the best eval reward: 52.500 +2023-03-02 17:54:29 - r - INFO: - Episode: 96/200, Reward: 35.000, Step: 35 +2023-03-02 17:54:29 - r - INFO: - Episode: 97/200, Reward: 72.000, Step: 72 +2023-03-02 17:54:29 - r - INFO: - Episode: 98/200, Reward: 25.000, Step: 25 +2023-03-02 17:54:30 - r - INFO: - Episode: 99/200, Reward: 84.000, Step: 84 +2023-03-02 17:54:31 - r - INFO: - Episode: 100/200, Reward: 47.000, Step: 47 +2023-03-02 17:54:31 - r - INFO: - Episode: 101/200, Reward: 36.000, Step: 36 +2023-03-02 17:54:31 - r - INFO: - Episode: 102/200, Reward: 39.000, Step: 39 +2023-03-02 17:54:32 - r - INFO: - Episode: 103/200, Reward: 40.000, Step: 40 +2023-03-02 17:54:32 - r - INFO: - Episode: 104/200, Reward: 69.000, Step: 69 +2023-03-02 17:54:32 - r - INFO: - Episode: 105/200, Reward: 43.000, Step: 43 +2023-03-02 17:54:33 - r - INFO: - Current episode 105 has the best eval reward: 62.000 +2023-03-02 17:54:33 - r - INFO: - Episode: 106/200, Reward: 53.000, Step: 53 +2023-03-02 17:54:33 - r - INFO: - Episode: 107/200, Reward: 43.000, Step: 43 +2023-03-02 17:54:34 - r - INFO: - Episode: 108/200, Reward: 92.000, Step: 92 +2023-03-02 17:54:35 - r - INFO: - Episode: 109/200, Reward: 92.000, Step: 92 +2023-03-02 17:54:35 - r - INFO: - Episode: 110/200, Reward: 30.000, Step: 30 +2023-03-02 17:54:36 - r - INFO: - Episode: 111/200, Reward: 56.000, Step: 56 +2023-03-02 17:54:36 - r - INFO: - Episode: 112/200, Reward: 50.000, Step: 50 +2023-03-02 17:54:37 - r - INFO: - Episode: 113/200, Reward: 54.000, Step: 54 +2023-03-02 17:54:37 - r - INFO: - Episode: 114/200, Reward: 42.000, Step: 42 +2023-03-02 17:54:38 - r - INFO: - Episode: 115/200, Reward: 139.000, Step: 139 +2023-03-02 17:54:38 - r - INFO: - Current episode 115 has the best eval reward: 128.100 +2023-03-02 17:54:39 - r - INFO: - Episode: 116/200, Reward: 82.000, Step: 82 +2023-03-02 17:54:40 - r - INFO: - Episode: 117/200, Reward: 75.000, Step: 75 +2023-03-02 17:54:40 - r - INFO: - Episode: 118/200, Reward: 68.000, Step: 68 +2023-03-02 17:54:41 - r - INFO: - Episode: 119/200, Reward: 89.000, Step: 89 +2023-03-02 17:54:41 - r - INFO: - Episode: 120/200, Reward: 67.000, Step: 67 +2023-03-02 17:54:42 - r - INFO: - Episode: 121/200, Reward: 94.000, Step: 94 +2023-03-02 17:54:43 - r - INFO: - Episode: 122/200, Reward: 92.000, Step: 92 +2023-03-02 17:54:45 - r - INFO: - Episode: 123/200, Reward: 190.000, Step: 190 +2023-03-02 17:54:45 - r - INFO: - Episode: 124/200, Reward: 78.000, Step: 78 +2023-03-02 17:54:46 - r - INFO: - Episode: 125/200, Reward: 83.000, Step: 83 +2023-03-02 17:54:46 - r - INFO: - Current episode 125 has the best eval reward: 179.300 +2023-03-02 17:54:47 - r - INFO: - Episode: 126/200, Reward: 152.000, Step: 152 +2023-03-02 17:54:49 - r - INFO: - Episode: 127/200, Reward: 145.000, Step: 145 +2023-03-02 17:54:50 - r - INFO: - Episode: 128/200, Reward: 200.000, Step: 200 +2023-03-02 17:54:52 - r - INFO: - Episode: 129/200, Reward: 200.000, Step: 200 +2023-03-02 17:54:54 - r - INFO: - Episode: 130/200, Reward: 200.000, Step: 200 +2023-03-02 17:54:54 - r - INFO: - Current episode 130 has the best eval reward: 182.000 +2023-03-02 17:54:56 - r - INFO: - Episode: 131/200, Reward: 200.000, Step: 200 +2023-03-02 17:54:57 - r - INFO: - Episode: 132/200, Reward: 200.000, Step: 200 +2023-03-02 17:54:59 - r - INFO: - Episode: 133/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:01 - r - INFO: - Episode: 134/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:03 - r - INFO: - Episode: 135/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:03 - r - INFO: - Current episode 135 has the best eval reward: 200.000 +2023-03-02 17:55:05 - r - INFO: - Episode: 136/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:06 - r - INFO: - Episode: 137/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:08 - r - INFO: - Episode: 138/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:10 - r - INFO: - Episode: 139/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:12 - r - INFO: - Episode: 140/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:12 - r - INFO: - Current episode 140 has the best eval reward: 200.000 +2023-03-02 17:55:14 - r - INFO: - Episode: 141/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:15 - r - INFO: - Episode: 142/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:17 - r - INFO: - Episode: 143/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:19 - r - INFO: - Episode: 144/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:20 - r - INFO: - Episode: 145/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:21 - r - INFO: - Current episode 145 has the best eval reward: 200.000 +2023-03-02 17:55:22 - r - INFO: - Episode: 146/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:24 - r - INFO: - Episode: 147/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:26 - r - INFO: - Episode: 148/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:28 - r - INFO: - Episode: 149/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:29 - r - INFO: - Episode: 150/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:30 - r - INFO: - Current episode 150 has the best eval reward: 200.000 +2023-03-02 17:55:32 - r - INFO: - Episode: 151/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:33 - r - INFO: - Episode: 152/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:35 - r - INFO: - Episode: 153/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:36 - r - INFO: - Episode: 154/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:38 - r - INFO: - Episode: 155/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:39 - r - INFO: - Current episode 155 has the best eval reward: 200.000 +2023-03-02 17:55:40 - r - INFO: - Episode: 156/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:42 - r - INFO: - Episode: 157/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:43 - r - INFO: - Episode: 158/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:45 - r - INFO: - Episode: 159/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:47 - r - INFO: - Episode: 160/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:47 - r - INFO: - Current episode 160 has the best eval reward: 200.000 +2023-03-02 17:55:49 - r - INFO: - Episode: 161/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:50 - r - INFO: - Episode: 162/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:52 - r - INFO: - Episode: 163/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:54 - r - INFO: - Episode: 164/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:55 - r - INFO: - Episode: 165/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:56 - r - INFO: - Current episode 165 has the best eval reward: 200.000 +2023-03-02 17:55:57 - r - INFO: - Episode: 166/200, Reward: 200.000, Step: 200 +2023-03-02 17:55:59 - r - INFO: - Episode: 167/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:01 - r - INFO: - Episode: 168/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:02 - r - INFO: - Episode: 169/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:04 - r - INFO: - Episode: 170/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:04 - r - INFO: - Current episode 170 has the best eval reward: 200.000 +2023-03-02 17:56:06 - r - INFO: - Episode: 171/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:08 - r - INFO: - Episode: 172/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:09 - r - INFO: - Episode: 173/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:11 - r - INFO: - Episode: 174/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:13 - r - INFO: - Episode: 175/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:14 - r - INFO: - Episode: 176/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:16 - r - INFO: - Episode: 177/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:18 - r - INFO: - Episode: 178/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:20 - r - INFO: - Episode: 179/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:21 - r - INFO: - Episode: 180/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:22 - r - INFO: - Current episode 180 has the best eval reward: 200.000 +2023-03-02 17:56:23 - r - INFO: - Episode: 181/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:25 - r - INFO: - Episode: 182/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:27 - r - INFO: - Episode: 183/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:28 - r - INFO: - Episode: 184/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:30 - r - INFO: - Episode: 185/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:30 - r - INFO: - Current episode 185 has the best eval reward: 200.000 +2023-03-02 17:56:32 - r - INFO: - Episode: 186/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:34 - r - INFO: - Episode: 187/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:35 - r - INFO: - Episode: 188/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:37 - r - INFO: - Episode: 189/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:39 - r - INFO: - Episode: 190/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:39 - r - INFO: - Current episode 190 has the best eval reward: 200.000 +2023-03-02 17:56:41 - r - INFO: - Episode: 191/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:42 - r - INFO: - Episode: 192/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:44 - r - INFO: - Episode: 193/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:46 - r - INFO: - Episode: 194/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:47 - r - INFO: - Episode: 195/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:48 - r - INFO: - Current episode 195 has the best eval reward: 200.000 +2023-03-02 17:56:50 - r - INFO: - Episode: 196/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:51 - r - INFO: - Episode: 197/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:53 - r - INFO: - Episode: 198/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:55 - r - INFO: - Episode: 199/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:56 - r - INFO: - Episode: 200/200, Reward: 200.000, Step: 200 +2023-03-02 17:56:57 - r - INFO: - Current episode 200 has the best eval reward: 200.000 +2023-03-02 17:56:57 - r - INFO: - Finish training! diff --git a/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/models/checkpoint.pt b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/models/checkpoint.pt new file mode 100644 index 0000000..af3ab3a Binary files /dev/null and b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/models/checkpoint.pt differ diff --git a/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/results/learning_curve.png b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/results/learning_curve.png new file mode 100644 index 0000000..8a38a7b Binary files /dev/null and b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/results/learning_curve.png differ diff --git a/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/results/res.csv b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/results/res.csv new file mode 100644 index 0000000..499a64a --- /dev/null +++ b/joyrl/benchmarks/Train_CartPole-v1_SAC_D_20230302-175416/results/res.csv @@ -0,0 +1,201 @@ +episodes,rewards,steps +0,17.0,17 +1,12.0,12 +2,37.0,37 +3,23.0,23 +4,15.0,15 +5,12.0,12 +6,9.0,9 +7,13.0,13 +8,13.0,13 +9,12.0,12 +10,14.0,14 +11,17.0,17 +12,14.0,14 +13,25.0,25 +14,11.0,11 +15,9.0,9 +16,12.0,12 +17,9.0,9 +18,15.0,15 +19,15.0,15 +20,16.0,16 +21,14.0,14 +22,22.0,22 +23,14.0,14 +24,9.0,9 +25,13.0,13 +26,11.0,11 +27,12.0,12 +28,9.0,9 +29,10.0,10 +30,13.0,13 +31,12.0,12 +32,14.0,14 +33,9.0,9 +34,13.0,13 +35,10.0,10 +36,10.0,10 +37,12.0,12 +38,8.0,8 +39,9.0,9 +40,9.0,9 +41,11.0,11 +42,10.0,10 +43,11.0,11 +44,9.0,9 +45,11.0,11 +46,11.0,11 +47,11.0,11 +48,10.0,10 +49,12.0,12 +50,10.0,10 +51,8.0,8 +52,10.0,10 +53,8.0,8 +54,9.0,9 +55,14.0,14 +56,11.0,11 +57,10.0,10 +58,13.0,13 +59,9.0,9 +60,11.0,11 +61,9.0,9 +62,10.0,10 +63,9.0,9 +64,10.0,10 +65,10.0,10 +66,8.0,8 +67,15.0,15 +68,11.0,11 +69,12.0,12 +70,11.0,11 +71,11.0,11 +72,12.0,12 +73,9.0,9 +74,10.0,10 +75,9.0,9 +76,10.0,10 +77,10.0,10 +78,9.0,9 +79,9.0,9 +80,25.0,25 +81,21.0,21 +82,26.0,26 +83,21.0,21 +84,15.0,15 +85,21.0,21 +86,17.0,17 +87,28.0,28 +88,16.0,16 +89,19.0,19 +90,19.0,19 +91,18.0,18 +92,24.0,24 +93,61.0,61 +94,33.0,33 +95,35.0,35 +96,72.0,72 +97,25.0,25 +98,84.0,84 +99,47.0,47 +100,36.0,36 +101,39.0,39 +102,40.0,40 +103,69.0,69 +104,43.0,43 +105,53.0,53 +106,43.0,43 +107,92.0,92 +108,92.0,92 +109,30.0,30 +110,56.0,56 +111,50.0,50 +112,54.0,54 +113,42.0,42 +114,139.0,139 +115,82.0,82 +116,75.0,75 +117,68.0,68 +118,89.0,89 +119,67.0,67 +120,94.0,94 +121,92.0,92 +122,190.0,190 +123,78.0,78 +124,83.0,83 +125,152.0,152 +126,145.0,145 +127,200.0,200 +128,200.0,200 +129,200.0,200 +130,200.0,200 +131,200.0,200 +132,200.0,200 +133,200.0,200 +134,200.0,200 +135,200.0,200 +136,200.0,200 +137,200.0,200 +138,200.0,200 +139,200.0,200 +140,200.0,200 +141,200.0,200 +142,200.0,200 +143,200.0,200 +144,200.0,200 +145,200.0,200 +146,200.0,200 +147,200.0,200 +148,200.0,200 +149,200.0,200 +150,200.0,200 +151,200.0,200 +152,200.0,200 +153,200.0,200 +154,200.0,200 +155,200.0,200 +156,200.0,200 +157,200.0,200 +158,200.0,200 +159,200.0,200 +160,200.0,200 +161,200.0,200 +162,200.0,200 +163,200.0,200 +164,200.0,200 +165,200.0,200 +166,200.0,200 +167,200.0,200 +168,200.0,200 +169,200.0,200 +170,200.0,200 +171,200.0,200 +172,200.0,200 +173,200.0,200 +174,200.0,200 +175,200.0,200 +176,200.0,200 +177,200.0,200 +178,200.0,200 +179,200.0,200 +180,200.0,200 +181,200.0,200 +182,200.0,200 +183,200.0,200 +184,200.0,200 +185,200.0,200 +186,200.0,200 +187,200.0,200 +188,200.0,200 +189,200.0,200 +190,200.0,200 +191,200.0,200 +192,200.0,200 +193,200.0,200 +194,200.0,200 +195,200.0,200 +196,200.0,200 +197,200.0,200 +198,200.0,200 +199,200.0,200 diff --git a/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/config.yaml b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/config.yaml new file mode 100644 index 0000000..0f9f10a --- /dev/null +++ b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/config.yaml @@ -0,0 +1,32 @@ +general_cfg: + algo_name: SAC + device: cpu + env_name: Pendulum-v1 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + mode: train + new_step_api: true + render: false + save_fig: true + seed: 10 + show_fig: false + test_eps: 10 + train_eps: 400 + wrapper: null +algo_cfg: + alpha: 0.2 + automatic_entropy_tuning: false + batch_size: 64 + buffer_size: 1000000 + continous: false + gamma: 0.99 + hidden_dim: 64 + lr: 0.001 + n_epochs: 1 + policy_type: Gaussian + start_steps: 10000 + target_update_fre: 1 + tau: 0.005 diff --git a/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/logs/log.txt b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/logs/log.txt new file mode 100644 index 0000000..def6026 --- /dev/null +++ b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/logs/log.txt @@ -0,0 +1,448 @@ +2023-02-23 17:57:27 - r - INFO: - Hyperparameters: +2023-02-23 17:57:27 - r - INFO: - ================================================================================ +2023-02-23 17:57:27 - r - INFO: - Name Value Type +2023-02-23 17:57:27 - r - INFO: - env_name Pendulum-v1 +2023-02-23 17:57:27 - r - INFO: - new_step_api 1 +2023-02-23 17:57:27 - r - INFO: - wrapper None +2023-02-23 17:57:27 - r - INFO: - render 0 +2023-02-23 17:57:27 - r - INFO: - algo_name SAC +2023-02-23 17:57:27 - r - INFO: - mode train +2023-02-23 17:57:27 - r - INFO: - seed 10 +2023-02-23 17:57:27 - r - INFO: - device cpu +2023-02-23 17:57:27 - r - INFO: - train_eps 400 +2023-02-23 17:57:27 - r - INFO: - test_eps 10 +2023-02-23 17:57:27 - r - INFO: - eval_eps 10 +2023-02-23 17:57:27 - r - INFO: - eval_per_episode 5 +2023-02-23 17:57:27 - r - INFO: - max_steps 200 +2023-02-23 17:57:27 - r - INFO: - load_checkpoint 0 +2023-02-23 17:57:27 - r - INFO: - load_path Train_CartPole-v1_DQN_20221026-054757 +2023-02-23 17:57:27 - r - INFO: - show_fig 0 +2023-02-23 17:57:27 - r - INFO: - save_fig 1 +2023-02-23 17:57:27 - r - INFO: - policy_type Gaussian +2023-02-23 17:57:27 - r - INFO: - lr 0.001 +2023-02-23 17:57:27 - r - INFO: - gamma 0.99 +2023-02-23 17:57:27 - r - INFO: - tau 0.005 +2023-02-23 17:57:27 - r - INFO: - alpha 0.2 +2023-02-23 17:57:27 - r - INFO: - automatic_entropy_tuning 0 +2023-02-23 17:57:27 - r - INFO: - batch_size 64 +2023-02-23 17:57:27 - r - INFO: - hidden_dim 64 +2023-02-23 17:57:27 - r - INFO: - n_epochs 1 +2023-02-23 17:57:27 - r - INFO: - start_steps 10000 +2023-02-23 17:57:27 - r - INFO: - target_update_fre 1 +2023-02-23 17:57:27 - r - INFO: - buffer_size 1000000 +2023-02-23 17:57:27 - r - INFO: - continous 0 +2023-02-23 17:57:27 - r - INFO: - task_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_Pendulum-v1_SAC_20230223-175727 +2023-02-23 17:57:27 - r - INFO: - model_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_Pendulum-v1_SAC_20230223-175727/models +2023-02-23 17:57:27 - r - INFO: - res_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_Pendulum-v1_SAC_20230223-175727/results +2023-02-23 17:57:27 - r - INFO: - log_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_Pendulum-v1_SAC_20230223-175727/logs +2023-02-23 17:57:27 - r - INFO: - traj_dir /home/dingli/rl-tutorials/joyrl/tasks/Train_Pendulum-v1_SAC_20230223-175727/traj +2023-02-23 17:57:27 - r - INFO: - ================================================================================ +2023-02-23 17:57:27 - r - INFO: - action_bound: 2.0 +2023-02-23 17:57:27 - r - INFO: - n_states: 3, n_actions: 1 +2023-02-23 17:57:27 - r - INFO: - Start training! +2023-02-23 17:57:27 - r - INFO: - Env: Pendulum-v1, Algorithm: SAC, Device: cpu +2023-02-23 17:57:31 - r - INFO: - Episode: 1/400, Reward: -1277.216, Step: 200 +2023-02-23 17:57:36 - r - INFO: - Episode: 2/400, Reward: -919.794, Step: 200 +2023-02-23 17:57:41 - r - INFO: - Episode: 3/400, Reward: -1327.623, Step: 200 +2023-02-23 17:57:45 - r - INFO: - Episode: 4/400, Reward: -1071.776, Step: 200 +2023-02-23 17:57:50 - r - INFO: - Episode: 5/400, Reward: -1730.883, Step: 200 +2023-02-23 17:57:51 - r - INFO: - Current episode 5 has the best eval reward: -1658.065 +2023-02-23 17:57:54 - r - INFO: - Episode: 6/400, Reward: -798.514, Step: 200 +2023-02-23 17:57:58 - r - INFO: - Episode: 7/400, Reward: -1153.813, Step: 200 +2023-02-23 17:58:02 - r - INFO: - Episode: 8/400, Reward: -1300.390, Step: 200 +2023-02-23 17:58:07 - r - INFO: - Episode: 9/400, Reward: -1616.517, Step: 200 +2023-02-23 17:58:10 - r - INFO: - Episode: 10/400, Reward: -988.301, Step: 200 +2023-02-23 17:58:11 - r - INFO: - Current episode 10 has the best eval reward: -938.243 +2023-02-23 17:58:15 - r - INFO: - Episode: 11/400, Reward: -873.625, Step: 200 +2023-02-23 17:58:19 - r - INFO: - Episode: 12/400, Reward: -969.271, Step: 200 +2023-02-23 17:58:24 - r - INFO: - Episode: 13/400, Reward: -1067.889, Step: 200 +2023-02-23 17:58:27 - r - INFO: - Episode: 14/400, Reward: -1726.466, Step: 200 +2023-02-23 17:58:32 - r - INFO: - Episode: 15/400, Reward: -1277.521, Step: 200 +2023-02-23 17:58:33 - r - INFO: - Current episode 15 has the best eval reward: -127.969 +2023-02-23 17:58:37 - r - INFO: - Episode: 16/400, Reward: -1237.172, Step: 200 +2023-02-23 17:58:42 - r - INFO: - Episode: 17/400, Reward: -857.345, Step: 200 +2023-02-23 17:58:46 - r - INFO: - Episode: 18/400, Reward: -1670.113, Step: 200 +2023-02-23 17:58:49 - r - INFO: - Episode: 19/400, Reward: -1451.402, Step: 200 +2023-02-23 17:58:53 - r - INFO: - Episode: 20/400, Reward: -1289.607, Step: 200 +2023-02-23 17:58:58 - r - INFO: - Episode: 21/400, Reward: -1490.241, Step: 200 +2023-02-23 17:59:01 - r - INFO: - Episode: 22/400, Reward: -1679.699, Step: 200 +2023-02-23 17:59:06 - r - INFO: - Episode: 23/400, Reward: -1070.238, Step: 200 +2023-02-23 17:59:11 - r - INFO: - Episode: 24/400, Reward: -1507.422, Step: 200 +2023-02-23 17:59:15 - r - INFO: - Episode: 25/400, Reward: -1815.035, Step: 200 +2023-02-23 17:59:21 - r - INFO: - Episode: 26/400, Reward: -875.410, Step: 200 +2023-02-23 17:59:26 - r - INFO: - Episode: 27/400, Reward: -1070.627, Step: 200 +2023-02-23 17:59:30 - r - INFO: - Episode: 28/400, Reward: -1500.266, Step: 200 +2023-02-23 17:59:32 - r - INFO: - Episode: 29/400, Reward: -1147.623, Step: 200 +2023-02-23 17:59:37 - r - INFO: - Episode: 30/400, Reward: -1169.251, Step: 200 +2023-02-23 17:59:42 - r - INFO: - Episode: 31/400, Reward: -989.438, Step: 200 +2023-02-23 17:59:45 - r - INFO: - Episode: 32/400, Reward: -1379.639, Step: 200 +2023-02-23 17:59:50 - r - INFO: - Episode: 33/400, Reward: -1246.896, Step: 200 +2023-02-23 17:59:55 - r - INFO: - Episode: 34/400, Reward: -864.805, Step: 200 +2023-02-23 18:00:00 - r - INFO: - Episode: 35/400, Reward: -1160.079, Step: 200 +2023-02-23 18:00:05 - r - INFO: - Episode: 36/400, Reward: -1643.487, Step: 200 +2023-02-23 18:00:09 - r - INFO: - Episode: 37/400, Reward: -1174.275, Step: 200 +2023-02-23 18:00:13 - r - INFO: - Episode: 38/400, Reward: -851.454, Step: 200 +2023-02-23 18:00:16 - r - INFO: - Episode: 39/400, Reward: -732.450, Step: 200 +2023-02-23 18:00:21 - r - INFO: - Episode: 40/400, Reward: -1191.985, Step: 200 +2023-02-23 18:00:27 - r - INFO: - Episode: 41/400, Reward: -1265.305, Step: 200 +2023-02-23 18:00:30 - r - INFO: - Episode: 42/400, Reward: -965.722, Step: 200 +2023-02-23 18:00:34 - r - INFO: - Episode: 43/400, Reward: -1256.934, Step: 200 +2023-02-23 18:00:38 - r - INFO: - Episode: 44/400, Reward: -1527.041, Step: 200 +2023-02-23 18:00:42 - r - INFO: - Episode: 45/400, Reward: -1627.675, Step: 200 +2023-02-23 18:00:43 - r - INFO: - Current episode 45 has the best eval reward: -84.110 +2023-02-23 18:00:48 - r - INFO: - Episode: 46/400, Reward: -977.533, Step: 200 +2023-02-23 18:00:52 - r - INFO: - Episode: 47/400, Reward: -990.754, Step: 200 +2023-02-23 18:00:56 - r - INFO: - Episode: 48/400, Reward: -1160.889, Step: 200 +2023-02-23 18:01:01 - r - INFO: - Episode: 49/400, Reward: -969.817, Step: 200 +2023-02-23 18:01:03 - r - INFO: - Episode: 50/400, Reward: -885.858, Step: 200 +2023-02-23 18:01:09 - r - INFO: - Episode: 51/400, Reward: -117.426, Step: 200 +2023-02-23 18:01:14 - r - INFO: - Episode: 52/400, Reward: -120.719, Step: 200 +2023-02-23 18:01:18 - r - INFO: - Episode: 53/400, Reward: -1370.978, Step: 200 +2023-02-23 18:01:23 - r - INFO: - Episode: 54/400, Reward: -226.238, Step: 200 +2023-02-23 18:01:28 - r - INFO: - Episode: 55/400, Reward: -114.981, Step: 200 +2023-02-23 18:01:34 - r - INFO: - Episode: 56/400, Reward: -118.284, Step: 200 +2023-02-23 18:01:40 - r - INFO: - Episode: 57/400, Reward: -124.166, Step: 200 +2023-02-23 18:01:44 - r - INFO: - Episode: 58/400, Reward: -1.008, Step: 200 +2023-02-23 18:01:48 - r - INFO: - Episode: 59/400, Reward: -116.575, Step: 200 +2023-02-23 18:01:53 - r - INFO: - Episode: 60/400, Reward: -120.690, Step: 200 +2023-02-23 18:01:59 - r - INFO: - Episode: 61/400, Reward: -2.564, Step: 200 +2023-02-23 18:02:03 - r - INFO: - Episode: 62/400, Reward: -228.519, Step: 200 +2023-02-23 18:02:08 - r - INFO: - Episode: 63/400, Reward: -233.380, Step: 200 +2023-02-23 18:02:11 - r - INFO: - Episode: 64/400, Reward: -233.605, Step: 200 +2023-02-23 18:02:15 - r - INFO: - Episode: 65/400, Reward: -114.283, Step: 200 +2023-02-23 18:02:19 - r - INFO: - Episode: 66/400, Reward: -303.221, Step: 200 +2023-02-23 18:02:24 - r - INFO: - Episode: 67/400, Reward: -123.543, Step: 200 +2023-02-23 18:02:27 - r - INFO: - Episode: 68/400, Reward: -119.202, Step: 200 +2023-02-23 18:02:30 - r - INFO: - Episode: 69/400, Reward: -123.513, Step: 200 +2023-02-23 18:02:34 - r - INFO: - Episode: 70/400, Reward: -121.393, Step: 200 +2023-02-23 18:02:40 - r - INFO: - Episode: 71/400, Reward: -0.906, Step: 200 +2023-02-23 18:02:43 - r - INFO: - Episode: 72/400, Reward: -114.346, Step: 200 +2023-02-23 18:02:48 - r - INFO: - Episode: 73/400, Reward: -0.458, Step: 200 +2023-02-23 18:02:52 - r - INFO: - Episode: 74/400, Reward: -118.703, Step: 200 +2023-02-23 18:02:57 - r - INFO: - Episode: 75/400, Reward: -116.569, Step: 200 +2023-02-23 18:03:00 - r - INFO: - Episode: 76/400, Reward: -1.248, Step: 200 +2023-02-23 18:03:02 - r - INFO: - Episode: 77/400, Reward: -1.609, Step: 200 +2023-02-23 18:03:03 - r - INFO: - Episode: 78/400, Reward: -234.304, Step: 200 +2023-02-23 18:03:04 - r - INFO: - Episode: 79/400, Reward: -115.065, Step: 200 +2023-02-23 18:03:05 - r - INFO: - Episode: 80/400, Reward: -228.434, Step: 200 +2023-02-23 18:03:06 - r - INFO: - Episode: 81/400, Reward: -2.101, Step: 200 +2023-02-23 18:03:07 - r - INFO: - Episode: 82/400, Reward: -237.795, Step: 200 +2023-02-23 18:03:08 - r - INFO: - Episode: 83/400, Reward: -253.691, Step: 200 +2023-02-23 18:03:09 - r - INFO: - Episode: 84/400, Reward: -115.752, Step: 200 +2023-02-23 18:03:10 - r - INFO: - Episode: 85/400, Reward: -123.629, Step: 200 +2023-02-23 18:03:12 - r - INFO: - Episode: 86/400, Reward: -237.640, Step: 200 +2023-02-23 18:03:13 - r - INFO: - Episode: 87/400, Reward: -120.178, Step: 200 +2023-02-23 18:03:14 - r - INFO: - Episode: 88/400, Reward: -1.343, Step: 200 +2023-02-23 18:03:15 - r - INFO: - Episode: 89/400, Reward: -124.640, Step: 200 +2023-02-23 18:03:16 - r - INFO: - Episode: 90/400, Reward: -234.831, Step: 200 +2023-02-23 18:03:17 - r - INFO: - Episode: 91/400, Reward: -124.601, Step: 200 +2023-02-23 18:03:18 - r - INFO: - Episode: 92/400, Reward: -122.634, Step: 200 +2023-02-23 18:03:19 - r - INFO: - Episode: 93/400, Reward: -123.870, Step: 200 +2023-02-23 18:03:20 - r - INFO: - Episode: 94/400, Reward: -357.039, Step: 200 +2023-02-23 18:03:21 - r - INFO: - Episode: 95/400, Reward: -128.522, Step: 200 +2023-02-23 18:03:23 - r - INFO: - Episode: 96/400, Reward: -357.399, Step: 200 +2023-02-23 18:03:24 - r - INFO: - Episode: 97/400, Reward: -124.879, Step: 200 +2023-02-23 18:03:25 - r - INFO: - Episode: 98/400, Reward: -121.206, Step: 200 +2023-02-23 18:03:26 - r - INFO: - Episode: 99/400, Reward: -129.602, Step: 200 +2023-02-23 18:03:27 - r - INFO: - Episode: 100/400, Reward: -124.818, Step: 200 +2023-02-23 18:03:29 - r - INFO: - Episode: 101/400, Reward: -337.873, Step: 200 +2023-02-23 18:03:30 - r - INFO: - Episode: 102/400, Reward: -129.382, Step: 200 +2023-02-23 18:03:31 - r - INFO: - Episode: 103/400, Reward: -123.645, Step: 200 +2023-02-23 18:03:32 - r - INFO: - Episode: 104/400, Reward: -227.259, Step: 200 +2023-02-23 18:03:33 - r - INFO: - Episode: 105/400, Reward: -235.175, Step: 200 +2023-02-23 18:03:35 - r - INFO: - Episode: 106/400, Reward: -261.137, Step: 200 +2023-02-23 18:03:36 - r - INFO: - Episode: 107/400, Reward: -118.805, Step: 200 +2023-02-23 18:03:37 - r - INFO: - Episode: 108/400, Reward: -116.917, Step: 200 +2023-02-23 18:03:38 - r - INFO: - Episode: 109/400, Reward: -1.661, Step: 200 +2023-02-23 18:03:39 - r - INFO: - Episode: 110/400, Reward: -126.901, Step: 200 +2023-02-23 18:03:41 - r - INFO: - Episode: 111/400, Reward: -245.841, Step: 200 +2023-02-23 18:03:42 - r - INFO: - Episode: 112/400, Reward: -1.304, Step: 200 +2023-02-23 18:03:43 - r - INFO: - Episode: 113/400, Reward: -121.070, Step: 200 +2023-02-23 18:03:43 - r - INFO: - Episode: 114/400, Reward: -115.699, Step: 200 +2023-02-23 18:03:44 - r - INFO: - Episode: 115/400, Reward: -236.537, Step: 200 +2023-02-23 18:03:46 - r - INFO: - Episode: 116/400, Reward: -120.605, Step: 200 +2023-02-23 18:03:47 - r - INFO: - Episode: 117/400, Reward: -120.558, Step: 200 +2023-02-23 18:03:49 - r - INFO: - Episode: 118/400, Reward: -123.835, Step: 200 +2023-02-23 18:03:50 - r - INFO: - Episode: 119/400, Reward: -127.089, Step: 200 +2023-02-23 18:03:51 - r - INFO: - Episode: 120/400, Reward: -121.673, Step: 200 +2023-02-23 18:03:52 - r - INFO: - Episode: 121/400, Reward: -117.981, Step: 200 +2023-02-23 18:03:54 - r - INFO: - Episode: 122/400, Reward: -231.593, Step: 200 +2023-02-23 18:03:54 - r - INFO: - Episode: 123/400, Reward: -235.246, Step: 200 +2023-02-23 18:03:55 - r - INFO: - Episode: 124/400, Reward: -118.501, Step: 200 +2023-02-23 18:03:56 - r - INFO: - Episode: 125/400, Reward: -226.557, Step: 200 +2023-02-23 18:03:58 - r - INFO: - Episode: 126/400, Reward: -123.985, Step: 200 +2023-02-23 18:03:59 - r - INFO: - Episode: 127/400, Reward: -119.453, Step: 200 +2023-02-23 18:04:00 - r - INFO: - Episode: 128/400, Reward: -230.235, Step: 200 +2023-02-23 18:04:01 - r - INFO: - Episode: 129/400, Reward: -123.089, Step: 200 +2023-02-23 18:04:02 - r - INFO: - Episode: 130/400, Reward: -250.778, Step: 200 +2023-02-23 18:04:04 - r - INFO: - Episode: 131/400, Reward: -119.136, Step: 200 +2023-02-23 18:04:05 - r - INFO: - Episode: 132/400, Reward: -224.023, Step: 200 +2023-02-23 18:04:06 - r - INFO: - Episode: 133/400, Reward: -128.073, Step: 200 +2023-02-23 18:04:07 - r - INFO: - Episode: 134/400, Reward: -125.194, Step: 200 +2023-02-23 18:04:08 - r - INFO: - Episode: 135/400, Reward: -117.942, Step: 200 +2023-02-23 18:04:10 - r - INFO: - Episode: 136/400, Reward: -293.175, Step: 200 +2023-02-23 18:04:11 - r - INFO: - Episode: 137/400, Reward: -233.901, Step: 200 +2023-02-23 18:04:12 - r - INFO: - Episode: 138/400, Reward: -127.536, Step: 200 +2023-02-23 18:04:13 - r - INFO: - Episode: 139/400, Reward: -125.852, Step: 200 +2023-02-23 18:04:14 - r - INFO: - Episode: 140/400, Reward: -4.626, Step: 200 +2023-02-23 18:04:15 - r - INFO: - Episode: 141/400, Reward: -121.477, Step: 200 +2023-02-23 18:04:16 - r - INFO: - Episode: 142/400, Reward: -128.645, Step: 200 +2023-02-23 18:04:17 - r - INFO: - Episode: 143/400, Reward: -124.142, Step: 200 +2023-02-23 18:04:18 - r - INFO: - Episode: 144/400, Reward: -126.138, Step: 200 +2023-02-23 18:04:19 - r - INFO: - Episode: 145/400, Reward: -239.834, Step: 200 +2023-02-23 18:04:21 - r - INFO: - Episode: 146/400, Reward: -126.860, Step: 200 +2023-02-23 18:04:22 - r - INFO: - Episode: 147/400, Reward: -243.917, Step: 200 +2023-02-23 18:04:23 - r - INFO: - Episode: 148/400, Reward: -125.254, Step: 200 +2023-02-23 18:04:24 - r - INFO: - Episode: 149/400, Reward: -120.843, Step: 200 +2023-02-23 18:04:25 - r - INFO: - Episode: 150/400, Reward: -124.994, Step: 200 +2023-02-23 18:04:27 - r - INFO: - Episode: 151/400, Reward: -232.281, Step: 200 +2023-02-23 18:04:28 - r - INFO: - Episode: 152/400, Reward: -234.166, Step: 200 +2023-02-23 18:04:29 - r - INFO: - Episode: 153/400, Reward: -128.775, Step: 200 +2023-02-23 18:04:30 - r - INFO: - Episode: 154/400, Reward: -119.237, Step: 200 +2023-02-23 18:04:31 - r - INFO: - Episode: 155/400, Reward: -238.381, Step: 200 +2023-02-23 18:04:32 - r - INFO: - Episode: 156/400, Reward: -3.757, Step: 200 +2023-02-23 18:04:34 - r - INFO: - Episode: 157/400, Reward: -3.541, Step: 200 +2023-02-23 18:04:35 - r - INFO: - Episode: 158/400, Reward: -3.609, Step: 200 +2023-02-23 18:04:36 - r - INFO: - Episode: 159/400, Reward: -244.705, Step: 200 +2023-02-23 18:04:36 - r - INFO: - Episode: 160/400, Reward: -237.873, Step: 200 +2023-02-23 18:04:38 - r - INFO: - Episode: 161/400, Reward: -124.496, Step: 200 +2023-02-23 18:04:39 - r - INFO: - Episode: 162/400, Reward: -125.729, Step: 200 +2023-02-23 18:04:40 - r - INFO: - Episode: 163/400, Reward: -242.331, Step: 200 +2023-02-23 18:04:41 - r - INFO: - Episode: 164/400, Reward: -337.622, Step: 200 +2023-02-23 18:04:42 - r - INFO: - Episode: 165/400, Reward: -125.713, Step: 200 +2023-02-23 18:04:44 - r - INFO: - Episode: 166/400, Reward: -123.387, Step: 200 +2023-02-23 18:04:45 - r - INFO: - Episode: 167/400, Reward: -118.735, Step: 200 +2023-02-23 18:04:46 - r - INFO: - Episode: 168/400, Reward: -251.639, Step: 200 +2023-02-23 18:04:47 - r - INFO: - Episode: 169/400, Reward: -125.514, Step: 200 +2023-02-23 18:04:48 - r - INFO: - Episode: 170/400, Reward: -119.435, Step: 200 +2023-02-23 18:04:50 - r - INFO: - Episode: 171/400, Reward: -227.563, Step: 200 +2023-02-23 18:04:51 - r - INFO: - Episode: 172/400, Reward: -229.818, Step: 200 +2023-02-23 18:04:52 - r - INFO: - Episode: 173/400, Reward: -121.292, Step: 200 +2023-02-23 18:04:53 - r - INFO: - Episode: 174/400, Reward: -120.299, Step: 200 +2023-02-23 18:04:54 - r - INFO: - Episode: 175/400, Reward: -122.705, Step: 200 +2023-02-23 18:04:55 - r - INFO: - Episode: 176/400, Reward: -124.976, Step: 200 +2023-02-23 18:04:56 - r - INFO: - Episode: 177/400, Reward: -120.587, Step: 200 +2023-02-23 18:04:58 - r - INFO: - Episode: 178/400, Reward: -229.365, Step: 200 +2023-02-23 18:04:58 - r - INFO: - Episode: 179/400, Reward: -128.139, Step: 200 +2023-02-23 18:04:59 - r - INFO: - Episode: 180/400, Reward: -242.512, Step: 200 +2023-02-23 18:05:01 - r - INFO: - Episode: 181/400, Reward: -2.780, Step: 200 +2023-02-23 18:05:02 - r - INFO: - Episode: 182/400, Reward: -131.842, Step: 200 +2023-02-23 18:05:03 - r - INFO: - Episode: 183/400, Reward: -119.414, Step: 200 +2023-02-23 18:05:04 - r - INFO: - Episode: 184/400, Reward: -122.954, Step: 200 +2023-02-23 18:05:05 - r - INFO: - Episode: 185/400, Reward: -116.153, Step: 200 +2023-02-23 18:05:07 - r - INFO: - Episode: 186/400, Reward: -127.490, Step: 200 +2023-02-23 18:05:08 - r - INFO: - Episode: 187/400, Reward: -2.520, Step: 200 +2023-02-23 18:05:09 - r - INFO: - Episode: 188/400, Reward: -230.481, Step: 200 +2023-02-23 18:05:10 - r - INFO: - Episode: 189/400, Reward: -238.267, Step: 200 +2023-02-23 18:05:11 - r - INFO: - Episode: 190/400, Reward: -2.604, Step: 200 +2023-02-23 18:05:13 - r - INFO: - Episode: 191/400, Reward: -3.679, Step: 200 +2023-02-23 18:05:14 - r - INFO: - Episode: 192/400, Reward: -119.951, Step: 200 +2023-02-23 18:05:15 - r - INFO: - Episode: 193/400, Reward: -127.197, Step: 200 +2023-02-23 18:05:16 - r - INFO: - Episode: 194/400, Reward: -122.921, Step: 200 +2023-02-23 18:05:17 - r - INFO: - Episode: 195/400, Reward: -232.357, Step: 200 +2023-02-23 18:05:19 - r - INFO: - Episode: 196/400, Reward: -229.004, Step: 200 +2023-02-23 18:05:20 - r - INFO: - Episode: 197/400, Reward: -127.442, Step: 200 +2023-02-23 18:05:21 - r - INFO: - Episode: 198/400, Reward: -128.069, Step: 200 +2023-02-23 18:05:22 - r - INFO: - Episode: 199/400, Reward: -128.410, Step: 200 +2023-02-23 18:05:23 - r - INFO: - Episode: 200/400, Reward: -4.333, Step: 200 +2023-02-23 18:05:24 - r - INFO: - Episode: 201/400, Reward: -334.068, Step: 200 +2023-02-23 18:05:25 - r - INFO: - Episode: 202/400, Reward: -345.768, Step: 200 +2023-02-23 18:05:26 - r - INFO: - Episode: 203/400, Reward: -124.905, Step: 200 +2023-02-23 18:05:27 - r - INFO: - Episode: 204/400, Reward: -5.030, Step: 200 +2023-02-23 18:05:28 - r - INFO: - Episode: 205/400, Reward: -317.938, Step: 200 +2023-02-23 18:05:30 - r - INFO: - Episode: 206/400, Reward: -114.654, Step: 200 +2023-02-23 18:05:31 - r - INFO: - Episode: 207/400, Reward: -114.685, Step: 200 +2023-02-23 18:05:32 - r - INFO: - Episode: 208/400, Reward: -124.014, Step: 200 +2023-02-23 18:05:33 - r - INFO: - Episode: 209/400, Reward: -1.515, Step: 200 +2023-02-23 18:05:34 - r - INFO: - Episode: 210/400, Reward: -2.156, Step: 200 +2023-02-23 18:05:35 - r - INFO: - Episode: 211/400, Reward: -121.759, Step: 200 +2023-02-23 18:05:36 - r - INFO: - Episode: 212/400, Reward: -122.409, Step: 200 +2023-02-23 18:05:37 - r - INFO: - Episode: 213/400, Reward: -246.275, Step: 200 +2023-02-23 18:05:38 - r - INFO: - Episode: 214/400, Reward: -248.994, Step: 200 +2023-02-23 18:05:39 - r - INFO: - Episode: 215/400, Reward: -120.145, Step: 200 +2023-02-23 18:05:41 - r - INFO: - Episode: 216/400, Reward: -117.457, Step: 200 +2023-02-23 18:05:42 - r - INFO: - Episode: 217/400, Reward: -123.658, Step: 200 +2023-02-23 18:05:43 - r - INFO: - Episode: 218/400, Reward: -124.004, Step: 200 +2023-02-23 18:05:44 - r - INFO: - Episode: 219/400, Reward: -3.136, Step: 200 +2023-02-23 18:05:45 - r - INFO: - Episode: 220/400, Reward: -123.119, Step: 200 +2023-02-23 18:05:47 - r - INFO: - Episode: 221/400, Reward: -247.126, Step: 200 +2023-02-23 18:05:48 - r - INFO: - Episode: 222/400, Reward: -223.845, Step: 200 +2023-02-23 18:05:49 - r - INFO: - Episode: 223/400, Reward: -6.159, Step: 200 +2023-02-23 18:05:50 - r - INFO: - Episode: 224/400, Reward: -116.789, Step: 200 +2023-02-23 18:05:51 - r - INFO: - Episode: 225/400, Reward: -127.785, Step: 200 +2023-02-23 18:05:52 - r - INFO: - Episode: 226/400, Reward: -2.806, Step: 200 +2023-02-23 18:05:53 - r - INFO: - Episode: 227/400, Reward: -117.938, Step: 200 +2023-02-23 18:05:54 - r - INFO: - Episode: 228/400, Reward: -116.786, Step: 200 +2023-02-23 18:05:55 - r - INFO: - Episode: 229/400, Reward: -229.099, Step: 200 +2023-02-23 18:05:56 - r - INFO: - Episode: 230/400, Reward: -237.441, Step: 200 +2023-02-23 18:05:58 - r - INFO: - Episode: 231/400, Reward: -128.706, Step: 200 +2023-02-23 18:05:59 - r - INFO: - Episode: 232/400, Reward: -4.242, Step: 200 +2023-02-23 18:06:00 - r - INFO: - Episode: 233/400, Reward: -228.298, Step: 200 +2023-02-23 18:06:00 - r - INFO: - Episode: 234/400, Reward: -126.216, Step: 200 +2023-02-23 18:06:01 - r - INFO: - Episode: 235/400, Reward: -125.393, Step: 200 +2023-02-23 18:06:03 - r - INFO: - Episode: 236/400, Reward: -349.810, Step: 200 +2023-02-23 18:06:04 - r - INFO: - Episode: 237/400, Reward: -126.923, Step: 200 +2023-02-23 18:06:05 - r - INFO: - Episode: 238/400, Reward: -226.350, Step: 200 +2023-02-23 18:06:06 - r - INFO: - Episode: 239/400, Reward: -6.655, Step: 200 +2023-02-23 18:06:07 - r - INFO: - Episode: 240/400, Reward: -237.138, Step: 200 +2023-02-23 18:06:09 - r - INFO: - Episode: 241/400, Reward: -126.106, Step: 200 +2023-02-23 18:06:10 - r - INFO: - Episode: 242/400, Reward: -121.713, Step: 200 +2023-02-23 18:06:11 - r - INFO: - Episode: 243/400, Reward: -237.439, Step: 200 +2023-02-23 18:06:12 - r - INFO: - Episode: 244/400, Reward: -124.061, Step: 200 +2023-02-23 18:06:13 - r - INFO: - Episode: 245/400, Reward: -117.127, Step: 200 +2023-02-23 18:06:15 - r - INFO: - Episode: 246/400, Reward: -121.850, Step: 200 +2023-02-23 18:06:16 - r - INFO: - Episode: 247/400, Reward: -119.954, Step: 200 +2023-02-23 18:06:17 - r - INFO: - Episode: 248/400, Reward: -4.865, Step: 200 +2023-02-23 18:06:18 - r - INFO: - Episode: 249/400, Reward: -129.402, Step: 200 +2023-02-23 18:06:19 - r - INFO: - Episode: 250/400, Reward: -124.066, Step: 200 +2023-02-23 18:06:21 - r - INFO: - Episode: 251/400, Reward: -122.129, Step: 200 +2023-02-23 18:06:22 - r - INFO: - Episode: 252/400, Reward: -127.661, Step: 200 +2023-02-23 18:06:23 - r - INFO: - Episode: 253/400, Reward: -119.543, Step: 200 +2023-02-23 18:06:24 - r - INFO: - Episode: 254/400, Reward: -120.942, Step: 200 +2023-02-23 18:06:25 - r - INFO: - Episode: 255/400, Reward: -129.517, Step: 200 +2023-02-23 18:06:26 - r - INFO: - Episode: 256/400, Reward: -123.719, Step: 200 +2023-02-23 18:06:27 - r - INFO: - Episode: 257/400, Reward: -117.218, Step: 200 +2023-02-23 18:06:28 - r - INFO: - Episode: 258/400, Reward: -228.477, Step: 200 +2023-02-23 18:06:29 - r - INFO: - Episode: 259/400, Reward: -124.058, Step: 200 +2023-02-23 18:06:30 - r - INFO: - Episode: 260/400, Reward: -221.263, Step: 200 +2023-02-23 18:06:32 - r - INFO: - Episode: 261/400, Reward: -3.460, Step: 200 +2023-02-23 18:06:33 - r - INFO: - Episode: 262/400, Reward: -119.923, Step: 200 +2023-02-23 18:06:34 - r - INFO: - Episode: 263/400, Reward: -1.781, Step: 200 +2023-02-23 18:06:35 - r - INFO: - Episode: 264/400, Reward: -2.084, Step: 200 +2023-02-23 18:06:36 - r - INFO: - Episode: 265/400, Reward: -128.649, Step: 200 +2023-02-23 18:06:38 - r - INFO: - Episode: 266/400, Reward: -248.378, Step: 200 +2023-02-23 18:06:39 - r - INFO: - Episode: 267/400, Reward: -235.889, Step: 200 +2023-02-23 18:06:39 - r - INFO: - Episode: 268/400, Reward: -243.804, Step: 200 +2023-02-23 18:06:40 - r - INFO: - Episode: 269/400, Reward: -126.839, Step: 200 +2023-02-23 18:06:41 - r - INFO: - Episode: 270/400, Reward: -328.343, Step: 200 +2023-02-23 18:06:42 - r - INFO: - Episode: 271/400, Reward: -127.395, Step: 200 +2023-02-23 18:06:43 - r - INFO: - Episode: 272/400, Reward: -117.342, Step: 200 +2023-02-23 18:06:44 - r - INFO: - Episode: 273/400, Reward: -237.520, Step: 200 +2023-02-23 18:06:46 - r - INFO: - Episode: 274/400, Reward: -3.849, Step: 200 +2023-02-23 18:06:47 - r - INFO: - Episode: 275/400, Reward: -239.562, Step: 200 +2023-02-23 18:06:49 - r - INFO: - Episode: 276/400, Reward: -337.735, Step: 200 +2023-02-23 18:06:50 - r - INFO: - Episode: 277/400, Reward: -122.551, Step: 200 +2023-02-23 18:06:51 - r - INFO: - Episode: 278/400, Reward: -121.440, Step: 200 +2023-02-23 18:06:52 - r - INFO: - Episode: 279/400, Reward: -233.806, Step: 200 +2023-02-23 18:06:53 - r - INFO: - Episode: 280/400, Reward: -116.855, Step: 200 +2023-02-23 18:06:56 - r - INFO: - Episode: 281/400, Reward: -241.428, Step: 200 +2023-02-23 18:06:57 - r - INFO: - Episode: 282/400, Reward: -1.053, Step: 200 +2023-02-23 18:06:58 - r - INFO: - Episode: 283/400, Reward: -117.119, Step: 200 +2023-02-23 18:06:59 - r - INFO: - Episode: 284/400, Reward: -230.184, Step: 200 +2023-02-23 18:07:00 - r - INFO: - Episode: 285/400, Reward: -236.261, Step: 200 +2023-02-23 18:07:02 - r - INFO: - Episode: 286/400, Reward: -235.835, Step: 200 +2023-02-23 18:07:03 - r - INFO: - Episode: 287/400, Reward: -119.655, Step: 200 +2023-02-23 18:07:05 - r - INFO: - Episode: 288/400, Reward: -123.262, Step: 200 +2023-02-23 18:07:06 - r - INFO: - Episode: 289/400, Reward: -125.274, Step: 200 +2023-02-23 18:07:07 - r - INFO: - Episode: 290/400, Reward: -125.173, Step: 200 +2023-02-23 18:07:09 - r - INFO: - Episode: 291/400, Reward: -119.516, Step: 200 +2023-02-23 18:07:10 - r - INFO: - Episode: 292/400, Reward: -116.497, Step: 200 +2023-02-23 18:07:12 - r - INFO: - Episode: 293/400, Reward: -236.632, Step: 200 +2023-02-23 18:07:13 - r - INFO: - Episode: 294/400, Reward: -225.005, Step: 200 +2023-02-23 18:07:14 - r - INFO: - Episode: 295/400, Reward: -120.580, Step: 200 +2023-02-23 18:07:16 - r - INFO: - Episode: 296/400, Reward: -123.470, Step: 200 +2023-02-23 18:07:17 - r - INFO: - Episode: 297/400, Reward: -123.675, Step: 200 +2023-02-23 18:07:18 - r - INFO: - Episode: 298/400, Reward: -1.727, Step: 200 +2023-02-23 18:07:20 - r - INFO: - Episode: 299/400, Reward: -1.719, Step: 200 +2023-02-23 18:07:21 - r - INFO: - Episode: 300/400, Reward: -1.202, Step: 200 +2023-02-23 18:07:23 - r - INFO: - Episode: 301/400, Reward: -227.902, Step: 200 +2023-02-23 18:07:24 - r - INFO: - Episode: 302/400, Reward: -125.067, Step: 200 +2023-02-23 18:07:25 - r - INFO: - Episode: 303/400, Reward: -121.697, Step: 200 +2023-02-23 18:07:26 - r - INFO: - Episode: 304/400, Reward: -127.879, Step: 200 +2023-02-23 18:07:28 - r - INFO: - Episode: 305/400, Reward: -316.231, Step: 200 +2023-02-23 18:07:30 - r - INFO: - Episode: 306/400, Reward: -247.307, Step: 200 +2023-02-23 18:07:31 - r - INFO: - Episode: 307/400, Reward: -125.626, Step: 200 +2023-02-23 18:07:32 - r - INFO: - Episode: 308/400, Reward: -124.827, Step: 200 +2023-02-23 18:07:33 - r - INFO: - Episode: 309/400, Reward: -247.548, Step: 200 +2023-02-23 18:07:34 - r - INFO: - Episode: 310/400, Reward: -2.574, Step: 200 +2023-02-23 18:07:37 - r - INFO: - Episode: 311/400, Reward: -124.385, Step: 200 +2023-02-23 18:07:38 - r - INFO: - Episode: 312/400, Reward: -226.282, Step: 200 +2023-02-23 18:07:39 - r - INFO: - Episode: 313/400, Reward: -122.007, Step: 200 +2023-02-23 18:07:40 - r - INFO: - Episode: 314/400, Reward: -3.127, Step: 200 +2023-02-23 18:07:41 - r - INFO: - Episode: 315/400, Reward: -120.404, Step: 200 +2023-02-23 18:07:43 - r - INFO: - Episode: 316/400, Reward: -3.230, Step: 200 +2023-02-23 18:07:45 - r - INFO: - Episode: 317/400, Reward: -125.382, Step: 200 +2023-02-23 18:07:46 - r - INFO: - Episode: 318/400, Reward: -228.291, Step: 200 +2023-02-23 18:07:47 - r - INFO: - Episode: 319/400, Reward: -128.367, Step: 200 +2023-02-23 18:07:48 - r - INFO: - Episode: 320/400, Reward: -340.185, Step: 200 +2023-02-23 18:07:50 - r - INFO: - Episode: 321/400, Reward: -3.480, Step: 200 +2023-02-23 18:07:51 - r - INFO: - Episode: 322/400, Reward: -337.110, Step: 200 +2023-02-23 18:07:53 - r - INFO: - Episode: 323/400, Reward: -122.615, Step: 200 +2023-02-23 18:07:54 - r - INFO: - Episode: 324/400, Reward: -230.324, Step: 200 +2023-02-23 18:07:55 - r - INFO: - Episode: 325/400, Reward: -116.848, Step: 200 +2023-02-23 18:07:57 - r - INFO: - Episode: 326/400, Reward: -236.639, Step: 200 +2023-02-23 18:07:58 - r - INFO: - Episode: 327/400, Reward: -129.873, Step: 200 +2023-02-23 18:07:59 - r - INFO: - Episode: 328/400, Reward: -237.080, Step: 200 +2023-02-23 18:08:01 - r - INFO: - Episode: 329/400, Reward: -3.813, Step: 200 +2023-02-23 18:08:02 - r - INFO: - Episode: 330/400, Reward: -126.419, Step: 200 +2023-02-23 18:08:04 - r - INFO: - Episode: 331/400, Reward: -2.246, Step: 200 +2023-02-23 18:08:05 - r - INFO: - Episode: 332/400, Reward: -124.518, Step: 200 +2023-02-23 18:08:06 - r - INFO: - Episode: 333/400, Reward: -351.609, Step: 200 +2023-02-23 18:08:07 - r - INFO: - Episode: 334/400, Reward: -120.765, Step: 200 +2023-02-23 18:08:08 - r - INFO: - Episode: 335/400, Reward: -246.045, Step: 200 +2023-02-23 18:08:10 - r - INFO: - Episode: 336/400, Reward: -122.447, Step: 200 +2023-02-23 18:08:11 - r - INFO: - Episode: 337/400, Reward: -4.377, Step: 200 +2023-02-23 18:08:12 - r - INFO: - Episode: 338/400, Reward: -124.337, Step: 200 +2023-02-23 18:08:13 - r - INFO: - Episode: 339/400, Reward: -120.447, Step: 200 +2023-02-23 18:08:14 - r - INFO: - Episode: 340/400, Reward: -129.811, Step: 200 +2023-02-23 18:08:16 - r - INFO: - Episode: 341/400, Reward: -257.244, Step: 200 +2023-02-23 18:08:17 - r - INFO: - Episode: 342/400, Reward: -128.989, Step: 200 +2023-02-23 18:08:18 - r - INFO: - Episode: 343/400, Reward: -126.065, Step: 200 +2023-02-23 18:08:19 - r - INFO: - Episode: 344/400, Reward: -114.976, Step: 200 +2023-02-23 18:08:20 - r - INFO: - Episode: 345/400, Reward: -234.827, Step: 200 +2023-02-23 18:08:22 - r - INFO: - Episode: 346/400, Reward: -126.401, Step: 200 +2023-02-23 18:08:23 - r - INFO: - Episode: 347/400, Reward: -122.196, Step: 200 +2023-02-23 18:08:24 - r - INFO: - Episode: 348/400, Reward: -231.322, Step: 200 +2023-02-23 18:08:25 - r - INFO: - Episode: 349/400, Reward: -3.506, Step: 200 +2023-02-23 18:08:26 - r - INFO: - Episode: 350/400, Reward: -229.492, Step: 200 +2023-02-23 18:08:28 - r - INFO: - Episode: 351/400, Reward: -126.537, Step: 200 +2023-02-23 18:08:29 - r - INFO: - Episode: 352/400, Reward: -122.644, Step: 200 +2023-02-23 18:08:30 - r - INFO: - Episode: 353/400, Reward: -234.403, Step: 200 +2023-02-23 18:08:30 - r - INFO: - Episode: 354/400, Reward: -125.073, Step: 200 +2023-02-23 18:08:32 - r - INFO: - Episode: 355/400, Reward: -231.391, Step: 200 +2023-02-23 18:08:33 - r - INFO: - Episode: 356/400, Reward: -232.475, Step: 200 +2023-02-23 18:08:34 - r - INFO: - Episode: 357/400, Reward: -115.946, Step: 200 +2023-02-23 18:08:35 - r - INFO: - Episode: 358/400, Reward: -125.046, Step: 200 +2023-02-23 18:08:36 - r - INFO: - Episode: 359/400, Reward: -120.136, Step: 200 +2023-02-23 18:08:37 - r - INFO: - Episode: 360/400, Reward: -126.938, Step: 200 +2023-02-23 18:08:39 - r - INFO: - Episode: 361/400, Reward: -121.118, Step: 200 +2023-02-23 18:08:40 - r - INFO: - Episode: 362/400, Reward: -121.019, Step: 200 +2023-02-23 18:08:41 - r - INFO: - Episode: 363/400, Reward: -229.035, Step: 200 +2023-02-23 18:08:42 - r - INFO: - Episode: 364/400, Reward: -126.549, Step: 200 +2023-02-23 18:08:43 - r - INFO: - Episode: 365/400, Reward: -128.485, Step: 200 +2023-02-23 18:08:44 - r - INFO: - Episode: 366/400, Reward: -229.418, Step: 200 +2023-02-23 18:08:45 - r - INFO: - Episode: 367/400, Reward: -3.244, Step: 200 +2023-02-23 18:08:46 - r - INFO: - Episode: 368/400, Reward: -125.450, Step: 200 +2023-02-23 18:08:47 - r - INFO: - Episode: 369/400, Reward: -241.611, Step: 200 +2023-02-23 18:08:48 - r - INFO: - Episode: 370/400, Reward: -120.539, Step: 200 +2023-02-23 18:08:50 - r - INFO: - Episode: 371/400, Reward: -230.037, Step: 200 +2023-02-23 18:08:51 - r - INFO: - Episode: 372/400, Reward: -130.172, Step: 200 +2023-02-23 18:08:52 - r - INFO: - Episode: 373/400, Reward: -6.547, Step: 200 +2023-02-23 18:08:53 - r - INFO: - Episode: 374/400, Reward: -117.929, Step: 200 +2023-02-23 18:08:54 - r - INFO: - Episode: 375/400, Reward: -129.161, Step: 200 +2023-02-23 18:08:56 - r - INFO: - Episode: 376/400, Reward: -3.479, Step: 200 +2023-02-23 18:08:57 - r - INFO: - Episode: 377/400, Reward: -239.342, Step: 200 +2023-02-23 18:08:58 - r - INFO: - Episode: 378/400, Reward: -124.070, Step: 200 +2023-02-23 18:08:59 - r - INFO: - Episode: 379/400, Reward: -242.031, Step: 200 +2023-02-23 18:09:00 - r - INFO: - Episode: 380/400, Reward: -246.940, Step: 200 +2023-02-23 18:09:01 - r - INFO: - Episode: 381/400, Reward: -233.095, Step: 200 +2023-02-23 18:09:02 - r - INFO: - Episode: 382/400, Reward: -118.124, Step: 200 +2023-02-23 18:09:03 - r - INFO: - Episode: 383/400, Reward: -3.460, Step: 200 +2023-02-23 18:09:04 - r - INFO: - Episode: 384/400, Reward: -228.834, Step: 200 +2023-02-23 18:09:05 - r - INFO: - Episode: 385/400, Reward: -2.770, Step: 200 +2023-02-23 18:09:07 - r - INFO: - Episode: 386/400, Reward: -1.038, Step: 200 +2023-02-23 18:09:08 - r - INFO: - Episode: 387/400, Reward: -119.011, Step: 200 +2023-02-23 18:09:09 - r - INFO: - Episode: 388/400, Reward: -122.313, Step: 200 +2023-02-23 18:09:10 - r - INFO: - Episode: 389/400, Reward: -121.948, Step: 200 +2023-02-23 18:09:11 - r - INFO: - Episode: 390/400, Reward: -117.317, Step: 200 +2023-02-23 18:09:13 - r - INFO: - Episode: 391/400, Reward: -236.070, Step: 200 +2023-02-23 18:09:14 - r - INFO: - Episode: 392/400, Reward: -241.802, Step: 200 +2023-02-23 18:09:15 - r - INFO: - Episode: 393/400, Reward: -248.515, Step: 200 +2023-02-23 18:09:16 - r - INFO: - Episode: 394/400, Reward: -119.671, Step: 200 +2023-02-23 18:09:17 - r - INFO: - Episode: 395/400, Reward: -226.173, Step: 200 +2023-02-23 18:09:18 - r - INFO: - Episode: 396/400, Reward: -1.849, Step: 200 +2023-02-23 18:09:19 - r - INFO: - Episode: 397/400, Reward: -227.384, Step: 200 +2023-02-23 18:09:20 - r - INFO: - Episode: 398/400, Reward: -229.654, Step: 200 +2023-02-23 18:09:21 - r - INFO: - Episode: 399/400, Reward: -245.469, Step: 200 +2023-02-23 18:09:22 - r - INFO: - Episode: 400/400, Reward: -2.061, Step: 200 +2023-02-23 18:09:23 - r - INFO: - Finish training! diff --git a/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/models/checkpoint.pt b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/models/checkpoint.pt new file mode 100644 index 0000000..e532209 Binary files /dev/null and b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/models/checkpoint.pt differ diff --git a/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/results/learning_curve.png b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/results/learning_curve.png new file mode 100644 index 0000000..f3e2f11 Binary files /dev/null and b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/results/learning_curve.png differ diff --git a/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/results/res.csv b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/results/res.csv new file mode 100644 index 0000000..0eb7cdc --- /dev/null +++ b/joyrl/benchmarks/Train_Pendulum-v1_SAC_20230223-175727/results/res.csv @@ -0,0 +1,401 @@ +episodes,rewards,steps +0,-1277.2161306875078,200 +1,-919.794469351202,200 +2,-1327.6230177526347,200 +3,-1071.7755969784448,200 +4,-1730.8832352002155,200 +5,-798.5138894326991,200 +6,-1153.8134027695749,200 +7,-1300.3895442728845,200 +8,-1616.5174867799137,200 +9,-988.3008556949399,200 +10,-873.624512052393,200 +11,-969.2707335705173,200 +12,-1067.8893558462414,200 +13,-1726.46644315275,200 +14,-1277.5209311519723,200 +15,-1237.1723909675852,200 +16,-857.3445056429451,200 +17,-1670.113110734327,200 +18,-1451.40245588009,200 +19,-1289.6066305454117,200 +20,-1490.2412469343822,200 +21,-1679.6989109138294,200 +22,-1070.2375648347174,200 +23,-1507.421537698328,200 +24,-1815.0354314565916,200 +25,-875.4097701692535,200 +26,-1070.6267198064638,200 +27,-1500.2656368785554,200 +28,-1147.6231288328652,200 +29,-1169.250797476638,200 +30,-989.4375942631394,200 +31,-1379.6387570292086,200 +32,-1246.8960961985154,200 +33,-864.8053966095239,200 +34,-1160.079426043244,200 +35,-1643.4866160054223,200 +36,-1174.2747244971576,200 +37,-851.4543275049947,200 +38,-732.449839444244,200 +39,-1191.9845684308311,200 +40,-1265.3053589272965,200 +41,-965.7215727101601,200 +42,-1256.9336520916584,200 +43,-1527.0407478708657,200 +44,-1627.6748131455295,200 +45,-977.5332514796311,200 +46,-990.7536620041255,200 +47,-1160.888813827148,200 +48,-969.816827954668,200 +49,-885.8584810245801,200 +50,-117.42575318414615,200 +51,-120.71890806265694,200 +52,-1370.9779806774832,200 +53,-226.23829280964225,200 +54,-114.9812810009856,200 +55,-118.28402084231385,200 +56,-124.16612888347281,200 +57,-1.0076433921125572,200 +58,-116.57516942696273,200 +59,-120.6902343608343,200 +60,-2.56397625345484,200 +61,-228.51930798817807,200 +62,-233.38030981809115,200 +63,-233.60465211832425,200 +64,-114.28320708895308,200 +65,-303.22105247302903,200 +66,-123.54327356498258,200 +67,-119.2018471899842,200 +68,-123.51299141712119,200 +69,-121.39328272252502,200 +70,-0.9055494909519236,200 +71,-114.34645422776656,200 +72,-0.45826142243403656,200 +73,-118.70313794425857,200 +74,-116.56882215166087,200 +75,-1.2484061187316011,200 +76,-1.6090204864068551,200 +77,-234.3036189482174,200 +78,-115.06545845738637,200 +79,-228.43360044226682,200 +80,-2.101163709189415,200 +81,-237.79513402620125,200 +82,-253.69136904891866,200 +83,-115.75196828605611,200 +84,-123.62850814784485,200 +85,-237.64012271238366,200 +86,-120.1784236641439,200 +87,-1.3428066167547101,200 +88,-124.64038576951457,200 +89,-234.83050660041195,200 +90,-124.60058048179343,200 +91,-122.6343635765944,200 +92,-123.87017060418373,200 +93,-357.03883062922915,200 +94,-128.52186303713088,200 +95,-357.3992839734525,200 +96,-124.87885261067944,200 +97,-121.2061097249631,200 +98,-129.60204425418632,200 +99,-124.81798027544578,200 +100,-337.8730813675059,200 +101,-129.38211347183633,200 +102,-123.64452847940986,200 +103,-227.25872006519876,200 +104,-235.17545976486136,200 +105,-261.1372917314361,200 +106,-118.80485712730045,200 +107,-116.91734428597242,200 +108,-1.6612206541083783,200 +109,-126.90068682614017,200 +110,-245.8412482492627,200 +111,-1.3043374230675258,200 +112,-121.0698965915704,200 +113,-115.699117445081,200 +114,-236.53665732253702,200 +115,-120.60465006943075,200 +116,-120.55761642511057,200 +117,-123.83468188077624,200 +118,-127.08908303659382,200 +119,-121.67307290592905,200 +120,-117.98088232144173,200 +121,-231.5926500033102,200 +122,-235.2458655208511,200 +123,-118.50059903796345,200 +124,-226.5569559442612,200 +125,-123.9854173048887,200 +126,-119.45348208629449,200 +127,-230.23514424211018,200 +128,-123.08949149267724,200 +129,-250.77798814876735,200 +130,-119.1364754826375,200 +131,-224.02282462975097,200 +132,-128.07318722903528,200 +133,-125.19428503867398,200 +134,-117.94183670853369,200 +135,-293.1746487673185,200 +136,-233.90051955808607,200 +137,-127.5355867041485,200 +138,-125.85223401743687,200 +139,-4.626152667375481,200 +140,-121.47684430979193,200 +141,-128.64509208235137,200 +142,-124.14249797267841,200 +143,-126.1383038761375,200 +144,-239.8337450804976,200 +145,-126.85974538473424,200 +146,-243.9167797151181,200 +147,-125.25415123472948,200 +148,-120.84316789549669,200 +149,-124.99387383858813,200 +150,-232.28090962852033,200 +151,-234.1659658327432,200 +152,-128.77476249747508,200 +153,-119.23672813132762,200 +154,-238.3812275165217,200 +155,-3.7567415866955285,200 +156,-3.5411249424784104,200 +157,-3.609097744296392,200 +158,-244.70525791870995,200 +159,-237.87309917044502,200 +160,-124.49646257448909,200 +161,-125.72885347485305,200 +162,-242.33059192857144,200 +163,-337.62184384045145,200 +164,-125.71262948704454,200 +165,-123.38670074518775,200 +166,-118.73462371934696,200 +167,-251.63898303558588,200 +168,-125.51446614355368,200 +169,-119.43520340074693,200 +170,-227.56316573888208,200 +171,-229.81800583341956,200 +172,-121.29229912501857,200 +173,-120.2988604951962,200 +174,-122.70484221687275,200 +175,-124.97560306222029,200 +176,-120.58720684321455,200 +177,-229.36521245416932,200 +178,-128.13870059975807,200 +179,-242.5119536970556,200 +180,-2.780197439950255,200 +181,-131.84164869166293,200 +182,-119.41396661596194,200 +183,-122.9536618140956,200 +184,-116.1528464374456,200 +185,-127.49014686942745,200 +186,-2.5204654726616624,200 +187,-230.4808505727043,200 +188,-238.26710607105352,200 +189,-2.6038605091117937,200 +190,-3.6789477828275086,200 +191,-119.95121206344093,200 +192,-127.19693529359165,200 +193,-122.92118292745423,200 +194,-232.35747013627596,200 +195,-229.00362942601606,200 +196,-127.44205494521,200 +197,-128.06895273553286,200 +198,-128.40987613917116,200 +199,-4.332916527647893,200 +200,-334.0679388625372,200 +201,-345.7679673282434,200 +202,-124.90456801791143,200 +203,-5.0302673111235645,200 +204,-317.9378561099371,200 +205,-114.65401872898728,200 +206,-114.68527414918377,200 +207,-124.01442289248908,200 +208,-1.514613179095242,200 +209,-2.1563693147727476,200 +210,-121.75897289905721,200 +211,-122.40892271634246,200 +212,-246.27475919367185,200 +213,-248.99395286342485,200 +214,-120.14524805524363,200 +215,-117.45731004161992,200 +216,-123.65784081355896,200 +217,-124.00438584660347,200 +218,-3.1362324636697823,200 +219,-123.11942700983279,200 +220,-247.12562129758138,200 +221,-223.84451194466143,200 +222,-6.15881508830518,200 +223,-116.78859359004693,200 +224,-127.78463870999937,200 +225,-2.8063218473954263,200 +226,-117.93780652243373,200 +227,-116.78644787824226,200 +228,-229.09895458169728,200 +229,-237.44123582427343,200 +230,-128.7060815642893,200 +231,-4.241577105626107,200 +232,-228.29816445299426,200 +233,-126.2161298049504,200 +234,-125.39317300217752,200 +235,-349.8098996080765,200 +236,-126.92264494743459,200 +237,-226.3502320009651,200 +238,-6.654564898486257,200 +239,-237.13832044596785,200 +240,-126.1058735791001,200 +241,-121.71324023362337,200 +242,-237.43894833703249,200 +243,-124.0605781237955,200 +244,-117.12699613379924,200 +245,-121.8500765426138,200 +246,-119.95369698454992,200 +247,-4.865479618975616,200 +248,-129.40176818135143,200 +249,-124.0659628546662,200 +250,-122.1285477993795,200 +251,-127.66102461955369,200 +252,-119.5432766857896,200 +253,-120.94195919732147,200 +254,-129.51690219460767,200 +255,-123.71930391700762,200 +256,-117.21829447178972,200 +257,-228.47678598934723,200 +258,-124.05838625000196,200 +259,-221.26264882154922,200 +260,-3.4602038262529575,200 +261,-119.92287047134276,200 +262,-1.7814099745897911,200 +263,-2.0837682199706307,200 +264,-128.6490928967471,200 +265,-248.37835067238808,200 +266,-235.88895649449393,200 +267,-243.8044142223758,200 +268,-126.83915879326483,200 +269,-328.3432888106848,200 +270,-127.39549256734736,200 +271,-117.3424899144692,200 +272,-237.51986066244189,200 +273,-3.8485328926670825,200 +274,-239.56172170688217,200 +275,-337.7351195776648,200 +276,-122.55078904013,200 +277,-121.4402041598005,200 +278,-233.8063743565577,200 +279,-116.85511486622529,200 +280,-241.42821199098822,200 +281,-1.052754170608591,200 +282,-117.11888523861005,200 +283,-230.18355878421508,200 +284,-236.26083094873803,200 +285,-235.83537297226357,200 +286,-119.65529711165165,200 +287,-123.26161892940857,200 +288,-125.27443346905945,200 +289,-125.17313599486818,200 +290,-119.51613483845973,200 +291,-116.4972499377655,200 +292,-236.63163839115944,200 +293,-225.00529267273893,200 +294,-120.57986220646897,200 +295,-123.47026946715624,200 +296,-123.67492352435151,200 +297,-1.7269425339158595,200 +298,-1.718552694714428,200 +299,-1.2021966340860126,200 +300,-227.9023991935425,200 +301,-125.06740796342635,200 +302,-121.69651197918479,200 +303,-127.87877743446697,200 +304,-316.23054450677904,200 +305,-247.30665684505038,200 +306,-125.62599028350631,200 +307,-124.82675049321402,200 +308,-247.54812050519263,200 +309,-2.573849056304928,200 +310,-124.38547469676062,200 +311,-226.28200832784148,200 +312,-122.00748418049179,200 +313,-3.127364450668834,200 +314,-120.4037952595662,200 +315,-3.229502221767452,200 +316,-125.38194259739751,200 +317,-228.29095933883684,200 +318,-128.3668321917714,200 +319,-340.1850283936829,200 +320,-3.480230715999837,200 +321,-337.10983928635756,200 +322,-122.61510619244966,200 +323,-230.32354728800362,200 +324,-116.84751314384577,200 +325,-236.63927381524556,200 +326,-129.87326188239297,200 +327,-237.07990451953384,200 +328,-3.8129007058446764,200 +329,-126.41948903650454,200 +330,-2.24574141441396,200 +331,-124.51757603715346,200 +332,-351.6093380555408,200 +333,-120.76465584228704,200 +334,-246.04479442401347,200 +335,-122.44653121904862,200 +336,-4.37697920819419,200 +337,-124.33663900238507,200 +338,-120.44742284653297,200 +339,-129.8106465591828,200 +340,-257.2439351649813,200 +341,-128.98912183716325,200 +342,-126.06549780931718,200 +343,-114.9760446780007,200 +344,-234.82712970463766,200 +345,-126.40135826456189,200 +346,-122.19642846950752,200 +347,-231.32195697215047,200 +348,-3.506408005468861,200 +349,-229.49161879654355,200 +350,-126.53663374624699,200 +351,-122.64412374050855,200 +352,-234.4028296390576,200 +353,-125.07309466107431,200 +354,-231.3905153735058,200 +355,-232.47548925553775,200 +356,-115.9459512456998,200 +357,-125.04603037590877,200 +358,-120.136298665279,200 +359,-126.93800658098944,200 +360,-121.11795884051025,200 +361,-121.01944424151925,200 +362,-229.03505188846063,200 +363,-126.54932290930633,200 +364,-128.48516252177592,200 +365,-229.4182920905771,200 +366,-3.244170845064431,200 +367,-125.45042429375434,200 +368,-241.61065404011143,200 +369,-120.53876110897774,200 +370,-230.03656076079432,200 +371,-130.17154044758365,200 +372,-6.54652016854727,200 +373,-117.9293548227783,200 +374,-129.16138087353013,200 +375,-3.4791141350915047,200 +376,-239.34227112422565,200 +377,-124.07006330367133,200 +378,-242.03052435304554,200 +379,-246.9395011187692,200 +380,-233.09466871416927,200 +381,-118.12443331224576,200 +382,-3.460197559156605,200 +383,-228.83438072811504,200 +384,-2.770487196516156,200 +385,-1.0380690972218531,200 +386,-119.01107220096995,200 +387,-122.31348271943133,200 +388,-121.94810215057323,200 +389,-117.3172270304195,200 +390,-236.0697509654736,200 +391,-241.80177437353683,200 +392,-248.5150409023374,200 +393,-119.67093947498905,200 +394,-226.17254257744773,200 +395,-1.8487310849265886,200 +396,-227.38352952055695,200 +397,-229.6537134847933,200 +398,-245.46944674531613,200 +399,-2.0613989257151046,200 diff --git a/joyrl/presets/CartPole-v1_SAC_D_Test.yaml b/joyrl/presets/CartPole-v1_SAC_D_Test.yaml new file mode 100644 index 0000000..bd90d23 --- /dev/null +++ b/joyrl/presets/CartPole-v1_SAC_D_Test.yaml @@ -0,0 +1,23 @@ +general_cfg: + algo_name: SAC_D + device: cuda + env_name: CartPole-v1 + mode: test + load_checkpoint: true + load_path: Train_CartPole-v1_SAC_D_20230302-175416 # model path under tasks folder + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 200 +algo_cfg: + batch_size: 64 + buffer_size: 100000 + alpha: 0.2 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.0001 + target_update: 1 diff --git a/joyrl/presets/CartPole-v1_SAC_D_Train.yaml b/joyrl/presets/CartPole-v1_SAC_D_Train.yaml new file mode 100644 index 0000000..dab9ca4 --- /dev/null +++ b/joyrl/presets/CartPole-v1_SAC_D_Train.yaml @@ -0,0 +1,23 @@ +general_cfg: + algo_name: SAC_D + device: cuda + env_name: CartPole-v1 + new_step_api: True + wrapper: null + mode: train + load_checkpoint: false + load_path: Train_CartPole-v1_DQN_20221026-054757 + max_steps: 200 + save_fig: true + seed: 1 + show_fig: false + test_eps: 10 + train_eps: 200 +algo_cfg: + alpha: 0.2 + epsilon_decay: 500 + epsilon_end: 0.01 + epsilon_start: 0.95 + gamma: 0.95 + lr: 0.0001 + target_update: 1 \ No newline at end of file diff --git a/joyrl/presets/Pendulum-v1_SAC_Train.yaml b/joyrl/presets/Pendulum-v1_SAC_Train.yaml index e22921f..9b3e08c 100644 --- a/joyrl/presets/Pendulum-v1_SAC_Train.yaml +++ b/joyrl/presets/Pendulum-v1_SAC_Train.yaml @@ -1,6 +1,6 @@ general_cfg: algo_name: SAC - device: cuda + device: cpu env_name: Pendulum-v1 new_step_api: True wrapper: null