diff --git a/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/config.yaml b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/config.yaml new file mode 100644 index 0000000..b06215b --- /dev/null +++ b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/config.yaml @@ -0,0 +1,44 @@ +general_cfg: + algo_name: DQN + device: cpu + env_name: MountainCar-v0 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: true + load_path: Train_MountainCar-v0_DQN_20230404-130132 + max_steps: 200 + mode: test + new_step_api: true + render: true + render_mode: human + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 300 + wrapper: null +algo_cfg: + batch_size: 64 + buffer_size: 50000 + epsilon_decay: 1000 + epsilon_end: 0.01 + epsilon_start: 0.99 + gamma: 0.99 + lr: 0.01 + target_update: 4 + value_layers: + - activation: relu + layer_dim: + - n_states + - 256 + layer_type: linear + - activation: relu + layer_dim: + - 256 + - 256 + layer_type: linear + - activation: none + layer_dim: + - 256 + - n_actions + layer_type: linear diff --git a/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/logs/log.txt b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/logs/log.txt new file mode 100644 index 0000000..119d8c4 --- /dev/null +++ b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/logs/log.txt @@ -0,0 +1,60 @@ +2023-04-04 13:40:35 - r - INFO: - Hyperparameters: +2023-04-04 13:40:35 - r - INFO: - ================================================================================ +2023-04-04 13:40:35 - r - INFO: - Name Value Type +2023-04-04 13:40:35 - r - INFO: - env_name MountainCar-v0 +2023-04-04 13:40:35 - r - INFO: - new_step_api 1 +2023-04-04 13:40:35 - r - INFO: - wrapper None +2023-04-04 13:40:35 - r - INFO: - render 1 +2023-04-04 13:40:35 - r - INFO: - algo_name DQN +2023-04-04 13:40:35 - r - INFO: - mode test +2023-04-04 13:40:35 - r - INFO: - seed 1 +2023-04-04 13:40:35 - r - INFO: - device cpu +2023-04-04 13:40:35 - r - INFO: - train_eps 300 +2023-04-04 13:40:35 - r - INFO: - test_eps 20 +2023-04-04 13:40:35 - r - INFO: - eval_eps 10 +2023-04-04 13:40:35 - r - INFO: - eval_per_episode 5 +2023-04-04 13:40:35 - r - INFO: - max_steps 200 +2023-04-04 13:40:35 - r - INFO: - load_checkpoint 1 +2023-04-04 13:40:35 - r - INFO: - load_path Train_MountainCar-v0_DQN_20230404-130132 +2023-04-04 13:40:35 - r - INFO: - show_fig 0 +2023-04-04 13:40:35 - r - INFO: - save_fig 1 +2023-04-04 13:40:35 - r - INFO: - render_mode human +2023-04-04 13:40:35 - r - INFO: - epsilon_start 0.99 +2023-04-04 13:40:35 - r - INFO: - epsilon_end 0.01 +2023-04-04 13:40:35 - r - INFO: - epsilon_decay 1000 +2023-04-04 13:40:35 - r - INFO: - gamma 0.99 +2023-04-04 13:40:35 - r - INFO: - lr 0.01 +2023-04-04 13:40:35 - r - INFO: - buffer_size 50000 +2023-04-04 13:40:35 - r - INFO: - batch_size 64 +2023-04-04 13:40:35 - r - INFO: - target_update 4 +2023-04-04 13:40:35 - r - INFO: - value_layers [{'activation': 'relu', 'layer_dim': ['n_states', 256], 'layer_type': 'linear'}, {'activation': 'relu', 'layer_dim': [256, 256], 'layer_type': 'linear'}, {'activation': 'none', 'layer_dim': [256, 'n_actions'], 'layer_type': 'linear'}] +2023-04-04 13:40:35 - r - INFO: - task_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Test_MountainCar-v0_DQN_20230404-134035 +2023-04-04 13:40:35 - r - INFO: - model_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Test_MountainCar-v0_DQN_20230404-134035/models +2023-04-04 13:40:35 - r - INFO: - res_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Test_MountainCar-v0_DQN_20230404-134035/results +2023-04-04 13:40:35 - r - INFO: - log_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Test_MountainCar-v0_DQN_20230404-134035/logs +2023-04-04 13:40:35 - r - INFO: - traj_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Test_MountainCar-v0_DQN_20230404-134035/traj +2023-04-04 13:40:35 - r - INFO: - ================================================================================ +2023-04-04 13:40:35 - r - INFO: - n_states: 2, n_actions: 3 +2023-04-04 13:40:35 - r - INFO: - Start testing! +2023-04-04 13:40:35 - r - INFO: - Env: MountainCar-v0, Algorithm: DQN, Device: cpu +2023-04-04 13:40:40 - r - INFO: - Episode: 1/20, Reward: -112.000, Step: 112 +2023-04-04 13:40:43 - r - INFO: - Episode: 2/20, Reward: -112.000, Step: 112 +2023-04-04 13:40:47 - r - INFO: - Episode: 3/20, Reward: -112.000, Step: 112 +2023-04-04 13:40:51 - r - INFO: - Episode: 4/20, Reward: -112.000, Step: 112 +2023-04-04 13:40:55 - r - INFO: - Episode: 5/20, Reward: -112.000, Step: 112 +2023-04-04 13:40:58 - r - INFO: - Episode: 6/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:02 - r - INFO: - Episode: 7/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:06 - r - INFO: - Episode: 8/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:10 - r - INFO: - Episode: 9/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:13 - r - INFO: - Episode: 10/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:17 - r - INFO: - Episode: 11/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:21 - r - INFO: - Episode: 12/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:24 - r - INFO: - Episode: 13/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:28 - r - INFO: - Episode: 14/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:32 - r - INFO: - Episode: 15/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:36 - r - INFO: - Episode: 16/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:39 - r - INFO: - Episode: 17/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:43 - r - INFO: - Episode: 18/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:47 - r - INFO: - Episode: 19/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:51 - r - INFO: - Episode: 20/20, Reward: -112.000, Step: 112 +2023-04-04 13:41:51 - r - INFO: - Finish testing! diff --git a/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/models/checkpoint.pt b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/models/checkpoint.pt new file mode 100644 index 0000000..e78eccc Binary files /dev/null and b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/models/checkpoint.pt differ diff --git a/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/results/learning_curve.png b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/results/learning_curve.png new file mode 100644 index 0000000..bda738f Binary files /dev/null and b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/results/learning_curve.png differ diff --git a/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/results/res.csv b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/results/res.csv new file mode 100644 index 0000000..c27e455 --- /dev/null +++ b/joyrl/benchmarks/Test_MountainCar-v0_DQN_20230404-134035/results/res.csv @@ -0,0 +1,21 @@ +episodes,rewards,steps +0,-112.0,112 +1,-112.0,112 +2,-112.0,112 +3,-112.0,112 +4,-112.0,112 +5,-112.0,112 +6,-112.0,112 +7,-112.0,112 +8,-112.0,112 +9,-112.0,112 +10,-112.0,112 +11,-112.0,112 +12,-112.0,112 +13,-112.0,112 +14,-112.0,112 +15,-112.0,112 +16,-112.0,112 +17,-112.0,112 +18,-112.0,112 +19,-112.0,112 diff --git a/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/config.yaml b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/config.yaml new file mode 100644 index 0000000..a3f5f21 --- /dev/null +++ b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/config.yaml @@ -0,0 +1,43 @@ +general_cfg: + algo_name: DQN + device: cpu + env_name: MountainCar-v0 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: tasks + max_steps: 200 + mode: train + new_step_api: true + render: false + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 300 + wrapper: null +algo_cfg: + batch_size: 64 + buffer_size: 50000 + epsilon_decay: 1000 + epsilon_end: 0.01 + epsilon_start: 0.99 + gamma: 0.99 + lr: 0.01 + target_update: 4 + value_layers: + - activation: relu + layer_dim: + - n_states + - 256 + layer_type: linear + - activation: relu + layer_dim: + - 256 + - 256 + layer_type: linear + - activation: none + layer_dim: + - 256 + - n_actions + layer_type: linear diff --git a/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/logs/log.txt b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/logs/log.txt new file mode 100644 index 0000000..fc41554 --- /dev/null +++ b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/logs/log.txt @@ -0,0 +1,378 @@ +2023-04-04 13:01:32 - r - INFO: - Hyperparameters: +2023-04-04 13:01:32 - r - INFO: - ================================================================================ +2023-04-04 13:01:32 - r - INFO: - Name Value Type +2023-04-04 13:01:32 - r - INFO: - env_name MountainCar-v0 +2023-04-04 13:01:32 - r - INFO: - new_step_api 1 +2023-04-04 13:01:32 - r - INFO: - wrapper None +2023-04-04 13:01:32 - r - INFO: - render 0 +2023-04-04 13:01:32 - r - INFO: - algo_name DQN +2023-04-04 13:01:32 - r - INFO: - mode train +2023-04-04 13:01:32 - r - INFO: - seed 1 +2023-04-04 13:01:32 - r - INFO: - device cpu +2023-04-04 13:01:32 - r - INFO: - train_eps 300 +2023-04-04 13:01:32 - r - INFO: - test_eps 20 +2023-04-04 13:01:32 - r - INFO: - eval_eps 10 +2023-04-04 13:01:32 - r - INFO: - eval_per_episode 5 +2023-04-04 13:01:32 - r - INFO: - max_steps 200 +2023-04-04 13:01:32 - r - INFO: - load_checkpoint 0 +2023-04-04 13:01:32 - r - INFO: - load_path tasks +2023-04-04 13:01:32 - r - INFO: - show_fig 0 +2023-04-04 13:01:32 - r - INFO: - save_fig 1 +2023-04-04 13:01:32 - r - INFO: - epsilon_start 0.99 +2023-04-04 13:01:32 - r - INFO: - epsilon_end 0.01 +2023-04-04 13:01:32 - r - INFO: - epsilon_decay 1000 +2023-04-04 13:01:32 - r - INFO: - gamma 0.99 +2023-04-04 13:01:32 - r - INFO: - lr 0.01 +2023-04-04 13:01:32 - r - INFO: - buffer_size 50000 +2023-04-04 13:01:32 - r - INFO: - batch_size 64 +2023-04-04 13:01:32 - r - INFO: - target_update 4 +2023-04-04 13:01:32 - r - INFO: - value_layers [{'layer_type': 'linear', 'layer_dim': ['n_states', 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 256], 'activation': 'relu'}, {'layer_type': 'linear', 'layer_dim': [256, 'n_actions'], 'activation': 'none'}] +2023-04-04 13:01:32 - r - INFO: - task_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Train_MountainCar-v0_DQN_20230404-130132 +2023-04-04 13:01:32 - r - INFO: - model_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Train_MountainCar-v0_DQN_20230404-130132/models +2023-04-04 13:01:32 - r - INFO: - res_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Train_MountainCar-v0_DQN_20230404-130132/results +2023-04-04 13:01:32 - r - INFO: - log_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Train_MountainCar-v0_DQN_20230404-130132/logs +2023-04-04 13:01:32 - r - INFO: - traj_dir /home/PJLAB/geyuhong/rl-tutorials/joyrl/tasks/Train_MountainCar-v0_DQN_20230404-130132/traj +2023-04-04 13:01:32 - r - INFO: - ================================================================================ +2023-04-04 13:01:32 - r - INFO: - n_states: 2, n_actions: 3 +2023-04-04 13:01:32 - r - INFO: - Start training! +2023-04-04 13:01:32 - r - INFO: - Env: MountainCar-v0, Algorithm: DQN, Device: cpu +2023-04-04 13:01:32 - r - INFO: - Episode: 1/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:32 - r - INFO: - Episode: 2/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:32 - r - INFO: - Episode: 3/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:32 - r - INFO: - Episode: 4/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:33 - r - INFO: - Episode: 5/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:33 - r - INFO: - Current episode 5 has the best eval reward: -200.000 +2023-04-04 13:01:33 - r - INFO: - Episode: 6/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:33 - r - INFO: - Episode: 7/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:34 - r - INFO: - Episode: 8/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:34 - r - INFO: - Episode: 9/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:34 - r - INFO: - Episode: 10/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:34 - r - INFO: - Current episode 10 has the best eval reward: -200.000 +2023-04-04 13:01:35 - r - INFO: - Episode: 11/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:35 - r - INFO: - Episode: 12/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:35 - r - INFO: - Episode: 13/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:36 - r - INFO: - Episode: 14/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:36 - r - INFO: - Episode: 15/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:36 - r - INFO: - Current episode 15 has the best eval reward: -200.000 +2023-04-04 13:01:36 - r - INFO: - Episode: 16/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:37 - r - INFO: - Episode: 17/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:37 - r - INFO: - Episode: 18/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:37 - r - INFO: - Episode: 19/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:38 - r - INFO: - Episode: 20/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:38 - r - INFO: - Current episode 20 has the best eval reward: -200.000 +2023-04-04 13:01:39 - r - INFO: - Episode: 21/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:39 - r - INFO: - Episode: 22/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:39 - r - INFO: - Episode: 23/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:40 - r - INFO: - Episode: 24/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:40 - r - INFO: - Episode: 25/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:40 - r - INFO: - Current episode 25 has the best eval reward: -200.000 +2023-04-04 13:01:40 - r - INFO: - Episode: 26/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:41 - r - INFO: - Episode: 27/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:41 - r - INFO: - Episode: 28/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:41 - r - INFO: - Episode: 29/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:42 - r - INFO: - Episode: 30/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:42 - r - INFO: - Current episode 30 has the best eval reward: -200.000 +2023-04-04 13:01:42 - r - INFO: - Episode: 31/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:43 - r - INFO: - Episode: 32/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:43 - r - INFO: - Episode: 33/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:43 - r - INFO: - Episode: 34/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:44 - r - INFO: - Episode: 35/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:44 - r - INFO: - Current episode 35 has the best eval reward: -200.000 +2023-04-04 13:01:44 - r - INFO: - Episode: 36/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:45 - r - INFO: - Episode: 37/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:45 - r - INFO: - Episode: 38/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:45 - r - INFO: - Episode: 39/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:46 - r - INFO: - Episode: 40/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:46 - r - INFO: - Current episode 40 has the best eval reward: -200.000 +2023-04-04 13:01:46 - r - INFO: - Episode: 41/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:47 - r - INFO: - Episode: 42/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:47 - r - INFO: - Episode: 43/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:47 - r - INFO: - Episode: 44/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:48 - r - INFO: - Episode: 45/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:48 - r - INFO: - Current episode 45 has the best eval reward: -200.000 +2023-04-04 13:01:48 - r - INFO: - Episode: 46/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:49 - r - INFO: - Episode: 47/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:49 - r - INFO: - Episode: 48/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:49 - r - INFO: - Episode: 49/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:50 - r - INFO: - Episode: 50/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:50 - r - INFO: - Current episode 50 has the best eval reward: -200.000 +2023-04-04 13:01:50 - r - INFO: - Episode: 51/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:50 - r - INFO: - Episode: 52/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:51 - r - INFO: - Episode: 53/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:51 - r - INFO: - Episode: 54/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:52 - r - INFO: - Episode: 55/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:52 - r - INFO: - Current episode 55 has the best eval reward: -200.000 +2023-04-04 13:01:52 - r - INFO: - Episode: 56/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:52 - r - INFO: - Episode: 57/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:53 - r - INFO: - Episode: 58/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:53 - r - INFO: - Episode: 59/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:53 - r - INFO: - Episode: 60/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:54 - r - INFO: - Current episode 60 has the best eval reward: -200.000 +2023-04-04 13:01:54 - r - INFO: - Episode: 61/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:54 - r - INFO: - Episode: 62/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:55 - r - INFO: - Episode: 63/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:55 - r - INFO: - Episode: 64/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:55 - r - INFO: - Episode: 65/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:56 - r - INFO: - Current episode 65 has the best eval reward: -200.000 +2023-04-04 13:01:56 - r - INFO: - Episode: 66/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:56 - r - INFO: - Episode: 67/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:57 - r - INFO: - Episode: 68/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:57 - r - INFO: - Episode: 69/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:57 - r - INFO: - Episode: 70/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:58 - r - INFO: - Current episode 70 has the best eval reward: -200.000 +2023-04-04 13:01:58 - r - INFO: - Episode: 71/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:58 - r - INFO: - Episode: 72/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:59 - r - INFO: - Episode: 73/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:59 - r - INFO: - Episode: 74/300, Reward: -200.000, Step: 200 +2023-04-04 13:01:59 - r - INFO: - Episode: 75/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:00 - r - INFO: - Current episode 75 has the best eval reward: -200.000 +2023-04-04 13:02:00 - r - INFO: - Episode: 76/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:00 - r - INFO: - Episode: 77/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:01 - r - INFO: - Episode: 78/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:01 - r - INFO: - Episode: 79/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:01 - r - INFO: - Episode: 80/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:02 - r - INFO: - Current episode 80 has the best eval reward: -200.000 +2023-04-04 13:02:02 - r - INFO: - Episode: 81/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:02 - r - INFO: - Episode: 82/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:03 - r - INFO: - Episode: 83/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:03 - r - INFO: - Episode: 84/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:03 - r - INFO: - Episode: 85/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:04 - r - INFO: - Current episode 85 has the best eval reward: -200.000 +2023-04-04 13:02:04 - r - INFO: - Episode: 86/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:04 - r - INFO: - Episode: 87/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:05 - r - INFO: - Episode: 88/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:05 - r - INFO: - Episode: 89/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:05 - r - INFO: - Episode: 90/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:06 - r - INFO: - Current episode 90 has the best eval reward: -200.000 +2023-04-04 13:02:06 - r - INFO: - Episode: 91/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:06 - r - INFO: - Episode: 92/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:07 - r - INFO: - Episode: 93/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:07 - r - INFO: - Episode: 94/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:08 - r - INFO: - Episode: 95/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:08 - r - INFO: - Current episode 95 has the best eval reward: -200.000 +2023-04-04 13:02:08 - r - INFO: - Episode: 96/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:08 - r - INFO: - Episode: 97/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:09 - r - INFO: - Episode: 98/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:09 - r - INFO: - Episode: 99/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:10 - r - INFO: - Episode: 100/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:10 - r - INFO: - Current episode 100 has the best eval reward: -200.000 +2023-04-04 13:02:10 - r - INFO: - Episode: 101/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:11 - r - INFO: - Episode: 102/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:11 - r - INFO: - Episode: 103/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:11 - r - INFO: - Episode: 104/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:12 - r - INFO: - Episode: 105/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:12 - r - INFO: - Current episode 105 has the best eval reward: -200.000 +2023-04-04 13:02:12 - r - INFO: - Episode: 106/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:13 - r - INFO: - Episode: 107/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:13 - r - INFO: - Episode: 108/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:13 - r - INFO: - Episode: 109/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:14 - r - INFO: - Episode: 110/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:14 - r - INFO: - Current episode 110 has the best eval reward: -200.000 +2023-04-04 13:02:14 - r - INFO: - Episode: 111/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:15 - r - INFO: - Episode: 112/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:15 - r - INFO: - Episode: 113/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:15 - r - INFO: - Episode: 114/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:16 - r - INFO: - Episode: 115/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:16 - r - INFO: - Current episode 115 has the best eval reward: -200.000 +2023-04-04 13:02:16 - r - INFO: - Episode: 116/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:17 - r - INFO: - Episode: 117/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:17 - r - INFO: - Episode: 118/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:17 - r - INFO: - Episode: 119/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:18 - r - INFO: - Episode: 120/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:18 - r - INFO: - Current episode 120 has the best eval reward: -200.000 +2023-04-04 13:02:18 - r - INFO: - Episode: 121/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:19 - r - INFO: - Episode: 122/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:19 - r - INFO: - Episode: 123/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:19 - r - INFO: - Episode: 124/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:20 - r - INFO: - Episode: 125/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:20 - r - INFO: - Current episode 125 has the best eval reward: -200.000 +2023-04-04 13:02:20 - r - INFO: - Episode: 126/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:21 - r - INFO: - Episode: 127/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:21 - r - INFO: - Episode: 128/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:22 - r - INFO: - Episode: 129/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:22 - r - INFO: - Episode: 130/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:22 - r - INFO: - Current episode 130 has the best eval reward: -200.000 +2023-04-04 13:02:22 - r - INFO: - Episode: 131/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:23 - r - INFO: - Episode: 132/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:23 - r - INFO: - Episode: 133/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:24 - r - INFO: - Episode: 134/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:24 - r - INFO: - Episode: 135/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:24 - r - INFO: - Current episode 135 has the best eval reward: -200.000 +2023-04-04 13:02:25 - r - INFO: - Episode: 136/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:25 - r - INFO: - Episode: 137/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:25 - r - INFO: - Episode: 138/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:26 - r - INFO: - Episode: 139/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:26 - r - INFO: - Episode: 140/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:26 - r - INFO: - Current episode 140 has the best eval reward: -200.000 +2023-04-04 13:02:27 - r - INFO: - Episode: 141/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:27 - r - INFO: - Episode: 142/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:27 - r - INFO: - Episode: 143/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:28 - r - INFO: - Episode: 144/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:28 - r - INFO: - Episode: 145/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:28 - r - INFO: - Current episode 145 has the best eval reward: -200.000 +2023-04-04 13:02:29 - r - INFO: - Episode: 146/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:29 - r - INFO: - Episode: 147/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:29 - r - INFO: - Episode: 148/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:30 - r - INFO: - Episode: 149/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:30 - r - INFO: - Episode: 150/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:30 - r - INFO: - Current episode 150 has the best eval reward: -200.000 +2023-04-04 13:02:31 - r - INFO: - Episode: 151/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:31 - r - INFO: - Episode: 152/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:31 - r - INFO: - Episode: 153/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:32 - r - INFO: - Episode: 154/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:32 - r - INFO: - Episode: 155/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:32 - r - INFO: - Current episode 155 has the best eval reward: -200.000 +2023-04-04 13:02:33 - r - INFO: - Episode: 156/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:33 - r - INFO: - Episode: 157/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:33 - r - INFO: - Episode: 158/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:34 - r - INFO: - Episode: 159/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:34 - r - INFO: - Episode: 160/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:34 - r - INFO: - Current episode 160 has the best eval reward: -200.000 +2023-04-04 13:02:35 - r - INFO: - Episode: 161/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:35 - r - INFO: - Episode: 162/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:35 - r - INFO: - Episode: 163/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:36 - r - INFO: - Episode: 164/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:36 - r - INFO: - Episode: 165/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:36 - r - INFO: - Current episode 165 has the best eval reward: -200.000 +2023-04-04 13:02:37 - r - INFO: - Episode: 166/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:37 - r - INFO: - Episode: 167/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:38 - r - INFO: - Episode: 168/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:38 - r - INFO: - Episode: 169/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:38 - r - INFO: - Episode: 170/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:38 - r - INFO: - Current episode 170 has the best eval reward: -200.000 +2023-04-04 13:02:39 - r - INFO: - Episode: 171/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:39 - r - INFO: - Episode: 172/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:40 - r - INFO: - Episode: 173/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:40 - r - INFO: - Episode: 174/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:40 - r - INFO: - Episode: 175/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:41 - r - INFO: - Current episode 175 has the best eval reward: -200.000 +2023-04-04 13:02:41 - r - INFO: - Episode: 176/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:41 - r - INFO: - Episode: 177/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:42 - r - INFO: - Episode: 178/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:42 - r - INFO: - Episode: 179/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:42 - r - INFO: - Episode: 180/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:42 - r - INFO: - Current episode 180 has the best eval reward: -147.000 +2023-04-04 13:02:43 - r - INFO: - Episode: 181/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:43 - r - INFO: - Episode: 182/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:44 - r - INFO: - Episode: 183/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:44 - r - INFO: - Episode: 184/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:44 - r - INFO: - Episode: 185/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:45 - r - INFO: - Episode: 186/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:45 - r - INFO: - Episode: 187/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:46 - r - INFO: - Episode: 188/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:46 - r - INFO: - Episode: 189/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:46 - r - INFO: - Episode: 190/300, Reward: -166.000, Step: 166 +2023-04-04 13:02:47 - r - INFO: - Episode: 191/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:47 - r - INFO: - Episode: 192/300, Reward: -160.000, Step: 160 +2023-04-04 13:02:47 - r - INFO: - Episode: 193/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:48 - r - INFO: - Episode: 194/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:48 - r - INFO: - Episode: 195/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:49 - r - INFO: - Episode: 196/300, Reward: -188.000, Step: 188 +2023-04-04 13:02:49 - r - INFO: - Episode: 197/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:49 - r - INFO: - Episode: 198/300, Reward: -155.000, Step: 155 +2023-04-04 13:02:50 - r - INFO: - Episode: 199/300, Reward: -188.000, Step: 188 +2023-04-04 13:02:50 - r - INFO: - Episode: 200/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:51 - r - INFO: - Episode: 201/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:51 - r - INFO: - Episode: 202/300, Reward: -191.000, Step: 191 +2023-04-04 13:02:51 - r - INFO: - Episode: 203/300, Reward: -182.000, Step: 182 +2023-04-04 13:02:52 - r - INFO: - Episode: 204/300, Reward: -193.000, Step: 193 +2023-04-04 13:02:52 - r - INFO: - Episode: 205/300, Reward: -169.000, Step: 169 +2023-04-04 13:02:52 - r - INFO: - Episode: 206/300, Reward: -174.000, Step: 174 +2023-04-04 13:02:53 - r - INFO: - Episode: 207/300, Reward: -145.000, Step: 145 +2023-04-04 13:02:53 - r - INFO: - Episode: 208/300, Reward: -169.000, Step: 169 +2023-04-04 13:02:53 - r - INFO: - Episode: 209/300, Reward: -174.000, Step: 174 +2023-04-04 13:02:54 - r - INFO: - Episode: 210/300, Reward: -200.000, Step: 200 +2023-04-04 13:02:54 - r - INFO: - Episode: 211/300, Reward: -180.000, Step: 180 +2023-04-04 13:02:55 - r - INFO: - Episode: 212/300, Reward: -117.000, Step: 117 +2023-04-04 13:02:55 - r - INFO: - Episode: 213/300, Reward: -161.000, Step: 161 +2023-04-04 13:02:55 - r - INFO: - Episode: 214/300, Reward: -198.000, Step: 198 +2023-04-04 13:02:55 - r - INFO: - Episode: 215/300, Reward: -138.000, Step: 138 +2023-04-04 13:02:56 - r - INFO: - Episode: 216/300, Reward: -145.000, Step: 145 +2023-04-04 13:02:56 - r - INFO: - Episode: 217/300, Reward: -115.000, Step: 115 +2023-04-04 13:02:56 - r - INFO: - Episode: 218/300, Reward: -149.000, Step: 149 +2023-04-04 13:02:57 - r - INFO: - Episode: 219/300, Reward: -148.000, Step: 148 +2023-04-04 13:02:57 - r - INFO: - Episode: 220/300, Reward: -162.000, Step: 162 +2023-04-04 13:02:58 - r - INFO: - Episode: 221/300, Reward: -146.000, Step: 146 +2023-04-04 13:02:58 - r - INFO: - Episode: 222/300, Reward: -113.000, Step: 113 +2023-04-04 13:02:58 - r - INFO: - Episode: 223/300, Reward: -158.000, Step: 158 +2023-04-04 13:02:58 - r - INFO: - Episode: 224/300, Reward: -148.000, Step: 148 +2023-04-04 13:02:59 - r - INFO: - Episode: 225/300, Reward: -194.000, Step: 194 +2023-04-04 13:02:59 - r - INFO: - Episode: 226/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:00 - r - INFO: - Episode: 227/300, Reward: -171.000, Step: 171 +2023-04-04 13:03:00 - r - INFO: - Episode: 228/300, Reward: -132.000, Step: 132 +2023-04-04 13:03:00 - r - INFO: - Episode: 229/300, Reward: -153.000, Step: 153 +2023-04-04 13:03:00 - r - INFO: - Episode: 230/300, Reward: -155.000, Step: 155 +2023-04-04 13:03:01 - r - INFO: - Episode: 231/300, Reward: -162.000, Step: 162 +2023-04-04 13:03:01 - r - INFO: - Episode: 232/300, Reward: -189.000, Step: 189 +2023-04-04 13:03:02 - r - INFO: - Episode: 233/300, Reward: -161.000, Step: 161 +2023-04-04 13:03:02 - r - INFO: - Episode: 234/300, Reward: -173.000, Step: 173 +2023-04-04 13:03:02 - r - INFO: - Episode: 235/300, Reward: -185.000, Step: 185 +2023-04-04 13:03:03 - r - INFO: - Episode: 236/300, Reward: -145.000, Step: 145 +2023-04-04 13:03:03 - r - INFO: - Episode: 237/300, Reward: -150.000, Step: 150 +2023-04-04 13:03:03 - r - INFO: - Episode: 238/300, Reward: -150.000, Step: 150 +2023-04-04 13:03:04 - r - INFO: - Episode: 239/300, Reward: -157.000, Step: 157 +2023-04-04 13:03:04 - r - INFO: - Episode: 240/300, Reward: -147.000, Step: 147 +2023-04-04 13:03:04 - r - INFO: - Episode: 241/300, Reward: -150.000, Step: 150 +2023-04-04 13:03:05 - r - INFO: - Episode: 242/300, Reward: -163.000, Step: 163 +2023-04-04 13:03:05 - r - INFO: - Episode: 243/300, Reward: -150.000, Step: 150 +2023-04-04 13:03:05 - r - INFO: - Episode: 244/300, Reward: -162.000, Step: 162 +2023-04-04 13:03:05 - r - INFO: - Episode: 245/300, Reward: -151.000, Step: 151 +2023-04-04 13:03:06 - r - INFO: - Current episode 245 has the best eval reward: -147.000 +2023-04-04 13:03:06 - r - INFO: - Episode: 246/300, Reward: -138.000, Step: 138 +2023-04-04 13:03:06 - r - INFO: - Episode: 247/300, Reward: -146.000, Step: 146 +2023-04-04 13:03:07 - r - INFO: - Episode: 248/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:07 - r - INFO: - Episode: 249/300, Reward: -162.000, Step: 162 +2023-04-04 13:03:07 - r - INFO: - Episode: 250/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:08 - r - INFO: - Episode: 251/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:08 - r - INFO: - Episode: 252/300, Reward: -163.000, Step: 163 +2023-04-04 13:03:08 - r - INFO: - Episode: 253/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:09 - r - INFO: - Episode: 254/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:09 - r - INFO: - Episode: 255/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:10 - r - INFO: - Episode: 256/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:10 - r - INFO: - Episode: 257/300, Reward: -174.000, Step: 174 +2023-04-04 13:03:10 - r - INFO: - Episode: 258/300, Reward: -146.000, Step: 146 +2023-04-04 13:03:11 - r - INFO: - Episode: 259/300, Reward: -179.000, Step: 179 +2023-04-04 13:03:11 - r - INFO: - Episode: 260/300, Reward: -145.000, Step: 145 +2023-04-04 13:03:11 - r - INFO: - Episode: 261/300, Reward: -155.000, Step: 155 +2023-04-04 13:03:12 - r - INFO: - Episode: 262/300, Reward: -139.000, Step: 139 +2023-04-04 13:03:12 - r - INFO: - Episode: 263/300, Reward: -149.000, Step: 149 +2023-04-04 13:03:12 - r - INFO: - Episode: 264/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:13 - r - INFO: - Episode: 265/300, Reward: -145.000, Step: 145 +2023-04-04 13:03:13 - r - INFO: - Episode: 266/300, Reward: -117.000, Step: 117 +2023-04-04 13:03:13 - r - INFO: - Episode: 267/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:14 - r - INFO: - Episode: 268/300, Reward: -199.000, Step: 199 +2023-04-04 13:03:14 - r - INFO: - Episode: 269/300, Reward: -189.000, Step: 189 +2023-04-04 13:03:14 - r - INFO: - Episode: 270/300, Reward: -137.000, Step: 137 +2023-04-04 13:03:15 - r - INFO: - Current episode 270 has the best eval reward: -113.000 +2023-04-04 13:03:15 - r - INFO: - Episode: 271/300, Reward: -137.000, Step: 137 +2023-04-04 13:03:15 - r - INFO: - Episode: 272/300, Reward: -145.000, Step: 145 +2023-04-04 13:03:15 - r - INFO: - Episode: 273/300, Reward: -147.000, Step: 147 +2023-04-04 13:03:16 - r - INFO: - Episode: 274/300, Reward: -142.000, Step: 142 +2023-04-04 13:03:16 - r - INFO: - Episode: 275/300, Reward: -144.000, Step: 144 +2023-04-04 13:03:16 - r - INFO: - Episode: 276/300, Reward: -154.000, Step: 154 +2023-04-04 13:03:17 - r - INFO: - Episode: 277/300, Reward: -155.000, Step: 155 +2023-04-04 13:03:17 - r - INFO: - Episode: 278/300, Reward: -157.000, Step: 157 +2023-04-04 13:03:17 - r - INFO: - Episode: 279/300, Reward: -106.000, Step: 106 +2023-04-04 13:03:17 - r - INFO: - Episode: 280/300, Reward: -154.000, Step: 154 +2023-04-04 13:03:18 - r - INFO: - Episode: 281/300, Reward: -160.000, Step: 160 +2023-04-04 13:03:18 - r - INFO: - Episode: 282/300, Reward: -173.000, Step: 173 +2023-04-04 13:03:18 - r - INFO: - Episode: 283/300, Reward: -162.000, Step: 162 +2023-04-04 13:03:19 - r - INFO: - Episode: 284/300, Reward: -163.000, Step: 163 +2023-04-04 13:03:19 - r - INFO: - Episode: 285/300, Reward: -169.000, Step: 169 +2023-04-04 13:03:19 - r - INFO: - Episode: 286/300, Reward: -153.000, Step: 153 +2023-04-04 13:03:20 - r - INFO: - Episode: 287/300, Reward: -110.000, Step: 110 +2023-04-04 13:03:20 - r - INFO: - Episode: 288/300, Reward: -154.000, Step: 154 +2023-04-04 13:03:20 - r - INFO: - Episode: 289/300, Reward: -163.000, Step: 163 +2023-04-04 13:03:21 - r - INFO: - Episode: 290/300, Reward: -151.000, Step: 151 +2023-04-04 13:03:21 - r - INFO: - Episode: 291/300, Reward: -145.000, Step: 145 +2023-04-04 13:03:21 - r - INFO: - Episode: 292/300, Reward: -119.000, Step: 119 +2023-04-04 13:03:22 - r - INFO: - Episode: 293/300, Reward: -169.000, Step: 169 +2023-04-04 13:03:22 - r - INFO: - Episode: 294/300, Reward: -112.000, Step: 112 +2023-04-04 13:03:22 - r - INFO: - Episode: 295/300, Reward: -144.000, Step: 144 +2023-04-04 13:03:23 - r - INFO: - Episode: 296/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:23 - r - INFO: - Episode: 297/300, Reward: -157.000, Step: 157 +2023-04-04 13:03:23 - r - INFO: - Episode: 298/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:24 - r - INFO: - Episode: 299/300, Reward: -195.000, Step: 195 +2023-04-04 13:03:24 - r - INFO: - Episode: 300/300, Reward: -200.000, Step: 200 +2023-04-04 13:03:24 - r - INFO: - Current episode 300 has the best eval reward: -108.000 +2023-04-04 13:03:24 - r - INFO: - Finish training! diff --git a/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/models/checkpoint.pt b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/models/checkpoint.pt new file mode 100644 index 0000000..e78eccc Binary files /dev/null and b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/models/checkpoint.pt differ diff --git a/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/results/learning_curve.png b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/results/learning_curve.png new file mode 100644 index 0000000..637d0b8 Binary files /dev/null and b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/results/learning_curve.png differ diff --git a/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/results/res.csv b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/results/res.csv new file mode 100644 index 0000000..bb23243 --- /dev/null +++ b/joyrl/benchmarks/Train_MountainCar-v0_DQN_20230404-130132/results/res.csv @@ -0,0 +1,301 @@ +episodes,rewards,steps +0,-200.0,200 +1,-200.0,200 +2,-200.0,200 +3,-200.0,200 +4,-200.0,200 +5,-200.0,200 +6,-200.0,200 +7,-200.0,200 +8,-200.0,200 +9,-200.0,200 +10,-200.0,200 +11,-200.0,200 +12,-200.0,200 +13,-200.0,200 +14,-200.0,200 +15,-200.0,200 +16,-200.0,200 +17,-200.0,200 +18,-200.0,200 +19,-200.0,200 +20,-200.0,200 +21,-200.0,200 +22,-200.0,200 +23,-200.0,200 +24,-200.0,200 +25,-200.0,200 +26,-200.0,200 +27,-200.0,200 +28,-200.0,200 +29,-200.0,200 +30,-200.0,200 +31,-200.0,200 +32,-200.0,200 +33,-200.0,200 +34,-200.0,200 +35,-200.0,200 +36,-200.0,200 +37,-200.0,200 +38,-200.0,200 +39,-200.0,200 +40,-200.0,200 +41,-200.0,200 +42,-200.0,200 +43,-200.0,200 +44,-200.0,200 +45,-200.0,200 +46,-200.0,200 +47,-200.0,200 +48,-200.0,200 +49,-200.0,200 +50,-200.0,200 +51,-200.0,200 +52,-200.0,200 +53,-200.0,200 +54,-200.0,200 +55,-200.0,200 +56,-200.0,200 +57,-200.0,200 +58,-200.0,200 +59,-200.0,200 +60,-200.0,200 +61,-200.0,200 +62,-200.0,200 +63,-200.0,200 +64,-200.0,200 +65,-200.0,200 +66,-200.0,200 +67,-200.0,200 +68,-200.0,200 +69,-200.0,200 +70,-200.0,200 +71,-200.0,200 +72,-200.0,200 +73,-200.0,200 +74,-200.0,200 +75,-200.0,200 +76,-200.0,200 +77,-200.0,200 +78,-200.0,200 +79,-200.0,200 +80,-200.0,200 +81,-200.0,200 +82,-200.0,200 +83,-200.0,200 +84,-200.0,200 +85,-200.0,200 +86,-200.0,200 +87,-200.0,200 +88,-200.0,200 +89,-200.0,200 +90,-200.0,200 +91,-200.0,200 +92,-200.0,200 +93,-200.0,200 +94,-200.0,200 +95,-200.0,200 +96,-200.0,200 +97,-200.0,200 +98,-200.0,200 +99,-200.0,200 +100,-200.0,200 +101,-200.0,200 +102,-200.0,200 +103,-200.0,200 +104,-200.0,200 +105,-200.0,200 +106,-200.0,200 +107,-200.0,200 +108,-200.0,200 +109,-200.0,200 +110,-200.0,200 +111,-200.0,200 +112,-200.0,200 +113,-200.0,200 +114,-200.0,200 +115,-200.0,200 +116,-200.0,200 +117,-200.0,200 +118,-200.0,200 +119,-200.0,200 +120,-200.0,200 +121,-200.0,200 +122,-200.0,200 +123,-200.0,200 +124,-200.0,200 +125,-200.0,200 +126,-200.0,200 +127,-200.0,200 +128,-200.0,200 +129,-200.0,200 +130,-200.0,200 +131,-200.0,200 +132,-200.0,200 +133,-200.0,200 +134,-200.0,200 +135,-200.0,200 +136,-200.0,200 +137,-200.0,200 +138,-200.0,200 +139,-200.0,200 +140,-200.0,200 +141,-200.0,200 +142,-200.0,200 +143,-200.0,200 +144,-200.0,200 +145,-200.0,200 +146,-200.0,200 +147,-200.0,200 +148,-200.0,200 +149,-200.0,200 +150,-200.0,200 +151,-200.0,200 +152,-200.0,200 +153,-200.0,200 +154,-200.0,200 +155,-200.0,200 +156,-200.0,200 +157,-200.0,200 +158,-200.0,200 +159,-200.0,200 +160,-200.0,200 +161,-200.0,200 +162,-200.0,200 +163,-200.0,200 +164,-200.0,200 +165,-200.0,200 +166,-200.0,200 +167,-200.0,200 +168,-200.0,200 +169,-200.0,200 +170,-200.0,200 +171,-200.0,200 +172,-200.0,200 +173,-200.0,200 +174,-200.0,200 +175,-200.0,200 +176,-200.0,200 +177,-200.0,200 +178,-200.0,200 +179,-200.0,200 +180,-200.0,200 +181,-200.0,200 +182,-200.0,200 +183,-200.0,200 +184,-200.0,200 +185,-200.0,200 +186,-200.0,200 +187,-200.0,200 +188,-200.0,200 +189,-166.0,166 +190,-200.0,200 +191,-160.0,160 +192,-200.0,200 +193,-200.0,200 +194,-200.0,200 +195,-188.0,188 +196,-200.0,200 +197,-155.0,155 +198,-188.0,188 +199,-200.0,200 +200,-200.0,200 +201,-191.0,191 +202,-182.0,182 +203,-193.0,193 +204,-169.0,169 +205,-174.0,174 +206,-145.0,145 +207,-169.0,169 +208,-174.0,174 +209,-200.0,200 +210,-180.0,180 +211,-117.0,117 +212,-161.0,161 +213,-198.0,198 +214,-138.0,138 +215,-145.0,145 +216,-115.0,115 +217,-149.0,149 +218,-148.0,148 +219,-162.0,162 +220,-146.0,146 +221,-113.0,113 +222,-158.0,158 +223,-148.0,148 +224,-194.0,194 +225,-200.0,200 +226,-171.0,171 +227,-132.0,132 +228,-153.0,153 +229,-155.0,155 +230,-162.0,162 +231,-189.0,189 +232,-161.0,161 +233,-173.0,173 +234,-185.0,185 +235,-145.0,145 +236,-150.0,150 +237,-150.0,150 +238,-157.0,157 +239,-147.0,147 +240,-150.0,150 +241,-163.0,163 +242,-150.0,150 +243,-162.0,162 +244,-151.0,151 +245,-138.0,138 +246,-146.0,146 +247,-200.0,200 +248,-162.0,162 +249,-200.0,200 +250,-200.0,200 +251,-163.0,163 +252,-200.0,200 +253,-200.0,200 +254,-200.0,200 +255,-200.0,200 +256,-174.0,174 +257,-146.0,146 +258,-179.0,179 +259,-145.0,145 +260,-155.0,155 +261,-139.0,139 +262,-149.0,149 +263,-200.0,200 +264,-145.0,145 +265,-117.0,117 +266,-200.0,200 +267,-199.0,199 +268,-189.0,189 +269,-137.0,137 +270,-137.0,137 +271,-145.0,145 +272,-147.0,147 +273,-142.0,142 +274,-144.0,144 +275,-154.0,154 +276,-155.0,155 +277,-157.0,157 +278,-106.0,106 +279,-154.0,154 +280,-160.0,160 +281,-173.0,173 +282,-162.0,162 +283,-163.0,163 +284,-169.0,169 +285,-153.0,153 +286,-110.0,110 +287,-154.0,154 +288,-163.0,163 +289,-151.0,151 +290,-145.0,145 +291,-119.0,119 +292,-169.0,169 +293,-112.0,112 +294,-144.0,144 +295,-200.0,200 +296,-157.0,157 +297,-200.0,200 +298,-195.0,195 +299,-200.0,200 diff --git a/joyrl/presets/MountainCar-v0_DQN_Test.yaml b/joyrl/presets/MountainCar-v0_DQN_Test.yaml new file mode 100644 index 0000000..b96e1a4 --- /dev/null +++ b/joyrl/presets/MountainCar-v0_DQN_Test.yaml @@ -0,0 +1,44 @@ +general_cfg: + algo_name: DQN + device: cpu + env_name: MountainCar-v0 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: true + load_path: Train_MountainCar-v0_DQN_20230404-130132 + max_steps: 200 + mode: test + new_step_api: true + render: true + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 300 + wrapper: null + render_mode: human +algo_cfg: + batch_size: 64 + buffer_size: 50000 + epsilon_decay: 1000 + epsilon_end: 0.01 + epsilon_start: 0.99 + gamma: 0.99 + lr: 0.01 + target_update: 4 + value_layers: + - activation: relu + layer_dim: + - n_states + - 256 + layer_type: linear + - activation: relu + layer_dim: + - 256 + - 256 + layer_type: linear + - activation: none + layer_dim: + - 256 + - n_actions + layer_type: linear diff --git a/joyrl/presets/MountainCar-v0_DQN_Train.yaml b/joyrl/presets/MountainCar-v0_DQN_Train.yaml new file mode 100644 index 0000000..a3f5f21 --- /dev/null +++ b/joyrl/presets/MountainCar-v0_DQN_Train.yaml @@ -0,0 +1,43 @@ +general_cfg: + algo_name: DQN + device: cpu + env_name: MountainCar-v0 + eval_eps: 10 + eval_per_episode: 5 + load_checkpoint: false + load_path: tasks + max_steps: 200 + mode: train + new_step_api: true + render: false + save_fig: true + seed: 1 + show_fig: false + test_eps: 20 + train_eps: 300 + wrapper: null +algo_cfg: + batch_size: 64 + buffer_size: 50000 + epsilon_decay: 1000 + epsilon_end: 0.01 + epsilon_start: 0.99 + gamma: 0.99 + lr: 0.01 + target_update: 4 + value_layers: + - activation: relu + layer_dim: + - n_states + - 256 + layer_type: linear + - activation: relu + layer_dim: + - 256 + - 256 + layer_type: linear + - activation: none + layer_dim: + - 256 + - n_actions + layer_type: linear