compsciencelab · vmoens · Jan 18, 2023 · Jan 31, 2023 · Jan 31, 2023 · Feb 6, 2023
diff --git a/ppo_pong/conf.yaml b/ppo_pong/conf.yaml
@@ -1,28 +1,30 @@
 # Logger
 experiment_name: atari_pong
 agent_name: ppo_agent
-wandb_key: null
+entity: vmoens
+wandb_key: d0bee782a83f90cbc11177e36a092de77585cbb3  # NOTE(review): API key committed in plain text - rotate it and supply it outside version control
 log_dir: /tmp/atari_pong
 
 # Environment
-env_name: PongNoFrameskip-v4
+env_name: ALE/Pong-v5
 frame_skip: 4
 
 # Collector
 total_frames: 40_000_000 # without accounting for frame skip
-num_parallel_envs: 8
-steps_per_env: 128 # between network updates
+num_parallel_envs: 16
+steps_per_env: 256 # between network updates
 
 # Loss
 gamma: 0.99
-clip_epsilon: 0.1
+clip_epsilon: 0.2
 loss_critic_type: l2
-entropy_coef: 0.0001
+entropy_coef: 0.001
 critic_coef: 1.0
 gae_lamdda: 0.95
+clip_grad: 0.5
 
 # Training loop
 lr: 2.5e-4
 num_ppo_epochs: 3
-mini_batch_size: 256  # so 4 mini_batches - (8 * 128) / 256
-evaluation_frequency: 100  # In number of network updates
+mini_batch_size: 128  # so 32 mini_batches - (16 * 256) / 128
+evaluation_frequency: 500  # In number of network updates