Merged
Changes from all commits · 42 commits
f08d2f0
Refactor: Unify guidance mechanisms (#421)
daphne-cornelisse Apr 28, 2025
a7512ab
Add WOSAC initialization modes (#426)
daphne-cornelisse Apr 29, 2025
131626b
Better agent POV visualization and add LiDAR (#424)
daphne-cornelisse Apr 29, 2025
47713ff
Hot model fix
daphne-cornelisse May 1, 2025
e7f3bcb
Support for online guidance with `vbd_online` (#431)
daphne-cornelisse May 2, 2025
f965066
Integrate optional smoothening pipeline for guidance data. (#434)
daphne-cornelisse May 2, 2025
25f22d1
Temporary fix: Access average z position (elevation) from log replays…
daphne-cornelisse May 2, 2025
1777d0c
Res/scaling (#436)
daphne-cornelisse May 8, 2025
4e10440
model looks kinda okay? (#439)
eugenevinitsky May 9, 2025
e0a32c8
Ev/memory decrease (#442)
eugenevinitsky May 10, 2025
1cad3fc
update log for each scenario (#433)
zzx9636 May 12, 2025
e99ec54
Bug fixes to get rid of large position values with `vbd_online` (#435)
daphne-cornelisse May 12, 2025
5fa3c37
RL training improvements (#440)
daphne-cornelisse May 13, 2025
f1dd3be
make vbd 91 steps (#446)
nadarenator May 13, 2025
605cd4b
Fix dynamics model and some setting updates (#447)
daphne-cornelisse May 13, 2025
473e3e8
wosac dataset (#448)
nadarenator May 15, 2025
23f91f4
Intermediate update (#449)
daphne-cornelisse May 15, 2025
f0ad20f
More updates (#450)
daphne-cornelisse May 15, 2025
58620b1
Small fix
daphne-cornelisse May 15, 2025
2cffa6a
fix the wosac eval
zzx9636 May 15, 2025
ffbb5f3
Merge branch 'dev' of github.com:Emerge-Lab/gpudrive into dev
zzx9636 May 15, 2025
ad3ce01
fix amortization script (#452)
nadarenator May 27, 2025
56529c2
Added configs, params and code for the view cone setting (#454)
mpragnay May 30, 2025
3db4deb
Type-aware action space (#455)
daphne-cornelisse Jun 1, 2025
60081e2
Fix out of range error for VBD trajectory (#456)
rjs02 Jun 4, 2025
bc62868
simple occlusion check working
rjs02 Jun 2, 2025
442b210
add occlusion check with nested loop instead of bvh
rjs02 Jun 4, 2025
e7ebc10
ray trace against 8 corner points instead of only center
rjs02 Jun 4, 2025
6f28c63
change vector to array for gpu compatibility
rjs02 Jun 4, 2025
254deb4
resolved rebase conflicts with dev
rjs02 Jun 5, 2025
1138290
add action for head tilt animation
rjs02 Jun 5, 2025
909655f
change line style
rjs02 Jun 6, 2025
cd3ccf3
update config files to new parameters
rjs02 Jun 8, 2025
0b674cb
set full view and no occlusion as default
rjs02 Jun 8, 2025
39aa469
add additional sampling points for occlusion check
rjs02 Jun 8, 2025
aecae0c
rename parameter and add documentation
rjs02 Jun 8, 2025
ef57e8b
Merge pull request #460 from Emerge-Lab/rs/occluded-obs
rjs02 Jun 8, 2025
f492dd0
pass config parameters into config object
rjs02 Jun 12, 2025
957f57e
Merge pull request #463 from Emerge-Lab/rs/fix-pufferenv-config
rjs02 Jun 13, 2025
b705eb8
Integrate traffic light states (#425)
daphne-cornelisse Jun 16, 2025
ab5c435
Merge branch 'dev' of https://github.com/Emerge-Lab/gpudrive into ap_…
Jun 19, 2025
fbf0910
[FIX] adapt TL+other missing changes to new json parsing
Jun 20, 2025
6 changes: 5 additions & 1 deletion .gitignore
@@ -27,11 +27,15 @@ data/raw/*
data/processed/validation/*
data/processed/training/*
data/processed/testing/*
data/processed/validation_interactive/*
data/processed/pop_play/*
data/processed/hand_designed/*
analyze/figures/*
figures/
checkpoints/
figures_data/
data/other/*
wosac/
data/processed/validation_random/*

# Logging
/wandb
6 changes: 1 addition & 5 deletions baselines/ppo/config/ppo_base_puffer.yaml
@@ -15,7 +15,7 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.py)
road_map_obs: true
partner_obs: true
norm_obs: true
add_reference_path: false
add_reference_pos_xy: false
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "weighted_combination" # Options: "weighted_combination", "reward_conditioned"
@@ -42,11 +42,7 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.py)
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 13
action_space_accel_disc: 7
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
init_steps: 0
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

wandb:
entity: ""
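
The `ppo_base_puffer.yaml` diff above renames `add_reference_path` to `add_reference_pos_xy` and drops the inline VBD block. A minimal sketch of how a training script might apply such YAML overrides on top of the defaults in `pygpudrive/env/config.py` — the dataclass fields here are a hypothetical stand-in for those defaults, and PyYAML is assumed:

```python
import dataclasses
import yaml

@dataclasses.dataclass
class EnvConfig:
    # Hypothetical stand-in for the defaults in pygpudrive/env/config.py.
    add_reference_pos_xy: bool = False
    lidar_obs: bool = False
    reward_type: str = "weighted_combination"

with open("baselines/ppo/config/ppo_base_puffer.yaml") as f:
    overrides = yaml.safe_load(f)["environment"]

# Keep only keys the dataclass defines, so renamed or removed options
# (e.g. the old `add_reference_path` or `use_vbd`) are skipped instead
# of crashing the constructor.
known = {f.name for f in dataclasses.fields(EnvConfig)}
cfg = EnvConfig(**{k: v for k, v in overrides.items() if k in known})
```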
@@ -2,59 +2,66 @@ mode: "train"
use_rnn: false
eval_model_path: null
baseline: false
data_dir: data/processed/wosac/validation_json_1
data_dir: data/processed/wosac/validation_interactive/json
continue_training: false
model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 1 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
max_controlled_agents: 32 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
add_previous_action: true

# Guidance through expert suggestions
guidance: true # If true, the agent will be guided by expert suggestions
guidance_mode: "log_replay" # Options: "log_replay", "vbd_amortized", "vbd_online"
add_reference_pos_xy: true # If true, the reference x/y positions are added to the ego observation
add_reference_speed: true # If true, the reference speeds are added to the ego observation
add_reference_heading: true # If true, the reference headings are added to the ego observation
smoothen_trajectory: true # If true, the velocities and headings are smoothed
guidance_dropout_prob: 0.0 # Probability of dropping out guidance points

# Reward function
reward_type: "guided_autonomy"
collision_weight: -0.1
off_road_weight: -0.1
guidance_speed_weight: 0.005
guidance_heading_weight: 0.005
smoothness_weight: 0.0001

init_mode: wosac_train
dynamics_model: "classic"
remove_non_vehicles: false
collision_behavior: "ignore"
goal_behavior: "ignore"
reward_type: "follow_waypoints"
waypoint_distance_scale: 0.01
speed_distance_scale: 0.01
jerk_smoothness_scale: 0.001

init_mode: all_non_trivial #womd_tracks_to_predict
dynamics_model: "classic"
polyline_reduction_threshold: 0.1 # Rate at which to sample points from the polyline (0 = use all closest points, 1 = maximum sparsity), needs to be balanced with kMaxAgentMapObservationsCount
sampling_seed: 42 # If given, the set of scenes to sample from will be deterministic, if None, the set of scenes will be random
obs_radius: 50.0 # Visibility radius of the agents
action_space_steer_disc: 15
action_space_accel_disc: 11
view_cone_half_angle: 3.14159 # -> 360° total view field
view_occlude_objects: false
action_space_steer_disc: 13
action_space_accel_disc: 7
action_space_head_tilt_disc: 1
vehicle_steer_range: [-1.57, 1.57] # pi/2 = 1.57, pi/3 = 1.05
vehicle_accel_range: [-4.0, 4.0]
head_tilt_action_range: [-0.7854, 0.7854] # radians (±45°)
init_steps: 0 # Warmup steps
goal_achieved_weight: 0.0
collision_weight: -0.2
off_road_weight: -0.2

# Versatile Behavior Diffusion (VBD)
use_vbd: false
init_steps: 0
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

# Planning guidance
add_reference_path: true # If true, a reference path is added to the ego observation
add_reference_speed: true # If true, the reference speed (scalar) is added to the ego observation
prob_reference_dropout: 0.0 # Value between 0 and 1, probability of a reference point to be zeroed out

wandb:
entity: ""
project: "humanlike"
group: "debug"
group: ""
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

train:
exp_id: waypoint_rs # Set dynamically in the script if needed
exp_id: guidance_logs # Set dynamically in the script if needed
seed: 42
cpu_offload: false
device: "cuda" # Dynamically set to cuda if available, else cpu
@@ -64,56 +71,58 @@ train:

# # # Data sampling # # #
resample_scenes: false
resample_dataset_size: 500 # Number of unique scenes to sample from
resample_interval: 2_000_000
resample_dataset_size: 10_000 # Number of unique scenes to sample from
resample_interval: 5_000_000
sample_with_replacement: true
shuffle_dataset: true
file_prefix: ""

# # # PPO # # #
torch_deterministic: false
total_timesteps: 2_000_000_000
batch_size: 131072
total_timesteps: 4_000_000_000
batch_size: 65536
minibatch_size: 8192
learning_rate: 3e-4
anneal_lr: true
gamma: 0.99
gamma: 1.0
gae_lambda: 0.95
update_epochs: 4
norm_adv: true
clip_coef: 0.2
clip_vloss: false
vf_clip_coef: 0.2
ent_coef: 0.005
ent_coef: 0.01
vf_coef: 0.5
max_grad_norm: 0.5
target_kl: null

# # # Logging # # #
log_window: 500
log_window: 100
track_realism_metrics: true # Log human-like metrics
track_n_worlds: 3 # Number of worlds to track

# # # Network # # #
network:
embed_dim: 64 # Embedding of the input features
embed_dim: 256 # Embedding of the input features
dropout: 0.01
class_name: "Agent"
num_parameters: 0 # Total trainable parameters, to be filled at runtime

# # # Checkpointing # # #
checkpoint_interval: 250 # Save policy every k iterations
checkpoint_interval: 200 # Save policy every k iterations
checkpoint_path: "./runs"

# # # Rendering # # #
render: true # Determines whether to render the environment (note: will slow down training)
render_3d: false # Render simulator state in 3d or 2d
render_interval: 200 # Render every k iterations
render_interval: 10 # Render every k iterations
render_every_t: 5 # Render every k timesteps
render_k_scenarios: 1 # Number of scenarios to render
render_agent_idx: [0] # Agent observations to render
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
render_fps: 5 # Frames per second
zoom_radius: 100
plot_waypoints: true
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported
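
The config above swings the action grid from 15 steering × 11 acceleration bins back to 13 × 7 and makes the per-type ranges explicit (`vehicle_steer_range: [-1.57, 1.57]`, `vehicle_accel_range: [-4.0, 4.0]`). A sketch of how such a discrete action table can be built from these settings (`build_action_grid` is a hypothetical helper, not the repo's actual API):

```python
import numpy as np

def build_action_grid(steer_range, accel_range, n_steer, n_accel):
    """Enumerate the joint (steer, accel) action space as a flat table.

    With n_steer=13 and n_accel=7 this yields 91 discrete actions; the
    policy picks an index and the simulator looks up the continuous pair.
    """
    steer = np.linspace(steer_range[0], steer_range[1], n_steer)
    accel = np.linspace(accel_range[0], accel_range[1], n_accel)
    grid = np.stack(np.meshgrid(steer, accel, indexing="ij"), axis=-1)
    return grid.reshape(-1, 2)

actions = build_action_grid([-1.57, 1.57], [-4.0, 4.0], 13, 7)
assert actions.shape == (91, 2)
```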
15 changes: 5 additions & 10 deletions baselines/ppo/config/ppo_population.yaml
@@ -8,16 +8,16 @@ model_cpt: null

environment: # Overrides default environment configs (see pygpudrive/env/config.py)
name: "gpudrive"
num_worlds: 100 # Number of parallel environments
k_unique_scenes: 100 # Number of unique scenes to sample from
num_worlds: 10 # Number of parallel environments
k_unique_scenes: 10 # Number of unique scenes to sample from
max_controlled_agents: 64 # Maximum number of agents controlled by the model. Make sure this aligns with the variable kMaxAgentCount in src/consts.hpp
ego_state: true
road_map_obs: true
partner_obs: true
norm_obs: true
remove_non_vehicles: false # If false, all agents are included (vehicles, pedestrians, cyclists)
lidar_obs: false # NOTE: Setting this to true currently turns off the other observation types
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "follow_waypoints"
reward_type: "reward_conditioned" # Options: "weighted_combination", "reward_conditioned", "guided_autonomy"
collision_weight: -0.75
off_road_weight: -0.75
goal_achieved_weight: 1.0
@@ -42,16 +42,11 @@ environment: # Overrides default environment configs (see pygpudrive/env/config.py)
action_space_steer_disc: 13
action_space_accel_disc: 7
init_steps: 0 # Warmup steps
# Versatile Behavior Diffusion (VBD): This will slow down training
use_vbd: false
vbd_model_path: "gpudrive/integrations/vbd/weights/epoch=18.ckpt"
vbd_trajectory_weight: 0.1 # Importance of distance to the vbd trajectories in the reward function
vbd_in_obs: false

wandb:
entity: ""
project: "kshotagents"
group: "separate_actor_critic"
group: "debug_mini"
mode: "online" # Options: online, offline, disabled
tags: ["ppo", "ff"]

@@ -110,7 +105,7 @@ train:
render_format: "mp4" # Options: gif, mp4
render_fps: 20 # Frames per second
zoom_radius: 100
plot_waypoints: true
plot_guidance_pos_xy: true

vec:
backend: "native" # Only native is currently supported
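
Both configs expose a guidance-dropout probability (`guidance_dropout_prob` in the new block, `prob_reference_dropout` in the old one), 0.0 by default. Per the old comment ("probability of a reference point to be zeroed out"), it zeroes individual reference points so the policy cannot lean on a dense expert path. A sketch under those assumptions — the function name and array shapes are illustrative:

```python
import numpy as np

def dropout_guidance(reference_xy, p, rng):
    """Zero out each guidance waypoint independently with probability p.

    reference_xy: (num_points, 2) array of reference positions.
    With p=0.0 (the default in both configs) this is a no-op.
    """
    keep = rng.random(reference_xy.shape[0]) >= p
    return reference_xy * keep[:, None]

rng = np.random.default_rng(42)
ref = np.arange(10.0).reshape(5, 2)
dropped = dropout_guidance(ref, p=0.3, rng=rng)
```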