diff --git a/pufferlib/config/ocean/dinosaur.ini b/pufferlib/config/ocean/dinosaur.ini
new file mode 100644
index 000000000..37f36fde1
--- /dev/null
+++ b/pufferlib/config/ocean/dinosaur.ini
@@ -0,0 +1,19 @@
+[base]
+package = ocean
+env_name = puffer_dinosaur
+policy_name = Policy
+rnn_name = Recurrent
+
+[env]
+num_envs = 1024
+width = 800
+height = 400
+speed_init = 6
+speed_max = 14
+spawn_rate_max = 65
+spawn_rate_min = 45
+rate_increment_rate = 600
+max_obstacles = 8
+
+[train]
+total_timesteps = 120_000_000
diff --git a/pufferlib/ocean/dinosaur/binding.c b/pufferlib/ocean/dinosaur/binding.c
new file mode 100644
index 000000000..728778d05
--- /dev/null
+++ b/pufferlib/ocean/dinosaur/binding.c
@@ -0,0 +1,27 @@
+#include "dinosaur.h"
+
+#define Env Dinosaur
+#include "../env_binding.h"
+
+// Python -> C
+static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
+    env->width = unpack(kwargs, "width");
+    env->height = unpack(kwargs, "height");
+    env->speed_init = unpack(kwargs, "speed_init");
+    env->speed_max = unpack(kwargs, "speed_max");
+    env->spawn_rate_min = unpack(kwargs, "spawn_rate_min");
+    env->spawn_rate_max = unpack(kwargs, "spawn_rate_max");
+    env->rate_increment_rate = unpack(kwargs, "rate_increment_rate");
+    env->max_obstacles = unpack(kwargs, "max_obstacles");
+    init(env);
+    return 0;
+}
+
+// C -> Python
+static int my_log(PyObject* dict, Log* log) {
+    assign_to_dict(dict, "perf", log->perf);
+    assign_to_dict(dict, "score", log->score);
+    assign_to_dict(dict, "episode_return", log->episode_return);
+    assign_to_dict(dict, "episode_length", log->episode_length);
+    return 0;
+}
diff --git a/pufferlib/ocean/dinosaur/dinosaur.c b/pufferlib/ocean/dinosaur/dinosaur.c
new file mode 100644
index 000000000..80cdf0f50
--- /dev/null
+++ b/pufferlib/ocean/dinosaur/dinosaur.c
@@ -0,0 +1,61 @@
+#include "dinosaur.h"
+#include "puffernet.h"
+
+// One obstacle limit shared by the env and the network input size so the two
+// cannot drift apart. 8 matches config/ocean/dinosaur.ini.
+#define DEMO_MAX_OBSTACLES 8
+
+/* Interactive demo. The loaded policy plays by default; while LEFT_SHIFT is
+ * held the keyboard drives instead (UP = JUMP, DOWN = CROUCH, none = NOOP). */
+int main() {
+    // 4 agent features + 3 per obstacle slot, as written by compute_observations
+    int num_obs = (DEMO_MAX_OBSTACLES*3) + 4;
+
+    Weights* weights = load_weights("resources/dinosaur/puffer_dinosaur_weights.bin", 545296);
+
+    int logit_sizes[1] = {3};
+    LinearLSTM* net = make_linearlstm(weights, 1, num_obs, logit_sizes, 1);
+
+    Dinosaur env = {
+        .width = 800,
+        .height = 400,
+        .speed_init = 6,
+        .speed_max = 14,
+        .spawn_rate_max = 65,
+        .spawn_rate_min = 45,
+        .rate_increment_rate = 600,
+        .max_obstacles = DEMO_MAX_OBSTACLES,
+    };
+    env.client = make_client(&env);
+    init(&env);
+
+    env.observations = calloc(num_obs, sizeof(float));
+    env.actions = calloc(2, sizeof(int));
+    env.rewards = calloc(1, sizeof(float));
+    env.terminals = calloc(1, sizeof(unsigned char));
+
+    c_reset(&env);
+    c_render(&env);
+
+    while (!WindowShouldClose()) {
+        if(IsKeyDown(KEY_LEFT_SHIFT)){
+            env.actions[0] = NOOP;
+            if(IsKeyDown(KEY_UP)) env.actions[0] = JUMP;
+            if(IsKeyDown(KEY_DOWN)) env.actions[0] = CROUCH;
+        } else {
+            // the policy writes its chosen action straight into env.actions
+            forward_linearlstm(net, env.observations, env.actions);
+        }
+        c_step(&env);
+        c_render(&env);
+    }
+
+    free_linearlstm(net);
+    free(weights);
+    free(env.observations);
+    free(env.actions);
+    free(env.rewards);
+    free(env.terminals);
+    c_close(&env);
+    return 0;
+}
diff --git a/pufferlib/ocean/dinosaur/dinosaur.h b/pufferlib/ocean/dinosaur/dinosaur.h
new file mode 100644
index 000000000..fb458d6b3
--- /dev/null
+++ b/pufferlib/ocean/dinosaur/dinosaur.h
@@ -0,0 +1,397 @@
+/* Dinosaur: a single-agent env that mimics google's offline dinosaur game */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "raylib.h"
+
+#define PLAYER_HEIGHT 48
+#define PLAYER_WIDTH 32
+#define PLAYER_JUMP 10.0f
+#define GRAVITY 0.5f
+
+#define CACTUS_HEIGHT 24
+#define CACTUS_WIDTH 24
+
+#define BIRD_HEIGHT 24
+#define BIRD_WIDTH 48
+#define BIRD_Y 44
+
+// An obstacle is dropped once its right edge is left of this x position
+#define DESPAWN_X (-10.0f)
+
+// Actions. Enum constants rather than const variables: they are valid `case`
+// labels in C and add no object definition to files including this header.
+enum {
+    NOOP = 0,
+    JUMP = 1,
+    CROUCH = 2
+};
+
+// Sums over finished episodes; n counts the episodes added
+typedef struct {
+    float perf;
+    float score;
+    float episode_return;
+    float episode_length;
+    float n;
+} Log;
+
+// Textures used by c_render
+typedef struct {
+    Texture2D dinosaur_up;
+    Texture2D dinosaur_down;
+    Texture2D cactus;
+    Texture2D bird;
+} Client;
+
+typedef struct {
+    float x;             // right edge when x_offset is 0
+    float y;             // height of the feet above the floor
+    float y_velocity;    // positive is up
+    float jump_strength; // upward velocity given by a jump
+    int ticks;           // steps taken in the current episode
+    float width;
+    float height;
+    float x_offset;      // shifts the box right while crouching
+} Agent;
+
+enum ObstacleType {
+    CACTUS,
+    BIRD
+};
+
+typedef struct {
+    float x;      // right edge
+    float y;      // bottom edge, measured up from the floor
+    float width;
+    float height;
+    enum ObstacleType type;
+} Obstacle;
+
+typedef struct {
+    /* Mandatory */
+    Log log;
+    float* observations;
+    int* actions;
+    float* rewards;
+    unsigned char* terminals;
+    /* Not customizable */
+    Client* client;
+    Agent* agent;
+    Obstacle* obstacles;   // pool of max_obstacles slots, allocated by init
+    int num_obstacles;     // live entries at the front of the pool
+    float floor_y;
+    int speed;             // pixels obstacles move per step
+    int spawn_rate;        // steps to wait before the next spawn
+    float gravity;
+    int spawn_ticks;       // steps since the last spawn
+    /* Customizable */
+    int width;
+    int height;
+    int speed_init;
+    int speed_max;
+    int spawn_rate_min;
+    int spawn_rate_max;
+    int rate_increment_rate; // speed goes up by 1 every this many steps
+    int max_obstacles;
+} Dinosaur;
+
+/* Opens the render window and loads the sprite textures. */
+Client* make_client(Dinosaur* env){
+    Client* client = (Client*)calloc(1, sizeof(Client));
+
+    InitWindow(env->width, env->height, "Pufferlib Dinosaur");
+    SetTargetFPS(60);
+
+    client->cactus = LoadTexture("resources/dinosaur/cactus.png");
+    client->bird = LoadTexture("resources/dinosaur/bird.png");
+    client->dinosaur_up = LoadTexture("resources/dinosaur/dino.png");
+    client->dinosaur_down = LoadTexture("resources/dinosaur/dino_down.png");
+    return client;
+}
+
+/* One-time setup, called after the customizable fields are filled in.
+ * Allocates the agent and the obstacle pool; both are released by c_close. */
+void init(Dinosaur* env) {
+    env->gravity = GRAVITY;
+    env->floor_y = env->height/2.0f;
+    env->spawn_rate = 1;
+    env->spawn_ticks = 0;
+
+    env->agent = calloc(1, sizeof(Agent));
+    env->agent->x = 0.0f + 2.0f * PLAYER_WIDTH;
+    env->agent->y = 0.0f;
+    env->agent->jump_strength = PLAYER_JUMP;
+    env->agent->width = PLAYER_WIDTH;
+    env->agent->height = PLAYER_HEIGHT;
+
+    // Fixed-size pool, so stepping never has to grow or shrink the array
+    int capacity = env->max_obstacles > 0 ? env->max_obstacles : 1;
+    env->obstacles = calloc(capacity, sizeof(Obstacle));
+    env->num_obstacles = 0;
+}
+
+/* Writes 4 agent features followed by 3 features (x, y, type) for each of
+ * the max_obstacles slots; empty slots get the filler (1, -1, 0). */
+void compute_observations(Dinosaur* env) {
+    int obs_idx = 0;
+    // agent height as a fraction of the jump apex v^2 / (2g)
+    env->observations[obs_idx++] = env->agent->y / (pow(env->agent->jump_strength, 2) / (2 * env->gravity));
+    env->observations[obs_idx++] = env->agent->x/env->width;
+    env->observations[obs_idx++] = (float) env->speed / 10.0f;
+    env->observations[obs_idx++] = env->agent->ticks/100.0f;
+
+    for(int o = 0; o < env->max_obstacles; o++){
+        if (o < env->num_obstacles) {
+            Obstacle* obstacle = &env->obstacles[o];
+            env->observations[obs_idx++] = obstacle->x/env->width;
+            // NOTE(review): y is scaled by half the width, not the height.
+            // Kept as is because it changes what a trained policy sees.
+            env->observations[obs_idx++] = obstacle->y/(env->width / 2);
+            env->observations[obs_idx++] = obstacle->type == CACTUS ? 0.2f : 0.8f;
+        } else {
+            env->observations[obs_idx++] = 1.0f;
+            env->observations[obs_idx++] = -1.0f;
+            env->observations[obs_idx++] = 0.0f;
+        }
+    }
+}
+
+/* Starts a new episode: initial speed, agent on the floor, no obstacles. */
+void c_reset(Dinosaur* env){
+    env->speed = env->speed_init;
+    env->spawn_rate = 1;
+    env->spawn_ticks = 0;
+
+    env->agent->ticks = 0;
+    env->agent->y_velocity = 0.0f;
+    env->agent->y = 0.0f;
+
+    // the pool itself is kept and reused by the next episode
+    env->num_obstacles = 0;
+
+    compute_observations(env);
+}
+
+// Inclusive overlap test for two 1-D intervals
+static int ranges_overlap(float a_min, float a_max, float b_min, float b_max) {
+    return a_min <= b_max && b_min <= a_max;
+}
+
+// Adds an obstacle to the pool; does nothing when every slot is in use
+static void spawn_obstacle(Dinosaur* env, Obstacle obstacle) {
+    if (env->obstacles == NULL || env->num_obstacles >= env->max_obstacles) return;
+    env->obstacles[env->num_obstacles++] = obstacle;
+}
+
+/* Advances one step: applies the action and gravity, moves the obstacles,
+ * ends the episode on a collision (reward -1, terminal 1, immediate reset),
+ * and otherwise pays 0.01, spawns obstacles and speeds the game up. */
+void c_step(Dinosaur* env){
+    Agent* agent = env->agent;
+    agent->ticks += 1;
+    env->spawn_ticks += 1;
+    *env->rewards = 0.01f;
+    *env->terminals = 0;
+
+    // handle user input
+    switch(env->actions[0]){
+        case JUMP:
+            // a jump only starts from the floor
+            if(agent->y == 0.0f) agent->y_velocity = agent->jump_strength;
+            agent->height = PLAYER_HEIGHT;
+            agent->width = PLAYER_WIDTH;
+            agent->x_offset = 0.0f;
+            break;
+        case CROUCH:
+            // pushes the agent down, so an airborne agent drops quickly
+            agent->y_velocity = -agent->jump_strength;
+            agent->height = PLAYER_HEIGHT / 2.f;
+            agent->width = PLAYER_WIDTH * 2.0f;
+            agent->x_offset = PLAYER_WIDTH;
+            break;
+        case NOOP:
+        default:
+            // same downward push as CROUCH, with the upright box
+            agent->y_velocity = -agent->jump_strength;
+            agent->height = PLAYER_HEIGHT;
+            agent->width = PLAYER_WIDTH;
+            agent->x_offset = 0.0f;
+            break;
+    }
+
+    // gravity
+    agent->y_velocity -= env->gravity;
+    agent->y += agent->y_velocity;
+    if(agent->y <= 0){
+        agent->y = 0;
+        agent->y_velocity = 0;
+    }
+
+    float agent_x_max = agent->x + agent->x_offset;
+    float agent_x_min = agent_x_max - agent->width;
+    float agent_y_min = agent->y;
+    float agent_y_max = agent_y_min + agent->height;
+    for(int o = 0; o < env->num_obstacles; o++){
+        // move obstacles
+        Obstacle* obstacle = &env->obstacles[o];
+        obstacle->x -= env->speed;
+
+        // handle collisions: full box overlap, using the obstacle's own height
+        float obstacle_x_max = obstacle->x;
+        float obstacle_x_min = obstacle_x_max - obstacle->width;
+        float obstacle_y_min = obstacle->y;
+        float obstacle_y_max = obstacle_y_min + obstacle->height;
+        if(
+            ranges_overlap(agent_x_min, agent_x_max, obstacle_x_min, obstacle_x_max) &&
+            ranges_overlap(agent_y_min, agent_y_max, obstacle_y_min, obstacle_y_max)
+        ){
+            *env->rewards = -1.0f;
+            *env->terminals = 1;
+            // ticks - 1 steps paid 0.01 each before this step paid -1
+            env->log.episode_return += (agent->ticks - 1) * 0.01f - 1.0f;
+            env->log.episode_length += agent->ticks;
+            env->log.score += agent->ticks / 100.0f - 1.0f;
+            env->log.perf += agent->ticks / 100.0f - 1.0f;
+            env->log.n += 1;
+            c_reset(env);
+            return;
+        }
+
+        // despawn obstacles: close the gap, then revisit this index
+        if(obstacle->x < DESPAWN_X){
+            int after = env->num_obstacles - o - 1;
+            memmove(obstacle, obstacle + 1, (size_t)after * sizeof(Obstacle));
+            env->num_obstacles--;
+            o--;
+        }
+    }
+
+    // spawn new obstacles
+    if(env->spawn_ticks % env->spawn_rate == 0){
+        int spawn_num = rand() % 4 + 1;
+        if(spawn_num < 4){
+            // a group of 1-3 cacti, trimmed to the free slots instead of
+            // re-rolling, which could never succeed with a full pool
+            int free_slots = env->max_obstacles - env->num_obstacles;
+            if(spawn_num > free_slots) spawn_num = free_slots;
+            for(int i = 0; i < spawn_num; i++){
+                spawn_obstacle(env, (Obstacle) {
+                    .x = env->width + i * (CACTUS_WIDTH + 10.0f),
+                    .y = 0,
+                    .width = CACTUS_WIDTH,
+                    .height = CACTUS_HEIGHT,
+                    .type = CACTUS
+                });
+            }
+        } else {
+            spawn_obstacle(env, (Obstacle) {
+                .x = env->width + BIRD_WIDTH + 10.0f,
+                .y = BIRD_Y,
+                .width = BIRD_WIDTH,
+                .height = BIRD_HEIGHT,
+                .type = BIRD
+            });
+        }
+
+        // next wait: random in [min, max), shortened as the game speeds up;
+        // kept >= 1 because it is the divisor of the modulo above
+        int span = env->spawn_rate_max - env->spawn_rate_min;
+        int wait = env->spawn_rate_min + (span > 0 ? rand() % span : 0);
+        if(env->speed > 0) wait = wait * env->speed_init / env->speed;
+        env->spawn_rate = wait > 0 ? wait : 1;
+        env->spawn_ticks = 0;
+    }
+
+    // increase speed, never past speed_max
+    if(env->rate_increment_rate > 0 && agent->ticks % env->rate_increment_rate == 0){
+        if(env->speed < env->speed_max) env->speed += 1;
+    }
+
+    compute_observations(env);
+}
+
+/* Draws the floor, the obstacles and the agent; the window is opened on
+ * first use. ESC exits the process. */
+void c_render(Dinosaur* env){
+    if(env->client == NULL) {
+        env->client = make_client(env);
+    }
+
+    if(IsKeyDown(KEY_ESCAPE)) {
+        exit(0);
+    }
+
+    BeginDrawing();
+
+    ClearBackground((Color){255, 255, 255, 255});
+    DrawRectangle(0, env->height/2.0, env->width, env->height, (Color){95, 87, 79, 255});
+
+    for(int o = 0; o < env->num_obstacles; o++){
+        Obstacle* obstacle = &env->obstacles[o];
+        Texture2D tex;
+        tex = obstacle->type == CACTUS ? env->client->cactus : env->client->bird;
+        DrawTexturePro(
+            tex,
+            (Rectangle){0, 0, obstacle->width, obstacle->height},
+            (Rectangle){
+                obstacle->x - obstacle->width,
+                env->floor_y - obstacle->height - obstacle->y,
+                obstacle->width,
+                obstacle->height,
+            },
+            (Vector2){0, 0},
+            0.0,
+            WHITE
+        );
+    }
+
+    // upright sprite unless crouching; the default also covers any action
+    // value outside the three known ones, so tex is never left unset
+    Texture2D tex;
+    switch(env->actions[0]){
+        case CROUCH:
+            tex = env->client->dinosaur_down;
+            break;
+        case NOOP:
+        case JUMP:
+        default:
+            tex = env->client->dinosaur_up;
+            break;
+    }
+    DrawTexturePro(
+        tex,
+        (Rectangle){0, 0, env->agent->width, env->agent->height},
+        (Rectangle){
+            env->agent->x - env->agent->width + env->agent->x_offset,
+            env->floor_y - env->agent->height - env->agent->y,
+            env->agent->width,
+            env->agent->height
+        },
+        (Vector2){0, 0},
+        0.0f,
+        WHITE
+    );
+
+    EndDrawing();
+}
+
+/* Releases what init and make_client acquired. */
+void c_close(Dinosaur* env){
+    free(env->agent);
+    env->agent = NULL;
+    free(env->obstacles);
+    env->obstacles = NULL;
+    env->num_obstacles = 0;
+    if(env->client != NULL){
+        UnloadTexture(env->client->cactus);
+        UnloadTexture(env->client->bird);
+        UnloadTexture(env->client->dinosaur_up);
+        UnloadTexture(env->client->dinosaur_down);
+        CloseWindow();
+        free(env->client);
+        env->client = NULL;
+    }
+}
diff --git a/pufferlib/ocean/dinosaur/dinosaur.py b/pufferlib/ocean/dinosaur/dinosaur.py
new file mode 100644
index 000000000..08a146975
--- /dev/null
+++ b/pufferlib/ocean/dinosaur/dinosaur.py
@@ -0,0 +1,87 @@
+import gymnasium
+import numpy as np
+
+import pufferlib
+from pufferlib.ocean.dinosaur import binding
+
+class Dinosaur(pufferlib.PufferEnv):
+    '''Vectorized wrapper around the C dinosaur env, one agent per env.
+
+    Each observation has 4 agent features followed by 3 features per
+    obstacle slot. Actions: 0 = noop, 1 = jump, 2 = crouch.
+    '''
+    def __init__(self, num_envs=1024, width=800, height=800,
+            speed_init=6, speed_max=12, spawn_rate_max=100, spawn_rate_min=50,
+            rate_increment_rate=400, max_obstacles=10,
+            render_mode=None, log_interval=128, size=11, buf=None, seed=0):
+        self.single_observation_space = gymnasium.spaces.Box(low=-1.0, high=1,
+            shape=((max_obstacles*3) + 4,), dtype=np.float32)
+        self.single_action_space = gymnasium.spaces.Discrete(3)
+
+        self.render_mode = render_mode
+        self.num_agents = num_envs
+        self.log_interval = log_interval
+
+        super().__init__(buf)
+        c_envs = []
+        for i in range(num_envs):
+            # each C env works on its own one-row slice of the shared buffers
+            c_env = binding.env_init(
+                self.observations[i:i+1],
+                self.actions[i:i+1],
+                self.rewards[i:i+1],
+                self.terminals[i:i+1],
+                self.truncations[i:i+1],
+                seed, width=width, height=height, speed_init=speed_init, speed_max=speed_max,
+                spawn_rate_max=spawn_rate_max, spawn_rate_min=spawn_rate_min,
+                rate_increment_rate=rate_increment_rate, max_obstacles=max_obstacles
+            )
+            c_envs.append(c_env)
+
+        self.c_envs = binding.vectorize(*c_envs)
+
+    def reset(self, seed=0):
+        '''Resets every env; returns (observations, infos).'''
+        binding.vec_reset(self.c_envs, seed)
+        self.tick = 0
+        return self.observations, []
+
+    def step(self, actions):
+        '''Steps every env; a log is appended to info every log_interval steps.'''
+        self.tick += 1
+        self.actions[:] = actions
+        binding.vec_step(self.c_envs)
+
+        info = []
+        if self.tick % self.log_interval == 0:
+            log = binding.vec_log(self.c_envs)
+            if log:
+                info.append(log)
+
+        return (self.observations, self.rewards,
+            self.terminals, self.truncations, info)
+
+    def render(self):
+        binding.vec_render(self.c_envs, 0)
+
+    def close(self):
+        binding.vec_close(self.c_envs)
+
+if __name__ == '__main__':
+    env = Dinosaur(num_envs=1024)
+    env.reset()
+    steps = 0
+
+    CACHE = 1024
+    # high is exclusive: 3 is needed for crouch (2) to be sampled too
+    actions = np.random.randint(0, 3, (CACHE, env.num_agents))
+
+    i = 0
+    import time
+    start = time.time()
+    while time.time() - start < 10:
+        env.step(actions[i % CACHE])
+        steps += env.num_agents
+        i += 1
+
+    print('Dinosaur SPS:', int(steps / (time.time() - start)))
diff --git a/pufferlib/ocean/environment.py b/pufferlib/ocean/environment.py
index 6c56a4ea2..395d7cd24 100644
--- a/pufferlib/ocean/environment.py
+++ b/pufferlib/ocean/environment.py
@@ -162,6 +162,7 @@ def make_multiagent(buf=None, **kwargs):
     'spaces': make_spaces,
     'multiagent': make_multiagent,
     'slimevolley': 'SlimeVolley',
+    'dinosaur': 'Dinosaur'
 }
 
 def env_creator(name='squared', *args, **kwargs):
diff --git a/pufferlib/resources/dinosaur/bird.png b/pufferlib/resources/dinosaur/bird.png
new file mode 100644
index 000000000..89135d017
Binary files /dev/null and b/pufferlib/resources/dinosaur/bird.png differ
diff --git a/pufferlib/resources/dinosaur/cactus.png b/pufferlib/resources/dinosaur/cactus.png
new file mode 100644
index 000000000..94aba71f0
Binary files /dev/null and b/pufferlib/resources/dinosaur/cactus.png differ
diff --git a/pufferlib/resources/dinosaur/dino.png b/pufferlib/resources/dinosaur/dino.png
new file mode 100644
index 000000000..a32fe1a7b
Binary files /dev/null and b/pufferlib/resources/dinosaur/dino.png differ
diff --git a/pufferlib/resources/dinosaur/dino_down.png b/pufferlib/resources/dinosaur/dino_down.png
new file mode 100644
index 000000000..3898ef64b
Binary files /dev/null and b/pufferlib/resources/dinosaur/dino_down.png differ
diff --git a/pufferlib/resources/dinosaur/puffer_dinosaur_weights.bin b/pufferlib/resources/dinosaur/puffer_dinosaur_weights.bin
new file mode 100644
index 000000000..2ebc31ab4
Binary files /dev/null and b/pufferlib/resources/dinosaur/puffer_dinosaur_weights.bin differ
diff --git a/pufferlib/resources/dinosaur/star.png b/pufferlib/resources/dinosaur/star.png
new file mode 100644
index 000000000..2738c1864
Binary files /dev/null and b/pufferlib/resources/dinosaur/star.png differ