Init #23

21 changes: 0 additions & 21 deletions LICENSE

This file was deleted.

10 changes: 2 additions & 8 deletions README.md
@@ -1,10 +1,4 @@
-# Teach AI To Play Snake! Reinforcement Learning With PyTorch and Pygame
+# How to Start
 
-In this Python Reinforcement Learning Tutorial series we teach an AI to play Snake! We build everything from scratch using Pygame and PyTorch. The tutorial consists of 4 parts:
-
-You can find all tutorials on my channel: [Playlist](https://www.youtube.com/playlist?list=PLqnslRFeH2UrDh7vUmJ60YrmWd64mTTKV)
-
-- Part 1: I'll show you the project and teach you some basics about Reinforcement Learning and Deep Q Learning.
-- Part 2: Learn how to setup the environment and implement the Snake game.
-- Part 3: Implement the agent that controls the game.
-- Part 4: Implement the neural network to predict the moves and train it.
+agent.py contains the instructions for the computer.
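The new README stops at one sentence. Presumably training is launched by running agent.py directly; a hedged sketch of the loop that file typically drives — Agent, play_step, train_short_memory, train_long_memory, and n_games appear in this PR, while SnakeGameAI, get_state, get_action, remember, and reset are assumed names:

```python
# Hypothetical training loop (run as: python agent.py).
from agent import Agent
from game import SnakeGameAI  # assumed class name; only play_step appears in this diff

def train():
    agent = Agent()
    game = SnakeGameAI()
    while True:
        state_old = agent.get_state(game)           # assumed helper
        move = agent.get_action(state_old)          # assumed helper
        reward, done, score = game.play_step(move)  # returns reward, game_over, score per game.py
        state_new = agent.get_state(game)
        agent.train_short_memory(state_old, move, reward, state_new, done)
        agent.remember(state_old, move, reward, state_new, done)  # assumed: appends to the memory deque
        if done:
            game.reset()                            # assumed helper
            agent.n_games += 1
            agent.train_long_memory()

if __name__ == '__main__':
    train()
```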
Binary file added __pycache__/game.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/helper.cpython-310.pyc
Binary file not shown.
Binary file added __pycache__/model.cpython-310.pyc
Binary file not shown.
8 changes: 3 additions & 5 deletions agent.py
@@ -14,9 +14,9 @@ class Agent:
 
     def __init__(self):
         self.n_games = 0
-        self.epsilon = 0 # randomness
-        self.gamma = 0.9 # discount rate
-        self.memory = deque(maxlen=MAX_MEMORY) # popleft()
+        self.epsilon = 0
+        self.gamma = 0.9
+        self.memory = deque(maxlen=MAX_MEMORY)
         self.model = Linear_QNet(11, 256, 3)
         self.trainer = QTrainer(self.model, lr=LR, gamma=self.gamma)
 
@@ -78,8 +78,6 @@ def train_long_memory(self):
 
         states, actions, rewards, next_states, dones = zip(*mini_sample)
         self.trainer.train_step(states, actions, rewards, next_states, dones)
-        #for state, action, reward, nexrt_state, done in mini_sample:
-        #    self.trainer.train_step(state, action, reward, next_state, done)
 
     def train_short_memory(self, state, action, reward, next_state, done):
         self.trainer.train_step(state, action, reward, next_state, done)
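The deleted comments documented what these fields do: epsilon controls random exploration and gamma is the discount rate for future rewards. For context, a minimal sketch of how an epsilon-greedy policy typically uses them — the repo's actual get_action is not in this diff, so the probability scheme here is an assumption:

```python
import random
import torch

def get_action(self, state):
    # Hypothetical epsilon-greedy selection. The state is the 11-value
    # vector implied by Linear_QNet(11, 256, 3); the move is one-hot
    # over the 3 outputs [straight, right turn, left turn].
    final_move = [0, 0, 0]
    if random.random() < self.epsilon:  # explore (epsilon treated as a probability here)
        final_move[random.randint(0, 2)] = 1
    else:  # exploit the network's Q-value estimates
        state0 = torch.tensor(state, dtype=torch.float)
        prediction = self.model(state0)
        final_move[torch.argmax(prediction).item()] = 1
    return final_move
```

With epsilon left at 0, such a policy never explores, so presumably epsilon is raised or decayed elsewhere in the file.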
Binary file removed arial.ttf
Binary file not shown.
9 changes: 5 additions & 4 deletions game.py
@@ -5,8 +5,7 @@
 import numpy as np
 
 pygame.init()
-font = pygame.font.Font('arial.ttf', 25)
-#font = pygame.font.SysFont('arial', 25)
+font = pygame.font.SysFont('arial', 25)
 
 class Direction(Enum):
     RIGHT = 1
@@ -78,13 +77,15 @@ def play_step(self, action):
         game_over = False
         if self.is_collision() or self.frame_iteration > 100*len(self.snake):
             game_over = True
-            reward = -10
+            reward = -5
             return reward, game_over, self.score
 
         # 4. place new food or just move
         if self.head == self.food:
             self.score += 1
-            reward = 10
+            reward = 5
+
+
             self._place_food()
         else:
             self.snake.pop()
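This commit halves the reward scale from ±10 to ±5. Those rewards feed the Q-learning target inside QTrainer.train_step; a minimal sketch of that target, assuming the standard DQN-style update with the gamma = 0.9 set in agent.py:

```python
import torch

def q_target(reward, next_state, done, model, gamma=0.9):
    # Assumed Bellman target (the full train_step lives in model.py):
    #   Q_new = reward                          if the episode ended
    #   Q_new = reward + gamma * max_a Q(s',a)  otherwise
    if done:
        return reward
    with torch.no_grad():
        return reward + gamma * torch.max(model(next_state)).item()
```

Since Q-values scale linearly with the rewards, halving both reward magnitudes leaves the greedy policy's preferences unchanged; the practical effect is smaller targets and gradients.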
64 changes: 64 additions & 0 deletions grid.py
@@ -0,0 +1,64 @@
+import pygame as pg
+
+TITLE = "Grid"
+TILES_HORIZONTAL = 10
+TILES_VERTICAL = 10
+TILE_SIZE = 80
+WINDOW_WIDTH = 800
+WINDOW_HEIGHT = 800
+
+
+class Player:
+    def __init__(self, surface):
+        self.surface = surface
+        self.pos = (40, 40)
+
+    def draw(self):
+        pg.draw.circle(self.surface, (255, 255, 255), self.pos, 40)
+
+    def move(self, target):
+        x = (80 * (target[0] // 80)) + 40
+        y = (80 * (target[1] // 80)) + 40
+
+        self.pos = (x, y)
+
+
+class Game:
+    def __init__(self):
+        pg.init()
+        self.clock = pg.time.Clock()
+        pg.display.set_caption(TITLE)
+        self.surface = pg.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
+        self.loop = True
+        self.player = Player(self.surface)
+
+    def main(self):
+        while self.loop:
+            self.grid_loop()
+        pg.quit()
+
+    def grid_loop(self):
+        self.surface.fill((0, 0, 0))
+        for row in range(TILES_HORIZONTAL):
+            for col in range(row % 2, TILES_HORIZONTAL, 2):
+                pg.draw.rect(
+                    self.surface,
+                    (40, 40, 40),
+                    (row * TILE_SIZE, col * TILE_SIZE, TILE_SIZE, TILE_SIZE),
+                )
+        self.player.draw()
+        for event in pg.event.get():
+            if event.type == pg.QUIT:
+                self.loop = False
+            elif event.type == pg.KEYDOWN:
+                if event.key == pg.K_ESCAPE:
+                    self.loop = False
+            elif event.type == pg.MOUSEBUTTONUP:
+                pos = pg.mouse.get_pos()
+                self.player.move(pos)
+        pg.display.update()
+
+
+if __name__ == "__main__":
+    mygame = Game()
+    mygame.main()
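Player.move snaps any click to the center of its 80-pixel tile by integer division. A standalone check of that arithmetic:

```python
TILE_SIZE = 80

def snap_to_center(px):
    # Integer-divide to find the tile index, then offset to the tile's center.
    return TILE_SIZE * (px // TILE_SIZE) + TILE_SIZE // 2

assert snap_to_center(0) == 40     # left edge of tile 0 -> center 40
assert snap_to_center(79) == 40    # still inside tile 0
assert snap_to_center(145) == 120  # tile 1 spans 80..159, center 120
```

move() hardcodes the literal 80; reusing TILE_SIZE would keep the snapping in sync if the tile size changes.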
5 changes: 1 addition & 4 deletions model.py
@@ -65,7 +65,4 @@ def train_step(self, state, action, reward, next_state, done):
         loss = self.criterion(target, pred)
         loss.backward()
 
-        self.optimizer.step()
-
-
-
+        self.optimizer.step()
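Only the tail of train_step appears in this hunk. For context, a hedged sketch of the network agent.py instantiates as Linear_QNet(11, 256, 3) — the real definition sits earlier in model.py, outside this diff:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class Linear_QNet(nn.Module):
    # Assumed shape: 11 state features in, one hidden layer of 256 units,
    # 3 outputs (one Q-value per action).
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        return self.linear2(x)
```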
Binary file added model/model.pth
Binary file not shown.