From b972c86bfcf1ff9e9735dc52a02687e06f8c5d61 Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Thu, 23 Jan 2025 22:54:38 +0530
Subject: [PATCH] conv2d, maxpool and flatten layer added

---
 csrc/layers/convolutional_layer.h | 131 +++++++++++++++++-------
 csrc/layers/flatten.h             |  19 +++++
 csrc/layers/non_linear_layer.h    |  30 -------
 csrc/main.cc                      |  26 ++++++
 csrc/neural_network.h             |   5 +-
 csrc/tensor.h                     |  12 ++-
 pyproject.toml                    |   2 +-
 src/deeptensor/__init__.py        |   6 ++
 8 files changed, 143 insertions(+), 88 deletions(-)
 create mode 100644 csrc/layers/flatten.h

diff --git a/csrc/layers/convolutional_layer.h b/csrc/layers/convolutional_layer.h
index 69184c2..fe59342 100644
--- a/csrc/layers/convolutional_layer.h
+++ b/csrc/layers/convolutional_layer.h
@@ -9,8 +9,8 @@ class Conv2D : public Layer {
   int in_channels;
   int out_channels;
   int kernel_size;
-  int stride;
-  int padding;
+  int stride = 1;
+  int padding = 0;
   int seed = -1;
   std::string technique = constant::HE;
   std::string mode = constant::NORMAL;
@@ -29,7 +29,11 @@
 
     // Create the RandomNumberGenerator
     RandomNumberGenerator rng(
-        this->technique, this->mode, this->in_channels, this->out_channels, seed_to_use);
+        this->technique,
+        this->mode,
+        this->in_channels,
+        this->out_channels,
+        seed_to_use);
     for (int oc = 0; oc < out_channels; ++oc) {
       for (int ic = 0; ic < in_channels; ++ic) {
         for (int kh = 0; kh < kernel_size; ++kh) {
@@ -44,12 +48,18 @@
     }
   }
  public:
+  Conv2D(int in_channels, int out_channels, int kernel_size)
+      : in_channels(in_channels),
+        out_channels(out_channels),
+        kernel_size(kernel_size) {
+    _initialize();
+  }
   Conv2D(
       int in_channels,
       int out_channels,
       int kernel_size,
-      int stride = 1,
-      int padding = 0)
+      int stride,
+      int padding)
       : in_channels(in_channels),
         out_channels(out_channels),
         kernel_size(kernel_size),
@@ -90,39 +100,39 @@ class Conv2D : public Layer {
   std::shared_ptr<Tensor> call(std::shared_ptr<Tensor> input, bool using_cuda)
       override {
     auto input_shape = input->shape; // [batch_size, in_channels, height, width]
-    int batch_size = input_shape[0];
-    int height = input_shape[2];
-    int width = input_shape[3];
+    // -- no batch for now
+    // int batch = input_shape[0];
+    int in_channel = input_shape[0];
+    int height = input_shape[1];
+    int width = input_shape[2];
 
     // Compute output dimensions
     int output_height = (height - kernel_size + 2 * padding) / stride + 1;
     int output_width = (width - kernel_size + 2 * padding) / stride + 1;
 
     // Output tensor
-    auto output = std::make_shared<Tensor>(std::vector<int>{
-        batch_size, out_channels, output_height, output_width});
-
-    for (int b = 0; b < batch_size; ++b) {
-      for (int oc = 0; oc < out_channels; ++oc) {
-        for (int oh = 0; oh < output_height; ++oh) {
-          for (int ow = 0; ow < output_width; ++ow) {
-            // Compute the dot product of the kernel and the input patch
-            double result = 0.0;
-            for (int ic = 0; ic < in_channels; ++ic) {
-              for (int kh = 0; kh < kernel_size; ++kh) {
-                for (int kw = 0; kw < kernel_size; ++kw) {
-                  int ih = oh * stride + kh - padding;
-                  int iw = ow * stride + kw - padding;
-                  if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
-                    result += input->get({b, ic, ih, iw})->data *
-                        weights->get({oc, ic, kh, kw})->data;
-                  }
+    auto output = std::make_shared<Tensor>(
+        std::vector<int>{out_channels, output_height, output_width});
+
+    for (int oc = 0; oc < out_channels; ++oc) {
+      for (int oh = 0; oh < output_height; ++oh) {
+        for (int ow = 0; ow < output_width; ++ow) {
+          // Compute the dot product of the kernel and the input patch
+          double result = 0.0;
+          for (int ic = 0; ic < in_channels; ++ic) {
+            for (int kh = 0; kh < kernel_size; ++kh) {
+              for (int kw = 0; kw < kernel_size; ++kw) {
+                int ih = oh * stride + kh - padding;
+                int iw = ow * stride + kw - padding;
+                if (ih >= 0 && ih < height && iw >= 0 && iw < width) {
+                  result += input->get({ic, ih, iw})->data *
+                      weights->get({oc, ic, kh, kw})->data;
                 }
               }
             }
-            result += bias->get(oc)->data; // Add bias
-            output->set({b, oc, oh, ow}, std::make_shared<Value>(result));
           }
+          result += bias->get(oc)->data; // Add bias
+          output->set({oc, oh, ow}, std::make_shared<Value>(result));
         }
       }
     }
@@ -139,27 +149,40 @@
   }
 
   void zero_grad() override {
-    weights->zero_grad();
-    bias->zero_grad();
+    this->weights->zero_grad();
+    this->bias->zero_grad();
+  }
+
+  std::vector<std::shared_ptr<Value>> parameters() override {
+    std::vector<std::shared_ptr<Value>> out;
+    for (int i = 0; i <= this->weights->maxIdx; i++) {
+      out.push_back(this->weights->get(i));
+    }
+    for (int i = 0; i <= this->bias->maxIdx; i++) {
+      out.push_back(this->bias->get(i));
+    }
+    return out;
   }
 };
 
 class MaxPooling2D : public Layer {
  private:
   int pool_size;
-  int stride;
+  int stride = 1;
 
  public:
-  MaxPooling2D(int pool_size, int stride = 1)
+  MaxPooling2D(int pool_size) : pool_size(pool_size) {}
+  MaxPooling2D(int pool_size, int stride)
       : pool_size(pool_size), stride(stride) {}
 
   std::shared_ptr<Tensor> call(std::shared_ptr<Tensor> input, bool using_cuda)
       override {
-    auto input_shape = input->shape; // [batch_size, channels, height, width]
-    int batch_size = input_shape[0];
-    int channels = input_shape[1];
-    int height = input_shape[2];
-    int width = input_shape[3];
+    auto input_shape = input->shape; // [batch_size, channels, height, width] --
+    // no batch for now
+    // int batch_size = input_shape[0];
+    int channels = input_shape[0];
+    int height = input_shape[1];
+    int width = input_shape[2];
 
     // Compute output dimensions
     int output_height = (height - pool_size) / stride + 1;
@@ -167,28 +190,26 @@
 
     // Output tensor
     auto output = std::make_shared<Tensor>(
-        std::vector<int>{batch_size, channels, output_height, output_width});
-
-    for (int b = 0; b < batch_size; ++b) {
-      for (int c = 0; c < channels; ++c) {
-        for (int oh = 0; oh < output_height; ++oh) {
-          for (int ow = 0; ow < output_width; ++ow) {
-            std::shared_ptr<Value> max_val = std::make_shared<Value>(
-                -std::numeric_limits<double>::infinity());
-            for (int ph = 0; ph < pool_size; ++ph) {
-              for (int pw = 0; pw < pool_size; ++pw) {
-                int ih = oh * stride + ph;
-                int iw = ow * stride + pw;
-                if (ih < height && iw < width) {
-                  std::shared_ptr<Value> curr_val = input->get({b, c, ih, iw});
-                  if (max_val->data < curr_val->data) {
-                    max_val = curr_val;
-                  }
+        std::vector<int>{channels, output_height, output_width});
+
+    for (int c = 0; c < channels; ++c) {
+      for (int oh = 0; oh < output_height; ++oh) {
+        for (int ow = 0; ow < output_width; ++ow) {
+          std::shared_ptr<Value> max_val =
+              std::make_shared<Value>(-std::numeric_limits<double>::infinity());
+          for (int ph = 0; ph < pool_size; ++ph) {
+            for (int pw = 0; pw < pool_size; ++pw) {
+              int ih = oh * stride + ph;
+              int iw = ow * stride + pw;
+              if (ih < height && iw < width) {
+                std::shared_ptr<Value> curr_val = input->get({c, ih, iw});
+                if (max_val->data < curr_val->data) {
+                  max_val = curr_val;
                 }
               }
             }
-            output->set({b, c, oh, ow}, max_val);
           }
+          output->set({c, oh, ow}, max_val);
         }
       }
     }
diff --git a/csrc/layers/flatten.h b/csrc/layers/flatten.h
new file mode 100644
index 0000000..32fd256
--- /dev/null
+++ b/csrc/layers/flatten.h
@@ -0,0 +1,19 @@
+#pragma once
+#include <memory>
+#include <string>
+#include "../neural_network.h"
+#include "../tensor.h"
+
+class Flatten : public Layer {
+ public:
+  std::shared_ptr<Tensor> call(std::shared_ptr<Tensor> input, bool using_cuda)
+      override {
+    return input->flatten();
+  }
+
+  std::string printMe() override {
+    return "Flatten()";
+  }
+
+  void zero_grad() override {};
+};
diff --git a/csrc/layers/non_linear_layer.h b/csrc/layers/non_linear_layer.h
index ea8d729..eadf8e8 100644
--- a/csrc/layers/non_linear_layer.h
+++ b/csrc/layers/non_linear_layer.h
@@ -14,11 +14,6 @@ class ReLu : public Layer {
   }
 
   void zero_grad() override {};
-
-  std::vector<std::shared_ptr<Value>> parameters() override {
-    // no parameters
-    return std::vector<std::shared_ptr<Value>>{};
-  }
 };
 
 class GeLu : public Layer {
@@ -33,11 +28,6 @@ class GeLu : public Layer {
   }
 
   void zero_grad() override {};
-
-  std::vector<std::shared_ptr<Value>> parameters() override {
-    // no parameters
-    return std::vector<std::shared_ptr<Value>>{};
-  }
 };
 
 class Tanh : public Layer {
@@ -52,11 +42,6 @@ class Tanh : public Layer {
   }
 
   void zero_grad() override {};
-
-  std::vector<std::shared_ptr<Value>> parameters() override {
-    // no parameters
-    return std::vector<std::shared_ptr<Value>>{};
-  }
 };
 
 class Sigmoid : public Layer {
@@ -71,11 +56,6 @@ class Sigmoid : public Layer {
   }
 
   void zero_grad() override {};
-
-  std::vector<std::shared_ptr<Value>> parameters() override {
-    // no parameters
-    return std::vector<std::shared_ptr<Value>>{};
-  }
 };
 
 class LeakyReLu : public Layer {
@@ -92,11 +72,6 @@ class LeakyReLu : public Layer {
   }
 
   void zero_grad() override {};
-
-  std::vector<std::shared_ptr<Value>> parameters() override {
-    // no parameters
-    return std::vector<std::shared_ptr<Value>>{};
-  }
 };
 
 class SoftMax : public Layer {
@@ -111,9 +86,4 @@ class SoftMax : public Layer {
   }
 
   void zero_grad() override {};
-
-  std::vector<std::shared_ptr<Value>> parameters() override {
-    // no parameters
-    return std::vector<std::shared_ptr<Value>>{};
-  }
 };
diff --git a/csrc/main.cc b/csrc/main.cc
index 1318499..0506b23 100644
--- a/csrc/main.cc
+++ b/csrc/main.cc
@@ -1,6 +1,8 @@
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
+#include "layers/convolutional_layer.h"
 #include "layers/feed_forward_layer.h"
+#include "layers/flatten.h"
 #include "layers/non_linear_layer.h"
 #include "loss.h"
 #include "neural_network.h"
@@ -145,6 +147,30 @@ PYBIND11_MODULE(_core, m) {
       .def("__call__", &FeedForwardLayer::call)
       .def("__repr__", &FeedForwardLayer::printMe);
 
+  py::class_<Conv2D, Layer, std::shared_ptr<Conv2D>>(m, "Conv2D")
+      .def(py::init<int, int, int>())
+      .def(py::init<int, int, int, int, int>())
+      .def(py::init())
+      .def("zero_grad", &Conv2D::zero_grad)
+      .def("parameters", &Conv2D::parameters)
+      .def("__call__", &Conv2D::call)
+      .def("__repr__", &Conv2D::printMe);
+
+  py::class_<MaxPooling2D, Layer, std::shared_ptr<MaxPooling2D>>(
+      m, "MaxPooling2D")
+      .def(py::init<int>())
+      .def(py::init<int, int>())
+      .def("zero_grad", &MaxPooling2D::zero_grad)
+      .def("parameters", &MaxPooling2D::parameters)
+      .def("__call__", &MaxPooling2D::call)
+      .def("__repr__", &MaxPooling2D::printMe);
+
+  py::class_<Flatten, Layer, std::shared_ptr<Flatten>>(m, "Flatten")
+      .def("zero_grad", &Flatten::zero_grad)
+      .def("parameters", &Flatten::parameters)
+      .def("__call__", &Flatten::call)
+      .def("__repr__", &Flatten::printMe);
+
   py::class_<ReLu, Layer, std::shared_ptr<ReLu>>(m, "ReLu")
       .def(py::init<>())
       .def("zero_grad", &ReLu::zero_grad)
diff --git a/csrc/neural_network.h b/csrc/neural_network.h
index e6f5ee0..7eb142c 100644
--- a/csrc/neural_network.h
+++ b/csrc/neural_network.h
@@ -16,7 +16,10 @@ class Layer {
 
   virtual std::string printMe() = 0;
 
-  virtual std::vector<std::shared_ptr<Value>> parameters() = 0;
+  virtual std::vector<std::shared_ptr<Value>> parameters() {
+    // no parameters
+    return std::vector<std::shared_ptr<Value>>{};
+  }
 
   virtual void zero_grad() = 0;
 };
diff --git a/csrc/tensor.h b/csrc/tensor.h
index 5feef4f..41c356f 100644
--- a/csrc/tensor.h
+++ b/csrc/tensor.h
@@ -1,5 +1,4 @@
 #pragma once
-#include 
 #include 
 #include 
 #include 
@@ -330,4 +329,15 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
     std::string my_shape = "tensor of shape: " + tensor_shape_str();
     return my_shape;
   }
+
+  std::shared_ptr<Tensor> flatten() {
+    std::shared_ptr<Tensor> out =
+        std::make_shared<Tensor>(std::vector<int>{maxIdx + 1});
+    int i = 0;
+    for (auto& e : this->v) {
+      out->set(i, e);
+      i++;
+    }
+    return out;
+  }
 };
diff --git a/pyproject.toml b/pyproject.toml
index 1becd8a..2b812d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "scikit_build_core.build"
 name = "deeptensor"
 version = "0.4.0" # new api
 url = "https://github.com/deependujha/deeptensor"
-description = "A minimal PyTorch-like **deep learning library** focused on custom autograd and efficient tensor operations."
+description = "A minimal PyTorch-like deep learning library focused on custom autograd and efficient tensor operations."
 readme = "README.md"
 authors = [
   { name = "Deependu Jha", email = "deependujha21@gmail.com" },
diff --git a/src/deeptensor/__init__.py b/src/deeptensor/__init__.py
index e7bfc39..ca43460 100644
--- a/src/deeptensor/__init__.py
+++ b/src/deeptensor/__init__.py
@@ -5,9 +5,12 @@
     SGD,
     AdaGrad,
     Adam,
+    Conv2D,
     FeedForwardLayer,
+    Flatten,
     GeLu,
     LeakyReLu,
+    MaxPooling2D,
     Model,
     Momentum,
     ReLu,
@@ -27,9 +30,12 @@
     "SGD",
     "AdaGrad",
     "Adam",
+    "Conv2D",
     "FeedForwardLayer",
+    "Flatten",
     "GeLu",
     "LeakyReLu",
+    "MaxPooling2D",
     "Model",
     "Momentum",
     "RMSprop",
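
Usage sketch (not part of the patch): the snippet below wires the three new layers together in plain C++ and works through the output-size arithmetic that Conv2D and MaxPooling2D use, (H - K + 2P) / S + 1 and (H - pool) / stride + 1. It is only a sketch under assumptions: it relies on the Tensor/Value API as reconstructed in the diff above (a shape-vector constructor, set()/get() with an index vector, a public shape member, and Value as the scalar node type), and the file name sketch.cc, include paths relative to csrc/, the 6x6 input, and the layer sizes are made up for illustration.

// sketch.cc -- illustrative only; relies on the Tensor/Value API used in the diff above.
#include <cstdio>
#include <memory>
#include <vector>

#include "layers/convolutional_layer.h"
#include "layers/flatten.h"
#include "tensor.h"

int main() {
  // Single sample, channels-first: a 1-channel 6x6 input (no batch dimension).
  auto input = std::make_shared<Tensor>(std::vector<int>{1, 6, 6});
  for (int h = 0; h < 6; ++h) {
    for (int w = 0; w < 6; ++w) {
      input->set({0, h, w}, std::make_shared<Value>(0.1 * (h + w)));
    }
  }

  // Conv2D(in=1, out=2, kernel=3) with the new defaults stride=1, padding=0:
  // spatial size = (6 - 3 + 2*0) / 1 + 1 = 4, so the output shape is {2, 4, 4}.
  Conv2D conv(1, 2, 3);
  auto conv_out = conv.call(input, /*using_cuda=*/false);

  // MaxPooling2D(pool=2, stride=2): (4 - 2) / 2 + 1 = 2, giving {2, 2, 2}.
  MaxPooling2D pool(2, 2);
  auto pool_out = pool.call(conv_out, /*using_cuda=*/false);

  // Flatten: {2, 2, 2} collapses to a rank-1 tensor of 8 values.
  Flatten flat;
  auto flat_out = flat.call(pool_out, /*using_cuda=*/false);

  std::printf("flattened length: %d\n", flat_out->shape[0]);
  return 0;
}

As the in-diff comments note ("no batch for now"), call() currently processes one sample in channels-first layout, so batched inputs would need an explicit loop on the caller's side.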