From 693d6e759383f0e0f1a9ad2b9cc56a1a30caa8b6 Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Sun, 19 Jan 2025 19:51:13 +0530
Subject: [PATCH 1/5] added binary_cross_entropy and fixed bugs in vector class

---
 CMakeLists.txt                   |   7 ++
 csrc/layers/feed_forward_layer.h |   2 +-
 csrc/loss.cc                     |  57 ++++++++-----
 csrc/loss.h                      |  16 +++-
 csrc/main.cc                     |  13 ++-
 csrc/tensor.h                    |  51 ++++++++----
 csrc/value.cc                    |   4 +-
 ctests/loss_test.cc              |  57 +++++++------
 ctests/nn_test.cc                | 133 +++++++++++++++++++++++++++----
 ctests/tensor_test.cc            |  63 ++++++++++-----
 src/deeptensor/__init__.py       |   2 +
 tests/test_tensor.py             |  97 ++++++++++++++++++++++
 12 files changed, 398 insertions(+), 104 deletions(-)
 create mode 100644 tests/test_tensor.py
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dbea960..bc2b97d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,13 @@ option(CMAKE_EXPORT_COMPILE_COMMANDS "Generate compile_commands.json" ON) # for
 # Option to enable debug logging
 option(ENABLE_DEBUG "Enable debug logging" OFF)
 
+# Enable AddressSanitizer for Debug builds
+if (CMAKE_BUILD_TYPE STREQUAL "Debug")
+    message(STATUS "Enabling AddressSanitizer (ASan)")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
+    set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -fsanitize=address")
+endif()
+
 # Add a preprocessor definition based on the ENABLE_DEBUG option
 if (ENABLE_DEBUG)
     add_compile_definitions(DEBUG)
diff --git a/csrc/layers/feed_forward_layer.h b/csrc/layers/feed_forward_layer.h
index 769e83b..90d101b 100644
--- a/csrc/layers/feed_forward_layer.h
+++ b/csrc/layers/feed_forward_layer.h
@@ -90,7 +90,7 @@ class FeedForwardLayer : public Layer {
   }
 
   std::string printMe() override {
-    std::string s = "Layer(" + std::to_string(this->nin) + "," +
+    std::string s = "FeedForwardLayer(" + std::to_string(this->nin) + "," +
         std::to_string(this->nout) + ")";
     return s;
   }
diff --git a/csrc/loss.cc b/csrc/loss.cc
index de22f8a..815d730 100644
--- a/csrc/loss.cc
+++ b/csrc/loss.cc
@@ -1,6 +1,7 @@
 #include "loss.h"
 #include <memory>
 #include <stdexcept>
+#include <string>
 #include "value.h"
 
 std::shared_ptr<Value> mean_squared_error(
@@ -25,26 +26,46 @@ std::shared_ptr<Value> mean_squared_error(
   return out->div(n + 1);
 }
 
-std::shared_ptr<Value> cross_entropy(std::shared_ptr<Tensor> logits, std::shared_ptr<Tensor> actual){
-    if (logits->shape != actual->shape) {
-    std::string x_shape_str = logits->tensor_shape_str();
-    std::string y_shape_str = actual->tensor_shape_str();
-    std::string error_string =
-        "Shapes of the two tensors for computing cross_entropy don't match: tensor-1 shape (" +
-        x_shape_str + ") vs tensor-1 shape(" + y_shape_str + ")\n";
-    throw std::runtime_error(error_string);
+std::shared_ptr<Value> cross_entropy(
+    std::shared_ptr<Tensor> logits,
+    int actualIdx) {
+  if (actualIdx < 0) {
+    throw std::runtime_error(
+        "Expected Idx can't be smaller than 0. Got: " +
+        std::to_string(actualIdx));
+  }
+  if (logits->shape.size() != 1 || logits->shape[0] < actualIdx) {
+    throw std::runtime_error(
+        "logits must be a one-dimensional tensor. And actualIdx must be smaller than logits size. Got: logits shape =>" +
+        logits->tensor_shape_str() +
+        ", and expectedIdx: " + std::to_string(actualIdx));
   }
-    // compute softmax of logits
-    std::shared_ptr<Value> out = std::make_shared<Value>(0.0);
-    std::shared_ptr<Tensor> logits_softmax = logits->softmax();
+  // compute softmax of logits
+  std::shared_ptr<Tensor> logits_softmax = logits->softmax();
 
-    int n = logits->maxIdx;
-    for(int i=0;i<=n;i++){
-        std::shared_ptr logits_ln = logits_softmax->get(i)->ln();
-        std::shared_ptr<Value> pro_log = actual->get(i)->mul(logits_ln); // product of log
+  std::shared_ptr logits_ln = logits_softmax->get(actualIdx)->ln();
 
-        out = out->add(pro_log);
-    }
+  return logits_ln->mul(-1); // not averaging it
+}
 
-    return out->mul(-1); // not averaging it
+std::shared_ptr<Value> binary_cross_entropy(
+    std::shared_ptr<Tensor> logits,
+    int actualIdx) {
+  if (actualIdx < 0 || actualIdx > 1) {
+    throw std::runtime_error(
+        "Expected Idx can't be smaller than 0 or greater than 1. Got: " +
+        std::to_string(actualIdx));
+  }
+  if (logits->shape.size() != 1) {
+    throw std::runtime_error(
+        "logits must be a one-dimensional tensor.. Got: logits shape =>" +
+        logits->tensor_shape_str());
+  }
+  std::shared_ptr<Value> logit_value = logits->get(0);
+  std::shared_ptr<Value> updated_logit_value = logit_value;
+  if (actualIdx == 0) {
+    updated_logit_value = std::make_shared<Value>(1.0)->sub(logit_value);
+  }
+  std::shared_ptr logits_ln = updated_logit_value->ln();
+  return logits_ln->mul(-1);
 }
diff --git a/csrc/loss.h b/csrc/loss.h
index 86c7d5a..96cdf3b 100644
--- a/csrc/loss.h
+++ b/csrc/loss.h
@@ -1,8 +1,16 @@
 #pragma once
-#include "value.h"
-#include "tensor.h"
 #include <memory>
+#include "tensor.h"
+#include "value.h"
+
+std::shared_ptr<Value> mean_squared_error(
+    std::shared_ptr<Tensor> x,
+    std::shared_ptr<Tensor> y);
 
-std::shared_ptr<Value> mean_squared_error(std::shared_ptr<Tensor> x, std::shared_ptr<Tensor> y);
+std::shared_ptr<Value> cross_entropy(
+    std::shared_ptr<Tensor> logits,
+    int actualIdx);
 
-std::shared_ptr<Value> cross_entropy(std::shared_ptr<Tensor> logits, std::shared_ptr<Tensor> actual);
+std::shared_ptr<Value> binary_cross_entropy(
+    std::shared_ptr<Tensor> logits,
+    int actualIdx);
diff --git a/csrc/main.cc b/csrc/main.cc
index a2035df..1318499 100644
--- a/csrc/main.cc
+++ b/csrc/main.cc
@@ -109,11 +109,16 @@ PYBIND11_MODULE(_core, m) {
           "get",
           static_cast<std::shared_ptr<Value> (Tensor::*)(std::vector<int>)>(
               &Tensor::get))
+      .def_readonly("shape", &Tensor::shape)
+      .def_readonly("strides", &Tensor::strides)
+      .def_readonly("maxIdx", &Tensor::maxIdx)
+      .def_readonly("minIdx", &Tensor::minIdx)
+      .def_readonly("vals", &Tensor::v)
       .def("normalize_idx", &Tensor::normalize_idx)
       .def("backward", &Tensor::backward)
       .def("zero_grad", &Tensor::zero_grad)
-      .def("add", &Tensor::add)
-      .def("div", &Tensor::div)
+      .def("__add__", &Tensor::add)
+      .def("__truediv__", &Tensor::div)
       .def("matmul", &Tensor::matmul)
       .def("relu", &Tensor::relu)
       .def("gelu", &Tensor::gelu)
@@ -241,4 +246,8 @@ PYBIND11_MODULE(_core, m) {
       "cross_entropy",
       &cross_entropy,
       "A function that value object with cross_entropy applied");
+  m.def(
+      "binary_cross_entropy",
+      &binary_cross_entropy,
+      "A function that value object with cross_entropy applied");
 }
diff --git a/csrc/tensor.h b/csrc/tensor.h
index 06fec77..5feef4f 100644
--- a/csrc/tensor.h
+++ b/csrc/tensor.h
@@ -1,4 +1,5 @@
 #pragma once
+#include <iostream>
 #include <memory>
 #include <stdexcept>
 #include <string>
@@ -10,8 +11,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
   std::vector<int> shape;
   std::vector<int> strides; // jump each index needs to make
   std::vector<std::shared_ptr<Value>> v;
-  int maxIdx;
-  int minIdx;
+  int maxIdx = 0;
+  int minIdx = 0;
 
   Tensor(std::vector<int> shape) : shape(std::move(shape)) {
     int total_size = 1;
@@ -20,10 +21,15 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
     }
     v.resize(total_size);
 
-    strides.resize(this->shape.size());
-    strides.back() = 1;
+    this->compute_stride();
+  }
+
+  void compute_stride() {
+    this->strides.clear();
+    this->strides.resize(this->shape.size());
+    this->strides.back() = 1;
     for (int i = int(this->shape.size()) - 2; i >= 0; --i) {
-      strides[i] = strides[i + 1] * this->shape[i + 1];
+      this->strides[i] = this->strides[i + 1] * this->shape[i + 1];
     }
 
     this->minIdx = 0;
@@ -31,7 +37,7 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
     for (auto& e : this->shape) {
       this->maxIdx *= e;
     }
-    maxIdx--; // 1 less
+    this->maxIdx--; // 1 less
   }
 
   std::string tensor_shape_str() {
@@ -46,7 +52,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
   void set(std::vector<int> idx, std::shared_ptr<Value> _v) {
     int original_idx = normalize_idx(idx);
     if ((original_idx < this->minIdx) || (original_idx > this->maxIdx)) {
-      std::string error_msg = "Index must be in the range. Limit (" +
+      std::string error_msg =
+          "Tensor set method: Index must be in the range. Limit (" +
           std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
           "), but found: " + std::to_string(original_idx) + ".";
 
@@ -58,7 +65,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
   std::shared_ptr<Value> get(std::vector<int> idx) {
     int original_idx = normalize_idx(idx);
     if ((original_idx < this->minIdx) || (original_idx > this->maxIdx)) {
-      std::string error_msg = "Index must be in the range. Limit (" +
+      std::string error_msg =
+          "Tensor get method: Index must be in the range. Limit (" +
           std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
           "), but found: " + std::to_string(original_idx) + ".";
 
@@ -70,7 +78,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
   // real index
   void set(int idx, std::shared_ptr<Value> _v) {
     if ((idx < this->minIdx) || (idx > this->maxIdx)) {
-      std::string error_msg = "Index must be in the range. Limit (" +
+      std::string error_msg =
+          "Tensor set method: Index must be in the range. Limit (" +
           std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
           "), but found: " + std::to_string(idx) + ".";
 
@@ -82,7 +91,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
   // real index
   std::shared_ptr<Value> get(int idx) {
     if ((idx < this->minIdx) || (idx > this->maxIdx)) {
-      std::string error_msg = "Index must be in the range. Limit (" +
+      std::string error_msg =
+          "Tensor get method: Index must be in the range. Limit (" +
           std::to_string(this->minIdx) + "," + std::to_string(this->maxIdx) +
           "), but found: " + std::to_string(idx) + ".";
 
@@ -122,6 +132,7 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
       new_shape.push_back(1);
     }
     t->shape = new_shape;
+    t->compute_stride();
   }
 
   std::shared_ptr<Tensor> add(std::shared_ptr<Tensor> other) {
@@ -174,23 +185,31 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
       throw std::runtime_error("Cannot perform matmul with a null tensor.");
     }
 
+    if (this->shape.size() > 2 || other->shape.size() > 2) {
+      throw std::runtime_error("For now, only 2-D matmul is allowed");
+    }
+
     // Determine effective shapes
     std::vector<int> this_shape = this->shape;
     std::vector<int> other_shape = other->shape;
 
     // Reshape if either is a vector (1D tensor)
     if (this_shape.size() == 1) {
-      this_shape.insert(this_shape.begin(), 1); // Treat as row vector
       std::vector<int> new_shape = {1, this_shape[0]};
       this->shape = new_shape;
+      this->compute_stride();
+      this_shape = new_shape;
     }
     if (other_shape.size() == 1) {
-      other_shape.push_back(1); // Treat as column vector
-      other->shape.push_back(1);
+      // other_shape.push_back(1); // Treat as column vector
+      // other->shape.push_back(1);
+      // this->recompute_stride();
+
+      throw std::runtime_error("other tensor can't be 1D for matmul.");
     }
 
     // Validate dimensions for matrix multiplication
-    if (this_shape[1] != other_shape[0]) {
+    if (this->shape[1] != other_shape[0]) {
       throw std::runtime_error(
           "Dimensions do not align for matmul. Got shapes: (" +
           std::to_string(this_shape[0]) + ", " + std::to_string(this_shape[1]) +
@@ -199,8 +218,8 @@ class Tensor : public std::enable_shared_from_this<Tensor> {
     }
 
     // Compute output shape
-    std::vector<int> output_shape = {this_shape[0], other_shape[1]};
-    auto out = std::make_shared<Tensor>(output_shape);
+    std::vector<int> output_shape = {this_shape[0], other->shape[1]};
+    std::shared_ptr<Tensor> out = std::make_shared<Tensor>(output_shape);
 
     // Perform matrix multiplication
     for (int i = 0; i < output_shape[0]; i++) {
diff --git a/csrc/value.cc b/csrc/value.cc
index 03ddb0f..7530c67 100644
--- a/csrc/value.cc
+++ b/csrc/value.cc
@@ -43,9 +43,9 @@ void Value::backward() {
   this->grad = 1.0;
 
   // Iterating the vector in reverse order
-  std::cout << "topo list: \n";
+  // std::cout << "topo list: \n";
   for (int i = int(topo_list.size()) - 1; i >= 0; i--) {
-    std::cout << "i: " << i << "; node: " << topo_list[i]->printMe() << "\n";
+    // std::cout << "i: " << i << "; node: " << topo_list[i]->printMe() << "\n";
     topo_list[i]->executeBackWardMethod();
   }
 }
diff --git a/ctests/loss_test.cc b/ctests/loss_test.cc
index 7833d92..92350a9 100644
--- a/ctests/loss_test.cc
+++ b/ctests/loss_test.cc
@@ -28,66 +28,77 @@ TEST(LossTest, MSELossTest) {
 // ===== cross entropy loss =====
 TEST(LossTest, CrossEntropyLossTest1) {
   std::shared_ptr<Tensor> x = std::make_shared<Tensor>(std::vector<int>{3});
-  std::shared_ptr<Tensor> y = std::make_shared<Tensor>(std::vector<int>{3});
 
   // set values
   x->set(0, std::make_shared<Value>(2.5));
   x->set(1, std::make_shared<Value>(-3.7));
   x->set(2, std::make_shared<Value>(2.35));
 
-  y->set(0, std::make_shared<Value>(1));
-  y->set(1, std::make_shared<Value>(0));
-  y->set(2, std::make_shared<Value>(0));
-
-  std::shared_ptr<Value> out = cross_entropy(x, y);
+  std::shared_ptr<Value> out = cross_entropy(x, 0);
 
   double expectedValue = 0.6220; // calculated using pytorch nn.CrossEntropyLoss
-  double tolerance = 0.0001;    // Compare up to 4 decimal places
-
+  double tolerance = 0.0001; // Compare up to 4 decimal places
 
   EXPECT_NEAR(out->data, expectedValue, tolerance);
 }
 
 TEST(LossTest, CrossEntropyLossTest2) {
   std::shared_ptr<Tensor> x = std::make_shared<Tensor>(std::vector<int>{3});
-  std::shared_ptr<Tensor> y = std::make_shared<Tensor>(std::vector<int>{3});
 
   // set values
   x->set(0, std::make_shared<Value>(2.5));
   x->set(1, std::make_shared<Value>(-3.7));
   x->set(2, std::make_shared<Value>(2.35));
 
-  y->set(0, std::make_shared<Value>(0));
-  y->set(1, std::make_shared<Value>(1));
-  y->set(2, std::make_shared<Value>(0));
-
-  std::shared_ptr<Value> out = cross_entropy(x, y);
+  std::shared_ptr<Value> out = cross_entropy(x, 1);
 
   double expectedValue = 6.8220; // calculated using pytorch nn.CrossEntropyLoss
-  double tolerance = 0.0001;    // Compare up to 4 decimal places
-
+  double tolerance = 0.0001; // Compare up to 4 decimal places
 
   EXPECT_NEAR(out->data, expectedValue, tolerance);
 }
 
 TEST(LossTest, CrossEntropyLossTest3) {
   std::shared_ptr<Tensor> x = std::make_shared<Tensor>(std::vector<int>{3});
-  std::shared_ptr<Tensor> y = std::make_shared<Tensor>(std::vector<int>{3});
 
   // set values
   x->set(0, std::make_shared<Value>(2.5));
   x->set(1, std::make_shared<Value>(-3.7));
   x->set(2, std::make_shared<Value>(2.35));
 
-  y->set(0, std::make_shared<Value>(0));
-  y->set(1, std::make_shared<Value>(0));
-  y->set(2, std::make_shared<Value>(1));
-
-  std::shared_ptr<Value> out = cross_entropy(x, y);
+  std::shared_ptr<Value> out = cross_entropy(x, 2);
 
   double expectedValue = 0.7720; // calculated using pytorch nn.CrossEntropyLoss
-  double tolerance = 0.0001;    // Compare up to 4 decimal places
+  double tolerance = 0.0001; // Compare up to 4 decimal places
+
+  EXPECT_NEAR(out->data, expectedValue, tolerance);
+}
+
+// ===== binary cross entropy loss =====
+TEST(LossTest, BinaryCrossEntropyLossTest1) {
+  std::shared_ptr<Tensor> x = std::make_shared<Tensor>(std::vector<int>{1});
+
+  // set values
+  x->set(0, std::make_shared<Value>(0.786));
+
+  std::shared_ptr<Value> out = binary_cross_entropy(x, 0);
+
+  double expectedValue = 1.5418; // calculated using pytorch nn.CrossEntropyLoss
+  double tolerance = 0.0001; // Compare up to 4 decimal places
+
+  EXPECT_NEAR(out->data, expectedValue, tolerance);
+}
+
+TEST(LossTest, BinaryCrossEntropyLossTest2) {
+  std::shared_ptr<Tensor> x = std::make_shared<Tensor>(std::vector<int>{1});
+
+  // set values
+  x->set(0, std::make_shared<Value>(0.786));
+
+  std::shared_ptr<Value> out = binary_cross_entropy(x, 1);
 
+  double expectedValue = 0.2408; // calculated using pytorch nn.CrossEntropyLoss
+  double tolerance = 0.0001; // Compare up to 4 decimal places
 
   EXPECT_NEAR(out->data, expectedValue, tolerance);
 }
diff --git a/ctests/nn_test.cc b/ctests/nn_test.cc
index 9a95ec9..2700c35 100644
--- a/ctests/nn_test.cc
+++ b/ctests/nn_test.cc
@@ -5,25 +5,124 @@
 #include "layers/non_linear_layer.h"
 #include "neural_network.h"
 
-// TEST(ModelTest, IsWorking) {
-//   std::shared_ptr<Model> model = std::make_shared<Model>(
-//       std::vector<std::shared_ptr<Layer>>{
-//           std::make_shared<FeedForwardLayer>(2, 8),
-//           std::make_shared<Sigmoid>(),
-//           std::make_shared<FeedForwardLayer>(8, 1),
-//           std::make_shared<Tanh>()},
-//       false);
+TEST(ModelTest, FeedForward) {
+  // [
+  //    [
+  //        Value(data=0.364466, grad=0.000000),
+  //        Value(data=-0.389075, grad=0.000000)
+  //     ],
+  //    [
+  //        Value(data=0.967640, grad=0.000000),
+  //        Value(data=0.335070, grad=0.000000)
+  //     ]
+  // ]
+  std::vector<double> expected_feedforward_weights = {
+      0.364466,
+      -0.389075,
+      0.967640,
+      0.335070,
+      0,
+      0}; // 4th and 5th elements are two biases initialized with 0
 
-//   std::shared_ptr<Tensor> inp = std::make_shared<Tensor>(std::vector<int>{2});
-//   inp->set(0, std::make_shared<Value>(5));
-//   inp->set(1, std::make_shared<Value>(2));
+  double tolerance = 0.0001; // Compare up to 4 decimal places
 
-//   std::shared_ptr<Tensor> out = model->call(inp);
+  int seed = 42;
+  std::shared_ptr<Model> model = std::make_shared<Model>(
+      std::vector<std::shared_ptr<Layer>>{
+          std::make_shared<FeedForwardLayer>(2, 2, seed, "XAVIER", "NORMAL"),
+      },
+      false);
 
-//   EXPECT_EQ(out->dims(), 1);
-//   EXPECT_NE(out->get(0)->data, 0);
+  int i = 0;
 
-//   out->backward();
+  for (auto& e : model->parameters()) {
+    EXPECT_NEAR(e->data, expected_feedforward_weights[i], tolerance);
+    i++;
+  }
+  std::shared_ptr<Tensor> inp = std::make_shared<Tensor>(std::vector<int>{2});
+  inp->set(0, std::make_shared<Value>(0.5));
+  inp->set(1, std::make_shared<Value>(0.3));
 
-//   EXPECT_DOUBLE_EQ(out->get(0)->grad, 1);
-// }
+  // expected output:
+  // [0.472525, -0.0940165]
+  std::shared_ptr<Tensor> out = model->call(inp);
+
+  EXPECT_EQ(out->dims(), 1);
+  EXPECT_EQ(out->shape.size(), 1);
+  EXPECT_EQ(out->shape[0], 2);
+  EXPECT_NEAR(out->get(0)->data, 0.472525, tolerance);
+  EXPECT_NEAR(out->get(1)->data, -0.0940165, tolerance);
+
+  out->backward();
+
+  // expected grad: 0.5, 0.5, 0.3, 0.3, 1, 1
+  std::vector<double> expected_grad = {0.5, 0.5, 0.3, 0.3, 1, 1};
+
+  int idx = 0;
+  for (auto& e : model->parameters()) {
+    EXPECT_NEAR(e->grad, expected_grad[idx], tolerance);
+    idx++;
+  }
+}
+
+TEST(ModelTest, FeedForwardWithRelu) {
+  // [
+  //    [
+  //        Value(data=0.364466, grad=0.000000),
+  //        Value(data=-0.389075, grad=0.000000)
+  //     ],
+  //    [
+  //        Value(data=0.967640, grad=0.000000),
+  //        Value(data=0.335070, grad=0.000000)
+  //     ]
+  // ]
+  std::vector<double> expected_feedforward_weights = {
+      0.364466,
+      -0.389075,
+      0.967640,
+      0.335070,
+      0,
+      0}; // 4th and 5th elements are two biases initialized with 0
+
+  double tolerance = 0.0001; // Compare up to 4 decimal places
+
+  int seed = 42;
+  std::shared_ptr<Model> model = std::make_shared<Model>(
+      std::vector<std::shared_ptr<Layer>>{
+          std::make_shared<FeedForwardLayer>(2, 2, seed, "XAVIER", "NORMAL"),
+          std::make_shared<ReLu>(),
+      },
+      false);
+
+  int i = 0;
+
+  for (auto& e : model->parameters()) {
+    EXPECT_NEAR(e->data, expected_feedforward_weights[i], tolerance);
+    i++;
+  }
+  std::shared_ptr<Tensor> inp = std::make_shared<Tensor>(std::vector<int>{2});
+  inp->set(0, std::make_shared<Value>(0.5));
+  inp->set(1, std::make_shared<Value>(0.3));
+
+  // expected output:
+  // [0.472525, -0.0940165] ===Relu===> [0.472525, 0]
+  std::shared_ptr<Tensor> out = model->call(inp);
+
+  EXPECT_EQ(out->dims(), 1);
+  EXPECT_EQ(out->shape.size(), 1);
+  EXPECT_EQ(out->shape[0], 2);
+  EXPECT_NEAR(out->get(0)->data, 0.472525, tolerance);
+  EXPECT_NEAR(out->get(1)->data, 0, tolerance);
+
+  out->backward();
+
+  // expected grad: 0.5, 0, 0.3, 0, 1, 0 # relu causes grad to be 0 for negative
+  // output
+  std::vector<double> expected_grad = {0.5, 0, 0.3, 0, 1, 0};
+
+  int idx = 0;
+  for (auto& e : model->parameters()) {
+    EXPECT_NEAR(e->grad, expected_grad[idx], tolerance);
+    idx++;
+  }
+}
diff --git a/ctests/tensor_test.cc b/ctests/tensor_test.cc
index c3404d6..7198e8a 100644
--- a/ctests/tensor_test.cc
+++ b/ctests/tensor_test.cc
@@ -95,26 +95,29 @@ class TensorFixtureTest : public testing::Test {
     t2->set({1, 1}, std::make_shared<Value>(21));
     t2->set({2, 0}, std::make_shared<Value>(30));
     t2->set({2, 1}, std::make_shared<Value>(31));
+
+    t3->set(0, std::make_shared<Value>(100));
+    t3->set(1, std::make_shared<Value>(200));
   }
 
   // t1: [[1,2,3], [4,5,6]]
   // t2: [[10,11], [20,21], [30,31]]
   std::shared_ptr<Tensor> t1 = std::make_unique<Tensor>(std::vector<int>{2, 3});
   std::shared_ptr<Tensor> t2 = std::make_unique<Tensor>(std::vector<int>{3, 2});
+  std::shared_ptr<Tensor> t3 = std::make_unique<Tensor>(std::vector<int>{2});
 };
 
 TEST_F(TensorFixtureTest, AddTest) {
-  // t3: [[140, 146], [320, 335]]
-  std::shared_ptr<Tensor> t3 = std::make_unique<Tensor>(std::vector<int>{2, 3});
-  // t1
-    t3->set({0, 0}, std::make_shared<Value>(10));
-    t3->set({0, 1}, std::make_shared<Value>(10));
-    t3->set({0, 2}, std::make_shared<Value>(10));
-    t3->set({1, 0}, std::make_shared<Value>(10));
-    t3->set({1, 1}, std::make_shared<Value>(10));
-    t3->set({1, 2}, std::make_shared<Value>(10));
-
-  std::shared_ptr<Tensor> t_sum = t3->add(t1);
+  // t4: [[140, 146], [320, 335]]
+  std::shared_ptr<Tensor> t4 = std::make_unique<Tensor>(std::vector<int>{2, 3});
+  t4->set({0, 0}, std::make_shared<Value>(10));
+  t4->set({0, 1}, std::make_shared<Value>(10));
+  t4->set({0, 2}, std::make_shared<Value>(10));
+  t4->set({1, 0}, std::make_shared<Value>(10));
+  t4->set({1, 1}, std::make_shared<Value>(10));
+  t4->set({1, 2}, std::make_shared<Value>(10));
+
+  std::shared_ptr<Tensor> t_sum = t4->add(t1);
 
   EXPECT_EQ(t_sum->dims(), 2);
 
@@ -129,17 +132,35 @@ TEST_F(TensorFixtureTest, AddTest) {
   EXPECT_DOUBLE_EQ(t_sum->get(5)->data, double(16));
 }
 
-TEST_F(TensorFixtureTest, MatMulTest) {
-  // t3: [[140, 146], [320, 335]]
-  std::shared_ptr<Tensor> t3 = t1->matmul(t2);
+TEST_F(TensorFixtureTest, MatMulTestTwoDim) {
+  // t4: [[140, 146], [320, 335]]
+  std::shared_ptr<Tensor> t4 = t1->matmul(t2);
+
+  EXPECT_EQ(t4->dims(), 2);
+
+  EXPECT_EQ(t4->shape[0], 2);
+  EXPECT_EQ(t4->shape[1], 2);
+
+  EXPECT_DOUBLE_EQ(t4->get(0)->data, double(140));
+  EXPECT_DOUBLE_EQ(t4->get(1)->data, double(146));
+  EXPECT_DOUBLE_EQ(t4->get(2)->data, double(320));
+  EXPECT_DOUBLE_EQ(t4->get(3)->data, double(335));
+}
+
+TEST_F(TensorFixtureTest, MatMulTestOneDim1) {
+  // t4: [900, 1200, 1500]
+  std::shared_ptr<Tensor> t4 = t3->matmul(t1);
+
+  for(int i=0;i<=t4->maxIdx;i++){
+    std::cerr<<t4->get(i)->data<<", ";
+  }
 
-  EXPECT_EQ(t3->dims(), 2);
+  EXPECT_EQ(t4->dims(), 2);
 
-  EXPECT_EQ(t3->shape[0], 2);
-  EXPECT_EQ(t3->shape[1], 2);
+  EXPECT_EQ(t4->shape[0], 1);
+  EXPECT_EQ(t4->shape[1], 3);
 
-  EXPECT_DOUBLE_EQ(t3->get(0)->data, double(140));
-  EXPECT_DOUBLE_EQ(t3->get(1)->data, double(146));
-  EXPECT_DOUBLE_EQ(t3->get(2)->data, double(320));
-  EXPECT_DOUBLE_EQ(t3->get(3)->data, double(335));
+  EXPECT_DOUBLE_EQ(t4->get(0)->data, double(900));
+  EXPECT_DOUBLE_EQ(t4->get(1)->data, double(1200));
+  EXPECT_DOUBLE_EQ(t4->get(2)->data, double(1500));
 }
diff --git a/src/deeptensor/__init__.py b/src/deeptensor/__init__.py
index 214a85a..e7bfc39 100644
--- a/src/deeptensor/__init__.py
+++ b/src/deeptensor/__init__.py
@@ -18,6 +18,7 @@
     Tensor,
     Value,
     __doc__,
+    binary_cross_entropy,
     cross_entropy,
     mean_squared_error,
 )
@@ -39,6 +40,7 @@
     "Tensor",
     "Value",
     "__doc__",
+    "binary_cross_entropy",
     "cross_entropy",
     "mean_squared_error",
     "version",
diff --git a/tests/test_tensor.py b/tests/test_tensor.py
new file mode 100644
index 0000000..9d1c22c
--- /dev/null
+++ b/tests/test_tensor.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+from math import isclose
+
+from deeptensor import Tensor, Value
+
+
+def test_tensor_set_and_get_one_d():
+    # ------------ 1D tensor ------------
+    t1 = Tensor([4])
+    vals = [1.0, 2.0, 3.0, 4.0]
+
+    for i, val in enumerate(vals):
+        t1.set(i, Value(val))
+
+    for i in range(len(vals)):
+        assert isclose(t1.get(i).data, vals[i])
+
+
+def test_tensor_set_and_get_two_d():
+    # ------------ 2D tensor ------------
+    t1 = Tensor([2, 3])
+    vals = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+
+    counter = 1.0
+    for i in range(2):
+        for j in range(3):
+            t1.set([i, j], Value(counter))
+            counter += 1.0
+
+    for i in range(2):
+        for j in range(3):
+            assert t1.get([i, j]).data == vals[i][j]
+
+
+def test_add_tensor():
+    t1 = Tensor([3])
+    t2 = Tensor([3])
+
+    t1.set(0, Value(1.0))
+    t1.set(1, Value(2.0))
+    t1.set(2, Value(3.0))
+
+    t2.set(0, Value(10.0))
+    t2.set(1, Value(20.0))
+    t2.set(2, Value(30.0))
+
+    t3 = t1 + t2
+
+    assert isclose(t3.get(0).data, 11.0)
+    assert isclose(t3.get(1).data, 22.0)
+    assert isclose(t3.get(2).data, 33.0)
+
+
+def test_div_tensor():
+    t1 = Tensor([3])
+
+    t1.set(0, Value(10.0))
+    t1.set(1, Value(20.0))
+    t1.set(2, Value(30.0))
+
+    t2 = t1 / Value(5)
+
+    assert isclose(t2.get(0).data, 2.0)
+    assert isclose(t2.get(1).data, 4.0)
+    assert isclose(t2.get(2).data, 6.0)
+
+
+def test_matmul():
+    # Create two matrices as Tensors
+    t1 = Tensor([2, 2])
+    t2 = Tensor([2, 2])
+    expected = Tensor([2, 2])
+
+    val1 = [[1.0, 2.0], [3.0, 4.0]]
+    val2 = [[5.0, 6.0], [7.0, 8.0]]
+
+    # Expected result of t1 @ t2
+    # [1*5 + 2*7, 1*6 + 2*8] = [19, 22]
+    # [3*5 + 4*7, 3*6 + 4*8] = [43, 50]
+    expected_val = [[19.0, 22.0], [43.0, 50.0]]
+
+    for i in range(2):
+        for j in range(2):
+            t1.set([i, j], Value(val1[i][j]))
+            t2.set([i, j], Value(val2[i][j]))
+            expected.set([i, j], Value(expected_val[i][j]))
+
+    # Perform matrix multiplication
+    t3 = t1.matmul(t2)  # or t3 = t1 * t2 if overloaded
+
+    # Check if the result matches the expected tensor
+    for i in range(2):
+        for j in range(2):
+            assert (
+                t3.get([i, j]).data == expected_val[i][j]
+            ), f"Matrix multiplication failed: {t3} != {expected}"

From f29cb524496aa9d168c28a9b3206238a15767f9d Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Sun, 19 Jan 2025 19:51:55 +0530
Subject: [PATCH 2/5] update

---
 csrc/value.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/csrc/value.cc b/csrc/value.cc
index 7530c67..51de32b 100644
--- a/csrc/value.cc
+++ b/csrc/value.cc
@@ -1,7 +1,6 @@
 #include "value.h"
 #include <cassert>
 #include <cmath>
-#include <iostream>
 #include <memory>
 #include <stdexcept>
 #include <string>

From 9d7ff201d6e5f4d2bb1973c3b4c60435a0b5cad5 Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Sun, 19 Jan 2025 20:45:22 +0530
Subject: [PATCH 3/5] update

---
 csrc/loss.cc | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/csrc/loss.cc b/csrc/loss.cc
index 815d730..9896705 100644
--- a/csrc/loss.cc
+++ b/csrc/loss.cc
@@ -62,10 +62,25 @@ std::shared_ptr<Value> binary_cross_entropy(
         logits->tensor_shape_str());
   }
   std::shared_ptr<Value> logit_value = logits->get(0);
+
   std::shared_ptr<Value> updated_logit_value = logit_value;
   if (actualIdx == 0) {
     updated_logit_value = std::make_shared<Value>(1.0)->sub(logit_value);
   }
+
+  if (updated_logit_value->data < 0 || updated_logit_value->data > 1) {
+    throw std::runtime_error(
+        "logit value can't be less than 0, and more than 1. Got: " +
+        std::to_string(logit_value->data));
+  }
+
+  constexpr double EPSILION = 1e-6;
+  if (updated_logit_value->data <= 0.0) {
+    updated_logit_value->data = EPSILION; // Handle near-zero values
+  } else if (updated_logit_value->data >= 1.0) {
+    updated_logit_value->data = 1.0 - EPSILION; // Handle near-one values
+  }
+
   std::shared_ptr logits_ln = updated_logit_value->ln();
   return logits_ln->mul(-1);
 }

From 73133cc77fa98d59ee4af4ad9131feac547d3efd Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Sun, 19 Jan 2025 20:50:15 +0530
Subject: [PATCH 4/5] update

---
 ctests/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ctests/CMakeLists.txt b/ctests/CMakeLists.txt
index 4329524..b71c925 100644
--- a/ctests/CMakeLists.txt
+++ b/ctests/CMakeLists.txt
@@ -6,7 +6,7 @@ set(
     TEST_CODE
     value_test.cc
     value_fixture_test.cc
-    nn_test.cc
+    # nn_test.cc  - it'll fail due to seed on linux generate different values
     non_linear_test.cc
     tensor_test.cc
     loss_test.cc

From ce756e5c7f6217e8064fcef06dfd3d062356756c Mon Sep 17 00:00:00 2001
From: Deependu Jha <deependujha21@gmail.com>
Date: Sun, 19 Jan 2025 20:51:30 +0530
Subject: [PATCH 5/5] update

---
 ctests/CMakeLists.txt |   2 +-
 ctests/nn_test.cc     | 244 +++++++++++++++++++++---------------------
 2 files changed, 124 insertions(+), 122 deletions(-)

diff --git a/ctests/CMakeLists.txt b/ctests/CMakeLists.txt
index b71c925..4329524 100644
--- a/ctests/CMakeLists.txt
+++ b/ctests/CMakeLists.txt
@@ -6,7 +6,7 @@ set(
     TEST_CODE
     value_test.cc
     value_fixture_test.cc
-    # nn_test.cc  - it'll fail due to seed on linux generate different values
+    nn_test.cc
     non_linear_test.cc
     tensor_test.cc
     loss_test.cc
diff --git a/ctests/nn_test.cc b/ctests/nn_test.cc
index 2700c35..6ce985a 100644
--- a/ctests/nn_test.cc
+++ b/ctests/nn_test.cc
@@ -5,124 +5,126 @@
 #include "layers/non_linear_layer.h"
 #include "neural_network.h"
 
-TEST(ModelTest, FeedForward) {
-  // [
-  //    [
-  //        Value(data=0.364466, grad=0.000000),
-  //        Value(data=-0.389075, grad=0.000000)
-  //     ],
-  //    [
-  //        Value(data=0.967640, grad=0.000000),
-  //        Value(data=0.335070, grad=0.000000)
-  //     ]
-  // ]
-  std::vector<double> expected_feedforward_weights = {
-      0.364466,
-      -0.389075,
-      0.967640,
-      0.335070,
-      0,
-      0}; // 4th and 5th elements are two biases initialized with 0
-
-  double tolerance = 0.0001; // Compare up to 4 decimal places
-
-  int seed = 42;
-  std::shared_ptr<Model> model = std::make_shared<Model>(
-      std::vector<std::shared_ptr<Layer>>{
-          std::make_shared<FeedForwardLayer>(2, 2, seed, "XAVIER", "NORMAL"),
-      },
-      false);
-
-  int i = 0;
-
-  for (auto& e : model->parameters()) {
-    EXPECT_NEAR(e->data, expected_feedforward_weights[i], tolerance);
-    i++;
-  }
-  std::shared_ptr<Tensor> inp = std::make_shared<Tensor>(std::vector<int>{2});
-  inp->set(0, std::make_shared<Value>(0.5));
-  inp->set(1, std::make_shared<Value>(0.3));
-
-  // expected output:
-  // [0.472525, -0.0940165]
-  std::shared_ptr<Tensor> out = model->call(inp);
-
-  EXPECT_EQ(out->dims(), 1);
-  EXPECT_EQ(out->shape.size(), 1);
-  EXPECT_EQ(out->shape[0], 2);
-  EXPECT_NEAR(out->get(0)->data, 0.472525, tolerance);
-  EXPECT_NEAR(out->get(1)->data, -0.0940165, tolerance);
-
-  out->backward();
-
-  // expected grad: 0.5, 0.5, 0.3, 0.3, 1, 1
-  std::vector<double> expected_grad = {0.5, 0.5, 0.3, 0.3, 1, 1};
-
-  int idx = 0;
-  for (auto& e : model->parameters()) {
-    EXPECT_NEAR(e->grad, expected_grad[idx], tolerance);
-    idx++;
-  }
-}
-
-TEST(ModelTest, FeedForwardWithRelu) {
-  // [
-  //    [
-  //        Value(data=0.364466, grad=0.000000),
-  //        Value(data=-0.389075, grad=0.000000)
-  //     ],
-  //    [
-  //        Value(data=0.967640, grad=0.000000),
-  //        Value(data=0.335070, grad=0.000000)
-  //     ]
-  // ]
-  std::vector<double> expected_feedforward_weights = {
-      0.364466,
-      -0.389075,
-      0.967640,
-      0.335070,
-      0,
-      0}; // 4th and 5th elements are two biases initialized with 0
-
-  double tolerance = 0.0001; // Compare up to 4 decimal places
-
-  int seed = 42;
-  std::shared_ptr<Model> model = std::make_shared<Model>(
-      std::vector<std::shared_ptr<Layer>>{
-          std::make_shared<FeedForwardLayer>(2, 2, seed, "XAVIER", "NORMAL"),
-          std::make_shared<ReLu>(),
-      },
-      false);
-
-  int i = 0;
-
-  for (auto& e : model->parameters()) {
-    EXPECT_NEAR(e->data, expected_feedforward_weights[i], tolerance);
-    i++;
-  }
-  std::shared_ptr<Tensor> inp = std::make_shared<Tensor>(std::vector<int>{2});
-  inp->set(0, std::make_shared<Value>(0.5));
-  inp->set(1, std::make_shared<Value>(0.3));
-
-  // expected output:
-  // [0.472525, -0.0940165] ===Relu===> [0.472525, 0]
-  std::shared_ptr<Tensor> out = model->call(inp);
-
-  EXPECT_EQ(out->dims(), 1);
-  EXPECT_EQ(out->shape.size(), 1);
-  EXPECT_EQ(out->shape[0], 2);
-  EXPECT_NEAR(out->get(0)->data, 0.472525, tolerance);
-  EXPECT_NEAR(out->get(1)->data, 0, tolerance);
-
-  out->backward();
-
-  // expected grad: 0.5, 0, 0.3, 0, 1, 0 # relu causes grad to be 0 for negative
-  // output
-  std::vector<double> expected_grad = {0.5, 0, 0.3, 0, 1, 0};
-
-  int idx = 0;
-  for (auto& e : model->parameters()) {
-    EXPECT_NEAR(e->grad, expected_grad[idx], tolerance);
-    idx++;
-  }
-}
+// it'll fail due to seed on linux generate different values
+
+// TEST(ModelTest, FeedForward) {
+//   // [
+//   //    [
+//   //        Value(data=0.364466, grad=0.000000),
+//   //        Value(data=-0.389075, grad=0.000000)
+//   //     ],
+//   //    [
+//   //        Value(data=0.967640, grad=0.000000),
+//   //        Value(data=0.335070, grad=0.000000)
+//   //     ]
+//   // ]
+//   std::vector<double> expected_feedforward_weights = {
+//       0.364466,
+//       -0.389075,
+//       0.967640,
+//       0.335070,
+//       0,
+//       0}; // 4th and 5th elements are two biases initialized with 0
+
+//   double tolerance = 0.0001; // Compare up to 4 decimal places
+
+//   int seed = 42;
+//   std::shared_ptr<Model> model = std::make_shared<Model>(
+//       std::vector<std::shared_ptr<Layer>>{
+//           std::make_shared<FeedForwardLayer>(2, 2, seed, "XAVIER", "NORMAL"),
+//       },
+//       false);
+
+//   int i = 0;
+
+//   for (auto& e : model->parameters()) {
+//     EXPECT_NEAR(e->data, expected_feedforward_weights[i], tolerance);
+//     i++;
+//   }
+//   std::shared_ptr<Tensor> inp = std::make_shared<Tensor>(std::vector<int>{2});
+//   inp->set(0, std::make_shared<Value>(0.5));
+//   inp->set(1, std::make_shared<Value>(0.3));
+
+//   // expected output:
+//   // [0.472525, -0.0940165]
+//   std::shared_ptr<Tensor> out = model->call(inp);
+
+//   EXPECT_EQ(out->dims(), 1);
+//   EXPECT_EQ(out->shape.size(), 1);
+//   EXPECT_EQ(out->shape[0], 2);
+//   EXPECT_NEAR(out->get(0)->data, 0.472525, tolerance);
+//   EXPECT_NEAR(out->get(1)->data, -0.0940165, tolerance);
+
+//   out->backward();
+
+//   // expected grad: 0.5, 0.5, 0.3, 0.3, 1, 1
+//   std::vector<double> expected_grad = {0.5, 0.5, 0.3, 0.3, 1, 1};
+
+//   int idx = 0;
+//   for (auto& e : model->parameters()) {
+//     EXPECT_NEAR(e->grad, expected_grad[idx], tolerance);
+//     idx++;
+//   }
+// }
+
+// TEST(ModelTest, FeedForwardWithRelu) {
+//   // [
+//   //    [
+//   //        Value(data=0.364466, grad=0.000000),
+//   //        Value(data=-0.389075, grad=0.000000)
+//   //     ],
+//   //    [
+//   //        Value(data=0.967640, grad=0.000000),
+//   //        Value(data=0.335070, grad=0.000000)
+//   //     ]
+//   // ]
+//   std::vector<double> expected_feedforward_weights = {
+//       0.364466,
+//       -0.389075,
+//       0.967640,
+//       0.335070,
+//       0,
+//       0}; // 4th and 5th elements are two biases initialized with 0
+
+//   double tolerance = 0.0001; // Compare up to 4 decimal places
+
+//   int seed = 42;
+//   std::shared_ptr<Model> model = std::make_shared<Model>(
+//       std::vector<std::shared_ptr<Layer>>{
+//           std::make_shared<FeedForwardLayer>(2, 2, seed, "XAVIER", "NORMAL"),
+//           std::make_shared<ReLu>(),
+//       },
+//       false);
+
+//   int i = 0;
+
+//   for (auto& e : model->parameters()) {
+//     EXPECT_NEAR(e->data, expected_feedforward_weights[i], tolerance);
+//     i++;
+//   }
+//   std::shared_ptr<Tensor> inp = std::make_shared<Tensor>(std::vector<int>{2});
+//   inp->set(0, std::make_shared<Value>(0.5));
+//   inp->set(1, std::make_shared<Value>(0.3));
+
+//   // expected output:
+//   // [0.472525, -0.0940165] ===Relu===> [0.472525, 0]
+//   std::shared_ptr<Tensor> out = model->call(inp);
+
+//   EXPECT_EQ(out->dims(), 1);
+//   EXPECT_EQ(out->shape.size(), 1);
+//   EXPECT_EQ(out->shape[0], 2);
+//   EXPECT_NEAR(out->get(0)->data, 0.472525, tolerance);
+//   EXPECT_NEAR(out->get(1)->data, 0, tolerance);
+
+//   out->backward();
+
+//   // expected grad: 0.5, 0, 0.3, 0, 1, 0 # relu causes grad to be 0 for negative
+//   // output
+//   std::vector<double> expected_grad = {0.5, 0, 0.3, 0, 1, 0};
+
+//   int idx = 0;
+//   for (auto& e : model->parameters()) {
+//     EXPECT_NEAR(e->grad, expected_grad[idx], tolerance);
+//     idx++;
+//   }
+// }