Skip to content

Commit 2f2d3d8

Browse files
committed
Finished ETL pipeline
1 parent a7ae2b6 commit 2f2d3d8

File tree

6 files changed

+109
-1
lines changed

6 files changed

+109
-1
lines changed

network_data/phisingData.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
1,0,-1,1,1,-1,1,1,-1,1,1,-1,1,0,1,-1,1,1,0,1,1,1,1,1,-1,1,1,1,0,1,1
1111
1,1,-1,1,1,-1,-1,1,-1,1,1,1,1,0,1,-1,1,1,0,1,1,1,1,1,-1,0,-1,1,0,1,-1
1212
1,1,1,1,1,-1,0,1,1,1,1,1,-1,0,0,-1,-1,-1,0,1,1,1,1,-1,1,1,1,1,-1,-1,1
13-
1,1,-1,1,1,-1,1,-1,-1,1,1,1,1,-1,-1,-1,-1,-1,0,1,1,1,1,-1,-1,-1,-1,1,0,-1,-1
13+
1,1,-1,1,1,-1,1,-1,-1,1,1,1,1,-1,-1,-1,-1,-1,0,1,1,1,1,-1,-1,-1,-1,,0,-1,-1
1414
-1,1,-1,1,-1,-1,0,0,1,1,1,-1,-1,-1,1,-1,1,1,0,-1,1,-1,1,1,-1,-1,-1,1,0,1,-1
1515
1,1,-1,1,1,-1,0,-1,1,1,1,1,-1,-1,-1,-1,1,1,0,1,1,1,1,-1,-1,0,-1,1,1,1,-1
1616
1,1,-1,1,1,1,-1,1,-1,1,1,-1,1,0,1,1,1,1,0,1,1,1,1,1,-1,1,-1,1,-1,1,1
network_security/exceptions/exception.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from network_security.logging import logger
2+
import sys
3+
4+
class NetworkSecurityException(Exception):
    """Project exception that records where the active exception surfaced.

    Args:
        error_message: The original exception or a message describing it.
        error_details: An object exposing ``exc_info()``; callers pass the
            ``sys`` module so the traceback of the exception currently being
            handled can be inspected.

    Attributes:
        error_message: The value passed as ``error_message``.
        lineno: Line number from the first traceback entry, or ``None`` when
            no exception is being handled.
        file_name: Source file of that traceback entry, or ``None`` when no
            exception is being handled.
    """

    def __init__(self, error_message, error_details):
        # Initialise the base class so ``args`` and ``repr()`` carry the message.
        super().__init__(error_message)
        self.error_message = error_message

        _, _, exc_tb = error_details.exc_info()
        if exc_tb is None:
            # Constructed outside an ``except`` block: there is no traceback
            # to read, so record the location as unknown instead of crashing.
            self.lineno = None
            self.file_name = None
        else:
            self.lineno = exc_tb.tb_lineno
            self.file_name = exc_tb.tb_frame.f_code.co_filename

    def __str__(self):
        """Return the message together with the recorded file and line."""
        return (
            "Error occurred in python script name [{0}] at line number [{1}] "
            "error message [{2}]"
        ).format(self.file_name, self.lineno, str(self.error_message))
16+

network_security/logging/logger.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from datetime import datetime
2+
import logging
3+
import os, sys
4+
5+
# Timestamped file name, computed once at import time, so each process start
# gets its own log file.
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# Directory that holds every log file. The file name is deliberately NOT part
# of this path; including it created a new "<timestamp>.log" directory per run.
logs_path = os.path.join(os.getcwd(), "Logs")
os.makedirs(logs_path, exist_ok=True)

LOGS_FILE_PATH = os.path.join(logs_path, LOG_FILE)

# Configure the root logger to write INFO and above to the timestamped file.
logging.basicConfig(
    filename=LOGS_FILE_PATH,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
17+
18+

push_data.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from dotenv import load_dotenv
2+
from network_security.logging.logger import logging
3+
from network_security.exceptions.exception import NetworkSecurityException
4+
5+
import os, sys
6+
import json
7+
import certifi
8+
import pandas as pd
9+
import numpy as np
10+
import pymongo
11+
12+
load_dotenv()
13+
14+
# MongoDB connection URI (read from the environment) and the path to certifi's CA bundle
15+
MONGO_DB_URL = os.getenv("MONGODB_URI")
16+
ca = certifi.where()
17+
18+
19+
# implementing the ETL pipeline
20+
class NetworkDataExtraction:
    """ETL helper that reads a CSV file and loads its rows into MongoDB."""

    def __init__(self):
        # Nothing to set up; the Mongo client is created when data is pushed.
        pass

    def csv_to_json_convertor(self, file_path):
        """Read a CSV file and return its rows as a list of dicts.

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            One ``{column: value}`` dict per CSV row; missing values are
            ``None``.

        Raises:
            NetworkSecurityException: If reading or converting the file fails.
        """
        try:
            data = pd.read_csv(file_path)

            # orient="records" serialises row by row and leaves the index out.
            # Transposing the frame instead would coerce every column to one
            # common dtype, turning integers into floats whenever any column
            # contains a missing value.
            return json.loads(data.to_json(orient="records"))
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def push_data_to_mongodb(self, records, database, collection):
        """Insert the records into ``database.collection``.

        Args:
            records: List of documents (dicts) to insert.
            database: Name of the target MongoDB database.
            collection: Name of the target collection.

        Returns:
            The number of records passed in; 0 when ``records`` is empty.

        Raises:
            NetworkSecurityException: If connecting or inserting fails.
        """
        try:
            if not records:
                # insert_many rejects an empty list; nothing to insert.
                return 0

            # NOTE(review): the module-level `ca` bundle path is not passed to
            # the client here; confirm whether `tlsCAFile=ca` was intended.
            self.mongo_client = pymongo.MongoClient(MONGO_DB_URL)
            collection_obj = self.mongo_client[database][collection]
            collection_obj.insert_many(records)

            return len(records)
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e
50+
51+
if __name__ == "__main__":
    # Source CSV and MongoDB destination for this load.
    FILE_PATH = "network_data/phisingData.csv"
    DATABASE = "aryan"
    COLLECTION = "NetworkData"

    networkobj = NetworkDataExtraction()

    records = networkobj.csv_to_json_convertor(file_path=FILE_PATH)
    len_records = networkobj.push_data_to_mongodb(
        records=records, database=DATABASE, collection=COLLECTION
    )
    # Report the outcome so the run is not silent.
    print(f"Inserted {len_records} records into {DATABASE}.{COLLECTION}")

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@ pandas
33
numpy
44
pymongo
55
certifi
6+
pymongo[srv]==3.12
7+
pymongo
68

79
# -e .

test_mongodb.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import os

from pymongo.mongo_client import MongoClient


def main():
    """Ping the MongoDB deployment named by the MONGODB_URI environment variable."""
    # Credentials must not live in source control: read the URI from the
    # environment. The password that was previously committed here should be
    # rotated.
    uri = os.environ.get("MONGODB_URI")
    if not uri:
        raise SystemExit("Set the MONGODB_URI environment variable before running this script.")

    # Create a new client and connect to the server
    client = MongoClient(uri)

    # Send a ping to confirm a successful connection
    try:
        client.admin.command("ping")
        print("Pinged your deployment. You successfully connected to MongoDB!")
    except Exception as e:
        print(e)
    finally:
        client.close()


# Guarded so that importing this module (for example during pytest collection
# of test_*.py files) does not open a network connection.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)