Skip to content

Commit 36036f0

Browse files
committed
finished making file structure for data ingestion
1 parent 2f2d3d8 commit 36036f0

File tree

4 files changed

+70
-1
lines changed

4 files changed

+70
-1
lines changed

network_security/components/data_ingestion.py

Whitespace-only changes.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""
2+
Training Pipeline Constants
3+
4+
This module contains all constants used throughout the training pipeline.
5+
Constants are organized by functionality with descriptive prefixes.
6+
"""
7+
8+
9+
"""
10+
Data Ingestion related constants start with DATA_INGESTION_* prefix
11+
"""
12+
DATA_INGESTION_COLLECTION_NAME: str = "NetworkData"
13+
DATA_INGESTION_DATABASE_NAME: str = "aryan"
14+
DATA_INGESTION_DIR_NAME: str = "data_ingestion"
15+
DATA_INGESTION_FEATURE_STORE_DIR: str = "feature_store"
16+
DATA_INGESTION_INGESTED_DIR: str = "ingested"
17+
DATA_INGESTION_TRAIN_TEST_SPLIT_RATION: float = 0.2
18+
19+
20+
"""
21+
Defining common constant variables for training pipeline
22+
"""
23+
TARGET_COLUMN: str = "Result"
24+
PIPELINE_NAME: str = "NetworkSecurity"
25+
ARTIFACT_DIR: str = "Artifacts"
26+
FILE_NAME: str = "phishingData.csv"
27+
28+
TRAIN_FILE_NAME: str = "train.csv"
29+
TEST_FILE_NAME: str = "test.csv"
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from datetime import datetime
2+
from network_security.constants import training_pipeline
3+
4+
import os, sys
5+
6+
7+
class TrainingPipelineConfig:
    """Top-level configuration shared by all pipeline stages.

    Builds a timestamped artifact directory (``Artifacts/<timestamp>``)
    so each pipeline run writes its outputs to its own location.
    """

    def __init__(self, timestamp=None):
        # Bug fix: the original default was ``timestamp=datetime.now()``,
        # which Python evaluates ONCE at import time — every run would
        # silently share the same timestamp (and artifact directory).
        # Defaulting to None and resolving per call fixes that while
        # staying backward compatible for callers that pass a datetime.
        if timestamp is None:
            timestamp = datetime.now()
        stamp = timestamp.strftime("%m_%d_%Y_%H_%M_%S")
        self.pipeline_name = training_pipeline.PIPELINE_NAME
        self.artifact_name = training_pipeline.ARTIFACT_DIR
        # Bug fix: ``os.join`` does not exist (the original line raised
        # AttributeError); the correct call is ``os.path.join``.
        self.artifact_dir = os.path.join(self.artifact_name, stamp)
        self.timestamp: str = stamp
14+
15+
16+
class DataIngestionConfig:
    """Filesystem layout and database settings for the data-ingestion stage.

    All paths are derived from the run's artifact directory supplied by
    ``training_pipeline_config``.
    """

    def __init__(self, training_pipeline_config: TrainingPipelineConfig):
        # Root directory for everything this stage produces.
        root = os.path.join(
            training_pipeline_config.artifact_dir,
            training_pipeline.DATA_INGESTION_DIR_NAME,
        )
        # Shared prefix for the train/test split outputs.
        ingested = os.path.join(
            root,
            training_pipeline.DATA_INGESTION_INGESTED_DIR,
        )

        self.data_ingestion_dir: str = root
        # Raw export of the source collection, before splitting.
        self.feature_store_file_path: str = os.path.join(
            root,
            training_pipeline.DATA_INGESTION_FEATURE_STORE_DIR,
            training_pipeline.FILE_NAME,
        )
        self.train_file_path: str = os.path.join(
            ingested, training_pipeline.TRAIN_FILE_NAME
        )
        self.test_file_path: str = os.path.join(
            ingested, training_pipeline.TEST_FILE_NAME
        )
        # Split ratio and MongoDB source settings.
        self.train_test_split_ratio: float = (
            training_pipeline.DATA_INGESTION_TRAIN_TEST_SPLIT_RATION
        )
        self.collection_name: str = training_pipeline.DATA_INGESTION_COLLECTION_NAME
        self.db_name: str = training_pipeline.DATA_INGESTION_DATABASE_NAME

push_data.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,5 @@ def push_data_to_mongodb(self, records, database, collection):
5656
networkobj = NetworkDataExtraction()
5757

5858
records = networkobj.csv_to_json_convertor(file_path=FILE_PATH)
59-
len_records = networkobj.push_data_to_mongodb(records=records, database=DATABASE, collection=Collection)
59+
len_records = networkobj.push_data_to_mongodb(records=records, database=DATABASE, collection=Collection)
60+

0 commit comments

Comments
 (0)