Skip to content

Commit a0289eb

Browse files
ETL pipeline on MongoDB completed
1 parent 0b8bfcc commit a0289eb

11 files changed

Lines changed: 18398 additions & 1 deletion

.DS_Store

0 Bytes
Binary file not shown.

Network_Data/cyber_threat_intelligence_train.csv

Lines changed: 18296 additions & 0 deletions
Large diffs are not rendered by default.

logs/01_15_2025_13_06_05.log

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[2025-01-15 13:06:05,458] 29 root - INFO - MongoDB connection established successfully

logs/01_15_2025_13_08_16.log

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[2025-01-15 13:08:16,939] 29 root - INFO - MongoDB connection established successfully

logs/01_15_2025_13_09_45.log

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[2025-01-15 13:09:45,851] 29 root - INFO - MongoDB connection established successfully
199 Bytes
Binary file not shown.
1.2 KB
Binary file not shown.

push_data.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import os
2+
import sys
3+
import json
4+
5+
from dotenv import load_dotenv
6+
# Load environment variables
7+
load_dotenv()
8+
9+
# Read the MongoDB connection string from the MONGODB_URI environment variable.
uri = os.getenv('MONGODB_URI')

# Report only whether the URI is set. A MongoDB connection string normally
# embeds the username and password, so it must not be echoed to stdout
# (where it can end up in terminals, CI output, or log files).
print("MongoDB URI configured:", uri is not None)
13+
14+
import certifi  # provides the path to a root CA bundle; stored in `ca` and passed to MongoClient as tlsCAFile
15+
ca=certifi.where()
16+
17+
import pandas as pd
18+
import numpy as np
19+
import pymongo
20+
from networksecurity.exception.exception import NetworkSecurityException
21+
from networksecurity.logging.logger import logging
22+
23+
class NetworkDataExtract:
    """Load records from a CSV file and insert them into MongoDB.

    Uses the module-level ``uri`` (MongoDB connection string) and ``ca``
    (CA bundle path returned by ``certifi.where()``) defined earlier in
    this file.
    """

    def __init__(self):
        """Create the MongoDB client and bind the default database/collection.

        Raises:
            NetworkSecurityException: if creating the client or accessing the
                database/collection handles fails.
        """
        try:
            self.client = pymongo.MongoClient(uri, tlsCAFile=ca)
            self.db = self.client['network_security']
            self.collection = self.db['network_data']
            # NOTE(review): PyMongo documents that MongoClient connects
            # lazily, so this message likely only confirms that the client
            # object was created -- confirm whether an explicit ping is wanted.
            logging.info("MongoDB connection established successfully")
        except Exception as e:
            logging.error("Error while connecting to MongoDB")
            raise NetworkSecurityException(e, sys) from e

    def csv_to_json_converter(self, file_path):
        """Read a CSV file and return its rows as a list of dicts.

        Args:
            file_path: Path of the CSV file to read.

        Returns:
            One dict per CSV row, mapping column name to value. Values are
            plain JSON-compatible Python objects (missing cells become
            ``None``).

        Raises:
            NetworkSecurityException: if the file cannot be read or parsed,
                or if it contains no data rows.
        """
        try:
            df = pd.read_csv(file_path)
            if df.empty:
                raise ValueError("CSV file is empty.")
            # Round-trip through JSON so the records hold plain Python
            # values rather than NumPy scalars/NaN. orient="records" ignores
            # the index, so no reset_index/transposition is needed.
            return json.loads(df.to_json(orient="records"))
        except Exception as e:
            # Chain the cause, consistent with __init__.
            raise NetworkSecurityException(e, sys) from e

    def insert_data_mongodb(self, records, database, collection):
        """Insert ``records`` into ``database.collection``.

        Args:
            records: List of documents (dicts) to insert.
            database: Name of the target MongoDB database.
            collection: Name of the target collection.

        Returns:
            The number of documents inserted.

        Raises:
            NetworkSecurityException: if the insert fails for any reason.
        """
        try:
            # Reuse the client created in __init__ instead of opening a new
            # MongoClient on every call that was never closed. A separate
            # local name also avoids rebinding the `collection` parameter.
            target_collection = self.client[database][collection]
            result = target_collection.insert_many(records)
            return len(result.inserted_ids)
        except Exception as e:
            # Chain the cause, consistent with __init__.
            raise NetworkSecurityException(e, sys) from e
58+
59+
60+
if __name__ == "__main__":
    # Source CSV and the MongoDB destination for this load.
    csv_path = "Network_Data/cyber_threat_intelligence_train.csv"
    target_db, target_collection = "AUSTINAI", "network_data"

    # Connect, convert the CSV rows to documents, then insert them.
    extractor = NetworkDataExtract()
    inserted = extractor.insert_data_mongodb(
        extractor.csv_to_json_converter(csv_path),
        target_db,
        target_collection,
    )
    print(f"Inserted {inserted} records into MongoDB")
69+
70+

requirements.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,8 @@ pandas
33
numpy
44
pymongo
55
certifi
6+
pymongo[srv]==3.6
67

7-
-e .
8+
9+
10+
## -e .

setup.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ def get_requirements() -> List[str]:
1313
requirements: List[str] = []
1414
try:
1515
with open('requirements.txt', 'r') as file:
16+
# process each line
1617
for line in file:
1718
requirement = line.strip()
19+
# Skip empty lines and the editable-install entry '-e .'.
1820
if requirement and requirement != '-e .':
1921
requirements.append(requirement)
2022
except FileNotFoundError:

0 commit comments

Comments
 (0)