Skip to content

Commit d7dee6c

Browse files
authored
Merge pull request #115 from AOSSIE-Org/doc_strings_and_logger
Added Doc strings and logger
2 parents 0e715b0 + 59d3ec0 commit d7dee6c

31 files changed

Lines changed: 793 additions & 97 deletions

backend/app/db/vector_store.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,31 @@
1+
"""
2+
vector_store.py
3+
------------------
4+
Initializes and manages the Pinecone vector database connection for the Perspective API.
5+
6+
This module:
7+
- Loads Pinecone credentials from environment variables.
8+
- Creates the Pinecone index if it does not exist.
9+
- Connects to the specified index for vector operations.
10+
11+
Attributes:
12+
PINECONE_API_KEY (str): API key for authenticating with Pinecone.
13+
INDEX_NAME (str): Name of the Pinecone index used for storing vectors.
14+
DIMENSIONS (int): Dimensionality of vector embeddings.
15+
METRIC (str): Similarity metric used for vector comparison.
16+
index (pinecone.Index): Connected Pinecone index instance.
17+
18+
Raises:
19+
ValueError: If `PINECONE_API_KEY` is not set in environment variables.
20+
RuntimeError: If Pinecone initialization or index connection fails.
21+
"""
22+
123
import os
224
from pinecone import Pinecone, ServerlessSpec, CloudProvider, AwsRegion
25+
from app.logging.logging_config import setup_logger
26+
27+
28+
logger = setup_logger(__name__)
329

430
# Load Pinecone credentials from environment variables
531
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
@@ -19,15 +45,15 @@
1945

2046
# Create index if it doesn't exist
2147
if not pc.has_index(INDEX_NAME):
22-
print(f"Creating index: {INDEX_NAME}")
48+
logger.info(f"Creating index: {INDEX_NAME}")
2349
pc.create_index(
2450
name=INDEX_NAME,
2551
dimension=DIMENSIONS,
2652
metric=METRIC,
2753
spec=ServerlessSpec(cloud=CloudProvider.AWS, region=AwsRegion.US_EAST_1),
2854
)
2955
else:
30-
print(f"Index '{INDEX_NAME}' already exists")
56+
logger.info(f"Index '{INDEX_NAME}' already exists")
3157

3258
try:
3359
# Connect to the index
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import logging
import sys


def setup_logger(name: str, log_file: str = "app.log") -> logging.Logger:
    """
    Create and configure a logger with console and file output.

    Console output is limited to INFO and above; the file keeps full
    DEBUG detail. Calling this twice with the same name returns the
    already-configured logger without duplicating handlers.

    Args:
        name (str): The logger's name (usually __name__ of the calling module).
        log_file (str): Path of the log file. Defaults to "app.log" so
            existing callers keep their current behavior.

    Returns:
        logging.Logger: Configured logger instance.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)  # capture everything; each handler filters per-destination

    # Avoid adding duplicate handlers if this logger was already configured
    # (setup_logger is called at import time from many modules).
    if logger.handlers:
        return logger

    # Shared formatter: timestamp, level, logger name, message.
    formatter = logging.Formatter(
        "[%(asctime)s] [%(levelname)s] [%(name)s]: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Console handler: INFO and above to stdout.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler: full DEBUG detail persisted to disk.
    # delay=True defers opening the file until the first record is emitted,
    # so merely importing a module does not create an empty log file.
    file_handler = logging.FileHandler(log_file, delay=True)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    return logger

backend/app/modules/bias_detection/check_bias.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,34 @@
1+
"""
2+
check_bias.py
3+
-------------
4+
Provides functionality to evaluate the bias score of an article using the Groq API.
5+
6+
This module:
7+
- Loads environment variables for Groq API credentials.
8+
- Connects to the Groq client.
9+
- Defines `check_bias()` to analyze a given article's bias and return a score.
10+
11+
Functions:
12+
check_bias(text: str) -> dict:
13+
Analyzes the input article text and returns a bias score between 0 and 100,
14+
where 0 indicates the least bias and 100 indicates the highest bias.
15+
16+
Environment Variables:
17+
GROQ_API_KEY (str): API key for authenticating with Groq.
18+
19+
Raises:
20+
ValueError: If `text` is missing or empty.
21+
Exception: For errors during API interaction or response parsing.
22+
"""
23+
24+
125
import os
226
from groq import Groq
327
from dotenv import load_dotenv
428
import json
29+
from app.logging.logging_config import setup_logger
30+
31+
logger = setup_logger(__name__)
532

633
load_dotenv()
734

@@ -10,10 +37,11 @@
1037

1138
def check_bias(text):
1239
try:
13-
print(text)
14-
print(json.dumps(text))
40+
logger.debug(f"Raw article text: {text}")
41+
logger.debug(f"JSON dump of text: {json.dumps(text)}")
1542

1643
if not text:
44+
logger.error("Missing or empty 'cleaned_text'")
1745
raise ValueError("Missing or empty 'cleaned_text'")
1846

1947
chat_completion = client.chat.completions.create(
@@ -37,16 +65,16 @@ def check_bias(text):
3765
temperature=0.3,
3866
max_tokens=512,
3967
)
40-
4168
bias_score = chat_completion.choices[0].message.content.strip()
69+
logger.info(f"Bias score calculated: {bias_score}")
4270

4371
return {
4472
"bias_score": bias_score,
4573
"status": "success",
4674
}
4775

4876
except Exception as e:
49-
print(f"Error in bias_detection: {e}")
77+
logger.exception("Error in bias detection")
5078
return {
5179
"status": "error",
5280
"error_from": "bias_detection",

backend/app/modules/chat/embed_query.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
"""
2+
embed_query.py
3+
--------------
4+
Provides functionality to generate vector embeddings for text queries using
5+
the Sentence Transformers library.
6+
7+
This module:
8+
- Loads a pre-trained "all-MiniLM-L6-v2" model.
9+
- Defines a helper function `embed_query()` to encode a query string into
10+
a list of numerical embeddings.
11+
12+
Functions:
13+
embed_query(query: str) -> list[float]:
14+
Encodes the given query into a numerical vector representation.
15+
16+
Model:
17+
all-MiniLM-L6-v2 (from sentence-transformers):
18+
A lightweight transformer model optimized for semantic search and
19+
similarity tasks.
20+
"""
21+
22+
123
from sentence_transformers import SentenceTransformer
224

325
embedder = SentenceTransformer("all-MiniLM-L6-v2")

backend/app/modules/chat/get_rag_data.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,29 @@
1+
"""
2+
get_rag_data.py
3+
---------------
4+
Provides functionality to perform semantic search queries on the Pinecone
5+
vector database for Retrieval-Augmented Generation (RAG) workflows.
6+
7+
This module:
8+
- Loads Pinecone credentials from environment variables.
9+
- Connects to the "perspective" index in Pinecone.
10+
- Defines `search_pinecone()` to search stored vector embeddings and
11+
retrieve the most relevant matches.
12+
13+
Functions:
14+
search_pinecone(query: str, top_k: int = 5) -> list[dict]:
15+
Encodes the input query, searches Pinecone for the most similar
16+
vectors, and returns a list of matches with metadata.
17+
18+
Environment Variables:
19+
PINECONE_API_KEY (str): API key for authenticating with Pinecone.
20+
21+
Dependencies:
22+
- app.modules.chat.embed_query (for generating embeddings)
23+
- pinecone (Pinecone client library)
24+
"""
25+
26+
127
from pinecone import Pinecone
228
from dotenv import load_dotenv
329
from app.modules.chat.embed_query import embed_query

backend/app/modules/chat/llm_processing.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,34 @@
1+
"""
2+
llm_processing.py
3+
-----------------
4+
Handles Large Language Model (LLM) interactions for context-based question answering.
5+
6+
This module:
7+
- Connects to the Groq API using credentials from environment variables.
8+
- Builds a context string from retrieved documents.
9+
- Sends user questions along with context to the LLM.
10+
- Returns generated answers.
11+
12+
Functions:
13+
build_context(docs: list[dict]) -> str:
14+
Extracts relevant fields (explanation or reasoning) from document
15+
metadata and combines them into a single context string.
16+
17+
ask_llm(question: str, docs: list[dict]) -> str:
18+
Builds context from the provided documents, sends it along with the
19+
question to the LLM, and returns the model's answer.
20+
21+
Environment Variables:
22+
GROQ_API_KEY (str): API key for authenticating with Groq.
23+
"""
24+
25+
126
import os
227
from groq import Groq
328
from dotenv import load_dotenv
29+
from app.logging.logging_config import setup_logger
30+
31+
logger = setup_logger(__name__)
432

533
load_dotenv()
634

@@ -16,7 +44,7 @@ def build_context(docs):
1644

1745
def ask_llm(question, docs):
1846
context = build_context(docs)
19-
print(context)
47+
logger.debug(f"Generated context for LLM:\n{context}")
2048
prompt = f"""You are an assistant that answers based on context.
2149
2250
Context:
@@ -33,5 +61,5 @@ def ask_llm(question, docs):
3361
{"role": "user", "content": prompt},
3462
],
3563
)
36-
64+
logger.info("LLM response retrieved successfully.")
3765
return response.choices[0].message.content

backend/app/modules/facts_check/llm_processing.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,36 @@
1+
"""
2+
llm_processing.py
3+
-----------------
4+
Handles claim extraction and fact verification tasks using the Groq LLM API.
5+
6+
This module:
7+
- Connects to the Groq API with credentials from environment variables.
8+
- Extracts verifiable factual claims from text.
9+
- Verifies claims using provided search results and evidence.
10+
- Returns structured responses with verdicts and explanations.
11+
12+
Functions:
13+
run_claim_extractor_sdk(state: dict) -> dict:
14+
Extracts up to three concise, verifiable claims from the input text
15+
stored in the `state` dictionary.
16+
17+
run_fact_verifier_sdk(search_results: list[dict]) -> dict:
18+
Evaluates provided claims against web search evidence and returns
19+
structured JSON verdicts for each claim.
20+
21+
Environment Variables:
22+
GROQ_API_KEY (str): API key for authenticating with Groq.
23+
"""
24+
25+
126
import os
227
from groq import Groq
328
from dotenv import load_dotenv
429
import json
530
import re
31+
from app.logging.logging_config import setup_logger
32+
33+
logger = setup_logger(__name__)
634

735
load_dotenv()
836

@@ -41,6 +69,8 @@ def run_claim_extractor_sdk(state):
4169
)
4270

4371
extracted_claims = chat_completion.choices[0].message.content.strip()
72+
logger.debug(f"Extracted claims:\n{extracted_claims}")
73+
4474

4575
return {
4676
**state,
@@ -49,7 +79,7 @@ def run_claim_extractor_sdk(state):
4979
}
5080

5181
except Exception as e:
52-
print(f"Error in claim_extraction: {e}")
82+
logger.exception("Error in claim_extraction")
5383
return {
5484
"status": "error",
5585
"error_from": "claim_extraction",
@@ -107,13 +137,13 @@ def run_fact_verifier_sdk(search_results):
107137

108138
# Strip markdown code blocks if present
109139
content = re.sub(r"^```json|```$", "", content).strip()
110-
print(content)
140+
logger.debug(f"Raw LLM fact verification output:\n{content}")
111141

112142
# Try parsing the JSON response
113143
try:
114144
parsed = json.loads(content)
115145
except Exception as parse_err:
116-
print(f"LLM JSON parse error: {parse_err}")
146+
logger.error(f"LLM JSON parse error: {parse_err}")
117147

118148
results_list.append(parsed)
119149

@@ -124,7 +154,7 @@ def run_fact_verifier_sdk(search_results):
124154
}
125155

126156
except Exception as e:
127-
print(f"🔥 Error in fact_verification: {e}")
157+
logger.exception("Error in fact_verification")
128158
return {
129159
"status": "error",
130160
"error_from": "fact_verification",

backend/app/modules/facts_check/web_search.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
"""
2+
web_search.py
3+
-------------
4+
Provides a simple wrapper for performing Google Custom Search queries.
5+
6+
This module:
7+
- Loads the Google Search API key from environment variables.
8+
- Sends search requests to the Google Custom Search API.
9+
- Returns the first search result with title, link, and snippet.
10+
11+
Functions:
12+
search_google(query: str) -> list[dict]:
13+
Executes a Google search for the given query and returns the top result
14+
in a list containing its title, link, and snippet.
15+
16+
Environment Variables:
17+
SEARCH_KEY (str): API key for Google Custom Search API.
18+
"""
19+
20+
121
import requests
222
from dotenv import load_dotenv
323
import os

0 commit comments

Comments
 (0)