diff --git a/elevate-mentoring/constants/routes.js b/elevate-mentoring/constants/routes.js
index 9e261bb..0f29627 100644
--- a/elevate-mentoring/constants/routes.js
+++ b/elevate-mentoring/constants/routes.js
@@ -1843,5 +1843,57 @@ module.exports = {
type: "GET"
},
},
+ {
+ "sourceRoute": "/mentoring/v1/admin/getCacheStats",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/clearCache",
+ "type": "POST",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/warmUpCache",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/getCacheHealth",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ }
],
}
diff --git a/evidence analysis/.env.sample b/evidence analysis/.env.sample
new file mode 100644
index 0000000..3b9078a
--- /dev/null
+++ b/evidence analysis/.env.sample
@@ -0,0 +1 @@
+GEMINI_API_KEYS=your_gemini_api_key_here
\ No newline at end of file
diff --git a/evidence analysis/README.md b/evidence analysis/README.md
new file mode 100644
index 0000000..622f709
--- /dev/null
+++ b/evidence analysis/README.md
@@ -0,0 +1,90 @@
+# Evidence Analysis with AI
+
+This Streamlit app enables users to analyze classroom evidence images using AI models (Gemini or OpenAI). The app allows input of custom questions and provides AI-generated answers and reasoning.
+
+---
+
+## Features
+
+- Upload or link to an image (evidence)
+- Input up to 7 custom questions
+- Use AI to answer questions and explain reasoning
+- Display image preview and AI output
+- Token switching for Gemini API
+- Modular backend in `ai/process_evidence.py`
+
+---
+
+## Tech Stack
+
+- [Python](https://www.python.org/)
+- [Streamlit](https://streamlit.io/)
+- [Google Gemini API](https://ai.google.dev/)
+- [OpenAI API](https://platform.openai.com/)
+- [httpx](https://www.python-httpx.org/), [base64](https://docs.python.org/3/library/base64.html)
+
+---
+
+## Project Structure
+
+```
+.
+├── ai/
+│   └── process_evidence.py     # AI logic for image and question processing
+├── utils/
+│   └── auth.py                 (basic authentication)
+├── app.py                      (your app logic)
+├── evidence_analysis_page.py
+├── .gitignore
+├── README.md
+└── requirements.txt
+```
+
+---
+
+## Setup Instructions
+
+1. **Clone the repository**:
+ ```bash
+ git clone https://github.com/your-username/evidence-analysis.git
+ cd evidence-analysis
+ ```
+
+2. **Install dependencies**:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+3. **Add your API keys**:
+   - Create a `.env` file (see `.env.sample`); `ai/process_evidence.py` loads it with `python-dotenv`
+   - Supported keys:
+     - `GEMINI_API_KEYS` (required; one or more Google Gemini API keys, comma-separated)
+     - `TOGETHER_API_KEY` (optional; API key for the OpenAI-compatible fallback endpoint)
+
+4. **Run the app**:
+ ```bash
+ streamlit run app.py
+ ```
+
+---
+
+## Example Usage
+
+1. Enter up to 7 custom evaluation questions.
+2. Paste a public URL of an image showing evidence (e.g., classroom project photo).
+3. Click **Analyse**.
+4. Get AI-generated YES/NO answers, each with its reasoning.
+5. See relevance tag and image preview.
+
+---
+
+## Notes
+
+- Make sure your Gemini/OpenAI API keys have sufficient quota.
+- Gemini's `response_schema` requires accurate schema handling and token management.
+
+---
+
+## License
+
+MIT License
\ No newline at end of file
diff --git a/evidence analysis/ai/process_evidence.py b/evidence analysis/ai/process_evidence.py
new file mode 100644
index 0000000..95cb9da
--- /dev/null
+++ b/evidence analysis/ai/process_evidence.py
@@ -0,0 +1,193 @@
+import json
+import base64
+import time
+import httpx
+import mimetypes
+import re
+import os
+from urllib.request import urlopen
+import google.generativeai as genai
+from openai import OpenAI
+import typing_extensions as typing
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# --- CONFIGURATION ---
+# Load keys from .env, split by comma if multiple exist
+gemini_env = os.getenv("GEMINI_API_KEYS", "")
+GEMINI_TOKENS = [key.strip() for key in gemini_env.split(",") if key.strip()]
+
+TOGETHER_TOKEN = os.getenv("TOGETHER_API_KEY")
+
+MAX_RETRIES = 3
+current_token_index = 0
+
+# --- TOKEN HANDLING (Gemini) ---
+def get_next_gemini_token():
+ global current_token_index
+ if GEMINI_TOKENS and current_token_index < len(GEMINI_TOKENS):
+ return GEMINI_TOKENS[current_token_index]
+ return None
+
+def switch_to_next_token():
+ """Advance the rotation index to the next Gemini key and return that key.
+
+ The index is reset to 0 once it reaches the end of GEMINI_TOKENS, so the
+ first key is handed out again after every key has been tried.
+ """
+ global current_token_index
+ current_token_index += 1
+ if current_token_index >= len(GEMINI_TOKENS):
+ # Reset or handle exhaustion
+ current_token_index = 0
+ print("Warning: Cycled through all Gemini tokens.")
+ # Delegates the bounds check; returns None only when GEMINI_TOKENS is empty.
+ return get_next_gemini_token()
+
+# --- Gemini Model Setup ---
+class AnalysisResponse(typing.TypedDict):
+ """JSON shape requested from Gemini; passed as ``response_schema`` below."""
+ # Answer strings; calculate_relevance_tag compares each one against 'YES'.
+ answers: list[str]
+ # Explanation strings — presumably one per answer; not enforced here.
+ reasonings: list[str]
+
+# Importing this module fails fast when GEMINI_API_KEYS yielded no usable key.
+initial_token = get_next_gemini_token()
+if not initial_token:
+ raise ValueError("No valid Gemini tokens found in .env file.")
+
+# Configure the SDK with the first key and build the shared model object.
+# The generation config asks for JSON output shaped like AnalysisResponse.
+genai.configure(api_key=initial_token)
+model = genai.GenerativeModel(
+ model_name="gemini-2.5-flash",
+ generation_config={
+ "response_mime_type": "application/json",
+ "response_schema": AnalysisResponse,
+ },
+)
+
+# --- OpenAI (SambaNova) Setup ---
+# Only initialize if token exists to prevent crash
+# NOTE(review): the key is read from TOGETHER_API_KEY but the base URL points at
+# api.sambanova.ai — confirm which provider the key is meant for.
+if TOGETHER_TOKEN:
+ client = OpenAI(
+ base_url="https://api.sambanova.ai/v1",
+ api_key=TOGETHER_TOKEN
+ )
+else:
+ # analyze_evidence() skips the fallback step when client is None.
+ client = None
+ print("Warning: TOGETHER_API_KEY not found in .env")
+
+# --- Helper: Convert image to base64 ---
+def get_image_as_base64(url: str) -> str:
+ with urlopen(url) as response:
+ image_data = response.read()
+ mime_type, _ = mimetypes.guess_type(url)
+ if not mime_type:
+ mime_type = "image/jpeg"
+ return f"data:{mime_type};base64,{base64.b64encode(image_data).decode('utf-8')}"
+
+# --- Helper: Relevance Tag ---
+def calculate_relevance_tag(answers):
+ if not answers or not isinstance(answers, list):
+ return 'Irrelevant'
+ yes_count = sum(1 for answer in answers if str(answer).upper() == 'YES')
+ if len(answers) == 0: return 'Irrelevant'
+
+ percentage = (yes_count / len(answers)) * 100
+ if percentage >= 50:
+ return 'Relevant'
+ elif percentage > 0:
+ return 'Partially Relevant'
+ return 'Irrelevant'
+
+# --- Response Parser ---
+def extract_structured_response(response_text):
+ """Parse a free-text model reply into answers and reasonings.
+
+ Returns a dict with "answers" (list of "YES"/"NO") and "reasonings"
+ (list of strings ordered by their leading number), or None when no
+ ANSWERS line could be found.
+ """
+ # Answers are matched on an upper-cased copy so "Answers: yes, no" also matches.
+ normalized = response_text.replace("\r\n", "\n").upper()
+ # A line starting with ANSWERS followed by a comma-separated YES/NO list.
+ answers_match = re.search(
+ r"^ANSWERS[:\-\s]*((?:YES|NO)(?:\s*,\s*(?:YES|NO))*)",
+ normalized,
+ re.MULTILINE
+ )
+ # Numbered lines ("1. ...") are taken from the original text to keep their casing.
+ reasonings_match = re.findall(
+ r"(?:^REASONINGS[:\-\s]*\n)?(?:^|\n)\s*(\d+)\.?\s*([^\n]+)",
+ response_text,
+ re.MULTILINE
+ )
+
+ answers = [a.strip() for a in answers_match.group(1).split(",")] if answers_match else []
+ # Keep only YES/NO entries; the [:3] slice leaves "YES" and "NO" unchanged.
+ answers = [a.upper()[:3] for a in answers if a.upper().startswith(("YES", "NO"))]
+ # Order reasonings by the number that prefixed each line.
+ reasonings = [item[1].strip() for item in sorted(reasonings_match, key=lambda x: int(x[0]))]
+
+ # Relaxed validation to allow partial parsing if strict 3 count fails
+ if not answers:
+ return None
+ return {"answers": answers, "reasonings": reasonings}
+
+# --- MAIN FUNCTION ---
+def analyze_evidence(image_url: str, prompt: str, use_openai: bool = False):
+ """Ask Gemini (and optionally an OpenAI-compatible fallback) about an image.
+
+ Args:
+ image_url: URL of the evidence image to download and send to the model.
+ prompt: Full prompt text, including the questions to answer.
+ use_openai: When True and `client` is configured, try the fallback
+ model after the Gemini step did not return a result.
+
+ Returns:
+ A dict with "source", "answers", "reasonings" and "relevance" on
+ success, or {"error": ...} when neither step produced a result.
+ """
+# prompt = f"""You are an educational evidence validator. Analyze this image, which is field evidence from a Project-Based Learning classroom in Bihar, India.
+# Please analyze this image carefully and answer with ONLY 'yes' or 'no' for each question below separated by commas:
+# {questions}
+# Consider all visible elements and context. Explain your reasoning for each answer briefly.
+# """
+
+ print(f"[Prompt]:\n{prompt}\n")
+
+ # Step 1: Gemini
+ for _ in range(MAX_RETRIES):
+ try:
+ # NOTE(review): no timeout or HTTP status check here, and the image is
+ # downloaded again on every retry — confirm this is intended.
+ image = httpx.get(image_url)
+ gemini_response = model.generate_content(
+ [
+ {
+ # NOTE(review): always sent as image/jpeg, whatever the real file type is.
+ "mime_type": "image/jpeg",
+ "data": base64.b64encode(image.content).decode("utf-8"),
+ },
+ prompt,
+ ]
+ )
+ # The model is configured for JSON output, so the text is parsed directly.
+ response_json = json.loads(gemini_response.text)
+ relevance = calculate_relevance_tag(response_json.get("answers", []))
+ print(f"[Gemini Response] = {response_json}")
+ print(f"[Relevance Tag] = {relevance} \n")
+ return {
+ "source": "gemini",
+ "answers": response_json.get("answers"),
+ "reasonings": response_json.get("reasonings"),
+ "relevance": relevance
+ }
+ except Exception as e:
+ # Quota / rate-limit errors rotate to the next key and retry.
+ if any(x in str(e).lower() for x in ["quota", "rate limit", "429"]):
+ token = switch_to_next_token()
+ if token:
+ # NOTE(review): `model` was created before this reconfigure — confirm
+ # the existing model object picks up the new key.
+ genai.configure(api_key=token)
+ continue
+ print(f"[Gemini Error] {e}")
+ break
+
+ # Step 2: OpenAI fallback (if enabled and client exists)
+ if use_openai and client:
+ for _ in range(MAX_RETRIES):
+ try:
+ openai_response = client.chat.completions.create(
+ model="Llama-4-Maverick-17B-128E-Instruct",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": prompt},
+ {"type": "image_url", "image_url": {"url": get_image_as_base64(image_url)}},
+ ],
+ }
+ ],
+ )
+ if openai_response.choices:
+ content = openai_response.choices[0].message.content
+ # The fallback replies in free text, so it goes through the regex parser.
+ structured = extract_structured_response(content)
+ if structured:
+ relevance = calculate_relevance_tag(structured["answers"])
+ return {
+ "source": "openai",
+ "answers": structured["answers"],
+ "reasonings": structured["reasonings"],
+ "relevance": relevance
+ }
+ except Exception as e:
+ print(f"[OpenAI Error] {e}")
+ time.sleep(3)
+
+ # Neither step returned a usable result.
+ return {
+ "error": "Unable to process image after retries"
+ }
diff --git a/evidence analysis/app.py b/evidence analysis/app.py
new file mode 100644
index 0000000..56d62c6
--- /dev/null
+++ b/evidence analysis/app.py
@@ -0,0 +1,83 @@
+import streamlit as st
+from utils.auth import check_credentials
+import evidence_analysis_page
+import streamlit.components.v1 as components
+
+st.set_page_config(page_title="Login", layout="wide", initial_sidebar_state="collapsed")
+
+# Initialize login state if not already present
+if "logged_in" not in st.session_state:
+ st.session_state["logged_in"] = False
+
+if "show_reports" not in st.session_state:
+ st.session_state["show_reports"] = False
+
+def login():
+ st.title("π Login Page")
+ st.markdown(" ", unsafe_allow_html=True)
+
+ with st.form("login_form"):
+ username = st.text_input("Username")
+ password = st.text_input("Password", type="password")
+ submitted = st.form_submit_button("Login")
+
+ if submitted:
+ if check_credentials(username, password):
+ st.session_state["logged_in"] = True
+ st.success("Login successful!")
+ st.rerun()
+ else:
+ st.error("Invalid username or password")
+
+def show_reports():
+ """Display the HTML report page with navigation"""
+ # Header with navigation
+ col1, col2, col3 = st.columns([1, 6, 1])
+
+ with col1:
+ if st.button("β Back to Analysis", key="back_btn"):
+ st.session_state["show_reports"] = False
+ st.rerun()
+
+ with col2:
+ st.markdown("
π MIP Evidence Reports ", unsafe_allow_html=True)
+
+ with col3:
+ if st.button("πͺ Logout", key="logout_btn"):
+ st.session_state.clear()
+ st.rerun()
+
+ # Colored horizontal line
+ st.markdown("""
+
+ """, unsafe_allow_html=True)
+
+ # Hide menu and footer
+ st.markdown("""
+
+ """, unsafe_allow_html=True)
+
+ # Read and display the HTML file
+ try:
+ with open("report.html", "r", encoding="utf-8") as f:
+ html_content = f.read()
+
+ # Display the HTML content
+ components.html(html_content, height=1200, scrolling=True)
+
+ except FileNotFoundError:
+ st.error("report.html file not found. Please ensure the file exists in the same directory.")
+ except Exception as e:
+ st.error(f"Error loading report: {str(e)}")
+
+# Show login, evidence analysis page, or reports
+if not st.session_state["logged_in"]:
+ login()
+else:
+ if st.session_state["show_reports"]:
+ show_reports()
+ else:
+ evidence_analysis_page.show()
diff --git a/evidence analysis/evidence_analysis_page.py b/evidence analysis/evidence_analysis_page.py
new file mode 100644
index 0000000..aa72b9a
--- /dev/null
+++ b/evidence analysis/evidence_analysis_page.py
@@ -0,0 +1,99 @@
+import streamlit as st
+from ai.process_evidence import analyze_evidence
+
+def show():
+ st.set_page_config(page_title="Evidence Analysis", layout="wide")
+
+ # Header row: Title centered, Reports and Logout on the right
+ col1, col2, col3 = st.columns([1, 2, 1])
+
+ with col1:
+ pass # Empty left side
+
+ with col2:
+ st.markdown("π§ͺ Evidence Analysis ", unsafe_allow_html=True)
+
+ with col3:
+ st.markdown("", unsafe_allow_html=True)
+
+ # Create two columns for Reports and Logout buttons
+ btn_col1, btn_col2 = st.columns(2)
+
+ with btn_col1:
+ if st.button("π Reports"):
+ st.session_state["show_reports"] = True
+ st.rerun()
+
+ with btn_col2:
+ if st.button("πͺ Logout"):
+ st.session_state.clear()
+ st.rerun()
+
+ st.markdown("
", unsafe_allow_html=True)
+
+ # Colored horizontal line (simple styled )
+ st.markdown("""
+
+ """, unsafe_allow_html=True)
+
+ # An empty break line added
+ st.markdown(" ", unsafe_allow_html=True)
+
+ # Define layout columns
+ col1, col2, col3, col4 = st.columns([2, 2, 2, 2])
+
+ # Column 1: Enter 7 Questions
+ with col1:
+ st.markdown("π Enter Questions
", unsafe_allow_html=True)
+ question_inputs = []
+ for i in range(1, 8):
+ q = st.text_area(f"{i}.", key=f"question_{i}", height=100)
+ question_inputs.append(q)
+
+ # Column 2: Evidence Link Input + Analyse Button
+ with col2:
+ st.markdown("π Paste Evidence Link
", unsafe_allow_html=True)
+ image_url = st.text_area("Image URL", height=150)
+ default_prompt = """You are an educational evidence validator. Analyse this image as field evidence from a PBL classroom in Bihar, India. Answer the added questions with 'YES' or 'NO', consider all visible elements and context. Explain your reasoning for each answer briefly."""
+ prompt_text = st.text_area("Prompt (Editable)", value=default_prompt, height=150)
+ context = prompt_text + "\n\n" + "\n".join([f"{i+1}. {q}" for i, q in enumerate(question_inputs) if q.strip()])
+
+ if st.button("π Analyse", use_container_width=True):
+ if not image_url.strip():
+ st.warning("Please provide an evidence link.")
+ elif not any(q.strip() for q in question_inputs):
+ st.warning("Please enter at least one question.")
+ else:
+ result = analyze_evidence(image_url, context, use_openai=False)
+
+ if "error" in result:
+ st.error(f"β Error: {result['error']}")
+ else:
+ st.session_state["image_url"] = image_url
+ st.session_state["questions"] = question_inputs
+ st.session_state["ai_result"] = result
+ st.session_state["analysed"] = True
+
+ # Column 3: Image Preview
+ with col3:
+ st.markdown("πΌοΈ Image Preview
", unsafe_allow_html=True)
+ if st.session_state.get("analysed", False):
+ st.image(st.session_state["image_url"], width=350)
+ else:
+ st.info("Awaiting evidence link and analysis...")
+
+ # Column 4: Output Summary
+ with col4:
+ st.markdown("π§ Output Summary
", unsafe_allow_html=True)
+ if st.session_state.get("analysed", False):
+ result = st.session_state.get("ai_result", {})
+ if result:
+ st.markdown("**π’ Relevance Tag:** " + result.get("relevance", "Unknown"))
+ st.markdown("**β
Answers:** " + ", ".join(result.get("answers", [])))
+ st.markdown("**π§ Reasoning:**")
+ for i, reason in enumerate(result.get("reasonings", []), 1):
+ st.markdown(f"{i}. {reason}")
+ else:
+ st.text("(No AI output found)")
+ else:
+ st.info("Output will appear after clicking Analyse.")
diff --git a/evidence analysis/report-old.html b/evidence analysis/report-old.html
new file mode 100644
index 0000000..5e2f893
--- /dev/null
+++ b/evidence analysis/report-old.html
@@ -0,0 +1,1388 @@
+
+
+
+
+ MIP Evidence Report
+
+
+
+
+
+
+
+
+
+
+
+
Processing your data...
+
+
+
+
+
+
+
+
π Executive Summary
+
+
+
+
+
+
+
+
π Evidence Submission Overview
+
+
+
+
+
+
Monthly Project Timeline
+
+
+
+
+
+
+
Subject Distribution
+
+
+
+
+
+
+
+
+
π― Quality & Relevance Analysis
+
+
+
+
+
+
Task Completion Analysis
+
+
+
+
+
+
+
πΊοΈ District-wise Submission Quality & Insights
+
+
+
+
Bihar District Relevance Map
+
+
+
+ Map placeholder area (Ensure map is static before PDF generation)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/evidence analysis/report.html b/evidence analysis/report.html
new file mode 100644
index 0000000..687936f
--- /dev/null
+++ b/evidence analysis/report.html
@@ -0,0 +1,1819 @@
+
+
+
+
+ MIP Evidence Report - With Filters
+
+
+
+
+
+
+
+
+
+
+
+
π Filter Data
+
+
+ State
+
+ All States
+
+
+
+ District
+
+ All Districts
+
+
+
+ Block
+
+ All Blocks
+
+
+
+ School
+
+ All Schools
+
+
+
+ Relevance Tag
+
+ All Relevance
+ Relevant
+ Partially Relevant
+ Irrelevant
+
+
+
+
+ π Reset All Filters
+
+
+
+
+
+
+
Processing your data...
+
+
+
+
+
+
+
π Executive Summary
+
+
+
+
+
+
π Evidence Submission Overview
+
+
+
+
+
+
+
+
+
+
Subject Distribution
+
+
+
+
+
+
+
+
π― Quality & Relevance Analysis
+
+
+
+
+
Task Completion Analysis
+
+
+
+
+
+
πΊοΈ District-wise Submission Quality & Insights
+
+
+
+
Bihar District Relevance Map
+
+ Map placeholder area
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/evidence analysis/requirements.txt b/evidence analysis/requirements.txt
new file mode 100644
index 0000000..540ba2c
--- /dev/null
+++ b/evidence analysis/requirements.txt
@@ -0,0 +1,9 @@
+streamlit
+openai
+google-generativeai
+pandas
+openpyxl
+httpx
+requests
+typing_extensions
+python-dotenv
diff --git a/evidence analysis/utils/auth.py b/evidence analysis/utils/auth.py
new file mode 100644
index 0000000..17bd010
--- /dev/null
+++ b/evidence analysis/utils/auth.py
@@ -0,0 +1,2 @@
+def check_credentials(username: str, password: str) -> bool:
+ return username == "admin" and password == "admin"
diff --git a/evidence-analysis-process/.env.sample b/evidence-analysis-process/.env.sample
new file mode 100644
index 0000000..3281568
--- /dev/null
+++ b/evidence-analysis-process/.env.sample
@@ -0,0 +1 @@
+GEMINI_TOKEN=your_gemini_token_here
\ No newline at end of file
diff --git a/evidence-analysis-process/README.md b/evidence-analysis-process/README.md
new file mode 100644
index 0000000..dbb3eb6
--- /dev/null
+++ b/evidence-analysis-process/README.md
@@ -0,0 +1,105 @@
+# Evidence Analysis Multithreaded — MIP Evidence Pipeline
+
+### Overview
+This repository contains a lightweight pipeline to preprocess CSV files, split them for parallel execution, perform **image-based evidence analysis** (via a generative model), merge processed results, validate URLs, and clean invalid rows.
+
+---
+
+## Repository Structure
+
+| Path | Description |
+|------|--------------|
+| **pre-processor/1-pre-processor.py** | Loads and filters raw CSVs, adds computed columns (`clean_cell`, `is_image_url`), and generates single or split preprocessed CSVs. |
+| **pre-processor/2-csv-splitter.py** | Splits large CSVs into smaller chunks for parallel processing. |
+| **processor/1-main-parallel-script.py** | Main orchestration script for parallel execution. Handles per-file processing via `google-generativeai`, `httpx`, `openpyxl`, and includes rate-limiting and token rotation (`get_gemini_tokens_from_env`). |
+| **processor/2-merge-processed-csv.py** | Merges parallel outputs into a single `merged_output.csv`. |
+| **processor/3-invalid-url-&-custom-task-remover.py** | Removes invalid rows (where QA columns are null) and extracts URLs from *Task Evidence* fields. |
+| **processor/4-url-validator.py** | Concurrent URL validation tool (`load_urls_from_file`, `validate_url`, `validate_urls_concurrent`). |
+| **processor/validate-input-output-csv.py** | Compares source vs. merged output CSVs and logs missing rows. |
+| **webpage/** | Local web UI to visualize processed CSVs. |
+| **.env** | Environment file to store GEMINI API tokens (e.g., `GEMINI_TOKEN1="..."`). |
+
+---
+
+## Prerequisites
+- **Python** 3.8 or above
+- Install dependencies:
+ ```
+ pip install -r requirements.txt
+ ```
+
+## Environment Setup
+If using the generative model in processor/1-main-parallel-script.py, create a .env file and include your GEMINI tokens:
+```
+GEMINI_TOKEN1="your_token_here"
+```
+These tokens are fetched dynamically using the get_gemini_tokens_from_env function.
+
+## Quickstart
+### 1. Preprocessing
+Edit input/output paths in pre-processor/1-pre-processor.py.
+Run:
+```
+python pre-processor/1-pre-processor.py
+```
+Output will be stored under output-pre-processor/ as either:
+* preprocessed_data.csv, or
+* multiple split_*.csv files (if splitting is enabled).
+
+### 2. Split CSVs for Parallel Processing
+#### Option A:
+Run the splitter script manually:
+```
+python pre-processor/2-csv-splitter.py
+```
+#### Option B:
+Enable the SPLIT_FILES=yes flag in the preprocessor to auto-generate split files.
+
+### 3. Main Parallel Processing
+Set the following variables at the top of processor/1-main-parallel-script.py:
+* INPUT_DIR
+* OUTPUT_DIR
+* FINAL_OUTPUT_FILE
+
+Ensure .env contains valid GEMINI tokens.
+Run:
+```
+python processor/1-main-parallel-script.py
+```
+Key functions:
+* main — processes a single input file
+* process_file_parallel — wrapper for thread pool execution
+* calculate_relevance_tag — maps responses to relevance tags
+
+### 4. Merge Processed Outputs
+After processing completes:
+```
+python processor/2-merge-processed-csv.py
+```
+Update INPUT_DIR in the script if necessary.
+
+### 5. URL Validation and Data Cleaning
+
+Clean invalid rows and extract URLs:
+```
+python processor/3-invalid-url-&-custom-task-remover.py
+```
+
+Validate URLs:
+```
+python processor/4-url-validator.py
+```
+
+Adjust constants (URLS_FILE, MAX_WORKERS, TIMEOUT) as needed.
+
+### 6. Validate Input vs Output Consistency
+Compare preprocessed vs merged outputs:
+```
+python processor/validate-input-output-csv.py
+```
+
+## Notes & Tips
+
+* If you're not using the generative API, you can comment out or stub those sections in processor/1-main-parallel-script.py.
+* The list of URLs to validate is located in processor/url.txt.
+* The webpage/ directory contains dashboards for visualizing CSV outputs.
diff --git a/evidence-analysis-process/input/input.csv b/evidence-analysis-process/input/input.csv
new file mode 100644
index 0000000..e69de29
diff --git a/evidence-analysis-process/output/output.csv b/evidence-analysis-process/output/output.csv
new file mode 100644
index 0000000..e69de29
diff --git a/evidence-analysis-process/pre-processor/1-pre-processor.py b/evidence-analysis-process/pre-processor/1-pre-processor.py
new file mode 100644
index 0000000..ffff04e
--- /dev/null
+++ b/evidence-analysis-process/pre-processor/1-pre-processor.py
@@ -0,0 +1,493 @@
+import os
+import csv
+import math
+from urllib.parse import urlparse
+from tqdm import tqdm # Import tqdm for the progress bar
+
+# === Configuration ===
+INPUT_CSV = "/Users/user/Documents/AI/parallel-process/input/017F35E575D87A3FB5ED3D90A3E69355_20250904.csv"
+QUESTION_CSV = "/Users/user/Documents/AI/parallel-process/input/aug_sample_questions.csv"
+FILTER_CSV = "/Users/user/Documents/AI/parallel-process/input/school_list.csv"
+OUTPUT_DIR = "output-pre-processor"
+
+# === SPLIT CONFIGURATION ===
+SPLIT_FILES = "yes" # Set to "yes" to split into multiple files, "no" for single file
+ROWS_PER_FILE = 10000 # Only used if SPLIT_FILES = "yes"
+
+# === IMAGE FORMATS ===
+IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
+
+# Create output directory
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# Counters for skipped rows
+skip_task_start = 0
+skip_evidence_null = 0
+skip_school_mismatch = 0
+skip_non_image = 0
+total_input_rows = 0 # This will be set correctly below
+
+# === Step 1: Load FILTER_CSV school codes into a set ===
+valid_school_codes = set()
+with open(FILTER_CSV, newline='', encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ school_code = row.get("UDISE+ SCHOOL CODE", "").strip()
+ if school_code:
+ valid_school_codes.add(school_code)
+
+# === Helper function for cleaning cell values ===
+def clean_cell(value):
+ """Strips whitespace AND common quote characters from the ends."""
+ if not isinstance(value, str):
+ return ""
+ # Strip whitespace, then strip both single and double quotes
+ return value.strip().strip("'\"")
+
+# === Helper function to check if URL is an image ===
+def is_image_url(url):
+ """Check if URL points to an image file"""
+ url = clean_cell(url) # Clean the URL string first for *checking*
+ if not url or url.lower() == "null":
+ return False
+ try:
+ parsed = urlparse(url)
+ path = parsed.path.lower()
+ return any(path.endswith(ext) for ext in IMAGE_FORMATS)
+ except:
+ return False
+
+# === Step 2: Load QUESTION_CSV into dictionary (TASK NAME -> Refined Question) ===
+lookup_dict = {}
+with open(QUESTION_CSV, newline='', encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ for row in reader:
+ task_name = clean_cell(row.get("TASK NAME", ""))
+ refined_question = row.get("Refined questions using tool and webpage", "").strip()
+ if task_name: # only add valid rows
+ lookup_dict[task_name] = refined_question
+
+# === District Renaming Map ===
+DISTRICT_REPLACEMENTS = {
+ "W Champaran": "West Champaran",
+ "E. Champaran": "East Champaran",
+ "Kaimur (Bhabua)": "Kaimur",
+ "Aurangabad (Bihar)": "Aurangabad"
+}
+
+# === Step 3: Load INPUT_CSV and filter ===
+
+# --- NEW: Load all rows into a list first to get the *correct* count ---
+print(f"Loading data from {INPUT_CSV}...")
+all_rows = []
+try:
+ with open(INPUT_CSV, newline='', encoding="utf-8") as infile:
+ reader = csv.DictReader(infile)
+ all_rows = list(reader)
+ total_input_rows = len(all_rows) # This is the CORRECT row count
+ header = reader.fieldnames
+except FileNotFoundError:
+ print(f"Error: INPUT_CSV '{INPUT_CSV}' not found.")
+ exit()
+except Exception as e:
+ print(f"Error reading {INPUT_CSV}: {e}")
+ exit()
+
+if header is None:
+ print("Error: CSV Header is empty. Cannot proceed.")
+ exit()
+print(f"Loaded {total_input_rows} data rows to process.")
+# --- END NEW ---
+
+
+filtered_rows = []
+
+# Add new columns
+new_columns = [
+ "Task Evidence Question",
+ "Task evidence Q and A",
+ "Task evidence Q and A Reason",
+ "Relevance Tag",
+ "Image Preview"
+]
+
+final_header = list(header)
+for col in new_columns:
+ if col not in final_header:
+ final_header.append(col)
+
+# --- NEW: Iterate over the list 'all_rows' instead of the 'reader' object ---
+# Main filter pass: each row is tested against Rules 0-3 in order and counted
+# under the first rule that rejects it; surviving rows get the extra columns
+# and are appended to filtered_rows in final_header order.
+for row in tqdm(all_rows, total=total_input_rows, desc="Processing input CSV"):
+
+ school_id = row.get("School ID", "").strip()
+ task = clean_cell(row.get("Tasks", "")) # Clean task for lookup
+ evidence = row.get("Task Evidence", "") # Get raw evidence
+
+ # Rule 0: Skip if School ID not in FILTER_CSV
+ if school_id not in valid_school_codes:
+ skip_school_mismatch += 1
+ continue
+
+ # Rule 1: Skip if task starts with 1 or 8
+ if task.startswith("1") or task.startswith("8"):
+ skip_task_start += 1
+ continue
+
+ # Rule 2: Skip if evidence is empty or "null" (after cleaning for check)
+ cleaned_evidence = clean_cell(evidence)
+ if cleaned_evidence == "" or cleaned_evidence.lower() == "null":
+ skip_evidence_null += 1
+ continue
+
+ # Rule 3: Skip if evidence URL is not an image format
+ if not is_image_url(evidence): # Send the raw evidence to be checked
+ skip_non_image += 1
+ continue
+
+ # === Step 4: Fill additional columns & Clean District ===
+ # Tasks without an entry in QUESTION_CSV get the literal string "Null".
+ row["Task Evidence Question"] = lookup_dict.get(task, "Null")
+ row["Task evidence Q and A"] = ""
+ row["Task evidence Q and A Reason"] = ""
+ row["Relevance Tag"] = ""
+ row["Image Preview"] = ""
+
+ # Apply District replacement
+ # NOTE(review): exact-match lookup on the raw cell (no strip) — confirm the
+ # source data has no surrounding whitespace in the District column.
+ current_district = row.get("District", "")
+ row["District"] = DISTRICT_REPLACEMENTS.get(current_district, current_district)
+
+ # Row passes all checks
+ # Stored as a plain list so csv.writer can emit it directly.
+ filtered_rows.append([row.get(h, "") for h in final_header])
+
+# === Step 5: Output - Single file or Multiple files based on configuration ===
+if SPLIT_FILES.lower() == "no":
+ # Single file output
+ output_file = os.path.join(OUTPUT_DIR, "preprocessed_data.csv")
+ with open(output_file, "w", newline='', encoding="utf-8") as outfile:
+ writer = csv.writer(outfile)
+ writer.writerow(final_header)
+ writer.writerows(filtered_rows)
+
+ print(f"β
Created: {output_file} ({len(filtered_rows)} rows)")
+ print(f"Mode: Single file output")
+
+else:
+ # Split into multiple files
+ total_files = math.ceil(len(filtered_rows) / ROWS_PER_FILE)
+
+ for i in range(total_files):
+ start_index = i * ROWS_PER_FILE
+ end_index = start_index + ROWS_PER_FILE
+ chunk = filtered_rows[start_index:end_index]
+
+ output_file = os.path.join(OUTPUT_DIR, f"split_{i+1}.csv")
+ with open(output_file, "w", newline='', encoding="utf-8") as outfile:
+ writer = csv.writer(outfile)
+ writer.writerow(final_header)
+ writer.writerows(chunk)
+
+ print(f"β
Created: {output_file} ({len(chunk)} rows)")
+
+ print(f"Mode: Split into {total_files} files ({ROWS_PER_FILE} rows per file)")
+
+# === Summary ===
+print(f"\n{'='*70}")
+print(f"{'PREPROCESSING SUMMARY':^70}")
+print(f"{'='*70}")
+# --- This 'total_input_rows' variable is now CORRECT ---
+print(f"\nTotal CSV rows: {total_input_rows}")
+print(f"\n{'Filter Stage':<50} {'Removed':<10} {'Remaining'}")
+print(f"{'-'*70}")
+
+remaining_after_school = total_input_rows - skip_school_mismatch
+print(f"{'School ID not in filter list':<50} {skip_school_mismatch:<10} {remaining_after_school}")
+
+remaining_after_task = remaining_after_school - skip_task_start
+print(f"{'Task starts with 1 or 8':<50} {skip_task_start:<10} {remaining_after_task}")
+
+remaining_after_evidence = remaining_after_task - skip_evidence_null
+print(f"{'Task Evidence empty or null':<50} {skip_evidence_null:<10} {remaining_after_evidence}")
+
+remaining_after_non_image = remaining_after_evidence - skip_non_image
+print(f"{'Task Evidence is not an image (video/other)':<50} {skip_non_image:<10} {remaining_after_non_image}")
+
+print(f"\n{'='*70}")
+print(f"Final output CSV rows: {len(filtered_rows)}")
+print(f"{'='*70}")
+
+# === Script Checkpoints Section ===
+print(f"\n{'='*70}")
+print(f"{'SCRIPT CHECKPOINTS':^70}")
+print(f"{'='*70}")
+print("This script performed the following actions:")
+
+print("\n--- 1. PRE-LOADING ---")
+print(f"β
Loaded valid school codes from '{FILTER_CSV}'")
+print(f"β
Loaded task/question map from '{QUESTION_CSV}'")
+print("β
Defined district name replacements (e.g., 'W. Champaran' -> 'West Champaran')")
+
+print("\n--- 2. MAIN PROCESSING (Row-by-Row) ---")
+print(f"β
Loaded all {total_input_rows} rows from '{INPUT_CSV}' (This is the correct count).")
+print("β
Iterated through all rows with a progress bar.")
+print("\n For EACH row, the following filters were applied (in order):")
+print(" β‘οΈ 1. SKIPPED if 'School ID' was not in the valid school list.")
+print(" β‘οΈ 2. SKIPPED if 'Tasks' value (after cleaning) started with '1' or '8'.")
+print(" β‘οΈ 3. SKIPPED if 'Task Evidence' (after cleaning) was empty or 'null'.")
+print(" β‘οΈ 4. SKIPPED if 'Task Evidence' URL was not an image (e.g., .mp4, .pdf).")
+
+print("\n For EACH row that PASSED all filters:")
+print(" β‘οΈ Cleaned and matched 'Tasks' to populate 'Task Evidence Question'.")
+print(" β‘οΈ Cleaned 'District' names (e.g., 'Kaimur (Bhabua)' -> 'Kaimur').")
+print(" β‘οΈ Set the 'Image Preview' column to be empty.")
+print(" β‘οΈ Kept the original 'Task Evidence' value.")
+print(" β‘οΈ Added row to the final output list.")
+
+print("\n--- 3. FINAL OUTPUT ---")
+print(f"β
Wrote {len(filtered_rows)} passed rows to the final CSV file.")
+print("β
Printed the final summary report with skip/remaining counts.")
+print(f"{'='*70}")
+
+
+
+
+
+# import os
+# import csv
+# import math
+# from urllib.parse import urlparse
+# from tqdm import tqdm
+
+# # === Configuration ===
+# INPUT_CSV = "/Users/user/Documents/AI/parallel-process/input/017F35E575D87A3FB5ED3D90A3E69355_20250904.csv"
+# QUESTION_CSV = "/Users/user/Documents/AI/parallel-process/input/aug_sample_questions.csv"
+# FILTER_CSV = "/Users/user/Documents/AI/parallel-process/input/school_list.csv"
+# OUTPUT_DIR = "pre_split_csvs"
+
+# # === SPLIT CONFIGURATION ===
+# SPLIT_FILES = "no" # Set to "yes" to split into multiple files, "no" for single file
+# ROWS_PER_FILE = 10000 # Only used if SPLIT_FILES = "yes"
+
+# # === IMAGE FORMATS ===
+# IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
+
+# # Create output directory
+# os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# # Counters for skipped rows
+# skip_task_start = 0
+# skip_evidence_null = 0
+# skip_school_mismatch = 0
+# skip_non_image = 0
+
+# # Get total row count for progress bar
+# print(f"Calculating total rows in {INPUT_CSV}...")
+# try:
+# with open(INPUT_CSV, 'r', encoding="utf-8") as f:
+# # -1 to exclude the header row
+# total_input_rows = sum(1 for _ in f) - 1
+# except FileNotFoundError:
+# print(f"Error: INPUT_CSV '{INPUT_CSV}' not found.")
+# exit()
+# except Exception as e:
+# print(f"Error reading {INPUT_CSV}: {e}")
+# exit()
+# print(f"Found {total_input_rows} data rows to process.")
+
+# # === Step 1: Load FILTER_CSV school codes into a set ===
+# valid_school_codes = set()
+# with open(FILTER_CSV, newline='', encoding="utf-8") as f:
+# reader = csv.DictReader(f)
+# for row in reader:
+# school_code = row.get("UDISE+ SCHOOL CODE", "").strip()
+# if school_code:
+# valid_school_codes.add(school_code)
+
+# # === Helper function for cleaning cell values ===
+# def clean_cell(value):
+# """Strips whitespace AND common quote characters from the ends."""
+# if not isinstance(value, str):
+# return ""
+# # Strip whitespace, then strip both single and double quotes
+# return value.strip().strip("'\"")
+
+# # === Helper function to check if URL is an image ===
+# def is_image_url(url):
+# """Check if URL points to an image file"""
+# url = clean_cell(url) # Clean the URL string first for *checking*
+# if not url or url.lower() == "null":
+# return False
+# try:
+# parsed = urlparse(url)
+# path = parsed.path.lower()
+# return any(path.endswith(ext) for ext in IMAGE_FORMATS)
+# except:
+# return False
+
+# # === Step 2: Load QUESTION_CSV into dictionary (TASK NAME β Refined Question) ===
+# lookup_dict = {}
+# with open(QUESTION_CSV, newline='', encoding="utf-8") as f:
+# reader = csv.DictReader(f)
+# for row in reader:
+# task_name = clean_cell(row.get("TASK NAME", ""))
+# refined_question = row.get("Refined questions using tool and webpage", "").strip()
+# if task_name: # only add valid rows
+# lookup_dict[task_name] = refined_question
+
+# # === District Renaming Map ===
+# DISTRICT_REPLACEMENTS = {
+# "W. Champaran": "West Champaran",
+# "E. Champaran": "East Champaran",
+# "Kaimur (Bhabua)": "Kaimur",
+# "Aurangabad (Bihar)": "Aurangabad"
+# }
+
+# # === Step 3: Load INPUT_CSV and filter ===
+# filtered_rows = []
+# with open(INPUT_CSV, newline='', encoding="utf-8") as infile:
+# reader = csv.DictReader(infile)
+
+# header = reader.fieldnames
+# if header is None:
+# print("Error: CSV Header is empty. Cannot proceed.")
+# exit()
+
+# # Add new columns
+# new_columns = [
+# "Task Evidence Question",
+# "Task evidence Q and A",
+# "Task evidence Q and A Reason",
+# "Relevance Tag",
+# "Image Preview"
+# ]
+
+# final_header = list(header)
+# for col in new_columns:
+# if col not in final_header:
+# final_header.append(col)
+
+# # Wrap the reader with tqdm for the progress bar
+# for row in tqdm(reader, total=total_input_rows, desc="Processing input CSV"):
+
+# school_id = row.get("School ID", "").strip()
+# task = clean_cell(row.get("Tasks", "")) # Clean task for lookup
+# evidence = row.get("Task Evidence", "") # Get raw evidence
+
+# # Rule 0: Skip if School ID not in FILTER_CSV
+# if school_id not in valid_school_codes:
+# skip_school_mismatch += 1
+# continue
+
+# # Rule 1: Skip if task starts with 1 or 8
+# if task.startswith("1") or task.startswith("8"):
+# skip_task_start += 1
+# continue
+
+# # Rule 2: Skip if evidence is empty or "null" (after cleaning for check)
+# cleaned_evidence = clean_cell(evidence)
+# if cleaned_evidence == "" or cleaned_evidence.lower() == "null":
+# skip_evidence_null += 1
+# continue
+
+# # Rule 3: Skip if evidence URL is not an image format
+# if not is_image_url(evidence): # Send the raw evidence to be checked
+# skip_non_image += 1
+# continue
+
+# # === Step 4: Fill additional columns & Clean District ===
+# row["Task Evidence Question"] = lookup_dict.get(task, "Null")
+# row["Task evidence Q and A"] = ""
+# row["Task evidence Q and A Reason"] = ""
+# row["Relevance Tag"] = ""
+# row["Image Preview"] = ""
+
+# # --- NOTE: The "Task Evidence" column is NO longer overwritten ---
+
+# # Apply District replacement
+# current_district = row.get("District", "")
+# row["District"] = DISTRICT_REPLACEMENTS.get(current_district, current_district)
+
+# # Row passes all checks
+# filtered_rows.append([row.get(h, "") for h in final_header])
+
+# # === Step 5: Output - Single file or Multiple files based on configuration ===
+# if SPLIT_FILES.lower() == "no":
+# # Single file output
+# output_file = os.path.join(OUTPUT_DIR, "preprocessed_data.csv")
+# with open(output_file, "w", newline='', encoding="utf-8") as outfile:
+# writer = csv.writer(outfile)
+# writer.writerow(final_header)
+# writer.writerows(filtered_rows)
+
+# print(f"β
Created: {output_file} ({len(filtered_rows)} rows)")
+# print(f"Mode: Single file output")
+
+# else:
+# # Split into multiple files
+# total_files = math.ceil(len(filtered_rows) / ROWS_PER_FILE)
+
+# for i in range(total_files):
+# start_index = i * ROWS_PER_FILE
+# end_index = start_index + ROWS_PER_FILE
+# chunk = filtered_rows[start_index:end_index]
+
+# output_file = os.path.join(OUTPUT_DIR, f"split_{i+1}.csv")
+# with open(output_file, "w", newline='', encoding="utf-8") as outfile:
+# writer = csv.writer(outfile)
+# writer.writerow(final_header)
+# writer.writerows(chunk)
+
+# print(f"β
Created: {output_file} ({len(chunk)} rows)")
+
+# print(f"Mode: Split into {total_files} files ({ROWS_PER_FILE} rows per file)")
+
+# # === Summary ===
+# print(f"\n{'='*70}")
+# print(f"{'PREPROCESSING SUMMARY':^70}")
+# print(f"{'='*70}")
+# print(f"\nTotal CSV rows: {total_input_rows}")
+# print(f"\n{'Filter Stage':<50} {'Removed':<10} {'Remaining'}")
+# print(f"{'-'*70}")
+
+# remaining_after_school = total_input_rows - skip_school_mismatch
+# print(f"{'School ID not in filter list':<50} {skip_school_mismatch:<10} {remaining_after_school}")
+
+# remaining_after_task = remaining_after_school - skip_task_start
+# print(f"{'Task starts with 1 or 8':<50} {skip_task_start:<10} {remaining_after_task}")
+
+# remaining_after_evidence = remaining_after_task - skip_evidence_null
+# print(f"{'Task Evidence empty or null':<50} {skip_evidence_null:<10} {remaining_after_evidence}")
+
+# remaining_after_non_image = remaining_after_evidence - skip_non_image
+# print(f"{'Task Evidence is not an image (video/other)':<50} {skip_non_image:<10} {remaining_after_non_image}")
+
+# print(f"\n{'='*70}")
+# print(f"Final output CSV rows: {len(filtered_rows)}")
+# print(f"{'='*70}")
+
+# # === Script Checkpoints Section ===
+# print(f"\n{'='*70}")
+# print(f"{'SCRIPT CHECKPOINTS':^70}")
+# print(f"{'='*70}")
+# print("This script performed the following actions:")
+
+# print("\n--- 1. PRE-LOADING ---")
+# print(f"β
Loaded valid school codes from '{FILTER_CSV}'")
+# print(f"β
Loaded task/question map from '{QUESTION_CSV}'")
+# print("β
Defined district name replacements (e.g., 'W. Champaran' -> 'West Champaran')")
+
+# print("\n--- 2. MAIN PROCESSING (Row-by-Row) ---")
+# print(f"β
Iterated through all {total_input_rows} rows in '{INPUT_CSV}' with a progress bar.")
+# print("\n For EACH row, the following filters were applied (in order):")
+# print(" β‘οΈ 1. SKIPPED if 'School ID' was not in the valid school list.")
+# print(" β‘οΈ 2. SKIPPED if 'Tasks' value (after cleaning) started with '1' or '8'.")
+# print(" β‘οΈ 3. SKIPPED if 'Task Evidence' (after cleaning) was empty or 'null'.")
+# print(" β‘οΈ 4. SKIPPED if 'Task Evidence' URL was not an image (e.g., .mp4, .pdf).")
+
+# print("\n For EACH row that PASSED all filters:")
+# print(" β‘οΈ Cleaned and matched 'Tasks' to populate 'Task Evidence Question'.")
+# print(" β‘οΈ Cleaned 'District' names (e.g., 'Kaimur (Bhabua)' -> 'Kaimur').")
+# print(" β‘οΈ Added some extra rows to the final output list.")
+
+# print("\n--- 3. FINAL OUTPUT ---")
+# print(f"β
Wrote {len(filtered_rows)} passed rows to the final CSV file.")
+# print("β
Printed the final summary report with skip/remaining counts.")
+# print(f"{'='*70}")
\ No newline at end of file
diff --git a/evidence-analysis-process/pre-processor/2-csv-splitter.py b/evidence-analysis-process/pre-processor/2-csv-splitter.py
new file mode 100644
index 0000000..79f0bdb
--- /dev/null
+++ b/evidence-analysis-process/pre-processor/2-csv-splitter.py
@@ -0,0 +1,37 @@
+import csv
+import os
+from math import ceil
+
+# Configuration
+INPUT_CSV = "/Users/user/Documents/AI/parallel-process/output-pre-processor/split_1.csv"
+OUTPUT_DIR = "parallel_input_split_1_files"
+PARTS = 20  # Upper bound on the number of part files produced
+
+# Create output directory if it doesn't exist
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+print("Splitting CSV using Python script...")
+
+# Read the header and all data rows into memory (the header is not counted as a row)
+with open(INPUT_CSV, newline='', encoding='utf-8') as f:
+    reader = csv.reader(f)
+    # Default of None: a completely empty file must not crash with StopIteration
+    header = next(reader, None)
+    rows = list(reader)
+
+if header is None:
+    raise SystemExit(f"Error: '{INPUT_CSV}' is empty (no header row); nothing to split.")
+
+total_rows = len(rows)
+rows_per_file = ceil(total_rows / PARTS)
+
+# Split the CSV into parts. rows_per_file is rounded up, so the rows can run out
+# before PARTS files have been written; stop there instead of creating
+# header-only files.
+parts_written = 0
+for i in range(PARTS):
+    start = i * rows_per_file
+    chunk = rows[start:start + rows_per_file]
+    if not chunk:
+        break
+
+    output_file = os.path.join(OUTPUT_DIR, f"{i+1}.csv")
+    with open(output_file, "w", newline='', encoding='utf-8') as f_out:
+        writer = csv.writer(f_out)
+        writer.writerow(header)
+        writer.writerows(chunk)
+    parts_written += 1
+
+    print(f"Created {output_file} with {len(chunk)} rows.")
+
+print(f"Total rows: {total_rows}. Split into {parts_written} parts.")
\ No newline at end of file
diff --git a/evidence-analysis-process/processor/1-main-parallel-script.py b/evidence-analysis-process/processor/1-main-parallel-script.py
new file mode 100644
index 0000000..b9870ff
--- /dev/null
+++ b/evidence-analysis-process/processor/1-main-parallel-script.py
@@ -0,0 +1,1082 @@
+import os
+import concurrent.futures
+import pandas as pd
+from openpyxl import load_workbook
+from openpyxl.styles import Alignment
+import json
+import google.generativeai as genai
+import httpx
+import base64
+import typing_extensions as typing
+import time
+import mimetypes
+from urllib.request import urlopen
+import re
+import logging
+import csv
+from dotenv import load_dotenv
+load_dotenv()
+import threading
+import time
+from collections import deque
+
+# === Constants ===
+# File extensions treated as images when checking a "Task Evidence" value.
+IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
+# Per-input-file cap: main() stops iterating once this many rows were handled.
+MAX_PROCESSED_ROWS = 520
+# OUTPUT_FILE = "processed_output.csv" # Not used
+INPUT_DIR = "parallel_input_split_1_files"
+OUTPUT_DIR = "parallel_output_split_1_files"
+FINAL_OUTPUT_FILE = os.path.join(OUTPUT_DIR, "merged_output.csv")
+
+# Create output directory if it doesn't exist
+# (must happen before logging is configured: the log file lives inside it)
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# Configure logging: INFO level, written both to OUTPUT_DIR/processing.log and to the console.
+# The format includes the thread name so interleaved worker output can be told apart.
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s [%(levelname)s] [%(threadName)s] %(message)s",
+ handlers=[
+ logging.FileHandler(f"{OUTPUT_DIR}/processing.log"),
+ logging.StreamHandler()
+ ]
+)
+logger = logging.getLogger(__name__)
+
+# === CSV validation ===
+def validate_csv(input_file, logger):
+    """Return the 1-based row numbers of rows the csv module cannot parse.
+
+    Args:
+        input_file: Path of the CSV file to scan.
+        logger: Logger that receives one warning per malformed row.
+
+    Returns:
+        list[int]: Row numbers (1-based, header row included) whose parsing
+        raised csv.Error. Empty when every row parsed.
+    """
+    bad_rows = []
+    with open(input_file, newline='') as f:
+        reader = csv.reader(f)
+        row_number = 0
+        while True:
+            row_number += 1
+            try:
+                # Parsing happens while the reader is advanced, so the read itself
+                # has to sit inside the try for a malformed row to be caught.
+                next(reader)  # Row-level validation of the parsed row can be added here
+            except StopIteration:
+                break
+            except csv.Error as e:
+                logger.warning(f"Malformed row at line {row_number}: {e}")
+                bad_rows.append(row_number)
+    return bad_rows
+
+def get_gemini_tokens_from_env():
+    """Collect the value of every environment variable whose name starts with GEMINI_TOKEN.
+
+    Returns:
+        list[str]: Matching values in os.environ iteration order. Empty (after
+        an error is logged) when no such variable is set.
+    """
+    tokens = [
+        value
+        for name, value in os.environ.items()
+        if name.startswith("GEMINI_TOKEN")
+    ]
+    if len(tokens) == 0:
+        logging.error("[Gemini] No Gemini tokens found in environment variables!")
+    return tokens
+
+# Token values, read once from the environment when the module is loaded.
+GEMINI_TOKENS = get_gemini_tokens_from_env()
+
+# Position in GEMINI_TOKENS of the token currently in use; incremented by switch_to_next_token().
+current_token_index = 0
+
+def get_next_gemini_token():
+    """Return the token at current_token_index, or None when the index is past the end.
+
+    Despite its name this function does not advance the index; it only reads it.
+    """
+    global current_token_index
+    if current_token_index >= len(GEMINI_TOKENS):
+        return None
+    active_token = GEMINI_TOKENS[current_token_index]
+    # The token value itself is deliberately kept out of the log.
+    logging.info(f"[Gemini] Using token: -----")
+    return active_token
+
+def switch_to_next_token():
+    """Advance to the next Gemini token and rebuild the global model with it.
+
+    Returns:
+        The newly active token, or None when no usable token remains.
+    """
+    # NOTE(review): current_token_index and model are plain module globals changed
+    # without a lock, while the script runs its workers in a thread pool. Several
+    # workers hitting a quota error together could each advance the index - confirm
+    # that this is acceptable.
+    global current_token_index, model
+
+    current_token_index += 1
+    if current_token_index >= len(GEMINI_TOKENS):
+        logging.error("[Gemini] All tokens exhausted!")
+        return None
+
+    token = get_next_gemini_token()
+    if not token:
+        return None
+
+    # Re-point the SDK at the new key and recreate the JSON-schema model.
+    genai.configure(api_key=token)
+    json_generation_config = {
+        "response_mime_type": "application/json",
+        "response_schema": AnalysisResponse,
+    }
+    model = genai.GenerativeModel(
+        model_name="gemini-2.0-flash",
+        generation_config=json_generation_config,
+    )
+    return token
+
+# === Gemini Model Setup ===
+# Shape of the JSON reply requested from Gemini (passed as "response_schema"):
+# two lists of strings.
+# NOTE(review): answers[i] presumably pairs with reasonings[i] - confirm against the full prompt.
+class AnalysisResponse(typing.TypedDict):
+ answers: list[str]
+ reasonings: list[str]
+
+# Fail fast at module load when no token is available.
+initial_token = get_next_gemini_token()
+if not initial_token:
+ raise ValueError("[Gemini] No valid Gemini tokens found!")
+
+# Module-level model used by process_image(); switch_to_next_token() rebinds it
+# after configuring a different API key.
+genai.configure(api_key=initial_token)
+model = genai.GenerativeModel(
+ model_name="gemini-2.0-flash",
+ generation_config={
+ "response_mime_type": "application/json",
+ "response_schema": AnalysisResponse,
+ },
+)
+
+# === Utility functions ===
+def calculate_relevance_tag(answers):
+    """Turn a list of answers into a relevance label based on the share of 'YES'.
+
+    Args:
+        answers: List of answers; each is compared case-insensitively (via str())
+            against 'YES'.
+
+    Returns:
+        str: 'Relevant' when at least half the answers are YES, 'Partially Relevant'
+        when at least one is, otherwise 'Irrelevant'. Anything that is empty or not
+        a list is 'Irrelevant'.
+    """
+    if not (answers and isinstance(answers, list)):
+        return 'Irrelevant'
+
+    affirmative = [item for item in answers if str(item).upper() == 'YES']
+    share = (len(affirmative) / len(answers)) * 100
+
+    if share <= 0:
+        return 'Irrelevant'
+    return 'Relevant' if share >= 50 else 'Partially Relevant'
+
+def adjust_excel_formatting(output_file):
+    """Wrap and top-left-align every cell, and size each column to its longest value.
+
+    Only meaningful for .xlsx workbooks. The script now saves .csv, so the current
+    logic does not call this; it is kept for when Excel output is needed again.
+    Any failure is logged as a warning and never raised.
+
+    Args:
+        output_file: Path of the workbook to reformat in place.
+    """
+    try:
+        wb = load_workbook(output_file)
+        ws = wb.active
+        for row in ws.iter_rows():
+            for cell in row:
+                cell.alignment = Alignment(wrap_text=True, vertical="top", horizontal="left")
+        for col in ws.columns:
+            max_length = 0
+            col_letter = col[0].column_letter
+            for cell in col:
+                try:
+                    if cell.value:
+                        max_length = max(max_length, len(str(cell.value)))
+                except Exception:
+                    # Best effort: a value that cannot be stringified is ignored for sizing.
+                    # Narrowed from a bare except, which also swallowed
+                    # KeyboardInterrupt and SystemExit.
+                    continue
+            # +2 leaves a little padding around the longest value
+            ws.column_dimensions[col_letter].width = max_length + 2
+        wb.save(output_file)
+    except Exception as e:
+        logging.warning(f"Could not apply Excel formatting to {output_file}: {e}")
+
+
+def get_image_as_base64(url: str) -> str:
+    """Fetch url and return its content as a base64 data URI.
+
+    The MIME type is guessed from the URL text; image/jpeg is used when it
+    cannot be guessed.
+    """
+    guessed_type = mimetypes.guess_type(url)[0]
+    with urlopen(url) as response:
+        payload = response.read()
+    encoded = base64.b64encode(payload).decode("utf-8")
+    return f"data:{guessed_type or 'image/jpeg'};base64,{encoded}"
+
+# Track timestamps of recent requests
+_request_times = deque()
+_request_lock = threading.Lock()
+MAX_REQUESTS_PER_MINUTE = 2000
+
+def rate_limiter():
+    """Block until a request slot is free, then record the request.
+
+    Keeps a sliding 60-second window of request timestamps shared by every
+    caller and allows at most MAX_REQUESTS_PER_MINUTE entries in it.
+
+    Waiting is done in a loop with the lock released. The previous version
+    called itself recursively while still holding _request_lock; threading.Lock
+    is not reentrant, so the first throttle blocked forever on its own lock.
+    """
+    while True:
+        with _request_lock:
+            now = time.time()
+            # Remove requests older than 60 seconds
+            while _request_times and now - _request_times[0] > 60:
+                _request_times.popleft()
+
+            if len(_request_times) < MAX_REQUESTS_PER_MINUTE:
+                _request_times.append(now)
+                return
+
+            # Time until the oldest recorded request leaves the window
+            sleep_time = 60 - (now - _request_times[0])
+
+        if sleep_time > 0:
+            logging.info(f"[RateLimiter] Throttling for {sleep_time:.2f} seconds to stay under 2000 req/min...")
+        # Always yield a little so a zero wait cannot turn into a busy spin
+        time.sleep(max(sleep_time, 0.01))
+
+
+def process_image(task_evidence_link, task_evidence_question, max_retries=3):
+    """Download one evidence image and ask Gemini the task question about it.
+
+    Args:
+        task_evidence_link: URL of the image to analyse.
+        task_evidence_question: Question text appended to the prompt.
+        max_retries: Number of failed attempts allowed before giving up. A
+            successful switch to another token does not count as an attempt.
+
+    Returns:
+        dict: The model's reply parsed from JSON, or
+        {"error": "Max retries reached"} when every attempt failed.
+    """
+    retries = 0
+    while retries < max_retries:
+        try:
+            rate_limiter()
+            try:
+                image = httpx.get(task_evidence_link)
+                # Without this check a 4xx/5xx error page would be base64-encoded
+                # and sent to the model as if it were the image.
+                image.raise_for_status()
+            except httpx.HTTPError as download_error:
+                # Handled separately so a download failure (whose message can
+                # contain "429" via the status or the URL) is never mistaken
+                # for a Gemini quota error below.
+                logging.error(f"[Download] Failed to fetch {task_evidence_link}: {download_error}")
+                retries += 1
+                continue
+
+            prompt = f"""You are an educational evidence validator. Analyse the given image... {task_evidence_question}"""
+            # NOTE(review): the payload is always labelled image/jpeg even though
+            # other extensions are accepted as images - confirm this is intended.
+            response = model.generate_content([
+                {"mime_type": "image/jpeg", "data": base64.b64encode(image.content).decode("utf-8")},
+                prompt,
+            ])
+            response_json = json.loads(response.text)
+            return response_json
+        except Exception as e:
+            error_str = str(e).lower()
+            if any(k in error_str for k in ["rate limit", "quota", "429", "resource_exhausted"]):
+                logging.warning("[Gemini] Rate limit or quota exceeded. Switching token...")
+                if switch_to_next_token():
+                    continue
+                else:
+                    logging.warning("[Gemini] No more tokens. Retrying in 60 seconds...")
+                    time.sleep(60)
+                    retries += 1
+            else:
+                logging.error(f"[Gemini] Error: {e}")
+                retries += 1
+    logging.error("[Gemini] Max retries reached.")
+    return {"error": "Max retries reached"}
+
+
+# === Main processing ===
+def main(input_file, worker_id=None):
+    """Process one split file: query Gemini for every image row and write an annotated CSV.
+
+    Rows whose "Task Evidence" or "Task Evidence Question" is missing or "Null"
+    are dropped first. At most MAX_PROCESSED_ROWS of the remaining rows are
+    handled; only those rows are written to the output file.
+
+    Args:
+        input_file: Path of a .csv or .xlsx file containing the columns
+            "Task Evidence" and "Task Evidence Question".
+        worker_id: Label used only in log messages.
+
+    Returns:
+        dict | None: Run statistics (output_file, rows_attempted, api_calls,
+        api_successes, api_failures, success_list, failed_list), or None when
+        the file is missing or any step raised.
+    """
+    # --- Stats variables ---
+    api_calls = 0
+    api_successes = 0
+    api_failures = 0
+    success_list = []
+    failed_list = []
+    # --- End stats ---
+
+    try:
+        logging.info(f"[Worker {worker_id}] Starting processing for {input_file}")
+
+        if not os.path.exists(input_file):
+            logging.error(f"[Worker {worker_id}] File not found: {input_file}")
+            return None
+
+        df = pd.read_excel(input_file) if input_file.endswith(".xlsx") else pd.read_csv(input_file)
+        df_filtered = df[
+            ~df["Task Evidence"].isin([None, "Null"])
+            & ~df["Task Evidence Question"].isin([None, "Null"])
+        ].dropna(subset=["Task Evidence", "Task Evidence Question"])
+
+        total_rows = len(df_filtered)
+        processed_count = 0
+        task_evidence_qa = []
+        task_evidence_qa_reason = []
+        relevance_tags = []
+
+        # position is the 1-based place within the filtered rows. The index label
+        # is not used for logging because filtering leaves gaps in it.
+        for position, (_, row) in enumerate(df_filtered.iterrows(), start=1):
+            task_evidence = str(row["Task Evidence"]).strip()
+            task_question = str(row["Task Evidence Question"]).strip()
+
+            if _is_image_link(task_evidence):
+                logging.info(f"[Worker {worker_id}] Processing image row {position}/{total_rows}")
+
+                api_calls += 1  # Track API call attempt
+                response = process_image(task_evidence, task_question)
+
+                if isinstance(response, dict) and "answers" in response and "reasonings" in response:
+                    answers = response["answers"]
+                    reasonings = response["reasonings"]
+                    task_evidence_qa.append(answers)
+                    task_evidence_qa_reason.append(reasonings)
+                    relevance_tags.append(calculate_relevance_tag(answers))
+
+                    api_successes += 1  # Track success
+                    success_list.append(task_evidence)
+                else:
+                    logging.warning(f"[Worker {worker_id}] Invalid response at row {position}")
+                    task_evidence_qa.append(None)
+                    task_evidence_qa_reason.append(None)
+                    relevance_tags.append('Irrelevant')
+
+                    api_failures += 1  # Track failure
+                    failed_list.append(task_evidence)
+            else:
+                logging.info(f"[Worker {worker_id}] Skipping non-image row {position}")
+                task_evidence_qa.append(None)
+                task_evidence_qa_reason.append(None)
+                relevance_tags.append('Irrelevant')
+
+            # Every row appends exactly one entry to each result list, so the
+            # lists stay aligned with the first processed_count rows.
+            processed_count += 1
+            if processed_count >= MAX_PROCESSED_ROWS:
+                logging.info(f"[Worker {worker_id}] Reached max processed rows ({MAX_PROCESSED_ROWS})")
+                break
+
+        # .copy() so the column assignments below write to an independent frame
+        # rather than to a slice of the filtered one.
+        df_filtered = df_filtered.head(processed_count).copy()
+        df_filtered["Task evidence Q and A"] = task_evidence_qa
+        df_filtered["Task evidence Q and A Reason"] = task_evidence_qa_reason
+        df_filtered["Relevance Tag"] = relevance_tags
+
+        df_filtered["Image Preview"] = df_filtered["Task Evidence"].apply(
+            lambda x: str(x) if _is_image_link(x) else ""
+        )
+
+        output_filename = os.path.join(OUTPUT_DIR, f"processed_{os.path.basename(input_file).split('.')[0]}.csv")
+        df_filtered.to_csv(output_filename, index=False)
+
+        logging.info(f"[Worker {worker_id}] Finished processing {input_file}. Output: {output_filename}")
+
+        # Return the dictionary of stats
+        return {
+            "output_file": output_filename,
+            "rows_attempted": processed_count,
+            "api_calls": api_calls,
+            "api_successes": api_successes,
+            "api_failures": api_failures,
+            "success_list": success_list,
+            "failed_list": failed_list
+        }
+
+    except Exception as e:
+        logging.exception(f"[Worker {worker_id}] Failed to process {input_file}: {e}")
+        return None
+
+
+def _is_image_link(link):
+    """Return True when link, or the path part of it as a URL, ends with an image extension."""
+    from urllib.parse import urlparse  # local import: not among this file's top-level imports
+
+    text = str(link).strip().lower()
+    extensions = tuple(IMAGE_FORMATS)
+    if text.endswith(extensions):
+        return True
+    try:
+        # Looking at the path alone lets ".../photo.jpg?token=abc" or
+        # ".../photo.jpg#top" still count as an image.
+        return urlparse(text).path.endswith(extensions)
+    except ValueError:
+        return False
+
+
+def process_file_parallel(file_path, worker_id):
+    """Thread-pool task: run main() on one input file and log how it went.
+
+    Returns:
+        The stats dictionary produced by main(), or None when processing failed.
+    """
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    stats = main(file_path, worker_id)
+    if not stats:
+        logging.warning(f"[Worker {worker_id}] Processing failed for {file_path}")
+        return stats
+    logging.info(f"[Worker {worker_id}] Output saved as {stats['output_file']}")
+    return stats
+
+# === Entry point ===
+if __name__ == "__main__":
+    # Entry point: process every .xlsx/.csv file in INPUT_DIR in its own worker
+    # thread, merge the per-file outputs into FINAL_OUTPUT_FILE, then log a summary.
+    input_files = [
+        os.path.join(INPUT_DIR, file)
+        for file in os.listdir(INPUT_DIR)
+        if file.endswith((".xlsx", ".csv"))
+    ]
+
+    logging.info(f"[Main] Found {len(input_files)} input files to process.")
+
+    # --- Global Stats Aggregators ---
+    total_rows_processed_all = 0
+    total_api_calls_all = 0
+    total_api_success_all = 0
+    total_api_failure_all = 0
+    all_success_lists = []
+    all_failed_lists = []
+    processed_files = []  # List of successful output file paths
+    failed_files = []  # List of input files that failed to process
+    # --- End Aggregators ---
+
+    # One worker per input file. ThreadPoolExecutor raises ValueError for
+    # max_workers=0, so keep at least one worker even when no input was found;
+    # the run then falls through to the "no files processed" branch below.
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, len(input_files))) as executor:
+        futures = {
+            executor.submit(process_file_parallel, f, idx + 1): f
+            for idx, f in enumerate(input_files)
+        }
+        for future in concurrent.futures.as_completed(futures):
+            original_file = futures[future]
+            try:
+                result_stats = future.result()
+            except Exception as e:
+                # A worker that raised is recorded as a failed file instead of
+                # aborting the whole run before the merge and summary.
+                logging.exception(f"[Main] Worker for {original_file} raised: {e}")
+                result_stats = None
+
+            if result_stats:  # Processing was successful
+                processed_files.append(result_stats["output_file"])
+                total_rows_processed_all += result_stats["rows_attempted"]
+                total_api_calls_all += result_stats["api_calls"]
+                total_api_success_all += result_stats["api_successes"]
+                total_api_failure_all += result_stats["api_failures"]
+                all_success_lists.extend(result_stats["success_list"])
+                all_failed_lists.extend(result_stats["failed_list"])
+                logging.info(f"[Main] Worker finished processing: {original_file}")
+            else:
+                logging.warning(f"[Main] File {original_file} failed to process.")
+                failed_files.append(original_file)
+
+    if not processed_files:
+        logging.error("[Main] No files processed successfully. Exiting.")
+        # The summary below is still logged in this case
+    else:
+        try:
+            logging.info(f"[Main] Merging {len(processed_files)} files into {FINAL_OUTPUT_FILE}")
+            merged_df = pd.concat([pd.read_csv(f) for f in processed_files], ignore_index=True)
+            merged_df.to_csv(FINAL_OUTPUT_FILE, index=False)
+            logging.info(f"✅ All files processed and merged into: {FINAL_OUTPUT_FILE}")
+        except Exception as e:
+            logging.exception(f"[Main] Error during merging: {e}")
+
+    # --- Log the Final Summary ---
+    # Garbled (mis-encoded) glyphs in these messages were repaired; glyphs that
+    # could not be recovered were dropped.
+    try:
+        logging.info("="*80)
+        logging.info("===== PROCESSING RUN SUMMARY =====")
+        logging.info("="*80)
+
+        logging.info(f"Total Rows Processed (sum of attempts): {total_rows_processed_all}")
+        logging.info(f"Total API Calls (image rows attempted): {total_api_calls_all}")
+        logging.info(f" - ✅ Success: {total_api_success_all}")
+        logging.info(f" - ❌ Failed: {total_api_failure_all}")
+
+        logging.info("")
+        logging.info(f"Total Input Files Processed Successfully: {len(processed_files)}")
+        logging.info(f"Total Input Files Failed to Process: {len(failed_files)}")
+        if failed_files:
+            logging.warning("Failed Input Files:")
+            for f in failed_files:
+                logging.warning(f" - {f}")
+
+        logging.info("")
+        if all_failed_lists:
+            logging.warning(f"List of Failed API Calls ({len(all_failed_lists)}):")
+            for item in all_failed_lists:
+                logging.warning(f" - {item}")
+        else:
+            logging.info("✅ No API call failures recorded.")
+
+        if all_success_lists:
+            logging.info(f"List of Successful API Calls ({len(all_success_lists)}):")
+            for item in all_success_lists:
+                logging.info(f" - {item}")
+        else:
+            logging.info("No API call successes recorded.")
+
+        logging.info("="*80)
+        logging.info("===== END OF SUMMARY =====")
+        logging.info("="*80 + "\n")
+    except Exception as e:
+        logging.exception(f"[Main] Failed to write summary to log: {e}")
+
+
+
+
+
+# import os
+# import concurrent.futures
+# import pandas as pd
+# from openpyxl import load_workbook
+# from openpyxl.styles import Alignment
+# import json
+# import google.generativeai as genai
+# import httpx
+# import base64
+# import typing_extensions as typing
+# import time
+# import mimetypes
+# from urllib.request import urlopen
+# import re
+# import logging
+# import csv
+# from dotenv import load_dotenv
+# load_dotenv()
+# import threading
+# import time
+# from collections import deque
+
+# # === Constants ===
+# IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
+# MAX_PROCESSED_ROWS = 1
+# # OUTPUT_FILE = "processed_output.csv"
+# INPUT_DIR = "parallel_input_split_files"
+# OUTPUT_DIR = "parallel_output_split_files"
+# FINAL_OUTPUT_FILE = os.path.join(OUTPUT_DIR, "merged_output.csv")
+
+# # Create output directory if it doesn't exist
+# os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# # Configure logging
+# logging.basicConfig(
+# level=logging.INFO,
+# format="%(asctime)s [%(levelname)s] [%(threadName)s] %(message)s",
+# handlers=[
+# logging.FileHandler("parallel_output_split_files/processing.log"),
+# logging.StreamHandler()
+# ]
+# )
+# logger = logging.getLogger(__name__)
+
+# # β
Add this below logger setup
+# def validate_csv(input_file, logger):
+# bad_rows = []
+# with open(input_file, newline='') as f:
+# reader = csv.reader(f)
+# for i, row in enumerate(reader):
+# try:
+# pass # You can add your row-level validation here
+# except Exception as e:
+# logger.warning(f"Malformed row at line {i+1}: {e}")
+# bad_rows.append(i+1)
+# return bad_rows
+
+# def get_gemini_tokens_from_env():
+# tokens = []
+# for key in os.environ:
+# if key.startswith("GEMINI_TOKEN"):
+# tokens.append(os.environ[key])
+# if not tokens:
+# logging.error("[Gemini] No Gemini tokens found in environment variables!")
+# return tokens
+
+# GEMINI_TOKENS = get_gemini_tokens_from_env()
+
+# current_token_index = 0
+
+# def get_next_gemini_token():
+# global current_token_index
+# if current_token_index < len(GEMINI_TOKENS):
+# token = GEMINI_TOKENS[current_token_index]
+# logging.info(f"[Gemini] Using token: -----") #{token}
+# return token
+# return None
+
+# def switch_to_next_token():
+# global current_token_index
+# current_token_index += 1
+# if current_token_index >= len(GEMINI_TOKENS):
+# logging.error("[Gemini] All tokens exhausted!")
+# return None
+# token = get_next_gemini_token()
+# if token:
+# genai.configure(api_key=token)
+# global model
+# model = genai.GenerativeModel(
+# model_name="gemini-2.0-flash",
+# generation_config={
+# "response_mime_type": "application/json",
+# "response_schema": AnalysisResponse,
+# },
+# )
+# return token
+# return None
+
+# # === Gemini Model Setup ===
+# class AnalysisResponse(typing.TypedDict):
+# answers: list[str]
+# reasonings: list[str]
+
+# initial_token = get_next_gemini_token()
+# if not initial_token:
+# raise ValueError("[Gemini] No valid Gemini tokens found!")
+
+# genai.configure(api_key=initial_token)
+# model = genai.GenerativeModel(
+# model_name="gemini-2.0-flash",
+# generation_config={
+# "response_mime_type": "application/json",
+# "response_schema": AnalysisResponse,
+# },
+# )
+
+# # === Utility functions ===
+# def calculate_relevance_tag(answers):
+# if not answers or not isinstance(answers, list):
+# return 'Irrelevant'
+# yes_count = sum(1 for answer in answers if str(answer).upper() == 'YES')
+# total_count = len(answers)
+# percentage = (yes_count / total_count) * 100 if total_count > 0 else 0
+# if percentage >= 50:
+# return 'Relevant'
+# elif percentage > 0:
+# return 'Partially Relevant'
+# else:
+# return 'Irrelevant'
+
+# def adjust_excel_formatting(output_file):
+# wb = load_workbook(output_file)
+# ws = wb.active
+# for row in ws.iter_rows():
+# for cell in row:
+# cell.alignment = Alignment(wrap_text=True, vertical="top", horizontal="left")
+# for col in ws.columns:
+# max_length = 0
+# col_letter = col[0].column_letter
+# for cell in col:
+# try:
+# if cell.value:
+# max_length = max(max_length, len(str(cell.value)))
+# except:
+# pass
+# ws.column_dimensions[col_letter].width = max_length + 2
+# wb.save(output_file)
+
+# def get_image_as_base64(url: str) -> str:
+# with urlopen(url) as response:
+# image_data = response.read()
+# mime_type, _ = mimetypes.guess_type(url)
+# if not mime_type:
+# mime_type = "image/jpeg"
+# base64_data = base64.b64encode(image_data).decode("utf-8")
+# return f"data:{mime_type};base64,{base64_data}"
+
+# # Track timestamps of recent requests
+# _request_times = deque()
+# _request_lock = threading.Lock()
+# MAX_REQUESTS_PER_MINUTE = 2000
+
+# def rate_limiter():
+# """Block until we are under the 2000 req/min limit."""
+# global _request_times
+# with _request_lock:
+# now = time.time()
+# # Remove requests older than 60 seconds
+# while _request_times and now - _request_times[0] > 60:
+# _request_times.popleft()
+
+# if len(_request_times) >= MAX_REQUESTS_PER_MINUTE:
+# sleep_time = 60 - (now - _request_times[0])
+# if sleep_time > 0:
+# logging.info(f"[RateLimiter] Throttling for {sleep_time:.2f} seconds to stay under 2000 req/min...")
+# time.sleep(sleep_time)
+# return rate_limiter() # Recheck after sleep
+
+# _request_times.append(time.time())
+
+
+# def process_image(task_evidence_link, task_evidence_question, max_retries=3):
+# global current_token_index
+# retries = 0
+# while retries < max_retries:
+# try:
+# rate_limiter()
+# image = httpx.get(task_evidence_link)
+# prompt = f"""You are an educational evidence validator. Analyse the given image... {task_evidence_question}"""
+# response = model.generate_content([
+# {"mime_type": "image/jpeg", "data": base64.b64encode(image.content).decode("utf-8")},
+# prompt,
+# ])
+# response_json = json.loads(response.text)
+# return response_json
+# except Exception as e:
+# error_str = str(e).lower()
+# if any(k in error_str for k in ["rate limit", "quota", "429", "resource_exhausted"]):
+# logging.warning("[Gemini] Rate limit or quota exceeded. Switching token...")
+# if switch_to_next_token():
+# continue
+# else:
+# logging.warning("[Gemini] No more tokens. Retrying in 60 seconds...")
+# time.sleep(60)
+# retries += 1
+# else:
+# logging.error(f"[Gemini] Error: {e}")
+# retries += 1
+# logging.error("[Gemini] Max retries reached.")
+# return {"error": "Max retries reached"}
+
+
+# # === Main processing ===
+# def main(input_file, worker_id=None):
+# try:
+# logging.info(f"[Worker {worker_id}] Starting processing for {input_file}")
+
+# if not os.path.exists(input_file):
+# logging.error(f"[Worker {worker_id}] File not found: {input_file}")
+# return None
+
+# df = pd.read_excel(input_file) if input_file.endswith(".xlsx") else pd.read_csv(input_file)
+# df_filtered = df[
+# ~df["Task Evidence"].isin([None, "Null"])
+# & ~df["Task Evidence Question"].isin([None, "Null"])
+# ].dropna(subset=["Task Evidence", "Task Evidence Question"])
+
+# processed_count = 0
+# task_evidence_qa = []
+# task_evidence_qa_reason = []
+# relevance_tags = []
+
+# for idx, row in df_filtered.iterrows():
+# task_evidence = str(row["Task Evidence"]).strip()
+# task_question = str(row["Task Evidence Question"]).strip()
+
+# if any(task_evidence.lower().endswith(ext) for ext in IMAGE_FORMATS):
+# logging.info(f"[Worker {worker_id}] Processing image row {idx+1}/{len(df_filtered)}")
+# response = process_image(task_evidence, task_question)
+# if isinstance(response, dict) and "answers" in response and "reasonings" in response:
+# answers = response["answers"]
+# reasonings = response["reasonings"]
+# task_evidence_qa.append(answers)
+# task_evidence_qa_reason.append(reasonings)
+# relevance_tags.append(calculate_relevance_tag(answers))
+# else:
+# logging.warning(f"[Worker {worker_id}] Invalid response at row {idx+1}")
+# task_evidence_qa.append(None)
+# task_evidence_qa_reason.append(None)
+# relevance_tags.append('Irrelevant')
+# else:
+# logging.info(f"[Worker {worker_id}] Skipping non-image row {idx+1}")
+# task_evidence_qa.append(None)
+# task_evidence_qa_reason.append(None)
+# relevance_tags.append('Irrelevant')
+
+# processed_count += 1
+# if processed_count >= MAX_PROCESSED_ROWS:
+# logging.info(f"[Worker {worker_id}] Reached max processed rows ({MAX_PROCESSED_ROWS})")
+# break
+
+# df_filtered = df_filtered.head(processed_count)
+# df_filtered["Task evidence Q and A"] = task_evidence_qa
+# df_filtered["Task evidence Q and A Reason"] = task_evidence_qa_reason
+# df_filtered["Relevance Tag"] = relevance_tags
+
+# # ✅ Remove IMAGE() formula for CSV - it's Excel-specific
+# df_filtered["Image Preview"] = df_filtered["Task Evidence"].apply(
+# lambda x: str(x) if str(x).lower().endswith(tuple(IMAGE_FORMATS)) else ""
+# )
+
+# # ✅ Changed to save as CSV instead of XLSX
+# output_filename = os.path.join(OUTPUT_DIR, f"processed_{os.path.basename(input_file).split('.')[0]}.csv")
+# df_filtered.to_csv(output_filename, index=False)
+
+# logging.info(f"[Worker {worker_id}] Finished processing {input_file}. Output: {output_filename}")
+
+# return output_filename # ✅ Return the output file path
+
+# except Exception as e:
+# logging.exception(f"[Worker {worker_id}] Failed to process {input_file}: {e}")
+# return None
+
+
+# def process_file_parallel(file_path, worker_id):
+# os.makedirs(OUTPUT_DIR, exist_ok=True)
+# output_filename = main(file_path, worker_id) # ✅ Get return from main
+# if output_filename:
+# logging.info(f"[Worker {worker_id}] Output saved as {output_filename}")
+# else:
+# logging.warning(f"[Worker {worker_id}] Processing failed for {file_path}")
+# return output_filename # ✅ Always return, even if None
+
+# # === Entry point ===
+# if __name__ == "__main__":
+# input_files = [
+# os.path.join(INPUT_DIR, file)
+# for file in os.listdir(INPUT_DIR)
+# if file.endswith((".xlsx", ".csv"))
+# ]
+
+# logging.info(f"[Main] Found {len(input_files)} input files to process.")
+
+# processed_files = []
+# with concurrent.futures.ThreadPoolExecutor(max_workers=len(input_files)) as executor:
+# futures = {
+# executor.submit(process_file_parallel, f, idx + 1): f
+# for idx, f in enumerate(input_files)
+# }
+# for future in concurrent.futures.as_completed(futures):
+# result_file = future.result()
+# if result_file: # ✅ Skip failed results
+# processed_files.append(result_file)
+# logging.info(f"[Main] Worker finished: {result_file}")
+# else:
+# logging.warning(f"[Main] A file failed to process.")
+
+# if not processed_files:
+# logging.error("[Main] No files processed successfully. Exiting.")
+# exit(1)
+
+# try:
+# logging.info(f"[Main] Merging {len(processed_files)} files into {FINAL_OUTPUT_FILE}")
+# # ✅ Changed to read CSV files instead of Excel files
+# merged_df = pd.concat([pd.read_csv(f) for f in processed_files], ignore_index=True)
+# # ✅ Changed to save merged output as CSV
+# merged_df.to_csv(FINAL_OUTPUT_FILE, index=False)
+# logging.info(f"✅ All files processed and merged into: {FINAL_OUTPUT_FILE}")
+# except Exception as e:
+# logging.exception(f"[Main] Error during merging: {e}")
+
+
+
+# import os
+# import concurrent.futures
+# import pandas as pd
+# from openpyxl import load_workbook
+# from openpyxl.styles import Alignment
+# import json
+# import google.generativeai as genai
+# import httpx
+# import base64
+# import typing_extensions as typing
+# import time
+# import mimetypes
+# from urllib.request import urlopen
+# import re
+# import logging
+# import csv
+# from dotenv import load_dotenv
+# load_dotenv()
+# import threading
+# import time
+# from collections import deque
+
+# # Configure logging
+# logging.basicConfig(
+# level=logging.INFO,
+# format="%(asctime)s [%(levelname)s] [%(threadName)s] %(message)s",
+# handlers=[
+# logging.FileHandler("parallel_output_split_files/processing.log"),
+# logging.StreamHandler()
+# ]
+# )
+# logger = logging.getLogger(__name__)
+
+# # ✅ Add this below logger setup
+# def validate_csv(input_file, logger):
+# bad_rows = []
+# with open(input_file, newline='') as f:
+# reader = csv.reader(f)
+# for i, row in enumerate(reader):
+# try:
+# pass # You can add your row-level validation here
+# except Exception as e:
+# logger.warning(f"Malformed row at line {i+1}: {e}")
+# bad_rows.append(i+1)
+# return bad_rows
+
+# # === Constants ===
+# IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
+# MAX_PROCESSED_ROWS = 1
+# OUTPUT_FILE = "processed_output.csv"
+# INPUT_DIR = "parallel_input_split_files"
+# OUTPUT_DIR = "parallel_output_split_files"
+# FINAL_OUTPUT_FILE = os.path.join(OUTPUT_DIR, "merged_output.csv")
+
+# def get_gemini_tokens_from_env():
+# tokens = []
+# for key in os.environ:
+# if key.startswith("GEMINI_TOKEN"):
+# tokens.append(os.environ[key])
+# if not tokens:
+# logging.error("[Gemini] No Gemini tokens found in environment variables!")
+# return tokens
+
+# GEMINI_TOKENS = get_gemini_tokens_from_env()
+
+# current_token_index = 0
+
+# def get_next_gemini_token():
+# global current_token_index
+# if current_token_index < len(GEMINI_TOKENS):
+# token = GEMINI_TOKENS[current_token_index]
+# logging.info(f"[Gemini] Using token: -----") #{token}
+# return token
+# return None
+
+# def switch_to_next_token():
+# global current_token_index
+# current_token_index += 1
+# if current_token_index >= len(GEMINI_TOKENS):
+# logging.error("[Gemini] All tokens exhausted!")
+# return None
+# token = get_next_gemini_token()
+# if token:
+# genai.configure(api_key=token)
+# global model
+# model = genai.GenerativeModel(
+# model_name="gemini-2.0-flash",
+# generation_config={
+# "response_mime_type": "application/json",
+# "response_schema": AnalysisResponse,
+# },
+# )
+# return token
+# return None
+
+# # === Gemini Model Setup ===
+# class AnalysisResponse(typing.TypedDict):
+# answers: list[str]
+# reasonings: list[str]
+
+# initial_token = get_next_gemini_token()
+# if not initial_token:
+# raise ValueError("[Gemini] No valid Gemini tokens found!")
+
+# genai.configure(api_key=initial_token)
+# model = genai.GenerativeModel(
+# model_name="gemini-2.0-flash",
+# generation_config={
+# "response_mime_type": "application/json",
+# "response_schema": AnalysisResponse,
+# },
+# )
+
+# # === Utility functions ===
+# def calculate_relevance_tag(answers):
+# if not answers or not isinstance(answers, list):
+# return 'Irrelevant'
+# yes_count = sum(1 for answer in answers if str(answer).upper() == 'YES')
+# total_count = len(answers)
+# percentage = (yes_count / total_count) * 100 if total_count > 0 else 0
+# if percentage >= 50:
+# return 'Relevant'
+# elif percentage > 0:
+# return 'Partially Relevant'
+# else:
+# return 'Irrelevant'
+
+# def adjust_excel_formatting(output_file):
+# wb = load_workbook(output_file)
+# ws = wb.active
+# for row in ws.iter_rows():
+# for cell in row:
+# cell.alignment = Alignment(wrap_text=True, vertical="top", horizontal="left")
+# for col in ws.columns:
+# max_length = 0
+# col_letter = col[0].column_letter
+# for cell in col:
+# try:
+# if cell.value:
+# max_length = max(max_length, len(str(cell.value)))
+# except:
+# pass
+# ws.column_dimensions[col_letter].width = max_length + 2
+# wb.save(output_file)
+
+# def get_image_as_base64(url: str) -> str:
+# with urlopen(url) as response:
+# image_data = response.read()
+# mime_type, _ = mimetypes.guess_type(url)
+# if not mime_type:
+# mime_type = "image/jpeg"
+# base64_data = base64.b64encode(image_data).decode("utf-8")
+# return f"data:{mime_type};base64,{base64_data}"
+
+# # Track timestamps of recent requests
+# _request_times = deque()
+# _request_lock = threading.Lock()
+# MAX_REQUESTS_PER_MINUTE = 2000
+
+# def rate_limiter():
+# """Block until we are under the 2000 req/min limit."""
+# global _request_times
+# with _request_lock:
+# now = time.time()
+# # Remove requests older than 60 seconds
+# while _request_times and now - _request_times[0] > 60:
+# _request_times.popleft()
+
+# if len(_request_times) >= MAX_REQUESTS_PER_MINUTE:
+# sleep_time = 60 - (now - _request_times[0])
+# if sleep_time > 0:
+# logging.info(f"[RateLimiter] Throttling for {sleep_time:.2f} seconds to stay under 2000 req/min...")
+# time.sleep(sleep_time)
+# return rate_limiter() # Recheck after sleep
+
+# _request_times.append(time.time())
+
+
+# def process_image(task_evidence_link, task_evidence_question, max_retries=3):
+# global current_token_index
+# retries = 0
+# while retries < max_retries:
+# try:
+# rate_limiter()
+# image = httpx.get(task_evidence_link)
+# prompt = f"""You are an educational evidence validator. Analyse the given image... {task_evidence_question}"""
+# response = model.generate_content([
+# {"mime_type": "image/jpeg", "data": base64.b64encode(image.content).decode("utf-8")},
+# prompt,
+# ])
+# response_json = json.loads(response.text)
+# return response_json
+# except Exception as e:
+# error_str = str(e).lower()
+# if any(k in error_str for k in ["rate limit", "quota", "429", "resource_exhausted"]):
+# logging.warning("[Gemini] Rate limit or quota exceeded. Switching token...")
+# if switch_to_next_token():
+# continue
+# else:
+# logging.warning("[Gemini] No more tokens. Retrying in 60 seconds...")
+# time.sleep(60)
+# retries += 1
+# else:
+# logging.error(f"[Gemini] Error: {e}")
+# retries += 1
+# logging.error("[Gemini] Max retries reached.")
+# return {"error": "Max retries reached"}
+
+
+# # === Main processing ===
+# def main(input_file, worker_id=None):
+# try:
+# logging.info(f"[Worker {worker_id}] Starting processing for {input_file}")
+
+# if not os.path.exists(input_file):
+# logging.error(f"[Worker {worker_id}] File not found: {input_file}")
+# return None
+
+# df = pd.read_excel(input_file) if input_file.endswith(".xlsx") else pd.read_csv(input_file)
+# df_filtered = df[
+# ~df["Task Evidence"].isin([None, "Null"])
+# & ~df["Task Evidence Question"].isin([None, "Null"])
+# ].dropna(subset=["Task Evidence", "Task Evidence Question"])
+
+# processed_count = 0
+# task_evidence_qa = []
+# task_evidence_qa_reason = []
+# relevance_tags = []
+
+# for idx, row in df_filtered.iterrows():
+# task_evidence = str(row["Task Evidence"]).strip()
+# task_question = str(row["Task Evidence Question"]).strip()
+
+# if any(task_evidence.lower().endswith(ext) for ext in IMAGE_FORMATS):
+# logging.info(f"[Worker {worker_id}] Processing image row {idx+1}/{len(df_filtered)}")
+# response = process_image(task_evidence, task_question)
+# if isinstance(response, dict) and "answers" in response and "reasonings" in response:
+# answers = response["answers"]
+# reasonings = response["reasonings"]
+# task_evidence_qa.append(answers)
+# task_evidence_qa_reason.append(reasonings)
+# relevance_tags.append(calculate_relevance_tag(answers))
+# else:
+# logging.warning(f"[Worker {worker_id}] Invalid response at row {idx+1}")
+# task_evidence_qa.append(None)
+# task_evidence_qa_reason.append(None)
+# relevance_tags.append('Irrelevant')
+# else:
+# logging.info(f"[Worker {worker_id}] Skipping non-image row {idx+1}")
+# task_evidence_qa.append(None)
+# task_evidence_qa_reason.append(None)
+# relevance_tags.append('Irrelevant')
+
+# processed_count += 1
+# if processed_count >= MAX_PROCESSED_ROWS:
+# logging.info(f"[Worker {worker_id}] Reached max processed rows ({MAX_PROCESSED_ROWS})")
+# break
+
+# df_filtered = df_filtered.head(processed_count)
+# df_filtered["Task evidence Q and A"] = task_evidence_qa
+# df_filtered["Task evidence Q and A Reason"] = task_evidence_qa_reason
+# df_filtered["Relevance Tag"] = relevance_tags
+
+# # ✅ Remove IMAGE() formula for CSV - it's Excel-specific
+# df_filtered["Image Preview"] = df_filtered["Task Evidence"].apply(
+# lambda x: str(x) if str(x).lower().endswith(tuple(IMAGE_FORMATS)) else ""
+# )
+
+# # ✅ Changed to save as CSV instead of XLSX
+# output_filename = os.path.join(OUTPUT_DIR, f"processed_{os.path.basename(input_file).split('.')[0]}.csv")
+# df_filtered.to_csv(output_filename, index=False)
+
+# logging.info(f"[Worker {worker_id}] Finished processing {input_file}. Output: {output_filename}")
+
+# return output_filename # ✅ Return the output file path
+
+# except Exception as e:
+# logging.exception(f"[Worker {worker_id}] Failed to process {input_file}: {e}")
+# return None
+
+
+# def process_file_parallel(file_path, worker_id):
+# os.makedirs(OUTPUT_DIR, exist_ok=True)
+# output_filename = main(file_path, worker_id) # ✅ Get return from main
+# if output_filename:
+# logging.info(f"[Worker {worker_id}] Output saved as {output_filename}")
+# else:
+# logging.warning(f"[Worker {worker_id}] Processing failed for {file_path}")
+# return output_filename # ✅ Always return, even if None
+
+# # === Entry point ===
+# if __name__ == "__main__":
+# input_files = [
+# os.path.join(INPUT_DIR, file)
+# for file in os.listdir(INPUT_DIR)
+# if file.endswith((".xlsx", ".csv"))
+# ]
+
+# logging.info(f"[Main] Found {len(input_files)} input files to process.")
+
+# processed_files = []
+# with concurrent.futures.ThreadPoolExecutor(max_workers=len(input_files)) as executor:
+# futures = {
+# executor.submit(process_file_parallel, f, idx + 1): f
+# for idx, f in enumerate(input_files)
+# }
+# for future in concurrent.futures.as_completed(futures):
+# result_file = future.result()
+# if result_file: # ✅ Skip failed results
+# processed_files.append(result_file)
+# logging.info(f"[Main] Worker finished: {result_file}")
+# else:
+# logging.warning(f"[Main] A file failed to process.")
+
+# if not processed_files:
+# logging.error("[Main] No files processed successfully. Exiting.")
+# exit(1)
+
+# try:
+# logging.info(f"[Main] Merging {len(processed_files)} files into {FINAL_OUTPUT_FILE}")
+# # ✅ Changed to read CSV files instead of Excel files
+# merged_df = pd.concat([pd.read_csv(f) for f in processed_files], ignore_index=True)
+# # ✅ Changed to save merged output as CSV
+# merged_df.to_csv(FINAL_OUTPUT_FILE, index=False)
+# logging.info(f"✅ All files processed and merged into: {FINAL_OUTPUT_FILE}")
+# except Exception as e:
+# logging.exception(f"[Main] Error during merging: {e}")
\ No newline at end of file
diff --git a/evidence-analysis-process/processor/2-merge-processed-csv.py b/evidence-analysis-process/processor/2-merge-processed-csv.py
new file mode 100644
index 0000000..32663c3
--- /dev/null
+++ b/evidence-analysis-process/processor/2-merge-processed-csv.py
@@ -0,0 +1,131 @@
+import os
+import pandas as pd
+from pathlib import Path
+
+# ==== CONFIG ====
+INPUT_DIR = "/Users/user/Documents/AI/parallel-process/output/MAIN-SPLITS"  # Folder containing CSV files to merge
+OUTPUT_FILE = "merged_output.csv"  # Name of the merged output file (written inside INPUT_DIR)
+SORT_FILES = True  # Set to True to sort files before merging (useful for numbered files)
+
+# ==== DUPLICATE HANDLING ====
+# Options: "keep_all", "remove_duplicates", "remove_duplicates_keep_first", "remove_duplicates_keep_last"
+# Any other value is treated like "keep_all".
+DUPLICATE_HANDLING = "keep_all"  # Change this to control how duplicates are handled
+
+
+def _file_sort_key(path):
+    """Return a sort key that orders numbered files numerically.
+
+    Files whose name contains digits come first, ordered by all of those
+    digits read as a single integer; files without digits follow, ordered by
+    name. The key is always a tuple of the same shape so that an int is never
+    compared with a str, which raises TypeError in Python 3.
+    """
+    name = os.path.basename(path)
+    digits = ''.join(ch for ch in name if ch.isdecimal())
+    if digits:
+        return (0, int(digits), name)
+    return (1, 0, name)
+
+
+# ==== STEP 1: Find all CSV files ====
+print("🔍 Searching for CSV files...")
+# The merged file is written into INPUT_DIR, so skip it here; otherwise a
+# second run would merge the previous result back into itself.
+csv_files = [
+    os.path.join(INPUT_DIR, f)
+    for f in os.listdir(INPUT_DIR)
+    if f.endswith('.csv') and f != OUTPUT_FILE
+]
+
+if not csv_files:
+    print(f"❌ No CSV files found in '{INPUT_DIR}'")
+    raise SystemExit(1)
+
+# Sort files if enabled (useful for files like 1.csv, 2.csv, etc.)
+if SORT_FILES:
+    csv_files.sort(key=_file_sort_key)
+
+print(f"✅ Found {len(csv_files)} CSV files")
+
+# ==== STEP 2: Read and merge CSVs ====
+print("\n📖 Reading CSV files...")
+dataframes = []
+file_stats = []
+
+for idx, csv_file in enumerate(csv_files, 1):
+    file_name = os.path.basename(csv_file)
+    try:
+        df = pd.read_csv(csv_file)
+    except Exception as e:
+        # An unreadable file is reported and skipped; the merge continues.
+        print(f" ⚠️ Error reading {csv_file}: {e}")
+        continue
+    rows = len(df)
+    dataframes.append(df)
+    file_stats.append({
+        'file': file_name,
+        'rows': rows,
+        'columns': len(df.columns)
+    })
+    print(f" [{idx}/{len(csv_files)}] {file_name}: {rows} rows, {len(df.columns)} columns")
+
+if not dataframes:
+    print("❌ No CSV files could be read successfully")
+    raise SystemExit(1)
+
+# ==== STEP 3: Merge all dataframes ====
+print("\n🔄 Merging CSV files...")
+merged_df = pd.concat(dataframes, ignore_index=True)
+
+# ==== STEP 4: Handle Duplicates ====
+original_row_count = len(merged_df)
+# Counts only the repeated copies (the first occurrence is not marked).
+duplicates_before = merged_df.duplicated().sum()
+
+if DUPLICATE_HANDLING == "remove_duplicates":
+    # keep=False drops every member of a duplicate group, originals included.
+    merged_df = merged_df.drop_duplicates(keep=False)
+    print(" ℹ️ Removed ALL duplicate rows (both original and copies)")
+elif DUPLICATE_HANDLING == "remove_duplicates_keep_first":
+    merged_df = merged_df.drop_duplicates(keep='first')
+    print(" ℹ️ Removed duplicate rows (kept first occurrence)")
+elif DUPLICATE_HANDLING == "remove_duplicates_keep_last":
+    merged_df = merged_df.drop_duplicates(keep='last')
+    print(" ℹ️ Removed duplicate rows (kept last occurrence)")
+else:  # keep_all
+    print(" ℹ️ Keeping all rows including duplicates")
+
+rows_removed = original_row_count - len(merged_df)
+
+# ==== STEP 5: Save merged CSV ====
+output_path = os.path.join(INPUT_DIR, OUTPUT_FILE)
+merged_df.to_csv(output_path, index=False)
+output_size = os.path.getsize(output_path) / 1024  # Size in KB
+
+print(f"💾 Merged CSV saved to: {output_path}")
+
+# ==== STEP 6: Generate Merge Report ====
+print("\n" + "=" * 80)
+print(" MERGE REPORT ".center(80, "="))
+print("=" * 80)
+
+print("\n📂 INPUT:")
+print(f" • Source directory: {INPUT_DIR}")
+print(f" • Files merged: {len(dataframes)}")
+
+print("\n📄 FILE DETAILS:")
+for stat in file_stats:
+    print(f" • {stat['file']}: {stat['rows']} rows, {stat['columns']} columns")
+
+print("\n📊 MERGED DATA:")
+print(f" • Total rows before dedup: {original_row_count}")
+print(f" • Duplicate rows found: {duplicates_before}")
+print(f" • Rows removed: {rows_removed}")
+print(f" • Final row count: {len(merged_df)}")
+print(f" • Total columns: {len(merged_df.columns)}")
+print(f" • File size: {output_size:.2f} KB")
+
+print("\n🔧 DUPLICATE HANDLING:")
+print(f" • Method: {DUPLICATE_HANDLING}")
+if duplicates_before > 0:
+    if DUPLICATE_HANDLING == "keep_all":
+        print(f" • ✅ All {duplicates_before} duplicate rows were kept in output")
+    else:
+        print(f" • ✅ {rows_removed} duplicate rows were removed")
+else:
+    print(" • ✅ No duplicate rows detected")
+
+# Show column names (only the first five when there are more than ten)
+print(f"\n📋 COLUMNS ({len(merged_df.columns)}):")
+if len(merged_df.columns) <= 10:
+    for col in merged_df.columns:
+        print(f" • {col}")
+else:
+    for col in list(merged_df.columns[:5]):
+        print(f" • {col}")
+    print(f" ... and {len(merged_df.columns) - 5} more columns")
+
+print("\n💾 OUTPUT:")
+print(f" • File: {output_path}")
+print(f" • Size: {output_size:.2f} KB")
+
+print("\n" + "=" * 80)
+print(" MERGE COMPLETED SUCCESSFULLY ".center(80, "="))
+print("=" * 80 + "\n")
\ No newline at end of file
diff --git a/evidence-analysis-process/processor/3-invalid-url-&-custom-task-remover.py b/evidence-analysis-process/processor/3-invalid-url-&-custom-task-remover.py
new file mode 100644
index 0000000..a9f2a47
--- /dev/null
+++ b/evidence-analysis-process/processor/3-invalid-url-&-custom-task-remover.py
@@ -0,0 +1,132 @@
+import os
+import pandas as pd
+import re
+from pathlib import Path
+
+# ==== CONFIG ====
+INPUT_CSV = "/Users/user/Documents/AI/output/TEST/merged_output.csv"
+OUTPUT_DIR = "/Users/user/Documents/AI/output/TEST"
+OUTPUT_FILE = "final_output.csv"
+
+# Column names to check for null/empty values (any number of columns)
+CHECK_COLUMNS = ["Task evidence Q and A", "Task evidence Q and A Reason"]
+URL_COLUMN = "Task Evidence"  # Column to extract URLs from
+
+# Matches http(s) URLs embedded in free text; compiled once, used per row.
+URL_PATTERN = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
+
+# ==== STEP 1: Create output directory ====
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+print(f"📁 Output directory: {OUTPUT_DIR}\n")
+
+# ==== STEP 2: Read CSV ====
+print(f"📖 Reading CSV: {INPUT_CSV}")
+try:
+    df = pd.read_csv(INPUT_CSV)
+except Exception as e:
+    print(f"❌ Error reading CSV: {e}")
+    raise SystemExit(1)
+print(f"✅ Loaded {len(df)} rows, {len(df.columns)} columns\n")
+
+# ==== STEP 3: Verify columns exist ====
+missing_cols = [col for col in CHECK_COLUMNS + [URL_COLUMN] if col not in df.columns]
+
+if missing_cols:
+    print(f"❌ Missing columns in CSV: {', '.join(missing_cols)}")
+    print("\n📋 Available columns:")
+    for col in df.columns:
+        print(f" • {col}")
+    raise SystemExit(1)
+
+# ==== STEP 4: Identify rows with null/empty values ====
+print("🔍 Checking for null/empty values in:")
+for col in CHECK_COLUMNS:
+    print(f" • {col}")
+print()
+
+# A row is rejected when ANY checked column is NaN or blank after stripping.
+mask = pd.Series(False, index=df.index)
+for col in CHECK_COLUMNS:
+    mask |= df[col].isna() | (df[col].astype(str).str.strip() == '')
+
+rows_with_nulls = df[mask].copy()
+rows_to_keep = df[~mask].copy()
+
+null_count = len(rows_with_nulls)
+print(f"📊 Found {null_count} rows with null/empty values")
+print(f"✅ {len(rows_to_keep)} rows are valid and will be kept\n")
+
+# ==== STEP 5: Extract URLs from null rows ====
+extracted_urls = []
+
+if null_count > 0:
+    print(f"🔗 Extracting URLs from '{URL_COLUMN}' column:\n")
+    print("=" * 80)
+
+    for url_value in rows_with_nulls[URL_COLUMN]:
+        if pd.isna(url_value):
+            print(" • [No URL - Cell is empty]")
+            continue
+
+        url_str = str(url_value)
+        # Fall back to the raw cell text when it holds no recognisable URL,
+        # so every rejected row with a value is still reported.
+        found_urls = URL_PATTERN.findall(url_str) or [url_str]
+        for url in found_urls:
+            extracted_urls.append(url)
+            print(f" • {url}")
+
+    print("=" * 80)
+    print(f"\n📊 Total URLs extracted: {len(extracted_urls)}\n")
+else:
+    print("✅ No null/empty rows found - nothing to extract\n")
+
+# ==== STEP 6: Save cleaned CSV ====
+output_path = os.path.join(OUTPUT_DIR, OUTPUT_FILE)
+rows_to_keep.to_csv(output_path, index=False)
+output_size = os.path.getsize(output_path) / 1024  # Size in KB
+
+print(f"💾 Cleaned CSV saved to: {output_path}")
+print(f" • Size: {output_size:.2f} KB\n")
+
+# ==== STEP 7: Generate Report ====
+print("=" * 80)
+print(" CLEANING REPORT ".center(80, "="))
+print("=" * 80)
+
+print("\n📂 INPUT:")
+print(f" • File: {INPUT_CSV}")
+print(f" • Original rows: {len(df)}")
+
+print("\n🔍 CHECKED COLUMNS:")
+for col in CHECK_COLUMNS:
+    print(f" • {col}")
+
+print("\n📊 RESULTS:")
+print(f" • Rows with null/empty values: {null_count}")
+print(f" • Rows removed: {null_count}")
+print(f" • Rows kept: {len(rows_to_keep)}")
+print(f" • URLs extracted: {len(extracted_urls)}")
+
+print("\n💾 OUTPUT:")
+print(f" • File: {output_path}")
+print(f" • Final row count: {len(rows_to_keep)}")
+print(f" • Size: {output_size:.2f} KB")
+
+if len(extracted_urls) > 0:
+    print(f"\n🔗 EXTRACTED URLS ({len(extracted_urls)}):")
+    print("=" * 80)
+    for i, url in enumerate(extracted_urls, 1):
+        print(f"{i}. {url}")
+    print("=" * 80)
+
+print("\n" + "=" * 80)
+print(" CLEANING COMPLETED SUCCESSFULLY ".center(80, "="))
+print("=" * 80 + "\n")
\ No newline at end of file
diff --git a/evidence-analysis-process/processor/4-url-validator.py b/evidence-analysis-process/processor/4-url-validator.py
new file mode 100644
index 0000000..7c41665
--- /dev/null
+++ b/evidence-analysis-process/processor/4-url-validator.py
@@ -0,0 +1,268 @@
+import requests
+import pandas as pd
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+import time
+import re
+
+# ==== CONFIG ====
+# NOTE(review): URLS_FILE is an absolute path under /Users/user - presumably machine-specific; confirm before running elsewhere.
+URLS_FILE = "/Users/user/Documents/AI/parallel-process/processor/url.txt" # File containing URLs (one per line)
+# CSV report written by the main block; a relative path, so it lands in the current working directory.
+OUTPUT_FILE = "url_validation_report.csv"
+MAX_WORKERS = 30 # Number of concurrent threads
+TIMEOUT = 15 # Timeout in seconds for each request
+# Retries are only made after a Timeout, Connection Error or Server Error result.
+RETRY_ATTEMPTS = 2 # Number of retry attempts for failed URLs
+
+# ==== STEP 1: Load and Clean URLs ====
+def load_urls_from_file(filename):
+    """Load the unique http(s) URLs found in a text file.
+
+    URLs are located with a regex anywhere in the file, so numbering or
+    bullets in front of a URL are ignored automatically. Trailing
+    punctuation (. , ; : and closing parenthesis) is stripped from each match.
+
+    Args:
+        filename: Path of a UTF-8 text file containing URLs.
+
+    Returns:
+        List of URLs in order of first appearance, without duplicates.
+        An empty list when the file is missing or cannot be read.
+    """
+    url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
+
+    try:
+        with open(filename, 'r', encoding='utf-8') as f:
+            content = f.read()
+    except FileNotFoundError:
+        print(f"⚠️ File '{filename}' not found.")
+        return []
+    except Exception as e:
+        print(f"⚠️ Error reading file: {e}")
+        return []
+
+    # Remove trailing punctuation that might not be part of the URL
+    cleaned_urls = (url.rstrip('.,;:)') for url in re.findall(url_pattern, content))
+
+    # dict preserves insertion order, so this removes duplicates while
+    # keeping the first occurrence of each URL in place.
+    return list(dict.fromkeys(cleaned_urls))
+
+# ==== STEP 2: Validate Single URL ====
+def validate_url(url, retry_count=0):
+    """Probe a single URL and describe the outcome.
+
+    A HEAD request is tried first because it avoids downloading the body.
+    When the server answers 405 or 501 (HEAD not supported), the probe is
+    repeated with a streamed GET so that a reachable resource is not reported
+    as an error just because HEAD is rejected.
+
+    Args:
+        url: The URL to check.
+        retry_count: Number of retries already used; callers normally leave
+            this at 0.
+
+    Returns:
+        dict with keys 'url', 'status', 'status_code', 'response_time',
+        'content_type', 'content_length', 'error' and 'final_url'.
+        'final_url' is set only when redirects led to a different URL.
+        Request failures are reported through 'status' and 'error'; no
+        exception is raised.
+    """
+    retryable_statuses = ('Timeout', 'Connection Error', 'Server Error')
+    attempt = retry_count
+
+    while True:
+        result = {
+            'url': url,
+            'status': 'Unknown',
+            'status_code': None,
+            'response_time': None,
+            'content_type': None,
+            'content_length': None,
+            'error': None,
+            'final_url': None
+        }
+
+        try:
+            start_time = time.time()
+            # Use HEAD request first for efficiency
+            response = requests.head(url, timeout=TIMEOUT, allow_redirects=True)
+            if response.status_code in (405, 501):
+                response.close()
+                # stream=True fetches headers only; the body is never read.
+                response = requests.get(url, timeout=TIMEOUT, allow_redirects=True, stream=True)
+            response_time = time.time() - start_time
+
+            try:
+                code = response.status_code
+                result['status_code'] = code
+                result['response_time'] = round(response_time, 2)
+                result['content_type'] = response.headers.get('Content-Type', 'Unknown')
+                result['content_length'] = response.headers.get('Content-Length', 'Unknown')
+                result['final_url'] = response.url if response.url != url else None
+            finally:
+                response.close()
+
+            if 200 <= code < 300:
+                result['status'] = 'Valid'
+            elif code == 404:
+                result['status'] = 'Not Found'
+            elif code == 403:
+                result['status'] = 'Forbidden'
+            elif code >= 500:
+                result['status'] = 'Server Error'
+            elif code >= 400:
+                result['status'] = 'Client Error'
+            elif code >= 300:
+                result['status'] = 'Redirect'
+            else:
+                result['status'] = 'Other'
+
+        except requests.exceptions.Timeout:
+            result['status'] = 'Timeout'
+            result['error'] = f'Request timeout after {TIMEOUT}s'
+        except requests.exceptions.SSLError as e:
+            # SSLError is a ConnectionError subclass, so it must be caught first.
+            result['status'] = 'SSL Error'
+            result['error'] = str(e)[:150]
+        except requests.exceptions.ConnectionError as e:
+            result['status'] = 'Connection Error'
+            result['error'] = str(e)[:150]
+        except requests.exceptions.TooManyRedirects:
+            result['status'] = 'Too Many Redirects'
+            result['error'] = 'Exceeded maximum redirects'
+        except requests.exceptions.RequestException as e:
+            result['status'] = 'Request Error'
+            result['error'] = str(e)[:150]
+        except Exception as e:
+            result['status'] = 'Unknown Error'
+            result['error'] = str(e)[:150]
+
+        # Retry only failures that may be transient, up to RETRY_ATTEMPTS times
+        if result['status'] in retryable_statuses and attempt < RETRY_ATTEMPTS:
+            attempt += 1
+            time.sleep(1)
+            continue
+
+        return result
+
+# ==== STEP 3: Validate URLs Concurrently ====
+def validate_urls_concurrent(urls):
+    """Validate multiple URLs concurrently using ThreadPoolExecutor.
+
+    Each URL is passed to validate_url on a pool of MAX_WORKERS threads and a
+    progress line is printed as every check completes.
+
+    Args:
+        urls: Iterable of URL strings; len() must be supported.
+
+    Returns:
+        List of result dicts in completion order (not input order).
+    """
+    status_icons = {
+        'Valid': '✅',
+        'Not Found': '❌',
+        'Forbidden': '🚫',
+        'Server Error': '💥',
+        'Client Error': '⚠️',
+        'Error': '⚠️',
+        'Timeout': '⏱️',
+        'Connection Error': '🔌',
+        'SSL Error': '🔒',
+        'Redirect': '↪️',
+        'Other': '❓'
+    }
+    results = []
+    total = len(urls)
+
+    print(f"🚀 Starting validation of {total} URLs...\n")
+
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+        future_to_url = {executor.submit(validate_url, url): url for url in urls}
+
+        for completed, future in enumerate(as_completed(future_to_url), 1):
+            try:
+                result = future.result()
+            except Exception as e:
+                # Keep the batch going if a worker fails unexpectedly;
+                # record the failure in the same shape as a normal result.
+                result = {
+                    'url': future_to_url[future],
+                    'status': 'Unknown Error',
+                    'status_code': None,
+                    'response_time': None,
+                    'content_type': None,
+                    'content_length': None,
+                    'error': str(e)[:150],
+                    'final_url': None
+                }
+            results.append(result)
+
+            # Print progress
+            status_icon = status_icons.get(result['status'], '•')
+
+            # Show first 60 chars of URL
+            url_display = result['url'][:60] + '...' if len(result['url']) > 60 else result['url']
+            status_display = f"{result['status']}"
+            if result['status_code']:
+                status_display += f" ({result['status_code']})"
+
+            print(f"[{completed}/{total}] {status_icon} {status_display}: {url_display}")
+
+    return results
+
+# ==== STEP 4: Generate Detailed Report ====
+def print_report(df, total_time):
+    """Print a detailed validation report to stdout.
+
+    Args:
+        df: DataFrame with one row per URL and at least the columns 'url',
+            'status', 'status_code', 'response_time' and 'error'.
+        total_time: Seconds spent validating all URLs.
+    """
+    status_icons = {
+        'Valid': '✅',
+        'Not Found': '❌',
+        'Forbidden': '🚫',
+        'Server Error': '💥',
+        'Client Error': '⚠️',
+        'Timeout': '⏱️',
+        'Connection Error': '🔌',
+        'SSL Error': '🔒',
+        'Redirect': '↪️',
+        'Other': '❓'
+    }
+    total = len(df)
+    # Guard the average so an empty result set does not divide by zero.
+    average_time = round(total_time / total, 2) if total else 0
+
+    print("\n" + "=" * 80)
+    print(" VALIDATION REPORT ".center(80, "="))
+    print("=" * 80)
+
+    print("\n📊 SUMMARY:")
+    print(f" • Total URLs validated: {total}")
+    print(f" • Unique URLs: {df['url'].nunique()}")
+    print(f" • Total time taken: {round(total_time, 2)}s")
+    print(f" • Average time per URL: {average_time}s")
+
+    print("\n📈 STATUS BREAKDOWN:")
+    for status, count in df['status'].value_counts().items():
+        percentage = (count / total) * 100
+        icon = status_icons.get(status, '•')
+        print(f" {icon} {status}: {count} ({percentage:.1f}%)")
+
+    # Response time statistics for valid URLs
+    valid_df = df[df['status'] == 'Valid']
+    if len(valid_df) > 0:
+        print("\n⚡ RESPONSE TIME STATISTICS (Valid URLs):")
+        print(f" • Fastest: {valid_df['response_time'].min()}s")
+        print(f" • Slowest: {valid_df['response_time'].max()}s")
+        print(f" • Average: {round(valid_df['response_time'].mean(), 2)}s")
+        print(f" • Median: {round(valid_df['response_time'].median(), 2)}s")
+
+    # Show invalid URLs (first 20 only)
+    invalid_df = df[df['status'] != 'Valid']
+    if len(invalid_df) > 0:
+        print(f"\n⚠️ INVALID/FAILED URLs ({len(invalid_df)}):")
+        print("=" * 80)
+        for _, row in invalid_df.head(20).iterrows():
+            url_display = row['url'][:65] + '...' if len(row['url']) > 65 else row['url']
+            error_msg = f" - {str(row['error'])[:50]}" if pd.notna(row['error']) else ""
+            status_display = f"[{row['status']}"
+            if pd.notna(row['status_code']):
+                # The column becomes float when any row has no status code,
+                # so cast back to int to print 404 rather than 404.0.
+                status_display += f" {int(row['status_code'])}"
+            status_display += "]"
+            print(f" {status_display} {url_display}{error_msg}")
+
+        if len(invalid_df) > 20:
+            print(f"\n ... and {len(invalid_df) - 20} more invalid URLs")
+        print("=" * 80)
+
+# ==== MAIN EXECUTION ====
+if __name__ == "__main__":
+    # Script entry point: load URLs, validate them concurrently, write the
+    # results to OUTPUT_FILE as CSV and print a summary report.
+    print("=" * 80)
+    print(" URL VALIDATOR ".center(80, "="))
+    print("=" * 80 + "\n")
+
+    # Load URLs
+    urls = load_urls_from_file(URLS_FILE)
+
+    if not urls:
+        print("❌ No URLs found. Please check your input file.")
+        raise SystemExit(1)
+
+    print(f"📋 Loaded {len(urls)} unique URLs from file")
+    print(f"⚙️ Max concurrent workers: {MAX_WORKERS}")
+    print(f"⏱️ Timeout per request: {TIMEOUT}s")
+    print(f"🔄 Retry attempts: {RETRY_ATTEMPTS}\n")
+
+    # Validate URLs
+    start_time = time.time()
+    results = validate_urls_concurrent(urls)
+    total_time = time.time() - start_time
+
+    # Convert to DataFrame
+    df = pd.DataFrame(results)
+
+    # Sort by status (Valid first, then others); statuses missing from the
+    # list get rank 999 and therefore sort last.
+    status_order = ['Valid', 'Redirect', 'Not Found', 'Forbidden', 'Client Error',
+                    'Server Error', 'Timeout', 'Connection Error', 'SSL Error',
+                    'Request Error', 'Other', 'Unknown Error']
+    status_rank = {status: position for position, status in enumerate(status_order)}
+    df['status_order'] = df['status'].apply(lambda x: status_rank.get(x, 999))
+    df = df.sort_values('status_order').drop('status_order', axis=1)
+
+    # Save to CSV
+    df.to_csv(OUTPUT_FILE, index=False)
+
+    # Generate report
+    print_report(df, total_time)
+
+    print("\n💾 OUTPUT:")
+    print(f" • Full report saved to: {OUTPUT_FILE}")
+
+    print("\n" + "=" * 80)
+    print(" VALIDATION COMPLETED ".center(80, "="))
+    print("=" * 80 + "\n")
\ No newline at end of file
diff --git a/evidence-analysis-process/processor/url.txt b/evidence-analysis-process/processor/url.txt
new file mode 100644
index 0000000..d0b0164
--- /dev/null
+++ b/evidence-analysis-process/processor/url.txt
@@ -0,0 +1,181 @@
+1. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1755757226432.jpg
+2. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1755757233931.jpg
+3. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1755766494929.jpg
+4. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112377600.jpg
+5. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112352372.jpg
+6. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112468976.jpg
+7. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112561957.jpg
+8. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112585163.jpg
+9. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68b28bf22462c2000845583a/45c2d1fc-effe-4586-97d7-eb944d5b64ec/6069f715-2332-436a-ac03-f0ab10fc8463/1756537060440.jpg
+10. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755062027902.jpg
+11. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755062030271.jpg
+12. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066531343.jpg
+13. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066528855.jpg
+14. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066533233.jpg
+15. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755011928103.jpg
+16. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755011968386.jpg
+17. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755012027535.jpg
+18. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755012061968.jpg
+19. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989006233.jpg
+20. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989009015.jpg
+21. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989003357.jpg
+22. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989010927.jpg
+23. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989000141.jpg
+24. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066775403.jpg
+25. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993226918.jpg
+26. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993242938.jpg
+27. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993262582.jpg
+28. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993257520.jpg
+29. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993206829.jpg
+30. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993188560.jpg
+31. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364371996.jpg
+32. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364373667.jpg
+33. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756371255806.jpg
+34. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364583851.jpg
+35. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369799312.jpg
+36. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369794636.jpg
+37. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364582020.jpg
+38. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756371161172.jpg
+39. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369899533.jpg
+40. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369901781.jpg
+41. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756370810927.jpg
+42. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756370762297.jpg
+43. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756370787370.jpg
+44. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457408003.jpg
+45. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452661905.jpg
+46. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452664154.jpg
+47. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457409742.jpg
+48. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457446132.jpg
+49. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452694093.jpg
+50. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457442871.jpg
+51. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452691976.jpg
+52. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457465829.jpg
+53. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457460407.jpg
+54. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452573999.jpg
+55. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452575538.jpg
+56. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756131232333.jpg
+57. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756129909476.jpg
+58. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756129641020.jpg
+59. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756130503063.jpg
+60. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756130803302.jpg
+61. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689459c02462c20008157450/73947ed5-c5a0-4109-842d-90b6fdf36972/cc163c3e-c112-4bde-86c0-dd40249f0edb/1755876961342.jpg
+62. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689459c02462c20008157450/73947ed5-c5a0-4109-842d-90b6fdf36972/cc163c3e-c112-4bde-86c0-dd40249f0edb/1755877076407.jpg
+63. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689459c02462c20008157450/73947ed5-c5a0-4109-842d-90b6fdf36972/cc163c3e-c112-4bde-86c0-dd40249f0edb/1755789369513.jpg
+64. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6899962e2462c2000819e971/69ad3fdf-5b84-4602-8f00-94aea0b3864e/bf9ee9b1-b8c5-4e26-84ef-3634d50f15d2/1755856628790.jpg
+65. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2db992462c2000822062c/8c5a70b2-0248-4278-b862-dc4413c13bd9/45809b72-572c-4219-bbbd-5b5ca4be1370/1756120831205.jpg
+66. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755785820616.jpg
+67. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755506251258.jpg
+68. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755594048065.jpg
+69. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755785972244.jpg
+70. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755941756973.jpg
+71. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755882118082.jpg
+72. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756097813854.jpg
+73. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756097817157.jpg
+74. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756097810155.jpg
+75. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101422072.jpg
+76. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101416101.jpg
+77. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101424484.jpg
+78. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101437883.jpg
+79. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101453377.jpg
+80. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101441359.jpg
+81. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101477708.jpg
+82. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101466184.jpg
+83. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101470599.jpg
+84. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2fb142462c20008227a66/b7a04e24-ee1c-46b2-b9bf-39cf81287195/5a667727-8f1c-4071-97d0-0bf815b365b6/1756451443415.jpg
+85. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944177421.jpg
+86. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944172390.jpg
+87. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944181238.jpg
+88. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944190010.jpg
+89. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944185285.jpg
+90. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944174721.jpg
+91. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933317992.jpg
+92. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933185744.jpg
+93. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933188670.jpg
+94. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933182135.jpg
+95. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846377416.jpg
+96. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846380059.jpg
+97. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846394276.jpg
+98. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846387490.jpg
+99. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846383110.jpg
+100. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934367274.jpg
+101. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934362137.jpg
+102. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934411786.jpg
+103. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934373122.jpg
+104. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934356296.jpg
+105. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276260615.jpg
+106. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276252533.jpg
+107. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276242877.jpg
+108. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276232667.jpg
+109. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276247893.jpg
+110. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276238468.jpg
+111. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670249224.jpg
+112. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670256658.jpg
+113. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670264870.jpg
+114. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670260119.jpg
+115. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670337209.jpg
+116. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670313387.jpg
+117. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670322699.jpg
+118. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756540953969.jpg
+119. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541299398.jpg
+120. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541327608.jpg
+121. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541387845.jpg
+122. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541432668.jpg
+123. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685262419.jpg
+124. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685250603.jpg
+125. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685265118.jpg
+126. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685246527.jpg
+127. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685395153.jpg
+128. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685386138.jpg
+129. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685391435.jpg
+130. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685376705.jpg
+131. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766907917.jpg
+132. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766911978.jpg
+133. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766915443.jpg
+134. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766906207.jpg
+135. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699045889.jpg
+136. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699042757.jpg
+137. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699038725.jpg
+138. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699034383.jpg
+139. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699100471.jpg
+140. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699123461.jpg
+141. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699557330.jpg
+142. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699501611.jpg
+143. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699475529.jpg
+144. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755914888529.jpg
+145. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68b261f12462c2000844b602/d62fbb11-191e-4eef-aef9-f4f8767894e8/faf8c968-ad27-4822-8e47-c5145d3225ef/1756552744999.jpg
+146. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689d7c732462c200081e7eff/d53edf53-6e5a-4e6e-8b45-1fc4c5149777/3865c61a-4a8c-4d3c-88c2-e051859b63b0/1756272169698.jpg
+147. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372862210.jpg
+148. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372773933.jpg
+149. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372889852.jpg
+150. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372958676.jpg
+151. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373138375.jpg
+152. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373194932.jpg
+153. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512173612.jpg
+154. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512124754.jpg
+155. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512109373.jpg
+156. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373153827.jpg
+157. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373160500.jpg
+158. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373172649.jpg
+159. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512132376.jpg
+160. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373504815.jpg
+161. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373629953.jpg
+162. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373533355.jpg
+163. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373516419.jpg
+164. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373488114.jpg
+165. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047429145.jpg
+166. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047439458.jpg
+167. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047529554.jpg
+168. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047633518.jpg
+169. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047619813.jpg
+170. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047700054.jpg
+171. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047715120.jpg
+172. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047791816.jpg
+173. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047787045.jpg
+174. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756048012523.jpg
+175. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756048002346.jpg
+176. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595622454.jpg
+177. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595657088.jpg
+178. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595685922.jpg
+179. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595748267.jpg
+180. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755598730557.jpg
+181. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755598775105.jpg
\ No newline at end of file
diff --git a/evidence-analysis-process/processor/validate-input-output-csv.py b/evidence-analysis-process/processor/validate-input-output-csv.py
new file mode 100644
index 0000000..4b78697
--- /dev/null
+++ b/evidence-analysis-process/processor/validate-input-output-csv.py
@@ -0,0 +1,93 @@
+import csv
+import pandas as pd
+
+# ==== CONFIG ====
+csv1_path = "/Users/user/Documents/AI/parallel-process/output [pre-processor]/preprocessed_data.csv"
+csv2_path = "/Users/user/Documents/AI/OUPUT/MAIN-SPLITS/merged_output.csv"
+output_missing_rows = "missing_rows.csv"
+
+# ==== STEP 0: Print row counts (excluding header) ====
+print("π Row counts for the given files (excluding header):")
+
+# CSV 1
+with open(csv1_path, newline="") as fp:
+ csv1_count = sum(1 for _ in csv.reader(fp)) - 1
+print(f"{csv1_path}: {csv1_count}")
+
+# CSV 2
+with open(csv2_path, newline="") as fp:
+ csv2_count = sum(1 for _ in csv.reader(fp)) - 1
+print(f"{csv2_path}: {csv2_count}")
+
+# ==== STEP 1: Read both CSVs ====
+df_csv1 = pd.read_csv(csv1_path, dtype=str)
+df_csv2 = pd.read_csv(csv2_path, dtype=str)
+
+# ==== STEP 2: Limit columns up to 'Project Evidence' ====
+if 'Project Evidence' not in df_csv1.columns:
+ raise ValueError("'Project Evidence' column not found in first CSV")
+if 'Project Evidence' not in df_csv2.columns:
+ raise ValueError("'Project Evidence' column not found in second CSV")
+
+# Get column names up to and including 'Project Evidence'
+cols_to_check = list(df_csv1.columns[:df_csv1.columns.get_loc('Project Evidence') + 1])
+
+df_csv1 = df_csv1[cols_to_check].astype(str)
+df_csv2 = df_csv2[cols_to_check].astype(str)
+
+# ==== STEP 3: Identify missing rows (in CSV1 but not in CSV2) ====
+merged = df_csv1.merge(df_csv2.drop_duplicates(), how='left', indicator=True)
+missing_rows = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
+
+if missing_rows.empty:
+ print("✅ No missing rows found.")
+else:
+ print(f"⚠️ Found {len(missing_rows)} missing rows in CSV2 that exist in CSV1.")
+
+# ==== STEP 4: Write missing rows to CSV ====
+missing_rows.to_csv(output_missing_rows, index=False)
+print(f"💾 Missing rows saved to: {output_missing_rows}")
+
+# ==== STEP 5: Generate Comparison Report ====
+print("\n" + "="*80)
+print(" COMPARISON REPORT ".center(80, "="))
+print("="*80)
+
+print(f"\nπ FILES COMPARED:")
+print(f" • CSV 1 (Source): {csv1_path}")
+print(f" • CSV 2 (Target): {csv2_path}")
+
+print(f"\nπ ROW STATISTICS:")
+print(f" • Rows in CSV 1: {csv1_count}")
+print(f" • Rows in CSV 2: {csv2_count}")
+print(f" • Missing rows: {len(missing_rows)}")
+if csv1_count > 0:
+ print(f" • Match rate: {((csv1_count - len(missing_rows)) / csv1_count * 100):.2f}%")
+
+print(f"\nπ COLUMNS COMPARED:")
+print(f" • Total columns checked: {len(cols_to_check)}")
+if len(cols_to_check) > 5:
+ print(f" • Columns: {', '.join(cols_to_check[:5])}...")
+else:
+ print(f" • Columns: {', '.join(cols_to_check)}")
+
+if not missing_rows.empty:
+ print(f"\n⚠️ MISSING ROWS ANALYSIS:")
+ print(f" • Total missing: {len(missing_rows)}")
+ if csv1_count > 0:
+ print(f" • Percentage missing: {(len(missing_rows) / csv1_count * 100):.2f}%")
+ print(f" • Output file: {output_missing_rows}")
+
+ # Show first few missing rows as sample
+ print(f"\nπ SAMPLE MISSING ROWS (first 3):")
+ for idx, row in missing_rows.head(3).iterrows():
+ print(f" Row {idx + 2}:") # +2 because of 0-indexing and header
+ for col in cols_to_check[:3]: # Show first 3 columns
+ value = str(row[col])[:50] # Truncate long values
+ print(f" • {col}: {value}")
+else:
+ print(f"\n✅ RESULT: All rows from CSV 1 exist in CSV 2")
+
+print("\n" + "="*80)
+print(" COMPARISON COMPLETED ".center(80, "="))
+print("="*80 + "\n")
\ No newline at end of file
diff --git a/evidence-analysis-process/requirements.txt b/evidence-analysis-process/requirements.txt
new file mode 100644
index 0000000..a96544a
--- /dev/null
+++ b/evidence-analysis-process/requirements.txt
@@ -0,0 +1,8 @@
+pandas
+openpyxl
+httpx
+requests
+google-generativeai
+typing-extensions
+python-dotenv
+tqdm
\ No newline at end of file
diff --git a/evidence-analysis-process/webpage/home.html b/evidence-analysis-process/webpage/home.html
new file mode 100644
index 0000000..51c17d2
--- /dev/null
+++ b/evidence-analysis-process/webpage/home.html
@@ -0,0 +1,900 @@
+
+
+
+
+ MIP Evidence Report
+
+
+
+
+
+
+
+
+
+
+
+
Processing your data...
+
+
+
+
+
+
+
π Executive Summary
+
+
+
+
+
+
π Evidence Submission Overview
+
+
+
+
+
+
+
+
+
+
Subject Distribution
+
+
+
+
+
+
+
+
π― Quality & Relevance Analysis
+
+
+
+
+
Task Completion Analysis
+
+
+
+
+
+
πΊοΈ District-wise Submission Quality & Insights
+
+
+
+
Bihar District Relevance Map
+
+ Map placeholder area (Ensure map is static before PDF generation)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json
index 10b908a..a532cf5 100644
--- a/interface-routes/elevate-routes.json
+++ b/interface-routes/elevate-routes.json
@@ -9822,6 +9822,34 @@
],
"service": "survey"
},
+ {
+ "sourceRoute": "/survey/v1/admin/clearTenantCache/:id",
+ "type": "DELETE",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "survey",
+ "packageName": "elevate-survey-observation"
+ }
+ ],
+ "service": "survey"
+ },
+ {
+ "sourceRoute": "/survey/v1/admin/clearTenantCache",
+ "type": "DELETE",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "survey",
+ "packageName": "elevate-survey-observation"
+ }
+ ],
+ "service": "survey"
+ },
{
"sourceRoute": "/user/v1/account/login",
"type": "POST",
@@ -14269,6 +14297,115 @@
"packageName": "elevate-self-creation-portal"
}
]
- }
+
+ },
+ {
+ "sourceRoute": "/project/v1/userProjects/updateAcl",
+ "type": "POST",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "project",
+ "packageName": "elevate-project"
+ }
+ ],
+ "service": "project"
+ },
+ {
+ "sourceRoute": "/project/v1/userProjects/updateAcl/:id",
+ "type": "POST",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "project",
+ "packageName": "elevate-project"
+ }
+ ],
+ "service": "project"
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/getCacheStats",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/clearCache",
+ "type": "POST",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/warmUpCache",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/getCacheHealth",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/project/v1/admin/clearTenantCache",
+ "type": "POST",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "project",
+ "packageName": "elevate-project"
+ }
+ ],
+ "service": "project"
+ },
+ {
+ "sourceRoute": "/project/v1/admin/clearTenantCache/:id",
+ "type": "POST",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "project",
+ "packageName": "elevate-project"
+ }
+ ],
+ "service": "project"
+ }
]
}
\ No newline at end of file
diff --git a/interface-routes/saas-routes.json b/interface-routes/saas-routes.json
index 01e08cc..f7e6e88 100644
--- a/interface-routes/saas-routes.json
+++ b/interface-routes/saas-routes.json
@@ -12189,6 +12189,58 @@
"packageName": "elevate-mentoring"
}
]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/getCacheStats",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/clearCache",
+ "type": "POST",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/warmUpCache",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
+ },
+ {
+ "sourceRoute": "/mentoring/v1/admin/getCacheHealth",
+ "type": "GET",
+ "priority": "MUST_HAVE",
+ "inSequence": false,
+ "orchestrated": false,
+ "targetPackages": [
+ {
+ "basePackageName": "mentoring",
+ "packageName": "elevate-mentoring"
+ }
+ ]
}
]
}