From 7989ce1737ead054882909839e5d60c25587f14a Mon Sep 17 00:00:00 2001 From: priyanka-TL Date: Wed, 10 Dec 2025 12:17:11 +0530 Subject: [PATCH 01/11] Refactor code structure for improved readability and maintainability --- evidence analysis/README.md | 90 + evidence analysis/ai/process_evidence.py | 193 ++ evidence analysis/app.py | 83 + evidence analysis/evidence_analysis_page.py | 99 + evidence analysis/report-old.html | 1388 +++++++++++++ evidence analysis/report.html | 1819 +++++++++++++++++ evidence analysis/requirements.txt | 9 + evidence analysis/utils/auth.py | 2 + evidence-analysis-process/README.md | 105 + evidence-analysis-process/input/input.csv | 0 evidence-analysis-process/output/output.csv | 0 .../pre-processor/1-pre-processor.py | 493 +++++ .../pre-processor/2-csv-splitter.py | 37 + .../processor/1-main-parallel-script.py | 1082 ++++++++++ .../processor/2-merge-processed-csv.py | 131 ++ .../3-invalid-url-&-custom-task-remover.py | 132 ++ .../processor/4-url-validator.py | 268 +++ evidence-analysis-process/processor/url.txt | 181 ++ .../processor/validate-input-output-csv.py | 93 + evidence-analysis-process/requirements.txt | 8 + evidence-analysis-process/webpage/home.html | 900 ++++++++ 21 files changed, 7113 insertions(+) create mode 100644 evidence analysis/README.md create mode 100644 evidence analysis/ai/process_evidence.py create mode 100644 evidence analysis/app.py create mode 100644 evidence analysis/evidence_analysis_page.py create mode 100644 evidence analysis/report-old.html create mode 100644 evidence analysis/report.html create mode 100644 evidence analysis/requirements.txt create mode 100644 evidence analysis/utils/auth.py create mode 100644 evidence-analysis-process/README.md create mode 100644 evidence-analysis-process/input/input.csv create mode 100644 evidence-analysis-process/output/output.csv create mode 100644 evidence-analysis-process/pre-processor/1-pre-processor.py create mode 100644 
evidence-analysis-process/pre-processor/2-csv-splitter.py create mode 100644 evidence-analysis-process/processor/1-main-parallel-script.py create mode 100644 evidence-analysis-process/processor/2-merge-processed-csv.py create mode 100644 evidence-analysis-process/processor/3-invalid-url-&-custom-task-remover.py create mode 100644 evidence-analysis-process/processor/4-url-validator.py create mode 100644 evidence-analysis-process/processor/url.txt create mode 100644 evidence-analysis-process/processor/validate-input-output-csv.py create mode 100644 evidence-analysis-process/requirements.txt create mode 100644 evidence-analysis-process/webpage/home.html diff --git a/evidence analysis/README.md b/evidence analysis/README.md new file mode 100644 index 00000000..622f7092 --- /dev/null +++ b/evidence analysis/README.md @@ -0,0 +1,90 @@ +# πŸ“Š Evidence Analysis with AI + +This Streamlit app enables users to analyze classroom evidence images using AI models (Gemini or OpenAI). The app allows input of custom questions and provides AI-generated answers and reasoning. + +--- + +## πŸš€ Features + +- Upload or link to an image (evidence) +- Input up to 7 custom questions +- Use AI to answer questions and explain reasoning +- Display image preview and AI output +- Token switching for Gemini API +- Modular backend in `ai/process_evidence.py` + +--- + +## 🧰 Tech Stack + +- [Python](https://www.python.org/) +- [Streamlit](https://streamlit.io/) +- [Google Gemini API](https://ai.google.dev/) +- [OpenAI API](https://platform.openai.com/) +- [httpx](https://www.python-httpx.org/), [base64](https://docs.python.org/3/library/base64.html) + +--- + +## πŸ“‚ Project Structure + +``` +. 
+├── ai/ +│ └── process_evidence.py # AI logic for image and question processing +├── utils/ +│ └── auth.py (basic authentication) +├── app.py (your app logic) +├── evidence_analysis_page.py +├── .gitignore +├── README.md +└── requirements.txt +``` + +--- + +## 🔧 Setup Instructions + +1. **Clone the repository**: + ```bash + git clone https://github.com/your-username/evidence-analysis.git + cd evidence-analysis + ``` + +2. **Install dependencies**: + ```bash + pip install -r requirements.txt + ``` + +3. **Add your API keys**: + - Create a `.env` file (loaded by `ai/process_evidence.py` via `python-dotenv`) or set the keys as environment variables + - Required keys: + - `GEMINI_API_KEYS` (one or more Google Gemini API keys, comma-separated) + - `TOGETHER_API_KEY` (API key for the OpenAI-compatible fallback endpoint; only needed when the fallback is enabled) + +4. **Run the app**: + ```bash + streamlit run app.py + ``` + +--- + +## 🧪 Example Usage + +1. Enter up to 7 custom evaluation questions. +2. Paste a public URL of an image showing evidence (e.g., classroom project photo). +3. Click **"🔍 Analyse"**. +4. Get AI-generated YES/NO answers with reasonings. +5. See relevance tag and image preview. + +--- + +## 🛡️ Notes + +- Make sure your Gemini/OpenAI API keys have sufficient quota. +- Gemini's `response_schema` requires accurate schema handling and token management. 
+
+---
+
+## πŸ“œ License
+
+MIT License
\ No newline at end of file
diff --git a/evidence analysis/ai/process_evidence.py b/evidence analysis/ai/process_evidence.py
new file mode 100644
index 00000000..95cb9da8
--- /dev/null
+++ b/evidence analysis/ai/process_evidence.py
@@ -0,0 +1,229 @@
+import json
+import base64
+import time
+import httpx
+import mimetypes
+import re
+import os
+from urllib.parse import urlparse
+from urllib.request import urlopen
+import google.generativeai as genai
+from openai import OpenAI
+import typing_extensions as typing
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# --- CONFIGURATION ---
+# Load keys from .env, split by comma if multiple exist
+gemini_env = os.getenv("GEMINI_API_KEYS", "")
+GEMINI_TOKENS = [key.strip() for key in gemini_env.split(",") if key.strip()]
+
+TOGETHER_TOKEN = os.getenv("TOGETHER_API_KEY")
+
+MAX_RETRIES = 3
+# Seconds to wait for an image download before giving up
+IMAGE_FETCH_TIMEOUT = 30
+current_token_index = 0
+
+# --- TOKEN HANDLING (Gemini) ---
+def get_next_gemini_token():
+    """Return the Gemini key at the current rotation index, or None if no keys are configured."""
+    if GEMINI_TOKENS and current_token_index < len(GEMINI_TOKENS):
+        return GEMINI_TOKENS[current_token_index]
+    return None
+
+def switch_to_next_token():
+    """Advance the rotation to the next Gemini key (wrapping to the first) and return it."""
+    global current_token_index
+    current_token_index += 1
+    if current_token_index >= len(GEMINI_TOKENS):
+        # Every key has been tried: wrap around and start again from the first one
+        current_token_index = 0
+        print("Warning: Cycled through all Gemini tokens.")
+    return get_next_gemini_token()
+
+# --- Gemini Model Setup ---
+# Structured-output schema handed to Gemini through response_schema
+class AnalysisResponse(typing.TypedDict):
+    answers: list[str]
+    reasonings: list[str]
+
+def build_gemini_model(api_key):
+    """Configure the Gemini SDK with api_key and return a model that replies with AnalysisResponse JSON."""
+    genai.configure(api_key=api_key)
+    return genai.GenerativeModel(
+        model_name="gemini-2.5-flash",
+        generation_config={
+            "response_mime_type": "application/json",
+            "response_schema": AnalysisResponse,
+        },
+    )
+
+initial_token = get_next_gemini_token()
+if not initial_token:
+    raise ValueError("No valid Gemini tokens found in .env file.")
+
+model = build_gemini_model(initial_token)
+
+# --- OpenAI (SambaNova) Setup ---
+# Only initialize if token exists to prevent crash
+if TOGETHER_TOKEN:
+    client = OpenAI(
+        base_url="https://api.sambanova.ai/v1",
+        api_key=TOGETHER_TOKEN
+    )
+else:
+    client = None
+    print("Warning: TOGETHER_API_KEY not found in .env")
+
+# --- Helper: Convert image to base64 ---
+def get_image_as_base64(url: str) -> str:
+    """Download the image at url and return it as a base64 data URI.
+
+    Only http(s) URLs are accepted: urlopen would otherwise also read
+    file:// and other local schemes. Raises ValueError for any other scheme.
+    """
+    if urlparse(url).scheme.lower() not in ("http", "https"):
+        raise ValueError(f"Unsupported image URL scheme: {url!r}")
+    with urlopen(url, timeout=IMAGE_FETCH_TIMEOUT) as response:
+        image_data = response.read()
+    mime_type, _ = mimetypes.guess_type(url)
+    if not mime_type:
+        mime_type = "image/jpeg"
+    return f"data:{mime_type};base64,{base64.b64encode(image_data).decode('utf-8')}"
+
+# --- Helper: Relevance Tag ---
+def calculate_relevance_tag(answers):
+    """Return 'Relevant' (>= 50% YES), 'Partially Relevant' (some YES) or 'Irrelevant' for a list of answers."""
+    if not answers or not isinstance(answers, list):
+        return 'Irrelevant'
+    # Tolerate stray whitespace / casing in model output, e.g. " yes"
+    yes_count = sum(1 for answer in answers if str(answer).strip().upper() == 'YES')
+
+    percentage = (yes_count / len(answers)) * 100
+    if percentage >= 50:
+        return 'Relevant'
+    elif percentage > 0:
+        return 'Partially Relevant'
+    return 'Irrelevant'
+
+# --- Response Parser ---
+def extract_structured_response(response_text):
+    """Parse a free-text model reply into {"answers": [...], "reasonings": [...]}.
+
+    Looks for a line starting with "ANSWERS" followed by comma-separated YES/NO
+    values and for numbered lines ("1. ..."). Returns None when no answers are found.
+    """
+    normalized = response_text.replace("\r\n", "\n").upper()
+    answers_match = re.search(
+        r"^ANSWERS[:\-\s]*((?:YES|NO)(?:\s*,\s*(?:YES|NO))*)",
+        normalized,
+        re.MULTILINE
+    )
+    reasonings_match = re.findall(
+        r"(?:^REASONINGS[:\-\s]*\n)?(?:^|\n)\s*(\d+)\.?\s*([^\n]+)",
+        response_text,
+        re.MULTILINE
+    )
+
+    answers = [a.strip() for a in answers_match.group(1).split(",")] if answers_match else []
+    answers = [a.upper()[:3] for a in answers if a.upper().startswith(("YES", "NO"))]
+    reasonings = [item[1].strip() for item in sorted(reasonings_match, key=lambda x: int(x[0]))]
+
+    # Relaxed validation to allow partial parsing if strict 3 count fails
+    if not answers:
+        return None
+    return {"answers": answers, "reasonings": reasonings}
+
+# --- MAIN FUNCTION ---
+def analyze_evidence(image_url: str, prompt: str, use_openai: bool = False):
+    """Answer the questions in prompt about the image at image_url.
+
+    Tries Gemini first, rotating API keys on quota / rate-limit errors. When
+    use_openai is True and a client is configured, falls back to the
+    OpenAI-compatible endpoint. Returns a dict with "source", "answers",
+    "reasonings" and "relevance", or {"error": ...} when every attempt fails.
+    """
+    global model
+
+    print(f"[Prompt]:\n{prompt}\n")
+
+    # Step 1: Gemini
+    for _ in range(MAX_RETRIES):
+        try:
+            image = httpx.get(image_url, timeout=IMAGE_FETCH_TIMEOUT, follow_redirects=True)
+            # Stop on a failed download instead of sending an error page to the model
+            image.raise_for_status()
+            # Prefer the served Content-Type, then the URL extension, over assuming JPEG
+            mime_type = image.headers.get("content-type", "").split(";")[0].strip()
+            if not mime_type.startswith("image/"):
+                mime_type = mimetypes.guess_type(image_url)[0] or "image/jpeg"
+            gemini_response = model.generate_content(
+                [
+                    {
+                        "mime_type": mime_type,
+                        "data": base64.b64encode(image.content).decode("utf-8"),
+                    },
+                    prompt,
+                ]
+            )
+            response_json = json.loads(gemini_response.text)
+            # Default to empty lists so callers can always join / iterate the result
+            answers = response_json.get("answers") or []
+            reasonings = response_json.get("reasonings") or []
+            relevance = calculate_relevance_tag(answers)
+            print(f"[Gemini Response] = {response_json}")
+            print(f"[Relevance Tag] = {relevance} \n")
+            return {
+                "source": "gemini",
+                "answers": answers,
+                "reasonings": reasonings,
+                "relevance": relevance
+            }
+        except Exception as e:
+            if any(x in str(e).lower() for x in ["quota", "rate limit", "429"]):
+                token = switch_to_next_token()
+                if token:
+                    # Rebuild the model so the retry uses the new key
+                    # NOTE(review): an already-used model may keep its old client - confirm against the SDK
+                    model = build_gemini_model(token)
+                    continue
+            print(f"[Gemini Error] {e}")
+            break
+
+    # Step 2: OpenAI fallback (if enabled and client exists)
+    if use_openai and client:
+        for _ in range(MAX_RETRIES):
+            try:
+                openai_response = client.chat.completions.create(
+                    model="Llama-4-Maverick-17B-128E-Instruct",
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": [
+                                {"type": "text", "text": prompt},
+                                {"type": "image_url", "image_url": {"url": get_image_as_base64(image_url)}},
+                            ],
+                        }
+                    ],
+                )
+                if openai_response.choices:
+                    content = openai_response.choices[0].message.content
+                    structured = extract_structured_response(content)
+                    if structured:
+                        relevance = calculate_relevance_tag(structured["answers"])
+                        return {
+                            "source": "openai",
+                            "answers": structured["answers"],
+                            "reasonings": structured["reasonings"],
+                            "relevance": relevance
+                        }
+            except Exception as e:
+                print(f"[OpenAI Error] {e}")
+                time.sleep(3)
+
+    return {
+        "error": "Unable to process image after retries"
+    }
diff --git a/evidence analysis/app.py b/evidence analysis/app.py
new file mode 100644
index 00000000..56d62c6c
--- /dev/null
+++ b/evidence analysis/app.py
@@ -0,0 +1,83 @@
+import streamlit as st
+from utils.auth import check_credentials
+import evidence_analysis_page
+import streamlit.components.v1 as components
+
+st.set_page_config(page_title="Login", layout="wide", initial_sidebar_state="collapsed")
+
+# Initialize login state if not already present
+if "logged_in" not in st.session_state:
+    st.session_state["logged_in"] = False
+
+if "show_reports" not in st.session_state:
+    st.session_state["show_reports"] = False
+
+def login():
+    st.title("πŸ” Login Page")
+    st.markdown("
", unsafe_allow_html=True) + + with st.form("login_form"): + username = st.text_input("Username") + password = st.text_input("Password", type="password") + submitted = st.form_submit_button("Login") + + if submitted: + if check_credentials(username, password): + st.session_state["logged_in"] = True + st.success("Login successful!") + st.rerun() + else: + st.error("Invalid username or password") + +def show_reports(): + """Display the HTML report page with navigation""" + # Header with navigation + col1, col2, col3 = st.columns([1, 6, 1]) + + with col1: + if st.button("← Back to Analysis", key="back_btn"): + st.session_state["show_reports"] = False + st.rerun() + + with col2: + st.markdown("

πŸ“Š MIP Evidence Reports

", unsafe_allow_html=True) + + with col3: + if st.button("πŸšͺ Logout", key="logout_btn"): + st.session_state.clear() + st.rerun() + + # Colored horizontal line + st.markdown(""" +
+ """, unsafe_allow_html=True) + + # Hide menu and footer + st.markdown(""" + + """, unsafe_allow_html=True) + + # Read and display the HTML file + try: + with open("report.html", "r", encoding="utf-8") as f: + html_content = f.read() + + # Display the HTML content + components.html(html_content, height=1200, scrolling=True) + + except FileNotFoundError: + st.error("report.html file not found. Please ensure the file exists in the same directory.") + except Exception as e: + st.error(f"Error loading report: {str(e)}") + +# Show login, evidence analysis page, or reports +if not st.session_state["logged_in"]: + login() +else: + if st.session_state["show_reports"]: + show_reports() + else: + evidence_analysis_page.show() diff --git a/evidence analysis/evidence_analysis_page.py b/evidence analysis/evidence_analysis_page.py new file mode 100644 index 00000000..aa72b9a6 --- /dev/null +++ b/evidence analysis/evidence_analysis_page.py @@ -0,0 +1,99 @@ +import streamlit as st +from ai.process_evidence import analyze_evidence + +def show(): + st.set_page_config(page_title="Evidence Analysis", layout="wide") + + # Header row: Title centered, Reports and Logout on the right + col1, col2, col3 = st.columns([1, 2, 1]) + + with col1: + pass # Empty left side + + with col2: + st.markdown("

πŸ§ͺ Evidence Analysis

", unsafe_allow_html=True) + + with col3: + st.markdown("
", unsafe_allow_html=True) + + # Create two columns for Reports and Logout buttons + btn_col1, btn_col2 = st.columns(2) + + with btn_col1: + if st.button("πŸ“Š Reports"): + st.session_state["show_reports"] = True + st.rerun() + + with btn_col2: + if st.button("πŸšͺ Logout"): + st.session_state.clear() + st.rerun() + + st.markdown("
", unsafe_allow_html=True) + + # Colored horizontal line (simple styled
) + st.markdown(""" +
+ """, unsafe_allow_html=True) + + # An empty break line added + st.markdown("
", unsafe_allow_html=True) + + # Define layout columns + col1, col2, col3, col4 = st.columns([2, 2, 2, 2]) + + # Column 1: Enter 7 Questions + with col1: + st.markdown("
πŸ“‹ Enter Questions
", unsafe_allow_html=True) + question_inputs = [] + for i in range(1, 8): + q = st.text_area(f"{i}.", key=f"question_{i}", height=100) + question_inputs.append(q) + + # Column 2: Evidence Link Input + Analyse Button + with col2: + st.markdown("
πŸ”— Paste Evidence Link
", unsafe_allow_html=True) + image_url = st.text_area("Image URL", height=150) + default_prompt = """You are an educational evidence validator. Analyse this image as field evidence from a PBL classroom in Bihar, India. Answer the added questions with 'YES' or 'NO', consider all visible elements and context. Explain your reasoning for each answer briefly.""" + prompt_text = st.text_area("Prompt (Editable)", value=default_prompt, height=150) + context = prompt_text + "\n\n" + "\n".join([f"{i+1}. {q}" for i, q in enumerate(question_inputs) if q.strip()]) + + if st.button("πŸ” Analyse", use_container_width=True): + if not image_url.strip(): + st.warning("Please provide an evidence link.") + elif not any(q.strip() for q in question_inputs): + st.warning("Please enter at least one question.") + else: + result = analyze_evidence(image_url, context, use_openai=False) + + if "error" in result: + st.error(f"❌ Error: {result['error']}") + else: + st.session_state["image_url"] = image_url + st.session_state["questions"] = question_inputs + st.session_state["ai_result"] = result + st.session_state["analysed"] = True + + # Column 3: Image Preview + with col3: + st.markdown("
πŸ–ΌοΈ Image Preview
", unsafe_allow_html=True) + if st.session_state.get("analysed", False): + st.image(st.session_state["image_url"], width=350) + else: + st.info("Awaiting evidence link and analysis...") + + # Column 4: Output Summary + with col4: + st.markdown("
🧠 Output Summary
", unsafe_allow_html=True) + if st.session_state.get("analysed", False): + result = st.session_state.get("ai_result", {}) + if result: + st.markdown("**🟒 Relevance Tag:** " + result.get("relevance", "Unknown")) + st.markdown("**βœ… Answers:** " + ", ".join(result.get("answers", []))) + st.markdown("**🧠 Reasoning:**") + for i, reason in enumerate(result.get("reasonings", []), 1): + st.markdown(f"{i}. {reason}") + else: + st.text("(No AI output found)") + else: + st.info("Output will appear after clicking Analyse.") diff --git a/evidence analysis/report-old.html b/evidence analysis/report-old.html new file mode 100644 index 00000000..5e2f893b --- /dev/null +++ b/evidence analysis/report-old.html @@ -0,0 +1,1388 @@ + + + + + MIP Evidence Report + + + + + + +
+
+

MIP Evidence Analysis Dashboard

+
+
+ + +
+
+ + +
+ + +
+
+ +
+
+

Processing your data...

+
+ +
+ +
+
+ + + + \ No newline at end of file diff --git a/evidence analysis/report.html b/evidence analysis/report.html new file mode 100644 index 00000000..687936fe --- /dev/null +++ b/evidence analysis/report.html @@ -0,0 +1,1819 @@ + + + + + MIP Evidence Report - With Filters + + + + + + +
+
+

MIP Evidence Analysis Dashboard

+
+
+ + +
+
+ + +
+ + +
+
+ + +
+

πŸ” Filter Data

+
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+
+ +
+
+
Active Filters:
+
+
+
+ +
+
+

Processing your data...

+
+ +
+ +
+
+
+
+
+
+
\ No newline at end of file
diff --git a/evidence analysis/requirements.txt b/evidence analysis/requirements.txt
new file mode 100644
index 00000000..540ba2c0
--- /dev/null
+++ b/evidence analysis/requirements.txt
@@ -0,0 +1,9 @@
+streamlit
+openai
+google-generativeai
+pandas
+openpyxl
+httpx
+requests
+typing_extensions
+python-dotenv
diff --git a/evidence analysis/utils/auth.py b/evidence analysis/utils/auth.py
new file mode 100644
index 00000000..17bd0103
--- /dev/null
+++ b/evidence analysis/utils/auth.py
@@ -0,0 +1,18 @@
+import hmac
+import os
+
+
+def check_credentials(username: str, password: str) -> bool:
+    """Return True when username and password match the configured login.
+
+    The expected values come from the APP_USERNAME / APP_PASSWORD environment
+    variables. The original hard-coded "admin"/"admin" pair is kept only as
+    the fallback so existing setups keep working.
+    """
+    # SECURITY: "admin"/"admin" is a guessable default - set both variables on any shared deployment
+    expected_username = os.getenv("APP_USERNAME", "admin")
+    expected_password = os.getenv("APP_PASSWORD", "admin")
+    # compare_digest avoids content-based short-circuiting, so timing does not reveal how much of a value matched
+    username_ok = hmac.compare_digest(username.encode("utf-8"), expected_username.encode("utf-8"))
+    password_ok = hmac.compare_digest(password.encode("utf-8"), expected_password.encode("utf-8"))
+    return username_ok and password_ok
diff --git a/evidence-analysis-process/README.md b/evidence-analysis-process/README.md
new file mode 100644
index 00000000..dbb3eb6d
--- /dev/null
+++ b/evidence-analysis-process/README.md
@@ -0,0 +1,105 @@
+# 🧠 Evidence Analysis Multithreaded β€” MIP Evidence Pipeline
+
+### Overview
+This repository contains a lightweight pipeline to preprocess CSV files, split them for parallel execution, perform **image-based evidence analysis** (via a generative model), merge processed results, validate URLs, and clean invalid rows.
+
+---
+
+## πŸ“ Repository Structure
+
+| Path | Description |
+|------|--------------|
+| **pre-processor/1-pre-processor.py** | Loads and filters raw CSVs, adds computed columns (`clean_cell`, `is_image_url`), and generates single or split preprocessed CSVs. |
+| **pre-processor/2-csv-splitter.py** | Splits large CSVs into smaller chunks for parallel processing. |
+| **processor/1-main-parallel-script.py** | Main orchestration script for parallel execution. Handles per-file processing via `google-generativeai`, `httpx`, `openpyxl`, and includes rate-limiting and token rotation (`get_gemini_tokens_from_env`). |
+| **processor/2-merge-processed-csv.py** | Merges parallel outputs into a single `merged_output.csv`. 
| +| **processor/3-invalid-url-&-custom-task-remover.py** | Removes invalid rows (where QA columns are null) and extracts URLs from *Task Evidence* fields. | +| **processor/4-url-validator.py** | Concurrent URL validation tool (`load_urls_from_file`, `validate_url`, `validate_urls_concurrent`). | +| **processor/validate-input-output-csv.py** | Compares source vs. merged output CSVs and logs missing rows. | +| **webpage/** | Local web UI to visualize processed CSVs. | +| **.env** | Environment file to store GEMINI API tokens (e.g., `GEMINI_TOKEN1="..."`). | + +--- + +## βš™οΈ Prerequisites +- **Python** 3.8 or above +- Install dependencies: + ``` + pip install -r requirements.txt + ``` + +## 🌍 Environment Setup +If using the generative model in processor/1-main-parallel-script.py, create a .env file and include your GEMINI tokens: +``` +GEMINI_TOKEN1="your_token_here" +``` +These tokens are fetched dynamically using the get_gemini_tokens_from_env function. + +## πŸš€ Quickstart +### 1. Preprocessing +Edit input/output paths in pre-processor/1-pre-processor.py. +Run: +``` +python pre-processor/1-pre-processor.py +``` +Output will be stored under output-pre-processor/ as either: +* preprocessed_data.csv, or +* multiple split_*.csv files (if splitting is enabled). + +### 2. Split CSVs for Parallel Processing +#### Option A: +Run the splitter script manually: +``` +python pre-processor/2-csv-splitter.py +``` +#### Option B: +Enable the SPLIT_FILES=yes flag in the preprocessor to auto-generate split files. + +### 3. Main Parallel Processing +Set the following variables at the top of processor/1-main-parallel-script.py: +* INPUT_DIR +* OUTPUT_DIR +* FINAL_OUTPUT_FILE + +Ensure .env contains valid GEMINI tokens. +Run: +``` +python processor/1-main-parallel-script.py +``` +Key functions: +* main β€” processes a single input file +* process_file_parallel β€” wrapper for thread pool execution +* calculate_relevance_tag β€” maps responses to relevance tags + +### 4. 
Merge Processed Outputs
+After processing completes:
+```
+python processor/2-merge-processed-csv.py
+```
+Update INPUT_DIR in the script if necessary.
+
+### 5. URL Validation and Data Cleaning
+
+Clean invalid rows and extract URLs:
+```
+python processor/3-invalid-url-&-custom-task-remover.py
+```
+
+Validate URLs:
+```
+python processor/4-url-validator.py
+```
+
+Adjust constants (URLS_FILE, MAX_WORKERS, TIMEOUT) as needed.
+
+### 6. Validate Input vs Output Consistency
+Compare preprocessed vs merged outputs:
+```
+python processor/validate-input-output-csv.py
+```
+
+## 🧩 Notes & Tips
+
+* If you’re not using the generative API, you can comment out or stub those sections in processor/1-main-parallel-script.py.
+* The list of URLs to validate is located in processor/url.txt.
+* The webpage/ directory contains dashboards for visualizing CSV outputs.
diff --git a/evidence-analysis-process/input/input.csv b/evidence-analysis-process/input/input.csv
new file mode 100644
index 00000000..e69de29b
diff --git a/evidence-analysis-process/output/output.csv b/evidence-analysis-process/output/output.csv
new file mode 100644
index 00000000..e69de29b
diff --git a/evidence-analysis-process/pre-processor/1-pre-processor.py b/evidence-analysis-process/pre-processor/1-pre-processor.py
new file mode 100644
index 00000000..ffff04e4
--- /dev/null
+++ b/evidence-analysis-process/pre-processor/1-pre-processor.py
@@ -0,0 +1,497 @@
+import os
+import csv
+import math
+import sys
+from urllib.parse import urlparse
+from tqdm import tqdm  # Import tqdm for the progress bar
+
+# === Configuration ===
+INPUT_CSV = "/Users/user/Documents/AI/parallel-process/input/017F35E575D87A3FB5ED3D90A3E69355_20250904.csv"
+QUESTION_CSV = "/Users/user/Documents/AI/parallel-process/input/aug_sample_questions.csv"
+FILTER_CSV = "/Users/user/Documents/AI/parallel-process/input/school_list.csv"
+OUTPUT_DIR = "output-pre-processor"
+
+# === SPLIT CONFIGURATION ===
+SPLIT_FILES = "yes"  # Set to "yes" to split into multiple files, "no" for single file
+ROWS_PER_FILE = 10000  # Only used if SPLIT_FILES = "yes"
+
+# === IMAGE FORMATS ===
+IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}
+
+# Create output directory
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+
+# Counters for skipped rows
+skip_task_start = 0
+skip_evidence_null = 0
+skip_school_mismatch = 0
+skip_non_image = 0
+total_input_rows = 0  # This will be set correctly below
+
+# === Step 1: Load FILTER_CSV school codes into a set ===
+valid_school_codes = set()
+with open(FILTER_CSV, newline='', encoding="utf-8") as f:
+    reader = csv.DictReader(f)
+    for row in reader:
+        school_code = row.get("UDISE+ SCHOOL CODE", "").strip()
+        if school_code:
+            valid_school_codes.add(school_code)
+
+# === Helper function for cleaning cell values ===
+def clean_cell(value):
+    """Strips whitespace AND common quote characters from the ends."""
+    if not isinstance(value, str):
+        return ""
+    # Strip whitespace, then strip both single and double quotes
+    return value.strip().strip("'\"")
+
+# === Helper function to check if URL is an image ===
+def is_image_url(url):
+    """Check if URL points to an image file"""
+    url = clean_cell(url)  # Clean the URL string first for *checking*
+    if not url or url.lower() == "null":
+        return False
+    try:
+        parsed = urlparse(url)
+        path = parsed.path.lower()
+        return any(path.endswith(ext) for ext in IMAGE_FORMATS)
+    except ValueError:
+        # urlparse raises ValueError for malformed URLs (e.g. an unbalanced IPv6 bracket)
+        return False
+
+# === Step 2: Load QUESTION_CSV into dictionary (TASK NAME → Refined Question) ===
+lookup_dict = {}
+with open(QUESTION_CSV, newline='', encoding="utf-8") as f:
+    reader = csv.DictReader(f)
+    for row in reader:
+        task_name = clean_cell(row.get("TASK NAME", ""))
+        refined_question = row.get("Refined questions using tool and webpage", "").strip()
+        if task_name:  # only add valid rows
+            lookup_dict[task_name] = refined_question
+
+# === District Renaming Map ===
+DISTRICT_REPLACEMENTS = {
+    # Accept the abbreviation with and without the period
+    "W Champaran": "West Champaran",
+    "W. Champaran": "West Champaran",
+    "E. Champaran": "East Champaran",
+    "Kaimur (Bhabua)": "Kaimur",
+    "Aurangabad (Bihar)": "Aurangabad"
+}
+
+# === Step 3: Load INPUT_CSV and filter ===
+
+# --- NEW: Load all rows into a list first to get the *correct* count ---
+print(f"Loading data from {INPUT_CSV}...")
+all_rows = []
+try:
+    with open(INPUT_CSV, newline='', encoding="utf-8") as infile:
+        reader = csv.DictReader(infile)
+        all_rows = list(reader)
+        total_input_rows = len(all_rows)  # This is the CORRECT row count
+        header = reader.fieldnames
+except FileNotFoundError:
+    print(f"Error: INPUT_CSV '{INPUT_CSV}' not found.")
+    sys.exit(1)
+except Exception as e:
+    print(f"Error reading {INPUT_CSV}: {e}")
+    sys.exit(1)
+
+if header is None:
+    print("Error: CSV Header is empty. Cannot proceed.")
+    sys.exit(1)
+print(f"Loaded {total_input_rows} data rows to process.")
+# --- END NEW ---
+
+
+filtered_rows = []
+
+# Add new columns
+new_columns = [
+    "Task Evidence Question",
+    "Task evidence Q and A",
+    "Task evidence Q and A Reason",
+    "Relevance Tag",
+    "Image Preview"
+]
+
+final_header = list(header)
+for col in new_columns:
+    if col not in final_header:
+        final_header.append(col)
+
+# --- NEW: Iterate over the list 'all_rows' instead of the 'reader' object ---
+for row in tqdm(all_rows, total=total_input_rows, desc="Processing input CSV"):
+
+    school_id = (row.get("School ID") or "").strip()
+    task = clean_cell(row.get("Tasks", ""))  # Clean task for lookup
+    evidence = row.get("Task Evidence", "")  # Get raw evidence
+
+    # Rule 0: Skip if School ID not in FILTER_CSV
+    if school_id not in valid_school_codes:
+        skip_school_mismatch += 1
+        continue
+
+    # Rule 1: Skip if task starts with 1 or 8
+    if task.startswith(("1", "8")):
+        skip_task_start += 1
+        continue
+
+    # Rule 2: Skip if evidence is empty or "null" (after cleaning for check)
+    cleaned_evidence = clean_cell(evidence)
+    if cleaned_evidence == "" or cleaned_evidence.lower() == "null":
+        skip_evidence_null += 1
+        continue
+
+    # Rule 3: Skip if evidence 
URL is not an image format + if not is_image_url(evidence): # Send the raw evidence to be checked + skip_non_image += 1 + continue + + # === Step 4: Fill additional columns & Clean District === + row["Task Evidence Question"] = lookup_dict.get(task, "Null") + row["Task evidence Q and A"] = "" + row["Task evidence Q and A Reason"] = "" + row["Relevance Tag"] = "" + row["Image Preview"] = "" + + # Apply District replacement + current_district = row.get("District", "") + row["District"] = DISTRICT_REPLACEMENTS.get(current_district, current_district) + + # Row passes all checks + filtered_rows.append([row.get(h, "") for h in final_header]) + +# === Step 5: Output - Single file or Multiple files based on configuration === +if SPLIT_FILES.lower() == "no": + # Single file output + output_file = os.path.join(OUTPUT_DIR, "preprocessed_data.csv") + with open(output_file, "w", newline='', encoding="utf-8") as outfile: + writer = csv.writer(outfile) + writer.writerow(final_header) + writer.writerows(filtered_rows) + + print(f"βœ… Created: {output_file} ({len(filtered_rows)} rows)") + print(f"Mode: Single file output") + +else: + # Split into multiple files + total_files = math.ceil(len(filtered_rows) / ROWS_PER_FILE) + + for i in range(total_files): + start_index = i * ROWS_PER_FILE + end_index = start_index + ROWS_PER_FILE + chunk = filtered_rows[start_index:end_index] + + output_file = os.path.join(OUTPUT_DIR, f"split_{i+1}.csv") + with open(output_file, "w", newline='', encoding="utf-8") as outfile: + writer = csv.writer(outfile) + writer.writerow(final_header) + writer.writerows(chunk) + + print(f"βœ… Created: {output_file} ({len(chunk)} rows)") + + print(f"Mode: Split into {total_files} files ({ROWS_PER_FILE} rows per file)") + +# === Summary === +print(f"\n{'='*70}") +print(f"{'PREPROCESSING SUMMARY':^70}") +print(f"{'='*70}") +# --- This 'total_input_rows' variable is now CORRECT --- +print(f"\nTotal CSV rows: {total_input_rows}") +print(f"\n{'Filter Stage':<50} 
{'Removed':<10} {'Remaining'}") +print(f"{'-'*70}") + +remaining_after_school = total_input_rows - skip_school_mismatch +print(f"{'School ID not in filter list':<50} {skip_school_mismatch:<10} {remaining_after_school}") + +remaining_after_task = remaining_after_school - skip_task_start +print(f"{'Task starts with 1 or 8':<50} {skip_task_start:<10} {remaining_after_task}") + +remaining_after_evidence = remaining_after_task - skip_evidence_null +print(f"{'Task Evidence empty or null':<50} {skip_evidence_null:<10} {remaining_after_evidence}") + +remaining_after_non_image = remaining_after_evidence - skip_non_image +print(f"{'Task Evidence is not an image (video/other)':<50} {skip_non_image:<10} {remaining_after_non_image}") + +print(f"\n{'='*70}") +print(f"Final output CSV rows: {len(filtered_rows)}") +print(f"{'='*70}") + +# === Script Checkpoints Section === +print(f"\n{'='*70}") +print(f"{'SCRIPT CHECKPOINTS':^70}") +print(f"{'='*70}") +print("This script performed the following actions:") + +print("\n--- 1. PRE-LOADING ---") +print(f"βœ… Loaded valid school codes from '{FILTER_CSV}'") +print(f"βœ… Loaded task/question map from '{QUESTION_CSV}'") +print("βœ… Defined district name replacements (e.g., 'W. Champaran' -> 'West Champaran')") + +print("\n--- 2. MAIN PROCESSING (Row-by-Row) ---") +print(f"βœ… Loaded all {total_input_rows} rows from '{INPUT_CSV}' (This is the correct count).") +print("βœ… Iterated through all rows with a progress bar.") +print("\n For EACH row, the following filters were applied (in order):") +print(" ➑️ 1. SKIPPED if 'School ID' was not in the valid school list.") +print(" ➑️ 2. SKIPPED if 'Tasks' value (after cleaning) started with '1' or '8'.") +print(" ➑️ 3. SKIPPED if 'Task Evidence' (after cleaning) was empty or 'null'.") +print(" ➑️ 4. 
SKIPPED if 'Task Evidence' URL was not an image (e.g., .mp4, .pdf).") + +print("\n For EACH row that PASSED all filters:") +print(" ➑️ Cleaned and matched 'Tasks' to populate 'Task Evidence Question'.") +print(" ➑️ Cleaned 'District' names (e.g., 'Kaimur (Bhabua)' -> 'Kaimur').") +print(" ➑️ Set the 'Image Preview' column to be empty.") +print(" ➑️ Kept the original 'Task Evidence' value.") +print(" ➑️ Added row to the final output list.") + +print("\n--- 3. FINAL OUTPUT ---") +print(f"βœ… Wrote {len(filtered_rows)} passed rows to the final CSV file.") +print("βœ… Printed the final summary report with skip/remaining counts.") +print(f"{'='*70}") + + + + + +# import os +# import csv +# import math +# from urllib.parse import urlparse +# from tqdm import tqdm + +# # === Configuration === +# INPUT_CSV = "/Users/user/Documents/AI/parallel-process/input/017F35E575D87A3FB5ED3D90A3E69355_20250904.csv" +# QUESTION_CSV = "/Users/user/Documents/AI/parallel-process/input/aug_sample_questions.csv" +# FILTER_CSV = "/Users/user/Documents/AI/parallel-process/input/school_list.csv" +# OUTPUT_DIR = "pre_split_csvs" + +# # === SPLIT CONFIGURATION === +# SPLIT_FILES = "no" # Set to "yes" to split into multiple files, "no" for single file +# ROWS_PER_FILE = 10000 # Only used if SPLIT_FILES = "yes" + +# # === IMAGE FORMATS === +# IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"} + +# # Create output directory +# os.makedirs(OUTPUT_DIR, exist_ok=True) + +# # Counters for skipped rows +# skip_task_start = 0 +# skip_evidence_null = 0 +# skip_school_mismatch = 0 +# skip_non_image = 0 + +# # Get total row count for progress bar +# print(f"Calculating total rows in {INPUT_CSV}...") +# try: +# with open(INPUT_CSV, 'r', encoding="utf-8") as f: +# # -1 to exclude the header row +# total_input_rows = sum(1 for _ in f) - 1 +# except FileNotFoundError: +# print(f"Error: INPUT_CSV '{INPUT_CSV}' not found.") +# exit() +# except Exception as e: +# print(f"Error reading {INPUT_CSV}: 
{e}") +# exit() +# print(f"Found {total_input_rows} data rows to process.") + +# # === Step 1: Load FILTER_CSV school codes into a set === +# valid_school_codes = set() +# with open(FILTER_CSV, newline='', encoding="utf-8") as f: +# reader = csv.DictReader(f) +# for row in reader: +# school_code = row.get("UDISE+ SCHOOL CODE", "").strip() +# if school_code: +# valid_school_codes.add(school_code) + +# # === Helper function for cleaning cell values === +# def clean_cell(value): +# """Strips whitespace AND common quote characters from the ends.""" +# if not isinstance(value, str): +# return "" +# # Strip whitespace, then strip both single and double quotes +# return value.strip().strip("'\"") + +# # === Helper function to check if URL is an image === +# def is_image_url(url): +# """Check if URL points to an image file""" +# url = clean_cell(url) # Clean the URL string first for *checking* +# if not url or url.lower() == "null": +# return False +# try: +# parsed = urlparse(url) +# path = parsed.path.lower() +# return any(path.endswith(ext) for ext in IMAGE_FORMATS) +# except: +# return False + +# # === Step 2: Load QUESTION_CSV into dictionary (TASK NAME β†’ Refined Question) === +# lookup_dict = {} +# with open(QUESTION_CSV, newline='', encoding="utf-8") as f: +# reader = csv.DictReader(f) +# for row in reader: +# task_name = clean_cell(row.get("TASK NAME", "")) +# refined_question = row.get("Refined questions using tool and webpage", "").strip() +# if task_name: # only add valid rows +# lookup_dict[task_name] = refined_question + +# # === District Renaming Map === +# DISTRICT_REPLACEMENTS = { +# "W. Champaran": "West Champaran", +# "E. 
Champaran": "East Champaran", +# "Kaimur (Bhabua)": "Kaimur", +# "Aurangabad (Bihar)": "Aurangabad" +# } + +# # === Step 3: Load INPUT_CSV and filter === +# filtered_rows = [] +# with open(INPUT_CSV, newline='', encoding="utf-8") as infile: +# reader = csv.DictReader(infile) + +# header = reader.fieldnames +# if header is None: +# print("Error: CSV Header is empty. Cannot proceed.") +# exit() + +# # Add new columns +# new_columns = [ +# "Task Evidence Question", +# "Task evidence Q and A", +# "Task evidence Q and A Reason", +# "Relevance Tag", +# "Image Preview" +# ] + +# final_header = list(header) +# for col in new_columns: +# if col not in final_header: +# final_header.append(col) + +# # Wrap the reader with tqdm for the progress bar +# for row in tqdm(reader, total=total_input_rows, desc="Processing input CSV"): + +# school_id = row.get("School ID", "").strip() +# task = clean_cell(row.get("Tasks", "")) # Clean task for lookup +# evidence = row.get("Task Evidence", "") # Get raw evidence + +# # Rule 0: Skip if School ID not in FILTER_CSV +# if school_id not in valid_school_codes: +# skip_school_mismatch += 1 +# continue + +# # Rule 1: Skip if task starts with 1 or 8 +# if task.startswith("1") or task.startswith("8"): +# skip_task_start += 1 +# continue + +# # Rule 2: Skip if evidence is empty or "null" (after cleaning for check) +# cleaned_evidence = clean_cell(evidence) +# if cleaned_evidence == "" or cleaned_evidence.lower() == "null": +# skip_evidence_null += 1 +# continue + +# # Rule 3: Skip if evidence URL is not an image format +# if not is_image_url(evidence): # Send the raw evidence to be checked +# skip_non_image += 1 +# continue + +# # === Step 4: Fill additional columns & Clean District === +# row["Task Evidence Question"] = lookup_dict.get(task, "Null") +# row["Task evidence Q and A"] = "" +# row["Task evidence Q and A Reason"] = "" +# row["Relevance Tag"] = "" +# row["Image Preview"] = "" + +# # --- NOTE: The "Task Evidence" column is NO longer 
overwritten --- + +# # Apply District replacement +# current_district = row.get("District", "") +# row["District"] = DISTRICT_REPLACEMENTS.get(current_district, current_district) + +# # Row passes all checks +# filtered_rows.append([row.get(h, "") for h in final_header]) + +# # === Step 5: Output - Single file or Multiple files based on configuration === +# if SPLIT_FILES.lower() == "no": +# # Single file output +# output_file = os.path.join(OUTPUT_DIR, "preprocessed_data.csv") +# with open(output_file, "w", newline='', encoding="utf-8") as outfile: +# writer = csv.writer(outfile) +# writer.writerow(final_header) +# writer.writerows(filtered_rows) + +# print(f"βœ… Created: {output_file} ({len(filtered_rows)} rows)") +# print(f"Mode: Single file output") + +# else: +# # Split into multiple files +# total_files = math.ceil(len(filtered_rows) / ROWS_PER_FILE) + +# for i in range(total_files): +# start_index = i * ROWS_PER_FILE +# end_index = start_index + ROWS_PER_FILE +# chunk = filtered_rows[start_index:end_index] + +# output_file = os.path.join(OUTPUT_DIR, f"split_{i+1}.csv") +# with open(output_file, "w", newline='', encoding="utf-8") as outfile: +# writer = csv.writer(outfile) +# writer.writerow(final_header) +# writer.writerows(chunk) + +# print(f"βœ… Created: {output_file} ({len(chunk)} rows)") + +# print(f"Mode: Split into {total_files} files ({ROWS_PER_FILE} rows per file)") + +# # === Summary === +# print(f"\n{'='*70}") +# print(f"{'PREPROCESSING SUMMARY':^70}") +# print(f"{'='*70}") +# print(f"\nTotal CSV rows: {total_input_rows}") +# print(f"\n{'Filter Stage':<50} {'Removed':<10} {'Remaining'}") +# print(f"{'-'*70}") + +# remaining_after_school = total_input_rows - skip_school_mismatch +# print(f"{'School ID not in filter list':<50} {skip_school_mismatch:<10} {remaining_after_school}") + +# remaining_after_task = remaining_after_school - skip_task_start +# print(f"{'Task starts with 1 or 8':<50} {skip_task_start:<10} {remaining_after_task}") + +# 
remaining_after_evidence = remaining_after_task - skip_evidence_null +# print(f"{'Task Evidence empty or null':<50} {skip_evidence_null:<10} {remaining_after_evidence}") + +# remaining_after_non_image = remaining_after_evidence - skip_non_image +# print(f"{'Task Evidence is not an image (video/other)':<50} {skip_non_image:<10} {remaining_after_non_image}") + +# print(f"\n{'='*70}") +# print(f"Final output CSV rows: {len(filtered_rows)}") +# print(f"{'='*70}") + +# # === Script Checkpoints Section === +# print(f"\n{'='*70}") +# print(f"{'SCRIPT CHECKPOINTS':^70}") +# print(f"{'='*70}") +# print("This script performed the following actions:") + +# print("\n--- 1. PRE-LOADING ---") +# print(f"βœ… Loaded valid school codes from '{FILTER_CSV}'") +# print(f"βœ… Loaded task/question map from '{QUESTION_CSV}'") +# print("βœ… Defined district name replacements (e.g., 'W. Champaran' -> 'West Champaran')") + +# print("\n--- 2. MAIN PROCESSING (Row-by-Row) ---") +# print(f"βœ… Iterated through all {total_input_rows} rows in '{INPUT_CSV}' with a progress bar.") +# print("\n For EACH row, the following filters were applied (in order):") +# print(" ➑️ 1. SKIPPED if 'School ID' was not in the valid school list.") +# print(" ➑️ 2. SKIPPED if 'Tasks' value (after cleaning) started with '1' or '8'.") +# print(" ➑️ 3. SKIPPED if 'Task Evidence' (after cleaning) was empty or 'null'.") +# print(" ➑️ 4. SKIPPED if 'Task Evidence' URL was not an image (e.g., .mp4, .pdf).") + +# print("\n For EACH row that PASSED all filters:") +# print(" ➑️ Cleaned and matched 'Tasks' to populate 'Task Evidence Question'.") +# print(" ➑️ Cleaned 'District' names (e.g., 'Kaimur (Bhabua)' -> 'Kaimur').") +# print(" ➑️ Added some extra rows to the final output list.") + +# print("\n--- 3. 
FINAL OUTPUT ---") +# print(f"βœ… Wrote {len(filtered_rows)} passed rows to the final CSV file.") +# print("βœ… Printed the final summary report with skip/remaining counts.") +# print(f"{'='*70}") \ No newline at end of file diff --git a/evidence-analysis-process/pre-processor/2-csv-splitter.py b/evidence-analysis-process/pre-processor/2-csv-splitter.py new file mode 100644 index 00000000..79f0bdb7 --- /dev/null +++ b/evidence-analysis-process/pre-processor/2-csv-splitter.py @@ -0,0 +1,37 @@ +import csv +import os +from math import ceil + +# Configuration +INPUT_CSV = "/Users/user/Documents/AI/parallel-process/output-pre-processor/split_1.csv" +OUTPUT_DIR = "parallel_input_split_1_files" +PARTS = 20 + +# Create output directory if it doesn't exist +os.makedirs(OUTPUT_DIR, exist_ok=True) + +print("Splitting CSV using Python script...") + +# Count total data rows (excluding header) +with open(INPUT_CSV, newline='', encoding='utf-8') as f: + reader = csv.reader(f) + header = next(reader) + rows = list(reader) + +total_rows = len(rows) +rows_per_file = ceil(total_rows / PARTS) + +# Split the CSV into parts +for i in range(PARTS): + start = i * rows_per_file + end = start + rows_per_file + output_file = os.path.join(OUTPUT_DIR, f"{i+1}.csv") + + with open(output_file, "w", newline='', encoding='utf-8') as f_out: + writer = csv.writer(f_out) + writer.writerow(header) + writer.writerows(rows[start:end]) + + print(f"Created {output_file} with {len(rows[start:end])} rows.") + +print(f"Total rows: {total_rows}. 
def validate_csv(input_file, logger):
    """Return the 1-based record numbers of malformed rows in a CSV file.

    A record is reported when the ``csv`` module fails to parse it, or when
    its column count differs from that of the header (the first non-blank
    record).  Blank records are ignored.  Every reported record is also
    logged through ``logger.warning``.

    Args:
        input_file: Path of the CSV file to inspect (read as UTF-8, like the
            other CSV files handled by this pipeline).
        logger: Object exposing a ``warning(message)`` method.

    Returns:
        list[int]: Record numbers (the header is record 1) of the malformed
        rows, in file order.  Empty when the file is well formed.
    """
    bad_rows = []
    expected_width = None
    with open(input_file, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        row_number = 0
        while True:
            row_number += 1
            # Parse errors are raised while *advancing* the reader, so the
            # try has to wrap next() rather than the per-row checks.
            try:
                row = next(reader)
            except StopIteration:
                break
            except csv.Error as e:
                logger.warning(f"Malformed row at line {row_number}: {e}")
                bad_rows.append(row_number)
                continue

            if not row:
                # Blank line: not data, so not counted as malformed.
                continue

            if expected_width is None:
                # First non-blank record is the header; it fixes the width.
                expected_width = len(row)
            elif len(row) != expected_width:
                logger.warning(
                    f"Malformed row at line {row_number}: "
                    f"expected {expected_width} columns, found {len(row)}"
                )
                bad_rows.append(row_number)
    return bad_rows
def calculate_relevance_tag(answers):
    """Classify a list of YES/NO answers into a relevance tag.

    Args:
        answers: List of answers; each is compared, after ``str()``
            conversion, whitespace stripping and upper-casing, with ``'YES'``.

    Returns:
        str: ``'Relevant'`` when at least half of the answers are YES,
        ``'Partially Relevant'`` when some but fewer than half are YES, and
        ``'Irrelevant'`` when none are YES or when ``answers`` is empty or
        not a list.
    """
    if not answers or not isinstance(answers, list):
        return 'Irrelevant'

    # Strip before comparing so padded answers such as "Yes " or "YES\n"
    # are still counted.
    yes_count = sum(1 for answer in answers if str(answer).strip().upper() == 'YES')

    # Integer form of "yes_count / total >= 50%"; avoids float rounding.
    if 2 * yes_count >= len(answers):
        return 'Relevant'
    if yes_count > 0:
        return 'Partially Relevant'
    return 'Irrelevant'
# Track timestamps of recent requests
_request_times = deque()
_request_lock = threading.Lock()
MAX_REQUESTS_PER_MINUTE = 2000


def rate_limiter():
    """Block until one more request fits under MAX_REQUESTS_PER_MINUTE.

    Keeps a sliding 60-second window of request timestamps in
    ``_request_times``.  When the window is full, the caller sleeps until the
    oldest timestamp is due to expire and then checks again.  On return, the
    current request has been recorded in the window.

    Returns:
        None
    """
    # Loop instead of recursing: threading.Lock is not reentrant, so calling
    # this function again while still holding _request_lock would deadlock.
    while True:
        with _request_lock:
            now = time.time()
            # Remove requests older than 60 seconds
            while _request_times and now - _request_times[0] > 60:
                _request_times.popleft()

            if len(_request_times) < MAX_REQUESTS_PER_MINUTE:
                _request_times.append(now)
                return

            # Window is full: time left until the oldest entry expires.
            sleep_time = 60 - (now - _request_times[0])

        # Sleep with the lock released so other threads are not held up
        # behind this one; each of them re-checks the window on its own.
        if sleep_time > 0:
            logging.info(f"[RateLimiter] Throttling for {sleep_time:.2f} seconds to stay under 2000 req/min...")
            time.sleep(sleep_time)
{task_evidence_question}""" + response = model.generate_content([ + {"mime_type": "image/jpeg", "data": base64.b64encode(image.content).decode("utf-8")}, + prompt, + ]) + response_json = json.loads(response.text) + return response_json + except Exception as e: + error_str = str(e).lower() + if any(k in error_str for k in ["rate limit", "quota", "429", "resource_exhausted"]): + logging.warning("[Gemini] Rate limit or quota exceeded. Switching token...") + if switch_to_next_token(): + continue + else: + logging.warning("[Gemini] No more tokens. Retrying in 60 seconds...") + time.sleep(60) + retries += 1 + else: + logging.error(f"[Gemini] Error: {e}") + retries += 1 + logging.error("[Gemini] Max retries reached.") + return {"error": "Max retries reached"} + + +# === Main processing === +def main(input_file, worker_id=None): + # βœ… --- Stats variables --- + api_calls = 0 + api_successes = 0 + api_failures = 0 + success_list = [] + failed_list = [] + # --- End stats --- + + try: + logging.info(f"[Worker {worker_id}] Starting processing for {input_file}") + + if not os.path.exists(input_file): + logging.error(f"[Worker {worker_id}] File not found: {input_file}") + return None + + df = pd.read_excel(input_file) if input_file.endswith(".xlsx") else pd.read_csv(input_file) + df_filtered = df[ + ~df["Task Evidence"].isin([None, "Null"]) + & ~df["Task Evidence Question"].isin([None, "Null"]) + ].dropna(subset=["Task Evidence", "Task Evidence Question"]) + + processed_count = 0 + task_evidence_qa = [] + task_evidence_qa_reason = [] + relevance_tags = [] + + for idx, row in df_filtered.iterrows(): + task_evidence = str(row["Task Evidence"]).strip() + task_question = str(row["Task Evidence Question"]).strip() + + if any(task_evidence.lower().endswith(ext) for ext in IMAGE_FORMATS): + logging.info(f"[Worker {worker_id}] Processing image row {idx+1}/{len(df_filtered)}") + + api_calls += 1 # βœ… Track API call attempt + response = process_image(task_evidence, task_question) + + if 
isinstance(response, dict) and "answers" in response and "reasonings" in response: + answers = response["answers"] + reasonings = response["reasonings"] + task_evidence_qa.append(answers) + task_evidence_qa_reason.append(reasonings) + relevance_tags.append(calculate_relevance_tag(answers)) + + api_successes += 1 # βœ… Track success + success_list.append(task_evidence) # βœ… Add to success list + else: + logging.warning(f"[Worker {worker_id}] Invalid response at row {idx+1}") + task_evidence_qa.append(None) + task_evidence_qa_reason.append(None) + relevance_tags.append('Irrelevant') + + api_failures += 1 # βœ… Track failure + failed_list.append(task_evidence) # βœ… Add to failed list + else: + logging.info(f"[Worker {worker_id}] Skipping non-image row {idx+1}") + task_evidence_qa.append(None) + task_evidence_qa_reason.append(None) + relevance_tags.append('Irrelevant') + + processed_count += 1 + if processed_count >= MAX_PROCESSED_ROWS: + logging.info(f"[Worker {worker_id}] Reached max processed rows ({MAX_PROCESSED_ROWS})") + break + + df_filtered = df_filtered.head(processed_count) + df_filtered["Task evidence Q and A"] = task_evidence_qa + df_filtered["Task evidence Q and A Reason"] = task_evidence_qa_reason + df_filtered["Relevance Tag"] = relevance_tags + + df_filtered["Image Preview"] = df_filtered["Task Evidence"].apply( + lambda x: str(x) if str(x).lower().endswith(tuple(IMAGE_FORMATS)) else "" + ) + + output_filename = os.path.join(OUTPUT_DIR, f"processed_{os.path.basename(input_file).split('.')[0]}.csv") + df_filtered.to_csv(output_filename, index=False) + + logging.info(f"[Worker {worker_id}] Finished processing {input_file}. 
def process_file_parallel(file_path, worker_id):
    """Run ``main`` on one input file and log the outcome.

    Args:
        file_path: Path of the input file handed to ``main``.
        worker_id: Identifier used to tag this worker's log lines.

    Returns:
        Whatever ``main`` returned: its stats dictionary, or a falsy value
        when processing failed.
    """
    # Ensure the output directory exists before main() writes into it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    stats = main(file_path, worker_id)
    if not stats:
        logging.warning(f"[Worker {worker_id}] Processing failed for {file_path}")
        return stats

    logging.info(f"[Worker {worker_id}] Output saved as {stats['output_file']}")
    return stats
total_api_calls_all += result_stats["api_calls"] + total_api_success_all += result_stats["api_successes"] + total_api_failure_all += result_stats["api_failures"] + all_success_lists.extend(result_stats["success_list"]) + all_failed_lists.extend(result_stats["failed_list"]) + logging.info(f"[Main] Worker finished processing: {original_file}") + else: + logging.warning(f"[Main] File {original_file} failed to process.") + failed_files.append(original_file) + + if not processed_files: + logging.error("[Main] No files processed successfully. Exiting.") + # βœ… Still log the summary even if exiting + else: + try: + logging.info(f"[Main] Merging {len(processed_files)} files into {FINAL_OUTPUT_FILE}") + merged_df = pd.concat([pd.read_csv(f) for f in processed_files], ignore_index=True) + merged_df.to_csv(FINAL_OUTPUT_FILE, index=False) + logging.info(f"βœ… All files processed and merged into: {FINAL_OUTPUT_FILE}") + except Exception as e: + logging.exception(f"[Main] Error during merging: {e}") + + # βœ… --- Log the Final Summary --- + try: + logging.info("="*80) + logging.info("===== πŸš€ PROCESSING RUN SUMMARY =====") + logging.info("="*80) + + logging.info(f"Total Rows Processed (sum of attempts): {total_rows_processed_all}") + logging.info(f"Total API Calls (image rows attempted): {total_api_calls_all}") + logging.info(f" - βœ… Success: {total_api_success_all}") + logging.info(f" - ❌ Failed: {total_api_failure_all}") + + logging.info("") + logging.info(f"Total Input Files Processed Successfully: {len(processed_files)}") + logging.info(f"Total Input Files Failed to Process: {len(failed_files)}") + if failed_files: + logging.warning("Failed Input Files:") + for f in failed_files: + logging.warning(f" - {f}") + + logging.info("") + if all_failed_lists: + logging.warning(f"List of Failed API Calls ({len(all_failed_lists)}):") + for item in all_failed_lists: + logging.warning(f" - {item}") + else: + logging.info("βœ… No API call failures recorded.") + + if 
all_success_lists: + logging.info(f"List of Successful API Calls ({len(all_success_lists)}):") + for item in all_success_lists: + logging.info(f" - {item}") + else: + logging.info("No API call successes recorded.") + + logging.info("="*80) + logging.info("===== 🏁 END OF SUMMARY =====") + logging.info("="*80 + "\n") + except Exception as e: + logging.exception(f"[Main] Failed to write summary to log: {e}") + + + + + +# import os +# import concurrent.futures +# import pandas as pd +# from openpyxl import load_workbook +# from openpyxl.styles import Alignment +# import json +# import google.generativeai as genai +# import httpx +# import base64 +# import typing_extensions as typing +# import time +# import mimetypes +# from urllib.request import urlopen +# import re +# import logging +# import csv +# from dotenv import load_dotenv +# load_dotenv() +# import threading +# import time +# from collections import deque + +# # === Constants === +# IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"} +# MAX_PROCESSED_ROWS = 1 +# # OUTPUT_FILE = "processed_output.csv" +# INPUT_DIR = "parallel_input_split_files" +# OUTPUT_DIR = "parallel_output_split_files" +# FINAL_OUTPUT_FILE = os.path.join(OUTPUT_DIR, "merged_output.csv") + +# # Create output directory if it doesn't exist +# os.makedirs(OUTPUT_DIR, exist_ok=True) + +# # Configure logging +# logging.basicConfig( +# level=logging.INFO, +# format="%(asctime)s [%(levelname)s] [%(threadName)s] %(message)s", +# handlers=[ +# logging.FileHandler("parallel_output_split_files/processing.log"), +# logging.StreamHandler() +# ] +# ) +# logger = logging.getLogger(__name__) + +# # βœ… Add this below logger setup +# def validate_csv(input_file, logger): +# bad_rows = [] +# with open(input_file, newline='') as f: +# reader = csv.reader(f) +# for i, row in enumerate(reader): +# try: +# pass # You can add your row-level validation here +# except Exception as e: +# logger.warning(f"Malformed row at line {i+1}: {e}") +# 
bad_rows.append(i+1) +# return bad_rows + +# def get_gemini_tokens_from_env(): +# tokens = [] +# for key in os.environ: +# if key.startswith("GEMINI_TOKEN"): +# tokens.append(os.environ[key]) +# if not tokens: +# logging.error("[Gemini] No Gemini tokens found in environment variables!") +# return tokens + +# GEMINI_TOKENS = get_gemini_tokens_from_env() + +# current_token_index = 0 + +# def get_next_gemini_token(): +# global current_token_index +# if current_token_index < len(GEMINI_TOKENS): +# token = GEMINI_TOKENS[current_token_index] +# logging.info(f"[Gemini] Using token: -----") #{token} +# return token +# return None + +# def switch_to_next_token(): +# global current_token_index +# current_token_index += 1 +# if current_token_index >= len(GEMINI_TOKENS): +# logging.error("[Gemini] All tokens exhausted!") +# return None +# token = get_next_gemini_token() +# if token: +# genai.configure(api_key=token) +# global model +# model = genai.GenerativeModel( +# model_name="gemini-2.0-flash", +# generation_config={ +# "response_mime_type": "application/json", +# "response_schema": AnalysisResponse, +# }, +# ) +# return token +# return None + +# # === Gemini Model Setup === +# class AnalysisResponse(typing.TypedDict): +# answers: list[str] +# reasonings: list[str] + +# initial_token = get_next_gemini_token() +# if not initial_token: +# raise ValueError("[Gemini] No valid Gemini tokens found!") + +# genai.configure(api_key=initial_token) +# model = genai.GenerativeModel( +# model_name="gemini-2.0-flash", +# generation_config={ +# "response_mime_type": "application/json", +# "response_schema": AnalysisResponse, +# }, +# ) + +# # === Utility functions === +# def calculate_relevance_tag(answers): +# if not answers or not isinstance(answers, list): +# return 'Irrelevant' +# yes_count = sum(1 for answer in answers if str(answer).upper() == 'YES') +# total_count = len(answers) +# percentage = (yes_count / total_count) * 100 if total_count > 0 else 0 +# if percentage >= 50: +# 
return 'Relevant' +# elif percentage > 0: +# return 'Partially Relevant' +# else: +# return 'Irrelevant' + +# def adjust_excel_formatting(output_file): +# wb = load_workbook(output_file) +# ws = wb.active +# for row in ws.iter_rows(): +# for cell in row: +# cell.alignment = Alignment(wrap_text=True, vertical="top", horizontal="left") +# for col in ws.columns: +# max_length = 0 +# col_letter = col[0].column_letter +# for cell in col: +# try: +# if cell.value: +# max_length = max(max_length, len(str(cell.value))) +# except: +# pass +# ws.column_dimensions[col_letter].width = max_length + 2 +# wb.save(output_file) + +# def get_image_as_base64(url: str) -> str: +# with urlopen(url) as response: +# image_data = response.read() +# mime_type, _ = mimetypes.guess_type(url) +# if not mime_type: +# mime_type = "image/jpeg" +# base64_data = base64.b64encode(image_data).decode("utf-8") +# return f"data:{mime_type};base64,{base64_data}" + +# # Track timestamps of recent requests +# _request_times = deque() +# _request_lock = threading.Lock() +# MAX_REQUESTS_PER_MINUTE = 2000 + +# def rate_limiter(): +# """Block until we are under the 2000 req/min limit.""" +# global _request_times +# with _request_lock: +# now = time.time() +# # Remove requests older than 60 seconds +# while _request_times and now - _request_times[0] > 60: +# _request_times.popleft() + +# if len(_request_times) >= MAX_REQUESTS_PER_MINUTE: +# sleep_time = 60 - (now - _request_times[0]) +# if sleep_time > 0: +# logging.info(f"[RateLimiter] Throttling for {sleep_time:.2f} seconds to stay under 2000 req/min...") +# time.sleep(sleep_time) +# return rate_limiter() # Recheck after sleep + +# _request_times.append(time.time()) + + +# def process_image(task_evidence_link, task_evidence_question, max_retries=3): +# global current_token_index +# retries = 0 +# while retries < max_retries: +# try: +# rate_limiter() +# image = httpx.get(task_evidence_link) +# prompt = f"""You are an educational evidence validator. 
Analyse the given image... {task_evidence_question}""" +# response = model.generate_content([ +# {"mime_type": "image/jpeg", "data": base64.b64encode(image.content).decode("utf-8")}, +# prompt, +# ]) +# response_json = json.loads(response.text) +# return response_json +# except Exception as e: +# error_str = str(e).lower() +# if any(k in error_str for k in ["rate limit", "quota", "429", "resource_exhausted"]): +# logging.warning("[Gemini] Rate limit or quota exceeded. Switching token...") +# if switch_to_next_token(): +# continue +# else: +# logging.warning("[Gemini] No more tokens. Retrying in 60 seconds...") +# time.sleep(60) +# retries += 1 +# else: +# logging.error(f"[Gemini] Error: {e}") +# retries += 1 +# logging.error("[Gemini] Max retries reached.") +# return {"error": "Max retries reached"} + + +# # === Main processing === +# def main(input_file, worker_id=None): +# try: +# logging.info(f"[Worker {worker_id}] Starting processing for {input_file}") + +# if not os.path.exists(input_file): +# logging.error(f"[Worker {worker_id}] File not found: {input_file}") +# return None + +# df = pd.read_excel(input_file) if input_file.endswith(".xlsx") else pd.read_csv(input_file) +# df_filtered = df[ +# ~df["Task Evidence"].isin([None, "Null"]) +# & ~df["Task Evidence Question"].isin([None, "Null"]) +# ].dropna(subset=["Task Evidence", "Task Evidence Question"]) + +# processed_count = 0 +# task_evidence_qa = [] +# task_evidence_qa_reason = [] +# relevance_tags = [] + +# for idx, row in df_filtered.iterrows(): +# task_evidence = str(row["Task Evidence"]).strip() +# task_question = str(row["Task Evidence Question"]).strip() + +# if any(task_evidence.lower().endswith(ext) for ext in IMAGE_FORMATS): +# logging.info(f"[Worker {worker_id}] Processing image row {idx+1}/{len(df_filtered)}") +# response = process_image(task_evidence, task_question) +# if isinstance(response, dict) and "answers" in response and "reasonings" in response: +# answers = response["answers"] +# 
reasonings = response["reasonings"] +# task_evidence_qa.append(answers) +# task_evidence_qa_reason.append(reasonings) +# relevance_tags.append(calculate_relevance_tag(answers)) +# else: +# logging.warning(f"[Worker {worker_id}] Invalid response at row {idx+1}") +# task_evidence_qa.append(None) +# task_evidence_qa_reason.append(None) +# relevance_tags.append('Irrelevant') +# else: +# logging.info(f"[Worker {worker_id}] Skipping non-image row {idx+1}") +# task_evidence_qa.append(None) +# task_evidence_qa_reason.append(None) +# relevance_tags.append('Irrelevant') + +# processed_count += 1 +# if processed_count >= MAX_PROCESSED_ROWS: +# logging.info(f"[Worker {worker_id}] Reached max processed rows ({MAX_PROCESSED_ROWS})") +# break + +# df_filtered = df_filtered.head(processed_count) +# df_filtered["Task evidence Q and A"] = task_evidence_qa +# df_filtered["Task evidence Q and A Reason"] = task_evidence_qa_reason +# df_filtered["Relevance Tag"] = relevance_tags + +# # βœ… Remove IMAGE() formula for CSV - it's Excel-specific +# df_filtered["Image Preview"] = df_filtered["Task Evidence"].apply( +# lambda x: str(x) if str(x).lower().endswith(tuple(IMAGE_FORMATS)) else "" +# ) + +# # βœ… Changed to save as CSV instead of XLSX +# output_filename = os.path.join(OUTPUT_DIR, f"processed_{os.path.basename(input_file).split('.')[0]}.csv") +# df_filtered.to_csv(output_filename, index=False) + +# logging.info(f"[Worker {worker_id}] Finished processing {input_file}. 
Output: {output_filename}") + +# return output_filename # βœ… Return the output file path + +# except Exception as e: +# logging.exception(f"[Worker {worker_id}] Failed to process {input_file}: {e}") +# return None + + +# def process_file_parallel(file_path, worker_id): +# os.makedirs(OUTPUT_DIR, exist_ok=True) +# output_filename = main(file_path, worker_id) # βœ… Get return from main +# if output_filename: +# logging.info(f"[Worker {worker_id}] Output saved as {output_filename}") +# else: +# logging.warning(f"[Worker {worker_id}] Processing failed for {file_path}") +# return output_filename # βœ… Always return, even if None + +# # === Entry point === +# if __name__ == "__main__": +# input_files = [ +# os.path.join(INPUT_DIR, file) +# for file in os.listdir(INPUT_DIR) +# if file.endswith((".xlsx", ".csv")) +# ] + +# logging.info(f"[Main] Found {len(input_files)} input files to process.") + +# processed_files = [] +# with concurrent.futures.ThreadPoolExecutor(max_workers=len(input_files)) as executor: +# futures = { +# executor.submit(process_file_parallel, f, idx + 1): f +# for idx, f in enumerate(input_files) +# } +# for future in concurrent.futures.as_completed(futures): +# result_file = future.result() +# if result_file: # βœ… Skip failed results +# processed_files.append(result_file) +# logging.info(f"[Main] Worker finished: {result_file}") +# else: +# logging.warning(f"[Main] A file failed to process.") + +# if not processed_files: +# logging.error("[Main] No files processed successfully. 
Exiting.") +# exit(1) + +# try: +# logging.info(f"[Main] Merging {len(processed_files)} files into {FINAL_OUTPUT_FILE}") +# # βœ… Changed to read CSV files instead of Excel files +# merged_df = pd.concat([pd.read_csv(f) for f in processed_files], ignore_index=True) +# # βœ… Changed to save merged output as CSV +# merged_df.to_csv(FINAL_OUTPUT_FILE, index=False) +# logging.info(f"βœ… All files processed and merged into: {FINAL_OUTPUT_FILE}") +# except Exception as e: +# logging.exception(f"[Main] Error during merging: {e}") + + + +# import os +# import concurrent.futures +# import pandas as pd +# from openpyxl import load_workbook +# from openpyxl.styles import Alignment +# import json +# import google.generativeai as genai +# import httpx +# import base64 +# import typing_extensions as typing +# import time +# import mimetypes +# from urllib.request import urlopen +# import re +# import logging +# import csv +# from dotenv import load_dotenv +# load_dotenv() +# import threading +# import time +# from collections import deque + +# # Configure logging +# logging.basicConfig( +# level=logging.INFO, +# format="%(asctime)s [%(levelname)s] [%(threadName)s] %(message)s", +# handlers=[ +# logging.FileHandler("parallel_output_split_files/processing.log"), +# logging.StreamHandler() +# ] +# ) +# logger = logging.getLogger(__name__) + +# # βœ… Add this below logger setup +# def validate_csv(input_file, logger): +# bad_rows = [] +# with open(input_file, newline='') as f: +# reader = csv.reader(f) +# for i, row in enumerate(reader): +# try: +# pass # You can add your row-level validation here +# except Exception as e: +# logger.warning(f"Malformed row at line {i+1}: {e}") +# bad_rows.append(i+1) +# return bad_rows + +# # === Constants === +# IMAGE_FORMATS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"} +# MAX_PROCESSED_ROWS = 1 +# OUTPUT_FILE = "processed_output.csv" +# INPUT_DIR = "parallel_input_split_files" +# OUTPUT_DIR = "parallel_output_split_files" +# 
FINAL_OUTPUT_FILE = os.path.join(OUTPUT_DIR, "merged_output.csv") + +# def get_gemini_tokens_from_env(): +# tokens = [] +# for key in os.environ: +# if key.startswith("GEMINI_TOKEN"): +# tokens.append(os.environ[key]) +# if not tokens: +# logging.error("[Gemini] No Gemini tokens found in environment variables!") +# return tokens + +# GEMINI_TOKENS = get_gemini_tokens_from_env() + +# current_token_index = 0 + +# def get_next_gemini_token(): +# global current_token_index +# if current_token_index < len(GEMINI_TOKENS): +# token = GEMINI_TOKENS[current_token_index] +# logging.info(f"[Gemini] Using token: -----") #{token} +# return token +# return None + +# def switch_to_next_token(): +# global current_token_index +# current_token_index += 1 +# if current_token_index >= len(GEMINI_TOKENS): +# logging.error("[Gemini] All tokens exhausted!") +# return None +# token = get_next_gemini_token() +# if token: +# genai.configure(api_key=token) +# global model +# model = genai.GenerativeModel( +# model_name="gemini-2.0-flash", +# generation_config={ +# "response_mime_type": "application/json", +# "response_schema": AnalysisResponse, +# }, +# ) +# return token +# return None + +# # === Gemini Model Setup === +# class AnalysisResponse(typing.TypedDict): +# answers: list[str] +# reasonings: list[str] + +# initial_token = get_next_gemini_token() +# if not initial_token: +# raise ValueError("[Gemini] No valid Gemini tokens found!") + +# genai.configure(api_key=initial_token) +# model = genai.GenerativeModel( +# model_name="gemini-2.0-flash", +# generation_config={ +# "response_mime_type": "application/json", +# "response_schema": AnalysisResponse, +# }, +# ) + +# # === Utility functions === +# def calculate_relevance_tag(answers): +# if not answers or not isinstance(answers, list): +# return 'Irrelevant' +# yes_count = sum(1 for answer in answers if str(answer).upper() == 'YES') +# total_count = len(answers) +# percentage = (yes_count / total_count) * 100 if total_count > 0 else 0 +# 
if percentage >= 50: +# return 'Relevant' +# elif percentage > 0: +# return 'Partially Relevant' +# else: +# return 'Irrelevant' + +# def adjust_excel_formatting(output_file): +# wb = load_workbook(output_file) +# ws = wb.active +# for row in ws.iter_rows(): +# for cell in row: +# cell.alignment = Alignment(wrap_text=True, vertical="top", horizontal="left") +# for col in ws.columns: +# max_length = 0 +# col_letter = col[0].column_letter +# for cell in col: +# try: +# if cell.value: +# max_length = max(max_length, len(str(cell.value))) +# except: +# pass +# ws.column_dimensions[col_letter].width = max_length + 2 +# wb.save(output_file) + +# def get_image_as_base64(url: str) -> str: +# with urlopen(url) as response: +# image_data = response.read() +# mime_type, _ = mimetypes.guess_type(url) +# if not mime_type: +# mime_type = "image/jpeg" +# base64_data = base64.b64encode(image_data).decode("utf-8") +# return f"data:{mime_type};base64,{base64_data}" + +# # Track timestamps of recent requests +# _request_times = deque() +# _request_lock = threading.Lock() +# MAX_REQUESTS_PER_MINUTE = 2000 + +# def rate_limiter(): +# """Block until we are under the 2000 req/min limit.""" +# global _request_times +# with _request_lock: +# now = time.time() +# # Remove requests older than 60 seconds +# while _request_times and now - _request_times[0] > 60: +# _request_times.popleft() + +# if len(_request_times) >= MAX_REQUESTS_PER_MINUTE: +# sleep_time = 60 - (now - _request_times[0]) +# if sleep_time > 0: +# logging.info(f"[RateLimiter] Throttling for {sleep_time:.2f} seconds to stay under 2000 req/min...") +# time.sleep(sleep_time) +# return rate_limiter() # Recheck after sleep + +# _request_times.append(time.time()) + + +# def process_image(task_evidence_link, task_evidence_question, max_retries=3): +# global current_token_index +# retries = 0 +# while retries < max_retries: +# try: +# rate_limiter() +# image = httpx.get(task_evidence_link) +# prompt = f"""You are an educational 
evidence validator. Analyse the given image... {task_evidence_question}""" +# response = model.generate_content([ +# {"mime_type": "image/jpeg", "data": base64.b64encode(image.content).decode("utf-8")}, +# prompt, +# ]) +# response_json = json.loads(response.text) +# return response_json +# except Exception as e: +# error_str = str(e).lower() +# if any(k in error_str for k in ["rate limit", "quota", "429", "resource_exhausted"]): +# logging.warning("[Gemini] Rate limit or quota exceeded. Switching token...") +# if switch_to_next_token(): +# continue +# else: +# logging.warning("[Gemini] No more tokens. Retrying in 60 seconds...") +# time.sleep(60) +# retries += 1 +# else: +# logging.error(f"[Gemini] Error: {e}") +# retries += 1 +# logging.error("[Gemini] Max retries reached.") +# return {"error": "Max retries reached"} + + +# # === Main processing === +# def main(input_file, worker_id=None): +# try: +# logging.info(f"[Worker {worker_id}] Starting processing for {input_file}") + +# if not os.path.exists(input_file): +# logging.error(f"[Worker {worker_id}] File not found: {input_file}") +# return None + +# df = pd.read_excel(input_file) if input_file.endswith(".xlsx") else pd.read_csv(input_file) +# df_filtered = df[ +# ~df["Task Evidence"].isin([None, "Null"]) +# & ~df["Task Evidence Question"].isin([None, "Null"]) +# ].dropna(subset=["Task Evidence", "Task Evidence Question"]) + +# processed_count = 0 +# task_evidence_qa = [] +# task_evidence_qa_reason = [] +# relevance_tags = [] + +# for idx, row in df_filtered.iterrows(): +# task_evidence = str(row["Task Evidence"]).strip() +# task_question = str(row["Task Evidence Question"]).strip() + +# if any(task_evidence.lower().endswith(ext) for ext in IMAGE_FORMATS): +# logging.info(f"[Worker {worker_id}] Processing image row {idx+1}/{len(df_filtered)}") +# response = process_image(task_evidence, task_question) +# if isinstance(response, dict) and "answers" in response and "reasonings" in response: +# answers = 
response["answers"] +# reasonings = response["reasonings"] +# task_evidence_qa.append(answers) +# task_evidence_qa_reason.append(reasonings) +# relevance_tags.append(calculate_relevance_tag(answers)) +# else: +# logging.warning(f"[Worker {worker_id}] Invalid response at row {idx+1}") +# task_evidence_qa.append(None) +# task_evidence_qa_reason.append(None) +# relevance_tags.append('Irrelevant') +# else: +# logging.info(f"[Worker {worker_id}] Skipping non-image row {idx+1}") +# task_evidence_qa.append(None) +# task_evidence_qa_reason.append(None) +# relevance_tags.append('Irrelevant') + +# processed_count += 1 +# if processed_count >= MAX_PROCESSED_ROWS: +# logging.info(f"[Worker {worker_id}] Reached max processed rows ({MAX_PROCESSED_ROWS})") +# break + +# df_filtered = df_filtered.head(processed_count) +# df_filtered["Task evidence Q and A"] = task_evidence_qa +# df_filtered["Task evidence Q and A Reason"] = task_evidence_qa_reason +# df_filtered["Relevance Tag"] = relevance_tags + +# # βœ… Remove IMAGE() formula for CSV - it's Excel-specific +# df_filtered["Image Preview"] = df_filtered["Task Evidence"].apply( +# lambda x: str(x) if str(x).lower().endswith(tuple(IMAGE_FORMATS)) else "" +# ) + +# # βœ… Changed to save as CSV instead of XLSX +# output_filename = os.path.join(OUTPUT_DIR, f"processed_{os.path.basename(input_file).split('.')[0]}.csv") +# df_filtered.to_csv(output_filename, index=False) + +# logging.info(f"[Worker {worker_id}] Finished processing {input_file}. 
Output: {output_filename}") + +# return output_filename # βœ… Return the output file path + +# except Exception as e: +# logging.exception(f"[Worker {worker_id}] Failed to process {input_file}: {e}") +# return None + + +# def process_file_parallel(file_path, worker_id): +# os.makedirs(OUTPUT_DIR, exist_ok=True) +# output_filename = main(file_path, worker_id) # βœ… Get return from main +# if output_filename: +# logging.info(f"[Worker {worker_id}] Output saved as {output_filename}") +# else: +# logging.warning(f"[Worker {worker_id}] Processing failed for {file_path}") +# return output_filename # βœ… Always return, even if None + +# # === Entry point === +# if __name__ == "__main__": +# input_files = [ +# os.path.join(INPUT_DIR, file) +# for file in os.listdir(INPUT_DIR) +# if file.endswith((".xlsx", ".csv")) +# ] + +# logging.info(f"[Main] Found {len(input_files)} input files to process.") + +# processed_files = [] +# with concurrent.futures.ThreadPoolExecutor(max_workers=len(input_files)) as executor: +# futures = { +# executor.submit(process_file_parallel, f, idx + 1): f +# for idx, f in enumerate(input_files) +# } +# for future in concurrent.futures.as_completed(futures): +# result_file = future.result() +# if result_file: # βœ… Skip failed results +# processed_files.append(result_file) +# logging.info(f"[Main] Worker finished: {result_file}") +# else: +# logging.warning(f"[Main] A file failed to process.") + +# if not processed_files: +# logging.error("[Main] No files processed successfully. 
Exiting.") +# exit(1) + +# try: +# logging.info(f"[Main] Merging {len(processed_files)} files into {FINAL_OUTPUT_FILE}") +# # βœ… Changed to read CSV files instead of Excel files +# merged_df = pd.concat([pd.read_csv(f) for f in processed_files], ignore_index=True) +# # βœ… Changed to save merged output as CSV +# merged_df.to_csv(FINAL_OUTPUT_FILE, index=False) +# logging.info(f"βœ… All files processed and merged into: {FINAL_OUTPUT_FILE}") +# except Exception as e: +# logging.exception(f"[Main] Error during merging: {e}") \ No newline at end of file diff --git a/evidence-analysis-process/processor/2-merge-processed-csv.py b/evidence-analysis-process/processor/2-merge-processed-csv.py new file mode 100644 index 00000000..32663c38 --- /dev/null +++ b/evidence-analysis-process/processor/2-merge-processed-csv.py @@ -0,0 +1,131 @@ +import os +import pandas as pd +from pathlib import Path + +# ==== CONFIG ==== +INPUT_DIR = "/Users/user/Documents/AI/parallel-process/output/MAIN-SPLITS" # Folder containing CSV files to merge +OUTPUT_FILE = "merged_output.csv" # Name of the merged output file +SORT_FILES = True # Set to True to sort files before merging (useful for numbered files) + +# ==== DUPLICATE HANDLING ==== +# Options: "keep_all", "remove_duplicates", "remove_duplicates_keep_first", "remove_duplicates_keep_last" +DUPLICATE_HANDLING = "keep_all" # Change this to control how duplicates are handled + +# ==== STEP 1: Find all CSV files ==== +print("πŸ” Searching for CSV files...") +csv_files = [ + os.path.join(INPUT_DIR, f) + for f in os.listdir(INPUT_DIR) + if f.endswith('.csv') +] + +if not csv_files: + print(f"❌ No CSV files found in '{INPUT_DIR}'") + exit(1) + +# Sort files if enabled (useful for files like 1.csv, 2.csv, etc.) 
+if SORT_FILES: + csv_files.sort(key=lambda x: int(''.join(filter(str.isdigit, os.path.basename(x)))) if any(c.isdigit() for c in os.path.basename(x)) else os.path.basename(x)) + +print(f"βœ… Found {len(csv_files)} CSV files") + +# ==== STEP 2: Read and merge CSVs ==== +print("\nπŸ“Š Reading CSV files...") +dataframes = [] +file_stats = [] + +for idx, csv_file in enumerate(csv_files, 1): + try: + df = pd.read_csv(csv_file) + rows = len(df) + dataframes.append(df) + file_stats.append({ + 'file': os.path.basename(csv_file), + 'rows': rows, + 'columns': len(df.columns) + }) + print(f" [{idx}/{len(csv_files)}] {os.path.basename(csv_file)}: {rows} rows, {len(df.columns)} columns") + except Exception as e: + print(f" ⚠️ Error reading {csv_file}: {e}") + +if not dataframes: + print("❌ No CSV files could be read successfully") + exit(1) + +# ==== STEP 3: Merge all dataframes ==== +print("\nπŸ”— Merging CSV files...") +merged_df = pd.concat(dataframes, ignore_index=True) + +# ==== STEP 4: Handle Duplicates ==== +original_row_count = len(merged_df) +duplicates_before = merged_df.duplicated().sum() + +if DUPLICATE_HANDLING == "remove_duplicates": + merged_df = merged_df.drop_duplicates(keep=False) + print(f" ℹ️ Removed ALL duplicate rows (both original and copies)") +elif DUPLICATE_HANDLING == "remove_duplicates_keep_first": + merged_df = merged_df.drop_duplicates(keep='first') + print(f" ℹ️ Removed duplicate rows (kept first occurrence)") +elif DUPLICATE_HANDLING == "remove_duplicates_keep_last": + merged_df = merged_df.drop_duplicates(keep='last') + print(f" ℹ️ Removed duplicate rows (kept last occurrence)") +else: # keep_all + print(f" ℹ️ Keeping all rows including duplicates") + +rows_removed = original_row_count - len(merged_df) + +# ==== STEP 5: Save merged CSV ==== +output_path = os.path.join(INPUT_DIR, OUTPUT_FILE) +merged_df.to_csv(output_path, index=False) +output_size = os.path.getsize(output_path) / 1024 # Size in KB + +print(f"πŸ’Ύ Merged CSV saved to: 
{output_path}") + +# ==== STEP 6: Generate Merge Report ==== +print("\n" + "="*80) +print(" MERGE REPORT ".center(80, "=")) +print("="*80) + +print(f"\nπŸ“ INPUT:") +print(f" β€’ Source directory: {INPUT_DIR}") +print(f" β€’ Files merged: {len(dataframes)}") + +print(f"\nπŸ“Š FILE DETAILS:") +for stat in file_stats: + print(f" β€’ {stat['file']}: {stat['rows']} rows, {stat['columns']} columns") + +print(f"\nπŸ“ˆ MERGED DATA:") +print(f" β€’ Total rows before dedup: {original_row_count}") +print(f" β€’ Duplicate rows found: {duplicates_before}") +print(f" β€’ Rows removed: {rows_removed}") +print(f" β€’ Final row count: {len(merged_df)}") +print(f" β€’ Total columns: {len(merged_df.columns)}") +print(f" β€’ File size: {output_size:.2f} KB") + +print(f"\nπŸ”§ DUPLICATE HANDLING:") +print(f" β€’ Method: {DUPLICATE_HANDLING}") +if duplicates_before > 0: + if DUPLICATE_HANDLING == "keep_all": + print(f" β€’ βœ… All {duplicates_before} duplicate rows were kept in output") + else: + print(f" β€’ βœ… {rows_removed} duplicate rows were removed") +else: + print(f" β€’ βœ… No duplicate rows detected") + +# Show column names +print(f"\nπŸ“‹ COLUMNS ({len(merged_df.columns)}):") +if len(merged_df.columns) <= 10: + for col in merged_df.columns: + print(f" β€’ {col}") +else: + for col in list(merged_df.columns[:5]): + print(f" β€’ {col}") + print(f" ... 
and {len(merged_df.columns) - 5} more columns") + +print(f"\nπŸ’Ύ OUTPUT:") +print(f" β€’ File: {output_path}") +print(f" β€’ Size: {output_size:.2f} KB") + +print("\n" + "="*80) +print(" MERGE COMPLETED SUCCESSFULLY ".center(80, "=")) +print("="*80 + "\n") \ No newline at end of file diff --git a/evidence-analysis-process/processor/3-invalid-url-&-custom-task-remover.py b/evidence-analysis-process/processor/3-invalid-url-&-custom-task-remover.py new file mode 100644 index 00000000..a9f2a476 --- /dev/null +++ b/evidence-analysis-process/processor/3-invalid-url-&-custom-task-remover.py @@ -0,0 +1,132 @@ +import os +import pandas as pd +import re +from pathlib import Path + +# ==== CONFIG ==== +INPUT_CSV = "/Users/user/Documents/AI/output/TEST/merged_output.csv" +OUTPUT_DIR = "/Users/user/Documents/AI/output/TEST" +OUTPUT_FILE = "final_output.csv" + +# Column names to check for null/empty values +CHECK_COLUMNS = ["Task evidence Q and A", "Task evidence Q and A Reason"] +URL_COLUMN = "Task Evidence" # Column to extract URLs from + +# ==== STEP 1: Create output directory ==== +os.makedirs(OUTPUT_DIR, exist_ok=True) +print(f"πŸ“ Output directory: {OUTPUT_DIR}\n") + +# ==== STEP 2: Read CSV ==== +print(f"πŸ“– Reading CSV: {INPUT_CSV}") +try: + df = pd.read_csv(INPUT_CSV) + print(f"βœ… Loaded {len(df)} rows, {len(df.columns)} columns\n") +except Exception as e: + print(f"❌ Error reading CSV: {e}") + exit(1) + +# ==== STEP 3: Verify columns exist ==== +missing_cols = [] +for col in CHECK_COLUMNS + [URL_COLUMN]: + if col not in df.columns: + missing_cols.append(col) + +if missing_cols: + print(f"❌ Missing columns in CSV: {', '.join(missing_cols)}") + print(f"\nπŸ“‹ Available columns:") + for col in df.columns: + print(f" β€’ {col}") + exit(1) + +# ==== STEP 4: Identify rows with null/empty values ==== +print(f"πŸ” Checking for null/empty values in:") +print(f" β€’ {CHECK_COLUMNS[0]}") +print(f" β€’ {CHECK_COLUMNS[1]}\n") + +# Create mask for rows with null or empty values 
in either column +mask = df[CHECK_COLUMNS[0]].isna() | df[CHECK_COLUMNS[1]].isna() | \ + (df[CHECK_COLUMNS[0]].astype(str).str.strip() == '') | \ + (df[CHECK_COLUMNS[1]].astype(str).str.strip() == '') + +rows_with_nulls = df[mask].copy() +rows_to_keep = df[~mask].copy() + +null_count = len(rows_with_nulls) +print(f"πŸ“Š Found {null_count} rows with null/empty values") +print(f"βœ… {len(rows_to_keep)} rows are valid and will be kept\n") + +# ==== STEP 5: Extract URLs from null rows ==== +extracted_urls = [] + +if null_count > 0: + print(f"πŸ”— Extracting URLs from '{URL_COLUMN}' column:\n") + print("=" * 80) + + for idx, row in rows_with_nulls.iterrows(): + url_value = row[URL_COLUMN] + + # Extract URL using regex (handles various formats) + if pd.notna(url_value): + url_str = str(url_value) + # Find URLs in the text + url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+' + found_urls = re.findall(url_pattern, url_str) + + if found_urls: + for url in found_urls: + extracted_urls.append(url) + print(f" β€’ {url}") + else: + # If no URL pattern found, print the raw value + extracted_urls.append(url_str) + print(f" β€’ {url_str}") + else: + print(f" β€’ [No URL - Cell is empty]") + + print("=" * 80) + print(f"\nπŸ“ Total URLs extracted: {len(extracted_urls)}\n") +else: + print("βœ… No null/empty rows found - nothing to extract\n") + +# ==== STEP 6: Save cleaned CSV ==== +output_path = os.path.join(OUTPUT_DIR, OUTPUT_FILE) +rows_to_keep.to_csv(output_path, index=False) +output_size = os.path.getsize(output_path) / 1024 + +print(f"πŸ’Ύ Cleaned CSV saved to: {output_path}") +print(f" β€’ Size: {output_size:.2f} KB\n") + +# ==== STEP 7: Generate Report ==== +print("=" * 80) +print(" CLEANING REPORT ".center(80, "=")) +print("=" * 80) + +print(f"\nπŸ“ INPUT:") +print(f" β€’ File: {INPUT_CSV}") +print(f" β€’ Original rows: {len(df)}") + +print(f"\nπŸ” CHECKED COLUMNS:") +print(f" β€’ {CHECK_COLUMNS[0]}") +print(f" β€’ {CHECK_COLUMNS[1]}") + +print(f"\nπŸ“Š RESULTS:") +print(f" 
β€’ Rows with null/empty values: {null_count}") +print(f" β€’ Rows removed: {null_count}") +print(f" β€’ Rows kept: {len(rows_to_keep)}") +print(f" β€’ URLs extracted: {len(extracted_urls)}") + +print(f"\nπŸ’Ύ OUTPUT:") +print(f" β€’ File: {output_path}") +print(f" β€’ Final row count: {len(rows_to_keep)}") +print(f" β€’ Size: {output_size:.2f} KB") + +if len(extracted_urls) > 0: + print(f"\nπŸ”— EXTRACTED URLS ({len(extracted_urls)}):") + print("=" * 80) + for i, url in enumerate(extracted_urls, 1): + print(f"{i}. {url}") + print("=" * 80) + +print("\n" + "=" * 80) +print(" CLEANING COMPLETED SUCCESSFULLY ".center(80, "=")) +print("=" * 80 + "\n") \ No newline at end of file diff --git a/evidence-analysis-process/processor/4-url-validator.py b/evidence-analysis-process/processor/4-url-validator.py new file mode 100644 index 00000000..7c416654 --- /dev/null +++ b/evidence-analysis-process/processor/4-url-validator.py @@ -0,0 +1,268 @@ +import requests +import pandas as pd +from concurrent.futures import ThreadPoolExecutor, as_completed +from datetime import datetime +import time +import re + +# ==== CONFIG ==== +URLS_FILE = "/Users/user/Documents/AI/parallel-process/processor/url.txt" # File containing URLs (one per line) +OUTPUT_FILE = "url_validation_report.csv" +MAX_WORKERS = 30 # Number of concurrent threads +TIMEOUT = 15 # Timeout in seconds for each request +RETRY_ATTEMPTS = 2 # Number of retry attempts for failed URLs + +# ==== STEP 1: Load and Clean URLs ==== +def load_urls_from_file(filename): + """Load URLs from a text file and clean them""" + try: + with open(filename, 'r', encoding='utf-8') as f: + content = f.read() + + # Extract URLs using regex pattern + url_pattern = r'https?://[^\s<>"{}|\\^`\[\]\n]+' + urls = re.findall(url_pattern, content) + + # Clean URLs - remove any trailing characters + cleaned_urls = [] + for url in urls: + # Remove trailing punctuation that might not be part of URL + url = url.rstrip('.,;:)') + # Remove bullet points or 
numbering + url = re.sub(r'^[\d]+\.\s*', '', url) + url = re.sub(r'^[β€’\-]\s*', '', url) + if url: + cleaned_urls.append(url.strip()) + + # Remove duplicates while preserving order + seen = set() + unique_urls = [] + for url in cleaned_urls: + if url not in seen: + seen.add(url) + unique_urls.append(url) + + return unique_urls + except FileNotFoundError: + print(f"⚠️ File '{filename}' not found.") + return [] + except Exception as e: + print(f"⚠️ Error reading file: {e}") + return [] + +# ==== STEP 2: Validate Single URL ==== +def validate_url(url, retry_count=0): + """ + Validate a single URL and return status information + """ + result = { + 'url': url, + 'status': 'Unknown', + 'status_code': None, + 'response_time': None, + 'content_type': None, + 'content_length': None, + 'error': None, + 'final_url': None + } + + try: + start_time = time.time() + # Use HEAD request first for efficiency + response = requests.head(url, timeout=TIMEOUT, allow_redirects=True) + response_time = time.time() - start_time + + result['status_code'] = response.status_code + result['response_time'] = round(response_time, 2) + result['content_type'] = response.headers.get('Content-Type', 'Unknown') + result['content_length'] = response.headers.get('Content-Length', 'Unknown') + result['final_url'] = response.url if response.url != url else None + + if response.status_code == 200: + result['status'] = 'Valid' + elif response.status_code == 404: + result['status'] = 'Not Found' + elif response.status_code == 403: + result['status'] = 'Forbidden' + elif response.status_code >= 500: + result['status'] = 'Server Error' + elif response.status_code >= 400: + result['status'] = 'Client Error' + elif response.status_code >= 300: + result['status'] = 'Redirect' + else: + result['status'] = 'Other' + + except requests.exceptions.Timeout: + result['status'] = 'Timeout' + result['error'] = f'Request timeout after {TIMEOUT}s' + except requests.exceptions.SSLError as e: + result['status'] = 'SSL Error' 
+ result['error'] = str(e)[:150] + except requests.exceptions.ConnectionError as e: + result['status'] = 'Connection Error' + result['error'] = str(e)[:150] + except requests.exceptions.TooManyRedirects: + result['status'] = 'Too Many Redirects' + result['error'] = 'Exceeded maximum redirects' + except requests.exceptions.RequestException as e: + result['status'] = 'Request Error' + result['error'] = str(e)[:150] + except Exception as e: + result['status'] = 'Unknown Error' + result['error'] = str(e)[:150] + + # Retry logic for failed requests + if result['status'] in ['Timeout', 'Connection Error', 'Server Error'] and retry_count < RETRY_ATTEMPTS: + time.sleep(1) + return validate_url(url, retry_count + 1) + + return result + +# ==== STEP 3: Validate URLs Concurrently ==== +def validate_urls_concurrent(urls): + """ + Validate multiple URLs concurrently using ThreadPoolExecutor + """ + results = [] + total = len(urls) + + print(f"πŸ” Starting validation of {total} URLs...\n") + + with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: + future_to_url = {executor.submit(validate_url, url): url for url in urls} + + completed = 0 + for future in as_completed(future_to_url): + completed += 1 + result = future.result() + results.append(result) + + # Print progress + status_icon = { + 'Valid': 'βœ…', + 'Not Found': '❌', + 'Forbidden': '🚫', + 'Server Error': 'πŸ”₯', + 'Client Error': '⚠️', + 'Error': '⚠️', + 'Timeout': '⏱️', + 'Connection Error': 'πŸ”Œ', + 'SSL Error': 'πŸ”’', + 'Redirect': 'β†ͺ️', + 'Other': '❓' + }.get(result['status'], 'β€’') + + # Show first 60 chars of URL + url_display = result['url'][:60] + '...' 
if len(result['url']) > 60 else result['url'] + status_display = f"{result['status']}" + if result['status_code']: + status_display += f" ({result['status_code']})" + + print(f"[{completed}/{total}] {status_icon} {status_display}: {url_display}") + + return results + +# ==== STEP 4: Generate Detailed Report ==== +def print_report(df, total_time): + """Print detailed validation report""" + print("\n" + "=" * 80) + print(" VALIDATION REPORT ".center(80, "=")) + print("=" * 80) + + print(f"\nπŸ“Š SUMMARY:") + print(f" β€’ Total URLs validated: {len(df)}") + print(f" β€’ Unique URLs: {df['url'].nunique()}") + print(f" β€’ Total time taken: {round(total_time, 2)}s") + print(f" β€’ Average time per URL: {round(total_time / len(df), 2)}s") + + print(f"\nπŸ“ˆ STATUS BREAKDOWN:") + status_counts = df['status'].value_counts() + for status, count in status_counts.items(): + percentage = (count / len(df)) * 100 + icon = { + 'Valid': 'βœ…', + 'Not Found': '❌', + 'Forbidden': '🚫', + 'Server Error': 'πŸ”₯', + 'Client Error': '⚠️', + 'Timeout': '⏱️', + 'Connection Error': 'πŸ”Œ', + 'SSL Error': 'πŸ”’', + 'Redirect': 'β†ͺ️', + 'Other': '❓' + }.get(status, 'β€’') + print(f" {icon} {status}: {count} ({percentage:.1f}%)") + + # Response time statistics for valid URLs + valid_df = df[df['status'] == 'Valid'] + if len(valid_df) > 0: + print(f"\n⚑ RESPONSE TIME STATISTICS (Valid URLs):") + print(f" β€’ Fastest: {valid_df['response_time'].min()}s") + print(f" β€’ Slowest: {valid_df['response_time'].max()}s") + print(f" β€’ Average: {round(valid_df['response_time'].mean(), 2)}s") + print(f" β€’ Median: {round(valid_df['response_time'].median(), 2)}s") + + # Show invalid URLs + invalid_df = df[df['status'] != 'Valid'] + if len(invalid_df) > 0: + print(f"\n⚠️ INVALID/FAILED URLs ({len(invalid_df)}):") + print("=" * 80) + for idx, row in invalid_df.head(20).iterrows(): + url_display = row['url'][:65] + '...' 
if len(row['url']) > 65 else row['url'] + error_msg = f" - {row['error'][:50]}" if pd.notna(row['error']) else "" + status_display = f"[{row['status']}" + if pd.notna(row['status_code']): + status_display += f" {row['status_code']}" + status_display += "]" + print(f" {status_display} {url_display}{error_msg}") + + if len(invalid_df) > 20: + print(f"\n ... and {len(invalid_df) - 20} more invalid URLs") + print("=" * 80) + +# ==== MAIN EXECUTION ==== +if __name__ == "__main__": + print("=" * 80) + print(" URL VALIDATOR ".center(80, "=")) + print("=" * 80 + "\n") + + # Load URLs + urls = load_urls_from_file(URLS_FILE) + + if not urls: + print("❌ No URLs found. Please check your input file.") + exit(1) + + print(f"πŸ“‹ Loaded {len(urls)} unique URLs from file") + print(f"βš™οΈ Max concurrent workers: {MAX_WORKERS}") + print(f"⏱️ Timeout per request: {TIMEOUT}s") + print(f"πŸ”„ Retry attempts: {RETRY_ATTEMPTS}\n") + + # Validate URLs + start_time = time.time() + results = validate_urls_concurrent(urls) + total_time = time.time() - start_time + + # Convert to DataFrame + df = pd.DataFrame(results) + + # Sort by status (Valid first, then others) + status_order = ['Valid', 'Redirect', 'Not Found', 'Forbidden', 'Client Error', + 'Server Error', 'Timeout', 'Connection Error', 'SSL Error', + 'Request Error', 'Other', 'Unknown Error'] + df['status_order'] = df['status'].apply(lambda x: status_order.index(x) if x in status_order else 999) + df = df.sort_values('status_order').drop('status_order', axis=1) + + # Save to CSV + df.to_csv(OUTPUT_FILE, index=False) + + # Generate report + print_report(df, total_time) + + print(f"\nπŸ’Ύ OUTPUT:") + print(f" β€’ Full report saved to: {OUTPUT_FILE}") + + print("\n" + "=" * 80) + print(" VALIDATION COMPLETED ".center(80, "=")) + print("=" * 80 + "\n") \ No newline at end of file diff --git a/evidence-analysis-process/processor/url.txt b/evidence-analysis-process/processor/url.txt new file mode 100644 index 00000000..d0b01647 --- 
/dev/null +++ b/evidence-analysis-process/processor/url.txt @@ -0,0 +1,181 @@ +1. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1755757226432.jpg +2. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1755757233931.jpg +3. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1755766494929.jpg +4. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112377600.jpg +5. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112352372.jpg +6. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112468976.jpg +7. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112561957.jpg +8. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a321312462c2000822abb6/268b3492-2da3-48e1-8b74-33990d38e74b/bfe031b9-f02a-4406-b06b-42f3b3198710/1756112585163.jpg +9. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68b28bf22462c2000845583a/45c2d1fc-effe-4586-97d7-eb944d5b64ec/6069f715-2332-436a-ac03-f0ab10fc8463/1756537060440.jpg +10. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755062027902.jpg +11. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755062030271.jpg +12. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066531343.jpg +13. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066528855.jpg +14. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066533233.jpg +15. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755011928103.jpg +16. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755011968386.jpg +17. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755012027535.jpg +18. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755012061968.jpg +19. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989006233.jpg +20. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989009015.jpg +21. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989003357.jpg +22. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989010927.jpg +23. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754989000141.jpg +24. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1755066775403.jpg +25. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993226918.jpg +26. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993242938.jpg +27. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993262582.jpg +28. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993257520.jpg +29. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993206829.jpg +30. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6895b5372462c200081775c3/33116015-4e43-43c0-a6b8-56391a9393a6/559bea10-4f75-4ad9-9f2f-3985b1e32891/1754993188560.jpg +31. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364371996.jpg +32. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364373667.jpg +33. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756371255806.jpg +34. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364583851.jpg +35. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369799312.jpg +36. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369794636.jpg +37. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756364582020.jpg +38. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756371161172.jpg +39. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369899533.jpg +40. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756369901781.jpg +41. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756370810927.jpg +42. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756370762297.jpg +43. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756370787370.jpg +44. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457408003.jpg +45. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452661905.jpg +46. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452664154.jpg +47. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457409742.jpg +48. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457446132.jpg +49. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452694093.jpg +50. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457442871.jpg +51. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452691976.jpg +52. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457465829.jpg +53. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756457460407.jpg +54. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452573999.jpg +55. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a4b1282462c20008261ba7/3d7dc2ef-1a9d-4e2c-960d-704aef929b6a/540df529-dc00-4d96-9a0d-9a889e6db811/1756452575538.jpg +56. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756131232333.jpg +57. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756129909476.jpg +58. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756129641020.jpg +59. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756130503063.jpg +60. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68ac615a2462c2000837803e/356edecb-c397-4dc4-b843-245627502d40/863f030c-6253-4fdd-98ae-4fb601cc5c22/1756130803302.jpg +61. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689459c02462c20008157450/73947ed5-c5a0-4109-842d-90b6fdf36972/cc163c3e-c112-4bde-86c0-dd40249f0edb/1755876961342.jpg +62. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689459c02462c20008157450/73947ed5-c5a0-4109-842d-90b6fdf36972/cc163c3e-c112-4bde-86c0-dd40249f0edb/1755877076407.jpg +63. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689459c02462c20008157450/73947ed5-c5a0-4109-842d-90b6fdf36972/cc163c3e-c112-4bde-86c0-dd40249f0edb/1755789369513.jpg +64. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/6899962e2462c2000819e971/69ad3fdf-5b84-4602-8f00-94aea0b3864e/bf9ee9b1-b8c5-4e26-84ef-3634d50f15d2/1755856628790.jpg +65. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2db992462c2000822062c/8c5a70b2-0248-4278-b862-dc4413c13bd9/45809b72-572c-4219-bbbd-5b5ca4be1370/1756120831205.jpg +66. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755785820616.jpg +67. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755506251258.jpg +68. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755594048065.jpg +69. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755785972244.jpg +70. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755941756973.jpg +71. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2e4102462c2000822283f/8a5a7b32-e5cd-40a4-bf70-839ae72a88fd/af22c038-6517-45dc-808a-030580c52f31/1755882118082.jpg +72. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756097813854.jpg +73. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756097817157.jpg +74. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756097810155.jpg +75. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101422072.jpg +76. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101416101.jpg +77. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101424484.jpg +78. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101437883.jpg +79. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101453377.jpg +80. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101441359.jpg +81. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101477708.jpg +82. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101466184.jpg +83. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68abe7122462c2000834b0fb/9a018b2f-4d5c-4d00-94c8-634da85c00c2/a8c7d517-2517-4da9-8a30-bf1e804386d7/1756101470599.jpg +84. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2fb142462c20008227a66/b7a04e24-ee1c-46b2-b9bf-39cf81287195/5a667727-8f1c-4071-97d0-0bf815b365b6/1756451443415.jpg +85. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944177421.jpg +86. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944172390.jpg +87. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944181238.jpg +88. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944190010.jpg +89. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944185285.jpg +90. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755944174721.jpg +91. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933317992.jpg +92. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933185744.jpg +93. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933188670.jpg +94. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755933182135.jpg +95. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846377416.jpg +96. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846380059.jpg +97. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846394276.jpg +98. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846387490.jpg +99. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755846383110.jpg +100. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934367274.jpg +101. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934362137.jpg +102. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934411786.jpg +103. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934373122.jpg +104. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1755934356296.jpg +105. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276260615.jpg +106. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276252533.jpg +107. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276242877.jpg +108. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276232667.jpg +109. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276247893.jpg +110. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a8120c2462c200082d155a/c21d850d-3f69-46ca-8fb6-f20a7e1505d7/6c0d09cb-c362-48b7-b79e-5b70b87fdae6/1756276238468.jpg +111. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670249224.jpg +112. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670256658.jpg +113. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670264870.jpg +114. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670260119.jpg +115. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670337209.jpg +116. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670313387.jpg +117. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68997bba2462c200081983ed/b47d6ccd-c1f5-4a3f-81b8-a070a7c7f932/47388bc4-c2f7-43f0-8517-245cc6bb5e7a/1755670322699.jpg +118. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756540953969.jpg +119. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541299398.jpg +120. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541327608.jpg +121. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541387845.jpg +122. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689356482462c2000813f0a9/b97367a4-d5aa-4e48-9b67-ac25da3ae346/86b4372d-25f7-4561-95a5-467aa728c4d2/1756541432668.jpg +123. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685262419.jpg +124. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685250603.jpg +125. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685265118.jpg +126. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685246527.jpg +127. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685395153.jpg +128. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685386138.jpg +129. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685391435.jpg +130. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755685376705.jpg +131. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766907917.jpg +132. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766911978.jpg +133. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766915443.jpg +134. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755766906207.jpg +135. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699045889.jpg +136. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699042757.jpg +137. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699038725.jpg +138. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699034383.jpg +139. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699100471.jpg +140. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699123461.jpg +141. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699557330.jpg +142. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699501611.jpg +143. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755699475529.jpg +144. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689aef602462c200081ba250/bb2510ce-9a4f-41ff-ad21-f4f1a35d88a9/1a187505-9775-4b20-8984-a98c99de2df5/1755914888529.jpg +145. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68b261f12462c2000844b602/d62fbb11-191e-4eef-aef9-f4f8767894e8/faf8c968-ad27-4822-8e47-c5145d3225ef/1756552744999.jpg +146. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689d7c732462c200081e7eff/d53edf53-6e5a-4e6e-8b45-1fc4c5149777/3865c61a-4a8c-4d3c-88c2-e051859b63b0/1756272169698.jpg +147. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372862210.jpg +148. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372773933.jpg +149. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372889852.jpg +150. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756372958676.jpg +151. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373138375.jpg +152. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373194932.jpg +153. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512173612.jpg +154. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512124754.jpg +155. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512109373.jpg +156. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373153827.jpg +157. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373160500.jpg +158. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373172649.jpg +159. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1755512132376.jpg +160. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373504815.jpg +161. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373629953.jpg +162. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373533355.jpg +163. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373516419.jpg +164. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/689451822462c20008155821/deaecb52-e45a-410a-8022-bfe4e496290d/72db6991-ae72-42b2-a4e8-76eec3e7bae1/1756373488114.jpg +165. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047429145.jpg +166. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047439458.jpg +167. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047529554.jpg +168. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047633518.jpg +169. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047619813.jpg +170. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047700054.jpg +171. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047715120.jpg +172. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047791816.jpg +173. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756047787045.jpg +174. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756048012523.jpg +175. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a2c4ed2462c20008219b74/ff3c2eb7-535c-4bae-a7c6-4063b427feb3/6e76218e-0674-4f8d-95f6-74bce999805f/1756048002346.jpg +176. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595622454.jpg +177. 
https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595657088.jpg +178. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595685922.jpg +179. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755595748267.jpg +180. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755598730557.jpg +181. https://bmzbbujw9kal.compat.objectstorage.ap-mumbai-1.oraclecloud.com/odev-dev-diksha-manage-learn/survey/68a442e82462c2000825221e/f9ffe90a-3d23-4d96-9824-8232783510e0/72414da0-a0b5-4575-a2c4-fb77efec9641/1755598775105.jpg \ No newline at end of file diff --git a/evidence-analysis-process/processor/validate-input-output-csv.py b/evidence-analysis-process/processor/validate-input-output-csv.py new file mode 100644 index 00000000..4b78697f --- /dev/null +++ b/evidence-analysis-process/processor/validate-input-output-csv.py @@ -0,0 +1,93 @@ +import csv +import pandas as pd + +# ==== CONFIG ==== +csv1_path = "/Users/user/Documents/AI/parallel-process/output [pre-processor]/preprocessed_data.csv" +csv2_path = "/Users/user/Documents/AI/OUPUT/MAIN-SPLITS/merged_output.csv" +output_missing_rows = "missing_rows.csv" + +# ==== STEP 0: Print row counts (excluding header) ==== +print("πŸ“Š Row counts for the given files (excluding header):") + +# CSV 1 +with open(csv1_path, newline="") as fp: + csv1_count = sum(1 for _ in csv.reader(fp)) - 1 +print(f"{csv1_path}: {csv1_count}") 
+ +# CSV 2 +with open(csv2_path, newline="") as fp: + csv2_count = sum(1 for _ in csv.reader(fp)) - 1 +print(f"{csv2_path}: {csv2_count}") + +# ==== STEP 1: Read both CSVs ==== +df_csv1 = pd.read_csv(csv1_path, dtype=str) +df_csv2 = pd.read_csv(csv2_path, dtype=str) + +# ==== STEP 2: Limit columns up to 'Project Evidence' ==== +if 'Project Evidence' not in df_csv1.columns: + raise ValueError("'Project Evidence' column not found in first CSV") +if 'Project Evidence' not in df_csv2.columns: + raise ValueError("'Project Evidence' column not found in second CSV") + +# Get column names up to and including 'Project Evidence' +cols_to_check = list(df_csv1.columns[:df_csv1.columns.get_loc('Project Evidence') + 1]) + +df_csv1 = df_csv1[cols_to_check].astype(str) +df_csv2 = df_csv2[cols_to_check].astype(str) + +# ==== STEP 3: Identify missing rows (in CSV1 but not in CSV2) ==== +merged = df_csv1.merge(df_csv2.drop_duplicates(), how='left', indicator=True) +missing_rows = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge']) + +if missing_rows.empty: + print("βœ… No missing rows found.") +else: + print(f"⚠️ Found {len(missing_rows)} missing rows in CSV2 that exist in CSV1.") + +# ==== STEP 4: Write missing rows to CSV ==== +missing_rows.to_csv(output_missing_rows, index=False) +print(f"πŸ’Ύ Missing rows saved to: {output_missing_rows}") + +# ==== STEP 5: Generate Comparison Report ==== +print("\n" + "="*80) +print(" COMPARISON REPORT ".center(80, "=")) +print("="*80) + +print(f"\nπŸ“ FILES COMPARED:") +print(f" β€’ CSV 1 (Source): {csv1_path}") +print(f" β€’ CSV 2 (Target): {csv2_path}") + +print(f"\nπŸ“Š ROW STATISTICS:") +print(f" β€’ Rows in CSV 1: {csv1_count}") +print(f" β€’ Rows in CSV 2: {csv2_count}") +print(f" β€’ Missing rows: {len(missing_rows)}") +if csv1_count > 0: + print(f" β€’ Match rate: {((csv1_count - len(missing_rows)) / csv1_count * 100):.2f}%") + +print(f"\nπŸ“‹ COLUMNS COMPARED:") +print(f" β€’ Total columns checked: {len(cols_to_check)}") 
+if len(cols_to_check) > 5: + print(f" β€’ Columns: {', '.join(cols_to_check[:5])}...") +else: + print(f" β€’ Columns: {', '.join(cols_to_check)}") + +if not missing_rows.empty: + print(f"\n⚠️ MISSING ROWS ANALYSIS:") + print(f" β€’ Total missing: {len(missing_rows)}") + if csv1_count > 0: + print(f" β€’ Percentage missing: {(len(missing_rows) / csv1_count * 100):.2f}%") + print(f" β€’ Output file: {output_missing_rows}") + + # Show first few missing rows as sample + print(f"\nπŸ“‹ SAMPLE MISSING ROWS (first 3):") + for idx, row in missing_rows.head(3).iterrows(): + print(f" Row {idx + 2}:") # +2 because of 0-indexing and header + for col in cols_to_check[:3]: # Show first 3 columns + value = str(row[col])[:50] # Truncate long values + print(f" β€’ {col}: {value}") +else: + print(f"\nβœ… RESULT: All rows from CSV 1 exist in CSV 2") + +print("\n" + "="*80) +print(" COMPARISON COMPLETED ".center(80, "=")) +print("="*80 + "\n") \ No newline at end of file diff --git a/evidence-analysis-process/requirements.txt b/evidence-analysis-process/requirements.txt new file mode 100644 index 00000000..a96544a7 --- /dev/null +++ b/evidence-analysis-process/requirements.txt @@ -0,0 +1,8 @@ +pandas +openpyxl +httpx +requests +google-generativeai +typing-extensions +python-dotenv +tqdm \ No newline at end of file diff --git a/evidence-analysis-process/webpage/home.html b/evidence-analysis-process/webpage/home.html new file mode 100644 index 00000000..51c17d2c --- /dev/null +++ b/evidence-analysis-process/webpage/home.html @@ -0,0 +1,900 @@ + + + + + MIP Evidence Report + + + + + + +
+
+

MIP Evidence Analysis Dashboard

+
+
+ + +
+
+ + +
+ + +
+
+ +
+
+

Processing your data...

+
+ +
+ +
+
+ + + + + \ No newline at end of file From 4476edaea11c116b98ef9cbd76c0e51b9b9d6116 Mon Sep 17 00:00:00 2001 From: priyanka-TL Date: Wed, 10 Dec 2025 15:08:48 +0530 Subject: [PATCH 02/11] Add sample environment files for GEMINI API keys and token --- evidence analysis/.env.sample | 1 + evidence-analysis-process/.env.sample | 1 + 2 files changed, 2 insertions(+) create mode 100644 evidence analysis/.env.sample create mode 100644 evidence-analysis-process/.env.sample diff --git a/evidence analysis/.env.sample b/evidence analysis/.env.sample new file mode 100644 index 00000000..3b9078a1 --- /dev/null +++ b/evidence analysis/.env.sample @@ -0,0 +1 @@ +GEMINI_API_KEYS=your_gemini_api_key_here \ No newline at end of file diff --git a/evidence-analysis-process/.env.sample b/evidence-analysis-process/.env.sample new file mode 100644 index 00000000..32815684 --- /dev/null +++ b/evidence-analysis-process/.env.sample @@ -0,0 +1 @@ +GEMINI_TOKEN=your_gemini_token_here \ No newline at end of file From 1fa652816e363905331cbf5d7115ab03bf102fba Mon Sep 17 00:00:00 2001 From: prajwal Date: Thu, 26 Mar 2026 11:24:31 +0530 Subject: [PATCH 03/11] npm-pkg changes for multi-user project access feature --- interface-routes/elevate-routes.json | 44 +++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json index 10b908aa..56c565de 100644 --- a/interface-routes/elevate-routes.json +++ b/interface-routes/elevate-routes.json @@ -14269,6 +14269,48 @@ "packageName": "elevate-self-creation-portal" } ] - } + + }, + { + "sourceRoute": "/mentoring/v1/connections/checkConnection", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/project/v1/userProjects/updateAcl", + "type": "POST", + "priority": "MUST_HAVE", + 
"inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "project", + "packageName": "elevate-project" + } + ], + "service": "project" + }, + { + "sourceRoute": "/project/v1/userProjects/updateAcl/:id", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "project", + "packageName": "elevate-project" + } + ], + "service": "project" + } ] } \ No newline at end of file From 657b223b8f7efbc389be58c804b49eec80fc8691 Mon Sep 17 00:00:00 2001 From: prajwal Date: Thu, 26 Mar 2026 11:28:26 +0530 Subject: [PATCH 04/11] unwanted config removed --- interface-routes/elevate-routes.json | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json index 56c565de..5299f847 100644 --- a/interface-routes/elevate-routes.json +++ b/interface-routes/elevate-routes.json @@ -14271,19 +14271,6 @@ ] }, - { - "sourceRoute": "/mentoring/v1/connections/checkConnection", - "type": "POST", - "priority": "MUST_HAVE", - "inSequence": false, - "orchestrated": false, - "targetPackages": [ - { - "basePackageName": "mentoring", - "packageName": "elevate-mentoring" - } - ] - }, { "sourceRoute": "/project/v1/userProjects/updateAcl", "type": "POST", From f21cec98648a4d837f3d9435d00359d1754cd8f6 Mon Sep 17 00:00:00 2001 From: sumanvpacewisdom Date: Thu, 26 Mar 2026 18:06:47 +0530 Subject: [PATCH 05/11] Add admin cache management routes for mentoring --- interface-routes/elevate-routes.json | 52 ++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json index 10b908aa..1c0326ed 100644 --- a/interface-routes/elevate-routes.json +++ b/interface-routes/elevate-routes.json @@ -14269,6 +14269,58 @@ "packageName": "elevate-self-creation-portal" } ] + }, + { + "sourceRoute": "/mentoring/v1/admin/getCacheStats", + "type": 
"GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/clearCache", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/warmUpCache", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/getCacheHealth", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] } ] } \ No newline at end of file From 7a78058445824e2048b31157f2dec8bc4c0a42f1 Mon Sep 17 00:00:00 2001 From: sumanvpacewisdom Date: Thu, 26 Mar 2026 18:12:27 +0530 Subject: [PATCH 06/11] changes --- interface-routes/elevate-routes.json | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json index 1c0326ed..d90c606e 100644 --- a/interface-routes/elevate-routes.json +++ b/interface-routes/elevate-routes.json @@ -14270,6 +14270,34 @@ } ] }, + { + "sourceRoute": "/project/v1/userProjects/updateAcl", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "project", + "packageName": "elevate-project" + } + ], + "service": "project" + }, + { + "sourceRoute": "/project/v1/userProjects/updateAcl/:id", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": 
"project", + "packageName": "elevate-project" + } + ], + "service": "project" + }, { "sourceRoute": "/mentoring/v1/admin/getCacheStats", "type": "GET", From a01c132b60c44e893e7895aab0d4b743edb95c70 Mon Sep 17 00:00:00 2001 From: sumanvpacewisdom Date: Thu, 26 Mar 2026 18:16:59 +0530 Subject: [PATCH 07/11] removing unessary change --- interface-routes/elevate-routes.json | 1 + 1 file changed, 1 insertion(+) diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json index d90c606e..18941645 100644 --- a/interface-routes/elevate-routes.json +++ b/interface-routes/elevate-routes.json @@ -14269,6 +14269,7 @@ "packageName": "elevate-self-creation-portal" } ] + }, { "sourceRoute": "/project/v1/userProjects/updateAcl", From 6111903969e39f08269e86a121897f5ce51df0a6 Mon Sep 17 00:00:00 2001 From: sumanvpacewisdom Date: Thu, 26 Mar 2026 18:22:14 +0530 Subject: [PATCH 08/11] adding new api for saas routes --- interface-routes/saas-routes.json | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/interface-routes/saas-routes.json b/interface-routes/saas-routes.json index 01e08cc7..f7e6e88f 100644 --- a/interface-routes/saas-routes.json +++ b/interface-routes/saas-routes.json @@ -12189,6 +12189,58 @@ "packageName": "elevate-mentoring" } ] + }, + { + "sourceRoute": "/mentoring/v1/admin/getCacheStats", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/clearCache", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/warmUpCache", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + 
{ + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/getCacheHealth", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] } ] } From 9a0cc6ea158410b30c45788ad0d5eeb6f8ff2433 Mon Sep 17 00:00:00 2001 From: sumanvpacewisdom Date: Thu, 26 Mar 2026 20:10:04 +0530 Subject: [PATCH 09/11] adding new apis in mentoring routes --- elevate-mentoring/constants/routes.js | 52 +++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/elevate-mentoring/constants/routes.js b/elevate-mentoring/constants/routes.js index 9e261bbf..0f296276 100644 --- a/elevate-mentoring/constants/routes.js +++ b/elevate-mentoring/constants/routes.js @@ -1843,5 +1843,57 @@ module.exports = { type: "GET" }, }, + { + "sourceRoute": "/mentoring/v1/admin/getCacheStats", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/clearCache", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/warmUpCache", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + }, + { + "sourceRoute": "/mentoring/v1/admin/getCacheHealth", + "type": "GET", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "mentoring", + "packageName": "elevate-mentoring" + } + ] + } ], } From 
7e0072fb00331fb5311d74922b4057360697328e Mon Sep 17 00:00:00 2001 From: PraveenDass Date: Mon, 20 Apr 2026 18:18:29 +0530 Subject: [PATCH 10/11] adding routes for develop branch --- interface-routes/elevate-routes.json | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json index 18941645..702a7f02 100644 --- a/interface-routes/elevate-routes.json +++ b/interface-routes/elevate-routes.json @@ -9822,6 +9822,34 @@ ], "service": "survey" }, + { + "sourceRoute": "/survey/v1/admin/clearTenantCache/:id", + "type": "DELETE", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "survey", + "packageName": "elevate-survey-observation" + } + ], + "service": "survey" + }, + { + "sourceRoute": "/survey/v1/admin/clearTenantCache", + "type": "DELETE", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "survey", + "packageName": "elevate-survey-observation" + } + ], + "service": "survey" + }, { "sourceRoute": "/user/v1/account/login", "type": "POST", From 7411665039971adfe29e4b8379aeb451a487fbff Mon Sep 17 00:00:00 2001 From: prajwal Date: Sun, 26 Apr 2026 17:57:07 +0530 Subject: [PATCH 11/11] admin api route to clear tenantCache added --- interface-routes/elevate-routes.json | 30 +++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/interface-routes/elevate-routes.json b/interface-routes/elevate-routes.json index 702a7f02..a532cf5e 100644 --- a/interface-routes/elevate-routes.json +++ b/interface-routes/elevate-routes.json @@ -14378,6 +14378,34 @@ "packageName": "elevate-mentoring" } ] - } + }, + { + "sourceRoute": "/project/v1/admin/clearTenantCache", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "project", + 
"packageName": "elevate-project" + } + ], + "service": "project" + }, + { + "sourceRoute": "/project/v1/admin/clearTenantCache/:id", + "type": "POST", + "priority": "MUST_HAVE", + "inSequence": false, + "orchestrated": false, + "targetPackages": [ + { + "basePackageName": "project", + "packageName": "elevate-project" + } + ], + "service": "project" + } ] } \ No newline at end of file