Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ faiss_index/
# ===============================
.cache/
.huggingface/
models/


# ===============================
# IDE
Expand Down
48 changes: 48 additions & 0 deletions backend/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Capstone RAG Backend
This is the backend for a Retrieval-Augmented Generation (RAG) system for intelligent document Q & A.
I will try to keep this README updated as the files change, for better usability.

## Setup

### Virtual Environment

```bash
python3 -m venv venv
source venv/bin/activate # Linux/macOS
venv\Scripts\activate # Windows
```
### Install Dependencies
pip install -r requirements.txt

### Environment Variables
Create a .env file in backend/:

DATABASE_URL=postgresql://postgres:<password>@localhost:5432/ragdb
# JWT secret key (must NOT be left as default)
# Generate a secure random key:
# In an Ubuntu terminal (preferred), type: openssl rand -hex 32
JWT_SECRET=<paste-generated-key>

# Token settings
JWT_ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=30

## Database

### Create the database in PostgreSQL
```bash
sudo -u postgres psql
```
Inside psql:
```sql
CREATE DATABASE ragdb;
```

## Run App
```bash
uvicorn app.main:app --reload
```

## Test
http://localhost:8000/ -> {"message":"Backend Running"}
http://localhost:8000/db_test -> shows PostgreSQL version
136 changes: 136 additions & 0 deletions backend/app/api/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
"""
routers/auth.py: Authentication Routes
Endpoints:
POST /auth/register: create a new user account
POST /auth/login: verify credentials, return JWT token
The frontend stores access_token and sends it in every request header:
Authorization: Bearer eyJhbGci...
"""

from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session

from app.config.database import get_db
from app.models.db_models import User, Log
from app.config.dependencies import hash_password, verify_password, create_access_token
from app.models.schemas import UserRegisterRequest, UserResponse, UserLoginRequest, TokenResponse

import uuid
from datetime import datetime

router = APIRouter(prefix="/auth", tags=["Authentication"])


# POST /auth/register
# POST /auth/register
@router.post(
    "/register",
    response_model=UserResponse,
    status_code=status.HTTP_201_CREATED,
    summary="Register a new user"
)
def register(payload: UserRegisterRequest, db: Session = Depends(get_db)):
    """
    Create a new user account.

    Rejects the request with 409 when the email or the username is
    already in use. The password is hashed with bcrypt before it is
    stored — plain text never touches the database.
    """
    # Reject duplicates up front, one clear 409 per field.
    duplicate_email = db.query(User).filter(User.email == payload.email).first()
    if duplicate_email is not None:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="An account with this email already exists."
        )

    duplicate_name = db.query(User).filter(User.username == payload.username).first()
    if duplicate_name is not None:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="This username is already taken."
        )

    # Persist the new account together with an audit-log entry.
    new_user = User(
        id=uuid.uuid4(),
        username=payload.username,
        email=payload.email,
        password_hash=hash_password(payload.password),
        created_at=datetime.utcnow()
    )
    audit_entry = Log(
        user_id=new_user.id,
        action="user_registered",
        detail=f"New user registered: {new_user.username}",
        timestamp=datetime.utcnow()
    )
    db.add(new_user)
    db.add(audit_entry)
    db.commit()
    db.refresh(new_user)

    return new_user


# POST /auth/login
# POST /auth/login
@router.post(
    "/login",
    response_model=TokenResponse,
    summary="Login and receive a JWT token"
)
def login(payload: UserLoginRequest, db: Session = Depends(get_db)):
    """
    Verify credentials and return a JWT access token.

    The frontend stores this token and sends it in the
    Authorization header on every subsequent request:
        Authorization: Bearer <token>

    The token payload contains:
        { "sub": "<user_id>" }

    This is decoded by get_current_user() in dependencies.py
    to identify WHO is making each request.

    Raises:
        HTTPException 401: if the email is unknown or the password is wrong.
    """
    # Find user by email
    user = db.query(User).filter(User.email == payload.email).first()

    # One shared error message for "no such user" and "bad password",
    # so an attacker cannot probe which emails are registered.
    if not user or not verify_password(payload.password, user.password_hash):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect email or password."
        )

    # Generate JWT: sub = user's UUID (this is how every request is tied to a user)
    token = create_access_token(data={"sub": str(user.id)})

    # Log the action
    db.add(Log(
        user_id=user.id,
        action="user_login",
        detail=f"User logged in: {user.username}",
        timestamp=datetime.utcnow()
    ))
    db.commit()

    # TokenResponse + UserResponse (instead of a plain dict) lets FastAPI
    # validate the response against the schema and include all required
    # fields (id, username, email, created_at).
    return TokenResponse(
        access_token=token,
        token_type="bearer",
        user=UserResponse.model_validate(user)
    )
198 changes: 198 additions & 0 deletions backend/app/api/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import uuid
from datetime import datetime
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException, status, BackgroundTasks
from sqlalchemy.orm import Session

from app.config.database import get_db
from app.config.dependencies import get_current_user
from app.models.db_models import User, Document, DocumentChunk, Log
from app.ingestion.parser import extract_text

router = APIRouter(tags=["Documents"])


@router.post("/upload", status_code=status.HTTP_202_ACCEPTED)
async def upload_document(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """
    Accept an uploaded document, record its metadata, and queue
    background processing; responds 202 without waiting for it.
    """
    # One shared id ties together the saved file, its DB row, and its chunks.
    document_id = f"doc_{uuid.uuid4().hex[:12]}"

    # Persist the raw file to disk; a rejected file becomes an HTTP 400.
    try:
        file_info = await save_upload_file(file, user_id=str(current_user.id))
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    # Record metadata in PostgreSQL with status="processing",
    # plus an audit-log entry, in a single commit.
    db.add(Document(
        document_id=document_id,
        filename=file_info["filename"],
        filepath=file_info["filepath"],
        file_type=file_info["file_type"],
        uploaded_by=current_user.id,
        upload_date=datetime.utcnow(),
        status="processing"
    ))
    db.add(Log(
        user_id=current_user.id,
        action="document_upload_started",
        detail=f"file={file_info['filename']} doc_id={document_id}",
        timestamp=datetime.utcnow()
    ))
    db.commit()

    # Heavy work (parse, chunk, index) runs off the request path.
    background_tasks.add_task(
        process_document_background,
        document_id=document_id,
        filepath=file_info["filepath"],
        file_type=file_info["file_type"],
        filename=file_info["filename"],
        user_id=current_user.id,
        username=current_user.username
    )

    # Return immediately — don't wait for processing.
    return {
        "message": "Document uploaded. Processing in background.",
        "document_id": document_id,
        "filename": file_info["filename"],
        "status": "processing"
    }


async def process_document_background(
    document_id: str,
    filepath: str,
    file_type: str,
    filename: str,
    user_id: uuid.UUID,  # User.id is a uuid.uuid4(), not an int
    username: str
):
    """
    Parse, chunk, and index an uploaded document, then mark it
    completed (or failed) in PostgreSQL.

    Runs outside the request cycle, so it opens its own DB session
    instead of using the request-scoped get_db dependency.
    """
    from app.config.database import SessionLocal
    db = SessionLocal()

    try:
        # Extract text
        # NOTE(review): chunk_text / index_chunks are not imported in this
        # file's visible import block — confirm they are in scope.
        text = extract_text(filepath, file_type)
        print(f"✅ Text extracted from {filename}")

        # Chunk the text
        chunks = chunk_text(
            text=text,
            document_id=document_id,
            source_name=filename
        )
        print(f"✅ {len(chunks)} chunks created")

        # Index chunks in ChromaDB
        index_chunks(
            chunks=chunks,
            document_id=document_id,
            uploaded_by=username,
            file_type=file_type
        )
        print(f"✅ Chunks indexed in ChromaDB")

        # Save chunk metadata to PostgreSQL
        for chunk in chunks:
            db.add(DocumentChunk(
                chunk_id=chunk["chunk_id"],
                document_id=document_id,
                source_name=filename,
                text=chunk["text"],
                page=chunk.get("page", 1),
                start_char=chunk.get("start_char", 0),
                end_char=chunk.get("end_char", 0),
            ))

        # Mark document as completed
        doc = db.query(Document).filter(
            Document.document_id == document_id
        ).first()
        if doc:
            doc.status = "completed"

        db.add(Log(
            user_id=user_id,
            action="document_uploaded",
            detail=f"file={filename} chunks={len(chunks)} doc_id={document_id}",
            timestamp=datetime.utcnow()
        ))
        db.commit()
        print(f" ✅ Document {document_id} processing complete")

    except Exception as e:
        # Any processing failure: record it, but never let the
        # error-handling itself crash the background task.
        print(f"❌ Failed: {e}")
        try:
            doc = db.query(Document).filter(
                Document.document_id == document_id
            ).first()
            if doc:
                doc.status = "failed"
            db.add(Log(
                user_id=user_id,
                action="document_upload_failed",
                detail=f"file={filename} error={str(e)}",
                timestamp=datetime.utcnow()
            ))
            db.commit()
        except Exception:
            # Best-effort bookkeeping only — narrow from a bare `except:`
            # so KeyboardInterrupt/SystemExit still propagate.
            pass

    finally:
        db.close()


@router.get("/documents", status_code=status.HTTP_200_OK)
def list_documents(
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return every document the current user uploaded, newest first."""
    rows = (
        db.query(Document)
        .filter(Document.uploaded_by == current_user.id)
        .order_by(Document.upload_date.desc())
        .all()
    )

    summaries = []
    for row in rows:
        summaries.append({
            "document_id": row.document_id,
            "filename": row.filename,
            "file_type": row.file_type,
            "status": row.status,
            "upload_date": row.upload_date,
        })

    return {"documents": summaries, "total": len(summaries)}


@router.get("/document/{document_id}/status")
def get_document_status(
    document_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Return the processing status of one of the current user's documents."""
    record = (
        db.query(Document)
        .filter(
            Document.document_id == document_id,
            Document.uploaded_by == current_user.id,
        )
        .first()
    )

    # 404 covers both "no such document" and "not owned by this user".
    if record is None:
        raise HTTPException(status_code=404, detail="Document not found")

    return {
        "document_id": record.document_id,
        "filename": record.filename,
        "status": record.status,
        "upload_date": record.upload_date
    }
Loading