StellarDevHub · Glittersup · Jun 26, 2026
diff --git a/backend/prisma/migrations/20260626000000_curriculum_search_index/migration.sql b/backend/prisma/migrations/20260626000000_curriculum_search_index/migration.sql
@@ -0,0 +1,44 @@
+-- CreateTable: one denormalised, searchable row per indexed curriculum entity.
+CREATE TABLE "curriculum_search_entries" (
+    "id" TEXT NOT NULL,
+    "workspaceId" TEXT NOT NULL DEFAULT 'default',
+    "entityType" TEXT NOT NULL, -- 'course' | 'module' | 'lesson'
+    "entityId" TEXT NOT NULL, -- id of the source entity
+    "courseId" TEXT,
+    "title" TEXT NOT NULL,
+    "content" TEXT NOT NULL,
+    "difficulty" TEXT,
+    "searchVector" tsvector, -- filled in by the trigger defined later in this migration
+    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updatedAt" TIMESTAMP(3) NOT NULL, -- no database default; the writer must supply a value
+
+    CONSTRAINT "curriculum_search_entries_pkey" PRIMARY KEY ("id")
+);
+
+-- CreateIndex: at most one search row per (workspace, entity type, entity id).
+CREATE UNIQUE INDEX "curriculum_search_entries_workspaceId_entityType_entityId_key" ON "curriculum_search_entries"("workspaceId", "entityType", "entityId");
+
+-- CreateIndex: composite index for workspace + entity-type lookups.
+CREATE INDEX "curriculum_search_entries_workspaceId_entityType_idx" ON "curriculum_search_entries"("workspaceId", "entityType");
+
+-- CreateIndex: composite index for workspace + course lookups.
+CREATE INDEX "curriculum_search_entries_workspaceId_courseId_idx" ON "curriculum_search_entries"("workspaceId", "courseId");
+
+-- Full-text search GIN index over the trigger-maintained "searchVector" tsvector column.
+CREATE INDEX "curriculum_search_entries_search_idx" ON "curriculum_search_entries" USING GIN ("searchVector");
+
+-- Trigger function: rebuilds "searchVector" from "title" and "content" using the 'english'
+-- configuration. Title lexemes get weight A and body lexemes weight B so title matches can rank higher.
+CREATE OR REPLACE FUNCTION curriculum_search_entries_vector_update() RETURNS trigger AS $$
+BEGIN
+  NEW."searchVector" :=
+    setweight(to_tsvector('english', COALESCE(NEW."title", '')), 'A') ||
+    setweight(to_tsvector('english', COALESCE(NEW."content", '')), 'B');
+  RETURN NEW; -- used by a BEFORE trigger: the returned row is the one that gets written
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE TRIGGER curriculum_search_entries_vector_trigger
+  BEFORE INSERT OR UPDATE OF "title", "content" -- updates that assign neither text column skip the rebuild
+  ON "curriculum_search_entries"
+  FOR EACH ROW EXECUTE FUNCTION curriculum_search_entries_vector_update(); -- EXECUTE FUNCTION needs PostgreSQL 11+
diff --git a/backend/prisma/schema.prisma b/backend/prisma/schema.prisma
@@ -271,3 +271,26 @@ model StudentActivity {
   @@map("student_activities")
 }
 
+/// Denormalised, full-text searchable index of curriculum content (courses, modules, lessons).
+/// Populated by the reindex routine; queried via PostgreSQL full-text search (see CurriculumSearchService).
+model CurriculumSearchEntry {
+  id           String                   @id @default(cuid())
+  workspaceId  String                   @default("default")
+  entityType   String // 'course' | 'module' | 'lesson'
+  entityId     String // source id, e.g. 'course-1-lesson-1'
+  courseId     String?
+  title        String
+  content      String // text block indexed for search (title + description)
+  difficulty   String?
+  /// Maintained by the curriculum_search_entries_vector_update trigger. The GIN @@index below mirrors the migration's index so Prisma Migrate tracks it.
+  searchVector Unsupported("tsvector")?
+  createdAt    DateTime                 @default(now())
+  updatedAt    DateTime                 @updatedAt
+
+  @@unique([workspaceId, entityType, entityId])
+  @@index([workspaceId, entityType])
+  @@index([workspaceId, courseId])
+  @@index([searchVector], type: Gin, map: "curriculum_search_entries_search_idx")
+  @@map("curriculum_search_entries")
+}
+
diff --git a/backend/src/routes/index.ts b/backend/src/routes/index.ts
@@ -13,6 +13,7 @@ import exportRouter from './export.routes.js';
 import generatorRouter from './generator/generator.routes.js';
 import healthRouter from './health.routes.js';
 import learningRoutes from './learning/learning.routes.js';
+import curriculumSearchRouter from './search/curriculum-search.routes.js';
 import securityRouter from './security.routes.js';
 import studentsRouter from './students.js';
 
@@ -33,6 +34,7 @@ router.use('/dashboard', dashboardRoutes);
 router.use('/feedback', feedbackRouter);
 router.use('/auth', authRoutes);
 router.use('/learning', learningRoutes);
+router.use('/search', curriculumSearchRouter);
 router.use('/contracts', contractRouter);
 router.use('/notifications', notificationRouter);
 router.use('/security', securityRouter);

diff --git a/backend/src/routes/search/curriculum-search.routes.ts b/backend/src/routes/search/curriculum-search.routes.ts
@@ -0,0 +1,59 @@
+import { Router, Request, Response } from 'express';
+import { getWorkspaceId } from '../../middleware/WorkspaceContext.js';
+import { validateQuery } from '../../utils/validation.js';
+import { curriculumSearchQuerySchema } from './curriculum-search.schemas.js';
+import {
+  reindexCurriculum,
+  searchCurriculum,
+} from '../../search/curriculum/CurriculumSearchService.js';
+
+const router = Router();
+
+/**
+ * GET /api/v1/search
+ *
+ * Full-text search across indexed curriculum content (courses, modules, lessons).
+ * Optional filters: `type`, `difficulty`, `courseId`; pagination via `limit`/`offset`.
+ * Results are ranked by relevance. Failures are logged and answered with a generic 500.
+ */
+router.get('/', validateQuery(curriculumSearchQuerySchema), async (req: Request, res: Response) => {
+  try {
+    // Re-parse to obtain the coerced/typed values (validateQuery only validates).
+    const { q, type, difficulty, courseId, limit, offset } = curriculumSearchQuerySchema.parse(
+      req.query
+    );
+    const results = await searchCurriculum({
+      query: q,
+      workspaceId: getWorkspaceId() ?? 'default',
+      entityType: type,
+      difficulty,
+      courseId,
+      limit,
+      offset,
+    });
+
+    res.json({ query: q, count: results.length, limit, offset, results });
+  } catch (err: unknown) {
+    // Keep the client-facing message generic, but do not lose the cause.
+    console.error('Curriculum search failed:', err);
+    res.status(500).json({ error: 'Search request failed' });
+  }
+});
+
+/**
+ * POST /api/v1/search/reindex
+ *
+ * Rebuilds the curriculum search index for the current workspace (admin/maintenance use)
+ * and logs failures. NOTE(review): no auth guard is attached here — confirm one runs upstream.
+ */
+router.post('/reindex', async (_req: Request, res: Response) => {
+  try {
+    const indexed = await reindexCurriculum(getWorkspaceId() ?? 'default');
+    res.json({ indexed });
+  } catch (err: unknown) {
+    console.error('Curriculum reindex failed:', err);
+    res.status(500).json({ error: 'Reindex request failed' });
+  }
+});
+
+export default router;
diff --git a/backend/src/routes/search/curriculum-search.schemas.ts b/backend/src/routes/search/curriculum-search.schemas.ts
@@ -0,0 +1,19 @@
+import { z } from 'zod';
+
+const ENTITY_TYPES = ['course', 'module', 'lesson'] as const;
+const DIFFICULTY_LEVELS = ['beginner', 'intermediate', 'advanced'] as const;
+
+/**
+ * Query-string contract for `GET /api/v1/search`: `q` is the mandatory keyword string; `type`,
+ * `difficulty` and `courseId` are optional filters; `limit` (1–50) and `offset` (>= 0) are coerced.
+ */
+export const curriculumSearchQuerySchema = z.object({
+  q: z.string().trim().min(1, 'q (search keywords) is required'),
+  type: z.enum(ENTITY_TYPES).optional(),
+  difficulty: z.enum(DIFFICULTY_LEVELS).optional(),
+  courseId: z.string().trim().min(1).optional(),
+  limit: z.coerce.number().int().min(1).max(50).default(20),
+  offset: z.coerce.number().int().min(0).default(0),
+});
+
+export type CurriculumSearchQuery = z.infer<typeof curriculumSearchQuerySchema>;
diff --git a/backend/src/search/curriculum/CurriculumSearchService.ts b/backend/src/search/curriculum/CurriculumSearchService.ts
@@ -0,0 +1,110 @@
+import prisma from '../../db/index.js';
+import { COURSES, curriculumByCourseId } from '../../routes/learning/curriculum.data.js';
+import {
+  buildCurriculumSearchQuery,
+  buildIndexEntries,
+  type CurriculumIndexEntry,
+  type CurriculumSearchParams,
+  type SearchableCourse,
+} from './curriculumSearchQuery.js';
+
+/**
+ * CurriculumSearchService — the database-facing layer of the search indexer.
+ *
+ * Responsibilities:
+ *  - `reindexCurriculum`: (re)build the `curriculum_search_entries` table from
+ *    the curriculum sources (static modules/lessons + DB course records).
+ *  - `searchCurriculum`: run the PostgreSQL full-text query and return ranked
+ *    results.
+ *
+ * The query/row-shaping logic lives in `curriculumSearchQuery.ts` (pure and unit
+ * tested); this module only wires it to Prisma.
+ */
+
+/** A single ranked search hit returned to the API (rows are cast to this shape, not validated). */
+export interface CurriculumSearchResult {
+  id: string; // primary key of the index row
+  entityType: string; // 'course' | 'module' | 'lesson'
+  entityId: string; // id of the source entity, e.g. 'course-1-lesson-1'
+  courseId: string | null; // owning course, when one was recorded
+  title: string;
+  content: string; // text block that was indexed
+  difficulty: string | null;
+  rank: number; // relevance score from the search query — presumably ts_rank; confirm in curriculumSearchQuery.ts
+}
+
+// The default Prisma export is workspace-extended. To avoid coupling to its narrowed types, the
+// client is cast to the few members used here; their arguments are therefore not type-checked.
+const db = prisma as unknown as {
+  $queryRawUnsafe: (text: string, ...values: unknown[]) => Promise<unknown[]>;
+  $transaction: (ops: unknown[]) => Promise<unknown>;
+  course: { findMany: (args: unknown) => Promise<SearchableCourse[]> };
+  curriculumSearchEntry: {
+    deleteMany: (args: unknown) => unknown;
+    createMany: (args: unknown) => unknown;
+  };
+};
+
+/**
+ * Build the de-duplicated list of rows to index from both curriculum sources.
+ *
+ * Modules and lessons come from the static curriculum (`curriculum.data.ts`); DB
+ * `Course` rows are added so live courses are searchable too. Entries are keyed by
+ * `entityType:entityId` and the first occurrence wins. No I/O — pass the DB courses in.
+ */
+export function collectIndexEntries(dbCourses: SearchableCourse[]): CurriculumIndexEntry[] {
+  const staticCourses = COURSES.map(({ id, title, description }) => ({ id, title, description }));
+  // Static entries come first, so they win whenever a DB course shares the same key.
+  const candidates = [
+    ...buildIndexEntries(staticCourses, curriculumByCourseId),
+    // DB courses contribute only 'course' rows (no static modules attached).
+    ...buildIndexEntries(dbCourses, {}),
+  ];
+
+  const uniqueByKey = new Map<string, CurriculumIndexEntry>();
+  for (const candidate of candidates) {
+    const key = `${candidate.entityType}:${candidate.entityId}`;
+    if (!uniqueByKey.has(key)) {
+      uniqueByKey.set(key, candidate);
+    }
+  }
+  return Array.from(uniqueByKey.values());
+}
+
+/**
+ * Rebuild the search index for a workspace (delete + re-insert in one transaction) and return
+ * the number of rows prepared. `searchVector` is populated by the database trigger on insert.
+ */
+export async function reindexCurriculum(workspaceId: string): Promise<number> {
+  let dbCourses: SearchableCourse[] = [];
+  try {
+    dbCourses = await db.course.findMany({
+      where: { workspaceId },
+      select: { id: true, title: true, description: true },
+    });
+  } catch (err: unknown) {
+    // Best effort: still index the static curriculum, but surface why DB courses are missing.
+    console.warn('Curriculum reindex: could not load DB courses', { workspaceId, err });
+  }
+
+  const entries = collectIndexEntries(dbCourses);
+
+  await db.$transaction([
+    db.curriculumSearchEntry.deleteMany({ where: { workspaceId } }),
+    db.curriculumSearchEntry.createMany({
+      data: entries.map((entry) => ({ ...entry, workspaceId })),
+      skipDuplicates: true,
+    }),
+  ]);
+
+  return entries.length;
+}
+
+/** Run the full-text query built for `params` and return the ranked rows. */
+export async function searchCurriculum(
+  params: CurriculumSearchParams
+): Promise<CurriculumSearchResult[]> {
+  const query = buildCurriculumSearchQuery(params);
+  const hits = await db.$queryRawUnsafe(query.text, ...query.values);
+  return hits as CurriculumSearchResult[];
+}
diff --git a/backend/src/search/curriculum/README.md b/backend/src/search/curriculum/README.md
@@ -0,0 +1,76 @@
+# Advanced Search Indexer for Learning Roadmaps
+
+A fast, relevance-ranked search API over curriculum content (courses, modules,
+lessons) backed by **PostgreSQL full-text search**.
+
+## Endpoints
+
+### `GET /api/v1/search`
+Keyword search across indexed curriculum.
+
+| Query param | Type | Notes |
+|-------------|------|-------|
+| `q` | string (required) | Keywords. Parsed with `websearch_to_tsquery` (supports quoted phrases, `or`, `-negation`). |
+| `type` | `course` \| `module` \| `lesson` | Filter by entity type. |
+| `difficulty` | `beginner` \| `intermediate` \| `advanced` | Filter (applies to lessons). |
+| `courseId` | string | Restrict to one course. |
+| `limit` | number (1–50, default 20) | Page size. |
+| `offset` | number (default 0) | Pagination offset. |
+
+Response:
+```json
+{ "query": "soroban auth", "count": 2, "limit": 20, "offset": 0,
+  "results": [ { "entityType": "lesson", "entityId": "course-1-lesson-3",
+                 "courseId": "course-1", "title": "...", "content": "...",
+                 "difficulty": "intermediate", "rank": 0.12 } ] }
+```
+
+### `POST /api/v1/search/reindex`
+Rebuilds the index for the current workspace. Run after curriculum changes or a
+fresh deploy.
+
+## How it works
+
+```
+curriculum.data.ts (modules/lessons)  ┐
+DB Course records                     ┘──▶ collectIndexEntries()
+        ──▶ curriculum_search_entries (tsvector + GIN index)
+        ──▶ GET /api/v1/search → websearch_to_tsquery + ts_rank
+```
+
+- **Index table** `curriculum_search_entries` stores one denormalised, searchable
+  row per course/module/lesson. A `tsvector` column (`searchVector`) is kept up
+  to date by a database trigger, with the **title weighted above the body** so
+  title matches rank higher.
+- **GIN index** on `searchVector` makes `@@` lookups fast.
+- **Workspace isolation**: every row carries `workspaceId`; queries and reindex
+  are scoped to the request's workspace.
+
+## Files
+
+| File | Responsibility |
+|------|----------------|
+| `curriculumSearchQuery.ts` | Pure builders: `buildIndexEntries`, `buildCurriculumSearchQuery` (parameterised, injection-safe). Unit tested. |
+| `CurriculumSearchService.ts` | Prisma wiring: `reindexCurriculum`, `searchCurriculum`, `collectIndexEntries`. |
+| `../../routes/search/curriculum-search.routes.ts` | Express routes (mounted at `/api/v1/search`). |
+| `../../routes/search/curriculum-search.schemas.ts` | Zod query validation. |
+| `prisma/migrations/20260626000000_curriculum_search_index/` | Table + tsvector + GIN index + trigger. |
+
+## Setup
+
+```bash
+cd backend
+npx prisma migrate deploy   # apply the migration
+# then rebuild the index once per workspace via the reindex endpoint:
+curl -X POST localhost:8080/api/v1/search/reindex -H 'x-workspace-id: default'
+```
+
+## Tests
+
+```bash
+cd backend
+npm test -- curriculum-search
+```
+
+Pure builder tests run without a database. The integration tests auto-skip when
+no database / migrated index table is available.