Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
-- CreateTable
-- Search index table: one row per indexed curriculum entity. Rows are unique
-- per ("workspaceId", "entityType", "entityId") through the unique index
-- created later in this migration.
CREATE TABLE "curriculum_search_entries" (
"id" TEXT NOT NULL,
"workspaceId" TEXT NOT NULL DEFAULT 'default',
"entityType" TEXT NOT NULL,
"entityId" TEXT NOT NULL,
"courseId" TEXT,
"title" TEXT NOT NULL,
"content" TEXT NOT NULL,
"difficulty" TEXT,
-- Nullable so writers never have to supply it; the vector-update trigger
-- defined later in this migration fills it from "title" and "content".
"searchVector" tsvector,
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- No database-level default: every INSERT must provide a value.
"updatedAt" TIMESTAMP(3) NOT NULL,

CONSTRAINT "curriculum_search_entries_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
-- Enforces one index row per entity within a workspace.
CREATE UNIQUE INDEX "curriculum_search_entries_workspaceId_entityType_entityId_key" ON "curriculum_search_entries"("workspaceId", "entityType", "entityId");

-- CreateIndex
-- NOTE(review): ("workspaceId", "entityType") is a leading prefix of the unique
-- index above, so this index may be redundant; confirm before keeping both.
-- If it is removed, the matching index in the Prisma model must go with it.
CREATE INDEX "curriculum_search_entries_workspaceId_entityType_idx" ON "curriculum_search_entries"("workspaceId", "entityType");

-- CreateIndex
-- Supports filtering a workspace's entries by course.
CREATE INDEX "curriculum_search_entries_workspaceId_courseId_idx" ON "curriculum_search_entries"("workspaceId", "courseId");

-- Full-text search GIN index over the maintained tsvector column.
CREATE INDEX "curriculum_search_entries_search_idx" ON "curriculum_search_entries" USING GIN ("searchVector");

-- Trigger keeps "searchVector" in sync with the indexed text. The title is
-- weighted higher than the body so title matches rank above description matches.
-- Both halves are built with the 'english' text search configuration.
-- NOTE(review): search queries presumably need the same configuration for
-- stemming to line up; confirm against the query builder.
CREATE OR REPLACE FUNCTION curriculum_search_entries_vector_update() RETURNS trigger AS $$
BEGIN
-- COALESCE is defensive: both columns are declared NOT NULL, but a NULL
-- operand would otherwise make the whole concatenation NULL.
NEW."searchVector" :=
setweight(to_tsvector('english', COALESCE(NEW."title", '')), 'A') ||
setweight(to_tsvector('english', COALESCE(NEW."content", '')), 'B');
-- Returning NEW from a BEFORE row trigger stores the modified row.
RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Runs for every inserted row, and for updates whose SET list names "title" or
-- "content"; updates that touch only other columns leave the vector untouched.
CREATE TRIGGER curriculum_search_entries_vector_trigger
BEFORE INSERT OR UPDATE OF "title", "content"
ON "curriculum_search_entries"
FOR EACH ROW EXECUTE FUNCTION curriculum_search_entries_vector_update();
23 changes: 23 additions & 0 deletions backend/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -271,3 +271,26 @@ model StudentActivity {
@@map("student_activities")
}

/// Denormalised, full-text searchable index of curriculum content
/// (courses, modules and lessons). Populated by the reindex routine and
/// queried via PostgreSQL full-text search (see CurriculumSearchService).
model CurriculumSearchEntry {
  id          String  @id @default(cuid())
  workspaceId String  @default("default")
  entityType  String // 'course' | 'module' | 'lesson'
  entityId    String // source id, e.g. 'course-1-lesson-1'
  courseId    String?
  title       String
  content     String // text block indexed for search (title + description)
  difficulty  String?
  /// Maintained automatically by the curriculum_search_entries_vector_update trigger.
  /// Optional on purpose: writers never set it, the trigger does.
  searchVector Unsupported("tsvector")?
  createdAt   DateTime @default(now())
  updatedAt   DateTime @updatedAt

  @@unique([workspaceId, entityType, entityId])
  @@index([workspaceId, entityType])
  @@index([workspaceId, courseId])
  // The GIN index is created by raw SQL in the migration. Declaring it here,
  // under the same name, stops Prisma Migrate from treating it as drift and
  // emitting a DROP INDEX in the next generated migration.
  @@index([searchVector], map: "curriculum_search_entries_search_idx", type: Gin)
  @@map("curriculum_search_entries")
}

2 changes: 2 additions & 0 deletions backend/src/routes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import exportRouter from './export.routes.js';
import generatorRouter from './generator/generator.routes.js';
import healthRouter from './health.routes.js';
import learningRoutes from './learning/learning.routes.js';
import curriculumSearchRouter from './search/curriculum-search.routes.js';
import securityRouter from './security.routes.js';
import studentsRouter from './students.js';

Expand All @@ -33,6 +34,7 @@ router.use('/dashboard', dashboardRoutes);
router.use('/feedback', feedbackRouter);
router.use('/auth', authRoutes);
router.use('/learning', learningRoutes);
router.use('/search', curriculumSearchRouter);
router.use('/contracts', contractRouter);
router.use('/notifications', notificationRouter);
router.use('/security', securityRouter);
Expand Down
59 changes: 59 additions & 0 deletions backend/src/routes/search/curriculum-search.routes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { Router, Request, Response } from 'express';
import { getWorkspaceId } from '../../middleware/WorkspaceContext.js';
import { validateQuery } from '../../utils/validation.js';
import { curriculumSearchQuerySchema } from './curriculum-search.schemas.js';
import {
reindexCurriculum,
searchCurriculum,
} from '../../search/curriculum/CurriculumSearchService.js';

const router = Router();

/**
 * GET /api/v1/search
 *
 * Full-text search across indexed curriculum content (courses, modules,
 * lessons). Supports filtering by `type`, `difficulty` and `courseId`, and
 * pagination via `limit`/`offset`. Results are ranked by relevance.
 *
 * Responses:
 * - 200 `{ query, count, limit, offset, results }`; `count` is the number of
 *   hits in this page, not the total number of matches.
 * - 400 when the query string does not satisfy `curriculumSearchQuerySchema`.
 * - 500 when the search itself fails.
 */
router.get('/', validateQuery(curriculumSearchQuerySchema), async (req: Request, res: Response) => {
  // validateQuery only validates, so parse again to obtain the coerced values
  // and defaults. safeParse keeps a bad query a client error (400) rather
  // than letting a thrown ZodError fall into the 500 path below.
  const parsed = curriculumSearchQuerySchema.safeParse(req.query);
  if (!parsed.success) {
    res.status(400).json({ error: 'Invalid search query', details: parsed.error.issues });
    return;
  }
  const { q, type, difficulty, courseId, limit, offset } = parsed.data;

  try {
    const workspaceId = getWorkspaceId() ?? 'default';

    const results = await searchCurriculum({
      query: q,
      workspaceId,
      entityType: type,
      difficulty,
      courseId,
      limit,
      offset,
    });

    res.json({ query: q, count: results.length, limit, offset, results });
  } catch (error: unknown) {
    // Keep the client-facing message generic, but do not lose the cause.
    // NOTE(review): switch to the project's logger if one exists.
    console.error('Curriculum search request failed', error);
    res.status(500).json({ error: 'Search request failed' });
  }
});

/**
 * POST /api/v1/search/reindex
 *
 * Rebuilds the curriculum search index for the current workspace. Intended for
 * admin/maintenance use (e.g. after curriculum updates or a fresh deploy).
 *
 * Responds with `{ indexed }` (the number of rows indexed) on success and a
 * generic 500 on failure.
 *
 * NOTE(review): no authorization middleware is attached to this route in this
 * file, yet it deletes and rewrites the workspace's index. Confirm that it is
 * protected where the router is mounted.
 */
router.post('/reindex', async (_req: Request, res: Response) => {
  try {
    const workspaceId = getWorkspaceId() ?? 'default';
    const indexed = await reindexCurriculum(workspaceId);
    res.json({ indexed });
  } catch (error: unknown) {
    // Keep the client-facing message generic, but do not lose the cause.
    // NOTE(review): switch to the project's logger if one exists.
    console.error('Curriculum reindex request failed', error);
    res.status(500).json({ error: 'Reindex request failed' });
  }
});

export default router;
19 changes: 19 additions & 0 deletions backend/src/routes/search/curriculum-search.schemas.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { z } from 'zod';

// Individual field validators, named so the composed schema reads like the
// documented query-parameter table.

/** Required keywords; surrounding whitespace is trimmed before the length check. */
const keywordsField = z.string().trim().min(1, 'q (search keywords) is required');

/** Optional entity-type filter. */
const entityTypeField = z.enum(['course', 'module', 'lesson']).optional();

/** Optional difficulty filter. */
const difficultyField = z.enum(['beginner', 'intermediate', 'advanced']).optional();

/** Optional course filter; must be non-empty after trimming when present. */
const courseIdField = z.string().trim().min(1).optional();

/** Page size, coerced from the query string: integer in 1..50, default 20. */
const limitField = z.coerce.number().int().min(1).max(50).default(20);

/** Pagination offset, coerced from the query string: integer >= 0, default 0. */
const offsetField = z.coerce.number().int().min(0).default(0);

/**
 * Query-string contract for `GET /api/v1/search`.
 *
 * Only `q` is mandatory. `type`, `difficulty` and `courseId` narrow the
 * results, while `limit` and `offset` page through them. The numeric fields
 * are coerced from strings and bounded so result sets stay small.
 */
export const curriculumSearchQuerySchema = z.object({
  q: keywordsField,
  type: entityTypeField,
  difficulty: difficultyField,
  courseId: courseIdField,
  limit: limitField,
  offset: offsetField,
});

/** Parsed (coerced and defaulted) shape of a valid search query. */
export type CurriculumSearchQuery = z.infer<typeof curriculumSearchQuerySchema>;
110 changes: 110 additions & 0 deletions backend/src/search/curriculum/CurriculumSearchService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import prisma from '../../db/index.js';
import { COURSES, curriculumByCourseId } from '../../routes/learning/curriculum.data.js';
import {
buildCurriculumSearchQuery,
buildIndexEntries,
type CurriculumIndexEntry,
type CurriculumSearchParams,
type SearchableCourse,
} from './curriculumSearchQuery.js';

/**
* CurriculumSearchService — the database-facing layer of the search indexer.
*
* Responsibilities:
* - `reindexCurriculum`: (re)build the `curriculum_search_entries` table from
* the curriculum sources (static modules/lessons + DB course records).
* - `searchCurriculum`: run the PostgreSQL full-text query and return ranked
* results.
*
* The query/row-shaping logic lives in `curriculumSearchQuery.ts` (pure and unit
* tested); this module only wires it to Prisma.
*/

/**
 * A single ranked search hit returned to the API.
 *
 * Rows from the raw full-text query are cast to this shape without runtime
 * validation, so it must stay in step with the columns that query selects
 * (built in `curriculumSearchQuery.ts`).
 */
export interface CurriculumSearchResult {
/** Row identifier. Presumably the index entry's own id; confirm against the query's SELECT list. */
id: string;
/** Kind of curriculum entity that matched. */
entityType: string;
/** Identifier of the matched entity in its source. */
entityId: string;
/** Owning course, or null when the entry has none recorded. */
courseId: string | null;
/** Title of the matched entity. */
title: string;
/** Text body of the matched entity. */
content: string;
/** Difficulty label, or null when the entry has none. */
difficulty: string | null;
/** Relevance score. Presumably higher means a better match; confirm against the ranking expression in the query. */
rank: number;
}

/**
 * The slice of the Prisma client this module relies on: raw queries,
 * transactions, reading courses, and bulk writes to the search-entry model.
 */
interface CurriculumSearchDb {
  $queryRawUnsafe: (text: string, ...values: unknown[]) => Promise<unknown[]>;
  $transaction: (ops: unknown[]) => Promise<unknown>;
  course: { findMany: (args: unknown) => Promise<SearchableCourse[]> };
  curriculumSearchEntry: {
    deleteMany: (args: unknown) => unknown;
    createMany: (args: unknown) => unknown;
  };
}

// The default export is the workspace-extended client. It is viewed through
// the structural type above so this module does not depend on the narrowed
// types that extension produces.
const db = prisma as unknown as CurriculumSearchDb;

/**
 * Build the full, duplicate-free list of rows to index.
 *
 * Entries come from two sources, in this order:
 *  1. the static curriculum (`COURSES` plus `curriculumByCourseId`), which
 *     supplies courses together with their modules and lessons;
 *  2. the supplied DB courses, built with an empty module map so they yield
 *     no static modules.
 *
 * When both sources produce the same `entityType:entityId`, the first
 * occurrence (the static one) is kept. DB access is left to the caller.
 *
 * @param dbCourses Course records loaded from the database.
 * @returns Index rows, unique per entity type and entity id.
 */
export function collectIndexEntries(dbCourses: SearchableCourse[]): CurriculumIndexEntry[] {
  const staticCourses = COURSES.map(({ id, title, description }) => ({ id, title, description }));

  const candidates: CurriculumIndexEntry[] = [
    ...buildIndexEntries(staticCourses, curriculumByCourseId),
    ...buildIndexEntries(dbCourses, {}),
  ];

  // First occurrence wins, so static entries shadow DB entries with the same key.
  const emittedKeys = new Set<string>();
  return candidates.filter(({ entityType, entityId }) => {
    const dedupeKey = `${entityType}:${entityId}`;
    if (emittedKeys.has(dedupeKey)) {
      return false;
    }
    emittedKeys.add(dedupeKey);
    return true;
  });
}

/**
 * Load the workspace's course records, best effort: any failure (for example
 * an unavailable courses table) yields an empty list so the static curriculum
 * can still be indexed.
 */
async function loadWorkspaceCourses(workspaceId: string): Promise<SearchableCourse[]> {
  try {
    return await db.course.findMany({
      where: { workspaceId },
      select: { id: true, title: true, description: true },
    });
  } catch {
    return [];
  }
}

/**
 * Replace a workspace's search index with freshly built rows.
 *
 * The workspace's existing rows are deleted and the new ones inserted in a
 * single transaction. `searchVector` is not written here; the database trigger
 * fills it on insert.
 *
 * @param workspaceId Workspace whose index is rebuilt.
 * @returns The number of rows built for indexing.
 */
export async function reindexCurriculum(workspaceId: string): Promise<number> {
  const courses = await loadWorkspaceCourses(workspaceId);
  const rows = collectIndexEntries(courses).map((entry) => ({ ...entry, workspaceId }));

  const purgeExisting = db.curriculumSearchEntry.deleteMany({ where: { workspaceId } });
  const insertFresh = db.curriculumSearchEntry.createMany({
    data: rows,
    skipDuplicates: true,
  });
  await db.$transaction([purgeExisting, insertFresh]);

  return rows.length;
}

/**
 * Run a curriculum search against the database.
 *
 * The SQL text and its positional values come from
 * `buildCurriculumSearchQuery`; the values are passed to `$queryRawUnsafe` as
 * separate arguments. Returned rows are cast to the result type without
 * runtime validation.
 *
 * @param params Search keywords, workspace, filters and paging options.
 * @returns The rows produced by the query.
 */
export async function searchCurriculum(
  params: CurriculumSearchParams
): Promise<CurriculumSearchResult[]> {
  const statement = buildCurriculumSearchQuery(params);
  const hits = await db.$queryRawUnsafe(statement.text, ...statement.values);
  return hits as CurriculumSearchResult[];
}
76 changes: 76 additions & 0 deletions backend/src/search/curriculum/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Advanced Search Indexer for Learning Roadmaps

A fast, relevance-ranked search API over curriculum content (courses, modules,
lessons) backed by **PostgreSQL full-text search**.

## Endpoints

### `GET /api/v1/search`
Keyword search across indexed curriculum.

| Query param | Type | Notes |
|-------------|------|-------|
| `q` | string (required) | Keywords. Parsed with `websearch_to_tsquery` (supports quoted phrases, `or`, `-negation`). |
| `type` | `course` \| `module` \| `lesson` | Filter by entity type. |
| `difficulty` | `beginner` \| `intermediate` \| `advanced` | Filter by difficulty level (difficulty applies to lessons). |
| `courseId` | string | Restrict to one course. |
| `limit` | number (1–50, default 20) | Page size. |
| `offset` | number (default 0) | Pagination offset. |

Response:
```json
{ "query": "soroban auth", "count": 2, "limit": 20, "offset": 0,
"results": [ { "entityType": "lesson", "entityId": "course-1-lesson-3",
"courseId": "course-1", "title": "...", "content": "...",
"difficulty": "intermediate", "rank": 0.12 } ] }
```

### `POST /api/v1/search/reindex`
Rebuilds the index for the current workspace. Run after curriculum changes or a
fresh deploy.

## How it works

```
curriculum.data.ts (modules/lessons) ┐
DB Course records ┘──▶ collectIndexEntries()
──▶ curriculum_search_entries (tsvector + GIN index)
──▶ GET /api/v1/search → websearch_to_tsquery + ts_rank
```

- **Index table** `curriculum_search_entries` stores one denormalised, searchable
row per course/module/lesson. A `tsvector` column (`searchVector`) is kept up
to date by a database trigger, with the **title weighted above the body** so
title matches rank higher.
- **GIN index** on `searchVector` makes `@@` lookups fast.
- **Workspace isolation**: every row carries `workspaceId`; queries and reindex
are scoped to the request's workspace.

## Files

| File | Responsibility |
|------|----------------|
| `curriculumSearchQuery.ts` | Pure builders: `buildIndexEntries`, `buildCurriculumSearchQuery` (parameterised, injection-safe). Unit tested. |
| `CurriculumSearchService.ts` | Prisma wiring: `reindexCurriculum`, `searchCurriculum`, `collectIndexEntries`. |
| `../../routes/search/curriculum-search.routes.ts` | Express routes (mounted at `/api/v1/search`). |
| `../../routes/search/curriculum-search.schemas.ts` | Zod query validation. |
| `../../../prisma/migrations/20260626000000_curriculum_search_index/` | Table + tsvector + GIN index + trigger. |

## Setup

```bash
cd backend
npx prisma migrate deploy # apply the migration
# then, once per workspace (or via the endpoint):
curl -X POST localhost:8080/api/v1/search/reindex -H 'x-workspace-id: default'
```

## Tests

```bash
cd backend
npm test -- curriculum-search
```

Pure builder tests run without a database. The integration tests auto-skip when
no database / migrated index table is available.
Loading
Loading