Skip to content

Commit b7898f8

Browse files
shantanu patil and claude
authored and committed
Add Phase 5: admin ingestion pipeline with CLI tools and admin API
- scripts/ingest.py: single-repo ingestion via WebSocket wiki generation, GCS upload, and Supabase upsert with GitHub metadata fetching
- scripts/ingest_batch.py: batch ingestion from repos.json with --skip-existing, --dry-run, --only filters, and summary table
- scripts/repos.json: initial 6 curated repos (react, flask, express, langchain, next.js, rust-analyzer)
- api/routes/admin.py: POST /api/admin/ingest (register projects in Supabase with GitHub metadata) and GET /api/admin/projects (admin list)
- Makefile: ingest, ingest-batch, ingest-batch-skip, ingest-dry-run targets

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 70be50f commit b7898f8

6 files changed

Lines changed: 1809 additions & 4 deletions

File tree

Makefile

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
## ============================================================
66

77
.PHONY: dev dev-api dev-web test test-api lint build-api build-web \
8-
infra-plan infra-apply ingest spec help
8+
infra-plan infra-apply ingest ingest-batch ingest-batch-skip \
9+
ingest-dry-run spec help
910

1011
## ------ Development ------------------------------------------
1112

@@ -45,10 +46,21 @@ infra-plan: ## Preview Terraform changes for production
4546
infra-apply: ## Apply Terraform changes to production
4647
cd infra/environments/prod && terraform apply
4748

48-
## ------ Utilities --------------------------------------------
49+
## ------ Ingestion -----------------------------------------------
50+
51+
ingest: ## Ingest a single repo (usage: make ingest REPO=facebook/react TAGS=javascript,ui)
52+
python scripts/ingest.py --repo $(REPO) $(if $(TAGS),--tags $(TAGS),) $(if $(PROVIDER),--provider $(PROVIDER),)
53+
54+
ingest-batch: ## Ingest all repos from repos.json
55+
python scripts/ingest_batch.py --repos scripts/repos.json
4956

50-
ingest: ## Ingest a repo into the wiki cache (usage: make ingest REPO=owner/repo)
51-
python scripts/ingest.py --repo $(REPO)
57+
ingest-batch-skip: ## Ingest repos, skipping existing ones
58+
python scripts/ingest_batch.py --repos scripts/repos.json --skip-existing
59+
60+
ingest-dry-run: ## Preview what repos would be ingested
61+
python scripts/ingest_batch.py --repos scripts/repos.json --dry-run
62+
63+
## ------ Utilities --------------------------------------------
5264

5365
spec: ## Create a new feature spec from template (usage: make spec NAME=my-feature)
5466
cp specs/_template.md specs/$(NAME).md && echo "Created specs/$(NAME).md"

api/api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from api.wiki_structure_parser import parse_wiki_structure, convert_json_to_xml
2121
from api.storage import get_storage
2222
from api.auth import require_auth, optional_auth, _auth_not_configured, _verify_token
23+
from api.routes.admin import router as admin_router
2324
from api.routes.waitlist import router as waitlist_router
2425
from api.routes.webhooks import router as webhooks_router
2526

@@ -126,6 +127,7 @@ def _custom_rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded
126127
app.add_middleware(GZipMiddleware, minimum_size=1000)
127128

128129
# --- External routers ---
130+
app.include_router(admin_router, tags=["admin"])
129131
app.include_router(waitlist_router, tags=["waitlist"])
130132
app.include_router(webhooks_router, tags=["webhooks"])
131133

api/routes/admin.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
"""
2+
Admin API router.
3+
4+
Provides endpoints for administrative operations such as ingesting
5+
(registering) wiki projects in Supabase and listing all projects.
6+
7+
All endpoints require authentication via Clerk JWT.
8+
"""
9+
10+
import logging
11+
import os
12+
from typing import Any, Dict, List, Optional
13+
14+
import httpx
15+
from fastapi import APIRouter, Depends, HTTPException
16+
from pydantic import BaseModel, Field
17+
18+
from api.auth import require_auth
19+
from api.supabase_client import _get_client
20+
21+
logger = logging.getLogger(__name__)
22+
23+
router = APIRouter()
24+
25+
# ---------------------------------------------------------------------------
26+
# GitHub API helper
27+
# ---------------------------------------------------------------------------
28+
29+
GITHUB_API_BASE = "https://api.github.com"
30+
31+
32+
async def _fetch_github_metadata(owner: str, repo: str) -> Dict[str, Any]:
    """Fetch repository metadata from the GitHub API.

    This is a best-effort lookup: every failure (HTTP error status, network
    error, unparsable body, unusable owner/repo) is logged as a warning and
    the zero/empty fallback is returned instead of raising.

    Args:
        owner: Repository owner as supplied by the caller.
        repo: Repository name as supplied by the caller.

    Returns:
        A dict with ``stars``, ``description``, and ``topics``. On failure
        these are ``0``, ``""`` and ``[]`` respectively.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Built fresh on every call so callers can never share/mutate one dict.
    fallback: Dict[str, Any] = {"stars": 0, "description": "", "topics": []}

    # ``owner``/``repo`` come from the request body. Empty values and dot
    # segments would resolve to a different API path once the URL is
    # normalised, so refuse them rather than send the token somewhere else.
    unusable_segments = {"", ".", ".."}
    if owner in unusable_segments or repo in unusable_segments:
        logger.warning(
            "Skipping GitHub metadata fetch for invalid repo path %r/%r",
            owner,
            repo,
        )
        return fallback

    # Percent-encode each path segment so characters such as "/", "?" or "#"
    # cannot change which endpoint is requested.
    url = (
        f"{GITHUB_API_BASE}/repos/"
        f"{quote(owner, safe='')}/{quote(repo, safe='')}"
    )
    headers: Dict[str, str] = {"Accept": "application/vnd.github.v3+json"}

    # Use a GitHub token if available to avoid rate-limiting
    gh_token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GITHUB_API_KEY")
    if gh_token:
        headers["Authorization"] = f"Bearer {gh_token}"

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            resp = await client.get(url, headers=headers)
            resp.raise_for_status()
            data = resp.json()
            return {
                # ``or`` also covers a JSON ``null`` value, not just a
                # missing key.
                "stars": data.get("stargazers_count") or 0,
                "description": data.get("description") or "",
                "topics": data.get("topics") or [],
            }
    except httpx.HTTPStatusError as exc:
        logger.warning(
            "GitHub API returned %s for %s/%s: %s",
            exc.response.status_code,
            owner,
            repo,
            exc.response.text[:200],
        )
        return fallback
    except Exception as exc:
        # Deliberately broad: metadata is optional, so no failure here may
        # propagate to the caller.
        logger.warning(
            "Failed to fetch GitHub metadata for %s/%s: %s", owner, repo, exc
        )
        return fallback
66+
67+
68+
# ---------------------------------------------------------------------------
69+
# Request / Response models
70+
# ---------------------------------------------------------------------------
71+
72+
73+
class IngestRequest(BaseModel):
    """Request body for the admin ingest endpoint.

    ``owner`` and ``repo`` are required and must be non-empty: they are used
    to build the GitHub lookup path and are stored as part of the project's
    identity, so an empty value could only produce a malformed lookup and a
    meaningless row. All other fields are optional.
    """

    owner: str = Field(
        ...,
        min_length=1,
        description="Repository owner (e.g. 'facebook')",
    )
    repo: str = Field(
        ...,
        min_length=1,
        description="Repository name (e.g. 'react')",
    )
    # NOTE(review): documented as one of github/gitlab/bitbucket but accepted
    # as free text; consider constraining once the set of callers is known.
    repo_type: str = Field(
        "github",
        description="Repository hosting type: github, gitlab, or bitbucket",
    )
    language: str = Field("en", description="Wiki language code")
    tags: Optional[List[str]] = Field(None, description="Categorisation tags")
    is_featured: bool = Field(False, description="Whether to feature this project")
    provider: Optional[str] = Field(
        None, description="AI provider override (e.g. 'google', 'openai')"
    )
    model: Optional[str] = Field(None, description="AI model override")
89+
90+
91+
class IngestResponse(BaseModel):
    """Payload returned by the admin ingest endpoint after a successful upsert."""

    # Outcome marker; defaults to "ok".
    status: str = "ok"

    # The project record being reported back to the caller.
    project: Dict[str, Any]

    # The metadata dict (``stars``, ``description``, ``topics``) that was
    # used when building the project record.
    github_metadata: Dict[str, Any]
97+
98+
99+
# ---------------------------------------------------------------------------
100+
# Endpoints
101+
# ---------------------------------------------------------------------------
102+
103+
104+
@router.post(
    "/api/admin/ingest",
    response_model=IngestResponse,
    summary="Register a wiki project in Supabase",
    responses={
        200: {"description": "Project upserted successfully"},
        401: {"description": "Authentication required"},
        422: {"description": "Validation error"},
        500: {"description": "Internal server error"},
    },
)
async def ingest_project(
    body: IngestRequest,
    claims: dict = Depends(require_auth),
) -> IngestResponse:
    """Register (upsert) a wiki project in Supabase with GitHub metadata.

    This endpoint does NOT generate the wiki itself — that is a long-running
    process handled by the CLI ingest scripts. It only creates or updates
    the project row in the ``wiki_projects`` table so that it appears in the
    project directory.

    Args:
        body: Validated ingest request.
        claims: Result of the ``require_auth`` dependency; not read here, it
            is declared so the dependency runs for this route.

    Returns:
        An ``IngestResponse`` carrying the upserted row (or the submitted
        payload when Supabase returns no rows) and the metadata that was used.

    Raises:
        HTTPException: Status 500 if anything fails while building or
            executing the upsert. ``HTTPException``s raised inside the
            handler are re-raised unchanged.
    """
    # NOTE(review): only ``require_auth`` guards this admin route; whether it
    # enforces an admin role (vs. any signed-in user) is not visible from
    # this module — confirm.

    # Function-scope import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    try:
        # 1. Fetch GitHub metadata (best-effort). Only query GitHub for
        #    GitHub-hosted repos: for other hosts a same-named GitHub repo
        #    would otherwise contribute unrelated stars/description/topics.
        if body.repo_type.lower() == "github":
            github_metadata = await _fetch_github_metadata(body.owner, body.repo)
        else:
            github_metadata = {"stars": 0, "description": "", "topics": []}

        # 2. Build the upsert payload
        extra: Dict[str, Any] = {
            "is_published": True,
            "is_featured": body.is_featured,
            "stars": github_metadata["stars"],
        }
        if body.provider:
            extra["provider"] = body.provider
        if body.model:
            extra["model"] = body.model

        # Merge GitHub topics with user-supplied tags. ``dict.fromkeys``
        # de-duplicates while keeping first-seen order (user tags first).
        merged_tags: List[str] = list(
            dict.fromkeys((body.tags or []) + (github_metadata.get("topics") or []))
        )

        client = _get_client()
        payload: Dict[str, Any] = {
            "owner": body.owner,
            "repo": body.repo,
            "repo_type": body.repo_type,
            "language": body.language,
            "title": f"{body.owner}/{body.repo}",
            "description": github_metadata["description"],
            "tags": merged_tags,
            **extra,
        }

        # 3. Execute the upsert. ``.execute()`` is a synchronous call, so it
        #    is run in the worker threadpool to avoid blocking the event loop
        #    for the duration of the request to Supabase.
        upsert_query = client.table("wiki_projects").upsert(
            payload, on_conflict="owner,repo,repo_type,language"
        )
        response = await run_in_threadpool(upsert_query.execute)
        project = response.data[0] if response.data else payload

        logger.info(
            "Admin ingest: upserted %s/%s (featured=%s, tags=%s)",
            body.owner,
            body.repo,
            body.is_featured,
            merged_tags,
        )

        return IngestResponse(
            status="ok",
            project=project,
            github_metadata=github_metadata,
        )

    except HTTPException:
        raise
    except Exception as exc:
        # ``logger.exception`` logs at ERROR level with the traceback attached.
        logger.exception(
            "Admin ingest error for %s/%s: %s", body.owner, body.repo, exc
        )
        # NOTE(review): the response detail echoes the raw exception text to
        # the client; consider a generic message if that can expose internals.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to ingest project: {exc}",
        ) from exc
187+
188+
189+
@router.get(
    "/api/admin/projects",
    summary="List all wiki projects (admin view)",
    responses={
        200: {"description": "List of all wiki projects"},
        401: {"description": "Authentication required"},
        500: {"description": "Internal server error"},
    },
)
async def list_all_projects(
    claims: dict = Depends(require_auth),
) -> List[Dict[str, Any]]:
    """List all wiki projects including unpublished ones.

    The query selects every column of ``wiki_projects`` with no filter on
    ``is_published``, ordered by ``created_at`` descending (newest first).

    Args:
        claims: Result of the ``require_auth`` dependency; not read here, it
            is declared so the dependency runs for this route.

    Returns:
        The rows returned by Supabase, or an empty list when there are none.

    Raises:
        HTTPException: Status 500 if the query fails.
    """
    # Function-scope import keeps this block self-contained.
    from fastapi.concurrency import run_in_threadpool

    try:
        client = _get_client()
        query = (
            client.table("wiki_projects")
            .select("*")
            .order("created_at", desc=True)
        )
        # ``.execute()`` is a synchronous call; run it in the worker
        # threadpool so the event loop is not blocked while Supabase responds.
        # NOTE(review): the result set is unbounded — consider pagination if
        # the table is expected to grow large.
        response = await run_in_threadpool(query.execute)
        return response.data or []
    except Exception as exc:
        # ``logger.exception`` logs at ERROR level with the traceback attached.
        logger.exception("Admin list projects error: %s", exc)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to list projects: {exc}",
        ) from exc

0 commit comments

Comments
 (0)