Skip to content

Commit 576d0ed

Browse files
shantanu patil and claude
authored and committed
Add dual-tier diagram generation: simple overview + detailed views
Update backend diagram system to generate both a simplified (5-8 node) overview diagram and the existing detailed diagram in a single LLM call.

- DiagramData schema: add optional layerLevel and simplifiedMermaidSource fields (backward-compatible with existing cached wikis)
- STRUCTURED_DIAGRAM_DATA_PROMPT: instruct LLM to produce a simplified Mermaid source alongside the detailed version, with clear guidelines for executive-summary-level diagrams
- diagram_extract: validate simplified diagrams have fewer nodes than the full version, gracefully falling back to detailed-only on failure

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1d0046a commit 576d0ed

3 files changed

Lines changed: 69 additions & 6 deletions

File tree

api/diagram_extract.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,36 @@
1818
re.DOTALL,
1919
)
2020

21+
# Lightweight regex to find node IDs with shape declarations in Mermaid source.
22+
# Matches patterns like A[Label], B(Label), C{Label}, D((Label)), etc.
23+
_MERMAID_NODE_RE = re.compile(r'\b(\w+)\s*[\[\(\{<]')
24+
25+
# Mermaid keywords to exclude from node counting.
26+
_MERMAID_KEYWORDS = frozenset({
27+
'graph', 'flowchart', 'sequenceDiagram', 'classDiagram', 'erDiagram',
28+
'TD', 'TB', 'LR', 'RL', 'BT', 'subgraph', 'end', 'style', 'class',
29+
'click', 'linkStyle', 'classDef',
30+
})
31+
32+
33+
def _count_mermaid_nodes(mermaid_src: str) -> int:
34+
"""Return a rough count of unique node declarations in a Mermaid source string."""
35+
matches = _MERMAID_NODE_RE.findall(mermaid_src)
36+
return len({m for m in matches if m not in _MERMAID_KEYWORDS})
37+
38+
39+
def _validate_simplified(simplified: str, full: str) -> bool:
    """Decide whether a simplified diagram is acceptable next to the full one.

    Both sources are measured with ``_count_mermaid_nodes``. The simplified
    diagram is accepted when its node count does not exceed the full
    diagram's count; otherwise a warning is logged and it is rejected.

    Args:
        simplified: Mermaid source of the simplified overview diagram.
        full: Mermaid source of the detailed diagram.

    Returns:
        True if the simplified diagram has at most as many nodes as the full
        one, False otherwise.
    """
    n_simplified = _count_mermaid_nodes(simplified)
    n_full = _count_mermaid_nodes(full)

    # Equal counts are allowed; only a strictly larger "simplified" diagram
    # is rejected.
    if n_simplified <= n_full:
        return True

    logger.warning(
        "Simplified diagram has more nodes (%d) than the full diagram (%d); discarding",
        n_simplified,
        n_full,
    )
    return False
50+
2151

2252
def extract_diagram_data(content: str) -> List[Dict]:
2353
"""Extract all structured diagram JSON blocks from wiki page content.
@@ -38,7 +68,14 @@ def extract_diagram_data(content: str) -> List[Dict]:
3868
try:
3969
data = json.loads(raw_json)
4070
validated = DiagramData(**data)
41-
results.append(validated.model_dump())
71+
dumped = validated.model_dump()
72+
73+
# Validate simplified diagram if present
74+
if dumped.get("simplifiedMermaidSource") and dumped.get("mermaidSource"):
75+
if not _validate_simplified(dumped["simplifiedMermaidSource"], dumped["mermaidSource"]):
76+
dumped["simplifiedMermaidSource"] = None
77+
78+
results.append(dumped)
4279
except json.JSONDecodeError as exc:
4380
logger.warning("Skipping diagram data block with invalid JSON: %s", exc)
4481
except Exception as exc:

api/diagram_schema.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,5 @@ class DiagramData(BaseModel):
2323
edges: List[DiagramEdge]
2424
mermaidSource: str
2525
diagramType: Literal['flowchart', 'sequence', 'class', 'er'] = 'flowchart'
26+
layerLevel: Optional[int] = None # 1 = simple overview, 2 = detailed
27+
simplifiedMermaidSource: Optional[str] = None # Pre-generated simple version

api/prompts.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,24 +203,38 @@
203203
code block MUST still follow — the JSON is supplementary metadata, NOT a
204204
replacement for the Mermaid diagram.
205205
206+
The JSON MUST include a "simplifiedMermaidSource" field containing a simplified
207+
overview version of the diagram. This simplified version is critical for users
208+
who want a high-level understanding at a glance.
209+
206210
Format:
207211
<!-- DIAGRAM_DATA_START -->
208212
{
209213
"nodes": [
210214
{ "id": "A", "label": "Frontend App", "technology": "react", "files": ["src/App.tsx"], "description": "Main React application", "depth": 0 },
211-
{ "id": "B", "label": "API Server", "technology": "fastapi", "files": ["api/api.py"], "description": "REST API backend", "depth": 0 }
215+
{ "id": "B", "label": "API Server", "technology": "fastapi", "files": ["api/api.py"], "description": "REST API backend", "depth": 0 },
216+
{ "id": "C", "label": "Database", "technology": "postgresql", "files": ["db/models.py"], "description": "Data persistence layer", "depth": 0 },
217+
{ "id": "D", "label": "Auth Service", "technology": "python", "files": ["api/auth.py"], "description": "Authentication and authorization", "depth": 1 },
218+
{ "id": "E", "label": "Cache Layer", "technology": "redis", "files": ["api/cache.py"], "description": "Response caching", "depth": 1 }
212219
],
213220
"edges": [
214-
{ "source": "A", "target": "B", "label": "HTTP requests", "type": "api_call" }
221+
{ "source": "A", "target": "B", "label": "HTTP requests", "type": "api_call" },
222+
{ "source": "B", "target": "C", "label": "queries", "type": "data_flow" },
223+
{ "source": "B", "target": "D", "label": "validates tokens", "type": "dependency" },
224+
{ "source": "B", "target": "E", "label": "reads/writes", "type": "data_flow" }
215225
],
216-
"mermaidSource": "graph TD\n A[Frontend App] --> B[API Server]",
217-
"diagramType": "flowchart"
226+
"mermaidSource": "graph TD\n A[Frontend App] -->|HTTP requests| B[API Server]\n B -->|queries| C[Database]\n B -->|validates tokens| D[Auth Service]\n B -->|reads/writes| E[Cache Layer]",
227+
"diagramType": "flowchart",
228+
"simplifiedMermaidSource": "graph TD\n A[Frontend App] --> B[API Server]\n B --> C[Database]\n B --> D[External Services]"
218229
}
219230
<!-- DIAGRAM_DATA_END -->
220231
221232
```mermaid
222233
graph TD
223-
A[Frontend App] --> B[API Server]
234+
A[Frontend App] -->|HTTP requests| B[API Server]
235+
B -->|queries| C[Database]
236+
B -->|validates tokens| D[Auth Service]
237+
B -->|reads/writes| E[Cache Layer]
224238
```
225239
226240
Rules for the structured JSON:
@@ -232,6 +246,16 @@
232246
- "mermaidSource" must contain the exact Mermaid source used in the code fence
233247
- If a diagram has no meaningful structured metadata, you may omit the JSON block
234248
- The JSON must be valid — if you are unsure, omit it rather than produce invalid JSON
249+
250+
Rules for "simplifiedMermaidSource" (the simplified overview diagram):
251+
- MUST contain a valid Mermaid diagram with MAXIMUM 5-8 nodes
252+
- Show ONLY the highest-level architectural components (think "executive summary")
253+
- Use clear, short labels (2-4 words each, e.g., "User Interface", "API Layer", "Database")
254+
- Use simple relationships WITHOUT detailed edge labels (just arrows, no labels)
255+
- Collapse related sub-components into a single node (e.g., merge "Auth Service" + "User Service" into "Backend Services")
256+
- The simplified diagram must be immediately understandable at a glance by a non-technical person
257+
- If the full diagram already has 8 or fewer nodes, simplifiedMermaidSource can match mermaidSource
258+
- Do NOT include implementation details, file names, or technical jargon in the simplified version
235259
</structured_diagram_data>
236260
"""
237261

0 commit comments

Comments
 (0)