
Commit 0b046c6

shantanu patil and claude authored and committed
feat: Phase 2-3 completion — hook extraction, diagram overhaul, Mermaid fixes, and cleanup
Phase 2 (Core Polish):
- Extract page.tsx (2,623→844 lines) into 4 hooks: useWikiGeneration, useRepoStructure, useWikiCache, useWikiExport
- Migrate hardcoded colors in DependencyGraph to Tailwind theme classes
- Add Playwright E2E tests (landing, wiki-viewer, routing)
- Add Vitest unit tests for utility functions

Phase 3 Batch 1 (Differentiators):
- Embed widget route (/embed/[owner]/[repo]/[pageId]) with iframe support
- Per-page regeneration via POST /api/wiki/regenerate_page
- Wiki template system (comprehensive, architecture, api-docs, onboarding, security)

Cleanup:
- Remove slides and workshop features entirely
- Simplify navigation to Wiki + Explorer tabs only
- Fix wiki generation failure (empty provider → fetch defaultProvider)
- Add shared layout with tab navigation

Diagram improvements:
- Fix Mermaid unicode arrow rendering (—→, →, ⟶ etc. from LLM output); see the sketch below
- Add Mermaid error fallback with source display and retry button
- Fix explorer dark mode: rgba() node backgrounds, darkStroke edges, MutationObserver-based theme detection
- Smoothstep edge routing for cleaner architectural connections
- Connected-node highlighting (dim unrelated nodes on selection)
- File count badges and description tooltips on explorer nodes
- Improved layout spacing across all three explorer views

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
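The Mermaid arrow fix is worth a concrete illustration: LLMs often emit typographic arrows (→, ⟶, —→) in generated diagram source, which Mermaid's parser rejects. The commit does not show where the fix lives, so the function and arrow table below are a minimal, hypothetical sketch of that kind of normalization, not the commit's actual code:

import re

# Hypothetical arrow table: maps unicode arrows commonly produced by
# LLMs onto ASCII Mermaid edge syntax. Extend as new variants appear.
UNICODE_ARROWS = {
    "—→": "-->",  # em-dash fused with an arrow
    "⟶": "-->",   # long rightwards arrow (U+27F6)
    "→": "-->",   # rightwards arrow (U+2192)
    "←": "<--",   # leftwards arrow (U+2190)
    "↔": "<-->",  # left-right arrow (U+2194)
}

def normalize_mermaid_arrows(source: str) -> str:
    """Replace unicode arrows in Mermaid source with ASCII equivalents."""
    for unicode_arrow, ascii_arrow in UNICODE_ARROWS.items():
        source = source.replace(unicode_arrow, ascii_arrow)
    # Collapse over-long dash runs (e.g. "--" followed by "-->") into one edge.
    return re.sub(r"-{3,}>", "-->", source)

A pass like this would run before the source reaches the renderer; the error fallback with source display and retry button covers anything such a table misses.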
1 parent e7fc86e commit 0b046c6

56 files changed

Lines changed: 11104 additions & 5153 deletions


api/api.py

Lines changed: 497 additions & 9 deletions
Large diffs are not rendered by default.
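The api/api.py diff is collapsed, but per the commit message it adds the per-page regeneration endpoint, POST /api/wiki/regenerate_page. Here is a hedged sketch of calling it; the host, port, and payload fields are assumptions, since the actual request schema is in the unrendered diff:

import requests

# Illustrative request only: field names and server address are assumed,
# not taken from the (unrendered) api/api.py diff.
response = requests.post(
    "http://localhost:8001/api/wiki/regenerate_page",
    json={
        "owner": "some-owner",      # assumed field
        "repo": "some-repo",        # assumed field
        "page_id": "architecture",  # assumed field
    },
    timeout=300,
)
response.raise_for_status()
print(response.json())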

api/config/wiki_templates.json

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
+{
+  "templates": {
+    "comprehensive": {
+      "id": "comprehensive",
+      "name": "Comprehensive",
+      "description": "Full wiki with architecture, data flow, API docs, and diagrams",
+      "icon": "BookOpen",
+      "prompt_guidance": "",
+      "structure_hint": "Create a structured wiki with sections covering: Overview, System Architecture, Core Features, Data Management/Flow, Frontend Components, Backend Systems, Deployment/Infrastructure, and Extensibility.",
+      "page_count": "8-12",
+      "focus_areas": []
+    },
+    "architecture": {
+      "id": "architecture",
+      "name": "Architecture Guide",
+      "description": "System design, component relationships, design patterns, and trade-offs",
+      "icon": "Layers",
+      "prompt_guidance": "Focus on system architecture, component relationships, design patterns, and technical trade-offs. Prioritize diagrams showing data flow and component interactions. Emphasize how modules connect, what protocols they use, and the reasoning behind architectural decisions.",
+      "structure_hint": "Create a wiki focused on architecture with sections: High-Level Architecture, Component Breakdown, Design Patterns Used, Data Flow & Communication, Dependency Graph, and Trade-offs & Decisions.",
+      "page_count": "6-10",
+      "focus_areas": ["architecture", "design patterns", "system design", "component relationships", "data flow"]
+    },
+    "api-docs": {
+      "id": "api-docs",
+      "name": "API Documentation",
+      "description": "API endpoints, request/response formats, authentication, and error handling",
+      "icon": "Code",
+      "prompt_guidance": "Focus on API documentation: endpoints, request/response schemas, authentication mechanisms, error codes, and rate limiting. Document every public interface, including REST endpoints, WebSocket protocols, GraphQL schemas, and RPC methods. Include example requests and responses.",
+      "structure_hint": "Create a wiki focused on API documentation with sections: API Overview & Base URLs, Authentication & Authorization, REST Endpoints (grouped by resource), WebSocket/Real-time APIs, Error Handling & Status Codes, and SDK/Client Usage.",
+      "page_count": "6-10",
+      "focus_areas": ["api", "endpoints", "routes", "handlers", "controllers", "middleware", "schemas", "serializers"]
+    },
+    "onboarding": {
+      "id": "onboarding",
+      "name": "Onboarding Guide",
+      "description": "Getting started, dev environment setup, key concepts, and contribution guide",
+      "icon": "Rocket",
+      "prompt_guidance": "Write for a developer joining this project for the first time. Focus on getting started quickly, development environment setup, key concepts they need to understand, common development tasks, and how to contribute. Use a friendly, instructional tone. Include step-by-step instructions where possible.",
+      "structure_hint": "Create a wiki for new developers with sections: Quick Start & Prerequisites, Development Environment Setup, Project Structure Walkthrough, Key Concepts & Terminology, Common Development Tasks, Testing & Debugging, and Contribution Guide.",
+      "page_count": "5-8",
+      "focus_areas": ["readme", "setup", "configuration", "package.json", "docker", "makefile", "contributing", "tests"]
+    },
+    "security": {
+      "id": "security",
+      "name": "Security Review",
+      "description": "Threat model, auth flows, data handling, dependency risks, and OWASP considerations",
+      "icon": "Shield",
+      "prompt_guidance": "Analyze this codebase from a security perspective. Cover authentication flows, authorization patterns, data handling and sanitization, input validation, dependency risks, secrets management, and potential vulnerabilities. Reference OWASP Top 10 where applicable. Flag areas that need security attention.",
+      "structure_hint": "Create a security-focused wiki with sections: Security Overview & Threat Model, Authentication & Session Management, Authorization & Access Control, Input Validation & Data Sanitization, Secrets & Configuration Management, Dependency Security, and Security Recommendations.",
+      "page_count": "5-8",
+      "focus_areas": ["auth", "security", "middleware", "validation", "sanitize", "crypto", "password", "token", "session", "cors", "csrf"]
+    }
+  },
+  "default_template": "comprehensive"
+}
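For orientation, here is one plausible way the backend could consume this config. The helpers below are hypothetical (the real wiring is in the api/api.py diff, which is not rendered above); only the JSON keys come from the file itself:

import json
from pathlib import Path

TEMPLATES_PATH = Path("api/config/wiki_templates.json")

def load_template(template_id: str | None = None) -> dict:
    """Hypothetical helper: look up a template, falling back to the default."""
    config = json.loads(TEMPLATES_PATH.read_text())
    return config["templates"][template_id or config["default_template"]]

def build_structure_prompt(template: dict, repo_name: str) -> str:
    """Hypothetical helper: fold template guidance into a generation prompt."""
    parts = [f"Generate a wiki structure for the repository '{repo_name}'."]
    if template["prompt_guidance"]:
        parts.append(template["prompt_guidance"])
    parts.append(template["structure_hint"])
    parts.append(f"Aim for {template['page_count']} pages.")
    if template["focus_areas"]:
        parts.append("Prioritize files related to: "
                     + ", ".join(template["focus_areas"]) + ".")
    return "\n\n".join(parts)

# e.g. build_structure_prompt(load_template("security"), "owner/repo")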

api/openrouter_client.py

Lines changed: 23 additions & 135 deletions
@@ -173,144 +173,32 @@ async def content_generator():
             content = choice["message"]["content"]
             log.info("Successfully retrieved response")
 
-            # Check if the content is XML and ensure it's properly formatted
-            if content.strip().startswith("<") and ">" in content:
-                # It's likely XML, let's make sure it's properly formatted
+            # Check if content looks like a wiki_structure response
+            # (XML or JSON). Use the robust parser to normalize
+            # it and re-emit clean XML for the frontend.
+            if "<wiki_structure>" in content or (
+                '"pages"' in content and '"title"' in content
+            ):
                 try:
-                    # Extract the XML content
-                    xml_content = content
-
-                    # Check if it's a wiki_structure XML
-                    if "<wiki_structure>" in xml_content:
-                        log.info("Found wiki_structure XML, ensuring proper format")
-
-                        # Extract just the wiki_structure XML
-                        import re
-                        wiki_match = re.search(r'<wiki_structure>[\s\S]*?<\/wiki_structure>', xml_content)
-                        if wiki_match:
-                            # Get the raw XML
-                            raw_xml = wiki_match.group(0)
-
-                            # Clean the XML by removing any leading/trailing whitespace
-                            # and ensuring it's properly formatted
-                            clean_xml = raw_xml.strip()
-
-                            # Try to fix common XML issues
-                            try:
-                                # Replace problematic characters in XML
-                                fixed_xml = clean_xml
-
-                                # Replace & with &amp; if not already part of an entity
-                                fixed_xml = re.sub(r'&(?!amp;|lt;|gt;|apos;|quot;)', '&amp;', fixed_xml)
-
-                                # Fix other common XML issues
-                                fixed_xml = fixed_xml.replace('</', '</').replace(' >', '>')
-
-                                # Try to parse the fixed XML
-                                from xml.dom.minidom import parseString
-                                dom = parseString(fixed_xml)
-
-                                # Get the pretty-printed XML with proper indentation
-                                pretty_xml = dom.toprettyxml()
-
-                                # Remove XML declaration
-                                if pretty_xml.startswith('<?xml'):
-                                    pretty_xml = pretty_xml[pretty_xml.find('?>')+2:].strip()
-
-                                log.info(f"Extracted and validated XML: {pretty_xml[:100]}...")
-                                yield pretty_xml
-                            except Exception as xml_parse_error:
-                                log.warning(f"XML validation failed: {str(xml_parse_error)}, using raw XML")
-
-                                # If XML validation fails, try a more aggressive approach
-                                try:
-                                    # Use regex to extract just the structure without any problematic characters
-                                    import re
-
-                                    # Extract the basic structure
-                                    structure_match = re.search(r'<wiki_structure>(.*?)</wiki_structure>', clean_xml, re.DOTALL)
-                                    if structure_match:
-                                        structure = structure_match.group(1).strip()
-
-                                        # Rebuild a clean XML structure
-                                        clean_structure = "<wiki_structure>\n"
-
-                                        # Extract title
-                                        title_match = re.search(r'<title>(.*?)</title>', structure, re.DOTALL)
-                                        if title_match:
-                                            title = title_match.group(1).strip()
-                                            clean_structure += f"  <title>{title}</title>\n"
-
-                                        # Extract description
-                                        desc_match = re.search(r'<description>(.*?)</description>', structure, re.DOTALL)
-                                        if desc_match:
-                                            desc = desc_match.group(1).strip()
-                                            clean_structure += f"  <description>{desc}</description>\n"
-
-                                        # Add pages section
-                                        clean_structure += "  <pages>\n"
-
-                                        # Extract pages
-                                        pages = re.findall(r'<page id="(.*?)">(.*?)</page>', structure, re.DOTALL)
-                                        for page_id, page_content in pages:
-                                            clean_structure += f'    <page id="{page_id}">\n'
-
-                                            # Extract page title
-                                            page_title_match = re.search(r'<title>(.*?)</title>', page_content, re.DOTALL)
-                                            if page_title_match:
-                                                page_title = page_title_match.group(1).strip()
-                                                clean_structure += f"      <title>{page_title}</title>\n"
-
-                                            # Extract page description
-                                            page_desc_match = re.search(r'<description>(.*?)</description>', page_content, re.DOTALL)
-                                            if page_desc_match:
-                                                page_desc = page_desc_match.group(1).strip()
-                                                clean_structure += f"      <description>{page_desc}</description>\n"
-
-                                            # Extract importance
-                                            importance_match = re.search(r'<importance>(.*?)</importance>', page_content, re.DOTALL)
-                                            if importance_match:
-                                                importance = importance_match.group(1).strip()
-                                                clean_structure += f"      <importance>{importance}</importance>\n"
-
-                                            # Extract relevant files
-                                            clean_structure += "      <relevant_files>\n"
-                                            file_paths = re.findall(r'<file_path>(.*?)</file_path>', page_content, re.DOTALL)
-                                            for file_path in file_paths:
-                                                clean_structure += f"        <file_path>{file_path.strip()}</file_path>\n"
-                                            clean_structure += "      </relevant_files>\n"
-
-                                            # Extract related pages
-                                            clean_structure += "      <related_pages>\n"
-                                            related_pages = re.findall(r'<related>(.*?)</related>', page_content, re.DOTALL)
-                                            for related in related_pages:
-                                                clean_structure += f"        <related>{related.strip()}</related>\n"
-                                            clean_structure += "      </related_pages>\n"
-
-                                            clean_structure += "    </page>\n"
-
-                                        clean_structure += "  </pages>\n</wiki_structure>"
-
-                                        log.info("Successfully rebuilt clean XML structure")
-                                        yield clean_structure
-                                    else:
-                                        log.warning("Could not extract wiki structure, using raw XML")
-                                        yield clean_xml
-                                except Exception as rebuild_error:
-                                    log.warning(f"Failed to rebuild XML: {str(rebuild_error)}, using raw XML")
-                                    yield clean_xml
-                        else:
-                            # If we can't extract it, just yield the original content
-                            log.warning("Could not extract wiki_structure XML, yielding original content")
-                            yield xml_content
-                    else:
-                        # For other XML content, just yield it as is
-                        yield content
-                except Exception as xml_error:
-                    log.error(f"Error processing XML content: {str(xml_error)}")
+                    from api.wiki_structure_parser import (
+                        parse_wiki_structure,
+                        convert_json_to_xml,
+                    )
+
+                    structure = parse_wiki_structure(content)
+                    clean_xml = convert_json_to_xml(structure)
+                    log.info(
+                        f"Parsed and normalized wiki structure: {clean_xml[:100]}..."
+                    )
+                    yield clean_xml
+                except Exception as parse_error:
+                    log.warning(
+                        f"Wiki structure parsing failed ({parse_error}), "
+                        "yielding original content"
+                    )
                     yield content
             else:
-                # Not XML, just yield the content
+                # Not a wiki structure response, yield as-is
                 yield content
         else:
            log.error(f"Unexpected response format: {data}")

0 commit comments
