microsoft · Aadi-011 · May 31, 2026 · May 31, 2026
diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py
@@ -4,13 +4,15 @@
 import argparse
 import sys
 import codecs
+import json
 from typing import Any, Dict
 from textwrap import dedent
 from importlib.metadata import entry_points
 from .__about__ import __version__
 from ._markitdown import MarkItDown, StreamInfo, DocumentConverterResult
 
 
+
 def main():
     parser = argparse.ArgumentParser(
         description="Convert various file formats to markdown.",
@@ -59,6 +61,13 @@ def main():
         "--output",
         help="Output file name. If not provided, output is written to stdout.",
     )
+
+    parser.add_argument(
+        "--output-format",
+        choices=["markdown", "json"],
+        default="markdown",
+        help="Output format: 'markdown' (default) or structured 'json'.",
+    )
 
     parser.add_argument(
         "-x",
@@ -260,6 +269,21 @@ def main():
 
 def _handle_output(args, result: DocumentConverterResult):
     """Handle output to stdout or file"""
+    if getattr(args, "output_format", "markdown") == "json":
+        # NOTE(review): convert_markdown_to_json is not imported in the hunks
+        # shown for this module (it is added in utilities/utils.py) — confirm
+        # it is importable from the package, otherwise this raises NameError.
+        structured_data = convert_markdown_to_json(result.markdown)
+
+        if args.output:
+            with open(args.output, "w", encoding="utf-8") as f:
+                json.dump(structured_data, f, indent=2, ensure_ascii=False)
+        else:
+            print(json.dumps(structured_data, indent=2, ensure_ascii=False))
+        # The JSON has been emitted; return so the markdown branch below
+        # does not run as well and overwrite the JSON file just written.
+        return
+
     if args.output:
         with open(args.output, "w", encoding="utf-8") as f:
             f.write(result.markdown)

diff --git a/packages/markitdown/src/markitdown/py.typed b/packages/markitdown/src/markitdown/py.typed
diff --git a/utilities/utils.py b/utilities/utils.py
@@ -0,0 +1,68 @@
+import json
+import re
+
+# Section heading prefix: two or more '#' characters followed by a space.
+_SECTION_RE = re.compile(r"^#{2,} ")
+# Candidate code-fence line: up to three spaces, then 3+ backticks or tildes.
+_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")
+
+
+def convert_markdown_to_json(md_text: str) -> dict:
+    """Split Markdown text into a title and a flat list of sections.
+
+    A line starting with ``"# "`` sets the title (the last such line wins);
+    every line starting with two or more ``#`` and a space opens a new
+    section. Text before the first section heading is collected under an
+    implicit "Introduction" section, which is emitted only if it is non-blank.
+    Sections opened by a real heading are always emitted, even when empty.
+    Lines inside fenced code blocks (``` or ~~~) are never read as headings.
+
+    Args:
+        md_text: Markdown source text.
+
+    Returns:
+        A JSON-serializable dict ``{"title": str, "sections": [...]}`` where
+        each section is ``{"heading": str, "content": str}`` in document order.
+    """
+    title = "Untitled Document"
+    sections = []
+    heading = "Introduction"
+    explicit = False  # True once `heading` came from a real heading line
+    body = []
+    fence = None  # opening marker of the fenced code block we are inside
+
+    def flush():
+        text = "\n".join(body).strip()
+        # Blank text under the implicit heading is just leading whitespace,
+        # not a section; explicit headings are kept even when empty.
+        if explicit or text:
+            sections.append({"heading": heading, "content": text})
+
+    for line in md_text.splitlines():
+        found = _FENCE_RE.match(line)
+        if found and fence is None and not (found[1][0] == "`" and "`" in found[2]):
+            # Opening fence (a backtick fence may not have backticks after it).
+            fence = found[1]
+            body.append(line)
+        elif fence is not None:
+            # Inside a code block: only a matching bare fence closes it.
+            if (
+                found
+                and found[1][0] == fence[0]
+                and len(found[1]) >= len(fence)
+                and not found[2].strip()
+            ):
+                fence = None
+            body.append(line)
+        elif line.startswith("# "):
+            title = line[2:].strip()
+        elif _SECTION_RE.match(line):
+            flush()
+            heading = _SECTION_RE.sub("", line).strip()
+            explicit = True
+            body = []
+        else:
+            body.append(line)
+
+    flush()
+    return {"title": title, "sections": sections}