Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion claude_code_log/html/tool_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from .utils import (
escape_html,
is_markdown_path,
is_memory_path,
render_collapsible_code,
render_async_result_body,
Expand Down Expand Up @@ -391,6 +392,26 @@ def format_read_input(read_input: ReadInput) -> str: # noqa: ARG001
# Parsing (parse_read_output, parse_edit_output) is now in factories/tool_factory.py


def _is_full_read(output: ReadOutput) -> bool:
    """Report whether a Read result spans the whole file.

    "Whole file" means the content begins at line 1 and was not truncated.
    Only such a read is rendered as Markdown; a slice (``start_line`` > 1,
    or truncated content) may cut a code fence in half, so it stays as
    Pygments-highlighted source (issue #232).

    The text-only parser fallback cannot recover ``total_lines``; for a
    whole-file read it reports ``start_line=1`` and ``is_truncated=False``,
    so that case is classified as full, as intended.

    Known residual edge: a read that began at line 1 but *was* truncated,
    when parsed through the text-only fallback (no structured
    ``toolUseResult.file``), also arrives with ``is_truncated=False`` and is
    therefore treated as full. It is rendered as Markdown despite the missing
    tail, which can garble a fence straddling the cut. The impact is narrow
    (old transcripts only; modern ones carry structured metadata that
    classifies the read correctly) and cosmetic (escaping still applies, so
    there is no XSS or crash risk).

    Args:
        output: Parsed Read tool result to classify.

    Returns:
        ``True`` when the read starts at line 1 and is not truncated.
    """
    # Any truncation disqualifies the read regardless of where it started.
    if output.is_truncated:
        return False
    return output.start_line == 1


def format_read_output(output: ReadOutput) -> str:
"""Format Read tool result as HTML with syntax highlighting.

Expand All @@ -411,12 +432,24 @@ def format_read_output(output: ReadOutput) -> str:
# Auto-memory files are Markdown (MEMORY.md + topic .md), so render a
# recalled-memory body as rendered Markdown rather than syntax-highlighted
# source — using the project's usual collapsible-markdown helper (#192).
# Memory bodies render as Markdown unconditionally (even partial reads):
# memory files are small and read whole, and #192 pinned this behavior.
if is_memory_path(output.file_path):
# Escape HTML: memory files are untrusted content — raw <script>/HTML
# must render as text, not live DOM when the transcript is opened.
body = render_user_markdown_collapsible(output.content, "read-tool-result")
return resolve_memory_body_links(body, output.file_path) + suffix_html

# Any other Markdown file: render the body as Markdown too (#232) — but
# only for a *full* read. A partial read (offset/limit) can begin or end
# mid-fence, which would render as garbled Markdown; a line-numbered
# source view is both safe and more useful for a slice, so partial reads
# keep Pygments. The escaping helper is used because file content is
# untrusted regardless of whether it's a memory file.
if is_markdown_path(output.file_path) and _is_full_read(output):
body = render_user_markdown_collapsible(output.content, "read-tool-result")
return body + suffix_html

return render_file_content_collapsible(
output.content,
output.file_path,
Expand Down Expand Up @@ -651,13 +684,21 @@ def format_write_input(write_input: WriteInput) -> str:
Note: File path is now shown in the header, so we skip it here.
"""
# Memory files are Markdown — render a written memory body as rendered
# Markdown rather than highlighted source (#192).
# Markdown rather than highlighted source (#192), with memory link
# resolution applied.
if is_memory_path(write_input.file_path):
# Escape HTML (untrusted memory content) — see format_read_output.
body = render_user_markdown_collapsible(
write_input.content, "write-tool-content"
)
return resolve_memory_body_links(body, write_input.file_path)
# Any other Markdown file: render the written body as Markdown too (#232).
# A Write always carries the file's full content, so there's no partial
# slice to worry about — unlike Read. No memory link resolution applies.
if is_markdown_path(write_input.file_path):
return render_user_markdown_collapsible(
write_input.content, "write-tool-content"
)
return render_file_content_collapsible(
write_input.content, write_input.file_path, "write-tool-content"
)
Expand Down
17 changes: 17 additions & 0 deletions claude_code_log/html/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,23 @@ def is_memory_path(file_path: Optional[str]) -> bool:
)


# -- Markdown-file detection --------------------------------------------------
# A file whose own format is Markdown. Read/Write of such a file renders the
# body as Markdown rather than Pygments-highlighted source (issue #232).
# Auto-memory files (always ``.md``) are a subset: ``is_memory_path`` ⊂
# ``is_markdown_path``. Memory files keep three specializations of their own:
# the 🧠 title, relative-link resolution, and an always-Markdown body (even
# for partial reads). Every other Markdown file (``.md`` / ``.markdown``) is
# handled by keying on this predicate instead.
# Lower-case suffixes treated as Markdown. Kept as a tuple so it can be passed
# straight to ``str.endswith``, which accepts a tuple of candidate suffixes.
_MARKDOWN_EXTS = (".md", ".markdown")


def is_markdown_path(file_path: Optional[str]) -> bool:
    """Tell whether ``file_path`` ends in a Markdown extension.

    The path is passed through ``_normalize_sep`` and lower-cased before the
    suffix check, so ``.MD`` / ``.Markdown`` match as well.

    Args:
        file_path: Path to classify; ``None`` and ``""`` are accepted.

    Returns:
        ``True`` for a ``.md`` / ``.markdown`` path, ``False`` otherwise
        (including for ``None`` or an empty string).
    """
    # Missing or empty path: nothing to inspect.
    if not file_path:
        return False
    lowered = _normalize_sep(file_path).lower()
    return lowered.endswith(_MARKDOWN_EXTS)


def is_memory_tool(tool_name: Optional[str], file_path: Optional[str]) -> bool:
"""True if a tool call/result is an auto-memory interaction.

Expand Down
142 changes: 142 additions & 0 deletions test/test_markdown_file_rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
"""Render Read/Write of any Markdown file as Markdown, not Pygments (issue #232).

A follow-up to the auto-memory work (#192): a fully-contained Markdown file
should render the usual way (rendered Markdown) instead of syntax-highlighted
source. This applies to every ``.md`` file, not just memory files.

- **Write** always carries the whole file → always rendered as Markdown.
- **Read** renders as Markdown only for a *full* read; a partial slice
(offset/limit) can split a code fence, so partial reads keep Pygments.
- Memory keeps its extra specialization (🧠 title, relative-link resolution,
and always-Markdown body even when partial) — see test_memory_rendering.py.
"""

import re
from typing import Optional

from claude_code_log.html.tool_formatters import (
format_read_output,
format_write_input,
)
from claude_code_log.html.utils import is_markdown_path
from claude_code_log.models import ReadOutput, WriteInput


MD = "/home/u/proj/docs/guide.md"
MD_CAPS = "/home/u/proj/README.MARKDOWN"
PY = "/home/u/proj/src/app.py"
MEM = "/home/u/.claude/projects/-home-u-proj/memory/MEMORY.md"

MD_BODY = "# Guide\n\nUse `just ci` before pushing.\n"


def _read(
    file_path: str, content: str, *, start_line: int = 1, total: Optional[int] = None
):
    """Build a ``ReadOutput`` fixture for ``content`` read from ``file_path``.

    ``num_lines`` is derived from ``content``. When ``total`` is omitted it
    defaults to that same count (i.e. the whole file was returned); passing a
    larger ``total`` yields a result flagged as truncated.
    """
    line_count = len(content.splitlines())
    if total is None:
        total = line_count
    return ReadOutput(
        file_path=file_path,
        content=content,
        start_line=start_line,
        num_lines=line_count,
        total_lines=total,
        # Truncated exactly when the file has more lines than were returned.
        is_truncated=total > line_count,
    )


# ----------------------------- is_markdown_path ------------------------------


class TestIsMarkdownPath:
    """Pin which paths ``is_markdown_path`` accepts and which it rejects."""

    def test_md_and_markdown_extensions(self):
        for path in (MD, "/x/y.markdown"):
            assert is_markdown_path(path), path

    def test_case_insensitive(self):
        for path in (MD_CAPS, "/x/Notes.Md"):
            assert is_markdown_path(path), path

    def test_windows_separators(self):
        windows_path = r"C:\Users\u\docs\guide.md"
        assert is_markdown_path(windows_path)

    def test_memory_paths_are_markdown(self):
        # Every memory path is also a Markdown path
        # (is_memory_path ⊂ is_markdown_path).
        assert is_markdown_path(MEM)

    def test_non_markdown_and_none(self):
        rejected = (
            PY,
            "/x/data.json",
            "/x/MD",  # bare name, no extension
            None,
            "",
        )
        for path in rejected:
            assert not is_markdown_path(path), repr(path)


# ----------------------------- Read rendering --------------------------------
# The full/partial/truncated split (the ``_is_full_read`` predicate) is pinned
# behaviorally through ``format_read_output`` rather than by importing the
# private helper. This keeps the test off the private symbol, so it stays
# clean if ``test/`` ever joins the pyright include scope
# (cf. #216's ``_PARAMS_TABLE_MAX_DEPTH`` reportPrivateUsage).


class TestReadMarkdownRendering:
    """Read results: Markdown for a full ``.md`` read, Pygments otherwise."""

    # Attribute these tests look for to tell a Markdown-rendered body apart
    # from highlighted source.
    MARKDOWN_MARKER = 'class="read-tool-result markdown"'

    def test_full_md_read_rendered_as_markdown(self):
        # Starts at line 1 and nothing cut off → full read → Markdown.
        rendered = format_read_output(_read(MD, MD_BODY))
        assert self.MARKDOWN_MARKER in rendered
        assert re.search(r"<h1[^>]*>Guide", rendered) is not None
        assert "<code>just ci</code>" in rendered

    def test_partial_md_read_keeps_pygments(self):
        # A slice (start_line > 1) may begin mid-fence → stay highlighted source.
        partial = _read(MD, MD_BODY, start_line=10, total=999)
        rendered = format_read_output(partial)
        assert self.MARKDOWN_MARKER not in rendered
        assert "<h1" not in rendered

    def test_truncated_md_read_keeps_pygments(self):
        # Begins at line 1 but num_lines < total_lines → truncated → not full.
        truncated = _read(MD, MD_BODY, total=999)
        rendered = format_read_output(truncated)
        assert self.MARKDOWN_MARKER not in rendered

    def test_non_markdown_read_keeps_pygments(self):
        rendered = format_read_output(_read(PY, "x = 1\n"))
        assert self.MARKDOWN_MARKER not in rendered

    def test_markdown_body_escapes_raw_html(self):
        # File content is untrusted whether or not it is a memory file.
        hostile = "# T\n\n<script>alert(1)</script>\n"
        rendered = format_read_output(_read(MD, hostile))
        assert "&lt;script&gt;" in rendered
        assert "<script>alert(1)</script>" not in rendered

    def test_general_md_read_has_no_memory_link_resolution(self):
        # Relative links in a non-memory .md are left as authored
        # (no file:// rewrite).
        rendered = format_read_output(_read(MD, "[peer](peer.md)\n"))
        assert 'href="peer.md"' in rendered
        assert "file://" not in rendered


# ----------------------------- Write rendering -------------------------------


class TestWriteMarkdownRendering:
    """Write inputs: any ``.md`` body renders as Markdown, others as Pygments."""

    # Attribute these tests look for to tell a Markdown-rendered body apart
    # from highlighted source.
    MARKDOWN_MARKER = 'class="write-tool-content markdown"'

    def test_md_write_rendered_as_markdown(self):
        write = WriteInput(file_path=MD, content=MD_BODY)
        rendered = format_write_input(write)
        assert self.MARKDOWN_MARKER in rendered
        assert re.search(r"<h1[^>]*>Guide", rendered) is not None

    def test_non_markdown_write_keeps_pygments(self):
        write = WriteInput(file_path=PY, content="x = 1\n")
        rendered = format_write_input(write)
        assert self.MARKDOWN_MARKER not in rendered

    def test_md_write_escapes_raw_html(self):
        # Raw HTML in the written body must come out escaped, not live.
        write = WriteInput(file_path=MD, content="# T\n\n<script>alert(1)</script>")
        rendered = format_write_input(write)
        assert "&lt;script&gt;" in rendered
        assert "<script>alert(1)</script>" not in rendered

    def test_general_md_write_has_no_memory_link_resolution(self):
        # Relative links in a non-memory .md are left as authored
        # (no file:// rewrite).
        write = WriteInput(file_path=MD, content="[peer](peer.md)\n")
        rendered = format_write_input(write)
        assert 'href="peer.md"' in rendered
        assert "file://" not in rendered
Loading