diff --git a/packages/markitdown/src/markitdown/converters/_csv_converter.py b/packages/markitdown/src/markitdown/converters/_csv_converter.py index 7e9631e1b..0b6af9ae3 100644 --- a/packages/markitdown/src/markitdown/converters/_csv_converter.py +++ b/packages/markitdown/src/markitdown/converters/_csv_converter.py @@ -57,21 +57,33 @@ def convert( # Create markdown table markdown_table = [] + def _escape_cell(value: str) -> str: + """Escape pipe characters so they don't break Markdown table structure.""" + return str(value).replace("|", r"\|") + # Add header row - markdown_table.append("| " + " | ".join(rows[0]) + " |") + markdown_table.append( + "| " + " | ".join(_escape_cell(c) for c in rows[0]) + " |" + ) # Add separator row - markdown_table.append("| " + " | ".join(["---"] * len(rows[0])) + " |") + markdown_table.append( + "| " + " | ".join(["---"] * len(rows[0])) + " |" + ) # Add data rows for row in rows[1:]: - # Make sure row has the same number of columns as header + # Pad short rows while len(row) < len(rows[0]): row.append("") - # Truncate if row has more columns than header + + # Trim extra columns row = row[: len(rows[0])] - markdown_table.append("| " + " | ".join(row) + " |") + + markdown_table.append( + "| " + " | ".join(_escape_cell(c) for c in row) + " |" + ) result = "\n".join(markdown_table) - return DocumentConverterResult(markdown=result) + return DocumentConverterResult(markdown=result) \ No newline at end of file