Skip to content

Commit 7df6e95

Browse files
fix(ui): allow XML tag for markdown rich input (#6095)
* fix(ui): allow XML tag for rich input
  - Introduced utilities for escaping and unescaping non-standard XML/HTML tags to preserve them during markdown processing.
  - Updated the RichInput and ExpandRichInputDialog components to use the new XML tag utilities for handling custom tags.
  - Added a Jest configuration for the UI package and created tests for the XML tag utilities to ensure functionality.
  - Updated package.json to include Jest as a dependency and added a test script.
* address code review comments from Gemini
* allow html tag in markdown editor
* add test case for markdown format
* update dependency in ui
1 parent c4e689b commit 7df6e95

9 files changed

Lines changed: 568 additions & 81 deletions

File tree

packages/server/src/utils/buildAgentflow.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,11 @@ export const resolveVariables = async (
247247
// If value is not a string, return as is
248248
if (typeof value !== 'string') return value
249249

250-
// Convert legacy HTML content to markdown, preserving any markdown syntax within
251-
if (/<[a-z][a-z0-9]*[^>]*>/i.test(value)) {
250+
// Convert legacy HTML content to markdown, preserving any markdown syntax within.
251+
// Legacy content from old getHTML() starts with a TipTap block tag (e.g. <p>text</p>).
252+
// Anchor with ^ to avoid matching intentional HTML/XML tags in user prompts
253+
// (e.g. <instruction><div>...</div></instruction>).
254+
if (/^\s*<(?:p|div|h[1-6]|ul|ol|blockquote|pre|table)\b/i.test(value)) {
252255
const turndownService = new TurndownService()
253256
// Disable escaping so markdown characters (e.g. ###, -, *) inside HTML are preserved as-is
254257
turndownService.escape = (str: string) => str

packages/ui/jest.config.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Jest configuration for the UI package.
module.exports = {
2+
// Only search for tests and modules under src.
roots: ['<rootDir>/src'],
3+
// Run tests in the plain Node environment.
testEnvironment: 'node',
4+
// Only files ending in .test.js under src are treated as tests.
testMatch: ['<rootDir>/src/**/*.test.js'],
5+
testPathIgnorePatterns: ['/node_modules/', '/build/'],
6+
moduleNameMapper: {
7+
// Style and image imports resolve to a single mock module instead of the real asset.
'\\.(css|less|scss|sass|svg|png|jpg|jpeg|gif|webp)$': '<rootDir>/src/__mocks__/styleMock.js',
8+
// Resolve the '@/...' import alias to the src directory.
'^@/(.*)$': '<rootDir>/src/$1'
9+
},
10+
transform: {
11+
// .js and .jsx files are transformed by babel-jest.
'^.+\\.jsx?$': 'babel-jest'
12+
},
13+
// Files under node_modules are not transformed.
transformIgnorePatterns: ['/node_modules/']
14+
}

packages/ui/package.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"@mui/x-tree-view": "^7.25.0",
2727
"@reduxjs/toolkit": "^2.2.7",
2828
"@tabler/icons-react": "^3.30.0",
29+
"@tiptap/core": "^3.20.4",
2930
"@tiptap/extension-code-block-lowlight": "^3.20.4",
3031
"@tiptap/extension-mention": "^3.20.4",
3132
"@tiptap/extension-placeholder": "^3.20.4",
@@ -81,11 +82,20 @@
8182
"dev": "vite",
8283
"start": "vite",
8384
"build": "vite build",
85+
"test": "jest",
8486
"clean": "rimraf build",
8587
"nuke": "rimraf build node_modules .turbo"
8688
},
8789
"babel": {
8890
"presets": [
91+
[
92+
"@babel/preset-env",
93+
{
94+
"targets": {
95+
"node": "current"
96+
}
97+
}
98+
],
8999
"@babel/preset-react"
90100
]
91101
},
@@ -108,6 +118,7 @@
108118
"@testing-library/react": "^14.0.0",
109119
"@testing-library/user-event": "^12.8.3",
110120
"@vitejs/plugin-react": "^4.2.0",
121+
"jest": "^29.7.0",
111122
"pretty-quick": "^3.1.3",
112123
"react-scripts": "^5.0.1",
113124
"rimraf": "^5.0.5",
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
// Empty stub module: exports an object with no properties.
// NOTE(review): presumably this is the src/__mocks__/styleMock.js that the Jest moduleNameMapper
// points style/image imports at — the file path is not visible here; confirm.
module.exports = {}

packages/ui/src/ui-component/dialog/ExpandRichInputDialog.jsx

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,10 @@ import { common, createLowlight } from 'lowlight'
2323
import { suggestionOptions } from '@/ui-component/input/suggestionOption'
2424
import { getAvailableNodesForVariable } from '@/utils/genericHelper'
2525
import { CustomMention } from '@/utils/customMention'
26+
import { isHtmlContent, escapeXmlTags, unescapeXmlEntities, unescapeXmlTags } from '@/utils/xmlTagUtils'
2627

2728
const lowlight = createLowlight(common)
2829

29-
// Detect if content is legacy HTML (from old getHTML() storage) vs markdown
30-
const isHtmlContent = (content) => {
31-
if (!content || typeof content !== 'string') return false
32-
return /<(?:p|div|span|h[1-6]|ul|ol|li|br|code|pre|blockquote|table|strong|em)\b/i.test(content)
33-
}
34-
3530
// Store
3631
import { HIDE_CANVAS_DIALOG, SHOW_CANVAS_DIALOG } from '@/store/actions'
3732

@@ -221,7 +216,7 @@ const ExpandRichInputDialog = ({ show, dialogProps, onCancel, onInputHintDialogC
221216
onUpdate: ({ editor }) => {
222217
if (!isSwitchingRef.current) {
223218
try {
224-
setInputValue(editor.getMarkdown())
219+
setInputValue(unescapeXmlTags(editor.getMarkdown()))
225220
} catch {
226221
setInputValue(editor.getHTML())
227222
}
@@ -239,12 +234,13 @@ const ExpandRichInputDialog = ({ show, dialogProps, onCancel, onInputHintDialogC
239234
if (isHtmlContent(inputValue)) {
240235
editor.commands.setContent(inputValue)
241236
try {
242-
setInputValue(editor.getMarkdown())
237+
setInputValue(unescapeXmlTags(editor.getMarkdown()))
243238
} catch {
244239
// keep original value if conversion fails
245240
}
246241
} else {
247-
editor.commands.setContent(inputValue, { contentType: 'markdown' })
242+
editor.commands.setContent(escapeXmlTags(inputValue), { contentType: 'markdown' })
243+
editor.commands.setContent(unescapeXmlEntities(editor.getJSON()))
248244
}
249245
isSwitchingRef.current = false
250246
}
@@ -265,13 +261,17 @@ const ExpandRichInputDialog = ({ show, dialogProps, onCancel, onInputHintDialogC
265261

266262
if (newMode === 'preview' && editor) {
267263
isSwitchingRef.current = true
268-
const contentType = isHtmlContent(inputValue) ? 'html' : 'markdown'
269-
editor.commands.setContent(inputValue, { contentType })
264+
if (isHtmlContent(inputValue)) {
265+
editor.commands.setContent(inputValue, { contentType: 'html' })
266+
} else {
267+
editor.commands.setContent(escapeXmlTags(inputValue), { contentType: 'markdown' })
268+
editor.commands.setContent(unescapeXmlEntities(editor.getJSON()))
269+
}
270270
isSwitchingRef.current = false
271271
setTimeout(() => editor.commands.focus(), 50)
272272
} else if (newMode === 'raw' && editor) {
273273
try {
274-
setInputValue(editor.getMarkdown())
274+
setInputValue(unescapeXmlTags(editor.getMarkdown()))
275275
} catch {
276276
setInputValue(editor.getHTML())
277277
}

packages/ui/src/ui-component/input/RichInput.jsx

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,10 @@ import { common, createLowlight } from 'lowlight'
1313
import { suggestionOptions } from './suggestionOption'
1414
import { getAvailableNodesForVariable } from '@/utils/genericHelper'
1515
import { CustomMention } from '@/utils/customMention'
16+
import { isHtmlContent, escapeXmlTags, unescapeXmlEntities, unescapeXmlTags } from '@/utils/xmlTagUtils'
1617

1718
const lowlight = createLowlight(common)
1819

19-
// Detect if content is legacy HTML (from old getHTML() storage) vs markdown
20-
const isHtmlContent = (content) => {
21-
if (!content || typeof content !== 'string') return false
22-
return /<(?:p|div|span|h[1-6]|ul|ol|li|br|code|pre|blockquote|table|strong|em)\b/i.test(content)
23-
}
24-
2520
// define your extension array
2621
const extensions = (
2722
availableNodesForVariable,
@@ -154,7 +149,7 @@ export const RichInput = ({ inputParam, value, nodes, edges, nodeId, onChange, d
154149
onUpdate: ({ editor }) => {
155150
if (useMarkdown) {
156151
try {
157-
onChange(editor.getMarkdown())
152+
onChange(unescapeXmlTags(editor.getMarkdown()))
158153
} catch {
159154
onChange(editor.getHTML())
160155
}
@@ -173,7 +168,10 @@ export const RichInput = ({ inputParam, value, nodes, edges, nodeId, onChange, d
173168
if (!useMarkdown || isHtmlContent(value)) {
174169
editor.commands.setContent(value)
175170
} else {
176-
editor.commands.setContent(value, { contentType: 'markdown' })
171+
// Step 1: Escape XML tags to entities so marked treats them as text
172+
editor.commands.setContent(escapeXmlTags(value), { contentType: 'markdown' })
173+
// Step 2: Decode entities in the ProseMirror doc for proper display
174+
editor.commands.setContent(unescapeXmlEntities(editor.getJSON()))
177175
}
178176
}
179177
}, [editor]) // eslint-disable-line react-hooks/exhaustive-deps
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/**
2+
* Utilities for preserving XML/HTML tags in prompt text through TipTap's markdown roundtrip.
3+
*
4+
* Problem: When content like `<question>text</question>` is parsed by marked (via @tiptap/markdown),
5+
* the lexer tokenizes tags as HTML tokens. TipTap's parseHTMLToken then calls generateJSON which
6+
* creates DOM elements — unrecognized tags are stripped and only inner text survives.
7+
*
8+
* Solution: Three-step process:
9+
* 1. escapeXmlTags: Convert all tags to HTML entities before markdown parsing
10+
* so marked treats them as text, not HTML tokens.
11+
* 2. unescapeXmlEntities: After TipTap builds the ProseMirror document, walk the JSON tree
12+
* and decode &lt;/&gt; back to </> in text nodes for proper visual display.
13+
* 3. unescapeXmlTags: After getMarkdown(), reverse any remaining entity-escaped tags
14+
* in the serialized output (safety net — typically a no-op).
15+
*/
16+
17+
/**
 * Detect whether content is legacy HTML (from old getHTML() storage) rather than markdown.
 *
 * Legacy content starts with one of a fixed set of block-level tags (p, div, h1-h6, ul, ol,
 * blockquote, pre, table), optionally preceded by whitespace. The match is anchored to the
 * start of the string so HTML tags that appear later in a user prompt do not count.
 *
 * The tag name must be followed by whitespace, `/` or `>`. A plain word boundary is not
 * enough: it also matches before `-` and `.`, which would misclassify custom prompt tags
 * such as `<pre-conditions>` or `<p-note>` as legacy HTML.
 *
 * @example
 * isHtmlContent('<p>some text</p>')                    // → true  (legacy getHTML output)
 * isHtmlContent('<instruction>text</instruction>')     // → false (user prompt)
 * isHtmlContent('<pre-conditions>x</pre-conditions>')  // → false (custom tag, not <pre>)
 *
 * @param {string} content - Content to check
 * @returns {boolean} True if content looks like legacy HTML; false for empty or non-string input
 */
export const isHtmlContent = (content) => {
    if (!content || typeof content !== 'string') return false
    return /^\s*<(?:p|div|h[1-6]|ul|ol|blockquote|pre|table)(?=[\s/>])/i.test(content)
}
33+
34+
/**
 * Global regex matching opening, closing, and self-closing XML/HTML tags.
 *
 * Capture groups:
 *   1. optional leading slash (present on closing tags)
 *   2. tag name: a letter followed by letters, digits, `_`, `.` or `-`
 *   3. optional attributes: one whitespace character followed by any characters except `>`
 *   4. optional self-closing slash
 *
 * Group 3 is greedy and may itself contain `/`, so when attributes are present it also
 * swallows any trailing slash; group 4 is only non-empty for attribute-less tags such as `<br/>`.
 */
38+
const XML_TAG_REGEX = /<(\/?)([a-zA-Z][a-zA-Z0-9_.-]*)(\s[^>]*)?(\/?)>/g
39+
40+
/**
 * Turn the angle brackets of every XML/HTML tag into `&lt;` / `&gt;` entities so that the
 * markdown parser sees the tags as plain text instead of HTML tokens.
 * Everything between the brackets (slash, tag name, attributes) is kept as written.
 *
 * @example
 * escapeXmlTags('<instructions>Be helpful</instructions>')
 * // → '&lt;instructions&gt;Be helpful&lt;/instructions&gt;'
 *
 * escapeXmlTags('<div><question>text</question></div>')
 * // → '&lt;div&gt;&lt;question&gt;text&lt;/question&gt;&lt;/div&gt;'
 *
 * @param {string} text - Raw markdown/text content
 * @returns {string} Content with tags escaped to HTML entities; empty or non-string input is returned as-is
 */
export function escapeXmlTags(text) {
    // Anything other than a non-empty string is handed back untouched.
    if (typeof text !== 'string' || text === '') return text
    // $1..$4 are the slash, tag name, attributes and self-closing slash captured by XML_TAG_REGEX;
    // a group that did not participate in the match expands to an empty string.
    return text.replace(XML_TAG_REGEX, '&lt;$1$2$3$4&gt;')
}
60+
61+
/**
 * Decode entity-escaped XML tags in the text nodes of a ProseMirror JSON tree.
 *
 * Walks the node and its `content` children recursively and passes every string `text`
 * through unescapeXmlTags. The tree is mutated in place and the same object is returned,
 * so the result can be passed straight back to `editor.commands.setContent(...)`.
 *
 * Input that is not an object (null, undefined, primitives) is returned unchanged, and a
 * `content` property that is not an array is ignored, so a malformed node cannot throw.
 *
 * @example
 * const json = { type: 'doc', content: [
 *   { type: 'paragraph', content: [{ type: 'text', text: '&lt;question&gt;What?&lt;/question&gt;' }] }
 * ]}
 * unescapeXmlEntities(json)
 * // json.content[0].content[0].text → '<question>What?</question>'
 *
 * @param {object} json - ProseMirror document JSON from editor.getJSON()
 * @returns {object} The same JSON with decoded entities in text nodes
 */
export function unescapeXmlEntities(json) {
    // Nothing to walk for null/undefined/primitives.
    if (!json || typeof json !== 'object') return json

    if (typeof json.text === 'string') {
        json.text = unescapeXmlTags(json.text)
    }
    if (Array.isArray(json.content)) {
        // Arrow wrapper so forEach's extra (index, array) arguments are not forwarded.
        json.content.forEach((child) => unescapeXmlEntities(child))
    }
    return json
}
85+
86+
/**
 * Global regex matching entity-escaped tags (`&lt;tag ...&gt;`).
 * Captures: (1) optional slash, (2) tag name, (3) optional attributes, (4) optional self-close slash.
 *
 * The attribute group uses `[\s\S]` rather than `.` so that attributes spanning several
 * lines are matched; it is lazy so the match ends at the first `&gt;`.
 */
const ESCAPED_XML_TAG_REGEX = /&lt;(\/?)([a-zA-Z][a-zA-Z0-9_.-]*)(\s[\s\S]*?)?(\/?)&gt;/g

/**
 * Restore entity-escaped XML/HTML tags to real angle brackets after markdown serialization.
 *
 * Only `&lt;`/`&gt;` pairs that wrap something shaped like a tag (optional slash, a tag name,
 * optional attributes) are converted; stray entities such as in `1 &lt; 2` are left alone.
 * Tags whose attributes wrap onto further lines are restored too.
 *
 * @example
 * unescapeXmlTags('&lt;question&gt;text&lt;/question&gt;')
 * // → '<question>text</question>'
 *
 * unescapeXmlTags('&lt;div&gt;text&lt;/div&gt;')
 * // → '<div>text</div>'
 *
 * unescapeXmlTags('<question>text</question>')
 * // → '<question>text</question>' (raw tags pass through unchanged)
 *
 * @param {string} text - Markdown output from TipTap
 * @returns {string} Content with tags restored to angle brackets; empty or non-string input is returned as-is
 */
export function unescapeXmlTags(text) {
    if (!text || typeof text !== 'string') return text
    return text.replace(ESCAPED_XML_TAG_REGEX, (match, slash, tagName, attrs, selfClose) => {
        // attrs is undefined when the tag has no attributes.
        return `<${slash}${tagName}${attrs || ''}${selfClose}>`
    })
}

0 commit comments

Comments
 (0)