Skip to content

Commit 66a42e0

Browse files
committed
fix: preserve angle bracket markup in discussion posts (#198)
Angle bracket markup (e.g., < >) was being rendered as HTML upon submission, causing content to disappear or display incorrectly in discussion posts. While the content appeared correctly during authoring and preview, it was not preserved after posting. This issue affected both manually typed markup and content added via the "Insert/Edit Code Sample" toolbar option, making it difficult to share code snippets in programming-related discussions. This fix ensures that angle brackets are properly escaped and preserved as plaintext, maintaining consistency between preview and final rendered posts.
1 parent 68facd7 commit 66a42e0

2 files changed

Lines changed: 17 additions & 2 deletions

File tree

lms/djangoapps/discussion/rest_api/serializers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Discussion API serializers
33
"""
44

5-
import html
65
import re
76
from typing import Dict
87
from urllib.parse import urlencode, urlunparse
@@ -165,7 +164,9 @@ def filter_spam_urls_from_html(html_string):
165164
Returns:
166165
clean_post, is_spam
167166
"""
168-
html_string = html.unescape(html_string)
167+
# BeautifulSoup automatically handles HTML entities correctly.
168+
# Do NOT call html.unescape() here as it breaks properly escaped content in code blocks
169+
# (e.g., &lt;div&gt; inside <code> tags would become real <div> tags).
169170
soup = BeautifulSoup(html_string, "html.parser")
170171
patterns = []
171172
is_spam = False

lms/djangoapps/discussion/rest_api/tests/test_render.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from django.test import TestCase
88

99
from lms.djangoapps.discussion.rest_api.render import render_body
10+
from lms.djangoapps.discussion.rest_api.serializers import filter_spam_urls_from_html
1011

1112

1213
def _add_p_tags(raw_body):
@@ -103,3 +104,16 @@ def test_interleaved_tags(self):
103104
render_body('foo<i>bar<b>baz</i>quux</b>greg'),
104105
'<p>foo<i>bar<b>baz</b></i><b>quux</b>greg</p>',
105106
)
107+
108+
def test_full_pipeline_preserves_escaped_html_in_code(self):
109+
"""
110+
Test that angle brackets in code blocks remain escaped after the full pipeline.
111+
This prevents the regression where filter_spam_urls_from_html() would break
112+
properly escaped content like `<div>` by converting &lt; back to <.
113+
"""
114+
raw_body = '`<script>alert("xss")</script>`'
115+
rendered = render_body(raw_body)
116+
filtered, _ = filter_spam_urls_from_html(rendered)
117+
# Angle brackets must remain escaped as HTML entities
118+
assert '&lt;script&gt;' in filtered
119+
assert '<script>' not in filtered

0 commit comments

Comments
 (0)