danmactough · danmactough · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/lib/utils.js b/lib/utils.js
@@ -175,17 +175,122 @@ function reresolve (node, baseurl) {
 }
 exports.reresolve = reresolve;
 
+var HTML_TAGS = new Set([ // lowercase tag names stripHtml removes; looked up with readMarkupAt's lowercased tagName
+  'a', 'abbr', 'acronym', 'address', 'applet', 'area', 'article', 'aside', 'audio',
+  'b', 'base', 'basefont', 'bdi', 'bdo', 'big', 'blink', 'blockquote', 'body', 'br', 'button',
+  'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
+  'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt',
+  'em', 'embed',
+  'fieldset', 'figcaption', 'figure', 'font', 'footer', 'form', 'frame', 'frameset',
+  'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html',
+  'i', 'iframe', 'img', 'input', 'ins', 'isindex',
+  'kbd',
+  'label', 'legend', 'li', 'link', 'listing',
+  'main', 'map', 'mark', 'marquee', 'menu', 'menuitem', 'meta', 'meter', 'multicol',
+  'nav', 'nextid', 'nobr', 'noembed', 'noframes', 'noscript',
+  'object', 'ol', 'optgroup', 'option', 'output',
+  'p', 'param', 'picture', 'plaintext', 'pre', 'progress',
+  'q',
+  'rb', 'rp', 'rt', 'rtc', 'ruby',
+  's', 'samp', 'script', 'section', 'select', 'slot', 'small', 'source', 'spacer', 'span', 'strike', 'strong', 'style', 'sub', 'summary', 'sup',
+  'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th',
+  'thead', 'time', 'title', 'tr', 'track', 'tt',
+  'u', 'ul',
+  'var', 'video',
+  'wbr',
+  'xmp'
+]);
+
+/*
+ * Scan markup starting at str[i] (which must be '<') and return its length
+ * and type if it is recognized markup, or null if it isn't (or is unclosed).
+ * Recognized types:
+ *   { alwaysStrip: true, len }  - comments, doctypes, PIs
+ *   { tagName, len }            - opening or closing tags (name lowercased)
+ * The tag name must be followed by whitespace, '/' or '>', so text such as
+ * <b@example.com> or <i-beam> is not mistaken for <b> / <i>. Quoted attribute
+ * values are respected, so title="1 > 0" doesn't cause a premature close.
+ *
+ * @param {string} str
+ * @param {number} i
+ * @returns {Object|null}
+ * @private
+ */
+function readMarkupAt (str, i) {
+  // HTML comment: <!-- ... -->
+  if (str.slice(i, i + 4) === '<!--') {
+    var commentEnd = str.indexOf('-->', i + 4);
+    return commentEnd !== -1 ? { alwaysStrip: true, len: commentEnd + 3 - i } : null;
+  }
+
+  // Processing instruction: <? ... ?>
+  if (str[i + 1] === '?') {
+    var piEnd = str.indexOf('?>', i + 2);
+    return piEnd !== -1 ? { alwaysStrip: true, len: piEnd + 2 - i } : null;
+  }
+
+  // Doctype / other <! declarations: <! ... >
+  if (str[i + 1] === '!') {
+    var declEnd = str.indexOf('>', i + 2);
+    return declEnd !== -1 ? { alwaysStrip: true, len: declEnd + 1 - i } : null;
+  }
+
+  // Closing tag or opening tag: </tagName ...> or <tagName ...>
+  var isClosing = str[i + 1] === '/';
+  var j = isClosing ? i + 2 : i + 1;
+  var nameStart = j;
+  while (j < str.length) {
+    var code = str.charCodeAt(j);
+    var isLetter = (code >= 97 && code <= 122) || (code >= 65 && code <= 90);
+    var isDigit = code >= 48 && code <= 57;
+    if (j === nameStart ? !isLetter : !(isLetter || isDigit)) break;
+    j++;
+  }
+  var tagName = str.slice(nameStart, j).toLowerCase();
+  if (!tagName || !/^[\s\/>]$/.test(str.charAt(j))) return null; // name must end at whitespace, '/' or '>'
+
+  // Scan for >, respecting quoted attribute values
+  var quote = null;
+  while (j < str.length) {
+    var ch = str[j];
+    if (quote) {
+      if (ch === quote) quote = null;
+    } else if (ch === '"' || ch === '\'') {
+      quote = ch;
+    } else if (ch === '>') {
+      return { tagName: tagName, len: j + 1 - i };
+    }
+    j++;
+  }
+  return null; // unclosed tag
+}
+
 /*
- * Aggressivly strip HTML tags
- * Pulled out of node-resanitize because it was all that was being used
- * and it's way lighter...
+ * Strip HTML tags, leaving bare text content.
+ * Scans the string for markup - HTML tags, comments, doctypes, and processing
+ * instructions - and removes them. Only tags with known HTML element names
+ * are stripped; unknown angle-bracket content like <<<NotHTML>>> is preserved.
  *
  * @param {string} str
  * @returns {string}
  * @private
  */
 function stripHtml (str) {
-  return str.replace(/<.*?>/g, '');
+  var kept = '';
+  var pos = 0;
+  while (pos < str.length) {
+    // Only a '<' can begin markup; every other character is copied through.
+    var found = str[pos] === '<' ? readMarkupAt(str, pos) : null;
+    if (found && (found.alwaysStrip || HTML_TAGS.has(found.tagName))) {
+      // Comment, doctype, PI or known HTML tag: jump past it without copying.
+      pos += found.len;
+    } else {
+      kept += str[pos];
+      pos += 1;
+    }
+  }
+  return kept;
 }
 
+exports.HTML_TAGS = HTML_TAGS;
 exports.stripHtml = stripHtml;
diff --git a/test/angle-brackets.js b/test/angle-brackets.js
@@ -0,0 +1,55 @@
+const { Readable } = require('stream');
+describe('angle brackets in title', function () {
+  // Same feed twice: the item title escapes <<< >>> with hex character references, then with named entities.
+  var feeds = [
+    `<?xml version="1.0" encoding="UTF-8" ?>
+<rss version="2.0">
+<channel>
+<title>W3Schools Home Page</title>
+<link>http://www.w3schools.com</link>
+<description>Free web building tutorials</description>
+<item>
+<title>RSS &#x3C;&#x3C;&#x3C;Tutorial&#x3E;&#x3E;&#x3E;</title>
+<link>http://www.w3schools.com/xml/xml_rss.asp</link>
+<description>New RSS tutorial on W3Schools</description>
+</item>
+</channel>
+</rss>
+`, `<?xml version="1.0" encoding="UTF-8" ?>
+<rss version="2.0">
+<channel>
+<title>W3Schools Home Page</title>
+<link>http://www.w3schools.com</link>
+<description>Free web building tutorials</description>
+<item>
+<title>RSS &lt;&lt;&lt;Tutorial&gt;&gt;&gt;</title>
+<link>http://www.w3schools.com/xml/xml_rss.asp</link>
+<description>New RSS tutorial on W3Schools</description>
+</item>
+</channel>
+</rss>
+`];
+
+  feeds.forEach(function (feed, index) {
+    it('should be properly decoded (feed #' + (index + 1) + ')', function (done) {
+      var feedparser = new FeedParser();
+      var titles = [];
+      Readable.from(feed).pipe(feedparser);
+      feedparser.on('readable', function () {
+        var item;
+        while ((item = this.read())) {
+          titles.push(item.title);
+        }
+      })
+        .on('error', function (err) {
+          // Fail through mocha's callback; assert.ifError would throw here and leave done(err) unreachable.
+          done(err);
+        })
+        .on('end', function () {
+          assert.equal(titles[0], 'RSS <<<Tutorial>>>');
+          done();
+        });
+    });
+  });
+
+});
diff --git a/test/utils.js b/test/utils.js
@@ -307,6 +307,14 @@ describe('utils', function () {
 
   describe('stripHtml', function () {
 
+    it('returns the string unchanged when there are no tags', function () {
+      assert.strictEqual(utils.stripHtml('plain text'), 'plain text');
+    });
+
+    it('returns an empty string for an empty string', function () {
+      assert.strictEqual(utils.stripHtml(''), '');
+    });
+
     it('removes simple HTML tags', function () {
       assert.strictEqual(utils.stripHtml('<b>bold</b>'), 'bold');
     });
@@ -323,12 +331,122 @@ describe('utils', function () {
       assert.strictEqual(utils.stripHtml('<a href="http://example.com">link</a>'), 'link');
     });
 
-    it('returns the string unchanged when there are no tags', function () {
-      assert.strictEqual(utils.stripHtml('plain text'), 'plain text');
+    it('strips nested tags leaving inner text', function () {
+      assert.strictEqual(utils.stripHtml('<div>You <strong>MUST</strong> remove tags</div>'), 'You MUST remove tags');
     });
 
-    it('returns an empty string for an empty string', function () {
-      assert.strictEqual(utils.stripHtml(''), '');
+    it('strips a self-closing tag without trailing slash', function () {
+      assert.strictEqual(utils.stripHtml('before<hr>after'), 'beforeafter');
+    });
+
+    it('strips a self-closing tag with trailing slash', function () {
+      assert.strictEqual(utils.stripHtml('before<br/>after'), 'beforeafter');
+    });
+
+    it('strips a self-closing tag with attributes', function () {
+      assert.strictEqual(utils.stripHtml('before<img src="x.png" alt="x">after'), 'beforeafter');
+    });
+
+    it('strips img with self-closing slash and attributes', function () {
+      assert.strictEqual(utils.stripHtml('before<img src="x.png" />after'), 'beforeafter');
+    });
+
+    it('strips closing tags', function () {
+      assert.strictEqual(utils.stripHtml('text</p>'), 'text');
+    });
+
+    it('strips tags with multiple attributes', function () {
+      assert.strictEqual(utils.stripHtml('<div class="foo" id="bar">content</div>'), 'content');
+    });
+
+    // --- edge cases: non-HTML angle brackets must be preserved ---
+
+    it('preserves literal angle brackets that are not HTML tags', function () {
+      assert.strictEqual(utils.stripHtml('1 < 2'), '1 < 2');
+    });
+
+    it('preserves less-than followed by a number', function () {
+      assert.strictEqual(utils.stripHtml('x<3'), 'x<3');
+    });
+
+    it('preserves triple left angle brackets', function () {
+      assert.strictEqual(utils.stripHtml('<<<'), '<<<');
+    });
+
+    it('preserves triple right angle brackets', function () {
+      assert.strictEqual(utils.stripHtml('>>>'), '>>>');
+    });
+
+    it('preserves encoded angle brackets decoded by XML parser', function () {
+      assert.strictEqual(utils.stripHtml('RSS <<<Tutorial>>>'), 'RSS <<<Tutorial>>>');
+    });
+
+    it('does not treat unknown tag names as HTML', function () {
+      assert.strictEqual(utils.stripHtml('a <foo> b'), 'a <foo> b');
+    });
+
+    it('does not treat a closing tag with unknown name as HTML', function () {
+      assert.strictEqual(utils.stripHtml('</foo>'), '</foo>');
+    });
+
+    it('preserves bare angle brackets adjacent to real HTML tags', function () {
+      assert.strictEqual(utils.stripHtml('<<b>bold</b>>'), '<bold>');
+    });
+
+    it('strips real HTML but preserves non-HTML angle brackets in the same string', function () {
+      assert.strictEqual(utils.stripHtml('if a < b then <em>yes</em> else x<y>z'), 'if a < b then yes else x<y>z');
+    });
+
+    it('strips non-void elements written as self-closing like <div />', function () {
+      assert.strictEqual(utils.stripHtml('<div />text</div>'), 'text');
+    });
+
+    it('strips multiline tags spanning several lines', function () {
+      assert.strictEqual(
+        utils.stripHtml('<img \n  src="logo.png" \n  alt="Company Logo" \n  width="200" \n  height="100"\n>'),
+        ''
+      );
+    });
+
+    it('strips HTML comments', function () {
+      assert.strictEqual(utils.stripHtml('before<!-- comment -->after'), 'beforeafter');
+    });
+
+    it('strips multi-line HTML comments', function () {
+      assert.strictEqual(utils.stripHtml('before<!-- a\ncomment -->after'), 'beforeafter');
+    });
+
+    it('strips doctype declarations', function () {
+      assert.strictEqual(utils.stripHtml('<!DOCTYPE html>Title'), 'Title');
+    });
+
+    it('strips XML processing instructions', function () {
+      assert.strictEqual(utils.stripHtml('<?xml version="1.0"?>text'), 'text');
+    });
+
+    it('strips xml-stylesheet processing instructions', function () {
+      assert.strictEqual(utils.stripHtml('<?xml-stylesheet href="style.xsl" type="text/xsl"?>text'), 'text');
+    });
+
+    it('strips mixed HTML tags, comments, and processing instructions in one string', function () {
+      assert.strictEqual(
+        utils.stripHtml('<?xml version="1.0"?><p><!-- intro -->Hello <em>world</em></p>'),
+        'Hello world'
+      );
+    });
+
+    it('strips tags with double-quoted attribute values containing >', function () {
+      assert.strictEqual(utils.stripHtml('<a title="1 > 0">link</a>'), 'link');
+    });
+
+    it('strips tags with single-quoted attribute values containing >', function () {
+      assert.strictEqual(utils.stripHtml('<a title=\'1 > 0\'>link</a>'), 'link');
+    });
+
+    utils.HTML_TAGS.forEach(function (tag) {
+      it(`strips ${tag} HTML tag opening and closing and self-closing`, function () {
+        assert.strictEqual(utils.stripHtml('<' + tag + '>content</' + tag + '> and <' + tag + ' />more'), 'content and more', 'expected <' + tag + '> to be stripped');
+      });
     });
 
   });