11using System ;
22using System . Collections . Generic ;
33using System . Linq ;
4+ using System . Reflection ;
45using HtmlAgilityPack ;
56using RegExpressLibrary ;
67using RegExpressLibrary . Matches ;
@@ -12,6 +13,12 @@ namespace HtmlAgilityPackPlugin
1213{
1314 static class Matcher
1415 {
// Cached reflection handle for HtmlNode's non-public "_outerlength" instance field.
// Why: HtmlAgilityPack's OuterHtml property returns reconstructed/normalized HTML whose length
// may differ from the original text (e.g., due to CRLF normalization), whereas "_outerlength"
// holds the node's length in the original source text, which is what highlighting needs.
// The value is null when the field cannot be found or is not declared as int, so callers can
// fall back to OuterHtml.Length instead of failing on an unboxing cast.
private static readonly FieldInfo? OuterLengthField = ResolveOuterLengthField();

// Looks up "_outerlength" on HtmlNode once; returns null unless it exists and is declared as int.
private static FieldInfo? ResolveOuterLengthField()
{
    FieldInfo? field = typeof(HtmlNode).GetField("_outerlength", BindingFlags.NonPublic | BindingFlags.Instance);

    // GetMatches unboxes the field value with an (int) cast; any other field type would throw
    // InvalidCastException there, so treat a type mismatch the same as a missing field.
    return field?.FieldType == typeof(int) ? field : null;
}
21+
1522 public static RegexMatches GetMatches ( ICancellable cnc , string pattern , string text , Options options )
1623 {
1724 if ( string . IsNullOrWhiteSpace ( pattern ) )
@@ -58,8 +65,19 @@ public static RegexMatches GetMatches( ICancellable cnc, string pattern, string
5865
5966 // Get the position of the node in the original text (for highlighting)
6067 int index = node . StreamPosition ;
61- string outerHtml = node . OuterHtml ;
62- int length = outerHtml . Length ;
68+
69+ // Use the private _outerlength field to get the actual length in the original text.
70+ // OuterHtml.Length can differ due to HTML normalization (e.g., CRLF → LF conversion).
71+ int length ;
72+ if ( OuterLengthField != null )
73+ {
74+ length = ( int ) ( OuterLengthField . GetValue ( node ) ?? node . OuterHtml . Length ) ;
75+ }
76+ else
77+ {
78+ // Fallback if reflection fails (shouldn't happen, but be safe)
79+ length = node . OuterHtml . Length ;
80+ }
6381
6482 // Validate and clamp the index/length to avoid out-of-bounds
6583 if ( index < 0 ) index = 0 ;
0 commit comments