Skip to content

Commit 4e12c5a

Browse files
committed
Fix match length: use HtmlNode's original-source length (_outerlength) instead of OuterHtml.Length so highlighting matches the input text
1 parent aa7956f commit 4e12c5a

1 file changed

Lines changed: 20 additions & 2 deletions

File tree

  • RegExpressWPFNET/RegexEngines/HtmlAgilityPack/HtmlAgilityPackPlugin

RegExpressWPFNET/RegexEngines/HtmlAgilityPack/HtmlAgilityPackPlugin/Matcher.cs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using System;
22
using System.Collections.Generic;
33
using System.Linq;
4+
using System.Reflection;
45
using HtmlAgilityPack;
56
using RegExpressLibrary;
67
using RegExpressLibrary.Matches;
@@ -12,6 +13,12 @@ namespace HtmlAgilityPackPlugin
1213
{
1314
static class Matcher
1415
{
16+
// Cache the FieldInfo for the private _outerlength field
17+
// HtmlAgilityPack's OuterHtml property returns reconstructed/normalized HTML which may differ
18+
// in length from the original text (e.g., due to CRLF normalization). The private _outerlength
19+
// field contains the actual length in the original source text.
20+
private static readonly FieldInfo? OuterLengthField = typeof( HtmlNode ).GetField( "_outerlength", BindingFlags.NonPublic | BindingFlags.Instance );
21+
1522
public static RegexMatches GetMatches( ICancellable cnc, string pattern, string text, Options options )
1623
{
1724
if( string.IsNullOrWhiteSpace( pattern ) )
@@ -58,8 +65,19 @@ public static RegexMatches GetMatches( ICancellable cnc, string pattern, string
5865

5966
// Get the position of the node in the original text (for highlighting)
6067
int index = node.StreamPosition;
61-
string outerHtml = node.OuterHtml;
62-
int length = outerHtml.Length;
68+
69+
// Use the private _outerlength field to get the actual length in the original text.
70+
// OuterHtml.Length can differ due to HTML normalization (e.g., CRLF → LF conversion).
71+
int length;
72+
if( OuterLengthField != null )
73+
{
74+
length = (int)( OuterLengthField.GetValue( node ) ?? node.OuterHtml.Length );
75+
}
76+
else
77+
{
78+
// Fallback if the private _outerlength field was not found (e.g. renamed in another HtmlAgilityPack version)
79+
length = node.OuterHtml.Length;
80+
}
6381

6482
// Validate and clamp the index/length to avoid out-of-bounds
6583
if( index < 0 ) index = 0;

0 commit comments

Comments
 (0)