Skip to content

Commit 211babf

Browse files
committed
Add HTMLAgilityPack engine xpath/css query selector patterns
Added new IAIEngine interface for overriding certain prompt items.
1 parent 2954aa5 commit 211babf

13 files changed

Lines changed: 495 additions & 16 deletions

File tree

RegExpressWPFNET/RegExpressLibrary/IRegexEngine.cs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,21 @@ namespace RegExpressLibrary
1515
public delegate void RegexEngineOptionsChanged( IRegexEngine sender, RegexEngineOptionsChangedArgs args );
1616

1717

18-
public interface IRegexEngine
18+
public interface IBaseEngine
19+
{
20+
string Name { get; }
21+
string? ExportOptions( ); // (JSON)
22+
}
23+
24+
/// <summary>
/// Prompt-related values that <c>AiService.GetSystemPrompt</c> reads when it builds the AI system prompt.
/// Every member has a default body describing a regex engine; an implementing engine may supply its own values.
/// </summary>
public interface IAIEngine : IBaseEngine
25+
{
26+
// Name of the pattern language; interpolated into the system prompt sentences.
string AIPatternType => "Regex";
27+
// Language tag used for the Markdown code fence the assistant is asked to answer with.
string AIPatternCodeblockType => "regex";
28+
// Extra instructions appended to the system prompt, placed before the exported engine options.
string AIAdditionalSystemPrompt => "If the language supports named capture groups, use these by default. " +
29+
"If the user has ignoring patterned whitespace enabled in the options, use multi-lines and minimal in-regex comments for complex regexes with nice whitespace formatting to make it more readable. ";
30+
}
31+
32+
public interface IRegexEngine : IAIEngine
1933
{
2034
event RegexEngineOptionsChanged? OptionsChanged;
2135
event EventHandler? FeatureMatrixReady;
@@ -26,8 +40,6 @@ public interface IRegexEngine
2640

2741
(string Kind, string? Version) CombinedId => (Kind, Version);
2842

29-
string Name { get; }
30-
3143
string Subtitle { get; }
3244

3345
RegexEngineCapabilityEnum Capabilities { get; }
@@ -36,8 +48,6 @@ public interface IRegexEngine
3648

3749
Control GetOptionsControl( );
3850

39-
string? ExportOptions( ); // (JSON)
40-
4151
void ImportOptions( string? json );
4252

4353
RegexMatches GetMatches( ICancellable cnc, [StringSyntax( StringSyntaxAttribute.Regex )] string pattern, string text );

RegExpressWPFNET/RegExpressWPFNET.slnx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
<Folder Name="/RegexEngines/Fortran/">
3838
<Project Path="RegexEngines/Fortran/FortranPlugin/FortranPlugin.csproj" />
3939
</Folder>
40+
<Folder Name="/RegexEngines/HtmlAgilityPack/">
41+
<Project Path="RegexEngines/HtmlAgilityPack/HtmlAgilityPackPlugin/HtmlAgilityPackPlugin.csproj" />
42+
</Folder>
4043
<Folder Name="/RegexEngines/Hyperscan/">
4144
<Project Path="RegexEngines/Hyperscan/HyperscanPlugin/HyperscanPlugin.csproj">
4245
<BuildDependency Project="RegexEngines/Hyperscan/HyperscanWorker/HyperscanWorker.vcxproj" />
@@ -142,6 +145,7 @@
142145
<BuildDependency Project="RegexEngines/DotNETFramework4_8/DotNETFrameworkPlugin/DotNETFrameworkPlugin.csproj" />
143146
<BuildDependency Project="RegexEngines/DotNETFramework4_8/DotNETFrameworkWorker/DotNETFrameworkWorker.csproj" />
144147
<BuildDependency Project="RegexEngines/Fortran/FortranPlugin/FortranPlugin.csproj" />
148+
<BuildDependency Project="RegexEngines/HtmlAgilityPack/HtmlAgilityPackPlugin/HtmlAgilityPackPlugin.csproj" />
145149
<BuildDependency Project="RegexEngines/Hyperscan/HyperscanPlugin/HyperscanPlugin.csproj" />
146150
<BuildDependency Project="RegexEngines/Hyperscan/HyperscanWorker/HyperscanWorker.vcxproj" />
147151
<BuildDependency Project="RegexEngines/ICU/ICUPlugin/ICUPlugin.csproj" />

RegExpressWPFNET/RegExpressWPFNET/Code/AIService.cs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@ public class AiService
1919
private ChatHistory? _chatHistory; // Persistent conversation history
2020
public bool IsConfigured => _chatService != null;
2121

22-
public string EngineName { get; private set; } = string.Empty;
23-
public string? EngineOptions { get; private set; }
2422
public string? CurrentProvider { get; private set; }
2523
public string? CurrentModelId { get; private set; }
24+
/// <summary>
/// Engine whose name, prompt settings and exported options feed the system prompt.
/// Declared nullable because it has no initializer and <c>GetSystemPrompt</c> explicitly handles the null case;
/// it is assigned by <c>SetEngineInfo</c>.
/// </summary>
public IAIEngine? Engine { get; private set; }
2625

2726
public void Configure( string provider, string apiKey, string modelId, string endpoint )
2827
{
@@ -105,12 +104,13 @@ private void SetSystemPromptIfChanged( bool force = false )
105104

106105
private string GetSystemPrompt( )
107106
{
108-
return $"You are a {EngineName} Regex expert assistant. The user has questions about their regex patterns and target text. " +
109-
"Provide Regex patterns inside Markdown code blocks (```regex ... ```). " +
107+
if (Engine == null)
108+
return string.Empty;
109+
return $"You are a {Engine.Name} {Engine.AIPatternType} expert assistant. The user has questions about their {Engine.AIPatternType} patterns and target text. " +
110+
$"Provide {Engine.AIPatternType} patterns inside Markdown code blocks (```{Engine.AIPatternCodeblockType} ... ```). " +
110111
"Explain how the pattern works briefly. " +
111-
"If the language supports named capture groups, use these by default. " +
112-
"If the user has ignoring patterned whitespace enabled in the options, use multi-lines and minimal in-regex comments for complex regexes with nice whitespace formatting to make it more readable. " +
113-
$"They currently have these engine options enabled: {EngineOptions}";
112+
Engine.AIAdditionalSystemPrompt +
113+
$"They currently have these engine options enabled: {Engine.ExportOptions()}";
114114
}
115115

116116
public async Task<string> GetSuggestionAsync( string userPrompt, string curRegex, string targetText )
@@ -192,10 +192,9 @@ public void ClearConversation( )
192192
}
193193
}
194194

195-
internal void SetEngineInfo( string name, string? engine_options )
195+
internal void SetEngineInfo( IAIEngine engine )
196196
{
197-
this.EngineName = name;
198-
this.EngineOptions = engine_options;
197+
this.Engine = engine;
199198
}
200199

201200
public void Reset( )

RegExpressWPFNET/RegExpressWPFNET/Engines.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
{
44
"path": "Engines\\DotNET9\\DotNET9Plugin.dll"
55
},
6+
{
7+
"path": "Engines\\HtmlAgilityPack\\HtmlAgilityPackPlugin.dll",
8+
"no_fm": true
9+
},
610
{
711
"path": "Engines\\DotNETFramework4_8\\DotNETFrameworkPlugin.dll",
812
"no_fm": true

RegExpressWPFNET/RegExpressWPFNET/UCAI.xaml.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ private async Task SendToAi( )
302302
}
303303

304304
var engine = CurrentTab.CurrentRegexEngine;
305-
_aiService.SetEngineInfo( engine.Name, engine.ExportOptions( ) );
305+
_aiService.SetEngineInfo( engine );
306306
var question = ChatInput.Text;
307307
var curRegex = CurrentTab.ucPattern.GetTextData( "\n" ).Text;
308308

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
using RegExpressLibrary;
2+
using RegExpressLibrary.Matches;
3+
using RegExpressLibrary.SyntaxColouring;
4+
using System;
5+
using System.Collections.Generic;
6+
using System.Text.Json;
7+
using System.Windows.Controls;
8+
9+
10+
namespace HtmlAgilityPackPlugin
11+
{
12+
class Engine : IRegexEngine, IAIEngine
13+
14+
15+
{
16+
string IAIEngine.AIPatternType => Options.SelectorMode == SelectorMode.XPath ? "html xpath" : "html css query selector";
17+
string IAIEngine.AIPatternCodeblockType => Options.SelectorMode == SelectorMode.XPath ? "xpath" : "css";
18+
string IAIEngine.AIAdditionalSystemPrompt => "";
19+
static readonly Lazy<string?> LazyVersion = new( Matcher.GetVersion );
20+
21+
Options mOptions = new( );
22+
readonly Lazy<UCOptions> mOptionsControl;
23+
24+
public Engine( )
25+
{
26+
mOptionsControl = new Lazy<UCOptions>( ( ) =>
27+
{
28+
UCOptions oc = new( );
29+
oc.SetOptions( Options );
30+
oc.Changed += OptionsControl_Changed;
31+
32+
return oc;
33+
} );
34+
}
35+
36+
public Options Options
37+
{
38+
get
39+
{
40+
return mOptions;
41+
}
42+
set
43+
{
44+
mOptions = value;
45+
46+
if( mOptionsControl.IsValueCreated ) mOptionsControl.Value.SetOptions( mOptions );
47+
}
48+
}
49+
50+
#region IRegexEngine
51+
52+
public string Kind => "HtmlAgilityPack";
53+
54+
public string? Version => LazyVersion.Value;
55+
56+
public string Name => "HtmlAgilityPack";
57+
58+
public string Subtitle => mOptions.SelectorMode == SelectorMode.XPath ? "XPath" : "CSS";
59+
60+
public RegexEngineCapabilityEnum Capabilities => RegexEngineCapabilityEnum.Default;// | RegexEngineCapabilityEnum.NoCaptures;
61+
62+
public string? NoteForCaptures => "This engine uses XPath or CSS selectors to select HTML nodes, not regex patterns.";
63+
64+
public event RegexEngineOptionsChanged? OptionsChanged;
65+
#pragma warning disable 0067
66+
public event EventHandler? FeatureMatrixReady;
67+
#pragma warning restore 0067
68+
69+
public Control GetOptionsControl( )
70+
{
71+
return mOptionsControl.Value;
72+
}
73+
74+
public string? ExportOptions( )
75+
{
76+
string json = JsonSerializer.Serialize( Options, JsonUtilities.JsonOptions );
77+
78+
return json;
79+
}
80+
81+
public void ImportOptions( string? json )
82+
{
83+
if( string.IsNullOrWhiteSpace( json ) )
84+
{
85+
Options = new Options( );
86+
}
87+
else
88+
{
89+
try
90+
{
91+
// The JSON literal "null" passes the whitespace check above and deserializes to null;
// fall back to default options instead of storing null (later Options.SelectorMode reads would throw).
Options = JsonSerializer.Deserialize<Options>( json, JsonUtilities.JsonOptions ) ?? new Options( );
92+
}
93+
catch( Exception ex )
94+
{
95+
// ignore versioning errors, for example
96+
if( InternalConfig.HandleException( ex ) )
97+
throw;
98+
99+
Options = new Options( );
100+
}
101+
}
102+
}
103+
104+
public RegexMatches GetMatches( ICancellable cnc, string pattern, string text )
105+
{
106+
return Matcher.GetMatches( cnc, pattern, text, Options );
107+
}
108+
109+
public SyntaxOptions GetSyntaxOptions( )
110+
{
111+
// XPath and CSS selectors have their own syntax, not regex syntax
112+
// Return Literal mode so no regex syntax highlighting is applied
113+
return new SyntaxOptions
114+
{
115+
Literal = true,
116+
XLevel = XLevelEnum.none,
117+
FeatureMatrix = new FeatureMatrix( )
118+
};
119+
}
120+
121+
public IReadOnlyList<FeatureMatrixVariant> GetFeatureMatrices( )
122+
{
123+
// This engine doesn't use regex, so return empty feature matrix
124+
return [];
125+
}
126+
127+
public void SetIgnoreCase( bool yes )
128+
{
129+
// XPath/CSS selectors don't have a direct ignore case option
130+
// but we could potentially add this in the future
131+
}
132+
133+
public void SetIgnorePatternWhitespace( bool yes )
134+
{
135+
// Not applicable for XPath/CSS selectors
136+
}
137+
138+
#endregion
139+
140+
private void OptionsControl_Changed( object? sender, RegexEngineOptionsChangedArgs args )
141+
{
142+
OptionsChanged?.Invoke( this, args );
143+
}
144+
}
145+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<Nullable>enable</Nullable>
5+
<UseWPF>true</UseWPF>
6+
<EnableDynamicLoading>true</EnableDynamicLoading>
7+
<RunPostBuildEvent>OnOutputUpdated</RunPostBuildEvent>
8+
</PropertyGroup>
9+
10+
<ItemGroup>
11+
<PackageReference Include="Universal.HtmlAgilityPack" Version="1.0.2">
12+
<PrivateAssets>All</PrivateAssets>
13+
</PackageReference>
14+
</ItemGroup>
15+
16+
<ItemGroup>
17+
<ProjectReference Include="..\..\..\RegExpressLibrary\RegExpressLibrary.csproj">
18+
<Private>false</Private>
19+
<ExcludeAssets>runtime</ExcludeAssets>
20+
</ProjectReference>
21+
</ItemGroup>
22+
23+
</Project>

0 commit comments

Comments
 (0)