@@ -46,23 +46,36 @@ public static void PerformModuleReload(PythonContext/*!*/ context, PythonDiction
4646
4747 #region CONSTANTS
4848
/// <summary>
/// Bit flags describing regular-expression options. Each member occupies a
/// single bit so values can be combined with the bitwise operators.
/// </summary>
[Flags]
internal enum ReFlags : int {
    TEMPLATE   = 1 << 0, // 0x01
    IGNORECASE = 1 << 1, // 0x02
    LOCALE     = 1 << 2, // 0x04
    MULTILINE  = 1 << 3, // 0x08
    DOTALL     = 1 << 4, // 0x10
    UNICODE    = 1 << 5, // 0x20
    VERBOSE    = 1 << 6, // 0x40
    DEBUG      = 1 << 7, // 0x80
    ASCII      = 1 << 8, // 0x100
}
61+
// short forms -- each one is an alias of the long-form constant declared below
public const int I = IGNORECASE;
public const int L = LOCALE;
public const int M = MULTILINE;
public const int S = DOTALL;
public const int U = UNICODE;
public const int X = VERBOSE;
public const int A = ASCII;

// long forms -- integer values taken from the ReFlags enum
public const int IGNORECASE = (int)ReFlags.IGNORECASE;
public const int LOCALE = (int)ReFlags.LOCALE;
public const int MULTILINE = (int)ReFlags.MULTILINE;
public const int DOTALL = (int)ReFlags.DOTALL;
public const int UNICODE = (int)ReFlags.UNICODE;
public const int VERBOSE = (int)ReFlags.VERBOSE;
public const int ASCII = (int)ReFlags.ASCII;
6679
6780 #endregion
6881
@@ -150,28 +163,30 @@ public class Pattern : IWeakReferenceable {
150163 private PythonDictionary _groups ;
151164 private WeakRefTracker _weakRefTracker ;
152165
/// <summary>
/// Creates a pattern object from a Python-level pattern and a set of flags.
/// </summary>
/// <param name="context">Code context forwarded to PreParseRegex and GenRegex.</param>
/// <param name="pattern">
/// A Bytes, string or ExtensibleString instance; any other type raises
/// ArgumentTypeException.
/// </param>
/// <param name="flags">
/// Explicitly requested flags. Options collected by the pre-parser are merged
/// into this value before the regex is generated and before it is stored.
/// </param>
/// <param name="compiled">Forwarded to GenRegex, which adds RegexOptions.Compiled when true.</param>
internal Pattern(CodeContext/*!*/ context, object pattern, ReFlags flags = 0, bool compiled = false) {
    // PatternAsString may add UNICODE to flags. Arguments are evaluated left to
    // right, so that happens before the VERBOSE test in the last argument.
    _pre = PreParseRegex(context, PatternAsString(pattern, ref flags), (flags & ReFlags.VERBOSE) != 0);
    flags |= _pre.Options;
    _re = GenRegex(context, _pre.Pattern, flags, compiled, false);
    this.pattern = pattern;
    this.flags = (int)flags;

    // Returns the pattern text and, for str patterns, applies the default
    // UNICODE flag. UNICODE is only a default: when the caller asked for ASCII
    // it must not be added, otherwise both flags would be reported together.
    static string PatternAsString(object pattern, ref ReFlags flags) {
        switch (pattern) {
            case Bytes bytes:
                return bytes.MakeString();
            case string s:
                if ((flags & ReFlags.ASCII) == 0) flags |= ReFlags.UNICODE;
                return s;
            case ExtensibleString es:
                if ((flags & ReFlags.ASCII) == 0) flags |= ReFlags.UNICODE;
                return es.Value;
            default:
                throw new ArgumentTypeException();
        }
    }
}
173188
174- private static Regex GenRegex ( CodeContext /*!*/ context , string pattern , int flags , bool compiled , bool fullmatch ) {
189+ private static Regex GenRegex ( CodeContext /*!*/ context , string pattern , ReFlags flags , bool compiled , bool fullmatch ) {
175190 try {
176191 RegexOptions opts = FlagsToOption ( flags ) ;
177192 return new Regex ( fullmatch ? $ "(?:{ pattern } )\\ Z" : pattern , opts | ( compiled ? RegexOptions . Compiled : RegexOptions . None ) ) ;
@@ -210,7 +225,7 @@ private Regex GetRegexFullMatch(CodeContext /*!*/ context) {
210225 if ( _re_fullmatch == null ) {
211226 lock ( _re ) {
212227 if ( _re_fullmatch == null )
213- _re_fullmatch = GenRegex ( context , _pre . Pattern , flags , _re . Options . HasFlag ( RegexOptions . Compiled ) , true ) ;
228+ _re_fullmatch = GenRegex ( context , _pre . Pattern , ( ReFlags ) flags , _re . Options . HasFlag ( RegexOptions . Compiled ) , true ) ;
214229 }
215230 }
216231
@@ -837,7 +852,7 @@ private static Pattern GetPattern(CodeContext/*!*/ context, object pattern, int
837852 return res ;
838853 }
839854 }
840- res = new Pattern ( context , pattern , flags , compiled ) ;
855+ res = new Pattern ( context , pattern , ( ReFlags ) flags , compiled ) ;
841856 _cachedPatterns [ key ] = res ;
842857 return res ;
843858 }
@@ -849,45 +864,25 @@ private static IEnumerator MatchIterator(MatchCollection matches, Pattern patter
849864 }
850865 }
851866
/// <summary>
/// Translates ReFlags into the RegexOptions used to construct the .NET Regex.
/// </summary>
/// <param name="flags">Flags to translate; flags without a mapping below are ignored.</param>
/// <returns>The corresponding RegexOptions value.</returns>
private static RegexOptions FlagsToOption(ReFlags flags) {
    RegexOptions opts = RegexOptions.None;
    if ((flags & ReFlags.IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
    if ((flags & ReFlags.MULTILINE) != 0) opts |= RegexOptions.Multiline;
    // NOTE(review): opts starts as None, so clearing CultureInvariant here has no
    // effect. Kept as-is; confirm whether setting it when LOCALE is absent was intended.
    if ((flags & ReFlags.LOCALE) == 0) opts &= ~RegexOptions.CultureInvariant;
    if ((flags & ReFlags.DOTALL) != 0) opts |= RegexOptions.Singleline;

    // The Regex constructor rejects ECMAScript combined with anything other than
    // IgnoreCase, Multiline and Compiled (GenRegex may add Compiled afterwards).
    // Only request ECMAScript when the options gathered so far are compatible,
    // so that e.g. ASCII | DOTALL does not make pattern construction fail.
    const RegexOptions ecmaCompatible = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Compiled;
    if ((flags & ReFlags.ASCII) != 0 && (opts & ~ecmaCompatible) == 0) {
        opts |= RegexOptions.ECMAScript;
    }

    return opts;
}
862877
863- private static int OptionToFlags ( RegexOptions options ) {
864- int flags = 0 ;
865- if ( ( options & RegexOptions . IgnoreCase ) != 0 ) {
866- flags |= IGNORECASE ;
867- }
868- if ( ( options & RegexOptions . Multiline ) != 0 ) {
869- flags |= MULTILINE ;
870- }
871- if ( ( options & RegexOptions . CultureInvariant ) == 0 ) {
872- flags |= LOCALE ;
873- }
874- if ( ( options & RegexOptions . Singleline ) != 0 ) {
875- flags |= DOTALL ;
876- }
877- if ( ( options & RegexOptions . IgnorePatternWhitespace ) != 0 ) {
878- flags |= VERBOSE ;
879- }
880- return flags ;
881- }
882-
/// <summary>
/// Holder for the result of PreParseRegex: the original pattern text, the
/// pattern text handed to GenRegex, and the flags collected while parsing.
/// </summary>
internal class ParsedRegex {
    /// <summary>The pattern text exactly as it was passed to the constructor.</summary>
    public string UserPattern;

    /// <summary>Pattern text later passed to GenRegex; not assigned by the constructor.</summary>
    public string Pattern;

    /// <summary>
    /// Flags accumulated by PreParseRegex (VERBOSE plus the option letters it
    /// recognises). Starts out with no flag set.
    /// </summary>
    public ReFlags Options;

    public ParsedRegex(string pattern) => UserPattern = pattern;
}
892887
893888 private static readonly char [ ] _endOfLineChars = new [ ] { '\r ' , '\n ' } ;
@@ -900,6 +895,7 @@ public ParsedRegex(string pattern) {
900895 /// </summary>
901896 private static ParsedRegex PreParseRegex ( CodeContext /*!*/ context , string pattern , bool verbose ) {
902897 ParsedRegex res = new ParsedRegex ( pattern ) ;
898+ if ( verbose ) res . Options |= ReFlags . VERBOSE ;
903899
904900 //string newPattern;
905901 int cur = 0 , nameIndex ;
@@ -911,6 +907,58 @@ private static ParsedRegex PreParseRegex(CodeContext/*!*/ context, string patter
911907 int groupCount = 0 ;
912908 var namedGroups = new Dictionary < string , int > ( ) ;
913909
910+ if ( verbose ) {
911+ pattern = ApplyVerbose ( pattern ) ;
912+ }
913+
// Strips what verbose mode ignores from the pattern: unescaped whitespace and
// '#' comments (up to the end of the line) that appear outside a character
// class. Escaped characters and everything inside [...] are copied unchanged.
static string ApplyVerbose(string pattern) {
    var builder = new StringBuilder(pattern.Length);

    bool isCharList = false;
    bool isEscaped = false;
    // Position at which a ']' is still a literal member of the class that was
    // just opened (directly after "[" or "[^") rather than its terminator.
    int literalBracketIndex = -1;

    for (int i = 0; i < pattern.Length; i++) {
        var c = pattern[i];
        if (isEscaped) {
            // the character after a backslash is always copied as-is
            isEscaped = false;
        } else {
            switch (c) {
                case ' ':
                case '\t':
                case '\n':
                case '\r':
                case '\f':
                case '\v':
                    if (!isCharList) continue;
                    break;
                case '\\':
                    isEscaped = true;
                    break;
                case '[':
                    if (!isCharList) {
                        isCharList = true;
                        literalBracketIndex = i + 1;
                        if (literalBracketIndex < pattern.Length && pattern[literalBracketIndex] == '^') {
                            literalBracketIndex++;
                        }
                    }
                    break;
                case ']':
                    // "[]...]" and "[^]...]": the leading ']' does not close the class
                    if (i != literalBracketIndex) isCharList = false;
                    break;
                case '#':
                    if (!isCharList) {
                        // skip to end of line; the loop increment then steps over the line break itself
                        i = pattern.IndexOfAny(_endOfLineChars, i);
                        if (i < 0) i = pattern.Length;
                        continue;
                    }
                    break;
                default:
                    break;
            }
        }

        builder.Append(c);
    }

    return builder.ToString();
}
961+
914962 for ( ; ; ) {
915963 if ( verbose && inComment ) {
916964 // read to end of line
@@ -1008,30 +1056,34 @@ private static ParsedRegex PreParseRegex(CodeContext/*!*/ context, string patter
10081056 pattern = pattern . Remove ( nameIndex , 1 ) ;
10091057 }
10101058
1059+ break ;
1060+ case 'a' :
1061+ res . Options |= ReFlags . ASCII ;
1062+ RemoveOption ( ref pattern , ref nameIndex ) ;
10111063 break ;
10121064 case 'i' :
1013- res . Options |= RegexOptions . IgnoreCase ;
1065+ res . Options |= ReFlags . IGNORECASE ;
10141066 RemoveOption ( ref pattern , ref nameIndex ) ;
10151067 break ;
10161068 case 'L' :
1017- res . Options &= ~ ( RegexOptions . CultureInvariant ) ;
1069+ res . Options |= ReFlags . LOCALE ;
10181070 RemoveOption ( ref pattern , ref nameIndex ) ;
10191071 break ;
10201072 case 'm' :
1021- res . Options |= RegexOptions . Multiline ;
1073+ res . Options |= ReFlags . MULTILINE ;
10221074 RemoveOption ( ref pattern , ref nameIndex ) ;
10231075 break ;
10241076 case 's' :
1025- res . Options |= RegexOptions . Singleline ;
1077+ res . Options |= ReFlags . DOTALL ;
10261078 RemoveOption ( ref pattern , ref nameIndex ) ;
10271079 break ;
10281080 case 'u' :
1029- // specify unicode; not relevant and not valid under .NET as we're always unicode
1030- // -- so the option needs to be removed
1081+ res . Options |= ReFlags . UNICODE ;
10311082 RemoveOption ( ref pattern , ref nameIndex ) ;
10321083 break ;
10331084 case 'x' :
1034- res . Options |= RegexOptions . IgnorePatternWhitespace ;
1085+ if ( ! verbose ) return PreParseRegex ( context , res . UserPattern , true ) ;
1086+ res . Options |= ReFlags . VERBOSE ;
10351087 RemoveOption ( ref pattern , ref nameIndex ) ;
10361088 break ;
10371089 case ':' : break ; // non-capturing
0 commit comments