@@ -206,6 +206,53 @@ const (
206206 rRegexpClose = '/'
207207)
208208
// LexerFlag is a bit set of options that change Lexer behavior. Individual
// flags may be combined with the bitwise OR operator.
type LexerFlag uint64

// Primitive flags. Each one occupies a single bit.
const (
	// LexNoRegexps disables regular expressions.
	LexNoRegexps LexerFlag = 1 << iota
	// LexNoBools disables true/false/yes/no parsing.
	LexNoBools
	// LexNoDurations disables durations.
	LexNoDurations
	// LexNoRationals disables rationals.
	LexNoRationals
	// LexNoFloats disables floating point numbers.
	LexNoFloats
	// LexNoBaseInts disables non-base-10 number forms.
	LexNoBaseInts
	// LexNoNumbers disables all numbers.
	// Implies LexNoBaseInts, LexNoFloats, LexNoRationals, and LexNoDurations.
	LexNoNumbers
)

// Derived flag sets.
const (
	// LexDefaultFlags is the empty flag set (the default).
	LexDefaultFlags LexerFlag = 0

	// LexWordLiterals treats all literals, other than strings and compounds
	// (maps, arrays), as words. This is the union of all LexNo* flags.
	LexWordLiterals LexerFlag = LexNoRegexps |
		LexNoBools |
		LexNoDurations |
		LexNoRationals |
		LexNoFloats |
		LexNoBaseInts |
		LexNoNumbers
)

// none reports whether f has none of the given bits set.
// An empty bits argument always yields true.
func (f LexerFlag) none(bits LexerFlag) bool {
	return !f.any(bits)
}

// any reports whether f has at least one of the given bits set.
// An empty bits argument always yields false.
func (f LexerFlag) any(bits LexerFlag) bool {
	return f&bits != 0
}

// all reports whether f has every one of the given bits set.
// An empty bits argument always yields true.
func (f LexerFlag) all(bits LexerFlag) bool {
	// Clearing f's bits out of the request leaves zero only when nothing
	// requested is missing from f.
	return bits&^f == 0
}
255+
209256// Lexer takes an input sequence of runes and constructs Tokens from it.
210257type Lexer struct {
211258 // Precision is the precision used in *big.Float when taking the actual value of a TFloat
@@ -218,6 +265,9 @@ type Lexer struct {
218265 // priority.
219266 Name string
220267
268+ // Flags is a set of Lex flags that can be used to change lexer behavior.
269+ Flags LexerFlag
270+
221271 scanner io.RuneReader
222272
223273 pending bool
@@ -238,8 +288,10 @@ func NewLexer(r io.Reader) *Lexer {
238288 rr := runeReader (r )
239289
240290 le := & Lexer {
241- scanner : rr ,
242- pos : Location {Line : 1 , Column : 1 },
291+ Precision : DefaultPrecision ,
292+ Flags : LexDefaultFlags ,
293+ scanner : rr ,
294+ pos : Location {Line : 1 , Column : 1 },
243295 }
244296 return le
245297}
@@ -557,8 +609,11 @@ func (l *Lexer) lexSegment(r rune) (Token, consumerFunc, error) {
557609 // Map / regexp (#// | #{})
558610 case r == rSpecial :
559611 return noToken , l .lexSpecial , nil
612+ }
560613
561614 // Numerics (integer, decimal, rational, duration)
615+ switch {
616+ case l .Flags .any (LexNoNumbers ):
562617 case isSign (r ):
563618 l .buffer (r , r )
564619 return noToken , l .lexSignedNumber , nil
@@ -568,17 +623,20 @@ func (l *Lexer) lexSegment(r rune) (Token, consumerFunc, error) {
568623 case isDecimal (r ):
569624 l .buffer (r , r )
570625 return noToken , l .lexNonZero , nil
626+ }
571627
572628 // String
573- case r == rDoubleQuote :
629+ switch r {
630+ case rDoubleQuote :
574631 l .buffer (r , - 1 )
575632 return noToken , l .lexString , nil
576- case r == rBackQuote :
633+ case rBackQuote :
577634 l .buffer (r , - 1 )
578635 return noToken , l .lexRawString , nil
636+ }
579637
580638 // Word
581- case isBarewordRune (r ):
639+ if isBarewordRune (r ) {
582640 return l .lexBecomeWord (r )
583641 }
584642 return noToken , nil , fmt .Errorf ("unexpected character %q at %v" , r , l .pos )
@@ -604,8 +662,13 @@ func (l *Lexer) lexWordTail(next consumerFunc) consumerFunc {
604662 return noToken , wordConsumer , nil
605663 }
606664 l .unread ()
665+
607666 tok := l .token (TWord , true )
608667 tok .Value = string (tok .Raw )
668+ if l .Flags .none (LexNoBools ) {
669+ tok = wordToBool (tok )
670+ }
671+
609672 return tok , next , nil
610673 }
611674 return wordConsumer
@@ -631,7 +694,7 @@ func (l *Lexer) lexSegmentTail(r rune) (Token, consumerFunc, error) {
631694
632695func (l * Lexer ) lexSignedNumber (r rune ) (Token , consumerFunc , error ) {
633696 switch {
634- case isDecimal (r ):
697+ case l . Flags . none ( LexNoNumbers ) && isDecimal (r ):
635698 l .buffer (r , r )
636699 if r == '0' {
637700 return noToken , l .lexZero , nil
@@ -889,17 +952,24 @@ func (l *Lexer) lexFloatPoint(r rune) (Token, consumerFunc, error) {
889952 // Sep -> Float
890953 // BarewordRune -> lex bareword
891954 //
955+ var (
956+ allowFloat = l .Flags .none (LexNoFloats )
957+ allowDurations = l .Flags .none (LexNoDurations )
958+ )
892959 switch {
893- case r == 'E' || r == 'e' : // exponent
960+ case allowFloat && ( r == 'E' || r == 'e' ) : // exponent
894961 l .buffer (r , r )
895962 return noToken , l .lexFloatExponentUnsigned , nil
896- case isIntervalInitial (r ):
963+ case allowDurations && isIntervalInitial (r ):
897964 return l .lexIntervalConsumer (r )
898965 case isDecimal (r ):
899966 l .buffer (r , r )
900967 return noToken , l .lexFloatPoint , nil
901968 case isStatementSep (r ) || r == eof :
902969 l .unread ()
970+ if ! allowFloat {
971+ return l .lexBecomeWord (- 1 )
972+ }
903973 tok , err := l .valueToken (TFloat , parseBigFloat (l .Precision ))
904974 return tok , l .lexSegment , err
905975 case isBarewordTransition (r ):
@@ -1070,29 +1140,33 @@ func (l *Lexer) lexZero(r rune) (Token, consumerFunc, error) {
10701140 // 'Ee' -> lex float from exponent (necessarily zero)
10711141 // BarewordRune -> lex bareword
10721142 //
1073- switch {
1143+ switch allowBaseInts := l . Flags . none ( LexNoBaseInts ); {
10741144 case isStatementSep (r ), r == - 1 :
10751145 l .unread ()
10761146 tok , err := l .valueToken (TInteger , parseBaseInt (10 ))
10771147 return tok , l .lexSegment , err
10781148 case isOctal (r ):
1149+ if ! allowBaseInts {
1150+ return l .lexBecomeWord (r )
1151+ }
10791152 l .buffer (r , r )
10801153 return noToken , l .lexOctalNumber , nil
1081- case r == rFracSep :
1154+ case l . Flags . none ( LexNoRationals ) && r == rFracSep :
10821155 l .buffer (r , r )
10831156 return noToken , l .lexRationalDenomInitial , nil
1084- case r == 'b' || r == 'B' :
1157+ case allowBaseInts && ( r == 'b' || r == 'B' ) :
10851158 l .buffer (r , - 1 )
10861159 return noToken , l .lexNoTerminate (l .lexBinNum , "binary digit" ), nil
1087- case r == 'x' || r == 'X' :
1160+ case allowBaseInts && ( r == 'x' || r == 'X' ) :
10881161 l .buffer (r , - 1 )
10891162 return noToken , l .lexNoTerminate (l .lexHexNum , "hex digit" ), nil
1090- case r == rDot :
1163+ case ! l .Flags .all (LexNoDurations | LexNoFloats ) && r == rDot :
1164+ // Continue parsing here unless both floats and durations are disabled
10911165 l .buffer (r , r )
10921166 return noToken , l .lexFloatPointInitial , nil
1093- case isIntervalInitial (r ):
1167+ case l . Flags . none ( LexNoDurations ) && isIntervalInitial (r ):
10941168 return l .lexIntervalConsumer (r )
1095- case r == 'E' || r == 'e' :
1169+ case l . Flags . none ( LexNoFloats ) && ( r == 'E' || r == 'e' ) :
10961170 l .buffer (r , r )
10971171 return noToken , l .lexFloatExponentUnsigned , nil
10981172 case isBarewordTransition (r ):
@@ -1123,12 +1197,12 @@ func (l *Lexer) lexNonZero(r rune) (Token, consumerFunc, error) {
11231197 case isDecimal (r ):
11241198 l .buffer (r , r )
11251199 return noToken , l .lexNonZero , nil
1126- case isIntervalInitial (r ):
1200+ case l . Flags . none ( LexNoDurations ) && isIntervalInitial (r ):
11271201 return l .lexIntervalConsumer (r )
11281202 }
11291203
1130- switch r {
1131- case rBaseSep :
1204+ switch {
1205+ case l . Flags . none ( LexNoBaseInts ) && r == rBaseSep :
11321206 l .buffer (r , - 1 )
11331207
11341208 str := l .strbuf .String ()
@@ -1143,13 +1217,13 @@ func (l *Lexer) lexNonZero(r rune) (Token, consumerFunc, error) {
11431217
11441218 l .strbuf .Reset ()
11451219 return noToken , l .lexBaseNumber (neg , base ), nil
1146- case rFracSep :
1220+ case l . Flags . none ( LexNoRationals ) && r == rFracSep :
11471221 l .buffer (r , r )
11481222 return noToken , l .lexRationalDenomInitial , nil
1149- case rDot :
1223+ case ! l . Flags . all ( LexNoDurations | LexNoFloats ) && r == rDot :
11501224 l .buffer (r , r )
11511225 return noToken , l .lexFloatPointInitial , nil
1152- case 'E' , 'e' :
1226+ case l . Flags . none ( LexNoFloats ) && ( r == 'E' || r == 'e' ) :
11531227 l .buffer (r , r )
11541228 return noToken , l .lexFloatExponentUnsigned , nil
11551229 }
@@ -1375,7 +1449,7 @@ func (l *Lexer) lexSpecial(r rune) (Token, consumerFunc, error) {
13751449 switch {
13761450 case r == rCurlOpen :
13771451 return l .token (TMapOpen , false ), l .lexSegment , nil
1378- case r == rRegexpOpen :
1452+ case r == rRegexpOpen && l . Flags . none ( LexNoRegexps ) :
13791453 l .buffer (rSpecial , - 1 )
13801454 l .buffer (r , - 1 )
13811455 return noToken , l .lexRegexp , nil
@@ -1431,3 +1505,20 @@ func (l *Lexer) lexRegexp(r rune) (Token, consumerFunc, error) {
14311505 l .buffer (r , r )
14321506 return noToken , l .lexRegexp , nil
14331507}
1508+
1509+ func wordToBool (tok Token ) Token {
1510+ if tok .Kind != TWord {
1511+ return tok
1512+ }
1513+ s , ok := tok .Value .(string )
1514+ if ! ok {
1515+ return tok
1516+ }
1517+ switch s {
1518+ case "TRUE" , "True" , "true" , "YES" , "Yes" , "yes" :
1519+ tok .Kind , tok .Value = TBoolean , true
1520+ case "FALSE" , "False" , "false" , "NO" , "No" , "no" :
1521+ tok .Kind , tok .Value = TBoolean , false
1522+ }
1523+ return tok
1524+ }
0 commit comments