Skip to content

Commit 565ec6a

Browse files
committed
Merge branch '14-lexer-flags'
* 14-lexer-flags: Implement lexer flags to disable special tokens
2 parents 6aee1c6 + eba1287 commit 565ec6a

3 files changed

Lines changed: 635 additions & 159 deletions

File tree

lexer.go

Lines changed: 113 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,53 @@ const (
206206
rRegexpClose = '/'
207207
)
208208

209+
// LexerFlag is a bit set of options that change lexer behavior. Individual
// flags are combined with bitwise OR; the zero value (LexDefaultFlags) leaves
// every token kind enabled.
type LexerFlag uint64

// Individual lexer flags. Each one occupies its own bit, starting at bit 0.
const (
	// LexNoRegexps disables regular expressions.
	LexNoRegexps = LexerFlag(1) << iota
	// LexNoBools disables true/false/yes/no parsing.
	LexNoBools
	// LexNoDurations disables durations.
	LexNoDurations
	// LexNoRationals disables rationals.
	LexNoRationals
	// LexNoFloats disables floating point numbers.
	LexNoFloats
	// LexNoBaseInts disables non-base-10 number forms.
	LexNoBaseInts
	// LexNoNumbers disables all numbers. It implies the effects of
	// LexNoBaseInts, LexNoFloats, LexNoRationals, and LexNoDurations.
	LexNoNumbers
)

// Predefined flag sets.
const (
	// LexDefaultFlags is the empty flag set (the default).
	LexDefaultFlags = LexerFlag(0)

	// LexWordLiterals treats all literals, other than strings and compounds
	// (maps, arrays), as words. It is the union of every LexNo* flag.
	LexWordLiterals = LexNoRegexps |
		LexNoBools |
		LexNoDurations |
		LexNoRationals |
		LexNoFloats |
		LexNoBaseInts |
		LexNoNumbers
)

// any reports whether at least one of the given bits is set in f.
// It is always false for an empty bits argument.
func (f LexerFlag) any(bits LexerFlag) bool {
	return f&bits != 0
}

// none reports whether f has none of the given bits set; it is the exact
// negation of any.
func (f LexerFlag) none(bits LexerFlag) bool {
	return !f.any(bits)
}

// all reports whether every one of the given bits is set in f.
// It is always true for an empty bits argument.
func (f LexerFlag) all(bits LexerFlag) bool {
	// Clearing f's bits out of the request leaves only the missing ones.
	return bits&^f == 0
}
255+
209256
// Lexer takes an input sequence of runes and constructs Tokens from it.
210257
type Lexer struct {
211258
// Precision is the precision used in *big.Float when taking the actual value of a TFloat
@@ -218,6 +265,9 @@ type Lexer struct {
218265
// priority.
219266
Name string
220267

268+
// Flags is a set of Lex flags that can be used to change lexer behavior.
269+
Flags LexerFlag
270+
221271
scanner io.RuneReader
222272

223273
pending bool
@@ -238,8 +288,10 @@ func NewLexer(r io.Reader) *Lexer {
238288
rr := runeReader(r)
239289

240290
le := &Lexer{
241-
scanner: rr,
242-
pos: Location{Line: 1, Column: 1},
291+
Precision: DefaultPrecision,
292+
Flags: LexDefaultFlags,
293+
scanner: rr,
294+
pos: Location{Line: 1, Column: 1},
243295
}
244296
return le
245297
}
@@ -557,8 +609,11 @@ func (l *Lexer) lexSegment(r rune) (Token, consumerFunc, error) {
557609
// Map / regexp (#// | #{})
558610
case r == rSpecial:
559611
return noToken, l.lexSpecial, nil
612+
}
560613

561614
// Numerics (integer, decimal, rational, duration)
615+
switch {
616+
case l.Flags.any(LexNoNumbers):
562617
case isSign(r):
563618
l.buffer(r, r)
564619
return noToken, l.lexSignedNumber, nil
@@ -568,17 +623,20 @@ func (l *Lexer) lexSegment(r rune) (Token, consumerFunc, error) {
568623
case isDecimal(r):
569624
l.buffer(r, r)
570625
return noToken, l.lexNonZero, nil
626+
}
571627

572628
// String
573-
case r == rDoubleQuote:
629+
switch r {
630+
case rDoubleQuote:
574631
l.buffer(r, -1)
575632
return noToken, l.lexString, nil
576-
case r == rBackQuote:
633+
case rBackQuote:
577634
l.buffer(r, -1)
578635
return noToken, l.lexRawString, nil
636+
}
579637

580638
// Word
581-
case isBarewordRune(r):
639+
if isBarewordRune(r) {
582640
return l.lexBecomeWord(r)
583641
}
584642
return noToken, nil, fmt.Errorf("unexpected character %q at %v", r, l.pos)
@@ -604,8 +662,13 @@ func (l *Lexer) lexWordTail(next consumerFunc) consumerFunc {
604662
return noToken, wordConsumer, nil
605663
}
606664
l.unread()
665+
607666
tok := l.token(TWord, true)
608667
tok.Value = string(tok.Raw)
668+
if l.Flags.none(LexNoBools) {
669+
tok = wordToBool(tok)
670+
}
671+
609672
return tok, next, nil
610673
}
611674
return wordConsumer
@@ -631,7 +694,7 @@ func (l *Lexer) lexSegmentTail(r rune) (Token, consumerFunc, error) {
631694

632695
func (l *Lexer) lexSignedNumber(r rune) (Token, consumerFunc, error) {
633696
switch {
634-
case isDecimal(r):
697+
case l.Flags.none(LexNoNumbers) && isDecimal(r):
635698
l.buffer(r, r)
636699
if r == '0' {
637700
return noToken, l.lexZero, nil
@@ -889,17 +952,24 @@ func (l *Lexer) lexFloatPoint(r rune) (Token, consumerFunc, error) {
889952
// Sep -> Float
890953
// BarewordRune -> lex bareword
891954
//
955+
var (
956+
allowFloat = l.Flags.none(LexNoFloats)
957+
allowDurations = l.Flags.none(LexNoDurations)
958+
)
892959
switch {
893-
case r == 'E' || r == 'e': // exponent
960+
case allowFloat && (r == 'E' || r == 'e'): // exponent
894961
l.buffer(r, r)
895962
return noToken, l.lexFloatExponentUnsigned, nil
896-
case isIntervalInitial(r):
963+
case allowDurations && isIntervalInitial(r):
897964
return l.lexIntervalConsumer(r)
898965
case isDecimal(r):
899966
l.buffer(r, r)
900967
return noToken, l.lexFloatPoint, nil
901968
case isStatementSep(r) || r == eof:
902969
l.unread()
970+
if !allowFloat {
971+
return l.lexBecomeWord(-1)
972+
}
903973
tok, err := l.valueToken(TFloat, parseBigFloat(l.Precision))
904974
return tok, l.lexSegment, err
905975
case isBarewordTransition(r):
@@ -1070,29 +1140,33 @@ func (l *Lexer) lexZero(r rune) (Token, consumerFunc, error) {
10701140
// 'Ee' -> lex float from exponent (necessarily zero)
10711141
// BarewordRune -> lex bareword
10721142
//
1073-
switch {
1143+
switch allowBaseInts := l.Flags.none(LexNoBaseInts); {
10741144
case isStatementSep(r), r == -1:
10751145
l.unread()
10761146
tok, err := l.valueToken(TInteger, parseBaseInt(10))
10771147
return tok, l.lexSegment, err
10781148
case isOctal(r):
1149+
if !allowBaseInts {
1150+
return l.lexBecomeWord(r)
1151+
}
10791152
l.buffer(r, r)
10801153
return noToken, l.lexOctalNumber, nil
1081-
case r == rFracSep:
1154+
case l.Flags.none(LexNoRationals) && r == rFracSep:
10821155
l.buffer(r, r)
10831156
return noToken, l.lexRationalDenomInitial, nil
1084-
case r == 'b' || r == 'B':
1157+
case allowBaseInts && (r == 'b' || r == 'B'):
10851158
l.buffer(r, -1)
10861159
return noToken, l.lexNoTerminate(l.lexBinNum, "binary digit"), nil
1087-
case r == 'x' || r == 'X':
1160+
case allowBaseInts && (r == 'x' || r == 'X'):
10881161
l.buffer(r, -1)
10891162
return noToken, l.lexNoTerminate(l.lexHexNum, "hex digit"), nil
1090-
case r == rDot:
1163+
case !l.Flags.all(LexNoDurations|LexNoFloats) && r == rDot:
1164+
// Continue parsing here unless both floats and durations are disabled
10911165
l.buffer(r, r)
10921166
return noToken, l.lexFloatPointInitial, nil
1093-
case isIntervalInitial(r):
1167+
case l.Flags.none(LexNoDurations) && isIntervalInitial(r):
10941168
return l.lexIntervalConsumer(r)
1095-
case r == 'E' || r == 'e':
1169+
case l.Flags.none(LexNoFloats) && (r == 'E' || r == 'e'):
10961170
l.buffer(r, r)
10971171
return noToken, l.lexFloatExponentUnsigned, nil
10981172
case isBarewordTransition(r):
@@ -1123,12 +1197,12 @@ func (l *Lexer) lexNonZero(r rune) (Token, consumerFunc, error) {
11231197
case isDecimal(r):
11241198
l.buffer(r, r)
11251199
return noToken, l.lexNonZero, nil
1126-
case isIntervalInitial(r):
1200+
case l.Flags.none(LexNoDurations) && isIntervalInitial(r):
11271201
return l.lexIntervalConsumer(r)
11281202
}
11291203

1130-
switch r {
1131-
case rBaseSep:
1204+
switch {
1205+
case l.Flags.none(LexNoBaseInts) && r == rBaseSep:
11321206
l.buffer(r, -1)
11331207

11341208
str := l.strbuf.String()
@@ -1143,13 +1217,13 @@ func (l *Lexer) lexNonZero(r rune) (Token, consumerFunc, error) {
11431217

11441218
l.strbuf.Reset()
11451219
return noToken, l.lexBaseNumber(neg, base), nil
1146-
case rFracSep:
1220+
case l.Flags.none(LexNoRationals) && r == rFracSep:
11471221
l.buffer(r, r)
11481222
return noToken, l.lexRationalDenomInitial, nil
1149-
case rDot:
1223+
case !l.Flags.all(LexNoDurations|LexNoFloats) && r == rDot:
11501224
l.buffer(r, r)
11511225
return noToken, l.lexFloatPointInitial, nil
1152-
case 'E', 'e':
1226+
case l.Flags.none(LexNoFloats) && (r == 'E' || r == 'e'):
11531227
l.buffer(r, r)
11541228
return noToken, l.lexFloatExponentUnsigned, nil
11551229
}
@@ -1375,7 +1449,7 @@ func (l *Lexer) lexSpecial(r rune) (Token, consumerFunc, error) {
13751449
switch {
13761450
case r == rCurlOpen:
13771451
return l.token(TMapOpen, false), l.lexSegment, nil
1378-
case r == rRegexpOpen:
1452+
case r == rRegexpOpen && l.Flags.none(LexNoRegexps):
13791453
l.buffer(rSpecial, -1)
13801454
l.buffer(r, -1)
13811455
return noToken, l.lexRegexp, nil
@@ -1431,3 +1505,20 @@ func (l *Lexer) lexRegexp(r rune) (Token, consumerFunc, error) {
14311505
l.buffer(r, r)
14321506
return noToken, l.lexRegexp, nil
14331507
}
1508+
1509+
func wordToBool(tok Token) Token {
1510+
if tok.Kind != TWord {
1511+
return tok
1512+
}
1513+
s, ok := tok.Value.(string)
1514+
if !ok {
1515+
return tok
1516+
}
1517+
switch s {
1518+
case "TRUE", "True", "true", "YES", "Yes", "yes":
1519+
tok.Kind, tok.Value = TBoolean, true
1520+
case "FALSE", "False", "false", "NO", "No", "no":
1521+
tok.Kind, tok.Value = TBoolean, false
1522+
}
1523+
return tok
1524+
}

0 commit comments

Comments
 (0)