Skip to content

Commit bb1d724

Browse files
committed
Ignore commas and periods across all grammars
1 parent b9c2bf6 commit bb1d724

6 files changed

Lines changed: 24 additions & 17 deletions

File tree

src/undate/converters/calendars/hebrew/parser.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
grammar_path = GRAMMAR_FILE_PATH / "hebrew.lark"
66

7-
with open(grammar_path) as grammar:
8-
# NOTE: LALR parser is faster but can't be used to ambiguity between years and dates
9-
hebrew_parser = Lark(grammar.read(), start="hebrew_date", strict=True)
7+
# open based on filename to allow relative imports based on grammar file
8+
hebrew_parser = Lark.open(
9+
str(grammar_path), rel_to=__file__, start="hebrew_date", strict=True
10+
)

src/undate/converters/calendars/islamic/parser.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
grammar_path = GRAMMAR_FILE_PATH / "islamic.lark"
66

7-
with open(grammar_path) as grammar:
8-
# NOTE: LALR parser is faster but can't be used due to ambiguity between years and days
9-
islamic_parser = Lark(grammar.read(), start="islamic_date", strict=True)
7+
# open based on filename to allow relative imports based on grammar file
8+
islamic_parser = Lark.open(
9+
str(grammar_path), rel_to=__file__, start="islamic_date", strict=True
10+
)

src/undate/converters/grammars/combined.lark

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,9 @@
11
%import common.WS
22
%ignore WS
33

4-
// Some abbreviations use periods; some default date formats
5-
// include commas. Ignore both. (Copied from gregorian.lark)
6-
PUNCTUATION: "." | ","
7-
%ignore PUNCTUATION
4+
// Ignore periods and commas in dates
5+
%import .undate_common.DATE_PUNCTUATION
6+
%ignore DATE_PUNCTUATION
87

98
start: (edtf__start | hebrew__hebrew_date | islamic__islamic_date | gregorian__gregorian_date )
109

src/undate/converters/grammars/gregorian.lark

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,9 @@
11
%import common.WS
22
%ignore WS
33

4-
// Some abbreviations use periods; some default date formats
5-
// include commas. Ignore both
6-
PUNCTUATION: "." | ","
7-
%ignore PUNCTUATION
4+
// Ignore periods and commas in dates
5+
%import .undate_common.DATE_PUNCTUATION
6+
%ignore DATE_PUNCTUATION
87

98
%import .gregorian_multilang (month_1, month_2, month_3, month_4, month_5, \
109
month_6, month_7, month_8, month_9, month_10, month_11, month_12)

src/undate/converters/grammars/hebrew.lark

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,13 @@
11
%import common.WS
22
%ignore WS
33

4+
// Ignore periods and commas in dates
5+
%import .undate_common.DATE_PUNCTUATION
6+
%ignore DATE_PUNCTUATION
7+
48
// only support day month year format for now
59
// parser requires numeric day and year to be distinguished based on order
6-
hebrew_date: weekday? day month comma? year | month year | year
10+
hebrew_date: weekday? day month year | month year | year
711

812
// TODO: handle date ranges?
913

@@ -31,8 +35,7 @@ month: month_1
3135
// months have 29 or 30 days; we do not expect leading zeroes
3236
day: /[1-9]/ | /[12][0-9]/ | /30/
3337

34-
comma: ","
35-
weekday: ("Monday" | "Tuesday" | "Wednesday" | "Thursday" | "Friday" | "Saturday" | "Sunday") comma?
38+
weekday: ("Monday" | "Tuesday" | "Wednesday" | "Thursday" | "Friday" | "Saturday" | "Sunday")
3639

3740

3841
// months, in order; from convertdate list

src/undate/converters/grammars/islamic.lark

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
11
%import common.WS
22
%ignore WS
33

4+
// Ignore periods and commas in dates
5+
%import .undate_common.DATE_PUNCTUATION
6+
%ignore DATE_PUNCTUATION
7+
48
// only support day month year format for now
59
// parser requires numeric day and year to be distinguished based on order
610
islamic_date: weekday? day month year | month year | year

0 commit comments

Comments
 (0)