-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcombined.py
More file actions
87 lines (66 loc) · 2.99 KB
/
combined.py
File metadata and controls
87 lines (66 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
**Experimental** combined parser. Supports EDTF, Gregorian, Hebrew, and Hijri
where dates are unambiguous. Year-only dates are parsed as EDTF in
Gregorian calendar.
"""
from typing import Union
from lark import Lark
from lark.exceptions import UnexpectedInput
from lark.visitors import Transformer, merge_transformers
from undate import Undate, UndateInterval
from undate.converters import BaseDateConverter, GRAMMAR_FILE_PATH
from undate.converters.edtf.transformer import EDTFTransformer
from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
class CombinedDateTransformer(Transformer):
    """Transformer for the top-level rule of the combined date grammar.

    Defines only the ``start`` rule; it does no date handling of its own.
    """

    def start(self, children):
        """Return the children of the ``start`` rule unchanged.

        Handling ``start`` here triggers the transformer for the
        appropriate part of the grammar; the results are passed
        through as-is.
        """
        return children
# NOTE: the combined parser currently treats year-only dates as EDTF,
# which means they are interpreted in the Gregorian calendar.
# A future refinement could add calendar names & abbreviations to the
# parser so that years from other calendars can be recognized.

# Instantiate the base transformer first, then the per-format transformers,
# and merge them; the keyword names are passed to merge_transformers as given.
_base_transformer = CombinedDateTransformer()
_format_transformers = {
    "edtf": EDTFTransformer(),
    "hebrew": HebrewDateTransformer(),
    "islamic": IslamicDateTransformer(),
    "gregorian": GregorianDateTransformer(),
}
combined_transformer = merge_transformers(_base_transformer, **_format_transformers)

# Open the grammar by filename (rather than from a string) so that relative
# import paths can be specified based on the grammar file.
_combined_grammar_file = str(GRAMMAR_FILE_PATH / "combined.lark")
parser = Lark.open(_combined_grammar_file, rel_to=__file__, strict=True)
class OmnibusDateConverter(BaseDateConverter):
    """
    Combination parser that aggregates existing parser grammars.
    Currently supports EDTF, Gregorian, Hebrew, and Hijri where dates are unambiguous.
    (Year-only dates are parsed as EDTF in Gregorian calendar.)

    Does not support serialization.

    Example usage::

        Undate.parse("Tammuz 4816", "omnibus")

    """

    #: converter name: omnibus
    name: str = "omnibus"

    def __init__(self):
        # every instance shares the module-level merged transformer
        self.transformer = combined_transformer

    def parse(self, value: str) -> Union[Undate, UndateInterval]:
        """
        Parse a string in a supported format and return an :class:`~undate.undate.Undate`
        or :class:`~undate.undate.UndateInterval`.

        :param value: date string to parse
        :raises ValueError: if ``value`` is empty/unset, or if the parser
            does not recognize it; in the latter case the original parser
            error is attached as the exception's cause
        """
        if not value:
            raise ValueError("Parsing empty/unset string is not supported")

        # parse the input string, then transform to undate object
        try:
            parsetree = parser.parse(value)
            # transform returns a list; we want the first item in the list
            return self.transformer.transform(parsetree)[0]
        except UnexpectedInput as err:
            # chain explicitly so the underlying parser error stays
            # available to callers as __cause__
            raise ValueError(
                f"Parsing failed: '{value}' is not in a recognized date format"
            ) from err

    def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
        """Not supported by this converter. Will raise :class:`ValueError`"""
        raise ValueError("Omnibus converter does not support serialization")