-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathconverter.py
More file actions
105 lines (84 loc) · 3.93 KB
/
converter.py
File metadata and controls
105 lines (84 loc) · 3.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from typing import Optional, Union
from lark.exceptions import UnexpectedInput
from undate import Undate, UndateInterval
from undate.converters.base import BaseDateConverter
from undate.converters.edtf.parser import edtf_parser
from undate.converters.edtf.transformer import EDTFTransformer
from undate.date import DatePrecision
#: character written in place of an unspecified (unknown) digit when
#: serializing to EDTF; year/month/day placeholders in this module are
#: built by repeating it
EDTF_UNSPECIFIED_DIGIT: str = "X"
class EDTFDateConverter(BaseDateConverter):
    """
    Converter for Extended Date/Time Format (EDTF).

    Supports parsing and serializing dates and date ranges in EDTF format.
    Does not support all of EDTF, and only supports dates and not times.
    """

    #: converter name: EDTF
    name: str = "EDTF"

    def __init__(self):
        # transformer applied to the parse tree produced by ``edtf_parser``
        self.transformer = EDTFTransformer()

    def parse(self, value: str) -> Union[Undate, UndateInterval]:
        """
        Parse a string in a supported EDTF date or date interval format and
        return an :class:`~undate.undate.Undate` or
        :class:`~undate.undate.UndateInterval`.

        :param value: EDTF date or date interval string
        :raises ValueError: if ``value`` is empty/unset, or if the parser
            rejects it as unsupported input
        """
        if not value:
            raise ValueError("Parsing empty/unset string is not supported")

        # parse the input string, then transform to undate object
        try:
            parsetree = edtf_parser.parse(value)
            return self.transformer.transform(parsetree)
        except UnexpectedInput as err:
            # re-raise as ValueError, keeping the parser error as the cause
            raise ValueError(
                f"Parsing failed: '{value}' is not a supported EDTF date format"
            ) from err

    def _convert_missing_digits(
        self, value: Optional[str], old_missing_digit: str
    ) -> Optional[str]:
        """
        Replace every occurrence of ``old_missing_digit`` in ``value`` with
        the EDTF unspecified-digit character.

        :param value: date part as a string, or None
        :param old_missing_digit: placeholder character to be replaced
        :return: the converted string, or None when ``value`` is empty or None
        """
        if value:
            return value.replace(old_missing_digit, EDTF_UNSPECIFIED_DIGIT)
        return None

    def to_string(self, undate: Union[Undate, UndateInterval]) -> str:
        """
        Convert an :class:`~undate.undate.Undate` or
        :class:`~undate.undate.UndateInterval` to EDTF format.

        An interval is serialized as ``start/end``; a missing start or end
        is written as ``..``.

        :param undate: date or date interval to serialize
        :return: EDTF string
        """
        if isinstance(undate, Undate):
            return self._undate_to_string(undate)
        elif isinstance(undate, UndateInterval):
            # NOTE: what is the difference between an open interval and unknown start/end?
            # spec distinguishes between these, open is ".." but unknown is ""
            start = self._undate_to_string(undate.earliest) if undate.earliest else ".."
            end = self._undate_to_string(undate.latest) if undate.latest else ".."
            return f"{start}/{end}"
        # NOTE(review): any other type falls through here and implicitly
        # returns None despite the ``str`` annotation; behavior is left
        # unchanged so existing callers are not affected.

    def _undate_to_string(self, undate: Undate) -> str:
        """
        Serialize a single :class:`~undate.undate.Undate` as an EDTF date.

        Year, month, and day are each included only when the date's precision
        is at least that level, and the included parts are joined with ``-``.
        A part with no value is written as a run of unspecified-digit
        characters.

        :param undate: date to serialize
        :return: EDTF date string
        :raises ValueError: if no date parts could be generated
        """
        # in theory it's possible to use the parser and reconstruct using a tree,
        # but that seems much more complicated and would be harder to read
        parts = []

        if undate.precision >= DatePrecision.YEAR:
            year = self._convert_missing_digits(undate.year, undate.MISSING_DIGIT)
            # years with more than 4 digits should be prefixed with Y
            # (don't count minus sign when checking digits)
            if year and len(year.lstrip("-")) > 4:
                year = f"Y{year}"

            # TODO: handle uncertain / approximate
            parts.append(year or EDTF_UNSPECIFIED_DIGIT * 4)

        # beware when we add more date precisions,
        # week-level won't necessarily mean we know the month

        if undate.precision >= DatePrecision.MONTH:
            # TODO: handle uncertain / approximate
            parts.append(
                self._convert_missing_digits(undate.month, undate.MISSING_DIGIT)
                or EDTF_UNSPECIFIED_DIGIT * 2
            )

        if undate.precision >= DatePrecision.DAY:
            # TODO: handle uncertain / approximate
            parts.append(
                self._convert_missing_digits(undate.day, undate.MISSING_DIGIT)
                or EDTF_UNSPECIFIED_DIGIT * 2
            )

        if parts:
            return "-".join(parts)

        # how can we have an empty string? probably shouldn't get here
        # exceptions do not interpolate logging-style "%r" arguments, so the
        # message is formatted explicitly
        raise ValueError(f"Failed to generate an EDTF string from {undate!r}")