Skip to content

Commit 5883f53

Browse files
committed
Significant digits updates
- Adds functionality for significant digits to Date, LongYear, and ExponentialYear - Updates the tests for significant digits - Updates the docs for significant digits and a few other references to old syntax (lowercase e, grouping) - ExponentialYear inherits from LongYear so only need to add it there; LongYear does not inherit from Date, so a bit of code duplication in the _fuzzy() overrides
1 parent 6b3a9d4 commit 5883f53

3 files changed

Lines changed: 124 additions & 16 deletions

File tree

README.md

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,8 @@ Test coverage includes every example given in the spec table of features.
138138

139139
* Partial uncertain/approximate:
140140

141-
>>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate.
142-
# Note that the result text is normalized
143-
PartialUncertainOrApproximate: '2011-(06-04)~'
141+
>>> parse_edtf('2004-06~-11') # year certain, month/day approximate.
142+
PartialUncertainOrApproximate: '2004-06~-11'
144143

145144
* Partial unspecified:
146145

@@ -159,13 +158,42 @@ Test coverage includes every example given in the spec table of features.
159158

160159
* Level 2 Extended intervals:
161160

162-
>>> parse_edtf('2004-06-(01)~/2004-06-(20)~')
163-
Level2Interval: '2004-06-(01)~/2004-06-(20)~'
161+
>>> parse_edtf('2004-06-~01/2004-06-~20')
162+
Level2Interval: '2004-06-~01/2004-06-~20'
164163

165164
* Year requiring more than 4 digits - exponential form:
166165

167-
>>> parse_edtf('Y-17e7')
168-
ExponentialYear: 'Y-17e7'
166+
>>> e = parse_edtf('Y-17E7')
167+
ExponentialYear: 'Y-17E7'
168+
>>> e.estimated()
169+
-170000000
170+
171+
* Significant digits:
172+
# '1950S2': some year between 1900 and 1999, estimated to be 1950
173+
>>> d = parse_edtf('1950S2')
174+
Date: '1950S2'
175+
>>> d.lower_fuzzy()[:3]
176+
(1900, 1, 1)
177+
>>> d.upper_fuzzy()[:3]
178+
(1999, 12, 31)
179+
# 'Y171010000S3': some year between 171000000 and 171999999, estimated to be 171010000, with 3 significant digits.
180+
>>> l = parse_edtf('Y171010000S3')
181+
LongYear: 'Y171010000S3'
182+
>>> l.estimated()
183+
171010000
184+
>>> l.lower_fuzzy()[:3]
185+
(171000000, 1, 1)
186+
>>> l.upper_fuzzy()[:3]
187+
(171999999, 12, 31)
188+
# 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800
189+
>>> e = parse_edtf('Y3388E2S3')
190+
ExponentialYear: 'Y3388E2S3'
191+
>>> e.estimated()
192+
338800
193+
>>> e.lower_fuzzy()[:3]
194+
(338000, 1, 1)
195+
>>> e.upper_fuzzy()[:3]
196+
(338999, 12, 31)
169197

170198
### Natural language representation
171199

edtf/parser/parser_classes.py

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,9 @@ def __init__(
272272
self.year = year # Year is required, but sometimes passed in as a 'date' dict.
273273
self.month = month
274274
self.day = day
275-
self.significant_digits = significant_digits
275+
self.significant_digits = (
276+
int(significant_digits) if significant_digits else None
277+
)
276278

277279
def __str__(self):
278280
r = self.year
@@ -291,6 +293,36 @@ def isoformat(self, default=date.max):
291293
int(self.day or default.day),
292294
)
293295

296+
def lower_fuzzy(self):
    """Return the earliest plausible date as a ``struct_time``.

    Without significant digits, this is ``lower_strict()`` moved earlier
    by the fuzzy padding for ``EARLIEST``.

    With significant digits, only the leading ``significant_digits``
    digits of the year are trusted; the rest are treated as unknown.
    The result is 1 January of the earliest year that shares those
    leading digits, e.g. ``1950S2`` -> 1900-01-01.
    """
    if not getattr(self, "significant_digits", None):
        return apply_delta(
            sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST)
        )

    # NOTE(review): assumes the year is a plain, optionally signed, digit
    # string whenever significant digits are present -- confirm against
    # the grammar.
    year_text = str(self.year)
    year_value = int(year_text)

    # Count digits only: a leading '-' is not a digit. Clamp at zero so
    # that asking for more significant digits than the year has cannot
    # produce a negative exponent (and therefore a float year).
    digit_count = len(year_text.lstrip("-"))
    insignificant_digits = max(digit_count - self.significant_digits, 0)
    scale = 10**insignificant_digits

    # Truncate on the magnitude; floor division on a negative number
    # would round away from zero and select the wrong block of years.
    floor_magnitude = abs(year_value) // scale * scale
    if year_value < 0:
        # For negative years the earliest year has the largest magnitude.
        lower_year = -(floor_magnitude + scale - 1)
    else:
        lower_year = floor_magnitude

    return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
310+
311+
def upper_fuzzy(self):
    """Return the latest plausible date as a ``struct_time``.

    Without significant digits, this is ``upper_strict()`` moved later
    by the fuzzy padding for ``LATEST``.

    With significant digits, only the leading ``significant_digits``
    digits of the year are trusted; the rest are treated as unknown.
    The result is 31 December of the latest year that shares those
    leading digits, e.g. ``1950S2`` -> 1999-12-31.
    """
    if not getattr(self, "significant_digits", None):
        return apply_delta(
            add, self.upper_strict(), self._get_fuzzy_padding(LATEST)
        )

    # NOTE(review): assumes the year is a plain, optionally signed, digit
    # string whenever significant digits are present -- confirm against
    # the grammar.
    year_text = str(self.year)
    year_value = int(year_text)

    # Count digits only: a leading '-' is not a digit. Clamp at zero so
    # that asking for more significant digits than the year has cannot
    # produce a negative exponent (and therefore a float year).
    digit_count = len(year_text.lstrip("-"))
    insignificant_digits = max(digit_count - self.significant_digits, 0)
    scale = 10**insignificant_digits

    # Truncate on the magnitude; floor division on a negative number
    # would round away from zero and select the wrong block of years.
    floor_magnitude = abs(year_value) // scale * scale
    if year_value < 0:
        # For negative years the latest year has the smallest magnitude.
        upper_year = -floor_magnitude
    else:
        upper_year = floor_magnitude + scale - 1

    return struct_time(
        [upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS
    )
325+
294326
def _precise_year(self, lean):
295327
# Replace any ambiguous characters in the year string with 0s or 9s
296328
if lean == EARLIEST:
@@ -547,7 +579,9 @@ def _get_fuzzy_padding(self, lean):
547579
class LongYear(EDTFObject):
548580
def __init__(self, year, significant_digits=None):
549581
self.year = year
550-
self.significant_digits = significant_digits
582+
self.significant_digits = (
583+
int(significant_digits) if significant_digits else None
584+
)
551585

552586
def __str__(self):
553587
if self.significant_digits:
@@ -568,6 +602,42 @@ def _strict_date(self, lean):
568602
def estimated(self):
569603
return self._precise_year()
570604

605+
def lower_fuzzy(self):
    """Return the earliest plausible date as a ``struct_time``.

    Without significant digits, or when every digit of the year is
    significant, this is ``lower_strict()`` moved earlier by the fuzzy
    padding for ``EARLIEST``.

    Otherwise the year is truncated to its leading
    ``significant_digits`` digits and the result is 1 January of the
    earliest year sharing those digits (with the same padding applied),
    e.g. ``Y171010000S3`` -> year 171000000.
    """
    full_year = self._precise_year()
    strict_val = self.lower_strict()
    padding = self._get_fuzzy_padding(EARLIEST)
    if not self.significant_digits:
        return apply_delta(sub, strict_val, padding)

    # Count digits on the magnitude so a minus sign is not mistaken for
    # a digit of the year.
    magnitude = abs(full_year)
    insignificant_digits = len(str(magnitude)) - int(self.significant_digits)
    if insignificant_digits <= 0:
        # All digits are significant: nothing to widen.
        return apply_delta(sub, strict_val, padding)

    scale = 10**insignificant_digits
    # Truncate on the magnitude; floor division on a negative number
    # would round away from zero and select the wrong block of years.
    floor_magnitude = magnitude // scale * scale
    if full_year < 0:
        # For negative years the earliest year has the largest magnitude.
        lower_year = -(floor_magnitude + scale - 1)
    else:
        lower_year = floor_magnitude

    return apply_delta(
        sub,
        struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS),
        padding,
    )
622+
623+
def upper_fuzzy(self):
    """Return the latest plausible date as a ``struct_time``.

    Without significant digits, or when every digit of the year is
    significant, this is ``upper_strict()`` moved later by the fuzzy
    padding for ``LATEST``.

    Otherwise the year is truncated to its leading
    ``significant_digits`` digits and the result is 31 December of the
    latest year sharing those digits (with the same padding applied),
    e.g. ``Y171010000S3`` -> year 171999999.
    """
    full_year = self._precise_year()
    strict_val = self.upper_strict()
    padding = self._get_fuzzy_padding(LATEST)
    if not self.significant_digits:
        return apply_delta(add, strict_val, padding)

    # Count digits on the magnitude so a minus sign is not mistaken for
    # a digit of the year.
    magnitude = abs(full_year)
    insignificant_digits = len(str(magnitude)) - int(self.significant_digits)
    if insignificant_digits <= 0:
        # All digits are significant: nothing to widen.
        return apply_delta(add, strict_val, padding)

    scale = 10**insignificant_digits
    # Truncate on the magnitude; floor division on a negative number
    # would round away from zero and select the wrong block of years.
    floor_magnitude = magnitude // scale * scale
    if full_year < 0:
        # For negative years the latest year has the smallest magnitude.
        upper_year = -floor_magnitude
    else:
        upper_year = floor_magnitude + scale - 1

    return apply_delta(
        add,
        struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS),
        padding,
    )
640+
571641

572642
class Season(Date):
573643
def __init__(self, year, season, **kwargs):
@@ -845,7 +915,9 @@ class ExponentialYear(LongYear):
845915
def __init__(self, base, exponent, significant_digits=None):
846916
self.base = base
847917
self.exponent = exponent
848-
self.significant_digits = significant_digits
918+
self.significant_digits = (
919+
int(significant_digits) if significant_digits else None
920+
)
849921

850922
def _precise_year(self):
851923
return int(self.base) * 10 ** int(self.exponent)

edtf/parser/tests.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
# where the first value is a tuple, the second item is a tuple of the normalised parse result.
1515
#
1616
# The values in the second tuple indicate the iso versions of the derived Python `date`s.
17-
# - If there's one other value, all the derived dates should be the same.
18-
# - If there're two other values, then all the lower values should be the same
17+
# - If there is one other value, all the derived dates should be the same.
18+
# - If there are two other values, then all the lower values should be the same
1919
# and all the upper values should be the same.
2020
# - If there are three other values, then the upper and lower ``_strict`` values
2121
# should be the first value, and the upper and lower ``_fuzzy`` values should be
@@ -194,13 +194,21 @@
194194
("Y-17E7", ("-170000000-01-01", "-170000000-12-31")),
195195
# L2 significant digits
196196
# Some year between 1900 and 1999, estimated to be 1950
197-
("1950S2", ("1900-01-01", "1999-12-31")),
197+
("1950S2", ("1950-01-01", "1950-12-31", "1900-01-01", "1999-12-31")),
198+
("1953S2", ("1953-01-01", "1953-12-31", "1900-01-01", "1999-12-31")),
199+
("1953S3", ("1953-01-01", "1953-12-31", "1950-01-01", "1959-12-31")),
198200
# Some year between 171000000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.)
199-
("Y17101E4S3", ("171000000-01-01", "171999999-12-31")),
201+
(
202+
"Y17101E4S3",
203+
("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"),
204+
),
200205
# Some year between 338000 and 338999, estimated to be 338800
201-
("Y3388E2S3", ("338000-01-01", "338999-12-31")),
206+
("Y3388E2S3", ("338800-01-01", "338800-12-31", "338000-01-01", "338999-12-31")),
202207
# some year between 171000000 and 171999999 estimated to be 171010000
203-
("Y171010000S3", ("171010000-01-01", "171999999-12-31")),
208+
(
209+
"Y171010000S3",
210+
("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"),
211+
),
204212
# L2 Seasons
205213
# Spring southern hemisphere, 2001
206214
("2001-29", ("2001-09-01", "2001-11-30")),

0 commit comments

Comments
 (0)