Skip to content

Commit 6f19388

Browse files
authored
Merge pull request #55 from artshumrc/12_exponential_year_precision
Significant Digits
2 parents 7e15e89 + 5883f53 commit 6f19388

4 files changed

Lines changed: 168 additions & 39 deletions

File tree

README.md

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ The object returned by `parse_edtf()` is an instance of an `edtf.parser.parser_c
7272
PartialUnspecified
7373
OneOfASet
7474
MultipleDates
75-
MaskedPrecision
7675
Level2Interval
7776
Level2Season
7877
ExponentialYear
@@ -139,9 +138,8 @@ Test coverage includes every example given in the spec table of features.
139138

140139
* Partial uncertain/approximate:
141140

142-
>>> parse_edtf('(2011)-06-04~') # year certain, month/day approximate.
143-
# Note that the result text is normalized
144-
PartialUncertainOrApproximate: '2011-(06-04)~'
141+
>>> parse_edtf('2004-06~-11') # year certain, month/day approximate.
142+
PartialUncertainOrApproximate: '2004-06~-11'
145143

146144
* Partial unspecified:
147145

@@ -158,20 +156,44 @@ Test coverage includes every example given in the spec table of features.
158156
>>> parse_edtf('{1667,1668, 1670..1672}')
159157
MultipleDates: '{1667, 1668, 1670..1672}'
160158

161-
* Masked precision:
162-
163-
>>> parse_edtf('197x') # A date in the 1970s.
164-
MaskedPrecision: '197x'
165-
166159
* Level 2 Extended intervals:
167160

168-
>>> parse_edtf('2004-06-(01)~/2004-06-(20)~')
169-
Level2Interval: '2004-06-(01)~/2004-06-(20)~'
161+
>>> parse_edtf('2004-06-~01/2004-06-~20')
162+
Level2Interval: '2004-06-~01/2004-06-~20'
170163

171164
* Year requiring more than 4 digits - exponential form:
172165

173-
>>> parse_edtf('Y-17e7')
174-
ExponentialYear: 'Y-17e7'
166+
>>> e = parse_edtf('Y-17E7')
167+
ExponentialYear: 'Y-17E7'
168+
>>> e.estimated()
169+
-170000000
170+
171+
* Significant digits:
172+
# '1950S2': some year between 1900 and 1999, estimated to be 1950
173+
>>> d = parse_edtf('1950S2')
174+
Date: '1950S2'
175+
>>> d.lower_fuzzy()[:3]
176+
(1900, 1, 1)
177+
>>> d.upper_fuzzy()[:3]
178+
(1999, 12, 31)
179+
# 'Y171010000S3': some year between 171000000 and 171999999, estimated to be 171010000, with 3 significant digits.
180+
>>> l = parse_edtf('Y171010000S3')
181+
LongYear: 'Y171010000S3'
182+
>>> l.estimated()
183+
171010000
184+
>>> l.lower_fuzzy()[:3]
185+
(171000000, 1, 1)
186+
>>> l.upper_fuzzy()[:3]
187+
(171999999, 12, 31)
188+
# 'Y3388E2S3': some year in exponential notation between 338000 and 338999, estimated to be 338800
189+
>>> e = parse_edtf('Y3388E2S3')
190+
ExponentialYear: 'Y3388E2S3S3'
191+
>>> e.estimated()
192+
338800
193+
>>> e.lower_fuzzy()[:3]
194+
(338000, 1, 1)
195+
>>> e.upper_fuzzy()[:3]
196+
(338999, 12, 31)
175197

176198
### Natural language representation
177199

edtf/parser/grammar.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@
4848
oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)])
4949
zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)])
5050

51-
positiveDigit = Word(nums, exact=1, excludeChars="0")
5251
digit = Word(nums, exact=1)
52+
positiveDigit = Word(nums, exact=1, excludeChars="0")
53+
positiveInteger = Combine(positiveDigit + ZeroOrMore(digit))
5354

5455
second = zeroThru59
5556
minute = zeroThru59
@@ -63,13 +64,18 @@
6364
^ (L("02")("month") + "-" + oneThru29("day"))
6465
)
6566

67+
# Significant digits suffix
68+
significantDigits = "S" + Word(nums)("significant_digits")
69+
6670
# 4 digits, 0 to 9
6771
positiveYear = Word(nums, exact=4)
6872

6973
# Negative version of positive year, but "-0000" is illegal
7074
negativeYear = NotAny(L("-0000")) + ("-" + positiveYear)
7175

72-
year = Combine(positiveYear ^ negativeYear)("year")
76+
year = Combine(positiveYear ^ negativeYear)("year") + Optional(significantDigits)
77+
# simple version for Consecutives
78+
year_basic = Combine(positiveYear ^ negativeYear)("year")
7379

7480
yearMonth = year + "-" + month
7581
yearMonthDay = year + "-" + monthDay # o hai iso date
@@ -112,9 +118,13 @@
112118

113119
# (* *** Long Year - Simple Form *** *)
114120

115-
longYearSimple = "Y" + Combine(
116-
Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit)
117-
)("year")
121+
longYearSimple = (
122+
"Y"
123+
+ Combine(Optional("-") + positiveDigit + digit + digit + digit + OneOrMore(digit))(
124+
"year"
125+
)
126+
+ Optional(significantDigits)
127+
)
118128
LongYear.set_parser(longYearSimple)
119129

120130
# (* *** L1Interval *** *)
@@ -238,13 +248,12 @@ def f(toks):
238248
seasonQualified = season + "^" + seasonQualifier
239249

240250
# (* ** Long Year - Scientific Form ** *)
241-
positiveInteger = Combine(positiveDigit + ZeroOrMore(digit))
242251
longYearScientific = (
243252
"Y"
244253
+ Combine(Optional("-") + positiveInteger)("base")
245254
+ "E"
246255
+ positiveInteger("exponent")
247-
+ Optional("S" + positiveInteger("precision"))
256+
+ Optional(significantDigits)
248257
)
249258
ExponentialYear.set_parser(longYearScientific)
250259

@@ -260,15 +269,13 @@ def f(toks):
260269
)
261270
Level2Interval.set_parser(level2Interval)
262271

263-
# (* ** Masked precision ** *) eliminated in latest specs
264-
# maskedPrecision = Combine(digit + digit + ((digit + "x") ^ "xx"))("year")
265-
# MaskedPrecision.set_parser(maskedPrecision)
266-
267272
# (* ** Inclusive list and choice list** *)
268273
consecutives = (
269274
(yearMonthDay("lower") + ".." + yearMonthDay("upper"))
270275
^ (yearMonth("lower") + ".." + yearMonth("upper"))
271-
^ (year("lower") + ".." + year("upper"))
276+
^ (
277+
year_basic("lower") + ".." + year_basic("upper")
278+
) # using year_basic because some tests were throwing `'list' object has no attribute 'expandtabs'` - somewhere, pyparsing.parse_string() was being passed a list
272279
)
273280
Consecutives.set_parser(consecutives)
274281

edtf/parser/parser_classes.py

Lines changed: 97 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,9 @@ def get_month(self):
261261

262262
month = property(get_month, set_month)
263263

264-
def __init__(self, year=None, month=None, day=None, **kwargs):
264+
def __init__(
265+
self, year=None, month=None, day=None, significant_digits=None, **kwargs
266+
):
265267
for param in ("date", "lower", "upper"):
266268
if param in kwargs:
267269
self.__init__(**kwargs[param])
@@ -270,13 +272,18 @@ def __init__(self, year=None, month=None, day=None, **kwargs):
270272
self.year = year # Year is required, but sometimes passed in as a 'date' dict.
271273
self.month = month
272274
self.day = day
275+
self.significant_digits = (
276+
int(significant_digits) if significant_digits else None
277+
)
273278

274279
def __str__(self):
275280
r = self.year
276281
if self.month:
277282
r += f"-{self.month}"
278283
if self.day:
279284
r += f"-{self.day}"
285+
if self.significant_digits:
286+
r += f"S{self.significant_digits}"
280287
return r
281288

282289
def isoformat(self, default=date.max):
@@ -286,6 +293,36 @@ def isoformat(self, default=date.max):
286293
int(self.day or default.day),
287294
)
288295

296+
def lower_fuzzy(self):
297+
if not hasattr(self, "significant_digits") or not self.significant_digits:
298+
return apply_delta(
299+
sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST)
300+
)
301+
else:
302+
total_digits = len(self.year)
303+
insignificant_digits = total_digits - self.significant_digits
304+
lower_year = (
305+
int(self.year)
306+
// (10**insignificant_digits)
307+
* (10**insignificant_digits)
308+
)
309+
return struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
310+
311+
def upper_fuzzy(self):
312+
if not hasattr(self, "significant_digits") or not self.significant_digits:
313+
return apply_delta(
314+
add, self.upper_strict(), self._get_fuzzy_padding(LATEST)
315+
)
316+
else:
317+
total_digits = len(self.year)
318+
insignificant_digits = total_digits - self.significant_digits
319+
upper_year = (int(self.year) // (10**insignificant_digits) + 1) * (
320+
10**insignificant_digits
321+
) - 1
322+
return struct_time(
323+
[upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS
324+
)
325+
289326
def _precise_year(self, lean):
290327
# Replace any ambiguous characters in the year string with 0s or 9s
291328
if lean == EARLIEST:
@@ -337,6 +374,9 @@ def precision(self):
337374
return PRECISION_MONTH
338375
return PRECISION_YEAR
339376

377+
def estimated(self):
378+
return self._precise_year(EARLIEST)
379+
340380

341381
class DateAndTime(EDTFObject):
342382
def __init__(self, date, time):
@@ -537,11 +577,17 @@ def _get_fuzzy_padding(self, lean):
537577

538578

539579
class LongYear(EDTFObject):
540-
def __init__(self, year):
580+
def __init__(self, year, significant_digits=None):
541581
self.year = year
582+
self.significant_digits = (
583+
int(significant_digits) if significant_digits else None
584+
)
542585

543586
def __str__(self):
544-
return f"Y{self.year}"
587+
if self.significant_digits:
588+
return f"Y{self.year}S{self.significant_digits}"
589+
else:
590+
return f"Y{self.year}"
545591

546592
def _precise_year(self):
547593
return int(self.year)
@@ -553,6 +599,45 @@ def _strict_date(self, lean):
553599
else:
554600
return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS)
555601

602+
def estimated(self):
603+
return self._precise_year()
604+
605+
def lower_fuzzy(self):
606+
full_year = self._precise_year()
607+
strict_val = self.lower_strict()
608+
if not self.significant_digits:
609+
return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST))
610+
else:
611+
insignificant_digits = len(str(full_year)) - int(self.significant_digits)
612+
if insignificant_digits <= 0:
613+
return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST))
614+
padding_value = 10**insignificant_digits
615+
sig_digits = full_year // padding_value
616+
lower_year = sig_digits * padding_value
617+
return apply_delta(
618+
sub,
619+
struct_time([lower_year, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS),
620+
self._get_fuzzy_padding(EARLIEST),
621+
)
622+
623+
def upper_fuzzy(self):
624+
full_year = self._precise_year()
625+
strict_val = self.upper_strict()
626+
if not self.significant_digits:
627+
return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))
628+
else:
629+
insignificant_digits = len(str(full_year)) - self.significant_digits
630+
if insignificant_digits <= 0:
631+
return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))
632+
padding_value = 10**insignificant_digits
633+
sig_digits = full_year // padding_value
634+
upper_year = (sig_digits + 1) * padding_value - 1
635+
return apply_delta(
636+
add,
637+
struct_time([upper_year, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS),
638+
self._get_fuzzy_padding(LATEST),
639+
)
640+
556641

557642
class Season(Date):
558643
def __init__(self, year, season, **kwargs):
@@ -806,10 +891,6 @@ def _strict_date(self, lean):
806891
return min([x._strict_date(lean) for x in self.objects])
807892

808893

809-
class MaskedPrecision(Date):
810-
pass
811-
812-
813894
class Level2Interval(Level1Interval):
814895
def __init__(self, lower, upper):
815896
# Check whether incoming lower/upper values are single-item lists, and
@@ -831,18 +912,23 @@ class Level2Season(Season):
831912

832913

833914
class ExponentialYear(LongYear):
834-
def __init__(self, base, exponent, precision=None):
915+
def __init__(self, base, exponent, significant_digits=None):
835916
self.base = base
836917
self.exponent = exponent
837-
self.precision = precision
918+
self.significant_digits = (
919+
int(significant_digits) if significant_digits else None
920+
)
838921

839922
def _precise_year(self):
840923
return int(self.base) * 10 ** int(self.exponent)
841924

842925
def get_year(self):
843-
if self.precision:
844-
return f"{self.base}E{self.exponent}S{self.precision}"
926+
if self.significant_digits:
927+
return f"{self.base}E{self.exponent}S{self.significant_digits}"
845928
else:
846929
return f"{self.base}E{self.exponent}"
847930

848931
year = property(get_year)
932+
933+
def estimated(self):
934+
return self._precise_year()

edtf/parser/tests.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
# where the first value is a tuple, the second item is a tuple of the normalised parse result.
1515
#
1616
# The values in the second tuple indicate the iso versions of the derived Python `date`s.
17-
# - If there's one other value, all the derived dates should be the same.
18-
# - If there're two other values, then all the lower values should be the same
17+
# - If there is one other value, all the derived dates should be the same.
18+
# - If there are two other values, then all the lower values should be the same
1919
# and all the upper values should be the same.
2020
# - If there are three other values, then the upper and lower ``_strict`` values
2121
# should be the first value, and the upper and lower ``_fuzzy`` values should be
@@ -193,8 +193,22 @@
193193
# the year -170000000
194194
("Y-17E7", ("-170000000-01-01", "-170000000-12-31")),
195195
# L2 significant digits
196+
# Some year between 1900 and 1999, estimated to be 1950
197+
("1950S2", ("1950-01-01", "1950-12-31", "1900-01-01", "1999-12-31")),
198+
("1953S2", ("1953-01-01", "1953-12-31", "1900-01-01", "1999-12-31")),
199+
("1953S3", ("1953-01-01", "1953-12-31", "1950-01-01", "1959-12-31")),
196200
# Some year between 171000000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.)
197-
# ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')),
201+
(
202+
"Y17101E4S3",
203+
("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"),
204+
),
205+
# Some year between 338000 and 338999, estimated to be 338800
206+
("Y3388E2S3", ("338800-01-01", "338800-12-31", "338000-01-01", "338999-12-31")),
207+
# some year between 171000000 and 171999999 estimated to be 171010000
208+
(
209+
"Y171010000S3",
210+
("171010000-01-01", "171010000-12-31", "171000000-01-01", "171999999-12-31"),
211+
),
198212
# L2 Seasons
199213
# Spring southern hemisphere, 2001
200214
("2001-29", ("2001-09-01", "2001-11-30")),

0 commit comments

Comments
 (0)