Skip to content

Commit 8c571b0

Browse files
authored
Merge pull request #58 from artshumrc/38_unspecified_and_qualified
Handle unspecified + qualified EDTF strings
2 parents d550274 + c14a57b commit 8c571b0

4 files changed

Lines changed: 164 additions & 4 deletions

File tree

edtf/appsettings.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,13 @@
# Fuzzy-date padding per precision. Each value is read from the EDTF settings
# mapping and falls back to the relativedelta given here when the key is absent.
PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1))
PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1))
PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12))
# Paddings for years whose trailing digits are unspecified ("156X", "16XX",
# "1XXX"): one whole decade / century / millennium by default.
PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10))
PADDING_CENTURY_PRECISION = EDTF.get(
    "PADDING_CENTURY_PRECISION", relativedelta(years=100)
)
PADDING_MILLENNIUM_PRECISION = EDTF.get(
    "PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000)
)
# Multipliers for the padding when a date is qualified; the defaults are 1.0
# for uncertain, 1.0 for approximate and 2.0 when both apply.
MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0)
MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0)
MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0)

edtf/parser/grammar.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -161,17 +161,19 @@ def f(toks):
Level1Interval.set_parser(level1Interval)

# (* *** unspecified *** *)
# Year with unspecified digits: an optional leading "-", one digit, two
# positions that are each either a digit or "X", and a final "X"
# (e.g. "156X", "16XX", "1XXX", "-01XX").
# Replaces the former ``yearWithOneOrTwoUnspecifedDigits`` rule, which required
# two leading digits and accepted no sign.
yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
    Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X"
)("year")
monthUnspecified = year + "-" + L("XX")("month")
dayUnspecified = yearMonth + "-" + L("XX")("day")
dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day")

# Any of the unspecified forms may be followed by an optional qualifier
# symbol, captured under the results name "ua".
unspecified = (
    yearWithOneOrTwoOrThreeUnspecifedDigits
    ^ monthUnspecified
    ^ dayUnspecified
    ^ dayAndMonthUnspecified
) + Optional(UASymbol)("ua")
Unspecified.set_parser(unspecified)

# (* *** uncertainOrApproxDate *** *)

edtf/parser/parser_classes.py

Lines changed: 142 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -541,7 +541,148 @@ def precision(self):
541541

542542

class Unspecified(Date):
    """A date with unspecified ("X") components and an optional qualifier.

    Examples of the strings this models: ``"16XX"``, ``"1XXX~"``, ``"-01XX"``,
    ``"1999-XX"``, ``"1999-01-XX"``.

    Attributes:
        ua: The qualifier object parsed after the date, or ``None``. When
            present it must provide ``_get_multiplier()`` and is appended to
            the string form via ``str()``.
        negative: ``True`` when the year string starts with ``"-"``.
    """

    def __init__(
        self,
        year=None,
        month=None,
        day=None,
        significant_digits=None,
        ua=None,
        **kwargs,
    ):
        """Initialise the date parts via ``Date`` and record the qualifier."""
        super().__init__(
            year=year,
            month=month,
            day=day,
            significant_digits=significant_digits,
            **kwargs,
        )
        self.ua = ua
        # Guard against a missing year, consistent with the ``if self.year``
        # checks in ``precision`` and ``_get_fuzzy_padding``.
        self.negative = bool(self.year) and self.year.startswith("-")

    def __str__(self):
        """Return the base date string followed by the qualifier, if any."""
        base = super().__str__()
        if self.ua:
            base += str(self.ua)
        return base

    def _get_fuzzy_padding(self, lean):
        """Return the padding to apply around the strict bounds.

        Without a qualifier the padding is an empty ``relativedelta``.
        Otherwise the year padding (chosen by precision), plus the month and
        day paddings when those parts are set, are each scaled by the
        qualifier's multiplier and truncated to an integer.

        ``lean`` is accepted for interface compatibility and is not used.
        """
        if not self.ua:
            return relativedelta()
        multiplier = self.ua._get_multiplier()
        padding = relativedelta()

        if self.year:
            padding += self._years_padding(multiplier)
        if self.month:
            padding += relativedelta(
                months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months)
            )
        if self.day:
            padding += relativedelta(
                days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days)
            )
        return padding

    def _years_padding(self, multiplier):
        """Calculate year padding based on the precision.

        Precisions other than year/decade/century/millennium contribute no
        year padding.
        """
        precision_settings = {
            PRECISION_MILLENIUM: appsettings.PADDING_MILLENNIUM_PRECISION.years,
            PRECISION_CENTURY: appsettings.PADDING_CENTURY_PRECISION.years,
            PRECISION_DECADE: appsettings.PADDING_DECADE_PRECISION.years,
            PRECISION_YEAR: appsettings.PADDING_YEAR_PRECISION.years,
        }
        years = precision_settings.get(self.precision, 0)
        return relativedelta(years=int(multiplier * years))

    @staticmethod
    def _unspecified_struct_time(year, month, day):
        """Build a ``struct_time`` for a date with the empty time/extras fields."""
        return struct_time(
            (year, month, day) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS)
        )

    def lower_fuzzy(self):
        """Return ``lower_strict()`` moved earlier by the fuzzy padding."""
        # Negative years are already handled in the lower_strict() override.
        strict_val = self.lower_strict()
        return apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST))

    def upper_fuzzy(self):
        """Return ``upper_strict()`` moved later by the fuzzy padding."""
        # Negative years are already handled in the upper_strict() override.
        strict_val = self.upper_strict()
        return apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))

    def lower_strict(self):
        """Return the earliest date this value can denote.

        For a negative year the year is taken from the LATEST lean and the
        month/day are then reset to the start of the period, so that e.g.
        ``"-01XX"`` starts at ``-0199-01-01``.
        """
        if not self.negative:
            return self._strict_date(lean=EARLIEST)

        # Gets the year right, but day and month still need adjusting.
        strict_val = self._strict_date(lean=LATEST)
        precision = self.precision
        if precision in (
            PRECISION_YEAR,
            PRECISION_DECADE,
            PRECISION_CENTURY,
            PRECISION_MILLENIUM,
        ):
            return self._unspecified_struct_time(strict_val.tm_year, 1, 1)
        if precision == PRECISION_MONTH:
            return self._unspecified_struct_time(
                strict_val.tm_year, strict_val.tm_mon, 1
            )
        return strict_val

    def upper_strict(self):
        """Return the latest date this value can denote.

        For a negative year the year is taken from the EARLIEST lean and the
        month/day are then reset to the end of the period, so that e.g.
        ``"-01XX"`` ends at ``-0100-12-31``.
        """
        if not self.negative:
            return self._strict_date(lean=LATEST)

        strict_val = self._strict_date(lean=EARLIEST)
        precision = self.precision
        if precision in (
            PRECISION_YEAR,
            PRECISION_DECADE,
            PRECISION_CENTURY,
            PRECISION_MILLENIUM,
        ):
            return self._unspecified_struct_time(strict_val.tm_year, 12, 31)
        if precision == PRECISION_MONTH:
            # NOTE(review): this branch only runs for negative years; confirm
            # calendar.monthrange accepts them on every supported Python version.
            days_in_month = calendar.monthrange(
                strict_val.tm_year, strict_val.tm_mon
            )[1]
            return self._unspecified_struct_time(
                strict_val.tm_year, strict_val.tm_mon, days_in_month
            )
        return strict_val

    @property
    def precision(self):
        """Return the precision constant implied by the populated parts.

        Day and month take priority when set. Otherwise the year string
        (sign removed) decides: all digits is a year; a 4-character year
        ending in "XXX", "XX" or "X" is a millennium, century or decade.

        Raises:
            ValueError: If no part yields a recognised precision.
        """
        if self.day:
            return PRECISION_DAY
        if self.month:
            return PRECISION_MONTH
        if self.year:
            year_no_symbol = self.year.lstrip("-")
            if year_no_symbol.isdigit():
                return PRECISION_YEAR
            if len(year_no_symbol) == 4:
                # Longest suffix first: "XXX" also ends with "XX" and "X".
                if year_no_symbol.endswith("XXX"):
                    return PRECISION_MILLENIUM
                if year_no_symbol.endswith("XX"):
                    return PRECISION_CENTURY
                if year_no_symbol.endswith("X"):
                    return PRECISION_DECADE
        raise ValueError(f"Unspecified date {self} has no precision")
545686

546687

547688
class Level1Interval(Interval):

edtf/parser/tests.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,10 +81,20 @@
8181
("1999-01-XX", ("1999-01-01", "1999-01-31")),
8282
# some day in 1999
8383
("1999-XX-XX", ("1999-01-01", "1999-12-31")),
84+
# negative unspecified year
85+
("-01XX", ("-0199-01-01", "-0100-12-31")),
8486
# Uncertain/Approximate lower boundary dates (BCE)
8587
("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
8688
("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
8789
("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")),
90+
# Unspecified and qualified
91+
# "circa 17th century"
92+
("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")),
93+
("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")),
94+
("1XXX", ("1000-01-01", "1999-12-31")),
95+
("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
96+
("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
97+
("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "0000-12-31")),
8898
# L1 Extended Interval
8999
# beginning unknown, end 2006
90100
# for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)

0 commit comments

Comments
 (0)