Skip to content

Commit ef24bc7

Browse files
committed
Handle dates that are both unspecified and qualified ("16XX~")
Previously, unspecified dates could not carry a qualifier (such as the "~" in "16XX~"), and years with three unspecified digits ("1XXX") were not supported. This commit adds support for both, along with tests for these cases.
1 parent d550274 commit ef24bc7

4 files changed

Lines changed: 102 additions & 4 deletions

File tree

edtf/appsettings.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,13 @@
8787
PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1))
8888
PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1))
8989
PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12))
90+
PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10))
91+
PADDING_CENTURY_PRECISION = EDTF.get(
92+
"PADDING_CENTURY_PRECISION", relativedelta(years=100)
93+
)
94+
PADDING_MILLENNIUM_PRECISION = EDTF.get(
95+
"PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000)
96+
)
9097
MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0)
9198
MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0)
9299
MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0)

edtf/parser/grammar.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,17 +161,19 @@ def f(toks):
161161
Level1Interval.set_parser(level1Interval)
162162

163163
# (* *** unspecified *** *)
164-
yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year")
164+
yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
165+
digit + (digit ^ "X") + (digit ^ "X") + "X"
166+
)("year")
165167
monthUnspecified = year + "-" + L("XX")("month")
166168
dayUnspecified = yearMonth + "-" + L("XX")("day")
167169
dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day")
168170

169171
unspecified = (
170-
yearWithOneOrTwoUnspecifedDigits
172+
yearWithOneOrTwoOrThreeUnspecifedDigits
171173
^ monthUnspecified
172174
^ dayUnspecified
173175
^ dayAndMonthUnspecified
174-
)
176+
) + Optional(UASymbol)("ua")
175177
Unspecified.set_parser(unspecified)
176178

177179
# (* *** uncertainOrApproxDate *** *)

edtf/parser/parser_classes.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,89 @@ def precision(self):
541541

542542

543543
class Unspecified(Date):
544-
pass
544+
def __init__(
545+
self,
546+
year=None,
547+
month=None,
548+
day=None,
549+
significant_digits=None,
550+
ua=None,
551+
**kwargs,
552+
):
553+
for param in ("date", "lower", "upper"):
554+
if param in kwargs:
555+
self.__init__(**kwargs[param])
556+
return
557+
self.year = year # Year is required, but sometimes passed in as a 'date' dict.
558+
self.month = month
559+
self.day = day
560+
self.significant_digits = (
561+
int(significant_digits) if significant_digits else None
562+
)
563+
self.ua = ua if ua else None
564+
565+
def __str__(self):
566+
r = self.year
567+
if self.month:
568+
r += f"-{self.month}"
569+
if self.day:
570+
r += f"-{self.day}"
571+
if self.ua:
572+
r += str(self.ua)
573+
return r
574+
575+
def _get_fuzzy_padding(self, lean):
576+
if not self.ua:
577+
return relativedelta()
578+
multiplier = self.ua._get_multiplier()
579+
padding = relativedelta()
580+
581+
if self.year:
582+
if self.precision == PRECISION_MILLENIUM:
583+
padding += relativedelta(
584+
years=int(
585+
multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years
586+
)
587+
)
588+
elif self.precision == PRECISION_CENTURY:
589+
padding += relativedelta(
590+
years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years)
591+
)
592+
elif self.precision == PRECISION_DECADE:
593+
padding += relativedelta(
594+
years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years)
595+
)
596+
else:
597+
padding += relativedelta(
598+
years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years)
599+
)
600+
if self.month:
601+
padding += relativedelta(
602+
months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months)
603+
)
604+
if self.day:
605+
padding += relativedelta(
606+
days=int(multiplier * appsettings.PADDING_DAY_PRECISION.days)
607+
)
608+
609+
return padding
610+
611+
@property
612+
def precision(self):
613+
if self.day:
614+
return PRECISION_DAY
615+
if self.month:
616+
return PRECISION_MONTH
617+
if self.year:
618+
if self.year.isdigit():
619+
return PRECISION_YEAR
620+
if len(self.year) == 4 and self.year.endswith("XXX"):
621+
return PRECISION_MILLENIUM
622+
if len(self.year) == 4 and self.year.endswith("XX"):
623+
return PRECISION_CENTURY
624+
if len(self.year) == 4 and self.year.endswith("X"):
625+
return PRECISION_DECADE
626+
raise ValueError(f"Unspecified date {self} has no precision")
545627

546628

547629
class Level1Interval(Interval):

edtf/parser/tests.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@
8585
("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
8686
("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
8787
("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")),
88+
# Unspecified and qualified
89+
# "circa 17th century"
90+
("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")),
91+
("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")),
92+
("1XXX", ("1000-01-01", "1999-12-31")),
93+
("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
94+
("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
8895
# L1 Extended Interval
8996
# beginning unknown, end 2006
9097
# for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)

0 commit comments

Comments
 (0)