Skip to content

Commit b53df4a

Browse files
committed
Handle negative unspecified dates and negative unspecified + qualified dates
Requires overriding several of the lower_* and upper_* range methods (lower_strict, upper_strict, lower_fuzzy, upper_fuzzy) to handle these dates properly, because padding works in the opposite direction for negative dates, especially when combined with month/day padding.
1 parent ef24bc7 commit b53df4a

3 files changed

Lines changed: 201 additions & 30 deletions

File tree

edtf/parser/grammar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def f(toks):
162162

163163
# (* *** unspecified *** *)
164164
yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
165-
digit + (digit ^ "X") + (digit ^ "X") + "X"
165+
Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X"
166166
)("year")
167167
monthUnspecified = year + "-" + L("XX")("month")
168168
dayUnspecified = yearMonth + "-" + L("XX")("day")

edtf/parser/parser_classes.py

Lines changed: 197 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -561,16 +561,13 @@ def __init__(
561561
int(significant_digits) if significant_digits else None
562562
)
563563
self.ua = ua if ua else None
564+
self.negative = self.year.startswith("-")
564565

565566
def __str__(self):
566-
r = self.year
567-
if self.month:
568-
r += f"-{self.month}"
569-
if self.day:
570-
r += f"-{self.day}"
567+
base = super().__str__()
571568
if self.ua:
572-
r += str(self.ua)
573-
return r
569+
base += str(self.ua)
570+
return base
574571

575572
def _get_fuzzy_padding(self, lean):
576573
if not self.ua:
@@ -579,24 +576,16 @@ def _get_fuzzy_padding(self, lean):
579576
padding = relativedelta()
580577

581578
if self.year:
582-
if self.precision == PRECISION_MILLENIUM:
583-
padding += relativedelta(
584-
years=int(
585-
multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years
586-
)
587-
)
588-
elif self.precision == PRECISION_CENTURY:
589-
padding += relativedelta(
590-
years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years)
591-
)
592-
elif self.precision == PRECISION_DECADE:
593-
padding += relativedelta(
594-
years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years)
595-
)
596-
else:
597-
padding += relativedelta(
598-
years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years)
599-
)
579+
year_no_symbol = self.year.lstrip("-")
580+
years_padding = self._calculate_years_padding(multiplier, year_no_symbol)
581+
# Reverse the padding for negative years and earliest calculations
582+
# if self.negative:
583+
# years_padding = -years_padding if lean == EARLIEST else years_padding
584+
# else:
585+
# years_padding = years_padding if lean == EARLIEST else -years_padding
586+
587+
padding += years_padding
588+
600589
if self.month:
601590
padding += relativedelta(
602591
months=int(multiplier * appsettings.PADDING_MONTH_PRECISION.months)
@@ -608,20 +597,199 @@ def _get_fuzzy_padding(self, lean):
608597

609598
return padding
610599

600+
def _calculate_years_padding(self, multiplier, year_no_symbol):
601+
if self.precision == PRECISION_MILLENIUM:
602+
return relativedelta(
603+
years=int(multiplier * appsettings.PADDING_MILLENNIUM_PRECISION.years)
604+
)
605+
elif self.precision == PRECISION_CENTURY:
606+
return relativedelta(
607+
years=int(multiplier * appsettings.PADDING_CENTURY_PRECISION.years)
608+
)
609+
elif self.precision == PRECISION_DECADE:
610+
return relativedelta(
611+
years=int(multiplier * appsettings.PADDING_DECADE_PRECISION.years)
612+
)
613+
else:
614+
return relativedelta(
615+
years=int(multiplier * appsettings.PADDING_YEAR_PRECISION.years)
616+
)
617+
618+
def lower_fuzzy(self):
619+
time_empty_time_tuple = tuple(TIME_EMPTY_TIME)
620+
time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS)
621+
strict_val = (
622+
self.lower_strict()
623+
) # negative handled in the lower_strict() override
624+
625+
if self.negative:
626+
adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(LATEST))
627+
if (
628+
self.precision == PRECISION_YEAR
629+
or self.precision == PRECISION_DECADE
630+
or self.precision == PRECISION_CENTURY
631+
or self.precision == PRECISION_MILLENIUM
632+
):
633+
adjusted = struct_time(
634+
(adjusted.tm_year, 1, 1)
635+
+ time_empty_time_tuple
636+
+ time_empty_extras_tuple
637+
)
638+
elif self.precision == PRECISION_MONTH:
639+
adjusted = struct_time(
640+
(adjusted.tm_year, adjusted.tm_mon, 1)
641+
+ time_empty_time_tuple
642+
+ time_empty_extras_tuple
643+
)
644+
else:
645+
adjusted = apply_delta(sub, strict_val, self._get_fuzzy_padding(EARLIEST))
646+
if (
647+
self.precision == PRECISION_YEAR
648+
or self.precision == PRECISION_DECADE
649+
or self.precision == PRECISION_CENTURY
650+
or self.precision == PRECISION_MILLENIUM
651+
):
652+
adjusted = struct_time(
653+
(adjusted.tm_year, 1, 1)
654+
+ time_empty_time_tuple
655+
+ time_empty_extras_tuple
656+
)
657+
elif self.precision == PRECISION_MONTH:
658+
days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[
659+
1
660+
]
661+
adjusted = struct_time(
662+
(adjusted.tm_year, adjusted.tm_mon, days_in_month)
663+
+ time_empty_time_tuple
664+
+ time_empty_extras_tuple
665+
)
666+
667+
return adjusted
668+
669+
def upper_fuzzy(self):
670+
time_empty_time_tuple = tuple(TIME_EMPTY_TIME)
671+
time_empty_extras_tuple = tuple(TIME_EMPTY_EXTRAS)
672+
strict_val = (
673+
self.upper_strict()
674+
) # negative handled in the upper_strict() override
675+
676+
if self.negative:
677+
adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))
678+
if (
679+
self.precision == PRECISION_YEAR
680+
or self.precision == PRECISION_DECADE
681+
or self.precision == PRECISION_CENTURY
682+
or self.precision == PRECISION_MILLENIUM
683+
):
684+
adjusted = struct_time(
685+
(adjusted.tm_year, 12, 31)
686+
+ time_empty_time_tuple
687+
+ time_empty_extras_tuple
688+
)
689+
elif self.precision == PRECISION_MONTH:
690+
days_in_month = calendar.monthrange(adjusted.tm_year, adjusted.tm_mon)[
691+
1
692+
]
693+
adjusted = struct_time(
694+
(adjusted.tm_year, adjusted.tm_mon, days_in_month)
695+
+ time_empty_time_tuple
696+
+ time_empty_extras_tuple
697+
)
698+
else:
699+
adjusted = apply_delta(add, strict_val, self._get_fuzzy_padding(LATEST))
700+
if (
701+
self.precision == PRECISION_YEAR
702+
or self.precision == PRECISION_DECADE
703+
or self.precision == PRECISION_CENTURY
704+
or self.precision == PRECISION_MILLENIUM
705+
):
706+
adjusted = struct_time(
707+
(adjusted.tm_year, 12, 31)
708+
+ time_empty_time_tuple
709+
+ time_empty_extras_tuple
710+
)
711+
elif self.precision == PRECISION_MONTH:
712+
adjusted = struct_time(
713+
(adjusted.tm_year, adjusted.tm_mon, 1)
714+
+ time_empty_time_tuple
715+
+ time_empty_extras_tuple
716+
)
717+
718+
return adjusted
719+
720+
def lower_strict(self):
721+
if self.negative:
722+
strict_val = self._strict_date(
723+
lean=LATEST
724+
) # gets the year right, but need to adjust day and month
725+
if (
726+
self.precision == PRECISION_YEAR
727+
or self.precision == PRECISION_DECADE
728+
or self.precision == PRECISION_CENTURY
729+
or self.precision == PRECISION_MILLENIUM
730+
):
731+
return struct_time(
732+
(strict_val.tm_year, 1, 1)
733+
+ tuple(TIME_EMPTY_TIME)
734+
+ tuple(TIME_EMPTY_EXTRAS)
735+
)
736+
elif self.precision == PRECISION_MONTH:
737+
days_in_month = calendar.monthrange(
738+
strict_val.tm_year, strict_val.tm_mon
739+
)[1]
740+
return struct_time(
741+
(strict_val.tm_year, strict_val.tm_mon, days_in_month)
742+
+ tuple(TIME_EMPTY_TIME)
743+
+ tuple(TIME_EMPTY_EXTRAS)
744+
)
745+
else:
746+
return strict_val
747+
else:
748+
return self._strict_date(lean=EARLIEST)
749+
750+
def upper_strict(self):
751+
if self.negative:
752+
strict_val = self._strict_date(lean=EARLIEST)
753+
if (
754+
self.precision == PRECISION_YEAR
755+
or self.precision == PRECISION_DECADE
756+
or self.precision == PRECISION_CENTURY
757+
or self.precision == PRECISION_MILLENIUM
758+
):
759+
return struct_time(
760+
(strict_val.tm_year, 12, 31)
761+
+ tuple(TIME_EMPTY_TIME)
762+
+ tuple(TIME_EMPTY_EXTRAS)
763+
)
764+
elif self.precision == PRECISION_MONTH:
765+
days_in_month = calendar.monthrange(
766+
strict_val.tm_year, strict_val.tm_mon
767+
)[1]
768+
return struct_time(
769+
(strict_val.tm_year, strict_val.tm_mon, days_in_month)
770+
+ tuple(TIME_EMPTY_TIME)
771+
+ tuple(TIME_EMPTY_EXTRAS)
772+
)
773+
else:
774+
return strict_val
775+
else:
776+
return self._strict_date(lean=LATEST)
777+
611778
@property
612779
def precision(self):
613780
if self.day:
614781
return PRECISION_DAY
615782
if self.month:
616783
return PRECISION_MONTH
617784
if self.year:
618-
if self.year.isdigit():
785+
year_no_symbol = self.year.lstrip("-")
786+
if year_no_symbol.isdigit():
619787
return PRECISION_YEAR
620-
if len(self.year) == 4 and self.year.endswith("XXX"):
788+
if len(year_no_symbol) == 4 and year_no_symbol.endswith("XXX"):
621789
return PRECISION_MILLENIUM
622-
if len(self.year) == 4 and self.year.endswith("XX"):
790+
if len(year_no_symbol) == 4 and year_no_symbol.endswith("XX"):
623791
return PRECISION_CENTURY
624-
if len(self.year) == 4 and self.year.endswith("X"):
792+
if len(year_no_symbol) == 4 and year_no_symbol.endswith("X"):
625793
return PRECISION_DECADE
626794
raise ValueError(f"Unspecified date {self} has no precision")
627795

edtf/parser/tests.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@
8181
("1999-01-XX", ("1999-01-01", "1999-01-31")),
8282
# some day in 1999
8383
("1999-XX-XX", ("1999-01-01", "1999-12-31")),
84+
# negative unspecified year
85+
("-01XX", ("-0199-01-01", "-0100-12-31")),
8486
# Uncertain/Approximate lower boundary dates (BCE)
8587
("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
8688
("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
@@ -92,6 +94,7 @@
9294
("1XXX", ("1000-01-01", "1999-12-31")),
9395
("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
9496
("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
97+
("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "-0000-12-31")),
9598
# L1 Extended Interval
9699
# beginning unknown, end 2006
97100
# for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)

0 commit comments

Comments
 (0)