dh-tech · rlskoeser · Mar 27, 2026 · Feb 12, 2026 · Feb 12, 2026 · Feb 12, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Change Log
 
+## 0.7
+
+- Add parsing to Gregorian date converter; supports month names (full or abbreviated)
+  in English, French, German, Spanish, Kinyarwanda, Ganda, and Tigrinya
+- Include Gregorian dates in omnibus parser
+
 ## 0.6
 
 - Experimental omnibus date converter + parser (EDTF, Hebrew, Hijri)

diff --git a/DEVELOPER_NOTES.md b/DEVELOPER_NOTES.md
@@ -88,4 +88,19 @@ pip install -e ".[docs]"
 sphinx-build docs docs/_build
 ```
 
-HTML documentation will be generated in `docs/_build/html`
+HTML documentation will be generated in `docs/_build/html`
+
+
+### Regenerating multilingual Gregorian month name parse file
+
+The Gregorian Lark grammar imports a script-generated file that defines
+month names (full and abbreviated) for a list of language codes, using the
+Babel library.  To regenerate it, run the script with hatch (which should
+be installed globally):
+
+    hatch run codegen:generate
+
+When the `.lark` file is modified by the script, it must be committed to git.
+
+
+
diff --git a/pyproject.toml b/pyproject.toml
@@ -81,6 +81,12 @@ path = "src/undate/__init__.py"
 [tool.hatch.build.targets.sdist]
 include = ["src/undate/**/*.py", "src/undate/**/*.lark", "tests/**"]
 
+[tool.hatch.envs.codegen]
+dependencies = ["babel"]
+
+[tool.hatch.envs.codegen.scripts]
+generate = "python scripts/generate_gregorian_grammar.py"
+
 [tool.pytest.ini_options]
 pythonpath = "src/"
 markers = [

diff --git a/scripts/generate_gregorian_grammar.py b/scripts/generate_gregorian_grammar.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+"""
+This script generates the gregorian_multilang.lark file
+with month names (full and abbreviated) based on the list of
+target languages.
+
+Run this script with hatch to regenerate the file::
+
+    hatch run codegen:generate
+
+"""
+
+from collections import defaultdict
+import pathlib
+
+from babel.dates import get_month_names
+
+# lark grammar path relative to this script
+GRAMMAR_DIR_PATH = (
+    pathlib.Path(__file__).parent.parent / "src" / "undate" / "converters" / "grammars"
+)
+# file that is generated by this script, in that directory
+MONTH_GRAMMAR_FILE = GRAMMAR_DIR_PATH / "gregorian_multilang.lark"
+
+# include month names in the following languages
+languages = [
+    "en",  # English
+    "es",  # Spanish
+    "fr",  # French
+    "de",  # German
+    "rw",  # Kinyarwanda
+    "lg",  # Ganda
+    "ti",  # Tigrinya
+]
+
+# warning to include at top of generated file
+warning_text = """// WARNING: This file is auto-generated. DO NOT EDIT.
+// To regenerate: hatch run codegen:generate
+
+"""
+
+
+def main():
+    """Regenerate the multilingual Gregorian month-name grammar file.
+
+    Collects the full ("wide") and abbreviated month names from Babel for
+    every locale in ``languages`` and writes one Lark rule per month
+    number (``month_1``, ``month_2``, ...) to ``MONTH_GRAMMAR_FILE``,
+    overwriting any existing file.
+    """
+    # create a dictionary of lists to hold the names for each month
+    all_month_names = defaultdict(list)
+
+    for lang in languages:
+        for width in ["wide", "abbreviated"]:
+            for month_num, month_name in get_month_names(width, locale=lang).items():
+                # some locales use a . on the shortened month; let's ignore that
+                month_name = month_name.strip(".")
+                # In some cases different languages have the same abbreviations;
+                # in some cases, abbreviated and full are the same.
+                # Only add if not already present, to avoid redundancy
+                if month_name not in all_month_names[month_num]:
+                    all_month_names[month_num].append(month_name)
+
+    # The generated file contains non-ASCII month names (accented letters,
+    # Tigrinya script), so write UTF-8 explicitly instead of relying on
+    # the platform's default encoding.
+    with MONTH_GRAMMAR_FILE.open("w", encoding="utf-8") as outfile:
+        outfile.write(warning_text)
+
+        # for each numeric month, generate a rule with all variant names:
+        # month_1:  "January" | "Jan"  ...
+        for i, names in all_month_names.items():
+            # combine all names in an OR string
+            or_names = " | ".join(f'"{m}"' for m in names)
+            outfile.write(f"month_{i}: {or_names}\n")
+
+    # Report the path relative to the current directory when possible;
+    # relative_to() raises ValueError when the file is not located under
+    # the current directory, so fall back to the path as-is in that case.
+    try:
+        display_path = MONTH_GRAMMAR_FILE.relative_to(pathlib.Path.cwd())
+    except ValueError:
+        display_path = MONTH_GRAMMAR_FILE
+    print(f"Successfully regenerated {display_path}")
+    print("If the file has changed, make sure to commit the new version.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/undate/converters/calendars/gregorian/__init__.py b/src/undate/converters/calendars/gregorian/__init__.py
@@ -0,0 +1,3 @@
+from undate.converters.calendars.gregorian.converter import GregorianDateConverter
+
+__all__ = ["GregorianDateConverter"]
diff --git a/src/undate/converters/calendars/gregorian.py → ...nverters/calendars/gregorian/converter.py b/src/undate/converters/calendars/gregorian.py → ...nverters/calendars/gregorian/converter.py
@@ -1,6 +1,11 @@
 from calendar import monthrange, isleap
 
+from lark.exceptions import UnexpectedCharacters, UnexpectedEOF
+
+from undate.undate import Undate
 from undate.converters.base import BaseCalendarConverter
+from undate.converters.calendars.gregorian.parser import gregorian_parser
+from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
 
 
 class GregorianDateConverter(BaseCalendarConverter):
@@ -18,6 +23,9 @@ class GregorianDateConverter(BaseCalendarConverter):
     #: arbitrary known leap year
     LEAP_YEAR: int = 2024
 
+    def __init__(self):
+        # transformer used by parse() to turn a parse tree from the
+        # Gregorian date parser into an Undate
+        self.transformer = GregorianDateTransformer()
+
     def min_month(self) -> int:
         """First month for the Gregorian calendar."""
         return 1
@@ -79,3 +87,25 @@ def to_gregorian(self, year, month, day) -> tuple[int, int, int]:
         a common point of comparison.
         """
         return (year, month, day)
+
+    def parse(self, value: str) -> Undate:
+        """
+        Parse a Gregorian date string of any supported precision in any
+        supported language and return an :class:`~undate.undate.Undate`.
+        The input date string is preserved in the label of the resulting
+        Undate object.
+
+        :raises ValueError: if the string is empty or cannot be parsed
+            as a Gregorian date
+        """
+        if not value:
+            raise ValueError("Parsing empty string is not supported")
+
+        try:
+            # parse the string with our Gregorian date parser
+            parsetree = gregorian_parser.parse(value)
+        except (UnexpectedCharacters, UnexpectedEOF) as err:
+            # UnexpectedCharacters: input contains text the grammar does not
+            # recognize; UnexpectedEOF: input ends before a complete date
+            # (e.g. a month name on its own). Report both as ValueError.
+            raise ValueError(f"Could not parse '{value}' as a Gregorian date") from err
+
+        # transform the parse tree into an undate object
+        undate_obj = self.transformer.transform(parsetree)
+        # set the original date string as the label
+        undate_obj.label = value
+        return undate_obj
diff --git a/src/undate/converters/calendars/gregorian/parser.py b/src/undate/converters/calendars/gregorian/parser.py
@@ -0,0 +1,10 @@
+from lark import Lark
+
+from undate.converters import GRAMMAR_FILE_PATH
+
+grammar_path = GRAMMAR_FILE_PATH / "gregorian.lark"
+
+# Module-level parser, constructed once when this module is imported.
+# Open based on filename to allow relative imports based on grammar file
+# (gregorian.lark imports its month rules from gregorian_multilang.lark);
+# parsing starts from the gregorian_date rule.
+gregorian_parser = Lark.open(
+    str(grammar_path), rel_to=__file__, start="gregorian_date", strict=True
+)
diff --git a/src/undate/converters/calendars/gregorian/transformer.py b/src/undate/converters/calendars/gregorian/transformer.py
@@ -0,0 +1,42 @@
+from lark import Transformer, Tree
+
+from undate import Undate, Calendar
+
+
+class GregorianDateTransformer(Transformer):
+    """Convert a Gregorian date parse tree into an Undate."""
+
+    # NOTE: the parser is not currently expected to produce intervals
+
+    calendar = Calendar.GREGORIAN
+
+    def gregorian_date(self, items):
+        """Build an Undate from the year / month / day subtrees in ``items``."""
+        # each relevant subtree holds a single value that is cast to int;
+        # subtrees with any other name are ignored
+        date_fields = {
+            str(node.data): int(node.children[0])
+            for node in items
+            if node.data in ("year", "month", "day")
+        }
+        # pass along whichever parts were found, with the Gregorian calendar
+        return Undate(**date_fields, calendar=self.calendar)
+
+    def year(self, items):
+        """Join the matched pieces into one string inside a ``year`` tree."""
+        return Tree("year", ["".join(map(str, items))])
+
+    def month(self, items):
+        """Return a ``month`` tree holding the month number as a string."""
+        # the first item is a subtree named for the matched rule
+        # (month_1, month_2, etc); the text after the last underscore
+        # is the number of the month
+        rule_name = items[0].data
+        return Tree("month", [rule_name.rpartition("_")[2]])
+
+    def day(self, items):
+        """Join the matched pieces into one string inside a ``day`` tree."""
+        return Tree("day", ["".join(map(str, items))])
diff --git a/src/undate/converters/combined.py b/src/undate/converters/combined.py
@@ -13,6 +13,7 @@
 from undate import Undate, UndateInterval
 from undate.converters import BaseDateConverter, GRAMMAR_FILE_PATH
 from undate.converters.edtf.transformer import EDTFTransformer
+from undate.converters.calendars.gregorian.transformer import GregorianDateTransformer
 from undate.converters.calendars.hebrew.transformer import HebrewDateTransformer
 from undate.converters.calendars.islamic.transformer import IslamicDateTransformer
 
@@ -33,6 +34,7 @@ def start(self, children):
     edtf=EDTFTransformer(),
     hebrew=HebrewDateTransformer(),
     islamic=IslamicDateTransformer(),
+    gregorian=GregorianDateTransformer(),
 )
 
 
@@ -45,7 +47,7 @@ def start(self, children):
 class OmnibusDateConverter(BaseDateConverter):
     """
     Combination parser that aggregates existing parser grammars.
-    Currently supports EDTF, Hebrew, and Hijri  where dates are unambiguous.
+    Currently supports EDTF, Gregorian, Hebrew, and Hijri where dates are unambiguous.
     (Year-only dates are parsed as EDTF in Gregorian calendar.)
 
     Does not support serialization.

diff --git a/src/undate/converters/grammars/combined.lark b/src/undate/converters/grammars/combined.lark
@@ -1,7 +1,12 @@
 %import common.WS
 %ignore WS
 
-start: (edtf__start | hebrew__hebrew_date  | islamic__islamic_date )
+// Some abbreviations use periods; some default date formats
+// include commas. Ignore both. (Copied from gregorian.lark)
+PUNCTUATION: "." | ","
+%ignore PUNCTUATION
+
+start: (edtf__start | hebrew__hebrew_date  | islamic__islamic_date | gregorian__gregorian_date )
 
 // Renaming of the import variables is required, as they receive the namespace of this file.
 // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565
@@ -23,10 +28,17 @@ start: (edtf__start | hebrew__hebrew_date  | islamic__islamic_date )
 %import .islamic.month -> islamic__month
 %import .islamic.year -> islamic__year
 
+// gregorian calendar, in multiple languages
+%import .gregorian.gregorian_date -> gregorian__gregorian_date
+
 
 // override hebrew date to omit year-only, since year without calendar is ambiguous
 // NOTE: potentially support year with calendar label
-%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year 
+%override hebrew__hebrew_date: hebrew__day hebrew__month hebrew__year | hebrew__month hebrew__year
 
 // same for islamic date, year alone is ambiguous
-%override islamic__islamic_date: islamic__day islamic__month islamic__year | islamic__month islamic__year 
+%override islamic__islamic_date: islamic__day islamic__month islamic__year | islamic__month islamic__year
+
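+// TODO: as above, omit year-only dates, since they are covered by EDTF.
+// This override is currently disabled (commented out), so year-only
+// Gregorian dates are still included by the imported rule:
+// %override gregorian__gregorian_date: day month year | month day year | year month day | month year | year month |  day month | month day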
+
diff --git a/src/undate/converters/grammars/gregorian.lark b/src/undate/converters/grammars/gregorian.lark
@@ -0,0 +1,38 @@
+%import common.WS
+%ignore WS
+
+// Some abbreviations use periods; some default date formats
+// include commas. Ignore both
+PUNCTUATION: "." | ","
+%ignore PUNCTUATION
+
+%import .gregorian_multilang (month_1, month_2, month_3, month_4, month_5,  \
+	month_6, month_7, month_8, month_9, month_10, month_11, month_12)
+
+
+// no weekday support for now
+gregorian_date: day month year | month day year | year month day | month year | year month | year | day month | month day
+
+// months have 28 to 31 days; we do not expect leading zeroes
+day:  /[1-9]/ | /[12][0-9]/ | /3[0-1]/
+
+// Gregorian calendar started in 1582; assume years with 3 or more digits for now,
+// so we can support mixed day / year order unambiguously
+year: /\b\d{3,}\b/
+// Use word boundaries to separate from other tokens (esp. numeric day),
+// since we otherwise ignore whitespace
+
+// months
+month: month_1
+    | month_2
+    | month_3
+    | month_4
+    | month_5
+    | month_6
+    | month_7
+    | month_8
+    | month_9
+    | month_10
+    | month_11
+    | month_12
+
diff --git a/src/undate/converters/grammars/gregorian_multilang.lark b/src/undate/converters/grammars/gregorian_multilang.lark
@@ -0,0 +1,15 @@
+// WARNING: This file is auto-generated. DO NOT EDIT.
+// To regenerate: hatch run codegen:generate
+
+month_1: "January" | "Jan" | "enero" | "ene" | "janvier" | "janv" | "Januar" | "Mutarama" | "mut" | "Janwaliyo" | "ጥሪ"
+month_2: "February" | "Feb" | "febrero" | "feb" | "février" | "févr" | "Februar" | "Gashyantare" | "gas" | "Febwaliyo" | "ለካቲት" | "ለካ"
+month_3: "March" | "Mar" | "marzo" | "mar" | "mars" | "März" | "Werurwe" | "wer" | "Marisi" | "መጋቢት" | "መጋ"
+month_4: "April" | "Apr" | "abril" | "abr" | "avril" | "avr" | "Mata" | "mat" | "Apuli" | "Apu" | "ሚያዝያ" | "ሚያ"
+month_5: "May" | "mayo" | "may" | "mai" | "Mai" | "Gicurasi" | "gic" | "Maayi" | "Maa" | "ጉንበት" | "ግን"
+month_6: "June" | "Jun" | "junio" | "jun" | "juin" | "Juni" | "Kamena" | "kam" | "Juuni" | "Juu" | "ሰነ"
+month_7: "July" | "Jul" | "julio" | "jul" | "juillet" | "juil" | "Juli" | "Nyakanga" | "nya" | "Julaayi" | "ሓምለ" | "ሓም"
+month_8: "August" | "Aug" | "agosto" | "ago" | "août" | "Kanama" | "kan" | "Agusito" | "Agu" | "ነሓሰ" | "ነሓ"
+month_9: "September" | "Sep" | "septiembre" | "sept" | "septembre" | "Sept" | "Nzeri" | "nze" | "Sebuttemba" | "Seb" | "መስከረም" | "መስ"
+month_10: "October" | "Oct" | "octubre" | "oct" | "octobre" | "Oktober" | "Okt" | "Ukwakira" | "ukw" | "Okitobba" | "Oki" | "ጥቅምቲ" | "ጥቅ"
+month_11: "November" | "Nov" | "noviembre" | "nov" | "novembre" | "Ugushyingo" | "ugu" | "Novemba" | "ሕዳር" | "ሕዳ"
+month_12: "December" | "Dec" | "diciembre" | "dic" | "décembre" | "déc" | "Dezember" | "Dez" | "Ukuboza" | "uku" | "Desemba" | "Des" | "ታሕሳስ" | "ታሕ"
diff --git a/...nverters/test_calendars/test_gregorian.py → ...est_gregorian/test_gregorian_converter.py b/...nverters/test_calendars/test_gregorian.py → ...est_gregorian/test_gregorian_converter.py
@@ -1,3 +1,5 @@
+from undate.date import DatePrecision
+from undate.undate import Undate, Calendar
 from undate.converters.calendars import GregorianDateConverter
 
 
@@ -38,3 +40,28 @@ def test_representative_years(self):
             converter.LEAP_YEAR,
             converter.NON_LEAP_YEAR,
         ]
+
+    def test_parse(self):
+        """Parse day-, month-, and year-precision strings and check the
+        resulting value, calendar, precision, and label."""
+        # day
+        date_str = "2022 Ugushyingo 26"
+        date = GregorianDateConverter().parse(date_str)
+        assert date == Undate(2022, 11, 26)  # Ugushyingo = November
+        assert date.calendar == Calendar.GREGORIAN
+        assert date.precision == DatePrecision.DAY
+        assert date.label == date_str
+
+        # month
+        date_str = "avril 1362"
+        date = GregorianDateConverter().parse(date_str)
+        assert date == Undate(1362, 4)
+        assert date.calendar == Calendar.GREGORIAN
+        assert date.precision == DatePrecision.MONTH
+        assert date.label == date_str
+
+        # year
+        date_str = "932"
+        date = GregorianDateConverter().parse(date_str)
+        assert date == Undate(932)
+        assert date.calendar == Calendar.GREGORIAN
+        assert date.precision == DatePrecision.YEAR
+        assert date.label == date_str
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from undate.converters.calendars.gregorian.converter import GregorianDateConverter

		__all__ = ["GregorianDateConverter"]