Skip to content

Commit 2787045

Browse files
committed
2 parents fa8e6d0 + 0d73c37 commit 2787045

13 files changed

Lines changed: 367 additions & 23 deletions

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ codicefiscale.decode("CCCFBA85D03L219P")
6262
# "province": "TO",
6363
# "code": "L219",
6464
# },
65+
# "firstname_options": [
66+
# "Fabio",
67+
# ],
6568
# "omocodes": [
6669
# "CCCFBA85D03L219P",
6770
# "CCCFBA85D03L21VE",
@@ -86,6 +89,9 @@ codicefiscale.decode("CCCFBA85D03L219P")
8689
# }
8790
```
8891

92+
> [!TIP]
93+
> **Name suggestions**: The `firstname_options` field lists the first names whose encoding matches the firstname code. For Italian birthplaces, in approximately **60% of cases** it contains a single name, providing near-certain identification; otherwise it contains several candidate names. For foreign birthplaces, or when no known name matches the code, the list is empty.
94+
8995
#### Check
9096
```python
9197
codicefiscale.is_valid("CCCFBA85D03L219P")

requirements-test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ mypy == 1.20.*
44
pre-commit == 4.5.*
55
pytest==9.0.*
66
pytest-cov == 7.1.*
7-
tox == 4.52.*
7+
tox == 4.53.*

src/codicefiscale/__init__.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,16 @@
1+
from codicefiscale.codicefiscale import (
2+
decode,
3+
decode_firstname,
4+
decode_raw,
5+
encode,
6+
encode_birthdate,
7+
encode_birthplace,
8+
encode_cin,
9+
encode_firstname,
10+
encode_lastname,
11+
is_omocode,
12+
is_valid,
13+
)
114
from codicefiscale.metadata import (
215
__author__,
316
__copyright__,
@@ -14,4 +27,15 @@
1427
"__license__",
1528
"__title__",
1629
"__version__",
30+
"decode",
31+
"decode_firstname",
32+
"decode_raw",
33+
"encode",
34+
"encode_birthdate",
35+
"encode_birthplace",
36+
"encode_cin",
37+
"encode_firstname",
38+
"encode_lastname",
39+
"is_omocode",
40+
"is_valid",
1741
]

src/codicefiscale/codicefiscale.py

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from datetime import datetime, timedelta
66
from itertools import combinations
77
from re import Pattern
8-
from typing import Any, Literal
8+
from typing import Any, Literal, cast
99

1010
from dateutil import parser as date_parser
1111
from slugify import slugify
@@ -82,7 +82,15 @@
8282
_OMOCODIA_SUBS_INDEXES_COMBINATIONS.append(list(combo))
8383

8484

85-
_DATA: dict[str, dict[str, list[dict[str, Any]]]] = get_indexed_data()
85+
_DATA: dict[str, Any] | None = None
86+
87+
88+
def _get_data() -> dict[str, Any]:
    """
    Returns the indexed lookup data, building it on first use.

    The result of ``get_indexed_data()`` is stored in the module-level
    ``_DATA`` variable, so the index is built only when ``_DATA`` is
    still ``None``; every later call returns the stored object.

    :returns: The indexed data dictionary
    :rtype: dict[str, Any]
    """
    global _DATA
    if _DATA is not None:
        # already built by a previous call
        return _DATA
    _DATA = get_indexed_data()
    return _DATA
93+
8694

8795
CODICEFISCALE_RE: Pattern[str] = re.compile(
8896
r"^"
@@ -144,17 +152,18 @@ def _get_date(
144152
def _get_birthplace(
145153
birthplace: str,
146154
birthdate: datetime | str | None = None,
147-
) -> dict[str, dict[str, Any]] | None:
155+
) -> dict[str, Any] | None:
148156
birthplace_unicode_slug = slugify(birthplace, allow_unicode=True)
149157
birthplace_slug = slugify(birthplace)
150158
birthplace_code = birthplace_slug.upper()
151-
birthplaces_options = _DATA["municipalities"].get(
159+
data = _get_data()
160+
birthplaces_options = data["municipalities"].get(
152161
birthplace_unicode_slug,
153-
_DATA["municipalities"].get(
162+
data["municipalities"].get(
154163
birthplace_slug,
155-
_DATA["countries"].get(
164+
data["countries"].get(
156165
birthplace_slug,
157-
_DATA["codes"].get(
166+
data["codes"].get(
158167
birthplace_code,
159168
),
160169
),
@@ -165,23 +174,23 @@ def _get_birthplace(
165174

166175
birthdate_date = _get_date(birthdate)
167176
if not birthdate_date:
168-
return birthplaces_options[0].copy()
177+
return cast(dict[str, Any], birthplaces_options[0].copy())
169178

170179
# search birthplace that has been created before / deleted after birthdate
171180
for birthplace_option in birthplaces_options:
172181
date_created = _get_date(birthplace_option["date_created"]) or datetime.min
173182
date_deleted = _get_date(birthplace_option["date_deleted"]) or datetime.max
174183
# print(birthdate_date, date_created, date_deleted)
175184
if birthdate_date >= date_created and birthdate_date <= date_deleted:
176-
return birthplace_option.copy()
185+
return cast(dict[str, Any], birthplace_option.copy())
177186

178187
return _get_birthplace_fallback(birthplaces_options, birthdate_date)
179188

180189

181190
def _get_birthplace_fallback(
182191
birthplaces_options: list[dict[str, Any]],
183192
birthdate_date: datetime,
184-
) -> dict[str, dict[str, Any]] | None:
193+
) -> dict[str, Any] | None:
185194
# avoid wrong birthplace code error when birthdate falls in
186195
# missing date-range in the data-source even if birthplace code is valid
187196
birthplaces_options_count = len(birthplaces_options)
@@ -280,6 +289,43 @@ def encode_firstname(firstname: str) -> str:
280289
return firstname_code
281290

282291

292+
def decode_firstname(
    firstname_code: str, gender: Literal["m", "M", "f", "F"] | None = None
) -> list[str] | None:
    """
    Decodes firstname code to possible italian first names.

    Returns a list of possible names that encode to the given code.
    Only works for common italian names.

    The returned list is always a new object: callers may mutate it
    freely without affecting the cached lookup data.

    :param firstname_code: The 3-character firstname code (case-insensitive)
    :type firstname_code: string
    :param gender: Optional gender filter ('M' or 'F', case-insensitive)
    :type gender: string | None

    :returns: List of possible first names, or None if not found
    :rtype: list[str] | None
    """
    firstname_code_upper = firstname_code.upper()
    data = _get_data()
    names_by_gender = cast(
        dict[str, list[str]] | None, data["names"].get(firstname_code_upper)
    )

    if not names_by_gender:
        return None

    if gender:
        gender_upper = gender.upper()
        if gender_upper in ("M", "F"):
            gender_names = names_by_gender.get(gender_upper, [])
            # copy: the list stored in the index is shared by every call,
            # so handing it out directly would let callers corrupt it
            return list(gender_names) if gender_names else None

    # no (or unrecognized) gender: return the names of both genders,
    # de-duplicated and sorted
    all_names = names_by_gender.get("M", []) + names_by_gender.get("F", [])
    return sorted(set(all_names)) if all_names else None
327+
328+
283329
def encode_birthdate(
284330
birthdate: datetime | str | None,
285331
gender: Literal["m", "M", "f", "F"],
@@ -448,7 +494,7 @@ def decode_raw(code: str) -> dict[str, str]:
448494
return data
449495

450496

451-
def decode(code: str) -> dict[str, Any]:
497+
def decode(code: str) -> dict[str, Any]: # noqa: C901
452498
"""
453499
Decodes the italian fiscal code.
454500
@@ -466,11 +512,10 @@ def decode(code: str) -> dict[str, Any]:
466512
birthdate_month = _MONTHS.index(raw["birthdate_month"]) + 1
467513
birthdate_day = int(raw["birthdate_day"].translate(_OMOCODIA_DECODE_TRANS))
468514

515+
gender: Literal["M", "F"] = "M"
469516
if birthdate_day > 40:
470517
birthdate_day -= 40
471518
gender = "F"
472-
else:
473-
gender = "M"
474519

475520
current_year = datetime.now().year
476521
current_year_century_prefix = str(current_year)[0:-2]
@@ -517,12 +562,19 @@ def decode(code: str) -> dict[str, Any]:
517562
f"expected {cin_check!r}, found {cin!r}"
518563
)
519564

565+
# add possible first names if birthplace is in Italy (not foreign country)
566+
firstname_options = None
567+
is_foreign = birthplace and birthplace.get("province") == "EE"
568+
if not is_foreign:
569+
firstname_options = decode_firstname(raw["firstname"], gender)
570+
520571
data = {
521572
"code": code,
522573
"omocodes": _get_omocodes(code),
523574
"gender": gender,
524575
"birthdate": birthdate,
525576
"birthplace": birthplace,
577+
"firstname_options": firstname_options or [],
526578
"raw": raw,
527579
}
528580

src/codicefiscale/data.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import os
44
import sys
5-
from datetime import datetime
65
from typing import Any
76

87
import fsutil
@@ -33,23 +32,31 @@ def get_countries_data() -> Any:
3332
return deleted_countries + countries
3433

3534

36-
def get_indexed_data() -> dict[
37-
str, dict[str, list[dict[str, bool | datetime | str | list[str]]]]
38-
]:
35+
def get_names_data() -> Any:
    """
    Loads the first names dataset.

    Returns whatever ``get_data("names.json")`` yields, unchanged.
    NOTE(review): ``get_indexed_data`` iterates the result with
    ``.items()`` as gender -> names, so a mapping is presumably
    expected here -- confirm against the data file.
    """
    return get_data("names.json")
38+
39+
40+
def get_indexed_data() -> dict[str, Any]:
41+
from codicefiscale.codicefiscale import encode_firstname
42+
3943
municipalities = get_municipalities_data()
4044
countries = get_countries_data()
41-
data: dict[str, dict[str, list[dict[str, bool | datetime | str | list[str]]]]] = {
45+
names = get_names_data()
46+
47+
data: dict[str, Any] = {
4248
"municipalities": {},
4349
"countries": {},
4450
"codes": {},
51+
"names": {},
4552
}
4653

4754
for municipality in municipalities:
4855
code = municipality["code"]
4956
province = municipality["province"].lower()
5057
municipality_unicode_slug = slugify(municipality["name"], allow_unicode=True)
51-
names = [municipality_unicode_slug] + municipality["name_slugs"]
52-
for name in names:
58+
municipality_names = [municipality_unicode_slug] + municipality["name_slugs"]
59+
for name in municipality_names:
5360
name_and_province = f"{name}-{province}"
5461
data["municipalities"].setdefault(name, [])
5562
data["municipalities"].setdefault(name_and_province, [])
@@ -60,11 +67,21 @@ def get_indexed_data() -> dict[
6067

6168
for country in countries:
6269
code = country["code"]
63-
names = country["name_slugs"]
64-
for name in names:
70+
country_names = country["name_slugs"]
71+
for name in country_names:
6572
data["countries"].setdefault(name, [])
6673
data["countries"][name].append(country)
6774
data["codes"].setdefault(code, [])
6875
data["codes"][code].append(country)
6976

77+
for gender, gender_names in names.items():
78+
for name in gender_names:
79+
code = encode_firstname(name)
80+
data["names"].setdefault(code, {"M": set(), "F": set()})
81+
data["names"][code][gender].add(name)
82+
83+
for code in data["names"]:
84+
data["names"][code]["M"] = sorted(data["names"][code]["M"])
85+
data["names"][code]["F"] = sorted(data["names"][code]["F"])
86+
7087
return data

0 commit comments

Comments
 (0)