Skip to content

Commit f1f89f5

Browse files
author
github-actions
committed
feat: Add partial address validation with component cleaning
Add support for partial address validation, where invalid optional components such as Zip4 can be cleaned (removed) while the address itself remains valid. This prevents the entire address parse from failing because of a single invalid optional field.

Key changes:
- Add CleaningOperation dataclass to track cleaning operations
- Extend ParseResult with cleaning tracking fields and methods
- Add allow_partial parameter to the parse_auto() functions
- Add component-level validation functions (validate_zip5, validate_zip4)
- Implement partial validation logic that cleans invalid optional components
- Add comprehensive test coverage for partial validation behavior
- Update ruff config for Python 3.9 type annotation compatibility

Addresses the issue where an address with an invalid optional component (such as a malformed Zip4) would fail completely. Such components are now cleaned and tracked, so the address remains valid and the cleaning is properly reported.

BREAKING: parse_auto() now accepts an allow_partial parameter; it defaults to False for backward compatibility
FEATURE: Passing allow_partial=True enables cleaning of invalid optional components
FEATURE: Cleaning operations are tracked with timestamps and reasons
FEATURE: ParseResult is enhanced with cleaning report methods
1 parent eff3066 commit f1f89f5

7 files changed

Lines changed: 686 additions & 8 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ version_variables = [
8686
select = ["E", "F", "I", "UP", "B", "SIM"]
8787
ignore = [
8888
"UP007", # Use X | Y - not compatible with Pydantic on Python 3.9
89+
"UP045", # Use X | None - not compatible with typer on Python 3.9
8990
]
9091

9192
[tool.mypy]

src/ryandata_address_utils/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
Address,
5555
AddressBuilder,
5656
AddressField,
57+
CleaningOperation,
5758
ParseResult,
5859
RyanDataAddressError,
5960
RyanDataValidationError,
@@ -85,6 +86,8 @@
8586
CompositeValidator,
8687
StateValidator,
8788
ZipCodeValidator,
89+
validate_zip4,
90+
validate_zip5,
8891
)
8992

9093
__version__ = "0.5.0"
@@ -104,6 +107,7 @@
104107
"AddressBuilder",
105108
"AddressField",
106109
"ADDRESS_FIELDS",
110+
"CleaningOperation",
107111
"ParseResult",
108112
"RyanDataAddressError",
109113
"RyanDataValidationError",
@@ -127,6 +131,8 @@
127131
"CompositeValidator",
128132
"StateValidator",
129133
"ZipCodeValidator",
134+
"validate_zip4",
135+
"validate_zip5",
130136
# Convenience functions
131137
"get_zip_info",
132138
"is_valid_zip",

src/ryandata_address_utils/models.py

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from __future__ import annotations
22

3+
from collections import Counter
34
from dataclasses import dataclass, field
5+
from datetime import datetime
46
from enum import Enum
5-
from typing import Union
7+
from typing import Any, Union
68

79
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator
810
from pydantic_core import PydanticCustomError
@@ -670,6 +672,19 @@ class ValidationError:
670672
value: str | None = None
671673

672674

675+
@dataclass
676+
class CleaningOperation:
677+
"""Record of a component cleaning operation.
678+
679+
Tracks when an address component was cleaned/removed during partial validation.
680+
"""
681+
682+
component: str
683+
original_value: str | None
684+
reason: str
685+
timestamp: str # ISO format
686+
687+
673688
@dataclass
674689
class ValidationResult:
675690
"""Result of address validation."""
@@ -692,7 +707,10 @@ def merge(self, other: ValidationResult) -> ValidationResult:
692707

693708
@dataclass
694709
class ParseResult:
695-
"""Result of address parsing."""
710+
"""Result of address parsing.
711+
712+
Extended to support partial validation with component-level cleaning tracking.
713+
"""
696714

697715
raw_input: str
698716
address: Address | None = None
@@ -701,6 +719,10 @@ class ParseResult:
701719
validation: ValidationResult | None = None
702720
source: str | None = None # "us" or "international"
703721
is_international: bool | None = None
722+
# Partial validation tracking fields
723+
cleaned_components: dict[str, Any] = field(default_factory=dict)
724+
invalid_components: dict[str, dict[str, Any]] = field(default_factory=dict)
725+
cleaning_operations: list[CleaningOperation] = field(default_factory=list)
704726

705727
@property
706728
def is_valid(self) -> bool:
@@ -729,6 +751,57 @@ def to_dict(self) -> dict[str, str | None]:
729751
return self.address.to_dict()
730752
return {f: None for f in ADDRESS_FIELDS}
731753

754+
def add_cleaning_operation(
755+
self, component: str, original_value: Any, reason: str
756+
) -> None:
757+
"""Track a cleaning operation performed during partial validation.
758+
759+
Args:
760+
component: Name of the address component that was cleaned.
761+
original_value: The original value before cleaning.
762+
reason: Explanation of why the component was cleaned.
763+
"""
764+
self.cleaning_operations.append(
765+
CleaningOperation(
766+
component=component,
767+
original_value=original_value,
768+
reason=reason,
769+
timestamp=datetime.now().isoformat(),
770+
)
771+
)
772+
773+
def has_cleaning_operations(self) -> bool:
774+
"""Check if any components were cleaned during partial validation.
775+
776+
Returns:
777+
True if any cleaning operations were performed.
778+
"""
779+
return len(self.cleaning_operations) > 0
780+
781+
def get_cleaning_report(self) -> list[dict[str, Any]]:
782+
"""Get cleaning operations as a list of dictionaries for export.
783+
784+
Returns:
785+
List of dictionaries with cleaning operation details.
786+
"""
787+
return [
788+
{
789+
"component": op.component,
790+
"original_value": op.original_value,
791+
"reason": op.reason,
792+
"timestamp": op.timestamp,
793+
}
794+
for op in self.cleaning_operations
795+
]
796+
797+
def get_cleaning_summary(self) -> dict[str, int]:
798+
"""Get summary counts of cleaning operations by component type.
799+
800+
Returns:
801+
Dictionary mapping component names to count of cleaning operations.
802+
"""
803+
return dict(Counter(op.component for op in self.cleaning_operations))
804+
732805

733806
class AddressBuilder:
734807
"""Builder for programmatic Address construction.

0 commit comments

Comments
 (0)