diff --git a/PYDANTIC_GUIDE.md b/PYDANTIC_GUIDE.md index 3f502c9e5..ceda6ec01 100644 --- a/PYDANTIC_GUIDE.md +++ b/PYDANTIC_GUIDE.md @@ -54,10 +54,8 @@ from overture.schema.system.string import ( NoWhitespaceString, StrippedString, ) -from overture.schema.core.types import ( - ConfidenceScore, - LanguageTag, -) +from overture.schema.core.types import ConfidenceScore +from overture.schema.system.string import LanguageTag # Numeric primitives (use these instead of int/float) from overture.schema.system.primitive import ( @@ -182,7 +180,7 @@ from typing import Literal from overture.schema.core import OvertureFeature from overture.schema.core.models import Stacked from overture.schema.core.names import Named -from overture.schema.system.primitives import float64 +from overture.schema.system.primitive import float64 class Building(OvertureFeature[Literal["buildings"], Literal["building"]], Named, Stacked): # Gets fields from Feature: id, theme, type, geometry, etc. @@ -506,18 +504,34 @@ class Building(OvertureFeature): Add documentation to describe what the enum and its values mean. In Python, you do this with **docstrings** - text enclosed in triple quotes `"""` that describes what something does: -TODO: DocumentedEnum +Use `DocumentedEnum` from `overture.schema.system.doc` when enum members need their own descriptions for code generation and documentation tooling. 
Each documented member takes a `(value, description)` tuple: ```python -class VehicleType(str, Enum): +from overture.schema.system.doc import DocumentedEnum + +class VehicleType(str, DocumentedEnum): """Types of vehicles for transportation.""" - CAR = "car" # Standard passenger vehicle - TRUCK = "truck" # Commercial freight vehicle - BICYCLE = "bicycle" # Human-powered two-wheeler - MOTORCYCLE = "motorcycle" # Motorized two-wheeler + CAR = ("car", "Standard passenger vehicle") + TRUCK = ("truck", "Commercial freight vehicle") + BICYCLE = ("bicycle", "Human-powered two-wheeler") + MOTORCYCLE = ("motorcycle", "Motorized two-wheeler") ``` +Members without descriptions use the plain value form -- documentation is optional per-member: + +```python +class ConnectionState(str, DocumentedEnum): + CONNECTED = "connected" + DISCONNECTED = "disconnected" + QUIESCING = ( + "quiescing", + "Gracefully shutting down, rejecting new requests but completing existing ones", + ) +``` + +Use `DocumentedEnum` over plain `str, Enum` when the enum members' semantics aren't obvious from their names and downstream tools (code generators, documentation renderers) need access to member-level descriptions. Use plain `str, Enum` for self-explanatory values. + #### Why str, Enum? Inheriting from `str, Enum` makes enum values work as both enums and strings, which is useful for JSON serialization and compatibility. 
@@ -560,7 +574,7 @@ class DivisionArea(OvertureFeature[Literal["divisions"], Literal["division_area" ] = None ``` -**Available relationship types (see [Relationship](packages/overture-schema-core/src/overture/schema/core/ref.py)):** +**Available relationship types (see [Relationship](packages/overture-schema-system/src/overture/schema/system/ref/ref.py)):** - **`BELONGS_TO`**: The referencing feature belongs to the referenced feature (division area belongs to division) - **`CONNECTS_TO`**: The referencing feature connects to the referenced feature (segment connects to connector) @@ -627,7 +641,7 @@ class Building(OvertureFeature[Literal["buildings"], Literal["building"]]): #### Best Practices -**1. Always Use Reference Annotations** +##### Always Use Reference Annotations Include `Reference` annotations for semantic clarity and documentation: @@ -643,7 +657,7 @@ division_id: Annotated[ division_id: Id ``` -**2. Choose the Right Pattern** +##### Choose the Right Pattern - **Simple relationships** → Direct references (foreign keys) - **Relationships with metadata** → Separate association features @@ -938,7 +952,7 @@ Organize code by scope and avoid circular imports: **Cross-theme shared**: `overture-schema-core` package -- Used by multiple themes (e.g., `LanguageTag`, `CountryCode`, `OvertureFeature`) +- Used by multiple themes (e.g., `OvertureFeature`, `Names`, `Sources`, `Scope`) **Theme-level shared**: Theme package root (e.g., `overture-schema-transportation-theme/src/overture/schema/transportation/`) @@ -1110,7 +1124,7 @@ JSON Schema containers become **mixin classes** in Pydantic that you inherit fro ```python models.py from typing import Annotated from pydantic import BaseModel, Field -from overture.schema.model_constraints import no_extra +from overture.schema.system.model_constraint import no_extra_fields from overture.schema.system.primitive import int8, float64 @no_extra_fields diff --git a/README.pydantic.md b/README.pydantic.md index 7bae5a7ff..0655c46d9 
100644 --- a/README.pydantic.md +++ b/README.pydantic.md @@ -88,9 +88,11 @@ This workspace contains the following packages: - **`overture-schema`** - Main entrypoint package that aggregates all types for convenient usage -- **`overture-schema-core`** - Base classes, geometry models, and common structures - shared across all themes -- **`overture-schema-system`** - Foundational system of primitive types and constraints +- **`overture-schema-core`** - Overture-specific models shared across themes: base + feature class, scoping framework, names, sources, and cartographic hints +- **`overture-schema-system`** - Portable primitive types, constraints, and a + GeoJSON-aware base model for building Pydantic schemas that serialize to + JSON, Parquet, and Spark ### Theme Packages diff --git a/packages/overture-schema-core/README.md b/packages/overture-schema-core/README.md index f4938f871..58f5d3ee9 100644 --- a/packages/overture-schema-core/README.md +++ b/packages/overture-schema-core/README.md @@ -1,6 +1,6 @@ # Overture Schema Core -Core Pydantic models and base classes for Overture Maps schemas, providing foundational types, geometry handling, and a comprehensive scoping system for conditional rule application. +Shared models and conventions for building Overture Maps feature types. Defines the base feature class all themes extend, a scoping framework for expressing conditional values (this speed limit applies *here*, *then*, to *these vehicles*), and common structures for names, sources, and cartographic hints. 
## Installation @@ -8,159 +8,90 @@ Core Pydantic models and base classes for Overture Maps schemas, providing found pip install overture-schema-core ``` -## Key Components +## OvertureFeature -- **Base Classes**: Extensible base models for Overture Maps features -- **Geometry Types**: WKB geometry type hints and utilities -- **Common Structures**: Shared models used across all themes -- **Primitive Data Types**: Validated primitive types with multi-target serialization support -- **Scoping System**: Flexible conditional rule application framework - -## Enhanced Primitive Types - -The enhanced primitive types system provides validated primitive types with automatic -constraint checking and multi-target serialization support. This enables consistent type -definitions that can generate appropriate representations for different targets (Spark, -Parquet, etc.). - -### Available Types - -Built-in Python primitive types (`str`, `int`, `float`, `bool`, `list`, etc.) are -automatically mapped. - -We also provide the following additional types: - -#### Integer Types - -- **`uint8`**: 8-bit unsigned integer (0-255) -- **`uint16`**: 16-bit unsigned integer (0-65535) -- **`uint32`**: 32-bit unsigned integer (0-4294967295) -- **`int8`**: 8-bit signed integer (-128 to 127) -- **`int32`**: 32-bit signed integer (-2³¹ to 2³¹-1) -- **`int64`**: 64-bit signed integer (-2⁶³ to 2⁶³-1) - -#### Floating Point Types - -- **`float32`**: 32-bit floating point number -- **`float64`**: 64-bit floating point number - -### Basic Usage +Every Overture feature type inherits from `OvertureFeature`, which extends `system.Feature` with the fields present on all Overture data: `id`, `theme`, `type`, `version`, `geometry`, and `sources`. 
```python -from pydantic import BaseModel, Field -from overture.schema.core.primitives import ( - uint8, uint32, float32 -) - -class Building(BaseModel): - """Building feature with specific primitive data types.""" - - height: float32 | None = Field( - None, - description="Height of building in meters" - ) - - num_floors: uint8 | None = Field( - None, - description="Number of floors in building" - ) - - area: uint32 | None = Field( - None, - description="Floor area in square meters" - ) +from typing import Literal +from overture.schema.core import OvertureFeature + +class Park(OvertureFeature[Literal["places"], Literal["park"]]): + area_hectares: float | None = None ``` -### Automatic Validation +## Scoping -Enhanced primitive types automatically validate constraints: +Many Overture values only apply under specific conditions -- a speed limit that holds during rush hour, along a sub-segment, in the forward direction. The `@scoped` decorator adds conditional fields to any Pydantic model: ```python -# Valid values -building = Building(height=45.5, num_floors=12, area=2500) +from pydantic import BaseModel +from overture.schema.core.scoping import Scope, scoped +from overture.schema.system.primitive import float32 -# Invalid values raise ValidationError -Building(num_floors=256) # Error: 256 > UInt8 maximum (255) -Building(num_floors=-1) # Error: -1 < UInt8 minimum (0) +@scoped(Scope.GEOMETRIC_RANGE, Scope.TEMPORAL) +class SpeedLimit(BaseModel): + max_speed: float32 ``` -### Type Safety +This produces a model with `between` (geometric range) and `when.during` (temporal) fields, both optional. 
The full set of scopes and the fields they inject: -The enhanced primitive types provide strong type safety guarantees at both static and -runtime levels: +| Scope | Field | +|----------------------------|-------------------| +| `Scope.GEOMETRIC_POSITION` | `at` | +| `Scope.GEOMETRIC_RANGE` | `between` | +| `Scope.HEADING` | `when.heading` | +| `Scope.TEMPORAL` | `when.during` | +| `Scope.TRAVEL_MODE` | `when.mode` | +| `Scope.PURPOSE_OF_USE` | `when.using` | +| `Scope.RECOGNIZED_STATUS` | `when.recognized` | +| `Scope.SIDE` | `side` | +| `Scope.VEHICLE` | `when.vehicle` | -**Static Type Checking**: mypy can distinguish between different primitive types, -*preventing common errors: +Scopes are optional by default. Make them mandatory via `required`: ```python -from overture.schema.core.primitives import uint8, uint32 +@scoped(Scope.TEMPORAL, required=(Scope.GEOMETRIC_POSITION, Scope.HEADING)) +class TrafficSignal(BaseModel): + signal_type: str +``` -def process_floor_count(floors: uint8) -> str: - return f"Building has {floors} floors" +## Names -def process_area(area: uint32) -> str: - return f"Area: {area} sq meters" +Multilingual naming with support for common names, name rules (official, alternate, short variants), and scoping by geometric range, side, or political perspective. Mix `Named` into a feature type to give it a `names` field: -# Type checker prevents mixing incompatible types -floors: uint8 = 12 -area: uint32 = 2500 +```python +from typing import Literal +from overture.schema.core import OvertureFeature +from overture.schema.core.names import Named -process_floor_count(area) # mypy error: Expected UInt8, got UInt32 -process_area(floors) # mypy error: Expected UInt32, got UInt8 +class Lake(OvertureFeature[Literal["base"], Literal["water"]], Named): + pass # inherits names: Names | None from Named ``` +Name rules support geometric range and side scoping for cases like a street whose name changes partway along or differs on each side. 
`NameRule` variants: `common`, `official`, `alternate`, `short`. -### Examples +## Sources -#### Temporal Speed Limit - -```yaml -speed_limits: - - between: [0, 1] - max_speed: {value: 30, unit: km/h} - when: - during: "Mo-Fr 07:00-09:00,17:00-19:00" # Rush hours only -``` +Source attribution tracking. Each `SourceItem` identifies which dataset a feature or property came from, with optional license, record ID, update time, and confidence score. Source items support geometric range scoping for per-segment attribution. -#### Vehicle-Specific Access Restriction - -```yaml -access_restrictions: - - between: [0.2, 0.8] - access_type: denied - when: - vehicle: - - dimension: weight - comparison: greater_than - value: 7.5 - unit: t -``` - -#### Multi-Dimensional Scoping - -```yaml -access_restrictions: - - between: [0, 1] - access_type: denied - when: - mode: [bus] - during: "Mo-Fr 15:00-18:00" - heading: forward - using: [to_deliver] +```python +from overture.schema.core.sources import SourceItem + +sources = [ + SourceItem(property="", dataset="OpenStreetMap"), + SourceItem(property="/geometry", dataset="Microsoft ML Buildings"), + # first 30% of the segment's geometry came from a different source + SourceItem(property="/geometry", dataset="County GIS", between=[0, 0.3]), +] ``` -### Design Principles - -1. **Composability**: Mix-in design allows combining only needed scoping dimensions -2. **Reusability**: Base scope classes work across all rule types and themes -3. **Extensibility**: Easy to add new scoping dimensions or modify existing ones -4. **Type Safety**: Full Pydantic validation for all scoping conditions -5. **Linear Reference Integration**: Seamless integration with geometric positioning +## Cartography -### Rule Complexity Patterns +Rendering hints for map-making: `prominence` (1--100 significance scale), `min_zoom`/`max_zoom` (tile zoom bounds), and `sort_key` (draw order). Mix `CartographicallyHinted` into a model to add a `cartography` field. 
-- **Simple Rules** (flags, dimensions): Geometric scoping only -- **Complex Rules** (speed limits, access): Geometric + conditional scoping -- **Transition Rules**: Full scoping including directional constraints +## Also Included -This scoping system provides the foundation for precise, flexible rule specification across all Overture Maps transportation features. +- **Types** -- domain-specific aliases built on system primitives: `ConfidenceScore` (0.0--1.0), `Level` (z-order), `FeatureVersion`. +- **Units** -- measurement enumerations: `SpeedUnit`, `LengthUnit`, `WeightUnit`. +- **Discovery** -- entry-point-based model registry. Theme packages register models via `overture.models` entry points; `discover_models()` resolves them at runtime. diff --git a/packages/overture-schema-system/README.md b/packages/overture-schema-system/README.md index 33764e38e..238984eb0 100644 --- a/packages/overture-schema-system/README.md +++ b/packages/overture-schema-system/README.md @@ -1 +1,117 @@ -todo: README +# Overture Schema System + +Write Pydantic models once, get validated data that serializes correctly to JSON, Parquet, and Spark. This package provides the primitive types, constraint decorators, and GeoJSON-aware base class that make Pydantic models portable across serialization targets. + +## Installation + +```bash +pip install overture-schema-system +``` + +## Feature + +GeoJSON-compatible Pydantic base model. 
Subclasses serialize to the GeoJSON format automatically -- `geometry` and `id` at the top level, everything else under `properties` -- and validate from it: + +```python +from overture.schema.system.feature import Feature +from overture.schema.system.primitive import Geometry, float32 + +class Mountain(Feature): + name: str + max_elevation: float32 + +m = Mountain( + geometry=Geometry.from_wkt("POINT(86.9252 27.9888)"), + name="Mount Everest", + max_elevation=8848.86, +) +``` + +## Primitive Types + +Using `int` and `float` in a Pydantic model produces valid Python but loses information downstream -- an `int` field becomes a 64-bit integer in Parquet, Arrow, and Spark StructTypes, even when the domain is 0--255. The primitive types (`uint8`, `int32`, `float32`, etc.) carry range constraints and map to the correct wire type so data round-trips cleanly between Python, Parquet files, PostgreSQL, and JSON Schema: + +```python +from pydantic import BaseModel +from overture.schema.system.primitive import uint8, float32 + +class Building(BaseModel): + height: float32 | None = None + num_floors: uint8 | None = None +``` + +Integer types: `uint8`, `uint16`, `uint32`, `int8`, `int16`, `int32`, `int64`. Float types: `float32`, `float64`. Geometry types: `Geometry`, `BBox`, `GeometryType`, `GeometryTypeConstraint`. + +## String Types + +Validated string types that carry their constraints into generated JSON Schemas and downstream code generation. Using `CountryCodeAlpha2` instead of `str` means Pydantic rejects `"USA"` at validation time, JSON Schema gets the right pattern, and codegen tools produce typed output: + +```python +from overture.schema.system.string import CountryCodeAlpha2, LanguageTag +``` + +Available types: `CountryCodeAlpha2`, `RegionCode`, `LanguageTag`, `HexColor`, `JsonPointer`, `PhoneNumber`, `StrippedString`, `SnakeCaseString`, `NoWhitespaceString`, `WikidataId`. 
+ +## Field Constraints + +Annotations for Pydantic fields that enforce domain rules beyond what the type alone expresses. Each constraint produces the corresponding JSON Schema keywords (e.g., `pattern`, `uniqueItems`) and is introspectable by code generation tools -- unlike Pydantic's `@field_validator`, which runs in Python only. Apply via `Annotated`: + +```python +from typing import Annotated +from pydantic import BaseModel, Field +from overture.schema.system.field_constraint import UniqueItemsConstraint, PatternConstraint + +OsmIdConstraint = PatternConstraint( + pattern=r"^[nwr]\d+$", + error_message="invalid OSM ID format: {value}. Must be n123, w123, or r123.", +) + +class MyModel(BaseModel): + osm_id: Annotated[str, OsmIdConstraint] + tags: Annotated[list[str], UniqueItemsConstraint()] = Field(min_length=1) +``` + +Built-in constraints include `PatternConstraint`, `StrippedConstraint`, `UniqueItemsConstraint`, and all the string-type constraints (`CountryCodeAlpha2Constraint`, `HexColorConstraint`, etc.). All produce error messages with domain context. + +## Model Constraints + +Class-level decorators for cross-field validation -- relationships between fields that no single field annotation can express. Each decorator produces corresponding JSON Schema constructs (`if`/`then`, `anyOf`, etc.) 
and is introspectable for code generation: + +```python +from pydantic import BaseModel +from overture.schema.system.model_constraint import require_any_of + +@require_any_of("email", "phone") +class Contact(BaseModel): + email: str | None = None + phone: str | None = None +``` + +- `@require_any_of("a", "b", ...)` -- at least one field must be non-None +- `@radio_group("a", "b", ...)` -- at most one field may be truthy +- `@require_if("target", condition)` -- field required when condition holds +- `@forbid_if("target", condition)` -- field forbidden when condition holds +- `@min_fields_set(n, "a", "b", ...)` -- at least *n* fields must be set +- `@no_extra_fields` -- reject unrecognized fields (equivalent to `model_config = ConfigDict(extra="forbid")`) + +## References + +Foreign-key-style annotations that describe relationships between models. These carry no runtime enforcement but provide metadata for code generation and documentation tools: + +```python +from typing import Annotated +from overture.schema.system.ref import Id, Identified, Reference, Relationship + +class Park(Identified): + pass + +class ParkBench(Identified): + park_id: Annotated[Id, Reference(Relationship.BELONGS_TO, Park)] +``` + +## Also Included + +- **Optionality** -- `Omitable[T]` models JSON Schema's "may be absent but not null" semantics, which Pydantic's `T | None` conflates with nullable. +- **DocumentedEnum** -- base class for enumerations whose members carry their own docstrings, enabling code generation tools to produce documented output. +- **Metadata** -- internal key-value store used by model constraints to attach data to classes. +- **JSON Schema** -- schema generator that treats `T | None = None` as "omit when unset" rather than Pydantic's default "nullable with null default." Also handles unions of models.