diff --git a/pctasks/cli/requirements.txt b/pctasks/cli/requirements.txt index 277eca65d..bd90e1fd8 100644 --- a/pctasks/cli/requirements.txt +++ b/pctasks/cli/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -163,10 +166,17 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pyjwt==2.8.0 # via msal @@ -191,7 +201,9 @@ python-dateutil==2.8.2 # strictyaml # from https://pypi.org/simple python-dotenv==1.0.1 - # via planetary-computer + # via + # planetary-computer + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -225,7 +237,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -233,6 +245,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/client/pctasks/client/client.py b/pctasks/client/pctasks/client/client.py index bef17fdf7..f64a57a51 100644 --- a/pctasks/client/pctasks/client/client.py +++ b/pctasks/client/pctasks/client/client.py @@ -174,7 +174,7 @@ def _yield_page_results( route, params=params, ) - result = record_list_response_type.parse_obj(resp) + result = record_list_response_type.model_validate(resp) yield from result.records page_count += 1 page_token = result.next_page_token @@ -285,7 +285,7 @@ def get_workflow(self, workflow_id: str) -> Optional[WorkflowRecord]: result = self._call_api( "GET", WORKFLOW_ROUTE.format(workflow_id=workflow_id) ) - return WorkflowRecordResponse.parse_obj(result).record + return WorkflowRecordResponse.model_validate(result).record except HTTPError as e: if e.response.status_code == 404: return None @@ -369,7 +369,7 @@ def submit_workflow( workflow_def = workflow.definition - request = request.copy() + request = request.model_copy() # Ensure arguments request.args = self.settings.add_default_args( @@ -452,7 +452,7 @@ def get_workflow_run( route += f"?dataset={dataset_id}" try: result = self._call_api("GET", route) - return WorkflowRunRecordResponse.parse_obj(result).record + return WorkflowRunRecordResponse.model_validate(result).record except HTTPError as e: if e.response.status_code == 404: return None @@ -497,7 +497,7 @@ def get_job_partition_run( ) try: result = self._call_api("GET", route) - return JobPartitionRunRecordResponse.parse_obj(result).record + return JobPartitionRunRecordResponse.model_validate(result).record except HTTPError as e: if e.response.status_code == 404: return None diff --git a/pctasks/client/pctasks/client/profile/commands.py b/pctasks/client/pctasks/client/profile/commands.py index 1f37b7415..4a87505a9 100644 --- a/pctasks/client/pctasks/client/profile/commands.py +++ b/pctasks/client/pctasks/client/profile/commands.py @@ -141,7 +141,7 @@ def set_profile(ctx: click.Context, profile: str) -> None: rprint(f"[red]Profile [bold]{profile}[/bold] does not exists[/red]") ctx.exit(1) - profile_only_config = settings_config.copy(update={"settings_file": None}) + profile_only_config = settings_config.model_copy(update={"settings_file": None}) profile_settings_file = profile_only_config.get_settings_file() if not Path(profile_settings_file).exists(): raise click.UsageError( @@ -160,7 +160,7 @@ def show_profile(ctx: click.Context, profile: str) -> None: rprint(f"[red]Profile [bold]{profile}[/bold] does not exists[/red]") ctx.exit(1) - profile_only_config = settings_config.copy(update={"settings_file": None}) + profile_only_config = settings_config.model_copy(update={"settings_file": None}) yaml_txt = profile_only_config.get_settings_file().read_text() console = Console() console.print(Syntax(yaml_txt, "yaml")) diff --git a/pctasks/client/pctasks/client/settings.py b/pctasks/client/pctasks/client/settings.py index 07acac8b3..67df9d4da 100644 --- a/pctasks/client/pctasks/client/settings.py +++ b/pctasks/client/pctasks/client/settings.py @@ -1,7 +1,7 @@ from typing import Dict, Optional from urllib.parse import urlparse -from pydantic import validator +from pydantic import field_validator from pctasks.core.models.workflow import WorkflowDefinition from pctasks.core.settings import PCTasksSettings @@ -21,7 +21,7 @@ def section_name(cls) -> str: default_args: Optional[Dict[str, str]] = None default_page_size: int = DEFAULT_PAGE_SIZE - @validator("endpoint") + @field_validator("endpoint", mode="after") def _validate_endpoint(cls, v: str) -> str: try: parsed = urlparse(v) diff --git a/pctasks/client/pctasks/client/workflow/commands.py b/pctasks/client/pctasks/client/workflow/commands.py index fcde32527..36a21422c 100644 --- a/pctasks/client/pctasks/client/workflow/commands.py +++ b/pctasks/client/pctasks/client/workflow/commands.py @@ -264,7 +264,7 @@ def cli_handle_workflow( workflow_id = workflow_def.workflow_id else: - workflow_def = workflow_def.copy(update={"workflow_id": workflow_id}) + workflow_def = workflow_def.model_copy(update={"workflow_id": workflow_id}) if not client: client = PCTasksClient(settings=ClientSettings.from_context(ctx.obj)) diff --git a/pctasks/client/pctasks/client/workflow/template.py b/pctasks/client/pctasks/client/workflow/template.py index 383d19dfc..237d85315 100644 --- a/pctasks/client/pctasks/client/workflow/template.py +++ b/pctasks/client/pctasks/client/workflow/template.py @@ -24,7 +24,7 @@ def template_workflow_dict( base_path = base_path or Path(".") workflow_dict = LocalTemplater(base_path).template_dict(workflow_dict) - return WorkflowDefinition.parse_obj(workflow_dict) + return WorkflowDefinition.model_validate(workflow_dict) def template_workflow_contents( diff --git a/pctasks/client/pyproject.toml b/pctasks/client/pyproject.toml index 8342792f9..381669032 100644 --- a/pctasks/client/pyproject.toml +++ b/pctasks/client/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ dependencies = [ "pctasks.core @ {root:parent:uri}/core", "pctasks.cli @ {root:parent:uri}/cli", - "pydantic[dotenv]>=1.8,<2.0.0", + "pydantic[dotenv]>=2.0.0", "rich>=11.2.0", ] diff --git a/pctasks/client/requirements.txt b/pctasks/client/requirements.txt index c9f93df00..0880cc1ef 100644 --- a/pctasks/client/requirements.txt +++ b/pctasks/client/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -173,11 +176,18 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-client (./pctasks/client/pyproject.toml) # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pygments==2.17.2 # via rich @@ -207,7 +217,7 @@ python-dateutil==2.8.2 python-dotenv==1.0.1 # via # planetary-computer - # pydantic + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -244,7 +254,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -252,6 +262,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/client/tests/test_client.py b/pctasks/client/tests/test_client.py index b14e97310..c5e591bac 100644 --- a/pctasks/client/tests/test_client.py +++ b/pctasks/client/tests/test_client.py @@ -9,6 +9,8 @@ from pctasks.dev.blob import get_azurite_code_storage from pctasks.dev.test_utils import assert_workflow_is_successful +# from pctasks.dev.test_utils import assert_workflow_is_successful + HERE = pathlib.Path(__file__).parent diff --git a/pctasks/client/tests/test_template.py b/pctasks/client/tests/test_template.py index 3fe7bbd70..7a74b5de3 100644 --- a/pctasks/client/tests/test_template.py +++ b/pctasks/client/tests/test_template.py @@ -23,7 +23,7 @@ def test_local_file_template(): yaml_dict = yaml.safe_load(yaml_str) templated_dict = LocalTemplater().template_dict(yaml_dict) - data = IngestCollectionsInput.parse_obj(templated_dict) + data = IngestCollectionsInput.model_validate(templated_dict) assert data.collections assert data.collections[0]["id"] == "test-collection" diff --git a/pctasks/core/pctasks/core/activity.py b/pctasks/core/pctasks/core/activity.py index 73e8f37a5..74f7e5642 100644 --- a/pctasks/core/pctasks/core/activity.py +++ b/pctasks/core/pctasks/core/activity.py @@ -34,13 +34,13 @@ def _func(msg: str) -> str: if "msg" not in msg_dict: raise ValueError(f"Missing 'msg' in message: {msg_dict}") - msg_model = model_class.parse_obj(msg_dict["msg"]) + msg_model = model_class.model_validate(msg_dict["msg"]) event_tag_msg: Optional[str] = None if event_tag: event_tag_msg = event_tag(msg_model) activity_msg: ActivityMessage[T] = ActivityMessage( - run_record_id=RunRecordId.parse_obj(msg_dict.get("run_record_id")), + run_record_id=RunRecordId.model_validate(msg_dict.get("run_record_id")), msg=msg_model, ) diff --git a/pctasks/core/pctasks/core/cosmos/container.py b/pctasks/core/pctasks/core/cosmos/container.py index fd6a555d7..032b279f8 100644 --- a/pctasks/core/pctasks/core/cosmos/container.py +++ b/pctasks/core/pctasks/core/cosmos/container.py @@ -128,7 +128,7 @@ def item_from_model(self, model: T) -> Dict[str, Any]: def model_from_item(self, model_type: Type[T], item: Dict[str, Any]) -> T: """Transform a cosmosdb item (dict) into a model.""" - return model_type.parse_obj(item) + return model_type.model_validate(item) def _prepare_put_item(self, model: T) -> Dict[str, Any]: # Set created or updated time diff --git a/pctasks/core/pctasks/core/cosmos/settings.py b/pctasks/core/pctasks/core/cosmos/settings.py index e98a782e2..cacf34f20 100644 --- a/pctasks/core/pctasks/core/cosmos/settings.py +++ b/pctasks/core/pctasks/core/cosmos/settings.py @@ -1,13 +1,14 @@ import os import re -from typing import Any, Dict, Optional +from typing import Optional from urllib.parse import urlparse import azure.identity.aio from azure.cosmos import CosmosClient from azure.cosmos.aio import CosmosClient as AsyncCosmosClient from azure.identity import DefaultAzureCredential -from pydantic import validator +from pydantic import field_validator, model_validator +from typing_extensions import Self from pctasks.core.constants import COSMOSDB_EMULATOR_HOST_ENV_VAR from pctasks.core.settings import PCTasksSettings @@ -67,11 +68,11 @@ def get_records_container_name(self) -> str: def get_process_item_errors_container_name(self) -> str: return f"{self.process_item_errors_container_name}{self.test_container_suffix}" - @validator("test_container_suffix", always=True) + @field_validator("test_container_suffix") def _validate_test_container_suffix(cls, v: Optional[str]) -> str: return v or "" - @validator("connection_string") + @field_validator("connection_string") def _validate_connection_string(cls, v: Optional[str]) -> Optional[str]: if v: if not re.search(r"AccountEndpoint=(.*?);", v): @@ -80,15 +81,14 @@ def _validate_connection_string(cls, v: Optional[str]) -> Optional[str]: raise ValueError("Cannot find AccountKey in connection_string") return v - @validator("key", always=True) # Validates all connection properties (last defined) - def _validate_key(cls, v: Optional[str], values: Dict[str, Any]) -> Optional[str]: - if v: - if values.get("connection_string"): - raise ValueError("Cannot set both key and connection_string") - if not values.get("url"): - raise ValueError("Must set url when setting key") + @model_validator(mode="after") # Validates all connection properties (last defined) + def _key_or_connection_string(self) -> Self: + if self.key and self.connection_string: + raise ValueError("Cannot set both key and connection_string") + if self.key and not self.url: + raise ValueError("Must set url when setting key") - return v + return self def get_cosmosdb_url(self) -> str: if self.connection_string: diff --git a/pctasks/core/pctasks/core/models/activity.py b/pctasks/core/pctasks/core/models/activity.py index a988b1b7f..bb35dcc2a 100644 --- a/pctasks/core/pctasks/core/models/activity.py +++ b/pctasks/core/pctasks/core/models/activity.py @@ -7,6 +7,6 @@ T = TypeVar("T", bound=BaseModel) -class ActivityMessage(Generic[T], PCBaseModel): +class ActivityMessage(PCBaseModel, Generic[T]): run_record_id: RunRecordId msg: T diff --git a/pctasks/core/pctasks/core/models/base.py b/pctasks/core/pctasks/core/models/base.py index fd7c234dd..d71cab4bc 100644 --- a/pctasks/core/pctasks/core/models/base.py +++ b/pctasks/core/pctasks/core/models/base.py @@ -14,12 +14,12 @@ class PCBaseModel(BaseModel): def dict(self, **kwargs: Any) -> Dict[str, Any]: kwargs.setdefault("by_alias", True) kwargs.setdefault("exclude_none", True) - return super().dict(**kwargs) + return super().model_dump(**kwargs) def json(self, **kwargs: Any) -> str: kwargs.setdefault("by_alias", True) kwargs.setdefault("exclude_none", True) - return super().json(**kwargs) + return super().model_dump_json(**kwargs) def to_json(self, *args: Any, **kwargs: Any) -> str: """Passed through to .json() @@ -58,9 +58,9 @@ def _prop_sort(field_order: List[str], d: Dict[str, Any]) -> Dict[str, Any]: def from_yaml(cls: Type[T], yaml_str: str, section: Optional[str] = None) -> T: return model_from_yaml(cls, yaml_str, section=section) - class Config: - exclude_none = True - allow_population_by_field_name = True + model_config = { + "populate_by_name": True, + } class RunRecordId(PCBaseModel): diff --git a/pctasks/core/pctasks/core/models/config.py b/pctasks/core/pctasks/core/models/config.py index 28b89ee3a..dccefe194 100644 --- a/pctasks/core/pctasks/core/models/config.py +++ b/pctasks/core/pctasks/core/models/config.py @@ -1,6 +1,6 @@ from typing import Dict, List, Optional -from pydantic import validator +from pydantic import field_validator from pctasks.core.models.base import PCBaseModel from pctasks.core.storage.blob import BlobStorage, BlobUri @@ -59,7 +59,7 @@ class ImageConfig(PCBaseModel): environment: Optional[List[str]] = None tags: Optional[List[str]] = None - @validator("environment") + @field_validator("environment") def _environment_validator(cls, v: Optional[List[str]]) -> Optional[List[str]]: if v: for env in v: @@ -67,7 +67,7 @@ def _environment_validator(cls, v: Optional[List[str]]) -> Optional[List[str]]: raise ValueError(f"Environment entry {env} is invalid.") return v - @validator("tags") + @field_validator("tags") def _tags_validator(cls, v: Optional[List[str]]) -> Optional[List[str]]: if v: for tag in v: diff --git a/pctasks/core/pctasks/core/models/event.py b/pctasks/core/pctasks/core/models/event.py index d37bea93e..d35d52cce 100644 --- a/pctasks/core/pctasks/core/models/event.py +++ b/pctasks/core/pctasks/core/models/event.py @@ -11,7 +11,7 @@ class CloudEvent(PCBaseModel): - spec_version: str = Field(default="1.0", const=True, alias="specversion") + spec_version: str = Field(default="1.0", frozen=True, alias="specversion") type: str source: str subject: Optional[str] = None @@ -178,7 +178,7 @@ class NotificationMessage(PCBaseModel): class NotificationSubmitMessage(PCBaseModel): notification: NotificationMessage target_environment: Optional[str] - type: str = Field(default="Notification", const=True) + type: str = Field(default="Notification", frozen=True) processing_id: RunRecordId @@ -188,7 +188,7 @@ class NotificationConfig(PCBaseModel): class ItemNotificationConfig(NotificationConfig): - type: str = Field(default="Item", const=True) + type: str = Field(default="Item", frozen=True) owner: str = MICROSOFT_OWNER collection_id: str item_id: str diff --git a/pctasks/core/pctasks/core/models/item.py b/pctasks/core/pctasks/core/models/item.py index b738f8296..ce41b89f8 100644 --- a/pctasks/core/pctasks/core/models/item.py +++ b/pctasks/core/pctasks/core/models/item.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional import pystac -from pydantic import Field, validator +from pydantic import Field, field_validator from pctasks.core.models.record import Record from pctasks.core.utils import StrEnum @@ -17,7 +17,7 @@ class ItemRecord(Record): type: ItemRecordType stac_id: str - @validator("stac_id") + @field_validator("stac_id") def _stac_id_validator(cls, v: str) -> str: # Ensure a single forward slash in the string if v.count("/") != 1: @@ -52,7 +52,7 @@ class StacItemRecord(ItemRecord): by a single ``/`` """ - type: ItemRecordType = Field(default=ItemRecordType.STAC_ITEM, const=True) + type: ItemRecordType = Field(default=ItemRecordType.STAC_ITEM, frozen=True) item: Dict[str, Any] deleted: bool = False @@ -74,7 +74,7 @@ class ItemUpdatedRecord(ItemRecord): Does not specify if the item was created or updated. """ - type: ItemRecordType = Field(default=ItemRecordType.ITEM_UPDATED, const=True) + type: ItemRecordType = Field(default=ItemRecordType.ITEM_UPDATED, frozen=True) run_id: str """The run ID of the workflow that updated this Item version""" @@ -86,6 +86,6 @@ class ItemUpdatedRecord(ItemRecord): message_inserted_time: Optional[datetime] = None version: Optional[str] - @validator("version") + @field_validator("version") def _version_validator(cls, v: Optional[str]) -> str: return v or "" diff --git a/pctasks/core/pctasks/core/models/registration.py b/pctasks/core/pctasks/core/models/registration.py index ab1d5001b..fc389293c 100644 --- a/pctasks/core/pctasks/core/models/registration.py +++ b/pctasks/core/pctasks/core/models/registration.py @@ -2,7 +2,8 @@ from typing import Any, Dict, Optional from uuid import uuid4 -from pydantic import Field, validator +from pydantic import Field, model_validator +from typing_extensions import Self from pctasks.core.constants import MICROSOFT_OWNER from pctasks.core.models.base import PCBaseModel @@ -105,12 +106,10 @@ def matches(self, event: CloudEvent) -> bool: return False return True - @validator("eventgrid_channel_info", always=True) - def validate_eventgrid_channel_info( - cls, v: Optional[EventGridChannelInfo], values: Dict[str, Any] - ) -> Optional[EventGridChannelInfo]: - if not v and not values.get("webhook_endpoint"): + @model_validator(mode="after") + def validate_eventgrid_channel_info(self) -> Self: + if not self.eventgrid_channel_info and not self.webhook_endpoint: raise ValueError( "Either eventgrid_channel_info or webhook_endpoint must be set" ) - return v + return self diff --git a/pctasks/core/pctasks/core/models/response.py b/pctasks/core/pctasks/core/models/response.py index bfe75bdb7..824e49da0 100644 --- a/pctasks/core/pctasks/core/models/response.py +++ b/pctasks/core/pctasks/core/models/response.py @@ -31,8 +31,8 @@ class Link(PCBaseModel): title: Optional[str] = None -class RecordResponse(Generic[T], PCBaseModel): - links: Optional[List[Link]] = Field(default_factory=list) +class RecordResponse(PCBaseModel, Generic[T]): + links: Optional[List[Link]] = Field(default=[]) record: T @@ -48,8 +48,8 @@ class JobPartitionRunRecordResponse(RecordResponse[JobPartitionRunRecord]): record: JobPartitionRunRecord -class RecordListResponse(Generic[T], PCBaseModel): - links: Optional[List[Link]] = Field(default_factory=list) +class RecordListResponse(PCBaseModel, Generic[T]): + links: Optional[List[Link]] = Field(default=[]) records: List[T] next_page_token: Optional[str] = Field(None, alias="nextPageToken") diff --git a/pctasks/core/pctasks/core/models/run.py b/pctasks/core/pctasks/core/models/run.py index 1c599395c..e58a06b5a 100644 --- a/pctasks/core/pctasks/core/models/run.py +++ b/pctasks/core/pctasks/core/models/run.py @@ -1,7 +1,8 @@ from datetime import datetime from typing import Any, Dict, List, Optional -from pydantic import Field, validator +from pydantic import Field, model_validator +from typing_extensions import Self from pctasks.core.models.base import PCBaseModel from pctasks.core.models.event import CloudEvent @@ -94,18 +95,18 @@ def set_status(self, status: str) -> None: StatusHistoryEntry(status=status, timestamp=tzutc_now()) ) - @validator("status_history", always=True) - def _validate_status_history( - cls, v: List[StatusHistoryEntry], values: Dict[str, Any] - ) -> List[StatusHistoryEntry]: + @model_validator(mode="after") + def _validate_status_history(self) -> Self: # Always ensure the status history is valid - if not v: - return [StatusHistoryEntry(status=values["status"], timestamp=tzutc_now())] - return v + if not self.status_history: + self.status_history = [ + StatusHistoryEntry(status=self.status, timestamp=tzutc_now()) + ] + return self class TaskRunRecord(RunRecord): - type: str = Field(default=RunRecordType.TASK_RUN, const=True) + type: str = Field(default=RunRecordType.TASK_RUN, frozen=True) run_id: str job_id: str @@ -140,7 +141,7 @@ def from_task_definition( class JobPartitionRunRecord(RunRecord): - type: str = Field(default=RunRecordType.JOB_PARTITION_RUN, const=True) + type: str = Field(default=RunRecordType.JOB_PARTITION_RUN, frozen=True) run_id: str job_id: str @@ -188,7 +189,7 @@ def id_from(run_id: str, job_id: str, partition_id: str) -> str: class JobRunRecord(RunRecord): - type: str = Field(default=RunRecordType.JOB_RUN, const=True) + type: str = Field(default=RunRecordType.JOB_RUN, frozen=True) run_id: str job_id: str @@ -223,7 +224,7 @@ def from_job(cls, job: JobDefinition, run_id: str) -> "JobRunRecord": class WorkflowRunRecord(RunRecord): - type: str = Field(default=RunRecordType.WORKFLOW_RUN, const=True) + type: str = Field(default=RunRecordType.WORKFLOW_RUN, frozen=True) dataset_id: str run_id: str diff --git a/pctasks/core/pctasks/core/models/task.py b/pctasks/core/pctasks/core/models/task.py index 160438c56..822bc9a13 100644 --- a/pctasks/core/pctasks/core/models/task.py +++ b/pctasks/core/pctasks/core/models/task.py @@ -2,7 +2,8 @@ from typing import Any, Dict, List, Optional, Union import orjson -from pydantic import Field, validator +from pydantic import Field, field_validator, model_validator +from typing_extensions import Self from pctasks.core.constants import ( TASK_CONFIG_SCHEMA_VERSION, @@ -21,7 +22,7 @@ class TaskDefinition(PCBaseModel): id: str image: Optional[str] = None - image_key: Optional[str] = None + image_key: Optional[str] = Field(default=None, validation_alias="image-key") code: Optional[CodeConfig] = None task: str args: Dict[str, Any] = {} @@ -29,19 +30,16 @@ class TaskDefinition(PCBaseModel): environment: Optional[Dict[str, str]] = None schema_version: str = TASK_CONFIG_SCHEMA_VERSION - @validator("image_key", always=True) - def image_key_or_image_validator( - cls, v: Optional[str], values: Dict[str, Any] - ) -> Optional[str]: - if values.get("image") is not None: - if v: - raise ValueError("Specify either image_key or image.") - elif v is None: - raise ValueError("Must specify either image_key or image.") - return v + @model_validator(mode="after") + def image_key_or_image_validator(self) -> Self: + if self.image_key and self.image: + raise ValueError("Specify either image_key or image, but not both.") + elif not (self.image_key or self.image): + raise ValueError("Must specify either image_key or image, at most one") + return self - @validator("id") - def validate_jobs(cls, v: Optional[str]) -> Optional[str]: + @field_validator("id", mode="after") + def validate_id(cls, v: Optional[str]) -> Optional[str]: if v: try: validate_table_key(v) @@ -68,7 +66,7 @@ class TaskRunConfig(PCBaseModel): output_blob_config: BlobConfig log_blob_config: BlobConfig event_logger_app_insights_key: Optional[str] = None - schema_version: str = Field(default=TASK_RUN_CONFIG_SCHEMA_VERSION, const=True) + schema_version: str = Field(default=TASK_RUN_CONFIG_SCHEMA_VERSION, frozen=True) def get_run_record_id(self) -> str: return f"{self.run_id}/{self.job_id}/{self.partition_id}/{self.task_id}" @@ -83,7 +81,7 @@ def encoded(self) -> str: @classmethod def decode(cls, msg_text: str) -> "TaskRunMessage": - return cls.parse_obj( + return cls.model_validate( orjson.loads(b64decode(msg_text.encode("utf-8")).decode("utf-8")) ) @@ -100,7 +98,7 @@ class TaskResultType(StrEnum): class TaskResult(PCBaseModel): - schema_version: str = Field(default=TASK_RESULT_SCHEMA_VERSION, const=True) + schema_version: str = Field(default=TASK_RESULT_SCHEMA_VERSION, frozen=True) status: TaskResultType @staticmethod @@ -134,17 +132,17 @@ def parse_subclass( cls, obj: Dict[str, Any] ) -> Union["CompletedTaskResult", "WaitTaskResult", "FailedTaskResult"]: if obj["status"] == TaskResultType.COMPLETED: - return CompletedTaskResult.parse_obj(obj) + return CompletedTaskResult.model_validate(obj) elif obj["status"] == TaskResultType.WAIT: - return WaitTaskResult.parse_obj(obj) + return WaitTaskResult.model_validate(obj) elif obj["status"] == TaskResultType.FAILED: - return FailedTaskResult.parse_obj(obj) + return FailedTaskResult.model_validate(obj) else: raise ValueError(f"Unknown task result status: {obj['status']}") class CompletedTaskResult(TaskResult): - status: TaskResultType = Field(default=TaskResultType.COMPLETED, const=True) + status: TaskResultType = Field(default=TaskResultType.COMPLETED, frozen=True) output: Dict[str, Any] = {} notifications: Optional[List[NotificationMessage]] = None task_uri: Optional[str] = None @@ -154,13 +152,13 @@ class CompletedTaskResult(TaskResult): class WaitTaskResult(TaskResult): """Result returned by a task when it is not yet ready to run.""" - status: TaskResultType = Field(default=TaskResultType.WAIT, const=True) + status: TaskResultType = Field(default=TaskResultType.WAIT, frozen=True) wait_seconds: Optional[int] = None message: Optional[str] = None class FailedTaskResult(TaskResult): - status: TaskResultType = Field(default=TaskResultType.FAILED, const=True) + status: TaskResultType = Field(default=TaskResultType.FAILED, frozen=True) errors: Optional[List[str]] = None @@ -176,4 +174,4 @@ class TaskRunSignal(PCBaseModel): class TaskRunSignalMessage(PCBaseModel): signal_target_id: str data: TaskRunSignal - schema_version: str = Field(default=TASK_RUN_SIGNAL_SCHEMA_VERSION, const=True) + schema_version: str = Field(default=TASK_RUN_SIGNAL_SCHEMA_VERSION, frozen=True) diff --git a/pctasks/core/pctasks/core/models/workflow.py b/pctasks/core/pctasks/core/models/workflow.py index 6dba9fbe6..d934f1b70 100644 --- a/pctasks/core/pctasks/core/models/workflow.py +++ b/pctasks/core/pctasks/core/models/workflow.py @@ -1,6 +1,7 @@ from typing import Any, Dict, List, Optional, Set, Union -from pydantic import Field, validator +from pydantic import Field, field_validator, model_validator +from typing_extensions import Self from pctasks.core.constants import WORKFLOW_SCHEMA_VERSION from pctasks.core.models.base import ForeachConfig, PCBaseModel @@ -47,7 +48,7 @@ class JobDefinition(PCBaseModel): needs: Optional[Union[str, List[str]]] = None - @validator("id") + @field_validator("id", mode="after") def _validate_jobs(cls, v: Optional[str]) -> Optional[str]: if v: cls.validate_job_id(v) @@ -101,7 +102,7 @@ class WorkflowDefinition(PCBaseModel): on: Optional[TriggerDefinition] = None is_streaming: bool = False - schema_version: str = Field(default=WORKFLOW_SCHEMA_VERSION, const=True) + schema_version: str = Field(default=WORKFLOW_SCHEMA_VERSION, frozen=True) def __init__(self, **data: Any) -> None: super().__init__(**data) @@ -109,7 +110,7 @@ def __init__(self, **data: Any) -> None: if not job.id: job.id = id - @validator("jobs") + @field_validator("jobs") def _validate_jobs(cls, v: Dict[str, JobDefinition]) -> Dict[str, JobDefinition]: for job_id in v: # Only validate if job_id is set to None @@ -118,10 +119,11 @@ def _validate_jobs(cls, v: Dict[str, JobDefinition]) -> Dict[str, JobDefinition] JobDefinition.validate_job_id(job_id) return v - @validator("is_streaming") + @model_validator(mode="after") def _validate_is_streaming( - cls, v: bool, values: Dict[str, Any], **kwargs: Dict[str, Any] - ) -> bool: + # cls, v: bool, values: Dict[str, Any], **kwargs: Dict[str, Any] + self, + ) -> Self: """ A streaming workflow is similar to other pctasks workflows, but requires a few additional properties on the streaming tasks within the workflow: @@ -142,8 +144,8 @@ def _validate_is_streaming( to run indefinitely. They should continuously process messages from a queue, and leave starting, stopping, and scaling to the pctasks framework. """ - if v: - jobs = values["jobs"] + if self.is_streaming: + jobs = self.jobs n_jobs = len(jobs) if n_jobs != 1: raise ValueError( @@ -182,7 +184,7 @@ def _validate_is_streaming( f"on the task." ) - return v + return self def template_args(self, args: Optional[Dict[str, Any]]) -> "WorkflowDefinition": return DictTemplater({"args": args}, strict=False).template_model(self) @@ -234,7 +236,7 @@ def from_definition( class WorkflowRecord(Record): - type: str = Field(default=WorkflowRecordType.WORKFLOW, const=True) + type: str = Field(default=WorkflowRecordType.WORKFLOW, frozen=True) workflow_id: str workflow: Workflow @@ -260,7 +262,7 @@ class WorkflowSubmitMessage(PCBaseModel): def get_workflow_with_templated_args(self) -> Workflow: if self.args is None: return self.workflow - return self.workflow.copy( + return self.workflow.model_copy( update={"definition": self.workflow.definition.template_args(self.args)} ) diff --git a/pctasks/core/pctasks/core/settings.py b/pctasks/core/pctasks/core/settings.py index e31b026f7..d2fc97495 100644 --- a/pctasks/core/pctasks/core/settings.py +++ b/pctasks/core/pctasks/core/settings.py @@ -2,14 +2,16 @@ import os from abc import abstractmethod from pathlib import Path -from typing import Any, Dict, Hashable, List, Optional, Tuple, Type, TypeVar, Union +from typing import Hashable, List, Optional, Tuple, Type, TypeVar, Union -import yaml from cachetools import Cache, LRUCache, cachedmethod from cachetools.keys import hashkey -from pydantic import BaseSettings, Field, ValidationError -from pydantic.env_settings import SettingsSourceCallable -from yaml import Loader +from pydantic import Field, ValidationError +from pydantic_settings import ( + BaseSettings, + PydanticBaseSettingsSource, + YamlConfigSettingsSource, +) from pctasks.core.constants import ( DEFAULT_PROFILE, @@ -49,15 +51,18 @@ def __init__( super().__init__(message, *args) +_settings_cache: Cache = LRUCache(maxsize=100) + + class SettingsConfig(PCBaseModel): - _cache: Cache = LRUCache(maxsize=100) """Configuration for the settings location.""" + profile: Optional[str] = None settings_file: Optional[str] = None @classmethod - @cachedmethod(lambda cls: cls._cache) + @cachedmethod(lambda cls: _settings_cache) def get( cls, profile: Optional[str] = None, @@ -137,24 +142,6 @@ def is_profile_from_environment(self) -> bool: ) -def _get_yaml_settings_source( - settings_file: Path, -) -> SettingsSourceCallable: - def yaml_config_settings_source(settings: BaseSettings) -> Dict[str, Any]: - """ - A settings source that loads configuration from YAML. - """ - if settings_file.exists(): - with open(settings_file) as f: - yaml_txt = f.read() - result: Dict[str, Any] = yaml.load(yaml_txt, Loader=Loader) - return result - else: - return {} - - return yaml_config_settings_source - - def get_settings( model: Type[T], section_name: str, @@ -165,39 +152,40 @@ def get_settings( _settings_file = settings_config.get_settings_file() class _Settings(BaseSettings): + model_config = { + "env_prefix": ENV_VAR_PCTASK_PREFIX, + "extra": "ignore", + "env_nested_delimiter": "__", + } # mypy doesn't like using type vars here, # but it works and defines pydantic validation. section: model = Field( # type: ignore default_factory=model, - alias=section_name, - env=f"{ENV_VAR_PCTASK_PREFIX}{section_name.upper()}", + alias=f"{ENV_VAR_PCTASK_PREFIX}{section_name.upper()}", ) - class Config: - env_prefix = ENV_VAR_PCTASK_PREFIX - extra = "ignore" - env_nested_delimiter = "__" - - @classmethod - def customise_sources( - cls, - init_settings: SettingsSourceCallable, - env_settings: SettingsSourceCallable, - file_secret_settings: SettingsSourceCallable, - ) -> Any: - if _settings_file.exists(): - return ( - init_settings, - env_settings, - _get_yaml_settings_source(_settings_file), - file_secret_settings, - ) - else: - return ( - init_settings, - env_settings, - file_secret_settings, - ) + @classmethod + def settings_customise_sources( + cls, + settings_cls: Type[BaseSettings], + init_settings: PydanticBaseSettingsSource, + env_settings: PydanticBaseSettingsSource, + dotenv_settings: PydanticBaseSettingsSource, + file_secret_settings: PydanticBaseSettingsSource, + ) -> Tuple[PydanticBaseSettingsSource, ...]: + if _settings_file.exists(): + return ( + init_settings, + env_settings, + YamlConfigSettingsSource(settings_cls, yaml_file=_settings_file), + file_secret_settings, + ) + else: + return ( + init_settings, + env_settings, + file_secret_settings, + ) try: # type ignore as pydantic reports issue with missing 'section' @@ -205,9 +193,13 @@ def customise_sources( settings = _Settings() # type:ignore return settings.section except Exception as e: - msg = "Could not load settings from environment" + msg = f"Could not load {model.__name__} settings from environment" if _settings_file: - msg += f" or settings file at {_settings_file}" + msg += f" or settings file at {_settings_file}." + msg += ( + f" Model config is {_Settings.model_config} and " + f"model fields are {_Settings.model_fields}" + ) msg += f" - {e}" raise SettingsError( msg, @@ -224,16 +216,17 @@ def settings_hash_key( return hashkey((cls.section_name(), profile, settings_file)) -class PCTasksSettings(PCBaseModel): - _cache: Cache = LRUCache(maxsize=100) +_cache: Cache = LRUCache(maxsize=100) + +class PCTasksSettings(PCBaseModel): @classmethod @abstractmethod def section_name(cls) -> str: raise NotImplementedError @classmethod - @cachedmethod(lambda cls: cls._cache, key=settings_hash_key) + @cachedmethod(lambda cls: _cache, key=settings_hash_key) def get( cls: Type[T], profile: Optional[str] = None, diff --git a/pctasks/core/pctasks/core/tables/base.py b/pctasks/core/pctasks/core/tables/base.py index 6f910aeef..9a7f017e1 100644 --- a/pctasks/core/pctasks/core/tables/base.py +++ b/pctasks/core/pctasks/core/tables/base.py @@ -308,7 +308,7 @@ def _model_from_entity(self, entity: TableEntity) -> M: "Data column must be a string. " f"partition_key={partition_key} row_key={row_key}" ) - return self._model.parse_obj(decode_dict(data)) + return self._model.model_validate(decode_dict(data)) def insert(self, partition_key: str, row_key: str, entity: M) -> None: self._ensure_table_client() diff --git a/pctasks/core/pctasks/core/utils/summary.py b/pctasks/core/pctasks/core/utils/summary.py index bd2409866..dd7dd6ded 100644 --- a/pctasks/core/pctasks/core/utils/summary.py +++ b/pctasks/core/pctasks/core/utils/summary.py @@ -4,7 +4,6 @@ Useful for generating Collection "summaries" and "item-assets" fields. """ - from abc import abstractmethod from enum import Enum from functools import reduce @@ -17,24 +16,24 @@ class SummarySettings(BaseModel): - max_distinct_values = 4 + max_distinct_values: int = Field(default=4) """The max number of distinct values to collect in a distinct value summary. If there are more values collected than this settings, the DistinctValueSummary will be converted into a MixedValueSummary. """ - max_distinct_key_sets = 5 + max_distinct_key_sets: int = Field(default=5) """The max number of distinct values to collect in a distinct key sets. If there are more values collected than this setting, the DistinctKeySets will be converted to a MixedKeySets. """ - max_mixed_object_list_samples = 3 + max_mixed_object_list_samples: int = Field(default=3) """The max number of distinct samples to include in a mixed object list summary.""" - max_mixed_summary_samples = 4 + max_mixed_summary_samples: int = Field(default=4) """The max number of distinct samples to include in a mixed summary.""" @@ -81,42 +80,42 @@ def merge(self, other: "ValueCount") -> None: class BoolValueCount(ValueCount): """Count of boolean values.""" - type: str = Field(default=ValueTypes.BOOLEAN, const=True) + type: str = Field(default=ValueTypes.BOOLEAN, frozen=True) value: bool class IntValueCount(ValueCount): """Count of integer values.""" - type: str = Field(default=ValueTypes.INT, const=True) + type: str = Field(default=ValueTypes.INT, frozen=True) value: int class FloatValueCount(ValueCount): """Count of float values.""" - type: str = Field(default=ValueTypes.FLOAT, const=True) + type: str = Field(default=ValueTypes.FLOAT, frozen=True) value: float class StringValueCount(ValueCount): """Count of string values.""" - type: str = Field(default=ValueTypes.STRING, const=True) + type: str = Field(default=ValueTypes.STRING, frozen=True) value: str class NullValueCount(ValueCount): """Count of null values.""" - type: str = Field(default=ValueTypes.NULL, const=True) - value: Optional[str] = Field(default=None, const=True) + type: str = Field(default=ValueTypes.NULL, frozen=True) + value: Optional[str] = Field(default=None, frozen=True) class ListValueCount(ValueCount): """Count of list values.""" - type: str = Field(default=ValueTypes.LIST, const=True) + type: str = Field(default=ValueTypes.LIST, frozen=True) value: List[Any] @@ -177,7 +176,7 @@ class DistinctValueSummary(PropertySummary): The values can be bool, int, float, string, list, or null. """ - type: str = Field(default=SummaryTypes.DISTINCT, const=True) + type: str = Field(default=SummaryTypes.DISTINCT, frozen=True) values: ValueCountList def merge( @@ -279,7 +278,7 @@ def merge( class IntRangeSummary(PropertySummary): """Represents a property that is an integer in a specific range.""" - type: str = Field(default=SummaryTypes.INT_RANGE, const=True) + type: str = Field(default=SummaryTypes.INT_RANGE, frozen=True) min: int max: int @@ -331,7 +330,7 @@ def merge( class FloatRangeSummary(PropertySummary): """Represents a property that is an float in a specific range.""" - type: str = Field(default=SummaryTypes.FLOAT_RANGE, const=True) + type: str = Field(default=SummaryTypes.FLOAT_RANGE, frozen=True) min: float max: float @@ -383,7 +382,7 @@ def merge( class ObjectPropertySummary(PropertySummary): """Represents a property that is an JSON Object.""" - type: str = Field(default="object", const=True) + type: str = Field(default="object", frozen=True) summary: "ObjectSummary" def merge( @@ -431,7 +430,7 @@ def merge( class ObjectListSummary(PropertySummary): """Represents a property that is a list of JSON Objects.""" - type: str = Field(default=SummaryTypes.OBJECT_LIST, const=True) + type: str = Field(default=SummaryTypes.OBJECT_LIST, frozen=True) values: List["ObjectSummary"] def merge( @@ -470,7 +469,7 @@ def merge( class MixedObjectListSummary(PropertySummary): """Represents a property that is a non-uniform list of JSON Objects.""" - type: str = Field(default=SummaryTypes.MIXED_OBJECT_LIST, const=True) + type: str = Field(default=SummaryTypes.MIXED_OBJECT_LIST, frozen=True) lengths: Set[int] sample: List[List["ObjectSummary"]] @@ -520,7 +519,7 @@ class MixedValueSummary(PropertySummary): not be represented in the samples. """ - type: str = Field(default=SummaryTypes.MIXED_VALUE, const=True) + type: str = Field(default=SummaryTypes.MIXED_VALUE, frozen=True) data_types: Set[str] sample: ValueCountAndRangeList @@ -564,7 +563,7 @@ def merge( class MixedSummary(PropertySummary): """Represents a property that is represented by a mix of summary types.""" - type: str = Field(default=SummaryTypes.MIXED_SUMMARIES, const=True) + type: str = Field(default=SummaryTypes.MIXED_SUMMARIES, frozen=True) summary_types: Set[str] sample: List[PropertySummary] @@ -621,7 +620,7 @@ class KeySetsType(str, Enum): class KeySet(BaseModel): """Represents a set of keys that are present in objects.""" - keys: Set[str] + keys: Set[str | int] """The distinct set of keys that are present in an object.""" count_with: int """The number of objects that have this set of keys.""" @@ -630,7 +629,7 @@ class KeySet(BaseModel): class DistinctKeySets(BaseModel): """Represents a set of distinct key sets.""" - type: KeySetsType = Field(default=KeySetsType.DISTINCT, const=True) + type: KeySetsType = Field(default=KeySetsType.DISTINCT, frozen=True) values: List[KeySet] def merge(self, other: "KeySetType", settings: SummarySettings) -> "KeySetType": @@ -671,7 +670,7 @@ class MixedKeySets(BaseModel): The number of samples present is limited by the max_mixed_key_sets setting. """ - type: KeySetsType = Field(default=KeySetsType.MIXED, const=True) + type: KeySetsType = Field(default=KeySetsType.MIXED, frozen=True) sample_values: List[KeySet] def merge(self, other: "KeySetType", settings: SummarySettings) -> "KeySetType": @@ -850,6 +849,6 @@ def empty(cls) -> "ObjectSummary": return cls(count=0, keys={}, key_sets=DistinctKeySets(values=[])) -ObjectListSummary.update_forward_refs() -ObjectPropertySummary.update_forward_refs() -MixedObjectListSummary.update_forward_refs() +ObjectListSummary.model_rebuild() +ObjectPropertySummary.model_rebuild() +MixedObjectListSummary.model_rebuild() diff --git a/pctasks/core/pctasks/core/utils/template.py b/pctasks/core/pctasks/core/utils/template.py index 87c6616e3..d3ac7eb1f 100644 --- a/pctasks/core/pctasks/core/utils/template.py +++ b/pctasks/core/pctasks/core/utils/template.py @@ -220,7 +220,7 @@ def template_model( model: T, get_value: Callable[[List[str]], Optional[TemplateValue]], ) -> T: - return model.__class__.parse_obj(template_dict(model.dict(), get_value)) + return model.__class__.model_validate(template_dict(model.dict(), get_value)) class Templater(ABC): diff --git a/pctasks/core/pctasks/core/yaml.py b/pctasks/core/pctasks/core/yaml.py index 71bff9b7f..7ec544c0c 100644 --- a/pctasks/core/pctasks/core/yaml.py +++ b/pctasks/core/pctasks/core/yaml.py @@ -5,7 +5,7 @@ import strictyaml import yaml from pydantic import BaseModel, ValidationError -from pydantic.error_wrappers import ErrorList +from pydantic_core import ErrorDetails from yaml import Loader T = TypeVar("T", bound=BaseModel) @@ -16,7 +16,7 @@ @dataclass class YamlValidationErrorInfo: - pydantic_error: ErrorList + pydantic_error: ErrorDetails start_line: Optional[int] = None end_line: Optional[int] = None path: Optional[str] = None @@ -26,16 +26,18 @@ class SectionDoesNotExist(Exception): pass -class YamlValidationError(ValidationError): +class YamlValidationError(ValueError): + # yaml_ext = Field(default=None) + # yaml_ext = Field(default=None) def __init__( self, yml_text: str, errors: List[YamlValidationErrorInfo], - model: Any, + # model: Any, ) -> None: self.yaml_ext = yml_text self.yaml_errors = errors - super().__init__([e.pydantic_error for e in errors], model) + # super().__init__([e.pydantic_error for e in errors], model) def __str__(self) -> str: result = "\nValidation errors while parsing YAML:\n" @@ -126,8 +128,8 @@ def model_from_yaml( start_line=start_line, end_line=end_line, path=path, - pydantic_error=cast(ErrorList, error), + pydantic_error=error, ), ) - raise YamlValidationError(yml_text=yaml_txt, errors=errors, model=model) + raise YamlValidationError(yml_text=yaml_txt, errors=errors) diff --git a/pctasks/core/pyproject.toml b/pctasks/core/pyproject.toml index a1ec1753d..448aaffee 100644 --- a/pctasks/core/pyproject.toml +++ b/pctasks/core/pyproject.toml @@ -36,8 +36,9 @@ dependencies = [ "opencensus-ext-logging>=0.1.1", "orjson>=3.0.0,<4", "planetary-computer>=0.4.0", - "pystac==1.8.3", - "pydantic>=1.9,<2.0.0", + "pystac>=1.8.3", + "pydantic>=2.0.0", + "pydantic-settings>=2.0.0", "python-dateutil>=2.8.2,<2.9", "pyyaml>=5.3", "stac-validator>=3.1.0", diff --git a/pctasks/core/requirements.txt b/pctasks/core/requirements.txt index df2b03dfa..8a0e77e5c 100644 --- a/pctasks/core/requirements.txt +++ b/pctasks/core/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -160,10 +163,17 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-core (./pctasks/core/pyproject.toml) # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core (./pctasks/core/pyproject.toml) # from https://pypi.org/simple pyjwt==2.8.0 # via msal @@ -188,7 +198,9 @@ python-dateutil==2.8.2 # strictyaml # from https://pypi.org/simple python-dotenv==1.0.1 - # via planetary-computer + # via + # planetary-computer + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -222,7 +234,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core (./pctasks/core/pyproject.toml) # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -230,6 +242,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/core/tests/cosmos/containers/test_workflow_runs.py b/pctasks/core/tests/cosmos/containers/test_workflow_runs.py index 172086e19..6d9e18259 100644 --- a/pctasks/core/tests/cosmos/containers/test_workflow_runs.py +++ b/pctasks/core/tests/cosmos/containers/test_workflow_runs.py @@ -194,6 +194,7 @@ def test_job_part_bulk_put(temp_cosmosdb_containers): mock_job_partition_runs = MockWorkflowRunsContainer( JobPartitionRunRecord, db=temp_cosmosdb_containers ) + with mock_job_partition_runs: mock_job_partition_runs.bulk_put(job_parts) diff --git a/pctasks/core/tests/cosmos/containers/test_workflows.py b/pctasks/core/tests/cosmos/containers/test_workflows.py index aa68fda09..976c19d42 100644 --- a/pctasks/core/tests/cosmos/containers/test_workflows.py +++ b/pctasks/core/tests/cosmos/containers/test_workflows.py @@ -120,7 +120,6 @@ def test_workflow_runs_pagination(temp_cosmosdb_containers): assert items1_2[0].run_id == items2_1[0].run_id # Test job_run.task_group_count trigger update - def fetch_workflow() -> WorkflowRecord: fetched_workflow = workflows.get(workflow_id, partition_key=workflow_id) assert fetched_workflow is not None diff --git a/pctasks/core/tests/models/test_storage_event.py b/pctasks/core/tests/models/test_storage_event.py index 8bec400ab..c2c3ead6b 100644 --- a/pctasks/core/tests/models/test_storage_event.py +++ b/pctasks/core/tests/models/test_storage_event.py @@ -35,7 +35,7 @@ def event_body(): def test_storage_event_created(event_body): - event = StorageEvent.parse_obj(event_body) + event = StorageEvent.model_validate(event_body) assert event.time == "2017-06-26T18:41:00.9584103Z" assert event.type == StorageEventType.CREATED assert event.data.api == "CreateFile" @@ -46,7 +46,7 @@ def test_storage_event_created(event_body): def test_storage_events_record(event_body): - record = StorageEventRecord.parse_obj(event_body) + record = StorageEventRecord.model_validate(event_body) assert record.get_id() == "831e1650-001e-001b-66ab-eeb76e069631" assert StorageEventRecord.migrate(event_body) @@ -56,7 +56,7 @@ def test_create_item_error(event_body): event_body["run_id"] = "test" event_body["traceback"] = "ZeroDivisionError" event_body["dequeue_count"] = 1 - event = StorageEventRecord.parse_obj(event_body) + event = StorageEventRecord.model_validate(event_body) record = CreateItemErrorRecord( input=event, attempt=0, diff --git a/pctasks/core/tests/models/test_task.py b/pctasks/core/tests/models/test_task.py index c97f9d459..9ff8c4a48 100644 --- a/pctasks/core/tests/models/test_task.py +++ b/pctasks/core/tests/models/test_task.py @@ -36,6 +36,6 @@ def test_serialize_task_run_msg(): msg_text = b64encode(msg.json(exclude_none=True).encode("utf-8")).decode("utf-8") msg_dict = json.loads(b64decode(msg_text.encode("utf-8")).decode("utf-8")) - deserialized = TaskRunMessage.parse_obj(msg_dict) + deserialized = TaskRunMessage.model_validate(msg_dict) assert deserialized == msg diff --git a/pctasks/core/tests/models/test_workflow.py b/pctasks/core/tests/models/test_workflow.py index 4ae7aa252..3a3fec433 100644 --- a/pctasks/core/tests/models/test_workflow.py +++ b/pctasks/core/tests/models/test_workflow.py @@ -8,6 +8,7 @@ WorkflowDefinition, WorkflowSubmitMessage, ) +from pctasks.core.yaml import YamlValidationError def test_sets_job_id(): @@ -162,7 +163,7 @@ def test_missing_args(): def test_job_ids_no_commas(): - with pytest.raises(ValidationError): + with pytest.raises(YamlValidationError): _ = WorkflowDefinition.from_yaml( """ name: A workflow* *with* *asterisks diff --git a/pctasks/core/tests/test_activity.py b/pctasks/core/tests/test_activity.py index bb2a9520e..9e3aaa70a 100644 --- a/pctasks/core/tests/test_activity.py +++ b/pctasks/core/tests/test_activity.py @@ -23,7 +23,7 @@ def process_model(m: TestModel, event_logger: RunLogger) -> TestModel: ).json() ) - result = TestModel.parse_raw(result_str) + result = TestModel.model_validate_json(result_str) assert result.input == "test-in-processed" assert result.output == "test-out-processed" diff --git a/pctasks/core/tests/test_yaml.py b/pctasks/core/tests/test_yaml.py index 565745faa..7ee2b5fc0 100644 --- a/pctasks/core/tests/test_yaml.py +++ b/pctasks/core/tests/test_yaml.py @@ -21,4 +21,4 @@ def test_error_handling(): ) except YamlValidationError as e: error_text = str(e) - assert "jobs -> name: value is not a valid dict" in error_text + assert "dataset: Field required" in error_text diff --git a/pctasks/core/tests/utils/test_summary.py b/pctasks/core/tests/utils/test_summary.py index 40099c1d7..6dbe4c014 100644 --- a/pctasks/core/tests/utils/test_summary.py +++ b/pctasks/core/tests/utils/test_summary.py @@ -235,12 +235,12 @@ def test_single_naip_summary(): # print(summary.json(indent=2)) - assert summary.dict()["keys"]["properties"]["summary"]["keys"]["gsd"] == ( + assert summary.model_dump()["keys"]["properties"]["summary"]["keys"]["gsd"] == ( DistinctValueSummary( count_with=1, count_without=0, values=[FloatValueCount(value=0.6, count=1)], - ).dict() + ).model_dump() ) @@ -254,22 +254,22 @@ def test_two_naip_summary(): # print(summary.json(indent=2)) - assert summary.dict()["keys"]["properties"]["summary"]["keys"]["gsd"] == ( + assert summary.model_dump()["keys"]["properties"]["summary"]["keys"]["gsd"] == ( DistinctValueSummary( count_with=2, count_without=0, values=[FloatValueCount(value=0.6, count=2)], - ).dict() + ).model_dump() ) - image_summary = summary.dict()["keys"]["assets"]["summary"]["keys"]["image"] + image_summary = summary.model_dump()["keys"]["assets"]["summary"]["keys"]["image"] assert image_summary["summary"]["keys"]["roles"] == ( DistinctValueSummary( count_with=2, count_without=0, values=[ListValueCount(value=["data"], count=2)], # type: ignore - ).dict() + ).model_dump() ) assert image_summary["summary"]["keys"]["eo:bands"]["values"][0]["keys"][ @@ -279,7 +279,7 @@ def test_two_naip_summary(): count_with=2, count_without=0, values=[StringValueCount(value="red", count=2)], - ).dict() + ).model_dump() ) @@ -288,22 +288,22 @@ def test_include_key_three_deep(): summary = ObjectSummary.summarize_dict(item, include_keys=["assets.image.title"]) - print(summary.json(indent=2)) + print(summary.model_dump_json(indent=2)) - s = summary.dict() + s = summary.model_dump() assert len(s["keys"]) == 1 assert len(s["keys"]["assets"]["summary"]["keys"]) == 1 assert len(s["keys"]["assets"]["summary"]["keys"]["image"]["summary"]["keys"]) == 1 - assert summary.dict()["keys"]["assets"]["summary"]["keys"]["image"]["summary"][ - "keys" - ]["title"] == ( + assert summary.model_dump()["keys"]["assets"]["summary"]["keys"]["image"][ + "summary" + ]["keys"]["title"] == ( DistinctValueSummary( count_with=1, count_without=0, values=[StringValueCount(value="RGBIR COG tile", count=1)], - ).dict() + ).model_dump() ) @@ -322,14 +322,14 @@ def test_several_asset_descriptions(): settings=SummarySettings(max_distinct_values=20), ) - print(summary.json(indent=2)) + print(summary.model_dump_json(indent=2)) - assert summary.dict()["keys"]["assets"]["summary"]["keys"]["image"]["summary"][ - "keys" - ]["description"] == ( + assert summary.model_dump()["keys"]["assets"]["summary"]["keys"]["image"][ + "summary" + ]["keys"]["description"] == ( DistinctValueSummary( count_with=100, count_without=0, values=[StringValueCount(value=f"Image {i}", count=5) for i in range(20)], - ).dict() + ).model_dump() ) diff --git a/pctasks/dataset/pctasks/dataset/items/models.py b/pctasks/dataset/pctasks/dataset/items/models.py index bcf8e5aee..1dd3bfa5a 100644 --- a/pctasks/dataset/pctasks/dataset/items/models.py +++ b/pctasks/dataset/pctasks/dataset/items/models.py @@ -1,6 +1,7 @@ -from typing import Any, Dict, Optional +from typing import Dict, Optional -from pydantic import validator +from pydantic import model_validator +from typing_extensions import Self from pctasks.core.models.base import PCBaseModel from pctasks.core.models.config import CodeConfig @@ -47,23 +48,19 @@ class CreateItemsInput(PCBaseModel): options: CreateItemsOptions = CreateItemsOptions() - @validator("asset_chunk_info") - def _validate_chunk_uri( - cls, v: Optional[str], values: Dict[str, Any] - ) -> Optional[str]: - if v is None and values.get("asset_uri") is None: + @model_validator(mode="after") + def _validate_chunk_uri(self) -> Self: + if self.asset_chunk_info is None and self.asset_uri is None: raise ValueError("Either asset_chunk_info or asset_uri must be specified") - return v + return self - @validator("item_chunkset_uri") - def _validate_output_uri( - cls, v: Optional[str], values: Dict[str, Any] - ) -> Optional[str]: - if v is None and values.get("asset_chunk_info") is None: + @model_validator(mode="after") + def _validate_output_uri(self) -> Self: + if self.asset_chunk_info is None and self.item_chunkset_uri is not None: raise ValueError( "item_chunkset_uri must be specified if not processing asset_chunk_info" ) - return v + return self class CreateItemsOutput(PCBaseModel): diff --git a/pctasks/dataset/pctasks/dataset/models.py b/pctasks/dataset/pctasks/dataset/models.py index 3f1e7b490..15648ee89 100644 --- a/pctasks/dataset/pctasks/dataset/models.py +++ b/pctasks/dataset/pctasks/dataset/models.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import Any, Dict, List, Optional, Set, Union -from pydantic import Field, validator +from pydantic import Field, field_validator from pctasks.core.models.base import PCBaseModel from pctasks.core.models.config import CodeConfig @@ -97,7 +97,7 @@ class ChunksConfig(PCBaseModel): options: ChunkOptions = ChunkOptions() splits: Optional[List[SplitDefinition]] = None - @validator("splits") + @field_validator("splits", mode="after") def _validate_splits( cls, v: Optional[List[SplitDefinition]] ) -> Optional[List[SplitDefinition]]: @@ -147,8 +147,9 @@ def get_tokens(self) -> Dict[str, StorageAccountTokens]: return tokens - class Config: - allow_population_by_field_name = True + model_config = { + "populate_by_name": True, + } class DatasetDefinition(PCBaseModel): @@ -181,7 +182,7 @@ def get_collection( def template_args(self, args: Dict[str, str]) -> "DatasetDefinition": return DictTemplater({"args": args}, strict=False).template_model(self) - @validator("id") + @field_validator("id") def _validate_name(cls, v: str) -> str: try: validate_table_key(v) @@ -189,7 +190,7 @@ def _validate_name(cls, v: str) -> str: raise ValueError(f"Invalid dataset id: {e.INFO_MESSAGE}") return v - @validator("collections") + @field_validator("collections", mode="after") def _validate_collections( cls, v: List[CollectionDefinition] ) -> List[CollectionDefinition]: diff --git a/pctasks/dataset/pctasks/dataset/splits/models.py b/pctasks/dataset/pctasks/dataset/splits/models.py index 77a146b79..7dc848490 100644 --- a/pctasks/dataset/pctasks/dataset/splits/models.py +++ b/pctasks/dataset/pctasks/dataset/splits/models.py @@ -87,7 +87,7 @@ def from_collection( storage_chunk_options = asset_storage_config.chunks.options if chunk_options: - storage_chunk_options = storage_chunk_options.copy( + storage_chunk_options = storage_chunk_options.model_copy( update=chunk_options.dict(exclude_defaults=True) ) split_inputs.append( diff --git a/pctasks/dataset/pctasks/dataset/streaming.py b/pctasks/dataset/pctasks/dataset/streaming.py index ed1263ccf..9dd60e02c 100644 --- a/pctasks/dataset/pctasks/dataset/streaming.py +++ b/pctasks/dataset/pctasks/dataset/streaming.py @@ -80,8 +80,7 @@ class StreamingCreateItemsInput(PCBaseModel): ] extra_env: Dict[str, str] = pydantic.Field(default_factory=dict) - class Config: - extra = "forbid" + model_config = {"extra": "forbid"} class ExtraOptions(TypedDict): @@ -194,7 +193,7 @@ def process_message( create_items_function = extra_options["create_items_function"] logger.info("Processing message id=%s", message.id) - parsed_message = StorageEvent.parse_raw(message.content) + parsed_message = StorageEvent.model_validate_json(message.content) try: items = self.create_items( diff --git a/pctasks/dataset/pctasks/dataset/template.py b/pctasks/dataset/pctasks/dataset/template.py index ac5816207..19f6551bc 100644 --- a/pctasks/dataset/pctasks/dataset/template.py +++ b/pctasks/dataset/pctasks/dataset/template.py @@ -18,7 +18,7 @@ def template_dataset( root = Path.cwd() templater = LocalTemplater(root) dataset_dict = templater.template_dict(dataset_dict) - return DatasetDefinition.parse_obj(dataset_dict) + return DatasetDefinition.model_validate(dataset_dict) def template_dataset_file( diff --git a/pctasks/dataset/pctasks/dataset/workflow.py b/pctasks/dataset/pctasks/dataset/workflow.py index 8b10b5241..d052d8a17 100644 --- a/pctasks/dataset/pctasks/dataset/workflow.py +++ b/pctasks/dataset/pctasks/dataset/workflow.py @@ -314,7 +314,7 @@ def modify_for_update(workflow_definition: WorkflowDefinition) -> WorkflowDefini 4. The ``process-chunk`` task will be modified to include the ``since`` value in the in the chunk file prefix at ``item_chunkset_uri``. """ - workflow_definition = workflow_definition.copy(deep=True) + workflow_definition = workflow_definition.model_copy(deep=True) if workflow_definition.args is None: workflow_definition.args = ["since"] else: diff --git a/pctasks/dataset/requirements.txt b/pctasks/dataset/requirements.txt index a51afe646..85bf6a9e9 100644 --- a/pctasks/dataset/requirements.txt +++ b/pctasks/dataset/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -193,11 +196,18 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-client # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pygments==2.17.2 # via rich @@ -227,7 +237,7 @@ python-dateutil==2.8.2 python-dotenv==1.0.1 # via # planetary-computer - # pydantic + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -264,7 +274,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -272,6 +282,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/dataset/tests/chunks/test_task.py b/pctasks/dataset/tests/chunks/test_task.py index e579e56fd..1062d7d01 100644 --- a/pctasks/dataset/tests/chunks/test_task.py +++ b/pctasks/dataset/tests/chunks/test_task.py @@ -47,7 +47,7 @@ def test_task(): task_result = run_test_task(args.dict(), task_path) assert isinstance(task_result, CompletedTaskResult) - result = ChunksOutput.parse_obj(task_result.output) + result = ChunksOutput.model_validate(task_result.output) test_asset_folder = str(TEST_ASSETS_PATH).strip("/") @@ -119,7 +119,7 @@ def test_naip_since_date(): ) assert isinstance(task_result, CompletedTaskResult) - result = ChunksOutput.parse_obj(task_result.output) + result = ChunksOutput.model_validate(task_result.output) assert len(result.chunks) == 12 for chunk in result.chunks: @@ -150,7 +150,7 @@ def test_task_simple_assets() -> None: assert isinstance(task_result, CompletedTaskResult) - result = ChunksOutput.parse_obj(task_result.output) + result = ChunksOutput.model_validate(task_result.output) print(result.to_json(indent=2)) for chunk in result.chunks: print(chunk.uri) @@ -185,7 +185,7 @@ def test_task_list_folders(): task_result = run_test_task(args.dict(), task_path) assert isinstance(task_result, CompletedTaskResult) - result = ChunksOutput.parse_obj(task_result.output) + result = ChunksOutput.model_validate(task_result.output) test_asset_folder = str(TEST_ASSETS_PATH.parent / "simple-assets").strip("/") diff --git a/pctasks/dataset/tests/items/test_task.py b/pctasks/dataset/tests/items/test_task.py index 34c1d4e25..cd6990a2b 100644 --- a/pctasks/dataset/tests/items/test_task.py +++ b/pctasks/dataset/tests/items/test_task.py @@ -78,7 +78,7 @@ def test_create_items(): task_result = run_test_task(args.dict(), TASK_PATH) assert isinstance(task_result, CompletedTaskResult) - result = CreateItemsOutput.parse_obj(task_result.output) + result = CreateItemsOutput.model_validate(task_result.output) ndjson_uri = result.ndjson_uri assert ndjson_uri assert Path(ndjson_uri).exists() diff --git a/pctasks/dataset/tests/test_dataset.py b/pctasks/dataset/tests/test_dataset.py index 35712cc70..90a2fb5d9 100644 --- a/pctasks/dataset/tests/test_dataset.py +++ b/pctasks/dataset/tests/test_dataset.py @@ -100,10 +100,10 @@ def test_process_items_is_update_workflow(tmp_path, has_args, extra_uri) -> None if extra_uri: asset_storage = ds_config.collections[0].asset_storage - asset_storage.append(asset_storage[0].copy()) + asset_storage.append(asset_storage[0].model_copy()) if not has_args: - ds_config = ds_config.copy(update={"args": None}) + ds_config = ds_config.model_copy(update={"args": None}) assert ds_config.args is None collection_config = ds_config.collections[0] diff --git a/pctasks/dataset/tests/test_streaming_create_items.py b/pctasks/dataset/tests/test_streaming_create_items.py index 6814f26b0..bfb81e177 100644 --- a/pctasks/dataset/tests/test_streaming_create_items.py +++ b/pctasks/dataset/tests/test_streaming_create_items.py @@ -190,7 +190,7 @@ def test_streaming_create_items_rewrite_url(monkeypatch, storage_event): url = root_storage.get_url("data/item.json") storage_event["data"]["url"] = url - message_data = StorageEventData.parse_obj(storage_event["data"]) + message_data = StorageEventData.model_validate(storage_event["data"]) assert url.startswith(f"http://{host}:{port}") diff --git a/pctasks/dev/pctasks/dev/task.py b/pctasks/dev/pctasks/dev/task.py index 4d27f996c..5cdd6ab2e 100644 --- a/pctasks/dev/pctasks/dev/task.py +++ b/pctasks/dev/pctasks/dev/task.py @@ -1,6 +1,8 @@ import os from typing import List, Optional, Union +from pydantic import Field + from pctasks.core.models.base import PCBaseModel from pctasks.core.models.task import FailedTaskResult, WaitTaskResult from pctasks.task.context import TaskContext @@ -20,7 +22,7 @@ class TestTaskInput(PCBaseModel): uri: Optional[str] = None check_exists_uri: Optional[str] = None output_dir: str - options = TestTaskOptions() + options: TestTaskOptions = Field(default=TestTaskOptions()) class TestTaskOutput(PCBaseModel): @@ -55,7 +57,7 @@ def run( raise TestTaskError(f"Input file {input.uri} does not exist.") try: - history = TestTaskAsset.parse_obj( + history = TestTaskAsset.model_validate( input_storage.read_json(input_path) ).history except: diff --git a/pctasks/dev/requirements.txt b/pctasks/dev/requirements.txt index ea7b2e31d..0ca8245b3 100644 --- a/pctasks/dev/requirements.txt +++ b/pctasks/dev/requirements.txt @@ -9,6 +9,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple argo-workflows==6.3.10 # via pctasks-run # from https://pypi.org/simple @@ -244,11 +247,18 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-client # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pygments==2.17.2 # via rich @@ -283,7 +293,7 @@ python-dateutil==2.8.2 python-dotenv==1.0.1 # via # planetary-computer - # pydantic + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -333,7 +343,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -342,6 +352,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via diff --git a/pctasks/ingest/pctasks/ingest/models.py b/pctasks/ingest/pctasks/ingest/models.py index 6270a4764..51c10a2a7 100644 --- a/pctasks/ingest/pctasks/ingest/models.py +++ b/pctasks/ingest/pctasks/ingest/models.py @@ -1,6 +1,7 @@ -from typing import Any, Dict, List, Optional, Union +from typing import Annotated, Any, Dict, List, Optional, Union -from pydantic import Field, validator +from pydantic import Discriminator, Field, Tag, model_validator +from typing_extensions import Self from pctasks.core.models.base import PCBaseModel from pctasks.core.models.event import STACCollectionEventType, STACItemEventType @@ -31,27 +32,40 @@ class NdjsonFolder(PCBaseModel): class IngestNdjsonInput(PCBaseModel): - type: str = Field(default=NDJSON_MESSAGE_TYPE, const=True) + type: str = Field(default=NDJSON_MESSAGE_TYPE, frozen=True) uris: Optional[Union[str, List[str]]] = None ndjson_folder: Optional[NdjsonFolder] = None - @validator("ndjson_folder") - def _validate_ndjson_folder( - cls, v: Optional[NdjsonFolder], values: Dict[str, Any] - ) -> Optional[NdjsonFolder]: - if v is None: - if values["uris"] is None: - raise ValueError("Either ndjson_folder or uris must be provided.") - return v + @model_validator(mode="after") + def _validate_ndjson_folder(self) -> Self: + if self.ndjson_folder is None and self.uris is None: + raise ValueError("Either ndjson_folder or uris must be provided.") + return self class IngestCollectionsInput(PCBaseModel): - type: str = Field(default=COLLECTIONS_MESSAGE_TYPE, const=True) + type: str = Field(default=COLLECTIONS_MESSAGE_TYPE, frozen=True) collections: List[Dict[str, Any]] +def _get_discriminator_tag(v: Any) -> str: + if isinstance(v, IngestNdjsonInput): + return NDJSON_MESSAGE_TYPE + elif isinstance(v, IngestCollectionsInput): + return COLLECTIONS_MESSAGE_TYPE + else: + return "Any" + + class IngestTaskInput(PCBaseModel): - content: Union[IngestNdjsonInput, IngestCollectionsInput, Dict[str, Any]] + content: Annotated[ + Union[ + Annotated[IngestNdjsonInput, Tag(NDJSON_MESSAGE_TYPE)], + Annotated[IngestCollectionsInput, Tag(COLLECTIONS_MESSAGE_TYPE)], + Annotated[Dict[str, Any], Tag("Any")], + ], + Discriminator(_get_discriminator_tag), + ] """The content of the message. Can be a STAC Collection or Item JSON dict, or a NdjsonMessageData object. @@ -80,11 +94,11 @@ class IngestTaskOutput(PCBaseModel): items: Optional[List[ItemIngestTaskOutput]] = None """List of items created by the ingest task.""" - @validator("items", always=True) - def _validate_items(cls, v: Any, values: Dict[str, Any]) -> Any: - if not v and values["collections"] is None and not values["bulk_load"]: + @model_validator(mode="after") + def _validate_items(self) -> Any: + if not self.items and self.collections is None and not self.bulk_load: raise ValueError("Must supply either collections or items") - return v + return self class IngestTaskConfig(TaskDefinition): diff --git a/pctasks/ingest/requirements.txt b/pctasks/ingest/requirements.txt index 566d9b3d3..9571ee92e 100644 --- a/pctasks/ingest/requirements.txt +++ b/pctasks/ingest/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -184,11 +187,18 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-client # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pygments==2.17.2 # via rich @@ -218,7 +228,7 @@ python-dateutil==2.8.2 python-dotenv==1.0.1 # via # planetary-computer - # pydantic + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -255,7 +265,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -263,6 +273,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/ingest/tests/test_collection.py b/pctasks/ingest/tests/test_collection.py index c22a3e981..625a804a5 100644 --- a/pctasks/ingest/tests/test_collection.py +++ b/pctasks/ingest/tests/test_collection.py @@ -17,8 +17,7 @@ def test_collection_ser() -> None: collection = json.load(f) task = IngestTaskConfig.from_collection(collection=collection, target="staging") - - input = IngestTaskInput.parse_obj(task.args) + input = IngestTaskInput.model_validate(task.args) ser_collection = input.content assert collection == ser_collection @@ -28,7 +27,7 @@ def test_goes_coll_deser() -> None: workflow = WorkflowDefinition.from_yaml(f.read()) task = workflow.jobs["ingest-collection"].tasks[0] - input = IngestTaskInput.parse_obj(task.args) + input = IngestTaskInput.model_validate(task.args) assert isinstance(input.content, dict) collection_dict: Dict[str, Any] = input.content collection = pystac.Collection.from_dict(collection_dict) diff --git a/pctasks/ingest_task/pyproject.toml b/pctasks/ingest_task/pyproject.toml index a99693e3b..409cef5a0 100644 --- a/pctasks/ingest_task/pyproject.toml +++ b/pctasks/ingest_task/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "pctasks.ingest @ {root:parent:uri}/ingest", "pctasks.task @ {root:parent:uri}/task", "plpygis==0.2.0", - "pypgstac[psycopg]==0.7.10", + "pypgstac[psycopg]>=0.8.5,<0.9", "pystac>=1.0.0,<2", ] diff --git a/pctasks/ingest_task/requirements.txt b/pctasks/ingest_task/requirements.txt index 69ab7b28e..6b7619fa1 100644 --- a/pctasks/ingest_task/requirements.txt +++ b/pctasks/ingest_task/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -215,20 +218,27 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-client # pctasks-core # planetary-computer + # pydantic-settings # pypgstac # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core + # from https://pypi.org/simple pygments==2.17.2 # via rich # from https://pypi.org/simple pyjwt==2.8.0 # via msal # from https://pypi.org/simple -pypgstac==0.7.10 +pypgstac==0.8.6 # via pctasks-ingest-task (./pctasks/ingest_task/pyproject.toml) # from https://pypi.org/simple pyrsistent==0.20.0 @@ -255,7 +265,7 @@ python-dateutil==2.8.2 python-dotenv==1.0.1 # via # planetary-computer - # pydantic + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -302,7 +312,7 @@ tenacity==8.1.0 termcolor==2.4.0 # via fire # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -312,6 +322,7 @@ typing-extensions==4.11.0 # psycopg # psycopg-pool # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/notify/requirements.txt b/pctasks/notify/requirements.txt index e6643b150..442690a48 100644 --- a/pctasks/notify/requirements.txt +++ b/pctasks/notify/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -162,10 +165,17 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pyjwt==2.8.0 # via msal @@ -190,7 +200,9 @@ python-dateutil==2.8.2 # strictyaml # from https://pypi.org/simple python-dotenv==1.0.1 - # via planetary-computer + # via + # planetary-computer + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -224,7 +236,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -232,6 +244,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/router/pctasks/router/handlers/eventgrid.py b/pctasks/router/pctasks/router/handlers/eventgrid.py index ec4e53155..9f2fc6a28 100644 --- a/pctasks/router/pctasks/router/handlers/eventgrid.py +++ b/pctasks/router/pctasks/router/handlers/eventgrid.py @@ -43,5 +43,5 @@ def handle_blob_event(event: CloudEvent) -> bool: class EventGridMessageHandler(MessageHandler): def handle(self, message: Dict[str, Any]) -> None: - event = CloudEvent.parse_obj(message) + event = CloudEvent.model_validate(message) handle_blob_event(event) diff --git a/pctasks/router/requirements.txt b/pctasks/router/requirements.txt index 7992a0186..868cfa709 100644 --- a/pctasks/router/requirements.txt +++ b/pctasks/router/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -162,10 +165,17 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pyjwt==2.8.0 # via msal @@ -190,7 +200,9 @@ python-dateutil==2.8.2 # strictyaml # from https://pypi.org/simple python-dotenv==1.0.1 - # via planetary-computer + # via + # planetary-computer + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -224,7 +236,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -232,6 +244,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/run/pctasks/run/models.py b/pctasks/run/pctasks/run/models.py index 73bec651b..914f82fba 100644 --- a/pctasks/run/pctasks/run/models.py +++ b/pctasks/run/pctasks/run/models.py @@ -33,7 +33,7 @@ class TaskSubmitMessage(PCBaseModel): related_tasks: Optional[List[Tuple[str, str]]] = None tokens: Optional[Dict[str, StorageAccountTokens]] = None wait_retries: int = 0 - type: str = Field(default=TASK_SUBMIT_MESSAGE_TYPE, const=True) + type: str = Field(default=TASK_SUBMIT_MESSAGE_TYPE, frozen=True) def get_run_record_id(self) -> RunRecordId: return RunRecordId( diff --git a/pctasks/run/pctasks/run/settings.py b/pctasks/run/pctasks/run/settings.py index 1afb3a6b4..4e5f7917b 100644 --- a/pctasks/run/pctasks/run/settings.py +++ b/pctasks/run/pctasks/run/settings.py @@ -1,9 +1,10 @@ from enum import Enum from pathlib import Path -from typing import Any, Dict, Optional, Union +from typing import Optional, Union from urllib.parse import urlparse -from pydantic import validator +from pydantic import Field, model_validator +from typing_extensions import Self from pctasks.core.constants import ( DEFAULT_CODE_CONTAINER, @@ -44,7 +45,7 @@ def get_batch_name(self) -> str: class NotificationQueueConnStrConfig(QueueConnStrConfig): - queue_name: str = DEFAULT_NOTIFICATIONS_QUEUE_NAME + queue_name: str = Field(DEFAULT_NOTIFICATIONS_QUEUE_NAME) class RunSettings(PCTasksSettings): @@ -64,7 +65,7 @@ def section_name(cls) -> str: local_dev_endpoints_url: Optional[str] = None local_secrets: bool = False - notification_queue: NotificationQueueConnStrConfig + notification_queue: NotificationQueueConnStrConfig = Field() # Tables tables_account_url: str @@ -135,62 +136,56 @@ def batch_settings(self) -> BatchSettings: submit_threads=self.batch_submit_threads, ) - @validator("keyvault_url", always=True) - def keyvault_url_validator( - cls, v: Optional[str], values: Dict[str, Any] - ) -> Optional[str]: - if not values.get("local_secrets"): - if not v: + @model_validator(mode="after") + def keyvault_url_validator(self) -> Self: + if not self.local_secrets: + if not self.keyvault_url: raise ValueError("Must specify keyvault_url.") - return v - - @validator("task_runner_type", always=True) - def _task_runner_type_validator( - cls, v: TaskRunnerType, values: Dict[str, Any] - ) -> TaskRunnerType: - if v == TaskRunnerType.LOCAL: - if values.get("local_dev_endpoints_url") is None: + return self + + @model_validator(mode="after") + def _task_runner_type_validator(self) -> Self: + if self.task_runner_type == TaskRunnerType.LOCAL: + if self.local_dev_endpoints_url is None: raise ValueError( "Must specify local_dev_endpoints_url for local remote runner type." ) - if v == TaskRunnerType.ARGO: - if values.get("argo_host") is None: + if self.task_runner_type == TaskRunnerType.ARGO: + if self.argo_host is None: raise ValueError("Must specify argo_host for argo remote runner type.") - if values.get("argo_token") is None: + if self.argo_token is None: raise ValueError("Must specify argo_token for argo remote runner type.") - if v == TaskRunnerType.BATCH: - if values.get("batch_url") is None: + if self.task_runner_type == TaskRunnerType.BATCH: + if self.batch_url is None: raise ValueError("Must specify batch_url for batch remote runner type.") - if values.get("batch_key") is None: + if self.batch_key is None: raise ValueError("Must specify batch_key for batch remote runner type.") - if values.get("batch_default_pool_id") is None: + if self.batch_default_pool_id is None: raise ValueError( "Must specify batch_default_pool_id for batch remote runner type." ) - return v + return self - @validator("workflow_runner_type", always=True) - def _workflow_runner_type_validator( - cls, v: WorkflowRunnerType, values: Dict[str, Any] - ) -> WorkflowRunnerType: - if v == WorkflowRunnerType.ARGO: - if values.get("argo_host") is None: + @model_validator(mode="after") + def _workflow_runner_type_validator(self) -> Self: + if self.workflow_runner_type == WorkflowRunnerType.ARGO: + if self.argo_host is None: raise ValueError( "Must specify argo_host for argo workflow runner type." ) - if values.get("argo_token") is None: + if self.argo_token is None: raise ValueError( "Must specify argo_token for argo workflow runner type." ) - if values.get("workflow_runner_image") is None: + if self.workflow_runner_image is None: raise ValueError( "Must specify workflow_runner_image " "for argo workflow runner type." ) - return v + return self # Don't cache tables; executor is not thread-safe diff --git a/pctasks/run/pctasks/run/task/batch.py b/pctasks/run/pctasks/run/task/batch.py index c1d1c50c9..d47ded47e 100644 --- a/pctasks/run/pctasks/run/task/batch.py +++ b/pctasks/run/pctasks/run/task/batch.py @@ -253,7 +253,7 @@ def get_failed_tasks( for job_id, task_ids in groupby( [ - (BatchTaskId.parse_obj(batch_id), (partition_id, task_id)) + (BatchTaskId.model_validate(batch_id), (partition_id, task_id)) for partition_id, task_map in runner_ids.items() for task_id, batch_id in task_map.items() ], @@ -336,7 +336,7 @@ def poll_task( ) -> TaskPollResult: batch_client = self._get_batch_client() - task_id = BatchTaskId.parse_obj(runner_id) + task_id = BatchTaskId.model_validate(runner_id) task_status_result = batch_client.get_task_status( job_id=task_id.batch_job_id, task_id=task_id.batch_task_id @@ -362,7 +362,7 @@ def poll_task( def cancel_task(self, runner_id: Dict[str, Any]) -> None: batch_client = self._get_batch_client() - task_id = BatchTaskId.parse_obj(runner_id) + task_id = BatchTaskId.model_validate(runner_id) batch_client.terminate_task( job_id=task_id.batch_job_id, task_id=task_id.batch_task_id diff --git a/pctasks/run/pctasks/run/task/local.py b/pctasks/run/pctasks/run/task/local.py index 9742249b6..c474f8d08 100644 --- a/pctasks/run/pctasks/run/task/local.py +++ b/pctasks/run/pctasks/run/task/local.py @@ -85,7 +85,7 @@ def poll_task( self.local_dev_endpoints_url + f"/poll/{runner_id['id']}" ) if resp.status_code == 200: - return TaskPollResult.parse_obj(resp.json()) + return TaskPollResult.model_validate(resp.json()) elif resp.status_code == 404: if previous_poll_count < MAX_MISSING_POLLS: return TaskPollResult(task_status=TaskRunStatus.PENDING) diff --git a/pctasks/run/pctasks/run/workflow/executor/models.py b/pctasks/run/pctasks/run/workflow/executor/models.py index f9df958e9..a11936d09 100644 --- a/pctasks/run/pctasks/run/workflow/executor/models.py +++ b/pctasks/run/pctasks/run/workflow/executor/models.py @@ -390,7 +390,9 @@ def prepare_next_task(self, settings: RunSettings) -> None: ) task_data = self.job_part_submit_msg.job_partition.task_data[task_index] - copied_task = next_task_config.__class__.parse_obj(next_task_config.dict()) + copied_task = next_task_config.__class__.model_validate( + next_task_config.dict() + ) copied_task.args = template_args( copied_task.args, job_outputs=self.job_part_submit_msg.job_outputs, diff --git a/pctasks/run/pctasks/run/workflow/executor/simple.py b/pctasks/run/pctasks/run/workflow/executor/simple.py index 2a920c14b..0cacc0ac1 100644 --- a/pctasks/run/pctasks/run/workflow/executor/simple.py +++ b/pctasks/run/pctasks/run/workflow/executor/simple.py @@ -128,7 +128,7 @@ def run_job( task_outputs: Dict[str, Any] = {} for task_config in job.tasks: - copied_task = task_config.__class__.parse_obj(task_config.dict()) + copied_task = task_config.__class__.model_validate(task_config.dict()) copied_task.args = template_args( copied_task.args, job_outputs=previous_job_outputs or {}, diff --git a/pctasks/run/pctasks/run/workflow/kubernetes.py b/pctasks/run/pctasks/run/workflow/kubernetes.py index 27a5118ec..dd87d5a7c 100644 --- a/pctasks/run/pctasks/run/workflow/kubernetes.py +++ b/pctasks/run/pctasks/run/workflow/kubernetes.py @@ -231,7 +231,7 @@ def build_streaming_deployment( env.append(V1EnvVar(name=k, value=str(v))) resources = build_resources( - Resources.parse_obj(task_definition.args["streaming_options"]["resources"]) + Resources.model_validate(task_definition.args["streaming_options"]["resources"]) ) container = V1Container( diff --git a/pctasks/run/requirements.txt b/pctasks/run/requirements.txt index d7a52b4d0..ed72d0a54 100644 --- a/pctasks/run/requirements.txt +++ b/pctasks/run/requirements.txt @@ -9,6 +9,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple argo-workflows==6.3.10 # via pctasks-run (./pctasks/run/pyproject.toml) # from https://pypi.org/simple @@ -225,11 +228,18 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-client # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pygments==2.17.2 # via rich @@ -264,7 +274,7 @@ python-dateutil==2.8.2 python-dotenv==1.0.1 # via # planetary-computer - # pydantic + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -314,7 +324,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -323,6 +333,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via diff --git a/pctasks/run/tests/workflow/test_remote.py b/pctasks/run/tests/workflow/test_remote.py index aa460a171..cb9395d13 100644 --- a/pctasks/run/tests/workflow/test_remote.py +++ b/pctasks/run/tests/workflow/test_remote.py @@ -37,7 +37,7 @@ def run_workflow( ) run_settings = RunSettings.get() - run_settings = run_settings.copy(deep=True) + run_settings = run_settings.model_copy(deep=True) run_settings.task_poll_seconds = 5 run_settings.max_concurrent_workflow_tasks = 5 run_settings.remote_runner_threads = 2 diff --git a/pctasks/server/pctasks/server/settings.py b/pctasks/server/pctasks/server/settings.py index df4add861..38aa31e6d 100644 --- a/pctasks/server/pctasks/server/settings.py +++ b/pctasks/server/pctasks/server/settings.py @@ -1,6 +1,7 @@ -from typing import Any, Dict, Optional +from typing import Optional -from pydantic import validator +from pydantic import model_validator +from typing_extensions import Self from pctasks.core.settings import PCTasksSettings @@ -20,20 +21,16 @@ def section_name(cls) -> str: app_insights_instrumentation_key: Optional[str] = None - @validator("dev_api_key", always=True) - def _dev_api_key_validator( - cls, v: Optional[str], values: Dict[str, Any] - ) -> Optional[str]: - if values.get("dev"): - if not v: + @model_validator(mode="after") + def _dev_api_key_validator(self) -> Self: + if self.dev: + if not self.dev_api_key: raise ValueError("dev_api_key is required when dev is True") - return v - - @validator("dev_auth_token", always=True) - def _dev_auth_token_validator( - cls, v: Optional[str], values: Dict[str, Any] - ) -> Optional[str]: - if values.get("dev"): - if not v: + return self + + @model_validator(mode="after") + def _dev_auth_token_validator(self) -> Self: + if self.dev: + if not self.dev_auth_token: raise ValueError("dev_auth_token is required when dev is True") - return v + return self diff --git a/pctasks/server/requirements.txt b/pctasks/server/requirements.txt index c8f202e7b..5fe4279df 100644 --- a/pctasks/server/requirements.txt +++ b/pctasks/server/requirements.txt @@ -9,6 +9,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple anyio==4.3.0 # via # httpx @@ -285,12 +288,19 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # fastapi # pctasks-client # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pygments==2.17.2 # via rich @@ -333,7 +343,7 @@ python-dateutil==2.8.2 python-dotenv==1.0.1 # via # planetary-computer - # pydantic + # pydantic-settings # from https://pypi.org/simple python-multipart==0.0.7 # via pctasks-server (./pctasks/server/pyproject.toml) @@ -403,7 +413,7 @@ tomli==2.0.1 # coverage # pytest # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # anyio # azure-core @@ -414,6 +424,7 @@ typing-extensions==4.11.0 # azure-storage-queue # fastapi # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via diff --git a/pctasks/server/tests/routes/test_run.py b/pctasks/server/tests/routes/test_run.py index e291b18f7..f443e2e54 100644 --- a/pctasks/server/tests/routes/test_run.py +++ b/pctasks/server/tests/routes/test_run.py @@ -39,5 +39,5 @@ def test_run_workflow_rejects_no_creds(client: TestClient) -> None: def test_run_workflow_invalid_workflow(client: TestClient) -> None: with ignore_ssl_warnings(): response = client.post("/workflows/test-workflow", json={"invalid": "workflow"}) - assert "field required" in response.content.decode("utf-8") + assert "field required" in response.content.decode("utf-8").lower() assert response.status_code == 400 diff --git a/pctasks/task/pctasks/task/streaming.py b/pctasks/task/pctasks/task/streaming.py index 551879cb3..3ac47ac3a 100644 --- a/pctasks/task/pctasks/task/streaming.py +++ b/pctasks/task/pctasks/task/streaming.py @@ -76,8 +76,9 @@ class StreamingTaskOptions(PCBaseModel): allow_spot_instances: bool = False resources: Resources - class Config: - extra = "forbid" + model_config = { + "extra": "forbid", + } class NoOutput(PCBaseModel): diff --git a/pctasks/task/pctasks/task/task.py b/pctasks/task/pctasks/task/task.py index 885742a40..8b9994bec 100644 --- a/pctasks/task/pctasks/task/task.py +++ b/pctasks/task/pctasks/task/task.py @@ -49,7 +49,7 @@ def run( pass def parse_and_run(self, data: Dict[str, Any], context: TaskContext) -> TaskResult: - args = self._input_model.parse_obj(data) + args = self._input_model.model_validate(data) output = self.run(args, context) if isinstance(output, WaitTaskResult): diff --git a/pctasks/task/requirements.txt b/pctasks/task/requirements.txt index a6534616d..f23689755 100644 --- a/pctasks/task/requirements.txt +++ b/pctasks/task/requirements.txt @@ -6,6 +6,9 @@ aiohttp==3.9.5 aiosignal==1.3.1 # via aiohttp # from https://pypi.org/simple +annotated-types==0.7.0 + # via pydantic + # from https://pypi.org/simple async-timeout==4.0.3 # via aiohttp # from https://pypi.org/simple @@ -167,10 +170,17 @@ pyasn1-modules==0.4.0 pycparser==2.22 # via cffi # from https://pypi.org/simple -pydantic==1.10.15 +pydantic==2.10.6 # via # pctasks-core # planetary-computer + # pydantic-settings + # from https://pypi.org/simple +pydantic-core==2.27.2 + # via pydantic + # from https://pypi.org/simple +pydantic-settings==2.8.1 + # via pctasks-core # from https://pypi.org/simple pyjwt==2.8.0 # via msal @@ -195,7 +205,9 @@ python-dateutil==2.8.2 # strictyaml # from https://pypi.org/simple python-dotenv==1.0.1 - # via planetary-computer + # via + # planetary-computer + # pydantic-settings # from https://pypi.org/simple pytz==2024.1 # via planetary-computer @@ -229,7 +241,7 @@ stac-validator==3.3.2 strictyaml==1.7.3 # via pctasks-core # from https://pypi.org/simple -typing-extensions==4.11.0 +typing-extensions==4.12.2 # via # azure-core # azure-cosmos @@ -237,6 +249,7 @@ typing-extensions==4.11.0 # azure-storage-blob # azure-storage-queue # pydantic + # pydantic-core # from https://pypi.org/simple urllib3==2.2.1 # via requests diff --git a/pctasks/task/tests/common/test_list_files.py b/pctasks/task/tests/common/test_list_files.py index b22441378..2b68f9e5d 100644 --- a/pctasks/task/tests/common/test_list_files.py +++ b/pctasks/task/tests/common/test_list_files.py @@ -17,7 +17,7 @@ def test_list_files_local() -> None: assert isinstance(task_result, CompletedTaskResult) - output = ListFilesOutput.parse_obj(task_result.output) + output = ListFilesOutput.model_validate(task_result.output) print(output.uris) @@ -42,6 +42,6 @@ def test_list_files_blob() -> None: assert isinstance(task_result, CompletedTaskResult) - output = ListFilesOutput.parse_obj(task_result.output) + output = ListFilesOutput.model_validate(task_result.output) assert set(output.uris) == expected diff --git a/pctasks/task/tests/common/test_list_prefixes.py b/pctasks/task/tests/common/test_list_prefixes.py index 31b6cd754..0e6bfd02a 100644 --- a/pctasks/task/tests/common/test_list_prefixes.py +++ b/pctasks/task/tests/common/test_list_prefixes.py @@ -21,7 +21,7 @@ def test_list_prefixes_local() -> None: assert isinstance(task_result, CompletedTaskResult) - output = ListPrefixesOutput.parse_obj(task_result.output) + output = ListPrefixesOutput.model_validate(task_result.output) print(output.uris) @@ -46,7 +46,7 @@ def test_list_files_blob() -> None: assert isinstance(task_result, CompletedTaskResult) - output = ListPrefixesOutput.parse_obj(task_result.output) + output = ListPrefixesOutput.model_validate(task_result.output) expected = [ storage.get_uri(x) for x in os.listdir(TEST_DIR) if (TEST_DIR / x).is_dir() diff --git a/pctasks/task/tests/common/test_summarize.py b/pctasks/task/tests/common/test_summarize.py index 685b60533..8a748ef9e 100644 --- a/pctasks/task/tests/common/test_summarize.py +++ b/pctasks/task/tests/common/test_summarize.py @@ -31,7 +31,7 @@ def test_summarize_jsons() -> None: prefix_input = ListPrefixesInput(src_uri=storage.get_uri(), depth=1) prefix_task_result = run_test_task(prefix_input.dict(), LIST_PREFIXES_TASK_PATH) assert isinstance(prefix_task_result, CompletedTaskResult) - prefix_output = ListPrefixesOutput.parse_obj(prefix_task_result.output) + prefix_output = ListPrefixesOutput.model_validate(prefix_task_result.output) map_outputs: List[SummarizeOutput] = [] for prefix_uri in prefix_output.uris: @@ -40,18 +40,18 @@ def test_summarize_jsons() -> None: list_files_input.dict(), LIST_FILES_TASK_PATH ) assert isinstance(files_task_result, CompletedTaskResult) - files_output = ListFilesOutput.parse_obj(files_task_result.output) + files_output = ListFilesOutput.model_validate(files_task_result.output) map_input = SummarizeMapInput(uris=files_output.uris) map_task_result = run_test_task(map_input.dict(), MAP_TASK_PATH) assert isinstance(map_task_result, CompletedTaskResult) - map_output = SummarizeOutput.parse_obj(map_task_result.output) + map_output = SummarizeOutput.model_validate(map_task_result.output) map_outputs.append(map_output) reduce_input = SummarizeReduceInput(summaries=[x.summary for x in map_outputs]) reduce_task_result = run_test_task(reduce_input.dict(), REDUCE_TASK_PATH) assert isinstance(reduce_task_result, CompletedTaskResult) - reduce_output = SummarizeOutput.parse_obj(reduce_task_result.output) + reduce_output = SummarizeOutput.model_validate(reduce_task_result.output) summary = reduce_output.summary.dict() assert set( diff --git a/pctasks/task/tests/common/test_write.py b/pctasks/task/tests/common/test_write.py index cdccbf042..84a42173b 100644 --- a/pctasks/task/tests/common/test_write.py +++ b/pctasks/task/tests/common/test_write.py @@ -9,7 +9,7 @@ def test_write_text() -> None: input = WriteInput(content="hello world", uri=storage.get_uri("test1.txt")) task_result = run_test_task(input.dict(), TASK_PATH) assert isinstance(task_result, CompletedTaskResult) - output = WriteOutput.parse_obj(task_result.output) + output = WriteOutput.model_validate(task_result.output) content = storage.read_text(storage.get_path(output.uri)) assert content == input.content @@ -22,7 +22,7 @@ def test_write_json() -> None: ) task_result = run_test_task(input.dict(), TASK_PATH) assert isinstance(task_result, CompletedTaskResult) - output = WriteOutput.parse_obj(task_result.output) + output = WriteOutput.model_validate(task_result.output) content = storage.read_json(storage.get_path(output.uri)) assert content == input.content @@ -35,6 +35,6 @@ def test_write_list() -> None: ) task_result = run_test_task(input.dict(), TASK_PATH) assert isinstance(task_result, CompletedTaskResult) - output = WriteOutput.parse_obj(task_result.output) + output = WriteOutput.model_validate(task_result.output) content = storage.read_json(storage.get_path(output.uri)) assert content == input.content diff --git a/pctasks_funcs/StorageEventsQueue/__init__.py b/pctasks_funcs/StorageEventsQueue/__init__.py index 9e948987e..a98d2e5bd 100644 --- a/pctasks_funcs/StorageEventsQueue/__init__.py +++ b/pctasks_funcs/StorageEventsQueue/__init__.py @@ -14,7 +14,7 @@ # TODO: use async def main(msg: func.QueueMessage) -> None: body = msg.get_body().decode("utf-8") - event = StorageEventRecord.parse_raw(body) + event = StorageEventRecord.model_validate_json(body) with StorageEventsContainer(StorageEventRecord) as cosmos_client: cosmos_client.put(event) diff --git a/pctasks_funcs/WorkflowRunsCF/__init__.py b/pctasks_funcs/WorkflowRunsCF/__init__.py index dea6ce5b2..13ec23db1 100644 --- a/pctasks_funcs/WorkflowRunsCF/__init__.py +++ b/pctasks_funcs/WorkflowRunsCF/__init__.py @@ -22,7 +22,7 @@ async def main(container: func.DocumentList) -> None: async def handle_workflow_run(data: Dict[str, Any]) -> None: """Handle a workflow run record.""" - record = WorkflowRunRecord.parse_obj(data) + record = WorkflowRunRecord.model_validate(data) async with AsyncWorkflowsContainer(WorkflowRunRecord) as container: await container.put(record) diff --git a/pctasks_funcs/WorkflowsCF/__init__.py b/pctasks_funcs/WorkflowsCF/__init__.py index 4d3176e47..65fc76f21 100644 --- a/pctasks_funcs/WorkflowsCF/__init__.py +++ b/pctasks_funcs/WorkflowsCF/__init__.py @@ -22,7 +22,7 @@ async def main(container: func.DocumentList) -> None: async def handle_workflow(data: Dict[str, Any]) -> None: """Handle a workflow record.""" - record = WorkflowRecord.parse_obj(data) + record = WorkflowRecord.model_validate(data) async with AsyncRecordsContainer(WorkflowRecord) as container: await container.put(record)