From 7faa27a78cf6ca50b1a13c4abac8481e832c01a1 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 5 Jun 2026 16:45:55 +0200 Subject: [PATCH 01/10] docs: add guide on validating Actor input with Pydantic --- docs/01_introduction/quick-start.mdx | 1 + docs/02_concepts/02_actor_input.mdx | 4 + docs/03_guides/10_pydantic.mdx | 119 +++++++++++++++++++++++++++ docs/03_guides/code/10_pydantic.py | 72 ++++++++++++++++ 4 files changed, 196 insertions(+) create mode 100644 docs/03_guides/10_pydantic.mdx create mode 100644 docs/03_guides/code/10_pydantic.py diff --git a/docs/01_introduction/quick-start.mdx b/docs/01_introduction/quick-start.mdx index da166da96..e0d2e641e 100644 --- a/docs/01_introduction/quick-start.mdx +++ b/docs/01_introduction/quick-start.mdx @@ -106,3 +106,4 @@ To see how you can integrate the Apify SDK with popular web scraping libraries, - [Crawlee](../guides/crawlee) - [Scrapy](../guides/scrapy) - [Running webserver](../guides/running-webserver) +- [Validate Actor input with Pydantic](../guides/input-validation) diff --git a/docs/02_concepts/02_actor_input.mdx b/docs/02_concepts/02_actor_input.mdx index 15807c05d..f975e6ae3 100644 --- a/docs/02_concepts/02_actor_input.mdx +++ b/docs/02_concepts/02_actor_input.mdx @@ -20,6 +20,10 @@ For example, if an Actor received a JSON input with two fields, `{ "firstNumber" {InputExample} +## Validating input + +Reading values straight out of the raw input dictionary works for simple cases, but it gives you no type guarantees, no constraint checks, and no clear error when the input is malformed. For anything beyond a couple of fields, validate the input with [Pydantic](https://docs.pydantic.dev/) so your code works with a typed, guaranteed-valid object instead. See the [Validate Actor input with Pydantic](../guides/input-validation) guide for the recommended approach. + ## Loading URLs from Actor input Actors commonly receive a list of URLs to process via their input. 
The `ApifyRequestList` class (from `apify.request_loaders`) can parse the standard Apify input format for URL sources. It supports both direct URL objects (`{"url": "https://example.com"}`) and remote URL lists (`{"requestsFromUrl": "https://example.com/urls.txt"}`), where the remote file contains one URL per line. diff --git a/docs/03_guides/10_pydantic.mdx b/docs/03_guides/10_pydantic.mdx new file mode 100644 index 000000000..19a20c85b --- /dev/null +++ b/docs/03_guides/10_pydantic.mdx @@ -0,0 +1,119 @@ +--- +id: input-validation +title: Validate Actor input with Pydantic +description: Parse, validate, and type your Actor's input with Pydantic models instead of reaching into a raw dictionary. +--- + +import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; +import ApiLink from '@theme/ApiLink'; + +import PydanticExample from '!!raw-loader!roa-loader!./code/10_pydantic.py'; + +In this guide, you'll learn how to validate your Apify Actor's input with [Pydantic](https://docs.pydantic.dev/), so that your code works with a typed, guaranteed-valid object instead of a raw dictionary. + +## Introduction + +An Actor reads its input with `Actor.get_input`, which returns the input record as a plain `dict` (or `None` when there's no input). Working with that dictionary directly is fragile: + +```python +actor_input = await Actor.get_input() or {} +search_terms = actor_input.get('searchTerms', []) +max_results = actor_input.get('maxResults', 10) +``` + +- There are no type guarantees - `max_results` could just as easily arrive as the string `"10"` or `None`, and you'd only find out when something blows up later. +- There's no validation - nothing stops `max_results` from being `0` or `-5`, or `search_terms` from being empty. +- A typo in a key (`maxResult` instead of `maxResults`) silently falls back to the default instead of failing. +- Defaults are scattered across the codebase, and your editor can't autocomplete the fields or catch mistakes. 
+ +[Pydantic](https://docs.pydantic.dev/) solves all of this. You declare the shape of your input once as a model, and Pydantic parses the raw dictionary into a typed object, applying defaults, enforcing constraints, and producing clear error messages when the input doesn't match. Pydantic is already a dependency of the Apify SDK, so there's nothing extra to install. + +## Example Actor + +The following Actor declares its input as a Pydantic `BaseModel`, validates the raw input against it, and then works with a fully typed object. On invalid input it fails fast with a readable error; on valid input it logs the normalized values and stores them as the Actor's output. + + + {PydanticExample} + + +A few things worth pointing out about the **model**: + +- **Aliases bridge the naming conventions.** Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). `Field(alias='maxResults')` maps one to the other, and `populate_by_name=True` lets the model accept either spelling - handy in tests. +- **Defaults and `required` fields are explicit.** A field without a default (`search_terms`) is required; one with a default (`max_results`) is optional. There's a single, obvious place where every default lives. +- **Constraints are declarative.** `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices - mirroring an `enum` in the input schema. +- **Custom validators handle the rest.** The `field_validator` normalizes the search terms (trimming whitespace, dropping empties) and rejects input that has nothing left, so the rest of your code never has to repeat those checks. +- **Unknown fields are ignored.** `extra='ignore'` means adding a new field to your input schema won't break an older Actor build that doesn't know about it yet. Use `extra='forbid'` instead if you'd rather reject anything unexpected. 
+ +And about the **validation** itself: + +- `model_validate` parses the raw dictionary into a typed `ActorInput` instance, filling in defaults and guaranteeing every field is valid - or raising a `ValidationError` describing every problem at once. +- Catching that error, logging a readable summary, and re-raising makes the Actor **fail fast** with a clear explanation right at the start, rather than crashing with an obscure error somewhere deep in the run. Because the body runs inside `async with Actor:`, the re-raised exception automatically marks the run as `FAILED`. +- The error messages refer to the fields by their input-schema aliases. For invalid input like `{"searchTerms": [], "maxResults": 999, "outputFormat": "xml"}`, the log shows exactly what's wrong: + + ```text + The Actor input is invalid: + 3 validation errors for ActorInput + searchTerms + List should have at least 1 item after validation, not 0 ... + maxResults + Input should be less than or equal to 100 ... + outputFormat + Input should be 'json' or 'csv' ... + ``` + +Once validation passes, the rest of `main` works with `actor_input.search_terms`, `actor_input.max_results`, and `actor_input.output_format` - all correctly typed, with editor autocompletion and static type checking. + +## Relationship to the input schema + +Pydantic validation **complements** the Actor's [input schema](https://docs.apify.com/platform/actors/development/input-schema) (`.actor/input_schema.json`) - it doesn't replace it. The two serve different layers: + +- The **input schema** drives the Apify Console form, documents the fields for your users, and lets the platform validate input before the run even starts. Keep declaring your fields there. +- The **Pydantic model** validates the input again *inside your Python code*, where it gives you a typed object, IDE support, and richer rules (normalization, cross-field checks, custom formats) that the input schema can't express. 
It's also your safety net for runs started programmatically by [another Actor](../concepts/interacting-with-other-actors) or executed [locally](https://docs.apify.com/cli/docs/reference#apify-run), and for keeping the two definitions honest with each other. + +Keep the model's aliases in sync with the field keys in `input_schema.json`, and the two definitions describe the same input from both sides. + +## Useful validation features + +Pydantic offers much more than the example uses. A few features that come up often when validating Actor input: + +- **Format-validated types** for common string formats, for example `HttpUrl` for URLs or `EmailStr` for e-mail addresses (the latter needs the `pydantic[email]` extra): + + ```python + from pydantic import BaseModel, HttpUrl + + class ActorInput(BaseModel): + target_url: HttpUrl + ``` + +- **Cross-field validation** with `model_validator`, when one field's validity depends on another: + + ```python + from pydantic import BaseModel, model_validator + from typing_extensions import Self + + class ActorInput(BaseModel): + min_price: int = 0 + max_price: int = 100 + + @model_validator(mode='after') + def _check_range(self) -> Self: + if self.min_price > self.max_price: + raise ValueError('min_price must not exceed max_price') + return self + ``` + +- **Secret input fields.** The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. Wrap such fields in Pydantic's `SecretStr` to keep them from leaking into logs or `model_dump()` output. + +For the full set of types, constraints, and validators, see the [Pydantic documentation](https://docs.pydantic.dev/latest/concepts/models/). 
+ +## Conclusion + +In this guide, you learned how to validate Actor input with Pydantic: declaring the input as a model with aliases, defaults, and constraints; parsing the raw input with `model_validate`; failing fast with a readable error when the input is invalid; and working with a typed object for the rest of the run. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own Actors. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy validating! + +## Additional resources + +- [Pydantic: Official documentation](https://docs.pydantic.dev/) +- [Pydantic: Models](https://docs.pydantic.dev/latest/concepts/models/) +- [Pydantic: Validators](https://docs.pydantic.dev/latest/concepts/validators/) +- [Apify: Actor input](https://docs.apify.com/platform/actors/running/input) +- [Apify: Input schema specification](https://docs.apify.com/platform/actors/development/input-schema) diff --git a/docs/03_guides/code/10_pydantic.py b/docs/03_guides/code/10_pydantic.py new file mode 100644 index 000000000..e836fea9a --- /dev/null +++ b/docs/03_guides/code/10_pydantic.py @@ -0,0 +1,72 @@ +import asyncio +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator + +from apify import Actor + + +class ActorInput(BaseModel): + """Typed and validated representation of the Actor input. + + The field names follow Python's `snake_case`, while the aliases match the + `camelCase` keys produced by the Apify input schema editor. With + `populate_by_name`, the model accepts either form, and unknown fields are + ignored (`extra='ignore'`) so that adding a field to the input schema never + breaks an older Actor build. + """ + + model_config = ConfigDict(populate_by_name=True, extra='ignore') + + # Required: a non-empty list of search terms. 
The validator below trims + # each entry and drops the empty ones. + search_terms: list[str] = Field(alias='searchTerms', min_length=1) + + # Optional: defaults to 10 and must fall within the inclusive 1-100 range. + max_results: int = Field(alias='maxResults', default=10, ge=1, le=100) + + # Optional: restricted to a fixed set of choices, like an input schema enum. + output_format: Literal['json', 'csv'] = Field(alias='outputFormat', default='json') + + @field_validator('search_terms') + @classmethod + def _normalize_terms(cls, value: list[str]) -> list[str]: + # Trim whitespace and drop empty terms, then ensure something is left. + cleaned = [term.strip() for term in value if term.strip()] + if not cleaned: + raise ValueError('searchTerms must contain at least one non-empty term') + return cleaned + + +async def main() -> None: + # Enter the context of the Actor. + async with Actor: + # Read the raw input record from the default key-value store. It's a + # plain dict (or None) - no validation has happened yet. + raw_input = await Actor.get_input() or {} + + # Validate the raw input against the model. On success, `actor_input` is + # a fully typed `ActorInput` with defaults filled in and every field + # guaranteed to be valid. + try: + actor_input = ActorInput.model_validate(raw_input) + except ValidationError as exc: + # Log a readable, per-field summary, then re-raise so the context + # manager marks the run as FAILED. Failing fast here beats crashing + # later with an obscure error deep in the code. + Actor.log.error('The Actor input is invalid:\n%s', exc) + raise + + # From here on, work with typed attributes instead of dict lookups. + Actor.log.info('Input passed validation: %s', actor_input.model_dump()) + + max_results = actor_input.max_results + for term in actor_input.search_terms: + Actor.log.info('Processing %r (max %d results)', term, max_results) + + # Store the normalized input as the Actor's output. 
+ await Actor.set_value('OUTPUT', actor_input.model_dump()) + + +if __name__ == '__main__': + asyncio.run(main()) From a27cf5a167cc0a413d032deaa1a2718f02c4348e Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 5 Jun 2026 17:02:54 +0200 Subject: [PATCH 02/10] docs: move guide code examples into dedicated files --- docs/03_guides/10_pydantic.mdx | 44 +++++++++-------------- docs/03_guides/code/10_http_url.py | 5 +++ docs/03_guides/code/10_model_validator.py | 14 ++++++++ docs/03_guides/code/10_raw_input.py | 18 ++++++++++ 4 files changed, 53 insertions(+), 28 deletions(-) create mode 100644 docs/03_guides/code/10_http_url.py create mode 100644 docs/03_guides/code/10_model_validator.py create mode 100644 docs/03_guides/code/10_raw_input.py diff --git a/docs/03_guides/10_pydantic.mdx b/docs/03_guides/10_pydantic.mdx index 19a20c85b..f633d85bf 100644 --- a/docs/03_guides/10_pydantic.mdx +++ b/docs/03_guides/10_pydantic.mdx @@ -4,10 +4,14 @@ title: Validate Actor input with Pydantic description: Parse, validate, and type your Actor's input with Pydantic models instead of reaching into a raw dictionary. --- +import CodeBlock from '@theme/CodeBlock'; import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; import ApiLink from '@theme/ApiLink'; +import RawInputExample from '!!raw-loader!roa-loader!./code/10_raw_input.py'; import PydanticExample from '!!raw-loader!roa-loader!./code/10_pydantic.py'; +import HttpUrlExample from '!!raw-loader!./code/10_http_url.py'; +import ModelValidatorExample from '!!raw-loader!./code/10_model_validator.py'; In this guide, you'll learn how to validate your Apify Actor's input with [Pydantic](https://docs.pydantic.dev/), so that your code works with a typed, guaranteed-valid object instead of a raw dictionary. 
@@ -15,11 +19,9 @@ In this guide, you'll learn how to validate your Apify Actor's input with [Pydan An Actor reads its input with `Actor.get_input`, which returns the input record as a plain `dict` (or `None` when there's no input). Working with that dictionary directly is fragile: -```python -actor_input = await Actor.get_input() or {} -search_terms = actor_input.get('searchTerms', []) -max_results = actor_input.get('maxResults', 10) -``` + + {RawInputExample} + - There are no type guarantees - `max_results` could just as easily arrive as the string `"10"` or `None`, and you'd only find out when something blows up later. - There's no validation - nothing stops `max_results` from being `0` or `-5`, or `search_terms` from being empty. @@ -76,33 +78,19 @@ Keep the model's aliases in sync with the field keys in `input_schema.json`, and Pydantic offers much more than the example uses. A few features that come up often when validating Actor input: -- **Format-validated types** for common string formats, for example `HttpUrl` for URLs or `EmailStr` for e-mail addresses (the latter needs the `pydantic[email]` extra): +**Format-validated types** for common string formats, for example `HttpUrl` for URLs or `EmailStr` for e-mail addresses (the latter needs the `pydantic[email]` extra): - ```python - from pydantic import BaseModel, HttpUrl - - class ActorInput(BaseModel): - target_url: HttpUrl - ``` + + {HttpUrlExample} + -- **Cross-field validation** with `model_validator`, when one field's validity depends on another: +**Cross-field validation** with `model_validator`, when one field's validity depends on another: - ```python - from pydantic import BaseModel, model_validator - from typing_extensions import Self - - class ActorInput(BaseModel): - min_price: int = 0 - max_price: int = 100 - - @model_validator(mode='after') - def _check_range(self) -> Self: - if self.min_price > self.max_price: - raise ValueError('min_price must not exceed max_price') - return self - ``` + + 
{ModelValidatorExample} + -- **Secret input fields.** The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. Wrap such fields in Pydantic's `SecretStr` to keep them from leaking into logs or `model_dump()` output. +**Secret input fields.** The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. Wrap such fields in Pydantic's `SecretStr` to keep them from leaking into logs or `model_dump()` output. For the full set of types, constraints, and validators, see the [Pydantic documentation](https://docs.pydantic.dev/latest/concepts/models/). diff --git a/docs/03_guides/code/10_http_url.py b/docs/03_guides/code/10_http_url.py new file mode 100644 index 000000000..80bf1f190 --- /dev/null +++ b/docs/03_guides/code/10_http_url.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel, HttpUrl + + +class ActorInput(BaseModel): + target_url: HttpUrl diff --git a/docs/03_guides/code/10_model_validator.py b/docs/03_guides/code/10_model_validator.py new file mode 100644 index 000000000..29c4c98e6 --- /dev/null +++ b/docs/03_guides/code/10_model_validator.py @@ -0,0 +1,14 @@ +from typing import Self + +from pydantic import BaseModel, model_validator + + +class ActorInput(BaseModel): + min_price: int = 0 + max_price: int = 100 + + @model_validator(mode='after') + def _check_range(self) -> Self: + if self.min_price > self.max_price: + raise ValueError('min_price must not exceed max_price') + return self diff --git a/docs/03_guides/code/10_raw_input.py b/docs/03_guides/code/10_raw_input.py new file mode 100644 index 000000000..7bfbeede2 --- /dev/null +++ b/docs/03_guides/code/10_raw_input.py @@ -0,0 +1,18 @@ +import asyncio + +from apify import Actor + + +async def main() -> None: + # Enter the context of the Actor. 
+ async with Actor: + # Read the input and reach into the raw dictionary for each value. + actor_input = await Actor.get_input() or {} + search_terms = actor_input.get('searchTerms', []) + max_results = actor_input.get('maxResults', 10) + + Actor.log.info('search_terms=%s, max_results=%s', search_terms, max_results) + + +if __name__ == '__main__': + asyncio.run(main()) From bd52c61b14b36ebc27d03b99ada64ce8eb83a82a Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 5 Jun 2026 20:46:28 +0200 Subject: [PATCH 03/10] docs: renumber Pydantic guide to 11 --- .../{10_pydantic.mdx => 11_pydantic.mdx} | 10 +-- docs/03_guides/code/10_pydantic.py | 72 ------------------- .../code/{10_http_url.py => 11_http_url.py} | 0 ...del_validator.py => 11_model_validator.py} | 0 docs/03_guides/code/11_pydantic.py | 59 +++++++++++++++ .../code/{10_raw_input.py => 11_raw_input.py} | 2 +- 6 files changed, 65 insertions(+), 78 deletions(-) rename docs/03_guides/{10_pydantic.mdx => 11_pydantic.mdx} (96%) delete mode 100644 docs/03_guides/code/10_pydantic.py rename docs/03_guides/code/{10_http_url.py => 11_http_url.py} (100%) rename docs/03_guides/code/{10_model_validator.py => 11_model_validator.py} (100%) create mode 100644 docs/03_guides/code/11_pydantic.py rename docs/03_guides/code/{10_raw_input.py => 11_raw_input.py} (85%) diff --git a/docs/03_guides/10_pydantic.mdx b/docs/03_guides/11_pydantic.mdx similarity index 96% rename from docs/03_guides/10_pydantic.mdx rename to docs/03_guides/11_pydantic.mdx index f633d85bf..dd8bbde69 100644 --- a/docs/03_guides/10_pydantic.mdx +++ b/docs/03_guides/11_pydantic.mdx @@ -1,6 +1,6 @@ --- id: input-validation -title: Validate Actor input with Pydantic +title: Input validation with Pydantic description: Parse, validate, and type your Actor's input with Pydantic models instead of reaching into a raw dictionary. 
--- @@ -8,10 +8,10 @@ import CodeBlock from '@theme/CodeBlock'; import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; import ApiLink from '@theme/ApiLink'; -import RawInputExample from '!!raw-loader!roa-loader!./code/10_raw_input.py'; -import PydanticExample from '!!raw-loader!roa-loader!./code/10_pydantic.py'; -import HttpUrlExample from '!!raw-loader!./code/10_http_url.py'; -import ModelValidatorExample from '!!raw-loader!./code/10_model_validator.py'; +import RawInputExample from '!!raw-loader!roa-loader!./code/11_raw_input.py'; +import PydanticExample from '!!raw-loader!roa-loader!./code/11_pydantic.py'; +import HttpUrlExample from '!!raw-loader!./code/11_http_url.py'; +import ModelValidatorExample from '!!raw-loader!./code/11_model_validator.py'; In this guide, you'll learn how to validate your Apify Actor's input with [Pydantic](https://docs.pydantic.dev/), so that your code works with a typed, guaranteed-valid object instead of a raw dictionary. diff --git a/docs/03_guides/code/10_pydantic.py b/docs/03_guides/code/10_pydantic.py deleted file mode 100644 index e836fea9a..000000000 --- a/docs/03_guides/code/10_pydantic.py +++ /dev/null @@ -1,72 +0,0 @@ -import asyncio -from typing import Literal - -from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator - -from apify import Actor - - -class ActorInput(BaseModel): - """Typed and validated representation of the Actor input. - - The field names follow Python's `snake_case`, while the aliases match the - `camelCase` keys produced by the Apify input schema editor. With - `populate_by_name`, the model accepts either form, and unknown fields are - ignored (`extra='ignore'`) so that adding a field to the input schema never - breaks an older Actor build. - """ - - model_config = ConfigDict(populate_by_name=True, extra='ignore') - - # Required: a non-empty list of search terms. The validator below trims - # each entry and drops the empty ones. 
- search_terms: list[str] = Field(alias='searchTerms', min_length=1) - - # Optional: defaults to 10 and must fall within the inclusive 1-100 range. - max_results: int = Field(alias='maxResults', default=10, ge=1, le=100) - - # Optional: restricted to a fixed set of choices, like an input schema enum. - output_format: Literal['json', 'csv'] = Field(alias='outputFormat', default='json') - - @field_validator('search_terms') - @classmethod - def _normalize_terms(cls, value: list[str]) -> list[str]: - # Trim whitespace and drop empty terms, then ensure something is left. - cleaned = [term.strip() for term in value if term.strip()] - if not cleaned: - raise ValueError('searchTerms must contain at least one non-empty term') - return cleaned - - -async def main() -> None: - # Enter the context of the Actor. - async with Actor: - # Read the raw input record from the default key-value store. It's a - # plain dict (or None) - no validation has happened yet. - raw_input = await Actor.get_input() or {} - - # Validate the raw input against the model. On success, `actor_input` is - # a fully typed `ActorInput` with defaults filled in and every field - # guaranteed to be valid. - try: - actor_input = ActorInput.model_validate(raw_input) - except ValidationError as exc: - # Log a readable, per-field summary, then re-raise so the context - # manager marks the run as FAILED. Failing fast here beats crashing - # later with an obscure error deep in the code. - Actor.log.error('The Actor input is invalid:\n%s', exc) - raise - - # From here on, work with typed attributes instead of dict lookups. - Actor.log.info('Input passed validation: %s', actor_input.model_dump()) - - max_results = actor_input.max_results - for term in actor_input.search_terms: - Actor.log.info('Processing %r (max %d results)', term, max_results) - - # Store the normalized input as the Actor's output. 
- await Actor.set_value('OUTPUT', actor_input.model_dump()) - - -if __name__ == '__main__': - asyncio.run(main()) diff --git a/docs/03_guides/code/10_http_url.py b/docs/03_guides/code/11_http_url.py similarity index 100% rename from docs/03_guides/code/10_http_url.py rename to docs/03_guides/code/11_http_url.py diff --git a/docs/03_guides/code/10_model_validator.py b/docs/03_guides/code/11_model_validator.py similarity index 100% rename from docs/03_guides/code/10_model_validator.py rename to docs/03_guides/code/11_model_validator.py diff --git a/docs/03_guides/code/11_pydantic.py b/docs/03_guides/code/11_pydantic.py new file mode 100644 index 000000000..7ce35f881 --- /dev/null +++ b/docs/03_guides/code/11_pydantic.py @@ -0,0 +1,59 @@ +import asyncio +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator + +from apify import Actor + + +class ActorInput(BaseModel): + """Typed and validated representation of the Actor input.""" + + # Accept both snake_case and the input schema's camelCase; ignore extras. + model_config = ConfigDict(populate_by_name=True, extra='ignore') + + # Required: non-empty list of search terms (normalized below). + search_terms: list[str] = Field(alias='searchTerms', min_length=1) + + # Optional: 1-100, defaults to 10. + max_results: int = Field(alias='maxResults', default=10, ge=1, le=100) + + # Optional: restricted to a fixed set of choices. + output_format: Literal['json', 'csv'] = Field(alias='outputFormat', default='json') + + @field_validator('search_terms') + @classmethod + def _normalize_terms(cls, value: list[str]) -> list[str]: + # Trim whitespace and drop empty terms. + cleaned = [term.strip() for term in value if term.strip()] + if not cleaned: + raise ValueError('searchTerms must contain at least one non-empty term') + return cleaned + + +async def main() -> None: + async with Actor: + # Read the raw input (a plain dict, not yet validated). 
+ raw_input = await Actor.get_input() or {} + + # Validate the raw input against the model. + try: + actor_input = ActorInput.model_validate(raw_input) + except ValidationError as exc: + # Log a per-field summary, then re-raise to fail the run. + Actor.log.error('The Actor input is invalid:\n%s', exc) + raise + + # Work with typed attributes from here on. + Actor.log.info('Input passed validation: %s', actor_input.model_dump()) + + max_results = actor_input.max_results + for term in actor_input.search_terms: + Actor.log.info('Processing %r (max %d results)', term, max_results) + + # Store the normalized input as output. + await Actor.set_value('OUTPUT', actor_input.model_dump()) + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/docs/03_guides/code/10_raw_input.py b/docs/03_guides/code/11_raw_input.py similarity index 85% rename from docs/03_guides/code/10_raw_input.py rename to docs/03_guides/code/11_raw_input.py index 7bfbeede2..29c313e5f 100644 --- a/docs/03_guides/code/10_raw_input.py +++ b/docs/03_guides/code/11_raw_input.py @@ -6,7 +6,7 @@ async def main() -> None: # Enter the context of the Actor. async with Actor: - # Read the input and reach into the raw dictionary for each value. + # Read the input and reach into the raw dict. 
actor_input = await Actor.get_input() or {} search_terms = actor_input.get('searchTerms', []) max_results = actor_input.get('maxResults', 10) From 58e6c443d72a58c90007d6679a8c99e163e61fa3 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Mon, 8 Jun 2026 13:39:48 +0200 Subject: [PATCH 04/10] docs: address review feedback on Pydantic input guide --- docs/03_guides/11_pydantic.mdx | 9 +++++++-- docs/03_guides/code/11_pydantic.py | 14 +++++++++----- docs/03_guides/code/11_secret_str.py | 10 ++++++++++ 3 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 docs/03_guides/code/11_secret_str.py diff --git a/docs/03_guides/11_pydantic.mdx b/docs/03_guides/11_pydantic.mdx index dd8bbde69..d89a71860 100644 --- a/docs/03_guides/11_pydantic.mdx +++ b/docs/03_guides/11_pydantic.mdx @@ -12,6 +12,7 @@ import RawInputExample from '!!raw-loader!roa-loader!./code/11_raw_input.py'; import PydanticExample from '!!raw-loader!roa-loader!./code/11_pydantic.py'; import HttpUrlExample from '!!raw-loader!./code/11_http_url.py'; import ModelValidatorExample from '!!raw-loader!./code/11_model_validator.py'; +import SecretStrExample from '!!raw-loader!./code/11_secret_str.py'; In this guide, you'll learn how to validate your Apify Actor's input with [Pydantic](https://docs.pydantic.dev/), so that your code works with a typed, guaranteed-valid object instead of a raw dictionary. @@ -40,7 +41,7 @@ The following Actor declares its input as a Pydantic `BaseModel`, validates the A few things worth pointing out about the **model**: -- **Aliases bridge the naming conventions.** Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). `Field(alias='maxResults')` maps one to the other, and `populate_by_name=True` lets the model accept either spelling - handy in tests. 
+- **Aliases bridge the naming conventions.** Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camelCase alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling - handy in tests. - **Defaults and `required` fields are explicit.** A field without a default (`search_terms`) is required; one with a default (`max_results`) is optional. There's a single, obvious place where every default lives. - **Constraints are declarative.** `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices - mirroring an `enum` in the input schema. - **Custom validators handle the rest.** The `field_validator` normalizes the search terms (trimming whitespace, dropping empties) and rejects input that has nothing left, so the rest of your code never has to repeat those checks. @@ -90,7 +91,11 @@ Pydantic offers much more than the example uses. A few features that come up oft {ModelValidatorExample} -**Secret input fields.** The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. Wrap such fields in Pydantic's `SecretStr` to keep them from leaking into logs or `model_dump()` output. +**Secret input fields.** The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. 
Wrap such fields in Pydantic's `SecretStr` to keep them from leaking into logs or `model_dump()` output, and read the plaintext with `get_secret_value()` when you actually need it: + + + {SecretStrExample} + For the full set of types, constraints, and validators, see the [Pydantic documentation](https://docs.pydantic.dev/latest/concepts/models/). diff --git a/docs/03_guides/code/11_pydantic.py b/docs/03_guides/code/11_pydantic.py index 7ce35f881..4626b2d4c 100644 --- a/docs/03_guides/code/11_pydantic.py +++ b/docs/03_guides/code/11_pydantic.py @@ -2,6 +2,7 @@ from typing import Literal from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator +from pydantic.alias_generators import to_camel from apify import Actor @@ -9,17 +10,20 @@ class ActorInput(BaseModel): """Typed and validated representation of the Actor input.""" - # Accept both snake_case and the input schema's camelCase; ignore extras. - model_config = ConfigDict(populate_by_name=True, extra='ignore') + # Derive each field's camelCase alias (searchTerms, maxResults, ...) automatically; + # accept both spellings and ignore extras. + model_config = ConfigDict( + populate_by_name=True, extra='ignore', alias_generator=to_camel + ) # Required: non-empty list of search terms (normalized below). - search_terms: list[str] = Field(alias='searchTerms', min_length=1) + search_terms: list[str] = Field(min_length=1) # Optional: 1-100, defaults to 10. - max_results: int = Field(alias='maxResults', default=10, ge=1, le=100) + max_results: int = Field(default=10, ge=1, le=100) # Optional: restricted to a fixed set of choices. 
- output_format: Literal['json', 'csv'] = Field(alias='outputFormat', default='json') + output_format: Literal['json', 'csv'] = Field(default='json') @field_validator('search_terms') @classmethod diff --git a/docs/03_guides/code/11_secret_str.py b/docs/03_guides/code/11_secret_str.py new file mode 100644 index 000000000..093c17951 --- /dev/null +++ b/docs/03_guides/code/11_secret_str.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel, SecretStr + + +class ActorInput(BaseModel): + # Masked in logs and `model_dump()`; read the plaintext with `get_secret_value()`. + api_token: SecretStr + + +actor_input = ActorInput.model_validate({'api_token': 'my-secret-token'}) +token = actor_input.api_token.get_secret_value() From cc6ea4bcd34a33d340279374ec78d127babd1389 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Mon, 8 Jun 2026 13:46:57 +0200 Subject: [PATCH 05/10] docs: reduce clause-gluing dashes in the Pydantic input-validation guide --- docs/03_guides/11_pydantic.mdx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/03_guides/11_pydantic.mdx b/docs/03_guides/11_pydantic.mdx index d89a71860..b1f3b4308 100644 --- a/docs/03_guides/11_pydantic.mdx +++ b/docs/03_guides/11_pydantic.mdx @@ -24,8 +24,8 @@ An Actor reads its input with `Actor.get_inp {RawInputExample} -- There are no type guarantees - `max_results` could just as easily arrive as the string `"10"` or `None`, and you'd only find out when something blows up later. -- There's no validation - nothing stops `max_results` from being `0` or `-5`, or `search_terms` from being empty. +- There are no type guarantees. `max_results` could just as easily arrive as the string `"10"` or `None`, and you'd only find out when something blows up later. +- There's no validation. Nothing stops `max_results` from being `0` or `-5`, or `search_terms` from being empty. - A typo in a key (`maxResult` instead of `maxResults`) silently falls back to the default instead of failing. 
- Defaults are scattered across the codebase, and your editor can't autocomplete the fields or catch mistakes. @@ -41,15 +41,15 @@ The following Actor declares its input as a Pydantic `BaseModel`, validates the A few things worth pointing out about the **model**: -- **Aliases bridge the naming conventions.** Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camelCase alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling - handy in tests. +- **Aliases bridge the naming conventions.** Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camelCase alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling, which is handy in tests. - **Defaults and `required` fields are explicit.** A field without a default (`search_terms`) is required; one with a default (`max_results`) is optional. There's a single, obvious place where every default lives. -- **Constraints are declarative.** `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices - mirroring an `enum` in the input schema. +- **Constraints are declarative.** `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices, mirroring an `enum` in the input schema. 
- **Custom validators handle the rest.** The `field_validator` normalizes the search terms (trimming whitespace, dropping empties) and rejects input that has nothing left, so the rest of your code never has to repeat those checks. - **Unknown fields are ignored.** `extra='ignore'` means adding a new field to your input schema won't break an older Actor build that doesn't know about it yet. Use `extra='forbid'` instead if you'd rather reject anything unexpected. And about the **validation** itself: -- `model_validate` parses the raw dictionary into a typed `ActorInput` instance, filling in defaults and guaranteeing every field is valid - or raising a `ValidationError` describing every problem at once. +- `model_validate` parses the raw dictionary into a typed `ActorInput` instance, filling in defaults and guaranteeing every field is valid, or raising a `ValidationError` describing every problem at once. - Catching that error, logging a readable summary, and re-raising makes the Actor **fail fast** with a clear explanation right at the start, rather than crashing with an obscure error somewhere deep in the run. Because the body runs inside `async with Actor:`, the re-raised exception automatically marks the run as `FAILED`. - The error messages refer to the fields by their input-schema aliases. For invalid input like `{"searchTerms": [], "maxResults": 999, "outputFormat": "xml"}`, the log shows exactly what's wrong: @@ -64,11 +64,11 @@ And about the **validation** itself: Input should be 'json' or 'csv' ... ``` -Once validation passes, the rest of `main` works with `actor_input.search_terms`, `actor_input.max_results`, and `actor_input.output_format` - all correctly typed, with editor autocompletion and static type checking. +Once validation passes, the rest of `main` works with `actor_input.search_terms`, `actor_input.max_results`, and `actor_input.output_format`, all correctly typed, with editor autocompletion and static type checking. 
## Relationship to the input schema -Pydantic validation **complements** the Actor's [input schema](https://docs.apify.com/platform/actors/development/input-schema) (`.actor/input_schema.json`) - it doesn't replace it. The two serve different layers: +Pydantic validation **complements** the Actor's [input schema](https://docs.apify.com/platform/actors/development/input-schema) (`.actor/input_schema.json`). It doesn't replace it. The two serve different layers: - The **input schema** drives the Apify Console form, documents the fields for your users, and lets the platform validate input before the run even starts. Keep declaring your fields there. - The **Pydantic model** validates the input again *inside your Python code*, where it gives you a typed object, IDE support, and richer rules (normalization, cross-field checks, custom formats) that the input schema can't express. It's also your safety net for runs started programmatically by [another Actor](../concepts/interacting-with-other-actors) or executed [locally](https://docs.apify.com/cli/docs/reference#apify-run), and for keeping the two definitions honest with each other. From 45ca90c4cb84c8f1f8d64b9fe583256757ba3e29 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Tue, 9 Jun 2026 10:48:13 +0200 Subject: [PATCH 06/10] docs: adjust wording style --- docs/02_concepts/02_actor_input.mdx | 2 +- docs/03_guides/11_pydantic.mdx | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/02_concepts/02_actor_input.mdx b/docs/02_concepts/02_actor_input.mdx index f975e6ae3..8e6f2895d 100644 --- a/docs/02_concepts/02_actor_input.mdx +++ b/docs/02_concepts/02_actor_input.mdx @@ -22,7 +22,7 @@ For example, if an Actor received a JSON input with two fields, `{ "firstNumber" ## Validating input -Reading values straight out of the raw input dictionary works for simple cases, but it gives you no type guarantees, no constraint checks, and no clear error when the input is malformed. 
For anything beyond a couple of fields, validate the input with [Pydantic](https://docs.pydantic.dev/) so your code works with a typed, guaranteed-valid object instead. See the [Validate Actor input with Pydantic](../guides/input-validation) guide for the recommended approach. +Reading values straight out of the raw input dictionary works for simple cases, but it gives you no type guarantees, no constraint checks, and no clear error when the input is malformed. For anything beyond a couple of fields, validate the input with [Pydantic](https://docs.pydantic.dev/). Your code then works with a typed, guaranteed-valid object instead. For the recommended approach, see [Validate Actor input with Pydantic](../guides/input-validation). ## Loading URLs from Actor input diff --git a/docs/03_guides/11_pydantic.mdx b/docs/03_guides/11_pydantic.mdx index b1f3b4308..d81e88620 100644 --- a/docs/03_guides/11_pydantic.mdx +++ b/docs/03_guides/11_pydantic.mdx @@ -29,7 +29,7 @@ An Actor reads its input with `Actor.get_inp - A typo in a key (`maxResult` instead of `maxResults`) silently falls back to the default instead of failing. - Defaults are scattered across the codebase, and your editor can't autocomplete the fields or catch mistakes. -[Pydantic](https://docs.pydantic.dev/) solves all of this. You declare the shape of your input once as a model, and Pydantic parses the raw dictionary into a typed object, applying defaults, enforcing constraints, and producing clear error messages when the input doesn't match. Pydantic is already a dependency of the Apify SDK, so there's nothing extra to install. +[Pydantic](https://docs.pydantic.dev/) solves all of this. You declare the shape of your input once as a model, and Pydantic parses the raw dictionary into a typed object, applying defaults, enforcing constraints, and producing clear error messages when the input doesn't match. Pydantic is already a dependency of the Apify SDK. There's nothing extra to install. 
## Example Actor @@ -39,13 +39,13 @@ The following Actor declares its input as a Pydantic `BaseModel`, validates the {PydanticExample} -A few things worth pointing out about the **model**: +Note about the **model**: -- **Aliases bridge the naming conventions.** Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camelCase alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling, which is handy in tests. -- **Defaults and `required` fields are explicit.** A field without a default (`search_terms`) is required; one with a default (`max_results`) is optional. There's a single, obvious place where every default lives. -- **Constraints are declarative.** `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices, mirroring an `enum` in the input schema. -- **Custom validators handle the rest.** The `field_validator` normalizes the search terms (trimming whitespace, dropping empties) and rejects input that has nothing left, so the rest of your code never has to repeat those checks. -- **Unknown fields are ignored.** `extra='ignore'` means adding a new field to your input schema won't break an older Actor build that doesn't know about it yet. Use `extra='forbid'` instead if you'd rather reject anything unexpected. +- Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camelCase alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling, which is handy in tests. 
+- A field without a default (`search_terms`) is required; one with a default (`max_results`) is optional. There's a single, obvious place where every default lives. +- `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices, mirroring an `enum` in the input schema. +- The `field_validator` normalizes the search terms (trimming whitespace, dropping empties) and rejects input that has nothing left. The rest of your code never has to repeat those checks. +- `extra='ignore'` means adding a new field to your input schema won't break an older Actor build that doesn't know about it yet. Use `extra='forbid'` instead if you'd rather reject anything unexpected. And about the **validation** itself: @@ -101,7 +101,7 @@ For the full set of types, constraints, and validators, see the [Pydantic docume ## Conclusion -In this guide, you learned how to validate Actor input with Pydantic: declaring the input as a model with aliases, defaults, and constraints; parsing the raw input with `model_validate`; failing fast with a readable error when the input is invalid; and working with a typed object for the rest of the run. See the [Actor templates](https://apify.com/templates/categories/python) to get started with your own Actors. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy validating! +In this guide, you learned how to validate Actor input with Pydantic: declaring the input as a model with aliases, defaults, and constraints; parsing the raw input with `model_validate`; failing fast with a readable error when the input is invalid; and working with a typed object for the rest of the run. To get started with your own Actors, see the [Actor templates](https://apify.com/templates/categories/python). 
If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy validating! ## Additional resources From a0a10f91873b5cc0ed6d5eb93452d0cb47b1fc38 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 11 Jun 2026 12:43:05 +0200 Subject: [PATCH 07/10] address feedback --- docs/03_guides/11_pydantic.mdx | 50 +++++++++++++++++------------- docs/03_guides/code/11_http_url.py | 4 ++- src/apify/_actor.py | 10 +++++- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/docs/03_guides/11_pydantic.mdx b/docs/03_guides/11_pydantic.mdx index d81e88620..bfc0ee01d 100644 --- a/docs/03_guides/11_pydantic.mdx +++ b/docs/03_guides/11_pydantic.mdx @@ -18,39 +18,41 @@ In this guide, you'll learn how to validate your Apify Actor's input with [Pydan ## Introduction -An Actor reads its input with `Actor.get_input`, which returns the input record as a plain `dict` (or `None` when there's no input). Working with that dictionary directly is fragile: +An Actor reads its input with `Actor.get_input`, which returns the input record as a plain `dict`. Working with that dictionary directly is fragile: {RawInputExample} -- There are no type guarantees. `max_results` could just as easily arrive as the string `"10"` or `None`, and you'd only find out when something blows up later. +- There are no type guarantees. `max_results` can arrive as the string `"10"` or `None` and you won't know until something breaks. - There's no validation. Nothing stops `max_results` from being `0` or `-5`, or `search_terms` from being empty. -- A typo in a key (`maxResult` instead of `maxResults`) silently falls back to the default instead of failing. +- A typo in a key, like `maxResult` instead of `maxResults`, silently falls back to the default instead of failing. 
- Defaults are scattered across the codebase, and your editor can't autocomplete the fields or catch mistakes. -[Pydantic](https://docs.pydantic.dev/) solves all of this. You declare the shape of your input once as a model, and Pydantic parses the raw dictionary into a typed object, applying defaults, enforcing constraints, and producing clear error messages when the input doesn't match. Pydantic is already a dependency of the Apify SDK. There's nothing extra to install. +[Pydantic](https://docs.pydantic.dev/) solves all of these problems. You declare the shape of your input once as a model, and Pydantic parses the raw dictionary into a typed object, applies defaults, enforces constraints, and produces clear error messages when the input doesn't match. + +Pydantic is already a dependency of the Apify SDK, so you don't have to install anything. ## Example Actor -The following Actor declares its input as a Pydantic `BaseModel`, validates the raw input against it, and then works with a fully typed object. On invalid input it fails fast with a readable error; on valid input it logs the normalized values and stores them as the Actor's output. +The following Actor declares its input as a Pydantic `BaseModel`, validates the raw input against it, and then works with a fully typed object. On invalid input it fails fast with a readable error. On valid input it logs the normalized values and stores them as the Actor's output. {PydanticExample} -Note about the **model**: +### About the model -- Apify input fields are conventionally `camelCase` (`maxResults`), while Python attributes are `snake_case` (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camelCase alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling, which is handy in tests. 
-- A field without a default (`search_terms`) is required; one with a default (`max_results`) is optional. There's a single, obvious place where every default lives. +- Apify input fields conventionally use camel case (`maxResults`), while Python attributes use snake case (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camel case alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling, which is handy in tests. +- A field without a default (`search_terms`) is required. A field with a default (`max_results`) is optional. There's a single, obvious place where every default lives. - `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices, mirroring an `enum` in the input schema. - The `field_validator` normalizes the search terms (trimming whitespace, dropping empties) and rejects input that has nothing left. The rest of your code never has to repeat those checks. -- `extra='ignore'` means adding a new field to your input schema won't break an older Actor build that doesn't know about it yet. Use `extra='forbid'` instead if you'd rather reject anything unexpected. +- `extra='ignore'` means adding a new field to your input schema won't break an older Actor build that doesn't know about it yet. Use `extra='forbid'` instead if you prefer to reject anything unexpected. -And about the **validation** itself: +### About the validation -- `model_validate` parses the raw dictionary into a typed `ActorInput` instance, filling in defaults and guaranteeing every field is valid, or raising a `ValidationError` describing every problem at once. 
-- Catching that error, logging a readable summary, and re-raising makes the Actor **fail fast** with a clear explanation right at the start, rather than crashing with an obscure error somewhere deep in the run. Because the body runs inside `async with Actor:`, the re-raised exception automatically marks the run as `FAILED`. +- `model_validate` parses the raw dictionary into a typed `ActorInput` instance. It fills in defaults and guarantees every field is valid, or raises a `ValidationError` that describes every problem at once. +- Catching that error, logging a readable summary, and re-raising makes the Actor fail fast with a clear explanation right at the start, rather than crashing with an obscure error somewhere deep in the run. Because the body runs inside `async with Actor:`, the re-raised exception automatically marks the run as `FAILED`. - The error messages refer to the fields by their input-schema aliases. For invalid input like `{"searchTerms": [], "maxResults": 999, "outputFormat": "xml"}`, the log shows exactly what's wrong: ```text @@ -68,40 +70,44 @@ Once validation passes, the rest of `main` works with `actor_input.search_terms` ## Relationship to the input schema -Pydantic validation **complements** the Actor's [input schema](https://docs.apify.com/platform/actors/development/input-schema) (`.actor/input_schema.json`). It doesn't replace it. The two serve different layers: +Pydantic validation complements the Actor's [input schema](https://docs.apify.com/platform/actors/development/input-schema) (`.actor/input_schema.json`). It doesn't replace it. The two serve different layers: -- The **input schema** drives the Apify Console form, documents the fields for your users, and lets the platform validate input before the run even starts. Keep declaring your fields there. 
-- The **Pydantic model** validates the input again *inside your Python code*, where it gives you a typed object, IDE support, and richer rules (normalization, cross-field checks, custom formats) that the input schema can't express. It's also your safety net for runs started programmatically by [another Actor](../concepts/interacting-with-other-actors) or executed [locally](https://docs.apify.com/cli/docs/reference#apify-run), and for keeping the two definitions honest with each other. +- The input schema drives the Apify Console form, documents the fields for your users, and lets the platform validate input before the run even starts. Keep declaring your fields there. +- The Pydantic model validates the input again inside your Python code, where it gives you a typed object, IDE support, and richer rules (normalization, cross-field checks, custom formats) that the input schema can't express. It's also your safety net for runs started programmatically by [another Actor](../concepts/interacting-with-other-actors) or executed [locally](https://docs.apify.com/cli/docs/reference#apify-run), and for keeping the two definitions honest with each other. Keep the model's aliases in sync with the field keys in `input_schema.json`, and the two definitions describe the same input from both sides. ## Useful validation features -Pydantic offers much more than the example uses. A few features that come up often when validating Actor input: +Pydantic offers extra features for validating Actor input. For the full set of types, constraints, and validators, see the [Pydantic documentation](https://docs.pydantic.dev/latest/concepts/models/). 
+ +### Format-validated types -**Format-validated types** for common string formats, for example `HttpUrl` for URLs or `EmailStr` for e-mail addresses (the latter needs the `pydantic[email]` extra): +For common string formats, for example `HttpUrl` for URLs or `EmailStr` for e-mail addresses, use format-validated types: {HttpUrlExample} -**Cross-field validation** with `model_validator`, when one field's validity depends on another: +### Cross-field validation + +When one field's validity depends on another, use `model_validator`: {ModelValidatorExample} -**Secret input fields.** The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. Wrap such fields in Pydantic's `SecretStr` to keep them from leaking into logs or `model_dump()` output, and read the plaintext with `get_secret_value()` when you actually need it: +### Secret input fields + +The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. To keep them from leaking into logs or `model_dump()` output, wrap such fields in Pydantic's `SecretStr` and read the plaintext with `get_secret_value()` when you actually need it: {SecretStrExample} -For the full set of types, constraints, and validators, see the [Pydantic documentation](https://docs.pydantic.dev/latest/concepts/models/). - ## Conclusion -In this guide, you learned how to validate Actor input with Pydantic: declaring the input as a model with aliases, defaults, and constraints; parsing the raw input with `model_validate`; failing fast with a readable error when the input is invalid; and working with a typed object for the rest of the run. To get started with your own Actors, see the [Actor templates](https://apify.com/templates/categories/python). 
If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy validating! +In this guide, you learned how to validate Actor input with Pydantic: declaring the input as a model with aliases, defaults, and constraints, parsing the raw input with `model_validate`, failing fast with a readable error when the input is invalid, and working with a typed object for the rest of the run. To get started with your own Actors, see the [Actor templates](https://apify.com/templates/categories/python). If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy validating! ## Additional resources diff --git a/docs/03_guides/code/11_http_url.py b/docs/03_guides/code/11_http_url.py index 80bf1f190..f63ce1e26 100644 --- a/docs/03_guides/code/11_http_url.py +++ b/docs/03_guides/code/11_http_url.py @@ -1,5 +1,7 @@ -from pydantic import BaseModel, HttpUrl +from pydantic import BaseModel, EmailStr, HttpUrl class ActorInput(BaseModel): target_url: HttpUrl + # `EmailStr` needs the `pydantic[email]` extra installed. + contact_email: EmailStr diff --git a/src/apify/_actor.py b/src/apify/_actor.py index dd1ff1401..befd051de 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -699,7 +699,15 @@ async def push_data(self, data: dict | list[dict], *, charged_event_name: str | @_ensure_context async def get_input(self) -> Any: - """Get the Actor input value from the default key-value store associated with the current Actor run.""" + """Get the Actor input value from the default key-value store associated with the current Actor run. + + The input is the deserialized contents of the input record (the `INPUT` key by default), so it is typically + a `dict` keyed by the fields declared in the Actor's input schema. 
Any secret input fields are decrypted to + plaintext before being returned. + + Returns: + The Actor input, usually a `dict` of input fields, or `None` if the Actor has no input. + """ input_value = await self.get_value(self.configuration.input_key) input_secrets_private_key = self.configuration.input_secrets_private_key_file input_secrets_key_passphrase = self.configuration.input_secrets_private_key_passphrase From d5ed06bb42f6c3b0f7952fa8220e20d7f46bb621 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 11 Jun 2026 12:54:48 +0200 Subject: [PATCH 08/10] docs: Show explicit pydantic install in input validation guide --- docs/03_guides/11_pydantic.mdx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/03_guides/11_pydantic.mdx b/docs/03_guides/11_pydantic.mdx index bfc0ee01d..be2e495af 100644 --- a/docs/03_guides/11_pydantic.mdx +++ b/docs/03_guides/11_pydantic.mdx @@ -31,7 +31,11 @@ An Actor reads its input with `Actor.get_inp [Pydantic](https://docs.pydantic.dev/) solves all of these problems. You declare the shape of your input once as a model, and Pydantic parses the raw dictionary into a typed object, applies defaults, enforces constraints, and produces clear error messages when the input doesn't match. -Pydantic is already a dependency of the Apify SDK, so you don't have to install anything. 
+To use Pydantic, install it into your Actor's environment: + +```bash +pip install pydantic +``` ## Example Actor From 2a5578fdadbaeb048b057fa1615e294732ee12ba Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 11 Jun 2026 13:05:22 +0200 Subject: [PATCH 09/10] link to console --- docs/03_guides/11_pydantic.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/03_guides/11_pydantic.mdx b/docs/03_guides/11_pydantic.mdx index be2e495af..bb3b5d9b4 100644 --- a/docs/03_guides/11_pydantic.mdx +++ b/docs/03_guides/11_pydantic.mdx @@ -76,7 +76,7 @@ Once validation passes, the rest of `main` works with `actor_input.search_terms` Pydantic validation complements the Actor's [input schema](https://docs.apify.com/platform/actors/development/input-schema) (`.actor/input_schema.json`). It doesn't replace it. The two serve different layers: -- The input schema drives the Apify Console form, documents the fields for your users, and lets the platform validate input before the run even starts. Keep declaring your fields there. +- The input schema drives the [Apify Console](https://console.apify.com/) form, documents the fields for your users, and lets the platform validate input before the run even starts. Keep declaring your fields there. - The Pydantic model validates the input again inside your Python code, where it gives you a typed object, IDE support, and richer rules (normalization, cross-field checks, custom formats) that the input schema can't express. It's also your safety net for runs started programmatically by [another Actor](../concepts/interacting-with-other-actors) or executed [locally](https://docs.apify.com/cli/docs/reference#apify-run), and for keeping the two definitions honest with each other. Keep the model's aliases in sync with the field keys in `input_schema.json`, and the two definitions describe the same input from both sides. 
From 39d8a52a745735e356d8aa16f06a0411769841c7 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 11 Jun 2026 13:37:21 +0200 Subject: [PATCH 10/10] docs: Clone Pydantic input guide into versioned docs (v3.4) --- .../01_introduction/quick-start.mdx | 1 + .../02_concepts/02_actor_input.mdx | 4 + .../version-3.4/03_guides/11_pydantic.mdx | 122 ++++++++++++++++++ .../version-3.4/03_guides/code/11_http_url.py | 7 + .../03_guides/code/11_model_validator.py | 14 ++ .../version-3.4/03_guides/code/11_pydantic.py | 63 +++++++++ .../03_guides/code/11_raw_input.py | 18 +++ .../03_guides/code/11_secret_str.py | 10 ++ 8 files changed, 239 insertions(+) create mode 100644 website/versioned_docs/version-3.4/03_guides/11_pydantic.mdx create mode 100644 website/versioned_docs/version-3.4/03_guides/code/11_http_url.py create mode 100644 website/versioned_docs/version-3.4/03_guides/code/11_model_validator.py create mode 100644 website/versioned_docs/version-3.4/03_guides/code/11_pydantic.py create mode 100644 website/versioned_docs/version-3.4/03_guides/code/11_raw_input.py create mode 100644 website/versioned_docs/version-3.4/03_guides/code/11_secret_str.py diff --git a/website/versioned_docs/version-3.4/01_introduction/quick-start.mdx b/website/versioned_docs/version-3.4/01_introduction/quick-start.mdx index a7123d4ca..f86dae8f2 100644 --- a/website/versioned_docs/version-3.4/01_introduction/quick-start.mdx +++ b/website/versioned_docs/version-3.4/01_introduction/quick-start.mdx @@ -109,3 +109,4 @@ To see how you can integrate the Apify SDK with popular web scraping libraries, - [Browser Use](../guides/browser-use) - [Running webserver](../guides/running-webserver) - [uv](../guides/uv) +- [Validate Actor input with Pydantic](../guides/input-validation) diff --git a/website/versioned_docs/version-3.4/02_concepts/02_actor_input.mdx b/website/versioned_docs/version-3.4/02_concepts/02_actor_input.mdx index 15807c05d..8e6f2895d 100644 --- 
a/website/versioned_docs/version-3.4/02_concepts/02_actor_input.mdx +++ b/website/versioned_docs/version-3.4/02_concepts/02_actor_input.mdx @@ -20,6 +20,10 @@ For example, if an Actor received a JSON input with two fields, `{ "firstNumber" {InputExample} +## Validating input + +Reading values straight out of the raw input dictionary works for simple cases, but it gives you no type guarantees, no constraint checks, and no clear error when the input is malformed. For anything beyond a couple of fields, validate the input with [Pydantic](https://docs.pydantic.dev/). Your code then works with a typed, guaranteed-valid object instead. For the recommended approach, see [Validate Actor input with Pydantic](../guides/input-validation). + ## Loading URLs from Actor input Actors commonly receive a list of URLs to process via their input. The `ApifyRequestList` class (from `apify.request_loaders`) can parse the standard Apify input format for URL sources. It supports both direct URL objects (`{"url": "https://example.com"}`) and remote URL lists (`{"requestsFromUrl": "https://example.com/urls.txt"}`), where the remote file contains one URL per line. diff --git a/website/versioned_docs/version-3.4/03_guides/11_pydantic.mdx b/website/versioned_docs/version-3.4/03_guides/11_pydantic.mdx new file mode 100644 index 000000000..bb3b5d9b4 --- /dev/null +++ b/website/versioned_docs/version-3.4/03_guides/11_pydantic.mdx @@ -0,0 +1,122 @@ +--- +id: input-validation +title: Input validation with Pydantic +description: Parse, validate, and type your Actor's input with Pydantic models instead of reaching into a raw dictionary. 
+--- + +import CodeBlock from '@theme/CodeBlock'; +import RunnableCodeBlock from '@site/src/components/RunnableCodeBlock'; +import ApiLink from '@theme/ApiLink'; + +import RawInputExample from '!!raw-loader!roa-loader!./code/11_raw_input.py'; +import PydanticExample from '!!raw-loader!roa-loader!./code/11_pydantic.py'; +import HttpUrlExample from '!!raw-loader!./code/11_http_url.py'; +import ModelValidatorExample from '!!raw-loader!./code/11_model_validator.py'; +import SecretStrExample from '!!raw-loader!./code/11_secret_str.py'; + +In this guide, you'll learn how to validate your Apify Actor's input with [Pydantic](https://docs.pydantic.dev/), so that your code works with a typed, guaranteed-valid object instead of a raw dictionary. + +## Introduction + +An Actor reads its input with `Actor.get_input`, which returns the input record as a plain `dict`. Working with that dictionary directly is fragile: + +<RunnableCodeBlock className="language-python" language="python"> + {RawInputExample} +</RunnableCodeBlock> + +- There are no type guarantees. `max_results` can arrive as the string `"10"` or `None` and you won't know until something breaks. +- There's no validation. Nothing stops `max_results` from being `0` or `-5`, or `search_terms` from being empty. +- A typo in a key, like `maxResult` instead of `maxResults`, silently falls back to the default instead of failing. +- Defaults are scattered across the codebase, and your editor can't autocomplete the fields or catch mistakes. + +[Pydantic](https://docs.pydantic.dev/) solves all of these problems. You declare the shape of your input once as a model, and Pydantic parses the raw dictionary into a typed object, applies defaults, enforces constraints, and produces clear error messages when the input doesn't match. + +To use Pydantic, install it into your Actor's environment: + +```bash +pip install pydantic +``` + +## Example Actor + +The following Actor declares its input as a Pydantic `BaseModel`, validates the raw input against it, and then works with a fully typed object. 
On invalid input it fails fast with a readable error. On valid input it logs the normalized values and stores them as the Actor's output. + +<RunnableCodeBlock className="language-python" language="python"> + {PydanticExample} +</RunnableCodeBlock> + +### About the model + +- Apify input fields conventionally use camel case (`maxResults`), while Python attributes use snake case (`max_results`). Since every field follows that convention, `alias_generator=to_camel` derives the camel case alias for the whole model at once, instead of spelling out `Field(alias=...)` on each field. `populate_by_name=True` lets the model accept either spelling, which is handy in tests. +- A field without a default (`search_terms`) is required. A field with a default (`max_results`) is optional. There's a single, obvious place where every default lives. +- `ge=1, le=100` enforces a numeric range, `min_length=1` rejects an empty list, and `Literal['json', 'csv']` restricts a field to a fixed set of choices, mirroring an `enum` in the input schema. +- The `field_validator` normalizes the search terms (trimming whitespace, dropping empties) and rejects input that has nothing left. The rest of your code never has to repeat those checks. +- `extra='ignore'` means adding a new field to your input schema won't break an older Actor build that doesn't know about it yet. Use `extra='forbid'` instead if you prefer to reject anything unexpected. + +### About the validation + +- `model_validate` parses the raw dictionary into a typed `ActorInput` instance. It fills in defaults and guarantees every field is valid, or raises a `ValidationError` that describes every problem at once. +- Catching that error, logging a readable summary, and re-raising makes the Actor fail fast with a clear explanation right at the start, rather than crashing with an obscure error somewhere deep in the run. Because the body runs inside `async with Actor:`, the re-raised exception automatically marks the run as `FAILED`. +- The error messages refer to the fields by their input-schema aliases. 
For invalid input like `{"searchTerms": [], "maxResults": 999, "outputFormat": "xml"}`, the log shows exactly what's wrong: + + ```text + The Actor input is invalid: + 3 validation errors for ActorInput + searchTerms + List should have at least 1 item after validation, not 0 ... + maxResults + Input should be less than or equal to 100 ... + outputFormat + Input should be 'json' or 'csv' ... + ``` + +Once validation passes, the rest of `main` works with `actor_input.search_terms`, `actor_input.max_results`, and `actor_input.output_format`, all correctly typed, with editor autocompletion and static type checking. + +## Relationship to the input schema + +Pydantic validation complements the Actor's [input schema](https://docs.apify.com/platform/actors/development/input-schema) (`.actor/input_schema.json`). It doesn't replace it. The two serve different layers: + +- The input schema drives the [Apify Console](https://console.apify.com/) form, documents the fields for your users, and lets the platform validate input before the run even starts. Keep declaring your fields there. +- The Pydantic model validates the input again inside your Python code, where it gives you a typed object, IDE support, and richer rules (normalization, cross-field checks, custom formats) that the input schema can't express. It's also your safety net for runs started programmatically by [another Actor](../concepts/interacting-with-other-actors) or executed [locally](https://docs.apify.com/cli/docs/reference#apify-run), and for keeping the two definitions honest with each other. + +Keep the model's aliases in sync with the field keys in `input_schema.json`, and the two definitions describe the same input from both sides. + +## Useful validation features + +Pydantic offers extra features for validating Actor input. For the full set of types, constraints, and validators, see the [Pydantic documentation](https://docs.pydantic.dev/latest/concepts/models/). 
+ +### Format-validated types + +For common string formats, for example `HttpUrl` for URLs or `EmailStr` for e-mail addresses, use format-validated types: + +<CodeBlock className="language-python"> + {HttpUrlExample} +</CodeBlock> + +### Cross-field validation + +When one field's validity depends on another, use `model_validator`: + +<CodeBlock className="language-python"> + {ModelValidatorExample} +</CodeBlock> + +### Secret input fields + +The platform decrypts [secret input fields](https://docs.apify.com/platform/actors/development/secret-input) for you before `Actor.get_input` returns, so you receive plaintext. To keep them from leaking into logs or `model_dump()` output, wrap such fields in Pydantic's `SecretStr` and read the plaintext with `get_secret_value()` when you actually need it: + +<CodeBlock className="language-python"> + {SecretStrExample} +</CodeBlock> + +## Conclusion + +In this guide, you learned how to validate Actor input with Pydantic: declaring the input as a model with aliases, defaults, and constraints, parsing the raw input with `model_validate`, failing fast with a readable error when the input is invalid, and working with a typed object for the rest of the run. To get started with your own Actors, see the [Actor templates](https://apify.com/templates/categories/python). If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/apify-sdk-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy validating! 
+ +## Additional resources + +- [Pydantic: Official documentation](https://docs.pydantic.dev/) +- [Pydantic: Models](https://docs.pydantic.dev/latest/concepts/models/) +- [Pydantic: Validators](https://docs.pydantic.dev/latest/concepts/validators/) +- [Apify: Actor input](https://docs.apify.com/platform/actors/running/input) +- [Apify: Input schema specification](https://docs.apify.com/platform/actors/development/input-schema) diff --git a/website/versioned_docs/version-3.4/03_guides/code/11_http_url.py b/website/versioned_docs/version-3.4/03_guides/code/11_http_url.py new file mode 100644 index 000000000..f63ce1e26 --- /dev/null +++ b/website/versioned_docs/version-3.4/03_guides/code/11_http_url.py @@ -0,0 +1,7 @@ +from pydantic import BaseModel, EmailStr, HttpUrl + + +class ActorInput(BaseModel): + target_url: HttpUrl + # `EmailStr` needs the `pydantic[email]` extra installed. + contact_email: EmailStr diff --git a/website/versioned_docs/version-3.4/03_guides/code/11_model_validator.py b/website/versioned_docs/version-3.4/03_guides/code/11_model_validator.py new file mode 100644 index 000000000..29c4c98e6 --- /dev/null +++ b/website/versioned_docs/version-3.4/03_guides/code/11_model_validator.py @@ -0,0 +1,14 @@ +from typing import Self + +from pydantic import BaseModel, model_validator + + +class ActorInput(BaseModel): + min_price: int = 0 + max_price: int = 100 + + @model_validator(mode='after') + def _check_range(self) -> Self: + if self.min_price > self.max_price: + raise ValueError('min_price must not exceed max_price') + return self diff --git a/website/versioned_docs/version-3.4/03_guides/code/11_pydantic.py b/website/versioned_docs/version-3.4/03_guides/code/11_pydantic.py new file mode 100644 index 000000000..4626b2d4c --- /dev/null +++ b/website/versioned_docs/version-3.4/03_guides/code/11_pydantic.py @@ -0,0 +1,63 @@ +import asyncio +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator +from 
pydantic.alias_generators import to_camel + +from apify import Actor + + +class ActorInput(BaseModel): + """Typed and validated representation of the Actor input.""" + + # Derive each field's camelCase alias (searchTerms, maxResults, ...) automatically; + # accept both spellings and ignore extras. + model_config = ConfigDict( + populate_by_name=True, extra='ignore', alias_generator=to_camel + ) + + # Required: non-empty list of search terms (normalized below). + search_terms: list[str] = Field(min_length=1) + + # Optional: 1-100, defaults to 10. + max_results: int = Field(default=10, ge=1, le=100) + + # Optional: restricted to a fixed set of choices. + output_format: Literal['json', 'csv'] = Field(default='json') + + @field_validator('search_terms') + @classmethod + def _normalize_terms(cls, value: list[str]) -> list[str]: + # Trim whitespace and drop empty terms. + cleaned = [term.strip() for term in value if term.strip()] + if not cleaned: + raise ValueError('searchTerms must contain at least one non-empty term') + return cleaned + + +async def main() -> None: + async with Actor: + # Read the raw input (a plain dict, not yet validated). + raw_input = await Actor.get_input() or {} + + # Validate the raw input against the model. + try: + actor_input = ActorInput.model_validate(raw_input) + except ValidationError as exc: + # Log a per-field summary, then re-raise to fail the run. + Actor.log.error('The Actor input is invalid:\n%s', exc) + raise + + # Work with typed attributes from here on. + Actor.log.info('Input passed validation: %s', actor_input.model_dump()) + + max_results = actor_input.max_results + for term in actor_input.search_terms: + Actor.log.info('Processing %r (max %d results)', term, max_results) + + # Store the normalized input as output. 
+ await Actor.set_value('OUTPUT', actor_input.model_dump()) + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/website/versioned_docs/version-3.4/03_guides/code/11_raw_input.py b/website/versioned_docs/version-3.4/03_guides/code/11_raw_input.py new file mode 100644 index 000000000..29c313e5f --- /dev/null +++ b/website/versioned_docs/version-3.4/03_guides/code/11_raw_input.py @@ -0,0 +1,18 @@ +import asyncio + +from apify import Actor + + +async def main() -> None: + # Enter the context of the Actor. + async with Actor: + # Read the input and reach into the raw dict. + actor_input = await Actor.get_input() or {} + search_terms = actor_input.get('searchTerms', []) + max_results = actor_input.get('maxResults', 10) + + Actor.log.info('search_terms=%s, max_results=%s', search_terms, max_results) + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/website/versioned_docs/version-3.4/03_guides/code/11_secret_str.py b/website/versioned_docs/version-3.4/03_guides/code/11_secret_str.py new file mode 100644 index 000000000..093c17951 --- /dev/null +++ b/website/versioned_docs/version-3.4/03_guides/code/11_secret_str.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel, SecretStr + + +class ActorInput(BaseModel): + # Masked in logs and `model_dump()`; read the plaintext with `get_secret_value()`. + api_token: SecretStr + + +actor_input = ActorInput.model_validate({'api_token': 'my-secret-token'}) +token = actor_input.api_token.get_secret_value()