mindee · sebastianMindee · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
@@ -9,5 +9,5 @@
     uses: mindee/mindee-api-python/.github/workflows/_test-regressions.yml@main
     secrets: inherit
   test-code-samples:
-    uses: mindee/mindee-api-python/.github/workflows/_smoke_test.yml@main
+    uses: mindee/mindee-api-python/.github/workflows/_smoke-test.yml@main
     secrets: inherit
diff --git a/examples/auto_multi_receipts_extraction_example.py b/examples/auto_multi_receipts_extraction_example.py
@@ -16,7 +16,9 @@ def parse_receipts(input_path):
     extracted_receipts = extract_receipts(input_doc, result_split.document.inference)
 
     for idx, receipt in enumerate(extracted_receipts, 1):
-        result_receipt = mindee_client.parse(product.ReceiptV5, receipt.as_source())
+        result_receipt = mindee_client.parse(
+            product.ReceiptV5, receipt.as_input_source()
+        )
         print(f"Receipt {idx}:")
         print(result_receipt.document)
         print("-" * 40)

diff --git a/mindee/extraction/common/extracted_image.py b/mindee/extraction/common/extracted_image.py
@@ -17,6 +17,8 @@ class ExtractedImage:
     """Id of the page the image was extracted from."""
     _element_id: int
     """Id of the element on a given page."""
+    filename: str
+    """Name of the file the image was extracted from."""
 
     def __init__(
         self, input_source: LocalInputSource, page_id: int, element_id: int
@@ -30,6 +32,7 @@ def __init__(
         """
         self.buffer = io.BytesIO(input_source.file_object.read())
         self.buffer.name = input_source.filename
+        self.filename = input_source.filename
         if input_source.is_pdf():
             extension = "jpg"
         else:
@@ -56,20 +59,27 @@ def save_to_file(self, output_path: str, file_format: Optional[str] = None):
             if not file_format:
                 if len(resolved_path.suffix) < 1:
                     raise ValueError("Invalid file format.")
-                file_format = (
-                    resolved_path.suffix.upper()
-                )  # technically redundant since PIL applies an upper operation
-                # to the parameter , but older versions may not do so.
+                # Let PIL infer format from filename extension
             self.buffer.seek(0)
             image = Image.open(self.buffer)
-            image.save(resolved_path, format=file_format)
+            if file_format:
+                image.save(resolved_path, format=file_format)
+            else:
+                image.save(resolved_path)
             logger.info("File saved successfully to '%s'.", resolved_path)
         except TypeError as exc:
             raise MindeeError("Invalid path/filename provided.") from exc
         except Exception as exc:
+            print(exc)
             raise MindeeError(f"Could not save file {Path(output_path).name}.") from exc
 
     def as_source(self) -> FileInput:
+        """
+        Deprecated. Use ``as_input_source`` instead.
+
+        Kept as a backward-compatible alias: the call is forwarded unchanged to
+        ``as_input_source`` and its result is returned as-is.
+
+        :return: The value returned by ``as_input_source``.
+        """
+        return self.as_input_source()
+
+    def as_input_source(self) -> FileInput:
         """
         Return the file as a Mindee-compatible BufferInput source.
 

diff --git a/mindee/extraction/common/image_extractor.py b/mindee/extraction/common/image_extractor.py
@@ -1,13 +1,13 @@
 import io
-from typing import BinaryIO, List
+from typing import BinaryIO, List, Union
 
 import pypdfium2 as pdfium
 from PIL import Image
 
 from mindee.error.mindee_error import MindeeError
 from mindee.extraction.common.extracted_image import ExtractedImage
 from mindee.geometry.point import Point
-from mindee.geometry.polygon import get_min_max_x, get_min_max_y
+from mindee.geometry.polygon import Polygon, get_min_max_x, get_min_max_y
 from mindee.input.sources.bytes_input import BytesInput
 from mindee.input.sources.local_input_source import LocalInputSource
 
@@ -114,7 +114,9 @@ def get_file_extension(file_format: str):
 
 
 def extract_multiple_images_from_source(
-    input_source: LocalInputSource, page_id: int, polygons: List[List[Point]]
+    input_source: LocalInputSource,
+    page_id: int,
+    polygons: Union[List[Polygon], List[List[Point]]],
 ) -> List[ExtractedImage]:
     """
     Extracts elements from a page based on a list of bounding boxes.

diff --git a/mindee/extraction/pdf_extractor/extracted_pdf.py b/mindee/extraction/pdf_extractor/extracted_pdf.py
@@ -28,6 +28,10 @@ def get_page_count(self) -> int:
             ) from exc
 
     def write_to_file(self, output_path: str):
+        """
+        Deprecated. Use ``save_to_file`` instead.
+
+        Kept as a backward-compatible alias that forwards ``output_path`` to
+        ``save_to_file``.
+
+        :param output_path: Path handed over to ``save_to_file``.
+        """
+        self.save_to_file(output_path)
+
+    def save_to_file(self, output_path: str):
         """
         Writes the contents of the current PDF object to a file.
 
@@ -40,6 +44,7 @@ def write_to_file(self, output_path: str):
             raise MindeeError("Invalid save path provided {}.")
         if out_path.suffix.lower() != "pdf":
             out_path = out_path.parent / (out_path.stem + "." + "pdf")
+        self.pdf_bytes.seek(0)
         with open(out_path, "wb") as out_file:
             out_file.write(self.pdf_bytes.read())
 

diff --git a/mindee/mindee_http/mindee_api_v2.py b/mindee/mindee_http/mindee_api_v2.py
@@ -4,7 +4,9 @@
 import requests
 
 from mindee.error.mindee_error import MindeeApiV2Error
-from mindee.input import LocalInputSource, UrlInputSource, BaseParameters
+from mindee.input.base_parameters import BaseParameters
+from mindee.input.sources.local_input_source import LocalInputSource
+from mindee.input.sources.url_input_source import UrlInputSource
 from mindee.logger import logger
 from mindee.mindee_http.base_settings import USER_AGENT
 from mindee.mindee_http.settings_mixin import SettingsMixin

diff --git a/mindee/mindee_http/workflow_endpoint.py b/mindee/mindee_http/workflow_endpoint.py
@@ -2,7 +2,9 @@
 
 import requests
 
-from mindee.input import LocalInputSource, UrlInputSource, WorkflowOptions
+from mindee.input.sources.local_input_source import LocalInputSource
+from mindee.input.sources.url_input_source import UrlInputSource
+from mindee.input.workflow_options import WorkflowOptions
 from mindee.mindee_http.base_endpoint import BaseEndpoint
 from mindee.mindee_http.workflow_settings import WorkflowSettings
 

diff --git a/mindee/v2/__init__.py b/mindee/v2/__init__.py
@@ -1,3 +1,5 @@
+from mindee.v2.file_operations.split import Split
+from mindee.v2.file_operations.crop import Crop
 from mindee.v2.product.classification.classification_parameters import (
     ClassificationParameters,
 )
@@ -14,10 +16,12 @@
 __all__ = [
     "ClassificationResponse",
     "ClassificationParameters",
+    "Crop",
     "CropResponse",
     "CropParameters",
     "OCRResponse",
     "OCRParameters",
+    "Split",
     "SplitResponse",
     "SplitParameters",
 ]
diff --git a/mindee/v2/file_operations/__init__.py b/mindee/v2/file_operations/__init__.py
@@ -0,0 +1,4 @@
+from mindee.v2.file_operations.crop import Crop
+from mindee.v2.file_operations.split import Split
+
+__all__ = ["Crop", "Split"]
diff --git a/mindee/v2/file_operations/crop.py b/mindee/v2/file_operations/crop.py
@@ -0,0 +1,69 @@
+from typing import List
+
+from mindee.error import MindeeError
+from mindee.extraction import ExtractedImage, extract_multiple_images_from_source
+from mindee.geometry import Polygon
+from mindee.input.sources.local_input_source import LocalInputSource
+from mindee.parsing.v2.field import FieldLocation
+from mindee.v2.product.crop.crop_box import CropBox
+
+
+class Crop:
+    """Crop operations for V2."""
+
+    @classmethod
+    def extract_single_crop(
+        cls, input_source: LocalInputSource, crop: FieldLocation
+    ) -> ExtractedImage:
+        """
+        Extracts a single crop from the document as an image.
+
+        :param input_source: Local Input Source to extract the crop from.
+        :param crop: Location (page index and polygon) of the crop to extract.
+        :return: ExtractedImage.
+        """
+        return extract_multiple_images_from_source(
+            input_source, crop.page, [crop.polygon]
+        )[0]
+
+    @classmethod
+    def extract_crops(
+        cls, input_source: LocalInputSource, crops: List[CropBox]
+    ) -> List[ExtractedImage]:
+        """
+        Extracts every given crop from the document as an image.
+
+        :param input_source: Local Input Source to extract the crops from.
+        :param crops: List of crops.
+        :return: Extracted crops as a list of ExtractedImage, grouped by page in
+            ascending page order.
+        :raises MindeeError: If ``crops`` is empty.
+        """
+        if not crops:
+            raise MindeeError("No possible candidates found for Crop extraction.")
+
+        # Group the polygons per page so each page is handed to the extractor once.
+        polygons: List[List[Polygon]] = [[] for _ in range(input_source.page_count)]
+        for crop in crops:
+            polygons[crop.location.page].append(crop.location.polygon)
+
+        images: List[ExtractedImage] = []
+        for page_index, page_polygons in enumerate(polygons):
+            # Pages without any crop have nothing to extract; skip them, as
+            # ``CropResponse.apply_to_file`` does.
+            if not page_polygons:
+                continue
+            images.extend(
+                extract_multiple_images_from_source(
+                    input_source,
+                    page_index,
+                    page_polygons,
+                )
+            )
+        return images
+
+    @classmethod
+    def apply(
+        cls,
+        input_source: LocalInputSource,
+        crops: List[CropBox],
+    ) -> List[ExtractedImage]:
+        """Extract all the given crops from a document.
+
+        Thin entry point that delegates to ``extract_crops``.
+
+        :param input_source: Input source to crop.
+        :param crops: List of crops.
+        :return: Extracted crops as a list of ExtractedImage.
+        """
+
+        return cls.extract_crops(input_source, crops)
diff --git a/mindee/v2/file_operations/split.py b/mindee/v2/file_operations/split.py
@@ -0,0 +1,49 @@
+from typing import List, Union
+
+from mindee.error import MindeeError
+from mindee.extraction import ExtractedPdf, PdfExtractor
+from mindee.input.sources.local_input_source import LocalInputSource
+from mindee.v2.product.split.split_range import SplitRange
+
+
+class Split:
+    """Split operations for V2."""
+
+    @classmethod
+    def extract_splits(
+        cls,
+        input_source: LocalInputSource,
+        splits: Union[List[SplitRange], List[List[int]]],
+    ) -> List[ExtractedPdf]:
+        """
+        Extracts splits as complete PDFs from the document.
+
+        :param input_source: Input source to split.
+        :param splits: Splits to extract, each given either as a ``SplitRange``
+            or as a ``[first_page, last_page]`` pair; both bounds are inclusive.
+        :return: A list of extracted PDFs.
+        :raises MindeeError: If ``splits`` is empty.
+        """
+        # Fail fast on empty input, before the PDF extractor is set up.
+        if not splits:
+            raise MindeeError("No indexes provided.")
+
+        page_groups = []
+        for split in splits:
+            bounds = split.page_range if isinstance(split, SplitRange) else split
+            # Expand the inclusive bounds into the explicit list of page indexes.
+            page_groups.append(list(range(bounds[0], bounds[1] + 1)))
+        pdf_extractor = PdfExtractor(input_source)
+        return pdf_extractor.extract_sub_documents(page_groups)
+
+    @classmethod
+    def apply(
+        cls, input_source: LocalInputSource, splits: List[SplitRange]
+    ) -> List[ExtractedPdf]:
+        """Split a document into multiple PDFs.
+
+        Thin entry point that delegates to ``extract_splits``.
+
+        :param input_source: Input source to split.
+        :param splits: List of splits.
+        :return: A list of extracted PDFs.
+        """
+
+        return cls.extract_splits(input_source, splits)
diff --git a/mindee/v2/product/crop/crop_box.py b/mindee/v2/product/crop/crop_box.py
@@ -1,3 +1,5 @@
+from mindee.extraction import ExtractedImage, extract_multiple_images_from_source
+from mindee.input.sources.local_input_source import LocalInputSource
 from mindee.parsing.common.string_dict import StringDict
 from mindee.parsing.v2.field.field_location import FieldLocation
 
@@ -16,3 +18,14 @@ def __init__(self, server_response: StringDict):
 
     def __str__(self) -> str:
         return f"* :Location: {self.location}\n  :Object Type: {self.object_type}"
+
+    def apply_to_file(self, input_source: LocalInputSource) -> ExtractedImage:
+        """
+        Apply this crop box to a file and return the single extracted image.
+
+        :param input_source: Local file to apply the inference to
+        :return: Extracted image
+        """
+        location = self.location
+        extracted_images = extract_multiple_images_from_source(
+            input_source, location.page, [location.polygon]
+        )
+        # Exactly one polygon was requested, so only the first result is returned.
+        return extracted_images[0]
diff --git a/mindee/v2/product/crop/crop_response.py b/mindee/v2/product/crop/crop_response.py
@@ -1,3 +1,10 @@
+from typing import List
+
+from mindee.error import MindeeError
+from mindee.extraction.common.extracted_image import ExtractedImage
+from mindee.extraction.common.image_extractor import extract_multiple_images_from_source
+from mindee.geometry import Polygon
+from mindee.input.sources.local_input_source import LocalInputSource
 from mindee.parsing.common.string_dict import StringDict
 from mindee.v2.parsing.inference import BaseResponse
 from mindee.v2.product.crop.crop_inference import CropInference
@@ -15,3 +22,31 @@ class CropResponse(BaseResponse):
     def __init__(self, raw_response: StringDict) -> None:
         super().__init__(raw_response)
         self.inference = CropInference(raw_response["inference"])
+
+    def apply_to_file(self, input_source: LocalInputSource) -> List[ExtractedImage]:
+        """
+        Cut every crop found by the inference out of a file.
+
+        :param input_source: Local file to apply the inference to
+        :return: List of extracted images
+        :raises MindeeError: If the inference result holds no crops.
+        """
+        crop_boxes = self.inference.result.crops
+        if not crop_boxes:
+            raise MindeeError("No possible candidates found for Crop extraction.")
+
+        # One bucket of polygons per page of the input file.
+        polygons_by_page: List[List[Polygon]] = [
+            [] for _ in range(input_source.page_count)
+        ]
+        for crop_box in crop_boxes:
+            location = crop_box.location
+            polygons_by_page[location.page].append(location.polygon)
+
+        extracted: List[ExtractedImage] = []
+        for page_id, page_polygons in enumerate(polygons_by_page):
+            # Only pages that actually carry crops are sent to the extractor.
+            if page_polygons:
+                extracted += extract_multiple_images_from_source(
+                    input_source, page_id, page_polygons
+                )
+        return extracted
diff --git a/mindee/v2/product/split/split_range.py b/mindee/v2/product/split/split_range.py
@@ -1,5 +1,8 @@
 from typing import List
 
+from mindee.extraction.pdf_extractor.extracted_pdf import ExtractedPdf
+from mindee.extraction.pdf_extractor.pdf_extractor import PdfExtractor
+from mindee.input.sources.local_input_source import LocalInputSource
 from mindee.parsing.common.string_dict import StringDict
 
 
@@ -21,3 +24,13 @@ def __init__(self, server_response: StringDict):
     def __str__(self) -> str:
         page_range = ",".join([str(page_index) for page_index in self.page_range])
         return f"* :Page Range: {page_range}\n  :Document Type: {self.document_type}"
+
+    def apply_to_file(self, input_source: LocalInputSource) -> ExtractedPdf:
+        """
+        Apply the split range inference to a file and return a single extracted PDF.
+
+        :param input_source: Local file to apply the inference to
+        :return: Extracted PDF
+        """
+        # ``page_range`` is treated as inclusive [first, last] bounds, as
+        # ``Split.extract_splits`` does: expand it into explicit page indexes so
+        # that the pages between the two bounds are kept as well.
+        first_page, last_page = self.page_range[0], self.page_range[1]
+        page_indexes = list(range(first_page, last_page + 1))
+        pdf_extractor = PdfExtractor(input_source)
+        return pdf_extractor.extract_sub_documents([page_indexes])[0]
diff --git a/mindee/v2/product/split/split_response.py b/mindee/v2/product/split/split_response.py
@@ -1,4 +1,9 @@
+from typing import List
+
+from mindee.extraction import ExtractedPdf
+from mindee.input.sources.local_input_source import LocalInputSource
 from mindee.parsing.common.string_dict import StringDict
+from mindee.v2.file_operations.split import Split
 from mindee.v2.parsing.inference import BaseResponse
 from mindee.v2.product.split.split_inference import SplitInference
 
@@ -15,3 +20,12 @@ class SplitResponse(BaseResponse):
     def __init__(self, raw_response: StringDict) -> None:
         super().__init__(raw_response)
         self.inference = SplitInference(raw_response["inference"])
+
+    def apply_to_file(self, input_source: LocalInputSource) -> List[ExtractedPdf]:
+        """
+        Cut a file into the PDFs described by this response's split inference.
+
+        Delegates to ``Split.extract_splits`` with the splits of the inference
+        result.
+
+        :param input_source: Local file to apply the inference to
+        :return: List of extracted PDFs
+        """
+        split_ranges = self.inference.result.splits
+        return Split.extract_splits(input_source, split_ranges)
diff --git a/tests/data b/tests/data
+1 −1		v2/products/crop/crop_multiple.json
+1 −1		v2/products/crop/crop_multiple.rst
+2 −2		v2/products/split/split_multiple.json