Skip to content

Commit de5f65e

Browse files
docs: add example and docstring to ingest
1 parent 5a7726d commit de5f65e

2 files changed

Lines changed: 119 additions & 0 deletions

File tree

examples/client_usage.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99
import os
1010

11+
import pyarrow as pa
12+
1113
from altertable_flightsql import Client
14+
from altertable_flightsql.client import IngestIncrementalOptions
1215

1316
ALTERTABLE_HOST = os.getenv("ALTERTABLE_HOST", "flight.altertable.ai")
1417
ALTERTABLE_PORT = int(os.getenv("ALTERTABLE_PORT", "443"))
@@ -133,6 +136,72 @@ def example_transactions():
133136
print()
134137

135138

139+
def example_bulk_ingest():
    """Demonstrate incremental (upsert) bulk ingestion over Arrow Flight.

    Streams an initial batch of rows into ``incremental_users``, then a
    second batch that overlaps on the primary key, and prints the merged
    result before cleaning up.
    """
    banner = "=" * 60
    print(banner)
    print("Example: Bulk Data Ingestion")
    print(banner)

    with Client(
        username=ALTERTABLE_USERNAME,
        password=ALTERTABLE_PASSWORD,
        **CONNECTION_SETTINGS,
    ) as client:
        # Table layout: integer key, display name, and a cursor column used
        # to decide which row wins on a primary-key conflict.
        schema = pa.schema(
            [
                ("id", pa.int64()),
                ("name", pa.string()),
                ("created_at", pa.int64()),
            ]
        )

        batches = [
            # Initial rows (ids 1-3).
            pa.record_batch(
                [[1, 2, 3], ["Alice", "Bob", "Charlie"], [1000, 2000, 3000]],
                schema=schema,
            ),
            # Overlapping ids 1 and 2 plus brand-new id 4 — the second pass
            # therefore upserts rather than blindly appending.
            pa.record_batch(
                [[1, 2, 4], ["Alice Updated", "Bob Updated", "David"], [1500, 2500, 4000]],
                schema=schema,
            ),
        ]

        # Open a fresh ingest stream per batch; identical incremental
        # options make the second write an upsert keyed on "id".
        for batch in batches:
            with client.ingest(
                table_name="incremental_users",
                schema=schema,
                incremental_options=IngestIncrementalOptions(
                    primary_key=["id"],
                    cursor_field=["created_at"],
                ),
            ) as writer:
                writer.write(batch)

        # Verify - should have 4 rows (3 from first batch, 2 updated, 1 new)
        reader = client.query("SELECT * FROM incremental_users ORDER BY id")
        result = reader.read_pandas()
        print(f"\nIncremental ingestion results ({len(result)} rows):")
        print(result)

        # Cleanup
        client.execute("DROP TABLE IF EXISTS bulk_users")
        client.execute("DROP TABLE IF EXISTS incremental_users")

        print()
203+
204+
136205
def example_metadata():
137206
"""Query database metadata."""
138207
print("=" * 60)
@@ -171,5 +240,6 @@ def example_metadata():
171240
example_updates()
172241
example_basic_query()
173242
example_transactions()
243+
example_bulk_ingest()
174244
example_prepared_statement()
175245
example_metadata()

src/altertable_flightsql/client.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,55 @@ def ingest(
275275
incremental_options: Optional[IngestIncrementalOptions] = None,
276276
transaction: Optional["Transaction"] = None,
277277
) -> flight.FlightStreamWriter:
278+
"""
279+
Bulk ingest data into a table using Apache Arrow Flight.
280+
281+
This method provides high-performance bulk data loading by streaming
282+
Arrow record batches directly to the server. The writer can be used as
283+
a context manager for automatic resource cleanup.
284+
285+
Args:
286+
table_name: Name of the table to ingest data into.
287+
schema: PyArrow schema defining the table structure.
288+
schema_name: Optional schema name. If not provided, uses the client's
289+
default schema.
290+
catalog_name: Optional catalog name. If not provided, uses the client's
291+
default catalog.
292+
mode: Table creation/append mode. Options:
293+
- CREATE: Create table, fail if it exists
294+
- APPEND: Append to existing table, fail if it doesn't exist
295+
- CREATE_APPEND: Create if not exists, append if exists (default)
296+
- REPLACE: Drop and recreate table if it exists
297+
incremental_options: Options for incremental ingestion, including:
298+
- primary_key: Columns to use as primary key
299+
- cursor_field: Columns used to determine which row to keep in case of conflict on primary key
300+
transaction: Optional transaction to execute ingestion within.
301+
302+
Returns:
303+
FlightStreamWriter for writing record batches to the table.
304+
The writer should be closed after all data is written, or used
305+
as a context manager.
306+
307+
Example:
308+
>>> # Basic ingestion
309+
>>> schema = pa.schema([("id", pa.int64()), ("name", pa.string())])
310+
>>> with client.ingest(table_name="users", schema=schema) as writer:
311+
... batch = pa.record_batch([[1, 2], ["Alice", "Bob"]], schema=schema)
312+
... writer.write(batch)
313+
314+
>>> # Incremental ingestion with primary key
315+
>>> from altertable_flightsql.client import IngestIncrementalOptions
316+
>>> opts = IngestIncrementalOptions(
317+
... primary_key=["id"],
318+
... cursor_field=["updated_at"]
319+
... )
320+
>>> with client.ingest(
321+
... table_name="users",
322+
... schema=schema,
323+
... incremental_options=opts
324+
... ) as writer:
325+
... writer.write(batch)
326+
"""
278327
cmd = sql_pb2.CommandStatementIngest(
279328
table=table_name,
280329
table_definition_options=self._ingest_mode_to_table_definition_options(mode),

0 commit comments

Comments
 (0)