1+ """
2+ Integration tests for data ingestion.
3+
4+ Tests the ingest method for bulk data loading.
5+ """
6+
import uuid

import pyarrow as pa
import pytest

from altertable_flightsql import Client
from altertable_flightsql.client import IngestIncrementalOptions
from tests.conftest import SchemaInfo
13+
14+
class TestBasicIngest:
    """Test basic ingest functionality.

    Each test ingests into a uniquely named table (random suffix) so runs
    are isolated, verifies the rows via a SELECT, and drops the table in a
    best-effort cleanup step.
    """

    def test_ingest_simple_table(self, altertable_client: Client, test_schema: SchemaInfo):
        """Test ingesting a single record batch into a new table."""
        # Random suffix keeps concurrent/repeated runs from colliding.
        table_name = f"test_ingest_{uuid.uuid4().hex[:8]}"
        fully_qualified_table = f"{test_schema.full_name}.{table_name}"

        # Define schema
        schema = pa.schema([
            ("id", pa.int64()),
            ("name", pa.string()),
            ("value", pa.float64()),
        ])

        # Create test data; columns are positional and must match the schema order.
        data = pa.record_batch([
            [1, 2, 3, 4, 5],
            ["Alice", "Bob", "Charlie", "David", "Eve"],
            [100.5, 200.0, 300.75, 400.25, 500.5],
        ], schema=schema)

        try:
            # Ingest data; exiting the context manager finalizes the upload.
            with altertable_client.ingest(
                table_name=table_name,
                schema=schema,
                schema_name=test_schema.schema,
                catalog_name=test_schema.catalog,
            ) as writer:
                writer.write(data)

            # Read everything back and confirm the rows survived the round trip.
            reader = altertable_client.query(f"SELECT * FROM {fully_qualified_table} ORDER BY id")
            result = reader.read_all()

            assert result.num_rows == 5
            result_df = result.to_pandas()
            assert list(result_df["id"]) == [1, 2, 3, 4, 5]
            assert list(result_df["name"]) == ["Alice", "Bob", "Charlie", "David", "Eve"]

        finally:
            # Cleanup is best-effort: a drop failure must not mask the test result.
            try:
                altertable_client.execute(f"DROP TABLE IF EXISTS {fully_qualified_table}")
            except Exception as e:
                print(f"Warning: Failed to drop table {fully_qualified_table}: {e}")

    def test_ingest_multiple_batches(self, altertable_client: Client, test_schema: SchemaInfo):
        """Test that several writes on one ingest stream accumulate into one table."""
        table_name = f"test_ingest_{uuid.uuid4().hex[:8]}"
        fully_qualified_table = f"{test_schema.full_name}.{table_name}"

        # Define schema
        schema = pa.schema([
            ("id", pa.int64()),
            ("name", pa.string()),
        ])

        try:
            # Ingest data across multiple write() calls on the same writer.
            with altertable_client.ingest(
                table_name=table_name,
                schema=schema,
                schema_name=test_schema.schema,
                catalog_name=test_schema.catalog,
            ) as writer:
                # Deliberately uneven batch sizes (2, 2, 1) to exercise batching.
                batch1 = pa.record_batch([[1, 2], ["Alice", "Bob"]], schema=schema)
                batch2 = pa.record_batch([[3, 4], ["Charlie", "David"]], schema=schema)
                batch3 = pa.record_batch([[5], ["Eve"]], schema=schema)

                writer.write(batch1)
                writer.write(batch2)
                writer.write(batch3)

            # All batches should be visible as a single 5-row table.
            reader = altertable_client.query(f"SELECT * FROM {fully_qualified_table} ORDER BY id")
            result = reader.read_all()

            assert result.num_rows == 5
            result_df = result.to_pandas()
            assert list(result_df["id"]) == [1, 2, 3, 4, 5]
            assert list(result_df["name"]) == ["Alice", "Bob", "Charlie", "David", "Eve"]

        finally:
            # Cleanup is best-effort: a drop failure must not mask the test result.
            try:
                altertable_client.execute(f"DROP TABLE IF EXISTS {fully_qualified_table}")
            except Exception as e:
                print(f"Warning: Failed to drop table {fully_qualified_table}: {e}")
108+
class TestIngestWithPrimaryKey:
    """Test ingest with primary key specification."""

    def test_ingest_with_primary_key(self, altertable_client: Client, test_schema: SchemaInfo):
        """Test that a duplicate primary key within one batch is deduplicated.

        Two rows share ``id == 1``; with ``cursor_field=["created_at"]`` the row
        with the larger cursor value (``created_at == 4``) must win.
        """
        # Random suffix keeps concurrent/repeated runs from colliding.
        table_name = f"test_ingest_{uuid.uuid4().hex[:8]}"
        fully_qualified_table = f"{test_schema.full_name}.{table_name}"

        # Define schema
        schema = pa.schema([
            ("id", pa.int64()),
            ("email", pa.string()),
            ("name", pa.string()),
            ("created_at", pa.int64()),
        ])

        try:
            # Ingest data with a primary key and a cursor field for conflict resolution.
            with altertable_client.ingest(
                table_name=table_name,
                schema=schema,
                schema_name=test_schema.schema,
                catalog_name=test_schema.catalog,
                incremental_options=IngestIncrementalOptions(primary_key=["id"], cursor_field=["created_at"]),
            ) as writer:
                # Rows 0 and 3 collide on id=1; row 3 has the newer created_at.
                writer.write(pa.record_batch([
                    [1, 2, 3, 1],
                    ["alice@example.com", "bob@example.com", "charlie@example.com", "alice+1@example.com"],
                    ["Alice", "Bob", "Charlie", "Alice"],
                    [1, 2, 3, 4],
                ], schema=schema))

            # Verify data was ingested and deduplicated down to 3 unique keys.
            reader = altertable_client.query(f"SELECT * FROM {fully_qualified_table} ORDER BY id")
            result = reader.read_all()

            assert result.num_rows == 3
            result_df = result.to_pandas()
            assert list(result_df["id"]) == [1, 2, 3]
            # id=1 must carry the later row's email (created_at 4 beats 1).
            assert list(result_df["email"]) == ["alice+1@example.com", "bob@example.com", "charlie@example.com"]
            assert list(result_df["name"]) == ["Alice", "Bob", "Charlie"]

        finally:
            # Cleanup is best-effort: a drop failure must not mask the test result.
            try:
                altertable_client.execute(f"DROP TABLE IF EXISTS {fully_qualified_table}")
            except Exception as e:
                print(f"Warning: Failed to drop table {fully_qualified_table}: {e}")