Skip to content
This repository was archived by the owner on Mar 13, 2026. It is now read-only.

Commit cbbff09

Browse files
committed
test: add tests
1 parent dc62471 commit cbbff09

2 files changed

Lines changed: 145 additions & 32 deletions

File tree

tests/unit/test_core_sample.py

Lines changed: 138 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from unittest import mock
77

88
import google.cloud.bigquery
9+
import google.cloud.bigquery.table
910
import pytest
1011

1112
import pandas_gbq.constants
@@ -71,17 +72,9 @@
7172
), # 0
7273
google.cloud.bigquery.SchemaField("simple_int", "INT64"), # 8
7374
],
74-
8, # 0 + 8
75+
9, # 1 + 8
7576
id="empty-struct",
7677
),
77-
pytest.param(
78-
[
79-
google.cloud.bigquery.SchemaField("bytes", "BYTES"),
80-
]
81-
* 9_999,
82-
pandas_gbq.core.sample._MAX_ROW_BYTES,
83-
id="many-bytes",
84-
),
8578
# Case 8: Complex Mix (Combining multiple cases)
8679
pytest.param(
8780
[
@@ -127,21 +120,21 @@ def test_calculate_target_bytes_with_available_memory(mock_virtual_memory):
127120
available_memory = 2 * pandas_gbq.constants.BYTES_IN_GIB # 2 GB
128121
mock_virtual_memory.return_value = mock.Mock(available=available_memory)
129122

130-
# Expected bytes is available memory / 4, as it falls between _MAX_ROW_BYTES and _MAX_AUTO_TARGET_BYTES
123+
# Expected bytes is a quarter of the available memory (available_memory // 4).
131124
expected_bytes = available_memory // 4
132125
actual_bytes = pandas_gbq.core.sample._calculate_target_bytes(None)
133126
assert actual_bytes == expected_bytes
134127

135128

136129
@mock.patch("psutil.virtual_memory")
137-
def test_calculate_target_bytes_low_memory_uses_max_row_bytes(mock_virtual_memory):
130+
def test_calculate_target_bytes_low_memory(mock_virtual_memory):
138131
# Mock psutil.virtual_memory to return a mock object with an 'available' attribute.
139132
# Set available memory to a low value.
140133
available_memory = 100 # 100 bytes
141134
mock_virtual_memory.return_value = mock.Mock(available=available_memory)
142135

143-
# Expected bytes should be _MAX_ROW_BYTES because available // 4 is less.
144-
expected_bytes = pandas_gbq.core.sample._MAX_ROW_BYTES
136+
# Expected bytes should be the (low) available memory // 4, i.e. 100 // 4 == 25.
137+
expected_bytes = 25
145138
actual_bytes = pandas_gbq.core.sample._calculate_target_bytes(None)
146139
assert actual_bytes == expected_bytes
147140

@@ -206,7 +199,45 @@ def test_estimate_limit(target_bytes, table_bytes, table_rows, fields, expected_
206199

207200

208201
@mock.patch("pandas_gbq.core.read.download_results")
209-
def test_sample_with_tablesample(mock_download_results, mock_bigquery_client):
202+
def test_download_results_in_parallel_with_table(
203+
mock_download_results, mock_bigquery_client
204+
):
205+
rows = mock.Mock(spec=google.cloud.bigquery.table.RowIterator)
206+
rows._table = "table"
207+
rows._schema = "schema"
208+
pandas_gbq.core.sample._download_results_in_parallel(
209+
rows, bqclient=mock_bigquery_client
210+
)
211+
mock_bigquery_client.list_rows.assert_called_once_with(
212+
"table", selected_fields="schema"
213+
)
214+
mock_download_results.assert_called_once()
215+
216+
217+
@mock.patch("pandas_gbq.core.read.download_results")
218+
def test_download_results_in_parallel_no_table(
219+
mock_download_results, mock_bigquery_client
220+
):
221+
rows = mock.Mock(spec=google.cloud.bigquery.table.RowIterator)
222+
rows._table = None
223+
rows._schema = None
224+
pandas_gbq.core.sample._download_results_in_parallel(
225+
rows, bqclient=mock_bigquery_client
226+
)
227+
mock_bigquery_client.list_rows.assert_not_called()
228+
mock_download_results.assert_called_once_with(
229+
rows,
230+
bqclient=mock_bigquery_client,
231+
progress_bar_type=None,
232+
warn_on_large_results=False,
233+
max_results=None,
234+
user_dtypes=None,
235+
use_bqstorage_api=True,
236+
)
237+
238+
239+
@mock.patch("pandas_gbq.core.sample._download_results_in_parallel")
240+
def test_sample_with_tablesample(mock_download_results_in_parallel, mock_bigquery_client):
210241
proportion = 0.1
211242
target_row_count = 100
212243

@@ -219,15 +250,23 @@ def test_sample_with_tablesample(mock_download_results, mock_bigquery_client):
219250

220251
mock_bigquery_client.query_and_wait.assert_called_once()
221252
query = mock_bigquery_client.query_and_wait.call_args[0][0]
222-
assert "TABLESAMPLE SYSTEM (10.0 PERCENT)" in query
253+
assert "TABLESAMPLE SYSTEM (10 PERCENT)" in query
223254
assert "LIMIT 100" in query
224255
assert "FROM `test-project.test_dataset.test_table`" in query
225256

226-
mock_download_results.assert_called_once()
257+
# The mock for query_and_wait returns a mock RowIterator, which is then
258+
# passed to _download_results_in_parallel.
259+
mock_results = mock_bigquery_client.query_and_wait.return_value
260+
mock_download_results_in_parallel.assert_called_with(
261+
mock_results,
262+
bqclient=mock_bigquery_client,
263+
progress_bar_type=None,
264+
use_bqstorage_api=True,
265+
)
227266

228267

229-
@mock.patch("pandas_gbq.core.read.download_results")
230-
def test_sample_with_limit(mock_download_results, mock_bigquery_client):
268+
@mock.patch("pandas_gbq.core.sample._download_results_in_parallel")
269+
def test_sample_with_limit(mock_download_results_in_parallel, mock_bigquery_client):
231270
target_row_count = 200
232271

233272
pandas_gbq.core.sample._sample_with_limit(
@@ -242,7 +281,15 @@ def test_sample_with_limit(mock_download_results, mock_bigquery_client):
242281
assert "LIMIT 200" in query
243282
assert "FROM `test-project.test_dataset.test_table`" in query
244283

245-
mock_download_results.assert_called_once()
284+
# The mock for query_and_wait returns a mock RowIterator, which is then
285+
# passed to _download_results_in_parallel.
286+
mock_results = mock_bigquery_client.query_and_wait.return_value
287+
mock_download_results_in_parallel.assert_called_with(
288+
mock_results,
289+
bqclient=mock_bigquery_client,
290+
progress_bar_type=None,
291+
use_bqstorage_api=True,
292+
)
246293

247294

248295
@pytest.fixture
@@ -254,15 +301,70 @@ def mock_gbq_connector(mock_bigquery_client):
254301
yield mock_connector
255302

256303

304+
@mock.patch("pandas_gbq.core.biglake.get_table_metadata")
305+
@mock.patch("pandas_gbq.core.sample._sample_with_tablesample")
306+
def test_sample_biglake_table(
307+
mock_sample_with_tablesample,
308+
mock_get_table_metadata,
309+
mock_gbq_connector,
310+
mock_bigquery_client,
311+
):
312+
mock_metadata = mock.Mock()
313+
mock_metadata.num_rows = 1000
314+
mock_metadata.schema = [google.cloud.bigquery.SchemaField("col1", "INT64")]
315+
mock_get_table_metadata.return_value = mock_metadata
316+
table_id = "p.c.d.t"
317+
318+
with mock.patch(
319+
"pandas_gbq.core.sample._calculate_target_bytes", return_value=1000
320+
):
321+
pandas_gbq.core.sample.sample(table_id, billing_project_id="p")
322+
323+
mock_sample_with_tablesample.assert_called_once()
324+
# 1000 target bytes / 8 bytes/row in schema = 125 target_row_count
325+
# 125 / 1000 rows = 0.125 proportion.
326+
# min(100, max(1, 0.125 * 100)) = 12.5 -> 12
327+
# So proportion should be about 0.125
328+
# Verify the keyword arguments passed to _sample_with_tablesample.
329+
args, kwargs = mock_sample_with_tablesample.call_args
330+
assert kwargs["proportion"] > 0.1
331+
assert kwargs["target_row_count"] == 125
332+
333+
334+
@mock.patch("pandas_gbq.core.biglake.get_table_metadata")
335+
@mock.patch("pandas_gbq.core.sample._sample_with_tablesample")
336+
def test_sample_biglake_table_zero_rows(
337+
mock_sample_with_tablesample,
338+
mock_get_table_metadata,
339+
mock_gbq_connector,
340+
mock_bigquery_client,
341+
):
342+
mock_metadata = mock.Mock()
343+
mock_metadata.num_rows = 0
344+
mock_metadata.schema = [google.cloud.bigquery.SchemaField("col1", "INT64")]
345+
mock_get_table_metadata.return_value = mock_metadata
346+
table_id = "p.c.d.t"
347+
348+
with mock.patch(
349+
"pandas_gbq.core.sample._calculate_target_bytes", return_value=1000
350+
):
351+
pandas_gbq.core.sample.sample(table_id, billing_project_id="p")
352+
353+
mock_sample_with_tablesample.assert_called_once()
354+
args, kwargs = mock_sample_with_tablesample.call_args
355+
# num_rows is 0: the proportion must still be positive (avoid division by zero).
356+
assert kwargs["proportion"] > 0.0
357+
358+
257359
@mock.patch("pandas_gbq.core.read.download_results")
258360
def test_sample_small_table_downloads_all(
259361
mock_download_results, mock_gbq_connector, mock_bigquery_client
260362
):
261363
mock_table = mock.Mock(spec=google.cloud.bigquery.Table)
262-
type(mock_table).table_type = mock.PropertyMock(return_value="TABLE")
263-
type(mock_table).num_bytes = mock.PropertyMock(return_value=1000)
264-
type(mock_table).num_rows = mock.PropertyMock(return_value=10)
265-
type(mock_table).schema = mock.PropertyMock(return_value=[])
364+
type(mock_table).table_type = "TABLE"
365+
type(mock_table).num_bytes = 1000
366+
type(mock_table).num_rows = 10
367+
type(mock_table).schema = []
266368
mock_bigquery_client.get_table.return_value = mock_table
267369

268370
with mock.patch(
@@ -281,12 +383,13 @@ def test_sample_uses_tablesample(
281383
mock_sample_with_tablesample, mock_gbq_connector, mock_bigquery_client
282384
):
283385
mock_table = mock.Mock(spec=google.cloud.bigquery.Table)
284-
type(mock_table).table_type = mock.PropertyMock(return_value="TABLE")
285-
type(mock_table).num_bytes = mock.PropertyMock(return_value=1_000_000_000_000)
286-
type(mock_table).num_rows = mock.PropertyMock(return_value=1_000)
287-
type(mock_table).schema = mock.PropertyMock(
288-
return_value=[google.cloud.bigquery.SchemaField("col1", "INT64")]
289-
)
386+
type(mock_table).project = "my-project"
387+
type(mock_table).dataset_id = "my_dataset"
388+
type(mock_table).table_id = "my_table"
389+
type(mock_table).table_type = "TABLE"
390+
type(mock_table).num_bytes = 1_000_000_000_000
391+
type(mock_table).num_rows = 1_000
392+
type(mock_table).schema = [google.cloud.bigquery.SchemaField("col1", "INT64")]
290393
mock_bigquery_client.get_table.return_value = mock_table
291394

292395
pandas_gbq.core.sample.sample("my-project.my_dataset.my_table", target_mb=1)
@@ -299,6 +402,9 @@ def test_sample_uses_limit_fallback(
299402
mock_sample_with_limit, mock_gbq_connector, mock_bigquery_client
300403
):
301404
mock_table = mock.Mock(spec=google.cloud.bigquery.Table)
405+
mock_table.project = "my-project"
406+
mock_table.dataset_id = "my_dataset"
407+
mock_table.table_id = "my_table"
302408
mock_table.num_bytes = 10000
303409
mock_table.num_rows = 100
304410
mock_table.table_type = "VIEW" # Not eligible for TABLESAMPLE
@@ -318,6 +424,9 @@ def test_sample_uses_limit_fallback_no_bytes(
318424
mock_sample_with_limit, mock_gbq_connector, mock_bigquery_client
319425
):
320426
mock_table = mock.Mock(spec=google.cloud.bigquery.Table)
427+
mock_table.project = "my-project"
428+
mock_table.dataset_id = "my_dataset"
429+
mock_table.table_id = "my_table"
321430
mock_table.num_bytes = None # num_bytes can be None
322431
mock_table.num_rows = 100
323432
mock_table.table_type = "TABLE"

tests/unit/test_to_gbq.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# license that can be found in the LICENSE file.
44

55

6+
import os
67
import os
78
import pathlib
89
import tempfile
@@ -188,6 +189,7 @@ def test_to_gbq_with_if_exists_unknown():
188189
)
189190

190191

192+
@mock.patch.dict(os.environ, {}, clear=True)
191193
@pytest.mark.parametrize(
192194
"user_agent,rfc9110_delimiter,expected",
193195
[
@@ -216,7 +218,8 @@ def test_create_user_agent(user_agent, rfc9110_delimiter, expected):
216218
def test_create_user_agent_vscode():
217219
from pandas_gbq.gbq_connector import create_user_agent
218220

219-
assert create_user_agent() == f"pandas-{pd.__version__} vscode"
221+
result = create_user_agent()
222+
assert f"pandas-{pd.__version__} vscode" in result
220223

221224

222225
@mock.patch.dict(os.environ, {"VSCODE_PID": "1234"}, clear=True)
@@ -239,9 +242,10 @@ def test_create_user_agent_vscode_plugin():
239242
f.write("{}")
240243

241244
with mock.patch("pathlib.Path.home", return_value=user_home):
245+
result = create_user_agent()
242246
assert (
243-
create_user_agent()
244-
== f"pandas-{pd.__version__} vscode googlecloudtools.cloudcode"
247+
f"pandas-{pd.__version__} vscode googlecloudtools.cloudcode"
248+
in result
245249
)
246250

247251

0 commit comments

Comments
 (0)