Skip to content

Commit cabb2c2

Browse files
committed
SQLAlchemy: Enable the insertmanyvalues feature for batch size control
This feature lets you control the batch size of `INSERT` operations using the `insertmanyvalues_page_size` option, which can be set on engine, connection, and statement objects. See https://docs.sqlalchemy.org/core/connections.html#controlling-the-batch-size
1 parent 3d2d597 commit cabb2c2

3 files changed

Lines changed: 62 additions & 2 deletions

File tree

CHANGES.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ Unreleased
99
``supports_multivalues_insert`` on the CrateDB dialect, it is used by pandas'
1010
``method="multi"`` option
1111

12+
- SQLAlchemy: Enable the ``insertmanyvalues`` feature, which lets you control
13+
the batch size of ``INSERT`` operations using the ``insertmanyvalues_page_size``
14+
engine-, connection-, and statement-options.
15+
1216

1317
2023/03/02 0.30.1
1418
=================

src/crate/client/sqlalchemy/dialect.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ class CrateDialect(default.DefaultDialect):
173173
statement_compiler = statement_compiler
174174
ddl_compiler = CrateDDLCompiler
175175
type_compiler = CrateTypeCompiler
176+
use_insertmanyvalues = True
177+
use_insertmanyvalues_wo_returning = True
176178
supports_multivalues_insert = True
177179
supports_native_boolean = True
178180
supports_statement_cache = True

src/crate/client/sqlalchemy/tests/compiler_test.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@
1919
# with Crate these terms will supersede the license and you may use the
2020
# software solely pursuant to the terms of the relevant commercial agreement.
2121

22-
from unittest import TestCase
22+
from unittest import mock, TestCase, skipIf
2323

2424
from crate.client.sqlalchemy.compiler import crate_before_execute
2525

2626
import sqlalchemy as sa
2727
from sqlalchemy.sql import text, Update
2828

29-
from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_1_4
29+
from crate.client.sqlalchemy.sa_version import SA_VERSION, SA_1_4, SA_2_0
3030
from crate.client.sqlalchemy.types import Craty
3131

3232

@@ -127,3 +127,57 @@ def test_insert_multivalues(self):
127127
insertable = self.mytable.insert().values(records)
128128
statement = str(insertable.compile(bind=self.crate_engine))
129129
self.assertEqual(statement, "INSERT INTO mytable (name) VALUES (?), (?), (?)")
130+
131+
@skipIf(SA_VERSION < SA_2_0, "SQLAlchemy 1.x does not support the 'insertmanyvalues' dialect feature")
def test_insert_manyvalues(self):
    """
    Exercise the `use_insertmanyvalues` and `use_insertmanyvalues_wo_returning`
    dialect features.

    Background, paraphrased from the SQLAlchemy documentation:

    - Passing a list of parameter dictionaries to `Connection.execute()`
      instead of a single dictionary requests "executemany" style execution,
      i.e. the statement is invoked once per parameter set.
    - With "insertmanyvalues", the size of a single INSERT statement is
      capped, both by a maximum number of "values" clauses and by a
      dialect-specific maximum number of bound parameters.
    - When more parameter dictionaries are supplied than fit within that
      limit, several INSERT statements are emitted within one
      `Connection.execute()` call. Each of them carries a portion of the
      parameter dictionaries, referred to as a "batch".

    The test submits five records with a page size of two, and expects
    the mocked HTTP client to receive three INSERT statements.

    - https://docs.sqlalchemy.org/tutorial/dbapi_transactions.html#tutorial-multiple-parameters
    - https://docs.sqlalchemy.org/glossary.html#term-executemany
    - https://docs.sqlalchemy.org/core/connections.html#engine-insertmanyvalues
    - https://docs.sqlalchemy.org/core/connections.html#controlling-the-batch-size
    """

    # Make `assertListEqual` print the complete diff on failure.
    self.maxDiff = None

    # 5 records / page size 2 => batches of 2 + 2 + 1, i.e. three statements.
    total_records = 5
    page_size = 2

    # The bare insert construct, without parameters, renders all table columns.
    insert_stmt = self.mytable.insert()
    compiled_sql = str(insert_stmt.compile(bind=self.crate_engine))
    self.assertEqual(compiled_sql, "INSERT INTO mytable (name, data) VALUES (?, ?)")

    # Input data: one parameter dictionary per record.
    payload = [{"name": f"foo_{i}"} for i in range(total_records)]

    # What the low-level client is expected to receive, one call per batch.
    expected_calls = [
        mock.call(mock.ANY, 'INSERT INTO mytable (name) VALUES (?), (?)', ('foo_0', 'foo_1'), None),
        mock.call(mock.ANY, 'INSERT INTO mytable (name) VALUES (?), (?)', ('foo_2', 'foo_3'), None),
        mock.call(mock.ANY, 'INSERT INTO mytable (name) VALUES (?)', ('foo_4', ), None),
    ]

    sql_patch = mock.patch("crate.client.http.Client.sql", autospec=True, return_value={"cols": []})
    with sql_patch as client_mock:

        with self.crate_engine.begin() as conn:
            # Adjust page size on a per-connection level.
            conn.execution_options(insertmanyvalues_page_size=page_size)
            conn.execute(insert_stmt, parameters=payload)

        # Verify that input data has been batched correctly.
        self.assertListEqual(client_mock.mock_calls, expected_calls)

0 commit comments

Comments
 (0)