Skip to content

Commit a1850d3

Browse files
authored
feat(perf)!: parsing performance improvements (#349)
## Performance Changes

### Core Hotpath Optimizations

- String builder pattern for placeholder replacement
- Native `pa.Table.from_pylist()` for Arrow dict-to-table conversion
- Column-oriented access with `zip(*cols)` transpose for batch-to-rows

### Driver & Serialization

- Batch-level type detection (check first row once, not per-row)
- Module-level singleton pattern for serializer fallbacks

### Caching Optimizations

- **Structural parameter fingerprinting**: Cache keys based on parameter STRUCTURE (keys, types) not VALUES
- blake2b hashing (50% faster than SHA256) for all cache keys

### Parse-Once Pipeline

- AST preservation through compilation pipeline
- Added `parsed_expression` field to `ParameterProcessingResult`
1 parent edeaf68 commit a1850d3

93 files changed

Lines changed: 2920 additions & 1981 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ maintainers = [{ name = "Litestar Developers", email = "hello@litestar.dev" }]
2424
name = "sqlspec"
2525
readme = "README.md"
2626
requires-python = ">=3.10, <4.0"
27-
version = "0.38.4"
27+
version = "0.39.0"
2828

2929
[project.urls]
3030
Discord = "https://discord.gg/litestar"
@@ -240,7 +240,7 @@ opt_level = "3" # Maximum optimization (0-3)
240240
allow_dirty = true
241241
commit = false
242242
commit_args = "--no-verify"
243-
current_version = "0.38.4"
243+
current_version = "0.39.0"
244244
ignore_missing_files = false
245245
ignore_missing_version = false
246246
message = "chore(release): bump to v{new_version}"

sqlspec/_serialization.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,28 @@ def decode(self, data: str | bytes, *, decode_bytes: bool = True) -> Any:
105105
...
106106

107107

108+
# Module-level singleton fallback serializers for performance
109+
# These avoid creating new instances on every fallback call in MsgspecSerializer
110+
_orjson_fallback: "OrjsonSerializer | None" = None
111+
_stdlib_fallback: "StandardLibSerializer | None" = None
112+
113+
114+
def _get_orjson_fallback() -> "OrjsonSerializer":
115+
"""Get singleton OrjsonSerializer instance for fallback use."""
116+
global _orjson_fallback
117+
if _orjson_fallback is None:
118+
_orjson_fallback = OrjsonSerializer()
119+
return _orjson_fallback
120+
121+
122+
def _get_stdlib_fallback() -> "StandardLibSerializer":
123+
"""Get singleton StandardLibSerializer instance for fallback use."""
124+
global _stdlib_fallback
125+
if _stdlib_fallback is None:
126+
_stdlib_fallback = StandardLibSerializer()
127+
return _stdlib_fallback
128+
129+
108130
class MsgspecSerializer(BaseJSONSerializer):
109131
"""Msgspec-based JSON serializer."""
110132

@@ -125,8 +147,8 @@ def encode(self, data: Any, *, as_bytes: bool = False) -> str | bytes:
125147
return self._encoder.encode(data).decode("utf-8")
126148
except (TypeError, ValueError):
127149
if ORJSON_INSTALLED:
128-
return OrjsonSerializer().encode(data, as_bytes=as_bytes)
129-
return StandardLibSerializer().encode(data, as_bytes=as_bytes)
150+
return _get_orjson_fallback().encode(data, as_bytes=as_bytes)
151+
return _get_stdlib_fallback().encode(data, as_bytes=as_bytes)
130152

131153
def decode(self, data: str | bytes, *, decode_bytes: bool = True) -> Any:
132154
"""Decode data using msgspec."""
@@ -136,16 +158,16 @@ def decode(self, data: str | bytes, *, decode_bytes: bool = True) -> Any:
136158
return self._decoder.decode(data)
137159
except (TypeError, ValueError):
138160
if ORJSON_INSTALLED:
139-
return OrjsonSerializer().decode(data, decode_bytes=decode_bytes)
140-
return StandardLibSerializer().decode(data, decode_bytes=decode_bytes)
161+
return _get_orjson_fallback().decode(data, decode_bytes=decode_bytes)
162+
return _get_stdlib_fallback().decode(data, decode_bytes=decode_bytes)
141163
return data
142164

143165
try:
144166
return self._decoder.decode(data.encode("utf-8"))
145167
except (TypeError, ValueError):
146168
if ORJSON_INSTALLED:
147-
return OrjsonSerializer().decode(data, decode_bytes=decode_bytes)
148-
return StandardLibSerializer().decode(data, decode_bytes=decode_bytes)
169+
return _get_orjson_fallback().decode(data, decode_bytes=decode_bytes)
170+
return _get_stdlib_fallback().decode(data, decode_bytes=decode_bytes)
149171

150172

151173
class OrjsonSerializer(BaseJSONSerializer):

sqlspec/adapters/adbc/litestar/store.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,39 @@ def _set(self, key: str, value: "str | bytes", expires_in: "int | timedelta | No
310310
INSERT OR REPLACE INTO {self._table_name} (session_id, data, expires_at)
311311
VALUES ({p1}, {p2}, {p3})
312312
"""
313+
elif dialect in {"postgres", "postgresql"}:
314+
# ADBC Arrow driver cannot infer type from Python None, causing 'na' type error.
315+
# Use separate SQL statements for NULL vs non-NULL expires_at to avoid the issue.
316+
if expires_at is None:
317+
sql = f"""
318+
INSERT INTO {self._table_name} (session_id, data, expires_at)
319+
VALUES ({p1}, {p2}, NULL)
320+
ON CONFLICT (session_id) DO UPDATE
321+
SET data = EXCLUDED.data, expires_at = NULL
322+
"""
323+
with self._config.provide_session() as driver:
324+
driver.execute(sql, key, data)
325+
driver.commit()
326+
return
327+
sql = f"""
328+
INSERT INTO {self._table_name} (session_id, data, expires_at)
329+
VALUES ({p1}, {p2}, {p3})
330+
ON CONFLICT (session_id) DO UPDATE
331+
SET data = EXCLUDED.data, expires_at = EXCLUDED.expires_at
332+
"""
313333
else:
334+
# DuckDB: Same issue with Arrow 'na' type for None values
335+
if expires_at is None:
336+
sql = f"""
337+
INSERT INTO {self._table_name} (session_id, data, expires_at)
338+
VALUES ({p1}, {p2}, NULL)
339+
ON CONFLICT (session_id) DO UPDATE
340+
SET data = EXCLUDED.data, expires_at = NULL
341+
"""
342+
with self._config.provide_session() as driver:
343+
driver.execute(sql, key, data)
344+
driver.commit()
345+
return
314346
sql = f"""
315347
INSERT INTO {self._table_name} (session_id, data, expires_at)
316348
VALUES ({p1}, {p2}, {p3})

sqlspec/adapters/pymysql/pool.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,7 @@ def _close_thread_connection(self) -> None:
161161
@contextmanager
162162
def get_connection(self) -> "Generator[PyMysqlConnection, None, None]":
163163
connection = self._get_thread_connection()
164-
try:
165-
yield connection
166-
finally:
167-
with contextlib.suppress(Exception):
168-
if connection.open and connection.get_autocommit() is False:
169-
connection.commit()
164+
yield connection
170165

171166
def close(self) -> None:
172167
self._close_thread_connection()

sqlspec/builder/_base.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -559,12 +559,17 @@ def _generate_builder_cache_key(self, config: "StatementConfig | None" = None) -
559559
if self._expression is None:
560560
self._expression = self._create_base_expression()
561561

562-
expr_sql: str = self._expression.sql() if self._expression else "None"
562+
if self._expression:
563+
expr_sql = self._expression.sql()
564+
expr_hash = hashlib.blake2b(expr_sql.encode(), digest_size=8).hexdigest()
565+
else:
566+
expr_hash = "None"
567+
563568
parameters_snapshot = sorted(self._parameters.items())
564569
parameters_hash = hashlib.sha256(str(parameters_snapshot).encode()).hexdigest()[:8]
565570

566571
state_parts = [
567-
f"expression:{expr_sql}",
572+
f"expression_hash:{expr_hash}",
568573
f"parameters_hash:{parameters_hash}",
569574
f"ctes:{sorted(self._with_ctes.keys())}",
570575
f"dialect:{dialect_name}",

sqlspec/builder/_column.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,5 +517,5 @@ def desc(self) -> "exp.Ordered":
517517
return exp.Ordered(this=self._expression, desc=True)
518518

519519
def __hash__(self) -> int:
520-
"""Hash based on the SQL expression."""
521-
return hash(self._expression.sql())
520+
"""Hash based on the expression identity."""
521+
return hash(id(self._expression))

sqlspec/core/cache.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -593,15 +593,16 @@ class CachedStatement:
593593
594594
This class stores compiled SQL and parameters in an immutable format
595595
that can be safely shared between different parts of the system without
596-
risk of mutation. Tuple parameters ensure no copying is needed.
596+
risk of mutation. List parameters are preserved for execute_many operations
597+
where drivers require list type.
597598
"""
598599

599600
__slots__ = ("compiled_sql", "expression", "parameters")
600601

601602
def __init__(
602603
self,
603604
compiled_sql: str,
604-
parameters: "tuple[Any, ...] | dict[str, Any] | None",
605+
parameters: "tuple[Any, ...] | list[Any] | dict[str, Any] | None",
605606
expression: "exp.Expression | None",
606607
) -> None:
607608
self.compiled_sql = compiled_sql

sqlspec/core/compiler.py

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
from sqlspec.core.parameters import (
2222
ParameterProcessor,
2323
ParameterProfile,
24-
fingerprint_parameters,
24+
structural_fingerprint,
2525
validate_parameter_alignment,
26+
value_fingerprint,
2627
)
2728
from sqlspec.utils.logging import get_logger, log_with_context
2829
from sqlspec.utils.type_guards import get_value_attribute
@@ -349,11 +350,26 @@ def compile(
349350
cache_key = self._make_cache_key(sql, parameters, is_many)
350351

351352
if cache_key in self._cache:
352-
result = self._cache[cache_key]
353+
cached_result = self._cache[cache_key]
353354
del self._cache[cache_key]
354-
self._cache[cache_key] = result
355+
self._cache[cache_key] = cached_result
355356
self._cache_hits += 1
356-
return result
357+
# Structural fingerprinting means same SQL structure = same cache entry,
358+
# but we must still process the caller's actual parameter values
359+
dialect_str = str(self._config.dialect) if self._config.dialect else None
360+
_, processed_params, _, _ = self._prepare_parameters(sql, parameters, is_many, dialect_str)
361+
# Return cached compilation metadata with NEW parameters
362+
return CompiledSQL(
363+
compiled_sql=cached_result.compiled_sql,
364+
execution_parameters=processed_params,
365+
operation_type=cached_result.operation_type,
366+
expression=cached_result.expression,
367+
parameter_style=cached_result.parameter_style,
368+
supports_many=cached_result.supports_many,
369+
parameter_casts=cached_result.parameter_casts,
370+
parameter_profile=cached_result.parameter_profile,
371+
operation_profile=cached_result.operation_profile,
372+
)
357373

358374
self._cache_misses += 1
359375
result = self._compile_uncached(sql, parameters, is_many, expression)
@@ -602,13 +618,15 @@ def _finalize_compilation(
602618
if self._config.parameter_config.needs_static_script_compilation and processed_params is None:
603619
return processed_sql, processed_params, parameter_profile
604620
if ast_was_transformed and expression is not None:
621+
# Pass the transformed expression through the pipeline to avoid re-parsing
605622
transformed_result = self._parameter_processor.process_for_execution(
606623
sql=expression.sql(dialect=dialect_str),
607624
parameters=parameters,
608625
config=self._config.parameter_config,
609626
dialect=dialect_str,
610627
is_many=is_many,
611628
wrap_types=self._config.enable_parameter_type_wrapping,
629+
parsed_expression=expression,
612630
)
613631
final_sql = transformed_result.sql
614632
final_params = transformed_result.parameters
@@ -762,21 +780,28 @@ def _make_cache_key(self, sql: str, parameters: Any, is_many: bool = False) -> s
762780
Returns:
763781
Cache key string
764782
"""
765-
766-
param_fingerprint = fingerprint_parameters(parameters)
783+
# For static script compilation, parameter VALUES are embedded in the SQL string,
784+
# so different values produce different compiled SQL. Must use value_fingerprint
785+
# to avoid returning cached SQL with stale embedded values.
786+
if self._config.parameter_config.needs_static_script_compilation:
787+
param_fingerprint = value_fingerprint(parameters)
788+
else:
789+
# Use structural fingerprint (keys + types, not values) for better cache hit rates
790+
param_fingerprint = structural_fingerprint(parameters, is_many)
767791
dialect_str = str(self._config.dialect) if self._config.dialect else None
768-
param_style = self._config.parameter_config.default_parameter_style.value
769-
770-
hash_data = (
771-
sql,
772-
param_fingerprint,
773-
param_style,
774-
dialect_str,
775-
self._config.enable_parsing,
776-
self._config.enable_transformations,
777-
is_many,
792+
# Include both input and execution parameter styles to avoid cache collisions
793+
# (e.g., MySQL asyncmy uses ? for input but %s for execution)
794+
input_style = self._config.parameter_config.default_parameter_style.value
795+
exec_style = (
796+
self._config.parameter_config.default_execution_parameter_style.value
797+
if self._config.parameter_config.default_execution_parameter_style
798+
else input_style
778799
)
779800

801+
# Exclude enable_parsing and enable_transformations from hash_data as they are
802+
# per-config static flags, not per-statement - they belong in pipeline key only
803+
hash_data = (sql, param_fingerprint, input_style, exec_style, dialect_str, is_many)
804+
780805
hash_str = hashlib.blake2b(repr(hash_data).encode("utf-8"), digest_size=8).hexdigest()
781806
return f"sql_{hash_str}"
782807

@@ -924,7 +949,8 @@ def clear_cache(self) -> None:
924949

925950
def _make_parse_cache_key(self, sql: str, dialect: "str | None") -> str:
926951
dialect_marker = dialect or "default"
927-
hash_str = hashlib.sha256(f"{dialect_marker}:{sql}".encode()).hexdigest()[:16]
952+
# Use blake2b instead of sha256 for faster hashing (~50% faster)
953+
hash_str = hashlib.blake2b(f"{dialect_marker}:{sql}".encode(), digest_size=8).hexdigest()
928954
return f"parse_{hash_str}"
929955

930956
@property

sqlspec/core/filters.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,8 +582,12 @@ def append_to_statement(self, statement: "SQL") -> "SQL":
582582
limit_placeholder = exp.Placeholder(this=limit_param_name)
583583
offset_placeholder = exp.Placeholder(this=offset_param_name)
584584

585+
# Prefer cached expression to avoid re-parsing
586+
current_statement: exp.Expression
585587
if statement.statement_expression is not None:
586588
current_statement = statement.statement_expression.copy()
589+
elif statement.raw_expression is not None:
590+
current_statement = statement.raw_expression.copy()
587591
elif not statement.statement_config.enable_parsing:
588592
current_statement = exp.Select().from_(f"({statement.raw_sql})")
589593
else:
@@ -638,8 +642,12 @@ def append_to_statement(self, statement: "SQL") -> "SQL":
638642
col_expr = exp.column(self.field_name)
639643
order_expr = col_expr.desc() if converted_sort_order == "desc" else col_expr.asc()
640644

645+
# Prefer cached expression to avoid re-parsing
646+
current_statement: exp.Expression
641647
if statement.statement_expression is not None:
642648
current_statement = statement.statement_expression.copy()
649+
elif statement.raw_expression is not None:
650+
current_statement = statement.raw_expression.copy()
643651
elif not statement.statement_config.enable_parsing:
644652
current_statement = exp.Select().from_(f"({statement.raw_sql})")
645653
else:

sqlspec/core/parameters/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
validate_parameter_alignment,
99
)
1010
from sqlspec.core.parameters._converter import ParameterConverter
11-
from sqlspec.core.parameters._processor import ParameterProcessor, fingerprint_parameters
11+
from sqlspec.core.parameters._processor import ParameterProcessor, structural_fingerprint, value_fingerprint
1212
from sqlspec.core.parameters._registry import (
1313
DRIVER_PARAMETER_PROFILES,
1414
build_statement_config_from_profile,
@@ -58,14 +58,15 @@
5858
"build_null_pruning_transform",
5959
"build_statement_config_from_profile",
6060
"collect_null_parameter_ordinals",
61-
"fingerprint_parameters",
6261
"get_driver_profile",
6362
"is_iterable_parameters",
6463
"looks_like_execute_many",
6564
"normalize_parameter_key",
6665
"register_driver_profile",
6766
"replace_null_parameters_with_literals",
6867
"replace_placeholders_with_literals",
68+
"structural_fingerprint",
6969
"validate_parameter_alignment",
70+
"value_fingerprint",
7071
"wrap_with_type",
7172
)

0 commit comments

Comments
 (0)