Skip to content

Commit 5a43669

Browse files
committed
adapters
1 parent 3fd6879 commit 5a43669

14 files changed

Lines changed: 1283 additions & 1477 deletions

.claude/settings.local.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"permissions": {
3+
"allow": [
4+
"Bash(git add:*)",
5+
"Bash(git commit:*)",
6+
"Bash(git push:*)",
7+
"Bash(ls:*)",
8+
"Bash(gh api:*)",
9+
"Bash(gh run list:*)",
10+
"Bash(gh run view:*)",
11+
"Bash(curl:*)",
12+
"Bash(gh workflow run:*)",
13+
"Bash(python scripts/generate_window_join.py:*)",
14+
"Bash(source .venv/bin/activate)",
15+
"Bash(python scripts/generate_h2oai_join.py:*)",
16+
"Bash(python run_bench.py:*)",
17+
"Bash(../rayforce/rayforce:*)",
18+
"Bash(python -m py_compile:*)",
19+
"Bash(python3 -m py_compile:*)"
20+
]
21+
},
22+
"enableAllProjectMcpServers": true,
23+
"enabledMcpjsonServers": [
24+
"connect-apps"
25+
]
26+
}

FAIRNESS.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -162,18 +162,18 @@ For subprocess-based adapters:
162162

163163
## Threading Configuration
164164

165-
| Adapter | Default Threading | Override Option |
166-
|-----------|----------------------|---------------------------|
167-
| DuckDB | Multi-threaded (auto)| `--duckdb-threads N` |
168-
| Polars | Multi-threaded (auto)| `--polars-threads N` |
169-
| Rayforce | Configurable | Via rayforce `-c` flag |
170-
| KDB+ | Single-threaded | N/A (q is single-threaded)|
171-
172-
For fair comparison, consider matching thread counts:
173-
```bash
174-
# Single-threaded comparison
175-
python run_bench.py --duckdb-threads 1 --polars-threads 1 ...
176-
```
165+
Each adapter uses its default threading model:
166+
167+
| Adapter | Threading |
168+
|-----------|--------------------------------------------|
169+
| DuckDB | Multi-threaded (auto, uses all CPU cores) |
170+
| Polars | Multi-threaded (auto, uses all CPU cores) |
171+
| Pandas | Single-threaded (GIL-bound) |
172+
| Rayforce | Multi-threaded (auto) |
173+
| KDB+ | Single-threaded (q is single-threaded) |
174+
| QuestDB | Server-managed threading |
175+
176+
Threading is managed internally by each database engine for optimal performance.
177177

178178
## Validation
179179

adapters/duckdb_adapter.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,26 +35,24 @@
3535

3636
class DuckDBAdapter(Adapter):
3737
"""DuckDB in-process adapter.
38-
38+
3939
Uses DuckDB's embedded Python API for minimal overhead.
4040
All operations run in-process without IPC or network.
4141
"""
42-
42+
4343
name = "duckdb"
4444
version = duckdb.__version__
4545
embedded = True
46-
47-
def __init__(self, threads: int | None = None, memory_limit: str | None = None):
46+
47+
def __init__(self, memory_limit: str | None = None):
4848
"""Initialize DuckDB adapter.
49-
49+
5050
Args:
51-
threads: Number of threads (None = auto, or from config).
5251
memory_limit: Memory limit (e.g., "4GB", or from config).
5352
"""
5453
config = get_config()
5554
ddb_config = config.duckdb
56-
57-
self.threads = threads if threads is not None else ddb_config.get("threads")
55+
5856
self.memory_limit = memory_limit if memory_limit is not None else ddb_config.get("memory_limit")
5957
self._conn: duckdb.DuckDBPyConnection | None = None
6058
self._table_name: str = ""
@@ -90,10 +88,8 @@ def setup(self, schema: dict[str, Any]) -> None:
9088
"""Initialize DuckDB with in-memory database."""
9189
# Create in-memory connection
9290
self._conn = duckdb.connect(":memory:")
93-
91+
9492
# Configure settings
95-
if self.threads is not None:
96-
self._conn.execute(f"SET threads = {self.threads}")
9793
if self.memory_limit is not None:
9894
self._conn.execute(f"SET memory_limit = '{self.memory_limit}'")
9995

@@ -162,25 +158,23 @@ def get_info(self) -> dict[str, Any]:
162158
info = super().get_info()
163159
info.update({
164160
"duckdb_version": duckdb.__version__,
165-
"threads": self.threads,
166161
"memory_limit": self.memory_limit,
167162
})
168163
return info
169164

170165
def _execute_query(self, query: str) -> AdapterResult:
171166
"""Execute a query and return result metadata.
172-
173-
FAIRNESS: DuckDB execute() runs the full query (not lazy).
174-
We time only execute(), then fetch results afterward for validation.
167+
168+
FAIRNESS: DuckDB execute() can be lazy, so we include fetchall()
169+
in timing to ensure full query materialization is measured.
175170
"""
176171
try:
177-
# Time only query execution
172+
# Time query execution AND result materialization
178173
start_ns = time.perf_counter_ns()
179174
result = self._conn.execute(query)
180-
end_ns = time.perf_counter_ns()
181-
182-
# Fetch results AFTER timing (for row count and checksum validation)
183175
rows = result.fetchall()
176+
end_ns = time.perf_counter_ns()
177+
184178
row_count = len(rows)
185179

186180
# Checksum from sample

adapters/kdb_adapter.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,32 +20,28 @@
2020

2121
class KDBAdapter(Adapter):
2222
"""KDB+/q adapter using subprocess execution.
23-
23+
2424
Uses q's \t command for timing, which measures only query execution
2525
(similar to Rayforce's timeit function).
2626
"""
27-
27+
2828
name = "kdb"
2929
version = "4.0"
3030
embedded = False
31-
31+
3232
def __init__(
3333
self,
3434
binary_path: str | Path | None = None,
35-
threads: int | None = None,
3635
):
3736
"""Initialize KDB adapter.
3837
3938
Args:
4039
binary_path: Path to q binary (default: from config or 'q')
41-
threads: Number of secondary threads (-s flag). Default 3 for 4-core license.
4240
"""
4341
config = get_config()
4442
kdb_config = config.kdb
4543

4644
self.binary_path = Path(binary_path or kdb_config.get("binary", "q"))
47-
# KDB+ -s flag sets secondary threads. 3 secondary + 1 main = 4 total cores
48-
self.threads = threads if threads is not None else kdb_config.get("threads", 3)
4945

5046
self._schema: dict[str, Any] = {}
5147
self._table_name: str = ""
@@ -97,7 +93,7 @@ def setup(self, schema: dict[str, Any]) -> None:
9793
# Try to get version
9894
try:
9995
result = subprocess.run(
100-
[str(self.binary_path), "-s", str(self.threads), "-q"],
96+
[str(self.binary_path), "-q"],
10197
input="\\\\", # Exit immediately
10298
capture_output=True,
10399
text=True,
@@ -134,8 +130,6 @@ def get_info(self) -> dict[str, Any]:
134130
"kdb_version": self.version,
135131
"mode": "subprocess",
136132
"binary_path": str(self.binary_path),
137-
"threads": self.threads, # Secondary threads (-s flag)
138-
"total_cores": self.threads + 1, # Secondary + main thread
139133
})
140134
return info
141135

@@ -191,7 +185,7 @@ def _execute_q(self, expr: str) -> AdapterResult:
191185
"""
192186

193187
result = subprocess.run(
194-
[str(self.binary_path), "-s", str(self.threads), "-q"],
188+
[str(self.binary_path), "-q"],
195189
input=q_script,
196190
capture_output=True,
197191
text=True,

0 commit comments

Comments
 (0)