-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathsmoke_test.py
More file actions
132 lines (98 loc) · 3.69 KB
/
smoke_test.py
File metadata and controls
132 lines (98 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
"""Smoke tests for verifying buckaroo installs correctly with various extras.

Usage:
    python scripts/smoke_test.py base
    python scripts/smoke_test.py polars
    python scripts/smoke_test.py mcp
    python scripts/smoke_test.py marimo
    python scripts/smoke_test.py jupyterlab
    python scripts/smoke_test.py notebook
"""
import sys
def test_base():
    """Smoke-check a bare ``pip install buckaroo`` (pandas comes via fastparquet).

    Builds a three-row pandas frame, pushes it through a
    ``PandasCustomizableDataflow`` subclass configured with
    ``PandasAutocleaning`` and ``NoCleaningConf``, then serializes the frame
    with ``to_parquet`` and ``pd_to_obj``. Prints a one-line OK marker on
    success.

    Raises:
        ImportError: If pandas or one of the buckaroo modules cannot be imported.
        AssertionError: If the dataflow result or a serializer output is
            missing, empty, or has the wrong length.
    """
    # Imports live inside the function so that only the extra being tested
    # has to be installed when the script is run.
    import pandas as pd
    from buckaroo.dataflow.pandas_dataflow import PandasCustomizableDataflow
    from buckaroo.dataflow.autocleaning import PandasAutocleaning
    from buckaroo.customizations.pd_autoclean_conf import NoCleaningConf
    from buckaroo.serialization_utils import pd_to_obj, to_parquet

    frame = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

    # Exercise the whole dataflow pipeline through a minimal subclass.
    class TestDataflow(PandasCustomizableDataflow):
        autocleaning_klass = PandasAutocleaning
        autoclean_conf = (NoCleaningConf,)

    flow = TestDataflow(frame)
    processed = flow.processed_df
    assert processed is not None, "processed_df should not be None"
    assert len(processed) == 3

    # Parquet serialization must yield a non-empty payload.
    as_parquet = to_parquet(frame)
    assert len(as_parquet) > 0, "parquet serialization should produce bytes"

    # Same requirement for the JSON-oriented serializer.
    as_obj = pd_to_obj(frame)
    assert len(as_obj) > 0, "pd_to_obj should produce non-empty output"

    print(" base: OK")
def test_polars():
    """Smoke-check ``pip install buckaroo[polars]``.

    Imports polars and the polars flavour of buckaroo, serializes a small
    frame (with a row index column added) via ``to_parquet``, and confirms
    the widget class was imported. Prints a one-line OK marker on success.

    Raises:
        ImportError: If polars or ``buckaroo.polars_buckaroo`` is unavailable.
        AssertionError: If serialization returns an empty payload.
    """
    import polars as pl
    from buckaroo.polars_buckaroo import PolarsBuckarooWidget, to_parquet

    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

    # Polars parquet serialization, fed the frame plus a row index column.
    indexed = frame.with_row_index()
    payload = to_parquet(indexed)
    assert len(payload) > 0, "polars parquet serialization should produce bytes"

    # The import above is the real check; this just keeps the name in use.
    assert PolarsBuckarooWidget is not None

    print(" polars: OK")
def test_mcp():
    """Smoke-check ``pip install buckaroo[mcp]``.

    Imports ``mcp`` and ``tornado``, confirms that ``buckaroo_mcp_tool``
    exposes a ``main`` attribute, and imports ``make_app`` from the buckaroo
    server module. Prints a one-line OK marker on success.

    Raises:
        ImportError: If any of the required modules cannot be imported.
        AssertionError: If ``buckaroo_mcp_tool`` has no ``main`` attribute.
    """
    # Third-party packages pulled in by the extra.
    import mcp  # noqa: F401
    import tornado  # noqa: F401

    # The MCP tool module must import and expose its entry point.
    import buckaroo_mcp_tool  # noqa: F401

    has_entry_point = hasattr(buckaroo_mcp_tool, "main")
    assert has_entry_point

    # The server module (which uses tornado) must import cleanly.
    from buckaroo.server.app import make_app  # noqa: F401

    print(" mcp: OK")
def test_marimo():
    """Smoke-check ``pip install buckaroo[marimo]``.

    Succeeds when ``marimo`` and the three names used from
    ``buckaroo.marimo_utils`` can all be imported. Prints a one-line OK
    marker on success.

    Raises:
        ImportError: If marimo or any of the buckaroo marimo helpers is missing.
    """
    import marimo  # noqa: F401

    # Import-only check: the names are not used beyond proving they exist.
    from buckaroo.marimo_utils import marimo_monkeypatch  # noqa: F401
    from buckaroo.marimo_utils import marimo_unmonkeypatch  # noqa: F401
    from buckaroo.marimo_utils import BuckarooDataFrame  # noqa: F401

    print(" marimo: OK")
def test_jupyterlab():
    """Smoke-check ``pip install buckaroo[jupyterlab]``.

    Imports ``jupyterlab`` and requires its major version to be at least 3.
    Prints a one-line OK marker on success.

    Raises:
        ImportError: If jupyterlab is not installed.
        ValueError: If the text before the first dot of the version string
            is not an integer.
        AssertionError: If the installed major version is below 3.
    """
    import jupyterlab

    # Everything before the first "." is the major version component.
    major_text, _, _ = jupyterlab.__version__.partition(".")
    major = int(major_text)
    assert major >= 3, f"jupyterlab {jupyterlab.__version__} too old"

    print(" jupyterlab: OK")
def test_notebook():
    """Smoke-check ``pip install buckaroo[notebook]``.

    Imports ``notebook`` and requires its major version to be at least 7.
    Prints a one-line OK marker on success.

    Raises:
        ImportError: If notebook is not installed.
        ValueError: If the text before the first dot of the version string
            is not an integer.
        AssertionError: If the installed major version is below 7.
    """
    import notebook

    # Everything before the first "." is the major version component.
    major_text, _, _ = notebook.__version__.partition(".")
    major = int(major_text)
    assert major >= 7, f"notebook {notebook.__version__} too old"

    print(" notebook: OK")
# Registry of smoke tests keyed by the name accepted on the command line.
# Insertion order is the order in which names appear in the usage and
# "unknown test" messages.
TESTS = dict(
    base=test_base,
    polars=test_polars,
    mcp=test_mcp,
    marimo=test_marimo,
    jupyterlab=test_jupyterlab,
    notebook=test_notebook,
)
if __name__ == "__main__":
    # Command-line entry point: run the single smoke test named by the first
    # argument. Exits with status 1 on a missing or unknown name, or when the
    # selected test raises; otherwise falls through to a normal exit.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(f"Usage: {sys.argv[0]} <{'|'.join(TESTS.keys())}>")
        sys.exit(1)

    name = cli_args[0]
    selected = TESTS.get(name)
    if selected is None:
        print(f"Unknown test: {name!r}. Choose from: {', '.join(TESTS.keys())}")
        sys.exit(1)

    try:
        selected()
    except Exception as e:
        # Top-level boundary: report any failure (import error, failed
        # assertion, ...) with its traceback and signal it via the exit code.
        print(f" {name}: FAILED — {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)