11import { parquetRead , parquetMetadata } from 'hyparquet' ;
2- import { resolveDFData , resolveDFDataAsync } from './resolveDFData' ;
2+ import { resolveDFData , resolveDFDataAsync , pivotWideSummaryStats } from './resolveDFData' ;
33import { DFData , DFDataRow , ParquetB64Payload } from './DFWhole' ;
44
5- // Fixture generated by Python's sd_to_parquet_b64() with a summary stats dict
6- // containing numeric histogram data for one column.
5+ // Fixture generated by Python's sd_to_parquet_b64() with wide-column layout.
// Loaded with require() (hence the lint suppression below); the cast on the
// next statement applies the ParquetB64Payload type to the loaded JSON.
// eslint-disable-next-line @typescript-eslint/no-var-requires
const fixture = require('./test-fixtures/summary_stats_parquet_b64.json');
const parquetPayload = fixture as ParquetB64Payload;
@@ -29,9 +28,7 @@ describe('resolveDFData', () => {
2928 expect ( resolveDFData ( data ) ) . toBe ( data ) ;
3029 } ) ;
3130
32- it ( 'hyparquet can read the parquet_b64 fixture' , async ( ) => {
33- // Verify the fixture is valid and hyparquet can decode it.
34- // This is independent of resolveDFData — it tests the raw decode path.
31+ it ( 'hyparquet can read the wide-format parquet_b64 fixture' , async ( ) => {
3532 const buf = b64ToArrayBuffer ( parquetPayload . data ) ;
3633 const metadata = parquetMetadata ( buf ) ;
3734 expect ( metadata . row_groups . length ) . toBeGreaterThan ( 0 ) ;
@@ -44,60 +41,124 @@ describe('resolveDFData', () => {
4441 onComplete : ( data : any [ ] ) => { rows . push ( ...data ) ; } ,
4542 } ) ;
4643
47- expect ( rows . length ) . toBeGreaterThan ( 0 ) ;
44+ // Wide format: single row with col__stat columns
45+ expect ( rows . length ) . toBe ( 1 ) ;
46+ const keys = Object . keys ( rows [ 0 ] ) ;
47+ expect ( keys . some ( k => k . includes ( '__' ) ) ) . toBe ( true ) ;
48+ expect ( keys ) . toContain ( 'a__mean' ) ;
49+ expect ( keys ) . toContain ( 'b__dtype' ) ;
50+ } ) ;
4851
49- // Should have an 'index' column with stat names
50- const indices = rows . map ( r => r . index ) . filter ( Boolean ) ;
51- expect ( indices ) . toContain ( 'histogram' ) ;
52- expect ( indices ) . toContain ( 'dtype' ) ;
it('sync resolveDFData returns [] for parquet_b64 (known async limitation)', () => {
  // The synchronous entry point is expected to hand back an empty result for
  // a parquet_b64 payload; this pins that limitation so a change is noticed.
  expect(resolveDFData(parquetPayload).length).toBe(0);
});
5456
55- it ( 'parquet_b64 histogram data round-trips with correct types' , async ( ) => {
56- // Decode the fixture and verify histogram arrays have the right structure.
57- const buf = b64ToArrayBuffer ( parquetPayload . data ) ;
58- const metadata = parquetMetadata ( buf ) ;
it('async resolveDFDataAsync returns pivoted DFData for wide-format parquet', async () => {
  const pivoted = await resolveDFDataAsync(parquetPayload);
  expect(pivoted.length).toBeGreaterThan(0);

  // The resolved data is row-based: one row per stat, identified by its
  // `index` value, with the per-column values under `a` and `b`.
  const expectedRows = [
    { stat: 'mean', a: 50.0, b: 22.0 },
    { stat: 'dtype', a: 'float64', b: 'int64' },
  ];
  for (const expected of expectedRows) {
    const statRow = pivoted.find((row) => row.index === expected.stat);
    expect(statRow).toBeDefined();
    expect(statRow!.a).toBe(expected.a);
    expect(statRow!.b).toBe(expected.b);
  }
});
7072
71- // Column 'a' contains the JSON-encoded histogram array
72- const rawCell = histRow ! [ 'a' ] ;
73- expect ( typeof rawCell ) . toBe ( 'string' ) ;
it('async decode produces histogram arrays from JSON strings', async () => {
  const decoded = await resolveDFDataAsync(parquetPayload);
  const histogramRow = decoded.find((row) => row.index === 'histogram');
  expect(histogramRow).toBeDefined();

  // Column `a` of the histogram row must come back as a real array.
  const cell = histogramRow!.a;
  expect(Array.isArray(cell)).toBe(true);

  const buckets = cell as any[];
  expect(buckets.length).toBe(5);

  // Spot-check the first bucket: numeric population, expected label.
  const firstBucket = buckets[0];
  expect(typeof firstBucket.population).toBe('number');
  expect(firstBucket.name).toBe('0-20');
});
7884
79- // Verify types: population should be a number, not a string
80- const popBar = parsed . find ( ( b : any ) => b . population !== undefined ) ;
81- expect ( popBar ) . toBeDefined ( ) ;
82- expect ( typeof popBar . population ) . toBe ( 'number' ) ;
83- expect ( typeof parsed [ 0 ] . name ) . toBe ( 'string' ) ;
it('async decode produces histogram_bins arrays', async () => {
  const decoded = await resolveDFDataAsync(parquetPayload);
  const histogramBinsRow = decoded.find((row) => row.index === 'histogram_bins');
  expect(histogramBinsRow).toBeDefined();

  // Column `a` of the histogram_bins row must be an array with six entries.
  const cell = histogramBinsRow!.a;
  expect(Array.isArray(cell)).toBe(true);
  expect((cell as number[]).length).toBe(6);
});
93+ } ) ;
8594
86- it ( 'sync resolveDFData returns [] for parquet_b64 (known async limitation)' , ( ) => {
87- // Documents #630: parquetRead is async so the sync wrapper returns [].
88- // Widget components use useResolvedDFDataDict which falls back to async.
89- // The static embed path uses resolveDFDataAsync which works correctly.
90- const result = resolveDFData ( parquetPayload ) ;
91- expect ( result . length ) . toBe ( 0 ) ;
describe('pivotWideSummaryStats', () => {
  // Look up the pivoted row whose `index` equals `stat`; undefined if absent.
  const rowFor = (rows: ReturnType<typeof pivotWideSummaryStats>, stat: string) =>
    rows.find((row) => row.index === stat);

  it('pivots a wide row into row-based DFData', () => {
    // Input keys have the form `<column>__<stat>`.
    const wide = {
      a__mean: 42.5,
      a__dtype: 'float64',
      b__mean: 10.0,
      b__dtype: 'int64',
    };
    const pivoted = pivotWideSummaryStats(wide);

    const means = rowFor(pivoted, 'mean');
    expect(means).toBeDefined();
    expect(means!.a).toBe(42.5);
    expect(means!.b).toBe(10.0);
    // The stat name is also expected under `level_0`.
    expect(means!.level_0).toBe('mean');

    const dtypes = rowFor(pivoted, 'dtype');
    expect(dtypes).toBeDefined();
    expect(dtypes!.a).toBe('float64');
    expect(dtypes!.b).toBe('int64');
  });

  it('JSON-parses list/object values in string cells', () => {
    const wide = {
      a__histogram: '[{"name": "foo", "population": 10}]',
      a__dtype: 'float64',
    };
    const pivoted = pivotWideSummaryStats(wide);

    const histogramRow = rowFor(pivoted, 'histogram');
    expect(histogramRow).toBeDefined();

    // The JSON text must have been turned into an actual array of objects.
    const cell = histogramRow!.a;
    expect(Array.isArray(cell)).toBe(true);
    expect((cell as any[])[0].population).toBe(10);
  });

  it('keeps plain strings as strings (not JSON-parsed)', () => {
    const wide = {
      a__dtype: 'float64',
    };
    const pivoted = pivotWideSummaryStats(wide);
    const dtypes = rowFor(pivoted, 'dtype');
    expect(dtypes!.a).toBe('float64');
  });

  it('handles null values', () => {
    const wide = {
      a__mean: null,
      a__dtype: 'float64',
    };
    const pivoted = pivotWideSummaryStats(wide);
    const means = rowFor(pivoted, 'mean');
    expect(means!.a).toBeNull();
  });

  it('fills missing columns with null', () => {
    // `a` has no dtype and `b` has no mean; both gaps should surface as null.
    const wide = {
      a__mean: 42,
      b__dtype: 'int64',
    };
    const pivoted = pivotWideSummaryStats(wide);

    const means = rowFor(pivoted, 'mean');
    expect(means!.a).toBe(42);
    expect(means!.b).toBeNull();

    const dtypes = rowFor(pivoted, 'dtype');
    expect(dtypes!.a).toBeNull();
    expect(dtypes!.b).toBe('int64');
  });
});
0 commit comments