Skip to content

Commit e038ca6

Browse files
committed
fix(go): handle empty SchemaBytes for 0-row query results
When databricks-sql-go returns 0-row results, SchemaBytes() can intermittently return empty bytes due to a bug where the IPC stream iterator is created with nil schema bytes on the cold-start path (databricks/databricks-sql-go#312, databricks/databricks-sql-go#327).

Add a fallback that builds an Arrow schema from driver.Rows column metadata (Columns, ColumnTypeDatabaseTypeName, ColumnTypeNullable) when SchemaBytes is empty, instead of returning an error.

Why we're not using `databricks/databricks-sql-go#327`:

1. **The fix is incomplete** — as reviewer @vikrantpuppala [noted](databricks/databricks-sql-go#327), `r.resultSetMetadata` is populated lazily, so it can be `nil` when `GetArrowIPCStreams()` is called before `Columns()`/`Next()`, causing `GetArrowSchemaBytes` to silently return `nil, nil` and the bug to persist.
2. **The PR is not approved** and has unresolved review comments (missing tests, duplicated logic, context param ordering).
3. **Our workaround is simple and robust** — we use `driver.Rows.Columns()` + `ColumnTypeDatabaseTypeName()` + `ColumnTypeNullable()`, which are always available regardless of lazy metadata loading.
1 parent 8d056d1 commit e038ca6

1 file changed

Lines changed: 75 additions & 3 deletions

File tree

go/ipc_reader_adapter.go

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"errors"
3030
"fmt"
3131
"io"
32+
"strings"
3233
"sync/atomic"
3334

3435
"github.com/apache/arrow-adbc/go/adbc"
@@ -103,10 +104,19 @@ func newIPCReaderAdapter(ctx context.Context, rows driver.Rows) (array.RecordRea
103104
}
104105

105106
if len(schema_bytes) == 0 {
106-
return nil, adbc.Error{
107-
Code: adbc.StatusInternal,
108-
Msg: "schema bytes are empty and no data available",
107+
// Workaround for https://github.com/databricks/databricks-sql-go/pull/327
108+
// SchemaBytes() can be empty when databricks-sql-go doesn't
109+
// propagate the schema for 0-row results. Fall back to
110+
// building the schema from driver.Rows column metadata.
111+
schema, err := schemaFromRowsMetadata(rows)
112+
if err != nil {
113+
return nil, adbc.Error{
114+
Code: adbc.StatusInternal,
115+
Msg: fmt.Sprintf("schema bytes are empty and failed to build schema from column metadata: %v", err),
116+
}
109117
}
118+
adapter.schema = schema
119+
return adapter, nil
110120
}
111121

112122
reader, err := ipc.NewReader(bytes.NewReader(schema_bytes))
@@ -130,6 +140,68 @@ func newIPCReaderAdapter(ctx context.Context, rows driver.Rows) (array.RecordRea
130140
return adapter, nil
131141
}
132142

143+
// schemaFromRowsMetadata builds an Arrow schema from driver.Rows column
144+
// metadata. This is used as a fallback when SchemaBytes() is empty for
145+
// 0-row result sets: https://github.com/databricks/databricks-sql-go/pull/327
146+
func schemaFromRowsMetadata(rows driver.Rows) (*arrow.Schema, error) {
147+
typed, ok := rows.(driver.RowsColumnTypeDatabaseTypeName)
148+
if !ok {
149+
return nil, fmt.Errorf("driver.Rows does not implement RowsColumnTypeDatabaseTypeName")
150+
}
151+
152+
nullableTyped, hasNullable := rows.(driver.RowsColumnTypeNullable)
153+
154+
columns := rows.Columns()
155+
fields := make([]arrow.Field, len(columns))
156+
for i, name := range columns {
157+
dbType := typed.ColumnTypeDatabaseTypeName(i)
158+
nullable := true
159+
if hasNullable {
160+
if n, ok := nullableTyped.ColumnTypeNullable(i); ok {
161+
nullable = n
162+
}
163+
}
164+
fields[i] = arrow.Field{
165+
Name: name,
166+
Type: databricksTypeToArrow(dbType),
167+
Nullable: nullable,
168+
}
169+
}
170+
return arrow.NewSchema(fields, nil), nil
171+
}
172+
173+
// databricksTypeToArrow maps a Databricks SQL type name to an Arrow data type.
174+
func databricksTypeToArrow(dbType string) arrow.DataType {
175+
switch strings.ToUpper(dbType) {
176+
case "BOOLEAN":
177+
return arrow.FixedWidthTypes.Boolean
178+
case "BYTE", "TINYINT":
179+
return arrow.PrimitiveTypes.Int8
180+
case "SHORT", "SMALLINT":
181+
return arrow.PrimitiveTypes.Int16
182+
case "INT", "INTEGER":
183+
return arrow.PrimitiveTypes.Int32
184+
case "LONG", "BIGINT":
185+
return arrow.PrimitiveTypes.Int64
186+
case "FLOAT":
187+
return arrow.PrimitiveTypes.Float32
188+
case "DOUBLE":
189+
return arrow.PrimitiveTypes.Float64
190+
case "STRING":
191+
return arrow.BinaryTypes.String
192+
case "BINARY":
193+
return arrow.BinaryTypes.Binary
194+
case "DATE":
195+
return arrow.FixedWidthTypes.Date32
196+
case "TIMESTAMP", "TIMESTAMP_NTZ":
197+
return arrow.FixedWidthTypes.Timestamp_us
198+
case "DECIMAL":
199+
return &arrow.Decimal128Type{Precision: 38, Scale: 18}
200+
default:
201+
return arrow.BinaryTypes.String
202+
}
203+
}
204+
133205
func (r *ipcReaderAdapter) loadNextReader() error {
134206
if r.currentReader != nil {
135207
r.currentReader.Release()

0 commit comments

Comments
 (0)