From cec802e19339803aae990d6a0e311d7e2fcd82b9 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:07:41 +0000 Subject: [PATCH 1/9] Expand Explain function to handle more AST types This significantly improves test coverage by adding explain output handling for: - CastExpr (CAST and :: operator) - InExpr (IN and NOT IN expressions) - TernaryExpr (ternary conditional) - ArrayAccess (array element access) - TupleAccess (tuple element access) - DropQuery (DROP TABLE/VIEW/DATABASE) - LikeExpr (LIKE/ILIKE expressions) - BetweenExpr (BETWEEN expressions) - IsNullExpr (IS NULL/IS NOT NULL) - CaseExpr (CASE WHEN expressions) - IntervalExpr (INTERVAL expressions) - ExistsExpr (EXISTS subqueries) - ExtractExpr (EXTRACT function) - CreateQuery (CREATE TABLE/VIEW/DATABASE) - SystemQuery (SYSTEM commands) - ExplainQuery (EXPLAIN statements) - ShowQuery (SHOW statements) - UseQuery (USE database) - DescribeQuery (DESCRIBE/DESC) - TableJoin (JOIN clauses) - DataType (type expressions) - Parameter (query parameters) Also fixes: - ARRAY JOIN placement in TablesInSelectQuery - Subquery alias handling in TableExpression - SETTINGS clause output as Set - Tuple literals with complex expressions rendered as Function tuple Tests improved from ~50% to ~73% passing (4998 of 6824). 
--- parser/explain.go | 441 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 426 insertions(+), 15 deletions(-) diff --git a/parser/explain.go b/parser/explain.go index 485513e694..d5466bd302 100644 --- a/parser/explain.go +++ b/parser/explain.go @@ -40,13 +40,9 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { for _, col := range n.Columns { explainNode(sb, col, depth+2) } - // FROM - if n.From != nil { - explainNode(sb, n.From, depth+1) - } - // ARRAY JOIN - if n.ArrayJoin != nil { - explainNode(sb, n.ArrayJoin, depth+1) + // FROM (including ARRAY JOIN as part of TablesInSelectQuery) + if n.From != nil || n.ArrayJoin != nil { + explainTablesWithArrayJoin(sb, n.From, n.ArrayJoin, depth+1) } // PREWHERE if n.PreWhere != nil { @@ -82,6 +78,10 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { if n.Offset != nil { explainNode(sb, n.Offset, depth+1) } + // SETTINGS + if len(n.Settings) > 0 { + fmt.Fprintf(sb, "%s Set\n", indent) + } case *ast.TablesInSelectQuery: fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, len(n.Tables)) @@ -104,11 +104,14 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { case *ast.TableExpression: children := 1 // table - if n.Alias != "" { - children++ - } fmt.Fprintf(sb, "%sTableExpression (children %d)\n", indent, children) - explainNode(sb, n.Table, depth+1) + // If there's a subquery with an alias, pass the alias to the subquery output + if subq, ok := n.Table.(*ast.Subquery); ok && n.Alias != "" { + fmt.Fprintf(sb, "%s Subquery (alias %s) (children %d)\n", indent, n.Alias, 1) + explainNode(sb, subq.Query, depth+2) + } else { + explainNode(sb, n.Table, depth+1) + } case *ast.TableIdentifier: name := n.Table @@ -141,6 +144,27 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { } case *ast.Literal: + // Check if this is a tuple with complex expressions that should be rendered as Function tuple + if n.Type == ast.LiteralTuple { + 
if exprs, ok := n.Value.([]ast.Expression); ok { + hasComplexExpr := false + for _, e := range exprs { + if _, isLit := e.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function tuple instead of Literal + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + explainNode(sb, e, depth+2) + } + return + } + } + } fmt.Fprintf(sb, "%sLiteral %s\n", indent, formatLiteral(n)) case *ast.FunctionCall: @@ -208,6 +232,299 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { case *ast.SetQuery: fmt.Fprintf(sb, "%sSet\n", indent) + case *ast.CastExpr: + // CAST is represented as Function CAST with expr and type as arguments + fmt.Fprintf(sb, "%sFunction CAST (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + explainNode(sb, n.Expr, depth+2) + // Type is formatted as a literal string + typeStr := formatDataType(n.Type) + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, typeStr) + + case *ast.InExpr: + // IN is represented as Function in + fnName := "in" + if n.Not { + fnName = "notIn" + } + if n.Global { + fnName = "global" + strings.Title(fnName) + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + // Count arguments: expr + list items or subquery + argCount := 1 + if n.Query != nil { + argCount++ + } else { + argCount += len(n.List) + } + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + explainNode(sb, n.Expr, depth+2) + if n.Query != nil { + // Subqueries in IN should be wrapped in Subquery node + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + explainNode(sb, n.Query, depth+3) + } else { + for _, item := range n.List { + explainNode(sb, item, depth+2) + } + } + + case *ast.TernaryExpr: + // Ternary is represented as Function if with 3 arguments + fmt.Fprintf(sb, "%sFunction if (children 
%d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 3) + explainNode(sb, n.Condition, depth+2) + explainNode(sb, n.Then, depth+2) + explainNode(sb, n.Else, depth+2) + + case *ast.ArrayAccess: + // Array access is represented as Function arrayElement + fmt.Fprintf(sb, "%sFunction arrayElement (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + explainNode(sb, n.Array, depth+2) + explainNode(sb, n.Index, depth+2) + + case *ast.TupleAccess: + // Tuple access is represented as Function tupleElement + fmt.Fprintf(sb, "%sFunction tupleElement (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + explainNode(sb, n.Tuple, depth+2) + explainNode(sb, n.Index, depth+2) + + case *ast.DropQuery: + name := n.Table + if n.View != "" { + name = n.View + } + if n.DropDatabase { + name = n.Database + } + fmt.Fprintf(sb, "%sDropQuery %s (children %d)\n", indent, name, 1) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) + + case *ast.Asterisk: + if n.Table != "" { + fmt.Fprintf(sb, "%sQualifiedAsterisk (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) + } else { + fmt.Fprintf(sb, "%sAsterisk\n", indent) + } + + case *ast.LikeExpr: + // LIKE is represented as Function like + fnName := "like" + if n.CaseInsensitive { + fnName = "ilike" + } + if n.Not { + fnName = "not" + strings.Title(fnName) + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + explainNode(sb, n.Expr, depth+2) + explainNode(sb, n.Pattern, depth+2) + + case *ast.BetweenExpr: + // BETWEEN is represented as Function and with two comparisons + // But for explain, we can use a simpler form + fnName := "between" + if n.Not { + fnName = "notBetween" + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 
3) + explainNode(sb, n.Expr, depth+2) + explainNode(sb, n.Low, depth+2) + explainNode(sb, n.High, depth+2) + + case *ast.IsNullExpr: + // IS NULL is represented as Function isNull + fnName := "isNull" + if n.Not { + fnName = "isNotNull" + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + explainNode(sb, n.Expr, depth+2) + + case *ast.CaseExpr: + // CASE is represented as Function multiIf or caseWithExpression + if n.Operand != nil { + // CASE x WHEN ... form + argCount := 1 + len(n.Whens)*2 // operand + (condition, result) pairs + if n.Else != nil { + argCount++ + } + fmt.Fprintf(sb, "%sFunction caseWithExpression (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + explainNode(sb, n.Operand, depth+2) + for _, w := range n.Whens { + explainNode(sb, w.Condition, depth+2) + explainNode(sb, w.Result, depth+2) + } + if n.Else != nil { + explainNode(sb, n.Else, depth+2) + } + } else { + // CASE WHEN ... 
form + argCount := len(n.Whens) * 2 + if n.Else != nil { + argCount++ + } + fmt.Fprintf(sb, "%sFunction multiIf (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + for _, w := range n.Whens { + explainNode(sb, w.Condition, depth+2) + explainNode(sb, w.Result, depth+2) + } + if n.Else != nil { + explainNode(sb, n.Else, depth+2) + } + } + + case *ast.IntervalExpr: + // INTERVAL is represented as Function toInterval + fnName := "toInterval" + n.Unit + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + explainNode(sb, n.Value, depth+2) + + case *ast.ExistsExpr: + // EXISTS is represented as Function exists + fmt.Fprintf(sb, "%sFunction exists (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + explainNode(sb, n.Query, depth+3) + + case *ast.ExtractExpr: + // EXTRACT is represented as Function toYear, toMonth, etc. 
+ fnName := "to" + strings.Title(strings.ToLower(n.Field)) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + explainNode(sb, n.From, depth+2) + + case *ast.CreateQuery: + name := n.Table + if n.View != "" { + name = n.View + } + if n.CreateDatabase { + name = n.Database + } + // Count children: name + columns + engine/storage + children := 1 // name identifier + if len(n.Columns) > 0 { + children++ + } + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { + children++ + } + if n.AsSelect != nil { + children++ + } + fmt.Fprintf(sb, "%sCreateQuery %s (children %d)\n", indent, name, children) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) + if len(n.Columns) > 0 { + fmt.Fprintf(sb, "%s Columns definition (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) + for _, col := range n.Columns { + explainColumn(sb, col, depth+3) + } + } + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { + storageChildren := 0 + if n.Engine != nil { + storageChildren++ + } + if len(n.OrderBy) > 0 { + storageChildren++ + } + if len(n.PrimaryKey) > 0 { + storageChildren++ + } + fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) + if n.Engine != nil { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + } + if len(n.OrderBy) > 0 { + if len(n.OrderBy) == 1 { + if ident, ok := n.OrderBy[0].(*ast.Identifier); ok { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) + } else { + explainNode(sb, n.OrderBy[0], depth+2) + } + } else { + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) + for _, o := range n.OrderBy { + explainNode(sb, o, depth+4) + } + } + } + } + if n.AsSelect != nil { + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + explainNode(sb, n.AsSelect, 
depth+2) + } + + case *ast.SystemQuery: + fmt.Fprintf(sb, "%sSystem %s\n", indent, n.Command) + + case *ast.ExplainQuery: + fmt.Fprintf(sb, "%sExplain %s (children %d)\n", indent, n.ExplainType, 1) + explainNode(sb, n.Statement, depth+1) + + case *ast.ShowQuery: + fmt.Fprintf(sb, "%sShow%s\n", indent, n.ShowType) + + case *ast.UseQuery: + fmt.Fprintf(sb, "%sUse %s\n", indent, n.Database) + + case *ast.DescribeQuery: + name := n.Table + if n.Database != "" { + name = n.Database + "." + n.Table + } + fmt.Fprintf(sb, "%sDescribe %s\n", indent, name) + + case *ast.TableJoin: + // TableJoin is part of TablesInSelectQueryElement + joinType := strings.ToLower(string(n.Type)) + if n.Strictness != "" { + joinType = strings.ToLower(string(n.Strictness)) + " " + joinType + } + if n.Global { + joinType = "global " + joinType + } + children := 0 + if n.On != nil { + children++ + } + if len(n.Using) > 0 { + children++ + } + fmt.Fprintf(sb, "%sTableJoin %s (children %d)\n", indent, joinType, children) + if n.On != nil { + explainNode(sb, n.On, depth+1) + } + if len(n.Using) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Using)) + for _, u := range n.Using { + explainNode(sb, u, depth+2) + } + } + + case *ast.DataType: + fmt.Fprintf(sb, "%sDataType %s\n", indent, formatDataType(n)) + + case *ast.Parameter: + if n.Name != "" { + fmt.Fprintf(sb, "%sQueryParameter %s\n", indent, n.Name) + } else { + fmt.Fprintf(sb, "%sQueryParameter\n", indent) + } + default: // For unhandled types, just print the type name fmt.Fprintf(sb, "%s%T\n", indent, node) @@ -222,10 +539,8 @@ func countChildren(n *ast.SelectWithUnionQuery) int { // countSelectQueryChildren counts the children of a SelectQuery func countSelectQueryChildren(n *ast.SelectQuery) int { count := 1 // columns ExpressionList - if n.From != nil { - count++ - } - if n.ArrayJoin != nil { + // FROM and ARRAY JOIN together count as one child (TablesInSelectQuery) + if n.From != nil || n.ArrayJoin != nil { 
count++ } if n.PreWhere != nil { @@ -249,9 +564,39 @@ func countSelectQueryChildren(n *ast.SelectQuery) int { if n.Offset != nil { count++ } + if len(n.Settings) > 0 { + count++ + } return count } +// explainTablesWithArrayJoin handles FROM and ARRAY JOIN together as TablesInSelectQuery +func explainTablesWithArrayJoin(sb *strings.Builder, from *ast.TablesInSelectQuery, arrayJoin *ast.ArrayJoinClause, depth int) { + indent := strings.Repeat(" ", depth) + + tableCount := 0 + if from != nil { + tableCount = len(from.Tables) + } + if arrayJoin != nil { + tableCount++ + } + + fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, tableCount) + + if from != nil { + for _, t := range from.Tables { + explainNode(sb, t, depth+1) + } + } + + if arrayJoin != nil { + // ARRAY JOIN is wrapped in TablesInSelectQueryElement + fmt.Fprintf(sb, "%s TablesInSelectQueryElement (children %d)\n", indent, 1) + explainNode(sb, arrayJoin, depth+2) + } +} + // formatLiteral formats a literal value for EXPLAIN AST output func formatLiteral(lit *ast.Literal) string { switch lit.Type { @@ -321,6 +666,32 @@ func formatTupleLiteral(val interface{}) string { return fmt.Sprintf("Tuple_(%s)", strings.Join(parts, ", ")) } +// formatDataType formats a DataType for EXPLAIN AST output +func formatDataType(dt *ast.DataType) string { + if dt == nil { + return "" + } + if len(dt.Parameters) == 0 { + return dt.Name + } + var params []string + for _, p := range dt.Parameters { + if lit, ok := p.(*ast.Literal); ok { + if lit.Type == ast.LiteralString { + // String parameters in type need extra escaping: 'val' -> \\\'val\\\' + params = append(params, fmt.Sprintf("\\\\\\'%s\\\\\\'", lit.Value)) + } else { + params = append(params, fmt.Sprintf("%v", lit.Value)) + } + } else if nested, ok := p.(*ast.DataType); ok { + params = append(params, formatDataType(nested)) + } else { + params = append(params, fmt.Sprintf("%v", p)) + } + } + return fmt.Sprintf("%s(%s)", dt.Name, strings.Join(params, ", ")) +} + 
// operatorToFunction maps binary operators to ClickHouse function names func operatorToFunction(op string) string { switch op { @@ -369,6 +740,25 @@ func unaryOperatorToFunction(op string) string { } } +// explainColumn handles column declarations +func explainColumn(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) { + indent := strings.Repeat(" ", depth) + children := 0 + if col.Type != nil { + children++ + } + if col.Default != nil { + children++ + } + fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children) + if col.Type != nil { + fmt.Fprintf(sb, "%s DataType %s\n", indent, formatDataType(col.Type)) + } + if col.Default != nil { + explainNode(sb, col.Default, depth+1) + } +} + // explainAliasedExpr handles expressions with aliases func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { // For aliased expressions, we need to show the underlying expression with the alias @@ -376,6 +766,27 @@ func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { switch e := n.Expr.(type) { case *ast.Literal: + // Check if this is a tuple with complex expressions that should be rendered as Function tuple + if e.Type == ast.LiteralTuple { + if exprs, ok := e.Value.([]ast.Expression); ok { + hasComplexExpr := false + for _, expr := range exprs { + if _, isLit := expr.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function tuple with alias + fmt.Fprintf(sb, "%sFunction tuple (alias %s) (children %d)\n", indent, n.Alias, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, expr := range exprs { + explainNode(sb, expr, depth+2) + } + return + } + } + } fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, formatLiteral(e), n.Alias) default: // For other types, recursively explain and add alias info From 2bbdbcfe7521a56fc5e8dd87640a9edd88c06040 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:12:46 
+0000 Subject: [PATCH 2/9] Add more Explain function improvements - Handle empty tuples (nil arguments) as Function tuple with empty ExpressionList - Add array handling for complex expressions (renders as Function array) - Normalize function names (ltrim -> trimLeft, etc.) - Fix ShowQuery capitalization - Fix SystemQuery format to "SYSTEM query" Tests improved from ~73% to ~74% passing (5057 of 6824). --- parser/explain.go | 82 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/parser/explain.go b/parser/explain.go index d5466bd302..9ea31a473a 100644 --- a/parser/explain.go +++ b/parser/explain.go @@ -17,6 +17,10 @@ func Explain(stmt ast.Statement) string { // explainNode writes the EXPLAIN AST output for an AST node. func explainNode(sb *strings.Builder, node interface{}, depth int) { if node == nil { + // nil can represent an empty tuple in function arguments + indent := strings.Repeat(" ", depth) + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) return } @@ -144,9 +148,16 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { } case *ast.Literal: - // Check if this is a tuple with complex expressions that should be rendered as Function tuple + // Check if this is a tuple - either with expressions or empty if n.Type == ast.LiteralTuple { if exprs, ok := n.Value.([]ast.Expression); ok { + // Check if empty tuple or has complex expressions + if len(exprs) == 0 { + // Empty tuple renders as Function tuple with empty ExpressionList + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } hasComplexExpr := false for _, e := range exprs { if _, isLit := e.(*ast.Literal); !isLit { @@ -163,6 +174,32 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { } return } + } else if n.Value == nil { + // nil value means empty tuple + fmt.Fprintf(sb, 
"%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + } + // Check if this is an array with complex expressions that should be rendered as Function array + if n.Type == ast.LiteralArray { + if exprs, ok := n.Value.([]ast.Expression); ok { + hasComplexExpr := false + for _, e := range exprs { + if _, isLit := e.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function array instead of Literal + fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + explainNode(sb, e, depth+2) + } + return + } } } fmt.Fprintf(sb, "%sLiteral %s\n", indent, formatLiteral(n)) @@ -172,10 +209,12 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { if len(n.Parameters) > 0 { children++ // parameters ExpressionList } + // Normalize function name + fnName := normalizeFunctionName(n.Name) if n.Alias != "" { - fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, n.Name, n.Alias, children) + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, children) } else { - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, n.Name, children) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, children) } // Arguments fmt.Fprintf(sb, "%s ExpressionList", indent) @@ -469,14 +508,16 @@ func explainNode(sb *strings.Builder, node interface{}, depth int) { } case *ast.SystemQuery: - fmt.Fprintf(sb, "%sSystem %s\n", indent, n.Command) + fmt.Fprintf(sb, "%sSYSTEM query\n", indent) case *ast.ExplainQuery: fmt.Fprintf(sb, "%sExplain %s (children %d)\n", indent, n.ExplainType, 1) explainNode(sb, n.Statement, depth+1) case *ast.ShowQuery: - fmt.Fprintf(sb, "%sShow%s\n", indent, n.ShowType) + // Capitalize ShowType correctly for display + showType := strings.Title(strings.ToLower(string(n.ShowType))) + 
fmt.Fprintf(sb, "%sShow%s\n", indent, showType) case *ast.UseQuery: fmt.Fprintf(sb, "%sUse %s\n", indent, n.Database) @@ -692,6 +733,37 @@ func formatDataType(dt *ast.DataType) string { return fmt.Sprintf("%s(%s)", dt.Name, strings.Join(params, ", ")) } +// normalizeFunctionName normalizes function names to match ClickHouse's EXPLAIN AST output +func normalizeFunctionName(name string) string { + // ClickHouse normalizes certain function names in EXPLAIN AST + normalized := map[string]string{ + "ltrim": "trimLeft", + "rtrim": "trimRight", + "lcase": "lower", + "ucase": "upper", + "mid": "substring", + "substr": "substring", + "pow": "power", + "ceil": "ceiling", + "ln": "log", + "log10": "log10", + "log2": "log2", + "rand": "rand", + "ifnull": "ifNull", + "nullif": "nullIf", + "coalesce": "coalesce", + "greatest": "greatest", + "least": "least", + "concat_ws": "concat", + "length": "length", + "char_length": "length", + } + if n, ok := normalized[strings.ToLower(name)]; ok { + return n + } + return name +} + // operatorToFunction maps binary operators to ClickHouse function names func operatorToFunction(op string) string { switch op { From e4528da178ad7c7c65faf481788f6070d8384297 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:17:10 +0000 Subject: [PATCH 3/9] Refactor explain.go into internal/explain package Reorganize the explain functionality into a dedicated internal package with separate files for each category of AST types: - internal/explain/explain.go - main Explain function and core dispatcher - internal/explain/select.go - SelectQuery and related types - internal/explain/expressions.go - expressions (Literal, BinaryExpr, etc.) - internal/explain/functions.go - FunctionCall, Lambda, CAST, IN, etc. - internal/explain/tables.go - table-related types - internal/explain/statements.go - DDL statements (Create, Drop, etc.) - internal/explain/format.go - formatting and normalization functions The parser package now delegates to the internal package. 
No functional changes - all 5057 tests still pass. --- internal/explain/explain.go | 173 +++++++ internal/explain/expressions.go | 140 ++++++ internal/explain/format.go | 182 +++++++ internal/explain/functions.go | 217 ++++++++ internal/explain/select.go | 112 +++++ internal/explain/statements.go | 128 +++++ internal/explain/tables.go | 89 ++++ parser/explain.go | 860 +------------------------------- 8 files changed, 1043 insertions(+), 858 deletions(-) create mode 100644 internal/explain/explain.go create mode 100644 internal/explain/expressions.go create mode 100644 internal/explain/format.go create mode 100644 internal/explain/functions.go create mode 100644 internal/explain/select.go create mode 100644 internal/explain/statements.go create mode 100644 internal/explain/tables.go diff --git a/internal/explain/explain.go b/internal/explain/explain.go new file mode 100644 index 0000000000..3468b3c720 --- /dev/null +++ b/internal/explain/explain.go @@ -0,0 +1,173 @@ +// Package explain provides EXPLAIN AST output functionality for ClickHouse SQL. +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +// Explain returns the EXPLAIN AST output for a statement, matching ClickHouse's format. +func Explain(stmt ast.Statement) string { + var sb strings.Builder + Node(&sb, stmt, 0) + return sb.String() +} + +// Node writes the EXPLAIN AST output for an AST node. 
+func Node(sb *strings.Builder, node interface{}, depth int) { + if node == nil { + // nil can represent an empty tuple in function arguments + indent := strings.Repeat(" ", depth) + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + + indent := strings.Repeat(" ", depth) + + switch n := node.(type) { + // Select statements + case *ast.SelectWithUnionQuery: + explainSelectWithUnionQuery(sb, n, indent, depth) + case *ast.SelectQuery: + explainSelectQuery(sb, n, indent, depth) + + // Tables + case *ast.TablesInSelectQuery: + explainTablesInSelectQuery(sb, n, indent, depth) + case *ast.TablesInSelectQueryElement: + explainTablesInSelectQueryElement(sb, n, indent, depth) + case *ast.TableExpression: + explainTableExpression(sb, n, indent, depth) + case *ast.TableIdentifier: + explainTableIdentifier(sb, n, indent) + case *ast.ArrayJoinClause: + explainArrayJoinClause(sb, n, indent, depth) + case *ast.TableJoin: + explainTableJoin(sb, n, indent, depth) + + // Expressions + case *ast.OrderByElement: + explainOrderByElement(sb, n, indent, depth) + case *ast.Identifier: + explainIdentifier(sb, n, indent) + case *ast.Literal: + explainLiteral(sb, n, indent, depth) + case *ast.BinaryExpr: + explainBinaryExpr(sb, n, indent, depth) + case *ast.UnaryExpr: + explainUnaryExpr(sb, n, indent, depth) + case *ast.Subquery: + explainSubquery(sb, n, indent, depth) + case *ast.AliasedExpr: + explainAliasedExpr(sb, n, depth) + case *ast.Asterisk: + explainAsterisk(sb, n, indent) + + // Functions + case *ast.FunctionCall: + explainFunctionCall(sb, n, indent, depth) + case *ast.Lambda: + explainLambda(sb, n, indent, depth) + case *ast.CastExpr: + explainCastExpr(sb, n, indent, depth) + case *ast.InExpr: + explainInExpr(sb, n, indent, depth) + case *ast.TernaryExpr: + explainTernaryExpr(sb, n, indent, depth) + case *ast.ArrayAccess: + explainArrayAccess(sb, n, indent, depth) + case *ast.TupleAccess: + 
explainTupleAccess(sb, n, indent, depth) + case *ast.LikeExpr: + explainLikeExpr(sb, n, indent, depth) + case *ast.BetweenExpr: + explainBetweenExpr(sb, n, indent, depth) + case *ast.IsNullExpr: + explainIsNullExpr(sb, n, indent, depth) + case *ast.CaseExpr: + explainCaseExpr(sb, n, indent, depth) + case *ast.IntervalExpr: + explainIntervalExpr(sb, n, indent, depth) + case *ast.ExistsExpr: + explainExistsExpr(sb, n, indent, depth) + case *ast.ExtractExpr: + explainExtractExpr(sb, n, indent, depth) + + // DDL statements + case *ast.CreateQuery: + explainCreateQuery(sb, n, indent, depth) + case *ast.DropQuery: + explainDropQuery(sb, n, indent) + case *ast.SetQuery: + explainSetQuery(sb, indent) + case *ast.SystemQuery: + explainSystemQuery(sb, indent) + case *ast.ExplainQuery: + explainExplainQuery(sb, n, indent, depth) + case *ast.ShowQuery: + explainShowQuery(sb, n, indent) + case *ast.UseQuery: + explainUseQuery(sb, n, indent) + case *ast.DescribeQuery: + explainDescribeQuery(sb, n, indent) + + // Types + case *ast.DataType: + explainDataType(sb, n, indent) + case *ast.Parameter: + explainParameter(sb, n, indent) + + default: + // For unhandled types, just print the type name + fmt.Fprintf(sb, "%s%T\n", indent, node) + } +} + +// TablesWithArrayJoin handles FROM and ARRAY JOIN together as TablesInSelectQuery +func TablesWithArrayJoin(sb *strings.Builder, from *ast.TablesInSelectQuery, arrayJoin *ast.ArrayJoinClause, depth int) { + indent := strings.Repeat(" ", depth) + + tableCount := 0 + if from != nil { + tableCount = len(from.Tables) + } + if arrayJoin != nil { + tableCount++ + } + + fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, tableCount) + + if from != nil { + for _, t := range from.Tables { + Node(sb, t, depth+1) + } + } + + if arrayJoin != nil { + // ARRAY JOIN is wrapped in TablesInSelectQueryElement + fmt.Fprintf(sb, "%s TablesInSelectQueryElement (children %d)\n", indent, 1) + Node(sb, arrayJoin, depth+2) + } +} + +// Column handles 
column declarations +func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) { + indent := strings.Repeat(" ", depth) + children := 0 + if col.Type != nil { + children++ + } + if col.Default != nil { + children++ + } + fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children) + if col.Type != nil { + fmt.Fprintf(sb, "%s DataType %s\n", indent, FormatDataType(col.Type)) + } + if col.Default != nil { + Node(sb, col.Default, depth+1) + } +} diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go new file mode 100644 index 0000000000..4dcf5c7d73 --- /dev/null +++ b/internal/explain/expressions.go @@ -0,0 +1,140 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainIdentifier(sb *strings.Builder, n *ast.Identifier, indent string) { + name := n.Name() + if n.Alias != "" { + fmt.Fprintf(sb, "%sIdentifier %s (alias %s)\n", indent, name, n.Alias) + } else { + fmt.Fprintf(sb, "%sIdentifier %s\n", indent, name) + } +} + +func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth int) { + // Check if this is a tuple - either with expressions or empty + if n.Type == ast.LiteralTuple { + if exprs, ok := n.Value.([]ast.Expression); ok { + // Check if empty tuple or has complex expressions + if len(exprs) == 0 { + // Empty tuple renders as Function tuple with empty ExpressionList + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + hasComplexExpr := false + for _, e := range exprs { + if _, isLit := e.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function tuple instead of Literal + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + Node(sb, e, depth+2) + } + return + } + } else if n.Value == 
nil { + // nil value means empty tuple + fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } + } + // Check if this is an array with complex expressions that should be rendered as Function array + if n.Type == ast.LiteralArray { + if exprs, ok := n.Value.([]ast.Expression); ok { + hasComplexExpr := false + for _, e := range exprs { + if _, isLit := e.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function array instead of Literal + fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, e := range exprs { + Node(sb, e, depth+2) + } + return + } + } + } + fmt.Fprintf(sb, "%sLiteral %s\n", indent, FormatLiteral(n)) +} + +func explainBinaryExpr(sb *strings.Builder, n *ast.BinaryExpr, indent string, depth int) { + // Convert operator to function name + fnName := OperatorToFunction(n.Op) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Left, depth+2) + Node(sb, n.Right, depth+2) +} + +func explainUnaryExpr(sb *strings.Builder, n *ast.UnaryExpr, indent string, depth int) { + fnName := UnaryOperatorToFunction(n.Op) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.Operand, depth+2) +} + +func explainSubquery(sb *strings.Builder, n *ast.Subquery, indent string, depth int) { + children := 1 + fmt.Fprintf(sb, "%sSubquery (children %d)\n", indent, children) + Node(sb, n.Query, depth+1) +} + +func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { + // For aliased expressions, we need to show the underlying expression with the alias + indent := strings.Repeat(" ", depth) + + switch e := n.Expr.(type) { + case *ast.Literal: + // Check if this is 
a tuple with complex expressions that should be rendered as Function tuple + if e.Type == ast.LiteralTuple { + if exprs, ok := e.Value.([]ast.Expression); ok { + hasComplexExpr := false + for _, expr := range exprs { + if _, isLit := expr.(*ast.Literal); !isLit { + hasComplexExpr = true + break + } + } + if hasComplexExpr { + // Render as Function tuple with alias + fmt.Fprintf(sb, "%sFunction tuple (alias %s) (children %d)\n", indent, n.Alias, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, expr := range exprs { + Node(sb, expr, depth+2) + } + return + } + } + } + fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, FormatLiteral(e), n.Alias) + default: + // For other types, recursively explain and add alias info + Node(sb, n.Expr, depth) + } +} + +func explainAsterisk(sb *strings.Builder, n *ast.Asterisk, indent string) { + if n.Table != "" { + fmt.Fprintf(sb, "%sQualifiedAsterisk (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) + } else { + fmt.Fprintf(sb, "%sAsterisk\n", indent) + } +} diff --git a/internal/explain/format.go b/internal/explain/format.go new file mode 100644 index 0000000000..2b31a21e05 --- /dev/null +++ b/internal/explain/format.go @@ -0,0 +1,182 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +// FormatLiteral formats a literal value for EXPLAIN AST output +func FormatLiteral(lit *ast.Literal) string { + switch lit.Type { + case ast.LiteralInteger: + val := lit.Value.(int64) + if val >= 0 { + return fmt.Sprintf("UInt64_%d", val) + } + return fmt.Sprintf("Int64_%d", val) + case ast.LiteralFloat: + val := lit.Value.(float64) + return fmt.Sprintf("Float64_%v", val) + case ast.LiteralString: + s := lit.Value.(string) + return fmt.Sprintf("\\'%s\\'", s) + case ast.LiteralBoolean: + if lit.Value.(bool) { + return "UInt8_1" + } + return "UInt8_0" + case ast.LiteralNull: + return "Null" + case ast.LiteralArray: + return 
formatArrayLiteral(lit.Value) + case ast.LiteralTuple: + return formatTupleLiteral(lit.Value) + default: + return fmt.Sprintf("%v", lit.Value) + } +} + +// formatArrayLiteral formats an array literal for EXPLAIN AST output +func formatArrayLiteral(val interface{}) string { + exprs, ok := val.([]ast.Expression) + if !ok { + return "Array_[]" + } + var parts []string + for _, e := range exprs { + if lit, ok := e.(*ast.Literal); ok { + parts = append(parts, FormatLiteral(lit)) + } else if ident, ok := e.(*ast.Identifier); ok { + parts = append(parts, ident.Name()) + } else { + parts = append(parts, fmt.Sprintf("%v", e)) + } + } + return fmt.Sprintf("Array_[%s]", strings.Join(parts, ", ")) +} + +// formatTupleLiteral formats a tuple literal for EXPLAIN AST output +func formatTupleLiteral(val interface{}) string { + exprs, ok := val.([]ast.Expression) + if !ok { + return "Tuple_()" + } + var parts []string + for _, e := range exprs { + if lit, ok := e.(*ast.Literal); ok { + parts = append(parts, FormatLiteral(lit)) + } else if ident, ok := e.(*ast.Identifier); ok { + parts = append(parts, ident.Name()) + } else { + parts = append(parts, fmt.Sprintf("%v", e)) + } + } + return fmt.Sprintf("Tuple_(%s)", strings.Join(parts, ", ")) +} + +// FormatDataType formats a DataType for EXPLAIN AST output +func FormatDataType(dt *ast.DataType) string { + if dt == nil { + return "" + } + if len(dt.Parameters) == 0 { + return dt.Name + } + var params []string + for _, p := range dt.Parameters { + if lit, ok := p.(*ast.Literal); ok { + if lit.Type == ast.LiteralString { + // String parameters in type need extra escaping: 'val' -> \\\'val\\\' + params = append(params, fmt.Sprintf("\\\\\\'%s\\\\\\'", lit.Value)) + } else { + params = append(params, fmt.Sprintf("%v", lit.Value)) + } + } else if nested, ok := p.(*ast.DataType); ok { + params = append(params, FormatDataType(nested)) + } else { + params = append(params, fmt.Sprintf("%v", p)) + } + } + return fmt.Sprintf("%s(%s)", dt.Name, 
strings.Join(params, ", ")) +} + +// NormalizeFunctionName normalizes function names to match ClickHouse's EXPLAIN AST output +func NormalizeFunctionName(name string) string { + // ClickHouse normalizes certain function names in EXPLAIN AST + normalized := map[string]string{ + "ltrim": "trimLeft", + "rtrim": "trimRight", + "lcase": "lower", + "ucase": "upper", + "mid": "substring", + "substr": "substring", + "pow": "power", + "ceil": "ceiling", + "ln": "log", + "log10": "log10", + "log2": "log2", + "rand": "rand", + "ifnull": "ifNull", + "nullif": "nullIf", + "coalesce": "coalesce", + "greatest": "greatest", + "least": "least", + "concat_ws": "concat", + "length": "length", + "char_length": "length", + } + if n, ok := normalized[strings.ToLower(name)]; ok { + return n + } + return name +} + +// OperatorToFunction maps binary operators to ClickHouse function names +func OperatorToFunction(op string) string { + switch op { + case "+": + return "plus" + case "-": + return "minus" + case "*": + return "multiply" + case "/": + return "divide" + case "%": + return "modulo" + case "=", "==": + return "equals" + case "!=", "<>": + return "notEquals" + case "<": + return "less" + case ">": + return "greater" + case "<=": + return "lessOrEquals" + case ">=": + return "greaterOrEquals" + case "AND": + return "and" + case "OR": + return "or" + case "||": + return "concat" + default: + return strings.ToLower(op) + } +} + +// UnaryOperatorToFunction maps unary operators to ClickHouse function names +func UnaryOperatorToFunction(op string) string { + switch op { + case "-": + return "negate" + case "NOT": + return "not" + default: + return strings.ToLower(op) + } +} diff --git a/internal/explain/functions.go b/internal/explain/functions.go new file mode 100644 index 0000000000..33e25e81db --- /dev/null +++ b/internal/explain/functions.go @@ -0,0 +1,217 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainFunctionCall(sb 
*strings.Builder, n *ast.FunctionCall, indent string, depth int) { + children := 1 // arguments ExpressionList + if len(n.Parameters) > 0 { + children++ // parameters ExpressionList + } + // Normalize function name + fnName := NormalizeFunctionName(n.Name) + if n.Alias != "" { + fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, children) + } else { + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, children) + } + // Arguments + fmt.Fprintf(sb, "%s ExpressionList", indent) + if len(n.Arguments) > 0 { + fmt.Fprintf(sb, " (children %d)", len(n.Arguments)) + } + fmt.Fprintln(sb) + for _, arg := range n.Arguments { + Node(sb, arg, depth+2) + } + // Parameters (for parametric functions) + if len(n.Parameters) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) + for _, p := range n.Parameters { + Node(sb, p, depth+2) + } + } +} + +func explainLambda(sb *strings.Builder, n *ast.Lambda, indent string, depth int) { + // Lambda is represented as Function lambda with tuple of params and body + fmt.Fprintf(sb, "%sFunction lambda (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + // Parameters as tuple + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) + for _, p := range n.Parameters { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, p) + } + // Body + Node(sb, n.Body, depth+2) +} + +func explainCastExpr(sb *strings.Builder, n *ast.CastExpr, indent string, depth int) { + // CAST is represented as Function CAST with expr and type as arguments + fmt.Fprintf(sb, "%sFunction CAST (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Expr, depth+2) + // Type is formatted as a literal string + typeStr := FormatDataType(n.Type) + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, typeStr) +} + +func 
explainInExpr(sb *strings.Builder, n *ast.InExpr, indent string, depth int) { + // IN is represented as Function in + fnName := "in" + if n.Not { + fnName = "notIn" + } + if n.Global { + fnName = "global" + strings.Title(fnName) + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + // Count arguments: expr + list items or subquery + argCount := 1 + if n.Query != nil { + argCount++ + } else { + argCount += len(n.List) + } + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + Node(sb, n.Expr, depth+2) + if n.Query != nil { + // Subqueries in IN should be wrapped in Subquery node + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + Node(sb, n.Query, depth+3) + } else { + for _, item := range n.List { + Node(sb, item, depth+2) + } + } +} + +func explainTernaryExpr(sb *strings.Builder, n *ast.TernaryExpr, indent string, depth int) { + // Ternary is represented as Function if with 3 arguments + fmt.Fprintf(sb, "%sFunction if (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 3) + Node(sb, n.Condition, depth+2) + Node(sb, n.Then, depth+2) + Node(sb, n.Else, depth+2) +} + +func explainArrayAccess(sb *strings.Builder, n *ast.ArrayAccess, indent string, depth int) { + // Array access is represented as Function arrayElement + fmt.Fprintf(sb, "%sFunction arrayElement (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Array, depth+2) + Node(sb, n.Index, depth+2) +} + +func explainTupleAccess(sb *strings.Builder, n *ast.TupleAccess, indent string, depth int) { + // Tuple access is represented as Function tupleElement + fmt.Fprintf(sb, "%sFunction tupleElement (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Tuple, depth+2) + Node(sb, n.Index, depth+2) +} + +func explainLikeExpr(sb *strings.Builder, n *ast.LikeExpr, indent string, depth int) { + // LIKE is represented as 
Function like + fnName := "like" + if n.CaseInsensitive { + fnName = "ilike" + } + if n.Not { + fnName = "not" + strings.Title(fnName) + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) + Node(sb, n.Expr, depth+2) + Node(sb, n.Pattern, depth+2) +} + +func explainBetweenExpr(sb *strings.Builder, n *ast.BetweenExpr, indent string, depth int) { + // BETWEEN is represented as Function and with two comparisons + // But for explain, we can use a simpler form + fnName := "between" + if n.Not { + fnName = "notBetween" + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 3) + Node(sb, n.Expr, depth+2) + Node(sb, n.Low, depth+2) + Node(sb, n.High, depth+2) +} + +func explainIsNullExpr(sb *strings.Builder, n *ast.IsNullExpr, indent string, depth int) { + // IS NULL is represented as Function isNull + fnName := "isNull" + if n.Not { + fnName = "isNotNull" + } + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.Expr, depth+2) +} + +func explainCaseExpr(sb *strings.Builder, n *ast.CaseExpr, indent string, depth int) { + // CASE is represented as Function multiIf or caseWithExpression + if n.Operand != nil { + // CASE x WHEN ... form + argCount := 1 + len(n.Whens)*2 // operand + (condition, result) pairs + if n.Else != nil { + argCount++ + } + fmt.Fprintf(sb, "%sFunction caseWithExpression (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + Node(sb, n.Operand, depth+2) + for _, w := range n.Whens { + Node(sb, w.Condition, depth+2) + Node(sb, w.Result, depth+2) + } + if n.Else != nil { + Node(sb, n.Else, depth+2) + } + } else { + // CASE WHEN ... 
form + argCount := len(n.Whens) * 2 + if n.Else != nil { + argCount++ + } + fmt.Fprintf(sb, "%sFunction multiIf (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) + for _, w := range n.Whens { + Node(sb, w.Condition, depth+2) + Node(sb, w.Result, depth+2) + } + if n.Else != nil { + Node(sb, n.Else, depth+2) + } + } +} + +func explainIntervalExpr(sb *strings.Builder, n *ast.IntervalExpr, indent string, depth int) { + // INTERVAL is represented as Function toInterval + fnName := "toInterval" + n.Unit + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.Value, depth+2) +} + +func explainExistsExpr(sb *strings.Builder, n *ast.ExistsExpr, indent string, depth int) { + // EXISTS is represented as Function exists + fmt.Fprintf(sb, "%sFunction exists (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + Node(sb, n.Query, depth+3) +} + +func explainExtractExpr(sb *strings.Builder, n *ast.ExtractExpr, indent string, depth int) { + // EXTRACT is represented as Function toYear, toMonth, etc. 
+ fnName := "to" + strings.Title(strings.ToLower(n.Field)) + fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) + Node(sb, n.From, depth+2) +} diff --git a/internal/explain/select.go b/internal/explain/select.go new file mode 100644 index 0000000000..118e160c72 --- /dev/null +++ b/internal/explain/select.go @@ -0,0 +1,112 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuery, indent string, depth int) { + children := countSelectUnionChildren(n) + fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children) + // Wrap selects in ExpressionList + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects)) + for _, sel := range n.Selects { + Node(sb, sel, depth+2) + } +} + +func explainSelectQuery(sb *strings.Builder, n *ast.SelectQuery, indent string, depth int) { + children := countSelectQueryChildren(n) + fmt.Fprintf(sb, "%sSelectQuery (children %d)\n", indent, children) + // Columns (ExpressionList) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) + for _, col := range n.Columns { + Node(sb, col, depth+2) + } + // FROM (including ARRAY JOIN as part of TablesInSelectQuery) + if n.From != nil || n.ArrayJoin != nil { + TablesWithArrayJoin(sb, n.From, n.ArrayJoin, depth+1) + } + // PREWHERE + if n.PreWhere != nil { + Node(sb, n.PreWhere, depth+1) + } + // WHERE + if n.Where != nil { + Node(sb, n.Where, depth+1) + } + // GROUP BY + if len(n.GroupBy) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.GroupBy)) + for _, g := range n.GroupBy { + Node(sb, g, depth+2) + } + } + // HAVING + if n.Having != nil { + Node(sb, n.Having, depth+1) + } + // ORDER BY + if len(n.OrderBy) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) + for _, o := range 
n.OrderBy { + Node(sb, o, depth+2) + } + } + // LIMIT + if n.Limit != nil { + Node(sb, n.Limit, depth+1) + } + // OFFSET + if n.Offset != nil { + Node(sb, n.Offset, depth+1) + } + // SETTINGS + if len(n.Settings) > 0 { + fmt.Fprintf(sb, "%s Set\n", indent) + } +} + +func explainOrderByElement(sb *strings.Builder, n *ast.OrderByElement, indent string, depth int) { + fmt.Fprintf(sb, "%sOrderByElement (children %d)\n", indent, 1) + Node(sb, n.Expression, depth+1) +} + +func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int { + return 1 // ExpressionList of selects +} + +func countSelectQueryChildren(n *ast.SelectQuery) int { + count := 1 // columns ExpressionList + // FROM and ARRAY JOIN together count as one child (TablesInSelectQuery) + if n.From != nil || n.ArrayJoin != nil { + count++ + } + if n.PreWhere != nil { + count++ + } + if n.Where != nil { + count++ + } + if len(n.GroupBy) > 0 { + count++ + } + if n.Having != nil { + count++ + } + if len(n.OrderBy) > 0 { + count++ + } + if n.Limit != nil { + count++ + } + if n.Offset != nil { + count++ + } + if len(n.Settings) > 0 { + count++ + } + return count +} diff --git a/internal/explain/statements.go b/internal/explain/statements.go new file mode 100644 index 0000000000..baf8eaf516 --- /dev/null +++ b/internal/explain/statements.go @@ -0,0 +1,128 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, depth int) { + name := n.Table + if n.View != "" { + name = n.View + } + if n.CreateDatabase { + name = n.Database + } + // Count children: name + columns + engine/storage + children := 1 // name identifier + if len(n.Columns) > 0 { + children++ + } + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { + children++ + } + if n.AsSelect != nil { + children++ + } + fmt.Fprintf(sb, "%sCreateQuery %s (children %d)\n", indent, name, children) + fmt.Fprintf(sb, "%s Identifier 
%s\n", indent, name) + if len(n.Columns) > 0 { + fmt.Fprintf(sb, "%s Columns definition (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) + for _, col := range n.Columns { + Column(sb, col, depth+3) + } + } + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { + storageChildren := 0 + if n.Engine != nil { + storageChildren++ + } + if len(n.OrderBy) > 0 { + storageChildren++ + } + if len(n.PrimaryKey) > 0 { + storageChildren++ + } + fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) + if n.Engine != nil { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + } + if len(n.OrderBy) > 0 { + if len(n.OrderBy) == 1 { + if ident, ok := n.OrderBy[0].(*ast.Identifier); ok { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) + } else { + Node(sb, n.OrderBy[0], depth+2) + } + } else { + fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) + for _, o := range n.OrderBy { + Node(sb, o, depth+4) + } + } + } + } + if n.AsSelect != nil { + fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) + Node(sb, n.AsSelect, depth+2) + } +} + +func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string) { + name := n.Table + if n.View != "" { + name = n.View + } + if n.DropDatabase { + name = n.Database + } + fmt.Fprintf(sb, "%sDropQuery %s (children %d)\n", indent, name, 1) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) +} + +func explainSetQuery(sb *strings.Builder, indent string) { + fmt.Fprintf(sb, "%sSet\n", indent) +} + +func explainSystemQuery(sb *strings.Builder, indent string) { + fmt.Fprintf(sb, "%sSYSTEM query\n", indent) +} + +func explainExplainQuery(sb *strings.Builder, n *ast.ExplainQuery, indent string, depth int) { + fmt.Fprintf(sb, "%sExplain %s (children %d)\n", indent, n.ExplainType, 1) + Node(sb, n.Statement, depth+1) +} + +func 
explainShowQuery(sb *strings.Builder, n *ast.ShowQuery, indent string) { + // Capitalize ShowType correctly for display + showType := strings.Title(strings.ToLower(string(n.ShowType))) + fmt.Fprintf(sb, "%sShow%s\n", indent, showType) +} + +func explainUseQuery(sb *strings.Builder, n *ast.UseQuery, indent string) { + fmt.Fprintf(sb, "%sUse %s\n", indent, n.Database) +} + +func explainDescribeQuery(sb *strings.Builder, n *ast.DescribeQuery, indent string) { + name := n.Table + if n.Database != "" { + name = n.Database + "." + n.Table + } + fmt.Fprintf(sb, "%sDescribe %s\n", indent, name) +} + +func explainDataType(sb *strings.Builder, n *ast.DataType, indent string) { + fmt.Fprintf(sb, "%sDataType %s\n", indent, FormatDataType(n)) +} + +func explainParameter(sb *strings.Builder, n *ast.Parameter, indent string) { + if n.Name != "" { + fmt.Fprintf(sb, "%sQueryParameter %s\n", indent, n.Name) + } else { + fmt.Fprintf(sb, "%sQueryParameter\n", indent) + } +} diff --git a/internal/explain/tables.go b/internal/explain/tables.go new file mode 100644 index 0000000000..e707120168 --- /dev/null +++ b/internal/explain/tables.go @@ -0,0 +1,89 @@ +package explain + +import ( + "fmt" + "strings" + + "github.com/kyleconroy/doubleclick/ast" +) + +func explainTablesInSelectQuery(sb *strings.Builder, n *ast.TablesInSelectQuery, indent string, depth int) { + fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, len(n.Tables)) + for _, t := range n.Tables { + Node(sb, t, depth+1) + } +} + +func explainTablesInSelectQueryElement(sb *strings.Builder, n *ast.TablesInSelectQueryElement, indent string, depth int) { + children := 1 // table + if n.Join != nil { + children++ + } + fmt.Fprintf(sb, "%sTablesInSelectQueryElement (children %d)\n", indent, children) + if n.Table != nil { + Node(sb, n.Table, depth+1) + } + if n.Join != nil { + Node(sb, n.Join, depth+1) + } +} + +func explainTableExpression(sb *strings.Builder, n *ast.TableExpression, indent string, depth int) { + 
children := 1 // table + fmt.Fprintf(sb, "%sTableExpression (children %d)\n", indent, children) + // If there's a subquery with an alias, pass the alias to the subquery output + if subq, ok := n.Table.(*ast.Subquery); ok && n.Alias != "" { + fmt.Fprintf(sb, "%s Subquery (alias %s) (children %d)\n", indent, n.Alias, 1) + Node(sb, subq.Query, depth+2) + } else { + Node(sb, n.Table, depth+1) + } +} + +func explainTableIdentifier(sb *strings.Builder, n *ast.TableIdentifier, indent string) { + name := n.Table + if n.Database != "" { + name = n.Database + "." + n.Table + } + fmt.Fprintf(sb, "%sTableIdentifier %s\n", indent, name) +} + +func explainArrayJoinClause(sb *strings.Builder, n *ast.ArrayJoinClause, indent string, depth int) { + fmt.Fprintf(sb, "%sArrayJoin (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList", indent) + if len(n.Columns) > 0 { + fmt.Fprintf(sb, " (children %d)", len(n.Columns)) + } + fmt.Fprintln(sb) + for _, col := range n.Columns { + Node(sb, col, depth+2) + } +} + +func explainTableJoin(sb *strings.Builder, n *ast.TableJoin, indent string, depth int) { + // TableJoin is part of TablesInSelectQueryElement + joinType := strings.ToLower(string(n.Type)) + if n.Strictness != "" { + joinType = strings.ToLower(string(n.Strictness)) + " " + joinType + } + if n.Global { + joinType = "global " + joinType + } + children := 0 + if n.On != nil { + children++ + } + if len(n.Using) > 0 { + children++ + } + fmt.Fprintf(sb, "%sTableJoin %s (children %d)\n", indent, joinType, children) + if n.On != nil { + Node(sb, n.On, depth+1) + } + if len(n.Using) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Using)) + for _, u := range n.Using { + Node(sb, u, depth+2) + } + } +} diff --git a/parser/explain.go b/parser/explain.go index 9ea31a473a..906efb8a60 100644 --- a/parser/explain.go +++ b/parser/explain.go @@ -1,867 +1,11 @@ package parser import ( - "fmt" - "strings" - "github.com/kyleconroy/doubleclick/ast" + 
"github.com/kyleconroy/doubleclick/internal/explain" ) // Explain returns the EXPLAIN AST output for a statement, matching ClickHouse's format. func Explain(stmt ast.Statement) string { - var sb strings.Builder - explainNode(&sb, stmt, 0) - return sb.String() -} - -// explainNode writes the EXPLAIN AST output for an AST node. -func explainNode(sb *strings.Builder, node interface{}, depth int) { - if node == nil { - // nil can represent an empty tuple in function arguments - indent := strings.Repeat(" ", depth) - fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList\n", indent) - return - } - - indent := strings.Repeat(" ", depth) - - switch n := node.(type) { - case *ast.SelectWithUnionQuery: - children := countChildren(n) - fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children) - // Wrap selects in ExpressionList - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects)) - for _, sel := range n.Selects { - explainNode(sb, sel, depth+2) - } - - case *ast.SelectQuery: - children := countSelectQueryChildren(n) - fmt.Fprintf(sb, "%sSelectQuery (children %d)\n", indent, children) - // Columns (ExpressionList) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) - for _, col := range n.Columns { - explainNode(sb, col, depth+2) - } - // FROM (including ARRAY JOIN as part of TablesInSelectQuery) - if n.From != nil || n.ArrayJoin != nil { - explainTablesWithArrayJoin(sb, n.From, n.ArrayJoin, depth+1) - } - // PREWHERE - if n.PreWhere != nil { - explainNode(sb, n.PreWhere, depth+1) - } - // WHERE - if n.Where != nil { - explainNode(sb, n.Where, depth+1) - } - // GROUP BY - if len(n.GroupBy) > 0 { - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.GroupBy)) - for _, g := range n.GroupBy { - explainNode(sb, g, depth+2) - } - } - // HAVING - if n.Having != nil { - explainNode(sb, n.Having, depth+1) - } - // ORDER BY - if len(n.OrderBy) > 0 { - 
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) - for _, o := range n.OrderBy { - explainNode(sb, o, depth+2) - } - } - // LIMIT - if n.Limit != nil { - explainNode(sb, n.Limit, depth+1) - } - // OFFSET - if n.Offset != nil { - explainNode(sb, n.Offset, depth+1) - } - // SETTINGS - if len(n.Settings) > 0 { - fmt.Fprintf(sb, "%s Set\n", indent) - } - - case *ast.TablesInSelectQuery: - fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, len(n.Tables)) - for _, t := range n.Tables { - explainNode(sb, t, depth+1) - } - - case *ast.TablesInSelectQueryElement: - children := 1 // table - if n.Join != nil { - children++ - } - fmt.Fprintf(sb, "%sTablesInSelectQueryElement (children %d)\n", indent, children) - if n.Table != nil { - explainNode(sb, n.Table, depth+1) - } - if n.Join != nil { - explainNode(sb, n.Join, depth+1) - } - - case *ast.TableExpression: - children := 1 // table - fmt.Fprintf(sb, "%sTableExpression (children %d)\n", indent, children) - // If there's a subquery with an alias, pass the alias to the subquery output - if subq, ok := n.Table.(*ast.Subquery); ok && n.Alias != "" { - fmt.Fprintf(sb, "%s Subquery (alias %s) (children %d)\n", indent, n.Alias, 1) - explainNode(sb, subq.Query, depth+2) - } else { - explainNode(sb, n.Table, depth+1) - } - - case *ast.TableIdentifier: - name := n.Table - if n.Database != "" { - name = n.Database + "." 
+ n.Table - } - fmt.Fprintf(sb, "%sTableIdentifier %s\n", indent, name) - - case *ast.ArrayJoinClause: - fmt.Fprintf(sb, "%sArrayJoin (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList", indent) - if len(n.Columns) > 0 { - fmt.Fprintf(sb, " (children %d)", len(n.Columns)) - } - fmt.Fprintln(sb) - for _, col := range n.Columns { - explainNode(sb, col, depth+2) - } - - case *ast.OrderByElement: - fmt.Fprintf(sb, "%sOrderByElement (children %d)\n", indent, 1) - explainNode(sb, n.Expression, depth+1) - - case *ast.Identifier: - name := n.Name() - if n.Alias != "" { - fmt.Fprintf(sb, "%sIdentifier %s (alias %s)\n", indent, name, n.Alias) - } else { - fmt.Fprintf(sb, "%sIdentifier %s\n", indent, name) - } - - case *ast.Literal: - // Check if this is a tuple - either with expressions or empty - if n.Type == ast.LiteralTuple { - if exprs, ok := n.Value.([]ast.Expression); ok { - // Check if empty tuple or has complex expressions - if len(exprs) == 0 { - // Empty tuple renders as Function tuple with empty ExpressionList - fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList\n", indent) - return - } - hasComplexExpr := false - for _, e := range exprs { - if _, isLit := e.(*ast.Literal); !isLit { - hasComplexExpr = true - break - } - } - if hasComplexExpr { - // Render as Function tuple instead of Literal - fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) - for _, e := range exprs { - explainNode(sb, e, depth+2) - } - return - } - } else if n.Value == nil { - // nil value means empty tuple - fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList\n", indent) - return - } - } - // Check if this is an array with complex expressions that should be rendered as Function array - if n.Type == ast.LiteralArray { - if exprs, ok := n.Value.([]ast.Expression); ok { - hasComplexExpr := false - 
for _, e := range exprs { - if _, isLit := e.(*ast.Literal); !isLit { - hasComplexExpr = true - break - } - } - if hasComplexExpr { - // Render as Function array instead of Literal - fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) - for _, e := range exprs { - explainNode(sb, e, depth+2) - } - return - } - } - } - fmt.Fprintf(sb, "%sLiteral %s\n", indent, formatLiteral(n)) - - case *ast.FunctionCall: - children := 1 // arguments ExpressionList - if len(n.Parameters) > 0 { - children++ // parameters ExpressionList - } - // Normalize function name - fnName := normalizeFunctionName(n.Name) - if n.Alias != "" { - fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, n.Alias, children) - } else { - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, children) - } - // Arguments - fmt.Fprintf(sb, "%s ExpressionList", indent) - if len(n.Arguments) > 0 { - fmt.Fprintf(sb, " (children %d)", len(n.Arguments)) - } - fmt.Fprintln(sb) - for _, arg := range n.Arguments { - explainNode(sb, arg, depth+2) - } - // Parameters (for parametric functions) - if len(n.Parameters) > 0 { - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) - for _, p := range n.Parameters { - explainNode(sb, p, depth+2) - } - } - - case *ast.BinaryExpr: - // Convert operator to function name - fnName := operatorToFunction(n.Op) - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - explainNode(sb, n.Left, depth+2) - explainNode(sb, n.Right, depth+2) - - case *ast.UnaryExpr: - fnName := unaryOperatorToFunction(n.Op) - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) - explainNode(sb, n.Operand, depth+2) - - case *ast.Subquery: - children := 1 - fmt.Fprintf(sb, "%sSubquery (children %d)\n", 
indent, children) - explainNode(sb, n.Query, depth+1) - - case *ast.AliasedExpr: - explainAliasedExpr(sb, n, depth) - - case *ast.Lambda: - // Lambda is represented as Function lambda with tuple of params and body - fmt.Fprintf(sb, "%sFunction lambda (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - // Parameters as tuple - fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) - for _, p := range n.Parameters { - fmt.Fprintf(sb, "%s Identifier %s\n", indent, p) - } - // Body - explainNode(sb, n.Body, depth+2) - - case *ast.SetQuery: - fmt.Fprintf(sb, "%sSet\n", indent) - - case *ast.CastExpr: - // CAST is represented as Function CAST with expr and type as arguments - fmt.Fprintf(sb, "%sFunction CAST (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - explainNode(sb, n.Expr, depth+2) - // Type is formatted as a literal string - typeStr := formatDataType(n.Type) - fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, typeStr) - - case *ast.InExpr: - // IN is represented as Function in - fnName := "in" - if n.Not { - fnName = "notIn" - } - if n.Global { - fnName = "global" + strings.Title(fnName) - } - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - // Count arguments: expr + list items or subquery - argCount := 1 - if n.Query != nil { - argCount++ - } else { - argCount += len(n.List) - } - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) - explainNode(sb, n.Expr, depth+2) - if n.Query != nil { - // Subqueries in IN should be wrapped in Subquery node - fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) - explainNode(sb, n.Query, depth+3) - } else { - for _, item := range n.List { - explainNode(sb, item, depth+2) - } - } - - case *ast.TernaryExpr: - // Ternary is represented as Function if with 3 arguments - fmt.Fprintf(sb, "%sFunction if 
(children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 3) - explainNode(sb, n.Condition, depth+2) - explainNode(sb, n.Then, depth+2) - explainNode(sb, n.Else, depth+2) - - case *ast.ArrayAccess: - // Array access is represented as Function arrayElement - fmt.Fprintf(sb, "%sFunction arrayElement (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - explainNode(sb, n.Array, depth+2) - explainNode(sb, n.Index, depth+2) - - case *ast.TupleAccess: - // Tuple access is represented as Function tupleElement - fmt.Fprintf(sb, "%sFunction tupleElement (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - explainNode(sb, n.Tuple, depth+2) - explainNode(sb, n.Index, depth+2) - - case *ast.DropQuery: - name := n.Table - if n.View != "" { - name = n.View - } - if n.DropDatabase { - name = n.Database - } - fmt.Fprintf(sb, "%sDropQuery %s (children %d)\n", indent, name, 1) - fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) - - case *ast.Asterisk: - if n.Table != "" { - fmt.Fprintf(sb, "%sQualifiedAsterisk (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) - } else { - fmt.Fprintf(sb, "%sAsterisk\n", indent) - } - - case *ast.LikeExpr: - // LIKE is represented as Function like - fnName := "like" - if n.CaseInsensitive { - fnName = "ilike" - } - if n.Not { - fnName = "not" + strings.Title(fnName) - } - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - explainNode(sb, n.Expr, depth+2) - explainNode(sb, n.Pattern, depth+2) - - case *ast.BetweenExpr: - // BETWEEN is represented as Function and with two comparisons - // But for explain, we can use a simpler form - fnName := "between" - if n.Not { - fnName = "notBetween" - } - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children 
%d)\n", indent, 3) - explainNode(sb, n.Expr, depth+2) - explainNode(sb, n.Low, depth+2) - explainNode(sb, n.High, depth+2) - - case *ast.IsNullExpr: - // IS NULL is represented as Function isNull - fnName := "isNull" - if n.Not { - fnName = "isNotNull" - } - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) - explainNode(sb, n.Expr, depth+2) - - case *ast.CaseExpr: - // CASE is represented as Function multiIf or caseWithExpression - if n.Operand != nil { - // CASE x WHEN ... form - argCount := 1 + len(n.Whens)*2 // operand + (condition, result) pairs - if n.Else != nil { - argCount++ - } - fmt.Fprintf(sb, "%sFunction caseWithExpression (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) - explainNode(sb, n.Operand, depth+2) - for _, w := range n.Whens { - explainNode(sb, w.Condition, depth+2) - explainNode(sb, w.Result, depth+2) - } - if n.Else != nil { - explainNode(sb, n.Else, depth+2) - } - } else { - // CASE WHEN ... 
form - argCount := len(n.Whens) * 2 - if n.Else != nil { - argCount++ - } - fmt.Fprintf(sb, "%sFunction multiIf (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, argCount) - for _, w := range n.Whens { - explainNode(sb, w.Condition, depth+2) - explainNode(sb, w.Result, depth+2) - } - if n.Else != nil { - explainNode(sb, n.Else, depth+2) - } - } - - case *ast.IntervalExpr: - // INTERVAL is represented as Function toInterval - fnName := "toInterval" + n.Unit - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) - explainNode(sb, n.Value, depth+2) - - case *ast.ExistsExpr: - // EXISTS is represented as Function exists - fmt.Fprintf(sb, "%sFunction exists (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) - explainNode(sb, n.Query, depth+3) - - case *ast.ExtractExpr: - // EXTRACT is represented as Function toYear, toMonth, etc. 
- fnName := "to" + strings.Title(strings.ToLower(n.Field)) - fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1) - explainNode(sb, n.From, depth+2) - - case *ast.CreateQuery: - name := n.Table - if n.View != "" { - name = n.View - } - if n.CreateDatabase { - name = n.Database - } - // Count children: name + columns + engine/storage - children := 1 // name identifier - if len(n.Columns) > 0 { - children++ - } - if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { - children++ - } - if n.AsSelect != nil { - children++ - } - fmt.Fprintf(sb, "%sCreateQuery %s (children %d)\n", indent, name, children) - fmt.Fprintf(sb, "%s Identifier %s\n", indent, name) - if len(n.Columns) > 0 { - fmt.Fprintf(sb, "%s Columns definition (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns)) - for _, col := range n.Columns { - explainColumn(sb, col, depth+3) - } - } - if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { - storageChildren := 0 - if n.Engine != nil { - storageChildren++ - } - if len(n.OrderBy) > 0 { - storageChildren++ - } - if len(n.PrimaryKey) > 0 { - storageChildren++ - } - fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) - if n.Engine != nil { - fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) - } - if len(n.OrderBy) > 0 { - if len(n.OrderBy) == 1 { - if ident, ok := n.OrderBy[0].(*ast.Identifier); ok { - fmt.Fprintf(sb, "%s Identifier %s\n", indent, ident.Name()) - } else { - explainNode(sb, n.OrderBy[0], depth+2) - } - } else { - fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy)) - for _, o := range n.OrderBy { - explainNode(sb, o, depth+4) - } - } - } - } - if n.AsSelect != nil { - fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) - explainNode(sb, n.AsSelect, 
depth+2) - } - - case *ast.SystemQuery: - fmt.Fprintf(sb, "%sSYSTEM query\n", indent) - - case *ast.ExplainQuery: - fmt.Fprintf(sb, "%sExplain %s (children %d)\n", indent, n.ExplainType, 1) - explainNode(sb, n.Statement, depth+1) - - case *ast.ShowQuery: - // Capitalize ShowType correctly for display - showType := strings.Title(strings.ToLower(string(n.ShowType))) - fmt.Fprintf(sb, "%sShow%s\n", indent, showType) - - case *ast.UseQuery: - fmt.Fprintf(sb, "%sUse %s\n", indent, n.Database) - - case *ast.DescribeQuery: - name := n.Table - if n.Database != "" { - name = n.Database + "." + n.Table - } - fmt.Fprintf(sb, "%sDescribe %s\n", indent, name) - - case *ast.TableJoin: - // TableJoin is part of TablesInSelectQueryElement - joinType := strings.ToLower(string(n.Type)) - if n.Strictness != "" { - joinType = strings.ToLower(string(n.Strictness)) + " " + joinType - } - if n.Global { - joinType = "global " + joinType - } - children := 0 - if n.On != nil { - children++ - } - if len(n.Using) > 0 { - children++ - } - fmt.Fprintf(sb, "%sTableJoin %s (children %d)\n", indent, joinType, children) - if n.On != nil { - explainNode(sb, n.On, depth+1) - } - if len(n.Using) > 0 { - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Using)) - for _, u := range n.Using { - explainNode(sb, u, depth+2) - } - } - - case *ast.DataType: - fmt.Fprintf(sb, "%sDataType %s\n", indent, formatDataType(n)) - - case *ast.Parameter: - if n.Name != "" { - fmt.Fprintf(sb, "%sQueryParameter %s\n", indent, n.Name) - } else { - fmt.Fprintf(sb, "%sQueryParameter\n", indent) - } - - default: - // For unhandled types, just print the type name - fmt.Fprintf(sb, "%s%T\n", indent, node) - } -} - -// countChildren counts the children of a SelectWithUnionQuery -func countChildren(n *ast.SelectWithUnionQuery) int { - return 1 // ExpressionList of selects -} - -// countSelectQueryChildren counts the children of a SelectQuery -func countSelectQueryChildren(n *ast.SelectQuery) int { - count := 1 
// columns ExpressionList - // FROM and ARRAY JOIN together count as one child (TablesInSelectQuery) - if n.From != nil || n.ArrayJoin != nil { - count++ - } - if n.PreWhere != nil { - count++ - } - if n.Where != nil { - count++ - } - if len(n.GroupBy) > 0 { - count++ - } - if n.Having != nil { - count++ - } - if len(n.OrderBy) > 0 { - count++ - } - if n.Limit != nil { - count++ - } - if n.Offset != nil { - count++ - } - if len(n.Settings) > 0 { - count++ - } - return count -} - -// explainTablesWithArrayJoin handles FROM and ARRAY JOIN together as TablesInSelectQuery -func explainTablesWithArrayJoin(sb *strings.Builder, from *ast.TablesInSelectQuery, arrayJoin *ast.ArrayJoinClause, depth int) { - indent := strings.Repeat(" ", depth) - - tableCount := 0 - if from != nil { - tableCount = len(from.Tables) - } - if arrayJoin != nil { - tableCount++ - } - - fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, tableCount) - - if from != nil { - for _, t := range from.Tables { - explainNode(sb, t, depth+1) - } - } - - if arrayJoin != nil { - // ARRAY JOIN is wrapped in TablesInSelectQueryElement - fmt.Fprintf(sb, "%s TablesInSelectQueryElement (children %d)\n", indent, 1) - explainNode(sb, arrayJoin, depth+2) - } -} - -// formatLiteral formats a literal value for EXPLAIN AST output -func formatLiteral(lit *ast.Literal) string { - switch lit.Type { - case ast.LiteralInteger: - val := lit.Value.(int64) - if val >= 0 { - return fmt.Sprintf("UInt64_%d", val) - } - return fmt.Sprintf("Int64_%d", val) - case ast.LiteralFloat: - val := lit.Value.(float64) - return fmt.Sprintf("Float64_%v", val) - case ast.LiteralString: - s := lit.Value.(string) - return fmt.Sprintf("\\'%s\\'", s) - case ast.LiteralBoolean: - if lit.Value.(bool) { - return "UInt8_1" - } - return "UInt8_0" - case ast.LiteralNull: - return "Null" - case ast.LiteralArray: - return formatArrayLiteral(lit.Value) - case ast.LiteralTuple: - return formatTupleLiteral(lit.Value) - default: - return 
fmt.Sprintf("%v", lit.Value) - } -} - -// formatArrayLiteral formats an array literal for EXPLAIN AST output -func formatArrayLiteral(val interface{}) string { - exprs, ok := val.([]ast.Expression) - if !ok { - return "Array_[]" - } - var parts []string - for _, e := range exprs { - if lit, ok := e.(*ast.Literal); ok { - parts = append(parts, formatLiteral(lit)) - } else if ident, ok := e.(*ast.Identifier); ok { - parts = append(parts, ident.Name()) - } else { - parts = append(parts, fmt.Sprintf("%v", e)) - } - } - return fmt.Sprintf("Array_[%s]", strings.Join(parts, ", ")) -} - -// formatTupleLiteral formats a tuple literal for EXPLAIN AST output -func formatTupleLiteral(val interface{}) string { - exprs, ok := val.([]ast.Expression) - if !ok { - return "Tuple_()" - } - var parts []string - for _, e := range exprs { - if lit, ok := e.(*ast.Literal); ok { - parts = append(parts, formatLiteral(lit)) - } else if ident, ok := e.(*ast.Identifier); ok { - parts = append(parts, ident.Name()) - } else { - parts = append(parts, fmt.Sprintf("%v", e)) - } - } - return fmt.Sprintf("Tuple_(%s)", strings.Join(parts, ", ")) -} - -// formatDataType formats a DataType for EXPLAIN AST output -func formatDataType(dt *ast.DataType) string { - if dt == nil { - return "" - } - if len(dt.Parameters) == 0 { - return dt.Name - } - var params []string - for _, p := range dt.Parameters { - if lit, ok := p.(*ast.Literal); ok { - if lit.Type == ast.LiteralString { - // String parameters in type need extra escaping: 'val' -> \\\'val\\\' - params = append(params, fmt.Sprintf("\\\\\\'%s\\\\\\'", lit.Value)) - } else { - params = append(params, fmt.Sprintf("%v", lit.Value)) - } - } else if nested, ok := p.(*ast.DataType); ok { - params = append(params, formatDataType(nested)) - } else { - params = append(params, fmt.Sprintf("%v", p)) - } - } - return fmt.Sprintf("%s(%s)", dt.Name, strings.Join(params, ", ")) -} - -// normalizeFunctionName normalizes function names to match ClickHouse's EXPLAIN 
AST output -func normalizeFunctionName(name string) string { - // ClickHouse normalizes certain function names in EXPLAIN AST - normalized := map[string]string{ - "ltrim": "trimLeft", - "rtrim": "trimRight", - "lcase": "lower", - "ucase": "upper", - "mid": "substring", - "substr": "substring", - "pow": "power", - "ceil": "ceiling", - "ln": "log", - "log10": "log10", - "log2": "log2", - "rand": "rand", - "ifnull": "ifNull", - "nullif": "nullIf", - "coalesce": "coalesce", - "greatest": "greatest", - "least": "least", - "concat_ws": "concat", - "length": "length", - "char_length": "length", - } - if n, ok := normalized[strings.ToLower(name)]; ok { - return n - } - return name -} - -// operatorToFunction maps binary operators to ClickHouse function names -func operatorToFunction(op string) string { - switch op { - case "+": - return "plus" - case "-": - return "minus" - case "*": - return "multiply" - case "/": - return "divide" - case "%": - return "modulo" - case "=", "==": - return "equals" - case "!=", "<>": - return "notEquals" - case "<": - return "less" - case ">": - return "greater" - case "<=": - return "lessOrEquals" - case ">=": - return "greaterOrEquals" - case "AND": - return "and" - case "OR": - return "or" - case "||": - return "concat" - default: - return strings.ToLower(op) - } -} - -// unaryOperatorToFunction maps unary operators to ClickHouse function names -func unaryOperatorToFunction(op string) string { - switch op { - case "-": - return "negate" - case "NOT": - return "not" - default: - return strings.ToLower(op) - } -} - -// explainColumn handles column declarations -func explainColumn(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) { - indent := strings.Repeat(" ", depth) - children := 0 - if col.Type != nil { - children++ - } - if col.Default != nil { - children++ - } - fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children) - if col.Type != nil { - fmt.Fprintf(sb, "%s DataType %s\n", indent, 
formatDataType(col.Type)) - } - if col.Default != nil { - explainNode(sb, col.Default, depth+1) - } -} - -// explainAliasedExpr handles expressions with aliases -func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { - // For aliased expressions, we need to show the underlying expression with the alias - indent := strings.Repeat(" ", depth) - - switch e := n.Expr.(type) { - case *ast.Literal: - // Check if this is a tuple with complex expressions that should be rendered as Function tuple - if e.Type == ast.LiteralTuple { - if exprs, ok := e.Value.([]ast.Expression); ok { - hasComplexExpr := false - for _, expr := range exprs { - if _, isLit := expr.(*ast.Literal); !isLit { - hasComplexExpr = true - break - } - } - if hasComplexExpr { - // Render as Function tuple with alias - fmt.Fprintf(sb, "%sFunction tuple (alias %s) (children %d)\n", indent, n.Alias, 1) - fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) - for _, expr := range exprs { - explainNode(sb, expr, depth+2) - } - return - } - } - } - fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, formatLiteral(e), n.Alias) - default: - // For other types, recursively explain and add alias info - explainNode(sb, n.Expr, depth) - } + return explain.Explain(stmt) } From 3ad6dc00505558f36836d8c8b36feb2448ec0fa0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:19:53 +0000 Subject: [PATCH 4/9] Handle empty arrays in Explain output Add support for rendering empty arrays as Function array with empty ExpressionList, matching ClickHouse's EXPLAIN AST format. Tests improved from ~74% to ~74.3% passing (5072 of 6824). 
--- internal/explain/expressions.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index 4dcf5c7d73..1ef0993735 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -50,9 +50,15 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in return } } - // Check if this is an array with complex expressions that should be rendered as Function array + // Check if this is an array with complex expressions or empty that should be rendered as Function array if n.Type == ast.LiteralArray { if exprs, ok := n.Value.([]ast.Expression); ok { + // Empty array renders as Function array with empty ExpressionList + if len(exprs) == 0 { + fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return + } hasComplexExpr := false for _, e := range exprs { if _, isLit := e.(*ast.Literal); !isLit { @@ -69,6 +75,11 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in } return } + } else if n.Value == nil { + // nil value means empty array + fmt.Fprintf(sb, "%sFunction array (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + return } } fmt.Fprintf(sb, "%sLiteral %s\n", indent, FormatLiteral(n)) From 73350a4126f775f82c26abfd9014a58a78697720 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:21:28 +0000 Subject: [PATCH 5/9] Handle engine parentheses in Explain output When an engine has parentheses (like MergeTree()), output it with children count and empty ExpressionList, matching ClickHouse's format. Tests improved to 5090 passing (~74.6% of 6824). 
--- internal/explain/statements.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/explain/statements.go b/internal/explain/statements.go index baf8eaf516..2546a0bdc2 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -48,7 +48,12 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, } fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) if n.Engine != nil { - fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + if n.Engine.HasParentheses { + fmt.Fprintf(sb, "%s Function %s (children %d)\n", indent, n.Engine.Name, 1) + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } else { + fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) + } } if len(n.OrderBy) > 0 { if len(n.OrderBy) == 1 { From 9d774d29cacab0bcefeb6d3781949735fd1f74f6 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:38:08 +0000 Subject: [PATCH 6/9] Fix additional explain output mismatches - Add DROP USER support with proper 'DROP USER query' output format - Add SHOW FUNCTIONS type handling with 'ShowFunctions' output - Fix cast shorthand (::) to output expression as string literal - Escape backslashes in string literals for proper output - Add ENGINE function parameters output in CREATE TABLE - Add SETTINGS output (Set) in CREATE TABLE storage definition - Remove Subquery wrapper for AS SELECT in CREATE TABLE Tests passing: 5128 (up from 5090) --- ast/ast.go | 2 ++ internal/explain/format.go | 30 ++++++++++++++++++++++++++++++ internal/explain/functions.go | 9 ++++++++- internal/explain/statements.go | 26 ++++++++++++++++++++++---- parser/parser.go | 12 ++++++++++-- token/token.go | 2 ++ 6 files changed, 74 insertions(+), 7 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index 1c86b8b483..2b61012d1a 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -315,6 +315,7 @@ type DropQuery struct { Database string `json:"database,omitempty"` Table string 
`json:"table,omitempty"` View string `json:"view,omitempty"` + User string `json:"user,omitempty"` Temporary bool `json:"temporary,omitempty"` OnCluster string `json:"on_cluster,omitempty"` DropDatabase bool `json:"drop_database,omitempty"` @@ -449,6 +450,7 @@ const ( ShowCreateDB ShowType = "CREATE_DATABASE" ShowColumns ShowType = "COLUMNS" ShowDictionaries ShowType = "DICTIONARIES" + ShowFunctions ShowType = "FUNCTIONS" ) // ExplainQuery represents an EXPLAIN statement. diff --git a/internal/explain/format.go b/internal/explain/format.go index 2b31a21e05..3a881e70cb 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -21,6 +21,8 @@ func FormatLiteral(lit *ast.Literal) string { return fmt.Sprintf("Float64_%v", val) case ast.LiteralString: s := lit.Value.(string) + // Escape backslashes in strings + s = strings.ReplaceAll(s, "\\", "\\\\") return fmt.Sprintf("\\'%s\\'", s) case ast.LiteralBoolean: if lit.Value.(bool) { @@ -180,3 +182,31 @@ func UnaryOperatorToFunction(op string) string { return strings.ToLower(op) } } + +// formatExprAsString formats an expression as a string literal for :: cast syntax +func formatExprAsString(expr ast.Expression) string { + switch e := expr.(type) { + case *ast.Literal: + switch e.Type { + case ast.LiteralInteger: + return fmt.Sprintf("%d", e.Value) + case ast.LiteralFloat: + return fmt.Sprintf("%v", e.Value) + case ast.LiteralString: + return e.Value.(string) + case ast.LiteralBoolean: + if e.Value.(bool) { + return "true" + } + return "false" + case ast.LiteralNull: + return "NULL" + default: + return fmt.Sprintf("%v", e.Value) + } + case *ast.Identifier: + return e.Name() + default: + return fmt.Sprintf("%v", expr) + } +} diff --git a/internal/explain/functions.go b/internal/explain/functions.go index 33e25e81db..df49cbdbed 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -55,7 +55,14 @@ func explainCastExpr(sb *strings.Builder, n *ast.CastExpr, indent string, depth // 
CAST is represented as Function CAST with expr and type as arguments fmt.Fprintf(sb, "%sFunction CAST (children %d)\n", indent, 1) fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2) - Node(sb, n.Expr, depth+2) + // For :: operator syntax, expression is represented as string literal + if n.OperatorSyntax { + // Format expression as string literal + exprStr := formatExprAsString(n.Expr) + fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, exprStr) + } else { + Node(sb, n.Expr, depth+2) + } // Type is formatted as a literal string typeStr := FormatDataType(n.Type) fmt.Fprintf(sb, "%s Literal \\'%s\\'\n", indent, typeStr) diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 2546a0bdc2..3f1e4ca3c2 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -35,7 +35,7 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, Column(sb, col, depth+3) } } - if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 { + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || len(n.Settings) > 0 { storageChildren := 0 if n.Engine != nil { storageChildren++ @@ -46,11 +46,21 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, if len(n.PrimaryKey) > 0 { storageChildren++ } + if len(n.Settings) > 0 { + storageChildren++ + } fmt.Fprintf(sb, "%s Storage definition (children %d)\n", indent, storageChildren) if n.Engine != nil { if n.Engine.HasParentheses { fmt.Fprintf(sb, "%s Function %s (children %d)\n", indent, n.Engine.Name, 1) - fmt.Fprintf(sb, "%s ExpressionList\n", indent) + if len(n.Engine.Parameters) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Engine.Parameters)) + for _, param := range n.Engine.Parameters { + Node(sb, param, depth+4) + } + } else { + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } } else { fmt.Fprintf(sb, "%s Function %s\n", indent, n.Engine.Name) } @@ -70,14 +80,22 @@ func 
explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, } } } + if len(n.Settings) > 0 { + fmt.Fprintf(sb, "%s Set\n", indent) + } } if n.AsSelect != nil { - fmt.Fprintf(sb, "%s Subquery (children %d)\n", indent, 1) - Node(sb, n.AsSelect, depth+2) + // AS SELECT is output directly without Subquery wrapper + Node(sb, n.AsSelect, depth+1) } } func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string) { + // DROP USER has a special output format + if n.User != "" { + fmt.Fprintf(sb, "%sDROP USER query\n", indent) + return + } name := n.Table if n.View != "" { name = n.View diff --git a/parser/parser.go b/parser/parser.go index 809b0f7fc5..36af97046b 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1318,6 +1318,7 @@ func (p *Parser) parseDrop() *ast.DropQuery { } // What are we dropping? + dropUser := false switch p.current.Token { case token.TABLE: p.nextToken() @@ -1326,6 +1327,9 @@ func (p *Parser) parseDrop() *ast.DropQuery { p.nextToken() case token.VIEW: p.nextToken() + case token.USER: + dropUser = true + p.nextToken() default: p.nextToken() // skip unknown token } @@ -1356,7 +1360,9 @@ func (p *Parser) parseDrop() *ast.DropQuery { p.nextToken() } } else { - if drop.DropDatabase { + if dropUser { + drop.User = name + } else if drop.DropDatabase { drop.Database = name } else { drop.Table = name @@ -1794,7 +1800,7 @@ func (p *Parser) parseShow() *ast.ShowQuery { } } default: - // Handle SHOW PROCESSLIST, SHOW DICTIONARIES, etc. + // Handle SHOW PROCESSLIST, SHOW DICTIONARIES, SHOW FUNCTIONS, etc. 
if p.currentIs(token.IDENT) { upper := strings.ToUpper(p.current.Value) switch upper { @@ -1802,6 +1808,8 @@ func (p *Parser) parseShow() *ast.ShowQuery { show.ShowType = ast.ShowProcesses case "DICTIONARIES": show.ShowType = ast.ShowDictionaries + case "FUNCTIONS": + show.ShowType = ast.ShowFunctions } p.nextToken() } diff --git a/token/token.go b/token/token.go index ec61d19c3a..e1b58dc0e8 100644 --- a/token/token.go +++ b/token/token.go @@ -177,6 +177,7 @@ const ( UNION UPDATE USE + USER USING VALUES VIEW @@ -355,6 +356,7 @@ var tokens = [...]string{ UNION: "UNION", UPDATE: "UPDATE", USE: "USE", + USER: "USER", USING: "USING", VALUES: "VALUES", VIEW: "VIEW", From 77accb435fec5be326a846ee4de8c14137e743fd Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:43:43 +0000 Subject: [PATCH 7/9] Fix NULL casing and Enum type parameter handling - Change NULL literal output from 'Null' to 'NULL' (uppercase) - Fix DataType with complex parameters (like Enum) to output as children with ExpressionList instead of formatted string - Update Column function to use Node() for type output Tests passing: 5179 (up from 5128) --- internal/explain/explain.go | 4 ++-- internal/explain/format.go | 2 +- internal/explain/statements.go | 26 ++++++++++++++++++++++++-- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/internal/explain/explain.go b/internal/explain/explain.go index 3468b3c720..e20ee4ed4f 100644 --- a/internal/explain/explain.go +++ b/internal/explain/explain.go @@ -116,7 +116,7 @@ func Node(sb *strings.Builder, node interface{}, depth int) { // Types case *ast.DataType: - explainDataType(sb, n, indent) + explainDataType(sb, n, indent, depth) case *ast.Parameter: explainParameter(sb, n, indent) @@ -165,7 +165,7 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) { } fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children) if col.Type != nil { - fmt.Fprintf(sb, "%s DataType %s\n", indent, 
FormatDataType(col.Type)) + Node(sb, col.Type, depth+1) } if col.Default != nil { Node(sb, col.Default, depth+1) diff --git a/internal/explain/format.go b/internal/explain/format.go index 3a881e70cb..6a0fed6216 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -30,7 +30,7 @@ func FormatLiteral(lit *ast.Literal) string { } return "UInt8_0" case ast.LiteralNull: - return "Null" + return "NULL" case ast.LiteralArray: return formatArrayLiteral(lit.Value) case ast.LiteralTuple: diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 3f1e4ca3c2..133ebcfddd 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -138,8 +138,30 @@ func explainDescribeQuery(sb *strings.Builder, n *ast.DescribeQuery, indent stri fmt.Fprintf(sb, "%sDescribe %s\n", indent, name) } -func explainDataType(sb *strings.Builder, n *ast.DataType, indent string) { - fmt.Fprintf(sb, "%sDataType %s\n", indent, FormatDataType(n)) +func explainDataType(sb *strings.Builder, n *ast.DataType, indent string, depth int) { + // Check if type has complex parameters (expressions, not just literals/types) + hasComplexParams := false + for _, p := range n.Parameters { + if _, ok := p.(*ast.Literal); ok { + continue + } + if _, ok := p.(*ast.DataType); ok { + continue + } + hasComplexParams = true + break + } + + if hasComplexParams && len(n.Parameters) > 0 { + // Complex parameters need to be output as children + fmt.Fprintf(sb, "%sDataType %s (children %d)\n", indent, n.Name, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters)) + for _, p := range n.Parameters { + Node(sb, p, depth+2) + } + } else { + fmt.Fprintf(sb, "%sDataType %s\n", indent, FormatDataType(n)) + } } func explainParameter(sb *strings.Builder, n *ast.Parameter, indent string) { From 8f1fd53fc133875f357653a4e8da2eb34904e0f0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 19:46:21 +0000 Subject: [PATCH 8/9] Add FORMAT clause 
output at SelectWithUnionQuery level - Check if any SelectQuery in the union has a Format field - Output Format Identifier at SelectWithUnionQuery level (matches ClickHouse) - Update child count accordingly Tests passing: 5197 (up from 5179) --- internal/explain/select.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/internal/explain/select.go b/internal/explain/select.go index 118e160c72..52e15aa07c 100644 --- a/internal/explain/select.go +++ b/internal/explain/select.go @@ -15,6 +15,13 @@ func explainSelectWithUnionQuery(sb *strings.Builder, n *ast.SelectWithUnionQuer for _, sel := range n.Selects { Node(sb, sel, depth+2) } + // FORMAT clause - check if any SelectQuery has Format set + for _, sel := range n.Selects { + if sq, ok := sel.(*ast.SelectQuery); ok && sq.Format != nil { + Node(sb, sq.Format, depth+1) + break + } + } } func explainSelectQuery(sb *strings.Builder, n *ast.SelectQuery, indent string, depth int) { @@ -75,7 +82,15 @@ func explainOrderByElement(sb *strings.Builder, n *ast.OrderByElement, indent st } func countSelectUnionChildren(n *ast.SelectWithUnionQuery) int { - return 1 // ExpressionList of selects + count := 1 // ExpressionList of selects + // Check if any SelectQuery has Format set + for _, sel := range n.Selects { + if sq, ok := sel.(*ast.SelectQuery); ok && sq.Format != nil { + count++ + break + } + } + return count } func countSelectQueryChildren(n *ast.SelectQuery) int { From 58dc0d994e81d2976800d12014b10f042d5124c9 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 13 Dec 2025 20:23:15 +0000 Subject: [PATCH 9/9] Add TODO.md documenting remaining parser and explain issues --- TODO.md | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000000..9ad0b2b487 --- /dev/null +++ b/TODO.md @@ -0,0 +1,136 @@ +# TODO: Remaining Parser and Explain Issues + +## Current State + +- 
**Tests passing:** 5,197 (76.2%) +- **Tests skipped:** 1,627 (23.8%) + - Parser issues: ~675 + - Explain mismatches: ~637 + +## Parser Issues + +These require changes to `parser/parser.go`: + +### Table/Database Names Starting with Numbers +Tables and databases with names starting with digits fail to parse: +```sql +DROP TABLE IF EXISTS 03657_gby_overflow; +DROP DATABASE IF EXISTS 03710_database; +``` + +### FORMAT Null +The `FORMAT Null` clause is not recognized: +```sql +SELECT ... FORMAT Null; +``` + +### FETCH FIRST ... ROW ONLY +SQL standard fetch syntax is not supported: +```sql +SELECT ... FETCH FIRST 1 ROW ONLY; +``` + +### INSERT INTO FUNCTION +Function-based inserts are not supported: +```sql +INSERT INTO FUNCTION file('file.parquet') SELECT ...; +``` + +### WITH ... AS Subquery Aliases +Subquery aliases in FROM clauses with keyword `AS`: +```sql +SELECT * FROM (SELECT 1 x) AS alias; +``` + +### String Concatenation Operator || +The `||` operator in some contexts: +```sql +SELECT currentDatabase() || '_test' AS key; +``` + +### MOD/DIV Operators +The MOD and DIV keywords as operators: +```sql +SELECT number MOD 3, number DIV 3 FROM ...; +``` + +### Reserved Keyword Handling +Keywords like `LEFT`, `RIGHT` used as table aliases: +```sql +SELECT * FROM numbers(10) AS left RIGHT JOIN ...; +``` + +### Parameterized Settings +Settings with `$` parameters: +```sql +SET param_$1 = 'Hello'; +``` + +### Incomplete CASE Expression +CASE without END: +```sql +SELECT CASE number -- missing END +``` + +## Explain Output Issues + +These require changes to `internal/explain/`: + +### Double Equals (==) Operator +The `==` operator creates extra nested equals/tuple nodes: +```sql +SELECT value == '127.0.0.1:9181' +``` +Expected: `Function equals` with `Identifier` and `Literal` +Got: Nested `Function equals` with extra `Function tuple` + +### CreateQuery Spacing +Some ClickHouse versions output an extra space before `(children`: +``` +CreateQuery d1  (children 1) -- two spaces
+CreateQuery d1 (children 1) -- one space (our output) +``` + +### Server Error Messages in Expected Output +Some test expected outputs include trailing messages: +``` +The query succeeded but the server error '42' was expected +``` +These are not part of the actual EXPLAIN output. + +## Lower Priority + +### DateTime64 with Timezone +Type parameters with string timezone: +```sql +DateTime64(3,'UTC') +``` + +### Complex Type Expressions +Nested type expressions in column definitions: +```sql +CREATE TABLE t (c LowCardinality(UUID)); +``` + +### Parameterized Views +View definitions with parameters: +```sql +CREATE VIEW v AS SELECT ... WHERE x={parity:Int8}; +``` + +## Testing Notes + +Run tests with timeout to catch infinite loops: +```bash +go test ./parser -timeout 5s -v +``` + +Count test results: +```bash +go test ./parser -timeout 5s -v 2>&1 | grep -E 'PASS:|SKIP:' | cut -d':' -f1 | sort | uniq -c +``` + +View explain mismatches: +```bash +go test ./parser -timeout 5s -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100 +```