diff --git a/internal/buildgen/build_data_iteration.go b/internal/buildgen/build_data_iteration.go index 3e72ce6d..200fcfd6 100644 --- a/internal/buildgen/build_data_iteration.go +++ b/internal/buildgen/build_data_iteration.go @@ -15,6 +15,10 @@ import ( const ( buildIterationBudget = 50000 buildValueMaxDepth = 64 + // buildSeqBound caps seq() operands at the IEEE-754 safe-integer range so the + // end-start length cannot overflow int before the budget check, and so seq + // values round-trip through JSON exactly. + buildSeqBound = 1 << 53 ) // buildEnv is the evaluation environment for one build {} block. data holds the @@ -327,11 +331,12 @@ func indexTopLevelWord(source string, word string) int { // exprScanner tracks bracket depth and string-literal state so the top-level // scanners ignore separators nested inside parentheses, brackets, braces, or -// string literals. +// string literals (both interpreted "..." and raw `...` Go strings). type exprScanner struct { - depth int - inString bool - escaped bool + depth int + inString bool + escaped bool + inRawString bool } func newExprScanner() *exprScanner { @@ -345,6 +350,12 @@ func (s *exprScanner) step(char byte) bool { s.escaped = false return false } + if s.inRawString { + if char == '`' { + s.inRawString = false + } + return false + } if s.inString { switch char { case '\\': @@ -358,6 +369,9 @@ func (s *exprScanner) step(char byte) bool { case '"': s.inString = true return false + case '`': + s.inRawString = true + return false case '(', '[', '{': s.depth++ return false diff --git a/internal/buildgen/build_data_iteration_test.go b/internal/buildgen/build_data_iteration_test.go index ba8b25a8..84139175 100644 --- a/internal/buildgen/build_data_iteration_test.go +++ b/internal/buildgen/build_data_iteration_test.go @@ -26,6 +26,72 @@ func assertBuildField(t *testing.T, data map[string]string, field, want string) } } +func TestBuildDataReviewRegressions(t *testing.T) { + t.Run("raw string list elements", func(t *testing.T) { + data := evalBuildData(t, "=> { labels: [`a,b`, `c`] }", nil) + assertBuildField(t, data, "labels", `["a,b","c"]`) + }) + + t.Run("leading-zero integers serialize as canonical json", func(t *testing.T) { + data := evalBuildData(t, `=> { nums: [01, 02] }`, nil) + assertBuildField(t, data, "nums", "[1,2]") + }) + + t.Run("wide integer literals keep full precision", func(t *testing.T) { + data := evalBuildData(t, `=> { ids: [9007199254740993] }`, nil) + assertBuildField(t, data, "ids", "[9007199254740993]") + }) + + errorCases := []struct { + name string + body string + want string + }{ + { + name: "reverse charges the element budget", + body: `=> { xs: seq(0, 30000) } +=> { ys: reverse(field("xs")) }`, + want: "exceeded the limit", + }, + { + name: "take charges the element budget", + body: `=> { xs: seq(0, 30000) } +=> { ys: take(field("xs"), 30000) }`, + want: "exceeded the limit", + }, + { + name: "seq bounds cannot overflow", + body: `=> { x: seq(-9000000000000000000, 9000000000000000000) }`, + want: "seq bounds must be within", + }, + { + name: "deeply nested expressions are bounded", + body: `=> { deep: ` + strings.Repeat("(", 70) + "1" + strings.Repeat(")", 70) + ` }`, + want: "nested too deeply", + }, + } + for _, testCase := range errorCases { + t.Run(testCase.name, func(t *testing.T) { + _, err := parseBuildData(testCase.body, nil, "", nil, nil, "") + if err == nil { + t.Fatalf("expected error containing %q, got nil", testCase.want) + } + if !strings.Contains(err.Error(), testCase.want) { + t.Fatalf("error %q does not contain %q", err.Error(), testCase.want) + } + }) + } +} + +func TestBuildFunctionOutputPreservesWideIntegers(t *testing.T) { + data, err := parseBuildFunctionOutput([]byte(`{"big":9007199254740993,"ids":[9007199254740993,2]}`)) + if err != nil { + t.Fatal(err) + } + assertBuildField(t, data, "big", "9007199254740993") + assertBuildField(t, data, "ids", "[9007199254740993,2]") +} + func TestBuildDataComprehensionFilterAndReductions(t *testing.T) { data := evalBuildData(t, `=> { nums: seq(1, 6) } => { evens: [n for n in field("nums") if n % 2 == 0] } diff --git a/internal/buildgen/build_data_runner.go b/internal/buildgen/build_data_runner.go index fa3cf403..6e9844b0 100644 --- a/internal/buildgen/build_data_runner.go +++ b/internal/buildgen/build_data_runner.go @@ -534,8 +534,13 @@ func isBuildDataSignatureMismatch(err error) bool { } func parseBuildFunctionOutput(output []byte) (map[string]string, error) { + // UseNumber keeps numbers as their exact source text (json.Number) instead of + // float64, so large integer IDs inside returned slices/structs are not + // silently rounded when re-serialized below. + decoder := json.NewDecoder(bytes.NewReader(output)) + decoder.UseNumber() var raw map[string]any - if err := json.Unmarshal(output, &raw); err != nil { + if err := decoder.Decode(&raw); err != nil { return nil, fmt.Errorf("decode build data output: %w", err) } if len(raw) == 0 { @@ -569,6 +574,8 @@ func buildFieldOutputString(value any) (string, bool) { return "", false } return typed, true + case json.Number: + return typed.String(), true case float64: return strconv.FormatFloat(typed, 'f', -1, 64), true case bool: diff --git a/internal/buildgen/build_data_values.go b/internal/buildgen/build_data_values.go index a136048e..8d5be139 100644 --- a/internal/buildgen/build_data_values.go +++ b/internal/buildgen/build_data_values.go @@ -78,7 +78,7 @@ func (v buildValue) jsonText() string { encoded, _ := json.Marshal(v.text) return string(encoded) case buildValueNumber: - return v.text + return canonicalJSONNumber(v.text, v.number) case buildValueBool: return v.text case buildValueNil: @@ -101,6 +101,66 @@ func (v buildValue) jsonText() string { } } +// canonicalJSONNumber returns a JSON-valid spelling of a number value. A scalar's +// text is the author's literal form, which can be valid Go but invalid JSON (for +// example the octal-style `01`). When the literal is already canonical JSON it is +// preserved verbatim so wide integer literals keep full precision; otherwise it +// is reformatted from the parsed float. +func canonicalJSONNumber(text string, number float64) string { + if isCanonicalJSONNumber(text) { + return text + } + return strconv.FormatFloat(number, 'f', -1, 64) +} + +// isCanonicalJSONNumber reports whether text is a JSON number literal: an +// optional minus, an integer part with no redundant leading zero, and optional +// fraction and exponent. +func isCanonicalJSONNumber(text string) bool { + if text == "" { + return false + } + index := 0 + if text[index] == '-' { + index++ + } + intStart := index + for index < len(text) && text[index] >= '0' && text[index] <= '9' { + index++ + } + intLen := index - intStart + if intLen == 0 { + return false + } + if intLen > 1 && text[intStart] == '0' { + return false + } + if index < len(text) && text[index] == '.' { + index++ + fracStart := index + for index < len(text) && text[index] >= '0' && text[index] <= '9' { + index++ + } + if index == fracStart { + return false + } + } + if index < len(text) && (text[index] == 'e' || text[index] == 'E') { + index++ + if index < len(text) && (text[index] == '+' || text[index] == '-') { + index++ + } + expStart := index + for index < len(text) && text[index] >= '0' && text[index] <= '9' { + index++ + } + if index == expStart { + return false + } + } + return index == len(text) +} + func buildValueStrings(data map[string]buildValue) map[string]string { out := make(map[string]string, len(data)) for key, value := range data { @@ -162,6 +222,9 @@ func buildFieldValueFromString(name string, exprStr string, env *buildEnv) (stri } func buildEvalAST(expr ast.Expr, env *buildEnv) (buildValue, error) { + if env.depth > buildValueMaxDepth { + return buildValue{}, fmt.Errorf("build expression nested too deeply (limit %d)", buildValueMaxDepth) + } switch typed := expr.(type) { case *ast.BasicLit: switch typed.Kind { @@ -207,25 +270,25 @@ func buildEvalAST(expr ast.Expr, env *buildEnv) (buildValue, error) { return value, nil } case *ast.SelectorExpr: - return buildSelectorValue(typed, env) + return buildSelectorValue(typed, env.deeper()) case *ast.IndexExpr: - return buildIndexValue(typed, env) + return buildIndexValue(typed, env.deeper()) case *ast.CallExpr: - return buildCallValue(typed, env) + return buildCallValue(typed, env.deeper()) case *ast.ParenExpr: - return buildEvalAST(typed.X, env) + return buildEvalAST(typed.X, env.deeper()) case *ast.UnaryExpr: - value, err := buildEvalAST(typed.X, env) + value, err := buildEvalAST(typed.X, env.deeper()) if err != nil { return buildValue{}, err } return buildUnaryValue(typed.Op, value) case *ast.BinaryExpr: - left, err := buildEvalAST(typed.X, env) + left, err := buildEvalAST(typed.X, env.deeper()) if err != nil { return buildValue{}, err } - right, err := buildEvalAST(typed.Y, env) + right, err := buildEvalAST(typed.Y, env.deeper()) if err != nil { return buildValue{}, err } @@ -349,6 +412,11 @@ func buildSeqCall(call *ast.CallExpr, env *buildEnv) (buildValue, error) { if end < start { return buildValue{}, fmt.Errorf("seq end %d must be >= start %d", end, start) } + // Bound the operands so end-start cannot overflow int before the budget check; + // a valid seq produces at most the per-block budget anyway. + if start < -buildSeqBound || start > buildSeqBound || end < -buildSeqBound || end > buildSeqBound { + return buildValue{}, fmt.Errorf("seq bounds must be within [-%d, %d]", buildSeqBound, buildSeqBound) + } if err := env.consume(end - start); err != nil { return buildValue{}, err } @@ -449,6 +517,9 @@ func buildTakeCall(call *ast.CallExpr, env *buildEnv) (buildValue, error) { if limit > len(list.items) { limit = len(list.items) } + if err := env.consume(limit); err != nil { + return buildValue{}, err + } return buildListValue(append([]buildValue(nil), list.items[:limit]...)), nil } @@ -457,6 +528,9 @@ func buildReverseCall(call *ast.CallExpr, env *buildEnv) (buildValue, error) { if err != nil { return buildValue{}, err } + if err := env.consume(len(list.items)); err != nil { + return buildValue{}, err + } reversed := make([]buildValue, len(list.items)) for index, item := range list.items { reversed[len(list.items)-1-index] = item