Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions internal/buildgen/build_data_iteration.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ import (
const (
buildIterationBudget = 50000
buildValueMaxDepth = 64
// buildSeqBound caps seq() operands at the IEEE-754 safe-integer range so the
// end-start length cannot overflow int before the budget check, and so seq
// values round-trip through JSON exactly.
buildSeqBound = 1 << 53
)

// buildEnv is the evaluation environment for one build {} block. data holds the
Expand Down Expand Up @@ -327,11 +331,12 @@ func indexTopLevelWord(source string, word string) int {

// exprScanner tracks bracket depth and string-literal state so the top-level
// scanners ignore separators nested inside parentheses, brackets, braces, or
// string literals.
// string literals (both interpreted "..." and raw `...` Go strings).
type exprScanner struct {
depth int
inString bool
escaped bool
depth int
inString bool
escaped bool
inRawString bool
}

func newExprScanner() *exprScanner {
Expand All @@ -345,6 +350,12 @@ func (s *exprScanner) step(char byte) bool {
s.escaped = false
return false
}
if s.inRawString {
if char == '`' {
s.inRawString = false
}
return false
}
if s.inString {
switch char {
case '\\':
Expand All @@ -358,6 +369,9 @@ func (s *exprScanner) step(char byte) bool {
case '"':
s.inString = true
return false
case '`':
s.inRawString = true
return false
case '(', '[', '{':
s.depth++
return false
Expand Down
66 changes: 66 additions & 0 deletions internal/buildgen/build_data_iteration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,72 @@ func assertBuildField(t *testing.T, data map[string]string, field, want string)
}
}

func TestBuildDataReviewRegressions(t *testing.T) {
t.Run("raw string list elements", func(t *testing.T) {
data := evalBuildData(t, "=> { labels: [`a,b`, `c`] }", nil)
assertBuildField(t, data, "labels", `["a,b","c"]`)
})

t.Run("leading-zero integers serialize as canonical json", func(t *testing.T) {
data := evalBuildData(t, `=> { nums: [01, 02] }`, nil)
assertBuildField(t, data, "nums", "[1,2]")
})

t.Run("wide integer literals keep full precision", func(t *testing.T) {
data := evalBuildData(t, `=> { ids: [9007199254740993] }`, nil)
assertBuildField(t, data, "ids", "[9007199254740993]")
})

errorCases := []struct {
name string
body string
want string
}{
{
name: "reverse charges the element budget",
body: `=> { xs: seq(0, 30000) }
=> { ys: reverse(field("xs")) }`,
want: "exceeded the limit",
},
{
name: "take charges the element budget",
body: `=> { xs: seq(0, 30000) }
=> { ys: take(field("xs"), 30000) }`,
want: "exceeded the limit",
},
{
name: "seq bounds cannot overflow",
body: `=> { x: seq(-9000000000000000000, 9000000000000000000) }`,
want: "seq bounds must be within",
},
{
name: "deeply nested expressions are bounded",
body: `=> { deep: ` + strings.Repeat("(", 70) + "1" + strings.Repeat(")", 70) + ` }`,
want: "nested too deeply",
},
}
for _, testCase := range errorCases {
t.Run(testCase.name, func(t *testing.T) {
_, err := parseBuildData(testCase.body, nil, "", nil, nil, "")
if err == nil {
t.Fatalf("expected error containing %q, got nil", testCase.want)
}
if !strings.Contains(err.Error(), testCase.want) {
t.Fatalf("error %q does not contain %q", err.Error(), testCase.want)
}
})
}
}

func TestBuildFunctionOutputPreservesWideIntegers(t *testing.T) {
data, err := parseBuildFunctionOutput([]byte(`{"big":9007199254740993,"ids":[9007199254740993,2]}`))
if err != nil {
t.Fatal(err)
}
assertBuildField(t, data, "big", "9007199254740993")
assertBuildField(t, data, "ids", "[9007199254740993,2]")
}

func TestBuildDataComprehensionFilterAndReductions(t *testing.T) {
data := evalBuildData(t, `=> { nums: seq(1, 6) }
=> { evens: [n for n in field("nums") if n % 2 == 0] }
Expand Down
9 changes: 8 additions & 1 deletion internal/buildgen/build_data_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,13 @@ func isBuildDataSignatureMismatch(err error) bool {
}

func parseBuildFunctionOutput(output []byte) (map[string]string, error) {
// UseNumber keeps numbers as their exact source text (json.Number) instead of
// float64, so large integer IDs inside returned slices/structs are not
// silently rounded when re-serialized below.
decoder := json.NewDecoder(bytes.NewReader(output))
decoder.UseNumber()
var raw map[string]any
if err := json.Unmarshal(output, &raw); err != nil {
if err := decoder.Decode(&raw); err != nil {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Reject trailing helper stdout after JSON decode

In the build-data runner path I checked, stdout is supposed to be exactly one JSON object while stderr is handled separately. Switching from json.Unmarshal to a single Decode accepts {"title":"ok"} debug or a second JSON object after the first and silently ignores the trailing bytes, so a helper that writes trailing stdout can produce a successful build with only the first object instead of surfacing corrupted helper output. After this decode, check that the decoder is at EOF (or otherwise reject non-whitespace trailing data).

Useful? React with 👍 / 👎.

return nil, fmt.Errorf("decode build data output: %w", err)
}
if len(raw) == 0 {
Expand Down Expand Up @@ -569,6 +574,8 @@ func buildFieldOutputString(value any) (string, bool) {
return "", false
}
return typed, true
case json.Number:
return typed.String(), true
case float64:
return strconv.FormatFloat(typed, 'f', -1, 64), true
case bool:
Expand Down
90 changes: 82 additions & 8 deletions internal/buildgen/build_data_values.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func (v buildValue) jsonText() string {
encoded, _ := json.Marshal(v.text)
return string(encoded)
case buildValueNumber:
return v.text
return canonicalJSONNumber(v.text, v.number)
case buildValueBool:
return v.text
case buildValueNil:
Expand All @@ -101,6 +101,66 @@ func (v buildValue) jsonText() string {
}
}

// canonicalJSONNumber returns a JSON-valid spelling of a number value. A scalar's
// text is the author's literal form, which can be valid Go but invalid JSON (for
// example the octal-style `01`). When the literal is already canonical JSON it is
// preserved verbatim so wide integer literals keep full precision; otherwise it
// is reformatted from the parsed float.
func canonicalJSONNumber(text string, number float64) string {
if isCanonicalJSONNumber(text) {
return text
}
return strconv.FormatFloat(number, 'f', -1, 64)
}

// isCanonicalJSONNumber reports whether text is a JSON number literal: an
// optional minus, an integer part with no redundant leading zero, and optional
// fraction and exponent.
func isCanonicalJSONNumber(text string) bool {
if text == "" {
return false
}
index := 0
if text[index] == '-' {
index++
}
intStart := index
for index < len(text) && text[index] >= '0' && text[index] <= '9' {
index++
}
intLen := index - intStart
if intLen == 0 {
return false
}
if intLen > 1 && text[intStart] == '0' {
return false
}
if index < len(text) && text[index] == '.' {
index++
fracStart := index
for index < len(text) && text[index] >= '0' && text[index] <= '9' {
index++
}
if index == fracStart {
return false
}
}
if index < len(text) && (text[index] == 'e' || text[index] == 'E') {
index++
if index < len(text) && (text[index] == '+' || text[index] == '-') {
index++
}
expStart := index
for index < len(text) && text[index] >= '0' && text[index] <= '9' {
index++
}
if index == expStart {
return false
}
}
return index == len(text)
}

func buildValueStrings(data map[string]buildValue) map[string]string {
out := make(map[string]string, len(data))
for key, value := range data {
Expand Down Expand Up @@ -162,6 +222,9 @@ func buildFieldValueFromString(name string, exprStr string, env *buildEnv) (stri
}

func buildEvalAST(expr ast.Expr, env *buildEnv) (buildValue, error) {
if env.depth > buildValueMaxDepth {
return buildValue{}, fmt.Errorf("build expression nested too deeply (limit %d)", buildValueMaxDepth)
}
switch typed := expr.(type) {
case *ast.BasicLit:
switch typed.Kind {
Expand Down Expand Up @@ -207,25 +270,25 @@ func buildEvalAST(expr ast.Expr, env *buildEnv) (buildValue, error) {
return value, nil
}
case *ast.SelectorExpr:
return buildSelectorValue(typed, env)
return buildSelectorValue(typed, env.deeper())
case *ast.IndexExpr:
return buildIndexValue(typed, env)
return buildIndexValue(typed, env.deeper())
case *ast.CallExpr:
return buildCallValue(typed, env)
return buildCallValue(typed, env.deeper())
case *ast.ParenExpr:
return buildEvalAST(typed.X, env)
return buildEvalAST(typed.X, env.deeper())
case *ast.UnaryExpr:
value, err := buildEvalAST(typed.X, env)
value, err := buildEvalAST(typed.X, env.deeper())
if err != nil {
return buildValue{}, err
}
return buildUnaryValue(typed.Op, value)
case *ast.BinaryExpr:
left, err := buildEvalAST(typed.X, env)
left, err := buildEvalAST(typed.X, env.deeper())
if err != nil {
return buildValue{}, err
}
right, err := buildEvalAST(typed.Y, env)
right, err := buildEvalAST(typed.Y, env.deeper())
if err != nil {
return buildValue{}, err
}
Expand Down Expand Up @@ -349,6 +412,11 @@ func buildSeqCall(call *ast.CallExpr, env *buildEnv) (buildValue, error) {
if end < start {
return buildValue{}, fmt.Errorf("seq end %d must be >= start %d", end, start)
}
// Bound the operands so end-start cannot overflow int before the budget check;
// a valid seq produces at most the per-block budget anyway.
if start < -buildSeqBound || start > buildSeqBound || end < -buildSeqBound || end > buildSeqBound {
return buildValue{}, fmt.Errorf("seq bounds must be within [-%d, %d]", buildSeqBound, buildSeqBound)
}
if err := env.consume(end - start); err != nil {
return buildValue{}, err
}
Expand Down Expand Up @@ -449,6 +517,9 @@ func buildTakeCall(call *ast.CallExpr, env *buildEnv) (buildValue, error) {
if limit > len(list.items) {
limit = len(list.items)
}
if err := env.consume(limit); err != nil {
return buildValue{}, err
}
return buildListValue(append([]buildValue(nil), list.items[:limit]...)), nil
}

Expand All @@ -457,6 +528,9 @@ func buildReverseCall(call *ast.CallExpr, env *buildEnv) (buildValue, error) {
if err != nil {
return buildValue{}, err
}
if err := env.consume(len(list.items)); err != nil {
return buildValue{}, err
}
reversed := make([]buildValue, len(list.items))
for index, item := range list.items {
reversed[len(list.items)-1-index] = item
Expand Down