Skip to content

Commit daa42ec

Browse files
committed
fix(datastore): compare resource version under COLLATE "C" consistently
KSUIDs are base62 with mixed case and only sort chronologically under byte-order. Several subqueries on the resources table compared r2.version against r1.version without an explicit collation, so Postgres and Aurora fell back to the database's locale-aware default. In locale order 'U' > 'f' while in byte order 'U' < 'f', so a chronologically-later KSUID can lose the "is this the latest version?" check to an earlier KSUID that happens to have an uppercase letter in a locale-significant position. Observed effect: the resources table keeps every version of every resource (including a terminal delete row after destroy). LoadResourcesByStack and the CountResources* variants use a "latest row is not a delete" filter via a NOT EXISTS subquery. Under locale ordering, a prior update row with an uppercase-in-position-N version can sort ahead of the later delete row, the filter picks the update as latest, operation != delete passes, and the destroyed resource leaks back as managed. That's how a lgtm-task row from a long-destroyed stack reappeared in a reconcile plan as a spurious delete. COLLATE "C" is already used for the same comparison in many queries in these files — the affected subqueries were the ones that slipped through. Postgres + Aurora only; SQLite's default collation is byte-order so the bug doesn't manifest there. Adds a postgres_test that inserts two rows for a single ksuid — an earlier update with a locale-greater version and a chronologically-later delete with a locale-lesser version — and asserts LoadResourcesByStack returns nothing. Skips when no local Postgres is available.
1 parent f7b27ce commit daa42ec

3 files changed

Lines changed: 92 additions & 6 deletions

File tree

internal/datastore/aurora/aurora.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,7 +1159,7 @@ func (d *DatastoreAuroraDataAPI) CountResourcesInStack(label string) (int, error
11591159
AND NOT EXISTS (
11601160
SELECT 1 FROM resources r2
11611161
WHERE r1.uri = r2.uri
1162-
AND r2.version > r1.version
1162+
AND r2.version COLLATE "C" > r1.version COLLATE "C"
11631163
)
11641164
AND operation != :operation
11651165
`
@@ -1194,7 +1194,7 @@ func (d *DatastoreAuroraDataAPI) LoadAllResourcesByStack() (map[string][]*pkgmod
11941194
SELECT 1
11951195
FROM resources r2
11961196
WHERE r1.uri = r2.uri
1197-
AND r2.version > r1.version
1197+
AND r2.version COLLATE "C" > r1.version COLLATE "C"
11981198
)
11991199
AND operation != :operation
12001200
`
@@ -1253,7 +1253,7 @@ func (d *DatastoreAuroraDataAPI) LoadResourcesByStack(stackLabel string) ([]*pkg
12531253
SELECT 1
12541254
FROM resources r2
12551255
WHERE r1.uri = r2.uri
1256-
AND r2.version > r1.version
1256+
AND r2.version COLLATE "C" > r1.version COLLATE "C"
12571257
)
12581258
AND operation != :operation
12591259
`
@@ -1304,7 +1304,7 @@ func (d *DatastoreAuroraDataAPI) CountResourcesInTarget(targetLabel string) (int
13041304
AND NOT EXISTS (
13051305
SELECT 1 FROM resources r2
13061306
WHERE r1.uri = r2.uri
1307-
AND r2.version > r1.version
1307+
AND r2.version COLLATE "C" > r1.version COLLATE "C"
13081308
)
13091309
AND operation != :operation
13101310
`

internal/datastore/postgres/postgres.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,7 +1602,7 @@ func (d DatastorePostgres) CountResourcesInStack(label string) (int, error) {
16021602
AND NOT EXISTS (
16031603
SELECT 1 FROM resources r2
16041604
WHERE r1.uri = r2.uri
1605-
AND r2.version > r1.version
1605+
AND r2.version COLLATE "C" > r1.version COLLATE "C"
16061606
)
16071607
AND operation != $2
16081608
`
@@ -3257,7 +3257,7 @@ func (d DatastorePostgres) CountResourcesInTarget(targetLabel string) (int, erro
32573257
AND NOT EXISTS (
32583258
SELECT 1 FROM resources r2
32593259
WHERE r1.uri = r2.uri
3260-
AND r2.version > r1.version
3260+
AND r2.version COLLATE "C" > r1.version COLLATE "C"
32613261
)
32623262
AND operation != $2
32633263
`

internal/datastore/postgres/postgres_test.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package postgres_test
88

99
import (
1010
"context"
11+
"encoding/json"
1112
"fmt"
1213
"testing"
1314

@@ -16,6 +17,8 @@ import (
1617
"github.com/platform-engineering-labs/formae/internal/datastore/dstest"
1718
"github.com/platform-engineering-labs/formae/internal/datastore/postgres"
1819
pkgmodel "github.com/platform-engineering-labs/formae/pkg/model"
20+
"github.com/stretchr/testify/assert"
21+
"github.com/stretchr/testify/require"
1922
)
2023

2124
func TestDatastore(t *testing.T) {
@@ -54,3 +57,86 @@ func TestDatastore(t *testing.T) {
5457
}
5558
})
5659
}
60+
61+
// Latest-version subqueries (`r2.version > r1.version`) on the resources table
62+
// depend on byte-order semantics because KSUIDs are base62 (mixed case) and only
63+
// sort chronologically under byte ordering. Under the default Postgres collation,
64+
// locale rules can put an uppercase-letter version before a chronologically-later
65+
// lowercase-letter version, causing the "latest" filter to pick the wrong row.
66+
// When the true latest is a delete and the locale-latest is an update, the
67+
// resource leaks back through as "managed" even after destroy.
68+
//
69+
// Scenario: `update` row has version "2wQUVeqpc…" (uppercase 'U' at position 3),
70+
// `delete` row has a chronologically-later version "2wQfTRyui…" (lowercase 'f').
71+
// Byte order: delete > update (correct — delete is latest, exclude resource).
72+
// Locale order: update > delete (wrong — update appears latest, resource leaks).
73+
func TestLoadResourcesByStack_ExcludesDeletedResourceWhenVersionsMixCase(t *testing.T) {
74+
ctx := context.Background()
75+
76+
adminConn, err := pgx.Connect(ctx, "postgres://postgres:admin@localhost:5432/postgres")
77+
if err != nil {
78+
t.Skipf("Postgres not available: %v", err)
79+
}
80+
adminConn.Close(ctx)
81+
82+
cfg := &pkgmodel.DatastoreConfig{
83+
DatastoreType: pkgmodel.PostgresDatastore,
84+
Postgres: pkgmodel.PostgresConfig{
85+
Host: "localhost",
86+
Port: 5432,
87+
User: "postgres",
88+
Password: "admin",
89+
Database: fmt.Sprintf("test_collate_%s", mksuid.New().String()),
90+
},
91+
}
92+
ds, err := postgres.NewDatastorePostgresEnsureDatabase(ctx, cfg, "test")
93+
require.NoError(t, err)
94+
defer func() {
95+
if d, ok := ds.(postgres.DatastorePostgres); ok {
96+
_ = d.CleanUp()
97+
}
98+
}()
99+
100+
// Open a second connection for direct INSERT to control the version column precisely.
101+
connStr := postgres.BuildConnStr(cfg.Postgres.Host, cfg.Postgres.Port, cfg.Postgres.User, cfg.Postgres.Password, cfg.Postgres.Database)
102+
conn, err := pgx.Connect(ctx, connStr)
103+
require.NoError(t, err)
104+
defer conn.Close(ctx) //nolint:errcheck
105+
106+
// Create the stack row first so downstream code that joins against stacks works.
107+
_, err = conn.Exec(ctx, `INSERT INTO stacks (ksuid, version, label, description) VALUES ($1, $2, $3, $4)`,
108+
"stack-ksuid", "v1", "test-stack", "")
109+
require.NoError(t, err)
110+
111+
// Earlier update row with an uppercase letter in the version's locale-significant position.
112+
updateVersion := "2wQUVeqpcVTSF4ROlhC9N4kMUPk"
113+
// Chronologically-later delete row with a lowercase letter. Byte order puts this after
114+
// updateVersion (correct); locale order puts it before (the bug).
115+
deleteVersion := "2wQfTRyuifVOB5LKSwee7d8aErH"
116+
117+
uri := "formae://test-ksuid"
118+
data := json.RawMessage(`{"Schema":{},"Properties":{}}`)
119+
120+
_, err = conn.Exec(ctx, `INSERT INTO resources (uri, version, command_id, operation, native_id, stack, type, label, target, data, managed, ksuid)
121+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)`,
122+
uri, updateVersion, "cmd-1", "update", "native-1", "test-stack", "type-1", "label-1", "", data, true, "test-ksuid")
123+
require.NoError(t, err)
124+
125+
_, err = conn.Exec(ctx, `INSERT INTO resources (uri, version, command_id, operation, native_id, stack, type, label, target, data, managed, ksuid)
126+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)`,
127+
uri, deleteVersion, "cmd-2", "delete", "native-1", "test-stack", "type-1", "label-1", "", data, true, "test-ksuid")
128+
require.NoError(t, err)
129+
130+
// Sanity check: under the default collation, 'U' > 'f' — the bug's precondition.
131+
var localeGT bool
132+
err = conn.QueryRow(ctx, `SELECT 'U' > 'f'`).Scan(&localeGT)
133+
require.NoError(t, err)
134+
require.True(t, localeGT, "test precondition: default collation must treat 'U' as greater than 'f'")
135+
136+
results, err := ds.LoadResourcesByStack("test-stack")
137+
require.NoError(t, err)
138+
139+
assert.Empty(t, results,
140+
"resource with a later delete row must be excluded from LoadResourcesByStack, "+
141+
"but uncollated version comparison picked the earlier update row as 'latest'")
142+
}

0 commit comments

Comments
 (0)