36 changes: 35 additions & 1 deletion lib/images/oci.go
@@ -317,6 +317,14 @@ func (c *ociClient) unpackLayers(ctx context.Context, layoutTag, targetDir strin
return fmt.Errorf("get manifest: %w", err)
}

configFile, err := img.ConfigFile()
if err != nil {
return fmt.Errorf("get config file: %w", err)
}
if err := validateConfigFileForUnpack(layoutTag, gcrManifest, configFile); err != nil {
return err
}

// Convert go-containerregistry manifest to OCI v1.Manifest for umoci
ociManifest := convertToOCIManifest(gcrManifest)

@@ -351,14 +359,40 @@ func (c *ociClient) unpackLayers(ctx context.Context, layoutTag, targetDir strin
},
}

err = layer.UnpackRootfs(context.Background(), casEngine, targetDir, ociManifest, unpackOpts)
err = layer.UnpackRootfs(ctx, casEngine, targetDir, ociManifest, unpackOpts)
if err != nil {
return fmt.Errorf("unpack rootfs: %w", err)
}

return nil
}

// validateConfigFileForUnpack rejects malformed image configs before calling
// umoci. In particular, we verify that the config blob resolves to a real OCI
// image config, that it declares a layered rootfs, and that rootfs.diff_ids has
// one entry per manifest layer so umoci won't index past the end of the slice.
func validateConfigFileForUnpack(layoutTag string, manifest *gcr.Manifest, configFile *gcr.ConfigFile) error {
if convertToOCIMediaType(string(manifest.Config.MediaType)) != v1.MediaTypeImageConfig {
return fmt.Errorf(
"unpack rootfs: config blob is not correct mediatype %s: %s",
v1.MediaTypeImageConfig,
manifest.Config.MediaType,
)
}
if configFile.RootFS.Type != "layers" {
return fmt.Errorf("unpack rootfs: config: unsupported rootfs.type: %s", configFile.RootFS.Type)
}
if len(configFile.RootFS.DiffIDs) != len(manifest.Layers) {
return fmt.Errorf(
"unpack rootfs: config rootfs.diff_ids has %d entries but manifest has %d layers for %s",
len(configFile.RootFS.DiffIDs),
len(manifest.Layers),
layoutTag,
)
}
return nil
}

// convertToOCIManifest converts a go-containerregistry manifest to OCI v1.Manifest
// This allows us to use go-containerregistry (which handles both Docker v2 and OCI v1)
// for manifest parsing, while still using umoci for layer unpacking.
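For context outside the diff, here is a minimal sketch of how the new check behaves on the kind of mismatch the captured fixture exhibits: one manifest layer but an empty rootfs.diff_ids. It assumes the gcr alias points at github.com/google/go-containerregistry/pkg/v1 and that convertToOCIMediaType passes OCI media types through unchanged; it is an illustration, not part of the change.

	// Build a manifest that declares one layer but a config whose rootfs
	// lists no diff_ids, then ask the validator to vet it.
	manifest := &gcr.Manifest{
		Config: gcr.Descriptor{MediaType: "application/vnd.oci.image.config.v1+json"},
		Layers: []gcr.Descriptor{{MediaType: "application/vnd.oci.image.layer.v1.tar+gzip"}},
	}
	configFile := &gcr.ConfigFile{RootFS: gcr.RootFS{Type: "layers"}} // DiffIDs deliberately empty
	err := validateConfigFileForUnpack("some-tag", manifest, configFile)
	// err is non-nil: "unpack rootfs: config rootfs.diff_ids has 0 entries but manifest has 1 layers for some-tag"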
102 changes: 102 additions & 0 deletions lib/images/recovery_regression_test.go
@@ -0,0 +1,102 @@
package images

import (
"context"
"os"
"path/filepath"
"testing"
"time"

"github.com/kernel/hypeman/lib/paths"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

const (
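// The captured fixture tags the image by its manifest digest, so the tag,
// digest, and digest-hex constants below share the same hex value (see the
// org.opencontainers.image.ref.name annotation in the fixture's index.json).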
recoveryFixtureDir = "testdata/recover-interrupted-build-panic"
recoveryFixtureRepo = "docker.io/library/hypeman-test"
recoveryFixtureDigest = "sha256:073e2a02f0df492def76940a909b6b79b896fc8907cceeb03452b250697d98fa"
recoveryFixtureTag = "073e2a02f0df492def76940a909b6b79b896fc8907cceeb03452b250697d98fa"
recoveryFixtureDigestHex = "073e2a02f0df492def76940a909b6b79b896fc8907cceeb03452b250697d98fa"
)

func TestUnpackLayersCapturedFixtureReturnsErrorInsteadOfPanicking(t *testing.T) {
dataDir := copyRecoveryFixture(t)

client, err := newOCIClient(filepath.Join(dataDir, "system", "oci-cache"))
require.NoError(t, err)

var unpackErr error
require.NotPanics(t, func() {
unpackErr = client.unpackLayers(context.Background(), recoveryFixtureTag, filepath.Join(t.TempDir(), "rootfs"))
})

require.Error(t, unpackErr)
assert.Contains(t, unpackErr.Error(), "config rootfs.diff_ids has 0 entries but manifest has 1 layers")
}

func TestRecoverInterruptedBuildsCapturedFixtureMarksBuildFailed(t *testing.T) {
dataDir := copyRecoveryFixture(t)
p := paths.New(dataDir)

client, err := newOCIClient(p.SystemOCICache())
require.NoError(t, err)

m := &manager{
paths: p,
ociClient: client,
queue: NewBuildQueue(1),
readySubscribers: make(map[string][]chan StatusEvent),
}

m.RecoverInterruptedBuilds()

require.Eventually(t, func() bool {
meta, err := readMetadata(p, recoveryFixtureRepo, recoveryFixtureDigestHex)
if err != nil || meta.Error == nil {
return false
}
return meta.Status == StatusFailed && m.queue.QueueLength() == 0
}, 5*time.Second, 20*time.Millisecond)

meta, err := readMetadata(p, recoveryFixtureRepo, recoveryFixtureDigestHex)
require.NoError(t, err)
require.NotNil(t, meta.Error)
assert.Equal(t, recoveryFixtureDigest, meta.Digest)
assert.Equal(t, StatusFailed, meta.Status)
assert.Contains(t, *meta.Error, "config rootfs.diff_ids has 0 entries but manifest has 1 layers")
}

func copyRecoveryFixture(t *testing.T) string {
t.Helper()

dst := t.TempDir()
copyTree(t, recoveryFixtureDir, dst)
return dst
}

func copyTree(t *testing.T, src, dst string) {
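// Recursively copy src into dst, preserving each entry's permission bits.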
t.Helper()

entries, err := os.ReadDir(src)
require.NoError(t, err)

for _, entry := range entries {
srcPath := filepath.Join(src, entry.Name())
dstPath := filepath.Join(dst, entry.Name())

info, err := entry.Info()
require.NoError(t, err)

if entry.IsDir() {
require.NoError(t, os.MkdirAll(dstPath, info.Mode().Perm()))
copyTree(t, srcPath, dstPath)
continue
}

data, err := os.ReadFile(srcPath)
require.NoError(t, err)
require.NoError(t, os.MkdirAll(filepath.Dir(dstPath), 0755))
require.NoError(t, os.WriteFile(dstPath, data, info.Mode().Perm()))
}
}
@@ -0,0 +1,11 @@
{
"name": "docker.io/library/hypeman-test:test-ts-sdk-url",
"digest": "sha256:073e2a02f0df492def76940a909b6b79b896fc8907cceeb03452b250697d98fa",
"status": "pulling",
"request": {
"Name": "docker.io/library/hypeman-test:test-ts-sdk-url",
"Tags": null
},
"size_bytes": 0,
"created_at": "2026-03-28T18:49:07.380510736Z"
}
@@ -0,0 +1 @@
{"schemaVersion":2,"mediaType":"application/vnd.oci.image.manifest.v1+json","config":{"mediaType":"application/vnd.oci.image.config.v1+json","size":90,"digest":"sha256:dc570f145a7f2862c9ef3c30b8d6ae2feaceb0d364e4b2e08e67ae18815427d9"},"layers":[{"mediaType":"application/vnd.oci.image.layer.v1.tar+gzip","size":50,"digest":"sha256:f50e8885e7ceec00da1f35284e2f3cc986874b6640dd80cfc27270a4ba4ab965"}]}
@@ -0,0 +1 @@
{"architecture":"amd64","os":"linux","config":{},"rootfs":{"type":"layers","diff_ids":[]}}
Binary file not shown.
@@ -0,0 +1 @@
{"schemaVersion":2,"mediaType":"application/vnd.oci.image.index.v1+json","manifests":[{"mediaType":"application/vnd.oci.image.manifest.v1+json","size":399,"digest":"sha256:073e2a02f0df492def76940a909b6b79b896fc8907cceeb03452b250697d98fa","annotations":{"org.opencontainers.image.ref.name":"073e2a02f0df492def76940a909b6b79b896fc8907cceeb03452b250697d98fa"}}]}
@@ -0,0 +1 @@
{"imageLayoutVersion":"1.0.0"}
20 changes: 10 additions & 10 deletions lib/instances/firecracker_test.go
@@ -156,7 +156,7 @@ func TestFirecrackerStandbyAndRestore(t *testing.T) {
}
})

inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.NoError(t, waitForExecAgent(ctx, mgr, inst.Id, 30*time.Second))

@@ -194,7 +194,7 @@ func TestFirecrackerStandbyAndRestore(t *testing.T) {
inst, err = mgr.RestoreInstance(ctx, inst.Id)
require.NoError(t, err)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.Equal(t, StateRunning, inst.State)
runningDuration := time.Since(start)
@@ -276,7 +276,7 @@ func TestFirecrackerStopClearsStaleSnapshot(t *testing.T) {
})
require.NoError(t, err)
require.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.Equal(t, StateRunning, inst.State)

@@ -289,7 +289,7 @@ func TestFirecrackerStopClearsStaleSnapshot(t *testing.T) {
inst, err = mgr.RestoreInstance(ctx, inst.Id)
require.NoError(t, err)
require.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.Equal(t, StateRunning, inst.State)

@@ -320,7 +320,7 @@ func TestFirecrackerStopClearsStaleSnapshot(t *testing.T) {
inst, err = mgr.StartInstance(ctx, inst.Id, StartInstanceRequest{})
require.NoError(t, err)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
assert.Equal(t, StateRunning, inst.State)

@@ -357,7 +357,7 @@ func TestFirecrackerNetworkLifecycle(t *testing.T) {
})
require.NoError(t, err)
require.NotNil(t, inst)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)

alloc, err := mgr.networkManager.GetAllocation(ctx, inst.Id)
@@ -414,7 +414,7 @@ func TestFirecrackerNetworkLifecycle(t *testing.T) {
inst, err = mgr.RestoreInstance(ctx, inst.Id)
require.NoError(t, err)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, mgr, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
assert.Equal(t, StateRunning, inst.State)

@@ -481,7 +481,7 @@ func TestFirecrackerForkFromRunningNetwork(t *testing.T) {
Hypervisor: hypervisor.TypeFirecracker,
})
require.NoError(t, err)
source, err = waitForInstanceState(ctx, mgr, source.Id, StateRunning, 20*time.Second)
source, err = waitForInstanceState(ctx, mgr, source.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
sourceID := source.Id
t.Cleanup(func() { _ = mgr.DeleteInstance(context.Background(), sourceID) })
@@ -499,7 +499,7 @@ func TestFirecrackerForkFromRunningNetwork(t *testing.T) {
})
require.NoError(t, err)
require.Contains(t, []State{StateInitializing, StateRunning}, forked.State)
forked, err = waitForInstanceState(ctx, mgr, forked.Id, StateRunning, 20*time.Second)
forked, err = waitForInstanceState(ctx, mgr, forked.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.Equal(t, StateRunning, forked.State)
forkID := forked.Id
@@ -515,7 +515,7 @@ func TestFirecrackerForkFromRunningNetwork(t *testing.T) {
sourceAfterFork, err := mgr.GetInstance(ctx, sourceID)
require.NoError(t, err)
if sourceAfterFork.State != StateRunning {
sourceAfterFork, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, 20*time.Second)
sourceAfterFork, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
}
require.Equal(t, StateRunning, sourceAfterFork.State)
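The hunks in this and the following test files replace fixed 20-second waits with integrationTestTimeout, whose definition is not shown in this diff. A minimal sketch of what such a helper could look like, assuming an environment-variable multiplier (the name HYPEMAN_TEST_TIMEOUT_MULTIPLIER is hypothetical) and imports of os, strconv, and time:

	// integrationTestTimeout scales a base wait so slower CI hosts can extend
	// integration-test timeouts without editing every call site.
	func integrationTestTimeout(base time.Duration) time.Duration {
		raw := os.Getenv("HYPEMAN_TEST_TIMEOUT_MULTIPLIER") // hypothetical knob
		if raw == "" {
			return base
		}
		mult, err := strconv.ParseFloat(raw, 64)
		if err != nil || mult <= 0 {
			return base
		}
		return time.Duration(float64(base) * mult)
	}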
6 changes: 3 additions & 3 deletions lib/instances/fork_test.go
@@ -444,7 +444,7 @@ func TestForkCloudHypervisorFromRunningNetwork(t *testing.T) {
require.NoError(t, err)
sourceID := source.Id
t.Cleanup(func() { _ = manager.DeleteInstance(context.Background(), sourceID) })
source, err = waitForInstanceState(ctx, manager, source.Id, StateRunning, 20*time.Second)
source, err = waitForInstanceState(ctx, manager, source.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.NoError(t, waitForVMReady(ctx, source.SocketPath, 5*time.Second))

@@ -465,7 +465,7 @@ func TestForkCloudHypervisorFromRunningNetwork(t *testing.T) {
})
require.NoError(t, err)
require.Contains(t, []State{StateInitializing, StateRunning}, forked.State)
forked, err = waitForInstanceState(ctx, manager, forked.Id, StateRunning, 20*time.Second)
forked, err = waitForInstanceState(ctx, manager, forked.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
require.Equal(t, StateRunning, forked.State)
forkedID := forked.Id
@@ -475,7 +475,7 @@ func TestForkCloudHypervisorFromRunningNetwork(t *testing.T) {
sourceAfterFork, err := manager.GetInstance(ctx, source.Id)
require.NoError(t, err)
if sourceAfterFork.State != StateRunning {
sourceAfterFork, err = waitForInstanceState(ctx, manager, source.Id, StateRunning, 20*time.Second)
sourceAfterFork, err = waitForInstanceState(ctx, manager, source.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
}
require.Equal(t, StateRunning, sourceAfterFork.State)
8 changes: 4 additions & 4 deletions lib/instances/manager_test.go
@@ -337,7 +337,7 @@ func TestBasicEndToEnd(t *testing.T) {
// Wait for VM to be fully running
err = waitForVMReady(ctx, inst.SocketPath, 5*time.Second)
require.NoError(t, err, "VM should reach running state")
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err, "instance should reach Running state")

// Get instance
@@ -784,7 +784,7 @@ func TestBasicEndToEnd(t *testing.T) {
restartedInst, err := manager.StartInstance(ctx, inst.Id, StartInstanceRequest{})
require.NoError(t, err, "StartInstance should succeed")
assert.Contains(t, []State{StateInitializing, StateRunning}, restartedInst.State, "Instance should be active after restart")
restartedInst, err = waitForInstanceState(ctx, manager, restartedInst.Id, StateRunning, 20*time.Second)
restartedInst, err = waitForInstanceState(ctx, manager, restartedInst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err, "instance should reach Running after restart")

// Verify exit info was cleared
@@ -1346,7 +1346,7 @@ func TestStandbyAndRestore(t *testing.T) {
inst, err := manager.CreateInstance(ctx, req)
require.NoError(t, err)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
t.Logf("Instance created: %s", inst.Id)

@@ -1390,7 +1390,7 @@ func TestStandbyAndRestore(t *testing.T) {
inst, err = manager.RestoreInstance(ctx, inst.Id)
require.NoError(t, err)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
t.Log("Instance restored and running")

4 changes: 2 additions & 2 deletions lib/instances/network_test.go
@@ -118,7 +118,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) {
t.Log("Exec agent is ready")

// Standby requires running state; create may still return Initializing.
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)

// Test initial internet connectivity via exec
@@ -160,7 +160,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) {
inst, err = manager.RestoreInstance(ctx, inst.Id)
require.NoError(t, err)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
assert.Equal(t, StateRunning, inst.State)
t.Log("Instance restored and running")
6 changes: 3 additions & 3 deletions lib/instances/qemu_test.go
@@ -325,7 +325,7 @@ func TestQEMUBasicEndToEnd(t *testing.T) {
// Wait for VM to be fully running
err = waitForQEMUReady(ctx, inst.SocketPath, 10*time.Second)
require.NoError(t, err, "QEMU VM should reach running state")
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err, "instance should reach Running state")

// Get instance
@@ -830,7 +830,7 @@ func TestQEMUStandbyAndRestore(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, inst)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
assert.Equal(t, hypervisor.TypeQEMU, inst.HypervisorType)
t.Logf("Instance created: %s (hypervisor: %s)", inst.Id, inst.HypervisorType)
@@ -866,7 +866,7 @@ func TestQEMUStandbyAndRestore(t *testing.T) {
inst, err = manager.RestoreInstance(ctx, inst.Id)
require.NoError(t, err)
assert.Contains(t, []State{StateInitializing, StateRunning}, inst.State)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, 20*time.Second)
inst, err = waitForInstanceState(ctx, manager, inst.Id, StateRunning, integrationTestTimeout(20*time.Second))
require.NoError(t, err)
t.Log("Instance restored and running")

2 changes: 1 addition & 1 deletion lib/instances/snapshot_integration_scenario_test.go
@@ -65,7 +65,7 @@ func runStandbySnapshotScenario(t *testing.T, mgr *manager, tmpDir string, cfg s
}
})

source, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, 20*time.Second)
source, err = waitForInstanceState(ctx, mgr, sourceID, StateRunning, integrationTestTimeout(20*time.Second))
requireNoErr(err)
require.Equal(t, StateRunning, source.State)
_, err = mgr.StandbyInstance(ctx, sourceID, StandbyInstanceRequest{})