From b69c2bad3a3cc5709c180ff8bfbb76939b2d4f17 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Thu, 7 May 2026 21:49:19 +0530 Subject: [PATCH] [live-migration] wire live-migration annotation Introduces a sandbox-scoped LiveMigrationSupportEnabled flag (parsed from the LiveMigrationSupportEnabled annotation) that propagates from the LCOW sandbox options through spec building, kernel-args construction, and the host-side VM controller. When a sandbox opts into live migration, the builder validates and locks the allow-listed UVM-shape annotations up front and emits the GCS init command without the /bin/vsockexec wrapper, since the host does not run the non-migratable log listener for such sandboxes. Correspondingly, the controller short-circuits its GCS log listener setup for live-migratable pods, closing logOutputDone so the boot path proceeds cleanly without a host-side log socket. Signed-off-by: Harsh Rawat --- internal/builder/vm/lcow/kernel_args.go | 31 ++-- internal/builder/vm/lcow/sandbox_options.go | 4 + internal/builder/vm/lcow/specs.go | 10 +- internal/builder/vm/lcow/specs_test.go | 155 ++++++++++++++++++++ internal/controller/vm/vm_lcow.go | 26 ++++ pkg/annotations/annotations.go | 15 +- 6 files changed, 217 insertions(+), 24 deletions(-) diff --git a/internal/builder/vm/lcow/kernel_args.go b/internal/builder/vm/lcow/kernel_args.go index 6d464d0d9c..7f055356c9 100644 --- a/internal/builder/vm/lcow/kernel_args.go +++ b/internal/builder/vm/lcow/kernel_args.go @@ -27,6 +27,7 @@ func buildKernelArgs( kernelDirect bool, hasConsole bool, rootFsFile string, + LiveMigrationSupportEnabled bool, ) (string, error) { log.G(ctx).WithField("rootFsFile", rootFsFile).Debug("buildKernelArgs: starting kernel arguments construction") @@ -81,7 +82,7 @@ func buildKernelArgs( args = append(args, "brd.rd_nr=0", "pmtmr=0") // 8. 
Init arguments (passed after "--" separator) - initArgs := buildInitArgs(ctx, opts, writableOverlayDirs, disableTimeSyncService, processDumpLocation, rootFsFile, hasConsole) + initArgs := buildInitArgs(ctx, opts, writableOverlayDirs, disableTimeSyncService, processDumpLocation, rootFsFile, hasConsole, LiveMigrationSupportEnabled) args = append(args, "--", initArgs) result := strings.Join(args, " ") @@ -150,6 +151,7 @@ func buildInitArgs( processDumpLocation string, rootFsFile string, hasConsole bool, + LiveMigrationSupportEnabled bool, ) string { log.G(ctx).WithFields(logrus.Fields{ "rootFsFile": rootFsFile, @@ -159,7 +161,7 @@ func buildInitArgs( entropyArgs := fmt.Sprintf("-e %d", vmutils.LinuxEntropyVsockPort) // Build GCS execution command - gcsCmd := buildGCSCommand(opts, disableTimeSyncService, processDumpLocation) + gcsCmd := buildGCSCommand(opts, disableTimeSyncService, processDumpLocation, LiveMigrationSupportEnabled) // Construct init arguments var initArgsList []string @@ -193,14 +195,8 @@ func buildGCSCommand( opts *runhcsoptions.Options, disableTimeSyncService bool, processDumpLocation string, + LiveMigrationSupportEnabled bool, ) string { - // Start with vsockexec wrapper - var cmdParts []string - cmdParts = append(cmdParts, "/bin/vsockexec") - - // Add logging vsock port - cmdParts = append(cmdParts, fmt.Sprintf("-e %d", vmutils.LinuxLogVsockPort)) - // Determine log level logLevel := "info" if opts != nil && opts.LogLevel != "" { @@ -229,8 +225,19 @@ func buildGCSCommand( gcsParts = append(gcsParts, "-core-dump-location", processDumpLocation) } - // Combine vsockexec and GCS command - cmdParts = append(cmdParts, strings.Join(gcsParts, " ")) + gcsCmd := strings.Join(gcsParts, " ") + + // Live-migratable pods skip the /bin/vsockexec wrapper. 
The wrapper exists + // solely to forward GCS stderr to the host-side log listener, but that listener + // is host-local state that live migration does not transfer, so the host + // does not run it for these pods. + // Without a listener, vsockexec's outbound connect would block and stall guest init, + // so we emit /bin/gcs directly instead. + if LiveMigrationSupportEnabled { + return gcsCmd + } - return strings.Join(cmdParts, " ") + // vsockexec `-e <port>` wires gcs's stderr to LinuxLogVsockPort, which + // the host listener reads and republishes. + return fmt.Sprintf("/bin/vsockexec -e %d %s", vmutils.LinuxLogVsockPort, gcsCmd) } diff --git a/internal/builder/vm/lcow/sandbox_options.go b/internal/builder/vm/lcow/sandbox_options.go index 492d3678b5..37c978a170 100644 --- a/internal/builder/vm/lcow/sandbox_options.go +++ b/internal/builder/vm/lcow/sandbox_options.go @@ -25,6 +25,10 @@ type SandboxOptions struct { // ConfidentialConfig carries confidential computing fields that are not // part of the HCS document but are needed for confidential VM setup. ConfidentialConfig *ConfidentialConfig + + // LiveMigrationSupportEnabled indicates that the live migration feature set is + // enabled for the sandbox, constraining it to migration-compatible features. 
+ LiveMigrationSupportEnabled bool } // ConfidentialConfig carries confidential computing configuration that is not diff --git a/internal/builder/vm/lcow/specs.go b/internal/builder/vm/lcow/specs.go index 2229e0da63..18074e3bc4 100644 --- a/internal/builder/vm/lcow/specs.go +++ b/internal/builder/vm/lcow/specs.go @@ -222,6 +222,7 @@ func BuildSandboxConfig( bootOptions.LinuxKernelDirect != nil, // isKernelDirectBoot comPorts != nil, // hasConsole filepath.Base(rootFsFullPath), + sandboxOptions.LiveMigrationSupportEnabled, ) if err != nil { return nil, nil, fmt.Errorf("failed to build kernel args: %w", err) @@ -330,10 +331,11 @@ func parseSandboxOptions(ctx context.Context, platform string, annotations map[s log.G(ctx).WithField("platform", platform).Debug("parseSandboxOptions: starting sandbox options parsing") sandboxOptions := &SandboxOptions{ // Extract architecture from platform string (e.g., "linux/amd64" -> "amd64") - Architecture: platform[strings.IndexByte(platform, '/')+1:], - FullyPhysicallyBacked: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.FullyPhysicallyBacked, false), - PolicyBasedRouting: oci.ParseAnnotationsBool(ctx, annotations, iannotations.NetworkingPolicyBasedRouting, false), - NoWritableFileShares: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.DisableWritableFileShares, false), + Architecture: platform[strings.IndexByte(platform, '/')+1:], + FullyPhysicallyBacked: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.FullyPhysicallyBacked, false), + PolicyBasedRouting: oci.ParseAnnotationsBool(ctx, annotations, iannotations.NetworkingPolicyBasedRouting, false), + NoWritableFileShares: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.DisableWritableFileShares, false), + LiveMigrationSupportEnabled: oci.ParseAnnotationsBool(ctx, annotations, shimannotations.LiveMigrationSupportEnabled, false), } // Determine if this is a confidential VM early, as it affects boot options parsing diff --git 
a/internal/builder/vm/lcow/specs_test.go b/internal/builder/vm/lcow/specs_test.go index 1753d7b818..cd920f2c2d 100644 --- a/internal/builder/vm/lcow/specs_test.go +++ b/internal/builder/vm/lcow/specs_test.go @@ -2127,3 +2127,158 @@ func TestBuildSandboxConfig_CPUClamping(t *testing.T) { t.Errorf("expected processor count to be clamped to host count %d, got %d", hostCount, actualCount) } } + +// TestBuildSandboxConfig_LiveMigration validates the wiring for the +// io.microsoft.migration.support-enabled sandbox annotation. The annotation is parsed +// into SandboxOptions.LiveMigrationSupportEnabled and threaded down into the kernel +// command line: live-migratable sandboxes must skip the /bin/vsockexec wrapper +// (which would otherwise stall init waiting for a host log listener that the +// LM-enabled host does not run), while non-LM sandboxes must continue to use +// vsockexec so that GCS stderr is forwarded over LinuxLogVsockPort. +func TestBuildSandboxConfig_LiveMigration(t *testing.T) { + ctx := context.Background() + + validBootFilesPath := newBootFilesPath(t) + defaultOpts := defaultSandboxOpts(validBootFilesPath) + + // Pre-format the vsockexec prefix once so the assertions are obviously + // driven by the same constant the production code uses. 
+ vsockexecPrefix := fmt.Sprintf("/bin/vsockexec -e %d", vmutils.LinuxLogVsockPort) + + tests := []specTestCase{ + { + name: "live migration disabled by default", + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if sandboxOpts.LiveMigrationSupportEnabled { + t.Errorf("expected LiveMigrationSupportEnabled=false by default, got true") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, vsockexecPrefix) { + t.Errorf("expected vsockexec wrapper %q in kernel args (LM disabled), got %q", vsockexecPrefix, kernelArgs) + } + if !strings.Contains(kernelArgs, "/bin/gcs") { + t.Errorf("expected /bin/gcs in kernel args, got %q", kernelArgs) + } + }, + }, + { + name: "live migration explicitly disabled", + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationSupportEnabled: "false", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if sandboxOpts.LiveMigrationSupportEnabled { + t.Errorf("expected LiveMigrationSupportEnabled=false when annotation=\"false\", got true") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, vsockexecPrefix) { + t.Errorf("expected vsockexec wrapper %q in kernel args, got %q", vsockexecPrefix, kernelArgs) + } + }, + }, + { + name: "live migration enabled drops vsockexec wrapper", + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationSupportEnabled: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !sandboxOpts.LiveMigrationSupportEnabled { + t.Errorf("expected LiveMigrationSupportEnabled=true when annotation=\"true\", got false") + } + kernelArgs := getKernelArgs(doc) + // The vsockexec wrapper must not appear at all when LM is on: + // neither the prefix nor the binary path on its own. 
+ if strings.Contains(kernelArgs, "vsockexec") { + t.Errorf("expected no vsockexec in kernel args when LM enabled, got %q", kernelArgs) + } + if strings.Contains(kernelArgs, fmt.Sprintf("-e %d", vmutils.LinuxLogVsockPort)) { + t.Errorf("expected no log vsock port (%d) wiring when LM enabled, got %q", vmutils.LinuxLogVsockPort, kernelArgs) + } + // /bin/gcs must still be invoked - just without the wrapper. + if !strings.Contains(kernelArgs, "/bin/gcs") { + t.Errorf("expected /bin/gcs in kernel args even when LM enabled, got %q", kernelArgs) + } + }, + }, + { + name: "live migration combined with debug log level", + opts: &runhcsoptions.Options{ + SandboxPlatform: "linux/amd64", + BootFilesRootPath: validBootFilesPath, + LogLevel: "debug", + }, + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationSupportEnabled: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !sandboxOpts.LiveMigrationSupportEnabled { + t.Errorf("expected LiveMigrationSupportEnabled=true, got false") + } + kernelArgs := getKernelArgs(doc) + // Other GCS flags must still be threaded through the command + // even when the vsockexec wrapper is removed. 
+ if !strings.Contains(kernelArgs, "-loglevel debug") { + t.Errorf("expected -loglevel debug in kernel args when LM enabled, got %q", kernelArgs) + } + if strings.Contains(kernelArgs, "vsockexec") { + t.Errorf("expected no vsockexec when LM enabled, got %q", kernelArgs) + } + }, + }, + { + name: "live migration with disable time sync still drops vsockexec", + spec: &vm.Spec{ + Annotations: map[string]string{ + shimannotations.LiveMigrationSupportEnabled: "true", + shimannotations.DisableLCOWTimeSyncService: "true", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if !sandboxOpts.LiveMigrationSupportEnabled { + t.Errorf("expected LiveMigrationSupportEnabled=true, got false") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, "-disable-time-sync") { + t.Errorf("expected -disable-time-sync flag in kernel args, got %q", kernelArgs) + } + if strings.Contains(kernelArgs, "vsockexec") { + t.Errorf("expected no vsockexec when LM enabled, got %q", kernelArgs) + } + }, + }, + { + name: "live migration invalid annotation value falls back to default (false)", + spec: &vm.Spec{ + Annotations: map[string]string{ + // ParseAnnotationsBool returns the default value (false) on + // unparseable input, so the sandbox should behave like the + // default-disabled case rather than failing the build. 
+ shimannotations.LiveMigrationSupportEnabled: "not-a-bool", + }, + }, + validate: func(t *testing.T, doc *hcsschema.ComputeSystem, sandboxOpts *SandboxOptions) { + t.Helper() + if sandboxOpts.LiveMigrationSupportEnabled { + t.Errorf("expected LiveMigrationSupportEnabled=false on invalid annotation value, got true") + } + kernelArgs := getKernelArgs(doc) + if !strings.Contains(kernelArgs, vsockexecPrefix) { + t.Errorf("expected vsockexec wrapper %q in kernel args, got %q", vsockexecPrefix, kernelArgs) + } + }, + }, + } + + runTestCases(t, ctx, defaultOpts, tests) +} diff --git a/internal/controller/vm/vm_lcow.go b/internal/controller/vm/vm_lcow.go index a200dbe5f6..9df0846dd6 100644 --- a/internal/controller/vm/vm_lcow.go +++ b/internal/controller/vm/vm_lcow.go @@ -12,6 +12,7 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/device/plan9" "github.com/Microsoft/hcsshim/internal/controller/network" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/internal/vm/vmmanager" "github.com/Microsoft/hcsshim/internal/vm/vmutils" @@ -165,6 +166,31 @@ func (c *Controller) setupEntropyListener(ctx context.Context, group *errgroup.G // running inside the Linux VM. The logs are parsed and // forwarded to the host's logging system for monitoring and debugging. func (c *Controller) setupLoggingListener(ctx context.Context, group *errgroup.Group) error { + // Live-migratable sandboxes intentionally run without a host-side GCS log + // listener. + // + // The log listener is host-local state: GCS inside the guest connects out to + // a host-side hvsocket on LinuxLogVsockPort and streams its stderr to it. That + // connection, and the goroutine reading from it, are bound to the *source* + // host and are not part of the guest state that live migration transfers. 
+ // After the VM is migrated to a destination host there is no equivalent + // listener to reconnect to, so a guest that depended on the log socket would + // block on its outbound connect and stall the boot path. To keep the guest + // migratable we skip the listener here and drop the matching /bin/vsockexec + // wrapper from the kernel command line, so GCS never attempts the connection. + // + // Re-enabling host-side log collection for live-migratable pods requires a + // migration-aware log transport: GCS must tolerate the listener going away + // and reconnect to a freshly established listener on the destination host once + // migration completes, and the host must (re)create the listener and re-attach + // the log-parsing goroutine on the destination. Until that work lands we forgo + // host-side GCS logs for these pods. + if c.sandboxOptions != nil && c.sandboxOptions.LiveMigrationSupportEnabled { + log.G(ctx).Info("skipping GCS log listener: pod is live-migratable") + close(c.logOutputDone) + return nil + } + // The GCS will connect to this port to stream log output. logConn, err := winio.ListenHvsock(&winio.HvsockAddr{ VMID: c.uvm.RuntimeID(), diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go index 80f0264569..79289eed4d 100644 --- a/pkg/annotations/annotations.go +++ b/pkg/annotations/annotations.go @@ -539,14 +539,13 @@ const ( // Live Migration annotations. const ( - // LiveMigrationAllowed is a gatekeeping annotation scoped to a pod/sandbox that indicates - // the pod is intended to be live-migratable. When set on a pod, any container within that - // pod which requests a feature incompatible with live migration will fail to be created. - // - // For example, if a pod is started with this annotation and a container within it - // subsequently requests a plan9 share (which is not compatible with live migration), - // the container creation will be failed. 
- LiveMigrationAllowed = "io.microsoft.migration.allowed" + // LiveMigrationSupportEnabled is a sandbox-scoped annotation that enables the live + // migration feature set for a pod. When enabled, the pod is constrained to the subset + // of features that are compatible with live migration. + // + // For example, the sandbox runs without the host-side GCS log listener, + // since that listener is host-local and cannot survive migration. + LiveMigrationSupportEnabled = "io.microsoft.migration.support-enabled" // LiveMigrationSourceContainerID is used only on the destination node during a live // migration. It is set on the NewTask request to identify the corresponding container