diff --git a/cmd/y-cluster/main.go b/cmd/y-cluster/main.go
index 04a3fc7..ee7ab6b 100644
--- a/cmd/y-cluster/main.go
+++ b/cmd/y-cluster/main.go
@@ -461,9 +461,24 @@ func (k *dockerNamedTeardown) run() error {
 func importCmd() *cobra.Command {
 	var configDir string
 	cmd := &cobra.Command{
-		Use:   "import <input.vmdk>",
-		Short: "Import a VMware appliance as the cluster disk",
-		Args:  cobra.ExactArgs(1),
+		Use:   "import <input.vmdk|input.qcow2>",
+		Short: "Import a disk image as the cluster disk",
+		Long: `Imports a disk image into the cluster's cache as the boot
+disk for a subsequent ` + "`y-cluster start`" + ` or ` + "`provision`" + `.
+Format is sniffed by file extension:
+
+  .vmdk   imported from VMware-style disks (the original
+          VirtualBox / VMware export path).
+  .qcow2  imported by re-writing the qcow2 into the cache layout
+          (essentially a copy + compaction). Lets a local
+          qemu-only e2e loop chain ` + "`y-cluster export --format=qcow2`" + `
+          straight into this command with no out-of-band
+          qemu-img conversion.
+
+Other formats (raw, vdi, gcp-tar) aren't on the import path; the
+flow expects them to be converted to qcow2 first or handled by
+a different verb.`,
+		Args: cobra.ExactArgs(1),
 		RunE: func(cmd *cobra.Command, args []string) error {
 			loaded, err := loadProvision(configDir)
 			if err != nil {
@@ -474,7 +489,7 @@ func importCmd() *cobra.Command {
 				return err
 			}
 			rc := qemu.FromConfig(q)
-			return qemu.ImportVMDK(args[0], filepath.Join(rc.CacheDir, rc.Name+".qcow2"))
+			return qemu.Import(args[0], filepath.Join(rc.CacheDir, rc.Name+".qcow2"))
 		},
 	}
 	cmd.Flags().StringVarP(&configDir, "config", "c", "", "directory containing y-cluster-provision.yaml")
diff --git a/cmd/y-cluster/manifests.go b/cmd/y-cluster/manifests.go
index 0d6fb8f..98b425c 100644
--- a/cmd/y-cluster/manifests.go
+++ b/cmd/y-cluster/manifests.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"bytes"
+	"context"
 	"fmt"
 	"io"
 	"regexp"
@@ -13,8 +14,17 @@ import (
 )
 
 // manifestsCmd is the umbrella for build-time manifest staging on the
-// cluster's appliance disk. Today: just `add`. Future subcommands
-// (`list`, `remove`, `get`) read or mutate the same staging dir.
+// cluster's appliance disk. Three verbs (strict in both directions):
+//
+//   - add     : name must NOT be staged (or, if staged with
+//               byte-identical content, succeeds silently as a
+//               re-run idempotency convenience)
+//   - replace : name MUST be staged, overwrites
+//   - rm      : name MUST be staged, removes
+//
+// We deliberately don't ship a `--force` flag: the operator (or
+// agent) has to know what state they're in. The verb itself
+// documents the operator's intent at the call site.
 func manifestsCmd() *cobra.Command {
 	cmd := &cobra.Command{
 		Use:   "manifests",
@@ -37,6 +47,8 @@ APPLIANCE_MAINTENANCE.md for the recommended Job shape and
 idempotency conventions.`,
 	}
 	cmd.AddCommand(manifestsAddCmd())
+	cmd.AddCommand(manifestsReplaceCmd())
+	cmd.AddCommand(manifestsRmCmd())
 	return cmd
 }
 
@@ -48,16 +60,102 @@ idempotency conventions.`,
 // filename and metadata.name typically match.
 var manifestNameRE = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]*$`)
 
+// stagedManifestPath returns the on-cluster path for a manifest by
+// name. Single source of truth -- changing the staging directory or
+// the file-suffix convention only touches this function.
+func stagedManifestPath(name string) string {
+	return "/var/lib/y-cluster/manifests-staging/" + name + ".yaml"
+}
+
+// readStagedManifest reads the bytes of a staged manifest off the
+// cluster node.
+//
+// Returns (content, true, nil) when the file is present;
+// (nil, false, nil) when it doesn't exist; (nil, false, err) on any
+// other failure (I/O, ssh, ctr-exec, etc.). The two-shell-call
+// shape costs a round-trip but keeps the "missing" case
+// distinguishable from "present but empty," which the idempotency
+// + replace/rm flows care about.
+func readStagedManifest(ctx context.Context, lr *cluster.LookupResult, target string) ([]byte, bool, error) {
+	if err := cluster.RunShell(ctx, lr, "test -e "+target, nil, nil, nil); err != nil {
+		return nil, false, nil
+	}
+	var stdout, stderr bytes.Buffer
+	if err := cluster.RunShell(ctx, lr, "cat "+target, nil, &stdout, &stderr); err != nil {
+		return nil, false, fmt.Errorf("read %s: %s: %w", target, stderr.String(), err)
+	}
+	return stdout.Bytes(), true, nil
+}
+
+// writeStagedManifest installs the file atomically with mode 0644
+// and creates the staging directory under mode 0755 if missing.
+// install(1) (not `cat > file`) is used so the file lands with a
+// known mode and a deterministic atomicity boundary.
+func writeStagedManifest(ctx context.Context, lr *cluster.LookupResult, target string, data []byte) error {
+	writeCmd := "install -d -m 0755 /var/lib/y-cluster/manifests-staging && " +
+		"install -m 0644 /dev/stdin " + target
+	var stderr bytes.Buffer
+	if err := cluster.RunShell(ctx, lr, writeCmd, bytes.NewReader(data), nil, &stderr); err != nil {
+		return fmt.Errorf("write manifest: %s: %w", stderr.String(), err)
+	}
+	return nil
+}
+
+// removeStagedManifest deletes the file from the staging directory.
+// Caller should have already checked existence so a missing file
+// here surfaces as a real error (permission, fs problem).
+func removeStagedManifest(ctx context.Context, lr *cluster.LookupResult, target string) error {
+	var stderr bytes.Buffer
+	if err := cluster.RunShell(ctx, lr, "rm "+target, nil, nil, &stderr); err != nil {
+		return fmt.Errorf("rm %s: %s: %w", target, stderr.String(), err)
+	}
+	return nil
+}
+
+// readManifestInput is the shared "validate name, read input bytes,
+// look up cluster, compute target" prelude for add/replace. Splits
+// the per-command logic from the boilerplate that's identical
+// across both, so changes to either side don't drift.
+func readManifestInput(c *cobra.Command, name, input string, contextName string) ([]byte, *cluster.LookupResult, string, error) {
+	if !manifestNameRE.MatchString(name) {
+		return nil, nil, "", fmt.Errorf("invalid manifest name %q: must match %s (no slashes, no .., must start with alphanumeric)", name, manifestNameRE)
+	}
+	r, closer, err := openYAMLInput(input, c.InOrStdin())
+	if err != nil {
+		return nil, nil, "", err
+	}
+	defer closer()
+	data, err := io.ReadAll(r)
+	if err != nil {
+		return nil, nil, "", fmt.Errorf("read manifest: %w", err)
+	}
+	if len(strings.TrimSpace(string(data))) == 0 {
+		return nil, nil, "", fmt.Errorf("manifest is empty")
+	}
+	lr, err := cluster.Lookup(c.Context(), "", contextName)
+	if err != nil {
+		return nil, nil, "", err
+	}
+	return data, lr, stagedManifestPath(name), nil
+}
+
 func manifestsAddCmd() *cobra.Command {
 	var contextName string
 
 	cmd := &cobra.Command{
 		Use:   "add <name> <path|->",
-		Short: "Stage a manifest for first-customer-boot apply",
+		Short: "Stage a new manifest for first-customer-boot apply",
 		Long: `Reads the YAML at <path> (or stdin when <path> is "-"), then
 writes it to ` + "`/var/lib/y-cluster/manifests-staging/<name>.yaml`" + `
-on the cluster node. Bails if a manifest with the same <name> is
-already staged.
+on the cluster node.
+
+Strict: the name must NOT already be staged. To overwrite an
+existing manifest use ` + "`y-cluster manifests replace`" + `; to
+delete one use ` + "`y-cluster manifests rm`" + `. As a re-run
+convenience, ` + "`add`" + ` succeeds silently when the same name
+is already staged WITH BYTE-IDENTICAL content (covers the
+"my script ran twice, file unchanged" case without weakening the
+strict-new contract).
 
 Example:
 
@@ -72,62 +170,120 @@ two builds = idempotent re-apply (no-op on the customer's cluster
 since k3s remembers the prior apply).`,
 		Args: cobra.ExactArgs(2),
 		RunE: func(c *cobra.Command, args []string) error {
-			name := args[0]
-			input := args[1]
-
-			if !manifestNameRE.MatchString(name) {
-				return fmt.Errorf("invalid manifest name %q: must match %s (no slashes, no .., must start with alphanumeric)", name, manifestNameRE)
+			name, input := args[0], args[1]
+			data, lr, target, err := readManifestInput(c, name, input, contextName)
+			if err != nil {
+				return err
 			}
-
-			r, closer, err := openYAMLInput(input, c.InOrStdin())
+			existing, present, err := readStagedManifest(c.Context(), lr, target)
 			if err != nil {
 				return err
 			}
-			defer closer()
+			if present {
+				if bytes.Equal(existing, data) {
+					fmt.Fprintf(c.OutOrStdout(), "manifest %q already staged at %s with identical content; no change\n", name, target)
+					return nil
+				}
+				return fmt.Errorf("manifest %q already staged at %s with different content; use `y-cluster manifests replace` to overwrite", name, target)
+			}
+			if err := writeStagedManifest(c.Context(), lr, target, data); err != nil {
+				return err
+			}
+			fmt.Fprintf(c.OutOrStdout(), "staged manifest %q -> %s (%d bytes)\n", name, target, len(data))
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
+	return cmd
+}
+
+func manifestsReplaceCmd() *cobra.Command {
+	var contextName string
+
+	cmd := &cobra.Command{
+		Use:   "replace <name> <path|->",
+		Short: "Overwrite an already-staged manifest with new content",
+		Long: `Reads the YAML at <path> (or stdin when <path> is "-"), then
+overwrites the existing manifest at
+` + "`/var/lib/y-cluster/manifests-staging/<name>.yaml`" + ` on the
+cluster node.
 
-			data, err := io.ReadAll(r)
+Strict: the name MUST already be staged. Bails loud when the
+named manifest doesn't exist (use ` + "`y-cluster manifests add`" + `
+for the create path). The verb documents intent at the call site
+("I know this name is in use and I'm intentionally overwriting").
+
+No-op when the new content is byte-identical to the existing
+file: ` + "`replace`" + ` doesn't bump anything visible to k3s in
+that case either, but we still print a "no change" message so
+scripts can read the outcome.`,
+		Args: cobra.ExactArgs(2),
+		RunE: func(c *cobra.Command, args []string) error {
+			name, input := args[0], args[1]
+			data, lr, target, err := readManifestInput(c, name, input, contextName)
 			if err != nil {
-				return fmt.Errorf("read manifest: %w", err)
+				return err
 			}
-			if len(strings.TrimSpace(string(data))) == 0 {
-				return fmt.Errorf("manifest is empty")
+			existing, present, err := readStagedManifest(c.Context(), lr, target)
+			if err != nil {
+				return err
+			}
+			if !present {
+				return fmt.Errorf("manifest %q is not staged at %s; use `y-cluster manifests add` to create", name, target)
+			}
+			if bytes.Equal(existing, data) {
+				fmt.Fprintf(c.OutOrStdout(), "manifest %q at %s already matches input; no change\n", name, target)
+				return nil
+			}
+			if err := writeStagedManifest(c.Context(), lr, target, data); err != nil {
+				return err
 			}
+			fmt.Fprintf(c.OutOrStdout(), "replaced manifest %q -> %s (%d bytes)\n", name, target, len(data))
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
+	return cmd
+}
+
+func manifestsRmCmd() *cobra.Command {
+	var contextName string
+
+	cmd := &cobra.Command{
+		Use:   "rm <name>",
+		Short: "Remove a staged manifest from the cluster's appliance",
+		Long: `Removes the file at
+` + "`/var/lib/y-cluster/manifests-staging/<name>.yaml`" + ` on the
+cluster node.
 
+Strict: the name MUST already be staged. Bails loud when the
+named manifest doesn't exist -- there's no `+"`--force`"+` /
+"don't care" mode. Use this when iterating on the staged
+manifest's content alongside `+"`add`"+`, or to drop a manifest
+that's no longer wanted before prepare-export captures it.`,
+		Args: cobra.ExactArgs(1),
+		RunE: func(c *cobra.Command, args []string) error {
+			name := args[0]
+			if !manifestNameRE.MatchString(name) {
+				return fmt.Errorf("invalid manifest name %q: must match %s (no slashes, no .., must start with alphanumeric)", name, manifestNameRE)
+			}
 			lr, err := cluster.Lookup(c.Context(), "", contextName)
 			if err != nil {
 				return err
 			}
-
-			target := "/var/lib/y-cluster/manifests-staging/" + name + ".yaml"
-
-			// Stage in two RunShell calls: first probe for an
-			// existing entry (refuse if found), then atomically
-			// write the new one. We don't use a single
-			// `cat > <target>` redirect because that'd overwrite
-			// silently. install -m 0644 -T also creates the
-			// parent dir's permissions cleanly.
-			if cluster.RunShell(c.Context(), lr,
-				"test ! -e "+target, nil, nil, nil) != nil {
-				return fmt.Errorf("manifest %q already staged at %s; remove it first or pick a different name", name, target)
+			target := stagedManifestPath(name)
+			if _, present, err := readStagedManifest(c.Context(), lr, target); err != nil {
+				return err
+			} else if !present {
+				return fmt.Errorf("manifest %q is not staged at %s; nothing to remove", name, target)
 			}
-
-			// Use install(1) to create the parent dir and write
-			// the file atomically with a known mode. /dev/stdin
-			// is the standard way to feed install(1) bytes from
-			// the pipe.
-			writeCmd := "install -d -m 0755 /var/lib/y-cluster/manifests-staging && " +
-				"install -m 0644 /dev/stdin " + target
-			var stderr bytes.Buffer
-			if err := cluster.RunShell(c.Context(), lr, writeCmd,
-				bytes.NewReader(data), nil, &stderr); err != nil {
-				return fmt.Errorf("write manifest: %s: %w", stderr.String(), err)
+			if err := removeStagedManifest(c.Context(), lr, target); err != nil {
+				return err
 			}
-
-			fmt.Fprintf(c.OutOrStdout(), "staged manifest %q -> %s (%d bytes)\n", name, target, len(data))
+			fmt.Fprintf(c.OutOrStdout(), "removed manifest %q from %s\n", name, target)
 			return nil
 		},
 	}
 	cmd.Flags().StringVar(&contextName, "context", cluster.DefaultContext, "kubeconfig context name")
 	return cmd
 }
-
diff --git a/cmd/y-cluster/manifests_test.go b/cmd/y-cluster/manifests_test.go
index 5fe79c8..feebdb4 100644
--- a/cmd/y-cluster/manifests_test.go
+++ b/cmd/y-cluster/manifests_test.go
@@ -74,3 +74,68 @@ func TestManifestsAddCmd_RejectsInvalidName(t *testing.T) {
 		t.Errorf("expected 'invalid manifest name' in error, got: %v", err)
 	}
 }
+
+// TestManifestsReplaceCmd_RejectsInvalidName mirrors the add
+// guard for the replace verb -- same regex, same fail-fast.
+func TestManifestsReplaceCmd_RejectsInvalidName(t *testing.T) {
+	cmd := manifestsReplaceCmd()
+	cmd.SetArgs([]string{"../escape", "/dev/null"})
+	cmd.SetOut(&strings.Builder{})
+	cmd.SetErr(&strings.Builder{})
+	err := cmd.Execute()
+	if err == nil {
+		t.Fatal("expected error for path-traversal name, got nil")
+	}
+	if !strings.Contains(err.Error(), "invalid manifest name") {
+		t.Errorf("expected 'invalid manifest name' in error, got: %v", err)
+	}
+}
+
+// TestManifestsRmCmd_RejectsInvalidName guards the rm path too --
+// rm takes <name> only (no file input) and we want the same
+// regex check to fire before any RunShell.
+func TestManifestsRmCmd_RejectsInvalidName(t *testing.T) {
+	cmd := manifestsRmCmd()
+	cmd.SetArgs([]string{"../escape"})
+	cmd.SetOut(&strings.Builder{})
+	cmd.SetErr(&strings.Builder{})
+	err := cmd.Execute()
+	if err == nil {
+		t.Fatal("expected error for path-traversal name, got nil")
+	}
+	if !strings.Contains(err.Error(), "invalid manifest name") {
+		t.Errorf("expected 'invalid manifest name' in error, got: %v", err)
+	}
+}
+
+// TestStagedManifestPath pins the on-cluster path the three
+// verbs compute. The path is the single touchpoint between the
+// build-side commands and the prepare-export step that moves
+// these files onto the appliance disk; changing it accidentally
+// would silently break the appliance flow.
+func TestStagedManifestPath(t *testing.T) {
+	got := stagedManifestPath("migrate-v0.5.0-userdb")
+	want := "/var/lib/y-cluster/manifests-staging/migrate-v0.5.0-userdb.yaml"
+	if got != want {
+		t.Errorf("stagedManifestPath: got %q, want %q", got, want)
+	}
+}
+
+// TestManifestsCmd_Subcommands pins the three-verb surface so a
+// future refactor can't accidentally drop one. The strict-in-
+// both-directions contract (add: must-not-exist, replace: must-
+// exist, rm: must-exist) only works if all three exist; deleting
+// any of them turns either a "create" or an "overwrite" into a
+// silent --force.
+func TestManifestsCmd_Subcommands(t *testing.T) {
+	cmd := manifestsCmd()
+	have := map[string]bool{}
+	for _, sub := range cmd.Commands() {
+		have[sub.Name()] = true
+	}
+	for _, want := range []string{"add", "replace", "rm"} {
+		if !have[want] {
+			t.Errorf("manifests subcommand missing: %q (have %v)", want, have)
+		}
+	}
+}
diff --git a/e2e/qemu_test.go b/e2e/qemu_test.go
index 87dec52..f9b1ea9 100644
--- a/e2e/qemu_test.go
+++ b/e2e/qemu_test.go
@@ -305,6 +305,118 @@ func TestQemu_ExportImport(t *testing.T) {
 	}
 }
 
+// TestQemu_ExportImport_Qcow2 is the local-qemu-only counterpart
+// to TestQemu_ExportImport: export the disk as native qcow2,
+// then re-import via the same `qemu.Import` entrypoint without
+// going through vmdk. Maintainers exercising disk-reuse / appliance
+// e2e loops shouldn't have to bounce through a foreign format
+// just to satisfy the import verb.
+//
+// Two FRs land in the same round-trip:
+//
+//   - FR 3: format-sniff path (importFormatFromExt(".qcow2") ->
+//     qemu-img convert -f qcow2) — exercised by step 5.
+//   - FR 4: provision uses the staged disk from import — the
+//     second Provision call (step 6) hits the
+//     <CacheDir>/<Name>.qcow2-already-exists branch and reaches
+//     SSH + kubeconfig WITHOUT re-running k3s install. Pre-FR 4
+//     this errored out with "disk already exists; run start..."
+//     and start itself errored "no kubeconfig context yet" --
+//     the import->boot deadlock the spec calls out.
+func TestQemu_ExportImport_Qcow2(t *testing.T) {
+	if _, err := os.Stat("/dev/kvm"); err != nil {
+		t.Skip("QEMU tests require /dev/kvm")
+	}
+	if err := qemu.CheckPrerequisites(); err != nil {
+		t.Skip(err)
+	}
+
+	logger, _ := zap.NewDevelopment()
+	cfg := e2eQEMURuntime()
+	cfg.Name = "y-cluster-e2e-export-qcow2"
+	cfg.Context = "y-cluster-e2e-export-qcow2"
+	cfg.CacheDir = t.TempDir()
+	cfg.Memory = "4096"
+	cfg.CPUs = "2"
+	cfg.SSHPort = "2229"
+	cfg.PortForwards = e2eUniqueForwards("26449", "28449")
+	cfg.Kubeconfig = os.Getenv("KUBECONFIG")
+	if cfg.Kubeconfig == "" {
+		t.Skip("KUBECONFIG must be set")
+	}
+
+	ctx := context.Background()
+
+	cluster, err := qemu.Provision(ctx, cfg, logger)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := cluster.Teardown(true); err != nil {
+		t.Fatal(err)
+	}
+
+	bundleDir := filepath.Join(cfg.CacheDir, "bundle")
+	if err := qemu.Export(ctx, qemu.ExportOptions{
+		CacheDir:  cfg.CacheDir,
+		Name:      cfg.Name,
+		BundleDir: bundleDir,
+		Format:    qemu.FormatQcow2,
+		Logger:    logger,
+	}); err != nil {
+		t.Fatal(err)
+	}
+	qcow2Path := filepath.Join(bundleDir, cfg.Name+".qcow2")
+	if _, err := os.Stat(qcow2Path); err != nil {
+		t.Fatalf("qcow2 should exist after export: %v", err)
+	}
+
+	// Drop the original disk so the import is unambiguously the
+	// source of the boot disk on the next provision.
+	if err := os.Remove(cluster.DiskPath()); err != nil {
+		t.Fatal(err)
+	}
+
+	// Import via the same qemu.Import that the CLI calls. The
+	// .qcow2 extension dispatches to the qcow2-format branch.
+	if err := qemu.Import(qcow2Path, cluster.DiskPath()); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := os.Stat(cluster.DiskPath()); err != nil {
+		t.Fatal("disk should exist after qcow2 import")
+	}
+
+	// FR 4: this Provision must succeed against the
+	// already-existing <CacheDir>/<Name>.qcow2 left by Import.
+	// Pre-FR 4 we'd have errored here with "disk already exists;
+	// run start...". With the staged-disk branch in Provision,
+	// we boot the imported disk and pull the kubeconfig out of
+	// the k3s that the source appliance pre-baked.
+	cluster2, err := qemu.Provision(ctx, cfg, logger)
+	if err != nil {
+		t.Fatalf("Provision against imported disk (FR 4 path): %v", err)
+	}
+	if out, err := cluster2.SSH(ctx, "hostname"); err != nil {
+		t.Fatalf("SSH after qcow2 import: %v", err)
+	} else {
+		t.Logf("hostname after qcow2 import: %s", out)
+	}
+	// The kubeconfig context should be live -- the staged-disk
+	// branch's contract is "import + provision = kubectl works".
+	// kubectl get nodes is the simplest cheap proof.
+	if out, err := exec.Command("kubectl",
+		"--context="+cfg.Context, "--kubeconfig="+cfg.Kubeconfig,
+		"get", "nodes", "--no-headers",
+	).CombinedOutput(); err != nil {
+		t.Errorf("kubectl get nodes against staged-disk context: %s: %v", out, err)
+	} else if !strings.Contains(string(out), "Ready") {
+		t.Errorf("staged-disk cluster has no Ready nodes:\n%s", out)
+	}
+	if err := cluster2.Teardown(false); err != nil {
+		t.Fatal(err)
+	}
+}
+
 // TestQemu_StopStart provisions, stops the VM, asserts disk +
 // state sidecar are preserved while the pidfile is gone, then
 // starts the VM and asserts kubectl still works against the
@@ -565,6 +677,155 @@ func TestQemu_Seed_GateAndBypass(t *testing.T) {
 	t.Fatalf("k3s never reached Ready after bypass+restart\nk3s journal tail:\n%s", out)
 }
 
+// TestQemu_DataDisk_ReuseAcrossProvisions pins the disk-reuse
+// primitive's contract: an operator who sets DataDisk on the
+// runtime Config can provision a cluster, write to /data/yolean,
+// teardown the VM, re-provision a FRESH cluster on the same
+// DataDisk path, and read the same data back. This is the local
+// (qemu-side) shape of what `appliance-qemu-to-gcp.sh
+// --reuse-disk=true` does in cloud -- maintainers shouldn't
+// have to round-trip through GCP / Hetzner to QA disk reuse.
+//
+// What we prove:
+//
+//   - First provision creates a labeled qcow2 at the configured
+//     DataDisk path (NOT under CacheDir) and attaches it to the
+//     VM. The cloud-init `mounts:` entry mounts it at /data/yolean
+//     before workloads.
+//   - Writing a sentinel under /data/yolean lands on the labeled
+//     volume, not the boot disk.
+//   - Teardown removes the boot disk + cache artefacts but
+//     leaves the DataDisk file in place.
+//   - Second provision (same name, same DataDisk path, fresh
+//     boot disk + ssh key) re-attaches the same labeled qcow2
+//     and reads the sentinel back unchanged.
+//
+// Coverage gap closed: previously the only end-to-end coverage
+// for the "external labeled volume rides through a fresh cluster
+// install" pattern lived in TestQemu_Seed_VolumeAttached, which
+// goes via prepare-export + seed-tarball -- a different code
+// path. This test covers the direct disk-reuse path the
+// maintainer needs for fast iteration.
+func TestQemu_DataDisk_ReuseAcrossProvisions(t *testing.T) {
+	if _, err := os.Stat("/dev/kvm"); err != nil {
+		t.Skip("QEMU tests require /dev/kvm")
+	}
+	if err := qemu.CheckPrerequisites(); err != nil {
+		t.Skip(err)
+	}
+	if _, err := exec.LookPath("virt-format"); err != nil {
+		t.Skip("virt-format not on PATH; install libguestfs-tools")
+	}
+
+	logger, _ := zap.NewDevelopment()
+
+	// CacheDir is the qemu provisioner's per-test scratch.
+	// dataDiskDir is deliberately a separate tempdir so the
+	// teardown-doesn't-touch-it contract has somewhere external
+	// to point at; in the production shape an operator would
+	// put this under their home dir or a customer-specific
+	// path, NOT under the cluster cache.
+	cacheDir := t.TempDir()
+	dataDiskDir := t.TempDir()
+	dataDiskPath := filepath.Join(dataDiskDir, "y-cluster-data.qcow2")
+
+	cfg := e2eQEMURuntime()
+	cfg.Name = "y-cluster-e2e-datadisk-reuse"
+	cfg.Context = "y-cluster-e2e-datadisk-reuse"
+	cfg.CacheDir = cacheDir
+	cfg.Memory = "2048"
+	cfg.CPUs = "2"
+	cfg.SSHPort = "2230"
+	cfg.PortForwards = e2eUniqueForwards("26450", "28450")
+	cfg.Kubeconfig = os.Getenv("KUBECONFIG")
+	if cfg.Kubeconfig == "" {
+		t.Skip("KUBECONFIG must be set")
+	}
+	cfg.DataDisk = dataDiskPath
+	cfg.DataDiskSize = "1G"
+	// Skip the bundled gateway install for both provisions --
+	// this test is about disk reuse, not the EG install path,
+	// and skipping the EG install knocks ~30s off each
+	// provision (1 min over both passes).
+	cfg.Gateway.Skip = true
+
+	ctx := context.Background()
+
+	// === Provision 1: write the sentinel ===
+	cluster1, err := qemu.Provision(ctx, cfg, logger)
+	if err != nil {
+		t.Fatalf("Provision (pass 1): %v", err)
+	}
+
+	// The DataDisk was created at the operator-configured path,
+	// not the cache dir. Asserting BOTH so a future refactor
+	// doesn't accidentally relocate operator state.
+	if _, err := os.Stat(dataDiskPath); err != nil {
+		t.Errorf("DataDisk should exist post-provision at the operator path: %v", err)
+	}
+	if filepath.Dir(dataDiskPath) == cacheDir {
+		t.Errorf("DataDisk leaked into CacheDir; that defeats teardown safety")
+	}
+
+	// Mount must be the labeled volume (not the boot disk's /data/yolean).
+	if out, err := cluster1.SSH(ctx, "mountpoint -q /data/yolean && echo mounted"); err != nil {
+		t.Fatalf("mountpoint check (pass 1): %s: %v", out, err)
+	} else if !strings.Contains(string(out), "mounted") {
+		t.Errorf("/data/yolean must be a mountpoint when DataDisk is configured: %s", out)
+	}
+	if out, err := cluster1.SSH(ctx, "findmnt -no SOURCE /data/yolean"); err != nil {
+		t.Fatalf("findmnt /data/yolean: %s: %v", out, err)
+	} else if !strings.Contains(string(out), "/dev/vd") {
+		t.Errorf("/data/yolean should be backed by a virtio drive (the attached qcow2): %s", out)
+	}
+
+	// Plant the sentinel. lost+found is a normal ext4 reserved
+	// inode and proves the filesystem is the freshly-formatted
+	// labeled volume rather than a host bind-mount.
+	if out, err := cluster1.SSH(ctx,
+		"sudo ls -la /data/yolean && echo data-disk-reuse-v1 | sudo tee /data/yolean/sentinel.txt >/dev/null"); err != nil {
+		t.Fatalf("plant sentinel (pass 1): %s: %v", out, err)
+	}
+
+	// Teardown without --keepDisk: boot disk + cache go;
+	// DataDisk must stay.
+	if err := qemu.TeardownConfig(cfg, false, logger); err != nil {
+		t.Fatalf("TeardownConfig (pass 1): %v", err)
+	}
+	if _, err := os.Stat(filepath.Join(cacheDir, cfg.Name+".qcow2")); !os.IsNotExist(err) {
+		t.Errorf("boot disk should be gone after teardown: err=%v", err)
+	}
+	if _, err := os.Stat(dataDiskPath); err != nil {
+		t.Fatalf("DataDisk must survive teardown without --keepDisk: %v", err)
+	}
+
+	// === Provision 2: re-attach the same DataDisk ===
+	cluster2, err := qemu.Provision(ctx, cfg, logger)
+	if err != nil {
+		t.Fatalf("Provision (pass 2 / disk-reuse): %v", err)
+	}
+
+	// Sentinel must be readable unchanged on pass 2 -- the
+	// whole point of the primitive. cat sentinel.txt > 0 bytes
+	// implies the boot disk is brand new (no leftovers from
+	// the boot disk's /data/yolean) BUT the labeled volume
+	// brought the file along.
+	if out, err := cluster2.SSH(ctx, "cat /data/yolean/sentinel.txt"); err != nil {
+		t.Fatalf("read sentinel (pass 2): %v", err)
+	} else if got := strings.TrimSpace(string(out)); got != "data-disk-reuse-v1" {
+		t.Errorf("sentinel content lost across teardown + re-provision; got %q", got)
+	}
+
+	// Final teardown leaves the data disk in place (just like
+	// pass 1's teardown did).
+	if err := qemu.TeardownConfig(cfg, false, logger); err != nil {
+		t.Errorf("final TeardownConfig: %v", err)
+	}
+	if _, err := os.Stat(dataDiskPath); err != nil {
+		t.Errorf("DataDisk must still survive the second teardown: %v", err)
+	}
+}
+
 // TestQemu_Seed_VolumeAttached exercises the production-shape happy
 // path that TestQemu_Seed_GateAndBypass deliberately doesn't:
 //
diff --git a/pkg/provision/config/qemu.go b/pkg/provision/config/qemu.go
index 63fa4e0..d92f5c5 100644
--- a/pkg/provision/config/qemu.go
+++ b/pkg/provision/config/qemu.go
@@ -10,6 +10,23 @@ type QEMUConfig struct {
 	SSHPort  string `yaml:"sshPort,omitempty"      json:"sshPort,omitempty"      jsonschema:"default=2222,description=Host port forwarded to the VM's SSH server. Added on top of CommonConfig.PortForwards."`
 	CacheDir string `yaml:"cacheDir,omitempty"     json:"cacheDir,omitempty"     jsonschema:"description=Directory for VM disk and cloud image cache. Empty: $HOME/.cache/y-cluster-qemu."`
 
+	// DataDisk, when non-empty, points at an external qcow2 that y-cluster
+	// attaches as a labeled `y-cluster-data` ext4 volume. Provision
+	// creates the file with `qemu-img create` + `virt-format` if it
+	// doesn't exist; teardown leaves the file in place (operator-owned
+	// state, NOT cache-managed). The appliance image's pre-baked
+	// `LABEL=y-cluster-data /data/yolean ext4` fstab entry mounts it
+	// automatically at boot. Use this to test disk-reuse flows
+	// (provision -> workload writes /data/yolean -> teardown ->
+	// re-provision -> same data still there) locally without going
+	// through prepare-export + cloud import.
+	DataDisk string `yaml:"dataDisk,omitempty"     json:"dataDisk,omitempty"     jsonschema:"description=External qcow2 to attach as the labeled /data/yolean volume. Created if missing; preserved on teardown. Use absolute path; relative paths resolve against the config-file's directory."`
+
+	// DataDiskSize sizes a freshly-created DataDisk. Ignored when the
+	// DataDisk file already exists. Default keeps the same shape as
+	// DiskSize so the schema reads consistently.
+	DataDiskSize string `yaml:"dataDiskSize,omitempty" json:"dataDiskSize,omitempty" jsonschema:"description=Size for a freshly-created DataDisk ([num][KMGT]). Default 10G; ignored when the DataDisk file already exists or when DataDisk itself is empty."`
+
 	// Dir is filled at load time from the absolute path of the
 	// directory the config came from. Not part of the schema.
 	Dir string `yaml:"-" json:"-" jsonschema:"-"`
diff --git a/pkg/provision/qemu/data_disk.go b/pkg/provision/qemu/data_disk.go
new file mode 100644
index 0000000..c67e096
--- /dev/null
+++ b/pkg/provision/qemu/data_disk.go
@@ -0,0 +1,94 @@
+package qemu
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+
+	"go.uber.org/zap"
+)
+
+// DataDiskLabel is the ext4 filesystem label y-cluster's data
+// disk carries. The appliance image's pre-baked
+// `LABEL=y-cluster-data /data/yolean ext4 defaults,nofail 0 2`
+// fstab entry mounts it; the qemu-provisioned cloud-init also
+// adds the same entry when Config.DataDisk is set, so the
+// labeled volume mounts even on a non-prepared boot disk.
+const DataDiskLabel = "y-cluster-data"
+
+// DataDiskDefaultSize is the size applied to a freshly-created
+// DataDisk when QEMUConfig.DataDiskSize is empty. Matches the
+// appliance-flow default (GCP_DATADIR_SIZE) so local QA of disk
+// reuse uses the same shape the appliance does in cloud.
+const DataDiskDefaultSize = "10G"
+
+// ensureDataDisk makes path exist as a qcow2 with an ext4
+// filesystem labeled DataDiskLabel. Idempotent on existence:
+// if the file is already there, it's left alone (operator-owned
+// state, the whole point of this primitive is that it survives
+// teardown + re-provision). Only the initial creation runs the
+// format step.
+//
+// Errors after `qemu-img create` succeeds but `virt-format`
+// fails roll back the partial file so a re-run starts clean.
+//
+// Tools required when this fires (caller's check):
+//   - qemu-img (always required by qemu provisioner)
+//   - virt-format (libguestfs-tools; only needed for fresh
+//     DataDisk creation, NOT for reuse)
+func ensureDataDisk(ctx context.Context, path, size string, logger *zap.Logger) error {
+	if path == "" {
+		return fmt.Errorf("ensureDataDisk: path is empty")
+	}
+	if logger == nil {
+		logger = zap.NewNop()
+	}
+	if _, err := os.Stat(path); err == nil {
+		logger.Info("data disk exists, preserving",
+			zap.String("path", path))
+		return nil
+	} else if !os.IsNotExist(err) {
+		return fmt.Errorf("stat data disk %s: %w", path, err)
+	}
+	if size == "" {
+		size = DataDiskDefaultSize
+	}
+	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+		return fmt.Errorf("create data-disk parent dir %s: %w", filepath.Dir(path), err)
+	}
+	logger.Info("creating data disk",
+		zap.String("path", path),
+		zap.String("size", size),
+		zap.String("label", DataDiskLabel),
+	)
+	if out, err := exec.CommandContext(ctx, "qemu-img", "create", "-f", "qcow2", path, size).CombinedOutput(); err != nil {
+		return fmt.Errorf("qemu-img create %s: %s: %w", path, out, err)
+	}
+	if out, err := exec.CommandContext(ctx, "virt-format",
+		"-a", path,
+		"--filesystem=ext4",
+		"--label="+DataDiskLabel,
+	).CombinedOutput(); err != nil {
+		_ = os.Remove(path)
+		return fmt.Errorf("virt-format %s: %s: %w", path, out, err)
+	}
+	return nil
+}
+
+// checkDataDiskTools enforces the libguestfs prerequisite that
+// only matters when a fresh DataDisk needs creating. Existing
+// data disks don't need libguestfs to attach (qemu handles raw
+// qcow2 attachment natively).
+func checkDataDiskTools(path string) error {
+	if _, err := os.Stat(path); err == nil {
+		return nil // existing disk, no creation needed
+	}
+	if _, err := exec.LookPath("virt-format"); err != nil {
+		return fmt.Errorf(
+			"DataDisk %s does not exist and virt-format is not on PATH; "+
+				"install libguestfs-tools to let y-cluster create labeled data disks", path)
+	}
+	return nil
+}
diff --git a/pkg/provision/qemu/data_disk_test.go b/pkg/provision/qemu/data_disk_test.go
new file mode 100644
index 0000000..993b894
--- /dev/null
+++ b/pkg/provision/qemu/data_disk_test.go
@@ -0,0 +1,68 @@
+package qemu
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestEnsureDataDisk_PreservesExisting pins the disk-reuse
+// invariant: a DataDisk that already exists is NOT touched,
+// even if its size differs from the configured size. The whole
+// reason DataDisk is a separate concept from the boot disk is
+// that operators put data on it; reformatting on every provision
+// would defeat the purpose.
+//
+// We seed a sentinel file inside a placeholder qcow2 (using a
+// regular file for byte equality; the production path checks
+// file existence, not qcow2 validity). Post-call the bytes must
+// match.
+func TestEnsureDataDisk_PreservesExisting(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "data.qcow2")
+	body := []byte("not a real qcow2 -- placeholder for the reuse-preserves-bytes check")
+	if err := os.WriteFile(path, body, 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := ensureDataDisk(context.Background(), path, "1G", nil); err != nil {
+		t.Fatalf("ensureDataDisk on existing file: %v", err)
+	}
+
+	got, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read after ensure: %v", err)
+	}
+	if string(got) != string(body) {
+		t.Errorf("existing file content mutated:\ngot:  %q\nwant: %q", got, body)
+	}
+}
+
+// TestEnsureDataDisk_EmptyPathRejected protects callers from
+// silently creating a qcow2 at the empty path (which would land
+// at CWD or worse depending on the running shell). The runtime
+// Config translation in FromConfig should already prevent this,
+// but a misuse-of-the-helper case is cheap to pin.
+func TestEnsureDataDisk_EmptyPathRejected(t *testing.T) {
+	if err := ensureDataDisk(context.Background(), "", "1G", nil); err == nil {
+		t.Fatal("expected error for empty path")
+	}
+}
+
+// TestCheckDataDiskTools_NoToolNeededWhenDiskExists pins the
+// graceful-degradation contract: an operator on a host without
+// libguestfs can still EXERCISE the disk-reuse flow (the
+// expensive part) as long as the disk has been created once
+// elsewhere -- attaching an existing qcow2 doesn't need
+// libguestfs, only the initial format step does.
+func TestCheckDataDiskTools_NoToolNeededWhenDiskExists(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "data.qcow2")
+	if err := os.WriteFile(path, []byte("placeholder"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	if err := checkDataDiskTools(path); err != nil {
+		t.Errorf("expected no tool requirement for an existing disk: %v", err)
+	}
+}
diff --git a/pkg/provision/qemu/qemu.go b/pkg/provision/qemu/qemu.go
index aeabdfa..bfb58c5 100644
--- a/pkg/provision/qemu/qemu.go
+++ b/pkg/provision/qemu/qemu.go
@@ -59,6 +59,13 @@ type Config struct {
 	Registries   config.Registries
 	Gateway      config.GatewayConfig
 	Storage      config.StorageConfig
+
+	// DataDisk is the operator-owned external qcow2 attached as a
+	// labeled `y-cluster-data` volume at /data/yolean. Empty means
+	// "no external data disk; /data/yolean lives on the boot disk".
+	// Provision creates the file if missing; Teardown leaves it.
+	DataDisk     string
+	DataDiskSize string
 }
 
 // K3s carries the runtime view of K3sConfig: which version to
@@ -141,6 +148,17 @@ func FromConfig(c *config.QEMUConfig) Config {
 	for _, p := range c.PortForwards {
 		pfs = append(pfs, PortForward{Host: p.Host, Guest: p.Guest})
 	}
+	// Relative DataDisk resolves against the directory the YAML lived
+	// in (the same convention configfile.Load applies to other path
+	// fields). An absolute path passes through.
+	dataDisk := c.DataDisk
+	if dataDisk != "" && !filepath.IsAbs(dataDisk) && c.Dir != "" {
+		dataDisk = filepath.Join(c.Dir, dataDisk)
+	}
+	dataDiskSize := c.DataDiskSize
+	if dataDisk != "" && dataDiskSize == "" {
+		dataDiskSize = "10G"
+	}
 	return Config{
 		Name:         c.Name,
 		DiskSize:     c.DiskSize,
@@ -155,9 +173,11 @@ func FromConfig(c *config.QEMUConfig) Config {
 			Version: c.K3s.Version,
 			Install: c.K3s.Install,
 		},
-		Registries: c.Registries,
-		Gateway:    c.Gateway,
-		Storage:    c.Storage,
+		Registries:   c.Registries,
+		Gateway:      c.Gateway,
+		Storage:      c.Storage,
+		DataDisk:     dataDisk,
+		DataDiskSize: dataDiskSize,
 	}
 }
 
@@ -169,8 +189,10 @@ type Cluster struct {
 	logger     *zap.Logger
 	Kubeconfig *kubeconfig.Manager
 	// extraDisks is appended after boot+seed in startVM. Set by
-	// StartForDiagnosticWithDisks for tests that need a labeled
-	// data volume attached; production code paths leave it nil.
+	// Provision when Config.DataDisk is configured (production
+	// code path for the disk-reuse primitive), and by
+	// StartForDiagnosticWithDisks for the e2e tests that
+	// exercise the appliance's pre-baked LABEL fstab.
 	extraDisks []string
 }
 
@@ -246,19 +268,60 @@ func Provision(ctx context.Context, cfg Config, logger *zap.Logger) (*Cluster, e
 		Kubeconfig: kubecfg,
 	}
 
-	// Download cloud image
-	cloudImg, err := c.ensureCloudImage(ctx)
-	if err != nil {
-		return nil, err
+	// Two provision shapes:
+	//
+	//   - Fresh: <CacheDir>/<Name>.qcow2 does not exist. Build a
+	//     new boot disk from the upstream cloud image, install
+	//     k3s + local-path-provisioner + envoy-gateway on it,
+	//     extract a kubeconfig.
+	//
+	//   - Staged-disk: <CacheDir>/<Name>.qcow2 EXISTS. This is
+	//     the path `y-cluster import` set up -- a customer-side
+	//     boot of a freshly-imported appliance image. k3s and
+	//     the in-cluster surface (local-path-provisioner +
+	//     envoy-gateway) were baked in by the source appliance
+	//     build; this provision only needs to drop a fresh
+	//     SSH keypair + cloud-init seed, boot the VM, and
+	//     extract the kubeconfig context. Re-installing k3s
+	//     here would clobber the appliance's pre-baked state.
+	//
+	// The staged-disk branch closes the import->boot deadlock
+	// (provision used to error "disk already exists; run start"
+	// while start errored "no kubeconfig context"). After a
+	// successful staged-disk provision, the kubeconfig is
+	// populated and subsequent stop/start cycles take the
+	// existing-cluster path.
+	diskPath := filepath.Join(cfg.CacheDir, cfg.Name+".qcow2")
+	stagedDisk := false
+	if _, err := os.Stat(diskPath); err == nil {
+		stagedDisk = true
+		logger.Info("using staged disk from prior import (skipping cloud-image fetch + k3s install)",
+			zap.String("disk", diskPath))
+	} else {
+		cloudImg, err := c.ensureCloudImage(ctx)
+		if err != nil {
+			return nil, err
+		}
+		if err := c.ensureDisk(ctx, cloudImg, diskPath); err != nil {
+			return nil, err
+		}
 	}
 
-	// Create disk from cloud image. ensureDisk errors when the
-	// disk already exists -- per-customer Provision is always
-	// fresh; the operator runs teardown first or `start` to
-	// resume.
-	diskPath := filepath.Join(cfg.CacheDir, cfg.Name+".qcow2")
-	if err := c.ensureDisk(ctx, cloudImg, diskPath); err != nil {
-		return nil, err
+	// If DataDisk is configured, ensure it exists (creates a
+	// labeled qcow2 on first run; reuses an existing one on
+	// subsequent runs -- that's the disk-reuse contract). The
+	// cloud-init seed below adds the matching fstab entry so the
+	// kernel mounts the labeled volume at /data/yolean
+	// regardless of whether the boot disk has been through
+	// prepare-export.
+	if cfg.DataDisk != "" {
+		if err := checkDataDiskTools(cfg.DataDisk); err != nil {
+			return nil, err
+		}
+		if err := ensureDataDisk(ctx, cfg.DataDisk, cfg.DataDiskSize, logger); err != nil {
+			return nil, err
+		}
+		c.extraDisks = []string{cfg.DataDisk}
 	}
 
 	// Generate a fresh SSH keypair (always; never reused across
@@ -293,6 +356,30 @@ func Provision(ctx context.Context, cfg Config, logger *zap.Logger) (*Cluster, e
 
 	logger.Info("VM ready")
 
+	if stagedDisk {
+		// Pre-baked appliance: k3s + addons + registries are
+		// already on the disk. Wait for k3s to come up (which
+		// it does on its own at boot via systemd), pull its
+		// kubeconfig, merge it into the host's, and return.
+		// installK3s, localstorage.Install, envoygateway.Install
+		// are skipped wholesale -- re-running them here would
+		// clobber the appliance's pre-baked state (newer
+		// install.yaml apply, racey GatewayClass overwrite,
+		// etc.).
+		if err := c.waitForK3sReady(ctx); err != nil {
+			return nil, fmt.Errorf("staged disk: wait for k3s: %w", err)
+		}
+		rawKubeconfig, err := c.extractKubeconfig(ctx)
+		if err != nil {
+			return nil, fmt.Errorf("staged disk: extract kubeconfig: %w", err)
+		}
+		if err := kubecfg.Import(rawKubeconfig); err != nil {
+			return nil, fmt.Errorf("staged disk: merge kubeconfig: %w", err)
+		}
+		logger.Info("staged disk ready", zap.String("context", cfg.Context))
+		return c, nil
+	}
+
 	// Stage /etc/rancher/k3s/registries.yaml before installing k3s
 	// so containerd reads it on first start. Skipped when the user
 	// hasn't configured any mirrors or auth.
@@ -401,6 +488,18 @@ func TeardownConfig(cfg Config, keepDisk bool, logger *zap.Logger) error {
 	// is load-bearing -- the next provision generates a fresh one,
 	// which is the contract for per-customer appliance delivery (no
 	// key reuse across provision runs).
+	// Operator-owned DataDisk is preserved unconditionally
+	// across teardown -- it lives outside CacheDir and is the
+	// whole point of the disk-reuse primitive. Log it so the
+	// operator sees the same path they configured will be
+	// re-attached on the next provision.
+	if cfg.DataDisk != "" {
+		if _, err := os.Stat(cfg.DataDisk); err == nil {
+			logger.Info("data disk preserved",
+				zap.String("path", cfg.DataDisk))
+		}
+	}
+
 	diskPath := filepath.Join(cfg.CacheDir, cfg.Name+".qcow2")
 	if keepDisk {
 		logger.Info("teardown complete, disk preserved", zap.String("disk", diskPath))
@@ -601,20 +700,60 @@ func (c *Cluster) DiskPath() string {
 	return filepath.Join(c.cfg.CacheDir, c.cfg.Name+".qcow2")
 }
 
-// ImportVMDK converts a VMDK to qcow2 for use as a VM disk.
-func ImportVMDK(vmdkPath, diskPath string) error {
-	if _, err := os.Stat(vmdkPath); err != nil {
-		return fmt.Errorf("VMDK not found: %s", vmdkPath)
+// Import writes a VM disk to diskPath by converting from a
+// supported input format. Format is sniffed by extension on
+// inputPath:
+//
+//   - .vmdk  -> qemu-img convert -f vmdk  -O qcow2 (the original
+//              VMware-import path; vmdk subformat doesn't matter
+//              because qemu-img -f vmdk auto-detects the variant).
+//   - .qcow2 -> qemu-img convert -f qcow2 -O qcow2 (rewrites the
+//              qcow2 into the cache layout; usually a quick
+//              copy + compaction, no format change).
+//
+// A local-qemu e2e loop that does `y-cluster export
+// --format=qcow2 ... | y-cluster import` doesn't need any
+// out-of-band format conversion -- qcow2 is the native format on
+// both ends. Other formats (raw, vdi, OVA, gcp-tar) aren't on
+// the import path; add them when a real consumer asks.
+func Import(inputPath, diskPath string) error {
+	if _, err := os.Stat(inputPath); err != nil {
+		return fmt.Errorf("input not found: %s", inputPath)
+	}
+	srcFormat, err := importFormatFromExt(inputPath)
+	if err != nil {
+		return err
 	}
 	cmd := exec.Command("qemu-img", "convert",
-		"-f", "vmdk", "-O", "qcow2",
-		vmdkPath, diskPath)
+		"-f", srcFormat, "-O", "qcow2",
+		inputPath, diskPath)
 	if out, err := cmd.CombinedOutput(); err != nil {
 		return fmt.Errorf("qemu-img convert: %s: %w", out, err)
 	}
 	return nil
 }
 
+// ImportVMDK is the deprecated alias for Import retained for any
+// out-of-tree caller pinned to the old name. Prefer Import.
+func ImportVMDK(vmdkPath, diskPath string) error {
+	return Import(vmdkPath, diskPath)
+}
+
+// importFormatFromExt maps a file extension to the qemu-img `-f`
+// argument. Centralised so the supported-set is in one place and a
+// new format becomes a one-line table update.
+func importFormatFromExt(path string) (string, error) {
+	ext := strings.ToLower(filepath.Ext(path))
+	switch ext {
+	case ".vmdk":
+		return "vmdk", nil
+	case ".qcow2":
+		return "qcow2", nil
+	default:
+		return "", fmt.Errorf("unsupported import format %q for %s (supported: .vmdk, .qcow2)", ext, path)
+	}
+}
+
 // --- internal helpers ---
 
 const ubuntuVersion = "noble"
@@ -689,7 +828,20 @@ func (c *Cluster) ensureSSHKey() error {
 // on hosts that don't provide them. The "no SSH banner" failure
 // mode on Hetzner was cloud-init blocking sshd's network ordering;
 // this pin prevents the recurrence.
-func renderCloudInitUserData(hostname, sshPubKey string) string {
+func renderCloudInitUserData(hostname, sshPubKey string, mountDataLabel bool) string {
+	// Mount block. When the operator has configured a DataDisk
+	// for this VM, the labeled qcow2 is attached as an extra
+	// virtio drive and the kernel needs a fstab entry to mount
+	// it at the appliance contract path. `nofail` keeps boot
+	// moving when the volume is missing (an operator manually
+	// detached it for debugging, say) instead of dropping into
+	// emergency mode.
+	mounts := ""
+	if mountDataLabel {
+		mounts = `mounts:
+  - [ "LABEL=` + DataDiskLabel + `", "/data/yolean", "ext4", "defaults,nofail", "0", "2" ]
+`
+	}
 	return fmt.Sprintf(`#cloud-config
 hostname: %s
 users:
@@ -699,7 +851,7 @@ users:
     ssh_authorized_keys:
       - %s
 package_update: false
-write_files:
+%swrite_files:
   - path: /etc/cloud/cloud.cfg.d/99-y-cluster-pin.cfg
     permissions: '0644'
     content: |
@@ -708,7 +860,7 @@ write_files:
       # that don't provide them. NoCloud covers the qemu seed; None lets
       # cloud-init proceed when no NoCloud source is present.
       datasource_list: [NoCloud, None]
-`, hostname, strings.TrimSpace(sshPubKey))
+`, hostname, strings.TrimSpace(sshPubKey), mounts)
 }
 
 func (c *Cluster) createCloudInitSeed() (string, error) {
@@ -717,7 +869,7 @@ func (c *Cluster) createCloudInitSeed() (string, error) {
 		return "", fmt.Errorf("read SSH public key: %w", err)
 	}
 
-	cloudInit := renderCloudInitUserData(c.cfg.Name, string(pubKey))
+	cloudInit := renderCloudInitUserData(c.cfg.Name, string(pubKey), c.cfg.DataDisk != "")
 
 	// Name-prefix the cloud-init source so two concurrent provisions
 	// in the same cacheDir don't race on the file. Per-VM artifacts
diff --git a/pkg/provision/qemu/qemu_test.go b/pkg/provision/qemu/qemu_test.go
index ef7774e..ef6db8b 100644
--- a/pkg/provision/qemu/qemu_test.go
+++ b/pkg/provision/qemu/qemu_test.go
@@ -50,6 +50,68 @@ func TestFromConfig_AppliesDefaults(t *testing.T) {
 	}
 }
 
+// TestFromConfig_DataDiskDefaults pins the disk-reuse primitive's
+// path-resolution + size-default contract. The DataDisk field is
+// the operator's hook for "I want a labeled volume I can keep
+// across teardown / re-provision"; we let them spell an absolute
+// path or a path relative to the config dir (the latter so the
+// y-cluster-provision.yaml co-locates with its data disk for a
+// one-directory test setup).
+func TestFromConfig_DataDiskDefaults(t *testing.T) {
+	t.Run("absolute path passes through, size defaults", func(t *testing.T) {
+		c := &config.QEMUConfig{
+			CommonConfig: config.CommonConfig{Provider: config.ProviderQEMU},
+			DataDisk:     "/abs/path/data.qcow2",
+		}
+		c.ApplyDefaults()
+		rt := FromConfig(c)
+		if rt.DataDisk != "/abs/path/data.qcow2" {
+			t.Errorf("DataDisk: got %q, want absolute pass-through", rt.DataDisk)
+		}
+		if rt.DataDiskSize != "10G" {
+			t.Errorf("DataDiskSize: got %q, want 10G default", rt.DataDiskSize)
+		}
+	})
+	t.Run("relative path resolves against config dir", func(t *testing.T) {
+		c := &config.QEMUConfig{
+			CommonConfig: config.CommonConfig{Provider: config.ProviderQEMU},
+			DataDisk:     "data.qcow2",
+			Dir:          "/etc/y-cluster",
+		}
+		c.ApplyDefaults()
+		rt := FromConfig(c)
+		want := "/etc/y-cluster/data.qcow2"
+		if rt.DataDisk != want {
+			t.Errorf("DataDisk: got %q, want %q", rt.DataDisk, want)
+		}
+	})
+	t.Run("empty DataDisk leaves DataDiskSize empty (no surprise default)", func(t *testing.T) {
+		c := &config.QEMUConfig{
+			CommonConfig: config.CommonConfig{Provider: config.ProviderQEMU},
+		}
+		c.ApplyDefaults()
+		rt := FromConfig(c)
+		if rt.DataDisk != "" {
+			t.Errorf("DataDisk should stay empty: %q", rt.DataDisk)
+		}
+		if rt.DataDiskSize != "" {
+			t.Errorf("DataDiskSize should stay empty when DataDisk is unset: %q", rt.DataDiskSize)
+		}
+	})
+	t.Run("explicit DataDiskSize overrides default", func(t *testing.T) {
+		c := &config.QEMUConfig{
+			CommonConfig: config.CommonConfig{Provider: config.ProviderQEMU},
+			DataDisk:     "/abs/data.qcow2",
+			DataDiskSize: "50G",
+		}
+		c.ApplyDefaults()
+		rt := FromConfig(c)
+		if rt.DataDiskSize != "50G" {
+			t.Errorf("explicit DataDiskSize lost: %q", rt.DataDiskSize)
+		}
+	})
+}
+
 func TestFromConfig_PreservesExplicitPortForwards(t *testing.T) {
 	c := &config.QEMUConfig{
 		CommonConfig: config.CommonConfig{
@@ -92,6 +154,58 @@ func TestImportVMDK_MissingVMDK(t *testing.T) {
 	}
 }
 
+// TestImport_MissingInput guards both extensions: a non-existent
+// file should fail fast before any qemu-img invocation.
+func TestImport_MissingInput(t *testing.T) {
+	for _, ext := range []string{".vmdk", ".qcow2"} {
+		if err := Import("/nonexistent/disk"+ext, "/tmp/out.qcow2"); err == nil {
+			t.Errorf("expected error for missing input with ext %q", ext)
+		}
+	}
+}
+
+// TestImportFormatFromExt pins the supported-format set. New
+// formats land here first; the switch statement is the canonical
+// list. Any addition surfaces as a new test case.
+func TestImportFormatFromExt(t *testing.T) {
+	cases := []struct {
+		path    string
+		want    string
+		wantErr bool
+	}{
+		// Happy cases: case-insensitive on extension because
+		// operators sometimes get .VMDK off VMware exports.
+		{"foo.vmdk", "vmdk", false},
+		{"foo.VMDK", "vmdk", false},
+		{"foo.qcow2", "qcow2", false},
+		{"foo.QCOW2", "qcow2", false},
+		{"/tmp/a/b.qcow2", "qcow2", false},
+		// Reject explicit unsupported extensions so a typo
+		// ("disk.qcow" / "disk.img" / "disk.raw") doesn't fall
+		// through to a confusing qemu-img error.
+		{"foo.raw", "", true},
+		{"foo.vdi", "", true},
+		{"foo.tar.gz", "", true},
+		{"noext", "", true},
+		{"", "", true},
+	}
+	for _, c := range cases {
+		got, err := importFormatFromExt(c.path)
+		if c.wantErr {
+			if err == nil {
+				t.Errorf("importFormatFromExt(%q) expected error, got %q", c.path, got)
+			}
+			continue
+		}
+		if err != nil {
+			t.Errorf("importFormatFromExt(%q) unexpected error: %v", c.path, err)
+		}
+		if got != c.want {
+			t.Errorf("importFormatFromExt(%q) = %q, want %q", c.path, got, c.want)
+		}
+	}
+}
+
 func TestTeardownConfig_NoPidFile(t *testing.T) {
 	cfg := defaultedRuntimeConfig(t)
 	cfg.CacheDir = t.TempDir()
@@ -122,7 +236,7 @@ func TestTeardownConfig_KeepDisk(t *testing.T) {
 // snippet that pins datasource_list to NoCloud + None so a
 // re-imported disk doesn't stall on EC2 IMDS probing.
 func TestRenderCloudInitUserData_DatasourceListPin(t *testing.T) {
-	body := renderCloudInitUserData("foo", "ssh-ed25519 AAAA test@host\n")
+	body := renderCloudInitUserData("foo", "ssh-ed25519 AAAA test@host\n", false)
 	if !strings.Contains(body, "/etc/cloud/cloud.cfg.d/99-y-cluster-pin.cfg") {
 		t.Errorf("user-data must drop pin file under /etc/cloud/cloud.cfg.d/:\n%s", body)
 	}
@@ -131,11 +245,48 @@ func TestRenderCloudInitUserData_DatasourceListPin(t *testing.T) {
 	}
 }
 
+// TestRenderCloudInitUserData_NoMountWhenDataDiskDisabled pins
+// that the mount block is omitted when the operator hasn't
+// asked for a labeled data disk -- a non-DataDisk provision
+// must NOT pre-add a fstab entry that would then refer to a
+// missing labeled volume (nofail keeps boot moving, but the
+// noise in `journalctl -u systemd-fsck@*` is undesirable).
+func TestRenderCloudInitUserData_NoMountWhenDataDiskDisabled(t *testing.T) {
+	body := renderCloudInitUserData("foo", "ssh-ed25519 KEY t@h\n", false)
+	if strings.Contains(body, "LABEL=y-cluster-data") {
+		t.Errorf("user-data must not stamp a LABEL mount when DataDisk is disabled:\n%s", body)
+	}
+	if strings.Contains(body, "mounts:") {
+		t.Errorf("user-data must not include a mounts block when DataDisk is disabled:\n%s", body)
+	}
+}
+
+// TestRenderCloudInitUserData_MountWhenDataDiskEnabled pins
+// the fstab semantics: the appliance contract mount path
+// (/data/yolean) + the LABEL the qemu provisioner stamps on the
+// data disk + nofail so a removed disk doesn't deadlock boot.
+func TestRenderCloudInitUserData_MountWhenDataDiskEnabled(t *testing.T) {
+	body := renderCloudInitUserData("foo", "ssh-ed25519 KEY t@h\n", true)
+	if !strings.Contains(body, "mounts:") {
+		t.Errorf("user-data must include a mounts block when DataDisk is enabled:\n%s", body)
+	}
+	for _, want := range []string{
+		`"LABEL=y-cluster-data"`,
+		`"/data/yolean"`,
+		`"ext4"`,
+		`"defaults,nofail"`,
+	} {
+		if !strings.Contains(body, want) {
+			t.Errorf("user-data mount entry missing %q:\n%s", want, body)
+		}
+	}
+}
+
 // TestRenderCloudInitUserData_KeepsCoreShape pins the rest of the
 // user-data so the pin addition didn't accidentally drop the
 // hostname / user / sshkey wiring the qemu provisioner relies on.
 func TestRenderCloudInitUserData_KeepsCoreShape(t *testing.T) {
-	body := renderCloudInitUserData("my-cluster", "ssh-ed25519 KEY user@h\n")
+	body := renderCloudInitUserData("my-cluster", "ssh-ed25519 KEY user@h\n", false)
 	for _, want := range []string{
 		"hostname: my-cluster",
 		"name: ystack",
diff --git a/pkg/provision/schema/qemu.schema.json b/pkg/provision/schema/qemu.schema.json
index cd99139..9decf95 100644
--- a/pkg/provision/schema/qemu.schema.json
+++ b/pkg/provision/schema/qemu.schema.json
@@ -88,6 +88,14 @@
           "description": "vCPU count. qemu sets -smp; docker passes --cpus.",
           "type": "string"
         },
+        "dataDisk": {
+          "description": "External qcow2 to attach as the labeled /data/yolean volume. Created if missing; preserved on teardown. Use absolute path; relative paths resolve against the config-file's directory.",
+          "type": "string"
+        },
+        "dataDiskSize": {
+          "description": "Size for a freshly-created DataDisk ([num][KMGT]). Default 10G; ignored when the DataDisk file already exists or when DataDisk itself is empty.",
+          "type": "string"
+        },
         "diskSize": {
           "default": "20G",
           "description": "qcow2 disk size as a [num][KMGT] string.",