-
Notifications
You must be signed in to change notification settings - Fork 283
Expand file tree
/
Copy pathvm.go
More file actions
442 lines (368 loc) · 15.1 KB
/
vm.go
File metadata and controls
442 lines (368 loc) · 15.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
//go:build windows
package vm
import (
"context"
"errors"
"fmt"
"sync"
"sync/atomic"
"time"
"github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats"
"github.com/Microsoft/hcsshim/internal/cmd"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/shimdiag"
"github.com/Microsoft/hcsshim/internal/timeout"
"github.com/Microsoft/hcsshim/internal/vm/guestmanager"
"github.com/Microsoft/hcsshim/internal/vm/vmmanager"
"github.com/Microsoft/hcsshim/internal/vm/vmutils"
iwin "github.com/Microsoft/hcsshim/internal/windows"
"github.com/containerd/errdefs"
"github.com/Microsoft/go-winio/pkg/process"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
"golang.org/x/sys/windows"
)
// Manager is the VM controller implementation that manages the lifecycle of a Utility VM
// and its associated resources.
//
// A Manager is obtained from NewController in StateNotCreated and is then driven
// through its lifecycle by CreateVM, StartVM and TerminateVM.
type Manager struct {
	// vmID is the identifier of the VM. It is set from CreateOptions.ID in CreateVM.
	vmID string
	// uvm is the lifetime handle of the underlying VM, as returned by
	// vmmanager.Create in CreateVM.
	uvm vmmanager.LifetimeManager
	// guest manages guest interactions. It is constructed in CreateVM; the guest
	// connection itself is established later, in StartVM.
	guest GuestManager

	// vmState tracks the current state of the VM lifecycle.
	// Access must be guarded by mu.
	vmState State

	// mu guards the concurrent access to the Manager's fields and operations.
	mu sync.RWMutex

	// logOutputDone is closed when the GCS log output processing goroutine completes.
	// Wait blocks on it so that callers only return after log processing is done.
	logOutputDone chan struct{}

	// Handle to the vmmem process associated with this UVM. Used to look up
	// memory metrics for the UVM. It is looked up lazily by Stats and closed by
	// TerminateVM; the zero value means no handle has been looked up.
	vmmemProcess windows.Handle

	// activeExecCount tracks the number of ongoing ExecIntoHost calls.
	// It is incremented on entry to the exec and decremented when the call returns.
	activeExecCount atomic.Int64

	// isPhysicallyBacked indicates whether the VM is using physical backing for its memory.
	// It is derived in CreateVM as the negation of the HCS document's AllowOvercommit setting.
	isPhysicallyBacked bool
}
// Compile-time check that *Manager implements the Controller interface
// (and, with it, its subset Handle).
var _ Controller = (*Manager)(nil)
// NewController returns a fresh Manager whose lifecycle starts in
// [StateNotCreated]. The channel used to signal completion of log output
// processing is allocated up front so that it is always safe to wait on.
func NewController() *Manager {
	m := new(Manager)
	m.vmState = StateNotCreated
	m.logOutputDone = make(chan struct{})
	return m
}
// Guest returns the guest manager instance for this VM.
// The guest manager provides access to guest-host communication.
//
// The field is read without taking c.mu. Within this file it is only assigned
// by CreateVM, so the returned value is nil until CreateVM has succeeded.
func (c *Manager) Guest() GuestManager {
	return c.guest
}
// State reports the lifecycle state the VM is currently in.
// The value is a snapshot taken under the read lock; it may be stale by the
// time the caller inspects it.
func (c *Manager) State() State {
	c.mu.RLock()
	current := c.vmState
	c.mu.RUnlock()
	return current
}
// CreateVM creates the VM described by opts.HCSDocument and records the
// resulting handles on the Manager.
//
// It is only valid in [StateNotCreated]; any other state (including a
// duplicate CreateVM call) yields an error. On success the Manager moves to
// [StateCreated]. The guest connection is not opened here; that happens in
// StartVM.
func (c *Manager) CreateVM(ctx context.Context, opts *CreateOptions) error {
	ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "CreateVM"))

	c.mu.Lock()
	defer c.mu.Unlock()

	// Reject repeated creation on the same controller.
	if state := c.vmState; state != StateNotCreated {
		return fmt.Errorf("cannot create VM: VM is in incorrect state %s", state)
	}

	created, err := vmmanager.Create(ctx, opts.ID, opts.HCSDocument)
	if err != nil {
		return fmt.Errorf("failed to create VM: %w", err)
	}

	// Creation succeeded: remember the identity and handle of the VM.
	c.vmID, c.uvm = opts.ID, created

	// Memory that may not be overcommitted is treated as physically backed.
	// Stats uses this to decide how the working set is computed.
	memoryConfig := opts.HCSDocument.VirtualMachine.ComputeTopology.Memory
	c.isPhysicallyBacked = !memoryConfig.AllowOvercommit

	// The guest manager is constructed now; its connection is established in StartVM.
	c.guest = guestmanager.New(ctx, created)

	c.vmState = StateCreated
	return nil
}
// StartVM starts the VM that was previously created via CreateVM.
// It starts the underlying HCS VM, establishes the GCS connection,
// and transitions the VM to [StateRunning].
// On any failure the VM is transitioned to [StateInvalid].
//
// Calling StartVM on a VM that is already in [StateRunning] is a no-op that
// returns nil. Calling it in any state other than [StateCreated] or
// [StateRunning] returns an error and leaves the state untouched.
//
// The Manager's write lock is held for the whole duration of the call.
func (c *Manager) StartVM(ctx context.Context, opts *StartOptions) (err error) {
	ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "StartVM"))

	c.mu.Lock()
	defer c.mu.Unlock()

	// If the VM is already running, we can skip the start operation and just return.
	// This makes StartVM idempotent in the case of duplicate calls.
	if c.vmState == StateRunning {
		return nil
	}

	// However, if the VM is in any other state than Created,
	// we should fail as StartVM is only valid on a created VM.
	if c.vmState != StateCreated {
		return fmt.Errorf("cannot start VM: VM is in incorrect state %s", c.vmState)
	}

	// From here on every error return marks the VM as Invalid. The closure reads
	// the named result err, so it observes whatever value is being returned.
	defer func() {
		if err != nil {
			// If starting the VM fails, we transition to Invalid state to prevent any further operations on the VM.
			// The VM can be terminated by invoking TerminateVM.
			c.vmState = StateInvalid
		}
	}()

	// save parent context, without timeout to use for wait.
	pCtx := ctx
	// For remaining operations, we expect them to complete within the GCS connection timeout,
	// otherwise we want to fail.
	ctx, cancel := context.WithTimeout(pCtx, timeout.GCSConnectionTimeout)
	log.G(ctx).Debugf("using gcs connection timeout: %s\n", timeout.GCSConnectionTimeout)

	g, gctx := errgroup.WithContext(ctx)
	// Deferred calls run in LIFO order: cancel() (registered below) fires first,
	// and only then is the errgroup waited on, so no group goroutine outlives
	// this call on an early return.
	defer func() {
		_ = g.Wait()
	}()
	defer cancel()

	// we should set up the necessary listeners for guest-host communication.
	// The guest needs to connect to predefined vsock ports.
	// The host must already be listening on these ports before the guest attempts to connect,
	// otherwise the connection would fail.
	c.setupEntropyListener(gctx, g)
	c.setupLoggingListener(gctx, g)

	err = c.uvm.Start(ctx)
	if err != nil {
		return fmt.Errorf("failed to start VM: %w", err)
	}

	// Start waiting on the utility VM in the background.
	// This goroutine will complete when the VM exits.
	// It is handed the parent context so that it is not bound by the start timeout.
	go c.waitForVMExit(pCtx)

	// Collect any errors from writing entropy or establishing the log
	// connection.
	if err = g.Wait(); err != nil {
		return err
	}

	err = c.guest.CreateConnection(ctx, opts.GCSServiceID, opts.ConfigOptions...)
	if err != nil {
		return fmt.Errorf("failed to create guest connection: %w", err)
	}

	err = c.finalizeGCSConnection(ctx)
	if err != nil {
		return fmt.Errorf("failed to finalize GCS connection: %w", err)
	}

	// Set the confidential options if applicable.
	if opts.ConfidentialOptions != nil {
		// The inner err shadows the named result, but the return statement below
		// assigns the wrapped error to the named result, so the deferred state
		// transition still sees it.
		if err := c.guest.AddSecurityPolicy(ctx, *opts.ConfidentialOptions); err != nil {
			return fmt.Errorf("failed to set confidential options: %w", err)
		}
	}

	// If all goes well, we can transition the VM to Running state.
	c.vmState = StateRunning

	return nil
}
// waitForVMExit blocks until the underlying VM has exited and then records
// [StateTerminated] on the Manager. StartVM runs it in a background goroutine.
//
// The result of the wait itself is intentionally discarded; only the fact that
// the wait returned is used.
func (c *Manager) waitForVMExit(ctx context.Context) {
	// Detach from the caller's cancellation and deadline so that the background
	// wait is not cut short by them.
	ctx = context.WithoutCancel(ctx)
	_ = c.uvm.Wait(ctx)

	c.mu.Lock()
	defer c.mu.Unlock()

	// The state may already be Terminated (for example if TerminateVM completed
	// first). In that case there is nothing to change; emit a debug entry so the
	// overlap stays observable.
	if c.vmState == StateTerminated {
		log.G(ctx).WithField("currentState", c.vmState).Debug("waitForVMExit: state transition to Terminated was a no-op")
		return
	}

	c.vmState = StateTerminated
}
// ExecIntoHost executes a command in the running UVM.
//
// It returns the exit code reported by the guest exec together with any error.
// A request that asks for a terminal while also specifying a stderr stream is
// rejected with errdefs.ErrFailedPrecondition. The VM must be in
// [StateRunning] when the call is made; otherwise -1 and an error are returned.
//
// The state is only validated on entry; the lock is not held for the duration
// of the exec, so the VM may change state while the command is running.
func (c *Manager) ExecIntoHost(ctx context.Context, request *shimdiag.ExecProcessRequest) (int, error) {
	ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "ExecIntoHost"))

	if request.Terminal && request.Stderr != "" {
		return -1, fmt.Errorf("if using terminal, stderr must be empty: %w", errdefs.ErrFailedPrecondition)
	}

	// Validate that the VM is running before allowing exec into it.
	// Snapshot the state while holding the lock so that the value used in the
	// error message is the one that was actually checked, and is never read
	// after the lock has been released.
	c.mu.RLock()
	state := c.vmState
	c.mu.RUnlock()
	if state != StateRunning {
		return -1, fmt.Errorf("cannot exec into VM: VM is in incorrect state %s", state)
	}

	// Keep a count of active exec sessions.
	// This will be used to disallow LM with existing exec sessions,
	// as that can lead to orphaned processes within UVM.
	c.activeExecCount.Add(1)
	defer c.activeExecCount.Add(-1)

	cmdReq := &cmd.CmdProcessRequest{
		Args:     request.Args,
		Workdir:  request.Workdir,
		Terminal: request.Terminal,
		Stdin:    request.Stdin,
		Stdout:   request.Stdout,
		Stderr:   request.Stderr,
	}
	return c.guest.ExecIntoUVM(ctx, cmdReq)
}
// DumpStacks asks the guest for a dump of the GCS stacks of this VM.
//
// The VM must be in [StateRunning]. If the guest does not advertise support
// for stack dumps, an empty string and a nil error are returned.
func (c *Manager) DumpStacks(ctx context.Context) (string, error) {
	ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "DumpStacks"))

	// The read lock is held for the whole call, so the state cannot be changed
	// by a writer while the request is in flight.
	c.mu.RLock()
	defer c.mu.RUnlock()

	if state := c.vmState; state != StateRunning {
		return "", fmt.Errorf("cannot dump stacks: VM is in incorrect state %s", state)
	}

	// Nothing to do when the guest lacks the capability.
	if !c.guest.Capabilities().IsDumpStacksSupported() {
		return "", nil
	}

	return c.guest.DumpStacks(ctx)
}
// Wait blocks until the VM exits and all log output processing has completed.
//
// It returns an error immediately if the VM is still in [StateNotCreated].
// Otherwise it returns the result of waiting on the underlying VM. If ctx is
// done before log output processing finishes, the context error is joined
// onto that result.
func (c *Manager) Wait(ctx context.Context) error {
	ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Wait"))

	// Validate that the VM has been created and can be waited on.
	// Terminated VMs can also be waited on where we return immediately.
	// Snapshot the state while holding the lock so that the value used in the
	// error message is the one that was actually checked, and is never read
	// after the lock has been released.
	c.mu.RLock()
	state := c.vmState
	c.mu.RUnlock()
	if state == StateNotCreated {
		return fmt.Errorf("cannot wait on VM: VM is in incorrect state %s", state)
	}

	// Wait for the utility VM to exit.
	// This will be unblocked when the VM exits or if the context is cancelled.
	err := c.uvm.Wait(ctx)

	// Wait for the log output processing to complete,
	// which ensures all logs are processed before we return.
	select {
	case <-ctx.Done():
		ctxErr := fmt.Errorf("failed to wait on uvm output processing: %w", ctx.Err())
		err = errors.Join(err, ctxErr)
	case <-c.logOutputDone:
	}

	return err
}
// Stats returns runtime statistics for the VM including processor runtime and
// memory usage. The VM must be in [StateRunning].
//
// For VMs that are not physically backed, the working set is taken from the
// vmmem process of the VM; the handle to that process is looked up on first
// use and cached on the Manager. For physically backed VMs the working set is
// derived from the assigned memory reported by HCS, and no vmmem handle is
// needed.
func (c *Manager) Stats(ctx context.Context) (*stats.VirtualMachineStatistics, error) {
	ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "Stats"))

	// Take the exclusive lock rather than the read lock: this method may store
	// the lazily looked-up vmmem handle on the Manager, and a write under a
	// shared lock would race with concurrent Stats callers (and could leak a
	// handle if two of them performed the lookup at the same time).
	// Holding the lock also keeps the state stable throughout the method.
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.vmState != StateRunning {
		return nil, fmt.Errorf("cannot get stats: VM is in incorrect state %s", c.vmState)
	}

	// Initialization of vmmemProcess to calculate stats properly for VA-backed UVMs.
	// Physically backed VMs never consult the handle, so the lookup is skipped
	// for them and cannot cause Stats to fail.
	if !c.isPhysicallyBacked && c.vmmemProcess == 0 {
		vmmemHandle, err := vmutils.LookupVMMEM(ctx, c.uvm.RuntimeID(), &iwin.WinAPI{})
		if err != nil {
			return nil, fmt.Errorf("cannot get stats: %w", err)
		}
		c.vmmemProcess = vmmemHandle
	}

	s := &stats.VirtualMachineStatistics{}
	props, err := c.uvm.PropertiesV2(ctx, hcsschema.PTStatistics, hcsschema.PTMemory)
	if err != nil {
		return nil, fmt.Errorf("failed to get VM properties: %w", err)
	}
	s.Processor = &stats.VirtualMachineProcessorStatistics{}
	// HCS reports runtime in units of 100ns; convert to nanoseconds.
	s.Processor.TotalRuntimeNS = uint64(props.Statistics.Processor.TotalRuntime100ns * 100)

	s.Memory = &stats.VirtualMachineMemoryStatistics{}
	if !c.isPhysicallyBacked {
		// The HCS properties does not return sufficient information to calculate
		// working set size for a VA-backed UVM. To work around this, we instead
		// locate the vmmem process for the VM, and query that process's working set
		// instead, which will be the working set for the VM.
		memCounters, err := process.GetProcessMemoryInfo(c.vmmemProcess)
		if err != nil {
			return nil, err
		}
		s.Memory.WorkingSetBytes = uint64(memCounters.WorkingSetSize)
	}

	if props.Memory != nil {
		if c.isPhysicallyBacked {
			// If the uvm is physically backed we set the working set to the total amount allocated
			// to the UVM. AssignedMemory returns the number of 4KB pages. Will always be 4KB
			// regardless of what the UVMs actual page size is so we don't need that information.
			s.Memory.WorkingSetBytes = props.Memory.VirtualMachineMemory.AssignedMemory * 4096
		}
		s.Memory.VirtualNodeCount = props.Memory.VirtualNodeCount
		s.Memory.VmMemory = &stats.VirtualMachineMemory{}
		s.Memory.VmMemory.AvailableMemory = props.Memory.VirtualMachineMemory.AvailableMemory
		s.Memory.VmMemory.AvailableMemoryBuffer = props.Memory.VirtualMachineMemory.AvailableMemoryBuffer
		s.Memory.VmMemory.ReservedMemory = props.Memory.VirtualMachineMemory.ReservedMemory
		s.Memory.VmMemory.AssignedMemory = props.Memory.VirtualMachineMemory.AssignedMemory
		s.Memory.VmMemory.SlpActive = props.Memory.VirtualMachineMemory.SlpActive
		s.Memory.VmMemory.BalancingEnabled = props.Memory.VirtualMachineMemory.BalancingEnabled
		s.Memory.VmMemory.DmOperationInProgress = props.Memory.VirtualMachineMemory.DmOperationInProgress
	}
	return s, nil
}
// TerminateVM forcefully terminates a running VM, closes the guest connection,
// and releases HCS resources.
//
// The context is used for all operations, including waits, so timeouts/cancellations may prevent
// proper UVM cleanup.
//
// The call is a no-op returning nil when the VM is in [StateTerminated] or
// [StateNotCreated]. If closing the utility VM fails, the VM is moved to
// [StateInvalid] and the error is returned; TerminateVM may then be called
// again to retry the cleanup.
func (c *Manager) TerminateVM(ctx context.Context) (err error) {
	ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.Operation, "TerminateVM"))

	c.mu.Lock()
	defer c.mu.Unlock()

	// If the VM has already terminated, we can skip termination and just return.
	// Alternatively, if the VM was never created, we can also skip termination.
	// This makes the TerminateVM operation idempotent.
	if c.vmState == StateTerminated || c.vmState == StateNotCreated {
		return nil
	}

	// Best effort attempt to clean up the open vmmem handle.
	// Only close a handle that was actually opened, and forget it afterwards:
	// if uvm.Close fails below the state becomes Invalid and TerminateVM can be
	// retried, and closing the stale handle value a second time could close an
	// unrelated handle that has since reused the same value.
	if c.vmmemProcess != 0 {
		_ = windows.Close(c.vmmemProcess)
		c.vmmemProcess = 0
	}

	// Terminate the utility VM. This will also cause the Wait() call in the background goroutine to unblock.
	// The error is deliberately ignored: termination is best effort, and the
	// outcome of the cleanup is decided by uvm.Close below.
	_ = c.uvm.Terminate(ctx)

	if err := c.guest.CloseConnection(); err != nil {
		log.G(ctx).Errorf("close guest connection failed: %s", err)
	}

	err = c.uvm.Close(ctx)
	if err != nil {
		// Transition to Invalid so no further active operations can be performed on the VM.
		c.vmState = StateInvalid
		return fmt.Errorf("failed to close utility VM: %w", err)
	}

	// Set the Terminated status at the end.
	c.vmState = StateTerminated
	return nil
}
// StartTime reports when the VM was started.
// The zero time.Time is returned unless the VM is currently in
// [StateRunning] or [StateTerminated].
func (c *Manager) StartTime() (startTime time.Time) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	switch c.vmState {
	case StateRunning, StateTerminated:
		// Only these states have a meaningful start timestamp.
		startTime = c.uvm.StartedTime()
	}
	return startTime
}
// ExitStatus reports how the VM ended: the time it stopped and the error, if
// any, that it exited with. It is only available once the VM has reached
// [StateTerminated]; in any other state an error is returned instead.
func (c *Manager) ExitStatus() (*ExitStatus, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	if state := c.vmState; state != StateTerminated {
		return nil, fmt.Errorf("cannot get exit status: VM is in incorrect state %s", state)
	}

	status := new(ExitStatus)
	status.StoppedTime = c.uvm.StoppedTime()
	status.Err = c.uvm.ExitError()
	return status, nil
}