//go:build windows

package main

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"
eventstypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/api/runtime/task/v2"
"github.com/containerd/containerd/api/types"
"github.com/containerd/containerd/v2/core/runtime"
"github.com/containerd/errdefs"
"github.com/containerd/typeurl/v2"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"go.opencensus.io/trace"
"google.golang.org/protobuf/types/known/timestamppb"
"github.com/Microsoft/go-winio/pkg/fs"
runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
"github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats"
"github.com/Microsoft/hcsshim/internal/cmd"
"github.com/Microsoft/hcsshim/internal/cow"
"github.com/Microsoft/hcsshim/internal/guestpath"
"github.com/Microsoft/hcsshim/internal/hcs"
"github.com/Microsoft/hcsshim/internal/hcs/resourcepaths"
"github.com/Microsoft/hcsshim/internal/hcs/schema1"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"github.com/Microsoft/hcsshim/internal/hcsoci"
"github.com/Microsoft/hcsshim/internal/jobcontainers"
"github.com/Microsoft/hcsshim/internal/layers"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/logfields"
"github.com/Microsoft/hcsshim/internal/memory"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/Microsoft/hcsshim/internal/oci"
"github.com/Microsoft/hcsshim/internal/processorinfo"
"github.com/Microsoft/hcsshim/internal/protocol/guestrequest"
"github.com/Microsoft/hcsshim/internal/protocol/guestresource"
"github.com/Microsoft/hcsshim/internal/resources"
"github.com/Microsoft/hcsshim/internal/shimdiag"
"github.com/Microsoft/hcsshim/internal/uvm"
"github.com/Microsoft/hcsshim/osversion"
"github.com/Microsoft/hcsshim/pkg/annotations"
"github.com/Microsoft/hcsshim/pkg/ctrdtaskapi"
)
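// newHcsStandaloneTask creates a task that is not part of a Kubernetes pod
// (the spec must carry no sandbox-type annotation). If the spec requests
// hypervisor isolation and the host is RS5 or newer, a dedicated utility VM
// is created, started, and owned by the returned task.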
func newHcsStandaloneTask(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (shimTask, error) {
log.G(ctx).WithField("tid", req.ID).Debug("newHcsStandaloneTask")
ct, _, err := oci.GetSandboxTypeAndID(s.Annotations)
if err != nil {
return nil, err
}
if ct != oci.KubernetesContainerTypeNone {
return nil, errors.Wrapf(
errdefs.ErrFailedPrecondition,
"cannot create standalone task, expected no annotation: '%s': got '%s'",
annotations.KubernetesContainerType,
ct)
}
owner := filepath.Base(os.Args[0])
var parent *uvm.UtilityVM
if osversion.Build() >= osversion.RS5 && oci.IsIsolated(s) {
// Create the UVM parent
opts, err := oci.SpecToUVMCreateOpts(ctx, s, fmt.Sprintf("%s@vm", req.ID), owner)
if err != nil {
return nil, err
}
switch opts := opts.(type) {
case *uvm.OptionsLCOW:
parent, err = uvm.CreateLCOW(ctx, opts)
if err != nil {
return nil, err
}
case *uvm.OptionsWCOW:
var layerFolders []string
if s.Windows != nil {
layerFolders = s.Windows.LayerFolders
}
opts.BootFiles, err = layers.GetWCOWUVMBootFilesFromLayers(ctx, req.Rootfs, layerFolders)
if err != nil {
return nil, err
}
parent, err = uvm.CreateWCOW(ctx, opts)
if err != nil {
return nil, err
}
}
err = parent.Start(ctx)
if err != nil {
parent.Close()
return nil, err
}
} else if !oci.IsWCOW(s) {
return nil, errors.Wrap(errdefs.ErrFailedPrecondition, "oci spec does not contain WCOW or LCOW spec")
}
shim, err := newHcsTask(ctx, events, parent, true, req, s)
if err != nil {
if parent != nil {
parent.Close()
}
return nil, err
}
return shim, nil
}
// createContainer is a generic helper that returns either a process/hypervisor
// isolated container or a job container, depending on what is set in the OCI
// spec.
func createContainer(
ctx context.Context,
id,
owner,
netNS string,
s *specs.Spec,
parent *uvm.UtilityVM,
shimOpts *runhcsopts.Options,
rootfs []*types.Mount,
) (cow.Container, *resources.Resources, error) {
var (
err error
container cow.Container
resources *resources.Resources
)
var wcowLayers layers.WCOWLayers
var lcowLayers *layers.LCOWLayers
var layerFolders []string
if s.Windows != nil {
layerFolders = s.Windows.LayerFolders
}
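// A Linux section in the spec selects LCOW layer parsing; otherwise the
// layers are parsed as WCOW.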
if s.Linux != nil {
lcowLayers, err = layers.ParseLCOWLayers(rootfs, layerFolders)
} else {
wcowLayers, err = layers.ParseWCOWLayers(rootfs, layerFolders)
}
if err != nil {
return nil, nil, err
}
if oci.IsJobContainer(s) {
opts := jobcontainers.CreateOptions{WCOWLayers: wcowLayers}
container, resources, err = jobcontainers.Create(ctx, id, s, opts)
if err != nil {
return nil, nil, err
}
} else {
opts := &hcsoci.CreateOptions{
ID: id,
Owner: owner,
Spec: s,
HostingSystem: parent,
NetworkNamespace: netNS,
LCOWLayers: lcowLayers,
WCOWLayers: wcowLayers,
}
if shimOpts != nil {
opts.ScaleCPULimitsToSandbox = shimOpts.ScaleCpuLimitsToSandbox
}
container, resources, err = hcsoci.CreateContainer(ctx, opts)
if err != nil {
return nil, nil, err
}
}
return container, resources, nil
}
// newHcsTask creates a container within `parent` and its init exec process in
// the `shimExecCreated` state and returns the task that tracks its lifetime.
//
// If `parent == nil` the container is created on the host.
func newHcsTask(
ctx context.Context,
events publisher,
parent *uvm.UtilityVM,
ownsParent bool,
req *task.CreateTaskRequest,
s *specs.Spec) (_ shimTask, err error) {
ctx, entry := log.SetEntry(ctx, logrus.Fields{logfields.TaskID: req.ID})
entry.WithFields(logrus.Fields{
"hasHost": parent != nil,
"isVirtualPod": s.Annotations != nil && s.Annotations[annotations.VirtualPodID] != "",
"ownsHost": ownsParent,
}).Debug("newHcsTask")
owner := filepath.Base(os.Args[0])
var netNS string
if s.Windows != nil &&
s.Windows.Network != nil {
netNS = s.Windows.Network.NetworkNamespace
}
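// req.Options, when set, carries the runhcs shim options forwarded by
// containerd.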
var shimOpts *runhcsopts.Options
if req.Options != nil {
v, err := typeurl.UnmarshalAny(req.Options)
if err != nil {
return nil, err
}
shimOpts = v.(*runhcsopts.Options)
}
// Default to an infinite timeout (zero value)
var ioRetryTimeout time.Duration
if shimOpts != nil {
ioRetryTimeout = time.Duration(shimOpts.IoRetryTimeoutInSec) * time.Second
}
io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal, ioRetryTimeout)
if err != nil {
return nil, err
}
container, resources, err := createContainer(ctx, req.ID, owner, netNS, s, parent, shimOpts, req.Rootfs)
if err != nil {
return nil, err
}
ht := &hcsTask{
events: events,
id: req.ID,
isWCOW: oci.IsWCOW(s),
c: container,
cr: resources,
ownsHost: ownsParent,
host: parent,
closed: make(chan struct{}),
taskSpec: s,
ioRetryTimeout: ioRetryTimeout,
}
ht.init = newHcsExec(
ctx,
events,
req.ID,
parent,
container,
req.ID,
req.Bundle,
ht.isWCOW,
s.Process,
io,
)
if parent != nil {
// We have a parent UVM. Listen for its exit and forcibly close this
// task. This is not expected, but in the event of a UVM crash we need to
// handle this case.
go ht.waitForHostExit()
}
go ht.waitInitExit()
// Publish the created event
if err := ht.events.publishEvent(
ctx,
runtime.TaskCreateEventTopic,
&eventstypes.TaskCreate{
ContainerID: req.ID,
Bundle: req.Bundle,
Rootfs: req.Rootfs,
IO: &eventstypes.TaskIO{
Stdin: req.Stdin,
Stdout: req.Stdout,
Stderr: req.Stderr,
Terminal: req.Terminal,
},
Checkpoint: "",
Pid: uint32(ht.init.Pid()),
}); err != nil {
return nil, err
}
return ht, nil
}
var _ = (shimTask)(&hcsTask{})
// hcsTask is a generic task that represents a WCOW container (process or
// hypervisor isolated) or an LCOW container. This task MAY own the UVM the
// container runs in, but in the case of a pod it may just track the UVM for
// container lifetime management. When the task owns the UVM, stopping the
// init task/exec stops the UVM as well.
type hcsTask struct {
events publisher
// id is the id of this task when it is created.
//
// It MUST be treated as read only in the lifetime of the task.
id string
// isWCOW is set to `true` if this is a task representing a Windows container.
//
// It MUST be treated as read only in the lifetime of the task.
isWCOW bool
// c is the container backing this task.
//
// It MUST be treated as read only in the lifetime of this task EXCEPT after
// a Kill to the init task, in which case it must be shut down.
c cow.Container
// cr is the container resources this task is holding.
//
// It MUST be treated as read only in the lifetime of this task EXCEPT after
// a Kill to the init task, in which case all resources must be released.
cr *resources.Resources
// init is the init process of the container.
//
// Note: the invariant `container state == init.State()` MUST hold. That is,
// if the init process exits, the container as a whole, along with all execs,
// MUST exit.
//
// It MUST be treated as read only in the lifetime of the task.
init shimExec
// ownsHost is `true` if this task owns `host`. If so, when this task's init
// exec shuts down, `host` must be shut down as well.
ownsHost bool
// host is the hosting VM for this exec if hypervisor isolated. If
// `host==nil` this is an Argon task so no UVM cleanup is required.
//
// NOTE: if `osversion.Build() < osversion.RS5` this will always be
// `nil`.
host *uvm.UtilityVM
// ecl is the exec create lock for all non-init execs and MUST be held
// during create to prevent ID duplication.
ecl sync.Mutex
execs sync.Map
closed chan struct{}
closeOnce sync.Once
// closeHostOnce is used to close `host`. This will only be used if
// `ownsHost==true` and `host != nil`.
closeHostOnce sync.Once
// taskSpec represents the spec/configuration for this task.
taskSpec *specs.Spec
// ioRetryTimeout is how long to keep retrying reconnection to the stdio pipes from containerd.
ioRetryTimeout time.Duration
}
func (ht *hcsTask) ID() string {
return ht.id
}
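// CreateExec creates a new exec within this task. The exec ID must be
// non-empty and unique within the task, and the init exec must still be
// running.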
func (ht *hcsTask) CreateExec(ctx context.Context, req *task.ExecProcessRequest, spec *specs.Process) error {
ht.ecl.Lock()
defer ht.ecl.Unlock()
// Fail if the exec already exists, or if the request is for "", which is the
// init exec.
if _, loaded := ht.execs.Load(req.ExecID); loaded || req.ExecID == "" {
return errors.Wrapf(errdefs.ErrAlreadyExists, "exec: '%s' in task: '%s' already exists", req.ExecID, ht.id)
}
if ht.init.State() != shimExecStateRunning {
return errors.Wrapf(errdefs.ErrFailedPrecondition, "exec: '' in task: '%s' must be running to create additional execs", ht.id)
}
io, err := cmd.NewUpstreamIO(ctx, req.ID, req.Stdout, req.Stderr, req.Stdin, req.Terminal, ht.ioRetryTimeout)
if err != nil {
return err
}
he := newHcsExec(
ctx,
ht.events,
ht.id,
ht.host,
ht.c,
req.ExecID,
ht.init.Status().Bundle,
ht.isWCOW,
spec,
io,
)
ht.execs.Store(req.ExecID, he)
// Publish the created event
return ht.events.publishEvent(
ctx,
runtime.TaskExecAddedEventTopic,
&eventstypes.TaskExecAdded{
ContainerID: ht.id,
ExecID: req.ExecID,
})
}
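// GetExec returns the exec identified by eid. The empty ID refers to the init
// exec.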
func (ht *hcsTask) GetExec(eid string) (shimExec, error) {
if eid == "" {
return ht.init, nil
}
raw, loaded := ht.execs.Load(eid)
if !loaded {
return nil, errors.Wrapf(errdefs.ErrNotFound, "exec: '%s' in task: '%s' not found", eid, ht.id)
}
return raw.(shimExec), nil
}
func (ht *hcsTask) ListExecs() (_ []shimExec, err error) {
var execs []shimExec
ht.execs.Range(func(key, value interface{}) bool {
wt, ok := value.(shimExec)
if !ok {
err = fmt.Errorf("failed to load exec %q", key)
return false
}
execs = append(execs, wt)
return true
})
if err != nil {
return nil, err
}
return execs, nil
}
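// KillExec sends `signal` to the exec identified by eid. If `all` is set, the
// request must target the init exec (empty eid) and the signal is forwarded
// to every exec in the task.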
func (ht *hcsTask) KillExec(ctx context.Context, eid string, signal uint32, all bool) error {
e, err := ht.GetExec(eid)
if err != nil {
return err
}
if all && eid != "" {
return errors.Wrapf(errdefs.ErrFailedPrecondition, "cannot signal all for non-empty exec: '%s'", eid)
}
if all {
// We are in a kill all on the init task. Signal everything.
ht.execs.Range(func(key, value interface{}) bool {
err := value.(shimExec).Kill(ctx, signal)
if err != nil {
log.G(ctx).WithFields(logrus.Fields{
"eid": key,
logrus.ErrorKey: err,
}).Warn("failed to kill exec in task")
}
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
if signal == 0x9 && eid == "" && ht.host != nil {
// If this is a SIGKILL against the init process we start a background
// timer and wait on either the timer expiring or the process exiting
// cleanly. If the timer expires first we forcibly close the UVM as we
// assume the guest is misbehaving for some reason.
go func() {
t := time.NewTimer(30 * time.Second)
execExited := make(chan struct{})
go func() {
e.Wait()
close(execExited)
}()
select {
case <-execExited:
t.Stop()
case <-t.C:
// Safe to call multiple times if called previously on
// successful shutdown.
ht.host.Close()
}
}()
}
return e.Kill(ctx, signal)
}
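// DeleteExec removes the exec identified by eid from the task and returns its
// pid, exit status, and exit time. Deleting the init exec (empty eid)
// forcibly exits all other execs and waits for container resource cleanup to
// complete.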
func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, time.Time, error) {
e, err := ht.GetExec(eid)
if err != nil {
return 0, 0, time.Time{}, err
}
if eid == "" {
// We are deleting the init exec. Forcibly exit any additional execs.
ht.execs.Range(func(key, value interface{}) bool {
ex := value.(shimExec)
if s := ex.State(); s != shimExecStateExited {
ex.ForceExit(ctx, 1)
}
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
}
switch state := e.State(); state {
case shimExecStateCreated:
e.ForceExit(ctx, 0)
case shimExecStateRunning:
return 0, 0, time.Time{}, newExecInvalidStateError(ht.id, eid, state, "delete")
}
if eid == "" {
entry := log.G(ctx).WithFields(logrus.Fields{
logfields.TaskID: ht.id,
logfields.ExecID: eid,
})
// We are killing the init task, so we expect the container to be
// stopped after this.
//
// The task process may have already exited, and the status set to
// shimExecStateExited, but resources may still be in the process
// of being cleaned up. Wait for ht.closed to be closed. This signals
// that [hcsTask.waitInitExit]/[hcsTask.waitForHostExit] has finished destroying
// container resources and that the layers were unmounted.
// If the shim exits before resources are cleaned up, those resources
// will remain locked and untracked, which leads to lingering sandboxes
// and container resources like base vhdx.
const timeout = 30 * time.Second
entry.WithField(logfields.Timeout, timeout).Trace("waiting for task to be closed")
select {
case <-time.After(timeout):
entry.WithField(logfields.Timeout, timeout).Error("timed out waiting for task to close while deleting init exec")
return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup")
case <-ht.closed:
entry.Trace("received task close signal")
}
// The init task has now exited. A ForceExit() has already been sent to the
// execs. Clean up the execs and continue.
ht.execs.Range(func(key, value interface{}) bool {
if key == "" {
// Iterate next.
return true
}
ht.execs.Delete(key)
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
// Clean up the container directories inside the UVM if required.
if ht.host != nil {
if err := ht.host.DeleteContainerState(ctx, ht.id); err != nil {
log.G(ctx).WithError(err).Errorf("failed to delete container state")
}
}
}
status := e.Status()
if eid != "" {
ht.execs.Delete(eid)
}
// Publish the deleted event
if err := ht.events.publishEvent(
ctx,
runtime.TaskDeleteEventTopic,
&eventstypes.TaskDelete{
ContainerID: ht.id,
ID: eid,
Pid: status.Pid,
ExitStatus: status.ExitStatus,
ExitedAt: status.ExitedAt,
}); err != nil {
return 0, 0, time.Time{}, err
}
return int(status.Pid), status.ExitStatus, status.ExitedAt.AsTime(), nil
}
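// Pids returns the details of all processes in the container, annotating each
// guest pid with its exec ID where one is known.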
func (ht *hcsTask) Pids(ctx context.Context) ([]*runhcsopts.ProcessDetails, error) {
// Map all user-created execs to pid/exec-id
pidMap := make(map[int]string)
ht.execs.Range(func(key, value interface{}) bool {
ex := value.(shimExec)
pidMap[ex.Pid()] = ex.ID()
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
pidMap[ht.init.Pid()] = ht.init.ID()
// Get the guest pids
props, err := ht.c.Properties(ctx, schema1.PropertyTypeProcessList)
if err != nil {
if isStatsNotFound(err) {
return nil, errors.Wrapf(errdefs.ErrNotFound, "failed to fetch pids: %s", err)
}
return nil, err
}
// Copy to pid/exec-id pairs
pairs := make([]*runhcsopts.ProcessDetails, len(props.ProcessList))
for i, p := range props.ProcessList {
pairs[i] = &runhcsopts.ProcessDetails{}
pairs[i].ImageName = p.ImageName
pairs[i].CreatedAt = timestamppb.New(p.CreateTimestamp)
pairs[i].KernelTime_100Ns = p.KernelTime100ns
pairs[i].MemoryCommitBytes = p.MemoryCommitBytes
pairs[i].MemoryWorkingSetPrivateBytes = p.MemoryWorkingSetPrivateBytes
pairs[i].MemoryWorkingSetSharedBytes = p.MemoryWorkingSetSharedBytes
pairs[i].ProcessID = p.ProcessId
pairs[i].UserTime_100Ns = p.UserTime100ns
if eid, ok := pidMap[int(p.ProcessId)]; ok {
pairs[i].ExecID = eid
}
}
return pairs, nil
}
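// Wait blocks until the task has fully closed (resources released and the
// exit event published) and returns the init exec's final state.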
func (ht *hcsTask) Wait() *task.StateResponse {
<-ht.closed
return ht.init.Wait()
}
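// waitInitExit waits for the init exec to exit and then closes the task. It
// is started on a background goroutine at task creation.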
func (ht *hcsTask) waitInitExit() {
ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitInitExit")
defer span.End()
span.AddAttributes(
trace.StringAttribute("tid", ht.id),
trace.BoolAttribute("host", ht.host != nil),
trace.BoolAttribute("ownsHost", ht.ownsHost))
// Wait for it to exit on its own
ht.init.Wait()
// Close the host and publish the exit event
ht.close(ctx)
}
// waitForHostExit waits for the host virtual machine to exit. Once it has
// exited, all additional execs in this task are forcibly exited.
//
// This MUST be called via a goroutine to wait on a background thread.
//
// Note: For Windows process isolated containers there is no host virtual
// machine so this should not be called.
func (ht *hcsTask) waitForHostExit() {
ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitForHostExit")
defer span.End()
span.AddAttributes(
trace.StringAttribute("tid", ht.id),
trace.BoolAttribute("host", ht.host != nil),
trace.BoolAttribute("ownsHost", ht.ownsHost))
err := ht.host.WaitCtx(ctx)
if err != nil {
log.G(ctx).WithError(err).Error("failed to wait for host virtual machine exit")
} else {
log.G(ctx).Debug("host virtual machine exited")
}
ht.execs.Range(func(key, value interface{}) bool {
ex := value.(shimExec)
ex.ForceExit(ctx, 1)
// Iterate all. Returning false stops the iteration. See:
// https://pkg.go.dev/sync#Map.Range
return true
})
ht.init.ForceExit(ctx, 1)
ht.closeHost(ctx)
}
// close shuts down the container owned by this task and, if `ht.ownsHost`,
// shuts down the hosting VM the container was placed in.
//
// NOTE: For Windows process isolated containers `ht.ownsHost==true && ht.host
// == nil`.
func (ht *hcsTask) close(ctx context.Context) {
ht.closeOnce.Do(func() {
entry := log.G(ctx)
entry.Debug("hcsTask::closeOnce")
// ht.c should never be nil for a real task but in testing we stub
// this to avoid a nil dereference. We really should introduce a
// method or interface for ht.c operations that we can stub for
// testing.
if ht.c != nil {
const tearDownTimeout = 30 * time.Second
// Do our best attempt to tear down the container.
// TODO: unify timeout select statements and use [ht.c.WaitCtx] and [context.WithTimeout]
var werr error
ch := make(chan struct{})
go func() {
werr = ht.c.Wait()
close(ch)
}()
err := ht.c.Shutdown(ctx)
if err != nil {
entry.WithError(err).Error("failed to shutdown container")
} else {
t := time.NewTimer(tearDownTimeout)
select {
case <-ch:
err = werr
t.Stop()
if err != nil {
entry.WithError(err).Error("failed to wait for container shutdown")
}
case <-t.C:
err = hcs.ErrTimeout
entry.WithFields(logrus.Fields{
logfields.Timeout: tearDownTimeout,
logrus.ErrorKey: err,
}).Error("timed out while waiting for container shutdown")
}
}
if err != nil {
err = ht.c.Terminate(ctx)
if err != nil {
entry.WithError(err).Error("failed to terminate container")
} else {
t := time.NewTimer(tearDownTimeout)
select {
case <-ch:
err = werr
t.Stop()
if err != nil {
entry.WithError(err).Error("failed to wait for container terminate")
}
case <-t.C:
entry.WithFields(logrus.Fields{
logfields.Timeout: tearDownTimeout,
logrus.ErrorKey: hcs.ErrTimeout,
}).Error("timed out while waiting for container terminate")
}
}
}
// Release any resources associated with the container.
entry.Trace("starting task resource cleanup")
if err := resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil {
entry.WithError(err).Error("failed to release container resources")
}
// Close the container handle invalidating all future access.
if err := ht.c.Close(); err != nil {
entry.WithError(err).Error("failed to close container")
}
entry.Trace("task resource cleanup completed")
}
ht.closeHost(ctx)
})
}
// closeHost safely closes the hosting UVM if this task is the owner. Once the
// UVM is closed and all resources are released, it publishes the
// `runtime.TaskExitEventTopic` event for all upstream listeners.
//
// Note: for a process-isolated task there is no hosting UVM, so closing it is
// simply a no-op.
//
// This call is idempotent and safe to call multiple times.
func (ht *hcsTask) closeHost(ctx context.Context) {
ht.closeHostOnce.Do(func() {
entry := log.G(ctx)
entry.Debug("hcsTask::closeHostOnce")
if ht.ownsHost && ht.host != nil {
if err := ht.host.Close(); err != nil {
entry.WithError(err).Error("failed host vm shutdown")
}
}
// Always send the `init` exec exit notification.
exit := ht.init.Status()
if err := ht.events.publishEvent(
ctx,
runtime.TaskExitEventTopic,
&eventstypes.TaskExit{
ContainerID: ht.id,
ID: exit.ID,
Pid: uint32(exit.Pid),
ExitStatus: exit.ExitStatus,
ExitedAt: exit.ExitedAt,
}); err != nil {
entry.WithError(err).Error("failed to publish TaskExitEventTopic")
}
close(ht.closed)
})
}
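// ExecInHost runs a process in the host of this task: in the UVM when
// hypervisor isolated, otherwise directly on the shim's host.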
func (ht *hcsTask) ExecInHost(ctx context.Context, req *shimdiag.ExecProcessRequest) (int, error) {
cmdReq := &cmd.CmdProcessRequest{
Args: req.Args,
Workdir: req.Workdir,
Terminal: req.Terminal,
Stdin: req.Stdin,
Stdout: req.Stdout,
Stderr: req.Stderr,
}
if ht.host == nil {
return cmd.ExecInShimHost(ctx, cmdReq)
}
ctx, _ = log.SetEntry(ctx, logrus.Fields{logfields.UVMID: ht.host.ID()})
return ht.host.ExecInUVM(ctx, cmdReq)
}
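// DumpGuestStacks returns the guest stacks captured from the hosting UVM, or
// an empty string if there is no UVM or the capture fails.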
func (ht *hcsTask) DumpGuestStacks(ctx context.Context) string {
if ht.host != nil {
stacks, err := ht.host.DumpStacks(ctx)
if err != nil {
log.G(ctx).WithError(err).Warn("failed to capture guest stacks")
} else {
return stacks
}
}
return ""
}
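// Share shares a host directory or file into the hosting UVM. It fails for
// tasks that are not hypervisor isolated.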
func (ht *hcsTask) Share(ctx context.Context, req *shimdiag.ShareRequest) error {
if ht.host == nil {
return errTaskNotIsolated
}
return ht.host.Share(ctx, req.HostPath, req.UvmPath, req.ReadOnly)
}
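// hcsPropertiesToWindowsStats converts HCS schema2 statistics into the shim's
// Windows container statistics. HCS reports durations in 100ns units, so each
// duration is multiplied by 100 to yield nanoseconds.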
func hcsPropertiesToWindowsStats(props *hcsschema.Properties) *stats.Statistics_Windows {
wcs := &stats.Statistics_Windows{Windows: &stats.WindowsContainerStatistics{}}
if props.Statistics != nil {
wcs.Windows.Timestamp = timestamppb.New(props.Statistics.Timestamp)
wcs.Windows.ContainerStartTime = timestamppb.New(props.Statistics.ContainerStartTime)
wcs.Windows.UptimeNS = props.Statistics.Uptime100ns * 100
if props.Statistics.Processor != nil {
wcs.Windows.Processor = &stats.WindowsContainerProcessorStatistics{
TotalRuntimeNS: props.Statistics.Processor.TotalRuntime100ns * 100,
RuntimeUserNS: props.Statistics.Processor.RuntimeUser100ns * 100,
RuntimeKernelNS: props.Statistics.Processor.RuntimeKernel100ns * 100,
}
}
if props.Statistics.Memory != nil {
wcs.Windows.Memory = &stats.WindowsContainerMemoryStatistics{
MemoryUsageCommitBytes: props.Statistics.Memory.MemoryUsageCommitBytes,
MemoryUsageCommitPeakBytes: props.Statistics.Memory.MemoryUsageCommitPeakBytes,
MemoryUsagePrivateWorkingSetBytes: props.Statistics.Memory.MemoryUsagePrivateWorkingSetBytes,
}
}
if props.Statistics.Storage != nil {
wcs.Windows.Storage = &stats.WindowsContainerStorageStatistics{
ReadCountNormalized: props.Statistics.Storage.ReadCountNormalized,
ReadSizeBytes: props.Statistics.Storage.ReadSizeBytes,
WriteCountNormalized: props.Statistics.Storage.WriteCountNormalized,
WriteSizeBytes: props.Statistics.Storage.WriteSizeBytes,
}
}
}
return wcs
}
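// Stats returns the container's statistics and, when this task owns a hosting
// UVM, the VM's statistics as well.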
func (ht *hcsTask) Stats(ctx context.Context) (*stats.Statistics, error) {
s := &stats.Statistics{}
props, err := ht.c.PropertiesV2(ctx, hcsschema.PTStatistics)
if err != nil {
if isStatsNotFound(err) {
return nil, errors.Wrapf(errdefs.ErrNotFound, "failed to fetch stats: %s", err)
}
return nil, err
}
if props != nil {
if ht.isWCOW {
s.Container = hcsPropertiesToWindowsStats(props)
} else {
s.Container = &stats.Statistics_Linux{Linux: props.Metrics}
}
}
if ht.ownsHost && ht.host != nil {
vmStats, err := ht.host.Stats(ctx)
if err != nil && !isStatsNotFound(err) {
return nil, err
}
s.VM = vmStats
}
return s, nil
}
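// Update applies resource limit updates to the task. Updates are routed to
// the hosting UVM when this task owns one, and to the container itself
// otherwise.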
func (ht *hcsTask) Update(ctx context.Context, req *task.UpdateTaskRequest) error {
resources, err := typeurl.UnmarshalAny(req.Resources)
if err != nil {
return errors.Wrapf(err, "failed to unmarshal resources for container %s update request", req.ID)
}
if err := verifyTaskUpdateResourcesType(resources); err != nil {
return err
}
if ht.ownsHost && ht.host != nil {
return ht.host.Update(ctx, resources, req.Annotations)
}
return ht.updateTaskContainerResources(ctx, resources, req.Annotations)
}
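// updateTaskContainerResources dispatches a resource update to the container
// itself, based on the concrete type of the request payload.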
func (ht *hcsTask) updateTaskContainerResources(ctx context.Context, data interface{}, annotations map[string]string) error {
if ht.isWCOW {
switch resources := data.(type) {
case *specs.WindowsResources:
return ht.updateWCOWResources(ctx, resources, annotations)
case *ctrdtaskapi.ContainerMount:
// Adding a mount to a running container is currently only supported for Windows containers
return ht.updateWCOWContainerMount(ctx, resources, annotations)
default:
return errNotSupportedResourcesRequest
}
}
return ht.updateLCOWResources(ctx, data, annotations)
}
func (ht *hcsTask) updateWCOWContainerCPU(ctx context.Context, cpu *specs.WindowsCPUResources) error {
// If the host is 20H2 or newer we can make the request directly to HCS.
if osversion.Get().Build >= osversion.V20H2 {
req := &hcsschema.Processor{}
if cpu.Count != nil {
procCount := int32(*cpu.Count)
hostProcs := processorinfo.ProcessorCount()
if ht.host != nil {
hostProcs = ht.host.ProcessorCount()
}
req.Count = hcsoci.NormalizeProcessorCount(ctx, ht.id, procCount, hostProcs)
}
if cpu.Maximum != nil {
req.Maximum = int32(*cpu.Maximum)
}
if cpu.Shares != nil {
req.Weight = int32(*cpu.Shares)
}
return ht.requestUpdateContainer(ctx, resourcepaths.SiloProcessorResourcePath, req)
}
return errdefs.ErrNotImplemented
}
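// isValidWindowsCPUResources reports whether exactly one of Count, Shares, or
// Maximum is set; only one CPU resource type may be specified per update.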
func isValidWindowsCPUResources(c *specs.WindowsCPUResources) bool {
return (c.Count != nil && (c.Shares == nil && c.Maximum == nil)) ||
(c.Shares != nil && (c.Count == nil && c.Maximum == nil)) ||
(c.Maximum != nil && (c.Count == nil && c.Shares == nil))
}
func (ht *hcsTask) updateWCOWResources(ctx context.Context, resources *specs.WindowsResources, annotations map[string]string) error {
if resources.Memory != nil && resources.Memory.Limit != nil {
newMemorySizeInMB := *resources.Memory.Limit / memory.MiB
memoryLimit := hcsoci.NormalizeMemorySize(ctx, ht.id, newMemorySizeInMB)
if err := ht.requestUpdateContainer(ctx, resourcepaths.SiloMemoryResourcePath, memoryLimit); err != nil {
return err
}
}
if resources.CPU != nil {
if !isValidWindowsCPUResources(resources.CPU) {
return fmt.Errorf("invalid cpu resources request for container %s: %v", ht.id, resources.CPU)
}
if err := ht.updateWCOWContainerCPU(ctx, resources.CPU); err != nil {
return err
}
}
return nil
}
func (ht *hcsTask) updateLCOWResources(ctx context.Context, data interface{}, annotations map[string]string) error {
resources, ok := data.(*specs.LinuxResources)
if !ok || resources == nil {
return errors.New("must have resources be non-nil and type *LinuxResources when updating a lcow container")
}
settings := guestresource.LCOWContainerConstraints{
Linux: *resources,
}
return ht.requestUpdateContainer(ctx, "", settings)
}
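// requestUpdateContainer sends a modification request to the container. For
// WCOW the settings are addressed by an HCS resource path; for LCOW they are
// wrapped in a guest container-constraints modification.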
func (ht *hcsTask) requestUpdateContainer(ctx context.Context, resourcePath string, settings interface{}) error {
var modification interface{}
if ht.isWCOW {
modification = &hcsschema.ModifySettingRequest{
ResourcePath: resourcePath,
RequestType: guestrequest.RequestTypeUpdate,
Settings: settings,
}
} else {
modification = guestrequest.ModificationRequest{
ResourceType: guestresource.ResourceTypeContainerConstraints,
RequestType: guestrequest.RequestTypeUpdate,
Settings: settings,
}
}
return ht.c.Modify(ctx, modification)
}
func (ht *hcsTask) ProcessorInfo(ctx context.Context) (*processorInfo, error) {
if ht.host == nil {
return nil, errTaskNotIsolated
}