Skip to content

Commit d9ddb14

Browse files
tranji-cloudgvisor-bot
authored and committed
kvm: RSEQ: Add RSEQ support
This change adds RSEQ support for platform/kvm. To support this, the KVM platform must provide two key capabilities: (1) A stable, unique CPU ID. (2) A way to detect when a thread has been preempted. This implementation provides the necessary support as follows: CPU ID: - platform/kvm now advertises the KVM vCPU ID as the cpu_id. Preemption Detection: - Compares the last context the CPU ran against the current context being scheduled. - Compares the context's rseqCPU and the CPU ID retrieved by the platform. To facilitate this, several new methods are introduced to the platform interface and implemented by platform/kvm: - HasCPUNumbers() - NumCPUs() - DetectsCPUPreemption() - PreemptCPU() - PreemptAllCPUs() PiperOrigin-RevId: 823228013
1 parent feddee9 commit d9ddb14

File tree

14 files changed

+186
-13
lines changed

14 files changed

+186
-13
lines changed

pkg/sentry/kernel/kernel.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,11 @@ func (k *Kernel) Init(args InitKernelArgs) error {
490490
k.cpuClockTickerWakeCh = make(chan struct{}, 1)
491491
k.cpuClockTickerStopCond.L = &k.runningTasksMu
492492
k.applicationCores = args.ApplicationCores
493+
if args.UseHostCores && k.HasCPUNumbers() {
494+
args.UseHostCores = false
495+
log.Infof("UseHostCores enabled but the platform implements HasCPUNumbers(): setting UseHostCores to false")
496+
}
497+
493498
if args.UseHostCores {
494499
k.useHostCores = true
495500
maxCPU, err := hostcpu.MaxPossibleCPU()
@@ -502,6 +507,15 @@ func (k *Kernel) Init(args InitKernelArgs) error {
502507
k.applicationCores = minAppCores
503508
}
504509
}
510+
511+
if k.HasCPUNumbers() {
512+
if k.applicationCores < uint(k.NumCPUs()) {
513+
log.Infof("ApplicationCores is less than NumCPUs: %d < %d", k.applicationCores, k.NumCPUs())
514+
log.Infof("Setting applicationCores to NumCPUs: %d", k.NumCPUs())
515+
k.applicationCores = uint(k.NumCPUs())
516+
}
517+
}
518+
505519
k.extraAuxv = args.ExtraAuxv
506520
k.vdso = args.Vdso
507521
k.vdsoParams = args.VdsoParams

pkg/sentry/kernel/rseq.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"gvisor.dev/gvisor/pkg/abi/linux"
2121
"gvisor.dev/gvisor/pkg/errors/linuxerr"
2222
"gvisor.dev/gvisor/pkg/hostarch"
23-
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
2423
"gvisor.dev/gvisor/pkg/usermem"
2524
)
2625

@@ -50,7 +49,7 @@ type OldRSeqCriticalRegion struct {
5049

5150
// RSeqAvailable returns true if t supports (old and new) restartable sequences.
5251
func (t *Task) RSeqAvailable() bool {
53-
return t.k.useHostCores && t.k.Platform.DetectsCPUPreemption()
52+
return (t.k.useHostCores || t.k.Platform.HasCPUNumbers()) && t.k.Platform.DetectsCPUPreemption()
5453
}
5554

5655
// SetRSeq registers addr as this thread's rseq structure.
@@ -201,7 +200,7 @@ func (t *Task) rseqUpdateCPU() error {
201200
return nil
202201
}
203202

204-
t.rseqCPU = int32(hostcpu.GetCPU())
203+
t.rseqCPU = t.CPU()
205204

206205
// Update both CPUs, even if one fails.
207206
rerr := t.rseqCopyOutCPU()

pkg/sentry/kernel/task_run.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"gvisor.dev/gvisor/pkg/goid"
2525
"gvisor.dev/gvisor/pkg/hostarch"
2626
"gvisor.dev/gvisor/pkg/refs"
27-
"gvisor.dev/gvisor/pkg/sentry/hostcpu"
2827
"gvisor.dev/gvisor/pkg/sentry/ktime"
2928
"gvisor.dev/gvisor/pkg/sentry/memmap"
3029
"gvisor.dev/gvisor/pkg/sentry/platform"
@@ -207,7 +206,7 @@ func (app *runApp) execute(t *Task) taskRunState {
207206
if t.rseqPreempted {
208207
t.rseqPreempted = false
209208
if t.rseqAddr != 0 || t.oldRSeqCPUAddr != 0 {
210-
t.rseqCPU = int32(hostcpu.GetCPU())
209+
t.rseqCPU = t.CPU()
211210
if err := t.rseqCopyOutCPU(); err != nil {
212211
t.Debugf("Failed to copy CPU to %#x for rseq: %v", t.rseqAddr, err)
213212
t.forceSignal(linux.SIGSEGV, false)

pkg/sentry/kernel/task_sched.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
365365
return linuxerr.EINVAL
366366
}
367367

368-
if t.k.useHostCores {
368+
if t.k.useHostCores || t.k.Platform.HasCPUNumbers() {
369369
// No-op; pretend the mask was immediately changed back.
370370
return nil
371371
}
@@ -383,6 +383,10 @@ func (t *Task) SetCPUMask(mask sched.CPUSet) error {
383383

384384
// CPU returns the cpu id for a given task.
385385
func (t *Task) CPU() int32 {
386+
if t.k.Platform.HasCPUNumbers() {
387+
return t.p.LastCPUNumber()
388+
}
389+
386390
if t.k.useHostCores {
387391
return int32(hostcpu.GetCPU())
388392
}

pkg/sentry/platform/kvm/context.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package kvm
1616

1717
import (
1818
"gvisor.dev/gvisor/pkg/abi/linux"
19+
"gvisor.dev/gvisor/pkg/atomicbitops"
1920
pkgcontext "gvisor.dev/gvisor/pkg/context"
2021
"gvisor.dev/gvisor/pkg/hostarch"
2122
"gvisor.dev/gvisor/pkg/ring0"
@@ -36,6 +37,10 @@ type platformContext struct {
3637

3738
// interrupt is the interrupt platformContext.
3839
interrupt interrupt.Forwarder
40+
41+
// lastUsedCPU is the last CPU ID used by this platformContext.
42+
// It must be accessed atomically.
43+
lastUsedCPU atomicbitops.Int32
3944
}
4045

4146
// tryCPUIDError indicates that CPUID emulation should occur.
@@ -45,7 +50,7 @@ type tryCPUIDError struct{}
4550
func (tryCPUIDError) Error() string { return "cpuid emulation failed" }
4651

4752
// Switch runs the provided platformContext in the given address space.
48-
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, _ int32) (*linux.SignalInfo, hostarch.AccessType, error) {
53+
func (c *platformContext) Switch(ctx pkgcontext.Context, mm platform.MemoryManager, ac *arch.Context64, rseqCPU int32) (*linux.SignalInfo, hostarch.AccessType, error) {
4954
as := mm.AddressSpace()
5055
localAS := as.(*addressSpace)
5156

@@ -58,6 +63,19 @@ restart:
5863
c.machine.Put(cpu) // Already preempted.
5964
return nil, hostarch.NoAccess, platform.ErrContextInterrupt
6065
}
66+
// If this CPU was last used to run a different context, then we've
67+
// been preempted.
68+
last := cpu.lastCtx.Swap(c)
69+
c.lastUsedCPU.Store(int32(cpu.id))
70+
preempted := rseqCPU >= 0 && (last != c || rseqCPU != int32(cpu.id))
71+
if preempted {
72+
// Release resources.
73+
c.machine.Put(cpu)
74+
75+
// All done.
76+
c.interrupt.Disable()
77+
return nil, hostarch.NoAccess, platform.ErrContextCPUPreempted
78+
}
6179

6280
// Set the active address space.
6381
//
@@ -136,3 +154,8 @@ func (c *platformContext) PullFullState(as platform.AddressSpace, ac *arch.Conte
136154

137155
// PrepareSleep implements platform.Context.PrepareSleep.
138156
func (*platformContext) PrepareSleep() {}
157+
158+
// LastCPUNumber implements platform.Context.LastCPUNumber.
159+
func (c *platformContext) LastCPUNumber() int32 {
160+
return c.lastUsedCPU.Load()
161+
}

pkg/sentry/platform/kvm/kvm.go

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,6 @@ type runData struct {
6262

6363
// KVM represents a lightweight VM context.
6464
type KVM struct {
65-
platform.NoCPUPreemptionDetection
66-
6765
// KVM never changes mm_structs.
6866
platform.UseHostProcessMemoryBarrier
6967

@@ -180,11 +178,52 @@ func (k *KVM) ConcurrencyCount() int {
180178
return k.machine.maxVCPUs
181179
}
182180

181+
// HasCPUNumbers implements platform.Platform.HasCPUNumbers.
182+
func (*KVM) HasCPUNumbers() bool {
183+
return true
184+
}
185+
186+
// NumCPUs implements platform.Platform.NumCPUs.
187+
func (k *KVM) NumCPUs() int32 {
188+
return int32(k.machine.maxVCPUs)
189+
}
190+
191+
// DetectsCPUPreemption implements platform.Platform.DetectsCPUPreemption.
192+
func (k *KVM) DetectsCPUPreemption() bool {
193+
return true
194+
}
195+
196+
// PreemptAllCPUs implements platform.Platform.PreemptAllCPUs.
197+
func (k *KVM) PreemptAllCPUs() error {
198+
k.machine.mu.RLock()
199+
defer k.machine.mu.RUnlock()
200+
for _, c := range k.machine.vCPUsByID {
201+
c.lastCtx.Store(nil)
202+
c.BounceToHost()
203+
}
204+
return nil
205+
}
206+
207+
// PreemptCPU implements platform.Platform.PreemptCPU.
208+
func (k *KVM) PreemptCPU(cpu int32) error {
209+
if cpu < 0 || cpu >= k.NumCPUs() {
210+
return fmt.Errorf("invalid CPU number: %d", cpu)
211+
}
212+
k.machine.mu.RLock()
213+
defer k.machine.mu.RUnlock()
214+
c := k.machine.vCPUsByID[cpu]
215+
c.lastCtx.Store(nil)
216+
c.BounceToHost()
217+
return nil
218+
}
219+
183220
// NewContext returns an interruptible context.
184221
func (k *KVM) NewContext(pkgcontext.Context) platform.Context {
185-
return &platformContext{
222+
pc := &platformContext{
186223
machine: k.machine,
187224
}
225+
pc.lastUsedCPU.Store(0)
226+
return pc
188227
}
189228

190229
type constructor struct{}

pkg/sentry/platform/kvm/machine.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ type vCPU struct {
216216

217217
// dieState holds state related to vCPU death.
218218
dieState dieState
219+
220+
// lastCtx is the last context that was scheduled on this vCPU.
221+
lastCtx atomic.Pointer[platformContext]
219222
}
220223

221224
type dieState struct {

pkg/sentry/platform/platform.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,14 @@ type Platform interface {
9696
// NewContext returns a new execution context.
9797
NewContext(context.Context) Context
9898

99+
// PreemptCPU causes the first following call to Context.Switch() to return
100+
// ErrContextCPUPreempted.
101+
//
102+
// PreemptCPU is only supported if DetectsCPUPreemption() && HasCPUNumbers() == true.
103+
// Platforms for which this does not hold may panic if PreemptCPU is
104+
// called.
105+
PreemptCPU(cpu int32) error
106+
99107
// PreemptAllCPUs causes all concurrent calls to Context.Switch(), as well
100108
// as the first following call to Context.Switch() for each Context, to
101109
// return ErrContextCPUPreempted.
@@ -121,6 +129,12 @@ type Platform interface {
121129
// in parallel. Concurrent calls to Context.Switch() beyond
122130
// ConcurrencyCount() may block until previous calls have returned.
123131
ConcurrencyCount() int
132+
133+
// HasCPUNumbers returns true if the platform assigns CPU numbers to contexts.
134+
HasCPUNumbers() bool
135+
136+
// NumCPUs returns the number of CPUs on the platform.
137+
NumCPUs() int32
124138
}
125139

126140
// NoCPUPreemptionDetection implements Platform.DetectsCPUPreemption and
@@ -137,6 +151,25 @@ func (NoCPUPreemptionDetection) PreemptAllCPUs() error {
137151
panic("This platform does not support CPU preemption detection")
138152
}
139153

154+
// NoCPUNumbers implements Platform.HasCPUNumbers for platforms that do
155+
// not support it.
156+
type NoCPUNumbers struct{}
157+
158+
// HasCPUNumbers implements Platform.HasCPUNumbers.
159+
func (NoCPUNumbers) HasCPUNumbers() bool {
160+
return false
161+
}
162+
163+
// NumCPUs implements Platform.NumCPUs.
164+
func (NoCPUNumbers) NumCPUs() int32 {
165+
panic("platform does not support CPU numbers")
166+
}
167+
168+
// PreemptCPU implements Platform.PreemptCPU.
169+
func (NoCPUNumbers) PreemptCPU(cpu int32) error {
170+
panic("platform does not support preempting a specific CPU")
171+
}
172+
140173
// UseHostGlobalMemoryBarrier implements Platform.HaveGlobalMemoryBarrier and
141174
// Platform.GlobalMemoryBarrier by invoking equivalent functionality on the
142175
// host.
@@ -264,6 +297,16 @@ type Context interface {
264297
// PrepareSleep() is called when the thread switches to the
265298
// interruptible sleep state.
266299
PrepareSleep()
300+
301+
// LastCPUNumber returns the last CPU number that this context was running on.
302+
// If the context never ran on a CPU, it may return any valid CPU number, as long as the first
303+
// call to Switch will detect that the CPU number is incorrect and return ErrContextCPUPreempted.
304+
LastCPUNumber() int32
305+
}
306+
307+
// LastCPUNumber implements Context.LastCPUNumber.
308+
func (NoCPUNumbers) LastCPUNumber() int32 {
309+
panic("context does not support last CPU number")
267310
}
268311

269312
// ContextError is one of the possible errors returned by Context.Switch().

pkg/sentry/platform/ptrace/ptrace.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ var (
7474

7575
type context struct {
7676
archContext
77+
platform.NoCPUNumbers
7778

7879
// signalInfo is the signal info, if and when a signal is received.
7980
signalInfo linux.SignalInfo
@@ -214,6 +215,7 @@ type PTrace struct {
214215
platform.MMapMinAddr
215216
platform.NoCPUPreemptionDetection
216217
platform.UseHostGlobalMemoryBarrier
218+
platform.NoCPUNumbers
217219
}
218220

219221
// New returns a new ptrace-based implementation of the platform interface.

pkg/sentry/platform/systrap/systrap.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ var (
125125

126126
// platformContext is an implementation of the platform context.
127127
type platformContext struct {
128+
platform.NoCPUNumbers
129+
128130
// signalInfo is the signal info, if and when a signal is received.
129131
signalInfo linux.SignalInfo
130132

@@ -239,6 +241,7 @@ func (c *platformContext) PrepareSleep() {
239241
type Systrap struct {
240242
platform.NoCPUPreemptionDetection
241243
platform.UseHostGlobalMemoryBarrier
244+
platform.NoCPUNumbers
242245

243246
// memoryFile is used to create a stub sysmsg stack which is shared with
244247
// the Sentry. Since memoryFile is platform-private, it is never restored,

0 commit comments

Comments
 (0)