APICv (APIC Virtualization)
kvm_apic_set_state()
KVM
/*
 * Load the local APIC state supplied in @s into the vCPU's in-kernel APIC
 * and bring the derived state (APIC map, PPR, timer, APICv hooks) back in
 * sync with the newly loaded registers.
 *
 * @vcpu: vCPU whose local APIC is being restored.
 * @s:    register image to load; its SPIV register is applied first and the
 *        whole image is then copied into the APIC register page.
 *
 * Returns 0 on success, or the non-zero value returned by
 * kvm_apic_state_fixup() if the supplied state is rejected.
 */
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
struct kvm_lapic *apic = vcpu->arch.apic;
int r;
/* Re-apply the vCPU's current APIC base before touching the registers. */
kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
/* set SPIV separately to get count of SW disabled APICs right */
apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
r = kvm_apic_state_fixup(vcpu, s, true);
if (r) {
/* Fixup failed: still recalculate the APIC map, then report the error. */
kvm_recalculate_apic_map(vcpu->kvm);
return r;
}
/* Overwrite the APIC register page with the caller-provided image. */
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
/* Mark the VM-wide APIC map dirty so the recalculation below rebuilds it. */
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
kvm_recalculate_apic_map(vcpu->kvm);
kvm_apic_set_version(vcpu);
apic_update_ppr(apic);
/* Tear down the running timer and re-arm it from the restored registers. */
cancel_apic_timer(apic);
apic->lapic_timer.expired_tscdeadline = 0;
apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
update_divide_count(apic);
__start_apic_timer(apic, APIC_TMCCT);
/* The timer has been restarted with APIC_TMCCT; zero the register afterwards. */
kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
kvm_apic_update_apicv(vcpu);
if (apic->apicv_active) {
/* Give the vendor code the restored state plus the highest IRR/ISR vectors. */
static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
}
/* Request event re-evaluation on the next guest entry. */
kvm_make_request(KVM_REQ_EVENT, vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_rtc_eoi_tracking_restore_one(vcpu);
vcpu->arch.apic_arb_prio = 0;
return 0;
}
struct kvm_lapic
KVM
kernel irqchip.
/*
 * Abridged excerpt of the per-vCPU arch state: only the pointer to the
 * vCPU's local APIC is shown, the remaining members are elided.
 */
struct kvm_vcpu_arch {
struct kvm_lapic *apic; /* kernel irqchip context */
//...
}
/*
 * Per-vCPU local APIC state: the register page plus the software
 * bookkeeping kept alongside it.
 */
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
struct kvm_timer lapic_timer;
u32 divide_count;
/* Back-pointer to the owning vCPU. */
struct kvm_vcpu *vcpu;
// Status flags.
/* Whether APICv is currently in effect for this APIC. */
bool apicv_active;
bool sw_enabled;
bool irr_pending;
bool lvt0_in_nmi_mode;
/* Select registers in the vAPIC cannot be read/written. */
// Introduced by the TDX patch series.
bool guest_apic_protected;
/* Number of bits set in ISR. */
s16 isr_count;
/* The highest vector set in ISR; if -1 - invalid, must scan ISR. */
int highest_isr_cache;
/**
 * APIC register page. The layout matches the register layout seen by
 * the guest 1:1, because it is accessed by the vmx microcode.
 * Note: Only one register, the TPR, is used by the microcode.
 */
void *regs;
gpa_t vapic_addr;
struct gfn_to_hva_cache vapic_cache;
/* Bitmap of pending events; KVM_APIC_INIT and KVM_APIC_SIPI bits live here. */
unsigned long pending_events;
unsigned int sipi_vector;
int nr_lvt_entries;
};
guest_apic_protected
// 设置上的地方
tdx_vcpu_create
//...
vcpu->arch.apic->guest_apic_protected = true;
/*
 * Abridged excerpt: only the branch for a protected guest APIC is shown,
 * the code before and after it is elided.
 */
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
//...
// TDX case: with an in-kernel local APIC that is flagged
// guest_apic_protected, return whatever the vendor hook reports.
if (lapic_in_kernel(v) && v->arch.apic->guest_apic_protected)
return static_call(kvm_x86_protected_apic_has_interrupt)(v);
// NOTE(review): the two bare names below look like the call chain behind
// the hook above rather than statements - confirm.
vt_protected_apic_has_interrupt
tdx_protected_apic_has_interrupt
//...
}
kvm_lapic->apicv_active
KVM
// 置上的地方:
kvm_arch_vcpu_create
if (irqchip_in_kernel(vcpu->kvm)) {
if (enable_apicv) {
vcpu->arch.apic->apicv_active = true;
// 在 enable 的时候会发一个 KVM_REQ_APICV_UPDATE 来更新 APIC 的状态
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
// Clear 的地方(TDX 会 clear 掉):
tdx_td_vcpu_init
vcpu->arch.apic->apicv_active = false;
vcpu_enter_guest
if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
kvm_vcpu_update_apicv
__kvm_vcpu_update_apicv
activate = kvm_vcpu_apicv_activated(vcpu) && (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED);
apic->apicv_active = activate;
vmx_sync_pir_to_irr()
KVM
是在 migration 的时候 source 端被 call 的。
/*
 * Fold any interrupts recorded in the vCPU's posted-interrupt descriptor
 * into the virtual APIC's IRR and make sure a newly found interrupt will be
 * delivered, either through RVI or through KVM_REQ_EVENT.
 *
 * Returns the highest pending IRR vector as reported by
 * kvm_apic_update_irr() / kvm_lapic_find_highest_irr(), or -EIO when called
 * with enable_apicv false (which also trips KVM_BUG_ON).
 */
int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int max_irr;
bool got_posted_interrupt;
// Make sure APICv is enabled.
if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
return -EIO;
// If the Outstanding Notification (ON) bit is set, clear it, then merge
// the PIR contents into the IRR. (Original note: the PIR is zeroed as
// part of that merge - not visible in this excerpt.)
if (pi_test_on(&vmx->pi_desc)) {
pi_clear_on(&vmx->pi_desc);
//...
got_posted_interrupt = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
} else {
/* Nothing posted: just look up the current highest IRR vector. */
max_irr = kvm_lapic_find_highest_irr(vcpu);
got_posted_interrupt = false;
}
/*
 * Newly recognized interrupts are injected via either virtual interrupt
 * delivery (RVI) or KVM_REQ_EVENT. Virtual interrupt delivery is
 * disabled in two cases:
 *
 * 1) If L2 is running and the vCPU has a new pending interrupt. If L1
 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
 * VM-Exit to L1. If L1 doesn't want to exit, the interrupt is injected
 * into L2, but KVM doesn't use virtual interrupt delivery to inject
 * interrupts into L2, and so KVM_REQ_EVENT is again needed.
 *
 * 2) If APICv is disabled for this vCPU, assigned devices may still
 * attempt to post interrupts. The posted interrupt vector will cause
 * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
 */
if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
vmx_set_rvi(max_irr);
else if (got_posted_interrupt)
kvm_make_request(KVM_REQ_EVENT, vcpu);
return max_irr;
}
kvm_apic_accept_events()
KVM
这个函数只处理 INIT 和 SIPI 这两种 pending event(即 `apic->pending_events` 里的 KVM_APIC_INIT / KVM_APIC_SIPI),不处理普通中断。
/*
 * Consume the INIT and SIPI events pending on the vCPU's local APIC.
 *
 * Returns 0 on every path visible in this excerpt; the nested-mode handling
 * is elided and may return something else.
 */
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u8 sipi_vector;
int r;
/* Fast path: nothing to do unless an INIT or SIPI is pending. */
if (!kvm_apic_has_pending_init_or_sipi(vcpu))
return 0;
// nested mode codes...
/*
 * INITs are blocked while CPU is in specific states (SMM, VMX root
 * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in
 * wait-for-SIPI (WFS).
 */
if (!kvm_apic_init_sipi_allowed(vcpu)) {
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
/* Drop the SIPI; a pending INIT bit is left untouched here. */
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
return 0;
}
if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events))
static_call(kvm_x86_vcpu_deliver_init)(vcpu);
if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
/* A SIPI only takes effect when the vCPU is in the INIT_RECEIVED state. */
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
}
return 0;
}
KVM_REQ_APICV_UPDATE
/ kvm_vcpu_update_apicv()
/ __kvm_vcpu_update_apicv()
KVM
这个函数主要就是为了更新 activate 的状态。
vcpu_enter_guest
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
kvm_vcpu_update_apicv(vcpu);
/*
 * Handler for KVM_REQ_APICV_UPDATE: re-evaluate the vCPU's APICv state via
 * __kvm_vcpu_update_apicv(). Part of the body is elided in this excerpt.
 */
static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
// The local APIC must be emulated in the kernel.
if (!lapic_in_kernel(vcpu))
return;
//...
__kvm_vcpu_update_apicv(vcpu);
}
/*
 * Recompute whether APICv should be active for this vCPU and, if the answer
 * changed, propagate the new state to the local APIC bookkeeping and to the
 * vendor execution controls.
 *
 * Does nothing when the local APIC is not emulated in the kernel.
 */
void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	bool activate;

	if (!lapic_in_kernel(vcpu))
		return;

	/*
	 * The update must not be preempted and must not run concurrently with
	 * another updater: take apicv_update_lock for read and disable
	 * preemption for the duration.
	 */
	down_read(&vcpu->kvm->arch.apicv_update_lock);
	preempt_disable();

	/*
	 * Do not activate APICv when the APIC is disabled. In most
	 * configurations this is expected to evaluate to true.
	 */
	activate = kvm_vcpu_apicv_activated(vcpu) &&
		   (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED);

	/*
	 * Nothing changed: skip the update, but leave through the common exit
	 * so the lock and the preemption count taken above are released.
	 */
	if (apic->apicv_active == activate)
		goto out;

	/* Enable or disable. */
	apic->apicv_active = activate;
	kvm_apic_update_apicv(vcpu);
	static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);

	/*
	 * When APICv gets disabled, we may still have injected interrupts
	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
	 * still active when the interrupt got accepted. Make sure
	 * kvm_check_and_inject_events() is called to check for that.
	 */
	if (!apic->apicv_active)
		kvm_make_request(KVM_REQ_EVENT, vcpu);

out:
	preempt_enable();
	up_read(&vcpu->kvm->arch.apicv_update_lock);
}
kvm_apic_update_apicv()
KVM
/*
 * Re-derive the local APIC's software bookkeeping (irr_pending, isr_count,
 * highest_isr_cache) after apicv_active has been changed.
 */
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *lapic = vcpu->arch.apic;

	if (!lapic->apicv_active) {
		/*
		 * irr_pending is deliberately left alone: scanning the IRR
		 * could race with updates from the CPU, which still treats
		 * APICv as active. The flag gets cleared when KVM injects the
		 * interrupt.
		 */
		lapic->isr_count = count_vectors(lapic->regs + APIC_ISR);
	} else {
		/* With APICv active, irr_pending is unconditionally true. */
		lapic->irr_pending = true;
		lapic->isr_count = 1;
	}

	/* Invalidate the cached highest ISR vector in both cases. */
	lapic->highest_isr_cache = -1;
}
Kvmvapic
kvm_lapic_sync_to_vapic()
KVM
/*
 * Publish a summary of the local APIC state to the guest's vAPIC page.
 *
 * The 32-bit word written through vapic_cache packs, as built below:
 *   bits  0-7   task priority (low byte of APIC_TASKPRI)
 *   bits 12-15  high nibble of the highest in-service vector
 *   bits 24-31  highest pending IRR vector
 *
 * The word is only written when KVM_APIC_CHECK_VAPIC is set in
 * apic_attention; the PV EOI sync runs unconditionally.
 */
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;

	/*
	 * The finders report "no vector" as a negative value. Clamp to 0 so
	 * that a negative int is neither shifted (undefined behaviour) nor
	 * masked into a bogus priority nibble.
	 */
	max_irr = apic_find_highest_irr(apic);
	if (max_irr < 0)
		max_irr = 0;
	max_isr = apic_find_highest_isr(apic);
	if (max_isr < 0)
		max_isr = 0;

	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
			       sizeof(u32));
}
KVM_SET_VAPIC_ADDR
KVM
🗞️ Recent Posts