kvm_apic_set_state() KVM

/*
 * Load a local APIC register image (@s) into the vCPU's in-kernel APIC and
 * re-derive the state that depends on those registers.
 *
 * Returns 0 on success.  If kvm_apic_state_fixup() returns non-zero, that
 * value is returned and the register page is NOT overwritten; note that the
 * APIC base and SPIV have already been applied by that point.
 */
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	int r;

	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
	/* set SPIV separately to get count of SW disabled APICs right */
	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));

	r = kvm_apic_state_fixup(vcpu, s, true);
	if (r) {
		/* Fixup failed: still recalculate the APIC map before bailing out. */
		kvm_recalculate_apic_map(vcpu->kvm);
		return r;
	}
	/* Copy the whole image (sizeof(*s) bytes) into the APIC register page. */
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));

	/*
	 * Flag the APIC map as DIRTY before recalculating it
	 * (NOTE(review): presumably so the recalculation is not skipped - confirm).
	 */
	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
	kvm_recalculate_apic_map(vcpu->kvm);
	kvm_apic_set_version(vcpu);

	apic_update_ppr(apic);
	/*
	 * Timer handling: cancel whatever timer was running, reset the expired
	 * TSC-deadline value, refresh LVTT / divide count from the new registers,
	 * then start the timer based on APIC_TMCCT and zero that register.
	 */
	cancel_apic_timer(apic);
	apic->lapic_timer.expired_tscdeadline = 0;
	apic_update_lvtt(apic);
	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
	update_divide_count(apic);
	__start_apic_timer(apic, APIC_TMCCT);
	kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
	/* Re-sync irr_pending / isr_count / highest_isr_cache with apicv_active. */
	kvm_apic_update_apicv(vcpu);
	if (apic->apicv_active) {
		/*
		 * APICv is active: invoke the vendor callbacks with the highest
		 * IRR / ISR vectors found in the freshly loaded registers.
		 */
		static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
		static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
		static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
	}
	/* Request event re-evaluation for this vCPU. */
	kvm_make_request(KVM_REQ_EVENT, vcpu);
	if (ioapic_in_kernel(vcpu->kvm))
		kvm_rtc_eoi_tracking_restore_one(vcpu);

	vcpu->arch.apic_arb_prio = 0;

	return 0;
}

struct kvm_lapic KVM

kernel irqchip.

/* Excerpt of the per-vCPU arch state; only the local APIC pointer is shown. */
struct kvm_vcpu_arch {
    struct kvm_lapic *apic;    /* kernel irqchip context */
    // ... (remaining fields omitted in these notes)
}

/*
 * State of a vCPU's in-kernel local APIC; reached through vcpu->arch.apic.
 */
struct kvm_lapic {
	unsigned long base_address;
	struct kvm_io_device dev;
	struct kvm_timer lapic_timer;
	u32 divide_count;
	/* Back-pointer to the owning vCPU. */
	struct kvm_vcpu *vcpu;
    //
	/* Whether APICv is currently active for this vCPU. */
	bool apicv_active;
	bool sw_enabled;
	bool irr_pending;
	bool lvt0_in_nmi_mode;
	/* Select registers in the vAPIC cannot be read/written. */
    // Introduced by the TDX patches.
	bool guest_apic_protected;
	/* Number of bits set in ISR. */
	s16 isr_count;
	/* The highest vector set in ISR; if -1 - invalid, must scan ISR. */
	int highest_isr_cache;
	/**
	 * APIC register page.  The layout matches the register layout seen by
	 * the guest 1:1, because it is accessed by the vmx microcode.
	 * Note: Only one register, the TPR, is used by the microcode.
	 */
	void *regs;
	gpa_t vapic_addr;
	struct gfn_to_hva_cache vapic_cache;
	/* Bitmap of pending events; holds the KVM_APIC_INIT / KVM_APIC_SIPI bits. */
	unsigned long pending_events;
	/* Vector handed to the vCPU when a pending SIPI is delivered. */
	unsigned int sipi_vector;
	int nr_lvt_entries;
};

guest_apic_protected

// Where the flag gets set (call-chain outline, not compilable C):
tdx_vcpu_create
    //...
    vcpu->arch.apic->guest_apic_protected = true;

/*
 * Excerpt: when the local APIC is in-kernel and marked guest_apic_protected,
 * the "has interrupt" query is answered by the
 * kvm_x86_protected_apic_has_interrupt static call instead.
 */
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
    //...
    // TDX case
	if (lapic_in_kernel(v) && v->arch.apic->guest_apic_protected)
		return static_call(kvm_x86_protected_apic_has_interrupt)(v);
            // Notes: call chain behind the static call above (outline only).
            vt_protected_apic_has_interrupt
                tdx_protected_apic_has_interrupt
    //...
}



kvm_lapic->apicv_active KVM

// Where the flag gets set (call-chain outline, not compilable C):
kvm_arch_vcpu_create
    if (irqchip_in_kernel(vcpu->kvm)) {
        if (enable_apicv) {
            vcpu->arch.apic->apicv_active = true;
            // On enable, a KVM_REQ_APICV_UPDATE request is raised so that the
            // APIC state gets updated.
            kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);

// Where the flag gets cleared (TDX clears it):
tdx_td_vcpu_init
	vcpu->arch.apic->apicv_active = false;

// Where the flag is recomputed when a KVM_REQ_APICV_UPDATE request is
// pending (call-chain outline, not compilable C):
vcpu_enter_guest
    if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
        kvm_vcpu_update_apicv
            __kvm_vcpu_update_apicv
                activate = kvm_vcpu_apicv_activated(vcpu) && (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED);
                apic->apicv_active = activate;


                


vmx_sync_pir_to_irr() KVM

是在 migration 的时候 source 端被 call 的。

/*
 * Fold interrupts recorded in the vCPU's posted-interrupt descriptor into the
 * virtual APIC's IRR, then either program RVI or request event processing.
 *
 * Returns the highest pending IRR vector as computed below (max_irr), or -EIO
 * if APICv is not enabled.
 */
int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	int max_irr;
	bool got_posted_interrupt;

    // APICv must be enabled; otherwise flag a KVM bug and bail out.
	if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
		return -EIO;

    // If the Outstanding Notification (ON) bit is set, clear it.
    // Then merge the PIR contents into the IRR (per the original notes the
    // PIR is zeroed as part of that update).
	if (pi_test_on(&vmx->pi_desc)) {
		pi_clear_on(&vmx->pi_desc);
        //...
		got_posted_interrupt = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
	} else {
		/* Nothing was posted: just look up the current highest IRR vector. */
		max_irr = kvm_lapic_find_highest_irr(vcpu);
		got_posted_interrupt = false;
	}

	/*
	 * Newly recognized interrupts are injected via either virtual interrupt
	 * delivery (RVI) or KVM_REQ_EVENT.  Virtual interrupt delivery is
	 * disabled in two cases:
	 *
	 * 1) If L2 is running and the vCPU has a new pending interrupt.  If L1
	 * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
	 * VM-Exit to L1.  If L1 doesn't want to exit, the interrupt is injected
	 * into L2, but KVM doesn't use virtual interrupt delivery to inject
	 * interrupts into L2, and so KVM_REQ_EVENT is again needed.
	 *
	 * 2) If APICv is disabled for this vCPU, assigned devices may still
	 * attempt to post interrupts.  The posted interrupt vector will cause
	 * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
	 */
	if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
		vmx_set_rvi(max_irr);
	else if (got_posted_interrupt)
		kvm_make_request(KVM_REQ_EVENT, vcpu);

	return max_irr;
}

kvm_apic_accept_events() KVM

这个函数只处理 INIT 和 SIPI 这两种 pending event(它们不是普通的中断向量,而是记录在 apic->pending_events 里的事件)。

/*
 * Consume the INIT / SIPI events recorded in apic->pending_events.
 *
 * Returns 0 on every path shown here.  (The nested-virtualization part of
 * the function is elided in these notes, which is why 'r' appears unused.)
 */
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	u8 sipi_vector;
	int r;

	/* Fast path: neither INIT nor SIPI is pending. */
	if (!kvm_apic_has_pending_init_or_sipi(vcpu))
		return 0;

    // nested mode codes...
	/*
	 * INITs are blocked while CPU is in specific states (SMM, VMX root
	 * mode, SVM with GIF=0), while SIPIs are dropped if the CPU isn't in
	 * wait-for-SIPI (WFS).
	 */
	if (!kvm_apic_init_sipi_allowed(vcpu)) {
		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
		/* Only the SIPI bit is cleared here; a pending INIT bit is left set. */
		clear_bit(KVM_APIC_SIPI, &apic->pending_events);
		return 0;
	}

	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events))
		static_call(kvm_x86_vcpu_deliver_init)(vcpu);
	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
		/* A SIPI only takes effect if the vCPU is in INIT_RECEIVED state. */
		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
			/* evaluate pending_events before reading the vector */
			smp_rmb();
			sipi_vector = apic->sipi_vector;
			static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector);
			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		}
	}
	return 0;
}

KVM_REQ_APICV_UPDATE / kvm_vcpu_update_apicv() / __kvm_vcpu_update_apicv() KVM

这个函数主要就是为了重新计算并更新 apic->apicv_active(APICv 是否激活)的状态。

// Call site (outline, not compilable C): a pending KVM_REQ_APICV_UPDATE
// request is consumed here and handled by kvm_vcpu_update_apicv().
vcpu_enter_guest
    if (kvm_request_pending(vcpu)) {
        if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
            kvm_vcpu_update_apicv(vcpu);
/*
 * Handler for KVM_REQ_APICV_UPDATE: forwards to __kvm_vcpu_update_apicv()
 * when the vCPU has an in-kernel local APIC.  Part of the body is elided in
 * these notes.
 */
static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
    // Nothing to do unless the local APIC lives in the kernel.
	if (!lapic_in_kernel(vcpu))
		return;

    //...
	__kvm_vcpu_update_apicv(vcpu);
}

/*
 * Re-evaluate whether APICv should be active for @vcpu and, if the answer
 * differs from apic->apicv_active, switch the vCPU over: update the flag,
 * re-sync the APIC bookkeeping and refresh the vendor execution controls.
 *
 * Does nothing when the local APIC is not in-kernel.  apicv_update_lock is
 * read-held and preemption is disabled for the duration of the update; both
 * are released on every exit path.
 */
void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	bool activate;

	if (!lapic_in_kernel(vcpu))
		return;

	/*
	 * The update must not be preempted and must not run concurrently with
	 * another updater of the APICv state.
	 */
	down_read(&vcpu->kvm->arch.apicv_update_lock);
	preempt_disable();

	/*
	 * Do not activate APICv when the APIC is disabled.  (Per the original
	 * notes, 'activate' is expected to be true in most configurations.)
	 */
	activate = kvm_vcpu_apicv_activated(vcpu) &&
		   (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED);

	/*
	 * Nothing changed.  Leave through the common exit so the lock is
	 * dropped and preemption is re-enabled; a bare return here would leak
	 * both.
	 */
	if (apic->apicv_active == activate)
		goto out;

	/* Enable or disable. */
	apic->apicv_active = activate;
	kvm_apic_update_apicv(vcpu);
	static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);

	/*
	 * When APICv gets disabled, we may still have injected interrupts
	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
	 * still active when the interrupt got accepted. Make sure
	 * kvm_check_and_inject_events() is called to check for that.
	 */
	if (!apic->apicv_active)
		kvm_make_request(KVM_REQ_EVENT, vcpu);

out:
	preempt_enable();
	up_read(&vcpu->kvm->arch.apicv_update_lock);
}

kvm_apic_update_apicv() KVM

/*
 * Bring the local APIC's software bookkeeping (irr_pending, isr_count and
 * highest_isr_cache) in line with the current value of apicv_active.
 */
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *lapic = vcpu->arch.apic;

	if (!lapic->apicv_active) {
		/*
		 * irr_pending is deliberately left untouched: scanning the IRR
		 * could race with updates from the CPU, because hardware still
		 * considers APICv active.  The flag gets cleared as appropriate
		 * once KVM injects the interrupt.
		 */
		lapic->isr_count = count_vectors(lapic->regs + APIC_ISR);
	} else {
		/* With APICv activated, irr_pending is always true. */
		lapic->irr_pending = true;
		lapic->isr_count = 1;
	}

	/* -1 marks the cached highest ISR vector as invalid. */
	lapic->highest_isr_cache = -1;
}

Kvmvapic

kvm_lapic_sync_to_vapic() KVM

void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
	u32 data, tpr;
	int max_irr, max_isr;
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_sync_pv_eoi_to_guest(vcpu, apic);

	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
		return;

	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
	max_irr = apic_find_highest_irr(apic);
	max_isr = apic_find_highest_isr(apic);
	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);

	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, sizeof(u32));
}

KVM_SET_VAPIC_ADDR KVM