kvm_arch->tdp_max_page_level KVM

这是一个 per-VM 的值,不同的 VM 可以有各自不同的取值。

TDX Patch 引入的。Add per-VM maximum page level support to support different maximum page sizes for TD guest and conventional VMX guest.

/*
 * Excerpt of struct kvm_arch: only the field discussed in these notes is
 * shown, the remaining members are elided.
 */
struct kvm_arch {
	/*
	 * Per-VM upper bound on the page level. In the excerpts in these notes
	 * it is set in kvm_mmu_init_vm() and copied into
	 * kvm_page_fault.max_level when a fault is set up.
	 */
	int tdp_max_page_level;
	/* ... other members elided ... */
};

对于 legacy VM:

/*
 * Excerpt of the per-VM MMU initialization. Only the statement relevant to
 * these notes is kept: it sets the VM's tdp_max_page_level to
 * KVM_MAX_HUGEPAGE_LEVEL.
 */
void kvm_mmu_init_vm(struct kvm *kvm)
{
    // ... (rest of the function elided in these notes)
	/* Value given to every VM here; the notes describe this as the legacy-VM case. */
	kvm->arch.tdp_max_page_level = KVM_MAX_HUGEPAGE_LEVEL;
}

使用的地方:

kvm_mmu_do_page_fault
    struct kvm_page_fault fault = {
        // ... (other initializers elided in these notes)
		/* The fault's level ceiling is seeded from the per-VM limit. */
		.max_level = vcpu->kvm->arch.tdp_max_page_level,
		/* Both the requested and the goal level start at 4K. */
		.req_level = PG_LEVEL_4K,
		.goal_level = PG_LEVEL_4K,
	};


max_huge_page_level / tdp_root_level / max_tdp_level KVM

  • tdp_root_level:强制使用的 TDP root level(对应参数 tdp_forced_root_level)。下面引用的调用里传入的是 0,所以在这条路径上目前其实还没有用到。
  • max_tdp_level:表示 EPT 是几级的,只有 4 和 5 两种值。
  • max_huge_page_level:表示最大能使用的大页 level。初始化时会通过 rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) 读取 EPT Capability,再根据返回的 capability 决定最大能 mapping 的页有多大(见下面的 ept_caps_to_lpage_level())。
// Call site: the forced root level argument is 0, the maximum root level comes
// from vmx_get_max_ept_level(), and the huge page level is derived from the
// EPT capability bits via ept_caps_to_lpage_level().
kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(), ept_caps_to_lpage_level(vmx_capability.ept));
/*
 * Excerpt: record the caller-supplied TDP configuration in variables that
 * live outside this function.
 *
 * @enable_tdp:            stored in tdp_enabled
 * @tdp_forced_root_level: stored in tdp_root_level
 * @tdp_max_root_level:    stored in max_tdp_level
 * @tdp_huge_page_level:   becomes max_huge_page_level when TDP is enabled
 */
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
		       int tdp_max_root_level, int tdp_huge_page_level)
{
    // The call quoted in these notes passes 0 for tdp_forced_root_level, so
    // tdp_root_level is effectively not used yet on that path.
	tdp_root_level = tdp_forced_root_level;
	max_tdp_level = tdp_max_root_level;

    // ... (part of the function elided in these notes)
	tdp_enabled = enable_tdp;
	/* The TDP MMU is on only when it is allowed and TDP itself is enabled. */
	tdp_mmu_enabled = tdp_mmu_allowed && tdp_enabled;
	/*
	 * max_huge_page_level reflects KVM's MMU capabilities irrespective
	 * of kernel support, e.g. KVM may be capable of using 1GB pages when
	 * the kernel is not.  But, KVM never creates a page size greater than
	 * what is used by the kernel for any given HVA, i.e. the kernel's
	 * capabilities are ultimately consulted by kvm_mmu_hugepage_adjust().
	 */
	if (tdp_enabled)
		max_huge_page_level = tdp_huge_page_level;
	/* Without TDP: 1G if the boot CPU reports GBPAGES, otherwise 2M. */
	else if (boot_cpu_has(X86_FEATURE_GBPAGES))
		max_huge_page_level = PG_LEVEL_1G;
	else
		max_huge_page_level = PG_LEVEL_2M;
}

/*
 * Translate the EPT capability bits into the largest page level they allow.
 * See the EPT capability description for the page-size bits tested here.
 *
 * @ept_caps: EPT capability bits
 *
 * Returns PG_LEVEL_1G if the 1GB bit is set, else PG_LEVEL_2M if the 2MB bit
 * is set, else PG_LEVEL_4K.
 */
static inline int ept_caps_to_lpage_level(u32 ept_caps)
{
	int level = PG_LEVEL_4K;

	if (ept_caps & VMX_EPT_2MB_PAGE_BIT)
		level = PG_LEVEL_2M;
	/* Checked last so the 1GB bit wins whenever it is present. */
	if (ept_caps & VMX_EPT_1GB_PAGE_BIT)
		level = PG_LEVEL_1G;

	return level;
}

max_huge_page_level 使用的地方不多:除了 __kvm_mmu_max_mapping_level() 里用它来限制 max_level(见后文)之外,主要是在:

nested_ept_new_eptp
    kvm_init_shadow_ept_mmu
        reset_ept_shadow_zero_bits_mask
        	__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
        				    reserved_hpa_bits(), execonly,
        				    max_huge_page_level);

kvm_init_mmu
    init_kvm_tdp_mmu
        reset_tdp_shadow_zero_bits_mask
            __reset_rsvds_bits_mask_ept(shadow_zero_check,
                        reserved_hpa_bits(), false,
                        max_huge_page_level);

kvm_mmu_max_mapping_level() / __kvm_mmu_max_mapping_level() KVM

/*
 * Public entry point: forwards to __kvm_mmu_max_mapping_level() with no
 * host level supplied (PG_LEVEL_NONE).
 *
 * @max_level:      highest level the caller wants to allow
 * @faultin_private: passed through as the callee's is_private argument
 *
 * NOTE(review): when faultin_private is true the callee does not look up the
 * host level itself, so PG_LEVEL_NONE would reach its
 * WARN_ON_ONCE(host_level == PG_LEVEL_NONE) unless the level already resolved
 * to 4K -- confirm callers never rely on that combination.
 */
int kvm_mmu_max_mapping_level(struct kvm *kvm,
			      const struct kvm_memory_slot *slot, gfn_t gfn,
			      int max_level, bool faultin_private)
{
	const int host_level = PG_LEVEL_NONE;

	return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level,
					   host_level, faultin_private);
}

/*
 * Work out the largest page level that may be used to map @gfn in @slot.
 *
 * @max_level:  highest level the caller wants to support
 * @host_level: host-side mapping level. Expected to be PG_LEVEL_NONE when
 *              @is_private is false (it is then looked up through
 *              host_pfn_mapping_level()); when @is_private is true the value
 *              passed in is used as-is and must not be PG_LEVEL_NONE
 * @is_private: selects whether @host_level is taken as given or looked up
 *
 * Returns PG_LEVEL_4K when no huge level is permitted for the gfn, otherwise
 * the smaller of the host level and the permitted level.
 */
static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
				       const struct kvm_memory_slot *slot,
				       gfn_t gfn, int max_level, int host_level,
				       bool is_private)
{
	/* max_huge_page_level: the largest huge page level the platform supports. */
	int level = min(max_level, max_huge_page_level);

	/*
	 * Walk from the largest candidate down towards 2M and stop at the
	 * first level that is not disallowed for this gfn.
	 */
	while (level > PG_LEVEL_4K) {
		struct kvm_lpage_info *linfo = lpage_info_slot(gfn, slot, level);

		if (!linfo->disallow_lpage)
			break;
		level--;
	}

	if (level == PG_LEVEL_4K)
		return PG_LEVEL_4K;

	if (!is_private) {
		WARN_ON_ONCE(host_level != PG_LEVEL_NONE);
		host_level = host_pfn_mapping_level(kvm, gfn, slot);
	}
	WARN_ON_ONCE(host_level == PG_LEVEL_NONE);

	return min(host_level, level);
}