KVM MMU Memory Cache
When handling a page fault, for example, KVM sometimes needs to allocate memory ahead of time: once mmu_lock is taken, allocations that might sleep are no longer allowed, so the needed objects must already be sitting in a cache. The mechanism is built on top of slab memory caches (kmem_cache), with a whole-page fallback when no slab cache is attached.
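A minimal sketch of the typical flow, assuming a hypothetical fault-path function (example_fault_path() is made up for illustration; the topup/alloc helpers and mmu_page_header_cache are the real KVM names): top the cache up while sleeping is still allowed, then pop objects from it under mmu_lock.
static int example_fault_path(struct kvm_vcpu *vcpu)
{
    struct kvm_mmu_page *sp;
    int r;

    // May sleep: fill the cache *before* taking mmu_lock.
    r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, 1);
    if (r)
        return r;

    write_lock(&vcpu->kvm->mmu_lock);
    // No sleeping allowed here; "allocation" just pops a pre-allocated object.
    sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
    //...
    write_unlock(&vcpu->kvm->mmu_lock);
    return 0;
}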
struct kvm_arch {
//...
struct kvm_mmu_memory_cache split_shadow_page_cache;
struct kvm_mmu_memory_cache split_page_header_cache;
struct kvm_mmu_memory_cache split_private_spt_cache;
struct kvm_mmu_memory_cache split_desc_cache;
//...
};
mmu_memory_cache_alloc_obj()
KVM
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc, gfp_t gfp_flags)
{
void *page;
// Apply the cache's zeroing policy (__GFP_ZERO, if set).
gfp_flags |= mc->gfp_zero;
// If a slab cache is attached, allocate from it directly; the
// kmem_cache already knows the size and type of object to serve.
if (mc->kmem_cache)
return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
// No slab cache: fall back to allocating and returning a whole page.
page = (void *)__get_free_page(gfp_flags);
// If an init value is configured, fill the page with it.
if (page && mc->init_value)
memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
return page;
}
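Which path is taken depends on how the cache was set up. As a reference point, x86's kvm_mmu_create() wires some per-vCPU caches to a kmem_cache and leaves others page-backed, roughly as follows (paraphrased; details vary across kernel versions):
// Slab-backed: objects are small fixed-size structs, so dedicated
// kmem_caches fit well.
vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO;

vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;

// Page-backed: no kmem_cache, so mmu_memory_cache_alloc_obj() falls
// back to __get_free_page().
vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;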
struct kvm_mmu_memory_cache
KVM
/*
* The @capacity field and @objects array are lazily initialized when the cache
* is topped up (__kvm_mmu_topup_memory_cache()).
*/
struct kvm_mmu_memory_cache {
// The next two fields are gfp (get free page) flags used at allocation time.
// gfp_zero: set to __GFP_ZERO to get zeroed memory on allocation success.
gfp_t gfp_zero;
// Custom allocation flags, e.g. GFP_KERNEL_ACCOUNT; used instead of the default when set.
gfp_t gfp_custom;
// Default value each fresh page is filled with, via:
// memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
u64 init_value;
// Optional slab cache containing many objects of the same type for reuse.
struct kmem_cache *kmem_cache;
// Maximum number of objects the cache can hold.
int capacity;
// Number of objects currently in the cache.
int nobjs;
// Array in which each element points to a pre-allocated object.
void **objects;
};
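Note that gfp_zero and init_value are alternatives. A sketch of how this looks on kernels with the SHADOW_NONPRESENT_VALUE rework (paraphrased from kvm_mmu_create(); on older kernels the shadow-page cache simply sets gfp_zero):
// Fill shadow pages with the non-present pattern instead of zeroes;
// if the pattern happens to be 0, plain __GFP_ZERO is sufficient.
vcpu->arch.mmu_shadow_page_cache.init_value = SHADOW_NONPRESENT_VALUE;
if (!vcpu->arch.mmu_shadow_page_cache.init_value)
    vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;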
__kvm_mmu_topup_memory_cache()
KVM
// Top up the cache: grow mc->objects to @capacity and pre-allocate objects.
// @min: success requires at least this many objects in mc->objects.
int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
{
gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;
void *obj;
if (mc->nobjs >= min)
return 0;
if (unlikely(!mc->objects)) {
//...
// Allocate the pointer array, sized for the maximum number of objects.
mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
//...
// Record how many objects the cache can hold at most.
mc->capacity = capacity;
}
//...
// Pre-allocate objects until the cache is full (or allocation fails).
while (mc->nobjs < mc->capacity) {
obj = mmu_memory_cache_alloc_obj(mc, gfp);
if (!obj)
return mc->nobjs >= min ? 0 : -ENOMEM;
mc->objects[mc->nobjs++] = obj;
}
return 0;
}
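Most callers don't pick a capacity themselves; they go through the wrapper in virt/kvm/kvm_main.c, which plugs in the per-arch default:
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
{
    return __kvm_mmu_topup_memory_cache(mc, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE, min);
}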
topup_split_caches()
KVM
static int topup_split_caches(struct kvm *kvm)
{
/*
* Allocating rmap list entries when splitting huge pages for nested
* MMUs is uncommon as KVM needs to use a list if and only if there is
* more than one rmap entry for a gfn, i.e. requires an L1 gfn to be
* aliased by multiple L2 gfns and/or from multiple nested roots with
* different roles. Aliasing gfns when using TDP is atypical for VMMs;
* a few gfns are often aliased during boot, e.g. when remapping BIOS,
* but aliasing rarely occurs post-boot or for many gfns. If there is
* only one rmap entry, rmap->val points directly at that one entry and
* doesn't need to allocate a list. Buffer the cache by the default
* capacity so that KVM doesn't have to drop mmu_lock to topup if KVM
* encounters an aliased gfn or two.
*/
const int capacity = SPLIT_DESC_CACHE_MIN_NR_OBJECTS + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
struct kvm_mmu_memory_cache *mc;
int start, end, i;
int r;
//...
r = __kvm_mmu_topup_memory_cache(&kvm->arch.split_desc_cache, capacity, SPLIT_DESC_CACHE_MIN_NR_OBJECTS);
if (r)
    return r;
r = kvm_mmu_topup_memory_cache(&kvm->arch.split_page_header_cache, 1);
if (r)
    return r;
r = kvm_mmu_topup_memory_cache(&kvm->arch.split_shadow_page_cache, 1);
//...
}
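For completeness, the consumer side: once a cache is topped up, objects are popped under mmu_lock via kvm_mmu_memory_cache_alloc() (from virt/kvm/kvm_main.c in recent kernels, lightly annotated here). It only falls back to an atomic allocation if the cache is unexpectedly empty:
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
    void *p;

    if (WARN_ON(!mc->nobjs))
        // Should not happen if topup sized the cache correctly; fall back
        // to a non-sleeping atomic allocation rather than fail outright.
        p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
    else
        p = mc->objects[--mc->nobjs];
    BUG_ON(!p);
    return p;
}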