KVM MMU Memory Cache
When handling a page fault, for example, KVM sometimes needs to allocate memory ahead of time: once mmu_lock is taken, allocations that might sleep are no longer allowed, so the needed objects must already be sitting in a cache. The mechanism is built on top of slab memory caches (kmem_cache), with a whole-page fallback when no slab cache is attached.
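A minimal sketch of the typical flow, assuming a hypothetical fault-path function (example_fault_path() is made up for illustration; the topup/alloc helpers and mmu_page_header_cache are the real KVM names): top the cache up while sleeping is still allowed, then pop objects from it under mmu_lock.
static int example_fault_path(struct kvm_vcpu *vcpu)
{
    struct kvm_mmu_page *sp;
    int r;

    // May sleep: fill the cache *before* taking mmu_lock.
    r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_page_header_cache, 1);
    if (r)
        return r;

    write_lock(&vcpu->kvm->mmu_lock);
    // No sleeping allowed here; "allocation" just pops a pre-allocated object.
    sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
    //...
    write_unlock(&vcpu->kvm->mmu_lock);
    return 0;
}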
struct kvm_arch {
//...
struct kvm_mmu_memory_cache split_shadow_page_cache;
struct kvm_mmu_memory_cache split_page_header_cache;
struct kvm_mmu_memory_cache split_private_spt_cache;
struct kvm_mmu_memory_cache split_desc_cache;
//...
};
mmu_memory_cache_alloc_obj()
KVM
static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc, gfp_t gfp_flags)
{
void *page;
// Apply the cache's zeroing policy (__GFP_ZERO, if set).
gfp_flags |= mc->gfp_zero;
// If a slab cache is attached, allocate from it directly; the
// kmem_cache already knows the size and type of object to serve.
if (mc->kmem_cache)
return kmem_cache_alloc(mc->kmem_cache, gfp_flags);
// No slab cache: fall back to allocating and returning a whole page.
page = (void *)__get_free_page(gfp_flags);
// If an init value is configured, fill the page with it.
if (page && mc->init_value)
memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
return page;
}
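Which path is taken depends on how the cache was set up. As a reference point, x86's kvm_mmu_create() wires some per-vCPU caches to a kmem_cache and leaves others page-backed, roughly as follows (paraphrased; details vary across kernel versions):
// Slab-backed: objects are small fixed-size structs, so dedicated
// kmem_caches fit well.
vcpu->arch.mmu_pte_list_desc_cache.kmem_cache = pte_list_desc_cache;
vcpu->arch.mmu_pte_list_desc_cache.gfp_zero = __GFP_ZERO;

vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;

// Page-backed: no kmem_cache, so mmu_memory_cache_alloc_obj() falls
// back to __get_free_page().
vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;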
struct kvm_mmu_memory_cache
KVM
/*
* The @capacity field and @objects array are lazily initialized when the cache
* is topped up (__kvm_mmu_topup_memory_cache()).
*/
struct kvm_mmu_memory_cache {
// The next two fields are gfp (get free page) flags used at allocation time.
// gfp_zero: set to __GFP_ZERO to get zeroed memory on allocation success.
gfp_t gfp_zero;
// Custom allocation flags, e.g. GFP_KERNEL_ACCOUNT; used instead of the default when set.
gfp_t gfp_custom;
// Default value each fresh page is filled with, via:
// memset64(page, mc->init_value, PAGE_SIZE / sizeof(mc->init_value));
u64 init_value;
// Optional slab cache containing many objects of the same type for reuse.
struct kmem_cache *kmem_cache;
// Maximum number of objects the cache can hold.
int capacity;
// Number of objects currently in the cache.
int nobjs;
// Array in which each element points to a pre-allocated object.
void **objects;
};
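Note that gfp_zero and init_value are alternatives. A sketch of how this looks on kernels with the SHADOW_NONPRESENT_VALUE rework (paraphrased from kvm_mmu_create(); on older kernels the shadow-page cache simply sets gfp_zero):
// Fill shadow pages with the non-present pattern instead of zeroes;
// if the pattern happens to be 0, plain __GFP_ZERO is sufficient.
vcpu->arch.mmu_shadow_page_cache.init_value = SHADOW_NONPRESENT_VALUE;
if (!vcpu->arch.mmu_shadow_page_cache.init_value)
    vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;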
__kvm_mmu_topup_memory_cache()
KVM
// Top up the cache: grow mc->objects to @capacity and pre-allocate objects.
// @min: success requires at least this many objects in mc->objects.
int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
{
gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;
void *obj;
if (mc->nobjs >= min)
return 0;
if (unlikely(!mc->objects)) {
//...
// Allocate the pointer array, sized for the maximum number of objects.
mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
//...
// Record how many objects the cache can hold at most.
mc->capacity = capacity;
}
//...
// Pre-allocate objects until the cache is full (or allocation fails).
while (mc->nobjs < mc->capacity) {
obj = mmu_memory_cache_alloc_obj(mc, gfp);
if (!obj)
return mc->nobjs >= min ? 0 : -ENOMEM;
mc->objects[mc->nobjs++] = obj;
}
return 0;
}
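Most callers don't pick a capacity themselves; they go through the wrapper in virt/kvm/kvm_main.c, which plugs in the per-arch default:
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
{
    return __kvm_mmu_topup_memory_cache(mc, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE, min);
}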
topup_split_caches()
KVM
static int topup_split_caches(struct kvm *kvm)
{
/*
* Allocating rmap list entries when splitting huge pages for nested
* MMUs is uncommon as KVM needs to use a list if and only if there is
* more than one rmap entry for a gfn, i.e. requires an L1 gfn to be
* aliased by multiple L2 gfns and/or from multiple nested roots with
* different roles. Aliasing gfns when using TDP is atypical for VMMs;
* a few gfns are often aliased during boot, e.g. when remapping BIOS,
* but aliasing rarely occurs post-boot or for many gfns. If there is
* only one rmap entry, rmap->val points directly at that one entry and
* doesn't need to allocate a list. Buffer the cache by the default
* capacity so that KVM doesn't have to drop mmu_lock to topup if KVM
* encounters an aliased gfn or two.
*/
const int capacity = SPLIT_DESC_CACHE_MIN_NR_OBJECTS + KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE;
struct kvm_mmu_memory_cache *mc;
int start, end, i;
int r;
//...
r = __kvm_mmu_topup_memory_cache(&kvm->arch.split_desc_cache, capacity, SPLIT_DESC_CACHE_MIN_NR_OBJECTS);
if (r)
    return r;
r = kvm_mmu_topup_memory_cache(&kvm->arch.split_page_header_cache, 1);
if (r)
    return r;
r = kvm_mmu_topup_memory_cache(&kvm->arch.split_shadow_page_cache, 1);
//...
}
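For completeness, the consumer side: once a cache is topped up, objects are popped under mmu_lock via kvm_mmu_memory_cache_alloc() (from virt/kvm/kvm_main.c in recent kernels, lightly annotated here). It only falls back to an atomic allocation if the cache is unexpectedly empty:
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
    void *p;

    if (WARN_ON(!mc->nobjs))
        // Should not happen if topup sized the cache correctly; fall back
        // to a non-sleeping atomic allocation rather than fail outright.
        p = mmu_memory_cache_alloc_obj(mc, GFP_ATOMIC | __GFP_ACCOUNT);
    else
        p = mc->objects[--mc->nobjs];
    BUG_ON(!p);
    return p;
}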