Part of my Linux kernel study notes series, memory management section: a brief introduction to the management of noncontiguous memory areas.
The linear address space of noncontiguous memory areas#
The linear address space reserved for noncontiguous memory areas starts at VMALLOC_START and ends at VMALLOC_END. Both macros are obviously architecture-specific; 32-bit x86 is used as the example below.

arch/x86/include/asm/pgtable_32_types.h:
```c
#define VMALLOC_OFFSET	(8 * 1024 * 1024)
#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
```
high_memory marks the end of the directly mapped physical memory. VMALLOC_OFFSET is an 8 MB safety gap that separates the vmalloc area from the directly mapped region. Permanent kernel mappings start at the linear address PKMAP_BASE, and VMALLOC_END is kept 8 KB below it as another safety gap (the default page size is 4 KB).
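To get a feel for the numbers, here is a minimal userspace sketch of the VMALLOC_START calculation. The concrete value of high_memory below is a hypothetical illustration (assuming the default PAGE_OFFSET of 0xC0000000 and the usual ~896 MB of directly mapped low memory), not something taken from the code above.

```c
#include <stdio.h>

int main(void)
{
	/* Hypothetical: high_memory near the top of the ~896 MB low-memory mapping. */
	unsigned long high_memory    = 0xf8000000UL;
	unsigned long vmalloc_offset = 8UL * 1024 * 1024;	/* VMALLOC_OFFSET, 8 MB */

	/* VMALLOC_START = high_memory + VMALLOC_OFFSET */
	printf("VMALLOC_START ~= 0x%08lx\n", high_memory + vmalloc_offset);	/* 0xf8800000 */
	return 0;
}
```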
The layout of high memory looks roughly as follows (from the comment in arch/x86/include/asm/highmem.h):
```c
/*
 * Right now we initialize only a single pte table. It can be extended
 * easily, subsequent pte tables have to be allocated in one physical
 * chunk of RAM.
 */
/*
 * Ordering is:
 *
 * FIXADDR_TOP
 * 			fixed_addresses
 * FIXADDR_START
 * 			temp fixed addresses
 * FIXADDR_BOOT_START
 * 			Persistent kmap area
 * PKMAP_BASE
 * VMALLOC_END
 * 			Vmalloc area
 * VMALLOC_START
 * high_memory
 */
```
The descriptor of a noncontiguous memory area#
Each noncontiguous memory area is described by a vm_struct descriptor.

include/linux/vmalloc.h:
```c
struct vm_struct {
	struct vm_struct	*next;
	void			*addr;
	unsigned long		size;
	unsigned long		flags;
	struct page		**pages;
	unsigned int		nr_pages;
	unsigned long		phys_addr;
	void			*caller;
};
```
- next: pointer to the next vm_struct.
- addr: linear address of the first memory cell of the area.
- size: size of the area plus 4096 (the safety gap is one page; a small worked example follows this list).
- flags: type of memory mapped by the area.
- pages: pointer to an array of pointers to page descriptors (struct page).
- nr_pages: number of entries filled in the page descriptor pointer array.
- phys_addr: the I/O shared memory of the mapped hardware device (0 if nothing is mapped).
- caller: the caller's return address (that is what the comment on __vmalloc_node() says; I have not looked into how it is actually used, and none of the books cover it, so I am skipping it here).
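As a quick illustration of the size field, here is a minimal userspace sketch; the request size of 8192 bytes and the 4 KB page size are hypothetical example values.

```c
#include <stdio.h>

int main(void)
{
	/* Hypothetical request: vmalloc(8192) on a machine with 4 KB pages. */
	unsigned long request   = 8192;
	unsigned long page_size = 4096;

	unsigned long nr_pages = request / page_size;	/* 2 page frames              */
	unsigned long size     = request + page_size;	/* 12288 = area + guard page  */

	printf("nr_pages = %lu, size = %lu\n", nr_pages, size);
	return 0;
}
```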
The memory mapping types used in flags are defined as follows.

include/linux/vmalloc.h:
```c
/* bits in flags of vmalloc's vm_struct below */
#define VM_IOREMAP	0x00000001	/* ioremap() and friends */
#define VM_ALLOC	0x00000002	/* vmalloc() */
#define VM_MAP		0x00000004	/* vmap()ed pages */
#define VM_USERMAP	0x00000008	/* suitable for remap_vmalloc_range */
#define VM_VPAGES	0x00000010	/* buffer for pages was vmalloc'ed */
/* bits [20..32] reserved for arch specific ioremap internals */
```
Allocating a noncontiguous memory area#
Noncontiguous memory areas are allocated with the vmalloc() function.

mm/vmalloc.c:
```c
/**
 * __vmalloc_node - allocate virtually contiguous memory
 * @size: allocation size
 * @align: desired alignment
 * @gfp_mask: flags for the page level allocator
 * @prot: protection mask for the allocated pages
 * @node: node to use for allocation or -1
 * @caller: caller's return address
 *
 * Allocate enough pages to cover @size from the page level
 * allocator with @gfp_mask flags. Map them into contiguous
 * kernel virtual space, using a pagetable protection of @prot.
 */
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller)
{
	struct vm_struct *area;
	void *addr;
	unsigned long real_size = size;

	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
		return NULL;

	area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
				  VMALLOC_END, node, gfp_mask, caller);

	if (!area)
		return NULL;

	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);

	/*
	 * A ref_count = 3 is needed because the vm_struct and vmap_area
	 * structures allocated in the __get_vm_area_node() function contain
	 * references to the virtual address of the vmalloc'ed block.
	 */
	kmemleak_alloc(addr, real_size, 3, gfp_mask);

	return addr;
}

/**
 * vmalloc - allocate virtually contiguous memory
 * @size: allocation size
 * Allocate enough pages to cover @size from the page level
 * allocator and map them into contiguous kernel virtual space.
 *
 * For tight control over page level allocator and protection flags
 * use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
	return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
			      -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
```
__vmalloc_node() mainly performs the following steps:

- Align size up to a multiple of the page size with the PAGE_ALIGN() macro.
- Create a new descriptor with __get_vm_area_node().
- Allocate the memory with __vmalloc_area_node().
- Report the allocation with kmemleak_alloc().
PAGE_ALIGN()#
PAGE_ALIGN() is straightforward; just follow the macros one by one.

include/linux/mm.h:
```c
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr)	ALIGN(addr, PAGE_SIZE)
```
include/linux/kernel.h:
```c
#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
```
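To see the rounding in action, here is a minimal userspace sketch that re-implements the two macros above with a hard-coded 4 KB page size (an assumption for the example; in the kernel PAGE_SIZE comes from the architecture):

```c
#include <stdio.h>

/* Userspace copies of the macros, assuming 4 KB pages. */
#define PAGE_SIZE		4096UL
#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
#define PAGE_ALIGN(addr)	ALIGN(addr, PAGE_SIZE)

int main(void)
{
	printf("%lu %lu %lu\n",
	       PAGE_ALIGN(1UL),		/* 4096 */
	       PAGE_ALIGN(4096UL),	/* 4096 */
	       PAGE_ALIGN(5000UL));	/* 8192 */
	return 0;
}
```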
__get_vm_area_node()#

mm/vmalloc.c:
```c
static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
	static struct vmap_area *va;
	struct vm_struct *area;

	BUG_ON(in_interrupt());
	if (flags & VM_IOREMAP) {
		int bit = fls(size);

		if (bit > IOREMAP_MAX_ORDER)
			bit = IOREMAP_MAX_ORDER;
		else if (bit < PAGE_SHIFT)
			bit = PAGE_SHIFT;

		align = 1ul << bit;
	}

	size = PAGE_ALIGN(size);
	if (unlikely(!size))
		return NULL;

	area = kzalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!area))
		return NULL;

	/*
	 * We always allocate a guard page.
	 */
	size += PAGE_SIZE;

	va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
	if (IS_ERR(va)) {
		kfree(area);
		return NULL;
	}

	insert_vmalloc_vm(area, va, flags, caller);
	return area;
}
```
__get_vm_area_node() creates the new descriptor and inserts it into the relevant kernel data structures. The details are fairly involved, and I do not plan to dig into them here.
__vmalloc_area_node()#

mm/vmalloc.c:
```c
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node, void *caller)
{
	struct page **pages;
	unsigned int nr_pages, array_size, i;
	gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;

	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
	array_size = (nr_pages * sizeof(struct page *));

	area->nr_pages = nr_pages;
	/* Please note that the recursion is strictly bounded. */
	if (array_size > PAGE_SIZE) {
		pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
				PAGE_KERNEL, node, caller);
		area->flags |= VM_VPAGES;
	} else {
		pages = kmalloc_node(array_size, nested_gfp, node);
	}
	area->pages = pages;
	area->caller = caller;
	if (!area->pages) {
		remove_vm_area(area->addr);
		kfree(area);
		return NULL;
	}

	for (i = 0; i < area->nr_pages; i++) {
		struct page *page;

		if (node < 0)
			page = alloc_page(gfp_mask);
		else
			page = alloc_pages_node(node, gfp_mask, 0);

		if (unlikely(!page)) {
			/* Successfully allocated i pages, free them in __vunmap() */
			area->nr_pages = i;
			goto fail;
		}
		area->pages[i] = page;
	}

	if (map_vm_area(area, prot, &pages))
		goto fail;
	return area->addr;

fail:
	vfree(area->addr);
	return NULL;
}
```
__vmalloc_area_node() and __vmalloc_node() are mutually recursive. When the array of page descriptor pointers to be allocated fits within a single page, __vmalloc_area_node() allocates it with kmalloc_node() (which obtains the memory through kmalloc()), which ends the recursion.
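A quick back-of-the-envelope check of where that boundary lies (a userspace sketch, assuming i386-like values: 4 KB pages and 4-byte pointers):

```c
#include <stdio.h>

int main(void)
{
	/* Assumed i386-like values. */
	unsigned long page_size = 4096;
	unsigned long ptr_size  = 4;			/* sizeof(struct page *) */

	unsigned long per_page  = page_size / ptr_size;	/* 1024 pointers fit in one page */
	unsigned long threshold = per_page * page_size;	/* larger requests need the array
							   to be vmalloc'ed as well */

	printf("threshold = %lu MB\n", threshold >> 20);	/* 4 MB */
	return 0;
}
```

In other words, only for requests larger than about 4 MB does the pointer array itself spill past one page, triggering the recursive __vmalloc_node() call and the VM_VPAGES flag.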
The entries of the page descriptor pointer array are then filled with pages allocated by alloc_pages_node() (alloc_page() also ends up calling alloc_pages_node(); see the earlier notes on requesting page frames).
Once the pages have been allocated, map_vm_area() is called to update the page table entries. The details of that function are fairly involved, so I am leaving them aside for now.
kmemleak_alloc()#
kmemleak_alloc() is part of the kmemleak API. kmemleak is used to detect memory leaks, which is outside the scope of this series of notes.
Releasing a noncontiguous memory area#
Noncontiguous memory areas are released with the vfree() function.

mm/vmalloc.c:
```c
static void __vunmap(const void *addr, int deallocate_pages)
{
	struct vm_struct *area;

	if (!addr)
		return;

	if ((PAGE_SIZE-1) & (unsigned long)addr) {
		WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
		return;
	}

	area = remove_vm_area(addr);
	if (unlikely(!area)) {
		WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
				addr);
		return;
	}

	debug_check_no_locks_freed(addr, area->size);
	debug_check_no_obj_freed(addr, area->size);

	if (deallocate_pages) {
		int i;

		for (i = 0; i < area->nr_pages; i++) {
			struct page *page = area->pages[i];

			BUG_ON(!page);
			__free_page(page);
		}

		if (area->flags & VM_VPAGES)
			vfree(area->pages);
		else
			kfree(area->pages);
	}

	kfree(area);
	return;
}

/**
 * vfree - release memory allocated by vmalloc()
 * @addr: memory base address
 *
 * Free the virtually continuous memory area starting at @addr, as
 * obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
 * NULL, no operation is performed.
 *
 * Must not be called in interrupt context.
 */
void vfree(const void *addr)
{
	BUG_ON(in_interrupt());

	kmemleak_free(addr);

	__vunmap(addr, 1);
}
EXPORT_SYMBOL(vfree);
```
__vunmap() mainly performs the following steps (the reverse of allocation):

- Clear the corresponding page table entries via remove_vm_area().
- Free the allocated pages with __free_page().
- Free the descriptor with kfree().

The details here are fairly involved as well, so I am not going to look into them for now.
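To tie allocation and release together, here is a minimal usage sketch in the form of a hypothetical kernel module; the module name and buffer size are made up for illustration.

```c
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

/* The buffer only needs to be contiguous in the kernel's linear address
 * space, so vmalloc()/vfree() are sufficient. */
static void *buf;

static int __init vmalloc_demo_init(void)
{
	buf = vmalloc(1 << 20);			/* 1 MB, rounded up to whole pages */
	if (!buf)
		return -ENOMEM;
	memset(buf, 0, 1 << 20);		/* use it like ordinary memory */
	return 0;
}

static void __exit vmalloc_demo_exit(void)
{
	vfree(buf);				/* must not be called in interrupt context */
}

module_init(vmalloc_demo_init);
module_exit(vmalloc_demo_exit);
MODULE_LICENSE("GPL");
```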