最近在看内核源码过程中,需要根据进程的虚拟地址,获取page结构体,经过资料查阅,发现了get_user_pages这个函数。

该函数在内核源码中的定义及注释如下:
/*
* get_user_pages() - pin user pages in memory
* @tsk: the task_struct to use for page fault accounting, or
* NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
* @write: whether pages will be written to by the caller
* @force: whether to force write access even if user mapping is
* readonly. This will result in the page being COWed even
* in MAP_SHARED mappings. You do not want this.
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long. Or NULL, if caller
* only intends to ensure the pages are faulted in.
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno. Each page returned must be released
* with a put_page() call when it is finished with. vmas will only
* remain valid while mmap_sem is held.
*
* Must be called with mmap_sem held for read or write.
*
* get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given
* instant. That is, it takes the page that would be accessed if a user
* thread accesses the given user virtual address at that instant.
*
* This does not guarantee that the page exists in the user mappings when
* get_user_pages returns, and there may even be a completely different
* page there in some cases (eg. if mmapped pagecache has been invalidated
* and subsequently re faulted). However it does guarantee that the page
* won't be freed completely. And mostly callers simply care that the page
* contains data that was valid *at some point in time*. Typically, an IO
* or similar operation cannot guarantee anything stronger anyway because
* locks can't be held over the syscall boundary.
*
* If write=0, the page must not be written to. If the page is written to,
* set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
* after the page is finished with, and before put_page is called.
*
* get_user_pages is typically used for fewer-copy IO operations, to get a
* handle on the memory by some means other than accesses via the user virtual
* addresses. The pages may be submitted for DMA to devices or accessed via
* their kernel linear mapping (via the kmap APIs). Care should be taken to
* use the correct cache flushing APIs.
*
* See also get_user_pages_fast, for performance critical applications.
*/
long get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
		unsigned long start, unsigned long nr_pages, int write,
		int force, struct page **pages, struct vm_area_struct **vmas)
{
	/*
	 * FOLL_TOUCH is always set. FOLL_GET is added only when the caller
	 * supplied a pages array; FOLL_WRITE and FOLL_FORCE mirror the
	 * write and force arguments.
	 */
	int gup_flags = FOLL_TOUCH
			| (pages ? FOLL_GET : 0)
			| (write ? FOLL_WRITE : 0)
			| (force ? FOLL_FORCE : 0);

	/* The last argument of __get_user_pages() is passed as NULL here. */
	return __get_user_pages(tsk, mm, start, nr_pages, gup_flags,
				pages, vmas, NULL);
}

函数各参数的详细说明在代码注释中已经写得很清楚,下面给出一个 demo 来看看这个函数的具体用法(建议读者先阅读《linux kernel模块化编程入门》这篇文章)。

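get_user_pages() 能获取用户进程所使用内存中的某一页(struct page),之后可以在内核中通过 kmap()、kmap_atomic() 等函数把该页映射到内核线性地址,从而在内核中读写其中的数据。注意:如果向该页写入了数据,需要在释放前调用 set_page_dirty()(或 set_page_dirty_lock());使用完毕后要解除映射(kunmap()),并通过 put_page() 释放对该页的引用。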

内核程序(sample.c):
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/device.h>
#include <asm/uaccess.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
/* Device class created in sample_init() and destroyed in sample_exit(). */
static struct class *sample_class;
/*
 * sample_open() - .open handler of the Sample file operations.
 * @inode: inode passed in by the caller (not used)
 * @file:  file passed in by the caller (not used)
 *
 * Only logs its own name at KERN_INFO level.
 *
 * Return: always 0.
 */
static int sample_open(struct inode *inode, struct file *file)
{
	printk(KERN_INFO "%s\n", __func__);

	return 0;
}
/*
 * sample_release() - .release handler of the Sample file operations.
 * @inode: inode passed in by the caller (not used)
 * @file:  file passed in by the caller (not used)
 *
 * Only logs its own name at KERN_INFO level.
 *
 * Return: always 0.
 */
static int sample_release(struct inode *inode, struct file *file)
{
	printk(KERN_INFO "%s\n", __func__);

	return 0;
}
/*
 * sample_write() - demo .write handler: pin the caller's page and edit it
 *                  in place.
 * @file:  file being written (not used)
 * @buf:   user-space buffer; only the page holding its first byte is used
 * @count: number of bytes the caller says @buf holds
 * @off:   file offset (not used)
 *
 * Pins the user page backing @buf with get_user_pages(), maps it with
 * kmap(), logs the current contents of the buffer and then overwrites its
 * beginning with the string "Mohan". Both the log and the overwrite are
 * limited to @count bytes and to the end of the pinned page.
 *
 * Return: @count on success, 0 when @count is 0, or a negative errno when
 * the page could not be pinned.
 */
static ssize_t sample_write(struct file *file, const char __user *buf, size_t count, loff_t *off)
{
	static const char reply[] = "Mohan";
	unsigned long uaddr = (unsigned long)buf;
	size_t offset = offset_in_page(uaddr);
	size_t avail;
	struct page *page;
	char *myaddr;
	long res;

	printk(KERN_INFO "%s\n", __func__);

	if (!count)
		return 0;

	/* Never read or write past the caller's buffer or the one pinned page. */
	avail = min_t(size_t, count, PAGE_SIZE - offset);

	/* get_user_pages() must be called with mmap_sem held. */
	down_read(&current->mm->mmap_sem);
	res = get_user_pages(current, current->mm, uaddr,
			     1,		/* nr_pages */
			     1,		/* write */
			     0,		/* force */
			     &page, NULL);
	up_read(&current->mm->mmap_sem);

	/* A negative errno or 0 means nothing was pinned; page is not valid. */
	if (res < 1)
		return res < 0 ? res : -EFAULT;

	printk(KERN_INFO "Got mmaped.\n");

	/* kmap() maps the start of the page; buf may begin part-way into it. */
	myaddr = (char *)kmap(page) + offset;
	/* The user data need not be NUL-terminated, so bound the print. */
	printk(KERN_INFO "%.*s\n", (int)avail, myaddr);
	/* strlcpy() truncates to avail bytes and always NUL-terminates. */
	strlcpy(myaddr, reply, avail);
	kunmap(page);

	/*
	 * The page was written through the kernel mapping: mark it dirty
	 * before dropping the reference taken by get_user_pages().
	 */
	set_page_dirty_lock(page);
	page_cache_release(page);

	/* Report the buffer as consumed so callers do not retry forever. */
	return count;
}
static struct file_operations sample_ops = {
.owner = THIS_MODULE,
.open = sample_open,
.release = sample_release,
.write = sample_write
};
/*
 * sample_init() - module init function.
 *
 * Registers character-device major 42 under the name "Sample", creates the
 * "Sample" device class and then a device for minor 0 of that major. Each
 * step is checked; on failure the steps already completed are undone in
 * reverse order.
 *
 * Return: 0 on success, a negative errno otherwise.
 */
static int __init sample_init(void)
{
	struct device *dev;
	int ret;

	ret = register_chrdev(42, "Sample", &sample_ops);
	if (ret < 0)
		return ret;

	/* class_create() reports failure through an ERR_PTR, not NULL. */
	sample_class = class_create(THIS_MODULE, "Sample");
	if (IS_ERR(sample_class)) {
		ret = PTR_ERR(sample_class);
		goto err_chrdev;
	}

	/* device_create() reports failure through an ERR_PTR as well. */
	dev = device_create(sample_class, NULL, MKDEV(42, 0), NULL, "Sample");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_class;
	}

	return 0;

err_class:
	class_destroy(sample_class);
err_chrdev:
	unregister_chrdev(42, "Sample");
	return ret;
}
/*
 * sample_exit() - module exit function.
 *
 * Destroys the device for (major 42, minor 0), then the "Sample" class,
 * then drops the character-device registration.
 */
static void __exit sample_exit(void)
{
	const dev_t devt = MKDEV(42, 0);

	device_destroy(sample_class, devt);
	class_destroy(sample_class);
	unregister_chrdev(42, "Sample");
}
/* Register sample_init() and sample_exit() as the module's init and exit functions. */
module_init(sample_init);
module_exit(sample_exit);
/* Declare the module's license as GPL. */
MODULE_LICENSE("GPL");

Makefile文件:
# Build sample.o as an external kernel module.
obj-m += sample.o

# Kernel build tree to compile against; defaults to the one for the running
# kernel and may be overridden from the environment or the command line.
KDIR ?= /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

# Neither target produces a file named after itself.
.PHONY: all clean

# Recipe lines must begin with a TAB character.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

测试程序:
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Exercise the Sample driver: pass it a page-aligned buffer holding
 * "krishna" through write(), then print what the buffer holds afterwards.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the device cannot be opened,
 * the buffer cannot be allocated, or write() fails.
 */
int
main(void)
{
    enum { BUF_SIZE = 4096 };   /* used as both alignment and length */
    int status = EXIT_FAILURE;
    void *mem = NULL;
    char *ptr;
    int fd;
    int rc;

    fd = open("/dev/Sample", O_RDWR);
    if (fd < 0) {
        perror("error");
        return EXIT_FAILURE;
    }

    /* posix_memalign() returns the error number instead of setting errno. */
    rc = posix_memalign(&mem, BUF_SIZE, BUF_SIZE);
    if (rc != 0) {
        fprintf(stderr, "posix_memalign: %s\n", strerror(rc));
        goto out_close;
    }
    ptr = mem;

    /* Zero the buffer so it is a NUL-terminated string when printed below. */
    memset(ptr, 0, BUF_SIZE);
    memcpy(ptr, "krishna", strlen("krishna")); //Write String to Driver
    if (write(fd, ptr, BUF_SIZE) < 0) {
        perror("write");
        goto out_free;
    }
    printf("data is %s\n", ptr); //Read Data from Driver
    status = EXIT_SUCCESS;

out_free:
    free(mem);
out_close:
    close(fd);
    return status;
}


参考资料:

  1. chinaunix
  2. stackoverflow
  3. krishnamohanlinux