2012-05-25 146 views
11

假設緩衝區是使用基於頁面的方案分配的。實現mmap的一種方法是使用remap_pfn_range,但LDD3說這對常規內存不起作用。看來我們可以通過使用SetPageReserved把頁面標記爲保留來解決這個問題,從而將其鎖定在內存中。但是,難道不是所有的內核內存都已經不可交換(即已經被保留)了嗎?爲什麼還需要顯式設置保留位?如何將Linux內核緩衝區映射到用戶空間?

這是否與從HIGH_MEM分配的頁面有關?

+0

不知道這是否有幫助,但據我所知,內核中的[perf](http://lxr.free-electrons.com/source/tools/perf/design.txt)子系統提供了一組來自內核內存的頁面(實際上是一個環形緩衝區),可以由用戶空間應用程序mmap。它的實現可能會爲你的問題提供一些提示,也許值得看看它的源代碼。 – Eugene

回答

16

在mmap方法中,從內核映射一組頁面的最簡單方法是使用缺頁(fault)處理程序來映射頁面。基本上你最終會得到類似這樣的代碼:

/*
 * mmap handler for the device file.
 *
 * Installs my_vm_ops on the VMA, so the handlers in that table (including
 * the fault handler) are used for this mapping. Always returns 0.
 */
static int my_mmap(struct file *filp, struct vm_area_struct *vma)
{
    vma->vm_ops = &my_vm_ops;

    return 0;
}

/*
 * File operations for the device: opened as non-seekable, with mmap
 * served by my_mmap().
 */
static const struct file_operations my_fops = {
    .owner  = THIS_MODULE,
    .llseek = no_llseek,
    .open   = nonseekable_open,
    .mmap   = my_mmap,
};

(再加上您的模塊所需的其他文件操作)。同樣,在my_mmap中,您需要進行必要的範圍檢查等,以驗證mmap參數。

然後vm_ops看起來像這樣:

static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 
{ 
    vmf->page = my_page_at_index(vmf->pgoff); 
    get_page(vmf->page); 

    return 0; 
} 

/* VMA callbacks for the mapping: page faults are served by my_fault(). */
static const struct vm_operations_struct my_vm_ops = {
    .fault = my_fault,
};

其中,你只需要弄清楚:對於傳遞給fault函數的給定vma/vmf,應該把哪個頁面映射到用戶空間。這取決於你的模塊具體是如何工作的。舉例來說,如果你執行了

/* Allocate the buffer with vmalloc_user(); its size is MY_BUF_SIZE bytes. */
my_buf = vmalloc_user(MY_BUF_SIZE); 

,那麼你使用的頁面會像

/*
 * Page lookup for the faulting offset: pgoff is in pages, so it is shifted
 * by PAGE_SHIFT to get a byte offset into my_buf.
 * NOTE(review): no bound check against the buffer size is shown here.
 */
vmalloc_to_page(my_buf + (vmf->pgoff << PAGE_SHIFT)); 

但是你也可以很容易地創建一個數組,併爲每個條目分配一個頁面,使用kmalloc或其他任何方式都可以。

[剛注意到my_fault這個函數名有點好笑]

+0

謝謝。這非常有幫助。不過,我們是否需要在故障處理程序中調用vm_insert_page?另外,誰將撤銷get_page以允許頁面稍後被釋放?我想,一旦用戶空間做了munmap,我們就可以從vma_close中得到一些代碼,我們可以在這個代碼中爲所有發生故障的頁面put_page。這是正確的方法嗎? – ravi

+2

不,如果您設置了vmf->page,則不需要執行vm_insert_page。如果你要做一些比較花哨的事情,比如映射沒有struct page支持的設備內存,那麼你可能需要vm_insert_pfn(),但實際上你多半不需要擔心這一點。put_page()會在映射被拆除時由核心VM代碼處理。說真的,對於將內核內存映射到用戶空間的簡單驅動程序,我向你展示的幾乎就是你所需要的全部。 – Roland

+0

你好。如果無法用vmalloc()分配my_buf緩衝區(因爲它太大),那麼my_fault()方法的主體應該是什麼?例如按需一頁一頁地分配。 – user1284631

0

雖然這些頁面是由內核驅動程序保留的,但它們是要通過用戶空間來訪問的。因此,PTE(頁表條目)並不知道該pfn屬於用戶空間還是內核空間(即使它們是由內核驅動程序分配的)。

這就是爲什麼它們要用SetPageReserved來標記。

2

最小可運行示例,附用戶態測試

Kernel module

#include <asm/uaccess.h> /* copy_from_user */ 
#include <linux/debugfs.h> 
#include <linux/fs.h> 
#include <linux/init.h> 
#include <linux/kernel.h> /* min */ 
#include <linux/mm.h> 
#include <linux/module.h> 
#include <linux/proc_fs.h> 
#include <linux/slab.h> 

/* Name of the proc entry created in myinit() and removed in myexit(). */
static const char *filename = "lkmc_mmap"; 

/* Maximum number of bytes moved by read()/write(), and seeded by open(). */
enum { BUFFER_SIZE = 4 }; 

/*
 * Per-open state, stored in filp->private_data by open() and copied to
 * vma->vm_private_data by mmap().
 */
struct mmap_info { 
    /* Page obtained from get_zeroed_page(); accessed by read/write/fault. */
    char *data; 
}; 

/*
 * VMA close callback (runs after unmap).
 * Only logs that it was called.
 */
static void vm_close(struct vm_area_struct *vma)
{
    pr_info("vm_close\n");
}

/* First page access. */ 
static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 
{ 
    struct page *page; 
    struct mmap_info *info; 

    pr_info("vm_fault\n"); 
    info = (struct mmap_info *)vma->vm_private_data; 
    if (info->data) { 
     page = virt_to_page(info->data); 
     get_page(page); 
     vmf->page = page; 
    } 
    return 0; 
} 

/*
 * VMA open callback; mmap() also calls it directly once the VMA is set up.
 * Only logs that it was called.
 * TODO: when can this run at a different time than mmap?
 */
static void vm_open(struct vm_area_struct *vma)
{
    pr_info("vm_open\n");
}

static struct vm_operations_struct vm_ops = 
{ 
    .close = vm_close, 
    .fault = vm_fault, 
    .open = vm_open, 
}; 

/*
 * mmap file operation.
 *
 * Attaches the per-open mmap_info and the vm_ops callbacks to the new VMA,
 * adds the VM_DONTEXPAND and VM_DONTDUMP flags, then calls vm_open() by
 * hand. Always returns 0.
 */
static int mmap(struct file *filp, struct vm_area_struct *vma)
{
    pr_info("mmap\n");

    vma->vm_private_data = filp->private_data;
    vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
    vma->vm_ops = &vm_ops;

    vm_open(vma);
    return 0;
}

static int open(struct inode *inode, struct file *filp) 
{ 
    struct mmap_info *info; 

    pr_info("open\n"); 
    info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL); 
    pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info)); 
    info->data = (char *)get_zeroed_page(GFP_KERNEL); 
    memcpy(info->data, "asdf", BUFFER_SIZE); 
    filp->private_data = info; 
    return 0; 
} 

/*
 * read file operation.
 *
 * Copies up to BUFFER_SIZE bytes from the start of the data page to the
 * user buffer; the file offset is not consulted or advanced.
 *
 * Returns the number of bytes copied, or -EFAULT if the copy fails.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
    struct mmap_info *state;
    size_t count;

    pr_info("read\n");
    state = filp->private_data;
    count = min(len, (size_t)BUFFER_SIZE);

    if (copy_to_user(buf, state->data, count)) {
        return -EFAULT;
    }
    return count;
}

/*
 * write file operation.
 *
 * Copies up to BUFFER_SIZE bytes from the user buffer to the start of the
 * data page; the file offset is not consulted or advanced.
 *
 * Returns len (the full requested length, even when fewer bytes were
 * stored), or -EFAULT if the copy fails.
 */
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
    struct mmap_info *state;
    size_t count;

    pr_info("write\n");
    state = filp->private_data;
    count = min(len, (size_t)BUFFER_SIZE);

    if (copy_from_user(state->data, buf, count)) {
        return -EFAULT;
    }
    return len;
}

/*
 * release file operation.
 *
 * Frees the data page and the mmap_info allocated by open(), and clears
 * filp->private_data. Always returns 0.
 */
static int release(struct inode *inode, struct file *filp)
{
    struct mmap_info *state = filp->private_data;

    pr_info("release\n");

    filp->private_data = NULL;
    free_page((unsigned long)state->data);
    kfree(state);

    return 0;
}

/* File operations of the proc entry registered in myinit(). */
static const struct file_operations fops = {
    .open    = open,
    .release = release,
    .read    = read,
    .write   = write,
    .mmap    = mmap,
};

static int myinit(void) 
{ 
    proc_create(filename, 0, NULL, &fops); 
    return 0; 
} 

/* Module exit: removes the proc entry that myinit() created. */
static void myexit(void)
{
    remove_proc_entry(filename, NULL);
}

/* Register myinit/myexit as the module entry points and declare the license. */
module_init(myinit) 
module_exit(myexit) 
MODULE_LICENSE("GPL"); 

Userland test

#define _XOPEN_SOURCE 700 
#include <assert.h> 
#include <fcntl.h> 
#include <stdio.h> 
#include <stdlib.h> 
#include <stdint.h> /* uintmax_t */ 
#include <string.h> 
#include <sys/mman.h> 
#include <unistd.h> /* sysconf */ 

#include "common.h" /* virt_to_phys_user */ 

/* Size in bytes of the buffer main() uses for read() and memcmp(). */
enum { BUFFER_SIZE = 4 }; 

/*
 * Userland test for the mmap module.
 *
 * Usage: prog <mmap_file>
 *
 * Maps the file twice, checks that both mappings show the same data and
 * that changes propagate between the mappings and read()/write() on the
 * file descriptor, then unmaps and closes.
 *
 * Every call that can fail is checked outside of assert(), so the program
 * still does its work (and reports failures) when built with NDEBUG;
 * assert() is used only for side-effect-free content checks.
 *
 * Returns EXIT_SUCCESS if every step succeeded, EXIT_FAILURE otherwise.
 */
int main(int argc, char **argv)
{
    int fd;
    long page_size;
    ssize_t nbytes;
    char *address1, *address2;
    char buf[BUFFER_SIZE];
    uintptr_t paddr;

    if (argc < 2) {
        printf("Usage: %s <mmap_file>\n", argv[0]);
        return EXIT_FAILURE;
    }
    page_size = sysconf(_SC_PAGE_SIZE);
    if (page_size < 0) {
        perror("sysconf");
        return EXIT_FAILURE;
    }
    printf("open pathname = %s\n", argv[1]);
    fd = open(argv[1], O_RDWR | O_SYNC);
    if (fd < 0) {
        perror("open");
        return EXIT_FAILURE;
    }
    printf("fd = %d\n", fd);

    /* mmap twice for double fun. */
    puts("mmap 1");
    address1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (address1 == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return EXIT_FAILURE;
    }
    puts("mmap 2");
    address2 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (address2 == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return EXIT_FAILURE;
    }
    assert(address1 != address2);

    /* Read and modify memory. Each first access triggers vm_fault. */
    puts("access 1");
    assert(!strcmp(address1, "asdf"));
    puts("access 2");
    assert(!strcmp(address2, "asdf"));
    strcpy(address1, "qwer");
    /* Also modified. So both virtual addresses point to the same physical address. */
    assert(!strcmp(address2, "qwer"));

    /*
     * Check that the physical addresses are the same.
     * They are, but TODO why virt_to_phys on kernel gives a different value?
     */
    if (virt_to_phys_user(&paddr, getpid(), (uintptr_t)address1)) {
        fputs("virt_to_phys_user failed for mapping 1\n", stderr);
        close(fd);
        return EXIT_FAILURE;
    }
    printf("paddr1 = 0x%jx\n", (uintmax_t)paddr);
    if (virt_to_phys_user(&paddr, getpid(), (uintptr_t)address2)) {
        fputs("virt_to_phys_user failed for mapping 2\n", stderr);
        close(fd);
        return EXIT_FAILURE;
    }
    printf("paddr2 = 0x%jx\n", (uintmax_t)paddr);

    /* Check that modifications made from userland are also visible from the kernel. */
    nbytes = read(fd, buf, BUFFER_SIZE);
    if (nbytes != BUFFER_SIZE) {
        if (nbytes < 0) {
            perror("read");
        } else {
            fputs("read: short read\n", stderr);
        }
        close(fd);
        return EXIT_FAILURE;
    }
    assert(!memcmp(buf, "qwer", BUFFER_SIZE));

    /* Modify the data from the kernel, and check that the change is visible from userland. */
    nbytes = write(fd, "zxcv", 4);
    if (nbytes != 4) {
        if (nbytes < 0) {
            perror("write");
        } else {
            fputs("write: short write\n", stderr);
        }
        close(fd);
        return EXIT_FAILURE;
    }
    assert(!strcmp(address1, "zxcv"));
    assert(!strcmp(address2, "zxcv"));

    /* Cleanup. */
    puts("munmap 1");
    if (munmap(address1, page_size)) {
        perror("munmap");
        close(fd);
        return EXIT_FAILURE;
    }
    puts("munmap 2");
    if (munmap(address2, page_size)) {
        perror("munmap");
        close(fd);
        return EXIT_FAILURE;
    }
    puts("close");
    close(fd);
    return EXIT_SUCCESS;
}
相關問題