“DevMem note”的版本间的差异

来自个人维基
跳转至: 导航搜索
第336行: 第336行:
 
         return -ENOSYS;
 
         return -ENOSYS;
  
     if (!range_is_allowed(vma->vm_pgoff, size))//6.2 check each page is allowed by calling devmem_is_allowed. ARCH_HAS_VALID_PHYS_ADDR_RANGE
+
     if (!range_is_allowed(vma->vm_pgoff, size))//6.2
 
         return -EPERM;
 
         return -EPERM;
  
第345行: 第345行:
 
     vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
 
     vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
 
                         size,
 
                         size,
                         vma->vm_page_prot);// 6.4 跟 cache 有关 先不管
+
                         vma->vm_page_prot);// 6.4 跟 cache 有关  
 
+
 
     vma->vm_ops = &mmap_mem_ops;
 
     vma->vm_ops = &mmap_mem_ops;
  
第442行: 第441行:
 
}
 
}
 
</source>
 
</source>
 
+
lomemory  由 max_pfn定义 或是在 memory 的memblock_region 
 
<source lang="c">
 
<source lang="c">
 
int page_is_ram(unsigned long pfn)
 
int page_is_ram(unsigned long pfn)
第468行: 第467行:
 
</source>
 
</source>
 
max_pfn代表了内核lowmem的页个数,lowmem在内核下静态线性映射。系统启动之初完毕映射之后不会修改。读写效率高。内核代码都是跑在lowmem。lowmem大小我们能够通过cmdline的“mem=”来指定。
 
max_pfn代表了内核lowmem的页个数,lowmem在内核下静态线性映射。系统启动之初完毕映射之后不会修改。读写效率高。内核代码都是跑在lowmem。lowmem大小我们能够通过cmdline的“mem=”来指定。
 +
 +
CONFIG_STRICT_DEVMEM  定义在 arch/arm/Kconfig.debug
 +
<source lang="c">
 +
config STRICT_DEVMEM
 +
    def_bool y
 +
    prompt "Filter access to /dev/mem"
 +
    help
 +
      This option restricts access to /dev/mem.  If this option is
 +
      disabled, you allow userspace access to all memory, including
 +
      kernel and userspace memory. Accidental memory access is likely
 +
      to be disastrous.
 +
      Memory access is required for experts who want to debug the kernel.
 +
</source>
 
6.3
 
6.3
phys_mem_access_prot_allowed
+
phys_mem_access_prot_allowed 实现为空返回1,没有影响。
 
6.4
 
6.4
 
phys_mem_access_prot
 
phys_mem_access_prot
 
+
<source lang="c">
      
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 +
                  unsigned long size, pgprot_t vma_prot)
 +
{
 +
     //假设有平台实现的phys_mem_access_prot,则调用之。
 +
    if (ppc_md.phys_mem_access_prot)
 +
        return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot);
 +
    //对于不是lowmem范围内的物理地址。权限设置为uncached。
 +
    if (!page_is_ram(pfn))
 +
        vma_prot = pgprot_noncached(vma_prot);
 +
    return vma_prot;
 +
}
 +
</source>
  
  
 
Question:
 
Question:
 
如何确认offset 合理区域? length 可以有多大?
 
如何确认offset 合理区域? length 可以有多大?
 +
Answer:
 +
所以假设打开CONFIG_STRICT_DEVMEM,mem驱动会对mmap要映射的物理地址进行范围和位置的检查然后才进行映射。检查条件例如以下:
 +
(1)映射范围不能超过4G。
 +
(2)该物理地址所在iomem不能exclusive.
 +
(3)该物理地址不能处在lowmem中。
  
 
Reference:
 
Reference:
 
https://blog.csdn.net/junllee/article/details/82146351
 
https://blog.csdn.net/junllee/article/details/82146351

2019年11月6日 (三) 17:47的版本

1.devmem ADDRESS [WIDTH [VALUE]]

Read/write from physical address
 
        ADDRESS Address to act upon
        WIDTH   Width (8/16/...)
        VALUE   Data to be written

2. devmem -> mmap

/*
 * Abridged excerpt of the devmem entry point: declarations, argument parsing
 * and error handling are not shown. It opens /dev/mem, maps the page that
 * contains the target physical address, and derives the user-space pointer
 * for the target from the mapping base.
 */
int devmem_main(int argc UNUSED_PARAM, char **argv)
{
      fd = xopen("/dev/mem", argv[3] ? (O_RDWR | O_SYNC) : (O_RDONLY | O_SYNC)); // argv[3] (VALUE) present => open read-write, otherwise read-only
      mapped_size = page_size = getpagesize(); // one page; typically 4K bytes -- TODO confirm for the target platform
      // if the access crosses a page boundary, mapped_size *= 2 (code elided)
      map_base = mmap(NULL,
            mapped_size, // length of the mapping
            argv[3] ? (PROT_READ | PROT_WRITE) : PROT_READ,
            MAP_SHARED,
            fd,
            target & ~(off_t)(page_size - 1) // file offset: target rounded down to the start of its page
          );
      virt_addr = (char*)map_base + offset_in_page;
}

3. mmap

#include <sys/mman.h>
 
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);

详细参数如下:

参数 详细说明
addr 需要映射的虚拟内存地址;如果为NULL,系统会自动选定。映射成功后返回该地址
length 需要映射的字节数。不必是页大小的整数倍,内核会将其向上对齐到页大小(见下文 do_mmap 中的 len = PAGE_ALIGN(len))
prot 描述映射区域内存保护方式,包括:PROT_EXEC、PROT_READ、PROT_WRITE、PROT_NONE.
flags 描述映射区域的特性,比如是否对其他进程共享,是否建立匿名映射,是否创建私有的cow.
fd 要映射到内存中的文件描述符
offset 文件映射的偏移量,必须是页大小的整数倍

4. mmap to sys_mmap
Questions:
1. offset 如何变成是物理地址? offset 跟vma->pgoff 的关系?
2. vma->vm_start = addr ? 若 addr=NULL 如何给址?

系统调用的入口是entry_SYSCALL_64_fastpath,然后根据系统调用号在sys_call_table中找到对应的函数。
mmap()和munmap()对应的系统调用分别是SyS_mmap()和SyS_munmap()

arch/arc/kernel/sys.c

/* Turns each __SYSCALL(nr, call) entry into the designated initializer "[nr] = (call),". */
#define __SYSCALL(nr, call) [nr] = (call),
/*
 * Syscall dispatch table. Every slot is first set to sys_ni_syscall; the
 * __SYSCALL() entries pulled in by the #include inside the initializer then
 * override the slots of the syscalls they name.
 */
void *sys_call_table[NR_syscalls] = {
	[0 ... NR_syscalls-1] = sys_ni_syscall,
#include <asm/unistd.h>
};

include/uapi/asm-generic/unistd.h

/*
 * Each syscall number is defined and then passed, together with its handler,
 * to __SYSCALL(). Only the memory-mapping entries are quoted here.
 */
#define __NR_munmap 215
__SYSCALL(__NR_munmap, sys_munmap)
#define __NR_mremap 216
__SYSCALL(__NR_mremap, sys_mremap)
#define __NR_mmap 1058
__SYSCALL(__NR_mmap, sys_mmap)

Reference: https://www.cnblogs.com/sky-heaven/p/5689072.html
arch/arm64/kernel/sys.c

/*
 * mmap entry point: converts the byte offset `off` into a page offset by
 * shifting out the low PAGE_SHIFT bits, then forwards all arguments to
 * sys_mmap_pgoff().
 */
asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
			 unsigned long prot, unsigned long flags,
			 unsigned long fd, off_t off)
{
	/* Page index of the requested offset; the sub-page bits are dropped. */
	off_t pgoff = off >> PAGE_SHIFT;

	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
}

mm/mmap.c

/*
 * Abridged excerpt: only the hand-off to vm_mmap_pgoff() is shown. The
 * declarations of `file` and `retval`, and the return statement, are not
 * part of this excerpt.
 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
  retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
}
 
/*
 * Abridged excerpt: only the call into do_mmap_pgoff() is shown. The
 * declarations of `ret` and `populate`, and the return statement, are not
 * part of this excerpt.
 */
unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot,
	unsigned long flag, unsigned long pgoff)
{
		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
				    &populate);
 
}
/*
 * Thin wrapper around do_mmap(): forwards every argument unchanged and
 * passes 0 in the extra (sixth) argument slot.
 */
static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
	unsigned long len, unsigned long prot, unsigned long flags,
	unsigned long pgoff, unsigned long *populate)
{
	unsigned long mapped;

	mapped = do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
	return mapped;
}

mm/nommu.c

/*
 * No-MMU do_mmap(), quoted with the error-handling tail elided.
 *
 * Visible flow: validate the request, discard the caller's address hint,
 * page-align the length, allocate a vm_region and a vm_area_struct, and
 * record flags/pgoff/file in them. For VM_MAYSHARE mappings it walks
 * nommu_region_tree looking for an existing region on the same inode that
 * covers the request and shares it if found; otherwise, with
 * NOMMU_MAP_DIRECT, it asks the file's get_unmapped_area() for the address.
 * Finally it sets up the mapping (shared-file or private), registers the
 * region and the vma, and returns the start address of the mapping.
 */
unsigned long do_mmap(struct file *file,
			unsigned long addr,
			unsigned long len,
			unsigned long prot,
			unsigned long flags,
			vm_flags_t vm_flags,
			unsigned long pgoff, // page offset; for /dev/mem this is the physical page number
			unsigned long *populate)
{
	struct vm_area_struct *vma;
	struct vm_region *region;
	struct rb_node *rb;
	unsigned long capabilities, result;
	int ret;
 
	*populate = 0;
 
	/* decide whether we should attempt the mapping, and if so what sort of
	 * mapping */
	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
				    &capabilities);
	if (ret < 0)
		return ret;
 
	/* we ignore the address hint */
	addr = 0; // Answer to question 2: the caller-supplied addr is not used
	len = PAGE_ALIGN(len);
 
	/* we've determined that we can make the mapping, now translate what we
	 * now know into VMA flags */
	vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
 
	/* we're going to need to record the mapping */
	region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
	if (!region)
		goto error_getting_region;
 
	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (!vma)
		goto error_getting_vma;
 
	region->vm_usage = 1;
	region->vm_flags = vm_flags;
	region->vm_pgoff = pgoff;
 
	INIT_LIST_HEAD(&vma->anon_vma_chain);
	vma->vm_flags = vm_flags;
	vma->vm_pgoff = pgoff; // Answer to question 1: pgoff is stored as-is; the sub-page bits of the physical address were already dropped
 
	if (file) {
		region->vm_file = get_file(file);
		vma->vm_file = get_file(file); // the file is recorded in the vma
	}
 
	down_write(&nommu_region_sem);
 
	/* (Note: region overlap handling is not examined in these notes.)
	 * if we want to share, we need to check for regions created by other
	 * mmap() calls that overlap with our proposed mapping
	 * - we can only share with a superset match on most regular files
	 * - shared mappings on character devices and memory backed files are
	 *   permitted to overlap inexactly as far as we are concerned for in
	 *   these cases, sharing is handled in the driver or filesystem rather
	 *   than here
	 */
	if (vm_flags & VM_MAYSHARE) {
		struct vm_region *pregion;
		unsigned long pglen, rpglen, pgend, rpgend, start;
 
		pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		pgend = pgoff + pglen;
                // walk the existing regions looking for one whose address can serve as the new virtual address
		for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
			pregion = rb_entry(rb, struct vm_region, vm_rb);
 
			if (!(pregion->vm_flags & VM_MAYSHARE))
				continue;
 
			/* search for overlapping mappings on the same file */
			if (file_inode(pregion->vm_file) !=
			    file_inode(file))
				continue;
 
			if (pregion->vm_pgoff >= pgend)
				continue;
 
			rpglen = pregion->vm_end - pregion->vm_start;
			rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT; // number of pages occupied, rounded up
			rpgend = pregion->vm_pgoff + rpglen;
			if (pgoff >= rpgend)
				continue;
 
			/* handle inexactly overlapping matches between
			 * mappings */
			if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
			    !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
				/* new mapping is not a subset of the region */
				if (!(capabilities & NOMMU_MAP_DIRECT))
					goto sharing_violation;
				continue;
			}
 
			/* we've found a region we can share */
			pregion->vm_usage++;
			vma->vm_region = pregion;
			start = pregion->vm_start;
			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
			vma->vm_start = start; // Answer to question 2: this is where the start address comes from
			vma->vm_end = start + len;
 
			if (pregion->vm_flags & VM_MAPPED_COPY)
				vma->vm_flags |= VM_MAPPED_COPY;
			else {
				ret = do_mmap_shared_file(vma);
				if (ret < 0) {
					vma->vm_region = NULL;
					vma->vm_start = 0;
					vma->vm_end = 0;
					pregion->vm_usage--;
					pregion = NULL;
					goto error_just_free;
				}
			}
			fput(region->vm_file);
			kmem_cache_free(vm_region_jar, region);
			region = pregion;
			result = start;
			goto share;
		}
 
		/* obtain the address at which to make a shared mapping
		 * - this is the hook for quasi-memory character devices to
		 *   tell us the location of a shared mapping
		 */
		if (capabilities & NOMMU_MAP_DIRECT) {
			addr = file->f_op->get_unmapped_area(file, addr, len,
							     pgoff, flags);
			if (IS_ERR_VALUE(addr)) {
				ret = addr;
				if (ret != -ENOSYS)
					goto error_just_free;
 
				/* the driver refused to tell us where to site
				 * the mapping so we'll have to attempt to copy
				 * it */
				ret = -ENODEV;
				if (!(capabilities & NOMMU_MAP_COPY))
					goto error_just_free;
 
				capabilities &= ~NOMMU_MAP_DIRECT;
			} else {
				vma->vm_start = region->vm_start = addr;
				vma->vm_end = region->vm_end = addr + len;
			}
		}
	}
 
	vma->vm_region = region;
 
	/* set up the mapping
	 * - the region is filled in if NOMMU_MAP_DIRECT is still set
	 */
	if (file && vma->vm_flags & VM_SHARED)
		ret = do_mmap_shared_file(vma); // calls the file's mmap handler (the /dev/mem driver in this walkthrough)
	else
		ret = do_mmap_private(vma, region, len, capabilities);
	if (ret < 0)
		goto error_just_free;
	add_nommu_region(region);
 
	/* clear anonymous mappings that don't ask for uninitialized data */
	if (!vma->vm_file && !(flags & MAP_UNINITIALIZED))
		memset((void *)region->vm_start, 0,
		       region->vm_end - region->vm_start);
 
	/* okay... we have a mapping; now we have to register it */
	result = vma->vm_start;
 
	current->mm->total_vm += len >> PAGE_SHIFT;
 
share:
	add_vma_to_mm(current->mm, vma);
 
	/* we flush the region from the icache only when the first executable
	 * mapping of it is made  */
	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
		flush_icache_range(region->vm_start, region->vm_end);
		region->vm_icache_flushed = true;
	}
 
	up_write(&nommu_region_sem);
 
	return result;
/* error-handling code is elided in these notes; as quoted, every label below
 * falls through to the single return */
error_just_free:
error:
sharing_violation:
error_getting_vma:
error_getting_region:
	return -ENOMEM;
}

5. sys_mmap to /dev/mem driver's mmap_mem

mm/nommu.c

/*
 * Abridged excerpt: shows only the dispatch to the mmap handler in the
 * file_operations of the file backing the vma. The declaration of `ret`
 * and the handling of its value are not part of this excerpt.
 */
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
}

6. /dev/mem driver in linux-kernel/drivers/char/mem.c

/*
 * mmap handler of the /dev/mem driver.
 *
 * Runs a series of checks on the requested physical range (vma->vm_pgoff is
 * the starting page frame, size is the length of the vma), each with its own
 * error code, then picks the page protection and maps the range with
 * remap_pfn_range(). The 6.x markers refer to the numbered sections that
 * follow in these notes.
 *
 * Returns 0 on success, or -EINVAL / -ENOSYS / -EPERM / -EAGAIN depending on
 * which step rejected the request.
 */
static int mmap_mem(struct file *file, struct vm_area_struct *vma)
{
    size_t size = vma->vm_end - vma->vm_start;
    if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))//6.1
          return -EINVAL;
    if (!private_mapping_ok(vma))//vm_flags must have VM_MAYSHARE
        return -ENOSYS;
 
    if (!range_is_allowed(vma->vm_pgoff, size))//6.2  
        return -EPERM;
 
    if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
                        &vma->vm_page_prot))//6.3 no effective check (see section 6.3)
        return -EINVAL;
 
    vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
                         size,
                         vma->vm_page_prot);// 6.4 cache-related 
    vma->vm_ops = &mmap_mem_ops;
 
    /* Remap-pfn-range will mark the range VM_IO */
    if (remap_pfn_range(vma,
                vma->vm_start, // starting virtual address 
                vma->vm_pgoff,
                size,
                vma->vm_page_prot)) {
        return -EAGAIN;
    }
    return 0;
}

前面检查完了 这边直接把实体对应虚拟位址指派给mm 一页一个mm
可以看到 实体与虚拟位址都是连续的
remap_pfn_range in ./mm/memory.c

remap_pfn_range(vma, //user vma to map to
                addr, //target user address
                pfn, //physic address of kernel memory. Page #?
                size, //size of map area
                prot //page protection flags for this mapping
               )
{
    vma->vm_pgoff = pfn;
    vma->flags |= VM_IO|VM_PFNMAP|VM_DONTEXPAD |VM_DONTDUMP;
    mm = vma->mm;
    do {
        //for each page, update mm
        next = pgd_addr_end(addr, end) 
        remap_pud_range(mm, pgd, addr, next, pfn+page#, prot)
        pgd++, addr = next,
    }while (addr != end)

6.1
valid_mmap_phys_addr_range
该函数确定mmap的范围是否超过4G,超过4G则为无效物理地址,这样的情况用户空间一般不会出现。

/*
 * Range check for a /dev/mem mapping request.
 *
 * pfn:  first page frame of the request.
 * size: length of the request in bytes.
 *
 * Returns non-zero when the page frame just past the end of the request does
 * not exceed 0x00100000, zero otherwise. The accompanying notes describe
 * this limit as the 4G boundary.
 */
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
    return pfn + (size >> PAGE_SHIFT) <= 0x00100000;
}

6.2 range_is_allowed(vma->vm_pgoff, size)

#ifdef CONFIG_STRICT_DEVMEM
/*
 * Decide whether /dev/mem may expose the physical range that starts at page
 * frame `pfn` and is `size` bytes long.
 *
 * Walks the range one page at a time and asks devmem_is_allowed() about each
 * page frame. Returns 0 as soon as one page is refused, 1 if every page is
 * allowed.
 */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
    u64 from = ((u64)pfn) << PAGE_SHIFT;
    u64 to = from + size;
    u64 cursor = from;

    /* Check whether the physical address of each page is permitted. */
    while (cursor < to) {
        if (!devmem_is_allowed(pfn))
            return 0;
        cursor += PAGE_SIZE;
        pfn++;
    }
    return 1;
}

#else
/* Without CONFIG_STRICT_DEVMEM no filtering is done: every range is allowed. */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
    return 1;
}
#endif

devmem_is_allowed 在arch/arm/mm/mmap.c
disallowing access to system RAM and device-exclusive MMIO regions.

/*
 * Per-page policy for /dev/mem access.
 *
 * Returns 1 only when the page frame is neither inside an exclusive iomem
 * region nor RAM; returns 0 otherwise. The exclusivity test runs first and
 * page_is_ram() is only consulted when that test passes.
 */
int devmem_is_allowed(unsigned long pfn)
{
    return !iomem_is_exclusive(pfn << PAGE_SHIFT) && !page_is_ram(pfn);
}
 
 
/*
 * check if an address is reserved in the iomem resource tree
 * returns 1 if reserved, 0 if not reserved.
 *
 * Abridged excerpt: only the walk over the children of iomem_resource and
 * the flag test are shown. The declarations of `err` and `l`, any comparison
 * of `addr` against the resource ranges, and the return statement are not
 * part of this excerpt.
 */
int iomem_is_exclusive(u64 addr)
{
    struct resource *p = &iomem_resource;
    for (p = p->child; p ; p = r_next(NULL, p, &l)) {
        /* A resource flagged both BUSY and EXCLUSIVE marks the address as reserved. */
        if (p->flags & IORESOURCE_BUSY &&
             p->flags & IORESOURCE_EXCLUSIVE) {
            err = 1;
            break;
        }
    }
 
}

lowmem 由 max_pfn 定义,或是由 memory 的 memblock_region 定义

/*
 * Report whether page frame `pfn` is RAM.
 *
 * Without CONFIG_PPC64 a frame counts as RAM when it lies below max_pfn.
 * With CONFIG_PPC64 the frame's physical address must fall inside one of
 * the regions of the `memory` memblock list.
 *
 * Returns non-zero for RAM, 0 otherwise.
 */
int page_is_ram(unsigned long pfn)
{
#ifdef CONFIG_PPC64
    struct memblock_region *reg;
    unsigned long paddr = (pfn << PAGE_SHIFT);

    for_each_memblock(memory, reg) {
        /* Skip regions that start above the address. */
        if (paddr < reg->base)
            continue;
        if (paddr < (reg->base + reg->size))
            return 1;
    }
    return 0;
#else    /* XXX for now */
    return pfn < max_pfn;
#endif
}

max_pfn 的赋值在 do_init_bootmem 中,如下:

/*
 * Abridged excerpt: only the assignment of max_pfn is shown. Both
 * max_low_pfn and max_pfn are set to the page frame number at the end of
 * DRAM as reported by memblock_end_of_DRAM().
 */
void __init do_init_bootmem(void)
{
    max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
}

max_pfn 代表了内核 lowmem 的页个数。lowmem 在内核中是静态线性映射的,系统启动之初完成映射之后不会再修改,读写效率高。内核代码都运行在 lowmem 中。lowmem 的大小可以通过 cmdline 的“mem=”来指定。

CONFIG_STRICT_DEVMEM 定义在 arch/arm/Kconfig.debug

# Boolean option, enabled by default. In the /dev/mem driver quoted in these
# notes it selects the filtering variant of range_is_allowed().
config STRICT_DEVMEM
    def_bool y
    prompt "Filter access to /dev/mem"
    help
      This option restricts access to /dev/mem.  If this option is
      disabled, you allow userspace access to all memory, including
      kernel and userspace memory. Accidental memory access is likely
      to be disastrous.
      Memory access is required for experts who want to debug the kernel.

6.3
phys_mem_access_prot_allowed 实现为空返回1,没有影响。
6.4
phys_mem_access_prot

/*
 * Choose the page protection for a /dev/mem mapping.
 *
 * If the platform supplies its own phys_mem_access_prot hook in ppc_md, the
 * decision is delegated to it. Otherwise page frames that page_is_ram()
 * does not recognise as RAM get the non-cached variant of vma_prot, and RAM
 * frames keep vma_prot unchanged.
 */
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                  unsigned long size, pgprot_t vma_prot)
{
    /* A platform-specific implementation, when present, takes precedence. */
    if (ppc_md.phys_mem_access_prot)
        return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot);

    /* Addresses outside RAM are mapped uncached. */
    return page_is_ram(pfn) ? vma_prot : pgprot_noncached(vma_prot);
}


Question:
如何确认offset 合理区域? length 可以有多大?
Answer:
如果打开 CONFIG_STRICT_DEVMEM,mem 驱动会先对 mmap 要映射的物理地址进行范围和位置的检查,然后才进行映射。检查条件如下:
(1)映射范围不能超过4G。
(2)该物理地址所在iomem不能exclusive.
(3)该物理地址不能处在lowmem中。

Reference:
https://blog.csdn.net/junllee/article/details/82146351