DevMem note

来自个人维基
2019年11月6日 (三) 12:18free6d1823讨论 | 贡献的版本

跳转至: 导航搜索

1.devmem ADDRESS [WIDTH [VALUE]]

Read/write from physical address
 
        ADDRESS Address to act upon
        WIDTH   Width (8/16/...)
        VALUE   Data to be written

2. devmem -> mmap

/*
 * Abridged excerpt of the devmem entry point (local declarations, argument
 * parsing and the actual read/write are omitted in these notes).
 * Opens /dev/mem and maps the page that contains the target address.
 */
int devmem_main(int argc UNUSED_PARAM, char **argv)
{
      fd = xopen("/dev/mem", argv[3] ? (O_RDWR | O_SYNC) : (O_RDONLY | O_SYNC)); // open read-write when argv[3] (VALUE) is given, read-only otherwise
      mapped_size = page_size = getpagesize(); // one page (typically 4K bytes)
      // if cross page, mapped_size *=2
      map_base = mmap(NULL,
            mapped_size, // length of the mapping
            argv[3] ? (PROT_READ | PROT_WRITE) : PROT_READ,
            MAP_SHARED,
            fd,
            target & ~(off_t)(page_size - 1)// offset from 0x0000, page start address
          );
      virt_addr = (char*)map_base + offset_in_page;
}

3. mmap

#include <sys/mman.h>
 
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);

详细参数如下:

参数 详细说明
addr 需要映射的虚拟内存地址;如果为NULL,系统会自动选定。映射成功后返回该地址
length 需要映射多大的数据量 (bytes). Must be greater than 0; it does not have to be a multiple of the page size — the kernel rounds it up to whole pages
prot 描述映射区域内存保护方式,包括:PROT_EXEC、PROT_READ、PROT_WRITE、PROT_NONE.
flags 描述映射区域的特性,比如是否对其他进程共享,是否建立匿名映射,是否创建私有的cow.
fd 要映射到内存中的文件描述符
offset 文件映射的偏移量 (必须是页大小的整数倍)

4. mmap to sys_mmap
Questions:
1. offset 如何变成是物理地址? offset 跟vma->pgoff 的关系?
2. vma->vm_start = addr ? 若 addr=NULL 如何给址?

系统调用的入口是entry_SYSCALL_64_fastpath,然后根据系统调用号在sys_call_table中找到对应的函数。
mmap()和munmap()对应的系统调用分别是SyS_mmap()和SyS_munmap()

arch/arc/kernel/sys.c

/*
 * Each __SYSCALL(nr, call) line pulled in from <asm/unistd.h> expands to the
 * designated initializer "[nr] = (call)," inside the table below.
 */
#define __SYSCALL(nr, call) [nr] = (call),
void *sys_call_table[NR_syscalls] = {
	/* default every slot to sys_ni_syscall; entries from the header override it */
	[0 ... NR_syscalls-1] = sys_ni_syscall,
#include <asm/unistd.h>
};

include/uapi/asm-generic/unistd.h

#define __NR_munmap 215
__SYSCALL(__NR_munmap, sys_munmap)
#define __NR_mremap 216
__SYSCALL(__NR_mremap, sys_mremap)
#define __NR_mmap 1058
__SYSCALL(__NR_mmap, sys_mmap)

Reference: https://www.cnblogs.com/sky-heaven/p/5689072.html
arch/arm64/kernel/sys.c

/*
 * mmap system call entry: converts the byte offset into a page offset and
 * forwards all arguments to sys_mmap_pgoff().
 */
asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
			 unsigned long prot, unsigned long flags,
			 unsigned long fd, off_t off)
{
	/* off >> PAGE_SHIFT drops the in-page bits, leaving a page number */
	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
}

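cannot find sys_mmap_pgoff implementation (it is not written out under that name: it is generated by SYSCALL_DEFINE6(mmap_pgoff, ...) in mm/mmap.c, and in mm/nommu.c for no-MMU kernels). But let's see 另一个版本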
mm/nommu.c

/*
 * do_mmap() as quoted from mm/nommu.c (the error-handling tail is abridged).
 *
 * Validates the request, allocates a vm_region and a vm_area_struct, and
 * either shares an existing overlapping region (VM_MAYSHARE case) or sets up
 * a new shared-file/private mapping.  Returns the start address of the
 * mapping, or an error value.
 */
unsigned long do_mmap(struct file *file,
			unsigned long addr,
			unsigned long len,
			unsigned long prot,
			unsigned long flags,
			vm_flags_t vm_flags,
			unsigned long pgoff, // file offset in pages (for /dev/mem: the physical page number)
			unsigned long *populate)
{
	struct vm_area_struct *vma;
	struct vm_region *region;
	struct rb_node *rb;
	unsigned long capabilities, result;
	int ret;
 
	*populate = 0;
 
	/* decide whether we should attempt the mapping, and if so what sort of
	 * mapping */
	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
				    &capabilities);
	if (ret < 0)
		return ret;
 
	/* we ignore the address hint */
	addr = 0; // answer to question 2: the caller's addr hint is not used
	len = PAGE_ALIGN(len);
 
	/* we've determined that we can make the mapping, now translate what we
	 * now know into VMA flags */
	vm_flags |= determine_vm_flags(file, prot, flags, capabilities);
 
	/* we're going to need to record the mapping */
	region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
	if (!region)
		goto error_getting_region;
 
	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
	if (!vma)
		goto error_getting_vma;
 
	region->vm_usage = 1;
	region->vm_flags = vm_flags;
	region->vm_pgoff = pgoff;
 
	INIT_LIST_HEAD(&vma->anon_vma_chain);
	vma->vm_flags = vm_flags;
	vma->vm_pgoff = pgoff; // answer to question 1: pgoff is stored as-is; the in-page bits of the physical address have already been dropped
 
	if (file) {
		region->vm_file = get_file(file);
		vma->vm_file = get_file(file);// the file is attached to the vma
	}
 
	down_write(&nommu_region_sem);
 
	/* (note: the region-overlap handling is not examined in these notes)
	 * if we want to share, we need to check for regions created by other
	 * mmap() calls that overlap with our proposed mapping
	 * - we can only share with a superset match on most regular files
	 * - shared mappings on character devices and memory backed files are
	 *   permitted to overlap inexactly as far as we are concerned for in
	 *   these cases, sharing is handled in the driver or filesystem rather
	 *   than here
	 */
	if (vm_flags & VM_MAYSHARE) {
		struct vm_region *pregion;
		unsigned long pglen, rpglen, pgend, rpgend, start;
 
		pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		pgend = pgoff + pglen;
                // search the nommu region tree for an existing vm_region whose address can be reused as the new mapping's address
		for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
			pregion = rb_entry(rb, struct vm_region, vm_rb);
 
			if (!(pregion->vm_flags & VM_MAYSHARE))
				continue;
 
			/* search for overlapping mappings on the same file */
			if (file_inode(pregion->vm_file) !=
			    file_inode(file))
				continue;
 
			if (pregion->vm_pgoff >= pgend)
				continue;
 
			rpglen = pregion->vm_end - pregion->vm_start;
			rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;// number of pages occupied, always rounded up
			rpgend = pregion->vm_pgoff + rpglen;
			if (pgoff >= rpgend)
				continue;
 
			/* handle inexactly overlapping matches between
			 * mappings */
			if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
			    !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
				/* new mapping is not a subset of the region */
				if (!(capabilities & NOMMU_MAP_DIRECT))
					goto sharing_violation;
				continue;
			}
 
			/* we've found a region we can share */
			pregion->vm_usage++;
			vma->vm_region = pregion;
			start = pregion->vm_start;
			start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
			vma->vm_start = start;// answer to question 2: this is the mapping's start address
			vma->vm_end = start + len;
 
			if (pregion->vm_flags & VM_MAPPED_COPY)
				vma->vm_flags |= VM_MAPPED_COPY;
			else {
				ret = do_mmap_shared_file(vma);
				if (ret < 0) {
					vma->vm_region = NULL;
					vma->vm_start = 0;
					vma->vm_end = 0;
					pregion->vm_usage--;
					pregion = NULL;
					goto error_just_free;
				}
			}
			fput(region->vm_file);
			kmem_cache_free(vm_region_jar, region);
			region = pregion;
			result = start;
			goto share;
		}
 
		/* obtain the address at which to make a shared mapping
		 * - this is the hook for quasi-memory character devices to
		 *   tell us the location of a shared mapping
		 */
		if (capabilities & NOMMU_MAP_DIRECT) {
			addr = file->f_op->get_unmapped_area(file, addr, len,
							     pgoff, flags);
			if (IS_ERR_VALUE(addr)) {
				ret = addr;
				if (ret != -ENOSYS)
					goto error_just_free;
 
				/* the driver refused to tell us where to site
				 * the mapping so we'll have to attempt to copy
				 * it */
				ret = -ENODEV;
				if (!(capabilities & NOMMU_MAP_COPY))
					goto error_just_free;
 
				capabilities &= ~NOMMU_MAP_DIRECT;
			} else {
				vma->vm_start = region->vm_start = addr;
				vma->vm_end = region->vm_end = addr + len;
			}
		}
	}
 
	vma->vm_region = region;
 
	/* set up the mapping
	 * - the region is filled in if NOMMU_MAP_DIRECT is still set
	 */
	if (file && vma->vm_flags & VM_SHARED)
		ret = do_mmap_shared_file(vma); // calls the file's mmap handler (the /dev/mem driver in this walkthrough)
	else
		ret = do_mmap_private(vma, region, len, capabilities);
	if (ret < 0)
		goto error_just_free;
	add_nommu_region(region);
 
	/* clear anonymous mappings that don't ask for uninitialized data */
	if (!vma->vm_file && !(flags & MAP_UNINITIALIZED))
		memset((void *)region->vm_start, 0,
		       region->vm_end - region->vm_start);
 
	/* okay... we have a mapping; now we have to register it */
	result = vma->vm_start;
 
	current->mm->total_vm += len >> PAGE_SHIFT;
 
share:
	add_vma_to_mm(current->mm, vma);
 
	/* we flush the region from the icache only when the first executable
	 * mapping of it is made  */
	if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) {
		flush_icache_range(region->vm_start, region->vm_end);
		region->vm_icache_flushed = true;
	}
 
	up_write(&nommu_region_sem);
 
	return result;
/* error-handling code is omitted from this excerpt; all labels share one return */
error_just_free:
error:
sharing_violation:
error_getting_vma:
error_getting_region:
	return -ENOMEM;
}

5. sys_mmap to /dev/mem driver's mmap_mem

mm/nommu.c

/*
 * Abridged excerpt: hands the vma to the mmap handler of the file backing it
 * (the declaration of ret and the handling of its value are omitted in these
 * notes).
 */
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
	ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
}

6. /dev/mem driver in linux-kernel/drivers/char/mem.c

/*
 * mmap handler of the /dev/mem driver: runs a series of checks on the
 * requested range, then maps the physical pages starting at vma->vm_pgoff
 * onto the user range starting at vma->vm_start.
 *
 * Returns 0 on success, or -EINVAL / -ENOSYS / -EPERM / -EAGAIN when the
 * corresponding check or the remap fails.
 */
static int mmap_mem(struct file *file, struct vm_area_struct *vma)
{
    size_t size = vma->vm_end - vma->vm_start;
    if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))// not examined in these notes
          return -EINVAL;
    if (!private_mapping_ok(vma))//vm_flags must have VM_MAYSHARE
        return -ENOSYS;
 
    if (!range_is_allowed(vma->vm_pgoff, size))//check each page is allowed by calling devmem_is_allowed. ARCH_HAS_VALID_PHYS_ADDR_RANGE 
        return -EPERM;
 
    if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
                        &vma->vm_page_prot))// not examined in these notes
        return -EINVAL;
 
    vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
                         size,
                         vma->vm_page_prot);// related to caching; not covered here
 
    vma->vm_ops = &mmap_mem_ops;
 
    /* Remap-pfn-range will mark the range VM_IO */
    if (remap_pfn_range(vma,
                vma->vm_start, // starting virtual address
                vma->vm_pgoff,
                size,
                vma->vm_page_prot)) {
        return -EAGAIN;
    }
    return 0;
}

前面检查完了 这边直接把实体对应虚拟位址指派给mm 一页一个mm
可以看到 实体与虚拟位址都是连续的
remap_pfn_range in ./mm/memory.c

remap_pfn_range(vma, //user vma to map to
                addr, //target user address
                pfn, //page frame number of the physical memory to map (physical address >> PAGE_SHIFT)
                size, //size of map area
                prot //page protection flags for this mapping
               )
{
    vma->vm_pgoff = pfn;
    vma->vm_flags |= VM_IO|VM_PFNMAP|VM_DONTEXPAND |VM_DONTDUMP;
    mm = vma->vm_mm;
    do {
        //for each page, update mm
        next = pgd_addr_end(addr, end) 
        remap_pud_range(mm, pgd, addr, next, pfn+page#, prot)
        pgd++, addr = next,
    }while (addr != end)


Question:
如何确认offset 合理区域? length 可以有多大?

Reference:
https://blog.csdn.net/junllee/article/details/82146351