“DevMem note”的版本间的差异
free6d1823(讨论 | 贡献) |
free6d1823(讨论 | 贡献) |
||
第442行的变更(本次修订在此处新增以下内容):
<source lang="c">
int page_is_ram(unsigned long pfn)
{
#ifndef CONFIG_PPC64 /* XXX for now */
	return pfn < max_pfn;
#else
	unsigned long paddr = (pfn << PAGE_SHIFT);
	struct memblock_region *reg;
	for_each_memblock(memory, reg)
		if (paddr >= reg->base && paddr < (reg->base + reg->size))
			return 1;
	return 0;
#endif
}
</source>
max_pfn赋值在do_init_bootmem中。例如以下:
<source lang="c">
void __init do_init_bootmem(void)
{
	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
}
</source>
max_pfn代表了内核lowmem的页个数,lowmem在内核下静态线性映射。系统启动之初完成映射之后不会修改,读写效率高。内核代码都是跑在lowmem。lowmem大小我们能够通过cmdline的“mem=”来指定。
6.3 phys_mem_access_prot_allowed
6.4 phys_mem_access_prot
2019年11月6日 (三) 16:03的版本
1.devmem ADDRESS [WIDTH [VALUE]]
Read/write from physical address ADDRESS Address to act upon WIDTH Width (8/16/...) VALUE Data to be written
2. devmem -> mmap
/*
 * BusyBox devmem applet entry point (excerpt; squashed onto one line by the
 * wiki export, so it is illustrative rather than compilable).
 * Flow shown: open /dev/mem read-write only when a VALUE argument (argv[3])
 * is present, otherwise read-only (this is what the inline Chinese note
 * says); mmap one page (doubled if the access crosses a page boundary) at
 * the page-aligned target offset; the user virtual address for the access
 * is then map_base + offset_in_page.
 */
int devmem_main(int argc UNUSED_PARAM, char **argv) { fd = xopen("/dev/mem", argv[3] ? (O_RDWR | O_SYNC) : (O_RDONLY | O_SYNC)); //根据第三个参数确定是以只读形式打开,还是以读写形式打开。 mapped_size = page_size = getpagesize(); //4K bytes //if cross page, mapped_size *=2 map_base = mmap(NULL, mapped_size, // argv[3] ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, target & ~(off_t)(page_size - 1)//offset from 0x0000, page start address ); virt_addr = (char*)map_base + offset_in_page; }
3. mmap
#include <sys/mman.h> void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); int munmap(void *addr, size_t length);
详细参数如下:
参数 | 详细说明 |
---|---|
addr | 需要映射的虚拟内存地址;如果为NULL,系统会自动选定。映射成功后返回该地址 |
length | 需要映射多大的数据量. Must be a multiple of PAGE_SIZE |
prot | 描述映射区域内存保护方式,包括:PROT_EXEC、PROT_READ、PROT_WRITE、PROT_NONE. |
flags | 描述映射区域的特性,比如是否对其他进程共享,是否建立匿名映射,是否创建私有的cow. |
fd | 要映射到内存中的文件描述符 |
offset | 文件映射的偏移量 |
4. mmap to sys_mmap
Questions:
1. offset 如何变成是物理地址? offset 跟vma->pgoff 的关系?
2. vma->vm_start = addr ? 若 addr=NULL 如何给址?
系统调用的入口是entry_SYSCALL_64_fastpath,然后根据系统调用号在sys_call_table中找到对应的函数。
mmap()和munmap()对应的系统调用分别是SyS_mmap()和SyS_munmap()
arch/arc/kernel/sys.c
/*
 * Syscall table construction (arch/arc/kernel/sys.c excerpt, one line).
 * Every slot defaults to sys_ni_syscall via the GNU range designated
 * initializer [0 ... NR_syscalls-1]; including <asm/unistd.h> with
 * __SYSCALL defined as "[nr] = (call)," then overrides the implemented
 * entries by syscall number.
 */
#define __SYSCALL(nr, call) [nr] = (call), void *sys_call_table[NR_syscalls] = { [0 ... NR_syscalls-1] = sys_ni_syscall, #include <asm/unistd.h> };
include/uapi/asm-generic/unistd.h
#define __NR_munmap 215 __SYSCALL(__NR_munmap, sys_munmap) #define __NR_mremap 216 __SYSCALL(__NR_mremap, sys_mremap) #define __NR_mmap 1058 __SYSCALL(__NR_mmap, sys_mmap)
Reference: https://www.cnblogs.com/sky-heaven/p/5689072.html
arch/arm64/kernel/sys.c
/*
 * arm64 mmap syscall wrapper (excerpt): converts the byte offset to a page
 * offset (off >> PAGE_SHIFT) and forwards to sys_mmap_pgoff — this is where
 * the user-supplied offset becomes vma->vm_pgoff further down the chain.
 * NOTE(review): the upstream kernel also rejects a non-page-aligned off
 * before shifting; that check is not shown in this excerpt — verify against
 * the source tree.
 */
asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, off_t off) { return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); }
mm/mmap.c
/*
 * Forwarding chain from the mmap_pgoff syscall down to do_mmap
 * (mm/mmap.c excerpt; bodies abbreviated to just the forwarding call):
 *   SyS_mmap_pgoff -> vm_mmap_pgoff -> do_mmap_pgoff -> do_mmap.
 * pgoff is passed through unchanged at every hop, so for /dev/mem the
 * value derived from the user offset reaches do_mmap intact.
 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) { retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); } unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, &populate); } static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate) { return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); }
mm/nommu.c
/*
 * do_mmap from mm/nommu.c (excerpt, squashed by the wiki export; Chinese
 * annotations left in place — translations below).
 * Flow: validate_mmap_request(); the addr hint is unconditionally ignored
 * ("答案 2, addr 无用" = "Answer to Q2: addr is unused"); a vm_region and
 * vma are allocated and pgoff is stored directly in vma->vm_pgoff
 * ("问题1 答案: pgoff直接带入" = "Answer to Q1: pgoff is carried through
 * as-is" — for /dev/mem this is the physical page number). For VM_MAYSHARE
 * mappings the nommu_region_tree is scanned for an overlapping shareable
 * region on the same inode ("从无名区找一个vm_region…" = "find an existing
 * vm_region to reuse as the new virtual address"; "占用页数 已无条件进位" =
 * "page count, rounded up"; "答案2 start地址" = "Answer to Q2: the start
 * address"). Otherwise get_unmapped_area() asks the driver where to site a
 * direct mapping, or the data is copied via do_mmap_private(). Finally the
 * region is registered and vma->vm_start is returned.
 * "调用/dev/mem driver" = "calls into the /dev/mem driver"
 * (do_mmap_shared_file -> f_op->mmap -> mmap_mem).
 */
unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, //phical page number unsigned long *populate) { struct vm_area_struct *vma; struct vm_region *region; struct rb_node *rb; unsigned long capabilities, result; int ret; *populate = 0; /* decide whether we should attempt the mapping, and if so what sort of * mapping */ ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, &capabilities); if (ret < 0) return ret; /* we ignore the address hint */ addr = 0; //答案 2, addr 无用 len = PAGE_ALIGN(len); /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ vm_flags |= determine_vm_flags(file, prot, flags, capabilities); /* we're going to need to record the mapping */ region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); if (!region) goto error_getting_region; vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) goto error_getting_vma; region->vm_usage = 1; region->vm_flags = vm_flags; region->vm_pgoff = pgoff; INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_flags = vm_flags; vma->vm_pgoff = pgoff; //问题1 答案: pgoff直接带入. 
Phy 位址不为 0的部份已被ignore if (file) { region->vm_file = get_file(file); vma->vm_file = get_file(file);//file 带入 } down_write(&nommu_region_sem); /* 这里不管区域重叠的问题 if we want to share, we need to check for regions created by other * mmap() calls that overlap with our proposed mapping * - we can only share with a superset match on most regular files * - shared mappings on character devices and memory backed files are * permitted to overlap inexactly as far as we are concerned for in * these cases, sharing is handled in the driver or filesystem rather * than here */ if (vm_flags & VM_MAYSHARE) { struct vm_region *pregion; unsigned long pglen, rpglen, pgend, rpgend, start; pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; pgend = pgoff + pglen; //从无名区找一个vm_region的位址来当新的虚拟位址 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { pregion = rb_entry(rb, struct vm_region, vm_rb); if (!(pregion->vm_flags & VM_MAYSHARE)) continue; /* search for overlapping mappings on the same file */ if (file_inode(pregion->vm_file) != file_inode(file)) continue; if (pregion->vm_pgoff >= pgend) continue; rpglen = pregion->vm_end - pregion->vm_start; rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;//占用页数 已无条件进位 rpgend = pregion->vm_pgoff + rpglen; if (pgoff >= rpgend) continue; /* handle inexactly overlapping matches between * mappings */ if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { /* new mapping is not a subset of the region */ if (!(capabilities & NOMMU_MAP_DIRECT)) goto sharing_violation; continue; } /* we've found a region we can share */ pregion->vm_usage++; vma->vm_region = pregion; start = pregion->vm_start; start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; vma->vm_start = start;//答案2 start地址 vma->vm_end = start + len; if (pregion->vm_flags & VM_MAPPED_COPY) vma->vm_flags |= VM_MAPPED_COPY; else { ret = do_mmap_shared_file(vma); if (ret < 0) { vma->vm_region = NULL; vma->vm_start = 0; vma->vm_end = 0; pregion->vm_usage--; 
pregion = NULL; goto error_just_free; } } fput(region->vm_file); kmem_cache_free(vm_region_jar, region); region = pregion; result = start; goto share; } /* obtain the address at which to make a shared mapping * - this is the hook for quasi-memory character devices to * tell us the location of a shared mapping */ if (capabilities & NOMMU_MAP_DIRECT) { addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) { ret = addr; if (ret != -ENOSYS) goto error_just_free; /* the driver refused to tell us where to site * the mapping so we'll have to attempt to copy * it */ ret = -ENODEV; if (!(capabilities & NOMMU_MAP_COPY)) goto error_just_free; capabilities &= ~NOMMU_MAP_DIRECT; } else { vma->vm_start = region->vm_start = addr; vma->vm_end = region->vm_end = addr + len; } } } vma->vm_region = region; /* set up the mapping * - the region is filled in if NOMMU_MAP_DIRECT is still set */ if (file && vma->vm_flags & VM_SHARED) ret = do_mmap_shared_file(vma); //调用/dev/mem driver else ret = do_mmap_private(vma, region, len, capabilities); if (ret < 0) goto error_just_free; add_nommu_region(region); /* clear anonymous mappings that don't ask for uninitialized data */ if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) memset((void *)region->vm_start, 0, region->vm_end - region->vm_start); /* okay... we have a mapping; now we have to register it */ result = vma->vm_start; current->mm->total_vm += len >> PAGE_SHIFT; share: add_vma_to_mm(current->mm, vma); /* we flush the region from the icache only when the first executable * mapping of it is made */ if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { flush_icache_range(region->vm_start, region->vm_end); region->vm_icache_flushed = true; } up_write(&nommu_region_sem); return result; /* ignore code */ error_just_free: error: sharing_violation: error_getting_vma: error_getting_region: return -ENOMEM; }
5. sys_mmap to dev/mem drive's mmap_mem
mm/nommu.c
/*
 * mm/nommu.c excerpt: a shared file mapping is delegated to the backing
 * file's mmap file operation — for /dev/mem this dispatches to mmap_mem()
 * in drivers/char/mem.c (see the block below).
 */
static int do_mmap_shared_file(struct vm_area_struct *vma) { ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); }
6. /dev/mem driver in linux-kernel/drivers/char/mem.c
/*
 * /dev/mem mmap handler (drivers/char/mem.c excerpt). Validation steps,
 * numbered to match the sections below:
 *  6.1 valid_mmap_phys_addr_range  — physical range sanity check
 *  6.2 range_is_allowed            — per-page devmem_is_allowed() policy
 *  6.3 phys_mem_access_prot_allowed — arch hook ("没检查" = "not checked"
 *      on this arch per the author's note)
 *  6.4 phys_mem_access_prot        — arch hook; cache attributes
 *      ("跟 cache 有关 先不管" = "cache-related; ignore for now")
 * Then remap_pfn_range() maps the range starting at the user virtual
 * address vma->vm_start ("Virt 开头位址" = "starting virtual address"),
 * using vma->vm_pgoff directly as the physical page frame number — this is
 * how the mmap offset becomes a physical address.
 */
static int mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))//6.1 return -EINVAL; if (!private_mapping_ok(vma))//vm_flags must have VM_MAYSHARE return -ENOSYS; if (!range_is_allowed(vma->vm_pgoff, size))//6.2 check each page is allowed by calling devmem_is_allowed. ARCH_HAS_VALID_PHYS_ADDR_RANGE return -EPERM; if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, &vma->vm_page_prot))//6.3 没检查 return -EINVAL; vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot);// 6.4 跟 cache 有关 先不管 vma->vm_ops = &mmap_mem_ops; /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, //Virt 开头位址 vma->vm_pgoff, size, vma->vm_page_prot)) { return -EAGAIN; } return 0; }
前面检查完了 这边直接把实体对应虚拟位址指派给mm 一页一个mm
可以看到 实体与虚拟位址都是连续的
remap_pfn_range in ./mm/memory.c
/*
 * Pseudo-code sketch of remap_pfn_range (mm/memory.c), not real source:
 * records pfn in vma->vm_pgoff, marks the vma as raw PFN I/O mapping, then
 * walks the page-table hierarchy one PGD span at a time installing entries
 * for consecutive pfns — hence both the physical and the virtual range end
 * up contiguous, one page-table entry per page.
 * NOTE(review): in the sketch "VM_DONTEXPAD" is a typo for VM_DONTEXPAND,
 * and the real fields are vma->vm_flags / vma->vm_mm — verify against
 * mm/memory.c before quoting.
 */
remap_pfn_range(vma, //user vma to map to addr, //target user address pfn, //physic address of kernel memory. Page #? size, //size of map area prot //page protection flags for this mapping ) { vma->vm_pgoff = pfn; vma->flags |= VM_IO|VM_PFNMAP|VM_DONTEXPAD |VM_DONTDUMP; mm = vma->mm; do { //for each page, update mm next = pgd_addr_end(addr, end) remap_pud_range(mm, pgd, addr, next, pfn+page#, prot) pgd++, addr = next, }while (addr != end)
6.1
valid_mmap_phys_addr_range
该函数确定mmap的范围是否超过4G,超过4G则为无效物理地址,这样的情况用户空间一般不会出现。
/*
 * Section 6.1 excerpt: rejects a mapping whose end page frame exceeds
 * 0x00100000 pages — with 4 KiB pages that is the 4 GiB physical boundary
 * described in the surrounding text.
 */
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) { return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); }
6.2 range_is_allowed(vma->vm_pgoff, size)
/*
 * Section 6.2 excerpt (truncated — the strict branch's loop body and
 * return are elided): with CONFIG_STRICT_DEVMEM each page's physical
 * address is checked via devmem_is_allowed() ("检查每个page的phy 地址是否允许"
 * = "check whether each page's physical address is allowed"); without it,
 * every range is permitted unconditionally.
 */
#ifdef CONFIG_STRICT_DEVMEM static inline int range_is_allowed(unsigned long pfn, unsigned long size) { // 检查每个page的phy 地址是否允许 if (!devmem_is_allowed(pfn)) { } #else static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; } #endif
devmem_is_allowed 在arch/arm/mm/mmap.c
disallowing access to system RAM and device-exclusive MMIO regions.
/*
 * devmem_is_allowed (arch/arm/mm/mmap.c excerpt): access is granted only
 * for pages that are neither exclusive iomem resources nor system RAM —
 * i.e. only plain device MMIO is mappable under STRICT_DEVMEM.
 * iomem_is_exclusive walks the iomem_resource tree looking for a child
 * marked both IORESOURCE_BUSY and IORESOURCE_EXCLUSIVE.
 * NOTE(review): this iomem_is_exclusive excerpt is abbreviated — the
 * upstream version also matches the resource against addr and returns err;
 * neither appears here, so do not treat this copy as complete.
 */
int devmem_is_allowed(unsigned long pfn) { if (iomem_is_exclusive(pfn << PAGE_SHIFT)) return 0; if (!page_is_ram(pfn)) return 1; return 0; } /* * check if an address is reserved in the iomem resource tree * returns 1 if reserved, 0 if not reserved. */ int iomem_is_exclusive(u64 addr) { struct resource *p = &iomem_resource; for (p = p->child; p ; p = r_next(NULL, p, &l)) { if (p->flags & IORESOURCE_BUSY && p->flags & IORESOURCE_EXCLUSIVE) { err = 1; break; } } }
/*
 * page_is_ram excerpt: on non-PPC64 any pfn below max_pfn (end of lowmem,
 * set in do_init_bootmem below) counts as RAM; on PPC64 the pfn's physical
 * address is checked against every memblock memory region instead.
 */
int page_is_ram(unsigned long pfn) { #ifndef CONFIG_PPC64 /* XXX for now */ return pfn < max_pfn; #else unsigned long paddr = (pfn << PAGE_SHIFT); struct memblock_region *reg; for_each_memblock(memory, reg) if (paddr >= reg->base && paddr < (reg->base + reg->size)) return 1; return 0; #endif }
max_pfn赋值在do_init_bootmem中。例如以下:
/*
 * do_init_bootmem excerpt: max_pfn (and max_low_pfn) are derived from the
 * end of DRAM reported by memblock, converted to a page frame number —
 * this is the bound page_is_ram compares against on non-PPC64.
 */
void __init do_init_bootmem(void) { max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; }
max_pfn代表了内核lowmem的页个数,lowmem在内核下静态线性映射。系统启动之初完成映射之后不会修改,读写效率高。内核代码都是跑在lowmem。lowmem大小我们能够通过cmdline的“mem=”来指定。
6.3
phys_mem_access_prot_allowed
6.4
phys_mem_access_prot
Question:
如何确认offset 合理区域? length 可以有多大?
Reference:
https://blog.csdn.net/junllee/article/details/82146351