“DevMem note”的版本间的差异
free6d1823(讨论 | 贡献) |
free6d1823(讨论 | 贡献) |
||
(未显示1个用户的12个中间版本) | |||
第88行: | 第88行: | ||
} | } | ||
</source> | </source> | ||
− | + | mm/mmap.c | |
+ | <source lang="c"> | ||
+ | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
+ | unsigned long, prot, unsigned long, flags, | ||
+ | unsigned long, fd, unsigned long, pgoff) | ||
+ | { | ||
+ | retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
+ | } | ||
+ | |||
+ | unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, | ||
+ | unsigned long len, unsigned long prot, | ||
+ | unsigned long flag, unsigned long pgoff) | ||
+ | { | ||
+ | ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, | ||
+ | &populate); | ||
+ | |||
+ | } | ||
+ | static inline unsigned long | ||
+ | do_mmap_pgoff(struct file *file, unsigned long addr, | ||
+ | unsigned long len, unsigned long prot, unsigned long flags, | ||
+ | unsigned long pgoff, unsigned long *populate) | ||
+ | { | ||
+ | return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); | ||
+ | } | ||
+ | </source> | ||
+ | |||
mm/nommu.c | mm/nommu.c | ||
<source lang="c"> | <source lang="c"> | ||
第97行: | 第122行: | ||
unsigned long flags, | unsigned long flags, | ||
vm_flags_t vm_flags, | vm_flags_t vm_flags, | ||
− | unsigned long pgoff, | + | unsigned long pgoff, //physical page number |
unsigned long *populate) | unsigned long *populate) | ||
{ | { | ||
第138行: | 第163行: | ||
INIT_LIST_HEAD(&vma->anon_vma_chain); | INIT_LIST_HEAD(&vma->anon_vma_chain); | ||
vma->vm_flags = vm_flags; | vma->vm_flags = vm_flags; | ||
− | vma->vm_pgoff = pgoff; //问题1 答案: pgoff直接带入 | + | vma->vm_pgoff = pgoff; //问题1 答案: pgoff直接带入. Phy 位址不为 0的部份已被ignore |
if (file) { | if (file) { | ||
第161行: | 第186行: | ||
pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
pgend = pgoff + pglen; | pgend = pgoff + pglen; | ||
− | + | //从无名区找一个vm_region的位址来当新的虚拟位址 | |
for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { | for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { | ||
pregion = rb_entry(rb, struct vm_region, vm_rb); | pregion = rb_entry(rb, struct vm_region, vm_rb); | ||
第253行: | 第278行: | ||
*/ | */ | ||
if (file && vma->vm_flags & VM_SHARED) | if (file && vma->vm_flags & VM_SHARED) | ||
− | ret = do_mmap_shared_file(vma); | + | ret = do_mmap_shared_file(vma); //调用/dev/mem driver |
else | else | ||
ret = do_mmap_private(vma, region, len, capabilities); | ret = do_mmap_private(vma, region, len, capabilities); | ||
第283行: | 第308行: | ||
return result; | return result; | ||
− | + | /* ignore code */ | |
error_just_free: | error_just_free: | ||
− | |||
error: | error: | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
sharing_violation: | sharing_violation: | ||
− | |||
− | |||
− | |||
− | |||
− | |||
error_getting_vma: | error_getting_vma: | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
error_getting_region: | error_getting_region: | ||
− | |||
− | |||
− | |||
return -ENOMEM; | return -ENOMEM; | ||
− | } | + | } |
</source> | </source> | ||
5. sys_mmap to dev/mem driver's mmap_mem | 5. sys_mmap to dev/mem driver's mmap_mem | ||
+ | mm/nommu.c | ||
+ | <source lang="c"> | ||
+ | static int do_mmap_shared_file(struct vm_area_struct *vma) | ||
+ | { | ||
+ | ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); | ||
+ | } | ||
+ | </source> | ||
6./dev/mem driver in linux-kernel/drivers/char/mem.c | 6./dev/mem driver in linux-kernel/drivers/char/mem.c | ||
<source lang="c"> | <source lang="c"> | ||
第322行: | 第331行: | ||
{ | { | ||
size_t size = vma->vm_end - vma->vm_start; | size_t size = vma->vm_end - vma->vm_start; | ||
− | if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))// | + | if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))//6.1 |
return -EINVAL; | return -EINVAL; | ||
if (!private_mapping_ok(vma))//vm_flags must have VM_MAYSHARE | if (!private_mapping_ok(vma))//vm_flags must have VM_MAYSHARE | ||
return -ENOSYS; | return -ENOSYS; | ||
− | if (!range_is_allowed(vma->vm_pgoff, size))// | + | if (!range_is_allowed(vma->vm_pgoff, size))//6.2 |
return -EPERM; | return -EPERM; | ||
if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, | if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, | ||
− | &vma->vm_page_prot))//没检查 | + | &vma->vm_page_prot))//6.3 没检查 |
return -EINVAL; | return -EINVAL; | ||
vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, | vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, | ||
size, | size, | ||
− | vma->vm_page_prot);// 跟 cache 有关 | + | vma->vm_page_prot);// 6.4 跟 cache 有关 |
− | + | ||
vma->vm_ops = &mmap_mem_ops; | vma->vm_ops = &mmap_mem_ops; | ||
第351行: | 第359行: | ||
} | } | ||
</source> | </source> | ||
− | remap_pfn_range | + | 前面检查完成后,这里直接把实体位址对应到虚拟位址并写入 mm 的页表,一页一条映射
+ | 可以看到 实体与虚拟位址都是连续的 | ||
+ | remap_pfn_range in ./mm/memory.c | ||
<source lang="c"> | <source lang="c"> | ||
remap_pfn_range(vma, //user vma to map to | remap_pfn_range(vma, //user vma to map to | ||
addr, //target user address | addr, //target user address | ||
− | pfn, // | + | pfn, //physical address of kernel memory. Page frame number. |
size, //size of map area | size, //size of map area | ||
prot //page protection flags for this mapping | prot //page protection flags for this mapping | ||
第365行: | 第375行: | ||
do { | do { | ||
//for each page, update mm | //for each page, update mm | ||
− | next = pgd_addr_end(addr, end) | + | next = pgd_addr_end(addr, end) |
remap_pud_range(mm, pgd, addr, next, pfn+page#, prot) | remap_pud_range(mm, pgd, addr, next, pfn+page#, prot) | ||
pgd++, addr = next, | pgd++, addr = next, | ||
第373行: | 第383行: | ||
</source> | </source> | ||
+ | 6.1 | ||
+ | valid_mmap_phys_addr_range | ||
+ | 该函数确定mmap的范围是否超过4G,超过4G则为无效物理地址,这样的情况用户空间一般不会出现。 | ||
+ | <source lang="c"> | ||
+ | int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) | ||
+ | { | ||
− | + | return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); | |
+ | } | ||
+ | </source> | ||
+ | 6.2 range_is_allowed(vma->vm_pgoff, size) | ||
+ | <source lang="c"> | ||
+ | #ifdef CONFIG_STRICT_DEVMEM | ||
+ | static inline int range_is_allowed(unsigned long pfn, unsigned long size) | ||
+ | { | ||
+ | // 检查每个page的phy 地址是否允许 | ||
+ | if (!devmem_is_allowed(pfn)) { | ||
+ | } | ||
+ | |||
+ | #else | ||
+ | static inline int range_is_allowed(unsigned long pfn, unsigned long size) | ||
+ | { | ||
+ | return 1; | ||
+ | } | ||
+ | #endif | ||
+ | </source> | ||
+ | devmem_is_allowed 在arch/arm/mm/mmap.c | ||
+ | disallowing access to system RAM and device-exclusive MMIO regions. | ||
+ | <source lang="c"> | ||
+ | int devmem_is_allowed(unsigned long pfn) | ||
+ | { | ||
+ | if (iomem_is_exclusive(pfn << PAGE_SHIFT)) | ||
+ | return 0; | ||
+ | if (!page_is_ram(pfn)) | ||
+ | return 1; | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | |||
+ | /* | ||
+ | * check if an address is reserved in the iomem resource tree | ||
+ | * returns 1 if reserved, 0 if not reserved. | ||
+ | */ | ||
+ | int iomem_is_exclusive(u64 addr) | ||
+ | { | ||
+ | struct resource *p = &iomem_resource; | ||
+ | for (p = p->child; p ; p = r_next(NULL, p, &l)) { | ||
+ | if (p->flags & IORESOURCE_BUSY && | ||
+ | p->flags & IORESOURCE_EXCLUSIVE) { | ||
+ | err = 1; | ||
+ | break; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | } | ||
+ | </source> | ||
+ | lowmem 由 max_pfn 定义,或是在 memory 的 memblock_region 中 | ||
+ | <source lang="c"> | ||
+ | int page_is_ram(unsigned long pfn) | ||
+ | { | ||
+ | |||
+ | #ifndef CONFIG_PPC64 /* XXX for now */ | ||
+ | return pfn < max_pfn; | ||
+ | #else | ||
+ | unsigned long paddr = (pfn << PAGE_SHIFT); | ||
+ | struct memblock_region *reg; | ||
+ | for_each_memblock(memory, reg) | ||
+ | if (paddr >= reg->base && paddr < (reg->base + reg->size)) | ||
+ | return 1; | ||
+ | return 0; | ||
+ | #endif | ||
+ | } | ||
+ | </source> | ||
+ | max_pfn 的赋值在 do_init_bootmem 中,示例如下. | ||
+ | <source lang="c"> | ||
+ | |||
+ | void __init do_init_bootmem(void) | ||
+ | { | ||
+ | max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; | ||
+ | } | ||
+ | </source> | ||
+ | max_pfn代表了内核lowmem的页个数,lowmem在内核下静态线性映射。系统启动之初完成映射,之后不会修改,读写效率高。内核代码都是跑在lowmem。lowmem大小我们能够通过cmdline的“mem=”来指定。 | ||
+ | |||
+ | CONFIG_STRICT_DEVMEM 定义在 arch/arm/Kconfig.debug | ||
+ | <source lang="c"> | ||
+ | config STRICT_DEVMEM | ||
+ | def_bool y | ||
+ | prompt "Filter access to /dev/mem" | ||
+ | help | ||
+ | This option restricts access to /dev/mem. If this option is | ||
+ | disabled, you allow userspace access to all memory, including | ||
+ | kernel and userspace memory. Accidental memory access is likely | ||
+ | to be disastrous. | ||
+ | Memory access is required for experts who want to debug the kernel. | ||
+ | </source> | ||
+ | 6.3 | ||
+ | phys_mem_access_prot_allowed 实现为空返回1,没有影响。 | ||
+ | 6.4 | ||
+ | phys_mem_access_prot | ||
+ | <source lang="c"> | ||
+ | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | ||
+ | unsigned long size, pgprot_t vma_prot) | ||
+ | { | ||
+ | //假设有平台实现的phys_mem_access_prot,则调用之。 | ||
+ | if (ppc_md.phys_mem_access_prot) | ||
+ | return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); | ||
+ | //对于不是lowmem范围内的物理地址。权限设置为uncached。 | ||
+ | if (!page_is_ram(pfn)) | ||
+ | vma_prot = pgprot_noncached(vma_prot); | ||
+ | return vma_prot; | ||
+ | } | ||
+ | </source> | ||
+ | |||
+ | |||
+ | Question: | ||
如何确认offset 合理区域? length 可以有多大? | 如何确认offset 合理区域? length 可以有多大? | ||
+ | Answer: | ||
+ | 所以如果打开CONFIG_STRICT_DEVMEM,mem驱动会对mmap要映射的物理地址进行范围和位置的检查,然后才进行映射。检查条件如下: | ||
+ | (1)映射范围不能超过4G。 | ||
+ | (2)该物理地址所在iomem不能exclusive. | ||
+ | (3)该物理地址不能处在lowmem中。 | ||
+ | 关闭CONFIG_STRICT_DEVMEM 可以利用/dev/mem来操作所有内存以及寄存器. | ||
+ | 7. Uboot | ||
+ | Question: | ||
+ | DevMem 区域有多大? | ||
+ | Answer: | ||
+ | 由 u-boot "pram" 指定,传递到 Linux 环境变数 "mem", early init 阶段即限制了 memblock 的最大范围. 所以一般 mem function 不会用到, 只能 mmap /dev/mem 使用, 但 /dev/mem 内没有管理机制. | ||
+ | |||
+ | |||
+ | |||
+ | uboot/common/board_r. | ||
+ | <source lang="c"> | ||
+ | int initr_mem(void) | ||
+ | { | ||
+ | pram = env_get_ulong("pram", 10, CONFIG_PRAM); | ||
+ | sprintf(memsz, "%ldk", (long int)((gd->ram_size / 1024) - pram)); | ||
+ | env_set("mem", memsz); //pass to Linux mem environment | ||
+ | return 0; | ||
+ | } | ||
+ | </source> | ||
+ | linux/arch/arm64/mm/init.c | ||
+ | <source lang="c"> | ||
+ | static int __init early_mem(char *p) | ||
+ | { | ||
+ | memory_limit = memparse(p, &p) & PAGE_MASK; | ||
+ | } | ||
+ | early_param("mem", early_mem); | ||
+ | |||
+ | void __init arm64_memblock_init(void) | ||
+ | { | ||
+ | memblock_enforce_memory_limit(memory_limit); | ||
+ | memblock_add(__pa(_text), (u64)(_end - _text)); | ||
+ | } | ||
+ | |||
+ | void __init memblock_enforce_memory_limit(phys_addr_t limit) | ||
+ | { | ||
+ | |||
+ | /* find out max address */ | ||
+ | //memory 区由小排到大, 找出 limit 所在区域的物理地址(base + offset) | ||
+ | for_each_memblock(memory, r) { | ||
+ | if (limit <= r->size) { | ||
+ | max_addr = r->base + limit; | ||
+ | break; | ||
+ | } | ||
+ | limit -= r->size; | ||
+ | } | ||
+ | |||
+ | /* truncate both memory and reserved regions */ | ||
+ | memblock_remove_range(&memblock.memory, max_addr, | ||
+ | (phys_addr_t)ULLONG_MAX); | ||
+ | memblock_remove_range(&memblock.reserved, max_addr, | ||
+ | (phys_addr_t)ULLONG_MAX); | ||
+ | } | ||
+ | |||
+ | </source> | ||
+ | linux/mm/memblock.c | ||
+ | <source lang="c"> | ||
+ | static int __init_memblock memblock_remove_range(struct memblock_type *type, | ||
+ | phys_addr_t base, phys_addr_t size) | ||
+ | { | ||
+ | int start_rgn, end_rgn; | ||
+ | int i, ret; | ||
+ | // 找出base 的rgn 区域并把他们移除出type 区 | ||
+ | ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); | ||
+ | if (ret) | ||
+ | return ret; | ||
+ | |||
+ | for (i = end_rgn - 1; i >= start_rgn; i--) | ||
+ | memblock_remove_region(type, i); | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | </source> | ||
Reference: | Reference: | ||
https://blog.csdn.net/junllee/article/details/82146351 | https://blog.csdn.net/junllee/article/details/82146351 |
2019年11月8日 (五) 11:07的最后版本
1.devmem ADDRESS [WIDTH [VALUE]]
Read/write from physical address ADDRESS Address to act upon WIDTH Width (8/16/...) VALUE Data to be written
2. devmem -> mmap
int devmem_main(int argc UNUSED_PARAM, char **argv) { fd = xopen("/dev/mem", argv[3] ? (O_RDWR | O_SYNC) : (O_RDONLY | O_SYNC)); //根据第三个参数确定是以只读形式打开,还是以读写形式打开。 mapped_size = page_size = getpagesize(); //4K bytes //if cross page, mapped_size *=2 map_base = mmap(NULL, mapped_size, // argv[3] ? (PROT_READ | PROT_WRITE) : PROT_READ, MAP_SHARED, fd, target & ~(off_t)(page_size - 1)//offset from 0x0000, page start address ); virt_addr = (char*)map_base + offset_in_page; }
3. mmap
#include <sys/mman.h> void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); int munmap(void *addr, size_t length);
详细参数如下:
参数 | 详细说明 |
---|---|
addr | 需要映射的虚拟内存地址;如果为NULL,系统会自动选定。映射成功后返回该地址 |
length | 需要映射多大的数据量. Mus be multiple of PAGE SIZE |
prot | 描述映射区域内存保护方式,包括:PROT_EXEC、PROT_READ、PROT_WRITE、PROT_NONE. |
flags | 描述映射区域的特性,比如是否对其他进程共享,是否建立匿名映射,是否创建私有的cow. |
fd | 要映射到内存中的文件描述符 |
offset | 文件映射的偏移量 |
4. mmap to sys_mmap
Questions:
1. offset 如何变成是物理地址? offset 跟vma->pgoff 的关系?
2. vma->vm_start = addr ? 若 addr=NULL 如何给址?
系统调用的入口是entry_SYSCALL_64_fastpath,然后根据系统调用号在sys_call_table中找到对应的函数。
mmap()和munmap()对应的系统调用分别是SyS_mmap()和SyS_munmap()
arch/arc/kernel/sys.c
#define __SYSCALL(nr, call) [nr] = (call), void *sys_call_table[NR_syscalls] = { [0 ... NR_syscalls-1] = sys_ni_syscall, #include <asm/unistd.h> };
include/uapiasm-generic/unistd.h
#define __NR_munmap 215 __SYSCALL(__NR_munmap, sys_munmap) #define __NR_mremap 216 __SYSCALL(__NR_mremap, sys_mremap) #define __NR_mmap 1058 __SYSCALL(__NR_mmap, sys_mmap)
Reference: https://www.cnblogs.com/sky-heaven/p/5689072.html
arch/arm64/kernel/sys.c
asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, off_t off) { return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); }
mm/mmap.c
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) { retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); } unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) { ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, &populate); } static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate) { return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); }
mm/nommu.c
unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, //physical page number unsigned long *populate) { struct vm_area_struct *vma; struct vm_region *region; struct rb_node *rb; unsigned long capabilities, result; int ret; *populate = 0; /* decide whether we should attempt the mapping, and if so what sort of * mapping */ ret = validate_mmap_request(file, addr, len, prot, flags, pgoff, &capabilities); if (ret < 0) return ret; /* we ignore the address hint */ addr = 0; //答案 2, addr 无用 len = PAGE_ALIGN(len); /* we've determined that we can make the mapping, now translate what we * now know into VMA flags */ vm_flags |= determine_vm_flags(file, prot, flags, capabilities); /* we're going to need to record the mapping */ region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL); if (!region) goto error_getting_region; vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) goto error_getting_vma; region->vm_usage = 1; region->vm_flags = vm_flags; region->vm_pgoff = pgoff; INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_flags = vm_flags; vma->vm_pgoff = pgoff; //问题1 答案: pgoff直接带入. 
Phy 位址不为 0的部份已被ignore if (file) { region->vm_file = get_file(file); vma->vm_file = get_file(file);//file 带入 } down_write(&nommu_region_sem); /* 这里不管区域重叠的问题 if we want to share, we need to check for regions created by other * mmap() calls that overlap with our proposed mapping * - we can only share with a superset match on most regular files * - shared mappings on character devices and memory backed files are * permitted to overlap inexactly as far as we are concerned for in * these cases, sharing is handled in the driver or filesystem rather * than here */ if (vm_flags & VM_MAYSHARE) { struct vm_region *pregion; unsigned long pglen, rpglen, pgend, rpgend, start; pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; pgend = pgoff + pglen; //从无名区找一个vm_region的位址来当新的虚拟位址 for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) { pregion = rb_entry(rb, struct vm_region, vm_rb); if (!(pregion->vm_flags & VM_MAYSHARE)) continue; /* search for overlapping mappings on the same file */ if (file_inode(pregion->vm_file) != file_inode(file)) continue; if (pregion->vm_pgoff >= pgend) continue; rpglen = pregion->vm_end - pregion->vm_start; rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;//占用页数 已无条件进位 rpgend = pregion->vm_pgoff + rpglen; if (pgoff >= rpgend) continue; /* handle inexactly overlapping matches between * mappings */ if ((pregion->vm_pgoff != pgoff || rpglen != pglen) && !(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) { /* new mapping is not a subset of the region */ if (!(capabilities & NOMMU_MAP_DIRECT)) goto sharing_violation; continue; } /* we've found a region we can share */ pregion->vm_usage++; vma->vm_region = pregion; start = pregion->vm_start; start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; vma->vm_start = start;//答案2 start地址 vma->vm_end = start + len; if (pregion->vm_flags & VM_MAPPED_COPY) vma->vm_flags |= VM_MAPPED_COPY; else { ret = do_mmap_shared_file(vma); if (ret < 0) { vma->vm_region = NULL; vma->vm_start = 0; vma->vm_end = 0; pregion->vm_usage--; 
pregion = NULL; goto error_just_free; } } fput(region->vm_file); kmem_cache_free(vm_region_jar, region); region = pregion; result = start; goto share; } /* obtain the address at which to make a shared mapping * - this is the hook for quasi-memory character devices to * tell us the location of a shared mapping */ if (capabilities & NOMMU_MAP_DIRECT) { addr = file->f_op->get_unmapped_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) { ret = addr; if (ret != -ENOSYS) goto error_just_free; /* the driver refused to tell us where to site * the mapping so we'll have to attempt to copy * it */ ret = -ENODEV; if (!(capabilities & NOMMU_MAP_COPY)) goto error_just_free; capabilities &= ~NOMMU_MAP_DIRECT; } else { vma->vm_start = region->vm_start = addr; vma->vm_end = region->vm_end = addr + len; } } } vma->vm_region = region; /* set up the mapping * - the region is filled in if NOMMU_MAP_DIRECT is still set */ if (file && vma->vm_flags & VM_SHARED) ret = do_mmap_shared_file(vma); //调用/dev/mem driver else ret = do_mmap_private(vma, region, len, capabilities); if (ret < 0) goto error_just_free; add_nommu_region(region); /* clear anonymous mappings that don't ask for uninitialized data */ if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) memset((void *)region->vm_start, 0, region->vm_end - region->vm_start); /* okay... we have a mapping; now we have to register it */ result = vma->vm_start; current->mm->total_vm += len >> PAGE_SHIFT; share: add_vma_to_mm(current->mm, vma); /* we flush the region from the icache only when the first executable * mapping of it is made */ if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { flush_icache_range(region->vm_start, region->vm_end); region->vm_icache_flushed = true; } up_write(&nommu_region_sem); return result; /* ignore code */ error_just_free: error: sharing_violation: error_getting_vma: error_getting_region: return -ENOMEM; }
5. sys_mmap to dev/mem driver's mmap_mem
mm/nommu.c
static int do_mmap_shared_file(struct vm_area_struct *vma) { ret = vma->vm_file->f_op->mmap(vma->vm_file, vma); }
6./dev/mem driver in linux-kernel/drivers/char/mem.c
static int mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))//6.1 return -EINVAL; if (!private_mapping_ok(vma))//vm_flags must have VM_MAYSHARE return -ENOSYS; if (!range_is_allowed(vma->vm_pgoff, size))//6.2 return -EPERM; if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, &vma->vm_page_prot))//6.3 没检查 return -EINVAL; vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot);// 6.4 跟 cache 有关 vma->vm_ops = &mmap_mem_ops; /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, //Virt 开头位址 vma->vm_pgoff, size, vma->vm_page_prot)) { return -EAGAIN; } return 0; }
前面检查完成后,这里直接把实体位址对应到虚拟位址并写入 mm 的页表,一页一条映射
可以看到 实体与虚拟位址都是连续的
remap_pfn_range in ./mm/memory.c
remap_pfn_range(vma, //user vma to map to addr, //target user address pfn, //physical address of kernel memory. Page frame number. size, //size of map area prot //page protection flags for this mapping ) { vma->vm_pgoff = pfn; vma->flags |= VM_IO|VM_PFNMAP|VM_DONTEXPAD |VM_DONTDUMP; mm = vma->mm; do { //for each page, update mm next = pgd_addr_end(addr, end) remap_pud_range(mm, pgd, addr, next, pfn+page#, prot) pgd++, addr = next, }while (addr != end)
6.1
valid_mmap_phys_addr_range
该函数确定mmap的范围是否超过4G,超过4G则为无效物理地址,这样的情况用户空间一般不会出现。
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) { return !(pfn + (size >> PAGE_SHIFT) > 0x00100000); }
6.2 range_is_allowed(vma->vm_pgoff, size)
#ifdef CONFIG_STRICT_DEVMEM static inline int range_is_allowed(unsigned long pfn, unsigned long size) { // 检查每个page的phy 地址是否允许 if (!devmem_is_allowed(pfn)) { } #else static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; } #endif
devmem_is_allowed 在arch/arm/mm/mmap.c
disallowing access to system RAM and device-exclusive MMIO regions.
int devmem_is_allowed(unsigned long pfn) { if (iomem_is_exclusive(pfn << PAGE_SHIFT)) return 0; if (!page_is_ram(pfn)) return 1; return 0; } /* * check if an address is reserved in the iomem resource tree * returns 1 if reserved, 0 if not reserved. */ int iomem_is_exclusive(u64 addr) { struct resource *p = &iomem_resource; for (p = p->child; p ; p = r_next(NULL, p, &l)) { if (p->flags & IORESOURCE_BUSY && p->flags & IORESOURCE_EXCLUSIVE) { err = 1; break; } } }
lowmem 由 max_pfn 定义,或是在 memory 的 memblock_region 中
int page_is_ram(unsigned long pfn) { #ifndef CONFIG_PPC64 /* XXX for now */ return pfn < max_pfn; #else unsigned long paddr = (pfn << PAGE_SHIFT); struct memblock_region *reg; for_each_memblock(memory, reg) if (paddr >= reg->base && paddr < (reg->base + reg->size)) return 1; return 0; #endif }
max_pfn 的赋值在 do_init_bootmem 中,示例如下.
void __init do_init_bootmem(void) { max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; }
max_pfn代表了内核lowmem的页个数,lowmem在内核下静态线性映射。系统启动之初完成映射,之后不会修改,读写效率高。内核代码都是跑在lowmem。lowmem大小我们能够通过cmdline的“mem=”来指定。
CONFIG_STRICT_DEVMEM 定义在 arch/arm/Kconfig.debug
config STRICT_DEVMEM def_bool y prompt "Filter access to /dev/mem" help This option restricts access to /dev/mem. If this option is disabled, you allow userspace access to all memory, including kernel and userspace memory. Accidental memory access is likely to be disastrous. Memory access is required for experts who want to debug the kernel.
6.3
phys_mem_access_prot_allowed 实现为空返回1,没有影响。
6.4
phys_mem_access_prot
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { //假设有平台实现的phys_mem_access_prot,则调用之。 if (ppc_md.phys_mem_access_prot) return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot); //对于不是lowmem范围内的物理地址。权限设置为uncached。 if (!page_is_ram(pfn)) vma_prot = pgprot_noncached(vma_prot); return vma_prot; }
Question:
如何确认offset 合理区域? length 可以有多大?
Answer:
所以如果打开CONFIG_STRICT_DEVMEM,mem驱动会对mmap要映射的物理地址进行范围和位置的检查,然后才进行映射。检查条件如下:
(1)映射范围不能超过4G。
(2)该物理地址所在iomem不能exclusive.
(3)该物理地址不能处在lowmem中。
关闭CONFIG_STRICT_DEVMEM 可以利用/dev/mem来操作所有内存以及寄存器.
7. Uboot
Question:
DevMem 区域有多大?
Answer:
由 u-boot "pram" 指定,传递到 Linux 环境变数 "mem", early init 阶段即限制了 memblock 的最大范围. 所以一般 mem function 不会用到, 只能 mmap /dev/mem 使用, 但 /dev/mem 内没有管理机制.
uboot/common/board_r.
int initr_mem(void) { pram = env_get_ulong("pram", 10, CONFIG_PRAM); sprintf(memsz, "%ldk", (long int)((gd->ram_size / 1024) - pram)); env_set("mem", memsz); //pass to Linux mem environment return 0; }
linux/arch/arm64/mm/init.c
static int __init early_mem(char *p) { memory_limit = memparse(p, &p) & PAGE_MASK; } early_param("mem", early_mem); void __init arm64_memblock_init(void) { memblock_enforce_memory_limit(memory_limit); memblock_add(__pa(_text), (u64)(_end - _text)); } void __init memblock_enforce_memory_limit(phys_addr_t limit) { /* find out max address */ //memory 区由小排到大 找出 limit 所在区域的虚拟地址(base + offset) for_each_memblock(memory, r) { if (limit <= r->size) { max_addr = r->base + limit; break; } limit -= r->size; } /* truncate both memory and reserved regions */ memblock_remove_range(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX); memblock_remove_range(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX); }
linux/mm/memblock.c
static int __init_memblock memblock_remove_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { int start_rgn, end_rgn; int i, ret; // 找出base 的rgn 区域并把他们移除出type 区 ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); if (ret) return ret; for (i = end_rgn - 1; i >= start_rgn; i--) memblock_remove_region(type, i); return 0; }
Reference:
https://blog.csdn.net/junllee/article/details/82146351