当前位置 博文首页 > 星見遥:XV6学习(15)Lab mmap: Mmap

    星見遥:XV6学习(15)Lab mmap: Mmap

    作者:星見遥 时间:2021-02-10 12:28

    代码在Github上。

    这一个实验是要实现最基础的mmap功能。mmap即内存映射文件,将一个文件直接映射到内存当中,之后对文件的读写就可以直接通过对内存进行读写来进行,而对文件的同步则由操作系统来负责完成。使用mmap可以避免对文件进行大量read/write操作时,内核缓冲区和用户缓冲区之间频繁的数据拷贝。Kafka消息队列等软件就借助mmap来实现零拷贝(zero-copy)。

    首先定义vma结构体用于保存内存映射信息,并在proc结构体中加入struct vma *vma指针:

    // Number of slots in the global vma_list table that vma_alloc() hands out.
    #define NVMA 16
    // Virtual address at which sys_mmap() places a process's first mapping.
    #define VMA_START (MAXVA / 2)
    // Describes one memory-mapped file region of a process.
    // The region covers the half-open virtual address range [start, end).
    struct vma{
      uint64 start;          // first virtual address of the region (inclusive)
      uint64 end;            // one past the last virtual address (exclusive)
      uint64 length; // 0 means vma not used
      uint64 off;            // file offset that corresponds to `start`
      int permission;        // PTE_* bits used when a page of the region is mapped
      int flags;             // mmap flags as passed by the caller (e.g. MAP_PRIVATE)
      struct file *file;     // mapped file; the vma holds its own filedup() reference
      struct vma *next;      // next region in the owning process's list, 0 at the tail
    
      struct spinlock lock;  // taken by vma_alloc() while inspecting/claiming the slot
    };
    
    // Per-process state
    struct proc {
      ...
      // Head of this process's singly linked list of mmap regions (linked
      // through vma->next); 0 when the process has no mappings.
      struct vma *vma;
      ...
    };
    

    之后实现对vma分配的代码:

    struct vma vma_list[NVMA];
    
    struct vma* vma_alloc(){
      for(int i = 0; i < NVMA; i++){
        acquire(&vma_list[i].lock);
        if(vma_list[i].length == 0){
          return &vma_list[i];
        }else{
          release(&vma_list[i].lock);
        }
      }
      panic("no enough vma");
    }
    

    实现mmap系统调用,这个函数主要就是申请一个vma,之后查找一块空闲内存,填入相关信息,将vma插入到进程的vma链表中去:

    // mmap(addr, length, prot, flags, fd, offset) system call.
    //
    // Records a new file mapping for the calling process and returns its start
    // address. No page is mapped here; pages are filled in later by the page
    // fault path (mmap_handler).
    //
    // Only addr == 0 (kernel picks the address) and offset == 0 are supported.
    // Returns -1 when an argument cannot be fetched, the request is unsupported,
    // length is not positive, fd does not name an open file, or the requested
    // protection is not permitted by the way the file was opened.
    uint64
    sys_mmap(void)
    {
      uint64 addr;
      int length, prot, flags, fd, offset;
      if(argaddr(0, &addr) < 0 || argint(1, &length) < 0 || argint(2, &prot) < 0 || argint(3, &flags) < 0 || argint(4, &fd) < 0 || argint(5, &offset) < 0){
        return -1;
      }
    
      // Unsupported requests fail the call; user input must not be able to
      // panic the kernel.
      if(addr != 0 || offset != 0)
        return -1;
      // A vma with length 0 counts as a free slot, so an empty (or negative)
      // mapping must never be linked into the process list.
      if(length <= 0)
        return -1;
    
      struct proc *p = myproc();
      // fd comes straight from user space: bound it by the size of the open
      // file table and make sure the slot is actually in use.
      if(fd < 0 || fd >= (int)(sizeof(p->ofile) / sizeof(p->ofile[0])))
        return -1;
      struct file* f = p->ofile[fd];
      if(f == 0)
        return -1;
    
      int pte_flag = PTE_U;
      if (prot & PROT_WRITE) {
        // A private mapping never writes back, so it may be writable even when
        // the file is not.
        if(!f->writable && !(flags & MAP_PRIVATE)) return -1; // map to a unwritable file with PROT_WRITE
        pte_flag |= PTE_W;
      }
      if (prot & PROT_READ) {
        if(!f->readable) return -1; // map to a unreadable file with PROT_READ
        pte_flag |= PTE_R;
      }
    
      // vma_alloc() returns the slot locked; it is released once fully set up.
      struct vma* v = vma_alloc();
      v->permission = pte_flag;
      v->length = length;
      v->off = offset;
      v->file = f;
      v->flags = flags;
      // A recycled slot may still carry the link of its previous list.
      v->next = 0;
      filedup(f);
    
      struct vma* pv = p->vma;
      if(pv == 0){
        // First mapping of this process.
        v->start = VMA_START;
        p->vma = v;
      }else{
        // Append after the last mapping, starting on the next page boundary.
        while(pv->next) pv = pv->next;
        v->start = PGROUNDUP(pv->end);
        pv->next = v;
      }
      v->end = v->start + length;
      addr = v->start;
      printf("mmap: [%p, %p)\n", addr, v->end);
    
      release(&v->lock);
      return addr;
    }
    

    接下来就可以在usertrap中对缺页中断进行处理:查找进程的vma链表,判断该地址是否为映射地址,如果不是就说明出错,直接返回;如果在vma链表中,就可以申请并映射一个页面,之后根据vma从对应的文件中读取数据:

    // Resolve a page fault at `va` that may belong to one of the current
    // process's mmap regions. `scause` is the trap cause: 13 is a load page
    // fault and 15 a store page fault.
    //
    // On success a fresh zeroed page is mapped at the faulting page and filled
    // from the backing file.
    //
    // Returns 0 on success, or
    //   -1  va is not inside any mapping
    //   -2  load fault on a mapping without read permission
    //   -3  store fault on a mapping without write permission
    //   -4  no physical page available
    //   -5  installing the page table entry failed
    int
    mmap_handler(uint64 va, int scause)
    {
      struct proc *p = myproc();
    
      // Locate the region that contains the faulting address.
      struct vma *area;
      for(area = p->vma; area != 0; area = area->next){
        if(va >= area->start && va < area->end)
          break;
      }
      if(area == 0)
        return -1; // not mmap addr
    
      if(scause == 13 && (area->permission & PTE_R) == 0)
        return -2; // unreadable vma
      if(scause == 15 && (area->permission & PTE_W) == 0)
        return -3; // unwritable vma
    
      // Back the whole page that contains va.
      uint64 page = PGROUNDDOWN(va);
      char *frame = kalloc();
      if(frame == 0)
        return -4; // kalloc failed
    
      // Zero first so anything readi() does not fill reads back as 0.
      memset(frame, 0, PGSIZE);
    
      if(mappages(p->pagetable, page, PGSIZE, (uint64)frame, area->permission) != 0){
        kfree(frame);
        return -5; // map page failed
      }
    
      // Fill the page through its kernel address (user_dst == 0).
      ilock(area->file->ip);
      readi(area->file->ip, 0, (uint64)frame, area->off + page - area->start, PGSIZE);
      iunlock(area->file->ip);
      return 0;
    }
    

    之后就是munmap的实现,同样先从链表中找到对应的vma结构体,之后根据三种不同情况(头部、尾部、整个)来写回并释放对应的页面并更新vma信息,如果整个区域都被释放就将vma和文件释放。

    uint64
    sys_munmap(void)
    {
      uint64 addr;
      int length;
      if(argaddr(0, &addr) < 0 || argint(1, &length) < 0){
        return -1;
      }
    
      struct proc *p = myproc();
      struct vma *v = p->vma;
      struct vma *pre = 0;
      while(v != 0){
        if(addr >= v->start && addr < v->end) break; // found
        pre = v;
        v = v->next;
      }
    
      if(v == 0) return -1; // not mapped
      printf("munmap: %p %d\n", addr, length);
      if(addr != v->start && addr + length != v->end) panic("munmap middle of vma");
    
      if(addr == v->start){
        writeback(v, addr, length);
        uvmunmap(p->pagetable, addr, length / PGSIZE, 1);
        if(length == v->length){
          // free all
          fileclose(v->file);
          if(pre == 0){
            p->vma = v->next; // head
          }else{
            pre->next = v->next;
            v->next = 0;
          }
          acquire(&v->lock);
          v->length = 0;
          release(&v->lock);
        }else{
          // free head
          v->start -= length;
          v->off += length;
          v->length -= length;
        }
      }else{
        // free tail
        v->length -= length;
        v->end -= length;
      }
      return 0;
    }
    

    写回函数先判断是否需要写回,当需要写回时就仿照filewrite的实现,将数据写回到对应的文件当中去,这里的实现是直接写回所有页面,但实际可以根据PTE_D来判断内存是否被写入,如果没有写入就不用写回:

    // Write the n bytes of mapped memory starting at user address `addr` back to
    // the file behind `v`.
    //
    // Nothing is written for mappings that are not writable or are MAP_PRIVATE.
    // `addr` must be page aligned (panics otherwise) and is expected to lie
    // inside [v->start, v->end).
    //
    // The data is written in small chunks, each in its own log transaction,
    // using the same chunk bound as filewrite. A chunk never crosses a page
    // boundary, so a chunk that cannot be written (presumably because its page
    // was never faulted in) is skipped by jumping to the next page.
    void
    writeback(struct vma* v, uint64 addr, int n)
    {
      if(!(v->permission & PTE_W) || (v->flags & MAP_PRIVATE)) // no need to writeback
        return;
    
      if((addr % PGSIZE) != 0)
        panic("unmap: not aligned");
    
      printf("starting writeback: %p %d\n", addr, n);
    
      struct file* f = v->file;
      // File offset of `addr`: v->off corresponds to v->start, and addr lies
      // (addr - v->start) bytes further into the mapping.
      uint64 base = v->off + (addr - v->start);
    
      int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE;
      int i = 0;
      while(i < n){
        // Bytes left in the current page; addr is page aligned, so i % PGSIZE
        // is the position inside that page.
        int room = PGSIZE - (i % PGSIZE);
        int n1 = n - i;
        if(n1 > max)
          n1 = max;
        if(n1 > room)
          n1 = room;
    
        begin_op();
        ilock(f->ip);
        printf("%p %d %d\n", addr + i, (int)(base + i), n1);
        int r = writei(f->ip, 1, addr + i, base + i, n1);
        iunlock(f->ip);
        end_op();
    
        if(r == n1){
          i += n1;
        }else{
          // Short or failed write: give up on this page but keep going, which
          // also guarantees the loop always makes progress.
          i += room;
        }
      }
    }
    

    最后就是在fork当中将父进程的vma链表复制到子进程,在exit中将当前进程的vma链表释放,并且在释放之前要对页面进行写回:

    // Excerpt of fork(): the part that gives the child its own copy of the
    // parent's list of mmap regions.
    int
    fork(void)
    {
      ...
      np->state = RUNNABLE;
    
      // NOTE(review): the list is copied after the child has been marked
      // RUNNABLE. If the elided code above has already made the child
      // schedulable on another CPU, it could run before np->vma is complete --
      // confirm the ordering against the full function.
      np->vma = 0;
      struct vma *pv = p->vma;   // walks the parent's list
      struct vma *pre = 0;       // last vma appended to the child's list
      while(pv){
        // vma_alloc() returns a slot with its lock held; the lock is released
        // below once the slot is filled in and linked.
        struct vma *vma = vma_alloc();
        vma->start = pv->start;
        vma->end = pv->end;
        vma->off = pv->off;
        vma->length = pv->length;
        vma->permission = pv->permission;
        vma->flags = pv->flags;
        vma->file = pv->file;
        // The child's vma holds its own reference to the file.
        filedup(vma->file);
        vma->next = 0;
        // Append, preserving the parent's order.
        if(pre == 0){
          np->vma = vma;
        }else{
          pre->next = vma;
        }
        pre = vma;
        release(&vma->lock);
        pv = pv->next;
      }
      ...
    }
    
    // Excerpt of exit(): before the rest of the teardown, every mmap region of
    // the exiting process is written back, unmapped and released.
    void
    exit(int status)
    {
      struct proc *p = myproc();
    
      if(p == initproc)
        panic("init exiting");
    
      // munmap all mmap vma
      struct vma* v = p->vma;
      struct vma* pv;
      while(v){
        writeback(v, v->start, v->length);
        // Round up so a trailing partial page is unmapped too.
        uvmunmap(p->pagetable, v->start, PGROUNDUP(v->length) / PGSIZE, 1);
        fileclose(v->file);
        // Remember the successor before the slot is handed back.
        pv = v->next;
        acquire(&v->lock);
        v->next = 0;
        v->length = 0; // marks the slot free for vma_alloc()
        release(&v->lock);
        v = pv;
      }
      // Every slot has been returned to the global table and may be claimed by
      // another process at any time; do not keep a pointer to it.
      p->vma = 0;
      ...
    }
    
    bk
    下一篇:没有了