PKE labs

Posted on 2024-03-17 In school

1. 做了什么

上学期完成了一些基于 RISC-V 指令集的 PKE(Proxy Kernel for Education) 的基础实验，这学期的课程设计要求完成后面的挑战部分。实验的代码和文档如下：

1. 代码：https://gitee.com/hustos/riscv-pke

2. 文档：https://gitee.com/hustos/pke-doc

3. 我的完成源码：https://github.com/peacewang017/OS_labs

这学期在完成的代码基础上实现了一些额外的功能，最终为其添加了一个简易的 shell ，由于原版的 PKE 实在实现的非常简陋且奇怪，自己对于其中一部分逻辑进行了改写，在这里记录下自己的实现逻辑。

2. 笔记

进程调度

对于PKE进程的调度，我们这样进行操作：

用户态中断进入内核态时，将 current 置为 ready ，并插入 ready 队列尾
schedule 时，首先扫描所有 blocked 队列里的进程，如果等待子进程已是 zombie 状态，则将其设为 free，并重置 waiting_pid 字段，将等待中的进程从 blocked 队列删除并加入 ready 队列；接着从 ready 队列首取出一个进程设为 current ，并将其从 ready 队列删除，调用 switch_to 从内核态返回用户态
wait 时，current 进入内核态，先执行[1]，接着执行 wait 后被移除并放入 blocked 队列，并设置 waiting_pid 字段，最后执行[3]
fork 时，current 进入内核态，先执行[1]，接着申请一个进程，放入 ready 队列末尾
free 时，current 进入内核态，先执行[1]，接着被从 ready 队列移除

// process.c
int do_wait(int child_pid){
  // handle with waiting child proc
  current->waiting_pid = child_pid;
  from_ready_to_blocked(current);
  schedule();
  return child_pid;
}

int do_fork( process* parent){
  alloc_proc(child);
  init_proc(child);

  child->status = READY;
  child->parent = parent;
  from_blocked_to_ready( child );
  return child->pid;
}

int free_process( process* proc ) {
  // we set the status to ZOMBIE, but cannot destruct its vm space immediately.
  // since proc can be current process, and its user kernel stack is currently in use!
  // but for proxy kernel, it (memory leaking) may NOT be a really serious issue,
  // as it is different from regular OS, which needs to run 7x24.
  proc->status = ZOMBIE;
  remove_from_blocked_queue(proc);
  remove_from_ready_queue(proc);

  return 0;
}

// syscall.c
long do_syscall(long a0, long a1, long a2, long a3, long a4, long a5, long a6, long a7){
  insert_to_ready_queue(current);
}

// sched.c
void schedule() {
  // 检查父进程等待的子进程是否已经zombie，如果是，则将父进程重新投入运行（重置waiting_pid, status）
  if (blocked_queue_head){
    for(process* p = blocked_queue_head; p != NULL; p = p->queue_next){
      if(procs[p->waiting_pid].status == ZOMBIE){
        p->waiting_pid = -1;
        from_blocked_to_ready(p);
      }
    }
  }

  if ( !ready_queue_head ){
    int should_shutdown = 1;

    for( int i=0; i<NPROC; i++ )
      if( (procs[i].status != FREE) && (procs[i].status != ZOMBIE) ){
        should_shutdown = 0;
        sprint( "ready queue empty, but process %d is not in free/zombie state:%d\n", 
          i, procs[i].status );
      }

    if( should_shutdown ){
      sprint( "no more ready processes, system shutdown now.\n" );
      shutdown( 0 );
    }else{
      panic( "Not handled: we should let system wait for unfinished processes.\n" );
    }
  }

  current = ready_queue_head;
  assert( current->status == READY );
  ready_queue_head = ready_queue_head->queue_next;

  current->status = RUNNING;
  sprint( "going to schedule process %d to run.\n", current->pid );
  switch_to( current );
}

文件查找

PKE 实现了一个具体文件系统 RFS，并将其挂载在虚拟文件系统 VFS 的 /RAMDISK0 下面，所以，在不实现 spike 对宿主文件系统 hostfs 的操作的情况下，我们只对 /RAMDISK 下的文件有读写能力。

我们通过改写 lookup_final_dentry() 函数使得 VFS 拥有处理相对路径的功能。

struct dentry *lookup_final_dentry(const char *path, struct dentry **parent, char *miss_name) {
  // sprint("lookup_final_dentry: %s\n", path);
  int hartid = (int)read_tp();

  char path_copy[MAX_PATH_LEN];
  strcpy(path_copy, path);

  // 处理直接文件名，转为相对路径
  if(path_copy[0] != '.' && path_copy[0] != '/'){
    str_insert_to_head(path_copy, "./");
  }

  // split the path, and retrieves a token at a time.
  // note: strtok() uses a static (local) variable to store the input path
  // string at the first time it is called. thus it can out a token each time.
  // for example, when input path is: /RAMDISK0/test_dir/ramfile2
  // strtok() outputs three tokens: 1)RAMDISK0, 2)test_dir and 3)ramfile2
  // at its three continuous invocations.
  int flag = 0;
  char *token = strtok(path_copy, "/");
  struct dentry* this = *parent;
  struct dentry* cwd = NULL;
  if(current[hartid] != NULL){
    cwd = current[hartid]->pfiles->cwd;
  }

  while (token != NULL) {
    if(strcmp(token, "..") == 0){
      if(cwd == NULL){
        panic("lookup_final_dentry: proc not init\n");
        return NULL;
      }
      if(flag == 0){
        if(cwd == vfs_root_dentry){
          panic("already in root\n");
          return NULL;
        }
        (*parent) = cwd->parent;       
        flag = 1;
      }else{
        if((*parent) == vfs_root_dentry){
          panic("already in root\n");
          return NULL;
        }
        (*parent) = (*parent)->parent;
      }
      this = (*parent);
    }else if(strcmp(token, ".") == 0){
      if(cwd == NULL){
        panic("lookup_final_dentry: proc not init\n");
        return NULL;
      }
      if(flag == 0){
        (*parent) = cwd;
        flag = 1;
      }
      this = (*parent);  
    }else{
      // function return subdir's dentry
      this = hash_get_dentry((*parent), token);

      if (this == NULL) {
        // function return subdir's dentry
        this = alloc_vfs_dentry(token, NULL, *parent);

        // get subdir's vinode
        struct vinode *found_vinode = viop_lookup((*parent)->dentry_inode, this);

        // dentry not in hash && vinode not in disk
        // return miss
        if (found_vinode == NULL) {
          // not found in both hash table and directory file on disk.
          free_page(this);
          strcpy(miss_name, token);
          return NULL;
        }

        // dentry not in hash && vinode in disk
        // ensure dentry and vinode in hash
        struct vinode *same_inode = hash_get_vinode(found_vinode->sb, found_vinode->inum);
        if (same_inode != NULL) {
          // the vinode is already in the hash table (i.e. we are opening another hard link)
          this->dentry_inode = same_inode;
          same_inode->ref++;
          free_page(found_vinode);
        } else {
          // the vinode is not in the hash table
          this->dentry_inode = found_vinode;
          found_vinode->ref++;
          hash_put_vinode(found_vinode);
        }
        hash_put_dentry(this);
      }
      flag = 1;
      *parent = this;
    }

    // get next token
    token = strtok(NULL, "/");
  }
  return this;
}

malloc 与堆

我的 malloc 实现非常的不优雅，但为了记录用，仍然提一下。

对于数据结构，我们维护两个二元表：

typedef struct page_dentry{
  uint64 va_page;
  uint64 pa_page;
}page_dentry;

page_dentry* page_dir;

typedef struct malloc_dentry{
  uint64 va_start;
  uint64 va_end;
}malloc_dentry;

malloc_dentry* malloc_dir;

page_dentry 表用来记录用户程序调用 alloc_page 申请并 map 过的页，同时记录页虚拟地址和物理地址，相当于一个小型页表；

malloc_dentry 表用来记录用户程序调用 malloc 时从已 alloc 的页中取走的空间，表项是 malloc 空间的起止虚拟地址。

按照以下规则来进行运算：

do_malloc 时，首先检查有没有分配页面，如果没有分配页面，则申请需要的页数，添加 malloc_dir 后返回；如果至少有一个已经分配的页，在 malloc_dentry 中从头往尾寻找是否有可用空隙供插入；如果没有空隙，则申请页面
对于 page_dir 和 malloc_dir，需要保证增序，保证[1]中从头往尾查找可用空隙的正确性
free 时，更新 malloc_dir, 在 malloc 放生后，我们先不返回页面，只有在进程 exit 后，统一 free 所有 page 并清空两个 dir

// syscall.c
uint64 do_malloc(int n){
  // 在没有已分配页面时
  if(current->num_page == 0){
    // 需要num_page页,alloc后map,加入表中
    int page_need = ROUNDUP(n, PGSIZE) / PGSIZE;
    if(alloc_n_page(page_need) == 0){
      panic("page alloc error\n");
    }

    // 添加malloc表
    uint64 va_start = current->page_dir[0].va_page;
    if(current->num_malloc + 1 > MAX_MALLOC_IN_HEAP){
      panic("malloc error\n");
    }
    add_to_malloc_dir(va_start, va_start + n);
    // print_malloc_dir();
    return va_start;
  }

  uint64 va_page_start = current->page_dir[0].va_page;
  uint64 va_page_end = current->page_dir[current->num_page - 1].va_page + PGSIZE;

  // 首先检查头是否可用
  int front_offset = current->malloc_dir[0].va_start - va_page_start;
  if(front_offset >= n){
    add_to_malloc_dir(va_page_start, va_page_start + n);
    // print_malloc_dir();
    return va_page_start;
  }

  // 检查空隙是否可用
  if(current->num_malloc >= 2){
    for(int i = 0; i < current->num_malloc - 1; i++){
      int gap = current->malloc_dir[i + 1].va_start - current->malloc_dir[i].va_end;
      if(gap >= n){
        uint64 new_va_start = current->malloc_dir[i].va_end;
        add_to_malloc_dir(new_va_start, new_va_start + n);
        // print_malloc_dir();
        return new_va_start;
      }
    }
  }

  // 使用末尾，先填入malloc表，（不够时）再去alloc page并map
  uint64 new_va_start = current->malloc_dir[current->num_malloc - 1].va_end;
  add_to_malloc_dir(new_va_start, new_va_start + n);
  // print_malloc_dir();

  int rear_offset = va_page_end - new_va_start;
  if(rear_offset < n){
    int page_need = ROUNDUP((n - rear_offset), PGSIZE) / PGSIZE;
    if(alloc_n_page(page_need) == 0){
      panic("page alloc error\n");
    }
  }
  return new_va_start;
}

void do_free(uint64 va){
  int index;
  if((index = find_malloc_dir(va)) == -1){
    panic("free error\n");
  }
  remove_from_malloc_dir(index);

  // 注意到这里 free 的处理时简化过的，即 malloc 放生后，我们先不返回页面，只有在进程 exit 后，我们统一 free 所有被 malloc 占用的 page 并清空两个 dir。
}

int alloc_n_page(int n){
  if(current->num_page + n > MAX_PAGE_IN_HEAP){
    return 0;
  }
  for(int i = 0; i < n; i++){
    uint64 pa = (uint64)alloc_page();
    user_vm_map((pagetable_t)current->pagetable, g_ufree_page, PGSIZE, pa, prot_to_type(PROT_WRITE | PROT_READ, 1));
    add_to_page_dir(g_ufree_page, pa);
    g_ufree_page += PGSIZE;
  }
  return 1;
}

void add_to_page_dir(uint64 va_page, uint64 pa_page);
void sort_page_dir();
void add_to_malloc_dir(uint64 va_start, uint64 va_end);
void sort_malloc_dir();
int find_malloc_dir(uint64 va);
void remove_from_malloc_dir(int index);

这里的 malloc 纯粹是为满足功能和已经编写好的系统的逆天设计，关于 Linux 中的 malloc 是如何实现的，可以参考：

https://jacktang816.github.io/post/mallocandfree/

PKE 内存的管理

另一个逆天的事情是，PKE 内存存在泄露问题，变为 ZOMBIE 态的进程，并没有被重新利用，同时进程空间中除了因 malloc 而申请的页，其他申请过的页面都没有被 free，这样内存会越用越少…

3. 吐槽

因为最近事情很多，其他的功能也不想在往 shell 里面迁移了，说一点自己的想法。

我认为尽管 PKE 是 HUST 的老师投入较多且算得上用心的课程实验，但实在算不上设计良好。实际上，在做后续实验的时候，已经感觉并不像在“学习”一个操作系统，而是在往一个乱七八糟的软件上添加各种简陋的功能模块，并且这些功能模块还经不起测试，这实在是一件 demanding 且 unworthy 的事情，还不如去看看 XV6 或者备考下 GRE…