Update README.md

157d169a · 银角 · 0f7d452e · 157d169a
Commit 157d169a authored 3 years ago by 银角
Hide whitespace changes
Inline Side-by-side

Showing

with 349 additions and 1 deletion
+349 -1
--- a/README.md
+++ b/README.md
 # Proj121-金角银角与钝角

 Proj121-page-table-using-hashtable
-使用哈希页表实现虚拟机的stage-2页表
\ No newline at end of file
+使用哈希页表实现虚拟机的stage-2页表
+
+## 一、内核设计
+
+
+## 二、实验过程
+
+### 第一题
+
+
+
+### 第二题
+
+
+
+
+
+### 第三题
+
+#### 哈希页表的建立
+
+> 在RISC-V Hypervisor的stage-2缺页流程中，实现哈希页表建立过程。
+
+##### 建立页表的预置要求
+
+修改两个工具函数：
+
+* `my_set_pte`修改自`stage2_set_pte`，目的是为了将value值填入`pte`表项中。
+
+```c
+/*
+ * my_set_pte - store a value into a packed 7-byte page-table entry.
+ * @ptep:  destination entry; only its first 7 bytes are written.
+ * @value: source value; only its first 7 bytes in memory are copied
+ *         (the low 56 bits on a little-endian target).
+ */
+void my_set_pte(pte_t * ptep, unsigned long value){
+    const unsigned char *src = (const unsigned char *)&value;
+    unsigned char *dst = (unsigned char *)ptep;
+    int remaining = 7;
+
+    /* Byte-wise copy so that nothing beyond the 7th byte is written. */
+    while (remaining-- > 0) {
+        *dst++ = *src++;
+    }
+}
+```
+
+* `my_pte_val`修改自`pte_val`，目的是为了获取`pte`表项的值。
+
+```c
+/*
+ * my_pte_val - read the value held in a packed 7-byte page-table entry.
+ * @pte: entry passed by value; only its first 7 bytes are read.
+ *
+ * Returns an unsigned long whose first 7 bytes in memory are copied from
+ * @pte; any remaining bytes of the result stay zero.
+ */
+unsigned long my_pte_val(pte_t pte){
+    unsigned long result = 0;
+    const unsigned char *src = (const unsigned char *)&pte;
+    unsigned char *dst = (unsigned char *)&result;
+    int idx;
+
+    for (idx = 6; idx >= 0; idx--) {
+        dst[idx] = src[idx];
+    }
+    return result;
+}
+```
+
+##### 实现缺页填表操作
+
+实现缺页填写页表操作：实现`hash`函数对于`addr`的映射以及开放寻址。
+
+```c 
+/*
+ * stage2_set_pte - install a stage-2 mapping for @addr in the hashed page table.
+ * @kvm:     VM whose table (kvm->arch.pgd) is updated.
+ * @level:   unused by this implementation.
+ * @pcache:  unused by this implementation.
+ * @addr:    guest physical address being mapped.
+ * @new_pte: entry whose value is stored.
+ *
+ * The table is treated as an array of 64-byte lines: an 8-byte tag word
+ * followed by eight packed 7-byte PTE slots. In the tag word the top byte is
+ * the valid flag and the low 48 bits hold the block number (addr >> 17).
+ * Bits 14..16 of @addr (with a 64-bit unsigned long) select the slot inside
+ * the line. Collisions are resolved by probing the following lines in order.
+ *
+ * Always returns 0.
+ */
+static int stage2_set_pte(struct kvm *kvm, u32 level,
+                          struct kvm_mmu_memory_cache *pcache, gpa_t addr,
+                          const pte_t *new_pte)
+{
+    /* Start of the hashed table. */
+    unsigned char *table = (unsigned char *)(kvm->arch.pgd);
+    unsigned long blockNum = (unsigned long)addr >> 17;
+    /* Home line: low 12 bits of the block number. */
+    unsigned long hashIdx = blockNum & 0xfff;
+    /* Slot inside the line. */
+    unsigned long blockOff = (unsigned long)addr << 47 >> 61;
+    unsigned char *curLine = table + (hashIdx * 64);
+    unsigned long tag;
+    pte_t *slot;
+
+    /*
+     * NOTE(review): the probe neither wraps around nor stops at the end of
+     * the table; confirm the allocation guarantees that a free or matching
+     * line is always reached.
+     */
+    while (1) {
+        tag = *((unsigned long *)curLine);
+        /* Skip the 8-byte tag word, then index the 7-byte slots. */
+        slot = (pte_t *)(curLine + blockOff * 7 + 8);
+
+        if (!(tag >> 56)) {
+            /*
+             * Free line. Store the PTE value itself (not the address of
+             * @new_pte), then claim the line by writing valid flag + tag.
+             */
+            my_set_pte(slot, new_pte->pte);
+            *((unsigned long *)curLine) =
+                0x0100000000000000 | blockNum;
+            break;
+        }
+        if ((tag & 0x0000ffffffffffff) == blockNum) {
+            /* Line already belongs to this block: overwrite the slot. */
+            my_set_pte(slot, new_pte->pte);
+            break;
+        }
+        /* Occupied by another block: move on to the next line. */
+        curLine += 64;
+    }
+    return 0;
+}
+```
+
+##### 实现建表相关函数
+
+* `stage2_op_pte`是负责初始化页表项的函数，需要将其修改为支持哈希页表版本。
+
+```c
+/*
+ * stage2_op_pte - clear or write-protect a single hashed stage-2 PTE.
+ * @kvm:        unused here.
+ * @addr:       guest physical address of the mapping; must be PAGE_SIZE aligned
+ *              (enforced with BUG_ON).
+ * @ptep:       packed PTE slot to operate on.
+ * @ptep_level: unused here.
+ * @op:         STAGE2_OP_CLEAR zeroes the entry, STAGE2_OP_WP clears
+ *              _PAGE_WRITE; any other value leaves the entry unchanged.
+ *
+ * NOTE(review): no TLB flush is issued here after the entry changes;
+ * confirm that the caller takes care of it.
+ */
+static void stage2_op_pte(struct kvm *kvm, gpa_t addr, pte_t *ptep,
+                          u32 ptep_level, enum stage2_op op)
+{
+    BUG_ON(addr & (PAGE_SIZE - 1));
+
+    /* Nothing to do for an entry whose value is already zero. */
+    if (!my_pte_val(*ptep))
+        return;
+
+    if (op == STAGE2_OP_CLEAR)
+        my_set_pte(ptep, 0);
+    else if (op == STAGE2_OP_WP)
+        my_set_pte(ptep, my_pte_val(*ptep) & ~_PAGE_WRITE);
+}
+```
+
+
+
+* `stage2_get_leaf_entry`是根据`addr`获取对应页表项的函数，在多级页表中该函数会迭代查找，直到找到叶节点（存储最终的物理地址）。
+
+```c
+/*
+ * stage2_get_leaf_entry - look up the hashed stage-2 PTE slot for @addr.
+ * @kvm:        VM whose table (kvm->arch.pgd) is searched.
+ * @addr:       guest physical address to look up.
+ * @ptepp:      on success, set to the packed 7-byte PTE slot for @addr.
+ * @ptep_level: on success, set to 1.
+ *
+ * The table is treated as an array of 64-byte lines: an 8-byte tag word
+ * followed by eight packed 7-byte PTE slots. The search starts at the line
+ * selected by the low 12 bits of the block number (addr >> 17) and probes
+ * the following lines until the tag matches or an invalid line is found.
+ *
+ * Returns true when a line tagged with this block number is found, false
+ * when the probe reaches a line whose valid flag is clear.
+ */
+static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr, pte_t **ptepp,
+                                  u32 *ptep_level)
+{
+    /* Start of the hashed table. */
+    unsigned char *table = (unsigned char *)(kvm->arch.pgd);
+    unsigned long blockNum = (unsigned long)addr >> 17;
+    /*
+     * The home line must be computed with the same 12-bit mask as the
+     * insert path in stage2_set_pte; otherwise the lookup starts probing
+     * from a different line than the one the entry was stored in.
+     * NOTE(review): confirm the table holds at least 4096 lines.
+     */
+    unsigned long hashIdx = blockNum & 0xfff;
+    /* Slot inside the line: bits 14..16 of addr with a 64-bit unsigned long. */
+    unsigned long blockOff = (unsigned long)addr << 47 >> 61;
+    unsigned char *curLine = table + (hashIdx * 64);
+    unsigned long tag;
+
+    /*
+     * NOTE(review): the probe neither wraps around nor stops at the end of
+     * the table; it relies on reaching a matching or invalid line.
+     */
+    while (1) {
+        tag = *((unsigned long *)curLine);
+
+        /* Top byte of the tag word is the valid flag. */
+        if (!(tag >> 56))
+            return false;
+
+        if ((tag & 0x0000ffffffffffff) == blockNum) {
+            *ptep_level = 1;
+            /* Skip the 8-byte tag word, then index the 7-byte slots. */
+            *ptepp = (pte_t *)(curLine + blockOff * 7 + 8);
+            break;
+        }
+        /* Occupied by another block: move on to the next line. */
+        curLine += 64;
+    }
+    return true;
+}
+```
+
+#### 哈希页表的查询
+
+
+
+# 三、测试部分
+
+我们在`stream`测试集上进行了测试，对比了在Host上使用16KB页表大小与使用原有4KB页表大小的性能，以及在启用`kvm`后，在Guest上使用hash页表与使用原有三级页表映射的性能。
+
+## host-16k-hash
+
+```bash
+/apps # ./stream_c.exe
+-------------------------------------------------------------
+STREAM version $Revision: 5.10 $
+-------------------------------------------------------------
+This system uses 8 bytes per array element.
+-------------------------------------------------------------
+Array size = 10000000 (elements), Offset = 0 (elements)
+Memory per array = 76.3 MiB (= 0.1 GiB).
+Total memory required = 228.9 MiB (= 0.2 GiB).
+Each kernel will be executed 10 times.
+ The *best* time for each kernel (excluding the first iteration)
+ will be used to compute the reported bandwidth.
+-------------------------------------------------------------
+Number of Threads requested = 1
+Number of Threads counted = 1
+-------------------------------------------------------------
+Your clock granularity/precision appears to be 2 microseconds.
+Each test below will take on the order of 187141 microseconds.
+   (= 93570 clock ticks)
+Increase the size of the arrays if this shows that
+you are not getting at least 20 clock ticks per test.
+-------------------------------------------------------------
+WARNING -- The above is only a rough guideline.
+For best results, please be sure you know the
+precision of your system timer.
+-------------------------------------------------------------
+Function    Best Rate MB/s  Avg time     Min time     Max time
+Copy:            1833.0     0.087840     0.087290     0.088834
+Scale:            860.3     0.192468     0.185988     0.226276
+Add:             1107.3     0.217874     0.216739     0.225400
+Triad:            939.3     0.258147     0.255508     0.275860
+-------------------------------------------------------------[   22.601770] random: fast init done
+
+Solution Validates: avg error less than 1.000000e-13 on all three arrays
+-------------------------------------------------------------
+```
+
+## guest-16k-hash
+
+```bash
+-------------------------------------------------------------
+STREAM version $Revision: 5.10 $
+-------------------------------------------------------------
+This system uses 8 bytes per array element.
+-------------------------------------------------------------
+Array size = 10000000 (elements), Offset = 0 (elements)
+Memory per array = 76.3 MiB (= 0.1 GiB).
+Total memory required = 228.9 MiB (= 0.2 GiB).
+Each kernel will be executed 10 times.
+ The *best* time for each kernel (excluding the first iteration)
+ will be used to compute the reported bandwidth.
+-------------------------------------------------------------
+Number of Threads requested = 1
+Number of Threads counted = 1
+-------------------------------------------------------------
+Your clock granularity/precision appears to be 2 microseconds.
+Each test below will take on the order of 262511 microseconds.
+   (= 131255 clock ticks)
+Increase the size of the arrays if this shows that
+you are not getting at least 20 clock ticks per test.
+-------------------------------------------------------------
+WARNING -- The above is only a rough guideline.
+For best results, please be sure you know the
+precision of your system timer.
+-------------------------------------------------------------
+Function    Best Rate MB/s  Avg time     Min time     Max time
+Copy:            1364.9     0.121861     0.117227     0.146466
+Scale:            645.2     0.250996     0.247974     0.261212
+Add:              828.8     0.294635     0.289580     0.311800
+Triad:            699.8     0.359421     0.342947     0.478321
+-------------------------------------------------------------
+Solution Validates: avg error less than 1.000000e-13 on all three arrays
+-------------------------------------------------------------
+```
+
+## host-4k
+
+```bash
+-------------------------------------------------------------
+STREAM version $Revision: 5.10 $
+-------------------------------------------------------------
+This system uses 8 bytes per array element.
+-------------------------------------------------------------
+Array size = 10000000 (elements), Offset = 0 (elements)
+Memory per array = 76.3 MiB (= 0.1 GiB).
+Total memory required = 228.9 MiB (= 0.2 GiB).
+Each kernel will be executed 10 times.
+ The *best* time for each kernel (excluding the first iteration)
+ will be used to compute the reported bandwidth.
+-------------------------------------------------------------
+Number of Threads requested = 1
+Number of Threads counted = 1
+-------------------------------------------------------------
+Your clock granularity/precision appears to be 2 microseconds.
+Each test below will take on the order of 210880 microseconds.
+   (= 105440 clock ticks)
+Increase the size of the arrays if this shows that
+you are not getting at least 20 clock ticks per test.
+-------------------------------------------------------------
+WARNING -- The above is only a rough guideline.
+For best results, please be sure you know the
+precision of your system timer.
+-------------------------------------------------------------
+Function    Best Rate MB/s  Avg time     Min time     Max time
+Copy:            1326.4     0.121248     0.120624     0.122517
+Scale:            703.3     0.231843     0.227493     0.246709
+Add:              854.4     0.287438     0.280907     0.333396
+Triad:            770.8     0.313557     0.311359     0.324700
+-------------------------------------------------------------
+Solution Validates: avg error less than 1.000000e-13 on all three arrays
+-------------------------------------------------------------
+```
+
+## guest-4k
+
+```bash
+-------------------------------------------------------------
+STREAM version $Revision: 5.10 $
+-------------------------------------------------------------
+This system uses 8 bytes per array element.
+-------------------------------------------------------------
+Array size = 10000000 (elements), Offset = 0 (elements)
+Memory per array = 76.3 MiB (= 0.1 GiB).
+Total memory required = 228.9 MiB (= 0.2 GiB).
+Each kernel will be executed 10 times.
+ The *best* time for each kernel (excluding the first iteration)
+ will be used to compute the reported bandwidth.
+-------------------------------------------------------------
+Number of Threads requested = 1
+Number of Threads counted = 1
+-------------------------------------------------------------
+Your clock granularity/precision appears to be 2 microseconds.
+Each test below will take on the order of 313721 microseconds.
+   (= 156860 clock ticks)
+Increase the size of the arrays if this shows that
+you are not getting at least 20 clock ticks per test.
+-------------------------------------------------------------
+WARNING -- The above is only a rough guideline.
+For best results, please be sure you know the
+precision of your system timer.
+-------------------------------------------------------------
+Function    Best Rate MB/s  Avg time     Min time     Max time
+Copy:             640.3     0.251023     0.249885     0.253838
+Scale:            419.9     0.383085     0.381044     0.388234
+Add:              476.1     0.509056     0.504084     0.538106
+Triad:            440.2     0.550963     0.545153     0.586988
+-------------------------------------------------------------
+Solution Validates: avg error less than 1.000000e-13 on all three arrays
+-------------------------------------------------------------
+```
+
+<img src="https://s2.loli.net/2022/05/12/bdh9Ikptz1i86gu.png" alt="Guest" style="zoom:67%;" />
+
+<img src="https://s2.loli.net/2022/05/12/MYghpAPsboaS1ew.png" alt="Host" style="zoom:67%;" />