diff --git a/README.md b/README.md
index 314881e..2329c45 100644
--- a/README.md
+++ b/README.md
@@ -69,13 +69,14 @@ You can exit the emulator using: \. (press Ctrl+A, leave it, afterwar
 ## Usage
 
 ```shell
-./semu -k linux-image [-b dtb-file] [-i initrd-image] [-d disk-image]
+./semu -k linux-image [-b dtb-file] [-i initrd-image] [-d disk-image] [-c max_cycles]
 ```
 
 * `linux-image` is the path to the Linux kernel `Image`.
 * `dtb-file` is optional, as it specifies the user-specified device tree blob.
 * `initrd-image` is optional, as it specifies the user-specified initial RAM disk image.
 * `disk-image` is optional, as it specifies the path of a disk image in ext4 file system for the virtio-blk device.
+* `max_cycles` is optional, as it specifies the maximum number of cycles the emulator runs before exiting, which is useful for performance testing.
 
 ## Build Linux kernel image and root file system
 
diff --git a/main.c b/main.c
index fa94e23..1af6bb5 100644
--- a/main.c
+++ b/main.c
@@ -293,7 +293,8 @@ static void usage(const char *execpath)
 {
     fprintf(
         stderr,
-        "Usage: %s -k linux-image [-b dtb] [-i initrd-image] [-d disk-image]\n",
+        "Usage: %s -k linux-image [-b dtb] [-i initrd-image] "
+        "[-d disk-image] [-c max_cycles]\n",
         execpath);
 }
 
@@ -302,7 +303,8 @@ static void handle_options(int argc,
                            char **kernel_file,
                            char **dtb_file,
                            char **initrd_file,
-                           char **disk_file)
+                           char **disk_file,
+                           uint32_t *cycle_limit)
 {
     *kernel_file = *dtb_file = *initrd_file = *disk_file = NULL;
 
@@ -310,11 +312,12 @@ static void handle_options(int argc,
     struct option opts[] = {
         {"kernel", 1, NULL, 'k'}, {"dtb", 1, NULL, 'b'},
         {"initrd", 1, NULL, 'i'}, {"disk", 1, NULL, 'd'},
+        {"max_cycles", 1, NULL, 'c'},
         {"help", 0, NULL, 'h'},
     };
     int c;
-    while ((c = getopt_long(argc, argv, "k:b:i:d:h", opts, &optidx)) != -1) {
+    while ((c = getopt_long(argc, argv, "k:b:i:d:c:h", opts, &optidx)) != -1) {
         switch (c) {
         case 'k':
             *kernel_file = optarg;
             break;
@@ -328,6 +331,14 @@ static void handle_options(int argc,
         case 'd':
             *disk_file = optarg;
             break;
+        case 'c':
+            if (sscanf(optarg, "%u", cycle_limit) != 1) {
+                fprintf(stderr, "Cannot parse -c max_cycles argument '%s'.\n",
+                        optarg);
+                usage(argv[0]);
+                exit(2);
+            }
+            break;
         case 'h':
             usage(argv[0]);
             exit(0);
@@ -354,8 +365,9 @@ static int semu_start(int argc, char **argv)
     char *dtb_file;
     char *initrd_file;
     char *disk_file;
+    uint32_t cycle_limit = 0;
     handle_options(argc, argv, &kernel_file, &dtb_file, &initrd_file,
-                   &disk_file);
+                   &disk_file, &cycle_limit);
 
     /* Initialize the emulator */
     emu_state_t emu;
@@ -442,6 +454,18 @@ static int semu_start(int argc, char **argv)
         if (emu.vblk.InterruptStatus)
             emu_update_vblk_interrupts(&vm);
 #endif
+        if (cycle_limit && vm.insn_count >= cycle_limit) {
+#if MMU_CACHE_STATS
+            printf("\n");
+            printf("fetch hits: %12ld, misses: %12ld\n",
+                   vm.mmu_cache_fetch_ctx.hits, vm.mmu_cache_fetch_ctx.misses);
+            printf(" load hits: %12ld, misses: %12ld\n",
+                   vm.mmu_cache_load_ctx.hits, vm.mmu_cache_load_ctx.misses);
+            printf("store hits: %12ld, misses: %12ld\n",
+                   vm.mmu_cache_store_ctx.hits, vm.mmu_cache_store_ctx.misses);
+#endif
+            exit(0);
+        }
     }
 
     if (vm.insn_count_hi > emu.timer_hi ||
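For context on the `-c` handling above, here is a minimal standalone sketch that parses the same kind of cycle-count argument with `strtoul`, which rejects trailing junk (e.g. `100abc`) and out-of-range values that `sscanf("%u", ...)` silently accepts. The helper name `parse_cycle_limit` and the messages are illustrative only and are not part of semu.

```c
#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical helper: parse a decimal or hex cycle count into a uint32_t,
 * rejecting empty strings, trailing junk and out-of-range values. */
static int parse_cycle_limit(const char *s, uint32_t *out)
{
    char *end;
    errno = 0;
    unsigned long v = strtoul(s, &end, 0);
    if (errno || end == s || *end != '\0' || v > UINT32_MAX)
        return -1;
    *out = (uint32_t) v;
    return 0;
}

int main(int argc, char **argv)
{
    uint32_t cycle_limit = 0;
    if (argc > 1 && parse_cycle_limit(argv[1], &cycle_limit) != 0) {
        fprintf(stderr, "Cannot parse max_cycles argument '%s'.\n", argv[1]);
        return 2;
    }
    printf("cycle limit: %" PRIu32 "\n", cycle_limit);
    return 0;
}
```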
diff --git a/mmu_cache.h b/mmu_cache.h
new file mode 100644
index 0000000..9295306
--- /dev/null
+++ b/mmu_cache.h
@@ -0,0 +1,62 @@
+#pragma once
+#include <stdint.h>
+/* A simple, fixed-size translation cache for MMU entries with round-robin
+ * replacement. Address 0 is never cached and marks an empty entry, so a
+ * zero return value from a lookup means "no entry available" to the caller.
+ */
+
+#define MMU_CACHE_ENTRIES 3
+#define MMU_CACHE_IDX_TYPE int
+#define MMU_CACHE_STATS 0
+
+struct _mmu_cache_ctx {
+    MMU_CACHE_IDX_TYPE write_idx;
+    uint32_t from[MMU_CACHE_ENTRIES];
+    uint32_t to[MMU_CACHE_ENTRIES];
+#if MMU_CACHE_STATS
+    uint64_t hits, misses;
+#endif
+};
+
+static inline void mmu_cache_reset_ctx(struct _mmu_cache_ctx *cc) {
+#if MMU_CACHE_ENTRIES > 0
+    cc->write_idx = 0;
+    for (size_t i = 0; i != MMU_CACHE_ENTRIES; i++) {
+        cc->from[i] = cc->to[i] = 0;
+    }
+#endif
+}
+
+static inline uint32_t mmu_cache_lookup(struct _mmu_cache_ctx *cc,
+                                        uint32_t high_part) {
+#if MMU_CACHE_ENTRIES > 0
+    MMU_CACHE_IDX_TYPE i = cc->write_idx;
+    do {
+        if (cc->from[i] == high_part) {
+#if MMU_CACHE_STATS
+            cc->hits++;
+#endif
+            return cc->to[i];
+        }
+        i--;
+        if (i < 0) i = MMU_CACHE_ENTRIES - 1;
+    } while (i != cc->write_idx);
+#endif
+#if MMU_CACHE_STATS
+    cc->misses++;
+#endif
+    return 0; /* no entry available */
+}
+
+static inline void mmu_cache_insert(struct _mmu_cache_ctx *cc,
+                                    uint32_t ifrom,
+                                    uint32_t ito) {
+#if MMU_CACHE_ENTRIES > 0
+    MMU_CACHE_IDX_TYPE idx = cc->write_idx + 1;
+    if (idx == MMU_CACHE_ENTRIES)
+        idx = 0;
+    cc->write_idx = idx;
+    cc->from[idx] = ifrom;
+    cc->to[idx] = ito;
+#endif
+}
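As a quick illustration of the interface above, the following standalone program (not part of the patch; it assumes it is compiled next to `mmu_cache.h`, and the addresses are made-up examples) inserts one translation and shows the hit and miss cases, including the zero return value that signals "no entry available":

```c
#include <inttypes.h>
#include <stdio.h>

#include "mmu_cache.h"

int main(void)
{
    struct _mmu_cache_ctx ctx;
    mmu_cache_reset_ctx(&ctx);

    /* Cache one translation: virtual page 0x80001000 -> physical page
     * 0x00042000 (page-aligned "high parts", as mmu_translate() uses them). */
    mmu_cache_insert(&ctx, 0x80001000u, 0x00042000u);

    /* Hit: same virtual page; the caller recombines the page offset. */
    uint32_t hit = mmu_cache_lookup(&ctx, 0x80001000u);
    printf("hit : 0x%08" PRIx32 "\n", hit | 0x234u);

    /* Miss: an uncached page yields 0, i.e. "no entry available". */
    uint32_t miss = mmu_cache_lookup(&ctx, 0x90000000u);
    printf("miss: 0x%08" PRIx32 "\n", miss);
    return 0;
}
```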
diff --git a/riscv.c b/riscv.c
index 8cb00ad..7299c69 100644
--- a/riscv.c
+++ b/riscv.c
@@ -167,11 +167,19 @@ static inline uint32_t read_rs2(const vm_t *vm, uint32_t insn)
 
 /* virtual addressing */
 
+static void mmu_invalidate_caches(vm_t *vm)
+{
+    mmu_cache_reset_ctx(&vm->mmu_cache_fetch_ctx);
+    mmu_cache_reset_ctx(&vm->mmu_cache_load_ctx);
+    mmu_cache_reset_ctx(&vm->mmu_cache_store_ctx);
+}
+
 /* Pre-verify the root page table to minimize page table access during
  * translation time.
  */
 static void mmu_set(vm_t *vm, uint32_t satp)
 {
+    mmu_invalidate_caches(vm);
     if (satp >> 31) {
         uint32_t *page_table = vm->mem_page_table(vm, satp & MASK(22));
         if (!page_table)
@@ -228,16 +236,25 @@ static bool mmu_lookup(const vm_t *vm,
     return true;
 }
 
-static void mmu_translate(vm_t *vm,
-                          uint32_t *addr,
-                          const uint32_t access_bits,
-                          const uint32_t set_bits,
-                          const bool skip_privilege_test,
-                          const uint8_t fault,
-                          const uint8_t pfault)
+static inline void mmu_translate(vm_t *vm,
+                                 struct _mmu_cache_ctx *cctx,
+                                 uint32_t *addr,
+                                 const uint32_t access_bits,
+                                 const uint32_t set_bits,
+                                 const bool skip_privilege_test,
+                                 const uint8_t fault,
+                                 const uint8_t pfault)
 {
+    const uint32_t high_part = *addr & ~MASK(RV_PAGE_SHIFT);
+    uint32_t caddr = mmu_cache_lookup(cctx, high_part);
+
     /* NOTE: save virtual address, for physical accesses, to set exception. */
     vm->exc_val = *addr;
+
+    if (caddr) {
+        *addr = caddr | (*addr & MASK(RV_PAGE_SHIFT));
+        return;
+    }
 
     if (!vm->page_table)
         return;
@@ -265,16 +282,18 @@ static void mmu_translate(vm_t *vm,
         *pte_ref = new_pte;
 
     *addr = ((*addr) & MASK(RV_PAGE_SHIFT)) | (ppn << RV_PAGE_SHIFT);
+    mmu_cache_insert(cctx, high_part, ppn << RV_PAGE_SHIFT);
 }
 
 static void mmu_fence(vm_t *vm UNUSED, uint32_t insn UNUSED)
 {
-    /* no-op for now */
+    mmu_invalidate_caches(vm);
 }
 
 static void mmu_fetch(vm_t *vm, uint32_t addr, uint32_t *value)
 {
-    mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
+    mmu_translate(vm, &vm->mmu_cache_fetch_ctx,
+                  &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
                   RV_EXC_FETCH_PFAULT);
     if (vm->error)
         return;
@@ -287,7 +306,8 @@ static void mmu_load(vm_t *vm,
                      uint32_t *value,
                      bool reserved)
 {
-    mmu_translate(vm, &addr, (1 << 1) | (vm->sstatus_mxr ? (1 << 3) : 0),
+    mmu_translate(vm, &vm->mmu_cache_load_ctx,
+                  &addr, (1 << 1) | (vm->sstatus_mxr ? (1 << 3) : 0),
                   (1 << 6), vm->sstatus_sum && vm->s_mode, RV_EXC_LOAD_FAULT,
                   RV_EXC_LOAD_PFAULT);
     if (vm->error)
         return;
@@ -306,7 +326,8 @@ static bool mmu_store(vm_t *vm,
                       uint32_t value,
                       bool cond)
 {
-    mmu_translate(vm, &addr, (1 << 2), (1 << 6) | (1 << 7),
+    mmu_translate(vm, &vm->mmu_cache_store_ctx,
+                  &addr, (1 << 2), (1 << 6) | (1 << 7),
                   vm->sstatus_sum && vm->s_mode, RV_EXC_STORE_FAULT,
                   RV_EXC_STORE_PFAULT);
     if (vm->error)
@@ -336,6 +357,8 @@ void vm_set_exception(vm_t *vm, uint32_t cause, uint32_t val)
 
 void vm_trap(vm_t *vm)
 {
+    mmu_invalidate_caches(vm);
+
     /* Fill exception fields */
     vm->scause = vm->exc_cause;
     vm->stval = vm->exc_val;
@@ -357,6 +380,8 @@ void vm_trap(vm_t *vm)
 
 static void op_sret(vm_t *vm)
 {
+    mmu_invalidate_caches(vm);
+
     /* Restore from stack */
     vm->pc = vm->sepc;
     vm->s_mode = vm->sstatus_spp;
diff --git a/riscv.h b/riscv.h
index c416384..b544675 100644
--- a/riscv.h
+++ b/riscv.h
@@ -2,6 +2,7 @@
 
 #include <stdbool.h>
 #include <stdint.h>
+#include "mmu_cache.h"
 
 /* ERR_EXCEPTION indicates that the instruction has raised one of the
  * exceptions defined in the specification. If this flag is set, the
@@ -96,6 +97,8 @@ struct __vm_internal {
     uint32_t satp; /**< MMU */
     uint32_t *page_table;
 
+    struct _mmu_cache_ctx mmu_cache_fetch_ctx, mmu_cache_load_ctx, mmu_cache_store_ctx;
+
     void *priv; /**< environment supplied */
 
     /* Memory access sets the vm->error to indicate failure. On successful
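To make the purpose of the new invalidation hooks concrete, here is a small standalone sketch (again assuming it is compiled next to `mmu_cache.h`; the addresses are made-up and this is not semu code). Without a flush, an entry cached under one address space or privilege mode would still be returned after the switch; resetting the contexts, as `mmu_set()`, `mmu_fence()`, `vm_trap()` and `op_sret()` now do, forces the next access back to the full page-table walk, which re-applies the permission checks:

```c
#include <inttypes.h>
#include <stdio.h>

#include "mmu_cache.h"

int main(void)
{
    struct _mmu_cache_ctx ctx;
    mmu_cache_reset_ctx(&ctx);

    /* Address space A: virtual page 0x00400000 -> physical page 0x81000000. */
    mmu_cache_insert(&ctx, 0x00400000u, 0x81000000u);

    /* Switching to address space B *without* flushing: the lookup still
     * returns A's physical page, i.e. a stale translation. */
    printf("stale entry : 0x%08" PRIx32 "\n",
           mmu_cache_lookup(&ctx, 0x00400000u));

    /* What the invalidation hooks do on satp writes, sfence.vma, traps and
     * sret: drop every entry so the page table is walked again. */
    mmu_cache_reset_ctx(&ctx);
    printf("after flush : 0x%08" PRIx32 "\n",
           mmu_cache_lookup(&ctx, 0x00400000u));
    return 0;
}
```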