From e885d2633eaf9fa9d87c0a3a2212ee94d7419bd7 Mon Sep 17 00:00:00 2001
From: Brooke Vibber
Date: Mon, 29 Dec 2025 13:14:08 -0800
Subject: [PATCH] work in progress

---
NOTE(review): this patch was reconstructed from a copy whose line breaks,
indentation and angle-bracket spans had been stripped. Line counts match the
hunk headers and the diffstat. Restored type parameters: `Vec<u8>` / `Vec<u32>`
follow from the `vec![0u8; ..]` / `vec![0u32; ..]` initialisers;
`HashMap<i64, ExecutorFunc>` is inferred from the "PC to function pointer"
comment -- TODO confirm the key type. Leading whitespace on context and
removed lines is a best guess; apply with `git apply --ignore-whitespace`
if it does not match. The author e-mail address is missing from the From:
line. One comment typo ("pags") was corrected.

 src/main.rs | 219 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 194 insertions(+), 25 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 4e851c3..07bea61 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,162 @@
+use std::collections::HashMap;
+use std::vec::Vec;
+
+const PAGE_SIZE: usize = 4096;
+
+// Actively used bits in our limited implementation:
+const BIT_V: i64 = 1;
+const BIT_R: i64 = 2;
+const BIT_W: i64 = 4;
+const BIT_X: i64 = 8;
+
+// (Not used) - user mode accessible
+const BIT_U: i64 = 16;
+
+// (Not used) - global mapping
+const BIT_G: i64 = 32;
+
+// (Not presently used) accessed -- set if we've touched this since last time A cleared
+const BIT_A: i64 = 64;
+
+// (Will be used for JIT on writes to executable pages) dirty -- written since last time D cleared
+const BIT_D: i64 = 128;
+
+// (Not used)
+const BITS_RSW_LO: i64 = 256;
+const BITS_RSW_HI: i64 = 512;
+const BITS_RSW: i64 = BITS_RSW_LO | BITS_RSW_HI;
+
+
+type ExecutorFunc = fn(i64, &mut CoreState, &mut MachineState) -> i64;
+
 #[repr(C)]
-struct MachineState<'a> {
-    memory: &'a [u8]
+struct MachineState {
+    memory: Vec<u8>,
+    pages: Vec<u32>,
+}
+
+/**
+ * Note that physical memory accessors can traverse page boundaries;
+ * we lay out linear memory from 0 to +4 gigabytes and will allocate
+ * as many page table entries as are needed to cover RAM. These eat
+ * up an extra 4 bytes per 4 kilobytes of address space used, initially
+ * allocating enough for all physical memory initially allocated.
+ *
+ * This will be relatively space-inefficient for sparse address spaces
+ * in the range of several gigabytes and more but requires only one
+ * table lookup per load/store, or two if spanning pages.
+ *
+ * All threads in the process's machine will have access to the same
+ * page tables, even if running on different threads.
+ */
+impl MachineState {
+    fn restore(memory: Vec<u8>, pages: Vec<u32>) -> Self {
+        if ((memory.len() >> 12) << 12) != memory.len() {
+            panic!("memory size must be a multiple of 4096 bytes");
+        }
+        if ((memory.len() >> 12)) != pages.len() {
+            panic!("page data is wrong length for memory size");
+        }
+        return Self {
+            memory,
+            pages
+        };
+    }
+
+    fn new(memory_size: usize) -> Self {
+        let memory = vec![0u8; memory_size];
+        let pages= vec![0u32; memory_size >> 12];
+        return Self::restore(memory, pages);
+    }
+
+    fn get_page_info(&mut self, address: usize) -> u32 {
+        return self.pages[address >> 12];
+    }
+
+    fn set_page_info(&mut self, address: usize, value: u32) {
+        self.pages[address >> 12] = value;
+    }
+
+    fn lb_physical(&mut self, address: usize) -> i64 {
+        return self.memory[address] as i8 as i64;
+    }
+
+    fn lbu_physical(&mut self, address: usize) -> i64 {
+        return self.memory[address] as u8 as i64;
+    }
+
+    fn lh_physical(&mut self, address: usize) -> i64 {
+        return (
+            (self.memory[address    ] as u16) |
+            ((self.memory[address + 1] as u16) << 8)
+        ) as i16 as i64;
+    }
+
+    fn lhu_physical(&mut self, address: usize) -> i64 {
+        return (
+            (self.memory[address    ] as u16) |
+            ((self.memory[address + 1] as u16) << 8)
+        ) as i64;
+    }
+
+    fn lw_physical(&mut self, address: usize) -> i64 {
+        return (
+            (self.memory[address    ] as u32) |
+            ((self.memory[address + 1] as u32) << 8) |
+            ((self.memory[address + 2] as u32) << 16) |
+            ((self.memory[address + 3] as u32) << 24)
+        ) as i32 as i64;
+    }
+
+    fn lwu_physical(&mut self, address: usize) -> i64 {
+        return (
+            (self.memory[address    ] as u32) |
+            ((self.memory[address + 1] as u32) << 8) |
+            ((self.memory[address + 2] as u32) << 16) |
+            ((self.memory[address + 3] as u32) << 24)
+        ) as u32 as i64;
+    }
+
+    fn ld_physical(&mut self, address: usize) -> i64 {
+        return (
+            (self.memory[address    ] as u64) |
+            ((self.memory[address + 1] as u64) << 8) |
+            ((self.memory[address + 2] as u64) << 16) |
+            ((self.memory[address + 3] as u64) << 24) |
+            ((self.memory[address + 4] as u64) << 32) |
+            ((self.memory[address + 5] as u64) << 40) |
+            ((self.memory[address + 6] as u64) << 48) |
+            ((self.memory[address + 7] as u64) << 56)
+        ) as i64;
+    }
+
+    fn sb_physical(&mut self, address: usize, value: i64) {
+        self.memory[address] = value as u8;
+    }
+
+    fn sh_physical(&mut self, address: usize, value: i64) {
+        self.memory[address] = value as u8;
+        self.memory[address + 1] = (value >> 8) as u8;
+    }
+
+    fn sw_physical(&mut self, address: usize, value: i64) {
+        self.memory[address] = value as u8;
+        self.memory[address + 1] = (value >> 8) as u8;
+        self.memory[address + 2] = (value >> 16) as u8;
+        self.memory[address + 3] = (value >> 24) as u8;
+    }
+
+    fn sd_physical(&mut self, address: usize, value: i64) {
+        self.memory[address] = value as u8;
+        self.memory[address + 1] = (value >> 8) as u8;
+        self.memory[address + 2] = (value >> 16) as u8;
+        self.memory[address + 3] = (value >> 24) as u8;
+        self.memory[address + 4] = (value >> 32) as u8;
+        self.memory[address + 5] = (value >> 40) as u8;
+        self.memory[address + 6] = (value >> 48) as u8;
+        self.memory[address + 7] = (value >> 56) as u8;
+    }
+
 }
 
 #[repr(C)]
@@ -8,6 +164,8 @@ struct CoreState {
     // Integer registers
     x: [i64; 32],
 
+    // Do we need pc? we're passing it around as active state
+
     // Floating point registers
     // f32 values get... NaN-boxed into f64 values? wild
     // probably most efficiently treated like a union so can
@@ -16,9 +174,7 @@ struct CoreState {
     f: [f64; 32],
 
     // 4096 csrs? no we're not gonna store them all
-
-    // do we need pc? we're passing it around as active state
-    pc: i64,
+    satp: i64,
 
     // * fflags, accrued exceptions: bits 0-4
     //   * nx: bit 0
@@ -29,8 +185,38 @@ struct CoreState {
     // * frm, rounding mode: bits 5-7
     // * reserved: bits 8-31
     fcsr: i32,
+
+    // When we add AOT and JIT, compiled functions will be
+    // referenced in this lookup from PC to function pointer.
+    // Because function references are linked separately on
+    // each thread in WebAssembly, this has to live in each
+    // core's state separately.
+    executors: HashMap<i64, ExecutorFunc>
 }
 
+impl CoreState {
+    fn new() -> Self {
+        return Self {
+            x: [
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0,
+                0, 0, 0, 0, 0, 0, 0, 0
+            ],
+            satp: 0,
+            f: [
+                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+                0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
+            ],
+            fcsr: 0,
+            executors: HashMap::new()
+        }
+    }
+}
+
+
 // R-type
 // * opcode: bits 0-6
 // * rd: bits 7-11
@@ -97,26 +283,9 @@ extern "C" fn interpreter(
 }
 
 fn main() {
-    let size = 128 * 1024 * 1024;
-    let mut memory = vec![0u8; size];
-    let mut machine = MachineState {
-        memory: &mut memory[..]
-    };
-    let mut core = CoreState {
-        x: [
-            0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0,
-            0, 0, 0, 0, 0, 0, 0, 0
-        ],
-        f: [
-            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
-        pc: 0,
-        fcsr: 0,
-    };
+    let size = 8 * 1024 * 1024;
+    let mut machine = MachineState::new(size);
+    let mut core = CoreState::new();
     let pc = interpreter(&mut machine, &mut core, 0);
     println!("Ended with PC {}", pc);
 }