use std::collections::HashMap;
use std::vec::Vec;

const PAGE_BITS: usize = 12;
const PAGE_SIZE: usize = 1 << PAGE_BITS;

// Actively used bits in our limited implementation:
const BIT_V: u8 = 1;
const BIT_R: u8 = 2;
const BIT_W: u8 = 4;
const BIT_X: u8 = 8;

// (Not used) - user mode accessible
const BIT_U: u8 = 16;

/*
// (Not used) - global mapping
const BIT_G: u8 = 32;

// (Not presently used) accessed -- set if we've touched this since last time A cleared
const BIT_A: u8 = 64;

// (Not presently used) dirty -- written since last time D cleared
// Note each core/hart has its own dirty state for local JIT
const BIT_D: u8 = 128;

// (Not used, nor is there room for them in the u8 flags) reserved for supervisor
const BITS_RSW_LO: i64 = 256;
const BITS_RSW_HI: i64 = 512;
const BITS_RSW: i64 = BITS_RSW_LO | BITS_RSW_HI;
*/

#[repr(C)]
#[derive(Clone, Copy)]
struct PageTableEntry {
    flags: u8
}

impl PageTableEntry {
    fn new(flags: u8) -> Self {
        return Self {
            flags
        }
    }

    fn as_u8(&self) -> u8 {
        return self.flags;
    }

    fn is_valid(&self) -> bool {
        return self.flags & BIT_V == BIT_V;
    }

    fn is_readable(&self) -> bool {
        return self.flags & (BIT_V | BIT_R) == (BIT_V | BIT_R);
    }

    fn is_writable(&self) -> bool {
        return self.flags & (BIT_V | BIT_R | BIT_W) == (BIT_V | BIT_R | BIT_W);
    }

    fn is_executable(&self) -> bool {
        return self.flags & (BIT_V | BIT_R | BIT_X) == (BIT_V | BIT_R | BIT_X);
    }
}

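// Illustrative usage sketch, not part of the original file: composing the flag
// constants above into an entry and querying the permission helpers. The function
// name example_pte_usage is hypothetical.
#[allow(dead_code)]
fn example_pte_usage() {
    let pte = PageTableEntry::new(BIT_V | BIT_R | BIT_W);
    assert!(pte.is_valid() && pte.is_readable() && pte.is_writable());
    // Execute permission also requires BIT_X, which this entry lacks.
    assert!(!pte.is_executable());
}
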
type ExecutorFunc = fn(i64, &mut CoreState, &mut MachineState) -> i64;

#[repr(C)]
struct MachineState {
    memory: Vec<u8>,
    pages: Vec<PageTableEntry>,
}

/**
 * Note that physical memory accessors can traverse page boundaries;
 * we lay out linear memory from 0 to +4 gigabytes and will allocate
 * as many page table entries as are needed to cover RAM. These eat
 * up an extra 1 byte per 4 KiB of address space used (1 MiB per 4 GiB),
 * and we initially allocate enough entries to cover all of physical memory.
 *
 * This will be relatively space-inefficient for sparse address spaces
 * in the range of several gigabytes and more, but it requires only one
 * table lookup per load/store, or two if the access spans pages.
 *
 * All cores in the process's machine share the same page tables,
 * even when running on different threads.
 */
impl MachineState {
    fn new_with_state(memory: Vec<u8>, pages: Vec<PageTableEntry>) -> Self {
        if ((memory.len() >> PAGE_BITS) << PAGE_BITS) != memory.len() {
            panic!("memory size must be a multiple of 4096 bytes");
        }
        if (memory.len() >> PAGE_BITS) != pages.len() {
            panic!("page data is wrong length for memory size");
        }
        return Self {
            memory,
            pages
        };
    }

    fn new(memory_size: usize) -> Self {
        let memory = vec![0u8; memory_size];
        let pages = vec![PageTableEntry::new(0); memory_size >> PAGE_BITS];
        return Self::new_with_state(memory, pages);
    }

    fn get_page_table_entry(&mut self, address: usize) -> PageTableEntry {
        let page = address >> PAGE_BITS;
        if page < self.pages.len() {
            return self.pages[page];
        } else {
            return PageTableEntry::new(0);
        }
    }

    fn set_page_table_entry(&mut self, address: usize, entry: PageTableEntry) {
        let page = address >> PAGE_BITS;
        if page < self.pages.len() {
            self.pages[page] = entry;
        } else {
            panic!("@fixme: handle attempts to expand address space");
        }
    }

    fn lb_physical(&mut self, address: usize) -> i64 {
        return self.memory[address] as i8 as i64;
    }

    fn lbu_physical(&mut self, address: usize) -> i64 {
        return self.memory[address] as u8 as i64;
    }

    fn lh_physical(&mut self, address: usize) -> i64 {
        return (
            (self.memory[address    ] as u16) |
            ((self.memory[address + 1] as u16) << 8)
        ) as i16 as i64;
    }

    fn lhu_physical(&mut self, address: usize) -> i64 {
        return (
            (self.memory[address    ] as u16) |
            ((self.memory[address + 1] as u16) << 8)
        ) as i64;
    }

    fn lw_physical(&mut self, address: usize) -> i64 {
        return (
            (self.memory[address    ] as u32) |
            ((self.memory[address + 1] as u32) << 8) |
            ((self.memory[address + 2] as u32) << 16) |
            ((self.memory[address + 3] as u32) << 24)
        ) as i32 as i64;
    }

    fn lwu_physical(&mut self, address: usize) -> i64 {
        return (
            (self.memory[address    ] as u32) |
            ((self.memory[address + 1] as u32) << 8) |
            ((self.memory[address + 2] as u32) << 16) |
            ((self.memory[address + 3] as u32) << 24)
        ) as i64;
    }

    fn ld_physical(&mut self, address: usize) -> i64 {
        return (
            (self.memory[address    ] as u64) |
            ((self.memory[address + 1] as u64) << 8) |
            ((self.memory[address + 2] as u64) << 16) |
            ((self.memory[address + 3] as u64) << 24) |
            ((self.memory[address + 4] as u64) << 32) |
            ((self.memory[address + 5] as u64) << 40) |
            ((self.memory[address + 6] as u64) << 48) |
            ((self.memory[address + 7] as u64) << 56)
        ) as i64;
    }

    fn sb_physical(&mut self, address: usize, value: i64) {
        self.memory[address] = value as u8;
    }

    fn sh_physical(&mut self, address: usize, value: i64) {
        self.memory[address] = value as u8;
        self.memory[address + 1] = (value >> 8) as u8;
    }

    fn sw_physical(&mut self, address: usize, value: i64) {
        self.memory[address] = value as u8;
        self.memory[address + 1] = (value >> 8) as u8;
        self.memory[address + 2] = (value >> 16) as u8;
        self.memory[address + 3] = (value >> 24) as u8;
    }

    fn sd_physical(&mut self, address: usize, value: i64) {
        self.memory[address] = value as u8;
        self.memory[address + 1] = (value >> 8) as u8;
        self.memory[address + 2] = (value >> 16) as u8;
        self.memory[address + 3] = (value >> 24) as u8;
        self.memory[address + 4] = (value >> 32) as u8;
        self.memory[address + 5] = (value >> 40) as u8;
        self.memory[address + 6] = (value >> 48) as u8;
        self.memory[address + 7] = (value >> 56) as u8;
    }

    fn store_buffer_physical(&mut self, address: usize, bytes: &[u8]) {
        self.memory[address..address + bytes.len()].copy_from_slice(bytes);
    }
}
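
// Illustrative sketch, not part of the original file: a permission-checked byte
// load that consults the page table entry before touching the identity-mapped
// linear memory described above, returning None if the page is not valid and
// readable. The method name lb_checked is hypothetical.
impl MachineState {
    #[allow(dead_code)]
    fn lb_checked(&mut self, address: usize) -> Option<i64> {
        // A wider access spanning a page boundary would need a second lookup;
        // a single byte cannot cross pages.
        if self.get_page_table_entry(address).is_readable() {
            Some(self.lb_physical(address))
        } else {
            None
        }
    }
}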

#[repr(C)]
struct CoreState {
    // Integer registers
    x: [i64; 32],

    // Do we need pc? we're passing it around as active state

    // Floating point registers
    // f32 values get... NaN-boxed into f64 values? wild
    // probably most efficiently treated like a union so we can
    // do f32-native loads and stores and also update the top
    // bytes separately
    f: [f64; 32],

    // * fflags, accrued exceptions: bits 0-4
    //   * nx: bit 0
    //   * uf: bit 1
    //   * of: bit 2
    //   * dz: bit 3
    //   * nv: bit 4
    // * frm, rounding mode: bits 5-7
    // * reserved: bits 8-31
    fcsr: i32,

    // When we add AOT and JIT, compiled functions will be
    // referenced in this lookup from PC to function pointer.
    // Because function references are linked separately on
    // each thread in WebAssembly, this has to live in each
    // core's state separately.
    executors: HashMap<i64, ExecutorFunc>,

    // Local dirty flags for JIT pages.
    // When we get a fence.i instruction, look for all dirty
    // pages and invalidate any functions including them.
    // Takes up to 1 byte per 4 KiB (1 MiB per 4 GiB) per thread.
    // Could be made more compact if only 1 bit is needed.
    dirty: Vec<u8>
}
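
// Illustrative sketch, not part of the original file, of the f32 NaN-boxing
// mentioned in the comment on `f` above: under RV64 with the F extension, a
// single-precision value sits in the low 32 bits of the 64-bit register with the
// upper 32 bits all set, and anything not boxed that way reads back as the
// canonical NaN. The helper names are hypothetical.
#[allow(dead_code)]
fn nan_box_f32(value: f32) -> f64 {
    f64::from_bits(0xffff_ffff_0000_0000u64 | value.to_bits() as u64)
}

#[allow(dead_code)]
fn nan_unbox_f32(reg: f64) -> f32 {
    let bits = reg.to_bits();
    if (bits >> 32) == 0xffff_ffff {
        f32::from_bits(bits as u32)
    } else {
        // Improperly boxed values are treated as the canonical NaN.
        f32::NAN
    }
}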

impl CoreState {
    fn new(machine: &MachineState) -> Self {
        return Self {
            x: [0i64; 32],
            f: [0.0f64; 32],
            fcsr: 0,
            executors: HashMap::new(),
            // One dirty byte per 4 KiB page, matching the machine's page table.
            dirty: vec![0u8; machine.memory.len() >> PAGE_BITS]
        }
    }
}
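
// Illustrative sketch, not part of the original file: accessors for the fcsr
// fields laid out in the comments above (accrued exception flags in bits 0-4,
// rounding mode in bits 5-7). The method names are hypothetical.
impl CoreState {
    #[allow(dead_code)]
    fn fflags(&self) -> i32 {
        self.fcsr & 0x1f
    }

    #[allow(dead_code)]
    fn frm(&self) -> i32 {
        (self.fcsr >> 5) & 0x07
    }

    #[allow(dead_code)]
    fn set_fflags(&mut self, flags: i32) {
        self.fcsr = (self.fcsr & !0x1f) | (flags & 0x1f);
    }
}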

// R-type
// * opcode: bits 0-6
// * rd: bits 7-11
// * funct3: bits 12-14
// * rs1: bits 15-19
// * rs2: bits 20-24
// * funct7: bits 25-31

// I-type
// * opcode: bits 0-6
// * rd: bits 7-11
// * funct3: bits 12-14
// * rs1: bits 15-19
// * imm[0:11]: bits 20-31

// S-type
// * opcode: bits 0-6
// * imm[0:4]: bits 7-11
// * funct3: bits 12-14
// * rs1: bits 15-19
// * rs2: bits 20-24
// * imm[5:11]: bits 25-31

// B-type
// * opcode: bits 0-6
// * imm[11]: bit 7
// * imm[1:4]: bits 8-11
// * funct3: bits 12-14
// * rs1: bits 15-19
// * rs2: bits 20-24
// * imm[5:10]: bits 25-30
// * imm[12]: bit 31

// U-type
// * opcode: bits 0-6
// * rd: bits 7-11
// * imm[12:31]: bits 12-31

// J-type
// * opcode: bits 0-6
// * rd: bits 7-11
// * imm[12:19]: bits 12-19
// * imm[11]: bit 20
// * imm[1:10]: bits 21-30
// * imm[20]: bit 31
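
// Illustrative decoding sketch, not part of the original file: field extractors
// matching the layouts listed above, with immediates sign-extended from bit 31
// where the formats call for it. The module and helper names are hypothetical.
#[allow(dead_code)]
mod decode {
    pub fn opcode(insn: u32) -> u32 { insn & 0x7f }
    pub fn rd(insn: u32) -> usize { ((insn >> 7) & 0x1f) as usize }
    pub fn funct3(insn: u32) -> u32 { (insn >> 12) & 0x07 }
    pub fn rs1(insn: u32) -> usize { ((insn >> 15) & 0x1f) as usize }
    pub fn rs2(insn: u32) -> usize { ((insn >> 20) & 0x1f) as usize }
    pub fn funct7(insn: u32) -> u32 { insn >> 25 }

    // I-type immediate: bits 20-31, sign-extended.
    pub fn imm_i(insn: u32) -> i64 { ((insn as i32) >> 20) as i64 }

    // S-type immediate: bits 25-31 joined with bits 7-11, sign-extended.
    pub fn imm_s(insn: u32) -> i64 {
        ((((insn as i32) >> 25) << 5) | (((insn >> 7) & 0x1f) as i32)) as i64
    }

    // B-type immediate: imm[12|11|10:5|4:1] gathered from bits 31, 7, 25-30, 8-11.
    pub fn imm_b(insn: u32) -> i64 {
        ((((insn as i32) >> 31) << 12)
            | ((((insn >> 7) & 0x01) as i32) << 11)
            | ((((insn >> 25) & 0x3f) as i32) << 5)
            | ((((insn >> 8) & 0x0f) as i32) << 1)) as i64
    }

    // U-type immediate: bits 12-31 kept in place, low 12 bits zero.
    pub fn imm_u(insn: u32) -> i64 { ((insn & 0xffff_f000) as i32) as i64 }

    // J-type immediate: imm[20|19:12|11|10:1] gathered from bits 31, 12-19, 20, 21-30.
    pub fn imm_j(insn: u32) -> i64 {
        ((((insn as i32) >> 31) << 20)
            | ((((insn >> 12) & 0xff) as i32) << 12)
            | ((((insn >> 20) & 0x01) as i32) << 11)
            | ((((insn >> 21) & 0x3ff) as i32) << 1)) as i64
    }
}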

// Sequential path is expected to be the common case on conditional branches;
// this aligns nicely with encoding forward to the next unconditional branch,
// but if possible, unconditional jumps within a compilation unit may be doable.

// Memory ordering instructions -- how can any of this be done? :D

// FENCE.I could tell us to clear JIT caches :D
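
// Illustrative sketch, not part of the original file, of the fence.i handling
// suggested above: drop compiled executors whose entry PC falls on a locally
// dirty page, then clear the per-core dirty flags. It is assumed here, as a
// simplification, that a compiled function can be invalidated based on the page
// containing its entry point alone; the method name fence_i is hypothetical.
impl CoreState {
    #[allow(dead_code)]
    fn fence_i(&mut self) {
        let dirty = &self.dirty;
        self.executors.retain(|&pc, _| {
            let page = (pc as usize) >> PAGE_BITS;
            // Keep the executor only if its entry page has not been written locally.
            dirty.get(page).map_or(true, |&d| d == 0)
        });
        for d in self.dirty.iter_mut() {
            *d = 0;
        }
    }
}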

extern "C" fn interpreter(
    _machine: *mut MachineState,
    _state: *mut CoreState,
    pc: i64) -> i64
{
    println!("Hello, world!");
    return pc;
}

fn main() {
    let size = 8 * 1024 * 1024;
    let mut machine = MachineState::new(size);
    let mut core = CoreState::new(&machine);
    let pc = interpreter(&mut machine, &mut core, 0);
    println!("Ended with PC {}", pc);
}
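
// Illustrative sketch, not part of the original file: how a compiled block might
// be registered in the per-core executors map and dispatched by PC, falling back
// to the interpreter when no compiled code covers the address. The function names
// example_block and dispatch, and the pc + 4 fall-through, are hypothetical.
#[allow(dead_code)]
fn example_block(pc: i64, _core: &mut CoreState, _machine: &mut MachineState) -> i64 {
    // A real compiled block would execute its translated instructions and return
    // the PC to continue from; this stand-in just steps past one instruction.
    pc + 4
}

#[allow(dead_code)]
fn dispatch(pc: i64, core: &mut CoreState, machine: &mut MachineState) -> i64 {
    // Copy the function pointer out of the map so the borrow ends before the call.
    let compiled = core.executors.get(&pc).copied();
    match compiled {
        Some(exec) => exec(pc, core, machine),
        None => interpreter(machine, core, pc),
    }
}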