// kernel/arch/riscv64/mod.rs

1use core::arch::asm;
2use core::mem::transmute;
3use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
4use instruction::sbi::sbi_system_reset;
5use trap::kernel::_kernel_trap_entry;
6use trap::kernel::arch_kernel_trap_handler;
7use trap::user::_user_trap_entry;
8use trap::user::arch_user_trap_handler;
9use vcpu::Mode;
10
11use crate::arch::instruction::Instruction;
12use crate::arch::vm::get_root_pagetable;
13use crate::early_println;
14use crate::environment::MAX_NUM_CPUS;
15use crate::environment::STACK_SIZE;
16use crate::mem::KERNEL_STACK;
17use crate::task::Task;
18
19pub mod boot;
20pub mod context;
21pub mod earlycon;
22pub mod fdt;
23pub mod fpu;
24pub mod instruction;
25pub mod interrupt;
26pub mod kernel;
27pub mod mmio;
28pub mod registers;
29pub mod switch;
30pub mod timer;
31pub mod trap;
32pub mod vcpu;
33pub mod vm;
34
35pub use earlycon::*;
36pub use registers::IntRegisters;
37
38use crate::vm::vmem::MemoryArea;
39
/// Architecture selector: on riscv64 builds, `Arch` is the per-hart CPU struct.
pub type Arch = Riscv64;

/// Per-hart ownership of the live Vector register file.
///
/// When a task that used the V extension is rescheduled on the same hart, we can
/// skip restoring vregs if it still owns the live state. This removes a very
/// expensive per-timeslice illegal-instruction trap for vector-heavy workloads.
// Sentinel meaning "no task owns the live vector state on this hart".
const NO_VECTOR_OWNER: usize = usize::MAX;
// Indexed by hart id; holds the task id whose vregs are live on that hart.
// NOTE(review): Relaxed ordering assumes each slot is only read/written by its
// own hart — confirm no cross-hart access exists.
static VECTOR_OWNER: [AtomicUsize; MAX_NUM_CPUS] =
    [const { AtomicUsize::new(NO_VECTOR_OWNER) }; MAX_NUM_CPUS];

/// Whether the live vector register file contains state that is newer than the
/// saved per-task context of `VECTOR_OWNER`.
///
/// This is needed because we sometimes keep vregs live across timeslices while
/// forcing sstatus.VS to Clean/Off to avoid mis-attributing Dirtiness to another
/// task.
static VECTOR_OWNER_DIRTY: [AtomicBool; MAX_NUM_CPUS] =
    [const { AtomicBool::new(false) }; MAX_NUM_CPUS];
59
60#[inline]
61pub(crate) fn get_vector_owner(cpu_id: usize) -> usize {
62    VECTOR_OWNER[cpu_id].load(Ordering::Relaxed)
63}
64
65#[inline]
66pub(crate) fn set_vector_owner(cpu_id: usize, owner: usize) {
67    VECTOR_OWNER[cpu_id].store(owner, Ordering::Relaxed)
68}
69
70#[inline]
71pub(crate) fn get_vector_owner_dirty(cpu_id: usize) -> bool {
72    VECTOR_OWNER_DIRTY[cpu_id].load(Ordering::Relaxed)
73}
74
75#[inline]
76pub(crate) fn set_vector_owner_dirty(cpu_id: usize, dirty: bool) {
77    VECTOR_OWNER_DIRTY[cpu_id].store(dirty, Ordering::Relaxed)
78}
79
/// Apply user-entry options for the upcoming `sret`.
///
/// This does not enable interrupts in the kernel immediately; it only controls the
/// sstatus.SPIE bit which is copied into SIE by the `sret` instruction.
///
/// After the IRQ policy is applied, this also implements the lazy FPU/Vector
/// policy for the task about to enter user mode (see inline comments).
pub fn configure_user_entry(_trapframe: &mut Trapframe, options: crate::arch::UserEntryOptions) {
    use crate::arch::UserReturnIrqPolicy;

    // Reflect into sstatus.SPIE for the next `sret`.
    // SPIE is sstatus bit 5; `sret` copies SPIE into SIE.
    const SPIE: usize = 1 << 5;
    match options.irq_policy {
        UserReturnIrqPolicy::Inherit => {}
        UserReturnIrqPolicy::Enable => unsafe {
            let mut sstatus: usize;
            asm!("csrr {0}, sstatus", out(reg) sstatus);
            sstatus |= SPIE;
            asm!("csrw sstatus, {0}", in(reg) sstatus);
        },
        UserReturnIrqPolicy::Disable => unsafe {
            let mut sstatus: usize;
            asm!("csrr {0}, sstatus", out(reg) sstatus);
            sstatus &= !SPIE;
            asm!("csrw sstatus, {0}", in(reg) sstatus);
        },
    }

    // Lazy FPU/Vector: trap on first use.
    // If the task has never used FPU/Vector, keep them disabled for user mode.
    // When an illegal-instruction trap is raised by a FP/Vector instruction,
    // the trap handler will mark the task as used and re-enable the extension.
    let cpu_id = crate::arch::get_cpu().get_cpuid();
    // Gather everything we need from the scheduler inside one scope so the
    // scheduler borrow ends before we dereference the raw task pointers below.
    // NOTE(review): this assumes the current/owner tasks cannot be freed or
    // moved while this path runs — confirm (e.g. interrupts disabled here).
    let (current_task_ptr, current_task_id, owner_task_ptr, owner_id, owner_dirty) = {
        let sched = crate::sched::scheduler::get_scheduler();
        let Some(current_task_id) = sched.get_current_task_id(cpu_id) else {
            return;
        };
        let Some(current_task_ptr) = sched
            .get_task_by_id(current_task_id)
            .map(|t| t as *mut Task)
        else {
            return;
        };

        let owner_id = get_vector_owner(cpu_id);
        let owner_dirty = get_vector_owner_dirty(cpu_id);
        // Only look the owner up when its live state actually needs saving.
        let owner_task_ptr =
            if owner_dirty && owner_id != NO_VECTOR_OWNER && owner_id != current_task_id {
                sched.get_task_by_id(owner_id).map(|t| t as *mut Task)
            } else {
                None
            };

        (
            current_task_ptr,
            current_task_id,
            owner_task_ptr,
            owner_id,
            owner_dirty,
        )
    };

    let task = unsafe { &mut *current_task_ptr };

    // FPU: keep disabled unless globally enabled and already used by this task.
    if !crate::arch::user_fpu_enabled() || !task.vcpu.lock().fpu_used {
        crate::arch::riscv64::fpu::disable_fpu();
    }

    // Vector: if unused (or globally off), disable it and skip ownership work.
    if !crate::arch::user_vector_enabled() || !task.vcpu.lock().vector_used {
        crate::arch::riscv64::fpu::disable_vector();
        return;
    }

    // Ensure the task has a backing context (allocated lazily).
    if task.vcpu.lock().vector.is_none() {
        task.vcpu.lock().vector = Some(alloc::boxed::Box::new(
            crate::arch::riscv64::fpu::VectorContext::new(),
        ));
    }

    // If another task currently owns the live vregs and its live state hasn't
    // been saved, save it now before we clobber vregs with our restore.
    if owner_dirty && owner_id != NO_VECTOR_OWNER && owner_id != current_task_id {
        if let Some(owner_ptr) = owner_task_ptr {
            let owner_task = unsafe { &mut *owner_ptr };
            // The owner may have dirtied vregs before ever allocating a save
            // area — allocate one for it now.
            if owner_task.vcpu.lock().vector.is_none() {
                owner_task.vcpu.lock().vector = Some(alloc::boxed::Box::new(
                    crate::arch::riscv64::fpu::VectorContext::new(),
                ));
                owner_task.vcpu.lock().vector_used = true;
            }
            // Vector access must be enabled for the save instructions to run.
            crate::arch::riscv64::fpu::enable_vector();
            unsafe { owner_task.vcpu.lock().vector.as_mut().unwrap().save() };
            crate::arch::riscv64::fpu::mark_vector_clean();
            set_vector_owner_dirty(cpu_id, false);
        } else {
            // Owner task disappeared; drop the dirty flag to avoid repeated work.
            set_vector_owner_dirty(cpu_id, false);
        }
    }

    // Vector hot-path:
    // - Restore only when ownership changed on this hart.
    // - Otherwise just re-enable access without a full restore.
    if owner_id != current_task_id {
        crate::arch::riscv64::fpu::enable_vector();
        unsafe { task.vcpu.lock().vector.as_ref().unwrap().restore() };
        crate::arch::riscv64::fpu::mark_vector_clean();
        set_vector_owner(cpu_id, current_task_id);
        set_vector_owner_dirty(cpu_id, false);
    } else if !crate::arch::riscv64::fpu::is_vector_enabled() {
        crate::arch::riscv64::fpu::enable_vector();
        crate::arch::riscv64::fpu::mark_vector_clean();
        // Preserve owner-dirty: if we kept live unsaved state, it stays dirty.
    }
}
194
/// RISC-V: perform the very first transition into a runnable user task.
///
/// This avoids bootstrapping the first user entry via a timer IRQ.
/// The function prepares trampoline-visible per-CPU state and then
/// jumps to the trampoline exit path which performs `sret` into user mode.
pub fn first_switch_to_user(task: &mut Task) -> ! {
    // Prefer the high-VA kernel stack window if available.
    // Stack top = window base + one page + TASK_KERNEL_STACK_SIZE.
    // NOTE(review): assumes the first PAGE_SIZE of the window is not usable
    // stack (guard or metadata page) — confirm against the VM layout.
    let kernel_sp = if let Some((_slot, base)) = task.get_kernel_stack_window_base() {
        (base + crate::environment::PAGE_SIZE + crate::environment::TASK_KERNEL_STACK_SIZE) as u64
    } else {
        panic!("Task has no kernel stack window");
    };

    crate::early_println!(
        "[riscv64] CPU {}: First switch to user task PID {} with kernel SP {:#x}",
        crate::arch::get_cpu().get_cpuid(),
        task.get_id(),
        kernel_sp,
    );

    // Switch sscratch to the trampoline-visible per-CPU struct.
    let cpu_id = crate::arch::get_cpu().get_cpuid();
    set_arch(crate::vm::get_trampoline_arch(cpu_id));

    // Update trampoline-visible CPU struct: stack top, user trap handler, and
    // the satp value staged for the address-space switch.
    let cpu = crate::arch::get_cpu();
    cpu.set_kernel_stack(kernel_sp);
    cpu.set_trap_handler(get_user_trap_handler());
    cpu.set_next_address_space(task.vm_manager.get_asid());

    // Populate the trapframe from the task VCPU state.
    // Raw-pointer dance: `get_trapframe` and the vcpu lock both need access to
    // `task`, so the borrows are split via a raw pointer.
    let task_ptr = task as *mut Task;
    unsafe {
        let trapframe = (*task_ptr).get_trapframe();
        (*task_ptr).vcpu.lock().switch(trapframe);
    }

    // Ensure the next return is to the correct privilege mode (sstatus.SPP).
    set_next_mode(task.vcpu.lock().get_mode());

    // Program trampoline trap vector right before the jump.
    set_trapvector(crate::vm::get_trampoline_trap_vector());

    // Final transition via trampoline exit path (never returns).
    crate::arch::riscv64::trap::user::arch_switch_to_user_space(task.get_trapframe())
}
241
242/// Returns the device memory areas for RISC-V QEMU virt platform.
243/// These areas contain memory-mapped I/O devices and should be mapped
244/// with device memory attributes (non-cacheable, no speculation).
245pub fn get_device_memory_areas() -> alloc::vec::Vec<MemoryArea> {
246    alloc::vec![
247        // QEMU virt: MMIO devices are in the low 2GB
248        MemoryArea {
249            start: 0x0000_0000,
250            end: 0x7fff_ffff,
251        },
252    ]
253}
254
// Per-hart CPU structs placed in `.trampoline.data` so the trap trampoline can
// reach them (they are installed into `sscratch` via `get_trampoline_arch`).
// NOTE(review): `static mut` — soundness relies on each hart touching only its
// own `CPUS[hartid]` slot; confirm no cross-hart access.
#[unsafe(link_section = ".trampoline.data")]
static mut CPUS: [Riscv64; MAX_NUM_CPUS] = [const { Riscv64::new(0) }; MAX_NUM_CPUS];
257
/// Per-hart CPU state; the offset comments indicate the fields are accessed by
/// fixed byte offsets from assembly (via `sscratch`).
///
/// `#[repr(C)]` pins the declared field order. Without it the default Rust
/// representation is free to reorder fields, which would silently invalidate
/// the documented offsets — so the layout guarantee is now explicit.
#[repr(C, align(4))]
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct Riscv64 {
    scratch: u64,      // offset: 0
    pub hartid: u64,   // offset: 8
    satp: u64,         // offset: 16
    kernel_stack: u64, // offset: 24
    kernel_trap: u64,  // offset: 32
}
268
269impl Riscv64 {
270    pub const fn new(cpu_id: usize) -> Self {
271        Riscv64 {
272            scratch: 0,
273            hartid: cpu_id as u64,
274            kernel_stack: 0,
275            kernel_trap: 0,
276            satp: 0,
277        }
278    }
279
280    pub fn get_cpuid(&self) -> usize {
281        self.hartid as usize
282    }
283
284    pub fn get_trapframe_paddr(&self) -> usize {
285        /* Get pointer of the trapframe, which is located at the top of the kernel stack */
286        let addr = self.kernel_stack as usize - core::mem::size_of::<Trapframe>();
287        addr
288    }
289
290    pub fn set_kernel_stack(&mut self, initial_top: u64) {
291        self.kernel_stack = initial_top;
292    }
293
294    pub fn set_trap_handler(&mut self, addr: usize) {
295        self.kernel_trap = addr as u64;
296    }
297
298    pub fn set_next_address_space(&mut self, asid: u16) {
299        let root_pagetable = get_root_pagetable(asid).expect("No root page table found for ASID");
300
301        let satp = root_pagetable.get_val_for_satp(asid);
302        self.satp = satp as u64;
303    }
304
305    pub fn as_paddr_cpu(&mut self) -> &mut Riscv64 {
306        unsafe { &mut CPUS[self.hartid as usize] }
307    }
308}
309
/// Saved execution context at trap entry/exit.
///
/// `repr(C, align(16))` fixes the field layout for the assembly side and keeps
/// the frame 16-byte aligned (the RISC-V stack alignment requirement).
#[repr(C, align(16))]
#[derive(Debug, Clone)]
pub struct Trapframe {
    // Integer register file; indexed so that reg[10] = a0 .. reg[17] = a7
    // (see the accessors in `impl Trapframe`).
    pub regs: IntRegisters,
    // Exception PC: the user PC to resume at on the next `sret`.
    pub epc: u64,
    // Pads the struct to a 16-byte multiple; set to a debug sentinel by `new`.
    pub _padding: u64,
}
317
318impl Trapframe {
319    pub fn new() -> Self {
320        Trapframe {
321            regs: IntRegisters::new(),
322            epc: 0,
323            _padding: 0xdeadbeefdeadbeef,
324        }
325    }
326
327    pub fn get_syscall_number(&self) -> usize {
328        self.regs.reg[17] // a7
329    }
330
331    pub fn set_syscall_number(&mut self, syscall_number: usize) {
332        self.regs.reg[17] = syscall_number; // a7
333    }
334
335    pub fn get_return_value(&self) -> usize {
336        self.regs.reg[10] // a0
337    }
338
339    pub fn set_return_value(&mut self, value: usize) {
340        self.regs.reg[10] = value; // a0
341    }
342
343    pub fn get_arg(&self, index: usize) -> usize {
344        self.regs.reg[index + 10] // a0 - a7
345    }
346
347    pub fn set_arg(&mut self, index: usize, value: usize) {
348        self.regs.reg[index + 10] = value; // a0 - a7
349    }
350
351    pub fn get_current_pc(&self) -> u64 {
352        self.epc
353    }
354
355    /// Increment the program counter (epc) to the next instruction
356    /// This is typically used after handling a trap or syscall to continue execution.
357    ///
358    pub fn increment_pc_next(&mut self, task: &Task) {
359        let instruction =
360            Instruction::fetch(task.vm_manager.translate_vaddr(self.epc as usize).unwrap());
361        let len = instruction.len();
362        if len == 0 {
363            debug_assert!(len > 0, "Invalid instruction length: {}", len);
364            early_println!(
365                "Warning: Invalid instruction length encountered. Defaulting to 4 bytes."
366            );
367            self.epc += 4; // Default to 4 bytes for invalid instruction length
368        } else {
369            self.epc += len as u64;
370        }
371    }
372}
373
/// Address of the user trap entry `_user_trap_entry` (link-time address; the
/// `paddr` in the name suggests it is identity-mapped — TODO confirm).
pub fn get_user_trapvector_paddr() -> usize {
    _user_trap_entry as usize
}

/// Address of the kernel trap entry `_kernel_trap_entry` (see note above on
/// the `paddr` naming).
pub fn get_kernel_trapvector_paddr() -> usize {
    _kernel_trap_entry as usize
}

/// Address of the Rust kernel-mode trap handler; installed into the per-CPU
/// struct by `trap_init`.
pub fn get_kernel_trap_handler() -> usize {
    arch_kernel_trap_handler as usize
}

/// Address of the Rust user-mode trap handler; installed into the per-CPU
/// struct by `first_switch_to_user`.
pub fn get_user_trap_handler() -> usize {
    arch_user_trap_handler as usize
}
389
/// Per-hart trap initialization: carve out this hart's trap stack, install the
/// kernel trap vector and per-hart scratch pointer, and enable timer
/// interrupts plus FPU/Vector access.
#[allow(static_mut_refs)]
fn trap_init(riscv: &mut Riscv64) {
    let trap_stack_start = unsafe { KERNEL_STACK.start() };
    let stack_size = STACK_SIZE;

    // Stacks grow down: hart N's initial stack top is the end of its slot,
    // i.e. start + STACK_SIZE * (hartid + 1).
    let trap_stack = trap_stack_start + stack_size * (riscv.hartid + 1) as usize;
    riscv.kernel_stack = trap_stack as u64;
    riscv.kernel_trap = arch_kernel_trap_handler as u64;
    let scratch_addr = riscv as *const _ as usize;

    // 0x20 = bit 5 of the sie CSR: STIE (supervisor timer interrupt enable).
    let sie: usize = 0x20;
    unsafe {
        asm!("
        csrci sstatus, 0x2 // Disable interrupts
        csrw  sie, {0}
        csrw  stvec, {1}
        csrw  sscratch, {2}
        ",
        in(reg) sie,
        in(reg) _kernel_trap_entry as usize,
        in(reg) scratch_addr,
        );
    }

    // Enable FPU for user-space and kernel access
    fpu::enable_fpu();

    // Enable Vector extension for user-space and kernel access
    fpu::enable_vector();
}
425
/// Program the S-mode trap vector base register (`stvec`) with `addr`.
pub fn set_trapvector(addr: usize) {
    unsafe {
        asm!("csrw stvec, {addr}", addr = in(reg) addr);
    }
}
435
/// Point `sscratch` at the per-CPU arch struct located at `addr`.
pub fn set_arch(addr: usize) {
    unsafe {
        asm!("csrw sscratch, {addr}", addr = in(reg) addr);
    }
}
445
/// Globally enable S-mode interrupts by setting sstatus.SIE (mask 0x2).
pub fn enable_interrupt() {
    // csrsi sets CSR bits from a 5-bit immediate.
    unsafe {
        asm!("csrsi sstatus, 0x2");
    }
}
455
/// Globally disable S-mode interrupts by clearing sstatus.SIE (mask 0x2).
pub fn disable_interrupt() {
    // csrci clears CSR bits from a 5-bit immediate.
    unsafe {
        asm!("csrci sstatus, 0x2");
    }
}
465
/// Full memory barrier for normal memory (RAM).
///
/// This orders previous reads/writes before subsequent reads/writes.
/// For device/MMIO ordering, prefer [`io_mb`].
#[inline(always)]
pub fn mb() {
    // fence rw, rw: predecessor loads/stores before successor loads/stores.
    unsafe { asm!("fence rw, rw", options(nostack)) }
}
476
/// Read memory barrier for normal memory (RAM): orders loads against loads.
#[inline(always)]
pub fn rmb() {
    unsafe { asm!("fence r, r", options(nostack)) }
}
484
/// Write memory barrier for normal memory (RAM): orders stores against stores.
#[inline(always)]
pub fn wmb() {
    unsafe { asm!("fence w, w", options(nostack)) }
}
492
/// Full barrier for device/MMIO (I/O) operations.
///
/// RISC-V requires an explicit I/O fence to order device register accesses.
#[inline(always)]
pub fn io_mb() {
    unsafe { asm!("fence iorw, iorw", options(nostack)) }
}
502
/// Read barrier for device/MMIO (I/O) operations: orders I/O reads.
#[inline(always)]
pub fn io_rmb() {
    unsafe { asm!("fence ir, ir", options(nostack)) }
}
510
/// Write barrier for device/MMIO (I/O) operations: orders I/O writes.
#[inline(always)]
pub fn io_wmb() {
    unsafe { asm!("fence ow, ow", options(nostack)) }
}
518
519/// Backward-compatible alias for a full device/MMIO barrier.
520#[inline(always)]
521pub fn mmio_fence() {
522    io_mb()
523}
524
525pub fn get_cpu() -> &'static mut Riscv64 {
526    let scratch: usize;
527
528    unsafe {
529        asm!("
530        csrr {0}, sscratch
531        ",
532        out(reg) scratch,
533        );
534    }
535    unsafe { transmute(scratch) }
536}
537
538pub fn set_next_mode(mode: Mode) {
539    match mode {
540        Mode::User => {
541            unsafe {
542                // sstatus.spp = 0 (U-mode)
543                let mut sstatus: usize;
544                asm!(
545                    "csrr {sstatus}, sstatus",
546                    sstatus = out(reg) sstatus,
547                );
548                sstatus &= !(1 << 8); // Clear SPP bit
549                asm!(
550                    "csrw sstatus, {sstatus}",
551                    sstatus = in(reg) sstatus,
552                );
553            }
554        }
555        Mode::Kernel => {
556            unsafe {
557                // sstatus.spp = 1 (S-mode)
558                let mut sstatus: usize;
559                asm!(
560                    "csrr {sstatus}, sstatus",
561                    sstatus = out(reg) sstatus,
562                );
563                sstatus |= 1 << 8; // Set SPP bit
564                asm!(
565                    "csrw sstatus, {sstatus}",
566                    sstatus = in(reg) sstatus,
567                );
568            }
569        }
570    }
571}
572
/// Power off the machine via SBI system reset (type 0 = shutdown, reason 0).
pub fn shutdown() -> ! {
    sbi_system_reset(0, 0);
}

/// Power off, passing `exit_code` through as the SBI reset reason.
pub fn shutdown_with_code(exit_code: u32) -> ! {
    // Use reset_reason as exit code for test environments
    sbi_system_reset(0, exit_code);
}

/// Reboot the machine via SBI system reset (type 1 = cold reboot, reason 0).
pub fn reboot() -> ! {
    sbi_system_reset(1, 0);
}