kernel/abi/linux/riscv64/
mod.rs

1#[macro_use]
2mod macros;
3mod errno;
4mod fs;
5mod futex;
6mod mm;
7mod pipe;
8mod proc;
9mod signal;
10mod socket;
11mod time;
12
13// pub mod drivers;
14
15use alloc::{
16    boxed::Box, collections::BTreeMap, format, string::ToString, sync::Arc, vec, vec::Vec,
17};
18use core::sync::atomic::Ordering;
19// use file::{sys_dup, sys_exec, sys_mknod, sys_open, sys_write};
20// use proc::{sys_exit, sys_fork, sys_wait, sys_getpid};
21
22use self::time::PosixTimer;
23use crate::{
24    abi::AbiModule,
25    arch::{self, IntRegisters, Trapframe},
26    early_initcall,
27    fs::{
28        FileSystemError, FileSystemErrorKind, SeekFrom, VfsManager, drivers::overlayfs::OverlayFS,
29    },
30    register_abi,
31    task::elf_loader::{
32        ExecutionMode, LoadStrategy, LoadTarget, analyze_and_load_elf_with_strategy,
33    },
34    vm::setup_user_stack,
35};
36
/// Maximum number of file descriptors per ABI instance.
///
/// Sizes the per-fd tables (`fd_to_handle`, `fd_flags`, `file_status_flags`)
/// and is the exclusive upper bound for valid descriptor numbers.
const MAX_FDS: usize = 1024; // Maximum number of file descriptors
38
/// Linux-specific per-task thread bookkeeping kept inside the Linux ABI module.
///
/// Every field is plain data, so the type is cheap to clone and its `Default`
/// is "nothing configured": all pointers `None`, `tgid == 0`, flag cleared.
/// `Debug` is derived so the state can be logged and used in assertions.
#[derive(Clone, Debug, Default)]
pub struct LinuxThreadState {
    /// User address recorded as the parent-TID pointer, if any.
    pub parent_tid_ptr: Option<usize>,
    /// User address recorded as the child-TID pointer, if any.
    pub child_tid_ptr: Option<usize>,
    /// User address that is zeroed and futex-woken when the task exits, if any.
    pub clear_child_tid_ptr: Option<usize>,
    /// User address of the robust futex list head, if registered.
    pub robust_list_head: Option<usize>,
    /// Length value stored together with `robust_list_head`.
    pub robust_list_len: usize,
    /// Thread-local storage pointer, if one was recorded.
    pub tls_pointer: Option<usize>,
    /// Linux Thread Group ID (TGID). For processes, TGID == PID of group leader.
    /// 0 means uninitialized and should fall back to Task ID.
    pub tgid: usize,
    /// Internal: set by sys_clone prior to cloning to indicate CLONE_THREAD semantics
    pub pending_clone_is_thread: bool,
}
53
/// State of the Linux RISC-V 64 ABI module: the Linux-style file descriptor
/// table, signal state, Linux thread bookkeeping and POSIX timers.
///
/// The three per-fd tables (`fd_to_handle`, `fd_flags`, `file_status_flags`)
/// are all indexed by file descriptor number and sized `MAX_FDS`.
///
/// `Clone` is derived: the fd tables, thread state and timers are copied,
/// while `namespace` and `signal_state` are `Arc`s, so a clone refers to the
/// same namespace and the same `SignalState` as the value it was cloned from.
#[derive(Clone)]
pub struct LinuxRiscv64Abi {
    /// Task namespace for Linux PID/TID management
    namespace: Arc<crate::task::namespace::TaskNamespace>,
    /// File descriptor to handle mapping table (fd -> handle)
    /// None means the fd is not allocated
    /// Vec to avoid stack overflow during initialization
    fd_to_handle: Vec<Option<u32>>,
    /// File descriptor flags (e.g., FD_CLOEXEC)
    /// Vec to avoid stack overflow during initialization
    fd_flags: Vec<u32>,
    /// File status flags (e.g., O_NONBLOCK) for F_GETFL/F_SETFL
    /// Vec to avoid stack overflow during initialization
    file_status_flags: Vec<u32>,
    /// Free file descriptor list for O(1) allocation/deallocation.
    /// Allocation pops from the end of this list.
    free_fds: Vec<usize>,
    /// Signal handling state (shared behind an `Arc<spin::Mutex<..>>`)
    pub signal_state: Arc<spin::Mutex<signal::SignalState>>,
    /// Linux-specific per-task thread state (isolated inside ABI)
    thread_state: LinuxThreadState,
    /// POSIX timers created via timer_create, keyed by timer identifier
    posix_timers: BTreeMap<u64, PosixTimer>,
    /// Next timer identifier (Linux timer_t)
    next_timer_id: u64,
}
79
80impl Default for LinuxRiscv64Abi {
81    fn default() -> Self {
82        // Initialize free_fds with all available file descriptors (0 to MAX_FDS-1)
83        // Pop from the end so fd 0, 1, 2 are allocated first
84        let mut free_fds: Vec<usize> = (0..MAX_FDS).collect();
85        free_fds.reverse(); // Reverse so fd 0 is at the end and allocated first
86
87        // Use root namespace by default for cross-ABI task visibility
88        // Separate namespaces can be created explicitly when needed (e.g., containers, cgroups)
89        let namespace = crate::task::namespace::get_root_namespace().clone();
90
91        Self {
92            namespace,
93            fd_to_handle: vec![None; MAX_FDS],
94            fd_flags: vec![0; MAX_FDS],
95            file_status_flags: vec![0; MAX_FDS],
96            free_fds,
97            signal_state: Arc::new(spin::Mutex::new(signal::SignalState::new())),
98            thread_state: LinuxThreadState::default(),
99            posix_timers: BTreeMap::new(),
100            next_timer_id: 1,
101        }
102    }
103}
104
105impl LinuxRiscv64Abi {
106    pub fn thread_state(&self) -> &LinuxThreadState {
107        &self.thread_state
108    }
109    pub fn thread_state_mut(&mut self) -> &mut LinuxThreadState {
110        &mut self.thread_state
111    }
112    /// Allocate a new file descriptor and map it to a handle
113    pub fn allocate_fd(&mut self, handle: u32) -> Result<usize, &'static str> {
114        let fd = if let Some(freed_fd) = self.free_fds.pop() {
115            // Reuse a previously freed file descriptor (O(1))
116            freed_fd
117        } else {
118            // No more file descriptors available
119            return Err("Too many open files");
120        };
121
122        self.fd_to_handle[fd] = Some(handle);
123        Ok(fd)
124    }
125
126    /// Allocate a specific file descriptor and map it to a handle
127    pub fn allocate_specific_fd(&mut self, fd: usize, handle: u32) -> Result<(), &'static str> {
128        if fd >= MAX_FDS {
129            return Err("File descriptor out of range");
130        }
131
132        // Check if the fd is already in use
133        if self.fd_to_handle[fd].is_some() {
134            return Err("File descriptor already in use");
135        }
136
137        // Remove from free list if present
138        if let Some(pos) = self.free_fds.iter().position(|&x| x == fd) {
139            self.free_fds.remove(pos);
140        }
141
142        self.fd_to_handle[fd] = Some(handle);
143        Ok(())
144    }
145
146    /// Get handle from file descriptor
147    pub fn get_handle(&self, fd: usize) -> Option<u32> {
148        if fd < MAX_FDS {
149            self.fd_to_handle[fd]
150        } else {
151            None
152        }
153    }
154
155    /// Remove file descriptor mapping and clear its flags
156    pub fn remove_fd(&mut self, fd: usize) -> Option<u32> {
157        if fd < MAX_FDS {
158            if let Some(handle) = self.fd_to_handle[fd].take() {
159                self.fd_flags[fd] = 0; // Clear flags when removing fd
160                self.file_status_flags[fd] = 0; // Clear status flags as well
161                // Add the freed fd back to the free list for reuse (O(1))
162                self.free_fds.push(fd);
163                Some(handle)
164            } else {
165                None
166            }
167        } else {
168            None
169        }
170    }
171
172    /// Find file descriptor by handle (linear search)
173    pub fn find_fd_by_handle(&self, handle: u32) -> Option<usize> {
174        for (fd, &mapped_handle) in self.fd_to_handle.iter().enumerate() {
175            if let Some(h) = mapped_handle {
176                if h == handle {
177                    return Some(fd);
178                }
179            }
180        }
181        None
182    }
183
184    /// Initialize standard file descriptors (stdin, stdout, stderr)
185    pub fn init_std_fds(&mut self, stdin_handle: u32, stdout_handle: u32, stderr_handle: u32) {
186        // Linux convention: fd 0 = stdin, fd 1 = stdout, fd 2 = stderr
187        self.fd_to_handle[0] = Some(stdin_handle);
188        self.fd_to_handle[1] = Some(stdout_handle);
189        self.fd_to_handle[2] = Some(stderr_handle);
190
191        // Remove std fds from free list
192        self.free_fds.retain(|&fd| fd != 0 && fd != 1 && fd != 2);
193    }
194
195    /// Get file descriptor flags
196    pub fn get_fd_flags(&self, fd: usize) -> Option<u32> {
197        if fd < MAX_FDS && self.fd_to_handle[fd].is_some() {
198            Some(self.fd_flags[fd])
199        } else {
200            None
201        }
202    }
203
204    /// Set file descriptor flags
205    pub fn set_fd_flags(&mut self, fd: usize, flags: u32) -> Result<(), &'static str> {
206        use crate::abi::linux::riscv64::fs::FD_CLOEXEC;
207        use crate::{object::handle::SpecialSemantics, task::mytask};
208
209        if fd < MAX_FDS && self.fd_to_handle[fd].is_some() {
210            let handle = self.fd_to_handle[fd].unwrap();
211            self.fd_flags[fd] = flags;
212
213            // Update handle metadata to sync FD_CLOEXEC with SpecialSemantics::CloseOnExec
214            if let Some(task) = mytask() {
215                if let Some(current_metadata) = task.handle_table.get_metadata(handle) {
216                    let mut new_metadata = current_metadata.clone();
217
218                    if flags & FD_CLOEXEC != 0 {
219                        // Set CloseOnExec if FD_CLOEXEC flag is present
220                        new_metadata.special_semantics = Some(SpecialSemantics::CloseOnExec);
221                    } else {
222                        // Remove CloseOnExec if FD_CLOEXEC flag is not present
223                        if matches!(
224                            new_metadata.special_semantics,
225                            Some(SpecialSemantics::CloseOnExec)
226                        ) {
227                            new_metadata.special_semantics = None;
228                        }
229                    }
230
231                    // Update the metadata
232                    let _ = task.handle_table.update_metadata(handle, new_metadata);
233                }
234            }
235
236            Ok(())
237        } else {
238            Err("Invalid file descriptor")
239        }
240    }
241
242    /// Get file status flags (F_GETFL)
243    pub fn get_file_status_flags(&self, fd: usize) -> Option<u32> {
244        if fd < MAX_FDS && self.fd_to_handle[fd].is_some() {
245            Some(self.file_status_flags[fd])
246        } else {
247            None
248        }
249    }
250
251    /// Set file status flags (F_SETFL)
252    pub fn set_file_status_flags(&mut self, fd: usize, flags: u32) -> Result<(), &'static str> {
253        if fd < MAX_FDS && self.fd_to_handle[fd].is_some() {
254            self.file_status_flags[fd] = flags;
255            Ok(())
256        } else {
257            Err("Invalid file descriptor")
258        }
259    }
260
261    /// Get total number of allocated file descriptors
262    pub fn fd_count(&self) -> usize {
263        self.fd_to_handle.iter().filter(|&&h| h.is_some()).count()
264    }
265
266    /// Get the list of allocated file descriptors (for debugging)
267    pub fn allocated_fds(&self) -> Vec<usize> {
268        self.fd_to_handle
269            .iter()
270            .enumerate()
271            .filter_map(|(fd, &handle)| if handle.is_some() { Some(fd) } else { None })
272            .collect()
273    }
274
275    /// Process pending signals and handle them according to Linux semantics
276    /// Returns true if execution should be interrupted (signal handler called or process terminated)
277    pub fn process_signals(&self, trapframe: &mut Trapframe) -> bool {
278        let mut signal_state = self.signal_state.lock();
279        signal::process_pending_signals_with_state(&mut *signal_state, trapframe)
280    }
281
282    // No pthread/TLS probing helpers; user space owns pthread layout.
283
284    /// Handle incoming event from Scarlet event system and convert to signal if applicable
285    pub fn handle_event_direct(&self, event: &crate::ipc::event::Event) {
286        if let Some(signal) = signal::handle_event_to_signal(event) {
287            let mut signal_state = self.signal_state.lock();
288            signal_state.add_pending(signal);
289        }
290    }
291
292    /// Check if there are pending signals ready for delivery
293    pub fn has_pending_signals(&self) -> bool {
294        let signal_state = self.signal_state.lock();
295        signal_state.next_deliverable_signal().is_some()
296    }
297
298    /// Allocate a unique timer identifier for POSIX timers
299    pub fn allocate_posix_timer_id(&mut self) -> u64 {
300        let mut id = self.next_timer_id;
301        if id == 0 {
302            id = 1;
303        }
304        self.next_timer_id = id.wrapping_add(1);
305        if self.next_timer_id == 0 {
306            self.next_timer_id = 1;
307        }
308        id
309    }
310
311    /// Store a POSIX timer definition tracked by this ABI instance
312    pub fn store_posix_timer(&mut self, timer: PosixTimer) {
313        self.posix_timers.insert(timer.id, timer);
314    }
315
316    /// Retrieve a reference to a stored POSIX timer
317    pub fn get_posix_timer(&self, id: u64) -> Option<&PosixTimer> {
318        self.posix_timers.get(&id)
319    }
320
321    /// Remove a POSIX timer from this ABI instance
322    pub fn remove_posix_timer(&mut self, id: u64) -> Option<PosixTimer> {
323        self.posix_timers.remove(&id)
324    }
325}
326
327impl AbiModule for LinuxRiscv64Abi {
328    fn name() -> &'static str {
329        "linux-riscv64"
330    }
331
332    fn get_name(&self) -> alloc::string::String {
333        Self::name().to_string()
334    }
335
336    fn clone_boxed(&self) -> alloc::boxed::Box<dyn AbiModule + Send + Sync> {
337        Box::new(self.clone())
338    }
339
340    fn handle_syscall(
341        &mut self,
342        trapframe: &mut crate::arch::Trapframe,
343    ) -> Result<usize, &'static str> {
344        syscall_handler(self, trapframe)
345    }
346
347    fn handle_event(
348        &self,
349        event: crate::ipc::Event,
350        target_task_id: u32,
351    ) -> Result<(), &'static str> {
352        // Convert event to signal if applicable
353        if let Some(signal) = signal::handle_event_to_signal(&event) {
354            let scheduler = crate::sched::scheduler::get_scheduler();
355            let target_task = scheduler
356                .get_task_by_id(target_task_id as usize)
357                .ok_or("Target task not found")?;
358
359            // Check if this is a fatal signal that should terminate immediately
360            match signal {
361                signal::LinuxSignal::SIGKILL
362                | signal::LinuxSignal::SIGTERM
363                | signal::LinuxSignal::SIGINT => {
364                    // Fatal signals: terminate task immediately
365                    let exit_code = 128 + (signal as i32); // Standard Unix exit code for signals
366                    crate::early_println!(
367                        "Linux ABI: Terminating task {} due to signal {} (exit code {})",
368                        target_task.get_id(),
369                        signal as u32,
370                        exit_code
371                    );
372                    target_task.exit(exit_code);
373                }
374                _ => {
375                    // Other signals: add to pending (for future handler implementation)
376                    let mut signal_state = self.signal_state.lock();
377                    signal_state.add_pending(signal);
378                    crate::early_println!(
379                        "Linux ABI: Added signal {} to pending for task {}",
380                        signal as u32,
381                        target_task_id
382                    );
383                }
384            }
385        }
386
387        // For non-signal events, just acknowledge
388        Ok(())
389    }
390
391    fn on_task_cloned(
392        &mut self,
393        _parent_task: &crate::task::Task,
394        _child_task: &crate::task::Task,
395        _flags: crate::task::CloneFlags,
396    ) -> Result<(), &'static str> {
397        // Child ABI state is a clone of parent's state (including pointers set in sys_clone).
398        // Preserve child-specific fields (child_tid_ptr, clear_child_tid_ptr, tls_pointer),
399        // and only update TGID and transient flags.
400        let mut ts = self.thread_state.clone();
401        let parent_tgid = ts.tgid;
402        let is_thread = ts.pending_clone_is_thread;
403
404        // Initialize child's TGID based on whether this was a thread (CLONE_THREAD) or a process clone.
405        // Note: For non-thread (process) clones, TGID will be set to the child's ID later in set_id()
406        // when the task is added to the scheduler. We set it to 0 here as a placeholder.
407        ts.tgid = if is_thread {
408            if parent_tgid != 0 {
409                parent_tgid
410            } else {
411                _parent_task.get_id()
412            }
413        } else {
414            0 // Will be set to child's ID in Task::set_id() when added to scheduler
415        };
416
417        // Clear transient flag in the child copy
418        ts.pending_clone_is_thread = false;
419
420        // No debug dump or futex watch arming.
421
422        // Commit updated thread state
423        self.thread_state = ts;
424        Ok(())
425    }
426
427    fn on_task_exit(&mut self, task: &crate::task::Task) {
428        // No pthread/TLS structure probing at exit; user space owns pthread list.
429        // Linux semantics: if clear_child_tid is set, write 0 and FUTEX_WAKE.
430        if let Some(ptr) = self.thread_state.clear_child_tid_ptr {
431            if let Some(paddr) = task.vm_manager.translate_vaddr(ptr) {
432                unsafe {
433                    *(paddr as *mut i32) = 0;
434                }
435                // Wake one waiter on the TID address as per Linux semantics
436                let _ = futex::wake_address(ptr, 1);
437            }
438        }
439        // TODO: robust list-based wakeups for owned mutexes at thread exit.
440    }
441
442    fn get_task_namespace(&self) -> Arc<crate::task::namespace::TaskNamespace> {
443        self.namespace.clone()
444    }
445
446    fn can_execute_binary(
447        &self,
448        file_object: &crate::object::KernelObject,
449        file_path: &str,
450        current_abi: Option<&(dyn AbiModule + Send + Sync)>,
451    ) -> Option<u8> {
452        // Stage 1: Basic format validation (following implementation guidelines)
453        let magic_score = match file_object.as_file() {
454            Some(file_obj) => {
455                // Check ELF magic bytes (Linux uses ELF format)
456                let mut magic_buffer = [0u8; 4];
457                file_obj.seek(SeekFrom::Start(0)).ok(); // Reset to start
458                match file_obj.read(&mut magic_buffer) {
459                    Ok(bytes_read) if bytes_read >= 4 => {
460                        if magic_buffer == [0x7F, b'E', b'L', b'F'] {
461                            35 // Basic ELF format compatibility (slightly lower than Scarlet)
462                        } else {
463                            return None; // Not an ELF file, cannot execute
464                        }
465                    }
466                    _ => return None, // Read failed, cannot determine
467                }
468            }
469            None => return None, // Not a file object
470        };
471
472        let mut confidence = magic_score;
473
474        // Stage 2: ELF header checks
475        if let Some(file_obj) = file_object.as_file() {
476            // Check ELF header for System-V ABI (Linux uses System-V ABI)
477            let mut osabi_buffer = [0u8; 1];
478            file_obj.seek(SeekFrom::Start(7)).ok(); // OSABI is at
479            match file_obj.read(&mut osabi_buffer) {
480                Ok(bytes_read) if bytes_read == 1 => {
481                    if osabi_buffer[0] == 0 {
482                        // System-V ABI
483                        confidence += 50; // Strong indicator for System-V ABI
484                    }
485                }
486                _ => return None, // Read failed, cannot determine
487            }
488        } else {
489            return None; // Not a file object
490        }
491
492        // Stage 3: File path hints - Linux specific patterns
493        if file_path.contains("linux") || file_path.ends_with(".linux") {
494            confidence += 20; // Strong Linux indicator
495        } else if file_path.ends_with(".elf") {
496            confidence += 5; // General ELF compatibility
497        }
498
499        // Stage 4: ABI inheritance bonus - moderate priority for same ABI
500        if let Some(abi) = current_abi {
501            if abi.get_name() == self.get_name() {
502                confidence += 15; // Moderate inheritance bonus for Linux
503            }
504        }
505
506        Some(confidence.min(100)) // Standard 0-100 confidence range
507    }
508
509    fn execute_binary(
510        &self,
511        file_object: &crate::object::KernelObject,
512        argv: &[&str],
513        envp: &[&str],
514        task: &crate::task::Task,
515        trapframe: &mut crate::arch::Trapframe,
516    ) -> Result<(), &'static str> {
517        match file_object.as_file() {
518            Some(file_obj) => {
519                // Reset task state for Linux execution
520                task.text_size.store(0, Ordering::SeqCst);
521                task.data_size.store(0, Ordering::SeqCst);
522                task.stack_size.store(0, Ordering::SeqCst);
523                task.brk
524                    .store(usize::MAX, core::sync::atomic::Ordering::SeqCst);
525
526                // Load ELF using Linux-compatible method with dynamic linking support
527                match analyze_and_load_elf_with_strategy(
528                    file_obj,
529                    task,
530                    &LoadStrategy {
531                        choose_base_address: |target, needs_relocation| {
532                            match (target, needs_relocation) {
533                                (LoadTarget::MainProgram, false) => 0, // Static executables
534                                (LoadTarget::MainProgram, true) => 0x40000000, // PIE executables
535                                (LoadTarget::Interpreter, _) => 0x40000000, // Dynamic linker
536                                (LoadTarget::SharedLib, _) => 0x50000000, // Shared libraries
537                            }
538                        },
539                        resolve_interpreter: |requested| {
540                            // Map interpreter paths to system paths
541                            requested.map(|path| {
542                                if path.starts_with("/lib/ld-") || path.starts_with("/lib64/ld-") {
543                                    // Map to our system path
544                                    format!("/scarlet/system/linux-riscv64{}", path)
545                                } else {
546                                    path.to_string()
547                                }
548                            })
549                        },
550                    },
551                ) {
552                    Ok(load_result) => {
553                        // Set the name
554                        *task.name.write() =
555                            argv.get(0).map_or("linux".to_string(), |s| s.to_string());
556                        // Do not resolve pthread/TLS or arm futex-based watches in kernel.
557                        crate::println!("Program segments:");
558                        task.vm_manager.with_memmaps(|mm| {
559                            for map in mm.values() {
560                                crate::println!("  VA: {:#x}-{:#x} -> PA: {:#x}-{:#x} (perm: {:#x}, shared: {})",
561                                    map.vmarea.start, map.vmarea.end,
562                                    map.pmarea.start, map.pmarea.end,
563                                    map.permissions, map.is_shared);
564                            }
565                        });
566                        crate::println!("=================================");
567
568                        // Clear page table entries
569                        let idx =
570                            arch::vm::get_root_pagetable_ptr(task.vm_manager.get_asid()).unwrap();
571                        let root_page_table = arch::vm::get_pagetable(idx).unwrap();
572                        root_page_table.unmap_all();
573                        // Setup the trampoline
574                        arch::vm::setup_trampoline_for_user(&task.vm_manager);
575                        // Setup the stack following Linux ABI standard layout
576                        let (_, stack_top) = setup_user_stack(task);
577                        let mut sp = stack_top as usize;
578
579                        // For dynamic executables, reserve space for the dynamic linker's stack frame
580                        if let ExecutionMode::Dynamic { .. } = &load_result.mode {
581                            // Reserve 96 bytes for the dynamic linker's stack frame
582                            // This matches what _dlstart_c expects
583                            sp -= 96;
584                            // Zero out the reserved space
585                            unsafe {
586                                let paddr = task.vm_manager.translate_vaddr(sp).unwrap();
587                                let slice = core::slice::from_raw_parts_mut(paddr as *mut u8, 96);
588                                slice.fill(0);
589                            }
590                        }
591
592                        // --- 1. Argument and environment strings (at high addresses) ---
593                        let mut arg_vaddrs: Vec<u64> = Vec::new();
594                        for &arg in argv.iter() {
595                            let len = arg.len() + 1; // +1 for null terminator
596                            sp -= len;
597                            let vaddr = sp;
598                            unsafe {
599                                let paddr = task.vm_manager.translate_vaddr(vaddr).unwrap();
600                                let slice = core::slice::from_raw_parts_mut(paddr as *mut u8, len);
601                                slice[..len - 1].copy_from_slice(arg.as_bytes());
602                                slice[len - 1] = 0; // Null terminator
603                            }
604                            arg_vaddrs.push(vaddr as u64);
605                        }
606
607                        let mut env_vaddrs: Vec<u64> = Vec::new();
608                        for &env in envp.iter() {
609                            // crate::println!("Setting up env: {}", env);
610                            // Debug: Print raw bytes for LD_LIBRARY_PATH
611                            // if env.starts_with("LD_LIBRARY_PATH=") {
612                            //     crate::println!("LD_LIBRARY_PATH env string length: {}", env.len());
613                            //     crate::println!("LD_LIBRARY_PATH raw bytes: {:?}", env.as_bytes());
614                            //     for (i, &byte) in env.as_bytes().iter().enumerate() {
615                            //         if byte < 32 || byte > 126 {
616                            //             crate::println!("  Non-printable byte at {}: 0x{:02x} ('{}' is printable)",
617                            //                           i, byte, (byte >= 32 && byte <= 126));
618                            //         }
619                            //     }
620                            // }
621                            let len = env.len() + 1;
622                            sp -= len;
623                            let vaddr = sp;
624                            unsafe {
625                                let paddr = task.vm_manager.translate_vaddr(vaddr).unwrap();
626                                let slice = core::slice::from_raw_parts_mut(paddr as *mut u8, len);
627                                slice[..len - 1].copy_from_slice(env.as_bytes());
628                                slice[len - 1] = 0; // Null terminator
629                            }
630                            env_vaddrs.push(vaddr as u64);
631                        }
632
633                        // --- 2. Platform-specific padding and auxiliary vector ---
634                        // Align to 16 bytes before starting structured data
635
636                        sp = sp & !0xF;
637
638                        // Build auxiliary vector based on the ELF loading result
639                        use crate::task::elf_loader::build_auxiliary_vector;
640                        let auxv = build_auxiliary_vector(&load_result);
641
642                        // --- Calculate total size needed for structured data ---
643                        let auxv_size = auxv.len() * 16; // Each auxv entry is 16 bytes
644                        let envp_size = (env_vaddrs.len() + 1) * 8; // +1 for NULL terminator
645                        let argv_size = (arg_vaddrs.len() + 1) * 8; // +1 for NULL terminator
646                        let argc_size = 8;
647                        let total_structured_size = auxv_size + envp_size + argv_size + argc_size;
648
649                        // Align the total size and calculate final sp
650                        let aligned_size = (total_structured_size + 15) & !15; // Round up to 16-byte boundary
651                        sp -= aligned_size;
652                        let final_sp = sp;
653                        let mut current_pos = final_sp;
654
655                        // --- Place data from the calculated position ---
656
657                        // --- 1. Argument count (argc) ---
658                        let argc = argv.len() as u64;
659                        unsafe {
660                            *(task.vm_manager.translate_vaddr(current_pos).unwrap() as *mut u64) =
661                                argc;
662                        }
663                        current_pos += 8;
664
665                        // --- 2. Argument pointer array (argv) ---
666                        for &arg_vaddr in arg_vaddrs.iter() {
667                            unsafe {
668                                *(task.vm_manager.translate_vaddr(current_pos).unwrap()
669                                    as *mut u64) = arg_vaddr;
670                            }
671                            current_pos += 8;
672                        }
673                        // NULL terminator for argv
674                        unsafe {
675                            *(task.vm_manager.translate_vaddr(current_pos).unwrap() as *mut u64) =
676                                0;
677                        }
678                        current_pos += 8;
679
680                        // --- 3. Environment pointer array (envp) ---
681                        for &env_vaddr in env_vaddrs.iter() {
682                            unsafe {
683                                *(task.vm_manager.translate_vaddr(current_pos).unwrap()
684                                    as *mut u64) = env_vaddr;
685                            }
686                            current_pos += 8;
687                        }
688                        // NULL terminator for envp
689                        unsafe {
690                            *(task.vm_manager.translate_vaddr(current_pos).unwrap() as *mut u64) =
691                                0;
692                        }
693                        current_pos += 8;
694
695                        // --- 4. Auxiliary vector (auxv) ---
696                        // crate::println!("Setting up auxiliary vector with {} entries:", auxv.len());
697                        for auxv_entry in auxv.iter() {
698                            // crate::println!("  auxv[{}]: type={:#x} value={:#x} @ sp={:#x}",
699                            //     i, auxv_entry.a_type, auxv_entry.a_val, current_pos);
700                            unsafe {
701                                let paddr = task.vm_manager.translate_vaddr(current_pos).unwrap()
702                                    as *mut u64;
703                                *paddr = auxv_entry.a_type;
704                                *(paddr.add(1)) = auxv_entry.a_val;
705                            }
706                            current_pos += 16; // Each entry is 16 bytes
707                        }
708
709                        // Use the aligned final_sp
710                        sp = final_sp;
711
712                        // // Debug: Dump stack contents around the final SP
713                        // crate::println!("DEBUG: Final stack dump from sp={:#x}:", sp);
714                        // for i in 0..32 {
715                        //     let addr = sp + (i * 8);
716                        //     if let Some(paddr) = task.vm_manager.translate_vaddr(addr) {
717                        //         let value = unsafe { *(paddr as *const u64) };
718                        //         crate::println!("  [{:#x}] = {:#018x} ({})", addr, value,
719                        //             core::str::from_utf8(&value.to_le_bytes()).unwrap_or("<invalid>"));
720                        //     }
721                        // }
722
723                        task.set_entry_point(load_result.entry_point as usize);
724                        task.vcpu.lock().iregs = IntRegisters::new(); // Clear registers
725                        task.vcpu.lock().set_sp(sp); // Set stack pointer
726
727                        // Initialize trapframe with clean state
728                        trapframe.regs = task.vcpu.lock().iregs;
729                        trapframe.epc = load_result.entry_point;
730                        // crate::println!("DEBUG: Set trapframe.epc to {:#x}", trapframe.epc);
731
732                        // Switch to the new task
733                        task.vcpu.lock().switch(trapframe);
734                        Ok(())
735                    }
736                    Err(e) => {
737                        crate::println!("Failed to load Linux ELF binary: {:?}", e);
738                        Err("Failed to load Linux ELF binary")
739                    }
740                }
741            }
742            None => Err("Invalid file object type for Linux binary execution"),
743        }
744    }
745
746    fn get_default_cwd(&self) -> &str {
747        "/" // Linux uses root as default working directory
748    }
749
750    fn setup_overlay_environment(
751        &self,
752        target_vfs: &Arc<VfsManager>,
753        base_vfs: &Arc<VfsManager>,
754        system_path: &str,
755        config_path: &str,
756    ) -> Result<(), &'static str> {
757        // crate::println!("Setting up Linux overlay environment with system path: {} and config path: {}", system_path, config_path);
758        // Linux ABI uses overlay mount with system Linux tools and config persistence
759        let lower_vfs_list = alloc::vec![(base_vfs, system_path)];
760        let upper_vfs = base_vfs;
761        let fs = match OverlayFS::new_from_paths_and_vfs(
762            Some((upper_vfs, config_path)),
763            lower_vfs_list,
764            "/",
765        ) {
766            Ok(fs) => fs,
767            Err(e) => {
768                crate::println!(
769                    "Failed to create overlay filesystem for Linux ABI: {}",
770                    e.message
771                );
772                return Err("Failed to create Linux overlay environment");
773            }
774        };
775        match target_vfs.mount(fs, "/", 0) {
776            Ok(()) => Ok(()),
777            Err(e) => {
778                crate::println!(
779                    "Failed to create cross-VFS overlay for Linux ABI: {}",
780                    e.message
781                );
782                Err("Failed to create Linux overlay environment")
783            }
784        }
785    }
786
787    fn setup_shared_resources(
788        &self,
789        target_vfs: &Arc<VfsManager>,
790        base_vfs: &Arc<VfsManager>,
791    ) -> Result<(), &'static str> {
792        // crate::println!("Setting up Linux shared resources with base VFS");
793        // Linux shared resource setup: bind mount common directories and Scarlet gateway
794        match create_dir_if_not_exists(target_vfs, "/home") {
795            Ok(()) => {}
796            Err(_e) => {
797                // crate::println!("Failed to create /home directory for Linux: {}", _e.message);
798                return Err("Failed to create /home directory for Linux");
799            }
800        }
801
802        match target_vfs.bind_mount_from(base_vfs, "/home", "/home") {
803            Ok(()) => {}
804            Err(_e) => {
805                // crate::println!("Failed to bind mount /home for Linux: {}", _e.message);
806            }
807        }
808
809        match create_dir_if_not_exists(target_vfs, "/data") {
810            Ok(()) => {}
811            Err(e) => {
812                crate::println!("Failed to create /data directory for Linux: {}", e.message);
813                return Err("Failed to create /data directory for Linux");
814            }
815        }
816
817        match target_vfs.bind_mount_from(base_vfs, "/data/shared", "/data/shared") {
818            Ok(()) => {}
819            Err(_e) => {
820                // crate::println!("Failed to bind mount /data/shared for Linux: {}", _e.message);
821            }
822        }
823
824        // Setup devices directory
825        match create_dir_if_not_exists(target_vfs, "/dev") {
826            Ok(()) => {}
827            Err(_e) => {
828                crate::println!("Failed to create /dev directory for Linux: {}", _e.message);
829                return Err("Failed to create /dev directory for Linux");
830            }
831        }
832        match target_vfs.bind_mount_from(base_vfs, "/dev", "/dev") {
833            Ok(()) => {}
834            Err(_e) => {
835                crate::println!("Failed to bind mount /dev for Linux: {}", _e.message);
836                return Err("Failed to bind mount /dev for Linux");
837            }
838        }
839
840        // Setup tmp directory
841        match create_dir_if_not_exists(target_vfs, "/tmp") {
842            Ok(()) => {}
843            Err(_e) => {
844                crate::println!("Failed to create /tmp directory for Linux: {}", _e.message);
845                return Err("Failed to create /tmp directory for Linux");
846            }
847        }
848        match target_vfs.bind_mount_from(base_vfs, "/tmp", "/tmp") {
849            Ok(()) => {}
850            Err(_e) => {
851                crate::println!("Failed to bind mount /tmp for Linux: {}", _e.message);
852                return Err("Failed to bind mount /tmp for Linux");
853            }
854        }
855
856        // Setup gateway to native Scarlet environment (read-only for security)
857        match create_dir_if_not_exists(target_vfs, "/scarlet") {
858            Ok(()) => {}
859            Err(_e) => {
860                crate::println!(
861                    "Failed to create /scarlet directory for Linux: {}",
862                    _e.message
863                );
864                return Err("Failed to create /scarlet directory for Linux");
865            }
866        }
867        match target_vfs.bind_mount_from(base_vfs, "/", "/scarlet") {
868            Ok(()) => Ok(()),
869            Err(_e) => {
870                crate::println!(
871                    "Failed to bind mount native Scarlet root to /scarlet for Linux: {}",
872                    _e.message
873                );
874                return Err("Failed to bind mount native Scarlet root to /scarlet for Linux");
875            }
876        }
877    }
878
879    fn initialize_from_existing_handles(
880        &mut self,
881        _task: &crate::task::Task,
882    ) -> Result<(), &'static str> {
883        // _task.handle_table.close_all();
884        self.init_std_fds(
885            0, // stdin handle
886            1, // stdout handle
887            2, // stderr handle
888        );
889        // Initialize TGID for the task at ABI attach time
890        self.thread_state.tgid = _task.get_id();
891        Ok(())
892    }
893}
894
// Linux syscall table for this ABI. Each entry has the form
// `VariantName = <syscall number> => <handler>`.
// Entries are NOT listed in numeric order; look a syscall up by its number, not by
// its position. How the table is expanded is defined by the `syscall_table!` macro
// (not visible here).
syscall_table! {
    // Number 0 is handled by an inline closure that ignores its arguments and yields 0.
    Invalid = 0 => |_abi: &mut crate::abi::linux::riscv64::LinuxRiscv64Abi, _trapframe: &mut crate::arch::Trapframe| {
        0
    },
    Getcwd = 17 => fs::sys_getcwd,
    Eventfd2 = 19 => fs::sys_eventfd2,
    // (EpollCreate1 = 20 is registered after EpollPwait below.)
    EpollCtl = 21 => fs::sys_epoll_ctl,
    EpollPwait = 22 => fs::sys_epoll_pwait,
    EpollCreate1 = 20 => fs::sys_epoll_create1,
    Flock = 32 => fs::sys_flock,
    Dup = 23 => fs::sys_dup,
    Dup3 = 24 => fs::sys_dup3,
    Fcntl = 25 => fs::sys_fcntl,
    Ioctl = 29 => fs::sys_ioctl,
    MkdirAt = 34 => fs::sys_mkdirat,
    UnlinkAt = 35 => fs::sys_unlinkat,
    Ftruncate = 46 => fs::sys_ftruncate,
    Fallocate = 47 => fs::sys_fallocate,
    LinkAt = 37 => fs::sys_linkat,
    FaccessAt = 48 => fs::sys_faccessat,
    Chdir = 49 => fs::sys_chdir,
    Fchmod = 52 => fs::sys_fchmod,
    OpenAt = 56 => fs::sys_openat,
    Close = 57 => fs::sys_close,
    Pipe2 = 59 => pipe::sys_pipe2,
    GetDents64 = 61 => fs::sys_getdents64,
    Lseek = 62 => fs::sys_lseek,
    Read = 63 => fs::sys_read,
    Write = 64 => fs::sys_write,
    Readv = 65 => fs::sys_readv,
    Writev = 66 => fs::sys_writev,
    Pread64 = 67 => fs::sys_pread64,
    Pwrite64 = 68 => fs::sys_pwrite64,
    Pselect6 = 72 => fs::sys_pselect6,
    Ppoll = 73 => fs::sys_ppoll,
    NewFstAtAt = 79 => fs::sys_newfstatat,
    NewFstat = 80 => fs::sys_newfstat,
    ReadLinkAt = 78 => fs::sys_readlinkat,
    Fsync = 82 => fs::sys_fsync,
    Exit = 93 => proc::sys_exit,
    ExitGroup = 94 => proc::sys_exit_group,
    SetTidAddress = 96 => proc::sys_set_tid_address,
    Futex = 98 => futex::sys_futex,
    SetRobustList = 99 => proc::sys_set_robust_list,
    Nanosleep = 101 => time::sys_nanosleep,
    TimerCreate = 107 => time::sys_timer_create,
    TimerGettime = 108 => time::sys_timer_gettime,
    TimerGetoverrun = 109 => time::sys_timer_getoverrun,
    TimerSettime = 110 => time::sys_timer_settime,
    TimerDelete = 111 => time::sys_timer_delete,
    ClockGettime = 113 => time::sys_clock_gettime,
    ClockGetres = 114 => time::sys_clock_getres,
    RtSigaction = 134 => signal::sys_rt_sigaction,
    RtSigprocmask = 135 => signal::sys_rt_sigprocmask,
    SetGid = 144 => proc::sys_setgid,
    SetUid = 146 => proc::sys_setuid,
    SetPgid = 154 => proc::sys_setpgid,
    GetPgid = 155 => proc::sys_getpgid,
    Uname = 160 => proc::sys_uname,
    Umask = 166 => fs::sys_umask,
    Prctl = 167 => proc::sys_prctl,
    GetPid = 172 => proc::sys_getpid,
    GetPpid = 173 => proc::sys_getppid,
    GetUid = 174 => proc::sys_getuid,
    GetEuid = 175 => proc::sys_geteuid,
    GetGid = 176 => proc::sys_getgid,
    GetEgid = 177 => proc::sys_getegid,
    GetTid = 178 => proc::sys_gettid,
    // NOTE(review): kill(2) addresses a process while tkill(2) addresses a thread;
    // both numbers are routed to the same handler here — confirm this is sufficient.
    Kill = 129 => signal::sys_tkill, // Alias sys_kill to sys_tkill
    Tkill = 130 => signal::sys_tkill,
    Brk = 214 => proc::sys_brk,
    Munmap = 215 => mm::sys_munmap,
    Clone = 220 => proc::sys_clone,
    Execve = 221 => fs::sys_execve,
    Mmap = 222 => mm::sys_mmap,
    Mprotect = 226 => mm::sys_mprotect,
    // NOTE(review): in the asm-generic syscall table used by riscv64, number 232 is
    // mincore and epoll_wait has no number (232 is epoll_wait on x86_64). Confirm
    // whether this entry is intentional.
    EpollWait = 232 => fs::sys_epoll_wait,
    Getrandom = 278 => fs::sys_getrandom,
    MemfdCreate = 279 => proc::sys_memfd_create, // Linux memfd_create
    Wait4 = 260 => proc::sys_wait4,
    Prlimit64 = 261 => proc::sys_prlimit64,
    Socket = 198 => socket::sys_socket,
    Socketpair = 199 => socket::sys_socketpair,
    Bind = 200 => socket::sys_bind,
    Listen = 201 => socket::sys_listen,
    Accept = 202 => socket::sys_accept,
    Connect = 203 => socket::sys_connect,
    GetSockname = 204 => socket::sys_getsockname,
    GetPeerName = 205 => socket::sys_getpeername,
    Sendto = 206 => socket::sys_sendto,
    Recvfrom = 207 => socket::sys_recvfrom,
    SetSockopt = 208 => socket::sys_setsockopt,
    GetSockopt = 209 => socket::sys_getsockopt,
    Shutdown = 210 => socket::sys_shutdown,
    Sendmsg = 211 => socket::sys_sendmsg,
    Recvmsg = 212 => socket::sys_recvmsg,
    Statx = 291 => fs::sys_statx,
    RenameAt2 = 276 => fs::sys_renameat2,
    Membarrier = 283 => proc::sys_membarrier,
    FaccessAt2 = 439 => fs::sys_faccessat2,
}
998
999fn create_dir_if_not_exists(vfs: &Arc<VfsManager>, path: &str) -> Result<(), FileSystemError> {
1000    match vfs.create_dir(path) {
1001        Ok(()) => Ok(()),
1002        Err(e) => {
1003            if e.kind == FileSystemErrorKind::AlreadyExists {
1004                Ok(()) // Directory already exists, nothing to do
1005            } else {
1006                Err(e) // Some other error occurred
1007            }
1008        }
1009    }
1010}
1011
/// Registers `LinuxRiscv64Abi` by invoking the `register_abi!` macro on it.
fn register_linux_abi() {
    register_abi!(LinuxRiscv64Abi);
}

// Hands `register_linux_abi` to `early_initcall!`; presumably this schedules it to run
// during early kernel initialization — TODO confirm against the macro definition.
early_initcall!(register_linux_abi);