kernel/
profiler.rs

1//! Simple profiler for measuring function execution time.
2//!
3//! This module provides a simple profiling utility to measure the execution time,
4//! call count and other statistics for functions and code blocks. It uses a
5//! global registry to store profiling data and a RAII guard (`ProfileGuard`)
6//! to automatically record timings.
7//!
8//! # Usage
9//!
10//! To profile a scope, use the `profile_scope!` macro at the beginning of the
11//! function or block you want to measure.
12//!
13//! ```
14//! use crate::profiler::profile_scope;
15//!
16//! fn my_function() {
17//!     profile_scope!("my_function");
18//!     // ... function logic ...
19//! }
20//! ```
21//!
22//! At the end of your program or at a convenient checkpoint, call
23//! `print_profiling_results()` to display the collected statistics.
24
25#[macro_export]
26macro_rules! profile_scope {
27    ($name:expr) => {
28        #[cfg(feature = "profiler")]
29        let _guard = $crate::profiler::ProfileGuard::new($name);
30    };
31}
32
33#[cfg(feature = "profiler")]
34pub use self::profiler_impl::*;
35
36#[cfg(feature = "profiler")]
37mod profiler_impl {
38    use crate::early_println;
39    use crate::timer::get_time_ns;
40    use alloc::collections::BTreeMap;
41    use alloc::string::{String, ToString};
42    use alloc::sync::Arc;
43    use alloc::vec::Vec;
44    use lazy_static::lazy_static;
45    use spin::Mutex;
46
47    /// Tree node for hierarchical profiling
48    #[derive(Clone)]
49    pub struct ProfileNode {
50        pub name: String,
51        pub call_count: u64,
52        pub total_time_ns: u64,
53        pub self_time_ns: u64, // Time excluding children
54        pub min_time_ns: u64,
55        pub max_time_ns: u64,
56        pub children: BTreeMap<String, ProfileNode>,
57    }
58
59    impl ProfileNode {
60        pub fn new(name: String) -> Self {
61            ProfileNode {
62                name,
63                call_count: 0,
64                total_time_ns: 0,
65                self_time_ns: 0,
66                min_time_ns: u64::MAX,
67                max_time_ns: 0,
68                children: BTreeMap::new(),
69            }
70        }
71
72        pub fn add_call(&mut self, elapsed_time: u64) {
73            self.call_count += 1;
74            self.total_time_ns += elapsed_time;
75            self.self_time_ns += elapsed_time; // Will be adjusted when children are subtracted
76            self.min_time_ns = self.min_time_ns.min(elapsed_time);
77            self.max_time_ns = self.max_time_ns.max(elapsed_time);
78        }
79    }
80
81    /// Call stack entry for tracking current execution path
82    pub struct CallStackEntry {
83        pub name: String,
84        pub start_time: u64,
85    }
86
87    lazy_static! {
88        /// Global profiling tree root
89        pub static ref PROFILER_ROOT: Arc<Mutex<ProfileNode>> =
90            Arc::new(Mutex::new(ProfileNode::new("ROOT".to_string())));
91
92        /// Call stack for tracking current execution path
93        pub static ref CALL_STACK: Arc<Mutex<Vec<CallStackEntry>>> =
94            Arc::new(Mutex::new(Vec::new()));
95    }
96
97    /// A RAII guard that records the execution time of its scope.
98    ///
99    /// When created, it records the start time. When it goes out of scope (and is dropped),
100    /// it calculates the elapsed time and updates the global profiler registry.
101    pub struct ProfileGuard {
102        name: &'static str,
103        start_time: u64,
104    }
105
106    impl ProfileGuard {
107        /// Creates a new `ProfileGuard`.
108        pub fn new(name: &'static str) -> Self {
109            let start_time = get_time_ns();
110
111            // Push to call stack
112            let mut call_stack = CALL_STACK.lock();
113            call_stack.push(CallStackEntry {
114                name: name.to_string(),
115                start_time,
116            });
117
118            ProfileGuard { name, start_time }
119        }
120    }
121
122    impl Drop for ProfileGuard {
123        fn drop(&mut self) {
124            let end_time = get_time_ns();
125            let elapsed = end_time.saturating_sub(self.start_time);
126
127            let mut call_stack = CALL_STACK.lock();
128            let mut root = PROFILER_ROOT.lock();
129
130            // Pop current function from call stack
131            if let Some(current_entry) = call_stack.pop() {
132                assert_eq!(current_entry.name, self.name);
133            }
134
135            // Navigate to the correct position in the tree
136            let mut current_node = &mut *root;
137
138            // Follow the call stack path to find/create the parent node
139            for entry in call_stack.iter() {
140                current_node = current_node
141                    .children
142                    .entry(entry.name.clone())
143                    .or_insert_with(|| ProfileNode::new(entry.name.clone()));
144            }
145
146            // Add this function as a child of the current node
147            let child_node = current_node
148                .children
149                .entry(self.name.to_string())
150                .or_insert_with(|| ProfileNode::new(self.name.to_string()));
151
152            child_node.add_call(elapsed);
153        }
154    }
155
156    /// Prints the collected profiling results in a tree structure
157    pub fn print_profiling_results() {
158        let root = PROFILER_ROOT.lock();
159
160        early_println!("--- Tree-based Profiling Results ---");
161        early_println!(
162            "{:<50} | {:>10} | {:>15} | {:>15} | {:>15} | {:>12}",
163            "Function",
164            "Count",
165            "Total (ms)",
166            "Self (ms)",
167            "Average (μs)",
168            "% of Parent"
169        );
170        early_println!("{}", "-".repeat(140));
171
172        print_node(&root, 0, root.total_time_ns);
173        early_println!("{}", "-".repeat(25));
174    }
175
176    fn print_node(node: &ProfileNode, depth: usize, parent_total_time: u64) {
177        if node.name == "ROOT" {
178            // Don't print the root node, just its children
179            for (_, child) in &node.children {
180                print_node(child, 0, child.total_time_ns);
181            }
182            return;
183        }
184
185        let indent = "  ".repeat(depth);
186        let total_ms = node.total_time_ns as f64 / 1_000_000.0;
187        let self_ms = node.self_time_ns as f64 / 1_000_000.0;
188        let avg_us = if node.call_count > 0 {
189            (node.total_time_ns / node.call_count) as f64 / 1_000.0
190        } else {
191            0.0
192        };
193        let percentage = if parent_total_time > 0 {
194            (node.total_time_ns as f64 / parent_total_time as f64) * 100.0
195        } else {
196            0.0
197        };
198
199        // Calculate available width for function name: base width minus indent
200        let base_width: usize = 50;
201        let available_width = base_width.saturating_sub(indent.len());
202
203        early_println!(
204            "{}{:<width$} | {:>10} | {:>15.3} | {:>15.3} | {:>15.3} | {:>11.2}%",
205            indent,
206            node.name,
207            node.call_count,
208            total_ms,
209            self_ms,
210            avg_us,
211            percentage,
212            width = available_width
213        );
214
215        // Print children sorted by total time (descending)
216        let mut children: alloc::vec::Vec<_> = node.children.values().collect();
217        children.sort_by(|a, b| b.total_time_ns.cmp(&a.total_time_ns));
218
219        for child in children {
220            print_node(child, depth + 1, node.total_time_ns);
221        }
222    }
223}