asm_util/
tsc.rs

1use crate::{c_internal, ip_matches};
2
/// Emulates an x86-64 processor's timestamp counter, as read by rdtsc and
/// rdtscp.
///
/// The counter value is not stored; it is derived on demand from a simulated
/// time and the configured clock rate.
///
/// The type is `repr(C)` because values cross the C API (e.g. `Tsc_create`
/// returns one by value). It is plain data, so the common value-type traits
/// are derived to make it printable, copyable and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(C)]
// The field keeps its camelCase name for as long as the C API exposes it.
#[allow(non_snake_case)]
pub struct Tsc {
    // TODO: rename and make non-pub when we drop C API
    /// Rate of the emulated counter, in cycles per second.
    pub cyclesPerSecond: u64,
}
11
12impl Tsc {
13    /// Returns the host system's native TSC rate, or None if it couldn't be found.
14    ///
15    /// WARNING: this is known to fail completely on some supported CPUs
16    /// (particularly AMD), and can return the wrong value for others. i.e. this
17    /// needs more work if we need to dependably get the host's TSC rate.
18    /// e.g. see <https://github.com/shadow/shadow/issues/1519>.
19    pub fn native_cycles_per_second() -> Option<u64> {
20        let res = unsafe { c_internal::TscC_nativeCyclesPerSecond() };
21        if res == 0 { None } else { Some(res) }
22    }
23
24    pub fn new(cycles_per_second: u64) -> Self {
25        Self {
26            cyclesPerSecond: cycles_per_second,
27        }
28    }
29
30    fn set_rdtsc_cycles(&self, rax: &mut u64, rdx: &mut u64, nanos: u64) {
31        // The multiply is guaranteed not to overflow since both operands are 64 bit.
32        let cycles = u128::from(self.cyclesPerSecond) * u128::from(nanos) / 1_000_000_000;
33        // *possible* that we'll wrap around here, but only after a very long
34        // simulated time and/or a ridiculously fast clock. Wrapping is also
35        // presumably what would happen on real hardware.
36        let cycles = cycles as u64;
37        *rdx = (cycles >> 32) & 0xff_ff_ff_ff;
38        *rax = cycles & 0xff_ff_ff_ff;
39    }
40
41    const RDTSC: [u8; 2] = [0x0f, 0x31];
42    const RDTSCP: [u8; 3] = [0x0f, 0x01, 0xf9];
43
44    /// Updates registers to reflect the result of executing an rdtsc
45    /// instruction at time `nanos`.
46    pub fn emulate_rdtsc(&self, rax: &mut u64, rdx: &mut u64, rip: &mut u64, nanos: u64) {
47        self.set_rdtsc_cycles(rax, rdx, nanos);
48        *rip += Self::RDTSC.len() as u64;
49    }
50
51    /// Updates registers to reflect the result of executing an rdtscp
52    /// instruction at time `nanos`.
53    pub fn emulate_rdtscp(
54        &self,
55        rax: &mut u64,
56        rdx: &mut u64,
57        rcx: &mut u64,
58        rip: &mut u64,
59        nanos: u64,
60    ) {
61        self.set_rdtsc_cycles(rax, rdx, nanos);
62        *rip += Self::RDTSCP.len() as u64;
63
64        // rcx is set to IA32_TSC_AUX. According to the Intel developer manual
65        // 17.17.2 "IA32_TSC_AUX Register and RDTSCP Support", "IA32_TSC_AUX
66        // provides a 32-bit field that is initialized by privileged software with a
67        // signature value (for example, a logical processor ID)." ... "User mode
68        // software can use RDTSCP to detect if CPU migration has occurred between
69        // successive reads of the TSC. It can also be used to adjust for per-CPU
70        // differences in TSC values in a NUMA system."
71        //
72        // For now we just hard-code an arbitrary constant, which should be fine for
73        // the stated purpose.
74        // `hex(int(random.random()*2**32))`
75        *rcx = 0x806eb479;
76    }
77
78    /// Whether `ip` points to an rdtsc instruction.
79    ///
80    /// # Safety
81    ///
82    /// `ip` must be a dereferenceable pointer, pointing to the
83    /// beginning of a valid x86_64 instruction.
84    pub unsafe fn ip_is_rdtsc(ip: *const u8) -> bool {
85        unsafe { ip_matches(ip, &Self::RDTSC) }
86    }
87
88    /// Whether `ip` points to an rdtscp instruction.
89    ///
90    /// # Safety
91    ///
92    /// `ip` must be a dereferenceable pointer, pointing to the
93    /// beginning of a valid x86_64 instruction.
94    pub unsafe fn ip_is_rdtscp(ip: *const u8) -> bool {
95        unsafe { ip_matches(ip, &Self::RDTSCP) }
96    }
97}
98
99mod export {
100    use super::*;
101
102    /// Returns the host system's native TSC rate, or 0 if it couldn't be found.
103    ///
104    /// WARNING: this is known to fail completely on some supported CPUs
105    /// (particularly AMD), and can return the wrong value for others. i.e. this
106    /// needs more work if we need to dependably get the host's TSC rate.
107    /// e.g. see https://github.com/shadow/shadow/issues/1519.
108    #[unsafe(no_mangle)]
109    pub extern "C-unwind" fn Tsc_nativeCyclesPerSecond() -> u64 {
110        Tsc::native_cycles_per_second().unwrap_or(0)
111    }
112
113    /// Instantiate a TSC with the given clock rate.
114    #[unsafe(no_mangle)]
115    pub extern "C-unwind" fn Tsc_create(cycles_per_second: u64) -> Tsc {
116        Tsc::new(cycles_per_second)
117    }
118
119    /// Updates `regs` to reflect the result of executing an rdtsc instruction at
120    /// time `nanos`.
121    #[unsafe(no_mangle)]
122    pub extern "C-unwind" fn Tsc_emulateRdtsc(
123        tsc: *const Tsc,
124        rax: *mut u64,
125        rdx: *mut u64,
126        rip: *mut u64,
127        nanos: u64,
128    ) {
129        let tsc = unsafe { tsc.as_ref().unwrap() };
130        let rax = unsafe { rax.as_mut().unwrap() };
131        let rdx = unsafe { rdx.as_mut().unwrap() };
132        let rip = unsafe { rip.as_mut().unwrap() };
133        tsc.emulate_rdtsc(rax, rdx, rip, nanos)
134    }
135
136    /// Updates `regs` to reflect the result of executing an rdtscp instruction at
137    /// time `nanos`.
138    #[unsafe(no_mangle)]
139    pub extern "C-unwind" fn Tsc_emulateRdtscp(
140        tsc: *const Tsc,
141        rax: *mut u64,
142        rdx: *mut u64,
143        rcx: *mut u64,
144        rip: *mut u64,
145        nanos: u64,
146    ) {
147        let tsc = unsafe { tsc.as_ref().unwrap() };
148        let rax = unsafe { rax.as_mut().unwrap() };
149        let rdx = unsafe { rdx.as_mut().unwrap() };
150        let rcx = unsafe { rcx.as_mut().unwrap() };
151        let rip = unsafe { rip.as_mut().unwrap() };
152        tsc.emulate_rdtscp(rax, rdx, rcx, rip, nanos)
153    }
154
155    /// Whether `buf` begins with an rdtsc instruction.
156    #[unsafe(no_mangle)]
157    pub extern "C-unwind" fn isRdtsc(ip: *const u8) -> bool {
158        unsafe { Tsc::ip_is_rdtsc(ip) }
159    }
160
161    /// Whether `buf` begins with an rdtscp instruction.
162    #[unsafe(no_mangle)]
163    pub extern "C-unwind" fn isRdtscp(ip: *const u8) -> bool {
164        unsafe { Tsc::ip_is_rdtscp(ip) }
165    }
166}
167
#[cfg(test)]
mod test {
    use super::*;

    /// Emulates both rdtsc and rdtscp for a counter running at `clock` cycles
    /// per second at time `nanos`, checks that the two instructions agree, and
    /// returns the reassembled 64-bit counter value.
    fn get_emulated_cycles(clock: u64, nanos: u64) -> u64 {
        /// Rebuilds the counter from its register halves, after checking that
        /// neither register has anything above its low 32 bits.
        fn join_halves(rax: u64, rdx: u64) -> u64 {
            assert_eq!(rax >> 32, 0);
            assert_eq!(rdx >> 32, 0);
            (rdx << 32) | rax
        }

        let tsc = Tsc::new(clock);
        let (mut rax, mut rdx, mut rcx, mut rip) = (0u64, 0u64, 0u64, 0u64);

        tsc.emulate_rdtsc(&mut rax, &mut rdx, &mut rip, nanos);
        let via_rdtsc = join_halves(rax, rdx);

        tsc.emulate_rdtscp(&mut rax, &mut rdx, &mut rcx, &mut rip, nanos);
        let via_rdtscp = join_halves(rax, rdx);

        assert_eq!(via_rdtsc, via_rdtscp);
        via_rdtsc
    }

    /// At 1 GHz, one nanosecond is exactly one cycle.
    #[test]
    fn ns_granularity_at_1_ghz() {
        let one_ghz = 1_000_000_000;
        assert_eq!(get_emulated_cycles(one_ghz, 1), 1);
    }

    /// A clock 1000x faster yields 1000x the cycles over the same time.
    #[test]
    fn scales_with_clock_rate() {
        const BASE_CLOCK: u64 = 1_000_000_000;
        const BASE_NANOS: u64 = 1;

        let scaled = get_emulated_cycles(1000 * BASE_CLOCK, BASE_NANOS);
        let baseline = get_emulated_cycles(BASE_CLOCK, BASE_NANOS);
        assert_eq!(scaled, 1000 * baseline);
    }

    /// 1000x the elapsed time yields 1000x the cycles at the same clock.
    #[test]
    fn scales_with_time() {
        const BASE_CLOCK: u64 = 1_000_000_000;
        const BASE_NANOS: u64 = 1;

        let scaled = get_emulated_cycles(BASE_CLOCK, 1000 * BASE_NANOS);
        let baseline = get_emulated_cycles(BASE_CLOCK, BASE_NANOS);
        assert_eq!(scaled, 1000 * baseline);
    }

    /// A year at 10 GHz needs well over 32 bits; the result must match
    /// plain checked 64-bit arithmetic.
    #[test]
    fn large_cycle_count() {
        const SECONDS_PER_YEAR: u64 = 365 * 24 * 60 * 60;
        const CYCLES_PER_SECOND: u64 = 10_000_000_000;
        const NANOS_PER_SECOND: u64 = 1_000_000_000;

        let expected = SECONDS_PER_YEAR.checked_mul(CYCLES_PER_SECOND).unwrap();
        let actual = get_emulated_cycles(CYCLES_PER_SECOND, SECONDS_PER_YEAR * NANOS_PER_SECOND);
        assert_eq!(actual, expected);
    }
}