// shadow_tsc/lib.rs

// https://github.com/rust-lang/rfcs/blob/master/text/2585-unsafe-block-in-unsafe-fn.md
#![deny(unsafe_op_in_unsafe_fn)]

// Force cargo to link against crates that aren't (yet) referenced from Rust
// code (but are referenced from this crate's C code).
// https://github.com/rust-lang/cargo/issues/9391
extern crate logger;
/// Bindings generated at build time from this crate's C code.
///
/// cbindgen:ignore
pub mod c_internal {
    // The generated bindings follow C naming conventions, not Rust's.
    #![allow(non_upper_case_globals)]
    #![allow(non_camel_case_types)]
    #![allow(non_snake_case)]
    // https://github.com/rust-lang/rust/issues/66220
    #![allow(improper_ctypes)]
    include!(concat!(env!("OUT_DIR"), "/c_internal.rs"));
}
18
/// Emulates an x86-64 processor's timestamp counter, as read by rdtsc and
/// rdtscp.
#[repr(C)]
#[allow(non_snake_case)]
pub struct Tsc {
    // TODO: rename and make non-pub when we drop C API
    // Emulated clock rate in Hz; camelCase kept for C ABI compatibility.
    pub cyclesPerSecond: u64,
}
27
impl Tsc {
    /// Returns the host system's native TSC rate, or None if it couldn't be found.
    ///
    /// WARNING: this is known to fail completely on some supported CPUs
    /// (particularly AMD), and can return the wrong value for others. i.e. this
    /// needs more work if we need to dependably get the host's TSC rate.
    /// e.g. see <https://github.com/shadow/shadow/issues/1519>.
    pub fn native_cycles_per_second() -> Option<u64> {
        // SAFETY: the C helper takes no arguments; it signals failure by
        // returning 0, which we translate to `None`.
        let res = unsafe { c_internal::TscC_nativeCyclesPerSecond() };
        if res == 0 { None } else { Some(res) }
    }

    /// Creates a `Tsc` emulating a counter that ticks `cycles_per_second`
    /// times per simulated second.
    pub fn new(cycles_per_second: u64) -> Self {
        Self {
            cyclesPerSecond: cycles_per_second,
        }
    }

    /// Computes the emulated cycle count at simulated time `nanos` and writes
    /// it into `rax` (low 32 bits) and `rdx` (high 32 bits), mirroring how
    /// rdtsc/rdtscp report the counter on hardware.
    fn set_rdtsc_cycles(&self, rax: &mut u64, rdx: &mut u64, nanos: u64) {
        // The multiply is guaranteed not to overflow since both operands are 64 bit.
        let cycles = u128::from(self.cyclesPerSecond) * u128::from(nanos) / 1_000_000_000;
        // *possible* that we'll wrap around here, but only after a very long
        // simulated time and/or a ridiculously fast clock. Wrapping is also
        // presumably what would happen on real hardware.
        let cycles = cycles as u64;
        *rdx = (cycles >> 32) & 0xff_ff_ff_ff;
        *rax = cycles & 0xff_ff_ff_ff;
    }

    // Machine-code encodings of the instructions we emulate; used both to
    // advance `rip` past the instruction and to recognize it in `ip_matches`.
    const RDTSC: [u8; 2] = [0x0f, 0x31];
    const RDTSCP: [u8; 3] = [0x0f, 0x01, 0xf9];

    /// Updates registers to reflect the result of executing an rdtsc
    /// instruction at time `nanos`.
    pub fn emulate_rdtsc(&self, rax: &mut u64, rdx: &mut u64, rip: &mut u64, nanos: u64) {
        self.set_rdtsc_cycles(rax, rdx, nanos);
        *rip += Self::RDTSC.len() as u64;
    }

    /// Updates registers to reflect the result of executing an rdtscp
    /// instruction at time `nanos`.
    pub fn emulate_rdtscp(
        &self,
        rax: &mut u64,
        rdx: &mut u64,
        rcx: &mut u64,
        rip: &mut u64,
        nanos: u64,
    ) {
        self.set_rdtsc_cycles(rax, rdx, nanos);
        *rip += Self::RDTSCP.len() as u64;

        // rcx is set to IA32_TSC_AUX. According to the Intel developer manual
        // 17.17.2 "IA32_TSC_AUX Register and RDTSCP Support", "IA32_TSC_AUX
        // provides a 32-bit field that is initialized by privileged software with a
        // signature value (for example, a logical processor ID)." ... "User mode
        // software can use RDTSCP to detect if CPU migration has occurred between
        // successive reads of the TSC. It can also be used to adjust for per-CPU
        // differences in TSC values in a NUMA system."
        //
        // For now we just hard-code an arbitrary constant, which should be fine for
        // the stated purpose.
        // `hex(int(random.random()*2**32))`
        *rcx = 0x806eb479;
    }

    /// Whether the bytes at `ip` are exactly the instruction `insn`.
    ///
    /// SAFETY: `ip` must be a dereferenceable pointer, pointing to the beginning
    /// of a valid x86_64 instruction, and `insn` must be a valid x86_64 instruction.
    unsafe fn ip_matches(ip: *const u8, insn: &[u8]) -> bool {
        // SAFETY:
        // * Caller has guaranteed that `ip` points to some valid instruction.
        // * Caller has guaranteed that `insn` is a valid instruction.
        // * No instruction can be a prefix of another, so `insn` can't be a prefix
        //   of some *other* instruction at `ip`.
        // * [`std::Iterator::all`] is short-circuiting.
        //
        // e.g. consider the case where `ip` points to a 1-byte `ret`
        // instruction, and the next byte of memory isn't accessible. That
        // single byte *cannot* match the first byte of `insn`, so we'll never
        // dereference `ip.offset(1)`, which would be unsound.
        insn.iter()
            .enumerate()
            .all(|(offset, byte)| unsafe { *ip.add(offset) == *byte })
    }

    /// Whether `ip` points to an rdtsc instruction.
    ///
    /// # Safety
    ///
    /// `ip` must be a dereferenceable pointer, pointing to the
    /// beginning of a valid x86_64 instruction.
    pub unsafe fn ip_is_rdtsc(ip: *const u8) -> bool {
        // SAFETY: caller upholds `ip_matches`'s contract; RDTSC is a valid
        // instruction encoding.
        unsafe { Self::ip_matches(ip, &Self::RDTSC) }
    }

    /// Whether `ip` points to an rdtscp instruction.
    ///
    /// # Safety
    ///
    /// `ip` must be a dereferenceable pointer, pointing to the
    /// beginning of a valid x86_64 instruction.
    pub unsafe fn ip_is_rdtscp(ip: *const u8) -> bool {
        // SAFETY: caller upholds `ip_matches`'s contract; RDTSCP is a valid
        // instruction encoding.
        unsafe { Self::ip_matches(ip, &Self::RDTSCP) }
    }
}
133
/// C ABI wrappers around [`Tsc`]. These mirror the Rust API one-to-one; the
/// raw-pointer arguments are null-checked (panicking on null) rather than
/// silently ignored.
mod export {
    use super::*;

    /// Returns the host system's native TSC rate, or 0 if it couldn't be found.
    ///
    /// WARNING: this is known to fail completely on some supported CPUs
    /// (particularly AMD), and can return the wrong value for others. i.e. this
    /// needs more work if we need to dependably get the host's TSC rate.
    /// e.g. see https://github.com/shadow/shadow/issues/1519.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn Tsc_nativeCyclesPerSecond() -> u64 {
        Tsc::native_cycles_per_second().unwrap_or(0)
    }

    /// Instantiate a TSC with the given clock rate.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn Tsc_create(cycles_per_second: u64) -> Tsc {
        Tsc::new(cycles_per_second)
    }

    /// Updates `regs` to reflect the result of executing an rdtsc instruction at
    /// time `nanos`.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn Tsc_emulateRdtsc(
        tsc: *const Tsc,
        rax: *mut u64,
        rdx: *mut u64,
        rip: *mut u64,
        nanos: u64,
    ) {
        // SAFETY: caller must pass pointers that are either null (caught here
        // by `unwrap`, which panics) or valid for the duration of the call.
        let tsc = unsafe { tsc.as_ref().unwrap() };
        let rax = unsafe { rax.as_mut().unwrap() };
        let rdx = unsafe { rdx.as_mut().unwrap() };
        let rip = unsafe { rip.as_mut().unwrap() };
        tsc.emulate_rdtsc(rax, rdx, rip, nanos)
    }

    /// Updates `regs` to reflect the result of executing an rdtscp instruction at
    /// time `nanos`.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn Tsc_emulateRdtscp(
        tsc: *const Tsc,
        rax: *mut u64,
        rdx: *mut u64,
        rcx: *mut u64,
        rip: *mut u64,
        nanos: u64,
    ) {
        // SAFETY: caller must pass pointers that are either null (caught here
        // by `unwrap`, which panics) or valid for the duration of the call.
        let tsc = unsafe { tsc.as_ref().unwrap() };
        let rax = unsafe { rax.as_mut().unwrap() };
        let rdx = unsafe { rdx.as_mut().unwrap() };
        let rcx = unsafe { rcx.as_mut().unwrap() };
        let rip = unsafe { rip.as_mut().unwrap() };
        tsc.emulate_rdtscp(rax, rdx, rcx, rip, nanos)
    }

    /// Whether `ip` points to an rdtsc instruction.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn isRdtsc(ip: *const u8) -> bool {
        // SAFETY: caller must uphold `Tsc::ip_is_rdtsc`'s contract: `ip` is
        // dereferenceable and points to the start of a valid instruction.
        unsafe { Tsc::ip_is_rdtsc(ip) }
    }

    /// Whether `ip` points to an rdtscp instruction.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn isRdtscp(ip: *const u8) -> bool {
        // SAFETY: caller must uphold `Tsc::ip_is_rdtscp`'s contract: `ip` is
        // dereferenceable and points to the start of a valid instruction.
        unsafe { Tsc::ip_is_rdtscp(ip) }
    }
}
202
#[cfg(test)]
mod test {
    use super::*;

    /// Emulates both rdtsc and rdtscp at simulated time `nanos` on a `Tsc`
    /// with clock rate `clock`, verifies the two instructions agree and that
    /// each output register uses only its low 32 bits, and returns the
    /// 64-bit cycle count.
    fn get_emulated_cycles(clock: u64, nanos: u64) -> u64 {
        let tsc = Tsc::new(clock);

        let (mut rax, mut rdx, mut rcx, mut rip) = (0u64, 0u64, 0u64, 0u64);

        tsc.emulate_rdtsc(&mut rax, &mut rdx, &mut rip, nanos);
        // Hardware only sets the low 32 bits of each register.
        assert_eq!(rax >> 32, 0);
        assert_eq!(rdx >> 32, 0);
        let from_rdtsc = (rdx << 32) | rax;

        tsc.emulate_rdtscp(&mut rax, &mut rdx, &mut rcx, &mut rip, nanos);
        assert_eq!(rax >> 32, 0);
        assert_eq!(rdx >> 32, 0);
        let from_rdtscp = (rdx << 32) | rax;

        // Both instructions must read out the same counter value.
        assert_eq!(from_rdtsc, from_rdtscp);
        from_rdtsc
    }

    #[test]
    fn ns_granularity_at_1_ghz() {
        // At exactly 1 GHz, one nanosecond corresponds to one cycle.
        assert_eq!(get_emulated_cycles(1_000_000_000, 1), 1);
    }

    #[test]
    fn scales_with_clock_rate() {
        // A 1000x faster clock yields 1000x the cycles for the same time.
        let base_clock = 1_000_000_000;
        let base_nanos = 1;
        let scaled = get_emulated_cycles(1000 * base_clock, base_nanos);
        assert_eq!(scaled, 1000 * get_emulated_cycles(base_clock, base_nanos));
    }

    #[test]
    fn scales_with_time() {
        // 1000x more elapsed time yields 1000x the cycles at the same rate.
        let base_clock = 1_000_000_000;
        let base_nanos = 1;
        let scaled = get_emulated_cycles(base_clock, 1000 * base_nanos);
        assert_eq!(scaled, 1000 * get_emulated_cycles(base_clock, base_nanos));
    }

    #[test]
    fn large_cycle_count() {
        // A full simulated year at 10 GHz: exercises the u128 intermediate
        // math while the result still fits in 64 bits.
        let one_year_in_seconds: u64 = 365 * 24 * 60 * 60;
        let ten_b_cycles_per_second: u64 = 10_000_000_000;
        let expected = one_year_in_seconds
            .checked_mul(ten_b_cycles_per_second)
            .unwrap();
        let actual =
            get_emulated_cycles(ten_b_cycles_per_second, one_year_in_seconds * 1_000_000_000);
        assert_eq!(actual, expected);
    }
}