shadow_tsc/lib.rs
// https://github.com/rust-lang/rfcs/blob/master/text/2585-unsafe-block-in-unsafe-fn.md
#![deny(unsafe_op_in_unsafe_fn)]

// Force cargo to link against crates that aren't (yet) referenced from Rust
// code (but are referenced from this crate's C code).
// https://github.com/rust-lang/cargo/issues/9391
extern crate logger;

/// cbindgen:ignore
pub mod c_internal {
    #![allow(non_upper_case_globals)]
    #![allow(non_camel_case_types)]
    #![allow(non_snake_case)]
    // https://github.com/rust-lang/rust/issues/66220
    #![allow(improper_ctypes)]
    include!(concat!(env!("OUT_DIR"), "/c_internal.rs"));
}

/// Emulates an x86-64 processor's timestamp counter, as read by rdtsc and
/// rdtscp.
#[repr(C)]
#[allow(non_snake_case)]
pub struct Tsc {
    // TODO: rename and make non-pub when we drop C API
    pub cyclesPerSecond: u64,
}

impl Tsc {
    /// Returns the host system's native TSC rate, or `None` if it couldn't be found.
    ///
    /// WARNING: this is known to fail completely on some supported CPUs
    /// (particularly AMD), and can return the wrong value for others; i.e., this
    /// needs more work if we need to dependably get the host's TSC rate.
    /// See e.g. <https://github.com/shadow/shadow/issues/1519>.
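    ///
    /// A minimal usage sketch (assuming this crate is importable as
    /// `shadow_tsc`; marked `no_run` since the probe can legitimately fail):
    /// ```no_run
    /// # use shadow_tsc::Tsc;
    /// match Tsc::native_cycles_per_second() {
    ///     Some(hz) => println!("native TSC rate: {hz} Hz"),
    ///     None => println!("native TSC rate unavailable on this host"),
    /// }
    /// ```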
    pub fn native_cycles_per_second() -> Option<u64> {
        let res = unsafe { c_internal::TscC_nativeCyclesPerSecond() };
        if res == 0 { None } else { Some(res) }
    }

    /// Creates a `Tsc` emulating a clock that runs at `cycles_per_second` Hz.
    pub fn new(cycles_per_second: u64) -> Self {
        Self {
            cyclesPerSecond: cycles_per_second,
        }
    }

    fn set_rdtsc_cycles(&self, rax: &mut u64, rdx: &mut u64, nanos: u64) {
        // The multiply can't overflow: the product of two 64-bit operands
        // always fits in 128 bits.
        let cycles = u128::from(self.cyclesPerSecond) * u128::from(nanos) / 1_000_000_000;
        // It's *possible* that we'll wrap around here, but only after a very
        // long simulated time and/or a ridiculously fast clock. Wrapping is
        // also presumably what would happen on real hardware.
        let cycles = cycles as u64;
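        // Worked example with illustrative numbers: at 3 GHz and
        // nanos = 2_000_000_000 (two simulated seconds),
        // cycles = 3e9 * 2e9 / 1e9 = 6_000_000_000 = 0x1_65A0_BC00,
        // so rdx gets 0x1 and rax gets 0x65A0_BC00 below.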
        *rdx = (cycles >> 32) & 0xff_ff_ff_ff;
        *rax = cycles & 0xff_ff_ff_ff;
    }

    const RDTSC: [u8; 2] = [0x0f, 0x31];
    const RDTSCP: [u8; 3] = [0x0f, 0x01, 0xf9];

    /// Updates registers to reflect the result of executing an rdtsc
    /// instruction at time `nanos`.
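    ///
    /// A minimal doctest sketch (assuming this crate is importable as
    /// `shadow_tsc`):
    /// ```
    /// # use shadow_tsc::Tsc;
    /// let tsc = Tsc::new(1_000_000_000); // emulate a 1 GHz counter
    /// let (mut rax, mut rdx, mut rip) = (0u64, 0u64, 0x1000u64);
    /// tsc.emulate_rdtsc(&mut rax, &mut rdx, &mut rip, 1_000_000_000);
    /// // One simulated second at 1 GHz is 10^9 cycles, split across rdx:rax.
    /// assert_eq!((rdx << 32) | rax, 1_000_000_000);
    /// // rip advances past the 2-byte rdtsc instruction.
    /// assert_eq!(rip, 0x1002);
    /// ```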
    pub fn emulate_rdtsc(&self, rax: &mut u64, rdx: &mut u64, rip: &mut u64, nanos: u64) {
        self.set_rdtsc_cycles(rax, rdx, nanos);
        *rip += Self::RDTSC.len() as u64;
    }

    /// Updates registers to reflect the result of executing an rdtscp
    /// instruction at time `nanos`.
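    ///
    /// Behaves like [`Tsc::emulate_rdtsc`] for `rax`, `rdx`, and `rip` (rdtscp
    /// is 3 bytes rather than 2), and additionally overwrites `rcx` with an
    /// emulated IA32_TSC_AUX value; see the comment in the body below.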
    pub fn emulate_rdtscp(
        &self,
        rax: &mut u64,
        rdx: &mut u64,
        rcx: &mut u64,
        rip: &mut u64,
        nanos: u64,
    ) {
        self.set_rdtsc_cycles(rax, rdx, nanos);
        *rip += Self::RDTSCP.len() as u64;

        // rcx is set to IA32_TSC_AUX. According to the Intel developer manual
        // 17.17.2 "IA32_TSC_AUX Register and RDTSCP Support", "IA32_TSC_AUX
        // provides a 32-bit field that is initialized by privileged software with a
        // signature value (for example, a logical processor ID)." ... "User mode
        // software can use RDTSCP to detect if CPU migration has occurred between
        // successive reads of the TSC. It can also be used to adjust for per-CPU
        // differences in TSC values in a NUMA system."
        //
        // For now we just hard-code an arbitrary constant, which should be fine for
        // the stated purpose.
        // `hex(int(random.random()*2**32))`
        *rcx = 0x806eb479;
    }

    /// SAFETY: `ip` must be a dereferenceable pointer, pointing to the beginning
    /// of a valid x86_64 instruction, and `insn` must be a valid x86_64 instruction.
    unsafe fn ip_matches(ip: *const u8, insn: &[u8]) -> bool {
        // SAFETY:
        // * Caller has guaranteed that `ip` points to some valid instruction.
        // * Caller has guaranteed that `insn` is a valid instruction.
        // * No instruction can be a prefix of another, so `insn` can't be a prefix
        //   of some *other* instruction at `ip`.
        // * [`Iterator::all`] is short-circuiting.
        //
        // e.g. consider the case where `ip` points to a 1-byte `ret`
        // instruction, and the next byte of memory isn't accessible. That
        // single byte *cannot* match the first byte of `insn`, so we'll never
        // dereference `ip.add(1)`, which would be unsound.
        insn.iter()
            .enumerate()
            .all(|(offset, byte)| unsafe { *ip.add(offset) == *byte })
    }

    /// Whether `ip` points to an rdtsc instruction.
    ///
    /// # Safety
    ///
    /// `ip` must be a dereferenceable pointer, pointing to the
    /// beginning of a valid x86_64 instruction.
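    ///
    /// # Example
    ///
    /// A small sketch (assuming this crate is importable as `shadow_tsc`):
    /// ```
    /// # use shadow_tsc::Tsc;
    /// let code: [u8; 2] = [0x0f, 0x31]; // the encoding of rdtsc
    /// // Sound: `code` holds a complete, valid instruction.
    /// assert!(unsafe { Tsc::ip_is_rdtsc(code.as_ptr()) });
    /// ```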
    pub unsafe fn ip_is_rdtsc(ip: *const u8) -> bool {
        unsafe { Self::ip_matches(ip, &Self::RDTSC) }
    }

    /// Whether `ip` points to an rdtscp instruction.
    ///
    /// # Safety
    ///
    /// `ip` must be a dereferenceable pointer, pointing to the
    /// beginning of a valid x86_64 instruction.
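    ///
    /// # Example
    ///
    /// A small sketch, mirroring the one for [`Tsc::ip_is_rdtsc`]:
    /// ```
    /// # use shadow_tsc::Tsc;
    /// let code: [u8; 3] = [0x0f, 0x01, 0xf9]; // the encoding of rdtscp
    /// // Sound: `code` holds a complete, valid instruction.
    /// assert!(unsafe { Tsc::ip_is_rdtscp(code.as_ptr()) });
    /// ```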
    pub unsafe fn ip_is_rdtscp(ip: *const u8) -> bool {
        unsafe { Self::ip_matches(ip, &Self::RDTSCP) }
    }
}

mod export {
    use super::*;

    /// Returns the host system's native TSC rate, or 0 if it couldn't be found.
    ///
    /// WARNING: this is known to fail completely on some supported CPUs
    /// (particularly AMD), and can return the wrong value for others; i.e., this
    /// needs more work if we need to dependably get the host's TSC rate.
    /// See e.g. <https://github.com/shadow/shadow/issues/1519>.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn Tsc_nativeCyclesPerSecond() -> u64 {
        Tsc::native_cycles_per_second().unwrap_or(0)
    }

    /// Instantiates a `Tsc` with the given clock rate.
    #[unsafe(no_mangle)]
    pub extern "C-unwind" fn Tsc_create(cycles_per_second: u64) -> Tsc {
        Tsc::new(cycles_per_second)
    }

    /// Updates the pointed-to registers to reflect the result of executing an
    /// rdtsc instruction at time `nanos`.
    ///
    /// # Safety
    ///
    /// All pointers must be non-null and safely dereferenceable.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn Tsc_emulateRdtsc(
        tsc: *const Tsc,
        rax: *mut u64,
        rdx: *mut u64,
        rip: *mut u64,
        nanos: u64,
    ) {
        let tsc = unsafe { tsc.as_ref().unwrap() };
        let rax = unsafe { rax.as_mut().unwrap() };
        let rdx = unsafe { rdx.as_mut().unwrap() };
        let rip = unsafe { rip.as_mut().unwrap() };
        tsc.emulate_rdtsc(rax, rdx, rip, nanos)
    }

    /// Updates the pointed-to registers to reflect the result of executing an
    /// rdtscp instruction at time `nanos`.
    ///
    /// # Safety
    ///
    /// All pointers must be non-null and safely dereferenceable.
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn Tsc_emulateRdtscp(
        tsc: *const Tsc,
        rax: *mut u64,
        rdx: *mut u64,
        rcx: *mut u64,
        rip: *mut u64,
        nanos: u64,
    ) {
        let tsc = unsafe { tsc.as_ref().unwrap() };
        let rax = unsafe { rax.as_mut().unwrap() };
        let rdx = unsafe { rdx.as_mut().unwrap() };
        let rcx = unsafe { rcx.as_mut().unwrap() };
        let rip = unsafe { rip.as_mut().unwrap() };
        tsc.emulate_rdtscp(rax, rdx, rcx, rip, nanos)
    }

    /// Whether the instruction starting at `ip` is rdtsc.
    ///
    /// # Safety
    ///
    /// See [`Tsc::ip_is_rdtsc`].
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn isRdtsc(ip: *const u8) -> bool {
        unsafe { Tsc::ip_is_rdtsc(ip) }
    }

    /// Whether the instruction starting at `ip` is rdtscp.
    ///
    /// # Safety
    ///
    /// See [`Tsc::ip_is_rdtscp`].
    #[unsafe(no_mangle)]
    pub unsafe extern "C-unwind" fn isRdtscp(ip: *const u8) -> bool {
        unsafe { Tsc::ip_is_rdtscp(ip) }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    fn get_emulated_cycles(clock: u64, nanos: u64) -> u64 {
        let tsc = Tsc::new(clock);

        let mut rax = 0;
        let mut rdx = 0;
        let mut rcx = 0;
        let mut rip = 0;

        tsc.emulate_rdtsc(&mut rax, &mut rdx, &mut rip, nanos);
        assert_eq!(rax >> 32, 0);
        assert_eq!(rdx >> 32, 0);
        let rdtsc_res = (rdx << 32) | rax;

        tsc.emulate_rdtscp(&mut rax, &mut rdx, &mut rcx, &mut rip, nanos);
        assert_eq!(rax >> 32, 0);
        assert_eq!(rdx >> 32, 0);
        let rdtscp_res = (rdx << 32) | rax;

        assert_eq!(rdtsc_res, rdtscp_res);
        rdtsc_res
    }

    #[test]
    fn ns_granularity_at_1_ghz() {
        assert_eq!(get_emulated_cycles(1_000_000_000, 1), 1);
    }

    #[test]
    fn scales_with_clock_rate() {
        let base_clock = 1_000_000_000;
        let base_nanos = 1;
        assert_eq!(
            get_emulated_cycles(1000 * base_clock, base_nanos),
            1000 * get_emulated_cycles(base_clock, base_nanos)
        );
    }

    #[test]
    fn scales_with_time() {
        let base_clock = 1_000_000_000;
        let base_nanos = 1;
        assert_eq!(
            get_emulated_cycles(base_clock, 1000 * base_nanos),
            1000 * get_emulated_cycles(base_clock, base_nanos)
        );
    }
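
    // A sketch of an additional check, grounded in the RDTSC/RDTSCP constants
    // above: emulation must advance rip by each instruction's encoded length
    // (2 bytes for rdtsc, 3 for rdtscp).
    #[test]
    fn advances_rip_past_instruction() {
        let tsc = Tsc::new(1_000_000_000);
        let (mut rax, mut rdx, mut rcx) = (0u64, 0u64, 0u64);
        let mut rip = 0u64;
        tsc.emulate_rdtsc(&mut rax, &mut rdx, &mut rip, 0);
        assert_eq!(rip, 2);
        tsc.emulate_rdtscp(&mut rax, &mut rdx, &mut rcx, &mut rip, 0);
        assert_eq!(rip, 2 + 3);
    }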

    #[test]
    fn large_cycle_count() {
        let one_year_in_seconds: u64 = 365 * 24 * 60 * 60;
        let ten_b_cycles_per_second: u64 = 10_000_000_000;
        let expected_cycles = one_year_in_seconds
            .checked_mul(ten_b_cycles_per_second)
            .unwrap();
        let actual_cycles =
            get_emulated_cycles(ten_b_cycles_per_second, one_year_in_seconds * 1_000_000_000);
        assert_eq!(actual_cycles, expected_cycles);
    }
}
265}