formatting_nostd/
format_buffer.rs

1use core::ffi::CStr;
2use core::mem::MaybeUninit;
3
/// A self-contained buffer that can be used with both Rust's formatting utilities and
/// libc's sprintf.
///
/// Because those tools panic on errors, overflowing writes are truncated rather
/// than returning an error. A non-zero truncation count is included in
/// `Display` output of this object, and can be checked via the `truncated`
/// method.
///
/// The generic parameter `N` is the internal size of the buffer.  One byte is
/// reserved for NULL to support conversion to `CStr`, so at most `N - 1` bytes
/// of text can be stored.
///
/// To format a message with Rust's formatting:
/// ```
/// # use formatting_nostd::FormatBuffer;
/// use core::fmt::Write;
/// let mut buf = FormatBuffer::<1000>::new();
/// let x = 42;
/// write!(&mut buf, "{x}").unwrap();
/// assert_eq!(buf.as_str(), "42");
/// let y = 43;
/// write!(&mut buf, " {y}").unwrap();
/// assert_eq!(buf.as_str(), "42 43");
/// ```
pub struct FormatBuffer<const N: usize> {
    /// Backing storage. Only the first `used + 1` bytes (the text followed by
    /// its terminating NULL) are treated as initialized; the rest may be
    /// uninitialized and is never read.
    buffer: [MaybeUninit<u8>; N],
    /// Number of text bytes currently stored.
    /// Does *not* include NULL byte.
    used: usize,
    /// Number of bytes that were dropped because they did not fit. Once this
    /// is non-zero, further writes are counted here instead of being stored.
    truncated: usize,
}
33
34impl<const N: usize> FormatBuffer<N> {
    /// Total size of the backing array in bytes, including the byte reserved
    /// for the terminating NULL.
    const CAPACITY_INCLUDING_NULL: usize = N;
    /// Maximum number of text bytes that can be stored, i.e. the array size
    /// minus the byte reserved for the terminating NULL. `N` is expected to be
    /// at least 1 (`new` asserts this).
    const CAPACITY: usize = N - 1;
37
38    pub fn new() -> Self {
39        assert!(Self::CAPACITY_INCLUDING_NULL >= 1);
40        let mut res = Self {
41            buffer: [MaybeUninit::uninit(); N],
42            used: 0,
43            truncated: 0,
44        };
45        res.null_terminate();
46        res
47    }
48
49    /// Remaining capacity in bytes.
50    pub fn capacity_remaining(&self) -> usize {
51        Self::CAPACITY - self.used
52    }
53
54    pub fn capacity_remaining_including_null(&self) -> usize {
55        Self::CAPACITY_INCLUDING_NULL - self.used
56    }
57
58    /// How many bytes (not chars) have been truncated.
59    /// This shouldn't be relied on for an exact count; in particular
60    /// the accounting is not precise in `sprintf` if utf8 replacement
61    /// characters need to be inserted.
62    pub fn truncated(&self) -> usize {
63        self.truncated
64    }
65
66    fn null_terminate(&mut self) {
67        self.buffer[self.used].write(0);
68    }
69
70    /// Reset to empty. This may be cheaper than assigning a fresh
71    /// `FormatBuffer::new`, since the latter requires copying the uninitialized
72    /// buffer. (Though such a copy could get optimized to the same cost
73    /// depending on opt level, inlining, etc.)
74    pub fn reset(&mut self) {
75        self.used = 0;
76        self.truncated = 0;
77        self.null_terminate();
78    }
79
80    // The initialized part of the internal buffer.
81    fn initd_buffer_including_null(&self) -> &[u8] {
82        let buffer: *const MaybeUninit<u8> = self.buffer.as_ptr();
83        // MaybeUninit<u8> is guaranteed to have the same ABI as u8.
84        let buffer: *const u8 = buffer as *const u8;
85        // SAFETY: We know this byte range is initialized.
86        let rv = unsafe { core::slice::from_raw_parts(buffer, self.used + 1) };
87        assert_eq!(rv.last(), Some(&0));
88        rv
89    }
90
91    fn initd_buffer_excluding_null(&self) -> &[u8] {
92        let res = self.initd_buffer_including_null();
93        &res[..(res.len() - 1)]
94    }
95
96    /// `str` representation of internal buffer.
97    ///
98    /// If you'd like to render the buffer including any non-zero
99    /// truncation count, use the `Display` attribute instead.
100    pub fn as_str(&self) -> &str {
101        // SAFETY: We've ensured that only valid utf8 is appended to the buffer.
102        unsafe { core::str::from_utf8_unchecked(self.initd_buffer_excluding_null()) }
103    }
104
105    /// Returns `None` if the buffer has interior NULL bytes.
106    pub fn as_cstr(&self) -> Option<&CStr> {
107        CStr::from_bytes_with_nul(self.initd_buffer_including_null()).ok()
108    }
109
110    /// Appends the result of formatting `fmt` and `args`, following the conventions
111    /// of libc's `sprintf`.
112    ///
113    /// Any non-utf8 sequences in the resulting string are replaced with the
114    /// utf8 replacement character. If truncation occurs, the truncation count
115    /// doesn't necessarily account for all such substitutions.
116    ///
117    /// Currently calls libc's `vsnprintf` internally and panics on unexpected error.
118    /// TODO: Ideally we'd find or create our own reimplementation of `vsnprintf` instead,
119    /// since `vsnprintf` isn't guaranteed to be async-signal-safe.
120    ///
121    /// # Safety
122    ///
123    /// `fmt` and `args` must be consistent, as with arguments to libc's `sprintf`.
124    pub unsafe fn sprintf(&mut self, fmt: &CStr, args: va_list::VaList) {
125        // We use a temp buffer for the direct libc destination, so that we
126        // can relatively easily do a lossy utf8 decode from that buffer to
127        // our internal buffer.
128        //
129        // We *could* instead do a lossy decode in place to avoid having to
130        // allocate this additional buffer. However, because the unicode
131        // replacement character is multiple bytes, each insertion would be an
132        // O(N) to shift of the rest of the buffer.  Performance-wise that's
133        // probably fine since in the common case nothing would be substituted,
134        // but it'd also make the code significantly trickier.
135        //
136        // Meanwhile, this stack allocation is ~free... as long as we don't
137        // overflow the stack.
138        let mut buf = [MaybeUninit::<i8>::uninit(); N];
139
140        let rv = unsafe { vsnprintf(buf.as_mut_ptr() as *mut i8, buf.len(), fmt.as_ptr(), args) };
141
142        // Number of non-NULL bytes for the fully formatted string.
143        let formatted_len = match usize::try_from(rv) {
144            Ok(n) => n,
145            Err(_) => {
146                panic!("vsnprintf returned {rv}");
147            }
148        };
149
150        // we use a hyper-local helper function to ensure that the new slice has the correct lifetime.
151        // <https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#caveat>
152        unsafe fn transmute_to_u8(buf: &[MaybeUninit<i8>]) -> &[u8] {
153            unsafe { core::slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len()) }
154        }
155
156        // `vsnprintf` never writes more bytes than the size of the buffer, and
157        // always NULL-terminates.  i.e. if it had to truncate, then only
158        // `buf.len()-1` non-NULL bytes will have been written.
159        let non_null_bytes_written = core::cmp::min(buf.len() - 1, formatted_len);
160        let initd_buf = unsafe { transmute_to_u8(&buf[..non_null_bytes_written]) };
161
162        for decoded_char in crate::utf8::decode_lossy(initd_buf) {
163            if self.truncated > 0 || decoded_char.len() > self.capacity_remaining() {
164                self.truncated += decoded_char.len()
165            } else {
166                self.write_fitting_str(decoded_char)
167            }
168        }
169
170        // Also account for bytes truncated in our call to vsnprintf. We do this
171        // *after* the decoding loop to support writing as much as we can of the
172        // current vsnprintf result before we start truncating.
173        self.truncated += formatted_len - non_null_bytes_written;
174        self.null_terminate();
175    }
176
177    // Panics if the bytes don't fit.
178    fn write_fitting_str(&mut self, src: &str) {
179        assert!(src.len() <= self.capacity_remaining());
180
181        // SAFETY: the pointer arithmetic here stays inside the original object (the buffer).
182        let dst: *mut MaybeUninit<u8> = unsafe { self.buffer.as_mut_ptr().add(self.used) };
183
184        // `MaybeUninit` guarantees this cast is safe, as long as we don't try to read
185        // the uninitialized data.
186        let dst: *mut u8 = dst as *mut u8;
187
188        unsafe { core::ptr::copy_nonoverlapping(src.as_ptr(), dst, src.len()) };
189        self.used += src.len();
190        self.null_terminate();
191    }
192}
193
194impl<const N: usize> core::fmt::Write for FormatBuffer<N> {
195    fn write_str(&mut self, src: &str) -> Result<(), core::fmt::Error> {
196        if self.truncated() > 0 {
197            // Never write more after having started truncating.
198            self.truncated += src.len();
199            return Ok(());
200        }
201
202        if src.len() <= self.capacity_remaining() {
203            self.write_fitting_str(src);
204            return Ok(());
205        }
206
207        // Find safe end to split at.
208        // TODO: consider `str::floor_char_boundary` once it's stabilized.
209        let mut nbytes = self.capacity_remaining();
210        while !src.is_char_boundary(nbytes) {
211            nbytes -= 1;
212        }
213        self.truncated += src.len() - nbytes;
214
215        self.write_fitting_str(&src[..nbytes]);
216        Ok(())
217    }
218}
219
220impl<const N: usize> core::fmt::Display for FormatBuffer<N> {
221    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
222        if self.truncated == 0 {
223            write!(f, "{}", self.as_str())
224        } else {
225            write!(f, "{}...<truncated {}>", self.as_str(), self.truncated())
226        }
227    }
228}
229
230impl<const N: usize> Default for FormatBuffer<N> {
231    fn default() -> Self {
232        Self::new()
233    }
234}
235
236// Ensure the system libc is linked.
237extern crate libc;
238
239unsafe extern "C" {
240    // Use libc's `vsnprintf` function. The `libc` crate doesn't expose it, so
241    // we declare it ourselves.
242    //
243    // From `sprintf(3)`:
244    // > int vsnprintf(char *str, size_t size, const char *format, va_list ap);
245    //
246    // `va_list::VaList` is ABI compatible with libc's `va_list`.
247    fn vsnprintf(
248        str: *mut core::ffi::c_char,
249        size: usize,
250        fmt: *const core::ffi::c_char,
251        ap: va_list::VaList,
252    ) -> i32;
253}
254
// Tests for the pure-Rust paths (`write_str`, `Display`, `as_cstr`, `reset`).
// These don't touch FFI, so they also run under miri.
#[cfg(test)]
mod test {
    use core::fmt::Write;

    use std::ffi::CString;

    use super::*;

    #[test]
    fn test_format_buffer_write_str_exact() {
        let mut buf = FormatBuffer::<4>::new();
        assert!(buf.write_str("123").is_ok());
        assert_eq!(buf.as_str(), "123");
        assert_eq!(buf.truncated(), 0);
    }

    #[test]
    fn test_format_buffer_write_str_truncated() {
        let mut buf = FormatBuffer::<3>::new();
        assert!(buf.write_str("123").is_ok());
        assert_eq!(buf.as_str(), "12");
        assert_eq!(buf.truncated(), 1);
    }

    #[test]
    fn test_format_buffer_write_str_truncated_unicode() {
        let mut buf = FormatBuffer::<3>::new();
        // U+00A1 "inverted exclamation mark" is 2 bytes in utf8.
        // Ensure that both bytes are truncated, rather than splitting in the
        // middle.
        assert!(buf.write_str("1¡").is_ok());
        assert_eq!(buf.as_str(), "1");
        assert_eq!(buf.truncated(), 2);

        // While there is 1 byte of capacity left, once bytes have been truncated
        // the buffer truncates all additional writes.
        assert_eq!(buf.capacity_remaining(), 1);
        assert!(buf.write_str("2").is_ok());
        assert_eq!(buf.capacity_remaining(), 1);
        assert_eq!(buf.truncated(), 3);
    }

    #[test]
    fn test_format_buffer_display_truncated() {
        let mut buf = FormatBuffer::<3>::new();
        assert!(buf.write_str("123").is_ok());
        assert_eq!(format!("{buf}"), "12...<truncated 1>");
    }

    #[test]
    fn test_format_buffer_display_not_truncated() {
        // Also exercises the `Default` impl.
        let mut buf = FormatBuffer::<4>::default();
        assert!(buf.write_str("123").is_ok());
        assert_eq!(format!("{buf}"), "123");
    }

    #[test]
    fn test_format_buffer_write_str_multiple() {
        let mut buf = FormatBuffer::<7>::new();
        assert!(buf.write_str("123").is_ok());
        assert_eq!(buf.as_str(), "123");
        assert!(buf.write_str("456").is_ok());
        assert_eq!(buf.as_str(), "123456");
    }

    #[test]
    fn test_format_buffer_terminator_only() {
        // With N == 1 the only byte is the terminator, so nothing fits.
        let mut buf = FormatBuffer::<1>::new();
        assert_eq!(buf.capacity_remaining(), 0);
        assert_eq!(buf.capacity_remaining_including_null(), 1);
        assert_eq!(buf.as_str(), "");
        let expected = CString::new("").unwrap();
        assert_eq!(buf.as_cstr(), Some(expected.as_c_str()));

        assert!(buf.write_str("1").is_ok());
        assert_eq!(buf.as_str(), "");
        assert_eq!(buf.truncated(), 1);
    }

    #[test]
    fn test_format_buffer_reset() {
        let mut buf = FormatBuffer::<3>::new();
        assert!(buf.write_str("123").is_ok());
        assert_eq!(buf.truncated(), 1);

        buf.reset();
        assert_eq!(buf.as_str(), "");
        assert_eq!(buf.truncated(), 0);
        assert_eq!(buf.capacity_remaining(), 2);

        // Writes are accepted again after a reset.
        assert!(buf.write_str("ab").is_ok());
        assert_eq!(buf.as_str(), "ab");
        assert_eq!(buf.truncated(), 0);
    }

    #[test]
    fn test_cstr_ok() {
        let mut buf = FormatBuffer::<7>::new();
        assert!(buf.write_str("123").is_ok());
        let expected = CString::new("123").unwrap();
        assert_eq!(buf.as_cstr(), Some(expected.as_c_str()));
    }

    #[test]
    fn test_cstr_interior_null() {
        // An interior NULL byte is valid in a `str` but not in a `CStr`.
        let mut buf = FormatBuffer::<7>::new();
        assert!(buf.write_str("a\0b").is_ok());
        assert_eq!(buf.as_str(), "a\0b");
        assert_eq!(buf.truncated(), 0);
        assert_eq!(buf.as_cstr(), None);
    }
}
321
// sprintf tests don't work under miri since we use FFI.
//
// These tests round-trip through a C test harness: Rust calls the C vararg
// function `test_format_buffer_vararg`, which converts its varargs to a
// `va_list` and calls back into `test_format_buffer_valist` below.
#[cfg(all(test, not(miri)))]
mod sprintf_test {
    use std::ffi::CString;

    use super::*;

    // Wrapper code we expose to our C test harness.
    //
    // Safety: `format_buffer` must point to a valid `FormatBuffer<10>`, `fmt`
    // must be a valid NULL-terminated string, and `fmt`/`args` must be
    // consistent (see `FormatBuffer::sprintf`).
    #[unsafe(no_mangle)]
    unsafe extern "C-unwind" fn test_format_buffer_valist(
        format_buffer: *mut FormatBuffer<10>,
        fmt: *const core::ffi::c_char,
        args: va_list::VaList,
    ) {
        let fmt = unsafe { CStr::from_ptr(fmt) };
        // Panics (rather than dereferencing) if the harness passes a null pointer.
        let format_buffer = unsafe { format_buffer.as_mut().unwrap() };
        unsafe { format_buffer.sprintf(fmt, args) };
    }

    unsafe extern "C-unwind" {
        // Wrapper code that our C test harness exposes to us.
        // It calls `test_format_buffer_valist` and returns the result.
        //
        // We need this to transform varargs (...) to a `VaList`;
        // we don't have a way to construct a `VaList` in pure Rust.
        //
        // NOTE(review): the `allow` is presumably needed because
        // `FormatBuffer` is not `#[repr(C)]`; the declaration only passes it
        // through as a pointer.
        #[allow(improper_ctypes)]
        fn test_format_buffer_vararg(
            format_buffer: *mut FormatBuffer<10>,
            fmt: *const core::ffi::c_char,
            ...
        );
    }

    #[test]
    fn test_sprintf_noargs() {
        let mut buf = FormatBuffer::<10>::new();
        let fmt = CString::new("hello").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr()) };
        assert_eq!(buf.as_str(), "hello");
        assert_eq!(buf.truncated(), 0);
    }

    #[test]
    fn test_sprintf_args() {
        let mut buf = FormatBuffer::<10>::new();
        let fmt = CString::new("x %d y").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr(), 42i32) };
        assert_eq!(buf.as_str(), "x 42 y");
        assert_eq!(buf.truncated(), 0);
    }

    #[test]
    fn test_sprintf_truncated() {
        let mut buf = FormatBuffer::<10>::new();
        let fmt = CString::new("1234567890123").unwrap();

        // The last *4* bytes will be truncated, only writing *9*.
        // Internally we use libc's `vsnprintf` which always NULL-terminates.
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr()) };
        assert_eq!(buf.as_str(), "123456789");
        assert_eq!(buf.truncated(), 4);
    }

    #[test]
    fn test_sprintf_truncated_partly_full() {
        let mut buf = FormatBuffer::<10>::new();
        let fmt = CString::new("12345678").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr()) };
        assert_eq!(buf.as_str(), "12345678");
        // Only one more byte fits; the remaining 7 bytes of the second write
        // are truncated.
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr()) };
        assert_eq!(buf.as_str(), "123456781");
        assert_eq!(buf.truncated(), 7);
    }

    #[test]
    fn test_sprintf_truncated_unicode() {
        let mut buf = FormatBuffer::<10>::new();
        // U+00A1 "inverted exclamation mark" is 2 bytes in utf8.
        // Ensure that both bytes are truncated, rather than splitting in the
        // middle.
        let fmt = CString::new("123456789¡").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr()) };
        assert_eq!(buf.as_str(), "123456789");
        assert_eq!(buf.truncated(), 2);
    }

    #[test]
    fn test_sprintf_unicode_replacement() {
        let mut buf = FormatBuffer::<10>::new();
        // Cause the formatted output to have a continuation byte 0x80 without
        // a previous start byte; i.e. be invalid utf8. It should get replaced with
        // a replacement character.
        let fmt = CString::new("x%cy").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr(), 0x80 as core::ffi::c_int) };
        assert_eq!(buf.as_str(), "x�y");
        assert_eq!(buf.truncated(), 0);
    }

    #[test]
    fn test_sprintf_unicode_replacement_truncation() {
        let mut buf = FormatBuffer::<10>::new();
        // Cause the formatted output to have a continuation byte 0x80 without
        // a previous start byte; i.e. be invalid utf8. It should get replaced with
        // a replacement character.
        let fmt = CString::new("12345678%c").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr(), 0x80 as core::ffi::c_int) };
        // The unicode replacement character won't fit, so should get truncated completely.
        assert_eq!(buf.as_str(), "12345678");
        // We're not guaranteeing anything about the exact count in this case,
        // other than it should be non-zero.
        assert!(buf.truncated() > 0);
    }

    #[test]
    fn test_sprintf_multiple() {
        let mut buf = FormatBuffer::<10>::new();
        let fmt = CString::new("123").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr()) };
        let fmt = CString::new("456").unwrap();
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr()) };
        assert_eq!(buf.as_str(), "123456");
        assert_eq!(buf.truncated(), 0);
    }

    #[test]
    fn test_sprintf_cstr_fail() {
        let mut buf = FormatBuffer::<10>::new();
        // Cause the formatted output to have an interior NULL byte.
        let fmt = CString::new("1234%c56").unwrap();

        // We have to cast 0 to `c_int` here, because the vararg ABI doesn't
        // support passing a char. (i.e. casting to `c_char` fails to compile)
        unsafe { test_format_buffer_vararg(&mut buf, fmt.as_ptr(), 0 as core::ffi::c_int) };
        assert_eq!(buf.as_cstr(), None);
    }
}