1use core::ffi::CStr;
2use core::mem::MaybeUninit;
/// A fixed-size, self-contained text buffer that can be filled either through
/// Rust's formatting utilities or through libc-style `sprintf`.
///
/// Those tools tend to panic when a write reports an error, so a write that
/// does not fit is truncated instead of failing. The number of truncated bytes
/// can be queried via the `truncated` method, and a non-zero count is also
/// included in the `Display` output of this object.
///
/// The generic parameter `N` is the size of the internal storage in bytes. One
/// of those bytes is reserved for a NULL terminator, which is what allows the
/// contents to be converted to a `CStr`.
///
/// Formatting a message with Rust's formatting macros:
/// ```
/// # use formatting_nostd::FormatBuffer;
/// use core::fmt::Write;
/// let mut buf = FormatBuffer::<1000>::new();
/// let x = 42;
/// write!(&mut buf, "{x}").unwrap();
/// assert_eq!(buf.as_str(), "42");
/// let y = 43;
/// write!(&mut buf, " {y}").unwrap();
/// assert_eq!(buf.as_str(), "42 43");
/// ```
pub struct FormatBuffer<const N: usize> {
    /// Backing storage for the text plus its NULL terminator.
    buffer: [MaybeUninit<u8>; N],
    /// Number of content bytes currently stored. Does *not* include the NULL
    /// byte.
    used: usize,
    /// Running count of bytes that were dropped because they did not fit.
    truncated: usize,
}
3334impl<const N: usize> FormatBuffer<N> {
35const CAPACITY_INCLUDING_NULL: usize = N;
36const CAPACITY: usize = N - 1;
3738pub fn new() -> Self {
39assert!(Self::CAPACITY_INCLUDING_NULL >= 1);
40let mut res = Self {
41 buffer: [MaybeUninit::uninit(); N],
42 used: 0,
43 truncated: 0,
44 };
45 res.null_terminate();
46 res
47 }
4849/// Remaining capacity in bytes.
50pub fn capacity_remaining(&self) -> usize {
51Self::CAPACITY - self.used
52 }
5354pub fn capacity_remaining_including_null(&self) -> usize {
55Self::CAPACITY_INCLUDING_NULL - self.used
56 }
5758/// How many bytes (not chars) have been truncated.
59 /// This shouldn't be relied on for an exact count; in particular
60 /// the accounting is not precise in `sprintf` if utf8 replacement
61 /// characters need to be inserted.
62pub fn truncated(&self) -> usize {
63self.truncated
64 }
6566fn null_terminate(&mut self) {
67self.buffer[self.used].write(0);
68 }
6970/// Reset to empty. This may be cheaper than assigning a fresh
71 /// `FormatBuffer::new`, since the latter requires copying the uninitialized
72 /// buffer. (Though such a copy could get optimized to the same cost
73 /// depending on opt level, inlining, etc.)
74pub fn reset(&mut self) {
75self.used = 0;
76self.truncated = 0;
77self.null_terminate();
78 }
7980// The initialized part of the internal buffer.
81fn initd_buffer_including_null(&self) -> &[u8] {
82let buffer: *const MaybeUninit<u8> = self.buffer.as_ptr();
83// MaybeUninit<u8> is guaranteed to have the same ABI as u8.
84let buffer: *const u8 = buffer as *const u8;
85// SAFETY: We know this byte range is initialized.
86let rv = unsafe { core::slice::from_raw_parts(buffer, self.used + 1) };
87assert_eq!(rv.last(), Some(&0));
88 rv
89 }
9091fn initd_buffer_excluding_null(&self) -> &[u8] {
92let res = self.initd_buffer_including_null();
93&res[..(res.len() - 1)]
94 }
9596/// `str` representation of internal buffer.
97 ///
98 /// If you'd like to render the buffer including any non-zero
99 /// truncation count, use the `Display` attribute instead.
100pub fn as_str(&self) -> &str {
101// SAFETY: We've ensured that only valid utf8 is appended to the buffer.
102unsafe { core::str::from_utf8_unchecked(self.initd_buffer_excluding_null()) }
103 }
104105/// Returns `None` if the buffer has interior NULL bytes.
106pub fn as_cstr(&self) -> Option<&CStr> {
107 CStr::from_bytes_with_nul(self.initd_buffer_including_null()).ok()
108 }
109110/// Appends the result of formatting `fmt` and `args`, following the conventions
111 /// of libc's `sprintf`.
112 ///
113 /// Any non-utf8 sequences in the resulting string are replaced with the
114 /// utf8 replacement character. If truncation occurs, the truncation count
115 /// doesn't necessarily account for all such substitutions.
116 ///
117 /// Currently calls libc's `vsnprintf` internally and panics on unexpected error.
118 /// TODO: Ideally we'd find or create our own reimplementation of `vsnprintf` instead,
119 /// since `vsnprintf` isn't guaranteed to be async-signal-safe.
120 ///
121 /// # Safety
122 ///
123 /// `fmt` and `args` must be consistent, as with arguments to libc's `sprintf`.
124pub unsafe fn sprintf(&mut self, fmt: &CStr, args: va_list::VaList) {
125// We use a temp buffer for the direct libc destination, so that we
126 // can relatively easily do a lossy utf8 decode from that buffer to
127 // our internal buffer.
128 //
129 // We *could* instead do a lossy decode in place to avoid having to
130 // allocate this additional buffer. However, because the unicode
131 // replacement character is multiple bytes, each insertion would be an
132 // O(N) to shift of the rest of the buffer. Performance-wise that's
133 // probably fine since in the common case nothing would be substituted,
134 // but it'd also make the code significantly trickier.
135 //
136 // Meanwhile, this stack allocation is ~free... as long as we don't
137 // overflow the stack.
138let mut buf = [MaybeUninit::<i8>::uninit(); N];
139140let rv = unsafe { vsnprintf(buf.as_mut_ptr() as *mut i8, buf.len(), fmt.as_ptr(), args) };
141142// Number of non-NULL bytes for the fully formatted string.
143let formatted_len = match usize::try_from(rv) {
144Ok(n) => n,
145Err(_) => {
146panic!("vsnprintf returned {rv}");
147 }
148 };
149150// we use a hyper-local helper function to ensure that the new slice has the correct lifetime.
151 // <https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#caveat>
152unsafe fn transmute_to_u8(buf: &[MaybeUninit<i8>]) -> &[u8] {
153unsafe { core::slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len()) }
154 }
155156// `vsnprintf` never writes more bytes than the size of the buffer, and
157 // always NULL-terminates. i.e. if it had to truncate, then only
158 // `buf.len()-1` non-NULL bytes will have been written.
159let non_null_bytes_written = core::cmp::min(buf.len() - 1, formatted_len);
160let initd_buf = unsafe { transmute_to_u8(&buf[..non_null_bytes_written]) };
161162for decoded_char in crate::utf8::decode_lossy(initd_buf) {
163if self.truncated > 0 || decoded_char.len() > self.capacity_remaining() {
164self.truncated += decoded_char.len()
165 } else {
166self.write_fitting_str(decoded_char)
167 }
168 }
169170// Also account for bytes truncated in our call to vsnprintf. We do this
171 // *after* the decoding loop to support writing as much as we can of the
172 // current vsnprintf result before we start truncating.
173self.truncated += formatted_len - non_null_bytes_written;
174self.null_terminate();
175 }
176177// Panics if the bytes don't fit.
178fn write_fitting_str(&mut self, src: &str) {
179assert!(src.len() <= self.capacity_remaining());
180181// SAFETY: the pointer arithmetic here stays inside the original object (the buffer).
182let dst: *mut MaybeUninit<u8> = unsafe { self.buffer.as_mut_ptr().add(self.used) };
183184// `MaybeUninit` guarantees this cast is safe, as long as we don't try to read
185 // the uninitialized data.
186let dst: *mut u8 = dst as *mut u8;
187188unsafe { core::ptr::copy_nonoverlapping(src.as_ptr(), dst, src.len()) };
189self.used += src.len();
190self.null_terminate();
191 }
192}
193194impl<const N: usize> core::fmt::Write for FormatBuffer<N> {
195fn write_str(&mut self, src: &str) -> Result<(), core::fmt::Error> {
196if self.truncated() > 0 {
197// Never write more after having started truncating.
198self.truncated += src.len();
199return Ok(());
200 }
201202if src.len() <= self.capacity_remaining() {
203self.write_fitting_str(src);
204return Ok(());
205 }
206207// Find safe end to split at.
208 // TODO: consider `str::floor_char_boundary` once it's stabilized.
209let mut nbytes = self.capacity_remaining();
210while !src.is_char_boundary(nbytes) {
211 nbytes -= 1;
212 }
213self.truncated += src.len() - nbytes;
214215self.write_fitting_str(&src[..nbytes]);
216Ok(())
217 }
218}
219220impl<const N: usize> core::fmt::Display for FormatBuffer<N> {
221fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
222if self.truncated == 0 {
223write!(f, "{}", self.as_str())
224 } else {
225write!(f, "{}...<truncated {}>", self.as_str(), self.truncated())
226 }
227 }
228}
229230impl<const N: usize> Default for FormatBuffer<N> {
231fn default() -> Self {
232Self::new()
233 }
234}
235236// Ensure the system libc is linked.
237extern crate libc;
238239unsafe extern "C" {
240// Use libc's `vsnprintf` function. The `libc` crate doesn't expose it, so
241 // we declare it ourselves.
242 //
243 // From `sprintf(3)`:
244 // > int vsnprintf(char *str, size_t size, const char *format, va_list ap);
245 //
246 // `va_list::VaList` is ABI compatible with libc's `va_list`.
247fn vsnprintf(
248 str: *mut core::ffi::c_char,
249 size: usize,
250 fmt: *const core::ffi::c_char,
251 ap: va_list::VaList,
252 ) -> i32;
253}
#[cfg(test)]
mod test {
    use core::fmt::Write;

    use std::ffi::CString;

    use super::*;

    /// A write that exactly fills the content capacity is stored in full.
    #[test]
    fn test_format_buffer_write_str_exact() {
        let mut fb = FormatBuffer::<4>::new();
        assert_eq!(fb.write_str("123"), Ok(()));
        assert_eq!(fb.as_str(), "123");
        assert_eq!(fb.truncated(), 0);
    }

    /// A write that is one byte too long loses that byte and counts it.
    #[test]
    fn test_format_buffer_write_str_truncated() {
        let mut fb = FormatBuffer::<3>::new();
        assert_eq!(fb.write_str("123"), Ok(()));
        assert_eq!(fb.as_str(), "12");
        assert_eq!(fb.truncated(), 1);
    }

    /// Truncation never splits a multi-byte character.
    #[test]
    fn test_format_buffer_write_str_truncated_unicode() {
        let mut fb = FormatBuffer::<3>::new();
        // U+00A1 "inverted exclamation mark" is 2 bytes in utf8.
        // Ensure that both bytes are truncated, rather than splitting in the
        // middle.
        assert_eq!(fb.write_str("1¡"), Ok(()));
        assert_eq!(fb.as_str(), "1");
        assert_eq!(fb.truncated(), 2);

        // While there is 1 byte of capacity left, once bytes have been truncated
        // the buffer truncates all additional writes.
        assert_eq!(fb.capacity_remaining(), 1);
        assert_eq!(fb.write_str("2"), Ok(()));
        assert_eq!(fb.capacity_remaining(), 1);
        assert_eq!(fb.truncated(), 3);
    }

    /// `Display` output carries the truncation marker.
    #[test]
    fn test_format_buffer_display_truncated() {
        let mut fb = FormatBuffer::<3>::new();
        assert_eq!(fb.write_str("123"), Ok(()));
        assert_eq!(format!("{fb}"), "12...<truncated 1>");
    }

    /// Consecutive writes append to each other.
    #[test]
    fn test_format_buffer_write_str_multiple() {
        let mut fb = FormatBuffer::<7>::new();
        assert_eq!(fb.write_str("123"), Ok(()));
        assert_eq!(fb.as_str(), "123");
        assert_eq!(fb.write_str("456"), Ok(()));
        assert_eq!(fb.as_str(), "123456");
    }

    /// Contents without interior NULL bytes convert to a `CStr`.
    #[test]
    fn test_cstr_ok() {
        let mut fb = FormatBuffer::<7>::new();
        assert_eq!(fb.write_str("123"), Ok(()));
        let want = CString::new("123").unwrap();
        assert_eq!(fb.as_cstr(), Some(want.as_c_str()));
    }
}
// sprintf tests don't work under miri since we use FFI.
#[cfg(all(test, not(miri)))]
mod sprintf_test {
    use std::ffi::CString;

    use super::*;

    // Wrapper code we expose to our C test harness.
    #[unsafe(no_mangle)]
    unsafe extern "C-unwind" fn test_format_buffer_valist(
        format_buffer: *mut FormatBuffer<10>,
        fmt: *const core::ffi::c_char,
        args: va_list::VaList,
    ) {
        let target = unsafe { format_buffer.as_mut().unwrap() };
        let fmt = unsafe { CStr::from_ptr(fmt) };
        unsafe { target.sprintf(fmt, args) };
    }

    unsafe extern "C-unwind" {
        // Wrapper code that our C test harness exposes to us.
        // It calls `test_format_buffer_valist` and returns the result.
        //
        // We need this to transform varargs (...) to a `VaList`;
        // we don't have a way to construct a `VaList` in pure Rust.
        #[allow(improper_ctypes)]
        fn test_format_buffer_vararg(
            format_buffer: *mut FormatBuffer<10>,
            fmt: *const core::ffi::c_char,
            ...
        );
    }

    // Builds the NULL-terminated format string for a test case.
    fn c_format(text: &str) -> CString {
        CString::new(text).unwrap()
    }

    #[test]
    fn test_sprintf_noargs() {
        let mut fb = FormatBuffer::<10>::new();
        let fmt = c_format("hello");
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr()) };
        assert_eq!(fb.as_str(), "hello");
        assert_eq!(fb.truncated(), 0);
    }

    #[test]
    fn test_sprintf_args() {
        let mut fb = FormatBuffer::<10>::new();
        let fmt = c_format("x %d y");
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr(), 42i32) };
        assert_eq!(fb.as_str(), "x 42 y");
        assert_eq!(fb.truncated(), 0);
    }

    #[test]
    fn test_sprintf_truncated() {
        let mut fb = FormatBuffer::<10>::new();
        let fmt = c_format("1234567890123");

        // The last *4* bytes will be truncated, only writing *9*.
        // Internally we use libc's `vsnprintf` which always NULL-terminates.
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr()) };
        assert_eq!(fb.as_str(), "123456789");
        assert_eq!(fb.truncated(), 4);
    }

    #[test]
    fn test_sprintf_truncated_partly_full() {
        let mut fb = FormatBuffer::<10>::new();
        let fmt = c_format("12345678");
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr()) };
        assert_eq!(fb.as_str(), "12345678");
        // Only one more byte fits; the other seven are truncated.
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr()) };
        assert_eq!(fb.as_str(), "123456781");
        assert_eq!(fb.truncated(), 7);
    }

    #[test]
    fn test_sprintf_truncated_unicode() {
        let mut fb = FormatBuffer::<10>::new();
        // U+00A1 "inverted exclamation mark" is 2 bytes in utf8.
        // Ensure that both bytes are truncated, rather than splitting in the
        // middle.
        let fmt = c_format("123456789¡");
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr()) };
        assert_eq!(fb.as_str(), "123456789");
        assert_eq!(fb.truncated(), 2);
    }

    #[test]
    fn test_sprintf_unicode_replacement() {
        let mut fb = FormatBuffer::<10>::new();
        // Cause the formatted output to have a continuation byte 0x80 without
        // a previous start byte; i.e. be invalid utf8. It should get replaced with
        // a replacement character.
        let fmt = c_format("x%cy");
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr(), 0x80 as core::ffi::c_int) };
        assert_eq!(fb.as_str(), "x�y");
        assert_eq!(fb.truncated(), 0);
    }

    #[test]
    fn test_sprintf_unicode_replacement_truncation() {
        let mut fb = FormatBuffer::<10>::new();
        // Cause the formatted output to have a continuation byte 0x80 without
        // a previous start byte; i.e. be invalid utf8. It should get replaced with
        // a replacement character.
        let fmt = c_format("12345678%c");
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr(), 0x80 as core::ffi::c_int) };
        // The unicode replacement character won't fit, so should get truncated completely.
        assert_eq!(fb.as_str(), "12345678");
        // We're not guaranteeing anything about the exact count in this case,
        // other than it should be non-zero.
        assert!(fb.truncated() > 0);
    }

    #[test]
    fn test_sprintf_multiple() {
        let mut fb = FormatBuffer::<10>::new();
        let first = c_format("123");
        unsafe { test_format_buffer_vararg(&mut fb, first.as_ptr()) };
        let second = c_format("456");
        unsafe { test_format_buffer_vararg(&mut fb, second.as_ptr()) };
        assert_eq!(fb.as_str(), "123456");
        assert_eq!(fb.truncated(), 0);
    }

    #[test]
    fn test_sprintf_cstr_fail() {
        let mut fb = FormatBuffer::<10>::new();
        // Cause the formatted output to have an interior NULL byte.
        let fmt = c_format("1234%c56");

        // We have to cast 0 to `c_int` here, because the vararg ABI doesn't
        // support passing a char. (i.e. casting to `c_char` fails to compile)
        unsafe { test_format_buffer_vararg(&mut fb, fmt.as_ptr(), 0 as core::ffi::c_int) };
        assert_eq!(fb.as_cstr(), None);
    }
}
457}