shlex/lib.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
// Copyright 2015 Nicholas Allegra (comex).
// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
//! Parse strings like, and escape strings for, POSIX shells.
//!
//! Same idea as (but implementation not directly based on) the Python shlex module.
//!
//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
//!
//! ## <span style="color:red">Warning</span>
//!
//! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they
//! cannot be quoted portably).
//!
//! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or
//! even scripts `source`d from interactive shells).
//!
//! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce
//! ugly outputs (which may not be copy-pastable).
//!
//! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin
//! of an interactive shell, you should stop, because control characters can lead to arbitrary
//! command injection.
//!
//! For more information, and for information about more minor issues, please see [quoting_warning].
//!
//! ## Compatibility
//!
//! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**;
//! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not
//! POSIX-compatible but close enough).
//!
//! It also aims to be compatible with Python `shlex` and C `wordexp`.
#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
use alloc::vec::Vec;
use alloc::borrow::Cow;
use alloc::string::String;
#[cfg(test)]
use alloc::vec;
#[cfg(test)]
use alloc::borrow::ToOwned;
pub mod bytes;
#[cfg(all(doc, not(doctest)))]
#[path = "quoting_warning.md"]
pub mod quoting_warning;
/// Iterator that splits an input string into words, following the syntax of the POSIX shell.
///
/// This is a thin wrapper around the byte-oriented lexer; each word is yielded as a `String`.
///
/// See [`bytes::Shlex`].
pub struct Shlex<'a>(bytes::Shlex<'a>);

impl<'a> Shlex<'a> {
    /// Creates a lexer over `in_str` by handing the string's bytes to [`bytes::Shlex::new`].
    pub fn new(in_str: &'a str) -> Self {
        let byte_lexer = bytes::Shlex::new(in_str.as_bytes());
        Shlex(byte_lexer)
    }
}
impl<'a> Iterator for Shlex<'a> {
    type Item = String;

    /// Returns the next word produced by the wrapped byte lexer, converted to a `String`.
    ///
    /// A `None` from the wrapped lexer is passed through unchanged.
    fn next(&mut self) -> Option<String> {
        self.0.next().map(|byte_word| {
            // The wrapped lexer is reachable from safe code through this type's `DerefMut`
            // impl, so a caller can overwrite it with a `bytes::Shlex` built over arbitrary
            // (non-UTF-8) bytes. Assuming UTF-8 here with `from_utf8_unchecked` would then
            // create an invalid `String`, so the bytes are validated instead. For lexers
            // created by `Shlex::new` the `Ok` arm is always taken and the result is unchanged.
            match String::from_utf8(byte_word) {
                Ok(word) => word,
                // Only reachable if the inner lexer was replaced; degrade gracefully by
                // substituting U+FFFD for the invalid sequences.
                Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
            }
        })
    }
}
impl<'a> core::ops::Deref for Shlex<'a> {
type Target = bytes::Shlex<'a>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl<'a> core::ops::DerefMut for Shlex<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
/// Splits the whole of `in_str` into words in one call.
///
/// Returns `None` if the input was erroneous, i.e. if the lexer's `had_error` flag is set once
/// all words have been collected.
pub fn split(in_str: &str) -> Option<Vec<String>> {
    let mut lexer = Shlex::new(in_str);
    // `by_ref` keeps `lexer` alive so the error flag can be inspected afterwards.
    let words: Vec<String> = lexer.by_ref().collect();
    if lexer.had_error {
        return None;
    }
    Some(words)
}
/// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts).
///
/// With default settings the only possible error is [`QuoteError::Nul`], and after calling
/// `allow_nul(true)` no error can be returned at all. Variants added in the future will not be
/// enabled by default; each will be switched on through a corresponding non-default [`Quoter`]
/// option.
///
/// ...In theory. Should further classes of input turn out to be, like nul bytes, fundamentally
/// unsafe to quote even for non-interactive shells, that risk will be mitigated by adding
/// [`QuoteError`] variants that *are* enabled by default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
    /// The input contained a nul byte. In most cases, shells fundamentally [cannot handle strings
    /// containing nul bytes](quoting_warning#nul-bytes), however they are quoted. If you are
    /// sure you can handle nul bytes, call `allow_nul(true)` on the `Quoter` to let them pass
    /// through.
    Nul,
}

impl core::fmt::Display for QuoteError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match *self {
            QuoteError::Nul => "cannot shell-quote string containing nul byte",
        };
        f.write_str(message)
    }
}

// `std::error::Error` only exists when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
/// Configurable interface for quoting strings. When the default settings are enough, the
/// convenience functions [`try_quote`] and [`try_join`] can be used instead.
///
/// The bytes equivalent is [`bytes::Quoter`].
#[derive(Default, Debug, Clone)]
pub struct Quoter {
    inner: bytes::Quoter,
}

impl Quoter {
    /// Create a new [`Quoter`] with default settings.
    #[inline]
    pub fn new() -> Self {
        Quoter::default()
    }

    /// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not
    /// allowed and will result in an error of [`QuoteError::Nul`].
    #[inline]
    pub fn allow_nul(mut self, allow: bool) -> Self {
        let reconfigured = self.inner.allow_nul(allow);
        self.inner = reconfigured;
        self
    }

    /// Consumes an iterable of words and turns it into a single string, quoting words when
    /// necessary. Consecutive words will be separated by a single space.
    pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> {
        let byte_words = words.into_iter().map(|word| word.as_bytes());
        let joined = self.inner.join(byte_words)?;
        // SAFETY: given valid UTF-8, bytes::join() will always return valid UTF-8.
        Ok(unsafe { String::from_utf8_unchecked(joined) })
    }

    /// Given a single word, return a string suitable to encode it as a shell argument.
    ///
    /// A borrowed result from the byte-level quoter stays borrowed; an owned one stays owned.
    pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> {
        let quoted = self.inner.quote(in_str.as_bytes())?;
        // SAFETY (both arms): given valid UTF-8, bytes::quote() will always return valid UTF-8.
        Ok(match quoted {
            Cow::Borrowed(raw) => Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(raw) }),
            Cow::Owned(raw) => Cow::Owned(unsafe { String::from_utf8_unchecked(raw) }),
        })
    }
}
// Wraps an already-configured byte-level quoter in the string-level interface.
impl From<bytes::Quoter> for Quoter {
    fn from(inner: bytes::Quoter) -> Self {
        Self { inner }
    }
}
impl From<Quoter> for bytes::Quoter {
fn from(quoter: Quoter) -> bytes::Quoter {
quoter.inner
}
}
/// Convenience function that turns an iterable of words into a single string, quoting words
/// when necessary. Consecutive words will be separated by a single space.
///
/// Uses default settings except that nul bytes are passed through, which [may be
/// dangerous](quoting_warning#nul-bytes); that is why this function is deprecated.
///
/// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter).
///
/// (That configuration never returns `Err`, so this function does not panic.)
///
/// The bytes equivalent is [bytes::join].
#[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")]
pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
    let nul_tolerant = Quoter::new().allow_nul(true);
    nul_tolerant.join(words).unwrap()
}
/// Convenience function that turns an iterable of words into a single string, quoting words
/// when necessary. Consecutive words will be separated by a single space.
///
/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
///
/// Equivalent to [`Quoter::new().join(words)`](Quoter).
///
/// The bytes equivalent is [bytes::try_join].
pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> {
    let default_quoter = Quoter::new();
    default_quoter.join(words)
}
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// Uses default settings except that nul bytes are passed through, which [may be
/// dangerous](quoting_warning#nul-bytes); that is why this function is deprecated.
///
/// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter).
///
/// (That configuration never returns `Err`, so this function does not panic.)
///
/// The bytes equivalent is [bytes::quote].
#[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")]
pub fn quote(in_str: &str) -> Cow<'_, str> {
    let nul_tolerant = Quoter::new().allow_nul(true);
    nul_tolerant.quote(in_str).unwrap()
}
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// Uses default settings.
///
/// Equivalent to [`Quoter::new().quote(in_str)`](Quoter).
///
/// # Errors
///
/// The only error that can be returned is [`QuoteError::Nul`], which is reported when `in_str`
/// contains a nul byte.
///
/// The bytes equivalent is [bytes::try_quote].
pub fn try_quote(in_str: &str) -> Result<Cow<'_, str>, QuoteError> {
    Quoter::new().quote(in_str)
}
// Table of `(input, expected)` pairs consumed by `test_split`: `Some(words)` lists the words
// the input should split into, `None` marks an input that `split` should reject.
// (`'static` is implied for references in a `static` item, so it is not spelled out.)
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    (" foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
// Runs `split` over every row of `SPLIT_TEST_ITEMS` and compares against the expected words.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        // The table stores borrowed words; `split` returns owned `String`s.
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        let actual = split(input);
        assert_eq!(actual, expected);
    }
}
// Checks the lexer's line counter: "bar" sits on the third line of the input.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: a `for` loop would keep `sh` mutably borrowed for the
    // whole loop, and `sh.line_no` has to be read inside the body.
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Without this, the test would pass vacuously if "bar" were never produced.
    assert!(saw_bar, "lexer never produced the word \"bar\"");
}
#[test]
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
fn test_quote() {
    // This is a list of (unquoted, quoted) pairs, one `<unquoted> => <quoted>` row per line.
    // It uses a single long (raw) string literal with an ad-hoc format, because the test
    // strings are hard to read if they have to go through Rust escaping on top of the escaping
    // being tested. (Even raw string literals are noisy for short strings).
    // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
    // Whitespace around each side of a row is trimmed, so the indentation is not significant.
    let tests = r#"
        <> => <''>
        <foobar> => <foobar>
        <foo bar> => <'foo bar'>
        <"foo bar'"> => <"\"foo bar'\"">
        <'foo bar'> => <"'foo bar'">
        <"> => <'"'>
        <"'> => <"\"'">
        <hello!world> => <'hello!world'>
        <'hello!world> => <"'hello"'!world'>
        <'hello!> => <"'hello"'!'>
        <hello ^ world> => <'hello ''^ world'>
        <hello^> => <hello'^'>
        <!world'> => <'!world'"'">
        <{a, b}> => <'{a, b}'>
        <NL> => <'NL'>
        <^> => <'^'>
        <foo^bar> => <foo'^bar'>
        <NLx^> => <'NLx''^'>
        <NL^x> => <'NL''^x'>
        <NL ^x> => <'NL ''^x'>
        <{a,b}> => <'{a,b}'>
        <a,b> => <'a,b'>
        <a..b> => <a..b>
        <'$> => <"'"'$'>
        <"^> => <'"''^'>
    "#;
    let mut ok = true;
    for test in tests.trim().split('\n') {
        let parts: Vec<String> = test
            .replace("NL", "\n")
            .split("=>")
            .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
            .collect();
        assert_eq!(parts.len(), 2, "malformed test row: {:?}", test);
        let unquoted = &*parts[0];
        let quoted_expected = &*parts[1];
        let quoted_actual = try_quote(unquoted).unwrap();
        if quoted_expected != quoted_actual {
            // Without `std` there is no `println!`, so fail on the first mismatch; with `std`,
            // report every mismatch and fail once at the end.
            #[cfg(not(feature = "std"))]
            panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
                   unquoted, quoted_expected, quoted_actual);
            #[cfg(feature = "std")]
            println!("FAIL: for input <{}>, expected <{}>, got <{}>",
                     unquoted, quoted_expected, quoted_actual);
            ok = false;
        }
    }
    assert!(ok);
}
// Exercises the deprecated infallible `join` on a handful of small word lists.
#[test]
#[allow(deprecated)]
fn test_join() {
    let no_words: Vec<&str> = vec![];
    assert_eq!(join(no_words), "");

    let one_empty_word = vec![""];
    assert_eq!(join(one_empty_word), "''");

    let plain_words = vec!["a", "b"];
    assert_eq!(join(plain_words), "a b");

    let needs_quoting = vec!["foo bar", "baz"];
    assert_eq!(join(needs_quoting), "'foo bar' baz");
}
// With default settings, a nul byte must be rejected by both fallible entry points.
#[test]
fn test_fallible() {
    let joined = try_join(vec!["\0"]);
    assert_eq!(joined, Err(QuoteError::Nul));

    let quoted = try_quote("\0");
    assert_eq!(quoted, Err(QuoteError::Nul));
}