clap_lex/lib.rs
1//! Minimal, flexible command-line parser
2//!
3//! As opposed to a declarative parser, this processes arguments as a stream of tokens. As lexing
4//! a command-line is not context-free, we rely on the caller to decide how to interpret the
5//! arguments.
6//!
7//! # Examples
8//!
9//! ```rust
10//! use std::path::PathBuf;
11//! use std::ffi::OsStr;
12//!
13//! type BoxedError = Box<dyn std::error::Error + Send + Sync>;
14//!
15//! #[derive(Debug)]
16//! struct Args {
17//! paths: Vec<PathBuf>,
18//! color: Color,
19//! verbosity: usize,
20//! }
21//!
22//! #[derive(Debug)]
23//! enum Color {
24//! Always,
25//! Auto,
26//! Never,
27//! }
28//!
29//! impl Color {
30//! fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> {
31//! let s = s.map(|s| s.to_str().ok_or(s));
32//! match s {
33//! Some(Ok("always")) | Some(Ok("")) | None => {
34//! Ok(Color::Always)
35//! }
36//! Some(Ok("auto")) => {
37//! Ok(Color::Auto)
38//! }
39//! Some(Ok("never")) => {
40//! Ok(Color::Never)
41//! }
42//! Some(invalid) => {
43//! Err(format!("Invalid value for `--color`, {invalid:?}").into())
44//! }
45//! }
46//! }
47//! }
48//!
49//! fn parse_args(
50//! raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>>
51//! ) -> Result<Args, BoxedError> {
52//! let mut args = Args {
53//! paths: Vec::new(),
54//! color: Color::Auto,
55//! verbosity: 0,
56//! };
57//!
58//! let raw = clap_lex::RawArgs::new(raw);
59//! let mut cursor = raw.cursor();
60//! raw.next(&mut cursor); // Skip the bin
61//! while let Some(arg) = raw.next(&mut cursor) {
62//! if arg.is_escape() {
63//! args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from));
64//! } else if arg.is_stdio() {
65//! args.paths.push(PathBuf::from("-"));
66//! } else if let Some((long, value)) = arg.to_long() {
67//! match long {
68//! Ok("verbose") => {
69//! if let Some(value) = value {
70//! return Err(format!("`--verbose` does not take a value, got `{value:?}`").into());
71//! }
72//! args.verbosity += 1;
73//! }
74//! Ok("color") => {
75//! args.color = Color::parse(value)?;
76//! }
77//! _ => {
78//! return Err(
//! format!("Unexpected flag: {}", arg.display()).into()
80//! );
81//! }
82//! }
83//! } else if let Some(mut shorts) = arg.to_short() {
84//! while let Some(short) = shorts.next_flag() {
85//! match short {
86//! Ok('v') => {
87//! args.verbosity += 1;
88//! }
89//! Ok('c') => {
90//! let value = shorts.next_value_os();
91//! args.color = Color::parse(value)?;
92//! }
93//! Ok(c) => {
94//! return Err(format!("Unexpected flag: -{c}").into());
95//! }
96//! Err(e) => {
97//! return Err(format!("Unexpected flag: -{}", e.to_string_lossy()).into());
98//! }
99//! }
100//! }
101//! } else {
102//! args.paths.push(PathBuf::from(arg.to_value_os().to_owned()));
103//! }
104//! }
105//!
106//! Ok(args)
107//! }
108//!
109//! let args = parse_args(["bin", "--hello", "world"]);
110//! println!("{args:?}");
111//! ```
112
113#![cfg_attr(docsrs, feature(doc_auto_cfg))]
114#![warn(missing_docs)]
115#![warn(clippy::print_stderr)]
116#![warn(clippy::print_stdout)]
117
118mod ext;
119
120use std::ffi::OsStr;
121use std::ffi::OsString;
122
123pub use std::io::SeekFrom;
124
125pub use ext::OsStrExt;
126
127/// Command-line arguments
128#[derive(Default, Clone, Debug, PartialEq, Eq)]
129pub struct RawArgs {
130 items: Vec<OsString>,
131}
132
133impl RawArgs {
134 //// Create an argument list to parse
135 ///
136 /// **NOTE:** The argument returned will be the current binary.
137 ///
138 /// # Example
139 ///
140 /// ```rust,no_run
141 /// # use std::path::PathBuf;
142 /// let raw = clap_lex::RawArgs::from_args();
143 /// let mut cursor = raw.cursor();
144 /// let _bin = raw.next_os(&mut cursor);
145 ///
146 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
147 /// println!("{paths:?}");
148 /// ```
149 pub fn from_args() -> Self {
150 Self::new(std::env::args_os())
151 }
152
153 //// Create an argument list to parse
154 ///
155 /// # Example
156 ///
157 /// ```rust,no_run
158 /// # use std::path::PathBuf;
159 /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
160 /// let mut cursor = raw.cursor();
161 /// let _bin = raw.next_os(&mut cursor);
162 ///
163 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
164 /// println!("{paths:?}");
165 /// ```
166 pub fn new(iter: impl IntoIterator<Item = impl Into<OsString>>) -> Self {
167 let iter = iter.into_iter();
168 Self::from(iter)
169 }
170
171 /// Create a cursor for walking the arguments
172 ///
173 /// # Example
174 ///
175 /// ```rust,no_run
176 /// # use std::path::PathBuf;
177 /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
178 /// let mut cursor = raw.cursor();
179 /// let _bin = raw.next_os(&mut cursor);
180 ///
181 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
182 /// println!("{paths:?}");
183 /// ```
184 pub fn cursor(&self) -> ArgCursor {
185 ArgCursor::new()
186 }
187
188 /// Advance the cursor, returning the next [`ParsedArg`]
189 pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> {
190 self.next_os(cursor).map(ParsedArg::new)
191 }
192
193 /// Advance the cursor, returning a raw argument value.
194 pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
195 let next = self.items.get(cursor.cursor).map(|s| s.as_os_str());
196 cursor.cursor = cursor.cursor.saturating_add(1);
197 next
198 }
199
200 /// Return the next [`ParsedArg`]
201 pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> {
202 self.peek_os(cursor).map(ParsedArg::new)
203 }
204
205 /// Return a raw argument value.
206 pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> {
207 self.items.get(cursor.cursor).map(|s| s.as_os_str())
208 }
209
210 /// Return all remaining raw arguments, advancing the cursor to the end
211 ///
212 /// # Example
213 ///
214 /// ```rust,no_run
215 /// # use std::path::PathBuf;
216 /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
217 /// let mut cursor = raw.cursor();
218 /// let _bin = raw.next_os(&mut cursor);
219 ///
220 /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
221 /// println!("{paths:?}");
222 /// ```
223 pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> {
224 let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str());
225 cursor.cursor = self.items.len();
226 remaining
227 }
228
229 /// Adjust the cursor's position
230 pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) {
231 let pos = match pos {
232 SeekFrom::Start(pos) => pos,
233 SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64,
234 SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64,
235 };
236 let pos = (pos as usize).min(self.items.len());
237 cursor.cursor = pos;
238 }
239
240 /// Inject arguments before the [`RawArgs::next`]
241 pub fn insert(
242 &mut self,
243 cursor: &ArgCursor,
244 insert_items: impl IntoIterator<Item = impl Into<OsString>>,
245 ) {
246 self.items.splice(
247 cursor.cursor..cursor.cursor,
248 insert_items.into_iter().map(Into::into),
249 );
250 }
251
252 /// Any remaining args?
253 pub fn is_end(&self, cursor: &ArgCursor) -> bool {
254 self.peek_os(cursor).is_none()
255 }
256}
257
258impl<I, T> From<I> for RawArgs
259where
260 I: Iterator<Item = T>,
261 T: Into<OsString>,
262{
263 fn from(val: I) -> Self {
264 Self {
265 items: val.map(|x| x.into()).collect(),
266 }
267 }
268}
269
/// Position within [`RawArgs`]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArgCursor {
    /// Index of the argument the cursor currently points at
    cursor: usize,
}

impl ArgCursor {
    /// Create a cursor pointing at the first argument.
    fn new() -> Self {
        let cursor = 0;
        ArgCursor { cursor }
    }
}
281
282/// Command-line Argument
283#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
284pub struct ParsedArg<'s> {
285 inner: &'s OsStr,
286}
287
288impl<'s> ParsedArg<'s> {
289 fn new(inner: &'s OsStr) -> Self {
290 Self { inner }
291 }
292
293 /// Argument is length of 0
294 pub fn is_empty(&self) -> bool {
295 self.inner.is_empty()
296 }
297
298 /// Does the argument look like a stdio argument (`-`)
299 pub fn is_stdio(&self) -> bool {
300 self.inner == "-"
301 }
302
303 /// Does the argument look like an argument escape (`--`)
304 pub fn is_escape(&self) -> bool {
305 self.inner == "--"
306 }
307
308 /// Does the argument look like a negative number?
309 ///
310 /// This won't parse the number in full but attempts to see if this looks
311 /// like something along the lines of `-3`, `-0.3`, or `-33.03`
312 pub fn is_negative_number(&self) -> bool {
313 self.to_value()
314 .ok()
315 .and_then(|s| Some(is_number(s.strip_prefix('-')?)))
316 .unwrap_or_default()
317 }
318
319 /// Treat as a long-flag
320 pub fn to_long(&self) -> Option<(Result<&str, &OsStr>, Option<&OsStr>)> {
321 let raw = self.inner;
322 let remainder = raw.strip_prefix("--")?;
323 if remainder.is_empty() {
324 debug_assert!(self.is_escape());
325 return None;
326 }
327
328 let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=") {
329 (p0, Some(p1))
330 } else {
331 (remainder, None)
332 };
333 let flag = flag.to_str().ok_or(flag);
334 Some((flag, value))
335 }
336
337 /// Can treat as a long-flag
338 pub fn is_long(&self) -> bool {
339 self.inner.starts_with("--") && !self.is_escape()
340 }
341
342 /// Treat as a short-flag
343 pub fn to_short(&self) -> Option<ShortFlags<'_>> {
344 if let Some(remainder_os) = self.inner.strip_prefix("-") {
345 if remainder_os.starts_with("-") {
346 None
347 } else if remainder_os.is_empty() {
348 debug_assert!(self.is_stdio());
349 None
350 } else {
351 Some(ShortFlags::new(remainder_os))
352 }
353 } else {
354 None
355 }
356 }
357
358 /// Can treat as a short-flag
359 pub fn is_short(&self) -> bool {
360 self.inner.starts_with("-") && !self.is_stdio() && !self.inner.starts_with("--")
361 }
362
363 /// Treat as a value
364 ///
365 /// **NOTE:** May return a flag or an escape.
366 pub fn to_value_os(&self) -> &OsStr {
367 self.inner
368 }
369
370 /// Treat as a value
371 ///
372 /// **NOTE:** May return a flag or an escape.
373 pub fn to_value(&self) -> Result<&str, &OsStr> {
374 self.inner.to_str().ok_or(self.inner)
375 }
376
377 /// Safely print an argument that may contain non-UTF8 content
378 ///
379 /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead.
380 pub fn display(&self) -> impl std::fmt::Display + '_ {
381 self.inner.to_string_lossy()
382 }
383}
384
385/// Walk through short flags within a [`ParsedArg`]
386#[derive(Clone, Debug)]
387pub struct ShortFlags<'s> {
388 inner: &'s OsStr,
389 utf8_prefix: std::str::CharIndices<'s>,
390 invalid_suffix: Option<&'s OsStr>,
391}
392
393impl<'s> ShortFlags<'s> {
394 fn new(inner: &'s OsStr) -> Self {
395 let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner);
396 let utf8_prefix = utf8_prefix.char_indices();
397 Self {
398 inner,
399 utf8_prefix,
400 invalid_suffix,
401 }
402 }
403
404 /// Move the iterator forward by `n` short flags
405 pub fn advance_by(&mut self, n: usize) -> Result<(), usize> {
406 for i in 0..n {
407 self.next().ok_or(i)?.map_err(|_| i)?;
408 }
409 Ok(())
410 }
411
412 /// No short flags left
413 pub fn is_empty(&self) -> bool {
414 self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty()
415 }
416
417 /// Does the short flag look like a number
418 ///
419 /// Ideally call this before doing any iterator
420 pub fn is_negative_number(&self) -> bool {
421 self.invalid_suffix.is_none() && is_number(self.utf8_prefix.as_str())
422 }
423
424 /// Advance the iterator, returning the next short flag on success
425 ///
426 /// On error, returns the invalid-UTF8 value
427 pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> {
428 if let Some((_, flag)) = self.utf8_prefix.next() {
429 return Some(Ok(flag));
430 }
431
432 if let Some(suffix) = self.invalid_suffix {
433 self.invalid_suffix = None;
434 return Some(Err(suffix));
435 }
436
437 None
438 }
439
440 /// Advance the iterator, returning everything left as a value
441 pub fn next_value_os(&mut self) -> Option<&'s OsStr> {
442 if let Some((index, _)) = self.utf8_prefix.next() {
443 self.utf8_prefix = "".char_indices();
444 self.invalid_suffix = None;
445 // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary
446 let remainder = unsafe { ext::split_at(self.inner, index).1 };
447 return Some(remainder);
448 }
449
450 if let Some(suffix) = self.invalid_suffix {
451 self.invalid_suffix = None;
452 return Some(suffix);
453 }
454
455 None
456 }
457}
458
459impl<'s> Iterator for ShortFlags<'s> {
460 type Item = Result<char, &'s OsStr>;
461
462 fn next(&mut self) -> Option<Self::Item> {
463 self.next_flag()
464 }
465}
466
467fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) {
468 match b.try_str() {
469 Ok(s) => (s, None),
470 Err(err) => {
471 // SAFETY: `err.valid_up_to()`, which came from str::from_utf8(), is guaranteed
472 // to be a valid UTF8 boundary
473 let (valid, after_valid) = unsafe { ext::split_at(b, err.valid_up_to()) };
474 let valid = valid.try_str().unwrap();
475 (valid, Some(after_valid))
476 }
477 }
478}
479
/// Check whether `arg` looks like an unsigned decimal integer or float.
///
/// Accepted forms are all digits with, optionally, a single `.` and a single `e` exponent,
/// e.g. `1`, `1.`, `1.2`, `1.2e10`. The first character must be a digit, the `.` must come
/// before the exponent, and the `e` must be followed by at least one digit. The empty string
/// is not a number.
fn is_number(arg: &str) -> bool {
    // Without this an empty string would pass the loop below untouched and be reported as
    // a number, e.g. for the remainder of a lone `-`.
    if arg.is_empty() {
        return false;
    }

    let mut seen_dot = false;
    let mut position_of_e = None;
    for (i, c) in arg.as_bytes().iter().enumerate() {
        match c {
            // Digits are always valid
            b'0'..=b'9' => {}

            // Allow a `.`, but only one, only if it comes before an
            // optional exponent, and only if it's not the first character.
            b'.' if !seen_dot && position_of_e.is_none() && i > 0 => seen_dot = true,

            // Allow an exponent `e` but only at most one after the first
            // character.
            b'e' if position_of_e.is_none() && i > 0 => position_of_e = Some(i),

            _ => return false,
        }
    }

    // Disallow `-1e` which isn't a valid float since it doesn't actually have
    // an exponent.
    position_of_e.map_or(true, |i| i + 1 != arg.len())
}
510}