1 //! Minimal, flexible command-line parser
2 //!
3 //! As opposed to a declarative parser, this processes arguments as a stream of tokens.  As lexing
4 //! a command-line is not context-free, we rely on the caller to decide how to interpret the
5 //! arguments.
6 //!
7 //! # Examples
8 //!
9 //! ```rust
10 //! use std::path::PathBuf;
11 //!
12 //! type BoxedError = Box<dyn std::error::Error + Send + Sync>;
13 //!
14 //! #[derive(Debug)]
15 //! struct Args {
16 //!     paths: Vec<PathBuf>,
17 //!     color: Color,
18 //!     verbosity: usize,
19 //! }
20 //!
21 //! #[derive(Debug)]
22 //! enum Color {
23 //!     Always,
24 //!     Auto,
25 //!     Never,
26 //! }
27 //!
28 //! impl Color {
29 //!     fn parse(s: Option<&clap_lex::RawOsStr>) -> Result<Self, BoxedError> {
30 //!         let s = s.map(|s| s.to_str().ok_or(s));
31 //!         match s {
32 //!             Some(Ok("always")) | Some(Ok("")) | None => {
33 //!                 Ok(Color::Always)
34 //!             }
35 //!             Some(Ok("auto")) => {
36 //!                 Ok(Color::Auto)
37 //!             }
38 //!             Some(Ok("never")) => {
39 //!                 Ok(Color::Never)
40 //!             }
41 //!             Some(invalid) => {
42 //!                 Err(format!("Invalid value for `--color`, {:?}", invalid).into())
43 //!             }
44 //!         }
45 //!     }
46 //! }
47 //!
48 //! fn parse_args(
49 //!     raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>>
50 //! ) -> Result<Args, BoxedError> {
51 //!     let mut args = Args {
52 //!         paths: Vec::new(),
53 //!         color: Color::Auto,
54 //!         verbosity: 0,
55 //!     };
56 //!
57 //!     let raw = clap_lex::RawArgs::new(raw);
58 //!     let mut cursor = raw.cursor();
59 //!     raw.next(&mut cursor);  // Skip the bin
60 //!     while let Some(arg) = raw.next(&mut cursor) {
61 //!         if arg.is_escape() {
62 //!             args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from));
63 //!         } else if arg.is_stdio() {
64 //!             args.paths.push(PathBuf::from("-"));
65 //!         } else if let Some((long, value)) = arg.to_long() {
66 //!             match long {
67 //!                 Ok("verbose") => {
68 //!                     if let Some(value) = value {
69 //!                         return Err(format!("`--verbose` does not take a value, got `{:?}`", value).into());
70 //!                     }
71 //!                     args.verbosity += 1;
72 //!                 }
73 //!                 Ok("color") => {
74 //!                     args.color = Color::parse(value)?;
75 //!                 }
76 //!                 _ => {
77 //!                     return Err(
//!                         format!("Unexpected flag: {}", arg.display()).into()
79 //!                     );
80 //!                 }
81 //!             }
82 //!         } else if let Some(mut shorts) = arg.to_short() {
83 //!             while let Some(short) = shorts.next_flag() {
84 //!                 match short {
85 //!                     Ok('v') => {
86 //!                         args.verbosity += 1;
87 //!                     }
88 //!                     Ok('c') => {
89 //!                         let value = shorts.next_value_os();
90 //!                         args.color = Color::parse(value)?;
91 //!                     }
92 //!                     Ok(c) => {
93 //!                         return Err(format!("Unexpected flag: -{}", c).into());
94 //!                     }
95 //!                     Err(e) => {
96 //!                         return Err(format!("Unexpected flag: -{}", e.to_str_lossy()).into());
97 //!                     }
98 //!                 }
99 //!             }
100 //!         } else {
101 //!             args.paths.push(PathBuf::from(arg.to_value_os().to_os_str().into_owned()));
102 //!         }
103 //!     }
104 //!
105 //!     Ok(args)
106 //! }
107 //!
108 //! let args = parse_args(["bin", "--hello", "world"]);
109 //! println!("{:?}", args);
110 //! ```
111 
112 use std::ffi::OsStr;
113 use std::ffi::OsString;
114 
115 pub use std::io::SeekFrom;
116 
117 pub use os_str_bytes::RawOsStr;
118 pub use os_str_bytes::RawOsString;
119 
/// Command-line arguments
///
/// The arguments are kept in order as owned OS strings.  The current position is not stored
/// here; it lives in an [`ArgCursor`] that is passed to each accessor.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RawArgs {
    /// Every argument, in the order it was supplied
    items: Vec<OsString>,
}
125 
126 impl RawArgs {
127     //// Create an argument list to parse
128     ///
129     /// **NOTE:** The argument returned will be the current binary.
130     ///
131     /// # Example
132     ///
133     /// ```rust,no_run
134     /// # use std::path::PathBuf;
135     /// let raw = clap_lex::RawArgs::from_args();
136     /// let mut cursor = raw.cursor();
137     /// let _bin = raw.next_os(&mut cursor);
138     ///
139     /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
140     /// println!("{:?}", paths);
141     /// ```
from_args() -> Self142     pub fn from_args() -> Self {
143         Self::new(std::env::args_os())
144     }
145 
146     //// Create an argument list to parse
147     ///
148     /// # Example
149     ///
150     /// ```rust,no_run
151     /// # use std::path::PathBuf;
152     /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
153     /// let mut cursor = raw.cursor();
154     /// let _bin = raw.next_os(&mut cursor);
155     ///
156     /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
157     /// println!("{:?}", paths);
158     /// ```
new(iter: impl IntoIterator<Item = impl Into<std::ffi::OsString>>) -> Self159     pub fn new(iter: impl IntoIterator<Item = impl Into<std::ffi::OsString>>) -> Self {
160         let iter = iter.into_iter();
161         Self::from(iter)
162     }
163 
164     /// Create a cursor for walking the arguments
165     ///
166     /// # Example
167     ///
168     /// ```rust,no_run
169     /// # use std::path::PathBuf;
170     /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
171     /// let mut cursor = raw.cursor();
172     /// let _bin = raw.next_os(&mut cursor);
173     ///
174     /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
175     /// println!("{:?}", paths);
176     /// ```
cursor(&self) -> ArgCursor177     pub fn cursor(&self) -> ArgCursor {
178         ArgCursor::new()
179     }
180 
181     /// Advance the cursor, returning the next [`ParsedArg`]
next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>>182     pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> {
183         self.next_os(cursor).map(ParsedArg::new)
184     }
185 
186     /// Advance the cursor, returning a raw argument value.
next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr>187     pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> {
188         let next = self.items.get(cursor.cursor).map(|s| s.as_os_str());
189         cursor.cursor = cursor.cursor.saturating_add(1);
190         next
191     }
192 
193     /// Return the next [`ParsedArg`]
peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>>194     pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> {
195         self.peek_os(cursor).map(ParsedArg::new)
196     }
197 
198     /// Return a raw argument value.
peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr>199     pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> {
200         self.items.get(cursor.cursor).map(|s| s.as_os_str())
201     }
202 
203     /// Return all remaining raw arguments, advancing the cursor to the end
204     ///
205     /// # Example
206     ///
207     /// ```rust,no_run
208     /// # use std::path::PathBuf;
209     /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]);
210     /// let mut cursor = raw.cursor();
211     /// let _bin = raw.next_os(&mut cursor);
212     ///
213     /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>();
214     /// println!("{:?}", paths);
215     /// ```
remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr>216     pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> {
217         let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str());
218         cursor.cursor = self.items.len();
219         remaining
220     }
221 
222     /// Adjust the cursor's position
seek(&self, cursor: &mut ArgCursor, pos: SeekFrom)223     pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) {
224         let pos = match pos {
225             SeekFrom::Start(pos) => pos,
226             SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64,
227             SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64,
228         };
229         let pos = (pos as usize).min(self.items.len());
230         cursor.cursor = pos;
231     }
232 
233     /// Inject arguments before the [`RawArgs::next`]
insert( &mut self, cursor: &ArgCursor, insert_items: impl IntoIterator<Item = impl Into<OsString>>, )234     pub fn insert(
235         &mut self,
236         cursor: &ArgCursor,
237         insert_items: impl IntoIterator<Item = impl Into<OsString>>,
238     ) {
239         self.items.splice(
240             cursor.cursor..cursor.cursor,
241             insert_items.into_iter().map(Into::into),
242         );
243     }
244 
245     /// Any remaining args?
is_end(&self, cursor: &ArgCursor) -> bool246     pub fn is_end(&self, cursor: &ArgCursor) -> bool {
247         self.peek_os(cursor).is_none()
248     }
249 }
250 
251 impl<I, T> From<I> for RawArgs
252 where
253     I: Iterator<Item = T>,
254     T: Into<OsString>,
255 {
from(val: I) -> Self256     fn from(val: I) -> Self {
257         Self {
258             items: val.map(|x| x.into()).collect(),
259         }
260     }
261 }
262 
/// Position within [`RawArgs`]
///
/// Cursors compare and order by the index they point at.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArgCursor {
    /// Index of the next argument to read
    cursor: usize,
}
268 
269 impl ArgCursor {
new() -> Self270     fn new() -> Self {
271         Self { cursor: 0 }
272     }
273 }
274 
275 /// Command-line Argument
276 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
277 pub struct ParsedArg<'s> {
278     inner: std::borrow::Cow<'s, RawOsStr>,
279     utf8: Option<&'s str>,
280 }
281 
282 impl<'s> ParsedArg<'s> {
new(inner: &'s OsStr) -> Self283     fn new(inner: &'s OsStr) -> Self {
284         let utf8 = inner.to_str();
285         let inner = RawOsStr::new(inner);
286         Self { inner, utf8 }
287     }
288 
289     /// Argument is length of 0
is_empty(&self) -> bool290     pub fn is_empty(&self) -> bool {
291         self.inner.as_ref().is_empty()
292     }
293 
294     /// Does the argument look like a stdio argument (`-`)
is_stdio(&self) -> bool295     pub fn is_stdio(&self) -> bool {
296         self.inner.as_ref() == "-"
297     }
298 
299     /// Does the argument look like an argument escape (`--`)
is_escape(&self) -> bool300     pub fn is_escape(&self) -> bool {
301         self.inner.as_ref() == "--"
302     }
303 
304     /// Does the argument look like a number
is_number(&self) -> bool305     pub fn is_number(&self) -> bool {
306         self.to_value()
307             .map(|s| s.parse::<f64>().is_ok())
308             .unwrap_or_default()
309     }
310 
311     /// Treat as a long-flag
to_long(&self) -> Option<(Result<&str, &RawOsStr>, Option<&RawOsStr>)>312     pub fn to_long(&self) -> Option<(Result<&str, &RawOsStr>, Option<&RawOsStr>)> {
313         if let Some(raw) = self.utf8 {
314             let remainder = raw.strip_prefix("--")?;
315             if remainder.is_empty() {
316                 debug_assert!(self.is_escape());
317                 return None;
318             }
319 
320             let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
321                 (p0, Some(p1))
322             } else {
323                 (remainder, None)
324             };
325             let flag = Ok(flag);
326             let value = value.map(RawOsStr::from_str);
327             Some((flag, value))
328         } else {
329             let raw = self.inner.as_ref();
330             let remainder = raw.strip_prefix("--")?;
331             if remainder.is_empty() {
332                 debug_assert!(self.is_escape());
333                 return None;
334             }
335 
336             let (flag, value) = if let Some((p0, p1)) = remainder.split_once('=') {
337                 (p0, Some(p1))
338             } else {
339                 (remainder, None)
340             };
341             let flag = flag.to_str().ok_or(flag);
342             Some((flag, value))
343         }
344     }
345 
346     /// Can treat as a long-flag
is_long(&self) -> bool347     pub fn is_long(&self) -> bool {
348         self.inner.as_ref().starts_with("--") && !self.is_escape()
349     }
350 
351     /// Treat as a short-flag
to_short(&self) -> Option<ShortFlags<'_>>352     pub fn to_short(&self) -> Option<ShortFlags<'_>> {
353         if let Some(remainder_os) = self.inner.as_ref().strip_prefix('-') {
354             if remainder_os.starts_with('-') {
355                 None
356             } else if remainder_os.is_empty() {
357                 debug_assert!(self.is_stdio());
358                 None
359             } else {
360                 let remainder = self.utf8.map(|s| &s[1..]);
361                 Some(ShortFlags::new(remainder_os, remainder))
362             }
363         } else {
364             None
365         }
366     }
367 
368     /// Can treat as a short-flag
is_short(&self) -> bool369     pub fn is_short(&self) -> bool {
370         self.inner.as_ref().starts_with('-')
371             && !self.is_stdio()
372             && !self.inner.as_ref().starts_with("--")
373     }
374 
375     /// Treat as a value
376     ///
377     /// **NOTE:** May return a flag or an escape.
to_value_os(&self) -> &RawOsStr378     pub fn to_value_os(&self) -> &RawOsStr {
379         self.inner.as_ref()
380     }
381 
382     /// Treat as a value
383     ///
384     /// **NOTE:** May return a flag or an escape.
to_value(&self) -> Result<&str, &RawOsStr>385     pub fn to_value(&self) -> Result<&str, &RawOsStr> {
386         self.utf8.ok_or_else(|| self.inner.as_ref())
387     }
388 
389     /// Safely print an argument that may contain non-UTF8 content
390     ///
391     /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead.
display(&self) -> impl std::fmt::Display + '_392     pub fn display(&self) -> impl std::fmt::Display + '_ {
393         self.inner.to_str_lossy()
394     }
395 }
396 
397 /// Walk through short flags within a [`ParsedArg`]
398 #[derive(Clone, Debug)]
399 pub struct ShortFlags<'s> {
400     inner: &'s RawOsStr,
401     utf8_prefix: std::str::CharIndices<'s>,
402     invalid_suffix: Option<&'s RawOsStr>,
403 }
404 
405 impl<'s> ShortFlags<'s> {
new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self406     fn new(inner: &'s RawOsStr, utf8: Option<&'s str>) -> Self {
407         let (utf8_prefix, invalid_suffix) = if let Some(utf8) = utf8 {
408             (utf8, None)
409         } else {
410             split_nonutf8_once(inner)
411         };
412         let utf8_prefix = utf8_prefix.char_indices();
413         Self {
414             inner,
415             utf8_prefix,
416             invalid_suffix,
417         }
418     }
419 
420     /// Move the iterator forward by `n` short flags
advance_by(&mut self, n: usize) -> Result<(), usize>421     pub fn advance_by(&mut self, n: usize) -> Result<(), usize> {
422         for i in 0..n {
423             self.next().ok_or(i)?.map_err(|_| i)?;
424         }
425         Ok(())
426     }
427 
428     /// No short flags left
is_empty(&self) -> bool429     pub fn is_empty(&self) -> bool {
430         self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty()
431     }
432 
433     /// Does the short flag look like a number
434     ///
435     /// Ideally call this before doing any iterator
is_number(&self) -> bool436     pub fn is_number(&self) -> bool {
437         self.invalid_suffix.is_none() && self.utf8_prefix.as_str().parse::<f64>().is_ok()
438     }
439 
440     /// Advance the iterator, returning the next short flag on success
441     ///
442     /// On error, returns the invalid-UTF8 value
next_flag(&mut self) -> Option<Result<char, &'s RawOsStr>>443     pub fn next_flag(&mut self) -> Option<Result<char, &'s RawOsStr>> {
444         if let Some((_, flag)) = self.utf8_prefix.next() {
445             return Some(Ok(flag));
446         }
447 
448         if let Some(suffix) = self.invalid_suffix {
449             self.invalid_suffix = None;
450             return Some(Err(suffix));
451         }
452 
453         None
454     }
455 
456     /// Advance the iterator, returning everything left as a value
next_value_os(&mut self) -> Option<&'s RawOsStr>457     pub fn next_value_os(&mut self) -> Option<&'s RawOsStr> {
458         if let Some((index, _)) = self.utf8_prefix.next() {
459             self.utf8_prefix = "".char_indices();
460             self.invalid_suffix = None;
461             return Some(&self.inner[index..]);
462         }
463 
464         if let Some(suffix) = self.invalid_suffix {
465             self.invalid_suffix = None;
466             return Some(suffix);
467         }
468 
469         None
470     }
471 }
472 
473 impl<'s> Iterator for ShortFlags<'s> {
474     type Item = Result<char, &'s RawOsStr>;
475 
next(&mut self) -> Option<Self::Item>476     fn next(&mut self) -> Option<Self::Item> {
477         self.next_flag()
478     }
479 }
480 
split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>)481 fn split_nonutf8_once(b: &RawOsStr) -> (&str, Option<&RawOsStr>) {
482     match std::str::from_utf8(b.as_raw_bytes()) {
483         Ok(s) => (s, None),
484         Err(err) => {
485             let (valid, after_valid) = b.split_at(err.valid_up_to());
486             let valid = std::str::from_utf8(valid.as_raw_bytes()).unwrap();
487             (valid, Some(after_valid))
488         }
489     }
490 }
491