diff options
author | Matthias Andreas Benkard <code@mail.matthias.benkard.de> | 2025-07-31 20:45:50 +0200 |
---|---|---|
committer | Matthias Andreas Benkard <code@mail.matthias.benkard.de> | 2025-07-31 20:45:50 +0200 |
commit | 1a795d8eb7a9e7475414fa537810726d2be127cb (patch) | |
tree | 7427b10ad55f21b34bbf6f2ec18456189a94fa1e | |
parent | 34297979bb9e4eb76d55ff1a89072865bb525d13 (diff) |
Add a proper command line parser.
-rw-r--r-- | src/main.rs | 589 |
1 files changed, 359 insertions, 230 deletions
diff --git a/src/main.rs b/src/main.rs index 744217a..8b9229e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -164,6 +164,19 @@ enum FileSpec { } #[derive(Debug)] +enum RedirectType { + OutputOverwrite, // > + OutputAppend, // >> + InputFrom, // < +} + +#[derive(Debug)] +struct Redirect { + redirect_type: RedirectType, + target: FileSpec, +} + +#[derive(Debug)] enum Command { Pipe { left: Box<Command>, @@ -171,7 +184,7 @@ enum Command { }, Redirect { command: Box<Command>, - target: FileSpec, + redirects: Vec<Redirect>, }, External { program: String, @@ -413,303 +426,419 @@ enum BuiltinCommand { Shift, } -impl Command { - fn parse(input: &str) -> Result<Command> { - use BuiltinCommand::*; - use Command::*; +#[derive(Debug, Clone, PartialEq)] +enum Token { + Word(String), + Pipe, + RedirectOut, // > + RedirectAppend, // >> + RedirectIn, // < + Newline, + Eof, +} - // Implement COMMAND.COM-style command line parsing. - // See https://en.wikipedia.org/wiki/Command.com#Command_line_syntax - // and https://stackoverflow.com/questions/4094699/how-does-the-windows-command-interpreter-cmd-exe-parse-scripts for details. - // - let whitespace = Regex::new(r"\s+")?; - let mut split_input = whitespace.splitn(input, 2); - let Some(name) = split_input.next() else { - return Ok(Empty); - }; - let args = split_input.next().unwrap_or(""); +struct Lexer { + input: Vec<char>, + position: usize, + current_char: Option<char>, +} - match name.to_uppercase().as_str() { - "" => Ok(Empty), +impl Lexer { + fn new(input: &str) -> Self { + let chars: Vec<char> = input.chars().collect(); + let current_char = chars.get(0).copied(); - "ECHO" => Ok(Builtin(match args.to_uppercase().as_str() { - "ON" => EchoOn, - "OFF" => EchoOff, - "" => EchoPlain, - _ => EchoText { - message: args.to_string(), + Self { + input: chars, + position: 0, + current_char, + } + } + + fn advance(&mut self) { + self.position += 1; + self.current_char = self.input.get(self.position).copied(); + } + + fn peek(&self) -> Option<char> { + self.input.get(self.position + 1).copied() + } + + fn skip_whitespace(&mut self) { + while let Some(ch) = self.current_char { + if ch.is_whitespace() && ch != '\n' { + self.advance(); + } else { + break; + } + } + } + + fn read_word(&mut self) -> String { + let mut word = String::new(); + let mut in_quotes = false; + let mut quote_char = '"'; + + while let Some(ch) = self.current_char { + match ch { + // Handle quotes + '"' | '\'' if !in_quotes => { + in_quotes = true; + quote_char = ch; + self.advance(); }, - })), + ch if in_quotes && ch == quote_char => { + in_quotes = false; + self.advance(); + }, + // Stop at special characters when not in quotes + '|' | '>' | '<' | '\n' if !in_quotes => { + break; + }, + // Stop at whitespace when not in quotes + ch if !in_quotes && ch.is_whitespace() => { + break; + }, + // Regular character + _ => { + word.push(ch); + self.advance(); + } + } + } - "CLS" => Ok(Builtin(Cls)), + word + } - "EXIT" => Ok(Builtin(Exit)), + fn next_token(&mut self) -> Token { + loop { + match self.current_char { + None => return Token::Eof, - "MORE" => Ok(Builtin(More)), + Some('\n') => { + self.advance(); + return Token::Newline; + }, - "VERIFY" => Ok(Builtin(Verify)), + Some(ch) if ch.is_whitespace() => { + self.skip_whitespace(); + continue; + }, - // File-oriented commands - "COPY" => { - let parts: Vec<&str> = args.split_whitespace().collect(); - if parts.len() >= 2 { - Ok(Builtin(Copy { - from: parse_filespec(parts[0])?, - to: parse_filespec(parts[1])?, - })) - } else { - Err(eyre::eyre!("COPY requires source and destination")) + Some('|') => { + self.advance(); + return Token::Pipe; + }, + + Some('>') => { + self.advance(); + if self.current_char == Some('>') { + self.advance(); + return Token::RedirectAppend; + } else { + return Token::RedirectOut; + } + }, + + Some('<') => { + self.advance(); + return Token::RedirectIn; + }, + + Some(_) => { + let word = self.read_word(); + if word.is_empty() { + // This shouldn't happen, but just in case + self.advance(); + continue; + } + return Token::Word(word); } + } + } + } + + fn tokenize(&mut self) -> Vec<Token> { + let mut tokens = Vec::new(); + + loop { + let token = self.next_token(); + let is_eof = matches!(token, Token::Eof); + tokens.push(token); + if is_eof { + break; + } + } + + tokens + } +} + +struct Parser { + tokens: Vec<Token>, + position: usize, +} + +impl Parser { + fn new(tokens: Vec<Token>) -> Self { + Self { + tokens, + position: 0, + } + } + + fn current_token(&self) -> &Token { + self.tokens.get(self.position).unwrap_or(&Token::Eof) + } + + fn advance(&mut self) { + if self.position < self.tokens.len() { + self.position += 1; + } + } + + fn expect_word(&mut self) -> Result<String> { + match self.current_token() { + Token::Word(word) => { + let result = word.clone(); + self.advance(); + Ok(result) }, + token => Err(eyre::eyre!("Expected word, found {:?}", token)), + } + } - "DELTREE" => Ok(Builtin(Deltree { - path: PathBuf::from(args.trim()), - })), + fn parse_command(&mut self) -> Result<Command> { + self.parse_pipeline() + } - "DIR" => Ok(Builtin(Dir { - path: if args.trim().is_empty() { - PathBuf::from(".") - } else { - PathBuf::from(args.trim()) - }, - })), + fn parse_pipeline(&mut self) -> Result<Command> { + let mut left = self.parse_redirected_command()?; - "FC" => Ok(Builtin(Fc)), + while matches!(self.current_token(), Token::Pipe) { + self.advance(); // consume | + let right = self.parse_redirected_command()?; + left = Command::Pipe { + left: Box::new(left), + right: Box::new(right), + }; + } - "FIND" => Ok(Builtin(Find)), + Ok(left) + } - "MKDIR" | "MD" => Ok(Builtin(Mkdir { - path: PathBuf::from(args.trim()), - })), + fn parse_redirected_command(&mut self) -> Result<Command> { + let mut command = self.parse_simple_command()?; + let mut redirects = Vec::new(); - "MOVE" => Ok(Builtin(Move)), + while matches!(self.current_token(), Token::RedirectOut | Token::RedirectAppend | Token::RedirectIn) { + let redirect_type = match self.current_token() { + Token::RedirectOut => RedirectType::OutputOverwrite, + Token::RedirectAppend => RedirectType::OutputAppend, + Token::RedirectIn => RedirectType::InputFrom, + _ => unreachable!(), + }; - "DEL" | "ERASE" => Ok(Builtin(Remove { - path: PathBuf::from(args.trim()), - })), + self.advance(); // consume redirect token - "REN" | "RENAME" => { - let parts: Vec<&str> = args.split_whitespace().collect(); - if parts.len() >= 2 { - Ok(Builtin(Rename { - from: parse_filespec(parts[0])?, - to: parse_filespec(parts[1])?, - })) + let target_str = self.expect_word()?; + let target = parse_filespec(&target_str); + + redirects.push(Redirect { + redirect_type, + target, + }); + } + + if redirects.is_empty() { + Ok(command) + } else { + Ok(Command::Redirect { + command: Box::new(command), + redirects, + }) + } + } + + fn parse_simple_command(&mut self) -> Result<Command> { + use BuiltinCommand::*; + use Command::*; + + if matches!(self.current_token(), Token::Eof | Token::Newline) { + return Ok(Empty); + } + + let command_name = self.expect_word()?; + let mut args = Vec::new(); + + // Collect arguments until we hit a special token + while matches!(self.current_token(), Token::Word(_)) { + if let Token::Word(arg) = self.current_token() { + args.push(arg.clone()); + self.advance(); + } + } + + match command_name.to_uppercase().as_str() { + "ECHO" => { + if args.is_empty() { + Ok(Builtin(EchoPlain)) } else { - Err(eyre::eyre!("RENAME requires source and destination")) + match args[0].to_uppercase().as_str() { + "ON" if args.len() == 1 => Ok(Builtin(EchoOn)), + "OFF" if args.len() == 1 => Ok(Builtin(EchoOff)), + _ => Ok(Builtin(EchoText { + message: args.join(" "), + })), + } } }, - "REPLACE" => Ok(Builtin(Replace)), + "CLS" => Ok(Builtin(Cls)), - "RMDIR" | "RD" => Ok(Builtin(Rmdir { - path: PathBuf::from(args.trim()), - })), + "EXIT" => Ok(Builtin(Exit)), + + "MORE" => Ok(Builtin(More)), - "SORT" => Ok(Builtin(Sort)), + "VERIFY" => Ok(Builtin(Verify)), - "TREE" => Ok(Builtin(Tree { - path: if args.trim().is_empty() { + // File-oriented commands + "COPY" => { + if args.len() >= 2 { + Ok(Builtin(Copy { + from: parse_filespec(&args[0]), + to: parse_filespec(&args[1]), + })) + } else { + Err(eyre::eyre!("COPY requires source and destination")) + } + }, + + "DIR" => Ok(Builtin(Dir { + path: if args.is_empty() { PathBuf::from(".") } else { - PathBuf::from(args.trim()) + PathBuf::from(&args[0]) }, })), - "TYPE" => Ok(Builtin(Type { - file: parse_filespec(args.trim())?, - })), - - "XCOPY" => { - let parts: Vec<&str> = args.split_whitespace().collect(); - let recursive = parts.contains(&"/S") || parts.contains(&"/s"); - if parts.len() >= 2 { - Ok(Builtin(Xcopy { - from: parse_filespec(parts[0])?, - to: parse_filespec(parts[1])?, - recursive, - })) + "TYPE" => { + if args.is_empty() { + Err(eyre::eyre!("TYPE requires a filename")) } else { - Err(eyre::eyre!("XCOPY requires source and destination")) + Ok(Builtin(Type { + file: parse_filespec(&args[0]), + })) } }, - // Shell-oriented commands - "APPEND" => Ok(Builtin(Append)), - "CD" | "CHDIR" => Ok(Builtin(Chdir { - path: if args.trim().is_empty() { + path: if args.is_empty() { std::env::current_dir()? } else { - PathBuf::from(args.trim()) + PathBuf::from(&args[0]) }, })), - "PATH" => { - if args.trim().is_empty() { - Ok(Builtin(PathGet)) + "MKDIR" | "MD" => { + if args.is_empty() { + Err(eyre::eyre!("MKDIR requires a directory name")) } else { - Ok(Builtin(PathSet { - value: args.to_string(), + Ok(Builtin(Mkdir { + path: PathBuf::from(&args[0]), })) } }, - "PROMPT" => { - if args.trim().is_empty() { - Ok(Builtin(PromptGet)) + "RMDIR" | "RD" => { + if args.is_empty() { + Err(eyre::eyre!("RMDIR requires a directory name")) } else { - Ok(Builtin(PromptSet { - message: args.to_string(), + Ok(Builtin(Rmdir { + path: PathBuf::from(&args[0]), })) } }, - "SET" => { - if args.trim().is_empty() { - Err(eyre::eyre!("SET requires variable name")) - } else if let Some(eq_pos) = args.find('=') { - let name = args[..eq_pos].trim().to_string(); - let value = args[eq_pos + 1..].trim().to_string(); - Ok(Builtin(Set { name, value })) + "DEL" | "ERASE" => { + if args.is_empty() { + Err(eyre::eyre!("DEL requires a filename")) } else { - Err(eyre::eyre!("SET requires variable=value format")) + Ok(Builtin(Remove { + path: PathBuf::from(&args[0]), + })) } }, - "SETVER" => Ok(Builtin(Setver)), + "REM" => Ok(Builtin(Rem { + message: args.join(" "), + })), "VER" => Ok(Builtin(Ver)), - // Utilities "DATE" => Ok(Builtin(Date)), "TIME" => Ok(Builtin(Time)), - // Dummies - "FASTOPEN" => Ok(Builtin(Fastopen)), - - "SMARTDRV" => Ok(Builtin(Smartdrv)), - - "SIZER" => Ok(Builtin(Sizer)), - - // For later - "ASSIGN" => Ok(Builtin(Assign)), - - "ATTRIB" => Ok(Builtin(Attrib)), - - "CHKDSK" => Ok(Builtin(Chkdsk)), - - "DOSKEY" => Ok(Builtin(Doskey)), - - "DOSSHELL" => Ok(Builtin(Dosshell)), - - "EDIT" => Ok(Builtin(Edit)), - - "FASTHELP" => Ok(Builtin(Fasthelp)), - - "HELP" => Ok(Builtin(Help)), - - "JOIN" => Ok(Builtin(Join)), - - "MEM" => Ok(Builtin(Mem)), - - "POWER" => Ok(Builtin(Power)), - - "SUBST" => Ok(Builtin(Subst)), - - "TRUENAME" => Ok(Builtin(Truename)), - - // For much later, if ever - "BREAK" => Ok(Builtin(Break)), - - "CHCP" => Ok(Builtin(Chcp)), - - "CTTY" => Ok(Builtin(Ctty)), - - "DEFRAG" => Ok(Builtin(Defrag)), - - "DISKCOPY" => Ok(Builtin(Diskcopy)), - - "EMM386" => Ok(Builtin(Emm386)), - - "FDISK" => Ok(Builtin(Fdisk)), - - "FORMAT" => Ok(Builtin(Format)), - - "INTERLNK" => Ok(Builtin(Interlnk)), - - "KEYB" => Ok(Builtin(Keyb)), - - "LABEL" => Ok(Builtin(Label)), - - "MODE" => Ok(Builtin(Mode)), - - "MSAV" => Ok(Builtin(Msav)), - - "MSBACKUP" => Ok(Builtin(Msbackup)), - - "MSCDEX" => Ok(Builtin(Mscdex)), - - "MSD" => Ok(Builtin(Msd)), - - "PRINT" => Ok(Builtin(Print)), - - "QBASIC" => Ok(Builtin(Qbasic)), - - "RESTORE" => Ok(Builtin(Restore)), - - "SCANDISK" => Ok(Builtin(Scandisk)), - - "SHARE" => Ok(Builtin(Share)), - - "SYS" => Ok(Builtin(Sys)), - - "UNDELETE" => Ok(Builtin(Undelete)), - - "UNFORMAT" => Ok(Builtin(Unformat)), - - "VOL" => Ok(Builtin(Vol)), - - "VSAFE" => Ok(Builtin(Vsafe)), - - // Scripting - "CALL" => Ok(Builtin(Call)), - - "CHOICE" => Ok(Builtin(Choice)), - - "FOR" => Ok(Builtin(For)), - - "GOTO" => Ok(Builtin(Goto)), - - "IF" => Ok(Builtin(If)), - - "PAUSE" => Ok(Builtin(Pause)), - - "REM" => Ok(Builtin(Rem { - message: args.to_string(), - })), - - "SHIFT" => Ok(Builtin(Shift)), + "PATH" => { + if args.is_empty() { + Ok(Builtin(PathGet)) + } else { + Ok(Builtin(PathSet { + value: args.join(" "), + })) + } + }, - _ if name.len() == 2 && name.ends_with(":") => Ok(Empty), + "SET" => { + if args.is_empty() { + Err(eyre::eyre!("SET requires variable name")) + } else { + let full_arg = args.join(" "); + if let Some(eq_pos) = full_arg.find('=') { + let name = full_arg[..eq_pos].trim().to_string(); + let value = full_arg[eq_pos + 1..].trim().to_string(); + Ok(Builtin(Set { name, value })) + } else { + Err(eyre::eyre!("SET requires variable=value format")) + } + } + }, - _ => { - //let parts: Vec<&str> = args.split_whitespace().collect(); - } + // Drive change (like C:, D:, etc.) + _ if command_name.len() == 2 && command_name.ends_with(':') => Ok(Empty), - //Err(eyre::eyre!("parse not implemented for {:?}", input)), + // External command + _ => Ok(External { + program: command_name, + args, + }), } } } -fn parse_filespec(spec: &str) -> Result<FileSpec> { - match spec.to_uppercase().as_str() { - "CON" => Ok(FileSpec::Con), - "LPT1" => Ok(FileSpec::Lpt1), - "LPT2" => Ok(FileSpec::Lpt2), - "LPT3" => Ok(FileSpec::Lpt3), - "PRN" => Ok(FileSpec::Prn), - _ => Ok(FileSpec::Path(PathBuf::from(spec))), +fn parse_filespec(p0: &str) -> FileSpec { + match p0 { + "CON" => FileSpec::Con, + "LPT1" => FileSpec::Lpt1, + "LPT2" => FileSpec::Lpt2, + "LPT3" => FileSpec::Lpt3, + "PRN" => FileSpec::Prn, + _ => FileSpec::Path(PathBuf::from(p0)), + } +} + +impl Command { + fn parse(input: &str) -> Result<Command> { + let trimmed = input.trim(); + if trimmed.is_empty() { + return Ok(Command::Empty); + } + + let mut lexer = Lexer::new(trimmed); + let tokens = lexer.tokenize(); + + let mut parser = Parser::new(tokens); + parser.parse_command() } } |