diff options
Diffstat (limited to 'src/parser.zig')
-rw-r--r-- | src/parser.zig | 547 |
1 files changed, 547 insertions, 0 deletions
// Command-line parser for a DOS-style shell (src/parser.zig).
//
// `parse` turns one line of input into a `Command` tree. Ownership rule:
// every string reachable from a `Command` returned by `parse` (program name,
// arguments, paths, messages, redirect targets) is a private copy allocated
// with the allocator handed to `parse`, and is released by `Command.deinit`
// when called with that same allocator. The intermediate token buffer is
// freed before `parse` returns, so nothing in the result may point into it.

const std = @import("std");
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;

/// A file operand: one of the recognised device names or an ordinary path.
pub const FileSpec = union(enum) {
    Con,
    Lpt1,
    Lpt2,
    Lpt3,
    Prn,
    Path: []const u8,

    /// Frees the string held by a `.Path` spec. Device specs own nothing.
    /// Only call this on specs whose path was allocated with `allocator`.
    pub fn deinit(self: FileSpec, allocator: Allocator) void {
        switch (self) {
            .Path => |path| allocator.free(path),
            .Con, .Lpt1, .Lpt2, .Lpt3, .Prn => {},
        }
    }
};

pub const RedirectType = enum {
    OutputOverwrite, // >
    OutputAppend, // >>
    InputFrom, // <
};

/// One `>`, `>>` or `<` clause together with its target.
pub const Redirect = struct {
    redirect_type: RedirectType,
    target: FileSpec,
};

pub const BuiltinCommand = union(enum) {
    // File-oriented
    Copy: struct {
        from: FileSpec,
        to: FileSpec,
    },
    Deltree: struct {
        path: []const u8,
    },
    Dir: struct {
        path: []const u8,
    },
    Fc,
    Find,
    Mkdir: struct {
        path: []const u8,
    },
    Move,
    Remove: struct {
        path: []const u8,
    },
    Rename: struct {
        from: FileSpec,
        to: FileSpec,
    },
    Replace,
    Rmdir: struct {
        path: []const u8,
    },
    Sort,
    Tree: struct {
        path: []const u8,
    },
    Type: struct {
        file: FileSpec,
    },
    Xcopy: struct {
        from: FileSpec,
        to: FileSpec,
        recursive: bool,
    },

    // Shell-oriented
    Append,
    Chdir: struct {
        path: []const u8,
    },
    EchoOff,
    EchoOn,
    EchoPlain,
    EchoText: struct {
        message: []const u8,
    },
    Exit,
    PathGet,
    PathSet: struct {
        value: []const u8,
    },
    PromptGet,
    PromptSet: struct {
        message: []const u8,
    },
    Set: struct {
        name: []const u8,
        value: []const u8,
    },
    Setver,
    Ver,

    // Utilities
    Date,
    Time,

    // Screen-oriented
    Cls,
    More,

    // Dummies
    Verify,
    Fastopen,
    Smartdrv,
    Sizer,

    // For later
    Assign,
    Attrib,
    Chkdsk,
    Doskey,
    Dosshell,
    Edit,
    Fasthelp,
    Help,
    Join,
    Mem,
    Power,
    Subst,
    Truename,

    // For much later, if ever
    Break,
    Chcp,
    Ctty,
    Defrag,
    Diskcopy,
    Emm386,
    Fdisk,
    Format,
    Interlnk,
    Keyb,
    Label,
    Mode,
    Msav,
    Msbackup,
    Mscdex,
    Msd,
    Print_: void, // 'print' is reserved in Zig
    Qbasic,
    Restore,
    Scandisk,
    Share,
    Sys,
    Undelete,
    Unformat,
    Vol,
    Vsafe,

    // Scripting
    Call,
    Choice,
    Echo,
    For,
    Goto,
    If,
    Pause,
    Prompt,
    Rem: struct {
        message: []const u8,
    },
    Shift,

    /// Frees every string payload carried by the active variant.
    /// Variants without string payloads are a no-op. All strings must have
    /// been allocated with `allocator` (as the parser in this file does).
    pub fn deinit(self: BuiltinCommand, allocator: Allocator) void {
        switch (self) {
            inline .Deltree, .Dir, .Mkdir, .Remove, .Rmdir, .Tree, .Chdir => |payload| {
                allocator.free(payload.path);
            },
            inline .EchoText, .PromptSet, .Rem => |payload| {
                allocator.free(payload.message);
            },
            .PathSet => |payload| allocator.free(payload.value),
            .Set => |payload| {
                allocator.free(payload.name);
                allocator.free(payload.value);
            },
            .Type => |payload| payload.file.deinit(allocator),
            inline .Copy, .Rename, .Xcopy => |payload| {
                payload.from.deinit(allocator);
                payload.to.deinit(allocator);
            },
            else => {},
        }
    }
};

/// Parsed command tree.
pub const Command = union(enum) {
    Pipe: struct {
        left: *Command,
        right: *Command,
    },
    Redirect: struct {
        command: *Command,
        redirects: ArrayList(Redirect),
    },
    External: struct {
        program: []const u8,
        args: ArrayList([]const u8),
    },
    Builtin: BuiltinCommand,
    Empty,

    /// Recursively releases child commands, lists and all owned strings.
    /// `allocator` must be the allocator that was passed to `parse`.
    pub fn deinit(self: *Command, allocator: Allocator) void {
        switch (self.*) {
            .Pipe => |*pipe| {
                pipe.left.deinit(allocator);
                allocator.destroy(pipe.left);
                pipe.right.deinit(allocator);
                allocator.destroy(pipe.right);
            },
            .Redirect => |*redirect| {
                redirect.command.deinit(allocator);
                allocator.destroy(redirect.command);
                for (redirect.redirects.items) |entry| entry.target.deinit(allocator);
                redirect.redirects.deinit();
            },
            .External => |*external| {
                allocator.free(external.program);
                for (external.args.items) |arg| allocator.free(arg);
                external.args.deinit();
            },
            .Builtin => |builtin| builtin.deinit(allocator),
            .Empty => {},
        }
    }
};

const Token = union(enum) {
    Word: []const u8, // heap-allocated by the lexer
    Pipe,
    RedirectOut, // >
    RedirectAppend, // >>
    RedirectIn, // <
    Newline,
    Eof,
};

/// Frees the word owned by `token`, if it is a `.Word`.
fn freeToken(token: Token, allocator: Allocator) void {
    switch (token) {
        .Word => |word| allocator.free(word),
        else => {},
    }
}

/// Frees every word in `tokens` and then the list itself.
fn freeTokens(tokens: *ArrayList(Token), allocator: Allocator) void {
    for (tokens.items) |token| freeToken(token, allocator);
    tokens.deinit();
}

const Lexer = struct {
    input: []const u8,
    position: usize,
    current_char: ?u8,

    pub fn init(input: []const u8) Lexer {
        return Lexer{
            .input = input,
            .position = 0,
            .current_char = if (input.len > 0) input[0] else null,
        };
    }

    /// Moves to the next byte; `current_char` becomes null at end of input.
    fn advance(self: *Lexer) void {
        self.position += 1;
        self.current_char = if (self.position < self.input.len) self.input[self.position] else null;
    }

    /// Returns the byte after the current one without consuming anything.
    fn peek(self: *const Lexer) ?u8 {
        const next_pos = self.position + 1;
        return if (next_pos < self.input.len) self.input[next_pos] else null;
    }

    /// Skips ASCII whitespace except '\n', which is a token of its own.
    fn skipWhitespace(self: *Lexer) void {
        while (self.current_char) |ch| {
            if (ch == '\n' or !std.ascii.isWhitespace(ch)) break;
            self.advance();
        }
    }

    /// Reads one word and returns it as a newly allocated slice (caller frees).
    ///
    /// Single or double quotes group text and are stripped from the result.
    /// Inside quotes, whitespace, the operators `| > <`, newlines and the
    /// *other* quote character are taken literally. An unterminated quote
    /// runs to the end of the input.
    fn readWord(self: *Lexer, allocator: Allocator) ![]const u8 {
        var word = ArrayList(u8).init(allocator);
        defer word.deinit();

        var in_quotes = false;
        var quote_char: u8 = '"';

        while (self.current_char) |ch| {
            switch (ch) {
                '"', '\'' => {
                    if (!in_quotes) {
                        in_quotes = true;
                        quote_char = ch;
                    } else if (ch == quote_char) {
                        in_quotes = false;
                    } else {
                        try word.append(ch);
                    }
                },
                '|', '>', '<', '\n' => {
                    if (!in_quotes) break;
                    try word.append(ch);
                },
                else => {
                    if (!in_quotes and std.ascii.isWhitespace(ch)) break;
                    try word.append(ch);
                },
            }
            self.advance();
        }

        return allocator.dupe(u8, word.items);
    }

    /// Produces the next token. `.Word` tokens own their text.
    fn nextToken(self: *Lexer, allocator: Allocator) !Token {
        while (self.current_char) |ch| {
            switch (ch) {
                '\n' => {
                    self.advance();
                    return Token.Newline;
                },
                '|' => {
                    self.advance();
                    return Token.Pipe;
                },
                '>' => {
                    self.advance();
                    if (self.current_char == '>') {
                        self.advance();
                        return Token.RedirectAppend;
                    }
                    return Token.RedirectOut;
                },
                '<' => {
                    self.advance();
                    return Token.RedirectIn;
                },
                else => {
                    if (std.ascii.isWhitespace(ch)) {
                        self.skipWhitespace();
                        continue;
                    }
                    const word = try self.readWord(allocator);
                    if (word.len == 0) {
                        // Only quote characters were read (e.g. `""`).
                        // readWord has already consumed them and stopped on
                        // the following delimiter, so we must NOT advance
                        // here: doing so would swallow a `|`, `>`, `<` or
                        // newline that directly follows the empty quotes.
                        allocator.free(word);
                        continue;
                    }
                    return Token{ .Word = word };
                },
            }
        }
        return Token.Eof;
    }

    /// Lexes the whole input. The returned list always ends with `.Eof`.
    /// Caller owns the list and every `.Word` string in it. On failure
    /// everything allocated so far is released.
    pub fn tokenize(self: *Lexer, allocator: Allocator) !ArrayList(Token) {
        var tokens = ArrayList(Token).init(allocator);
        errdefer freeTokens(&tokens, allocator);

        while (true) {
            const token = try self.nextToken(allocator);
            tokens.append(token) catch |err| {
                // Not yet in the list, so the errdefer above will not see it.
                freeToken(token, allocator);
                return err;
            };
            if (token == .Eof) break;
        }

        return tokens;
    }
};

const Parser = struct {
    /// Borrowed view of the token list; the parser never frees tokens.
    tokens: []const Token,
    position: usize,
    /// Used for everything stored in the resulting `Command`.
    allocator: Allocator,

    pub fn init(tokens: []const Token, allocator: Allocator) Parser {
        return Parser{
            .tokens = tokens,
            .position = 0,
            .allocator = allocator,
        };
    }

    /// Current token, or `.Eof` once the position is past the end.
    fn currentToken(self: *const Parser) Token {
        if (self.position < self.tokens.len) {
            return self.tokens[self.position];
        }
        return Token.Eof;
    }

    fn advance(self: *Parser) void {
        if (self.position < self.tokens.len) {
            self.position += 1;
        }
    }

    /// Consumes a `.Word` and returns its (borrowed) text,
    /// or fails with `error.ExpectedWord`.
    fn expectWord(self: *Parser) ![]const u8 {
        switch (self.currentToken()) {
            .Word => |word| {
                self.advance();
                return word;
            },
            else => return error.ExpectedWord,
        }
    }

    pub fn parseCommand(self: *Parser) !Command {
        return self.parsePipeline();
    }

    /// pipeline := redirected ( '|' redirected )*   (left-associative)
    fn parsePipeline(self: *Parser) !Command {
        var left = try self.parseRedirectedCommand();
        // `left` is reassigned as the pipeline grows; on failure this frees
        // whatever has been built up to that point.
        errdefer left.deinit(self.allocator);

        while (self.currentToken() == .Pipe) {
            self.advance(); // consume |

            var right = try self.parseRedirectedCommand();
            errdefer right.deinit(self.allocator);

            const left_ptr = try self.allocator.create(Command);
            errdefer self.allocator.destroy(left_ptr);
            const right_ptr = try self.allocator.create(Command);

            left_ptr.* = left;
            right_ptr.* = right;
            left = Command{ .Pipe = .{ .left = left_ptr, .right = right_ptr } };
        }

        return left;
    }

    /// redirected := simple ( ('>' | '>>' | '<') WORD )*
    /// Returns the simple command unchanged when no redirect follows it.
    fn parseRedirectedCommand(self: *Parser) !Command {
        var command = try self.parseSimpleCommand();
        errdefer command.deinit(self.allocator);

        var redirects = ArrayList(Redirect).init(self.allocator);
        errdefer {
            for (redirects.items) |redirect| redirect.target.deinit(self.allocator);
            redirects.deinit();
        }

        while (true) {
            const redirect_type = switch (self.currentToken()) {
                .RedirectOut => RedirectType.OutputOverwrite,
                .RedirectAppend => RedirectType.OutputAppend,
                .RedirectIn => RedirectType.InputFrom,
                else => break,
            };
            self.advance(); // consume redirect token

            const target_str = try self.expectWord();
            const target = try parseOwnedFilespec(self.allocator, target_str);
            errdefer target.deinit(self.allocator);

            try redirects.append(Redirect{
                .redirect_type = redirect_type,
                .target = target,
            });
        }

        if (redirects.items.len == 0) {
            redirects.deinit();
            return command;
        }

        const command_ptr = try self.allocator.create(Command);
        command_ptr.* = command;
        return Command{ .Redirect = .{ .command = command_ptr, .redirects = redirects } };
    }

    /// simple := WORD WORD*
    /// Yields `.Empty` at end of input or on a newline, and
    /// `error.UnexpectedToken` when an operator appears where a command
    /// name is required.
    fn parseSimpleCommand(self: *Parser) !Command {
        const command_name = switch (self.currentToken()) {
            .Eof, .Newline => return Command.Empty,
            .Word => |word| word,
            else => return error.UnexpectedToken,
        };
        self.advance();

        // Scratch list of borrowed token words; the resulting Command gets
        // its own copies, so this list is always released here.
        var args = ArrayList([]const u8).init(self.allocator);
        defer args.deinit();

        while (true) {
            switch (self.currentToken()) {
                .Word => |arg| {
                    try args.append(arg);
                    self.advance();
                },
                else => break,
            }
        }

        return self.parseBuiltinCommand(command_name, args.items);
    }

    /// Maps a command name (ASCII case-insensitive) plus arguments to a
    /// builtin, falling back to `.External` for unknown names.
    /// `command_name` and `args` are only borrowed; everything stored in
    /// the result is freshly allocated.
    fn parseBuiltinCommand(self: *Parser, command_name: []const u8, args: []const []const u8) !Command {
        const eql = std.ascii.eqlIgnoreCase;

        if (eql(command_name, "ECHO")) {
            return self.parseEcho(args);
        } else if (eql(command_name, "CLS")) {
            return Command{ .Builtin = BuiltinCommand.Cls };
        } else if (eql(command_name, "EXIT")) {
            return Command{ .Builtin = BuiltinCommand.Exit };
        } else if (eql(command_name, "MORE")) {
            return Command{ .Builtin = BuiltinCommand.More };
        } else if (eql(command_name, "VERIFY")) {
            return Command{ .Builtin = BuiltinCommand.Verify };
        } else if (eql(command_name, "DIR")) {
            // The default "." is duplicated too so deinit can free uniformly.
            const path = try self.allocator.dupe(u8, if (args.len == 0) "." else args[0]);
            return Command{ .Builtin = BuiltinCommand{ .Dir = .{ .path = path } } };
        } else if (eql(command_name, "VER")) {
            return Command{ .Builtin = BuiltinCommand.Ver };
        } else if (eql(command_name, "DATE")) {
            return Command{ .Builtin = BuiltinCommand.Date };
        } else if (eql(command_name, "TIME")) {
            return Command{ .Builtin = BuiltinCommand.Time };
        } else if (eql(command_name, "TYPE")) {
            if (args.len == 0) {
                return error.ExpectedWord; // Will be caught and show "Bad command or file name"
            }
            const file_spec = try parseOwnedFilespec(self.allocator, args[0]);
            return Command{ .Builtin = BuiltinCommand{ .Type = .{ .file = file_spec } } };
        } else if (eql(command_name, "SORT")) {
            return Command{ .Builtin = BuiltinCommand.Sort };
        }

        return self.makeExternal(command_name, args);
    }

    /// ECHO with no arguments, with a lone ON/OFF, or with a message.
    /// The message is the arguments joined by single spaces.
    fn parseEcho(self: *Parser, args: []const []const u8) !Command {
        if (args.len == 0) {
            return Command{ .Builtin = BuiltinCommand.EchoPlain };
        }
        if (args.len == 1) {
            if (std.ascii.eqlIgnoreCase(args[0], "ON")) {
                return Command{ .Builtin = BuiltinCommand.EchoOn };
            }
            if (std.ascii.eqlIgnoreCase(args[0], "OFF")) {
                return Command{ .Builtin = BuiltinCommand.EchoOff };
            }
        }
        const message = try std.mem.join(self.allocator, " ", args);
        return Command{ .Builtin = BuiltinCommand{ .EchoText = .{ .message = message } } };
    }

    /// Builds an `.External` command holding private copies of the program
    /// name and of every argument.
    fn makeExternal(self: *Parser, program: []const u8, args: []const []const u8) !Command {
        var owned_args = ArrayList([]const u8).init(self.allocator);
        errdefer {
            for (owned_args.items) |arg| self.allocator.free(arg);
            owned_args.deinit();
        }

        try owned_args.ensureTotalCapacity(args.len);
        for (args) |arg| {
            const copy = try self.allocator.dupe(u8, arg);
            owned_args.appendAssumeCapacity(copy);
        }

        const owned_program = try self.allocator.dupe(u8, program);
        return Command{ .External = .{ .program = owned_program, .args = owned_args } };
    }
};

/// Classifies `path_str` as a device name (CON, LPT1..LPT3, PRN; ASCII
/// case-insensitive) or an ordinary path. Never allocates: a `.Path` result
/// borrows `path_str`. Strings of 256 bytes or more are always paths.
fn parseFilespec(path_str: []const u8) FileSpec {
    var upper_buf: [256]u8 = undefined;
    if (path_str.len >= upper_buf.len) return FileSpec{ .Path = path_str };
    const upper_str = std.ascii.upperString(upper_buf[0..path_str.len], path_str);

    if (std.mem.eql(u8, upper_str, "CON")) return FileSpec.Con;
    if (std.mem.eql(u8, upper_str, "LPT1")) return FileSpec.Lpt1;
    if (std.mem.eql(u8, upper_str, "LPT2")) return FileSpec.Lpt2;
    if (std.mem.eql(u8, upper_str, "LPT3")) return FileSpec.Lpt3;
    if (std.mem.eql(u8, upper_str, "PRN")) return FileSpec.Prn;
    return FileSpec{ .Path = path_str };
}

/// Like `parseFilespec`, but a `.Path` result owns a fresh copy of the
/// string, to be released with `FileSpec.deinit`.
fn parseOwnedFilespec(allocator: Allocator, path_str: []const u8) !FileSpec {
    const spec = parseFilespec(path_str);
    return switch (spec) {
        .Path => |path| FileSpec{ .Path = try allocator.dupe(u8, path) },
        else => spec,
    };
}

/// Parses one line of shell input into a `Command`.
///
/// Blank input yields `.Empty`. Fails with `error.ExpectedWord`
/// (redirect without a target, TYPE without an argument),
/// `error.UnexpectedToken` (operator where a command name is required)
/// or an allocation error.
///
/// The result owns all of its memory and does not reference `input`;
/// release it with `Command.deinit` using the same `allocator`.
pub fn parse(input: []const u8, allocator: Allocator) !Command {
    const trimmed = std.mem.trim(u8, input, " \t\r\n");
    if (trimmed.len == 0) {
        return Command.Empty;
    }

    var lexer = Lexer.init(trimmed);
    var tokens = try lexer.tokenize(allocator);
    // Safe to free unconditionally: the parser copies every string it keeps.
    defer freeTokens(&tokens, allocator);

    var parser = Parser.init(tokens.items, allocator);
    return parser.parseCommand();
}