diff options
Diffstat (limited to 'src/parser.zig')
-rw-r--r-- | src/parser.zig | 416 |
1 files changed, 416 insertions, 0 deletions
const std = @import("std");
const ArrayList = std.ArrayList;
const Allocator = std.mem.Allocator;

const syntax = @import("syntax.zig");
const FileSpec = syntax.FileSpec;
const RedirectType = syntax.RedirectType;
const Redirect = syntax.Redirect;
const Token = syntax.Token;

const cmd = @import("cmd.zig");
const BuiltinCommand = cmd.BuiltinCommand;
const Command = cmd.Command;

/// Frees the text owned by a `.Word` token; every other token kind is ignored.
fn freeTokenText(allocator: Allocator, token: Token) void {
    switch (token) {
        .Word => |word| allocator.free(word),
        else => {},
    }
}

/// Frees the path owned by a `.Path` file spec; every other kind is ignored.
fn freeFileSpec(allocator: Allocator, spec: FileSpec) void {
    switch (spec) {
        .Path => |path| allocator.free(path),
        else => {},
    }
}

/// Splits one command line into `Token`s.
///
/// Words are separated by whitespace and by the operators `|`, `>`, `>>`, `<`
/// and newline. Single or double quotes group text (including operators and
/// whitespace) into one word; the quote characters themselves are dropped.
const Lexer = struct {
    input: []const u8,
    position: usize,
    current_char: ?u8,

    /// Creates a lexer positioned on the first byte of `input` (or at end of
    /// input when `input` is empty). `input` is borrowed, not copied.
    pub fn init(input: []const u8) Lexer {
        return Lexer{
            .input = input,
            .position = 0,
            .current_char = if (input.len > 0) input[0] else null,
        };
    }

    /// Moves one byte forward; `current_char` becomes null past the end.
    fn advance(self: *Lexer) void {
        self.position += 1;
        self.current_char = if (self.position < self.input.len) self.input[self.position] else null;
    }

    /// Returns the byte after the current one without consuming anything.
    fn peek(self: *const Lexer) ?u8 {
        const next_pos = self.position + 1;
        return if (next_pos < self.input.len) self.input[next_pos] else null;
    }

    /// Skips ASCII whitespace, stopping at a newline because a newline is a
    /// token of its own.
    fn skipWhitespace(self: *Lexer) void {
        while (self.current_char) |ch| {
            if (std.ascii.isWhitespace(ch) and ch != '\n') {
                self.advance();
            } else {
                break;
            }
        }
    }

    /// Reads one word starting at the current position.
    ///
    /// Returns a freshly allocated copy of the word; the caller frees it with
    /// `allocator`. The result is empty when the input consisted only of
    /// quote characters (for example `""`). An unterminated quote simply runs
    /// to the end of the input.
    fn readWord(self: *Lexer, allocator: Allocator) ![]const u8 {
        var word = ArrayList(u8).init(allocator);
        defer word.deinit();

        var in_quotes = false;
        var quote_char: u8 = '"';

        while (self.current_char) |ch| {
            switch (ch) {
                '"', '\'' => {
                    if (!in_quotes) {
                        // Opening quote: remember which kind closes it.
                        in_quotes = true;
                        quote_char = ch;
                    } else if (ch == quote_char) {
                        in_quotes = false;
                    } else {
                        // The other quote kind inside a quoted run is literal.
                        try word.append(ch);
                    }
                    self.advance();
                },
                '|', '>', '<', '\n' => {
                    if (!in_quotes) break;
                    try word.append(ch);
                    self.advance();
                },
                else => {
                    if (!in_quotes and std.ascii.isWhitespace(ch)) break;
                    try word.append(ch);
                    self.advance();
                },
            }
        }

        return allocator.dupe(u8, word.items);
    }

    /// Produces the next token, or `Token.Eof` once the input is exhausted.
    /// `.Word` tokens own their text, allocated with `allocator`.
    fn nextToken(self: *Lexer, allocator: Allocator) !Token {
        while (true) {
            const ch = self.current_char orelse return Token.Eof;
            switch (ch) {
                '\n' => {
                    self.advance();
                    return Token.Newline;
                },
                '|' => {
                    self.advance();
                    return Token.Pipe;
                },
                '>' => {
                    self.advance();
                    if (self.current_char == '>') {
                        self.advance();
                        return Token.RedirectAppend;
                    }
                    return Token.RedirectOut;
                },
                '<' => {
                    self.advance();
                    return Token.RedirectIn;
                },
                else => {
                    if (std.ascii.isWhitespace(ch)) {
                        self.skipWhitespace();
                        continue;
                    }
                    const word = try self.readWord(allocator);
                    if (word.len == 0) {
                        // Only quote characters were consumed (e.g. `""`).
                        // readWord has already moved past them, so do NOT
                        // advance again here: that would swallow the next
                        // character, e.g. the pipe in `echo ""|more`.
                        // A zero-length slice owns no memory, so there is
                        // nothing to free.
                        continue;
                    }
                    return Token{ .Word = word };
                },
            }
        }
    }

    /// Tokenizes the whole input. The returned list always ends with
    /// `Token.Eof`. The caller owns the list and the text of every `.Word`
    /// token in it. On error, everything allocated so far is released.
    pub fn tokenize(self: *Lexer, allocator: Allocator) !ArrayList(Token) {
        var tokens = ArrayList(Token).init(allocator);
        errdefer {
            for (tokens.items) |token| freeTokenText(allocator, token);
            tokens.deinit();
        }

        while (true) {
            const token = try self.nextToken(allocator);
            // Covers the window where the token exists but is not yet in the list.
            errdefer freeTokenText(allocator, token);
            try tokens.append(token);
            if (token == .Eof) break;
        }

        return tokens;
    }
};

/// Recursive-descent parser over a token list:
///
///   pipeline   := redirected ( '|' redirected )*
///   redirected := simple ( ('>' | '>>' | '<') word )*
///   simple     := word word*      (or nothing before Eof/Newline)
///
/// The parser borrows the tokens; every string stored in the resulting
/// `Command` is a separate copy allocated with `allocator`.
const Parser = struct {
    tokens: ArrayList(Token),
    position: usize,
    allocator: Allocator,

    pub fn init(tokens: ArrayList(Token), allocator: Allocator) Parser {
        return Parser{
            .tokens = tokens,
            .position = 0,
            .allocator = allocator,
        };
    }

    /// Returns the token at the cursor, or `Token.Eof` once past the end.
    fn currentToken(self: *const Parser) Token {
        if (self.position < self.tokens.items.len) {
            return self.tokens.items[self.position];
        }
        return Token.Eof;
    }

    /// Moves the cursor forward by one token, never beyond the end.
    fn advance(self: *Parser) void {
        if (self.position < self.tokens.items.len) {
            self.position += 1;
        }
    }

    /// Consumes a `.Word` token and returns its (borrowed) text, or fails
    /// with `error.ExpectedWord` without consuming anything.
    fn expectWord(self: *Parser) ![]const u8 {
        switch (self.currentToken()) {
            .Word => |word| {
                self.advance();
                return word;
            },
            else => return error.ExpectedWord,
        }
    }

    /// Parses one command starting at the cursor.
    pub fn parseCommand(self: *Parser) !Command {
        return self.parsePipeline();
    }

    /// Parses `a | b | c` into a left-nested chain of `.Pipe` commands.
    ///
    /// NOTE(review): if a later stage fails, the already parsed `left`
    /// command is not released here because `Command`'s cleanup API is not
    /// visible in this file.
    fn parsePipeline(self: *Parser) !Command {
        var left = try self.parseRedirectedCommand();

        while (true) {
            switch (self.currentToken()) {
                .Pipe => {
                    self.advance(); // consume |
                    const right = try self.parseRedirectedCommand();
                    const left_ptr = try self.allocator.create(Command);
                    errdefer self.allocator.destroy(left_ptr);
                    const right_ptr = try self.allocator.create(Command);
                    left_ptr.* = left;
                    right_ptr.* = right;
                    left = Command{ .Pipe = .{ .left = left_ptr, .right = right_ptr } };
                },
                else => break,
            }
        }

        return left;
    }

    /// Parses a simple command followed by any number of redirections.
    /// Returns the bare command when there are none, otherwise wraps it in
    /// `.Redirect`. A redirect operator without a following word fails with
    /// `error.ExpectedWord`.
    fn parseRedirectedCommand(self: *Parser) !Command {
        const command = try self.parseSimpleCommand();

        var redirects = ArrayList(Redirect).init(self.allocator);
        errdefer {
            for (redirects.items) |redirect| freeFileSpec(self.allocator, redirect.target);
            redirects.deinit();
        }

        while (true) {
            const redirect_type = switch (self.currentToken()) {
                .RedirectOut => RedirectType.OutputOverwrite,
                .RedirectAppend => RedirectType.OutputAppend,
                .RedirectIn => RedirectType.InputFrom,
                else => break,
            };

            self.advance(); // consume redirect token

            const target_str = try self.expectWord();
            const target = try parseFilespec(self.allocator, target_str);
            // Covers the window where the target exists but is not yet in the list.
            errdefer freeFileSpec(self.allocator, target);

            try redirects.append(Redirect{
                .redirect_type = redirect_type,
                .target = target,
            });
        }

        if (redirects.items.len == 0) {
            redirects.deinit();
            return command;
        }

        const command_ptr = try self.allocator.create(Command);
        command_ptr.* = command;
        return Command{ .Redirect = .{ .command = command_ptr, .redirects = redirects } };
    }

    /// Parses a command name plus its word arguments.
    /// Returns `Command.Empty` at Eof/Newline and `error.UnexpectedToken`
    /// when the cursor is on an operator.
    fn parseSimpleCommand(self: *Parser) !Command {
        switch (self.currentToken()) {
            .Eof, .Newline => return Command.Empty,
            .Word => |command_name| {
                self.advance();

                // The list only borrows token text; parseBuiltinCommand copies
                // whatever it keeps, so the list is always released here,
                // on success and on error alike.
                var args = ArrayList([]const u8).init(self.allocator);
                defer args.deinit();

                // Collect arguments
                while (true) {
                    switch (self.currentToken()) {
                        .Word => |arg| {
                            try args.append(arg);
                            self.advance();
                        },
                        else => break,
                    }
                }

                return self.parseBuiltinCommand(command_name, args);
            },
            else => return error.UnexpectedToken,
        }
    }

    /// Maps a command name (matched case-insensitively) and its arguments to
    /// a builtin, or to `.External` when the name is not a known builtin.
    ///
    /// `command_name` and `args` are only borrowed: every string stored in
    /// the result is a fresh copy, and `args` itself is left for the caller
    /// to release. Builtins that are missing required arguments fail with
    /// `error.ExpectedWord`.
    fn parseBuiltinCommand(self: *Parser, command_name: []const u8, args: ArrayList([]const u8)) !Command {
        const eq = std.ascii.eqlIgnoreCase;

        if (eq(command_name, "ECHO")) {
            if (args.items.len == 0) {
                return Command{ .Builtin = BuiltinCommand.EchoPlain };
            }
            // ON/OFF only act as switches when they are the sole argument.
            if (args.items.len == 1) {
                if (eq(args.items[0], "ON")) return Command{ .Builtin = BuiltinCommand.EchoOn };
                if (eq(args.items[0], "OFF")) return Command{ .Builtin = BuiltinCommand.EchoOff };
            }
            const message = try std.mem.join(self.allocator, " ", args.items);
            return Command{ .Builtin = BuiltinCommand{ .EchoText = .{ .message = message } } };
        } else if (eq(command_name, "CLS")) {
            return Command{ .Builtin = BuiltinCommand.Cls };
        } else if (eq(command_name, "EXIT")) {
            return Command{ .Builtin = BuiltinCommand.Exit };
        } else if (eq(command_name, "MORE")) {
            return Command{ .Builtin = BuiltinCommand.More };
        } else if (eq(command_name, "VERIFY")) {
            return Command{ .Builtin = BuiltinCommand.Verify };
        } else if (eq(command_name, "DIR")) {
            // No argument means the current directory.
            const path = try self.allocator.dupe(u8, if (args.items.len == 0) "." else args.items[0]);
            return Command{ .Builtin = BuiltinCommand{ .Dir = .{ .path = path } } };
        } else if (eq(command_name, "VER")) {
            return Command{ .Builtin = BuiltinCommand.Ver };
        } else if (eq(command_name, "DATE")) {
            return Command{ .Builtin = BuiltinCommand.Date };
        } else if (eq(command_name, "TIME")) {
            return Command{ .Builtin = BuiltinCommand.Time };
        } else if (eq(command_name, "TYPE")) {
            if (args.items.len == 0) {
                return error.ExpectedWord; // Will be caught and show "Bad command or file name"
            }
            const file_spec = try parseFilespec(self.allocator, args.items[0]);
            return Command{ .Builtin = BuiltinCommand{ .Type = .{ .file = file_spec } } };
        } else if (eq(command_name, "SORT")) {
            return Command{ .Builtin = BuiltinCommand.Sort };
        } else if (eq(command_name, "CD") or eq(command_name, "CHDIR")) {
            // No argument is passed on as an empty path.
            const path = try self.allocator.dupe(u8, if (args.items.len == 0) "" else args.items[0]);
            return Command{ .Builtin = BuiltinCommand{ .Chdir = .{ .path = path } } };
        } else if (eq(command_name, "COPY")) {
            if (args.items.len < 2) {
                return error.ExpectedWord; // Will show "Bad command or file name"
            }
            const from_spec = try parseFilespec(self.allocator, args.items[0]);
            errdefer freeFileSpec(self.allocator, from_spec);
            const to_spec = try parseFilespec(self.allocator, args.items[1]);
            return Command{ .Builtin = BuiltinCommand{ .Copy = .{ .from = from_spec, .to = to_spec } } };
        } else if (eq(command_name, "DEL") or eq(command_name, "ERASE")) {
            if (args.items.len == 0) {
                return error.ExpectedWord; // Will show "Bad command or file name"
            }
            const path = try self.allocator.dupe(u8, args.items[0]);
            return Command{ .Builtin = BuiltinCommand{ .Remove = .{ .path = path } } };
        } else if (eq(command_name, "MD") or eq(command_name, "MKDIR")) {
            if (args.items.len == 0) {
                return error.ExpectedWord; // Will show "Bad command or file name"
            }
            const path = try self.allocator.dupe(u8, args.items[0]);
            return Command{ .Builtin = BuiltinCommand{ .Mkdir = .{ .path = path } } };
        } else if (eq(command_name, "RD") or eq(command_name, "RMDIR")) {
            if (args.items.len == 0) {
                return error.ExpectedWord; // Will show "Bad command or file name"
            }
            const path = try self.allocator.dupe(u8, args.items[0]);
            return Command{ .Builtin = BuiltinCommand{ .Rmdir = .{ .path = path } } };
        } else if (eq(command_name, "REN") or eq(command_name, "RENAME")) {
            if (args.items.len < 2) {
                return error.ExpectedWord; // Will show "Bad command or file name"
            }
            const from_spec = try parseFilespec(self.allocator, args.items[0]);
            errdefer freeFileSpec(self.allocator, from_spec);
            const to_spec = try parseFilespec(self.allocator, args.items[1]);
            return Command{ .Builtin = BuiltinCommand{ .Rename = .{ .from = from_spec, .to = to_spec } } };
        } else if (eq(command_name, "MOVE")) {
            // MOVE command is more complex - for now just show not implemented
            return Command{ .Builtin = BuiltinCommand.Move };
        } else if (eq(command_name, "PATH")) {
            if (args.items.len == 0) {
                return Command{ .Builtin = BuiltinCommand.PathGet };
            }
            // `PATH value` or `PATH =value`.
            // NOTE(review): `PATH=value` without a space is lexed as the single
            // word `PATH=value`, so it never reaches this branch and is treated
            // as an external command instead.
            const first = args.items[0];
            const raw_value = if (std.mem.startsWith(u8, first, "=")) first[1..] else first;
            const value = try self.allocator.dupe(u8, raw_value);
            return Command{ .Builtin = BuiltinCommand{ .PathSet = .{ .value = value } } };
        } else {
            // External command - need to duplicate all strings, because the
            // originals belong to the token list.
            const program_copy = try self.allocator.dupe(u8, command_name);
            errdefer self.allocator.free(program_copy);

            var args_copy = ArrayList([]const u8).init(self.allocator);
            errdefer {
                for (args_copy.items) |arg_copy| self.allocator.free(arg_copy);
                args_copy.deinit();
            }

            // Reserve up front so a failed copy can never leave a string
            // that is allocated but not yet tracked by the list.
            try args_copy.ensureTotalCapacity(args.items.len);
            for (args.items) |arg| {
                args_copy.appendAssumeCapacity(try self.allocator.dupe(u8, arg));
            }

            return Command{ .External = .{ .program = program_copy, .args = args_copy } };
        }
    }
};

/// Converts a path string into a `FileSpec`.
///
/// The device names CON, LPT1, LPT2, LPT3 and PRN are recognised
/// case-insensitively; anything else becomes `.Path` holding a copy of
/// `path_str` allocated with `allocator`.
fn parseFilespec(allocator: Allocator, path_str: []const u8) !FileSpec {
    const eq = std.ascii.eqlIgnoreCase;

    if (eq(path_str, "CON")) return FileSpec.Con;
    if (eq(path_str, "LPT1")) return FileSpec.Lpt1;
    if (eq(path_str, "LPT2")) return FileSpec.Lpt2;
    if (eq(path_str, "LPT3")) return FileSpec.Lpt3;
    if (eq(path_str, "PRN")) return FileSpec.Prn;
    return FileSpec{ .Path = try allocator.dupe(u8, path_str) };
}

/// Parses one line of input into a `Command`.
///
/// Leading and trailing spaces, tabs, carriage returns and newlines are
/// ignored; blank input yields `Command.Empty`. The token list is temporary
/// and is released before returning, so every string in the returned command
/// is an independent allocation made with `allocator`.
pub fn parse(input: []const u8, allocator: Allocator) !Command {
    const trimmed = std.mem.trim(u8, input, " \t\r\n");
    if (trimmed.len == 0) {
        return Command.Empty;
    }

    var lexer = Lexer.init(trimmed);
    const tokens = try lexer.tokenize(allocator);
    defer {
        for (tokens.items) |token| freeTokenText(allocator, token);
        tokens.deinit();
    }

    var parser = Parser.init(tokens, allocator);
    return parser.parseCommand();
}