summaryrefslogtreecommitdiff
path: root/src/parser.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.zig')
-rw-r--r--src/parser.zig416
1 files changed, 416 insertions, 0 deletions
diff --git a/src/parser.zig b/src/parser.zig
new file mode 100644
index 0000000..b07f31c
--- /dev/null
+++ b/src/parser.zig
@@ -0,0 +1,416 @@
+const std = @import("std");
+const ArrayList = std.ArrayList;
+const Allocator = std.mem.Allocator;
+
+const syntax = @import("syntax.zig");
+const FileSpec = syntax.FileSpec;
+const RedirectType = syntax.RedirectType;
+const Redirect = syntax.Redirect;
+const Token = syntax.Token;
+
+const cmd = @import("cmd.zig");
+const BuiltinCommand = cmd.BuiltinCommand;
+const Command = cmd.Command;
+
+/// Byte-oriented tokenizer for a single command line.
+///
+/// `position` is the index of the byte currently being examined in `input`,
+/// and `current_char` mirrors that byte (`null` once the input is exhausted).
+const Lexer = struct {
+    input: []const u8,
+    position: usize,
+    current_char: ?u8,
+
+    /// Creates a lexer positioned on the first byte of `input`.
+    pub fn init(input: []const u8) Lexer {
+        return Lexer{
+            .input = input,
+            .position = 0,
+            .current_char = if (input.len > 0) input[0] else null,
+        };
+    }
+
+    /// Steps to the next byte; `current_char` becomes `null` past the end.
+    fn advance(self: *Lexer) void {
+        self.position += 1;
+        self.current_char = if (self.position < self.input.len) self.input[self.position] else null;
+    }
+
+    /// Returns the byte after the current one without consuming anything.
+    fn peek(self: *const Lexer) ?u8 {
+        const next_pos = self.position + 1;
+        return if (next_pos < self.input.len) self.input[next_pos] else null;
+    }
+
+    /// Skips ASCII whitespace but stops at '\n', which is a token of its own.
+    fn skipWhitespace(self: *Lexer) void {
+        while (self.current_char) |ch| {
+            if (std.ascii.isWhitespace(ch) and ch != '\n') {
+                self.advance();
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Reads one word starting at the current byte and returns it as a newly
+    /// allocated slice; the caller frees it with `allocator`.
+    ///
+    /// Single and double quotes group text: the delimiting quotes are dropped,
+    /// and inside them whitespace, `|`, `>`, `<` and newlines are kept
+    /// literally. A quote of the other kind inside a quoted run is ordinary
+    /// text. Outside quotes the word ends at whitespace, `|`, `>`, `<` or a
+    /// newline, and that terminating byte is left unconsumed. An unterminated
+    /// quote runs to the end of the input.
+    fn readWord(self: *Lexer, allocator: Allocator) ![]const u8 {
+        var word = ArrayList(u8).init(allocator);
+        defer word.deinit();
+
+        var in_quotes = false;
+        var quote_char: u8 = '"';
+
+        while (self.current_char) |ch| {
+            switch (ch) {
+                '"', '\'' => {
+                    if (!in_quotes) {
+                        // Opening quote: remember which kind closes it.
+                        in_quotes = true;
+                        quote_char = ch;
+                        self.advance();
+                    } else if (ch == quote_char) {
+                        // Matching closing quote.
+                        in_quotes = false;
+                        self.advance();
+                    } else {
+                        // The other quote kind inside a quoted run is literal.
+                        try word.append(ch);
+                        self.advance();
+                    }
+                },
+                '|', '>', '<', '\n' => {
+                    if (!in_quotes) break;
+                    try word.append(ch);
+                    self.advance();
+                },
+                else => {
+                    if (!in_quotes and std.ascii.isWhitespace(ch)) break;
+                    try word.append(ch);
+                    self.advance();
+                },
+            }
+        }
+
+        return allocator.dupe(u8, word.items);
+    }
+
+    /// Produces the next token, or `Token.Eof` at the end of the input.
+    ///
+    /// `Token.Word` payloads are allocated with `allocator` and owned by the
+    /// caller. `>>` is recognised as a single `Token.RedirectAppend`.
+    fn nextToken(self: *Lexer, allocator: Allocator) !Token {
+        while (true) {
+            const ch = self.current_char orelse return Token.Eof;
+            switch (ch) {
+                '\n' => {
+                    self.advance();
+                    return Token.Newline;
+                },
+                '|' => {
+                    self.advance();
+                    return Token.Pipe;
+                },
+                '>' => {
+                    self.advance();
+                    if (self.current_char == '>') {
+                        self.advance();
+                        return Token.RedirectAppend;
+                    }
+                    return Token.RedirectOut;
+                },
+                '<' => {
+                    self.advance();
+                    return Token.RedirectIn;
+                },
+                else => {
+                    if (std.ascii.isWhitespace(ch)) {
+                        self.skipWhitespace();
+                        continue;
+                    }
+                    const word = try self.readWord(allocator);
+                    if (word.len == 0) {
+                        // Only an empty quoted string (`""`, `''`) or a lone
+                        // quote yields an empty word, and readWord has already
+                        // consumed those bytes. Do NOT advance again here:
+                        // that would swallow the byte that follows, e.g. the
+                        // `|` in `""|more`.
+                        allocator.free(word);
+                        continue;
+                    }
+                    return Token{ .Word = word };
+                },
+            }
+        }
+    }
+
+    /// Tokenizes the remaining input into a list that always ends with
+    /// `Token.Eof`.
+    ///
+    /// The caller owns the returned list and every `Token.Word` string in it.
+    /// If an error occurs part-way through, everything allocated so far is
+    /// released before the error is returned.
+    pub fn tokenize(self: *Lexer, allocator: Allocator) !ArrayList(Token) {
+        var tokens = ArrayList(Token).init(allocator);
+        errdefer {
+            for (tokens.items) |token| {
+                switch (token) {
+                    .Word => |word| allocator.free(word),
+                    else => {},
+                }
+            }
+            tokens.deinit();
+        }
+
+        while (true) {
+            const token = try self.nextToken(allocator);
+            tokens.append(token) catch |err| {
+                // The token never made it into the list, so the errdefer
+                // above will not see it; release its word here.
+                switch (token) {
+                    .Word => |word| allocator.free(word),
+                    else => {},
+                }
+                return err;
+            };
+            const is_eof = switch (token) {
+                .Eof => true,
+                else => false,
+            };
+            if (is_eof) break;
+        }
+
+        return tokens;
+    }
+};
+
+/// Parser that turns a token list into a `Command` tree.
+///
+/// The parser only borrows `tokens`: it never frees the list or the word
+/// strings inside it. Every string stored in a returned `Command` is a fresh
+/// copy allocated with `allocator`, so the tokens may be freed once parsing is
+/// done.
+const Parser = struct {
+    tokens: ArrayList(Token),
+    position: usize,
+    allocator: Allocator,
+
+    /// Creates a parser positioned on the first token.
+    pub fn init(tokens: ArrayList(Token), allocator: Allocator) Parser {
+        return Parser{
+            .tokens = tokens,
+            .position = 0,
+            .allocator = allocator,
+        };
+    }
+
+    /// Returns the token under the cursor, or `Token.Eof` past the end.
+    fn currentToken(self: *const Parser) Token {
+        if (self.position < self.tokens.items.len) {
+            return self.tokens.items[self.position];
+        }
+        return Token.Eof;
+    }
+
+    /// Moves the cursor forward by one token, never beyond the end.
+    fn advance(self: *Parser) void {
+        if (self.position < self.tokens.items.len) {
+            self.position += 1;
+        }
+    }
+
+    /// Consumes a `Word` token and returns its (borrowed) text, or fails with
+    /// `error.ExpectedWord` without consuming anything.
+    fn expectWord(self: *Parser) ![]const u8 {
+        switch (self.currentToken()) {
+            .Word => |word| {
+                self.advance();
+                return word;
+            },
+            else => return error.ExpectedWord,
+        }
+    }
+
+    /// Releases the heap string held by a `FileSpec.Path`; the other variants
+    /// carry no allocation.
+    fn freeFilespec(allocator: Allocator, spec: FileSpec) void {
+        switch (spec) {
+            .Path => |path| allocator.free(path),
+            else => {},
+        }
+    }
+
+    /// Returns a fresh copy of the first argument, or `error.ExpectedWord`
+    /// when no argument was given.
+    fn dupeFirstArg(self: *Parser, args: []const []const u8) ![]u8 {
+        if (args.len == 0) return error.ExpectedWord;
+        return self.allocator.dupe(u8, args[0]);
+    }
+
+    /// Parses one command line starting at the current token.
+    pub fn parseCommand(self: *Parser) !Command {
+        return self.parsePipeline();
+    }
+
+    /// Parses `cmd | cmd | ...` into a left-nested chain of `Command.Pipe`.
+    fn parsePipeline(self: *Parser) !Command {
+        // NOTE(review): if a later stage fails, the commands parsed so far are
+        // not released here because no Command teardown routine is visible in
+        // this file.
+        var left = try self.parseRedirectedCommand();
+
+        while (true) {
+            switch (self.currentToken()) {
+                .Pipe => {
+                    self.advance(); // consume |
+                    const right = try self.parseRedirectedCommand();
+                    const left_ptr = try self.allocator.create(Command);
+                    // Do not leak the left node if the right one cannot be allocated.
+                    errdefer self.allocator.destroy(left_ptr);
+                    const right_ptr = try self.allocator.create(Command);
+                    left_ptr.* = left;
+                    right_ptr.* = right;
+                    left = Command{ .Pipe = .{ .left = left_ptr, .right = right_ptr } };
+                },
+                else => break,
+            }
+        }
+
+        return left;
+    }
+
+    /// Parses a simple command followed by any number of `>`, `>>` or `<`
+    /// redirections. Without redirections the simple command is returned
+    /// unchanged; otherwise it is wrapped in `Command.Redirect`.
+    fn parseRedirectedCommand(self: *Parser) !Command {
+        const command = try self.parseSimpleCommand();
+        var redirects = ArrayList(Redirect).init(self.allocator);
+        // On any failure below, release the list and the targets collected so far.
+        errdefer {
+            for (redirects.items) |redirect| freeFilespec(self.allocator, redirect.target);
+            redirects.deinit();
+        }
+
+        while (true) {
+            const redirect_type = switch (self.currentToken()) {
+                .RedirectOut => RedirectType.OutputOverwrite,
+                .RedirectAppend => RedirectType.OutputAppend,
+                .RedirectIn => RedirectType.InputFrom,
+                else => break,
+            };
+
+            self.advance(); // consume redirect token
+
+            const target_str = try self.expectWord();
+            const target = try parseFilespec(self.allocator, target_str);
+            // Not yet in the list, so the outer errdefer would miss it.
+            errdefer freeFilespec(self.allocator, target);
+
+            try redirects.append(Redirect{
+                .redirect_type = redirect_type,
+                .target = target,
+            });
+        }
+
+        if (redirects.items.len == 0) {
+            redirects.deinit();
+            return command;
+        }
+
+        const command_ptr = try self.allocator.create(Command);
+        command_ptr.* = command;
+        return Command{ .Redirect = .{ .command = command_ptr, .redirects = redirects } };
+    }
+
+    /// Parses a command name plus its word arguments. Returns `Command.Empty`
+    /// at end of input or at a newline, and `error.UnexpectedToken` when the
+    /// command starts with any other non-word token.
+    fn parseSimpleCommand(self: *Parser) !Command {
+        switch (self.currentToken()) {
+            .Eof, .Newline => return Command.Empty,
+            .Word => |command_name| {
+                self.advance();
+                // The list only borrows the token strings. It is always
+                // released here, on success and on error alike, because
+                // parseBuiltinCommand copies whatever it keeps.
+                var args = ArrayList([]const u8).init(self.allocator);
+                defer args.deinit();
+
+                // Collect arguments
+                while (true) {
+                    switch (self.currentToken()) {
+                        .Word => |arg| {
+                            try args.append(arg);
+                            self.advance();
+                        },
+                        else => break,
+                    }
+                }
+
+                return self.parseBuiltinCommand(command_name, args.items);
+            },
+            else => return error.UnexpectedToken,
+        }
+    }
+
+    /// Builds the `Command` for `command_name` with `args`.
+    ///
+    /// Built-in names are matched case-insensitively (ASCII); any other name
+    /// becomes `Command.External`. `args` is borrowed: everything stored in
+    /// the result is copied with `self.allocator`. Commands that are missing
+    /// a required argument fail with `error.ExpectedWord`; the original
+    /// author's note says the caller reports that as "Bad command or file
+    /// name" (the caller is not visible here).
+    fn parseBuiltinCommand(self: *Parser, command_name: []const u8, args: []const []const u8) !Command {
+        const allocator = self.allocator;
+        const eql = std.ascii.eqlIgnoreCase;
+
+        if (eql(command_name, "ECHO")) {
+            if (args.len == 0) {
+                return Command{ .Builtin = BuiltinCommand.EchoPlain };
+            }
+            // ON / OFF are switches only when they are the sole argument.
+            if (args.len == 1) {
+                if (eql(args[0], "ON")) return Command{ .Builtin = BuiltinCommand.EchoOn };
+                if (eql(args[0], "OFF")) return Command{ .Builtin = BuiltinCommand.EchoOff };
+            }
+            const message = try std.mem.join(allocator, " ", args);
+            return Command{ .Builtin = BuiltinCommand{ .EchoText = .{ .message = message } } };
+        }
+        if (eql(command_name, "CLS")) {
+            return Command{ .Builtin = BuiltinCommand.Cls };
+        }
+        if (eql(command_name, "EXIT")) {
+            return Command{ .Builtin = BuiltinCommand.Exit };
+        }
+        if (eql(command_name, "MORE")) {
+            return Command{ .Builtin = BuiltinCommand.More };
+        }
+        if (eql(command_name, "VERIFY")) {
+            return Command{ .Builtin = BuiltinCommand.Verify };
+        }
+        if (eql(command_name, "DIR")) {
+            // No argument means the current directory.
+            const path = try allocator.dupe(u8, if (args.len == 0) "." else args[0]);
+            return Command{ .Builtin = BuiltinCommand{ .Dir = .{ .path = path } } };
+        }
+        if (eql(command_name, "VER")) {
+            return Command{ .Builtin = BuiltinCommand.Ver };
+        }
+        if (eql(command_name, "DATE")) {
+            return Command{ .Builtin = BuiltinCommand.Date };
+        }
+        if (eql(command_name, "TIME")) {
+            return Command{ .Builtin = BuiltinCommand.Time };
+        }
+        if (eql(command_name, "TYPE")) {
+            if (args.len == 0) return error.ExpectedWord;
+            const file_spec = try parseFilespec(allocator, args[0]);
+            return Command{ .Builtin = BuiltinCommand{ .Type = .{ .file = file_spec } } };
+        }
+        if (eql(command_name, "SORT")) {
+            return Command{ .Builtin = BuiltinCommand.Sort };
+        }
+        if (eql(command_name, "CD") or eql(command_name, "CHDIR")) {
+            // No argument is passed on as an empty path.
+            const path = try allocator.dupe(u8, if (args.len == 0) "" else args[0]);
+            return Command{ .Builtin = BuiltinCommand{ .Chdir = .{ .path = path } } };
+        }
+        if (eql(command_name, "COPY")) {
+            if (args.len < 2) return error.ExpectedWord;
+            const from_spec = try parseFilespec(allocator, args[0]);
+            errdefer freeFilespec(allocator, from_spec);
+            const to_spec = try parseFilespec(allocator, args[1]);
+            return Command{ .Builtin = BuiltinCommand{ .Copy = .{ .from = from_spec, .to = to_spec } } };
+        }
+        if (eql(command_name, "DEL") or eql(command_name, "ERASE")) {
+            const path = try self.dupeFirstArg(args);
+            return Command{ .Builtin = BuiltinCommand{ .Remove = .{ .path = path } } };
+        }
+        if (eql(command_name, "MD") or eql(command_name, "MKDIR")) {
+            const path = try self.dupeFirstArg(args);
+            return Command{ .Builtin = BuiltinCommand{ .Mkdir = .{ .path = path } } };
+        }
+        if (eql(command_name, "RD") or eql(command_name, "RMDIR")) {
+            const path = try self.dupeFirstArg(args);
+            return Command{ .Builtin = BuiltinCommand{ .Rmdir = .{ .path = path } } };
+        }
+        if (eql(command_name, "REN") or eql(command_name, "RENAME")) {
+            if (args.len < 2) return error.ExpectedWord;
+            const from_spec = try parseFilespec(allocator, args[0]);
+            errdefer freeFilespec(allocator, from_spec);
+            const to_spec = try parseFilespec(allocator, args[1]);
+            return Command{ .Builtin = BuiltinCommand{ .Rename = .{ .from = from_spec, .to = to_spec } } };
+        }
+        if (eql(command_name, "MOVE")) {
+            // MOVE command is more complex - for now just show not implemented
+            return Command{ .Builtin = BuiltinCommand.Move };
+        }
+        if (eql(command_name, "PATH")) {
+            if (args.len == 0) {
+                return Command{ .Builtin = BuiltinCommand.PathGet };
+            }
+            // `PATH =value` or `PATH value`: drop a leading '=' from the argument.
+            const raw = args[0];
+            const value = try allocator.dupe(u8, if (std.mem.startsWith(u8, raw, "=")) raw[1..] else raw);
+            return Command{ .Builtin = BuiltinCommand{ .PathSet = .{ .value = value } } };
+        }
+
+        // External command: copy the program name and every argument so the
+        // result does not point into the token list.
+        const program_copy = try allocator.dupe(u8, command_name);
+        errdefer allocator.free(program_copy);
+
+        var args_copy = ArrayList([]const u8).init(allocator);
+        errdefer {
+            for (args_copy.items) |arg| allocator.free(arg);
+            args_copy.deinit();
+        }
+        for (args) |arg| {
+            const arg_copy = try allocator.dupe(u8, arg);
+            // Not yet in args_copy, so the errdefer above would miss it.
+            errdefer allocator.free(arg_copy);
+            try args_copy.append(arg_copy);
+        }
+        return Command{ .External = .{ .program = program_copy, .args = args_copy } };
+    }
+};
+
+/// Converts a path word into a `FileSpec`.
+///
+/// The device names CON, PRN, LPT1, LPT2 and LPT3 are recognised regardless
+/// of ASCII case and map to their dedicated variants. Anything else becomes
+/// `FileSpec.Path` holding a copy of `path_str` allocated with `allocator`;
+/// the caller owns that copy.
+fn parseFilespec(allocator: Allocator, path_str: []const u8) !FileSpec {
+    const same = std.ascii.eqlIgnoreCase;
+
+    if (same(path_str, "CON")) return FileSpec.Con;
+    if (same(path_str, "PRN")) return FileSpec.Prn;
+    if (same(path_str, "LPT1")) return FileSpec.Lpt1;
+    if (same(path_str, "LPT2")) return FileSpec.Lpt2;
+    if (same(path_str, "LPT3")) return FileSpec.Lpt3;
+
+    // Not a device name: keep it as an ordinary path.
+    const owned_path = try allocator.dupe(u8, path_str);
+    return FileSpec{ .Path = owned_path };
+}
+
+/// Parses one line of input into a `Command`.
+///
+/// Leading and trailing spaces, tabs, carriage returns and newlines are
+/// ignored; input that is empty after trimming yields `Command.Empty`. The
+/// intermediate tokens are released before returning, whether parsing
+/// succeeds or fails.
+pub fn parse(input: []const u8, allocator: Allocator) !Command {
+    const line = std.mem.trim(u8, input, " \t\r\n");
+    if (line.len == 0) return Command.Empty;
+
+    var lexer = Lexer.init(line);
+    const tokens = try lexer.tokenize(allocator);
+    defer {
+        // Word tokens own their text; all other tokens carry no allocation.
+        for (tokens.items) |token| {
+            switch (token) {
+                .Word => |text| allocator.free(text),
+                else => {},
+            }
+        }
+        tokens.deinit();
+    }
+
+    var parser = Parser.init(tokens, allocator);
+    return parser.parseCommand();
+}