/*
 * arsh - a small interactive shell.
 *
 * Reads command lines (from a terminal with in-place line editing, or from a
 * file/pipe), splits them into pipelines, performs quote/variable/command
 * substitution and runs the result with fork/execvp.
 */

/* Ask for the POSIX.1-2008 API (sigaction, strndup, sigsetjmp, ...). */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <termios.h>
#include <unistd.h>

/*
 * Size limits. An enum is used so that the names are integer constant
 * expressions (usable as array bounds) on compilers without C23 constexpr.
 */
enum {
    max_length = 1024,         /* longest input line / general scratch size */
    max_env_var_length = 256,  /* longest variable name or PS1 value kept */
    max_line = 1024,           /* buffer size for non-interactive reads */
    restart_value = 69         /* value sigsetjmp() yields after Ctrl-C */
};

const char* token_separators = " \t\n\r";
char* esc = "\x1B";
char* default_prompt = "arsh> ";

static sigjmp_buf restart_jmp_buf;
static volatile sig_atomic_t jump_active = 0;

/** Returns the smaller of a and b. */
size_t min_size_t(size_t a, size_t b) {
    return a > b ? b : a;
}

/**
 * SIGINT handler: abandons whatever the shell was doing and resumes at the
 * sigsetjmp() in main. Does nothing until main has armed the jump target.
 */
void sigint_handler(int signo) {
    (void)signo;
    if (!jump_active) {
        return;
    }
    siglongjmp(restart_jmp_buf, restart_value);
}

/**
 * Checked calloc: exits the process if the allocation fails.
 * A NULL result for a zero-sized request is not a failure (calloc is allowed
 * to return NULL in that case) and is passed through to the caller.
 */
void* ccalloc(size_t size, size_t n) {
    void* ret = calloc(size, n);
    if (!ret && size != 0 && n != 0) {
        perror("calloc failed");
        exit(1);
    }
    return ret;
}

static char string_buffer[max_line];

/**
 * Reads one line from stream into a static buffer and strips the trailing
 * newline. Returns NULL on end of file or on a read error (after reporting
 * it). The returned buffer is overwritten by the next call.
 */
char* __readline_file(FILE* stream) {
    if (fgets(string_buffer, max_line, stream) == NULL) {
        if (ferror(stream)) {
            perror("fgets error");
            /* Clear the error on the stream that actually failed. */
            clearerr(stream);
        }
        return NULL;
    }

    size_t length = strlen(string_buffer);
    if (length > 0 && string_buffer[length - 1] == '\n') {
        string_buffer[length - 1] = '\0';
    }
    return string_buffer;
}

static struct termios orig_termios;
/* True while the terminal is in raw mode and orig_termios holds the settings
 * to go back to. */
static bool raw_mode_active = false;

/** Restores the terminal settings saved by enable_raw_mode(), if any. */
void disable_raw_mode(void) {
    if (!raw_mode_active) {
        return;
    }
    tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
    raw_mode_active = false;
}

/**
 * Saves the current terminal settings and switches stdin to unbuffered,
 * no-echo input. Exits if the settings cannot be read.
 *
 * If raw mode is still active (a Ctrl-C jumped out of the line editor), the
 * saved settings are left alone so that the raw settings are never mistaken
 * for the original ones.
 */
void enable_raw_mode(void) {
    static bool atexit_registered = false;

    if (raw_mode_active) {
        return;
    }
    if (tcgetattr(STDIN_FILENO, &orig_termios) == -1) {
        exit(1);
    }
    /* Register the exit hook once, not once per prompt. */
    if (!atexit_registered) {
        atexit(disable_raw_mode);
        atexit_registered = true;
    }

    struct termios raw = orig_termios;
    raw.c_lflag &= ~(tcflag_t)(ECHO | ICANON);
    raw.c_iflag &= ~(tcflag_t)(IXON | ICRNL);
    raw.c_oflag &= ~(tcflag_t)(OPOST);
    if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw) == 0) {
        raw_mode_active = true;
    }
}

/**
 * Length in bytes of the UTF-8 sequence introduced by lead byte c.
 * Bytes that are not a valid lead byte count as a 1-byte sequence.
 */
size_t utf8_char_len(unsigned char c) {
    if ((c & 0x80) == 0) return 1;
    if ((c & 0xE0) == 0xC0) return 2;
    if ((c & 0xF0) == 0xE0) return 3;
    if ((c & 0xF8) == 0xF0) return 4;
    return 1;
}

/** Byte offset of the character that starts before pos (0 if pos is 0). */
size_t prev_char_start(char* buf, size_t pos) {
    if (pos == 0) return 0;
    pos--;
    /* Walk back over continuation bytes (10xxxxxx). */
    while (pos > 0 && (((unsigned char)buf[pos] & 0xC0) == 0x80)) {
        pos--;
    }
    return pos;
}

/**
 * Byte offset of the character following the one at pos, never beyond len
 * (a truncated multi-byte sequence at the end must not step past the data).
 */
size_t next_char_start(char* buf, size_t len, size_t pos) {
    if (pos >= len) return len;
    return min_size_t(len, pos + utf8_char_len((unsigned char)buf[pos]));
}

/**
 * Reads one UTF-8 encoded character from fd into out (room for 4 bytes).
 * Returns the number of bytes stored, 0 if the descriptor is at end of file
 * before the first byte, or -1 on a read error or an incomplete sequence.
 */
ssize_t read_utf8_char(int fd, char* out) {
    unsigned char lead;
    ssize_t got = read(fd, &lead, 1);
    if (got == 0) return 0;
    if (got != 1) return -1;

    size_t len = utf8_char_len(lead);
    out[0] = (char)lead;
    for (size_t i = 1; i < len; i++) {
        if (read(fd, &out[i], 1) != 1) return -1;
    }
    return (ssize_t)len;
}

/** Number of characters (one cell each) in the first len bytes of buf. */
size_t utf8_display_width(const char *buf, size_t len) {
    size_t width = 0;
    for (size_t i = 0; i < len; i += utf8_char_len((unsigned char)buf[i])) {
        width++;
    }
    return width;
}

/**
 * Number of characters in s that occupy a terminal cell: like
 * utf8_display_width(), but "ESC [ ... final-byte" sequences count as zero.
 */
size_t visible_width(const char *s) {
    size_t i = 0;
    size_t width = 0;

    while (s[i]) {
        if (s[i] == '\x1b' && s[i + 1] == '[') {
            i += 2;
            while (s[i] && !(s[i] >= '@' && s[i] <= '~')) i++;
            if (s[i]) i++;
            continue;
        }
        /* Advance byte by byte so a truncated sequence cannot carry the
         * index past the terminating NUL. */
        size_t char_len = utf8_char_len((unsigned char)s[i]);
        for (size_t k = 0; k < char_len && s[i]; k++) i++;
        width++;
    }
    return width;
}

/** Writes all size bytes of data to fd, retrying short and interrupted writes. */
static ssize_t write_all(int fd, const char* data, size_t size) {
    size_t done = 0;
    while (done < size) {
        ssize_t n = write(fd, data + done, size - done);
        if (n < 0) {
            if (errno == EINTR) continue;
            return -1;
        }
        done += (size_t)n;
    }
    return (ssize_t)done;
}

/** Writes the NUL-terminated string str to fd. */
static ssize_t swrite(int fd, const char* str) {
    return write_all(fd, str, strlen(str));
}

/** Redraws prompt + the edited line and puts the cursor at byte offset cursor. */
static void render_line(const char* prompt, const char* buf, size_t len, size_t cursor) {
    swrite(STDOUT_FILENO, "\r");
    swrite(STDOUT_FILENO, prompt);
    write_all(STDOUT_FILENO, buf, len);
    swrite(STDOUT_FILENO, "\x1b[K");

    size_t column = visible_width(prompt) + utf8_display_width(buf, cursor);
    swrite(STDOUT_FILENO, "\r");
    /* "ESC[0C" is treated as "move one column" by terminals, so only emit
     * the move when there is a distance to cover. */
    if (column > 0) {
        char seq[64];
        snprintf(seq, sizeof(seq), "\x1b[%zuC", column);
        swrite(STDOUT_FILENO, seq);
    }
}

/**
 * Reads one line from the terminal with basic editing (left/right arrows,
 * backspace, delete). Returns a pointer to a static buffer that is reused by
 * the next call, or NULL when input ends (Ctrl-D on an empty line, end of
 * file, or a read error).
 */
char* __readline_interactive(char* prompt) {
    static char buffer[max_length];
    const char* shown_prompt = prompt ? prompt : "";
    char* result = NULL;
    size_t cursor = 0;
    size_t len = 0;

    enable_raw_mode();
    render_line(shown_prompt, buffer, len, cursor);

    while (1) {
        char utf8[4];
        errno = 0;
        ssize_t clen = read_utf8_char(STDIN_FILENO, utf8);
        if (clen <= 0) {
            if (clen < 0 && errno == EINTR) continue;
            break; /* end of input: report it instead of spinning */
        }

        if (utf8[0] == '\x1b') {
            char seq[3];
            if (read(STDIN_FILENO, &seq[0], 1) != 1) continue;
            if (read(STDIN_FILENO, &seq[1], 1) != 1) continue;
            if (seq[0] == '[') {
                switch (seq[1]) {
                case 'C': /* Arrow right */
                    cursor = next_char_start(buffer, len, cursor);
                    break;
                case 'D': /* Arrow left */
                    cursor = prev_char_start(buffer, cursor);
                    break;
                case '3':
                    if (read(STDIN_FILENO, &seq[2], 1) != 1) continue;
                    if (seq[2] == '~' && cursor < len) { /* Delete */
                        size_t next = next_char_start(buffer, len, cursor);
                        memmove(buffer + cursor, buffer + next, len - next);
                        len -= next - cursor;
                    }
                    break;
                default: /* Arrow up/down ('A'/'B') and others: no action */
                    break;
                }
            }
        } else if (utf8[0] == '\r' || utf8[0] == '\n') { /* Enter */
            buffer[len] = '\0';
            swrite(STDOUT_FILENO, "\r\n");
            result = buffer;
            break;
        } else if (utf8[0] == 127 || utf8[0] == '\b') { /* Backspace */
            if (cursor > 0) {
                size_t prev = prev_char_start(buffer, cursor);
                memmove(buffer + prev, buffer + cursor, len - cursor);
                len -= cursor - prev;
                cursor = prev;
            }
        } else if (utf8[0] == 4) { /* Ctrl-D */
            /* Without ICANON the terminal delivers Ctrl-D as byte 4. */
            if (len == 0) break;
        } else if (len + (size_t)clen < sizeof(buffer)) {
            /* Strictly less: one byte stays free for the terminating NUL. */
            memmove(buffer + cursor + clen, buffer + cursor, len - cursor);
            memcpy(buffer + cursor, utf8, (size_t)clen);
            cursor += (size_t)clen;
            len += (size_t)clen;
        }

        render_line(shown_prompt, buffer, len, cursor);
    }

    disable_raw_mode();
    return result;
}

/**
 * Reads one line: with line editing if stream is a terminal, otherwise with
 * plain buffered I/O. Returns NULL at end of input.
 */
char* readline(char* prompt, FILE* stream) {
    if (isatty(fileno(stream))) {
        return __readline_interactive(prompt);
    }
    return __readline_file(stream);
}

/**
 * Struct to represent a command and its arguments.
 */
struct command {
    /**
     * IO redirections; redirect[i] should be used as fd i in the child.
     * A value of -1 indicates no redirect.
     */
    int redirect[3];
    /** The arguments; must be NULL-terminated. */
    char* argv[];
};

/** The name of the executable. */
char* command_name(struct command* cmd) {
    return cmd->argv[0];
}

/**
 * Struct to represent a pipeline of commands. The intention is that cmd[i]'s
 * output goes to cmd[i+1]'s input.
 */
struct pipeline {
    /** The total number of commands. */
    size_t n_cmds;
    struct command* cmds[];
};

/*
 * Registry of heap blocks created while parsing and running one input line;
 * parser_free() releases all of them at once.
 */
void* parser_memory[max_length] = {0};
size_t parser_memory_index = 0;

/** Records ptr so that parser_free() releases it. Exits if the registry is full. */
void parser_allocated(void* ptr) {
    /* Check before storing: the slot at index max_length does not exist. */
    if (parser_memory_index >= sizeof(parser_memory) / sizeof(parser_memory[0])) {
        fprintf(stderr, "Not enough memory for parser: Memory will leak!");
        exit(1);
    }
    parser_memory[parser_memory_index++] = ptr;
}

/**
 * realloc() that exits on failure. If ptr is a registered block, its registry
 * entry is updated to the new address; unregistered blocks are simply
 * reallocated and stay unregistered.
 */
void* parser_realloc(void* ptr, size_t size) {
    size_t index;
    for (index = 0; index < parser_memory_index; index++) {
        if (parser_memory[index] == ptr) {
            break;
        }
    }

    void* grown = realloc(ptr, size);
    if (!grown) {
        perror("realloc failed");
        exit(1);
    }
    if (index < parser_memory_index) {
        parser_memory[index] = grown;
    }
    return grown;
}

/** Frees every registered block and empties the registry. */
void parser_free(void) {
    for (size_t i = 0; i < parser_memory_index; i++) {
        free(parser_memory[i]);
    }
    memset(parser_memory, 0, sizeof(parser_memory));
    parser_memory_index = 0;
}

/**
 * Returns a registered copy of the value of environment variable name, or of
 * the empty string if it is unset.
 */
char* expand_variable(const char* name) {
    const char* val = getenv(name);
    char* ret = strdup(val ? val : "");
    if (!ret) {
        perror("strdup");
        exit(1);
    }
    parser_allocated(ret);
    return ret;
}

/** Appends c to the growable buffer, always keeping one byte spare for a NUL. */
static void append_char(char** buf, size_t* len, size_t* cap, char c) {
    if (*len + 1 >= *cap) {
        *cap *= 2;
        *buf = parser_realloc(*buf, *cap);
    }
    (*buf)[(*len)++] = c;
}

/** Appends the string s (NULL is treated as empty) to the growable buffer. */
static void append_str(char** buf, size_t* len, size_t* cap, const char* s) {
    if (!s) return;
    while (*s) append_char(buf, len, cap, *s++);
}

/**
 * Appends the value of the variable whose name is the name_len bytes at
 * name_start. Names that do not fit the lookup buffer expand to nothing.
 */
static void append_variable(char** buf, size_t* len, size_t* cap,
                            const char* name_start, size_t name_len) {
    char name[max_env_var_length];
    if (name_len >= sizeof(name)) {
        return;
    }
    memcpy(name, name_start, name_len);
    name[name_len] = '\0';
    append_str(buf, len, cap, expand_variable(name));
}

struct pipeline* parse_pipeline(char *str);
int execute_pipeline(struct pipeline* pl, int capture, char** out);
char* get_home(void);

/** Runs cmd as a pipeline and returns its captured standard output. */
static char* run_subcommand(const char* cmd) {
    /* parse_pipeline works on its own copy, so the cast never leads to a write. */
    struct pipeline* pl = parse_pipeline((char*)cmd);
    char* output = NULL;
    execute_pipeline(pl, 1, &output);
    return output ? output : "";
}

/**
 * Extracts the next whitespace-delimited token from *input, applying quotes,
 * backslash escapes, a leading "~", $NAME / ${NAME} and $(...) / `...`
 * substitution. *input is advanced past the consumed text.
 *
 * Returns a registered (parser_free-owned) string, or NULL when no token is
 * left or on a syntax error (which is reported on stderr).
 */
char* parse_token(char** input) {
    char* p = *input;

    while (*p && isspace((unsigned char)*p)) p++;
    if (!*p) {
        *input = p;
        return NULL;
    }

    size_t cap = 64;
    size_t len = 0;
    char* buf = malloc(cap);
    if (!buf) {
        perror("malloc");
        exit(1);
    }
    /* Registered up front: every exit path, including errors, is then
     * covered by parser_free() and parser_realloc() tracks its moves. */
    parser_allocated(buf);

    const char* token_start = p;
    bool in_single = false;
    bool in_double = false;

    while (*p) {
        if (!in_single && !in_double && isspace((unsigned char)*p)) break;

        if (!in_double && *p == '\'') {
            in_single = !in_single;
            p++;
            continue;
        }
        if (!in_single && *p == '"') {
            in_double = !in_double;
            p++;
            continue;
        }
        if (in_single) {
            /* Everything inside single quotes is literal. */
            append_char(&buf, &len, &cap, *p++);
            continue;
        }

        /* "~" means the home directory only as an unquoted word prefix
         * ("~", "~/x"); elsewhere (e.g. "HEAD~1") it is an ordinary char. */
        if (*p == '~' && !in_double && p == token_start &&
            (p[1] == '\0' || p[1] == '/' || isspace((unsigned char)p[1]))) {
            p++;
            append_str(&buf, &len, &cap, get_home());
            continue;
        }

        if (*p == '\\') {
            p++;
            if (*p) append_char(&buf, &len, &cap, *p++);
            continue;
        }

        if (*p == '$') {
            p++;

            if (*p == '(') { /* $(command) */
                p++;
                size_t cap2 = 256;
                size_t len2 = 0;
                char* cmd = malloc(cap2);
                if (!cmd) {
                    perror("malloc");
                    exit(1);
                }

                int depth = 1;
                bool in_single2 = false;
                bool in_double2 = false;
                while (*p && depth > 0) {
                    if (!in_double2 && *p == '\'') {
                        in_single2 = !in_single2;
                    } else if (!in_single2 && *p == '"') {
                        in_double2 = !in_double2;
                    } else if (!in_single2 && !in_double2) {
                        if (*p == '(') depth++;
                        else if (*p == ')') depth--;
                    }
                    /* The closing ')' itself is not part of the command. */
                    if (depth > 0) append_char(&cmd, &len2, &cap2, *p);
                    p++;
                }

                if (depth != 0) {
                    fprintf(stderr, "syntax error: unclosed $( )\n");
                    free(cmd);
                    *input = p;
                    return NULL;
                }
                cmd[len2] = '\0';
                append_str(&buf, &len, &cap, run_subcommand(cmd));
                free(cmd);
                continue;
            }

            if (*p == '{') { /* ${NAME} */
                const char* name_start = ++p;
                while (*p && *p != '}') p++;
                append_variable(&buf, &len, &cap, name_start, (size_t)(p - name_start));
                if (*p == '}') p++;
                continue;
            }

            /* $NAME */
            const char* name_start = p;
            while (*p && (isalnum((unsigned char)*p) || *p == '_')) p++;
            if (p == name_start) {
                /* A "$" not followed by a name stays a literal "$". */
                append_char(&buf, &len, &cap, '$');
            } else {
                append_variable(&buf, &len, &cap, name_start, (size_t)(p - name_start));
            }
            continue;
        }

        if (*p == '`') { /* `command` */
            const char* cmd_start = ++p;
            while (*p && *p != '`') p++;
            char* cmd = strndup(cmd_start, (size_t)(p - cmd_start));
            if (!cmd) {
                perror("strndup");
                exit(1);
            }
            if (*p == '`') p++;
            append_str(&buf, &len, &cap, run_subcommand(cmd));
            free(cmd);
            continue;
        }

        append_char(&buf, &len, &cap, *p++);
    }

    *input = p;
    if (in_single || in_double) {
        fprintf(stderr, "syntax error: unclosed quote\n");
        return NULL;
    }
    buf[len] = '\0';
    return buf;
}

/**
 * Opens file for writing (creating it with mode 0644), either appending or
 * truncating. The descriptor is close-on-exec; children receive it only
 * through dup2() onto fd 0-2. Returns the descriptor or -1.
 */
int open_fout(const char* file, bool append) {
    return open(file,
                O_WRONLY | O_CREAT | O_CLOEXEC | (append ? O_APPEND : O_TRUNC),
                0644);
}

/**
 * Stores fd as the redirect for target and closes the descriptor it replaces,
 * unless that descriptor is still used by another slot of the same command.
 */
static void replace_redirect(struct command* cmd, int target, int fd) {
    int old = cmd->redirect[target];
    cmd->redirect[target] = fd;
    if (old <= 2) {
        return;
    }
    for (int k = 0; k < 3; ++k) {
        if (cmd->redirect[k] == old) return;
    }
    close(old);
}

/**
 * Parses str into a freshly allocated command struct and returns a pointer to it.
 * Redirect operators (">", ">>", "<", "2>", "&>") open their target file and
 * store the descriptor in the matching redirect slot; all other slots are -1,
 * ie no redirect. If a redirect target is missing or cannot be opened, the
 * error is reported and the command is returned without a name (argv[0] ==
 * NULL), which execute_pipeline() skips.
 */
struct command* parse_command(char* str) {
    /* Copy the input line in case the caller wants it later. */
    char* copy = strndup(str, max_length);
    if (!copy) {
        perror("strndup");
        exit(1);
    }
    parser_allocated(copy);

    /*
     * Tokens are separated by whitespace, so there can never be more tokens
     * than characters; the extra slots guarantee room for the NULL terminator
     * execvp expects, even for an empty or one-character command.
     * calloc zero-initialises, which provides that terminator.
     */
    size_t max_args = strlen(copy) + 2;
    struct command* ret = ccalloc(sizeof(struct command) + max_args * sizeof(char*), 1);
    parser_allocated(ret);
    ret->redirect[0] = ret->redirect[1] = ret->redirect[2] = -1;

    size_t n_args = 0;
    char* p = copy;
    while (*p) {
        char* token = parse_token(&p);
        if (!token) break;

        bool to_stdin = strcmp(token, "<") == 0;
        bool append = strcmp(token, ">>") == 0;
        bool to_stdout = append || strcmp(token, ">") == 0;
        bool to_stderr = strcmp(token, "2>") == 0;
        bool to_both = strcmp(token, "&>") == 0;

        if (!(to_stdin || to_stdout || to_stderr || to_both)) {
            ret->argv[n_args++] = token;
            continue;
        }

        char* file = parse_token(&p);
        int fd = -1;
        if (!file) {
            fprintf(stderr, "syntax error: missing file name after '%s'\n", token);
        } else {
            fd = to_stdin ? open(file, O_RDONLY | O_CLOEXEC) : open_fout(file, append);
            if (fd < 0) perror(file);
        }
        if (fd < 0) {
            /* Do not run a command whose redirection failed. */
            ret->argv[0] = NULL;
            break;
        }

        if (to_stdin) replace_redirect(ret, STDIN_FILENO, fd);
        if (to_stdout || to_both) replace_redirect(ret, STDOUT_FILENO, fd);
        if (to_stderr || to_both) replace_redirect(ret, STDERR_FILENO, fd);
    }

    return ret;
}

/** Appends cmd to the growable array *cmds, doubling its capacity when full. */
static void push_command(struct command*** cmds, size_t* count, size_t* cap,
                         struct command* cmd) {
    if (*count >= *cap) {
        *cap *= 2;
        struct command** grown = realloc(*cmds, sizeof(**cmds) * *cap);
        if (!grown) {
            perror("realloc");
            exit(1);
        }
        *cmds = grown;
    }
    (*cmds)[(*count)++] = cmd;
}

/**
 * Parses str into a freshly allocated pipeline struct and returns a pointer to
 * it. The line is split at every "|" that is outside quotes, backticks and
 * $( ... ), and not escaped with a backslash; each part is handed to
 * parse_command(). The result always holds at least one command.
 */
struct pipeline* parse_pipeline(char *str) {
    char* copy = strndup(str, max_length);
    if (!copy) {
        perror("strndup");
        exit(1);
    }
    parser_allocated(copy);

    size_t cap = 4;
    size_t count = 0;
    struct command** cmds = malloc(sizeof(*cmds) * cap);
    if (!cmds) {
        perror("malloc");
        exit(1);
    }

    bool in_single = false;
    bool in_double = false;
    bool in_backtick = false;
    int paren_depth = 0;

    char* start = copy;
    char* p = copy;
    while (*p) {
        /* Mirror parse_token(): a backslash protects the next character. */
        if (!in_single && !in_backtick && paren_depth == 0 &&
            *p == '\\' && p[1] != '\0') {
            p += 2;
            continue;
        }

        if (!in_double && !in_backtick && *p == '\'') {
            in_single = !in_single;
        } else if (!in_single && !in_backtick && *p == '"') {
            in_double = !in_double;
        } else if (!in_single && !in_double && *p == '`') {
            in_backtick = !in_backtick;
        } else if (!in_single && !in_double && !in_backtick) {
            if (*p == '$' && *(p + 1) == '(') {
                paren_depth++;
                p++; /* step onto '(' so it is not counted a second time */
            } else if (*p == '(' && paren_depth > 0) {
                paren_depth++;
            } else if (*p == ')' && paren_depth > 0) {
                paren_depth--;
            } else if (*p == '|' && paren_depth == 0) {
                *p = '\0';
                push_command(&cmds, &count, &cap, parse_command(start));
                start = p + 1;
            }
        }
        p++;
    }
    /* The last segment goes through the same capacity check as the others. */
    push_command(&cmds, &count, &cap, parse_command(start));

    struct pipeline* ret = ccalloc(sizeof(struct pipeline) + count * sizeof(struct command*), 1);
    parser_allocated(ret);
    ret->n_cmds = count;
    for (size_t i = 0; i < count; i++) {
        ret->cmds[i] = cmds[i];
    }
    free(cmds);
    return ret;
}

/** Closes both ends of every pipe in pipes. */
void close_ALL_the_pipes(int n_pipes, int (*pipes)[2]) {
    for (int i = 0; i < n_pipes; ++i) {
        close(pipes[i][0]);
        close(pipes[i][1]);
    }
}

/**
 * Installs command's redirects on fds 0-2, closes the pipes and replaces the
 * process image with the command. Only returns (with -1) on failure.
 */
int exec_with_redir(struct command* command, int n_pipes, int (*pipes)[2]) {
    /* Duplicate everything first: "&>" stores the same descriptor in two
     * slots, so it must stay open until both dup2() calls are done. */
    for (int target = 0; target < 3; ++target) {
        int fd = command->redirect[target];
        if (fd != -1 && dup2(fd, target) < 0) {
            return -1;
        }
    }
    for (int target = 0; target < 3; ++target) {
        int fd = command->redirect[target];
        if (fd > 2) close(fd);
    }

    close_ALL_the_pipes(n_pipes, pipes);
    return execvp(command_name(command), command->argv);
}

static struct sigaction s_old;

/**
 * Forks and runs command in the child with its redirects applied.
 * Returns the child's pid to the parent; exits the shell if fork fails.
 */
pid_t run_with_redir(struct command* command, int n_pipes, int (*pipes)[2]) {
    pid_t child_pid = fork();
    if (child_pid < 0) {
        perror("Fork failed");
        exit(1);
    }
    if (child_pid) {
        /* We are the parent. */
        return child_pid;
    }

    /* We are the child: give SIGINT back the disposition the shell inherited. */
    sigaction(SIGINT, &s_old, NULL);
    exec_with_redir(command, n_pipes, pipes);

    int exec_errno = errno;
    perror(command_name(command));
    /* _exit: do not run the shell's atexit handlers or flush its stdio
     * buffers a second time from the failed child. */
    _exit(exec_errno == ENOENT ? 127 : 126);
}

/** Changes directory and, on success, updates $PWD. Returns chdir's result. */
int cd(char* path) {
    int result = chdir(path);
    if (result < 0) {
        return result;
    }
    char cwd[max_length];
    if (getcwd(cwd, sizeof(cwd)) != NULL) {
        setenv("PWD", cwd, 1);
    }
    return result;
}

/** True if fd is one end of a pipeline pipe or of the capture pipe. */
static bool is_pipe_fd(int fd, int n_pipes, int (*pipes)[2], const int capture_pipe[2]) {
    if (fd == capture_pipe[0] || fd == capture_pipe[1]) {
        return true;
    }
    for (int i = 0; i < n_pipes; ++i) {
        if (fd == pipes[i][0] || fd == pipes[i][1]) return true;
    }
    return false;
}

/** Closes, in the shell itself, the files parse_command() opened for redirects. */
static void close_file_redirects(struct pipeline* pl, int n_pipes, int (*pipes)[2],
                                 const int capture_pipe[2]) {
    for (size_t i = 0; i < pl->n_cmds; ++i) {
        struct command* cmd = pl->cmds[i];
        for (int k = 0; k < 3; ++k) {
            int fd = cmd->redirect[k];
            if (fd <= 2 || is_pipe_fd(fd, n_pipes, pipes, capture_pipe)) {
                continue;
            }
            close(fd);
            /* "&>" shares one descriptor between two slots: close it once. */
            for (int m = k; m < 3; ++m) {
                if (cmd->redirect[m] == fd) cmd->redirect[m] = -1;
            }
        }
    }
}

/**
 * Runs every command of pl, connecting neighbours with pipes. An explicit
 * file redirect on a command takes precedence over the pipe. The builtins
 * "cd" and "exit" run inside the shell; a command name starting with '#'
 * ends the pipeline there.
 *
 * If capture is non-zero, the last command's standard output is collected,
 * trailing newlines are removed and the registered string is stored in *out.
 *
 * Returns the exit status of the last child that was waited for (128 + signal
 * number if it was killed by a signal), or 0 if no child was run.
 */
int execute_pipeline(struct pipeline* pl, int capture, char** out) {
    const size_t n_cmds = pl->n_cmds;
    const int n_pipes = n_cmds > 1 ? (int)(n_cmds - 1) : 0;

    int (*pipes)[2] = ccalloc(sizeof(int[2]), (size_t)n_pipes);
    parser_allocated(pipes);
    for (int i = 0; i < n_pipes; ++i) {
        if (pipe(pipes[i]) < 0) {
            perror("pipe");
            exit(1);
        }
        if (pl->cmds[i + 1]->redirect[STDIN_FILENO] == -1) {
            pl->cmds[i + 1]->redirect[STDIN_FILENO] = pipes[i][0];
        }
        if (pl->cmds[i]->redirect[STDOUT_FILENO] == -1) {
            pl->cmds[i]->redirect[STDOUT_FILENO] = pipes[i][1];
        }
    }

    int capture_pipe[2] = {-1, -1};
    if (capture) {
        if (pipe(capture_pipe) < 0) {
            perror("pipe");
            exit(1);
        }
        /* Children get the capture pipe only via dup2() onto stdout. */
        fcntl(capture_pipe[0], F_SETFD, FD_CLOEXEC);
        fcntl(capture_pipe[1], F_SETFD, FD_CLOEXEC);
        if (n_cmds > 0 && pl->cmds[n_cmds - 1]->redirect[STDOUT_FILENO] == -1) {
            pl->cmds[n_cmds - 1]->redirect[STDOUT_FILENO] = capture_pipe[1];
        }
    }

    /* Zero-initialised: entries of builtins and skipped commands stay 0. */
    pid_t* pids = ccalloc(sizeof(pid_t), n_cmds);
    parser_allocated(pids);

    for (size_t i = 0; i < n_cmds; ++i) {
        struct command* cmd = pl->cmds[i];
        char* cmd_name = command_name(cmd);
        if (!cmd_name) {
            continue;
        }
        if (cmd_name[0] == '#') {
            /* Comment: stop here, but still run the cleanup below. */
            break;
        }
        if (strcmp(cmd_name, "cd") == 0) {
            char* path = cmd->argv[1] ? cmd->argv[1] : getenv("HOME");
            if (!path) {
                fprintf(stderr, "cd: HOME not set\n");
            } else if (cd(path) < 0) {
                char err_buf[max_length];
                snprintf(err_buf, sizeof(err_buf), "cd: %s", path);
                perror(err_buf);
            }
            continue;
        }
        if (strcmp(cmd_name, "exit") == 0) {
            int status_code = cmd->argv[1] != NULL ? (int)strtol(cmd->argv[1], NULL, 10) : 0;
            exit(status_code);
        }
        pids[i] = run_with_redir(cmd, n_pipes, pipes);
    }

    close_ALL_the_pipes(n_pipes, pipes);
    if (capture) {
        close(capture_pipe[1]);
    }
    close_file_redirects(pl, n_pipes, pipes, capture_pipe);

    if (capture) {
        /* Drain the pipe BEFORE waiting: a child that fills the pipe buffer
         * blocks until someone reads, so waiting first would deadlock. */
        char chunk[max_length];
        char* result = NULL;
        size_t total = 0;
        for (;;) {
            ssize_t n = read(capture_pipe[0], chunk, sizeof(chunk));
            if (n < 0 && errno == EINTR) continue;
            if (n <= 0) break;
            char* grown = realloc(result, total + (size_t)n + 1);
            if (!grown) {
                perror("realloc");
                exit(1);
            }
            result = grown;
            memcpy(result + total, chunk, (size_t)n);
            total += (size_t)n;
        }
        close(capture_pipe[0]);

        if (!result) {
            result = ccalloc(sizeof(char), 1);
        }
        while (total > 0 && result[total - 1] == '\n') {
            total--;
        }
        result[total] = '\0';
        parser_allocated(result);
        if (out) {
            *out = result;
        }
    }

    int status = 0;
    bool have_status = false;
    for (size_t i = 0; i < n_cmds; ++i) {
        /* pid 0 means "no child"; waitpid(0, ...) would wait for any child
         * in the process group instead. */
        if (pids[i] <= 0) {
            continue;
        }
        int child_status = 0;
        pid_t waited;
        do {
            waited = waitpid(pids[i], &child_status, 0);
        } while (waited < 0 && errno == EINTR);
        if (waited > 0) {
            status = child_status;
            have_status = true;
        }
    }

    if (!have_status) return 0;
    if (WIFEXITED(status)) return WEXITSTATUS(status);
    if (WIFSIGNALED(status)) return 128 + WTERMSIG(status);
    return 0;
}

/**
 * Copies the value of environment variable name into buf (truncated to fit,
 * always NUL-terminated) and returns buf, or returns fallback if it is unset.
 */
static char* env_or_fallback(const char* name, char* buf, size_t size, char* fallback) {
    const char* value = getenv(name);
    if (!value) {
        return fallback;
    }
    snprintf(buf, size, "%s", value);
    return buf;
}

char username_buf[max_env_var_length] = {0};
/** $USER, or "UNKNOWN" if unset. */
char* get_user(void) {
    return env_or_fallback("USER", username_buf, sizeof(username_buf), "UNKNOWN");
}

char home_buf[max_length] = {0};
/** $HOME, or "UNKNOWN_HOME_PATH" if unset. */
char* get_home(void) {
    return env_or_fallback("HOME", home_buf, sizeof(home_buf), "UNKNOWN_HOME_PATH");
}

char pwd_buf[max_length] = {0};
/** $PWD, or "UNKNOWN" if unset. */
char* get_pwd(void) {
    return env_or_fallback("PWD", pwd_buf, sizeof(pwd_buf), "UNKNOWN");
}

/**
 * Returns pwd with a leading home directory replaced by "~". The replacement
 * is only made at a path boundary (so "/home/al" is not treated as a prefix
 * of "/home/alice"). The result is either pwd itself or a registered string.
 */
char* prettify_pwd(char* pwd) {
    if (!pwd) {
        fprintf(stderr, "\nInternal Error: pwd can't be null\n");
        exit(1);
    }

    char* home = get_home();
    size_t home_len = strlen(home);
    size_t pwd_len = strlen(pwd);

    if (home_len == 0 || pwd_len < home_len) return pwd;
    if (memcmp(home, pwd, home_len) != 0) return pwd;
    if (pwd[home_len] != '\0' && pwd[home_len] != '/') return pwd;

    size_t tail_len = pwd_len - home_len;
    char* ret = ccalloc(sizeof(char), tail_len + 2); /* '~' + tail + NUL */
    parser_allocated(ret);
    ret[0] = '~';
    memcpy(ret + 1, pwd + home_len, tail_len);
    ret[tail_len + 1] = '\0';
    return ret;
}

char prompt_buf[max_length] = {0};

/**
 * Builds the prompt from $PS1 into a static buffer. Supported escapes:
 * \u (user), \w (working directory, home shown as "~"), \e (ESC) and
 * \[ \] (dropped). Any other text, including unknown escapes, is copied
 * literally. Returns default_prompt if PS1 is unset or empty. Only the first
 * max_env_var_length - 1 bytes of PS1 are used; a prompt that would not fit
 * the buffer is cut off with a warning.
 */
char* generate_ps1_prompt(void) {
    const char* ps1 = getenv("PS1");
    if (!ps1 || ps1[0] == '\0') {
        return default_prompt;
    }

    char env_buf[max_env_var_length];
    snprintf(env_buf, sizeof(env_buf), "%s", ps1);

    size_t used = 0;
    size_t i = 0;
    while (env_buf[i] != '\0') {
        const char* piece = NULL;
        if (env_buf[i] == '\\') {
            switch (env_buf[i + 1]) {
            case 'u': piece = get_user(); break;
            case 'w': piece = prettify_pwd(get_pwd()); break;
            case 'e': piece = esc; break;
            case ']':
            case '[': piece = ""; break;
            default: break;
            }
        }

        size_t piece_len;
        size_t consumed;
        if (piece) {
            piece_len = strlen(piece);
            consumed = 2; /* backslash + escape letter */
        } else {
            piece = &env_buf[i];
            piece_len = 1;
            consumed = 1;
        }

        /* ">=": the last byte of prompt_buf is reserved for the NUL. */
        if (used + piece_len >= sizeof(prompt_buf)) {
            fprintf(stderr, "\nOut of memory for prompt: %zu >= %zu\n",
                    used + piece_len, sizeof(prompt_buf));
            break;
        }
        memcpy(prompt_buf + used, piece, piece_len);
        used += piece_len;
        i += consumed;
    }

    prompt_buf[used] = '\0';
    return prompt_buf;
}

/**
 * Entry point.
 *   arsh               read commands from stdin
 *   arsh FILE          read commands from FILE (no prompt)
 *   arsh -c COMMAND    run COMMAND and exit with its status
 */
int main(int argc, const char* argv[]) {
    if (argc > 0 && argv[0]) {
        setenv("SHELL", argv[0], 1);
    }

    FILE* stream = stdin;
    /* volatile: prompt is reassigned after sigsetjmp() and read again after
     * siglongjmp(); a plain automatic variable would be indeterminate then. */
    char* volatile prompt = generate_ps1_prompt();

    const bool spawn_command = argc >= 2 && strcmp(argv[1], "-c") == 0;
    if (spawn_command) {
        if (argc != 3) {
            fprintf(stderr, "There must be exactly 3 arguments, when starting with a command '-c', but provided %d\n", argc);
            exit(1);
        }
        /* parse_pipeline works on its own copy of the string. */
        struct pipeline* pipeline = parse_pipeline((char*)argv[2]);
        exit(execute_pipeline(pipeline, 0, NULL));
    }

    if (argc == 2) {
        FILE* file = fopen(argv[1], "r");
        if (!file) {
            perror(argv[1]);
            exit(1);
        }
        stream = file;
        prompt = NULL;
    }

    struct sigaction s;
    memset(&s, 0, sizeof(s));
    s.sa_handler = sigint_handler;
    sigemptyset(&s.sa_mask);
    s.sa_flags = SA_RESTART;
    sigaction(SIGINT, &s, &s_old);

    if (sigsetjmp(restart_jmp_buf, 1) == restart_value) {
        /* Ctrl-C may have interrupted the line editor: restore the terminal. */
        disable_raw_mode();
        printf("\n");
        fflush(stdout);
    }
    jump_active = 1;

    while (true) {
        char* input = readline(prompt, stream);
        if (input == NULL) { /* end of input, e.g. CTRL + D */
            if (prompt) {
                printf("\nexit\n");
            }
            exit(0);
        }

        struct pipeline* pipeline = parse_pipeline(input);
        execute_pipeline(pipeline, 0, NULL);
        parser_free();

        if (prompt) {
            prompt = generate_ps1_prompt();
        }
    }
}