diff --git a/.main.cpp.swo b/.main.cpp.swo new file mode 100644 index 0000000..322b485 Binary files /dev/null and b/.main.cpp.swo differ diff --git a/.main.cpp.swp b/.main.cpp.swp new file mode 100644 index 0000000..7d6e9be Binary files /dev/null and b/.main.cpp.swp differ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7bc54f5 --- /dev/null +++ b/Makefile @@ -0,0 +1,36 @@ +# A simple Makefile to make the process of compilation easier for the user :-) +# With this makefile, instead of writing the cumbersome command for the whole project compilation into a single binary, +# simply write (on the terminal),~~ + +# make a : TO complile the project files +# make run: TO run the compiled binary files +# make clean: TO delete all the built binary files. + +#~~~~~~~~~~~~~~~~~~Libraries~~~~~~~~~~~~~~~~~~~~~~~~~# +# Install readline library: sudo apt-get install libreadline6 libreadline6-dev +# Install boost library: sudo apt-get install libboost-all-dev + +#~~~~~~~~~~~~~~~~~~VARIABLES~~~~~~~~~~~~~~~~~~~~~~~~~# +CC = g++ -std=c++11 +# -std=c++11 This flag provides C++11 support in the g++ compiler +CFLAGS = -c -Wall +# argument Wall is used for giving warnings + +#~~~~~~~~~~~~~~~~~COMMANDS~~~~~~~~~~~~~~~~~~~~~~~~~~~# +a: all +run: all + ./all +all: main.o parser.o lexical.o syntax_analyser.o processCtrl.o + $(CC) main.o parser.o lexical.o syntax_analyser.o processCtrl.o -o all -lboost_system -lboost_filesystem -lreadline +main.o: main.cpp + $(CC) $(CFLAGS) main.cpp +parser.o: parser.cpp + $(CC) $(CFLAGS) parser.cpp +lexical.o: lexical.cpp + $(CC) $(CFLAGS) lexical.cpp +syntax_analyser.o: syntax_analyser.cpp + $(CC) $(CFLAGS) syntax_analyser.cpp +processCtrl.o: processCtrl.cpp + $(CC) $(CFLAGS) processCtrl.cpp +clean: + rm -rf *o all \ No newline at end of file diff --git a/README.md b/README.md index 1215082..68ecbd5 100644 --- a/README.md +++ b/README.md @@ -1 +1,93 @@ -# Shell-Development-Project 
+![image](https://user-images.githubusercontent.com/29279664/32775488-8dbfbcfc-c955-11e7-95f6-33f9fdffa7cf.png) +# Abstract + Most useful interaction with a UNIX system occurs through the shell. Using a series of easy to remember and + simple commands, one can navigate the UNIX file system and issue commands to perform a wide variety of tasks. + Even though it may appear simple, the shell encapsulates many significant components of the operating system. +# Introduction + This project aims to create a command line interface that provides almost every functionality provided by the + BASH and looks forward to implementing minute tweaks and quick fixes that can influence its performance in terms + of memory utilization, execution time and error handling. Moreover, the implementation methods make use of new + styles and libraries. + This includes implementation of concepts of Compiler Design (lexical analysis, syntax analysis, error handling) + amalgamated with the concepts of process creation and control in Unix Systems. + The project aims to make the shell more memory-efficient by making use of C++ Boost::Filesystem Library that + enables Directory Traversal and implementation of Change Directory command (cd) in the Unix Environment. Also, + a new method of constructing SLR (1) parsing table has been improvised; this reduces the wastage of memory + caused by using the conventional Sparse Matrix Method. + Hence, the newly developed shell has been named BISHOP (Boost Implemented OPerational SHell). + +

Implementation Details

+ +# Environment: + The shell maintains many variables which allow the user to maintain settings and easily navigate the filesystem. + Two of these that are particularly important are the current working directory and the PATH. As its name implies, + the current working directory variable keeps track of the user's current directory. The kernel searches in the + directories specified by the PATH variable starting with the leftmost directory first. + Bishop’s environment uses Boost filesystem Library to check the user’s pwd (Present Working Directory) and + execute cd (change Directory) command effectively. Also, the program code makes use of C’s readline library + for text completion functionalities originally provided in the BASH. + +# Command Analysis: + Bishop parses the user input command by performing SLR (1) syntax analysis. The LR (k)-method uses two tables, + which describe the behavior of a push-down automaton, used during the parsing process. These two tables, + called action table and goto table, are sparse tables. Moreover, the data in them are not homogeneous in + structure since both item numbers and right sides of productions are stored. + I referred to a paper which proposes: a new parsing table structure, which is dense and homogeneous; a parsing + algorithm; and an algorithm for generation of this table, based on the SLR (1)-method. + +![image](https://user-images.githubusercontent.com/29279664/30530025-b3a76f54-9c60-11e7-8004-eb885140cced.png) + +The proposed parsing table structure has 4 attributes: +- CurrentItem - the LR (0)-Item on the top of the stack +- NextSymbol - the next symbol from the input queue +- Result - an integer whose meaning depends on the value of the attribute Action +- Action - if action is ‘S’, then Result contains the number of a LR(0)-Item; if action is ‘R’, then Result contains the number of a production; if action is ‘A’, then the input queue is recognized as true. 
+ +# Pipelining: + UNIX provides a variety of useful programs for use (grep, ls, echo, to name a few). + Like instructions in C++, these programs tend to be quite effective at doing one specific thing + (Such as grep: searching text, ls: printing directories, and echo: printing text to the console). + However, programmers/OS users would like to accomplish large tasks consisting of many individual + operations. Doing such requires using results from previous steps in order to complete a larger + problem. Bishop supports this through the pipe operation (represented by the character |) just + like the BASH using Unix System calls (dup2(int oldfd, int newfd)). + A pipe in between two commands causes the standard output of one to be redirected into the standard + input of another. An example of this is provided below, using the pipe operation to search for all + processes with the name under root user: + ps auxx | grep “root” +# Process Control: + Bishop uses the conventional methods of process creation and execution in Unix/Linux (fork () and execvp () + system calls) aided by different implementation techniques like C++ STL (Standard Template Library) classes: + std::string and std::vector. +- fork (): +Every other process in the system is brought to life through a call to the fork () system call. A process calling fork is copied by the kernel. At this point, the process that called fork is known as the parent, and the newly created process is known as the child since the parent process caused the child to be created. The newly created process is essentially exactly the same as the parent, even having the exact same variables and open files. Fork is unique in that it is called once by the parent process and returns twice (to the parent and child separately). +To the parent, fork returns the process id (PID) of the newly created child. To the child, fork returns 0. If the call fails entirely, -1 is returned to the parent, and no child is created. 
+ +For more information about fork, see its manpage by calling man 2 fork. + +- exec (): +UNIX provides a function, known as exec, which allows a programmer to change a process's address space in order to run an entirely new program. Running an exec command deletes the existing text, data, and stack segments of the existing process and replaces them with those of a new program. +The system call interface provides a total of six different variations of the exec function which differ only in how they handle input arguments. This means that only one system call (usually execve ()) is actually required to be implemented. The other functions are stubs which perform necessary preparations and then eventually call execve. + +Bishop uses execvp (const char *file, char *const argv[]) system call as it maintains a vector of command input strings. + +# References: +- Linux Lectures by Dr. B Frazer +https://www.youtube.com/watch?v=9seb8hddeK4&index=2&t=7s&list=LLDUlQqsRbRzlDdJlyERjLBw +- Pipes Lectures +https://www.youtube.com/watch?v=uHH7nHkgZ4w +- Machine Problem 4: The UNIX Shell, PDF- Texas A&M University +- Stevens, W. Richard. Advanced Programming in the UNIX Environment +- Linux System Programming: Talking Directly to the Kernel and C Library, Author: Robert Love +- Parsing table structure and algorithm for the LR(k) parsing Method + BY: ILTSCHEV, V[elko] I[vanov] + [New LR Parsing method.pdf](https://github.com/Akashi96/High-Performance-Linked-Lists/files/1309805/New.LR.Parsing.method.pdf) +# Necessary Installations: +- C++ Boost Library: +
sudo apt-get install libboost-all-dev
+- C Readline Library: +
sudo apt-get install libreadline6 libreadline6-dev
+ + + + diff --git a/ShellmakingSteps.pdf b/ShellmakingSteps.pdf new file mode 100644 index 0000000..9f4fc00 Binary files /dev/null and b/ShellmakingSteps.pdf differ diff --git a/all b/all new file mode 100755 index 0000000..e999460 Binary files /dev/null and b/all differ diff --git a/lexical.cpp b/lexical.cpp new file mode 100644 index 0000000..4350c4e --- /dev/null +++ b/lexical.cpp @@ -0,0 +1,199 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // -lboost_system: Write this while compiling, this prevents + // the linker error. +#include + +#include "lexical.h" + +int validateCmd(string cmd) +{ + if (cmd == "cd") + { + return 1; + } + boost::filesystem::path pathToBin("/bin/" + cmd); + boost::filesystem::path pathToBin2("/usr/bin/" + cmd); + boost::filesystem::path pathToBin3("/usr/share/applications" + cmd); + + // cout << "path=" << pathToBin << "\n"; + if (! boost::filesystem::exists(pathToBin) && ! boost::filesystem::exists(pathToBin2) && ! 
boost::filesystem::exists(pathToBin3)) + throw std::runtime_error(cmd + ": command not found"); + return 0; +} + +int validateDir(string dir) +{ + // replace(dir.begin(), dir.end(), '~', "/home/akash"); //Cannot use std::replace() because, it replaces char with another char + //here we have to replace character with string + boost::filesystem::path path(dir); + // cout << "current path:" << boost::filesystem::current_path() < + // cout << "changed current path:" << boost::filesystem::current_path() <"; + break; + + case '<': return ""; + break; + + case '>': return ""; + break; + + case '&': if (testCmd[1] == ' ') + return ""; + if (testCmd[1] == '&') + return "<&&>"; + else + return ""; + break; + + case '/': return ""; + break; + + case '"': return ""; + break; + + default: return ""; + break; + } +} + +int substrPos(string cmd, int it, int &strStartPos, int &strEndPos) +{ + if (it == -1) // if iterator is at the beginning of the command string + { + strStartPos = -1; + strEndPos = cmd.find(' ', it + 1); + } + else + { + strStartPos = cmd.find(' ', it); + strEndPos = cmd.find(' ', strStartPos + 1); + // cout << "strStartPos:" << strStartPos << endl; + // cout << "strEndPos:" << strEndPos << endl; + if (strStartPos == -1) // If no further space is found, means that we have found all + { // the tokens + return -1; + } + if (strEndPos == -1 && strStartPos != -1) // If space before the token is found, but not the space after + { // the token, this means that we are on the last token. + strEndPos = cmd.length(); // In that case, make the variable- that takes the index value + // space after a token- take the value of length of the string + // i.e. 1 more than the index position of last character. + + if (strEndPos - strStartPos == 1) // if we've reached the end of the string and the last character + return -1; // entered by the user is blank: ' '. 
+ + return 0; + } + } + return 0; +} +int cmdLexicalAnalysis(string cmd, std::vector & tokenStream, std::vector & tokens, int& pipeCount) +{ + int strStartPos, strEndPos, strLength, mark, commandTurn = 1, dirArg = 0;//strStartPos: Stores the index value of space before a token. + // srtEndPos: Stores the index value of space after a token. + string token, subString; + strLength = cmd.length(); + // cout << "Length:" << strLength << endl; + // tokenStream.append(""); + for (int it = -1; it < strLength - 1; it++) + { + // cout << "Iter:" << it << endl; + mark = substrPos(cmd, it, strStartPos, strEndPos); // Find the position of space characters + // before and after a token in the command + // cout << "Mark:" << mark << endl; + if (mark == -1) + break; + it = strStartPos; // Move the iterator to the space preceding the next token + // cout << "Updated Iter:" << it << endl; + subString = cmd.substr(strStartPos + 1, strEndPos - strStartPos - 1); + token = lexems(subString); // Stores the string value of token type + // cout << "token:" << cmd.substr(strStartPos + 1, strEndPos - strStartPos - 1) << endl; + + if (token == "") + { + it = cmd.find('"', it + 2) - 1; + // cout << it; + token = ""; + } + if (dirArg == 1) + { + token = ""; + while (cmd[strEndPos - 1] == '\\') // when the file or directory name consists of spaces. + { + subString.erase(strEndPos - strStartPos - 2); // erase backslash(\) character from path string. + // bool::filesystem::exists() does not require the use of + // backslash(\) to check the existence of file or directory + // it is the feature of the shell. + it ++; // move the iterator by one space to help finding the next two spaces in the string using substrPos() + substrPos(cmd, it, strStartPos, strEndPos); + subString = subString + " " + cmd.substr(strStartPos + 1, strEndPos - strStartPos - 1); + // Take the next substring and add it to the + // previous substring to treat them as + // one space separated directory path. 
+ it = strStartPos; // reset the position of the iterator, so that at the very end when, blackslash(\) + // is not found, we DO NOT repeat the substring already taken in consideration + // and unwillingly add it to the tokenstream as a + } + validateDir(subString); + dirArg = 0; + } + if (commandTurn == 1) + { + token = ""; // This converts the token type of string at the beginning of the + // command or just after the PIPE operator to + commandTurn = 0; // commandTurn helps to ensure whether or not, the next token + // needs to be of type or not. + /* Now, since we know that the token has to be a unix command, we need to validate it. + The subString variable already contains the token value, just send it to validateCmd() to check the + validity of the */ + dirArg = validateCmd(subString); + if (dirArg == 1) + token = ""; + } + if (token == "" || token == "<&&>") + { + commandTurn++; + if (token == "") + pipeCount++; + } + tokenStream.push_back(token); + tokens.push_back(strdup(subString.c_str())); + } + return 0; +} \ No newline at end of file diff --git a/lexical.h b/lexical.h new file mode 100644 index 0000000..1ab9f6f --- /dev/null +++ b/lexical.h @@ -0,0 +1,14 @@ +#ifndef LEXICAL_H_INCLUDED +#define LEXICAL_H_INCLUDED + +#include +using namespace std; + +string lexems(string testCmd); +int cmdLexicalAnalysis(string cmd, std::vector &tokenStream, std::vector &tokens, int& pipeCount); +int cmdSyntaxAnalysis(string cmd); +int substrPos(string cmd, int it, int &strStartPos, int &strEndpos); +int validateDir(string dir); +int validateCmd(string cmd); + +#endif // LEXICAL_H_INCLUDED \ No newline at end of file diff --git a/lexical.h.gch b/lexical.h.gch new file mode 100644 index 0000000..fd6d882 Binary files /dev/null and b/lexical.h.gch differ diff --git a/lexical.o b/lexical.o new file mode 100644 index 0000000..fe76e32 Binary files /dev/null and b/lexical.o differ diff --git a/main.cpp b/main.cpp old mode 100644 new mode 100755 index e69de29..e6351e2 --- 
a/main.cpp +++ b/main.cpp @@ -0,0 +1,163 @@ +/*input + +*/ +///------------------------------------------------------------------------------------- +/* + AUTHOR: Akash Agarwal + 1405231007-Computer Science Department + IET LUCKNOW + LIFE MOTTO: while(!(suceed=try())) +*/ +///-------------------------------------------------------------------------------------- + +//Predefined #INCLUDES + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//User Defined #INCLUDES + +#include "lexical.h" +// #include "termcolor.hpp" +using namespace std; +/* +ANSI colour codes. +**Note that not all terminals support this; if colour sequences are not supported, garbage will show up.** + +Example: + +cout << "\033[1;31mbold red text\033[0m\n"; +Here, \033 is the ESC character, ASCII 27. It is followed by [, then zero or more numbers separated by ;, +and finally the letter m. The numbers describe the colour and format to switch to from that point onwards. + +The codes for foreground and background colours are: +____________________________________ + + foreground background + +black 30 40 +red 31 41 +green 32 42 +yellow 33 43 +blue 34 44 +magenta 35 45 +cyan 36 46 +white 37 47 +_____________________________________ +Additionally, we can use these: +_________________________________________________________________ + +reset 0 (everything back to normal) +bold/bright 1 (often a brighter shade of the same colour) +underline 4 +inverse 7 (swap foreground and background colours) +bold/bright off 21 +underline off 24 +inverse off 27 +__________________________________________________________________ + +See the table on Wikipedia for other, less widely supported codes. 
+*/ +#define ANSI_COLOR_RED "\x1B[1;31m" +#define ANSI_COLOR_YELLOW "\x1B[1;33m" +#define ANSI_COLOR_BLUE "\x1B[1;34m" +#define ANSI_COLOR_RESET "\x1B[1;0m" +#define ANSI_COLOR_WHITE "\x1B[37m" +#define ANSI_COLOR_GREEN "\x1B[1;32m" + +int main() +{ + string pathtoHome, username, hostname; + username = string(getenv("USER")); // If compiler is showing error: "basic_string::_M_construct null not valid" + // that means Ubuntu isn't exporting the variable like it should, + // put "export HOSTNAME" in "/etc/bash.bashrc" and the code works. + hostname = string(getenv("HOSTNAME")); // it will export both the username and the hostname. + pathtoHome = "/home/" + username; + chdir (pathtoHome.c_str()); + static char* line_read = (char *) NULL; + while(true) + { + string prompt = "\n" + + + username + // + + // ANSI_COLOR_RED + + + "@" + // + + // ANSI_COLOR_GREEN + + + hostname + // + + // ANSI_COLOR_YELLOW + + + ":" + // + + // ANSI_COLOR_BLUE + + + boost::algorithm::replace_all_copy(boost::filesystem::current_path().string(), pathtoHome, "~") + // + + // ANSI_COLOR_RESET + + + "!>> "; + + + line_read = readline (prompt.c_str()); + + if (! line_read || strcmp(line_read,"exit") == 0) + { + cout << endl; + break; + } + + /* If the line has any text in it, save it on the history. */ + if (line_read && *line_read) + add_history (line_read); + try + { + string cmd(line_read); + cmdSyntaxAnalysis(cmd); + } + catch (const std::exception& e) + { + cout << e.what() << "\n"; + } + } + return 0; +} + + +// /* A static variable for holding the line. */ +// static char *line_read = (char *)NULL; + +// /* Read a string, and return a pointer to it. Returns NULL on EOF. */ +// char * +// rl_gets () +// { +// /* If the buffer has already been allocated, return the memory +// to the free pool. */ +// if (line_read) +// { +// free (line_read); +// line_read = (char *)NULL; +// } + +// /* Get a line from the user. 
*/ +// line_read = readline (""); + +// /* If the line has any text in it, save it on the history. */ +// if (line_read && *line_read) +// add_history (line_read); + +// return (line_read); +// } \ No newline at end of file diff --git a/main.o b/main.o new file mode 100644 index 0000000..bd587e8 Binary files /dev/null and b/main.o differ diff --git a/parser.cpp b/parser.cpp new file mode 100644 index 0000000..a28eed2 --- /dev/null +++ b/parser.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // -lboost_system: Write this while compiling, this prevents + // the linker error. +#include + +#include "lexical.h" +#include "syntax.h" +#include "processctrl.h" + +int cmdSyntaxAnalysis(string cmd) +{ + std::vector tokenStream; + std::vector tokens; + std::unordered_map table; + int flag; + int whichTypeofExecution = 0; + int pipeCount = 0; + + cmdLexicalAnalysis(cmd, tokenStream, tokens, pipeCount); + constructTable(table); + if(! tokenStream.empty()) + tokenStream.push_back("<$>"); + // tokenStream.push_back(""); + // tokenStream.push_back(""); + // tokenStream.push_back(""); + // tokenStream.push_back(""); + // tokenStream.push_back(""); + flag = parser(table, tokenStream); + if (flag == 1) + { + try + { + processctrl(cmd, tokens, pipeCount, whichTypeofExecution); + // processctrl(cmd, tokens); + } + catch(int x) + { + std::cout << "Maybe memory is not available for one more process...:-(" << std::endl; + } + } +return 0; +} diff --git a/parser.o b/parser.o new file mode 100644 index 0000000..ff2fa88 Binary files /dev/null and b/parser.o differ diff --git a/processCtrl.cpp b/processCtrl.cpp new file mode 100644 index 0000000..16cdf15 --- /dev/null +++ b/processCtrl.cpp @@ -0,0 +1,152 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include // -lboost_system: Write this while compiling, this prevents + // the linker error. 
+#include +#include "lexical.h" +#include "processctrl.h" +using namespace std; +// https://stackoverflow.com/questions/8389033/implementation-of-multiple-pipes-in-c +#define STDIN 0 +#define STDOUT 1 + +pid_t callFork() +{ + pid_t pid = fork(); + if (pid < 0) + { + perror("fork failed"); + throw -1; + } +return pid; +} +int executeCommand(char **const ptrToArrayofStrings, int& flag) +{ + if (flag != 1) + { + pid_t pid = callFork(); + if (pid == 0) // child + { + // cout << ptrToArrayofStrings[0] << ptrToArrayofStrings[1] << ptrToArrayofStrings[2] << ptrToArrayofStrings[3]; + execvp(ptrToArrayofStrings[0], ptrToArrayofStrings); + exit(0); + } + } + else + { + execvp(ptrToArrayofStrings[0], ptrToArrayofStrings); + exit(0); + } + // Parent + //wait for child + wait(NULL); +return 0; +} +int piping(vector& commandList4Pipe) +{ + int vectorSize = commandList4Pipe.size(); + // // cout << "vectorSize" << vectorSize; + int pipeFDSize = (vectorSize - 1) * 2; + int pipes[pipeFDSize]; + pid_t forkArr[vectorSize]; + for (int i = 0; i < pipeFDSize; i += 2) + pipe(pipes + i); + for (int i = 0; i < vectorSize; i++) + { + if ((forkArr[i] = fork()) < 0) + { + perror("Pipes Fork error"); + throw -1; + } + if (forkArr[i] == 0) + { // child + + if (i != 0) /* child gets input from the previous command, if it's not the first command */ + if(dup2(pipes[((i - 1) * 2)], STDIN) < 0) + cout << "1st dup error"; + + if (i != vectorSize -1) /* child outputs to next command, if it's not the last command */ + if(dup2(pipes[(i * 2) + 1], STDOUT) < 0) + cout << "2nd dup error"; + + for (int i = 0; i < pipeFDSize; i++) + close(pipes[i]); + std::vector tokenstream; + std::vector tokens; + int pipeCount2, flag = 1; + string cmd = commandList4Pipe.at(i); + // cout << cmd << endl; + cmdLexicalAnalysis(cmd, tokenstream, tokens, pipeCount2); + // for(int i=0;i& tokens, int& pipeCount, int& flag) +{ + int pos = 0; + char *ptrToCharPtr[1024]; + int index = 0; + // cout << pipeCount; + if (pipeCount) + { 
// cout << "in here"; + vector pipeVector; + while ((pos = cmd.find("|", index)) != -1) + { + string subString = cmd.substr(index, pos - index - 1); + // cout << subString <<"hello"; + pipeVector.push_back(subString); + index = pos + 2; + } + if (pos == -1) + pos = cmd.length(); + // cout << cmd.substr(index, pos - index) << "hello"; + pipeVector.push_back(cmd.substr(index, pos - index)); + // for (int i = 0; i < pipeVector.size(); i++) + // cout << pipeVector[i]; + piping(pipeVector); + } + else + { + int vectorSize = tokens.size(); + int count = 0; + for (int i = 0; i < vectorSize; i++) + { + if (strcmp(tokens[i],"&&") == 0) // || (flag = strcmp(tokens[i],"|")) == 0 + { + // cout << "In && or |\n"; + ptrToCharPtr[count++] = NULL; + // if (flag == 0) + // { + // commandList4Pipe.push_back(ptrToCharPtr); + // cout << **ptrToCharPtr; + // } + + executeCommand(ptrToCharPtr, flag); + count = -1; + } + ptrToCharPtr[count++] = tokens[i]; + } + ptrToCharPtr[count++] = NULL; + executeCommand(ptrToCharPtr, flag); + } +return 0; +} \ No newline at end of file diff --git a/processCtrl.o b/processCtrl.o new file mode 100644 index 0000000..ed12b9d Binary files /dev/null and b/processCtrl.o differ diff --git a/processctrl.h b/processctrl.h new file mode 100644 index 0000000..3772608 --- /dev/null +++ b/processctrl.h @@ -0,0 +1,20 @@ +#ifndef PROCESSCTRL_H_INCLUDED +#define PROCESSCTRL_H_INCLUDED + +#include + +pid_t callFork(); +int executeCommand(char **const ptrToArrayofStrings); +int pipeFunction(char **const ptrToArrayofStrings, int pipes[], int pipeCount, int& keepsTrackofPIPEpos); +int processctrl(std::string cmd, std::vector & tokens, int& pipeCount, int& flag); + +#endif // PROCESSCTRL_H_INCLUDED + +// #ifndef PROCESSCTRL_H_INCLUDED +// #define PROCESSCTRL_H_INCLUDED + +// #include + +// int processctrl(std::string cmd, std::vector & tokens); + +// #endif // PROCESSCTRL_H_INCLUDED \ No newline at end of file diff --git a/syntax b/syntax new file mode 100644 index 
0000000..7af4f0c Binary files /dev/null and b/syntax differ diff --git a/syntax.h b/syntax.h new file mode 100644 index 0000000..3c818e0 --- /dev/null +++ b/syntax.h @@ -0,0 +1,94 @@ +#ifndef SYNTAX_H_INCLUDED +#define SYNTAX_H_INCLUDED + +#include +#include +#include +#include +// #include + +typedef std::pair key; +typedef std::pair value; + +// typedef std::unordered_map table; + +// For detailed knowledge on the unordered_maps, go to the following link: +// http://marknelson.us/2011/09/03/hash-functions-for-c-unordered-containers/ + + +/* +->We’re used to seeing unordered_map declared with two template parameters: for ex- unordered_map Name; +But a look at the help page shows that IT ACTUALLY TAKES FIVE – the last three usually accept DEFAULT values: +________________________________________________________________ + +template, + class Pred = std::equal_to, + class Alloc = std::allocator > > + class unordered_map; +________________________________________________________________ + +The THIRD PARAMETER to the definition is a function object type that is used by the class to convert a key into a hash code. +By default it is set to std::hash. +Internally the unordered_map class calls operator() on an object of that type in order to get a hash code for a given key. + +Although unordered maps have a pre-defined hash function for the fundamental data types (int, char, std::string, etc.), but +An unordered_map doesn’t know how to create a hash for the given key type of std::pair, because it's a user-defined data type. + +As a general rule of thumb, if I have two hashes for independent variables, and I combine them using XOR, I can expect that the resulting hash +is probably just as good as the input hashes. 
Therefore, i can define the hash function as follows: + +size_t name_hash( const Name & name ) +{ + return hash()(name.first) ^ hash()(name.second); +} + +->Also,The standard defines a constructor that takes an initial number of buckets and a hashing object as inputs. +So the first step is to modify the declaration code to look like this: + +________________________________________________ + +unordered_map ids(100, name_hash ); +________________________________________________ + +But, The default implementation of unordered_map expects to be using a function object of type std::hash to calculate hashes, +and that is not what we are passing into the constructor. Thus, we'll have to convert our defined hash function to std::hash +Methods: +1) By using the: decltype keyword + unordered_map ids(100, name_hash ); +2) Specializing std::hash + use the name hash for your object, and define it as a specialization of that template class, + and finally, we have to hoist the whole thing into the std namespace. + Something like the one used in this file. +*/ +namespace std +{ + template<> + class hash + { + public: + size_t operator()(const key &key) const + { //^^^^^^ this const shows that the function doesnot change + // the state of the object. i.e. the method does not change any member variables. 
+ + return hash()(key.first) ^ hash()(key.second); + } + }; +}; +// class MyException:public exception +// { +// public: +// MyException(const std::string m = "Syntax Error"):msg(m){} +// ~MyException(void); +// const char* what() +// { +// return msg.c_str(); +// } +// private: +// std::string msg; +// }; +void constructTable(std::unordered_map & table); +int parser(std::unordered_map & table, std::vector & tokenStream); +int reduceMove(int productionNo, std::unordered_map & table); +#endif // SYNTAX_H_INCLUDED \ No newline at end of file diff --git a/syntax_analyser.cpp b/syntax_analyser.cpp new file mode 100644 index 0000000..c663a69 --- /dev/null +++ b/syntax_analyser.cpp @@ -0,0 +1,246 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // -lboost_system: Write this while compiling, this prevents + // the linker error. +#include + +#include "syntax.h" + +typedef std::pair stackElement; +std::stack stack; + +void constructTable(std::unordered_map & table) +{ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~SLR(1) TABLE CONSTRUCTION~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + /* The table consists of 4 columns: + + 1) Current Item: Indicates value at the top of the stack + 2) Next Symbol: Indicates the next token in the tokenStream + 3) Result: Indicates the state transition number for the canonical collection of LR(0) items represented by the DFA + 4) Action: Indicates the action to be performed- Shift, Reduce or Goto. 
+The above described table is an optimised version of the standard SLR(1) table and provides: +1) fast access of data items + ( With std::unordered_map, we can access data in O(1) time in the best case and O(n) in worst case ) +2) less memory consumption + (The standard table is a sparse table, but this table doesnot contain spaces, hence low memory utilization ) + +The following table is constructed making the canonical collection of LR(0) items and using the SLR(1) algorithm +to traverse the Deterministic Finite Automaton. + +The Shell Grammar used in this process is shown below: +___^^^^^^^^^^^^^^^____________________________________________________________________ + + valid_string = unix_command PIPE unix_command || unix_command REDIRECTION + filename || unix_command AMP || unix_command || special_command + unix_command = command_name ARGS + special_command = cd DIRECTORY || exit + command_name = any valid executable/interpreted file name + AMP = & + ARG = string + ARGS = ARG ARGS || ARG + DIRECTORY = absolute_path || relative_path + PIPE = | + REDIRECTION = < || > +_______________________________________________________________________________________ +*/ + + table[key(0,"")] = value(8,'S'); + table[key(0,"")] = value(1,'G'); + table[key(0,"")] = value(2,'G'); + + table[key(1,"<$>")] = value(1,'A'); + + table[key(2,"")] = value(3,'S'); + table[key(2,"")] = value(5,'S'); + table[key(2,"")] = value(7,'S'); + table[key(2,"<$>")] = value(4,'R'); + + table[key(3,"")] = value(8,'S'); + table[key(3,"")] = value(4,'G'); + + table[key(4,"<$>")] = value(1,'R'); + + table[key(5,"")] = value(6,'S'); + + table[key(6,"<$>")] = value(2,'R'); + + table[key(7,"<$>")] = value(3,'R'); + + table[key(8,"")] = value(11,'S'); + table[key(8,"")] = value(10,'G'); + table[key(8,"")] = value(9,'G'); + + table[key(9,"")] = value(5,'R'); + table[key(9,"")] = value(5,'R'); + table[key(9,"")] = value(5,'R'); + table[key(9,"<$>")] = value(5,'R'); + + table[key(10,"")] = value(11,'S'); + 
table[key(10,"")] = value(7,'R'); + table[key(10,"")] = value(7,'R'); + table[key(10,"")] = value(7,'R'); + table[key(10,"<$>")] = value(7,'R'); + table[key(10,"")] = value(10,'G'); + table[key(10,"")] = value(12,'G'); + + table[key(11,"")] = value(8,'R'); + table[key(11,"")] = value(8,'R'); + table[key(11,"")] = value(8,'R'); + table[key(11,"")] = value(8,'R'); + table[key(11,"<$>")] = value(8,'R'); + + + table[key(12,"")] = value(6,'R'); + table[key(12,"")] = value(6,'R'); + table[key(12,"")] = value(6,'R'); + table[key(12,"<$>")] = value(6,'R'); + + table[key(0,"")] = value(13,'G'); + table[key(0,"")] = value(14,'S'); + table[key(13,"<$>")] = value(9,'R'); + table[key(14,"")] = value(15,'S'); + table[key(15,"<$>")] = value(10,'R'); + table[key(8,"<$>")] = value(11,'R'); + + table[key(2,"<&&>")] = value(16,'S'); + table[key(16,"")] = value(8,'S'); + table[key(16,"")] = value(17,'G'); + table[key(17,"<$>")] = value(1,'R'); + table[key(11,"<&&>")] = value(8,'R'); + table[key(10,"<&&>")] = value(7,'R'); + table[key(9,"<&&>")] = value(5,'R'); + table[key(12,"<&&>")] = value(6,'R'); + + // auto ii = table.find(key(12,"")); + // std::cout << ii->first.first + // << std::endl + // << ii->first.second + // << std::endl + // << ii->second.first + // << std::endl + // << ii->second.second + // << std::endl; +} + +int reduceMove(int productionNo, std::unordered_map & table) +{ + int gotoState; + switch(productionNo) + { + case 1: for (int i = 0;i < 3; i++) + stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 2: for (int i = 0;i < 3; i++) + stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 3: for (int i = 0;i < 2; i++) + stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 4: stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 5: for (int i = 0; i < 2; i++) + stack.pop(); + gotoState = 
stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 6: for (int i = 0; i < 2; i++) + stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 7: stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 8: stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 9: stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 10: for (int i = 0; i < 2; i++) + stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + case 11: stack.pop(); + gotoState = stack.top().first; + stack.push(stackElement(-1,"")); + break; + default: throw productionNo; + } + auto ii = table.find(key(gotoState, stack.top().second)); + if (ii != table.end()) + { + if (ii->second.second == 'G') + stack.top().first = ii->second.first; + } +return 0; +} + +int parser(std::unordered_map & table, std::vector & tokenStream) +{ + + std::string nextSym; + stack.push(stackElement(0,"<$>")); + // std::cout << stack.top().first + // << stack.top().second; + int tokenStreamLength = tokenStream.size(); + // std::cout << tokenStreamLength; + try + { + for (int i = 0; i < tokenStreamLength; i++) + { + nextSym = tokenStream[i]; + // std::cout << "nextSym=" << nextSym <second.second == 'S') + stack.push(stackElement(ii->second.first, nextSym));; + if (ii->second.second == 'A') + return 1; + if (ii->second.second == 'R') + { + reduceMove(ii->second.first, table); + i--; + } + // std::cout << "stack.top().first" << stack.top().first << std::endl; + // std::cout << "stack.top().second"<< stack.top().second << std::endl; + } + else + { + throw nextSym; + } + } + } + catch (std::string e) + { + std::cout << "Syntax error: " << e << " unexpected" << std::endl; + } + catch (int e) + { + std::cout << "Syntax error: " << e << " the syntax used is not in accordance with the shell grammar." 
<< std::endl; + } + return 0; +} \ No newline at end of file diff --git a/syntax_analyser.o b/syntax_analyser.o new file mode 100644 index 0000000..8236cb2 Binary files /dev/null and b/syntax_analyser.o differ