diff --git a/headers/Lexer.h b/headers/Lexer.h index d9edbce..770baa4 100644 --- a/headers/Lexer.h +++ b/headers/Lexer.h @@ -2,30 +2,45 @@ #include #include +#include enum TokenType{ - Identifier, - Number, - Equals, - OpenParenthesis, - CloseParenthesis, - BinaryOperator, - Test + OPEN_PAREN, CLOSE_PAREN, OPEN_BRACE, CLOSE_BRACE, + COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR, + + BINARY_OP, + + BANG, BANG_EQUAL, + EQUAL, DOUBLE_EQUAL, + GREATER, GREATER_EQUAL, + LESS, LESS_EQUAL, + + IDENTIFIER, STRING, NUMBER, + + AND, OR, TRUE, FALSE, IF, ELSE, FUNCTION, FOR, + WHILE, VAR, CLASS, SUPER, THIS, NONE, RETURN, + + END_OF_FILE }; const std::map KEYWORDS { - {"test", Test} + }; struct Token { TokenType type; - std::string value; + std::string lexeme; + //TODO Object literal; + int line; }; class Lexer{ public: std::vector Tokenize(std::string source); - +private: + int line; +private: + bool matchOn(char expected, std::vector& src); }; diff --git a/source.bob b/source.bob index 61c423f..76a517e 100644 --- a/source.bob +++ b/source.bob @@ -1,9 +1,8 @@ -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) -x = 45 * ( 4 / 3 ) * (45 / (20 * 3)) \ No newline at end of file +bob.test +10 +11.1 + +test = (11 + 2 "xs") + +//" +//11. \ No newline at end of file diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 15a93c0..87301e6 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -2,6 +2,9 @@ #include #include + +using namespace std; + std::vector Lexer::Tokenize(std::string source){ std::vector tokens; std::vector src{source.begin(), source.end()}; @@ -11,22 +14,129 @@ std::vector Lexer::Tokenize(std::string source){ char t = src[0]; if(t == '(') { - tokens.push_back(Token{OpenParenthesis, std::string(1, t)}); //brace initialization in case you forget + tokens.push_back(Token{OPEN_PAREN, std::string(1, t)}); //brace initialization in case you forget src.erase(src.begin()); } else if(t == ')') { - tokens.push_back(Token{CloseParenthesis, std::string(1, t)}); + tokens.push_back(Token{CLOSE_PAREN, std::string(1, t)}); src.erase(src.begin()); } - else if(t == '+' || t == '-' || t == '*' || t == '/') + else if(t == '{') { - tokens.push_back(Token{BinaryOperator, std::string(1, t)}); + tokens.push_back(Token{OPEN_BRACE, std::string(1, t)}); + src.erase(src.begin()); + } + else if(t == '}') + { + tokens.push_back(Token{CLOSE_BRACE, std::string(1, t)}); + src.erase(src.begin()); + } + else if(t == ',') + { + tokens.push_back(Token{COMMA, std::string(1, t)}); + src.erase(src.begin()); + } + else if(t == '.') + { + tokens.push_back(Token{DOT, std::string(1, t)}); + src.erase(src.begin()); + } + else if(t == ';') + { + tokens.push_back(Token{SEMICOLON, std::string(1, t)}); + src.erase(src.begin()); + } + else if(t == '+') + { + tokens.push_back(Token{PLUS, std::string(1, t)}); + src.erase(src.begin()); + } + else if(t == '-') + { + tokens.push_back(Token{MINUS, std::string(1, t)}); + src.erase(src.begin()); + } + else if(t == '*') + { + tokens.push_back(Token{STAR, std::string(1, t)}); src.erase(src.begin()); } else if(t == '=') { - tokens.push_back(Token{Equals, std::string(1, t)}); + std::string token = std::string(1, t); + src.erase(src.begin()); + bool match = matchOn('=', src); + token += match ? "=" : ""; + tokens.push_back(Token{match ? DOUBLE_EQUAL : EQUAL, token}); + } + else if(t == '!') + { + std::string token = std::string(1, t); + src.erase(src.begin()); + bool match = matchOn('=', src); + token += match ? "=" : ""; + tokens.push_back(Token{match ? BANG_EQUAL : BANG, token}); + } + else if(t == '<') + { + std::string token = std::string(1, t); + src.erase(src.begin()); + bool match = matchOn('=', src); + token += match ? "=" : ""; + tokens.push_back(Token{match ? LESS_EQUAL : LESS, token}); + } + else if(t == '>') + { + std::string token = std::string(1, t); + src.erase(src.begin()); + bool match = matchOn('=', src); + token += match ? "=" : ""; + tokens.push_back(Token{match ? GREATER_EQUAL : GREATER, token}); + } + else if(t == '/') + { + std::string token = std::string(1, t); + src.erase(src.begin()); + bool match = matchOn('/', src); + if(match) + { + while(src.size() > 0 && src[0] != '\n') + { + src.erase(src.begin()); + } + } + else + { + tokens.push_back(Token{SLASH, std::string(1, t)}); + } + } + else if(t == '"') + { + std::string str = std::string(1, src[0]); + src.erase(src.begin()); + while(src.size() > 0 && src[0] != '"') + { + if(src[0] == '\n') line++; + str += src[0]; + src.erase(src.begin()); + } + if(src.size() == 0) + { + throw std::runtime_error("Unterminated string at line: " + std::to_string(this->line)); + } + else if(src[0] == '"') + { + str += '"'; + src.erase(src.begin()); + tokens.push_back(Token{STRING, str}); + } + + + } + else if(t == '\n') + { + line++; src.erase(src.begin()); } else @@ -41,7 +151,26 @@ std::vector Lexer::Tokenize(std::string source){ src.erase(src.begin()); } - tokens.push_back(Token{Number, num}); + if(src.size() > 0 && src[0] == '.') + { + src.erase(src.begin()); + if(src.size() > 0 && std::isdigit(src[0])) + { + num += '.'; + while(src.size() > 0 && std::isdigit(src[0])) + { + num += src[0]; + src.erase(src.begin()); + } + } + else + { + throw std::runtime_error("malformed number at: " + std::to_string(this->line)); + } + + } + + tokens.push_back(Token{NUMBER, num}); } else if(std::isalpha(t)) { @@ -58,7 +187,7 @@ std::vector Lexer::Tokenize(std::string source){ } else { - tokens.push_back(Token{Identifier, ident}); + tokens.push_back(Token{IDENTIFIER, ident}); } } @@ -68,6 +197,7 @@ std::vector Lexer::Tokenize(std::string source){ } else { + throw std::runtime_error("Unknown Token: '" + std::string(1, t) + "'"); } @@ -78,3 +208,11 @@ std::vector Lexer::Tokenize(std::string source){ return tokens; } + +bool Lexer::matchOn(char expected, std::vector &src) +{ + if(src.size() == 0) return false; + if(src[0] != expected) return false; + src.erase(src.begin()); + return true; +} diff --git a/src/bob.cpp b/src/bob.cpp index 9bfb3c2..5d9dbeb 100644 --- a/src/bob.cpp +++ b/src/bob.cpp @@ -46,8 +46,14 @@ public: } this->run(line); + hadError = false; } } + + void error(int line, string message) + { + + } private: @@ -59,9 +65,14 @@ private: vector tokens = lexer.Tokenize(source); for(Token t : tokens){ - cout << "{type: " << t.type << ", value: " << t.value << "}" << endl; + cout << "{type: " << t.type << ", value: " << t.lexeme << "}" << endl; } } + + void report(int line, string where, string message) + { + hadError = true; + } }; int main(){ @@ -77,8 +88,8 @@ int main(){ Bob bobLang; - //bobLang.runFile("source.bob"); - bobLang.runPrompt(); + bobLang.runFile("source.bob"); + //bobLang.runPrompt(); return 0;