Added more lexer tokenization cases: string literal and number tokenization

Bobby Lucero 2023-05-21 00:52:05 -04:00
parent 23799125a7
commit dfc518c582
4 changed files with 192 additions and 29 deletions

View File

@@ -2,30 +2,45 @@
 #include <string>
 #include <map>
+#include <vector>
 
 enum TokenType{
-    Identifier,
-    Number,
-    Equals,
-    OpenParenthesis,
-    CloseParenthesis,
-    BinaryOperator,
-    Test
+    OPEN_PAREN, CLOSE_PAREN, OPEN_BRACE, CLOSE_BRACE,
+    COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR,
+
+    BINARY_OP,
+
+    BANG, BANG_EQUAL,
+    EQUAL, DOUBLE_EQUAL,
+    GREATER, GREATER_EQUAL,
+    LESS, LESS_EQUAL,
+
+    IDENTIFIER, STRING, NUMBER,
+
+    AND, OR, TRUE, FALSE, IF, ELSE, FUNCTION, FOR,
+    WHILE, VAR, CLASS, SUPER, THIS, NONE, RETURN,
+
+    END_OF_FILE
 };
 
 const std::map<std::string, TokenType> KEYWORDS {
-    {"test", Test}
 };
 
 struct Token
 {
     TokenType type;
-    std::string value;
+    std::string lexeme;
+    //TODO Object literal;
+    int line;
 };
 
 class Lexer{
     public:
         std::vector<Token> Tokenize(std::string source);
+    private:
+        int line;
+    private:
+        bool matchOn(char expected, std::vector<char>& src);
 };
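
Note: a minimal sketch (ours, not part of the commit) of how the revised Token struct can be constructed and inspected; it assumes the header above is saved as lexer.h, since the file name is not shown in this view.

    // sketch.cpp -- "lexer.h" is an assumed name for the header above
    #include <iostream>
    #include "lexer.h"

    int main(){
        // Brace initialization, matching how Tokenize builds tokens;
        // the trailing 'line' field is value-initialized to 0 when omitted.
        Token tok{NUMBER, "11.1"};
        std::cout << tok.type << " " << tok.lexeme << std::endl; // the type prints as its integer value
        return 0;
    }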

View File

@@ -1,9 +1,8 @@
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
-x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
+bob.test
+10
+11.1
+
+test = (11 + 2 "xs")
+
+//"
+//11.
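
Note (ours, not in the commit): with the lexer changes below, the line test = (11 + 2 "xs") should lex to roughly this token stream; string lexemes keep their surrounding quotes, and the //" and //11. lines exercise comment skipping, so neither the quote nor the bare 11. ever reaches the string or number paths:

    IDENTIFIER "test", EQUAL "=", OPEN_PAREN "(", NUMBER "11",
    PLUS "+", NUMBER "2", STRING "\"xs\"", CLOSE_PAREN ")"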

View File

@@ -2,6 +2,9 @@
 #include <iostream>
 #include <cctype>
+
+using namespace std;
+
 std::vector<Token> Lexer::Tokenize(std::string source){
     std::vector<Token> tokens;
     std::vector<char> src{source.begin(), source.end()};
@@ -11,22 +14,129 @@ std::vector<Token> Lexer::Tokenize(std::string source){
         char t = src[0];
         if(t == '(')
         {
-            tokens.push_back(Token{OpenParenthesis, std::string(1, t)}); //brace initialization in case you forget
+            tokens.push_back(Token{OPEN_PAREN, std::string(1, t)}); //brace initialization in case you forget
             src.erase(src.begin());
         }
         else if(t == ')')
         {
-            tokens.push_back(Token{CloseParenthesis, std::string(1, t)});
+            tokens.push_back(Token{CLOSE_PAREN, std::string(1, t)});
             src.erase(src.begin());
         }
-        else if(t == '+' || t == '-' || t == '*' || t == '/')
+        else if(t == '{')
         {
-            tokens.push_back(Token{BinaryOperator, std::string(1, t)});
+            tokens.push_back(Token{OPEN_BRACE, std::string(1, t)});
+            src.erase(src.begin());
+        }
+        else if(t == '}')
+        {
+            tokens.push_back(Token{CLOSE_BRACE, std::string(1, t)});
+            src.erase(src.begin());
+        }
+        else if(t == ',')
+        {
+            tokens.push_back(Token{COMMA, std::string(1, t)});
+            src.erase(src.begin());
+        }
+        else if(t == '.')
+        {
+            tokens.push_back(Token{DOT, std::string(1, t)});
+            src.erase(src.begin());
+        }
+        else if(t == ';')
+        {
+            tokens.push_back(Token{SEMICOLON, std::string(1, t)});
+            src.erase(src.begin());
+        }
+        else if(t == '+')
+        {
+            tokens.push_back(Token{PLUS, std::string(1, t)});
+            src.erase(src.begin());
+        }
+        else if(t == '-')
+        {
+            tokens.push_back(Token{MINUS, std::string(1, t)});
+            src.erase(src.begin());
+        }
+        else if(t == '*')
+        {
+            tokens.push_back(Token{STAR, std::string(1, t)});
             src.erase(src.begin());
         }
         else if(t == '=')
         {
-            tokens.push_back(Token{Equals, std::string(1, t)});
+            std::string token = std::string(1, t);
+            src.erase(src.begin());
+            bool match = matchOn('=', src);
+            token += match ? "=" : "";
+            tokens.push_back(Token{match ? DOUBLE_EQUAL : EQUAL, token});
+        }
+        else if(t == '!')
+        {
+            std::string token = std::string(1, t);
+            src.erase(src.begin());
+            bool match = matchOn('=', src);
+            token += match ? "=" : "";
+            tokens.push_back(Token{match ? BANG_EQUAL : BANG, token});
+        }
+        else if(t == '<')
+        {
+            std::string token = std::string(1, t);
+            src.erase(src.begin());
+            bool match = matchOn('=', src);
+            token += match ? "=" : "";
+            tokens.push_back(Token{match ? LESS_EQUAL : LESS, token});
+        }
+        else if(t == '>')
+        {
+            std::string token = std::string(1, t);
+            src.erase(src.begin());
+            bool match = matchOn('=', src);
+            token += match ? "=" : "";
+            tokens.push_back(Token{match ? GREATER_EQUAL : GREATER, token});
+        }
+        else if(t == '/')
+        {
+            std::string token = std::string(1, t);
+            src.erase(src.begin());
+            bool match = matchOn('/', src);
+            if(match)
+            {
+                while(src.size() > 0 && src[0] != '\n')
+                {
+                    src.erase(src.begin());
+                }
+            }
+            else
+            {
+                tokens.push_back(Token{SLASH, std::string(1, t)});
+            }
+        }
+        else if(t == '"')
+        {
+            std::string str = std::string(1, src[0]);
+            src.erase(src.begin());
+            while(src.size() > 0 && src[0] != '"')
+            {
+                if(src[0] == '\n') line++;
+                str += src[0];
+                src.erase(src.begin());
+            }
+            if(src.size() == 0)
+            {
+                throw std::runtime_error("Unterminated string at line: " + std::to_string(this->line));
+            }
+            else if(src[0] == '"')
+            {
+                str += '"';
+                src.erase(src.begin());
+                tokens.push_back(Token{STRING, str});
+            }
+        }
+        else if(t == '\n')
+        {
+            line++;
             src.erase(src.begin());
         }
         else
@@ -41,7 +151,26 @@ std::vector<Token> Lexer::Tokenize(std::string source){
                 src.erase(src.begin());
             }
-            tokens.push_back(Token{Number, num});
+            if(src.size() > 0 && src[0] == '.')
+            {
+                src.erase(src.begin());
+                if(src.size() > 0 && std::isdigit(src[0]))
+                {
+                    num += '.';
+                    while(src.size() > 0 && std::isdigit(src[0]))
+                    {
+                        num += src[0];
+                        src.erase(src.begin());
+                    }
+                }
+                else
+                {
+                    throw std::runtime_error("malformed number at: " + std::to_string(this->line));
+                }
+            }
+            tokens.push_back(Token{NUMBER, num});
         }
         else if(std::isalpha(t))
         {
@@ -58,7 +187,7 @@ std::vector<Token> Lexer::Tokenize(std::string source){
             }
             else
             {
-                tokens.push_back(Token{Identifier, ident});
+                tokens.push_back(Token{IDENTIFIER, ident});
             }
         }
@@ -68,6 +197,7 @@ std::vector<Token> Lexer::Tokenize(std::string source){
+        }
         else
         {
             throw std::runtime_error("Unknown Token: '" + std::string(1, t) + "'");
         }
@@ -78,3 +208,11 @@ std::vector<Token> Lexer::Tokenize(std::string source){
     return tokens;
 }
+
+bool Lexer::matchOn(char expected, std::vector<char> &src)
+{
+    if(src.size() == 0) return false;
+    if(src[0] != expected) return false;
+    src.erase(src.begin());
+    return true;
+}
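
Note: the matchOn helper added at the end is one-character lookahead for two-character operators, shared by the '=', '!', '<', '>' and '/' branches. A standalone sketch (ours, not from the commit) of the pattern in isolation:

    // two_char_demo.cpp -- standalone illustration of the matchOn pattern
    #include <iostream>
    #include <string>
    #include <vector>

    static bool matchOn(char expected, std::vector<char>& src){
        if(src.size() == 0) return false;      // nothing left to peek at
        if(src[0] != expected) return false;   // next char differs: consume nothing
        src.erase(src.begin());                // matched: consume it
        return true;
    }

    int main(){
        std::string input = "!=";
        std::vector<char> src{input.begin(), input.end()};
        std::string token(1, src[0]);
        src.erase(src.begin());                 // consume '!'
        if(matchOn('=', src)) token += '=';     // consume '=' only on a match
        std::cout << token << std::endl;        // prints != (BANG_EQUAL); a lone "!" stays BANG
        return 0;
    }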

View File

@@ -46,9 +46,15 @@ public:
         }
         this->run(line);
+        hadError = false;
     }
 }
+
+void error(int line, string message)
+{
+}
+
 private:
+    bool hadError = false;
@@ -59,9 +65,14 @@ private:
     vector<Token> tokens = lexer.Tokenize(source);
     for(Token t : tokens){
-        cout << "{type: " << t.type << ", value: " << t.value << "}" << endl;
+        cout << "{type: " << t.type << ", value: " << t.lexeme << "}" << endl;
     }
 }
+
+void report(int line, string where, string message)
+{
+    hadError = true;
+}
 };
@@ -77,8 +88,8 @@ int main(){
     Bob bobLang;
-    //bobLang.runFile("source.bob");
-    bobLang.runPrompt();
+    bobLang.runFile("source.bob");
+    //bobLang.runPrompt();
     return 0;
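
Note: since TokenType is an unscoped enum, the debug loop above prints t.type as a bare integer. A sketch (ours; the table name and the idea are assumptions, not part of the commit) of a lookup that would make that printout readable:

    // Sketch: map token types to readable names for the debug printout.
    // Assumes <map> and <string> are included alongside the existing headers.
    static const std::map<TokenType, std::string> TOKEN_NAMES {
        {OPEN_PAREN, "OPEN_PAREN"}, {CLOSE_PAREN, "CLOSE_PAREN"},
        {NUMBER, "NUMBER"}, {STRING, "STRING"}, {IDENTIFIER, "IDENTIFIER"},
        // ...one entry per enumerator...
    };
    // usage inside run():
    //   cout << "{type: " << TOKEN_NAMES.at(t.type) << ", value: " << t.lexeme << "}" << endl;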