Added more lexer tokenization cases, string literal and number tokenization

This commit is contained in:
Bobby Lucero 2023-05-21 00:52:05 -04:00
parent 23799125a7
commit dfc518c582
4 changed files with 192 additions and 29 deletions

View File

@ -2,30 +2,45 @@
#include <string>
#include <map>
#include <vector>
// Categories attached to every Token that Lexer::Tokenize emits.
enum TokenType{
Identifier,
Number,
Equals,
OpenParenthesis,
CloseParenthesis,
BinaryOperator,
Test
// Punctuation and operators the lexer emits for a single character.
OPEN_PAREN, CLOSE_PAREN, OPEN_BRACE, CLOSE_BRACE,
COMMA, DOT, MINUS, PLUS, SEMICOLON, SLASH, STAR,
BINARY_OP,
// Operators that are one character, or two when immediately followed by '='.
BANG, BANG_EQUAL,
EQUAL, DOUBLE_EQUAL,
GREATER, GREATER_EQUAL,
LESS, LESS_EQUAL,
// Names, double-quoted strings, and integer/decimal numbers.
IDENTIFIER, STRING, NUMBER,
// NOTE(review): presumably reserved words; the visible KEYWORDS table does not
// map to any of these yet - confirm how they are meant to be produced.
AND, OR, TRUE, FALSE, IF, ELSE, FUNCTION, FOR,
WHILE, VAR, CLASS, SUPER, THIS, NONE, RETURN,
// NOTE(review): not emitted by any lexer code visible here - TODO confirm.
END_OF_FILE
};
// Table mapping a keyword's spelling to the TokenType it should be lexed as.
const std::map<std::string, TokenType> KEYWORDS = {
    { "test", Test },
};
// A single lexical unit: its category plus the text it was built from.
struct Token
{
// Category of this token.
TokenType type;
// NOTE(review): both `value` and `lexeme` are visible in this listing. The
// lexer builds tokens with two-element brace initialisers (type, text), which
// fill members in declaration order - confirm which string is meant to hold
// the token text.
std::string value;
std::string lexeme;
//TODO Object literal;
// Not supplied by the two-element initialisers seen in the lexer, so it is
// value-initialised (0) for tokens created that way.
int line;
};
// Turns raw source text into a flat sequence of tokens.
class Lexer{
public:
    // Scans `source` from front to back and returns the tokens found, in order.
    // Throws std::runtime_error for an unterminated string, a malformed number,
    // or a character that starts no known token.
    std::vector<Token> Tokenize(std::string source);
private:
    // Line counter, bumped each time a newline is consumed and quoted in error
    // messages. Initialised here so it is never read while indeterminate; it
    // starts at 1 so the first line of input is reported as line 1.
    int line = 1;

    // Removes the first character of `src` and returns true when that character
    // equals `expected`; otherwise leaves `src` untouched and returns false.
    bool matchOn(char expected, std::vector<char>& src);
};

View File

@ -1,9 +1,8 @@
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
x = 45 * ( 4 / 3 ) * (45 / (20 * 3))
bob.test
10
11.1
test = (11 + 2 "xs")
//"
//11.

View File

@ -2,6 +2,9 @@
#include <iostream>
#include <cctype>
using namespace std;
std::vector<Token> Lexer::Tokenize(std::string source){
std::vector<Token> tokens;
std::vector<char> src{source.begin(), source.end()};
@ -11,22 +14,129 @@ std::vector<Token> Lexer::Tokenize(std::string source){
char t = src[0];
if(t == '(')
{
tokens.push_back(Token{OpenParenthesis, std::string(1, t)}); //brace initialization in case you forget
tokens.push_back(Token{OPEN_PAREN, std::string(1, t)}); //brace initialization in case you forget
src.erase(src.begin());
}
else if(t == ')')
{
tokens.push_back(Token{CloseParenthesis, std::string(1, t)});
tokens.push_back(Token{CLOSE_PAREN, std::string(1, t)});
src.erase(src.begin());
}
else if(t == '+' || t == '-' || t == '*' || t == '/')
else if(t == '{')
{
tokens.push_back(Token{BinaryOperator, std::string(1, t)});
tokens.push_back(Token{OPEN_BRACE, std::string(1, t)});
src.erase(src.begin());
}
else if(t == '}')
{
tokens.push_back(Token{CLOSE_BRACE, std::string(1, t)});
src.erase(src.begin());
}
else if(t == ',')
{
tokens.push_back(Token{COMMA, std::string(1, t)});
src.erase(src.begin());
}
else if(t == '.')
{
tokens.push_back(Token{DOT, std::string(1, t)});
src.erase(src.begin());
}
else if(t == ';')
{
tokens.push_back(Token{SEMICOLON, std::string(1, t)});
src.erase(src.begin());
}
else if(t == '+')
{
tokens.push_back(Token{PLUS, std::string(1, t)});
src.erase(src.begin());
}
else if(t == '-')
{
tokens.push_back(Token{MINUS, std::string(1, t)});
src.erase(src.begin());
}
else if(t == '*')
{
tokens.push_back(Token{STAR, std::string(1, t)});
src.erase(src.begin());
}
else if(t == '=')
{
tokens.push_back(Token{Equals, std::string(1, t)});
std::string token = std::string(1, t);
src.erase(src.begin());
bool match = matchOn('=', src);
token += match ? "=" : "";
tokens.push_back(Token{match ? DOUBLE_EQUAL : EQUAL, token});
}
else if(t == '!')
{
std::string token = std::string(1, t);
src.erase(src.begin());
bool match = matchOn('=', src);
token += match ? "=" : "";
tokens.push_back(Token{match ? BANG_EQUAL : BANG, token});
}
else if(t == '<')
{
std::string token = std::string(1, t);
src.erase(src.begin());
bool match = matchOn('=', src);
token += match ? "=" : "";
tokens.push_back(Token{match ? LESS_EQUAL : LESS, token});
}
else if(t == '>')
{
std::string token = std::string(1, t);
src.erase(src.begin());
bool match = matchOn('=', src);
token += match ? "=" : "";
tokens.push_back(Token{match ? GREATER_EQUAL : GREATER, token});
}
else if(t == '/')
{
std::string token = std::string(1, t);
src.erase(src.begin());
bool match = matchOn('/', src);
if(match)
{
while(src.size() > 0 && src[0] != '\n')
{
src.erase(src.begin());
}
}
else
{
tokens.push_back(Token{SLASH, std::string(1, t)});
}
}
else if(t == '"')
{
std::string str = std::string(1, src[0]);
src.erase(src.begin());
while(src.size() > 0 && src[0] != '"')
{
if(src[0] == '\n') line++;
str += src[0];
src.erase(src.begin());
}
if(src.size() == 0)
{
throw std::runtime_error("Unterminated string at line: " + std::to_string(this->line));
}
else if(src[0] == '"')
{
str += '"';
src.erase(src.begin());
tokens.push_back(Token{STRING, str});
}
}
else if(t == '\n')
{
line++;
src.erase(src.begin());
}
else
@ -41,7 +151,26 @@ std::vector<Token> Lexer::Tokenize(std::string source){
src.erase(src.begin());
}
tokens.push_back(Token{Number, num});
if(src.size() > 0 && src[0] == '.')
{
src.erase(src.begin());
if(src.size() > 0 && std::isdigit(src[0]))
{
num += '.';
while(src.size() > 0 && std::isdigit(src[0]))
{
num += src[0];
src.erase(src.begin());
}
}
else
{
throw std::runtime_error("malformed number at: " + std::to_string(this->line));
}
}
tokens.push_back(Token{NUMBER, num});
}
else if(std::isalpha(t))
{
@ -58,7 +187,7 @@ std::vector<Token> Lexer::Tokenize(std::string source){
}
else
{
tokens.push_back(Token{Identifier, ident});
tokens.push_back(Token{IDENTIFIER, ident});
}
}
@ -68,6 +197,7 @@ std::vector<Token> Lexer::Tokenize(std::string source){
}
else
{
throw std::runtime_error("Unknown Token: '" + std::string(1, t) + "'");
}
@ -78,3 +208,11 @@ std::vector<Token> Lexer::Tokenize(std::string source){
return tokens;
}
// Conditionally consumes one character of look-ahead.
// Returns true and drops the front of `src` when the front equals `expected`;
// returns false and leaves `src` unchanged when it differs or `src` is empty.
bool Lexer::matchOn(char expected, std::vector<char> &src)
{
    const bool hit = !src.empty() && src.front() == expected;
    if(hit)
    {
        src.erase(src.begin());
    }
    return hit;
}

View File

@ -46,9 +46,15 @@ public:
}
this->run(line);
hadError = false;
}
}
// Error hook taking a line number and a message.
// Currently a stub: the body is empty, so both arguments are ignored and no
// state is changed.
void error(int line, string message)
{
}
private:
bool hadError = false;
@ -59,9 +65,14 @@ private:
vector<Token> tokens = lexer.Tokenize(source);
for(Token t : tokens){
cout << "{type: " << t.type << ", value: " << t.value << "}" << endl;
cout << "{type: " << t.type << ", value: " << t.lexeme << "}" << endl;
}
}
void report(int line, string where, string message)
{
hadError = true;
}
};
int main(){
@ -77,8 +88,8 @@ int main(){
Bob bobLang;
//bobLang.runFile("source.bob");
bobLang.runPrompt();
bobLang.runFile("source.bob");
//bobLang.runPrompt();
return 0;