AST Struct Generator

This commit is contained in:
Bobby Lucero 2023-05-21 20:40:43 -04:00
parent dfc518c582
commit c5f981321c
18 changed files with 448 additions and 174 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
.vscode/ .vscode/
build/ build/
.idea

View File

@ -7,22 +7,26 @@ CC = g++
CFLAGS = -Wall -Wextra -std=c++11 CFLAGS = -Wall -Wextra -std=c++11
# Source directory # Source directory
SRC_DIR = ./src SRC_DIR = ./source
# Output directory # Output directory
BUILD_DIR = ./build BUILD_DIR = ./build
# Get all CPP files in the source directory # Find all C++ files recursively in the source directory
CPP_FILES := $(wildcard $(SRC_DIR)/*.cpp) CPP_FILES := $(shell find $(SRC_DIR) -type f -name '*.cpp')
# Generate object file names by replacing the source directory with the build directory # Generate object file names by replacing the source directory with the build directory
OBJ_FILES := $(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(CPP_FILES)) OBJ_FILES := $(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(CPP_FILES))
# Create directories for object files
$(shell mkdir -p $(dir $(OBJ_FILES)))
# Default target # Default target
all: $(BUILD_DIR)/bob all: $(BUILD_DIR)/bob
# Rule to create the build directory if it doesn't exist # Rule to create necessary directories
$(shell mkdir -p $(BUILD_DIR)) $(DIRS):
mkdir -p $(patsubst $(SRC_DIR)/%, $(OUTPUT_DIR)/%, $@)
# Rule to compile object files # Rule to compile object files
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp

49
headers/Expression.h Normal file
View File

@ -0,0 +1,49 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#pragma once
#include "Lexer.h"
#include <iostream>
struct Expr{
    // Base type for every expression AST node. The virtual destructor makes
    // the hierarchy polymorphic, which enables dynamic casts (see main.cpp's
    // dynamic_pointer_cast usage) and safe deletion through a base pointer.
    virtual ~Expr()
    {
    }
};
struct BinaryExpr : Expr
{
    // Infix expression: left <oper> right (e.g. 1 + 2).
    // NOTE(review): the operands are stored as `const Expr` BY VALUE, so any
    // derived node passed in is sliced down to the empty Expr base and the
    // subtree's data is lost. Consider std::shared_ptr<Expr> members —
    // confirm against how the parser will build trees.
    const Expr left;
    const Token oper;
    const Expr right;

    BinaryExpr(Expr left, Token oper, Expr right) : left(left), oper(oper), right(right)
    {
    }
};
struct GroupingExpr : Expr
{
    // Parenthesized expression: ( expression ).
    // NOTE(review): `expression` is a `const Expr` by value, so a derived
    // node passed here is sliced to the empty base and its contents are
    // lost — consider std::shared_ptr<Expr>; confirm with parser design.
    const Expr expression;

    GroupingExpr(Expr expression) : expression(expression)
    {
    }
};
struct LiteralExpr : Expr
{
    // Literal node; the value is kept as its textual form (a std::string),
    // matching how the lexer stores NUMBER/STRING lexemes.
    const std::string value;

    LiteralExpr(std::string value) : value(value)
    {
    }
};
struct UnaryExpr : Expr
{
    // Prefix expression: <oper> right (e.g. -x, !flag).
    // NOTE(review): `right` is a `const Expr` by value — derived operands
    // are sliced to the empty base; consider std::shared_ptr<Expr>.
    const Token oper;
    const Expr right;

    UnaryExpr(Token oper, Expr right) : oper(oper), right(right)
    {
    }
};

View File

@ -24,7 +24,21 @@ enum TokenType{
}; };
const std::map<std::string, TokenType> KEYWORDS { const std::map<std::string, TokenType> KEYWORDS {
{"and", AND},
{"or", OR},
{"true", TRUE},
{"false", FALSE},
{"if", IF},
{"else", ELSE},
{"func", FUNCTION},
{"for", FOR},
{"while", WHILE},
{"var", VAR},
{"class", CLASS},
{"super", SUPER},
{"this", THIS},
{"none", NONE},
{"return", RETURN}
}; };
struct Token struct Token
@ -41,6 +55,8 @@ public:
std::vector<Token> Tokenize(std::string source); std::vector<Token> Tokenize(std::string source);
private: private:
int line; int line;
std::vector<char> src;
private: private:
bool matchOn(char expected, std::vector<char>& src); bool matchOn(char expected);
void advance();
}; };

31
headers/bob.h Normal file
View File

@ -0,0 +1,31 @@
#pragma once
#include <iostream>
#include <fstream>
#include <string>
#include "../headers/Lexer.h"
#define VERSION "0.0.1"
class Bob
{
public:
    // Lexer instance reused for every run()/runFile()/runPrompt() call.
    Lexer lexer;
public:
    // Reads the whole file at `path` and executes it; prints "File not
    // found" and returns if the file cannot be opened (see bob.cpp).
    void runFile(std::string path);
    // Interactive REPL: prompts with "-> " and executes each stdin line
    // until EOF; hadError is reset after each line.
    void runPrompt();
    // Error entry point for other components. NOTE(review): currently an
    // empty stub in bob.cpp — errors reported here are dropped.
    void error(int line, std::string message);
private:
    // Set once any error is reported during a run.
    bool hadError = false;
private:
    // Tokenizes `source` and prints each token's type and lexeme.
    void run(std::string source);
    // Flags the error state (sets hadError); see bob.cpp.
    void report(int line, std::string where, std::string message);
};

View File

@ -0,0 +1,40 @@
#pragma once
#include <iostream>
#include <vector>
// Splits `input` on every occurrence of `delimiter` and returns the pieces
// in order. The delimiter is not included in any token and empty tokens are
// preserved (e.g. splitString(",a", ",") -> {"", "a"}). An empty delimiter
// matches nothing, so the whole input comes back as one token (this also
// prevents an infinite scan loop).
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit violates the ODR.
inline std::vector<std::string> splitString(const std::string& input, std::string delimiter) {
    std::vector<std::string> tokens;
    if (delimiter.empty()) {
        tokens.push_back(input);
        return tokens;
    }
    size_t start = 0;
    size_t end = input.find(delimiter);
    while (end != std::string::npos) {
        tokens.push_back(input.substr(start, end - start));
        // BUG FIX: previously advanced by 1 instead of delimiter.length(),
        // which left the tail of a multi-character delimiter glued onto the
        // next token (splitting "a, b" on ", " produced {"a", " b"}).
        start = end + delimiter.length();
        end = input.find(delimiter, start);
    }
    // Last token: everything after the final delimiter.
    tokens.push_back(input.substr(start));
    return tokens;
}
// Returns `str` with leading and trailing whitespace (space, tab, newline,
// carriage return) removed; an all-whitespace string yields "".
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit violates the ODR.
inline std::string trim(const std::string& str) {
    // First non-whitespace character; npos means nothing but whitespace.
    const size_t start = str.find_first_not_of(" \t\n\r");
    if (start == std::string::npos) {
        return "";
    }
    // Last non-whitespace character (guaranteed to exist at this point).
    const size_t end = str.find_last_not_of(" \t\n\r");
    return str.substr(start, end - start + 1);
}

View File

@ -2,7 +2,15 @@ bob.test
10 10
11.1 11.1
test = (11 + 2 "xs") test = (11 + 2 "xs
hello
end")
//" //
//11. //11.
12//11
11.
11.69 + 66.735293857293875 + 235982735987235.0 + 1
123a

BIN
source/.DS_Store vendored Normal file

Binary file not shown.

5
source/Expression.cpp Normal file
View File

@ -0,0 +1,5 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#include "../headers/Expression.h"

View File

@ -1,135 +1,142 @@
#include "../headers/Lexer.h" #include "../headers/Lexer.h"
#include <iostream>
#include <cctype> #include <cctype>
using namespace std; using namespace std;
std::vector<Token> Lexer::Tokenize(std::string source){ std::vector<Token> Lexer::Tokenize(std::string source){
std::vector<Token> tokens; std::vector<Token> tokens;
std::vector<char> src{source.begin(), source.end()}; src = std::vector<char>{source.begin(), source.end()};
line = 0;
while(src.size() > 0) while(!src.empty())
{ {
char t = src[0]; char t = src[0];
if(t == '(') if(t == '(')
{ {
tokens.push_back(Token{OPEN_PAREN, std::string(1, t)}); //brace initialization in case you forget tokens.push_back(Token{OPEN_PAREN, std::string(1, t), line}); //brace initialization in case you forget
src.erase(src.begin()); advance();
} }
else if(t == ')') else if(t == ')')
{ {
tokens.push_back(Token{CLOSE_PAREN, std::string(1, t)}); tokens.push_back(Token{CLOSE_PAREN, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '{') else if(t == '{')
{ {
tokens.push_back(Token{OPEN_BRACE, std::string(1, t)}); tokens.push_back(Token{OPEN_BRACE, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '}') else if(t == '}')
{ {
tokens.push_back(Token{CLOSE_BRACE, std::string(1, t)}); tokens.push_back(Token{CLOSE_BRACE, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == ',') else if(t == ',')
{ {
tokens.push_back(Token{COMMA, std::string(1, t)}); tokens.push_back(Token{COMMA, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '.') else if(t == '.')
{ {
tokens.push_back(Token{DOT, std::string(1, t)}); tokens.push_back(Token{DOT, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == ';') else if(t == ';')
{ {
tokens.push_back(Token{SEMICOLON, std::string(1, t)}); tokens.push_back(Token{SEMICOLON, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '+') else if(t == '+')
{ {
tokens.push_back(Token{PLUS, std::string(1, t)}); tokens.push_back(Token{PLUS, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '-') else if(t == '-')
{ {
tokens.push_back(Token{MINUS, std::string(1, t)}); tokens.push_back(Token{MINUS, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '*') else if(t == '*')
{ {
tokens.push_back(Token{STAR, std::string(1, t)}); tokens.push_back(Token{STAR, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '=') else if(t == '=')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? DOUBLE_EQUAL : EQUAL, token}); tokens.push_back(Token{match ? DOUBLE_EQUAL : EQUAL, token, line});
} }
else if(t == '!') else if(t == '!')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? BANG_EQUAL : BANG, token}); tokens.push_back(Token{match ? BANG_EQUAL : BANG, token, line});
} }
else if(t == '<') else if(t == '<')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? LESS_EQUAL : LESS, token}); tokens.push_back(Token{match ? LESS_EQUAL : LESS, token, line});
} }
else if(t == '>') else if(t == '>')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? GREATER_EQUAL : GREATER, token}); tokens.push_back(Token{match ? GREATER_EQUAL : GREATER, token, line});
}
else if(t == '&')
{
std::string token = std::string(1, t);
advance();
bool match = matchOn('&');
token += match ? "&" : "";
if(match) tokens.push_back(Token{AND, token, line});
} }
else if(t == '/') else if(t == '/')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('/', src); bool match = matchOn('/');
if(match) if(match)
{ {
while(src.size() > 0 && src[0] != '\n') while(!src.empty() && src[0] != '\n')
{ {
src.erase(src.begin()); advance();
} }
} }
else else
{ {
tokens.push_back(Token{SLASH, std::string(1, t)}); tokens.push_back(Token{SLASH, std::string(1, t), line});
} }
} }
else if(t == '"') else if(t == '"')
{ {
std::string str = std::string(1, src[0]); std::string str = std::string(1, src[0]);
src.erase(src.begin()); advance();
while(src.size() > 0 && src[0] != '"') while(!src.empty() && src[0] != '"')
{ {
if(src[0] == '\n') line++; if(src[0] == '\n') line++;
str += src[0]; str += src[0];
src.erase(src.begin()); advance();
} }
if(src.size() == 0) if(src.empty())
{ {
throw std::runtime_error("Unterminated string at line: " + std::to_string(this->line)); throw std::runtime_error("Unterminated string at line: " + std::to_string(this->line));
} }
else if(src[0] == '"') else if(src[0] == '"')
{ {
str += '"'; str += '"';
src.erase(src.begin()); advance();
tokens.push_back(Token{STRING, str}); tokens.push_back(Token{STRING, str, line});
} }
@ -137,30 +144,30 @@ std::vector<Token> Lexer::Tokenize(std::string source){
else if(t == '\n') else if(t == '\n')
{ {
line++; line++;
src.erase(src.begin()); advance();
} }
else else
{ {
//Multi char tokens //Multi char tokens
if(std::isdigit(t)) if(std::isdigit(t))
{ {
std::string num = ""; std::string num;
while(src.size() > 0 && std::isdigit(src[0])) while(!src.empty() && std::isdigit(src[0]))
{ {
num += src[0]; num += src[0];
src.erase(src.begin()); advance();
} }
if(src.size() > 0 && src[0] == '.') if(!src.empty() && src[0] == '.')
{ {
src.erase(src.begin()); advance();
if(src.size() > 0 && std::isdigit(src[0])) if(!src.empty() && std::isdigit(src[0]))
{ {
num += '.'; num += '.';
while(src.size() > 0 && std::isdigit(src[0])) while(!src.empty() && std::isdigit(src[0]))
{ {
num += src[0]; num += src[0];
src.erase(src.begin()); advance();
} }
} }
else else
@ -170,30 +177,30 @@ std::vector<Token> Lexer::Tokenize(std::string source){
} }
tokens.push_back(Token{NUMBER, num}); tokens.push_back(Token{NUMBER, num, line});
} }
else if(std::isalpha(t)) else if(std::isalpha(t))
{ {
std::string ident = ""; std::string ident;
while(src.size() > 0 && std::isalpha(src[0])) while(!src.empty() && std::isalpha(src[0]))
{ {
ident += src[0]; ident += src[0];
src.erase(src.begin()); advance();
} }
if(KEYWORDS.find(ident) != KEYWORDS.end()) //identifier is a keyword if(KEYWORDS.find(ident) != KEYWORDS.end()) //identifier is a keyword
{ {
tokens.push_back(Token{KEYWORDS.at(ident), ident}); tokens.push_back(Token{KEYWORDS.at(ident), ident, line});
} }
else else
{ {
tokens.push_back(Token{IDENTIFIER, ident}); tokens.push_back(Token{IDENTIFIER, ident, line});
} }
} }
else if(t == ' ' || t == '\t' || t == '\n') else if(t == ' ' || t == '\t')
{ {
src.erase(src.begin()); //ignore t advance();
} }
else else
{ {
@ -209,10 +216,15 @@ std::vector<Token> Lexer::Tokenize(std::string source){
return tokens; return tokens;
} }
bool Lexer::matchOn(char expected, std::vector<char> &src) bool Lexer::matchOn(char expected)
{ {
if(src.size() == 0) return false; if(src.empty()) return false;
if(src[0] != expected) return false; if(src[0] != expected) return false;
src.erase(src.begin()); advance();
return true; return true;
} }
void Lexer::advance()
{
src.erase(src.begin());
}

60
source/bob.cpp Normal file
View File

@ -0,0 +1,60 @@
#include "../headers/bob.h"
using namespace std;
// Loads the entire file at `path` into memory and executes it.
// Prints "File not found" and bails out if the file cannot be opened.
void Bob::runFile(string path)
{
    ifstream file(path);
    if(!file.is_open())
    {
        cout << "File not found" << endl;
        return;
    }
    // Slurp the whole stream into one string before running it.
    string source((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
    this->run(source);
}
void Bob::runPrompt()
{
cout << "Bob v" << VERSION << ", 2023" << endl;
for(;;)
{
string line;
cout << "-> ";
std::getline(std::cin, line);
if(std::cin.eof())
{
break;
}
this->run(line);
hadError = false;
}
}
void Bob::error(int line, string message)
{
}
// Tokenizes `source` and dumps every token's type and lexeme to stdout.
void Bob::run(string source)
{
    for(const Token& tok : lexer.Tokenize(source))
    {
        cout << "{type: " << tok.type << ", value: " << tok.lexeme << "}" << endl;
    }
}
void Bob::report(int line, string where, string message)
{
hadError = true;
}

37
source/main.cpp Normal file
View File

@ -0,0 +1,37 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#include "../headers/bob.h"
#include "../headers/Expression.h"
#include "../headers/Lexer.h"
int main(){
    Bob bobLang;
    //bobLang.runFile("source.bob");

    // Demo of the Expr hierarchy: build two BinaryExprs behind a shared
    // base pointer and use dynamic_pointer_cast to recover the node type.
    // NOTE(review): BinaryExpr stores operands as `const Expr` by value, so
    // `a` and `b` are sliced to the empty base here — harmless for this
    // demo, but real subtrees would lose their data.
    Expr a;
    Expr b;
    Token t = {PLUS, "+", 1};
    Token t2 = {MINUS, "-", 1};
    // (Removed an unused local `BinaryExpr e` that was constructed and
    // never read.)

    std::shared_ptr<Expr> any = std::make_shared<BinaryExpr>(a, t, b);
    if(std::shared_ptr<BinaryExpr> binexpr = std::dynamic_pointer_cast<BinaryExpr>(any))
    {
        std::cout << binexpr->oper.lexeme;
    }

    any = std::make_shared<BinaryExpr>(a, t2, b);
    if(std::shared_ptr<BinaryExpr> binexpr = std::dynamic_pointer_cast<BinaryExpr>(any))
    {
        std::cout << binexpr->oper.lexeme;
    }
    std::cout << std::endl;

    bobLang.runPrompt();
    return 0;
}

View File

@ -1,96 +0,0 @@
#include <iostream>
#include <fstream>
#include <string>
#include "../headers/Lexer.h"
#define VERSION "0.0.1"
using namespace std;
class Bob
{
public:
Lexer lexer;
public:
void runFile(string path)
{
ifstream file = ifstream(path);
string source = "";
if(file.is_open()){
source = string(istreambuf_iterator<char>(file), istreambuf_iterator<char>());
}
else
{
cout << "File not found" << endl;
return;
}
this->run(source);
}
void runPrompt()
{
cout << "Bob v" << VERSION << ", 2023" << endl;
for(;;)
{
string line;
cout << "-> ";
std::getline(std::cin, line);
if(std::cin.eof())
{
break;
}
this->run(line);
hadError = false;
}
}
void error(int line, string message)
{
}
private:
bool hadError = false;
private:
void run(string source)
{
vector<Token> tokens = lexer.Tokenize(source);
for(Token t : tokens){
cout << "{type: " << t.type << ", value: " << t.lexeme << "}" << endl;
}
}
void report(int line, string where, string message)
{
hadError = true;
}
};
int main(){
// string TokenTypeMappings[] = {
// "Identifier",
// "Number",
// "Equals",
// "OpenParen",
// "CloseParen",
// "BinaryOperator",
// "TestKeyword"
// };
Bob bobLang;
bobLang.runFile("source.bob");
//bobLang.runPrompt();
return 0;
}

35
testthing Normal file
View File

@ -0,0 +1,35 @@
// NOTE(review): this file appears to be sample output from
// tools/GenerateAST — the structs match headers/Expression.h but there is
// no #include, no `Expr` base definition, and no header guard, so it does
// not compile standalone. Confirm whether it should be checked in at all.
struct BinaryExpr : Expr
{
    const Expr left;
    const Token oper;
    const Expr right;

    BinaryExpr(Expr left, Token oper, Expr right) : left(left), oper(oper), right(right)
    {
    }
};
struct GroupingExpr : Expr
{
    const Expr expression;

    GroupingExpr(Expr expression) : expression(expression)
    {
    }
};
struct LiteralExpr : Expr
{
    const std::string value;

    LiteralExpr(std::string value) : value(value)
    {
    }
};
struct UnaryExpr : Expr
{
    const Token oper;
    const Expr right;

    UnaryExpr(Token oper, Expr right) : oper(oper), right(right)
    {
    }
};

BIN
tools/GenerateAST Executable file

Binary file not shown.

72
tools/GenerateAST.cpp Normal file
View File

@ -0,0 +1,72 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#include <iostream>
#include <vector>
#include <fstream>
#include "../headers/helperFunctions/HelperFunctions.h"
// Emits one generated struct definition (named className + "Expr", deriving
// from baseName) to `out`. fieldList is a ", "-separated list of
// "<type> <name>" declarations; each becomes a const data member and a
// constructor parameter with a member-wise initializer.
void defineType(std::ofstream &out, std::string baseName, std::string className, std::string fieldList)
{
    const std::vector<std::string> fields = splitString(fieldList, ", ");

    out << "struct " << className << "Expr : " << baseName << "\n{\n";

    // One const member per declared field.
    for (const std::string& decl : fields)
    {
        out << "    const " << trim(decl) << ";\n";
    }

    // Constructor taking the full field list verbatim...
    out << "\n    " << className << "Expr(" << fieldList << ") : ";

    // ...followed by a "name(name)" initializer per field, joined by ", ".
    std::string initList;
    for (size_t i = 0; i < fields.size(); i++)
    {
        const std::string name = trim(splitString(trim(fields[i]), " ")[1]);
        if (i > 0) initList += ", ";
        initList += name + "(" + name + ")";
    }
    out << initList;

    out << "\n    {\n";
    out << "    }\n";
    out << "};" << std::endl;
}
// Generates the AST header: opens `outputDir` for writing and emits one
// struct (via defineType) per entry in `types`. Each entry has the form
// "<ClassName> $ <field list>". Previously a file-open failure was silently
// swallowed and nothing was generated; now it is reported to stderr.
void defineAst(std::string outputDir, std::string baseName, const std::vector<std::string>& types)
{
    std::ofstream outFile(outputDir);
    if(!outFile.is_open())
    {
        std::cerr << "Could not open output file: " << outputDir << std::endl;
        return;
    }
    for(const std::string& type : types)
    {
        // Split "<ClassName> $ <fields>" into its two halves.
        std::vector<std::string> type_split = splitString(type, "$");
        std::string className = trim(type_split[0]);
        std::string fields = trim(type_split[1]);
        defineType(outFile, baseName, className, fields);
    }
}
// Entry point: expects exactly one argument — the path of the file to
// generate — and writes the Expr node definitions into it. Exits with
// status 64 (usage error) if the argument count is wrong.
int main(int argc, char* argv[]){
    if(argc != 2)
    {
        std::cerr << "Usage " << argv[0] << " <output directory>" << std::endl;
        std::exit(64);
    }

    const std::string outputDir = argv[1];

    // "<ClassName> $ <fields>" descriptors consumed by defineAst.
    const std::vector<std::string> nodeTypes = {
        "Binary $ Expr left, Token oper, Expr right",
        "Grouping $ Expr expression",
        "Literal $ std::string value",
        "Unary $ Token oper, Expr right"
    };
    defineAst(outputDir, "Expr", nodeTypes);
}