AST Struct Generator

This commit is contained in:
Bobby Lucero 2023-05-21 20:40:43 -04:00
parent dfc518c582
commit c5f981321c
18 changed files with 448 additions and 174 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
.vscode/ .vscode/
build/ build/
.idea

View File

@ -7,22 +7,26 @@ CC = g++
CFLAGS = -Wall -Wextra -std=c++11 CFLAGS = -Wall -Wextra -std=c++11
# Source directory # Source directory
SRC_DIR = ./src SRC_DIR = ./source
# Output directory # Output directory
BUILD_DIR = ./build BUILD_DIR = ./build
# Get all CPP files in the source directory # Find all C++ files recursively in the source directory
CPP_FILES := $(wildcard $(SRC_DIR)/*.cpp) CPP_FILES := $(shell find $(SRC_DIR) -type f -name '*.cpp')
# Generate object file names by replacing the source directory with the build directory # Generate object file names by replacing the source directory with the build directory
OBJ_FILES := $(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(CPP_FILES)) OBJ_FILES := $(patsubst $(SRC_DIR)/%.cpp,$(BUILD_DIR)/%.o,$(CPP_FILES))
# Create directories for object files
$(shell mkdir -p $(dir $(OBJ_FILES)))
# Default target # Default target
all: $(BUILD_DIR)/bob all: $(BUILD_DIR)/bob
# Rule to create the build directory if it doesn't exist # Rule to create necessary directories
$(shell mkdir -p $(BUILD_DIR)) $(DIRS):
mkdir -p $(patsubst $(SRC_DIR)/%, $(OUTPUT_DIR)/%, $@)
# Rule to compile object files # Rule to compile object files
$(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp $(BUILD_DIR)/%.o: $(SRC_DIR)/%.cpp

49
headers/Expression.h Normal file
View File

@ -0,0 +1,49 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#pragma once
#include "Lexer.h"
#include <iostream>
struct Expr{
    // Base type for every expression AST node. The virtual destructor makes
    // the hierarchy polymorphic, which enables dynamic casts (see main.cpp's
    // dynamic_pointer_cast usage) and safe deletion through a base pointer.
    virtual ~Expr()
    {
    }
};
struct BinaryExpr : Expr
{
    // Infix expression: left <oper> right (e.g. 1 + 2).
    // NOTE(review): the operands are stored as `const Expr` BY VALUE, so any
    // derived node passed in is sliced down to the empty Expr base and the
    // subtree's data is lost. Consider std::shared_ptr<Expr> members —
    // confirm against how the parser will build trees.
    const Expr left;
    const Token oper;
    const Expr right;

    BinaryExpr(Expr left, Token oper, Expr right) : left(left), oper(oper), right(right)
    {
    }
};
struct GroupingExpr : Expr
{
    // Parenthesized expression: ( expression ).
    // NOTE(review): `expression` is a `const Expr` by value, so a derived
    // node passed here is sliced to the empty base and its contents are
    // lost — consider std::shared_ptr<Expr>; confirm with parser design.
    const Expr expression;

    GroupingExpr(Expr expression) : expression(expression)
    {
    }
};
struct LiteralExpr : Expr
{
    // Literal node; the value is kept as its textual form (a std::string),
    // matching how the lexer stores NUMBER/STRING lexemes.
    const std::string value;

    LiteralExpr(std::string value) : value(value)
    {
    }
};
struct UnaryExpr : Expr
{
    // Prefix expression: <oper> right (e.g. -x, !flag).
    // NOTE(review): `right` is a `const Expr` by value — derived operands
    // are sliced to the empty base; consider std::shared_ptr<Expr>.
    const Token oper;
    const Expr right;

    UnaryExpr(Token oper, Expr right) : oper(oper), right(right)
    {
    }
};

View File

@ -24,7 +24,21 @@ enum TokenType{
}; };
const std::map<std::string, TokenType> KEYWORDS { const std::map<std::string, TokenType> KEYWORDS {
{"and", AND},
{"or", OR},
{"true", TRUE},
{"false", FALSE},
{"if", IF},
{"else", ELSE},
{"func", FUNCTION},
{"for", FOR},
{"while", WHILE},
{"var", VAR},
{"class", CLASS},
{"super", SUPER},
{"this", THIS},
{"none", NONE},
{"return", RETURN}
}; };
struct Token struct Token
@ -41,6 +55,8 @@ public:
std::vector<Token> Tokenize(std::string source); std::vector<Token> Tokenize(std::string source);
private: private:
int line; int line;
std::vector<char> src;
private: private:
bool matchOn(char expected, std::vector<char>& src); bool matchOn(char expected);
void advance();
}; };

31
headers/bob.h Normal file
View File

@ -0,0 +1,31 @@
#pragma once
#include <iostream>
#include <fstream>
#include <string>
#include "../headers/Lexer.h"
#define VERSION "0.0.1"
class Bob
{
public:
    // Lexer instance reused for every run()/runFile()/runPrompt() call.
    Lexer lexer;
public:
    // Reads the whole file at `path` and executes it; prints "File not
    // found" and returns if the file cannot be opened (see bob.cpp).
    void runFile(std::string path);
    // Interactive REPL: prompts with "-> " and executes each stdin line
    // until EOF; hadError is reset after each line.
    void runPrompt();
    // Error entry point for other components. NOTE(review): currently an
    // empty stub in bob.cpp — errors reported here are dropped.
    void error(int line, std::string message);
private:
    // Set once any error is reported during a run.
    bool hadError = false;
private:
    // Tokenizes `source` and prints each token's type and lexeme.
    void run(std::string source);
    // Flags the error state (sets hadError); see bob.cpp.
    void report(int line, std::string where, std::string message);
};

View File

@ -0,0 +1,40 @@
#pragma once
#include <iostream>
#include <vector>
// Splits `input` on every occurrence of `delimiter` and returns the pieces
// in order. The delimiter is not included in any token and empty tokens are
// preserved (e.g. splitString(",a", ",") -> {"", "a"}). An empty delimiter
// matches nothing, so the whole input comes back as one token (this also
// prevents an infinite scan loop).
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit violates the ODR.
inline std::vector<std::string> splitString(const std::string& input, std::string delimiter) {
    std::vector<std::string> tokens;
    if (delimiter.empty()) {
        tokens.push_back(input);
        return tokens;
    }
    size_t start = 0;
    size_t end = input.find(delimiter);
    while (end != std::string::npos) {
        tokens.push_back(input.substr(start, end - start));
        // BUG FIX: previously advanced by 1 instead of delimiter.length(),
        // which left the tail of a multi-character delimiter glued onto the
        // next token (splitting "a, b" on ", " produced {"a", " b"}).
        start = end + delimiter.length();
        end = input.find(delimiter, start);
    }
    // Last token: everything after the final delimiter.
    tokens.push_back(input.substr(start));
    return tokens;
}
// Returns `str` with leading and trailing whitespace (space, tab, newline,
// carriage return) removed; an all-whitespace string yields "".
// `inline` because this is defined in a header: without it, including the
// header from more than one translation unit violates the ODR.
inline std::string trim(const std::string& str) {
    // First non-whitespace character; npos means nothing but whitespace.
    const size_t start = str.find_first_not_of(" \t\n\r");
    if (start == std::string::npos) {
        return "";
    }
    // Last non-whitespace character (guaranteed to exist at this point).
    const size_t end = str.find_last_not_of(" \t\n\r");
    return str.substr(start, end - start + 1);
}

View File

@ -2,7 +2,15 @@ bob.test
10 10
11.1 11.1
test = (11 + 2 "xs") test = (11 + 2 "xs
hello
end")
//" //
//11. //11.
12//11
11.
11.69 + 66.735293857293875 + 235982735987235.0 + 1
123a

BIN
source/.DS_Store vendored Normal file

Binary file not shown.

5
source/Expression.cpp Normal file
View File

@ -0,0 +1,5 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#include "../headers/Expression.h"

View File

@ -1,135 +1,142 @@
#include "../headers/Lexer.h" #include "../headers/Lexer.h"
#include <iostream>
#include <cctype> #include <cctype>
using namespace std; using namespace std;
std::vector<Token> Lexer::Tokenize(std::string source){ std::vector<Token> Lexer::Tokenize(std::string source){
std::vector<Token> tokens; std::vector<Token> tokens;
std::vector<char> src{source.begin(), source.end()}; src = std::vector<char>{source.begin(), source.end()};
line = 0;
while(src.size() > 0) while(!src.empty())
{ {
char t = src[0]; char t = src[0];
if(t == '(') if(t == '(')
{ {
tokens.push_back(Token{OPEN_PAREN, std::string(1, t)}); //brace initialization in case you forget tokens.push_back(Token{OPEN_PAREN, std::string(1, t), line}); //brace initialization in case you forget
src.erase(src.begin()); advance();
} }
else if(t == ')') else if(t == ')')
{ {
tokens.push_back(Token{CLOSE_PAREN, std::string(1, t)}); tokens.push_back(Token{CLOSE_PAREN, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '{') else if(t == '{')
{ {
tokens.push_back(Token{OPEN_BRACE, std::string(1, t)}); tokens.push_back(Token{OPEN_BRACE, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '}') else if(t == '}')
{ {
tokens.push_back(Token{CLOSE_BRACE, std::string(1, t)}); tokens.push_back(Token{CLOSE_BRACE, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == ',') else if(t == ',')
{ {
tokens.push_back(Token{COMMA, std::string(1, t)}); tokens.push_back(Token{COMMA, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '.') else if(t == '.')
{ {
tokens.push_back(Token{DOT, std::string(1, t)}); tokens.push_back(Token{DOT, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == ';') else if(t == ';')
{ {
tokens.push_back(Token{SEMICOLON, std::string(1, t)}); tokens.push_back(Token{SEMICOLON, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '+') else if(t == '+')
{ {
tokens.push_back(Token{PLUS, std::string(1, t)}); tokens.push_back(Token{PLUS, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '-') else if(t == '-')
{ {
tokens.push_back(Token{MINUS, std::string(1, t)}); tokens.push_back(Token{MINUS, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '*') else if(t == '*')
{ {
tokens.push_back(Token{STAR, std::string(1, t)}); tokens.push_back(Token{STAR, std::string(1, t), line});
src.erase(src.begin()); advance();
} }
else if(t == '=') else if(t == '=')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? DOUBLE_EQUAL : EQUAL, token}); tokens.push_back(Token{match ? DOUBLE_EQUAL : EQUAL, token, line});
} }
else if(t == '!') else if(t == '!')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? BANG_EQUAL : BANG, token}); tokens.push_back(Token{match ? BANG_EQUAL : BANG, token, line});
} }
else if(t == '<') else if(t == '<')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? LESS_EQUAL : LESS, token}); tokens.push_back(Token{match ? LESS_EQUAL : LESS, token, line});
} }
else if(t == '>') else if(t == '>')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('=', src); bool match = matchOn('=');
token += match ? "=" : ""; token += match ? "=" : "";
tokens.push_back(Token{match ? GREATER_EQUAL : GREATER, token}); tokens.push_back(Token{match ? GREATER_EQUAL : GREATER, token, line});
}
else if(t == '&')
{
std::string token = std::string(1, t);
advance();
bool match = matchOn('&');
token += match ? "&" : "";
if(match) tokens.push_back(Token{AND, token, line});
} }
else if(t == '/') else if(t == '/')
{ {
std::string token = std::string(1, t); std::string token = std::string(1, t);
src.erase(src.begin()); advance();
bool match = matchOn('/', src); bool match = matchOn('/');
if(match) if(match)
{ {
while(src.size() > 0 && src[0] != '\n') while(!src.empty() && src[0] != '\n')
{ {
src.erase(src.begin()); advance();
} }
} }
else else
{ {
tokens.push_back(Token{SLASH, std::string(1, t)}); tokens.push_back(Token{SLASH, std::string(1, t), line});
} }
} }
else if(t == '"') else if(t == '"')
{ {
std::string str = std::string(1, src[0]); std::string str = std::string(1, src[0]);
src.erase(src.begin()); advance();
while(src.size() > 0 && src[0] != '"') while(!src.empty() && src[0] != '"')
{ {
if(src[0] == '\n') line++; if(src[0] == '\n') line++;
str += src[0]; str += src[0];
src.erase(src.begin()); advance();
} }
if(src.size() == 0) if(src.empty())
{ {
throw std::runtime_error("Unterminated string at line: " + std::to_string(this->line)); throw std::runtime_error("Unterminated string at line: " + std::to_string(this->line));
} }
else if(src[0] == '"') else if(src[0] == '"')
{ {
str += '"'; str += '"';
src.erase(src.begin()); advance();
tokens.push_back(Token{STRING, str}); tokens.push_back(Token{STRING, str, line});
} }
@ -137,30 +144,30 @@ std::vector<Token> Lexer::Tokenize(std::string source){
else if(t == '\n') else if(t == '\n')
{ {
line++; line++;
src.erase(src.begin()); advance();
} }
else else
{ {
//Multi char tokens //Multi char tokens
if(std::isdigit(t)) if(std::isdigit(t))
{ {
std::string num = ""; std::string num;
while(src.size() > 0 && std::isdigit(src[0])) while(!src.empty() && std::isdigit(src[0]))
{ {
num += src[0]; num += src[0];
src.erase(src.begin()); advance();
} }
if(src.size() > 0 && src[0] == '.') if(!src.empty() && src[0] == '.')
{ {
src.erase(src.begin()); advance();
if(src.size() > 0 && std::isdigit(src[0])) if(!src.empty() && std::isdigit(src[0]))
{ {
num += '.'; num += '.';
while(src.size() > 0 && std::isdigit(src[0])) while(!src.empty() && std::isdigit(src[0]))
{ {
num += src[0]; num += src[0];
src.erase(src.begin()); advance();
} }
} }
else else
@ -170,30 +177,30 @@ std::vector<Token> Lexer::Tokenize(std::string source){
} }
tokens.push_back(Token{NUMBER, num}); tokens.push_back(Token{NUMBER, num, line});
} }
else if(std::isalpha(t)) else if(std::isalpha(t))
{ {
std::string ident = ""; std::string ident;
while(src.size() > 0 && std::isalpha(src[0])) while(!src.empty() && std::isalpha(src[0]))
{ {
ident += src[0]; ident += src[0];
src.erase(src.begin()); advance();
} }
if(KEYWORDS.find(ident) != KEYWORDS.end()) //identifier is a keyword if(KEYWORDS.find(ident) != KEYWORDS.end()) //identifier is a keyword
{ {
tokens.push_back(Token{KEYWORDS.at(ident), ident}); tokens.push_back(Token{KEYWORDS.at(ident), ident, line});
} }
else else
{ {
tokens.push_back(Token{IDENTIFIER, ident}); tokens.push_back(Token{IDENTIFIER, ident, line});
} }
} }
else if(t == ' ' || t == '\t' || t == '\n') else if(t == ' ' || t == '\t')
{ {
src.erase(src.begin()); //ignore t advance();
} }
else else
{ {
@ -209,10 +216,15 @@ std::vector<Token> Lexer::Tokenize(std::string source){
return tokens; return tokens;
} }
bool Lexer::matchOn(char expected, std::vector<char> &src) bool Lexer::matchOn(char expected)
{ {
if(src.size() == 0) return false; if(src.empty()) return false;
if(src[0] != expected) return false; if(src[0] != expected) return false;
src.erase(src.begin()); advance();
return true; return true;
} }
void Lexer::advance()
{
src.erase(src.begin());
}

60
source/bob.cpp Normal file
View File

@ -0,0 +1,60 @@
#include "../headers/bob.h"
using namespace std;
// Loads the entire file at `path` into memory and executes it.
// Prints "File not found" and bails out if the file cannot be opened.
void Bob::runFile(string path)
{
    ifstream file(path);
    if(!file.is_open())
    {
        cout << "File not found" << endl;
        return;
    }
    // Slurp the whole stream into one string before running it.
    string source((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
    this->run(source);
}
void Bob::runPrompt()
{
cout << "Bob v" << VERSION << ", 2023" << endl;
for(;;)
{
string line;
cout << "-> ";
std::getline(std::cin, line);
if(std::cin.eof())
{
break;
}
this->run(line);
hadError = false;
}
}
void Bob::error(int line, string message)
{
}
// Tokenizes `source` and dumps every token's type and lexeme to stdout.
void Bob::run(string source)
{
    for(const Token& tok : lexer.Tokenize(source))
    {
        cout << "{type: " << tok.type << ", value: " << tok.lexeme << "}" << endl;
    }
}
void Bob::report(int line, string where, string message)
{
hadError = true;
}

37
source/main.cpp Normal file
View File

@ -0,0 +1,37 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#include "../headers/bob.h"
#include "../headers/Expression.h"
#include "../headers/Lexer.h"
int main(){
    Bob bobLang;
    //bobLang.runFile("source.bob");

    // Demo of the Expr hierarchy: build two BinaryExprs behind a shared
    // base pointer and use dynamic_pointer_cast to recover the node type.
    // NOTE(review): BinaryExpr stores operands as `const Expr` by value, so
    // `a` and `b` are sliced to the empty base here — harmless for this
    // demo, but real subtrees would lose their data.
    Expr a;
    Expr b;
    Token t = {PLUS, "+", 1};
    Token t2 = {MINUS, "-", 1};
    // (Removed an unused local `BinaryExpr e` that was constructed and
    // never read.)

    std::shared_ptr<Expr> any = std::make_shared<BinaryExpr>(a, t, b);
    if(std::shared_ptr<BinaryExpr> binexpr = std::dynamic_pointer_cast<BinaryExpr>(any))
    {
        std::cout << binexpr->oper.lexeme;
    }

    any = std::make_shared<BinaryExpr>(a, t2, b);
    if(std::shared_ptr<BinaryExpr> binexpr = std::dynamic_pointer_cast<BinaryExpr>(any))
    {
        std::cout << binexpr->oper.lexeme;
    }
    std::cout << std::endl;

    bobLang.runPrompt();
    return 0;
}

View File

@ -1,96 +0,0 @@
#include <iostream>
#include <fstream>
#include <string>
#include "../headers/Lexer.h"
#define VERSION "0.0.1"
using namespace std;
class Bob
{
public:
Lexer lexer;
public:
void runFile(string path)
{
ifstream file = ifstream(path);
string source = "";
if(file.is_open()){
source = string(istreambuf_iterator<char>(file), istreambuf_iterator<char>());
}
else
{
cout << "File not found" << endl;
return;
}
this->run(source);
}
void runPrompt()
{
cout << "Bob v" << VERSION << ", 2023" << endl;
for(;;)
{
string line;
cout << "-> ";
std::getline(std::cin, line);
if(std::cin.eof())
{
break;
}
this->run(line);
hadError = false;
}
}
void error(int line, string message)
{
}
private:
bool hadError = false;
private:
void run(string source)
{
vector<Token> tokens = lexer.Tokenize(source);
for(Token t : tokens){
cout << "{type: " << t.type << ", value: " << t.lexeme << "}" << endl;
}
}
void report(int line, string where, string message)
{
hadError = true;
}
};
int main(){
// string TokenTypeMappings[] = {
// "Identifier",
// "Number",
// "Equals",
// "OpenParen",
// "CloseParen",
// "BinaryOperator",
// "TestKeyword"
// };
Bob bobLang;
bobLang.runFile("source.bob");
//bobLang.runPrompt();
return 0;
}

35
testthing Normal file
View File

@ -0,0 +1,35 @@
// NOTE(review): this file appears to be sample output from
// tools/GenerateAST — the structs match headers/Expression.h but there is
// no #include, no `Expr` base definition, and no header guard, so it does
// not compile standalone. Confirm whether it should be checked in at all.
struct BinaryExpr : Expr
{
    const Expr left;
    const Token oper;
    const Expr right;

    BinaryExpr(Expr left, Token oper, Expr right) : left(left), oper(oper), right(right)
    {
    }
};
struct GroupingExpr : Expr
{
    const Expr expression;

    GroupingExpr(Expr expression) : expression(expression)
    {
    }
};
struct LiteralExpr : Expr
{
    const std::string value;

    LiteralExpr(std::string value) : value(value)
    {
    }
};
struct UnaryExpr : Expr
{
    const Token oper;
    const Expr right;

    UnaryExpr(Token oper, Expr right) : oper(oper), right(right)
    {
    }
};

BIN
tools/GenerateAST Executable file

Binary file not shown.

72
tools/GenerateAST.cpp Normal file
View File

@ -0,0 +1,72 @@
//
// Created by Bobby Lucero on 5/21/23.
//
#include <iostream>
#include <vector>
#include <fstream>
#include "../headers/helperFunctions/HelperFunctions.h"
// Emits one generated struct definition (named className + "Expr", deriving
// from baseName) to `out`. fieldList is a ", "-separated list of
// "<type> <name>" declarations; each becomes a const data member and a
// constructor parameter with a member-wise initializer.
void defineType(std::ofstream &out, std::string baseName, std::string className, std::string fieldList)
{
    const std::vector<std::string> fields = splitString(fieldList, ", ");

    out << "struct " << className << "Expr : " << baseName << "\n{\n";

    // One const member per declared field.
    for (const std::string& decl : fields)
    {
        out << "    const " << trim(decl) << ";\n";
    }

    // Constructor taking the full field list verbatim...
    out << "\n    " << className << "Expr(" << fieldList << ") : ";

    // ...followed by a "name(name)" initializer per field, joined by ", ".
    std::string initList;
    for (size_t i = 0; i < fields.size(); i++)
    {
        const std::string name = trim(splitString(trim(fields[i]), " ")[1]);
        if (i > 0) initList += ", ";
        initList += name + "(" + name + ")";
    }
    out << initList;

    out << "\n    {\n";
    out << "    }\n";
    out << "};" << std::endl;
}
// Generates the AST header: opens `outputDir` for writing and emits one
// struct (via defineType) per entry in `types`. Each entry has the form
// "<ClassName> $ <field list>". Previously a file-open failure was silently
// swallowed and nothing was generated; now it is reported to stderr.
void defineAst(std::string outputDir, std::string baseName, const std::vector<std::string>& types)
{
    std::ofstream outFile(outputDir);
    if(!outFile.is_open())
    {
        std::cerr << "Could not open output file: " << outputDir << std::endl;
        return;
    }
    for(const std::string& type : types)
    {
        // Split "<ClassName> $ <fields>" into its two halves.
        std::vector<std::string> type_split = splitString(type, "$");
        std::string className = trim(type_split[0]);
        std::string fields = trim(type_split[1]);
        defineType(outFile, baseName, className, fields);
    }
}
// Entry point: expects exactly one argument — the path of the file to
// generate — and writes the Expr node definitions into it. Exits with
// status 64 (usage error) if the argument count is wrong.
int main(int argc, char* argv[]){
    if(argc != 2)
    {
        std::cerr << "Usage " << argv[0] << " <output directory>" << std::endl;
        std::exit(64);
    }

    const std::string outputDir = argv[1];

    // "<ClassName> $ <fields>" descriptors consumed by defineAst.
    const std::vector<std::string> nodeTypes = {
        "Binary $ Expr left, Token oper, Expr right",
        "Grouping $ Expr expression",
        "Literal $ std::string value",
        "Unary $ Token oper, Expr right"
    };
    defineAst(outputDir, "Expr", nodeTypes);
}