Bob/source/Lexer.cpp
Bobby Lucero 7c57a9a111 Implement functions, closures, standard library, and comprehensive number system
- Add function declarations, calls, and return statements
- Implement lexical scoping with Environment class and closures
- Convert print from statement to standard library function
- Add assert() function to standard library for testing
- Add time() function for microsecond precision benchmarking
- Create StdLib class and BuiltinFunction wrapper for standard library
- Implement first-class functions and higher-order functions
- Add function parameter support (tested up to 100 parameters)
- Support alphanumeric identifiers in variable and function names
- Add underscore support in variable names and identifiers
- Implement string + number and number + string concatenation
- Add boolean + string and string + boolean concatenation
- Support string multiplication (string * number)
- Fix decimal truncation issue by using std::stod for all number parsing
- Add comprehensive number formatting with proper precision handling
- Support huge numbers (epoch timestamps) without integer overflow
- Clean number display (no trailing zeros on integers)
- Add basic error handling with program termination on errors
- Add comprehensive test suite covering all features
- Add escape sequence support (\n, \t, \", \\)
- Add comprehensive documentation and language reference
- Update development roadmap with completed features
2025-07-30 17:51:48 -04:00

361 lines
9.9 KiB
C++

#include "../headers/Lexer.h"
#include "../headers/helperFunctions/HelperFunctions.h"
#include <cctype>
#include <stdexcept>
using namespace std;
// Converts `source` into a flat list of tokens, always terminated by an
// END_OF_FILE token.
//
// The input is copied into `src`, which is consumed from the front as
// characters are recognised; `line` is reset to 0 and incremented on every
// '\n' (including newlines inside string literals), so reported line numbers
// are zero-based. A STRING token carries the line on which the literal ends.
//
// Throws std::runtime_error on:
//   - an unterminated string literal,
//   - a number with a '.' that is not followed by a digit,
//   - a "0b"/"0x" prefix that is not followed by at least one valid digit,
//   - a single '&' (only "&&" is a token),
//   - any character that does not start a known token.
std::vector<Token> Lexer::Tokenize(std::string source){
    std::vector<Token> tokens;
    src = std::vector<char>{source.begin(), source.end()};
    line = 0;

    // The <cctype> classifiers have undefined behaviour for negative char
    // values (e.g. bytes of UTF-8 text), so always go through unsigned char.
    const auto isDigit = [](char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; };
    const auto isHexDigit = [](char c) { return std::isxdigit(static_cast<unsigned char>(c)) != 0; };
    const auto isAlpha = [](char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; };

    while(!src.empty())
    {
        char t = src[0];

        // ---- single-character tokens ----
        if(t == '(')
        {
            tokens.push_back(Token{OPEN_PAREN, std::string(1, t), line}); //brace initialization in case you forget
            advance();
        }
        else if(t == ')')
        {
            tokens.push_back(Token{CLOSE_PAREN, std::string(1, t), line});
            advance();
        }
        else if(t == '{')
        {
            tokens.push_back(Token{OPEN_BRACE, std::string(1, t), line});
            advance();
        }
        else if(t == '}')
        {
            tokens.push_back(Token{CLOSE_BRACE, std::string(1, t), line});
            advance();
        }
        else if(t == ',')
        {
            tokens.push_back(Token{COMMA, std::string(1, t), line});
            advance();
        }
        else if(t == '.')
        {
            tokens.push_back(Token{DOT, std::string(1, t), line});
            advance();
        }
        else if(t == ';')
        {
            tokens.push_back(Token{SEMICOLON, std::string(1, t), line});
            advance();
        }
        else if(t == '+')
        {
            tokens.push_back(Token{PLUS, std::string(1, t), line});
            advance();
        }
        else if(t == '-')
        {
            tokens.push_back(Token{MINUS, std::string(1, t), line});
            advance();
        }
        else if(t == '*')
        {
            tokens.push_back(Token{STAR, std::string(1, t), line});
            advance();
        }
        else if(t == '%')
        {
            tokens.push_back(Token{PERCENT, std::string(1, t), line});
            advance();
        }
        else if(t == '~')
        {
            tokens.push_back(Token{BIN_NOT, std::string(1, t), line});
            advance();
        }
        // ---- one- or two-character operators ----
        else if(t == '=')
        {
            advance();
            if(matchOn('='))
            {
                tokens.push_back(Token{DOUBLE_EQUAL, "==", line});
            }
            else
            {
                tokens.push_back(Token{EQUAL, std::string(1, t), line});
            }
        }
        else if(t == '!')
        {
            advance();
            if(matchOn('='))
            {
                tokens.push_back(Token{BANG_EQUAL, "!=", line});
            }
            else
            {
                tokens.push_back(Token{BANG, std::string(1, t), line});
            }
        }
        else if(t == '<')
        {
            advance();
            if(matchOn('='))
            {
                tokens.push_back(Token{LESS_EQUAL, "<=", line});
            }
            else if(matchOn('<'))
            {
                tokens.push_back(Token{BIN_SLEFT, "<<", line});
            }
            else
            {
                tokens.push_back(Token{LESS, std::string(1, t), line});
            }
        }
        else if(t == '>')
        {
            advance();
            if(matchOn('='))
            {
                tokens.push_back(Token{GREATER_EQUAL, ">=", line});
            }
            else
            {
                tokens.push_back(Token{GREATER, std::string(1, t), line});
            }
        }
        else if(t == '&')
        {
            advance();
            // Only "&&" is a token here. A lone '&' used to be consumed and
            // silently dropped; report it like any other unknown character.
            if(!matchOn('&'))
            {
                throw std::runtime_error("LEXER: Unknown Token: '" + std::string(1, t) + "'");
            }
            tokens.push_back(Token{AND, "&&", line});
        }
        else if(t == '/')
        {
            advance();
            if(matchOn('/'))
            {
                // Line comment: skip up to (not including) the newline so the
                // '\n' branch below still bumps the line counter.
                while(!src.empty() && src[0] != '\n')
                {
                    advance();
                }
            }
            else
            {
                tokens.push_back(Token{SLASH, std::string(1, t), line});
            }
        }
        // ---- string literals ----
        else if(t == '"')
        {
            std::string str;
            advance(); // opening quote
            while(!src.empty() && src[0] != '"')
            {
                if(src[0] == '\\')
                {
                    // Keep the backslash and the character after it verbatim;
                    // parseEscapeCharacters() decodes the pair afterwards. This
                    // also stops an escaped quote from ending the literal.
                    advance();
                    if(src.empty()) break; // backslash at end of input: unterminated string
                    str += '\\';
                    str += src[0];
                    advance();
                    continue;
                }
                if(src[0] == '\n') line++;
                str += src[0];
                advance();
            }
            if(src.empty())
            {
                throw std::runtime_error("LEXER: Unterminated string at line: " + std::to_string(this->line));
            }
            advance(); // closing quote
            std::string escaped_str = parseEscapeCharacters(str);
            tokens.push_back(Token{STRING, escaped_str, line});
        }
        // ---- whitespace ----
        else if(t == '\n')
        {
            line++;
            advance();
        }
        else if(t == ' ' || t == '\t' || t == '\r') // '\r' so CRLF sources lex cleanly
        {
            advance();
        }
        // ---- numbers ----
        else if(isDigit(t))
        {
            std::string num;
            const char notationChar = (t == '0') ? peekNext() : '\0';

            if(notationChar == 'b' || notationChar == 'x')
            {
                // Prefixed literal. The prefix is only recognised at the very
                // start of the number, so "00x1" is no longer one token.
                num += '0';
                num += notationChar;
                advance(2);

                const std::string::size_type prefixLength = num.size();
                if(notationChar == 'b')
                {
                    while(!src.empty() && (src[0] == '0' || src[0] == '1'))
                    {
                        num += src[0];
                        advance();
                    }
                }
                else
                {
                    while(!src.empty() && isHexDigit(src[0]))
                    {
                        num += src[0];
                        advance();
                    }
                }
                // A prefix without any digit is not a number.
                if(num.size() == prefixLength)
                {
                    throw std::runtime_error("LEXER: malformed notation at: " + std::to_string(this->line));
                }
            }
            else
            {
                // Decimal literal: digits, optionally followed by '.' and at
                // least one more digit.
                while(!src.empty() && isDigit(src[0]))
                {
                    num += src[0];
                    advance();
                }
                if(!src.empty() && src[0] == '.')
                {
                    advance();
                    if(src.empty() || !isDigit(src[0]))
                    {
                        throw std::runtime_error("LEXER: malformed number at: " + std::to_string(this->line));
                    }
                    num += '.';
                    while(!src.empty() && isDigit(src[0]))
                    {
                        num += src[0];
                        advance();
                    }
                }
            }
            tokens.push_back(Token{NUMBER, num, line});
        }
        // ---- identifiers and keywords ----
        else if(isAlpha(t))
        {
            std::string ident;
            while(!src.empty() && (isAlpha(src[0]) || isDigit(src[0]) || src[0] == '_'))
            {
                ident += src[0];
                advance();
            }

            const auto keyword = KEYWORDS.find(ident);
            if(keyword != KEYWORDS.end()) //identifier is a keyword
            {
                tokens.push_back(Token{keyword->second, ident, line});
            }
            else
            {
                tokens.push_back(Token{IDENTIFIER, ident, line});
            }
        }
        else
        {
            throw std::runtime_error("LEXER: Unknown Token: '" + std::string(1, t) + "'");
        }
    }

    tokens.push_back({END_OF_FILE, "eof", line});
    return tokens;
}
// Consumes the next character if, and only if, it equals `expected`.
// Returns true when a character was consumed; returns false (and leaves the
// input untouched) when the input is exhausted or the character differs.
bool Lexer::matchOn(char expected)
{
    const bool isMatch = !src.empty() && src[0] == expected;
    if(isMatch)
    {
        advance();
    }
    return isMatch;
}
// Drops `by` characters from the front of the remaining input.
//
// The request is clamped to the number of characters left, so asking for more
// than remains (or calling this on exhausted input) is a no-op for the excess
// instead of erasing past the end. Non-positive values do nothing. The
// characters are removed with a single range erase rather than one erase per
// character.
void Lexer::advance(int by)
{
    if(by <= 0) return;

    const std::size_t requested = static_cast<std::size_t>(by);
    const std::size_t count = requested < src.size() ? requested : src.size();
    src.erase(src.begin(), src.begin() + count);
}
// Returns the character one position after the current one without consuming
// anything, or '\0' when fewer than two characters remain.
char Lexer::peekNext()
{
    return src.size() > 1 ? src[1] : '\0';
}
// Decodes backslash escape sequences in `input` and returns the result.
//
// Recognised sequences: \n (newline), \t (tab), \" (double quote) and
// \\ (backslash). Every other character is copied through unchanged.
//
// Throws std::runtime_error when a backslash is followed by any other
// character, or when the input ends with an unpaired backslash (previously
// that trailing backslash was dropped without notice).
std::string Lexer::parseEscapeCharacters(const std::string& input) {
    std::string output;
    output.reserve(input.size()); // decoding never makes the string longer

    bool escapeMode = false; // true while the previous character was an unconsumed '\'
    for (char c : input) {
        if (!escapeMode) {
            if (c == '\\') {
                escapeMode = true;
            } else {
                output += c;
            }
            continue;
        }

        switch (c) {
            case 'n':
                output += '\n';
                break;
            case 't':
                output += '\t';
                break;
            case '"':
                output += '\"';
                break;
            case '\\':
                output += '\\';
                break;
            default:
                throw std::runtime_error("Invalid escape character: " + std::string(1, c));
        }
        escapeMode = false;
    }

    if (escapeMode) {
        throw std::runtime_error("Invalid escape character: dangling '\\' at end of string");
    }
    return output;
}