diff options
| author | Cori Barker <coribarker2@gmail.com> | 2026-01-19 09:35:45 +0000 |
|---|---|---|
| committer | Cori Barker <coribarker2@gmail.com> | 2026-01-19 09:35:45 +0000 |
| commit | e34338219036acf103d161dfbd36fa67597d23e7 (patch) | |
| tree | 00b4b09fd7fcb47502aee4b5be0e939fd771aab1 | |
lexer
| -rw-r--r-- | include/lexer/lexer.h | 27 | ||||
| -rw-r--r-- | include/lexer/token.h | 17 | ||||
| -rw-r--r-- | include/lexer/token_type.h | 25 | ||||
| -rw-r--r-- | src/lexer/lexer.cpp | 103 |
4 files changed, 172 insertions, 0 deletions
// Combined listing of the four files added by this commit. The sections are
// ordered by dependency (token_type.h -> token.h -> lexer.h -> lexer.cpp) so
// that the listing can be read, and compiled, top to bottom as one unit.

// ===========================================================================
// include/lexer/token_type.h
// ===========================================================================
#ifndef TOKEN_TYPE_H
#define TOKEN_TYPE_H

/// Category assigned to each lexeme produced by Lexer::tokenise().
enum class TokenType {
    INT,          ///< The keyword `int`.
    STRING,       ///< Declared, but not produced by the tokeniser in this listing.

    NUMBER,       ///< A run of decimal digits.
    IDENTIFIER,   ///< A letter followed by letters or digits.

    PLUS,         ///< `+`
    MINUS,        ///< `-`
    MULTIPLY,     ///< `*`
    DIVIDE,       ///< `/`

    ASSIGN,       ///< `=`

    SEMICOLON,    ///< `;`

    END_OF_FILE,  ///< Always the last token returned by Lexer::tokenise().
    INVALID       ///< Any single character that matches no other category.
};

#endif

// ===========================================================================
// include/lexer/token.h
// (in the repository this header includes "token_type.h")
// ===========================================================================
#ifndef TOKEN_H
#define TOKEN_H

#include <string>

/// One lexeme together with the 1-based position at which it starts.
struct Token {
    TokenType type;
    std::string value;
    int line;
    int column;

    /// @param t    Category of the lexeme.
    /// @param val  Text of the lexeme (empty for END_OF_FILE).
    /// @param line 1-based line on which the lexeme starts.
    /// @param col  1-based column at which the lexeme starts.
    Token(TokenType t, const std::string& val, int line, int col)
        : type{t}, value{val}, line{line}, column{col} {}
};

#endif

// ===========================================================================
// include/lexer/lexer.h
// (in the repository this header includes "token.h")
// ===========================================================================
#ifndef LEXER_H
#define LEXER_H

#include <cstddef>
#include <string>
#include <vector>

/// Splits a source string into Tokens.
class Lexer {
public:
    /// Stores a copy of @p src; no scanning happens until tokenise() is called.
    explicit Lexer(const std::string& src);

    /// Scans the whole input from the beginning and returns every token,
    /// terminated by a single END_OF_FILE token. Calling it again rescans
    /// from the start and returns the same sequence.
    std::vector<Token> tokenise();

private:
    int line;                   // 1-based line of the next unread character
    int column;                 // 1-based column of the next unread character
    std::size_t position;       // index into src of the next unread character
    std::string src;
    std::vector<Token> tokens;

    char advance();
    void skipWhitespace();
    void skipComment();
};

#endif

// ===========================================================================
// src/lexer/lexer.cpp
// (in the repository this file includes "lexer/lexer.h")
// ===========================================================================
#include <cctype>

namespace {

// The <cctype> classifiers have undefined behaviour for negative arguments
// other than EOF, so every char is routed through unsigned char first.
bool isDigit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
bool isAlpha(char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; }
bool isAlnum(char c) { return std::isalnum(static_cast<unsigned char>(c)) != 0; }

}  // namespace

// Initialisers are listed in member declaration order, which is the order in
// which they actually run.
Lexer::Lexer(const std::string& src) : line(1), column(1), position(0), src(src) {}

std::vector<Token> Lexer::tokenise() {
    // Start from a clean state so repeated calls do not append to old results.
    tokens.clear();
    line = 1;
    column = 1;
    position = 0;

    for (;;) {
        // Discard whitespace and `//` comments in any interleaving, including
        // at the very start of the input, before looking for the next token.
        std::size_t before;
        do {
            before = position;
            skipWhitespace();
            skipComment();
        } while (position != before);

        if (position >= src.size()) {
            break;
        }

        const int startColumn = column;
        const char current = src[position];

        if (isDigit(current)) {
            std::string digits;
            while (position < src.size() && isDigit(src[position])) {
                digits.push_back(advance());
            }
            tokens.emplace_back(TokenType::NUMBER, digits, line, startColumn);

        } else if (isAlpha(current)) {
            // Read the whole word first; only an exact match is the keyword,
            // so "integer" stays a single identifier.
            std::string word;
            while (position < src.size() && isAlnum(src[position])) {
                word.push_back(advance());
            }
            const TokenType type = (word == "int") ? TokenType::INT : TokenType::IDENTIFIER;
            tokens.emplace_back(type, word, line, startColumn);

        } else {
            TokenType type = TokenType::INVALID;
            switch (current) {
                case '+': type = TokenType::PLUS;      break;
                case '-': type = TokenType::MINUS;     break;
                case '*': type = TokenType::MULTIPLY;  break;
                case '/': type = TokenType::DIVIDE;    break;
                case '=': type = TokenType::ASSIGN;    break;
                case ';': type = TokenType::SEMICOLON; break;
                default:                               break;
            }
            tokens.emplace_back(type, std::string(1, current), line, startColumn);
            advance();
        }
    }

    tokens.emplace_back(TokenType::END_OF_FILE, std::string(), line, column);
    return tokens;
}

// Consumes one character and keeps line/column in step with it.
// Returns '\0' without moving when the input is already exhausted.
char Lexer::advance() {
    if (position >= src.size()) {
        return '\0';
    }
    const char consumed = src[position++];
    if (consumed == '\n') {
        ++line;
        column = 1;
    } else {
        ++column;
    }
    return consumed;
}

// Consumes spaces, tabs, carriage returns and newlines, stopping at the first
// other character or at end of input.
void Lexer::skipWhitespace() {
    while (position < src.size()) {
        const char c = src[position];
        if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
            break;
        }
        advance();
    }
}

// If the next two characters are "//", consumes up to (but not including) the
// terminating newline or end of input; otherwise consumes nothing, so a lone
// '/' is left in place to become a DIVIDE token. The newline itself is left
// for skipWhitespace(), which updates the line counter.
void Lexer::skipComment() {
    const bool atComment = position + 1 < src.size()
                        && src[position] == '/'
                        && src[position + 1] == '/';
    if (!atComment) {
        return;
    }
    while (position < src.size() && src[position] != '\n') {
        advance();
    }
}
