commit e34338219036acf103d161dfbd36fa67597d23e7
Author: Cori Barker <coribarker2@gmail.com>
Date: Mon, 19 Jan 2026 09:35:45 +0000
lexer
Diffstat:
4 files changed, 172 insertions(+), 0 deletions(-)
diff --git a/include/lexer/lexer.h b/include/lexer/lexer.h
@@ -0,0 +1,27 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "token.h"
+
+#include <vector>
+#include <string>
+
+// Turns a source string into a flat list of Token objects.
+class Lexer {
+public:
+    // Keeps its own copy of `src`; scanning starts at offset 0, reported as
+    // line 1, column 1.
+    explicit Lexer(const std::string& src);
+
+    // Scans the stored source and returns the collected tokens. The returned
+    // list always finishes with an END_OF_FILE token.
+    std::vector<Token> tokenise();
+
+private:
+    // Consumes the character at `position` and returns it, keeping the
+    // position bookkeeping in step.
+    char advance();
+
+    // Moves `position` past whitespace, updating `line` / `column` as it goes.
+    void skipWhitespace();
+
+    // Moves `position` past a `//` line comment when one starts at the
+    // current position.
+    void skipComment();
+
+    int line;                   // line of the next unread character (starts at 1)
+    int column;                 // column of the next unread character (starts at 1)
+    int position;               // index into `src` of the next unread character
+    std::string src;            // the text being scanned
+    std::vector<Token> tokens;  // tokens collected so far
+};
+
+#endif
diff --git a/include/lexer/token.h b/include/lexer/token.h
@@ -0,0 +1,17 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include "token_type.h"
+
+#include <string>
+
+// One lexeme produced by the lexer, together with where it started.
+struct Token {
+    TokenType type;     // category of the lexeme
+    std::string value;  // text of the lexeme (empty for END_OF_FILE)
+    int line;           // line on which the lexeme starts
+    int column;         // column at which the lexeme starts
+
+    // Stores all four fields exactly as given.
+    Token(TokenType t, const std::string& val, int line, int col)
+        : type(t), value(val), line(line), column(col) {}
+};
+
+#endif
diff --git a/include/lexer/token_type.h b/include/lexer/token_type.h
@@ -0,0 +1,25 @@
+#ifndef TOKEN_TYPE_H
+#define TOKEN_TYPE_H
+
+// Categories of token the lexer can hand out. Enumerator order is left as-is
+// so the implicit underlying values (INT = 0 ... INVALID = 11) do not move.
+enum class TokenType {
+    // Type keywords
+    INT,          // emitted for the text `int`
+    STRING,       // NOTE(review): presumably a `string` keyword; nothing in this commit emits it
+
+    // Literals and names
+    NUMBER,       // a run of decimal digits
+    IDENTIFIER,   // a letter followed by letters and/or digits
+
+    // Arithmetic operators
+    PLUS,         // '+'
+    MINUS,        // '-'
+    MULTIPLY,     // '*'
+    DIVIDE,       // '/'
+
+    // Assignment
+    ASSIGN,       // '='
+
+    // Punctuation
+    SEMICOLON,    // ';'
+
+    // Bookkeeping
+    END_OF_FILE,  // appended once scanning has finished
+    INVALID       // any character that fits no other category
+};
+
+#endif
diff --git a/src/lexer/lexer.cpp b/src/lexer/lexer.cpp
@@ -0,0 +1,103 @@
+#include "lexer/lexer.h"
+
+#include <cctype>
+
+// Builds a lexer over its own copy of `src`, positioned at offset 0, which is
+// reported as line 1, column 1.
+//
+// The initialisers are written in member declaration order (line, column,
+// position, src): that is the order C++ runs them in no matter how the list is
+// spelled, so matching it keeps the code honest and avoids -Wreorder warnings.
+Lexer::Lexer(const std::string& src) : line(1), column(1), position(0), src(src) {}
+
+namespace {
+
+// True when `index` does not refer to a character inside `text`.
+bool atEnd(const std::string& text, int index) {
+    return index < 0 || static_cast<std::string::size_type>(index) >= text.size();
+}
+
+// The <cctype> classifiers have undefined behaviour for negative char values,
+// so every wrapper below goes through unsigned char first.
+
+// True for a decimal digit.
+bool isDigit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
+
+// True for an alphabetic character.
+bool isLetter(char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; }
+
+// True for an alphabetic character or a decimal digit.
+bool isLetterOrDigit(char c) { return std::isalnum(static_cast<unsigned char>(c)) != 0; }
+
+// True for any whitespace character (space, tab, newline, carriage return, ...).
+bool isBlank(char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; }
+
+// Maps a one-character lexeme to its token type; anything unknown is INVALID.
+TokenType symbolType(char c) {
+    switch (c) {
+        case '+': return TokenType::PLUS;
+        case '-': return TokenType::MINUS;
+        case '*': return TokenType::MULTIPLY;
+        case '/': return TokenType::DIVIDE;
+        case '=': return TokenType::ASSIGN;
+        case ';': return TokenType::SEMICOLON;
+        default:  return TokenType::INVALID;
+    }
+}
+
+} // namespace
+
+// Scans `src` from the current position to its end and returns the collected
+// tokens, finishing with an END_OF_FILE token.
+//
+// Whitespace and `//` comments are skipped before every token, so they may
+// appear anywhere, including at the very start of the input. Digit runs become
+// NUMBER, letter-led alphanumeric runs become IDENTIFIER (or INT when the whole
+// word is exactly "int"), the single characters + - * / = ; map to their
+// operator types, and any other character becomes a one-character INVALID token.
+//
+// NOTE(review): scanning state is not reset, so calling this a second time
+// returns the earlier tokens plus one more END_OF_FILE.
+std::vector<Token> Lexer::tokenise() {
+    for (;;) {
+        // Whitespace and comments can alternate (comment, blank line, comment,
+        // ...), so keep going until neither helper consumes anything.
+        int before = 0;
+        do {
+            before = position;
+            skipWhitespace();
+            skipComment();
+        } while (position != before);
+
+        if (atEnd(src, position)) {
+            break;
+        }
+
+        const char current = src[position];
+        const int start_line = line;
+        const int start_column = column;
+
+        if (isDigit(current)) {
+            std::string digits;
+            while (!atEnd(src, position) && isDigit(src[position])) {
+                digits.push_back(advance());
+            }
+            tokens.emplace_back(TokenType::NUMBER, digits, start_line, start_column);
+
+        } else if (isLetter(current)) {
+            // Read the whole word first and classify it afterwards, so that
+            // names such as "integer" are not split into a keyword plus a tail.
+            std::string word;
+            while (!atEnd(src, position) && isLetterOrDigit(src[position])) {
+                word.push_back(advance());
+            }
+            const TokenType kind = (word == "int") ? TokenType::INT : TokenType::IDENTIFIER;
+            tokens.emplace_back(kind, word, start_line, start_column);
+
+        } else {
+            // Any "//" was already consumed as a comment above, so a '/' that
+            // reaches this branch is a genuine division operator.
+            tokens.emplace_back(symbolType(current), std::string(1, current), start_line, start_column);
+            advance();
+        }
+    }
+
+    tokens.emplace_back(TokenType::END_OF_FILE, std::string(), line, column);
+    return tokens;
+}
+
+// Consumes the character at `position` and returns it. A newline moves the
+// bookkeeping to column 1 of the next line; any other character moves one
+// column to the right. At end of input nothing is consumed and '\0' is returned.
+char Lexer::advance() {
+    if (atEnd(src, position)) {
+        return '\0';
+    }
+
+    const char consumed = src[position++];
+    if (consumed == '\n') {
+        ++line;
+        column = 1;
+    } else {
+        ++column;
+    }
+    return consumed;
+}
+
+// Consumes consecutive whitespace characters, stopping at the first
+// non-whitespace character or at end of input.
+void Lexer::skipWhitespace() {
+    while (!atEnd(src, position) && isBlank(src[position])) {
+        advance();
+    }
+}
+
+// If a "//" comment starts at the current position, consumes it up to and
+// including the terminating newline, or to end of input when there is none.
+// Otherwise leaves the position untouched, so a lone '/' stays available to be
+// lexed as DIVIDE.
+void Lexer::skipComment() {
+    if (atEnd(src, position) ||
+        src.compare(static_cast<std::string::size_type>(position), 2, "//") != 0) {
+        return;
+    }
+
+    while (!atEnd(src, position)) {
+        if (advance() == '\n') {
+            break;
+        }
+    }
+}