lexer

author: Cori Barker <coribarker2@gmail.com> 2026-01-19 09:35:45 +0000
committer: Cori Barker <coribarker2@gmail.com> 2026-01-19 09:35:45 +0000
commit: e34338219036acf103d161dfbd36fa67597d23e7 (patch)
tree: 00b4b09fd7fcb47502aee4b5be0e939fd771aab1
4 files changed, 172 insertions, 0 deletions
diff --git a/include/lexer/lexer.h b/include/lexer/lexer.h
new file mode 100644
index 0000000..f24e92e
--- /dev/null
+++ b/include/lexer/lexer.h
@@ -0,0 +1,27 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "token.h"
+
+#include <vector>
+#include <string>
+
+class Lexer {  // Turns a source string into a flat list of Token objects.
+public:
+    explicit Lexer (const std::string& src);  // keeps a copy of src; scanning starts at offset 0, line 1, column 1
+    // Scans the stored source and returns the tokens found, ending with an END_OF_FILE token.
+    std::vector<Token> tokenise();
+
+private:
+    int line;                   // current line number; starts at 1
+    int column;                 // current column number; starts at 1 and is reset to 1 after a newline
+    int position;               // index into src of the next character to read
+    std::string src;            // the text being tokenised
+    std::vector<Token> tokens;  // tokens collected by tokenise()
+    // Scanning helpers used by tokenise().
+    char advance();             // returns the current character and steps past it
+    void skipWhitespace();      // steps over a run of whitespace
+    void skipComment();         // steps over a // line comment
+};
+
+#endif
diff --git a/include/lexer/token.h b/include/lexer/token.h
new file mode 100644
index 0000000..54ac116
--- /dev/null
+++ b/include/lexer/token.h
@@ -0,0 +1,17 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include "token_type.h"
+
+#include <string>
+
+struct Token {  // One token produced by the lexer.
+    TokenType type;     // kind of token
+    std::string value;  // text of the token (empty for END_OF_FILE)
+    int line;           // line the lexer reported for the token
+    int column;         // column the lexer reported for the token
+    // Stores each argument in the matching field (the `line` initialiser reads the parameter, not the member).
+    Token(TokenType t, const std::string& val, int line, int col) : type{t}, value{val}, line{line}, column{col} {}
+};
+
+#endif
diff --git a/include/lexer/token_type.h b/include/lexer/token_type.h
new file mode 100644
index 0000000..f83c6d6
--- /dev/null
+++ b/include/lexer/token_type.h
@@ -0,0 +1,25 @@
+#ifndef TOKEN_TYPE_H
+#define TOKEN_TYPE_H
+
+enum class TokenType {  // Kinds of token the lexer can attach to a Token.
+    INT,          // the keyword `int`
+    STRING,       // not produced by the lexer in this change; presumably a string keyword or literal - TODO confirm
+
+    NUMBER,       // run of decimal digits
+    IDENTIFIER,   // letter followed by letters/digits
+
+    PLUS,         // '+'
+    MINUS,        // '-'
+    MULTIPLY,     // '*'
+    DIVIDE,       // '/'
+
+    ASSIGN,       // '='
+
+    SEMICOLON,    // ';'
+
+    END_OF_FILE,  // appended once after the last token
+    INVALID       // any character the lexer does not recognise
+
+};
+
+#endif
diff --git a/src/lexer/lexer.cpp b/src/lexer/lexer.cpp
new file mode 100644
index 0000000..42700dd
--- /dev/null
+++ b/src/lexer/lexer.cpp
@@ -0,0 +1,103 @@
+#include "lexer/lexer.h"
+#include <cctype>
+Lexer::Lexer(const std::string& src) : line(1), column(1), position(0), src(src) {}  // copies src; starts at offset 0, line 1, column 1 (initialisers follow member declaration order)
+
+// Lexes `src` from its beginning and returns the complete token list.
+//
+// Produces NUMBER for a run of decimal digits, INT for the whole word `int`,
+// IDENTIFIER for any other letter followed by letters/digits, one token each
+// for + - * / = ; and INVALID for every other character. Whitespace (as
+// defined by skipWhitespace()) and `//` comments between tokens are dropped.
+// Each token records the line and column of its first character; the list
+// always ends with a single END_OF_FILE token that has an empty value.
+std::vector<Token> Lexer::tokenise() {
+    // Restart from a clean state so that a repeated call lexes the input again
+    // rather than appending to the previous result.
+    tokens.clear();
+    position = 0;
+    line = 1;
+    column = 1;
+
+    const auto at_end = [this] { return static_cast<std::string::size_type>(position) >= src.size(); };
+    // The <cctype> classifiers need a value representable as unsigned char.
+    const auto peek = [this] { return static_cast<unsigned char>(src[position]); };
+
+    while (!at_end()) {
+        // Drop trivia in front of every token, including at the very start of
+        // the input and a comment that follows whitespace. skipComment() is
+        // only entered on a confirmed "//" so a lone '/' still becomes DIVIDE.
+        const auto trivia_start = position;
+        skipWhitespace();
+        if (src.compare(static_cast<std::string::size_type>(position), 2, "//") == 0) {
+            skipComment();
+        }
+        if (position != trivia_start) {
+            continue;
+        }
+
+        const int start_column = column;
+        const unsigned char first = peek();
+        if (std::isdigit(first)) {
+            std::string digits;
+            while (!at_end() && std::isdigit(peek())) {
+                digits.push_back(advance());
+            }
+            tokens.emplace_back(TokenType::NUMBER, digits, line, start_column);
+
+        } else if (std::isalpha(first)) {
+            std::string word;
+            while (!at_end() && std::isalnum(peek())) {
+                word.push_back(advance());
+            }
+            // `int` is a keyword only as a whole word; "integer" must stay one identifier.
+            const TokenType kind = (word == "int") ? TokenType::INT : TokenType::IDENTIFIER;
+            tokens.emplace_back(kind, word, line, start_column);
+
+        } else {
+            TokenType kind = TokenType::INVALID;
+            switch (first) {
+                case '+': kind = TokenType::PLUS; break;
+                case '-': kind = TokenType::MINUS; break;
+                case '*': kind = TokenType::MULTIPLY; break;
+                case '/': kind = TokenType::DIVIDE; break;
+                case '=': kind = TokenType::ASSIGN; break;
+                case ';': kind = TokenType::SEMICOLON; break;
+                default: break;  // anything unrecognised is reported as INVALID
+            }
+            tokens.emplace_back(kind, std::string(1, advance()), line, start_column);
+        }
+    }
+    tokens.emplace_back(TokenType::END_OF_FILE, std::string(), line, column);
+    return tokens;
+}
+
+// Consumes a single character: hands back the character at `position`, then
+// moves `position` and `column` forward by one. No end-of-input check is made
+// here and `line` is never touched, so a consumed '\n' is not counted as a new line.
+char Lexer::advance() { ++column; return src[position++]; }
+
+// Skips a run of whitespace starting at `position`, keeping `line`/`column` in
+// step: a newline moves to the next line at column 1, any other whitespace
+// character moves one column to the right. Tabs and carriage returns count as
+// whitespace too (tab-indented and CRLF sources). The explicit size check
+// stops the scan at the end of `src` instead of relying on a terminator.
+void Lexer::skipWhitespace() {
+    for (; static_cast<std::string::size_type>(position) < src.size(); ++position) {
+        const char c = src[position];
+        if (c == '\n') { ++line; column = 1; }
+        else if (c == ' ' || c == '\t' || c == '\r') { ++column; }
+        else { break; }
+    }
+}
+
+// Skips a `//` line comment starting exactly at `position`; otherwise a no-op.
+// The look-ahead never moves `position`, so a lone '/' is left for the caller.
+// A terminating '\n' is consumed (next line, column 1); at end of input the scan just stops.
+void Lexer::skipComment() {
+    const auto at = static_cast<std::string::size_type>(position);
+    if (at >= src.size() || src.compare(at, 2, "//") != 0) return;
+    const auto eol = src.find('\n', at);
+    const auto stop = (eol == std::string::npos) ? src.size() : eol + 1;
+    if (eol == std::string::npos) column += static_cast<int>(stop - at); else { ++line; column = 1; }
+    position = static_cast<int>(stop);
+}
author	Cori Barker <coribarker2@gmail.com>	2026-01-19 09:35:45 +0000
committer	Cori Barker <coribarker2@gmail.com>	2026-01-19 09:35:45 +0000
commit	e34338219036acf103d161dfbd36fa67597d23e7 (patch)
tree	00b4b09fd7fcb47502aee4b5be0e939fd771aab1