bl-compiler

compiler for the bl programming language.
Log | Files | Refs | README

lexer.cpp (3117B)


      1 #include "lexer.hpp"
      2 
      3 Lexer::Lexer(const std::string& src) : src(src), position(0), line(1), column(1) {}
      4 
      5 std::vector<Token> Lexer::tokenise() {
      6     while (position < src.size()) {
      7         if (std::isdigit(src[position])) {
      8             std::string digit;
      9             int original_column = column;
     10 
     11             while (std::isdigit(src[position])) {
     12                 digit.push_back(src[position]);
     13                 advance();
     14             }
     15 
     16             tokens.push_back(Token(TokenType::NUMBER, digit, line, original_column));
     17 
     18         } else if (std::isalpha(src[position])) {
     19             if (src.substr(position, 3) == "int") {
     20                 tokens.push_back(Token(TokenType::INT, "int", line, column));
     21                 advance();
     22                 advance();
     23                 advance();
     24 
     25             } else {
     26                 std::string identifier;
     27                 int original_column = column;
     28 
     29                 while (std::isalnum(src[position])) {
     30                     identifier.push_back(src[position]);
     31                     advance();
     32                 }
     33 
     34                 tokens.push_back(Token(TokenType::IDENTIFIER, identifier, line, original_column));
     35             }
     36 
     37         } else if (src[position] == '+') {
     38             tokens.push_back(Token(TokenType::PLUS, std::string(1, src[position]), line, column));
     39             advance();
     40 
     41         } else if (src[position] == '-') {
     42             tokens.push_back(Token(TokenType::MINUS, std::string(1, src[position]), line, column));
     43             advance();
     44 
     45         } else if (src[position] == '*') {
     46             tokens.push_back(Token(TokenType::MULTIPLY, std::string(1, src[position]), line, column));
     47             advance();
     48 
     49         } else if (src[position] == '/') {
     50             tokens.push_back(Token(TokenType::DIVIDE, std::string(1, src[position]), line, column));
     51             advance();
     52 
     53         } else if (src[position] == '=') {
     54             tokens.push_back(Token(TokenType::ASSIGN, std::string(1, src[position]), line, column));
     55             advance();
     56 
     57         } else if (src[position] == ';') {
     58             tokens.push_back(Token(TokenType::SEMICOLON, std::string(1, src[position]), line, column));
     59             advance();
     60 
     61         } else {
     62             tokens.push_back(Token(TokenType::INVALID, std::string(1, src[position]), line, column));
     63             advance();
     64 
     65         }
     66         skipComment();
     67         skipWhitespace();
     68 
     69     }
     70     tokens.push_back(Token(TokenType::END_OF_FILE, std::string(), line, column));
     71     return tokens;
     72 }
     73 
     74 char Lexer::advance() {
     75     column++;
     76     return src[position++];
     77 }
     78 
     79 void Lexer::skipWhitespace() {
     80     while (src[position] == ' ' || src[position] == '\n') {
     81         if (src[position] == ' ') {
     82             position++;
     83             column++;
     84 
     85         } else if (src[position] == '\n') {
     86             position++;
     87             column = 1;
     88             line++;
     89         }
     90     }
     91 }
     92 
     93 void Lexer::skipComment() {
     94     if (src[position] == '/' && src[++position] == '/') {
     95         while (src[position] != '\n') {
     96             position++;
     97             column++;
     98         }
     99         position++;
    100         line += 1;
    101         column = 1;
    102     }
    103 }