From 26978f75c899c856dc8a8ab73ed27ba64aa370b1 Mon Sep 17 00:00:00 2001 From: Username404-59 Date: Wed, 24 Nov 2021 17:44:44 +0100 Subject: [PATCH] Add "ASTERISK" and "DIVIDE" token types to lex.hpp, and implement comments --- src/etc/lexer.cpp | 32 ++++++++++++++++++++++++++++---- src/headers/lex.hpp | 8 ++++---- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/etc/lexer.cpp b/src/etc/lexer.cpp index 27a7261..4f92e63 100644 --- a/src/etc/lexer.cpp +++ b/src/etc/lexer.cpp @@ -17,12 +17,37 @@ vector lex(const string& in) const char& current = in[i]; switch (current) { + case DIVIDE: if (in[i + 1] == current) i += 2; else goto insertToken; + case TAG: { + if ((current == TAG && in[i + 1] == DEFINE)) { // See the IDENTIFIER case in Parser.hpp + goto insertToken; + } else { + while (not (in[i + 1] == EOF_ || in[i + 1] == '\n')) { + ++i; + } + break; + } + } + case ASTERISK: { + if ((in.size() - i) > 2 && in[i + 1] == ASTERISK) { + i += 2; + try { + while (not (in.at(i) == ASTERISK && in.at(i + 1) == ASTERISK)) { + lineNumber += (in[i] == '\n'); ++i; + } + ++i; + } catch (const out_of_range&) { + throw tok::LexerException("A never ending comment was found", --lineNumber); + } + break; + } else goto insertToken; + } [[unlikely]] case EOF_: --lineNumber; - case TAG: case DEFINE: case LPAR: case RPAR: + case DEFINE: case LPAR: case RPAR: case LBRACE: case RBRACE: case LBRACKET: case RBRACKET: case PLUS: case HYPHEN: case LCOMP: case RCOMP: case DOT: case DOLLAR_SIGN: case SQUOTE: - resVal.emplace_back(static_cast(current), lineNumber); + insertToken: resVal.emplace_back(static_cast(current), lineNumber); [[likely]] case ' ': case '\t': case '\r': break; [[likely]] case '\n': ++lineNumber; break; default: { @@ -45,8 +70,7 @@ vector lex(const string& in) } } } - case UNEXPECTED: break; - default: resVal.emplace_back(type, string(1, current), lineNumber); + default: break; } break; } diff --git a/src/headers/lex.hpp b/src/headers/lex.hpp index 03767c8..5279a74 100644 --- a/src/headers/lex.hpp +++ b/src/headers/lex.hpp @@ -11,11 +11,11 @@ struct tok { enum type: const unsigned short { UNEXPECTED = std::numeric_limits::max() + 1, IDENTIFIER, NUMBER, ALPHACHAR, EOF_ = '\0', DEFINE = '=', TAG = '#', DOLLAR_SIGN = '$', DOT = '.', - PLUS = '+', LPAR = '(', LBRACE = '{', LBRACKET = '[', RPAR = ')', + LPAR = '(', LBRACE = '{', LBRACKET = '[', RPAR = ')', RBRACE = '}', RBRACKET = ']', - HYPHEN = '-', LCOMP = '>', RCOMP = '<', - SQUOTE = '\'', - STRING = '"', + PLUS = '+', HYPHEN = '-', DIVIDE = '/', + LCOMP = '>', RCOMP = '<', + SQUOTE = '\'', ASTERISK = '*', STRING = '"', }; const type toktype; const std::string toktext;