Add "ASTERISK" and "DIVIDE" token types to lex.hpp, and implement comments

2021-11-24 17:44:44 +01:00 · 2021-11-24 17:44:44 +01:00 · 26978f75c8
commit 26978f75c8
parent 7d40230f65
2 changed files with 32 additions and 8 deletions
--- a/src/etc/lexer.cpp
+++ b/src/etc/lexer.cpp
@ -17,12 +17,37 @@ vector<tok> lex(const string& in)
        const char& current = in[i];

        switch (current) {
+            case DIVIDE: if (in[i + 1] == current) i += 2; else goto insertToken;
+            case TAG: {
+                if ((current == TAG && in[i + 1] == DEFINE)) { // See the IDENTIFIER case in Parser.hpp
+                    goto insertToken;
+                } else {
+                    while (not (in[i + 1] == EOF_ || in[i + 1] == '\n')) {
+                        ++i;
+                    }
+                    break;
+                }
+            }
+            case ASTERISK: {
+                if ((in.size() - i) > 2 && in[i + 1] == ASTERISK) {
+                    i += 2;
+                    try {
+                        while (not (in.at(i) == ASTERISK && in.at(i + 1) == ASTERISK)) {
+                            lineNumber += (in[i] == '\n'); ++i;
+                        }
+                        ++i;
+                    } catch (const out_of_range&) {
+                        throw tok::LexerException("A never ending comment was found", --lineNumber);
+                    }
+                    break;
+                } else goto insertToken;
+            }
            [[unlikely]] case EOF_: --lineNumber;
-            case TAG: case DEFINE: case LPAR: case RPAR:
+            case DEFINE: case LPAR: case RPAR:
            case LBRACE: case RBRACE: case LBRACKET: case RBRACKET:
            case PLUS: case HYPHEN: case LCOMP: case RCOMP:
            case DOT: case DOLLAR_SIGN: case SQUOTE:
-                resVal.emplace_back(static_cast<tok::type>(current), lineNumber);
+                insertToken: resVal.emplace_back(static_cast<tok::type>(current), lineNumber);
            [[likely]] case ' ': case '\t': case '\r': break;
            [[likely]] case '\n': ++lineNumber; break;
            default: {
@ -45,8 +70,7 @@ vector<tok> lex(const string& in)
                            }
                        }
                    }
-                    case UNEXPECTED: break;
-                    default: resVal.emplace_back(type, string(1, current), lineNumber);
+                    default: break;
                }
                break;
            }
--- a/src/headers/lex.hpp
+++ b/src/headers/lex.hpp
@ -11,11 +11,11 @@ struct tok {
    enum type: const unsigned short {
        UNEXPECTED = std::numeric_limits<unsigned char>::max() + 1, IDENTIFIER, NUMBER, ALPHACHAR,
        EOF_ = '\0', DEFINE = '=', TAG = '#', DOLLAR_SIGN = '$', DOT = '.',
-        PLUS = '+', LPAR = '(', LBRACE = '{', LBRACKET = '[', RPAR = ')',
+        LPAR = '(', LBRACE = '{', LBRACKET = '[', RPAR = ')',
        RBRACE = '}', RBRACKET = ']',
-        HYPHEN = '-', LCOMP = '>', RCOMP = '<',
-        SQUOTE = '\'',
-        STRING = '"',
+        PLUS = '+', HYPHEN = '-', DIVIDE = '/',
+        LCOMP = '>', RCOMP = '<',
+        SQUOTE = '\'', ASTERISK = '*', STRING = '"',
    };
    const type toktype;
    const std::string toktext;