From 45cbe5b9e22c5ace69c6dcead5872ef1b80e143e Mon Sep 17 00:00:00 2001 From: Username404-59 Date: Mon, 22 Feb 2021 19:42:20 +0100 Subject: [PATCH] Lex the entire strings. --- src/etc/lexer.cpp | 72 +++++++++++++++++++++++++++++---------------- src/headers/lex.hpp | 8 ++--- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/etc/lexer.cpp b/src/etc/lexer.cpp index 2cb7341..30a5586 100644 --- a/src/etc/lexer.cpp +++ b/src/etc/lexer.cpp @@ -10,35 +10,57 @@ vector resVal; void separate() {resVal.emplace_back(tok::SEPARATOR, "");} -vector lex(const string& in) +string generatedStr; +bool isLexingString = false; + +tok::type getIdentifierCharType(const char& Char) { + if (isalpha(Char)) return tok::IDENTIFIER; + else if (isalnum(Char)) return tok::NUMBER; + else return tok::UNEXPECTED; +} + +vector& lex(const string& in) { for (const char& current : in) { - switch (current) { - case '#': resVal.emplace_back(tok::TAG, "#"); break; - case '=': resVal.emplace_back(tok::DEFINE, "="); break; - case '(': resVal.emplace_back(tok::LPAR, "("); break; - case ')': resVal.emplace_back(tok::RPAR, ")"); break; - case '{': resVal.emplace_back(tok::LBRACE, "{"); break; - case '}': resVal.emplace_back(tok::RBRACE, "}"); break; - case '[': resVal.emplace_back(tok::LBRACKET, "["); break; - case ']': resVal.emplace_back(tok::RBRACKET, "]"); break; - case '+': resVal.emplace_back(tok::PLUS, "+"); break; - case '-': resVal.emplace_back(tok::HYPHEN, "-"); break; - case '>': resVal.emplace_back(tok::LCOMP, ">"); break; - case '<': resVal.emplace_back(tok::RCOMP, "<"); break; - case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break; - case '"': resVal.emplace_back(tok::DQUOTE, "\""); break; - case ' ': case '\t': case '\r': - case '\n': separate(); break; - default: { - if (isdigit(current)) - resVal.emplace_back(tok::INT, string(1, current)); - else if (isalpha(current)) - resVal.emplace_back(tok::ALPHACHAR, string(1, current)); - else resVal.emplace_back(tok::OTHER, string(1, current)); - break; + if (!isLexingString) { + if (!generatedStr.empty()) { + resVal.emplace_back(tok::STRING, generatedStr); + generatedStr = ""; } + switch (current) { + case '#': resVal.emplace_back(tok::TAG, "#"); break; + case '=': resVal.emplace_back(tok::DEFINE, "="); break; + case '(': resVal.emplace_back(tok::LPAR, "("); break; + case ')': resVal.emplace_back(tok::RPAR, ")"); break; + case '{': resVal.emplace_back(tok::LBRACE, "{"); break; + case '}': resVal.emplace_back(tok::RBRACE, "}"); break; + case '[': resVal.emplace_back(tok::LBRACKET, "["); break; + case ']': resVal.emplace_back(tok::RBRACKET, "]"); break; + case '+': resVal.emplace_back(tok::PLUS, "+"); break; + case '-': resVal.emplace_back(tok::HYPHEN, "-"); break; + case '>': resVal.emplace_back(tok::LCOMP, ">"); break; + case '<': resVal.emplace_back(tok::RCOMP, "<"); break; + case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break; + case '"': isLexingString = true; break; + case ' ': case '\t': case '\r': + case '\n': separate(); break; + default: { + tok::type type = getIdentifierCharType(current); + if (type != tok::UNEXPECTED) resVal.emplace_back(type, string(1, current)); + } + } + } else { + if ((current == '"')) { // TODO Permit the usage of double quotes in strings using '\' (an example would be "\"") + isLexingString = false; + } else generatedStr.append(string(1, current)); } } + /* Test + for (const auto& it : resVal) { + if (it.toktype == tok::STRING) cout << '"' << it.toktext << '"' << '\n'; + if (it.toktype == tok::NUMBER) cout << it.toktext << "= number\n"; + if (it.toktype == tok::IDENTIFIER) cout << it.toktext << '\n'; + } + */ return resVal; } \ No newline at end of file diff --git a/src/headers/lex.hpp b/src/headers/lex.hpp index fcd7759..b4a519c 100644 --- a/src/headers/lex.hpp +++ b/src/headers/lex.hpp @@ -7,16 +7,16 @@ struct tok { enum type { - DEFINE, TAG, INT, PLUS, MINUS, LPAR, LBRACE, LBRACKET, RPAR, RBRACE, RBRACKET, ALPHACHAR, HYPHEN, LCOMP, RCOMP, - SQUOTE, DQUOTE, + DEFINE, TAG, NUMBER, PLUS, LPAR, LBRACE, LBRACKET, RPAR, RBRACE, RBRACKET, ALPHACHAR, HYPHEN, LCOMP, RCOMP, + SQUOTE, STRING, IDENTIFIER, SEPARATOR, - OTHER + UNEXPECTED }; type toktype; std::string toktext; tok(type Type, std::string Text): toktype(Type), toktext(Text) {} friend std::ostream& operator<<(std::ostream& output, const tok& it) { return output << it.toktext; } }; -std::vector lex(const std::string& in); \ No newline at end of file +std::vector& lex(const std::string& in); \ No newline at end of file