Lex entire strings.

This commit is contained in:
Username404-59 2021-02-22 19:42:20 +01:00
parent e8040a009a
commit 45cbe5b9e2
Signed by: Username404-59
GPG Key ID: 7AB361FBB257A5D1
2 changed files with 51 additions and 29 deletions

View File

@ -10,35 +10,57 @@ vector<tok> resVal;
// Appends a SEPARATOR token with empty text to the shared resVal token list.
void separate()
{
    resVal.emplace_back(tok::SEPARATOR, "");
}
vector<tok> lex(const string& in)
// Accumulates the characters of the string literal currently being lexed; lex() emits it as a STRING token and then resets it.
string generatedStr;
// True while lex() is between an opening and a closing double quote.
bool isLexingString = false;
// Classifies a single character for the lexer's default case.
// Returns tok::IDENTIFIER for an alphabetic character, tok::NUMBER for a decimal digit,
// and tok::UNEXPECTED for anything else.
tok::type getIdentifierCharType(const char& Char) {
    // The <cctype> classifiers have undefined behaviour for negative arguments other than EOF,
    // which a plain (possibly signed) char produces for non-ASCII bytes; convert first.
    const unsigned char value = static_cast<unsigned char>(Char);
    if (isalpha(value)) return tok::IDENTIFIER;
    // The original tested isalnum() after isalpha() had failed, which is exactly the digit case.
    if (isdigit(value)) return tok::NUMBER;
    return tok::UNEXPECTED;
}
// Tokenizes `in` one character at a time, appending the resulting tokens to resVal
// and returning a reference to resVal.
// NOTE(review): this span comes from a diff view rendered without +/- markers. The first
// switch (down to the `break;` just before `if (!isLexingString)`) appears to be the removed
// pre-commit body and the remainder the added one; the braces do not balance as shown.
// Confirm against the repository before compiling.
vector<tok>& lex(const string& in)
{
for (const char& current : in) {
switch (current) {
case '#': resVal.emplace_back(tok::TAG, "#"); break;
case '=': resVal.emplace_back(tok::DEFINE, "="); break;
case '(': resVal.emplace_back(tok::LPAR, "("); break;
case ')': resVal.emplace_back(tok::RPAR, ")"); break;
case '{': resVal.emplace_back(tok::LBRACE, "{"); break;
case '}': resVal.emplace_back(tok::RBRACE, "}"); break;
case '[': resVal.emplace_back(tok::LBRACKET, "["); break;
case ']': resVal.emplace_back(tok::RBRACKET, "]"); break;
case '+': resVal.emplace_back(tok::PLUS, "+"); break;
case '-': resVal.emplace_back(tok::HYPHEN, "-"); break;
case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
case '"': resVal.emplace_back(tok::DQUOTE, "\""); break;
case ' ': case '\t': case '\r':
case '\n': separate(); break;
default: {
if (isdigit(current))
resVal.emplace_back(tok::INT, string(1, current));
else if (isalpha(current))
resVal.emplace_back(tok::ALPHACHAR, string(1, current));
else resVal.emplace_back(tok::OTHER, string(1, current));
break;
// Outside a string literal: emit any completed string first, then classify the current character.
if (!isLexingString) {
// A finished string is only emitted here, i.e. when a later character is processed.
// NOTE(review): a string that ends the input, or an empty "" literal, looks like it never
// produces a STRING token (and generatedStr would carry over to the next call) — confirm.
if (!generatedStr.empty()) {
resVal.emplace_back(tok::STRING, generatedStr);
generatedStr = "";
}
switch (current) {
case '#': resVal.emplace_back(tok::TAG, "#"); break;
case '=': resVal.emplace_back(tok::DEFINE, "="); break;
case '(': resVal.emplace_back(tok::LPAR, "("); break;
case ')': resVal.emplace_back(tok::RPAR, ")"); break;
case '{': resVal.emplace_back(tok::LBRACE, "{"); break;
case '}': resVal.emplace_back(tok::RBRACE, "}"); break;
case '[': resVal.emplace_back(tok::LBRACKET, "["); break;
case ']': resVal.emplace_back(tok::RBRACKET, "]"); break;
case '+': resVal.emplace_back(tok::PLUS, "+"); break;
case '-': resVal.emplace_back(tok::HYPHEN, "-"); break;
case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
// An opening double quote switches to string mode; the quote itself produces no token.
case '"': isLexingString = true; break;
case ' ': case '\t': case '\r':
case '\n': separate(); break;
default: {
// Letters and digits each become a one-character IDENTIFIER/NUMBER token;
// any other character is dropped without emitting a token.
tok::type type = getIdentifierCharType(current);
if (type != tok::UNEXPECTED) resVal.emplace_back(type, string(1, current));
}
}
} else {
// Inside a string literal: a double quote ends the string, every other character is collected.
if ((current == '"')) { // TODO Permit the usage of double quotes in strings using '\' (an example would be "\"")
isLexingString = false;
} else generatedStr.append(string(1, current));
}
}
/* Test
for (const auto& it : resVal) {
if (it.toktype == tok::STRING) cout << '"' << it.toktext << '"' << '\n';
if (it.toktype == tok::NUMBER) cout << it.toktext << "= number\n";
if (it.toktype == tok::IDENTIFIER) cout << it.toktext << '\n';
}
*/
return resVal;
}

View File

@ -7,16 +7,16 @@
// A lexer token: a category (toktype) together with its associated text (toktext).
struct tok {
// Token categories.
// NOTE(review): this span comes from a diff view rendered without +/- markers. The lines
// beginning `DEFINE, TAG, INT` and `SQUOTE, DQUOTE,` and the `OTHER` line appear to be the
// pre-commit versions of the lines that follow them — confirm against the repository.
enum type {
DEFINE, TAG, INT, PLUS, MINUS, LPAR, LBRACE, LBRACKET, RPAR, RBRACE, RBRACKET, ALPHACHAR, HYPHEN, LCOMP, RCOMP,
SQUOTE, DQUOTE,
DEFINE, TAG, NUMBER, PLUS, LPAR, LBRACE, LBRACKET, RPAR, RBRACE, RBRACKET, ALPHACHAR, HYPHEN, LCOMP, RCOMP,
SQUOTE,
STRING,
IDENTIFIER,
SEPARATOR,
OTHER
UNEXPECTED
};
// Category of this token.
type toktype;
// Text carried by this token.
std::string toktext;
// Builds a token from a category and its text.
tok(type Type, std::string Text): toktype(Type), toktext(Text) {}
// Streams only the token's text; the category is not written.
friend std::ostream& operator<<(std::ostream& output, const tok& it) { return output << it.toktext; }
};
std::vector<tok> lex(const std::string& in);
std::vector<tok>& lex(const std::string& in);