Lex identifiers and numbers instead of just strings, and use isdigit instead of isalnum in lexer.cpp.

This commit is contained in:
Username404-59 2021-03-02 20:39:30 +01:00
parent 179d3813ab
commit d42c0d4293
Signed by: Username404-59
GPG Key ID: 7AB361FBB257A5D1
1 changed file with 25 additions and 13 deletions

View File

@ -10,22 +10,23 @@ vector<tok> resVal;
void separate() {resVal.emplace_back(tok::SEPARATOR, "");} void separate() {resVal.emplace_back(tok::SEPARATOR, "");}
string generatedStr; bool longLex = false;
bool isLexingString = false;
tok::type getIdentifierCharType(const char& Char) { tok::type getIdentifierCharType(const char& Char) {
if (isalpha(Char)) return tok::IDENTIFIER; if (isalpha(Char)) return tok::IDENTIFIER;
else if (isalnum(Char)) return tok::NUMBER; else if (isdigit(Char)) return tok::NUMBER;
else if (Char == '"') return tok::STRING;
else return tok::UNEXPECTED; else return tok::UNEXPECTED;
} }
vector<tok>& lex(const string& in) vector<tok>& lex(const string& in)
{ {
pair<tok::type, string> generated;
for (const char& current : in) { for (const char& current : in) {
if (!isLexingString) { if (!longLex) {
if (!generatedStr.empty()) { if (!generated.second.empty()) {
resVal.emplace_back(tok::STRING, generatedStr); resVal.emplace_back(generated.first, generated.second);
generatedStr = ""; generated.second = "";
} }
switch (current) { switch (current) {
case '#': resVal.emplace_back(tok::TAG, "#"); break; case '#': resVal.emplace_back(tok::TAG, "#"); break;
@ -41,18 +42,29 @@ vector<tok>& lex(const string& in)
case '>': resVal.emplace_back(tok::LCOMP, ">"); break; case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
case '<': resVal.emplace_back(tok::RCOMP, "<"); break; case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break; case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
case '"': isLexingString = true; break;
case ' ': case '\t': case '\r': case ' ': case '\t': case '\r':
case '\n': separate(); break; case '\n': separate(); break;
default: { // TODO Lex entire identifiers and numbers default: {
tok::type type = getIdentifierCharType(current); tok::type type = getIdentifierCharType(current);
if (type != tok::UNEXPECTED) resVal.emplace_back(type, string(1, current)); switch (type) {
case tok::UNEXPECTED: break;
case tok::IDENTIFIER: case tok::NUMBER: {
generated.second.append(string(1, current));
}
case tok::STRING: {
generated.first = type;
longLex = true;
}; break;
default: resVal.emplace_back(type, string(1, current));
}
break;
} }
} }
} else { } else {
if ((current == '"')) { // TODO Permit the usage of double quotes in strings using '\' (an example would be "\"") tok::type currentType = getIdentifierCharType(current);
isLexingString = false; if ((currentType == tok::STRING) || ((currentType == tok::UNEXPECTED) && ((generated.first == tok::IDENTIFIER) || (generated.first == tok::NUMBER)))) {
} else generatedStr.append(string(1, current)); longLex = false;
} else generated.second.append(string(1, current));
} }
} }
/* Test /* Test