Lex identifiers and numbers instead of just strings, and use isdigit instead of isalnum in lexer.cpp.

This commit is contained in:
Username404-59 2021-03-02 20:39:30 +01:00
parent 179d3813ab
commit d42c0d4293
Signed by: Username404-59
GPG Key ID: 7AB361FBB257A5D1
1 changed file with 25 additions and 13 deletions

View File

@ -10,22 +10,23 @@ vector<tok> resVal;
// Appends a SEPARATOR token with an empty value to the file-level resVal vector.
void separate()
{
    resVal.emplace_back(tok::SEPARATOR, "");
}
// Buffer for the characters read between double quotes; the visible part of lex()
// emits it as a tok::STRING token and then resets it to "".
string generatedStr;
// Set to true by lex() on an opening '"' and back to false on the next '"'.
bool isLexingString = false;
// Set to true by lex() when a multi-character token (identifier, number or string)
// begins, and back to false once the character that ends that token is seen.
bool longLex = false;
/// Classifies a single character by the kind of token it belongs to.
///
/// @param Char the character to classify.
/// @return tok::IDENTIFIER for an alphabetic character, tok::NUMBER for a decimal
///         digit, tok::STRING for a double quote, and tok::UNEXPECTED otherwise.
tok::type getIdentifierCharType(const char& Char) {
    // The <cctype> predicates are undefined for negative arguments other than EOF,
    // so the value is converted to unsigned char before being tested.
    const auto code = static_cast<unsigned char>(Char);
    if (isalpha(code)) return tok::IDENTIFIER;
    // Only isdigit is needed here: once isalpha has failed, isalnum and isdigit agree,
    // which made the former isalnum branch shadow this one.
    if (isdigit(code)) return tok::NUMBER;
    if (Char == '"') return tok::STRING;
    return tok::UNEXPECTED;
}
vector<tok>& lex(const string& in)
{
pair<tok::type, string> generated;
for (const char& current : in) {
if (!isLexingString) {
if (!generatedStr.empty()) {
resVal.emplace_back(tok::STRING, generatedStr);
generatedStr = "";
if (!longLex) {
if (!generated.second.empty()) {
resVal.emplace_back(generated.first, generated.second);
generated.second = "";
}
switch (current) {
case '#': resVal.emplace_back(tok::TAG, "#"); break;
@ -41,18 +42,29 @@ vector<tok>& lex(const string& in)
case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
case '"': isLexingString = true; break;
case ' ': case '\t': case '\r':
case '\n': separate(); break;
default: { // TODO Lex entire identifiers and numbers
default: {
tok::type type = getIdentifierCharType(current);
if (type != tok::UNEXPECTED) resVal.emplace_back(type, string(1, current));
switch (type) {
case tok::UNEXPECTED: break;
case tok::IDENTIFIER: case tok::NUMBER: {
generated.second.append(string(1, current));
}
case tok::STRING: {
generated.first = type;
longLex = true;
}; break;
default: resVal.emplace_back(type, string(1, current));
}
break;
}
}
} else {
if ((current == '"')) { // TODO Permit the usage of double quotes in strings using '\' (an example would be "\"")
isLexingString = false;
} else generatedStr.append(string(1, current));
tok::type currentType = getIdentifierCharType(current);
if ((currentType == tok::STRING) || ((currentType == tok::UNEXPECTED) && ((generated.first == tok::IDENTIFIER) || (generated.first == tok::NUMBER)))) {
longLex = false;
} else generated.second.append(string(1, current));
}
}
/* Test