Lex identifiers and numbers instead of just strings, and use isdigit instead of isalnum in lexer.cpp.
This commit is contained in:
parent
179d3813ab
commit
d42c0d4293
|
@ -10,22 +10,23 @@ vector<tok> resVal;
|
||||||
|
|
||||||
void separate() {resVal.emplace_back(tok::SEPARATOR, "");}
|
void separate() {resVal.emplace_back(tok::SEPARATOR, "");}
|
||||||
|
|
||||||
string generatedStr;
|
bool longLex = false;
|
||||||
bool isLexingString = false;
|
|
||||||
|
|
||||||
tok::type getIdentifierCharType(const char& Char) {
|
tok::type getIdentifierCharType(const char& Char) {
|
||||||
if (isalpha(Char)) return tok::IDENTIFIER;
|
if (isalpha(Char)) return tok::IDENTIFIER;
|
||||||
else if (isalnum(Char)) return tok::NUMBER;
|
else if (isdigit(Char)) return tok::NUMBER;
|
||||||
|
else if (Char == '"') return tok::STRING;
|
||||||
else return tok::UNEXPECTED;
|
else return tok::UNEXPECTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<tok>& lex(const string& in)
|
vector<tok>& lex(const string& in)
|
||||||
{
|
{
|
||||||
|
pair<tok::type, string> generated;
|
||||||
for (const char& current : in) {
|
for (const char& current : in) {
|
||||||
if (!isLexingString) {
|
if (!longLex) {
|
||||||
if (!generatedStr.empty()) {
|
if (!generated.second.empty()) {
|
||||||
resVal.emplace_back(tok::STRING, generatedStr);
|
resVal.emplace_back(generated.first, generated.second);
|
||||||
generatedStr = "";
|
generated.second = "";
|
||||||
}
|
}
|
||||||
switch (current) {
|
switch (current) {
|
||||||
case '#': resVal.emplace_back(tok::TAG, "#"); break;
|
case '#': resVal.emplace_back(tok::TAG, "#"); break;
|
||||||
|
@ -41,18 +42,29 @@ vector<tok>& lex(const string& in)
|
||||||
case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
|
case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
|
||||||
case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
|
case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
|
||||||
case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
|
case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
|
||||||
case '"': isLexingString = true; break;
|
|
||||||
case ' ': case '\t': case '\r':
|
case ' ': case '\t': case '\r':
|
||||||
case '\n': separate(); break;
|
case '\n': separate(); break;
|
||||||
default: { // TODO Lex entire identifiers and numbers
|
default: {
|
||||||
tok::type type = getIdentifierCharType(current);
|
tok::type type = getIdentifierCharType(current);
|
||||||
if (type != tok::UNEXPECTED) resVal.emplace_back(type, string(1, current));
|
switch (type) {
|
||||||
|
case tok::UNEXPECTED: break;
|
||||||
|
case tok::IDENTIFIER: case tok::NUMBER: {
|
||||||
|
generated.second.append(string(1, current));
|
||||||
|
}
|
||||||
|
case tok::STRING: {
|
||||||
|
generated.first = type;
|
||||||
|
longLex = true;
|
||||||
|
}; break;
|
||||||
|
default: resVal.emplace_back(type, string(1, current));
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if ((current == '"')) { // TODO Permit the usage of double quotes in strings using '\' (an example would be "\"")
|
tok::type currentType = getIdentifierCharType(current);
|
||||||
isLexingString = false;
|
if ((currentType == tok::STRING) || ((currentType == tok::UNEXPECTED) && ((generated.first == tok::IDENTIFIER) || (generated.first == tok::NUMBER)))) {
|
||||||
} else generatedStr.append(string(1, current));
|
longLex = false;
|
||||||
|
} else generated.second.append(string(1, current));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Test
|
/* Test
|
||||||
|
|
Loading…
Reference in New Issue