Lex entire strings.
This commit is contained in:
parent
e8040a009a
commit
45cbe5b9e2
@ -10,35 +10,57 @@ vector<tok> resVal;
|
||||
|
||||
void separate() {resVal.emplace_back(tok::SEPARATOR, "");}
|
||||
|
||||
vector<tok> lex(const string& in)
|
||||
// Characters collected so far for the double-quoted string that lex() is reading;
// emptied again once the text has been emitted as a tok::STRING token.
string generatedStr;
|
||||
// True while lex() is between an opening and a closing double quote.
bool isLexingString = false;
|
||||
|
||||
// Classifies a single character found outside a string literal.
//
// Returns tok::IDENTIFIER for an alphabetic character, tok::NUMBER for a decimal
// digit, and tok::UNEXPECTED for anything else.
tok::type getIdentifierCharType(const char& Char) {
    // The <cctype> classifiers have undefined behaviour when handed a negative
    // value other than EOF, which is what a plain `char` holding a byte >= 0x80
    // becomes on platforms where `char` is signed. Go through unsigned char first.
    const unsigned char uc = static_cast<unsigned char>(Char);
    if (isalpha(uc)) return tok::IDENTIFIER;
    if (isdigit(uc)) return tok::NUMBER;
    return tok::UNEXPECTED;
}
|
||||
|
||||
// Lexes `in` and appends the resulting tokens to the file-level vector `resVal`.
//
// Outside a string literal every punctuation character becomes its own token,
// whitespace becomes a SEPARATOR token (via separate()), letters and digits become
// one-character IDENTIFIER / NUMBER tokens, and any other character is dropped.
// Text between two double quotes is collected into a single STRING token; the
// quotes themselves are not part of the token text.
//
// The string-lexing state (`generatedStr`, `isLexingString`) lives at file scope,
// so a string literal left open at the end of `in` continues on the next call.
//
// Returns a reference to `resVal`, which keeps accumulating across calls.
vector<tok>& lex(const string& in)
{
    for (const char& current : in) {
        if (isLexingString) {
            // TODO Permit the usage of double quotes in strings using '\' (an example would be "\"")
            if (current == '"') {
                // Emit the literal as soon as it is closed. Waiting for the next
                // character (as before) lost a literal that ends the input and
                // never produced a token for the empty literal "".
                resVal.emplace_back(tok::STRING, generatedStr);
                generatedStr.clear();
                isLexingString = false;
            } else {
                generatedStr += current;
            }
            continue;
        }

        switch (current) {
            case '#': resVal.emplace_back(tok::TAG, "#"); break;
            case '=': resVal.emplace_back(tok::DEFINE, "="); break;
            case '(': resVal.emplace_back(tok::LPAR, "("); break;
            case ')': resVal.emplace_back(tok::RPAR, ")"); break;
            case '{': resVal.emplace_back(tok::LBRACE, "{"); break;
            case '}': resVal.emplace_back(tok::RBRACE, "}"); break;
            case '[': resVal.emplace_back(tok::LBRACKET, "["); break;
            case ']': resVal.emplace_back(tok::RBRACKET, "]"); break;
            case '+': resVal.emplace_back(tok::PLUS, "+"); break;
            case '-': resVal.emplace_back(tok::HYPHEN, "-"); break;
            case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
            case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
            case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
            case '"': isLexingString = true; break;  // opening quote: start collecting
            case ' ': case '\t': case '\r':
            case '\n': separate(); break;
            default: {
                // Letters and digits become one-character tokens; everything else is skipped.
                const tok::type charType = getIdentifierCharType(current);
                if (charType != tok::UNEXPECTED) resVal.emplace_back(charType, string(1, current));
                break;
            }
        }
    }
    /* Test
    for (const auto& it : resVal) {
        if (it.toktype == tok::STRING) cout << '"' << it.toktext << '"' << '\n';
        if (it.toktype == tok::NUMBER) cout << it.toktext << "= number\n";
        if (it.toktype == tok::IDENTIFIER) cout << it.toktext << '\n';
    }
    */
    return resVal;
}
|
@ -7,16 +7,16 @@
|
||||
|
||||
// A single lexical token: its category plus the text it was built from.
struct tok {
    // Token categories. Each enumerator is declared exactly once; listing both the
    // previous and the current set side by side redeclares names and does not compile.
    enum type {
        DEFINE, TAG, NUMBER, PLUS, LPAR, LBRACE, LBRACKET, RPAR, RBRACE, RBRACKET, ALPHACHAR, HYPHEN, LCOMP, RCOMP,
        SQUOTE,
        STRING,
        IDENTIFIER,
        SEPARATOR,
        UNEXPECTED
    };
    type toktype;         // category of this token
    std::string toktext;  // text carried by this token
    tok(type Type, std::string Text): toktype(Type), toktext(Text) {}
    // Streams only the token text; the category is not printed.
    friend std::ostream& operator<<(std::ostream& output, const tok& it) { return output << it.toktext; }
};
|
||||
std::vector<tok> lex(const std::string& in);
|
||||
std::vector<tok>& lex(const std::string& in);
|
Loading…
Reference in New Issue
Block a user