Lex entire string literals as single STRING tokens.
This commit is contained in:
parent
e8040a009a
commit
45cbe5b9e2
@ -10,35 +10,57 @@ vector<tok> resVal;
|
|||||||
|
|
||||||
void separate() {resVal.emplace_back(tok::SEPARATOR, "");}
|
void separate() {resVal.emplace_back(tok::SEPARATOR, "");}
|
||||||
|
|
||||||
vector<tok> lex(const string& in)
|
// Text of the string literal currently being lexed; lex() appends to it between double quotes.
string generatedStr;
|
||||||
|
// Set by lex() when it reads an opening double quote and cleared on the closing one.
bool isLexingString = false;
|
||||||
|
|
||||||
|
/**
 * Classifies a single character for the lexer.
 *
 * @param Char  character to classify
 * @return tok::IDENTIFIER for a letter, tok::NUMBER for a decimal digit,
 *         tok::UNEXPECTED for anything else
 */
tok::type getIdentifierCharType(const char& Char) {
    // The <cctype> predicates have undefined behaviour for negative arguments other
    // than EOF, and a plain `char` holding a non-ASCII byte may be negative.
    const unsigned char ch = static_cast<unsigned char>(Char);

    if (isalpha(ch)) return tok::IDENTIFIER;
    // Letters are already handled above, so only digits remain of the alphanumerics.
    if (isdigit(ch)) return tok::NUMBER;
    return tok::UNEXPECTED;
}
|
||||||
|
|
||||||
|
/**
 * Tokenises `in`, appending the resulting tokens to the file-level `resVal`.
 *
 * Outside a string literal, each punctuation character becomes its own token,
 * whitespace becomes a SEPARATOR (via separate()), and any other character is
 * classified by getIdentifierCharType(); characters classified as
 * tok::UNEXPECTED are dropped. Text between double quotes is collected in the
 * file-level `generatedStr` and emitted as one tok::STRING.
 *
 * `isLexingString` and `generatedStr` live at file scope, so a literal that is
 * still open when `in` ends stays pending and continues in the next call.
 *
 * @param in  text to tokenise
 * @return reference to `resVal`; this function never clears it, so tokens
 *         accumulate across calls
 */
vector<tok>& lex(const string& in)
{
    for (const char current : in) {
        if (isLexingString) {
            // TODO Permit the usage of double quotes in strings using '\' (an example would be "\"")
            if (current == '"') {
                // Emit at the closing quote rather than deferring to the next character,
                // so a literal that ends the input, or an empty literal, still yields a token.
                resVal.emplace_back(tok::STRING, generatedStr);
                generatedStr.clear();
                isLexingString = false;
            } else {
                generatedStr += current;
            }
            continue;
        }

        switch (current) {
            case '#': resVal.emplace_back(tok::TAG, "#"); break;
            case '=': resVal.emplace_back(tok::DEFINE, "="); break;
            case '(': resVal.emplace_back(tok::LPAR, "("); break;
            case ')': resVal.emplace_back(tok::RPAR, ")"); break;
            case '{': resVal.emplace_back(tok::LBRACE, "{"); break;
            case '}': resVal.emplace_back(tok::RBRACE, "}"); break;
            case '[': resVal.emplace_back(tok::LBRACKET, "["); break;
            case ']': resVal.emplace_back(tok::RBRACKET, "]"); break;
            case '+': resVal.emplace_back(tok::PLUS, "+"); break;
            case '-': resVal.emplace_back(tok::HYPHEN, "-"); break;
            case '>': resVal.emplace_back(tok::LCOMP, ">"); break;
            case '<': resVal.emplace_back(tok::RCOMP, "<"); break;
            case '\'': resVal.emplace_back(tok::SQUOTE, "\'"); break;
            case '"': isLexingString = true; break;
            case ' ': case '\t': case '\r':
            case '\n': separate(); break;
            default: {
                const tok::type type = getIdentifierCharType(current);
                if (type != tok::UNEXPECTED) resVal.emplace_back(type, string(1, current));
                break;
            }
        }
    }

    /* Test
    for (const auto& it : resVal) {
        if (it.toktype == tok::STRING) cout << '"' << it.toktext << '"' << '\n';
        if (it.toktype == tok::NUMBER) cout << it.toktext << "= number\n";
        if (it.toktype == tok::IDENTIFIER) cout << it.toktext << '\n';
    }
    */
    return resVal;
}
|
@ -7,16 +7,16 @@
|
|||||||
|
|
||||||
/**
 * A single lexical token: its category plus the source text it was built from.
 * Streaming a tok writes only its text.
 */
struct tok {
    // Token categories. Unscoped enum, so enumerators are spelled tok::NAME.
    enum type {
        DEFINE, TAG, NUMBER, PLUS, LPAR, LBRACE, LBRACKET, RPAR, RBRACE, RBRACKET, ALPHACHAR, HYPHEN, LCOMP, RCOMP,
        SQUOTE,
        STRING,
        IDENTIFIER,
        SEPARATOR,
        UNEXPECTED
    };

    type toktype;        // category of this token
    std::string toktext; // text of this token (may be empty, e.g. for SEPARATOR)

    // `Text` is taken by value, so hand its buffer to the member instead of copying it again.
    // The cast is what std::move does, spelled out so no additional header is required.
    tok(type Type, std::string Text): toktype(Type), toktext(static_cast<std::string&&>(Text)) {}

    // Writes the token's text (not its category) to `output`.
    friend std::ostream& operator<<(std::ostream& output, const tok& it) { return output << it.toktext; }
};
|
||||||
std::vector<tok> lex(const std::string& in);
|
std::vector<tok>& lex(const std::string& in);
|
Loading…
Reference in New Issue
Block a user