90 lines
3.7 KiB
C++
90 lines
3.7 KiB
C++
#include "../headers/lex.hpp"
|
|
using namespace std;
|
|
using enum tok::type;
|
|
|
|
/// @brief Classifies one character by the kind of multi-character token it can be part of.
///
/// @param Char The character to classify.
/// @return IDENTIFIER for an alphabetic character or '_', NUMBER for a decimal digit,
///         STRING when the character compares equal to the STRING enumerator
///         (the string delimiter), and UNEXPECTED for anything else.
tok::type getIdentifierCharType(const char& Char) {
    // The <cctype> predicates require a value representable as unsigned char (or EOF).
    // Passing a negative plain char (any byte >= 0x80 where char is signed) is undefined
    // behaviour, so convert before calling them.
    const auto asUnsigned = static_cast<unsigned char>(Char);

    if (isalpha(asUnsigned) || Char == '_') return IDENTIFIER;
    if (isdigit(asUnsigned)) return NUMBER;
    if (Char == STRING) return STRING;
    return UNEXPECTED;
}
|
|
|
|
/// @brief Splits the source text into a flat list of tokens.
///
/// Handled input, in the order of the switch below:
///  - LPAR / LBRACE / LBRACKET: emitted as a symbol token, but only if find_corresponding()
///    reports a match for the closing character in the rest of the input.
///  - DIVIDE DIVIDE and a TAG that is not followed by DEFINE: line comment, skipped up to
///    (not including) the next '\n'.
///  - ASTERISK ASTERISK ... ASTERISK ASTERISK: block comment, skipped; newlines inside are counted.
///  - The remaining listed symbol characters: emitted as one-character tokens whose type is
///    the character value cast to tok::type.
///  - ' ', '\t', '\r' and ';' are skipped; '\n' increments the line counter.
///  - Anything else is classified with getIdentifierCharType(): runs of identifier characters,
///    runs of digits, and delimited strings become tokens carrying their text; characters
///    classified as UNEXPECTED are ignored.
///
/// @param in The complete source text.
/// @return The tokens in source order, each tagged with the line it starts on (first line is 1).
/// @throws tok::LexerException on an opening bracket without a closing partner, on a block
///         comment that is never closed, and on a string that is never closed.
vector<tok> lex(const string& in)
{
    vector<tok> resVal;
    unsigned long lineNumber = 1;

    // Note: `in[i + 1]` is used as a one-character lookahead throughout. For a const
    // std::string, operator[] at index size() is well defined and yields '\0', so the
    // lookahead is safe on the last character.
    for (size_t i = 0; i < in.size(); ++i) {
        const char current = in[i];

        // Emits the current character as a one-character symbol token.
        const auto pushSymbol = [&] {
            resVal.emplace_back(static_cast<tok::type>(current), lineNumber);
        };

        switch (current) {
            case LPAR: case LBRACE: case LBRACKET: {
                const auto closer = static_cast<char>(tok::inverseLCharacter(current));
                // find_corresponding() returning the end iterator is treated as "no partner found".
                if (find_corresponding(in.cbegin() + i + 1, in.cend(), current, closer) == in.cend()) {
                    throw tok::LexerException(string("Missing \"") + closer + "\" character", lineNumber);
                }
                pushSymbol();
                break;
            }

            case DIVIDE:
                if (in[i + 1] != current) {
                    pushSymbol();
                    break;
                }
                // Step onto the second DIVIDE only. Stepping past it (as the previous `i += 2`
                // did) lands on the '\n' of an empty comment; the skip loop below only looks at
                // in[i + 1], so it would then swallow the whole following line.
                ++i;
                [[fallthrough]]; // share the "skip to end of line" code with TAG
            case TAG: {
                // `current == TAG` is false when we arrived here from the DIVIDE case.
                if (current == TAG && in[i + 1] == DEFINE) { // See the IDENTIFIER case in Parser.hpp
                    pushSymbol();
                    break;
                }
                // Stop on the last character before '\n' so the newline itself is still
                // seen by the main loop and counted.
                while (i < in.size() && in[i + 1] != '\n') {
                    ++i;
                }
                break;
            }

            case ASTERISK: {
                if (in[i + 1] != ASTERISK) {
                    pushSymbol();
                    break;
                }
                i += 2; // skip the opening pair
                try {
                    // at() throws once the scan runs off the end, i.e. the comment is unterminated.
                    while (not (in.at(i) == ASTERISK && in.at(i + 1) == ASTERISK)) {
                        lineNumber += (in[i] == '\n');
                        ++i;
                    }
                    ++i; // rest on the second closing ASTERISK; the loop increment moves past it
                } catch (const out_of_range&) {
                    // NOTE(review): the decrement looks like compensation for a trailing newline
                    // counted by the scan above — kept as-is, confirm the intended line.
                    throw tok::LexerException("A never ending comment was found", --lineNumber);
                }
                break;
            }

            case DEFINE: case RPAR: case COMMA:
            case RBRACE: case RBRACKET:
            case PLUS: case HYPHEN: case LCOMP: case RCOMP:
            case DOT: case DOLLAR_SIGN: case SQUOTE:
                pushSymbol();
                break;

            [[likely]] case ' ': case '\t': case '\r': case ';':
                break;

            [[likely]] case '\n':
                ++lineNumber;
                break;

            default: {
                const tok::type type = getIdentifierCharType(current);

                if (type == STRING) {
                    // The token and any error are reported on the line the string starts on.
                    const unsigned long startLine = lineNumber;
                    string text;

                    ++i; // step over the opening delimiter
                    while (true) {
                        // Also covers an opening delimiter that is the very last character,
                        // which previously produced neither a token nor an error.
                        if (i >= in.size()) {
                            throw tok::LexerException("A never ending string was found", startLine);
                        }
                        if (getIdentifierCharType(in[i]) == STRING) break; // closing delimiter
                        // Keep the line counter right for everything after a multi-line string.
                        lineNumber += (in[i] == '\n');
                        text += in[i];
                        ++i;
                    }
                    // i rests on the closing delimiter; the loop increment moves past it.
                    resVal.emplace_back(type, text, startLine);
                } else if (type == IDENTIFIER || type == NUMBER) {
                    string text;
                    // Collect the run of characters of the same class. Reaching the end of the
                    // input ends the token too; previously such a trailing token was dropped.
                    while (i < in.size() && getIdentifierCharType(in[i]) == type) {
                        text += in[i];
                        ++i;
                    }
                    // Step back onto the token's last character (the run is never empty because
                    // `current` itself matched) so the loop increment resumes right after it.
                    --i;
                    resVal.emplace_back(type, text, lineNumber);
                }
                // UNEXPECTED characters are ignored.
                break;
            }
        }
    }

    /* Test
    for (const auto& it : resVal) {
        cout << it << ' ' << it.toktype << '\n';
    }
    */

    return resVal;
}