Yerbacon/src/etc/lexer.cpp
Username404 f3d03adead
Add functions, indentation, better variables parsing and more
Signed-off-by: Username404 <w.iron.zombie@gmail.com>
2022-03-13 15:15:29 +01:00

90 lines
3.7 KiB
C++

#include "../headers/lex.hpp"
using namespace std;
using enum tok::type;
// Classifies a single character by the kind of multi-character token it can be part of:
//   - IDENTIFIER for alphabetic characters and '_',
//   - NUMBER for decimal digits,
//   - STRING for the string delimiter character,
//   - UNEXPECTED for anything else.
tok::type getIdentifierCharType(const char& Char) {
    // The <cctype> classifiers require a value representable as unsigned char (or EOF);
    // passing a negative plain char (any non-ASCII byte where char is signed) is undefined.
    const auto character = static_cast<unsigned char>(Char);
    if (isalpha(character) || Char == '_') return IDENTIFIER;
    if (isdigit(character)) return NUMBER;
    if (Char == STRING) return STRING;
    return UNEXPECTED;
}
// Converts the source text `in` into a flat list of tokens.
//
// Handled directly by the outer switch:
//   - opening brackets (LPAR, LBRACE, LBRACKET): emitted only if find_corresponding
//     locates the matching closing character in the rest of the input;
//   - line comments: a doubled DIVIDE character, or a TAG that is not followed by
//     DEFINE, skips everything up to (not including) the next '\n';
//   - block comments: everything between two doubled ASTERISK markers is skipped;
//   - single-character tokens: emitted with the character itself as token type;
//   - ' ', '\t', '\r' and ';' are ignored, '\n' increments the line counter.
// Everything else goes through getIdentifierCharType to form identifiers, numbers
// and strings; characters classified as UNEXPECTED are ignored.
//
// Returns the tokens in the order they appear in `in`.
// Throws tok::LexerException when an opening bracket has no matching closing
// character, when a block comment is never closed, or when a string is never closed.
vector<tok> lex(const string& in)
{
    vector<tok> resVal;
    unsigned long lineNumber = 1;
    for (string::size_type i = 0; i < in.size(); ++i) {
        // Bound once per iteration: `current` keeps designating the character that
        // selected the case, even after `i` is advanced inside that case.
        const char& current = in[i];
        switch (current) {
            case LPAR: case LBRACE: case LBRACKET: {
                const auto reversedCharacter = static_cast<char>(tok::inverseLCharacter(current));
                if (find_corresponding(in.cbegin() + i + 1, in.cend(), current, reversedCharacter) != in.cend()) {
                    goto insertToken;
                } else throw tok::LexerException(string("Missing \"") + reversedCharacter + "\" character", lineNumber);
            }
            case DIVIDE:
                // in[in.size()] is a valid read on a const string (yields '\0'),
                // so looking one character ahead is safe on the last character.
                if (in[i + 1] != current) goto insertToken;
                // Step onto the second comment character only. Stepping past it could land
                // on the '\n' of an empty comment, and the skipper below (which only looks at
                // in[i + 1]) would then swallow the whole following line.
                ++i;
                [[fallthrough]];
            case TAG: {
                if (current == TAG && in[i + 1] == DEFINE) { // See the IDENTIFIER case in Parser.hpp
                    goto insertToken;
                } else {
                    // Skip the rest of the line; the '\n' itself is left for the next
                    // iteration so that the line counter is updated.
                    while (i + 1 < in.size() && in[i + 1] != '\n') {
                        ++i;
                    }
                    break;
                }
            }
            case ASTERISK: {
                if (in[i + 1] == ASTERISK) {
                    i += 2;
                    try {
                        // at() is used on purpose: running off the end of the input
                        // raises out_of_range, which signals an unterminated comment.
                        while (not (in.at(i) == ASTERISK && in.at(i + 1) == ASTERISK)) {
                            lineNumber += (in[i] == '\n'); ++i;
                        }
                        ++i; // Step onto the second closing ASTERISK
                    } catch (const out_of_range&) {
                        // NOTE(review): the reported line is one less than the current count,
                        // which yields 0 for single-line input; confirm the intended line.
                        throw tok::LexerException("A never ending comment was found", --lineNumber);
                    }
                    break;
                } else goto insertToken;
            }
            case DEFINE: case RPAR: case COMMA:
            case RBRACE: case RBRACKET:
            case PLUS: case HYPHEN: case LCOMP: case RCOMP:
            case DOT: case DOLLAR_SIGN: case SQUOTE:
            insertToken: resVal.emplace_back(static_cast<tok::type>(current), lineNumber);
                break;
            [[likely]] case ' ': case '\t': case '\r': case ';': break;
            [[likely]] case '\n': ++lineNumber; break;
            default: {
                const tok::type type = getIdentifierCharType(current);
                const bool isTypeString = (type == STRING);
                switch (type) {
                    case STRING: ++i; // Skip the opening delimiter
                        [[fallthrough]];
                    case IDENTIFIER: case NUMBER: {
                        string formedString;
                        bool terminated = false;
                        for (; i < in.size(); ++i) {
                            const tok::type currentCharType = getIdentifierCharType(in[i]);
                            const bool isString = currentCharType == STRING;
                            if ((currentCharType == type || isTypeString) && !isString) {
                                formedString += in[i];
                            } else {
                                // A non-string token ends on a character that does not belong
                                // to it: step back so the outer loop lexes that character.
                                if (not isTypeString) --i;
                                resVal.emplace_back(type, formedString, lineNumber);
                                terminated = true;
                                break;
                            }
                        }
                        if (not terminated) {
                            // The input ended while the token was still being formed.
                            if (isTypeString) throw tok::LexerException("A never ending string was found", lineNumber);
                            resVal.emplace_back(type, formedString, lineNumber);
                        }
                        break;
                    }
                    default: break;
                }
                break;
            }
        }
    }
    /* Test
    for (const auto& it : resVal) {
        cout << it << ' ' << it.toktype << '\n';
    }
    */
    return resVal;
}