Yerbacon/src/etc/lexer.cpp

60 lines
2.5 KiB
C++

#include "../headers/lex.hpp"
using namespace std;
using enum tok::type;
/**
 * Classifies a single character for the purpose of building multi-character tokens.
 *
 * @param Char the character to classify
 * @return IDENTIFIER for an alphabetic character, NUMBER for a decimal digit,
 *         STRING when the character's value equals the STRING enumerator
 *         (presumably the string delimiter), UNEXPECTED for anything else.
 */
tok::type getIdentifierCharType(const char& Char) {
    // The <cctype> classifiers require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behaviour.
    const auto asUnsigned = static_cast<unsigned char>(Char);
    if (isalpha(asUnsigned)) return IDENTIFIER;
    if (isdigit(asUnsigned)) return NUMBER;
    if (Char == STRING) return STRING;
    return UNEXPECTED;
}
/**
 * Splits `in` into a flat list of tokens.
 *
 * - Single-character symbols listed in the first case group become a token whose
 *   type is the character's own value cast to tok::type.
 * - '\n' and ';' both produce a SEPARATOR token whose text is `separatorCharacter`;
 *   '\n' increments the line counter first, so that separator carries the number
 *   of the line that follows.
 * - ' ', '\t' and '\r' are skipped.
 * - A run of characters that getIdentifierCharType() classifies identically as
 *   IDENTIFIER or NUMBER becomes one token of that type.
 * - Text between two string delimiters becomes one STRING token (delimiters excluded).
 * - Any other character is ignored.
 *
 * @param in                 the text to tokenize
 * @param separatorCharacter the character stored as the text of every SEPARATOR token
 * @return the tokens in order of appearance, each tagged with its line number
 * @throws tok::LexerException when a string has no closing delimiter before the end of input
 */
vector<tok> lex(const string& in, const char separatorCharacter)
{
    vector<tok> resVal;
    unsigned long lineNumber = 1;
    // size_type rather than unsigned int so the index cannot wrap before reaching in.size().
    for (string::size_type i = 0; i < in.size(); ++i) {
        const char current = in[i];
        switch (current) {
            case TAG: case DEFINE: case LPAR: case RPAR:
            case LBRACE: case RBRACE: case LBRACKET: case RBRACKET:
            case PLUS: case HYPHEN: case LCOMP: case RCOMP:
            case DOT: case DOLLAR_SIGN: case SQUOTE:
                resVal.emplace_back(static_cast<tok::type>(current), string(1, current), lineNumber);
                break;
            [[likely]] case '\n':
                ++lineNumber;
                [[fallthrough]]; // a newline is also a statement separator
            case ';':
                resVal.emplace_back(SEPARATOR, string(1, separatorCharacter), lineNumber);
                break;
            [[likely]] case ' ': case '\t': [[unlikely]] case '\r':
                break;
            default: {
                const tok::type type = getIdentifierCharType(current);
                if (type == UNEXPECTED) break; // unrecognized characters are ignored

                const bool isTypeString = (type == STRING);
                if (isTypeString) ++i; // step over the opening delimiter

                string formedString;
                bool stringClosed = false;
                for (; i < in.size(); ++i) {
                    const tok::type currentCharType = getIdentifierCharType(in[i]);
                    if (isTypeString) {
                        if (currentCharType == STRING) {
                            // Closing delimiter: leave i on it so the outer ++i skips it.
                            stringClosed = true;
                            break;
                        }
                    } else if (currentCharType != type) {
                        // First character that does not belong to this token:
                        // step back so the outer loop examines it again.
                        --i;
                        break;
                    }
                    formedString += in[i];
                }

                // Only a string can be "never ending"; an identifier or number that
                // runs up to the end of the input is complete and must be emitted.
                if (isTypeString && !stringClosed) {
                    throw tok::LexerException("A never ending string was found", lineNumber);
                }
                resVal.emplace_back(type, formedString, lineNumber);
                break;
            }
        }
    }
    /* Test
    for (const auto& it : resVal) {
    cout << it << ' ' << it.toktype << '\n';
    }
    */
    return resVal;
}