Yerbacon/src/headers/parsing/Parser.hpp
Username404 85a3582612
Parser.hpp: Add basic parsing of if/elseif/else statements
Signed-off-by: Username404 <w.iron.zombie@gmail.com>
2023-08-06 19:49:35 +02:00

207 lines
12 KiB
C++

#ifndef YERBACON_PARSER_HPP
#define YERBACON_PARSER_HPP
#include <string>
#include "ParseComponents.hpp"
#include "../Yerbacon.hpp"
#include <concepts>
#include <span>
#include <memory>
#include <cmath>
#include "ReservedIdentifiers.hpp"
namespace Parser {
// Exception type thrown for every error detected while parsing.
using ParsingException = Yerbacon::Exception;
void error(const tok& token, const string& text, unsigned long line, const bool& quoteTokenText = false) {
throw ParsingException(quoteTokenText ? "\"" + token.toktext + "\"" + text : text, line);
}
inline void parsingError(
const tok& token, const string& text,
const bool& quoteTokenText = false
) { error(token, text, token.line, quoteTokenText); }
// Strips the separating commas out of a comma-separated token list, in place.
//
// Walking from the front, the token following the current one is erased when it is a COMMA.
// When it is not a comma, a ParsingException is thrown unless either of the two tokens is a DOT.
// The walk stops early as soon as the current token is itself a COMMA.
// Lists with fewer than two tokens are left untouched.
//
// Declared inline because the definition lives in a header that may be included by several translation units.
inline void filter_comma_list(vector<tok>& tokens) {
    if (tokens.size() < 2) return;
    // The bounds test must be evaluated before the dereference: erasing a trailing comma shrinks
    // the vector, so after the increment the iterator can be equal to end().
    for (auto iterator = tokens.begin(); iterator < tokens.end() - 1 && iterator->toktype != tok::COMMA; ++iterator) {
        const auto nextIterator = iterator + 1;
        if (nextIterator->toktype == tok::COMMA) {
            // Erasing the element after `iterator` leaves `iterator` itself valid.
            tokens.erase(nextIterator);
        } else if (nextIterator->toktype != tok::DOT && iterator->toktype != tok::DOT) {
            throw ParsingException("Missing comma after \"" + iterator->toktext + '"');
        }
    }
}
// Copying variant: gathers the tokens of [begin, end) into a new vector, runs the in-place
// comma filter on that copy and returns it. The source range is not modified.
vector<tok> filter_comma_list(input_iterator auto begin, input_iterator auto end) {
    vector<tok> filtered(begin, end);
    filter_comma_list(filtered);
    return filtered;
}
// Forward declaration of the iterator-pair overload (defined at the end of this namespace), needed
// because the span-based parse() below calls it on sub-ranges of the token list.
// NOTE(review): IS(...) is a macro defined outside this file; presumably it expands to a template
// header constraining T to types derived from ParseTree — confirm in its defining header.
IS(ParseTree)
inline T parse(const input_iterator auto&, const input_iterator auto&);
// Converts a flat sequence of lexed tokens into a parse tree of type T.
//
// Tokens are consumed left to right:
//  - NUMBER: stored as a types::Integer; a preceding HYPHEN negates it and a following
//    DOT NUMBER pair is read as its fractional part.
//  - STRING: the token text is appended to the tree.
//  - IDENTIFIER: a class/struct declaration, a condition statement or one of its branches,
//    a function definition, a (final) definition, a call or a reference.
//  - LPAR / LBRACE / LBRACKET: the enclosed tokens are parsed recursively and assigned to the
//    previously added component when that component is itself a ParseTree.
//  - SEMICOLON: skipped.
// Any other token is reported as an unexpected character.
//
// Throws ParsingException (through parsingError) on malformed input, on the use of a reserved
// identifier and when a named component reuses the name of an earlier one.
IS(ParseTree)
T parse(const span<const tok>&& lexed) {
    T parseTree;
    using namespace StandardComponents;
    using enum tok::type;
    unsigned int i = 0;
    // True when the tokens following the current one (lexed[i + 1], lexed[i + 2], ...) have exactly the given types.
    const auto nextAre = [&i, &lexed]<convertible_to<tok::type> Y>(const initializer_list<Y>& nextValues) -> bool {
        // The last token inspected is lexed[i + nextValues.size()], so that index has to exist.
        if (i + nextValues.size() >= lexed.size()) return false;
        unsigned int j = 1;
        for (const Y& nextValue: nextValues) {
            if (lexed[i + j].toktype != nextValue) return false;
            ++j;
        }
        return true;
    };
    for (;i < lexed.size(); ++i) {
        const bool hasNext = (i + 1) < lexed.size();
        // `next` is a copy; when there is no following token it is a placeholder of type UNEXPECTED on the current line.
        const tok& current = lexed[i], next = hasNext ? lexed[i + 1] : tok(UNEXPECTED, current.line);
        try {
            switch (current.toktype) {
                case NUMBER: {
                    long double v = stoul(current.toktext);
                    if (i != 0 && lexed[i - 1].toktype == HYPHEN) v = -v;
                    types::Integer::precision_type p = 0;
                    if (nextAre({DOT, NUMBER})) {
                        i += 2;
                        const string& right = lexed[i].toktext;
                        // Keep no more fractional digits than a long double can represent.
                        p = min(static_cast<int>(right.size()), numeric_limits<long double>::digits10);
                        // copysign makes the fraction extend the magnitude of v whatever its sign.
                        v += copysign(stold(right.substr(0, p)) / powl(deca::num, p), v);
                    }
                    parseTree << types::Integer(v, p);
                    break;
                }
                case STRING: parseTree << current.toktext; break;
                case IDENTIFIER: {
                    using enum ReservedIdentifier;
                    if (current.toktext == ID(CLASS) || current.toktext == ID(STRUCT)) {
                        if (next.toktype == IDENTIFIER) {
                            parseTree << Class(next.toktext); ++i;
                        } else {
                            parsingError(next, hasNext ? " is not a valid class identifier" : "A class identifier is required", hasNext);
                        }
                    } else {
                        // Distance from the identifier to the parenthesis closing its argument list (0 when there is none).
                        unsigned int parametersDistance = 0;
                        if (next.toktype == LPAR) {
                            const auto closing = find_corresponding(lexed.begin() + i + 2, lexed.end(), LPAR, RPAR);
                            parametersDistance = distance(lexed.begin() + i, closing);
                            i += parametersDistance;
                        }
                        const bool is_branch = current.toktext == ID(CONDITION_STATEMENT_BRANCH);
                        const bool is_conditional_branch = (not is_branch) and current.toktext == ID(CONDITION_STATEMENT_CONDITIONAL_BRANCH);
                        if (current.toktext == ID(CONDITION_STATEMENT) or is_branch or is_conditional_branch) {
                            const bool continues_statement = is_branch or is_conditional_branch;
                            if (continues_statement) {
                                // A branch needs a condition statement right before it; an empty tree has none.
                                if (parseTree.empty() or parseTree.at(parseTree.size() - 1)->getId() != typeid(Condition::Statement)) {
                                    parsingError(current, "unexpected \"" + current.toktext + "\" without preceding if statement");
                                }
                            }
                            // Make sure a token exists after the keyword/condition before looking at it.
                            if (i + 1 >= lexed.size() or *(lexed.begin() + i + 1) != tok::LBRACE)
                                parsingError(i < lexed.size() ? *(lexed.begin() + i) : current, "missing statement body");
                            const auto body_end = find_corresponding(lexed.begin() + i + 2, lexed.end(), LBRACE, RBRACE);
                            // Only branches attach to an existing statement; for a plain statement this stays null
                            // and must not be dereferenced.
                            auto* statement = continues_statement ? dynamic_cast<Condition::Statement*>(parseTree[parseTree.size() - 1].get()) : nullptr;
                            if (statement != nullptr and not statement->is_last_branch_conditional())
                                parsingError(current, "<- unexpected branch", true);
                            optional<Condition> condition;
                            // TODO Check that the condition is valid
                            if (next.toktype == LPAR and ((lexed.begin() + i - parametersDistance) + 2) < lexed.begin() + i)
                                condition.emplace(parse((lexed.begin() + i - parametersDistance) + 2, lexed.begin() + i));
                            if (condition.has_value() or is_branch) {
                                if (continues_statement) {
                                    auto branch = (condition.has_value() ? (Condition::Statement::Branch(move(condition.value()))) : Condition::Statement::Branch());
                                    branch.ParseTree::operator=(parse(lexed.begin() + i + 2, body_end));
                                    statement->else_branches << branch;
                                } else {
                                    Condition::Statement new_statement (move(condition.value()));
                                    new_statement.ParseTree::operator=(parse(lexed.begin() + i + 2, body_end));
                                    parseTree << new_statement;
                                }
                                i += distance(lexed.begin() + i, body_end);
                            } else if (not is_branch) parsingError(current, "missing condition after an \"" + current.toktext + "\" statement");
                            continue;
                        } else if (nextAre({LCOMP, LCOMP, LBRACE})) {
                            Function function(current.toktext);
                            if (parametersDistance > 2)
                                function.parameters = parse(filter_comma_list(lexed.begin() + ((i + 2) - parametersDistance), lexed.begin() + i)); // TODO Parse parameters correctly
                            parseTree << function;
                            i += 2;
                            break;
                        } else i -= parametersDistance; // Not a statement or function: go back to the identifier itself.
                        bool isFinalDefine = nextAre({TAG, DEFINE});
                        if (isFinalDefine || next.toktype == DEFINE) {
                            const optional previousDefinition = parseTree.template findReferenceByName<Define>(current.toktext);
                            if (previousDefinition.has_value() && (previousDefinition.value().get().final || isFinalDefine))
                                parsingError(current, previousDefinition->get().final ? " cannot be redefined as it is final" : " cannot be made final after it has been declared", true);
                            const unsigned increment = 2 + isFinalDefine;
                            const auto beginning = lexed.begin() + i + increment;
                            // The defined value ends at the first semicolon or at the first token of another line.
                            const auto end = find_if(beginning, lexed.end(), [&current](const tok& it){
                                return it.toktype == SEMICOLON || it.line != current.line;
                            });
                            parseTree << Define(isFinalDefine, current.toktext, parse(beginning, end));
                            i += 1 + isFinalDefine + distance(beginning, end);
                        } else {
                            const bool method = nextAre({DOT, IDENTIFIER, LPAR});
                            const bool property = not method && nextAre({DOT, IDENTIFIER});
                            const string name = property or method ? current.toktext + '.' + lexed[i + 2].toktext : current.toktext;
                            if (method or next.toktype == LPAR) {
                                parseTree << Call(name);
                            } else {
                                parseTree << Reference(name);
                            }
                            if (property or method) i += 2;
                        }
                    }
                    break;
                }
                case SEMICOLON: break;
                case LPAR: case LBRACE: case LBRACKET: {
                    const auto closingCharacter = find_corresponding(lexed.begin() + i + 1, lexed.end(), current.toktype, tok::inverseLCharacter(current.toktype));
                    vector<tok> subTokens(lexed.begin() + i + 1, closingCharacter);
                    if (current.toktype == LPAR || current.toktype == LBRACKET) filter_comma_list(subTokens);
                    if (not parseTree.empty()) {
                        try {
                            auto& previous = dynamic_cast<ParseTree&>(*parseTree.at(parseTree.size() - 1));
                            // Look at the closest preceding token that is not a semicolon.
                            if (find_if(reverse_iterator(lexed.begin() + i), lexed.rend(), [](const tok& token){
                                return token.toktype != SEMICOLON;
                            })->toktype != *closingCharacter) {
                                previous = parse(subTokens);
                                i = distance(lexed.begin(), closingCharacter);
                                break;
                            }
                        } catch (const out_of_range&) {} catch (const bad_cast&) {}
                    }
                }
                // An opening character that could not be attached to a previous component is an error.
                [[fallthrough]];
                default: parsingError(current, " \u27F5 Unexpected character", true);
            }
        } catch (const NamedIdentifier<true>::identifier_reserved_exception&) {
            parsingError(current, " is a reserved identifier", true);
        }
        if (not parseTree.empty()) {
            const auto& last = parseTree.cend() - 1;
            const type_info& lastId = last->get()->getId();
            const auto* last_identifier = dynamic_cast<NamedIdentifier<true>*>(last->get());
            if (last_identifier != nullptr) {
                // Apart from Define, a named component must not reuse the name of an earlier named component.
                if (lastId != typeid(Define) and
                    any_of(parseTree.cbegin(), last, [&last_identifier](const component_ptr& pointer){
                        const auto* candidate = dynamic_cast<const NamedIdentifier<true>*>(pointer.get());
                        return candidate != nullptr and candidate->name == last_identifier->name;
                    }))
                { parsingError(current, " has already been defined previously", true); }
            }
        }
    }
    return parseTree;
}
// Iterator-pair entry point: views the tokens of [begin, end) as a span and hands that view
// to the span-based parse<T>() overload.
template<derived_from<ParseTree> T>
inline T parse(const input_iterator auto& begin, const input_iterator auto& end) {
    span tokenView(begin, end);
    return parse<T>(move(tokenView));
}
}
#endif //YERBACON_PARSER_HPP