// Parser.hpp - turns a sequence of lexed `tok` tokens into a parse tree (namespace Parser).
//
// NOTE(review): this copy of the file looks extraction-damaged: several `#include` directives
// have no target, template/cast arguments seem to be missing (e.g. `vector& tokens`,
// `dynamic_cast(...)`, `static_cast(...)`), and the capture list `[¤t]` is presumably a
// mis-encoded `[&current]`. Restore those from version control before compiling - TODO confirm.
//
// error(token, text, line, quoteTokenText)
//   Always throws ParsingException(message, line). The message is `text`, prefixed with the
//   double-quoted token text when `quoteTokenText` is true.
//
// parsingError(token, text, quoteTokenText)
//   Forwards to error() using `token.line` as the line.
//
// filter_comma_list(tokens)  [in-place overload]
//   Does nothing unless `tokens` holds at least two elements. Walks forward from the first
//   element and stops on a COMMA element or on the last element. When the following element is
//   a COMMA it is erased; otherwise, unless the current or the following element is a DOT,
//   ParsingException("Missing comma after \"<token text>\"") is thrown (no line is passed here).
//   NOTE(review): the loop condition reads `iterator->toktype` before comparing against
//   `tokens.end() - 1`; after erasing a trailing COMMA the increment can leave `iterator`
//   equal to `end()`, so that read looks out of bounds - confirm and swap the two operands.
//
// filter_comma_list(begin, end)  [copying overload]
//   Copies [begin, end) into a vector, applies the in-place overload and returns the vector.
//
// parse(lexed)  [span overload]
//   Loops over `lexed` with index `i`, appends components to a local `T parseTree` with
//   operator<< and returns it. `next` is a copy of the following token, or
//   tok(UNEXPECTED, current.line) when there is none.
//   * nextAre(values): true when the tokens after index `i` have the listed types, in order.
//     NOTE(review): the guard `cmp_less(lexed.size() - i, values.size())` still lets
//     `lexed[i + j]` run when `lexed.size() - i == values.size()`, where the last `j` indexes
//     `lexed.size()` - looks like an off-by-one, confirm.
//   * NUMBER: converts the text with stoul, negates it when the previous token is a HYPHEN and,
//     when followed by DOT NUMBER, consumes both and adds the fractional digits (capped by
//     numeric_limits<...>::digits10); appends types::Integer(v, p).
//   * STRING: appends the token text.
//   * IDENTIFIER:
//       - ID(CLASS) / ID(STRUCT): appends Class(next.toktext) and skips the name, or reports
//         an error when the next token is not an IDENTIFIER.
//       - otherwise, when the next token is LPAR, `i` is first moved forward by the distance to
//         the iterator returned by find_corresponding (kept in `parametersDistance`).
//       - ID(CONDITION_STATEMENT) and its two branch identifiers: a branch requires the last
//         component's id to be Condition::Statement; the token at `i + 1` must equal
//         tok::LBRACE; the body up to `body_end` is parsed recursively and stored either as a
//         new statement or as a branch appended to `else_branches`; then `continue`.
//         NOTE(review): `statement` is nullptr for a plain condition statement, yet
//         `statement->else_branches` is bound unconditionally - confirm this is safe.
//         NOTE(review): `parseTree.at(parseTree.size() - 1)` on an empty tree and
//         `*(lexed.begin() + i + 1)` on the last token are not guarded - confirm.
//       - nextAre({LCOMP, LCOMP, LBRACE}): appends a Function; parameters are parsed from the
//         comma-filtered tokens when `parametersDistance > 2`.
//       - otherwise `i` is moved back by `parametersDistance`; DEFINE or TAG DEFINE appends a
//         Define whose value runs to the next SEMICOLON or to the first token on another line
//         (redefining a final definition, or making an existing one final, is an error);
//         anything else appends a Call (method form, or next token is LPAR) or a Reference.
//     The IDENTIFIER case has no trailing `break`; it falls into `case SEMICOLON: break;`.
//   * LPAR / LBRACE / LBRACKET: collects the tokens up to the iterator returned by
//     find_corresponding (comma-filtered for LPAR and LBRACKET). When the tree is not empty the
//     last component is dynamic_cast and, if the test on the previous non-SEMICOLON token
//     passes, it is assigned parse(subTokens) and `i` jumps to the closing token. In every
//     other path the case falls through to `default`, which reports "Unexpected character".
//     NOTE(review): the find_if result and `closingCharacter` are dereferenced without being
//     compared to `lexed.rend()` / `lexed.end()` - confirm they cannot be end iterators.
//   * NamedIdentifier::identifier_reserved_exception raised inside the switch is reported as
//     " is a reserved identifier".
//   * After each token: when the last component casts to the named-identifier type, is not a
//     Define, and an earlier component with the same name exists, the error
//     " has already been defined previously" is reported.
//
// parse(begin, end)  [iterator overload]
//   Wraps the iterator pair in a span and calls the span overload.
#ifndef YERBACON_PARSER_HPP #define YERBACON_PARSER_HPP #include #include "ParseComponents.hpp" #include "../Yerbacon.hpp" #include #include #include #include #include "ReservedIdentifiers.hpp" namespace Parser { typedef Yerbacon::Exception ParsingException; void error(const tok& token, const string& text, unsigned long line, const bool& quoteTokenText = false) { throw ParsingException(quoteTokenText ? "\"" + token.toktext + "\"" + text : text, line); } inline void parsingError( const tok& token, const string& text, const bool& quoteTokenText = false ) { error(token, text, token.line, quoteTokenText); } void filter_comma_list(vector& tokens) { if (tokens.size() >= 2) { for (auto iterator = tokens.begin(); iterator->toktype != tok::COMMA && iterator < tokens.end() - 1; ++iterator) { const auto nextIterator = iterator + 1; if (nextIterator->toktype == tok::COMMA) { tokens.erase(nextIterator); } else if (nextIterator->toktype != tok::DOT && iterator->toktype != tok::DOT) { throw ParsingException("Missing comma after \"" + iterator->toktext + '"'); } } } } vector filter_comma_list(input_iterator auto begin, input_iterator auto end) { vector tokens(begin, end); filter_comma_list(tokens); return tokens; } IS(ParseTree) inline T parse(const input_iterator auto&, const input_iterator auto&); IS(ParseTree) T parse(const span&& lexed) { T parseTree; using namespace StandardComponents; using enum tok::type; unsigned int i = 0; const auto nextAre = [&i, &lexed] Y>(const initializer_list& nextValues) -> bool { unsigned int j = 1; for (const Y& nextValue: nextValues) { if (cmp_less(lexed.size() - i, nextValues.size()) || lexed[i + j].toktype != nextValue) { return false; } ++j; } return true; }; for (;i < lexed.size(); ++i) { const bool hasNext = (i + 1) < lexed.size(); const tok& current = lexed[i], next = hasNext ? 
lexed[i + 1] : tok(UNEXPECTED, current.line); try { switch (current.toktype) { case NUMBER: { long double v = stoul(current.toktext); if (i != 0 && lexed[i - 1].toktype == HYPHEN) v = -v; types::Integer::precision_type p = 0; if (nextAre({DOT, NUMBER})) { i += 2; const string& right = lexed[i].toktext; p = min(static_cast(right.size()), numeric_limits::digits10); v += copysign(stold(right.substr(0, p)) / powl(deca::num, p), v); } parseTree << types::Integer(v, p); break; } case STRING: parseTree << current.toktext; break; case IDENTIFIER: { using enum ReservedIdentifier; if (current.toktext == ID(CLASS) || current.toktext == ID(STRUCT)) { if (next.toktype == IDENTIFIER) { parseTree << Class(next.toktext); ++i; } else { parsingError(next, hasNext ? " is not a valid class identifier" : "A class identifier is required", hasNext); } } else { unsigned int parametersDistance = 0; if (next.toktype == LPAR) { const auto closing = find_corresponding(lexed.begin() + i + 2, lexed.end(), LPAR, RPAR); parametersDistance = distance(lexed.begin() + i, closing); i += parametersDistance; } const bool is_branch = current.toktext == ID(CONDITION_STATEMENT_BRANCH); const bool is_conditional_branch = (not is_branch) and current.toktext == ID(CONDITION_STATEMENT_CONDITIONAL_BRANCH); if (current.toktext == ID(CONDITION_STATEMENT) or is_branch or is_conditional_branch) { if (is_branch or is_conditional_branch) { const auto& last_id = parseTree.at(parseTree.size() - 1)->getId(); if (last_id != typeid(Condition::Statement)) { parsingError(current, "unexpected \"" + current.toktext + "\" without preceding if statement"); } } if (*(lexed.begin() + i + 1) != tok::LBRACE) parsingError(*(lexed.begin() + i), "missing statement body"); const auto body_end = find_corresponding(lexed.begin() + i + 2, lexed.end(), LBRACE, RBRACE); auto* statement = (is_branch or is_conditional_branch) ? 
dynamic_cast(parseTree[parseTree.size() - 1].get()) : nullptr; auto& else_branches = statement->else_branches; if ((is_conditional_branch or is_branch) and not statement->is_last_branch_conditional()) parsingError(current, "<- unexpected branch", true); optional condition; // TODO Check that the condition is valid if (next.toktype == LPAR and ((lexed.begin() + i - parametersDistance) + 2) < lexed.begin() + i) condition.emplace(parse((lexed.begin() + i - parametersDistance) + 2, lexed.begin() + i)); if (condition.has_value() or is_branch) { if (is_conditional_branch or is_branch) { auto branch = (condition.has_value() ? (Condition::Statement::Branch(move(condition.value()))) : Condition::Statement::Branch()); branch.ParseTree::operator=(parse(lexed.begin() + i + 2, body_end)); else_branches << branch; } else { Condition::Statement new_statement (move(condition.value())); new_statement.ParseTree::operator=(parse(lexed.begin() + i + 2, body_end)); parseTree << new_statement; } i += distance(lexed.begin() + i, body_end); } else if (not is_branch) parsingError(current, "missing condition after an \"" + current.toktext + "\" statement"); continue; } else if (nextAre({LCOMP, LCOMP, LBRACE})) { Function function(current.toktext); if (parametersDistance > 2) function.parameters = parse(filter_comma_list(lexed.begin() + ((i + 2) - parametersDistance), lexed.begin() + i)); // TODO Parse parameters correctly parseTree << function; i += 2; break; } else i -= parametersDistance; bool isFinalDefine = nextAre({TAG, DEFINE}); if (isFinalDefine || next.toktype == DEFINE) { const optional previousDefinition = parseTree.template findReferenceByName(current.toktext); if (previousDefinition.has_value() && (previousDefinition.value().get().final || isFinalDefine)) parsingError(current, previousDefinition->get().final ? 
" cannot be redefined as it is final" : " cannot be made final after it has been declared", true); const unsigned increment = 2 + isFinalDefine; const auto beginning = lexed.begin() + i + increment; const auto end = find_if(beginning, lexed.end(), [¤t](const tok& it){ return it.toktype == SEMICOLON || it.line != current.line; }); parseTree << Define(isFinalDefine, current.toktext, parse(beginning, end)); i += 1 + isFinalDefine + distance(beginning, end); } else { const bool method = nextAre({DOT, IDENTIFIER, LPAR}); const bool property = not method && nextAre({DOT, IDENTIFIER}); const string name = property or method ? current.toktext + '.' + lexed[i + 2].toktext : current.toktext; if (method or next.toktype == LPAR) { parseTree << Call(name); } else { parseTree << Reference(name); } if (property or method) i += 2; } } } case SEMICOLON: break; case LPAR: case LBRACE: case LBRACKET: { const auto closingCharacter = find_corresponding(lexed.begin() + i + 1, lexed.end(), current.toktype, tok::inverseLCharacter(current.toktype)); vector subTokens(lexed.begin() + i + 1, closingCharacter); if (current.toktype == LPAR || current.toktype == LBRACKET) filter_comma_list(subTokens); if (not parseTree.empty()) { try { auto& previous = dynamic_cast(*parseTree.at(parseTree.size() - 1)); if (find_if(reverse_iterator(lexed.begin() + i), lexed.rend(), [](const tok& token){ return token.toktype != SEMICOLON; })->toktype != *closingCharacter) { previous = parse(subTokens); i = distance(lexed.begin(), closingCharacter); break; } } catch (const out_of_range&) {} catch (const bad_cast&) {} } } default: parsingError(current, " \u27F5 Unexpected character", true); } } catch (const NamedIdentifier::identifier_reserved_exception&) { parsingError(current, " is a reserved identifier", true); } if (not parseTree.empty()) { const auto& last = parseTree.cend() - 1; const type_info& lastId = last->get()->getId(); const auto* last_identifier = dynamic_cast*>(last->get()); if (last_identifier != 
nullptr) { if (lastId != typeid(Define) and any_of(parseTree.cbegin(), last, [&last_identifier](const component_ptr& pointer){ try { return dynamic_cast&>(*pointer).name == last_identifier->name; } catch (const bad_cast&) { return false; } })) { parsingError(current, " has already been defined previously", true); } } } } return parseTree; } template T> inline T parse(const input_iterator auto& begin, const input_iterator auto& end) { return parse(span(begin, end)); } } #endif //YERBACON_PARSER_HPP