// Yerbacon/src/headers/parsing/Parser.hpp

#ifndef YERBACON_PARSER_HPP
#define YERBACON_PARSER_HPP

#include <string>
#include "ParseComponents.hpp"
#include "../Yerbacon.hpp"
#include <concepts>
#include <span>
#include <memory>

namespace Parser {
    // Exception type thrown by every error helper in this namespace.
    using ParsingException = Yerbacon::Exception;
    void error(const tok& token, const string& text, unsigned long line, const bool& quoteTokenText = false) {
        throw ParsingException(quoteTokenText ? "\"" + token.toktext + "\"" + text : text, line);
    }
    inline void parsingError(
        const tok& token, const string& text,
        const bool& quoteTokenText = false
    ) { error(token, text, token.line, quoteTokenText); }

    // Forward declaration of the iterator-pair overload of parse(), so that the
    // span overload defined next can call it before its definition further down.
    // NOTE(review): IS is a macro defined outside this file; presumably it introduces
    // the template parameter T constrained to derive from ParseTree - confirm.
    IS(ParseTree)
    inline T parse(const input_iterator auto& begin, const input_iterator auto& end);

    // Parses a sequence of lexed tokens into a parse tree of type T.
    //
    // Token handling:
    //   - STRING: appended as a types::String.
    //   - IDENTIFIER:
    //       * "class" / "structure" followed by an identifier appends a Class
    //         (anything else after the keyword is an error);
    //       * an identifier, optionally followed by a parenthesised list, then
    //         LCOMP LCOMP LBRACE appends a Function (its parameters are parsed
    //         recursively from the tokens between the parentheses);
    //       * an identifier followed by DEFINE, or by TAG DEFINE for a final
    //         definition, appends a Define whose value is parsed from the
    //         remaining tokens of the same line;
    //       * an identifier followed by LPAR appends a Call;
    //       * any other identifier appends a Reference.
    //   - LPAR / LBRACE / LBRACKET: the tokens up to the closing character located
    //     by find_corresponding are parsed recursively; the result is stored in the
    //     preceding Call (LPAR), in the preceding Function or Class (LBRACE), or
    //     appended to the tree (LBRACKET).
    //   - RPAR / RBRACE / RBRACKET reached directly: reported as an error.
    //   - Every other token type is skipped.
    //
    // Errors are raised as ParsingException, mostly through parsingError().
    //
    // NOTE(review): IS is a macro defined outside this file; presumably it introduces
    // the template parameter T constrained to derive from ParseTree - confirm.
    IS(ParseTree)
    T parse(const span<const tok>&& lexed) {
        T parseTree;
        using namespace StandardComponents;
        using enum tok::type;
        unsigned int i = 0;

        // Returns true when the tokens following lexed[i] have, in order, the given types.
        // Each look-ahead index is bounds-checked before it is read, so a sequence that
        // would run past the end of the input simply does not match.
        const auto nextAre = [&i, &lexed]<convertible_to<tok::type> Y>(const initializer_list<Y>& nextValues) -> bool {
            unsigned int j = 1;
            for (const Y& nextValue: nextValues) {
                if (i + j >= lexed.size() || lexed[i + j].toktype != nextValue) {
                    return false;
                }
                ++j;
            }
            return true;
        };

        for (; i < lexed.size(); ++i) {
            const bool hasNext = (i + 1) < lexed.size();
            const tok& current = lexed[i];
            // Copy of the following token, or an UNEXPECTED token on the current line
            // when the input ends here.
            const tok next = hasNext ? lexed[i + 1] : tok(UNEXPECTED, current.line);

            switch (current.toktype) {
                case STRING: parseTree << types::String(current.toktext.data()); break;
                case IDENTIFIER: {
                    if (current.toktext == "class" || current.toktext == "structure") {
                        if (next.toktype == IDENTIFIER) {
                            parseTree << Class(next.toktext); ++i;
                        } else {
                            parsingError(next, hasNext ? " is not a valid class identifier" : "A class identifier is required", hasNext);
                        }
                        break;
                    }

                    // Tentatively jump to the first RPAR so that nextAre() looks at what
                    // follows the parenthesised list; undone below if this is not a function.
                    unsigned int parametersDistance = 0;
                    if (next.toktype == LPAR) {
                        const auto it = find_if(lexed.begin() + i, lexed.end(), [](const tok& token){ return token.toktype == RPAR; });
                        parametersDistance = distance(lexed.begin() + i, it);
                        i += parametersDistance;
                    }
                    if (nextAre({LCOMP, LCOMP, LBRACE})) {
                        Function function(current.toktext);
                        // A distance of 2 means "()" with nothing in between.
                        if (parametersDistance > 2) {
                            function.parameters = parse(lexed.begin() + ((i + 2) - parametersDistance), lexed.begin() + i);
                        }
                        parseTree << function;
                        // Land on the second LCOMP; the loop increment then reaches the LBRACE,
                        // whose case fills in the function body.
                        i += 2;
                        break;
                    }
                    i -= parametersDistance;

                    const bool isFinalDefine = nextAre({TAG, DEFINE});
                    if (isFinalDefine || next.toktype == DEFINE) {
                        const optional previousDefinition = parseTree.template findReferenceByName<Define>(current.toktext);
                        if (previousDefinition.has_value()) {
                            if (previousDefinition.value().get().final || isFinalDefine) {
                                parsingError(current, previousDefinition->get().final ? " cannot be redefined as it is final" : " cannot be made final after it has been declared", true);
                            }
                        }
                        const unsigned increment = 2 + isFinalDefine;
                        const auto beginning = lexed.begin() + i + increment;
                        // TODO Find another way of choosing the tokens to parse
                        // The value runs until the first token located on another line.
                        const auto valueEnd = find_if(beginning, lexed.end(), [&current](const tok& it){
                            return it.line != current.line;
                        });
                        parseTree << Define(isFinalDefine, current.toktext, parse(beginning, valueEnd));
                        // Stop on the last token consumed by the definition (the DEFINE token
                        // itself when the value is empty) so the loop increment resumes on the
                        // first token that was not part of it.
                        i += (increment - 1) + static_cast<unsigned int>(distance(beginning, valueEnd));
                    } else if (next.toktype == LPAR) {
                        // The arguments are attached when the LPAR token is reached.
                        parseTree << Call(current.toktext);
                    } else {
                        parseTree << Reference(current.toktext);
                    }
                    break;
                }
                case LPAR: case LBRACE: case LBRACKET: {
                    const auto closingCharacter = find_corresponding(lexed.begin() + i + 1, lexed.end(), current.toktype, tok::inverseLCharacter(current.toktype));
                    vector<tok> subTokens(lexed.begin() + i + 1, closingCharacter);
                    if (current.toktype == LPAR || current.toktype == LBRACKET) {
                        // Elements must be comma-separated: drop each separating comma,
                        // and reject two consecutive elements without one.
                        if (subTokens.size() >= 2 && subTokens[1].toktype != RPAR) {
                            for (auto iterator = subTokens.cbegin(); iterator < (subTokens.cend() - 1); ++iterator) {
                                const auto nextIterator = iterator + 1;
                                if (nextIterator->toktype == COMMA) {
                                    subTokens.erase(nextIterator);
                                } else throw ParsingException("Missing comma after \"" + iterator->toktext + '"');
                            }
                        }
                    }
                    if (current.toktype == LBRACKET) {
                        // Brackets do not depend on the previous component.
                        parseTree << parse(subTokens);
                    } else if (parseTree.size() == 0) {
                        // Nothing precedes this group: a parenthesis has no call to attach to.
                        // A brace is ignored, as it is after any component other than a
                        // Function or a Class.
                        if (current.toktype == LPAR) {
                            parsingError(current, "Unexpected parenthesis");
                        }
                    } else {
                        const component_ptr& previous = parseTree.at(parseTree.size() - 1);
                        if (current.toktype == LPAR) {
                            try {
                                dynamic_cast<Call&>(*previous).ParseTree::operator=(parse(subTokens));
                            } catch (const bad_cast&) {
                                parsingError(current, "Unexpected parenthesis");
                            }
                        } else {
                            const type_info& previous_id = previous->getId();
                            if (previous_id == typeid(Function)) {
                                dynamic_cast<Function&>(*previous).ParseTree::operator=(parse(subTokens));
                            } else if (previous_id == typeid(Class)) {
                                dynamic_cast<Class&>(*previous).body = parse(subTokens);
                            }
                        }
                    }
                    // Resume after the closing character.
                    i = distance(lexed.begin(), closingCharacter);
                    break;
                }
                case RPAR: case RBRACE: case RBRACKET:
                    // A closing character met here was not consumed by any opening one.
                    parsingError(current, " \u27F5 Unexpected character", true);
                    break;
                default: break;
            }
        }
        return parseTree;
    }
    // Iterator-pair overload: views [begin, end) as a span and parses it into a T.
    // T is forwarded explicitly so that the span overload builds the tree type
    // requested by the caller instead of the one it would select on its own.
    template<derived_from<ParseTree> T>
    inline T parse(const input_iterator auto& begin, const input_iterator auto& end) { return parse<T>(span(begin, end)); }
}

#endif //YERBACON_PARSER_HPP