From 4e08c0cf05490d79274c6511d3165468b74aea4a Mon Sep 17 00:00:00 2001 From: Username404 Date: Wed, 6 Jul 2022 15:35:16 +0200 Subject: [PATCH] Implement reserved identifiers Signed-off-by: Username404 --- CMakeLists.txt | 2 +- src/headers/parsing/ParseComponents.hpp | 29 ++-- src/headers/parsing/Parser.hpp | 151 ++++++++++---------- src/headers/parsing/ReservedIdentifiers.hpp | 25 ++++ src/headers/transpiler/Target.hpp | 3 +- 5 files changed, 123 insertions(+), 87 deletions(-) create mode 100644 src/headers/parsing/ReservedIdentifiers.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 40178de..9898617 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,7 +145,7 @@ set(CPACK_PACKAGE_INSTALL_DIRECTORY "${PROJECT_NAME} ${CMAKE_PROJECT_VERSION_MAJ set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${CMAKE_PROJECT_VERSION}-${TIME}") include_directories(${CMAKE_CURRENT_LIST_DIR}) -add_executable(${EXENAME} src/main.cpp ${CMAKE_CURRENT_BINARY_DIR}/processed/${PROJECT_NAME}.rc src/etc/filefuncs.cpp src/etc/lexer.cpp src/headers/lex.hpp src/headers/misc.hpp src/headers/parsing/ParseComponents.hpp src/headers/transpiler/Target.hpp src/headers/transpiler/implementations/Lua.hpp src/headers/transpiler/implementations/Js.hpp src/headers/transpiler/implementations/Py.hpp src/headers/parsing/Parser.hpp src/headers/arguments.hpp) +add_executable(${EXENAME} src/main.cpp ${CMAKE_CURRENT_BINARY_DIR}/processed/${PROJECT_NAME}.rc src/etc/filefuncs.cpp src/etc/lexer.cpp src/headers/lex.hpp src/headers/misc.hpp src/headers/parsing/ParseComponents.hpp src/headers/transpiler/Target.hpp src/headers/transpiler/implementations/Lua.hpp src/headers/transpiler/implementations/Js.hpp src/headers/transpiler/implementations/Py.hpp src/headers/parsing/Parser.hpp src/headers/arguments.hpp src/headers/parsing/ReservedIdentifiers.hpp) target_compile_definitions(${EXENAME} PRIVATE YBCON_VERSION="${CODENAME} ${PROJECT_VERSION}") target_precompile_headers(${EXENAME} PRIVATE src/headers/Yerbacon.hpp) if 
(Threads_FOUND) diff --git a/src/headers/parsing/ParseComponents.hpp b/src/headers/parsing/ParseComponents.hpp index 603052a..3919b82 100644 --- a/src/headers/parsing/ParseComponents.hpp +++ b/src/headers/parsing/ParseComponents.hpp @@ -9,6 +9,7 @@ #include #include #include +#include "ReservedIdentifiers.hpp" using namespace std; #include "../lex.hpp" @@ -19,16 +20,22 @@ struct ParseComponent { [[nodiscard]] inline const type_info& getId() const { return typeid(*this); } virtual ~ParseComponent() = default; }; - +template struct NamedIdentifier: virtual ParseComponent { + struct identifier_reserved_exception: exception {}; const string name; - explicit NamedIdentifier(string_view nameText): name(nameText) {} - NamedIdentifier() = default; + explicit NamedIdentifier(const string_view nameText): name(nameText) { + if (disallow_reserved and reserved(name)) { + throw identifier_reserved_exception(); + } + } + NamedIdentifier() = delete; }; typedef unique_ptr component_ptr; #define IS_PARSECOMPONENT IS(ParseComponent) +#define IS_IDENTIFIER enable_if_t, T> or is_base_of_v, T>, T> class ParseTree: public virtual ParseComponent { mutable vector subComponents; using array_type = decltype(subComponents); @@ -68,8 +75,8 @@ public: }); return filteredComponents; } - IS(NamedIdentifier) - optional> findReferenceByName(const string& name) const { + template + optional> findReferenceByName(const string& name) const { const vector identifiers = findById(); for (T* identifier: identifiers) { if (identifier->getId() == typeid(T) && identifier->name == name) { @@ -80,8 +87,8 @@ public: }; inline component_ptr& operator[](const unsigned int& index) const { return subComponents[index]; } inline component_ptr& at(const unsigned int& index) const { return subComponents.at(index); } - IS(NamedIdentifier) - inline auto operator[](const string& key) const { return findReferenceByName(key); } + template + inline auto operator[](const string& key) const { return findReferenceByName(key); } 
IS_PARSECOMPONENT inline void add(const T& component) { addComponent(component); } IS_PARSECOMPONENT inline void addAll(const initializer_list& components) { addAllComponents(components); } IS_PARSECOMPONENT inline ParseTree& operator<<(const T& component) { add(component); return *this; } @@ -96,14 +103,14 @@ public: #undef IS_PARSECOMPONENT namespace StandardComponents { - struct Define: NamedIdentifier { + struct Define: NamedIdentifier { const bool final; ParseTree content; explicit Define(const bool& isFinal, string_view nameText, ParseTree&& content): NamedIdentifier(nameText), final(isFinal), content(move(content)) {} explicit Define(string_view nameText, ParseTree&& content): Define(false, nameText, move(content)) {} Define(Define&& define) noexcept: Define(define.final, define.name, move(define.content)) {} }; - struct Reference: NamedIdentifier { + struct Reference: NamedIdentifier { using NamedIdentifier::NamedIdentifier; }; namespace types { @@ -122,11 +129,11 @@ namespace StandardComponents { using Reference::Reference; }; - struct Function: NamedIdentifier, ParseTree { + struct Function: NamedIdentifier, ParseTree { ParseTree parameters; using NamedIdentifier::NamedIdentifier; }; - struct Class: NamedIdentifier, ParseTree { + struct Class: NamedIdentifier, ParseTree { using NamedIdentifier::NamedIdentifier; }; } diff --git a/src/headers/parsing/Parser.hpp b/src/headers/parsing/Parser.hpp index 1d7f58c..86f45a1 100644 --- a/src/headers/parsing/Parser.hpp +++ b/src/headers/parsing/Parser.hpp @@ -8,6 +8,7 @@ #include #include #include +#include "ReservedIdentifiers.hpp" namespace Parser { typedef Yerbacon::Exception ParsingException; @@ -58,92 +59,94 @@ namespace Parser { const bool hasNext = (i + 1) < lexed.size(); const tok& current = lexed[i], next = hasNext ? 
lexed[i + 1] : tok(UNEXPECTED, current.line); - switch (current.toktype) { - case NUMBER: { - long double v = stoul(current.toktext); - if (i != 0 && lexed[i - 1].toktype == HYPHEN) v = -v; - types::Integer::precision_type p = 0; - if (nextAre({DOT, NUMBER})) { - i += 2; - const string& right = lexed[i].toktext; - p = min(static_cast(right.size()), numeric_limits::digits10); - v += copysign(stold(right.substr(0, p)) / powl(deca::num, p), v); - } - parseTree << types::Integer(v, p); - break; - } - case STRING: parseTree << current.toktext; break; - case IDENTIFIER: { - if (current.toktext == "class" || current.toktext == "structure") { - if (next.toktype == IDENTIFIER) { - parseTree << Class(next.toktext); ++i; - } else { - parsingError(next, hasNext ? " is not a valid class identifier" : "A class identifier is required", hasNext); - } - } else { - unsigned int parametersDistance = 0; - if (next.toktype == LPAR) { - const auto closing = find_if(lexed.begin() + i, lexed.end(), [](const tok& token){ return token.toktype == RPAR; }); - parametersDistance = distance(lexed.begin() + i, closing); - i += parametersDistance; - } - if (nextAre({LCOMP, LCOMP, LBRACE})) { - Function function(current.toktext); - if (parametersDistance > 2) - function.parameters = parse(filter_comma_list(lexed.begin() + ((i + 2) - parametersDistance), lexed.begin() + i)); - parseTree << function; + try { + switch (current.toktype) { + case NUMBER: { + long double v = stoul(current.toktext); + if (i != 0 && lexed[i - 1].toktype == HYPHEN) v = -v; + types::Integer::precision_type p = 0; + if (nextAre({DOT, NUMBER})) { i += 2; - break; - } else i -= parametersDistance; + const string& right = lexed[i].toktext; + p = min(static_cast(right.size()), numeric_limits::digits10); + v += copysign(stold(right.substr(0, p)) / powl(deca::num, p), v); + } + parseTree << types::Integer(v, p); + break; + } + case STRING: parseTree << current.toktext; break; + case IDENTIFIER: { + using enum ReservedIdentifier; + 
if (current.toktext == ID(CLASS) || current.toktext == ID(STRUCT)) { + if (next.toktype == IDENTIFIER) { + parseTree << Class(next.toktext); ++i; + } else { + parsingError(next, hasNext ? " is not a valid class identifier" : "A class identifier is required", hasNext); + } + } else { + unsigned int parametersDistance = 0; + if (next.toktype == LPAR) { + const auto closing = find_if(lexed.begin() + i, lexed.end(), [](const tok& token){ return token.toktype == RPAR; }); + parametersDistance = distance(lexed.begin() + i, closing); + i += parametersDistance; + } + if (nextAre({LCOMP, LCOMP, LBRACE})) { + Function function(current.toktext); + if (parametersDistance > 2) + function.parameters = parse(filter_comma_list(lexed.begin() + ((i + 2) - parametersDistance), lexed.begin() + i)); + parseTree << function; + i += 2; + break; + } else i -= parametersDistance; - bool isFinalDefine = nextAre({TAG, DEFINE}); - if (isFinalDefine || next.toktype == DEFINE) { - const optional previousDefinition = parseTree.template findReferenceByName(current.toktext); - if (previousDefinition.has_value() && (previousDefinition.value().get().final || isFinalDefine)) - parsingError(current, previousDefinition->get().final ? 
" cannot be redefined as it is final" : " cannot be made final after it has been declared", true); - const unsigned increment = 2 + isFinalDefine; - const auto beginning = lexed.begin() + i + increment; - const auto end = find_if(beginning, lexed.end(), [&current](const tok& it){ - return it.toktype == SEMICOLON || it.line != current.line; - }); - parseTree << Define(isFinalDefine, current.toktext, parse(beginning, end)); - i += 1 + isFinalDefine + distance(beginning, end); - } else if (next.toktype == LPAR) { - parseTree << Call(current.toktext); - } else - parseTree << Reference(current.toktext); + bool isFinalDefine = nextAre({TAG, DEFINE}); + if (isFinalDefine || next.toktype == DEFINE) { + const optional previousDefinition = parseTree.template findReferenceByName(current.toktext); + if (previousDefinition.has_value() && (previousDefinition.value().get().final || isFinalDefine)) + parsingError(current, previousDefinition->get().final ? " cannot be redefined as it is final" : " cannot be made final after it has been declared", true); + const unsigned increment = 2 + isFinalDefine; + const auto beginning = lexed.begin() + i + increment; + const auto end = find_if(beginning, lexed.end(), [&current](const tok& it){ + return it.toktype == SEMICOLON || it.line != current.line; + }); + parseTree << Define(isFinalDefine, current.toktext, parse(beginning, end)); + i += 1 + isFinalDefine + distance(beginning, end); + } else if (next.toktype == LPAR) { + parseTree << Call(current.toktext); + } else parseTree << Reference(current.toktext); } } - case SEMICOLON: break; - case LPAR: case LBRACE: case LBRACKET: { - const auto closingCharacter = find_corresponding(lexed.begin() + i + 1, lexed.end(), current.toktype, tok::inverseLCharacter(current.toktype)); - vector subTokens(lexed.begin() + i + 1, closingCharacter); - if (current.toktype == LPAR || current.toktype == LBRACKET) filter_comma_list(subTokens); - if (not parseTree.empty()) { - try { - auto& previous = 
dynamic_cast(*parseTree.at(parseTree.size() - 1)); - if (find_if(reverse_iterator(lexed.begin() + i), lexed.rend(), [](const tok& token){ - return token.toktype != SEMICOLON; - })->toktype != *closingCharacter) { - previous = parse(subTokens); - i = distance(lexed.begin(), closingCharacter); - break; - } - } catch (const out_of_range&) {} catch (const bad_cast&) {} + case SEMICOLON: break; + case LPAR: case LBRACE: case LBRACKET: { + const auto closingCharacter = find_corresponding(lexed.begin() + i + 1, lexed.end(), current.toktype, tok::inverseLCharacter(current.toktype)); + vector subTokens(lexed.begin() + i + 1, closingCharacter); + if (current.toktype == LPAR || current.toktype == LBRACKET) filter_comma_list(subTokens); + if (not parseTree.empty()) { + try { + auto& previous = dynamic_cast(*parseTree.at(parseTree.size() - 1)); + if (find_if(reverse_iterator(lexed.begin() + i), lexed.rend(), [](const tok& token){ + return token.toktype != SEMICOLON; + })->toktype != *closingCharacter) { + previous = parse(subTokens); + i = distance(lexed.begin(), closingCharacter); + break; + } + } catch (const out_of_range&) {} catch (const bad_cast&) {} + } } + default: parsingError(current, " \u27F5 Unexpected character", true); } - default: parsingError(current, " \u27F5 Unexpected character", true); + } catch (const NamedIdentifier::identifier_reserved_exception&) { + parsingError(current, " is a reserved identifier", true); } try { const auto& last = parseTree.cend() - 1; const type_info& lastId = last->get()->getId(); - const auto& last_identifier = dynamic_cast(**last); - if (lastId != typeid(Reference) && - lastId != typeid(Call) && - lastId != typeid(Define) and + const auto& last_identifier = dynamic_cast&>(**last); + if (lastId != typeid(Define) and any_of(parseTree.cbegin(), last, [&last_identifier](const component_ptr& pointer){ try { - return dynamic_cast(*pointer).name == last_identifier.name; + return dynamic_cast&>(*pointer).name == last_identifier.name; } 
catch (const bad_cast&) { return false; } diff --git a/src/headers/parsing/ReservedIdentifiers.hpp b/src/headers/parsing/ReservedIdentifiers.hpp new file mode 100644 index 0000000..0801838 --- /dev/null +++ b/src/headers/parsing/ReservedIdentifiers.hpp @@ -0,0 +1,25 @@ +#ifndef YERBACON_RESERVEDIDENTIFIERS_HPP +#define YERBACON_RESERVEDIDENTIFIERS_HPP + +#include +#include +#include "../Yerbacon.hpp" +#include + +static const array identifiers { + "class", "structure", "print", "print_line" +}; + +enum class ReservedIdentifier: size_t { + CLASS, STRUCT, PRINT_FUNCTION, PRINT_LINE_FUNCTION +}; + +inline const char* ID(const ReservedIdentifier& identifier) { + return identifiers.at(reinterpret_cast(identifier)); +} + +bool reserved(const string_view words) { + return find(identifiers.cbegin(), identifiers.cend(), words) != identifiers.cend(); +} + +#endif //YERBACON_RESERVEDIDENTIFIERS_HPP diff --git a/src/headers/transpiler/Target.hpp b/src/headers/transpiler/Target.hpp index 9e84a84..434a341 100644 --- a/src/headers/transpiler/Target.hpp +++ b/src/headers/transpiler/Target.hpp @@ -132,7 +132,8 @@ public: make_task(StandardComponents::Reference, output << parseComponent.name;), make_task(StandardComponents::Call, const auto print_functions = printFunctions(); - output << ((parseComponent.name == "print") ? print_functions.first : (parseComponent.name == "print_line") ? print_functions.second : parseComponent.name) << '('; + using enum ReservedIdentifier; + output << ((parseComponent.name == ID(PRINT_FUNCTION)) ? print_functions.first : (parseComponent.name == ID(PRINT_LINE_FUNCTION)) ? print_functions.second : parseComponent.name) << '('; separate_transpileTree(parseComponent, ", "); output << ')'; ),