From f3d03adeaddeda1c0e8c51ac2a3a32515efb4a26 Mon Sep 17 00:00:00 2001 From: Username404 Date: Sun, 13 Mar 2022 15:11:07 +0100 Subject: [PATCH] Add functions, indentation, better variables parsing and more Signed-off-by: Username404 --- src/etc/lexer.cpp | 10 ++- src/headers/lex.hpp | 14 ++++ src/headers/parsing/ParseComponents.hpp | 16 +++- src/headers/parsing/Parser.hpp | 76 +++++++++++++------ src/headers/transpiler/Target.hpp | 66 +++++++++------- src/headers/transpiler/implementations/Js.hpp | 12 ++- .../transpiler/implementations/Lua.hpp | 11 ++- src/headers/transpiler/implementations/Py.hpp | 10 ++- src/main.cpp | 5 +- 9 files changed, 155 insertions(+), 65 deletions(-) diff --git a/src/etc/lexer.cpp b/src/etc/lexer.cpp index 4a048b0..22ffbd2 100644 --- a/src/etc/lexer.cpp +++ b/src/etc/lexer.cpp @@ -17,6 +17,12 @@ vector lex(const string& in) const char& current = in[i]; switch (current) { + case LPAR: case LBRACE: case LBRACKET: { + const auto reversedCharacter = static_cast(tok::inverseLCharacter(current)); + if (find_corresponding(in.cbegin() + i + 1, in.cend(), current, reversedCharacter) != in.cend()) { + goto insertToken; + } else throw tok::LexerException(string("Missing \"") + reversedCharacter + "\" character", lineNumber); + } case DIVIDE: if (in[i + 1] == current) i += 2; else goto insertToken; case TAG: { if (current == TAG && in[i + 1] == DEFINE) { // See the IDENTIFIER case in Parser.hpp @@ -42,8 +48,8 @@ vector lex(const string& in) break; } else goto insertToken; } - case DEFINE: case LPAR: case RPAR: case COMMA: - case LBRACE: case RBRACE: case LBRACKET: case RBRACKET: + case DEFINE: case RPAR: case COMMA: + case RBRACE: case RBRACKET: case PLUS: case HYPHEN: case LCOMP: case RCOMP: case DOT: case DOLLAR_SIGN: case SQUOTE: insertToken: resVal.emplace_back(static_cast(current), lineNumber); diff --git a/src/headers/lex.hpp b/src/headers/lex.hpp index 41a8334..b578b84 100644 --- a/src/headers/lex.hpp +++ b/src/headers/lex.hpp @@ -5,6 +5,7 @@ #include "Yerbacon.hpp" #include #include +#include struct tok { typedef Yerbacon::Exception LexerException; @@ -29,6 +30,19 @@ struct tok { operator char() const { return static_cast(toktype); } friend std::ostream& operator<<(std::ostream& output, const tok& it) { return output << it.toktext; } }; + +auto find_corresponding(std::input_iterator auto begin, std::input_iterator auto end, const unsigned char open, const unsigned char close) { + unsigned short occurrences = 1; + return std::find_if(begin, end, [&open, &close, &occurrences](const char& it){ + if (it == open) { + ++occurrences; + } else if (it == close) { + return --occurrences == 0; + } + return false; + }); +} + std::vector lex(const std::string& in); #endif //YERBACON_TEST_H \ No newline at end of file diff --git a/src/headers/parsing/ParseComponents.hpp b/src/headers/parsing/ParseComponents.hpp index f4e5877..5130c06 100644 --- a/src/headers/parsing/ParseComponents.hpp +++ b/src/headers/parsing/ParseComponents.hpp @@ -23,6 +23,7 @@ struct ParseComponent { struct NamedIdentifier: virtual ParseComponent { const string name; explicit NamedIdentifier(string_view nameText): name(nameText) {} + NamedIdentifier() = default; }; typedef unique_ptr component_ptr; @@ -95,8 +96,10 @@ public: namespace StandardComponents { struct Define: NamedIdentifier { const bool final; - explicit Define(const bool& isFinal, string_view nameText): NamedIdentifier(nameText), final(isFinal) {} - explicit Define(string_view nameText): Define(false, nameText) {} + ParseTree content; + explicit Define(const bool& isFinal, string_view nameText, ParseTree&& content): NamedIdentifier(nameText), final(isFinal), content(move(content)) {} + explicit Define(string_view nameText, ParseTree&& content): Define(false, nameText, move(content)) {} + Define(Define&& define) noexcept: Define(define.final, define.name, move(define.content)) {} }; struct Reference: NamedIdentifier { using NamedIdentifier::NamedIdentifier; @@ -107,9 +110,16 @@ namespace StandardComponents { explicit String(const char* string): content(string) {} }; } - struct Call: ParseTree {}; + struct Call: ParseTree, Reference { + using Reference::Reference; + }; + struct Function: NamedIdentifier, ParseTree { + ParseTree parameters; + using NamedIdentifier::NamedIdentifier; + }; struct Class: NamedIdentifier { + ParseTree body; using NamedIdentifier::NamedIdentifier; }; } diff --git a/src/headers/parsing/Parser.hpp b/src/headers/parsing/Parser.hpp index 2ea29ed..7b68e15 100644 --- a/src/headers/parsing/Parser.hpp +++ b/src/headers/parsing/Parser.hpp @@ -6,6 +6,7 @@ #include "../Yerbacon.hpp" #include #include +#include namespace Parser { typedef Yerbacon::Exception ParsingException; @@ -50,6 +51,22 @@ namespace Parser { parsingError(next, hasNext ? " is not a valid class identifier" : "A class identifier is required", hasNext); } } else { + unsigned int parametersDistance = 0; + if (next.toktype == LPAR) { + const auto it = find_if(lexed.begin() + i, lexed.end(), [](const tok& token){ return token.toktype == RPAR; }); + parametersDistance = distance(lexed.begin() + i, it); + i += parametersDistance; + } + if (nextAre({LCOMP, LCOMP, LBRACE})) { + Function function(current.toktext); + if (parametersDistance > 2) { + function.parameters = parse(lexed.begin() + ((i + 2) - parametersDistance), lexed.begin() + i); + } + parseTree << function; + i += 2; + break; + } else i -= parametersDistance; + bool isFinalDefine = nextAre({TAG, DEFINE}); if (isFinalDefine || next.toktype == DEFINE) { const optional previousDefinition = parseTree.template findReferenceByName(current.toktext); @@ -58,39 +75,48 @@ namespace Parser { parsingError(current, previousDefinition->get().final ? " cannot be redefined as it is final" : " cannot be made final after it has been declared", true); } } - parseTree << Define(isFinalDefine, current.toktext); - i += 1 + isFinalDefine; - } else { + parseTree << Define(isFinalDefine, current.toktext, parse( // TODO Find another way of choosing the tokens to parse + lexed.begin() + i + 2 + isFinalDefine, find_if(lexed.begin() + i + 2 + isFinalDefine, lexed.end(), [¤t](const tok& it){ + return it.line != current.line; + }) + )); + i += 2 + isFinalDefine; + } else if (next.toktype == '(') { + parseTree << Call(current.toktext); + } else parseTree << Reference(current.toktext); } } break; - } case LPAR: case LBRACE: case LBRACKET: { - const auto inverseCharacter = tok::inverseLCharacter(current.toktype); - const auto closingCharacter = find_if(lexed.begin() + i, lexed.end(), [&inverseCharacter](const tok& it){ - return it.toktype == inverseCharacter; - }); - if (closingCharacter != lexed.end()) { - vector subTokens(lexed.begin() + i + 1, closingCharacter); - if (current.toktype == LPAR || current.toktype == LBRACKET) { - if (subTokens.size() >= 2 && subTokens[1].toktype != RPAR) { - for (auto iterator = subTokens.cbegin(); iterator < (subTokens.cend() - 1); ++iterator) { - const auto nextIterator = iterator + 1; - if (nextIterator->toktype == COMMA) { - subTokens.erase(nextIterator); - } else throw ParsingException("Missing comma after \"" + iterator->toktext + '"'); - } + const auto closingCharacter = find_corresponding(lexed.begin() + i + 1, lexed.end(), current.toktype, tok::inverseLCharacter(current.toktype)); + vector subTokens(lexed.begin() + i + 1, closingCharacter); + if (current.toktype == LPAR || current.toktype == LBRACKET) { + if (subTokens.size() >= 2 && subTokens[1].toktype != RPAR) { + for (auto iterator = subTokens.cbegin(); iterator < (subTokens.cend() - 1); ++iterator) { + const auto nextIterator = iterator + 1; + if (nextIterator->toktype == COMMA) { + subTokens.erase(nextIterator); + } else throw ParsingException("Missing comma after \"" + iterator->toktext + '"'); } } - switch (current.toktype) { - case LPAR: parseTree << parse(subTokens); break; - case LBRACE: // TODO Add structures for class/function bodies - case LBRACKET: - default: parseTree << parse(subTokens); break; + } + const component_ptr& previous = parseTree.at(parseTree.size() - 1); + if (current.toktype == LPAR) { + try { + dynamic_cast(*previous).ParseTree::operator=(parse(subTokens)); + } catch (const bad_cast&) { + parsingError(current, "Unexpected parenthesis"); } - i = distance(lexed.begin(), closingCharacter); - } else parsingError(current, string(" is missing a closing \"").append(1, inverseCharacter) + '"', true); + } else if (current.toktype == LBRACE) { + const type_info& previous_id = previous->getId(); + if (previous_id == typeid(Function)) { + dynamic_cast(*previous).ParseTree::operator=(parse(subTokens)); + } else if (previous_id == typeid(Class)) { + dynamic_cast(*previous).body = parse(subTokens); + } + } else parseTree << parse(subTokens); + i = distance(lexed.begin(), closingCharacter); break; } case RPAR: case RBRACE: case RBRACKET: parsingError(current, " \u27F5 Unexpected character", true); diff --git a/src/headers/transpiler/Target.hpp b/src/headers/transpiler/Target.hpp index 235b3e0..eb758e9 100644 --- a/src/headers/transpiler/Target.hpp +++ b/src/headers/transpiler/Target.hpp @@ -74,33 +74,58 @@ protected: typedef pair print_functions_pair; virtual unordered_task_map getTaskMap() = 0; virtual print_functions_pair printFunctions() = 0; - void transpileTree(const derived_from auto& parseTree) { + IS(ParseTree) + void transpileTree(const T& parseTree, const unsigned short& indentationLevel = 0, const function&& postInsertionFunction = [](auto&){}) { if (parseTree.size() > 0) { - unsigned int subIndex = 0; - for (auto pointer_iterator = parseTree.cbegin(); pointer_iterator < parseTree.cend(); ++pointer_iterator) { - getTaskMapInstance().at(pointer_iterator->get()->getId())(parseTree, subIndex); ++subIndex; - if ((pointer_iterator + 1) != parseTree.cend() && parseTree.getId() == typeid(StandardComponents::Call)) { - output << ", "; + const auto added_size = indentationLevel * strlen(indentation); + if (newLines) { + separator.reserve(separator.size() + added_size); + for (unsigned short level = 0; level < indentationLevel; ++level) { + separator.append(indentation); } } + unsigned int subIndex = 0; + for (auto pointer_iterator = parseTree.cbegin(); pointer_iterator < parseTree.cend(); ++pointer_iterator) { + const type_info& id = pointer_iterator->get()->getId(); + try { + getTaskMapInstance().at(id)(parseTree, subIndex); ++subIndex; + } catch (const out_of_range&) { + throw Yerbacon::Exception(string( + #ifndef __GNUC__ + id.name() + #else + abi::__cxa_demangle(id.name(), nullptr, nullptr, nullptr) + #endif + ) += " is not supported by the current target"); + } + postInsertionFunction(pointer_iterator); + } + if (newLines) separator.erase(separator.size() - added_size); } } + IS(ParseTree) + void separate_transpileTree(const T& parseTree, const unsigned short& indentationLevel = 0) { + transpileTree(parseTree, indentationLevel, [this, &parseTree](const auto& iterator){ + if (iterator + 1 != parseTree.cend()) { + output << separator; + } + }); + } typedef optional optional_string; virtual optional_string uniqueLineSeparator() { return ";"; }; const bool newLines; - const char* separator; + string separator; + static constexpr const char* indentation = " "; public: const unordered_task_map& getTaskMapInstance() { static unordered_task_map staticMap = getTaskMap(); // Default / Shared tasks: staticMap.merge(unordered_task_map({ make_task(ParseTree, transpileTree(parseComponent);), - make_task(StandardComponents::Reference, - const auto print_functions = printFunctions(); - output << ((parseComponent.name == "print") ? print_functions.first : (parseComponent.name == "print_line") ? print_functions.second : parseComponent.name); - ), + make_task(StandardComponents::Reference, output << parseComponent.name;), make_task(StandardComponents::Call, - output << '('; + const auto print_functions = printFunctions(); + output << ((parseComponent.name == "print") ? print_functions.first : (parseComponent.name == "print_line") ? print_functions.second : parseComponent.name) << '('; transpileTree(parseComponent); output << ')'; ) @@ -110,23 +135,8 @@ public: static shared_ptr forName(string_view name, bool newLines); string transpileWithTree(const ParseTree& tree) { separator = newLines ? "\n" : (supportsOneLine() ? uniqueLineSeparator().value() : throw Yerbacon::Exception("--newlines=off is not supported by the current target")); - const unordered_task_map& taskMap = getTaskMapInstance(); output.str(string()); - for (unsigned int i = 0; i < tree.size(); ++i) { - const component_ptr& component = tree[i]; - const type_info& id = component->getId(); - try { - taskMap.at(id)(tree, i); - } catch (const out_of_range&) { - throw Yerbacon::Exception(string( - #ifndef __GNUC__ - id.name() - #else - abi::__cxa_demangle(id.name(), nullptr, nullptr, nullptr) - #endif - ) += " is not supported by the current target"); - } - } + separate_transpileTree(tree); return output.str() + '\n'; }; explicit Target(const bool& newLines): newLines(newLines), separator() {}; diff --git a/src/headers/transpiler/implementations/Js.hpp b/src/headers/transpiler/implementations/Js.hpp index 60c476f..9c0da05 100644 --- a/src/headers/transpiler/implementations/Js.hpp +++ b/src/headers/transpiler/implementations/Js.hpp @@ -8,8 +8,18 @@ struct JsTarget: Target { return { make_task(Define, output << (parseComponent.final ? "const " : "let ") << parseComponent.name << " = "; + transpileTree(parseComponent.content); ), - make_task(types::String, stringInterpolation(parseComponent.content);) + make_task(types::String, stringInterpolation(parseComponent.content);), + make_task(Function, + output << "function " << parseComponent.name << '('; + transpileTree(parseComponent.parameters); + output << ") {"; + if (newLines) output << separator << indentation; + separate_transpileTree(parseComponent, 1); + if (newLines) output << separator; + output << '}'; + ) }; } using Target::Target; diff --git a/src/headers/transpiler/implementations/Lua.hpp b/src/headers/transpiler/implementations/Lua.hpp index d8c9b58..bfdc7f4 100644 --- a/src/headers/transpiler/implementations/Lua.hpp +++ b/src/headers/transpiler/implementations/Lua.hpp @@ -12,8 +12,17 @@ struct LuaTarget: Target { output << parseComponent.name; if (parseComponent.final) output << " "; // TODO Find an alternative to for lua <5.4 output << " = "; + transpileTree(parseComponent.content); ), - make_task(types::String, stringInterpolation(parseComponent.content, "[[", "]]", "..");) + make_task(types::String, stringInterpolation(parseComponent.content, "[[", "]]", "..");), + make_task(Function, + output << "function " << parseComponent.name << '('; + transpileTree(parseComponent.parameters); + output << ')' << separator; + if (newLines) output << indentation; + separate_transpileTree(parseComponent, 1); + output << separator << "end"; + ) }; } using Target::Target; diff --git a/src/headers/transpiler/implementations/Py.hpp b/src/headers/transpiler/implementations/Py.hpp index 2b2396a..8a71409 100644 --- a/src/headers/transpiler/implementations/Py.hpp +++ b/src/headers/transpiler/implementations/Py.hpp @@ -7,8 +7,14 @@ struct PyTarget: Target { optional_string uniqueLineSeparator() final { return {}; } unordered_task_map getTaskMap() final { return { - make_task(Define, output << parseComponent.name << " = ";), - make_task(types::String, stringInterpolation(R"(""")", parseComponent.content);) + make_task(Define, output << parseComponent.name << " = "; transpileTree(parseComponent.content);), + make_task(types::String, stringInterpolation(R"(""")", parseComponent.content);), + make_task(Function, + output << "def " << parseComponent.name << '('; + transpileTree(parseComponent.parameters); + output << "):" << separator << indentation; + separate_transpileTree(parseComponent, 1); + ), }; } using Target::Target; diff --git a/src/main.cpp b/src/main.cpp index 45a9656..4bed4eb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -46,9 +46,8 @@ int main(int argc, char* argv[]) { } else invalid_argument: Yerbacon::fail({"\"", currentArg.data(), "\" is not a valid argument."}); } } - const auto currentTarget = Target::forName(target, newLines); - const auto compile = [&target, ¤tTarget](string_view name) -> string { - string transpiledString = currentTarget->transpileWithTree(parseString(getFileContent(name.data()))); + const auto compile = [&target, &newLines](string_view name) -> string { + string transpiledString = Target::forName(target, newLines)->transpileWithTree(parseString(getFileContent(name.data()))); name.remove_suffix(6); string outputFile; (outputFile = name).append(target);