================
@@ -0,0 +1,1594 @@
+"use strict";
+/*
+ * MIT License
+ *
+ * Copyright (c) 2021 alex-pinkus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE
+ * SOFTWARE.
+ */
+const PRECS = { // Named precedence levels handed to prec(), e.g. prec(PRECS.comment, ...) in the `comment` rule; in tree-sitter the higher value wins.
+ multiplication: 11,
+ addition: 10,
+ infix_operations: 9,
+ nil_coalescing: 8,
+ check: 7,
+ prefix_operations: 7, // same level as `check`
+ comparison: 6,
+ postfix_operations: 6, // same level as `comparison`
+ equality: 5,
+ conjunction: 4,
+ disjunction: 3,
+ block: 2,
+ loop: 1,
+ keypath: 1,
+ parameter_pack: 1,
+ control_transfer: 0, // 0 is also tree-sitter's default precedence for rules
+ as: -1, // negative levels start here
+ tuple: -1,
+ if: -1,
+ switch: -1,
+ do: -1,
+ fully_open_range: -1,
+ range: -1,
+ navigation: -1,
+ expr: -1,
+ ty: -1,
+ call: -2,
+ ternary: -2,
+ try: -2,
+ call_suffix: -2,
+ range_suffix: -2,
+ ternary_binary_suffix: -2,
+ await: -2,
+ assignment: -3,
+ comment: -3, // used by the `comment` token rule
+ lambda: -3,
+ regex: -4, // lowest level in this table
+};
+
+const DYNAMIC_PRECS = { // NOTE(review): presumably passed to prec.dynamic(); no use is visible in this excerpt, so confirm against the rules.
+ call : 1,
+};
+
+const DEC_DIGITS = token(sep1(/[0-9]+/, /_+/)); // decimal digit runs; assumes sep1(rule, sep) means `rule (sep rule)*` (sep1 is defined outside this excerpt)
+const HEX_DIGITS = token(sep1(/[0-9a-fA-F]+/, /_+/)); // hexadecimal digit runs, same underscore-separator scheme
+const OCT_DIGITS = token(sep1(/[0-7]+/, /_+/)); // octal digit runs, same underscore-separator scheme
+const BIN_DIGITS = token(sep1(/[01]+/, /_+/)); // binary digit runs, same underscore-separator scheme
+const REAL_EXPONENT = token(seq(/[eE]/, optional(/[+-]/), DEC_DIGITS)); // `e`/`E`, optional sign, decimal digits; used by real_literal
+const HEX_REAL_EXPONENT = token(seq(/[pP]/, optional(/[+-]/), DEC_DIGITS)); // `p`/`P`, optional sign, decimal digits; used by the "0x" form of real_literal
+
+var LEXICAL_IDENTIFIER; // identifier regex chosen below; referenced by the simple_identifier rule
+
+if (tree_sitter_version_supports_emoji()) { // helper is defined outside this excerpt
+ LEXICAL_IDENTIFIER =
+
/[_\p{XID_Start}\p{Emoji}&&[^0-9#*]](\p{EMod}|\x{FE0F}\x{20E3}?)?([_\p{XID_Continue}\p{Emoji}\x{200D}](\p{EMod}|\x{FE0F}\x{20E3}?)?)*/;
+} else { // fallback without any emoji classes
+ LEXICAL_IDENTIFIER = /[_\p{XID_Start}][_\p{XID_Continue}]*/; // `_` or XID_Start, then any number of `_` / XID_Continue
+}
+
+module.exports = grammar({
+ name : "swift",
+ conflicts : ($) => [
+ // @Type(... could either be an annotation constructor
+ // invocation or an annotated expression
+ [ $.attribute ],
+ [ $._attribute_argument ],
+ // Is `foo { ... }` a constructor invocation or function
+ // invocation?
+ [ $._simple_user_type, $._expression ],
+ // To support nested types A.B not being interpreted as
+ // `(navigation_expression ... (type_identifier))
+ // (navigation_suffix)`
+ [ $.user_type ],
+ // How to tell the difference between Foo.bar(with:and:), and
+ // Foo.bar(with: smth, and: other)? You need GLR
+ [ $.value_argument ],
+ // { (foo, bar) ...
+ [ $._expression, $.lambda_parameter ],
+ [ $._primary_expression, $.lambda_parameter ],
+ // (start: start, end: end)
+ [ $._tuple_type_item_identifier, $.tuple_expression ],
+ // After a `{` in a function or switch context, it's ambiguous
+ // whether we're starting a set of local statements or applying
+ // some modifiers to a capture or pattern.
+ [ $.modifiers ],
+ // `+(...)` is ambiguously either "call the function produced by
+ // a reference to the operator `+`" or "use the unary operator
+ // `+` on the result of the parenthetical expression."
+ [ $._additive_operator, $._prefix_unary_operator ],
+ [ $._referenceable_operator, $._prefix_unary_operator ],
+ // `{ [self, b, c] ...` could be a capture list or an array
+ // literal depending on what else happens.
+ [ $.capture_list_item, $._expression ],
+ [ $.capture_list_item, $._expression, $._simple_user_type ],
+ [ $._primary_expression, $.capture_list_item ],
+ // a ? b : c () could be calling c(), or it could be calling a
+ // function that's produced by the result of
+ // `(a ? b : c)`. We have a small hack to force it to be the
+ // former of these by intentionally introducing a conflict.
+ [ $.call_suffix, $.expr_hack_at_ternary_binary_call_suffix ],
+ // try {expression} is a bit magic and applies quite broadly:
+ // `try foo()` and `try foo { }` show that this is right
+ // associative, and `try foo ? bar() : baz` even more so. But
it
+ // doesn't always win: something like `if try foo { } ...`
+ // should award its braces to the `if`. In order to make this
+ // actually happen, we need to parse all the options and pick
+ // the best one that doesn't error out.
+ [ $.try_expression, $._unary_expression ],
+ [ $.try_expression, $._expression ],
+ // await {expression} has the same special cases as `try`.
+ [ $.await_expression, $._unary_expression ],
+ [ $.await_expression, $._expression ],
+ // In a computed property, when you see an @attribute, it's not
+ // yet clear if that's going to be for a locally-declared class
+ // or a getter / setter specifier.
+ [
+ $._local_property_declaration,
+ $._local_typealias_declaration,
+ $._local_function_declaration,
+ $._local_class_declaration,
+ $.computed_getter,
+ $.computed_modify,
+ $.computed_setter,
+ ],
+ // The `class` modifier is legal in many of the same positions
+ // that a class declaration itself would be.
+ [ $._bodyless_function_declaration, $.property_modifier ],
+ [ $.init_declaration, $.property_modifier ],
+ // Patterns, man
+ [ $._navigable_type_expression, $._case_pattern ],
+ [
+ $._no_expr_pattern_already_bound, $._binding_pattern_no_expr
+ ],
+
+ // On encountering a closure starting with `{ @Foo ...`, we
+ // don't yet know if that attribute applies to the closure type
+ // or to a declaration within the closure. What a mess! We just
+ // have to hope that if we keep going, only one of those will
+ // parse (because there will be an `in` or a `let`).
+ [
+ $._lambda_type_declaration,
+ $._local_property_declaration,
+ $._local_typealias_declaration,
+ $._local_function_declaration,
+ $._local_class_declaration,
+ ],
+
+ // We want `foo() { }` to be treated as one function call, but
+ // we _also_ want `if foo() { ... }` to be treated as a full
+ // if-statement. This means we have to treat it as a conflict
+ // rather than purely a left or right associative construct,
and
+ // let the parser realize that the second expression won't
parse
+ // properly with the `{ ... }` as a lambda.
+ [ $.constructor_suffix ],
+ [ $.call_suffix ],
+
+ // `actor` is allowed to be an identifier, even though it is
+ // also a locally permitted declaration. If we encounter it,
the
+ // only way to know what it's meant to be is to keep going.
+ [ $._modifierless_class_declaration, $.property_modifier ],
+ [ $._fn_call_lambda_arguments ],
+
+ // `borrowing` and `consuming` are legal as identifiers, but
are
+ // also legal modifiers
+ [ $.parameter_modifiers ],
+
+ // These are keywords sometimes, but simple identifiers other
+ // times, and it just depends on the rest of their usage.
+ [
+ $._contextual_simple_identifier,
+ $._modifierless_class_declaration
+ ],
+ [
+ $._contextual_simple_identifier, $.property_behavior_modifier
+ ],
+ [ $._contextual_simple_identifier, $.parameter_modifier ],
+ [ $._contextual_simple_identifier, $.type_parameter_pack ],
+ [ $._contextual_simple_identifier, $.type_pack_expansion ],
+ [ $._contextual_simple_identifier, $.visibility_modifier ],
+],
+ extras : ($) => [$.comment,
+ $.multiline_comment,
+ /\s+/, // Whitespace
+],
+ externals : ($) => [
+ // Comments and raw strings are parsed in a custom scanner
+ // because they require us to carry forward state to maintain
+ // symmetry. For instance, parsing a multiline comment requires
+ // us to increment a counter whenever we see
+ // `/*`, and decrement it whenever we see `*/`. A standard
+ // grammar would only be able to exit the comment at the first
+ // `*/` (like C does). Similarly, when you start a string with
+ // `##"`, you're required to include the same number of `#`
+ // symbols to end it.
+ $.multiline_comment,
+ $.raw_str_part,
+ $.raw_str_continuing_indicator,
+ $.raw_str_end_part,
+ // Because Swift doesn't have explicit semicolons, we also do
+ // some whitespace handling in a custom scanner. Line breaks
are
+ // _sometimes_ meaningful as the end of a statement: try to
+ // write `let foo: Foo let bar: Bar`, for instance, and the
+ // compiler will complain, but add either a newline or a
+ // semicolon and it's fine. We borrow the idea from the Kotlin
+ // grammar that a newline is sometimes a "semicolon". By
+ // including `\n` in both `_semi` and an anonymous `whitespace`
+ // extras, we _should_ be able to let the parser decide if a
+ // newline is meaningful. If the parser sees something like
+ // `foo.bar(1\n)`, it knows that a "semicolon" would not be
+ // valid there, so it parses that as whitespace. On the other
+ // hand, `let foo: Foo\n let bar: Bar` has a meaningful
newline.
+ // Unfortunately, we can't simply stop at that. There are some
+ // expressions and statements that remain valid if you end them
+ // early, but are expected to be parsed across multiple lines.
+ // One particularly nefarious example is a function declaration,
+ // where you might have something like `func foo<A>(args: A) ->
+ // Foo throws where A: Hashable`. This would still be a valid
+ // declaration even if it ended after the `)`, the `Foo`, or
the
+ // `throws`, so a grammar that simply interprets a newline as
+ // "sometimes a semi" would parse those incorrectly. To solve
+ // that case, our custom scanner must do a bit of extra
+ // lookahead itself. If we're about to generate a
+ // `_semi`, we advance a bit further to see if the next
+ // non-whitespace token would be one of these other operators.
+ // If so, we ignore the `_semi` and just produce the operator;
+ // if not, we produce the `_semi` and let the rest of the
+ // grammar sort it out. This isn't perfect, but it works well
+ // enough most of the time.
+ $._implicit_semi,
+ $._explicit_semi,
+ // Every one of the below operators will suppress a `_semi` if
+ // we encounter it after a newline.
+ $._arrow_operator_custom,
+ $._dot_custom,
+ $._conjunction_operator_custom,
+ $._disjunction_operator_custom,
+ $._nil_coalescing_operator_custom,
+ $._eq_custom,
+ $._eq_eq_custom,
+ $._plus_then_ws,
+ $._minus_then_ws,
+ $._bang_custom,
+ $._throws_keyword,
+ $._rethrows_keyword,
+ $.default_keyword,
+ $.where_keyword,
+ $["else"],
+ $.catch_keyword,
+ $._as_custom,
+ $._as_quest_custom,
+ $._as_bang_custom,
+ $._async_keyword_custom,
+ $._custom_operator,
+ $._hash_symbol_custom,
+ $._directive_if,
+ $._directive_elseif,
+ $._directive_else,
+ $._directive_endif,
+
+ // Fake operator that will never get triggered, but follows the
+ // sequence of characters for `try!`. Tracked by the custom
+ // scanner so that it can avoid triggering `$.bang` for that
+ // case.
+ $._fake_try_bang,
+],
+ inline : ($) => [$._locally_permitted_modifiers],
+ rules : {
+ ////////////////////////////////
+ // File Structure
+ ////////////////////////////////
+ source_file : ($) => seq(
+ optional($.shebang_line),
+ optional(seq($._top_level_statement,
+ repeat(seq($._semi, $._top_level_statement)),
+ optional($._semi)))),
+ _semi : ($) => choice($._implicit_semi, $._explicit_semi),
+ shebang_line : ($) => seq($._hash_symbol, "!", /[^\r\n]*/),
+ ////////////////////////////////
+ // Lexical Structure -
+ // https://docs.swift.org/swift-book/ReferenceManual/LexicalStructure.html
+ ////////////////////////////////
+ comment : ($) => token(prec(PRECS.comment, seq("//", /.*/))),
+ // Identifiers
+ simple_identifier : ($) =>
+ choice(LEXICAL_IDENTIFIER, /`[^\r\n` ]*`/,
/\$[0-9]+/,
+ token(seq("$", LEXICAL_IDENTIFIER)),
+ $._contextual_simple_identifier),
+ // Keywords that were added after they were already legal as identifiers.
+ // `tree-sitter` will prefer exact matches
+ // when parsing so unless we explicitly say that these are legal, the
parser
+ // will interpret them as their keyword.
+ _contextual_simple_identifier : ($) =>
+ choice("actor", "async", "each", "lazy",
+ "repeat", "package",
+ $._parameter_ownership_modifier),
+ identifier : ($) => sep1($.simple_identifier, $._dot),
+ // Literals
+ _basic_literal : ($) =>
+ choice($.integer_literal, $.hex_literal, $.oct_literal,
+ $.bin_literal, $.real_literal, $.boolean_literal,
+ $._string_literal, $.regex_literal, "nil"),
+ real_literal : ($) => token(choice(seq(DEC_DIGITS, REAL_EXPONENT),
+ seq(optional(DEC_DIGITS), ".",
+ DEC_DIGITS,
optional(REAL_EXPONENT)),
+ seq("0x", HEX_DIGITS,
+ optional(seq(".", HEX_DIGITS)),
+ HEX_REAL_EXPONENT))),
+ integer_literal : ($) => token(seq(optional(/[1-9]/), DEC_DIGITS)),
+ hex_literal : ($) => token(seq("0", /[xX]/, HEX_DIGITS)),
+ oct_literal : ($) => token(seq("0", /[oO]/, OCT_DIGITS)),
+ bin_literal : ($) => token(seq("0", /[bB]/, BIN_DIGITS)),
+ boolean_literal : ($) => choice("true", "false"),
+ // String literals
+ _string_literal : ($) => choice($.line_string_literal,
+ $.multi_line_string_literal,
+ $.raw_string_literal),
+ line_string_literal : ($) => seq(
+ '"',
+ repeat(choice(field("text",
$._line_string_content),
+ $._interpolation)),
+ '"'),
+ _line_string_content : ($) => choice($.line_str_text, $.str_escaped_char),
+ line_str_text : ($) => /[^\\"]+/,
+ str_escaped_char : ($) => choice($._escaped_identifier,
+ $._uni_character_literal),
+ _uni_character_literal : ($) => seq("\\", "u", /\{[0-9a-fA-F]+\}/),
+ multi_line_string_literal : ($) =>
+ seq('"""',
+ repeat(choice(
+ field("text",
+ $._multi_line_string_content),
+ $._interpolation)),
+ '"""'),
+ raw_string_literal : ($) => seq(
+ repeat(seq(
+ field("text", $.raw_str_part),
+ field("interpolation", $.raw_str_interpolation),
+ optional($.raw_str_continuing_indicator))),
+ field("text", $.raw_str_end_part)),
+ raw_str_interpolation : ($) => seq($.raw_str_interpolation_start,
+ $._interpolation_contents, ")"),
+ raw_str_interpolation_start : ($) => /\\#*\(/,
+ _multi_line_string_content : ($) => choice($.multi_line_str_text,
+ $.str_escaped_char, '"'),
+ _interpolation : ($) => seq("\\(", $._interpolation_contents, ")"),
+ _interpolation_contents : ($) =>
+ sep1Opt(field("interpolation",
+ alias($.value_argument,
+
$.interpolated_expression)),
+ ","),
+ _escaped_identifier : ($) => /\\[0\\tnr"'\n]/,
+ multi_line_str_text : ($) => /[^\\"]+/,
+ // Based on
+ //
https://gitlab.com/woolsweater/tree-sitter-swifter/-/blob/3d47c85bd47ce54cdf2023a9c0e01eb90adfcc1d/grammar.js#L1019
----------------
JDevlieghere wrote:
I think this is likely already the best place to do the attribution.
https://github.com/llvm/llvm-project/pull/181297
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits