https://github.com/ilovepi created https://github.com/llvm/llvm-project/pull/160165
Delimiters in mustache are generally 2-4 character sequences. While good for general search, we can beat find() for these short sequences by just using memchr() to find the first match, and then checking the next few characters directly. >From 55d058710c42bb6e09a9bd7cf38a8a5b63b4d454 Mon Sep 17 00:00:00 2001 From: Paul Kirth <[email protected]> Date: Mon, 22 Sep 2025 15:59:23 +0000 Subject: [PATCH] [llvm][mustache] Specialize delimiter search Delimiters in mustache are generally 2-4 character sequences. While good for general search, we can beat find() for these short sequences by just using memchr() to find the first match, and then checking the next few characters directly. --- llvm/lib/Support/Mustache.cpp | 56 +++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp index 6c2ed6c84c6cf..c7cebe6b64fae 100644 --- a/llvm/lib/Support/Mustache.cpp +++ b/llvm/lib/Support/Mustache.cpp @@ -17,6 +17,50 @@ namespace { using Accessor = SmallVector<std::string>; +// A more generic specialized find for needles of length 1-3. +[[maybe_unused]] +static size_t findDelimiters(StringRef Haystack, StringRef Needle, + size_t Offset = 0) { + const size_t N = Needle.size(); + if (N == 0) + return Offset; + if (N > 3) { + // Fallback for longer needles where more advanced algorithms are better. + return Haystack.find(Needle, Offset); + } + + const char *HaystackStart = Haystack.data(); + const size_t HaystackSize = Haystack.size(); + if (HaystackSize < N + Offset) + return StringRef::npos; + + const char *NeedleStart = Needle.data(); + const char *Current = HaystackStart + Offset; + const char *End = HaystackStart + HaystackSize; + + while (Current + N <= End) { + // Stage 1: Find the first character of the needle. + Current = (const char *)::memchr(Current, NeedleStart[0], End - Current); + if (!Current || Current + N > End) + return StringRef::npos; + + // Stage 2: Validate the rest of the sequence. + if (N == 1) + return Current - HaystackStart; + if (N == 2 && Current[1] == NeedleStart[1]) + return Current - HaystackStart; + if (N == 3 && Current[1] == NeedleStart[1] && Current[2] == NeedleStart[2]) + return Current - HaystackStart; + + // Mismatch, advance and continue the search. + ++Current; + } + + return StringRef::npos; +} + + + static bool isFalsey(const json::Value &V) { return V.getAsNull() || (V.getAsBoolean() && !V.getAsBoolean().value()) || (V.getAsArray() && V.getAsArray()->empty()); @@ -306,7 +350,9 @@ SmallVector<Token> tokenize(StringRef Template) { StringLiteral Open("{{"); StringLiteral Close("}}"); size_t Start = 0; - size_t DelimiterStart = Template.find(Open); + // size_t DelimiterStart = Template.find(Open); + size_t DelimiterStart = findDelimiters(Template, Open); + if (DelimiterStart == StringRef::npos) { Tokens.emplace_back(Template.str()); return Tokens; @@ -314,7 +360,8 @@ SmallVector<Token> tokenize(StringRef Template) { while (DelimiterStart != StringRef::npos) { if (DelimiterStart != Start) Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start).str()); - size_t DelimiterEnd = Template.find(Close, DelimiterStart); + // size_t DelimiterEnd = Template.find(Close, DelimiterStart); + size_t DelimiterEnd = findDelimiters(Template, Close, DelimiterStart); if (DelimiterEnd == StringRef::npos) break; @@ -326,7 +373,8 @@ SmallVector<Token> tokenize(StringRef Template) { std::string RawBody = Open.str() + Interpolated + Close.str(); Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]); Start = DelimiterEnd + Close.size(); - DelimiterStart = Template.find(Open, Start); + // DelimiterStart = Template.find(Open, Start); + DelimiterStart = findDelimiters(Template, Open, Start); } if (Start < Template.size()) @@ -572,6 +620,8 @@ void ASTNode::render(const json::Value &CurrentCtx, raw_ostream &OS) { ParentContext = &CurrentCtx; const json::Value *ContextPtr = Ty == Root ? ParentContext : findContext(); + if (AccessorValue.empty() && (Ty != Root && Ty != Text)) + return; switch (Ty) { case Root: renderChild(CurrentCtx, OS); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
