[mlir] [llvm] [clang] [llvm] Improve implementation of StringRef::find_last_of and cie (PR #71865)

2023-11-15 Thread via cfe-commits


@@ -274,6 +278,23 @@ StringRef::size_type 
StringRef::find_first_not_of(StringRef Chars,
 /// Note: O(size() + Chars.size())
 StringRef::size_type StringRef::find_last_of(StringRef Chars,
  size_t From) const {
+#ifdef __SSE2__
+  if (Chars.size() == 2) {
+__m128i Needle0 = _mm_set1_epi8(Chars[0]);
+__m128i Needle1 = _mm_set1_epi8(Chars[1]);
+size_type Sz = std::min(From, Length);
+do {
+  Sz = Sz < 16 ? 0 : Sz - 16;
+  __m128i Buffer = _mm_loadu_si128((const __m128i *)(Data + Sz));
+  unsigned Mask = _mm_movemask_epi8(_mm_or_si128(
+  _mm_cmpeq_epi8(Buffer, Needle0), _mm_cmpeq_epi8(Buffer, Needle1)));
+  if (Mask != 0) {
+return Sz + sizeof(Mask) * CHAR_BIT - llvm::countl_zero(Mask);
+  }
+} while (Sz);
+return npos;
+  }
+#endif

serge-sans-paille wrote:

I don't think using https://github.com/xtensor-stack/xsimd is an option :-) And 
https://en.cppreference.com/w/cpp/experimental/simd/simd is still not a thing 
:-/

We already have some bits of SSE2 in clang and llvm. OK to factor this into a 
function.

https://github.com/llvm/llvm-project/pull/71865
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[mlir] [llvm] [clang] [llvm] Improve implementation of StringRef::find_last_of and cie (PR #71865)

2023-11-14 Thread Mehdi Amini via cfe-commits


@@ -274,6 +278,23 @@ StringRef::size_type 
StringRef::find_first_not_of(StringRef Chars,
 /// Note: O(size() + Chars.size())
 StringRef::size_type StringRef::find_last_of(StringRef Chars,
  size_t From) const {
+#ifdef __SSE2__
+  if (Chars.size() == 2) {
+__m128i Needle0 = _mm_set1_epi8(Chars[0]);
+__m128i Needle1 = _mm_set1_epi8(Chars[1]);
+size_type Sz = std::min(From, Length);
+do {
+  Sz = Sz < 16 ? 0 : Sz - 16;
+  __m128i Buffer = _mm_loadu_si128((const __m128i *)(Data + Sz));
+  unsigned Mask = _mm_movemask_epi8(_mm_or_si128(
+  _mm_cmpeq_epi8(Buffer, Needle0), _mm_cmpeq_epi8(Buffer, Needle1)));
+  if (Mask != 0) {
+return Sz + sizeof(Mask) * CHAR_BIT - llvm::countl_zero(Mask);
+  }
+} while (Sz);
+return npos;
+  }
+#endif

joker-eph wrote:

Can this be abstracted or made out-of-line?
I'm wondering about the scalability of HW-specific intrinsics in-line 
(anticipating the incoming `#elif defined(ARM64)`...)

https://github.com/llvm/llvm-project/pull/71865
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[mlir] [llvm] [clang] [llvm] Improve implementation of StringRef::find_last_of and cie (PR #71865)

2023-11-13 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/71865

From 65e931aa7bde6fffe3dfeee1a2147c8fcd73f0af Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Mon, 13 Nov 2023 08:59:29 +0100
Subject: [PATCH 1/2] Replace usage of StringRef::find_last_of with a string
 literal of size one by the equivalent char literal

---
 clang/lib/Driver/ToolChains/ZOS.cpp   | 2 +-
 llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 2 +-
 llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp   | 2 +-
 mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/ZOS.cpp 
b/clang/lib/Driver/ToolChains/ZOS.cpp
index a7d6f030f3850a5..96dbf602e7c1fc9 100644
--- a/clang/lib/Driver/ToolChains/ZOS.cpp
+++ b/clang/lib/Driver/ToolChains/ZOS.cpp
@@ -143,7 +143,7 @@ void zos::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 StringRef OutputName = Output.getFilename();
 // Strip away the last file suffix in presence from output name and add
 // a new .x suffix.
-size_t Suffix = OutputName.find_last_of(".");
+size_t Suffix = OutputName.find_last_of('.');
 const char *SideDeckName =
 Args.MakeArgString(OutputName.substr(0, Suffix) + ".x");
 CmdArgs.push_back("-x");
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index d03820f5f9e6c93..7b5dc3795b0227f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -102,7 +102,7 @@ void 
AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
 bool ArgDump = false;
 StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
   CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
-size_t pTag = CurFmt.find_last_of("%");
+size_t pTag = CurFmt.find_last_of('%');
 if (pTag != StringRef::npos) {
   ArgDump = true;
   while (pTag && CurFmt[--pTag] == '%') {
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp 
b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index d6354876b558744..ae9e801f8f50b84 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -163,7 +163,7 @@ lookupBuiltin(StringRef DemangledCall,
   // the information after angle brackets and return type removed.
   if (BuiltinName.find('<') && BuiltinName.back() == '>') {
 BuiltinName = BuiltinName.substr(0, BuiltinName.find('<'));
-BuiltinName = BuiltinName.substr(BuiltinName.find_last_of(" ") + 1);
+BuiltinName = BuiltinName.substr(BuiltinName.find_last_of(' ') + 1);
   }
 
   // Check if the extracted name begins with "__spirv_ImageSampleExplicitLod"
diff --git a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp 
b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp
index fe668130d4e2dbc..b6f4f3cc1869edb 100644
--- a/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp
+++ b/mlir/lib/Tools/lsp-server-support/SourceMgrUtils.cpp
@@ -83,7 +83,7 @@ lsp::extractSourceDocComment(llvm::SourceMgr &sourceMgr, 
SMLoc loc) {
 
   // Pop the last line from the buffer string.
   auto popLastLine = [&]() -> std::optional<StringRef> {
-size_t newlineOffset = buffer.find_last_of("\n");
+size_t newlineOffset = buffer.find_last_of('\n');
 if (newlineOffset == StringRef::npos)
   return std::nullopt;
 StringRef lastLine = buffer.drop_front(newlineOffset).trim();

From 731953e2cb13c7c2438a14a927b3af91317b72f2 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 9 Nov 2023 20:41:40 +0100
Subject: [PATCH 2/2] [llvm] Improve implementation of StringRef::find_last_of
 for the usual case of 2 chars

Almost all usages of StringRef::find_last_of in Clang/LLVM use a Needle
of 2 elements, which can easily be optimized with SSE2. The IPC of the
improved version is significantly better, as shown in

https://godbolt.org/z/h1dsdcMd8

And it does not require an extra structure.
---
 llvm/lib/Support/StringRef.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index feee47ca693b251..1f91805802d0b54 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -15,6 +15,10 @@
 #include "llvm/Support/Error.h"
 #include <bitset>
 
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
 using namespace llvm;
 
 // MSVC emits references to this into the translation units which reference it.
@@ -274,6 +278,23 @@ StringRef::size_type 
StringRef::find_first_not_of(StringRef Chars,
 /// Note: O(size() + Chars.size())
 StringRef::size_type StringRef::find_last_of(StringRef Chars,
  size_t From) const {
+#ifdef __SSE2__
+  if (Chars.size() == 2) {
+__m128i Needle0 = _mm_set1_epi8(Chars[0]);
+__m128i Needle1 = _mm_set1_epi8(Chars[1]);
+size_type Sz = std::min(From, Length);
+do {
+  Sz =