[PATCH] D132352: Introduce noread_thread_id to address the thread identification problem in coroutines

Chuanqi Xu via Phabricator via cfe-commits Mon, 22 Aug 2022 22:43:43 -0700

ChuanqiXu updated this revision to Diff 454697.
ChuanqiXu added a comment.

I posted the incorrect version last time.



CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D132352/new/

https://reviews.llvm.org/D132352

Files:
  llvm/docs/LangRef.rst
  llvm/include/llvm/Bitcode/LLVMBitCodes.h
  llvm/include/llvm/IR/Attributes.td
  llvm/include/llvm/IR/InstrTypes.h
  llvm/lib/Bitcode/Reader/BitcodeReader.cpp
  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
  llvm/lib/Transforms/Scalar/EarlyCSE.cpp
  llvm/lib/Transforms/Scalar/GVN.cpp
  llvm/lib/Transforms/Scalar/NewGVN.cpp
  llvm/lib/Transforms/Utils/CodeExtractor.cpp
  llvm/test/Transforms/Coroutines/coro-readnone-01.ll
  llvm/test/Transforms/Coroutines/coro-readnone-02.ll
  llvm/test/Transforms/Coroutines/coro-readnone-03.ll

Index: llvm/test/Transforms/Coroutines/coro-readnone-03.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-readnone-03.ll
@@ -0,0 +1,64 @@
+; Tests that calls to a readnone, noread_thread_id function that cross suspend points are optimized as expected.
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK-O3
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %j = call i32 @readnone_func() readnone noread_thread_id
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone noread_thread_id
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; CHECK-O3: define{{.*}}@f.resume(
+; CHECK-O3-NEXT: CoroEnd:
+; CHECK-O3-NEXT:   tail call void @print_same()
+; CHECK-O3-NEXT:   tail call void @free(ptr nonnull %hdl)
+; CHECK-O3-NEXT:   ret void
+
+; CHECK-LABEL: @f(
+; CHECK: resume:
+; CHECK: br i1 true, label %same, label %diff
+
+declare i32 @readnone_func() readnone noread_thread_id
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
Index: llvm/test/Transforms/Coroutines/coro-readnone-02.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-readnone-02.ll
@@ -0,0 +1,81 @@
+; Tests that calls to a readnone function which don't cross suspend points are optimized as expected after the split.
+;
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,early-cse,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,gvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,newgvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  ; A no-op call that keeps the optimizer from trivially combining two consecutive readnone calls.
+  call void @nop()
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_SPLITTED-NEXT:  :
+; CHECK_SPLITTED-NEXT:    call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]]
+; CHECK_SPLITTED-NEXT:    call void @nop()
+; CHECK_SPLITTED-NEXT:    call void @print_same()
+;
+; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone }
+;
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: same:
+; CHECK_UNSPLITTED-NEXT:   call void @print_same()
+; CHECK_UNSPLITTED-NEXT:   br label
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: diff:
+; CHECK_UNSPLITTED-NEXT:   call void @print_diff()
+; CHECK_UNSPLITTED-NEXT:   br label
+
+declare i32 @readnone_func() readnone
+declare void @nop()
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
Index: llvm/test/Transforms/Coroutines/coro-readnone-01.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-readnone-01.ll
@@ -0,0 +1,89 @@
+; Tests that calls to a readnone function that cross suspend points are not misoptimized.
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK,CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %j = call i32 @readnone_func() readnone
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; Tests that normal (non-coroutine) functions are not affected.
+define i1 @normal_function() {
+entry:
+  %i = call i32 @readnone_func() readnone
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  ret i1 true
+
+diff:
+  call void @print_diff()
+  ret i1 false
+}
+
+; CHECK_SPLITTED-LABEL: normal_function(
+; CHECK_SPLITTED-NEXT: entry
+; CHECK_SPLITTED-NEXT:   call i32 @readnone_func()
+; CHECK_SPLITTED-NEXT:   call void @print_same()
+; CHECK_SPLITTED-NEXT:   ret i1 true
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK: br i1 %cmp, label %same, label %diff
+; CHECK-EMPTY:
+; CHECK-NEXT: same:
+; CHECK-NEXT:   call void @print_same()
+; CHECK-NEXT:   br label
+; CHECK-EMPTY:
+; CHECK-NEXT: diff:
+; CHECK-NEXT:   call void @print_diff()
+; CHECK-NEXT:   br label
+
+declare i32 @readnone_func() readnone
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -922,6 +922,7 @@
       case Attribute::WriteOnly:
       case Attribute::AllocKind:
       case Attribute::PresplitCoroutine:
+      case Attribute::NoReadThreadID:
         continue;
       // Those attributes should be safe to propagate to the extracted function.
       case Attribute::AlwaysInline:
Index: llvm/lib/Transforms/Scalar/NewGVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1610,17 +1610,25 @@
       return ExprResult::some(createVariableOrConstant(ReturnedValue));
     }
   }
-  if (AA->doesNotAccessMemory(CI)) {
+
+  if (!CI->canReadDifferentThreadIDIfMoved())
+    return ExprResult::none();
+
+  if (AA->doesNotAccessMemory(CI))
     return ExprResult::some(
         createCallExpression(CI, TOPClass->getMemoryLeader()));
-  } else if (AA->onlyReadsMemory(CI)) {
+
+  if (AA->onlyReadsMemory(CI)) {
     if (auto *MA = MSSA->getMemoryAccess(CI)) {
       auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA);
       return ExprResult::some(createCallExpression(CI, DefiningAccess));
-    } else // MSSA determined that CI does not access memory.
-      return ExprResult::some(
-          createCallExpression(CI, TOPClass->getMemoryLeader()));
+    }
+
+    // MSSA determined that CI does not access memory.
+    return ExprResult::some(
+        createCallExpression(CI, TOPClass->getMemoryLeader()));
   }
+
   return ExprResult::none();
 }
 
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -450,12 +450,16 @@
 }
 
 uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
-  if (AA->doesNotAccessMemory(C)) {
+  if (AA->doesNotAccessMemory(C) &&
+      C->canReadDifferentThreadIDIfMoved()) {
     Expression exp = createExpr(C);
     uint32_t e = assignExpNewValueNum(exp).first;
     valueNumbering[C] = e;
     return e;
-  } else if (MD && AA->onlyReadsMemory(C)) {
+  }
+
+  if (MD && AA->onlyReadsMemory(C) &&
+      C->canReadDifferentThreadIDIfMoved()) {
     Expression exp = createExpr(C);
     auto ValNum = assignExpNewValueNum(exp);
     if (ValNum.second) {
@@ -546,10 +550,10 @@
     uint32_t v = lookupOrAdd(cdep);
     valueNumbering[C] = v;
     return v;
-  } else {
-    valueNumbering[C] = nextValueNumber;
-    return nextValueNumber++;
   }
+
+  valueNumbering[C] = nextValueNumber;
+  return nextValueNumber++;
 }
 
 /// Returns true if a value number exists for the specified value.
Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -132,7 +132,9 @@
         }
         }
       }
-      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+      return CI->doesNotAccessMemory() &&
+             CI->canReadDifferentThreadIDIfMoved() &&
+             !CI->getType()->isVoidTy();
     }
     return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
            isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
@@ -463,7 +465,8 @@
       return false;
 
     CallInst *CI = dyn_cast<CallInst>(Inst);
-    if (!CI || !CI->onlyReadsMemory())
+    if (!CI || !CI->onlyReadsMemory() ||
+        !CI->canReadDifferentThreadIDIfMoved())
       return false;
     return true;
   }
Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -782,6 +782,8 @@
     return bitc::ATTR_KIND_MUSTPROGRESS;
   case Attribute::PresplitCoroutine:
     return bitc::ATTR_KIND_PRESPLIT_COROUTINE;
+  case Attribute::NoReadThreadID:
+    return bitc::ATTR_KIND_NO_READ_THREAD_ID;
   case Attribute::EndAttrKinds:
     llvm_unreachable("Can not encode end-attribute kinds marker.");
   case Attribute::None:
Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2005,6 +2005,8 @@
     return Attribute::Hot;
   case bitc::ATTR_KIND_PRESPLIT_COROUTINE:
     return Attribute::PresplitCoroutine;
+  case bitc::ATTR_KIND_NO_READ_THREAD_ID:
+    return Attribute::NoReadThreadID;
   }
 }
 
Index: llvm/include/llvm/IR/InstrTypes.h
===================================================================
--- llvm/include/llvm/IR/InstrTypes.h
+++ llvm/include/llvm/IR/InstrTypes.h
@@ -1847,6 +1847,24 @@
   /// Return true if the call should not be inlined.
   bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
   void setIsNoInline() { addFnAttr(Attribute::NoInline); }
+
+  /// Determine if the call does not read thread ID.
+  bool doesNotReadThreadID() const {
+    return hasFnAttr(Attribute::NoReadThreadID);
+  }
+  void setDoesNotReadThreadID() { addFnAttr(Attribute::NoReadThreadID); }
+
+  /// Return true if the call does not read the thread ID, or if the call does
+  /// not live in a presplit coroutine function. NOTE: despite its name, a true
+  /// result means the call can NOT observe a different thread ID if moved.
+  ///
+  /// In that case the optimizer may merge such calls, provided they do not
+  /// access memory or only read memory.
+  bool canReadDifferentThreadIDIfMoved() const {
+    return doesNotReadThreadID() || !getFunction() ||
+           !getFunction()->isPresplitCoroutine();
+  }
+
   /// Determine if the call does not access memory.
   bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); }
   void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); }
Index: llvm/include/llvm/IR/Attributes.td
===================================================================
--- llvm/include/llvm/IR/Attributes.td
+++ llvm/include/llvm/IR/Attributes.td
@@ -214,6 +214,9 @@
 /// Similar to byval but without a copy.
 def Preallocated : TypeAttr<"preallocated", [FnAttr, ParamAttr]>;
 
+/// Function does not read thread ID.
+def NoReadThreadID : EnumAttr<"noread_thread_id", [FnAttr]>;
+
 /// Function does not access memory.
 def ReadNone : EnumAttr<"readnone", [FnAttr, ParamAttr]>;
 
Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -690,6 +690,7 @@
   ATTR_KIND_PRESPLIT_COROUTINE = 83,
   ATTR_KIND_FNRETTHUNK_EXTERN = 84,
   ATTR_KIND_SKIP_PROFILE = 85,
+  ATTR_KIND_NO_READ_THREAD_ID = 86,
 };
 
 enum ComdatSelectionKindCodes {
Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -1926,6 +1926,13 @@
     function that has a ``"probe-stack"`` attribute is inlined into a
     function that has no ``"probe-stack"`` attribute at all, the resulting
     function has the ``"probe-stack"`` attribute of the callee.
+``noread_thread_id``
+    This attribute indicates that the function does not rely on the
+    identity of the current thread in any way, such as by reading the
+    current thread ID or taking the address of a thread-local variable.
+
+    If the function does rely on the identity of the current thread,
+    the behavior is undefined.
 ``readnone``
     On a function, this attribute indicates that the function computes its
     result (or decides to unwind an exception) based strictly on its arguments,

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D132352: Introduce noread_thread_id to address the thread identification problem in coroutines

Reply via email to