paulkirth created this revision.
paulkirth added reviewers: phosek, leonardchan, jakehehrlich, mcgrathr.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, mgorny.
Herald added projects: clang, LLVM.
paulkirth added a parent revision: D65300: [clang] [CodeGen] clang-misexpect 
prototype for compiler warnings.

This patch contains a prototype of the basic functionality of clang-misexpect 
in the PGO pipeline. clang-misexpect is a proposed clang-tool that can report 
potentially incorrect usage of __builtin_expect() by comparing the developer's 
annotation against a collected PGO profile. A more detailed proposal and 
discussion appears on the cfe-dev mailing list 
(http://lists.llvm.org/pipermail/cfe-dev/2019-July/062971.html), and a prototype 
of the frontend changes appears on Phabricator 
(https://reviews.llvm.org/D65300).

We extend the work in https://reviews.llvm.org/D65300 by adding backend support 
for IR-based and sampling-based profiles. We also supply a standalone 
clang-misexpect tool built on top of libTooling. The standalone tool uses a 
compilation database and profiling data to issue the warnings enabled by 
the new checks in Clang and LLVM.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D66324

Files:
  clang-tools-extra/CMakeLists.txt
  clang-tools-extra/clang-misexpect/CMakeLists.txt
  clang-tools-extra/clang-misexpect/ClangMisExpect.cpp
  clang-tools-extra/clang-misexpect/ClangMisExpect.h
  clang-tools-extra/clang-misexpect/tool/CMakeLists.txt
  clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Basic/DiagnosticFrontendKinds.td
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGStmt.cpp
  clang/lib/CodeGen/CMakeLists.txt
  clang/lib/CodeGen/CodeGenAction.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/CodeGen/MisExpect.cpp
  clang/lib/CodeGen/MisExpect.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/Profile/Inputs/misexpect-branch-nonconst-expect-arg.proftext
  clang/test/Profile/Inputs/misexpect-branch.proftext
  clang/test/Profile/Inputs/misexpect-switch-default-only.proftext
  clang/test/Profile/Inputs/misexpect-switch.proftext
  clang/test/Profile/misexpect-branch-cold.c
  clang/test/Profile/misexpect-branch-nonconst-expected-val.c
  clang/test/Profile/misexpect-branch.c
  clang/test/Profile/misexpect-no-warning-without-flag.c
  clang/test/Profile/misexpect-switch-default.c
  clang/test/Profile/misexpect-switch-nonconst.c
  clang/test/Profile/misexpect-switch-only-default-case.c
  clang/test/Profile/misexpect-switch.c
  llvm/include/llvm/IR/DiagnosticInfo.h
  llvm/include/llvm/IR/FixedMetadataKinds.def
  llvm/include/llvm/IR/MDBuilder.h
  llvm/include/llvm/Transforms/Utils/MisExpect.h
  llvm/lib/IR/DiagnosticInfo.cpp
  llvm/lib/IR/MDBuilder.cpp
  llvm/lib/Transforms/IPO/SampleProfile.cpp
  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
  llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
  llvm/lib/Transforms/Utils/CMakeLists.txt
  llvm/lib/Transforms/Utils/MisExpect.cpp

Index: llvm/lib/Transforms/Utils/MisExpect.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -0,0 +1,138 @@
+//===--- MisExpect.cpp - Check Use of __builtin_expect() with PGO data ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of
+// __builtin_expect(). This utility extracts the threshold values from metadata
+// associated with the instrumented Branch or Switch. The threshold values are
+// then used to determine if a warning would be emitted.
+//
+// MisExpect metadata is generated when llvm.expect intrinsics are lowered; see
+// LowerExpectIntrinsic.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MisExpect.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include <cstdint>
+#include <numeric>
+
+#define DEBUG_TYPE "misexpect"
+
+namespace {
+
+/// Select the instruction the diagnostic is reported against: the condition of
+/// a conditional branch or of a switch when that condition is itself an
+/// instruction, otherwise \p I.
+llvm::Instruction *getOprndOrInst(llvm::Instruction *I) {
+  llvm::Instruction *Ret = nullptr;
+  if (auto *B = llvm::dyn_cast<llvm::BranchInst>(I)) {
+    // getCondition() is only valid on conditional branches.
+    if (B->isConditional())
+      Ret = llvm::dyn_cast<llvm::Instruction>(B->getCondition());
+  } else if (auto *S = llvm::dyn_cast<llvm::SwitchInst>(I)) {
+    Ret = llvm::dyn_cast<llvm::Instruction>(S->getCondition());
+  }
+  return Ret ? Ret : I;
+}
+
+/// Emit a misexpect diagnostic for \p I through \p Ctx. The message is
+/// \p PercentageCorrect rendered with the "{0:P}" format.
+void emitMisexpectWarning(llvm::Instruction *I, llvm::LLVMContext &Ctx,
+                          double PercentageCorrect) {
+  // PerString must stay alive while Msg is in use: a Twine only refers to its
+  // operands, it does not copy them.
+  auto PerString = llvm::formatv("{0:P}", PercentageCorrect);
+  llvm::Twine Msg(PerString);
+  llvm::Instruction *Cond = getOprndOrInst(I);
+  Ctx.diagnose(llvm::DiagnosticInfoMisExpect(Cond, Msg));
+}
+
+} // namespace
+
+namespace llvm {
+namespace misexpect {
+
+/// Compare the profile weights of \p I against the thresholds recorded in its
+/// misexpect metadata and emit a diagnostic when the expected target was taken
+/// less often than the annotation implies. Instructions without well-formed
+/// misexpect metadata are ignored.
+void verifyMisExpect(Instruction *I, const SmallVector<uint32_t, 4> &Weights,
+                     LLVMContext &Ctx) {
+  // Expected layout: !{!"misexpect", i64 Index, i64 Likely, i64 Unlikely}.
+  auto *MisExpectData = I->getMetadata(LLVMContext::MD_misexpect);
+  if (!MisExpectData || MisExpectData->getNumOperands() < 4)
+    return;
+
+  auto *MisExpectDataName = dyn_cast<MDString>(MisExpectData->getOperand(0));
+  if (!MisExpectDataName ||
+      !MisExpectDataName->getString().equals("misexpect"))
+    return;
+
+  LLVM_DEBUG(llvm::dbgs() << "------------------\n");
+  LLVM_DEBUG(llvm::dbgs()
+             << "Function: " << I->getFunction()->getName() << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "Instruction: " << *I << ":\n");
+  LLVM_DEBUG(for (int Idx = 0, Size = Weights.size(); Idx < Size; ++Idx) {
+    llvm::dbgs() << "Weights[" << Idx << "] = " << Weights[Idx] << "\n";
+  });
+  LLVM_DEBUG(llvm::dbgs() << "------------------\n");
+
+  // Extract the values from the misexpect metadata; bail out on anything that
+  // is not an integer constant instead of dereferencing a null pointer.
+  auto *C = mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(1));
+  auto *L = mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(2));
+  auto *U = mdconst::dyn_extract<ConstantInt>(MisExpectData->getOperand(3));
+  if (!C || !L || !U)
+    return;
+
+  const uint64_t Index = C->getZExtValue();
+  const uint64_t LikelyBranchWeight = L->getZExtValue();
+  const uint64_t UnlikelyBranchWeight = U->getZExtValue();
+
+  // Index comes from metadata; never read past the end of Weights.
+  if (Index >= Weights.size())
+    return;
+
+  const uint64_t ProfileCount = Weights[Index];
+  const uint64_t CaseTotal =
+      std::accumulate(Weights.begin(), Weights.end(), uint64_t(0));
+  // Without any recorded executions there is nothing to compare against.
+  if (CaseTotal == 0)
+    return;
+
+  // Every target other than the expected one carries the unlikely weight, so
+  // a two-way branch has exactly one unlikely target.
+  const uint64_t NumUnlikelyTargets = Weights.size() - 1;
+  const uint64_t TotalBranchWeight =
+      LikelyBranchWeight + (UnlikelyBranchWeight * NumUnlikelyTargets);
+  // BranchProbability cannot be built with a zero denominator.
+  if (TotalBranchWeight == 0)
+    return;
+
+  const BranchProbability LikelyThreshold(LikelyBranchWeight,
+                                          TotalBranchWeight);
+  const uint64_t ScaledThreshold = LikelyThreshold.scale(CaseTotal);
+
+  if (ProfileCount < ScaledThreshold) {
+    const double Percentage =
+        static_cast<double>(ProfileCount) / static_cast<double>(CaseTotal);
+    emitMisexpectWarning(I, Ctx, Percentage);
+  }
+}
+
+} // namespace misexpect
+} // namespace llvm
+
+#undef DEBUG_TYPE
Index: llvm/lib/Transforms/Utils/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Utils/CMakeLists.txt
+++ llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -40,6 +40,7 @@
   LowerSwitch.cpp
   Mem2Reg.cpp
   MetaRenamer.cpp
+  MisExpect.cpp
   ModuleUtils.cpp
   NameAnonGlobals.cpp
   PredicateInfo.cpp
Index: llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -71,13 +71,15 @@
   unsigned n = SI.getNumCases(); // +1 for default case.
   SmallVector<uint32_t, 16> Weights(n + 1, UnlikelyBranchWeight);
 
-  if (Case == *SI.case_default())
-    Weights[0] = LikelyBranchWeight;
-  else
-    Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight;
+  uint64_t Index = (Case == *SI.case_default()) ? 0 : Case.getCaseIndex() + 1;
+  Weights[Index] = LikelyBranchWeight;
 
   SI.setMetadata(LLVMContext::MD_prof,
                  MDBuilder(CI->getContext()).createBranchWeights(Weights));
+  SI.setMetadata(
+      LLVMContext::MD_misexpect,
+      MDBuilder(CI->getContext())
+          .createMisExpect(Index, LikelyBranchWeight, UnlikelyBranchWeight));
 
   SI.setCondition(ArgValue);
   return true;
@@ -280,14 +282,19 @@
 
   MDBuilder MDB(CI->getContext());
   MDNode *Node;
+  MDNode *ExpNode;
 
   if ((ExpectedValue->getZExtValue() == ValueComparedTo) ==
-      (Predicate == CmpInst::ICMP_EQ))
+      (Predicate == CmpInst::ICMP_EQ)) {
     Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight);
-  else
+    ExpNode = MDB.createMisExpect(0, LikelyBranchWeight, UnlikelyBranchWeight);
+  } else {
     Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
+    ExpNode = MDB.createMisExpect(1, LikelyBranchWeight, UnlikelyBranchWeight);
+  }
 
   BSI.setMetadata(LLVMContext::MD_prof, Node);
+  BSI.setMetadata(LLVMContext::MD_misexpect, ExpNode);
 
   if (CmpI)
     CmpI->setOperand(0, ArgValue);
Index: llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -108,6 +108,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -1776,6 +1777,9 @@
                                            : Weights) {
     dbgs() << W << " ";
   } dbgs() << "\n";);
+
+  misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
   TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
   if (EmitBranchProbability) {
     std::string BrCondStr = getBranchCondString(TI);
Index: llvm/lib/Transforms/IPO/SampleProfile.cpp
===================================================================
--- llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -72,6 +72,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/MisExpect.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -1447,6 +1448,8 @@
       }
     }
 
+    misexpect::verifyMisExpect(TI, Weights, TI->getContext());
+
     uint64_t TempWeight;
     // Only set weights if there is at least one non-zero weight.
     // In any other case, let the analyzer set weights.
Index: llvm/lib/IR/MDBuilder.cpp
===================================================================
--- llvm/lib/IR/MDBuilder.cpp
+++ llvm/lib/IR/MDBuilder.cpp
@@ -309,3 +309,17 @@
   };
   return MDNode::get(Context, Vals);
 }
+
+/// Build a node of the form !{!"misexpect", i64 Index, i64 LikelyWeight,
+/// i64 UnlikelyWeight}.
+MDNode *MDBuilder::createMisExpect(uint64_t Index, uint64_t LikelyWeight,
+                                   uint64_t UnlikelyWeight) {
+  auto *IntType = Type::getInt64Ty(Context);
+  Metadata *Vals[] = {
+      createString("misexpect"),
+      createConstant(ConstantInt::get(IntType, Index)),
+      createConstant(ConstantInt::get(IntType, LikelyWeight)),
+      createConstant(ConstantInt::get(IntType, UnlikelyWeight)),
+  };
+  return MDNode::get(Context, Vals);
+}
Index: llvm/lib/IR/DiagnosticInfo.cpp
===================================================================
--- llvm/lib/IR/DiagnosticInfo.cpp
+++ llvm/lib/IR/DiagnosticInfo.cpp
@@ -370,5 +370,21 @@
   return OS.str();
 }
 
+/// Construct a misexpect warning located at \p Inst: the diagnostic is
+/// attributed to the function containing \p Inst and uses its debug location.
+DiagnosticInfoMisExpect::DiagnosticInfoMisExpect(const Instruction *Inst,
+                                                 Twine &Msg)
+    : DiagnosticInfoWithLocationBase(DK_MisExpect, DS_Warning,
+                                     *Inst->getFunction(),
+                                     Inst->getDebugLoc()),
+      Msg(Msg) {}
+
+/// Print the diagnostic as "<location>: <message>".
+void DiagnosticInfoMisExpect::print(DiagnosticPrinter &DP) const {
+  DP << getLocationStr();
+  DP << ": ";
+  DP << getMsg();
+}
+
 void OptimizationRemarkAnalysisFPCommute::anchor() {}
 void OptimizationRemarkAnalysisAliasing::anchor() {}
Index: llvm/include/llvm/Transforms/Utils/MisExpect.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Utils/MisExpect.h
@@ -0,0 +1,41 @@
+//===--- MisExpect.h - Check Use of __builtin_expect() with PGO data ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of
+// __builtin_expect(). This utility extracts the threshold values from metadata
+// associated with the instrumented Branch or Switch. The threshold values are
+// then used to determine if a warning would be emitted.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_MISEXPECT_H
+#define LLVM_TRANSFORMS_UTILS_MISEXPECT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include <cstdint>
+
+namespace llvm {
+namespace misexpect {
+
+/// verifyMisExpect - compares PGO counters to the thresholds used for
+/// __builtin_expect and warns if the PGO counters are outside of the expected
+/// range.
+/// \param I the Instruction being checked
+/// \param Weights A vector of profile weights for each target block
+/// \param Ctx The current LLVM context
+void verifyMisExpect(llvm::Instruction *I,
+                     const llvm::SmallVector<uint32_t, 4> &Weights,
+                     llvm::LLVMContext &Ctx);
+
+} // namespace misexpect
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_MISEXPECT_H
Index: llvm/include/llvm/IR/MDBuilder.h
===================================================================
--- llvm/include/llvm/IR/MDBuilder.h
+++ llvm/include/llvm/IR/MDBuilder.h
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Support/DataTypes.h"
 #include <utility>
@@ -75,6 +76,10 @@
   /// Return metadata containing the section prefix for a function.
   MDNode *createFunctionSectionPrefix(StringRef Prefix);
 
+  /// Return misexpect metadata holding the expected index and branch weights.
+  MDNode *createMisExpect(uint64_t Index, uint64_t LikelyWeight,
+                          uint64_t UnlikelyWeight);
+
   //===------------------------------------------------------------------===//
   // Range metadata.
   //===------------------------------------------------------------------===//
Index: llvm/include/llvm/IR/FixedMetadataKinds.def
===================================================================
--- llvm/include/llvm/IR/FixedMetadataKinds.def
+++ llvm/include/llvm/IR/FixedMetadataKinds.def
@@ -39,3 +39,4 @@
 LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25)
 LLVM_FIXED_MD_KIND(MD_callback, "callback", 26)
 LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27)
+LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28)
Index: llvm/include/llvm/IR/DiagnosticInfo.h
===================================================================
--- llvm/include/llvm/IR/DiagnosticInfo.h
+++ llvm/include/llvm/IR/DiagnosticInfo.h
@@ -75,7 +75,10 @@
   DK_MIRParser,
   DK_PGOProfile,
   DK_Unsupported,
+  // Built-in kinds must precede DK_FirstPluginKind: values above it are
+  // reserved for plugin diagnostics.
+  DK_MisExpect,
   DK_FirstPluginKind
 };
 
 /// Get the next available kind ID for a plugin diagnostic.
@@ -1002,6 +1003,31 @@
   void print(DiagnosticPrinter &DP) const override;
 };
 
+/// Diagnostic information for MisExpect analysis.
+class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase {
+public:
+  DiagnosticInfoMisExpect(const Function &Fn, const Twine &Msg,
+                          const DiagnosticLocation &Loc = DiagnosticLocation(),
+                          DiagnosticSeverity Severity = DS_Error)
+      : DiagnosticInfoWithLocationBase(DK_MisExpect, Severity, Fn, Loc),
+        Msg(Msg) {}
+
+  DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg);
+
+  /// \see DiagnosticInfo::print.
+  void print(DiagnosticPrinter &DP) const override;
+
+  static bool classof(const DiagnosticInfo *DI) {
+    return DI->getKind() == DK_MisExpect;
+  }
+
+  const Twine &getMsg() const { return Msg; }
+
+private:
+  /// Message to report.
+  const Twine &Msg;
+};
+
 } // end namespace llvm
 
 #endif // LLVM_IR_DIAGNOSTICINFO_H
Index: clang/test/Profile/misexpect-switch.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-switch.c
@@ -0,0 +1,41 @@
+// Test that misexpect detects mis-annotated switch statements
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-switch.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -fmisexpect
+
+int sum(int *buff, int size);
+int random_sample(int *buff, int size);
+int rand();
+void init_arry();
+
+const int inner_loop = 1000;
+const int outer_loop = 20;
+const int arry_size = 25;
+
+int arry[arry_size] = {0};
+
+int main() {
+  init_arry();
+  int val = 0;
+
+  int j, k;
+  for (j = 0; j < outer_loop; ++j) {
+    for (k = 0; k < inner_loop; ++k) {
+      unsigned condition = rand() % 10000;
+      switch (__builtin_expect(condition, 0)) { // expected-warning-re {{Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% of profiled executions.}}
+      case 0:
+        val += sum(arry, arry_size);
+        break;
+      case 1:
+      case 2:
+      case 3:
+        break;
+      default:
+        val += random_sample(arry, arry_size);
+        break;
+      } // end switch
+    }   // end inner_loop
+  }     // end outer_loop
+
+  return 0;
+}
Index: clang/test/Profile/misexpect-switch-only-default-case.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-switch-only-default-case.c
@@ -0,0 +1,35 @@
+// Test that misexpect emits no warning when there is only one switch case
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-switch-default-only.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -fmisexpect
+
+// expected-no-diagnostics
+int sum(int *buff, int size);
+int random_sample(int *buff, int size);
+int rand();
+void init_arry();
+
+const int inner_loop = 1000;
+const int outer_loop = 20;
+const int arry_size = 25;
+
+int arry[arry_size] = {0};
+
+int main() {
+  init_arry();
+  int val = 0;
+
+  int j, k;
+  for (j = 0; j < outer_loop; ++j) {
+    for (k = 0; k < inner_loop; ++k) {
+      unsigned condition = rand() % 10000;
+      switch (__builtin_expect(condition, 0)) {
+      default:
+        val += random_sample(arry, arry_size);
+        break;
+      }; // end switch
+    }    // end inner_loop
+  }      // end outer_loop
+
+  return 0;
+}
Index: clang/test/Profile/misexpect-switch-nonconst.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-switch-nonconst.c
@@ -0,0 +1,43 @@
+// Test that misexpect emits no warning when the expected value is non-const
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-switch.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -fmisexpect
+
+// expected-no-diagnostics
+int sum(int *buff, int size);
+int random_sample(int *buff, int size);
+int rand();
+void init_arry();
+
+const int inner_loop = 1000;
+const int outer_loop = 20;
+const int arry_size = 25;
+
+int arry[arry_size] = {0};
+
+int main() {
+  init_arry();
+  int val = 0;
+
+  int j, k;
+  for (j = 0; j < outer_loop; ++j) {
+    for (k = 0; k < inner_loop; ++k) {
+      unsigned condition = rand() % 10000;
+      switch (__builtin_expect(condition, rand())) {
+      case 0:
+        val += sum(arry, arry_size);
+        break;
+      case 1:
+      case 2:
+      case 3:
+      case 4:
+        val += random_sample(arry, arry_size);
+        break;
+      default:
+        __builtin_unreachable();
+      } // end switch
+    }   // end inner_loop
+  }     // end outer_loop
+
+  return 0;
+}
Index: clang/test/Profile/misexpect-switch-default.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-switch-default.c
@@ -0,0 +1,42 @@
+// Test that misexpect detects mis-annotated switch statements for default case
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-switch.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -fmisexpect
+
+int sum(int *buff, int size);
+int random_sample(int *buff, int size);
+int rand();
+void init_arry();
+
+const int inner_loop = 1000;
+const int outer_loop = 20;
+const int arry_size = 25;
+
+int arry[arry_size] = {0};
+
+int main() {
+  init_arry();
+  int val = 0;
+
+  int j, k;
+  for (j = 0; j < outer_loop; ++j) {
+    for (k = 0; k < inner_loop; ++k) {
+      unsigned condition = rand() % 5;
+      switch (__builtin_expect(condition, 6)) { // expected-warning-re {{Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% of profiled executions.}}
+      case 0:
+        val += sum(arry, arry_size);
+        break;
+      case 1:
+      case 2:
+      case 3:
+      case 4:
+        val += random_sample(arry, arry_size);
+        break;
+      default:
+        __builtin_unreachable();
+      } // end switch
+    }   // end inner_loop
+  }     // end outer_loop
+
+  return 0;
+}
Index: clang/test/Profile/misexpect-no-warning-without-flag.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-no-warning-without-flag.c
@@ -0,0 +1,27 @@
+// Test that misexpect emits no warning without -fmisexpect flag
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-branch.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify
+
+// expected-no-diagnostics
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+int foo(int);
+int baz(int);
+int buzz();
+
+const int inner_loop = 100;
+const int outer_loop = 2000;
+
+int bar() {
+  int rando = buzz();
+  int x = 0;
+  if (likely(rando % (outer_loop * inner_loop) == 0)) {
+    x = baz(rando);
+  } else {
+    x = foo(50);
+  }
+  return x;
+}
+
Index: clang/test/Profile/misexpect-branch.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-branch.c
@@ -0,0 +1,26 @@
+// Test that misexpect detects mis-annotated branches
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-branch.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -fmisexpect
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+int foo(int);
+int baz(int);
+int buzz();
+
+const int inner_loop = 100;
+const int outer_loop = 2000;
+
+int bar() {
+  int rando = buzz();
+  int x = 0;
+  if (likely(rando % (outer_loop * inner_loop) == 0)) { // expected-warning-re {{Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% of profiled executions.}}
+    x = baz(rando);
+  } else {
+    x = foo(50);
+  }
+  return x;
+}
+
Index: clang/test/Profile/misexpect-branch-nonconst-expected-val.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-branch-nonconst-expected-val.c
@@ -0,0 +1,26 @@
+// Test that misexpect emits no warning when the expected value is not a compile-time constant
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-branch-nonconst-expect-arg.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -fmisexpect
+
+// expected-no-diagnostics
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+int foo(int);
+int baz(int);
+int buzz();
+
+const int inner_loop = 100;
+const int outer_loop = 2000;
+
+int bar() {
+  int rando = buzz();
+  int x = 0;
+  if (__builtin_expect(rando % (outer_loop * inner_loop) == 0, buzz())) {
+    x = baz(rando);
+  } else {
+    x = foo(50);
+  }
+  return x;
+}
Index: clang/test/Profile/misexpect-branch-cold.c
===================================================================
--- /dev/null
+++ clang/test/Profile/misexpect-branch-cold.c
@@ -0,0 +1,27 @@
+// Test that misexpect emits no warning when prediction is correct
+
+// RUN: llvm-profdata merge %S/Inputs/misexpect-branch.proftext -o %t.profdata
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -fmisexpect
+
+// expected-no-diagnostics
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+int foo(int);
+int baz(int);
+int buzz();
+
+const int inner_loop = 100;
+const int outer_loop = 2000;
+
+int bar() {
+  int rando = buzz();
+  int x = 0;
+  if (unlikely(rando % (outer_loop * inner_loop) == 0)) {
+    x = baz(rando);
+  } else {
+    x = foo(50);
+  }
+  return x;
+}
+
Index: clang/test/Profile/Inputs/misexpect-switch.proftext
===================================================================
--- /dev/null
+++ clang/test/Profile/Inputs/misexpect-switch.proftext
@@ -0,0 +1,16 @@
+main
+# Func Hash:
+1965403898329309329
+# Num Counters:
+9
+# Counter Values:
+1
+20
+20000
+20000
+12
+26
+0
+0
+19962
+
Index: clang/test/Profile/Inputs/misexpect-switch-default-only.proftext
===================================================================
--- /dev/null
+++ clang/test/Profile/Inputs/misexpect-switch-default-only.proftext
@@ -0,0 +1,12 @@
+main
+# Func Hash:
+79676873694057560
+# Num Counters:
+5
+# Counter Values:
+1
+20
+20000
+20000
+20000
+
Index: clang/test/Profile/Inputs/misexpect-branch.proftext
===================================================================
--- /dev/null
+++ clang/test/Profile/Inputs/misexpect-branch.proftext
@@ -0,0 +1,9 @@
+bar
+# Func Hash:
+45795613684824
+# Num Counters:
+2
+# Counter Values:
+200000
+0
+
Index: clang/test/Profile/Inputs/misexpect-branch-nonconst-expect-arg.proftext
===================================================================
--- /dev/null
+++ clang/test/Profile/Inputs/misexpect-branch-nonconst-expect-arg.proftext
@@ -0,0 +1,9 @@
+bar
+# Func Hash:
+11262309464
+# Num Counters:
+2
+# Counter Values:
+200000
+2
+
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -809,6 +809,7 @@
       << Args.getLastArg(OPT_fprofile_remapping_file_EQ)->getAsString(Args)
       << "-fexperimental-new-pass-manager";
   }
+  Opts.MisExpect = Args.hasFlag(OPT_fmisexpect, OPT_fno_misexpect, false);
 
   Opts.CoverageMapping =
       Args.hasFlag(OPT_fcoverage_mapping, OPT_fno_coverage_mapping, false);
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4051,6 +4051,9 @@
   Args.AddLastArg(CmdArgs, options::OPT_ffine_grained_bitfield_accesses,
                   options::OPT_fno_fine_grained_bitfield_accesses);
 
+  if (Args.hasFlag(options::OPT_fmisexpect, options::OPT_fno_misexpect, false))
+    CmdArgs.push_back("-fmisexpect");
+
   // Handle segmented stacks.
   if (Args.hasArg(options::OPT_fsplit_stack))
     CmdArgs.push_back("-split-stacks");
Index: clang/lib/CodeGen/MisExpect.h
===================================================================
--- /dev/null
+++ clang/lib/CodeGen/MisExpect.h
@@ -0,0 +1,55 @@
+//===--- MisExpect.h - Check Use of __builtin_expect() with PGO data ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of
+// __builtin_expect(). It uses PGO profiles for validation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_MISEXPECT_H
+#define LLVM_CLANG_LIB_CODEGEN_MISEXPECT_H
+
+#include "CodeGenModule.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/Optional.h"
+
+namespace clang {
+namespace CodeGen {
+namespace MisExpect {
+
+/// CheckMisExpectBranch - check if a branch is annotated with
+/// __builtin_expect and when using profiling data, verify that the profile
+/// agrees with the use of the annotation
+/// \param Cond the conditional expression being checked
+/// \param BI the branch instruction for Cond (assumed; confirm at definition)
+/// \param TrueCount the profile counter for this block
+/// \param CurrProfCount the current total profile count
+/// \param CGM a reference to the current CodeGenModule
+void CheckMisExpectBranch(const Expr *Cond, const llvm::BranchInst *BI,
+                          uint64_t TrueCount, uint64_t CurrProfCount,
+                          CodeGenModule &CGM);
+
+/// CheckMisExpectSwitch - check if a switch is annotated with
+/// __builtin_expect and when using profiling data, verify that the profile
+/// agrees with the use of the annotation
+/// \param Call the call expression to __builtin_expect()
+/// \param SwitchInstruction the LLVM switch instruction being checked (assumed)
+/// \param SwitchWeights pointer to a vector of profile counts for each case arm
+/// \param CGM a reference to the current CodeGenModule
+void CheckMisExpectSwitch(const CallExpr *Call,
+                          llvm::SwitchInst *SwitchInstruction,
+                          llvm::SmallVector<uint64_t, 16> *SwitchWeights,
+                          CodeGenModule &CGM);
+
+} // namespace MisExpect
+} // namespace CodeGen
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_CODEGEN_MISEXPECT_H
Index: clang/lib/CodeGen/MisExpect.cpp
===================================================================
--- /dev/null
+++ clang/lib/CodeGen/MisExpect.cpp
@@ -0,0 +1,234 @@
+//===--- MisExpect.cpp - Check Use of __builtin_expect() with PGO data ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This contains code to emit warnings for potentially incorrect usage of
+// __builtin_expect(). It uses PGO profiles for validation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MisExpect.h"
+#include "CodeGenModule.h"
+#include "clang/Basic/Builtins.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticFrontend.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <numeric>
+
+#define DEBUG_TYPE "misexpect"
+namespace {
+
+using namespace clang;
+using namespace clang::CodeGen;
+
+struct SwitchDebugInfo {
+  SmallVector<uint64_t, 16> *SwitchWeights; // profile counts being checked
+  uint64_t ExpectedVal;     // value the __builtin_expect() call expects
+  uint64_t Index;           // position of the expected case in SwitchWeights
+  uint64_t TakenCount;      // profile count stored at Index
+  uint64_t ScaledThreshold; // count below which the warning is emitted
+  double Percentage;        // TakenCount as a fraction of the total count
+};
+
+// Emit a warning notifying the user that the current PGO counter is a
+// mismatch with the use of __builtin_expect()
+// \param PercentageCorrect the fraction of profiled executions in which the
+// expected target of __builtin_expect() was taken; printed as a percentage
+void EmitMisExpectWarning(const clang::CallExpr *Call, CodeGenModule &CGM,
+                          double PercentageCorrect) {
+  SourceLocation ExprLoc = Call->getBeginLoc();
+  auto PercentStr = llvm::formatv("{0:P}", PercentageCorrect).str();
+  CGM.getDiags().Report(ExprLoc, diag::warn_profile_data_misexpect)
+      << PercentStr;
+}
+
+// Prints some debug diagnostics useful when checking SwitchStmts.
+// Allows for simple comparison of the Case Value mappings to their index in the
+// SwitchWeights data structure in CGStmt.cpp
+void DebugPrintMisExpectSwitchInfo(SwitchDebugInfo SDI) {
+  assert(SDI.SwitchWeights && "Null pointer in DebugPrintMisExpectSwitchInfo");
+  LLVM_DEBUG(auto &OS = llvm::dbgs();
+             uint64_t Max = *std::max_element(SDI.SwitchWeights->begin(),
+                                              SDI.SwitchWeights->end());
+             auto size = SDI.SwitchWeights->size();
+             OS << "------------------\n";
+             for (size_t i = 0; i < size; ++i)
+               OS << "Index: " << i << "\tProfile Value:\t"
+                  << (*SDI.SwitchWeights)[i] << "\n";
+             // uint64_t seed; a plain 0 would accumulate (and truncate) in int.
+             uint64_t CaseTotal = std::accumulate(
+                 SDI.SwitchWeights->begin(), SDI.SwitchWeights->end(),
+                 uint64_t(0));
+             OS << "Profile Count:\t" << CaseTotal << "\n";
+             OS << "Expected Value:\t" << SDI.ExpectedVal << "\n";
+             OS << "Expected Index:\t" << SDI.Index << "\n";
+             OS << "Taken Count:\t" << SDI.TakenCount << "\n";
+             OS << "Max Count:\t" << Max << "\n";
+             OS << "Threshold:\t" << SDI.ScaledThreshold << "\n";
+             OS << llvm::formatv("Ratio: {0}/{1} = {2:P}\n", SDI.TakenCount,
+                                 CaseTotal, SDI.Percentage);
+             OS << "------------------\n";);
+}
+
+// getExpectedValue - Returns the value that __builtin_expect() is expecting.
+// \param BI the branch or switch instruction being checked
+// \return the constant second argument of the llvm.expect call that feeds
+// BI's condition, or None when no such call or constant is found.
+template <class BrSwtchInst>
+llvm::Optional<llvm::ConstantInt *> getExpectedValue(const BrSwtchInst *BI) {
+  // TODO: consider moving function into a support lib to improve code reuse
+  if (!BI)
+    return llvm::None;
+
+  const llvm::CallInst *CI;
+  if (const llvm::ICmpInst *CmpI =
+          dyn_cast<llvm::ICmpInst>(BI->getCondition())) {
+    auto CmpConstOperand = dyn_cast<llvm::ConstantInt>(CmpI->getOperand(1));
+    if (!CmpConstOperand)
+      return llvm::None;
+    CI = dyn_cast<llvm::CallInst>(CmpI->getOperand(0));
+  } else {
+    CI = dyn_cast<llvm::CallInst>(BI->getCondition());
+  }
+
+  if (!CI)
+    return llvm::None;
+
+  llvm::Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != llvm::Intrinsic::expect)
+    return llvm::None;
+
+  llvm::ConstantInt *ExpectedValue =
+      dyn_cast<llvm::ConstantInt>(CI->getArgOperand(1));
+
+  if (!ExpectedValue)
+    return llvm::None;
+
+  return ExpectedValue;
+}
+
+} // namespace
+
+namespace clang {
+namespace CodeGen {
+namespace MisExpect {
+
+// TODO: see LowerExpectIntrinsic.cpp for notes on sharing these constants
+const uint32_t LikelyBranchWeight = 2000;
+const uint32_t UnlikelyBranchWeight = 1;
+
+// Warns when the profile counts of a conditional branch disagree with its
+// __builtin_expect() annotation. Does nothing unless the MisExpect option and
+// profile use are enabled and getExpectedValue() finds an expected value.
+void CheckMisExpectBranch(const Expr *Cond, const llvm::BranchInst *BI,
+                          uint64_t TrueCount, uint64_t CurrProfCount,
+                          CodeGenModule &CGM) {
+  // Bind by reference: copying CodeGenOptions for every branch is wasteful.
+  const auto &CGOpt = CGM.getCodeGenOpts();
+  if (!CGOpt.MisExpect ||
+      (CGOpt.getProfileUse() == CodeGenOptions::ProfileNone))
+    return;
+
+  auto *Call = dyn_cast<CallExpr>(Cond->IgnoreImpCasts());
+  auto Exp = getExpectedValue(BI);
+
+  // A zero total count means no executions to judge the annotation by.
+  if (!Call || !Exp.hasValue() || CurrProfCount == 0)
+    return;
+
+  const uint64_t ExpectedVal = Exp.getValue()->getZExtValue();
+  const bool ExpectedTrueBranch = (ExpectedVal != 0);
+  bool IncorrectPerfCounters = false;
+  uint64_t Scaled;
+  double Percentage;
+  const uint64_t TotalBranchWeight = LikelyBranchWeight + UnlikelyBranchWeight;
+
+  if (ExpectedTrueBranch) {
+    const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight,
+                                                  TotalBranchWeight);
+    Scaled = LikelyThreshold.scale(CurrProfCount);
+    Percentage = (TrueCount / (double)CurrProfCount);
+    IncorrectPerfCounters = TrueCount < Scaled;
+  } else {
+    // Complement of the likely threshold: same denominator in both cases.
+    const llvm::BranchProbability UnlikelyThreshold(UnlikelyBranchWeight,
+                                                    TotalBranchWeight);
+    Scaled = UnlikelyThreshold.scale(CurrProfCount);
+    Percentage = ((CurrProfCount - TrueCount) / (double)CurrProfCount);
+    IncorrectPerfCounters = TrueCount > Scaled;
+  }
+
+  LLVM_DEBUG(llvm::dbgs() << "------------------\n"
+                          << "Function:" << BI->getFunction()->getName()
+                          << "\nInstruction:" << *BI << "\n"
+                          << "Expected Value:\t" << ExpectedVal << "\n"
+                          << "Current Count:\t" << CurrProfCount << "\n"
+                          << "True Count:\t" << TrueCount << "\n"
+                          << "Scaled Count:\t" << Scaled << "\n"
+                          << "------------------\n");
+
+  if (IncorrectPerfCounters)
+    EmitMisExpectWarning(Call, CGM, Percentage);
+}
+
+// Warns when the case that __builtin_expect() predicts for a switch was taken
+// less often in the profile than the likely-branch threshold requires.
+void CheckMisExpectSwitch(const CallExpr *Call, llvm::SwitchInst *SI,
+                          SmallVector<uint64_t, 16> *SwitchWeights,
+                          CodeGenModule &CGM) {
+  if (!SwitchWeights)
+    return;
+
+  Optional<llvm::ConstantInt *> ExpectedValOpt = getExpectedValue(SI);
+  if (!ExpectedValOpt.hasValue())
+    return;
+  llvm::ConstantInt *ExpectedValue = ExpectedValOpt.getValue();
+
+  llvm::SwitchInst::CaseHandle Case = *SI->findCaseValue(ExpectedValue);
+  // n unlikely targets: all cases plus the default, minus the expected one.
+  unsigned n = SI->getNumCases();
+  // The default case is always mapped to index 0 of the SwitchWeights vector.
+  // TODO: create an interface for this instead of relying on implementation
+  // details of another module.
+  uint64_t Index = (Case == *SI->case_default()) ? 0 : Case.getCaseIndex() + 1;
+  // Don't read past the weights if they are out of sync with the instruction.
+  if (Index >= SwitchWeights->size())
+    return;
+  uint64_t TakenCount = (*SwitchWeights)[Index];
+
+  uint64_t CaseTotal = std::accumulate(SwitchWeights->begin(),
+                                       SwitchWeights->end(), uint64_t(0));
+  const uint64_t TotalBranchWeight =
+      LikelyBranchWeight + (UnlikelyBranchWeight * n);
+  double Percentage = ((double)TakenCount / (double)CaseTotal);
+  const llvm::BranchProbability LikelyThreshold(LikelyBranchWeight,
+                                                TotalBranchWeight);
+  auto ScaledThreshold = LikelyThreshold.scale(CaseTotal);
+
+  LLVM_DEBUG(DebugPrintMisExpectSwitchInfo(
+      {SwitchWeights, ExpectedValue->getZExtValue(), Index, TakenCount,
+       ScaledThreshold, Percentage}));
+
+  if (TakenCount < ScaledThreshold)
+    EmitMisExpectWarning(Call, CGM, Percentage);
+}
+#undef DEBUG_TYPE
+
+} // namespace MisExpect
+} // namespace CodeGen
+} // namespace clang
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -20,6 +20,7 @@
 #include "CodeGenModule.h"
 #include "CodeGenPGO.h"
 #include "TargetInfo.h"
+#include "MisExpect.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTLambda.h"
 #include "clang/AST/Decl.h"
@@ -33,6 +34,7 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Operator.h"
@@ -1360,8 +1362,6 @@
   return true;
 }
 
-
-
 /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an if
 /// statement) to the specified blocks.  Based on the condition, this might try
 /// to simplify the codegen of the conditional based on the branch.
@@ -1558,7 +1558,9 @@
     ApplyDebugLocation DL(*this, Cond);
     CondV = EvaluateExprAsBool(Cond);
   }
-  Builder.CreateCondBr(CondV, TrueBlock, FalseBlock, Weights, Unpredictable);
+  llvm::BranchInst *BI = Builder.CreateCondBr(CondV, TrueBlock, FalseBlock,
+                                              Weights, Unpredictable);
+  MisExpect::CheckMisExpectBranch(Cond, BI, TrueCount, CurrentCount, CGM);
 }
 
 /// ErrorUnsupported - Print out an error that codegen doesn't support the
Index: clang/lib/CodeGen/CodeGenAction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenAction.cpp
+++ clang/lib/CodeGen/CodeGenAction.cpp
@@ -14,6 +14,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclGroup.h"
+#include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Basic/SourceManager.h"
@@ -363,6 +364,7 @@
     bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D);
     /// Specialized handler for unsupported backend feature diagnostic.
     void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D);
+    void MisExpectDiagHandler(const llvm::DiagnosticInfoMisExpect &D);
     /// Specialized handlers for optimization remarks.
     /// Note that these handlers only accept remarks and they always handle
     /// them.
@@ -615,6 +617,25 @@
         << Filename << Line << Column;
 }
 
+// Reports a backend MisExpect diagnostic as a profile-data misexpect warning.
+void BackendConsumer::MisExpectDiagHandler(
+    const llvm::DiagnosticInfoMisExpect &D) {
+  bool BadDebugInfo = false;
+  StringRef Filename;
+  unsigned Line, Column;
+  FullSourceLoc Loc =
+      getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, Line, Column);
+
+  Diags.Report(Loc, diag::warn_profile_data_misexpect) << D.getMsg().str();
+
+  // If the file:line:col information could not be translated back to a
+  // SourceLocation (e.g. because of #line directives), add a note saying so.
+  if (!BadDebugInfo)
+    return;
+  Diags.Report(Loc, diag::note_fe_backend_invalid_loc)
+      << Filename << Line << Column;
+}
+
 void BackendConsumer::EmitOptimizationMessage(
     const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) {
   // We only support warnings and remarks.
@@ -785,6 +806,9 @@
   case llvm::DK_Unsupported:
     UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI));
     return;
+  case llvm::DK_MisExpect:
+    MisExpectDiagHandler(cast<DiagnosticInfoMisExpect>(DI));
+    return;
   default:
     // Plugin IDs are not bound to any value as they are set dynamically.
     ComputeDiagRemarkID(Severity, backend_plugin, DiagID);
Index: clang/lib/CodeGen/CMakeLists.txt
===================================================================
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -87,6 +87,7 @@
   ItaniumCXXABI.cpp
   MacroPPCallbacks.cpp
   MicrosoftCXXABI.cpp
+  MisExpect.cpp
   ModuleBuilder.cpp
   ObjectFilePCHContainerOperations.cpp
   PatternInit.cpp
Index: clang/lib/CodeGen/CGStmt.cpp
===================================================================
--- clang/lib/CodeGen/CGStmt.cpp
+++ clang/lib/CodeGen/CGStmt.cpp
@@ -14,6 +14,7 @@
 #include "CGDebugInfo.h"
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
+#include "MisExpect.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/PrettyStackTrace.h"
@@ -1698,10 +1699,15 @@
   auto *Call = dyn_cast<CallExpr>(S.getCond());
   if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
     auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
-    if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
-      llvm::MDBuilder MDHelper(getLLVMContext());
-      SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
-                              MDHelper.createUnpredictable());
+    if (FD) {
+      if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
+        llvm::MDBuilder MDHelper(getLLVMContext());
+        SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
+                                MDHelper.createUnpredictable());
+      } else if (CGM.getCodeGenOpts().MisExpect &&
+                 FD->getBuiltinID() == Builtin::BI__builtin_expect) {
+        MisExpect::CheckMisExpectSwitch(Call, SwitchInsn, SwitchWeights, CGM);
+      }
     }
   }
 
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -716,6 +716,11 @@
     Group<f_Group>, Alias<fprofile_sample_accurate>;
 def fno_auto_profile_accurate : Flag<["-"], "fno-auto-profile-accurate">,
     Group<f_Group>, Alias<fno_profile_sample_accurate>;
+def fmisexpect : Flag<["-"], "fmisexpect">,
+    Group<f_Group>, Flags<[CC1Option]>,
+    HelpText<"Validate use of __builtin_expect with instrumentation data">;
+def fno_misexpect : Flag<["-"], "fno-misexpect">,
+    Group<f_Group>, Flags<[CC1Option]>;
 def fdebug_compilation_dir : Separate<["-"], "fdebug-compilation-dir">,
     Group<f_Group>, Flags<[CC1Option, CC1AsOption, CoreOption]>,
     HelpText<"The compilation directory to embed in the debug info.">;
Index: clang/include/clang/Basic/DiagnosticGroups.td
===================================================================
--- clang/include/clang/Basic/DiagnosticGroups.td
+++ clang/include/clang/Basic/DiagnosticGroups.td
@@ -1031,6 +1031,7 @@
 def ProfileInstrMissing : DiagGroup<"profile-instr-missing">;
 def ProfileInstrOutOfDate : DiagGroup<"profile-instr-out-of-date">;
 def ProfileInstrUnprofiled : DiagGroup<"profile-instr-unprofiled">;
+def MisExpect : DiagGroup<"misexpect">;
 
 // AddressSanitizer frontend instrumentation remarks.
 def SanitizeAddressRemarks : DiagGroup<"sanitize-address">;
Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -275,6 +275,10 @@
 def warn_profile_data_unprofiled : Warning<
   "no profile data available for file \"%0\"">,
   InGroup<ProfileInstrUnprofiled>;
+def warn_profile_data_misexpect : Warning<
+      "Potential performance regression from use of __builtin_expect(): "
+      "Annotation was correct on %0 of profiled executions.">,
+  InGroup<MisExpect>;
 
 } // end of instrumentation issue category
 
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -169,6 +169,7 @@
                                    ///< enable code coverage analysis.
 CODEGENOPT(DumpCoverageMapping , 1, 0) ///< Dump the generated coverage mapping
                                        ///< regions.
+CODEGENOPT(MisExpect , 1, 0) ///< Validate __builtin_expect with PGO counters
 
   /// If -fpcc-struct-return or -freg-struct-return is specified.
 ENUM_CODEGENOPT(StructReturnConvention, StructReturnConventionKind, 2, SRCK_Default)
Index: clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp
@@ -0,0 +1,84 @@
+#include "../ClangMisExpect.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Tooling/AllTUsExecution.h"
+#include "clang/Tooling/ArgumentsAdjusters.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Signals.h"
+#include <string>
+
+using namespace clang;
+using namespace clang::tooling;
+using namespace clang::misexpect;
+using Path = std::string;
+
+static llvm::cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
+static llvm::cl::OptionCategory
+    ClangMisExpectCategory("clang-misexpect options");
+
+static llvm::cl::opt<Path> ProfileDir(
+    "profile-dir",
+    llvm::cl::desc(
+        "Specify a path to the profile data to use during validation"),
+    llvm::cl::cat(ClangMisExpectCategory));
+
+static llvm::cl::opt<ProfileKind> ProfFormat(
+    "profile-format",
+    llvm::cl::desc(
+        "Specify the format of the profile data used during validation"),
+    llvm::cl::init(Clang),
+    llvm::cl::values(clEnumValN(Clang, "clang", "Clang Instrumentation"),
+                     clEnumValN(IR, "llvm", "IR Instrumentation"),
+                     clEnumValN(CSIR, "csllvm",
+                                "Context sensitive IR Instrumentation"),
+                     clEnumValN(Sample, "sample", "Sampling Instrumentation")),
+    llvm::cl::cat(ClangMisExpectCategory));
+
+// Runs MisExpectFactory through the tooling executor; returns 1 on failure.
+int main(int argc, const char **argv) {
+  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
+  ExecutorName.setInitialValue("all-TUs");
+
+  auto Executor =
+      createExecutorFromCommandLineArgs(argc, argv, ClangMisExpectCategory);
+  if (!Executor) {
+    llvm::errs() << llvm::toString(Executor.takeError()) << "\n";
+    return 1;
+  }
+  llvm::errs() << "Executor Created ... \n";
+
+  CommonOptionsParser OptionsParser(argc, argv, ClangMisExpectCategory,
+                                    llvm::cl::ZeroOrMore);
+
+  auto &CompDB = OptionsParser.getCompilations();
+  auto &OS = llvm::errs();
+  OS << "Starting execution ... \n";
+
+  auto Err =
+      Executor->get()->execute(llvm::make_unique<misexpect::MisExpectFactory>(
+                                   CompDB, ProfileDir, ProfFormat),
+                               getStripPluginsAdjuster());
+  // Report a failed run through the exit code instead of always returning 0.
+  int ExitCode = 0;
+  if (Err) {
+    OS.changeColor(raw_ostream::Colors::RED, true);
+    OS << "Error: ";
+    OS.resetColor();
+    OS << llvm::toString(std::move(Err)) << "\n";
+    ExitCode = 1;
+  }
+
+  llvm::errs() << "Execution complete\n";
+
+  // Emit collected data.
+  Executor->get()->getToolResults()->forEachResult(
+      [](llvm::StringRef key, llvm::StringRef value) {
+        llvm::errs() << "----" << key.str() << "\n" << value.str() << "\n";
+      });
+  return ExitCode;
+}
Index: clang-tools-extra/clang-misexpect/tool/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/tool/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_LINK_COMPONENTS
+  AllTargetsAsmParsers
+  AllTargetsDescs
+  AllTargetsInfos
+  support
+  )
+
+add_clang_tool(clang-misexpect
+  ClangMisExpectMain.cpp
+  )
+add_dependencies(clang-misexpect
+  clang-resource-headers
+  )
+target_link_libraries(clang-misexpect
+  PRIVATE
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangMisExpect
+  clangFrontend
+  clangCodeGen
+  clangSema
+  clangTooling
+  clangToolingCore
+  clangToolingSyntax
+  )
+
Index: clang-tools-extra/clang-misexpect/ClangMisExpect.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/ClangMisExpect.h
@@ -0,0 +1,58 @@
+//===-- ClangMisExpect.h - ClangMisexpect -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes a method to create the FrontendActionFactory for the
+// clang-misexpect tool. The factory consumes a compilation database and valid
+// profiling data to run the clang frontend over a codebase and issue warnings
+// generated from the -fmisexpect and -Wmisexpect compiler flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_MISEXPECT_CLANGMISEXPECT_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_MISEXPECT_CLANGMISEXPECT_H
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
+#include "clang/Frontend/FrontendOptions.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Rewrite/Frontend/FrontendActions.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace clang {
+namespace misexpect {
+
+/// Profile formats selectable with the tool's -profile-format option.
+enum ProfileKind { Clang, IR, CSIR, Sample };
+
+/// FrontendActionFactory that runs each invocation with the given profile.
+class MisExpectFactory : public tooling::FrontendActionFactory {
+  using Path = std::string;
+
+public:
+  MisExpectFactory(tooling::CompilationDatabase &CompDB, Path Profile,
+                   ProfileKind ProfileType);
+
+  FrontendAction *create() override;
+
+  bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
+                     FileManager *Files,
+                     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+                     DiagnosticConsumer *DiagConsumer) override;
+
+private:
+  tooling::CompilationDatabase &CompileCommands;
+  Path ProfilePath;
+  ProfileKind ProfileType;
+};
+
+} // namespace misexpect
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_MISEXPECT_CLANGMISEXPECT_H
Index: clang-tools-extra/clang-misexpect/ClangMisExpect.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/ClangMisExpect.cpp
@@ -0,0 +1,76 @@
+//===-- ClangMisExpect.cpp - ClangMisexpect ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a method to create the FrontendActionFactory for the
+// clang-misexpect tool. The factory consumes a compilation database and valid
+// profiling data to run the clang frontend over a codebase and issue warnings
+// generated from the -fmisexpect and -Wmisexpect compiler flags.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangMisExpect.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/CodeGen/CodeGenAction.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace clang::tooling;
+using namespace misexpect;
+
+#define DEBUG_TYPE "misexpect"
+
+MisExpectFactory::MisExpectFactory(tooling::CompilationDatabase &CompDB,
+                                   Path ProfilePath, ProfileKind ProfileType)
+    : CompileCommands(CompDB), ProfilePath(ProfilePath),
+      ProfileType(ProfileType) {}
+
+FrontendAction *MisExpectFactory::create() { return new EmitLLVMOnlyAction(); }
+
+// Rewrites the profile-related codegen options of Invocation so that it uses
+// the profile given to the tool, then runs it as an EmitLLVMOnly action.
+bool MisExpectFactory::runInvocation(
+    std::shared_ptr<CompilerInvocation> Invocation, FileManager *Files,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+    DiagnosticConsumer *DiagConsumer) {
+  // Only run the compiler up until the frontend generates the CFG
+  Invocation->getFrontendOpts().ProgramAction = frontend::EmitLLVMOnly;
+
+  CodeGenOptions &Opts = Invocation->getCodeGenOpts();
+  // Enable the checks explicitly; compile commands from the database
+  // presumably do not pass -fmisexpect themselves.
+  Opts.MisExpect = true;
+  // clear the existing profile flags and metadata
+  Opts.setProfileUse(CodeGenOptions::ProfileNone);
+  Opts.setProfileInstr(CodeGenOptions::ProfileNone);
+  Opts.ProfileInstrumentUsePath = "";
+  Opts.SampleProfileFile = "";
+
+  switch (ProfileType) {
+  case ProfileKind::Clang:
+    Opts.setProfileUse(CodeGenOptions::ProfileClangInstr);
+    break;
+  case ProfileKind::IR:
+    Opts.setProfileUse(CodeGenOptions::ProfileIRInstr);
+    break;
+  case ProfileKind::CSIR:
+    Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr);
+    break;
+  case ProfileKind::Sample:
+    Opts.SampleProfileFile = ProfilePath;
+    break;
+  }
+  if (ProfileType != ProfileKind::Sample)
+    Opts.ProfileInstrumentUsePath = ProfilePath;
+
+  return FrontendActionFactory::runInvocation(Invocation, Files,
+                                              PCHContainerOps, DiagConsumer);
+}
+
+#undef DEBUG_TYPE
Index: clang-tools-extra/clang-misexpect/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_clang_library(clangMisExpect
+  ClangMisExpect.cpp
+
+  DEPENDS
+  ClangSACheckers
+
+  LINK_LIBS
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangCodeGen
+  clangFormat
+  clangFrontend
+  clangFrontendTool
+  clangLex
+  clangRewrite
+  clangSema
+  clangSerialization
+  clangTooling
+  clangToolingCore
+  )
+
+add_subdirectory(tool)
Index: clang-tools-extra/CMakeLists.txt
===================================================================
--- clang-tools-extra/CMakeLists.txt
+++ clang-tools-extra/CMakeLists.txt
@@ -5,6 +5,7 @@
 add_subdirectory(modularize)
 add_subdirectory(clang-tidy)
 add_subdirectory(clang-tidy-vs)
+add_subdirectory(clang-misexpect)
 
 add_subdirectory(clang-change-namespace)
 add_subdirectory(clang-doc)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to