================
@@ -2460,63 +2528,108 @@ void SampleProfileMatcher::runOnFunction(const 
Function &F) {
       !ProbeManager->profileIsValid(F, *FSFlattened)) {
     // The matching result will be saved to IRToProfileLocationMap, create a 
new
     // map for each function.
+    auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
     runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
-                            getIRToProfileLocationMap(F));
+                            IRToProfileLocationMap);
+    PostMatchStats.countMismatchedCallsites(F, IRAnchors, ProfileAnchors,
+                                            IRToProfileLocationMap);
   }
 }
 
-void SampleProfileMatcher::runOnModule() {
-  ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
-                                   FunctionSamples::ProfileIsCS);
-  for (auto &F : M) {
-    if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
-      continue;
-    runOnFunction(F);
-  }
-  if (SalvageStaleProfile)
-    distributeIRToProfileLocationMap();
-
+void SampleProfileMatcher::reportOrPersistProfileStats() {
   if (ReportProfileStaleness) {
     if (FunctionSamples::ProfileIsProbeBased) {
-      errs() << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")"
+      errs() << "(" << PreMatchStats.NumMismatchedFuncHash << "/"
+             << PreMatchStats.TotalProfiledFunc << ")"
              << " of functions' profile are invalid and "
-             << " (" << MismatchedFuncHashSamples << "/" << 
TotalFuncHashSamples
-             << ")"
+             << " (" << PreMatchStats.MismatchedFuncHashSamples << "/"
+             << PreMatchStats.TotalFunctionSamples << ")"
              << " of samples are discarded due to function hash mismatch.\n";
     }
-    errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
-           << ")"
+    errs() << "(" << PreMatchStats.NumMismatchedCallsites << "/"
+           << PreMatchStats.TotalProfiledCallsites << ")"
            << " of callsites' profile are invalid and "
-           << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
-           << ")"
+           << "(" << PreMatchStats.MismatchedCallsiteSamples << "/"
+           << PreMatchStats.TotalFunctionSamples << ")"
            << " of samples are discarded due to callsite location mismatch.\n";
+    if (SalvageStaleProfile) {
+      uint64_t NumRecoveredCallsites = PostMatchStats.TotalProfiledCallsites -
+                                       PostMatchStats.NumMismatchedCallsites;
+      uint64_t NumMismatchedCallsites =
+          PreMatchStats.NumMismatchedCallsites - NumRecoveredCallsites;
+      errs() << "Out of " << PostMatchStats.TotalProfiledCallsites
+             << " callsites used for profile matching, "
+             << NumRecoveredCallsites
+             << " callsites have been recovered. After the matching, ("
+             << NumMismatchedCallsites << "/"
+             << PreMatchStats.TotalProfiledCallsites
+             << ") of callsites are still invalid ("
+             << PostMatchStats.MismatchedCallsiteSamples << "/"
+             << PreMatchStats.TotalFunctionSamples << ")"
+             << " of samples are still discarded.\n";
+    }
   }
 
   if (PersistProfileStaleness) {
     LLVMContext &Ctx = M.getContext();
     MDBuilder MDB(Ctx);
 
     SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
+    ProfStatsVec.emplace_back("NumMismatchedCallsites",
+                              PreMatchStats.NumMismatchedCallsites);
+    ProfStatsVec.emplace_back("TotalProfiledCallsites",
+                              PreMatchStats.TotalProfiledCallsites);
+    ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
+                              PreMatchStats.MismatchedCallsiteSamples);
+    ProfStatsVec.emplace_back("TotalProfiledFunc",
+                              PreMatchStats.TotalProfiledFunc);
+    ProfStatsVec.emplace_back("TotalFunctionSamples",
+                              PreMatchStats.TotalFunctionSamples);
     if (FunctionSamples::ProfileIsProbeBased) {
-      ProfStatsVec.emplace_back("NumMismatchedFuncHash", 
NumMismatchedFuncHash);
-      ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
+      ProfStatsVec.emplace_back("NumMismatchedFuncHash",
+                                PreMatchStats.NumMismatchedFuncHash);
       ProfStatsVec.emplace_back("MismatchedFuncHashSamples",
-                                MismatchedFuncHashSamples);
-      ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
+                                PreMatchStats.MismatchedFuncHashSamples);
+    }
+    if (SalvageStaleProfile) {
+      ProfStatsVec.emplace_back("PostMatchNumMismatchedCallsites",
+                                PostMatchStats.NumMismatchedCallsites);
+      ProfStatsVec.emplace_back("NumCallsitesForMatching",
+                                PostMatchStats.TotalProfiledCallsites);
+      ProfStatsVec.emplace_back("PostMatchMismatchedCallsiteSamples",
+                                PostMatchStats.MismatchedCallsiteSamples);
     }
-
-    ProfStatsVec.emplace_back("NumMismatchedCallsites", 
NumMismatchedCallsites);
-    ProfStatsVec.emplace_back("TotalProfiledCallsites", 
TotalProfiledCallsites);
-    ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
-                              MismatchedCallsiteSamples);
-    ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples);
 
     auto *MD = MDB.createLLVMStats(ProfStatsVec);
     auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
     NMD->addOperand(MD);
   }
 }
 
+void SampleProfileMatcher::runOnModule() {
+  ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+                                   FunctionSamples::ProfileIsCS);
+  for (auto &F : M) {
+    if (ShouldSkipProfileLoading(F))
+      continue;
+    runOnFunction(F);
+  }
+
+  if (SalvageStaleProfile)
+    distributeIRToProfileLocationMap();
+
+  PreMatchStats.countMismatchedCallsiteSamples();
+  if (SalvageStaleProfile) {
+    // If a function doesn't run the matching but has mismatched callsites, 
this
+    // won't be any data for that function in post-match stats, so just reuse
+    // the pre-match stats.
+    PostMatchStats.copyUnchangedCallsiteMismatches(
+        PreMatchStats.FuncMismatchedCallsites);
+    PostMatchStats.countMismatchedCallsiteSamples();
----------------
WenleiHe wrote:

I see.. Now I understand what new mismatch means -- thanks for the 
clarification.

There are two scenarios I think:

1. The original match by luck is actually incorrect too. In this case there is 
no new mismatch, but we are under-reporting input mismatches as there is no 
easy way to tell whether it's correct match or not. I seems the example you 
have is in this category. I tend to think we just need to live with it? 

2. The original match by luck is correct. In this case should we try to make 
sure the matching algorithm doesn't introduce new mismatches. Or at least make 
sure such case is very very unlikely to happen? Does this actually happen and 
would Meyers diffing have the same issue of introducing new mismatches? 

Regardless if we really want consider such cases, maybe 
`StringMap<std::unordered_map<LineLocation, MismatchState>>` with 
`MismatchState` containing one bit for input mismatch, and one bit for final 
mismatch is all we need? 

If we really want to shoot for best/most accurate metric, lucky but incorrect 
original match (like 1) should be counted as mismatch. But it's probably not 
worth the complexity to do that? 

> OK, that seems not obey the rule to move "all" the stats out of 
> function-level. No strong option on this.

I guess strictly speaking you are right. But it still moves most of the 
non-trivial counting out of matching logic. 


https://github.com/llvm/llvm-project/pull/79090
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to