zoecarver created this revision.
Herald added subscribers: cfe-commits, kristof.beyls.
Herald added a project: clang.

This patch adds a virtual method `getCPUCacheLineSize()` to `TargetInfo`. 
Currently, I've only (partially) implemented the method in `X86TargetInfo`. 
It's extremely important that each CPU's cache line size is correct (e.g., we 
can't just define it as `64` across the board), so it has been a little slow 
getting to this point. There are still quite a few CPUs I haven't been able to 
find the cache line size of yet; for those, I'm returning zero so that the 
caller of this method can propagate an error. See the commented table above 
`X86TargetInfo::getCPUCacheLineSize` to check my sources for each CPU.

I'll work on the ARM CPUs next, but that will probably come later in a 
different patch.

Also, I updated the current uses of cache line sizes in the compiler to use 
this API when possible. The only one (that I could find) that I didn't update 
is in `TargetTransformInfo`. Updating that would require a more significant API 
change, which would be out of scope for this patch. It would be nice if that 
also used this API (to keep everything in one place), so I'll try to update 
that too at some point.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74918

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Sema/SemaStmt.cpp

Index: clang/lib/Sema/SemaStmt.cpp
===================================================================
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -2813,7 +2813,12 @@
   // diagnostic for these instances. 64 bytes is a common size of a cache line.
   // (The function `getTypeSize` returns the size in bits.)
   ASTContext &Ctx = SemaRef.Context;
-  if (Ctx.getTypeSize(VariableType) <= 64 * 8 &&
+
+  unsigned targetCacheLineSize = Ctx.getTargetInfo().getCPUCacheLineSize();
+  if (!targetCacheLineSize)
+    targetCacheLineSize = 64;
+
+  if (Ctx.getTypeSize(VariableType) <= targetCacheLineSize * 8 &&
       (VariableType.isTriviallyCopyableType(Ctx) ||
        hasTrivialABIAttr(VariableType)))
     return;
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -182,6 +182,8 @@
       StringRef Name,
       llvm::SmallVectorImpl<StringRef> &Features) const override;
 
+  unsigned getCPUCacheLineSize() const override;
+
   bool validateAsmConstraint(const char *&Name,
                              TargetInfo::ConstraintInfo &info) const override;
 
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -1731,6 +1731,117 @@
   }
 }
 
+// Below is based on the following information:
+// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
+// |           Processor Name           | Cache Line Size (Bytes) |                                                                            Source                                                                            |
+// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
+// | i386                               |                      64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf                                          |
+// | i486                               |                      64 | "four doublewords" https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf                                        |
+// | i586/Pentium MMX                   |                      32 | https://www.7-cpu.com/cpu/P-MMX.html                                                                                                                         |
+// | i686/Pentium                       |                      32 | https://www.7-cpu.com/cpu/P6.html                                                                                                                            |
+// | Netburst/Pentium4                  |                      64 | https://www.7-cpu.com/cpu/P4-180.html                                                                                                                        |
+// | Atom                               |                      64 | https://www.7-cpu.com/cpu/Atom.html                                                                                                                          |
+// | Westmere                           |                      64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture"                                                             |
+// | Sandy Bridge                       |                      64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html                                                                    |
+// | Ivy Bridge                         |                      64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html                                                      |
+// | Haswell                            |                      64 | https://www.7-cpu.com/cpu/Haswell.html                                                                                                                       |
+// | Broadwell                          |                      64 | https://www.7-cpu.com/cpu/Broadwell.html                                                                                                                     |
+// | Skylake (including skylake-avx512) |                      64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy"                                                                       |
+// | Cascade Lake                       |                      64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy"                                                                  |
+// | Skylake                            |                      64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy"                                                                           |
+// | Ice Lake                           |                      64 | https://www.7-cpu.com/cpu/Ice_Lake.html                                                                                                                      |
+// | Knights Landing                    |                      64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" |
+// | Knights Mill                       |                      64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache "       |
+// +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
+// Returns the cache line size in bytes for the selected CPU, or 0 if unknown.
+unsigned X86TargetInfo::getCPUCacheLineSize() const {
+  switch (CPU) {
+  // i586
+  case CK_i586:
+  case CK_Pentium:
+  case CK_PentiumMMX:
+  // i686
+  case CK_PentiumPro:
+  case CK_i686:
+  case CK_Pentium2:
+  case CK_Pentium3:
+  case CK_PentiumM:
+  case CK_C3_2:
+    return 32;
+
+  case CK_i386:
+  // i486
+  case CK_i486:
+  case CK_WinChipC6:
+  case CK_WinChip2:
+  case CK_C3:
+  // Netburst
+  case CK_Pentium4:
+  case CK_Prescott:
+  case CK_Nocona:
+  // Atom
+  case CK_Bonnell:
+  case CK_Silvermont:
+  case CK_Goldmont:
+  case CK_GoldmontPlus:
+  case CK_Tremont:
+
+  case CK_Westmere:
+  case CK_SandyBridge:
+  case CK_IvyBridge:
+  case CK_Haswell:
+  case CK_Broadwell:
+  case CK_SkylakeClient:
+  case CK_SkylakeServer:
+  case CK_Cascadelake:
+  case CK_IcelakeClient:
+  case CK_IcelakeServer:
+  case CK_KNL:
+  case CK_KNM:
+    return 64;
+  // Not yet sourced (probably all 64); 0 tells the caller it is unknown.
+  // Core
+  case CK_Core2:
+
+  case CK_Nehalem:
+  case CK_Cooperlake:
+  case CK_Cannonlake:
+  case CK_Tigerlake:
+  case CK_Lakemont:
+  // K6
+  case CK_K6:
+  case CK_K6_2:
+  case CK_K6_3:
+  // K7
+  case CK_Athlon:
+  case CK_AthlonXP:
+  // K8
+  case CK_K8:
+  case CK_K8SSE3:
+  case CK_AMDFAM10:
+  // Bobcat
+  case CK_BTVER1:
+  case CK_BTVER2:
+  // Bulldozer
+  case CK_BDVER1:
+  case CK_BDVER2:
+  case CK_BDVER3:
+  case CK_BDVER4:
+  // Zen
+  case CK_ZNVER1:
+  case CK_ZNVER2:
+  // Geode
+  case CK_Geode:
+  // The following are deprecated (and unknown):
+  case CK_x86_64:
+  case CK_Yonah:
+  case CK_Generic:
+  case CK_Penryn:
+    return 0;
+  }
+  return 0; // Unlisted CPU kind: report "unknown" instead of falling off.
+}
+
 bool X86TargetInfo::validateOutputSize(const llvm::StringMap<bool> &FeatureMap,
                                        StringRef Constraint,
                                        unsigned Size) const {
Index: clang/include/clang/Basic/TargetInfo.h
===================================================================
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -1188,6 +1188,10 @@
         "cpu_specific Multiversioning not implemented on this target");
   }
 
+  // Returns the target CPU's cache line size in bytes, or `0` if it is
+  // unknown. Targets override this; the default here always returns `0`.
+  virtual unsigned getCPUCacheLineSize() const { return 0; }
+
   // Returns maximal number of args passed in registers.
   unsigned getRegParmMax() const {
     assert(RegParmMax < 7 && "RegParmMax value is larger than AST can handle");
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to