pengfei updated this revision to Diff 468080.
pengfei added a comment.

Fix lit test failures.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136040/new/

https://reviews.llvm.org/D136040

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/prfchiintrin.h
  clang/lib/Headers/x86gprintrin.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/X86/prefetchi-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Sema/builtin-prefetch.c
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
  llvm/lib/Target/X86/X86Instr3DNow.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/prefetch.ll
  llvm/test/MC/Disassembler/X86/x86-64.txt
  llvm/test/MC/X86/PREFETCH-64.s

Index: llvm/test/MC/X86/PREFETCH-64.s
===================================================================
--- llvm/test/MC/X86/PREFETCH-64.s
+++ llvm/test/MC/X86/PREFETCH-64.s
@@ -168,3 +168,50 @@
 // CHECK: encoding: [0x0f,0x0d,0x12]        
 prefetchwt1 (%rdx) 
 
+// CHECK: prefetchit0 485498096
+// CHECK: encoding: [0x0f,0x18,0x3c,0x25,0xf0,0x1c,0xf0,0x1c]
+prefetchit0 485498096
+
+// CHECK: prefetchit0 64(%rdx)
+// CHECK: encoding: [0x0f,0x18,0x7a,0x40]
+prefetchit0 64(%rdx)
+
+// CHECK: prefetchit0 64(%rdx,%rax,4)
+// CHECK: encoding: [0x0f,0x18,0x7c,0x82,0x40]
+prefetchit0 64(%rdx,%rax,4)
+
+// CHECK: prefetchit0 -64(%rdx,%rax,4)
+// CHECK: encoding: [0x0f,0x18,0x7c,0x82,0xc0]
+prefetchit0 -64(%rdx,%rax,4)
+
+// CHECK: prefetchit0 64(%rdx,%rax)
+// CHECK: encoding: [0x0f,0x18,0x7c,0x02,0x40]
+prefetchit0 64(%rdx,%rax)
+
+// CHECK: prefetchit0 (%rdx)
+// CHECK: encoding: [0x0f,0x18,0x3a]
+prefetchit0 (%rdx)
+
+// CHECK: prefetchit1 485498096
+// CHECK: encoding: [0x0f,0x18,0x34,0x25,0xf0,0x1c,0xf0,0x1c]
+prefetchit1 485498096
+
+// CHECK: prefetchit1 64(%rdx)
+// CHECK: encoding: [0x0f,0x18,0x72,0x40]
+prefetchit1 64(%rdx)
+
+// CHECK: prefetchit1 64(%rdx,%rax,4)
+// CHECK: encoding: [0x0f,0x18,0x74,0x82,0x40]
+prefetchit1 64(%rdx,%rax,4)
+
+// CHECK: prefetchit1 -64(%rdx,%rax,4)
+// CHECK: encoding: [0x0f,0x18,0x74,0x82,0xc0]
+prefetchit1 -64(%rdx,%rax,4)
+
+// CHECK: prefetchit1 64(%rdx,%rax)
+// CHECK: encoding: [0x0f,0x18,0x74,0x02,0x40]
+prefetchit1 64(%rdx,%rax)
+
+// CHECK: prefetchit1 (%rdx)
+// CHECK: encoding: [0x0f,0x18,0x32]
+prefetchit1 (%rdx)
Index: llvm/test/MC/Disassembler/X86/x86-64.txt
===================================================================
--- llvm/test/MC/Disassembler/X86/x86-64.txt
+++ llvm/test/MC/Disassembler/X86/x86-64.txt
@@ -761,3 +761,9 @@
 
 # CHECK: rdpru
 0x0f,0x01,0xfd
+
+# CHECK: prefetchit0 (%rip)
+0x0f,0x18,0x3d,0x00,0x00,0x00,0x00
+
+# CHECK: prefetchit1 (%rip)
+0x0f,0x18,0x35,0x00,0x00,0x00,0x00
Index: llvm/test/CodeGen/X86/prefetch.ll
===================================================================
--- llvm/test/CodeGen/X86/prefetch.ll
+++ llvm/test/CodeGen/X86/prefetch.ll
@@ -11,6 +11,8 @@
 ; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=3DNOW
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1,+prefetchi | FileCheck %s -check-prefix=PREFETCHWT1
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+prefetchwt1,+prefetchi | FileCheck %s -check-prefix=PREFETCHI
 
 ; Rules:
 ; 3dnow by itself get you just the single prefetch instruction with no hints
@@ -33,6 +35,8 @@
 ; SSE-NEXT:    prefetcht1 (%eax)
 ; SSE-NEXT:    prefetcht0 (%eax)
 ; SSE-NEXT:    prefetchnta (%eax)
+; SSE-NEXT:    prefetcht0 (%eax)
+; SSE-NEXT:    prefetcht1 (%eax)
 ; SSE-NEXT:    retl
 ;
 ; PRFCHWSSE-LABEL: t:
@@ -46,6 +50,8 @@
 ; PRFCHWSSE-NEXT:    prefetchw (%eax)
 ; PRFCHWSSE-NEXT:    prefetchw (%eax)
 ; PRFCHWSSE-NEXT:    prefetchw (%eax)
+; PRFCHWSSE-NEXT:    prefetcht0 (%eax)
+; PRFCHWSSE-NEXT:    prefetcht1 (%eax)
 ; PRFCHWSSE-NEXT:    retl
 ;
 ; PREFETCHWT1-LABEL: t:
@@ -59,6 +65,8 @@
 ; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
 ; PREFETCHWT1-NEXT:    prefetchw (%eax)
 ; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
+; PREFETCHWT1-NEXT:    prefetcht0 (%eax)
+; PREFETCHWT1-NEXT:    prefetcht1 (%eax)
 ; PREFETCHWT1-NEXT:    retl
 ;
 ; 3DNOW-LABEL: t:
@@ -72,7 +80,23 @@
 ; 3DNOW-NEXT:    prefetchw (%eax)
 ; 3DNOW-NEXT:    prefetchw (%eax)
 ; 3DNOW-NEXT:    prefetchw (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
 ; 3DNOW-NEXT:    retl
+;
+; PREFETCHI-LABEL: t:
+; PREFETCHI:       # %bb.0: # %entry
+; PREFETCHI-NEXT:    prefetcht2 (%rdi)
+; PREFETCHI-NEXT:    prefetcht1 (%rdi)
+; PREFETCHI-NEXT:    prefetcht0 (%rdi)
+; PREFETCHI-NEXT:    prefetchnta (%rdi)
+; PREFETCHI-NEXT:    prefetchwt1 (%rdi)
+; PREFETCHI-NEXT:    prefetchwt1 (%rdi)
+; PREFETCHI-NEXT:    prefetchw (%rdi)
+; PREFETCHI-NEXT:    prefetchwt1 (%rdi)
+; PREFETCHI-NEXT:    prefetchit0 (%rdi)
+; PREFETCHI-NEXT:    prefetchit1 (%rdi)
+; PREFETCHI-NEXT:    retq
 entry:
 	tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 1, i32 1 )
 	tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 2, i32 1 )
@@ -82,6 +106,8 @@
 	tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 2, i32 1 )
 	tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 3, i32 1 )
 	tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 0, i32 1 )
+	tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 3, i32 0 )
+	tail call void @llvm.prefetch( ptr %ptr, i32 1, i32 2, i32 0 )
 	ret void
 }
 
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -221,7 +221,8 @@
     // We implicitly enable these when we have a write prefix supporting cache
     // level OR if we have prfchw, but don't already have a read prefetch from
     // 3dnow.
-    return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1();
+    return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
+           hasPREFETCHI();
   }
   bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
   // These are generic getters that OR together all of the thunk types
Index: llvm/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/lib/Target/X86/X86InstrSSE.td
+++ llvm/lib/Target/X86/X86InstrSSE.td
@@ -3201,13 +3201,13 @@
 // Prefetch intrinsic.
 let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
 def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
-    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB;
+    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), imm)]>, TB;
 def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
-    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB;
+    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), imm)]>, TB;
 def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
-    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB;
+    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), imm)]>, TB;
 def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
-    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB;
+    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), imm)]>, TB;
 }
 
 // FIXME: How should flush instruction be modeled?
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -956,6 +956,7 @@
 def HasRDSEED    : Predicate<"Subtarget->hasRDSEED()">;
 def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
 def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
+def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
 def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
 def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
 def HasLAHFSAHF  : Predicate<"Subtarget->hasLAHFSAHF()">;
@@ -2998,6 +2999,16 @@
                    [(set EFLAGS, (X86testui))]>, XS;
 }
 
+//===----------------------------------------------------------------------===//
+// PREFETCHIT0 and PREFETCHIT1 Instructions
+//
+let Predicates = [HasPREFETCHI, In64BitMode], SchedRW = [WriteLoad] in {
+  def PREFETCHIT0 : I<0x18, MRM7m, (outs), (ins i8mem:$src),
+    "prefetchit0\t$src", [(prefetch addr:$src, (i32 1), (i32 3), (i32 0))]>, TB;
+  def PREFETCHIT1 : I<0x18, MRM6m, (outs), (ins i8mem:$src),
+    "prefetchit1\t$src", [(prefetch addr:$src, (i32 1), (i32 2), (i32 0))]>, TB;
+}
+
 //===----------------------------------------------------------------------===//
 // Pattern fragments to auto generate TBM instructions.
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/X86/X86Instr3DNow.td
===================================================================
--- llvm/lib/Target/X86/X86Instr3DNow.td
+++ llvm/lib/Target/X86/X86Instr3DNow.td
@@ -93,7 +93,7 @@
 let Predicates = [Has3DNow, NoSSEPrefetch] in
 def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
                       "prefetch\t$addr",
-                      [(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;
+                      [(prefetch addr:$addr, imm, imm, (i32 imm))]>, TB;
 
 def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
                   [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
Index: llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
===================================================================
--- llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ llvm/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -73,7 +73,8 @@
 
 bool IsPrefetchOpcode(unsigned Opcode) {
   return Opcode == X86::PREFETCHNTA || Opcode == X86::PREFETCHT0 ||
-         Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2;
+         Opcode == X86::PREFETCHT1 || Opcode == X86::PREFETCHT2 ||
+         Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1;
 }
 } // end anonymous namespace
 
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -134,6 +134,9 @@
 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                                       [FeatureAVX512]>;
+def FeaturePREFETCHI  : SubtargetFeature<"prefetchi", "HasPREFETCHI",
+                                   "true",
+                                   "Prefetch instruction with T0 or T1 Hint">;
 def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
                                    "true",
                                    "Prefetch with Intent to Write and T1 Hint">;
Index: llvm/lib/Support/X86TargetParser.cpp
===================================================================
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -581,6 +581,7 @@
 constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
 constexpr FeatureBitset ImpliedFeaturesHRESET = {};
 
+static constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
 static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
     FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
 // Key Locker Features
Index: llvm/lib/Support/Host.cpp
===================================================================
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -1808,6 +1808,7 @@
   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
+  Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
 
   bool HasLeafD = MaxLevel >= 0xd &&
                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
Index: llvm/include/llvm/Support/X86TargetParser.def
===================================================================
--- llvm/include/llvm/Support/X86TargetParser.def
+++ llvm/include/llvm/Support/X86TargetParser.def
@@ -174,6 +174,7 @@
 X86_FEATURE       (MWAITX,          "mwaitx")
 X86_FEATURE       (PCONFIG,         "pconfig")
 X86_FEATURE       (PKU,             "pku")
+X86_FEATURE       (PREFETCHI,       "prefetchi")
 X86_FEATURE       (PREFETCHWT1,     "prefetchwt1")
 X86_FEATURE       (PRFCHW,          "prfchw")
 X86_FEATURE       (PTWRITE,         "ptwrite")
Index: clang/test/Sema/builtin-prefetch.c
===================================================================
--- clang/test/Sema/builtin-prefetch.c
+++ clang/test/Sema/builtin-prefetch.c
@@ -4,11 +4,12 @@
   int a;
   __builtin_prefetch(&a);
   __builtin_prefetch(&a, 1);
-  __builtin_prefetch(&a, 1, 2);
-  __builtin_prefetch(&a, 1, 9, 3); // expected-error{{too many arguments to function}}
+  __builtin_prefetch(&a, 1, 2, 0);
+  __builtin_prefetch(&a, 1, 9, 8, 3); // expected-error{{too many arguments to function}}
   __builtin_prefetch(&a, "hello", 2); // expected-error{{argument to '__builtin_prefetch' must be a constant integer}}
   __builtin_prefetch(&a, a, 2); // expected-error{{argument to '__builtin_prefetch' must be a constant integer}}
   __builtin_prefetch(&a, 2); // expected-error{{argument value 2 is outside the valid range [0, 1]}}
   __builtin_prefetch(&a, 0, 4); // expected-error{{argument value 4 is outside the valid range [0, 3]}}
   __builtin_prefetch(&a, -1, 4); // expected-error{{argument value -1 is outside the valid range [0, 1]}}
+  __builtin_prefetch(&a, 1, 2, 3); // expected-error{{argument value 3 is outside the valid range [0, 1]}}
 }
Index: clang/test/Driver/x86-target-features.c
===================================================================
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -91,6 +91,11 @@
 // PREFETCHWT1: "-target-feature" "+prefetchwt1"
 // NO-PREFETCHWT1: "-target-feature" "-prefetchwt1"
 
+// RUN: %clang --target=i386 -march=i386 -mprefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHI %s
+// RUN: %clang --target=i386 -march=i386 -mno-prefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHI %s
+// PREFETCHI: "-target-feature" "+prefetchi"
+// NO-PREFETCHI: "-target-feature" "-prefetchi"
+
 // RUN: %clang --target=i386 -march=i386 -mclzero %s -### 2>&1 | FileCheck -check-prefix=CLZERO %s
 // RUN: %clang --target=i386 -march=i386 -mno-clzero %s -### 2>&1 | FileCheck -check-prefix=NO-CLZERO %s
 // CLZERO: "-target-feature" "+clzero"
Index: clang/test/CodeGen/X86/prefetchi-builtins.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/X86/prefetchi-builtins.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-unknown -target-feature +prefetchi -emit-llvm -o - %s | FileCheck %s
+
+
+#include <x86intrin.h>
+
+void test_m_prefetch_it0(void *p) {
+  return _m_prefetchit0(p);
+  // CHECK-LABEL: define{{.*}} void @test_m_prefetch_it0
+  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 3, i32 0)
+}
+
+void test_m_prefetch_it1(void *p) {
+  return _m_prefetchit1(p);
+  // CHECK-LABEL: define{{.*}} void @test_m_prefetch_it1
+  // CHECK: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 2, i32 0)
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -7571,15 +7571,15 @@
 bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
   unsigned NumArgs = TheCall->getNumArgs();
 
-  if (NumArgs > 3)
+  if (NumArgs > 4)
     return Diag(TheCall->getEndLoc(),
                 diag::err_typecheck_call_too_many_args_at_most)
-           << 0 /*function call*/ << 3 << NumArgs << TheCall->getSourceRange();
+           << 0 /*function call*/ << 4 << NumArgs << TheCall->getSourceRange();
 
   // Argument 0 is checked for us and the remaining arguments must be
   // constant integers.
   for (unsigned i = 1; i != NumArgs; ++i)
-    if (SemaBuiltinConstantArgRange(TheCall, i, 0, i == 1 ? 1 : 3))
+    if (SemaBuiltinConstantArgRange(TheCall, i, 0, i == 2 ? 3 : 1))
       return true;
 
   return false;
Index: clang/lib/Headers/x86gprintrin.h
===================================================================
--- clang/lib/Headers/x86gprintrin.h
+++ clang/lib/Headers/x86gprintrin.h
@@ -25,6 +25,11 @@
 #include <crc32intrin.h>
 #endif
 
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__PREFETCHI__)
+#include <prfchiintrin.h>
+#endif
+
 #if defined(__i386__)
 #define __SAVE_GPRBX "mov {%%ebx, %%eax |eax, ebx};"
 #define __RESTORE_GPRBX "mov {%%eax, %%ebx |ebx, eax};"
Index: clang/lib/Headers/prfchiintrin.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/prfchiintrin.h
@@ -0,0 +1,56 @@
+/*===---- prfchiintrin.h - PREFETCHI intrinsic -----------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __PRFCHIINTRIN_H
+#define __PRFCHIINTRIN_H
+
+#ifdef __x86_64__
+
+/// Loads an instruction sequence containing the specified memory address into
+///    all levels of the cache hierarchy.
+///
+///    Note that the effect of this intrinsic is dependent on the processor
+///    implementation.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PREFETCHIT0 instruction.
+///
+/// \param __P
+///    A pointer specifying the memory address to be prefetched.
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_prefetchit0(volatile const void *__P) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+  __builtin_prefetch((const void *)__P, 0, 3 /* _MM_HINT_T0 */, 0 /* inst */);
+#pragma clang diagnostic pop
+}
+
+/// Loads an instruction sequence containing the specified memory address into
+///    all but the first-level cache.
+///
+///    Note that the effect of this intrinsic is dependent on the processor
+///    implementation.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the \c PREFETCHIT1 instruction.
+///
+/// \param __P
+///    A pointer specifying the memory address to be prefetched.
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_m_prefetchit1(volatile const void *__P) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+  __builtin_prefetch((const void *)__P, 0, 2 /* _MM_HINT_T1 */, 0 /* inst */);
+#pragma clang diagnostic pop
+}
+#endif /* __x86_64__ */
+
+#endif /* __PRFCHIINTRIN_H */
Index: clang/lib/Headers/cpuid.h
===================================================================
--- clang/lib/Headers/cpuid.h
+++ clang/lib/Headers/cpuid.h
@@ -204,6 +204,9 @@
 #define bit_AVX512BF16    0x00000020
 #define bit_HRESET        0x00400000
 
+/* Features in %edx for leaf 7 sub-leaf 1 */
+#define bit_PREFETCHI     0x00004000
+
 /* Features in %eax for leaf 13 sub-leaf 1 */
 #define bit_XSAVEOPT    0x00000001
 #define bit_XSAVEC      0x00000002
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -2917,13 +2917,14 @@
                                              /*EmittedE=*/nullptr, IsDynamic));
   }
   case Builtin::BI__builtin_prefetch: {
-    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
+    Value *Locality, *RW, *Data, *Address = EmitScalarExpr(E->getArg(0));
     // FIXME: Technically these constants should of type 'int', yes?
     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
       llvm::ConstantInt::get(Int32Ty, 0);
     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
       llvm::ConstantInt::get(Int32Ty, 3);
-    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
+    Data = (E->getNumArgs() > 3) ? EmitScalarExpr(E->getArg(3)) :
+      llvm::ConstantInt::get(Int32Ty, 1);
     Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
   }
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -123,6 +123,7 @@
   bool HasCLFLUSHOPT = false;
   bool HasCLWB = false;
   bool HasMOVBE = false;
+  bool HasPREFETCHI = false;
   bool HasPREFETCHWT1 = false;
   bool HasRDPID = false;
   bool HasRDPRU = false;
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -290,6 +290,8 @@
       HasCLWB = true;
     } else if (Feature == "+wbnoinvd") {
       HasWBNOINVD = true;
+    } else if (Feature == "+prefetchi") {
+      HasPREFETCHI = true;
     } else if (Feature == "+prefetchwt1") {
       HasPREFETCHWT1 = true;
     } else if (Feature == "+clzero") {
@@ -738,6 +740,8 @@
     Builder.defineMacro("__SHSTK__");
   if (HasSGX)
     Builder.defineMacro("__SGX__");
+  if (HasPREFETCHI)
+    Builder.defineMacro("__PREFETCHI__");
   if (HasPREFETCHWT1)
     Builder.defineMacro("__PREFETCHWT1__");
   if (HasCLZERO)
@@ -929,6 +933,7 @@
       .Case("pconfig", true)
       .Case("pku", true)
       .Case("popcnt", true)
+      .Case("prefetchi", true)
       .Case("prefetchwt1", true)
       .Case("prfchw", true)
       .Case("ptwrite", true)
@@ -1025,6 +1030,7 @@
       .Case("pconfig", HasPCONFIG)
       .Case("pku", HasPKU)
       .Case("popcnt", HasPOPCNT)
+      .Case("prefetchi", HasPREFETCHI)
       .Case("prefetchwt1", HasPREFETCHWT1)
       .Case("prfchw", HasPRFCHW)
       .Case("ptwrite", HasPTWRITE)
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -4648,6 +4648,8 @@
 def mno_pconfig : Flag<["-"], "mno-pconfig">, Group<m_x86_Features_Group>;
 def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
 def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
+def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
+def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
 def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
 def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
 def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -550,6 +550,7 @@
 --------------------
 - Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk.
 - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC.
+- Add support for ``PREFETCHI`` instructions.
 
 DWARF Support in Clang
 ----------------------
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to