SixWeining created this revision.
SixWeining added reviewers: MaskRay, xen0n, xry111.
Herald added subscribers: Enna1, StephenFan, hiraditya, dberris.
Herald added a project: All.
SixWeining requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

Only support patching FunctionEntry/FunctionExit/FunctionTailExit for now.

Currently version 0 (which uses absolute addresses) is used because
LoongArch doesn't have a single 64-bit PC-relative relocation type like
the 32-bit R_LARCH_32_PCREL.
Perhaps we can use a pair of relocations (R_LARCH_ADD64/R_LARCH_SUB64)
as replacement but that needs some changes to the backend.

Depends on D140725 <https://reviews.llvm.org/D140725>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140727

Files:
  clang/lib/Driver/XRayArgs.cpp
  compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
  compiler-rt/lib/xray/CMakeLists.txt
  compiler-rt/lib/xray/xray_interface.cpp
  compiler-rt/lib/xray/xray_loongarch64.cpp
  compiler-rt/lib/xray/xray_trampoline_loongarch64.S
  compiler-rt/lib/xray/xray_tsc.h
  compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp
  compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp
  compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp
  compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp
  compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp
  compiler-rt/test/xray/TestCases/Posix/c-test.cpp
  compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp
  compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp
  compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp
  compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp
  compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp
  compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp
  compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp
  compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp
  llvm/lib/CodeGen/XRayInstrumentation.cpp
  llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
  llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
  llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
  llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
  llvm/lib/Target/LoongArch/LoongArchSubtarget.h
  llvm/lib/XRay/InstrumentationMap.cpp
  llvm/test/CodeGen/LoongArch/xray-attribute-instrumentation.ll

Index: llvm/test/CodeGen/LoongArch/xray-attribute-instrumentation.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/LoongArch/xray-attribute-instrumentation.ll
@@ -0,0 +1,71 @@
+; RUN: llc --mtriple=loongarch64 %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch64 -filetype=obj %s -o %t
+; RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC
+
+define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
+; CHECK-LABEL: foo:
+; CHECK-LABEL: .Lfunc_begin0:
+; CHECK:       .p2align 2
+; CHECK-LABEL: .Lxray_sled_begin0:
+; CHECK-NEXT:  b .Lxray_sled_end0
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-LABEL: .Lxray_sled_end0:
+  ret i32 0
+; CHECK-LABEL: .Lxray_sled_begin1:
+; CHECK-NEXT:  b .Lxray_sled_end1
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT:  nop
+; CHECK-NEXT: .Lxray_sled_end1:
+; CHECK-NEXT:  ret
+; CHECK-NEXT: .Lfunc_end0:
+}
+
+; CHECK-LABEL: .section xray_instr_map
+; CHECK-NEXT: .Lxray_sleds_start0:
+; CHECK-NEXT: .dword .Lxray_sled_begin0
+; CHECK-NEXT: .dword foo
+; CHECK-NEXT: .byte 0x00
+; CHECK-NEXT: .byte 0x01
+; CHECK-NEXT: .byte 0x00
+; CHECK-NEXT: .space 13
+; CHECK-NEXT: .dword .Lxray_sled_begin1
+; CHECK-NEXT: .dword foo
+; CHECK-NEXT: .byte 0x01
+; CHECK-NEXT: .byte 0x01
+; CHECK-NEXT: .byte 0x00
+; CHECK-NEXT: .space 13
+; CHECK-NEXT: .Lxray_sleds_end0:
+
+; CHECK-LABEL:  .section xray_fn_idx
+; CHECK:      .dword .Lxray_sleds_start0
+; CHECK-NEXT: .dword .Lxray_sleds_end0
+
+; RELOC:      Section ([[#]]) .relaxray_instr_map {
+; RELOC-NEXT:   0x0 R_LARCH_64 .text 0x0
+; RELOC-NEXT:   0x8 R_LARCH_64 foo 0x0
+; RELOC-NEXT:   0x20 R_LARCH_64 .text 0x34
+; RELOC-NEXT:   0x28 R_LARCH_64 foo 0x0
+; RELOC-NEXT: }
+; RELOC-NEXT: Section ([[#]]) .relaxray_fn_idx {
+; RELOC-NEXT:   0x0 R_LARCH_64 xray_instr_map 0x0
+; RELOC-NEXT:   0x8 R_LARCH_64 xray_instr_map 0x40
+; RELOC-NEXT: }
Index: llvm/lib/XRay/InstrumentationMap.cpp
===================================================================
--- llvm/lib/XRay/InstrumentationMap.cpp
+++ llvm/lib/XRay/InstrumentationMap.cpp
@@ -60,6 +60,7 @@
   // Find the section named "xray_instr_map".
   if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) ||
       !(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
+        ObjFile.getBinary()->getArch() == Triple::loongarch64 ||
         ObjFile.getBinary()->getArch() == Triple::ppc64le ||
         ObjFile.getBinary()->getArch() == Triple::arm ||
         ObjFile.getBinary()->getArch() == Triple::aarch64))
Index: llvm/lib/Target/LoongArch/LoongArchSubtarget.h
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -93,6 +93,7 @@
   MVT getGRLenVT() const { return GRLenVT; }
   unsigned getGRLen() const { return GRLen; }
   LoongArchABI::ABI getTargetABI() const { return TargetABI; }
+  bool isXRaySupported() const override { return is64Bit(); }
 };
 } // end namespace llvm
 
Index: llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -82,6 +82,8 @@
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableDirectMachineOperandTargetFlags() const override;
 
+  MCInst getNop() const override;
+
 protected:
   const LoongArchSubtarget &STI;
 };
Index: llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "MCTargetDesc/LoongArchMatInt.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCInstBuilder.h"
 
 using namespace llvm;
 
@@ -486,3 +487,10 @@
       {MO_GD_PC_HI, "loongarch-gd-pc-hi"}};
   return makeArrayRef(TargetFlags);
 }
+
+MCInst LoongArchInstrInfo::getNop() const {
+  return MCInstBuilder(LoongArch::ANDI)
+      .addReg(LoongArch::R0)
+      .addReg(LoongArch::R0)
+      .addImm(0);
+}
Index: llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
+++ llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
@@ -41,6 +41,12 @@
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                              const char *ExtraCode, raw_ostream &OS) override;
 
+  void emitSled(const MachineInstr &MI, SledKind Kind);
+
+  void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+  void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+  void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+
   // tblgen'erated function.
   bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
                                    const MachineInstr *MI);
Index: llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
+++ llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
@@ -17,6 +17,8 @@
 #include "MCTargetDesc/LoongArchInstPrinter.h"
 #include "TargetInfo/LoongArchTargetInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/TargetRegistry.h"
 
 using namespace llvm;
@@ -35,11 +37,78 @@
   if (emitPseudoExpansionLowering(*OutStreamer, MI))
     return;
 
+  switch (MI->getOpcode()) {
+  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+    LowerPATCHABLE_FUNCTION_ENTER(*MI);
+    return;
+
+  case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
+    LowerPATCHABLE_FUNCTION_EXIT(*MI);
+    return;
+
+  case TargetOpcode::PATCHABLE_TAIL_CALL:
+    LowerPATCHABLE_TAIL_CALL(*MI);
+    return;
+  }
+
   MCInst TmpInst;
   if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this))
     EmitToStreamer(*OutStreamer, TmpInst);
 }
 
+void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(
+    const MachineInstr &MI) {
+  // TODO: handle "patchable-function-entry" function attribute
+  emitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) {
+  emitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) {
+  emitSled(MI, SledKind::TAIL_CALL);
+}
+
+void LoongArchAsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) {
+  const int8_t NoopsInSledCount = 11;
+  // For loongarch64 we want to emit the following pattern:
+  //
+  //   ALIGN
+  // .Lxray_sled_beginN:
+  //   B .Lxray_sled_endN
+  //   11 NOP instructions (44 bytes)
+  // .Lxray_sled_endN
+  //
+  // We need the 44 bytes (11 instructions) because at runtime, we'd be patching
+  // over the full 48 bytes (12 instructions) with the following pattern:
+  //
+  //   addi.d  sp, sp, -16                        ;create stack frame
+  //   st.d    ra, sp, 8                          ;save return address
+  //   lu12i.w t0, %abs_hi20(__xray_FunctionEntry/Exit)
+  //   ori     t0, t0, %abs_lo12(__xray_FunctionEntry/Exit)
+  //   lu32i.d t0, %abs64_lo20(__xray_FunctionEntry/Exit)
+  //   lu52i.d t0, t0, %abs64_hi12(__xray_FunctionEntry/Exit)
+  //   lu12i.w t1, %abs_hi20(function_id)
+  //   ori     t1, t1, %abs_lo12(function_id)     ;pass function id
+  //   jirl    ra, t0, 0                          ;call Tracing hook
+  //   ld.d    ra, sp, 8                          ;restore return address
+  //   addi.d  sp, sp, 16                         ;delete stack frame
+  //
+  // Update compiler-rt/lib/xray/xray_loongarch64.cpp accordingly when number
+  // of instructions change.
+  OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo());
+  MCSymbol *BeginOfSled = OutContext.createTempSymbol("xray_sled_begin");
+  MCSymbol *EndOfSled = OutContext.createTempSymbol("xray_sled_end");
+  OutStreamer->emitLabel(BeginOfSled);
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(LoongArch::B)
+                     .addExpr(MCSymbolRefExpr::create(EndOfSled, OutContext)));
+  emitNops(NoopsInSledCount);
+  OutStreamer->emitLabel(EndOfSled);
+  recordSled(BeginOfSled, MI, Kind); // FIXME: use version 2
+}
+
 bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                           const char *ExtraCode,
                                           raw_ostream &OS) {
@@ -112,6 +181,8 @@
 
 bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   AsmPrinter::runOnMachineFunction(MF);
+  // Emit the XRay table for this function.
+  emitXRayTable();
   return true;
 }
 
Index: llvm/lib/CodeGen/XRayInstrumentation.cpp
===================================================================
--- llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -226,6 +226,7 @@
     case Triple::ArchType::thumb:
     case Triple::ArchType::aarch64:
     case Triple::ArchType::hexagon:
+    case Triple::ArchType::loongarch64:
     case Triple::ArchType::mips:
     case Triple::ArchType::mipsel:
     case Triple::ArchType::mips64:
Index: compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp
+++ compiler-rt/test/xray/TestCases/Posix/quiet-start.cpp
@@ -10,7 +10,7 @@
 //
 // FIXME: Understand how to make this work on other platforms
 // REQUIRES: built-in-llvm-tree
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 #include <iostream>
 
 using namespace std;
Index: compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp
+++ compiler-rt/test/xray/TestCases/Posix/profiling-single-threaded.cpp
@@ -11,7 +11,7 @@
 // RUN: [ $PROFILES -eq 2 ]
 // RUN: rm -f xray-log.profiling-single-*
 //
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 #include "xray/xray_interface.h"
Index: compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp
+++ compiler-rt/test/xray/TestCases/Posix/profiling-multi-threaded.cpp
@@ -11,7 +11,7 @@
 // RUN: [ $PROFILES -eq 1 ]
 // RUN: rm -f xray-log.profiling-multi-*
 //
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 #include "xray/xray_interface.h"
Index: compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp
+++ compiler-rt/test/xray/TestCases/Posix/fdr-thread-order.cpp
@@ -8,7 +8,7 @@
 // RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t.exe %t/* | \
 // RUN:   FileCheck %s --check-prefix TRACE
 // FIXME: Make llvm-xray work on non-x86_64 as well.
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 #include "xray/xray_log_interface.h"
Index: compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp
+++ compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp
@@ -8,7 +8,7 @@
 // RUN:   "`ls fdr-logging-1thr-* | head -n1`" | FileCheck %s
 // RUN: rm fdr-logging-1thr-*
 //
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 
 #include "xray/xray_log_interface.h"
 #include <cassert>
Index: compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp
+++ compiler-rt/test/xray/TestCases/Posix/fdr-mode-multiple.cpp
@@ -8,7 +8,7 @@
 // RUN: [ $FILES -eq 0 ]
 // RUN: rm -f fdr-inmemory-test-*
 //
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 #include "xray/xray_log_interface.h"
Index: compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp
+++ compiler-rt/test/xray/TestCases/Posix/fdr-mode-inmemory.cpp
@@ -8,7 +8,7 @@
 // RUN: [ $FILES -eq 0 ]
 // RUN: rm -f fdr-inmemory-test-*
 //
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 #include "xray/xray_log_interface.h"
Index: compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp
+++ compiler-rt/test/xray/TestCases/Posix/clang-no-xray-instrument.cpp
@@ -3,7 +3,7 @@
 //
 // RUN: %clangxx -fno-xray-instrument -c %s -o %t.o
 // RUN: not %llvm_xray extract -symbolize %t.o 2>&1 | FileCheck %s
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 // CHECK: llvm-xray: Cannot extract instrumentation map
Index: compiler-rt/test/xray/TestCases/Posix/c-test.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/c-test.cpp
+++ compiler-rt/test/xray/TestCases/Posix/c-test.cpp
@@ -4,7 +4,7 @@
 // RUN:     2>&1 | FileCheck %s
 // RUN: rm -f xray-log.c-test.*
 //
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 __attribute__((xray_always_instrument)) void always() {}
 
Index: compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp
+++ compiler-rt/test/xray/TestCases/Posix/basic-filtering.cpp
@@ -23,7 +23,7 @@
 // RUN:     FileCheck %s --check-prefix TRACE
 // RUN: rm -f basic-filtering-*
 //
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 #include <cstdio>
Index: compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp
+++ compiler-rt/test/xray/TestCases/Posix/arg1-logging-implicit-this.cpp
@@ -4,7 +4,7 @@
 // RUN: rm -f log-args-this-*
 // RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=log-args-this-" %run %t
 //
-// XFAIL: target={{(arm|aarch64|mips).*}}
+// XFAIL: target={{(arm|aarch64|loongarch64|mips).*}}
 // UNSUPPORTED: target=powerpc64le{{.*}}
 #include "xray/xray_interface.h"
 #include <cassert>
Index: compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp
+++ compiler-rt/test/xray/TestCases/Posix/arg1-logger.cpp
@@ -11,7 +11,7 @@
 // RUN: rm -f arg1-logger-*
 //
 // At the time of writing, the ARM trampolines weren't written yet.
-// XFAIL: target={{(arm|aarch64|mips).*}}
+// XFAIL: target={{(arm|aarch64|loongarch64|mips).*}}
 // See the mailing list discussion of r296998.
 // UNSUPPORTED: target=powerpc64le{{.*}}
 
Index: compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp
+++ compiler-rt/test/xray/TestCases/Posix/arg1-arg0-logging.cpp
@@ -6,7 +6,7 @@
 // RUN: XRAY_OPTIONS="patch_premain=true verbosity=1 xray_logfile_base=arg0-arg1-logging-" %run %t
 //
 // TODO: Support these in ARM and PPC
-// XFAIL: target={{(arm|aarch64|mips).*}}
+// XFAIL: target={{(arm|aarch64|loongarch64|mips).*}}
 // UNSUPPORTED: target=powerpc64le{{.*}}
 
 #include "xray/xray_interface.h"
Index: compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp
===================================================================
--- compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp
+++ compiler-rt/test/xray/TestCases/Posix/always-never-instrument.cpp
@@ -9,7 +9,7 @@
 // RUN:    FileCheck %s --check-prefix NOINSTR
 // RUN: %llvm_xray extract -symbolize %t | \
 // RUN:    FileCheck %s --check-prefix ALWAYSINSTR
-// REQUIRES: x86_64-target-arch
+// REQUIRES: x86_64-target-arch || loongarch64-target-arch
 // REQUIRES: built-in-llvm-tree
 
 // NOINSTR-NOT: {{.*__xray_NeverInstrumented.*}}
Index: compiler-rt/lib/xray/xray_tsc.h
===================================================================
--- compiler-rt/lib/xray/xray_tsc.h
+++ compiler-rt/lib/xray/xray_tsc.h
@@ -43,7 +43,7 @@
 #elif defined(__powerpc64__)
 #include "xray_powerpc64.inc"
 #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) ||         \
-    defined(__hexagon__)
+    defined(__hexagon__) || defined(__loongarch_lp64)
 // Emulated TSC.
 // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
 //   not have a constant frequency like TSC on x86(_64), it may go faster
Index: compiler-rt/lib/xray/xray_trampoline_loongarch64.S
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_trampoline_loongarch64.S
@@ -0,0 +1,121 @@
+//===-- xray_trampoline_loongarch64.s ---------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the loongarch-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+  .text
+  .file "xray_trampoline_loongarch64.S"
+  .globl __xray_FunctionEntry
+  .p2align 2
+  .type __xray_FunctionEntry,@function
+__xray_FunctionEntry:
+  .cfi_startproc
+  // Save argument registers; the 144-byte frame keeps sp 16-byte aligned.
+  .cfi_def_cfa_offset 144
+  addi.d  $sp, $sp, -144
+  st.d    $ra, $sp, 136         // ra lives at CFA-8; slot 128 is padding
+  .cfi_offset 1, -8
+  st.d    $a7, $sp, 120
+  st.d    $a6, $sp, 112
+  st.d    $a5, $sp, 104
+  st.d    $a4, $sp, 96
+  st.d    $a3, $sp, 88
+  st.d    $a2, $sp, 80
+  st.d    $a1, $sp, 72
+  st.d    $a0, $sp, 64
+  fst.d   $f7, $sp, 56
+  fst.d   $f6, $sp, 48
+  fst.d   $f5, $sp, 40
+  fst.d   $f4, $sp, 32
+  fst.d   $f3, $sp, 24
+  fst.d   $f2, $sp, 16
+  fst.d   $f1, $sp, 8
+  fst.d   $f0, $sp, 0
+
+  la.got  $t2, _ZN6__xray19XRayPatchedFunctionE
+  ld.d    $t2, $t2, 0
+
+  beqz    $t2, FunctionEntry_restore  // no handler installed: just restore
+
+  // a1=0 means that we are tracing an entry event
+  move    $a1, $zero
+  // Function ID is in t1 (the first parameter).
+  move    $a0, $t1
+  jirl    $ra, $t2, 0
+
+FunctionEntry_restore:
+  // Restore argument registers
+  fld.d   $f0, $sp, 0
+  fld.d   $f1, $sp, 8
+  fld.d   $f2, $sp, 16
+  fld.d   $f3, $sp, 24
+  fld.d   $f4, $sp, 32
+  fld.d   $f5, $sp, 40
+  fld.d   $f6, $sp, 48
+  fld.d   $f7, $sp, 56
+  ld.d    $a0, $sp, 64
+  ld.d    $a1, $sp, 72
+  ld.d    $a2, $sp, 80
+  ld.d    $a3, $sp, 88
+  ld.d    $a4, $sp, 96
+  ld.d    $a5, $sp, 104
+  ld.d    $a6, $sp, 112
+  ld.d    $a7, $sp, 120
+  ld.d    $ra, $sp, 136
+  addi.d  $sp, $sp, 144
+  ret
+FunctionEntry_end:
+  .size __xray_FunctionEntry, FunctionEntry_end-__xray_FunctionEntry
+  .cfi_endproc
+
+  .text
+  .globl __xray_FunctionExit
+  .p2align 2
+  .type __xray_FunctionExit,@function
+__xray_FunctionExit:
+  .cfi_startproc
+  // Save return registers before doing any actual work.
+  .cfi_def_cfa_offset 48
+  addi.d  $sp, $sp, -48
+  st.d    $ra, $sp, 40
+  .cfi_offset 1, -8
+  st.d    $fp, $sp, 32
+  st.d    $a1, $sp, 24
+  st.d    $a0, $sp, 16
+  fst.d   $f1, $sp, 8
+  fst.d   $f0, $sp, 0
+
+  la.got  $t2, _ZN6__xray19XRayPatchedFunctionE
+  ld.d    $t2, $t2, 0
+
+  beqz    $t2, FunctionExit_restore
+
+  // a1=1 means that we are tracing an exit event
+  ori     $a1, $zero, 1
+  // Function ID is in t1 (the first parameter).
+  move    $a0, $t1
+  jirl    $ra, $t2, 0
+
+FunctionExit_restore:
+  // Restore return registers
+  fld.d   $f0, $sp, 0
+  fld.d   $f1, $sp, 8
+  ld.d    $a1, $sp, 24
+  ld.d    $a0, $sp, 16
+  ld.d    $fp, $sp, 32
+  ld.d    $ra, $sp, 40
+  addi.d  $sp, $sp, 48
+  ret
+
+FunctionExit_end:
+  .size __xray_FunctionExit, FunctionExit_end-__xray_FunctionExit
+  .cfi_endproc
Index: compiler-rt/lib/xray/xray_loongarch64.cpp
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_loongarch64.cpp
@@ -0,0 +1,173 @@
+//===-------- xray_loongarch64.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of loongarch-specific routines.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+  PO_ADDID = 0x02c00000,  // addi.d rd, rj, imm
+  PO_SD = 0x29c00000,     // st.d rd, base, offset
+  PO_LU12IW = 0x14000000, // lu12i.w rd, imm
+  PO_ORI = 0x03800000,    // ori rd, rs, imm
+  PO_LU32ID = 0x16000000, // lu32i.d rd, imm
+  PO_LU52ID = 0x03000000, // lu52i.d rd, rj, imm
+  PO_JIRL = 0x4c000000,   // jirl rd, rj, 0
+  PO_LD = 0x28c00000,     // ld.d rd, base, offset
+  PO_B48 = 0x50003000,    // b #48
+};
+
+enum RegNum : uint32_t {
+  RN_T0 = 0xC,
+  RN_T1 = 0xD,
+  RN_RA = 0x1,
+  RN_SP = 0x3,
+};
+
+// Encode instructions in 2RI12 format, e.g. addi.d/lu52i.d/ori/ld.d/st.d.
+inline static uint32_t
+encodeInstruction2RI12(uint32_t Opcode, uint32_t Rd, uint32_t Rj,
+                       uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rj << 5 | Rd | Imm << 10);
+}
+
+// Encode instructions in 1RI20 format, e.g. lu12i.w/lu32i.d.
+inline static uint32_t
+encodeInstruction1RI20(uint32_t Opcode, uint32_t Rd,
+                       uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rd | Imm << 5);
+}
+
+// Encode instructions in 2RI16 format, e.g. jirl.
+inline static uint32_t
+encodeInstruction2RI16(uint32_t Opcode, uint32_t Rd, uint32_t Rj,
+                       uint32_t Imm) XRAY_NEVER_INSTRUMENT {
+  return (Opcode | Rj << 5 | Rd | Imm << 10);
+}
+
+inline static bool patchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled,
+                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true,
+  // We replace the following compile-time stub (sled):
+  //
+  // xray_sled_n:
+  //	B .tmpN
+  //	11 NOPs (44 bytes)
+  //	.tmpN
+  //
+  // With the following runtime patch:
+  //
+  // xray_sled_n:
+  //   addi.d  sp, sp, -16                       ;create stack frame
+  //   st.d    ra, sp, 8                         ;save return address
+  //   lu12i.w t0, %abs_hi20(__xray_FunctionEntry/Exit)
+  //   ori     t0, t0, %abs_lo12(__xray_FunctionEntry/Exit)
+  //   lu32i.d t0, %abs64_lo20(__xray_FunctionEntry/Exit)
+  //   lu52i.d t0, t0, %abs64_hi12(__xray_FunctionEntry/Exit)
+  //   lu12i.w t1, %abs_hi20(function_id)
+  //   ori     t1, t1, %abs_lo12(function_id)    ;pass function id
+  //   jirl    ra, t0, 0                         ;call Tracing hook
+  //   ld.d    ra, sp, 8                         ;restore return address
+  //   addi.d  sp, sp, 16                        ;delete stack frame
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
+  //
+  // When |Enable|==false, we set back the first instruction in the sled to be
+  //   B #48
+
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  if (Enable) {
+    uint32_t LoTracingHookAddr = reinterpret_cast<int64_t>(TracingHook) & 0xfff;
+    uint32_t HiTracingHookAddr =
+        (reinterpret_cast<int64_t>(TracingHook) >> 12) & 0xfffff;
+    uint32_t HigherTracingHookAddr =
+        (reinterpret_cast<int64_t>(TracingHook) >> 32) & 0xfffff;
+    uint32_t HighestTracingHookAddr =
+        (reinterpret_cast<int64_t>(TracingHook) >> 52) & 0xfff;
+    uint32_t LoFunctionID = FuncId & 0xfff;
+    uint32_t HiFunctionID = (FuncId >> 12) & 0xfffff;
+    Address[1] = encodeInstruction2RI12(PatchOpcodes::PO_SD, RegNum::RN_RA,
+                                        RegNum::RN_SP, 0x8);
+    Address[2] = encodeInstruction1RI20(PatchOpcodes::PO_LU12IW, RegNum::RN_T0,
+                                        HiTracingHookAddr);
+    Address[3] = encodeInstruction2RI12(PatchOpcodes::PO_ORI, RegNum::RN_T0,
+                                        RegNum::RN_T0, LoTracingHookAddr);
+    Address[4] = encodeInstruction1RI20(PatchOpcodes::PO_LU32ID, RegNum::RN_T0,
+                                        HigherTracingHookAddr);
+    Address[5] = encodeInstruction2RI12(PatchOpcodes::PO_LU52ID, RegNum::RN_T0,
+                                        RegNum::RN_T0, HighestTracingHookAddr);
+    Address[6] = encodeInstruction1RI20(PatchOpcodes::PO_LU12IW, RegNum::RN_T1,
+                                        HiFunctionID);
+    Address[7] = encodeInstruction2RI12(PatchOpcodes::PO_ORI, RegNum::RN_T1,
+                                        RegNum::RN_T1, LoFunctionID);
+    Address[8] = encodeInstruction2RI16(PatchOpcodes::PO_JIRL, RegNum::RN_RA,
+                                        RegNum::RN_T0, 0);
+    Address[9] = encodeInstruction2RI12(PatchOpcodes::PO_LD, RegNum::RN_RA,
+                                        RegNum::RN_SP, 0x8);
+    Address[10] = encodeInstruction2RI12(PatchOpcodes::PO_ADDID, RegNum::RN_SP,
+                                         RegNum::RN_SP, 0x10);
+    uint32_t CreateStackSpace = encodeInstruction2RI12(
+        PatchOpcodes::PO_ADDID, RegNum::RN_SP, RegNum::RN_SP, 0xff0);
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address),
+        uint32_t(PatchOpcodes::PO_B48), std::memory_order_release);
+  }
+  return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled,
+                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: In the future we'd need to distinguish between non-tail exits and
+  // tail exits for better information preservation.
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: Implement in loongarch?
+  return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // FIXME: Implement in loongarch?
+  return false;
+}
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+  // FIXME: this will have to be implemented in the trampoline assembly file
+}
Index: compiler-rt/lib/xray/xray_interface.cpp
===================================================================
--- compiler-rt/lib/xray/xray_interface.cpp
+++ compiler-rt/lib/xray/xray_interface.cpp
@@ -46,6 +46,8 @@
 static const int16_t cSledLength = 32;
 #elif defined(__arm__)
 static const int16_t cSledLength = 28;
+#elif SANITIZER_LOONGARCH64
+static const int16_t cSledLength = 48;
 #elif SANITIZER_MIPS32
 static const int16_t cSledLength = 48;
 #elif SANITIZER_MIPS64
Index: compiler-rt/lib/xray/CMakeLists.txt
===================================================================
--- compiler-rt/lib/xray/CMakeLists.txt
+++ compiler-rt/lib/xray/CMakeLists.txt
@@ -47,6 +47,11 @@
   xray_trampoline_AArch64.S
   )
 
+set(loongarch64_SOURCES
+  xray_loongarch64.cpp
+  xray_trampoline_loongarch64.S
+  )
+
 set(mips_SOURCES
   xray_mips.cpp
   xray_trampoline_mips.S
@@ -117,6 +122,7 @@
   ${arm_SOURCES}
   ${armhf_SOURCES}
   ${hexagon_SOURCES}
+  ${loongarch64_SOURCES}
   ${mips_SOURCES}
   ${mipsel_SOURCES}
   ${mips64_SOURCES}
Index: compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
===================================================================
--- compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -77,7 +77,7 @@
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
 else()
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
-		powerpc64le ${HEXAGON})
+		powerpc64le ${HEXAGON} ${LOONGARCH64})
 endif()
 set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
 
Index: clang/lib/Driver/XRayArgs.cpp
===================================================================
--- clang/lib/Driver/XRayArgs.cpp
+++ clang/lib/Driver/XRayArgs.cpp
@@ -42,6 +42,7 @@
     case llvm::Triple::aarch64:
     case llvm::Triple::hexagon:
     case llvm::Triple::ppc64le:
+    case llvm::Triple::loongarch64:
     case llvm::Triple::mips:
     case llvm::Triple::mipsel:
     case llvm::Triple::mips64:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D140727: [XRay] Add ini... Lu Weining via Phabricator via cfe-commits

Reply via email to