ashwin98 updated this revision to Diff 555925.
ashwin98 marked an inline comment as done.
ashwin98 added a comment.

Sorry for the delay in getting back to you. I've updated the NOP sled 
generation and tested it out, it now seems to work with compressed instructions 
- the patched sleds were identical to what I'd previously observed when 
compiling with march=rv64g. When I had originally written this in 2021/early 
2022, some of the instructions were getting corrupted when they were being 
patched in - either this was a bug in my implementation that has been fixed 
over the course of the review process, or it might've been an issue with qemu 
or llvm at the time that has been fixed in the years since.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D117929/new/

https://reviews.llvm.org/D117929

Files:
  clang/lib/Driver/XRayArgs.cpp
  compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
  compiler-rt/lib/xray/CMakeLists.txt
  compiler-rt/lib/xray/xray_interface.cpp
  compiler-rt/lib/xray/xray_riscv.cpp
  compiler-rt/lib/xray/xray_trampoline_riscv32.S
  compiler-rt/lib/xray/xray_trampoline_riscv64.S
  compiler-rt/lib/xray/xray_tsc.h
  llvm/lib/CodeGen/XRayInstrumentation.cpp
  llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
  llvm/lib/Target/RISCV/RISCVSubtarget.h
  llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll

Index: llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=riscv32-unknown-linux-gnu -mattr=+d -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=riscv64-unknown-linux-gnu -mattr=+d -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-RISCV64 %s
+
+define i32 @foo() nounwind "function-instrument"="xray-always" {
+; CHECK:                        .p2align 2
+; CHECK-LABEL:                  .Lxray_sled_0:
+; CHECK-NEXT:                   j .Ltmp0
+; CHECK-COUNT-14:               nop
+; CHECK-RISCV64-COUNT-4:        nop
+; CHECK-LABEL:                  .Ltmp0:
+  ret i32 0
+; CHECK:                        .p2align 2
+; CHECK-LABEL:                  .Lxray_sled_1:
+; CHECK-NEXT:                   j .Ltmp1
+; CHECK-COUNT-14:               nop
+; CHECK-RISCV64-COUNT-4:        nop
+; CHECK-LABEL:                  .Ltmp1:
+; CHECK-NEXT:                   ret
+}
+; CHECK:                        .section xray_instr_map,"ao",@progbits,foo
+; CHECK-LABEL:                  .Lxray_sleds_start0:
+; CHECK:                        .Lxray_sled_0-[[TMP:.Ltmp[0-9]+]]
+; CHECK:                        .Lxray_sled_1-[[TMP:.Ltmp[0-9]+]]
+; CHECK-LABEL:                  .Lxray_sleds_end0:
Index: llvm/lib/Target/RISCV/RISCVSubtarget.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -160,6 +160,10 @@
     assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
     return UserReservedRegister[i];
   }
+  // Add XRay support - assume D and C extensions available
+  bool isXRaySupported() const override {
+    return hasStdExtD() && hasStdExtC();
+  }
 
   bool hasMacroFusion() const { return hasLUIADDIFusion(); }
 
Index: llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -16,6 +16,7 @@
 #include "MCTargetDesc/RISCVMCExpr.h"
 #include "MCTargetDesc/RISCVTargetStreamer.h"
 #include "RISCV.h"
+#include "RISCVSubtarget.h"
 #include "RISCVMachineFunctionInfo.h"
 #include "RISCVTargetMachine.h"
 #include "TargetInfo/RISCVTargetInfo.h"
@@ -84,6 +85,11 @@
   // Wrapper needed for tblgenned pseudo lowering.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
 
+  // XRay Support
+  void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI);
+  void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI);
+  void LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI);
+
   void emitStartOfAsmFile(Module &M) override;
   void emitEndOfAsmFile(Module &M) override;
 
@@ -93,6 +99,8 @@
 
 private:
   void emitAttributes();
+  // XRay Support
+  void emitSled(const MachineInstr *MI, SledKind Kind);
 
   void emitNTLHint(const MachineInstr *MI);
 
@@ -167,6 +175,26 @@
   case RISCV::PseudoRVVInitUndefM4:
   case RISCV::PseudoRVVInitUndefM8:
     return;
+  case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
+    // This switch case section is only for handling XRay sleds.
+    //
+    // patchable-function-entry is handled in lowerToMCInst
+    // Therefore, we break out of the switch statement if we encounter it here.
+    const Function &F = MI->getParent()->getParent()->getFunction();
+    if (F.hasFnAttribute("patchable-function-entry"))
+      break;
+
+    LowerPATCHABLE_FUNCTION_ENTER(MI);
+    return;
+  }
+  case TargetOpcode::PATCHABLE_FUNCTION_EXIT: {
+    LowerPATCHABLE_FUNCTION_EXIT(MI);
+    return;
+  }
+  case TargetOpcode::PATCHABLE_TAIL_CALL: {
+    LowerPATCHABLE_TAIL_CALL(MI);
+    return;
+  }
   }
 
   MCInst OutInst;
@@ -282,11 +310,68 @@
   SetupMachineFunction(MF);
   emitFunctionBody();
 
+  // Emit the XRay table
+  emitXRayTable();
+
   if (!isSameAttribute())
     RTS.emitDirectiveOptionPop();
   return false;
 }
 
+void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI) {
+  emitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI) {
+  emitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void RISCVAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI) {
+  emitSled(MI, SledKind::TAIL_CALL);
+}
+
+void RISCVAsmPrinter::emitSled(const MachineInstr *MI, SledKind Kind) {
+  // The following variable holds the count of the number of NOPs to be patched
+  // in for XRay instrumentation during compilation. RISCV64 needs 36 C.NOPs,
+  // RISCV32 needs 28 C.NOPs.
+  const uint8_t NoopsInSledCount =
+      MI->getParent()->getParent()->getSubtarget<RISCVSubtarget>().is64Bit()
+          ? 36
+          : 28;
+
+  // We want to emit the jump instruction and the nops constituting the sled.
+  // The format is as follows:
+  // .Lxray_sled_N
+  //   ALIGN
+  //   J .tmpN (60 or 76 byte jump, depending on ISA)
+  //   28 or 36 C.NOP instructions
+  // .tmpN
+
+  OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo());
+  auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+  OutStreamer->emitLabel(CurSled);
+  auto Target = OutContext.createTempSymbol();
+
+  const MCExpr *TargetExpr = MCSymbolRefExpr::create(
+      Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext);
+
+  // Emit "J bytes" instruction, which jumps over the nop sled to the actual
+  // start of function.
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addExpr(TargetExpr));
+
+  // Emit NOP instructions
+  for (int8_t I = 0; I < NoopsInSledCount; ++I)
+    EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::ADDI)
+                                     .addReg(RISCV::X0)
+                                     .addReg(RISCV::X0)
+                                     .addImm(0));
+
+  OutStreamer->emitLabel(Target);
+  recordSled(CurSled, *MI, Kind, 2);
+}
+
 void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) {
   RISCVTargetStreamer &RTS =
       static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
Index: llvm/lib/CodeGen/XRayInstrumentation.cpp
===================================================================
--- llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -230,7 +230,9 @@
     case Triple::ArchType::mips:
     case Triple::ArchType::mipsel:
     case Triple::ArchType::mips64:
-    case Triple::ArchType::mips64el: {
+    case Triple::ArchType::mips64el:
+    case Triple::ArchType::riscv32:
+    case Triple::ArchType::riscv64: {
       // For the architectures which don't have a single return instruction
       InstrumentationOptions op;
       op.HandleTailcall = false;
Index: compiler-rt/lib/xray/xray_tsc.h
===================================================================
--- compiler-rt/lib/xray/xray_tsc.h
+++ compiler-rt/lib/xray/xray_tsc.h
@@ -43,7 +43,7 @@
 #elif defined(__powerpc64__)
 #include "xray_powerpc64.inc"
 #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) ||         \
-    defined(__hexagon__) || defined(__loongarch_lp64)
+    defined(__hexagon__) || defined(__loongarch_lp64) || defined(__riscv)
 // Emulated TSC.
 // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
 //   not have a constant frequency like TSC on x86(_64), it may go faster
Index: compiler-rt/lib/xray/xray_trampoline_riscv64.S
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_trampoline_riscv64.S
@@ -0,0 +1,122 @@
+//===-- xray_trampoline_riscv64.s ----------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the riscv64-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../builtins/assembly.h"
+#include "../sanitizer_common/sanitizer_asm.h"
+
+	.text
+	.p2align 2
+	.global ASM_SYMBOL(__xray_FunctionEntry)
+	ASM_TYPE_FUNCTION(__xray_FunctionEntry)
+ASM_SYMBOL(__xray_FunctionEntry):
+	CFI_STARTPROC
+	// Push argument registers to stack
+	addi	sp, sp, -136
+	.cfi_def_cfa_offset 136
+	sd	ra, 128(sp)
+	.cfi_offset ra, -8
+	sd	a7, 120(sp)
+	sd	a6, 112(sp)
+	sd	a5, 104(sp)
+	sd	a4, 96(sp)
+	sd	a3, 88(sp)
+	sd	a2, 80(sp)
+	sd	a1, 72(sp)
+	sd	a0, 64(sp)
+	fsd	fa7, 56(sp)
+	fsd	fa6, 48(sp)
+	fsd	fa5, 40(sp)
+	fsd	fa4, 32(sp)
+	fsd	fa3, 24(sp)
+	fsd	fa2, 16(sp)
+	fsd	fa1, 8(sp)
+	fsd	fa0, 0(sp)
+
+	// Load the handler function pointer into a2
+	la	a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+	ld	a2, 0(a2)
+
+	// Handler address will be null if it is not set
+	beq	a2, x0, 1f
+
+	// If we reach here, we are tracing an event
+	// a0 already contains function id
+	// a1 = 0 means we are tracing an entry event
+	mv	a1, x0
+	jalr	a2
+
+1:
+	// Restore argument registers
+	fld	fa0, 0(sp)
+	fld	fa1, 8(sp)
+	fld	fa2, 16(sp)
+	fld	fa3, 24(sp)
+	fld	fa4, 32(sp)
+	fld	fa5, 40(sp)
+	fld	fa6, 48(sp)
+	fld	fa7, 56(sp)
+	ld	a0, 64(sp)
+	ld	a1, 72(sp)
+	ld	a2, 80(sp)
+	ld	a3, 88(sp)
+	ld	a4, 96(sp)
+	ld	a5, 104(sp)
+	ld	a6, 112(sp)
+	ld	a7, 120(sp)
+	ld	ra, 128(sp)
+	addi	sp, sp, 136
+	jr	ra
+	ASM_SIZE(__xray_FunctionEntry)
+	CFI_ENDPROC
+
+	.text
+	.p2align 2
+	.global ASM_SYMBOL(__xray_FunctionExit)
+	ASM_TYPE_FUNCTION(__xray_FunctionExit)
+ASM_SYMBOL(__xray_FunctionExit):
+	CFI_STARTPROC
+	// Push return registers to stack
+	addi	sp, sp, -40
+	.cfi_def_cfa_offset 40
+	sd	ra, 32(sp)
+	.cfi_offset ra, -8
+	sd	a1, 24(sp)
+	sd	a0, 16(sp)
+	fsd	fa1, 8(sp)
+	fsd	fa0, 0(sp)
+
+	// Load the handler function pointer into a2
+	la	a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+	ld	a2, 0(a2)
+
+	// Handler address will be null if it is not set
+	beq	a2, x0, 1f
+
+	// If we reach here, we are tracing an event
+	// a0 already contains function id
+	// a1 = 1 means we are tracing an exit event
+	addi	a1, x0, 1
+	jalr	a2
+
+1:
+	// Restore return registers
+	fld	fa0, 0(sp)
+	fld	fa1, 8(sp)
+	ld	a0, 16(sp)
+	ld	a1, 24(sp)
+	ld	ra, 32(sp)
+	addi	sp, sp, 40
+	jr	ra
+	ASM_SIZE(__xray_FunctionExit)
+	CFI_ENDPROC
Index: compiler-rt/lib/xray/xray_trampoline_riscv32.S
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_trampoline_riscv32.S
@@ -0,0 +1,128 @@
+//===-- xray_trampoline_riscv32.s ----------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This implements the riscv32-specific assembler for the trampolines.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../builtins/assembly.h"
+#include "../sanitizer_common/sanitizer_asm.h"
+
+	.text
+	.p2align 2
+	.global ASM_SYMBOL(__xray_FunctionEntry)
+	ASM_TYPE_FUNCTION(__xray_FunctionEntry)
+ASM_SYMBOL(__xray_FunctionEntry):
+	CFI_STARTPROC
+	// Push argument registers to stack
+	addi	sp, sp, -100
+	.cfi_def_cfa_offset 100
+	sw	ra, 96(sp)
+	.cfi_offset ra, -8
+	sw	a7, 92(sp)
+	sw	a6, 88(sp)
+	sw	a5, 84(sp)
+	sw	a4, 80(sp)
+	sw	a3, 76(sp)
+	sw	a2, 72(sp)
+	sw	a1, 68(sp)
+	sw	a0, 64(sp)
+	// The current implementation only supports double precision floats
+	// In case of RISCV32F, then these (faX) would still be 32 bit
+	// wide registers, so we should be incrementing by 4
+	fsd	fa7, 56(sp)
+	fsd	fa6, 48(sp)
+	fsd	fa5, 40(sp)
+	fsd	fa4, 32(sp)
+	fsd	fa3, 24(sp)
+	fsd	fa2, 16(sp)
+	fsd	fa1, 8(sp)
+	fsd	fa0, 0(sp)
+
+	// Load the handler function pointer into a2
+	li	a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+	lw	a2, 0(a2)
+
+	// Handler address will be null if it is not set
+	beq	a2, x0, 1f
+
+	// If we reach here, we are tracing an event
+	// a0 already contains function id
+	// a1 = 0 means we are tracing an entry event
+	mv	a1, x0
+	jalr	a2
+
+1:
+	// Restore argument registers
+	fld	fa0, 0(sp)
+	fld	fa1, 8(sp)
+	fld	fa2, 16(sp)
+	fld	fa3, 24(sp)
+	fld	fa4, 32(sp)
+	fld	fa5, 40(sp)
+	fld	fa6, 48(sp)
+	fld	fa7, 56(sp)
+	lw	a0, 64(sp)
+	lw	a1, 68(sp)
+	lw	a2, 72(sp)
+	lw	a3, 76(sp)
+	lw	a4, 80(sp)
+	lw	a5, 84(sp)
+	lw	a6, 88(sp)
+	lw	a7, 92(sp)
+	lw	ra, 96(sp)
+	addi	sp, sp, 100
+	jr	ra
+	ASM_SIZE(__xray_FunctionEntry)
+	CFI_ENDPROC
+
+	.text
+	.p2align 2
+	.global ASM_SYMBOL(__xray_FunctionExit)
+	ASM_TYPE_FUNCTION(__xray_FunctionExit)
+ASM_SYMBOL(__xray_FunctionExit):
+	CFI_STARTPROC
+	// Push return registers to stack
+	addi	sp, sp, -28
+	.cfi_def_cfa_offset 28
+	sw	ra, 24(sp)
+	.cfi_offset ra, -8
+	sw	a1, 20(sp)
+	sw	a0, 16(sp)
+	// The current implementation only supports double precision floats
+	// In case of RISCV32F, then these (faX) would still be 32 bit
+	// wide registers, so we should be incrementing by 4
+	fsd	fa1, 8(sp)
+	fsd	fa0, 0(sp)
+
+	// Load the handler function pointer into a2
+	li	a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
+	lw	a2, 0(a2)
+
+	// Handler address will be null if it is not set
+	beq	a2, x0, 1f
+
+	// If we reach here, we are tracing an event
+	// a0 already contains function id
+	// a1 = 1 means we are tracing an exit event
+	addi	a1, x0, 1
+	jalr	a2
+
+1:
+	// Restore return registers
+	fld	fa0, 0(sp)
+	fld	fa1, 8(sp)
+	lw	a0, 16(sp)
+	lw	a1, 20(sp)
+	lw	ra, 24(sp)
+	addi	sp, sp, 28
+	jr	ra
+	ASM_SIZE(__xray_FunctionExit)
+	CFI_ENDPROC
Index: compiler-rt/lib/xray/xray_riscv.cpp
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_riscv.cpp
@@ -0,0 +1,302 @@
+//===-- xray_riscv.cpp ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of riscv-specific routines (32- and 64-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_defs.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+namespace __xray {
+
+// The machine codes for some instructions used in runtime patching.
+enum PatchOpcodes : uint32_t {
+  PO_ADDI = 0x00000013, // addi rd, rs1, imm
+  PO_ADD = 0x00000033,  // add rd, rs1, rs2
+  PO_SW = 0x00002023,   // sw rt, base(offset)
+  PO_SD = 0x00003023,   // sd rt, base(offset)
+  PO_LUI = 0x00000037,  // lui rd, imm
+  PO_ORI = 0x00006013,  // ori rd, rs1, imm
+  PO_OR = 0x00006033,   // or rd, rs1, rs2
+  PO_SLLI = 0x00001013, // slli rd, rs, shamt
+  PO_SRLI = 0x00005013, // srli rd, rs, shamt
+  PO_JALR = 0x00000067, // jalr rs
+  PO_LW = 0x00002003,   // lw rd, base(offset)
+  PO_LD = 0x00003003,   // ld rd, base(offset)
+  PO_J = 0x0000006f,    // jal #n_bytes
+  PO_NOP = 0x00000013,  // nop - pseduo-instruction, same as addi x0, x0, 0
+};
+
+enum RegNum : uint32_t {
+  RN_R0 = 0x0,
+  RN_RA = 0x1,
+  RN_SP = 0x2,
+  RN_T0 = 0x5,
+  RN_T1 = 0x6,
+  RN_T2 = 0x7,
+  RN_A0 = 0xa,
+};
+
+static inline uint32_t
+encodeRTypeInstruction(uint32_t Opcode, uint32_t Rs1, uint32_t Rs2,
+                       uint32_t Rd) {
+  return Rs2 << 20 | Rs1 << 15 | Rd << 7 | Opcode;
+}
+
+static inline uint32_t
+encodeITypeInstruction(uint32_t Opcode, uint32_t Rs1, uint32_t Rd,
+                       uint32_t Imm) {
+  return Imm << 20 | Rs1 << 15 | Rd << 7 | Opcode;
+}
+
+static inline uint32_t
+encodeSTypeInstruction(uint32_t Opcode, uint32_t Rs1, uint32_t Rs2,
+                       uint32_t Imm) {
+  uint32_t imm_msbs = (Imm & 0xfe0) << 25;
+  uint32_t imm_lsbs = (Imm & 0x01f) << 7;
+  return imm_msbs | Rs2 << 20 | Rs1 << 15 | imm_lsbs | Opcode;
+}
+
+static inline uint32_t
+encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd,
+                       uint32_t Imm) {
+  return Imm << 12 | Rd << 7 | Opcode;
+}
+
+static inline uint32_t
+encodeJTypeInstruction(uint32_t Opcode, uint32_t Rd,
+                       uint32_t Imm) {
+  uint32_t imm_msb = (Imm & 0x80000) << 31;
+  uint32_t imm_lsbs = (Imm & 0x003ff) << 21;
+  uint32_t imm_11 = (Imm & 0x00400) << 20;
+  uint32_t imm_1912 = (Imm & 0x7f800) << 12;
+  return imm_msb | imm_lsbs | imm_11 | imm_1912 | Rd << 7 | Opcode;
+}
+
+#if SANITIZER_RISCV64
+static uint32_t hi20(uint64_t val) { return (val + 0x800) >> 12; }
+static uint32_t lo12(uint64_t val) { return val & 0xfff; }
+#elif defined(__riscv) && (__riscv_xlen == 32)
+static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
+static uint32_t lo12(uint32_t val) { return val & 0xfff; }
+#endif
+
+static inline bool patchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled,
+                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
+  // When |Enable| == true,
+  // We replace the following compile-time stub (sled):
+  //
+  // xray_sled_n:
+  //	J .tmpN
+  //	14 or 18 NOPs (56 or 72 bytes)
+  //	.tmpN
+  //
+  // With one of the following runtime patches:
+  //
+  // xray_sled_n (32-bit):
+  //    addi sp, sp, -16                                ;create stack frame
+  //    sw ra, 12(sp)                                   ;save return address
+  //    sw t2, 8(sp)                                    ;save register t2
+  //    sw t1, 4(sp)                                    ;save register t1
+  //    sw a0, 0(sp)                                    ;save register a0
+  //    lui t1, %hi(__xray_FunctionEntry/Exit)
+  //    addi t1, t1, %lo(__xray_FunctionEntry/Exit)
+  //    lui a0, %hi(function_id)
+  //    addi a0, a0, %lo(function_id)                   ;pass function id
+  //    jalr t1                                         ;call Tracing hook
+  //    lw a0, 0(sp)                                    ;restore register a0
+  //    lw t1, 4(sp)                                    ;restore register t1
+  //    lw t2, 8(sp)                                    ;restore register t2
+  //    lw ra, 12(sp)                                   ;restore return address
+  //    addi sp, sp, 16                                 ;delete stack frame
+  //
+  // xray_sled_n (64-bit):
+  //    addi sp, sp, -32                                ;create stack frame
+  //    sd ra, 24(sp)                                   ;save return address
+  //    sd t2, 16(sp)                                   ;save register t2
+  //    sd t1, 8(sp)                                    ;save register t1
+  //    sd a0, 0(sp)                                    ;save register a0
+  //    lui t2, %highest(__xray_FunctionEntry/Exit)
+  //    addi t2, t2, %higher(__xray_FunctionEntry/Exit)
+  //    slli t2, t2, 32
+  //    lui t1, t1, %hi(__xray_FunctionEntry/Exit)
+  //    addi t1, t1, %lo(__xray_FunctionEntry/Exit)
+  //    add t1, t2, t1
+  //    lui a0, %hi(function_id)
+  //    addi a0, a0, %lo(function_id)                   ;pass function id
+  //    jalr t1                                         ;call Tracing hook
+  //    ld a0, 0(sp)                                    ;restore register a0
+  //    ld t1, 8(sp)                                    ;restore register t1
+  //    ld t2, 16(sp)                                   ;restore register t2
+  //    ld ra, 24(sp)                                   ;restore return address
+  //    addi sp, sp, 32                                 ;delete stack frame
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
+  //
+  // When |Enable|==false, we set back the first instruction in the sled to be
+  //   J 60 bytes (rv32)
+  //   J 76 bytes (rv64)
+
+  uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
+  if (Enable) {
+    // If the ISA is RISCV 64, the Tracing Hook needs to be typecast to a 64 bit
+    // value
+#if SANITIZER_RISCV64
+    uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook));
+    uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint64_t>(TracingHook));
+    uint32_t HigherTracingHookAddr =
+        lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
+    uint32_t HighestTracingHookAddr =
+        hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
+    // We typecast the Tracing Hook to a 32 bit value for RISCV32
+#elif defined(__riscv) && (__riscv_xlen == 32)
+    uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook));
+    uint32_t HiTracingHookAddr = hi20((reinterpret_cast<uint32_t>(TracingHook));
+#endif
+    uint32_t LoFunctionID = lo12(FuncId);
+    uint32_t HiFunctionID = hi20(FuncId);
+    // The sled that is patched in for RISCV64 defined below. We need the entire
+    // sleds corresponding to both ISAs to be protected by defines because the
+    // first few instructions are all different, because we store doubles in
+    // case of RV64 and store words for RV32. Subsequently, we have LUI - and in
+    // case of RV64, we need extra instructions from this point on, so we see
+    // differences in addresses to which instructions are stored.
+#if SANITIZER_RISCV64
+    Address[1] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP,
+                                        RegNum::RN_RA, 0x18);
+    Address[2] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP,
+                                        RegNum::RN_T2, 0x10);
+    Address[3] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP,
+                                        RegNum::RN_T1, 0x8);
+    Address[4] = encodeSTypeInstruction(PatchOpcodes::PO_SD, RegNum::RN_SP,
+                                        RegNum::RN_A0, 0x0);
+    Address[5] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T2,
+                                        HighestTracingHookAddr);
+    Address[6] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T2,
+                                        RegNum::RN_T2, HigherTracingHookAddr);
+    Address[7] = encodeITypeInstruction(PatchOpcodes::PO_SLLI, RegNum::RN_T2,
+                                        RegNum::RN_T2, 0x20);
+    Address[8] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1,
+                                        HiTracingHookAddr);
+    Address[9] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1,
+                                        RegNum::RN_T1, LoTracingHookAddr);
+    Address[10] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_T1,
+                                         RegNum::RN_T2, RegNum::RN_T1);
+    Address[11] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0,
+                                         HiFunctionID);
+    Address[12] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_A0,
+                                         RegNum::RN_A0, LoFunctionID);
+    Address[13] = encodeITypeInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T1,
+                                         RegNum::RN_RA, 0x0);
+    Address[14] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP,
+                                         RegNum::RN_A0, 0x0);
+    Address[15] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP,
+                                         RegNum::RN_T1, 0x8);
+    Address[16] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP,
+                                         RegNum::RN_T2, 0x10);
+    Address[17] = encodeITypeInstruction(PatchOpcodes::PO_LD, RegNum::RN_SP,
+                                         RegNum::RN_RA, 0x18);
+    Address[18] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_SP,
+                                         RegNum::RN_SP, 0x20);
+    uint32_t CreateStackSpace = encodeITypeInstruction(
+        PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 0xffe0);
+#elif defined(__riscv) && (__riscv_xlen == 32)
+    Address[1] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP,
+                                        RegNum::RN_RA, 0x0c);
+    Address[2] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP,
+                                        RegNum::RN_T2, 0x08);
+    Address[3] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP,
+                                        RegNum::RN_T1, 0x4);
+    Address[4] = encodeSTypeInstruction(PatchOpcodes::PO_SW, RegNum::RN_SP,
+                                        RegNum::RN_A0, 0x0);
+    Address[5] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1,
+                                        HiTracingHookAddr);
+    Address[6] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1,
+                                        RegNum::RN_T1, LoTracingHookAddr);
+    Address[7] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0,
+                                        HiFunctionID);
+    Address[8] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_A0,
+                                        RegNum::RN_A0, LoFunctionID);
+    Address[9] = encodeITypeInstruction(PatchOpcodes::PO_JALR, RegNum::RN_T1,
+                                        RegNum::RN_RA, 0x0);
+    Address[10] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP,
+                                         RegNum::RN_A0, 0x0);
+    Address[11] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP,
+                                         RegNum::RN_T1, 0x4);
+    Address[12] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP,
+                                         RegNum::RN_T2, 0x08);
+    Address[13] = encodeITypeInstruction(PatchOpcodes::PO_LW, RegNum::RN_SP,
+                                         RegNum::RN_RA, 0x0c);
+    Address[14] = encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_SP,
+                                         RegNum::RN_SP, 0x10);
+    uint32_t CreateStackSpace = encodeITypeInstruction(
+        PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 0xfff0);
+#endif
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace,
+        std::memory_order_release);
+  } else {
+    uint32_t CreateBranch = encodeJTypeInstruction(
+    // Jump distance is different in both ISAs due to difference in size of
+    // sleds
+#if SANITIZER_RISCV64
+        PatchOpcodes::PO_J, RegNum::RN_R0,
+        0x026); // jump encodes an offset in multiples of 2 bytes. 38*2 = 76
+#elif defined(__riscv) && (__riscv_xlen == 32)
+        PatchOpcodes::PO_J, RegNum::RN_R0,
+        0x01e); // jump encodes an offset in multiples of 2 bytes. 30*2 = 60
+#endif
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateBranch,
+        std::memory_order_release);
+  }
+  return true;
+}
+
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled,
+                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, Trampoline);
+}
+
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // TODO: Implement tail exits for riscv
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+  // return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit);
+}
+
+bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
+                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // TODO: Implement for riscv?
+  return false;
+}
+
+bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
+                     const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
+  // TODO: Implement for riscv?
+  return false;
+}
+} // namespace __xray
+
+extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
+  // TODO: this will have to be implemented in the trampoline assembly file
+}
Index: compiler-rt/lib/xray/xray_interface.cpp
===================================================================
--- compiler-rt/lib/xray/xray_interface.cpp
+++ compiler-rt/lib/xray/xray_interface.cpp
@@ -56,6 +56,10 @@
 static const int16_t cSledLength = 8;
 #elif defined(__hexagon__)
 static const int16_t cSledLength = 20;
+#elif SANITIZER_RISCV64
+static const int16_t cSledLength = 76;
+#elif defined(__riscv) && (__riscv_xlen == 32)
+static const int16_t cSledLength = 60;
 #else
 #error "Unsupported CPU Architecture"
 #endif /* CPU architecture */
Index: compiler-rt/lib/xray/CMakeLists.txt
===================================================================
--- compiler-rt/lib/xray/CMakeLists.txt
+++ compiler-rt/lib/xray/CMakeLists.txt
@@ -83,6 +83,16 @@
   xray_trampoline_hexagon.S
   )
 
+set(riscv32_SOURCES
+  xray_riscv.cpp
+  xray_trampoline_riscv32.S
+  )
+
+set(riscv64_SOURCES
+  xray_riscv.cpp
+  xray_trampoline_riscv64.S
+  )
+
 set(XRAY_SOURCE_ARCHS
   arm
   armhf
@@ -141,6 +151,8 @@
   ${mips64_SOURCES}
   ${mips64el_SOURCES}
   ${powerpc64le_SOURCES}
+  ${riscv32_SOURCES}
+  ${riscv64_SOURCES}
   ${XRAY_IMPL_HEADERS}
   )
 list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES)
Index: compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
===================================================================
--- compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -80,7 +80,7 @@
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
 else()
 set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
-		powerpc64le ${HEXAGON} ${LOONGARCH64})
+		powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64})
 endif()
 set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
 
Index: clang/lib/Driver/XRayArgs.cpp
===================================================================
--- clang/lib/Driver/XRayArgs.cpp
+++ clang/lib/Driver/XRayArgs.cpp
@@ -53,6 +53,8 @@
     case llvm::Triple::mipsel:
     case llvm::Triple::mips64:
     case llvm::Triple::mips64el:
+    case llvm::Triple::riscv32:
+    case llvm::Triple::riscv64:
       break;
     default:
       D.Diag(diag::err_drv_unsupported_opt_for_target)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to