[clang] [llvm] [clang-tools-extra] [libunwind] [libunwind] Replace process_vm_readv with SYS_rt_sigprocmask (PR #74791)

2023-12-19 Thread Jordan R AW via cfe-commits

https://github.com/ajordanr-google updated 
https://github.com/llvm/llvm-project/pull/74791

>From 1f4df1b82970c95684eed93c8f6bcaa6d6507b88 Mon Sep 17 00:00:00 2001
From: Jordan R Abrahams-Whitehead 
Date: Fri, 8 Dec 2023 00:09:59 +
Subject: [PATCH 01/12] [libunwind] Replace process_vm_readv with pipe

process_vm_readv is generally considered a dangerous syscall, and it is
frequently blanket-banned by seccomp filters such as those in Chromium
and ChromiumOS. We can get the same behaviour in the invalid-PC-address
case with a pipe and write/read: write() on an unreadable source address
fails with EFAULT instead of raising SIGSEGV.

To confirm that process_vm_readv no longer appears, I ran the test
binaries built by check-unwind on an ARM64 device under strace.
Previously, bad_unwind_info in particular would use process_vm_readv;
with this commit, it uses pipe2 instead:

```
strace test/Output/bad_unwind_info.pass.cpp.dir/t.tmp.exe \
  |& grep process_vm_readv
strace test/Output/bad_unwind_info.pass.cpp.dir/t.tmp.exe \
  |& grep pipe2
```
---
 libunwind/src/UnwindCursor.hpp | 50 --
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp
index 647a5a9c9d92d9..5e4e376220daa0 100644
--- a/libunwind/src/UnwindCursor.hpp
+++ b/libunwind/src/UnwindCursor.hpp
@@ -33,6 +33,7 @@
 #if defined(_LIBUNWIND_TARGET_LINUX) &&
\
 (defined(_LIBUNWIND_TARGET_AARCH64) || defined(_LIBUNWIND_TARGET_RISCV) || 
\
  defined(_LIBUNWIND_TARGET_S390X))
+#include 
 #include 
 #include 
 #include 
@@ -2700,19 +2701,18 @@ bool UnwindCursor::setInfoForSigReturn(Registers_arm64 &) {
   // [1] 
https://github.com/torvalds/linux/blob/master/arch/arm64/kernel/vdso/sigreturn.S
   const pint_t pc = static_cast(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use process_vm_readv to read
-  // the memory safely instead. process_vm_readv was added in Linux 3.2, and
-  // AArch64 supported was added in Linux 3.7, so the syscall is guaranteed to
-  // be present. Unfortunately, there are Linux AArch64 environments where the
-  // libc wrapper for the syscall might not be present (e.g. Android 5), so 
call
-  // the syscall directly instead.
+  // directly accessing it could cause a segfault. Use pipe/write/read to read
+  // the memory safely instead.
+  int pipefd[2];
+  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
+return false;
   uint32_t instructions[2];
-  struct iovec local_iov = {, sizeof instructions};
-  struct iovec remote_iov = {reinterpret_cast(pc), sizeof 
instructions};
-  long bytesRead =
-  syscall(SYS_process_vm_readv, getpid(), _iov, 1, _iov, 1, 
0);
+  const auto bufferSize = sizeof instructions;
+  if (write(pipefd[1], reinterpret_cast(pc), bufferSize) != bufferSize)
+return false;
+  const auto bytesRead = read(pipefd[0], instructions, bufferSize);
   // Look for instructions: mov x8, #0x8b; svc #0x0
-  if (bytesRead != sizeof instructions || instructions[0] != 0xd2801168 ||
+  if (bytesRead != bufferSize || instructions[0] != 0xd2801168 ||
   instructions[1] != 0xd401)
 return false;
 
@@ -2762,17 +2762,20 @@ int UnwindCursor::stepThroughSigReturn(Registers_arm64 &) {
 template 
 bool UnwindCursor::setInfoForSigReturn(Registers_riscv &) {
   const pint_t pc = static_cast(getReg(UNW_REG_IP));
+  int pipefd[2];
+  if (pipe2(pipefd, O_CLOEXEC | O_NONBLOCK) == -1)
+return false;
   uint32_t instructions[2];
-  struct iovec local_iov = {, sizeof instructions};
-  struct iovec remote_iov = {reinterpret_cast(pc), sizeof 
instructions};
-  long bytesRead =
-  syscall(SYS_process_vm_readv, getpid(), _iov, 1, _iov, 1, 
0);
+  const auto bufferSize = sizeof instructions;
+  if (write(pipefd[1], reinterpret_cast(pc), bufferSize) != bufferSize)
+return false;
+  const auto bytesRead = read(pipefd[0], instructions, bufferSize);
   // Look for the two instructions used in the sigreturn trampoline
   // __vdso_rt_sigreturn:
   //
   // 0x08b00893 li a7,0x8b
   // 0x0073 ecall
-  if (bytesRead != sizeof instructions || instructions[0] != 0x08b00893 ||
+  if (bytesRead != bufferSize || instructions[0] != 0x08b00893 ||
   instructions[1] != 0x0073)
 return false;
 
@@ -2822,13 +2825,18 @@ bool UnwindCursor::setInfoForSigReturn(Registers_s390x &) {
   // onto the stack.
   const pint_t pc = static_cast(this->getReg(UNW_REG_IP));
   // The PC might contain an invalid address if the unwind info is bad, so
-  // directly accessing it could cause a segfault. Use process_vm_readv to
+  // directly accessing it could cause a segfault. Use pipe/write/read to
   // read the memory safely instead.
   uint16_t inst;
-  struct iovec local_iov = {, sizeof inst};
-  struct iovec remote_iov = {reinterpret_cast(pc), sizeof inst};
-  long bytesRead = process_vm_readv(getpid(), _iov, 1, _iov, 1, 
0);
-  if (bytesRead == 

[clang] [llvm] [clang-tools-extra] [libunwind] [libunwind] Replace process_vm_readv with SYS_rt_sigprocmask (PR #74791)

2023-12-19 Thread Jordan R AW via cfe-commits


@@ -2974,6 +2966,39 @@ bool UnwindCursor::getFunctionName(char *buf, 
size_t bufLen,
  buf, bufLen, offset);
 }
 
+#if defined(_LIBUNWIND_CHECK_LINUX_SIGRETURN)
+template 
+bool UnwindCursor::isReadableAddr(const pint_t addr) const {
+  // This code is heavily based on Abseil's 'address_is_readable.cc',
+  // which is Copyright Abseil Authors (2017), and provided under
+  // the Apache License 2.0.
+
+  // Align to 8-bytes.
+  const auto alignedAddr = addr & ~pint_t{7};
+  const auto sigsetAddr = reinterpret_cast(alignedAddr);
+  // We have to check that addr is not nullptr because sigprocmask allows
+  // nullptr as an argument without failure.
+  if (!sigsetAddr)
+return false;
+
+  // We MUST use the raw sigprocmask syscall here, as wrappers may try to
+  // access sigsetAddr which may cause a SIGSEGV. The raw syscall however is
+  // safe. Additionally, we need to pass the kernel_sigset_size, which is
+  // different from libc sizeof(sigset_t). Some archs have sigset_t
+  // defined as unsigned long, so let's use that.
+  const auto approxKernelSigsetSize = sizeof(unsigned long);

ajordanr-google wrote:

How about `(NSIG-1)/8` (see
https://www.gnu.org/software/libc/manual/html_node/Standard-Signals.html)?
That way we just let libc handle it. It should work cleanly for everything
that's not MIPS; I don't have a MIPS system to test whether it works there.

https://github.com/llvm/llvm-project/pull/74791
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang-tools-extra] [libunwind] [libunwind] Replace process_vm_readv with SYS_rt_sigprocmask (PR #74791)

2023-12-18 Thread Fangrui Song via cfe-commits

https://github.com/MaskRay edited 
https://github.com/llvm/llvm-project/pull/74791
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [clang-tools-extra] [libunwind] [libunwind] Replace process_vm_readv with SYS_rt_sigprocmask (PR #74791)

2023-12-18 Thread Jordan R AW via cfe-commits

https://github.com/ajordanr-google edited 
https://github.com/llvm/llvm-project/pull/74791
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits