This is an automated email from the ASF dual-hosted git repository.

wwbmmm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brpc.git


The following commit(s) were added to refs/heads/master by this push:
     new a18463f6 Support task tracer (#2851)
a18463f6 is described below

commit a18463f6c380331065efbf3eb954d6d379dd6d2d
Author: Bright Chen <chenguangmin...@foxmail.com>
AuthorDate: Mon Jan 6 14:43:55 2025 +0800

    Support task tracer (#2851)
    
    * Support task tracer
    
    * Opt signal trace
    
    * Rename BRPC_VALIDATE_GFLAG to BUTIL_VALIDATE_GFLAG
    
    * Update picture of document
---
 .github/actions/init-ut-make-config/action.yml     |  13 +
 .github/actions/install-all-dependences/action.yml |   2 +-
 .github/workflows/ci-linux.yml                     |  46 +-
 CMakeLists.txt                                     |  20 +
 README_cn.md                                       |   1 +
 config_brpc.sh                                     |  29 +-
 docs/cn/bthread_tracer.md                          |  79 +++
 docs/cn/getting_started.md                         |   8 +-
 docs/en/getting_started.md                         |   8 +-
 docs/images/bthread_status_model.svg               |   3 +
 docs/images/bthread_stb_model.svg                  |   3 +
 src/brpc/builtin/bthreads_service.cpp              |  18 +-
 src/brpc/input_messenger.cpp                       |   3 +-
 src/brpc/reloadable_flags.cpp                      |  26 +-
 src/brpc/reloadable_flags.h                        |  47 +-
 src/brpc/shared_object.h                           |  46 +-
 src/bthread/bthread.cpp                            |  75 +++
 src/bthread/bthread.h                              |  19 +-
 src/bthread/butex.cpp                              |  33 +-
 src/bthread/execution_queue.cpp                    |   4 +-
 src/bthread/execution_queue_inl.h                  |  12 +-
 src/bthread/task_control.cpp                       |  45 +-
 src/bthread/task_control.h                         |  20 +-
 src/bthread/task_group.cpp                         | 123 +++--
 src/bthread/task_group.h                           |  17 +-
 src/bthread/task_group_inl.h                       |   8 +-
 src/bthread/task_meta.h                            |  57 ++-
 src/bthread/task_tracer.cpp                        | 569 +++++++++++++++++++++
 src/bthread/task_tracer.h                          | 143 ++++++
 src/butil/debug/stack_trace.cc                     |  13 +-
 src/butil/debug/stack_trace.h                      |   1 +
 src/butil/debug/stack_trace_posix.cc               |  22 +
 src/butil/memory/scope_guard.h                     |   6 +-
 src/butil/reloadable_flags.h                       |  75 +++
 src/{brpc => butil}/shared_object.h                |  10 +-
 src/butil/time.h                                   |   2 +-
 test/brpc_builtin_service_unittest.cpp             |  31 +-
 test/bthread_unittest.cpp                          |  54 +-
 38 files changed, 1439 insertions(+), 252 deletions(-)

diff --git a/.github/actions/init-ut-make-config/action.yml 
b/.github/actions/init-ut-make-config/action.yml
new file mode 100644
index 00000000..b13800ed
--- /dev/null
+++ b/.github/actions/init-ut-make-config/action.yml
@@ -0,0 +1,13 @@
+inputs:
+  options:
+    description: extra options for config_brpc.sh
+    required: false
+runs:
+  using: "composite"
+  steps:
+    - run: sudo git clone https://github.com/libunwind/libunwind.git && cd 
libunwind && sudo git checkout tags/v1.8.1 && sudo mkdir -p /libunwind && sudo 
autoreconf -i && sudo CC=clang CXX=clang++ ./configure --prefix=/libunwind && 
sudo make -j ${{env.proc_num}} && sudo make install
+      shell: bash
+    - run: sudo apt-get update && sudo apt-get install -y libgtest-dev cmake 
gdb libstdc++6-9-dbg && cd /usr/src/gtest && sudo cmake . && sudo make -j 
${{env.proc_num}} && sudo mv lib/libgtest* /usr/lib/
+      shell: bash
+    - run: sh config_brpc.sh --headers="/libunwind/include /usr/include" 
--libs="/libunwind/lib /usr/lib /usr/lib64" --nodebugsymbols ${{inputs.options}}
+      shell: bash
diff --git a/.github/actions/install-all-dependences/action.yml 
b/.github/actions/install-all-dependences/action.yml
index f507aa40..8ff502f8 100644
--- a/.github/actions/install-all-dependences/action.yml
+++ b/.github/actions/install-all-dependences/action.yml
@@ -2,7 +2,7 @@ runs:
   using: "composite"
   steps:
     - uses: ./.github/actions/install-essential-dependences
-    - run: sudo apt-get install -y libgoogle-glog-dev automake bison flex 
libboost-all-dev libevent-dev libtool pkg-config  libibverbs1 libibverbs-dev
+    - run: sudo apt-get install -y libgoogle-glog-dev automake bison flex 
libboost-all-dev libevent-dev libtool pkg-config  libibverbs1 libibverbs-dev  
libunwind8-dev
       shell: bash
     - run: wget 
https://archive.apache.org/dist/thrift/0.11.0/thrift-0.11.0.tar.gz && tar -xf 
thrift-0.11.0.tar.gz
       shell: bash
diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index 14961ebb..1854483a 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -31,7 +31,7 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/install-essential-dependences
-    - name: cmake 
+    - name: cmake
       run: |
            export CC=gcc && export CXX=g++
            mkdir build
@@ -47,7 +47,7 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - run: bazel test --verbose_failures -- //... -//example/...
-  
+
   gcc-compile-with-boringssl:
     runs-on: ubuntu-20.04
     steps:
@@ -61,7 +61,7 @@ jobs:
     - uses: ./.github/actions/install-all-dependences
     - uses: ./.github/actions/init-make-config
       with:
-       options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma  
--with-debug-bthread-sche-safety --with-debug-lock
+       options: --cc=gcc --cxx=g++ --with-thrift --with-glog --with-rdma  
--with-debug-bthread-sche-safety --with-debug-lock --with-bthread-tracer
     - name: compile
       run: |
         make -j ${{env.proc_num}}
@@ -76,7 +76,7 @@ jobs:
            export CC=gcc && export CXX=g++
            mkdir build
            cd build
-           cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON 
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON ..
+           cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON -DWITH_BTHREAD_TRACER=ON ..
     - name: compile
       run: |
            cd build
@@ -105,7 +105,7 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/install-essential-dependences
-    - name: cmake 
+    - name: cmake
       run: |
            export CC=clang && export CXX=clang++
            mkdir build
@@ -135,7 +135,7 @@ jobs:
     - uses: ./.github/actions/install-all-dependences
     - uses: ./.github/actions/init-make-config
       with:
-        options: --cc=clang --cxx=clang++ --with-thrift --with-glog 
--with-rdma  --with-debug-bthread-sche-safety --with-debug-lock
+        options: --cc=clang --cxx=clang++ --with-thrift --with-glog 
--with-rdma  --with-debug-bthread-sche-safety --with-debug-lock 
--with-bthread-tracer
     - name: compile
       run: |
            make -j ${{env.proc_num}}
@@ -150,7 +150,7 @@ jobs:
            export CC=clang && export CXX=clang++
            mkdir build
            cd build
-           cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON 
-DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON ..
+           cmake -DWITH_MESALINK=OFF -DWITH_GLOG=ON -DWITH_THRIFT=ON -DWITH_RDMA=ON -DWITH_DEBUG_BTHREAD_SCHE_SAFETY=ON -DWITH_DEBUG_LOCK=ON -DWITH_BTHREAD_TRACER=ON ..
     - name: compile
       run: |
            cd build
@@ -165,21 +165,17 @@ jobs:
   clang-unittest:
     runs-on: ubuntu-20.04
     steps:
-    - uses: actions/checkout@v2
-    - uses: ./.github/actions/install-essential-dependences
-    - name: install gtest
-      run: |
-           sudo apt-get update
-           sudo apt-get install -y cmake libgtest-dev gdb
-           cd /usr/src/gtest && sudo cmake . && sudo make && sudo mv 
lib/libgtest* /usr/lib/
-    - uses: ./.github/actions/init-make-config
-      with:
-        options: --cc=clang --cxx=clang++
-    - name: compile tests
-      run: |
-           cd test
-           make -j ${{env.proc_num}}
-    - name: run tests
-      run: |
-           cd test
-           sh ./run_tests.sh
+      - uses: actions/checkout@v2
+      - uses: ./.github/actions/install-essential-dependences
+      - uses: ./.github/actions/init-ut-make-config
+        with:
+          options: --cc=clang --cxx=clang++ --with-bthread-tracer
+      - name: compile tests
+        run: |
+          cat config.mk
+          cd test
+          make -j ${{env.proc_num}}
+      - name: run tests
+        run: |
+          cd test
+          sh ./run_tests.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3688766..a6b7716b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,7 @@ option(WITH_BORINGSSL "With BoringSSL" OFF)
 option(DEBUG "Print debug logs" OFF)
 option(WITH_DEBUG_SYMBOLS "With debug symbols" ON)
 option(WITH_THRIFT "With thrift framed protocol supported" OFF)
+option(WITH_BTHREAD_TRACER "With bthread tracer supported" OFF)
 option(WITH_SNAPPY "With snappy" OFF)
 option(WITH_RDMA "With RDMA" OFF)
 option(WITH_DEBUG_BTHREAD_SCHE_SAFETY "With debugging bthread sche safety" OFF)
@@ -81,6 +82,20 @@ if(WITH_THRIFT)
     endif()
 endif()
 
+if (WITH_BTHREAD_TRACER)
+    if (NOT (CMAKE_SYSTEM_NAME STREQUAL "Linux")  OR NOT 
(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64"))
+        message(FATAL_ERROR "bthread tracer is only supported on Linux x86_64 
platform")
+    endif()
+    find_path(LIBUNWIND_INCLUDE_PATH NAMES libunwind.h)
+    find_library(LIBUNWIND_LIB NAMES unwind)
+    find_library(LIBUNWIND_X86_64_LIB NAMES unwind-x86_64)
+    if (NOT LIBUNWIND_INCLUDE_PATH OR NOT LIBUNWIND_LIB)
+        message(FATAL_ERROR "Fail to find libunwind, which is needed by 
bthread tracer")
+    endif()
+    add_definitions(-DBRPC_BTHREAD_TRACER)
+    include_directories(${LIBUNWIND_INCLUDE_PATH})
+endif ()
+
 set(WITH_RDMA_VAL "0")
 if(WITH_RDMA)
     set(WITH_RDMA_VAL "1")
@@ -319,6 +334,11 @@ if(WITH_SNAPPY)
     set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lsnappy")
 endif()
 
+if (WITH_BTHREAD_TRACER)
+    set(DYNAMIC_LIB ${DYNAMIC_LIB} ${LIBUNWIND_LIB} ${LIBUNWIND_X86_64_LIB})
+    set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lunwind -lunwind-x86_64")
+endif()
+
 if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
     set(DYNAMIC_LIB ${DYNAMIC_LIB} rt)
     set(BRPC_PRIVATE_LIBS "${BRPC_PRIVATE_LIBS} -lrt")
diff --git a/README_cn.md b/README_cn.md
index 66f194e8..bed6e843 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -38,6 +38,7 @@
     * [bthread or not](docs/cn/bthread_or_not.md)
     * [thread-local](docs/cn/thread_local.md)
     * [Execution Queue](docs/cn/execution_queue.md)
+    * [bthread tracer](docs/cn/bthread_tracer.md)
   * Client
     * [基础功能](docs/cn/client.md)
     * [错误码](docs/cn/error_code.md)
diff --git a/config_brpc.sh b/config_brpc.sh
index fa607292..2563b0e2 100755
--- a/config_brpc.sh
+++ b/config_brpc.sh
@@ -38,11 +38,12 @@ else
     LDD=ldd
 fi
 
-TEMP=`getopt -o v: --long 
headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-debug-bthread-sche-safety,with-debug-lock,nodebugsymbols
 -n 'config_brpc' -- "$@"`
+TEMP=`getopt -o v: --long 
headers:,libs:,cc:,cxx:,with-glog,with-thrift,with-rdma,with-mesalink,with-bthread-tracer,with-debug-bthread-sche-safety,with-debug-lock,nodebugsymbols
 -n 'config_brpc' -- "$@"`
 WITH_GLOG=0
 WITH_THRIFT=0
 WITH_RDMA=0
 WITH_MESALINK=0
+WITH_BTHREAD_TRACER=0
 BRPC_DEBUG_BTHREAD_SCHE_SAFETY=0
 DEBUGSYMBOLS=-g
 BRPC_DEBUG_LOCK=0
@@ -69,6 +70,7 @@ while true; do
         --with-thrift) WITH_THRIFT=1; shift 1 ;;
         --with-rdma) WITH_RDMA=1; shift 1 ;;
         --with-mesalink) WITH_MESALINK=1; shift 1 ;;
+        --with-bthread-tracer) WITH_BTHREAD_TRACER=1; shift 1 ;;
         --with-debug-bthread-sche-safety ) BRPC_DEBUG_BTHREAD_SCHE_SAFETY=1; 
shift 1 ;;
         --with-debug-lock ) BRPC_DEBUG_LOCK=1; shift 1 ;;
         --nodebugsymbols ) DEBUGSYMBOLS=; shift 1 ;;
@@ -352,8 +354,27 @@ fi
 
 LEVELDB_HDR=$(find_dir_of_header_or_die leveldb/db.h)
 
-HDRS=$($ECHO 
"$GFLAGS_HDR\n$PROTOBUF_HDR\n$ABSL_HDR\n$LEVELDB_HDR\n$OPENSSL_HDR" | sort | 
uniq)
-LIBS=$($ECHO 
"$GFLAGS_LIB\n$PROTOBUF_LIB\n$ABSL_LIB\n$LEVELDB_LIB\n$OPENSSL_LIB\n$SNAPPY_LIB"
 | sort | uniq)
+CPPFLAGS=
+
+if [ $WITH_BTHREAD_TRACER != 0 ]; then
+    if [ "$SYSTEM" != "Linux" ] || [ "$(uname -m)" != "x86_64" ]; then
+        >&2 $ECHO "bthread tracer is only supported on Linux x86_64 platform"
+        exit 1
+    fi
+    LIBUNWIND_HDR=$(find_dir_of_header_or_die libunwind.h)
+    LIBUNWIND_LIB=$(find_dir_of_lib_or_die unwind)
+
+    CPPFLAGS="${CPPFLAGS} -DBRPC_BTHREAD_TRACER"
+
+    if [ -f "$LIBUNWIND_LIB/libunwind.$SO" ]; then
+        DYNAMIC_LINKINGS="$DYNAMIC_LINKINGS -lunwind -lunwind-x86_64"
+    else
+        STATIC_LINKINGS="$STATIC_LINKINGS -lunwind -lunwind-x86_64"
+    fi
+fi
+
+HDRS=$($ECHO 
"$LIBUNWIND_HDR\n$GFLAGS_HDR\n$PROTOBUF_HDR\n$ABSL_HDR\n$LEVELDB_HDR\n$OPENSSL_HDR"
 | sort | uniq)
+LIBS=$($ECHO 
"$LIBUNWIND_LIB\n$GFLAGS_LIB\n$PROTOBUF_LIB\n$ABSL_LIB\n$LEVELDB_LIB\n$OPENSSL_LIB\n$SNAPPY_LIB"
 | sort | uniq)
 
 absent_in_the_list() {
     TMP=`$ECHO "$1\n$2" | sort | uniq`
@@ -411,7 +432,7 @@ append_to_output "STATIC_LINKINGS=$STATIC_LINKINGS"
 append_to_output "DYNAMIC_LINKINGS=$DYNAMIC_LINKINGS"
 
 # CPP means C PreProcessing, not C PlusPlus
-CPPFLAGS="-DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS 
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY 
-DBRPC_DEBUG_LOCK=$BRPC_DEBUG_LOCK"
+CPPFLAGS="${CPPFLAGS}  -DBRPC_WITH_GLOG=$WITH_GLOG -DGFLAGS_NS=$GFLAGS_NS 
-DBRPC_DEBUG_BTHREAD_SCHE_SAFETY=$BRPC_DEBUG_BTHREAD_SCHE_SAFETY 
-DBRPC_DEBUG_LOCK=$BRPC_DEBUG_LOCK"
 
 # Avoid over-optimizations of TLS variables by GCC>=4.8
 # See: https://github.com/apache/brpc/issues/1693
diff --git a/docs/cn/bthread_tracer.md b/docs/cn/bthread_tracer.md
new file mode 100644
index 00000000..7758bee1
--- /dev/null
+++ b/docs/cn/bthread_tracer.md
@@ -0,0 +1,79 @@
+The main drawbacks of gdb (ptrace) + gdb_bthread_stack.py are that it is slow and blocks the process, so an efficient way to trace bthread call stacks is needed.
+
+The cooperative user-space coroutines of the bRPC framework cannot implement an efficient STW (Stop the World) the way Golang's built-in preemptive goroutines do, and the framework cannot intervene in the execution of user logic, so tracing bthread call stacks is relatively difficult.
+
+Tracing bthread call stacks online requires solving the following problems:
+1. Tracing the call stack of a suspended bthread.
+2. Tracing the call stack of a running bthread.
+
+# bthread state model
+
+The current bthread state model is shown below.
+
+![bthread state model](../images/bthread_status_model.svg)
+
+# Design
+
+## Core idea
+
+To solve the two problems above, this design implements STB (Stop The Bthread). The core idea is that while a bthread's call stack is being traced, its state must not transition into a state that the current tracing method does not support. STB provides two tracing modes: context tracing mode and signal tracing mode.
+
+### Context tracing mode
+Context tracing mode traces the call stack of a suspended bthread. The stack of a suspended bthread is stable, so the context saved in TaskMeta.stack (on x86_64 the key registers are mainly RIP, RSP and RBP) can be handed to a library that unwinds a call stack from a given context. However, a suspended bthread may be woken up at any time and resume execution (including jump_stack), in which case its stack keeps changing. An unstable context cannot be used for stack tracing, so the bthread's scheduling must be intercepted before jump_stack and resumed only after the trace completes. Therefore, context tracing mode supports the ready and suspended states.
+
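+To make the idea concrete, below is a minimal, hypothetical sketch of the kind of unwinding loop such a library offers (libunwind API). The real TaskTracer seeds the cursor from the registers saved in TaskMeta.stack rather than from the current thread's context captured here, and `walk_stack` is just an illustrative name:
+
+```cpp
+#define UNW_LOCAL_ONLY
+#include <libunwind.h>
+#include <cstdio>
+
+// Walk a call stack frame by frame and print symbolized frames.
+static void walk_stack() {
+    unw_context_t ctx;
+    unw_cursor_t cursor;
+    unw_getcontext(&ctx);           // capture a context (the tracer would use the saved one)
+    unw_init_local(&cursor, &ctx);  // start unwinding from that context
+    int frame = 0;
+    while (unw_step(&cursor) > 0) { // step to the caller's frame
+        unw_word_t ip = 0, offset = 0;
+        char name[256] = "<unknown>";
+        unw_get_reg(&cursor, UNW_REG_IP, &ip);
+        unw_get_proc_name(&cursor, name, sizeof(name), &offset);
+        std::printf("#%d 0x%016lx %s\n", frame++, (unsigned long)ip, name);
+    }
+}
+```
+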
+### Signal tracing mode
+
+Signal tracing mode traces the call stack of a running bthread. A running bthread is unstable, so TaskMeta.stack cannot be used to trace its call stack. The only alternative is to interrupt the bthread's execution with a signal and unwind its call stack inside the signal handler. Using signals raises two problems:
+
+1. Async-signal safety.
+2. Signal tracing mode does not support jump_stack. Stack unwinding needs register information, but jump_stack manipulates registers and is unsafe to interrupt, so jump_stack must not be interrupted by the signal; the bthread's scheduling must be intercepted before jump_stack, and the bthread is suspended only after the call-stack trace completes.
+
+Therefore, signal tracing mode only supports the running state.
+
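+A minimal, hypothetical sketch of the interrupt-and-wait handshake follows (only the signal delivery and the timed wait are shown; the unwinding done inside the handler and the real TaskTracer bookkeeping are elided, and the function names here are illustrative, not TaskTracer's actual interface):
+
+```cpp
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <time.h>
+
+static sem_t g_trace_done;
+
+// Signal handler: only async-signal-safe calls are allowed here. The real
+// tracer unwinds the interrupted bthread's stack at this point; this sketch
+// merely reports completion back to the tracer.
+static void trace_handler(int /*signo*/) {
+    sem_post(&g_trace_done);  // async-signal-safe
+}
+
+static bool trace_running_worker(pthread_t worker, int timeout_ms) {
+    sem_init(&g_trace_done, 0, 0);
+    struct sigaction sa = {};
+    sa.sa_handler = trace_handler;
+    sigemptyset(&sa.sa_mask);
+    sigaction(SIGURG, &sa, nullptr);  // SIGURG, the signal used by TaskTracer
+
+    pthread_kill(worker, SIGURG);     // interrupt the running worker thread
+
+    // Give up after a timeout (cf. -signal_trace_timeout_ms) instead of
+    // blocking forever if unwinding gets stuck.
+    timespec deadline;
+    clock_gettime(CLOCK_REALTIME, &deadline);
+    deadline.tv_sec += timeout_ms / 1000;
+    deadline.tv_nsec += (timeout_ms % 1000) * 1000000L;
+    if (deadline.tv_nsec >= 1000000000L) {
+        deadline.tv_sec += 1;
+        deadline.tv_nsec -= 1000000000L;
+    }
+    return sem_timedwait(&g_trace_done, &deadline) == 0;
+}
+```
+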
+### Summary
+
+jump_stack is the path every bthread must take to be suspended or to start running, and it is therefore STB's interception point. STB divides the states into three categories:
+1. States supporting context tracing mode: ready, suspended.
+2. States supporting signal tracing mode: running.
+3. States that do not support tracing: neither tracing method may be used while jump_stack is in progress, so the bthread's scheduling is intercepted before jump_stack and resumed only after the call-stack trace completes.
+
+### Detailed workflow
+
+Below is the bthread state model after introducing STB. Two states (the interception points) are added on top of the original model: about-to-run and suspending.
+
+![bthread STB state model](../images/bthread_stb_model.svg)
+
+From the analysis above, the STB workflow is summarized as follows:
+
+1. When TaskTracer (the module that implements STB) receives a request to trace a bthread's call stack, it marks tracing as in progress; after the trace completes, it marks tracing as done and signals any bthread that may be in the about-to-run or suspending state. Depending on the bthread's state, TaskTracer acts differently:
+- Created, ready without a stack allocated yet, destroyed: end the trace immediately.
+- Suspended, ready: trace the bthread's call stack with context tracing mode.
+- Running: trace the bthread's call stack with signal tracing mode.
+- About-to-run, suspending: TaskTracer spins until the bthread transitions to the next state (suspended or running), then continues the trace.
+
+2. While TaskTracer is tracing, the bthread also acts differently depending on its state:
+- Created, ready without a stack allocated yet, ready: no extra handling is needed.
+- Suspended, running: notify TaskTracer to continue the trace.
+- About-to-run, suspending, destroyed: the bthread waits on a condition variable until TaskTracer finishes the trace; TaskTracer then notifies the bthread through the condition variable to proceed with jump_stack (a simplified sketch of this handshake follows).
+
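+The following is a simplified, hypothetical illustration of that condition-variable handshake at the jump_stack interception point; the real TaskTracer tracks a per-TaskMeta status and uses its own synchronization primitives, so `TraceGate` and its methods are illustrative only:
+
+```cpp
+#include <condition_variable>
+#include <mutex>
+
+// Hypothetical per-bthread gate guarding the stack switch.
+struct TraceGate {
+    std::mutex mu;
+    std::condition_variable cv;
+    bool tracing = false;
+
+    // Called by the bthread right before jump_stack: block while a trace
+    // of this bthread is still in progress.
+    void wait_if_tracing() {
+        std::unique_lock<std::mutex> lk(mu);
+        cv.wait(lk, [this] { return !tracing; });
+    }
+
+    // Called by the tracer around a trace of this bthread.
+    void begin_trace() {
+        std::lock_guard<std::mutex> lk(mu);
+        tracing = true;
+    }
+    void end_trace() {
+        {
+            std::lock_guard<std::mutex> lk(mu);
+            tracing = false;
+        }
+        cv.notify_all();  // let the bthread proceed with jump_stack
+    }
+};
+```
+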
+# Usage
+
+1. Download and install libunwind.
+2. Add the `--with-bthread-tracer` option to config_brpc.sh, or the `-DWITH_BTHREAD_TRACER=ON` option to cmake.
+3. Visit the server's builtin service at `http://ip:port/bthreads/<bthread_id>?st=1`, or call the `bthread::stack_trace()` function in code.
+4. To trace a pthread's call stack, call `bthread::init_for_pthread_stack_trace()` on that pthread to obtain a fake bthread_t, then use step 3 to get the pthread's call stack (see the sketch below).
+
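+A minimal sketch of the programmatic interface from steps 3 and 4 (it assumes the library was built with `--with-bthread-tracer`; `my_job` is a made-up placeholder function):
+
+```cpp
+#include <iostream>
+#include <bthread/bthread.h>
+
+static void* my_job(void*) {
+    bthread_usleep(60 * 1000 * 1000);  // stand-in for real work
+    return nullptr;
+}
+
+int main() {
+    bthread_t tid;
+    bthread_start_background(&tid, nullptr, my_job, nullptr);
+
+#ifdef BRPC_BTHREAD_TRACER
+    // Trace another bthread by id (same information as /bthreads/<id>?st=1).
+    std::cout << bthread::stack_trace(tid) << std::endl;
+
+    // Trace the current pthread: obtain a fake bthread_t first.
+    bthread_t fake_tid = bthread::init_for_pthread_stack_trace();
+    bthread::stack_trace(std::cout, fake_tid);
+#endif
+    return 0;
+}
+```
+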
+The following is sample output of tracing a bthread's call stack:
+```shell
+#0 0x00007fdbbed500b5 __clock_gettime_2
+#1 0x000000000041f2b6 butil::cpuwide_time_ns()
+#2 0x000000000041f289 butil::cpuwide_time_us()
+#3 0x000000000041f1b9 butil::EveryManyUS::operator bool()
+#4 0x0000000000413289 (anonymous namespace)::spin_and_log()
+#5 0x00007fdbbfa58dc0 bthread::TaskGroup::task_runner()
+```
+
+# Related flags
+
+- `enable_fast_unwind`: whether to enable fast unwinding, true by default. In most cases there is no need to turn it off; only if a function name you care about fails to resolve and shows up as `<unknown>` should you try disabling it, at the cost of performance. For a call stack of about 30 frames, fast unwinding usually finishes within 200us, while disabling it takes roughly 4ms, nearly 20 times slower.
+- `signal_trace_timeout_ms`: timeout of signal tracing mode, 50ms by default. Although the libunwind documentation says unwinding is async-signal-safe, [the gperftools community found that libunwind can deadlock in some cases](https://github.com/gperftools/gperftools/issues/775), so TaskTracer sets a timeout and gives up unwinding once it expires, breaking the deadlock.
\ No newline at end of file
diff --git a/docs/cn/getting_started.md b/docs/cn/getting_started.md
index 13f84805..76aa6f86 100644
--- a/docs/cn/getting_started.md
+++ b/docs/cn/getting_started.md
@@ -382,7 +382,7 @@ brpc默认**不**链接 
[tcmalloc](http://goog-perftools.sourceforge.net/doc/tcm
 
 ## glog: 3.3+
 
-brpc实现了一个默认的[日志功能](../../src/butil/logging.h)它和glog冲突。要替换成glog,可以给config_brpc.sh增加*--with-glog*选项或者给cmake增加`-DWITH_GLOG=ON`选项。
+brpc实现了一个默认的[日志功能](../../src/butil/logging.h)它和glog冲突。要替换成glog,可以给config_brpc.sh增加`--with-glog`选项或者给cmake增加`-DWITH_GLOG=ON`选项。
 
 ## valgrind: 3.8+
 
@@ -392,6 +392,12 @@ brpc会自动检测valgrind(然后注册bthread的栈)。不支持老版本
 
 无已知问题。
 
+## libunwind: 1.3-1.8.1
+
+brpc does **not** link [libunwind](https://github.com/libunwind/libunwind) by default. Users who need the bthread tracing feature link libunwind by adding the `--with-bthread-tracer` option to config_brpc.sh or the `-DWITH_BTHREAD_TRACER=ON` option to cmake.
+
+It is recommended to use the latest possible version of libunwind.
+
 # 实例追踪
 
 我们提供了一个程序去帮助你追踪和监控所有brpc实例。 只需要在某处运行 
[trackme_server](https://github.com/apache/brpc/tree/master/tools/trackme_server/)
 然后再带着 
-trackme_server=SERVER参数启动需要被追踪的实例。trackme_server将从实例周期性地收到ping消息然后打印日志。您可以从日志中聚合实例地址,并调用实例的内置服务以获取更多信息。
diff --git a/docs/en/getting_started.md b/docs/en/getting_started.md
index a8197620..a2ae80d4 100644
--- a/docs/en/getting_started.md
+++ b/docs/en/getting_started.md
@@ -377,7 +377,7 @@ When you remove tcmalloc, not only remove the linkage with 
tcmalloc but also the
 
 ## glog: 3.3+
 
-brpc implements a default [logging utility](../../src/butil/logging.h) which 
conflicts with glog. To replace this with glog, add *--with-glog* to 
config_brpc.sh or add `-DWITH_GLOG=ON` to cmake.
+brpc implements a default [logging utility](../../src/butil/logging.h) which 
conflicts with glog. To replace this with glog, add `--with-glog` to 
config_brpc.sh or add `-DWITH_GLOG=ON` to cmake.
 
 ## valgrind: 3.8+
 
@@ -385,6 +385,12 @@ brpc detects valgrind automatically (and registers stacks 
of bthread). Older val
 
 ## thrift: 0.9.3-0.11.0
 
+## libunwind: 1.3-1.8.1
+
+brpc does **not** link [libunwind](https://github.com/libunwind/libunwind) by default. Users who need the bthread tracer link libunwind by adding `--with-bthread-tracer` to config_brpc.sh or adding `-DWITH_BTHREAD_TRACER=ON` to cmake.
+
+It is recommended to use the latest possible version of libunwind.
+
 no known issues.
 
 # Track instances
diff --git a/docs/images/bthread_status_model.svg 
b/docs/images/bthread_status_model.svg
new file mode 100644
index 00000000..46379c96
--- /dev/null
+++ b/docs/images/bthread_status_model.svg
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" 
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd";>
+<svg xmlns="http://www.w3.org/2000/svg"; 
xmlns:xlink="http://www.w3.org/1999/xlink"; version="1.1" width="1045px" 
height="669px" viewBox="-0.5 -0.5 1045 669" style="background-color: rgb(255, 
255, 255);"><defs/><rect fill="#ffffff" width="100%" height="100%" x="0" 
y="0"/><g><g data-cell-id="0"><g data-cell-id="1"><g 
data-cell-id="DboKUKvH6xi3xOyzVJv4-1887"><g><rect x="151" y="153" width="150" 
height="60" rx="4.2" ry="4.2" fill="#e85642" stroke="none" 
pointer-events="all"/></g><g><g transfo [...]
\ No newline at end of file
diff --git a/docs/images/bthread_stb_model.svg 
b/docs/images/bthread_stb_model.svg
new file mode 100644
index 00000000..5dcc3683
--- /dev/null
+++ b/docs/images/bthread_stb_model.svg
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" 
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd";>
+<svg xmlns="http://www.w3.org/2000/svg"; 
xmlns:xlink="http://www.w3.org/1999/xlink"; version="1.1" width="1059px" 
height="779px" viewBox="-0.5 -0.5 1059 779" style="background-color: rgb(255, 
255, 255);"><defs/><rect fill="#ffffff" width="100%" height="100%" x="0" 
y="0"/><g><g data-cell-id="g0N0ArMlp0iuiEGkKfUx-0"><g 
data-cell-id="g0N0ArMlp0iuiEGkKfUx-1"><g 
data-cell-id="g0N0ArMlp0iuiEGkKfUx-35"><g><rect x="168" y="113" width="150" 
height="60" rx="4.2" ry="4.2" fill="#e85642" stroke="none" [...]
\ No newline at end of file
diff --git a/src/brpc/builtin/bthreads_service.cpp 
b/src/brpc/builtin/bthreads_service.cpp
index 209f7a99..7676f656 100644
--- a/src/brpc/builtin/bthreads_service.cpp
+++ b/src/brpc/builtin/bthreads_service.cpp
@@ -16,14 +16,14 @@
 // under the License.
 
 
-#include <ostream>
+#include "bthread/bthread.h"
 #include "brpc/closure_guard.h"        // ClosureGuard
 #include "brpc/controller.h"           // Controller
 #include "brpc/builtin/common.h"
 #include "brpc/builtin/bthreads_service.h"
 
 namespace bthread {
-void print_task(std::ostream& os, bthread_t tid);
+extern void print_task(std::ostream& os, bthread_t tid);
 }
 
 
@@ -40,12 +40,24 @@ void 
BthreadsService::default_method(::google::protobuf::RpcController* cntl_bas
     const std::string& constraint = cntl->http_request().unresolved_path();
     
     if (constraint.empty()) {
+#ifdef BRPC_BTHREAD_TRACER
+        os << "Use /bthreads/<bthread_id> or /bthreads/<bthread_id>?st=1 for 
stack trace";
+#else
         os << "Use /bthreads/<bthread_id>";
+#endif // BRPC_BTHREAD_TRACER
     } else {
         char* endptr = NULL;
         bthread_t tid = strtoull(constraint.c_str(), &endptr, 10);
-        if (*endptr == '\0' || *endptr == '/') {
+        if (*endptr == '\0' || *endptr == '/' || *endptr == '?') {
             ::bthread::print_task(os, tid);
+
+#ifdef BRPC_BTHREAD_TRACER
+            const std::string* st = cntl->http_request().uri().GetQuery("st");
+            if (NULL != st && *st == "1") {
+                os << "\nbthread call stack:\n";
+                ::bthread::stack_trace(os, tid);
+            }
+#endif // BRPC_BTHREAD_TRACER
         } else {
             cntl->SetFailed(ENOMETHOD, "path=%s is not a bthread id",
                             constraint.c_str());
diff --git a/src/brpc/input_messenger.cpp b/src/brpc/input_messenger.cpp
index d6e1c356..b28c7044 100644
--- a/src/brpc/input_messenger.cpp
+++ b/src/brpc/input_messenger.cpp
@@ -287,8 +287,7 @@ int InputMessenger::ProcessNewMessage(
         // This unique_ptr prevents msg to be lost before transfering
         // ownership to last_msg
         DestroyingPtr<InputMessageBase> msg(pr.message());
-        QueueMessage(last_msg.release(), &num_bthread_created,
-                            m->_keytable_pool);
+        QueueMessage(last_msg.release(), &num_bthread_created, 
m->_keytable_pool);
         if (_handlers[index].process == NULL) {
             LOG(ERROR) << "process of index=" << index << " is NULL";
             continue;
diff --git a/src/brpc/reloadable_flags.cpp b/src/brpc/reloadable_flags.cpp
index 5f5ca026..958dc057 100644
--- a/src/brpc/reloadable_flags.cpp
+++ b/src/brpc/reloadable_flags.cpp
@@ -15,10 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-
-#include <unistd.h>                    // write, _exit
-#include <gflags/gflags.h>
-#include "butil/macros.h"
 #include "brpc/reloadable_flags.h"
 
 namespace brpc {
@@ -62,37 +58,25 @@ bool NonNegativeInteger(const char*, int64_t val) {
     return val >= 0;
 }
 
-template <typename T>
-static bool RegisterFlagValidatorOrDieImpl(
-    const T* flag, bool (*validate_fn)(const char*, T val)) {
-    if (GFLAGS_NS::RegisterFlagValidator(flag, validate_fn)) {
-        return true;
-    }
-    // Error printed by gflags does not have newline. Add one to it.
-    char newline = '\n';
-    butil::ignore_result(write(2, &newline, 1));
-    _exit(1);
-}
-
 bool RegisterFlagValidatorOrDie(const bool* flag,
                                 bool (*validate_fn)(const char*, bool)) {
-    return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+    return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
 }
 bool RegisterFlagValidatorOrDie(const int32_t* flag,
                                 bool (*validate_fn)(const char*, int32_t)) {
-    return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+    return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
 }
 bool RegisterFlagValidatorOrDie(const int64_t* flag,
                                 bool (*validate_fn)(const char*, int64_t)) {
-    return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+    return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
 }
 bool RegisterFlagValidatorOrDie(const uint64_t* flag,
                                 bool (*validate_fn)(const char*, uint64_t)) {
-    return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+    return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
 }
 bool RegisterFlagValidatorOrDie(const double* flag,
                                 bool (*validate_fn)(const char*, double)) {
-    return RegisterFlagValidatorOrDieImpl(flag, validate_fn);
+    return butil::RegisterFlagValidatorOrDieImpl(flag, validate_fn);
 }
 
 } // namespace brpc
diff --git a/src/brpc/reloadable_flags.h b/src/brpc/reloadable_flags.h
index c451bde8..60b7851f 100644
--- a/src/brpc/reloadable_flags.h
+++ b/src/brpc/reloadable_flags.h
@@ -19,28 +19,9 @@
 #ifndef BRPC_RELOADABLE_FLAGS_H
 #define BRPC_RELOADABLE_FLAGS_H
 
-// To brpc developers: This is a header included by user, don't depend
-// on internal structures, use opaque pointers instead.
-
-#include <stdint.h>
-
-// Register an always-true valiator to a gflag so that the gflag is treated as
-// reloadable by brpc. If a validator exists, abort the program.
-// You should call this macro within global scope. for example:
-//
-//   DEFINE_int32(foo, 0, "blah blah");
-//   BRPC_VALIDATE_GFLAG(foo, brpc::PassValidate);
-//
-// This macro does not work for string-flags because they're thread-unsafe to
-// modify directly. To emphasize this, you have to write the validator by
-// yourself and use GFLAGS_NS::GetCommandLineOption() to acess the flag.
-#define BRPC_VALIDATE_GFLAG(flag, validate_fn)                     \
-    namespace brpc_flags {}                                        \
-    const int register_FLAGS_ ## flag ## _dummy                    \
-                 __attribute__((__unused__)) =                     \
-        ::brpc::RegisterFlagValidatorOrDie(                        \
-            &FLAGS_##flag, (validate_fn))
+#include "butil/reloadable_flags.h"
 
+#define BRPC_VALIDATE_GFLAG(flag, validate_fn) BUTIL_VALIDATE_GFLAG(flag, 
validate_fn)
 
 namespace brpc {
 
@@ -59,18 +40,18 @@ extern bool PositiveInteger(const char*, uint64_t);
 extern bool NonNegativeInteger(const char*, int32_t);
 extern bool NonNegativeInteger(const char*, int64_t);
 
-extern bool RegisterFlagValidatorOrDie(const bool* flag,
-                                  bool (*validate_fn)(const char*, bool));
-extern bool RegisterFlagValidatorOrDie(const int32_t* flag,
-                                  bool (*validate_fn)(const char*, int32_t));
-extern bool RegisterFlagValidatorOrDie(const uint32_t* flag,
-                                  bool (*validate_fn)(const char*, uint32_t));
-extern bool RegisterFlagValidatorOrDie(const int64_t* flag,
-                                  bool (*validate_fn)(const char*, int64_t));
-extern bool RegisterFlagValidatorOrDie(const uint64_t* flag,
-                                  bool (*validate_fn)(const char*, uint64_t));
-extern bool RegisterFlagValidatorOrDie(const double* flag,
-                                  bool (*validate_fn)(const char*, double));
+extern bool RegisterFlagValidatorOrDie(
+    const bool* flag, bool (*validate_fn)(const char*, bool));
+extern bool RegisterFlagValidatorOrDie(
+    const int32_t* flag, bool (*validate_fn)(const char*, int32_t));
+extern bool RegisterFlagValidatorOrDie(
+    const uint32_t* flag, bool (*validate_fn)(const char*, uint32_t));
+extern bool RegisterFlagValidatorOrDie(
+    const int64_t* flag, bool (*validate_fn)(const char*, int64_t));
+extern bool RegisterFlagValidatorOrDie(
+    const uint64_t* flag, bool (*validate_fn)(const char*, uint64_t));
+extern bool RegisterFlagValidatorOrDie(
+    const double* flag, bool (*validate_fn)(const char*, double));
 } // namespace brpc
 
 
diff --git a/src/brpc/shared_object.h b/src/brpc/shared_object.h
index d8ff9aae..296bdea3 100644
--- a/src/brpc/shared_object.h
+++ b/src/brpc/shared_object.h
@@ -19,53 +19,11 @@
 #ifndef BRPC_SHARED_OBJECT_H
 #define BRPC_SHARED_OBJECT_H
 
-#include "butil/intrusive_ptr.hpp"                   // butil::intrusive_ptr
-#include "butil/atomicops.h"
-
+#include "butil/shared_object.h"
 
 namespace brpc {
 
-// Inherit this class to be intrusively shared. Comparing to shared_ptr,
-// intrusive_ptr saves one malloc (for shared_count) and gets better cache
-// locality when the ref/deref are frequent, in the cost of lack of weak_ptr
-// and worse interface.
-class SharedObject {
-friend void intrusive_ptr_add_ref(SharedObject*);
-friend void intrusive_ptr_release(SharedObject*);
-
-public:
-    SharedObject() : _nref(0) { }
-    int ref_count() const { return _nref.load(butil::memory_order_relaxed); }
-    
-    // Add ref and returns the ref_count seen before added.
-    // The effect is basically same as butil::intrusive_ptr<T>(obj).detach()
-    // except that the latter one does not return the seen ref_count which is
-    // useful in some scenarios.
-    int AddRefManually()
-    { return _nref.fetch_add(1, butil::memory_order_relaxed); }
-
-    // Remove one ref, if the ref_count hit zero, delete this object.
-    // Same as butil::intrusive_ptr<T>(obj, false).reset(NULL)
-    void RemoveRefManually() {
-        if (_nref.fetch_sub(1, butil::memory_order_release) == 1) {
-            butil::atomic_thread_fence(butil::memory_order_acquire);
-            delete this;
-        }
-    }
-
-protected:
-    virtual ~SharedObject() { }
-private:
-    butil::atomic<int> _nref;
-};
-
-inline void intrusive_ptr_add_ref(SharedObject* obj) {
-    obj->AddRefManually();
-}
-
-inline void intrusive_ptr_release(SharedObject* obj) {
-    obj->RemoveRefManually();
-}
+using butil::SharedObject;
 
 } // namespace brpc
 
diff --git a/src/bthread/bthread.cpp b/src/bthread/bthread.cpp
index b834b42a..3de72657 100644
--- a/src/bthread/bthread.cpp
+++ b/src/bthread/bthread.cpp
@@ -19,9 +19,11 @@
 
 // Date: Tue Jul 10 17:40:58 CST 2012
 
+#include <sys/syscall.h>
 #include <gflags/gflags.h>
 #include "butil/macros.h"                       // BAIDU_CASSERT
 #include "butil/logging.h"
+#include "butil/thread_local.h"
 #include "bthread/task_group.h"                // TaskGroup
 #include "bthread/task_control.h"              // TaskControl
 #include "bthread/timer_thread.h"
@@ -115,6 +117,79 @@ inline TaskControl* get_or_new_task_control() {
     return c;
 }
 
+#ifdef BRPC_BTHREAD_TRACER
+BAIDU_THREAD_LOCAL TaskMeta* pthread_fake_meta = NULL;
+
+bthread_t init_for_pthread_stack_trace() {
+    if (NULL != pthread_fake_meta) {
+        return pthread_fake_meta->tid;
+    }
+
+    TaskControl* c = get_task_control();
+    if (NULL == c) {
+        LOG(ERROR) << "TaskControl has not been created, "
+                      "please use bthread_start_xxx before calling this function";
+        return INVALID_BTHREAD;
+    }
+
+    butil::ResourceId<TaskMeta> slot;
+    pthread_fake_meta = butil::get_resource(&slot);
+    if (BAIDU_UNLIKELY(NULL == pthread_fake_meta)) {
+        LOG(ERROR) << "Fail to get TaskMeta";
+        return INVALID_BTHREAD;
+    }
+
+    pthread_fake_meta->attr = BTHREAD_ATTR_PTHREAD;
+    pthread_fake_meta->tid = make_tid(*pthread_fake_meta->version_butex, slot);
+    // Make TaskTracer use signal trace mode for pthread.
+    c->_task_tracer.set_running_status(syscall(SYS_gettid), pthread_fake_meta);
+
+    // Release the TaskMeta at exit of pthread.
+    butil::thread_atexit([]() {
+        // Similar to TaskGroup::task_runner.
+        bool tracing;
+        {
+            BAIDU_SCOPED_LOCK(pthread_fake_meta->version_lock);
+            tracing = TaskTracer::set_end_status_unsafe(pthread_fake_meta);
+            // If resulting version is 0,
+            // change it to 1 to make bthread_t never be 0.
+            if (0 == ++*pthread_fake_meta->version_butex) {
+                ++*pthread_fake_meta->version_butex;
+            }
+        }
+
+        if (tracing) {
+            // Wait for tracing completion.
+            get_task_control()->_task_tracer.WaitForTracing(pthread_fake_meta);
+        }
+        get_task_control()->_task_tracer.set_status(
+            TASK_STATUS_UNKNOWN, pthread_fake_meta);
+
+        butil::return_resource(get_slot(pthread_fake_meta->tid));
+        pthread_fake_meta = NULL;
+    });
+
+    return pthread_fake_meta->tid;
+}
+
+void stack_trace(std::ostream& os, bthread_t tid) {
+    TaskControl* c = get_task_control();
+    if (NULL == c) {
+        os << "TaskControl has not been created";
+        return;
+    }
+    c->stack_trace(os, tid);
+}
+
+std::string stack_trace(bthread_t tid) {
+    TaskControl* c = get_task_control();
+    if (NULL == c) {
+        return "TaskControl has not been created";
+    }
+    return c->stack_trace(tid);
+}
+#endif // BRPC_BTHREAD_TRACER
+
 static int add_workers_for_each_tag(int num) {
     int added = 0;
     auto c = get_task_control();
diff --git a/src/bthread/bthread.h b/src/bthread/bthread.h
index 72b32ad9..7e42c96c 100644
--- a/src/bthread/bthread.h
+++ b/src/bthread/bthread.h
@@ -28,12 +28,25 @@
 #include "bthread/errno.h"
 
 #if defined(__cplusplus)
-#  include <iostream>
-#  include "bthread/mutex.h"        // use bthread_mutex_t in the RAII way
-#endif
+#include <iostream>
+#include "bthread/mutex.h"        // use bthread_mutex_t in the RAII way
+#endif // __cplusplus
 
 #include "bthread/id.h"
 
+#if defined(__cplusplus) && defined(BRPC_BTHREAD_TRACER)
+namespace bthread {
+// Assign a TaskMeta to the pthread and set the state to Running,
+// so that `stack_trace()' can trace the call stack of the pthread.
+bthread_t init_for_pthread_stack_trace();
+
+// Trace the call stack of the bthread, or pthread which has been
+// initialized by `init_for_pthread_stack_trace()'.
+void stack_trace(std::ostream& os, bthread_t tid);
+std::string stack_trace(bthread_t tid);
+} // namespace bthread
+#endif // __cplusplus && BRPC_BTHREAD_TRACER
+
 __BEGIN_DECLS
 
 // Create bthread `fn(args)' with attributes `attr' and put the identifier into
diff --git a/src/bthread/butex.cpp b/src/bthread/butex.cpp
index 25cb9539..c54198c6 100644
--- a/src/bthread/butex.cpp
+++ b/src/bthread/butex.cpp
@@ -283,11 +283,11 @@ inline TaskGroup* get_task_group(TaskControl* c, 
bthread_tag_t tag) {
     return is_same_tag(tag) ? tls_task_group : c->choose_one_group(tag);
 }
 
-inline void run_in_local_task_group(TaskGroup* g, bthread_t tid, bool 
nosignal) {
+inline void run_in_local_task_group(TaskGroup* g, TaskMeta* next_meta, bool 
nosignal) {
     if (!nosignal) {
-        TaskGroup::exchange(&g, tid);
+        TaskGroup::exchange(&g, next_meta);
     } else {
-        g->ready_to_run(tid, nosignal);
+        g->ready_to_run(next_meta, nosignal);
     }
 }
 
@@ -311,9 +311,9 @@ int butex_wake(void* arg, bool nosignal) {
     unsleep_if_necessary(bbw, get_global_timer_thread());
     TaskGroup* g = get_task_group(bbw->control, bbw->tag);
     if (g == tls_task_group) {
-        run_in_local_task_group(g, bbw->tid, nosignal);
+        run_in_local_task_group(g, bbw->task_meta, nosignal);
     } else {
-        g->ready_to_run_remote(bbw->tid, check_nosignal(nosignal, g->tag()));
+        g->ready_to_run_remote(bbw->task_meta, check_nosignal(nosignal, 
g->tag()));
     }
     return 1;
 }
@@ -363,7 +363,7 @@ int butex_wake_n(void* arg, size_t n, bool nosignal) {
         w->RemoveFromList();
         unsleep_if_necessary(w, get_global_timer_thread());
         auto g = get_task_group(w->control, w->tag);
-        g->ready_to_run_general(w->tid, true);
+        g->ready_to_run_general(w->task_meta, true);
         nwakeups[g->tag()] = g;
         ++nwakeup;
     }
@@ -375,9 +375,9 @@ int butex_wake_n(void* arg, size_t n, bool nosignal) {
     }
     auto g = get_task_group(next->control, next->tag);
     if (g == tls_task_group) {
-        run_in_local_task_group(g, next->tid, nosignal);
+        run_in_local_task_group(g, next->task_meta, nosignal);
     } else {
-        g->ready_to_run_remote(next->tid, check_nosignal(nosignal, g->tag()));
+        g->ready_to_run_remote(next->task_meta, check_nosignal(nosignal, 
g->tag()));
     }
     return nwakeup;
 }
@@ -436,7 +436,7 @@ int butex_wake_except(void* arg, bthread_t 
excluded_bthread) {
         w->RemoveFromList();
         unsleep_if_necessary(w, get_global_timer_thread());
         auto g = get_task_group(w->control, w->tag);
-        g->ready_to_run_general(w->tid, true);
+        g->ready_to_run_general(w->task_meta, true);
         nwakeups[g->tag()] = g;
         ++nwakeup;
     } while (!bthread_waiters.empty());
@@ -480,9 +480,9 @@ int butex_requeue(void* arg, void* arg2) {
     unsleep_if_necessary(bbw, get_global_timer_thread());
     auto g = is_same_tag(bbw->tag) ? tls_task_group : NULL;
     if (g) {
-        TaskGroup::exchange(&g, front->tid);
+        TaskGroup::exchange(&g, bbw->task_meta);
     } else {
-        
bbw->control->choose_one_group(bbw->tag)->ready_to_run_remote(front->tid);
+        
bbw->control->choose_one_group(bbw->tag)->ready_to_run_remote(bbw->task_meta);
     }
     return 1;
 }
@@ -520,7 +520,7 @@ inline bool erase_from_butex(ButexWaiter* bw, bool wakeup, 
WaiterState state) {
     if (erased && wakeup) {
         if (bw->tid) {
             ButexBthreadWaiter* bbw = static_cast<ButexBthreadWaiter*>(bw);
-            get_task_group(bbw->control, 
bbw->tag)->ready_to_run_general(bw->tid);
+            get_task_group(bbw->control, 
bbw->tag)->ready_to_run_general(bbw->task_meta);
         } else {
             ButexPthreadWaiter* pw = static_cast<ButexPthreadWaiter*>(bw);
             wakeup_pthread(pw);
@@ -535,7 +535,7 @@ struct WaitForButexArgs {
     bool prepend;
 };
 
-static void wait_for_butex(void* arg) {
+void wait_for_butex(void* arg) {
     auto args = static_cast<WaitForButexArgs*>(arg);
     ButexBthreadWaiter* const bw = args->bw;
     Butex* const b = bw->initial_butex;
@@ -565,6 +565,9 @@ static void wait_for_butex(void* arg) {
                 b->waiters.Append(bw);
             }
             bw->container.store(b, butil::memory_order_relaxed);
+#ifdef BRPC_BTHREAD_TRACER
+            bw->control->_task_tracer.set_status(TASK_STATUS_SUSPENDED, 
bw->task_meta);
+#endif // BRPC_BTHREAD_TRACER
             if (bw->abstime != NULL) {
                 bw->sleep_id = get_global_timer_thread()->schedule(
                     erase_from_butex_and_wakeup, bw, *bw->abstime);
@@ -582,7 +585,7 @@ static void wait_for_butex(void* arg) {
     // the two functions. The on-stack ButexBthreadWaiter is safe to use and
     // bw->waiter_state will not change again.
     // unsleep_if_necessary(bw, get_global_timer_thread());
-    tls_task_group->ready_to_run(bw->tid);
+    tls_task_group->ready_to_run(bw->task_meta);
     // FIXME: jump back to original thread is buggy.
     
     // // Value unmatched or waiter is already woken up by TimerThread, jump
@@ -697,7 +700,7 @@ int butex_wait(void* arg, int expected_value, const 
timespec* abstime, bool prep
     // release fence matches with acquire fence in 
interrupt_and_consume_waiters
     // in task_group.cpp to guarantee visibility of `interrupted'.
     bbw.task_meta->current_waiter.store(&bbw, butil::memory_order_release);
-    WaitForButexArgs args{ &bbw, prepend};
+    WaitForButexArgs args{ &bbw, prepend };
     g->set_remained(wait_for_butex, &args);
     TaskGroup::sched(&g);
 
diff --git a/src/bthread/execution_queue.cpp b/src/bthread/execution_queue.cpp
index 557669ee..bb01882c 100644
--- a/src/bthread/execution_queue.cpp
+++ b/src/bthread/execution_queue.cpp
@@ -68,7 +68,7 @@ inline ExecutionQueueVars* get_execq_vars() {
 
 void ExecutionQueueBase::start_execute(TaskNode* node) {
     node->next = TaskNode::UNCONNECTED;
-    node->status = UNEXECUTED;
+    node->status = TaskNode::UNEXECUTED;
     node->iterated = false;
     if (node->high_priority) {
         // Add _high_priority_tasks before pushing this task into queue to
@@ -304,7 +304,7 @@ int ExecutionQueueBase::_execute(TaskNode* head, bool 
high_priority, int* nitera
     if (head != NULL && head->stop_task) {
         CHECK(head->next == NULL);
         head->iterated = true;
-        head->status = EXECUTED;
+        head->status = TaskNode::EXECUTED;
         TaskIteratorBase iter(NULL, this, true, false);
         _execute_func(_meta, _type_specific_function, iter);
         if (niterated) {
diff --git a/src/bthread/execution_queue_inl.h 
b/src/bthread/execution_queue_inl.h
index 64725051..f5998a26 100644
--- a/src/bthread/execution_queue_inl.h
+++ b/src/bthread/execution_queue_inl.h
@@ -38,17 +38,17 @@ struct ExecutionQueueId {
     uint64_t value;
 };
 
-enum TaskStatus {
-    UNEXECUTED = 0,
-    EXECUTING = 1,
-    EXECUTED = 2
-};
-
 struct TaskNode;
 class ExecutionQueueBase;
 typedef void (*clear_task_mem)(TaskNode*);
 
 struct BAIDU_CACHELINE_ALIGNMENT TaskNode {
+    enum TaskStatus {
+        UNEXECUTED = 0,
+        EXECUTING = 1,
+        EXECUTED = 2
+    };
+
     TaskNode()
         : version(0)
         , status(UNEXECUTED)
diff --git a/src/bthread/task_control.cpp b/src/bthread/task_control.cpp
index d0549ea9..55ed1f2e 100644
--- a/src/bthread/task_control.cpp
+++ b/src/bthread/task_control.cpp
@@ -19,6 +19,7 @@
 
 // Date: Tue Jul 10 17:40:58 CST 2012
 
+#include <sys/syscall.h>                   // SYS_gettid
 #include "butil/scoped_lock.h"             // BAIDU_SCOPED_LOCK
 #include "butil/errno.h"                   // berror
 #include "butil/logging.h"
@@ -88,14 +89,19 @@ void* TaskControl::worker_thread(void* arg) {
         LOG(ERROR) << "Fail to create TaskGroup in pthread=" << pthread_self();
         return NULL;
     }
+
+    g->_tid = syscall(SYS_gettid);
+
     std::string worker_thread_name = butil::string_printf(
-        "brpc_wkr:%d-%d", g->tag(), c->_next_worker_id.fetch_add(1, 
butil::memory_order_relaxed));
+        "brpc_wkr:%d-%d", g->tag(),
+        c->_next_worker_id.fetch_add(1, butil::memory_order_relaxed));
     butil::PlatformThread::SetName(worker_thread_name.c_str());
-    BT_VLOG << "Created worker=" << pthread_self() << " bthread=" << 
g->main_tid()
-            << " tag=" << g->tag();
+    BT_VLOG << "Created worker=" << pthread_self() << " tid=" << g->_tid
+            << " bthread=" << g->main_tid() << " tag=" << g->tag();
     tls_task_group = g;
     c->_nworkers << 1;
     c->tag_nworkers(g->tag()) << 1;
+
     g->run_main_task();
 
     stat = g->main_stat();
@@ -208,6 +214,13 @@ int TaskControl::init(int concurrency) {
         LOG(ERROR) << "Fail to get global_timer_thread";
         return -1;
     }
+
+#ifdef BRPC_BTHREAD_TRACER
+    if (!_task_tracer.Init()) {
+        LOG(ERROR) << "Fail to init TaskTracer";
+        return -1;
+    }
+#endif // BRPC_BTHREAD_TRACER
     
     _workers.resize(_concurrency);   
     for (int i = 0; i < _concurrency; ++i) {
@@ -281,6 +294,16 @@ TaskGroup* TaskControl::choose_one_group(bthread_tag_t 
tag) {
     return NULL;
 }
 
+#ifdef BRPC_BTHREAD_TRACER
+void TaskControl::stack_trace(std::ostream& os, bthread_t tid) {
+    _task_tracer.Trace(os, tid);
+}
+
+std::string TaskControl::stack_trace(bthread_t tid) {
+    return _task_tracer.Trace(tid);
+}
+#endif // BRPC_BTHREAD_TRACER
+
 extern int stop_and_join_epoll_threads();
 
 void TaskControl::stop_and_join() {
@@ -301,13 +324,19 @@ void TaskControl::stop_and_join() {
             pl.stop();
         }
     }
-    // Interrupt blocking operations.
-    for (size_t i = 0; i < _workers.size(); ++i) {
-        interrupt_pthread(_workers[i]);
+
+    for (auto worker: _workers) {
+        // Interrupt blocking operations.
+#ifdef BRPC_BTHREAD_TRACER
+        // TaskTracer has registered signal handler for SIGURG.
+        pthread_kill(worker, SIGURG);
+#else
+        interrupt_pthread(worker);
+#endif // BRPC_BTHREAD_TRACER
     }
     // Join workers
-    for (size_t i = 0; i < _workers.size(); ++i) {
-        pthread_join(_workers[i], NULL);
+    for (auto worker : _workers) {
+        pthread_join(worker, NULL);
     }
 }
 
diff --git a/src/bthread/task_control.h b/src/bthread/task_control.h
index 12598079..95820a86 100644
--- a/src/bthread/task_control.h
+++ b/src/bthread/task_control.h
@@ -25,14 +25,15 @@
 #ifndef NDEBUG
 #include <iostream>                             // std::ostream
 #endif
+#include <signal.h>
 #include <stddef.h>                             // size_t
 #include <vector>
 #include <array>
 #include <memory>
 #include "butil/atomicops.h"                     // butil::atomic
 #include "bvar/bvar.h"                          // bvar::PassiveStatus
+#include "bthread/task_tracer.h"
 #include "bthread/task_meta.h"                  // TaskMeta
-#include "butil/resource_pool.h"                 // ResourcePool
 #include "bthread/work_stealing_queue.h"        // WorkStealingQueue
 #include "bthread/parking_lot.h"
 
@@ -43,7 +44,11 @@ class TaskGroup;
 
 // Control all task groups
 class TaskControl {
-    friend class TaskGroup;
+friend class TaskGroup;
+friend void wait_for_butex(void*);
+#ifdef BRPC_BTHREAD_TRACER
+friend bthread_t init_for_pthread_stack_trace();
+#endif // BRPC_BTHREAD_TRACER
 
 public:
     TaskControl();
@@ -86,6 +91,12 @@ public:
     // If this method is called after init(), it never returns NULL.
     TaskGroup* choose_one_group(bthread_tag_t tag);
 
+#ifdef BRPC_BTHREAD_TRACER
+    // A stack trace of a bthread can be helpful in debugging.
+    void stack_trace(std::ostream& os, bthread_t tid);
+    std::string stack_trace(bthread_t tid);
+#endif // BRPC_BTHREAD_TRACER
+
 private:
     typedef std::array<TaskGroup*, BTHREAD_MAX_CONCURRENCY> TaggedGroups;
     static const int PARKING_LOT_NUM = 4;
@@ -144,6 +155,11 @@ private:
     std::vector<bvar::Adder<int64_t>*> _tagged_nbthreads;
 
     std::vector<TaggedParkingLot> _pl;
+
+#ifdef BRPC_BTHREAD_TRACER
+    TaskTracer _task_tracer;
+#endif // BRPC_BTHREAD_TRACER
+
 };
 
 inline bvar::LatencyRecorder& TaskControl::exposed_pending_time() {
diff --git a/src/bthread/task_group.cpp b/src/bthread/task_group.cpp
index f922050e..170b2730 100644
--- a/src/bthread/task_group.cpp
+++ b/src/bthread/task_group.cpp
@@ -35,7 +35,6 @@
 #include "bthread/task_control.h"
 #include "bthread/task_group.h"
 #include "bthread/timer_thread.h"
-#include "bthread/errno.h"
 
 namespace bthread {
 
@@ -45,8 +44,7 @@ static const bthread_attr_t BTHREAD_ATTR_TASKGROUP = {
 static bool pass_bool(const char*, bool) { return true; }
 
 DEFINE_bool(show_bthread_creation_in_vars, false, "When this flags is on, The 
time "
-            "from bthread creation to first run will be recorded and shown "
-            "in /vars");
+            "from bthread creation to first run will be recorded and shown in 
/vars");
 const bool ALLOW_UNUSED dummy_show_bthread_creation_in_vars =
     ::GFLAGS_NS::RegisterFlagValidator(&FLAGS_show_bthread_creation_in_vars,
                                     pass_bool);
@@ -188,8 +186,7 @@ void TaskGroup::run_main_task() {
 }
 
 TaskGroup::TaskGroup(TaskControl* c)
-    :
-    _cur_meta(NULL)
+    : _cur_meta(NULL)
     , _control(c)
     , _num_nosignal(0)
     , _nsignaled(0)
@@ -207,7 +204,7 @@ TaskGroup::TaskGroup(TaskControl* c)
     , _sched_recursive_guard(0)
 #endif
     , _tag(BTHREAD_TAG_DEFAULT)
-{
+    , _tid(-1) {
     _steal_seed = butil::fast_rand();
     _steal_offset = OFFSET_TABLE[_steal_seed % ARRAY_SIZE(OFFSET_TABLE)];
     CHECK(c);
@@ -268,6 +265,9 @@ void TaskGroup::task_runner(intptr_t skip_remained) {
     // NOTE: tls_task_group is volatile since tasks are moved around
     //       different groups.
     TaskGroup* g = tls_task_group;
+#ifdef BRPC_BTHREAD_TRACER
+    TaskTracer::set_running_status(g->tid(), g->_cur_meta);
+#endif // BRPC_BTHREAD_TRACER
 
     if (!skip_remained) {
         while (g->_last_context_remained) {
@@ -343,14 +343,28 @@ void TaskGroup::task_runner(intptr_t skip_remained) {
         // is 0, change it to 1 to make bthread_t never be 0. Any access
         // or join to the bthread after changing version will be rejected.
         // The spinlock is for visibility of TaskGroup::get_attr.
+#ifdef BRPC_BTHREAD_TRACER
+        bool tracing = false;
+#endif // BRPC_BTHREAD_TRACER
         {
             BAIDU_SCOPED_LOCK(m->version_lock);
+#ifdef BRPC_BTHREAD_TRACER
+            tracing = TaskTracer::set_end_status_unsafe(m);
+#endif // BRPC_BTHREAD_TRACER
             if (0 == ++*m->version_butex) {
                 ++*m->version_butex;
             }
         }
         butex_wake_except(m->version_butex, 0);
 
+#ifdef BRPC_BTHREAD_TRACER
+        if (tracing) {
+            // Wait for tracing completion.
+            g->_control->_task_tracer.WaitForTracing(m);
+        }
+        g->_control->_task_tracer.set_status(TASK_STATUS_UNKNOWN, m);
+#endif // BRPC_BTHREAD_TRACER
+
         g->_control->_nbthreads << -1;
         g->_control->tag_nbthreads(g->tag()) << -1;
         g->set_remained(TaskGroup::_release_last_context, m);
@@ -385,7 +399,7 @@ int TaskGroup::start_foreground(TaskGroup** pg,
     const bthread_attr_t using_attr = (attr ? *attr : BTHREAD_ATTR_NORMAL);
     butil::ResourceId<TaskMeta> slot;
     TaskMeta* m = butil::get_resource(&slot);
-    if (__builtin_expect(!m, 0)) {
+    if (BAIDU_UNLIKELY(NULL == m)) {
         return ENOMEM;
     }
     CHECK(m->current_waiter.load(butil::memory_order_relaxed) == NULL);
@@ -412,9 +426,12 @@ int TaskGroup::start_foreground(TaskGroup** pg,
     TaskGroup* g = *pg;
     g->_control->_nbthreads << 1;
     g->_control->tag_nbthreads(g->tag()) << 1;
+#ifdef BRPC_BTHREAD_TRACER
+    g->_control->_task_tracer.set_status(TASK_STATUS_CREATED, m);
+#endif // BRPC_BTHREAD_TRACER
     if (g->is_current_pthread_task()) {
         // never create foreground task in pthread.
-        g->ready_to_run(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL));
+        g->ready_to_run(m, using_attr.flags & BTHREAD_NOSIGNAL);
     } else {
         // NOSIGNAL affects current task, not the new task.
         RemainedFn fn = NULL;
@@ -423,10 +440,7 @@ int TaskGroup::start_foreground(TaskGroup** pg,
         } else {
             fn = ready_to_run_in_worker;
         }
-        ReadyToRunArgs args = {
-            g->current_tid(),
-            (bool)(using_attr.flags & BTHREAD_NOSIGNAL)
-        };
+        ReadyToRunArgs args = { g->_cur_meta, (bool)(using_attr.flags & 
BTHREAD_NOSIGNAL) };
         g->set_remained(fn, &args);
         TaskGroup::sched_to(pg, m->tid);
     }
@@ -445,7 +459,7 @@ int TaskGroup::start_background(bthread_t* __restrict th,
     const bthread_attr_t using_attr = (attr ? *attr : BTHREAD_ATTR_NORMAL);
     butil::ResourceId<TaskMeta> slot;
     TaskMeta* m = butil::get_resource(&slot);
-    if (__builtin_expect(!m, 0)) {
+    if (BAIDU_UNLIKELY(NULL == m)) {
         return ENOMEM;
     }
     CHECK(m->current_waiter.load(butil::memory_order_relaxed) == NULL);
@@ -470,10 +484,13 @@ int TaskGroup::start_background(bthread_t* __restrict th,
     }
     _control->_nbthreads << 1;
     _control->tag_nbthreads(tag()) << 1;
+#ifdef BRPC_BTHREAD_TRACER
+    _control->_task_tracer.set_status(TASK_STATUS_CREATED, m);
+#endif // BRPC_BTHREAD_TRACER
     if (REMOTE) {
-        ready_to_run_remote(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL));
+        ready_to_run_remote(m, (using_attr.flags & BTHREAD_NOSIGNAL));
     } else {
-        ready_to_run(m->tid, (using_attr.flags & BTHREAD_NOSIGNAL));
+        ready_to_run(m, (using_attr.flags & BTHREAD_NOSIGNAL));
     }
     return 0;
 }
@@ -495,7 +512,7 @@ int TaskGroup::join(bthread_t tid, void** return_value) {
         return EINVAL;
     }
     TaskMeta* m = address_meta(tid);
-    if (__builtin_expect(!m, 0)) {
+    if (BAIDU_UNLIKELY(NULL == m)) {
         // The bthread is not created yet, this join is definitely wrong.
         return EINVAL;
     }
@@ -642,9 +659,16 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* 
next_meta) {
         if (cur_meta->stack != NULL) {
             if (next_meta->stack != cur_meta->stack) {
                 CheckBthreadScheSafety();
+#ifdef BRPC_BTHREAD_TRACER
+                g->_control->_task_tracer.set_status(TASK_STATUS_JUMPING, 
cur_meta);
+                g->_control->_task_tracer.set_status(TASK_STATUS_JUMPING, 
next_meta);
+#endif // BRPC_BTHREAD_TRACER
                 jump_stack(cur_meta->stack, next_meta->stack);
                 // probably went to another group, need to assign g again.
                 g = BAIDU_GET_VOLATILE_THREAD_LOCAL(tls_task_group);
+#ifdef BRPC_BTHREAD_TRACER
+                TaskTracer::set_running_status(g->tid(), g->_cur_meta);
+#endif // BRPC_BTHREAD_TRACER
             }
 #ifndef NDEBUG
             else {
@@ -653,8 +677,13 @@ void TaskGroup::sched_to(TaskGroup** pg, TaskMeta* 
next_meta) {
                 CHECK(cur_meta->stack == g->_main_stack);
             }
 #endif
+        } /* else because of ending_sched(including 
pthread_task->pthread_task). */
+#ifdef BRPC_BTHREAD_TRACER
+        else {
+            // _cur_meta: TASK_STATUS_FIRST_READY -> TASK_STATUS_RUNNING.
+            TaskTracer::set_running_status(g->tid(), g->_cur_meta);
         }
-        // else because of ending_sched(including pthread_task->pthread_task)
+#endif // BRPC_BTHREAD_TRACER
     } else {
         LOG(FATAL) << "bthread=" << g->current_tid() << " sched_to itself!";
     }
@@ -686,8 +715,12 @@ void TaskGroup::destroy_self() {
     }
 }
 
-void TaskGroup::ready_to_run(bthread_t tid, bool nosignal) {
-    push_rq(tid);
+
+void TaskGroup::ready_to_run(TaskMeta* meta, bool nosignal) {
+#ifdef BRPC_BTHREAD_TRACER
+    _control->_task_tracer.set_status(TASK_STATUS_READY, meta);
+#endif // BRPC_BTHREAD_TRACER
+    push_rq(meta->tid);
     if (nosignal) {
         ++_num_nosignal;
     } else {
@@ -707,9 +740,12 @@ void TaskGroup::flush_nosignal_tasks() {
     }
 }
 
-void TaskGroup::ready_to_run_remote(bthread_t tid, bool nosignal) {
+void TaskGroup::ready_to_run_remote(TaskMeta* meta, bool nosignal) {
+#ifdef BRPC_BTHREAD_TRACER
+    _control->_task_tracer.set_status(TASK_STATUS_READY, meta);
+#endif // BRPC_BTHREAD_TRACER
     _remote_rq._mutex.lock();
-    while (!_remote_rq.push_locked(tid)) {
+    while (!_remote_rq.push_locked(meta->tid)) {
         flush_nosignal_tasks_remote_locked(_remote_rq._mutex);
         LOG_EVERY_SECOND(ERROR) << "_remote_rq is full, capacity="
                                 << _remote_rq.capacity();
@@ -740,11 +776,11 @@ void 
TaskGroup::flush_nosignal_tasks_remote_locked(butil::Mutex& locked_mutex) {
     _control->signal_task(val, _tag);
 }
 
-void TaskGroup::ready_to_run_general(bthread_t tid, bool nosignal) {
+void TaskGroup::ready_to_run_general(TaskMeta* meta, bool nosignal) {
     if (tls_task_group == this) {
-        return ready_to_run(tid, nosignal);
+        return ready_to_run(meta, nosignal);
     }
-    return ready_to_run_remote(tid, nosignal);
+    return ready_to_run_remote(meta, nosignal);
 }
 
 void TaskGroup::flush_nosignal_tasks_general() {
@@ -756,12 +792,16 @@ void TaskGroup::flush_nosignal_tasks_general() {
 
 void TaskGroup::ready_to_run_in_worker(void* args_in) {
     ReadyToRunArgs* args = static_cast<ReadyToRunArgs*>(args_in);
-    return tls_task_group->ready_to_run(args->tid, args->nosignal);
+    return tls_task_group->ready_to_run(args->meta, args->nosignal);
 }
 
 void TaskGroup::ready_to_run_in_worker_ignoresignal(void* args_in) {
     ReadyToRunArgs* args = static_cast<ReadyToRunArgs*>(args_in);
-    return tls_task_group->push_rq(args->tid);
+#ifdef BRPC_BTHREAD_TRACER
+    tls_task_group->_control->_task_tracer.set_status(
+        TASK_STATUS_READY, args->meta);
+#endif // BRPC_BTHREAD_TRACER
+    return tls_task_group->push_rq(args->meta->tid);
 }
 
 struct SleepArgs {
@@ -776,7 +816,7 @@ static void ready_to_run_from_timer_thread(void* arg) {
     const SleepArgs* e = static_cast<const SleepArgs*>(arg);
     auto g = e->group;
     auto tag = g->tag();
-    g->control()->choose_one_group(tag)->ready_to_run_remote(e->tid);
+    g->control()->choose_one_group(tag)->ready_to_run_remote(e->meta);
 }
 
 void TaskGroup::_add_sleep_event(void* void_args) {
@@ -785,6 +825,9 @@ void TaskGroup::_add_sleep_event(void* void_args) {
     // will be gone.
     SleepArgs e = *static_cast<SleepArgs*>(void_args);
     TaskGroup* g = e.group;
+#ifdef BRPC_BTHREAD_TRACER
+    g->_control->_task_tracer.set_status(TASK_STATUS_SUSPENDED, e.meta);
+#endif // BRPC_BTHREAD_TRACER
 
     TimerThread::TaskId sleep_id;
     sleep_id = get_global_timer_thread()->schedule(
@@ -794,7 +837,7 @@ void TaskGroup::_add_sleep_event(void* void_args) {
     if (!sleep_id) {
         e.meta->sleep_failed = true;
         // Fail to schedule timer, go back to previous thread.
-        g->ready_to_run(e.tid);
+        g->ready_to_run(e.meta);
         return;
     }
 
@@ -818,7 +861,7 @@ void TaskGroup::_add_sleep_event(void* void_args) {
         // schedule previous thread as well. If sleep_id does not exist,
         // previous thread is scheduled by timer thread before and we don't
         // have to do it again.
-        g->ready_to_run(e.tid);
+        g->ready_to_run(e.meta);
     }
 }
 
@@ -921,12 +964,12 @@ int TaskGroup::interrupt(bthread_t tid, TaskControl* c, bthread_tag_t tag) {
         if (get_global_timer_thread()->unschedule(sleep_id) == 0) {
             bthread::TaskGroup* g = bthread::tls_task_group;
             if (g) {
-                g->ready_to_run(tid);
+                g->ready_to_run(TaskGroup::address_meta(tid));
             } else {
                 if (!c) {
                     return EINVAL;
                 }
-                c->choose_one_group(tag)->ready_to_run_remote(tid);
+                c->choose_one_group(tag)->ready_to_run_remote(TaskGroup::address_meta(tid));
             }
         }
     }
@@ -935,7 +978,7 @@ int TaskGroup::interrupt(bthread_t tid, TaskControl* c, bthread_tag_t tag) {
 
 void TaskGroup::yield(TaskGroup** pg) {
     TaskGroup* g = *pg;
-    ReadyToRunArgs args = { g->current_tid(), false };
+    ReadyToRunArgs args = { g->_cur_meta, false };
     g->set_remained(ready_to_run_in_worker, &args);
     sched(pg);
 }
@@ -957,6 +1000,9 @@ void print_task(std::ostream& os, bthread_t tid) {
     bool has_tls = false;
     int64_t cpuwide_start_ns = 0;
     TaskStatistics stat = {0, 0};
+    TaskStatus status = TASK_STATUS_UNKNOWN;
+    bool traced = false;
+    pid_t worker_tid = 0;
     {
         BAIDU_SCOPED_LOCK(m->version_lock);
         if (given_ver == *m->version_butex) {
@@ -970,6 +1016,9 @@ void print_task(std::ostream& os, bthread_t tid) {
             has_tls = m->local_storage.keytable;
             cpuwide_start_ns = m->cpuwide_start_ns;
             stat = m->stat;
+            status = m->status;
+            traced = m->traced;
+            worker_tid = m->worker_tid;
         }
     }
     if (!matched) {
@@ -986,7 +1035,15 @@ void print_task(std::ostream& os, bthread_t tid) {
            << "}\nhas_tls=" << has_tls
            << "\nuptime_ns=" << butil::cpuwide_time_ns() - cpuwide_start_ns
            << "\ncputime_ns=" << stat.cputime_ns
-           << "\nnswitch=" << stat.nswitch;
+           << "\nnswitch=" << stat.nswitch
+#ifdef BRPC_BTHREAD_TRACER
+           << "\nstatus=" << status
+           << "\ntraced=" << traced
+           << "\nworker_tid=" << worker_tid;
+#else
+           ;
+           (void)status;(void)traced;(void)worker_tid;
+#endif // BRPC_BTHREAD_TRACER
     }
 }
 
diff --git a/src/bthread/task_group.h b/src/bthread/task_group.h
index 7c197c01..c3d2ae46 100644
--- a/src/bthread/task_group.h
+++ b/src/bthread/task_group.h
@@ -83,7 +83,7 @@ public:
     // then being popped by sched(pg), which is not necessary.
     static void sched_to(TaskGroup** pg, TaskMeta* next_meta);
     static void sched_to(TaskGroup** pg, bthread_t next_tid);
-    static void exchange(TaskGroup** pg, bthread_t next_tid);
+    static void exchange(TaskGroup** pg, TaskMeta* next_meta);
 
     // The callback will be run in the beginning of next-run bthread.
     // Can't be called by current bthread directly because it often needs
@@ -151,18 +151,18 @@ public:
     int64_t cumulated_cputime_ns() const { return _cumulated_cputime_ns; }
 
     // Push a bthread into the runqueue
-    void ready_to_run(bthread_t tid, bool nosignal = false);
+    void ready_to_run(TaskMeta* meta, bool nosignal = false);
     // Flush tasks pushed to rq but signalled.
     void flush_nosignal_tasks();
 
     // Push a bthread into the runqueue from another non-worker thread.
-    void ready_to_run_remote(bthread_t tid, bool nosignal = false);
+    void ready_to_run_remote(TaskMeta* meta, bool nosignal = false);
     void flush_nosignal_tasks_remote_locked(butil::Mutex& locked_mutex);
     void flush_nosignal_tasks_remote();
 
     // Automatically decide the caller is remote or local, and call
     // the corresponding function.
-    void ready_to_run_general(bthread_t tid, bool nosignal = false);
+    void ready_to_run_general(TaskMeta* meta, bool nosignal = false);
     void flush_nosignal_tasks_general();
 
     // The TaskControl that this TaskGroup belongs to.
@@ -189,6 +189,8 @@ public:
 
     bthread_tag_t tag() const { return _tag; }
 
+    pid_t tid() const { return _tid; }
+
     int64_t current_task_cpu_clock_ns() {
         if (_last_cpu_clock_ns == 0) {
             return 0;
@@ -206,7 +208,7 @@ friend class TaskControl;
 
     int init(size_t runqueue_capacity);
 
-    // You shall call destroy_self() instead of destructor because deletion
+    // You shall call destroy_self() instead of destructor because deletion
     // of groups are postponed to avoid race.
     ~TaskGroup();
 
@@ -216,7 +218,7 @@ friend class TaskControl;
     static void _release_last_context(void*);
     static void _add_sleep_event(void*);
     struct ReadyToRunArgs {
-        bthread_t tid;
+        TaskMeta* meta;
         bool nosignal;
     };
     static void ready_to_run_in_worker(void*);
@@ -273,6 +275,9 @@ friend class TaskControl;
     int _sched_recursive_guard;
     // tag of this taskgroup
     bthread_tag_t _tag;
+
+    // Worker thread id.
+    pid_t _tid;
 };
 
 }  // namespace bthread
diff --git a/src/bthread/task_group_inl.h b/src/bthread/task_group_inl.h
index 45626ceb..75c377e1 100644
--- a/src/bthread/task_group_inl.h
+++ b/src/bthread/task_group_inl.h
@@ -46,17 +46,17 @@ inline TaskMeta* TaskGroup::address_meta(bthread_t tid) {
     return address_resource(get_slot(tid));
 }
 
-inline void TaskGroup::exchange(TaskGroup** pg, bthread_t next_tid) {
+inline void TaskGroup::exchange(TaskGroup** pg, TaskMeta* next_meta) {
     TaskGroup* g = *pg;
     if (g->is_current_pthread_task()) {
-        return g->ready_to_run(next_tid);
+        return g->ready_to_run(next_meta);
     }
-    ReadyToRunArgs args = { g->current_tid(), false };
+    ReadyToRunArgs args = { g->_cur_meta, false };
     g->set_remained((g->current_task()->about_to_quit
                      ? ready_to_run_in_worker_ignoresignal
                      : ready_to_run_in_worker),
                     &args);
-    TaskGroup::sched_to(pg, next_tid);
+    TaskGroup::sched_to(pg, next_meta);
 }
 
 inline void TaskGroup::sched_to(TaskGroup** pg, bthread_t next_tid) {
diff --git a/src/bthread/task_meta.h b/src/bthread/task_meta.h
index 3d901633..34d86632 100644
--- a/src/bthread/task_meta.h
+++ b/src/bthread/task_meta.h
@@ -27,6 +27,7 @@
 #include "butil/atomicops.h"          // butil::atomic
 #include "bthread/types.h"           // bthread_attr_t
 #include "bthread/stack.h"           // ContextualStack
+#include "bthread/timer_thread.h"
 
 namespace bthread {
 
@@ -49,59 +50,75 @@ struct LocalStorage {
 
 const static LocalStorage LOCAL_STORAGE_INIT = BTHREAD_LOCAL_STORAGE_INITIALIZER;
 
+enum TaskStatus {
+    TASK_STATUS_UNKNOWN,
+    TASK_STATUS_CREATED,
+    TASK_STATUS_FIRST_READY,
+    TASK_STATUS_READY,
+    TASK_STATUS_JUMPING,
+    TASK_STATUS_RUNNING,
+    TASK_STATUS_SUSPENDED,
+    TASK_STATUS_END,
+};
+
 struct TaskMeta {
     // [Not Reset]
-    butil::atomic<ButexWaiter*> current_waiter;
-    uint64_t current_sleep;
+    butil::atomic<ButexWaiter*> current_waiter{NULL};
+    uint64_t current_sleep{TimerThread::INVALID_TASK_ID};
 
     // A flag to mark if the Timer scheduling failed.
-    bool sleep_failed;
+    bool sleep_failed{false};
 
     // A builtin flag to mark if the thread is stopping.
-    bool stop;
+    bool stop{false};
 
     // The thread is interrupted and should wake up from some blocking ops.
-    bool interrupted;
+    bool interrupted{false};
 
     // Scheduling of the thread can be delayed.
-    bool about_to_quit;
+    bool about_to_quit{false};
     
     // [Not Reset] guarantee visibility of version_butex.
-    pthread_spinlock_t version_lock;
+    pthread_spinlock_t version_lock{};
     
     // [Not Reset] only modified by one bthread at any time, no need to be atomic
-    uint32_t* version_butex;
+    uint32_t* version_butex{NULL};
 
     // The identifier. It does not have to be here, however many code is
     // simplified if they can get tid from TaskMeta.
-    bthread_t tid;
+    bthread_t tid{INVALID_BTHREAD};
 
     // User function and argument
-    void* (*fn)(void*);
-    void* arg;
+    void* (*fn)(void*){NULL};
+    void* arg{NULL};
 
     // Stack of this task.
-    ContextualStack* stack;
+    ContextualStack* stack{NULL};
 
     // Attributes creating this task
-    bthread_attr_t attr;
+    bthread_attr_t attr{BTHREAD_ATTR_NORMAL};
     
     // Statistics
-    int64_t cpuwide_start_ns;
-    TaskStatistics stat;
+    int64_t cpuwide_start_ns{0};
+    TaskStatistics stat{};
 
     // bthread local storage, sync with tls_bls (defined in task_group.cpp)
     // when the bthread is created or destroyed.
     // DO NOT use this field directly, use tls_bls instead.
-    LocalStorage local_storage;
+    LocalStorage local_storage{};
+
+    // Only used when TaskTracer is enabled.
+    // Bthread status.
+    TaskStatus status{TASK_STATUS_UNKNOWN};
+    // Whether the bthread is being traced.
+    bool traced{false};
+    // Worker thread id.
+    pid_t worker_tid{-1};
 
 public:
     // Only initialize [Not Reset] fields, other fields will be reset in
     // bthread_start* functions
-    TaskMeta()
-        : current_waiter(NULL)
-        , current_sleep(0)
-        , stack(NULL) {
+    TaskMeta() {
         pthread_spin_init(&version_lock, 0);
         version_butex = butex_create_checked<uint32_t>();
         *version_butex = 1;
diff --git a/src/bthread/task_tracer.cpp b/src/bthread/task_tracer.cpp
new file mode 100644
index 00000000..acdf9208
--- /dev/null
+++ b/src/bthread/task_tracer.cpp
@@ -0,0 +1,569 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifdef BRPC_BTHREAD_TRACER
+
+#include "bthread/task_tracer.h"
+#include <unistd.h>
+#include <poll.h>
+#include <gflags/gflags.h>
+#include "butil/debug/stack_trace.h"
+#include "butil/memory/scope_guard.h"
+#include "butil/reloadable_flags.h"
+#include "bthread/task_group.h"
+#include "bthread/processor.h"
+
+namespace bthread {
+
+DEFINE_bool(enable_fast_unwind, true, "Whether to enable fast unwind");
+DEFINE_uint32(signal_trace_timeout_ms, 50, "Timeout for signal trace in ms");
+BUTIL_VALIDATE_GFLAG(signal_trace_timeout_ms, butil::PositiveInteger<uint32_t>);
+
+extern BAIDU_THREAD_LOCAL TaskMeta* pthread_fake_meta;
+
+TaskTracer::SignalSync::~SignalSync() {
+    if (_pipe_init) {
+        close(pipe_fds[0]);
+        close(pipe_fds[1]);
+    }
+
+    if (_sem_init) {
+        sem_destroy(&sem);
+    }
+}
+
+bool TaskTracer::SignalSync::Init() {
+    if (pipe(pipe_fds) != 0) {
+        PLOG(ERROR) << "Fail to pipe";
+        return false;
+    }
+    if (butil::make_non_blocking(pipe_fds[0]) != 0) {
+        PLOG(ERROR) << "Fail to make_non_blocking";
+        return false;
+    }
+    if (butil::make_non_blocking(pipe_fds[1]) != 0) {
+        PLOG(ERROR) << "Fail to make_non_blocking";
+        return false;
+    }
+    _pipe_init = true;
+
+    if (sem_init(&sem, 0, 0) != 0) {
+        PLOG(ERROR) << "Fail to sem_init";
+        return false;
+    }
+    _sem_init = true;
+
+    return true;
+}
+
+std::string TaskTracer::Result::OutputToString() {
+    std::string str;
+    if (err_count > 0 || frame_count > 0) {
+        str.reserve(1024);
+    }
+    if (frame_count > 0) {
+        if (fast_unwind) {
+            butil::debug::StackTrace stack_trace((void**)&ips, frame_count);
+            stack_trace.OutputToString(str);
+        } else {
+            for (size_t i = 0; i < frame_count; ++i) {
+                butil::string_appendf(&str, "#%zu 0x%016lx ", i, ips[i]);
+                if (mangled[i][0] == '\0') {
+                    str.append("<unknown>");
+                } else {
+                    str.append(butil::demangle(mangled[i]));
+                }
+                if (i + 1 < frame_count) {
+                    str.push_back('\n');
+                }
+            }
+        }
+    } else {
+        str.append("No frame");
+    }
+
+    if (err_count > 0) {
+        str.append("\nError message:\n");
+    }
+    for (size_t i = 0; i < err_count; ++i) {
+        str.append(err_msg[i]);
+        if (i + 1 < err_count) {
+            str.push_back('\n');
+        }
+    }
+
+    return str;
+}
+
+void TaskTracer::Result::OutputToStream(std::ostream& os) {
+    if (frame_count > 0) {
+        if (fast_unwind) {
+            butil::debug::StackTrace stack_trace((void**)&ips, frame_count);
+            stack_trace.OutputToStream(&os);
+        } else {
+            for (size_t i = 0; i < frame_count; ++i) {
+                os << "# " << i << " 0x" << std::hex << ips[i] << std::dec << 
" ";
+                if (mangled[i][0] == '\0') {
+                    os << "<unknown>";
+                } else {
+                    os << butil::demangle(mangled[i]);
+                }
+                if (i + 1 < frame_count) {
+                    os << '\n';
+                }
+            }
+        }
+    } else {
+        os << "No frame";
+    }
+
+    if (err_count == 0) {
+        return;
+    }
+
+    os << "\nError message:\n";
+    for (size_t i = 0; i < err_count; ++i) {
+        os << err_msg[i];
+        if (i + 1 < err_count) {
+            os << '\n';
+        }
+    }
+}
+
+bool TaskTracer::Init() {
+    if (_trace_time.expose("bthread_trace_time") != 0) {
+        return false;
+    }
+    if (!RegisterSignalHandler()) {
+        return false;
+    }
+    // Warm up the libunwind.
+    unw_cursor_t cursor;
+    if (unw_getcontext(&_context) == 0 && unw_init_local(&cursor, &_context) == 0) {
+        butil::ignore_result(TraceCore(cursor));
+    }
+    return true;
+}
+
+void TaskTracer::set_status(TaskStatus s, TaskMeta* m) {
+    CHECK_NE(TASK_STATUS_RUNNING, s) << "Use `set_running_status' instead";
+    CHECK_NE(TASK_STATUS_END, s) << "Use `set_end_status_unsafe' instead";
+
+    bool tracing;
+    {
+        BAIDU_SCOPED_LOCK(m->version_lock);
+        if (TASK_STATUS_UNKNOWN == m->status && TASK_STATUS_JUMPING == s) {
+            // Do not update status for jumping when bthread is ending.
+            return;
+        }
+
+        tracing = m->traced;
+        // bthread is scheduled for the first time.
+        if (TASK_STATUS_READY == s || NULL == m->stack) {
+            m->status = TASK_STATUS_FIRST_READY;
+        } else {
+            m->status = s;
+        }
+        if (TASK_STATUS_CREATED == s) {
+            m->worker_tid = -1;
+        }
+    }
+
+    // Make sure bthread does not jump stack when it is being traced.
+    if (tracing && TASK_STATUS_JUMPING == s) {
+        WaitForTracing(m);
+    }
+}
+
+void TaskTracer::set_running_status(pid_t worker_tid, TaskMeta* m) {
+    BAIDU_SCOPED_LOCK(m->version_lock);
+    m->worker_tid = worker_tid;
+    m->status = TASK_STATUS_RUNNING;
+}
+
+bool TaskTracer::set_end_status_unsafe(TaskMeta* m) {
+    m->status = TASK_STATUS_END;
+    return m->traced;
+}
+
+std::string TaskTracer::Trace(bthread_t tid) {
+    return TraceImpl(tid).OutputToString();
+}
+
+void TaskTracer::Trace(std::ostream& os, bthread_t tid) {
+    TraceImpl(tid).OutputToStream(os);
+}
+
+void TaskTracer::WaitForTracing(TaskMeta* m) {
+    BAIDU_SCOPED_LOCK(_mutex);
+    while (m->traced) {
+        _cond.Wait();
+    }
+}
+
+TaskStatus TaskTracer::WaitForJumping(TaskMeta* m) {
+    // Reasons for not using locks here:
+    // 1. It is necessary to lock before jump_stack, unlock after jump_stack,
+    //    which involves two different bthread and is prone to errors.
+    // 2. jump_stack is fast.
+    int i = 0;
+    do {
+        // The bthread is jumping now, spin until it finishes.
+        if (i++ < 30) {
+            cpu_relax();
+        } else {
+            sched_yield();
+        }
+
+        BAIDU_SCOPED_LOCK(m->version_lock);
+        if (TASK_STATUS_JUMPING != m->status) {
+            return m->status;
+        }
+    } while (true);
+}
+
+TaskTracer::Result TaskTracer::TraceImpl(bthread_t tid) {
+    butil::Timer timer(butil::Timer::STARTED);
+    BRPC_SCOPE_EXIT {
+        timer.stop();
+        _trace_time << timer.n_elapsed();
+    };
+
+    if (tid == bthread_self() ||
+        (NULL != pthread_fake_meta && tid == pthread_fake_meta->tid)) {
+        return Result::MakeErrorResult("Can not trace self=%d", tid);
+    }
+
+    // Make sure only one bthread is traced at a time.
+    BAIDU_SCOPED_LOCK(_trace_request_mutex);
+
+    TaskMeta* m = TaskGroup::address_meta(tid);
+    if (NULL == m) {
+        return Result::MakeErrorResult("bthread=%d never existed", tid);
+    }
+
+    BAIDU_SCOPED_LOCK(_mutex);
+    TaskStatus status;
+    pid_t worker_tid;
+    const uint32_t given_version = get_version(tid);
+    {
+        BAIDU_SCOPED_LOCK(m->version_lock);
+        if (given_version == *m->version_butex) {
+            // Start tracing.
+            m->traced = true;
+            worker_tid = m->worker_tid;
+            status = m->status;
+        } else {
+            return Result::MakeErrorResult("bthread=%d not exist now", tid);
+        }
+    }
+
+    if (TASK_STATUS_UNKNOWN == status) {
+        return Result::MakeErrorResult("bthread=%d not exist now", tid);
+    } else if (TASK_STATUS_CREATED == status) {
+        return Result::MakeErrorResult("bthread=%d has just been created", 
tid);
+    } else if (TASK_STATUS_FIRST_READY == status) {
+        return Result::MakeErrorResult("bthread=%d is scheduled for the first 
time", tid);
+    } else if (TASK_STATUS_END == status) {
+        return Result::MakeErrorResult("bthread=%d has ended", tid);
+    } else if (TASK_STATUS_JUMPING == status) {
+        // Wait for jumping completion.
+        status = WaitForJumping(m);
+    }
+
+    // After jumping, the status may be RUNNING, SUSPENDED, or READY, which is traceable.
+    Result result{};
+    if (TASK_STATUS_RUNNING == status) {
+        result = SignalTrace(worker_tid);
+    } else if (TASK_STATUS_SUSPENDED == status || TASK_STATUS_READY == status) {
+        result = ContextTrace(m->stack->context);
+    }
+
+    {
+        BAIDU_SCOPED_LOCK(m->version_lock);
+        // If m->status is TASK_STATUS_END, the bthread also waits for tracing completion,
+        // so given_version != *m->version_butex is OK.
+        m->traced = false;
+    }
+    // Wake up the waiting worker thread to jump.
+    _cond.Signal();
+
+    return result;
+}
+
+unw_cursor_t TaskTracer::MakeCursor(bthread_fcontext_t fcontext) {
+    unw_cursor_t cursor;
+    unw_init_local(&cursor, &_context);
+    auto regs = reinterpret_cast<uintptr_t*>(fcontext);
+
+    // Only need RBP, RIP, RSP on x86_64.
+    // The base pointer (RBP).
+    if (unw_set_reg(&cursor, UNW_X86_64_RBP, regs[6]) != 0) {
+        LOG(ERROR) << "Fail to set RBP";
+    }
+    // The instruction pointer (RIP).
+    if (unw_set_reg(&cursor, UNW_REG_IP, regs[7]) != 0) {
+        LOG(ERROR) << "Fail to set RIP";
+    }
+#if UNW_VERSION_MAJOR >= 1 && UNW_VERSION_MINOR >= 7
+    // The stack pointer (RSP).
+    if (unw_set_reg(&cursor, UNW_REG_SP, regs[8]) != 0) {
+        LOG(ERROR) << "Fail to set RSP";
+    }
+#endif
+
+    return cursor;
+}
+
+TaskTracer::Result TaskTracer::ContextTrace(bthread_fcontext_t fcontext) {
+    unw_cursor_t cursor = MakeCursor(fcontext);
+    return TraceCore(cursor);
+}
+
+bool TaskTracer::RegisterSignalHandler() {
+    // Set up the signal handler.
+    struct sigaction old_sa{};
+    struct sigaction sa{};
+    sa.sa_sigaction = SignalHandler;
+    sa.sa_flags = SA_SIGINFO;
+    sigfillset(&sa.sa_mask);
+    if (sigaction(SIGURG, &sa, &old_sa) != 0) {
+        PLOG(ERROR) << "Failed to sigaction";
+        return false;
+    }
+    if (NULL != old_sa.sa_handler || NULL != old_sa.sa_sigaction) {
+        LOG(ERROR) << "Signal handler of SIGURG is already registered";
+        return false;
+    }
+
+    return true;
+}
+
+// Caution: This function should be async-signal-safe.
+void TaskTracer::SignalHandler(int, siginfo_t* info, void* context) {
+    ErrnoGuard guard;
+    butil::intrusive_ptr<SignalSync> signal_sync(
+        static_cast<SignalSync*>(info->si_value.sival_ptr));
+    if (NULL == signal_sync) {
+        // The signal is not from Tracer, such as TaskControl, do nothing.
+        return;
+    }
+
+    signal_sync->context = static_cast<unw_context_t*>(context);
+    // Notify SignalTrace that SignalHandler has started.
+    // Posting the semaphore does not fail here, so no need to check the return value.
+    // sem_post() is async-signal-safe.
+    sem_post(&signal_sync->sem);
+
+    butil::Timer timer;
+    if (FLAGS_signal_trace_timeout_ms > 0) {
+        timer.start();
+    }
+    int timeout = -1;
+    pollfd poll_fd = {signal_sync->pipe_fds[0], POLLIN, 0};
+    // Wait for tracing to complete.
+    while (true) {
+        if (FLAGS_signal_trace_timeout_ms > 0) {
+            timer.stop();
+            // At least 1ms timeout.
+            timeout = std::max(
+                (int64_t)FLAGS_signal_trace_timeout_ms - timer.m_elapsed(), (int64_t)1);
+        }
+        // poll() is async-signal-safe.
+        // Similar to self-pipe trick: https://man7.org/tlpi/code/online/dist/altio/self_pipe.c.html
+        int rc = poll(&poll_fd, 1, timeout);
+        if (-1 == rc && EINTR == errno) {
+            continue;
+        }
+        // No need to read the pipe or handle errors, just return.
+        return;
+    }
+}
+
+// Caution: This function should be async-signal-safe.
+bool TaskTracer::WaitForSignalHandler(butil::intrusive_ptr<SignalSync> signal_sync,
+                                      const timespec* abs_timeout, Result& result) {
+    // It is safe to sem_timedwait() here and sem_post() in SignalHandler.
+    while (sem_timedwait(&signal_sync->sem, abs_timeout) != 0) {
+        if (EINTR == errno) {
+            continue;
+        }
+        if (ETIMEDOUT == errno) {
+            result.SetError("Timeout exceed %dms", 
FLAGS_signal_trace_timeout_ms);
+        } else {
+            // During the process of signal handler,
+            // can not use berro() which is not async-signal-safe.
+            result.SetError("Fail to sem_timedwait, errno=%d", errno);
+        }
+        return false;
+    }
+    return true;
+}
+
+// Caution: This function should be async-signal-safe.
+void TaskTracer::WakeupSignalHandler(butil::intrusive_ptr<SignalSync> signal_sync, Result& result) {
+    while (true) {
+        ssize_t nw = write(signal_sync->pipe_fds[1], "1", 1);
+        if (0 < nw) {
+            break;
+        } else if (-1 == nw && EINTR == errno) {
+            // Only EINTR is allowed. Even EAGAIN should not be returned.
+            continue;
+        }
+        // While the signal handler is in progress,
+        // we can not use berror(), which is not async-signal-safe.
+        result.SetError("Fail to write pipe to notify signal handler, errno=%d", errno);
+    }
+}
+
+TaskTracer::Result TaskTracer::SignalTrace(pid_t tid) {
+    // CAUTION:
+    // The signal handler will wait for the backtrace to complete.
+    // If the worker thread is interrupted when holding a resource(lock, etc),
+    // and this function waits for the resource during capturing backtraces,
+    // it may cause a deadlock.
+    //
+    // https://github.com/gperftools/gperftools/wiki/gperftools'-stacktrace-capturing-methods-and-their-issues#libunwind
+    // Generally, libunwind promises async-signal-safety for capturing backtraces.
+    // But in practice, it is only partially async-signal-safe due to reliance on
+    // the dl_iterate_phdr API, which is used to enumerate all loaded ELF modules
+    // (.so files and the main executable binary). No libc offers a dl_iterate_phdr
+    // that is async-signal-safe. In practice, the issue may happen if we take the
+    // tracing signal during an existing dl_iterate_phdr call (such as when the
+    // program throws an exception) or during dlopen/dlclose-ing some .so module.
+    // Deadlock call stack:
+    // #0  __lll_lock_wait (futex=futex@entry=0x7f0d3d7f0990 
<_rtld_global+2352>, private=0) at lowlevellock.c:52
+    // #1  0x00007f0d3a73c131 in __GI___pthread_mutex_lock 
(mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at ../nptl/pthread_mutex_lock.c:115
+    // #2  0x00007f0d38eb0231 in __GI___dl_iterate_phdr 
(callback=callback@entry=0x7f0d38c456a0 <_ULx86_64_dwarf_callback>, 
data=data@entry=0x7f0d07defad0) at dl-iteratephdr.c:40
+    // #3  0x00007f0d38c45d79 in _ULx86_64_dwarf_find_proc_info 
(as=0x7f0d38c4f340 <local_addr_space>, ip=ip@entry=139694791966897, 
pi=pi@entry=0x7f0d07df0498, need_unwind_info=need_unwind_info@entry=1, 
arg=0x7f0d07df0340) at dwarf/Gfind_proc_info-lsb.c:759
+    // #4  0x00007f0d38c43260 in fetch_proc_info (c=c@entry=0x7f0d07df0340, 
ip=139694791966897) at dwarf/Gparser.c:461
+    // #5  0x00007f0d38c44e46 in find_reg_state (sr=0x7f0d07defd10, 
c=0x7f0d07df0340) at dwarf/Gparser.c:925
+    // #6  _ULx86_64_dwarf_step (c=c@entry=0x7f0d07df0340) at 
dwarf/Gparser.c:972
+    // #7  0x00007f0d38c40c14 in _ULx86_64_step 
(cursor=cursor@entry=0x7f0d07df0340) at x86_64/Gstep.c:71
+    // #8  0x00007f0d399ed8f6 in GetStackTraceWithContext_libunwind 
(result=<optimized out>, max_depth=63, skip_count=132054887, ucp=<optimized 
out>) at src/stacktrace_libunwind-inl.h:138
+    // #9  0x00007f0d399ee083 in GetStackTraceWithContext 
(result=0x7f0d07df07b8, max_depth=63, skip_count=3, uc=0x7f0d07df0a40) at 
src/stacktrace.cc:305
+    // #10 0x00007f0d399ea992 in CpuProfiler::prof_handler 
(signal_ucontext=<optimized out>, cpu_profiler=0x7f0d399f6600, sig=<optimized 
out>) at src/profiler.cc:359
+    // #11 0x00007f0d399eb633 in ProfileHandler::SignalHandler (sig=27, 
sinfo=0x7f0d07df0b70, ucontext=0x7f0d07df0a40) at src/profile-handler.cc:530
+    // #12 <signal handler called>
+    // #13 0x00007f0d3a73c0b1 in __GI___pthread_mutex_lock 
(mutex=0x7f0d3d7f0990 <_rtld_global+2352>) at ../nptl/pthread_mutex_lock.c:115
+    // #14 0x00007f0d38eb0231 in __GI___dl_iterate_phdr 
(callback=0x7f0d38f525f0, data=0x7f0d07df10c0) at dl-iteratephdr.c:40
+    // #15 0x00007f0d38f536c1 in _Unwind_Find_FDE () from 
/lib/x86_64-linux-gnu/libgcc_s.so.1
+    // #16 0x00007f0d38f4f868 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
+    // #17 0x00007f0d38f50a20 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
+    // #18 0x00007f0d38f50f99 in _Unwind_RaiseException () from 
/lib/x86_64-linux-gnu/libgcc_s.so.1
+    // #19 0x00007f0d390088dc in __cxa_throw () from 
/lib/x86_64-linux-gnu/libstdc++.so.6
+    // #20 0x00007f0d3b5b2245 in __cxxabiv1::__cxa_throw 
(thrownException=0x7f0d114ea8c0, type=0x7f0d3d6dd830 <typeinfo for 
rockset::GRPCError>, destructor=<optimized out>) at 
/src/folly/folly/experimental/exception_tracer/ExceptionTracerLib.cpp:107
+    //
+    // Therefore, we do not capture backtraces in the signal handler, to avoid
+    // mutex reentry and deadlock. Instead, we capture backtraces in this function
+    // and end the signal handler after the capture is complete.
+    // Even so, there is still a possible deadlock: the worker thread may be
+    // interrupted during an existing dl_iterate_phdr call and then wait for the
+    // capture to complete, while this function captures backtraces with
+    // dl_iterate_phdr itself. We introduce a timeout mechanism in the signal
+    // handler to avoid this deadlock.
+
+    // Each signal trace has an independent SignalSync to
+    // prevent the previous SignalHandler from affecting the new SignalTrace.
+    butil::intrusive_ptr<SignalSync> signal_sync(new SignalSync());
+    if (!signal_sync->Init()) {
+        return Result::MakeErrorResult("Fail to init SignalSync");
+    }
+    // Add reference for SignalHandler.
+    signal_sync->AddRefManually();
+
+    union sigval value{};
+    value.sival_ptr = signal_sync.get();
+    size_t sigqueue_try = 0;
+    while (sigqueue(tid, SIGURG, value) != 0) {
+        if (errno != EAGAIN || sigqueue_try++ >= 3) {
+            return Result::MakeErrorResult("Fail to sigqueue: %s", berror());
+        }
+    }
+
+    // Caution: from here on, async-signal-safety must be ensured.
+    Result result;
+    // Wakeup the signal handler at the end.
+    BRPC_SCOPE_EXIT {
+        WakeupSignalHandler(signal_sync, result);
+    };
+
+    timespec abs_timeout{};
+    timespec* abs_timeout_ptr = NULL;
+    if (FLAGS_signal_trace_timeout_ms > 0) {
+        abs_timeout = butil::milliseconds_from_now(FLAGS_signal_trace_timeout_ms);
+        abs_timeout_ptr = &abs_timeout;
+    }
+    // Wait for the signal handler to start.
+    if (!WaitForSignalHandler(signal_sync, abs_timeout_ptr, result)) {
+        return result;
+    }
+
+    if (NULL == signal_sync->context) {
+        result.SetError("context is NULL");
+        return result;
+    }
+    unw_cursor_t cursor;
+    int rc = unw_init_local(&cursor, signal_sync->context);
+    if (0 != rc) {
+        result.SetError("Failed to init local, rc=%d", rc);
+        return result;
+    }
+
+    return TraceCore(cursor);
+}
+
+TaskTracer::Result TaskTracer::TraceCore(unw_cursor_t& cursor) {
+    Result result{};
+    result.fast_unwind = FLAGS_enable_fast_unwind;
+    for (result.frame_count = 0; result.frame_count < arraysize(result.ips); ++result.frame_count) {
+        int rc = unw_step(&cursor);
+        if (0 == rc) {
+            break;
+        } else if (rc < 0) {
+            return Result::MakeErrorResult("Fail to unw_step, rc=%d", rc);
+        }
+
+        unw_word_t ip = 0;
+        // Fast unwind does not care about the return value.
+        rc = unw_get_reg(&cursor, UNW_REG_IP, &ip);
+        result.ips[result.frame_count] = ip;
+
+        if (result.fast_unwind) {
+            continue;
+        }
+
+        if (0 != rc) {
+            result.mangled[result.frame_count][0] = '\0';
+            continue;
+        }
+
+        // Slow path.
+        rc = unw_get_proc_name(&cursor, result.mangled[result.frame_count],
+                               sizeof(result.mangled[result.frame_count]), NULL);
+        // UNW_ENOMEM is OK.
+        if (0 != rc && UNW_ENOMEM != rc) {
+            result.mangled[result.frame_count][0] = '\0';
+        }
+    }
+
+    return result;
+}
+
+} // namespace bthread
+
+#endif // BRPC_BTHREAD_TRACER
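
The SignalTrace()/SignalHandler() pair above relies on a small handshake: the
tracer queues SIGURG with a pointer to a SignalSync, the handler sem_post()s to
announce that it has started, and the tracer later writes to a pipe that the
handler poll()s with a timeout. The following standalone sketch shows only that
handshake; it is not part of the patch and uses the glibc extension
pthread_sigqueue() instead of brpc's tid-based sigqueue() call.

    // Illustration only, not brpc code.
    #define _GNU_SOURCE
    #include <errno.h>
    #include <poll.h>
    #include <pthread.h>
    #include <semaphore.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    struct SignalSync {
        sem_t sem;        // handler -> requester: "the handler has started"
        int pipe_fds[2];  // requester -> handler: "capture done, you may return"
    };

    static void handler(int, siginfo_t* info, void*) {
        SignalSync* s = static_cast<SignalSync*>(info->si_value.sival_ptr);
        sem_post(&s->sem);  // async-signal-safe
        pollfd pfd = {s->pipe_fds[0], POLLIN, 0};
        // Bounded wait, so the interrupted thread cannot hang forever.
        while (poll(&pfd, 1, 50) == -1 && errno == EINTR) {}
    }

    static void* worker(void*) {
        for (;;) pause();  // stands in for a bthread running on a worker
        return NULL;
    }

    int main() {
        struct sigaction sa = {};
        sa.sa_sigaction = handler;
        sa.sa_flags = SA_SIGINFO;
        sigfillset(&sa.sa_mask);
        sigaction(SIGURG, &sa, NULL);

        pthread_t th;
        pthread_create(&th, NULL, worker, NULL);

        SignalSync sync;
        sem_init(&sync.sem, 0, 0);
        if (pipe(sync.pipe_fds) != 0) {
            return 1;
        }

        union sigval value = {};
        value.sival_ptr = &sync;
        pthread_sigqueue(th, SIGURG, value);  // interrupt the worker thread

        sem_wait(&sync.sem);  // the worker is now parked inside the handler
        printf("worker interrupted; its registers could be unwound here\n");
        if (write(sync.pipe_fds[1], "1", 1) < 0) {  // release the handler
            perror("write");
        }
        return 0;  // process exit also ends the worker thread
    }

The bounded poll() plays the same role as FLAGS_signal_trace_timeout_ms: even
if the tracer never writes to the pipe, the interrupted thread resumes.
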
diff --git a/src/bthread/task_tracer.h b/src/bthread/task_tracer.h
new file mode 100644
index 00000000..8c84f7b9
--- /dev/null
+++ b/src/bthread/task_tracer.h
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef BTHREAD_TASK_TRACER_H
+#define BTHREAD_TASK_TRACER_H
+
+#ifdef BRPC_BTHREAD_TRACER
+
+#include <signal.h>
+#include <semaphore.h>
+#include <vector>
+#include <algorithm>
+#include <libunwind.h>
+#include "butil/strings/safe_sprintf.h"
+#include "butil/synchronization/condition_variable.h"
+#include "butil/shared_object.h"
+#include "butil/fd_utility.h"
+#include "bthread/task_meta.h"
+#include "bthread/mutex.h"
+
+namespace bthread {
+
+// Tracer for bthread.
+class TaskTracer {
+public:
+    // Returns true on success, false otherwise.
+    bool Init();
+    // Set the status to `s'.
+    void set_status(TaskStatus s, TaskMeta* meta);
+    static void set_running_status(pid_t worker_tid, TaskMeta* meta);
+    static bool set_end_status_unsafe(TaskMeta* m);
+
+    // Trace the bthread of `tid'.
+    std::string Trace(bthread_t tid);
+    void Trace(std::ostream& os, bthread_t tid);
+
+    // When the worker is about to jump stack from one bthread to another,
+    // wait until the tracing of the bthread completes.
+    void WaitForTracing(TaskMeta* m);
+
+private:
+    // Error number guard used in signal handler.
+    class ErrnoGuard {
+    public:
+        ErrnoGuard() : _errno(errno) {}
+        ~ErrnoGuard() { errno = _errno; }
+    private:
+        int _errno;
+    };
+
+    struct Result {
+        template<typename... Args>
+        static Result MakeErrorResult(const char* fmt, Args... args) {
+            Result result{};
+            result.SetError(fmt, std::forward<Args>(args)...);
+            return result;
+        }
+
+        template<typename... Args>
+        void SetError(const char* fmt, Args... args) {
+            err_count = std::min(err_count + 1, MAX_ERROR_NUM);
+            butil::strings::SafeSPrintf(err_msg[err_count - 1], fmt, args...);
+        }
+
+        std::string OutputToString();
+        void OutputToStream(std::ostream& os);
+
+        bool OK() const { return err_count == 0; }
+
+        static const size_t MAX_TRACE_NUM = 64;
+        static const size_t MAX_ERROR_NUM = 2;
+
+        unw_word_t ips[MAX_TRACE_NUM];
+        char mangled[MAX_TRACE_NUM][256]{};
+        size_t frame_count{0};
+        char err_msg[MAX_ERROR_NUM][64]{};
+        size_t err_count{0};
+
+        bool fast_unwind{false};
+    };
+
+    // For signal trace.
+    struct SignalSync : public butil::SharedObject {
+        ~SignalSync() override;
+        bool Init();
+
+        unw_context_t* context{NULL};
+        sem_t sem{};
+        int pipe_fds[2]{};
+
+    private:
+        bool _pipe_init{false};
+        bool _sem_init{false};
+    };
+
+    static TaskStatus WaitForJumping(TaskMeta* m);
+    Result TraceImpl(bthread_t tid);
+
+    unw_cursor_t MakeCursor(bthread_fcontext_t fcontext);
+    Result ContextTrace(bthread_fcontext_t fcontext);
+
+    static bool RegisterSignalHandler();
+    static void SignalHandler(int sig, siginfo_t* info, void* context);
+    static bool WaitForSignalHandler(butil::intrusive_ptr<SignalSync> signal_sync,
+                                     const timespec* abs_timeout, Result& result);
+    static void WakeupSignalHandler(
+        butil::intrusive_ptr<SignalSync> signal_sync, Result& result);
+    Result SignalTrace(pid_t worker_tid);
+
+    static Result TraceCore(unw_cursor_t& cursor);
+
+    // Make sure only one bthread is traced at a time.
+    bthread::Mutex _trace_request_mutex;
+
+    // For signal trace.
+    // Make sure bthread does not jump stack when it is being traced.
+    butil::Mutex _mutex;
+    butil::ConditionVariable _cond{&_mutex};
+
+    // For context trace.
+    unw_context_t _context{};
+
+    bvar::LatencyRecorder _trace_time{"bthread_trace_time"};
+};
+
+} // namespace bthread
+
+#endif // BRPC_BTHREAD_TRACER
+
+#endif // BTHREAD_TASK_TRACER_H
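
With the tracer compiled in (BRPC_BTHREAD_TRACER defined), a bthread's stack
can be inspected either through the builtin /bthreads/<tid>?st=1 page
(exercised by the builtin-service test further below) or in-process via the
bthread::stack_trace() helper that test/bthread_unittest.cpp declares. A
minimal in-process sketch, not part of the patch:

    // Illustration only; assumes the library was built with BRPC_BTHREAD_TRACER.
    #include <unistd.h>
    #include <iostream>
    #include <string>
    #include <bthread/bthread.h>

    namespace bthread {
    // Provided by the bthread library when the tracer is enabled; declared
    // here the same way test/bthread_unittest.cpp declares it.
    extern std::string stack_trace(bthread_t tid);
    }

    static void* loop(void*) {
        while (true) {
            bthread_usleep(100 * 1000);
        }
        return NULL;
    }

    int main() {
        bthread_t th;
        bthread_start_background(&th, NULL, loop, NULL);
        usleep(100 * 1000);  // give the bthread time to start sleeping
        std::cout << bthread::stack_trace(th) << std::endl;
        return 0;
    }
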
diff --git a/src/butil/debug/stack_trace.cc b/src/butil/debug/stack_trace.cc
index 38abede9..97a4cd76 100644
--- a/src/butil/debug/stack_trace.cc
+++ b/src/butil/debug/stack_trace.cc
@@ -29,17 +29,18 @@ const void *const *StackTrace::Addresses(size_t* count) const {
 }
 
 size_t StackTrace::CopyAddressTo(void** buffer, size_t max_nframes) const {
-    size_t nframes = std::min(count_, max_nframes);
-    memcpy(buffer, trace_, nframes * sizeof(void*));
-    return nframes;
+  size_t nframes = std::min(count_, max_nframes);
+  memcpy(buffer, trace_, nframes * sizeof(void*));
+  return nframes;
 }
 
 std::string StackTrace::ToString() const {
-  std::stringstream stream;
+  std::string str;
+  str.reserve(1024);
 #if !defined(__UCLIBC__)
-  OutputToStream(&stream);
+  OutputToString(str);
 #endif
-  return stream.str();
+  return str;
 }
 
 }  // namespace debug
diff --git a/src/butil/debug/stack_trace.h b/src/butil/debug/stack_trace.h
index 5c6545ad..e8120583 100644
--- a/src/butil/debug/stack_trace.h
+++ b/src/butil/debug/stack_trace.h
@@ -79,6 +79,7 @@ class BUTIL_EXPORT StackTrace {
 #if !defined(__UCLIBC__)
   // Resolves backtrace to symbols and write to stream.
   void OutputToStream(std::ostream* os) const;
+  void OutputToString(std::string& str) const;
 #endif
 
   // Resolves backtrace to symbols and returns as string.
diff --git a/src/butil/debug/stack_trace_posix.cc b/src/butil/debug/stack_trace_posix.cc
index 878f94a7..9ef91c2f 100644
--- a/src/butil/debug/stack_trace_posix.cc
+++ b/src/butil/debug/stack_trace_posix.cc
@@ -429,6 +429,23 @@ class StreamBacktraceOutputHandler : public BacktraceOutputHandler {
   DISALLOW_COPY_AND_ASSIGN(StreamBacktraceOutputHandler);
 };
 
+class StringBacktraceOutputHandler : public BacktraceOutputHandler {
+public:
+    explicit StringBacktraceOutputHandler(std::string& str) : _str(str) {}
+
+    DISALLOW_COPY_AND_ASSIGN(StringBacktraceOutputHandler);
+
+    void HandleOutput(const char* output) OVERRIDE {
+        if (NULL == output) {
+            return;
+        }
+        _str.append(output);
+    }
+
+private:
+    std::string& _str;
+};
+
 void WarmUpBacktrace() {
   // Warm up stack trace infrastructure. It turns out that on the first
   // call glibc initializes some internal data structures using pthread_once,
@@ -801,6 +818,11 @@ void StackTrace::OutputToStream(std::ostream* os) const {
   StreamBacktraceOutputHandler handler(os);
   ProcessBacktrace(trace_, count_, &handler);
 }
+
+void StackTrace::OutputToString(std::string& str) const {
+    StringBacktraceOutputHandler handler(str);
+    ProcessBacktrace(trace_, count_, &handler);
+}
 #endif
 
 namespace internal {
diff --git a/src/butil/memory/scope_guard.h b/src/butil/memory/scope_guard.h
index 837acbbc..7d72a560 100644
--- a/src/butil/memory/scope_guard.h
+++ b/src/butil/memory/scope_guard.h
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef BRPC_SCOPED_GUARD_H
-#define BRPC_SCOPED_GUARD_H
+#ifndef BUTIL_SCOPED_GUARD_H
+#define BUTIL_SCOPED_GUARD_H
 
 #include "butil/type_traits.h"
 #include "butil/macros.h"
@@ -104,4 +104,4 @@ operator+(ScopeExitHelper, Callback&& callback) {
   auto BRPC_ANONYMOUS_VARIABLE(SCOPE_EXIT) =                \
       ::butil::internal::ScopeExitHelper() + [&]() noexcept
 
-#endif // BRPC_SCOPED_GUARD_H
+#endif // BUTIL_SCOPED_GUARD_H
diff --git a/src/butil/reloadable_flags.h b/src/butil/reloadable_flags.h
new file mode 100644
index 00000000..2cb11e92
--- /dev/null
+++ b/src/butil/reloadable_flags.h
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#ifndef BUTIL_RELOADABLE_FLAGS_H
+#define BUTIL_RELOADABLE_FLAGS_H
+
+#include <stdint.h>
+#include <unistd.h>                    // write, _exit
+#include <gflags/gflags.h>
+#include "butil/macros.h"
+#include "butil/type_traits.h"
+
+// Register a validator to a gflag so that the gflag is treated as
+// reloadable by brpc. If a validator is already registered, the program aborts.
+// You should call this macro at global scope, for example:
+//
+//   DEFINE_int32(foo, 0, "blah blah");
+//   BUTIL_VALIDATE_GFLAG(foo, butil::PassValidate);
+//
+// This macro does not work for string-flags because they're thread-unsafe to
+// modify directly. To emphasize this, you have to write the validator by
+// yourself and use GFLAGS_NS::GetCommandLineOption() to access the flag.
+#define BUTIL_VALIDATE_GFLAG(flag, validate_fn)                     \
+    namespace butil_flags {}                                       \
+    const int register_FLAGS_ ## flag ## _dummy                    \
+                 __attribute__((__unused__)) =                     \
+        ::butil::RegisterFlagValidatorOrDieImpl<                   \
+            decltype(FLAGS_##flag)>(&FLAGS_##flag, (validate_fn))
+
+
+namespace butil {
+
+template <typename T>
+bool PassValidate(const char*, T) {
+    return true;
+}
+
+template <typename T>
+bool PositiveInteger(const char*, T v) {
+    return v > 0;
+}
+
+template <typename T>
+bool RegisterFlagValidatorOrDieImpl(
+    const T* flag, bool (*validate_fn)(const char*, T val)) {
+    static_assert(!butil::is_same<std::string, T>::value,
+                  "Not support string flags");
+    if (GFLAGS_NS::RegisterFlagValidator(flag, validate_fn)) {
+        return true;
+    }
+    // Error printed by gflags does not have newline. Add one to it.
+    char newline = '\n';
+    butil::ignore_result(write(2, &newline, 1));
+    _exit(1);
+}
+
+} // namespace butil
+
+
+#endif  // BUTIL_RELOADABLE_FLAGS_H
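
BUTIL_VALIDATE_GFLAG is meant to sit right next to the flag definition, as
task_tracer.cpp does for signal_trace_timeout_ms. A minimal sketch (the flag
name example_timeout_ms is made up for illustration):

    #include <gflags/gflags.h>
    #include "butil/reloadable_flags.h"

    // Hypothetical flag, for illustration only.
    DEFINE_uint32(example_timeout_ms, 100, "An illustrative reloadable flag");
    // Registers butil::PositiveInteger as the validator, so brpc treats the
    // flag as reloadable and non-positive values are rejected.
    BUTIL_VALIDATE_GFLAG(example_timeout_ms, butil::PositiveInteger<uint32_t>);
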
diff --git a/src/brpc/shared_object.h b/src/butil/shared_object.h
similarity index 94%
copy from src/brpc/shared_object.h
copy to src/butil/shared_object.h
index d8ff9aae..abcfd46c 100644
--- a/src/brpc/shared_object.h
+++ b/src/butil/shared_object.h
@@ -16,14 +16,14 @@
 // under the License.
 
 
-#ifndef BRPC_SHARED_OBJECT_H
-#define BRPC_SHARED_OBJECT_H
+#ifndef BUTIL_SHARED_OBJECT_H
+#define BUTIL_SHARED_OBJECT_H
 
 #include "butil/intrusive_ptr.hpp"                   // butil::intrusive_ptr
 #include "butil/atomicops.h"
 
 
-namespace brpc {
+namespace butil {
 
 // Inherit this class to be intrusively shared. Comparing to shared_ptr,
 // intrusive_ptr saves one malloc (for shared_count) and gets better cache
@@ -67,7 +67,7 @@ inline void intrusive_ptr_release(SharedObject* obj) {
     obj->RemoveRefManually();
 }
 
-} // namespace brpc
+} // namespace butil
 
 
-#endif // BRPC_SHARED_OBJECT_H
+#endif // BUTIL_SHARED_OBJECT_H
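
SignalSync in task_tracer.h inherits butil::SharedObject and is handed to the
signal handler as a raw pointer through sival_ptr, with AddRefManually() taking
an extra reference on the handler's behalf. A minimal sketch of that manual
ref-counting pattern (Payload is a made-up type):

    #include "butil/shared_object.h"

    // Hypothetical type, for illustration only.
    struct Payload : public butil::SharedObject {
        int value = 0;
    };

    void example() {
        butil::intrusive_ptr<Payload> p(new Payload());  // ref = 1
        p->AddRefManually();             // ref = 2, handed to another consumer
        Payload* raw = p.get();
        // ... the other consumer uses `raw`, then drops its reference:
        raw->RemoveRefManually();        // ref = 1
    }  // p goes out of scope, the last reference is dropped and Payload is deleted
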
diff --git a/src/butil/time.h b/src/butil/time.h
index 90378c59..8b856998 100644
--- a/src/butil/time.h
+++ b/src/butil/time.h
@@ -384,7 +384,7 @@ public:
     };
 
     Timer() : _stop(0), _start(0) {}
-    explicit Timer(const TimerType) {
+    explicit Timer(const TimerType) : Timer() {
         start();
     }
 
diff --git a/test/brpc_builtin_service_unittest.cpp b/test/brpc_builtin_service_unittest.cpp
index 178c3f9e..8ea17a47 100644
--- a/test/brpc_builtin_service_unittest.cpp
+++ b/test/brpc_builtin_service_unittest.cpp
@@ -837,6 +837,17 @@ void* dummy_bthread(void*) {
     return NULL;
 }
 
+
+#ifdef BRPC_BTHREAD_TRACER
+bool g_bthread_trace_stop = false;
+void* bthread_trace(void*) {
+    while (!g_bthread_trace_stop) {
+        bthread_usleep(1000 * 100);
+    }
+    return NULL;
+}
+#endif // BRPC_BTHREAD_TRACER
+
 TEST_F(BuiltinServiceTest, bthreads) {
     brpc::BthreadsService service;
     brpc::BthreadsRequest req;
@@ -867,7 +878,25 @@ TEST_F(BuiltinServiceTest, bthreads) {
         service.default_method(&cntl, &req, &res, &done);
         EXPECT_FALSE(cntl.Failed());
         CheckContent(cntl, "stop=0");
-    }    
+    }
+
+#ifdef BRPC_BTHREAD_TRACER
+    {
+        bthread_t th;
+        EXPECT_EQ(0, bthread_start_background(&th, NULL, bthread_trace, NULL));
+        ClosureChecker done;
+        brpc::Controller cntl;
+        std::string id_string;
+        butil::string_printf(&id_string, "%llu?st=1", (unsigned long long)th);
+        cntl.http_request().uri().SetHttpURL("/bthreads/" + id_string);
+        cntl.http_request()._unresolved_path = id_string;
+        service.default_method(&cntl, &req, &res, &done);
+        g_bthread_trace_stop = true;
+        EXPECT_FALSE(cntl.Failed());
+        CheckContent(cntl, "stop=0");
+        CheckContent(cntl, "bthread_trace");
+    }
+#endif // BRPC_BTHREAD_TRACER
 }
 
 TEST_F(BuiltinServiceTest, sockets) {
diff --git a/test/bthread_unittest.cpp b/test/bthread_unittest.cpp
index 5ed8aba8..d1605234 100644
--- a/test/bthread_unittest.cpp
+++ b/test/bthread_unittest.cpp
@@ -20,14 +20,24 @@
 #include "butil/time.h"
 #include "butil/macros.h"
 #include "butil/logging.h"
-#include "butil/logging.h"
 #include "butil/gperftools_profiler.h"
 #include "bthread/bthread.h"
 #include "bthread/unstable.h"
 #include "bthread/task_meta.h"
 
+int main(int argc, char* argv[]) {
+    testing::InitGoogleTest(&argc, argv);
+    GFLAGS_NS::ParseCommandLineFlags(&argc, &argv, true);
+    int rc = RUN_ALL_TESTS();
+    return rc;
+}
+
 namespace bthread {
-    extern __thread bthread::LocalStorage tls_bls;
+extern __thread bthread::LocalStorage tls_bls;
+DECLARE_bool(enable_fast_unwind);
+#ifdef BRPC_BTHREAD_TRACER
+extern std::string stack_trace(bthread_t tid);
+#endif // BRPC_BTHREAD_TRACER
 }
 
 namespace {
@@ -608,4 +618,44 @@ TEST_F(BthreadTest, yield_single_thread) {
     ASSERT_EQ(0, bthread_join(tid, NULL));
 }
 
+#ifdef BRPC_BTHREAD_TRACER
+TEST_F(BthreadTest, trace) {
+    stop = false;
+    bthread_t th;
+    ASSERT_EQ(0, bthread_start_urgent(&th, NULL, spin_and_log, (void*)1));
+    usleep(100 * 1000);
+    bthread::FLAGS_enable_fast_unwind = false;
+    std::string st = bthread::stack_trace(th);
+    LOG(INFO) << "fast_unwind spin_and_log stack trace:\n" << st;
+    ASSERT_NE(std::string::npos, st.find("spin_and_log"));
+
+    bthread::FLAGS_enable_fast_unwind = true;
+    st = bthread::stack_trace(th);
+    LOG(INFO) << "spin_and_log stack trace:\n" << st;
+    ASSERT_NE(std::string::npos, st.find("spin_and_log"));
+    stop = true;
+    ASSERT_EQ(0, bthread_join(th, NULL));
+
+    stop = false;
+    ASSERT_EQ(0, bthread_start_urgent(&th, NULL, repeated_sleep, (void*)1));
+    usleep(100 * 1000);
+    bthread::FLAGS_enable_fast_unwind = false;
+    st = bthread::stack_trace(th);
+    LOG(INFO) << "fast_unwind repeated_sleep stack trace:\n" << st;
+    ASSERT_NE(std::string::npos, st.find("repeated_sleep"));
+
+    bthread::FLAGS_enable_fast_unwind = true;
+    st = bthread::stack_trace(th);
+    LOG(INFO) << "repeated_sleep stack trace:\n" << st;
+    ASSERT_NE(std::string::npos, st.find("repeated_sleep"));
+    stop = true;
+    ASSERT_EQ(0, bthread_join(th, NULL));
+
+    st = bthread::stack_trace(th);
+    LOG(INFO) << "ended bthread stack trace:\n" << st;
+    ASSERT_NE(std::string::npos, st.find("not exist now"));
+
+}
+#endif // BRPC_BTHREAD_TRACER
+
 } // namespace


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@brpc.apache.org
For additional commands, e-mail: dev-h...@brpc.apache.org
