Repository: thrift
Updated Branches:
  refs/heads/master 6c08ac72c -> 147c2849a


THRIFT-2026: Eliminate some undefined behavior in C/C++
Clients: glib, C++
Patch: Jim Apple <jbapple-imp...@apache.org>

This closes #1214

This patch fixes some undefined behavior were found using Clang's
UndefinedBehaviorSanitizer (UBSan). To check for undefined behavior,
run /build/docker/scripts/ubsan.sh. This is run during CI builds, as
well.

The examples of the types of undefined behavior fixed in this commit
are:

1. Enumerations exhibit undefined behavior when they have values
   outside of a range dependent on the values of their enumerators, as
   specified in C++14's chapter 7.2 ("Enumeration declarations"),
   paragraph 8.

2. Left shift of negative values, used in zigzag encoding, is
   undefined behavior. See 5.8 ("Shift operators"), paragraph 2 for
   C++ and 6.5.7 ("Bitwise shift operators"), paragraph 4 for C99 and
   C11.


Project: http://git-wip-us.apache.org/repos/asf/thrift/repo
Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/147c2849
Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/147c2849
Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/147c2849

Branch: refs/heads/master
Commit: 147c2849af9c28f2ce347b4005e022ac13db9dd8
Parents: 6c08ac7
Author: Jim Apple <jbapple-imp...@apache.org>
Authored: Sat Mar 18 12:56:50 2017 -0700
Committer: James E. King, III <jk...@apache.org>
Committed: Sat Mar 25 08:16:18 2017 -0400

----------------------------------------------------------------------
 .travis.yml                                     |  6 ++++
 build/docker/scripts/ubsan.sh                   | 35 ++++++++++++++++++++
 .../src/thrift/generate/t_c_glib_generator.cc   |  4 +--
 .../cpp/src/thrift/generate/t_d_generator.cc    |  2 +-
 compiler/cpp/src/thrift/parse/t_base_type.h     |  6 ++--
 .../c_glib/protocol/thrift_compact_protocol.c   |  4 +--
 .../src/thrift/protocol/TCompactProtocol.tcc    |  4 +--
 7 files changed, 51 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/thrift/blob/147c2849/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 70293ed..10c6fd0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -152,6 +152,12 @@ env:
       BUILD_ENV="-e CC=gcc -e CXX=g++"
       DISTRO=debian
 
+    # C and C++ undefined behavior. This wraps autotools.sh, but each binary 
crashes if
+    # undefined behavior occurs. Skips the known flaky tests.
+    - TEST_NAME="UBSan"
+      SCRIPT="ubsan.sh"
+      BUILD_ARG="--without-haskell --without-nodejs --without-perl 
--without-python"
+
 matrix:
   include:
     # QA jobs for code analytics and metrics

http://git-wip-us.apache.org/repos/asf/thrift/blob/147c2849/build/docker/scripts/ubsan.sh
----------------------------------------------------------------------
diff --git a/build/docker/scripts/ubsan.sh b/build/docker/scripts/ubsan.sh
new file mode 100755
index 0000000..6db10f3
--- /dev/null
+++ b/build/docker/scripts/ubsan.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+set -ex
+
+# Wraps autotools.sh, but each binary crashes if it exhibits undefined 
behavior. See
+# 
http://releases.llvm.org/3.8.0/tools/clang/docs/UndefinedBehaviorSanitizer.html
+
+# Install a more recent clang than default:
+sudo apt-get update
+sudo apt-get install -y --no-install-recommends clang-3.8 llvm-3.8-dev
+export CC=clang-3.8
+export CXX=clang++-3.8
+
+# Set the undefined behavior flags. This crashes on all undefined behavior 
except for
+# undefined casting, aka "vptr".
+#
+# TODO: fix undefined vptr behavior and turn this option back on.
+export CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined 
-fno-sanitize=vptr"
+# Builds without optimization and with debugging symbols for making crash 
reports more
+# readable.
+export CFLAGS="${CFLAGS} -O0 -ggdb3"
+export CXXFLAGS="${CFLAGS}"
+export UBSAN_OPTIONS=print_stacktrace=1
+
+# llvm-symbolizer must be on PATH, but the above installation instals a binary 
called
+# "llvm-symbolizer-3.8", not "llvm-symbolizer". This fixes that with a 
softlink in a new
+# directory.
+CLANG_PATH="$(mktemp -d)"
+trap "rm -rf ${CLANG_PATH}" EXIT
+ln -s "$(whereis llvm-symbolizer-3.8  | rev | cut -d ' ' -f 1 | rev)" \
+  "${CLANG_PATH}/llvm-symbolizer"
+export PATH="${CLANG_PATH}:${PATH}"
+llvm-symbolizer -version
+
+build/docker/scripts/autotools.sh $*

http://git-wip-us.apache.org/repos/asf/thrift/blob/147c2849/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc
----------------------------------------------------------------------
diff --git a/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc 
b/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc
index 7d4e4f0..7d4818e 100644
--- a/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc
+++ b/compiler/cpp/src/thrift/generate/t_c_glib_generator.cc
@@ -4061,9 +4061,9 @@ void 
t_c_glib_generator::generate_deserialize_field(ofstream& out,
   // if the type is not required and this is a thrift struct (no prefix),
   // set the isset variable.  if the type is required, then set the
   // local variable indicating the value was set, so that we can do    // 
validation later.
-  if (tfield->get_req() != t_field::T_REQUIRED && prefix != "") {
+  if (prefix != "" && tfield->get_req() != t_field::T_REQUIRED) {
     indent(out) << prefix << "__isset_" << tfield->get_name() << suffix << " = 
TRUE;" << endl;
-  } else if (tfield->get_req() == t_field::T_REQUIRED && prefix != "") {
+  } else if (prefix != "" && tfield->get_req() == t_field::T_REQUIRED) {
     indent(out) << "isset_" << tfield->get_name() << " = TRUE;" << endl;
   }
 }

http://git-wip-us.apache.org/repos/asf/thrift/blob/147c2849/compiler/cpp/src/thrift/generate/t_d_generator.cc
----------------------------------------------------------------------
diff --git a/compiler/cpp/src/thrift/generate/t_d_generator.cc 
b/compiler/cpp/src/thrift/generate/t_d_generator.cc
index 4816681..8192549 100644
--- a/compiler/cpp/src/thrift/generate/t_d_generator.cc
+++ b/compiler/cpp/src/thrift/generate/t_d_generator.cc
@@ -382,7 +382,7 @@ private:
           << (*f_iter)->get_name() << " called\");" << endl;
 
       t_base_type* rt = (t_base_type*)(*f_iter)->get_returntype();
-      if (rt->get_base() != t_base_type::TYPE_VOID) {
+      if (!rt->is_void()) {
         indent(out) << "return typeof(return).init;" << endl;
       }
 

http://git-wip-us.apache.org/repos/asf/thrift/blob/147c2849/compiler/cpp/src/thrift/parse/t_base_type.h
----------------------------------------------------------------------
diff --git a/compiler/cpp/src/thrift/parse/t_base_type.h 
b/compiler/cpp/src/thrift/parse/t_base_type.h
index 32523cb..71398ba 100644
--- a/compiler/cpp/src/thrift/parse/t_base_type.h
+++ b/compiler/cpp/src/thrift/parse/t_base_type.h
@@ -57,15 +57,15 @@ public:
 
   void set_string_list(bool val) { string_list_ = val; }
 
-  bool is_string_list() const { return (base_ == TYPE_STRING) && string_list_; 
}
+  bool is_string_list() const { return string_list_ && (base_ == TYPE_STRING); 
}
 
   void set_binary(bool val) { binary_ = val; }
 
-  bool is_binary() const { return (base_ == TYPE_STRING) && binary_; }
+  bool is_binary() const { return binary_ && (base_ == TYPE_STRING); }
 
   void set_string_enum(bool val) { string_enum_ = val; }
 
-  bool is_string_enum() const { return base_ == TYPE_STRING && string_enum_; }
+  bool is_string_enum() const { return string_enum_ && base_ == TYPE_STRING; }
 
   void add_string_enum_val(std::string val) { 
string_enum_vals_.push_back(val); }
 

http://git-wip-us.apache.org/repos/asf/thrift/blob/147c2849/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c
----------------------------------------------------------------------
diff --git a/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c 
b/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c
index a0dd3cc..cae4749 100644
--- a/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c
+++ b/lib/c_glib/src/thrift/c_glib/protocol/thrift_compact_protocol.c
@@ -120,7 +120,7 @@ thrift_bitwise_cast_gdouble (const guint64 v)
 static guint64
 i64_to_zigzag (const gint64 l)
 {
-  return (l << 1) ^ (l >> 63);
+  return (((guint64)l) << 1) ^ (l >> 63);
 }
 
 /**
@@ -130,7 +130,7 @@ i64_to_zigzag (const gint64 l)
 static guint32
 i32_to_zigzag (const gint32 n)
 {
-  return (n << 1) ^ (n >> 31);
+  return (((guint32)n) << 1) ^ (n >> 31);
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/thrift/blob/147c2849/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc
----------------------------------------------------------------------
diff --git a/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc 
b/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc
index 6bbf9ef..d40c331 100644
--- a/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc
+++ b/lib/cpp/src/thrift/protocol/TCompactProtocol.tcc
@@ -387,7 +387,7 @@ uint32_t 
TCompactProtocolT<Transport_>::writeVarint64(uint64_t n) {
  */
 template <class Transport_>
 uint64_t TCompactProtocolT<Transport_>::i64ToZigzag(const int64_t l) {
-  return (l << 1) ^ (l >> 63);
+  return (static_cast<uint64_t>(l) << 1) ^ (l >> 63);
 }
 
 /**
@@ -396,7 +396,7 @@ uint64_t TCompactProtocolT<Transport_>::i64ToZigzag(const 
int64_t l) {
  */
 template <class Transport_>
 uint32_t TCompactProtocolT<Transport_>::i32ToZigzag(const int32_t n) {
-  return (n << 1) ^ (n >> 31);
+  return (static_cast<uint32_t>(n) << 1) ^ (n >> 31);
 }
 
 /**

Reply via email to