Revert "KUDU-1448. Enable AVX2 bitshuffle at runtime"

This is causing ASAN and TSAN tests to fail on el6 likely due to this ld.so bug:
https://patchwork.ozlabs.org/patch/506223/

This reverts commit f10ab4a1d7ac0735890dcab6685a9fba7edac74e.

Change-Id: I86f6cc93f1e44f8d7243b742532901fd95bd1264
Reviewed-on: http://gerrit.cloudera.org:8080/5245
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/04eeacdd
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/04eeacdd
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/04eeacdd

Branch: refs/heads/master
Commit: 04eeacddd0efb07eac7b07eaa3175f731a77fb51
Parents: e177ae8
Author: Todd Lipcon <[email protected]>
Authored: Mon Nov 28 20:27:59 2016 +0000
Committer: Todd Lipcon <[email protected]>
Committed: Mon Nov 28 23:07:14 2016 +0000

----------------------------------------------------------------------
 src/kudu/cfile/CMakeLists.txt             |  1 -
 src/kudu/cfile/bitshuffle_arch_wrapper.cc | 93 --------------------------
 src/kudu/cfile/bitshuffle_arch_wrapper.h  | 35 ----------
 src/kudu/cfile/bshuf_block.cc             |  3 +-
 src/kudu/cfile/bshuf_block.h              | 11 ++-
 src/kudu/gutil/cpu.cc                     |  2 +-
 thirdparty/build-definitions.sh           | 45 ++-----------
 7 files changed, 12 insertions(+), 178 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/04eeacdd/src/kudu/cfile/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/CMakeLists.txt b/src/kudu/cfile/CMakeLists.txt
index f192c6d..ad2a961 100644
--- a/src/kudu/cfile/CMakeLists.txt
+++ b/src/kudu/cfile/CMakeLists.txt
@@ -32,7 +32,6 @@ add_library(cfile
   binary_dict_block.cc
   binary_plain_block.cc
   binary_prefix_block.cc
-  bitshuffle_arch_wrapper.cc
   block_cache.cc
   block_compression.cc
   bloomfile.cc

http://git-wip-us.apache.org/repos/asf/kudu/blob/04eeacdd/src/kudu/cfile/bitshuffle_arch_wrapper.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/bitshuffle_arch_wrapper.cc 
b/src/kudu/cfile/bitshuffle_arch_wrapper.cc
deleted file mode 100644
index 3ccaee5..0000000
--- a/src/kudu/cfile/bitshuffle_arch_wrapper.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "kudu/cfile/bitshuffle_arch_wrapper.h"
-
-#include <stdint.h>
-
-// Include the bitshuffle header once to get the default (non-AVX2)
-// symbols.
-#include <bitshuffle.h>
-
-// Include the bitshuffle header again, but this time importing the
-// AVX2-compiled symbols by defining some macros.
-#undef BITSHUFFLE_H
-#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx2
-#define bshuf_compress_lz4 bshuf_compress_lz4_avx2
-#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx2
-#include <bitshuffle.h> // NOLINT(*)
-#undef bshuf_compress_lz4_bound
-#undef bshuf_compress_lz4
-#undef bshuf_decompress_lz4
-
-#include "kudu/gutil/cpu.h"
-
-using base::CPU;
-
-namespace kudu {
-namespace bitshuffle {
-
-// On Linux, we dynamically dispatch using the 'ifunc' attribute, which 
dynamically
-// resolves the function implementation on first call.
-// ------------------------------------------------------------
-#ifndef __APPLE__
-
-// First the actual 'resolver' functions, which pick which implementation to 
use.
-extern "C" {
-decltype(&bshuf_compress_lz4_bound) bshuf_compress_lz4_bound_ifunc() {
-  return CPU().has_avx2() ? &bshuf_compress_lz4_bound_avx2 : 
&bshuf_compress_lz4_bound;
-}
-decltype(&bshuf_compress_lz4) bshuf_compress_lz4_ifunc() {
-  return CPU().has_avx2() ? &bshuf_compress_lz4_avx2 : &bshuf_compress_lz4;
-}
-decltype(&bshuf_decompress_lz4) bshuf_decompress_lz4_ifunc() {
-  return CPU().has_avx2() ? &bshuf_decompress_lz4_avx2 : &bshuf_decompress_lz4;
-}
-} // extern "C"
-
-// Then the ifunc declarations.
-__attribute__((ifunc("bshuf_compress_lz4_ifunc")))
-int64_t compress_lz4(void* in, void* out, size_t size,
-                     size_t elem_size, size_t block_size);
-
-__attribute__((ifunc("bshuf_decompress_lz4_ifunc")))
-int64_t decompress_lz4(void* in, void* out, size_t size,
-                       size_t elem_size, size_t block_size);
-
-__attribute__((ifunc("bshuf_compress_lz4_bound_ifunc")))
-size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size);
-
-
-// On OSX, we don't do the ifunc magic, and instead just pass through to 
non-AVX
-// bitshuffle.
-// ------------------------------------------------------------
-#else
-int64_t compress_lz4(void* in, void* out, size_t size,
-                     size_t elem_size, size_t block_size) {
-  return bshuf_compress_lz4(in, out, size, elem_size, block_size);
-}
-int64_t decompress_lz4(void* in, void* out, size_t size,
-                       size_t elem_size, size_t block_size) {
-  return bshuf_decompress_lz4(in, out, size, elem_size, block_size);
-}
-size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size) {
-  return bshuf_compress_lz4_bound(size, elem_size, block_size);
-}
-#endif // defined(__APPLE__)
-
-} // namespace bitshuffle
-} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/04eeacdd/src/kudu/cfile/bitshuffle_arch_wrapper.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/bitshuffle_arch_wrapper.h 
b/src/kudu/cfile/bitshuffle_arch_wrapper.h
deleted file mode 100644
index 712d0fe..0000000
--- a/src/kudu/cfile/bitshuffle_arch_wrapper.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#pragma once
-
-#include <stddef.h>
-#include <stdint.h>
-
-// This namespace has wrappers for the Bitshuffle library which do runtime 
dispatch to
-// either AVX2-accelerated or regular SSE2 implementations based on the 
available CPU. The
-// dispatch is at dynamic linking time using the 'ifunc' attribute, which will 
have fewer
-// indirections and branches compared to checking the CPU information on each 
call.
-namespace kudu {
-namespace bitshuffle {
-
-// See <bitshuffle.h> for documentation on these functions.
-size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size);
-int64_t compress_lz4(void* in, void* out, size_t size, size_t elem_size, 
size_t block_size);
-int64_t decompress_lz4(void* in, void* out, size_t size, size_t elem_size, 
size_t block_size);
-
-} // namespace bitshuffle
-} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/04eeacdd/src/kudu/cfile/bshuf_block.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/bshuf_block.cc b/src/kudu/cfile/bshuf_block.cc
index e71c6cc..019d141 100644
--- a/src/kudu/cfile/bshuf_block.cc
+++ b/src/kudu/cfile/bshuf_block.cc
@@ -91,8 +91,7 @@ Status BShufBlockDecoder<UINT32>::Expand() {
     decoded_.resize(num_elems_after_padding_ * size_of_elem_);
     uint8_t* in = const_cast<uint8_t*>(&data_[kHeaderSize]);
 
-    bytes = bitshuffle::decompress_lz4(in, decoded_.data(), 
num_elems_after_padding_,
-                                       size_of_elem_, 0);
+    bytes = bshuf_decompress_lz4(in, decoded_.data(), 
num_elems_after_padding_, size_of_elem_, 0);
     if (PREDICT_FALSE(bytes < 0)) {
       // Ideally, this should not happen.
       AbortWithBitShuffleError(bytes);

http://git-wip-us.apache.org/repos/asf/kudu/blob/04eeacdd/src/kudu/cfile/bshuf_block.h
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/bshuf_block.h b/src/kudu/cfile/bshuf_block.h
index 22b207c..7730842 100644
--- a/src/kudu/cfile/bshuf_block.h
+++ b/src/kudu/cfile/bshuf_block.h
@@ -25,8 +25,8 @@
 
 #include <algorithm>
 #include <stdint.h>
+#include <bitshuffle.h>
 
-#include "kudu/cfile/bitshuffle_arch_wrapper.h"
 #include "kudu/cfile/block_encodings.h"
 #include "kudu/cfile/cfile_util.h"
 #include "kudu/common/columnblock.h"
@@ -142,12 +142,12 @@ class BShufBlockBuilder : public BlockBuilder {
 
     int num_elems_after_padding = count_ + NumOfPaddingNeeded();
     buffer_.resize(kMaxHeaderSize +
-                   bitshuffle::compress_lz4_bound(num_elems_after_padding, 
final_size_of_type, 0));
+                   bshuf_compress_lz4_bound(num_elems_after_padding, 
final_size_of_type, 0));
 
     InlineEncodeFixed32(&buffer_[0], ordinal_pos);
     InlineEncodeFixed32(&buffer_[4], count_);
-    int64_t bytes = bitshuffle::compress_lz4(data_.data(), 
&buffer_[kMaxHeaderSize],
-                                             num_elems_after_padding, 
final_size_of_type, 0);
+    int64_t bytes = bshuf_compress_lz4(data_.data(), &buffer_[kMaxHeaderSize],
+                                    num_elems_after_padding, 
final_size_of_type, 0);
     if (PREDICT_FALSE(bytes < 0)) {
       // This means the bitshuffle function fails.
       // Ideally, this should not happen.
@@ -337,8 +337,7 @@ class BShufBlockDecoder : public BlockDecoder {
       int64_t bytes;
       decoded_.resize(num_elems_after_padding_ * size_of_type);
       uint8_t* in = const_cast<uint8_t*>(&data_[kHeaderSize]);
-      bytes = bitshuffle::decompress_lz4(in, decoded_.data(), 
num_elems_after_padding_,
-                                         size_of_type, 0);
+      bytes = bshuf_decompress_lz4(in, decoded_.data(), 
num_elems_after_padding_, size_of_type, 0);
       if (PREDICT_FALSE(bytes < 0)) {
         // Ideally, this should not happen.
         AbortWithBitShuffleError(bytes);

http://git-wip-us.apache.org/repos/asf/kudu/blob/04eeacdd/src/kudu/gutil/cpu.cc
----------------------------------------------------------------------
diff --git a/src/kudu/gutil/cpu.cc b/src/kudu/gutil/cpu.cc
index f4c3885a..c6bf41f 100644
--- a/src/kudu/gutil/cpu.cc
+++ b/src/kudu/gutil/cpu.cc
@@ -68,7 +68,7 @@ void __cpuid(int cpu_info[4], int info_type) {
   __asm__ volatile (
     "cpuid\n"
     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), 
"=d"(cpu_info[3])
-    : "a"(info_type), "c"(0)
+    : "a"(info_type)
   );
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/04eeacdd/thirdparty/build-definitions.sh
----------------------------------------------------------------------
diff --git a/thirdparty/build-definitions.sh b/thirdparty/build-definitions.sh
index 923a98b..d90f2c1 100644
--- a/thirdparty/build-definitions.sh
+++ b/thirdparty/build-definitions.sh
@@ -407,46 +407,11 @@ build_bitshuffle() {
   mkdir -p $BITSHUFFLE_BDIR
   pushd $BITSHUFFLE_BDIR
   # bitshuffle depends on lz4, therefore set the flag I$PREFIX/include
-
-  # This library has significant optimizations when built with -mavx2. However,
-  # we still need to support non-AVX2-capable hardware. So, we build it twice,
-  # once with the flag and once without, and use some linker tricks to
-  # suffix the AVX2 symbols with '_avx2'. OSX doesn't have objcopy, so we only
-  # do this trick on Linux.
-  if [ -n "$OS_LINUX" ]; then
-    arches="default avx2"
-  else
-    arches="default"
-  fi
-  to_link=""
-  for arch in $arches ; do
-    arch_flag=""
-    if [ "$arch" == "avx2" ]; then
-      arch_flag="-mavx2"
-    fi
-    tmp_obj=bitshuffle_${arch}_tmp.o
-    dst_obj=bitshuffle_${arch}.o
-    ${CC:-gcc} $EXTRA_CFLAGS $arch_flag -std=c99 -I$PREFIX/include -O3 
-DNDEBUG -fPIC -c \
-      "$BITSHUFFLE_SOURCE/src/bitshuffle_core.c" \
-      "$BITSHUFFLE_SOURCE/src/bitshuffle.c" \
-      "$BITSHUFFLE_SOURCE/src/iochain.c"
-    # Merge the object files together to produce a combined .o file.
-    ld -r -o $tmp_obj bitshuffle_core.o bitshuffle.o iochain.o
-    # For the AVX2 symbols, suffix them.
-    if [ "$arch" == "avx2" ]; then
-      # Create a mapping file with '<old_sym> <suffixed_sym>' on each line.
-      nm --defined-only --extern-only $tmp_obj | while read addr type sym ; do
-        echo ${sym} ${sym}_${arch}
-      done > renames.txt
-      objcopy --redefine-syms=renames.txt $tmp_obj $dst_obj
-    else
-      mv $tmp_obj $dst_obj
-    fi
-    to_link="$to_link $dst_obj"
-  done
-
-  rm -f bitshuffle.a
-  ar rs bitshuffle.a $to_link
+  ${CC:-gcc} $EXTRA_CFLAGS -std=c99 -I$PREFIX/include -O3 -DNDEBUG -fPIC -c \
+    "$BITSHUFFLE_SOURCE/src/bitshuffle_core.c" \
+    "$BITSHUFFLE_SOURCE/src/bitshuffle.c" \
+    "$BITSHUFFLE_SOURCE/src/iochain.c"
+  ar rs bitshuffle.a bitshuffle_core.o bitshuffle.o iochain.o
   cp bitshuffle.a $PREFIX/lib/
   cp $BITSHUFFLE_SOURCE/src/bitshuffle.h $PREFIX/include/bitshuffle.h
   cp $BITSHUFFLE_SOURCE/src/bitshuffle_core.h $PREFIX/include/bitshuffle_core.h

Reply via email to