Repository: parquet-cpp Updated Branches: refs/heads/master f91189abc -> 0149a7afc
PARQUET-267: Detach thirdparty code from build configuration. This is based off of pull request https://github.com/apache/parquet-cpp/pull/14. Author: Kalon Mills <[email protected]> Closes #16 from kalaxy/libparquet-library-update-build and squashes the following commits: 0ce51db [Kalon Mills] Add script for automating build env setup. 82a198c [Kalon Mills] Make thrift build on mac only a warning when not specified explicitly. d096c64 [Kalon Mills] Update build instructions. 6709182 [Kalon Mills] Support thrift dependency in thirdparty scripts for linux. bedd0d4 [Kalon Mills] Remove thirdparty code lz4 from repo. 30c2b7e [Kalon Mills] Support build environment configuration of LZ4 library. 73e7785 [Kalon Mills] Support build environment configuration of Snappy library. Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/0149a7af Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/0149a7af Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/0149a7af Branch: refs/heads/master Commit: 0149a7afc0de2c62326580b383ca3307b6711a9d Parents: f91189a Author: Kalon Mills <[email protected]> Authored: Tue Jan 19 14:03:54 2016 -0800 Committer: Nong Li <[email protected]> Committed: Tue Jan 19 14:03:54 2016 -0800 ---------------------------------------------------------------------- .travis.yml | 28 +- CMakeLists.txt | 3 - README.md | 52 +- cmake_modules/FindLz4.cmake | 69 ++- cmake_modules/FindSnappy.cmake | 70 ++- setup_build_env.sh | 27 + src/parquet/compression/lz4-codec.cc | 2 +- thirdparty/build_thirdparty.sh | 26 +- thirdparty/download_thirdparty.sh | 26 +- thirdparty/lz4-svn/CMakeLists.txt | 13 - thirdparty/lz4-svn/lz4.c | 906 ------------------------------ thirdparty/lz4-svn/lz4.h | 128 ----- thirdparty/lz4-svn/lz4hc.c | 730 ------------------------ thirdparty/lz4-svn/lz4hc.h | 60 -- thirdparty/versions.sh | 13 +- 15 files changed, 212 insertions(+), 1941 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/.travis.yml ---------------------------------------------------------------------- diff --git a/.travis.yml b/.travis.yml index 5da9a6f..4ca229f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,8 +20,7 @@ addons: - g++-4.9 - cmake - valgrind - - libboost-dev - #- libsnappy-dev currently handled by thirdparty scipts. + - libboost-dev #needed for thrift cpp compilation - libboost-program-options-dev #needed for thrift cpp compilation - libboost-test-dev #needed for thrift cpp compilation - libssl-dev #needed for thrift cpp compilation @@ -31,33 +30,24 @@ addons: - pkg-config #needed for thrift cpp compilation before_install: - - pushd thirdparty - # thrift cpp + - mkdir $HOME/build_dir + - cd $HOME/build_dir - > if [ $TRAVIS_OS_NAME == osx ]; then brew update && - brew install thrift; + brew install thrift lz4 snappy; fi - > if [ $TRAVIS_OS_NAME == linux ]; then - wget http://archive.apache.org/dist/thrift/0.9.1/thrift-0.9.1.tar.gz && - tar xfz thrift-0.9.1.tar.gz && - pushd thrift-0.9.1 && - ./configure CXXFLAGS='-fPIC' --without-qt4 --without-c_glib --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-php --without-php_extension --without-ruby --without-haskell --without-go --without-d --with-cpp --prefix=$HOME/local && - make clean && - make install && - popd; + cp -r $TRAVIS_BUILD_DIR/thirdparty . && + ./thirdparty/download_thirdparty.sh && + ./thirdparty/build_thirdparty.sh && + export THRIFT_HOME=$HOME/build_dir/thirdparty/installed SNAPPY_HOME=$HOME/build_dir/thirdparty/installed LZ4_HOME=$HOME/build_dir/thirdparty/installed; fi - # snappy and lz4 - - ./download_thirdparty.sh - - ./build_thirdparty.sh - - popd before_script: - export CC="gcc-4.9" - export CXX="g++-4.9" - - mkdir build - - cd build - - THRIFT_HOME=$HOME/local cmake .. 
+ - cmake $TRAVIS_BUILD_DIR script: make http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index eb67f75..03b7da5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,9 +22,6 @@ enable_testing() set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake_modules") set(BUILD_SUPPORT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/build-support) -set(THIRDPARTY_PREFIX ${CMAKE_SOURCE_DIR}/thirdparty/installed) -set(CMAKE_PREFIX_PATH ${THIRDPARTY_PREFIX}) - if(APPLE) set(CMAKE_MACOSX_RPATH 1) set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/README.md ---------------------------------------------------------------------- diff --git a/README.md b/README.md index 3af78d6..cb81272 100644 --- a/README.md +++ b/README.md @@ -2,32 +2,36 @@ Parquet-cpp [](http =========== A C++ library to read parquet files. -To build you will need some version of boost installed and thrift 0.7+ installed. -(If you are building thrift from source, you will need to set the THRIFT_HOME env -variable to the directory containing include/ and lib/.) - -Then run: -<br> -<code> -thirdparty/download_thirdparty.sh -</code> -<br> -<code> -thirdparty/build_thirdparty.sh -</code> -<br> -<code> -cmake . -</code> -<br> -<code> -make -</code> - -The binaries will be built to ./bin which contains the libraries to link against as +## Third Party Dependencies +- snappy +- lz4 +- thrift 0.7+ [install instructions](https://thrift.apache.org/docs/install/) + +Many package managers support some or all of these dependencies. E.g.: +```shell +ubuntu$ sudo apt-get install libboost-dev libsnappy-dev liblz4-dev +``` +```shell +mac$ brew install snappy lz4 thrift +``` + +./setup_build_env.sh tries to automate setting up a build environment for you with third party dependencies. You use it by running `./setup_build_env.sh`. 
By default, it will create a build directory `build/`. You can override the build directory by setting the BUILD_DIR env variable to another location. + +Also feel free to take a look at our [.travis.yml](.travis.yml) to see how that build env is set up. + + +## Build +- `cmake .` + - You can customize dependent library locations through various environment variables: + - THRIFT_HOME customizes the thrift installed location. + - SNAPPY_HOME customizes the snappy installed location. + - LZ4_HOME customizes the lz4 installed location. +- `make` + +The binaries will be built to ./debug which contains the libraries to link against as well as a few example executables. -Incremental builds can be done afterwords with just <code> make </code>. +Incremental builds can be done afterwards with just `make`. Design ======== http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/cmake_modules/FindLz4.cmake ---------------------------------------------------------------------- diff --git a/cmake_modules/FindLz4.cmake b/cmake_modules/FindLz4.cmake index 4060cdb..1f6ba47 100644 --- a/cmake_modules/FindLz4.cmake +++ b/cmake_modules/FindLz4.cmake @@ -1,4 +1,3 @@ -# Copyright 2012 Cloudera Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,47 +10,71 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +# +# Tries to find Lz4 headers and libraries. +# +# Usage of this module as follows: +# +# find_package(Lz4) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# Lz4_HOME - When set, this path is inspected instead of standard library +# locations as the root of the Lz4 installation. +# The environment variable LZ4_HOME overrides this variable. 
+# # - Find LZ4 (lz4.h, liblz4.a, liblz4.so, and liblz4.so.1) # This module defines # LZ4_INCLUDE_DIR, directory containing headers # LZ4_LIBS, directory containing lz4 libraries # LZ4_STATIC_LIB, path to liblz4.a +# LZ4_SHARED_LIB, path to liblz4's shared library # LZ4_FOUND, whether lz4 has been found -set(LZ4_SEARCH_HEADER_PATHS - ${THIRDPARTY_PREFIX}/include -) - -set(LZ4_SEARCH_LIB_PATH - ${THIRDPARTY_PREFIX}/lib -) +if( NOT "$ENV{LZ4_HOME}" STREQUAL "") + file( TO_CMAKE_PATH "$ENV{LZ4_HOME}" _native_path ) + list( APPEND _lz4_roots ${_native_path} ) +elseif ( Lz4_HOME ) + list( APPEND _lz4_roots ${Lz4_HOME} ) +endif() -find_path(LZ4_INCLUDE_DIR lz4.h PATHS - ${LZ4_SEARCH_HEADER_PATHS} - # make sure we don't accidentally pick up a different version - NO_DEFAULT_PATH -) +# Try the parameterized roots, if they exist +if ( _lz4_roots ) + find_path( LZ4_INCLUDE_DIR NAMES lz4.h + PATHS ${_lz4_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "include" ) + find_library( LZ4_LIBRARIES NAMES lz4 + PATHS ${_lz4_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "lib" ) +else () + find_path( LZ4_INCLUDE_DIR NAMES lz4.h ) + find_library( LZ4_LIBRARIES NAMES lz4 ) +endif () -find_library(LZ4_LIB_PATH NAMES liblz4.a PATHS ${LZ4_SEARCH_LIB_PATH} NO_DEFAULT_PATH) -if (LZ4_INCLUDE_DIR AND LZ4_LIB_PATH) +if (LZ4_INCLUDE_DIR AND LZ4_LIBRARIES) set(LZ4_FOUND TRUE) - set(LZ4_LIBS ${LZ4_SEARCH_LIB_PATH}) - set(LZ4_STATIC_LIB ${LZ4_SEARCH_LIB_PATH}/liblz4.a) + get_filename_component( LZ4_LIBS ${LZ4_LIBRARIES} DIRECTORY ) + set(LZ4_LIB_NAME liblz4) + set(LZ4_STATIC_LIB ${LZ4_LIBS}/${LZ4_LIB_NAME}.a) + set(LZ4_SHARED_LIB ${LZ4_LIBS}/${LZ4_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) else () set(LZ4_FOUND FALSE) endif () if (LZ4_FOUND) if (NOT Lz4_FIND_QUIETLY) - message(STATUS "Found the Lz4 library: ${LZ4_LIB_PATH}") + message(STATUS "Found the Lz4 library: ${LZ4_LIBRARIES}") endif () else () if (NOT Lz4_FIND_QUIETLY) - set(LZ4_ERR_MSG "Could not find the Lz4 library. 
Looked for headers") - set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${LZ4_SEARCH_HEADER_PATHS}, and for libs") - set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${LZ4_SEARCH_LIB_PATH}") + set(LZ4_ERR_MSG "Could not find the Lz4 library. Looked in ") + if ( _lz4_roots ) + set(LZ4_ERR_MSG "${LZ4_ERR_MSG} in ${_lz4_roots}.") + else () + set(LZ4_ERR_MSG "${LZ4_ERR_MSG} system search paths.") + endif () if (Lz4_FIND_REQUIRED) message(FATAL_ERROR "${LZ4_ERR_MSG}") else (Lz4_FIND_REQUIRED) @@ -63,5 +86,7 @@ endif () mark_as_advanced( LZ4_INCLUDE_DIR LZ4_LIBS + LZ4_LIBRARIES LZ4_STATIC_LIB + LZ4_SHARED_LIB ) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/cmake_modules/FindSnappy.cmake ---------------------------------------------------------------------- diff --git a/cmake_modules/FindSnappy.cmake b/cmake_modules/FindSnappy.cmake index 3d1ba14..08e8d73 100644 --- a/cmake_modules/FindSnappy.cmake +++ b/cmake_modules/FindSnappy.cmake @@ -1,4 +1,3 @@ -# Copyright 2012 Cloudera Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,47 +10,70 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -# - Find SNAPPY (snappy.h, libsnappy.a, libsnappy.so, and libsnappy.so.1) +# +# Tries to find Snappy headers and libraries. +# +# Usage of this module as follows: +# +# find_package(Snappy) +# +# Variables used by this module, they can change the default behaviour and need +# to be set before calling find_package: +# +# Snappy_HOME - When set, this path is inspected instead of standard library +# locations as the root of the Snappy installation. +# The environment variable SNAPPY_HOME overrides this variable. 
+# # This module defines # SNAPPY_INCLUDE_DIR, directory containing headers # SNAPPY_LIBS, directory containing snappy libraries # SNAPPY_STATIC_LIB, path to libsnappy.a +# SNAPPY_SHARED_LIB, path to libsnappy's shared library # SNAPPY_FOUND, whether snappy has been found -set(SNAPPY_SEARCH_HEADER_PATHS - ${THIRDPARTY_PREFIX}/include -) - -set(SNAPPY_SEARCH_LIB_PATH - ${THIRDPARTY_PREFIX}/lib -) +if( NOT "$ENV{SNAPPY_HOME}" STREQUAL "") + file( TO_CMAKE_PATH "$ENV{SNAPPY_HOME}" _native_path ) + list( APPEND _snappy_roots ${_native_path} ) +elseif ( Snappy_HOME ) + list( APPEND _snappy_roots ${Snappy_HOME} ) +endif() -find_path(SNAPPY_INCLUDE_DIR snappy.h PATHS - ${SNAPPY_SEARCH_HEADER_PATHS} - # make sure we don't accidentally pick up a different version - NO_DEFAULT_PATH -) +# Try the parameterized roots, if they exist +if ( _snappy_roots ) + find_path( SNAPPY_INCLUDE_DIR NAMES snappy.h + PATHS ${_snappy_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "include" ) + find_library( SNAPPY_LIBRARIES NAMES snappy + PATHS ${_snappy_roots} NO_DEFAULT_PATH + PATH_SUFFIXES "lib" ) +else () + find_path( SNAPPY_INCLUDE_DIR NAMES snappy.h ) + find_library( SNAPPY_LIBRARIES NAMES snappy ) +endif () -find_library(SNAPPY_LIB_PATH NAMES snappy PATHS ${SNAPPY_SEARCH_LIB_PATH} NO_DEFAULT_PATH) -if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIB_PATH) +if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARIES) set(SNAPPY_FOUND TRUE) - set(SNAPPY_LIBS ${SNAPPY_SEARCH_LIB_PATH}) - set(SNAPPY_STATIC_LIB ${SNAPPY_SEARCH_LIB_PATH}/libsnappy.a) + get_filename_component( SNAPPY_LIBS ${SNAPPY_LIBRARIES} DIRECTORY ) + set(SNAPPY_LIB_NAME libsnappy) + set(SNAPPY_STATIC_LIB ${SNAPPY_LIBS}/${SNAPPY_LIB_NAME}.a) + set(SNAPPY_SHARED_LIB ${SNAPPY_LIBS}/${SNAPPY_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) else () set(SNAPPY_FOUND FALSE) endif () if (SNAPPY_FOUND) if (NOT Snappy_FIND_QUIETLY) - message(STATUS "Found the Snappy library: ${SNAPPY_LIB_PATH}") + message(STATUS "Found the Snappy library: ${SNAPPY_LIBRARIES}") endif () else () 
if (NOT Snappy_FIND_QUIETLY) - set(SNAPPY_ERR_MSG "Could not find the Snappy library. Looked for headers") - set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} in ${SNAPPY_SEARCH_HEADER_PATHS}, and for libs") - set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} in ${SNAPPY_SEARCH_LIB_PATH}") + set(SNAPPY_ERR_MSG "Could not find the Snappy library. Looked in ") + if ( _snappy_roots ) + set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} in ${_snappy_roots}.") + else () + set(SNAPPY_ERR_MSG "${SNAPPY_ERR_MSG} system search paths.") + endif () if (Snappy_FIND_REQUIRED) message(FATAL_ERROR "${SNAPPY_ERR_MSG}") else (Snappy_FIND_REQUIRED) @@ -63,5 +85,7 @@ endif () mark_as_advanced( SNAPPY_INCLUDE_DIR SNAPPY_LIBS + SNAPPY_LIBRARIES SNAPPY_STATIC_LIB + SNAPPY_SHARED_LIB ) http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/setup_build_env.sh ---------------------------------------------------------------------- diff --git a/setup_build_env.sh b/setup_build_env.sh new file mode 100755 index 0000000..e99f5d4 --- /dev/null +++ b/setup_build_env.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -e + +SOURCE_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) +: ${BUILD_DIR:=$SOURCE_DIR/build} + +mkdir -p $BUILD_DIR +cp -r $SOURCE_DIR/thirdparty $BUILD_DIR +cd $BUILD_DIR +./thirdparty/download_thirdparty.sh +./thirdparty/build_thirdparty.sh + +export SNAPPY_HOME=$BUILD_DIR/thirdparty/installed +export LZ4_HOME=$BUILD_DIR/thirdparty/installed +# build script doesn't support building thrift on OSX +if [ "$(uname)" != "Darwin" ]; then + export THRIFT_HOME=$BUILD_DIR/thirdparty/installed +fi + +cmake $SOURCE_DIR + +cd $SOURCE_DIR + +echo +echo "Build env initialized in $BUILD_DIR." 
+ http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/src/parquet/compression/lz4-codec.cc ---------------------------------------------------------------------- diff --git a/src/parquet/compression/lz4-codec.cc b/src/parquet/compression/lz4-codec.cc index 6655387..9166538 100644 --- a/src/parquet/compression/lz4-codec.cc +++ b/src/parquet/compression/lz4-codec.cc @@ -20,7 +20,7 @@ namespace parquet_cpp { void Lz4Codec::Decompress(int input_len, const uint8_t* input, int output_len, uint8_t* output_buffer) { - int n = LZ4_uncompress(reinterpret_cast<const char*>(input), + int n = LZ4_decompress_fast(reinterpret_cast<const char*>(input), reinterpret_cast<char*>(output_buffer), output_len); if (n != input_len) { throw parquet_cpp::ParquetException("Corrupt lz4 compressed data."); http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/build_thirdparty.sh ---------------------------------------------------------------------- diff --git a/thirdparty/build_thirdparty.sh b/thirdparty/build_thirdparty.sh index 868185f..891e613 100755 --- a/thirdparty/build_thirdparty.sh +++ b/thirdparty/build_thirdparty.sh @@ -17,6 +17,7 @@ else case $arg in "lz4") F_LZ4=1 ;; "snappy") F_SNAPPY=1 ;; + "thrift") F_THRIFT=1 ;; *) echo "Unknown module: $arg"; exit 1 ;; esac done @@ -47,18 +48,39 @@ export PATH=$PREFIX/bin:$PATH # build snappy if [ -n "$F_ALL" -o -n "$F_SNAPPY" ]; then - cd $SNAPPY_DIR + cd $TP_DIR/$SNAPPY_BASEDIR ./configure --with-pic --prefix=$PREFIX make -j$PARALLEL install fi # build lz4 if [ -n "$F_ALL" -o -n "$F_LZ4" ]; then - cd $LZ4_DIR + cd $TP_DIR/$LZ4_BASEDIR/cmake_unofficial CFLAGS=-fPIC cmake -DCMAKE_INSTALL_PREFIX:PATH=$PREFIX $LZ4_DIR make -j$PARALLEL install fi +# build thrift +if [ -n "$F_ALL" -o -n "$F_THRIFT" ]; then + if [ "$(uname)" == "Darwin" ]; then + echo "thrift compilation under OSX is not currently supported." 
+ + # exit with an error if thrift was specified explicitly otherwise it is + # just a warning + if [ -n "$F_THRIFT" ]; then + exit 1 + fi + else + # linux build + # this expects all of the dependencies for thrift to already be installed in + # such a way that ./configure can find them + cd $TP_DIR/$THRIFT_BASEDIR + ./configure CXXFLAGS='-fPIC' --without-qt4 --without-c_glib --without-csharp --without-java --without-erlang --without-nodejs --without-lua --without-python --without-perl --without-php --without-php_extension --without-ruby --without-haskell --without-go --without-d --with-cpp --prefix=$PREFIX + make clean + make install + fi +fi + echo "---------------------" echo "Thirdparty dependencies built and installed into $PREFIX successfully" http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/download_thirdparty.sh ---------------------------------------------------------------------- diff --git a/thirdparty/download_thirdparty.sh b/thirdparty/download_thirdparty.sh index 9144ac4..7cde792 100755 --- a/thirdparty/download_thirdparty.sh +++ b/thirdparty/download_thirdparty.sh @@ -4,14 +4,28 @@ set -x set -e TP_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) -cd $TP_DIR -source versions.sh +source $TP_DIR/versions.sh -if [ ! -d snappy-${SNAPPY_VERSION} ]; then +download_extract_and_cleanup() { + filename=$TP_DIR/$(basename "$1") + curl -#LC - "$1" -o $filename + tar xzf $filename -C $TP_DIR + rm $filename +} + +if [ ! -d ${LZ4_BASEDIR} ]; then + echo "Fetching lz4" + download_extract_and_cleanup $LZ4_URL +fi + +if [ ! -d ${SNAPPY_BASEDIR} ]; then echo "Fetching snappy" - curl -OC - http://snappy.googlecode.com/files/snappy-${SNAPPY_VERSION}.tar.gz - tar xzf snappy-${SNAPPY_VERSION}.tar.gz - rm snappy-${SNAPPY_VERSION}.tar.gz + download_extract_and_cleanup $SNAPPY_URL +fi + +if [ ! 
-d ${THRIFT_BASEDIR} ]; then + echo "Fetching thrift" + download_extract_and_cleanup $THRIFT_URL fi http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/lz4-svn/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/thirdparty/lz4-svn/CMakeLists.txt b/thirdparty/lz4-svn/CMakeLists.txt deleted file mode 100644 index 712403c..0000000 --- a/thirdparty/lz4-svn/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -cmake_minimum_required(VERSION 2.8) - -SET(CMAKE_BUILD_TYPE "Release") -set(CMAKE_C_FLAGS_RELEASE "-O3 -std=c99 -Wall -W -Wundef -Wno-implicit-function-declaration") - -set(LZ4_SOURCES lz4.c lz4hc.c) -set(LZ4_HEADERS lz4.h lz4hc.h) - -add_library(lz4 STATIC ${LZ4_SOURCES}) -target_link_libraries(lz4) - -install(FILES ${LZ4_HEADERS} DESTINATION include) -install(TARGETS lz4 DESTINATION lib) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/lz4-svn/lz4.c ---------------------------------------------------------------------- diff --git a/thirdparty/lz4-svn/lz4.c b/thirdparty/lz4-svn/lz4.c deleted file mode 100644 index 1f2eafd..0000000 --- a/thirdparty/lz4-svn/lz4.c +++ /dev/null @@ -1,906 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2012, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ -*/ - -//************************************** -// Tuning parameters -//************************************** -// MEMORY_USAGE : -// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -// Increasing memory usage improves compression ratio -// Reduced memory usage can improve speed, due to cache effect -// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache -#define MEMORY_USAGE 14 - -// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : -// This will provide a small boost to performance for big endian cpu, but the resulting compressed stream will be incompatible with little-endian CPU. -// You can set this option to 1 in situations where data will remain within closed environment -// This option is useless on Little_Endian CPU (such as x86) -//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 - - - -//************************************** -// CPU Feature Detection -//************************************** -// 32 or 64 bits ? 
-#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif - -// Little Endian or Big Endian ? -// Overwrite the #define below if you know your architecture endianess -#if defined (__GLIBC__) -# include <endian.h> -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. -#endif - -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. 
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -// Define this parameter if your target system or compiler does not support hardware bit count -#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -//************************************** -// Compiler Options -//************************************** -#if __STDC_VERSION__ >= 199901L // C99 -/* "restrict" is a known keyword */ -#else -# define restrict // Disable restrict -#endif - -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -#ifdef _MSC_VER // Visual Studio -# include <intrin.h> // For Visual 2005 -# if LZ4_ARCH64 // 64-bit -# pragma intrinsic(_BitScanForward64) // For Visual 2005 -# pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else -# pragma intrinsic(_BitScanForward) // For Visual 2005 -# pragma intrinsic(_BitScanReverse) // For Visual 2005 -# endif -#endif - -#ifdef _MSC_VER -# define lz4_bswap16(x) _byteswap_ushort(x) -#else -# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - -#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -//************************************** -// Includes -//************************************** -#include <stdlib.h> // for malloc -#include <string.h> // for memset -#include "lz4.h" - - -//************************************** -// Basic Types -//************************************** -#if 
defined(_MSC_VER) // Visual Studio does not support 'stdint' natively -# define BYTE unsigned __int8 -# define U16 unsigned __int16 -# define U32 unsigned __int32 -# define S32 __int32 -# define U64 unsigned __int64 -#else -# include <stdint.h> -# define BYTE uint8_t -# define U16 uint16_t -# define U32 uint32_t -# define S32 int32_t -# define U64 uint64_t -#endif - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -# pragma pack(push, 1) -#endif - -typedef struct _U16_S { U16 v; } U16_S; -typedef struct _U32_S { U32 v; } U32_S; -typedef struct _U64_S { U64 v; } U64_S; - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -# pragma pack(pop) -#endif - -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) - - -//************************************** -// Constants -//************************************** -#define MINMATCH 4 - -#define HASH_LOG (MEMORY_USAGE-2) -#define HASHTABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASHTABLESIZE - 1) - -// NOTCOMPRESSIBLE_DETECTIONLEVEL : -// Decreasing this value will make the algorithm skip faster data segments considered "incompressible" -// This may decrease compression ratio dramatically, but will be faster on incompressible data -// Increasing this value will make the algorithm search more before declaring a segment "incompressible" -// This could improve compression a bit, but will be slower on incompressible data -// The default value (6) is recommended -#define NOTCOMPRESSIBLE_DETECTIONLEVEL 6 -#define SKIPSTRENGTH (NOTCOMPRESSIBLE_DETECTIONLEVEL>2?NOTCOMPRESSIBLE_DETECTIONLEVEL:2) -#define STACKLIMIT 13 -#define HEAPMODE (HASH_LOG>STACKLIMIT) // Defines if memory is allocated into the stack (local variable), or into the heap (malloc()). 
-#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH+MINMATCH) -#define MINLENGTH (MFLIMIT+1) - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) - -#define ML_BITS 4 -#define ML_MASK ((1U<<ML_BITS)-1) -#define RUN_BITS (8-ML_BITS) -#define RUN_MASK ((1U<<RUN_BITS)-1) - - -//************************************** -// Architecture-specific macros -//************************************** -#if LZ4_ARCH64 // 64-bit -# define STEPSIZE 8 -# define UARCH U64 -# define AARCH A64 -# define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; -# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) -# define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e) -# define HTYPE U32 -# define INITBASE(base) const BYTE* const base = ip -#else // 32-bit -# define STEPSIZE 4 -# define UARCH U32 -# define AARCH A32 -# define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; -# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); -# define LZ4_SECURECOPY LZ4_WILDCOPY -# define HTYPE const BYTE* -# define INITBASE(base) const int base = 0 -#endif - -#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) -# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } -# define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; } -#else // Little Endian -# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } -# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } -#endif - - -//************************************** -// Local structures -//************************************** -struct refTables -{ - HTYPE hashTable[HASHTABLESIZE]; -}; - - -//************************************** -// Macros -//************************************** -#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) -#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) -#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); -#define 
LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+l; LZ4_WILDCOPY(s,d,e); d=e; } - - -//**************************** -// Private functions -//**************************** -#if LZ4_ARCH64 - -static inline int LZ4_NbCommonBytes (register U64 val) -{ -#if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); - #else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); - #else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; - #endif -#endif -} - -#else - -static inline int LZ4_NbCommonBytes (register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); - #else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return 
(__builtin_ctz(val) >> 3); - #else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; - #endif -#endif -} - -#endif - - - -//****************************** -// Compression functions -//****************************** - -// LZ4_compressCtx : -// ----------------- -// Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. -// If it cannot achieve it, compression will stop, and result of the function will be zero. -// return : the number of bytes written in buffer 'dest', or 0 if the compression fails - -static inline int LZ4_compressCtx(void** ctx, - const char* source, - char* dest, - int isize, - int maxOutputSize) -{ -#if HEAPMODE - struct refTables *srt = (struct refTables *) (*ctx); - HTYPE* HashTable; -#else - HTYPE HashTable[HASHTABLESIZE] = {0}; -#endif - - const BYTE* ip = (BYTE*) source; - INITBASE(base); - const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - - int length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - - // Init - if (isize<MINLENGTH) goto _last_literals; -#if HEAPMODE - if (*ctx == NULL) - { - srt = (struct refTables *) malloc ( sizeof(struct refTables) ); - *ctx = (void*) srt; - } - HashTable = (HTYPE*)(srt->hashTable); - memset((void*)HashTable, 0, sizeof(srt->hashTable)); -#else - (void) ctx; -#endif - - - // First Byte - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - ip++; forwardH = LZ4_HASH_VALUE(ip); - - // Main Loop - for ( ; ; ) - { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE* forwardIp = ip; - const BYTE* ref; - BYTE* token; - - // Find a match - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = 
forwardIp; - forwardIp = ip + step; - - if unlikely(forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_HASH_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = ip - base; - - } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); - - // Catch up - while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = (int)(ip - anchor); - token = op++; - if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit -#ifdef _MSC_VER - if (length>=(int)RUN_MASK) - { - int len = length-RUN_MASK; - *token=(RUN_MASK<<ML_BITS); - if (len>254) - { - do { *op++ = 255; len -= 255; } while (len>254); - *op++ = (BYTE)len; - memcpy(op, anchor, length); - op += length; - goto _next_match; - } - else - *op++ = (BYTE)len; - } - else *token = (length<<ML_BITS); -#else - if (length>=(int)RUN_MASK) - { - int len; - *token=(RUN_MASK<<ML_BITS); - len = length-RUN_MASK; - for(; len > 254 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (length<<ML_BITS); -#endif - - // Copy Literals - LZ4_BLINDCOPY(anchor, op, length); - -_next_match: - // Encode Offset - LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref)); - - // Start Counting - ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified - anchor = ip; - while likely(ip<matchlimit-(STEPSIZE-1)) - { - UARCH diff = AARCH(ref) ^ AARCH(ip); - if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; } - ip += LZ4_NbCommonBytes(diff); - goto _endCount; - } - if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; } - if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; } - if ((ip<matchlimit) && (*ref == *ip)) ip++; -_endCount: - - // Encode MatchLength - length = (int)(ip - anchor); - if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit - if (length>=(int)ML_MASK) - { - *token += ML_MASK; - length -= ML_MASK; - for (; length > 509 
; length-=510) { *op++ = 255; *op++ = 255; } - if (length > 254) { length-=255; *op++ = 255; } - *op++ = (BYTE)length; - } - else *token += length; - - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } - - // Fill table - HashTable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; - - // Test next position - ref = base + HashTable[LZ4_HASH_VALUE(ip)]; - HashTable[LZ4_HASH_VALUE(ip)] = ip - base; - if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } - - // Prepare next loop - anchor = ip++; - forwardH = LZ4_HASH_VALUE(ip); - } - -_last_literals: - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun<<ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend-anchor; - } - - // End - return (int) (((char*)op)-dest); -} - - - -// Note : this function is valid only if isize < LZ4_64KLIMIT -#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) -#define HASHLOG64K (HASH_LOG+1) -#define HASH64KTABLESIZE (1U<<HASHLOG64K) -#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG64K)) -#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) -static inline int LZ4_compress64kCtx(void** ctx, - const char* source, - char* dest, - int isize, - int maxOutputSize) -{ -#if HEAPMODE - struct refTables *srt = (struct refTables *) (*ctx); - U16* HashTable; -#else - U16 HashTable[HASH64KTABLESIZE] = {0}; -#endif - - const BYTE* ip = (BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const base = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; -#define matchlimit (iend - LASTLITERALS) - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - - int len, length; - 
const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - - // Init - if (isize<MINLENGTH) goto _last_literals; -#if HEAPMODE - if (*ctx == NULL) - { - srt = (struct refTables *) malloc ( sizeof(struct refTables) ); - *ctx = (void*) srt; - } - HashTable = (U16*)(srt->hashTable); - memset((void*)HashTable, 0, sizeof(srt->hashTable)); -#else - (void) ctx; -#endif - - - // First Byte - ip++; forwardH = LZ4_HASH64K_VALUE(ip); - - // Main Loop - for ( ; ; ) - { - int findMatchAttempts = (1U << skipStrength) + 3; - const BYTE* forwardIp = ip; - const BYTE* ref; - BYTE* token; - - // Find a match - do { - U32 h = forwardH; - int step = findMatchAttempts++ >> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if (forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_HASH64K_VALUE(forwardIp); - ref = base + HashTable[h]; - HashTable[h] = (U16)(ip - base); - - } while (A32(ref) != A32(ip)); - - // Catch up - while ((ip>anchor) && (ref>(BYTE*)source) && (ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = (int)(ip - anchor); - token = op++; - if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit -#ifdef _MSC_VER - if (length>=(int)RUN_MASK) - { - int len = length-RUN_MASK; - *token=(RUN_MASK<<ML_BITS); - if (len>254) - { - do { *op++ = 255; len -= 255; } while (len>254); - *op++ = (BYTE)len; - memcpy(op, anchor, length); - op += length; - goto _next_match; - } - else - *op++ = (BYTE)len; - } - else *token = (length<<ML_BITS); -#else - if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } - else *token = (length<<ML_BITS); -#endif - - // Copy Literals - LZ4_BLINDCOPY(anchor, op, length); - -_next_match: - // Encode Offset - LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref)); - - // Start Counting - ip+=MINMATCH; ref+=MINMATCH; // MinMatch verified - anchor = ip; - while (ip<matchlimit-(STEPSIZE-1)) - { - 
UARCH diff = AARCH(ref) ^ AARCH(ip); - if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; } - ip += LZ4_NbCommonBytes(diff); - goto _endCount; - } - if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; } - if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; } - if ((ip<matchlimit) && (*ref == *ip)) ip++; -_endCount: - - // Encode MatchLength - len = (int)(ip - anchor); - if unlikely(op + (1 + LASTLITERALS) + (len>>8) > oend) return 0; // Check output limit - if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *op++ = 255; *op++ = 255; } if (len > 254) { len-=255; *op++ = 255; } *op++ = (BYTE)len; } - else *token += len; - - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } - - // Fill table - HashTable[LZ4_HASH64K_VALUE(ip-2)] = (U16)(ip - 2 - base); - - // Test next position - ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; - HashTable[LZ4_HASH64K_VALUE(ip)] = (U16)(ip - base); - if (A32(ref) == A32(ip)) { token = op++; *token=0; goto _next_match; } - - // Prepare next loop - anchor = ip++; - forwardH = LZ4_HASH64K_VALUE(ip); - } - -_last_literals: - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if (op + lastRun + 1 + (lastRun-RUN_MASK+255)/255 > oend) return 0; - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun<<ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend-anchor; - } - - // End - return (int) (((char*)op)-dest); -} - - -int LZ4_compress_limitedOutput(const char* source, - char* dest, - int isize, - int maxOutputSize) -{ -#if HEAPMODE - void* ctx = malloc(sizeof(struct refTables)); - int result; - if (isize < LZ4_64KLIMIT) - result = LZ4_compress64kCtx(&ctx, source, dest, isize, maxOutputSize); - else result = LZ4_compressCtx(&ctx, source, dest, isize, maxOutputSize); - free(ctx); - return result; -#else - if 
(isize < (int)LZ4_64KLIMIT) return LZ4_compress64kCtx(NULL, source, dest, isize, maxOutputSize); - return LZ4_compressCtx(NULL, source, dest, isize, maxOutputSize); -#endif -} - - -int LZ4_compress(const char* source, - char* dest, - int isize) -{ - return LZ4_compress_limitedOutput(source, dest, isize, LZ4_compressBound(isize)); -} - - - - -//**************************** -// Decompression functions -//**************************** - -// Note : The decoding functions LZ4_uncompress() and LZ4_uncompress_unknownOutputSize() -// are safe against "buffer overflow" attack type. -// They will never write nor read outside of the provided output buffers. -// LZ4_uncompress_unknownOutputSize() also insures that it will never read outside of the input buffer. -// A corrupted input will produce an error result, a negative int, indicating the position of the error within input stream. - -int LZ4_uncompress(const char* source, - char* dest, - int osize) -{ - // Local Variables - const BYTE* restrict ip = (const BYTE*) source; - const BYTE* ref; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + osize; - BYTE* cpy; - - unsigned token; - - size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 - size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif - - - // Main Loop - while (1) - { - size_t length; - - // get runlength - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) { size_t len; for (;(len=*ip++)==255;length+=255){} length += len; } - - // copy literals - cpy = op+length; - if (cpy>oend-COPYLENGTH) - { - if (cpy != oend) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals - memcpy(op, ip, length); - ip += length; - break; // EOF - } - LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset outside destination buffer - - // get matchlength - if ((length=(token&ML_MASK)) == 
ML_MASK) { for (;*ip==255;length+=255) {ip++;} length += *ip++; } - - // copy repeated sequence - if unlikely((op-ref)<STEPSIZE) - { -#if LZ4_ARCH64 - size_t dec64 = dec64table[op-ref]; -#else - const int dec64 = 0; -#endif - op[0] = ref[0]; - op[1] = ref[1]; - op[2] = ref[2]; - op[3] = ref[3]; - op += 4, ref += 4; ref -= dec32table[op-ref]; - A32(op) = A32(ref); - op += STEPSIZE-4; ref -= dec64; - } else { LZ4_COPYSTEP(ref,op); } - cpy = op + length - (STEPSIZE-4); - - if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); - while(op<cpy) *op++=*ref++; - op=cpy; - continue; - } - - LZ4_WILDCOPY(ref, op, cpy); - op=cpy; // correction - } - - // end of decoding - return (int) (((char*)ip)-source); - - // write overflow error detected -_output_error: - return (int) (-(((char*)ip)-source)); -} - - -int LZ4_uncompress_unknownOutputSize( - const char* source, - char* dest, - int isize, - int maxOutputSize) -{ - // Local Variables - const BYTE* restrict ip = (const BYTE*) source; - const BYTE* const iend = ip + isize; - const BYTE* ref; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - BYTE* cpy; - - size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 - size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif - - - // Special case - if unlikely(ip==iend) goto _output_error; // A correctly formed null-compressed LZ4 must have at least one byte (token=0) - - // Main Loop - while (1) - { - unsigned token; - size_t length; - - // get runlength - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) - { - int s=255; - while (likely(ip<iend) && (s==255)) { s=*ip++; length += s; } - } - - // copy literals - cpy = op+length; - if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) - { - if (cpy > oend) goto _output_error; // Error : writes beyond output buffer - if (ip+length != iend) goto 
_output_error; // Error : LZ4 format requires to consume all input at this stage (no match within the last 11 bytes, and at least 8 remaining input bytes for another match+literals) - memcpy(op, ip, length); - op += length; - break; // Necessarily EOF, due to parsing restrictions - } - LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset outside of destination buffer - - // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) - { - while likely(ip<iend-(LASTLITERALS+1)) // Error : a minimum input bytes must remain for LASTLITERALS + token - { - int s = *ip++; - length +=s; - if (s==255) continue; - break; - } - } - - // copy repeated sequence - if unlikely(op-ref<STEPSIZE) - { -#if LZ4_ARCH64 - size_t dec64 = dec64table[op-ref]; -#else - const int dec64 = 0; -#endif - op[0] = ref[0]; - op[1] = ref[1]; - op[2] = ref[2]; - op[3] = ref[3]; - op += 4, ref += 4; ref -= dec32table[op-ref]; - A32(op) = A32(ref); - op += STEPSIZE-4; ref -= dec64; - } else { LZ4_COPYSTEP(ref,op); } - cpy = op + length - (STEPSIZE-4); - - if unlikely(cpy>oend-(COPYLENGTH+(STEPSIZE-4))) - { - if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); - while(op<cpy) *op++=*ref++; - op=cpy; - continue; - } - - LZ4_WILDCOPY(ref, op, cpy); - op=cpy; // correction - } - - // end of decoding - return (int) (((char*)op)-dest); - - // write overflow error detected -_output_error: - return (int) (-(((char*)ip)-source)); -} - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/lz4-svn/lz4.h ---------------------------------------------------------------------- diff --git a/thirdparty/lz4-svn/lz4.h b/thirdparty/lz4-svn/lz4.h deleted file mode 100644 index 4897eb2..0000000 --- a/thirdparty/lz4-svn/lz4.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - LZ4 - Fast LZ 
compression algorithm - Header File - Copyright (C) 2011-2012, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ -*/ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - - -//************************************** -// Compiler Options -//************************************** -#if defined(_MSC_VER) && !defined(__cplusplus) // Visual Studio -# define inline __inline // Visual is not C99, but supports some kind of inline -#endif - - -//**************************** -// Simple Functions -//**************************** - -int LZ4_compress (const char* source, char* dest, int isize); -int LZ4_uncompress (const char* source, char* dest, int osize); - -/* -LZ4_compress() : - Compresses 'isize' bytes from 'source' into 'dest'. - Destination buffer must be already allocated, - and must be sized to handle worst cases situations (input data not compressible) - Worst case size evaluation is provided by function LZ4_compressBound() - - isize : is the input size. Max supported value is ~1.9GB - return : the number of bytes written in buffer dest - - -LZ4_uncompress() : - osize : is the output size, therefore the original size - return : the number of bytes read in the source buffer - If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction - This function never writes outside of provided buffers, and never modifies input buffer. - note : destination buffer must be already allocated. - its size must be a minimum of 'osize' bytes. 
-*/ - - -//**************************** -// Advanced Functions -//**************************** - -static inline int LZ4_compressBound(int isize) { return ((isize) + ((isize)/255) + 16); } -#define LZ4_COMPRESSBOUND( isize) ((isize) + ((isize)/255) + 16) - -/* -LZ4_compressBound() : - Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) - primarily useful for memory allocation of output buffer. - inline function is recommended for the general case, - but macro is also provided when results need to be evaluated at compile time (such as table size allocation). - - isize : is the input size. Max supported value is ~1.9GB - return : maximum output size in a "worst case" scenario - note : this function is limited by "int" range (2^31-1) -*/ - - -int LZ4_compress_limitedOutput (const char* source, char* dest, int isize, int maxOutputSize); - -/* -LZ4_compress_limitedOutput() : - Compress 'isize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. - If it cannot achieve it, compression will stop, and result of the function will be zero. - This function never writes outside of provided output buffer. - - isize : is the input size. 
Max supported value is ~1.9GB - maxOutputSize : is the size of the destination buffer (which must be already allocated) - return : the number of bytes written in buffer 'dest' - or 0 if the compression fails -*/ - - -int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); - -/* -LZ4_uncompress_unknownOutputSize() : - isize : is the input size, therefore the compressed size - maxOutputSize : is the size of the destination buffer (which must be already allocated) - return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) - If the source stream is malformed, the function will stop decoding and return a negative result, indicating the byte position of the faulty instruction - This function never writes beyond dest + maxOutputSize, and is therefore protected against malicious data packets - note : Destination buffer must be already allocated. - This version is slightly slower than LZ4_uncompress() -*/ - - -#if defined (__cplusplus) -} -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/lz4-svn/lz4hc.c ---------------------------------------------------------------------- diff --git a/thirdparty/lz4-svn/lz4hc.c b/thirdparty/lz4-svn/lz4hc.c deleted file mode 100644 index 7324492..0000000 --- a/thirdparty/lz4-svn/lz4hc.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Copyright (C) 2011-2012, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ -*/ - - -//************************************** -// CPU Feature Detection -//************************************** -// 32 or 64 bits ? -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif - -// Little Endian or Big Endian ? 
-// Overwrite the #define below if you know your architecture endianess -#if defined (__GLIBC__) -# include <endian.h> -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. -#endif - -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -// Define this parameter if your target system or compiler does not support hardware bit count -#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -//************************************** -// Compiler Options -//************************************** -#if __STDC_VERSION__ >= 199901L // C99 - /* "restrict" is a known keyword */ -#else -# define restrict // Disable restrict -#endif - -#ifdef _MSC_VER -# define inline __inline // Visual is not C99, but supports some kind of inline -# define forceinline __forceinline -# include <intrin.h> // For Visual 2005 -# if LZ4_ARCH64 // 64-bit -# pragma 
intrinsic(_BitScanForward64) // For Visual 2005 -# pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else -# pragma intrinsic(_BitScanForward) // For Visual 2005 -# pragma intrinsic(_BitScanReverse) // For Visual 2005 -# endif -#else -# ifdef __GNUC__ -# define forceinline inline __attribute__((always_inline)) -# else -# define forceinline inline -# endif -#endif - -#ifdef _MSC_VER // Visual Studio -#define lz4_bswap16(x) _byteswap_ushort(x) -#else -#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - - -//************************************** -// Includes -//************************************** -#include <stdlib.h> // calloc, free -#include <string.h> // memset, memcpy -#include "lz4hc.h" - -#define ALLOCATOR(s) calloc(1,s) -#define FREEMEM free -#define MEM_INIT memset - - -//************************************** -// Basic Types -//************************************** -#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively -#define BYTE unsigned __int8 -#define U16 unsigned __int16 -#define U32 unsigned __int32 -#define S32 __int32 -#define U64 unsigned __int64 -#else -#include <stdint.h> -#define BYTE uint8_t -#define U16 uint16_t -#define U32 uint32_t -#define S32 int32_t -#define U64 uint64_t -#endif - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(push, 1) -#endif - -typedef struct _U16_S { U16 v; } U16_S; -typedef struct _U32_S { U32 v; } U32_S; -typedef struct _U64_S { U64 v; } U64_S; - -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(pop) -#endif - -#define A64(x) (((U64_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A16(x) (((U16_S *)(x))->v) - - -//************************************** -// Constants -//************************************** -#define MINMATCH 4 - -#define DICTIONARY_LOGSIZE 16 -#define MAXD (1<<DICTIONARY_LOGSIZE) -#define MAXD_MASK ((U32)(MAXD - 1)) -#define MAX_DISTANCE (MAXD - 1) - -#define HASH_LOG (DICTIONARY_LOGSIZE-1) -#define 
HASHTABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASHTABLESIZE - 1) - -#define MAX_NB_ATTEMPTS 256 - -#define ML_BITS 4 -#define ML_MASK (size_t)((1U<<ML_BITS)-1) -#define RUN_BITS (8-ML_BITS) -#define RUN_MASK ((1U<<RUN_BITS)-1) - -#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH+MINMATCH) -#define MINLENGTH (MFLIMIT+1) -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) - - -//************************************** -// Architecture-specific macros -//************************************** -#if LZ4_ARCH64 // 64-bit -#define STEPSIZE 8 -#define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; -#define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) -#define UARCH U64 -#define AARCH A64 -#define HTYPE U32 -#define INITBASE(b,s) const BYTE* const b = s -#else // 32-bit -#define STEPSIZE 4 -#define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; -#define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); -#define UARCH U32 -#define AARCH A32 -#define HTYPE const BYTE* -#define INITBASE(b,s) const int b = 0 -#endif - -#if defined(LZ4_BIG_ENDIAN) -#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } -#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; } -#else // Little Endian -#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } -#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } -#endif - - -//************************************************************ -// Local Types -//************************************************************ -typedef struct -{ - const BYTE* base; - HTYPE hashTable[HASHTABLESIZE]; - U16 chainTable[MAXD]; - const BYTE* nextToUpdate; -} LZ4HC_Data_Structure; - - -//************************************** -// Macros -//************************************** -#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e); -#define LZ4_BLINDCOPY(s,d,l) { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; } -#define HASH_FUNCTION(i) (((i) * 
2654435761U) >> ((MINMATCH*8)-HASH_LOG)) -#define HASH_VALUE(p) HASH_FUNCTION(A32(p)) -#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) -#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] -#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) - - -//************************************** -// Private functions -//************************************** -#if LZ4_ARCH64 - -inline static int LZ4_NbCommonBytes (register U64 val) -{ -#if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); - #else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); - #else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; - #endif -#endif -} - -#else - -inline static int LZ4_NbCommonBytes (register U32 val) -{ -#if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanReverse( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); - #else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += 
(!val); - return r; - #endif -#else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); - #else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; - #endif -#endif -} - -#endif - - -inline static int LZ4HC_Init (LZ4HC_Data_Structure* hc4, const BYTE* base) -{ - MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); - hc4->nextToUpdate = base + LZ4_ARCH64; - hc4->base = base; - return 1; -} - - -inline static void* LZ4HC_Create (const BYTE* base) -{ - void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure)); - - LZ4HC_Init ((LZ4HC_Data_Structure*)hc4, base); - return hc4; -} - - -inline static int LZ4HC_Free (void** LZ4HC_Data) -{ - FREEMEM(*LZ4HC_Data); - *LZ4HC_Data = NULL; - return (1); -} - - -// Update chains up to ip (excluded) -forceinline static void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip) -{ - U16* chainTable = hc4->chainTable; - HTYPE* HashTable = hc4->hashTable; - INITBASE(base,hc4->base); - - while(hc4->nextToUpdate < ip) - { - const BYTE* p = hc4->nextToUpdate; - size_t delta = (p) - HASH_POINTER(p); - if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; - DELTANEXT(p) = (U16)delta; - HashTable[HASH_VALUE(p)] = (p) - base; - hc4->nextToUpdate++; - } -} - - -forceinline static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit) -{ - const BYTE* p1t = p1; - - while (p1t<matchlimit-(STEPSIZE-1)) - { - UARCH diff = AARCH(p2) ^ AARCH(p1t); - if (!diff) { p1t+=STEPSIZE; p2+=STEPSIZE; continue; } - p1t += LZ4_NbCommonBytes(diff); - return (p1t - p1); - } - if (LZ4_ARCH64) if 
((p1t<(matchlimit-3)) && (A32(p2) == A32(p1t))) { p1t+=4; p2+=4; } - if ((p1t<(matchlimit-1)) && (A16(p2) == A16(p1t))) { p1t+=2; p2+=2; } - if ((p1t<matchlimit) && (*p2 == *p1t)) p1t++; - return (p1t - p1); -} - - -forceinline static int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos) -{ - U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable = hc4->hashTable; - const BYTE* ref; - INITBASE(base,hc4->base); - int nbAttempts=MAX_NB_ATTEMPTS; - size_t repl=0, ml=0; - U16 delta; - - // HC4 match finder - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - -#define REPEAT_OPTIMIZATION -#ifdef REPEAT_OPTIMIZATION - // Detect repetitive sequences of length <= 4 - if (ref >= ip-4) // potential repetition - { - if (A32(ref) == A32(ip)) // confirmed - { - delta = (U16)(ip-ref); - repl = ml = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - *matchpos = ref; - } - ref = GETNEXT(ref); - } -#endif - - while ((ref >= ip-MAX_DISTANCE) && (nbAttempts)) - { - nbAttempts--; - if (*(ref+ml) == *(ip+ml)) - if (A32(ref) == A32(ip)) - { - size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = ref; } - } - ref = GETNEXT(ref); - } - -#ifdef REPEAT_OPTIMIZATION - // Complete table - if (repl) - { - const BYTE* ptr = ip; - const BYTE* end; - - end = ip + repl - (MINMATCH-1); - while(ptr < end-delta) - { - DELTANEXT(ptr) = delta; // Pre-Load - ptr++; - } - do - { - DELTANEXT(ptr) = delta; - HashTable[HASH_VALUE(ptr)] = (ptr) - base; // Head of chain - ptr++; - } while(ptr < end); - hc4->nextToUpdate = end; - } -#endif - - return (int)ml; -} - - -forceinline static int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos) -{ - U16* const chainTable = hc4->chainTable; - HTYPE* const HashTable 
= hc4->hashTable; - INITBASE(base,hc4->base); - const BYTE* ref; - int nbAttempts = MAX_NB_ATTEMPTS; - int delta = (int)(ip-startLimit); - - // First Match - LZ4HC_Insert(hc4, ip); - ref = HASH_POINTER(ip); - - while ((ref >= ip-MAX_DISTANCE) && (nbAttempts)) - { - nbAttempts--; - if (*(startLimit + longest) == *(ref - delta + longest)) - if (A32(ref) == A32(ip)) - { -#if 1 - const BYTE* reft = ref+MINMATCH; - const BYTE* ipt = ip+MINMATCH; - const BYTE* startt = ip; - - while (ipt<matchlimit-(STEPSIZE-1)) - { - UARCH diff = AARCH(reft) ^ AARCH(ipt); - if (!diff) { ipt+=STEPSIZE; reft+=STEPSIZE; continue; } - ipt += LZ4_NbCommonBytes(diff); - goto _endCount; - } - if (LZ4_ARCH64) if ((ipt<(matchlimit-3)) && (A32(reft) == A32(ipt))) { ipt+=4; reft+=4; } - if ((ipt<(matchlimit-1)) && (A16(reft) == A16(ipt))) { ipt+=2; reft+=2; } - if ((ipt<matchlimit) && (*reft == *ipt)) ipt++; -_endCount: - reft = ref; -#else - // Easier for code maintenance, but unfortunately slower too - const BYTE* startt = ip; - const BYTE* reft = ref; - const BYTE* ipt = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, ref+MINMATCH, matchlimit); -#endif - - while ((startt>startLimit) && (reft > hc4->base) && (startt[-1] == reft[-1])) {startt--; reft--;} - - if ((ipt-startt) > longest) - { - longest = (int)(ipt-startt); - *matchpos = reft; - *startpos = startt; - } - } - ref = GETNEXT(ref); - } - - return longest; -} - - -forceinline static int LZ4_encodeSequence(const BYTE** ip, BYTE** op, const BYTE** anchor, int ml, const BYTE* ref) -{ - int length, len; - BYTE* token; - - // Encode Literal length - length = (int)(*ip - *anchor); - token = (*op)++; - if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } - else *token = (length<<ML_BITS); - - // Copy Literals - LZ4_BLINDCOPY(*anchor, *op, length); - - // Encode Offset - LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref)); - - // Encode MatchLength - len = 
(int)(ml-MINMATCH); - if (len>=(int)ML_MASK) { *token+=ML_MASK; len-=ML_MASK; for(; len > 509 ; len-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (len > 254) { len-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)len; } - else *token += len; - - // Prepare next loop - *ip += ml; - *anchor = *ip; - - return 0; -} - - -//**************************** -// Compression CODE -//**************************** - -int LZ4_compressHCCtx(LZ4HC_Data_Structure* ctx, - const char* source, - char* dest, - int isize) -{ - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + isize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - - BYTE* op = (BYTE*) dest; - - int ml, ml2, ml3, ml0; - const BYTE* ref=NULL; - const BYTE* start2=NULL; - const BYTE* ref2=NULL; - const BYTE* start3=NULL; - const BYTE* ref3=NULL; - const BYTE* start0; - const BYTE* ref0; - - ip++; - - // Main Loop - while (ip < mflimit) - { - ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); - if (!ml) { ip++; continue; } - - // saved, in case we would skip too much - start0 = ip; - ref0 = ref; - ml0 = ml; - -_Search2: - if (ip+ml < mflimit) - ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2); - else ml2 = ml; - - if (ml2 == ml) // No better match - { - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - continue; - } - - if (start0 < ip) - { - if (start2 < ip + ml0) // empirical - { - ip = start0; - ref = ref0; - ml = ml0; - } - } - - // Here, start0==ip - if ((start2 - ip) < 3) // First Match too small : removed - { - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } - -_Search3: - // Currently we have : - // ml2 > ml1, and - // ip1+3 <= ip2 (usually < ip1+ml1) - if ((start2 - ip) < OPTIMAL_ML) - { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - 
MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) - - if (start2 + ml2 < mflimit) - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3); - else ml3 = ml2; - - if (ml3 == ml2) // No better match : 2 sequences to encode - { - // ip & ref are known; Now for ml - if (start2 < ip+ml) ml = (int)(start2 - ip); - // Now, encode 2 sequences - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - ip = start2; - LZ4_encodeSequence(&ip, &op, &anchor, ml2, ref2); - continue; - } - - if (start3 < ip+ml+3) // Not enough space for match 2 : remove it - { - if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 - { - if (start2 < ip+ml) - { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) - { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - // OK, now we have 3 ascending matches; let's write at least the first one - // ip & ref are known; Now for ml - if (start2 < ip+ml) - { - if ((start2 - ip) < (int)ML_MASK) - { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = (int)(start2 - ip); - } - } - LZ4_encodeSequence(&ip, &op, &anchor, ml, ref); - - ip = start2; - ref = ref2; - ml = ml2; - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - - goto _Search3; - - } - - 
// Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (lastRun<<ML_BITS); - memcpy(op, anchor, iend - anchor); - op += iend-anchor; - } - - // End - return (int) (((char*)op)-dest); -} - - -int LZ4_compressHC(const char* source, - char* dest, - int isize) -{ - void* ctx = LZ4HC_Create((const BYTE*)source); - int result = LZ4_compressHCCtx(ctx, source, dest, isize); - LZ4HC_Free (&ctx); - - return result; -} - - http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/lz4-svn/lz4hc.h ---------------------------------------------------------------------- diff --git a/thirdparty/lz4-svn/lz4hc.h b/thirdparty/lz4-svn/lz4hc.h deleted file mode 100644 index cb74689..0000000 --- a/thirdparty/lz4-svn/lz4hc.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - LZ4 HC - High Compression Mode of LZ4 - Header File - Copyright (C) 2011-2012, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ -*/ -#pragma once - - -#if defined (__cplusplus) -extern "C" { -#endif - - -int LZ4_compressHC (const char* source, char* dest, int isize); - -/* -LZ4_compressHC : - return : the number of bytes in compressed buffer dest - note : destination buffer must be already allocated. - To avoid any problem, size it to handle worst cases situations (input data not compressible) - Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h") -*/ - - -/* Note : -Decompression functions are provided within regular LZ4 source code (see "lz4.h") (BSD license) -*/ - - -#if defined (__cplusplus) -} -#endif http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/0149a7af/thirdparty/versions.sh ---------------------------------------------------------------------- diff --git a/thirdparty/versions.sh b/thirdparty/versions.sh index dcce196..72f1156 100644 --- a/thirdparty/versions.sh +++ b/thirdparty/versions.sh @@ -1,6 +1,11 @@ -SNAPPY_VERSION=1.1.1 -SNAPPY_DIR=$TP_DIR/snappy-$SNAPPY_VERSION +LZ4_VERSION="r131" +LZ4_URL="https://github.com/Cyan4973/lz4/archive/${LZ4_VERSION}.tar.gz" +LZ4_BASEDIR=lz4-$LZ4_VERSION -LZ4_VERSION=svn -LZ4_DIR=$TP_DIR/lz4-$LZ4_VERSION +SNAPPY_VERSION=1.1.3 
+SNAPPY_URL="https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz" +SNAPPY_BASEDIR=snappy-$SNAPPY_VERSION +THRIFT_VERSION=0.9.1 +THRIFT_URL="http://archive.apache.org/dist/thrift/${THRIFT_VERSION}/thrift-${THRIFT_VERSION}.tar.gz" +THRIFT_BASEDIR=thrift-$THRIFT_VERSION
