Hello community,

here is the log from the commit of package tensorflow for openSUSE:Factory checked in at 2019-07-22 12:20:01
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/tensorflow (Old)
 and      /work/SRC/openSUSE:Factory/.tensorflow.new.4126 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "tensorflow" Mon Jul 22 12:20:01 2019 rev:5 rq:716088 version:1.13.1 Changes: -------- --- /work/SRC/openSUSE:Factory/tensorflow/tensorflow.changes 2019-01-21 10:51:01.095970939 +0100 +++ /work/SRC/openSUSE:Factory/.tensorflow.new.4126/tensorflow.changes 2019-07-22 12:20:09.555668006 +0200 @@ -1,0 +2,93 @@ +Wed Jul 17 08:18:34 UTC 2019 - Christian Goll <[email protected]> + +- fixed installation location of shared library + +------------------------------------------------------------------- +Mon Jul 8 14:04:17 UTC 2019 - Christian Goll <[email protected]> + +- removed bazel mirror from as much source links as possible +- added support-new-bazel.patch support newer upcoming bazel + versions + +------------------------------------------------------------------- +Tue Jun 4 14:16:10 UTC 2019 - Guillaume GARDET <[email protected]> + +- Fix build for lite flavor: + * tensorflow-fix_lite.patch + +------------------------------------------------------------------- +Wed May 29 16:11:36 UTC 2019 - Guillaume GARDET <[email protected]> + +- Call ldconfig for devel package in post/postun + +------------------------------------------------------------------- +Mon May 27 15:00:28 UTC 2019 - Guillaume GARDET <[email protected]> + +- Fix aarch64 build with upstream patch: + * tensorflow-make_aws_sdk_work_on_aarch64.patch + +------------------------------------------------------------------- +Mon May 27 04:08:54 UTC 2019 - Guillaume GARDET <[email protected]> + +- Add Lite flavor + +------------------------------------------------------------------- +Fri Apr 26 08:27:55 UTC 2019 - Christian Goll <[email protected]> + +- updated to 1.13.1 fixes boo#1133490 + +------------------------------------------------------------------- +Fri Mar 29 13:06:28 UTC 2019 - Guillaume GARDET <[email protected]> + +- Update _constraints to avoid OOM errors + +------------------------------------------------------------------- +Fri Mar 29 08:18:09 UTC 2019 - Guillaume GARDET <[email protected]> + +- Build and package libtensorflow_cc and libtensorflow_framework + +------------------------------------------------------------------- +Tue Mar 19 15:40:25 UTC 2019 - Christian Goll <[email protected]> + +- added fix_mvapich_mpi_bzl.patch which fixes detection of + mvapich2 mpi library +- fixed python3 build + +------------------------------------------------------------------- +Tue Mar 12 20:33:56 UTC 2019 - Adrian Schröter <[email protected]> + +- update to version 1.13.1 + * Major Features and Improvements + * TensorFlow Lite has moved from contrib to core. This means that Python modules are under tf.lite and source code is now under tensorflow/lite rather than tensorflow/contrib/lite. + * TensorFlow GPU binaries are now built against CUDA 10 and TensorRT 5.0. + * Support for Python3.7 on all operating systems. + * Moved NCCL to core. 
+- drop merged patch mpilibpath_configure_py.patch
+- drop obsolete python3.7 patches
+- disabled jemalloc for now
+
+-------------------------------------------------------------------
+Tue Feb 12 08:39:57 UTC 2019 - [email protected]
+
+- enabled aws and googlecloud support
+  * removed no_aws_and_googlecloud.patch
+
+-------------------------------------------------------------------
+Mon Feb 11 16:27:20 UTC 2019 - Christian Goll <[email protected]>
+
+- Fixed build issues with python 3.7, which introduced the patches
+  * python3_7_compatibility.patch backported from upstream
+  * python3.7_unicode.patch fixes a minor function call
+  * python3.7_async_keyword.patch avoids the new keyword async
+
+-------------------------------------------------------------------
+Thu Jan 31 11:44:21 UTC 2019 - Bernhard Wiedemann <[email protected]>
+
+- Fix build with python 3.7
+
+-------------------------------------------------------------------
+Fri Jan 18 16:45:48 UTC 2019 - Guillaume GARDET <[email protected]>
+
+- Build and package libtensorflow.so as some packages may link to it
+
+-------------------------------------------------------------------

Old:
----
  mpilibpath_configure_py.patch
  no_aws_and_googlecloud.patch
  protobuf_v3.6.0.tar.gz
  re2-2018-04-01.tar.gz
  tensorflow-1.10.0.tar.gz

New:
----
  816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz
  aws-sdk-cpp-1.3.15.tar.gz
  bazel-toolchains.tar.gz
  fft.tgz
  fix_mvapich_mpi_bzl.patch
  google-cloud-cpp.tar.gz
  google-flatbuffers-1.10.0~pre.tar.gz
  google-nsync-1.20.1.tar.gz
  grpc.tar.gz
  kafka-v0.11.5.tar.gz
  keras-applications-1.0.6.tar.gz
  keras-preprocessing-1.0.9.tar.gz
  license.rst.txt
  master.zip
  nanopb.tar.gz
  protobuf_v3.6.1.2.tar.gz
  re2-2018-10-01.tar.gz
  release-1.8.0.tar.gz
  rules_docker.tar.gz
  support-new-bazel.patch
  tensorflow-1.13.1.tar.gz
  tensorflow-fix_lite.patch
  tensorflow-make_aws_sdk_work_on_aarch64.patch
  unicode-org-icu.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ tensorflow.spec ++++++
--- /var/tmp/diff_new_pack.3GoBTM/_old 2019-07-22 12:20:14.283666766 +0200 +++ /var/tmp/diff_new_pack.3GoBTM/_new 2019-07-22 12:20:14.287666765 +0200 @@ -18,18 +18,29 @@ # %define pname tensorflow -%define vers 1.10.0 -%define _vers 1_10_10 -%define python_ver_hack python3.6 +%define vers 1.13.1 +%define _vers 1_13_1 +%define python_ver_hack python3.[0-9] %global flavor @BUILD_FLAVOR@%{nil} +# Build tensorflow, not Tensorflow-lite +%define is_lite 0 + %if "%{flavor}" == "standard" %bcond_with cuda %bcond_with mpi %bcond_with opencl %endif +%if "%{flavor}" == "lite" +%define is_lite 1 +%bcond_with cuda +%bcond_with mpi +%bcond_with opencl +%define package_suffix -lite +%endif + %if "%{flavor}" == "hpc" %bcond_with cuda %bcond_with mpi @@ -129,41 +140,48 @@ %{!?compiler_family:%global compiler_family gnu} %{hpc_init -c %compiler_family %{?with_mpi:-m %mpi_flavor} %{?c_f_ver:-v %{c_f_ver}} %{?mpi_ver:-V %{mpi_ver}} %{?ext:-e %{ext}}} %{?with_mpi:%global hpc_module_pname p%{pname}} +%define python_flavor python3 %define package_name %{hpc_package_name %_vers} %define libname(l:s:) lib%{pname}%{-l*}%{hpc_package_name_tail %{?_vers}} %define package_python_sitearch %hpc_python_sitearch %define package_python_sitelib %{hpc_prefix}/lib64/%{python_ver_hack}/site-packages/ %define package_prefix %hpc_prefix %define package_bindir %hpc_bindir +%define package_libdir %hpc_libdir %else %define package_name %pname%{?package_suffix} %define package_python_sitearch %{python3_sitearch} %define 
package_python_sitelib %{python3_sitelib} %define package_prefix %_prefix %define package_bindir %_bindir +%define package_libdir %_libdir %define libname(l:s:) lib%{pname}%{!-l:%{-s:-}}%{-l*}%{-s*}%{?package_suffix} %endif Name: %{package_name} Version: %vers Release: 0 -#Release: 1%{?config_dependant}%{?dist} Summary: A framework used for deep learning License: Apache-2.0 AND BSD-2-Clause AND BSD-3-Clause AND FSFUL AND MIT AND MPL-2.0 AND OpenSSL AND Python-2.0 Group: Development/Languages/Python Url: https://www.tensorflow.org/ Source0: https://github.com/tensorflow/tensorflow/archive/v%{version}.tar.gz#/tensorflow-%{version}.tar.gz Source1: tensorflow-rpmlintrc +# IMPORTANT +# although some of the following libraries are available in factory they could +# not be used as +# * explicit versions are needed which differ from the factory ones +# * bazel and the obs version have different symbols due to hidden compiler flags # License10: Apache-2.0 Source10: https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz#/rules_closure.tar.gz # License11: BSD-3-Clause -Source11: https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz#/protobuf_v3.6.0.tar.gz +Source11: https://github.com/protocolbuffers/protobuf/archive/v3.6.1.2.tar.gz#/protobuf_v3.6.1.2.tar.gz # License12: Python-2.0 Source12: https://pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda83e264fa9c5c1c98ceabd81/backports.weakref-1.0rc1.tar.gz#/backports.weakref-1.0rc1.tar.gz # License13: BSD-3-Clause Source13: https://github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip#/double_conversion.zip # License14: BSD-3-Clause -Source14: https://mirror.bazel.build/pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz#/gast-0.2.0.tar.gz +Source14: https://pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz#/gast-0.2.0.tar.gz # License15: MIT Source15: https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz#/farmhash.tar.gz # License16: Apache-2.0 @@ -173,37 +191,73 @@ # License18: BSD-3-Clause Source18: https://github.com/hfp/libxsmm/archive/1.9.tar.gz#/libxsmm_1.9.tar.gz # License19: Apache-2.0 -Source19: https://github.com/abseil/abseil-cpp/archive/9613678332c976568272c8f4a78631a29159271d.tar.gz#/abseil-cpp.tar.gz -# License20: BSD-2-Clause -# License21: OpenSSL and ISC and Intel -Source20: https://github.com/google/boringssl/archive/a0fb951d2a26a8ee746b52f3ba81ab011a0af778.tar.gz#/boring_ssl.tar.gz -# License22: Apache-2.0 +Source19: https://github.com/abseil/abseil-cpp/archive/389ec3f906f018661a5308458d623d01f96d7b23.tar.gz#/abseil-cpp.tar.gz +# License20: OpenSSL and ISC and Intel +Source20: https://github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz#/boring_ssl.tar.gz +# License21: Apache-2.0 Source21: https://github.com/googleapis/googleapis/archive/f81082ea1e2f85c43649bee26e0d9871d4b41cdb.zip#/googleapis.zip # License23: Apache-2.0 -Source22: https://mirror.bazel.build/github.com/google/flatbuffers/archive/v1.9.0.tar.gz#/flatbuffers_v1.9.0.tar.gz -# License24: BSD-3-Clause +Source22: https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz#/flatbuffers_v1.9.0.tar.gz +# License23: BSD-3-Clause Source23: https://github.com/NVlabs/cub/archive/1.8.0.zip#/cub_1.8.0.zip -# License25: Apache-2.0 +# License24: Apache-2.0 Source24: 
https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz#/highwayhash.tar.gz -# License28: Apache-2.0 +# License25: Apache-2.0 Source25: https://github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz#/abseil-pypi-v0.2.2.tar.gz -# License29: MPL-2.0 +# License26: MPL-2.0 # NOTE: tensorflow only uses MPL-2.0 part of eigen -Source26: https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz#/eigen.tar.gz -# License30: BSD-2-Clause -Source27: https://mirror.bazel.build/github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz#/arm_neon_2_x86_sse.tar.gz +Source26: https://bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz#/eigen.tar.gz +# License27: BSD-2-Clause +Source27: https://github.com/intel/ARM_NEON_2_x86_SSE/archive/1200fe90bb174a6224a525ee60148671a786a71f.tar.gz#/arm_neon_2_x86_sse.tar.gz Source28: https://mirror.bazel.build/docs.python.org/2.7/_sources/license.txt#/python-license.txt -# License32: MIT +# License29: MIT Source29: https://github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz#/json-cpp-1.8.4.tar.gz -# License33: FSFUL +# License30: FSFUL Source30: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz#/fft.tar.gz -# Source34: Apache-2.0 +# License31: Apache-2.0 Source31: https://github.com/grpc/grpc/archive/v1.13.0.tar.gz#/grpc-v1.13.0.gz -# Source35: BSD-3.0 -Source32: https://mirror.bazel.build/github.com/google/re2/archive/2018-04-01.tar.gz#/re2-2018-04-01.tar.gz -# patch the libray search path in configure.py -Patch0: mpilibpath_configure_py.patch -Patch1: no_aws_and_googlecloud.patch +# License32: BSD-3.0 +Source32: https://github.com/google/re2/archive/2018-10-01.tar.gz#/re2-2018-10-01.tar.gz +# License33: Apache-2.0 +Source33: https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz#/aws-sdk-cpp-1.3.15.tar.gz +# License34: BSD-3-Clause and Intel +Source34: https://github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz#/kafka-v0.11.5.tar.gz +# The factory protobuf library has other symbols due to hidden compiler flags +# License35: Apache-2.0 +Source35: https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.4.0.tar.gz#/google-cloud-cpp.tar.gz +# License36: Apache-2.0 +Source36: https://github.com/nlopezgi/bazel-toolchains/archive/3f8c58fe530fedc446de04673bc1e32985887dea.tar.gz#/bazel-toolchains.tar.gz +# License37: Apache-2.0 +Source37: https://github.com/bazelbuild/rules_docker/archive/a9bb1dab84cdf46e34d1b34b53a17bda129b5eba.tar.gz#/rules_docker.tar.gz +# License38: MIT +Source38: https://github.com/keras-team/keras-preprocessing/archive/1.0.9.tar.gz#/keras-preprocessing-1.0.9.tar.gz +# License39: MIT +Source39: https://github.com/keras-team/keras-applications/archive/1.0.6.tar.gz#/keras-applications-1.0.6.tar.gz +# License40: MIT +Source40: https://github.com/google/nsync/archive/1.20.1.tar.gz#/google-nsync-1.20.1.tar.gz +# License41: Apache-2.0 +# something between 1.16.1 and 1.18~pre +Source41: https://github.com/grpc/grpc/archive/69b6c047bc767b4d80e7af4d00ccb7c45b683dae.tar.gz#/grpc.tar.gz +# License42: Apache-2.0 +Source42: https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz#/google-flatbuffers-1.10.0~pre.tar.gz +# License43: BSD and ICU License +Source43: https://github.com/unicode-org/icu/archive/release-62-1.tar.gz#/unicode-org-icu.tar.gz +# License44: BSD like +Source44: https://github.com/nanopb/nanopb/archive/f8ac463766281625ad710900479130c7fcb4d63b.tar.gz#/nanopb.tar.gz +# License45: Python license itself, do need 
as sha256b have to match so could not use system one +Source45: https://mirror.bazel.build/docs.python.org/2.7/_sources/license.rst.txt +# Deps sources for Tensorflow-Lite (use same eigen, gemmlowp and abseil_cpp packages as non lite version) +Source100: https://github.com/google/googletest/archive/release-1.8.0.tar.gz +Source101: https://github.com/intel/ARM_NEON_2_x86_SSE/archive/master.zip +Source102: http://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz +# Source103: http://mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.11.0.tar.gz +Source104: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz +Patch1: support-new-bazel.patch +Patch2: fix_mvapich_mpi_bzl.patch +# PATCH-FIX-UPSTREAM https://github.com/tensorflow/tensorflow/pull/22856 +Patch3: tensorflow-make_aws_sdk_work_on_aarch64.patch +# PATCH-FIX-OPENSUSE - Use installed flatbuffers lib for Tensorflow-Lite +Patch4: tensorflow-fix_lite.patch Requires: python3 Requires: python3-abseil @@ -219,11 +273,11 @@ %else Provides: python3-tensorflow %endif -BuildRequires: bazel +BuildRequires: bazel == 0.19.2 BuildRequires: curl %if %{with cuda} Requires: cuda-9.0 -BuildRequires cuda-9.0 +BuildRequires: cuda-9.0 %endif %if %{with opencl} Requires: Mesa-libOpenCL @@ -232,11 +286,14 @@ %endif BuildRequires: curl-devel BuildRequires: fdupes +%if %{is_lite} +BuildRequires: flatbuffers-devel +%endif BuildRequires: fftw3-devel BuildRequires: gcc-c++ BuildRequires: giflib-devel #BuildRequires: grpc-devel >= 1.12 -BuildRequires: jemalloc-devel +#BuildRequires: jemalloc-devel BuildRequires: libjpeg-turbo %if 0%{?suse_version} < 1550 BuildRequires: libjpeg62-turbo @@ -251,6 +308,9 @@ BuildRequires: pcre-devel BuildRequires: python3 BuildRequires: python3-Cython +BuildRequires: python3-Keras-Applications +BuildRequires: python3-Keras-Preprocessing +BuildRequires: python3-astor BuildRequires: python3-base BuildRequires: python3-devel BuildRequires: python3-mock @@ -266,20 +326,18 @@ BuildRequires: unzip BuildRequires: zlib-devel %if %{with hpc} +%hpc_requires BuildRequires: %{compiler_family}%{?c_f_ver}-compilers-hpc-macros-devel +BuildRequires: lua-lmod +BuildRequires: suse-hpc %if %{with mpi} BuildRequires: %{mpi_flavor}%{?mpi_vers}-%{compiler_family}%{?c_f_ver}-hpc-macros-devel %endif -BuildRequires: lua-lmod -BuildRequires: suse-hpc -%hpc_requires %endif # just use rpmlint -# there are some serious compiler warnings, regearding no-return-in-nonvoid-function -BuildRequires: -post-build-checks - -BuildRoot: %{_tmppath}/%{name}-%{version}-build +# there are some serious compiler warnings, regarding no-return-in-nonvoid-function +#!BuildRequires: -post-build-checks %if "%flavor" == "" ExclusiveArch: do_not_build @@ -363,40 +421,90 @@ %makebazelcache %{SOURCE30} %makebazelcache %{SOURCE31} %makebazelcache %{SOURCE32} +%makebazelcache %{SOURCE33} +%makebazelcache %{SOURCE34} +%makebazelcache %{SOURCE35} +%makebazelcache %{SOURCE36} +%makebazelcache %{SOURCE37} +%makebazelcache %{SOURCE38} +%makebazelcache %{SOURCE39} +%makebazelcache %{SOURCE40} +%makebazelcache %{SOURCE41} +%makebazelcache %{SOURCE42} +%makebazelcache %{SOURCE43} +%makebazelcache %{SOURCE44} +%makebazelcache %{SOURCE45} # unpack tensorflow %setup -q -c -n tensorflow-%{version} %sanitize_dir pwd -%patch0 -p 1 %patch1 -p 1 +%patch2 -p 1 +%patch3 -p 1 +%patch4 -p 1 echo $MPI_DIR +%if %{is_lite} +mkdir tensorflow/lite/tools/make/downloads/ +pushd tensorflow/lite/tools/make/downloads/ +# eigen, gemmlowp and abseil_cpp +cp %{SOURCE26} %{SOURCE17} 
%{SOURCE19} . +mkdir tmp +tar xzf eigen.tar.gz -C tmp && mv tmp/* eigen +unzip gemmlowp.zip -d tmp && mv tmp/* gemmlowp +tar xzf %{SOURCE100} -C tmp && mv tmp/* fgoogletest +tar xzf abseil-cpp.tar.gz -C tmp && mv tmp/* absl +unzip %{SOURCE101} -d neon_2_sse +tar xzf %{SOURCE102} -C tmp && mv tmp/* farmhash +# We use installed flatbuffers +# tar xzf %{SOURCE103} -C tmp && mv tmp/* flatbuffers +tar xzf %{SOURCE104} -C tmp && mv tmp/* fft2d +# sed fixes from tensorflow/lite/tools/make/download_dependencies.sh +sed -i -e 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \ + "./eigen/Eigen/src/Core/arch/NEON/Complex.h" +sed -i -e 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \ + "./eigen/Eigen/src/Core/arch/NEON/Complex.h" +sed -i -e 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \ + "./eigen/Eigen/src/Core/arch/NEON/Complex.h" +find -name fixedpoint.h +popd +%endif + %build -%limit_build -m 1600 +%limit_build -m 4000 + +%if %{is_lite} +make %{?_smp_mflags} -f tensorflow/lite/tools/make/Makefile \ + $(pwd)/tensorflow/lite/tools/make/gen/linux_$(uname -m)/lib/libtensorflow-lite.a \ + $(pwd)/tensorflow/lite/tools/make/gen/linux_$(uname -m)/bin/minimal +# Build of benchmark-lib.a is broken +%else %if %{with hpc} %hpc_setup module load gnu %if %{with mpi} module load %mpi_flavor +export MPI_HOME=${MPI_HOME:-$MPI_DIR} %endif #mpi %endif #hpc export TEST_TMPDIR=%{bazeldir} -export PYTHON_LIB_PATH=/usr/lib64/python3.6/site-packages +export PYTHON_LIB_PATH=%{python3_sitearch} export PYTHON_BIN_PATH=/usr/bin/python3 export CC_OPT_FLAGS=-O2 export TF_NEED_JEMALLOC=0 export TF_NEED_GCP=0 -export TF_NEED_HDFS=0 -export TF_NEED_S3=0 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 export TF_ENABLE_XLA=0 export TF_NEED_VERBS=0 export TF_NEED_OPENCL=0 -export TF_SYSTEM_LIBS="nasm,jpeg,png_archive,org_sqlite,gif_archive,six_archive,astor_archive,termcolor_archive,pcre,swig,curl,lmdb,zlib_archive,snappy,cython,jemalloc" +export TF_NEED_ROCM=0 +export TF_SYSTEM_LIBS="nasm,jpeg,png_archive,org_sqlite,gif_archive,six_archive,astor_archive,termcolor_archive,pcre,swig,curl,lmdb,zlib_archive,snappy,cython" #export TF_SYSTEM_LIBS="com_googlesource_code_re2,nasm,jpeg,png_archive,org_sqlite,gif_archive,six_archive,astor_archive,termcolor_archive,pcre,swig,curl,grpc,lmdb,zlib_archive,snappy,cython,jemalloc" %if %{with cuda} export TF_NEED_CUDA=1 @@ -430,8 +538,21 @@ %{?copts} --jobs %{?jobs} \ //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package %{_topdir}/%{name}-%{version} +bazel build -c opt //tensorflow:libtensorflow.so +bazel build -c opt //tensorflow:libtensorflow_cc.so +%endif %install + +%if %{is_lite} +pushd tensorflow/lite/tools/make/gen/linux_*/ +install -D bin/minimal %{buildroot}%{_bindir}/tflite_minimal +install -D lib/libtensorflow-lite.a %{buildroot}%{_libdir}/libtensorflow-lite.a +popd +install -D tensorflow/lite/schema/schema_generated.h %{buildroot}%{_includedir}/tensorflow/lite/schema/schema_generated.h +install -D tensorflow/lite/schema/schema.fbs %{buildroot}%{_includedir}/tensorflow/lite/schema/schema.fbs +%else + pip install %{_topdir}/%{name}-%{version}/*whl --root=%{buildroot}%{?hpc_prefix} \ --no-warn-script-location --no-index 
--no-deps # remove spurious executeable bits @@ -444,6 +565,10 @@ rm -r lib cd - %endif +# install libtensorflow*.so +install -D bazel-bin/tensorflow/libtensorflow.so %{buildroot}%{package_libdir}/libtensorflow.so +install -D bazel-bin/tensorflow/libtensorflow_cc.so %{buildroot}%{package_libdir}/libtensorflow_cc.so +install -D bazel-bin/tensorflow/libtensorflow_framework.so %{buildroot}%{package_libdir}/libtensorflow_framework.so # remove external libs %fdupes -s %{buildroot}%{?hpc_prefix} find %{buildroot} -name \*.h -type f -exec chmod 644 {} + @@ -492,6 +617,21 @@ EOF %endif +# %%{is_lite} +%endif + +%post -n %{package_name}-devel -p /sbin/ldconfig +%postun -n %{package_name}-devel -p /sbin/ldconfig + +# Lite version is very different so package it separetly +%if %{is_lite} +%files +%{package_bindir}/* +%files -n %{package_name}-devel +%{package_libdir}/libtensorflow-lite.a +%dir %{_includedir}/tensorflow/lite/schema/ +%{_includedir}/tensorflow/lite/schema/* +%else # not lite build %files %defattr(-,root,root,-) %{package_python_sitearch}/* @@ -504,7 +644,10 @@ %endif %files -n %{package_name}-devel %{package_python_sitelib}/tensorflow/include +%{package_libdir}/libtensorflow*.so %files -n %{package_name}-doc %{package_python_sitelib}/tensorflow/examples +%endif + %changelog ++++++ protobuf_v3.6.0.tar.gz -> 816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz ++++++ ++++ 821455 lines of diff (skipped) ++++++ _constraints ++++++ --- /var/tmp/diff_new_pack.3GoBTM/_old 2019-07-22 12:20:15.227666518 +0200 +++ /var/tmp/diff_new_pack.3GoBTM/_new 2019-07-22 12:20:15.231666517 +0200 @@ -1,7 +1,7 @@ <constraints> <hardware> <memory> - <size unit="M">8192</size> + <size unit="G">10</size> </memory> <disk> <size unit="G">10</size> ++++++ _multibuild ++++++ --- /var/tmp/diff_new_pack.3GoBTM/_old 2019-07-22 12:20:15.247666513 +0200 +++ /var/tmp/diff_new_pack.3GoBTM/_new 2019-07-22 12:20:15.255666511 +0200 @@ -1,5 +1,6 @@ <multibuild> <package>standard</package> + <package>lite</package> <package>hpc</package> <package>hpc-openmpi2</package> <package>hpc-mvapich2</package> ++++++ abseil-cpp.tar.gz ++++++ ++++ 65018 lines of diff (skipped) ++++++ arm_neon_2_x86_sse.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/NEON_2_SSE.h new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/NEON_2_SSE.h --- old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/NEON_2_SSE.h 2017-05-30 09:44:55.000000000 +0200 +++ new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/NEON_2_SSE.h 2018-04-04 09:24:16.000000000 +0200 @@ -1,6 +1,6 @@ //created by Victoria Zhislina, the Senior Application Engineer, Intel Corporation, [email protected] -//*** Copyright (C) 2012-2016 Intel Corporation. All rights reserved. +//*** Copyright (C) 2012-2017 Intel Corporation. All rights reserved. //IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. @@ -36,21 +36,21 @@ //performance overhead and the necessity to use the EMMS instruction (_mm_empty())for mmx-x87 floating point switching //***************************************************************************************** -//!!!!!!!!!!!!!! To use this file just include it in your project that uses ARM NEON intinsics instead of "arm_neon.h" and complile it as usual -//!!!!!!!!!!!!!! but please pay attention at #define USE_SSE4 below - you might need to define it manualy for newest Intel Atom platforms for greater performance. 
+//!!!!!!!!!!!!!! To use this file just include it in your project that uses ARM NEON intinsics instead of "arm_neon.h" and compile it as usual +//!!!!!!!!!!!!!! but please pay attention at #define USE_SSE4 below - you might need to define it manualy for newest Intel Atom or any Intel Core platforms for greater performance. #ifndef NEON2SSE_H #define NEON2SSE_H /*********************************************************************************************************************/ //!!!!!!!!!!!!!! +//if USE_SSE4 is defined, some functions use SSE4 instructions instead of earlier SSE versions, when undefined - SIMD up to SSSE3 are used +//For older devices without SSE4 support it should be undefined, for newer devices - defined, probably manualy if your compiler doesn't set __SSE4_2__ predefine #ifndef USE_SSE4 #if defined(__SSE4_2__) #define USE_SSE4 #endif #endif -//if USE_SSE4 is defined, some functions use SSE4 instructions instead of earlier SSE versions, when undefined - SIMD up to SSSE3 are used -//For older devices without SSE4 support it should be undefined, for newer devices - defined, probably manualy if your compiler doesn't set __SSE4_2__ predefine /*********************************************************************************************************************/ #include <xmmintrin.h> //SSE @@ -62,6 +62,7 @@ #include <nmmintrin.h> //SSE4.2 #endif +#include <math.h> //*************** functions and data attributes, compiler dependent ********************************* //*********************************************************************************** @@ -150,6 +151,9 @@ typedef __m128 float16x4_t; //not supported by IA, for compartibility typedef __m128 float16x8_t; //not supported by IA, for compartibility +typedef __m64_128 float64x1_t; +typedef __m128d float64x2_t; + typedef __m128i int8x16_t; typedef __m128i int16x8_t; typedef __m128i int32x4_t; @@ -174,6 +178,9 @@ typedef float __fp16; #endif +typedef double float64_t; + + typedef uint8_t poly8_t; typedef uint16_t poly16_t; @@ -861,6 +868,9 @@ uint16x8_t vmaxq_u16(uint16x8_t a, uint16x8_t b); // VMAX.U16 q0,q0,q0 uint32x4_t vmaxq_u32(uint32x4_t a, uint32x4_t b); // VMAX.U32 q0,q0,q0 float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0 + +float64x2_t vmaxq_f64(float64x2_t a, float64x2_t b); // VMAX.F64 q0,q0,q0 + //vmin -> Vr[i] := (Va[i] >= Vb[i]) ? 
Vb[i] : Va[i] int8x8_t vmin_s8(int8x8_t a, int8x8_t b); // VMIN.S8 d0,d0,d0 int16x4_t vmin_s16(int16x4_t a, int16x4_t b); // VMIN.S16 d0,d0,d0 @@ -876,6 +886,9 @@ uint16x8_t vminq_u16(uint16x8_t a, uint16x8_t b); // VMIN.U16 q0,q0,q0 uint32x4_t vminq_u32(uint32x4_t a, uint32x4_t b); // VMIN.U32 q0,q0,q0 float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0 + +float64x2_t vminq_f64(float64x2_t a, float64x2_t b); // VMIN.F64 q0,q0,q0 + //Pairwise addition //Pairwise add int8x8_t vpadd_s8(int8x8_t a, int8x8_t b); // VPADD.I8 d0,d0,d0 @@ -1225,6 +1238,9 @@ float32x2_t vld1_f32(__transfersize(2) float32_t const * ptr); // VLD1.32 {d0}, [r0] poly8x8_t vld1_p8(__transfersize(8) poly8_t const * ptr); // VLD1.8 {d0}, [r0] poly16x4_t vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r0] + +float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr); // VLD1.64 {d0, d1}, [r0] + //Load a single lane from memory uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); //VLD1.8 {d0[0]}, [r0] uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); // VLD1.16 {d0[0]}, [r0] @@ -1755,6 +1771,7 @@ uint32x2_t vcvt_n_u32_f32(float32x2_t a, __constrange(1,32) int b); // VCVT.U32.F32 d0, d0, #32 int32x4_t vcvtq_n_s32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.S32.F32 q0, q0, #32 uint32x4_t vcvtq_n_u32_f32(float32x4_t a, __constrange(1,32) int b); // VCVT.U32.F32 q0, q0, #32 +int32x4_t vcvtnq_s32_f32(float32x4_t a); // VCVTN.S32.F32 q0, q0 //Convert to float float32x2_t vcvt_f32_s32(int32x2_t a); // VCVT.F32.S32 d0, d0 float32x2_t vcvt_f32_u32(uint32x2_t a); // VCVT.F32.U32 d0, d0 @@ -2003,6 +2020,10 @@ int16x8_t vabsq_s16(int16x8_t a); // VABS.S16 q0,q0 int32x4_t vabsq_s32(int32x4_t a); // VABS.S32 q0,q0 float32x4_t vabsq_f32(float32x4_t a); // VABS.F32 q0,q0 + +int64x2_t vabsq_s64(int64x2_t a); // VABS.S64 q0,q0 +float64x2_t vabsq_f64(float64x2_t a); // VABS.F64 q0,q0 + //Saturating absolute: Vd[i] = sat(|Va[i]|) int8x8_t vqabs_s8(int8x8_t a); // VQABS.S8 d0,d0 int16x4_t vqabs_s16(int16x4_t a); // VQABS.S16 d0,d0 @@ -2246,16 +2267,26 @@ poly8x16x2_t vuzpq_p8(poly8x16_t a, poly8x16_t b); // VUZP.8 q0,q0 poly16x8x2_t vuzpq_p16(poly16x8_t a, poly16x8_t b); // VUZP.16 q0,q0 +float32x4_t vrndnq_f32(float32x4_t a); // VRND.F32 q0,q0 + +float64x2_t vrndnq_f64(float64x2_t a); // VRND.F64 q0,q0 + +//Sqrt +float32x4_t vsqrtq_f32(float32x4_t a); // VSQRT.F32 q0,q0 + +float64x2_t vsqrtq_f64(float64x2_t a); // VSQRT.F64 q0,q0 + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // the following macros solve the problem of the "immediate parameters requirement" for some x86 intrinsics. 
// we need it to compile the code unless the "Intrinsic parameter must be an immediate value" error is our goal // -#if ( ((defined(_MSC_VER)|| defined (__INTEL_COMPILER)) && defined DEBUG ) || defined(__GNUC__) && !defined(__llvm__) ) +#if ( defined (__INTEL_COMPILER) || defined (__GNUC__) && !defined(__llvm__) ) #define _MM_ALIGNR_EPI8 _mm_alignr_epi8 - #define _MM_EXTRACT_EPI16 _mm_extract_epi16 +#define _MM_EXTRACT_EPI16 (int16_t) _mm_extract_epi16 #define _MM_INSERT_EPI16 _mm_insert_epi16 #ifdef USE_SSE4 #define _MM_EXTRACT_EPI8 _mm_extract_epi8 @@ -2328,7 +2359,7 @@ _NEON2SSE_SWITCH8(_mm_insert_epi16, vec, LANE, _NEON2SSE_COMMA p) } - _NEON2SSE_INLINE int _MM_EXTRACT_EPI16(__m128i vec, const int LANE) + _NEON2SSE_INLINE int16_t _MM_EXTRACT_EPI16(__m128i vec, const int LANE) { _NEON2SSE_SWITCH8(_mm_extract_epi16, vec, LANE,) } @@ -3117,7 +3148,7 @@ { //no signed average in x86 SIMD, go to unsigned __m128i c128, au, bu, sum; - c128 = _mm_set1_epi8(0x80); //-128 + c128 = _mm_set1_epi8((int8_t)0x80); //-128 au = _mm_sub_epi8(a, c128); //add 128 bu = _mm_sub_epi8(b, c128); //add 128 sum = _mm_avg_epu8(au, bu); @@ -3129,7 +3160,7 @@ { //no signed average in x86 SIMD, go to unsigned __m128i cx8000, au, bu, sum; - cx8000 = _mm_set1_epi16(0x8000); // - 32768 + cx8000 = _mm_set1_epi16((int16_t)0x8000); // - 32768 au = _mm_sub_epi16(a, cx8000); //add 32768 bu = _mm_sub_epi16(b, cx8000); //add 32768 sum = _mm_avg_epu16(au, bu); @@ -4747,7 +4778,7 @@ { // //need to deal with the possibility of internal overflow __m128i c128, au,bu; - c128 = _mm_set1_epi8 (128); + c128 = _mm_set1_epi8((int8_t)128); au = _mm_add_epi8( a, c128); bu = _mm_add_epi8( b, c128); return vhsubq_u8(au,bu); @@ -4758,7 +4789,7 @@ { //need to deal with the possibility of internal overflow __m128i c8000, au,bu; - c8000 = _mm_set1_epi16(0x8000); + c8000 = _mm_set1_epi16((int16_t)0x8000); au = _mm_add_epi16( a, c8000); bu = _mm_add_epi16( b, c8000); return vhsubq_u16(au,bu); @@ -5192,7 +5223,7 @@ return _mm_cmpeq_epi16(cmp, a); //a>=b #else __m128i c8000, as, bs, m1, m2; - c8000 = _mm_set1_epi16 (0x8000); + c8000 = _mm_set1_epi16 ((int16_t)0x8000); as = _mm_sub_epi16(a,c8000); bs = _mm_sub_epi16(b,c8000); m1 = _mm_cmpgt_epi16(as, bs); @@ -5428,7 +5459,7 @@ { //no unsigned chars comparison, only signed available,so need the trick __m128i c128, as, bs; - c128 = _mm_set1_epi8 (128); + c128 = _mm_set1_epi8 ((int8_t)128); as = _mm_sub_epi8(a,c128); bs = _mm_sub_epi8(b,c128); return _mm_cmpgt_epi8 (as, bs); @@ -5439,7 +5470,7 @@ { //no unsigned short comparison, only signed available,so need the trick __m128i c8000, as, bs; - c8000 = _mm_set1_epi16 (0x8000); + c8000 = _mm_set1_epi16 ((int16_t)0x8000); as = _mm_sub_epi16(a,c8000); bs = _mm_sub_epi16(b,c8000); return _mm_cmpgt_epi16 ( as, bs); @@ -6137,6 +6168,11 @@ float32x4_t vmaxq_f32(float32x4_t a, float32x4_t b); // VMAX.F32 q0,q0,q0 #define vmaxq_f32 _mm_max_ps + +float64x2_t vmaxq_f64(float64x2_t a, float64x2_t b); // VMAX.F64 q0,q0,q0 +#define vmaxq_f64 _mm_max_pd + + //*************** Minimum: vmin -> Vr[i] := (Va[i] >= Vb[i]) ? 
Vb[i] : Va[i] ******************************** //*********************************************************************************************************** int8x8_t vmin_s8(int8x8_t a, int8x8_t b); // VMIN.S8 d0,d0,d0 @@ -6221,6 +6257,11 @@ float32x4_t vminq_f32(float32x4_t a, float32x4_t b); // VMIN.F32 q0,q0,q0 #define vminq_f32 _mm_min_ps + +float64x2_t vminq_f64(float64x2_t a, float64x2_t b); // VMIN.F64 q0,q0,q0 +#define vminq_f64 _mm_min_pd + + //************* Pairwise addition operations. ************************************** //************************************************************************************ //Pairwise add - adds adjacent pairs of elements of two vectors, and places the results in the destination vector @@ -6283,7 +6324,7 @@ uint16x4_t res64; __m128i c32767, cfffe, as, bs, res; c32767 = _mm_set1_epi16 (32767); - cfffe = _mm_set1_epi16 (0xfffe); + cfffe = _mm_set1_epi16 ((int16_t)0xfffe); as = _mm_sub_epi16 (_pM128i(a), c32767); bs = _mm_sub_epi16 (_pM128i(b), c32767); res = _mm_hadd_epi16 (as, bs); @@ -8355,7 +8396,7 @@ // manual saturation solution looks more optimal than 32 bits conversion one __m128i cb, c8000, a_signed, saturation_mask, shift_res; cb = _mm_set1_epi16((1 << (16 - b)) - 1 - 0x8000 ); - c8000 = _mm_set1_epi16 (0x8000); + c8000 = _mm_set1_epi16 ((int16_t)0x8000); //no unsigned shorts comparison in SSE, only signed available, so need the trick a_signed = _mm_sub_epi16(a, c8000); //go to signed saturation_mask = _mm_cmpgt_epi16 (a_signed, cb); @@ -9196,7 +9237,7 @@ // it loads a 32-byte block aligned on a 16-byte boundary and extracts the 16 bytes corresponding to the unaligned access //If the ptr is aligned then could use __m128i _mm_load_si128 ((__m128i*) ptr) instead; #define LOAD_SI128(ptr) \ - ( ((unsigned long)(ptr) & 15) == 0 ) ? _mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr)) + ( ((uintptr_t)(ptr) & 15) == 0 ) ? _mm_load_si128((__m128i*)(ptr)) : _mm_loadu_si128((__m128i*)(ptr)) uint8x16_t vld1q_u8(__transfersize(16) uint8_t const * ptr); // VLD1.8 {d0, d1}, [r0] #define vld1q_u8 LOAD_SI128 @@ -9233,7 +9274,7 @@ float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr); // VLD1.32 {d0, d1}, [r0] _NEON2SSE_INLINE float32x4_t vld1q_f32(__transfersize(4) float32_t const * ptr) { - if( (((unsigned long)(ptr)) & 15 ) == 0 ) //16 bits aligned + if( (((uintptr_t)(ptr)) & 15 ) == 0 ) //16 bits aligned return _mm_load_ps(ptr); else return _mm_loadu_ps(ptr); @@ -9288,6 +9329,17 @@ poly16x4_t vld1_p16(__transfersize(4) poly16_t const * ptr); // VLD1.16 {d0}, [r0] #define vld1_p16 vld1_u16 + +float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr); // VLD1.64 {d0, d1}, [r0] +_NEON2SSE_INLINE float64x2_t vld1q_f64(__transfersize(4) float64_t const * ptr) +{ + if ((((uintptr_t)(ptr)) & 15) == 0) //16 bits aligned + return _mm_load_pd(ptr); + else + return _mm_loadu_pd(ptr); +} + + //*********************************************************************************************************** //******* Lane load functions - insert the data at vector's given position (lane) ************************* //*********************************************************************************************************** @@ -9522,7 +9574,7 @@ // If ptr is 16bit aligned and you need to store data without cache pollution then use void _mm_stream_si128 ((__m128i*)ptr, val); //here we assume the case of NOT 16bit aligned ptr possible. 
If it is aligned we could to use _mm_store_si128 like shown in the following macro #define STORE_SI128(ptr, val) \ - (((unsigned long)(ptr) & 15) == 0 ) ? _mm_store_si128 ((__m128i*)(ptr), val) : _mm_storeu_si128 ((__m128i*)(ptr), val); + (((uintptr_t)(ptr) & 15) == 0 ) ? _mm_store_si128 ((__m128i*)(ptr), val) : _mm_storeu_si128 ((__m128i*)(ptr), val); void vst1q_u8(__transfersize(16) uint8_t * ptr, uint8x16_t val); // VST1.8 {d0, d1}, [r0] #define vst1q_u8 STORE_SI128 @@ -9554,7 +9606,7 @@ void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val); // VST1.32 {d0, d1}, [r0] _NEON2SSE_INLINE void vst1q_f32(__transfersize(4) float32_t * ptr, float32x4_t val) { - if( ((unsigned long)(ptr) & 15) == 0 ) //16 bits aligned + if( ((uintptr_t)(ptr) & 15) == 0 ) //16 bits aligned _mm_store_ps (ptr, val); else _mm_storeu_ps (ptr, val); @@ -9639,22 +9691,22 @@ //***********Store a lane of a vector into memory (extract given lane) ********************* //****************************************************************************************** void vst1q_lane_u8(__transfersize(1) uint8_t * ptr, uint8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0] -#define vst1q_lane_u8(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI8 (val, lane) +#define vst1q_lane_u8(ptr, val, lane) *(ptr) = (uint8_t) _MM_EXTRACT_EPI8 (val, lane) void vst1q_lane_u16(__transfersize(1) uint16_t * ptr, uint16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0] -#define vst1q_lane_u16(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI16 (val, lane) +#define vst1q_lane_u16(ptr, val, lane) *(ptr) = (uint16_t) _MM_EXTRACT_EPI16 (val, lane) void vst1q_lane_u32(__transfersize(1) uint32_t * ptr, uint32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0] -#define vst1q_lane_u32(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI32 (val, lane) +#define vst1q_lane_u32(ptr, val, lane) *(ptr) = (uint32_t) _MM_EXTRACT_EPI32 (val, lane) void vst1q_lane_u64(__transfersize(1) uint64_t * ptr, uint64x2_t val, __constrange(0,1) int lane); // VST1.64 {d0}, [r0] -#define vst1q_lane_u64(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI64 (val, lane) +#define vst1q_lane_u64(ptr, val, lane) *(ptr) = (uint64_t) _MM_EXTRACT_EPI64 (val, lane) void vst1q_lane_s8(__transfersize(1) int8_t * ptr, int8x16_t val, __constrange(0,15) int lane); // VST1.8 {d0[0]}, [r0] -#define vst1q_lane_s8(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI8 (val, lane) +#define vst1q_lane_s8(ptr, val, lane) *(ptr) = (int8_t) _MM_EXTRACT_EPI8 (val, lane) void vst1q_lane_s16(__transfersize(1) int16_t * ptr, int16x8_t val, __constrange(0,7) int lane); // VST1.16 {d0[0]}, [r0] -#define vst1q_lane_s16(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI16 (val, lane) +#define vst1q_lane_s16(ptr, val, lane) *(ptr) = (int16_t) _MM_EXTRACT_EPI16 (val, lane) void vst1q_lane_s32(__transfersize(1) int32_t * ptr, int32x4_t val, __constrange(0,3) int lane); // VST1.32 {d0[0]}, [r0] #define vst1q_lane_s32(ptr, val, lane) *(ptr) = _MM_EXTRACT_EPI32 (val, lane) @@ -11881,22 +11933,22 @@ #define vget_lane_f32(vec, lane) vec.m64_f32[lane] uint8_t vgetq_lane_u8(uint8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0] -#define vgetq_lane_u8 _MM_EXTRACT_EPI8 +#define vgetq_lane_u8 (uint8_t) _MM_EXTRACT_EPI8 uint16_t vgetq_lane_u16(uint16x8_t vec, __constrange(0,7) int lane); // VMOV.s16 r0, d0[0] -#define vgetq_lane_u16 _MM_EXTRACT_EPI16 +#define vgetq_lane_u16 (uint16_t) _MM_EXTRACT_EPI16 uint32_t vgetq_lane_u32(uint32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] -#define vgetq_lane_u32 
_MM_EXTRACT_EPI32 +#define vgetq_lane_u32 (uint32_t) _MM_EXTRACT_EPI32 int8_t vgetq_lane_s8(int8x16_t vec, __constrange(0,15) int lane); // VMOV.S8 r0, d0[0] -#define vgetq_lane_s8 vgetq_lane_u8 +#define vgetq_lane_s8 _MM_EXTRACT_EPI8 int16_t vgetq_lane_s16(int16x8_t vec, __constrange(0,7) int lane); // VMOV.S16 r0, d0[0] -#define vgetq_lane_s16 vgetq_lane_u16 +#define vgetq_lane_s16 _MM_EXTRACT_EPI16 int32_t vgetq_lane_s32(int32x4_t vec, __constrange(0,3) int lane); // VMOV.32 r0, d0[0] -#define vgetq_lane_s32 vgetq_lane_u32 +#define vgetq_lane_s32 _MM_EXTRACT_EPI32 poly8_t vgetq_lane_p8(poly8x16_t vec, __constrange(0,15) int lane); // VMOV.U8 r0, d0[0] #define vgetq_lane_p8 vgetq_lane_u8 @@ -11920,10 +11972,10 @@ int64_t vgetq_lane_s64(int64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0 -#define vgetq_lane_s64 (int64_t) vgetq_lane_u64 +#define vgetq_lane_s64 _MM_EXTRACT_EPI64 uint64_t vgetq_lane_u64(uint64x2_t vec, __constrange(0,1) int lane); // VMOV r0,r0,d0 -#define vgetq_lane_u64 _MM_EXTRACT_EPI64 +#define vgetq_lane_u64 (uint64_t) _MM_EXTRACT_EPI64 // ***************** Set lanes within a vector ******************************************** // ************************************************************************************** @@ -12725,6 +12777,13 @@ return vcvtq_u32_f32(_mm_mul_ps(a,cconst128)); } + +int32x4_t vcvtnq_s32_f32(float32x4_t a); // VCVTN.S32.F32 q0, q0 +_NEON2SSE_INLINE int32x4_t vcvtnq_s32_f32(float32x4_t a) +{ + return _mm_cvtps_epi32(a); +} + //***************** Convert to float ************************* //************************************************************* float32x2_t vcvt_f32_s32(int32x2_t a); // VCVT.F32.S32 d0, d0 @@ -14562,6 +14621,22 @@ return _mm_and_ps (a, *(__m128*)c7fffffff); } +#ifdef _NEON2SSE_64BIT +int64x2_t vabsq_s64(int64x2_t a); // VABS.S64 q0,q0 +_NEON2SSE_INLINE int64x2_t vabsq_s64(int64x2_t a) // VABS.S64 q0,q0 +{ + __m128i sign = _mm_srai_epi32 (_mm_shuffle_epi32 (a, 0xf5), 31); + return _mm_sub_epi64 (_mm_xor_si128 (a, sign), sign); +} + +float64x2_t vabsq_f64(float64x2_t a); // VABS.F64 q0,q0 +_NEON2SSE_INLINE float64x2_t vabsq_f64(float64x2_t a) // VABS.F64 q0,q0 +{ + _NEON2SSE_ALIGN_16 int64_t mask[2] = {0x7fffffffffffffffLL, 0x7fffffffffffffffLL}; + return _mm_and_pd (a, *(__m128d*)mask); +} +#endif + //****** Saturating absolute: Vd[i] = sat(|Va[i]|) ********************* //********************************************************************** //For signed-integer data types, the absolute value of the most negative value is not representable by the data type, saturation takes place @@ -14596,7 +14671,7 @@ _NEON2SSE_INLINE int8x16_t vqabsq_s8(int8x16_t a) // VQABS.S8 q0,q0 { __m128i c_128, abs, abs_cmp; - c_128 = _mm_set1_epi8 (0x80); //-128 + c_128 = _mm_set1_epi8 ((int8_t)0x80); //-128 abs = _mm_abs_epi8 (a); abs_cmp = _mm_cmpeq_epi8 (abs, c_128); return _mm_xor_si128 (abs, abs_cmp); @@ -14606,7 +14681,7 @@ _NEON2SSE_INLINE int16x8_t vqabsq_s16(int16x8_t a) // VQABS.S16 q0,q0 { __m128i c_32768, abs, abs_cmp; - c_32768 = _mm_set1_epi16 (0x8000); //-32768 + c_32768 = _mm_set1_epi16 ((int16_t)0x8000); //-32768 abs = _mm_abs_epi16 (a); abs_cmp = _mm_cmpeq_epi16 (abs, c_32768); return _mm_xor_si128 (abs, abs_cmp); @@ -14919,7 +14994,7 @@ { __m128i cff, c80, c1, a_mask, a_neg, a_pos, a_comb; cff = _mm_cmpeq_epi8 (a,a); //0xff - c80 = _mm_set1_epi8(0x80); + c80 = _mm_set1_epi8((int8_t)0x80); c1 = _mm_set1_epi8(1); a_mask = _mm_and_si128(a, c80); a_mask = _mm_cmpeq_epi8(a_mask, c80); //0xff if negative input and 0 if positive 
@@ -16589,4 +16664,46 @@ uint32x4_t vreinterpretq_u32_p8 (poly8x16_t t); #define vreinterpretq_u32_p8 +//************* Round ****************** +float32x4_t vrndnq_f32(float32x4_t a); +#ifdef USE_SSE4 +#define vrndnq_f32(a) _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) +#else +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING( float32x4_t vrndnq_f32(float32x4_t a), _NEON2SSE_REASON_SLOW_SERIAL) +{ + int i; + _NEON2SSE_ALIGN_16 float32_t res[4]; + _mm_store_ps(res, a); + for(i = 0; i<4; i++) { + res[i] = nearbyintf(res[i]); + } + return _mm_load_ps(res); +} +#endif + + +float64x2_t vrndnq_f64(float64x2_t a); +#ifdef USE_SSE4 +#define vrndnq_f64(a) _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) +#else +_NEON2SSE_INLINE _NEON2SSE_PERFORMANCE_WARNING(float64x2_t vrndnq_f64(float64x2_t a), _NEON2SSE_REASON_SLOW_SERIAL) +{ + _NEON2SSE_ALIGN_16 float64_t res[2]; + _mm_store_pd(res, a); + res[0] = nearbyintf(res[0]); + res[1] = nearbyintf(res[1]); + return _mm_load_pd(res); +} +#endif + + + +//************* Sqrt ****************** +float32x4_t vsqrtq_f32(float32x4_t a); +#define vsqrtq_f32 _mm_sqrt_ps + +float64x2_t vsqrtq_f64(float64x2_t a); +#define vsqrtq_f64 _mm_sqrt_pd + + #endif /* NEON2SSE_H */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/ReadMe.md new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/ReadMe.md --- old/ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d/ReadMe.md 2017-05-30 09:44:55.000000000 +0200 +++ new/ARM_NEON_2_x86_SSE-1200fe90bb174a6224a525ee60148671a786a71f/ReadMe.md 2018-04-04 09:24:16.000000000 +0200 @@ -6,4 +6,6 @@ To take advantage of this file just include it in your project that uses ARM NEON intinsics instead of "arm_neon.h", compile it as usual and enjoy the result. +For significant performance improvement in some cases you might need to define USE_SSE4 in your project settings. Otherwise SIMD up to SSSE3 to be used. + For more information and license please read the NEON_2_SSE.h content. ++++++ boring_ssl.tar.gz ++++++ /work/SRC/openSUSE:Factory/tensorflow/boring_ssl.tar.gz /work/SRC/openSUSE:Factory/.tensorflow.new.4126/boring_ssl.tar.gz differ: char 13, line 1 ++++++ eigen.tar.gz ++++++ ++++ 72880 lines of diff (skipped) ++++++ fix_mvapich_mpi_bzl.patch ++++++ diff --git a/third_party/mpi/mpi.bzl b/third_party/mpi/mpi.bzl index 3a48335..1cd43f8 100644 --- a/third_party/mpi/mpi.bzl +++ b/third_party/mpi/mpi.bzl @@ -2,7 +2,7 @@ #based on the configuration options return one or the other def mpi_hdr(): - MPI_LIB_IS_OPENMPI = True + MPI_LIB_IS_OPENMPI=True hdrs = [] if MPI_LIB_IS_OPENMPI: hdrs = ["mpi.h", "mpi_portable_platform.h"] #When using OpenMPI ++++++ license.rst.txt ++++++ ++++ 903 lines (skipped) ++++++ protobuf_v3.6.0.tar.gz -> protobuf_v3.6.1.2.tar.gz ++++++ ++++ 4997 lines of diff (skipped) ++++++ re2-2018-04-01.tar.gz -> re2-2018-10-01.tar.gz ++++++ ++++ 2612 lines of diff (skipped) ++++++ support-new-bazel.patch ++++++ --- a/configure.py.orig 2019-03-12 21:43:27.333211414 +0100 +++ a/configure.py 2019-03-12 21:43:50.225119652 +0100 @@ -1554,7 +1554,7 @@ # environment variables. 
environ_cp = dict(os.environ) - check_bazel_version('0.19.0', '0.21.0') + check_bazel_version('0.19.0', '0.22.0') reset_tf_configure_bazelrc() ++++++ tensorflow-1.10.0.tar.gz -> tensorflow-1.13.1.tar.gz ++++++ /work/SRC/openSUSE:Factory/tensorflow/tensorflow-1.10.0.tar.gz /work/SRC/openSUSE:Factory/.tensorflow.new.4126/tensorflow-1.13.1.tar.gz differ: char 12, line 1 ++++++ tensorflow-fix_lite.patch ++++++ --- tensorflow-1.13.1/tensorflow/lite/tools/make/Makefile.orig 2019-06-04 13:13:08.329080620 +0200 +++ tensorflow-1.13.1/tensorflow/lite/tools/make/Makefile 2019-06-04 16:05:13.325963284 +0200 @@ -38,11 +38,12 @@ INCLUDES := \ -I$(OBJDIR) # This is at the end so any globally-installed frameworks like protobuf don't # override local versions in the source tree. -INCLUDES += -I/usr/local/include +INCLUDES += -I/usr/include # These are the default libraries needed, but they can be added to or # overridden by the platform-specific settings in target makefiles. LIBS := \ +-lflatbuffers \ -lstdc++ \ -lpthread \ -lm \ ++++++ tensorflow-make_aws_sdk_work_on_aarch64.patch ++++++ >From 3f88ddb71ba49d343a5db1304c296e78ddeb2575 Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan <[email protected]> Date: Wed, 10 Oct 2018 02:34:02 +0000 Subject: [PATCH] [aarch64] make aws sdk work on aarch64 `bazel build //tensorflow/tools/pip_package:build_pip_package' requires AWS SDK by default. but platform part was not built on aarch64 --- tensorflow/BUILD | 6 ++++++ third_party/aws/BUILD.bazel | 3 +++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 9b62a504525d..8486922e00b0 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -163,6 +163,12 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "linux_aarch64", + values = {"cpu": "aarch64"}, + visibility = ["//visibility:public"], +) + config_setting( name = "linux_x86_64", values = {"cpu": "k8"}, diff --git a/third_party/aws/BUILD.bazel b/third_party/aws/BUILD.bazel index 5426f79e4650..66baa8fdf3b7 100644 --- a/third_party/aws/BUILD.bazel +++ b/third_party/aws/BUILD.bazel @@ -12,6 +12,9 @@ load("@org_tensorflow//third_party:common.bzl", "template_rule") cc_library( name = "aws", srcs = select({ + "@org_tensorflow//tensorflow:linux_aarch64": glob([ + "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", + ]), "@org_tensorflow//tensorflow:linux_x86_64": glob([ "aws-cpp-sdk-core/source/platform/linux-shared/*.cpp", ]),
