Re: [flac-dev] [PATCH 0/7] PowerPC64 performance improvements

2018-07-12 Thread Anton Blanchard
Hi,

> Are those all Virtual Machines? If so, how do you really know that
> the performance is 3.3x?

I have access to a number of different POWER9 systems, all native.

Thanks,
Anton
___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev


Re: [flac-dev] [PATCH 0/7] PowerPC64 performance improvements

2018-07-11 Thread Anton Blanchard
Hi Brian,

> Thank you for this collection of patches.
> 
> How can I test them? What platforms (computers) have the ppc64 or
> POWER9 processor?

The IBM Bounty Source page has a list of resources:

https://www.bountysource.com/teams/ibm/bounties

Travis also has ppc64le support, so if you add the linux-ppc64le target
then FLAC will be tested on ppc64le.

Thanks,
Anton
___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev


[flac-dev] [PATCH 7/7] Add VSX optimised versions of autocorrelation loops

2018-07-10 Thread Anton Blanchard
Add a POWER8 and POWER9 version of the autocorrelation functions.

flac --best is about 3.3x faster on POWER9 with this patch.

Signed-off-by: Anton Blanchard 
---
 src/libFLAC/Makefile.am   |   1 +
 src/libFLAC/include/private/lpc.h |  14 +
 src/libFLAC/lpc_intrin_vsx.c  | 942 ++
 src/libFLAC/stream_encoder.c  |  30 +
 4 files changed, 987 insertions(+)
 create mode 100644 src/libFLAC/lpc_intrin_vsx.c

diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am
index 863f7f95..f0f32f04 100644
--- a/src/libFLAC/Makefile.am
+++ b/src/libFLAC/Makefile.am
@@ -114,6 +114,7 @@ libFLAC_sources = \
lpc_intrin_sse2.c \
lpc_intrin_sse41.c \
lpc_intrin_avx2.c \
+   lpc_intrin_vsx.c \
md5.c \
memory.c \
metadata_iterators.c \
diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h
index 63d64324..64dfd1f8 100644
--- a/src/libFLAC/include/private/lpc.h
+++ b/src/libFLAC/include/private/lpc.h
@@ -91,6 +91,20 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new(const FLAC__real da
 void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
 #endif
 #  endif
+#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
+#ifdef FLAC__HAS_TARGET_POWER9
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_4(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+#endif
+#ifdef FLAC__HAS_TARGET_POWER8
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_4(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
+#endif
+#endif
 #endif
 
 /*
diff --git a/src/libFLAC/lpc_intrin_vsx.c b/src/libFLAC/lpc_intrin_vsx.c
new file mode 100644
index 00000000..48c82182
--- /dev/null
+++ b/src/libFLAC/lpc_intrin_vsx.c
@@ -0,0 +1,942 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2016  Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#ifndef FLAC__NO_ASM
+#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
+
+#include "private/cpu.h"
+#include "private/lpc.h"
+#include "FLAC/assert.h"
+#include "FLAC/format.h"
+
+#include <altivec.h>
+
+#ifdef FLAC__HAS_TARGET_POWER8
+__attribute__((target("cpu=power8")))
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_16(const 
FLAC__real data[], uint32_t data_len, uint3

[flac-dev] [PATCH 5/7] Check if compiler supports target attribute on ppc64

2018-07-10 Thread Anton Blanchard
From: Amitay Isaacs 

Check if the compiler supports __attribute__((target("cpu=power8")))
and __attribute__((target("cpu=power9")))

Signed-off-by: Amitay Isaacs 
---
 configure.ac | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/configure.ac b/configure.ac
index 55078293..3d18bb91 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,6 +175,26 @@ else
 AC_DEFINE([FLAC__HAS_X86INTRIN], 0)
 fi
 
+if test x"$cpu_ppc64" = xtrue ; then
+
+AC_C_ATTRIBUTE([target("cpu=power8")],
+  [have_cpu_power8=yes],
+  [have_cpu_power8=no])
+if test x"$have_cpu_power8" = xyes ; then
+  AC_DEFINE(FLAC__HAS_TARGET_POWER8)
+  AH_TEMPLATE(FLAC__HAS_TARGET_POWER8, [define if compiler has __attribute__((target("cpu=power8"))) support])
+fi
+
+AC_C_ATTRIBUTE([target("cpu=power9")],
+  [have_cpu_power9=yes],
+  [have_cpu_power9=no])
+if test x"$have_cpu_power9" = xyes ; then
+  AC_DEFINE(FLAC__HAS_TARGET_POWER9)
+  AH_TEMPLATE(FLAC__HAS_TARGET_POWER9, [define if compiler has __attribute__((target("cpu=power9"))) support])
+fi
+
+fi
+
 case "$host" in
i386-*-openbsd3.[[0-3]]) OBJ_FORMAT=aoutb ;;
*-*-cygwin|*mingw*) OBJ_FORMAT=win32 ;;
-- 
2.17.1

___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev


[flac-dev] [PATCH 6/7] Add runtime detection of POWER8 and POWER9

2018-07-10 Thread Anton Blanchard
Use getauxval() to determine if we are on POWER8 or POWER9 or newer.
POWER8 is represented by version 2.07 and POWER9 by version 3.00.

Signed-off-by: Anton Blanchard 
---
 src/libFLAC/cpu.c                 | 31 +++++++++++++++++++++++++++++++
 src/libFLAC/include/private/cpu.h |  6 ++++++
 2 files changed, 37 insertions(+)

diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c
index bf0708c8..64da9cbc 100644
--- a/src/libFLAC/cpu.c
+++ b/src/libFLAC/cpu.c
@@ -53,6 +53,9 @@
 #define dfprintf(file, format, ...)
 #endif
 
+#if defined FLAC__CPU_PPC
+#include <sys/auxv.h>
+#endif
 
 #if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && (defined FLAC__HAS_NASM || FLAC__HAS_X86INTRIN) && !defined FLAC__NO_ASM
 
@@ -230,6 +233,29 @@ x86_cpu_info (FLAC__CPUInfo *info)
 #endif
 }
 
+static void
+ppc_cpu_info (FLAC__CPUInfo *info)
+{
+#if defined FLAC__CPU_PPC
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
+
+#ifndef PPC_FEATURE2_ARCH_2_07
+#define PPC_FEATURE2_ARCH_2_07 0x80000000
+#endif
+
+   if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00) {
+   info->ppc.arch_3_00 = true;
+   } else if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) {
+   info->ppc.arch_2_07 = true;
+   }
+#else
+   info->ppc.arch_2_07 = false;
+   info->ppc.arch_3_00 = false;
+#endif
+}
+
 void FLAC__cpu_info (FLAC__CPUInfo *info)
 {
memset(info, 0, sizeof(*info));
@@ -238,6 +264,8 @@ void FLAC__cpu_info (FLAC__CPUInfo *info)
info->type = FLAC__CPUINFO_TYPE_IA32;
 #elif defined FLAC__CPU_X86_64
info->type = FLAC__CPUINFO_TYPE_X86_64;
+#elif defined FLAC__CPU_PPC
+   info->type = FLAC__CPUINFO_TYPE_PPC;
 #else
info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
 #endif
@@ -247,6 +275,9 @@ void FLAC__cpu_info (FLAC__CPUInfo *info)
case FLAC__CPUINFO_TYPE_X86_64:
x86_cpu_info (info);
break;
+   case FLAC__CPUINFO_TYPE_PPC:
+   ppc_cpu_info (info);
+   break;
default:
info->use_asm = false;
break;
diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h
index 3fe279b0..e07aa09d 100644
--- a/src/libFLAC/include/private/cpu.h
+++ b/src/libFLAC/include/private/cpu.h
@@ -153,6 +153,7 @@
 typedef enum {
FLAC__CPUINFO_TYPE_IA32,
FLAC__CPUINFO_TYPE_X86_64,
+   FLAC__CPUINFO_TYPE_PPC,
FLAC__CPUINFO_TYPE_UNKNOWN
 } FLAC__CPUInfo_Type;
 
@@ -173,11 +174,16 @@ typedef struct {
FLAC__bool fma;
 } FLAC__CPUInfo_x86;
 
+typedef struct {
+   FLAC__bool arch_3_00;
+   FLAC__bool arch_2_07;
+} FLAC__CPUInfo_ppc;
 
 typedef struct {
FLAC__bool use_asm;
FLAC__CPUInfo_Type type;
FLAC__CPUInfo_x86 x86;
+   FLAC__CPUInfo_ppc ppc;
 } FLAC__CPUInfo;
 
 void FLAC__cpu_info(FLAC__CPUInfo *info);
-- 
2.17.1

___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev


[flac-dev] [PATCH 4/7] Add m4 macro to check for C __attribute__ features

2018-07-10 Thread Anton Blanchard
From: Amitay Isaacs 

Signed-off-by: Amitay Isaacs 
---
 m4/c_attribute.m4 | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 m4/c_attribute.m4

diff --git a/m4/c_attribute.m4 b/m4/c_attribute.m4
new file mode 100644
index 00000000..48aa6223
--- /dev/null
+++ b/m4/c_attribute.m4
@@ -0,0 +1,18 @@
+#
+# Check for supported __attribute__ features
+#
+# AC_C_ATTRIBUTE(FEATURE, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+AC_DEFUN([AC_C_ATTRIBUTE],
+[AS_VAR_PUSHDEF([CACHEVAR], [ax_cv_c_attribute_$1])dnl
+AC_CACHE_CHECK([for  __attribute__ (($1))],
+  CACHEVAR,[
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+[[ void foo(void) __attribute__ (($1)); ]])],
+[AS_VAR_SET(CACHEVAR, [yes])],
+[AS_VAR_SET(CACHEVAR, [no])])])
+AS_VAR_IF(CACHEVAR,yes,
+  [m4_default([$2], :)],
+  [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl
+])dnl
-- 
2.17.1

___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev


[flac-dev] [PATCH 3/7] configure.ac: Fix FLAC__CPU_PPC on little endian, and add FLAC__CPU_PPC64

2018-07-10 Thread Anton Blanchard
FLAC__CPU_PPC wasn't catching powerpcle or powerpc64le. Fix that and
add a new define for FLAC__CPU_PPC64.

Signed-off-by: Anton Blanchard 
---
 configure.ac | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 592e7750..55078293 100644
--- a/configure.ac
+++ b/configure.ac
@@ -141,7 +141,16 @@ case "$host_cpu" in
AH_TEMPLATE(FLAC__CPU_IA32, [define if building for ia32/i386])
asm_optimisation=$asm_opt
;;
-   powerpc|powerpc64)
+   powerpc64|powerpc64le)
+   cpu_ppc64=true
+   cpu_ppc=true
+   AC_DEFINE(FLAC__CPU_PPC)
+   AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC])
+   AC_DEFINE(FLAC__CPU_PPC64)
+   AH_TEMPLATE(FLAC__CPU_PPC64, [define if building for PowerPC64])
+   asm_optimisation=$asm_opt
+   ;;
+   powerpc|powerpcle)
cpu_ppc=true
AC_DEFINE(FLAC__CPU_PPC)
AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC])
@@ -157,6 +166,7 @@ esac
 AM_CONDITIONAL(FLAC__CPU_X86_64, test "x$cpu_x86_64" = xtrue)
 AM_CONDITIONAL(FLaC__CPU_IA32, test "x$cpu_ia32" = xtrue)
 AM_CONDITIONAL(FLaC__CPU_PPC, test "x$cpu_ppc" = xtrue)
+AM_CONDITIONAL(FLaC__CPU_PPC64, test "x$cpu_ppc64" = xtrue)
 AM_CONDITIONAL(FLaC__CPU_SPARC, test "x$cpu_sparc" = xtrue)
 
 if test "x$ac_cv_header_x86intrin_h" = xyes; then
-- 
2.17.1

___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev


[flac-dev] [PATCH 2/7] configure.ac: Add VSX enable/disable

2018-07-10 Thread Anton Blanchard
We want to create functions with PowerPC VSX instructions, so add
a configure check.

Signed-off-by: Anton Blanchard 
---
 configure.ac | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/configure.ac b/configure.ac
index 77e3628e..592e7750 100644
--- a/configure.ac
+++ b/configure.ac
@@ -228,6 +228,19 @@ AC_DEFINE(FLAC__USE_ALTIVEC)
 AH_TEMPLATE(FLAC__USE_ALTIVEC, [define to enable use of Altivec instructions])
 fi
 
+AC_ARG_ENABLE(vsx,
+AC_HELP_STRING([--disable-vsx], [Disable VSX optimizations]),
+[case "${enableval}" in
+   yes) use_vsx=true ;;
+   no)  use_vsx=false ;;
+   *) AC_MSG_ERROR(bad value ${enableval} for --enable-vsx) ;;
+esac],[use_vsx=true])
+AM_CONDITIONAL(FLaC__USE_VSX, test "x$use_vsx" = xtrue)
+if test "x$use_vsx" = xtrue ; then
+AC_DEFINE(FLAC__USE_VSX)
+AH_TEMPLATE(FLAC__USE_VSX, [define to enable use of VSX instructions])
+fi
+
 AC_ARG_ENABLE(avx,
 AC_HELP_STRING([--disable-avx], [Disable AVX, AVX2 optimizations]),
 [case "${enableval}" in
-- 
2.17.1

___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev


[flac-dev] [PATCH 0/7] PowerPC64 performance improvements

2018-07-10 Thread Anton Blanchard
The following series adds initial vector support for PowerPC64.
On POWER9, flac --best is about 3.3x faster.

Amitay Isaacs (2):
  Add m4 macro to check for C __attribute__ features
  Check if compiler supports target attribute on ppc64

Anton Blanchard (5):
  configure.ac: Remove SPE detection code
  configure.ac: Add VSX enable/disable
  configure.ac: Fix FLAC__CPU_PPC on little endian, and add
FLAC__CPU_PPC64
  Add runtime detection of POWER8 and POWER9
  Add VSX optimised versions of autocorrelation loops

 configure.ac  |  53 +-
 m4/c_attribute.m4 |  18 +
 src/libFLAC/Makefile.am   |   1 +
 src/libFLAC/cpu.c |  31 +
 src/libFLAC/include/private/cpu.h |   6 +
 src/libFLAC/include/private/lpc.h |  14 +
 src/libFLAC/lpc_intrin_vsx.c  | 942 ++
 src/libFLAC/stream_encoder.c  |  30 +
 8 files changed, 1086 insertions(+), 9 deletions(-)
 create mode 100644 m4/c_attribute.m4
 create mode 100644 src/libFLAC/lpc_intrin_vsx.c

-- 
2.17.1

___
flac-dev mailing list
flac-dev@xiph.org
http://lists.xiph.org/mailman/listinfo/flac-dev