Hello community, here is the log from the commit of package libjpeg-turbo for openSUSE:Factory checked in at 2016-09-25 14:28:07 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/libjpeg-turbo (Old) and /work/SRC/openSUSE:Factory/.libjpeg-turbo.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "libjpeg-turbo" Changes: -------- --- /work/SRC/openSUSE:Factory/libjpeg-turbo/libjpeg-turbo.changes 2016-06-13 21:48:46.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.libjpeg-turbo.new/libjpeg-turbo.changes 2016-09-25 14:28:09.000000000 +0200 @@ -1,0 +2,34 @@ +Wed Sep 21 10:50:36 UTC 2016 - [email protected] + +- Update to version 1.5.1 + + Fix for PowerPC platforms lacking AltiVec instructions + + Fix ABI problem with clang/llvm on aarch64. + + Fancy upsampling is now supported when decompressing JPEG + images that use 4:4:0 (h1v2) chroma subsampling. + + If merged upsampling isn't SIMD-accelerated but YCbCr-to-RGB + conversion is, then libjpeg-turbo will now disable merged + upsampling when decompressing YCbCr JPEG images into RGB + or extended RGB output images. This significantly speeds up + the decompression of 4:2:0 and 4:2:2 JPEGs on ARM platforms + if fancy upsampling is not used + (for example, if the -nosmooth option to djpeg is specified.) + + The TurboJPEG API will now decompress 4:2:2 and 4:4:0 JPEG + images with 2x2 luminance sampling factors and 2x1 or 1x2 + chrominance sampling factors. + + Fixed an unsigned integer overflow in the libjpeg memory manager. + + Fixed additional negative left shifts and other issues reported + by the GCC and Clang undefined behavior sanitizers when + attempting to decompress specially-crafted malformed JPEG + images. None of these issues posed a security threat, but + removing the warnings makes it easier to detect actual + security issues, should they arise in the future. + + Fixed an out-of-bounds array reference, introduced by + 1.4.902 and detected by the Clang undefined behavior sanitizer, + that could be triggered by a specially-crafted malformed + JPEG image with more than four components. Because the + out-of-bounds reference was still within the same structure, + it was not known to pose a security threat, but removing + the warning makes it easier to detect actual security issues, + should they arise in the future. + +------------------------------------------------------------------- libjpeg62-turbo.changes: same change Old: ---- libjpeg-turbo-1.5.0.tar.gz New: ---- libjpeg-turbo-1.5.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ libjpeg-turbo.spec ++++++ --- /var/tmp/diff_new_pack.eETy2o/_old 2016-09-25 14:28:10.000000000 +0200 +++ /var/tmp/diff_new_pack.eETy2o/_new 2016-09-25 14:28:10.000000000 +0200 @@ -16,7 +16,7 @@ # -%define srcver 1.5.0 +%define srcver 1.5.1 %define major 8 %define minor 1 %define micro 2 ++++++ libjpeg62-turbo.spec ++++++ --- /var/tmp/diff_new_pack.eETy2o/_old 2016-09-25 14:28:10.000000000 +0200 +++ /var/tmp/diff_new_pack.eETy2o/_new 2016-09-25 14:28:10.000000000 +0200 @@ -19,7 +19,7 @@ %define major 62 %define minor 2 %define micro 0 -%define srcver 1.5.0 +%define srcver 1.5.1 %define libver %{major}.%{minor}.%{micro} Name: libjpeg62-turbo ++++++ libjpeg-turbo-1.5.0.tar.gz -> libjpeg-turbo-1.5.1.tar.gz ++++++ ++++ 1739 lines of diff (skipped) ++++ retrying with extended exclude list diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/BUILDING.md new/libjpeg-turbo-1.5.1/BUILDING.md --- old/libjpeg-turbo-1.5.0/BUILDING.md 2016-06-07 19:33:39.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/BUILDING.md 2016-09-21 01:36:59.000000000 +0200 @@ -323,11 +323,6 @@ IOS_SYSROOT=$IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk IOS_GCC=$IOS_PLATFORMDIR/Developer/usr/bin/arm-apple-darwin10-llvm-gcc-4.2 - *ARMv6 (code will run on all iOS devices, not SIMD-accelerated)* - [NOTE: Requires Xcode 4.4.x or earlier] - - IOS_CFLAGS="-march=armv6 -mcpu=arm1176jzf-s -mfpu=vfp" - *ARMv7 (code will run on iPhone 3GS-4S/iPad 1st-3rd Generation and newer)* IOS_CFLAGS="-march=armv7 -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon" @@ -399,8 +394,8 @@ above in order to support older versions of iOS than the default version supported by the SDK. -Once built, lipo can be used to combine the ARMv6, v7, v7s, and/or v8 variants -into a universal library. +Once built, lipo can be used to combine the ARMv7, v7s, and/or v8 variants into +a universal library. ### Building libjpeg-turbo for Android @@ -782,7 +777,6 @@ make command line as shown above. make iosdmg [BUILDDIR32={32-bit build directory}] \ - [BUILDDIRARMV6={ARMv6 build directory}] \ [BUILDDIRARMV7={ARMv7 build directory}] \ [BUILDDIRARMV7S={ARMv7s build directory}] \ [BUILDDIRARMV8={ARMv8 build directory}] @@ -791,19 +785,17 @@ libjpeg-turbo static libraries contain ARM architectures necessary to build iOS applications. If building on an x86-64 system, the binaries will also contain the i386 architecture, as with `make udmg` above. You should first -configure ARMv6, ARMv7, ARMv7s, and/or ARMv8 out-of-tree builds of -libjpeg-turbo (see "Building libjpeg-turbo for iOS" above.) If you are -building an x86-64 version of libjpeg-turbo, you should configure a 32-bit -out-of-tree build as well. Next, build libjpeg-turbo as you would normally, -using an out-of-tree build. When it is built, run `make iosdmg` from the -build directory. The build system will look for the ARMv6 build under -*{source_directory}*/iosarmv6 by default, the ARMv7 build under -*{source_directory}*/iosarmv7 by default, the ARMv7s build under -*{source_directory}*/iosarmv7s by default, the ARMv8 build under -*{source_directory}*/iosarmv8 by default, and (if applicable) the 32-bit build -under *{source_directory}*/osxx86 by default, but you can override this by -setting the `BUILDDIR32`, `BUILDDIRARMV6`, `BUILDDIRARMV7`, `BUILDDIRARMV7S`, -and/or `BUILDDIRARMV8` variables on the `make` command line as shown above. +configure ARMv7, ARMv7s, and/or ARMv8 out-of-tree builds of libjpeg-turbo (see +"Building libjpeg-turbo for iOS" above.) If you are building an x86-64 version +of libjpeg-turbo, you should configure a 32-bit out-of-tree build as well. +Next, build libjpeg-turbo as you would normally, using an out-of-tree build. +When it is built, run `make iosdmg` from the build directory. The build system +will look for the ARMv7 build under *{source_directory}*/iosarmv7 by default, +the ARMv7s build under *{source_directory}*/iosarmv7s by default, the ARMv8 +build under *{source_directory}*/iosarmv8 by default, and (if applicable) the +32-bit build under *{source_directory}*/osxx86 by default, but you can override +this by setting the `BUILDDIR32`, `BUILDDIRARMV7`, `BUILDDIRARMV7S`, and/or +`BUILDDIRARMV8` variables on the `make` command line as shown above. NOTE: If including an ARMv8 build in the package, then you may need to use Xcode's version of lipo instead of the operating system's. To do this, pass diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/CMakeLists.txt new/libjpeg-turbo-1.5.1/CMakeLists.txt --- old/libjpeg-turbo-1.5.0/CMakeLists.txt 2016-06-07 19:33:39.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/CMakeLists.txt 2016-09-21 01:36:59.000000000 +0200 @@ -9,7 +9,7 @@ endif() project(libjpeg-turbo C) -set(VERSION 1.5.0) +set(VERSION 1.5.1) string(REPLACE "." ";" VERSION_TRIPLET ${VERSION}) list(GET VERSION_TRIPLET 0 VERSION_MAJOR) list(GET VERSION_TRIPLET 1 VERSION_MINOR) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/ChangeLog.md new/libjpeg-turbo-1.5.1/ChangeLog.md --- old/libjpeg-turbo-1.5.0/ChangeLog.md 2016-06-07 19:33:39.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/ChangeLog.md 2016-09-21 01:36:59.000000000 +0200 @@ -1,3 +1,94 @@ +1.5.1 +===== + +### Significant changes relative to 1.5.0: + +1. Previously, the undocumented `JSIMD_FORCE*` environment variables could be +used to force-enable a particular SIMD instruction set if multiple instruction +sets were available on a particular platform. On x86 platforms, where CPU +feature detection is bulletproof and multiple SIMD instruction sets are +available, it makes sense for those environment variables to allow forcing the +use of an instruction set only if that instruction set is available. However, +since the ARM implementations of libjpeg-turbo can only use one SIMD +instruction set, and since their feature detection code is less bulletproof +(parsing /proc/cpuinfo), it makes sense for the `JSIMD_FORCENEON` environment +variable to bypass the feature detection code and really force the use of NEON +instructions. A new environment variable (`JSIMD_FORCEDSPR2`) was introduced +in the MIPS implementation for the same reasons, and the existing +`JSIMD_FORCENONE` environment variable was extended to that implementation. +These environment variables provide a workaround for those attempting to test +ARM and MIPS builds of libjpeg-turbo in QEMU, which passes through +/proc/cpuinfo from the host system. + +2. libjpeg-turbo previously assumed that AltiVec instructions were always +available on PowerPC platforms, which led to "illegal instruction" errors when +running on PowerPC chips that lack AltiVec support (such as the older 7xx/G3 +and newer e5500 series.) libjpeg-turbo now examines /proc/cpuinfo on +Linux/Android systems and enables AltiVec instructions only if the CPU supports +them. It also now provides two environment variables, `JSIMD_FORCEALTIVEC` and +`JSIMD_FORCENONE`, to force-enable and force-disable AltiVec instructions in +environments where /proc/cpuinfo is an unreliable means of CPU feature +detection (such as when running in QEMU.) On OS X, libjpeg-turbo continues to +assume that AltiVec support is always available, which means that libjpeg-turbo +cannot be used with G3 Macs unless you set the environment variable +`JSIMD_FORCENONE` to `1`. + +3. Fixed an issue whereby 64-bit ARM (AArch64) builds of libjpeg-turbo would +crash when built with recent releases of the Clang/LLVM compiler. This was +caused by an ABI conformance issue in some of libjpeg-turbo's 64-bit NEON SIMD +routines. Those routines were incorrectly using 64-bit instructions to +transfer a 32-bit JDIMENSION argument, whereas the ABI allows the upper +(unused) 32 bits of a 32-bit argument's register to be undefined. The new +Clang/LLVM optimizer uses load combining to transfer multiple adjacent 32-bit +structure members into a single 64-bit register, and this exposed the ABI +conformance issue. + +4. Fancy upsampling is now supported when decompressing JPEG images that use +4:4:0 (h1v2) chroma subsampling. These images are generated when losslessly +rotating or transposing JPEG images that use 4:2:2 (h2v1) chroma subsampling. +The h1v2 fancy upsampling algorithm is not currently SIMD-accelerated. + +5. If merged upsampling isn't SIMD-accelerated but YCbCr-to-RGB conversion is, +then libjpeg-turbo will now disable merged upsampling when decompressing YCbCr +JPEG images into RGB or extended RGB output images. This significantly speeds +up the decompression of 4:2:0 and 4:2:2 JPEGs on ARM platforms if fancy +upsampling is not used (for example, if the `-nosmooth` option to djpeg is +specified.) + +6. The TurboJPEG API will now decompress 4:2:2 and 4:4:0 JPEG images with +2x2 luminance sampling factors and 2x1 or 1x2 chrominance sampling factors. +This is a non-standard way of specifying 2x subsampling (normally 4:2:2 JPEGs +have 2x1 luminance and 1x1 chrominance sampling factors, and 4:4:0 JPEGs have +1x2 luminance and 1x1 chrominance sampling factors), but the JPEG specification +and the libjpeg API both allow it. + +7. Fixed an unsigned integer overflow in the libjpeg memory manager, detected +by the Clang undefined behavior sanitizer, that could be triggered by +attempting to decompress a specially-crafted malformed JPEG image. This issue +affected only 32-bit code and did not pose a security threat, but removing the +warning makes it easier to detect actual security issues, should they arise in +the future. + +8. Fixed additional negative left shifts and other issues reported by the GCC +and Clang undefined behavior sanitizers when attempting to decompress +specially-crafted malformed JPEG images. None of these issues posed a security +threat, but removing the warnings makes it easier to detect actual security +issues, should they arise in the future. + +9. Fixed an out-of-bounds array reference, introduced by 1.4.90[2] (partial +image decompression) and detected by the Clang undefined behavior sanitizer, +that could be triggered by a specially-crafted malformed JPEG image with more +than four components. Because the out-of-bounds reference was still within the +same structure, it was not known to pose a security threat, but removing the +warning makes it easier to detect actual security issues, should they arise in +the future. + +10. Fixed another ABI conformance issue in the 64-bit ARM (AArch64) NEON SIMD +code. Some of the routines were incorrectly reading and storing data below the +stack pointer, which caused segfaults in certain applications under specific +circumstances. + + 1.5.0 ===== diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/Makefile.am new/libjpeg-turbo-1.5.1/Makefile.am --- old/libjpeg-turbo-1.5.0/Makefile.am 2016-06-07 19:33:39.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/Makefile.am 2016-09-21 01:36:59.000000000 +0200 @@ -11,7 +11,10 @@ nodist_include_HEADERS = jconfig.h pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = pkgscripts/libjpeg.pc pkgscripts/libturbojpeg.pc +pkgconfig_DATA = pkgscripts/libjpeg.pc +if WITH_TURBOJPEG +pkgconfig_DATA += pkgscripts/libturbojpeg.pc +endif HDRS = jchuff.h jdct.h jdhuff.h jerror.h jinclude.h jmemsys.h jmorecfg.h \ jpegint.h jpeglib.h jversion.h jsimd.h jsimddct.h jpegcomp.h \ @@ -757,12 +760,12 @@ sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall - sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" + sh pkgscripts/makemacpkg -build32 ${BUILDDIR32} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" else iosdmg: all pkgscripts/makemacpkg pkgscripts/uninstall - sh pkgscripts/makemacpkg -buildarmv6 ${BUILDDIRARMV6} -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" + sh pkgscripts/makemacpkg -buildarmv7 ${BUILDDIRARMV7} -buildarmv7s ${BUILDDIRARMV7S} -buildarmv8 ${BUILDDIRARMV8} -lipo "${LIPO}" endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/bmp.c new/libjpeg-turbo-1.5.1/bmp.c --- old/libjpeg-turbo-1.5.0/bmp.c 2016-06-07 19:33:39.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/bmp.c 2016-09-21 01:36:59.000000000 +0200 @@ -108,10 +108,14 @@ m=(m-k)/(1.0-k); y=(y-k)/(1.0-k); } - if(c>1.0) c=1.0; if(c<0.) c=0.; - if(m>1.0) m=1.0; if(m<0.) m=0.; - if(y>1.0) y=1.0; if(y<0.) y=0.; - if(k>1.0) k=1.0; if(k<0.) k=0.; + if(c>1.0) c=1.0; + if(c<0.) c=0.; + if(m>1.0) m=1.0; + if(m<0.) m=0.; + if(y>1.0) y=1.0; + if(y<0.) y=0.; + if(k>1.0) k=1.0; + if(k<0.) k=0.; *dstcolptr++=(unsigned char)(255.0-c*255.0+0.5); *dstcolptr++=(unsigned char)(255.0-m*255.0+0.5); *dstcolptr++=(unsigned char)(255.0-y*255.0+0.5); @@ -133,9 +137,12 @@ double r=c*k/255.; double g=m*k/255.; double b=y*k/255.; - if(r>255.0) r=255.0; if(r<0.) r=0.; - if(g>255.0) g=255.0; if(g<0.) g=0.; - if(b>255.0) b=255.0; if(b<0.) b=0.; + if(r>255.0) r=255.0; + if(r<0.) r=0.; + if(g>255.0) g=255.0; + if(g<0.) g=0.; + if(b>255.0) b=255.0; + if(b<0.) b=0.; dstcolptr[tjRedOffset[dstpf]]=(unsigned char)(r+0.5); dstcolptr[tjGreenOffset[dstpf]]=(unsigned char)(g+0.5); dstcolptr[tjBlueOffset[dstpf]]=(unsigned char)(b+0.5); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/configure.ac new/libjpeg-turbo-1.5.1/configure.ac --- old/libjpeg-turbo-1.5.0/configure.ac 2016-06-07 19:33:39.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/configure.ac 2016-09-21 01:36:59.000000000 +0200 @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.56]) -AC_INIT([libjpeg-turbo], [1.5.0]) +AC_INIT([libjpeg-turbo], [1.5.1]) AM_INIT_AUTOMAKE([-Wall foreign dist-bzip2]) AC_PREFIX_DEFAULT(/opt/libjpeg-turbo) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/jdarith.c new/libjpeg-turbo-1.5.1/jdarith.c --- old/libjpeg-turbo-1.5.0/jdarith.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/jdarith.c 2016-09-21 01:36:59.000000000 +0200 @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Developed 1997-2015 by Guido Vollbeding. * libjpeg-turbo Modifications: - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015-2016, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -382,7 +382,7 @@ if (arith_decode(cinfo, st)) v |= m; v += 1; if (sign) v = -v; /* Scale and output coefficient in natural (dezigzagged) order */ - (*block)[jpeg_natural_order[k]] = (JCOEF) (v << cinfo->Al); + (*block)[jpeg_natural_order[k]] = (JCOEF) ((unsigned)v << cinfo->Al); } return TRUE; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/jdhuff.c new/libjpeg-turbo-1.5.1/jdhuff.c --- old/libjpeg-turbo-1.5.0/jdhuff.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/jdhuff.c 2016-09-21 01:36:59.000000000 +0200 @@ -109,9 +109,9 @@ actbl = compptr->ac_tbl_no; /* Compute derived values for Huffman tables */ /* We may do this more than once for a table, but it's not expensive */ - pdtbl = entropy->dc_derived_tbls + dctbl; + pdtbl = (d_derived_tbl **)(entropy->dc_derived_tbls) + dctbl; jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, pdtbl); - pdtbl = entropy->ac_derived_tbls + actbl; + pdtbl = (d_derived_tbl **)(entropy->ac_derived_tbls) + actbl; jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, pdtbl); /* Initialize DC predictions to 0 */ entropy->saved.last_dc_val[ci] = 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/jdmaster.c new/libjpeg-turbo-1.5.1/jdmaster.c --- old/libjpeg-turbo-1.5.0/jdmaster.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/jdmaster.c 2016-09-21 01:36:59.000000000 +0200 @@ -22,6 +22,7 @@ #include "jpeglib.h" #include "jpegcomp.h" #include "jdmaster.h" +#include "jsimd.h" /* @@ -69,6 +70,17 @@ cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size || cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size) return FALSE; +#ifdef WITH_SIMD + /* If YCbCr-to-RGB color conversion is SIMD-accelerated but merged upsampling + isn't, then disabling merged upsampling is likely to be faster when + decompressing YCbCr JPEG images. */ + if (!jsimd_can_h2v2_merged_upsample() && !jsimd_can_h2v1_merged_upsample() && + jsimd_can_ycc_rgb() && cinfo->jpeg_color_space == JCS_YCbCr && + (cinfo->out_color_space == JCS_RGB || + (cinfo->out_color_space >= JCS_EXT_RGB && + cinfo->out_color_space <= JCS_EXT_ARGB))) + return FALSE; +#endif /* ??? also need to test for upsample-time rescaling, when & if supported */ return TRUE; /* by golly, it'll work... */ #else diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/jdphuff.c new/libjpeg-turbo-1.5.1/jdphuff.c --- old/libjpeg-turbo-1.5.0/jdphuff.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/jdphuff.c 2016-09-21 01:36:59.000000000 +0200 @@ -4,7 +4,7 @@ * This file was part of the Independent JPEG Group's software: * Copyright (C) 1995-1997, Thomas G. Lane. * libjpeg-turbo Modifications: - * Copyright (C) 2015, D. R. Commander. + * Copyright (C) 2015-2016, D. R. Commander. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -170,12 +170,12 @@ if (is_DC_band) { if (cinfo->Ah == 0) { /* DC refinement needs no table */ tbl = compptr->dc_tbl_no; - pdtbl = entropy->derived_tbls + tbl; + pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl; jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl); } } else { tbl = compptr->ac_tbl_no; - pdtbl = entropy->derived_tbls + tbl; + pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl; jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl); /* remember the single active table */ entropy->ac_derived_tbl = entropy->derived_tbls[tbl]; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/jdsample.c new/libjpeg-turbo-1.5.1/jdsample.c --- old/libjpeg-turbo-1.5.0/jdsample.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/jdsample.c 2016-09-21 01:36:59.000000000 +0200 @@ -304,6 +304,48 @@ /* + * Fancy processing for 1:1 horizontal and 2:1 vertical (4:4:0 subsampling). + * + * This is a less common case, but it can be encountered when losslessly + * rotating/transposing a JPEG file that uses 4:2:2 chroma subsampling. + */ + +METHODDEF(void) +h1v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info *compptr, + JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) +{ + JSAMPARRAY output_data = *output_data_ptr; + JSAMPROW inptr0, inptr1, outptr; +#if BITS_IN_JSAMPLE == 8 + int thiscolsum; +#else + JLONG thiscolsum; +#endif + JDIMENSION colctr; + int inrow, outrow, v; + + inrow = outrow = 0; + while (outrow < cinfo->max_v_samp_factor) { + for (v = 0; v < 2; v++) { + /* inptr0 points to nearest input row, inptr1 points to next nearest */ + inptr0 = input_data[inrow]; + if (v == 0) /* next nearest is row above */ + inptr1 = input_data[inrow-1]; + else /* next nearest is row below */ + inptr1 = input_data[inrow+1]; + outptr = output_data[outrow++]; + + for(colctr = 0; colctr < compptr->downsampled_width; colctr++) { + thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); + *outptr++ = (JSAMPLE) ((thiscolsum + 1) >> 2); + } + } + inrow++; + } +} + + +/* * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. * Again a triangle filter; see comments for h2v1 case, above. * @@ -431,6 +473,11 @@ else upsample->methods[ci] = h2v1_upsample; } + } else if (h_in_group == h_out_group && + v_in_group * 2 == v_out_group && do_fancy) { + /* Non-fancy upsampling is handled by the generic method */ + upsample->methods[ci] = h1v2_fancy_upsample; + upsample->pub.need_context_rows = TRUE; } else if (h_in_group * 2 == h_out_group && v_in_group * 2 == v_out_group) { /* Special cases for 2h2v upsampling */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/jmemmgr.c new/libjpeg-turbo-1.5.1/jmemmgr.c --- old/libjpeg-turbo-1.5.0/jmemmgr.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/jmemmgr.c 2016-09-21 01:36:59.000000000 +0200 @@ -32,6 +32,7 @@ #include "jinclude.h" #include "jpeglib.h" #include "jmemsys.h" /* import the system-dependent declarations */ +#include <stdint.h> #ifndef NO_GETENV #ifndef HAVE_STDLIB_H /* <stdlib.h> should declare getenv() */ @@ -650,18 +651,26 @@ maximum_space = 0; for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) { if (sptr->mem_buffer == NULL) { /* if not realized yet */ + size_t new_space = (long) sptr->rows_in_array * + (long) sptr->samplesperrow * sizeof(JSAMPLE); + space_per_minheight += (long) sptr->maxaccess * (long) sptr->samplesperrow * sizeof(JSAMPLE); - maximum_space += (long) sptr->rows_in_array * - (long) sptr->samplesperrow * sizeof(JSAMPLE); + if (SIZE_MAX - maximum_space < new_space) + out_of_memory(cinfo, 10); + maximum_space += new_space; } } for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) { if (bptr->mem_buffer == NULL) { /* if not realized yet */ + size_t new_space = (long) bptr->rows_in_array * + (long) bptr->blocksperrow * sizeof(JBLOCK); + space_per_minheight += (long) bptr->maxaccess * (long) bptr->blocksperrow * sizeof(JBLOCK); - maximum_space += (long) bptr->rows_in_array * - (long) bptr->blocksperrow * sizeof(JBLOCK); + if (SIZE_MAX - maximum_space < new_space) + out_of_memory(cinfo, 11); + maximum_space += new_space; } } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/jpegint.h new/libjpeg-turbo-1.5.1/jpegint.h --- old/libjpeg-turbo-1.5.0/jpegint.h 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/jpegint.h 2016-09-21 01:36:59.000000000 +0200 @@ -155,8 +155,8 @@ /* Partial decompression variables */ JDIMENSION first_iMCU_col; JDIMENSION last_iMCU_col; - JDIMENSION first_MCU_col[MAX_COMPS_IN_SCAN]; - JDIMENSION last_MCU_col[MAX_COMPS_IN_SCAN]; + JDIMENSION first_MCU_col[MAX_COMPONENTS]; + JDIMENSION last_MCU_col[MAX_COMPONENTS]; boolean jinit_upsampler_no_alloc; }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/simd/Makefile.am new/libjpeg-turbo-1.5.1/simd/Makefile.am --- old/libjpeg-turbo-1.5.0/simd/Makefile.am 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/simd/Makefile.am 2016-09-21 01:36:59.000000000 +0200 @@ -73,19 +73,24 @@ if SIMD_POWERPC -libsimd_la_SOURCES = jsimd_powerpc.c jsimd_altivec.h jcsample.h \ +noinst_LTLIBRARIES += libsimd_altivec.la + +libsimd_altivec_la_SOURCES = \ jccolor-altivec.c jcgray-altivec.c jcsample-altivec.c \ jdcolor-altivec.c jdmerge-altivec.c jdsample-altivec.c \ jfdctfst-altivec.c jfdctint-altivec.c \ jidctfst-altivec.c jidctint-altivec.c \ jquanti-altivec.c -libsimd_la_CFLAGS = -maltivec +libsimd_altivec_la_CFLAGS = -maltivec jccolor-altivec.lo: jccolext-altivec.c jcgray-altivec.lo: jcgryext-altivec.c jdcolor-altivec.lo: jdcolext-altivec.c jdmerge-altivec.lo: jdmrgext-altivec.c +libsimd_la_SOURCES = jsimd_powerpc.c jsimd_altivec.h jcsample.h +libsimd_la_LIBADD = libsimd_altivec.la + endif AM_CPPFLAGS = -I$(top_srcdir) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/simd/jsimd_arm.c new/libjpeg-turbo-1.5.1/simd/jsimd_arm.c --- old/libjpeg-turbo-1.5.0/simd/jsimd_arm.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/simd/jsimd_arm.c 2016-09-21 01:36:59.000000000 +0200 @@ -125,7 +125,7 @@ /* Force different settings through environment variables */ env = getenv("JSIMD_FORCENEON"); if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support &= JSIMD_ARM_NEON; + simd_support = JSIMD_ARM_NEON; env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/simd/jsimd_arm64.c new/libjpeg-turbo-1.5.1/simd/jsimd_arm64.c --- old/libjpeg-turbo-1.5.0/simd/jsimd_arm64.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/simd/jsimd_arm64.c 2016-09-21 01:36:59.000000000 +0200 @@ -142,7 +142,7 @@ /* Force different settings through environment variables */ env = getenv("JSIMD_FORCENEON"); if ((env != NULL) && (strcmp(env, "1") == 0)) - simd_support &= JSIMD_ARM_NEON; + simd_support = JSIMD_ARM_NEON; env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/simd/jsimd_arm64_neon.S new/libjpeg-turbo-1.5.1/simd/jsimd_arm64_neon.S --- old/libjpeg-turbo-1.5.0/simd/jsimd_arm64_neon.S 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/simd/jsimd_arm64_neon.S 2016-09-21 01:36:59.000000000 +0200 @@ -210,10 +210,16 @@ TMP7 .req x13 TMP8 .req x14 + /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't + guarantee that the upper (unused) 32 bits of x3 are valid. This + instruction ensures that those bits are set to zero. */ + uxtw x3, w3 + sub sp, sp, #64 adr x15, Ljsimd_idct_islow_neon_consts - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], #32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], #32 + mov x10, sp + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x10], #32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x10], #32 ld1 {v0.8h, v1.8h}, [x15] ld1 {v2.8h, v3.8h, v4.8h, v5.8h}, [COEF_BLOCK], #64 ld1 {v18.8h, v19.8h, v20.8h, v21.8h}, [DCT_TABLE], #64 @@ -238,7 +244,6 @@ shl v10.8h, v2.8h, #(PASS1_BITS) sqxtn v16.8b, v15.8h mov TMP1, v16.d[0] - sub sp, sp, #64 mvn TMP2, TMP1 cbnz TMP2, 2f @@ -807,6 +812,11 @@ TMP7 .req x13 TMP8 .req x14 + /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't + guarantee that the upper (unused) 32 bits of x3 are valid. This + instruction ensures that those bits are set to zero. */ + uxtw x3, w3 + /* Load and dequantize coefficients into NEON registers * with the following allocation: * 0 1 2 3 | 4 5 6 7 @@ -1101,19 +1111,18 @@ TMP3 .req x2 TMP4 .req x15 + /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't + guarantee that the upper (unused) 32 bits of x3 are valid. This + instruction ensures that those bits are set to zero. */ + uxtw x3, w3 + /* Save all used NEON registers */ - sub sp, sp, 272 - str x15, [sp], 16 + sub sp, sp, 64 + mov x9, sp /* Load constants (v3.4h is just used for padding) */ adr TMP4, Ljsimd_idct_4x4_neon_consts - st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 - st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 - st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 - st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 - st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32 ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4] /* Load all COEF_BLOCK into NEON registers with the following allocation: @@ -1222,16 +1231,8 @@ #endif /* vpop {v8.4h - v15.4h} ;not available */ - sub sp, sp, #272 - ldr x15, [sp], 16 - ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 - ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 - ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 - ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 - ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 blr x30 .unreq DCT_TABLE @@ -1299,19 +1300,19 @@ TMP1 .req x0 TMP2 .req x15 + /* OUTPUT_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't + guarantee that the upper (unused) 32 bits of x3 are valid. This + instruction ensures that those bits are set to zero. */ + uxtw x3, w3 + /* vpush {v8.4h - v15.4h} ; not available */ - sub sp, sp, 208 - str x15, [sp], 16 + sub sp, sp, 64 + mov x9, sp /* Load constants */ adr TMP2, Ljsimd_idct_2x2_neon_consts - st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 - st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 - st1 {v21.8b, v22.8b}, [sp], 16 - st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 - st1 {v30.8b, v31.8b}, [sp], 16 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32 ld1 {v14.4h}, [TMP2] /* Load all COEF_BLOCK into NEON registers with the following allocation: @@ -1411,15 +1412,8 @@ st1 {v26.b}[1], [TMP2], 1 st1 {v27.b}[5], [TMP2], 1 - sub sp, sp, #208 - ldr x15, [sp], 16 - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 - ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 - ld1 {v21.8b, v22.8b}, [sp], 16 - ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 - ld1 {v30.8b, v31.8b}, [sp], 16 blr x30 .unreq DCT_TABLE @@ -1688,24 +1682,24 @@ .else asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3 .endif - OUTPUT_WIDTH .req x0 + OUTPUT_WIDTH .req w0 INPUT_BUF .req x1 - INPUT_ROW .req x2 + INPUT_ROW .req w2 OUTPUT_BUF .req x3 - NUM_ROWS .req x4 + NUM_ROWS .req w4 INPUT_BUF0 .req x5 INPUT_BUF1 .req x6 INPUT_BUF2 .req x1 RGB .req x7 - Y .req x8 - U .req x9 - V .req x10 - N .req x15 + Y .req x9 + U .req x10 + V .req x11 + N .req w15 - sub sp, sp, 336 - str x15, [sp], 16 + sub sp, sp, 64 + mov x9, sp /* Load constants to d1, d2, d3 (v0.4h is just used for padding) */ .if \fast_st3 == 1 @@ -1715,23 +1709,11 @@ .endif /* Save NEON registers */ - st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 - st1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 - st1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 - st1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 - st1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 - st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32 ld1 {v0.4h, v1.4h}, [x15], 16 ld1 {v2.8h}, [x15] - /* Save ARM registers and handle input arguments */ - /* push {x4, x5, x6, x7, x8, x9, x10, x30} */ - stp x4, x5, [sp], 16 - stp x6, x7, [sp], 16 - stp x8, x9, [sp], 16 - stp x10, x30, [sp], 16 ldr INPUT_BUF0, [INPUT_BUF] ldr INPUT_BUF1, [INPUT_BUF, #8] ldr INPUT_BUF2, [INPUT_BUF, #16] @@ -1745,11 +1727,10 @@ cmp NUM_ROWS, #1 b.lt 9f 0: - lsl x16, INPUT_ROW, #3 - ldr Y, [INPUT_BUF0, x16] - ldr U, [INPUT_BUF1, x16] + ldr Y, [INPUT_BUF0, INPUT_ROW, uxtw #3] + ldr U, [INPUT_BUF1, INPUT_ROW, uxtw #3] mov N, OUTPUT_WIDTH - ldr V, [INPUT_BUF2, x16] + ldr V, [INPUT_BUF2, INPUT_ROW, uxtw #3] add INPUT_ROW, INPUT_ROW, #1 ldr RGB, [OUTPUT_BUF], #8 @@ -1799,21 +1780,8 @@ b.gt 0b 9: /* Restore all registers and return */ - sub sp, sp, #336 - ldr x15, [sp], 16 - ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [sp], 32 - ld1 {v4.8b, v5.8b, v6.8b, v7.8b}, [sp], 32 ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 - ld1 {v16.8b, v17.8b, v18.8b, v19.8b}, [sp], 32 - ld1 {v20.8b, v21.8b, v22.8b, v23.8b}, [sp], 32 - ld1 {v24.8b, v25.8b, v26.8b, v27.8b}, [sp], 32 - ld1 {v28.8b, v29.8b, v30.8b, v31.8b}, [sp], 32 - /* pop {r4, r5, r6, r7, r8, r9, r10, pc} */ - ldp x4, x5, [sp], 16 - ldp x6, x7, [sp], 16 - ldp x8, x9, [sp], 16 - ldp x10, x30, [sp], 16 br x30 .unreq OUTPUT_WIDTH .unreq INPUT_ROW @@ -2054,8 +2022,8 @@ OUTPUT_WIDTH .req w0 INPUT_BUF .req x1 OUTPUT_BUF .req x2 - OUTPUT_ROW .req x3 - NUM_ROWS .req x4 + OUTPUT_ROW .req w3 + NUM_ROWS .req w4 OUTPUT_BUF0 .req x5 OUTPUT_BUF1 .req x6 @@ -2082,17 +2050,18 @@ /* Save NEON registers */ sub sp, sp, #64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + mov x9, sp + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32 /* Outer loop over scanlines */ cmp NUM_ROWS, #1 b.lt 9f 0: - ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, lsl #3] - ldr U, [OUTPUT_BUF1, OUTPUT_ROW, lsl #3] + ldr Y, [OUTPUT_BUF0, OUTPUT_ROW, uxtw #3] + ldr U, [OUTPUT_BUF1, OUTPUT_ROW, uxtw #3] mov N, OUTPUT_WIDTH - ldr V, [OUTPUT_BUF2, OUTPUT_ROW, lsl #3] + ldr V, [OUTPUT_BUF2, OUTPUT_ROW, uxtw #3] add OUTPUT_ROW, OUTPUT_ROW, #1 ldr RGB, [INPUT_BUF], #8 @@ -2136,7 +2105,6 @@ b.gt 0b 9: /* Restore all registers and return */ - sub sp, sp, #64 ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 br x30 @@ -2199,6 +2167,11 @@ TMP8 .req x4 TMPDUP .req w3 + /* START_COL is a JDIMENSION (unsigned int) argument, so the ABI doesn't + guarantee that the upper (unused) 32 bits of x1 are valid. This + instruction ensures that those bits are set to zero. */ + uxtw x1, w1 + mov TMPDUP, #128 ldp TMP1, TMP2, [SAMPLE_DATA], 16 ldp TMP3, TMP4, [SAMPLE_DATA], 16 @@ -2335,8 +2308,9 @@ /* Save NEON registers */ sub sp, sp, #64 - st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 - st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 + mov x10, sp + st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x10], 32 + st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x10], 32 /* Load all DATA into NEON registers with the following allocation: * 0 1 2 3 | 4 5 6 7 @@ -2566,7 +2540,6 @@ st1 {v20.8h, v21.8h, v22.8h, v23.8h}, [DATA] /* Restore NEON registers */ - sub sp, sp, #64 ld1 {v8.8b, v9.8b, v10.8b, v11.8b}, [sp], 32 ld1 {v12.8b, v13.8b, v14.8b, v15.8b}, [sp], 32 @@ -3080,7 +3053,7 @@ sub sp, sp, 272 sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */ /* Save ARM registers */ - stp x19, x20, [sp], 16 + stp x19, x20, [sp] .if \fast_tbl == 1 adr x15, Ljsimd_huff_encode_one_block_neon_consts .else @@ -3294,7 +3267,7 @@ and v18.16b, v18.16b, v23.16b add x3, x4, #0x400 /* r1 = dctbl->ehufsi */ and v20.16b, v20.16b, v23.16b - add x15, sp, #0x80 /* x15 = t2 */ + add x15, sp, #0x90 /* x15 = t2 */ and v22.16b, v22.16b, v23.16b ldr w10, [x4, x12, lsl #2] addp v16.16b, v16.16b, v18.16b @@ -3317,7 +3290,7 @@ rbit x9, x9 /* x9 = index0 */ ldrb w14, [x4, #0xf0] /* x14 = actbl->ehufsi[0xf0] */ cmp w12, #(64-8) - mov x11, sp + add x11, sp, #16 b.lt 4f cbz x9, 6f st1 {v0.8h, v1.8h, v2.8h, v3.8h}, [x11], #64 @@ -3421,7 +3394,7 @@ put_bits x3, x11 cbnz x9, 1b 6: - add x13, sp, #0xfe + add x13, sp, #0x10e cmp x15, x13 b.hs 1f ldr w12, [x5] @@ -3429,7 +3402,6 @@ checkbuf47 put_bits x12, x14 1: - sub sp, sp, 16 str PUT_BUFFER, [x0, #0x10] str PUT_BITSw, [x0, #0x18] ldp x19, x20, [sp], 16 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/simd/jsimd_mips.c new/libjpeg-turbo-1.5.1/simd/jsimd_mips.c --- old/libjpeg-turbo-1.5.0/simd/jsimd_mips.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/simd/jsimd_mips.c 2016-09-21 01:36:59.000000000 +0200 @@ -2,7 +2,7 @@ * jsimd_mips.c * * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB - * Copyright (C) 2009-2011, 2014, D. R. Commander. + * Copyright (C) 2009-2011, 2014, 2016, D. R. Commander. * Copyright (C) 2013-2014, MIPS Technologies, Inc., California. * Copyright (C) 2015, Matthieu Darbois. * @@ -77,6 +77,14 @@ if (!parse_proc_cpuinfo("MIPS 74K")) return; #endif + + /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCEDSPR2"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = JSIMD_MIPS_DSPR2; + env = getenv("JSIMD_FORCENONE"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = 0; } static const int mips_idct_ifast_coefs[4] = { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/simd/jsimd_powerpc.c new/libjpeg-turbo-1.5.1/simd/jsimd_powerpc.c --- old/libjpeg-turbo-1.5.0/simd/jsimd_powerpc.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/simd/jsimd_powerpc.c 2016-09-21 01:36:59.000000000 +0200 @@ -2,7 +2,7 @@ * jsimd_powerpc.c * * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB - * Copyright (C) 2009-2011, 2014-2015, D. R. Commander. + * Copyright (C) 2009-2011, 2014-2016, D. R. Commander. * Copyright (C) 2015, Matthieu Darbois. * * Based on the x86 SIMD extension for IJG JPEG library, @@ -22,19 +22,106 @@ #include "../jsimddct.h" #include "jsimd.h" +#include <stdio.h> +#include <string.h> +#include <ctype.h> + static unsigned int simd_support = ~0; +#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + +#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) + +LOCAL(int) +check_feature (char *buffer, char *feature) +{ + char *p; + if (*feature == 0) + return 0; + if (strncmp(buffer, "cpu", 3) != 0) + return 0; + buffer += 3; + while (isspace(*buffer)) + buffer++; + + /* Check if 'feature' is present in the buffer as a separate word */ + while ((p = strstr(buffer, feature))) { + if (p > buffer && !isspace(*(p - 1))) { + buffer++; + continue; + } + p += strlen(feature); + if (*p != 0 && !isspace(*p)) { + buffer++; + continue; + } + return 1; + } + return 0; +} + +LOCAL(int) +parse_proc_cpuinfo (int bufsize) +{ + char *buffer = (char *)malloc(bufsize); + FILE *fd; + simd_support = 0; + + if (!buffer) + return 0; + + fd = fopen("/proc/cpuinfo", "r"); + if (fd) { + while (fgets(buffer, bufsize, fd)) { + if (!strchr(buffer, '\n') && !feof(fd)) { + /* "impossible" happened - insufficient size of the buffer! */ + fclose(fd); + free(buffer); + return 0; + } + if (check_feature(buffer, "altivec")) + simd_support |= JSIMD_ALTIVEC; + } + fclose(fd); + } + free(buffer); + return 1; +} + +#endif + +/* + * Check what SIMD accelerations are supported. + * + * FIXME: This code is racy under a multi-threaded environment. + */ LOCAL(void) init_simd (void) { char *env = NULL; +#if !defined(__ALTIVEC__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)) + int bufsize = 1024; /* an initial guess for the line buffer size limit */ +#endif if (simd_support != ~0U) return; - simd_support = JSIMD_ALTIVEC; + simd_support = 0; + +#if defined(__ALTIVEC__) || defined(__APPLE__) + simd_support |= JSIMD_ALTIVEC; +#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) + while (!parse_proc_cpuinfo(bufsize)) { + bufsize *= 2; + if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) + break; + } +#endif /* Force different settings through environment variables */ + env = getenv("JSIMD_FORCEALTIVEC"); + if ((env != NULL) && (strcmp(env, "1") == 0)) + simd_support = JSIMD_ALTIVEC; env = getenv("JSIMD_FORCENONE"); if ((env != NULL) && (strcmp(env, "1") == 0)) simd_support = 0; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/tjbench.c new/libjpeg-turbo-1.5.1/tjbench.c --- old/libjpeg-turbo-1.5.0/tjbench.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/tjbench.c 2016-09-21 01:36:59.000000000 +0200 @@ -248,7 +248,8 @@ int y=(int)((double)srcbuf[rindex]*0.299 + (double)srcbuf[gindex]*0.587 + (double)srcbuf[bindex]*0.114 + 0.5); - if(y>255) y=255; if(y<0) y=0; + if(y>255) y=255; + if(y<0) y=0; dstbuf[rindex]=abs(dstbuf[rindex]-y); dstbuf[gindex]=abs(dstbuf[gindex]-y); dstbuf[bindex]=abs(dstbuf[bindex]-y); @@ -300,7 +301,8 @@ for(tilew=dotile? 8:w, tileh=dotile? 8:h; ; tilew*=2, tileh*=2) { - if(tilew>w) tilew=w; if(tileh>h) tileh=h; + if(tilew>w) tilew=w; + if(tileh>h) tileh=h; ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) @@ -447,7 +449,8 @@ for(i=0; i<ntilesw*ntilesh; i++) { - if(jpegbuf[i]) tjFree(jpegbuf[i]); jpegbuf[i]=NULL; + if(jpegbuf[i]) tjFree(jpegbuf[i]); + jpegbuf[i]=NULL; } free(jpegbuf); jpegbuf=NULL; free(jpegsize); jpegsize=NULL; @@ -465,7 +468,8 @@ { for(i=0; i<ntilesw*ntilesh; i++) { - if(jpegbuf[i]) tjFree(jpegbuf[i]); jpegbuf[i]=NULL; + if(jpegbuf[i]) tjFree(jpegbuf[i]); + jpegbuf[i]=NULL; } free(jpegbuf); jpegbuf=NULL; } @@ -532,7 +536,8 @@ for(tilew=dotile? 16:w, tileh=dotile? 16:h; ; tilew*=2, tileh*=2) { - if(tilew>w) tilew=w; if(tileh>h) tileh=h; + if(tilew>w) tilew=w; + if(tileh>h) tileh=h; ntilesw=(w+tilew-1)/tilew; ntilesh=(h+tileh-1)/tileh; if((jpegbuf=(unsigned char **)malloc(sizeof(unsigned char *) @@ -692,7 +697,8 @@ { for(i=0; i<ntilesw*ntilesh; i++) { - if(jpegbuf[i]) tjFree(jpegbuf[i]); jpegbuf[i]=NULL; + if(jpegbuf[i]) tjFree(jpegbuf[i]); + jpegbuf[i]=NULL; } free(jpegbuf); jpegbuf=NULL; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/libjpeg-turbo-1.5.0/turbojpeg.c new/libjpeg-turbo-1.5.1/turbojpeg.c --- old/libjpeg-turbo-1.5.0/turbojpeg.c 2016-06-07 19:33:40.000000000 +0200 +++ new/libjpeg-turbo-1.5.1/turbojpeg.c 2016-09-21 01:36:59.000000000 +0200 @@ -368,6 +368,29 @@ retval=i; break; } } + /* Handle 4:2:2 and 4:4:0 images whose sampling factors are specified + in non-standard ways. */ + if(dinfo->comp_info[0].h_samp_factor==2 && + dinfo->comp_info[0].v_samp_factor==2 && + (i==TJSAMP_422 || i==TJSAMP_440)) + { + int match=0; + for(k=1; k<dinfo->num_components; k++) + { + int href=tjMCUHeight[i]/8, vref=tjMCUWidth[i]/8; + if(dinfo->jpeg_color_space==JCS_YCCK && k==3) + { + href=vref=2; + } + if(dinfo->comp_info[k].h_samp_factor==href + && dinfo->comp_info[k].v_samp_factor==vref) + match++; + } + if(match==dinfo->num_components-1) + { + retval=i; break; + } + } } } return retval; @@ -570,7 +593,8 @@ if(setjmp(this->jerr.setjmp_buffer)) { /* If we get here, the JPEG code has signaled an error. */ - if(this) free(this); return NULL; + if(this) free(this); + return NULL; } jpeg_create_compress(&this->cinfo); @@ -1231,7 +1255,8 @@ if(setjmp(this->jerr.setjmp_buffer)) { /* If we get here, the JPEG code has signaled an error. */ - if(this) free(this); return NULL; + if(this) free(this); + return NULL; } jpeg_create_decompress(&this->dinfo);
