From 2b6e59d96819e18a1852a7ac7ab08b19163dfe75 Mon Sep 17 00:00:00 2001 From: Vadim Barkov Date: Sat, 14 Jan 2017 16:05:33 +0300 Subject: [PATCH] Added initial code for PowerPC VSX optimisation --- Makefile.am | 5 + configure.ac | 46 +++++++ png.h | 5 +- pngpriv.h | 30 ++++ pngrutil.c | 2 +- powerpc/filter_vsx_intrinsics.c | 233 ++++++++++++++++++++++++++++++++ powerpc/powerpc_init.c | 126 +++++++++++++++++ 7 files changed, 445 insertions(+), 2 deletions(-) create mode 100644 powerpc/filter_vsx_intrinsics.c create mode 100644 powerpc/powerpc_init.c diff --git a/Makefile.am b/Makefile.am index fb209edd2..fa8281695 100644 --- a/Makefile.am +++ b/Makefile.am @@ -102,6 +102,11 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\ mips/filter_msa_intrinsics.c endif +if PNG_POWERPC_VSX +libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\ + powerpc/filter_vsx_intrinsics.c +endif + nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \ diff --git a/configure.ac b/configure.ac index c060c4603..4168eb268 100644 --- a/configure.ac +++ b/configure.ac @@ -391,6 +391,52 @@ AM_CONDITIONAL([PNG_MIPS_MSA], mipsel*|mips64el*) :;; esac]) +# PowerPC +# === +# +# PowerPC VSX (SIMD) support. + +AC_ARG_ENABLE([powerpc-vsx], +AS_HELP_STRING([[[--enable-powerpc-vsx]]], + [Enable POWERPC VSX optimizations: =no/off, check, api, yes/on:] + [no/off: disable the optimizations; check: use internal checking code] + [(deprecated and poorly supported); api: disable by default, enable by] + [a call to png_set_option; yes/on: turn on unconditionally.] + [If not specified: determined by the compiler.]), + [case "$enableval" in + no|off) + # disable the default enabling on __ppc64__ systems: + AC_DEFINE([PNG_POWERPC_VSX_OPT], [0], + [Disable POWERPC VSX optimizations]) + # Prevent inclusion of the platform specific files below: + enable_powerpc_vsx=no;; + check) + AC_DEFINE([PNG_POWERPC_VSX_CHECK_SUPPORTED], [], + [Check for POWERPC VSX support at run-time]);; + api) + AC_DEFINE([PNG_POWERPC_VSX_API_SUPPORTED], [], + [Turn on POWERPC VSX optimizations at run-time]);; + yes|on) + AC_DEFINE([PNG_POWERPC_VSX_OPT], [2], + [Enable POWERPC VSX optimizations]) + AC_MSG_WARN([--enable-powerpc-vsx: please specify 'check' or 'api', if] + [you want the optimizations unconditionally pass '-maltivec and -mabi=altivec'] + [to the compiler.]);; + *) + AC_MSG_ERROR([--enable-powerpc-vsx=${enable_powerpc_vsx}: invalid value]) + esac]) + +# Add PowerPC specific files to all builds where the host_cpu is powerpc('powerpc*') or +# where POWERPC optimizations were explicitly requested (this allows a fallback if a +# future host CPU does not match 'powerpc*') + +AM_CONDITIONAL([PNG_POWERPC_VSX], + [test "$enable_powerpc_vsx" != 'no' && + case "$host_cpu" in + powerpc*) :;; + esac]) + + AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]]) # Config files, substituting as above diff --git a/png.h b/png.h index cad9a825e..f62f44504 100644 --- a/png.h +++ b/png.h @@ -3225,7 +3225,10 @@ PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory, # define PNG_MIPS_MSA 6 /* HARDWARE: MIPS Msa SIMD instructions supported */ #endif #define PNG_IGNORE_ADLER32 8 -#define PNG_OPTION_NEXT 10 /* Next option - numbers must be even */ +#ifdef PNG_POWERPC_VSX_API_SUPPORTED +# define PNG_POWERPC_VSX 10 /* HARDWARE: PowerPC VSX SIMD instructions supported */ +#endif +#define PNG_OPTION_NEXT 12 /* Next option - numbers must be even */ /* Return values: NOTE: there are four values and 'off' is *not* zero */ #define PNG_OPTION_UNSET 0 /* Unset - defaults to off */ diff --git a/pngpriv.h b/pngpriv.h index 50ff68b1c..00981abd3 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -190,6 +190,14 @@ # endif #endif +#ifndef PNG_POWERPC_VSX_OPT +# if defined(__ppc64__) && defined(__ALTIVEC__) && defined(PNG_ALIGNED_MEMORY_SUPPORTED) +# define PNG_POWERPC_VSX_OPT 2 +# else +# define PNG_POWERPC_VSX_OPT 0 +# endif +#endif + #ifndef PNG_INTEL_SSE_OPT # ifdef PNG_INTEL_SSE /* Only check for SSE if the build configuration has been modified to @@ -246,6 +254,11 @@ # endif #endif /* PNG_MIPS_MSA_OPT > 0 */ +#if PNG_POWERPC_VSX_OPT > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx +# define PNG_POWERPC_VSX_IMPLEMENTATION 1 +#endif + /* Is this a build of a DLL where compilation of the object modules requires * different preprocessor settings to those required for a simple library? If @@ -1292,6 +1305,23 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); #endif +#if PNG_POWERPC_VSX_OPT > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info, + png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vsx,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + #if PNG_INTEL_SSE_IMPLEMENTATION > 0 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); diff --git a/pngrutil.c b/pngrutil.c index c34da5d4c..97f50b09f 100644 --- a/pngrutil.c +++ b/pngrutil.c @@ -3797,7 +3797,7 @@ png_read_filter_row_sub(png_row_infop row_info, png_bytep row, png_size_t i; png_size_t istop = row_info->rowbytes; unsigned int bpp = (row_info->pixel_depth + 7) >> 3; - png_bytep rp = row + bpp; + png_bytep rp = row + bpp; PNG_UNUSED(prev_row) diff --git a/powerpc/filter_vsx_intrinsics.c b/powerpc/filter_vsx_intrinsics.c new file mode 100644 index 000000000..8e4ef2930 --- /dev/null +++ b/powerpc/filter_vsx_intrinsics.c @@ -0,0 +1,233 @@ + +/* filter_vsx_intrinsics.c - PowerPC optimised filter functions + * + * Copyright (c) 2016 Glenn Randers-Pehrson + * Written by Vadim Barkov, 2017. + * Last changed in libpng 1.6.25 [September 1, 2016] + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +#include +#include +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +/* This code requires -maltivec and -mabi=altivec on the command line: */ +#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ + +/* libpng row pointers are not necessarily aligned to any particular boundary, + * however this code will only work with appropriate alignment. arm/arm_init.c + * checks for this (and will not compile unless it is done). This code uses + * variants of png_aligncast to avoid compiler warnings. + */ +#define png_ptr(type,pointer) png_aligncast(type *,pointer) +#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer) + +/*#include */ + +#if PNG_POWERPC_VSX_OPT > 0 + +void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_size_t i; + png_size_t istop = row_info->rowbytes; + png_bytep rp = row; + png_const_bytep pp = prev_row; + + for (i = 0; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff); + rp++; + } + +} + +void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_size_t i; + png_size_t istop = row_info->rowbytes; + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_bytep rp = row + bpp; + + PNG_UNUSED(prev_row) + + for (i = bpp; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff); + rp++; + } +} + +void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_size_t i; + png_size_t istop = row_info->rowbytes; + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_bytep rp = row + bpp; + + PNG_UNUSED(prev_row) + + for (i = bpp; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff); + rp++; + } +} + +void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_size_t i; + png_bytep rp = row; + png_const_bytep pp = prev_row; + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_size_t istop = row_info->rowbytes - bpp; + + for (i = 0; i < bpp; i++) + { + *rp = (png_byte)(((int)(*rp) + + ((int)(*pp++) / 2 )) & 0xff); + + rp++; + } + + for (i = 0; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + + (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff); + + rp++; + } +} + +void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_size_t i; + png_bytep rp = row; + png_const_bytep pp = prev_row; + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_size_t istop = row_info->rowbytes - bpp; + + for (i = 0; i < bpp; i++) + { + *rp = (png_byte)(((int)(*rp) + + ((int)(*pp++) / 2 )) & 0xff); + + rp++; + } + + for (i = 0; i < istop; i++) + { + *rp = (png_byte)(((int)(*rp) + + (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff); + + rp++; + } +} + +void png_read_filter_row_paeth4_vsx(png_row_infop row_info, + png_bytep row, + png_const_bytep prev_row) +{ + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_bytep rp_end = row + bpp; + + /* Process the first pixel in the row completely (this is the same as 'up' + * because there is only one candidate predictor for the first row). + */ + while (row < rp_end) + { + int a = *row + *prev_row++; + *row++ = (png_byte)a; + } + + /* Remainder */ + rp_end = rp_end + (row_info->rowbytes - bpp); + + while (row < rp_end) + { + int a, b, c, pa, pb, pc, p; + + c = *(prev_row - bpp); + a = *(row - bpp); + b = *prev_row++; + + p = b - c; + pc = a - c; + + #ifdef PNG_USE_ABS + pa = abs(p); + pb = abs(pc); + pc = abs(p + pc); + #else + pa = p < 0 ? -p : p; + pb = pc < 0 ? -pc : pc; + pc = (p + pc) < 0 ? -(p + pc) : p + pc; + #endif + + if (pb < pa) pa = pb, a = b; + if (pc < pa) a = c; + + a += *row; + *row++ = (png_byte)a; + } +} + +void png_read_filter_row_paeth3_vsx(png_row_infop row_info, + png_bytep row, + png_const_bytep prev_row) +{ + unsigned int bpp = (row_info->pixel_depth + 7) >> 3; + png_bytep rp_end = row + bpp; + + /* Process the first pixel in the row completely (this is the same as 'up' + * because there is only one candidate predictor for the first row). + */ + while (row < rp_end) + { + int a = *row + *prev_row++; + *row++ = (png_byte)a; + } + + /* Remainder */ + rp_end = rp_end + (row_info->rowbytes - bpp); + + while (row < rp_end) + { + int a, b, c, pa, pb, pc, p; + + c = *(prev_row - bpp); + a = *(row - bpp); + b = *prev_row++; + + p = b - c; + pc = a - c; + + #ifdef PNG_USE_ABS + pa = abs(p); + pb = abs(pc); + pc = abs(p + pc); + #else + pa = p < 0 ? -p : p; + pb = pc < 0 ? -pc : pc; + pc = (p + pc) < 0 ? -(p + pc) : p + pc; + #endif + + if (pb < pa) pa = pb, a = b; + if (pc < pa) a = c; + + a += *row; + *row++ = (png_byte)a; + } +} + +#endif /* PNG_POWERPC_VSX_OPT > 0 */ +#endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */ +#endif /* READ */ diff --git a/powerpc/powerpc_init.c b/powerpc/powerpc_init.c new file mode 100644 index 000000000..d3aeb28db --- /dev/null +++ b/powerpc/powerpc_init.c @@ -0,0 +1,126 @@ + +/* powerpc_init.c - POWERPC optimised filter functions + * + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are + * called. + */ +#define _POSIX_SOURCE 1 + +#include +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +#if PNG_POWERPC_VSX_OPT > 0 +#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED /* Do run-time checks */ +/* WARNING: it is strongly recommended that you do not build libpng with + * run-time checks for CPU features if at all possible. In the case of the PowerPC + * VSX instructions there is no processor-specific way of detecting the + * presence of the required support, therefore run-time detection is extremely + * OS specific. + * + * You may set the macro PNG_POWERPC_VSX_FILE to the file name of file containing + * a fragment of C source code which defines the png_have_vsx function. There + * are a number of implementations in contrib/powerpc-vsx, but the only one that + * has partial support is contrib/powerpc-vsx/linux.c - a generic Linux + * implementation which reads /proc/cpufino. + */ +#ifndef PNG_POWERPC_VSX_FILE +# ifdef __linux__ +# define PNG_POWERPC_VSX_FILE "contrib/powerpc-vsx/linux.c" +# endif +#endif + +#ifdef PNG_POWERPC_VSX_FILE + +#include /* for sig_atomic_t */ +static int png_have_vsx(png_structp png_ptr); +#include PNG_POWERPC_VSX_FILE + +#else /* PNG_POWERPC_VSX_FILE */ +# error "PNG_POWERPC_VSX_FILE undefined: no support for run-time POWERPC VSX checks" +#endif /* PNG_POWERPC_VSX_FILE */ +#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */ + +#ifndef PNG_ALIGNED_MEMORY_SUPPORTED +# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" +#endif + +void +png_init_filter_functions_vsx(png_structp pp, unsigned int bpp) +{ + /* The switch statement is compiled in for POWERPC_VSX_API, the call to + * png_have_vsx is compiled in for POWERPC_VSX_CHECK. If both are defined + * the check is only performed if the API has not set the PowerPC option on + * or off explicitly. In this case the check controls what happens. + */ + +#ifdef PNG_POWERPC_VSX_API_SUPPORTED + switch ((pp->options >> PNG_POWERPC_VSX) & 3) + { + case PNG_OPTION_UNSET: + /* Allow the run-time check to execute if it has been enabled - + * thus both API and CHECK can be turned on. If it isn't supported + * this case will fall through to the 'default' below, which just + * returns. + */ +#endif /* PNG_POWERPC_VSX_API_SUPPORTED */ +#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED + { + static volatile sig_atomic_t no_vsx = -1; /* not checked */ + + if (no_vsx < 0) + no_vsx = !png_have_vsx(pp); + + if (no_vsx) + return; + } +#ifdef PNG_POWERPC_VSX_API_SUPPORTED + break; +#endif +#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */ + +#ifdef PNG_POWERPC_VSX_API_SUPPORTED + default: /* OFF or INVALID */ + return; + + case PNG_OPTION_ON: + /* Option turned on */ + break; + } +#endif + + /* IMPORTANT: any new internal functions used here must be declared using + * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the + * 'prefix' option to configure works: + * + * ./configure --with-libpng-prefix=foobar_ + * + * Verify you have got this right by running the above command, doing a build + * and examining pngprefix.h; it must contain a #define for every external + * function you add. (Notice that this happens automatically for the + * initialization function.) + */ + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vsx; + + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vsx; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vsx; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vsx; + } + + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vsx; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vsx; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vsx; + } +} +#endif /* PNG_POWERPC_VSX_OPT > 0 */ +#endif /* READ */