Added initial code for PowerPC VSX optimisation

This commit is contained in:
Vadim Barkov 2017-01-14 16:05:33 +03:00
parent eaca53a2d9
commit 2b6e59d968
7 changed files with 445 additions and 2 deletions

View File

@ -102,6 +102,11 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\
mips/filter_msa_intrinsics.c
endif
if PNG_POWERPC_VSX
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\
powerpc/filter_vsx_intrinsics.c
endif
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \

View File

@ -391,6 +391,52 @@ AM_CONDITIONAL([PNG_MIPS_MSA],
mipsel*|mips64el*) :;;
esac])
# PowerPC
# ===
#
# PowerPC VSX (SIMD) support.
AC_ARG_ENABLE([powerpc-vsx],
AS_HELP_STRING([[[--enable-powerpc-vsx]]],
[Enable POWERPC VSX optimizations: =no/off, check, api, yes/on:]
[no/off: disable the optimizations; check: use internal checking code]
[(deprecated and poorly supported); api: disable by default, enable by]
[a call to png_set_option; yes/on: turn on unconditionally.]
[If not specified: determined by the compiler.]),
[case "$enableval" in
no|off)
# disable the default enabling on __ppc64__ systems:
AC_DEFINE([PNG_POWERPC_VSX_OPT], [0],
[Disable POWERPC VSX optimizations])
# Prevent inclusion of the platform specific files below:
enable_powerpc_vsx=no;;
check)
AC_DEFINE([PNG_POWERPC_VSX_CHECK_SUPPORTED], [],
[Check for POWERPC VSX support at run-time]);;
api)
AC_DEFINE([PNG_POWERPC_VSX_API_SUPPORTED], [],
[Turn on POWERPC VSX optimizations at run-time]);;
yes|on)
AC_DEFINE([PNG_POWERPC_VSX_OPT], [2],
[Enable POWERPC VSX optimizations])
AC_MSG_WARN([--enable-powerpc-vsx: please specify 'check' or 'api', if]
[you want the optimizations unconditionally pass '-maltivec and -mabi=altivec']
[to the compiler.]);;
*)
AC_MSG_ERROR([--enable-powerpc-vsx=${enable_powerpc_vsx}: invalid value])
esac])
# Add PowerPC specific files to all builds where the host_cpu is powerpc('powerpc*') or
# where POWERPC optimizations were explicitly requested (this allows a fallback if a
# future host CPU does not match 'powerpc*')
AM_CONDITIONAL([PNG_POWERPC_VSX],
[test "$enable_powerpc_vsx" != 'no' &&
case "$host_cpu" in
powerpc*) :;;
esac])
AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]])
# Config files, substituting as above

5
png.h
View File

@ -3225,7 +3225,10 @@ PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory,
# define PNG_MIPS_MSA 6 /* HARDWARE: MIPS Msa SIMD instructions supported */
#endif
#define PNG_IGNORE_ADLER32 8
#define PNG_OPTION_NEXT 10 /* Next option - numbers must be even */
#ifdef PNG_POWERPC_VSX_API_SUPPORTED
# define PNG_POWERPC_VSX 10 /* HARDWARE: PowerPC VSX SIMD instructions supported */
#endif
#define PNG_OPTION_NEXT 12 /* Next option - numbers must be even */
/* Return values: NOTE: there are four values and 'off' is *not* zero */
#define PNG_OPTION_UNSET 0 /* Unset - defaults to off */

View File

@ -190,6 +190,14 @@
# endif
#endif
#ifndef PNG_POWERPC_VSX_OPT
# if defined(__ppc64__) && defined(__ALTIVEC__) && defined(PNG_ALIGNED_MEMORY_SUPPORTED)
# define PNG_POWERPC_VSX_OPT 2
# else
# define PNG_POWERPC_VSX_OPT 0
# endif
#endif
#ifndef PNG_INTEL_SSE_OPT
# ifdef PNG_INTEL_SSE
/* Only check for SSE if the build configuration has been modified to
@ -246,6 +254,11 @@
# endif
#endif /* PNG_MIPS_MSA_OPT > 0 */
#if PNG_POWERPC_VSX_OPT > 0
# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx
# define PNG_POWERPC_VSX_IMPLEMENTATION 1
#endif
/* Is this a build of a DLL where compilation of the object modules requires
* different preprocessor settings to those required for a simple library? If
@ -1292,6 +1305,23 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
#if PNG_POWERPC_VSX_OPT > 0
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info,
png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
#if PNG_INTEL_SSE_IMPLEMENTATION > 0
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);

View File

@ -3797,7 +3797,7 @@ png_read_filter_row_sub(png_row_infop row_info, png_bytep row,
png_size_t i;
png_size_t istop = row_info->rowbytes;
unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_bytep rp = row + bpp;
png_bytep rp = row + bpp;
PNG_UNUSED(prev_row)

View File

@ -0,0 +1,233 @@
/* filter_vsx_intrinsics.c - PowerPC optimised filter functions
*
* Copyright (c) 2016 Glenn Randers-Pehrson
* Written by Vadim Barkov, 2017.
* Last changed in libpng 1.6.25 [September 1, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include <stdio.h>
#include <stdint.h>
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
/* This code requires -maltivec and -mabi=altivec on the command line: */
#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
/* libpng row pointers are not necessarily aligned to any particular boundary,
* however this code will only work with appropriate alignment. arm/arm_init.c
* checks for this (and will not compile unless it is done). This code uses
* variants of png_aligncast to avoid compiler warnings.
*/
#define png_ptr(type,pointer) png_aligncast(type *,pointer)
#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
/*#include <altivec.h>*/
#if PNG_POWERPC_VSX_OPT > 0
void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_size_t istop = row_info->rowbytes;
png_bytep rp = row;
png_const_bytep pp = prev_row;
for (i = 0; i < istop; i++)
{
*rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
rp++;
}
}
void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_size_t istop = row_info->rowbytes;
unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_bytep rp = row + bpp;
PNG_UNUSED(prev_row)
for (i = bpp; i < istop; i++)
{
*rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
rp++;
}
}
void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_size_t istop = row_info->rowbytes;
unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_bytep rp = row + bpp;
PNG_UNUSED(prev_row)
for (i = bpp; i < istop; i++)
{
*rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
rp++;
}
}
void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_bytep rp = row;
png_const_bytep pp = prev_row;
unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_size_t istop = row_info->rowbytes - bpp;
for (i = 0; i < bpp; i++)
{
*rp = (png_byte)(((int)(*rp) +
((int)(*pp++) / 2 )) & 0xff);
rp++;
}
for (i = 0; i < istop; i++)
{
*rp = (png_byte)(((int)(*rp) +
(int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
rp++;
}
}
void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_bytep rp = row;
png_const_bytep pp = prev_row;
unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_size_t istop = row_info->rowbytes - bpp;
for (i = 0; i < bpp; i++)
{
*rp = (png_byte)(((int)(*rp) +
((int)(*pp++) / 2 )) & 0xff);
rp++;
}
for (i = 0; i < istop; i++)
{
*rp = (png_byte)(((int)(*rp) +
(int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
rp++;
}
}
void png_read_filter_row_paeth4_vsx(png_row_infop row_info,
png_bytep row,
png_const_bytep prev_row)
{
unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_bytep rp_end = row + bpp;
/* Process the first pixel in the row completely (this is the same as 'up'
* because there is only one candidate predictor for the first row).
*/
while (row < rp_end)
{
int a = *row + *prev_row++;
*row++ = (png_byte)a;
}
/* Remainder */
rp_end = rp_end + (row_info->rowbytes - bpp);
while (row < rp_end)
{
int a, b, c, pa, pb, pc, p;
c = *(prev_row - bpp);
a = *(row - bpp);
b = *prev_row++;
p = b - c;
pc = a - c;
#ifdef PNG_USE_ABS
pa = abs(p);
pb = abs(pc);
pc = abs(p + pc);
#else
pa = p < 0 ? -p : p;
pb = pc < 0 ? -pc : pc;
pc = (p + pc) < 0 ? -(p + pc) : p + pc;
#endif
if (pb < pa) pa = pb, a = b;
if (pc < pa) a = c;
a += *row;
*row++ = (png_byte)a;
}
}
void png_read_filter_row_paeth3_vsx(png_row_infop row_info,
png_bytep row,
png_const_bytep prev_row)
{
unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_bytep rp_end = row + bpp;
/* Process the first pixel in the row completely (this is the same as 'up'
* because there is only one candidate predictor for the first row).
*/
while (row < rp_end)
{
int a = *row + *prev_row++;
*row++ = (png_byte)a;
}
/* Remainder */
rp_end = rp_end + (row_info->rowbytes - bpp);
while (row < rp_end)
{
int a, b, c, pa, pb, pc, p;
c = *(prev_row - bpp);
a = *(row - bpp);
b = *prev_row++;
p = b - c;
pc = a - c;
#ifdef PNG_USE_ABS
pa = abs(p);
pb = abs(pc);
pc = abs(p + pc);
#else
pa = p < 0 ? -p : p;
pb = pc < 0 ? -pc : pc;
pc = (p + pc) < 0 ? -(p + pc) : p + pc;
#endif
if (pb < pa) pa = pb, a = b;
if (pc < pa) a = c;
a += *row;
*row++ = (png_byte)a;
}
}
#endif /* PNG_POWERPC_VSX_OPT > 0 */
#endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */
#endif /* READ */

126
powerpc/powerpc_init.c Normal file
View File

@ -0,0 +1,126 @@
/* powerpc_init.c - POWERPC optimised filter functions
*
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
* called.
*/
#define _POSIX_SOURCE 1
#include <stdio.h>
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_POWERPC_VSX_OPT > 0
#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED /* Do run-time checks */
/* WARNING: it is strongly recommended that you do not build libpng with
* run-time checks for CPU features if at all possible. In the case of the PowerPC
* VSX instructions there is no processor-specific way of detecting the
* presence of the required support, therefore run-time detection is extremely
* OS specific.
*
* You may set the macro PNG_POWERPC_VSX_FILE to the file name of file containing
* a fragment of C source code which defines the png_have_vsx function. There
* are a number of implementations in contrib/powerpc-vsx, but the only one that
* has partial support is contrib/powerpc-vsx/linux.c - a generic Linux
* implementation which reads /proc/cpufino.
*/
#ifndef PNG_POWERPC_VSX_FILE
# ifdef __linux__
# define PNG_POWERPC_VSX_FILE "contrib/powerpc-vsx/linux.c"
# endif
#endif
#ifdef PNG_POWERPC_VSX_FILE
#include <signal.h> /* for sig_atomic_t */
static int png_have_vsx(png_structp png_ptr);
#include PNG_POWERPC_VSX_FILE
#else /* PNG_POWERPC_VSX_FILE */
# error "PNG_POWERPC_VSX_FILE undefined: no support for run-time POWERPC VSX checks"
#endif /* PNG_POWERPC_VSX_FILE */
#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
#endif
void
png_init_filter_functions_vsx(png_structp pp, unsigned int bpp)
{
/* The switch statement is compiled in for POWERPC_VSX_API, the call to
* png_have_vsx is compiled in for POWERPC_VSX_CHECK. If both are defined
* the check is only performed if the API has not set the PowerPC option on
* or off explicitly. In this case the check controls what happens.
*/
#ifdef PNG_POWERPC_VSX_API_SUPPORTED
switch ((pp->options >> PNG_POWERPC_VSX) & 3)
{
case PNG_OPTION_UNSET:
/* Allow the run-time check to execute if it has been enabled -
* thus both API and CHECK can be turned on. If it isn't supported
* this case will fall through to the 'default' below, which just
* returns.
*/
#endif /* PNG_POWERPC_VSX_API_SUPPORTED */
#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED
{
static volatile sig_atomic_t no_vsx = -1; /* not checked */
if (no_vsx < 0)
no_vsx = !png_have_vsx(pp);
if (no_vsx)
return;
}
#ifdef PNG_POWERPC_VSX_API_SUPPORTED
break;
#endif
#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
#ifdef PNG_POWERPC_VSX_API_SUPPORTED
default: /* OFF or INVALID */
return;
case PNG_OPTION_ON:
/* Option turned on */
break;
}
#endif
/* IMPORTANT: any new internal functions used here must be declared using
* PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the
* 'prefix' option to configure works:
*
* ./configure --with-libpng-prefix=foobar_
*
* Verify you have got this right by running the above command, doing a build
* and examining pngprefix.h; it must contain a #define for every external
* function you add. (Notice that this happens automatically for the
* initialization function.)
*/
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vsx;
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vsx;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vsx;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vsx;
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vsx;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vsx;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vsx;
}
}
#endif /* PNG_POWERPC_VSX_OPT > 0 */
#endif /* READ */