[libpng16] Rearranged the ARM-NEON optimizations: Isolated the machine specific

code to the hardware subdirectory and added comments to pngrutil.c so that
implementors of other optimizations know what to do.
This commit is contained in:
John Bowler 2012-12-13 11:26:50 -06:00 committed by Glenn Randers-Pehrson
parent 5c2d76fdef
commit f758d6c813
8 changed files with 111 additions and 68 deletions

View File

@ -1,5 +1,5 @@
Libpng 1.6.0beta33 - December 10, 2012
Libpng 1.6.0beta33 - December 13, 2012
This is not intended to be a public release. It will be replaced
within a few weeks by a public version or by another test version.
@ -552,12 +552,15 @@ Version 1.6.0beta32 [November 25, 2012]
Fixed error checking in the simplified write API (Olaf van der Spek)
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
Version 1.6.0beta33 [December 10, 2012]
Version 1.6.0beta33 [December 13, 2012]
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
that causes the MALLOC_MAX limit not to work (John Bowler)
Change png_warning() to png_app_error() in pngwrite.c and comment the
fall-through condition.
Change png_warning() to png_app_warning() in png_write_tRNS().
Rearranged the ARM-NEON optimizations: Isolated the machine specific code
to the hardware subdirectory and added comments to pngrutil.c so that
implementors of other optimizations know what to do.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit

View File

@ -4304,12 +4304,15 @@ Version 1.6.0beta32 [November 25, 2012]
Fixed error checking in the simplified write API (Olaf van der Spek)
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
Version 1.6.0beta33 [December 10, 2012]
Version 1.6.0beta33 [December 13, 2012]
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
that causes the MALLOC_MAX limit not to work (John Bowler)
Change png_warning() to png_app_error() in pngwrite.c and comment the
fall-through condition.
Change png_warning() to png_app_warning() in png_write_tRNS().
Rearranged the ARM-NEON optimizations: Isolated the machine specific code
to the hardware subdirectory and added comments to pngrutil.c so that
implementors of other optimizations know what to do.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit

View File

@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
if PNG_ARM_NEON
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
arm/filter_neon.S
endif
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h

74
arm/arm_init.c Normal file
View File

@ -0,0 +1,74 @@
/* filter_neon.S - NEON optimised filter functions
*
* Copyright (c) 2011 Glenn Randers-Pehrson
* Written by Mans Rullgard, 2011.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */
#if defined __linux__ && defined __arm__
#include <stdio.h>
#include <elf.h>
#include <asm/hwcap.h>
static int png_have_hwcap(unsigned cap)
{
FILE *f = fopen("/proc/self/auxv", "r");
Elf32_auxv_t aux;
int have_cap = 0;
if (!f)
return 0;
while (fread(&aux, sizeof(aux), 1, f) > 0)
{
if (aux.a_type == AT_HWCAP &&
aux.a_un.a_val & cap)
{
have_cap = 1;
break;
}
}
fclose(f);
return have_cap;
}
#endif /* __linux__ && __arm__ */
void
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
{
#ifdef __arm__
#ifdef __linux__
if (!png_have_hwcap(HWCAP_NEON))
return;
#endif
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth3_neon;
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth4_neon;
}
#else
PNG_UNUSED(pp)
PNG_UNUSED(bpp)
#endif
}

View File

@ -9,6 +9,7 @@
* and license in png.h
*/
#ifdef __arm__
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
#endif
@ -223,3 +224,4 @@ func png_read_filter_row_paeth3_neon, export=1
pop {r4,pc}
endfunc
#endif

View File

@ -241,8 +241,11 @@ AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack])
AC_ARG_ENABLE([arm-neon],
AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
[if test "${enableval}" = "yes"; then
AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations])
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers])
AC_DEFINE([PNG_FILTER_OPTIMIZATIONS],
[png_init_filter_functions_neon],
[ARM NEON filter initialization function])
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1],
[Align row buffers])
fi])
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])

View File

@ -1862,6 +1862,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
#endif /* SIMPLIFIED READ/WRITE */
#ifdef PNG_FILTER_OPTIMIZATIONS
PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
/* This is the initialization function for hardware specific optimizations,
* one implementation (for ARM NEON machines) is contained in
* arm/filter_neon.c. It need not be defined - the generic code will be used
* if not.
*/
#endif
/* Maintainer: Put new private prototypes here ^ */
#include "pngdebug.h"

View File

@ -3863,66 +3863,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
}
}
#ifdef PNG_ARM_NEON
#ifdef __linux__
#include <stdio.h>
#include <elf.h>
#include <asm/hwcap.h>
static int png_have_hwcap(unsigned cap)
{
FILE *f = fopen("/proc/self/auxv", "r");
Elf32_auxv_t aux;
int have_cap = 0;
if (!f)
return 0;
while (fread(&aux, (sizeof aux), 1, f) > 0)
{
if (aux.a_type == AT_HWCAP &&
aux.a_un.a_val & cap)
{
have_cap = 1;
break;
}
}
fclose(f);
return have_cap;
}
#endif /* __linux__ */
static void
png_init_filter_functions_neon(png_structrp pp, unsigned int bpp)
{
#ifdef __linux__
if (!png_have_hwcap(HWCAP_NEON))
return;
#endif
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth3_neon;
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth4_neon;
}
}
#endif /* PNG_ARM_NEON */
static void
png_init_filter_functions(png_structrp pp)
{
@ -3938,8 +3878,16 @@ png_init_filter_functions(png_structrp pp)
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth_multibyte_pixel;
#ifdef PNG_ARM_NEON
png_init_filter_functions_neon(pp, bpp);
#ifdef PNG_FILTER_OPTIMIZATIONS
/* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
* call to install hardware optimizations for the above functions; simply
* replace whatever elements of the pp->read_filter[] array with a hardware
* specific (or, for that matter, generic) optimization.
*
* To see an example of this examine what configure.ac does when
* --enable-arm-neon is specified on the command line.
*/
PNG_FILTER_OPTIMIZATIONS(pp, bpp);
#endif
}