[libpng16] Rearranged the ARM-NEON optimizations: Isolated the machine specific
code to the hardware subdirectory and added comments to pngrutil.c so that implementors of other optimizations know what to do.
This commit is contained in:
parent
5c2d76fdef
commit
f758d6c813
7
ANNOUNCE
7
ANNOUNCE
@ -1,5 +1,5 @@
|
||||
|
||||
Libpng 1.6.0beta33 - December 10, 2012
|
||||
Libpng 1.6.0beta33 - December 13, 2012
|
||||
|
||||
This is not intended to be a public release. It will be replaced
|
||||
within a few weeks by a public version or by another test version.
|
||||
@ -552,12 +552,15 @@ Version 1.6.0beta32 [November 25, 2012]
|
||||
Fixed error checking in the simplified write API (Olaf van der Spek)
|
||||
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
|
||||
|
||||
Version 1.6.0beta33 [December 10, 2012]
|
||||
Version 1.6.0beta33 [December 13, 2012]
|
||||
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
|
||||
that causes the MALLOC_MAX limit not to work (John Bowler)
|
||||
Change png_warning() to png_app_error() in pngwrite.c and comment the
|
||||
fall-through condition.
|
||||
Change png_warning() to png_app_warning() in png_write_tRNS().
|
||||
Rearranged the ARM-NEON optimizations: Isolated the machine specific code
|
||||
to the hardware subdirectory and added comments to pngrutil.c so that
|
||||
implementors of other optimizations know what to do.
|
||||
|
||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
||||
(subscription required; visit
|
||||
|
5
CHANGES
5
CHANGES
@ -4304,12 +4304,15 @@ Version 1.6.0beta32 [November 25, 2012]
|
||||
Fixed error checking in the simplified write API (Olaf van der Spek)
|
||||
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
|
||||
|
||||
Version 1.6.0beta33 [December 10, 2012]
|
||||
Version 1.6.0beta33 [December 13, 2012]
|
||||
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
|
||||
that causes the MALLOC_MAX limit not to work (John Bowler)
|
||||
Change png_warning() to png_app_error() in pngwrite.c and comment the
|
||||
fall-through condition.
|
||||
Change png_warning() to png_app_warning() in png_write_tRNS().
|
||||
Rearranged the ARM-NEON optimizations: Isolated the machine specific code
|
||||
to the hardware subdirectory and added comments to pngrutil.c so that
|
||||
implementors of other optimizations know what to do.
|
||||
|
||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
||||
(subscription required; visit
|
||||
|
@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
|
||||
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
|
||||
|
||||
if PNG_ARM_NEON
|
||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S
|
||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
|
||||
arm/filter_neon.S
|
||||
endif
|
||||
|
||||
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
|
||||
|
74
arm/arm_init.c
Normal file
74
arm/arm_init.c
Normal file
@ -0,0 +1,74 @@
|
||||
|
||||
/* filter_neon.S - NEON optimised filter functions
|
||||
*
|
||||
* Copyright (c) 2011 Glenn Randers-Pehrson
|
||||
* Written by Mans Rullgard, 2011.
|
||||
*
|
||||
* This code is released under the libpng license.
|
||||
* For conditions of distribution and use, see the disclaimer
|
||||
* and license in png.h
|
||||
*/
|
||||
#include "../pngpriv.h"
|
||||
|
||||
/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */
|
||||
#if defined __linux__ && defined __arm__
|
||||
#include <stdio.h>
|
||||
#include <elf.h>
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
static int png_have_hwcap(unsigned cap)
|
||||
{
|
||||
FILE *f = fopen("/proc/self/auxv", "r");
|
||||
Elf32_auxv_t aux;
|
||||
int have_cap = 0;
|
||||
|
||||
if (!f)
|
||||
return 0;
|
||||
|
||||
while (fread(&aux, sizeof(aux), 1, f) > 0)
|
||||
{
|
||||
if (aux.a_type == AT_HWCAP &&
|
||||
aux.a_un.a_val & cap)
|
||||
{
|
||||
have_cap = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
||||
return have_cap;
|
||||
}
|
||||
#endif /* __linux__ && __arm__ */
|
||||
|
||||
void
|
||||
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
|
||||
{
|
||||
#ifdef __arm__
|
||||
#ifdef __linux__
|
||||
if (!png_have_hwcap(HWCAP_NEON))
|
||||
return;
|
||||
#endif
|
||||
|
||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
|
||||
|
||||
if (bpp == 3)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth3_neon;
|
||||
}
|
||||
|
||||
else if (bpp == 4)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth4_neon;
|
||||
}
|
||||
#else
|
||||
PNG_UNUSED(pp)
|
||||
PNG_UNUSED(bpp)
|
||||
#endif
|
||||
}
|
@ -9,6 +9,7 @@
|
||||
* and license in png.h
|
||||
*/
|
||||
|
||||
#ifdef __arm__
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
|
||||
#endif
|
||||
@ -223,3 +224,4 @@ func png_read_filter_row_paeth3_neon, export=1
|
||||
|
||||
pop {r4,pc}
|
||||
endfunc
|
||||
#endif
|
||||
|
@ -241,8 +241,11 @@ AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack])
|
||||
AC_ARG_ENABLE([arm-neon],
|
||||
AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
|
||||
[if test "${enableval}" = "yes"; then
|
||||
AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations])
|
||||
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers])
|
||||
AC_DEFINE([PNG_FILTER_OPTIMIZATIONS],
|
||||
[png_init_filter_functions_neon],
|
||||
[ARM NEON filter initialization function])
|
||||
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1],
|
||||
[Align row buffers])
|
||||
fi])
|
||||
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])
|
||||
|
||||
|
@ -1862,6 +1862,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
|
||||
|
||||
#endif /* SIMPLIFIED READ/WRITE */
|
||||
|
||||
#ifdef PNG_FILTER_OPTIMIZATIONS
|
||||
PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
|
||||
/* This is the initialization function for hardware specific optimizations,
|
||||
* one implementation (for ARM NEON machines) is contained in
|
||||
* arm/filter_neon.c. It need not be defined - the generic code will be used
|
||||
* if not.
|
||||
*/
|
||||
#endif
|
||||
|
||||
/* Maintainer: Put new private prototypes here ^ */
|
||||
|
||||
#include "pngdebug.h"
|
||||
|
72
pngrutil.c
72
pngrutil.c
@ -3863,66 +3863,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef PNG_ARM_NEON
|
||||
|
||||
#ifdef __linux__
|
||||
#include <stdio.h>
|
||||
#include <elf.h>
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
static int png_have_hwcap(unsigned cap)
|
||||
{
|
||||
FILE *f = fopen("/proc/self/auxv", "r");
|
||||
Elf32_auxv_t aux;
|
||||
int have_cap = 0;
|
||||
|
||||
if (!f)
|
||||
return 0;
|
||||
|
||||
while (fread(&aux, (sizeof aux), 1, f) > 0)
|
||||
{
|
||||
if (aux.a_type == AT_HWCAP &&
|
||||
aux.a_un.a_val & cap)
|
||||
{
|
||||
have_cap = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
||||
return have_cap;
|
||||
}
|
||||
#endif /* __linux__ */
|
||||
|
||||
static void
|
||||
png_init_filter_functions_neon(png_structrp pp, unsigned int bpp)
|
||||
{
|
||||
#ifdef __linux__
|
||||
if (!png_have_hwcap(HWCAP_NEON))
|
||||
return;
|
||||
#endif
|
||||
|
||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
|
||||
|
||||
if (bpp == 3)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth3_neon;
|
||||
}
|
||||
|
||||
else if (bpp == 4)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth4_neon;
|
||||
}
|
||||
}
|
||||
#endif /* PNG_ARM_NEON */
|
||||
|
||||
static void
|
||||
png_init_filter_functions(png_structrp pp)
|
||||
{
|
||||
@ -3938,8 +3878,16 @@ png_init_filter_functions(png_structrp pp)
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth_multibyte_pixel;
|
||||
|
||||
#ifdef PNG_ARM_NEON
|
||||
png_init_filter_functions_neon(pp, bpp);
|
||||
#ifdef PNG_FILTER_OPTIMIZATIONS
|
||||
/* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
|
||||
* call to install hardware optimizations for the above functions; simply
|
||||
* replace whatever elements of the pp->read_filter[] array with a hardware
|
||||
* specific (or, for that matter, generic) optimization.
|
||||
*
|
||||
* To see an example of this examine what configure.ac does when
|
||||
* --enable-arm-neon is specified on the command line.
|
||||
*/
|
||||
PNG_FILTER_OPTIMIZATIONS(pp, bpp);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user