From f758d6c8137fa3fda76620982feee04eb3773fe0 Mon Sep 17 00:00:00 2001 From: John Bowler Date: Thu, 13 Dec 2012 11:26:50 -0600 Subject: [PATCH] [libpng16] Rearranged the ARM-NEON optimizations: Isolated the machine specific code to the hardware subdirectory and added comments to pngrutil.c so that implementors of other optimizations know what to do. --- ANNOUNCE | 7 +++-- CHANGES | 5 +++- Makefile.am | 3 +- arm/arm_init.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ arm/filter_neon.S | 2 ++ configure.ac | 7 +++-- pngpriv.h | 9 ++++++ pngrutil.c | 72 +++++++-------------------------------------- 8 files changed, 111 insertions(+), 68 deletions(-) create mode 100644 arm/arm_init.c diff --git a/ANNOUNCE b/ANNOUNCE index dbcb945bf..b888474e4 100644 --- a/ANNOUNCE +++ b/ANNOUNCE @@ -1,5 +1,5 @@ -Libpng 1.6.0beta33 - December 10, 2012 +Libpng 1.6.0beta33 - December 13, 2012 This is not intended to be a public release. It will be replaced within a few weeks by a public version or by another test version. @@ -552,12 +552,15 @@ Version 1.6.0beta32 [November 25, 2012] Fixed error checking in the simplified write API (Olaf van der Spek) Made png_user_version_check() ok to use with libpng version 1.10.x and later. -Version 1.6.0beta33 [December 10, 2012] +Version 1.6.0beta33 [December 13, 2012] Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX) that causes the MALLOC_MAX limit not to work (John Bowler) Change png_warning() to png_app_error() in pngwrite.c and comment the fall-through condition. Change png_warning() to png_app_warning() in png_write_tRNS(). + Rearranged the ARM-NEON optimizations: Isolated the machine specific code + to the hardware subdirectory and added comments to pngrutil.c so that + implementors of other optimizations know what to do. 
Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/CHANGES b/CHANGES index 40d64f12c..0ce1c7ef0 100644 --- a/CHANGES +++ b/CHANGES @@ -4304,12 +4304,15 @@ Version 1.6.0beta32 [November 25, 2012] Fixed error checking in the simplified write API (Olaf van der Spek) Made png_user_version_check() ok to use with libpng version 1.10.x and later. -Version 1.6.0beta33 [December 10, 2012] +Version 1.6.0beta33 [December 13, 2012] Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX) that causes the MALLOC_MAX limit not to work (John Bowler) Change png_warning() to png_app_error() in pngwrite.c and comment the fall-through condition. Change png_warning() to png_app_warning() in png_write_tRNS(). + Rearranged the ARM-NEON optimizations: Isolated the machine specific code + to the hardware subdirectory and added comments to pngrutil.c so that + implementors of other optimizations know what to do. Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/Makefile.am b/Makefile.am index dfa5da82e..98c01ae66 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\ png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa if PNG_ARM_NEON -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S +libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\ + arm/filter_neon.S endif nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h diff --git a/arm/arm_init.c b/arm/arm_init.c new file mode 100644 index 000000000..6b0a925f2 --- /dev/null +++ b/arm/arm_init.c @@ -0,0 +1,74 @@ + +/* arm_init.c - NEON optimised filter functions + * + * Copyright (c) 2011 Glenn Randers-Pehrson + * Written by Mans Rullgard, 2011. + * + * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +#include "../pngpriv.h" + +/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */ +#if defined __linux__ && defined __arm__ +#include <stdio.h> +#include <elf.h> +#include <asm/hwcap.h> + +static int png_have_hwcap(unsigned cap) +{ + FILE *f = fopen("/proc/self/auxv", "r"); + Elf32_auxv_t aux; + int have_cap = 0; + + if (!f) + return 0; + + while (fread(&aux, sizeof(aux), 1, f) > 0) + { + if (aux.a_type == AT_HWCAP && + aux.a_un.a_val & cap) + { + have_cap = 1; + break; + } + } + + fclose(f); + + return have_cap; +} +#endif /* __linux__ && __arm__ */ + +void +png_init_filter_functions_neon(png_structp pp, unsigned int bpp) +{ +#ifdef __arm__ +#ifdef __linux__ + if (!png_have_hwcap(HWCAP_NEON)) + return; +#endif + + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; + + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth3_neon; + } + + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth4_neon; + } +#else + PNG_UNUSED(pp) + PNG_UNUSED(bpp) +#endif +} diff --git a/arm/filter_neon.S b/arm/filter_neon.S index 63a5d8c17..9ce04d3be 100644 --- a/arm/filter_neon.S +++ b/arm/filter_neon.S @@ -9,6 +9,7 @@ * and license in png.h */ +#ifdef __arm__ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ #endif @@ -223,3 +224,4 @@ func png_read_filter_row_paeth3_neon, export=1 pop {r4,pc} endfunc +#endif diff --git a/configure.ac b/configure.ac index c4941dd86..ecea4fabd 100644 --- a/configure.ac +++ b/configure.ac @@ -241,8 +241,11 @@
AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack]) AC_ARG_ENABLE([arm-neon], AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]), [if test "${enableval}" = "yes"; then - AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations]) - AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers]) + AC_DEFINE([PNG_FILTER_OPTIMIZATIONS], + [png_init_filter_functions_neon], + [ARM NEON filter initialization function]) + AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], + [Align row buffers]) fi]) AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes]) diff --git a/pngpriv.h b/pngpriv.h index 1a5636389..afad74324 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -1862,6 +1862,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY); #endif /* SIMPLIFIED READ/WRITE */ +#ifdef PNG_FILTER_OPTIMIZATIONS +PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); + /* This is the initialization function for hardware specific optimizations, + * one implementation (for ARM NEON machines) is contained in + * arm/arm_init.c. It need not be defined - the generic code will be used + * if not.
+ */ +#endif + /* Maintainer: Put new private prototypes here ^ */ #include "pngdebug.h" diff --git a/pngrutil.c b/pngrutil.c index aa5cd0cfb..db7afb315 100644 --- a/pngrutil.c +++ b/pngrutil.c @@ -3863,66 +3863,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row, } } -#ifdef PNG_ARM_NEON - -#ifdef __linux__ -#include <stdio.h> -#include <elf.h> -#include <asm/hwcap.h> - -static int png_have_hwcap(unsigned cap) -{ - FILE *f = fopen("/proc/self/auxv", "r"); - Elf32_auxv_t aux; - int have_cap = 0; - - if (!f) - return 0; - - while (fread(&aux, (sizeof aux), 1, f) > 0) - { - if (aux.a_type == AT_HWCAP && - aux.a_un.a_val & cap) - { - have_cap = 1; - break; - } - } - - fclose(f); - - return have_cap; -} -#endif /* __linux__ */ - -static void -png_init_filter_functions_neon(png_structrp pp, unsigned int bpp) -{ -#ifdef __linux__ - if (!png_have_hwcap(HWCAP_NEON)) - return; -#endif - - pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; - - if (bpp == 3) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth3_neon; - } - - else if (bpp == 4) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth4_neon; - } -} -#endif /* PNG_ARM_NEON */ - static void png_init_filter_functions(png_structrp pp) { @@ -3938,8 +3878,16 @@ png_init_filter_functions(png_structrp pp) pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth_multibyte_pixel; -#ifdef PNG_ARM_NEON - png_init_filter_functions_neon(pp, bpp); +#ifdef PNG_FILTER_OPTIMIZATIONS + /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to + * call to install hardware optimizations for the above functions; simply + * replace whatever
elements of the pp->read_filter[] array with a hardware + * specific (or, for that matter, generic) optimization. + * + * To see an example of this examine what configure.ac does when + * --enable-arm-neon is specified on the command line. + */ + PNG_FILTER_OPTIMIZATIONS(pp, bpp); #endif }