From 65c0339b5c73dcc29f19896dd310c4491df62677 Mon Sep 17 00:00:00 2001 From: Glenn Randers-Pehrson Date: Thu, 6 Oct 2011 21:54:17 -0500 Subject: [PATCH] [libpng15] Speed up png_combine_row() for interlaced images. This reduces the generality of the code, allowing it to be optimized for Adam7 interlace. The masks passed to png_combine_row() are now generated internally, avoiding some code duplication and localizing the interlace handling somewhat. --- ANNOUNCE | 10 ++++++---- CHANGES | 8 +++++--- pngpriv.h | 50 +++++++++++++++++++++++++++++++++++++++++++------- pngread.c | 31 ++++++++++--------------------- 4 files changed, 64 insertions(+), 35 deletions(-) diff --git a/ANNOUNCE b/ANNOUNCE index 18cba6e2c..9ecaeea3b 100644 --- a/ANNOUNCE +++ b/ANNOUNCE @@ -1,5 +1,5 @@ -Libpng 1.5.6beta05 - October 6, 2011 +Libpng 1.5.6beta05 - October 7, 2011 This is not intended to be a public release. It will be replaced within a few weeks by a public version or by another test version. @@ -55,9 +55,11 @@ Version 1.5.6beta03 [September 28, 2011] Version 1.5.6beta04 [October 5, 2011] Fixed typo in Makefile.in and Makefile.am ("-M Wl" should be "-M -Wl")." -Version 1.5.6beta05 [October 6, 2011] - Replaced a short but frequently-used memcpy() in png_combine_row() with - a loop. +Version 1.5.6beta05 [October 7, 2011] + Speed up png_combine_row() for interlaced images. This reduces the generality + of the code, allowing it to be optimized for Adam7 interlace. The masks + passed to png_combine_row() are now generated internally, avoiding + some code duplication and localizing the interlace handling somewhat. Send comments/corrections/commendations to png-mng-implement at lists.sf.net: (subscription required; visit diff --git a/CHANGES b/CHANGES index 850ae3223..e5acbe2dc 100644 --- a/CHANGES +++ b/CHANGES @@ -3616,9 +3616,11 @@ Version 1.5.6beta03 [September 28, 2011] Version 1.5.6beta04 [October 5, 2011] Fixed typo in Makefile.in and Makefile.am ("-M Wl" should be "-M -Wl")." 
-Version 1.5.6beta05 [October 6, 2011] - Replaced a short but frequently-used memcpy() in png_combine_row() with - a loop. +Version 1.5.6beta05 [October 7, 2011] + Speed up png_combine_row() for interlaced images. This reduces the generality + of the code, allowing it to be optimized for Adam7 interlace. The masks + passed to png_combine_row() are now generated internally, avoiding + some code duplication and localizing the interlace handling somewhat. Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/pngpriv.h b/pngpriv.h index 7a7400a22..dda61fdb8 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -44,6 +44,9 @@ */ #include <stdlib.h> +/* This is used to find 'offsetof', used below for alignment tests. */ +#include <stddef.h> + #define PNGLIB_BUILD /*libpng is being built, not used*/ #ifdef PNG_USER_CONFIG @@ -321,6 +324,22 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp; # define png_memset memset # endif #endif + +/* These macros may need to be architecture dependent, they take a pointer and + * an alignment requirement. + */ +#ifdef offsetof +# define png_alignof(type) offsetof(struct{char c; type t;}, t) +#endif + +/* This implicitly assumes alignment is always to a power of 2. */ +#ifdef png_alignof +# define png_isaligned(ptr, type)\ + ((((char*)ptr-(char*)0) & (png_alignof(type)-1)) == 0) +#else +# define png_isaligned(ptr, type) 0 +#endif + /* End of memory model/platform independent support */ /* End of 1.5.0beta36 move from pngconf.h */ @@ -823,17 +842,34 @@ PNG_EXTERN void png_write_finish_row PNGARG((png_structp png_ptr)); PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr)); /* Combine a row of data, dealing with alpha, etc. if requested. 'row' is an - * array of png_ptr->width pixels, 'mask' is a mask of the pixels to copy from - * png_ptr->row_buf+1. 'mask' describes each block of 8 pixels - only the low 8 - * bits are used. 
This function is only ever used to write to row buffers - * provided by the caller of the relevant libpng API and the row must have - * already been transformed by the read transformations. + * array of png_ptr->width pixels. If the image is not interlaced or this + * is the final pass this just does a png_memcpy, otherwise the "display" flag + * is used to determine whether to copy pixels that are not in the current pass. + * + * Because 'png_do_read_interlace' (below) replicates pixels this allows this + * function to achieve the documented 'blocky' appearance during interlaced read + * if display is 1 and the 'sparkle' appearance, where existing pixels in 'row' + * are not changed if they are not in the current pass, when display is 0. + * + * 'display' must be 0 or 1, otherwise the memcpy will be done regardless. + * + * The API always reads from the png_struct row buffer and always assumes that + * it is full width (png_do_read_interlace has already been called.) + * + * This function is only ever used to write to row buffers provided by the + * caller of the relevant libpng API and the row must have already been + * transformed by the read transformations. */ PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row, - int mask)); + int display)); #ifdef PNG_READ_INTERLACING_SUPPORTED -/* Expand an interlaced row */ +/* Expand an interlaced row: the 'row_info' describes the pass data that has + * been read in and must correspond to the pixels in 'row', the pixels are + * expanded (moved apart) in 'row' to match the final layout, when doing this + * the pixels are *replicated* to the intervening space. This is essential for + * the correct operation of png_combine_row, above. 
+ */ PNG_EXTERN void png_do_read_interlace PNGARG((png_row_infop row_info, png_bytep row, int pass, png_uint_32 transformations)); #endif diff --git a/pngread.c b/pngread.c index 3747ebe9e..9b312a0a4 100644 --- a/pngread.c +++ b/pngread.c @@ -390,11 +390,6 @@ png_start_read_image(png_structp png_ptr) void PNGAPI png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) { -#ifdef PNG_READ_INTERLACING_SUPPORTED - PNG_CONST int png_pass_dsp_mask[7] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, - 0xff}; - PNG_CONST int png_pass_mask[7] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff}; -#endif int ret; png_row_info row_info; @@ -474,8 +469,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) if (png_ptr->row_number & 0x07) { if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, - png_pass_dsp_mask[png_ptr->pass]); + png_combine_row(png_ptr, dsp_row, 1/*display*/); png_read_finish_row(png_ptr); return; } @@ -485,8 +479,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) if ((png_ptr->row_number & 0x07) || png_ptr->width < 5) { if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, - png_pass_dsp_mask[png_ptr->pass]); + png_combine_row(png_ptr, dsp_row, 1/*display*/); png_read_finish_row(png_ptr); return; @@ -497,8 +490,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) if ((png_ptr->row_number & 0x07) != 4) { if (dsp_row != NULL && (png_ptr->row_number & 4)) - png_combine_row(png_ptr, dsp_row, - png_pass_dsp_mask[png_ptr->pass]); + png_combine_row(png_ptr, dsp_row, 1/*display*/); png_read_finish_row(png_ptr); return; @@ -509,8 +501,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) if ((png_ptr->row_number & 3) || png_ptr->width < 3) { if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, - png_pass_dsp_mask[png_ptr->pass]); + png_combine_row(png_ptr, dsp_row, 1/*display*/); png_read_finish_row(png_ptr); return; @@ -521,8 +512,7 @@ png_read_row(png_structp png_ptr, png_bytep row, 
png_bytep dsp_row) if ((png_ptr->row_number & 3) != 2) { if (dsp_row != NULL && (png_ptr->row_number & 2)) - png_combine_row(png_ptr, dsp_row, - png_pass_dsp_mask[png_ptr->pass]); + png_combine_row(png_ptr, dsp_row, 1/*display*/); png_read_finish_row(png_ptr); return; @@ -532,8 +522,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) if ((png_ptr->row_number & 1) || png_ptr->width < 2) { if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, - png_pass_dsp_mask[png_ptr->pass]); + png_combine_row(png_ptr, dsp_row, 1/*display*/); png_read_finish_row(png_ptr); return; @@ -651,20 +640,20 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) png_ptr->transformations); if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, png_pass_dsp_mask[png_ptr->pass]); + png_combine_row(png_ptr, dsp_row, 1/*display*/); if (row != NULL) - png_combine_row(png_ptr, row, png_pass_mask[png_ptr->pass]); + png_combine_row(png_ptr, row, 0/*row*/); } else #endif { if (row != NULL) - png_combine_row(png_ptr, row, 0xff); + png_combine_row(png_ptr, row, -1/*ignored*/); if (dsp_row != NULL) - png_combine_row(png_ptr, dsp_row, 0xff); + png_combine_row(png_ptr, dsp_row, -1/*ignored*/); } png_read_finish_row(png_ptr);