[libpng15] Speed up png_combine_row() for interlaced images. This reduces the

generality of the code, allowing it to be optimized for Adam7 interlace.  The
masks passed to png_combine_row() are now generated internally, avoiding
some code duplication and localizing the interlace handling somewhat.
This commit is contained in:
Glenn Randers-Pehrson 2011-10-06 21:54:17 -05:00
parent 4daae30174
commit 65c0339b5c
4 changed files with 64 additions and 35 deletions

View File

@ -1,5 +1,5 @@
Libpng 1.5.6beta05 - October 6, 2011 Libpng 1.5.6beta05 - October 7, 2011
This is not intended to be a public release. It will be replaced This is not intended to be a public release. It will be replaced
within a few weeks by a public version or by another test version. within a few weeks by a public version or by another test version.
@ -55,9 +55,11 @@ Version 1.5.6beta03 [September 28, 2011]
Version 1.5.6beta04 [October 5, 2011] Version 1.5.6beta04 [October 5, 2011]
Fixed typo in Makefile.in and Makefile.am ("-M Wl" should be "-M -Wl"). Fixed typo in Makefile.in and Makefile.am ("-M Wl" should be "-M -Wl").
Version 1.5.6beta05 [October 6, 2011] Version 1.5.6beta05 [October 7, 2011]
Replaced a short but frequently-used memcpy() in png_combine_row() with Speed up png_combine_row() for interlaced images. This reduces the generality
a loop. of the code, allowing it to be optimized for Adam7 interlace. The masks
passed to png_combine_row() are now generated internally, avoiding
some code duplication and localizing the interlace handling somewhat.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net: Send comments/corrections/commendations to png-mng-implement at lists.sf.net:
(subscription required; visit (subscription required; visit

View File

@ -3616,9 +3616,11 @@ Version 1.5.6beta03 [September 28, 2011]
Version 1.5.6beta04 [October 5, 2011] Version 1.5.6beta04 [October 5, 2011]
Fixed typo in Makefile.in and Makefile.am ("-M Wl" should be "-M -Wl"). Fixed typo in Makefile.in and Makefile.am ("-M Wl" should be "-M -Wl").
Version 1.5.6beta05 [October 6, 2011] Version 1.5.6beta05 [October 7, 2011]
Replaced a short but frequently-used memcpy() in png_combine_row() with Speed up png_combine_row() for interlaced images. This reduces the generality
a loop. of the code, allowing it to be optimized for Adam7 interlace. The masks
passed to png_combine_row() are now generated internally, avoiding
some code duplication and localizing the interlace handling somewhat.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit (subscription required; visit

View File

@ -44,6 +44,9 @@
*/ */
#include <stdlib.h> #include <stdlib.h>
/* This is used to find 'offsetof', used below for alignment tests. */
#include <stddef.h>
#define PNGLIB_BUILD /*libpng is being built, not used*/ #define PNGLIB_BUILD /*libpng is being built, not used*/
#ifdef PNG_USER_CONFIG #ifdef PNG_USER_CONFIG
@ -321,6 +324,22 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
# define png_memset memset # define png_memset memset
# endif # endif
#endif #endif
/* Alignment helpers.  These macros may need to be architecture dependent.
 *
 * png_alignof(type) evaluates to the offset of a member of 'type' that follows
 * a single char in a struct, i.e. the padding the compiler inserts to align
 * 'type'.  It is only defined when 'offsetof' is available.
 */
#ifdef offsetof
#  define png_alignof(type) offsetof(struct{char c; type t;}, t)
#endif

/* png_isaligned(ptr, type) is non-zero when the address in 'ptr' is a multiple
 * of png_alignof(type).  This implicitly assumes alignment is always to a
 * power of 2.
 *
 * 'ptr' is parenthesized so that an expression argument such as 'p + 1' is
 * evaluated before the cast, and the address is converted to an integer
 * rather than subtracted from a null pointer (which is undefined behavior).
 * The cast goes through 'const void*' so const-qualified pointers may be
 * tested without discarding the qualifier.
 *
 * When png_alignof is not available the test conservatively evaluates to 0
 * ("not known to be aligned").
 */
#ifdef png_alignof
#  define png_isaligned(ptr, type)\
   (((size_t)((const void*)(ptr)) & (png_alignof(type)-1)) == 0)
#else
#  define png_isaligned(ptr, type) 0
#endif
/* End of memory model/platform independent support */ /* End of memory model/platform independent support */
/* End of 1.5.0beta36 move from pngconf.h */ /* End of 1.5.0beta36 move from pngconf.h */
@ -823,17 +842,34 @@ PNG_EXTERN void png_write_finish_row PNGARG((png_structp png_ptr));
PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr)); PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr));
/* Combine a row of data, dealing with alpha, etc. if requested. 'row' is an /* Combine a row of data, dealing with alpha, etc. if requested. 'row' is an
* array of png_ptr->width pixels, 'mask' is a mask of the pixels to copy from * array of png_ptr->width pixels. If the image is not interlaced or this
* png_ptr->row_buf+1. 'mask' describes each block of 8 pixels - only the low 8 * is the final pass this just does a png_memcpy, otherwise the "display" flag
* bits are used. This function is only ever used to write to row buffers * is used to determine whether to copy pixels that are not in the current pass.
* provided by the caller of the relevant libpng API and the row must have *
* already been transformed by the read transformations. * Because 'png_do_read_interlace' (below) replicates pixels this allows this
* function to achieve the documented 'blocky' appearance during interlaced read
* if display is 1 and the 'sparkle' appearance, where existing pixels in 'row'
* are not changed if they are not in the current pass, when display is 0.
*
* 'display' must be 0 or 1, otherwise the memcpy will be done regardless.
*
* The API always reads from the png_struct row buffer and always assumes that
* it is full width (png_do_read_interlace has already been called.)
*
* This function is only ever used to write to row buffers provided by the
* caller of the relevant libpng API and the row must have already been
* transformed by the read transformations.
*/ */
PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row, PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row,
int mask)); int display));
#ifdef PNG_READ_INTERLACING_SUPPORTED #ifdef PNG_READ_INTERLACING_SUPPORTED
/* Expand an interlaced row */ /* Expand an interlaced row: the 'row_info' describes the pass data that has
* been read in and must correspond to the pixels in 'row'.  The pixels are
* expanded (moved apart) in 'row' to match the final layout; when doing this
* the pixels are *replicated* to the intervening space.  This is essential for
* the correct operation of png_combine_row, above.
*/
PNG_EXTERN void png_do_read_interlace PNGARG((png_row_infop row_info, PNG_EXTERN void png_do_read_interlace PNGARG((png_row_infop row_info,
png_bytep row, int pass, png_uint_32 transformations)); png_bytep row, int pass, png_uint_32 transformations));
#endif #endif

View File

@ -390,11 +390,6 @@ png_start_read_image(png_structp png_ptr)
void PNGAPI void PNGAPI
png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
{ {
#ifdef PNG_READ_INTERLACING_SUPPORTED
PNG_CONST int png_pass_dsp_mask[7] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55,
0xff};
PNG_CONST int png_pass_mask[7] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
#endif
int ret; int ret;
png_row_info row_info; png_row_info row_info;
@ -474,8 +469,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
if (png_ptr->row_number & 0x07) if (png_ptr->row_number & 0x07)
{ {
if (dsp_row != NULL) if (dsp_row != NULL)
png_combine_row(png_ptr, dsp_row, png_combine_row(png_ptr, dsp_row, 1/*display*/);
png_pass_dsp_mask[png_ptr->pass]);
png_read_finish_row(png_ptr); png_read_finish_row(png_ptr);
return; return;
} }
@ -485,8 +479,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
if ((png_ptr->row_number & 0x07) || png_ptr->width < 5) if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
{ {
if (dsp_row != NULL) if (dsp_row != NULL)
png_combine_row(png_ptr, dsp_row, png_combine_row(png_ptr, dsp_row, 1/*display*/);
png_pass_dsp_mask[png_ptr->pass]);
png_read_finish_row(png_ptr); png_read_finish_row(png_ptr);
return; return;
@ -497,8 +490,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
if ((png_ptr->row_number & 0x07) != 4) if ((png_ptr->row_number & 0x07) != 4)
{ {
if (dsp_row != NULL && (png_ptr->row_number & 4)) if (dsp_row != NULL && (png_ptr->row_number & 4))
png_combine_row(png_ptr, dsp_row, png_combine_row(png_ptr, dsp_row, 1/*display*/);
png_pass_dsp_mask[png_ptr->pass]);
png_read_finish_row(png_ptr); png_read_finish_row(png_ptr);
return; return;
@ -509,8 +501,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
if ((png_ptr->row_number & 3) || png_ptr->width < 3) if ((png_ptr->row_number & 3) || png_ptr->width < 3)
{ {
if (dsp_row != NULL) if (dsp_row != NULL)
png_combine_row(png_ptr, dsp_row, png_combine_row(png_ptr, dsp_row, 1/*display*/);
png_pass_dsp_mask[png_ptr->pass]);
png_read_finish_row(png_ptr); png_read_finish_row(png_ptr);
return; return;
@ -521,8 +512,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
if ((png_ptr->row_number & 3) != 2) if ((png_ptr->row_number & 3) != 2)
{ {
if (dsp_row != NULL && (png_ptr->row_number & 2)) if (dsp_row != NULL && (png_ptr->row_number & 2))
png_combine_row(png_ptr, dsp_row, png_combine_row(png_ptr, dsp_row, 1/*display*/);
png_pass_dsp_mask[png_ptr->pass]);
png_read_finish_row(png_ptr); png_read_finish_row(png_ptr);
return; return;
@ -532,8 +522,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
if ((png_ptr->row_number & 1) || png_ptr->width < 2) if ((png_ptr->row_number & 1) || png_ptr->width < 2)
{ {
if (dsp_row != NULL) if (dsp_row != NULL)
png_combine_row(png_ptr, dsp_row, png_combine_row(png_ptr, dsp_row, 1/*display*/);
png_pass_dsp_mask[png_ptr->pass]);
png_read_finish_row(png_ptr); png_read_finish_row(png_ptr);
return; return;
@ -651,20 +640,20 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
png_ptr->transformations); png_ptr->transformations);
if (dsp_row != NULL) if (dsp_row != NULL)
png_combine_row(png_ptr, dsp_row, png_pass_dsp_mask[png_ptr->pass]); png_combine_row(png_ptr, dsp_row, 1/*display*/);
if (row != NULL) if (row != NULL)
png_combine_row(png_ptr, row, png_pass_mask[png_ptr->pass]); png_combine_row(png_ptr, row, 0/*row*/);
} }
else else
#endif #endif
{ {
if (row != NULL) if (row != NULL)
png_combine_row(png_ptr, row, 0xff); png_combine_row(png_ptr, row, -1/*ignored*/);
if (dsp_row != NULL) if (dsp_row != NULL)
png_combine_row(png_ptr, dsp_row, 0xff); png_combine_row(png_ptr, dsp_row, -1/*ignored*/);
} }
png_read_finish_row(png_ptr); png_read_finish_row(png_ptr);