From 8be28e3eb6821a39c90deb05f00305c3afa1ded6 Mon Sep 17 00:00:00 2001 From: John Bowler Date: Fri, 7 Oct 2011 18:36:37 -0500 Subject: [PATCH] [libpng15] Optimized png_combine_row() when rows are aligned This gains a small percentage for 16-bit and 32-bit pixels in the typical case where the output row buffers are appropriately aligned. The optimization was not previously possible because the png_struct buffer was always misaligned. --- ANNOUNCE | 4 ++++ CHANGES | 4 ++++ pngrutil.c | 27 ++++++++++++++++----------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/ANNOUNCE b/ANNOUNCE index 8871e17c4..04c2d59d5 100644 --- a/ANNOUNCE +++ b/ANNOUNCE @@ -64,6 +64,10 @@ Version 1.5.6beta05 [October 7, 2011] a bug in the code that attempted to align it; the code needs to subtract one from the pointer to take account of the filter byte prepended to each row. + Optimized png_combine_row() when rows are aligned. This gains a small + percentage for 16-bit and 32-bit pixels in the typical case where the + output row buffers are appropriately aligned. The optimization was not + previously possible because the png_struct buffer was always misaligned. Send comments/corrections/commendations to png-mng-implement at lists.sf.net: (subscription required; visit diff --git a/CHANGES b/CHANGES index 066fb4300..549346a92 100644 --- a/CHANGES +++ b/CHANGES @@ -3625,6 +3625,10 @@ Version 1.5.6beta05 [October 7, 2011] a bug in the code that attempted to align it; the code needs to subtract one from the pointer to take account of the filter byte prepended to each row. + Optimized png_combine_row() when rows are aligned. This gains a small + percentage for 16-bit and 32-bit pixels in the typical case where the + output row buffers are appropriately aligned. The optimization was not + previously possible because the png_struct buffer was always misaligned. 
Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/pngrutil.c b/pngrutil.c index ee06ed62d..ba298860f 100644 --- a/pngrutil.c +++ b/pngrutil.c @@ -2931,12 +2931,13 @@ png_combine_row(png_structp png_ptr, png_bytep dp, int display) /* This is a common optimization for 2 and 4 byte pixels, for other * values rely on the toolchain memcpy being optimized. - * - * TBD: this should use png_isaligned, but currently something isn't - * aligned (NOTE: to be investigated in a really serious fashion.) */ - else if (pixel_depth == 2) + else if (pixel_depth == sizeof (png_uint_16) && + png_isaligned(sp, png_uint_16) && png_isaligned(dp, png_uint_16)) { + png_uint_16p dp16 = (png_uint_16p)dp; + png_uint_16p sp16 = (png_uint_16p)sp; + do { m >>= 1; @@ -2945,16 +2946,20 @@ png_combine_row(png_structp png_ptr, png_bytep dp, int display) m = mask; if (m & 1) - dp[0] = sp[0], dp[1] = sp[1]; + *dp16 = *sp16; - dp += 2; - sp += 2; + ++dp16; + ++sp16; } while (--row_width > 0); } - else if (pixel_depth == 4) /* as above, not optimal */ + else if (pixel_depth == sizeof (png_uint_32) && + png_isaligned(sp, png_uint_32) && png_isaligned(dp, png_uint_32)) { + png_uint_32p dp32 = (png_uint_32p)dp; + png_uint_32p sp32 = (png_uint_32p)sp; + do { m >>= 1; @@ -2963,10 +2968,10 @@ png_combine_row(png_structp png_ptr, png_bytep dp, int display) m = mask; if (m & 1) - dp[0] = sp[0], dp[1] = sp[1], dp[2] = sp[2], dp[3] = sp[3]; + *dp32 = *sp32; - dp += 4; - sp += 4; + ++dp32; + ++sp32; } while (--row_width > 0); }