diff --git a/png.h b/png.h
index 111a788b5..42694385a 100644
--- a/png.h
+++ b/png.h
@@ -1,7 +1,7 @@
 /* png.h - header file for PNG reference library
  *
- * libpng version 1.5.6beta05 - October 7, 2011
+ * libpng version 1.5.6beta05 - October 11, 2011
  * Copyright (c) 1998-2011 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
  * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
@@ -11,7 +11,7 @@
  * Authors and maintainers:
  *  libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
  *  libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger
- *  libpng versions 0.97, January 1998, through 1.5.6beta05 - October 7, 2011: Glenn
+ *  libpng versions 0.97, January 1998, through 1.5.6beta05 - October 11, 2011: Glenn
  * See also "Contributing Authors", below.
  *
  * Note about libpng version numbers:
@@ -192,7 +192,7 @@
  *
  * This code is released under the libpng license.
  *
- * libpng versions 1.2.6, August 15, 2004, through 1.5.6beta05, October 7, 2011, are
+ * libpng versions 1.2.6, August 15, 2004, through 1.5.6beta05, October 11, 2011, are
  * Copyright (c) 2004, 2006-2011 Glenn Randers-Pehrson, and are
  * distributed according to the same disclaimer and license as libpng-1.2.5
  * with the following individual added to the list of Contributing Authors:
@@ -304,7 +304,7 @@
  * Y2K compliance in libpng:
  * =========================
  *
- *    October 7, 2011
+ *    October 11, 2011
  *
  * Since the PNG Development group is an ad-hoc body, we can't make
  * an official declaration.
@@ -367,7 +367,7 @@
 /* Version information for png.h - this should match the version in png.c */
 #define PNG_LIBPNG_VER_STRING "1.5.6beta05"
 #define PNG_HEADER_VERSION_STRING \
-   " libpng version 1.5.6beta05 - October 7, 2011\n"
+   " libpng version 1.5.6beta05 - October 11, 2011\n"
 
 #define PNG_LIBPNG_VER_SONUM   15
 #define PNG_LIBPNG_VER_DLLNUM  15
@@ -2462,8 +2462,16 @@ PNG_EXPORT(216, png_uint_32, png_get_io_chunk_type,
  * full, image which appears in a given pass.  'pass' is in the range 0
  * to 6 and the result is in the range 0 to 7.
  */
-#define PNG_PASS_START_ROW(pass) (((1U&~(pass))<<(3-((pass)>>1)))&7)
-#define PNG_PASS_START_COL(pass) (((1U& (pass))<<(3-(((pass)+1)>>1)))&7)
+#define PNG_PASS_START_ROW(pass) (((1&~(pass))<<(3-((pass)>>1)))&7)
+#define PNG_PASS_START_COL(pass) (((1& (pass))<<(3-(((pass)+1)>>1)))&7)
+
+/* A macro to return the offset between pixels in the output row for a pair of
+ * pixels in the input - effectively the inverse of the 'COL_SHIFT' macro that
+ * follows.  Note that ROW_OFFSET is the offset from one row to the next whereas
+ * COL_OFFSET is from one column to the next, within a row.
+ */
+#define PNG_PASS_ROW_OFFSET(pass) ((pass)>2?(8>>(((pass)-1)>>1)):8)
+#define PNG_PASS_COL_OFFSET(pass) (1<<((7-(pass))>>1))
 
 /* Two macros to help evaluate the number of rows or columns in each
  * pass.  This is expressed as a shift - effectively log2 of the number of
@@ -2498,8 +2506,8 @@ PNG_EXPORT(216, png_uint_32, png_get_io_chunk_type,
  * the tile.
  */
 #define PNG_PASS_MASK(pass,off) ( \
-   ((0x110145AFU>>(((7-(off))-(pass))<<2)) & 0xFU) | \
-   ((0x01145AF0U>>(((7-(off))-(pass))<<2)) & 0xF0U))
+   ((0x110145AF>>(((7-(off))-(pass))<<2)) & 0xF) | \
+   ((0x01145AF0>>(((7-(off))-(pass))<<2)) & 0xF0))
 
 #define PNG_ROW_IN_INTERLACE_PASS(y, pass) \
    ((PNG_PASS_MASK(pass,0) >> ((y)&7)) & 1)
diff --git a/pngpriv.h b/pngpriv.h
index dda61fdb8..0f06d11cd 100644
--- a/pngpriv.h
+++ b/pngpriv.h
@@ -325,17 +325,47 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
 #  endif
 #endif
 
-/* These macros may need to be architecture dependent, they take a pointer and
- * an alignment requirement.
- */
+/* These macros may need to be architecture dependent. */
+#define PNG_ALIGN_NONE   0 /* do not use data alignment */
+#define PNG_ALIGN_ALWAYS 1 /* assume unaligned accesses are OK */
 #ifdef offsetof
-#  define png_alignof(type) offsetof(struct{char c; type t;}, t)
+#  define PNG_ALIGN_OFFSET 2 /* use offsetof to determine alignment */
+#else
+#  define PNG_ALIGN_OFFSET -1 /* prevent the use of this */
+#endif
+#define PNG_ALIGN_SIZE   3 /* use sizeof to determine alignment */
+
+#ifndef PNG_ALIGN_TYPE
+   /* Default to using aligned access optimizations and requiring alignment to a
+    * multiple of the data type size.  Override in a compiler specific fashion
+    * if necessary by inserting tests here:
+    */
+#  define PNG_ALIGN_TYPE PNG_ALIGN_SIZE
+#endif
+
+#if PNG_ALIGN_TYPE == PNG_ALIGN_SIZE
+   /* This is used because in some compiler implementations non-aligned
+    * structure members are supported, so the offsetof approach below fails.
+    * Set PNG_ALIGN_TYPE=PNG_ALIGN_ALWAYS for compiler combinations where
+    * unaligned access is good for performance.  Do not do this unless you have
+    * tested the result and understand it.
+    */
+#  define png_alignof(type) (sizeof (type))
+#else
+#  if PNG_ALIGN_TYPE == PNG_ALIGN_OFFSET
+#     define png_alignof(type) offsetof(struct{char c; type t;}, t)
+#  else
+#     if PNG_ALIGN_TYPE == PNG_ALIGN_ALWAYS
+#        define png_alignof(type) (1)
+#     endif
+      /* Else leave png_alignof undefined to prevent use thereof */
+#  endif
 #endif
 
 /* This implicitly assumes alignment is always to a power of 2. */
 #ifdef png_alignof
 #  define png_isaligned(ptr, type)\
-   ((((char*)ptr-(char*)0) & (png_alignof(type)-1)) == 0)
+   ((((const char*)ptr-(const char*)0) & (png_alignof(type)-1)) == 0)
 #else
 #  define png_isaligned(ptr, type) 0
 #endif
@@ -859,7 +889,14 @@ PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr));
  * This function is only ever used to write to row buffers provided by the
  * caller of the relevant libpng API and the row must have already been
  * transformed by the read transformations.
+ *
+ * The PNG_USE_COMPILE_TIME_MASKS option causes generation of pre-computed
+ * bitmasks for use within the code; otherwise runtime-generated masks are
+ * used.  The default is compile time masks.
  */
+#ifndef PNG_USE_COMPILE_TIME_MASKS
+#  define PNG_USE_COMPILE_TIME_MASKS 1
+#endif
 PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row,
     int display));
diff --git a/pngrutil.c b/pngrutil.c
index ba298860f..e5feb18da 100644
--- a/pngrutil.c
+++ b/pngrutil.c
@@ -2782,9 +2782,10 @@ png_check_chunk_name(png_structp png_ptr, png_uint_32 chunk_name)
 void /* PRIVATE */
 png_combine_row(png_structp png_ptr, png_bytep dp, int display)
 {
-   int pixel_depth = png_ptr->transformed_pixel_depth;
-   png_bytep sp = png_ptr->row_buf + 1;
+   unsigned int pixel_depth = png_ptr->transformed_pixel_depth;
+   png_const_bytep sp = png_ptr->row_buf + 1;
    png_uint_32 row_width = png_ptr->width;
+   unsigned int pass = png_ptr->pass;
 
    png_debug(1, "in png_combine_row");
 
@@ -2812,197 +2813,391 @@ png_combine_row(png_structp png_ptr, png_bytep dp, int display)
     */
 #ifdef PNG_READ_INTERLACING_SUPPORTED
    if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE) &&
-      png_ptr->pass < 6 && (display == 0 || display == 1))
+      pass < 6 && (display == 0 ||
+      /* The following copies everything for 'display' on passes 0, 2 and 4. */
+      (display == 1 && (pass & 1) != 0)))
    {
-      /* These are reversed from the values used prior to libpng 1.5.6 to allow
-       * testing against '1' rather than 0x80
+      /* Narrow images may have no bits in a pass; the caller should handle
+       * this, but this test is cheap:
        */
-      static PNG_CONST png_byte png_pass_mask[2][6] = {
-         {0x01, 0x10, 0x11, 0x44, 0x55, 0xaa /*, 0xff*/}, /* regular */
-         {0xff, 0xf0, 0xff, 0xcc, 0xff, 0xaa /*, 0xff*/}};/* display */
-      unsigned int mask = png_pass_mask[display][png_ptr->pass] + 0x100;
+      if (row_width <= PNG_PASS_START_COL(pass))
+         return;
 
-      if (mask != 0x1ff)
+      if (pixel_depth < 8)
       {
-         if (pixel_depth < 8)
-         {
-            /* Must write partial bytes, the 'shift' here is to the left, but
-             * the PNG bits go to the right, i.e. start at the most significant
-             * bit.
-             */
-            unsigned int shift;
-            unsigned int inc = (unsigned int)pixel_depth;
-            unsigned int m = mask << 1;
-            unsigned int pixel_mask = (1 << pixel_depth) - 1;
+         /* For pixel depths up to 4bpp the 8-pixel mask can be expanded to fit
+          * into 32 bits, then a single loop over the bytes using the four byte
+          * values in the 32 bit mask can be used.  For the 'display' option the
+          * expanded mask may also not require any masking within a byte.  To
+          * make this work the PACKSWAP option must be taken into account - it
+          * simply requires the pixels to be reversed in each byte.
+          *
+          * The 'regular' case requires a mask for each of the first 6 passes;
+          * the 'display' case does a copy for the even passes in the range
+          * 0..6.  This has already been handled in the test above.
+          *
+          * The masks are arranged as four bytes with the first byte to use in
+          * the lowest bits (little-endian) regardless of the order (PACKSWAP or
+          * not) of the pixels in each byte.
+          *
+          * NOTE: the whole of this logic depends on the caller of this function
+          * only calling it on rows appropriate to the pass.  This function only
+          * understands the 'x' logic; the 'y' logic is handled by the caller.
+          *
+          * The following defines allow generation of compile time constant bit
+          * masks for each pixel depth and each possibility of swapped or not
+          * swapped bytes.  Pass is in the range 0..6; 'x', a pixel index, is in
+          * the range 0..7; the result is 1 if the pixel is to be copied in the
+          * pass, 0 if not.  'S' is for the sparkle method, 'B' for the block
+          * method.
+          */
+#        define S_COPY(p,x) (((p)<4 ? 0x80088822 >> ((3-(p))*8+(7-(x))) :\
+           0xaa55ff00 >> ((7-(p))*8+(7-(x)))) & 1)
+#        define B_COPY(p,x) (((p)<4 ? 0xff0fff33 >> ((3-(p))*8+(7-(x))) :\
+           0xff55ff00 >> ((7-(p))*8+(7-(x)))) & 1)
 
-#        ifdef PNG_READ_PACKSWAP_SUPPORTED
-         if (png_ptr->transformations & PNG_PACKSWAP)
-         {
-            /* The bytes have been swapped; start at the other end and
-             * move in the opposite direction.
-             */
-            shift = 0;
-            /* inc is already correct */
-         }
-         else
-#        endif
+         /* Return a mask for pass 'p' pixel 'x' at depth 'd'.  The mask is
+          * little endian - the first pixel is at bit 0 - however the extra
+          * parameter 's' can be set to cause the mask position to be swapped
+          * within each byte, to match the PNG format.  This is done by XOR of
+          * the shift with 7, 6 or 4 for bit depths 1, 2 and 4.
+          */
+#        define PIXEL_MASK(p,x,d,s) (((1U<<(d))-1)<<(((x)*(d))^((s)?8-(d):0)))
 
-         /* Bits not swapped: normal case */
-         {
-            shift = 8 - inc;
-            inc = -inc; /* but note, unsigned */
-         }
+         /* Hence generate the appropriate 'block' or 'sparkle' pixel copy mask.
+          */
+#        define S_MASKx(p,x,d,s) (S_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
+#        define B_MASKx(p,x,d,s) (B_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
 
-         for (;;)
-         {
-            m >>= 1;
+         /* Combine 8 of these to get the full mask.  For the 1 and 2 bpp cases
+          * the result needs replicating; for the 4bpp case the above generates
+          * a full 32 bits.
+          */
+#        define MASK_EXPAND(m,d) ((m)*((d)==1?0x01010101:((d)==2?0x00010001:1)))
 
-            if (m == 1)
-               m = mask;
+#        define S_MASK(p,d,s) MASK_EXPAND(S_MASKx(p,0,d,s) + S_MASKx(p,1,d,s) +\
+            S_MASKx(p,2,d,s) + S_MASKx(p,3,d,s) + S_MASKx(p,4,d,s) +\
+            S_MASKx(p,5,d,s) + S_MASKx(p,6,d,s) + S_MASKx(p,7,d,s), d)
 
-            if (m & 1)
-            {
-               /* Find the bits to select and copy those over: */
-               unsigned int bit_mask = pixel_mask << shift;
-               *dp = (png_byte)((*dp & ~bit_mask) | (*sp & bit_mask));
-            }
+#        define B_MASK(p,d,s) MASK_EXPAND(B_MASKx(p,0,d,s) + B_MASKx(p,1,d,s) +\
+            B_MASKx(p,2,d,s) + B_MASKx(p,3,d,s) + B_MASKx(p,4,d,s) +\
+            B_MASKx(p,5,d,s) + B_MASKx(p,6,d,s) + B_MASKx(p,7,d,s), d)
 
-            if (--row_width == 0)
-               break;
+#if PNG_USE_COMPILE_TIME_MASKS
+         /* Utility macros to construct all the masks for a depth/swap
+          * combination.  The 's' parameter says whether the format is PNG
+          * (big endian bytes) or not.  Only the three odd-numbered passes are
+          * required for the display/block algorithm.
+          */
+#        define S_MASKS(d,s) { S_MASK(0,d,s), S_MASK(1,d,s), S_MASK(2,d,s),\
+            S_MASK(3,d,s), S_MASK(4,d,s), S_MASK(5,d,s) }
 
-            /* And move to the next set of bits, checking for the end of this
-             * byte.
-             */
-            shift += inc;
-            if (shift > 7) /* because it is unsigned */
-            {
-               ++sp;
-               ++dp;
-            }
-            shift &= 7;
-         }
-      }
+#        define B_MASKS(d,s) { B_MASK(1,d,s), B_MASK(3,d,s), B_MASK(5,d,s) }
 
-      else /* pixel_depth >= 8 */
-      {
-         unsigned int m;
+#        define DEPTH_INDEX(d) ((d)==1?0:((d)==2?1:2))
 
-         pixel_depth >>= 3; /* now in bytes */
-         m = mask << 1;
+         /* Hence the pre-compiled masks indexed by PACKSWAP (or not), depth and
+          * then pass:
+          */
+         static PNG_CONST png_uint_32 row_mask[2/*PACKSWAP*/][3/*depth*/][6] = {
+            /* Little-endian byte masks for PACKSWAP */
+            { S_MASKS(1,0), S_MASKS(2,0), S_MASKS(4,0) },
+            /* Normal (big-endian byte) masks - PNG format */
+            { S_MASKS(1,1), S_MASKS(2,1), S_MASKS(4,1) }
+         };
 
-         /* This is here to give the compiler some help in the common cases
-          * where there are very few bytes.
-          */
-         if (pixel_depth == 1)
-         {
-            do
-            {
-               m >>= 1;
+         /* display_mask has only three entries for the odd passes, so index by
+          * pass>>1.
+          */
+         static PNG_CONST png_uint_32 display_mask[2][3][3] = {
+            /* Little-endian byte masks for PACKSWAP */
+            { B_MASKS(1,0), B_MASKS(2,0), B_MASKS(4,0) },
+            /* Normal (big-endian byte) masks - PNG format */
+            { B_MASKS(1,1), B_MASKS(2,1), B_MASKS(4,1) }
+         };
 
-               if (m == 1)
-                  m = mask;
+#        define MASK(pass,depth,display,png)\
+            ((display)?display_mask[png][DEPTH_INDEX(depth)][pass>>1]:\
+            row_mask[png][DEPTH_INDEX(depth)][pass])
 
-               if (m & 1)
-                  *dp = *sp;
+#else /* !PNG_USE_COMPILE_TIME_MASKS */
+         /* This is the runtime alternative: it seems unlikely that this will
+          * ever be either smaller or faster than the compile time approach.
+          */
+#        define MASK(pass,depth,display,png)\
+            ((display)?B_MASK(pass,depth,png):S_MASK(pass,depth,png))
+#endif /* !PNG_USE_COMPILE_TIME_MASKS */
 
-               ++dp;
-               ++sp;
-            }
-            while (--row_width > 0);
-         }
+         /* Use the appropriate mask to copy the required bits.  In some cases
+          * the byte mask will be 0 or 0xff; optimize these cases.  row_width is
+          * the number of pixels, but the code copies bytes, so it is necessary
+          * to special case the end.
+          */
+         png_uint_32 pixels_per_byte = 8 / pixel_depth;
+         png_uint_32 mask;
 
-         else if (pixel_depth == 3)
-         {
-            do
-            {
-               m >>= 1;
-
-               if (m == 1)
-                  m = mask;
-
-               if (m & 1)
-                  dp[0] = sp[0], dp[1] = sp[1], dp[2] = sp[2];
-
-               dp += 3;
-               sp += 3;
-            }
-            while (--row_width > 0);
-         }
-
-         /* This is a common optimization for 2 and 4 byte pixels, for other
-          * values rely on the toolchain memcpy being optimized.
-          */
-         else if (pixel_depth == sizeof (png_uint_16) &&
-            png_isaligned(sp, png_uint_16) && png_isaligned(dp, png_uint_16))
-         {
-            png_uint_16p dp16 = (png_uint_16p)dp;
-            png_uint_16p sp16 = (png_uint_16p)sp;
-
-            do
-            {
-               m >>= 1;
-
-               if (m == 1)
-                  m = mask;
-
-               if (m & 1)
-                  *dp16 = *sp16;
-
-               ++dp16;
-               ++sp16;
-            }
-            while (--row_width > 0);
-         }
-
-         else if (pixel_depth == sizeof (png_uint_32) &&
-            png_isaligned(sp, png_uint_32) && png_isaligned(dp, png_uint_32))
-         {
-            png_uint_32p dp32 = (png_uint_32p)dp;
-            png_uint_32p sp32 = (png_uint_32p)sp;
-
-            do
-            {
-               m >>= 1;
-
-               if (m == 1)
-                  m = mask;
-
-               if (m & 1)
-                  *dp32 = *sp32;
-
-               ++dp32;
-               ++sp32;
-            }
-            while (--row_width > 0);
-         }
+#        ifdef PNG_READ_PACKSWAP_SUPPORTED
+         if (png_ptr->transformations & PNG_PACKSWAP)
+            mask = MASK(pass, pixel_depth, display, 0);
          else
+#        endif
+         mask = MASK(pass, pixel_depth, display, 1);
+
+         for (;;)
+         {
+            png_uint_32 m;
+
+            /* It doesn't matter in the following if png_uint_32 has more than
+             * 32 bits because the high bits always match those in m<<24; it
+             * is, however, essential to use OR here, not +, because of this.
+             */
+            m = mask;
+            mask = (m >> 8) | (m << 24); /* a rotate right, to good compilers */
+            m &= 0xff;
+
+            if (m != 0) /* something to copy */
             {
-            do
-            {
-               m >>= 1;
-
-               if (m == 1)
-                  m = mask;
-
-               if (m & 1)
-                  png_memcpy(dp, sp, pixel_depth);
-
-               sp += pixel_depth;
-               dp += pixel_depth;
-            }
-            while (--row_width > 0);
+               if (m != 0xff)
+                  *dp = (png_byte)((*dp & ~m) | (*sp & m));
+               else
+                  *dp = *sp;
             }
+
+            /* NOTE: this may overwrite the last byte with garbage if the image
+             * is not an exact number of bytes wide; libpng has always done
+             * this.
+             */
+            if (row_width <= pixels_per_byte)
+               return;
+
+            row_width -= pixels_per_byte;
+            ++dp;
+            ++sp;
+         }
+      }
+
+      else /* pixel_depth >= 8 */
+      {
+         unsigned int bytes_to_copy, bytes_to_jump;
+
+         /* Validate the depth - it must be a multiple of 8 */
+         if (pixel_depth & 7)
+            png_error(png_ptr, "invalid user transform pixel depth");
+
+         pixel_depth >>= 3; /* now in bytes */
+         row_width *= pixel_depth;
+
+         /* Regardless of pass number the Adam7 interlace always results in a
+          * fixed number of pixels to copy then to skip.  There may be a
+          * different number of pixels to skip at the start though.
+          */
+         {
+            unsigned int offset = PNG_PASS_START_COL(pass) * pixel_depth;
+
+            row_width -= offset;
+            dp += offset;
+            sp += offset;
          }
-      return;
-   }
-   /* else mask is 0xff */
 
+         /* Work out the bytes to copy. */
+         if (display)
+         {
+            /* When doing the 'block' algorithm the pixel in the pass gets
+             * replicated to adjacent pixels.  This is why the even (0,2,4,6)
+             * passes are skipped above - the entire expanded row is copied.
+             */
+            bytes_to_copy = (1<<((6-pass)>>1)) * pixel_depth;
+
+            /* But don't allow this number to exceed the actual row width. */
+            if (bytes_to_copy > row_width)
+               bytes_to_copy = row_width;
+         }
+
+         else /* normal row; Adam7 only ever gives us one pixel to copy. */
+            bytes_to_copy = pixel_depth;
+
+         /* In Adam7 there is a constant offset between where the pixels go. */
+         bytes_to_jump = PNG_PASS_COL_OFFSET(pass) * pixel_depth;
+
+         /* And simply copy these bytes.  Some optimization is possible here,
+          * depending on the value of 'bytes_to_copy'.  Special case the low
+          * byte counts, which we know to be frequent.
+          */
+         switch (bytes_to_copy)
+         {
+            case 1:
+               for (;;)
+               {
+                  *dp = *sp;
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  dp += bytes_to_jump;
+                  sp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+               }
+
+            case 2:
+               /* There is a possibility of a partial copy at the end here;
+                * this slows the code down somewhat.
+                */
+               do
+               {
+                  dp[0] = sp[0], dp[1] = sp[1];
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  sp += bytes_to_jump;
+                  dp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+               }
+               while (row_width > 1);
+
+               /* And there can only be one byte left at this point: */
+               *dp = *sp;
+               return;
+
+            case 3:
+               /* This can only be the RGB case, so each copy is exactly one
+                * pixel and it is not necessary to check for a partial copy.
+                */
+               for (;;)
+               {
+                  dp[0] = sp[0], dp[1] = sp[1], dp[2] = sp[2];
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  sp += bytes_to_jump;
+                  dp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+               }
+
+            default:
+#if PNG_ALIGN_TYPE != PNG_ALIGN_NONE
+               /* Check for double byte alignment and, if possible, use a
+                * 16-bit copy.  Don't attempt this for narrow images - ones
+                * that are less than an interlace panel wide.  Don't attempt
+                * it for wide bytes-to-copy either - use the memcpy there.
+                */
+               if (bytes_to_copy < 16 /*else use memcpy*/ &&
+                  png_isaligned(dp, png_uint_16) &&
+                  png_isaligned(sp, png_uint_16) &&
+                  bytes_to_copy % sizeof (png_uint_16) == 0 &&
+                  bytes_to_jump % sizeof (png_uint_16) == 0)
+               {
+                  /* Everything is aligned for png_uint_16 copies, but try for
+                   * png_uint_32 first.
+                   */
+                  if (png_isaligned(dp, png_uint_32) &&
+                     png_isaligned(sp, png_uint_32) &&
+                     bytes_to_copy % sizeof (png_uint_32) == 0 &&
+                     bytes_to_jump % sizeof (png_uint_32) == 0)
+                  {
+                     png_uint_32p dp32 = (png_uint_32p)dp;
+                     png_const_uint_32p sp32 = (png_const_uint_32p)sp;
+                     unsigned int skip = (bytes_to_jump-bytes_to_copy) /
+                        sizeof (png_uint_32);
+
+                     do
+                     {
+                        size_t c = bytes_to_copy;
+                        do
+                        {
+                           *dp32++ = *sp32++;
+                           c -= sizeof (png_uint_32);
+                        }
+                        while (c > 0);
+
+                        if (row_width <= bytes_to_jump)
+                           return;
+
+                        dp32 += skip;
+                        sp32 += skip;
+                        row_width -= bytes_to_jump;
+                     }
+                     while (bytes_to_copy <= row_width);
+
+                     /* Get to here when the row_width truncates the final
+                      * copy.  There will be 1-3 bytes left to copy, so don't
+                      * try the 16-bit loop below.
+                      */
+                     dp = (png_bytep)dp32;
+                     sp = (png_const_bytep)sp32;
+                     do
+                        *dp++ = *sp++;
+                     while (--row_width > 0);
+                     return;
+                  }
+
+                  /* Else do it in 16-bit quantities, but only if the size is
+                   * not too large.
+                   */
+                  else
+                  {
+                     png_uint_16p dp16 = (png_uint_16p)dp;
+                     png_const_uint_16p sp16 = (png_const_uint_16p)sp;
+                     unsigned int skip = (bytes_to_jump-bytes_to_copy) /
+                        sizeof (png_uint_16);
+
+                     do
+                     {
+                        size_t c = bytes_to_copy;
+                        do
+                        {
+                           *dp16++ = *sp16++;
+                           c -= sizeof (png_uint_16);
+                        }
+                        while (c > 0);
+
+                        if (row_width <= bytes_to_jump)
+                           return;
+
+                        dp16 += skip;
+                        sp16 += skip;
+                        row_width -= bytes_to_jump;
+                     }
+                     while (bytes_to_copy <= row_width);
+
+                     /* End of row - 1 byte left, bytes_to_copy > row_width: */
+                     dp = (png_bytep)dp16;
+                     sp = (png_const_bytep)sp16;
+                     do
+                        *dp++ = *sp++;
+                     while (--row_width > 0);
+                     return;
+                  }
+               }
+#endif /* PNG_ALIGN_ code */
+
+               /* The true default - use a memcpy: */
+               for (;;)
+               {
+                  png_memcpy(dp, sp, bytes_to_copy);
+
+                  if (row_width <= bytes_to_jump)
+                     return;
+
+                  sp += bytes_to_jump;
+                  dp += bytes_to_jump;
+                  row_width -= bytes_to_jump;
+                  if (bytes_to_copy > row_width)
+                     bytes_to_copy = row_width;
+               }
+         }
+      } /* pixel_depth >= 8 */
+
+      /* NOT REACHED */
    }
+   else
 #endif
 
    /* If here then the switch above wasn't used so just memcpy the whole row
-    * from the temporary row buffer:
+    * from the temporary row buffer (notice that this overwrites the end of the
+    * destination row if it is a partial byte.)
     */
    png_memcpy(dp, sp, PNG_ROWBYTES(pixel_depth, row_width));
 }
diff --git a/pngvalid.c b/pngvalid.c
index fe798c8d3..cad41253c 100644
--- a/pngvalid.c
+++ b/pngvalid.c
@@ -460,7 +460,13 @@ pixel_cmp(png_const_bytep pa, png_const_bytep pb, png_uint_32 bit_width)
       if (p == 0) return 0;
    }
 
-   return 1; /* Different */
+   /* Return 1 + the index of the first changed byte. */
+   {
+      png_uint_32 where = 0;
+
+      while (pa[where] == pb[where]) ++where;
+      return 1+where;
+   }
 }
 
 /*************************** BASIC PNG FILE WRITING ***************************/
@@ -4414,6 +4420,7 @@ static void
 standard_row_validate(standard_display *dp, png_structp pp,
     int iImage, int iDisplay, png_uint_32 y)
 {
+   int where;
    png_byte std[STANDARD_ROWMAX];
 
    memset(std, 0xff, sizeof std);
@@ -4430,11 +4437,12 @@ standard_row_validate(standard_display *dp, png_structp pp,
    * row bytes are always trashed, so we always do a pixel_cmp here even though
    * a memcmp of all cbRow bytes will succeed for the sequential reader.
    */
-   if (iImage >= 0 && pixel_cmp(std, store_image_row(dp->ps, pp, iImage, y),
-      dp->bit_width) != 0)
+   if (iImage >= 0 &&
+      (where = pixel_cmp(std, store_image_row(dp->ps, pp, iImage, y),
+         dp->bit_width)) != 0)
    {
       char msg[64];
-      sprintf(msg, "PNG image row %d changed", y);
+      sprintf(msg, "PNG image row %d changed at byte %d", y, where-1);
       png_error(pp, msg);
    }
 
@@ -4442,11 +4450,12 @@ standard_row_validate(standard_display *dp, png_structp pp,
    * byte at the end of the row if the row is not an exact multiple
    * of 8 bits wide.
    */
-   if (iDisplay >= 0 && pixel_cmp(std, store_image_row(dp->ps, pp, iDisplay, y),
-      dp->bit_width) != 0)
+   if (iDisplay >= 0 &&
+      (where = pixel_cmp(std, store_image_row(dp->ps, pp, iDisplay, y),
+         dp->bit_width)) != 0)
    {
       char msg[64];
-      sprintf(msg, "display row %d changed", y);
+      sprintf(msg, "display row %d changed at byte %d", y, where-1);
      png_error(pp, msg);
    }
 }
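
The compile-time mask machinery in the pngrutil.c hunk is compact but fiddly to verify by eye. The standalone cross-check below is a minimal sketch, not part of the patch: the S_COPY, B_COPY, PIXEL_MASK, S_MASKx, MASK_EXPAND and S_MASK bodies are copied verbatim from the hunk above, while adam7_in_pass(), adam7_in_block() and main() are hypothetical helpers written only for this illustration. It compares the bit-twiddled constants against a direct statement of the Adam7 column pattern and prints the expanded sparkle masks.

/* Cross-check for the png_combine_row() mask generators - illustrative only.
 * The macros are copied from the patch; the helpers are assumptions.
 */
#include <stdio.h>

#define S_COPY(p,x) (((p)<4 ? 0x80088822 >> ((3-(p))*8+(7-(x))) :\
   0xaa55ff00 >> ((7-(p))*8+(7-(x)))) & 1)
#define B_COPY(p,x) (((p)<4 ? 0xff0fff33 >> ((3-(p))*8+(7-(x))) :\
   0xff55ff00 >> ((7-(p))*8+(7-(x)))) & 1)
#define PIXEL_MASK(p,x,d,s) (((1U<<(d))-1)<<(((x)*(d))^((s)?8-(d):0)))
#define S_MASKx(p,x,d,s) (S_COPY(p,x)?PIXEL_MASK(p,x,d,s):0)
#define MASK_EXPAND(m,d) ((m)*((d)==1?0x01010101:((d)==2?0x00010001:1)))
#define S_MASK(p,d,s) MASK_EXPAND(S_MASKx(p,0,d,s) + S_MASKx(p,1,d,s) +\
   S_MASKx(p,2,d,s) + S_MASKx(p,3,d,s) + S_MASKx(p,4,d,s) +\
   S_MASKx(p,5,d,s) + S_MASKx(p,6,d,s) + S_MASKx(p,7,d,s), d)

/* Direct statement of the Adam7 'x' pattern: within a row belonging to pass
 * p (0..6), the pass writes pixel x (0..7) iff this returns 1.
 */
static int adam7_in_pass(int p, int x)
{
   static const int first_col[7] = {0, 4, 0, 2, 0, 1, 0};
   static const int col_step[7]  = {8, 8, 4, 4, 2, 2, 1};
   return x >= first_col[p] && (x - first_col[p]) % col_step[p] == 0;
}

/* The 'block' (display) variant replicates each pass pixel rightwards to
 * fill a block 1<<((6-p)>>1) pixels wide, matching bytes_to_copy above.
 */
static int adam7_in_block(int p, int x)
{
   int w = 1 << ((6 - p) >> 1);
   int c;
   for (c = x; c >= 0; --c)
      if (adam7_in_pass(p, c))
         return x - c < w;
   return 0;
}

int main(void)
{
   int p, x, d;

   /* Both copy predicates must agree with the direct Adam7 definitions. */
   for (p = 0; p < 7; ++p)
      for (x = 0; x < 8; ++x)
      {
         if (p < 6 && S_COPY(p, x) != adam7_in_pass(p, x))
            printf("S_COPY mismatch: pass %d, x %d\n", p, x);

         if (B_COPY(p, x) != adam7_in_block(p, x))
            printf("B_COPY mismatch: pass %d, x %d\n", p, x);
      }

   /* Print the sparkle masks in PNG bit order (s=1): the values the patch
    * stores in row_mask[1][DEPTH_INDEX(d)][p].
    */
   for (d = 1; d <= 4; d <<= 1)
      for (p = 0; p < 6; ++p)
         printf("depth %d, pass %d: 0x%08lx\n", d, p,
            (unsigned long)S_MASK(p, d, 1));

   return 0;
}

Under those assumptions the mismatch loops print nothing, and the first table entry (depth 1, pass 0) comes out as 0x80808080 - the most significant bit of each byte, i.e. the first pixel of every group of 8, which is exactly what the rotate-by-8 loop in png_combine_row() consumes one byte at a time. The same expansion shows why B_MASKS has to take all three of its entries from B_MASK: with a sparkle mask in the pass-3 slot, the 'display' copy would stop replicating pixels into columns 3 and 7.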