[libpng15] Added support for ARM processor (Mans Rullgard)

This commit is contained in:
Mans Rullgard 2011-11-03 00:47:55 -05:00 committed by Glenn Randers-Pehrson
parent 2b7d730e27
commit d3a94802d8
8 changed files with 381 additions and 106 deletions

View File

@ -27,6 +27,7 @@ Other information:
Changes since the last public release (1.5.6):
Version 1.5.7 [November 3, 2011]
Added support for ARM processor (Mans Rullgard)
Send comments/corrections/commendations to png-mng-implement at lists.sf.net:
(subscription required; visit

View File

@ -3670,6 +3670,7 @@ Version 1.5.6 [November 3, 2011]
No changes.
Version 1.5.7 [November 3, 2011]
Added support for ARM processor (Mans Rullgard)
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit

220
arm/filter_neon.S Normal file
View File

@ -0,0 +1,220 @@
/* filter_neon.S - NEON optimised filter functions
*
* Copyright (c) 2011 Mans Rullgard
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#ifdef __ELF__
# define ELF
#else
# define ELF @
#endif
.arch armv7-a
.fpu neon
.macro func name, export=0
.macro endfunc
ELF .size \name, . - \name
.endfunc
.purgem endfunc
.endm
.text
.if \export
.global \name
.endif
ELF .type \name, STT_FUNC
.func \name
\name:
.endm
func png_read_filter_row_sub4_neon, export=1
ldr r3, [r0, #4] @ rowbytes
vmov.i8 d3, #0
1:
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
vadd.u8 d0, d3, d4
vadd.u8 d1, d0, d5
vadd.u8 d2, d1, d6
vadd.u8 d3, d2, d7
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
subs r3, r3, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_sub3_neon, export=1
ldr r3, [r0, #4] @ rowbytes
vmov.i8 d3, #0
mov r0, r1
mov r2, #3
mov r12, #12
vld1.8 {q11}, [r0], r12
1:
vext.8 d5, d22, d23, #3
vadd.u8 d0, d3, d22
vext.8 d6, d22, d23, #6
vadd.u8 d1, d0, d5
vext.8 d7, d23, d23, #1
vld1.8 {q11}, [r0], r12
vst1.32 {d0[0]}, [r1,:32], r2
vadd.u8 d2, d1, d6
vst1.32 {d1[0]}, [r1], r2
vadd.u8 d3, d2, d7
vst1.32 {d2[0]}, [r1], r2
vst1.32 {d3[0]}, [r1], r2
subs r3, r3, #12
bgt 1b
bx lr
endfunc
func png_read_filter_row_up_neon, export=1
ldr r3, [r0, #4] @ rowbytes
1:
vld1.8 {q0}, [r1,:128]
vld1.8 {q1}, [r2,:128]!
vadd.u8 q0, q0, q1
vst1.8 {q0}, [r1,:128]!
subs r3, r3, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_avg4_neon, export=1
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
1:
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]!
vhadd.u8 d0, d3, d16
vadd.u8 d0, d0, d4
vhadd.u8 d1, d0, d17
vadd.u8 d1, d1, d5
vhadd.u8 d2, d1, d18
vadd.u8 d2, d2, d6
vhadd.u8 d3, d2, d19
vadd.u8 d3, d3, d7
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
subs r12, r12, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_avg3_neon, export=1
push {r4,lr}
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
mov r0, r1
mov r4, #3
mov lr, #12
vld1.8 {q11}, [r0], lr
1:
vld1.8 {q10}, [r2], lr
vext.8 d5, d22, d23, #3
vhadd.u8 d0, d3, d20
vext.8 d17, d20, d21, #3
vadd.u8 d0, d0, d22
vext.8 d6, d22, d23, #6
vhadd.u8 d1, d0, d17
vext.8 d18, d20, d21, #6
vadd.u8 d1, d1, d5
vext.8 d7, d23, d23, #1
vld1.8 {q11}, [r0], lr
vst1.32 {d0[0]}, [r1,:32], r4
vhadd.u8 d2, d1, d18
vst1.32 {d1[0]}, [r1], r4
vext.8 d19, d21, d21, #1
vadd.u8 d2, d2, d6
vhadd.u8 d3, d2, d19
vst1.32 {d2[0]}, [r1], r4
vadd.u8 d3, d3, d7
vst1.32 {d3[0]}, [r1], r4
subs r12, r12, #12
bgt 1b
pop {r4,pc}
endfunc
.macro paeth rx, ra, rb, rc
vaddl.u8 q12, \ra, \rb @ a + b
vaddl.u8 q15, \rc, \rc @ 2*c
vabdl.u8 q13, \rb, \rc @ pa
vabdl.u8 q14, \ra, \rc @ pb
vabd.u16 q15, q12, q15 @ pc
vcle.u16 q12, q13, q14 @ pa <= pb
vcle.u16 q13, q13, q15 @ pa <= pc
vcle.u16 q14, q14, q15 @ pb <= pc
vand q12, q12, q13 @ pa <= pb && pa <= pc
vmovn.u16 d28, q14
vmovn.u16 \rx, q12
vbsl d28, \rb, \rc
vbsl \rx, \ra, d28
.endm
func png_read_filter_row_paeth4_neon, export=1
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
vmov.i8 d20, #0
1:
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]!
paeth d0, d3, d16, d20
vadd.u8 d0, d0, d4
paeth d1, d0, d17, d16
vadd.u8 d1, d1, d5
paeth d2, d1, d18, d17
vadd.u8 d2, d2, d6
paeth d3, d2, d19, d18
vmov d20, d19
vadd.u8 d3, d3, d7
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
subs r12, r12, #16
bgt 1b
bx lr
endfunc
func png_read_filter_row_paeth3_neon, export=1
push {r4,lr}
ldr r12, [r0, #4] @ rowbytes
vmov.i8 d3, #0
vmov.i8 d4, #0
mov r0, r1
mov r4, #3
mov lr, #12
vld1.8 {q11}, [r0], lr
1:
vld1.8 {q10}, [r2], lr
paeth d0, d3, d20, d4
vext.8 d5, d22, d23, #3
vadd.u8 d0, d0, d22
vext.8 d17, d20, d21, #3
paeth d1, d0, d17, d20
vst1.32 {d0[0]}, [r1,:32], r4
vext.8 d6, d22, d23, #6
vadd.u8 d1, d1, d5
vext.8 d18, d20, d21, #6
paeth d2, d1, d18, d17
vext.8 d7, d23, d23, #1
vld1.8 {q11}, [r0], lr
vst1.32 {d1[0]}, [r1], r4
vadd.u8 d2, d2, d6
vext.8 d19, d21, d21, #1
paeth d3, d2, d19, d18
vst1.32 {d2[0]}, [r1], r4
vmov d4, d19
vadd.u8 d3, d3, d7
vst1.32 {d3[0]}, [r1], r4
subs r12, r12, #12
bgt 1b
pop {r4,pc}
endfunc

View File

@ -985,7 +985,7 @@ png_push_process_row(png_structp png_ptr)
if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE)
{
if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST)
png_read_filter_row(&row_info, png_ptr->row_buf + 1,
png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
png_ptr->prev_row + 1, png_ptr->row_buf[0]);
else
png_error(png_ptr, "bad adaptive filter value");

View File

@ -922,9 +922,24 @@ PNG_EXTERN void png_do_write_interlace PNGARG((png_row_infop row_info,
/* Unfilter a row: check the filter value before calling this, there is no point
* calling it for PNG_FILTER_VALUE_NONE.
*/
PNG_EXTERN void png_read_filter_row PNGARG((png_row_infop row_info,
PNG_EXTERN void png_read_filter_row PNGARG((png_structp pp, png_row_infop row_info,
png_bytep row, png_const_bytep prev_row, int filter));
PNG_EXTERN void png_read_filter_row_up_neon PNGARG((png_row_infop row_info,
png_bytep row, png_const_bytep prev_row));
PNG_EXTERN void png_read_filter_row_sub3_neon PNGARG((png_row_infop row_info,
png_bytep row, png_const_bytep prev_row));
PNG_EXTERN void png_read_filter_row_sub4_neon PNGARG((png_row_infop row_info,
png_bytep row, png_const_bytep prev_row));
PNG_EXTERN void png_read_filter_row_avg3_neon PNGARG((png_row_infop row_info,
png_bytep row, png_const_bytep prev_row));
PNG_EXTERN void png_read_filter_row_avg4_neon PNGARG((png_row_infop row_info,
png_bytep row, png_const_bytep prev_row));
PNG_EXTERN void png_read_filter_row_paeth3_neon PNGARG((png_row_infop row_info,
png_bytep row, png_const_bytep prev_row));
PNG_EXTERN void png_read_filter_row_paeth4_neon PNGARG((png_row_infop row_info,
png_bytep row, png_const_bytep prev_row));
/* Choose the best filter to use and filter the row data */
PNG_EXTERN void png_write_find_filter PNGARG((png_structp png_ptr,
png_row_infop row_info));

View File

@ -578,7 +578,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
if (png_ptr->row_buf[0] > PNG_FILTER_VALUE_NONE)
{
if (png_ptr->row_buf[0] < PNG_FILTER_VALUE_LAST)
png_read_filter_row(&row_info, png_ptr->row_buf + 1,
png_read_filter_row(png_ptr, &row_info, png_ptr->row_buf + 1,
png_ptr->prev_row + 1, png_ptr->row_buf[0]);
else
png_error(png_ptr, "bad adaptive filter value");

View File

@ -3498,19 +3498,9 @@ png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass,
}
#endif /* PNG_READ_INTERLACING_SUPPORTED */
/* 1.5.6: Changed to just take a png_row_info (not png_ptr) and to ignore bad
* adaptive filter bytes.
*/
void /* PRIVATE */
png_read_filter_row(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row, int filter)
{
switch (filter)
{
case PNG_FILTER_VALUE_NONE:
break;
case PNG_FILTER_VALUE_SUB:
static void
png_read_filter_row_sub(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_size_t istop = row_info->rowbytes;
@ -3518,14 +3508,18 @@ png_read_filter_row(png_row_infop row_info, png_bytep row,
png_bytep rp = row + bpp;
png_bytep lp = row;
PNG_UNUSED(prev_row)
for (i = bpp; i < istop; i++)
{
*rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);
rp++;
}
break;
}
case PNG_FILTER_VALUE_UP:
static void
png_read_filter_row_up(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_size_t istop = row_info->rowbytes;
@ -3537,9 +3531,11 @@ png_read_filter_row(png_row_infop row_info, png_bytep row,
*rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
rp++;
}
break;
}
case PNG_FILTER_VALUE_AVG:
static void
png_read_filter_row_avg(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_bytep rp = row;
@ -3563,9 +3559,11 @@ png_read_filter_row(png_row_infop row_info, png_bytep row,
rp++;
}
break;
}
case PNG_FILTER_VALUE_PAETH:
static void
png_read_filter_row_paeth(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
png_bytep rp = row;
@ -3618,13 +3616,50 @@ png_read_filter_row(png_row_infop row_info, png_bytep row,
*rp = (png_byte)(((int)(*rp) + p) & 0xff);
rp++;
}
break;
}
default:
/* NOT REACHED */
break;
#ifdef PNG_ARM_NEON
static void
png_init_filter_functions_neon(png_structp pp)
{
unsigned int bpp = (pp->pixel_depth + 7) >> 3;
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
if (bpp == 3) {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_neon;
} else if (bpp == 4) {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_neon;
}
}
#endif
static void
png_init_filter_functions(png_structp pp)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub;
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth;
#ifdef PNG_ARM_NEON
png_init_filter_functions_neon(pp);
#endif
}
void /* PRIVATE */
png_read_filter_row(png_structp pp, png_row_infop row_info, png_bytep row,
png_const_bytep prev_row, int filter)
{
if (pp->read_filter[0] == NULL)
png_init_filter_functions(pp);
if (filter > PNG_FILTER_VALUE_NONE && filter < PNG_FILTER_VALUE_LAST)
pp->read_filter[filter-1](row_info, row, prev_row);
}
#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
void /* PRIVATE */

View File

@ -353,5 +353,8 @@ struct png_struct_def
/* New member added in libpng-1.5.6 */
png_bytep big_prev_row;
void (*read_filter[PNG_FILTER_VALUE_LAST-1])(png_row_infop row_info,
png_bytep row, png_const_bytep prev_row);
};
#endif /* PNGSTRUCT_H */