Fixed filter_paeth for PowerPC VSX

Now all tests are fine for ppc64le
This commit is contained in:
Vadim Barkov 2017-02-09 18:57:53 +00:00
parent d4bdca45b3
commit 99d7285f20

View File

@ -448,6 +448,32 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
*rp++ = (png_byte)a;\
}
#define VEC_CHAR_ZERO (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
#ifdef __LITTLE_ENDIAN__
# define VEC_CHAR_TO_SHORT1_4 (vector unsigned char){ 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT2_4 (vector unsigned char){ 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT3_4 (vector unsigned char){12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16}
# define VEC_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6}
# define VEC_CHAR_TO_SHORT1_3 (vector unsigned char){ 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT2_3 (vector unsigned char){ 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT3_3 (vector unsigned char){ 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT4_3 (vector unsigned char){12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16}
# define VEC_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16}
#endif
#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VEC_CHAR_ZERO,VEC_CHAR_TO_SHORT##offset##_##bpp)
#define vsx_short_to_char(vec,offset,bpp) vec_perm((vector unsigned char)(vec),VEC_CHAR_ZERO,VEC_SHORT_TO_CHAR##offset##_##bpp)
void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
@ -456,10 +482,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
int a, b, c, pa, pb, pc, p;
vector unsigned char rp_vec;
vector unsigned char pp_vec;
vector unsigned char a_vec,b_vec,c_vec,nearest_vec;
vector signed char pa_vec,pb_vec,pc_vec;
vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec;
vector unsigned char zero_vec = {0};
vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp;
@ -493,56 +518,68 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
rp_vec = vec_ld(0,rp);
vec_ld_unaligned(pp_vec,pp);
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_4);
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4);
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_4);
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_4),1,4);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4),1,4);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_4),1,4);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec);
pa_vec = vec_abs(pa_vec);
pb_vec = vec_abs(pb_vec);
pc_vec = vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
vec_cmpeq(pa_vec_abs,smallest_vec),
vec_cmpeq(pa_vec,smallest_vec),
a_vec,
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
);
rp_vec = vec_add(rp_vec, nearest_vec);
if_then_else(
vec_cmpeq(pb_vec,smallest_vec),
b_vec,
c_vec
)
);
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_4);
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4);
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_4);
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_4),2,4);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4),2,4);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_4),2,4);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec);
pa_vec = vec_abs(pa_vec);
pb_vec = vec_abs(pb_vec);
pc_vec = vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
vec_cmpeq(pa_vec_abs,smallest_vec),
vec_cmpeq(pa_vec,smallest_vec),
a_vec,
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
);
rp_vec = vec_add(rp_vec, nearest_vec);
if_then_else(
vec_cmpeq(pb_vec,smallest_vec),
b_vec,
c_vec
)
);
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_4);
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4);
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_4);
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_4),3,4);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4),3,4);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_4),3,4);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec);
pa_vec = vec_abs(pa_vec);
pb_vec = vec_abs(pb_vec);
pc_vec = vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
vec_cmpeq(pa_vec_abs,smallest_vec),
vec_cmpeq(pa_vec,smallest_vec),
a_vec,
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
);
rp_vec = vec_add(rp_vec, nearest_vec);
if_then_else(
vec_cmpeq(pb_vec,smallest_vec),
b_vec,
c_vec
)
);
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4)));
vec_st(rp_vec,0,rp);
@ -566,10 +603,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
int a, b, c, pa, pb, pc, p;
vector unsigned char rp_vec;
vector unsigned char pp_vec;
vector unsigned char a_vec,b_vec,c_vec,nearest_vec;
vector signed char pa_vec,pb_vec,pc_vec;
vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec;
vector unsigned char zero_vec = {0};
vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp;
@ -603,73 +639,89 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
rp_vec = vec_ld(0,rp);
vec_ld_unaligned(pp_vec,pp);
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_3);
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3);
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_3);
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_3),1,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3),1,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_3),1,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec);
pa_vec = vec_abs(pa_vec);
pb_vec = vec_abs(pb_vec);
pc_vec = vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
vec_cmpeq(pa_vec_abs,smallest_vec),
vec_cmpeq(pa_vec,smallest_vec),
a_vec,
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
);
rp_vec = vec_add(rp_vec, nearest_vec);
if_then_else(
vec_cmpeq(pb_vec,smallest_vec),
b_vec,
c_vec
)
);
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_3);
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3);
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_3);
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_3),2,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3),2,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_3),2,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec);
pa_vec = vec_abs(pa_vec);
pb_vec = vec_abs(pb_vec);
pc_vec = vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
vec_cmpeq(pa_vec_abs,smallest_vec),
vec_cmpeq(pa_vec,smallest_vec),
a_vec,
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
);
rp_vec = vec_add(rp_vec, nearest_vec);
if_then_else(
vec_cmpeq(pb_vec,smallest_vec),
b_vec,
c_vec
)
);
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_3);
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3);
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_3);
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_3),3,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3),3,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_3),3,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec);
pa_vec = vec_abs(pa_vec);
pb_vec = vec_abs(pb_vec);
pc_vec = vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
vec_cmpeq(pa_vec_abs,smallest_vec),
vec_cmpeq(pa_vec,smallest_vec),
a_vec,
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
);
rp_vec = vec_add(rp_vec, nearest_vec);
if_then_else(
vec_cmpeq(pb_vec,smallest_vec),
b_vec,
c_vec
)
);
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT4_3);
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3);
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT4_3);
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT4_3),4,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3),4,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT4_3),4,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec);
pa_vec = vec_abs(pa_vec);
pb_vec = vec_abs(pb_vec);
pc_vec = vec_abs(pc_vec);
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
vec_cmpeq(pa_vec_abs,smallest_vec),
vec_cmpeq(pa_vec,smallest_vec),
a_vec,
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
);
rp_vec = vec_add(rp_vec, nearest_vec);
if_then_else(
vec_cmpeq(pb_vec,smallest_vec),
b_vec,
c_vec
)
);
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3)));
vec_st(rp_vec,0,rp);