Fixed filter_paeth for PowerPC VSX
Now all tests are fine for ppc64le
This commit is contained in:
parent
d4bdca45b3
commit
99d7285f20
@ -448,6 +448,32 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
|
||||
*rp++ = (png_byte)a;\
|
||||
}
|
||||
|
||||
#define VEC_CHAR_ZERO (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
|
||||
# define VEC_CHAR_TO_SHORT1_4 (vector unsigned char){ 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16}
|
||||
# define VEC_CHAR_TO_SHORT2_4 (vector unsigned char){ 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16}
|
||||
# define VEC_CHAR_TO_SHORT3_4 (vector unsigned char){12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16}
|
||||
|
||||
# define VEC_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16}
|
||||
# define VEC_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16}
|
||||
# define VEC_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6}
|
||||
|
||||
# define VEC_CHAR_TO_SHORT1_3 (vector unsigned char){ 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16}
|
||||
# define VEC_CHAR_TO_SHORT2_3 (vector unsigned char){ 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16}
|
||||
# define VEC_CHAR_TO_SHORT3_3 (vector unsigned char){ 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16}
|
||||
# define VEC_CHAR_TO_SHORT4_3 (vector unsigned char){12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16}
|
||||
|
||||
# define VEC_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16}
|
||||
# define VEC_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16}
|
||||
# define VEC_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16}
|
||||
# define VEC_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16}
|
||||
|
||||
#endif
|
||||
|
||||
#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VEC_CHAR_ZERO,VEC_CHAR_TO_SHORT##offset##_##bpp)
|
||||
#define vsx_short_to_char(vec,offset,bpp) vec_perm((vector unsigned char)(vec),VEC_CHAR_ZERO,VEC_SHORT_TO_CHAR##offset##_##bpp)
|
||||
|
||||
void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev_row)
|
||||
{
|
||||
@ -456,10 +482,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
|
||||
int a, b, c, pa, pb, pc, p;
|
||||
vector unsigned char rp_vec;
|
||||
vector unsigned char pp_vec;
|
||||
vector unsigned char a_vec,b_vec,c_vec,nearest_vec;
|
||||
vector signed char pa_vec,pb_vec,pc_vec;
|
||||
vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec;
|
||||
vector unsigned char zero_vec = {0};
|
||||
vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
|
||||
vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
|
||||
|
||||
declare_common_vars(row_info,row,prev_row,bpp)
|
||||
rp -= bpp;
|
||||
@ -493,56 +518,68 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
|
||||
rp_vec = vec_ld(0,rp);
|
||||
vec_ld_unaligned(pp_vec,pp);
|
||||
|
||||
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_4);
|
||||
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4);
|
||||
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_4);
|
||||
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
|
||||
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
|
||||
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
|
||||
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
|
||||
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
|
||||
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_4),1,4);
|
||||
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4),1,4);
|
||||
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_4),1,4);
|
||||
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
|
||||
pc_vec = vec_add(pa_vec,pb_vec);
|
||||
pa_vec = vec_abs(pa_vec);
|
||||
pb_vec = vec_abs(pb_vec);
|
||||
pc_vec = vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
|
||||
nearest_vec = if_then_else(
|
||||
vec_cmpeq(pa_vec_abs,smallest_vec),
|
||||
vec_cmpeq(pa_vec,smallest_vec),
|
||||
a_vec,
|
||||
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
|
||||
if_then_else(
|
||||
vec_cmpeq(pb_vec,smallest_vec),
|
||||
b_vec,
|
||||
c_vec
|
||||
)
|
||||
);
|
||||
rp_vec = vec_add(rp_vec, nearest_vec);
|
||||
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
|
||||
|
||||
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_4);
|
||||
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4);
|
||||
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_4);
|
||||
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
|
||||
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
|
||||
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
|
||||
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
|
||||
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
|
||||
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_4),2,4);
|
||||
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4),2,4);
|
||||
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_4),2,4);
|
||||
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
|
||||
pc_vec = vec_add(pa_vec,pb_vec);
|
||||
pa_vec = vec_abs(pa_vec);
|
||||
pb_vec = vec_abs(pb_vec);
|
||||
pc_vec = vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
|
||||
nearest_vec = if_then_else(
|
||||
vec_cmpeq(pa_vec_abs,smallest_vec),
|
||||
vec_cmpeq(pa_vec,smallest_vec),
|
||||
a_vec,
|
||||
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
|
||||
if_then_else(
|
||||
vec_cmpeq(pb_vec,smallest_vec),
|
||||
b_vec,
|
||||
c_vec
|
||||
)
|
||||
);
|
||||
rp_vec = vec_add(rp_vec, nearest_vec);
|
||||
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
|
||||
|
||||
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_4);
|
||||
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4);
|
||||
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_4);
|
||||
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
|
||||
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
|
||||
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
|
||||
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
|
||||
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
|
||||
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_4),3,4);
|
||||
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4),3,4);
|
||||
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_4),3,4);
|
||||
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
|
||||
pc_vec = vec_add(pa_vec,pb_vec);
|
||||
pa_vec = vec_abs(pa_vec);
|
||||
pb_vec = vec_abs(pb_vec);
|
||||
pc_vec = vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
|
||||
nearest_vec = if_then_else(
|
||||
vec_cmpeq(pa_vec_abs,smallest_vec),
|
||||
vec_cmpeq(pa_vec,smallest_vec),
|
||||
a_vec,
|
||||
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
|
||||
if_then_else(
|
||||
vec_cmpeq(pb_vec,smallest_vec),
|
||||
b_vec,
|
||||
c_vec
|
||||
)
|
||||
);
|
||||
rp_vec = vec_add(rp_vec, nearest_vec);
|
||||
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4)));
|
||||
|
||||
vec_st(rp_vec,0,rp);
|
||||
|
||||
@ -566,10 +603,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
|
||||
int a, b, c, pa, pb, pc, p;
|
||||
vector unsigned char rp_vec;
|
||||
vector unsigned char pp_vec;
|
||||
vector unsigned char a_vec,b_vec,c_vec,nearest_vec;
|
||||
vector signed char pa_vec,pb_vec,pc_vec;
|
||||
vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec;
|
||||
vector unsigned char zero_vec = {0};
|
||||
vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
|
||||
vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
|
||||
|
||||
declare_common_vars(row_info,row,prev_row,bpp)
|
||||
rp -= bpp;
|
||||
@ -603,73 +639,89 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
|
||||
rp_vec = vec_ld(0,rp);
|
||||
vec_ld_unaligned(pp_vec,pp);
|
||||
|
||||
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_3);
|
||||
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3);
|
||||
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_3);
|
||||
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
|
||||
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
|
||||
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
|
||||
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
|
||||
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
|
||||
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_3),1,3);
|
||||
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3),1,3);
|
||||
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_3),1,3);
|
||||
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
|
||||
pc_vec = vec_add(pa_vec,pb_vec);
|
||||
pa_vec = vec_abs(pa_vec);
|
||||
pb_vec = vec_abs(pb_vec);
|
||||
pc_vec = vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
|
||||
nearest_vec = if_then_else(
|
||||
vec_cmpeq(pa_vec_abs,smallest_vec),
|
||||
vec_cmpeq(pa_vec,smallest_vec),
|
||||
a_vec,
|
||||
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
|
||||
if_then_else(
|
||||
vec_cmpeq(pb_vec,smallest_vec),
|
||||
b_vec,
|
||||
c_vec
|
||||
)
|
||||
);
|
||||
rp_vec = vec_add(rp_vec, nearest_vec);
|
||||
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
|
||||
|
||||
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_3);
|
||||
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3);
|
||||
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_3);
|
||||
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
|
||||
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
|
||||
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
|
||||
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
|
||||
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
|
||||
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_3),2,3);
|
||||
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3),2,3);
|
||||
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_3),2,3);
|
||||
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
|
||||
pc_vec = vec_add(pa_vec,pb_vec);
|
||||
pa_vec = vec_abs(pa_vec);
|
||||
pb_vec = vec_abs(pb_vec);
|
||||
pc_vec = vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
|
||||
nearest_vec = if_then_else(
|
||||
vec_cmpeq(pa_vec_abs,smallest_vec),
|
||||
vec_cmpeq(pa_vec,smallest_vec),
|
||||
a_vec,
|
||||
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
|
||||
if_then_else(
|
||||
vec_cmpeq(pb_vec,smallest_vec),
|
||||
b_vec,
|
||||
c_vec
|
||||
)
|
||||
);
|
||||
rp_vec = vec_add(rp_vec, nearest_vec);
|
||||
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
|
||||
|
||||
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_3);
|
||||
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3);
|
||||
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_3);
|
||||
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
|
||||
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
|
||||
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
|
||||
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
|
||||
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
|
||||
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_3),3,3);
|
||||
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3),3,3);
|
||||
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_3),3,3);
|
||||
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
|
||||
pc_vec = vec_add(pa_vec,pb_vec);
|
||||
pa_vec = vec_abs(pa_vec);
|
||||
pb_vec = vec_abs(pb_vec);
|
||||
pc_vec = vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
|
||||
nearest_vec = if_then_else(
|
||||
vec_cmpeq(pa_vec_abs,smallest_vec),
|
||||
vec_cmpeq(pa_vec,smallest_vec),
|
||||
a_vec,
|
||||
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
|
||||
if_then_else(
|
||||
vec_cmpeq(pb_vec,smallest_vec),
|
||||
b_vec,
|
||||
c_vec
|
||||
)
|
||||
);
|
||||
rp_vec = vec_add(rp_vec, nearest_vec);
|
||||
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
|
||||
|
||||
a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT4_3);
|
||||
b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3);
|
||||
c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT4_3);
|
||||
pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
|
||||
pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
|
||||
pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
|
||||
pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
|
||||
pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
|
||||
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT4_3),4,3);
|
||||
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3),4,3);
|
||||
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT4_3),4,3);
|
||||
pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
|
||||
pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
|
||||
pc_vec = vec_add(pa_vec,pb_vec);
|
||||
pa_vec = vec_abs(pa_vec);
|
||||
pb_vec = vec_abs(pb_vec);
|
||||
pc_vec = vec_abs(pc_vec);
|
||||
smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
|
||||
nearest_vec = if_then_else(
|
||||
vec_cmpeq(pa_vec_abs,smallest_vec),
|
||||
vec_cmpeq(pa_vec,smallest_vec),
|
||||
a_vec,
|
||||
if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
|
||||
if_then_else(
|
||||
vec_cmpeq(pb_vec,smallest_vec),
|
||||
b_vec,
|
||||
c_vec
|
||||
)
|
||||
);
|
||||
rp_vec = vec_add(rp_vec, nearest_vec);
|
||||
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3)));
|
||||
|
||||
vec_st(rp_vec,0,rp);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user