From 99d7285f206685aa80eea874cc17d0a6304761c2 Mon Sep 17 00:00:00 2001 From: Vadim Barkov Date: Thu, 9 Feb 2017 18:57:53 +0000 Subject: [PATCH] Fixed filter_paeth for PowerPC VSX Now all tests are fine for ppc64le --- powerpc/filter_vsx_intrinsics.c | 260 +++++++++++++++++++------------- 1 file changed, 156 insertions(+), 104 deletions(-) diff --git a/powerpc/filter_vsx_intrinsics.c b/powerpc/filter_vsx_intrinsics.c index 9376bc885..e1ba064a6 100644 --- a/powerpc/filter_vsx_intrinsics.c +++ b/powerpc/filter_vsx_intrinsics.c @@ -448,6 +448,32 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row, *rp++ = (png_byte)a;\ } +#define VEC_CHAR_ZERO (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} +#ifdef __LITTLE_ENDIAN__ + +# define VEC_CHAR_TO_SHORT1_4 (vector unsigned char){ 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT2_4 (vector unsigned char){ 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT3_4 (vector unsigned char){12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16} + +# define VEC_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16} +# define VEC_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16} +# define VEC_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6} + +# define VEC_CHAR_TO_SHORT1_3 (vector unsigned char){ 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT2_3 (vector unsigned char){ 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT3_3 (vector unsigned char){ 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT4_3 (vector unsigned char){12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16} + +# define VEC_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16} +# define VEC_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16} +# define VEC_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16} +# define VEC_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16} + +#endif + +#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VEC_CHAR_ZERO,VEC_CHAR_TO_SHORT##offset##_##bpp) +#define vsx_short_to_char(vec,offset,bpp) vec_perm((vector unsigned char)(vec),VEC_CHAR_ZERO,VEC_SHORT_TO_CHAR##offset##_##bpp) + void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -456,10 +482,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, int a, b, c, pa, pb, pc, p; vector unsigned char rp_vec; vector unsigned char pp_vec; - vector unsigned char a_vec,b_vec,c_vec,nearest_vec; - vector signed char pa_vec,pb_vec,pc_vec; - vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec; vector unsigned char zero_vec = {0}; + vector unsigned short a_vec,b_vec,c_vec,nearest_vec; + vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; declare_common_vars(row_info,row,prev_row,bpp) rp -= bpp; @@ -493,56 +518,68 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, rp_vec = vec_ld(0,rp); vec_ld_unaligned(pp_vec,pp); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_4); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_4); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_4),1,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4),1,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_4),1,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4))); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_4); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_4); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_4),2,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4),2,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_4),2,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4))); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_4); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_4); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_4),3,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4),3,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_4),3,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4))); vec_st(rp_vec,0,rp); @@ -566,10 +603,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, int a, b, c, pa, pb, pc, p; vector unsigned char rp_vec; vector unsigned char pp_vec; - vector unsigned char a_vec,b_vec,c_vec,nearest_vec; - vector signed char pa_vec,pb_vec,pc_vec; - vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec; vector unsigned char zero_vec = {0}; + vector unsigned short a_vec,b_vec,c_vec,nearest_vec; + vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; declare_common_vars(row_info,row,prev_row,bpp) rp -= bpp; @@ -603,73 +639,89 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, rp_vec = vec_ld(0,rp); vec_ld_unaligned(pp_vec,pp); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_3),1,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3),1,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_3),1,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3))); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_3),2,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3),2,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_3),2,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3))); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_3),3,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3),3,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_3),3,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3))); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT4_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT4_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT4_3),4,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3),4,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT4_3),4,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3))); vec_st(rp_vec,0,rp);