Refactoring

All defines in the VSX filter file are now prefixed with VSX
and have clearer names.
This commit is contained in:
Vadim Barkov 2017-02-10 07:20:59 +00:00
parent d1c12e85c1
commit 310dee21f7

View File

@ -38,7 +38,7 @@
* ( this is taken from ../intel/filter_sse2_intrinsics.c ) * ( this is taken from ../intel/filter_sse2_intrinsics.c )
*/ */
#define declare_common_vars(row_info,row,prev_row,offset) \ #define vsx_declare_common_vars(row_info,row,prev_row,offset) \
png_size_t i;\ png_size_t i;\
png_bytep rp = row + offset;\ png_bytep rp = row + offset;\
png_const_bytep pp = prev_row;\ png_const_bytep pp = prev_row;\
@ -59,7 +59,7 @@ void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
{ {
vector unsigned char rp_vec; vector unsigned char rp_vec;
vector unsigned char pp_vec; vector unsigned char pp_vec;
declare_common_vars(row_info,row,prev_row,0) vsx_declare_common_vars(row_info,row,prev_row,0)
/* Altivec operations require 16-byte aligned data /* Altivec operations require 16-byte aligned data
* but input can be unaligned. So we calculate * but input can be unaligned. So we calculate
@ -100,24 +100,69 @@ void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
} }
#define VEC_SELECT1_4 (vector unsigned char){16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16} #define VSX_LEFTSHIFTED1_4 (vector unsigned char){16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16}
#define VEC_SELECT2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 4, 5, 6, 7,16,16,16,16} #define VSX_LEFTSHIFTED2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 4, 5, 6, 7,16,16,16,16}
#define VEC_SELECT3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 8, 9,10,11} #define VSX_LEFTSHIFTED3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 8, 9,10,11}
#define VEC_SELECT1_3 (vector unsigned char){16,16,16, 0, 1, 2,16,16,16,16,16,16,16,16,16,16} #define VSX_LEFTSHIFTED1_3 (vector unsigned char){16,16,16, 0, 1, 2,16,16,16,16,16,16,16,16,16,16}
#define VEC_SELECT2_3 (vector unsigned char){16,16,16,16,16,16, 3, 4, 5,16,16,16,16,16,16,16} #define VSX_LEFTSHIFTED2_3 (vector unsigned char){16,16,16,16,16,16, 3, 4, 5,16,16,16,16,16,16,16}
#define VEC_SELECT3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 6, 7, 8,16,16,16,16} #define VSX_LEFTSHIFTED3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 6, 7, 8,16,16,16,16}
#define VEC_SELECT4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 9,10,11,16} #define VSX_LEFTSHIFTED4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 9,10,11,16}
#define VSX_NOT_SHIFTED1_4 (vector unsigned char){16,16,16,16, 4, 5, 6, 7,16,16,16,16,16,16,16,16}
#define VSX_NOT_SHIFTED2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 8, 9,10,11,16,16,16,16}
#define VSX_NOT_SHIFTED3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,15}
#define VEC_AVG_SELECT1_4 (vector unsigned char){16,16,16,16, 4, 5, 6, 7,16,16,16,16,16,16,16,16} #define VSX_NOT_SHIFTED1_3 (vector unsigned char){16,16,16, 3, 4, 5,16,16,16,16,16,16,16,16,16,16}
#define VEC_AVG_SELECT2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 8, 9,10,11,16,16,16,16} #define VSX_NOT_SHIFTED2_3 (vector unsigned char){16,16,16,16,16,16, 6, 7, 8,16,16,16,16,16,16,16}
#define VEC_AVG_SELECT3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,15} #define VSX_NOT_SHIFTED3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 9,10,11,16,16,16,16}
#define VSX_NOT_SHIFTED4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,16}
#define VEC_AVG_SELECT1_3 (vector unsigned char){16,16,16, 3, 4, 5,16,16,16,16,16,16,16,16,16,16} #define VSX_CHAR_ZERO (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
#define VEC_AVG_SELECT2_3 (vector unsigned char){16,16,16,16,16,16, 6, 7, 8,16,16,16,16,16,16,16} #ifdef __LITTLE_ENDIAN__
#define VEC_AVG_SELECT3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 9,10,11,16,16,16,16}
#define VEC_AVG_SELECT4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,16} # define VSX_CHAR_TO_SHORT1_4 (vector unsigned char){ 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT2_4 (vector unsigned char){ 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT3_4 (vector unsigned char){12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16}
# define VSX_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6}
# define VSX_CHAR_TO_SHORT1_3 (vector unsigned char){ 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT2_3 (vector unsigned char){ 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT3_3 (vector unsigned char){ 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT4_3 (vector unsigned char){12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16}
# define VSX_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16}
#elif defined(__BIG_ENDIAN__)
# define VSX_CHAR_TO_SHORT1_4 (vector unsigned char){16, 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT2_4 (vector unsigned char){16, 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT3_4 (vector unsigned char){16,12,16,13,16,14,16,15,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 1, 3, 5, 7,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 1, 3, 5, 7,16,16,16,16}
# define VSX_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5, 7}
# define VSX_CHAR_TO_SHORT1_3 (vector unsigned char){16, 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT2_3 (vector unsigned char){16, 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT3_3 (vector unsigned char){16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16}
# define VSX_CHAR_TO_SHORT4_3 (vector unsigned char){16,12,16,13,16,14,16,16,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 1, 3, 5,16,16,16,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 1, 3, 5,16,16,16,16,16,16,16}
# define VSX_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 1, 3, 5,16,16,16,16}
# define VSX_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5,16}
#endif
#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VSX_CHAR_ZERO,VSX_CHAR_TO_SHORT##offset##_##bpp)
#define vsx_short_to_char(vec,offset,bpp) vec_perm((vector unsigned char)(vec),VSX_CHAR_ZERO,VSX_SHORT_TO_CHAR##offset##_##bpp)
#ifdef PNG_USE_ABS #ifdef PNG_USE_ABS
# define vsx_abs(number) abs(number) # define vsx_abs(number) abs(number)
@ -132,9 +177,8 @@ void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
vector unsigned char rp_vec; vector unsigned char rp_vec;
vector unsigned char part_vec; vector unsigned char part_vec;
vector unsigned char zero_vec = {0};
declare_common_vars(row_info,row,prev_row,bpp) vsx_declare_common_vars(row_info,row,prev_row,bpp)
PNG_UNUSED(pp) PNG_UNUSED(pp)
@ -155,13 +199,13 @@ void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
*(rp+i) += *(rp+i - bpp); *(rp+i) += *(rp+i - bpp);
rp_vec = vec_ld(0,rp); rp_vec = vec_ld(0,rp);
part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT1_4); part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
rp_vec = vec_add(rp_vec,part_vec); rp_vec = vec_add(rp_vec,part_vec);
part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT2_4); part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
rp_vec = vec_add(rp_vec,part_vec); rp_vec = vec_add(rp_vec,part_vec);
part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT3_4); part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
rp_vec = vec_add(rp_vec,part_vec); rp_vec = vec_add(rp_vec,part_vec);
vec_st(rp_vec,0,rp); vec_st(rp_vec,0,rp);
@ -186,9 +230,8 @@ void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
vector unsigned char rp_vec; vector unsigned char rp_vec;
vector unsigned char part_vec; vector unsigned char part_vec;
vector unsigned char zero_vec = {0};
declare_common_vars(row_info,row,prev_row,bpp) vsx_declare_common_vars(row_info,row,prev_row,bpp)
PNG_UNUSED(pp) PNG_UNUSED(pp)
@ -209,16 +252,16 @@ void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
*(rp+i) += *(rp+i - bpp); *(rp+i) += *(rp+i - bpp);
rp_vec = vec_ld(0,rp); rp_vec = vec_ld(0,rp);
part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT1_3); part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
rp_vec = vec_add(rp_vec,part_vec); rp_vec = vec_add(rp_vec,part_vec);
part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT2_3); part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
rp_vec = vec_add(rp_vec,part_vec); rp_vec = vec_add(rp_vec,part_vec);
part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT3_3); part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
rp_vec = vec_add(rp_vec,part_vec); rp_vec = vec_add(rp_vec,part_vec);
part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT4_3); part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
rp_vec = vec_add(rp_vec,part_vec); rp_vec = vec_add(rp_vec,part_vec);
vec_st(rp_vec,0,rp); vec_st(rp_vec,0,rp);
@ -249,9 +292,8 @@ void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
vector unsigned char pp_part_vec; vector unsigned char pp_part_vec;
vector unsigned char rp_part_vec; vector unsigned char rp_part_vec;
vector unsigned char avg_vec; vector unsigned char avg_vec;
vector unsigned char zero_vec = {0};
declare_common_vars(row_info,row,prev_row,bpp) vsx_declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp; rp -= bpp;
if(istop >= bpp) if(istop >= bpp)
istop -= bpp; istop -= bpp;
@ -292,20 +334,20 @@ void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
vec_ld_unaligned(pp_vec,pp); vec_ld_unaligned(pp_vec,pp);
rp_vec = vec_ld(0,rp); rp_vec = vec_ld(0,rp);
rp_part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT1_4); rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
pp_part_vec = vec_perm(pp_vec,zero_vec,VEC_AVG_SELECT1_4); pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_4);
avg_vec = vec_avg(rp_part_vec,pp_part_vec); avg_vec = vec_avg(rp_part_vec,pp_part_vec);
avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1))); avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
rp_vec = vec_add(rp_vec,avg_vec); rp_vec = vec_add(rp_vec,avg_vec);
rp_part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT2_4); rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
pp_part_vec = vec_perm(pp_vec,zero_vec,VEC_AVG_SELECT2_4); pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_4);
avg_vec = vec_avg(rp_part_vec,pp_part_vec); avg_vec = vec_avg(rp_part_vec,pp_part_vec);
avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1))); avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
rp_vec = vec_add(rp_vec,avg_vec); rp_vec = vec_add(rp_vec,avg_vec);
rp_part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT3_4); rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
pp_part_vec = vec_perm(pp_vec,zero_vec,VEC_AVG_SELECT3_4); pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_4);
avg_vec = vec_avg(rp_part_vec,pp_part_vec); avg_vec = vec_avg(rp_part_vec,pp_part_vec);
avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1))); avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
rp_vec = vec_add(rp_vec,avg_vec); rp_vec = vec_add(rp_vec,avg_vec);
@ -337,9 +379,8 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
vector unsigned char pp_part_vec; vector unsigned char pp_part_vec;
vector unsigned char rp_part_vec; vector unsigned char rp_part_vec;
vector unsigned char avg_vec; vector unsigned char avg_vec;
vector unsigned char zero_vec = {0};
declare_common_vars(row_info,row,prev_row,bpp) vsx_declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp; rp -= bpp;
if(istop >= bpp) if(istop >= bpp)
istop -= bpp; istop -= bpp;
@ -380,26 +421,26 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
vec_ld_unaligned(pp_vec,pp); vec_ld_unaligned(pp_vec,pp);
rp_vec = vec_ld(0,rp); rp_vec = vec_ld(0,rp);
rp_part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT1_3); rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
pp_part_vec = vec_perm(pp_vec,zero_vec,VEC_AVG_SELECT1_3); pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_3);
avg_vec = vec_avg(rp_part_vec,pp_part_vec); avg_vec = vec_avg(rp_part_vec,pp_part_vec);
avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1))); avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
rp_vec = vec_add(rp_vec,avg_vec); rp_vec = vec_add(rp_vec,avg_vec);
rp_part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT2_3); rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
pp_part_vec = vec_perm(pp_vec,zero_vec,VEC_AVG_SELECT2_3); pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_3);
avg_vec = vec_avg(rp_part_vec,pp_part_vec); avg_vec = vec_avg(rp_part_vec,pp_part_vec);
avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1))); avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
rp_vec = vec_add(rp_vec,avg_vec); rp_vec = vec_add(rp_vec,avg_vec);
rp_part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT3_3); rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
pp_part_vec = vec_perm(pp_vec,zero_vec,VEC_AVG_SELECT3_3); pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_3);
avg_vec = vec_avg(rp_part_vec,pp_part_vec); avg_vec = vec_avg(rp_part_vec,pp_part_vec);
avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1))); avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
rp_vec = vec_add(rp_vec,avg_vec); rp_vec = vec_add(rp_vec,avg_vec);
rp_part_vec = vec_perm(rp_vec,zero_vec,VEC_SELECT4_3); rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
pp_part_vec = vec_perm(pp_vec,zero_vec,VEC_AVG_SELECT4_3); pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED4_3);
avg_vec = vec_avg(rp_part_vec,pp_part_vec); avg_vec = vec_avg(rp_part_vec,pp_part_vec);
avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1))); avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
rp_vec = vec_add(rp_vec,avg_vec); rp_vec = vec_add(rp_vec,avg_vec);
@ -446,52 +487,6 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
*rp++ = (png_byte)a;\ *rp++ = (png_byte)a;\
} }
#define VEC_CHAR_ZERO (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
#ifdef __LITTLE_ENDIAN__
# define VEC_CHAR_TO_SHORT1_4 (vector unsigned char){ 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT2_4 (vector unsigned char){ 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT3_4 (vector unsigned char){12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16}
# define VEC_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6}
# define VEC_CHAR_TO_SHORT1_3 (vector unsigned char){ 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT2_3 (vector unsigned char){ 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT3_3 (vector unsigned char){ 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT4_3 (vector unsigned char){12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16}
# define VEC_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16}
#elif defined(__BIG_ENDIAN__)
# define VEC_CHAR_TO_SHORT1_4 (vector unsigned char){16, 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT2_4 (vector unsigned char){16, 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT3_4 (vector unsigned char){16,12,16,13,16,14,16,15,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 1, 3, 5, 7,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 1, 3, 5, 7,16,16,16,16}
# define VEC_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5, 7}
# define VEC_CHAR_TO_SHORT1_3 (vector unsigned char){16, 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT2_3 (vector unsigned char){16, 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT3_3 (vector unsigned char){16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16}
# define VEC_CHAR_TO_SHORT4_3 (vector unsigned char){16,12,16,13,16,14,16,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 1, 3, 5,16,16,16,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 1, 3, 5,16,16,16,16,16,16,16}
# define VEC_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 1, 3, 5,16,16,16,16}
# define VEC_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5,16}
#endif
#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VEC_CHAR_ZERO,VEC_CHAR_TO_SHORT##offset##_##bpp)
#define vsx_short_to_char(vec,offset,bpp) vec_perm((vector unsigned char)(vec),VEC_CHAR_ZERO,VEC_SHORT_TO_CHAR##offset##_##bpp)
void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row) png_const_bytep prev_row)
{ {
@ -500,11 +495,10 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
int a, b, c, pa, pb, pc, p; int a, b, c, pa, pb, pc, p;
vector unsigned char rp_vec; vector unsigned char rp_vec;
vector unsigned char pp_vec; vector unsigned char pp_vec;
vector unsigned char zero_vec = {0};
vector unsigned short a_vec,b_vec,c_vec,nearest_vec; vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
declare_common_vars(row_info,row,prev_row,bpp) vsx_declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp; rp -= bpp;
if(istop >= bpp) if(istop >= bpp)
istop -= bpp; istop -= bpp;
@ -536,9 +530,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
rp_vec = vec_ld(0,rp); rp_vec = vec_ld(0,rp);
vec_ld_unaligned(pp_vec,pp); vec_ld_unaligned(pp_vec,pp);
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_4),1,4); a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4),1,4); b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_4),1,4);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_4),1,4); c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec); pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec); pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec); pc_vec = vec_add(pa_vec,pb_vec);
@ -557,9 +551,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
); );
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4))); rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_4),2,4); a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4),2,4); b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_4),2,4);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_4),2,4); c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec); pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec); pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec); pc_vec = vec_add(pa_vec,pb_vec);
@ -578,9 +572,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
); );
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4))); rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_4),3,4); a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4),3,4); b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_4),3,4);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_4),3,4); c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec); pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec); pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec); pc_vec = vec_add(pa_vec,pb_vec);
@ -621,11 +615,10 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
int a, b, c, pa, pb, pc, p; int a, b, c, pa, pb, pc, p;
vector unsigned char rp_vec; vector unsigned char rp_vec;
vector unsigned char pp_vec; vector unsigned char pp_vec;
vector unsigned char zero_vec = {0};
vector unsigned short a_vec,b_vec,c_vec,nearest_vec; vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
declare_common_vars(row_info,row,prev_row,bpp) vsx_declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp; rp -= bpp;
if(istop >= bpp) if(istop >= bpp)
istop -= bpp; istop -= bpp;
@ -657,9 +650,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
rp_vec = vec_ld(0,rp); rp_vec = vec_ld(0,rp);
vec_ld_unaligned(pp_vec,pp); vec_ld_unaligned(pp_vec,pp);
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_3),1,3); a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3),1,3); b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_3),1,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_3),1,3); c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec); pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec); pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec); pc_vec = vec_add(pa_vec,pb_vec);
@ -678,9 +671,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
); );
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3))); rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_3),2,3); a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3),2,3); b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_3),2,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_3),2,3); c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec); pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec); pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec); pc_vec = vec_add(pa_vec,pb_vec);
@ -699,9 +692,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
); );
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3))); rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_3),3,3); a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3),3,3); b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_3),3,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_3),3,3); c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec); pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec); pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec); pc_vec = vec_add(pa_vec,pb_vec);
@ -720,9 +713,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
); );
rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3))); rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT4_3),4,3); a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3),4,3); b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED4_3),4,3);
c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT4_3),4,3); c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
pa_vec = (vector signed short) vec_sub(b_vec,c_vec); pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
pb_vec = (vector signed short) vec_sub(a_vec , c_vec); pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
pc_vec = vec_add(pa_vec,pb_vec); pc_vec = vec_add(pa_vec,pb_vec);