diff --git a/arm/filter_neon_intrinsics.c b/arm/filter_neon_intrinsics.c index ea7e356bc..2ee49cf7a 100644 --- a/arm/filter_neon_intrinsics.c +++ b/arm/filter_neon_intrinsics.c @@ -19,7 +19,11 @@ /* This code requires -mfpu=neon on the command line: */ #if PNG_ARM_NEON_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ +#if defined(_MSC_VER) && defined(_M_ARM64) +#include <arm64_neon.h> +#else #include <arm_neon.h> +#endif /* libpng row pointers are not necessarily aligned to any particular boundary, * however this code will only work with appropriate alignment. arm/arm_init.c @@ -33,6 +37,11 @@ * 'type'. This is written this way just to hide the GCC strict aliasing * warning; note that the code is safe because there never is an alias between * the input and output pointers. + * + * When compiling with MSVC ARM64, the png_ldr macro can't be passed directly + * to vst4_lane_u32, because of an internal compiler error inside MSVC. + * To avoid this compiler bug, we use a temporary variable (vdest_val) to store + * the result of png_ldr. 
*/ #define png_ldr(type,pointer)\ (temp_pointer = png_ptr(type,pointer), *temp_pointer) @@ -130,7 +139,9 @@ png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row, vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]); vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]); vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]); - vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0); + + uint32x2x4_t vdest_val = png_ldr(uint32x2x4_t, &vdest); + vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); } PNG_UNUSED(prev_row) @@ -240,7 +251,8 @@ png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row, vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]); vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]); - vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0); + uint32x2x4_t vdest_val = png_ldr(uint32x2x4_t, &vdest); + vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); } } @@ -378,7 +390,8 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, vlast = vpp.val[3]; - vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0); + uint32x2x4_t vdest_val = png_ldr(uint32x2x4_t, &vdest); + vst4_lane_u32(png_ptr(uint32_t,rp), vdest_val, 0); } }