diff --git a/src/zm_image.cpp b/src/zm_image.cpp index 3d6ac31b7..c9eb85b4d 100644 --- a/src/zm_image.cpp +++ b/src/zm_image.cpp @@ -45,7 +45,6 @@ static short *r_v_table; static short *g_v_table; static short *g_u_table; static short *b_u_table; -__attribute__((aligned(16))) static const uint8_t movemask[16] = {0,4,8,12,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; jpeg_compress_struct *Image::writejpg_ccinfo[101] = { 0 }; jpeg_compress_struct *Image::encodejpg_ccinfo[101] = { 0 }; @@ -4180,38 +4179,26 @@ void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned lon "mov $0x1F1F1F1F, %%eax\n\t" "movd %%eax, %%xmm4\n\t" "pshufd $0x0, %%xmm4, %%xmm4\n\t" - "mov $0xff, %%eax\n\t" - "movd %%eax, %%xmm0\n\t" - "pshufd $0x0, %%xmm0, %%xmm0\n\t" - "movdqa %3, %%xmm5\n\t" + "mov $0x00010502, %%eax\n\t" + "movd %%eax, %%xmm3\n\t" + "pshufd $0x0, %%xmm3, %%xmm3\n\t" + "pxor %%xmm0, %%xmm0\n\t" "sub $0x10, %0\n\t" "sub $0x4, %1\n\t" "ssse3_convert_rgba_gray8_iter:\n\t" - "movdqa (%0,%2,4), %%xmm3\n\t" - "psrlq $0x3, %%xmm3\n\t" - "pand %%xmm4, %%xmm3\n\t" - "movdqa %%xmm3, %%xmm2\n\t" - "psrld $0x8, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm1\n\t" - "pslld $0x2, %%xmm2\n\t" - "paddd %%xmm1, %%xmm2\n\t" - "movdqa %%xmm3, %%xmm1\n\t" - "pand %%xmm0, %%xmm1\n\t" - "paddd %%xmm1, %%xmm1\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "movdqa %%xmm3, %%xmm2\n\t" - "psrld $0x10, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "pshufb %%xmm5, %%xmm1\n\t" + "movdqa (%0,%2,4), %%xmm1\n\t" + "psrlq $0x3, %%xmm1\n\t" + "pand %%xmm4, %%xmm1\n\t" + "pmaddubsw %%xmm3, %%xmm1\n\t" + "phaddw %%xmm0, %%xmm1\n\t" + "packuswb %%xmm1, %%xmm1\n\t" "movd %%xmm1, %%eax\n\t" "movnti %%eax, (%1,%2)\n\t" "sub $0x4, %2\n\t" "jnz ssse3_convert_rgba_gray8_iter\n\t" : - : "r" (col1), "r" (result), "r" (count), "m" (*movemask) - : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory" + : "r" (col1), "r" (result), "r" (count) + : "%eax", "%xmm0", "%xmm1", "%xmm3", "%xmm4", "cc", "memory" ); #else Panic("SSE function called on a non x86\\x86-64 platform");