SSSE3 rgba->gray8 convert function now uses fewer instructions and is 10-20% faster
This commit is contained in:
parent
844b4bb5ee
commit
20604c2150
|
@ -45,7 +45,6 @@ static short *r_v_table;
|
|||
static short *g_v_table;
|
||||
static short *g_u_table;
|
||||
static short *b_u_table;
|
||||
__attribute__((aligned(16))) static const uint8_t movemask[16] = {0,4,8,12,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
|
||||
|
||||
jpeg_compress_struct *Image::writejpg_ccinfo[101] = { 0 };
|
||||
jpeg_compress_struct *Image::encodejpg_ccinfo[101] = { 0 };
|
||||
|
@ -4180,38 +4179,26 @@ void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned lon
|
|||
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||
"movd %%eax, %%xmm4\n\t"
|
||||
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
|
||||
"mov $0xff, %%eax\n\t"
|
||||
"movd %%eax, %%xmm0\n\t"
|
||||
"pshufd $0x0, %%xmm0, %%xmm0\n\t"
|
||||
"movdqa %3, %%xmm5\n\t"
|
||||
"mov $0x00010502, %%eax\n\t"
|
||||
"movd %%eax, %%xmm3\n\t"
|
||||
"pshufd $0x0, %%xmm3, %%xmm3\n\t"
|
||||
"pxor %%xmm0, %%xmm0\n\t"
|
||||
"sub $0x10, %0\n\t"
|
||||
"sub $0x4, %1\n\t"
|
||||
"ssse3_convert_rgba_gray8_iter:\n\t"
|
||||
"movdqa (%0,%2,4), %%xmm3\n\t"
|
||||
"psrlq $0x3, %%xmm3\n\t"
|
||||
"pand %%xmm4, %%xmm3\n\t"
|
||||
"movdqa %%xmm3, %%xmm2\n\t"
|
||||
"psrld $0x8, %%xmm2\n\t"
|
||||
"pand %%xmm0, %%xmm2\n\t"
|
||||
"movdqa %%xmm2, %%xmm1\n\t"
|
||||
"pslld $0x2, %%xmm2\n\t"
|
||||
"paddd %%xmm1, %%xmm2\n\t"
|
||||
"movdqa %%xmm3, %%xmm1\n\t"
|
||||
"pand %%xmm0, %%xmm1\n\t"
|
||||
"paddd %%xmm1, %%xmm1\n\t"
|
||||
"paddd %%xmm2, %%xmm1\n\t"
|
||||
"movdqa %%xmm3, %%xmm2\n\t"
|
||||
"psrld $0x10, %%xmm2\n\t"
|
||||
"pand %%xmm0, %%xmm2\n\t"
|
||||
"paddd %%xmm2, %%xmm1\n\t"
|
||||
"pshufb %%xmm5, %%xmm1\n\t"
|
||||
"movdqa (%0,%2,4), %%xmm1\n\t"
|
||||
"psrlq $0x3, %%xmm1\n\t"
|
||||
"pand %%xmm4, %%xmm1\n\t"
|
||||
"pmaddubsw %%xmm3, %%xmm1\n\t"
|
||||
"phaddw %%xmm0, %%xmm1\n\t"
|
||||
"packuswb %%xmm1, %%xmm1\n\t"
|
||||
"movd %%xmm1, %%eax\n\t"
|
||||
"movnti %%eax, (%1,%2)\n\t"
|
||||
"sub $0x4, %2\n\t"
|
||||
"jnz ssse3_convert_rgba_gray8_iter\n\t"
|
||||
:
|
||||
: "r" (col1), "r" (result), "r" (count), "m" (*movemask)
|
||||
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
||||
: "r" (col1), "r" (result), "r" (count)
|
||||
: "%eax", "%xmm0", "%xmm1", "%xmm3", "%xmm4", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
Panic("SSE function called on a non x86\\x86-64 platform");
|
||||
|
|
Loading…
Reference in New Issue