SSSE3 rgba->gray8 convert function now uses fewer instructions and is 10-20% faster
This commit is contained in:
parent
844b4bb5ee
commit
20604c2150
|
@ -45,7 +45,6 @@ static short *r_v_table;
|
||||||
static short *g_v_table;
|
static short *g_v_table;
|
||||||
static short *g_u_table;
|
static short *g_u_table;
|
||||||
static short *b_u_table;
|
static short *b_u_table;
|
||||||
__attribute__((aligned(16))) static const uint8_t movemask[16] = {0,4,8,12,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
|
|
||||||
|
|
||||||
jpeg_compress_struct *Image::writejpg_ccinfo[101] = { 0 };
|
jpeg_compress_struct *Image::writejpg_ccinfo[101] = { 0 };
|
||||||
jpeg_compress_struct *Image::encodejpg_ccinfo[101] = { 0 };
|
jpeg_compress_struct *Image::encodejpg_ccinfo[101] = { 0 };
|
||||||
|
@ -4180,38 +4179,26 @@ void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned lon
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
"movd %%eax, %%xmm4\n\t"
|
"movd %%eax, %%xmm4\n\t"
|
||||||
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
|
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
|
||||||
"mov $0xff, %%eax\n\t"
|
"mov $0x00010502, %%eax\n\t"
|
||||||
"movd %%eax, %%xmm0\n\t"
|
"movd %%eax, %%xmm3\n\t"
|
||||||
"pshufd $0x0, %%xmm0, %%xmm0\n\t"
|
"pshufd $0x0, %%xmm3, %%xmm3\n\t"
|
||||||
"movdqa %3, %%xmm5\n\t"
|
"pxor %%xmm0, %%xmm0\n\t"
|
||||||
"sub $0x10, %0\n\t"
|
"sub $0x10, %0\n\t"
|
||||||
"sub $0x4, %1\n\t"
|
"sub $0x4, %1\n\t"
|
||||||
"ssse3_convert_rgba_gray8_iter:\n\t"
|
"ssse3_convert_rgba_gray8_iter:\n\t"
|
||||||
"movdqa (%0,%2,4), %%xmm3\n\t"
|
"movdqa (%0,%2,4), %%xmm1\n\t"
|
||||||
"psrlq $0x3, %%xmm3\n\t"
|
"psrlq $0x3, %%xmm1\n\t"
|
||||||
"pand %%xmm4, %%xmm3\n\t"
|
"pand %%xmm4, %%xmm1\n\t"
|
||||||
"movdqa %%xmm3, %%xmm2\n\t"
|
"pmaddubsw %%xmm3, %%xmm1\n\t"
|
||||||
"psrld $0x8, %%xmm2\n\t"
|
"phaddw %%xmm0, %%xmm1\n\t"
|
||||||
"pand %%xmm0, %%xmm2\n\t"
|
"packuswb %%xmm1, %%xmm1\n\t"
|
||||||
"movdqa %%xmm2, %%xmm1\n\t"
|
|
||||||
"pslld $0x2, %%xmm2\n\t"
|
|
||||||
"paddd %%xmm1, %%xmm2\n\t"
|
|
||||||
"movdqa %%xmm3, %%xmm1\n\t"
|
|
||||||
"pand %%xmm0, %%xmm1\n\t"
|
|
||||||
"paddd %%xmm1, %%xmm1\n\t"
|
|
||||||
"paddd %%xmm2, %%xmm1\n\t"
|
|
||||||
"movdqa %%xmm3, %%xmm2\n\t"
|
|
||||||
"psrld $0x10, %%xmm2\n\t"
|
|
||||||
"pand %%xmm0, %%xmm2\n\t"
|
|
||||||
"paddd %%xmm2, %%xmm1\n\t"
|
|
||||||
"pshufb %%xmm5, %%xmm1\n\t"
|
|
||||||
"movd %%xmm1, %%eax\n\t"
|
"movd %%xmm1, %%eax\n\t"
|
||||||
"movnti %%eax, (%1,%2)\n\t"
|
"movnti %%eax, (%1,%2)\n\t"
|
||||||
"sub $0x4, %2\n\t"
|
"sub $0x4, %2\n\t"
|
||||||
"jnz ssse3_convert_rgba_gray8_iter\n\t"
|
"jnz ssse3_convert_rgba_gray8_iter\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (result), "r" (count), "m" (*movemask)
|
: "r" (col1), "r" (result), "r" (count)
|
||||||
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
: "%eax", "%xmm0", "%xmm1", "%xmm3", "%xmm4", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
Panic("SSE function called on a non x86\\x86-64 platform");
|
Panic("SSE function called on a non x86\\x86-64 platform");
|
||||||
|
|
Loading…
Reference in New Issue