Merge pull request #302 from pjhacnau/master

Additional compile guards to allow compilation on non-x86 archs
This commit is contained in:
Andrew Bauer 2014-02-01 14:02:47 -08:00
commit f5eedb2bfc
2 changed files with 120 additions and 99 deletions

View File

@ -4653,7 +4653,8 @@ __attribute__((noinline)) void std_deinterlace_4field_abgr(uint8_t* col1, uint8_
/* Grayscale SSSE3 */ /* Grayscale SSSE3 */
__attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray8(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) { __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray8(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
union { union {
uint32_t int32; uint32_t int32;
uint8_t int8a[4]; uint8_t int8a[4];
@ -4662,11 +4663,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
threshold_mask.int8a[1] = 0; threshold_mask.int8a[1] = 0;
threshold_mask.int8a[2] = threshold; threshold_mask.int8a[2] = threshold;
threshold_mask.int8a[3] = 0; threshold_mask.int8a[3] = 0;
unsigned long row_width = width; unsigned long row_width = width;
uint8_t* max_ptr = col1 + (row_width * (height-2)); uint8_t* max_ptr = col1 + (row_width * (height-2));
uint8_t* max_ptr2 = col1 + row_width; uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ ( __asm__ __volatile__ (
/* Load the threshold */ /* Load the threshold */
"mov %5, %%eax\n\t" "mov %5, %%eax\n\t"
@ -4674,9 +4675,9 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
"pshufd $0x0, %%xmm4, %%xmm4\n\t" "pshufd $0x0, %%xmm4, %%xmm4\n\t"
/* Zero the temporary register */ /* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_gray8:\n\t" "algo_ssse3_deinterlace_4field_gray8:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -4685,11 +4686,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
"pminub %%xmm5, %%xmm2\n\t" "pminub %%xmm5, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t" "psubb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -4697,17 +4698,17 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
"pmaxub %%xmm2, %%xmm1\n\t" "pmaxub %%xmm2, %%xmm1\n\t"
"pminub %%xmm6, %%xmm2\n\t" "pminub %%xmm6, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t" "psubb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
"movdqa %%xmm1, %%xmm2\n\t" "movdqa %%xmm1, %%xmm2\n\t"
/* Do the comparison on words instead of bytes because we don't have unsigned comparison */ /* Do the comparison on words instead of bytes because we don't have unsigned comparison */
"punpcklbw %%xmm0, %%xmm1\n\t" // Expand pixels 0-7 into words into xmm1 "punpcklbw %%xmm0, %%xmm1\n\t" // Expand pixels 0-7 into words into xmm1
"punpckhbw %%xmm0, %%xmm2\n\t" // Expand pixels 8-15 into words into xmm2 "punpckhbw %%xmm0, %%xmm2\n\t" // Expand pixels 8-15 into words into xmm2
"pcmpgtw %%xmm4, %%xmm1\n\t" // Compare average delta with threshold for pixels 0-7 "pcmpgtw %%xmm4, %%xmm1\n\t" // Compare average delta with threshold for pixels 0-7
"pcmpgtw %%xmm4, %%xmm2\n\t" // Compare average delta with threshold for pixels 8-15 "pcmpgtw %%xmm4, %%xmm2\n\t" // Compare average delta with threshold for pixels 8-15
"packsswb %%xmm2, %%xmm1\n\t" // Pack the comparison results into xmm1 "packsswb %%xmm2, %%xmm1\n\t" // Pack the comparison results into xmm1
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow "movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
"pavgb %%xmm5, %%xmm2\n\t" // Average pabove and pbelow "pavgb %%xmm5, %%xmm2\n\t" // Average pabove and pbelow
"pand %%xmm1, %%xmm2\n\t" // Filter out pixels in avg that shouldn't be copied "pand %%xmm1, %%xmm2\n\t" // Filter out pixels in avg that shouldn't be copied
@ -4715,24 +4716,24 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent "por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent "movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove "sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove "sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */ /* Next pixels */
"add $0x10, %0\n\t" // Add 16 to pcurrent "add $0x10, %0\n\t" // Add 16 to pcurrent
"add $0x10, %1\n\t" // Add 16 to pncurrent "add $0x10, %1\n\t" // Add 16 to pncurrent
/* Check if we reached the row end */ /* Check if we reached the row end */
"cmp %2, %0\n\t" "cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_gray8\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_gray8\n\t" // Go for another iteration
/* Next row */ /* Next row */
"add %4, %0\n\t" // Add width to pcurrent "add %4, %0\n\t" // Add width to pcurrent
"add %4, %1\n\t" // Add width to pncurrent "add %4, %1\n\t" // Add width to pncurrent
"mov %0, %2\n\t" "mov %0, %2\n\t"
"add %4, %2\n\t" // Add width to max_ptr2 "add %4, %2\n\t" // Add width to max_ptr2
/* Check if we reached the end */ /* Check if we reached the end */
"cmp %3, %0\n\t" "cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_gray8\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_gray8\n\t" // Go for another iteration
@ -4746,11 +4747,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
"pminub %%xmm5, %%xmm2\n\t" "pminub %%xmm5, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t" "psubb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -4758,17 +4759,17 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
"pmaxub %%xmm2, %%xmm1\n\t" "pmaxub %%xmm2, %%xmm1\n\t"
"pminub %%xmm6, %%xmm2\n\t" "pminub %%xmm6, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t" "psubb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
"movdqa %%xmm1, %%xmm2\n\t" "movdqa %%xmm1, %%xmm2\n\t"
/* Do the comparison on words instead of bytes because we don't have unsigned comparison */ /* Do the comparison on words instead of bytes because we don't have unsigned comparison */
"punpcklbw %%xmm0, %%xmm1\n\t" // Expand pixels 0-7 into words into xmm1 "punpcklbw %%xmm0, %%xmm1\n\t" // Expand pixels 0-7 into words into xmm1
"punpckhbw %%xmm0, %%xmm2\n\t" // Expand pixels 8-15 into words into xmm2 "punpckhbw %%xmm0, %%xmm2\n\t" // Expand pixels 8-15 into words into xmm2
"pcmpgtw %%xmm4, %%xmm1\n\t" // Compare average delta with threshold for pixels 0-7 "pcmpgtw %%xmm4, %%xmm1\n\t" // Compare average delta with threshold for pixels 0-7
"pcmpgtw %%xmm4, %%xmm2\n\t" // Compare average delta with threshold for pixels 8-15 "pcmpgtw %%xmm4, %%xmm2\n\t" // Compare average delta with threshold for pixels 8-15
"packsswb %%xmm2, %%xmm1\n\t" // Pack the comparison results into xmm1 "packsswb %%xmm2, %%xmm1\n\t" // Pack the comparison results into xmm1
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied "pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced "pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
@ -4778,18 +4779,22 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_gray
: "r" (col1), "r" (col2), "r" (max_ptr2), "r" (max_ptr), "r" (row_width), "m" (threshold_mask.int32) : "r" (col1), "r" (col2), "r" (max_ptr2), "r" (max_ptr), "r" (row_width), "m" (threshold_mask.int32)
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory" : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
); );
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
} }
/* RGBA SSSE3 */ /* RGBA SSSE3 */
__attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) { __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {1,1,1,1,1,0,0,2,9,9,9,9,9,8,8,10}; __attribute__((aligned(16))) static const uint8_t movemask2[16] = {1,1,1,1,1,0,0,2,9,9,9,9,9,8,8,10};
const uint32_t threshold_val = threshold; const uint32_t threshold_val = threshold;
unsigned long row_width = width*4; unsigned long row_width = width*4;
uint8_t* max_ptr = col1 + (row_width * (height-2)); uint8_t* max_ptr = col1 + (row_width * (height-2));
uint8_t* max_ptr2 = col1 + row_width; uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t" "movd %%eax, %%xmm4\n\t"
@ -4802,9 +4807,9 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
#endif #endif
/* Zero the temporary register */ /* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_rgba:\n\t" "algo_ssse3_deinterlace_4field_rgba:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -4824,11 +4829,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -4847,7 +4852,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -4855,7 +4860,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow "movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
@ -4865,28 +4870,28 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent "por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent "movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove "sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove "sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */ /* Next pixels */
"add $0x10, %0\n\t" // Add 16 to pcurrent "add $0x10, %0\n\t" // Add 16 to pcurrent
"add $0x10, %1\n\t" // Add 16 to pncurrent "add $0x10, %1\n\t" // Add 16 to pncurrent
/* Check if we reached the row end */ /* Check if we reached the row end */
"cmp %2, %0\n\t" "cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_rgba\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_rgba\n\t" // Go for another iteration
/* Next row */ /* Next row */
"add %4, %0\n\t" // Add width to pcurrent "add %4, %0\n\t" // Add width to pcurrent
"add %4, %1\n\t" // Add width to pncurrent "add %4, %1\n\t" // Add width to pncurrent
"mov %0, %2\n\t" "mov %0, %2\n\t"
"add %4, %2\n\t" // Add width to max_ptr2 "add %4, %2\n\t" // Add width to max_ptr2
/* Check if we reached the end */ /* Check if we reached the end */
"cmp %3, %0\n\t" "cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_rgba\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_rgba\n\t" // Go for another iteration
/* Special case for the last line */ /* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
@ -4907,11 +4912,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -4930,7 +4935,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -4938,7 +4943,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied "pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
@ -4954,18 +4959,22 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_rgba
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory" : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif #endif
); );
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
} }
/* BGRA SSSE3 */ /* BGRA SSSE3 */
__attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) { __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {1,1,1,1,1,2,2,0,9,9,9,9,9,10,10,8}; __attribute__((aligned(16))) static const uint8_t movemask2[16] = {1,1,1,1,1,2,2,0,9,9,9,9,9,10,10,8};
const uint32_t threshold_val = threshold; const uint32_t threshold_val = threshold;
unsigned long row_width = width*4; unsigned long row_width = width*4;
uint8_t* max_ptr = col1 + (row_width * (height-2)); uint8_t* max_ptr = col1 + (row_width * (height-2));
uint8_t* max_ptr2 = col1 + row_width; uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t" "movd %%eax, %%xmm4\n\t"
@ -4978,9 +4987,9 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
#endif #endif
/* Zero the temporary register */ /* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_bgra:\n\t" "algo_ssse3_deinterlace_4field_bgra:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5000,11 +5009,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5023,7 +5032,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -5031,7 +5040,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow "movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
@ -5041,28 +5050,28 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent "por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent "movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove "sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove "sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */ /* Next pixels */
"add $0x10, %0\n\t" // Add 16 to pcurrent "add $0x10, %0\n\t" // Add 16 to pcurrent
"add $0x10, %1\n\t" // Add 16 to pncurrent "add $0x10, %1\n\t" // Add 16 to pncurrent
/* Check if we reached the row end */ /* Check if we reached the row end */
"cmp %2, %0\n\t" "cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_bgra\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_bgra\n\t" // Go for another iteration
/* Next row */ /* Next row */
"add %4, %0\n\t" // Add width to pcurrent "add %4, %0\n\t" // Add width to pcurrent
"add %4, %1\n\t" // Add width to pncurrent "add %4, %1\n\t" // Add width to pncurrent
"mov %0, %2\n\t" "mov %0, %2\n\t"
"add %4, %2\n\t" // Add width to max_ptr2 "add %4, %2\n\t" // Add width to max_ptr2
/* Check if we reached the end */ /* Check if we reached the end */
"cmp %3, %0\n\t" "cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_bgra\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_bgra\n\t" // Go for another iteration
/* Special case for the last line */ /* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
@ -5083,11 +5092,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5106,7 +5115,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -5114,7 +5123,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied "pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
@ -5130,18 +5139,22 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_bgra
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory" : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif #endif
); );
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
} }
/* ARGB SSSE3 */ /* ARGB SSSE3 */
__attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) { __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {2,2,2,2,2,1,1,3,10,10,10,10,10,9,9,11}; __attribute__((aligned(16))) static const uint8_t movemask2[16] = {2,2,2,2,2,1,1,3,10,10,10,10,10,9,9,11};
const uint32_t threshold_val = threshold; const uint32_t threshold_val = threshold;
unsigned long row_width = width*4; unsigned long row_width = width*4;
uint8_t* max_ptr = col1 + (row_width * (height-2)); uint8_t* max_ptr = col1 + (row_width * (height-2));
uint8_t* max_ptr2 = col1 + row_width; uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t" "movd %%eax, %%xmm4\n\t"
@ -5154,9 +5167,9 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
#endif #endif
/* Zero the temporary register */ /* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_argb:\n\t" "algo_ssse3_deinterlace_4field_argb:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5176,11 +5189,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5199,7 +5212,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -5207,7 +5220,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow "movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
@ -5217,28 +5230,28 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent "por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent "movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove "sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove "sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */ /* Next pixels */
"add $0x10, %0\n\t" // Add 16 to pcurrent "add $0x10, %0\n\t" // Add 16 to pcurrent
"add $0x10, %1\n\t" // Add 16 to pncurrent "add $0x10, %1\n\t" // Add 16 to pncurrent
/* Check if we reached the row end */ /* Check if we reached the row end */
"cmp %2, %0\n\t" "cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_argb\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_argb\n\t" // Go for another iteration
/* Next row */ /* Next row */
"add %4, %0\n\t" // Add width to pcurrent "add %4, %0\n\t" // Add width to pcurrent
"add %4, %1\n\t" // Add width to pncurrent "add %4, %1\n\t" // Add width to pncurrent
"mov %0, %2\n\t" "mov %0, %2\n\t"
"add %4, %2\n\t" // Add width to max_ptr2 "add %4, %2\n\t" // Add width to max_ptr2
/* Check if we reached the end */ /* Check if we reached the end */
"cmp %3, %0\n\t" "cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_argb\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_argb\n\t" // Go for another iteration
/* Special case for the last line */ /* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
@ -5259,11 +5272,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5282,7 +5295,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -5290,7 +5303,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied "pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
@ -5306,18 +5319,22 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_argb
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory" : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif #endif
); );
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
} }
/* ABGR SSSE3 */ /* ABGR SSSE3 */
__attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) { __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {2,2,2,2,2,3,3,1,10,10,10,10,10,11,11,9}; __attribute__((aligned(16))) static const uint8_t movemask2[16] = {2,2,2,2,2,3,3,1,10,10,10,10,10,11,11,9};
const uint32_t threshold_val = threshold; const uint32_t threshold_val = threshold;
unsigned long row_width = width*4; unsigned long row_width = width*4;
uint8_t* max_ptr = col1 + (row_width * (height-2)); uint8_t* max_ptr = col1 + (row_width * (height-2));
uint8_t* max_ptr2 = col1 + row_width; uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t" "movd %%eax, %%xmm4\n\t"
@ -5330,9 +5347,9 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
#endif #endif
/* Zero the temporary register */ /* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_abgr:\n\t" "algo_ssse3_deinterlace_4field_abgr:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5352,11 +5369,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5375,7 +5392,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -5383,7 +5400,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow "movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
@ -5393,28 +5410,28 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent "por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent "movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove "sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove "sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */ /* Next pixels */
"add $0x10, %0\n\t" // Add 16 to pcurrent "add $0x10, %0\n\t" // Add 16 to pcurrent
"add $0x10, %1\n\t" // Add 16 to pncurrent "add $0x10, %1\n\t" // Add 16 to pncurrent
/* Check if we reached the row end */ /* Check if we reached the row end */
"cmp %2, %0\n\t" "cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_abgr\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_abgr\n\t" // Go for another iteration
/* Next row */ /* Next row */
"add %4, %0\n\t" // Add width to pcurrent "add %4, %0\n\t" // Add width to pcurrent
"add %4, %1\n\t" // Add width to pncurrent "add %4, %1\n\t" // Add width to pncurrent
"mov %0, %2\n\t" "mov %0, %2\n\t"
"add %4, %2\n\t" // Add width to max_ptr2 "add %4, %2\n\t" // Add width to max_ptr2
/* Check if we reached the end */ /* Check if we reached the end */
"cmp %3, %0\n\t" "cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_abgr\n\t" // Go for another iteration "jb algo_ssse3_deinterlace_4field_abgr\n\t" // Go for another iteration
/* Special case for the last line */ /* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */ /* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
@ -5435,11 +5452,11 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */ "movdqa %%xmm1, %%xmm7\n\t" /* Backup of delta2 in xmm7 for now */
/* Next row */ /* Next row */
"add %4, %0\n\t" "add %4, %0\n\t"
"add %4, %1\n\t" "add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */ /* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t" "movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t" "movdqa (%1), %%xmm2\n\t"
@ -5458,7 +5475,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
"pshufb %%xmm3, %%xmm2\n\t" "pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t" "psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t" "packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together "pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__) #if defined(__x86_64__)
@ -5466,7 +5483,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
#else #else
"movd %%eax, %%xmm7\n\t" // Setup the threshold "movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t" "pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold "pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif #endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied "pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
@ -5482,5 +5499,7 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_deinterlace_4field_abgr
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory" : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif #endif
); );
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
} }

View File

@ -47,11 +47,13 @@ RETSIGTYPE zm_die_handler(int signal, siginfo_t * info, void *context)
RETSIGTYPE zm_die_handler(int signal) RETSIGTYPE zm_die_handler(int signal)
#endif #endif
{ {
#if (defined(__i386__) || defined(__x86_64__))
void *cr2 = 0; void *cr2 = 0;
void *ip = 0; void *ip = 0;
#endif
Error("Got signal %d (%s), crashing", signal, strsignal(signal)); Error("Got signal %d (%s), crashing", signal, strsignal(signal));
#if (defined(__i386__) || defined(__x86_64__))
// Get more information if available // Get more information if available
#if ( HAVE_SIGINFO_T && HAVE_UCONTEXT_T ) #if ( HAVE_SIGINFO_T && HAVE_UCONTEXT_T )
if (info && context) { if (info && context) {
@ -103,7 +105,7 @@ RETSIGTYPE zm_die_handler(int signal)
Info("Backtrace complete, please execute the following command for more information"); Info("Backtrace complete, please execute the following command for more information");
Info(cmd); Info(cmd);
#endif // ( !defined(ZM_NO_CRASHTRACE) && HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS ) #endif // ( !defined(ZM_NO_CRASHTRACE) && HAVE_DECL_BACKTRACE && HAVE_DECL_BACKTRACE_SYMBOLS )
#endif // (defined(__i386__) || defined(__x86_64__))
exit(signal); exit(signal);
} }