1) Added back the colour SSE2 delta algorithms
2) Modified SSE algorithms to use one fewer register
3) Fixed AlarmedPixels motion detection
This commit is contained in:
parent
642a1bfdb2
commit
1afe0bbf42
336
src/zm_image.cpp
336
src/zm_image.cpp
|
@ -181,12 +181,20 @@ void Image::Initialise()
|
||||||
Debug(2,"Delta: Using SSSE3 delta functions");
|
Debug(2,"Delta: Using SSSE3 delta functions");
|
||||||
} else if(sseversion >= 20) {
|
} else if(sseversion >= 20) {
|
||||||
/* SSE2 available */
|
/* SSE2 available */
|
||||||
fptr_delta8_rgba = &std_delta8_rgba;
|
fptr_delta8_rgba = &sse2_delta8_rgba;
|
||||||
fptr_delta8_bgra = &std_delta8_bgra;
|
fptr_delta8_bgra = &sse2_delta8_bgra;
|
||||||
fptr_delta8_argb = &std_delta8_argb;
|
fptr_delta8_argb = &sse2_delta8_argb;
|
||||||
fptr_delta8_abgr = &std_delta8_abgr;
|
fptr_delta8_abgr = &sse2_delta8_abgr;
|
||||||
|
/* On some systems, the 4 SSE2 algorithms above might be slower than
|
||||||
|
** the standard algorithms, especially on early Pentium 4 processors
|
||||||
|
** In that case, comment out the 4 lines above and uncomment the 4 lines below
|
||||||
|
*/
|
||||||
|
// fptr_delta8_rgba = &std_delta8_rgba;
|
||||||
|
// fptr_delta8_bgra = &std_delta8_bgra;
|
||||||
|
// fptr_delta8_argb = &std_delta8_argb;
|
||||||
|
// fptr_delta8_abgr = &std_delta8_abgr;
|
||||||
fptr_delta8_gray8 = &sse2_delta8_gray8;
|
fptr_delta8_gray8 = &sse2_delta8_gray8;
|
||||||
Debug(2,"Delta: Using standard and SSE2 delta functions");
|
Debug(2,"Delta: Using SSE2 delta functions");
|
||||||
} else {
|
} else {
|
||||||
/* No SSE available */
|
/* No SSE available */
|
||||||
fptr_delta8_rgba = &std_delta8_rgba;
|
fptr_delta8_rgba = &std_delta8_rgba;
|
||||||
|
@ -2403,7 +2411,7 @@ __attribute__ ((noinline)) void sse2_fastblend(const uint8_t* col1, const uint8_
|
||||||
"cmp %3, %4\n\t"
|
"cmp %3, %4\n\t"
|
||||||
"jb algo_sse2_blend\n\t"
|
"jb algo_sse2_blend\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (i), "m" (clearmask), "m" (divider)
|
: "r" (col1), "r" (col2), "r" (result), "m" (count), "r" (i), "m" (clearmask), "m" (divider)
|
||||||
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory"
|
: "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
@ -2666,7 +2674,7 @@ __attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uin
|
||||||
"cmp %3, %4\n\t"
|
"cmp %3, %4\n\t"
|
||||||
"jb algo_sse2_delta8_gray8\n\t"
|
"jb algo_sse2_delta8_gray8\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (i)
|
: "r" (col1), "r" (col2), "r" (result), "m" (count), "r" (i)
|
||||||
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory"
|
: "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
@ -2674,6 +2682,308 @@ __attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uin
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* RGB32: RGBA SSE2 */
|
||||||
|
/*
** Weighted 8-bit delta of two RGB32 buffers in R,G,B,A byte order (SSE2).
**
** For every pixel each channel is first divided by 8, the absolute difference
** of the two inputs is taken per channel, and the output byte is
** 2*dR + 5*dG + dB (at most 248). The alpha byte is ignored.
**
** col1, col2 - source buffers, 4 bytes per pixel. They are loaded with movdqa,
**              so both must be 16-byte aligned.
** result     - destination, 1 byte per pixel, written 4 bytes at a time with
**              a non-temporal store (movnti).
** count      - number of pixels. Pixels are handled four at a time and the
**              loop condition is tested at the bottom, so a count that is not
**              a multiple of four is effectively rounded up. A count of zero
**              returns without touching any buffer.
*/
__attribute__ ((noinline)) void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__))
  /* The assembly loop is bottom-tested: never enter it with nothing to do */
  if ( count == 0 )
    return;

  unsigned long i = 0;

  /* XMM0 - clear mask (0x000000FF per dword) - kept */
  /* XMM1,2,3 - General purpose */
  /* XMM4 - divide mask (0x1F per byte) - kept */
  /* XMM5 - temp */
  /* XMM6 - temp */
  /* XMM7 - unused */

  __asm__ __volatile__ (
    /* Build the two constant masks */
    "mov $0x1F1F1F1F, %%eax\n\t"
    "movd %%eax, %%xmm4\n\t"
    "pshufd $0x0, %%xmm4, %%xmm4\n\t"
    "mov $0xff, %%eax\n\t"
    "movd %%eax, %%xmm0\n\t"
    "pshufd $0x0, %%xmm0, %%xmm0\n\t"
    "1:\n\t"
    /* Load four pixels from each source and divide every byte by 8 */
    "movdqa (%[col1],%[idx],4), %%xmm1\n\t"
    "movdqa (%[col2],%[idx],4), %%xmm2\n\t"
    "psrlq $0x3, %%xmm1\n\t"
    "psrlq $0x3, %%xmm2\n\t"
    "pand %%xmm4, %%xmm1\n\t"
    "pand %%xmm4, %%xmm2\n\t"
    /* Per-byte absolute difference: max(a,b) - min(a,b) */
    "movdqa %%xmm1, %%xmm5\n\t"
    "movdqa %%xmm2, %%xmm6\n\t"
    "pmaxub %%xmm1, %%xmm2\n\t"
    "pminub %%xmm5, %%xmm6\n\t"
    "psubb %%xmm6, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm3\n\t"
    /* Byte 1 (green) * 5 */
    "psrld $0x8, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm1\n\t"
    "pslld $0x2, %%xmm2\n\t"
    "paddd %%xmm1, %%xmm2\n\t"
    /* Byte 0 (red) * 2 */
    "movdqa %%xmm3, %%xmm1\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "paddd %%xmm1, %%xmm1\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Byte 2 (blue) * 1 */
    "movdqa %%xmm3, %%xmm2\n\t"
    "psrld $0x10, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Gather the low byte of each of the four dwords into one dword */
    "movdqa %%xmm1, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm3\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x9, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x6, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "psrldq $0x3, %%xmm1\n\t"
    "por %%xmm1, %%xmm3\n\t"
    /* Store four result bytes and advance by four pixels */
    "movd %%xmm3, %%eax\n\t"
    "movnti %%eax, (%[res],%[idx])\n\t"
    "add $0x4, %[idx]\n\t"
    "cmp %[cnt], %[idx]\n\t"
    "jb 1b\n\t"
    /* The index is modified by the loop, so it must be a read-write operand */
    : [idx] "+r" (i)
    : [col1] "r" (col1), [col2] "r" (col2), [res] "r" (result), [cnt] "m" (count)
    : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
|
||||||
|
|
||||||
|
/* RGB32: BGRA SSE2 */
|
||||||
|
/*
** Weighted 8-bit delta of two RGB32 buffers in B,G,R,A byte order (SSE2).
**
** For every pixel each channel is first divided by 8, the absolute difference
** of the two inputs is taken per channel, and the output byte is
** 2*dR + 5*dG + dB (at most 248). The alpha byte is ignored.
**
** col1, col2 - source buffers, 4 bytes per pixel. They are loaded with movdqa,
**              so both must be 16-byte aligned.
** result     - destination, 1 byte per pixel, written 4 bytes at a time with
**              a non-temporal store (movnti).
** count      - number of pixels. Pixels are handled four at a time and the
**              loop condition is tested at the bottom, so a count that is not
**              a multiple of four is effectively rounded up. A count of zero
**              returns without touching any buffer.
*/
__attribute__ ((noinline)) void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__))
  /* The assembly loop is bottom-tested: never enter it with nothing to do */
  if ( count == 0 )
    return;

  unsigned long i = 0;

  /* XMM0 - clear mask (0x000000FF per dword) - kept */
  /* XMM1,2,3 - General purpose */
  /* XMM4 - divide mask (0x1F per byte) - kept */
  /* XMM5 - temp */
  /* XMM6 - temp */
  /* XMM7 - unused */

  __asm__ __volatile__ (
    /* Build the two constant masks */
    "mov $0x1F1F1F1F, %%eax\n\t"
    "movd %%eax, %%xmm4\n\t"
    "pshufd $0x0, %%xmm4, %%xmm4\n\t"
    "mov $0xff, %%eax\n\t"
    "movd %%eax, %%xmm0\n\t"
    "pshufd $0x0, %%xmm0, %%xmm0\n\t"
    "1:\n\t"
    /* Load four pixels from each source and divide every byte by 8 */
    "movdqa (%[col1],%[idx],4), %%xmm1\n\t"
    "movdqa (%[col2],%[idx],4), %%xmm2\n\t"
    "psrlq $0x3, %%xmm1\n\t"
    "psrlq $0x3, %%xmm2\n\t"
    "pand %%xmm4, %%xmm1\n\t"
    "pand %%xmm4, %%xmm2\n\t"
    /* Per-byte absolute difference: max(a,b) - min(a,b) */
    "movdqa %%xmm1, %%xmm5\n\t"
    "movdqa %%xmm2, %%xmm6\n\t"
    "pmaxub %%xmm1, %%xmm2\n\t"
    "pminub %%xmm5, %%xmm6\n\t"
    "psubb %%xmm6, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm3\n\t"
    /* Byte 1 (green) * 5 */
    "psrld $0x8, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm1\n\t"
    "pslld $0x2, %%xmm2\n\t"
    "paddd %%xmm1, %%xmm2\n\t"
    /* Byte 0 (blue) * 1 */
    "movdqa %%xmm3, %%xmm1\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Byte 2 (red) * 2 */
    "movdqa %%xmm3, %%xmm2\n\t"
    "psrld $0x10, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "paddd %%xmm2, %%xmm2\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Gather the low byte of each of the four dwords into one dword */
    "movdqa %%xmm1, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm3\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x9, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x6, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "psrldq $0x3, %%xmm1\n\t"
    "por %%xmm1, %%xmm3\n\t"
    /* Store four result bytes and advance by four pixels */
    "movd %%xmm3, %%eax\n\t"
    "movnti %%eax, (%[res],%[idx])\n\t"
    "add $0x4, %[idx]\n\t"
    "cmp %[cnt], %[idx]\n\t"
    "jb 1b\n\t"
    /* The index is modified by the loop, so it must be a read-write operand */
    : [idx] "+r" (i)
    : [col1] "r" (col1), [col2] "r" (col2), [res] "r" (result), [cnt] "m" (count)
    : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
|
||||||
|
|
||||||
|
/* RGB32: ARGB SSE2 */
|
||||||
|
/*
** Weighted 8-bit delta of two RGB32 buffers in A,R,G,B byte order (SSE2).
**
** For every pixel each channel is first divided by 8, the absolute difference
** of the two inputs is taken per channel, and the output byte is
** 2*dR + 5*dG + dB (at most 248). The alpha byte is ignored.
**
** col1, col2 - source buffers, 4 bytes per pixel. They are loaded with movdqa,
**              so both must be 16-byte aligned.
** result     - destination, 1 byte per pixel, written 4 bytes at a time with
**              a non-temporal store (movnti).
** count      - number of pixels. Pixels are handled four at a time and the
**              loop condition is tested at the bottom, so a count that is not
**              a multiple of four is effectively rounded up. A count of zero
**              returns without touching any buffer.
*/
__attribute__ ((noinline)) void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__))
  /* The assembly loop is bottom-tested: never enter it with nothing to do */
  if ( count == 0 )
    return;

  unsigned long i = 0;

  /* XMM0 - clear mask (0x000000FF per dword) - kept */
  /* XMM1,2,3 - General purpose */
  /* XMM4 - divide mask (0x1F per byte) - kept */
  /* XMM5 - temp */
  /* XMM6 - temp */
  /* XMM7 - unused */

  __asm__ __volatile__ (
    /* Build the two constant masks */
    "mov $0x1F1F1F1F, %%eax\n\t"
    "movd %%eax, %%xmm4\n\t"
    "pshufd $0x0, %%xmm4, %%xmm4\n\t"
    "mov $0xff, %%eax\n\t"
    "movd %%eax, %%xmm0\n\t"
    "pshufd $0x0, %%xmm0, %%xmm0\n\t"
    "1:\n\t"
    /* Load four pixels from each source and divide every byte by 8 */
    "movdqa (%[col1],%[idx],4), %%xmm1\n\t"
    "movdqa (%[col2],%[idx],4), %%xmm2\n\t"
    "psrlq $0x3, %%xmm1\n\t"
    "psrlq $0x3, %%xmm2\n\t"
    "pand %%xmm4, %%xmm1\n\t"
    "pand %%xmm4, %%xmm2\n\t"
    /* Per-byte absolute difference: max(a,b) - min(a,b) */
    "movdqa %%xmm1, %%xmm5\n\t"
    "movdqa %%xmm2, %%xmm6\n\t"
    "pmaxub %%xmm1, %%xmm2\n\t"
    "pminub %%xmm5, %%xmm6\n\t"
    "psubb %%xmm6, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm3\n\t"
    /* Byte 2 (green) * 5 */
    "psrld $0x10, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm1\n\t"
    "pslld $0x2, %%xmm2\n\t"
    "paddd %%xmm1, %%xmm2\n\t"
    /* Byte 1 (red) * 2 */
    "movdqa %%xmm3, %%xmm1\n\t"
    "psrld $0x8, %%xmm1\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "paddd %%xmm1, %%xmm1\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Byte 3 (blue) * 1 */
    "movdqa %%xmm3, %%xmm2\n\t"
    "psrld $0x18, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Gather the low byte of each of the four dwords into one dword */
    "movdqa %%xmm1, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm3\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x9, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x6, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "psrldq $0x3, %%xmm1\n\t"
    "por %%xmm1, %%xmm3\n\t"
    /* Store four result bytes and advance by four pixels */
    "movd %%xmm3, %%eax\n\t"
    "movnti %%eax, (%[res],%[idx])\n\t"
    "add $0x4, %[idx]\n\t"
    "cmp %[cnt], %[idx]\n\t"
    "jb 1b\n\t"
    /* The index is modified by the loop, so it must be a read-write operand */
    : [idx] "+r" (i)
    : [col1] "r" (col1), [col2] "r" (col2), [res] "r" (result), [cnt] "m" (count)
    : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
|
||||||
|
|
||||||
|
/* RGB32: ABGR SSE2 */
|
||||||
|
/*
** Weighted 8-bit delta of two RGB32 buffers in A,B,G,R byte order (SSE2).
**
** For every pixel each channel is first divided by 8, the absolute difference
** of the two inputs is taken per channel, and the output byte is
** 2*dR + 5*dG + dB (at most 248). The alpha byte is ignored.
**
** col1, col2 - source buffers, 4 bytes per pixel. They are loaded with movdqa,
**              so both must be 16-byte aligned.
** result     - destination, 1 byte per pixel, written 4 bytes at a time with
**              a non-temporal store (movnti).
** count      - number of pixels. Pixels are handled four at a time and the
**              loop condition is tested at the bottom, so a count that is not
**              a multiple of four is effectively rounded up. A count of zero
**              returns without touching any buffer.
*/
__attribute__ ((noinline)) void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__))
  /* The assembly loop is bottom-tested: never enter it with nothing to do */
  if ( count == 0 )
    return;

  unsigned long i = 0;

  /* XMM0 - clear mask (0x000000FF per dword) - kept */
  /* XMM1,2,3 - General purpose */
  /* XMM4 - divide mask (0x1F per byte) - kept */
  /* XMM5 - temp */
  /* XMM6 - temp */
  /* XMM7 - unused */

  __asm__ __volatile__ (
    /* Build the two constant masks */
    "mov $0x1F1F1F1F, %%eax\n\t"
    "movd %%eax, %%xmm4\n\t"
    "pshufd $0x0, %%xmm4, %%xmm4\n\t"
    "mov $0xff, %%eax\n\t"
    "movd %%eax, %%xmm0\n\t"
    "pshufd $0x0, %%xmm0, %%xmm0\n\t"
    "1:\n\t"
    /* Load four pixels from each source and divide every byte by 8 */
    "movdqa (%[col1],%[idx],4), %%xmm1\n\t"
    "movdqa (%[col2],%[idx],4), %%xmm2\n\t"
    "psrlq $0x3, %%xmm1\n\t"
    "psrlq $0x3, %%xmm2\n\t"
    "pand %%xmm4, %%xmm1\n\t"
    "pand %%xmm4, %%xmm2\n\t"
    /* Per-byte absolute difference: max(a,b) - min(a,b) */
    "movdqa %%xmm1, %%xmm5\n\t"
    "movdqa %%xmm2, %%xmm6\n\t"
    "pmaxub %%xmm1, %%xmm2\n\t"
    "pminub %%xmm5, %%xmm6\n\t"
    "psubb %%xmm6, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm3\n\t"
    /* Byte 2 (green) * 5 */
    "psrld $0x10, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "movdqa %%xmm2, %%xmm1\n\t"
    "pslld $0x2, %%xmm2\n\t"
    "paddd %%xmm1, %%xmm2\n\t"
    /* Byte 1 (blue) * 1 */
    "movdqa %%xmm3, %%xmm1\n\t"
    "psrld $0x8, %%xmm1\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Byte 3 (red) * 2 */
    "movdqa %%xmm3, %%xmm2\n\t"
    "psrld $0x18, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "paddd %%xmm2, %%xmm2\n\t"
    "paddd %%xmm2, %%xmm1\n\t"
    /* Gather the low byte of each of the four dwords into one dword */
    "movdqa %%xmm1, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm3\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x9, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "movdqa %%xmm1, %%xmm2\n\t"
    "pand %%xmm0, %%xmm2\n\t"
    "psrldq $0x6, %%xmm2\n\t"
    "por %%xmm2, %%xmm3\n\t"
    "pand %%xmm0, %%xmm1\n\t"
    "psrldq $0x3, %%xmm1\n\t"
    "por %%xmm1, %%xmm3\n\t"
    /* Store four result bytes and advance by four pixels */
    "movd %%xmm3, %%eax\n\t"
    "movnti %%eax, (%[res],%[idx])\n\t"
    "add $0x4, %[idx]\n\t"
    "cmp %[cnt], %[idx]\n\t"
    "jb 1b\n\t"
    /* The index is modified by the loop, so it must be a read-write operand */
    : [idx] "+r" (i)
    : [col1] "r" (col1), [col2] "r" (col2), [res] "r" (result), [cnt] "m" (count)
    : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
|
||||||
|
|
||||||
/* RGB32: RGBA SSSE3 */
|
/* RGB32: RGBA SSSE3 */
|
||||||
__attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if (defined(__i386__) || defined(__x86_64__))
|
||||||
|
@ -2724,7 +3034,7 @@ __attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uin
|
||||||
"cmp %3, %4\n\t"
|
"cmp %3, %4\n\t"
|
||||||
"jb algo_ssse3_delta8_rgba\n\t"
|
"jb algo_ssse3_delta8_rgba\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (i), "m" (*movemask)
|
: "r" (col1), "r" (col2), "r" (result), "m" (count), "r" (i), "m" (*movemask)
|
||||||
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
@ -2782,7 +3092,7 @@ __attribute__ ((noinline)) void ssse3_delta8_bgra(const uint8_t* col1, const uin
|
||||||
"cmp %3, %4\n\t"
|
"cmp %3, %4\n\t"
|
||||||
"jb algo_ssse3_delta8_bgra\n\t"
|
"jb algo_ssse3_delta8_bgra\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (i), "m" (*movemask)
|
: "r" (col1), "r" (col2), "r" (result), "m" (count), "r" (i), "m" (*movemask)
|
||||||
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
@ -2841,7 +3151,7 @@ __attribute__ ((noinline)) void ssse3_delta8_argb(const uint8_t* col1, const uin
|
||||||
"cmp %3, %4\n\t"
|
"cmp %3, %4\n\t"
|
||||||
"jb algo_ssse3_delta8_argb\n\t"
|
"jb algo_ssse3_delta8_argb\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (i), "m" (*movemask)
|
: "r" (col1), "r" (col2), "r" (result), "m" (count), "r" (i), "m" (*movemask)
|
||||||
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
@ -2900,7 +3210,7 @@ __attribute__ ((noinline)) void ssse3_delta8_abgr(const uint8_t* col1, const uin
|
||||||
"cmp %3, %4\n\t"
|
"cmp %3, %4\n\t"
|
||||||
"jb algo_ssse3_delta8_abgr\n\t"
|
"jb algo_ssse3_delta8_abgr\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (i), "m" (*movemask)
|
: "r" (col1), "r" (col2), "r" (result), "m" (count), "r" (i), "m" (*movemask)
|
||||||
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
@ -3072,7 +3382,7 @@ __attribute__ ((noinline)) void ssse3_convert_rgba_gray8(const uint8_t* col1, ui
|
||||||
"cmp %2, %3\n\t"
|
"cmp %2, %3\n\t"
|
||||||
"jb algo_ssse3_convert_rgba_gray8\n\t"
|
"jb algo_ssse3_convert_rgba_gray8\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (result), "r" (count), "r" (i), "m" (*movemask)
|
: "r" (col1), "r" (result), "m" (count), "r" (i), "m" (*movemask)
|
||||||
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
@ -3113,7 +3423,7 @@ __attribute__ ((noinline)) void ssse3_convert_yuyv_gray8(const uint8_t* col1, ui
|
||||||
"cmp %2, %3\n\t"
|
"cmp %2, %3\n\t"
|
||||||
"jb algo_ssse3_convert_yuyv_gray8\n\t"
|
"jb algo_ssse3_convert_yuyv_gray8\n\t"
|
||||||
:
|
:
|
||||||
: "r" (col1), "r" (result), "r" (count), "r" (i), "m" (*movemask1), "m" (*movemask2)
|
: "r" (col1), "r" (result), "m" (count), "r" (i), "m" (*movemask1), "m" (*movemask2)
|
||||||
: "%xmm3", "%xmm4", "cc", "memory"
|
: "%xmm3", "%xmm4", "cc", "memory"
|
||||||
);
|
);
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -258,6 +258,10 @@ void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result,
|
||||||
void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void ssse3_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void ssse3_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void ssse3_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void ssse3_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
|
|
@ -2937,6 +2937,7 @@ bool Monitor::DumpSettings( char *output, bool verbose )
|
||||||
}
|
}
|
||||||
#endif // ZM_HAS_V4L
|
#endif // ZM_HAS_V4L
|
||||||
sprintf( output+strlen(output), "Colours : %d\n", camera->Colours() );
|
sprintf( output+strlen(output), "Colours : %d\n", camera->Colours() );
|
||||||
|
sprintf( output+strlen(output), "Subpixel Order : %d\n", camera->SubpixelOrder() );
|
||||||
sprintf( output+strlen(output), "Event Prefix : %s\n", event_prefix );
|
sprintf( output+strlen(output), "Event Prefix : %s\n", event_prefix );
|
||||||
sprintf( output+strlen(output), "Label Format : %s\n", label_format );
|
sprintf( output+strlen(output), "Label Format : %s\n", label_format );
|
||||||
sprintf( output+strlen(output), "Label Coord : %d,%d\n", label_coord.X(), label_coord.Y() );
|
sprintf( output+strlen(output), "Label Coord : %d,%d\n", label_coord.X(), label_coord.Y() );
|
||||||
|
|
|
@ -171,28 +171,15 @@ bool Zone::CheckAlarms( const Image *delta_image )
|
||||||
hi_x = ranges[py].hi_x;
|
hi_x = ranges[py].hi_x;
|
||||||
|
|
||||||
Debug( 7, "Checking line %d from %d -> %d", y, lo_x, hi_x );
|
Debug( 7, "Checking line %d from %d -> %d", y, lo_x, hi_x );
|
||||||
pdiff = diff_buff + (lo_x * y);
|
pdiff = diff_buff + (diff_width * y);
|
||||||
ppoly = pg_image->Buffer( ranges[py].off_x, py );
|
ppoly = pg_image->Buffer( ranges[py].off_x, py );
|
||||||
// Left margin
|
pdiff += lo_x;
|
||||||
if ( y < hi_y )
|
|
||||||
{
|
|
||||||
int next_lo_x = ranges[py+1].lo_x;
|
|
||||||
if ( next_lo_x < lo_x )
|
|
||||||
{
|
|
||||||
int lo_x_diff = lo_x-next_lo_x;
|
|
||||||
memset( pdiff-lo_x_diff, BLACK, lo_x_diff );
|
|
||||||
}
|
|
||||||
else if ( lo_x > 0 )
|
|
||||||
*(pdiff-1) = BLACK;
|
|
||||||
}
|
|
||||||
else if ( lo_x > 0 )
|
|
||||||
*(pdiff-1) = BLACK;
|
|
||||||
for ( int x = lo_x; x <= hi_x; x++, pdiff++, ppoly++ )
|
for ( int x = lo_x; x <= hi_x; x++, pdiff++, ppoly++ )
|
||||||
{
|
{
|
||||||
if ( *ppoly && (*pdiff > min_pixel_threshold) && (!max_pixel_threshold || (*pdiff < max_pixel_threshold)) )
|
if ( *ppoly && (*pdiff > min_pixel_threshold) && (!max_pixel_threshold || (*pdiff < max_pixel_threshold)) )
|
||||||
{
|
{
|
||||||
alarm_pixels++;
|
alarm_pixels++;
|
||||||
pixel_diff_count += abs(*pdiff);
|
pixel_diff_count += *pdiff;
|
||||||
*pdiff = WHITE;
|
*pdiff = WHITE;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -200,16 +187,6 @@ bool Zone::CheckAlarms( const Image *delta_image )
|
||||||
*pdiff = BLACK;
|
*pdiff = BLACK;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Right margin
|
|
||||||
if ( y < hi_y )
|
|
||||||
{
|
|
||||||
int next_hi_x = ranges[py+1].hi_x;
|
|
||||||
if ( next_hi_x > hi_x )
|
|
||||||
{
|
|
||||||
//printf( "%d: Setting %d-%d = %d\n", y, hi_x, next_hi_x, next_hi_x-hi_x );
|
|
||||||
memset( pdiff, BLACK, next_hi_x-hi_x );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if ( pixel_diff_count && alarm_pixels )
|
if ( pixel_diff_count && alarm_pixels )
|
||||||
pixel_diff = pixel_diff_count/alarm_pixels;
|
pixel_diff = pixel_diff_count/alarm_pixels;
|
||||||
|
|
Loading…
Reference in New Issue