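Added preprocessor checks for two macros that control the inclusion of the SSE code: ZM_KEEP_SSE keeps it even when neither __i386__ nor __x86_64__ is defined, and ZM_STRIP_SSE removes it unconditionally.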
This commit is contained in:
parent
349e18b543
commit
66f5d7c2f1
|
@ -2517,7 +2517,7 @@ void Image::Scale( unsigned int factor )
|
||||||
|
|
||||||
|
|
||||||
__attribute__ ((noinline)) void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
|
__attribute__ ((noinline)) void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
static uint32_t divider = 0;
|
static uint32_t divider = 0;
|
||||||
static uint32_t clearmask = 0;
|
static uint32_t clearmask = 0;
|
||||||
static double current_blendpercent = 0.0;
|
static double current_blendpercent = 0.0;
|
||||||
|
@ -2859,7 +2859,7 @@ __attribute__ ((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8
|
||||||
|
|
||||||
/* Grayscale SSE2 */
|
/* Grayscale SSE2 */
|
||||||
__attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"sub $0x10, %0\n\t"
|
"sub $0x10, %0\n\t"
|
||||||
|
@ -2887,7 +2887,7 @@ __attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uin
|
||||||
|
|
||||||
/* RGB32: RGBA SSE2 */
|
/* RGB32: RGBA SSE2 */
|
||||||
__attribute__ ((noinline)) void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -2956,7 +2956,7 @@ __attribute__ ((noinline)) void sse2_delta8_rgba(const uint8_t* col1, const uint
|
||||||
|
|
||||||
/* RGB32: BGRA SSE2 */
|
/* RGB32: BGRA SSE2 */
|
||||||
__attribute__ ((noinline)) void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3025,7 +3025,7 @@ __attribute__ ((noinline)) void sse2_delta8_bgra(const uint8_t* col1, const uint
|
||||||
|
|
||||||
/* RGB32: ARGB SSE2 */
|
/* RGB32: ARGB SSE2 */
|
||||||
__attribute__ ((noinline)) void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3095,7 +3095,7 @@ __attribute__ ((noinline)) void sse2_delta8_argb(const uint8_t* col1, const uint
|
||||||
|
|
||||||
/* RGB32: ABGR SSE2 */
|
/* RGB32: ABGR SSE2 */
|
||||||
__attribute__ ((noinline)) void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3165,7 +3165,7 @@ __attribute__ ((noinline)) void sse2_delta8_abgr(const uint8_t* col1, const uint
|
||||||
|
|
||||||
/* RGB32: RGBA SSSE3 */
|
/* RGB32: RGBA SSSE3 */
|
||||||
__attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3217,7 +3217,7 @@ __attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uin
|
||||||
|
|
||||||
/* RGB32: BGRA SSSE3 */
|
/* RGB32: BGRA SSSE3 */
|
||||||
__attribute__ ((noinline)) void ssse3_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void ssse3_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3269,7 +3269,7 @@ __attribute__ ((noinline)) void ssse3_delta8_bgra(const uint8_t* col1, const uin
|
||||||
|
|
||||||
/* RGB32: ARGB SSSE3 */
|
/* RGB32: ARGB SSSE3 */
|
||||||
__attribute__ ((noinline)) void ssse3_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void ssse3_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3322,7 +3322,7 @@ __attribute__ ((noinline)) void ssse3_delta8_argb(const uint8_t* col1, const uin
|
||||||
|
|
||||||
/* RGB32: ABGR SSSE3 */
|
/* RGB32: ABGR SSSE3 */
|
||||||
__attribute__ ((noinline)) void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3575,7 +3575,7 @@ __attribute__ ((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint
|
||||||
|
|
||||||
/* RGBA to grayscale SSSE3 */
|
/* RGBA to grayscale SSSE3 */
|
||||||
__attribute__ ((noinline)) void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
"mov $0x1F1F1F1F, %%eax\n\t"
|
"mov $0x1F1F1F1F, %%eax\n\t"
|
||||||
|
@ -3621,7 +3621,7 @@ __attribute__ ((noinline)) void ssse3_convert_rgba_gray8(const uint8_t* col1, ui
|
||||||
|
|
||||||
/* Converts a YUYV image into grayscale by extracting the Y channel */
|
/* Converts a YUYV image into grayscale by extracting the Y channel */
|
||||||
__attribute__ ((noinline)) void ssse3_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
__attribute__ ((noinline)) void ssse3_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
unsigned long i = 0;
|
unsigned long i = 0;
|
||||||
|
|
||||||
__attribute__((aligned(16))) static const uint8_t movemask1[16] = {0,2,4,6,8,10,12,14,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
|
__attribute__((aligned(16))) static const uint8_t movemask1[16] = {0,2,4,6,8,10,12,14,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
|
||||||
|
|
|
@ -193,7 +193,7 @@ void ssedetect() {
|
||||||
/* SSE2 aligned memory copy. Useful for big copying of aligned memory like image buffers in ZM */
|
/* SSE2 aligned memory copy. Useful for big copying of aligned memory like image buffers in ZM */
|
||||||
/* For platforms without SSE2 we will use standard x86 asm memcpy or glibc's memcpy() */
|
/* For platforms without SSE2 we will use standard x86 asm memcpy or glibc's memcpy() */
|
||||||
__attribute__((noinline)) void* sse2_aligned_memcpy(void* dest, const void* src, size_t bytes) {
|
__attribute__((noinline)) void* sse2_aligned_memcpy(void* dest, const void* src, size_t bytes) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
if(bytes > 128) {
|
if(bytes > 128) {
|
||||||
unsigned int remainder = bytes % 128;
|
unsigned int remainder = bytes % 128;
|
||||||
const uint8_t* lastsrc = (uint8_t*)src + (bytes - remainder);
|
const uint8_t* lastsrc = (uint8_t*)src + (bytes - remainder);
|
||||||
|
|
|
@ -1005,7 +1005,7 @@ void Zone::std_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsig
|
||||||
}
|
}
|
||||||
|
|
||||||
void Zone::sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum) {
|
void Zone::sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum) {
|
||||||
#if (defined(__i386__) || defined(__x86_64__))
|
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
|
||||||
__attribute__((aligned(16))) static uint8_t calc_maxpthreshold[16] = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
|
__attribute__((aligned(16))) static uint8_t calc_maxpthreshold[16] = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
|
||||||
__attribute__((aligned(16))) static uint8_t calc_minpthreshold[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
__attribute__((aligned(16))) static uint8_t calc_minpthreshold[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||||
static uint8_t current_minpthreshold = 0;
|
static uint8_t current_minpthreshold = 0;
|
||||||
|
|
Loading…
Reference in New Issue