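Added checks for the ZM_KEEP_SSE and ZM_STRIP_SSE #defines to control the inclusion of the SSE code: defining ZM_KEEP_SSE compiles it in even on non-x86 targets, and defining ZM_STRIP_SSE removes it on every target.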

This commit is contained in:
Kfir Itzhak 2011-07-14 16:45:58 +03:00
parent 349e18b543
commit 66f5d7c2f1
3 changed files with 14 additions and 14 deletions

View File

@ -2517,7 +2517,7 @@ void Image::Scale( unsigned int factor )
__attribute__ ((noinline)) void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) { __attribute__ ((noinline)) void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
static uint32_t divider = 0; static uint32_t divider = 0;
static uint32_t clearmask = 0; static uint32_t clearmask = 0;
static double current_blendpercent = 0.0; static double current_blendpercent = 0.0;
@ -2859,7 +2859,7 @@ __attribute__ ((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8
/* Grayscale SSE2 */ /* Grayscale SSE2 */
__attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"sub $0x10, %0\n\t" "sub $0x10, %0\n\t"
@ -2887,7 +2887,7 @@ __attribute__ ((noinline)) void sse2_delta8_gray8(const uint8_t* col1, const uin
/* RGB32: RGBA SSE2 */ /* RGB32: RGBA SSE2 */
__attribute__ ((noinline)) void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -2956,7 +2956,7 @@ __attribute__ ((noinline)) void sse2_delta8_rgba(const uint8_t* col1, const uint
/* RGB32: BGRA SSE2 */ /* RGB32: BGRA SSE2 */
__attribute__ ((noinline)) void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3025,7 +3025,7 @@ __attribute__ ((noinline)) void sse2_delta8_bgra(const uint8_t* col1, const uint
/* RGB32: ARGB SSE2 */ /* RGB32: ARGB SSE2 */
__attribute__ ((noinline)) void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3095,7 +3095,7 @@ __attribute__ ((noinline)) void sse2_delta8_argb(const uint8_t* col1, const uint
/* RGB32: ABGR SSE2 */ /* RGB32: ABGR SSE2 */
__attribute__ ((noinline)) void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3165,7 +3165,7 @@ __attribute__ ((noinline)) void sse2_delta8_abgr(const uint8_t* col1, const uint
/* RGB32: RGBA SSSE3 */ /* RGB32: RGBA SSSE3 */
__attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3217,7 +3217,7 @@ __attribute__ ((noinline)) void ssse3_delta8_rgba(const uint8_t* col1, const uin
/* RGB32: BGRA SSSE3 */ /* RGB32: BGRA SSSE3 */
__attribute__ ((noinline)) void ssse3_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void ssse3_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3269,7 +3269,7 @@ __attribute__ ((noinline)) void ssse3_delta8_bgra(const uint8_t* col1, const uin
/* RGB32: ARGB SSSE3 */ /* RGB32: ARGB SSSE3 */
__attribute__ ((noinline)) void ssse3_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void ssse3_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3322,7 +3322,7 @@ __attribute__ ((noinline)) void ssse3_delta8_argb(const uint8_t* col1, const uin
/* RGB32: ABGR SSSE3 */ /* RGB32: ABGR SSSE3 */
__attribute__ ((noinline)) void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3575,7 +3575,7 @@ __attribute__ ((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint
/* RGBA to grayscale SSSE3 */ /* RGBA to grayscale SSSE3 */
__attribute__ ((noinline)) void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__asm__ __volatile__ ( __asm__ __volatile__ (
"mov $0x1F1F1F1F, %%eax\n\t" "mov $0x1F1F1F1F, %%eax\n\t"
@ -3621,7 +3621,7 @@ __attribute__ ((noinline)) void ssse3_convert_rgba_gray8(const uint8_t* col1, ui
/* Converts a YUYV image into grayscale by extracting the Y channel */ /* Converts a YUYV image into grayscale by extracting the Y channel */
__attribute__ ((noinline)) void ssse3_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { __attribute__ ((noinline)) void ssse3_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
unsigned long i = 0; unsigned long i = 0;
__attribute__((aligned(16))) static const uint8_t movemask1[16] = {0,2,4,6,8,10,12,14,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; __attribute__((aligned(16))) static const uint8_t movemask1[16] = {0,2,4,6,8,10,12,14,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};

View File

@ -193,7 +193,7 @@ void ssedetect() {
/* SSE2 aligned memory copy. Useful for big copying of aligned memory like image buffers in ZM */ /* SSE2 aligned memory copy. Useful for big copying of aligned memory like image buffers in ZM */
/* For platforms without SSE2 we will use standard x86 asm memcpy or glibc's memcpy() */ /* For platforms without SSE2 we will use standard x86 asm memcpy or glibc's memcpy() */
__attribute__((noinline)) void* sse2_aligned_memcpy(void* dest, const void* src, size_t bytes) { __attribute__((noinline)) void* sse2_aligned_memcpy(void* dest, const void* src, size_t bytes) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
if(bytes > 128) { if(bytes > 128) {
unsigned int remainder = bytes % 128; unsigned int remainder = bytes % 128;
const uint8_t* lastsrc = (uint8_t*)src + (bytes - remainder); const uint8_t* lastsrc = (uint8_t*)src + (bytes - remainder);

View File

@ -1005,7 +1005,7 @@ void Zone::std_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsig
} }
void Zone::sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum) { void Zone::sse2_alarmedpixels(Image* pdiff_image, const Image* ppoly_image, unsigned int* pixel_count, unsigned int* pixel_sum) {
#if (defined(__i386__) || defined(__x86_64__)) #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
__attribute__((aligned(16))) static uint8_t calc_maxpthreshold[16] = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127}; __attribute__((aligned(16))) static uint8_t calc_maxpthreshold[16] = {127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127};
__attribute__((aligned(16))) static uint8_t calc_minpthreshold[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; __attribute__((aligned(16))) static uint8_t calc_minpthreshold[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
static uint8_t current_minpthreshold = 0; static uint8_t current_minpthreshold = 0;