From 6827e0412cd49e5d2a90ccb66b925756f4f8beb3 Mon Sep 17 00:00:00 2001 From: Isaac Connor Date: Fri, 28 Apr 2017 10:34:48 -0400 Subject: [PATCH] white and google style --- src/zm_image.cpp | 2198 +++++++++++++++++++++++----------------------- 1 file changed, 1096 insertions(+), 1102 deletions(-) diff --git a/src/zm_image.cpp b/src/zm_image.cpp index 250ed1b45..7b353ed17 100644 --- a/src/zm_image.cpp +++ b/src/zm_image.cpp @@ -74,8 +74,7 @@ static deinterlace_4field_fptr_t fptr_deinterlace_4field_gray8; /* Pointer to image buffer memory copy function */ imgbufcpy_fptr_t fptr_imgbufcpy; -Image::Image() -{ +Image::Image() { if ( !initialised ) Initialise(); width = 0; @@ -91,8 +90,7 @@ Image::Image() text[0] = '\0'; } -Image::Image( const char *filename ) -{ +Image::Image( const char *filename ) { if ( !initialised ) Initialise(); width = 0; @@ -158,14 +156,14 @@ Image::~Image() { /* Should be called as part of program shutdown to free everything */ void Image::Deinitialise() { if ( initialised ) { - /* - delete[] y_table; - delete[] uv_table; - delete[] r_v_table; - delete[] g_v_table; - delete[] g_u_table; - delete[] b_u_table; - */ + /* + delete[] y_table; + delete[] uv_table; + delete[] r_v_table; + delete[] g_v_table; + delete[] g_u_table; + delete[] b_u_table; + */ initialised = false; if ( readjpg_dcinfo ) { jpeg_destroy_decompress( readjpg_dcinfo ); @@ -206,24 +204,24 @@ void Image::Initialise() fptr_blend = &std_blend; Debug(4,"Blend: Using standard blend function"); } - + __attribute__((aligned(16))) uint8_t blend1[16] = {142,255,159,91,88,227,0,52,37,80,152,97,104,252,90,82}; __attribute__((aligned(16))) uint8_t blend2[16] = {129,56,136,96,119,149,94,29,96,176,1,144,230,203,111,172}; __attribute__((aligned(16))) uint8_t blendres[16]; __attribute__((aligned(16))) uint8_t blendexp[16] = {141,231,157,92,91,217,11,49,45,92,133,103,119,246,92,93}; /* Expected results for 12.5% blend */ - + (*fptr_blend)(blend1,blend2,blendres,16,12.5); - + /* Compare results with expected results */ for(int i=0;i<16;i++) { if(abs(blendexp[i] - blendres[i]) > 3) { Panic("Blend function failed self-test: Results differ from the expected results"); } } - + fptr_delta8_rgb = &std_delta8_rgb; fptr_delta8_bgr = &std_delta8_bgr; - + /* Assign the delta functions */ if(config.cpu_extensions) { if(sseversion >= 35) { @@ -241,10 +239,10 @@ void Image::Initialise() fptr_delta8_argb = &sse2_delta8_argb; fptr_delta8_abgr = &sse2_delta8_abgr; /* - ** On some systems, the 4 SSE2 algorithms above might be a little slower than - ** the standard algorithms, especially on early Pentium 4 processors. - ** In that case, comment out the 4 lines above and uncomment the 4 lines below - */ + ** On some systems, the 4 SSE2 algorithms above might be a little slower than + ** the standard algorithms, especially on early Pentium 4 processors. + ** In that case, comment out the 4 lines above and uncomment the 4 lines below + */ // fptr_delta8_rgba = &std_delta8_rgba; // fptr_delta8_bgra = &std_delta8_bgra; // fptr_delta8_argb = &std_delta8_argb; @@ -277,19 +275,19 @@ void Image::Initialise() fptr_delta8_gray8 = &std_delta8_gray8; Debug(4,"Delta: CPU extensions disabled, using standard delta functions"); } - + /* SSSE3 deinterlacing functions were removed because they were usually equal or slower than the standard code (compiled with -O2 or better) The function is too complicated to be vectorized efficiently - */ + */ fptr_deinterlace_4field_rgba = &std_deinterlace_4field_rgba; fptr_deinterlace_4field_bgra = &std_deinterlace_4field_bgra; fptr_deinterlace_4field_argb = &std_deinterlace_4field_argb; fptr_deinterlace_4field_abgr = &std_deinterlace_4field_abgr; fptr_deinterlace_4field_gray8 = &std_deinterlace_4field_gray8; Debug(4,"Deinterlace: Using standard functions"); - + #if defined(__i386__) && !defined(__x86_64__) /* Use SSE2 aligned memory copy? */ if(config.cpu_extensions && sseversion >= 20) { @@ -303,10 +301,10 @@ void Image::Initialise() fptr_imgbufcpy = &memcpy; Debug(4,"Image buffer copy: Using standard memcpy"); #endif - + /* Code below relocated from zm_local_camera */ Debug( 3, "Setting up static colour tables" ); - + y_table = y_table_global; uv_table = uv_table_global; r_v_table = r_v_table_global; @@ -314,63 +312,63 @@ void Image::Initialise() g_u_table = g_u_table_global; b_u_table = b_u_table_global; /* - y_table = new unsigned char[256]; - for ( int i = 0; i <= 255; i++ ) - { - unsigned char c = i; - if ( c <= 16 ) - y_table[c] = 0; - else if ( c >= 235 ) - y_table[c] = 255; - else - y_table[c] = (255*(c-16))/219; - } + y_table = new unsigned char[256]; + for ( int i = 0; i <= 255; i++ ) + { + unsigned char c = i; + if ( c <= 16 ) + y_table[c] = 0; + else if ( c >= 235 ) + y_table[c] = 255; + else + y_table[c] = (255*(c-16))/219; + } - uv_table = new signed char[256]; - for ( int i = 0; i <= 255; i++ ) - { - unsigned char c = i; - if ( c <= 16 ) - uv_table[c] = -127; - else if ( c >= 240 ) - uv_table[c] = 127; - else - uv_table[c] = (127*(c-128))/112; - } + uv_table = new signed char[256]; + for ( int i = 0; i <= 255; i++ ) + { + unsigned char c = i; + if ( c <= 16 ) + uv_table[c] = -127; + else if ( c >= 240 ) + uv_table[c] = 127; + else + uv_table[c] = (127*(c-128))/112; + } - r_v_table = new short[255]; - g_v_table = new short[255]; - g_u_table = new short[255]; - b_u_table = new short[255]; - for ( int i = 0; i < 255; i++ ) - { - r_v_table[i] = (1402*(i-128))/1000; - g_u_table[i] = (344*(i-128))/1000; - g_v_table[i] = (714*(i-128))/1000; - b_u_table[i] = (1772*(i-128))/1000; - } + r_v_table = new short[255]; + g_v_table = new short[255]; + g_u_table = new short[255]; + b_u_table = new short[255]; + for ( int i = 0; i < 255; i++ ) + { + r_v_table[i] = (1402*(i-128))/1000; + g_u_table[i] = (344*(i-128))/1000; + g_v_table[i] = (714*(i-128))/1000; + b_u_table[i] = (1772*(i-128))/1000; + } */ - + initialised = true; } /* Requests a writeable buffer to the image. This is safer than buffer() because this way we can guarantee that a buffer of required size exists */ uint8_t* Image::WriteBuffer(const unsigned int p_width, const unsigned int p_height, const unsigned int p_colours, const unsigned int p_subpixelorder) { unsigned int newsize; - + if(p_colours != ZM_COLOUR_GRAY8 && p_colours != ZM_COLOUR_RGB24 && p_colours != ZM_COLOUR_RGB32) { Error("WriteBuffer called with unexpected colours: %d",p_colours); return NULL; } - + if(!p_height || !p_width) { Error("WriteBuffer called with invalid width or height: %d %d",p_width,p_height); return NULL; } - + if(p_width != width || p_height != height || p_colours != colours || p_subpixelorder != subpixelorder) { newsize = (p_width * p_height) * p_colours; - + if(buffer == NULL) { AllocImgBuffer(newsize); } else { @@ -385,7 +383,7 @@ uint8_t* Image::WriteBuffer(const unsigned int p_width, const unsigned int p_hei } } } - + width = p_width; height = p_height; colours = p_colours; @@ -393,9 +391,9 @@ uint8_t* Image::WriteBuffer(const unsigned int p_width, const unsigned int p_hei pixels = height*width; size = newsize; } - + return buffer; - + } /* Assign an existing buffer to the image instead of copying from a source buffer. The goal is to reduce the amount of memory copying and increase efficiency and buffer reusing. */ @@ -416,12 +414,12 @@ void Image::AssignDirect( const unsigned int p_width, const unsigned int p_heigh } unsigned int new_buffer_size = ((p_width*p_height)*p_colours); - + if(buffer_size < new_buffer_size) { Error("Attempt to directly assign buffer from an undersized buffer of size: %zu, needed %dx%d*%d colours = %zu",buffer_size, p_width, p_height, p_colours, new_buffer_size ); return; } - + if(holdbuffer && buffer) { if(new_buffer_size > allocation) { Error("Held buffer is undersized for assigned buffer"); @@ -433,55 +431,55 @@ void Image::AssignDirect( const unsigned int p_width, const unsigned int p_heigh subpixelorder = p_subpixelorder; pixels = height*width; size = new_buffer_size; // was pixels*colours, but we already calculated it above as new_buffer_size - + /* Copy into the held buffer */ if(new_buffer != buffer) (*fptr_imgbufcpy)(buffer, new_buffer, size); - + /* Free the new buffer */ DumpBuffer(new_buffer, p_buffertype); } } else { /* Free an existing buffer if any */ DumpImgBuffer(); - + width = p_width; height = p_height; colours = p_colours; subpixelorder = p_subpixelorder; pixels = height*width; size = new_buffer_size; // was pixels*colours, but we already calculated it above as new_buffer_size - + allocation = buffer_size; buffertype = p_buffertype; buffer = new_buffer; } - + } void Image::Assign(const unsigned int p_width, const unsigned int p_height, const unsigned int p_colours, const unsigned int p_subpixelorder, const uint8_t* new_buffer, const size_t buffer_size) { unsigned int new_size = (p_width * p_height) * p_colours; - + if(new_buffer == NULL) { Error("Attempt to assign buffer from a NULL pointer"); return; } - + if(buffer_size < new_size) { Error("Attempt to assign buffer from an undersized buffer of size: %zu",buffer_size); return; } - + if(!p_height || !p_width) { Error("Attempt to assign buffer with invalid width or height: %d %d",p_width,p_height); return; } - + if(p_colours != ZM_COLOUR_GRAY8 && p_colours != ZM_COLOUR_RGB24 && p_colours != ZM_COLOUR_RGB32) { Error("Attempt to assign buffer with unexpected colours per pixel: %d",p_colours); return; } - + if ( !buffer || p_width != width || p_height != height || p_colours != colours || p_subpixelorder != subpixelorder) { if (holdbuffer && buffer) { @@ -495,7 +493,7 @@ void Image::Assign(const unsigned int p_width, const unsigned int p_height, cons AllocImgBuffer(new_size); } } - + width = p_width; height = p_height; pixels = width*height; @@ -503,25 +501,25 @@ void Image::Assign(const unsigned int p_width, const unsigned int p_height, cons subpixelorder = p_subpixelorder; size = new_size; } - + if(new_buffer != buffer) (*fptr_imgbufcpy)(buffer, new_buffer, size); - + } void Image::Assign( const Image &image ) { unsigned int new_size = (image.width * image.height) * image.colours; - + if(image.buffer == NULL) { Error("Attempt to assign image with an empty buffer"); return; } - + if(image.colours != ZM_COLOUR_GRAY8 && image.colours != ZM_COLOUR_RGB24 && image.colours != ZM_COLOUR_RGB32) { Error("Attempt to assign image with unexpected colours per pixel: %d",image.colours); return; } - + if ( !buffer || image.width != width || image.height != height || image.colours != colours || image.subpixelorder != subpixelorder) { if (holdbuffer && buffer) { @@ -535,7 +533,7 @@ void Image::Assign( const Image &image ) { AllocImgBuffer(new_size); } } - + width = image.width; height = image.height; pixels = width*height; @@ -543,7 +541,7 @@ void Image::Assign( const Image &image ) { subpixelorder = image.subpixelorder; size = new_size; } - + if(image.buffer != buffer) (*fptr_imgbufcpy)(buffer, image.buffer, size); } @@ -554,14 +552,14 @@ Image *Image::HighlightEdges( Rgb colour, unsigned int p_colours, unsigned int p { Panic( "Attempt to highlight image edges when colours = %d", colours ); } - + /* Convert the colour's RGBA subpixel order into the image's subpixel order */ colour = rgb_convert(colour,p_subpixelorder); - + /* Create a new image of the target format */ Image *high_image = new Image( width, height, p_colours, p_subpixelorder ); uint8_t* high_buff = high_image->WriteBuffer(width, height, p_colours, p_subpixelorder); - + /* Set image to all black */ high_image->Clear(); @@ -569,7 +567,7 @@ Image *Image::HighlightEdges( Rgb colour, unsigned int p_colours, unsigned int p unsigned int lo_y = limits?limits->Lo().Y():0; unsigned int hi_x = limits?limits->Hi().X():width-1; unsigned int hi_y = limits?limits->Hi().Y():height-1; - + if ( p_colours == ZM_COLOUR_GRAY8 ) { for ( unsigned int y = lo_y; y <= hi_y; y++ ) @@ -641,7 +639,7 @@ Image *Image::HighlightEdges( Rgb colour, unsigned int p_colours, unsigned int p } } } - + return( high_image ); } @@ -737,7 +735,7 @@ bool Image::ReadJpeg( const char *filename, unsigned int p_colours, unsigned int fclose( infile ); return( false ); } - + /* Check if the image has at least one huffman table defined. If not, use the standard ones */ /* This is required for the MJPEG capture palette of USB devices */ if(cinfo->dc_huff_tbl_ptrs[0] == NULL) { @@ -751,67 +749,67 @@ bool Image::ReadJpeg( const char *filename, unsigned int p_colours, unsigned int { Debug(9,"Image dimensions differ. Old: %ux%u New: %ux%u",width,height,new_width,new_height); } - + switch(p_colours) { case ZM_COLOUR_GRAY8: - { - cinfo->out_color_space = JCS_GRAYSCALE; - new_colours = ZM_COLOUR_GRAY8; - new_subpixelorder = ZM_SUBPIX_ORDER_NONE; - break; - } + { + cinfo->out_color_space = JCS_GRAYSCALE; + new_colours = ZM_COLOUR_GRAY8; + new_subpixelorder = ZM_SUBPIX_ORDER_NONE; + break; + } case ZM_COLOUR_RGB32: - { + { #ifdef JCS_EXTENSIONS - new_colours = ZM_COLOUR_RGB32; - if(p_subpixelorder == ZM_SUBPIX_ORDER_BGRA) { - cinfo->out_color_space = JCS_EXT_BGRX; - new_subpixelorder = ZM_SUBPIX_ORDER_BGRA; - } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ARGB) { - cinfo->out_color_space = JCS_EXT_XRGB; - new_subpixelorder = ZM_SUBPIX_ORDER_ARGB; - } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ABGR) { - cinfo->out_color_space = JCS_EXT_XBGR; - new_subpixelorder = ZM_SUBPIX_ORDER_ABGR; - } else { - /* Assume RGBA */ - cinfo->out_color_space = JCS_EXT_RGBX; - new_subpixelorder = ZM_SUBPIX_ORDER_RGBA; - } - break; + new_colours = ZM_COLOUR_RGB32; + if(p_subpixelorder == ZM_SUBPIX_ORDER_BGRA) { + cinfo->out_color_space = JCS_EXT_BGRX; + new_subpixelorder = ZM_SUBPIX_ORDER_BGRA; + } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ARGB) { + cinfo->out_color_space = JCS_EXT_XRGB; + new_subpixelorder = ZM_SUBPIX_ORDER_ARGB; + } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ABGR) { + cinfo->out_color_space = JCS_EXT_XBGR; + new_subpixelorder = ZM_SUBPIX_ORDER_ABGR; + } else { + /* Assume RGBA */ + cinfo->out_color_space = JCS_EXT_RGBX; + new_subpixelorder = ZM_SUBPIX_ORDER_RGBA; + } + break; #else - Warning("libjpeg-turbo is required for reading a JPEG directly into a RGB32 buffer, reading into a RGB24 buffer instead."); + Warning("libjpeg-turbo is required for reading a JPEG directly into a RGB32 buffer, reading into a RGB24 buffer instead."); #endif - } + } case ZM_COLOUR_RGB24: default: - { - new_colours = ZM_COLOUR_RGB24; - if(p_subpixelorder == ZM_SUBPIX_ORDER_BGR) { + { + new_colours = ZM_COLOUR_RGB24; + if(p_subpixelorder == ZM_SUBPIX_ORDER_BGR) { #ifdef JCS_EXTENSIONS - cinfo->out_color_space = JCS_EXT_BGR; - new_subpixelorder = ZM_SUBPIX_ORDER_BGR; + cinfo->out_color_space = JCS_EXT_BGR; + new_subpixelorder = ZM_SUBPIX_ORDER_BGR; #else - Warning("libjpeg-turbo is required for reading a JPEG directly into a BGR24 buffer, reading into a RGB24 buffer instead."); - cinfo->out_color_space = JCS_RGB; - new_subpixelorder = ZM_SUBPIX_ORDER_RGB; + Warning("libjpeg-turbo is required for reading a JPEG directly into a BGR24 buffer, reading into a RGB24 buffer instead."); + cinfo->out_color_space = JCS_RGB; + new_subpixelorder = ZM_SUBPIX_ORDER_RGB; #endif - } else { - /* Assume RGB */ -/* + } else { + /* Assume RGB */ + /* #ifdef JCS_EXTENSIONS - cinfo->out_color_space = JCS_EXT_RGB; +cinfo->out_color_space = JCS_EXT_RGB; #else - cinfo->out_color_space = JCS_RGB; +cinfo->out_color_space = JCS_RGB; #endif -*/ - cinfo->out_color_space = JCS_RGB; - new_subpixelorder = ZM_SUBPIX_ORDER_RGB; - } - break; - } + */ + cinfo->out_color_space = JCS_RGB; + new_subpixelorder = ZM_SUBPIX_ORDER_RGB; + } + break; + } } - + if(WriteBuffer(new_width, new_height, new_colours, new_subpixelorder) == NULL) { Error("Failed requesting writeable buffer for reading JPEG image."); jpeg_abort_decompress( cinfo ); @@ -883,97 +881,97 @@ bool Image::WriteJpeg( const char *filename, int quality_override, struct timeva cinfo->image_width = width; /* image width and height, in pixels */ cinfo->image_height = height; - + switch(colours) { case ZM_COLOUR_GRAY8: - { - cinfo->input_components = 1; - cinfo->in_color_space = JCS_GRAYSCALE; - break; - } + { + cinfo->input_components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + break; + } case ZM_COLOUR_RGB32: - { + { #ifdef JCS_EXTENSIONS - cinfo->input_components = 4; - if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { - cinfo->in_color_space = JCS_EXT_BGRX; - } else if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { - cinfo->in_color_space = JCS_EXT_XRGB; - } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { - cinfo->in_color_space = JCS_EXT_XBGR; - } else { - /* Assume RGBA */ - cinfo->in_color_space = JCS_EXT_RGBX; - } + cinfo->input_components = 4; + if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { + cinfo->in_color_space = JCS_EXT_BGRX; + } else if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { + cinfo->in_color_space = JCS_EXT_XRGB; + } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { + cinfo->in_color_space = JCS_EXT_XBGR; + } else { + /* Assume RGBA */ + cinfo->in_color_space = JCS_EXT_RGBX; + } #else - Error("libjpeg-turbo is required for JPEG encoding directly from RGB32 source"); - jpeg_abort_compress( cinfo ); - fclose(outfile); - return(false); + Error("libjpeg-turbo is required for JPEG encoding directly from RGB32 source"); + jpeg_abort_compress( cinfo ); + fclose(outfile); + return(false); #endif - break; - } + break; + } case ZM_COLOUR_RGB24: default: - { - cinfo->input_components = 3; - if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { + { + cinfo->input_components = 3; + if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { #ifdef JCS_EXTENSIONS - cinfo->in_color_space = JCS_EXT_BGR; + cinfo->in_color_space = JCS_EXT_BGR; #else - Error("libjpeg-turbo is required for JPEG encoding directly from BGR24 source"); - jpeg_abort_compress( cinfo ); - fclose(outfile); - return(false); + Error("libjpeg-turbo is required for JPEG encoding directly from BGR24 source"); + jpeg_abort_compress( cinfo ); + fclose(outfile); + return(false); #endif - } else { - /* Assume RGB */ -/* + } else { + /* Assume RGB */ + /* #ifdef JCS_EXTENSIONS - cinfo->out_color_space = JCS_EXT_RGB; +cinfo->out_color_space = JCS_EXT_RGB; #else - cinfo->out_color_space = JCS_RGB; +cinfo->out_color_space = JCS_RGB; #endif -*/ - cinfo->in_color_space = JCS_RGB; - } - break; - } + */ + cinfo->in_color_space = JCS_RGB; + } + break; + } } - + jpeg_set_defaults( cinfo ); jpeg_set_quality( cinfo, quality, FALSE ); cinfo->dct_method = JDCT_FASTEST; jpeg_start_compress( cinfo, TRUE ); - if ( config.add_jpeg_comments && text[0] ) - { + if ( config.add_jpeg_comments && text[0] ) { jpeg_write_marker( cinfo, JPEG_COM, (const JOCTET *)text, strlen(text) ); } - // If we have a non-zero time (meaning a parameter was passed in), then form a simple exif segment with that time as DateTimeOriginal and SubsecTimeOriginal - // No timestamp just leave off the exif section. - if(timestamp.tv_sec) - { - #define EXIFTIMES_MS_OFFSET 0x36 // three decimal digits for milliseconds - #define EXIFTIMES_MS_LEN 0x03 - #define EXIFTIMES_OFFSET 0x3E // 19 characters format '2015:07:21 13:14:45' not including quotes - #define EXIFTIMES_LEN 0x13 // = 19 - #define EXIF_CODE 0xE1 + // If we have a non-zero time (meaning a parameter was passed in), then form a simple exif segment with that time as DateTimeOriginal and SubsecTimeOriginal + // No timestamp just leave off the exif section. + if(timestamp.tv_sec) + { +#define EXIFTIMES_MS_OFFSET 0x36 // three decimal digits for milliseconds +#define EXIFTIMES_MS_LEN 0x03 +#define EXIFTIMES_OFFSET 0x3E // 19 characters format '2015:07:21 13:14:45' not including quotes +#define EXIFTIMES_LEN 0x13 // = 19 +#define EXIF_CODE 0xE1 - char timebuf[64], msbuf[64]; - strftime(timebuf, sizeof timebuf, "%Y:%m:%d %H:%M:%S", localtime(&(timestamp.tv_sec))); - snprintf(msbuf, sizeof msbuf, "%06d",(int)(timestamp.tv_usec)); // we only use milliseconds because that's all defined in exif, but this is the whole microseconds because we have it - unsigned char exiftimes[82] = { - 0x45, 0x78, 0x69, 0x66, 0x00, 0x00, 0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, - 0x69, 0x87, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x03, 0x90, 0x02, 0x00, 0x14, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x91, 0x92, - 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0x00 }; - memcpy(&exiftimes[EXIFTIMES_OFFSET], timebuf,EXIFTIMES_LEN); - memcpy(&exiftimes[EXIFTIMES_MS_OFFSET], msbuf ,EXIFTIMES_MS_LEN); - jpeg_write_marker (cinfo, EXIF_CODE, (const JOCTET *)exiftimes, sizeof(exiftimes) ); - } + // This is a lot of stuff to allocate on the stack. Recommend char *timebuf[64]; + char timebuf[64], msbuf[64]; + strftime(timebuf, sizeof timebuf, "%Y:%m:%d %H:%M:%S", localtime(&(timestamp.tv_sec))); + snprintf(msbuf, sizeof msbuf, "%06d",(int)(timestamp.tv_usec)); // we only use milliseconds because that's all defined in exif, but this is the whole microseconds because we have it + unsigned char exiftimes[82] = { + 0x45, 0x78, 0x69, 0x66, 0x00, 0x00, 0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x69, 0x87, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x03, 0x90, 0x02, 0x00, 0x14, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x91, 0x92, + 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x00 }; + memcpy(&exiftimes[EXIFTIMES_OFFSET], timebuf,EXIFTIMES_LEN); + memcpy(&exiftimes[EXIFTIMES_MS_OFFSET], msbuf, EXIFTIMES_MS_LEN); + jpeg_write_marker( cinfo, EXIF_CODE, (const JOCTET *)exiftimes, sizeof(exiftimes) ); + } JSAMPROW row_pointer; /* pointer to a single row */ int row_stride = cinfo->image_width * colours; /* physical row width in buffer */ @@ -1020,7 +1018,7 @@ bool Image::DecodeJpeg( const JOCTET *inbuffer, int inbuffer_size, unsigned int jpeg_abort_decompress( cinfo ); return( false ); } - + /* Check if the image has at least one huffman table defined. If not, use the standard ones */ /* This is required for the MJPEG capture palette of USB devices */ if(cinfo->dc_huff_tbl_ptrs[0] == NULL) { @@ -1034,67 +1032,67 @@ bool Image::DecodeJpeg( const JOCTET *inbuffer, int inbuffer_size, unsigned int { Debug(9,"Image dimensions differ. Old: %ux%u New: %ux%u",width,height,new_width,new_height); } - + switch(p_colours) { case ZM_COLOUR_GRAY8: - { - cinfo->out_color_space = JCS_GRAYSCALE; - new_colours = ZM_COLOUR_GRAY8; - new_subpixelorder = ZM_SUBPIX_ORDER_NONE; - break; - } + { + cinfo->out_color_space = JCS_GRAYSCALE; + new_colours = ZM_COLOUR_GRAY8; + new_subpixelorder = ZM_SUBPIX_ORDER_NONE; + break; + } case ZM_COLOUR_RGB32: - { + { #ifdef JCS_EXTENSIONS - new_colours = ZM_COLOUR_RGB32; - if(p_subpixelorder == ZM_SUBPIX_ORDER_BGRA) { - cinfo->out_color_space = JCS_EXT_BGRX; - new_subpixelorder = ZM_SUBPIX_ORDER_BGRA; - } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ARGB) { - cinfo->out_color_space = JCS_EXT_XRGB; - new_subpixelorder = ZM_SUBPIX_ORDER_ARGB; - } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ABGR) { - cinfo->out_color_space = JCS_EXT_XBGR; - new_subpixelorder = ZM_SUBPIX_ORDER_ABGR; - } else { - /* Assume RGBA */ - cinfo->out_color_space = JCS_EXT_RGBX; - new_subpixelorder = ZM_SUBPIX_ORDER_RGBA; - } - break; + new_colours = ZM_COLOUR_RGB32; + if(p_subpixelorder == ZM_SUBPIX_ORDER_BGRA) { + cinfo->out_color_space = JCS_EXT_BGRX; + new_subpixelorder = ZM_SUBPIX_ORDER_BGRA; + } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ARGB) { + cinfo->out_color_space = JCS_EXT_XRGB; + new_subpixelorder = ZM_SUBPIX_ORDER_ARGB; + } else if(p_subpixelorder == ZM_SUBPIX_ORDER_ABGR) { + cinfo->out_color_space = JCS_EXT_XBGR; + new_subpixelorder = ZM_SUBPIX_ORDER_ABGR; + } else { + /* Assume RGBA */ + cinfo->out_color_space = JCS_EXT_RGBX; + new_subpixelorder = ZM_SUBPIX_ORDER_RGBA; + } + break; #else - Warning("libjpeg-turbo is required for reading a JPEG directly into a RGB32 buffer, reading into a RGB24 buffer instead."); + Warning("libjpeg-turbo is required for reading a JPEG directly into a RGB32 buffer, reading into a RGB24 buffer instead."); #endif - } + } case ZM_COLOUR_RGB24: default: - { - new_colours = ZM_COLOUR_RGB24; - if(p_subpixelorder == ZM_SUBPIX_ORDER_BGR) { + { + new_colours = ZM_COLOUR_RGB24; + if(p_subpixelorder == ZM_SUBPIX_ORDER_BGR) { #ifdef JCS_EXTENSIONS - cinfo->out_color_space = JCS_EXT_BGR; - new_subpixelorder = ZM_SUBPIX_ORDER_BGR; + cinfo->out_color_space = JCS_EXT_BGR; + new_subpixelorder = ZM_SUBPIX_ORDER_BGR; #else - Warning("libjpeg-turbo is required for reading a JPEG directly into a BGR24 buffer, reading into a RGB24 buffer instead."); - cinfo->out_color_space = JCS_RGB; - new_subpixelorder = ZM_SUBPIX_ORDER_RGB; + Warning("libjpeg-turbo is required for reading a JPEG directly into a BGR24 buffer, reading into a RGB24 buffer instead."); + cinfo->out_color_space = JCS_RGB; + new_subpixelorder = ZM_SUBPIX_ORDER_RGB; #endif - } else { - /* Assume RGB */ -/* + } else { + /* Assume RGB */ + /* #ifdef JCS_EXTENSIONS - cinfo->out_color_space = JCS_EXT_RGB; +cinfo->out_color_space = JCS_EXT_RGB; #else - cinfo->out_color_space = JCS_RGB; +cinfo->out_color_space = JCS_RGB; #endif -*/ - cinfo->out_color_space = JCS_RGB; - new_subpixelorder = ZM_SUBPIX_ORDER_RGB; - } - break; - } + */ + cinfo->out_color_space = JCS_RGB; + new_subpixelorder = ZM_SUBPIX_ORDER_RGB; + } + break; + } } - + if(WriteBuffer(new_width, new_height, new_colours, new_subpixelorder) == NULL) { Error("Failed requesting writeable buffer for reading JPEG image."); jpeg_abort_decompress( cinfo ); @@ -1145,59 +1143,59 @@ bool Image::EncodeJpeg( JOCTET *outbuffer, int *outbuffer_size, int quality_over switch(colours) { case ZM_COLOUR_GRAY8: - { - cinfo->input_components = 1; - cinfo->in_color_space = JCS_GRAYSCALE; - break; - } + { + cinfo->input_components = 1; + cinfo->in_color_space = JCS_GRAYSCALE; + break; + } case ZM_COLOUR_RGB32: - { + { #ifdef JCS_EXTENSIONS - cinfo->input_components = 4; - if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { - cinfo->in_color_space = JCS_EXT_BGRX; - } else if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { - cinfo->in_color_space = JCS_EXT_XRGB; - } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { - cinfo->in_color_space = JCS_EXT_XBGR; - } else { - /* Assume RGBA */ - cinfo->in_color_space = JCS_EXT_RGBX; - } + cinfo->input_components = 4; + if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { + cinfo->in_color_space = JCS_EXT_BGRX; + } else if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { + cinfo->in_color_space = JCS_EXT_XRGB; + } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { + cinfo->in_color_space = JCS_EXT_XBGR; + } else { + /* Assume RGBA */ + cinfo->in_color_space = JCS_EXT_RGBX; + } #else - Error("libjpeg-turbo is required for JPEG encoding directly from RGB32 source"); - jpeg_abort_compress( cinfo ); - return(false); + Error("libjpeg-turbo is required for JPEG encoding directly from RGB32 source"); + jpeg_abort_compress( cinfo ); + return(false); #endif - break; - } + break; + } case ZM_COLOUR_RGB24: default: - { - cinfo->input_components = 3; - if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { + { + cinfo->input_components = 3; + if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { #ifdef JCS_EXTENSIONS - cinfo->in_color_space = JCS_EXT_BGR; + cinfo->in_color_space = JCS_EXT_BGR; #else - Error("libjpeg-turbo is required for JPEG encoding directly from BGR24 source"); - jpeg_abort_compress( cinfo ); - return(false); + Error("libjpeg-turbo is required for JPEG encoding directly from BGR24 source"); + jpeg_abort_compress( cinfo ); + return(false); #endif - } else { - /* Assume RGB */ -/* + } else { + /* Assume RGB */ + /* #ifdef JCS_EXTENSIONS - cinfo->out_color_space = JCS_EXT_RGB; +cinfo->out_color_space = JCS_EXT_RGB; #else - cinfo->out_color_space = JCS_RGB; +cinfo->out_color_space = JCS_RGB; #endif -*/ - cinfo->in_color_space = JCS_RGB; - } - break; - } + */ + cinfo->in_color_space = JCS_RGB; + } + break; + } } - + jpeg_set_defaults( cinfo ); jpeg_set_quality( cinfo, quality, FALSE ); cinfo->dct_method = JDCT_FASTEST; @@ -1270,7 +1268,7 @@ bool Image::Crop( unsigned int lo_x, unsigned int lo_y, unsigned int hi_x, unsig unsigned int new_size = new_width*new_height*colours; uint8_t *new_buffer = AllocBuffer(new_size); - + unsigned int new_stride = new_width*colours; for ( unsigned int y = lo_y, ny = 0; y <= hi_y; y++, ny++ ) { @@ -1297,17 +1295,17 @@ void Image::Overlay( const Image &image ) { Panic( "Attempt to overlay different sized images, expected %dx%d, got %dx%d", width, height, image.width, image.height ); } - + if( colours == image.colours && subpixelorder != image.subpixelorder ) { Warning("Attempt to overlay images of same format but with different subpixel order."); } - + /* Grayscale ontop of grayscale - complete */ if ( colours == ZM_COLOUR_GRAY8 && image.colours == ZM_COLOUR_GRAY8 ) { const uint8_t* const max_ptr = buffer+size; const uint8_t* psrc = image.buffer; uint8_t* pdest = buffer; - + while( pdest < max_ptr ) { if ( *psrc ) @@ -1317,15 +1315,15 @@ void Image::Overlay( const Image &image ) pdest++; psrc++; } - - /* RGB24 ontop of grayscale - convert to same format first - complete */ + + /* RGB24 ontop of grayscale - convert to same format first - complete */ } else if ( colours == ZM_COLOUR_GRAY8 && image.colours == ZM_COLOUR_RGB24 ) { Colourise(image.colours, image.subpixelorder); - + const uint8_t* const max_ptr = buffer+size; const uint8_t* psrc = image.buffer; uint8_t* pdest = buffer; - + while( pdest < max_ptr ) { if ( RED_PTR_RGBA(psrc) || GREEN_PTR_RGBA(psrc) || BLUE_PTR_RGBA(psrc) ) @@ -1337,15 +1335,15 @@ void Image::Overlay( const Image &image ) pdest += 3; psrc += 3; } - - /* RGB32 ontop of grayscale - convert to same format first - complete */ + + /* RGB32 ontop of grayscale - convert to same format first - complete */ } else if( colours == ZM_COLOUR_GRAY8 && image.colours == ZM_COLOUR_RGB32 ) { Colourise(image.colours, image.subpixelorder); - + const Rgb* const max_ptr = (Rgb*)(buffer+size); const Rgb* prsrc = (Rgb*)image.buffer; Rgb* prdest = (Rgb*)buffer; - + if(subpixelorder == ZM_SUBPIX_ORDER_RGBA || subpixelorder == ZM_SUBPIX_ORDER_BGRA) { /* RGB\BGR\RGBA\BGRA subpixel order - Alpha byte is last */ while (prdest < max_ptr) { @@ -1367,13 +1365,13 @@ void Image::Overlay( const Image &image ) prsrc++; } } - - /* Grayscale ontop of RGB24 - complete */ + + /* Grayscale ontop of RGB24 - complete */ } else if ( colours == ZM_COLOUR_RGB24 && image.colours == ZM_COLOUR_GRAY8 ) { const uint8_t* const max_ptr = buffer+size; const uint8_t* psrc = image.buffer; uint8_t* pdest = buffer; - + while( pdest < max_ptr ) { if ( *psrc ) @@ -1383,13 +1381,13 @@ void Image::Overlay( const Image &image ) pdest += 3; psrc++; } - - /* RGB24 ontop of RGB24 - not complete. need to take care of different subpixel orders */ + + /* RGB24 ontop of RGB24 - not complete. need to take care of different subpixel orders */ } else if ( colours == ZM_COLOUR_RGB24 && image.colours == ZM_COLOUR_RGB24 ) { const uint8_t* const max_ptr = buffer+size; const uint8_t* psrc = image.buffer; uint8_t* pdest = buffer; - + while( pdest < max_ptr ) { if ( RED_PTR_RGBA(psrc) || GREEN_PTR_RGBA(psrc) || BLUE_PTR_RGBA(psrc) ) @@ -1401,17 +1399,17 @@ void Image::Overlay( const Image &image ) pdest += 3; psrc += 3; } - - /* RGB32 ontop of RGB24 - TO BE DONE */ + + /* RGB32 ontop of RGB24 - TO BE DONE */ } else if ( colours == ZM_COLOUR_RGB24 && image.colours == ZM_COLOUR_RGB32 ) { Error("Overlay of RGB32 ontop of RGB24 is not supported."); - - /* Grayscale ontop of RGB32 - complete */ + + /* Grayscale ontop of RGB32 - complete */ } else if ( colours == ZM_COLOUR_RGB32 && image.colours == ZM_COLOUR_GRAY8 ) { const Rgb* const max_ptr = (Rgb*)(buffer+size); Rgb* prdest = (Rgb*)buffer; const uint8_t* psrc = image.buffer; - + if(subpixelorder == ZM_SUBPIX_ORDER_RGBA || subpixelorder == ZM_SUBPIX_ORDER_BGRA) { /* RGBA\BGRA subpixel order - Alpha byte is last */ while (prdest < max_ptr) { @@ -1433,17 +1431,17 @@ void Image::Overlay( const Image &image ) psrc++; } } - - /* RGB24 ontop of RGB32 - TO BE DONE */ + + /* RGB24 ontop of RGB32 - TO BE DONE */ } else if ( colours == ZM_COLOUR_RGB32 && image.colours == ZM_COLOUR_RGB24 ) { Error("Overlay of RGB24 ontop of RGB32 is not supported."); - - /* RGB32 ontop of RGB32 - not complete. need to take care of different subpixel orders */ + + /* RGB32 ontop of RGB32 - not complete. need to take care of different subpixel orders */ } else if ( colours == ZM_COLOUR_RGB32 && image.colours == ZM_COLOUR_RGB32 ) { const Rgb* const max_ptr = (Rgb*)(buffer+size); Rgb* prdest = (Rgb*)buffer; const Rgb* prsrc = (Rgb*)image.buffer; - + if(image.subpixelorder == ZM_SUBPIX_ORDER_RGBA || image.subpixelorder == ZM_SUBPIX_ORDER_BGRA) { /* RGB\BGR\RGBA\BGRA subpixel order - Alpha byte is last */ while (prdest < max_ptr) { @@ -1466,7 +1464,7 @@ void Image::Overlay( const Image &image ) } } } - + } /* RGB32 compatible: complete */ @@ -1531,7 +1529,7 @@ void Image::Overlay( const Image &image, unsigned int x, unsigned int y ) } else { Error("Overlay called with unexpected colours: %d", colours); } - + } void Image::Blend( const Image &image, int transparency ) @@ -1542,33 +1540,33 @@ void Image::Blend( const Image &image, int transparency ) unsigned long milpixels; #endif uint8_t* new_buffer; - + if ( !(width == image.width && height == image.height && colours == image.colours && subpixelorder == image.subpixelorder) ) { Panic( "Attempt to blend different sized images, expected %dx%dx%d %d, got %dx%dx%d %d", width, height, colours, subpixelorder, image.width, image.height, image.colours, image.subpixelorder ); } - + if(transparency <= 0) return; - + new_buffer = AllocBuffer(size); - + #ifdef ZM_IMAGE_PROFILING clock_gettime(CLOCK_THREAD_CPUTIME_ID,&start); #endif - + /* Do the blending */ (*fptr_blend)(buffer, image.buffer, new_buffer, size, transparency); - + #ifdef ZM_IMAGE_PROFILING clock_gettime(CLOCK_THREAD_CPUTIME_ID,&end); timespec_diff(&start,&end,&diff); - + executetime = (1000000000ull * diff.tv_sec) + diff.tv_nsec; milpixels = (unsigned long)((long double)size)/((((long double)executetime)/1000)); Debug(5, "Blend: %u colours blended in %llu nanoseconds, %lu million colours/s\n",size,executetime,milpixels); #endif - + AssignDirect( width, height, colours, subpixelorder, new_buffer, size, ZM_BUFTYPE_ZM); } @@ -1671,11 +1669,11 @@ Image *Image::Highlight( unsigned int n_images, Image *images[], const Rgb thres #ifndef SOLARIS if ( (unsigned)abs((*psrc)-RGB_VAL(ref_colour,c)) >= RGB_VAL(threshold,c) ) #else - if ( (unsigned)std::abs((*psrc)-RGB_VAL(ref_colour,c)) >= RGB_VAL(threshold,c) ) + if ( (unsigned)std::abs((*psrc)-RGB_VAL(ref_colour,c)) >= RGB_VAL(threshold,c) ) #endif - { - count++; - } + { + count++; + } psrc += colours; } *pdest = (count*255)/n_images; @@ -1693,63 +1691,63 @@ void Image::Delta( const Image &image, Image* targetimage) const unsigned long long executetime; unsigned long milpixels; #endif - + if ( !(width == image.width && height == image.height && colours == image.colours && subpixelorder == image.subpixelorder) ) { Panic( "Attempt to get delta of different sized images, expected %dx%dx%d %d, got %dx%dx%d %d", width, height, colours, subpixelorder, image.width, image.height, image.colours, image.subpixelorder); } - + uint8_t *pdiff = targetimage->WriteBuffer(width, height, ZM_COLOUR_GRAY8, ZM_SUBPIX_ORDER_NONE); - + if(pdiff == NULL) { Panic("Failed requesting writeable buffer for storing the delta image"); } - + #ifdef ZM_IMAGE_PROFILING clock_gettime(CLOCK_THREAD_CPUTIME_ID,&start); #endif - + switch(colours) { case ZM_COLOUR_RGB24: - { - if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { - /* BGR subpixel order */ - (*fptr_delta8_bgr)(buffer, image.buffer, pdiff, pixels); - } else { - /* Assume RGB subpixel order */ - (*fptr_delta8_rgb)(buffer, image.buffer, pdiff, pixels); - } - break; - } + { + if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { + /* BGR subpixel order */ + (*fptr_delta8_bgr)(buffer, image.buffer, pdiff, pixels); + } else { + /* Assume RGB subpixel order */ + (*fptr_delta8_rgb)(buffer, image.buffer, pdiff, pixels); + } + break; + } case ZM_COLOUR_RGB32: - { - if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { - /* ARGB subpixel order */ - (*fptr_delta8_argb)(buffer, image.buffer, pdiff, pixels); - } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { - /* ABGR subpixel order */ - (*fptr_delta8_abgr)(buffer, image.buffer, pdiff, pixels); - } else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { - /* BGRA subpixel order */ - (*fptr_delta8_bgra)(buffer, image.buffer, pdiff, pixels); - } else { - /* Assume RGBA subpixel order */ - (*fptr_delta8_rgba)(buffer, image.buffer, pdiff, pixels); - } - break; - } + { + if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { + /* ARGB subpixel order */ + (*fptr_delta8_argb)(buffer, image.buffer, pdiff, pixels); + } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { + /* ABGR subpixel order */ + (*fptr_delta8_abgr)(buffer, image.buffer, pdiff, pixels); + } else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { + /* BGRA subpixel order */ + (*fptr_delta8_bgra)(buffer, image.buffer, pdiff, pixels); + } else { + /* Assume RGBA subpixel order */ + (*fptr_delta8_rgba)(buffer, image.buffer, pdiff, pixels); + } + break; + } case ZM_COLOUR_GRAY8: - (*fptr_delta8_gray8)(buffer, image.buffer, pdiff, pixels); - break; + (*fptr_delta8_gray8)(buffer, image.buffer, pdiff, pixels); + break; default: - Panic("Delta called with unexpected colours: %d",colours); - break; + Panic("Delta called with unexpected colours: %d",colours); + break; } - + #ifdef ZM_IMAGE_PROFILING clock_gettime(CLOCK_THREAD_CPUTIME_ID,&end); timespec_diff(&start,&end,&diff); - + executetime = (1000000000ull * diff.tv_sec) + diff.tv_nsec; milpixels = (unsigned long)((long double)pixels)/((((long double)executetime)/1000)); Debug(5, "Delta: %u delta pixels generated in %llu nanoseconds, %lu million pixels/s\n",pixels,executetime,milpixels); @@ -1820,7 +1818,7 @@ void Image::MaskPrivacy( const unsigned char *p_bitmask, const Rgb pixel_colour } } else if ( colours == ZM_COLOUR_RGB32 ) - { + { for ( unsigned int x = 0; x < width; x++, ptr += colours ) { Rgb *temp_ptr = (Rgb*)ptr; @@ -1828,10 +1826,10 @@ void Image::MaskPrivacy( const unsigned char *p_bitmask, const Rgb pixel_colour *temp_ptr = pixel_rgb_col; i++; } - } else { - Panic("MaskPrivacy called with unexpected colours: %d", colours); - return; - } + } else { + Panic("MaskPrivacy called with unexpected colours: %d", colours); + return; + } } } @@ -1853,7 +1851,7 @@ void Image::Annotate( const char *p_text, const Coord &coord, const unsigned int const uint8_t fg_bw_col = fg_colour & 0xff; const Rgb fg_rgb_col = rgb_convert(fg_colour,subpixelorder); const bool fg_trans = (fg_colour == RGB_TRANSPARENT); - + const uint8_t bg_r_col = RED_VAL_RGBA(bg_colour); const uint8_t bg_g_col = GREEN_VAL_RGBA(bg_colour); const uint8_t bg_b_col = BLUE_VAL_RGBA(bg_colour); @@ -1961,7 +1959,7 @@ void Image::Annotate( const char *p_text, const Coord &coord, const unsigned int } } else if ( colours == ZM_COLOUR_RGB32 ) - { + { unsigned int wc = width * colours; uint8_t *ptr = &buffer[((lo_line_y*width)+lo_line_x)<<2]; @@ -1981,22 +1979,22 @@ void Image::Annotate( const char *p_text, const Coord &coord, const unsigned int { if ( !fg_trans ) { - *temp_ptr = fg_rgb_col; + *temp_ptr = fg_rgb_col; } } else if ( !bg_trans ) { - *temp_ptr = bg_rgb_col; + *temp_ptr = bg_rgb_col; } } } } - - } else { - Panic("Annotate called with unexpected colours: %d",colours); - return; - } - + + } else { + Panic("Annotate called with unexpected colours: %d",colours); + return; + } + index += line_len; while ( text[index] == '\n' ) { @@ -2007,18 +2005,14 @@ void Image::Annotate( const char *p_text, const Coord &coord, const unsigned int } } -void Image::Timestamp( const char *label, const time_t when, const Coord &coord, const int size ) -{ +void Image::Timestamp( const char *label, const time_t when, const Coord &coord, const int size ) { char time_text[64]; strftime( time_text, sizeof(time_text), "%y/%m/%d %H:%M:%S", localtime( &when ) ); char text[64]; - if ( label ) - { + if ( label ) { snprintf( text, sizeof(text), "%s - %s", label, time_text ); Annotate( text, coord, size ); - } - else - { + } else { Annotate( time_text, coord, size ); } } @@ -2027,21 +2021,21 @@ void Image::Timestamp( const char *label, const time_t when, const Coord &coord, void Image::Colourise(const unsigned int p_reqcolours, const unsigned int p_reqsubpixelorder) { Debug(9, "Colourise: Req colours: %u Req subpixel order: %u Current colours: %u Current subpixel order: %u",p_reqcolours,p_reqsubpixelorder,colours,subpixelorder); - + if ( colours != ZM_COLOUR_GRAY8) { Warning("Target image is already colourised, colours: %u",colours); return; } - + if ( p_reqcolours == ZM_COLOUR_RGB32 ) { /* RGB32 */ Rgb* new_buffer = (Rgb*)AllocBuffer(pixels*sizeof(Rgb)); - + const uint8_t *psrc = buffer; Rgb* pdest = new_buffer; Rgb subpixel; Rgb newpixel; - + if ( p_reqsubpixelorder == ZM_SUBPIX_ORDER_ABGR || p_reqsubpixelorder == ZM_SUBPIX_ORDER_ARGB) { /* ARGB\ABGR subpixel order. alpha byte is first (mem+0), so we need to shift the pixel left in the end */ for(unsigned int i=0;i= 35) { /* Use SSSE3 functions */ switch(subpixelorder) { case ZM_SUBPIX_ORDER_BGRA: - ssse3_convert_bgra_gray8(buffer,buffer,pixels); - break; + ssse3_convert_bgra_gray8(buffer,buffer,pixels); + break; case ZM_SUBPIX_ORDER_ARGB: - ssse3_convert_argb_gray8(buffer,buffer,pixels); - break; + ssse3_convert_argb_gray8(buffer,buffer,pixels); + break; case ZM_SUBPIX_ORDER_ABGR: - ssse3_convert_abgr_gray8(buffer,buffer,pixels); - break; + ssse3_convert_abgr_gray8(buffer,buffer,pixels); + break; case ZM_SUBPIX_ORDER_RGBA: default: - ssse3_convert_rgba_gray8(buffer,buffer,pixels); - break; + ssse3_convert_rgba_gray8(buffer,buffer,pixels); + break; } } else { /* Use standard functions */ @@ -2114,29 +2108,29 @@ void Image::DeColourise() { switch(subpixelorder) { case ZM_SUBPIX_ORDER_BGRA: - std_convert_bgra_gray8(buffer,buffer,pixels); - break; + std_convert_bgra_gray8(buffer,buffer,pixels); + break; case ZM_SUBPIX_ORDER_ARGB: - std_convert_argb_gray8(buffer,buffer,pixels); - break; + std_convert_argb_gray8(buffer,buffer,pixels); + break; case ZM_SUBPIX_ORDER_ABGR: - std_convert_abgr_gray8(buffer,buffer,pixels); - break; + std_convert_abgr_gray8(buffer,buffer,pixels); + break; case ZM_SUBPIX_ORDER_RGBA: default: - std_convert_rgba_gray8(buffer,buffer,pixels); - break; + std_convert_rgba_gray8(buffer,buffer,pixels); + break; } } else { /* Assume RGB24 */ switch(subpixelorder) { case ZM_SUBPIX_ORDER_BGR: - std_convert_bgr_gray8(buffer,buffer,pixels); - break; + std_convert_bgr_gray8(buffer,buffer,pixels); + break; case ZM_SUBPIX_ORDER_RGB: default: - std_convert_rgb_gray8(buffer,buffer,pixels); - break; + std_convert_rgb_gray8(buffer,buffer,pixels); + break; } } } @@ -2149,10 +2143,10 @@ void Image::Fill( Rgb colour, const Box *limits ) { Panic( "Attempt to fill image with unexpected colours %d", colours ); } - + /* Convert the colour's RGBA subpixel order into the image's subpixel order */ colour = rgb_convert(colour,subpixelorder); - + unsigned int lo_x = limits?limits->Lo().X():0; unsigned int lo_y = limits?limits->Lo().Y():0; unsigned int hi_x = limits?limits->Hi().X():width-1; @@ -2186,7 +2180,7 @@ void Image::Fill( Rgb colour, const Box *limits ) for ( unsigned int y = lo_y; y <= (unsigned int)hi_y; y++ ) { Rgb *p = (Rgb*)&buffer[((y*width)+lo_x)<<2]; - + for ( unsigned int x = lo_x; x <= (unsigned int)hi_x; x++, p++) { /* Fast, copies the entire pixel in a single pass */ @@ -2202,12 +2196,12 @@ void Image::Fill( Rgb colour, int density, const Box *limits ) /* Allow the faster version to be used if density is not used (density=1) */ if(density <= 1) return Fill(colour,limits); - + if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32 ) ) { Panic( "Attempt to fill image with unexpected colours %d", colours ); } - + /* Convert the colour's RGBA subpixel order into the image's subpixel order */ colour = rgb_convert(colour,subpixelorder); @@ -2256,7 +2250,7 @@ void Image::Fill( Rgb colour, int density, const Box *limits ) } } } - + } /* RGB32 compatible: complete */ @@ -2266,10 +2260,10 @@ void Image::Outline( Rgb colour, const Polygon &polygon ) { Panic( "Attempt to outline image with unexpected colours %d", colours ); } - + /* Convert the colour's RGBA subpixel order into the image's subpixel order */ colour = rgb_convert(colour,subpixelorder); - + int n_coords = polygon.getNumCoords(); for ( int j = 0, i = n_coords-1; j < n_coords; i = j++ ) { @@ -2363,7 +2357,7 @@ void Image::Outline( Rgb colour, const Polygon &polygon ) *(Rgb*)(buffer+(((int(round(y))*width)+x)<<2)) = colour; } } - + } } } @@ -2375,7 +2369,7 @@ void Image::Fill( Rgb colour, int density, const Polygon &polygon ) { Panic( "Attempt to fill image with unexpected colours %d", colours ); } - + /* Convert the colour's RGBA subpixel order into the image's subpixel order */ colour = rgb_convert(colour,subpixelorder); @@ -2521,7 +2515,7 @@ void Image::Fill( Rgb colour, const Polygon &polygon ) /* RGB32 compatible: complete */ void Image::Rotate( int angle ) { - + angle %= 360; if ( !angle ) @@ -2532,7 +2526,7 @@ void Image::Rotate( int angle ) { return; } - + unsigned int new_height = height; unsigned int new_width = width; uint8_t* rotate_buffer = AllocBuffer(size); @@ -2540,157 +2534,157 @@ void Image::Rotate( int angle ) switch( angle ) { case 90 : - { - new_height = width; - new_width = height; + { + new_height = width; + new_width = height; - unsigned int line_bytes = new_width*colours; - unsigned char *s_ptr = buffer; + unsigned int line_bytes = new_width*colours; + unsigned char *s_ptr = buffer; - if ( colours == ZM_COLOUR_GRAY8 ) - { - unsigned char *d_ptr; - for ( unsigned int i = new_width; i > 0; i-- ) + if ( colours == ZM_COLOUR_GRAY8 ) { - d_ptr = rotate_buffer+(i-1); - for ( unsigned int j = new_height; j > 0; j-- ) + unsigned char *d_ptr; + for ( unsigned int i = new_width; i > 0; i-- ) { - *d_ptr = *s_ptr++; - d_ptr += line_bytes; + d_ptr = rotate_buffer+(i-1); + for ( unsigned int j = new_height; j > 0; j-- ) + { + *d_ptr = *s_ptr++; + d_ptr += line_bytes; + } } } - } - else if ( colours == ZM_COLOUR_RGB32 ) - { - Rgb* s_rptr = (Rgb*)s_ptr; - Rgb* d_rptr; - for ( unsigned int i = new_width; i > 0; i-- ) + else if ( colours == ZM_COLOUR_RGB32 ) { - d_rptr = (Rgb*)(rotate_buffer+((i-1)<<2)); - for ( unsigned int j = new_height; j > 0; j-- ) + Rgb* s_rptr = (Rgb*)s_ptr; + Rgb* d_rptr; + for ( unsigned int i = new_width; i > 0; i-- ) { - *d_rptr = *s_rptr++; - d_rptr += new_width; + d_rptr = (Rgb*)(rotate_buffer+((i-1)<<2)); + for ( unsigned int j = new_height; j > 0; j-- ) + { + *d_rptr = *s_rptr++; + d_rptr += new_width; + } } } - } - else /* Assume RGB24 */ - { - unsigned char *d_ptr; - for ( unsigned int i = new_width; i > 0; i-- ) + else /* Assume RGB24 */ { - d_ptr = rotate_buffer+((i-1)*3); - for ( unsigned int j = new_height; j > 0; j-- ) + unsigned char *d_ptr; + for ( unsigned int i = new_width; i > 0; i-- ) { - *d_ptr = *s_ptr++; - *(d_ptr+1) = *s_ptr++; - *(d_ptr+2) = *s_ptr++; - d_ptr += line_bytes; + d_ptr = rotate_buffer+((i-1)*3); + for ( unsigned int j = new_height; j > 0; j-- ) + { + *d_ptr = *s_ptr++; + *(d_ptr+1) = *s_ptr++; + *(d_ptr+2) = *s_ptr++; + d_ptr += line_bytes; + } } } + break; } - break; - } case 180 : - { - unsigned char *s_ptr = buffer+size; - unsigned char *d_ptr = rotate_buffer; + { + unsigned char *s_ptr = buffer+size; + unsigned char *d_ptr = rotate_buffer; - if ( colours == ZM_COLOUR_GRAY8 ) - { - while( s_ptr > buffer ) + if ( colours == ZM_COLOUR_GRAY8 ) { - s_ptr--; - *d_ptr++ = *s_ptr; - } - } - else if ( colours == ZM_COLOUR_RGB32 ) - { - Rgb* s_rptr = (Rgb*)s_ptr; - Rgb* d_rptr = (Rgb*)d_ptr; - while( s_rptr > (Rgb*)buffer ) - { - s_rptr--; - *d_rptr++ = *s_rptr; - } - } - else /* Assume RGB24 */ - { - while( s_ptr > buffer ) - { - s_ptr -= 3; - *d_ptr++ = *s_ptr; - *d_ptr++ = *(s_ptr+1); - *d_ptr++ = *(s_ptr+2); - } - } - break; - } - case 270 : - { - new_height = width; - new_width = height; - - unsigned int line_bytes = new_width*colours; - unsigned char *s_ptr = buffer+size; - - if ( colours == ZM_COLOUR_GRAY8 ) - { - unsigned char *d_ptr; - for ( unsigned int i = new_width; i > 0; i-- ) - { - d_ptr = rotate_buffer+(i-1); - for ( unsigned int j = new_height; j > 0; j-- ) + while( s_ptr > buffer ) { s_ptr--; - *d_ptr = *s_ptr; - d_ptr += line_bytes; + *d_ptr++ = *s_ptr; } } - } - else if ( colours == ZM_COLOUR_RGB32 ) - { - Rgb* s_rptr = (Rgb*)s_ptr; - Rgb* d_rptr; - for ( unsigned int i = new_width; i > 0; i-- ) + else if ( colours == ZM_COLOUR_RGB32 ) { - d_rptr = (Rgb*)(rotate_buffer+((i-1)<<2)); - for ( unsigned int j = new_height; j > 0; j-- ) + Rgb* s_rptr = (Rgb*)s_ptr; + Rgb* d_rptr = (Rgb*)d_ptr; + while( s_rptr > (Rgb*)buffer ) { s_rptr--; - *d_rptr = *s_rptr; - d_rptr += new_width; + *d_rptr++ = *s_rptr; } } - } - else /* Assume RGB24 */ - { - unsigned char *d_ptr; - for ( unsigned int i = new_width; i > 0; i-- ) + else /* Assume RGB24 */ { - d_ptr = rotate_buffer+((i-1)*3); - for ( unsigned int j = new_height; j > 0; j-- ) + while( s_ptr > buffer ) { - *(d_ptr+2) = *(--s_ptr); - *(d_ptr+1) = *(--s_ptr); - *d_ptr = *(--s_ptr); - d_ptr += line_bytes; + s_ptr -= 3; + *d_ptr++ = *s_ptr; + *d_ptr++ = *(s_ptr+1); + *d_ptr++ = *(s_ptr+2); } } + break; + } + case 270 : + { + new_height = width; + new_width = height; + + unsigned int line_bytes = new_width*colours; + unsigned char *s_ptr = buffer+size; + + if ( colours == ZM_COLOUR_GRAY8 ) + { + unsigned char *d_ptr; + for ( unsigned int i = new_width; i > 0; i-- ) + { + d_ptr = rotate_buffer+(i-1); + for ( unsigned int j = new_height; j > 0; j-- ) + { + s_ptr--; + *d_ptr = *s_ptr; + d_ptr += line_bytes; + } + } + } + else if ( colours == ZM_COLOUR_RGB32 ) + { + Rgb* s_rptr = (Rgb*)s_ptr; + Rgb* d_rptr; + for ( unsigned int i = new_width; i > 0; i-- ) + { + d_rptr = (Rgb*)(rotate_buffer+((i-1)<<2)); + for ( unsigned int j = new_height; j > 0; j-- ) + { + s_rptr--; + *d_rptr = *s_rptr; + d_rptr += new_width; + } + } + } + else /* Assume RGB24 */ + { + unsigned char *d_ptr; + for ( unsigned int i = new_width; i > 0; i-- ) + { + d_ptr = rotate_buffer+((i-1)*3); + for ( unsigned int j = new_height; j > 0; j-- ) + { + *(d_ptr+2) = *(--s_ptr); + *(d_ptr+1) = *(--s_ptr); + *d_ptr = *(--s_ptr); + d_ptr += line_bytes; + } + } + } + break; } - break; - } } - + AssignDirect( new_width, new_height, colours, subpixelorder, rotate_buffer, size, ZM_BUFTYPE_ZM); - + } /* RGB32 compatible: complete */ void Image::Flip( bool leftright ) { uint8_t* flip_buffer = AllocBuffer(size); - + unsigned int line_bytes = width*colours; unsigned int line_bytes2 = 2*line_bytes; if ( leftright ) @@ -2755,9 +2749,9 @@ void Image::Flip( bool leftright ) d_ptr += line_bytes; } } - + AssignDirect( width, height, colours, subpixelorder, flip_buffer, size, ZM_BUFTYPE_ZM); - + } void Image::Scale( unsigned int factor ) @@ -2774,11 +2768,11 @@ void Image::Scale( unsigned int factor ) unsigned int new_width = (width*factor)/ZM_SCALE_BASE; unsigned int new_height = (height*factor)/ZM_SCALE_BASE; - + size_t scale_buffer_size = (new_width+1) * (new_height+1) * colours; - + uint8_t* scale_buffer = AllocBuffer(scale_buffer_size); - + if ( factor > ZM_SCALE_BASE ) { unsigned char *pd = scale_buffer; @@ -2845,7 +2839,7 @@ void Image::Scale( unsigned int factor ) { w_count += factor; w_index = w_count/ZM_SCALE_BASE; - + if ( w_index > last_w_index ) { for ( unsigned int c = 0; c < colours; c++ ) @@ -2865,15 +2859,15 @@ void Image::Scale( unsigned int factor ) new_width = last_w_index; new_height = last_h_index; } - + AssignDirect( new_width, new_height, colours, subpixelorder, scale_buffer, scale_buffer_size, ZM_BUFTYPE_ZM); - + } void Image::Deinterlace_Discard() { /* Simple deinterlacing. Copy the even lines into the odd lines */ - + if ( colours == ZM_COLOUR_GRAY8 ) { const uint8_t *psrc; @@ -2917,16 +2911,16 @@ void Image::Deinterlace_Discard() } else { Error("Deinterlace called with unexpected colours: %d", colours); } - + } void Image::Deinterlace_Linear() { /* Simple deinterlacing. The odd lines are average of the line above and line below */ - + const uint8_t *pbelow, *pabove; uint8_t *pcurrent; - + if ( colours == ZM_COLOUR_GRAY8 ) { for (unsigned int y = 1; y < (unsigned int)(height-1); y += 2) @@ -2993,15 +2987,15 @@ void Image::Deinterlace_Linear() } else { Error("Deinterlace called with unexpected colours: %d", colours); } - + } void Image::Deinterlace_Blend() { /* Simple deinterlacing. Blend the fields together. 50% blend */ - + uint8_t *pabove, *pcurrent; - + if ( colours == ZM_COLOUR_GRAY8 ) { for (unsigned int y = 1; y < (unsigned int)height; y += 2) @@ -3050,7 +3044,7 @@ void Image::Deinterlace_Blend() } else { Error("Deinterlace called with unexpected colours: %d", colours); } - + } void Image::Deinterlace_Blend_CustomRatio(int divider) @@ -3060,14 +3054,14 @@ void Image::Deinterlace_Blend_CustomRatio(int divider) /* 2 = 25% blending */ /* 3 = 12.% blending */ /* 4 = 6.25% blending */ - + uint8_t *pabove, *pcurrent; uint8_t subpix1, subpix2; - + if ( divider < 1 || divider > 4 ) { Error("Deinterlace called with invalid blend ratio"); } - + if ( colours == ZM_COLOUR_GRAY8 ) { for (unsigned int y = 1; y < (unsigned int)height; y += 2) @@ -3132,7 +3126,7 @@ void Image::Deinterlace_Blend_CustomRatio(int divider) } else { Error("Deinterlace called with unexpected colours: %d", colours); } - + } @@ -3142,44 +3136,44 @@ void Image::Deinterlace_4Field(const Image* next_image, unsigned int threshold) { Panic( "Attempt to deinterlace different sized images, expected %dx%dx%d %d, got %dx%dx%d %d", width, height, colours, subpixelorder, next_image->width, next_image->height, next_image->colours, next_image->subpixelorder); } - + switch(colours) { case ZM_COLOUR_RGB24: - { - if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { - /* BGR subpixel order */ - std_deinterlace_4field_bgr(buffer, next_image->buffer, threshold, width, height); - } else { - /* Assume RGB subpixel order */ - std_deinterlace_4field_rgb(buffer, next_image->buffer, threshold, width, height); - } - break; - } + { + if(subpixelorder == ZM_SUBPIX_ORDER_BGR) { + /* BGR subpixel order */ + std_deinterlace_4field_bgr(buffer, next_image->buffer, threshold, width, height); + } else { + /* Assume RGB subpixel order */ + std_deinterlace_4field_rgb(buffer, next_image->buffer, threshold, width, height); + } + break; + } case ZM_COLOUR_RGB32: - { - if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { - /* ARGB subpixel order */ - (*fptr_deinterlace_4field_argb)(buffer, next_image->buffer, threshold, width, height); - } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { - /* ABGR subpixel order */ - (*fptr_deinterlace_4field_abgr)(buffer, next_image->buffer, threshold, width, height); - } else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { - /* BGRA subpixel order */ - (*fptr_deinterlace_4field_bgra)(buffer, next_image->buffer, threshold, width, height); - } else { - /* Assume RGBA subpixel order */ - (*fptr_deinterlace_4field_rgba)(buffer, next_image->buffer, threshold, width, height); - } - break; - } + { + if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) { + /* ARGB subpixel order */ + (*fptr_deinterlace_4field_argb)(buffer, next_image->buffer, threshold, width, height); + } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) { + /* ABGR subpixel order */ + (*fptr_deinterlace_4field_abgr)(buffer, next_image->buffer, threshold, width, height); + } else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) { + /* BGRA subpixel order */ + (*fptr_deinterlace_4field_bgra)(buffer, next_image->buffer, threshold, width, height); + } else { + /* Assume RGBA subpixel order */ + (*fptr_deinterlace_4field_rgba)(buffer, next_image->buffer, threshold, width, height); + } + break; + } case ZM_COLOUR_GRAY8: - (*fptr_deinterlace_4field_gray8)(buffer, next_image->buffer, threshold, width, height); - break; + (*fptr_deinterlace_4field_gray8)(buffer, next_image->buffer, threshold, width, height); + break; default: - Panic("Deinterlace_4Field called with unexpected colours: %d",colours); - break; + Panic("Deinterlace_4Field called with unexpected colours: %d",colours); + break; } - + } @@ -3194,7 +3188,7 @@ void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, u static uint32_t divider = 0; static uint32_t clearmask = 0; static double current_blendpercent = 0.0; - + if(current_blendpercent != blendpercent) { /* Attempt to match the blending percent to one of the possible values */ if(blendpercent < 2.34375) { @@ -3226,29 +3220,29 @@ void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, u } __asm__ __volatile__( - "movd %4, %%xmm3\n\t" - "movd %5, %%xmm4\n\t" - "pshufd $0x0, %%xmm3, %%xmm3\n\t" - "sub $0x10, %0\n\t" - "sub $0x10, %1\n\t" - "sub $0x10, %2\n\t" - "sse2_fastblend_iter:\n\t" - "movdqa (%0,%3),%%xmm0\n\t" - "movdqa %%xmm0,%%xmm2\n\t" - "movdqa (%1,%3),%%xmm1\n\t" - "psrlq %%xmm4,%%xmm0\n\t" - "psrlq %%xmm4,%%xmm1\n\t" - "pand %%xmm3,%%xmm1\n\t" - "pand %%xmm3,%%xmm0\n\t" - "psubb %%xmm0,%%xmm1\n\t" - "paddb %%xmm2,%%xmm1\n\t" - "movntdq %%xmm1,(%2,%3)\n\t" - "sub $0x10, %3\n\t" - "jnz sse2_fastblend_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count), "m" (clearmask), "m" (divider) - : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory" - ); + "movd %4, %%xmm3\n\t" + "movd %5, %%xmm4\n\t" + "pshufd $0x0, %%xmm3, %%xmm3\n\t" + "sub $0x10, %0\n\t" + "sub $0x10, %1\n\t" + "sub $0x10, %2\n\t" + "sse2_fastblend_iter:\n\t" + "movdqa (%0,%3),%%xmm0\n\t" + "movdqa %%xmm0,%%xmm2\n\t" + "movdqa (%1,%3),%%xmm1\n\t" + "psrlq %%xmm4,%%xmm0\n\t" + "psrlq %%xmm4,%%xmm1\n\t" + "pand %%xmm3,%%xmm1\n\t" + "pand %%xmm3,%%xmm0\n\t" + "psubb %%xmm0,%%xmm1\n\t" + "paddb %%xmm2,%%xmm1\n\t" + "movntdq %%xmm1,(%2,%3)\n\t" + "sub $0x10, %3\n\t" + "jnz sse2_fastblend_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count), "m" (clearmask), "m" (divider) + : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -3258,7 +3252,7 @@ __attribute__((noinline)) void std_fastblend(const uint8_t* col1, const uint8_t* static int divider = 0; static double current_blendpercent = 0.0; const uint8_t* const max_ptr = result + count; - + if(current_blendpercent != blendpercent) { /* Attempt to match the blending percent to one of the possible values */ if(blendpercent < 2.34375) { @@ -3282,7 +3276,7 @@ __attribute__((noinline)) void std_fastblend(const uint8_t* col1, const uint8_t* } current_blendpercent = blendpercent; } - + while(result < max_ptr) { result[0] = ((col2[0] - col1[0])>>divider) + col1[0]; @@ -3301,7 +3295,7 @@ __attribute__((noinline)) void std_fastblend(const uint8_t* col1, const uint8_t* result[13] = ((col2[13] - col1[13])>>divider) + col1[13]; result[14] = ((col2[14] - col1[14])>>divider) + col1[14]; result[15] = ((col2[15] - col1[15])>>divider) + col1[15]; - + col1 += 16; col2 += 16; result += 16; @@ -3349,22 +3343,22 @@ void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* r /* Q3(D6,D7) = divider */ __asm__ __volatile__ ( - "mov r12, %4\n\t" - "vdup.8 q3, r12\n\t" - "neon32_armv7_fastblend_iter:\n\t" - "vldm %0!, {q0}\n\t" - "vldm %1!, {q1}\n\t" - "vrshl.u8 q2, q0, q3\n\t" - "vrshl.u8 q1, q1, q3\n\t" - "vsub.i8 q1, q1, q2\n\t" - "vadd.i8 q1, q1, q0\n\t" - "vstm %2!, {q1}\n\t" - "subs %3, %3, #16\n\t" - "bne neon32_armv7_fastblend_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count), "g" (divider) - : "%r12", "%q0", "%q1", "%q2", "%q3", "cc", "memory" - ); + "mov r12, %4\n\t" + "vdup.8 q3, r12\n\t" + "neon32_armv7_fastblend_iter:\n\t" + "vldm %0!, {q0}\n\t" + "vldm %1!, {q1}\n\t" + "vrshl.u8 q2, q0, q3\n\t" + "vrshl.u8 q1, q1, q3\n\t" + "vsub.i8 q1, q1, q2\n\t" + "vadd.i8 q1, q1, q0\n\t" + "vstm %2!, {q1}\n\t" + "subs %3, %3, #16\n\t" + "bne neon32_armv7_fastblend_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count), "g" (divider) + : "%r12", "%q0", "%q1", "%q2", "%q3", "cc", "memory" + ); #else Panic("Neon function called on a non-ARM platform or Neon code is absent"); #endif @@ -3374,10 +3368,10 @@ __attribute__((noinline)) void std_blend(const uint8_t* col1, const uint8_t* col double divide = blendpercent / 100.0; double opacity = 1.0 - divide; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { *result++ = (*col1++ * opacity) + (*col2++ * divide); - + } } @@ -3387,7 +3381,7 @@ __attribute__((noinline)) void std_blend(const uint8_t* col1, const uint8_t* col __attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { /* Loop unrolling is used to work on 16 bytes (16 grayscale pixels) at a time */ const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { result[0] = abs(col1[0] - col2[0]); result[1] = abs(col1[1] - col2[1]); @@ -3405,7 +3399,7 @@ __attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8 result[13] = abs(col1[13] - col2[13]); result[14] = abs(col1[14] - col2[14]); result[15] = abs(col1[15] - col2[15]); - + col1 += 16; col2 += 16; result += 16; @@ -3417,7 +3411,7 @@ __attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t /* Loop unrolling is used to work on 12 bytes (4 rgb24 pixels) at a time */ int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { r = abs(col1[0] - col2[0]); g = abs(col1[1] - col2[1]); @@ -3435,7 +3429,7 @@ __attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t g = abs(col1[10] - col2[10]); b = abs(col1[11] - col2[11]); result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 12; col2 += 12; result += 4; @@ -3447,7 +3441,7 @@ __attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t /* Loop unrolling is used to work on 12 bytes (4 rgb24 pixels) at a time */ int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { b = abs(col1[0] - col2[0]); g = abs(col1[1] - col2[1]); @@ -3465,7 +3459,7 @@ __attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t g = abs(col1[10] - col2[10]); r = abs(col1[11] - col2[11]); result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 12; col2 += 12; result += 4; @@ -3477,7 +3471,7 @@ __attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_ /* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */ int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { r = abs(col1[0] - col2[0]); g = abs(col1[1] - col2[1]); @@ -3495,7 +3489,7 @@ __attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_ g = abs(col1[13] - col2[13]); b = abs(col1[14] - col2[14]); result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; col2 += 16; result += 4; @@ -3507,7 +3501,7 @@ __attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_ /* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */ int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { b = abs(col1[0] - col2[0]); g = abs(col1[1] - col2[1]); @@ -3525,7 +3519,7 @@ __attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_ g = abs(col1[13] - col2[13]); r = abs(col1[14] - col2[14]); result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; col2 += 16; result += 4; @@ -3537,7 +3531,7 @@ __attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_ /* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */ int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { r = abs(col1[1] - col2[1]); g = abs(col1[2] - col2[2]); @@ -3555,7 +3549,7 @@ __attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_ g = abs(col1[14] - col2[14]); b = abs(col1[15] - col2[15]); result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; col2 += 16; result += 4; @@ -3567,7 +3561,7 @@ __attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_ /* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */ int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { b = abs(col1[1] - col2[1]); g = abs(col1[2] - col2[2]); @@ -3585,7 +3579,7 @@ __attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_ g = abs(col1[14] - col2[14]); r = abs(col1[15] - col2[15]); result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; col2 += 16; result += 4; @@ -3603,17 +3597,17 @@ void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t /* Q1(D2,D3) = col2 */ __asm__ __volatile__ ( - "neon32_armv7_delta8_gray8_iter:\n\t" - "vldm %0!, {q0}\n\t" - "vldm %1!, {q1}\n\t" - "vabd.u8 q0, q0, q1\n\t" - "vstm %2!, {q0}\n\t" - "subs %3, %3, #16\n\t" - "bne neon32_armv7_delta8_gray8_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count) - : "%q0", "%q1", "cc", "memory" - ); + "neon32_armv7_delta8_gray8_iter:\n\t" + "vldm %0!, {q0}\n\t" + "vldm %1!, {q1}\n\t" + "vabd.u8 q0, q0, q1\n\t" + "vstm %2!, {q0}\n\t" + "subs %3, %3, #16\n\t" + "bne neon32_armv7_delta8_gray8_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count) + : "%q0", "%q1", "cc", "memory" + ); #else Panic("Neon function called on a non-ARM platform or Neon code is absent"); #endif @@ -3631,24 +3625,24 @@ void neon32_armv7_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t /* Q2(D4,D5) = multiplier */ __asm__ __volatile__ ( - "mov r12, %4\n\t" - "vdup.32 q2, r12\n\t" - "neon32_armv7_delta8_rgb32_iter:\n\t" - "vldm %0!, {q0}\n\t" - "vldm %1!, {q1}\n\t" - "vabd.u8 q0, q0, q1\n\t" - "vrshr.u8 q0, q0, #3\n\t" - "vmul.i8 q0, q0, q2\n\t" - "vpadd.i8 d0, d0, d1\n\t" - "vpadd.i8 d2, d2, d3\n\t" - "vpadd.i8 d0, d0, d2\n\t" - "vst1.32 {d0[0]}, [%2]!\n\t" - "subs %3, %3, #4\n\t" - "bne neon32_armv7_delta8_rgb32_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (multiplier) - : "%r12", "%q0", "%q1", "%q2", "cc", "memory" - ); + "mov r12, %4\n\t" + "vdup.32 q2, r12\n\t" + "neon32_armv7_delta8_rgb32_iter:\n\t" + "vldm %0!, {q0}\n\t" + "vldm %1!, {q1}\n\t" + "vabd.u8 q0, q0, q1\n\t" + "vrshr.u8 q0, q0, #3\n\t" + "vmul.i8 q0, q0, q2\n\t" + "vpadd.i8 d0, d0, d1\n\t" + "vpadd.i8 d2, d2, d3\n\t" + "vpadd.i8 d0, d0, d2\n\t" + "vst1.32 {d0[0]}, [%2]!\n\t" + "subs %3, %3, #4\n\t" + "bne neon32_armv7_delta8_rgb32_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (multiplier) + : "%r12", "%q0", "%q1", "%q2", "cc", "memory" + ); #else Panic("Neon function called on a non-ARM platform or Neon code is absent"); #endif @@ -3682,24 +3676,24 @@ void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE)) __asm__ __volatile__ ( - "sub $0x10, %0\n\t" - "sub $0x10, %1\n\t" - "sub $0x10, %2\n\t" - "sse2_delta8_gray8_iter:\n\t" - "movdqa (%0,%3), %%xmm1\n\t" - "movdqa (%1,%3), %%xmm2\n\t" - "movdqa %%xmm1, %%xmm3\n\t" - "movdqa %%xmm2, %%xmm4\n\t" - "pmaxub %%xmm1, %%xmm2\n\t" - "pminub %%xmm3, %%xmm4\n\t" - "psubb %%xmm4, %%xmm2\n\t" - "movntdq %%xmm2, (%2,%3)\n\t" - "sub $0x10, %3\n\t" - "jnz sse2_delta8_gray8_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count) - : "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory" - ); + "sub $0x10, %0\n\t" + "sub $0x10, %1\n\t" + "sub $0x10, %2\n\t" + "sse2_delta8_gray8_iter:\n\t" + "movdqa (%0,%3), %%xmm1\n\t" + "movdqa (%1,%3), %%xmm2\n\t" + "movdqa %%xmm1, %%xmm3\n\t" + "movdqa %%xmm2, %%xmm4\n\t" + "pmaxub %%xmm1, %%xmm2\n\t" + "pminub %%xmm3, %%xmm4\n\t" + "psubb %%xmm4, %%xmm2\n\t" + "movntdq %%xmm2, (%2,%3)\n\t" + "sub $0x10, %3\n\t" + "jnz sse2_delta8_gray8_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count) + : "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -3711,53 +3705,53 @@ __attribute__((noinline,__target__("sse2"))) #endif void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE)) - + __asm__ __volatile__ ( - "mov $0x1F1F1F1F, %%eax\n\t" - "movd %%eax, %%xmm4\n\t" - "pshufd $0x0, %%xmm4, %%xmm4\n\t" - "mov $0xff, %%eax\n\t" - "movd %%eax, %%xmm0\n\t" - "pshufd $0x0, %%xmm0, %%xmm0\n\t" - "sub $0x10, %0\n\t" - "sub $0x10, %1\n\t" - "sub $0x4, %2\n\t" - "sse2_delta8_rgba_iter:\n\t" - "movdqa (%0,%3,4), %%xmm1\n\t" - "movdqa (%1,%3,4), %%xmm2\n\t" - "psrlq $0x3, %%xmm1\n\t" - "psrlq $0x3, %%xmm2\n\t" - "pand %%xmm4, %%xmm1\n\t" - "pand %%xmm4, %%xmm2\n\t" - "movdqa %%xmm1, %%xmm5\n\t" - "movdqa %%xmm2, %%xmm6\n\t" - "pmaxub %%xmm1, %%xmm2\n\t" - "pminub %%xmm5, %%xmm6\n\t" - "psubb %%xmm6, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm3\n\t" - "psrld $0x8, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm1\n\t" - "pslld $0x2, %%xmm2\n\t" - "paddd %%xmm1, %%xmm2\n\t" - "movdqa %%xmm3, %%xmm1\n\t" - "pand %%xmm0, %%xmm1\n\t" - "paddd %%xmm1, %%xmm1\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "movdqa %%xmm3, %%xmm2\n\t" - "psrld $0x10, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "packssdw %%xmm1, %%xmm1\n\t" - "packuswb %%xmm1, %%xmm1\n\t" - "movd %%xmm1, %%eax\n\t" - "movnti %%eax, (%2,%3)\n\t" - "sub $0x4, %3\n\t" - "jnz sse2_delta8_rgba_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count) - : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" - ); + "mov $0x1F1F1F1F, %%eax\n\t" + "movd %%eax, %%xmm4\n\t" + "pshufd $0x0, %%xmm4, %%xmm4\n\t" + "mov $0xff, %%eax\n\t" + "movd %%eax, %%xmm0\n\t" + "pshufd $0x0, %%xmm0, %%xmm0\n\t" + "sub $0x10, %0\n\t" + "sub $0x10, %1\n\t" + "sub $0x4, %2\n\t" + "sse2_delta8_rgba_iter:\n\t" + "movdqa (%0,%3,4), %%xmm1\n\t" + "movdqa (%1,%3,4), %%xmm2\n\t" + "psrlq $0x3, %%xmm1\n\t" + "psrlq $0x3, %%xmm2\n\t" + "pand %%xmm4, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "movdqa %%xmm1, %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pmaxub %%xmm1, %%xmm2\n\t" + "pminub %%xmm5, %%xmm6\n\t" + "psubb %%xmm6, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm3\n\t" + "psrld $0x8, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm1\n\t" + "pslld $0x2, %%xmm2\n\t" + "paddd %%xmm1, %%xmm2\n\t" + "movdqa %%xmm3, %%xmm1\n\t" + "pand %%xmm0, %%xmm1\n\t" + "paddd %%xmm1, %%xmm1\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "movdqa %%xmm3, %%xmm2\n\t" + "psrld $0x10, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "packssdw %%xmm1, %%xmm1\n\t" + "packuswb %%xmm1, %%xmm1\n\t" + "movd %%xmm1, %%eax\n\t" + "movnti %%eax, (%2,%3)\n\t" + "sub $0x4, %3\n\t" + "jnz sse2_delta8_rgba_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count) + : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -3769,53 +3763,53 @@ __attribute__((noinline,__target__("sse2"))) #endif void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE)) - + __asm__ __volatile__ ( - "mov $0x1F1F1F1F, %%eax\n\t" - "movd %%eax, %%xmm4\n\t" - "pshufd $0x0, %%xmm4, %%xmm4\n\t" - "mov $0xff, %%eax\n\t" - "movd %%eax, %%xmm0\n\t" - "pshufd $0x0, %%xmm0, %%xmm0\n\t" - "sub $0x10, %0\n\t" - "sub $0x10, %1\n\t" - "sub $0x4, %2\n\t" - "sse2_delta8_bgra_iter:\n\t" - "movdqa (%0,%3,4), %%xmm1\n\t" - "movdqa (%1,%3,4), %%xmm2\n\t" - "psrlq $0x3, %%xmm1\n\t" - "psrlq $0x3, %%xmm2\n\t" - "pand %%xmm4, %%xmm1\n\t" - "pand %%xmm4, %%xmm2\n\t" - "movdqa %%xmm1, %%xmm5\n\t" - "movdqa %%xmm2, %%xmm6\n\t" - "pmaxub %%xmm1, %%xmm2\n\t" - "pminub %%xmm5, %%xmm6\n\t" - "psubb %%xmm6, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm3\n\t" - "psrld $0x8, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm1\n\t" - "pslld $0x2, %%xmm2\n\t" - "paddd %%xmm1, %%xmm2\n\t" - "movdqa %%xmm3, %%xmm1\n\t" - "pand %%xmm0, %%xmm1\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "movdqa %%xmm3, %%xmm2\n\t" - "psrld $0x10, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "paddd %%xmm2, %%xmm2\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "packssdw %%xmm1, %%xmm1\n\t" - "packuswb %%xmm1, %%xmm1\n\t" - "movd %%xmm1, %%eax\n\t" - "movnti %%eax, (%2,%3)\n\t" - "sub $0x4, %3\n\t" - "jnz sse2_delta8_bgra_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count) - : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" - ); + "mov $0x1F1F1F1F, %%eax\n\t" + "movd %%eax, %%xmm4\n\t" + "pshufd $0x0, %%xmm4, %%xmm4\n\t" + "mov $0xff, %%eax\n\t" + "movd %%eax, %%xmm0\n\t" + "pshufd $0x0, %%xmm0, %%xmm0\n\t" + "sub $0x10, %0\n\t" + "sub $0x10, %1\n\t" + "sub $0x4, %2\n\t" + "sse2_delta8_bgra_iter:\n\t" + "movdqa (%0,%3,4), %%xmm1\n\t" + "movdqa (%1,%3,4), %%xmm2\n\t" + "psrlq $0x3, %%xmm1\n\t" + "psrlq $0x3, %%xmm2\n\t" + "pand %%xmm4, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "movdqa %%xmm1, %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pmaxub %%xmm1, %%xmm2\n\t" + "pminub %%xmm5, %%xmm6\n\t" + "psubb %%xmm6, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm3\n\t" + "psrld $0x8, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm1\n\t" + "pslld $0x2, %%xmm2\n\t" + "paddd %%xmm1, %%xmm2\n\t" + "movdqa %%xmm3, %%xmm1\n\t" + "pand %%xmm0, %%xmm1\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "movdqa %%xmm3, %%xmm2\n\t" + "psrld $0x10, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "paddd %%xmm2, %%xmm2\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "packssdw %%xmm1, %%xmm1\n\t" + "packuswb %%xmm1, %%xmm1\n\t" + "movd %%xmm1, %%eax\n\t" + "movnti %%eax, (%2,%3)\n\t" + "sub $0x4, %3\n\t" + "jnz sse2_delta8_bgra_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count) + : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -3827,54 +3821,54 @@ __attribute__((noinline,__target__("sse2"))) #endif void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE)) - + __asm__ __volatile__ ( - "mov $0x1F1F1F1F, %%eax\n\t" - "movd %%eax, %%xmm4\n\t" - "pshufd $0x0, %%xmm4, %%xmm4\n\t" - "mov $0xff, %%eax\n\t" - "movd %%eax, %%xmm0\n\t" - "pshufd $0x0, %%xmm0, %%xmm0\n\t" - "sub $0x10, %0\n\t" - "sub $0x10, %1\n\t" - "sub $0x4, %2\n\t" - "sse2_delta8_argb_iter:\n\t" - "movdqa (%0,%3,4), %%xmm1\n\t" - "movdqa (%1,%3,4), %%xmm2\n\t" - "psrlq $0x3, %%xmm1\n\t" - "psrlq $0x3, %%xmm2\n\t" - "pand %%xmm4, %%xmm1\n\t" - "pand %%xmm4, %%xmm2\n\t" - "movdqa %%xmm1, %%xmm5\n\t" - "movdqa %%xmm2, %%xmm6\n\t" - "pmaxub %%xmm1, %%xmm2\n\t" - "pminub %%xmm5, %%xmm6\n\t" - "psubb %%xmm6, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm3\n\t" - "psrld $0x10, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm1\n\t" - "pslld $0x2, %%xmm2\n\t" - "paddd %%xmm1, %%xmm2\n\t" - "movdqa %%xmm3, %%xmm1\n\t" - "psrld $0x8, %%xmm1\n\t" - "pand %%xmm0, %%xmm1\n\t" - "paddd %%xmm1, %%xmm1\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "movdqa %%xmm3, %%xmm2\n\t" - "psrld $0x18, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "packssdw %%xmm1, %%xmm1\n\t" - "packuswb %%xmm1, %%xmm1\n\t" - "movd %%xmm1, %%eax\n\t" - "movnti %%eax, (%2,%3)\n\t" - "sub $0x4, %3\n\t" - "jnz sse2_delta8_argb_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count) - : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" - ); + "mov $0x1F1F1F1F, %%eax\n\t" + "movd %%eax, %%xmm4\n\t" + "pshufd $0x0, %%xmm4, %%xmm4\n\t" + "mov $0xff, %%eax\n\t" + "movd %%eax, %%xmm0\n\t" + "pshufd $0x0, %%xmm0, %%xmm0\n\t" + "sub $0x10, %0\n\t" + "sub $0x10, %1\n\t" + "sub $0x4, %2\n\t" + "sse2_delta8_argb_iter:\n\t" + "movdqa (%0,%3,4), %%xmm1\n\t" + "movdqa (%1,%3,4), %%xmm2\n\t" + "psrlq $0x3, %%xmm1\n\t" + "psrlq $0x3, %%xmm2\n\t" + "pand %%xmm4, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "movdqa %%xmm1, %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pmaxub %%xmm1, %%xmm2\n\t" + "pminub %%xmm5, %%xmm6\n\t" + "psubb %%xmm6, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm3\n\t" + "psrld $0x10, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm1\n\t" + "pslld $0x2, %%xmm2\n\t" + "paddd %%xmm1, %%xmm2\n\t" + "movdqa %%xmm3, %%xmm1\n\t" + "psrld $0x8, %%xmm1\n\t" + "pand %%xmm0, %%xmm1\n\t" + "paddd %%xmm1, %%xmm1\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "movdqa %%xmm3, %%xmm2\n\t" + "psrld $0x18, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "packssdw %%xmm1, %%xmm1\n\t" + "packuswb %%xmm1, %%xmm1\n\t" + "movd %%xmm1, %%eax\n\t" + "movnti %%eax, (%2,%3)\n\t" + "sub $0x4, %3\n\t" + "jnz sse2_delta8_argb_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count) + : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -3886,54 +3880,54 @@ __attribute__((noinline,__target__("sse2"))) #endif void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) { #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE)) - + __asm__ __volatile__ ( - "mov $0x1F1F1F1F, %%eax\n\t" - "movd %%eax, %%xmm4\n\t" - "pshufd $0x0, %%xmm4, %%xmm4\n\t" - "mov $0xff, %%eax\n\t" - "movd %%eax, %%xmm0\n\t" - "pshufd $0x0, %%xmm0, %%xmm0\n\t" - "sub $0x10, %0\n\t" - "sub $0x10, %1\n\t" - "sub $0x4, %2\n\t" - "sse2_delta8_abgr_iter:\n\t" - "movdqa (%0,%3,4), %%xmm1\n\t" - "movdqa (%1,%3,4), %%xmm2\n\t" - "psrlq $0x3, %%xmm1\n\t" - "psrlq $0x3, %%xmm2\n\t" - "pand %%xmm4, %%xmm1\n\t" - "pand %%xmm4, %%xmm2\n\t" - "movdqa %%xmm1, %%xmm5\n\t" - "movdqa %%xmm2, %%xmm6\n\t" - "pmaxub %%xmm1, %%xmm2\n\t" - "pminub %%xmm5, %%xmm6\n\t" - "psubb %%xmm6, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm3\n\t" - "psrld $0x10, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "movdqa %%xmm2, %%xmm1\n\t" - "pslld $0x2, %%xmm2\n\t" - "paddd %%xmm1, %%xmm2\n\t" - "movdqa %%xmm3, %%xmm1\n\t" - "psrld $0x8, %%xmm1\n\t" - "pand %%xmm0, %%xmm1\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "movdqa %%xmm3, %%xmm2\n\t" - "psrld $0x18, %%xmm2\n\t" - "pand %%xmm0, %%xmm2\n\t" - "paddd %%xmm2, %%xmm2\n\t" - "paddd %%xmm2, %%xmm1\n\t" - "packssdw %%xmm1, %%xmm1\n\t" - "packuswb %%xmm1, %%xmm1\n\t" - "movd %%xmm1, %%eax\n\t" - "movnti %%eax, (%2,%3)\n\t" - "sub $0x4, %3\n\t" - "jnz sse2_delta8_abgr_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count) - : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" - ); + "mov $0x1F1F1F1F, %%eax\n\t" + "movd %%eax, %%xmm4\n\t" + "pshufd $0x0, %%xmm4, %%xmm4\n\t" + "mov $0xff, %%eax\n\t" + "movd %%eax, %%xmm0\n\t" + "pshufd $0x0, %%xmm0, %%xmm0\n\t" + "sub $0x10, %0\n\t" + "sub $0x10, %1\n\t" + "sub $0x4, %2\n\t" + "sse2_delta8_abgr_iter:\n\t" + "movdqa (%0,%3,4), %%xmm1\n\t" + "movdqa (%1,%3,4), %%xmm2\n\t" + "psrlq $0x3, %%xmm1\n\t" + "psrlq $0x3, %%xmm2\n\t" + "pand %%xmm4, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "movdqa %%xmm1, %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pmaxub %%xmm1, %%xmm2\n\t" + "pminub %%xmm5, %%xmm6\n\t" + "psubb %%xmm6, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm3\n\t" + "psrld $0x10, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "movdqa %%xmm2, %%xmm1\n\t" + "pslld $0x2, %%xmm2\n\t" + "paddd %%xmm1, %%xmm2\n\t" + "movdqa %%xmm3, %%xmm1\n\t" + "psrld $0x8, %%xmm1\n\t" + "pand %%xmm0, %%xmm1\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "movdqa %%xmm3, %%xmm2\n\t" + "psrld $0x18, %%xmm2\n\t" + "pand %%xmm0, %%xmm2\n\t" + "paddd %%xmm2, %%xmm2\n\t" + "paddd %%xmm2, %%xmm1\n\t" + "packssdw %%xmm1, %%xmm1\n\t" + "packuswb %%xmm1, %%xmm1\n\t" + "movd %%xmm1, %%eax\n\t" + "movnti %%eax, (%2,%3)\n\t" + "sub $0x4, %3\n\t" + "jnz sse2_delta8_abgr_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count) + : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -3953,36 +3947,36 @@ void ssse3_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t* resul /* XMM4 - divide mask */ __asm__ __volatile__ ( - "mov $0x1F1F1F1F, %%eax\n\t" - "movd %%eax, %%xmm4\n\t" - "pshufd $0x0, %%xmm4, %%xmm4\n\t" - "mov %4, %%eax\n\t" - "movd %%eax, %%xmm3\n\t" - "pshufd $0x0, %%xmm3, %%xmm3\n\t" - "pxor %%xmm0, %%xmm0\n\t" - "sub $0x10, %0\n\t" - "sub $0x10, %1\n\t" - "sub $0x4, %2\n\t" - "ssse3_delta8_rgb32_iter:\n\t" - "movdqa (%0,%3,4), %%xmm1\n\t" - "movdqa (%1,%3,4), %%xmm2\n\t" - "psrlq $0x3, %%xmm1\n\t" - "psrlq $0x3, %%xmm2\n\t" - "pand %%xmm4, %%xmm1\n\t" - "pand %%xmm4, %%xmm2\n\t" - "psubb %%xmm2, %%xmm1\n\t" - "pabsb %%xmm1, %%xmm1\n\t" - "pmaddubsw %%xmm3, %%xmm1\n\t" - "phaddw %%xmm0, %%xmm1\n\t" - "packuswb %%xmm1, %%xmm1\n\t" - "movd %%xmm1, %%eax\n\t" - "movnti %%eax, (%2,%3)\n\t" - "sub $0x4, %3\n\t" - "jnz ssse3_delta8_rgb32_iter\n\t" - : - : "r" (col1), "r" (col2), "r" (result), "r" (count), "g" (multiplier) - : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory" - ); + "mov $0x1F1F1F1F, %%eax\n\t" + "movd %%eax, %%xmm4\n\t" + "pshufd $0x0, %%xmm4, %%xmm4\n\t" + "mov %4, %%eax\n\t" + "movd %%eax, %%xmm3\n\t" + "pshufd $0x0, %%xmm3, %%xmm3\n\t" + "pxor %%xmm0, %%xmm0\n\t" + "sub $0x10, %0\n\t" + "sub $0x10, %1\n\t" + "sub $0x4, %2\n\t" + "ssse3_delta8_rgb32_iter:\n\t" + "movdqa (%0,%3,4), %%xmm1\n\t" + "movdqa (%1,%3,4), %%xmm2\n\t" + "psrlq $0x3, %%xmm1\n\t" + "psrlq $0x3, %%xmm2\n\t" + "pand %%xmm4, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "psubb %%xmm2, %%xmm1\n\t" + "pabsb %%xmm1, %%xmm1\n\t" + "pmaddubsw %%xmm3, %%xmm1\n\t" + "phaddw %%xmm0, %%xmm1\n\t" + "packuswb %%xmm1, %%xmm1\n\t" + "movd %%xmm1, %%eax\n\t" + "movnti %%eax, (%2,%3)\n\t" + "sub $0x4, %3\n\t" + "jnz ssse3_delta8_rgb32_iter\n\t" + : + : "r" (col1), "r" (col2), "r" (result), "r" (count), "g" (multiplier) + : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -4015,7 +4009,7 @@ void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result __attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { unsigned int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { r = col1[0]; g = col1[1]; @@ -4033,7 +4027,7 @@ __attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_ g = col1[10]; b = col1[11]; result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 12; result += 4; } @@ -4043,7 +4037,7 @@ __attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_ __attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { unsigned int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { b = col1[0]; g = col1[1]; @@ -4061,7 +4055,7 @@ __attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_ g = col1[10]; r = col1[11]; result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 12; result += 4; } @@ -4071,7 +4065,7 @@ __attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_ __attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { unsigned int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { r = col1[0]; g = col1[1]; @@ -4089,7 +4083,7 @@ __attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8 g = col1[13]; b = col1[14]; result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; result += 4; } @@ -4099,7 +4093,7 @@ __attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8 __attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { unsigned int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { b = col1[0]; g = col1[1]; @@ -4117,7 +4111,7 @@ __attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8 g = col1[13]; r = col1[14]; result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; result += 4; } @@ -4127,7 +4121,7 @@ __attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8 __attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { unsigned int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { r = col1[1]; g = col1[2]; @@ -4145,7 +4139,7 @@ __attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8 g = col1[14]; b = col1[15]; result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; result += 4; } @@ -4155,7 +4149,7 @@ __attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8 __attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { unsigned int r,g,b; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { b = col1[1]; g = col1[2]; @@ -4173,7 +4167,7 @@ __attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8 g = col1[14]; r = col1[15]; result[3] = (r + r + b + g + g + g + g + g)>>3; - + col1 += 16; result += 4; } @@ -4183,7 +4177,7 @@ __attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8 __attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { const uint16_t* yuvbuf = (const uint16_t*)col1; const uint8_t* const max_ptr = result + count; - + while(result < max_ptr) { result[0] = (uint8_t)yuvbuf[0]; result[1] = (uint8_t)yuvbuf[1]; @@ -4201,7 +4195,7 @@ __attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8 result[13] = (uint8_t)yuvbuf[13]; result[14] = (uint8_t)yuvbuf[14]; result[15] = (uint8_t)yuvbuf[15]; - + yuvbuf += 16; result += 16; } @@ -4220,30 +4214,30 @@ void ssse3_convert_rgb32_gray8(const uint8_t* col1, uint8_t* result, unsigned lo /* XMM4 - divide mask */ __asm__ __volatile__ ( - "mov $0x1F1F1F1F, %%eax\n\t" - "movd %%eax, %%xmm4\n\t" - "pshufd $0x0, %%xmm4, %%xmm4\n\t" - "mov %3, %%eax\n\t" - "movd %%eax, %%xmm3\n\t" - "pshufd $0x0, %%xmm3, %%xmm3\n\t" - "pxor %%xmm0, %%xmm0\n\t" - "sub $0x10, %0\n\t" - "sub $0x4, %1\n\t" - "ssse3_convert_rgb32_gray8_iter:\n\t" - "movdqa (%0,%2,4), %%xmm1\n\t" - "psrlq $0x3, %%xmm1\n\t" - "pand %%xmm4, %%xmm1\n\t" - "pmaddubsw %%xmm3, %%xmm1\n\t" - "phaddw %%xmm0, %%xmm1\n\t" - "packuswb %%xmm1, %%xmm1\n\t" - "movd %%xmm1, %%eax\n\t" - "movnti %%eax, (%1,%2)\n\t" - "sub $0x4, %2\n\t" - "jnz ssse3_convert_rgb32_gray8_iter\n\t" - : - : "r" (col1), "r" (result), "r" (count), "g" (multiplier) - : "%eax", "%xmm0", "%xmm1", "%xmm3", "%xmm4", "cc", "memory" - ); + "mov $0x1F1F1F1F, %%eax\n\t" + "movd %%eax, %%xmm4\n\t" + "pshufd $0x0, %%xmm4, %%xmm4\n\t" + "mov %3, %%eax\n\t" + "movd %%eax, %%xmm3\n\t" + "pshufd $0x0, %%xmm3, %%xmm3\n\t" + "pxor %%xmm0, %%xmm0\n\t" + "sub $0x10, %0\n\t" + "sub $0x4, %1\n\t" + "ssse3_convert_rgb32_gray8_iter:\n\t" + "movdqa (%0,%2,4), %%xmm1\n\t" + "psrlq $0x3, %%xmm1\n\t" + "pand %%xmm4, %%xmm1\n\t" + "pmaddubsw %%xmm3, %%xmm1\n\t" + "phaddw %%xmm0, %%xmm1\n\t" + "packuswb %%xmm1, %%xmm1\n\t" + "movd %%xmm1, %%eax\n\t" + "movnti %%eax, (%1,%2)\n\t" + "sub $0x4, %2\n\t" + "jnz ssse3_convert_rgb32_gray8_iter\n\t" + : + : "r" (col1), "r" (result), "r" (count), "g" (multiplier) + : "%eax", "%xmm0", "%xmm1", "%xmm3", "%xmm4", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -4276,10 +4270,10 @@ __attribute__((noinline,__target__("ssse3"))) void ssse3_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) { #if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE)) unsigned long i = 0; - + __attribute__((aligned(16))) static const uint8_t movemask1[16] = {0,2,4,6,8,10,12,14,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; __attribute__((aligned(16))) static const uint8_t movemask2[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0,2,4,6,8,10,12,14}; - + /* XMM0 - General purpose */ /* XMM1 - General purpose */ /* XMM2 - unused */ @@ -4290,28 +4284,28 @@ void ssse3_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned lon /* XMM7 - unused */ __asm__ __volatile__ ( - "movdqa %4, %%xmm3\n\t" - "movdqa %5, %%xmm4\n\t" - "algo_ssse3_convert_yuyv_gray8:\n\t" - "movdqa (%0), %%xmm0\n\t" - "pshufb %%xmm3, %%xmm0\n\t" - "movdqa 0x10(%0), %%xmm1\n\t" - "pshufb %%xmm4, %%xmm1\n\t" - "por %%xmm1, %%xmm0\n\t" - "movntdq %%xmm0, (%1)\n\t" - "add $0x10, %3\n\t" - "add $0x10, %1\n\t" - "add $0x20, %0\n\t" - "cmp %2, %3\n\t" - "jb algo_ssse3_convert_yuyv_gray8\n\t" - : + "movdqa %4, %%xmm3\n\t" + "movdqa %5, %%xmm4\n\t" + "algo_ssse3_convert_yuyv_gray8:\n\t" + "movdqa (%0), %%xmm0\n\t" + "pshufb %%xmm3, %%xmm0\n\t" + "movdqa 0x10(%0), %%xmm1\n\t" + "pshufb %%xmm4, %%xmm1\n\t" + "por %%xmm1, %%xmm0\n\t" + "movntdq %%xmm0, (%1)\n\t" + "add $0x10, %3\n\t" + "add $0x10, %1\n\t" + "add $0x20, %0\n\t" + "cmp %2, %3\n\t" + "jb algo_ssse3_convert_yuyv_gray8\n\t" + : #if (defined(_DEBUG) && !defined(__x86_64__)) /* Use one less register to allow compilation to success on 32bit with omit frame pointer disabled */ - : "r" (col1), "r" (result), "m" (count), "r" (i), "m" (*movemask1), "m" (*movemask2) + : "r" (col1), "r" (result), "m" (count), "r" (i), "m" (*movemask1), "m" (*movemask2) #else - : "r" (col1), "r" (result), "r" (count), "r" (i), "m" (*movemask1), "m" (*movemask2) + : "r" (col1), "r" (result), "r" (count), "r" (i), "m" (*movemask1), "m" (*movemask2) #endif - : "%xmm3", "%xmm4", "cc", "memory" - ); + : "%xmm3", "%xmm4", "cc", "memory" + ); #else Panic("SSE function called on a non x86\\x86-64 platform"); #endif @@ -4330,11 +4324,11 @@ __attribute__((noinline)) void zm_convert_yuyv_rgb(const uint8_t* col1, uint8_t* r = y1 + r_v_table[v]; g = y1 - (g_u_table[u]+g_v_table[v]); b = y1 + b_u_table[u]; - + result[0] = r<0?0:(r>255?255:r); result[1] = g<0?0:(g>255?255:g); result[2] = b<0?0:(b>255?255:b); - + r = y2 + r_v_table[v]; g = y2 - (g_u_table[u]+g_v_table[v]); b = y2 + b_u_table[u]; @@ -4343,7 +4337,7 @@ __attribute__((noinline)) void zm_convert_yuyv_rgb(const uint8_t* col1, uint8_t* result[4] = g<0?0:(g>255?255:g); result[5] = b<0?0:(b>255?255:b); } - + } /* YUYV to RGBA - modified the one above */ @@ -4359,11 +4353,11 @@ __attribute__((noinline)) void zm_convert_yuyv_rgba(const uint8_t* col1, uint8_t r = y1 + r_v_table[v]; g = y1 - (g_u_table[u]+g_v_table[v]); b = y1 + b_u_table[u]; - + result[0] = r<0?0:(r>255?255:r); result[1] = g<0?0:(g>255?255:g); result[2] = b<0?0:(b>255?255:b); - + r = y2 + r_v_table[v]; g = y2 - (g_u_table[u]+g_v_table[v]); b = y2 + b_u_table[u]; @@ -4372,7 +4366,7 @@ __attribute__((noinline)) void zm_convert_yuyv_rgba(const uint8_t* col1, uint8_t result[5] = g<0?0:(g>255?255:g); result[6] = b<0?0:(b>255?255:b); } - + } /* RGB555 to RGB24 - relocated from zm_local_camera.cpp */ @@ -4459,9 +4453,9 @@ __attribute__((noinline)) void std_deinterlace_4field_gray8(uint8_t* col1, uint8 pabove += width; pnabove += width; pbelow += width; - + } - + /* Special case for the last line */ max_ptr2 = pcurrent + width; while(pcurrent < max_ptr2) { @@ -4518,29 +4512,29 @@ __attribute__((noinline)) void std_deinterlace_4field_rgb(uint8_t* col1, uint8_t pabove += row_width; pnabove += row_width; pbelow += row_width; - + } - + /* Special case for the last line */ max_ptr2 = pcurrent + row_width; while(pcurrent < max_ptr2) { - r = abs(pnabove[0] - pabove[0]); - g = abs(pnabove[1] - pabove[1]); - b = abs(pnabove[2] - pabove[2]); - delta1 = (r + r + b + g + g + g + g + g)>>3; - r = abs(pncurrent[0] - pcurrent[0]); - g = abs(pncurrent[1] - pcurrent[1]); - b = abs(pncurrent[2] - pcurrent[2]); - delta2 = (r + r + b + g + g + g + g + g)>>3; - if(((delta1 + delta2) >> 1) >= threshold) { - pcurrent[0] = pabove[0]; - pcurrent[1] = pabove[1]; - pcurrent[2] = pabove[2]; - } - pabove += 3; - pnabove += 3; - pcurrent += 3; - pncurrent += 3; + r = abs(pnabove[0] - pabove[0]); + g = abs(pnabove[1] - pabove[1]); + b = abs(pnabove[2] - pabove[2]); + delta1 = (r + r + b + g + g + g + g + g)>>3; + r = abs(pncurrent[0] - pcurrent[0]); + g = abs(pncurrent[1] - pcurrent[1]); + b = abs(pncurrent[2] - pcurrent[2]); + delta2 = (r + r + b + g + g + g + g + g)>>3; + if(((delta1 + delta2) >> 1) >= threshold) { + pcurrent[0] = pabove[0]; + pcurrent[1] = pabove[1]; + pcurrent[2] = pabove[2]; + } + pabove += 3; + pnabove += 3; + pcurrent += 3; + pncurrent += 3; } } @@ -4587,29 +4581,29 @@ __attribute__((noinline)) void std_deinterlace_4field_bgr(uint8_t* col1, uint8_t pabove += row_width; pnabove += row_width; pbelow += row_width; - + } - + /* Special case for the last line */ max_ptr2 = pcurrent + row_width; while(pcurrent < max_ptr2) { - b = abs(pnabove[0] - pabove[0]); - g = abs(pnabove[1] - pabove[1]); - r = abs(pnabove[2] - pabove[2]); - delta1 = (r + r + b + g + g + g + g + g)>>3; - b = abs(pncurrent[0] - pcurrent[0]); - g = abs(pncurrent[1] - pcurrent[1]); - r = abs(pncurrent[2] - pcurrent[2]); - delta2 = (r + r + b + g + g + g + g + g)>>3; - if(((delta1 + delta2) >> 1) >= threshold) { - pcurrent[0] = pabove[0]; - pcurrent[1] = pabove[1]; - pcurrent[2] = pabove[2]; - } - pabove += 3; - pnabove += 3; - pcurrent += 3; - pncurrent += 3; + b = abs(pnabove[0] - pabove[0]); + g = abs(pnabove[1] - pabove[1]); + r = abs(pnabove[2] - pabove[2]); + delta1 = (r + r + b + g + g + g + g + g)>>3; + b = abs(pncurrent[0] - pcurrent[0]); + g = abs(pncurrent[1] - pcurrent[1]); + r = abs(pncurrent[2] - pcurrent[2]); + delta2 = (r + r + b + g + g + g + g + g)>>3; + if(((delta1 + delta2) >> 1) >= threshold) { + pcurrent[0] = pabove[0]; + pcurrent[1] = pabove[1]; + pcurrent[2] = pabove[2]; + } + pabove += 3; + pnabove += 3; + pcurrent += 3; + pncurrent += 3; } } @@ -4656,29 +4650,29 @@ __attribute__((noinline)) void std_deinterlace_4field_rgba(uint8_t* col1, uint8_ pabove += row_width; pnabove += row_width; pbelow += row_width; - + } - + /* Special case for the last line */ max_ptr2 = pcurrent + row_width; while(pcurrent < max_ptr2) { - r = abs(pnabove[0] - pabove[0]); - g = abs(pnabove[1] - pabove[1]); - b = abs(pnabove[2] - pabove[2]); - delta1 = (r + r + b + g + g + g + g + g)>>3; - r = abs(pncurrent[0] - pcurrent[0]); - g = abs(pncurrent[1] - pcurrent[1]); - b = abs(pncurrent[2] - pcurrent[2]); - delta2 = (r + r + b + g + g + g + g + g)>>3; - if(((delta1 + delta2) >> 1) >= threshold) { - pcurrent[0] = pabove[0]; - pcurrent[1] = pabove[1]; - pcurrent[2] = pabove[2]; - } - pabove += 4; - pnabove += 4; - pcurrent += 4; - pncurrent += 4; + r = abs(pnabove[0] - pabove[0]); + g = abs(pnabove[1] - pabove[1]); + b = abs(pnabove[2] - pabove[2]); + delta1 = (r + r + b + g + g + g + g + g)>>3; + r = abs(pncurrent[0] - pcurrent[0]); + g = abs(pncurrent[1] - pcurrent[1]); + b = abs(pncurrent[2] - pcurrent[2]); + delta2 = (r + r + b + g + g + g + g + g)>>3; + if(((delta1 + delta2) >> 1) >= threshold) { + pcurrent[0] = pabove[0]; + pcurrent[1] = pabove[1]; + pcurrent[2] = pabove[2]; + } + pabove += 4; + pnabove += 4; + pcurrent += 4; + pncurrent += 4; } } @@ -4725,29 +4719,29 @@ __attribute__((noinline)) void std_deinterlace_4field_bgra(uint8_t* col1, uint8_ pabove += row_width; pnabove += row_width; pbelow += row_width; - + } - + /* Special case for the last line */ max_ptr2 = pcurrent + row_width; while(pcurrent < max_ptr2) { - b = abs(pnabove[0] - pabove[0]); - g = abs(pnabove[1] - pabove[1]); - r = abs(pnabove[2] - pabove[2]); - delta1 = (r + r + b + g + g + g + g + g)>>3; - b = abs(pncurrent[0] - pcurrent[0]); - g = abs(pncurrent[1] - pcurrent[1]); - r = abs(pncurrent[2] - pcurrent[2]); - delta2 = (r + r + b + g + g + g + g + g)>>3; - if(((delta1 + delta2) >> 1) >= threshold) { - pcurrent[0] = pabove[0]; - pcurrent[1] = pabove[1]; - pcurrent[2] = pabove[2]; - } - pabove += 4; - pnabove += 4; - pcurrent += 4; - pncurrent += 4; + b = abs(pnabove[0] - pabove[0]); + g = abs(pnabove[1] - pabove[1]); + r = abs(pnabove[2] - pabove[2]); + delta1 = (r + r + b + g + g + g + g + g)>>3; + b = abs(pncurrent[0] - pcurrent[0]); + g = abs(pncurrent[1] - pcurrent[1]); + r = abs(pncurrent[2] - pcurrent[2]); + delta2 = (r + r + b + g + g + g + g + g)>>3; + if(((delta1 + delta2) >> 1) >= threshold) { + pcurrent[0] = pabove[0]; + pcurrent[1] = pabove[1]; + pcurrent[2] = pabove[2]; + } + pabove += 4; + pnabove += 4; + pcurrent += 4; + pncurrent += 4; } } @@ -4794,29 +4788,29 @@ __attribute__((noinline)) void std_deinterlace_4field_argb(uint8_t* col1, uint8_ pabove += row_width; pnabove += row_width; pbelow += row_width; - + } - + /* Special case for the last line */ max_ptr2 = pcurrent + row_width; while(pcurrent < max_ptr2) { - r = abs(pnabove[1] - pabove[1]); - g = abs(pnabove[2] - pabove[2]); - b = abs(pnabove[3] - pabove[3]); - delta1 = (r + r + b + g + g + g + g + g)>>3; - r = abs(pncurrent[1] - pcurrent[1]); - g = abs(pncurrent[2] - pcurrent[2]); - b = abs(pncurrent[3] - pcurrent[3]); - delta2 = (r + r + b + g + g + g + g + g)>>3; - if(((delta1 + delta2) >> 1) >= threshold) { - pcurrent[1] = pabove[1]; - pcurrent[2] = pabove[2]; - pcurrent[3] = pabove[3]; - } - pabove += 4; - pnabove += 4; - pcurrent += 4; - pncurrent += 4; + r = abs(pnabove[1] - pabove[1]); + g = abs(pnabove[2] - pabove[2]); + b = abs(pnabove[3] - pabove[3]); + delta1 = (r + r + b + g + g + g + g + g)>>3; + r = abs(pncurrent[1] - pcurrent[1]); + g = abs(pncurrent[2] - pcurrent[2]); + b = abs(pncurrent[3] - pcurrent[3]); + delta2 = (r + r + b + g + g + g + g + g)>>3; + if(((delta1 + delta2) >> 1) >= threshold) { + pcurrent[1] = pabove[1]; + pcurrent[2] = pabove[2]; + pcurrent[3] = pabove[3]; + } + pabove += 4; + pnabove += 4; + pcurrent += 4; + pncurrent += 4; } } @@ -4863,28 +4857,28 @@ __attribute__((noinline)) void std_deinterlace_4field_abgr(uint8_t* col1, uint8_ pabove += row_width; pnabove += row_width; pbelow += row_width; - + } - + /* Special case for the last line */ max_ptr2 = pcurrent + row_width; while(pcurrent < max_ptr2) { - b = abs(pnabove[1] - pabove[1]); - g = abs(pnabove[2] - pabove[2]); - r = abs(pnabove[3] - pabove[3]); - delta1 = (r + r + b + g + g + g + g + g)>>3; - b = abs(pncurrent[1] - pcurrent[1]); - g = abs(pncurrent[2] - pcurrent[2]); - r = abs(pncurrent[3] - pcurrent[3]); - delta2 = (r + r + b + g + g + g + g + g)>>3; - if(((delta1 + delta2) >> 1) >= threshold) { - pcurrent[1] = pabove[1]; - pcurrent[2] = pabove[2]; - pcurrent[3] = pabove[3]; - } - pabove += 4; - pnabove += 4; - pcurrent += 4; - pncurrent += 4; + b = abs(pnabove[1] - pabove[1]); + g = abs(pnabove[2] - pabove[2]); + r = abs(pnabove[3] - pabove[3]); + delta1 = (r + r + b + g + g + g + g + g)>>3; + b = abs(pncurrent[1] - pcurrent[1]); + g = abs(pncurrent[2] - pcurrent[2]); + r = abs(pncurrent[3] - pcurrent[3]); + delta2 = (r + r + b + g + g + g + g + g)>>3; + if(((delta1 + delta2) >> 1) >= threshold) { + pcurrent[1] = pabove[1]; + pcurrent[2] = pabove[2]; + pcurrent[3] = pabove[3]; + } + pabove += 4; + pnabove += 4; + pcurrent += 4; + pncurrent += 4; } }