zoneminder/src/zm_image.cpp

5696 lines
174 KiB
C++

//
// ZoneMinder Image Class Implementation, $Date$, $Revision$
// Copyright (C) 2001-2008 Philip Coombes
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "zm.h"
#include "zm_font.h"
#include "zm_bigfont.h"
#include "zm_image.h"
#include "zm_utils.h"
#include "zm_rgb.h"
#include <sys/stat.h>
#include <errno.h>
// Class-wide flag: false until Image::Initialise() has run. Every constructor checks it.
bool Image::initialised = false;
// Lookup tables allocated and filled in Image::Initialise().
// y_table and uv_table have 256 entries each and are indexed by a byte value.
static unsigned char *y_table;
static signed char *uv_table;
// Tables filled from the 1402/344/714/1772 per-mille factors in Image::Initialise().
static short *r_v_table;
static short *g_v_table;
static short *g_u_table;
static short *b_u_table;
// 16-byte aligned constant mask; presumably used by the SSE/SSSE3 routines elsewhere in this file -- TODO confirm.
__attribute__((aligned(16))) static const uint8_t movemask[16] = {0,4,8,12,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
// Cached libjpeg compressor objects, created on first use and indexed by the JPEG quality value
// (writejpg_ccinfo is used by WriteJpeg, encodejpg_ccinfo by EncodeJpeg).
jpeg_compress_struct *Image::writejpg_ccinfo[101] = { 0 };
jpeg_compress_struct *Image::encodejpg_ccinfo[101] = { 0 };
// Cached libjpeg decompressor objects, created on first use by ReadJpeg and DecodeJpeg respectively.
jpeg_decompress_struct *Image::readjpg_dcinfo = 0;
jpeg_decompress_struct *Image::decodejpg_dcinfo = 0;
// Error manager shared by all of the cached libjpeg objects above.
struct zm_error_mgr Image::jpg_err;
/* Pointer to blend function. Selected in Image::Initialise(). */
static blend_fptr_t fptr_blend;
/* Pointer to delta8 functions. Selected in Image::Initialise(), one per supported pixel layout. */
static delta_fptr_t fptr_delta8_rgb;
static delta_fptr_t fptr_delta8_bgr;
static delta_fptr_t fptr_delta8_rgba;
static delta_fptr_t fptr_delta8_bgra;
static delta_fptr_t fptr_delta8_argb;
static delta_fptr_t fptr_delta8_abgr;
static delta_fptr_t fptr_delta8_gray8;
/* Pointers to deinterlace_4field functions. Selected in Image::Initialise(). */
static deinterlace_4field_fptr_t fptr_deinterlace_4field_rgba;
static deinterlace_4field_fptr_t fptr_deinterlace_4field_bgra;
static deinterlace_4field_fptr_t fptr_deinterlace_4field_argb;
static deinterlace_4field_fptr_t fptr_deinterlace_4field_abgr;
static deinterlace_4field_fptr_t fptr_deinterlace_4field_gray8;
/* Pointer to image buffer memory copy function (not static, so it has external linkage). */
/* Set in Image::Initialise() to either sse2_aligned_memcpy or memcpy. */
imgbufcpy_fptr_t fptr_imgbufcpy;
/* Default constructor: creates an empty image with no pixel buffer attached. */
Image::Image()
{
  /* One-off class-wide setup, done by whichever image is constructed first */
  if ( !initialised )
  {
    Initialise();
  }

  /* No geometry or pixel format yet */
  width = 0;
  height = 0;
  pixels = 0;
  colours = 0;
  subpixelorder = 0;

  /* No storage yet */
  buffer = 0;
  buffertype = 0;
  holdbuffer = 0;
  allocation = 0;
  size = 0;

  /* Start with an empty text string */
  text[0] = '\0';
}
/*
 * Constructs an image by reading a JPEG file as RGB24 (RGB subpixel order).
 * The result of ReadJpeg() is not checked here.
 */
Image::Image( const char *filename )
{
  /* One-off class-wide setup, done by whichever image is constructed first */
  if ( !initialised )
  {
    Initialise();
  }

  /* Begin as an empty image; ReadJpeg() requests the buffer it needs */
  width = 0;
  height = 0;
  pixels = 0;
  colours = 0;
  subpixelorder = 0;

  buffer = 0;
  buffertype = 0;
  holdbuffer = 0;
  allocation = 0;
  size = 0;

  ReadJpeg( filename, ZM_COLOUR_RGB24, ZM_SUBPIX_ORDER_RGB);

  /* Start with an empty text string */
  text[0] = '\0';
}
/*
 * Constructs an image of the given geometry and pixel format.
 *
 * p_width, p_height  image dimensions in pixels
 * p_colours          bytes per pixel
 * p_subpixelorder    subpixel ordering constant
 * p_buffer           optional existing pixel storage; when non-NULL it is
 *                    adopted as ZM_BUFTYPE_DONTFREE and assumed to hold at
 *                    least width*height*colours bytes, otherwise a buffer of
 *                    that size is allocated.
 */
Image::Image( int p_width, int p_height, int p_colours, int p_subpixelorder, uint8_t *p_buffer )
{
  /* One-off class-wide setup, done by whichever image is constructed first */
  if ( !initialised )
  {
    Initialise();
  }

  /* Geometry and derived sizes */
  width = p_width;
  height = p_height;
  colours = p_colours;
  subpixelorder = p_subpixelorder;
  pixels = width*height;
  size = pixels*colours;

  buffer = 0;
  holdbuffer = 0;

  if ( !p_buffer )
  {
    /* No storage supplied: allocate our own */
    AllocImgBuffer(size);
  }
  else
  {
    /* Adopt the caller's storage without taking responsibility for freeing it */
    allocation = size;
    buffertype = ZM_BUFTYPE_DONTFREE;
    buffer = p_buffer;
  }

  /* Start with an empty text string */
  text[0] = '\0';
}
/*
 * Copy constructor: duplicates the geometry, pixel format, pixel data and
 * text of p_image into a freshly allocated buffer.
 */
Image::Image( const Image &p_image )
{
  /* One-off class-wide setup, done by whichever image is constructed first */
  if ( !initialised )
    Initialise();

  width = p_image.width;
  height = p_image.height;
  pixels = p_image.pixels;
  colours = p_image.colours;
  subpixelorder = p_image.subpixelorder;
  size = p_image.size; // allocation is set in AllocImgBuffer

  buffer = 0;
  holdbuffer = 0;
  AllocImgBuffer(size);
  (*fptr_imgbufcpy)(buffer, p_image.buffer, size);

  /* strncpy() does not terminate the destination when the source fills it,
   * so copy one byte less and terminate explicitly. */
  strncpy( text, p_image.text, sizeof(text)-1 );
  text[sizeof(text)-1] = '\0';
}
/*
 * Destructor: releases this image's own pixel buffer.
 *
 * The colour lookup tables, the 'initialised' flag and the cached libjpeg
 * decompressors are shared by every Image in the process. They used to be
 * torn down here, so destroying ANY image (including the temporaries made
 * by WriteJpeg/EncodeJpeg) freed tables that other live images still
 * referenced, left the table pointers dangling, and forced a complete
 * Initialise() on the next construction. Shared state is therefore left
 * alone here and lives for the lifetime of the process.
 */
Image::~Image()
{
  DumpImgBuffer();
}
/*
 * One-off class-wide setup, invoked by the constructors while 'initialised'
 * is false:
 *  - selects the blend, delta8, 4-field deinterlace and buffer copy
 *    implementations according to config.fast_image_blends,
 *    config.cpu_extensions and sseversion;
 *  - runs a self-test on the chosen blend function and Panic()s if the
 *    result is more than 3 away from the expected values;
 *  - allocates and fills the static colour lookup tables.
 */
void Image::Initialise()
{
  /* Assign the blend pointer to function */
  if(config.fast_image_blends) {
    if(config.cpu_extensions && sseversion >= 20) {
      fptr_blend = &sse2_fastblend; /* SSE2 fast blend */
      Debug(2,"Blend: Using SSE2 fast blend function");
    } else {
      fptr_blend = &std_fastblend; /* standard fast blend */
      Debug(2,"Blend: Using fast blend function");
    }
  } else {
    fptr_blend = &std_blend;
    Debug(2,"Blend: Using standard blend function");
  }

  /* Blend self-test: blend two known 16 byte vectors at 12.5% and compare */
  __attribute__((aligned(16))) uint8_t blend1[16] = {142,255,159,91,88,227,0,52,37,80,152,97,104,252,90,82};
  __attribute__((aligned(16))) uint8_t blend2[16] = {129,56,136,96,119,149,94,29,96,176,1,144,230,203,111,172};
  __attribute__((aligned(16))) uint8_t blendres[16];
  __attribute__((aligned(16))) uint8_t blendexp[16] = {141,231,157,92,91,217,11,49,45,92,133,103,119,246,92,93}; /* Expected results for 12.5% blend */

  (*fptr_blend)(blend1,blend2,blendres,16,12.5);

  /* Compare results with expected results */
  for(int i=0;i<16;i++) {
    if(abs(blendexp[i] - blendres[i]) > 3) {
      Panic("Blend function failed self-test: Results differ from the expected results");
    }
  }

  /* The 24 bit delta functions only have a standard implementation */
  fptr_delta8_rgb = &std_delta8_rgb;
  fptr_delta8_bgr = &std_delta8_bgr;

  /* Assign the delta functions */
  if(config.cpu_extensions) {
    if(sseversion >= 35) {
      /* SSSE3 available */
      fptr_delta8_rgba = &ssse3_delta8_rgba;
      fptr_delta8_bgra = &ssse3_delta8_bgra;
      fptr_delta8_argb = &ssse3_delta8_argb;
      fptr_delta8_abgr = &ssse3_delta8_abgr;
      fptr_delta8_gray8 = &sse2_delta8_gray8;
      Debug(2,"Delta: Using SSSE3 delta functions");
    } else if(sseversion >= 20) {
      /* SSE2 available */
      fptr_delta8_rgba = &sse2_delta8_rgba;
      fptr_delta8_bgra = &sse2_delta8_bgra;
      fptr_delta8_argb = &sse2_delta8_argb;
      fptr_delta8_abgr = &sse2_delta8_abgr;
      /*
      ** On some systems, the 4 SSE2 algorithms above might be a little slower than
      ** the standard algorithms, especially on early Pentium 4 processors.
      ** In that case, comment out the 4 lines above and uncomment the 4 lines below
      */
      // fptr_delta8_rgba = &std_delta8_rgba;
      // fptr_delta8_bgra = &std_delta8_bgra;
      // fptr_delta8_argb = &std_delta8_argb;
      // fptr_delta8_abgr = &std_delta8_abgr;
      fptr_delta8_gray8 = &sse2_delta8_gray8;
      Debug(2,"Delta: Using SSE2 delta functions");
    } else {
      /* No suitable SSE version available */
      fptr_delta8_rgba = &std_delta8_rgba;
      fptr_delta8_bgra = &std_delta8_bgra;
      fptr_delta8_argb = &std_delta8_argb;
      fptr_delta8_abgr = &std_delta8_abgr;
      fptr_delta8_gray8 = &std_delta8_gray8;
      Debug(2,"Delta: Using standard delta functions");
    }
  } else {
    /* CPU extensions disabled */
    fptr_delta8_rgba = &std_delta8_rgba;
    fptr_delta8_bgra = &std_delta8_bgra;
    fptr_delta8_argb = &std_delta8_argb;
    fptr_delta8_abgr = &std_delta8_abgr;
    fptr_delta8_gray8 = &std_delta8_gray8;
    Debug(2,"Delta: CPU extensions disabled, using standard delta functions");
  }

  /* Use SSSE3 deinterlace functions? */
  if(config.cpu_extensions && sseversion >= 35) {
    fptr_deinterlace_4field_rgba = &ssse3_deinterlace_4field_rgba;
    fptr_deinterlace_4field_bgra = &ssse3_deinterlace_4field_bgra;
    fptr_deinterlace_4field_argb = &ssse3_deinterlace_4field_argb;
    fptr_deinterlace_4field_abgr = &ssse3_deinterlace_4field_abgr;
    fptr_deinterlace_4field_gray8 = &ssse3_deinterlace_4field_gray8;
    Debug(2,"Deinterlace: Using SSSE3 delta functions");
  } else {
    fptr_deinterlace_4field_rgba = &std_deinterlace_4field_rgba;
    fptr_deinterlace_4field_bgra = &std_deinterlace_4field_bgra;
    fptr_deinterlace_4field_argb = &std_deinterlace_4field_argb;
    fptr_deinterlace_4field_abgr = &std_deinterlace_4field_abgr;
    fptr_deinterlace_4field_gray8 = &std_deinterlace_4field_gray8;
    Debug(2,"Deinterlace: Using standard delta functions");
  }

  /* Use SSE2 aligned memory copy? */
  if(config.cpu_extensions && sseversion >= 20) {
    fptr_imgbufcpy = &sse2_aligned_memcpy;
    Debug(2,"Image buffer copy: Using SSE2 aligned memcpy");
  } else {
    fptr_imgbufcpy = &memcpy;
    Debug(2,"Image buffer copy: Using standard memcpy");
  }

  /* Code below relocated from zm_local_camera */
  Debug( 3, "Setting up static colour tables" );

  /* y_table: inputs <= 16 map to 0, >= 235 to 255, linear in between */
  y_table = new unsigned char[256];
  for ( int i = 0; i <= 255; i++ )
  {
    unsigned char c = i;
    if ( c <= 16 )
      y_table[c] = 0;
    else if ( c >= 235 )
      y_table[c] = 255;
    else
      y_table[c] = (255*(c-16))/219;
  }

  /* uv_table: inputs <= 16 map to -127, >= 240 to 127, linear in between */
  uv_table = new signed char[256];
  for ( int i = 0; i <= 255; i++ )
  {
    unsigned char c = i;
    if ( c <= 16 )
      uv_table[c] = -127;
    else if ( c >= 240 )
      uv_table[c] = 127;
    else
      uv_table[c] = (127*(c-128))/112;
  }

  /* These tables previously had only 255 entries, so an index of 255 was
   * out of bounds. Size them for every possible byte value (0..255). */
  r_v_table = new short[256];
  g_v_table = new short[256];
  g_u_table = new short[256];
  b_u_table = new short[256];
  for ( int i = 0; i < 256; i++ )
  {
    r_v_table[i] = (1402*(i-128))/1000;
    g_u_table[i] = (344*(i-128))/1000;
    g_v_table[i] = (714*(i-128))/1000;
    b_u_table[i] = (1772*(i-128))/1000;
  }

  initialised = true;
}
/*
 * Requests a writeable buffer for an image of the given geometry and format.
 * Safer than accessing the buffer directly, because a buffer of the required
 * size is guaranteed to exist on success.
 *
 * Returns the image buffer, or NULL when the colours value is unsupported,
 * a dimension is zero, or a held buffer is too small for the request.
 * When the requested format equals the current one nothing is changed and
 * the current buffer pointer is returned as is.
 */
uint8_t* Image::WriteBuffer(const unsigned int p_width, const unsigned int p_height, const unsigned int p_colours, const unsigned int p_subpixelorder) {
  const bool colours_supported = ( p_colours == ZM_COLOUR_GRAY8 || p_colours == ZM_COLOUR_RGB24 || p_colours == ZM_COLOUR_RGB32 );
  if( !colours_supported ) {
    Error("WriteBuffer called with unexpected colours: %d",p_colours);
    return NULL;
  }

  if( p_height == 0 || p_width == 0 ) {
    Error("WriteBuffer called with invalid width or height: %d %d",p_width,p_height);
    return NULL;
  }

  /* Nothing to do when the image already has the requested format */
  const bool same_format = ( p_width == width && p_height == height && p_colours == colours && p_subpixelorder == subpixelorder );
  if( same_format ) {
    return buffer;
  }

  const unsigned int required = (p_width * p_height) * p_colours;

  if( buffer == NULL ) {
    AllocImgBuffer(required);
  } else if( allocation < required ) {
    /* A held buffer must never be replaced */
    if( holdbuffer ) {
      Error("Held buffer is undersized for requested buffer");
      return NULL;
    }
    /* Replace buffer with a bigger one (AllocImgBuffer dumps the old one) */
    AllocImgBuffer(required);
  }

  width = p_width;
  height = p_height;
  colours = p_colours;
  subpixelorder = p_subpixelorder;
  pixels = height*width;
  size = required;

  return buffer;
}
/* Assign an existing buffer to the image instead of copying from a source buffer. The goal is to reduce the amount of memory copying and increase efficiency and buffer reusing. */
void Image::AssignDirect( const unsigned int p_width, const unsigned int p_height, const unsigned int p_colours, const unsigned int p_subpixelorder, uint8_t *new_buffer, const size_t buffer_size, const int p_buffertype) {
if(new_buffer == NULL) {
Error("Attempt to directly assign buffer from a NULL pointer");
return;
}
if(!p_height || !p_width) {
Error("Attempt to directly assign buffer with invalid width or height: %d %d",p_width,p_height);
return;
}
if(p_colours != ZM_COLOUR_GRAY8 && p_colours != ZM_COLOUR_RGB24 && p_colours != ZM_COLOUR_RGB32) {
Error("Attempt to directly assign buffer with unexpected colours per pixel: %d",p_colours);
return;
}
unsigned int new_buffer_size = ((p_width*p_height)*p_colours);
if(buffer_size < new_buffer_size) {
Error("Attempt to directly assign buffer from an undersized buffer of size: %zu, needed %dx%d*%d colours = %zu",buffer_size, p_width, p_height, p_colours, new_buffer_size );
return;
}
if(holdbuffer && buffer) {
if(new_buffer_size > allocation) {
Error("Held buffer is undersized for assigned buffer");
return;
} else {
width = p_width;
height = p_height;
colours = p_colours;
subpixelorder = p_subpixelorder;
pixels = height*width;
size = new_buffer_size; // was pixels*colours, but we already calculated it above as new_buffer_size
/* Copy into the held buffer */
if(new_buffer != buffer)
(*fptr_imgbufcpy)(buffer, new_buffer, size);
/* Free the new buffer */
DumpBuffer(new_buffer, p_buffertype);
}
} else {
/* Free an existing buffer if any */
DumpImgBuffer();
width = p_width;
height = p_height;
colours = p_colours;
subpixelorder = p_subpixelorder;
pixels = height*width;
size = new_buffer_size; // was pixels*colours, but we already calculated it above as new_buffer_size
allocation = buffer_size;
buffertype = p_buffertype;
buffer = new_buffer;
}
}
/*
 * Copies pixel data of the given geometry and format from new_buffer into
 * this image, adopting that geometry and format.
 *
 * The image buffer is (re)allocated when missing or too small, unless the
 * buffer is held, in which case an undersized held buffer is an error.
 * On any validation failure an error is logged and the image is unchanged.
 */
void Image::Assign(const unsigned int p_width, const unsigned int p_height, const unsigned int p_colours, const unsigned int p_subpixelorder, const uint8_t* new_buffer, const size_t buffer_size) {
  const unsigned int required = (p_width * p_height) * p_colours;

  if( new_buffer == NULL ) {
    Error("Attempt to assign buffer from a NULL pointer");
    return;
  }

  if( buffer_size < required ) {
    Error("Attempt to assign buffer from an undersized buffer of size: %zu",buffer_size);
    return;
  }

  if( p_height == 0 || p_width == 0 ) {
    Error("Attempt to assign buffer with invalid width or height: %d %d",p_width,p_height);
    return;
  }

  const bool colours_supported = ( p_colours == ZM_COLOUR_GRAY8 || p_colours == ZM_COLOUR_RGB24 || p_colours == ZM_COLOUR_RGB32 );
  if( !colours_supported ) {
    Error("Attempt to assign buffer with unexpected colours per pixel: %d",p_colours);
    return;
  }

  const bool format_differs = ( p_width != width || p_height != height || p_colours != colours || p_subpixelorder != subpixelorder );
  if( !buffer || format_differs ) {
    if( holdbuffer && buffer ) {
      /* A held buffer is never replaced, so it has to be large enough already */
      if( required > allocation ) {
        Error("Held buffer is undersized for assigned buffer");
        return;
      }
    } else if( required > allocation || !buffer ) {
      DumpImgBuffer();
      AllocImgBuffer(required);
    }

    width = p_width;
    height = p_height;
    pixels = width*height;
    colours = p_colours;
    subpixelorder = p_subpixelorder;
    size = required;
  }

  /* Copying a buffer onto itself would be pointless */
  if( buffer != new_buffer ) {
    (*fptr_imgbufcpy)(buffer, new_buffer, size);
  }
}
/*
 * Copies the geometry, pixel format and pixel data of another image into
 * this one.
 *
 * The image buffer is (re)allocated when missing or too small, unless the
 * buffer is held, in which case an undersized held buffer is an error.
 * On any validation failure an error is logged and the image is unchanged.
 */
void Image::Assign( const Image &image ) {
  const unsigned int required = (image.width * image.height) * image.colours;

  if( image.buffer == NULL ) {
    Error("Attempt to assign image with an empty buffer");
    return;
  }

  const bool colours_supported = ( image.colours == ZM_COLOUR_GRAY8 || image.colours == ZM_COLOUR_RGB24 || image.colours == ZM_COLOUR_RGB32 );
  if( !colours_supported ) {
    Error("Attempt to assign image with unexpected colours per pixel: %d",image.colours);
    return;
  }

  const bool format_differs = ( image.width != width || image.height != height || image.colours != colours || image.subpixelorder != subpixelorder );
  if( !buffer || format_differs ) {
    if( holdbuffer && buffer ) {
      /* A held buffer is never replaced, so it has to be large enough already */
      if( required > allocation ) {
        Error("Held buffer is undersized for assigned buffer");
        return;
      }
    } else if( required > allocation || !buffer ) {
      /* AllocImgBuffer also dumps any previous buffer */
      AllocImgBuffer(required);
    }

    width = image.width;
    height = image.height;
    pixels = width*height;
    colours = image.colours;
    subpixelorder = image.subpixelorder;
    size = required;
  }

  /* Nothing to copy when both images share the same buffer */
  if( buffer != image.buffer ) {
    (*fptr_imgbufcpy)(buffer, image.buffer, size);
  }
}
/*
 * Builds a new image of the requested format in which the edges of the
 * non-zero areas of this (GRAY8) image are drawn in the given colour on a
 * cleared background.
 *
 * A pixel is an edge when it is non-zero and at least one of its left,
 * right, upper or lower neighbours inside the image is zero.
 *
 * colour            colour to draw with; converted with rgb_convert() first
 * p_colours         bytes per pixel of the result (GRAY8, RGB24 or RGB32)
 * p_subpixelorder   subpixel order of the result
 * limits            optional box restricting the scanned area (inclusive);
 *                   the whole image is scanned when NULL
 *
 * Returns the newly allocated image. Panics if this image is not GRAY8.
 */
Image *Image::HighlightEdges( Rgb colour, unsigned int p_colours, unsigned int p_subpixelorder, const Box *limits )
{
  if ( colours != ZM_COLOUR_GRAY8 )
  {
    Panic( "Attempt to highlight image edges when colours = %d", colours );
  }

  /* Convert the colour's RGBA subpixel order into the image's subpixel order */
  colour = rgb_convert(colour,p_subpixelorder);

  /* Create a new image of the target format and set it to all black */
  Image *high_image = new Image( width, height, p_colours, p_subpixelorder );
  uint8_t* high_buff = high_image->WriteBuffer(width, height, p_colours, p_subpixelorder);
  high_image->Clear();

  /* Area to scan: the limits box if given, otherwise the whole image */
  unsigned int lo_x = 0;
  unsigned int lo_y = 0;
  unsigned int hi_x = width-1;
  unsigned int hi_y = height-1;
  if ( limits )
  {
    lo_x = limits->Lo().X();
    lo_y = limits->Lo().Y();
    hi_x = limits->Hi().X();
    hi_y = limits->Hi().Y();
  }

  switch ( p_colours )
  {
    case ZM_COLOUR_GRAY8:
    {
      for ( unsigned int row = lo_y; row <= hi_y; row++ )
      {
        const uint8_t* src = buffer + (row * width) + lo_x;
        uint8_t* dst = high_buff + (row * width) + lo_x;
        for ( unsigned int col = lo_x; col <= hi_x; col++, src++, dst++ )
        {
          const bool edge = *src && (
               ( col > 0 && !*(src-1) )
            || ( col < (width-1) && !*(src+1) )
            || ( row > 0 && !*(src-width) )
            || ( row < (height-1) && !*(src+width) ) );
          if ( edge )
          {
            *dst = colour;
          }
        }
      }
      break;
    }
    case ZM_COLOUR_RGB24:
    {
      for ( unsigned int row = lo_y; row <= hi_y; row++ )
      {
        const uint8_t* src = buffer + (row * width) + lo_x;
        uint8_t* dst = high_buff + (((row * width) + lo_x) * 3);
        for ( unsigned int col = lo_x; col <= hi_x; col++, src++, dst += 3 )
        {
          const bool edge = *src && (
               ( col > 0 && !*(src-1) )
            || ( col < (width-1) && !*(src+1) )
            || ( row > 0 && !*(src-width) )
            || ( row < (height-1) && !*(src+width) ) );
          if ( edge )
          {
            RED_PTR_RGBA(dst) = RED_VAL_RGBA(colour);
            GREEN_PTR_RGBA(dst) = GREEN_VAL_RGBA(colour);
            BLUE_PTR_RGBA(dst) = BLUE_VAL_RGBA(colour);
          }
        }
      }
      break;
    }
    case ZM_COLOUR_RGB32:
    {
      for ( unsigned int row = lo_y; row <= hi_y; row++ )
      {
        const uint8_t* src = buffer + (row * width) + lo_x;
        Rgb* dst = (Rgb*)(high_buff + (((row * width) + lo_x) * 4));
        for ( unsigned int col = lo_x; col <= hi_x; col++, src++, dst++ )
        {
          const bool edge = *src && (
               ( col > 0 && !*(src-1) )
            || ( col < (width-1) && !*(src+1) )
            || ( row > 0 && !*(src-width) )
            || ( row < (height-1) && !*(src+width) ) );
          if ( edge )
          {
            *dst = colour;
          }
        }
      }
      break;
    }
    default:
      /* Other formats: the cleared image is returned untouched */
      break;
  }

  return( high_image );
}
bool Image::ReadRaw( const char *filename )
{
FILE *infile;
if ( (infile = fopen( filename, "rb" )) == NULL )
{
Error( "Can't open %s: %s", filename, strerror(errno) );
return( false );
}
struct stat statbuf;
if ( fstat( fileno(infile), &statbuf ) < 0 )
{
Error( "Can't fstat %s: %s", filename, strerror(errno) );
return( false );
}
if ( statbuf.st_size != size )
{
Error( "Raw file size mismatch, expected %d bytes, found %ld", size, statbuf.st_size );
return( false );
}
if ( fread( buffer, size, 1, infile ) < 1 )
{
Fatal( "Unable to read from '%s': %s", filename, strerror(errno) );
return( false );
}
fclose( infile );
return( true );
}
/*
 * Writes the raw image buffer ('size' bytes) to a file, replacing any
 * existing content.
 *
 * Returns true on success, false (after logging) when the file cannot be
 * opened, written or flushed. The file is closed on every path.
 */
bool Image::WriteRaw( const char *filename ) const
{
  FILE *outfile;
  if ( (outfile = fopen( filename, "wb" )) == NULL )
  {
    Error( "Can't open %s: %s", filename, strerror(errno) );
    return( false );
  }

  if ( fwrite( buffer, size, 1, outfile ) != 1 )
  {
    Error( "Unable to write to '%s': %s", filename, strerror(errno) );
    fclose( outfile ); /* previously leaked on this path */
    return( false );
  }

  /* Buffered data is only flushed on close, so a failure here means the
   * file is incomplete. */
  if ( fclose( outfile ) != 0 )
  {
    Error( "Unable to write to '%s': %s", filename, strerror(errno) );
    return( false );
  }

  return( true );
}
bool Image::ReadJpeg( const char *filename, unsigned int p_colours, unsigned int p_subpixelorder)
{
unsigned int new_width, new_height, new_colours, new_subpixelorder;
struct jpeg_decompress_struct *cinfo = readjpg_dcinfo;
if ( !cinfo )
{
cinfo = readjpg_dcinfo = new jpeg_decompress_struct;
cinfo->err = jpeg_std_error( &jpg_err.pub );
jpg_err.pub.error_exit = zm_jpeg_error_exit;
jpg_err.pub.emit_message = zm_jpeg_emit_message;
jpeg_create_decompress( cinfo );
}
FILE *infile;
if ( (infile = fopen( filename, "rb" )) == NULL )
{
Error( "Can't open %s: %s", filename, strerror(errno) );
return( false );
}
if ( setjmp( jpg_err.setjmp_buffer ) )
{
jpeg_abort_decompress( cinfo );
fclose( infile );
return( false );
}
jpeg_stdio_src( cinfo, infile );
jpeg_read_header( cinfo, TRUE );
if ( cinfo->num_components != 1 && cinfo->num_components != 3 )
{
Error( "Unexpected colours when reading jpeg image: %d", colours );
jpeg_abort_decompress( cinfo );
fclose( infile );
return( false );
}
/* Check if the image has at least one huffman table defined. If not, use the standard ones */
/* This is required for the MJPEG capture palette of USB devices */
if(cinfo->dc_huff_tbl_ptrs[0] == NULL) {
zm_use_std_huff_tables(cinfo);
}
new_width = cinfo->image_width;
new_height = cinfo->image_height;
if ( width != new_width || height != new_height )
{
Debug(9,"Image dimensions differ. Old: %ux%u New: %ux%u",width,height,new_width,new_height);
}
switch(p_colours) {
case ZM_COLOUR_GRAY8:
{
cinfo->out_color_space = JCS_GRAYSCALE;
new_colours = ZM_COLOUR_GRAY8;
new_subpixelorder = ZM_SUBPIX_ORDER_NONE;
break;
}
case ZM_COLOUR_RGB32:
{
#ifdef JCS_EXTENSIONS
new_colours = ZM_COLOUR_RGB32;
if(p_subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
cinfo->out_color_space = JCS_EXT_BGRX;
new_subpixelorder = ZM_SUBPIX_ORDER_BGRA;
} else if(p_subpixelorder == ZM_SUBPIX_ORDER_ARGB) {
cinfo->out_color_space = JCS_EXT_XRGB;
new_subpixelorder = ZM_SUBPIX_ORDER_ARGB;
} else if(p_subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
cinfo->out_color_space = JCS_EXT_XBGR;
new_subpixelorder = ZM_SUBPIX_ORDER_ABGR;
} else {
/* Assume RGBA */
cinfo->out_color_space = JCS_EXT_RGBX;
new_subpixelorder = ZM_SUBPIX_ORDER_RGBA;
}
break;
#else
Warning("libjpeg-turbo is required for reading a JPEG directly into a RGB32 buffer, reading into a RGB24 buffer instead.");
#endif
}
case ZM_COLOUR_RGB24:
default:
{
new_colours = ZM_COLOUR_RGB24;
if(p_subpixelorder == ZM_SUBPIX_ORDER_BGR) {
#ifdef JCS_EXTENSIONS
cinfo->out_color_space = JCS_EXT_BGR;
new_subpixelorder = ZM_SUBPIX_ORDER_BGR;
#else
Warning("libjpeg-turbo is required for reading a JPEG directly into a BGR24 buffer, reading into a RGB24 buffer instead.");
cinfo->out_color_space = JCS_RGB;
new_subpixelorder = ZM_SUBPIX_ORDER_RGB;
#endif
} else {
/* Assume RGB */
/*
#ifdef JCS_EXTENSIONS
cinfo->out_color_space = JCS_EXT_RGB;
#else
cinfo->out_color_space = JCS_RGB;
#endif
*/
cinfo->out_color_space = JCS_RGB;
new_subpixelorder = ZM_SUBPIX_ORDER_RGB;
}
break;
}
}
if(WriteBuffer(new_width, new_height, new_colours, new_subpixelorder) == NULL) {
Error("Failed requesting writeable buffer for reading JPEG image.");
jpeg_abort_decompress( cinfo );
fclose( infile );
return( false );
}
jpeg_start_decompress( cinfo );
JSAMPROW row_pointer; /* pointer to a single row */
int row_stride = width * colours; /* physical row width in buffer */
while ( cinfo->output_scanline < cinfo->output_height )
{
row_pointer = &buffer[cinfo->output_scanline * row_stride];
jpeg_read_scanlines( cinfo, &row_pointer, 1 );
}
jpeg_finish_decompress( cinfo );
fclose( infile );
return( true );
}
// Multiple calling formats to permit inclusion (or not) of both quality_override and timestamp (exif), with suitable defaults.
// Note quality=zero means default
// This variant: explicit quality, no timestamp.
bool Image::WriteJpeg( const char *filename, int quality_override) const
{
  // A zeroed timestamp makes the full variant leave out the exif segment
  struct timeval no_timestamp = {0,0};
  return Image::WriteJpeg( filename, quality_override, no_timestamp );
}
// Variant with default quality and no timestamp.
bool Image::WriteJpeg( const char *filename) const
{
  const int default_quality = 0; // zero means default
  struct timeval no_timestamp = {0,0}; // zeroed: no exif segment is written
  return Image::WriteJpeg( filename, default_quality, no_timestamp );
}
// Variant with default quality and an explicit timestamp.
bool Image::WriteJpeg( const char *filename, struct timeval timestamp ) const
{
  const int default_quality = 0; // zero means default
  return Image::WriteJpeg( filename, default_quality, timestamp );
}
/*
 * Writes the image to a JPEG file.
 *
 * filename          file to create or overwrite
 * quality_override  JPEG quality; zero selects config.jpeg_file_quality
 * timestamp         when tv_sec is non-zero, a small exif segment carrying
 *                   that time is added to the file
 *
 * GRAY8 images are colourised into a temporary RGB24 copy first when
 * config.colour_jpeg_files is set. Returns true on success, false when the
 * file cannot be opened, the pixel format cannot be encoded by the JPEG
 * library in use, or the library reports an error.
 */
bool Image::WriteJpeg( const char *filename, int quality_override, struct timeval timestamp ) const
{
  if ( config.colour_jpeg_files && colours == ZM_COLOUR_GRAY8 )
  {
    Image temp_image( *this );
    temp_image.Colourise( ZM_COLOUR_RGB24, ZM_SUBPIX_ORDER_RGB );
    return( temp_image.WriteJpeg( filename, quality_override, timestamp) );
  }

  int quality = quality_override?quality_override:config.jpeg_file_quality;

  /* The quality value indexes the 101 entry compressor cache, so keep it
   * inside 0..100. */
  if ( quality < 0 )
    quality = 0;
  else if ( quality > 100 )
    quality = 100;

  /* One compressor per quality value, created on first use and cached */
  struct jpeg_compress_struct *cinfo = writejpg_ccinfo[quality];

  if ( !cinfo )
  {
    cinfo = writejpg_ccinfo[quality] = new jpeg_compress_struct;
    cinfo->err = jpeg_std_error( &jpg_err.pub );
    jpg_err.pub.error_exit = zm_jpeg_error_exit;
    jpg_err.pub.emit_message = zm_jpeg_emit_message;
    jpeg_create_compress( cinfo );
  }

  FILE *outfile;
  if ( (outfile = fopen( filename, "wb" )) == NULL )
  {
    Error( "Can't open %s: %s", filename, strerror(errno) );
    return( false );
  }

  /* Library errors come back here, the same way the decode paths handle
   * them. Without this, an error raised while compressing would jump to
   * whatever stale context was last stored in jpg_err.
   * NOTE(review): assumes zm_jpeg_error_exit longjmps to jpg_err.setjmp_buffer,
   * as the setjmp in the decode functions implies. */
  if ( setjmp( jpg_err.setjmp_buffer ) )
  {
    jpeg_abort_compress( cinfo );
    fclose( outfile );
    return( false );
  }

  jpeg_stdio_dest( cinfo, outfile );

  cinfo->image_width = width; /* image width and height, in pixels */
  cinfo->image_height = height;

  switch(colours) {
    case ZM_COLOUR_GRAY8:
    {
      cinfo->input_components = 1;
      cinfo->in_color_space = JCS_GRAYSCALE;
      break;
    }
    case ZM_COLOUR_RGB32:
    {
#ifdef JCS_EXTENSIONS
      cinfo->input_components = 4;
      if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
        cinfo->in_color_space = JCS_EXT_BGRX;
      } else if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) {
        cinfo->in_color_space = JCS_EXT_XRGB;
      } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
        cinfo->in_color_space = JCS_EXT_XBGR;
      } else {
        /* Assume RGBA */
        cinfo->in_color_space = JCS_EXT_RGBX;
      }
#else
      Error("libjpeg-turbo is required for JPEG encoding directly from RGB32 source");
      jpeg_abort_compress( cinfo );
      fclose(outfile);
      return(false);
#endif
      break;
    }
    case ZM_COLOUR_RGB24:
    default:
    {
      cinfo->input_components = 3;
      if(subpixelorder == ZM_SUBPIX_ORDER_BGR) {
#ifdef JCS_EXTENSIONS
        cinfo->in_color_space = JCS_EXT_BGR;
#else
        Error("libjpeg-turbo is required for JPEG encoding directly from BGR24 source");
        jpeg_abort_compress( cinfo );
        fclose(outfile);
        return(false);
#endif
      } else {
        /* Assume RGB */
        /*
#ifdef JCS_EXTENSIONS
        cinfo->out_color_space = JCS_EXT_RGB;
#else
        cinfo->out_color_space = JCS_RGB;
#endif
        */
        cinfo->in_color_space = JCS_RGB;
      }
      break;
    }
  }

  jpeg_set_defaults( cinfo );
  jpeg_set_quality( cinfo, quality, FALSE );
  cinfo->dct_method = JDCT_FASTEST;

  jpeg_start_compress( cinfo, TRUE );
  if ( config.add_jpeg_comments && text[0] )
  {
    jpeg_write_marker( cinfo, JPEG_COM, (const JOCTET *)text, strlen(text) );
  }

  // If we have a non-zero time (meaning a parameter was passed in), then form a simple exif segment with that time as DateTimeOriginal and SubsecTimeOriginal
  // No timestamp just leave off the exif section.
  if(timestamp.tv_sec)
  {
#define EXIFTIMES_MS_OFFSET 0x36 // three decimal digits for milliseconds
#define EXIFTIMES_MS_LEN 0x03
#define EXIFTIMES_OFFSET 0x3E // 19 characters format '2015:07:21 13:14:45' not including quotes
#define EXIFTIMES_LEN 0x13 // = 19
#define EXIF_CODE 0xE1

    char timebuf[64], msbuf[64];
    strftime(timebuf, sizeof timebuf, "%Y:%m:%d %H:%M:%S", localtime(&(timestamp.tv_sec)));
    snprintf(msbuf, sizeof msbuf, "%06d",(int)(timestamp.tv_usec)); // we only use milliseconds because that's all defined in exif, but this is the whole microseconds because we have it
    /* Exif template; the 0xff placeholders are overwritten with the time strings below */
    unsigned char exiftimes[82] = {
      0x45, 0x78, 0x69, 0x66, 0x00, 0x00, 0x49, 0x49, 0x2A, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00,
      0x69, 0x87, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x02, 0x00, 0x03, 0x90, 0x02, 0x00, 0x14, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x91, 0x92,
      0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff,
      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
      0xff, 0x00 };
    memcpy(&exiftimes[EXIFTIMES_OFFSET], timebuf,EXIFTIMES_LEN);
    memcpy(&exiftimes[EXIFTIMES_MS_OFFSET], msbuf ,EXIFTIMES_MS_LEN);
    jpeg_write_marker (cinfo, EXIF_CODE, (const JOCTET *)exiftimes, sizeof(exiftimes) );
  }

  /* Feed the image to the compressor one scanline at a time */
  JSAMPROW row_pointer; /* pointer to a single row */
  int row_stride = cinfo->image_width * colours; /* physical row width in buffer */
  while ( cinfo->next_scanline < cinfo->image_height )
  {
    row_pointer = &buffer[cinfo->next_scanline * row_stride];
    jpeg_write_scanlines( cinfo, &row_pointer, 1 );
  }

  jpeg_finish_compress( cinfo );

  fclose( outfile );

  return( true );
}
bool Image::DecodeJpeg( const JOCTET *inbuffer, int inbuffer_size, unsigned int p_colours, unsigned int p_subpixelorder)
{
unsigned int new_width, new_height, new_colours, new_subpixelorder;
struct jpeg_decompress_struct *cinfo = decodejpg_dcinfo;
if ( !cinfo )
{
cinfo = decodejpg_dcinfo = new jpeg_decompress_struct;
cinfo->err = jpeg_std_error( &jpg_err.pub );
jpg_err.pub.error_exit = zm_jpeg_error_exit;
jpg_err.pub.emit_message = zm_jpeg_emit_message;
jpeg_create_decompress( cinfo );
}
if ( setjmp( jpg_err.setjmp_buffer ) )
{
jpeg_abort_decompress( cinfo );
return( false );
}
zm_jpeg_mem_src( cinfo, inbuffer, inbuffer_size );
jpeg_read_header( cinfo, TRUE );
if ( cinfo->num_components != 1 && cinfo->num_components != 3 )
{
Error( "Unexpected colours when reading jpeg image: %d", colours );
jpeg_abort_decompress( cinfo );
return( false );
}
/* Check if the image has at least one huffman table defined. If not, use the standard ones */
/* This is required for the MJPEG capture palette of USB devices */
if(cinfo->dc_huff_tbl_ptrs[0] == NULL) {
zm_use_std_huff_tables(cinfo);
}
new_width = cinfo->image_width;
new_height = cinfo->image_height;
if ( width != new_width || height != new_height )
{
Debug(9,"Image dimensions differ. Old: %ux%u New: %ux%u",width,height,new_width,new_height);
}
switch(p_colours) {
case ZM_COLOUR_GRAY8:
{
cinfo->out_color_space = JCS_GRAYSCALE;
new_colours = ZM_COLOUR_GRAY8;
new_subpixelorder = ZM_SUBPIX_ORDER_NONE;
break;
}
case ZM_COLOUR_RGB32:
{
#ifdef JCS_EXTENSIONS
new_colours = ZM_COLOUR_RGB32;
if(p_subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
cinfo->out_color_space = JCS_EXT_BGRX;
new_subpixelorder = ZM_SUBPIX_ORDER_BGRA;
} else if(p_subpixelorder == ZM_SUBPIX_ORDER_ARGB) {
cinfo->out_color_space = JCS_EXT_XRGB;
new_subpixelorder = ZM_SUBPIX_ORDER_ARGB;
} else if(p_subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
cinfo->out_color_space = JCS_EXT_XBGR;
new_subpixelorder = ZM_SUBPIX_ORDER_ABGR;
} else {
/* Assume RGBA */
cinfo->out_color_space = JCS_EXT_RGBX;
new_subpixelorder = ZM_SUBPIX_ORDER_RGBA;
}
break;
#else
Warning("libjpeg-turbo is required for reading a JPEG directly into a RGB32 buffer, reading into a RGB24 buffer instead.");
#endif
}
case ZM_COLOUR_RGB24:
default:
{
new_colours = ZM_COLOUR_RGB24;
if(p_subpixelorder == ZM_SUBPIX_ORDER_BGR) {
#ifdef JCS_EXTENSIONS
cinfo->out_color_space = JCS_EXT_BGR;
new_subpixelorder = ZM_SUBPIX_ORDER_BGR;
#else
Warning("libjpeg-turbo is required for reading a JPEG directly into a BGR24 buffer, reading into a RGB24 buffer instead.");
cinfo->out_color_space = JCS_RGB;
new_subpixelorder = ZM_SUBPIX_ORDER_RGB;
#endif
} else {
/* Assume RGB */
/*
#ifdef JCS_EXTENSIONS
cinfo->out_color_space = JCS_EXT_RGB;
#else
cinfo->out_color_space = JCS_RGB;
#endif
*/
cinfo->out_color_space = JCS_RGB;
new_subpixelorder = ZM_SUBPIX_ORDER_RGB;
}
break;
}
}
if(WriteBuffer(new_width, new_height, new_colours, new_subpixelorder) == NULL) {
Error("Failed requesting writeable buffer for reading JPEG image.");
jpeg_abort_decompress( cinfo );
return( false );
}
jpeg_start_decompress( cinfo );
JSAMPROW row_pointer; /* pointer to a single row */
int row_stride = width * colours; /* physical row width in buffer */
while ( cinfo->output_scanline < cinfo->output_height )
{
row_pointer = &buffer[cinfo->output_scanline * row_stride];
jpeg_read_scanlines( cinfo, &row_pointer, 1 );
}
jpeg_finish_decompress( cinfo );
return( true );
}
/*
 * Encodes the image as a JPEG into a caller supplied memory buffer.
 *
 * outbuffer         destination for the encoded data
 * outbuffer_size    passed on to zm_jpeg_mem_dest() together with outbuffer
 *                   (presumably receives the encoded size -- confirm there)
 * quality_override  JPEG quality; zero selects config.jpeg_stream_quality
 *
 * GRAY8 images are colourised into a temporary RGB24 copy first when
 * config.colour_jpeg_files is set. Returns true on success, false when the
 * pixel format cannot be encoded by the JPEG library in use or the library
 * reports an error.
 */
bool Image::EncodeJpeg( JOCTET *outbuffer, int *outbuffer_size, int quality_override ) const
{
  if ( config.colour_jpeg_files && colours == ZM_COLOUR_GRAY8 )
  {
    Image temp_image( *this );
    temp_image.Colourise(ZM_COLOUR_RGB24, ZM_SUBPIX_ORDER_RGB );
    return( temp_image.EncodeJpeg( outbuffer, outbuffer_size, quality_override ) );
  }

  int quality = quality_override?quality_override:config.jpeg_stream_quality;

  /* The quality value indexes the 101 entry compressor cache, so keep it
   * inside 0..100. */
  if ( quality < 0 )
    quality = 0;
  else if ( quality > 100 )
    quality = 100;

  /* One compressor per quality value, created on first use and cached */
  struct jpeg_compress_struct *cinfo = encodejpg_ccinfo[quality];

  if ( !cinfo )
  {
    cinfo = encodejpg_ccinfo[quality] = new jpeg_compress_struct;
    cinfo->err = jpeg_std_error( &jpg_err.pub );
    jpg_err.pub.error_exit = zm_jpeg_error_exit;
    jpg_err.pub.emit_message = zm_jpeg_emit_message;
    jpeg_create_compress( cinfo );
  }

  /* Library errors come back here, the same way the decode paths handle
   * them. Without this, an error raised while compressing would jump to
   * whatever stale context was last stored in jpg_err.
   * NOTE(review): assumes zm_jpeg_error_exit longjmps to jpg_err.setjmp_buffer,
   * as the setjmp in the decode functions implies. */
  if ( setjmp( jpg_err.setjmp_buffer ) )
  {
    jpeg_abort_compress( cinfo );
    return( false );
  }

  zm_jpeg_mem_dest( cinfo, outbuffer, outbuffer_size );

  cinfo->image_width = width; /* image width and height, in pixels */
  cinfo->image_height = height;

  switch(colours) {
    case ZM_COLOUR_GRAY8:
    {
      cinfo->input_components = 1;
      cinfo->in_color_space = JCS_GRAYSCALE;
      break;
    }
    case ZM_COLOUR_RGB32:
    {
#ifdef JCS_EXTENSIONS
      cinfo->input_components = 4;
      if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
        cinfo->in_color_space = JCS_EXT_BGRX;
      } else if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) {
        cinfo->in_color_space = JCS_EXT_XRGB;
      } else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
        cinfo->in_color_space = JCS_EXT_XBGR;
      } else {
        /* Assume RGBA */
        cinfo->in_color_space = JCS_EXT_RGBX;
      }
#else
      Error("libjpeg-turbo is required for JPEG encoding directly from RGB32 source");
      jpeg_abort_compress( cinfo );
      return(false);
#endif
      break;
    }
    case ZM_COLOUR_RGB24:
    default:
    {
      cinfo->input_components = 3;
      if(subpixelorder == ZM_SUBPIX_ORDER_BGR) {
#ifdef JCS_EXTENSIONS
        cinfo->in_color_space = JCS_EXT_BGR;
#else
        Error("libjpeg-turbo is required for JPEG encoding directly from BGR24 source");
        jpeg_abort_compress( cinfo );
        return(false);
#endif
      } else {
        /* Assume RGB */
        /*
#ifdef JCS_EXTENSIONS
        cinfo->out_color_space = JCS_EXT_RGB;
#else
        cinfo->out_color_space = JCS_RGB;
#endif
        */
        cinfo->in_color_space = JCS_RGB;
      }
      break;
    }
  }

  jpeg_set_defaults( cinfo );
  jpeg_set_quality( cinfo, quality, FALSE );
  cinfo->dct_method = JDCT_FASTEST;

  jpeg_start_compress( cinfo, TRUE );

  /* Feed the image to the compressor one scanline at a time */
  JSAMPROW row_pointer; /* pointer to a single row */
  int row_stride = cinfo->image_width * colours; /* physical row width in buffer */
  while ( cinfo->next_scanline < cinfo->image_height )
  {
    row_pointer = &buffer[cinfo->next_scanline * row_stride];
    jpeg_write_scanlines( cinfo, &row_pointer, 1 );
  }

  jpeg_finish_compress( cinfo );

  return( true );
}
#if HAVE_ZLIB_H
bool Image::Unzip( const Bytef *inbuffer, unsigned long inbuffer_size )
{
unsigned long zip_size = size;
int result = uncompress( buffer, &zip_size, inbuffer, inbuffer_size );
if ( result != Z_OK )
{
Error( "Unzip failed, result = %d", result );
return( false );
}
if ( zip_size != (unsigned int)size )
{
Error( "Unzip failed, size mismatch, expected %d bytes, got %ld", size, zip_size );
return( false );
}
return( true );
}
/*
 * Deflate this image's buffer into outbuffer using compress2().
 * outbuffer_size is passed through to compress2() unchanged.
 * Returns true when compress2() reports Z_OK.
 */
bool Image::Zip( Bytef *outbuffer, unsigned long *outbuffer_size, int compression_level ) const
{
    int status = compress2( outbuffer, outbuffer_size, buffer, size, compression_level );
    if ( status == Z_OK )
    {
        return( true );
    }
    Error( "Zip failed, result = %d", status );
    return( false );
}
#endif // HAVE_ZLIB_H
bool Image::Crop( unsigned int lo_x, unsigned int lo_y, unsigned int hi_x, unsigned int hi_y )
{
unsigned int new_width = (hi_x-lo_x)+1;
unsigned int new_height = (hi_y-lo_y)+1;
if ( lo_x > hi_x || lo_y > hi_y )
{
Error( "Invalid or reversed crop region %d,%d -> %d,%d", lo_x, lo_y, hi_x, hi_y );
return( false );
}
if ( lo_x < 0 || hi_x > (width-1) || ( lo_y < 0 || hi_y > (height-1) ) )
{
Error( "Attempting to crop outside image, %d,%d -> %d,%d not in %d,%d", lo_x, lo_y, hi_x, hi_y, width-1, height-1 );
return( false );
}
if ( new_width == width && new_height == height )
{
return( true );
}
unsigned int new_size = new_width*new_height*colours;
uint8_t *new_buffer = AllocBuffer(new_size);
unsigned int new_stride = new_width*colours;
for ( unsigned int y = lo_y, ny = 0; y <= hi_y; y++, ny++ )
{
unsigned char *pbuf = &buffer[((y*width)+lo_x)*colours];
unsigned char *pnbuf = &new_buffer[(ny*new_width)*colours];
memcpy( pnbuf, pbuf, new_stride );
}
AssignDirect(new_width, new_height, colours, subpixelorder, new_buffer, new_size, ZM_BUFTYPE_ZM);
return( true );
}
/* Crop to the corners of a Box; forwards to the four coordinate overload */
bool Image::Crop( const Box &limits )
{
    const bool cropped = Crop( limits.LoX(), limits.LoY(), limits.HiX(), limits.HiY() );
    return( cropped );
}
/* Far from complete */
/* Need to implement all possible overlay combinations */
/*
 * Overlay `image` onto this image, pixel for pixel.
 *
 * Both images must have the same width and height, otherwise Panic() is called.
 * A source pixel whose value is zero (for colour sources: red, green and blue
 * all zero) leaves the destination pixel untouched; any other source pixel
 * replaces the destination pixel.
 *
 * A greyscale destination is first converted with Colourise() to the source's
 * format when the source is RGB24 or RGB32. RGB32 onto RGB24 and RGB24 onto
 * RGB32 are not implemented and only log an error.
 */
void Image::Overlay( const Image &image )
{
if ( !(width == image.width && height == image.height) )
{
Panic( "Attempt to overlay different sized images, expected %dx%d, got %dx%d", width, height, image.width, image.height );
}
/* Same pixel size but different channel order is only warned about, not converted */
if( colours == image.colours && subpixelorder != image.subpixelorder ) {
Warning("Attempt to overlay images of same format but with different subpixel order.");
}
/* Grayscale on top of grayscale - complete */
if ( colours == ZM_COLOUR_GRAY8 && image.colours == ZM_COLOUR_GRAY8 ) {
const uint8_t* const max_ptr = buffer+size;
const uint8_t* psrc = image.buffer;
uint8_t* pdest = buffer;
while( pdest < max_ptr )
{
if ( *psrc )
{
*pdest = *psrc;
}
pdest++;
psrc++;
}
/* RGB24 on top of grayscale - convert to same format first - complete */
} else if ( colours == ZM_COLOUR_GRAY8 && image.colours == ZM_COLOUR_RGB24 ) {
Colourise(image.colours, image.subpixelorder);
/* buffer and size are read after Colourise(), so they describe the converted image */
const uint8_t* const max_ptr = buffer+size;
const uint8_t* psrc = image.buffer;
uint8_t* pdest = buffer;
while( pdest < max_ptr )
{
if ( RED_PTR_RGBA(psrc) || GREEN_PTR_RGBA(psrc) || BLUE_PTR_RGBA(psrc) )
{
RED_PTR_RGBA(pdest) = RED_PTR_RGBA(psrc);
GREEN_PTR_RGBA(pdest) = GREEN_PTR_RGBA(psrc);
BLUE_PTR_RGBA(pdest) = BLUE_PTR_RGBA(psrc);
}
pdest += 3;
psrc += 3;
}
/* RGB32 on top of grayscale - convert to same format first - complete */
} else if( colours == ZM_COLOUR_GRAY8 && image.colours == ZM_COLOUR_RGB32 ) {
Colourise(image.colours, image.subpixelorder);
const Rgb* const max_ptr = (Rgb*)(buffer+size);
const Rgb* prsrc = (Rgb*)image.buffer;
Rgb* prdest = (Rgb*)buffer;
if(subpixelorder == ZM_SUBPIX_ORDER_RGBA || subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
/* RGB\BGR\RGBA\BGRA subpixel order - Alpha byte is last */
while (prdest < max_ptr) {
if ( RED_PTR_RGBA(prsrc) || GREEN_PTR_RGBA(prsrc) || BLUE_PTR_RGBA(prsrc) )
{
/* Whole 32 bit pixel is copied, alpha byte included */
*prdest = *prsrc;
}
prdest++;
prsrc++;
}
} else {
/* ABGR\ARGB subpixel order - Alpha byte is first */
while (prdest < max_ptr) {
if ( RED_PTR_ABGR(prsrc) || GREEN_PTR_ABGR(prsrc) || BLUE_PTR_ABGR(prsrc) )
{
*prdest = *prsrc;
}
prdest++;
prsrc++;
}
}
/* Grayscale on top of RGB24 - complete */
} else if ( colours == ZM_COLOUR_RGB24 && image.colours == ZM_COLOUR_GRAY8 ) {
const uint8_t* const max_ptr = buffer+size;
const uint8_t* psrc = image.buffer;
uint8_t* pdest = buffer;
while( pdest < max_ptr )
{
if ( *psrc )
{
/* The grey value is written to all three channels */
RED_PTR_RGBA(pdest) = GREEN_PTR_RGBA(pdest) = BLUE_PTR_RGBA(pdest) = *psrc;
}
pdest += 3;
psrc++;
}
/* RGB24 on top of RGB24 - not complete. need to take care of different subpixel orders */
} else if ( colours == ZM_COLOUR_RGB24 && image.colours == ZM_COLOUR_RGB24 ) {
const uint8_t* const max_ptr = buffer+size;
const uint8_t* psrc = image.buffer;
uint8_t* pdest = buffer;
while( pdest < max_ptr )
{
if ( RED_PTR_RGBA(psrc) || GREEN_PTR_RGBA(psrc) || BLUE_PTR_RGBA(psrc) )
{
RED_PTR_RGBA(pdest) = RED_PTR_RGBA(psrc);
GREEN_PTR_RGBA(pdest) = GREEN_PTR_RGBA(psrc);
BLUE_PTR_RGBA(pdest) = BLUE_PTR_RGBA(psrc);
}
pdest += 3;
psrc += 3;
}
/* RGB32 on top of RGB24 - TO BE DONE */
} else if ( colours == ZM_COLOUR_RGB24 && image.colours == ZM_COLOUR_RGB32 ) {
Error("Overlay of RGB32 ontop of RGB24 is not supported.");
/* Grayscale on top of RGB32 - complete */
} else if ( colours == ZM_COLOUR_RGB32 && image.colours == ZM_COLOUR_GRAY8 ) {
const Rgb* const max_ptr = (Rgb*)(buffer+size);
Rgb* prdest = (Rgb*)buffer;
const uint8_t* psrc = image.buffer;
if(subpixelorder == ZM_SUBPIX_ORDER_RGBA || subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
/* RGBA\BGRA subpixel order - Alpha byte is last */
while (prdest < max_ptr) {
if ( *psrc )
{
RED_PTR_RGBA(prdest) = GREEN_PTR_RGBA(prdest) = BLUE_PTR_RGBA(prdest) = *psrc;
}
prdest++;
psrc++;
}
} else {
/* ABGR\ARGB subpixel order - Alpha byte is first */
while (prdest < max_ptr) {
if ( *psrc )
{
RED_PTR_ABGR(prdest) = GREEN_PTR_ABGR(prdest) = BLUE_PTR_ABGR(prdest) = *psrc;
}
prdest++;
psrc++;
}
}
/* RGB24 on top of RGB32 - TO BE DONE */
} else if ( colours == ZM_COLOUR_RGB32 && image.colours == ZM_COLOUR_RGB24 ) {
Error("Overlay of RGB24 ontop of RGB32 is not supported.");
/* RGB32 on top of RGB32 - not complete. need to take care of different subpixel orders */
} else if ( colours == ZM_COLOUR_RGB32 && image.colours == ZM_COLOUR_RGB32 ) {
const Rgb* const max_ptr = (Rgb*)(buffer+size);
Rgb* prdest = (Rgb*)buffer;
const Rgb* prsrc = (Rgb*)image.buffer;
/* The position of the alpha byte is taken from the source image's subpixel order */
if(image.subpixelorder == ZM_SUBPIX_ORDER_RGBA || image.subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
/* RGB\BGR\RGBA\BGRA subpixel order - Alpha byte is last */
while (prdest < max_ptr) {
if ( RED_PTR_RGBA(prsrc) || GREEN_PTR_RGBA(prsrc) || BLUE_PTR_RGBA(prsrc) )
{
*prdest = *prsrc;
}
prdest++;
prsrc++;
}
} else {
/* ABGR\ARGB subpixel order - Alpha byte is first */
while (prdest < max_ptr) {
if ( RED_PTR_ABGR(prsrc) || GREEN_PTR_ABGR(prsrc) || BLUE_PTR_ABGR(prsrc) )
{
*prdest = *prsrc;
}
prdest++;
prsrc++;
}
}
}
}
/* RGB32 compatible: complete */
/*
 * Copy `image` into this image with its top left corner at x,y.
 *
 * Every pixel of `image` replaces the corresponding destination pixel; there
 * is no transparency handling in this overload.
 *
 * Panics when `image` is larger than this image, when it would extend past
 * the right or bottom edge at the requested position, or when the two images
 * have a different colours value. Logs an error and does nothing for a
 * colours value other than GRAY8, RGB24 or RGB32.
 */
void Image::Overlay( const Image &image, unsigned int x, unsigned int y )
{
    /* The overlay must not be larger than the destination */
    if ( width < image.width || height < image.height )
    {
        Panic( "Attempt to overlay image too big for destination, %dx%d > %dx%d", image.width, image.height, width, height );
    }
    /* Nor may it run off the right or bottom edge once positioned at x,y */
    if ( width < (x+image.width) || height < (y+image.height) )
    {
        Panic( "Attempt to overlay image outside of destination bounds, %dx%d @ %dx%d > %dx%d", image.width, image.height, x, y, width, height );
    }
    if ( colours != image.colours )
    {
        Panic( "Attempt to partial overlay differently coloured images, expected %d, got %d", colours, image.colours );
    }
    if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32) )
    {
        Error("Overlay called with unexpected colours: %d", colours);
        return;
    }

    /* For the three supported formats the colours value is the number of bytes
       per pixel, so each source row can be copied as one contiguous run. The
       source rows are tightly packed; the destination rows are `width` pixels apart. */
    const unsigned int row_bytes = image.width*colours;
    const uint8_t *psrc = image.buffer;
    for ( unsigned int row = 0; row < image.height; row++, psrc += row_bytes )
    {
        memcpy( &buffer[(((y+row)*width)+x)*colours], psrc, row_bytes );
    }
}
void Image::Blend( const Image &image, int transparency )
{
#ifdef ZM_IMAGE_PROFILING
struct timespec start,end,diff;
unsigned long long executetime;
unsigned long milpixels;
#endif
uint8_t* new_buffer;
if ( !(width == image.width && height == image.height && colours == image.colours && subpixelorder == image.subpixelorder) )
{
Panic( "Attempt to blend different sized images, expected %dx%dx%d %d, got %dx%dx%d %d", width, height, colours, subpixelorder, image.width, image.height, image.colours, image.subpixelorder );
}
if(transparency <= 0)
return;
new_buffer = AllocBuffer(size);
#ifdef ZM_IMAGE_PROFILING
clock_gettime(CLOCK_THREAD_CPUTIME_ID,&start);
#endif
/* Do the blending */
(*fptr_blend)(buffer, image.buffer, new_buffer, size, transparency);
#ifdef ZM_IMAGE_PROFILING
clock_gettime(CLOCK_THREAD_CPUTIME_ID,&end);
timespec_diff(&start,&end,&diff);
executetime = (1000000000ull * diff.tv_sec) + diff.tv_nsec;
milpixels = (unsigned long)((long double)size)/((((long double)executetime)/1000));
Debug(5, "Blend: %u colours blended in %llu nanoseconds, %lu million colours/s\n",size,executetime,milpixels);
#endif
AssignDirect( width, height, colours, subpixelorder, new_buffer, size, ZM_BUFTYPE_ZM);
}
/*
 * Build a new image in which every byte is the mean of the corresponding
 * bytes of all input images.
 *
 * Returns 0 when n_images is zero and a plain copy of images[0] when there is
 * only one image. All images must share width, height and colours, otherwise
 * Panic() is called. The caller owns the returned image.
 */
Image *Image::Merge( unsigned int n_images, Image *images[] )
{
    if ( n_images <= 0 ) return( 0 );
    if ( n_images == 1 ) return( new Image( *images[0] ) );

    unsigned int width = images[0]->width;
    unsigned int height = images[0]->height;
    unsigned int colours = images[0]->colours;
    for ( unsigned int i = 1; i < n_images; i++ )
    {
        if ( !(width == images[i]->width && height == images[i]->height && colours == images[i]->colours) )
        {
            Panic( "Attempt to merge different sized images, expected %dx%dx%d, got %dx%dx%d, for image %d", width, height, colours, images[i]->width, images[i]->height, images[i]->colours, i );
        }
    }

    Image *result = new Image( width, height, images[0]->colours, images[0]->subpixelorder);
    unsigned int size = result->size;
    uint8_t *pdest = result->buffer;

    /* Index every buffer by the byte position so that the whole image is
       averaged, not just its first byte */
    for ( unsigned int i = 0; i < size; i++ )
    {
        unsigned int total = 0;
        for ( unsigned int j = 0; j < n_images; j++ )
        {
            total += images[j]->buffer[i];
        }
        pdest[i] = total/n_images;
    }
    return( result );
}
/*
 * Build a new image by folding images[1..] into a copy of images[0].
 *
 * Image i is mixed in with a factor of weight^i: every result byte becomes
 * result*(1-factor) + source*factor, truncated to a byte.
 *
 * Returns 0 when n_images is zero and a plain copy of images[0] when there is
 * only one image. All images must share width, height and colours, otherwise
 * Panic() is called. The caller owns the returned image.
 */
Image *Image::Merge( unsigned int n_images, Image *images[], double weight )
{
    if ( n_images <= 0 ) return( 0 );
    if ( n_images == 1 ) return( new Image( *images[0] ) );

    unsigned int width = images[0]->width;
    unsigned int height = images[0]->height;
    unsigned int colours = images[0]->colours;
    for ( unsigned int n = 1; n < n_images; n++ )
    {
        if ( width != images[n]->width || height != images[n]->height || colours != images[n]->colours )
        {
            Panic( "Attempt to merge different sized images, expected %dx%dx%d, got %dx%dx%d, for image %d", width, height, colours, images[n]->width, images[n]->height, images[n]->colours, n );
        }
    }

    Image *result = new Image( *images[0] );
    unsigned int size = result->size;
    uint8_t * const merged = result->buffer;

    double factor = weight;
    for ( unsigned int n = 1; n < n_images; n++ )
    {
        const uint8_t * const source = images[n]->buffer;
        for ( unsigned int b = 0; b < size; b++ )
        {
            merged[b] = (uint8_t)(((merged[b])*(1.0-factor))+((source[b])*factor));
        }
        /* Each further image contributes with another power of the weight */
        factor *= weight;
    }
    return( result );
}
/*
 * Build a new image showing, per channel value, in how many of the input
 * images that value differs from ref_colour by at least threshold.
 *
 * Each result byte is (count*255)/n_images, where count is the number of
 * input images whose byte at that position differs from the matching channel
 * of ref_colour by at least the matching channel of threshold.
 *
 * Returns 0 when n_images is zero and a plain copy of images[0] when there is
 * only one image. All images must share width, height and colours, otherwise
 * Panic() is called. The caller owns the returned image.
 *
 * NOTE(review): RGB_VAL() is applied for every c below `colours`; confirm that
 * the macro is defined for the fourth channel of RGB32 images.
 */
Image *Image::Highlight( unsigned int n_images, Image *images[], const Rgb threshold, const Rgb ref_colour )
{
    if ( n_images <= 0 ) return( 0 );
    if ( n_images == 1 ) return( new Image( *images[0] ) );

    unsigned int width = images[0]->width;
    unsigned int height = images[0]->height;
    unsigned int colours = images[0]->colours;
    for ( unsigned int i = 1; i < n_images; i++ )
    {
        if ( !(width == images[i]->width && height == images[i]->height && colours == images[i]->colours) )
        {
            Panic( "Attempt to highlight different sized images, expected %dx%dx%d, got %dx%dx%d, for image %d", width, height, colours, images[i]->width, images[i]->height, images[i]->colours, i );
        }
    }

    Image *result = new Image( width, height, images[0]->colours, images[0]->subpixelorder );
    unsigned int size = result->size;
    for ( unsigned int c = 0; c < colours; c++ )
    {
        /* Visit channel c of every pixel: byte offsets c, c+colours, c+2*colours, ... */
        for ( unsigned int i = c; i < size; i += colours )
        {
            unsigned int count = 0;
            for ( unsigned int j = 0; j < n_images; j++ )
            {
                const uint8_t *psrc = images[j]->buffer+i;

#ifndef SOLARIS
                if ( (unsigned)abs((*psrc)-RGB_VAL(ref_colour,c)) >= RGB_VAL(threshold,c) )
#else
                if ( (unsigned)std::abs((*psrc)-RGB_VAL(ref_colour,c)) >= RGB_VAL(threshold,c) )
#endif
                {
                    count++;
                }
            }
            result->buffer[i] = (count*255)/n_images;
        }
    }
    return( result );
}
/* New function to allow buffer re-use instead of allocating memory for the delta image every time */
/*
 * Write the greyscale difference between this image and `image` into
 * targetimage.
 *
 * Both source images must agree on width, height, colours and subpixel order,
 * otherwise Panic() is called. targetimage is asked for a writeable GRAY8
 * buffer of the same dimensions, and the delta routine matching this image's
 * pixel layout fills it.
 */
void Image::Delta( const Image &image, Image* targetimage) const
{
#ifdef ZM_IMAGE_PROFILING
    struct timespec start,end,diff;
    unsigned long long executetime;
    unsigned long milpixels;
#endif

    if ( width != image.width || height != image.height || colours != image.colours || subpixelorder != image.subpixelorder )
    {
        Panic( "Attempt to get delta of different sized images, expected %dx%dx%d %d, got %dx%dx%d %d", width, height, colours, subpixelorder, image.width, image.height, image.colours, image.subpixelorder);
    }

    uint8_t *pdiff = targetimage->WriteBuffer(width, height, ZM_COLOUR_GRAY8, ZM_SUBPIX_ORDER_NONE);
    if ( !pdiff )
    {
        Panic("Failed requesting writeable buffer for storing the delta image");
    }

#ifdef ZM_IMAGE_PROFILING
    clock_gettime(CLOCK_THREAD_CPUTIME_ID,&start);
#endif

    /* Dispatch on the pixel layout */
    if ( colours == ZM_COLOUR_RGB24 )
    {
        if ( subpixelorder == ZM_SUBPIX_ORDER_BGR )
        {
            (*fptr_delta8_bgr)(buffer, image.buffer, pdiff, pixels);
        }
        else
        {
            /* Anything else is treated as RGB */
            (*fptr_delta8_rgb)(buffer, image.buffer, pdiff, pixels);
        }
    }
    else if ( colours == ZM_COLOUR_RGB32 )
    {
        switch ( subpixelorder )
        {
            case ZM_SUBPIX_ORDER_ARGB:
                (*fptr_delta8_argb)(buffer, image.buffer, pdiff, pixels);
                break;
            case ZM_SUBPIX_ORDER_ABGR:
                (*fptr_delta8_abgr)(buffer, image.buffer, pdiff, pixels);
                break;
            case ZM_SUBPIX_ORDER_BGRA:
                (*fptr_delta8_bgra)(buffer, image.buffer, pdiff, pixels);
                break;
            default:
                /* Anything else is treated as RGBA */
                (*fptr_delta8_rgba)(buffer, image.buffer, pdiff, pixels);
                break;
        }
    }
    else if ( colours == ZM_COLOUR_GRAY8 )
    {
        (*fptr_delta8_gray8)(buffer, image.buffer, pdiff, pixels);
    }
    else
    {
        Panic("Delta called with unexpected colours: %d",colours);
    }

#ifdef ZM_IMAGE_PROFILING
    clock_gettime(CLOCK_THREAD_CPUTIME_ID,&end);
    timespec_diff(&start,&end,&diff);

    executetime = (1000000000ull * diff.tv_sec) + diff.tv_nsec;
    milpixels = (unsigned long)((long double)pixels)/((((long double)executetime)/1000));
    Debug(5, "Delta: %u delta pixels generated in %llu nanoseconds, %lu million pixels/s\n",pixels,executetime,milpixels);
#endif
}
/*
 * Work out the top left coordinate at which `text` would appear centred in
 * the image, based on its longest line and its number of lines.
 *
 * Lines are separated by '\n' and runs of newlines count as one separator.
 * Scanning stops at the first zero length line, which can only be a text that
 * starts with a newline.
 */
const Coord Image::centreCoord( const char *text ) const
{
    const int total_len = strlen( text );
    int widest_line = 0;
    int n_lines = 0;
    int pos = 0;
    const char *cursor = text;

    while ( pos < total_len )
    {
        int span = strcspn( cursor, "\n" );
        if ( !span )
        {
            break;
        }
        if ( span > widest_line )
        {
            widest_line = span;
        }

        /* Step over the line and every newline that follows it */
        pos += span;
        while ( text[pos] == '\n' )
        {
            pos++;
        }
        cursor = text+pos;
        n_lines++;
    }

    int x = (width - (widest_line * CHAR_WIDTH) ) / 2;
    int y = (height - (n_lines * LINE_HEIGHT) ) / 2;
    return( Coord( x, y ) );
}
/* RGB32 compatible: complete */
void Image::MaskPrivacy( const unsigned char *p_bitmask, const Rgb pixel_colour )
{
const uint8_t pixel_r_col = RED_VAL_RGBA(pixel_colour);
const uint8_t pixel_g_col = GREEN_VAL_RGBA(pixel_colour);
const uint8_t pixel_b_col = BLUE_VAL_RGBA(pixel_colour);
const uint8_t pixel_bw_col = pixel_colour & 0xff;
const Rgb pixel_rgb_col = rgb_convert(pixel_colour,subpixelorder);
unsigned char *ptr = &buffer[0];
unsigned int i = 0;
for ( unsigned int y = 0; y < height; y++ )
{
if ( colours == ZM_COLOUR_GRAY8 )
{
for ( unsigned int x = 0; x < width; x++, ptr++ )
{
if ( p_bitmask[i] )
*ptr = pixel_bw_col;
i++;
}
}
else if ( colours == ZM_COLOUR_RGB24 )
{
for ( unsigned int x = 0; x < width; x++, ptr += colours )
{
if ( p_bitmask[i] )
{
RED_PTR_RGBA(ptr) = pixel_r_col;
GREEN_PTR_RGBA(ptr) = pixel_g_col;
BLUE_PTR_RGBA(ptr) = pixel_b_col;
}
i++;
}
}
else if ( colours == ZM_COLOUR_RGB32 )
{
for ( unsigned int x = 0; x < width; x++, ptr += colours )
{
Rgb *temp_ptr = (Rgb*)ptr;
if ( p_bitmask[i] )
*temp_ptr = pixel_rgb_col;
i++;
}
} else {
Panic("MaskPrivacy called with unexpected colours: %d", colours);
return;
}
}
}
/* RGB32 compatible: complete */
/*
 * Draw text onto the image using the built in bitmap fonts.
 *
 * p_text     text to draw; lines are separated by '\n' and runs of newlines
 *            count as one separator. A text that starts with a newline draws
 *            nothing. The text is copied into the `text` member and truncated
 *            to fit it.
 * coord      requested top left position of the first line; each line is
 *            moved left/up as needed so that it fits inside the image.
 * size       2 selects bigfontdata, any other value selects fontdata.
 * fg_colour  colour of the glyph pixels, RGB_TRANSPARENT leaves them untouched.
 * bg_colour  colour of the remaining pixels of each character cell,
 *            RGB_TRANSPARENT leaves them untouched.
 *
 * Panics when the image has a colours value other than GRAY8, RGB24 or RGB32.
 *
 * NOTE(review): the font tables are indexed with a plain `char`; confirm that
 * bytes above 0x7f cannot reach this function on platforms where char is signed.
 */
void Image::Annotate( const char *p_text, const Coord &coord, const unsigned int size, const Rgb fg_colour, const Rgb bg_colour )
{
    /* strncpy() leaves the destination unterminated when the source does not
       fit, so copy one byte less than the buffer holds and terminate it here */
    strncpy( text, p_text, sizeof(text)-1 );
    text[sizeof(text)-1] = '\0';

    unsigned int index = 0;
    unsigned int line_no = 0;
    unsigned int text_len = strlen( text );
    unsigned int line_len = 0;
    const char *line = text;

    const uint8_t fg_r_col = RED_VAL_RGBA(fg_colour);
    const uint8_t fg_g_col = GREEN_VAL_RGBA(fg_colour);
    const uint8_t fg_b_col = BLUE_VAL_RGBA(fg_colour);
    const uint8_t fg_bw_col = fg_colour & 0xff;
    const Rgb fg_rgb_col = rgb_convert(fg_colour,subpixelorder);
    const bool fg_trans = (fg_colour == RGB_TRANSPARENT);

    const uint8_t bg_r_col = RED_VAL_RGBA(bg_colour);
    const uint8_t bg_g_col = GREEN_VAL_RGBA(bg_colour);
    const uint8_t bg_b_col = BLUE_VAL_RGBA(bg_colour);
    const uint8_t bg_bw_col = bg_colour & 0xff;
    const Rgb bg_rgb_col = rgb_convert(bg_colour,subpixelorder);
    const bool bg_trans = (bg_colour == RGB_TRANSPARENT);

    /* Mask for the leftmost pixel of a glyph row: 8 bits wide for the normal
       font, 16 bits wide for the big font */
    int zm_text_bitmask = 0x80;
    if (size == 2)
        zm_text_bitmask = 0x8000;

    while ( (index < text_len) && (line_len = strcspn( line, "\n" )) )
    {
        unsigned int line_width = line_len * CHAR_WIDTH * size;

        unsigned int lo_line_x = coord.X();
        unsigned int lo_line_y = coord.Y() + (line_no * LINE_HEIGHT * size);

        unsigned int min_line_x = 0;
        unsigned int max_line_x = width - line_width;
        unsigned int min_line_y = 0;
        unsigned int max_line_y = height - (LINE_HEIGHT * size);

        /* Pull the line back inside the image */
        if ( lo_line_x > max_line_x )
            lo_line_x = max_line_x;
        if ( lo_line_x < min_line_x )
            lo_line_x = min_line_x;
        if ( lo_line_y > max_line_y )
            lo_line_y = max_line_y;
        if ( lo_line_y < min_line_y )
            lo_line_y = min_line_y;

        unsigned int hi_line_x = lo_line_x + line_width;
        unsigned int hi_line_y = lo_line_y + (LINE_HEIGHT * size);

        // Clip anything that runs off the right of the screen
        if ( hi_line_x > width )
            hi_line_x = width;
        if ( hi_line_y > height )
            hi_line_y = height;

        if ( colours == ZM_COLOUR_GRAY8 )
        {
            unsigned char *ptr = &buffer[(lo_line_y*width)+lo_line_x];
            /* r is the row inside the glyph, y the row inside the image */
            for ( unsigned int y = lo_line_y, r = 0; y < hi_line_y && r < (CHAR_HEIGHT * size); y++, r++, ptr += width )
            {
                unsigned char *temp_ptr = ptr;
                for ( unsigned int x = lo_line_x, c = 0; x < hi_line_x && c < line_len; c++ )
                {
                    /* f holds the bit pattern of row r of character c */
                    int f;
                    if (size == 2)
                        f = bigfontdata[(line[c] * CHAR_HEIGHT * size) + r];
                    else
                        f = fontdata[(line[c] * CHAR_HEIGHT) + r];
                    for ( unsigned int i = 0; i < (CHAR_WIDTH * size) && x < hi_line_x; i++, x++, temp_ptr++ )
                    {
                        if ( f & (zm_text_bitmask >> i) )
                        {
                            if ( !fg_trans )
                                *temp_ptr = fg_bw_col;
                        }
                        else if ( !bg_trans )
                        {
                            *temp_ptr = bg_bw_col;
                        }
                    }
                }
            }
        }
        else if ( colours == ZM_COLOUR_RGB24 )
        {
            unsigned int wc = width * colours;

            unsigned char *ptr = &buffer[((lo_line_y*width)+lo_line_x)*colours];
            for ( unsigned int y = lo_line_y, r = 0; y < hi_line_y && r < (CHAR_HEIGHT * size); y++, r++, ptr += wc )
            {
                unsigned char *temp_ptr = ptr;
                for ( unsigned int x = lo_line_x, c = 0; x < hi_line_x && c < line_len; c++ )
                {
                    int f;
                    if (size == 2)
                        f = bigfontdata[(line[c] * CHAR_HEIGHT * size) + r];
                    else
                        f = fontdata[(line[c] * CHAR_HEIGHT) + r];
                    for ( unsigned int i = 0; i < (CHAR_WIDTH * size) && x < hi_line_x; i++, x++, temp_ptr += colours )
                    {
                        if ( f & (zm_text_bitmask >> i) )
                        {
                            if ( !fg_trans )
                            {
                                RED_PTR_RGBA(temp_ptr) = fg_r_col;
                                GREEN_PTR_RGBA(temp_ptr) = fg_g_col;
                                BLUE_PTR_RGBA(temp_ptr) = fg_b_col;
                            }
                        }
                        else if ( !bg_trans )
                        {
                            RED_PTR_RGBA(temp_ptr) = bg_r_col;
                            GREEN_PTR_RGBA(temp_ptr) = bg_g_col;
                            BLUE_PTR_RGBA(temp_ptr) = bg_b_col;
                        }
                    }
                }
            }
        }
        else if ( colours == ZM_COLOUR_RGB32 )
        {
            unsigned int wc = width * colours;

            uint8_t *ptr = &buffer[((lo_line_y*width)+lo_line_x)<<2];
            for ( unsigned int y = lo_line_y, r = 0; y < hi_line_y && r < (CHAR_HEIGHT * size); y++, r++, ptr += wc )
            {
                Rgb* temp_ptr = (Rgb*)ptr;
                for ( unsigned int x = lo_line_x, c = 0; x < hi_line_x && c < line_len; c++ )
                {
                    int f;
                    if (size == 2)
                        f = bigfontdata[(line[c] * CHAR_HEIGHT * size) + r];
                    else
                        f = fontdata[(line[c] * CHAR_HEIGHT) + r];
                    for ( unsigned int i = 0; i < (CHAR_WIDTH * size) && x < hi_line_x; i++, x++, temp_ptr++ )
                    {
                        if ( f & (zm_text_bitmask >> i) )
                        {
                            if ( !fg_trans )
                            {
                                *temp_ptr = fg_rgb_col;
                            }
                        }
                        else if ( !bg_trans )
                        {
                            *temp_ptr = bg_rgb_col;
                        }
                    }
                }
            }
        } else {
            Panic("Annotate called with unexpected colours: %d",colours);
            return;
        }

        /* Advance to the start of the next line, skipping every newline in between */
        index += line_len;
        while ( text[index] == '\n' )
        {
            index++;
        }
        line = text+index;
        line_no++;
    }
}
/*
 * Annotate the image with the local time of `when`, formatted as
 * "yy/mm/dd HH:MM:SS". When a label is given the text becomes
 * "<label> - <time>", limited to what fits in a 64 byte buffer.
 */
void Image::Timestamp( const char *label, const time_t when, const Coord &coord, const int size )
{
    char time_text[64];
    strftime( time_text, sizeof(time_text), "%y/%m/%d %H:%M:%S", localtime( &when ) );

    if ( !label )
    {
        Annotate( time_text, coord, size );
        return;
    }

    char labelled_text[64];
    snprintf( labelled_text, sizeof(labelled_text), "%s - %s", label, time_text );
    Annotate( labelled_text, coord, size );
}
/* RGB32 compatible: complete */
/*
 * Convert a greyscale image to RGB24 or RGB32 by copying the grey value into
 * the red, green and blue channels of every pixel.
 *
 * p_reqcolours        target format, ZM_COLOUR_RGB24 or ZM_COLOUR_RGB32
 * p_reqsubpixelorder  subpixel order recorded for the converted image; for
 *                     RGB32 it also decides whether the alpha byte comes
 *                     first or last
 *
 * Logs a warning and does nothing when the image is not greyscale, and logs
 * an error and does nothing when another target format is requested.
 */
void Image::Colourise(const unsigned int p_reqcolours, const unsigned int p_reqsubpixelorder)
{
    Debug(9, "Colourise: Req colours: %u Req subpixel order: %u Current colours: %u Current subpixel order: %u",p_reqcolours,p_reqsubpixelorder,colours,subpixelorder);

    if ( colours != ZM_COLOUR_GRAY8) {
        Warning("Target image is already colourised, colours: %u",colours);
        return;
    }

    if ( p_reqcolours == ZM_COLOUR_RGB32 ) {
        /* RGB32 */
        Rgb* new_buffer = (Rgb*)AllocBuffer(pixels*sizeof(Rgb));

        const uint8_t *psrc = buffer;
        Rgb* pdest = new_buffer;
        Rgb subpixel;
        Rgb newpixel;

        if ( p_reqsubpixelorder == ZM_SUBPIX_ORDER_ABGR || p_reqsubpixelorder == ZM_SUBPIX_ORDER_ARGB) {
            /* ARGB\ABGR subpixel order. alpha byte is first (mem+0), so we need to shift the pixel left in the end */
            for(unsigned int i=0;i<pixels;i++) {
                newpixel = subpixel = psrc[i];
                newpixel = (newpixel<<8) | subpixel;
                newpixel = (newpixel<<8) | subpixel;
                pdest[i] = (newpixel<<8);
            }
        } else {
            /* RGBA\BGRA subpixel order, alpha byte is last (mem+3) */
            for(unsigned int i=0;i<pixels;i++) {
                newpixel = subpixel = psrc[i];
                newpixel = (newpixel<<8) | subpixel;
                newpixel = (newpixel<<8) | subpixel;
                pdest[i] = newpixel;
            }
        }

        /* Directly assign the new buffer and make sure it will be freed when not needed anymore */
        AssignDirect( width, height, p_reqcolours, p_reqsubpixelorder, (uint8_t*)new_buffer, pixels*4, ZM_BUFTYPE_ZM);

    } else if(p_reqcolours == ZM_COLOUR_RGB24 ) {
        /* RGB24 */
        uint8_t *new_buffer = AllocBuffer(pixels*3);

        uint8_t *pdest = new_buffer;
        const uint8_t *psrc = buffer;

        for(unsigned int i=0;i<(unsigned int)pixels;i++, pdest += 3)
        {
            RED_PTR_RGBA(pdest) = GREEN_PTR_RGBA(pdest) = BLUE_PTR_RGBA(pdest) = psrc[i];
        }

        /* Directly assign the new buffer and make sure it will be freed when not needed anymore */
        AssignDirect( width, height, p_reqcolours, p_reqsubpixelorder, new_buffer, pixels*3, ZM_BUFTYPE_ZM);
    } else {
        /* Report the requested format: the current one is always greyscale here */
        Error("Colourise called with unexpected colours: %d",p_reqcolours);
        return;
    }
}
/* RGB32 compatible: complete */
/*
 * Convert an RGB24 or RGB32 image to greyscale in place.
 *
 * The conversion routine is chosen from the image's current colours and
 * subpixel order, so those are only reset to GRAY8 / NONE after the pixels
 * have been converted. An image that is already greyscale is left untouched.
 */
void Image::DeColourise()
{
    /* Nothing to do, and converting would read past the end of a one byte per pixel buffer */
    if ( colours == ZM_COLOUR_GRAY8 )
        return;

    if ( colours == ZM_COLOUR_RGB32 )
    {
        switch(subpixelorder) {
            case ZM_SUBPIX_ORDER_BGRA:
                std_convert_bgra_gray8(buffer,buffer,pixels);
                break;
            case ZM_SUBPIX_ORDER_ARGB:
                std_convert_argb_gray8(buffer,buffer,pixels);
                break;
            case ZM_SUBPIX_ORDER_ABGR:
                std_convert_abgr_gray8(buffer,buffer,pixels);
                break;
            case ZM_SUBPIX_ORDER_RGBA:
            default:
                std_convert_rgba_gray8(buffer,buffer,pixels);
                break;
        }
    } else {
        /* Assume RGB24 */
        switch(subpixelorder) {
            case ZM_SUBPIX_ORDER_BGR:
                std_convert_bgr_gray8(buffer,buffer,pixels);
                break;
            case ZM_SUBPIX_ORDER_RGB:
            default:
                std_convert_rgb_gray8(buffer,buffer,pixels);
                break;
        }
    }

    /* The buffer now holds one byte per pixel */
    colours = ZM_COLOUR_GRAY8;
    subpixelorder = ZM_SUBPIX_ORDER_NONE;
    size = width * height;
}
/* RGB32 compatible: complete */
/*
 * Fill a rectangle of the image with a single colour.
 *
 * colour  colour to paint; it is first converted to the image's subpixel order
 * limits  inclusive rectangle to fill, or a null pointer for the whole image
 *
 * Panics when the image has a colours value other than GRAY8, RGB24 or RGB32.
 */
void Image::Fill( Rgb colour, const Box *limits )
{
    if ( colours != ZM_COLOUR_GRAY8 && colours != ZM_COLOUR_RGB24 && colours != ZM_COLOUR_RGB32 )
    {
        Panic( "Attempt to fill image with unexpected colours %d", colours );
    }

    /* Convert the colour's RGBA subpixel order into the image's subpixel order */
    colour = rgb_convert(colour,subpixelorder);

    unsigned int left = limits?limits->Lo().X():0;
    unsigned int top = limits?limits->Lo().Y():0;
    unsigned int right = limits?limits->Hi().X():width-1;
    unsigned int bottom = limits?limits->Hi().Y():height-1;

    switch ( colours )
    {
        case ZM_COLOUR_GRAY8:
        {
            for ( unsigned int row = top; row <= bottom; row++ )
            {
                unsigned char *px = &buffer[(row*width)+left];
                for ( unsigned int col = left; col <= right; col++ )
                {
                    *px++ = colour;
                }
            }
            break;
        }
        case ZM_COLOUR_RGB24:
        {
            for ( unsigned int row = top; row <= bottom; row++ )
            {
                unsigned char *px = &buffer[colours*((row*width)+left)];
                for ( unsigned int col = left; col <= right; col++ )
                {
                    RED_PTR_RGBA(px) = RED_VAL_RGBA(colour);
                    GREEN_PTR_RGBA(px) = GREEN_VAL_RGBA(colour);
                    BLUE_PTR_RGBA(px) = BLUE_VAL_RGBA(colour);
                    px += 3;
                }
            }
            break;
        }
        case ZM_COLOUR_RGB32:
        {
            for ( unsigned int row = top; row <= bottom; row++ )
            {
                Rgb *px = (Rgb*)&buffer[((row*width)+left)<<2];
                for ( unsigned int col = left; col <= right; col++ )
                {
                    /* The whole pixel is written in a single store */
                    *px++ = colour;
                }
            }
            break;
        }
        default:
            break;
    }
}
/* RGB32 compatible: complete */
/*
 * Fill a rectangle of the image with a dotted pattern.
 *
 * colour   colour to paint; it is first converted to the image's subpixel order
 * density  grid spacing: inside the rectangle only pixels whose x and y are
 *          both multiples of density are painted. The border of the rectangle
 *          is always painted. A density of one or less uses the solid fill.
 * limits   inclusive rectangle to fill, or a null pointer for the whole image
 *
 * Panics when the image has a colours value other than GRAY8, RGB24 or RGB32.
 */
void Image::Fill( Rgb colour, int density, const Box *limits )
{
    /* Allow the faster version to be used if density is not used (density=1) */
    if ( density <= 1 )
        return Fill(colour,limits);

    if ( colours != ZM_COLOUR_GRAY8 && colours != ZM_COLOUR_RGB24 && colours != ZM_COLOUR_RGB32 )
    {
        Panic( "Attempt to fill image with unexpected colours %d", colours );
    }

    /* Convert the colour's RGBA subpixel order into the image's subpixel order */
    colour = rgb_convert(colour,subpixelorder);

    unsigned int left = limits?limits->Lo().X():0;
    unsigned int top = limits?limits->Lo().Y():0;
    unsigned int right = limits?limits->Hi().X():width-1;
    unsigned int bottom = limits?limits->Hi().Y():height-1;

    switch ( colours )
    {
        case ZM_COLOUR_GRAY8:
        {
            for ( unsigned int row = top; row <= bottom; row++ )
            {
                const bool border_row = ( row == top || row == bottom );
                const bool grid_row = !(row%density);
                unsigned char *px = &buffer[(row*width)+left];
                for ( unsigned int col = left; col <= right; col++, px++ )
                {
                    if ( col == left || col == right || border_row || ( !(col%density) && grid_row ) )
                    {
                        *px = colour;
                    }
                }
            }
            break;
        }
        case ZM_COLOUR_RGB24:
        {
            for ( unsigned int row = top; row <= bottom; row++ )
            {
                const bool border_row = ( row == top || row == bottom );
                const bool grid_row = !(row%density);
                unsigned char *px = &buffer[colours*((row*width)+left)];
                for ( unsigned int col = left; col <= right; col++, px += 3 )
                {
                    if ( col == left || col == right || border_row || ( !(col%density) && grid_row ) )
                    {
                        RED_PTR_RGBA(px) = RED_VAL_RGBA(colour);
                        GREEN_PTR_RGBA(px) = GREEN_VAL_RGBA(colour);
                        BLUE_PTR_RGBA(px) = BLUE_VAL_RGBA(colour);
                    }
                }
            }
            break;
        }
        case ZM_COLOUR_RGB32:
        {
            for ( unsigned int row = top; row <= bottom; row++ )
            {
                const bool border_row = ( row == top || row == bottom );
                const bool grid_row = !(row%density);
                Rgb *px = (Rgb*)&buffer[((row*width)+left)<<2];
                for ( unsigned int col = left; col <= right; col++, px++ )
                {
                    if ( col == left || col == right || border_row || ( !(col%density) && grid_row ) )
                    {
                        /* The whole pixel is written in a single store */
                        *px = colour;
                    }
                }
            }
            break;
        }
        default:
            break;
    }
}
/* RGB32 compatible: complete */
/*
 * Draw the outline of a polygon onto the image in a single colour.
 *
 * colour   colour to draw with; it is first converted to the image's subpixel order
 * polygon  vertices to connect; the last vertex is joined back to the first
 *
 * Each edge is drawn by stepping one pixel at a time along the axis with the
 * larger extent and rounding the position on the other axis. The end point of
 * an edge is not plotted by that edge's loop. No clipping against the image
 * bounds is done here.
 *
 * Panics when the image has a colours value other than GRAY8, RGB24 or RGB32.
 */
void Image::Outline( Rgb colour, const Polygon &polygon )
{
if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32 ) )
{
Panic( "Attempt to outline image with unexpected colours %d", colours );
}
/* Convert the colour's RGBA subpixel order into the image's subpixel order */
colour = rgb_convert(colour,subpixelorder);
int n_coords = polygon.getNumCoords();
/* i trails j by one vertex and starts at the last vertex, which closes the outline */
for ( int j = 0, i = n_coords-1; j < n_coords; i = j++ )
{
const Coord &p1 = polygon.getCoord( i );
const Coord &p2 = polygon.getCoord( j );
int x1 = p1.X();
int x2 = p2.X();
int y1 = p1.Y();
int y2 = p2.Y();
double dx = x2 - x1;
double dy = y2 - y1;
double grad;
//Debug( 9, "dx: %.2lf, dy: %.2lf", dx, dy );
if ( fabs(dx) <= fabs(dy) )
{
/* Edge is at least as tall as it is wide: step in y, x follows the gradient */
//Debug( 9, "dx <= dy" );
if ( y1 != y2 )
grad = dx/dy;
else
grad = width;
double x;
int y, yinc = (y1<y2)?1:-1;
/* Make the gradient the change in x per step in the direction of travel */
grad *= yinc;
if ( colours == ZM_COLOUR_GRAY8 )
{
//Debug( 9, "x1:%d, x2:%d, y1:%d, y2:%d, gr:%.2f", x1, x2, y1, y2, grad );
for ( x = x1, y = y1; y != y2; y += yinc, x += grad )
{
//Debug( 9, "x:%.2f, y:%d", x, y );
buffer[(y*width)+int(round(x))] = colour;
}
}
else if ( colours == ZM_COLOUR_RGB24 )
{
for ( x = x1, y = y1; y != y2; y += yinc, x += grad )
{
unsigned char *p = &buffer[colours*((y*width)+int(round(x)))];
RED_PTR_RGBA(p) = RED_VAL_RGBA(colour);
GREEN_PTR_RGBA(p) = GREEN_VAL_RGBA(colour);
BLUE_PTR_RGBA(p) = BLUE_VAL_RGBA(colour);
}
}
else if ( colours == ZM_COLOUR_RGB32 )
{
for ( x = x1, y = y1; y != y2; y += yinc, x += grad )
{
*(Rgb*)(buffer+(((y*width)+int(round(x)))<<2)) = colour;
}
}
}
else
{
/* Edge is wider than it is tall: step in x, y follows the gradient */
//Debug( 9, "dx > dy" );
if ( x1 != x2 )
grad = dy/dx;
else
grad = height;
//Debug( 9, "grad: %.2lf", grad );
double y;
int x, xinc = (x1<x2)?1:-1;
/* Make the gradient the change in y per step in the direction of travel */
grad *= xinc;
if ( colours == ZM_COLOUR_GRAY8 )
{
//Debug( 9, "x1:%d, x2:%d, y1:%d, y2:%d, gr:%.2lf", x1, x2, y1, y2, grad );
for ( y = y1, x = x1; x != x2; x += xinc, y += grad )
{
//Debug( 9, "x:%d, y:%.2f", x, y );
buffer[(int(round(y))*width)+x] = colour;
}
}
else if ( colours == ZM_COLOUR_RGB24 )
{
for ( y = y1, x = x1; x != x2; x += xinc, y += grad )
{
unsigned char *p = &buffer[colours*((int(round(y))*width)+x)];
RED_PTR_RGBA(p) = RED_VAL_RGBA(colour);
GREEN_PTR_RGBA(p) = GREEN_VAL_RGBA(colour);
BLUE_PTR_RGBA(p) = BLUE_VAL_RGBA(colour);
}
}
else if ( colours == ZM_COLOUR_RGB32 )
{
for ( y = y1, x = x1; x != x2; x += xinc, y += grad )
{
*(Rgb*)(buffer+(((int(round(y))*width)+x)<<2)) = colour;
}
}
}
}
}
/* RGB32 compatible: complete */
/**
 * Fill the interior of a polygon using a scan-line algorithm driven by a
 * global edge list and an active edge list.
 *
 * @param colour   Fill colour; converted to the image's subpixel order via
 *                 rgb_convert() before use.
 * @param density  Only rows and columns whose index is a multiple of this
 *                 value are painted (1 paints every pixel). Must be non-zero.
 * @param polygon  Polygon whose interior is filled. Horizontal edges are
 *                 ignored when building the edge list.
 *
 * Panics if the image is not GRAY8, RGB24 or RGB32. A polygon that yields no
 * non-horizontal edges is a no-op.
 */
void Image::Fill( Rgb colour, int density, const Polygon &polygon )
{
  if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32 ) )
  {
    Panic( "Attempt to fill image with unexpected colours %d", colours );
  }

  /* Convert the colour's RGBA subpixel order into the image's subpixel order */
  colour = rgb_convert(colour,subpixelorder);

  int n_coords = polygon.getNumCoords();
  if ( n_coords < 1 )
  {
    /* Nothing to fill; also avoids declaring a zero length edge array */
    return;
  }

  /* Build the global edge list, one entry per non-horizontal polygon edge */
  int n_global_edges = 0;
  Edge global_edges[n_coords];
  for ( int j = 0, i = n_coords-1; j < n_coords; i = j++ )
  {
    const Coord &p1 = polygon.getCoord( i );
    const Coord &p2 = polygon.getCoord( j );

    int x1 = p1.X();
    int x2 = p2.X();
    int y1 = p1.Y();
    int y2 = p2.Y();

    //Debug( 9, "x1:%d,y1:%d x2:%d,y2:%d", x1, y1, x2, y2 );
    if ( y1 == y2 )
      continue;

    double dx = x2 - x1;
    double dy = y2 - y1;

    global_edges[n_global_edges].min_y = y1<y2?y1:y2;
    global_edges[n_global_edges].max_y = y1<y2?y2:y1;
    global_edges[n_global_edges].min_x = y1<y2?x1:x2;
    global_edges[n_global_edges]._1_m = dx/dy;
    n_global_edges++;
  }
  if ( n_global_edges < 1 )
  {
    /* Every edge was horizontal: there is no starting row to read below */
    return;
  }
  qsort( global_edges, n_global_edges, sizeof(*global_edges), Edge::CompareYX );

#ifndef ZM_DBG_OFF
  if ( logLevel() >= Logger::DEBUG9 )
  {
    for ( int i = 0; i < n_global_edges; i++ )
    {
      Debug( 9, "%d: min_y: %d, max_y:%d, min_x:%.2f, 1/m:%.2f", i, global_edges[i].min_y, global_edges[i].max_y, global_edges[i].min_x, global_edges[i]._1_m );
    }
  }
#endif

  int n_active_edges = 0;
  Edge active_edges[n_global_edges];
  int y = global_edges[0].min_y;
  do
  {
    /* Move every edge that starts on this row from the global to the active list */
    for ( int i = 0; i < n_global_edges; i++ )
    {
      if ( global_edges[i].min_y == y )
      {
        Debug( 9, "Moving global edge" );
        active_edges[n_active_edges++] = global_edges[i];
        if ( i < (n_global_edges-1) )
        {
          /* Close the gap: only the (n_global_edges-i-1) entries after i exist */
          memmove( &global_edges[i], &global_edges[i+1], sizeof(*global_edges)*(n_global_edges-i-1) );
          i--;
        }
        n_global_edges--;
      }
      else
      {
        break;
      }
    }
    qsort( active_edges, n_active_edges, sizeof(*active_edges), Edge::CompareX );

#ifndef ZM_DBG_OFF
    if ( logLevel() >= Logger::DEBUG9 )
    {
      for ( int i = 0; i < n_active_edges; i++ )
      {
        Debug( 9, "%d - %d: min_y: %d, max_y:%d, min_x:%.2f, 1/m:%.2f", y, i, active_edges[i].min_y, active_edges[i].max_y, active_edges[i].min_x, active_edges[i]._1_m );
      }
    }
#endif

    if ( !(y%density) )
    {
      //Debug( 9, "%d", y );
      /* Active edges are consumed in pairs: each pair bounds one span to paint */
      for ( int i = 0; i < n_active_edges; )
      {
        int lo_x = int(round(active_edges[i++].min_x));
        int hi_x = int(round(active_edges[i++].min_x));
        if( colours == ZM_COLOUR_GRAY8 ) {
          unsigned char *p = &buffer[(y*width)+lo_x];
          for ( int x = lo_x; x <= hi_x; x++, p++)
          {
            if ( !(x%density) )
            {
              //Debug( 9, " %d", x );
              *p = colour;
            }
          }
        } else if( colours == ZM_COLOUR_RGB24 ) {
          unsigned char *p = &buffer[colours*((y*width)+lo_x)];
          for ( int x = lo_x; x <= hi_x; x++, p += 3)
          {
            if ( !(x%density) )
            {
              RED_PTR_RGBA(p) = RED_VAL_RGBA(colour);
              GREEN_PTR_RGBA(p) = GREEN_VAL_RGBA(colour);
              BLUE_PTR_RGBA(p) = BLUE_VAL_RGBA(colour);
            }
          }
        } else if( colours == ZM_COLOUR_RGB32 ) {
          Rgb *p = (Rgb*)&buffer[((y*width)+lo_x)<<2];
          for ( int x = lo_x; x <= hi_x; x++, p++)
          {
            if ( !(x%density) )
            {
              /* Fast, copies the entire pixel in a single pass */
              *p = colour;
            }
          }
        }
      }
    }

    y++;
    /* Retire edges that end at the new row, advance the x of the others */
    for ( int i = n_active_edges-1; i >= 0; i-- )
    {
      if ( y >= active_edges[i].max_y ) // Or >= as per sheets
      {
        Debug( 9, "Deleting active_edge" );
        if ( i < (n_active_edges-1) )
        {
          /* Close the gap: only the (n_active_edges-i-1) entries after i exist */
          memmove( &active_edges[i], &active_edges[i+1], sizeof(*active_edges)*(n_active_edges-i-1) );
        }
        n_active_edges--;
      }
      else
      {
        active_edges[i].min_x += active_edges[i]._1_m;
      }
    }
  } while ( n_global_edges || n_active_edges );
}
/* Convenience overload: fill the polygon solidly by delegating to the density based Fill() */
void Image::Fill( Rgb colour, const Polygon &polygon )
{
  /* With a density of 1 no row or column is skipped by the density test */
  const int every_pixel = 1;
  Fill( colour, every_pixel, polygon );
}
/* RGB32 compatible: complete */
/**
 * Rotate the image in place by a multiple of 90 degrees.
 *
 * @param angle  Rotation in degrees. It is reduced modulo 360 and negative
 *               values are mapped onto the equivalent positive angle, so -90
 *               is handled as 270. Angles that are not a multiple of 90 leave
 *               the image untouched.
 *
 * For 90 the first source row becomes the last destination column; for 270
 * the source is read backwards from its end while filling the same way, and
 * for 180 the pixel order is simply reversed. The rotated pixels are written
 * to a newly allocated buffer which is then handed to AssignDirect().
 */
void Image::Rotate( int angle )
{
  angle %= 360;
  if ( angle < 0 )
  {
    /* '%' keeps the sign of a negative operand; without this no case below would match */
    angle += 360;
  }

  if ( !angle )
  {
    return;
  }
  if ( angle%90 )
  {
    return;
  }

  unsigned int new_height = height;
  unsigned int new_width = width;
  uint8_t* rotate_buffer = AllocBuffer(size);

  switch( angle )
  {
    case 90 :
    {
      new_height = width;
      new_width = height;

      unsigned int line_bytes = new_width*colours;
      unsigned char *s_ptr = buffer;

      if ( colours == ZM_COLOUR_GRAY8 )
      {
        unsigned char *d_ptr;
        /* Fill destination columns from last to first, each one top to bottom */
        for ( unsigned int i = new_width; i > 0; i-- )
        {
          d_ptr = rotate_buffer+(i-1);
          for ( unsigned int j = new_height; j > 0; j-- )
          {
            *d_ptr = *s_ptr++;
            d_ptr += line_bytes;
          }
        }
      }
      else if ( colours == ZM_COLOUR_RGB32 )
      {
        Rgb* s_rptr = (Rgb*)s_ptr;
        Rgb* d_rptr;
        for ( unsigned int i = new_width; i > 0; i-- )
        {
          d_rptr = (Rgb*)(rotate_buffer+((i-1)<<2));
          for ( unsigned int j = new_height; j > 0; j-- )
          {
            *d_rptr = *s_rptr++;
            /* Whole pixels here, so one row down is new_width elements */
            d_rptr += new_width;
          }
        }
      }
      else /* Assume RGB24 */
      {
        unsigned char *d_ptr;
        for ( unsigned int i = new_width; i > 0; i-- )
        {
          d_ptr = rotate_buffer+((i-1)*3);
          for ( unsigned int j = new_height; j > 0; j-- )
          {
            *d_ptr = *s_ptr++;
            *(d_ptr+1) = *s_ptr++;
            *(d_ptr+2) = *s_ptr++;
            d_ptr += line_bytes;
          }
        }
      }
      break;
    }
    case 180 :
    {
      /* Walk the source backwards from its end while writing forwards */
      unsigned char *s_ptr = buffer+size;
      unsigned char *d_ptr = rotate_buffer;

      if ( colours == ZM_COLOUR_GRAY8 )
      {
        while( s_ptr > buffer )
        {
          s_ptr--;
          *d_ptr++ = *s_ptr;
        }
      }
      else if ( colours == ZM_COLOUR_RGB32 )
      {
        Rgb* s_rptr = (Rgb*)s_ptr;
        Rgb* d_rptr = (Rgb*)d_ptr;
        while( s_rptr > (Rgb*)buffer )
        {
          s_rptr--;
          *d_rptr++ = *s_rptr;
        }
      }
      else /* Assume RGB24 */
      {
        while( s_ptr > buffer )
        {
          s_ptr -= 3;
          *d_ptr++ = *s_ptr;
          *d_ptr++ = *(s_ptr+1);
          *d_ptr++ = *(s_ptr+2);
        }
      }
      break;
    }
    case 270 :
    {
      new_height = width;
      new_width = height;

      unsigned int line_bytes = new_width*colours;
      /* Same destination walk as for 90, but the source is consumed from its end */
      unsigned char *s_ptr = buffer+size;

      if ( colours == ZM_COLOUR_GRAY8 )
      {
        unsigned char *d_ptr;
        for ( unsigned int i = new_width; i > 0; i-- )
        {
          d_ptr = rotate_buffer+(i-1);
          for ( unsigned int j = new_height; j > 0; j-- )
          {
            s_ptr--;
            *d_ptr = *s_ptr;
            d_ptr += line_bytes;
          }
        }
      }
      else if ( colours == ZM_COLOUR_RGB32 )
      {
        Rgb* s_rptr = (Rgb*)s_ptr;
        Rgb* d_rptr;
        for ( unsigned int i = new_width; i > 0; i-- )
        {
          d_rptr = (Rgb*)(rotate_buffer+((i-1)<<2));
          for ( unsigned int j = new_height; j > 0; j-- )
          {
            s_rptr--;
            *d_rptr = *s_rptr;
            d_rptr += new_width;
          }
        }
      }
      else /* Assume RGB24 */
      {
        unsigned char *d_ptr;
        for ( unsigned int i = new_width; i > 0; i-- )
        {
          d_ptr = rotate_buffer+((i-1)*3);
          for ( unsigned int j = new_height; j > 0; j-- )
          {
            /* Bytes are read in reverse, so store them last-to-first to keep channel order */
            *(d_ptr+2) = *(--s_ptr);
            *(d_ptr+1) = *(--s_ptr);
            *d_ptr = *(--s_ptr);
            d_ptr += line_bytes;
          }
        }
      }
      break;
    }
  }

  AssignDirect( new_width, new_height, colours, subpixelorder, rotate_buffer, size, ZM_BUFTYPE_ZM);
}
/* RGB32 compatible: complete */
void Image::Flip( bool leftright )
{
uint8_t* flip_buffer = AllocBuffer(size);
unsigned int line_bytes = width*colours;
unsigned int line_bytes2 = 2*line_bytes;
if ( leftright )
{
// Horizontal flip, left to right
unsigned char *s_ptr = buffer+line_bytes;
unsigned char *d_ptr = flip_buffer;
unsigned char *max_d_ptr = flip_buffer + size;
if ( colours == ZM_COLOUR_GRAY8 )
{
while( d_ptr < max_d_ptr )
{
for ( unsigned int j = 0; j < width; j++ )
{
s_ptr--;
*d_ptr++ = *s_ptr;
}
s_ptr += line_bytes2;
}
}
else if ( colours == ZM_COLOUR_RGB32 )
{
Rgb* s_rptr = (Rgb*)s_ptr;
Rgb* d_rptr = (Rgb*)flip_buffer;
Rgb* max_d_rptr = (Rgb*)max_d_ptr;
while( d_rptr < max_d_rptr )
{
for ( unsigned int j = 0; j < width; j++ )
{
s_rptr--;
*d_rptr++ = *s_rptr;
}
s_rptr += width * 2;
}
}
else /* Assume RGB24 */
{
while( d_ptr < max_d_ptr )
{
for ( unsigned int j = 0; j < width; j++ )
{
s_ptr -= 3;
*d_ptr++ = *s_ptr;
*d_ptr++ = *(s_ptr+1);
*d_ptr++ = *(s_ptr+2);
}
s_ptr += line_bytes2;
}
}
}
else
{
// Vertical flip, top to bottom
unsigned char *s_ptr = buffer+(height*line_bytes);
unsigned char *d_ptr = flip_buffer;
while( s_ptr > buffer )
{
s_ptr -= line_bytes;
memcpy( d_ptr, s_ptr, line_bytes );
d_ptr += line_bytes;
}
}
AssignDirect( width, height, colours, subpixelorder, flip_buffer, size, ZM_BUFTYPE_ZM);
}
/*
 * Resize the image by factor/ZM_SCALE_BASE using pixel replication (when
 * enlarging) or pixel dropping (when shrinking); no interpolation is done.
 *
 * factor: scale expressed relative to ZM_SCALE_BASE. Zero is rejected with an
 *         error and a factor equal to ZM_SCALE_BASE is a no-op.
 *
 * The result is written to a newly allocated buffer which is passed to
 * AssignDirect() together with the dimensions actually produced by the loops.
 */
void Image::Scale( unsigned int factor )
{
if ( !factor )
{
Error( "Bogus scale factor %d found", factor );
return;
}
if ( factor == ZM_SCALE_BASE )
{
return;
}
/* Nominal target size; the final size is taken from the loop counters below */
unsigned int new_width = (width*factor)/ZM_SCALE_BASE;
unsigned int new_height = (height*factor)/ZM_SCALE_BASE;
/* Allocate one extra row and column beyond the nominal size */
size_t scale_buffer_size = (new_width+1) * (new_height+1) * colours;
uint8_t* scale_buffer = AllocBuffer(scale_buffer_size);
if ( factor > ZM_SCALE_BASE )
{
/* Enlarging: every source pixel and every source row is emitted one or more times */
unsigned char *pd = scale_buffer;
unsigned int wc = width*colours;
unsigned int nwc = new_width*colours;
/* Accumulators start at half the base so that the integer division rounds to nearest */
unsigned int h_count = ZM_SCALE_BASE/2;
unsigned int last_h_index = 0;
unsigned int last_w_index = 0;
unsigned int h_index;
for ( unsigned int y = 0; y < height; y++ )
{
unsigned char *ps = &buffer[y*wc];
unsigned int w_count = ZM_SCALE_BASE/2;
unsigned int w_index;
last_w_index = 0;
for ( unsigned int x = 0; x < width; x++ )
{
w_count += factor;
w_index = w_count/ZM_SCALE_BASE;
/* Emit this source pixel once per destination column it maps to */
for (unsigned int f = last_w_index; f < w_index; f++ )
{
for ( unsigned int c = 0; c < colours; c++ )
{
*pd++ = *(ps+c);
}
}
ps += colours;
last_w_index = w_index;
}
h_count += factor;
h_index = h_count/ZM_SCALE_BASE;
/* The row was written once above; duplicate it for the remaining destination rows */
/* NOTE(review): the copy uses nwc (nominal width) while the row just written is
   last_w_index pixels wide; these look like they can differ by rounding - confirm */
for ( unsigned int f = last_h_index+1; f < h_index; f++ )
{
memcpy( pd, pd-nwc, nwc );
pd += nwc;
}
last_h_index = h_index;
}
/* Report the dimensions that were actually generated */
new_width = last_w_index;
new_height = last_h_index;
}
else
{
/* Shrinking: a source row/pixel is copied only when the destination index advances */
unsigned char *pd = scale_buffer;
unsigned int wc = width*colours;
unsigned int xstart = factor/2;
unsigned int ystart = factor/2;
unsigned int h_count = ystart;
unsigned int last_h_index = 0;
unsigned int last_w_index = 0;
unsigned int h_index;
for ( unsigned int y = 0; y < (unsigned int)height; y++ )
{
h_count += factor;
h_index = h_count/ZM_SCALE_BASE;
if ( h_index > last_h_index )
{
/* This source row contributes a destination row */
unsigned int w_count = xstart;
unsigned int w_index;
last_w_index = 0;
unsigned char *ps = &buffer[y*wc];
for ( unsigned int x = 0; x < (unsigned int)width; x++ )
{
w_count += factor;
w_index = w_count/ZM_SCALE_BASE;
if ( w_index > last_w_index )
{
/* Keep this pixel */
for ( unsigned int c = 0; c < colours; c++ )
{
*pd++ = *ps++;
}
}
else
{
/* Drop this pixel */
ps += colours;
}
last_w_index = w_index;
}
}
last_h_index = h_index;
}
/* Report the dimensions that were actually generated */
new_width = last_w_index;
new_height = last_h_index;
}
AssignDirect( new_width, new_height, colours, subpixelorder, scale_buffer, scale_buffer_size, ZM_BUFTYPE_ZM);
}
/**
 * Simple deinterlacing: overwrite every odd line with a copy of the even line
 * directly above it.
 *
 * Works on GRAY8, RGB24 and RGB32 images; any other format is reported with
 * Error() and left untouched. When the image has an odd number of lines the
 * final (even) line has no odd partner and is left as it is.
 */
void Image::Deinterlace_Discard()
{
  if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32) )
  {
    Error("Deinterlace called with unexpected colours: %d", colours);
    return;
  }

  /* A line is width*colours bytes in every supported format, so one copy loop serves all */
  const size_t line_bytes = (size_t)width * colours;

  /* Stop while line y+1 still exists, otherwise an odd height would write past the image */
  for ( unsigned int y = 0; (y+1) < (unsigned int)height; y += 2 )
  {
    const uint8_t *psrc = buffer + ((size_t)y * line_bytes);
    uint8_t *pdest = buffer + ((size_t)(y+1) * line_bytes);
    memcpy( pdest, psrc, line_bytes );
  }
}
/**
 * Simple deinterlacing: every odd line becomes the per-byte average of the
 * line above and the line below it.
 *
 * An odd line that is also the last line of the image has no line below and
 * is replaced by a copy of the line above. Works on GRAY8, RGB24 and RGB32
 * images; any other format is reported with Error() and left untouched.
 * Images with fewer than two lines are left untouched.
 */
void Image::Deinterlace_Linear()
{
  if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32) )
  {
    Error("Deinterlace called with unexpected colours: %d", colours);
    return;
  }
  if ( height < 2 )
  {
    /* There is no odd line to rebuild and no "line above" to copy from */
    return;
  }

  /* The averaging is done per byte, so every supported format shares this loop */
  const size_t line_bytes = (size_t)width * colours;

  for ( unsigned int y = 1; (y+1) < (unsigned int)height; y += 2 )
  {
    const uint8_t *pabove = buffer + ((size_t)(y-1) * line_bytes);
    const uint8_t *pbelow = buffer + ((size_t)(y+1) * line_bytes);
    uint8_t *pcurrent = buffer + ((size_t)y * line_bytes);
    for ( size_t b = 0; b < line_bytes; b++ )
    {
      *pcurrent++ = (*pabove++ + *pbelow++) >> 1;
    }
  }

  /* Special case for the last line: only an even height ends on an odd line.
     With an odd height the last line is an even line and must be kept. */
  if ( !(height & 1) )
  {
    memcpy( buffer + ((size_t)(height-1) * line_bytes), buffer + ((size_t)(height-2) * line_bytes), line_bytes );
  }
}
/*
 * Simple deinterlacing by blending the two fields 50/50: each even/odd line
 * pair is replaced, byte by byte, with the truncated mean of the two lines.
 * Formats other than GRAY8, RGB24 and RGB32 are reported and left untouched.
 */
void Image::Deinterlace_Blend()
{
  const bool supported = ( colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32 );
  if ( !supported )
  {
    Error("Deinterlace called with unexpected colours: %d", colours);
    return;
  }

  /* The blend is per byte, so one loop over the bytes of a line covers every format */
  const unsigned int bytes_per_line = width * colours;

  for ( unsigned int odd = 1; odd < (unsigned int)height; odd += 2 )
  {
    uint8_t *upper = buffer + ((odd-1) * bytes_per_line);
    uint8_t *lower = buffer + (odd * bytes_per_line);
    for ( unsigned int n = 0; n < bytes_per_line; n++ )
    {
      const uint8_t mixed = (upper[n] + lower[n]) >> 1;
      upper[n] = mixed;
      lower[n] = mixed;
    }
  }
}
/**
 * Simple deinterlacing: blend the two fields together at a custom ratio.
 * Each byte of an even/odd line pair is moved towards its partner by
 * (difference >> divider).
 *
 * @param divider  Shift applied to the difference between the lines:
 *                 1 = 50% blending
 *                 2 = 25% blending
 *                 3 = 12.5% blending
 *                 4 = 6.25% blending
 *                 Any other value is reported with Error() and the image is
 *                 left untouched.
 *
 * Formats other than GRAY8, RGB24 and RGB32 are reported and left untouched.
 */
void Image::Deinterlace_Blend_CustomRatio(int divider)
{
  if ( divider < 1 || divider > 4 ) {
    Error("Deinterlace called with invalid blend ratio");
    /* Do not carry on: a negative or oversized shift count would be meaningless */
    return;
  }

  if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32) )
  {
    Error("Deinterlace called with unexpected colours: %d", colours);
    return;
  }

  /* The blend is per byte, so every supported format shares this loop */
  const size_t line_bytes = (size_t)width * colours;

  for ( unsigned int y = 1; y < (unsigned int)height; y += 2 )
  {
    uint8_t *pabove = buffer + ((size_t)(y-1) * line_bytes);
    uint8_t *pcurrent = buffer + ((size_t)y * line_bytes);
    for ( size_t b = 0; b < line_bytes; b++ )
    {
      /* Both results are derived from the original pair before either is stored */
      const uint8_t above = pabove[b];
      const uint8_t current = pcurrent[b];
      pcurrent[b] = ((above - current)>>divider) + current;
      pabove[b] = ((current - above)>>divider) + above;
    }
  }
}
void Image::Deinterlace_4Field(const Image* next_image, unsigned int threshold)
{
if ( !(width == next_image->width && height == next_image->height && colours == next_image->colours && subpixelorder == next_image->subpixelorder) )
{
Panic( "Attempt to deinterlace different sized images, expected %dx%dx%d %d, got %dx%dx%d %d", width, height, colours, subpixelorder, next_image->width, next_image->height, next_image->colours, next_image->subpixelorder);
}
switch(colours) {
case ZM_COLOUR_RGB24:
{
if(subpixelorder == ZM_SUBPIX_ORDER_BGR) {
/* BGR subpixel order */
std_deinterlace_4field_bgr(buffer, next_image->buffer, threshold, width, height);
} else {
/* Assume RGB subpixel order */
std_deinterlace_4field_rgb(buffer, next_image->buffer, threshold, width, height);
}
break;
}
case ZM_COLOUR_RGB32:
{
if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) {
/* ARGB subpixel order */
(*fptr_deinterlace_4field_argb)(buffer, next_image->buffer, threshold, width, height);
} else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
/* ABGR subpixel order */
(*fptr_deinterlace_4field_abgr)(buffer, next_image->buffer, threshold, width, height);
} else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
/* BGRA subpixel order */
(*fptr_deinterlace_4field_bgra)(buffer, next_image->buffer, threshold, width, height);
} else {
/* Assume RGBA subpixel order */
(*fptr_deinterlace_4field_rgba)(buffer, next_image->buffer, threshold, width, height);
}
break;
}
case ZM_COLOUR_GRAY8:
(*fptr_deinterlace_4field_gray8)(buffer, next_image->buffer, threshold, width, height);
break;
default:
Panic("Deinterlace_4Field called with unexpected colours: %d",colours);
break;
}
}
/************************************************* BLEND FUNCTIONS *************************************************/
/*
 * SSE2 blend of two byte buffers using a power of two ratio.
 * For every byte: result = col1 + (col2 >> divider) - (col1 >> divider),
 * where divider (1..6) is picked from blendpercent (50% down to 1.5625%).
 *
 * col1, col2 and result are accessed with aligned 16 byte loads/stores, so
 * they must be 16 byte aligned, and count (in bytes) must be a multiple of 16.
 * A count of zero is a no-op. On non x86 builds the function panics.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("sse2")))
#endif
void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  /* The loop below is a do/while: entering it with nothing to do would run off the buffers */
  if(count == 0) {
    return;
  }

  /* Shift count and the per byte mask that removes the bits shifted in from the neighbouring byte */
  uint32_t divider;
  uint32_t clearmask;

  /* Attempt to match the blending percent to one of the possible values */
  if(blendpercent < 2.34375) {
    // 1.5625% blending
    divider = 6;
    clearmask = 0x03030303;
  } else if(blendpercent < 4.6875) {
    // 3.125% blending
    divider = 5;
    clearmask = 0x07070707;
  } else if(blendpercent < 9.375) {
    // 6.25% blending
    divider = 4;
    clearmask = 0x0F0F0F0F;
  } else if(blendpercent < 18.75) {
    // 12.5% blending
    divider = 3;
    clearmask = 0x1F1F1F1F;
  } else if(blendpercent < 37.5) {
    // 25% blending
    divider = 2;
    clearmask = 0x3F3F3F3F;
  } else {
    // 50% blending
    divider = 1;
    clearmask = 0x7F7F7F7F;
  }

  /* The pointers and the counter are modified by the code, so they are
     read-write ("+r") operands. A numeric local label keeps the symbol unique. */
  __asm__ __volatile__(
  "movd %4, %%xmm3\n\t"
  "movd %5, %%xmm4\n\t"
  "pshufd $0x0, %%xmm3, %%xmm3\n\t"
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x10, %2\n\t"
  "1:\n\t"
  "movdqa (%0,%3),%%xmm0\n\t"
  "movdqa %%xmm0,%%xmm2\n\t"
  "movdqa (%1,%3),%%xmm1\n\t"
  "psrlq %%xmm4,%%xmm0\n\t"
  "psrlq %%xmm4,%%xmm1\n\t"
  "pand %%xmm3,%%xmm1\n\t"
  "pand %%xmm3,%%xmm0\n\t"
  "psubb %%xmm0,%%xmm1\n\t"
  "paddb %%xmm2,%%xmm1\n\t"
  "movntdq %%xmm1,(%2,%3)\n\t"
  "sub $0x10, %3\n\t"
  "jnz 1b\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  : "m" (clearmask), "m" (divider)
  : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/*
 * Portable blend of two byte buffers using a power of two ratio.
 * For every byte: result = col1 + ((col2 - col1) >> divider), where divider
 * (1..6) is picked from blendpercent (50% down to 1.5625%).
 *
 * count is the number of bytes to process and may be any value; exactly
 * count bytes are read from col1/col2 and written to result.
 */
__attribute__((noinline)) void std_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
  int divider;

  /* Attempt to match the blending percent to one of the possible values */
  if(blendpercent < 2.34375) {
    // 1.5625% blending
    divider = 6;
  } else if(blendpercent < 4.6875) {
    // 3.125% blending
    divider = 5;
  } else if(blendpercent < 9.375) {
    // 6.25% blending
    divider = 4;
  } else if(blendpercent < 18.75) {
    // 12.5% blending
    divider = 3;
  } else if(blendpercent < 37.5) {
    // 25% blending
    divider = 2;
  } else {
    // 50% blending
    divider = 1;
  }

  /* One byte per iteration so that a count which is not a multiple of 16
     never touches memory beyond the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++) {
    result[i] = ((col2[i] - col1[i])>>divider) + col1[i];
  }
}
/*
 * Floating point blend of two byte buffers.
 * Each of the count output bytes is col1 weighted by (1 - blendpercent/100)
 * plus col2 weighted by blendpercent/100, converted back to a byte.
 */
__attribute__((noinline)) void std_blend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
  const double divide = blendpercent / 100.0;
  const double opacity = 1.0 - divide;

  for(unsigned long i = 0; i < count; i++) {
    result[i] = (col1[i] * opacity) + (col2[i] * divide);
  }
}
/************************************************* DELTA FUNCTIONS *************************************************/
/* Grayscale */
/*
 * Absolute difference of two grayscale buffers.
 * Writes |col1[i] - col2[i]| to result[i] for each of the count pixels.
 * count may be any value; nothing beyond count bytes is read or written.
 */
__attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  /* One pixel per iteration so a count that is not a multiple of 16 cannot
     overrun the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++) {
    result[i] = abs(col1[i] - col2[i]);
  }
}
/* RGB24: RGB */
/*
 * Weighted difference of two RGB24 buffers with R,G,B byte order.
 * For each of the count pixels the absolute channel differences are combined
 * as (2*r + 5*g + b) / 8 and stored as one byte in result.
 * count may be any value; nothing beyond count pixels is read or written.
 */
__attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  /* One pixel per iteration so a count that is not a multiple of 4 cannot
     overrun the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++, col1 += 3, col2 += 3) {
    const int r = abs(col1[0] - col2[0]);
    const int g = abs(col1[1] - col2[1]);
    const int b = abs(col1[2] - col2[2]);
    result[i] = (r + r + b + g + g + g + g + g)>>3;
  }
}
/* RGB24: BGR */
/*
 * Weighted difference of two RGB24 buffers with B,G,R byte order.
 * For each of the count pixels the absolute channel differences are combined
 * as (2*r + 5*g + b) / 8 and stored as one byte in result.
 * count may be any value; nothing beyond count pixels is read or written.
 */
__attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  /* One pixel per iteration so a count that is not a multiple of 4 cannot
     overrun the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++, col1 += 3, col2 += 3) {
    const int b = abs(col1[0] - col2[0]);
    const int g = abs(col1[1] - col2[1]);
    const int r = abs(col1[2] - col2[2]);
    result[i] = (r + r + b + g + g + g + g + g)>>3;
  }
}
/* RGB32: RGBA */
/*
 * Weighted difference of two RGB32 buffers with R,G,B,A byte order.
 * For each of the count pixels the absolute channel differences are combined
 * as (2*r + 5*g + b) / 8 and stored as one byte in result; the fourth byte of
 * each pixel is ignored.
 * count may be any value; nothing beyond count pixels is read or written.
 */
__attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  /* One pixel per iteration so a count that is not a multiple of 4 cannot
     overrun the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++, col1 += 4, col2 += 4) {
    const int r = abs(col1[0] - col2[0]);
    const int g = abs(col1[1] - col2[1]);
    const int b = abs(col1[2] - col2[2]);
    result[i] = (r + r + b + g + g + g + g + g)>>3;
  }
}
/* RGB32: BGRA */
/*
 * Weighted difference of two RGB32 buffers with B,G,R,A byte order.
 * For each of the count pixels the absolute channel differences are combined
 * as (2*r + 5*g + b) / 8 and stored as one byte in result; the fourth byte of
 * each pixel is ignored.
 * count may be any value; nothing beyond count pixels is read or written.
 */
__attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  /* One pixel per iteration so a count that is not a multiple of 4 cannot
     overrun the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++, col1 += 4, col2 += 4) {
    const int b = abs(col1[0] - col2[0]);
    const int g = abs(col1[1] - col2[1]);
    const int r = abs(col1[2] - col2[2]);
    result[i] = (r + r + b + g + g + g + g + g)>>3;
  }
}
/* RGB32: ARGB */
/*
 * Weighted difference of two RGB32 buffers with A,R,G,B byte order.
 * For each of the count pixels the absolute channel differences are combined
 * as (2*r + 5*g + b) / 8 and stored as one byte in result; the first byte of
 * each pixel is ignored.
 * count may be any value; nothing beyond count pixels is read or written.
 */
__attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  /* One pixel per iteration so a count that is not a multiple of 4 cannot
     overrun the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++, col1 += 4, col2 += 4) {
    const int r = abs(col1[1] - col2[1]);
    const int g = abs(col1[2] - col2[2]);
    const int b = abs(col1[3] - col2[3]);
    result[i] = (r + r + b + g + g + g + g + g)>>3;
  }
}
/* RGB32: ABGR */
/*
 * Weighted difference of two RGB32 buffers with A,B,G,R byte order.
 * For each of the count pixels the absolute channel differences are combined
 * as (2*r + 5*g + b) / 8 and stored as one byte in result; the first byte of
 * each pixel is ignored.
 * count may be any value; nothing beyond count pixels is read or written.
 */
__attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  /* One pixel per iteration so a count that is not a multiple of 4 cannot
     overrun the buffers; unrolling is left to the compiler */
  for(unsigned long i = 0; i < count; i++, col1 += 4, col2 += 4) {
    const int b = abs(col1[1] - col2[1]);
    const int g = abs(col1[2] - col2[2]);
    const int r = abs(col1[3] - col2[3]);
    result[i] = (r + r + b + g + g + g + g + g)>>3;
  }
}
/* Grayscale SSE2 */
/*
 * SSE2 absolute difference of two grayscale buffers, 16 pixels per iteration:
 * result = max(col1,col2) - min(col1,col2) for every byte.
 *
 * All three buffers are accessed with aligned 16 byte loads/stores, so they
 * must be 16 byte aligned, and count (in pixels) must be a multiple of 16.
 * A count of zero is a no-op. On non x86 builds the function panics.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("sse2")))
#endif
void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  /* The loop below is a do/while: entering it with nothing to do would run off the buffers */
  if(count == 0) {
    return;
  }

  /* The pointers and the counter are modified by the code, so they are
     read-write ("+r") operands. A numeric local label keeps the symbol unique. */
  __asm__ __volatile__ (
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x10, %2\n\t"
  "1:\n\t"
  "movdqa (%0,%3), %%xmm1\n\t"
  "movdqa (%1,%3), %%xmm2\n\t"
  "movdqa %%xmm1, %%xmm3\n\t"
  "movdqa %%xmm2, %%xmm4\n\t"
  "pmaxub %%xmm1, %%xmm2\n\t"
  "pminub %%xmm3, %%xmm4\n\t"
  "psubb %%xmm4, %%xmm2\n\t"
  "movntdq %%xmm2, (%2,%3)\n\t"
  "sub $0x10, %3\n\t"
  "jnz 1b\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  :
  : "%xmm1", "%xmm2", "%xmm3", "%xmm4", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: RGBA SSE2 */
/*
 * SSE2 weighted difference of two RGB32 buffers with R,G,B,A byte order,
 * 4 pixels per iteration.
 *
 * Every byte is first shifted right by 3, then the absolute channel
 * differences are combined as 2*r + 5*g + b (byte 0, 1, 2 of each pixel) and
 * stored as one byte per pixel in result. Byte 3 is ignored.
 *
 * col1 and col2 are read with aligned 16 byte loads and must be 16 byte
 * aligned; count (in pixels) must be a multiple of 4. A count of zero is a
 * no-op. On non x86 builds the function panics.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("sse2")))
#endif
void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  /* The loop below is a do/while: entering it with nothing to do would run off the buffers */
  if(count == 0) {
    return;
  }

  /* The pointers and the counter are modified by the code, so they are
     read-write ("+r") operands. A numeric local label keeps the symbol unique. */
  __asm__ __volatile__ (
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "1:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  "movdqa %%xmm1, %%xmm5\n\t"
  "movdqa %%xmm2, %%xmm6\n\t"
  "pmaxub %%xmm1, %%xmm2\n\t"
  "pminub %%xmm5, %%xmm6\n\t"
  "psubb %%xmm6, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm3\n\t"
  "psrld $0x8, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  "movdqa %%xmm3, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm1, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  "packssdw %%xmm1, %%xmm1\n\t"
  "packuswb %%xmm1, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz 1b\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  :
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: BGRA SSE2 */
/*
 * SSE2 weighted difference of two RGB32 buffers with B,G,R,A byte order,
 * 4 pixels per iteration.
 *
 * Every byte is first shifted right by 3, then the absolute channel
 * differences are combined as b + 5*g + 2*r (byte 0, 1, 2 of each pixel) and
 * stored as one byte per pixel in result. Byte 3 is ignored.
 *
 * col1 and col2 are read with aligned 16 byte loads and must be 16 byte
 * aligned; count (in pixels) must be a multiple of 4. A count of zero is a
 * no-op. On non x86 builds the function panics.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("sse2")))
#endif
void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  /* The loop below is a do/while: entering it with nothing to do would run off the buffers */
  if(count == 0) {
    return;
  }

  /* The pointers and the counter are modified by the code, so they are
     read-write ("+r") operands. A numeric local label keeps the symbol unique. */
  __asm__ __volatile__ (
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "1:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  "movdqa %%xmm1, %%xmm5\n\t"
  "movdqa %%xmm2, %%xmm6\n\t"
  "pmaxub %%xmm1, %%xmm2\n\t"
  "pminub %%xmm5, %%xmm6\n\t"
  "psubb %%xmm6, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm3\n\t"
  "psrld $0x8, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  "movdqa %%xmm3, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  "packssdw %%xmm1, %%xmm1\n\t"
  "packuswb %%xmm1, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz 1b\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  :
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: ARGB SSE2
 * Writes one difference byte per pixel for two 32-bit ARGB buffers.
 * Every channel is reduced to 5 bits (>>3), the absolute per-channel difference is taken
 * and the output is 2*R + 5*G + B of those differences; the alpha byte is ignored.
 * The loop runs from the end of the buffers towards the start, four pixels per iteration:
 *  - col1 and col2 are loaded with movdqa and therefore have to be 16-byte aligned
 *  - count is the number of pixels and has to be a non-zero multiple of 4
 * The pointers and the counter are changed by the asm, so they are declared as
 * read-write ("+r") operands; the operand numbering (%0-%3) is the same as before.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("sse2")))
#endif
void sse2_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  __asm__ __volatile__ (
  /* xmm4 = 0x1F in every byte, xmm0 = 0x000000FF in every dword */
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  /* Bias the pointers so that (base,count) addresses the last group of four pixels */
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "sse2_delta8_argb_iter:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  /* 5 bits per channel */
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  /* Absolute difference: max - min */
  "movdqa %%xmm1, %%xmm5\n\t"
  "movdqa %%xmm2, %%xmm6\n\t"
  "pmaxub %%xmm1, %%xmm2\n\t"
  "pminub %%xmm5, %%xmm6\n\t"
  "psubb %%xmm6, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm3\n\t"
  /* byte 2 (G) * 5 */
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  /* byte 1 (R) * 2 */
  "movdqa %%xmm3, %%xmm1\n\t"
  "psrld $0x8, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm1, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* byte 3 (B) * 1 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x18, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* Pack the four dword sums into four bytes and store them */
  "packssdw %%xmm1, %%xmm1\n\t"
  "packuswb %%xmm1, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz sse2_delta8_argb_iter\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  :
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: ABGR SSE2
 * Writes one difference byte per pixel for two 32-bit ABGR buffers.
 * Every channel is reduced to 5 bits (>>3), the absolute per-channel difference is taken
 * and the output is B + 5*G + 2*R of those differences; the alpha byte is ignored.
 * The loop runs from the end of the buffers towards the start, four pixels per iteration:
 *  - col1 and col2 are loaded with movdqa and therefore have to be 16-byte aligned
 *  - count is the number of pixels and has to be a non-zero multiple of 4
 * The pointers and the counter are changed by the asm, so they are declared as
 * read-write ("+r") operands; the operand numbering (%0-%3) is the same as before.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("sse2")))
#endif
void sse2_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  __asm__ __volatile__ (
  /* xmm4 = 0x1F in every byte, xmm0 = 0x000000FF in every dword */
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  /* Bias the pointers so that (base,count) addresses the last group of four pixels */
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "sse2_delta8_abgr_iter:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  /* 5 bits per channel */
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  /* Absolute difference: max - min */
  "movdqa %%xmm1, %%xmm5\n\t"
  "movdqa %%xmm2, %%xmm6\n\t"
  "pmaxub %%xmm1, %%xmm2\n\t"
  "pminub %%xmm5, %%xmm6\n\t"
  "psubb %%xmm6, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm3\n\t"
  /* byte 2 (G) * 5 */
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  /* byte 1 (B) * 1 */
  "movdqa %%xmm3, %%xmm1\n\t"
  "psrld $0x8, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* byte 3 (R) * 2 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x18, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* Pack the four dword sums into four bytes and store them */
  "packssdw %%xmm1, %%xmm1\n\t"
  "packuswb %%xmm1, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz sse2_delta8_abgr_iter\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  :
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: RGBA SSSE3
 * Writes one difference byte per pixel for two 32-bit RGBA buffers.
 * Every channel is reduced to 5 bits (>>3), the absolute per-channel difference is taken
 * (psubb + pabsb) and the output is 2*R + 5*G + B of those differences; alpha is ignored.
 * The file-level movemask shuffle table gathers the low byte of each dword sum.
 * The loop runs from the end of the buffers towards the start, four pixels per iteration:
 *  - col1 and col2 are loaded with movdqa and therefore have to be 16-byte aligned
 *  - count is the number of pixels and has to be a non-zero multiple of 4
 * The pointers and the counter are changed by the asm, so they are declared as
 * read-write ("+r") operands; the operand numbering (%0-%4) is the same as before.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  __asm__ __volatile__ (
  /* xmm4 = 0x1F in every byte, xmm0 = 0x000000FF in every dword, xmm5 = shuffle mask */
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  "movdqa %4, %%xmm5\n\t"
  /* Bias the pointers so that (base,count) addresses the last group of four pixels */
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "ssse3_delta8_rgba_iter:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  /* 5 bits per channel */
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  /* Absolute difference */
  "psubb %%xmm2, %%xmm1\n\t"
  "pabsb %%xmm1, %%xmm3\n\t"
  /* byte 1 (G) * 5 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x8, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  /* byte 0 (R) * 2 */
  "movdqa %%xmm3, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm1, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* byte 2 (B) * 1 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* Gather the four sums into the low dword and store them */
  "pshufb %%xmm5, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz ssse3_delta8_rgba_iter\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  : "m" (*movemask)
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: BGRA SSSE3
 * Writes one difference byte per pixel for two 32-bit BGRA buffers.
 * Every channel is reduced to 5 bits (>>3), the absolute per-channel difference is taken
 * (psubb + pabsb) and the output is B + 5*G + 2*R of those differences; alpha is ignored.
 * The file-level movemask shuffle table gathers the low byte of each dword sum.
 * The loop runs from the end of the buffers towards the start, four pixels per iteration:
 *  - col1 and col2 are loaded with movdqa and therefore have to be 16-byte aligned
 *  - count is the number of pixels and has to be a non-zero multiple of 4
 * The pointers and the counter are changed by the asm, so they are declared as
 * read-write ("+r") operands; the operand numbering (%0-%4) is the same as before.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  __asm__ __volatile__ (
  /* xmm4 = 0x1F in every byte, xmm0 = 0x000000FF in every dword, xmm5 = shuffle mask */
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  "movdqa %4, %%xmm5\n\t"
  /* Bias the pointers so that (base,count) addresses the last group of four pixels */
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "ssse3_delta8_bgra_iter:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  /* 5 bits per channel */
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  /* Absolute difference */
  "psubb %%xmm2, %%xmm1\n\t"
  "pabsb %%xmm1, %%xmm3\n\t"
  /* byte 1 (G) * 5 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x8, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  /* byte 0 (B) * 1 */
  "movdqa %%xmm3, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* byte 2 (R) * 2 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* Gather the four sums into the low dword and store them */
  "pshufb %%xmm5, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz ssse3_delta8_bgra_iter\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  : "m" (*movemask)
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: ARGB SSSE3
 * Writes one difference byte per pixel for two 32-bit ARGB buffers.
 * Every channel is reduced to 5 bits (>>3), the absolute per-channel difference is taken
 * (psubb + pabsb) and the output is 2*R + 5*G + B of those differences; alpha is ignored.
 * The file-level movemask shuffle table gathers the low byte of each dword sum.
 * The loop runs from the end of the buffers towards the start, four pixels per iteration:
 *  - col1 and col2 are loaded with movdqa and therefore have to be 16-byte aligned
 *  - count is the number of pixels and has to be a non-zero multiple of 4
 * The pointers and the counter are changed by the asm, so they are declared as
 * read-write ("+r") operands; the operand numbering (%0-%4) is the same as before.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  __asm__ __volatile__ (
  /* xmm4 = 0x1F in every byte, xmm0 = 0x000000FF in every dword, xmm5 = shuffle mask */
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  "movdqa %4, %%xmm5\n\t"
  /* Bias the pointers so that (base,count) addresses the last group of four pixels */
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "ssse3_delta8_argb_iter:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  /* 5 bits per channel */
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  /* Absolute difference */
  "psubb %%xmm2, %%xmm1\n\t"
  "pabsb %%xmm1, %%xmm3\n\t"
  /* byte 2 (G) * 5 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  /* byte 1 (R) * 2 */
  "movdqa %%xmm3, %%xmm1\n\t"
  "psrld $0x8, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm1, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* byte 3 (B) * 1 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x18, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* Gather the four sums into the low dword and store them */
  "pshufb %%xmm5, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz ssse3_delta8_argb_iter\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  : "m" (*movemask)
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGB32: ABGR SSSE3
 * Writes one difference byte per pixel for two 32-bit ABGR buffers.
 * Every channel is reduced to 5 bits (>>3), the absolute per-channel difference is taken
 * (psubb + pabsb) and the output is B + 5*G + 2*R of those differences; alpha is ignored.
 * The file-level movemask shuffle table gathers the low byte of each dword sum.
 * The loop runs from the end of the buffers towards the start, four pixels per iteration:
 *  - col1 and col2 are loaded with movdqa and therefore have to be 16-byte aligned
 *  - count is the number of pixels and has to be a non-zero multiple of 4
 * The pointers and the counter are changed by the asm, so they are declared as
 * read-write ("+r") operands; the operand numbering (%0-%4) is the same as before.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  __asm__ __volatile__ (
  /* xmm4 = 0x1F in every byte, xmm0 = 0x000000FF in every dword, xmm5 = shuffle mask */
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  "movdqa %4, %%xmm5\n\t"
  /* Bias the pointers so that (base,count) addresses the last group of four pixels */
  "sub $0x10, %0\n\t"
  "sub $0x10, %1\n\t"
  "sub $0x4, %2\n\t"
  "ssse3_delta8_abgr_iter:\n\t"
  "movdqa (%0,%3,4), %%xmm1\n\t"
  "movdqa (%1,%3,4), %%xmm2\n\t"
  /* 5 bits per channel */
  "psrlq $0x3, %%xmm1\n\t"
  "psrlq $0x3, %%xmm2\n\t"
  "pand %%xmm4, %%xmm1\n\t"
  "pand %%xmm4, %%xmm2\n\t"
  /* Absolute difference */
  "psubb %%xmm2, %%xmm1\n\t"
  "pabsb %%xmm1, %%xmm3\n\t"
  /* byte 2 (G) * 5 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  /* byte 1 (B) * 1 */
  "movdqa %%xmm3, %%xmm1\n\t"
  "psrld $0x8, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* byte 3 (R) * 2 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x18, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* Gather the four sums into the low dword and store them */
  "pshufb %%xmm5, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%2,%3)\n\t"
  "sub $0x4, %3\n\t"
  "jnz ssse3_delta8_abgr_iter\n\t"
  : "+r" (col1), "+r" (col2), "+r" (result), "+r" (count)
  : "m" (*movemask)
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/************************************************* CONVERT FUNCTIONS *************************************************/
/* RGB24 to grayscale
 * Converts count packed R,G,B pixels from col1 into count 8-bit values in result,
 * using the integer weighting (2*R + 5*G + B) / 8.
 * Pixels are converted one at a time so that a count which is not a multiple of four
 * does not read or write beyond the end of the buffers.
 */
__attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  const uint8_t* const max_ptr = result + count;

  while(result < max_ptr) {
    const unsigned int r = col1[0];
    const unsigned int g = col1[1];
    const unsigned int b = col1[2];
    /* The weights sum to 8, so the shifted value always fits in a byte */
    *result++ = (r + r + b + g + g + g + g + g)>>3;
    col1 += 3;
  }
}
/* BGR24 to grayscale
 * Converts count packed B,G,R pixels from col1 into count 8-bit values in result,
 * using the integer weighting (2*R + 5*G + B) / 8.
 * Pixels are converted one at a time so that a count which is not a multiple of four
 * does not read or write beyond the end of the buffers.
 */
__attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  const uint8_t* const max_ptr = result + count;

  while(result < max_ptr) {
    const unsigned int b = col1[0];
    const unsigned int g = col1[1];
    const unsigned int r = col1[2];
    /* The weights sum to 8, so the shifted value always fits in a byte */
    *result++ = (r + r + b + g + g + g + g + g)>>3;
    col1 += 3;
  }
}
/* RGBA to grayscale
 * Converts count 32-bit R,G,B,A pixels from col1 into count 8-bit values in result,
 * using the integer weighting (2*R + 5*G + B) / 8. The alpha byte is not read.
 * Pixels are converted one at a time so that a count which is not a multiple of four
 * does not read or write beyond the end of the buffers.
 */
__attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  const uint8_t* const max_ptr = result + count;

  while(result < max_ptr) {
    const unsigned int r = col1[0];
    const unsigned int g = col1[1];
    const unsigned int b = col1[2];
    /* The weights sum to 8, so the shifted value always fits in a byte */
    *result++ = (r + r + b + g + g + g + g + g)>>3;
    col1 += 4;
  }
}
/* BGRA to grayscale
 * Converts count 32-bit B,G,R,A pixels from col1 into count 8-bit values in result,
 * using the integer weighting (2*R + 5*G + B) / 8. The alpha byte is not read.
 * Pixels are converted one at a time so that a count which is not a multiple of four
 * does not read or write beyond the end of the buffers.
 */
__attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  const uint8_t* const max_ptr = result + count;

  while(result < max_ptr) {
    const unsigned int b = col1[0];
    const unsigned int g = col1[1];
    const unsigned int r = col1[2];
    /* The weights sum to 8, so the shifted value always fits in a byte */
    *result++ = (r + r + b + g + g + g + g + g)>>3;
    col1 += 4;
  }
}
/* ARGB to grayscale
 * Converts count 32-bit A,R,G,B pixels from col1 into count 8-bit values in result,
 * using the integer weighting (2*R + 5*G + B) / 8. The alpha byte is not read.
 * Pixels are converted one at a time so that a count which is not a multiple of four
 * does not read or write beyond the end of the buffers.
 */
__attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  const uint8_t* const max_ptr = result + count;

  while(result < max_ptr) {
    const unsigned int r = col1[1];
    const unsigned int g = col1[2];
    const unsigned int b = col1[3];
    /* The weights sum to 8, so the shifted value always fits in a byte */
    *result++ = (r + r + b + g + g + g + g + g)>>3;
    col1 += 4;
  }
}
/* ABGR to grayscale
 * Converts count 32-bit A,B,G,R pixels from col1 into count 8-bit values in result,
 * using the integer weighting (2*R + 5*G + B) / 8. The alpha byte is not read.
 * Pixels are converted one at a time so that a count which is not a multiple of four
 * does not read or write beyond the end of the buffers.
 */
__attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  const uint8_t* const max_ptr = result + count;

  while(result < max_ptr) {
    const unsigned int b = col1[1];
    const unsigned int g = col1[2];
    const unsigned int r = col1[3];
    /* The weights sum to 8, so the shifted value always fits in a byte */
    *result++ = (r + r + b + g + g + g + g + g)>>3;
    col1 += 4;
  }
}
/* Converts a YUYV image into grayscale by extracting the Y channel.
 * col1 holds two bytes per pixel with the Y sample in the first byte of each pair;
 * count Y bytes are copied to result.
 * The source is read byte-wise instead of through a uint16_t pointer, so the result does
 * not depend on host byte order or on col1 being 2-byte aligned, and a count which is
 * not a multiple of 16 does not read or write beyond the end of the buffers.
 */
__attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  const uint8_t* const max_ptr = result + count;

  while(result < max_ptr) {
    *result++ = col1[0];
    col1 += 2;
  }
}
/* RGBA to grayscale SSSE3
 * Converts count 32-bit RGBA pixels from col1 into one byte per pixel in result.
 * Every channel is reduced to 5 bits (>>3) and the output is 2*R + 5*G + B of those
 * values; the alpha byte is ignored. The file-level movemask shuffle table gathers the
 * low byte of each dword sum.
 * The loop runs from the end of the buffers towards the start, four pixels per iteration:
 *  - col1 is loaded with movdqa and therefore has to be 16-byte aligned
 *  - count is the number of pixels and has to be a non-zero multiple of 4
 * The pointers and the counter are changed by the asm, so they are declared as
 * read-write ("+r") operands; the operand numbering (%0-%3) is the same as before.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  __asm__ __volatile__ (
  /* xmm4 = 0x1F in every byte, xmm0 = 0x000000FF in every dword, xmm5 = shuffle mask */
  "mov $0x1F1F1F1F, %%eax\n\t"
  "movd %%eax, %%xmm4\n\t"
  "pshufd $0x0, %%xmm4, %%xmm4\n\t"
  "mov $0xff, %%eax\n\t"
  "movd %%eax, %%xmm0\n\t"
  "pshufd $0x0, %%xmm0, %%xmm0\n\t"
  "movdqa %3, %%xmm5\n\t"
  /* Bias the pointers so that (base,count) addresses the last group of four pixels */
  "sub $0x10, %0\n\t"
  "sub $0x4, %1\n\t"
  "ssse3_convert_rgba_gray8_iter:\n\t"
  "movdqa (%0,%2,4), %%xmm3\n\t"
  /* 5 bits per channel */
  "psrlq $0x3, %%xmm3\n\t"
  "pand %%xmm4, %%xmm3\n\t"
  /* byte 1 (G) * 5 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x8, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "movdqa %%xmm2, %%xmm1\n\t"
  "pslld $0x2, %%xmm2\n\t"
  "paddd %%xmm1, %%xmm2\n\t"
  /* byte 0 (R) * 2 */
  "movdqa %%xmm3, %%xmm1\n\t"
  "pand %%xmm0, %%xmm1\n\t"
  "paddd %%xmm1, %%xmm1\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* byte 2 (B) * 1 */
  "movdqa %%xmm3, %%xmm2\n\t"
  "psrld $0x10, %%xmm2\n\t"
  "pand %%xmm0, %%xmm2\n\t"
  "paddd %%xmm2, %%xmm1\n\t"
  /* Gather the four sums into the low dword and store them */
  "pshufb %%xmm5, %%xmm1\n\t"
  "movd %%xmm1, %%eax\n\t"
  "movnti %%eax, (%1,%2)\n\t"
  "sub $0x4, %2\n\t"
  "jnz ssse3_convert_rgba_gray8_iter\n\t"
  : "+r" (col1), "+r" (result), "+r" (count)
  : "m" (*movemask)
  : "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* Converts a YUYV image into grayscale by extracting the Y channel (SSSE3).
 * Each iteration loads 32 source bytes (16 pixels), keeps the even-numbered bytes and
 * stores the 16 resulting bytes with a non-temporal store.
 *  - col1 (movdqa) and result (movntdq) have to be 16-byte aligned
 *  - the loop body runs before count is tested, so at least 16 pixels are always
 *    converted and count is effectively rounded up to a multiple of 16
 * col1, result and the loop counter are changed by the asm and are therefore declared as
 * read-write ("+r") operands; xmm0 and xmm1 are overwritten and are listed as clobbers.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
  unsigned long i = 0;

  /* movemask1 moves the even bytes to the low half, movemask2 to the high half */
  __attribute__((aligned(16))) static const uint8_t movemask1[16] = {0,2,4,6,8,10,12,14,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
  __attribute__((aligned(16))) static const uint8_t movemask2[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0,2,4,6,8,10,12,14};

  /* XMM0 - General purpose */
  /* XMM1 - General purpose */
  /* XMM2 - unused */
  /* XMM3 - shift mask 1 */
  /* XMM4 - shift mask 2 */
  /* XMM5 - unused*/
  /* XMM6 - unused */
  /* XMM7 - unused */

  /* Operands: %0 = col1, %1 = result, %2 = i, %3 = count, %4 = movemask1, %5 = movemask2 */
  __asm__ __volatile__ (
  "movdqa %4, %%xmm3\n\t"
  "movdqa %5, %%xmm4\n\t"
  "algo_ssse3_convert_yuyv_gray8:\n\t"
  "movdqa (%0), %%xmm0\n\t"
  "pshufb %%xmm3, %%xmm0\n\t"
  "movdqa 0x10(%0), %%xmm1\n\t"
  "pshufb %%xmm4, %%xmm1\n\t"
  "por %%xmm1, %%xmm0\n\t"
  "movntdq %%xmm0, (%1)\n\t"
  "add $0x10, %2\n\t"
  "add $0x10, %1\n\t"
  "add $0x20, %0\n\t"
  "cmp %3, %2\n\t"
  "jb algo_ssse3_convert_yuyv_gray8\n\t"
  : "+r" (col1), "+r" (result), "+r" (i)
#if (defined(_DEBUG) && !defined(__x86_64__)) /* Use one less register to allow compilation to success on 32bit with omit frame pointer disabled */
  : "m" (count), "m" (*movemask1), "m" (*movemask2)
#else
  : "r" (count), "m" (*movemask1), "m" (*movemask2)
#endif
  : "%xmm0", "%xmm1", "%xmm3", "%xmm4", "cc", "memory"
  );
#else
  Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* YUYV to RGB24 - relocated from zm_local_camera.cpp
 * Converts count pixels, two at a time: each 4-byte group Y1,U,Y2,V in col1 produces two
 * R,G,B triplets in result. The chroma contributions come from the file-level lookup
 * tables r_v_table, g_u_table, g_v_table and b_u_table.
 * The intermediate values are signed: a table entry can be negative, and with unsigned
 * arithmetic a negative sum wrapped around and was clamped to 255 instead of 0.
 */
__attribute__((noinline)) void zm_convert_yuyv_rgb(const uint8_t* col1, uint8_t* result, unsigned long count) {
  int r,g,b;
  int y1,y2;
  unsigned int u,v;

  for(unsigned long i=0; i < count; i += 2, col1 += 4, result += 6) {
    y1 = col1[0];
    u = col1[1];
    y2 = col1[2];
    v = col1[3];

    /* First pixel of the pair */
    r = y1 + r_v_table[v];
    g = y1 - (g_u_table[u]+g_v_table[v]);
    b = y1 + b_u_table[u];
    result[0] = r<0?0:(r>255?255:r);
    result[1] = g<0?0:(g>255?255:g);
    result[2] = b<0?0:(b>255?255:b);

    /* Second pixel shares the same U and V samples */
    r = y2 + r_v_table[v];
    g = y2 - (g_u_table[u]+g_v_table[v]);
    b = y2 + b_u_table[u];
    result[3] = r<0?0:(r>255?255:r);
    result[4] = g<0?0:(g>255?255:g);
    result[5] = b<0?0:(b>255?255:b);
  }
}
/* YUYV to RGBA - modified the one above
 * Converts count pixels, two at a time: each 4-byte group Y1,U,Y2,V in col1 produces two
 * 4-byte pixels in result. Only the R, G and B bytes are written; the fourth byte of
 * each output pixel (result[3] and result[7]) is left untouched.
 * The chroma contributions come from the file-level lookup tables r_v_table, g_u_table,
 * g_v_table and b_u_table.
 * The intermediate values are signed: a table entry can be negative, and with unsigned
 * arithmetic a negative sum wrapped around and was clamped to 255 instead of 0.
 */
__attribute__((noinline)) void zm_convert_yuyv_rgba(const uint8_t* col1, uint8_t* result, unsigned long count) {
  int r,g,b;
  int y1,y2;
  unsigned int u,v;

  for(unsigned long i=0; i < count; i += 2, col1 += 4, result += 8) {
    y1 = col1[0];
    u = col1[1];
    y2 = col1[2];
    v = col1[3];

    /* First pixel of the pair */
    r = y1 + r_v_table[v];
    g = y1 - (g_u_table[u]+g_v_table[v]);
    b = y1 + b_u_table[u];
    result[0] = r<0?0:(r>255?255:r);
    result[1] = g<0?0:(g>255?255:g);
    result[2] = b<0?0:(b>255?255:b);

    /* Second pixel shares the same U and V samples */
    r = y2 + r_v_table[v];
    g = y2 - (g_u_table[u]+g_v_table[v]);
    b = y2 + b_u_table[u];
    result[4] = r<0?0:(r>255?255:r);
    result[5] = g<0?0:(g>255?255:g);
    result[6] = b<0?0:(b>255?255:b);
  }
}
/* RGB555 to RGB24 - relocated from zm_local_camera.cpp
 * Expands count 2-byte pixels from col1 into R,G,B byte triplets in result.
 * Blue comes from the low 5 bits of the first byte, green from the top 3 bits of the
 * first byte plus the low 2 bits of the second, red from bits 2-6 of the second byte.
 * Each 5-bit value is placed in the upper bits of its output byte.
 */
__attribute__((noinline)) void zm_convert_rgb555_rgb(const uint8_t* col1, uint8_t* result, unsigned long count) {
  unsigned int done = 0;

  while(done < count) {
    const unsigned int lo = col1[0];
    const unsigned int hi = col1[1];

    result[0] = (hi << 1) & 0xf8;
    result[1] = ((hi << 6) | (lo >> 2)) & 0xf8;
    result[2] = (lo << 3) & 0xf8;

    col1 += 2;
    result += 3;
    ++done;
  }
}
/* RGB555 to RGBA - modified the one above
 * Expands count 2-byte pixels from col1 into 4-byte pixels in result.
 * Only the R, G and B bytes of each output pixel are written; the fourth byte is left
 * untouched. The bit layout of the source is the same as for the RGB24 variant.
 */
__attribute__((noinline)) void zm_convert_rgb555_rgba(const uint8_t* col1, uint8_t* result, unsigned long count) {
  unsigned int done = 0;

  while(done < count) {
    const unsigned int lo = col1[0];
    const unsigned int hi = col1[1];

    result[0] = (hi << 1) & 0xf8;
    result[1] = ((hi << 6) | (lo >> 2)) & 0xf8;
    result[2] = (lo << 3) & 0xf8;

    col1 += 2;
    result += 4;
    ++done;
  }
}
/* RGB565 to RGB24 - relocated from zm_local_camera.cpp
 * Expands count 2-byte pixels from col1 into R,G,B byte triplets in result.
 * Blue comes from the low 5 bits of the first byte, green from the top 3 bits of the
 * first byte plus the low 3 bits of the second, red from the top 5 bits of the second
 * byte. Each value is placed in the upper bits of its output byte.
 */
__attribute__((noinline)) void zm_convert_rgb565_rgb(const uint8_t* col1, uint8_t* result, unsigned long count) {
  unsigned int done = 0;

  while(done < count) {
    const unsigned int lo = col1[0];
    const unsigned int hi = col1[1];

    result[0] = hi & 0xf8;
    result[1] = ((hi << 5) | (lo >> 3)) & 0xfc;
    result[2] = (lo << 3) & 0xf8;

    col1 += 2;
    result += 3;
    ++done;
  }
}
/* RGB565 to RGBA - modified the one above
 * Expands count 2-byte pixels from col1 into 4-byte pixels in result.
 * Only the R, G and B bytes of each output pixel are written; the fourth byte is left
 * untouched. The bit layout of the source is the same as for the RGB24 variant.
 */
__attribute__((noinline)) void zm_convert_rgb565_rgba(const uint8_t* col1, uint8_t* result, unsigned long count) {
  unsigned int done = 0;

  while(done < count) {
    const unsigned int lo = col1[0];
    const unsigned int hi = col1[1];

    result[0] = hi & 0xf8;
    result[1] = ((hi << 5) | (lo >> 3)) & 0xfc;
    result[2] = (lo << 3) & 0xf8;

    col1 += 2;
    result += 4;
    ++done;
  }
}
/************************************************* DEINTERLACE FUNCTIONS *************************************************/
/* Grayscale
 * In-place deinterlace of the 8-bit image col1, using col2 as the comparison image.
 * Only the odd rows (1, 3, 5, ...) of col1 are candidates for replacement. For each
 * pixel of such a row the col1/col2 difference of the pixel above and of the pixel
 * itself are averaged; when that average reaches threshold the pixel becomes the mean
 * of the col1 pixels above and below it.
 * When an odd row is the last row of the image there is no row below, so the row above
 * is used for both neighbours, which amounts to copying the pixel above.
 * Images with an odd height or fewer than two rows are handled without touching memory
 * outside of the width*height buffers.
 */
__attribute__((noinline)) void std_deinterlace_4field_gray8(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height)
{
  const unsigned long row_width = width;

  for(unsigned int row = 1; row < height; row += 2) {
    uint8_t* pcurrent = col1 + (row_width * row);
    const uint8_t* pncurrent = col2 + (row_width * row);
    const uint8_t* pabove = pcurrent - row_width;
    const uint8_t* pnabove = pncurrent - row_width;
    /* No row below the last one: (above + above) >> 1 is simply the pixel above */
    const uint8_t* pbelow = (row + 1 < height) ? (pcurrent + row_width) : pabove;
    const uint8_t* const max_ptr = pcurrent + row_width;

    while(pcurrent < max_ptr) {
      if((unsigned int)((abs(*pnabove - *pabove) + abs(*pncurrent - *pcurrent)) >> 1) >= threshold) {
        *pcurrent = (*pabove + *pbelow) >> 1;
      }
      pabove++;
      pnabove++;
      pcurrent++;
      pncurrent++;
      pbelow++;
    }
  }
}
/* RGB
 * In-place deinterlace of the 24-bit R,G,B image col1, using col2 as the comparison image.
 * Only the odd rows (1, 3, 5, ...) of col1 are candidates for replacement. For each
 * pixel of such a row a weighted col1/col2 difference, (2*R + 5*G + B) / 8, is computed
 * for the pixel above and for the pixel itself; when the average of the two reaches
 * threshold the pixel becomes the mean of the col1 pixels above and below it.
 * When an odd row is the last row of the image there is no row below, so the row above
 * is used for both neighbours, which amounts to copying the pixel above.
 * Images with an odd height or fewer than two rows are handled without touching memory
 * outside of the image buffers.
 */
__attribute__((noinline)) void std_deinterlace_4field_rgb(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height)
{
  const unsigned long row_width = (unsigned long)width * 3;
  unsigned int b, g, r;
  unsigned int delta1, delta2;

  for(unsigned int row = 1; row < height; row += 2) {
    uint8_t* pcurrent = col1 + (row_width * row);
    const uint8_t* pncurrent = col2 + (row_width * row);
    const uint8_t* pabove = pcurrent - row_width;
    const uint8_t* pnabove = pncurrent - row_width;
    /* No row below the last one: (above + above) >> 1 is simply the pixel above */
    const uint8_t* pbelow = (row + 1 < height) ? (pcurrent + row_width) : pabove;
    const uint8_t* const max_ptr = pcurrent + row_width;

    while(pcurrent < max_ptr) {
      r = abs(pnabove[0] - pabove[0]);
      g = abs(pnabove[1] - pabove[1]);
      b = abs(pnabove[2] - pabove[2]);
      delta1 = (r + r + b + g + g + g + g + g)>>3;
      r = abs(pncurrent[0] - pcurrent[0]);
      g = abs(pncurrent[1] - pcurrent[1]);
      b = abs(pncurrent[2] - pcurrent[2]);
      delta2 = (r + r + b + g + g + g + g + g)>>3;
      if(((delta1 + delta2) >> 1) >= threshold) {
        pcurrent[0] = (pabove[0] + pbelow[0]) >> 1;
        pcurrent[1] = (pabove[1] + pbelow[1]) >> 1;
        pcurrent[2] = (pabove[2] + pbelow[2]) >> 1;
      }
      pabove += 3;
      pnabove += 3;
      pcurrent += 3;
      pncurrent += 3;
      pbelow += 3;
    }
  }
}
/* BGR
 * In-place deinterlace of the 24-bit B,G,R image col1, using col2 as the comparison image.
 * Only the odd rows (1, 3, 5, ...) of col1 are candidates for replacement. For each
 * pixel of such a row a weighted col1/col2 difference, (2*R + 5*G + B) / 8, is computed
 * for the pixel above and for the pixel itself; when the average of the two reaches
 * threshold the pixel becomes the mean of the col1 pixels above and below it.
 * When an odd row is the last row of the image there is no row below, so the row above
 * is used for both neighbours, which amounts to copying the pixel above.
 * Images with an odd height or fewer than two rows are handled without touching memory
 * outside of the image buffers.
 */
__attribute__((noinline)) void std_deinterlace_4field_bgr(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height)
{
  const unsigned long row_width = (unsigned long)width * 3;
  unsigned int b, g, r;
  unsigned int delta1, delta2;

  for(unsigned int row = 1; row < height; row += 2) {
    uint8_t* pcurrent = col1 + (row_width * row);
    const uint8_t* pncurrent = col2 + (row_width * row);
    const uint8_t* pabove = pcurrent - row_width;
    const uint8_t* pnabove = pncurrent - row_width;
    /* No row below the last one: (above + above) >> 1 is simply the pixel above */
    const uint8_t* pbelow = (row + 1 < height) ? (pcurrent + row_width) : pabove;
    const uint8_t* const max_ptr = pcurrent + row_width;

    while(pcurrent < max_ptr) {
      b = abs(pnabove[0] - pabove[0]);
      g = abs(pnabove[1] - pabove[1]);
      r = abs(pnabove[2] - pabove[2]);
      delta1 = (r + r + b + g + g + g + g + g)>>3;
      b = abs(pncurrent[0] - pcurrent[0]);
      g = abs(pncurrent[1] - pcurrent[1]);
      r = abs(pncurrent[2] - pcurrent[2]);
      delta2 = (r + r + b + g + g + g + g + g)>>3;
      if(((delta1 + delta2) >> 1) >= threshold) {
        pcurrent[0] = (pabove[0] + pbelow[0]) >> 1;
        pcurrent[1] = (pabove[1] + pbelow[1]) >> 1;
        pcurrent[2] = (pabove[2] + pbelow[2]) >> 1;
      }
      pabove += 3;
      pnabove += 3;
      pcurrent += 3;
      pncurrent += 3;
      pbelow += 3;
    }
  }
}
/* RGBA
 * In-place deinterlace of the 32-bit R,G,B,A image col1, using col2 as the comparison
 * image. The alpha byte is neither compared nor modified.
 * Only the odd rows (1, 3, 5, ...) of col1 are candidates for replacement. For each
 * pixel of such a row a weighted col1/col2 difference, (2*R + 5*G + B) / 8, is computed
 * for the pixel above and for the pixel itself; when the average of the two reaches
 * threshold the pixel becomes the mean of the col1 pixels above and below it.
 * When an odd row is the last row of the image there is no row below, so the row above
 * is used for both neighbours, which amounts to copying the pixel above.
 * Images with an odd height or fewer than two rows are handled without touching memory
 * outside of the image buffers.
 */
__attribute__((noinline)) void std_deinterlace_4field_rgba(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height)
{
  const unsigned long row_width = (unsigned long)width * 4;
  unsigned int b, g, r;
  unsigned int delta1, delta2;

  for(unsigned int row = 1; row < height; row += 2) {
    uint8_t* pcurrent = col1 + (row_width * row);
    const uint8_t* pncurrent = col2 + (row_width * row);
    const uint8_t* pabove = pcurrent - row_width;
    const uint8_t* pnabove = pncurrent - row_width;
    /* No row below the last one: (above + above) >> 1 is simply the pixel above */
    const uint8_t* pbelow = (row + 1 < height) ? (pcurrent + row_width) : pabove;
    const uint8_t* const max_ptr = pcurrent + row_width;

    while(pcurrent < max_ptr) {
      r = abs(pnabove[0] - pabove[0]);
      g = abs(pnabove[1] - pabove[1]);
      b = abs(pnabove[2] - pabove[2]);
      delta1 = (r + r + b + g + g + g + g + g)>>3;
      r = abs(pncurrent[0] - pcurrent[0]);
      g = abs(pncurrent[1] - pcurrent[1]);
      b = abs(pncurrent[2] - pcurrent[2]);
      delta2 = (r + r + b + g + g + g + g + g)>>3;
      if(((delta1 + delta2) >> 1) >= threshold) {
        pcurrent[0] = (pabove[0] + pbelow[0]) >> 1;
        pcurrent[1] = (pabove[1] + pbelow[1]) >> 1;
        pcurrent[2] = (pabove[2] + pbelow[2]) >> 1;
      }
      pabove += 4;
      pnabove += 4;
      pcurrent += 4;
      pncurrent += 4;
      pbelow += 4;
    }
  }
}
/* BGRA
 * In-place deinterlace of the 32-bit B,G,R,A image col1, using col2 as the comparison
 * image. The alpha byte is neither compared nor modified.
 * Only the odd rows (1, 3, 5, ...) of col1 are candidates for replacement. For each
 * pixel of such a row a weighted col1/col2 difference, (2*R + 5*G + B) / 8, is computed
 * for the pixel above and for the pixel itself; when the average of the two reaches
 * threshold the pixel becomes the mean of the col1 pixels above and below it.
 * When an odd row is the last row of the image there is no row below, so the row above
 * is used for both neighbours, which amounts to copying the pixel above.
 * Images with an odd height or fewer than two rows are handled without touching memory
 * outside of the image buffers.
 */
__attribute__((noinline)) void std_deinterlace_4field_bgra(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height)
{
  const unsigned long row_width = (unsigned long)width * 4;
  unsigned int b, g, r;
  unsigned int delta1, delta2;

  for(unsigned int row = 1; row < height; row += 2) {
    uint8_t* pcurrent = col1 + (row_width * row);
    const uint8_t* pncurrent = col2 + (row_width * row);
    const uint8_t* pabove = pcurrent - row_width;
    const uint8_t* pnabove = pncurrent - row_width;
    /* No row below the last one: (above + above) >> 1 is simply the pixel above */
    const uint8_t* pbelow = (row + 1 < height) ? (pcurrent + row_width) : pabove;
    const uint8_t* const max_ptr = pcurrent + row_width;

    while(pcurrent < max_ptr) {
      b = abs(pnabove[0] - pabove[0]);
      g = abs(pnabove[1] - pabove[1]);
      r = abs(pnabove[2] - pabove[2]);
      delta1 = (r + r + b + g + g + g + g + g)>>3;
      b = abs(pncurrent[0] - pcurrent[0]);
      g = abs(pncurrent[1] - pcurrent[1]);
      r = abs(pncurrent[2] - pcurrent[2]);
      delta2 = (r + r + b + g + g + g + g + g)>>3;
      if(((delta1 + delta2) >> 1) >= threshold) {
        pcurrent[0] = (pabove[0] + pbelow[0]) >> 1;
        pcurrent[1] = (pabove[1] + pbelow[1]) >> 1;
        pcurrent[2] = (pabove[2] + pbelow[2]) >> 1;
      }
      pabove += 4;
      pnabove += 4;
      pcurrent += 4;
      pncurrent += 4;
      pbelow += 4;
    }
  }
}
/* ARGB
 * In-place deinterlace of the 32-bit A,R,G,B image col1, using col2 as the comparison
 * image. The alpha byte is neither compared nor modified.
 * Only the odd rows (1, 3, 5, ...) of col1 are candidates for replacement. For each
 * pixel of such a row a weighted col1/col2 difference, (2*R + 5*G + B) / 8, is computed
 * for the pixel above and for the pixel itself; when the average of the two reaches
 * threshold the pixel becomes the mean of the col1 pixels above and below it.
 * When an odd row is the last row of the image there is no row below, so the row above
 * is used for both neighbours, which amounts to copying the pixel above.
 * Images with an odd height or fewer than two rows are handled without touching memory
 * outside of the image buffers.
 */
__attribute__((noinline)) void std_deinterlace_4field_argb(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height)
{
  const unsigned long row_width = (unsigned long)width * 4;
  unsigned int b, g, r;
  unsigned int delta1, delta2;

  for(unsigned int row = 1; row < height; row += 2) {
    uint8_t* pcurrent = col1 + (row_width * row);
    const uint8_t* pncurrent = col2 + (row_width * row);
    const uint8_t* pabove = pcurrent - row_width;
    const uint8_t* pnabove = pncurrent - row_width;
    /* No row below the last one: (above + above) >> 1 is simply the pixel above */
    const uint8_t* pbelow = (row + 1 < height) ? (pcurrent + row_width) : pabove;
    const uint8_t* const max_ptr = pcurrent + row_width;

    while(pcurrent < max_ptr) {
      r = abs(pnabove[1] - pabove[1]);
      g = abs(pnabove[2] - pabove[2]);
      b = abs(pnabove[3] - pabove[3]);
      delta1 = (r + r + b + g + g + g + g + g)>>3;
      r = abs(pncurrent[1] - pcurrent[1]);
      g = abs(pncurrent[2] - pcurrent[2]);
      b = abs(pncurrent[3] - pcurrent[3]);
      delta2 = (r + r + b + g + g + g + g + g)>>3;
      if(((delta1 + delta2) >> 1) >= threshold) {
        pcurrent[1] = (pabove[1] + pbelow[1]) >> 1;
        pcurrent[2] = (pabove[2] + pbelow[2]) >> 1;
        pcurrent[3] = (pabove[3] + pbelow[3]) >> 1;
      }
      pabove += 4;
      pnabove += 4;
      pcurrent += 4;
      pncurrent += 4;
      pbelow += 4;
    }
  }
}
/* ABGR
 * In-place deinterlace of the 32-bit A,B,G,R image col1, using col2 as the comparison
 * image. The alpha byte is neither compared nor modified.
 * Only the odd rows (1, 3, 5, ...) of col1 are candidates for replacement. For each
 * pixel of such a row a weighted col1/col2 difference, (2*R + 5*G + B) / 8, is computed
 * for the pixel above and for the pixel itself; when the average of the two reaches
 * threshold the pixel becomes the mean of the col1 pixels above and below it.
 * When an odd row is the last row of the image there is no row below, so the row above
 * is used for both neighbours, which amounts to copying the pixel above.
 * Images with an odd height or fewer than two rows are handled without touching memory
 * outside of the image buffers.
 */
__attribute__((noinline)) void std_deinterlace_4field_abgr(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height)
{
  const unsigned long row_width = (unsigned long)width * 4;
  unsigned int b, g, r;
  unsigned int delta1, delta2;

  for(unsigned int row = 1; row < height; row += 2) {
    uint8_t* pcurrent = col1 + (row_width * row);
    const uint8_t* pncurrent = col2 + (row_width * row);
    const uint8_t* pabove = pcurrent - row_width;
    const uint8_t* pnabove = pncurrent - row_width;
    /* No row below the last one: (above + above) >> 1 is simply the pixel above */
    const uint8_t* pbelow = (row + 1 < height) ? (pcurrent + row_width) : pabove;
    const uint8_t* const max_ptr = pcurrent + row_width;

    while(pcurrent < max_ptr) {
      b = abs(pnabove[1] - pabove[1]);
      g = abs(pnabove[2] - pabove[2]);
      r = abs(pnabove[3] - pabove[3]);
      delta1 = (r + r + b + g + g + g + g + g)>>3;
      b = abs(pncurrent[1] - pcurrent[1]);
      g = abs(pncurrent[2] - pcurrent[2]);
      r = abs(pncurrent[3] - pcurrent[3]);
      delta2 = (r + r + b + g + g + g + g + g)>>3;
      if(((delta1 + delta2) >> 1) >= threshold) {
        pcurrent[1] = (pabove[1] + pbelow[1]) >> 1;
        pcurrent[2] = (pabove[2] + pbelow[2]) >> 1;
        pcurrent[3] = (pabove[3] + pbelow[3]) >> 1;
      }
      pabove += 4;
      pnabove += 4;
      pcurrent += 4;
      pncurrent += 4;
      pbelow += 4;
    }
  }
}
/* Grayscale SSSE3 */
/*
 * Inline-assembly version of the 4-field deinterlacer for 8-bit grayscale
 * images. The image at col1 is modified in place; col2 is only read.
 *
 * Each pass of the loop handles 16 bytes (16 pixels) of a row pair:
 *   delta_above   = |col2[above]   - col1[above]|     (pmaxub/pminub/psubb)
 *   delta_current = |col2[current] - col1[current]|
 *   where pavgb(delta_above, delta_current) > threshold (signed word compare),
 *   current is replaced by pavgb(above, below).
 * In the last-line section there is no row below, so the selected pixels are
 * replaced by the pixels of the row above.
 *
 * Only the low 8 bits of `threshold` are used, because it is stored into
 * uint8_t elements of threshold_mask.
 *
 * movdqa and movntdq require 16-byte aligned addresses, so col1, col2 and the
 * row width must keep every 16-byte access aligned.
 *
 * On builds where the SSE path is compiled out the function only calls Panic().
 *
 * NOTE(review): operands %0, %1 and %2 are declared as inputs but are modified
 * by the asm; GCC's extended-asm rules expect modified operands to be outputs
 * ("+r") - confirm this is safe with the compilers in use.
 * NOTE(review): the last-line section is not inside a loop, so it appears to
 * process only the first 16 bytes of the last row - confirm whether intended.
 * NOTE(review): the main loop body runs before its end checks, so this looks
 * like it assumes an even height of at least 4 rows - verify against callers.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_deinterlace_4field_gray8(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
/* Threshold laid out as two 16-bit words (threshold, 0, threshold, 0) so it can be compared against zero-extended pixel deltas */
union {
uint32_t int32;
uint8_t int8a[4];
} threshold_mask;
threshold_mask.int8a[0] = threshold;
threshold_mask.int8a[1] = 0;
threshold_mask.int8a[2] = threshold;
threshold_mask.int8a[3] = 0;
unsigned long row_width = width;
/* max_ptr: start of the second-to-last row, where the main loop stops */
uint8_t* max_ptr = col1 + (row_width * (height-2));
/* max_ptr2: end of the row currently being walked by %0 */
uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ (
/* Load the threshold and broadcast it to every dword of xmm4 */
"mov %5, %%eax\n\t"
"movd %%eax, %%xmm4\n\t"
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
/* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_gray8:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
/* Unsigned absolute difference: max(a,b) - min(a,b) */
"pmaxub %%xmm2, %%xmm1\n\t"
"pminub %%xmm5, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"pmaxub %%xmm2, %%xmm1\n\t"
"pminub %%xmm6, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
"movdqa %%xmm1, %%xmm2\n\t"
/* Do the comparison on words instead of bytes because we don't have unsigned comparison */
"punpcklbw %%xmm0, %%xmm1\n\t" // Expand pixels 0-7 into words into xmm1
"punpckhbw %%xmm0, %%xmm2\n\t" // Expand pixels 8-15 into words into xmm2
"pcmpgtw %%xmm4, %%xmm1\n\t" // Compare average delta with threshold for pixels 0-7
"pcmpgtw %%xmm4, %%xmm2\n\t" // Compare average delta with threshold for pixels 8-15
"packsswb %%xmm2, %%xmm1\n\t" // Pack the comparison results into xmm1
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
"pavgb %%xmm5, %%xmm2\n\t" // Average pabove and pbelow
"pand %%xmm1, %%xmm2\n\t" // Filter out pixels in avg that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */
"add $0x10, %0\n\t" // Advance pabove by 16 bytes
"add $0x10, %1\n\t" // Advance pnabove by 16 bytes
/* Check if we reached the row end */
"cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_gray8\n\t" // Go for another iteration
/* Next row */
"add %4, %0\n\t" // Skip the row just written; %0 is the next pabove
"add %4, %1\n\t" // Same for the col2 pointer
"mov %0, %2\n\t"
"add %4, %2\n\t" // max_ptr2 = end of the new pabove row
/* Check if we reached the end */
"cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_gray8\n\t" // Go for another iteration
/* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
"pmaxub %%xmm2, %%xmm1\n\t"
"pminub %%xmm5, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"pmaxub %%xmm2, %%xmm1\n\t"
"pminub %%xmm6, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
"movdqa %%xmm1, %%xmm2\n\t"
/* Do the comparison on words instead of bytes because we don't have unsigned comparison */
"punpcklbw %%xmm0, %%xmm1\n\t" // Expand pixels 0-7 into words into xmm1
"punpckhbw %%xmm0, %%xmm2\n\t" // Expand pixels 8-15 into words into xmm2
"pcmpgtw %%xmm4, %%xmm1\n\t" // Compare average delta with threshold for pixels 0-7
"pcmpgtw %%xmm4, %%xmm2\n\t" // Compare average delta with threshold for pixels 8-15
"packsswb %%xmm2, %%xmm1\n\t" // Pack the comparison results into xmm1
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm5, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
:
: "r" (col1), "r" (col2), "r" (max_ptr2), "r" (max_ptr), "r" (row_width), "m" (threshold_mask.int32)
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
);
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* RGBA SSSE3 */
/*
 * Inline-assembly version of the 4-field deinterlacer for 32-bit RGBA images.
 * The image at col1 is modified in place; col2 is only read.
 *
 * Each pass of the loop handles 16 bytes (4 pixels) of a row pair:
 *   - every byte of both images is reduced to 5 bits (psrlq 3 + 0x1F mask),
 *     and the per-byte absolute difference is taken (psubb + pabsb);
 *   - pshufb with movemask2 repeats byte 1 five times, byte 0 twice and byte 2
 *     once per pixel, and psadbw sums them, giving 5*byte1 + 2*byte0 + byte2
 *     (presumably 5*G + 2*R + B for RGBA byte order);
 *   - the above-row and current-row sums are averaged with pavgb and compared,
 *     as dwords, against the threshold with a strict greater-than;
 *   - selected pixels (all four bytes) become pavgb(above, below).
 * In the last-line section there is no row below, so selected pixels are
 * replaced by the pixels of the row above.
 *
 * On x86-64 the broadcast threshold lives in xmm8; on 32-bit builds it is
 * rebuilt in xmm7 from eax before each compare.
 *
 * movdqa and movntdq require 16-byte aligned addresses, so col1, col2 and the
 * row width in bytes must keep every 16-byte access aligned.
 *
 * On builds where the SSE path is compiled out the function only calls Panic().
 *
 * NOTE(review): operands %0, %1 and %2 are declared as inputs but are modified
 * by the asm; GCC's extended-asm rules expect modified operands to be outputs
 * ("+r") - confirm this is safe with the compilers in use.
 * NOTE(review): the last-line section is not inside a loop, so it appears to
 * process only the first 4 pixels of the last row - confirm whether intended.
 * NOTE(review): the main loop body runs before its end checks, so this looks
 * like it assumes an even height of at least 4 rows - verify against callers.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_deinterlace_4field_rgba(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
/* pshufb pattern giving the per-pixel channel weights: byte 1 x5, byte 0 x2, byte 2 x1 (second half: same for the pixel at bytes 8..11) */
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {1,1,1,1,1,0,0,2,9,9,9,9,9,8,8,10};
const uint32_t threshold_val = threshold;
unsigned long row_width = width*4;
/* max_ptr: start of the second-to-last row, where the main loop stops */
uint8_t* max_ptr = col1 + (row_width * (height-2));
/* max_ptr2: end of the row currently being walked by %0 */
uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ (
/* xmm4 = 0x1F in every byte, xmm3 = shuffle pattern, eax = threshold */
"mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t"
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
"movdqa %6, %%xmm3\n\t"
"mov %5, %%eax\n\t"
#if defined(__x86_64__)
"movd %%eax, %%xmm8\n\t"
"pshufd $0x0, %%xmm8, %%xmm8\n\t"
#endif
/* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_rgba:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
/* Reduce every byte to 5 bits, then take the per-byte absolute difference */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
/* Weighted per-pixel sum: pixels 0-1 in xmm1, pixels 2-3 in xmm2, then pack one sum per dword */
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
"pavgb %%xmm5, %%xmm2\n\t" // Average pabove and pbelow
"pand %%xmm1, %%xmm2\n\t" // Filter out pixels in avg that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */
"add $0x10, %0\n\t" // Advance pabove by 16 bytes
"add $0x10, %1\n\t" // Advance pnabove by 16 bytes
/* Check if we reached the row end */
"cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_rgba\n\t" // Go for another iteration
/* Next row */
"add %4, %0\n\t" // Skip the row just written; %0 is the next pabove
"add %4, %1\n\t" // Same for the col2 pointer
"mov %0, %2\n\t"
"add %4, %2\n\t" // max_ptr2 = end of the new pabove row
/* Check if we reached the end */
"cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_rgba\n\t" // Go for another iteration
/* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm5, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
:
: "r" (col1), "r" (col2), "r" (max_ptr2), "r" (max_ptr), "r" (row_width), "m" (threshold_val), "m" (*movemask2)
#if defined(__x86_64__)
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "cc", "memory"
#else
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif
);
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* BGRA SSSE3 */
/*
 * Inline-assembly version of the 4-field deinterlacer for 32-bit BGRA images.
 * The image at col1 is modified in place; col2 is only read.
 *
 * Each pass of the loop handles 16 bytes (4 pixels) of a row pair:
 *   - every byte of both images is reduced to 5 bits (psrlq 3 + 0x1F mask),
 *     and the per-byte absolute difference is taken (psubb + pabsb);
 *   - pshufb with movemask2 repeats byte 1 five times, byte 2 twice and byte 0
 *     once per pixel, and psadbw sums them, giving 5*byte1 + 2*byte2 + byte0
 *     (presumably 5*G + 2*R + B for BGRA byte order);
 *   - the above-row and current-row sums are averaged with pavgb and compared,
 *     as dwords, against the threshold with a strict greater-than;
 *   - selected pixels (all four bytes) become pavgb(above, below).
 * In the last-line section there is no row below, so selected pixels are
 * replaced by the pixels of the row above.
 *
 * On x86-64 the broadcast threshold lives in xmm8; on 32-bit builds it is
 * rebuilt in xmm7 from eax before each compare.
 *
 * movdqa and movntdq require 16-byte aligned addresses, so col1, col2 and the
 * row width in bytes must keep every 16-byte access aligned.
 *
 * On builds where the SSE path is compiled out the function only calls Panic().
 *
 * NOTE(review): operands %0, %1 and %2 are declared as inputs but are modified
 * by the asm; GCC's extended-asm rules expect modified operands to be outputs
 * ("+r") - confirm this is safe with the compilers in use.
 * NOTE(review): the last-line section is not inside a loop, so it appears to
 * process only the first 4 pixels of the last row - confirm whether intended.
 * NOTE(review): the main loop body runs before its end checks, so this looks
 * like it assumes an even height of at least 4 rows - verify against callers.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_deinterlace_4field_bgra(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
/* pshufb pattern giving the per-pixel channel weights: byte 1 x5, byte 2 x2, byte 0 x1 (second half: same for the pixel at bytes 8..11) */
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {1,1,1,1,1,2,2,0,9,9,9,9,9,10,10,8};
const uint32_t threshold_val = threshold;
unsigned long row_width = width*4;
/* max_ptr: start of the second-to-last row, where the main loop stops */
uint8_t* max_ptr = col1 + (row_width * (height-2));
/* max_ptr2: end of the row currently being walked by %0 */
uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ (
/* xmm4 = 0x1F in every byte, xmm3 = shuffle pattern, eax = threshold */
"mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t"
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
"movdqa %6, %%xmm3\n\t"
"mov %5, %%eax\n\t"
#if defined(__x86_64__)
"movd %%eax, %%xmm8\n\t"
"pshufd $0x0, %%xmm8, %%xmm8\n\t"
#endif
/* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_bgra:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
/* Reduce every byte to 5 bits, then take the per-byte absolute difference */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
/* Weighted per-pixel sum: pixels 0-1 in xmm1, pixels 2-3 in xmm2, then pack one sum per dword */
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
"pavgb %%xmm5, %%xmm2\n\t" // Average pabove and pbelow
"pand %%xmm1, %%xmm2\n\t" // Filter out pixels in avg that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */
"add $0x10, %0\n\t" // Advance pabove by 16 bytes
"add $0x10, %1\n\t" // Advance pnabove by 16 bytes
/* Check if we reached the row end */
"cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_bgra\n\t" // Go for another iteration
/* Next row */
"add %4, %0\n\t" // Skip the row just written; %0 is the next pabove
"add %4, %1\n\t" // Same for the col2 pointer
"mov %0, %2\n\t"
"add %4, %2\n\t" // max_ptr2 = end of the new pabove row
/* Check if we reached the end */
"cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_bgra\n\t" // Go for another iteration
/* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm5, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
:
: "r" (col1), "r" (col2), "r" (max_ptr2), "r" (max_ptr), "r" (row_width), "m" (threshold_val), "m" (*movemask2)
#if defined(__x86_64__)
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "cc", "memory"
#else
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif
);
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* ARGB SSSE3 */
/*
 * Inline-assembly version of the 4-field deinterlacer for 32-bit ARGB images.
 * The image at col1 is modified in place; col2 is only read.
 *
 * Each pass of the loop handles 16 bytes (4 pixels) of a row pair:
 *   - every byte of both images is reduced to 5 bits (psrlq 3 + 0x1F mask),
 *     and the per-byte absolute difference is taken (psubb + pabsb);
 *   - pshufb with movemask2 repeats byte 2 five times, byte 1 twice and byte 3
 *     once per pixel, and psadbw sums them, giving 5*byte2 + 2*byte1 + byte3
 *     (presumably 5*G + 2*R + B for ARGB byte order);
 *   - the above-row and current-row sums are averaged with pavgb and compared,
 *     as dwords, against the threshold with a strict greater-than;
 *   - selected pixels become pavgb(above, below); the select mask covers the
 *     whole dword, so byte 0 of the pixel is replaced as well.
 * In the last-line section there is no row below, so selected pixels are
 * replaced by the pixels of the row above.
 *
 * On x86-64 the broadcast threshold lives in xmm8; on 32-bit builds it is
 * rebuilt in xmm7 from eax before each compare.
 *
 * movdqa and movntdq require 16-byte aligned addresses, so col1, col2 and the
 * row width in bytes must keep every 16-byte access aligned.
 *
 * On builds where the SSE path is compiled out the function only calls Panic().
 *
 * NOTE(review): operands %0, %1 and %2 are declared as inputs but are modified
 * by the asm; GCC's extended-asm rules expect modified operands to be outputs
 * ("+r") - confirm this is safe with the compilers in use.
 * NOTE(review): the last-line section is not inside a loop, so it appears to
 * process only the first 4 pixels of the last row - confirm whether intended.
 * NOTE(review): the main loop body runs before its end checks, so this looks
 * like it assumes an even height of at least 4 rows - verify against callers.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_deinterlace_4field_argb(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
/* pshufb pattern giving the per-pixel channel weights: byte 2 x5, byte 1 x2, byte 3 x1 (second half: same for the pixel at bytes 8..11) */
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {2,2,2,2,2,1,1,3,10,10,10,10,10,9,9,11};
const uint32_t threshold_val = threshold;
unsigned long row_width = width*4;
/* max_ptr: start of the second-to-last row, where the main loop stops */
uint8_t* max_ptr = col1 + (row_width * (height-2));
/* max_ptr2: end of the row currently being walked by %0 */
uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ (
/* xmm4 = 0x1F in every byte, xmm3 = shuffle pattern, eax = threshold */
"mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t"
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
"movdqa %6, %%xmm3\n\t"
"mov %5, %%eax\n\t"
#if defined(__x86_64__)
"movd %%eax, %%xmm8\n\t"
"pshufd $0x0, %%xmm8, %%xmm8\n\t"
#endif
/* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_argb:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
/* Reduce every byte to 5 bits, then take the per-byte absolute difference */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
/* Weighted per-pixel sum: pixels 0-1 in xmm1, pixels 2-3 in xmm2, then pack one sum per dword */
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
"pavgb %%xmm5, %%xmm2\n\t" // Average pabove and pbelow
"pand %%xmm1, %%xmm2\n\t" // Filter out pixels in avg that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */
"add $0x10, %0\n\t" // Advance pabove by 16 bytes
"add $0x10, %1\n\t" // Advance pnabove by 16 bytes
/* Check if we reached the row end */
"cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_argb\n\t" // Go for another iteration
/* Next row */
"add %4, %0\n\t" // Skip the row just written; %0 is the next pabove
"add %4, %1\n\t" // Same for the col2 pointer
"mov %0, %2\n\t"
"add %4, %2\n\t" // max_ptr2 = end of the new pabove row
/* Check if we reached the end */
"cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_argb\n\t" // Go for another iteration
/* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm5, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
:
: "r" (col1), "r" (col2), "r" (max_ptr2), "r" (max_ptr), "r" (row_width), "m" (threshold_val), "m" (*movemask2)
#if defined(__x86_64__)
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "cc", "memory"
#else
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif
);
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}
/* ABGR SSSE3 */
/*
 * Inline-assembly version of the 4-field deinterlacer for 32-bit ABGR images.
 * The image at col1 is modified in place; col2 is only read.
 *
 * Each pass of the loop handles 16 bytes (4 pixels) of a row pair:
 *   - every byte of both images is reduced to 5 bits (psrlq 3 + 0x1F mask),
 *     and the per-byte absolute difference is taken (psubb + pabsb);
 *   - pshufb with movemask2 repeats byte 2 five times, byte 3 twice and byte 1
 *     once per pixel, and psadbw sums them, giving 5*byte2 + 2*byte3 + byte1,
 *     the same channel weighting std_deinterlace_4field_abgr applies
 *     (5*g + 2*r + b);
 *   - the above-row and current-row sums are averaged with pavgb and compared,
 *     as dwords, against the threshold with a strict greater-than;
 *   - selected pixels become pavgb(above, below); the select mask covers the
 *     whole dword, so byte 0 of the pixel is replaced as well.
 * In the last-line section there is no row below, so selected pixels are
 * replaced by the pixels of the row above.
 *
 * On x86-64 the broadcast threshold lives in xmm8; on 32-bit builds it is
 * rebuilt in xmm7 from eax before each compare.
 *
 * movdqa and movntdq require 16-byte aligned addresses, so col1, col2 and the
 * row width in bytes must keep every 16-byte access aligned.
 *
 * On builds where the SSE path is compiled out the function only calls Panic().
 *
 * NOTE(review): results are not bit-identical to std_deinterlace_4field_abgr:
 * that version compares with >=, truncates its averages and never writes
 * byte 0 - confirm the difference is acceptable.
 * NOTE(review): operands %0, %1 and %2 are declared as inputs but are modified
 * by the asm; GCC's extended-asm rules expect modified operands to be outputs
 * ("+r") - confirm this is safe with the compilers in use.
 * NOTE(review): the last-line section is not inside a loop, so it appears to
 * process only the first 4 pixels of the last row - confirm whether intended.
 * NOTE(review): the main loop body runs before its end checks, so this looks
 * like it assumes an even height of at least 4 rows - verify against callers.
 */
#if defined(__i386__) || defined(__x86_64__)
__attribute__((noinline,__target__("ssse3")))
#endif
void ssse3_deinterlace_4field_abgr(uint8_t* col1, uint8_t* col2, unsigned int threshold, unsigned int width, unsigned int height) {
#if ((defined(__i386__) || defined(__x86_64__) || defined(ZM_KEEP_SSE)) && !defined(ZM_STRIP_SSE))
/* pshufb pattern giving the per-pixel channel weights: byte 2 x5, byte 3 x2, byte 1 x1 (second half: same for the pixel at bytes 8..11) */
__attribute__((aligned(16))) static const uint8_t movemask2[16] = {2,2,2,2,2,3,3,1,10,10,10,10,10,11,11,9};
const uint32_t threshold_val = threshold;
unsigned long row_width = width*4;
/* max_ptr: start of the second-to-last row, where the main loop stops */
uint8_t* max_ptr = col1 + (row_width * (height-2));
/* max_ptr2: end of the row currently being walked by %0 */
uint8_t* max_ptr2 = col1 + row_width;
__asm__ __volatile__ (
/* xmm4 = 0x1F in every byte, xmm3 = shuffle pattern, eax = threshold */
"mov $0x1F1F1F1F, %%eax\n\t"
"movd %%eax, %%xmm4\n\t"
"pshufd $0x0, %%xmm4, %%xmm4\n\t"
"movdqa %6, %%xmm3\n\t"
"mov %5, %%eax\n\t"
#if defined(__x86_64__)
"movd %%eax, %%xmm8\n\t"
"pshufd $0x0, %%xmm8, %%xmm8\n\t"
#endif
/* Zero the temporary register */
"pxor %%xmm0, %%xmm0\n\t"
"algo_ssse3_deinterlace_4field_abgr:\n\t"
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
/* Reduce every byte to 5 bits, then take the per-byte absolute difference */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
/* Weighted per-pixel sum: pixels 0-1 in xmm1, pixels 2-3 in xmm2, then pack one sum per dword */
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"movdqa (%0,%4), %%xmm2\n\t" // Load pbelow
"pavgb %%xmm5, %%xmm2\n\t" // Average pabove and pbelow
"pand %%xmm1, %%xmm2\n\t" // Filter out pixels in avg that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm2, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
"sub %4, %0\n\t" // Restore pcurrent to pabove
"sub %4, %1\n\t" // Restore pncurrent to pnabove
/* Next pixels */
"add $0x10, %0\n\t" // Advance pabove by 16 bytes
"add $0x10, %1\n\t" // Advance pnabove by 16 bytes
/* Check if we reached the row end */
"cmp %2, %0\n\t"
"jb algo_ssse3_deinterlace_4field_abgr\n\t" // Go for another iteration
/* Next row */
"add %4, %0\n\t" // Skip the row just written; %0 is the next pabove
"add %4, %1\n\t" // Same for the col2 pointer
"mov %0, %2\n\t"
"add %4, %2\n\t" // max_ptr2 = end of the new pabove row
/* Check if we reached the end */
"cmp %3, %0\n\t"
"jb algo_ssse3_deinterlace_4field_abgr\n\t" // Go for another iteration
/* Special case for the last line */
/* Load pabove into xmm1 and pnabove into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm5\n\t" /* Keep backup of pabove in xmm5 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"movdqa %%xmm1, %%xmm7\n\t" /* Backup of the above-row delta in xmm7 for now */
/* Next row */
"add %4, %0\n\t"
"add %4, %1\n\t"
/* Load pcurrent into xmm1 and pncurrent into xmm2 */
"movdqa (%0), %%xmm1\n\t"
"movdqa (%1), %%xmm2\n\t"
"movdqa %%xmm1, %%xmm6\n\t" /* Keep backup of pcurrent in xmm6 */
"psrlq $0x3, %%xmm1\n\t"
"psrlq $0x3, %%xmm2\n\t"
"pand %%xmm4, %%xmm1\n\t"
"pand %%xmm4, %%xmm2\n\t"
"psubb %%xmm2, %%xmm1\n\t"
"pabsb %%xmm1, %%xmm2\n\t"
"movdqa %%xmm2, %%xmm1\n\t"
"punpckldq %%xmm1, %%xmm1\n\t"
"pshufb %%xmm3, %%xmm1\n\t"
"psadbw %%xmm0, %%xmm1\n\t"
"punpckhdq %%xmm2, %%xmm2\n\t"
"pshufb %%xmm3, %%xmm2\n\t"
"psadbw %%xmm0, %%xmm2\n\t"
"packuswb %%xmm2, %%xmm1\n\t"
"pavgb %%xmm7, %%xmm1\n\t" // Average the two deltas together
#if defined(__x86_64__)
"pcmpgtd %%xmm8, %%xmm1\n\t" // Compare average delta with the threshold
#else
"movd %%eax, %%xmm7\n\t" // Setup the threshold
"pshufd $0x0, %%xmm7, %%xmm7\n\t"
"pcmpgtd %%xmm7, %%xmm1\n\t" // Compare average delta with the threshold
#endif
"pand %%xmm1, %%xmm5\n\t" // Filter out pixels in pabove that shouldn't be copied
"pandn %%xmm6, %%xmm1\n\t" // Filter out pixels in pcurrent that should be replaced
"por %%xmm5, %%xmm1\n\t" // Put the new values in pcurrent
"movntdq %%xmm1, (%0)\n\t" // Write pcurrent
:
: "r" (col1), "r" (col2), "r" (max_ptr2), "r" (max_ptr), "r" (row_width), "m" (threshold_val), "m" (*movemask2)
#if defined(__x86_64__)
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "cc", "memory"
#else
: "%eax", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"
#endif
);
#else
Panic("SSE function called on a non x86\\x86-64 platform");
#endif
}