Fix imagesize requirements (#2404)
* Introduce non-loop-unrolled versions of the image functions and use them when the image size is not a multiple of 12 or 16 * Remove the checks that required imagesize to be a multiple of 16 or 12 (they existed to accommodate the functions with unrolled loops) * Use the non-unrolled functions when the image size is not a multiple of 12 or 16 * Use std_blend when the image has an odd size
This commit is contained in:
parent
2b90bf15a6
commit
443fd35d99
|
@ -52,14 +52,8 @@ Camera::Camera(
|
|||
imagesize = pixels * colours;
|
||||
|
||||
Debug(2,"New camera id: %d width: %d height: %d colours: %d subpixelorder: %d capture: %d",
|
||||
monitor_id,width,height,colours,subpixelorder,capture);
|
||||
monitor_id, width, height, colours, subpixelorder, capture);
|
||||
|
||||
/* Because many loops are unrolled and work on 16 colours/time or 4 pixels/time, we have to meet requirements */
|
||||
if ( (colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB32) && (imagesize % 64) != 0 ) {
|
||||
Fatal("Image size is not multiples of 64");
|
||||
} else if ( colours == ZM_COLOUR_RGB24 && ((imagesize % 64) != 0 || (imagesize % 12) != 0) ) {
|
||||
Fatal("Image size is not multiples of 12 and 64");
|
||||
}
|
||||
monitor = NULL;
|
||||
}
|
||||
|
||||
|
|
427
src/zm_image.cpp
427
src/zm_image.cpp
|
@ -75,6 +75,32 @@ static deinterlace_4field_fptr_t fptr_deinterlace_4field_gray8;
|
|||
/* Pointer to image buffer memory copy function */
|
||||
imgbufcpy_fptr_t fptr_imgbufcpy;
|
||||
|
||||
void Image::update_function_pointers() {
|
||||
/* Because many loops are unrolled and work on 16 colours/time or 4 pixels/time, we have to meet requirements */
|
||||
if ( pixels % 16 || pixels % 12 ) {
|
||||
// have to use non-loop unrolled functions
|
||||
delta8_rgb = &std_delta8_rgb;
|
||||
delta8_bgr = &std_delta8_bgr;
|
||||
delta8_rgba = &std_delta8_rgba;
|
||||
delta8_bgra = &std_delta8_bgra;
|
||||
delta8_argb = &std_delta8_argb;
|
||||
delta8_abgr = &std_delta8_abgr;
|
||||
delta8_gray8 = &std_delta8_gray8;
|
||||
blend = &std_blend;
|
||||
} else {
|
||||
// Use either sse or neon, or loop unrolled version
|
||||
delta8_rgb = fptr_delta8_rgb;
|
||||
delta8_bgr = fptr_delta8_bgr;
|
||||
delta8_rgba = fptr_delta8_rgba;
|
||||
delta8_bgra = fptr_delta8_bgra;
|
||||
delta8_argb = fptr_delta8_argb;
|
||||
delta8_abgr = fptr_delta8_abgr;
|
||||
delta8_gray8 = fptr_delta8_gray8;
|
||||
blend = fptr_blend;
|
||||
}
|
||||
}
|
||||
|
||||
// This constructor is not used anywhere
|
||||
Image::Image() {
|
||||
if ( !initialised )
|
||||
Initialise();
|
||||
|
@ -89,6 +115,7 @@ Image::Image() {
|
|||
buffertype = 0;
|
||||
holdbuffer = 0;
|
||||
text[0] = '\0';
|
||||
blend = fptr_blend;
|
||||
}
|
||||
|
||||
Image::Image( const char *filename ) {
|
||||
|
@ -104,8 +131,9 @@ Image::Image( const char *filename ) {
|
|||
buffer = 0;
|
||||
buffertype = 0;
|
||||
holdbuffer = 0;
|
||||
ReadJpeg( filename, ZM_COLOUR_RGB24, ZM_SUBPIX_ORDER_RGB);
|
||||
ReadJpeg(filename, ZM_COLOUR_RGB24, ZM_SUBPIX_ORDER_RGB);
|
||||
text[0] = '\0';
|
||||
update_function_pointers();
|
||||
}
|
||||
|
||||
Image::Image( int p_width, int p_height, int p_colours, int p_subpixelorder, uint8_t *p_buffer ) {
|
||||
|
@ -127,6 +155,8 @@ Image::Image( int p_width, int p_height, int p_colours, int p_subpixelorder, uin
|
|||
AllocImgBuffer(size);
|
||||
}
|
||||
text[0] = '\0';
|
||||
|
||||
update_function_pointers();
|
||||
}
|
||||
|
||||
Image::Image( const AVFrame *frame ) {
|
||||
|
@ -168,6 +198,7 @@ Image::Image( const AVFrame *frame ) {
|
|||
Fatal("You must compile ffmpeg with the --enable-swscale option to use ffmpeg cameras");
|
||||
#endif // HAVE_LIBSWSCALE
|
||||
av_frame_free( &dest_frame );
|
||||
update_function_pointers();
|
||||
}
|
||||
|
||||
Image::Image( const Image &p_image ) {
|
||||
|
@ -184,6 +215,7 @@ Image::Image( const Image &p_image ) {
|
|||
AllocImgBuffer(size);
|
||||
(*fptr_imgbufcpy)(buffer, p_image.buffer, size);
|
||||
strncpy( text, p_image.text, sizeof(text) );
|
||||
update_function_pointers();
|
||||
}
|
||||
|
||||
Image::~Image() {
|
||||
|
@ -319,20 +351,20 @@ void Image::Initialise() {
|
|||
#endif
|
||||
} else {
|
||||
/* No suitable SSE version available */
|
||||
fptr_delta8_rgba = &std_delta8_rgba;
|
||||
fptr_delta8_bgra = &std_delta8_bgra;
|
||||
fptr_delta8_argb = &std_delta8_argb;
|
||||
fptr_delta8_abgr = &std_delta8_abgr;
|
||||
fptr_delta8_gray8 = &std_delta8_gray8;
|
||||
fptr_delta8_rgba = &fast_delta8_rgba;
|
||||
fptr_delta8_bgra = &fast_delta8_bgra;
|
||||
fptr_delta8_argb = &fast_delta8_argb;
|
||||
fptr_delta8_abgr = &fast_delta8_abgr;
|
||||
fptr_delta8_gray8 = &fast_delta8_gray8;
|
||||
Debug(4,"Delta: Using standard delta functions");
|
||||
}
|
||||
} else {
|
||||
/* CPU extensions disabled */
|
||||
fptr_delta8_rgba = &std_delta8_rgba;
|
||||
fptr_delta8_bgra = &std_delta8_bgra;
|
||||
fptr_delta8_argb = &std_delta8_argb;
|
||||
fptr_delta8_abgr = &std_delta8_abgr;
|
||||
fptr_delta8_gray8 = &std_delta8_gray8;
|
||||
fptr_delta8_rgba = &fast_delta8_rgba;
|
||||
fptr_delta8_bgra = &fast_delta8_bgra;
|
||||
fptr_delta8_argb = &fast_delta8_argb;
|
||||
fptr_delta8_abgr = &fast_delta8_abgr;
|
||||
fptr_delta8_gray8 = &fast_delta8_gray8;
|
||||
Debug(4,"Delta: CPU extensions disabled, using standard delta functions");
|
||||
}
|
||||
|
||||
|
@ -1596,7 +1628,7 @@ void Image::Blend( const Image &image, int transparency ) {
|
|||
#endif
|
||||
|
||||
/* Do the blending */
|
||||
(*fptr_blend)(buffer, image.buffer, new_buffer, size, transparency);
|
||||
(*blend)(buffer, image.buffer, new_buffer, size, transparency);
|
||||
|
||||
#ifdef ZM_IMAGE_PROFILING
|
||||
clock_gettime(CLOCK_THREAD_CPUTIME_ID,&end);
|
||||
|
@ -1623,7 +1655,7 @@ Image *Image::Merge( unsigned int n_images, Image *images[] ) {
|
|||
}
|
||||
}
|
||||
|
||||
Image *result = new Image( width, height, images[0]->colours, images[0]->subpixelorder);
|
||||
Image *result = new Image(width, height, images[0]->colours, images[0]->subpixelorder);
|
||||
unsigned int size = result->size;
|
||||
for ( unsigned int i = 0; i < size; i++ ) {
|
||||
unsigned int total = 0;
|
||||
|
@ -1729,37 +1761,33 @@ void Image::Delta( const Image &image, Image* targetimage) const {
|
|||
clock_gettime(CLOCK_THREAD_CPUTIME_ID,&start);
|
||||
#endif
|
||||
|
||||
switch(colours) {
|
||||
switch (colours) {
|
||||
case ZM_COLOUR_RGB24:
|
||||
{
|
||||
if(subpixelorder == ZM_SUBPIX_ORDER_BGR) {
|
||||
/* BGR subpixel order */
|
||||
(*fptr_delta8_bgr)(buffer, image.buffer, pdiff, pixels);
|
||||
} else {
|
||||
/* Assume RGB subpixel order */
|
||||
(*fptr_delta8_rgb)(buffer, image.buffer, pdiff, pixels);
|
||||
}
|
||||
break;
|
||||
if ( subpixelorder == ZM_SUBPIX_ORDER_BGR ) {
|
||||
/* BGR subpixel order */
|
||||
(*delta8_bgr)(buffer, image.buffer, pdiff, pixels);
|
||||
} else {
|
||||
/* Assume RGB subpixel order */
|
||||
(*delta8_rgb)(buffer, image.buffer, pdiff, pixels);
|
||||
}
|
||||
break;
|
||||
case ZM_COLOUR_RGB32:
|
||||
{
|
||||
if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) {
|
||||
/* ARGB subpixel order */
|
||||
(*fptr_delta8_argb)(buffer, image.buffer, pdiff, pixels);
|
||||
} else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
|
||||
/* ABGR subpixel order */
|
||||
(*fptr_delta8_abgr)(buffer, image.buffer, pdiff, pixels);
|
||||
} else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
|
||||
/* BGRA subpixel order */
|
||||
(*fptr_delta8_bgra)(buffer, image.buffer, pdiff, pixels);
|
||||
} else {
|
||||
/* Assume RGBA subpixel order */
|
||||
(*fptr_delta8_rgba)(buffer, image.buffer, pdiff, pixels);
|
||||
}
|
||||
break;
|
||||
if ( subpixelorder == ZM_SUBPIX_ORDER_ARGB ) {
|
||||
/* ARGB subpixel order */
|
||||
(*delta8_argb)(buffer, image.buffer, pdiff, pixels);
|
||||
} else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
|
||||
/* ABGR subpixel order */
|
||||
(*delta8_abgr)(buffer, image.buffer, pdiff, pixels);
|
||||
} else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
|
||||
/* BGRA subpixel order */
|
||||
(*delta8_bgra)(buffer, image.buffer, pdiff, pixels);
|
||||
} else {
|
||||
/* Assume RGBA subpixel order */
|
||||
(*delta8_rgba)(buffer, image.buffer, pdiff, pixels);
|
||||
}
|
||||
break;
|
||||
case ZM_COLOUR_GRAY8:
|
||||
(*fptr_delta8_gray8)(buffer, image.buffer, pdiff, pixels);
|
||||
(*delta8_gray8)(buffer, image.buffer, pdiff, pixels);
|
||||
break;
|
||||
default:
|
||||
Panic("Delta called with unexpected colours: %d",colours);
|
||||
|
@ -1772,7 +1800,7 @@ void Image::Delta( const Image &image, Image* targetimage) const {
|
|||
|
||||
executetime = (1000000000ull * diff.tv_sec) + diff.tv_nsec;
|
||||
milpixels = (unsigned long)((long double)pixels)/((((long double)executetime)/1000));
|
||||
Debug(5, "Delta: %u delta pixels generated in %llu nanoseconds, %lu million pixels/s\n",pixels,executetime,milpixels);
|
||||
Debug(5, "Delta: %u delta pixels generated in %llu nanoseconds, %lu million pixels/s",pixels,executetime,milpixels);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -2061,9 +2089,9 @@ void Image::DeColourise() {
|
|||
subpixelorder = ZM_SUBPIX_ORDER_NONE;
|
||||
size = width * height;
|
||||
|
||||
if(colours == ZM_COLOUR_RGB32 && config.cpu_extensions && sseversion >= 35) {
|
||||
if ( colours == ZM_COLOUR_RGB32 && config.cpu_extensions && sseversion >= 35 ) {
|
||||
/* Use SSSE3 functions */
|
||||
switch(subpixelorder) {
|
||||
switch (subpixelorder) {
|
||||
case ZM_SUBPIX_ORDER_BGRA:
|
||||
ssse3_convert_bgra_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
|
@ -2081,40 +2109,70 @@ void Image::DeColourise() {
|
|||
} else {
|
||||
/* Use standard functions */
|
||||
if ( colours == ZM_COLOUR_RGB32 ) {
|
||||
switch(subpixelorder) {
|
||||
case ZM_SUBPIX_ORDER_BGRA:
|
||||
std_convert_bgra_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_ARGB:
|
||||
std_convert_argb_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_ABGR:
|
||||
std_convert_abgr_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_RGBA:
|
||||
default:
|
||||
std_convert_rgba_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
}
|
||||
if ( pixels % 16 ) {
|
||||
switch (subpixelorder) {
|
||||
case ZM_SUBPIX_ORDER_BGRA:
|
||||
std_convert_bgra_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_ARGB:
|
||||
std_convert_argb_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_ABGR:
|
||||
std_convert_abgr_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_RGBA:
|
||||
default:
|
||||
std_convert_rgba_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (subpixelorder) {
|
||||
case ZM_SUBPIX_ORDER_BGRA:
|
||||
fast_convert_bgra_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_ARGB:
|
||||
fast_convert_argb_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_ABGR:
|
||||
fast_convert_abgr_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_RGBA:
|
||||
default:
|
||||
fast_convert_rgba_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
}
|
||||
} // end if pixels % 16 to use loop unrolled functions
|
||||
} else {
|
||||
/* Assume RGB24 */
|
||||
switch(subpixelorder) {
|
||||
case ZM_SUBPIX_ORDER_BGR:
|
||||
std_convert_bgr_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_RGB:
|
||||
default:
|
||||
std_convert_rgb_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( pixels % 12 ) {
|
||||
switch (subpixelorder) {
|
||||
case ZM_SUBPIX_ORDER_BGR:
|
||||
std_convert_bgr_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_RGB:
|
||||
default:
|
||||
std_convert_rgb_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (subpixelorder) {
|
||||
case ZM_SUBPIX_ORDER_BGR:
|
||||
fast_convert_bgr_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
case ZM_SUBPIX_ORDER_RGB:
|
||||
default:
|
||||
fast_convert_rgb_gray8(buffer,buffer,pixels);
|
||||
break;
|
||||
}
|
||||
} // end if pixels % 12 to use loop unrolled functions
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* RGB32 compatible: complete */
|
||||
void Image::Fill( Rgb colour, const Box *limits ) {
|
||||
if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32 ) ) {
|
||||
Panic( "Attempt to fill image with unexpected colours %d", colours );
|
||||
Panic("Attempt to fill image with unexpected colours %d", colours);
|
||||
}
|
||||
|
||||
/* Convert the colour's RGBA subpixel order into the image's subpixel order */
|
||||
|
@ -3271,7 +3329,7 @@ __attribute__((noinline)) void std_blend(const uint8_t* col1, const uint8_t* col
|
|||
/************************************************* DELTA FUNCTIONS *************************************************/
|
||||
|
||||
/* Grayscale */
|
||||
__attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
/* Loop unrolling is used to work on 16 bytes (16 grayscale pixels) at a time */
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -3299,8 +3357,20 @@ __attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8
|
|||
}
|
||||
}
|
||||
|
||||
/* Grayscale delta, plain per-pixel variant.
 * Writes |col1[i] - col2[i]| to result[i] for each of the count pixels; it handles
 * one byte per iteration, so any pixel count is acceptable. */
__attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    result[px] = abs(col1[px] - col2[px]);
  }
}
|
||||
|
||||
/* RGB24: RGB */
|
||||
__attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
/* Loop unrolling is used to work on 12 bytes (4 rgb24 pixels) at a time */
|
||||
int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
@ -3329,8 +3399,25 @@ __attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t
|
|||
}
|
||||
}
|
||||
|
||||
/* RGB24 delta (R,G,B byte order), plain per-pixel variant.
 * Reads 3 bytes per pixel from each input and writes one weighted difference byte
 * per pixel to result. count is the number of pixels; no multiple is required
 * because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const lhs = col1 + px * 3;
    const uint8_t* const rhs = col2 + px * 3;
    const int red = abs(lhs[0] - rhs[0]);
    const int green = abs(lhs[1] - rhs[1]);
    const int blue = abs(lhs[2] - rhs[2]);
    /* Weighted sum of the channel differences: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* RGB24: BGR */
|
||||
__attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
/* Loop unrolling is used to work on 12 bytes (4 rgb24 pixels) at a time */
|
||||
int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
@ -3359,8 +3446,25 @@ __attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t
|
|||
}
|
||||
}
|
||||
|
||||
/* RGB24 delta (B,G,R byte order), plain per-pixel variant.
 * Reads 3 bytes per pixel from each input and writes one weighted difference byte
 * per pixel to result. count is the number of pixels; no multiple is required
 * because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const lhs = col1 + px * 3;
    const uint8_t* const rhs = col2 + px * 3;
    const int blue = abs(lhs[0] - rhs[0]);
    const int green = abs(lhs[1] - rhs[1]);
    const int red = abs(lhs[2] - rhs[2]);
    /* Weighted sum of the channel differences: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* RGB32: RGBA */
|
||||
__attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
|
||||
int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
@ -3389,8 +3493,25 @@ __attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_
|
|||
}
|
||||
}
|
||||
|
||||
/* RGB32 delta (R,G,B,A byte order), plain per-pixel variant.
 * Reads 4 bytes per pixel from each input (the fourth byte is not used) and writes
 * one weighted difference byte per pixel to result. count is the number of pixels;
 * no multiple is required because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const lhs = col1 + px * 4;
    const uint8_t* const rhs = col2 + px * 4;
    const int red = abs(lhs[0] - rhs[0]);
    const int green = abs(lhs[1] - rhs[1]);
    const int blue = abs(lhs[2] - rhs[2]);
    /* Weighted sum of the channel differences: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* RGB32: BGRA */
|
||||
__attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
|
||||
int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
@ -3418,9 +3539,25 @@ __attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB32 delta (B,G,R,A byte order), plain per-pixel variant.
 * Reads 4 bytes per pixel from each input (the fourth byte is not used) and writes
 * one weighted difference byte per pixel to result. count is the number of pixels;
 * no multiple is required because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const lhs = col1 + px * 4;
    const uint8_t* const rhs = col2 + px * 4;
    const int blue = abs(lhs[0] - rhs[0]);
    const int green = abs(lhs[1] - rhs[1]);
    const int red = abs(lhs[2] - rhs[2]);
    /* Weighted sum of the channel differences: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* RGB32: ARGB */
|
||||
__attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
|
||||
int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
@ -3448,9 +3585,25 @@ __attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB32 delta (A,R,G,B byte order), plain per-pixel variant.
 * Reads 4 bytes per pixel from each input (the first byte is not used) and writes
 * one weighted difference byte per pixel to result. count is the number of pixels;
 * no multiple is required because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const lhs = col1 + px * 4;
    const uint8_t* const rhs = col2 + px * 4;
    const int red = abs(lhs[1] - rhs[1]);
    const int green = abs(lhs[2] - rhs[2]);
    const int blue = abs(lhs[3] - rhs[3]);
    /* Weighted sum of the channel differences: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* RGB32: ABGR */
|
||||
__attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
|
||||
int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
@ -3478,6 +3631,21 @@ __attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB32 delta (A,B,G,R byte order), plain per-pixel variant.
 * Reads 4 bytes per pixel from each input (the first byte is not used) and writes
 * one weighted difference byte per pixel to result. count is the number of pixels;
 * no multiple is required because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const lhs = col1 + px * 4;
    const uint8_t* const rhs = col2 + px * 4;
    const int blue = abs(lhs[1] - rhs[1]);
    const int green = abs(lhs[2] - rhs[2]);
    const int red = abs(lhs[3] - rhs[3]);
    /* Weighted sum of the channel differences: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* Grayscale Neon for AArch32 */
|
||||
#if (defined(__arm__) && defined(__ARM_PCS_VFP) && !defined(ZM_STRIP_NEON))
|
||||
|
@ -4046,7 +4214,7 @@ void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result
|
|||
/************************************************* CONVERT FUNCTIONS *************************************************/
|
||||
|
||||
/* RGB24 to grayscale */
|
||||
__attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
unsigned int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -4072,9 +4240,23 @@ __attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB24 (R,G,B byte order) to grayscale, plain per-pixel variant.
 * Consumes 3 input bytes and produces 1 output byte per pixel; count is the number
 * of pixels and may be any value because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const src = col1 + px * 3;
    const unsigned int red = src[0];
    const unsigned int green = src[1];
    const unsigned int blue = src[2];
    /* Weighted channel sum: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* BGR24 to grayscale */
|
||||
__attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
unsigned int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -4100,9 +4282,23 @@ __attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB24 (B,G,R byte order) to grayscale, plain per-pixel variant.
 * Consumes 3 input bytes and produces 1 output byte per pixel; count is the number
 * of pixels and may be any value because a single pixel is handled per iteration. */
__attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const src = col1 + px * 3;
    const unsigned int blue = src[0];
    const unsigned int green = src[1];
    const unsigned int red = src[2];
    /* Weighted channel sum: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* RGBA to grayscale */
|
||||
__attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
unsigned int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -4128,9 +4324,23 @@ __attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB32 (R,G,B,A byte order) to grayscale, plain per-pixel variant.
 * Consumes 4 input bytes (the fourth is not used) and produces 1 output byte per
 * pixel; count is the number of pixels and may be any value because a single pixel
 * is handled per iteration. */
__attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const src = col1 + px * 4;
    const unsigned int red = src[0];
    const unsigned int green = src[1];
    const unsigned int blue = src[2];
    /* Weighted channel sum: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* BGRA to grayscale */
|
||||
__attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
unsigned int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -4157,8 +4367,22 @@ __attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8
|
|||
}
|
||||
}
|
||||
|
||||
/* RGB32 (B,G,R,A byte order) to grayscale, plain per-pixel variant.
 * Consumes 4 input bytes (the fourth is not used) and produces 1 output byte per
 * pixel; count is the number of pixels and may be any value because a single pixel
 * is handled per iteration. */
__attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const src = col1 + px * 4;
    const unsigned int blue = src[0];
    const unsigned int green = src[1];
    const unsigned int red = src[2];
    /* Weighted channel sum: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
/* ARGB to grayscale */
|
||||
__attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
unsigned int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -4184,9 +4408,23 @@ __attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB32 (A,R,G,B byte order) to grayscale, plain per-pixel variant.
 * Consumes 4 input bytes (the first is not used) and produces 1 output byte per
 * pixel; count is the number of pixels and may be any value because a single pixel
 * is handled per iteration. */
__attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const src = col1 + px * 4;
    const unsigned int red = src[1];
    const unsigned int green = src[2];
    const unsigned int blue = src[3];
    /* Weighted channel sum: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* ABGR to grayscale */
|
||||
__attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
unsigned int r,g,b;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -4212,9 +4450,23 @@ __attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8
|
|||
result += 4;
|
||||
}
|
||||
}
|
||||
/* RGB32 (A,B,G,R byte order) to grayscale, plain per-pixel variant.
 * Consumes 4 input bytes (the first is not used) and produces 1 output byte per
 * pixel; count is the number of pixels and may be any value because a single pixel
 * is handled per iteration. */
__attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    const uint8_t* const src = col1 + px * 4;
    const unsigned int blue = src[1];
    const unsigned int green = src[2];
    const unsigned int red = src[3];
    /* Weighted channel sum: (2*R + B + 5*G) / 8 */
    result[px] = (2 * red + blue + 5 * green) >> 3;
  }
}
|
||||
|
||||
/* Converts a YUYV image into grayscale by extracting the Y channel */
|
||||
__attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
__attribute__((noinline)) void fast_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
|
||||
const uint16_t* yuvbuf = (const uint16_t*)col1;
|
||||
const uint8_t* const max_ptr = result + count;
|
||||
|
||||
|
@ -4240,6 +4492,17 @@ __attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8
|
|||
result += 16;
|
||||
}
|
||||
}
|
||||
/* YUYV to grayscale, plain per-pixel variant.
 * Copies the first byte of every 2-byte input pair (the Y sample) to result.
 * count is the number of output pixels, so 2 * count input bytes are read.
 * The input is indexed byte-wise rather than through a uint16_t pointer, so the
 * result does not depend on host byte order or on col1 being 2-byte aligned.
 * Each output byte is written at or before the input position being read, so the
 * loop also works when result and col1 refer to the same buffer. */
__attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; px++ ) {
    result[px] = col1[px * 2];
  }
}
|
||||
|
||||
/* RGB32 to grayscale SSSE3 */
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
|
|
|
@ -86,6 +86,19 @@ inline static void DumpBuffer(uint8_t* buffer, int buffertype) {
|
|||
// camera in raw form.
|
||||
//
|
||||
class Image {
|
||||
private:
|
||||
delta_fptr_t delta8_rgb;
|
||||
delta_fptr_t delta8_bgr;
|
||||
delta_fptr_t delta8_rgba;
|
||||
delta_fptr_t delta8_bgra;
|
||||
delta_fptr_t delta8_argb;
|
||||
delta_fptr_t delta8_abgr;
|
||||
delta_fptr_t delta8_gray8;
|
||||
|
||||
// Per object function pointer that we can set once we know the image dimensions
|
||||
blend_fptr_t blend;
|
||||
|
||||
void update_function_pointers();
|
||||
protected:
|
||||
|
||||
struct Edge {
|
||||
|
@ -275,6 +288,14 @@ void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result,
|
|||
void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
|
||||
void fast_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void fast_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void fast_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void fast_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void fast_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void fast_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void fast_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
|
||||
void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void neon32_armv7_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
void neon32_armv7_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||
|
@ -303,6 +324,15 @@ void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long
|
|||
void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
|
||||
void fast_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void fast_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void fast_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void fast_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void fast_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void fast_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void fast_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
|
||||
void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void ssse3_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
void ssse3_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
|
||||
|
|
Loading…
Reference in New Issue