Fix imagesize requirements (#2404)

* Introduce non-loop-unrolled versions of the functions and use them when the image size is not a multiple of 12 or 16

* Remove tests for imagesize being a multiple of 16 or 12 to handle functions with unrolled loops

* Use non-unrolled functions when image size is not a multiple of 12 or 16

* use std_blend when image is odd size
This commit is contained in:
Isaac Connor 2019-02-22 09:44:57 -05:00 committed by GitHub
parent 2b90bf15a6
commit 443fd35d99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 376 additions and 89 deletions

View File

@ -52,14 +52,8 @@ Camera::Camera(
imagesize = pixels * colours;
Debug(2,"New camera id: %d width: %d height: %d colours: %d subpixelorder: %d capture: %d",
monitor_id,width,height,colours,subpixelorder,capture);
monitor_id, width, height, colours, subpixelorder, capture);
/* Because many loops are unrolled and work on 16 colours/time or 4 pixels/time, we have to meet requirements */
if ( (colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB32) && (imagesize % 64) != 0 ) {
Fatal("Image size is not multiples of 64");
} else if ( colours == ZM_COLOUR_RGB24 && ((imagesize % 64) != 0 || (imagesize % 12) != 0) ) {
Fatal("Image size is not multiples of 12 and 64");
}
monitor = NULL;
}

View File

@ -75,6 +75,32 @@ static deinterlace_4field_fptr_t fptr_deinterlace_4field_gray8;
/* Pointer to image buffer memory copy function */
imgbufcpy_fptr_t fptr_imgbufcpy;
/* Selects, per image object, which delta/blend implementations to call.
 *
 * The globally chosen fptr_* routines may be SSE, NEON or loop-unrolled
 * variants that consume several pixels per iteration, so they are only
 * used when the pixel count is a multiple of both 16 and 12. For every
 * other size the one-pixel-per-iteration std_* routines are installed.
 */
void Image::update_function_pointers() {
  const bool size_fits_unrolled = (pixels % 16 == 0) && (pixels % 12 == 0);

  if ( size_fits_unrolled ) {
    // Whatever Initialise() picked: sse, neon, or the loop unrolled version
    delta8_gray8 = fptr_delta8_gray8;
    delta8_rgb   = fptr_delta8_rgb;
    delta8_bgr   = fptr_delta8_bgr;
    delta8_rgba  = fptr_delta8_rgba;
    delta8_bgra  = fptr_delta8_bgra;
    delta8_argb  = fptr_delta8_argb;
    delta8_abgr  = fptr_delta8_abgr;
    blend        = fptr_blend;
    return;
  }

  // Size does not line up with the unrolled loops: fall back to the
  // non-loop-unrolled functions
  delta8_gray8 = &std_delta8_gray8;
  delta8_rgb   = &std_delta8_rgb;
  delta8_bgr   = &std_delta8_bgr;
  delta8_rgba  = &std_delta8_rgba;
  delta8_bgra  = &std_delta8_bgra;
  delta8_argb  = &std_delta8_argb;
  delta8_abgr  = &std_delta8_abgr;
  blend        = &std_blend;
}
// This constructor is not used anywhere
Image::Image() {
if ( !initialised )
Initialise();
@ -89,6 +115,7 @@ Image::Image() {
buffertype = 0;
holdbuffer = 0;
text[0] = '\0';
blend = fptr_blend;
}
Image::Image( const char *filename ) {
@ -104,8 +131,9 @@ Image::Image( const char *filename ) {
buffer = 0;
buffertype = 0;
holdbuffer = 0;
ReadJpeg( filename, ZM_COLOUR_RGB24, ZM_SUBPIX_ORDER_RGB);
ReadJpeg(filename, ZM_COLOUR_RGB24, ZM_SUBPIX_ORDER_RGB);
text[0] = '\0';
update_function_pointers();
}
Image::Image( int p_width, int p_height, int p_colours, int p_subpixelorder, uint8_t *p_buffer ) {
@ -127,6 +155,8 @@ Image::Image( int p_width, int p_height, int p_colours, int p_subpixelorder, uin
AllocImgBuffer(size);
}
text[0] = '\0';
update_function_pointers();
}
Image::Image( const AVFrame *frame ) {
@ -168,6 +198,7 @@ Image::Image( const AVFrame *frame ) {
Fatal("You must compile ffmpeg with the --enable-swscale option to use ffmpeg cameras");
#endif // HAVE_LIBSWSCALE
av_frame_free( &dest_frame );
update_function_pointers();
}
Image::Image( const Image &p_image ) {
@ -184,6 +215,7 @@ Image::Image( const Image &p_image ) {
AllocImgBuffer(size);
(*fptr_imgbufcpy)(buffer, p_image.buffer, size);
strncpy( text, p_image.text, sizeof(text) );
update_function_pointers();
}
Image::~Image() {
@ -319,20 +351,20 @@ void Image::Initialise() {
#endif
} else {
/* No suitable SSE version available */
fptr_delta8_rgba = &std_delta8_rgba;
fptr_delta8_bgra = &std_delta8_bgra;
fptr_delta8_argb = &std_delta8_argb;
fptr_delta8_abgr = &std_delta8_abgr;
fptr_delta8_gray8 = &std_delta8_gray8;
fptr_delta8_rgba = &fast_delta8_rgba;
fptr_delta8_bgra = &fast_delta8_bgra;
fptr_delta8_argb = &fast_delta8_argb;
fptr_delta8_abgr = &fast_delta8_abgr;
fptr_delta8_gray8 = &fast_delta8_gray8;
Debug(4,"Delta: Using standard delta functions");
}
} else {
/* CPU extensions disabled */
fptr_delta8_rgba = &std_delta8_rgba;
fptr_delta8_bgra = &std_delta8_bgra;
fptr_delta8_argb = &std_delta8_argb;
fptr_delta8_abgr = &std_delta8_abgr;
fptr_delta8_gray8 = &std_delta8_gray8;
fptr_delta8_rgba = &fast_delta8_rgba;
fptr_delta8_bgra = &fast_delta8_bgra;
fptr_delta8_argb = &fast_delta8_argb;
fptr_delta8_abgr = &fast_delta8_abgr;
fptr_delta8_gray8 = &fast_delta8_gray8;
Debug(4,"Delta: CPU extensions disabled, using standard delta functions");
}
@ -1596,7 +1628,7 @@ void Image::Blend( const Image &image, int transparency ) {
#endif
/* Do the blending */
(*fptr_blend)(buffer, image.buffer, new_buffer, size, transparency);
(*blend)(buffer, image.buffer, new_buffer, size, transparency);
#ifdef ZM_IMAGE_PROFILING
clock_gettime(CLOCK_THREAD_CPUTIME_ID,&end);
@ -1623,7 +1655,7 @@ Image *Image::Merge( unsigned int n_images, Image *images[] ) {
}
}
Image *result = new Image( width, height, images[0]->colours, images[0]->subpixelorder);
Image *result = new Image(width, height, images[0]->colours, images[0]->subpixelorder);
unsigned int size = result->size;
for ( unsigned int i = 0; i < size; i++ ) {
unsigned int total = 0;
@ -1729,37 +1761,33 @@ void Image::Delta( const Image &image, Image* targetimage) const {
clock_gettime(CLOCK_THREAD_CPUTIME_ID,&start);
#endif
switch(colours) {
switch (colours) {
case ZM_COLOUR_RGB24:
{
if(subpixelorder == ZM_SUBPIX_ORDER_BGR) {
/* BGR subpixel order */
(*fptr_delta8_bgr)(buffer, image.buffer, pdiff, pixels);
} else {
/* Assume RGB subpixel order */
(*fptr_delta8_rgb)(buffer, image.buffer, pdiff, pixels);
}
break;
if ( subpixelorder == ZM_SUBPIX_ORDER_BGR ) {
/* BGR subpixel order */
(*delta8_bgr)(buffer, image.buffer, pdiff, pixels);
} else {
/* Assume RGB subpixel order */
(*delta8_rgb)(buffer, image.buffer, pdiff, pixels);
}
break;
case ZM_COLOUR_RGB32:
{
if(subpixelorder == ZM_SUBPIX_ORDER_ARGB) {
/* ARGB subpixel order */
(*fptr_delta8_argb)(buffer, image.buffer, pdiff, pixels);
} else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
/* ABGR subpixel order */
(*fptr_delta8_abgr)(buffer, image.buffer, pdiff, pixels);
} else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
/* BGRA subpixel order */
(*fptr_delta8_bgra)(buffer, image.buffer, pdiff, pixels);
} else {
/* Assume RGBA subpixel order */
(*fptr_delta8_rgba)(buffer, image.buffer, pdiff, pixels);
}
break;
if ( subpixelorder == ZM_SUBPIX_ORDER_ARGB ) {
/* ARGB subpixel order */
(*delta8_argb)(buffer, image.buffer, pdiff, pixels);
} else if(subpixelorder == ZM_SUBPIX_ORDER_ABGR) {
/* ABGR subpixel order */
(*delta8_abgr)(buffer, image.buffer, pdiff, pixels);
} else if(subpixelorder == ZM_SUBPIX_ORDER_BGRA) {
/* BGRA subpixel order */
(*delta8_bgra)(buffer, image.buffer, pdiff, pixels);
} else {
/* Assume RGBA subpixel order */
(*delta8_rgba)(buffer, image.buffer, pdiff, pixels);
}
break;
case ZM_COLOUR_GRAY8:
(*fptr_delta8_gray8)(buffer, image.buffer, pdiff, pixels);
(*delta8_gray8)(buffer, image.buffer, pdiff, pixels);
break;
default:
Panic("Delta called with unexpected colours: %d",colours);
@ -1772,7 +1800,7 @@ void Image::Delta( const Image &image, Image* targetimage) const {
executetime = (1000000000ull * diff.tv_sec) + diff.tv_nsec;
milpixels = (unsigned long)((long double)pixels)/((((long double)executetime)/1000));
Debug(5, "Delta: %u delta pixels generated in %llu nanoseconds, %lu million pixels/s\n",pixels,executetime,milpixels);
Debug(5, "Delta: %u delta pixels generated in %llu nanoseconds, %lu million pixels/s",pixels,executetime,milpixels);
#endif
}
@ -2061,9 +2089,9 @@ void Image::DeColourise() {
subpixelorder = ZM_SUBPIX_ORDER_NONE;
size = width * height;
if(colours == ZM_COLOUR_RGB32 && config.cpu_extensions && sseversion >= 35) {
if ( colours == ZM_COLOUR_RGB32 && config.cpu_extensions && sseversion >= 35 ) {
/* Use SSSE3 functions */
switch(subpixelorder) {
switch (subpixelorder) {
case ZM_SUBPIX_ORDER_BGRA:
ssse3_convert_bgra_gray8(buffer,buffer,pixels);
break;
@ -2081,40 +2109,70 @@ void Image::DeColourise() {
} else {
/* Use standard functions */
if ( colours == ZM_COLOUR_RGB32 ) {
switch(subpixelorder) {
case ZM_SUBPIX_ORDER_BGRA:
std_convert_bgra_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_ARGB:
std_convert_argb_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_ABGR:
std_convert_abgr_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_RGBA:
default:
std_convert_rgba_gray8(buffer,buffer,pixels);
break;
}
if ( pixels % 16 ) {
switch (subpixelorder) {
case ZM_SUBPIX_ORDER_BGRA:
std_convert_bgra_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_ARGB:
std_convert_argb_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_ABGR:
std_convert_abgr_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_RGBA:
default:
std_convert_rgba_gray8(buffer,buffer,pixels);
break;
}
} else {
switch (subpixelorder) {
case ZM_SUBPIX_ORDER_BGRA:
fast_convert_bgra_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_ARGB:
fast_convert_argb_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_ABGR:
fast_convert_abgr_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_RGBA:
default:
fast_convert_rgba_gray8(buffer,buffer,pixels);
break;
}
} // end if pixels % 16 to use loop unrolled functions
} else {
/* Assume RGB24 */
switch(subpixelorder) {
case ZM_SUBPIX_ORDER_BGR:
std_convert_bgr_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_RGB:
default:
std_convert_rgb_gray8(buffer,buffer,pixels);
break;
}
}
if ( pixels % 12 ) {
switch (subpixelorder) {
case ZM_SUBPIX_ORDER_BGR:
std_convert_bgr_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_RGB:
default:
std_convert_rgb_gray8(buffer,buffer,pixels);
break;
}
} else {
switch (subpixelorder) {
case ZM_SUBPIX_ORDER_BGR:
fast_convert_bgr_gray8(buffer,buffer,pixels);
break;
case ZM_SUBPIX_ORDER_RGB:
default:
fast_convert_rgb_gray8(buffer,buffer,pixels);
break;
}
} // end if pixels % 12 to use loop unrolled functions
}
}
}
/* RGB32 compatible: complete */
void Image::Fill( Rgb colour, const Box *limits ) {
if ( !(colours == ZM_COLOUR_GRAY8 || colours == ZM_COLOUR_RGB24 || colours == ZM_COLOUR_RGB32 ) ) {
Panic( "Attempt to fill image with unexpected colours %d", colours );
Panic("Attempt to fill image with unexpected colours %d", colours);
}
/* Convert the colour's RGBA subpixel order into the image's subpixel order */
@ -3271,7 +3329,7 @@ __attribute__((noinline)) void std_blend(const uint8_t* col1, const uint8_t* col
/************************************************* DELTA FUNCTIONS *************************************************/
/* Grayscale */
__attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
/* Loop unrolling is used to work on 16 bytes (16 grayscale pixels) at a time */
const uint8_t* const max_ptr = result + count;
@ -3299,8 +3357,20 @@ __attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8
}
}
/* Grayscale delta, one pixel per iteration, so any count is accepted.
 * result[i] receives the absolute difference of col1[i] and col2[i]. */
__attribute__((noinline)) void std_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px ) {
    const int difference = col1[px] - col2[px];
    result[px] = abs(difference);
  }
}
/* RGB24: RGB */
__attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
/* Loop unrolling is used to work on 12 bytes (4 rgb24 pixels) at a time */
int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -3329,8 +3399,25 @@ __attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t
}
}
/* RGB24 delta without loop unrolling: handles one 3-byte pixel per iteration.
 * Each output byte is a weighted mix of the per-channel absolute differences:
 * (2*red + 5*green + blue) / 8. count is the number of pixels. */
__attribute__((noinline)) void std_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 3, col2 += 3 ) {
    const int red   = abs(col1[0] - col2[0]);
    const int green = abs(col1[1] - col2[1]);
    const int blue  = abs(col1[2] - col2[2]);
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* RGB24: BGR */
__attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
/* Loop unrolling is used to work on 12 bytes (4 rgb24 pixels) at a time */
int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -3359,8 +3446,25 @@ __attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t
}
}
/* BGR24 delta without loop unrolling: handles one 3-byte pixel per iteration.
 * Channel order in memory is blue, green, red; the output byte is
 * (2*red + 5*green + blue) / 8 of the absolute differences. */
__attribute__((noinline)) void std_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 3, col2 += 3 ) {
    const int blue  = abs(col1[0] - col2[0]);
    const int green = abs(col1[1] - col2[1]);
    const int red   = abs(col1[2] - col2[2]);
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* RGB32: RGBA */
__attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -3389,8 +3493,25 @@ __attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_
}
}
/* RGBA delta without loop unrolling: handles one 4-byte pixel per iteration.
 * Bytes 0..2 are red, green, blue; byte 3 is skipped. The output byte is
 * (2*red + 5*green + blue) / 8 of the absolute differences. */
__attribute__((noinline)) void std_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4, col2 += 4 ) {
    const int red   = abs(col1[0] - col2[0]);
    const int green = abs(col1[1] - col2[1]);
    const int blue  = abs(col1[2] - col2[2]);
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* RGB32: BGRA */
__attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -3418,9 +3539,25 @@ __attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_
result += 4;
}
}
/* BGRA delta without loop unrolling: handles one 4-byte pixel per iteration.
 * Bytes 0..2 are blue, green, red; byte 3 is skipped. The output byte is
 * (2*red + 5*green + blue) / 8 of the absolute differences. */
__attribute__((noinline)) void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4, col2 += 4 ) {
    const int blue  = abs(col1[0] - col2[0]);
    const int green = abs(col1[1] - col2[1]);
    const int red   = abs(col1[2] - col2[2]);
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* RGB32: ARGB */
__attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -3448,9 +3585,25 @@ __attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_
result += 4;
}
}
/* ARGB delta without loop unrolling: handles one 4-byte pixel per iteration.
 * Byte 0 is skipped; bytes 1..3 are red, green, blue. The output byte is
 * (2*red + 5*green + blue) / 8 of the absolute differences. */
__attribute__((noinline)) void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4, col2 += 4 ) {
    const int red   = abs(col1[1] - col2[1]);
    const int green = abs(col1[2] - col2[2]);
    const int blue  = abs(col1[3] - col2[3]);
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* RGB32: ABGR */
__attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
/* Loop unrolling is used to work on 16 bytes (4 rgb32 pixels) at a time */
int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -3478,6 +3631,21 @@ __attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_
result += 4;
}
}
/* ABGR delta without loop unrolling: handles one 4-byte pixel per iteration.
 * Byte 0 is skipped; bytes 1..3 are blue, green, red. The output byte is
 * (2*red + 5*green + blue) / 8 of the absolute differences. */
__attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4, col2 += 4 ) {
    const int blue  = abs(col1[1] - col2[1]);
    const int green = abs(col1[2] - col2[2]);
    const int red   = abs(col1[3] - col2[3]);
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* Grayscale Neon for AArch32 */
#if (defined(__arm__) && defined(__ARM_PCS_VFP) && !defined(ZM_STRIP_NEON))
@ -4046,7 +4214,7 @@ void ssse3_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result
/************************************************* CONVERT FUNCTIONS *************************************************/
/* RGB24 to grayscale */
__attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
unsigned int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -4072,9 +4240,23 @@ __attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_
result += 4;
}
}
/* RGB24 to grayscale, one 3-byte pixel per iteration (no unrolling).
 * Gray value is (2*red + 5*green + blue) / 8. Every channel is read before
 * the output byte is stored, matching the original per-pixel order. */
__attribute__((noinline)) void std_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 3 ) {
    const unsigned int red   = col1[0];
    const unsigned int green = col1[1];
    const unsigned int blue  = col1[2];
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* BGR24 to grayscale */
__attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
unsigned int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -4100,9 +4282,23 @@ __attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_
result += 4;
}
}
/* BGR24 to grayscale, one 3-byte pixel per iteration (no unrolling).
 * Memory order is blue, green, red; gray value is (2*red + 5*green + blue) / 8. */
__attribute__((noinline)) void std_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 3 ) {
    const unsigned int blue  = col1[0];
    const unsigned int green = col1[1];
    const unsigned int red   = col1[2];
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* RGBA to grayscale */
__attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
unsigned int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -4128,9 +4324,23 @@ __attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8
result += 4;
}
}
/* RGBA to grayscale, one 4-byte pixel per iteration (no unrolling).
 * Bytes 0..2 are red, green, blue; byte 3 is skipped.
 * Gray value is (2*red + 5*green + blue) / 8. */
__attribute__((noinline)) void std_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4 ) {
    const unsigned int red   = col1[0];
    const unsigned int green = col1[1];
    const unsigned int blue  = col1[2];
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* BGRA to grayscale */
__attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
unsigned int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -4157,8 +4367,22 @@ __attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8
}
}
/* BGRA to grayscale, one 4-byte pixel per iteration (no unrolling).
 * Bytes 0..2 are blue, green, red; byte 3 is skipped.
 * Gray value is (2*red + 5*green + blue) / 8. */
__attribute__((noinline)) void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4 ) {
    const unsigned int blue  = col1[0];
    const unsigned int green = col1[1];
    const unsigned int red   = col1[2];
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* ARGB to grayscale */
__attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
unsigned int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -4184,9 +4408,23 @@ __attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8
result += 4;
}
}
/* ARGB to grayscale, one 4-byte pixel per iteration (no unrolling).
 * Byte 0 is skipped; bytes 1..3 are red, green, blue.
 * Gray value is (2*red + 5*green + blue) / 8. */
__attribute__((noinline)) void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4 ) {
    const unsigned int red   = col1[1];
    const unsigned int green = col1[2];
    const unsigned int blue  = col1[3];
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* ABGR to grayscale */
__attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
unsigned int r,g,b;
const uint8_t* const max_ptr = result + count;
@ -4212,9 +4450,23 @@ __attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8
result += 4;
}
}
/* ABGR to grayscale, one 4-byte pixel per iteration (no unrolling).
 * Byte 0 is skipped; bytes 1..3 are blue, green, red.
 * Gray value is (2*red + 5*green + blue) / 8. */
__attribute__((noinline)) void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px, col1 += 4 ) {
    const unsigned int blue  = col1[1];
    const unsigned int green = col1[2];
    const unsigned int red   = col1[3];
    result[px] = (2*red + 5*green + blue) >> 3;
  }
}
/* Converts a YUYV image into grayscale by extracting the Y channel */
__attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
__attribute__((noinline)) void fast_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
const uint16_t* yuvbuf = (const uint16_t*)col1;
const uint8_t* const max_ptr = result + count;
@ -4240,6 +4492,17 @@ __attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8
result += 16;
}
}
/* YUYV to grayscale without loop unrolling: one output pixel per iteration.
 *
 * col1   - packed source, two bytes per pixel
 * result - destination, one byte per pixel
 * count  - number of pixels to convert
 *
 * The previous version reinterpreted col1 as uint16_t* and kept the low byte
 * of each 16-bit word. That load can be misaligned, breaks strict aliasing,
 * and selects a different byte on big-endian hosts. Reading the first byte
 * of every 2-byte pair directly yields the same value the old code produced
 * on little-endian machines, with none of those hazards.
 * NOTE(review): assumes luma is the first byte of each pair - confirm
 * against the capture format.
 */
__attribute__((noinline)) void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count) {
  for ( unsigned long px = 0; px < count; ++px ) {
    // Source byte 2*px is never behind destination byte px, so an in-place
    // conversion still reads each sample before it can be overwritten.
    result[px] = col1[2 * px];
  }
}
/* RGB32 to grayscale SSSE3 */
#if defined(__i386__) || defined(__x86_64__)

View File

@ -86,6 +86,19 @@ inline static void DumpBuffer(uint8_t* buffer, int buffertype) {
// camera in raw form.
//
class Image {
private:
delta_fptr_t delta8_rgb;
delta_fptr_t delta8_bgr;
delta_fptr_t delta8_rgba;
delta_fptr_t delta8_bgra;
delta_fptr_t delta8_argb;
delta_fptr_t delta8_abgr;
delta_fptr_t delta8_gray8;
// Per object function pointer that we can set once we know the image dimensions
blend_fptr_t blend;
void update_function_pointers();
protected:
struct Edge {
@ -275,6 +288,14 @@ void std_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result,
void std_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void std_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void fast_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void fast_delta8_rgb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void fast_delta8_bgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void fast_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void fast_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void fast_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void fast_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void neon32_armv7_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
void neon32_armv7_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
@ -303,6 +324,15 @@ void std_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long
void std_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void std_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void std_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void fast_convert_rgb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void fast_convert_bgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void fast_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void fast_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void fast_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void fast_convert_abgr_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void fast_convert_yuyv_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void ssse3_convert_rgba_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void ssse3_convert_bgra_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);
void ssse3_convert_argb_gray8(const uint8_t* col1, uint8_t* result, unsigned long count);