Merge branch 'armv7_neon' of https://github.com/mastertheknife/ZoneMinder into storageareas

This commit is contained in:
Isaac Connor 2017-03-30 13:46:30 -04:00
commit f2dcd18297
3 changed files with 57 additions and 26 deletions

View File

@ -66,12 +66,23 @@ set(CMAKE_CXX_FLAGS_DEBUG "-Wall -D__STDC_CONSTANT_MACROS -g")
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
IF(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
add_definitions (-mfpu=neon)
ELSE(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
ENDIF(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
# GCC older than 6.0 does not support the __target__("fpu=neon") function attribute that the
# ARM Neon code is declared with, so compilation fails unless Neon is enabled with the
# -mfpu=neon compiler flag instead. Passing -mfpu=neon is unsafe on processors that do not
# support Neon, so the flag is only added when the processor reports Neon; otherwise the Neon
# code is stripped altogether by defining ZM_STRIP_NEON.
# NOTE(review): /proc/cpuinfo describes the machine running CMake, so this probe presumably
# assumes a native (non-cross) build - confirm before relying on it when cross-compiling.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND CMAKE_SYSTEM_NAME MATCHES "Linux")
  if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.0)
    # execute_process() replaces the deprecated exec_program(). It does not go through a shell,
    # so the pattern reaches grep exactly as written; -w asks for "neon" as a whole word.
    # grep exits with 0 only when at least one line matches.
    execute_process(
      COMMAND grep -w neon /proc/cpuinfo
      OUTPUT_VARIABLE neonoutput
      RESULT_VARIABLE neonresult
      ERROR_QUIET
    )
    if(neonresult EQUAL 0)
      # Neon is available: enable it for both languages in the Release and Debug flag sets.
      foreach(neon_flags_var
          CMAKE_C_FLAGS_RELEASE
          CMAKE_CXX_FLAGS_RELEASE
          CMAKE_C_FLAGS_DEBUG
          CMAKE_CXX_FLAGS_DEBUG)
        set(${neon_flags_var} "${${neon_flags_var}} -mfpu=neon")
      endforeach()
    else()
      # Neon is not reported (or the probe could not run): compile the Neon functions out.
      add_definitions(-DZM_STRIP_NEON=1)
      message(STATUS "ARM Neon is not available on this processor. Neon functions will be absent")
    endif()
  endif()
endif()
# Modules that we need:
include (GNUInstallDirs)

View File

@ -3291,11 +3291,11 @@ __attribute__((noinline)) void std_fastblend(const uint8_t* col1, const uint8_t*
}
/* FastBlend Neon for AArch32 */
#if defined(__arm__)
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
__attribute__((noinline,__target__("fpu=neon")))
#endif
void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
#if defined(__arm__)
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
static int8_t divider = 0;
static double current_blendpercent = 0.0;
@ -3348,7 +3348,7 @@ void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* r
: "%r12", "%q0", "%q1", "%q2", "%q3", "cc", "memory"
);
#else
Panic("Neon function called on a non ARM platform");
Panic("Neon function called on a non-ARM platform or Neon code is absent");
#endif
}
@ -3575,11 +3575,11 @@ __attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_
}
/* Grayscale Neon for AArch32 */
#if defined(__arm__)
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
__attribute__((noinline,__target__("fpu=neon")))
#endif
void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
#if defined(__arm__)
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
/* Q0(D0,D1) = col1 */
/* Q1(D2,D3) = col2 */
@ -3597,16 +3597,16 @@ void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t
: "%q0", "%q1", "cc", "memory"
);
#else
Panic("Neon function called on a non ARM platform");
Panic("Neon function called on a non-ARM platform or Neon code is absent");
#endif
}
/* RGB32 Neon for AArch32 */
#if defined(__arm__)
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
__attribute__((noinline,__target__("fpu=neon")))
#endif
void neon32_armv7_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, uint32_t multiplier) {
#if defined(__arm__)
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
/* Q0(D0,D1) = col1 */
/* Q1(D2,D3) = col2 */
@ -3632,7 +3632,7 @@ void neon32_armv7_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t
: "%r12", "%q0", "%q1", "%q2", "cc", "memory"
);
#else
Panic("Neon function called on a non ARM platform");
Panic("Neon function called on a non-ARM platform or Neon code is absent");
#endif
}

View File

@ -250,30 +250,50 @@ void hwcaps_detect() {
/* x86 or x86-64 processor */
uint32_t r_edx, r_ecx, r_ebx;
#ifdef __x86_64__
__asm__ __volatile__(
"push %%rbx\n\t"
"mov $0x0,%%ecx\n\t"
"mov $0x7,%%eax\n\t"
"cpuid\n\t"
#ifdef __x86_64__
"push %%rbx\n\t"
#else
"push %%ebx\n\t"
#endif
"mov $0x1,%%eax\n\t"
"cpuid\n\t"
#ifdef __x86_64__
"pop %%rax\n\t"
"pop %%rbx\n\t"
#else
"pop %%ebx\n\t"
#endif
: "=d" (r_edx), "=c" (r_ecx), "=b" (r_ebx)
: "=d" (r_edx), "=c" (r_ecx), "=a" (r_ebx)
:
:
: "%eax"
);
#else
__asm__ __volatile__(
"push %%ebx\n\t"
"mov $0x0,%%ecx\n\t"
"mov $0x7,%%eax\n\t"
"cpuid\n\t"
"push %%ebx\n\t"
"mov $0x1,%%eax\n\t"
"cpuid\n\t"
"pop %%eax\n\t"
"pop %%ebx\n\t"
: "=d" (r_edx), "=c" (r_ecx), "=a" (r_ebx)
:
:
);
#endif
if (r_ebx & 0x00000020) {
sseversion = 52; /* AVX2 */
Debug(1,"Detected a x86\\x86-64 processor with AVX2");
} else if (r_ecx & 0x10000000) {
sseversion = 51; /* AVX */
Debug(1,"Detected a x86\\x86-64 processor with AVX");
} else if (r_ecx & 0x00100000) {
sseversion = 42; /* SSE4.2 */
Debug(1,"Detected a x86\\x86-64 processor with SSE4.2");
} else if (r_ecx & 0x00080000) {
sseversion = 41; /* SSE4.1 */
Debug(1,"Detected a x86\\x86-64 processor with SSE4.1");
} else if (r_ecx & 0x00000200) {
sseversion = 35; /* SSSE3 */
Debug(1,"Detected a x86\\x86-64 processor with SSSE3");