Merge branch 'armv7_neon' of https://github.com/mastertheknife/ZoneMinder into storageareas
This commit is contained in:
commit
f2dcd18297
|
@ -66,12 +66,23 @@ set(CMAKE_CXX_FLAGS_DEBUG "-Wall -D__STDC_CONSTANT_MACROS -g")
|
|||
set(CMAKE_INCLUDE_CURRENT_DIR ON)
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
|
||||
|
||||
IF(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
|
||||
add_definitions (-mfpu=neon)
|
||||
ELSE(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
|
||||
|
||||
ENDIF(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
|
||||
|
||||
# GCC below 6.0 doesn't support the __target__("fpu=neon") attribute, which is required for compiling ARM Neon code; without it compilation fails.
|
||||
# Must use the -mfpu=neon compiler flag instead, but only for processors that support Neon; otherwise strip the Neon code altogether,
|
||||
# because passing -mfpu=neon is unsafe on processors that don't support Neon
|
||||
# ARM Neon handling for 32-bit ARM Linux builds with GCC older than 6.0.
#
# When a " neon " token is found in /proc/cpuinfo, -mfpu=neon is appended to
# the C and C++ flags of both the Release and Debug configurations.
# Otherwise ZM_STRIP_NEON=1 is defined so the sources can compile the Neon
# code out.
#
# NOTE(review): /proc/cpuinfo is read on the machine running CMake, so when
# cross-compiling the result describes the build host, not the target.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm" AND CMAKE_SYSTEM_NAME MATCHES "Linux")
  if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.0)
    # execute_process replaces the deprecated exec_program command.
    # grep exits with 0 only when at least one line matched; any other result
    # (no match, unreadable file, grep missing) is treated as "no Neon".
    execute_process(
      COMMAND grep " neon " /proc/cpuinfo
      OUTPUT_VARIABLE neonoutput
      RESULT_VARIABLE neonresult
      ERROR_QUIET
    )
    if(neonresult EQUAL 0)
      set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -mfpu=neon")
      set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -mfpu=neon")
      set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -mfpu=neon")
      set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -mfpu=neon")
    else()
      add_definitions(-DZM_STRIP_NEON=1)
      message(STATUS "ARM Neon is not available on this processor. Neon functions will be absent")
    endif()
  endif()
endif()
|
||||
|
||||
# Modules that we need:
|
||||
include (GNUInstallDirs)
|
||||
|
|
|
@ -3291,11 +3291,11 @@ __attribute__((noinline)) void std_fastblend(const uint8_t* col1, const uint8_t*
|
|||
}
|
||||
|
||||
/* FastBlend Neon for AArch32 */
|
||||
#if defined(__arm__)
|
||||
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
|
||||
__attribute__((noinline,__target__("fpu=neon")))
|
||||
#endif
|
||||
void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
|
||||
#if defined(__arm__)
|
||||
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
|
||||
static int8_t divider = 0;
|
||||
static double current_blendpercent = 0.0;
|
||||
|
||||
|
@ -3348,7 +3348,7 @@ void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* r
|
|||
: "%r12", "%q0", "%q1", "%q2", "%q3", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
Panic("Neon function called on a non ARM platform");
|
||||
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -3575,11 +3575,11 @@ __attribute__((noinline)) void std_delta8_abgr(const uint8_t* col1, const uint8_
|
|||
}
|
||||
|
||||
/* Grayscale Neon for AArch32 */
|
||||
#if defined(__arm__)
|
||||
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
|
||||
__attribute__((noinline,__target__("fpu=neon")))
|
||||
#endif
|
||||
void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||
#if defined(__arm__)
|
||||
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
|
||||
|
||||
/* Q0(D0,D1) = col1 */
|
||||
/* Q1(D2,D3) = col2 */
|
||||
|
@ -3597,16 +3597,16 @@ void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t
|
|||
: "%q0", "%q1", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
Panic("Neon function called on a non ARM platform");
|
||||
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||
#endif
|
||||
}
|
||||
|
||||
/* RGB32 Neon for AArch32 */
|
||||
#if defined(__arm__)
|
||||
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
|
||||
__attribute__((noinline,__target__("fpu=neon")))
|
||||
#endif
|
||||
void neon32_armv7_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, uint32_t multiplier) {
|
||||
#if defined(__arm__)
|
||||
#if (defined(__arm__) && !defined(ZM_STRIP_NEON))
|
||||
|
||||
/* Q0(D0,D1) = col1 */
|
||||
/* Q1(D2,D3) = col2 */
|
||||
|
@ -3632,7 +3632,7 @@ void neon32_armv7_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t
|
|||
: "%r12", "%q0", "%q1", "%q2", "cc", "memory"
|
||||
);
|
||||
#else
|
||||
Panic("Neon function called on a non ARM platform");
|
||||
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -250,30 +250,50 @@ void hwcaps_detect() {
|
|||
/* x86 or x86-64 processor */
|
||||
uint32_t r_edx, r_ecx, r_ebx;
|
||||
|
||||
#ifdef __x86_64__
|
||||
__asm__ __volatile__(
|
||||
"push %%rbx\n\t"
|
||||
"mov $0x0,%%ecx\n\t"
|
||||
"mov $0x7,%%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
#ifdef __x86_64__
|
||||
"push %%rbx\n\t"
|
||||
#else
|
||||
"push %%ebx\n\t"
|
||||
#endif
|
||||
"mov $0x1,%%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
#ifdef __x86_64__
|
||||
"pop %%rax\n\t"
|
||||
"pop %%rbx\n\t"
|
||||
#else
|
||||
"pop %%ebx\n\t"
|
||||
#endif
|
||||
: "=d" (r_edx), "=c" (r_ecx), "=b" (r_ebx)
|
||||
: "=d" (r_edx), "=c" (r_ecx), "=a" (r_ebx)
|
||||
:
|
||||
:
|
||||
: "%eax"
|
||||
);
|
||||
|
||||
#else
|
||||
__asm__ __volatile__(
|
||||
"push %%ebx\n\t"
|
||||
"mov $0x0,%%ecx\n\t"
|
||||
"mov $0x7,%%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
"push %%ebx\n\t"
|
||||
"mov $0x1,%%eax\n\t"
|
||||
"cpuid\n\t"
|
||||
"pop %%eax\n\t"
|
||||
"pop %%ebx\n\t"
|
||||
: "=d" (r_edx), "=c" (r_ecx), "=a" (r_ebx)
|
||||
:
|
||||
:
|
||||
);
|
||||
#endif
|
||||
|
||||
if (r_ebx & 0x00000020) {
|
||||
sseversion = 52; /* AVX2 */
|
||||
Debug(1,"Detected a x86\\x86-64 processor with AVX2");
|
||||
} else if (r_ecx & 0x10000000) {
|
||||
sseversion = 51; /* AVX */
|
||||
Debug(1,"Detected a x86\\x86-64 processor with AVX");
|
||||
} else if (r_ecx & 0x00100000) {
|
||||
sseversion = 42; /* SSE4.2 */
|
||||
Debug(1,"Detected a x86\\x86-64 processor with SSE4.2");
|
||||
} else if (r_ecx & 0x00080000) {
|
||||
sseversion = 41; /* SSE4.1 */
|
||||
Debug(1,"Detected a x86\\x86-64 processor with SSE4.1");
|
||||
} else if (r_ecx & 0x00000200) {
|
||||
sseversion = 35; /* SSSE3 */
|
||||
Debug(1,"Detected a x86\\x86-64 processor with SSSE3");
|
||||
|
|
Loading…
Reference in New Issue