Merge branch 'storageareas' of github.com:ConnorTechnology/ZoneMinder into storageareas
This commit is contained in:
commit
facbf07391
|
@ -0,0 +1,74 @@
|
||||||
|
# Contributor Covenant Code of Conduct
|
||||||
|
|
||||||
|
## Our Pledge
|
||||||
|
|
||||||
|
In the interest of fostering an open and welcoming environment, we as
|
||||||
|
contributors and maintainers pledge to making participation in our project and
|
||||||
|
our community a harassment-free experience for everyone, regardless of age, body
|
||||||
|
size, disability, ethnicity, gender identity and expression, level of experience,
|
||||||
|
nationality, personal appearance, race, religion, or sexual identity and
|
||||||
|
orientation.
|
||||||
|
|
||||||
|
## Our Standards
|
||||||
|
|
||||||
|
Examples of behavior that contributes to creating a positive environment
|
||||||
|
include:
|
||||||
|
|
||||||
|
* Using welcoming and inclusive language
|
||||||
|
* Being respectful of differing viewpoints and experiences
|
||||||
|
* Gracefully accepting constructive criticism
|
||||||
|
* Focusing on what is best for the community
|
||||||
|
* Showing empathy towards other community members
|
||||||
|
|
||||||
|
Examples of unacceptable behavior by participants include:
|
||||||
|
|
||||||
|
* The use of sexualized language or imagery and unwelcome sexual attention or
|
||||||
|
advances
|
||||||
|
* Trolling, insulting/derogatory comments, and personal or political attacks
|
||||||
|
* Public or private harassment
|
||||||
|
* Publishing others' private information, such as a physical or electronic
|
||||||
|
address, without explicit permission
|
||||||
|
* Other conduct which could reasonably be considered inappropriate in a
|
||||||
|
professional setting
|
||||||
|
|
||||||
|
## Our Responsibilities
|
||||||
|
|
||||||
|
Project maintainers are responsible for clarifying the standards of acceptable
|
||||||
|
behavior and are expected to take appropriate and fair corrective action in
|
||||||
|
response to any instances of unacceptable behavior.
|
||||||
|
|
||||||
|
Project maintainers have the right and responsibility to remove, edit, or
|
||||||
|
reject comments, commits, code, wiki edits, issues, and other contributions
|
||||||
|
that are not aligned to this Code of Conduct, or to ban temporarily or
|
||||||
|
permanently any contributor for other behaviors that they deem inappropriate,
|
||||||
|
threatening, offensive, or harmful.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This Code of Conduct applies both within project spaces and in public spaces
|
||||||
|
when an individual is representing the project or its community. Examples of
|
||||||
|
representing a project or community include using an official project e-mail
|
||||||
|
address, posting via an official social media account, or acting as an appointed
|
||||||
|
representative at an online or offline event. Representation of a project may be
|
||||||
|
further defined and clarified by project maintainers.
|
||||||
|
|
||||||
|
## Enforcement
|
||||||
|
|
||||||
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
||||||
|
reported by contacting the project team at [INSERT EMAIL ADDRESS]. All
|
||||||
|
complaints will be reviewed and investigated and will result in a response that
|
||||||
|
is deemed necessary and appropriate to the circumstances. The project team is
|
||||||
|
obligated to maintain confidentiality with regard to the reporter of an incident.
|
||||||
|
Further details of specific enforcement policies may be posted separately.
|
||||||
|
|
||||||
|
Project maintainers who do not follow or enforce the Code of Conduct in good
|
||||||
|
faith may face temporary or permanent repercussions as determined by other
|
||||||
|
members of the project's leadership.
|
||||||
|
|
||||||
|
## Attribution
|
||||||
|
|
||||||
|
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
||||||
|
available at [http://contributor-covenant.org/version/1/4][version]
|
||||||
|
|
||||||
|
[homepage]: http://contributor-covenant.org
|
||||||
|
[version]: http://contributor-covenant.org/version/1/4/
|
287
src/zm_image.cpp
287
src/zm_image.cpp
|
@ -194,8 +194,15 @@ void Image::Initialise()
|
||||||
fptr_blend = &sse2_fastblend; /* SSE2 fast blend */
|
fptr_blend = &sse2_fastblend; /* SSE2 fast blend */
|
||||||
Debug(4,"Blend: Using SSE2 fast blend function");
|
Debug(4,"Blend: Using SSE2 fast blend function");
|
||||||
} else if(config.cpu_extensions && neonversion >= 1) {
|
} else if(config.cpu_extensions && neonversion >= 1) {
|
||||||
fptr_blend = &neon32_armv7_fastblend; /* ARM Neon fast blend */
|
#if defined(__aarch64__)
|
||||||
Debug(4,"Blend: Using ARM Neon fast blend function");
|
fptr_blend = &neon64_armv8_fastblend; /* ARM Neon (AArch64) fast blend */
|
||||||
|
Debug(4,"Blend: Using ARM Neon (AArch64) fast blend function");
|
||||||
|
#elif defined(__arm__)
|
||||||
|
fptr_blend = &neon32_armv7_fastblend; /* ARM Neon (AArch32) fast blend */
|
||||||
|
Debug(4,"Blend: Using ARM Neon (AArch32) fast blend function");
|
||||||
|
#else
|
||||||
|
Panic("Bug: Non ARM platform but neon present");
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
fptr_blend = &std_fastblend; /* standard fast blend */
|
fptr_blend = &std_fastblend; /* standard fast blend */
|
||||||
Debug(4,"Blend: Using fast blend function");
|
Debug(4,"Blend: Using fast blend function");
|
||||||
|
@ -258,12 +265,23 @@ void Image::Initialise()
|
||||||
Debug(4,"Delta: Using SSE2 delta functions");
|
Debug(4,"Delta: Using SSE2 delta functions");
|
||||||
} else if(neonversion >= 1) {
|
} else if(neonversion >= 1) {
|
||||||
/* ARM Neon available */
|
/* ARM Neon available */
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
fptr_delta8_rgba = &neon64_armv8_delta8_rgba;
|
||||||
|
fptr_delta8_bgra = &neon64_armv8_delta8_bgra;
|
||||||
|
fptr_delta8_argb = &neon64_armv8_delta8_argb;
|
||||||
|
fptr_delta8_abgr = &neon64_armv8_delta8_abgr;
|
||||||
|
fptr_delta8_gray8 = &neon64_armv8_delta8_gray8;
|
||||||
|
Debug(4,"Delta: Using ARM Neon (AArch64) delta functions");
|
||||||
|
#elif defined(__arm__)
|
||||||
fptr_delta8_rgba = &neon32_armv7_delta8_rgba;
|
fptr_delta8_rgba = &neon32_armv7_delta8_rgba;
|
||||||
fptr_delta8_bgra = &neon32_armv7_delta8_bgra;
|
fptr_delta8_bgra = &neon32_armv7_delta8_bgra;
|
||||||
fptr_delta8_argb = &neon32_armv7_delta8_argb;
|
fptr_delta8_argb = &neon32_armv7_delta8_argb;
|
||||||
fptr_delta8_abgr = &neon32_armv7_delta8_abgr;
|
fptr_delta8_abgr = &neon32_armv7_delta8_abgr;
|
||||||
fptr_delta8_gray8 = &neon32_armv7_delta8_gray8;
|
fptr_delta8_gray8 = &neon32_armv7_delta8_gray8;
|
||||||
Debug(4,"Delta: Using ARM Neon delta functions");
|
Debug(4,"Delta: Using ARM Neon (AArch32) delta functions");
|
||||||
|
#else
|
||||||
|
Panic("Bug: Non ARM platform but neon present");
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
/* No suitable SSE version available */
|
/* No suitable SSE version available */
|
||||||
fptr_delta8_rgba = &std_delta8_rgba;
|
fptr_delta8_rgba = &std_delta8_rgba;
|
||||||
|
@ -3403,31 +3421,13 @@ void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* r
|
||||||
/* Q12(D24,D25) = divider */
|
/* Q12(D24,D25) = divider */
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
<<<<<<< HEAD
|
|
||||||
"mov r12, %4\n\t"
|
|
||||||
"vdup.8 q3, r12\n\t"
|
|
||||||
"neon32_armv7_fastblend_iter:\n\t"
|
|
||||||
"vldm %0!, {q0}\n\t"
|
|
||||||
"vldm %1!, {q1}\n\t"
|
|
||||||
"vrshl.u8 q2, q0, q3\n\t"
|
|
||||||
"vrshl.u8 q1, q1, q3\n\t"
|
|
||||||
"vsub.i8 q1, q1, q2\n\t"
|
|
||||||
"vadd.i8 q1, q1, q0\n\t"
|
|
||||||
"vstm %2!, {q1}\n\t"
|
|
||||||
"subs %3, %3, #16\n\t"
|
|
||||||
"bne neon32_armv7_fastblend_iter\n\t"
|
|
||||||
:
|
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "g" (divider)
|
|
||||||
: "%r12", "%q0", "%q1", "%q2", "%q3", "cc", "memory"
|
|
||||||
);
|
|
||||||
=======
|
|
||||||
"mov r12, %4\n\t"
|
"mov r12, %4\n\t"
|
||||||
"vdup.8 q12, r12\n\t"
|
"vdup.8 q12, r12\n\t"
|
||||||
"neon32_armv7_fastblend_iter:\n\t"
|
"neon32_armv7_fastblend_iter:\n\t"
|
||||||
"pld [%0,#256]\n\t"
|
|
||||||
"pld [%1,#256]\n\t"
|
|
||||||
"vldm %0!, {q0,q1,q2,q3}\n\t"
|
"vldm %0!, {q0,q1,q2,q3}\n\t"
|
||||||
"vldm %1!, {q4,q5,q6,q7}\n\t"
|
"vldm %1!, {q4,q5,q6,q7}\n\t"
|
||||||
|
"pld [%0, #256]\n\t"
|
||||||
|
"pld [%1, #256]\n\t"
|
||||||
"vrshl.u8 q8, q0, q12\n\t"
|
"vrshl.u8 q8, q0, q12\n\t"
|
||||||
"vrshl.u8 q9, q1, q12\n\t"
|
"vrshl.u8 q9, q1, q12\n\t"
|
||||||
"vrshl.u8 q10, q2, q12\n\t"
|
"vrshl.u8 q10, q2, q12\n\t"
|
||||||
|
@ -3451,7 +3451,90 @@ void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* r
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (divider)
|
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (divider)
|
||||||
: "%r12", "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7", "%q8", "%q9", "%q10", "%q11", "%q12", "cc", "memory"
|
: "%r12", "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7", "%q8", "%q9", "%q10", "%q11", "%q12", "cc", "memory"
|
||||||
);
|
);
|
||||||
>>>>>>> master
|
#else
|
||||||
|
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((noinline)) void neon64_armv8_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent) {
|
||||||
|
#if (defined(__aarch64__) && !defined(ZM_STRIP_NEON))
|
||||||
|
static int8_t divider = 0;
|
||||||
|
static double current_blendpercent = 0.0;
|
||||||
|
|
||||||
|
if(current_blendpercent != blendpercent) {
|
||||||
|
/* Attempt to match the blending percent to one of the possible values */
|
||||||
|
if(blendpercent < 2.34375) {
|
||||||
|
// 1.5625% blending
|
||||||
|
divider = 6;
|
||||||
|
} else if(blendpercent >= 2.34375 && blendpercent < 4.6875) {
|
||||||
|
// 3.125% blending
|
||||||
|
divider = 5;
|
||||||
|
} else if(blendpercent >= 4.6875 && blendpercent < 9.375) {
|
||||||
|
// 6.25% blending
|
||||||
|
divider = 4;
|
||||||
|
} else if(blendpercent >= 9.375 && blendpercent < 18.75) {
|
||||||
|
// 12.5% blending
|
||||||
|
divider = 3;
|
||||||
|
} else if(blendpercent >= 18.75 && blendpercent < 37.5) {
|
||||||
|
// 25% blending
|
||||||
|
divider = 2;
|
||||||
|
} else if(blendpercent >= 37.5) {
|
||||||
|
// 50% blending
|
||||||
|
divider = 1;
|
||||||
|
}
|
||||||
|
// We only have instruction to shift left by a variable, going negative shifts right :)
|
||||||
|
divider *= -1;
|
||||||
|
current_blendpercent = blendpercent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* V16 = col1+0 */
|
||||||
|
/* V17 = col1+16 */
|
||||||
|
/* V18 = col1+32 */
|
||||||
|
/* V19 = col1+48 */
|
||||||
|
/* V20 = col2+0 */
|
||||||
|
/* V21 = col2+16 */
|
||||||
|
/* V22 = col2+32 */
|
||||||
|
/* V23 = col2+48 */
|
||||||
|
/* V24 = col1tmp+0 */
|
||||||
|
/* V25 = col1tmp+16 */
|
||||||
|
/* V26 = col1tmp+32 */
|
||||||
|
/* V27 = col1tmp+48 */
|
||||||
|
/* V28 = divider */
|
||||||
|
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"mov x12, %4\n\t"
|
||||||
|
"dup v28.16b, w12\n\t"
|
||||||
|
"neon64_armv8_fastblend_iter:\n\t"
|
||||||
|
"ldp q16, q17, [%0], #32\n\t"
|
||||||
|
"ldp q18, q19, [%0], #32\n\t"
|
||||||
|
"ldp q20, q21, [%1], #32\n\t"
|
||||||
|
"ldp q22, q23, [%1], #32\n\t"
|
||||||
|
"prfm pldl1keep, [%0, #256]\n\t"
|
||||||
|
"prfm pldl1keep, [%1, #256]\n\t"
|
||||||
|
"urshl v24.16b, v16.16b, v28.16b\n\t"
|
||||||
|
"urshl v25.16b, v17.16b, v28.16b\n\t"
|
||||||
|
"urshl v26.16b, v18.16b, v28.16b\n\t"
|
||||||
|
"urshl v27.16b, v19.16b, v28.16b\n\t"
|
||||||
|
"urshl v20.16b, v20.16b, v28.16b\n\t"
|
||||||
|
"urshl v21.16b, v21.16b, v28.16b\n\t"
|
||||||
|
"urshl v22.16b, v22.16b, v28.16b\n\t"
|
||||||
|
"urshl v23.16b, v23.16b, v28.16b\n\t"
|
||||||
|
"sub v20.16b, v20.16b, v24.16b\n\t"
|
||||||
|
"sub v21.16b, v21.16b, v25.16b\n\t"
|
||||||
|
"sub v22.16b, v22.16b, v26.16b\n\t"
|
||||||
|
"sub v23.16b, v23.16b, v27.16b\n\t"
|
||||||
|
"add v20.16b, v20.16b, v16.16b\n\t"
|
||||||
|
"add v21.16b, v21.16b, v17.16b\n\t"
|
||||||
|
"add v22.16b, v22.16b, v18.16b\n\t"
|
||||||
|
"add v23.16b, v23.16b, v19.16b\n\t"
|
||||||
|
"stp q20, q21, [%2], #32\n\t"
|
||||||
|
"stp q22, q23, [%2], #32\n\t"
|
||||||
|
"subs %3, %3, #64\n\t"
|
||||||
|
"bne neon64_armv8_fastblend_iter\n\t"
|
||||||
|
:
|
||||||
|
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (divider)
|
||||||
|
: "%x12", "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23", "%v24", "%v25", "%v26", "%v27", "%v28", "cc", "memory"
|
||||||
|
);
|
||||||
#else
|
#else
|
||||||
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||||
#endif
|
#endif
|
||||||
|
@ -3696,24 +3779,11 @@ void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t
|
||||||
/* Q7(D14,D15) = col2+48 */
|
/* Q7(D14,D15) = col2+48 */
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
<<<<<<< HEAD
|
|
||||||
"neon32_armv7_delta8_gray8_iter:\n\t"
|
"neon32_armv7_delta8_gray8_iter:\n\t"
|
||||||
"vldm %0!, {q0}\n\t"
|
|
||||||
"vldm %1!, {q1}\n\t"
|
|
||||||
"vabd.u8 q0, q0, q1\n\t"
|
|
||||||
"vstm %2!, {q0}\n\t"
|
|
||||||
"subs %3, %3, #16\n\t"
|
|
||||||
"bne neon32_armv7_delta8_gray8_iter\n\t"
|
|
||||||
:
|
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count)
|
|
||||||
: "%q0", "%q1", "cc", "memory"
|
|
||||||
);
|
|
||||||
=======
|
|
||||||
"neon32_armv7_delta8_gray8_iter:\n\t"
|
|
||||||
"pld [%0,#256]\n\t"
|
|
||||||
"pld [%1,#256]\n\t"
|
|
||||||
"vldm %0!, {q0,q1,q2,q3}\n\t"
|
"vldm %0!, {q0,q1,q2,q3}\n\t"
|
||||||
"vldm %1!, {q4,q5,q6,q7}\n\t"
|
"vldm %1!, {q4,q5,q6,q7}\n\t"
|
||||||
|
"pld [%0, #512]\n\t"
|
||||||
|
"pld [%1, #512]\n\t"
|
||||||
"vabd.u8 q0, q0, q4\n\t"
|
"vabd.u8 q0, q0, q4\n\t"
|
||||||
"vabd.u8 q1, q1, q5\n\t"
|
"vabd.u8 q1, q1, q5\n\t"
|
||||||
"vabd.u8 q2, q2, q6\n\t"
|
"vabd.u8 q2, q2, q6\n\t"
|
||||||
|
@ -3725,7 +3795,44 @@ void neon32_armv7_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count)
|
: "r" (col1), "r" (col2), "r" (result), "r" (count)
|
||||||
: "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7", "cc", "memory"
|
: "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7", "cc", "memory"
|
||||||
);
|
);
|
||||||
>>>>>>> master
|
#else
|
||||||
|
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Grayscale Neon for AArch64 */
|
||||||
|
__attribute__((noinline)) void neon64_armv8_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
|
#if (defined(__aarch64__) && !defined(ZM_STRIP_NEON))
|
||||||
|
|
||||||
|
/* V16 = col1+0 */
|
||||||
|
/* V17 = col1+16 */
|
||||||
|
/* V18 = col1+32 */
|
||||||
|
/* V19 = col1+48 */
|
||||||
|
/* V20 = col2+0 */
|
||||||
|
/* V21 = col2+16 */
|
||||||
|
/* V22 = col2+32 */
|
||||||
|
/* V23 = col2+48 */
|
||||||
|
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"neon64_armv8_delta8_gray8_iter:\n\t"
|
||||||
|
"ldp q16, q17, [%0], #32\n\t"
|
||||||
|
"ldp q18, q19, [%0], #32\n\t"
|
||||||
|
"ldp q20, q21, [%1], #32\n\t"
|
||||||
|
"ldp q22, q23, [%1], #32\n\t"
|
||||||
|
"prfm pldl1keep, [%0, #512]\n\t"
|
||||||
|
"prfm pldl1keep, [%1, #512]\n\t"
|
||||||
|
"uabd v16.16b, v16.16b, v20.16b\n\t"
|
||||||
|
"uabd v17.16b, v17.16b, v21.16b\n\t"
|
||||||
|
"uabd v18.16b, v18.16b, v22.16b\n\t"
|
||||||
|
"uabd v19.16b, v19.16b, v23.16b\n\t"
|
||||||
|
"stp q16, q17, [%2], #32\n\t"
|
||||||
|
"stp q18, q19, [%2], #32\n\t"
|
||||||
|
"subs %3, %3, #64\n\t"
|
||||||
|
"bne neon64_armv8_delta8_gray8_iter\n\t"
|
||||||
|
:
|
||||||
|
: "r" (col1), "r" (col2), "r" (result), "r" (count)
|
||||||
|
: "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23", "cc", "memory"
|
||||||
|
);
|
||||||
#else
|
#else
|
||||||
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||||
#endif
|
#endif
|
||||||
|
@ -3749,33 +3856,13 @@ void neon32_armv7_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t
|
||||||
/* Q8(D16,D17) = multiplier */
|
/* Q8(D16,D17) = multiplier */
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
<<<<<<< HEAD
|
|
||||||
"mov r12, %4\n\t"
|
|
||||||
"vdup.32 q2, r12\n\t"
|
|
||||||
"neon32_armv7_delta8_rgb32_iter:\n\t"
|
|
||||||
"vldm %0!, {q0}\n\t"
|
|
||||||
"vldm %1!, {q1}\n\t"
|
|
||||||
"vabd.u8 q0, q0, q1\n\t"
|
|
||||||
"vrshr.u8 q0, q0, #3\n\t"
|
|
||||||
"vmul.i8 q0, q0, q2\n\t"
|
|
||||||
"vpadd.i8 d0, d0, d1\n\t"
|
|
||||||
"vpadd.i8 d2, d2, d3\n\t"
|
|
||||||
"vpadd.i8 d0, d0, d2\n\t"
|
|
||||||
"vst1.32 {d0[0]}, [%2]!\n\t"
|
|
||||||
"subs %3, %3, #4\n\t"
|
|
||||||
"bne neon32_armv7_delta8_rgb32_iter\n\t"
|
|
||||||
:
|
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (multiplier)
|
|
||||||
: "%r12", "%q0", "%q1", "%q2", "cc", "memory"
|
|
||||||
);
|
|
||||||
=======
|
|
||||||
"mov r12, %4\n\t"
|
"mov r12, %4\n\t"
|
||||||
"vdup.32 q8, r12\n\t"
|
"vdup.32 q8, r12\n\t"
|
||||||
"neon32_armv7_delta8_rgb32_iter:\n\t"
|
"neon32_armv7_delta8_rgb32_iter:\n\t"
|
||||||
"pld [%0,#256]\n\t"
|
|
||||||
"pld [%1,#256]\n\t"
|
|
||||||
"vldm %0!, {q0,q1,q2,q3}\n\t"
|
"vldm %0!, {q0,q1,q2,q3}\n\t"
|
||||||
"vldm %1!, {q4,q5,q6,q7}\n\t"
|
"vldm %1!, {q4,q5,q6,q7}\n\t"
|
||||||
|
"pld [%0, #256]\n\t"
|
||||||
|
"pld [%1, #256]\n\t"
|
||||||
"vabd.u8 q0, q0, q4\n\t"
|
"vabd.u8 q0, q0, q4\n\t"
|
||||||
"vabd.u8 q1, q1, q5\n\t"
|
"vabd.u8 q1, q1, q5\n\t"
|
||||||
"vabd.u8 q2, q2, q6\n\t"
|
"vabd.u8 q2, q2, q6\n\t"
|
||||||
|
@ -3803,8 +3890,62 @@ void neon32_armv7_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t
|
||||||
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (multiplier)
|
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (multiplier)
|
||||||
: "%r12", "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7", "%q8", "cc", "memory"
|
: "%r12", "%q0", "%q1", "%q2", "%q3", "%q4", "%q5", "%q6", "%q7", "%q8", "cc", "memory"
|
||||||
);
|
);
|
||||||
|
#else
|
||||||
|
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
>>>>>>> master
|
|
||||||
|
/* RGB32 Neon for AArch64 */
|
||||||
|
__attribute__((noinline)) void neon64_armv8_delta8_rgb32(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, uint32_t multiplier) {
|
||||||
|
#if (defined(__aarch64__) && !defined(ZM_STRIP_NEON))
|
||||||
|
|
||||||
|
/* V16 = col1+0 */
|
||||||
|
/* V17 = col1+16 */
|
||||||
|
/* V18 = col1+32 */
|
||||||
|
/* V19 = col1+48 */
|
||||||
|
/* V20 = col2+0 */
|
||||||
|
/* V21 = col2+16 */
|
||||||
|
/* V22 = col2+32 */
|
||||||
|
/* V23 = col2+48 */
|
||||||
|
/* V24 = multiplier */
|
||||||
|
|
||||||
|
__asm__ __volatile__ (
|
||||||
|
"mov x12, %4\n\t"
|
||||||
|
"dup v24.4s, w12\n\t"
|
||||||
|
"neon64_armv8_delta8_rgb32_iter:\n\t"
|
||||||
|
"ldp q16, q17, [%0], #32\n\t"
|
||||||
|
"ldp q18, q19, [%0], #32\n\t"
|
||||||
|
"ldp q20, q21, [%1], #32\n\t"
|
||||||
|
"ldp q22, q23, [%1], #32\n\t"
|
||||||
|
"prfm pldl1keep, [%0, #256]\n\t"
|
||||||
|
"prfm pldl1keep, [%1, #256]\n\t"
|
||||||
|
"uabd v16.16b, v16.16b, v20.16b\n\t"
|
||||||
|
"uabd v17.16b, v17.16b, v21.16b\n\t"
|
||||||
|
"uabd v18.16b, v18.16b, v22.16b\n\t"
|
||||||
|
"uabd v19.16b, v19.16b, v23.16b\n\t"
|
||||||
|
"urshr v16.16b, v16.16b, #3\n\t"
|
||||||
|
"urshr v17.16b, v17.16b, #3\n\t"
|
||||||
|
"urshr v18.16b, v18.16b, #3\n\t"
|
||||||
|
"urshr v19.16b, v19.16b, #3\n\t"
|
||||||
|
"mul v16.16b, v16.16b, v24.16b\n\t"
|
||||||
|
"mul v17.16b, v17.16b, v24.16b\n\t"
|
||||||
|
"mul v18.16b, v18.16b, v24.16b\n\t"
|
||||||
|
"mul v19.16b, v19.16b, v24.16b\n\t"
|
||||||
|
"addp v16.16b, v16.16b, v16.16b\n\t"
|
||||||
|
"addp v17.16b, v17.16b, v17.16b\n\t"
|
||||||
|
"addp v18.16b, v18.16b, v18.16b\n\t"
|
||||||
|
"addp v19.16b, v19.16b, v19.16b\n\t"
|
||||||
|
"addp v16.16b, v16.16b, v16.16b\n\t"
|
||||||
|
"addp v17.16b, v17.16b, v17.16b\n\t"
|
||||||
|
"addp v18.16b, v18.16b, v18.16b\n\t"
|
||||||
|
"addp v19.16b, v19.16b, v19.16b\n\t"
|
||||||
|
"st4 {v16.s, v17.s, v18.s, v19.s}[0], [%2], #16\n\t"
|
||||||
|
"subs %3, %3, #16\n\t"
|
||||||
|
"bne neon64_armv8_delta8_rgb32_iter\n\t"
|
||||||
|
:
|
||||||
|
: "r" (col1), "r" (col2), "r" (result), "r" (count), "r" (multiplier)
|
||||||
|
: "%x12", "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23", "%v24", "cc", "memory"
|
||||||
|
);
|
||||||
#else
|
#else
|
||||||
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
Panic("Neon function called on a non-ARM platform or Neon code is absent");
|
||||||
#endif
|
#endif
|
||||||
|
@ -3830,6 +3971,26 @@ void neon32_armv7_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t*
|
||||||
neon32_armv7_delta8_rgb32(col1, col2, result, count, 0x02050100);
|
neon32_armv7_delta8_rgb32(col1, col2, result, count, 0x02050100);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* RGB32: RGBA Neon for AArch64 */
|
||||||
|
void neon64_armv8_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
|
neon64_armv8_delta8_rgb32(col1, col2, result, count, 0x00010502);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RGB32: BGRA Neon for AArch64 */
|
||||||
|
void neon64_armv8_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
|
neon64_armv8_delta8_rgb32(col1, col2, result, count, 0x00020501);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RGB32: ARGB Neon for AArch64 */
|
||||||
|
void neon64_armv8_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
|
neon64_armv8_delta8_rgb32(col1, col2, result, count, 0x01050200);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RGB32: ABGR Neon for AArch64 */
|
||||||
|
void neon64_armv8_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count) {
|
||||||
|
neon64_armv8_delta8_rgb32(col1, col2, result, count, 0x02050100);
|
||||||
|
}
|
||||||
|
|
||||||
/* Grayscale SSE2 */
|
/* Grayscale SSE2 */
|
||||||
#if defined(__i386__) || defined(__x86_64__)
|
#if defined(__i386__) || defined(__x86_64__)
|
||||||
__attribute__((noinline,__target__("sse2")))
|
__attribute__((noinline,__target__("sse2")))
|
||||||
|
|
|
@ -265,6 +265,7 @@ public:
|
||||||
void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
void sse2_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
||||||
void std_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
void std_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
||||||
void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
void neon32_armv7_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
||||||
|
void neon64_armv8_fastblend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
||||||
void std_blend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
void std_blend(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count, double blendpercent);
|
||||||
|
|
||||||
/* Delta functions */
|
/* Delta functions */
|
||||||
|
@ -280,6 +281,11 @@ void neon32_armv7_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t*
|
||||||
void neon32_armv7_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void neon32_armv7_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void neon32_armv7_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void neon32_armv7_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void neon32_armv7_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void neon32_armv7_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void neon64_armv8_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void neon64_armv8_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void neon64_armv8_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void neon64_armv8_delta8_argb(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
void neon64_armv8_delta8_abgr(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void sse2_delta8_gray8(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void sse2_delta8_rgba(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
void sse2_delta8_bgra(const uint8_t* col1, const uint8_t* col2, uint8_t* result, unsigned long count);
|
||||||
|
|
|
@ -311,15 +311,20 @@ void hwcaps_detect() {
|
||||||
Debug(1,"Detected a x86\\x86-64 processor");
|
Debug(1,"Detected a x86\\x86-64 processor");
|
||||||
}
|
}
|
||||||
#elif defined(__arm__)
|
#elif defined(__arm__)
|
||||||
// ARM processor
|
// ARM processor in 32bit mode
|
||||||
// To see if it supports NEON, we need to get that information from the kernel
|
// To see if it supports NEON, we need to get that information from the kernel
|
||||||
unsigned long auxval = getauxval(AT_HWCAP);
|
unsigned long auxval = getauxval(AT_HWCAP);
|
||||||
if (auxval & HWCAP_ARM_NEON) {
|
if (auxval & HWCAP_ARM_NEON) {
|
||||||
Debug(1,"Detected ARM processor with Neon");
|
Debug(1,"Detected ARM (AArch32) processor with Neon");
|
||||||
neonversion = 1;
|
neonversion = 1;
|
||||||
} else {
|
} else {
|
||||||
Debug(1,"Detected ARM processor");
|
Debug(1,"Detected ARM (AArch32) processor");
|
||||||
}
|
}
|
||||||
|
#elif defined(__aarch64__)
|
||||||
|
// ARM processor in 64bit mode
|
||||||
|
// Neon is mandatory, no need to check for it
|
||||||
|
neonversion = 1;
|
||||||
|
Debug(1,"Detected ARM (AArch64) processor with Neon");
|
||||||
#else
|
#else
|
||||||
// Unknown processor
|
// Unknown processor
|
||||||
Debug(1,"Detected unknown processor architecture");
|
Debug(1,"Detected unknown processor architecture");
|
||||||
|
|
|
@ -54,5 +54,5 @@ var focusWindow = <?php echo !empty($focusWindow)?'true':'false' ?>;
|
||||||
var imagePrefix = "<?php echo viewImagePath( "", '&' ) ?>";
|
var imagePrefix = "<?php echo viewImagePath( "", '&' ) ?>";
|
||||||
|
|
||||||
<?php if ( ZM_OPT_USE_AUTH && ZM_AUTH_HASH_LOGINS ) { ?>
|
<?php if ( ZM_OPT_USE_AUTH && ZM_AUTH_HASH_LOGINS ) { ?>
|
||||||
var auth_hash = '<?php echo $_SESSION['AuthHash']; ?>';
|
var auth_hash = '<?php echo isset($_SESSION['AuthHash']) ? $_SESSION['AuthHash'] : ''; ?>';
|
||||||
<?php } ?>
|
<?php } ?>
|
||||||
|
|
Loading…
Reference in New Issue