aboutsummaryrefslogtreecommitdiffstats
path: root/libFDK/include/clz.h
diff options
context:
space:
mode:
author Lexyan <lexyan@lexyan.fr> 2016-09-03 15:38:08 +0200
committer Martin Storsjo <martin@martin.st> 2016-09-10 14:33:39 +0300
commit 1d686c3a23f3ae286ef964ab62199be96e4ad1dc (patch)
tree 34cda9b76dedb7b366ee82f563853a3ca7299ec4 /libFDK/include/clz.h
parent a0bd8aa3b6339082fbe9d830264839fa50c0a4b7 (diff)
download fdk-aac-1d686c3a23f3ae286ef964ab62199be96e4ad1dc.tar.gz
fdk-aac-1d686c3a23f3ae286ef964ab62199be96e4ad1dc.tar.bz2
fdk-aac-1d686c3a23f3ae286ef964ab62199be96e4ad1dc.zip
Add aarch64 assembly optimization (ARMv8a 64 bits)
The fixmuldiv functions don't need inline assembly to be fast on this architecture; the compilers (both clang and GCC) figure out the optimal instructions to use for this (a two-instruction sequence). When the compiler emits these instructions itself instead of being handed inline assembly, it is able to interleave them with other instructions, which improves scheduling and makes the code even faster than the inline-assembly version. Overall, this gives about a 50% speedup.
Diffstat (limited to 'libFDK/include/clz.h')
-rw-r--r-- libFDK/include/clz.h 3
1 files changed, 3 insertions, 0 deletions
diff --git a/libFDK/include/clz.h b/libFDK/include/clz.h
index 1e79ec8..3a3ead5 100644
--- a/libFDK/include/clz.h
+++ b/libFDK/include/clz.h
@@ -97,6 +97,9 @@ amm-info@iis.fraunhofer.de
#if defined(__arm__)
#include "arm/clz_arm.h"
+#elif defined(__aarch64__) || defined(__AARCH64EL__)
+#include "aarch64/clz_aarch64.h"
+
#elif defined(__mips__) /* cppp replaced: elif */
#include "mips/clz_mips.h"