summary | refs | log | tree | commit | diff | stats
path: root/libFDK/include/clz.h
diff options
context:
space:
mode:
author: Jakub Stachowski <qbast@go2.pl> 2012-08-11 16:31:02 +0200
committer: Martin Storsjo <martin@martin.st> 2012-08-14 23:48:55 +0300
commit: e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca (patch)
tree: d6d9f180841062d409dcf8bb3fe8b0e3027730c0 /libFDK/include/clz.h
parent: 6999980d671f70fdae19c8b4663efa5189f9ca6c (diff)
download: fdk-aac-e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca.tar.gz
fdk-aac-e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca.tar.bz2
fdk-aac-e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca.zip
Use __builtin_clz instead of default implementation of fixnormz on GCC
The library contains architecture specific versions of the clz functions for arm and mips, but the fallback C version for other architectures is pretty slow. By using __builtin_clz on GCC (available since GCC 3.4), we get a significant (20-40 %) speedup of the total running time on x86. Speed difference: 5.1 s instead of 8.6 s for AAC-LC 128 kbit/s and 3.8 s instead of 4.9 s for HE-AACv2 32 kbit/s, on an i7.
Diffstat (limited to 'libFDK/include/clz.h')
-rw-r--r-- libFDK/include/clz.h 12
1 files changed, 12 insertions, 0 deletions
diff --git a/libFDK/include/clz.h b/libFDK/include/clz.h
index 90cdb2b..4f7d240 100644
--- a/libFDK/include/clz.h
+++ b/libFDK/include/clz.h
@@ -115,6 +115,11 @@ inline INT fixnormz_S (SHORT a)
{
return fixnormz_D((INT)(a));
}
+#elif defined(__GNUC__)
+inline INT fixnormz_S (SHORT a) /* leading zero bits of a within its own width (assumed 16 bits, int 32 bits); 16 when a is zero */
+{ /* __builtin_clz sees a promoted to int: positives gain 16 extra zeros (subtracted here), negatives have their top bit set (0) */
+ return (a > 0) ? (__builtin_clz(a) - 16) : (a ? 0 : 16);
+}
#else
inline INT fixnormz_S (SHORT a)
{
@@ -131,6 +136,12 @@ inline INT fixnormz_S (SHORT a)
#endif
#if !defined(FUNCTION_fixnormz_D)
+#if defined(__GNUC__)
+inline INT fixnormz_D (LONG a) /* leading zero bits of a as reported by __builtin_clz; 32 when a is zero */
+{ /* zero is answered directly and never handed to __builtin_clz (GCC documents its result as undefined for 0) */
+ return (a == 0) ? 32 : __builtin_clz(a);
+}
+#else
inline INT fixnormz_D (LONG a)
{
INT leadingBits = 0;
@@ -143,6 +154,7 @@ inline INT fixnormz_D (LONG a)
return (leadingBits);
}
#endif
+#endif
/*****************************************************************************