diff options
author | Jakub Stachowski <qbast@go2.pl> | 2012-08-11 16:31:02 +0200 |
---|---|---|
committer | Martin Storsjo <martin@martin.st> | 2012-08-14 23:48:55 +0300 |
commit | e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca (patch) | |
tree | d6d9f180841062d409dcf8bb3fe8b0e3027730c0 /libFDK/include | |
parent | 6999980d671f70fdae19c8b4663efa5189f9ca6c (diff) | |
download | fdk-aac-e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca.tar.gz fdk-aac-e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca.tar.bz2 fdk-aac-e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca.zip |
Use __builtin_clz instead of default implementation of fixnormz on GCC
The library contains architecture specific versions of the clz functions
for arm and mips, but the fallback C version for other architectures is
pretty slow.
By using __builtin_clz on GCC (available since GCC 3.4), we get a
significant (20-40 %) speedup of the total running time on x86.
Speed difference: 5.1 s instead of 8.6 s for AAC-LC 128 kbit/s and
3.8 s instead of 4.9 s for HE-AACv2 32 kbit/s, on an i7.
Diffstat (limited to 'libFDK/include')
-rw-r--r-- | libFDK/include/clz.h | 12 |
1 file changed, 12 insertions, 0 deletions
```diff
diff --git a/libFDK/include/clz.h b/libFDK/include/clz.h
index 90cdb2b..4f7d240 100644
--- a/libFDK/include/clz.h
+++ b/libFDK/include/clz.h
@@ -115,6 +115,11 @@ inline INT fixnormz_S (SHORT a)
 {
   return fixnormz_D((INT)(a));
 }
+#elif defined(__GNUC__)
+inline INT fixnormz_S (SHORT a)
+{
+    return a ? __builtin_clz(a) : 16;
+}
 #else
 inline INT fixnormz_S (SHORT a)
 {
@@ -131,6 +136,12 @@ inline INT fixnormz_S (SHORT a)
 #endif
 
 #if !defined(FUNCTION_fixnormz_D)
+#if defined(__GNUC__)
+inline INT fixnormz_D (LONG a)
+{
+    return a ? __builtin_clz(a) : 32;
+}
+#else
 inline INT fixnormz_D (LONG a)
 {
     INT leadingBits = 0;
@@ -143,6 +154,7 @@ inline INT fixnormz_D (LONG a)
     return (leadingBits);
 }
 #endif
+#endif
 
 
 /*****************************************************************************
```