From e036dbc8eb7d5fdd02dc70faff20e9ac3e2989ca Mon Sep 17 00:00:00 2001
From: Jakub Stachowski <qbast@go2.pl>
Date: Sat, 11 Aug 2012 16:31:02 +0200
Subject: Use __builtin_clz instead of default implementation of fixnormz on
 GCC

The library contains architecture specific versions of the clz functions
for arm and mips, but the fallback C version for other architectures is
pretty slow.

By using __builtin_clz on GCC (available since GCC 3.4), we get a
significant (20-40 %) speedup of the total running time on x86.

Speed difference: 5.1 s instead of 8.6 s for AAC-LC 128 kbit/s and
3.8 s instead of 4.9 s for HE-AACv2 32 kbit/s, on an i7.
---
 libFDK/include/clz.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libFDK/include/clz.h b/libFDK/include/clz.h
index 90cdb2b..4f7d240 100644
--- a/libFDK/include/clz.h
+++ b/libFDK/include/clz.h
@@ -115,6 +115,11 @@ inline INT fixnormz_S (SHORT a)
 {
   return fixnormz_D((INT)(a));
 }
+#elif defined(__GNUC__)
+inline INT fixnormz_S (SHORT a)
+{
+    return a ? __builtin_clz(a) : 16;
+}
 #else
 inline INT fixnormz_S (SHORT a)
 {
@@ -131,6 +136,12 @@ inline INT fixnormz_S (SHORT a)
 #endif
 
 #if !defined(FUNCTION_fixnormz_D)
+#if defined(__GNUC__)
+inline INT fixnormz_D (LONG a)
+{
+    return a ? __builtin_clz(a) : 32;
+}
+#else
 inline INT fixnormz_D (LONG a)
 {
     INT leadingBits = 0;
@@ -143,6 +154,7 @@ inline INT fixnormz_D (LONG a)
     return (leadingBits);
 }
 #endif
+#endif
 
 
 /*****************************************************************************
-- 
cgit v1.2.3