diff options
Diffstat (limited to 'fixpow.c')
-rw-r--r-- | fixpow.c | 169 |
1 files changed, 93 insertions, 76 deletions
@@ -11,23 +11,27 @@ | |||
11 | #include <stdlib.h> | 11 | #include <stdlib.h> |
12 | #include <math.h> | 12 | #include <math.h> |
13 | 13 | ||
14 | #define TO_Q16(X) ((int32_t)(65536.f*(X))) | 14 | #define TO_Q16(X) ((int32_t)(65536.*(X))) |
15 | #define FROM_Q16(X) ((double)((X)/65536.f)) | 15 | #define TO_Q26(X) ((int32_t)(67108864.*(X))) |
16 | #define FROM_Q16(X) ((double)((X)/65536.)) | ||
17 | #define FROM_Q26(X) ((double)((x)/67108864.)) | ||
16 | 18 | ||
17 | // It works as follows: | 19 | /* |
18 | // P = pow_QX_QY_QP( x, y ) == exp( ln(x) * y ) | 20 | It works as follows: |
21 | P = pow_QX_QY_QP( x, y ) == exp( ln(x) * y ) | ||
19 | 22 | ||
20 | // Where if x is negative: | 23 | Where if x is negative: |
21 | // * y must have no fractional part, | 24 | * y must have no fractional part, |
22 | // * the result's sign is the lowest integral bit of y | 25 | * the result's sign is the lowest integral bit of y |
23 | 26 | ||
24 | // We note that in every standard Q representation ln(x) will not exceed | 27 | We note that in every standard Q representation ln(x) will not exceed |
25 | // the value 22, so that we can safely work with a Q26 representation. | 28 | the value 22, so that we can safely work with a Q26 representation. |
26 | // | 29 | |
27 | // if ln(x) * y would overflow in that representation, so would | 30 | if ln(x) * y would overflow in that representation, so would |
28 | // exp( ln(x) * y ) in Q16. | 31 | exp( ln(x) * y ) in Q16. |
29 | // | 32 | |
30 | // Finally exp() is calculated in the domain specified by script | 33 | Finally exp() is calculated in the domain specified by script |
34 | */ | ||
31 | 35 | ||
32 | #if 0 | 36 | #if 0 |
33 | The table[tm], generated from different versions of | 37 | The table[tm], generated from different versions of |
@@ -72,19 +76,19 @@ | |||
72 | 76 | ||
73 | for negative values: | 77 | for negative values: |
74 | 78 | ||
75 | -00,374693449441410697531 017FAFA3 x-= ( x >> 2 ) + ( x >> 4 ) | 79 | -00,374693449441410697531 x-= ( x >> 2 ) + ( x >> 4 ) |
76 | -00,207639364778244489562 00D49F69 x-= ( x >> 2 ) - ( x >> 4 ) | 80 | -00,207639364778244489562 x-= ( x >> 2 ) - ( x >> 4 ) |
77 | -00,115831815525121700761 00769C9D x-= ( x >> 3 ) - ( x >> 6 ) | 81 | -00,115831815525121700761 x-= ( x >> 3 ) - ( x >> 6 ) |
78 | -00,060380510988907482028 003DD463 x-= ( x >> 4 ) - ( x >> 8 ) | 82 | -00,060380510988907482028 x-= ( x >> 4 ) - ( x >> 8 ) |
79 | -00,031748698314580298119 002082BB x-= ( x >> 5 ) | 83 | -00,031748698314580298119 x-= ( x >> 5 ) |
80 | -00,015996403602177928366 0010615C x-= ( x >> 6 ) + ( x >> 12 ) | 84 | -00,015996403602177928366 x-= ( x >> 6 ) + ( x >> 12 ) |
81 | -00,008089270731616965762 0008488D x-= ( x >> 7 ) + ( x >> 12 ) | 85 | -00,008089270731616965762 x-= ( x >> 7 ) + ( x >> 12 ) |
82 | -00,004159027401785260480 00044243 x-= ( x >> 8 ) + ( x >> 12 ) | 86 | -00,004159027401785260480 x-= ( x >> 8 ) + ( x >> 12 ) |
83 | -00,003423823349553609900 00038188 x-= ( x >> 8 ) - ( x >> 11 ) | 87 | -00,003423823349553609900 x-= ( x >> 8 ) - ( x >> 11 ) |
84 | -00,001832733117311761669 0001E070 x-= ( x >> 9 ) - ( x >> 13 ) | 88 | -00,001832733117311761669 x-= ( x >> 9 ) - ( x >> 13 ) |
85 | -00,000961766059678620302 0000FC1F x-= ( x >> 10 ) - ( x >> 16 ) | 89 | -00,000961766059678620302 x-= ( x >> 10 ) - ( x >> 16 ) |
86 | -00,000484583944571210926 00007F07 x-= ( x >> 11 ) - ( x >> 18 ) | 90 | -00,000484583944571210926 x-= ( x >> 11 ) - ( x >> 18 ) |
87 | -00,000243216525424976433 00003FC1 x-= ( x >> 12 ) - ( x >> 20 ) | 91 | -00,000243216525424976433 x-= ( x >> 12 ) - ( x >> 20 ) |
88 | 92 | ||
89 | #endif | 93 | #endif |
90 | 94 | ||
@@ -92,53 +96,60 @@ for negative values: | |||
92 | static int32_t fixlog( int32_t x ) { | 96 | static int32_t fixlog( int32_t x ) { |
93 | int32_t t,y; | 97 | int32_t t,y; |
94 | 98 | ||
95 | y = 0x2996BD9E; // ln(2^15) << 26 | 99 | // Assume an x in Q21 (shift by 15) and thus start with y with log(2^15) |
96 | if(x<0x00008000) x<<=16, y-= 0x2C5C85FD; | 100 | y = TO_Q26(10.39720770839918); |
97 | if(x<0x00800000) x<<= 8, y-= 0x162E42FE; | 101 | if(x<0x00008000) x<<=16, y-= TO_Q26(11.09035488895912); /* log(65536) */ |
98 | if(x<0x08000000) x<<= 4, y-= 0x0B17217F; | 102 | if(x<0x00800000) x<<= 8, y-= TO_Q26(5.545177444479562); /* log(256) */ |
99 | if(x<0x20000000) x<<= 2, y-= 0x058B90BF; | 103 | if(x<0x08000000) x<<= 4, y-= TO_Q26(2.772588722239781); /* log(16) */ |
100 | if(x<0x40000000) x<<= 1, y-= 0x02C5C85F; | 104 | if(x<0x20000000) x<<= 2, y-= TO_Q26(1.386294361119891); /* log(4) */ |
101 | t=x+(x>>1); if((t&0x80000000)==0) x=t, y-= 0x019F323E; | 105 | if(x<0x40000000) x<<= 1, y-= TO_Q26(0.693147180559945); /* log(2) */ |
102 | t=x+(x>>2); if((t&0x80000000)==0) x=t, y-= 0x00E47FBE; | 106 | t=x+(x>>1); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.405465108108164); /* log(1+1/2) */ |
103 | t=x+(x>>3); if((t&0x80000000)==0) x=t, y-= 0x00789C1D; | 107 | t=x+(x>>2); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.22314355131421); /* log(1+1/4) */ |
104 | t=x+(x>>4); if((t&0x80000000)==0) x=t, y-= 0x003E1461; | 108 | t=x+(x>>3); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.117783035656384); /* log(1+1/8) */ |
105 | t=x+(x>>5); if((t&0x80000000)==0) x=t, y-= 0x001F829B; | 109 | t=x+(x>>4); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.060624621816435); /* log(1+1/16) */ |
106 | t=x+(x>>6); if((t&0x80000000)==0) x=t, y-= 0x000FE054; | 110 | t=x+(x>>5); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.030771658666754); /* log(1+1/32) */ |
107 | t=x+(x>>7); if((t&0x80000000)==0) x=t, y-= 0x0007F80A; | 111 | t=x+(x>>6); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.015504186535965); /* log(1+1/64) */ |
108 | t=x+(x>>8); if((t&0x80000000)==0) x=t, y-= 0x0003FE01; | 112 | t=x+(x>>7); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.007782140442055); /* log(1+1/128) */ |
109 | t=x+(x>>9); if((t&0x80000000)==0) x=t, y-= 0x0001FF80; | 113 | t=x+(x>>8); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.003898640415657); /* log(1+1/256) */ |
110 | t=x+(x>>10);if((t&0x80000000)==0) x=t, y-= 0x0000FFE0; | 114 | t=x+(x>>9); if((t&0x80000000)==0) x=t, y-= TO_Q26(0.001951220131262); /* log(1+1/512) */ |
111 | t=x+(x>>11);if((t&0x80000000)==0) x=t, y-= 0x00007FF8; | 115 | t=x+(x>>10);if((t&0x80000000)==0) x=t, y-= TO_Q26(0.000976085973055); /* log(1+1/1024) */ |
112 | t=x+(x>>12);if((t&0x80000000)==0) x=t, y-= 0x00003FFE; | 116 | t=x+(x>>11);if((t&0x80000000)==0) x=t, y-= TO_Q26(0.000488162079501); /* log(1+1/2048) */ |
113 | t=x+(x>>13);if((t&0x80000000)==0) x=t, y-= 0x00001FFF; | 117 | t=x+(x>>12);if((t&0x80000000)==0) x=t, y-= TO_Q26(0.000244110827527); /* log(1+1/4096) */ |
118 | t=x+(x>>13);if((t&0x80000000)==0) x=t, y-= TO_Q26(0.000122062862526); /* log(1+1/8192) */ | ||
119 | |||
120 | /* Estimate residual error, log(1-x) which for small x is approx -x */ | ||
114 | x = 0x80000000-x; | 121 | x = 0x80000000-x; |
115 | y-= x>>5; | 122 | |
116 | return y; | 123 | /* x has been promoted to Q31, substract residual error in Q26 by shifting down by 5 */ |
124 | return y - ( x >> 5 ); | ||
117 | } | 125 | } |
118 | 126 | ||
119 | // input is Q26, output Q16 | 127 | // input is Q26, output Q16 |
120 | static uint32_t fixexp(int32_t x) { | 128 | static uint32_t fixexp(int32_t x) { |
121 | int32_t t; | 129 | int32_t t; |
130 | /* Start y with 1.0 */ | ||
122 | uint32_t y = 0x10000; | 131 | uint32_t y = 0x10000; |
123 | 132 | ||
124 | if( (x & 0x80000000) == 0 ) { | 133 | if( x >= 0 ) { |
125 | t=x-0x162E42FE; if(t>=0) x=t,y<<=8; | 134 | t=x-TO_Q26(5.545177444479562); if(t>=0) x=t,y<<=8; /* log(256) */ |
126 | t=x-0x0B17217F; if(t>=0) x=t,y<<=4; | 135 | t=x-TO_Q26(2.772588722239781); if(t>=0) x=t,y<<=4; /* log(16) */ |
127 | t=x-0x058B90BF; if(t>=0) x=t,y<<=2; | 136 | t=x-TO_Q26(1.386294361119891); if(t>=0) x=t,y<<=2; /* log(4) */ |
128 | t=x-0x02C5C85F; if(t>=0) x=t,y<<=1; | 137 | t=x-TO_Q26(0.693147180559945); if(t>=0) x=t,y<<=1; /* log(2) */ |
129 | t=x-0x019F323E; if(t>=0) x=t,y+=y>>1; | 138 | t=x-TO_Q26(0.405465108108164); if(t>=0) x=t,y+=y>>1; /* log(1+1/2) */ |
130 | t=x-0x00E47FBE; if(t>=0) x=t,y+=y>>2; | 139 | t=x-TO_Q26(0.22314355131421); if(t>=0) x=t,y+=y>>2; /* log(1+1/4) */ |
131 | t=x-0x00789C1D; if(t>=0) x=t,y+=y>>3; | 140 | t=x-TO_Q26(0.117783035656384); if(t>=0) x=t,y+=y>>3; /* log(1+1/8) */ |
132 | t=x-0x003E1461; if(t>=0) x=t,y+=y>>4; | 141 | t=x-TO_Q26(0.060624621816435); if(t>=0) x=t,y+=y>>4; /* log(1+1/16) */ |
133 | t=x-0x001F829B; if(t>=0) x=t,y+=y>>5; | 142 | t=x-TO_Q26(0.030771658666754); if(t>=0) x=t,y+=y>>5; /* log(1+1/32) */ |
134 | t=x-0x000FE054; if(t>=0) x=t,y+=y>>6; | 143 | t=x-TO_Q26(0.015504186535965); if(t>=0) x=t,y+=y>>6; /* log(1+1/64) */ |
135 | t=x-0x0007F80A; if(t>=0) x=t,y+=y>>7; | 144 | t=x-TO_Q26(0.007782140442055); if(t>=0) x=t,y+=y>>7; /* log(1+1/128) */ |
136 | t=x-0x0003FE01; if(t>=0) x=t,y+=y>>8; | 145 | t=x-TO_Q26(0.003898640415657); if(t>=0) x=t,y+=y>>8; /* log(1+1/256) */ |
137 | t=x-0x0001FF80; if(t>=0) x=t,y+=y>>9; | 146 | t=x-TO_Q26(0.001951220131262); if(t>=0) x=t,y+=y>>9; /* log(1+1/512) */ |
138 | t=x-0x0000FFE0; if(t>=0) x=t,y+=y>>10; | 147 | t=x-TO_Q26(0.000976085973055); if(t>=0) x=t,y+=y>>10; /* log(1+1/1024) */ |
139 | t=x-0x00007FF8; if(t>=0) x=t,y+=y>>11; | 148 | t=x-TO_Q26(0.000488162079501); if(t>=0) x=t,y+=y>>11; /* log(1+1/2048) */ |
140 | t=x-0x00003FFE; if(t>=0) x=t,y+=y>>12; | 149 | t=x-TO_Q26(0.000244110827527); if(t>=0) x=t,y+=y>>12; /* log(1+1/4096) */ |
141 | t=x-0x00001FFF; if(t>=0) x=t,y+=y>>13; | 150 | t=x-TO_Q26(0.000122062862526); if(t>=0) x=t,y+=y>>13; /* log(1+1/8192) */ |
151 | /* from here the values in Q26 become approx 2^n, so lets check bits | ||
152 | and fix the residual error below */ | ||
142 | if(x&0x0001000) y+=y>>14; | 153 | if(x&0x0001000) y+=y>>14; |
143 | if(x&0x0000800) y+=y>>15; | 154 | if(x&0x0000800) y+=y>>15; |
144 | if(x&0x0000400) y+=y>>16; | 155 | if(x&0x0000400) y+=y>>16; |
@@ -192,28 +203,34 @@ static uint32_t fixexp(int32_t x) { | |||
192 | 203 | ||
193 | int fixpow(int32_t *pow, int32_t base, int32_t exponent ) | 204 | int fixpow(int32_t *pow, int32_t base, int32_t exponent ) |
194 | { | 205 | { |
195 | int neg = 0; | ||
196 | int32_t log_base, res; | 206 | int32_t log_base, res; |
197 | int64_t log_base_times_exponent; | 207 | int64_t log_base_times_exponent; |
208 | int neg = 0; | ||
198 | 209 | ||
199 | if( base < 0 ) { | 210 | if( base < 0 ) { |
200 | // negative bases only can have integer exponents | 211 | /* negative bases only can have integer exponents */ |
201 | if( exponent & 0xffff ) return -1; | 212 | if( exponent & 0xffff ) return -1; |
202 | // sign of power of a negative base is determined by | 213 | /* sign of power of a negative base is determined by |
203 | // wether exp is even | 214 | wether exp is even */ |
204 | if( exponent & 0x10000 ) neg = 1; | 215 | if( exponent & 0x10000 ) neg = 1; |
205 | base = abs(base); | 216 | base = abs(base); |
206 | } | 217 | } |
207 | 218 | ||
208 | // To calculate pow(base,exp), we do exp( log(base) * exp ) | 219 | /* To calculate pow(base,exp), we do exp( log(base) * exp ) |
209 | // which is mathematically the same. log_base is Q26 | 220 | which is mathematically the same. log_base is Q26 */ |
210 | log_base = fixlog( base ); | 221 | log_base = fixlog( base ); |
211 | log_base_times_exponent = ( (int64_t)log_base * (int64_t)exponent ) >> 16; | 222 | log_base_times_exponent = ( (int64_t)log_base * (int64_t)exponent ) >> 16; |
212 | 223 | ||
213 | // fixexp overflows for values > 21,48756259689264 which | 224 | #if 0 |
214 | // in Q26 notation is 1442005916 | 225 | /* In Q0, fixexp overflows for values > 21,48756259689264 which |
226 | in Q26 notation is 1442005916 */ | ||
215 | if( log_base_times_exponent > 1442005916 ) | 227 | if( log_base_times_exponent > 1442005916 ) |
216 | return -2; | 228 | return -2; |
229 | #endif | ||
230 | |||
231 | /* In Q16, fixexp overflows for values > 10,39720770839918 */ | ||
232 | if( log_base_times_exponent > TO_Q26(10,39720770839918) ) | ||
233 | return -2; | ||
217 | 234 | ||
218 | res = (int32_t)log_base_times_exponent; | 235 | res = (int32_t)log_base_times_exponent; |
219 | res = fixexp( res ); | 236 | res = fixexp( res ); |
@@ -225,8 +242,8 @@ int fixpow(int32_t *pow, int32_t base, int32_t exponent ) | |||
225 | 242 | ||
226 | int main() | 243 | int main() |
227 | { | 244 | { |
228 | double base = -.5f; | 245 | double base = -.5; |
229 | double exponent = 10.f; | 246 | double exponent = 10.; |
230 | int32_t result; | 247 | int32_t result; |
231 | int error = fixpow( &result, TO_Q16(base), TO_Q16(exponent) ); | 248 | int error = fixpow( &result, TO_Q16(base), TO_Q16(exponent) ); |
232 | 249 | ||