vegastrike  0.5.1.r1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fastmath.cpp
Go to the documentation of this file.
1 /*****************************************************************************
2  * File: fastmath.cpp
3  * This file is provided without support, instruction, or implied warranty of any
4  * kind. NVIDIA makes no guarantee of its fitness for a particular purpose and is
5  * not liable under any circumstances for any damages or loss whatsoever arising
6  * from the use or inability to use this file or items derived from it.
7  * Comments:
8  ******************************************************************************/
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <math.h>
12 
13 #ifdef _WIN32
14 #include <wtypes.h>
15 #else
16 typedef int DWORD;
17 #endif
18 
19 #define FP_BITS( fp ) ( ( *(DWORD*) &(fp) ) )
20 #define FP_ABS_BITS( fp ) ( (FP_BITS( fp )&0x7FFFFFFF) )
21 #define FP_SIGN_BIT( fp ) ( (FP_BITS( fp )&0x80000000) )
22 #define FP_ONE_BITS ( (0x3F800000) )
23 #define __forceinline inline
24 
25 //r = 1/p
26 
27 #define FP_INV( r, p ) \
28  do { \
29  int _i = 2*FP_ONE_BITS-*(int*) &(p); \
30  r = *(float*) &_i; \
31  r = r*(2.0f-(p)*r); \
32  } \
33  while (0)
34 
36 
37 //The following comes from Vincent Van Eeckhout
38 //Thanks for sending us the code!
39 //It's the same thing in assembly but without this C-needed line:
40 //r = *(float *)&_i;
41 
42 float two = 2.0f;
43 
44 #define FP_INV2( r, p ) \
45  do { \
46  __asm {mov eax, 0x7F000000}; \
47  __asm {sub eax, dword ptr[p]}; \
48  __asm {mov dword ptr[r], eax}; \
49  __asm {fld dword ptr[p]}; \
50  __asm {fmul dword ptr[r]}; \
51  __asm {fsubr[two]}; \
52  __asm {fmul dword ptr[r]}; \
53  __asm {fstp dword ptr[r]}; \
54  } \
55  while (0)
56 
58 
59 #define FP_EXP( e, p ) \
60  do { \
61  int _i; \
62  e = -1.44269504f*(float) 0x00800000*(p); \
63  _i = (int) e+0x3F800000; \
64  e = *(float*) &_i; \
65  } \
66  while (0)
67 
68 #define FP_NORM_TO_BYTE( i, p ) \
69  do { \
70  float _n = (p)+1.0f; \
71  i = *(int*) &_n; \
72  if (i >= 0x40000000) i = 0xFF; \
73  else if (i <= 0x3F800000) \
74  i = 0; \
75  else i = ( (i)>>15 )&0xFF; \
76  } \
77  while (0)
78 
79 inline unsigned long FP_NORM_TO_BYTE2( float p )
80 {
81  float fpTmp = p+1.0f;
82  return ( (*(unsigned*) &fpTmp)>>15 )&0xFF;
83 }
84 
85 inline unsigned long FP_NORM_TO_BYTE3( float p )
86 {
87  float ftmp = p+12582912.0f;
88  return (*(unsigned long*) &ftmp)&0xFF;
89 }
90 
91 static unsigned int fast_sqrt_table[0x10000]; //declare table of square roots
92 typedef union FastSqrtUnion
93 {
94  float f;
95  unsigned int i;
97 
99 {
100  unsigned int i;
101  FastSqrtUnion s;
102  for (i = 0; i <= 0x7FFF; i++) {
103  //Build a float with the bit pattern i as mantissa
104  //and an exponent of 0, stored as 127
105 
106  s.i = (i<<8)|(0x7F<<23);
107  s.f = (float) sqrt( s.f );
108 
109  //Take the square root then strip the first 7 bits of
110  //the mantissa into the table
111 
112  fast_sqrt_table[i+0x8000] = (s.i&0x7FFFFF);
113 
114  //Repeat the process, this time with an exponent of 1,
115  //stored as 128
116 
117  s.i = (i<<8)|(0x80<<23);
118  s.f = (float) sqrt( s.f );
119 
120  fast_sqrt_table[i] = (s.i&0x7FFFFF);
121  }
122 }
123 
124 inline float fastsqrt( float n )
125 {
126  if (FP_BITS( n ) == 0)
127  return 0.0; //check for square root of 0
128 
129  FP_BITS( n ) = fast_sqrt_table[(FP_BITS( n )>>8)&0xFFFF]|( ( ( (FP_BITS( n )-0x3F800000)>>1 )+0x3F800000 )&0x7F800000 );
130 
131  return n;
132 }
133 
134 //At the assembly level the recommended workaround for the second FIST bug is the same for the first;
135 //inserting the FRNDINT instruction immediately preceding the FIST instruction.
136 __forceinline void FloatToInt( int *int_pointer, float f )
137 {
138  *int_pointer = f;
139 }
140 
141 int Stupodmain( int argc, char *argv[] )
142 {
143  float t, it, test_sqrt;
144  int i = 0;
146  t = 1234.121234f;
147 
148  test_sqrt = fastsqrt( t );
149  printf( "sqrt expected %20.10f approx %20.10f\n", sqrt( t ), test_sqrt );
150  FP_INV( it, t );
151  printf( "inv expected %20.10f approx %20.10f\n", 1/t, it );
152  i = 0xdeafbabe;
153  FloatToInt( &i, t );
154  printf( "ftol expected %d actual %d %08X\n", (int) t, i, i );
155  return 0;
156 }
157 
159 //3D and geometry ops /////////////////////////////////////////////
160 //-----------------------------------------------------------------------------
161 //Name: CylTest_CapsFirst
162 //Orig: Greg James - gjames@NVIDIA.com
163 //Lisc: Free code - no warranty & no money back. Use it all you want
164 //Desc:
165 //This function tests if the 3D point 'testpt' lies within an arbitrarily
166 //oriented cylinder. The cylinder is defined by an axis from 'pt1' to 'pt2',
167 //the axis having a length squared of 'lengthsq' (pre-compute for each cylinder
168 //to avoid repeated work!), and radius squared of 'radius_sq'.
169 //The function tests against the end caps first, which is cheap -> only
170 //a single dot product to test against the parallel cylinder caps. If the
171 //point is within these, more work is done to find the distance of the point
172 //from the cylinder axis.
173 //Fancy Math (TM) makes the whole test possible with only two dot-products
174 //a subtract, and two multiplies. For clarity, the 2nd mult is kept as a
175 //divide. It might be faster to change this to a mult by also passing in
176 //1/lengthsq and using that instead.
177 //Elminiate the first 3 subtracts by specifying the cylinder as a base
178 //point on one end cap and a vector to the other end cap (pass in {dx,dy,dz}
179 //instead of 'pt2' ).
180 //
181 //The dot product is constant along a plane perpendicular to a vector.
182 //The magnitude of the cross product divided by one vector length is
183 //constant along a cylinder surface defined by the other vector as axis.
184 //
185 //Return: -1.0 if point is outside the cylinder
186 //Return: distance squared from cylinder axis if point is inside.
187 //
188 //-----------------------------------------------------------------------------
189 
190 struct Vec3
191 {
192  float x;
193  float y;
194  float z;
195 };
196 
197 float CylTest_CapsFirst( const Vec3 &pt1, const Vec3 &pt2, float lengthsq, float radius_sq, const Vec3 &testpt )
198 {
199  float dx, dy, dz; //vector d from line segment point 1 to point 2
200  float pdx, pdy, pdz; //vector pd from point 1 to test point
201  float dot, dsq;
202  dx = pt2.x-pt1.x; //translate so pt1 is origin. Make vector from
203  dy = pt2.y-pt1.y; //pt1 to pt2. Need for this is easily eliminated
204  dz = pt2.z-pt1.z;
205  pdx = testpt.x-pt1.x; //vector from pt1 to test point.
206  pdy = testpt.y-pt1.y;
207  pdz = testpt.z-pt1.z;
208  //Dot the d and pd vectors to see if point lies behind the
209  //cylinder cap at pt1.x, pt1.y, pt1.z
210  dot = pdx*dx+pdy*dy+pdz*dz;
211  //If dot is less than zero the point is behind the pt1 cap.
212  //If greater than the cylinder axis line segment length squared
213  //then the point is outside the other end cap at pt2.
214  if (dot < 0.0f || dot > lengthsq) {
215  return -1.0f;
216  } else {
217  //Point lies within the parallel caps, so find
218  //distance squared from point to line, using the fact that sin^2 + cos^2 = 1
219  //the dot = cos() * |d||pd|, and cross*cross = sin^2 * |d|^2 * |pd|^2
220  //Carefull: '*' means mult for scalars and dotproduct for vectors
221  //In short, where dist is pt distance to cyl axis:
222  //dist = sin( pd to d ) * |pd|
223  //distsq = dsq = (1 - cos^2( pd to d)) * |pd|^2
224  //dsq = ( 1 - (pd * d)^2 / (|pd|^2 * |d|^2) ) * |pd|^2
225  //dsq = pd * pd - dot * dot / lengthsq
226  //where lengthsq is d*d or |d|^2 that is passed into this function
227  //distance squared to the cylinder axis:
228  dsq = (pdx*pdx+pdy*pdy+pdz*pdz)-dot*dot/lengthsq;
229  if (dsq > radius_sq)
230  return -1.0f;
231  else
232  return dsq; //return distance squared to axis
233  }
234 }
235