// ----------------------------------------------------------------------------
// Copyright 2000, Paul Nettle. All rights reserved.
//
// Fast floating point substitution routines
//
// This file has been entered into the public domain by the author.
// ----------------------------------------------------------------------------
//
// COMPILE WITH WATCOM
//
// ----------------------------------------------------------------------------
//
//    Routine      Equivalent C code
//    -----------------------------------------
//    ICHOP        (int) value
//    FCHOP        (float) ((int) value)
//    IFLOOR       (int) floor(value)
//    FFLOOR       floor(value)
//    ICEIL        (int) ceil(value)
//    FCEIL        ceil(value)
//    FFRAC        value - floor(value)
//    SUB_PIX      ceil(value) - value
//    FTOX24       (int) (value * 0x1000000)
//    FTOX16       (int) (value * 0x10000)
//    FTOX8        (int) (value * 0x100)
//    IS_NEG       (bool) (value1 < 0.0f)
//    IS_LESS      (bool) (value1 < value2)
//    IS_LEQUAL    (bool) (value1 <= value2)
//    IS_GREATER   (bool) (value1 > value2)
//    IS_GEQUAL    (bool) (value1 >= value2)
//
// ----------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// NOTE(review): the original list had a fourth #include whose header name was
// lost. The code visible here only needs the three headers above (printf,
// rand, floor/ceil); restore the fourth if another part of the file needs it.

// ----------------------------------------------------------------------------

// Absolute value. The argument is evaluated twice, so pass side-effect-free
// expressions only.
#define ABS(a) ((a) < 0 ? -(a) : (a))

// ----------------------------------------------------------------------------

// Shift tables indexed by the biased float exponent e (0..255), filled by
// initFPU(). Entry [e*2+1] is a right-shift count and entry [e*2+0] a
// left-shift count; the integer routines apply both to the 24-bit mantissa.
static unsigned char	chptab[512];	// plain integer conversion
static unsigned char	f24tab[512];	// 24 fractional bits (FTOX242)
static unsigned char	f16tab[512];	// 16 fractional bits (FTOX162)
static unsigned char	fx8tab[512];	//  8 fractional bits (FTOX82)

// Per-exponent mask of the mantissa bits that lie below the binary point;
// IFLOOR3/ICEIL2 test it to find out whether the input has a fraction.
static unsigned int	msktab[256];

// x87 control words prepared by FINIT(): the caller's original word and
// copies with the rounding-control field set to chop, floor and ceil.
static unsigned short	_savedControl;
static unsigned short	_fchopControl;
static unsigned short	_ffloorControl;
static unsigned short	_fceilControl;

// ----------------------------------------------------------------------------

// timerLo/timerHi hold the time-stamp counter captured by startTimer().
// timerAdjust is subtracted from every measurement by the test routines; it is
// not assigned in this part of the file (presumably a calibration value set
// elsewhere -- confirm). writeValue is scratch memory for the asm routines.
static unsigned int	timerLo, timerHi, timerAdjust, writeValue;

// Doubles (low dword first) equal to 1.5 * 2^52, 2^44, 2^36 and 2^28.
// Adding one of them to a value leaves the value's integer form -- with
// 0, 8, 16 or 24 fractional bits respectively -- in the low dword of the sum.
static unsigned int	_t_conv_int[2]   = {0,0x43380000};
static unsigned int	_t_conv24_08[2]  = {0,0x42B80000};
static unsigned int	_t_conv16_16[2]  = {0,0x42380000};
static unsigned int	_t_conv08_24[2]  = {0,0x41B80000};

// Receives the 64-bit sum so its low dword can be read back as an integer.
static unsigned int	_t_fxfl[2];

// ----------------------------------------------------------------------------

// Executes RDTSC (opcode 0F 31) and stores the counter in timerLo/timerHi.
void	startTimer();
#pragma aux startTimer = \
	"db 0x0F, 0x31" \
	"mov timerLo,eax" \
	"mov timerHi,edx" \
	modify exact [edx eax];

// ----------------------------------------------------------------------------

// Executes RDTSC again and returns the low dword minus timerLo. The high
// dword is ignored, so only short intervals are measured correctly.
int	stopTimer();
#pragma aux stopTimer = \
	"db 0x0F, 0x31" \
	"sub eax,timerLo" \
	value [eax] \
	modify exact [edx eax];

// ----------------------------------------------------------------------------

// Saves the current x87 control word in _savedControl and derives the three
// rounding variants from it: bits 10-11 are cleared (mask 0xf3ff) and set to
// 0x0C00 (chop), 0x0400 (round down) or 0x0800 (round up).
//
// The sequence overwrites AX and stores to memory, so it must not be declared
// as modifying nothing: EAX is listed as modified and "nomemory" is dropped
// from the modify clause. No value clause sets up the declared short result.
short	FINIT();
#pragma aux FINIT = \
	"fstcw _savedControl" \
	"mov ax, _savedControl" \
	"and ax, 0xf3ff" \
	"or ax, 0x0C00" \
	"mov _fchopControl, ax" \
	"and ax, 0xf3ff" \
	"or ax, 0x0400" \
	"mov _ffloorControl, ax" \
	"and ax, 0xf3ff" \
	"or ax, 0x0800" \
	"mov _fceilControl, ax" \
	parm nomemory caller [] \
	modify exact [eax];

// ----------------------------------------------------------------------------
// This routine executes in approximately 16 cycles
//
// (int) Val: loads the chop control word and stores the FPU stack top as a
// 32-bit integer. The control word is left in chop mode afterwards.
int	ICHOP( float Val );
#pragma aux ICHOP = \
	"fldcw _fchopControl" \
	"fistp dword ptr writeValue" \
	"mov eax,writeValue" \
	value [eax] \
	parm nomemory caller [8087] \
	modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// This routine executes in approximately 21 cycles
//
// Integer-only (int) Val. The float's bit pattern arrives in EAX. The mantissa
// (with the implicit leading 1 restored) is shifted right and then left by the
// two chptab counts for the exponent, and the sign is applied with the
// xor/sub pair (EDX is 0 or -1).
int	ICHOP2( float Val );
#pragma aux ICHOP2 = \
	"mov ebx,eax" \
	"and eax,0x7fffff" \
	"mov edx,ebx" \
	"and ebx,0x7f800000" \
	"shr ebx,23" \
	"or eax,0x800000" \
	"sar edx,31" \
	"mov cl,chptab[ebx*2+1]" \
	"sar eax,cl" \
	"mov cl,chptab[ebx*2+0]" \
	"sal eax,cl" \
	"xor eax,edx" \
	"sub eax,edx" \
	value [eax] \
	parm nomemory caller [eax] \
	modify nomemory exact [eax ebx ecx edx];

// ----------------------------------------------------------------------------
// This routine executes in approximately 19 cycles
//
// (float) ((int) Val): chop to a 32-bit integer in memory and reload it.
float	FCHOP( float Val );
#pragma aux FCHOP = \
	"fldcw _fchopControl" \
	"fistp dword ptr writeValue" \
	"fild writeValue" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 30 cycles
//
// Same result as FCHOP, using FRNDINT under the chop control word.
float	FCHOP2( float Val );
#pragma aux FCHOP2 = \
	"fldcw _fchopControl" \
	"frndint" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 16 cycles
//
// (int) floor(Val): integer store under the round-down control word.
int	IFLOOR( float Val );
#pragma aux IFLOOR = \
	"fldcw _ffloorControl" \
	"fistp dword ptr writeValue" \
	"mov eax,writeValue" \
	value [eax] \
	parm nomemory caller [8087] \
	modify nomemory exact [eax 8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 16 cycles
//
// (int) floor(Val): adds the _t_conv_int constant under the round-down
// control word, stores the sum as a double and returns its low dword.
int	IFLOOR2( float Val );
#pragma aux IFLOOR2 = \
	"fldcw _ffloorControl" \
	"fadd qword ptr _t_conv_int" \
	"fstp qword ptr _t_fxfl" \
	"mov eax,_t_fxfl" \
	value [eax] \
	parm nomemory caller [8087] \
	modify nomemory exact [eax 8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 25 cycles
//
// Integer-only (int) floor(Val): truncates like ICHOP2, then subtracts one
// when the input is negative and msktab reports fractional mantissa bits.
// -0.0f yields -1 (the test routine for this function expects that).
int	IFLOOR3( float Val );
#pragma aux IFLOOR3 = \
	"mov ebx,eax" \
	"and eax,0x7fffff" \
	"mov edx,ebx" \
	"and ebx,0x7f800000" \
	"shr ebx,23" \
	"or eax,0x800000" \
	"sar edx,31" \
	"mov esi,eax" \
	"mov cl,chptab[ebx*2+1]" \
	"sar eax,cl" \
	"mov cl,chptab[ebx*2+0]" \
	"sal eax,cl" \
	"xor eax,edx" \
	"sub eax,edx" \
	"or edx,edx" \
	"jz l1" \
	"and esi,msktab[ebx * 4]" \
	"jz l1" \
	"dec eax" \
	"l1:" \
	value [eax] \
	parm nomemory caller [eax] \
	modify nomemory exact [eax ebx ecx edx esi];

// ----------------------------------------------------------------------------
// This routine executes in approximately 19 cycles
//
// floor(Val) as a float, through a 32-bit integer in memory.
float	FFLOOR( float Val );
#pragma aux FFLOOR = \
	"fldcw _ffloorControl" \
	"fistp dword ptr writeValue" \
	"fild writeValue" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 30 cycles
//
// floor(Val) as a float, using FRNDINT under the round-down control word.
float	FFLOOR2( float Val );
#pragma aux FFLOOR2 = \
	"fldcw _ffloorControl" \
	"frndint" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 16 cycles
//
// (int) ceil(Val): integer store under the round-up control word.
int	ICEIL( float Val );
#pragma aux ICEIL = \
	"fldcw _fceilControl" \
	"fistp dword ptr writeValue" \
	"mov eax,writeValue" \
	value [eax] \
	parm nomemory caller [8087] \
	modify nomemory exact [eax 8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 26 cycles
//
// Integer-only (int) ceil(Val): an all-zero bit pattern returns 0 at once;
// otherwise truncates like ICHOP2 and adds one when the input is positive
// and msktab reports fractional mantissa bits.
int	ICEIL2( float Val );
#pragma aux ICEIL2 = \
	"or eax,eax" \
	"jz l1" \
	"mov ebx,eax" \
	"and eax,0x7fffff" \
	"mov edx,ebx" \
	"and ebx,0x7f800000" \
	"shr ebx,23" \
	"sar edx,31" \
	"or eax,0x800000" \
	"mov esi,eax" \
	"mov cl,chptab[ebx*2+1]" \
	"sar eax,cl" \
	"mov cl,chptab[ebx*2+0]" \
	"sal eax,cl" \
	"xor eax,edx" \
	"sub eax,edx" \
	"or edx,edx" \
	"jnz l1" \
	"and esi,msktab[ebx * 4]" \
	"jz l1" \
	"inc eax" \
	"l1:" \
	value [eax] \
	parm nomemory caller [eax] \
	modify nomemory exact [eax ebx ecx edx esi];

// ----------------------------------------------------------------------------
// This routine executes in approximately 19 cycles
//
// ceil(Val) as a float, through a 32-bit integer in memory.
float	FCEIL( float Val );
#pragma aux FCEIL = \
	"fldcw _fceilControl" \
	"fistp dword ptr writeValue" \
	"fild writeValue" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 31 cycles
//
// ceil(Val) as a float, using FRNDINT under the round-up control word.
float	FCEIL2( float Val );
#pragma aux FCEIL2 = \
	"fldcw _fceilControl" \
	"frndint" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 22 cycles
//
// Val - floor(Val): stores the floored integer (without popping) and
// subtracts it from the value still on the FPU stack.
float	FFRAC( float Val );
#pragma aux FFRAC = \
	"fldcw _ffloorControl" \
	"fist dword ptr writeValue" \
	"fisub dword ptr writeValue" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 34 cycles
//
// Val - floor(Val), computed entirely on the FPU stack with FRNDINT.
float	FFRAC2( float Val );
#pragma aux FFRAC2 = \
	"fldcw _ffloorControl" \
	"fld st(0)" \
	"frndint" \
	"fsubp st(1), st" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 22 cycles
//
// ceil(Val) - Val: like FFRAC but with the round-up control word and a
// reversed subtraction.
float	SUB_PIX( float Val );
#pragma aux SUB_PIX = \
	"fldcw _fceilControl" \
	"fist dword ptr writeValue" \
	"fisubr dword ptr writeValue" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 34 cycles
//
// ceil(Val) - Val, computed entirely on the FPU stack with FRNDINT.
float	SUB_PIX2( float Val );
#pragma aux SUB_PIX2 = \
	"fldcw _fceilControl" \
	"fld st(0)" \
	"frndint" \
	"fsubrp st(1), st" \
	value [8087] \
	parm nomemory caller [8087] \
	modify nomemory exact [8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 10 cycles
//
// Float to fixed point with 24 fractional bits: adds _t_conv08_24 and returns
// the low dword of the sum. No control word is loaded, so rounding follows
// whatever mode is currently active.
int	FTOX24( float Val );
#pragma aux FTOX24 = \
	"fadd qword ptr _t_conv08_24" \
	"fstp qword ptr _t_fxfl" \
	"mov eax,_t_fxfl" \
	value [eax] \
	parm nomemory caller [8087] \
	modify nomemory exact [eax 8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 21 cycles
//
// Integer-only variant of FTOX24: the ICHOP2 sequence with the f24tab shifts.
int	FTOX242( float Val );
#pragma aux FTOX242 = \
	"mov ebx,eax" \
	"and eax,0x7fffff" \
	"mov edx,ebx" \
	"and ebx,0x7f800000" \
	"shr ebx,23" \
	"or eax,0x800000" \
	"sar edx,31" \
	"mov cl,f24tab[ebx*2+1]" \
	"sar eax,cl" \
	"mov cl,f24tab[ebx*2+0]" \
	"sal eax,cl" \
	"xor eax,edx" \
	"sub eax,edx" \
	value [eax] \
	parm nomemory caller [eax] \
	modify nomemory exact [eax ebx ecx edx];

// ----------------------------------------------------------------------------
// This routine executes in approximately 10 cycles
//
// Float to fixed point with 16 fractional bits (see FTOX24).
int	FTOX16( float Val );
#pragma aux FTOX16 = \
	"fadd qword ptr _t_conv16_16" \
	"fstp qword ptr _t_fxfl" \
	"mov eax,_t_fxfl" \
	value [eax] \
	parm nomemory caller [8087] \
	modify nomemory exact [eax 8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 21 cycles
//
// Integer-only variant of FTOX16: the ICHOP2 sequence with the f16tab shifts.
int	FTOX162( float Val );
#pragma aux FTOX162 = \
	"mov ebx,eax" \
	"and eax,0x7fffff" \
	"mov edx,ebx" \
	"and ebx,0x7f800000" \
	"shr ebx,23" \
	"or eax,0x800000" \
	"sar edx,31" \
	"mov cl,f16tab[ebx*2+1]" \
	"sar eax,cl" \
	"mov cl,f16tab[ebx*2+0]" \
	"sal eax,cl" \
	"xor eax,edx" \
	"sub eax,edx" \
	value [eax] \
	parm nomemory caller [eax] \
	modify nomemory exact [eax ebx ecx edx];

// ----------------------------------------------------------------------------
// This routine executes in approximately 10 cycles
//
// Float to fixed point with 8 fractional bits (see FTOX24).
int	FTOX8( float Val );
#pragma aux FTOX8 = \
	"fadd qword ptr _t_conv24_08" \
	"fstp qword ptr _t_fxfl" \
	"mov eax,_t_fxfl" \
	value [eax] \
	parm nomemory caller [8087] \
	modify nomemory exact [eax 8087];

// ----------------------------------------------------------------------------
// This routine executes in approximately 21 cycles
//
// Integer-only variant of FTOX8: the ICHOP2 sequence with the fx8tab shifts.
int	FTOX82( float Val );
#pragma aux FTOX82 = \
	"mov ebx,eax" \
	"and eax,0x7fffff" \
	"mov edx,ebx" \
	"and ebx,0x7f800000" \
	"shr ebx,23" \
	"or eax,0x800000" \
	"sar edx,31" \
	"mov cl,fx8tab[ebx*2+1]" \
	"sar eax,cl" \
	"mov cl,fx8tab[ebx*2+0]" \
	"sal eax,cl" \
	"xor eax,edx" \
	"sub eax,edx" \
	value [eax] \
	parm nomemory caller [eax] \
	modify nomemory exact [eax ebx ecx edx];

// ----------------------------------------------------------------------------

// Returns the sign bit of the float's bit pattern, so -0.0f reports true.
bool	IS_NEG( float Val );
#pragma aux IS_NEG = \
	"shr eax,31" \
	value [al] \
	parm nomemory caller [eax] \
	modify nomemory exact [eax];

// ----------------------------------------------------------------------------
//
// The four comparisons below subtract the raw bit patterns as integers and
// look at the sign of the difference. That orders floats correctly only when
// both operands are non-negative; the test routines in this file take absolute
// values before calling them.
//
// ----------------------------------------------------------------------------

// Val1 < Val2: sign of (Val1 - Val2).
bool	IS_LESS( float Val1, float Val2 );
#pragma aux IS_LESS = \
	"sub eax,ebx" \
	"shr eax,31" \
	value [al] \
	parm nomemory caller [eax] [ebx]\
	modify nomemory exact [eax];

// ----------------------------------------------------------------------------

// Val1 <= Val2, computed as !(Val2 < Val1). Val1 is passed in EBX and Val2 in
// EAX, so the subtraction yields Val2 - Val1 and the final xor inverts the
// "less" bit. Without the inversion equal operands would report false.
bool	IS_LEQUAL( float Val1, float Val2 );
#pragma aux IS_LEQUAL = \
	"sub eax,ebx" \
	"shr eax,31" \
	"xor eax,1" \
	value [al] \
	parm nomemory caller [ebx] [eax]\
	modify nomemory exact [eax];

// ----------------------------------------------------------------------------

// Val1 > Val2: operands arrive swapped (Val1 in EBX, Val2 in EAX), so this is
// the sign of (Val2 - Val1).
bool	IS_GREATER( float Val1, float Val2 );
#pragma aux IS_GREATER = \
	"sub eax,ebx" \
	"shr eax,31" \
	value [al] \
	parm nomemory caller [ebx] [eax]\
	modify nomemory exact [eax];

// ----------------------------------------------------------------------------

// Val1 >= Val2, computed as !(Val1 < Val2): sign of (Val1 - Val2), inverted by
// the final xor. Without the inversion equal operands would report false.
bool	IS_GEQUAL( float Val1, float Val2 );
#pragma aux IS_GEQUAL = \
	"sub eax,ebx" \
	"shr eax,31" \
	"xor eax,1" \
	value [al] \
	parm nomemory caller [eax] [ebx]\
	modify nomemory exact [eax];

// ----------------------------------------------------------------------------

// Stores EAX to writeValue and immediately loads it back into EBX.
// Presumably a probe for timing a store followed by a load -- confirm against
// the caller, which is not in this part of the file.
void	writeRead();
#pragma aux writeRead = \
	"mov writeValue,eax" \
	"mov ebx,writeValue" \
	modify nomemory exact [eax ebx];
---------------------------------------------------------------------------- void readRead(); #pragma aux readRead = \ "mov eax,writeValue" \ "mov ebx,writeValue" \ modify nomemory exact [eax ebx]; // ---------------------------------------------------------------------------- void flushWriteBack(); #pragma aux flushWriteBack = \ "mov writeValue,eax" \ "mov writeValue,eax" \ "mov writeValue,eax" \ "mov writeValue,eax" \ "mov writeValue,eax" \ modify nomemory; // ---------------------------------------------------------------------------- float makeTestValue() { // The formula for the test values uses three random values per // iteration in this way: (r1 - r2) * (r3 - r4) / r5 float r1 = rand(); float r2 = rand(); float r3 = rand(); float r4 = rand(); float r5 = rand(); // Make sure we don't get a divide by 0 while(!r5) r5 = rand(); // Make the test value return (r1 - r2) * (r3 - r4) / r5; } // ---------------------------------------------------------------------------- void initFPU() { // Setup the tiny tables // // Yeah.. tables can suck for the cache, BUT, these tables only have // a used range of about 40 bytes someplace in the middle, and the most // commonly used portion of each table is within a single cache line, // so the cache thrashing is minimized. 
for (int e = 0; e < 256; e++) { int x = 150 - e; if (x > 31) x = 31; if (x < -31) x = -31; if (x < 0) chptab[e*2+0] = (unsigned char) -x; else chptab[e*2+1] = (unsigned char) x; x = 150 - e - 24; if (x > 31) x = 31; if (x < -31) x = -31; if (x < 0) f24tab[e*2+0] = (unsigned char) -x; else f24tab[e*2+1] = (unsigned char) x; x = 150 - e - 16; if (x > 31) x = 31; if (x < -31) x = -31; if (x < 0) f16tab[e*2+0] = (unsigned char) -x; else f16tab[e*2+1] = (unsigned char) x; x = 150 - e - 8; if (x > 31) x = 31; if (x < -31) x = -31; if (x < 0) fx8tab[e*2+0] = (unsigned char) -x; else fx8tab[e*2+1] = (unsigned char) x; x = 150 - e - 1; if (x > 31) x = 31; if (x < -31) x = -31; if (x < 0) msktab[e] = 0; else msktab[e] = (unsigned int) 0xffffffff >> (31 - x); } // Finally, init the FPU routines FINIT(); } // ---------------------------------------------------------------------------- void testICHOP(const int count) { printf( " Testing ICHOP...." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); int test = ICHOP(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) testValue; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testICHOP2(const int count) { printf( " Testing ICHOP2..." 
); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); int test = ICHOP2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) testValue; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFCHOP(const int count) { printf( " Testing FCHOP...." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FCHOP(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (int) ((float) testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFCHOP2(const int count) { printf( " Testing FCHOP2..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FCHOP2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (int) ((float) testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? 
"ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIFLOOR(const int count) { printf( " Testing IFLOOR..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); int test = IFLOOR(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) ((float) floor(testValue)); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIFLOOR2(const int count) { printf( " Testing IFLOOR2.." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); int test = IFLOOR2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) ((float) floor(testValue)); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIFLOOR3(const int count) { printf( " Testing IFLOOR3.." 
); int expected = 0; unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); int test = IFLOOR3(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) ((float) floor(testValue)); slowTime += stopTimer() - timerAdjust; // Expect -0.0f to return different results if ((*(unsigned int *) &testValue) == 0x80000000) expected++; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", expected == diffCount ? "OK":"ERROR!", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFFLOOR(const int count) { printf( " Testing FFLOOR..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FFLOOR(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (float) floor(testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFFLOOR2(const int count) { printf( " Testing FFLOOR2.." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FFLOOR2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (float) floor(testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? 
"ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testICEIL(const int count) { printf( " Testing ICEIL...." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); int test = ICEIL(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) ((float) ceil(testValue)); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testICEIL2(const int count) { printf( " Testing ICEIL2..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); int test = ICEIL2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) ((float) ceil(testValue)); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFCEIL(const int count) { printf( " Testing FCEIL...." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FCEIL(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (float) ceil(testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. 
%s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFCEIL2(const int count) { printf( " Testing FCEIL2..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FCEIL2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (float) ceil(testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFFRAC(const int count) { printf( " Testing FFRAC...." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FFRAC(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = testValue - (float) floor((double) testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFFRAC2(const int count) { printf( " Testing FFRAC2..." 
); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = FFRAC2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = testValue - (float) floor((double) testValue); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testSUB_PIX(const int count) { printf( " Testing SUB_PIX.." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = SUB_PIX(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (float) ceil((double) testValue) - testValue; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testSUB_PIX2(const int count) { printf( " Testing SUB_PIX2." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); float test = SUB_PIX2(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); float real = (float) ceil((double) testValue) - testValue; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? 
"ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIS_NEG(const int count) { printf( " Testing IS_NEG..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue = makeTestValue(); startTimer(); bool test = IS_NEG(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); bool real = testValue < 0.0f; slowTime += stopTimer() - timerAdjust; if (test != real && *(int *) &testValue != 0x80000000) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIS_LESS(const int count) { printf( " Testing IS_LESS.." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue1 = makeTestValue(); float testValue2 = makeTestValue(); if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f; if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f; if (testValue1 < 0.0f) testValue1 = -testValue1; if (testValue2 < 0.0f) testValue2 = -testValue2; startTimer(); bool test = IS_LESS(testValue1, testValue2); fastTime += stopTimer() - timerAdjust; startTimer(); bool real = testValue1 < testValue2; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIS_LEQUAL(const int count) { printf( " Testing IS_LEQ..." 
); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue1 = makeTestValue(); float testValue2 = makeTestValue(); if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f; if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f; if (testValue1 < 0.0f) testValue1 = -testValue1; if (testValue2 < 0.0f) testValue2 = -testValue2; startTimer(); bool test = IS_LEQUAL(testValue1, testValue2); fastTime += stopTimer() - timerAdjust; startTimer(); bool real = testValue1 <= testValue2; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIS_GREATER(const int count) { printf( " Testing IS_GREAT." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue1 = makeTestValue(); float testValue2 = makeTestValue(); if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f; if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f; if (testValue1 < 0.0f) testValue1 = -testValue1; if (testValue2 < 0.0f) testValue2 = -testValue2; startTimer(); bool test = IS_GREATER(testValue1, testValue2); fastTime += stopTimer() - timerAdjust; startTimer(); bool real = testValue1 > testValue2; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testIS_GEQUAL(const int count) { printf( " Testing IS_GEQ..." 
); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { float testValue1 = makeTestValue(); float testValue2 = makeTestValue(); if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f; if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f; if (testValue1 < 0.0f) testValue1 = -testValue1; if (testValue2 < 0.0f) testValue2 = -testValue2; startTimer(); bool test = IS_GEQUAL(testValue1, testValue2); fastTime += stopTimer() - timerAdjust; startTimer(); bool real = testValue1 >= testValue2; slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFTOX24(const int count) { printf( " Testing FTOX24..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { enum {MAX_TESTVAL = RAND_MAX * RAND_MAX}; float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 128.0f; startTimer(); int test = FTOX24(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) (testValue * (float) 0x1000000); slowTime += stopTimer() - timerAdjust; if (ABS(test - real) > 1) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFTOX242(const int count) { printf( " Testing FTOX242.." 
); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { enum {MAX_TESTVAL = RAND_MAX * RAND_MAX}; float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 128.0f; startTimer(); int test = FTOX242(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) (testValue * (float) 0x1000000); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFTOX16(const int count) { printf( " Testing FTOX16..." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { enum {MAX_TESTVAL = RAND_MAX * RAND_MAX}; float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 32767.0f; startTimer(); int test = FTOX16(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) (testValue * (float) 0x10000); slowTime += stopTimer() - timerAdjust; if (ABS(test - real) > 1) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFTOX162(const int count) { printf( " Testing FTOX162.." 
); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { enum {MAX_TESTVAL = RAND_MAX * RAND_MAX}; float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 32767.0f; startTimer(); int test = FTOX162(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) (testValue * (float) 0x10000); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFTOX8(const int count) { printf( " Testing FTOX8...." ); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { enum {MAX_TESTVAL = RAND_MAX * RAND_MAX}; float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 8000000.0f; startTimer(); int test = FTOX8(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) (testValue * (float) 0x100); slowTime += stopTimer() - timerAdjust; if (ABS(test - real) > 1) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testFTOX82(const int count) { printf( " Testing FTOX82..." 
); unsigned int fastTime = 0, slowTime = 0, diffCount = 0; for (int i = 0; i < count; i++) { enum {MAX_TESTVAL = RAND_MAX * RAND_MAX}; float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 8000000.0f; startTimer(); int test = FTOX82(testValue); fastTime += stopTimer() - timerAdjust; startTimer(); int real = (int) (testValue * (float) 0x100); slowTime += stopTimer() - timerAdjust; if (test != real) diffCount++; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); printf( "Done. %s ticks: %d vs %d\n", diffCount ? "ERROR!":"OK", fastTime, slowTime); } // ---------------------------------------------------------------------------- void testMemSpeed(const int count) { printf( " Testing mem spd.." ); unsigned int fastTime = 0, slowTime = 0, flushTime = 0; for (int i = 0; i < count; i++) { startTimer(); readRead(); fastTime += stopTimer() - timerAdjust; startTimer(); writeRead(); slowTime += stopTimer() - timerAdjust; startTimer(); flushWriteBack(); flushTime += stopTimer() - timerAdjust; } fastTime = (int) ((float) fastTime / (float) count); slowTime = (int) ((float) slowTime / (float) count); flushTime = (int) ((float) flushTime / (float) count); printf( "Done. r/r ticks: %d w/r ticks: %d flush: %d\n", fastTime, slowTime, flushTime); } // ---------------------------------------------------------------------------- void main(int argc, char *argv[]) { setbuf(stdout, 0); // Warn the user about the /op option #ifndef __SW_OP printf( "---------------------------------------------------------\n" ); printf( "\aThis program was not compiled with /op. You can expect\n" ); printf( "problems since Watcom is less accurate without /op.\n " ); printf( "---------------------------------------------------------\n" ); printf( "\n" ); #endif // Init the flaoting point routines initFPU(); // Calculate the timer routine overhead printf( "Syncronizing the timer..." 
); int timerTestCount = 1000000; for (int i = 0; i < timerTestCount; i++) { startTimer(); timerAdjust += stopTimer(); } timerAdjust = (int) ((float) timerAdjust / (float) timerTestCount); printf( "Done. Timer adjustment is %d cycles.\n", timerAdjust ); // Prime the random number generator srand(time(NULL)); // Get the run count int count = 1000000; if (argc > 1) count = atoi(argv[1]); // Start testing printf( "Testing each routine with %d iterations.\n", count ); printf( "\nFastest routines:\n" ); testICHOP(count); testFCHOP(count); testIFLOOR(count); testFFLOOR(count); testICEIL(count); testFCEIL(count); testFFRAC(count); testSUB_PIX(count); testIS_NEG(count); testIS_LESS(count); testIS_LEQUAL(count); testIS_GREATER(count); testIS_GEQUAL(count); testFTOX24(count); testFTOX16(count); testFTOX8(count); printf( "\nMedium-speed routines:\n" ); testICHOP2(count); testFCHOP2(count); testIFLOOR2(count); testFFLOOR2(count); testICEIL2(count); testFCEIL2(count); testFFRAC2(count); testSUB_PIX2(count); testFTOX242(count); testFTOX162(count); testFTOX82(count); printf( "\nSlowest routines:\n" ); testIFLOOR3(count); printf( "\nTesting memory:\n" ); testMemSpeed(count); } // ----------------------------------------------------------------------------