I am trying to verify that Linux PCs are faster than Sun Blade 2000's or any other Suns that are anywhere under $10k, at least with a simple benchmark I wrote. I have run it on Blade 2000's, and in Linux and in Cygwin on my own Duron 950.
I am seeing results that seem to indicate that the Duron 950 is significantly faster for small array sizes, which would indicate to me that probably the Duron is much faster with register or cache accesses, but with a 200 MHz bus it is slower when it needs to do the multiplications from main memory.
I would really like for some people with P4's with the 533 MHz bus to run this, but I'm open to all (faster than my Duron 950) and would definitely like to see some Athlon XP 3000's report. I STRONGLY prefer that, if you are overclocking, you set your BIOS to a normal/correct clock and report those results. I'm suggesting to my school to change to Linux PCs and I'm quite sure they are not going to overclock them, so overclocked results are useless for my purposes. Start a separate thread maybe if you want to report overclocked results too.
DEFINITELY feel free to critique the code (BUT PLEASE START A SEPARATE THREAD). It is my first benchmark ever and so I will not mind. However, I would like results more than critiques I guess.
I did this with an array because I am attempting to simulate floating point matrix multiplications within Matlab and Spice. I chose purposely not to emulate a matrix multiplication exactly, because I do not think that it matters too much how far elements are apart from each other, just that they are apart from each other, such that register to register, cache to cache, and main memory to main memory results can be checked. Correct me if I'm wrong here (PLEASE START A SEPARATE THREAD FOR THIS TOO). I do not have knowledge of IA32 architecture so I could be wrong here.
I'm also pondering starting a sourceforge project for a SPEC-like benchmark that is free and GNU. This MIGHT be a starting point, but in reality, SPEC benchmarks use actual algorithms, so this is probably not a good starting point if the goal is to emulate a SPEC benchmark. Also, it does not involve multiple threads, etc., which a SPEC benchmark for matrix multiplication might.
To run it, copy the code to the named files, copy the script to any file you want, then chmod the script and run the script and then post your results with the following info:
CPU
RAM type and bus rate
I do not care about video cards, etc. because if you look at the code, it obviously does not use them (neither do Spice or Matlab DSP applications).
Mark
PS There is a float version (which is usually 32 bits) and a double version (which is usually 64 bits). I originally assumed the 64 bit Suns might outperform the 32 bit Intel architecture with the double version, but that assumption could be incorrect: the results between the two are very close, so I suspect that with a fast enough bus speed the IA32 will outperform the Sun on either.
----------- the script ----------------
#!/bin/sh
# Builds the float and double benchmark programs, then runs each of the three
# workloads (size, passes) three times per program so that run-to-run variation
# is visible in the posted results.

SEPARATOR="-----------------------------------------"

# Abort when a build fails; otherwise the runs below would either fail with
# "not found" or silently time a stale binary left over from an earlier build.
g++ -Wall -o newDoubleLoops newDoubleLoops.cc || exit 1
g++ -Wall -o newFloatLoops newFloatLoops.cc || exit 1

# run_three PROGRAM SIZE PASSES
# Prints a separator line, then runs ./PROGRAM SIZE PASSES three times.
run_three()
{
    echo "$SEPARATOR"
    for attempt in 1 2 3
    do
        "./$1" "$2" "$3"
    done
}

run_three newFloatLoops 4 2000000
run_three newFloatLoops 32 200000
run_three newFloatLoops 3200 2
# One extra separator here; together with the one printed by the next
# run_three call it forms the double line between the float and double results.
echo "$SEPARATOR"
run_three newDoubleLoops 4 2000000
run_three newDoubleLoops 32 200000
run_three newDoubleLoops 3200 2
echo "$SEPARATOR"
------------- newDoubleLoops.cc -------------
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
using namespace std;
// Parses a non-negative decimal count from a command-line argument.
// Leading whitespace and trailing characters are tolerated, as with atoi().
// Returns true and stores the value in `out` on success; returns false when
// the text does not start with a number or when the number is negative.
static bool parseCount( const char *text, long int &out )
{
    char *end = 0;
    const long int value = strtol( text, &end, 10 );
    if ( end == text || value < 0 )
    {
        return false;
    }
    out = value;
    return true;
}

// Double-precision multiplication benchmark.
//
// Usage: newDoubleLoops <size> <passes>
//   size   - the benchmark works on a size x size table of doubles
//   passes - how many times the row-wise + column-wise sweep is repeated
//
// Prints the elapsed wall-clock time in whole seconds (time() resolution).
// The measured interval starts before the table is allocated and filled, so
// it includes setup as well as the multiplication passes.
//
// Exits with status 1 when the arguments are missing or invalid.
int main( int argc, char **argv )
{
    if ( argc != 3 )
    {
        std::cerr << "usage: " << ( argc > 0 ? argv[ 0 ] : "newDoubleLoops" )
                  << " <size> <passes>\n";
        exit( 1 );
    }

    long int limit = 0;
    long int limitTwo = 0;
    // A negative size must never reach new[], and text that is not a number
    // should be reported instead of being silently treated as 0.
    if ( !parseCount( argv[ 1 ], limit ) || !parseCount( argv[ 2 ], limitTwo ) )
    {
        std::cerr << argv[ 0 ] << ": <size> and <passes> must be non-negative integers\n";
        exit( 1 );
    }

    const time_t start_timestamp = time( NULL );
    srand( static_cast<unsigned int>( start_timestamp ) );

    // Each row is a separate heap allocation; this layout is part of what the
    // benchmark exercises, so it is deliberately kept as-is.
    double **numbers = new double*[ limit ];
    for ( long int i = 0; i < limit; i++ )
    {
        numbers[ i ] = new double[ limit ];
    }

    // Fill the table with pseudo-random values; every element is >= 1.0.
    for ( long int i = 0; i < limit; i++ )
    {
        for ( long int j = 0; j < limit; j++ )
        {
            numbers[ i ][ j ] = double( rand() ) / 2.0 + 1.0;
        }
    }

    // NOTE(review): elements start at >= 1.0 and are only ever multiplied by
    // other elements, so values can only grow and may saturate at infinity
    // after enough passes - confirm that this is acceptable for the timing.
    for ( long int k = 0; k < limitTwo; k++ )
    {
        // Row-wise sweep: multiply each element by its mirror in the same row.
        for ( long int i = 0; i < limit; i++ )
        {
            for ( long int j = 0; j < limit; j++ )
            {
                numbers[ i ][ j ] = numbers[ i ][ j ] * numbers[ i ][ limit - 1 - j ];
            }
        }
        // Column-wise sweep: multiply each element by its mirror in the same
        // column, stepping from row to row in the inner loop.
        for ( long int j = 0; j < limit; j++ )
        {
            for ( long int i = 0; i < limit; i++ )
            {
                numbers[ i ][ j ] = numbers[ i ][ j ] * numbers[ limit - 1 - i ][ j ];
            }
        }
    }

    const time_t end_timestamp = time( NULL );
    const long int elapsed = static_cast<long int>( end_timestamp - start_timestamp );
    std::cout << "double total time = " << elapsed << " with ( " << limit << ", " << limitTwo << " )\n";

    for ( long int i = 0; i < limit; i++ )
    {
        delete[] numbers[ i ];
    }
    delete[] numbers;
    return 0;
}
------------- newFloatLoops.cc -------------
#include <stdio.h>
#include <math.h>
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
using namespace std;
// Parses a non-negative decimal count from a command-line argument.
// Leading whitespace and trailing characters are tolerated, as with atoi().
// Returns true and stores the value in `out` on success; returns false when
// the text does not start with a number or when the number is negative.
static bool parseCount( const char *text, long int &out )
{
    char *end = 0;
    const long int value = strtol( text, &end, 10 );
    if ( end == text || value < 0 )
    {
        return false;
    }
    out = value;
    return true;
}

// Single-precision multiplication benchmark.
//
// Usage: newFloatLoops <size> <passes>
//   size   - the benchmark works on a size x size table of floats
//   passes - how many times the row-wise + column-wise sweep is repeated
//
// Prints the elapsed wall-clock time in whole seconds (time() resolution).
// The measured interval starts before the table is allocated and filled, so
// it includes setup as well as the multiplication passes.
//
// Exits with status 1 when the arguments are missing or invalid.
int main( int argc, char **argv )
{
    if ( argc != 3 )
    {
        std::cerr << "usage: " << ( argc > 0 ? argv[ 0 ] : "newFloatLoops" )
                  << " <size> <passes>\n";
        exit( 1 );
    }

    long int limit = 0;
    long int limitTwo = 0;
    // A negative size must never reach new[], and text that is not a number
    // should be reported instead of being silently treated as 0.
    if ( !parseCount( argv[ 1 ], limit ) || !parseCount( argv[ 2 ], limitTwo ) )
    {
        std::cerr << argv[ 0 ] << ": <size> and <passes> must be non-negative integers\n";
        exit( 1 );
    }

    const time_t start_timestamp = time( NULL );
    srand( static_cast<unsigned int>( start_timestamp ) );

    // Each row is a separate heap allocation; this layout is part of what the
    // benchmark exercises, so it is deliberately kept as-is.
    float **numbers = new float*[ limit ];
    for ( long int i = 0; i < limit; i++ )
    {
        numbers[ i ] = new float[ limit ];
    }

    // Fill the table with pseudo-random values; every element is >= 1.0.
    for ( long int i = 0; i < limit; i++ )
    {
        for ( long int j = 0; j < limit; j++ )
        {
            numbers[ i ][ j ] = float( rand() ) / 2.0F + 1.0F;
        }
    }

    // NOTE(review): elements start at >= 1.0 and are only ever multiplied by
    // other elements, so values can only grow and may saturate at infinity
    // after enough passes - confirm that this is acceptable for the timing.
    for ( long int k = 0; k < limitTwo; k++ )
    {
        // Row-wise sweep: multiply each element by its mirror in the same row.
        for ( long int i = 0; i < limit; i++ )
        {
            for ( long int j = 0; j < limit; j++ )
            {
                numbers[ i ][ j ] = numbers[ i ][ j ] * numbers[ i ][ limit - 1 - j ];
            }
        }
        // Column-wise sweep: multiply each element by its mirror in the same
        // column, stepping from row to row in the inner loop.
        for ( long int j = 0; j < limit; j++ )
        {
            for ( long int i = 0; i < limit; i++ )
            {
                numbers[ i ][ j ] = numbers[ i ][ j ] * numbers[ limit - 1 - i ][ j ];
            }
        }
    }

    const time_t end_timestamp = time( NULL );
    const long int elapsed = static_cast<long int>( end_timestamp - start_timestamp );
    std::cout << "float total time = " << elapsed << " with ( " << limit << ", " << limitTwo << " )\n";

    for ( long int i = 0; i < limit; i++ )
    {
        delete[] numbers[ i ];
    }
    delete[] numbers;
    return 0;
}