// TimeTester / Opti Compo 1 tester - Stephen Stair 2008 - sgstair@akkit.org
// TimeTester is a simple test framework for measuring speed, comparing results, and testing for routine correctness on the DS.

/* [MIT License segment]
Copyright (c) 2008 - Stephen Stair (sgstair@akkit.org)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#ifndef COMPO1TESTER
#define COMPO1TESTER

#include <nds.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "template.h"
#include "TestTree.h"



// Note! Change these defines to change the test's behavior!
// RANDOM_TESTS - if true, use a set of random tests; otherwise use the fixed (non-random) setup in Compo1Tester::RunTest.
#define RANDOM_TESTS false
// NUM_TESTS: Number of tests to run (one TestParameters entry is generated per test, random or not)
#define NUM_TESTS	6
// TEST_SEED: The seed value passed to srand() before the tests are generated
#define TEST_SEED	12345
// COMPARE_TO_EXAMPLE: if true, every test will also run the example function and determine how different the output is from the tested function. (And render the example data too.)
// NOTE: this is no longer a define - it is the extern bool declared below, defined in another translation unit.
//#define COMPARE_TO_EXAMPLE	true
// ALLOWABLE_ERROR_PER_PIXEL determines pass or fail: the test fails when the summed absolute difference exceeds total_pixels * ALLOWABLE_ERROR_PER_PIXEL. (can be floating point, but eh.)
#define ALLOWABLE_ERROR_PER_PIXEL  1


// Runtime switch read by Compo1Tester::RunTest; the definition lives outside this header.
extern bool COMPARE_TO_EXAMPLE;




// Reference implementation with the MandelFunc signature. Compo1Tester::RunTest calls it when
// COMPARE_TO_EXAMPLE is set and compares its output_array against the tested function's. Defined elsewhere.
void Example_MandelFunc(u32 * rectangle, int max_iteration, int width, int height, u16 * output_array, void * workram);
// Helper used while generating random tests; takes a (real, imag) coordinate and an iteration depth.
// Presumably returns true when the point escapes within iterationdepth iterations - confirm against its definition.
bool Helper_Diverges(s64 real, s64 imag, int iterationdepth);

// Signature every tested routine must have; it is the non-type template parameter of Compo1Tester.
//   rectangle     - the TestParameters::rectangle array (six 64-bit values) viewed as u32 words
//   max_iteration - iteration limit; an output value equal to it is plotted as black
//   width, height - output size in pixels; output_array is indexed as x + y*width
//   output_array  - receives one u16 per pixel
//   workram       - 32-byte aligned, zeroed 512KB scratch buffer
typedef void (*MandelFunc)(u32 * rectangle, int max_iteration, int width, int height, u16 * output_array, void * workram);



// One test case: the region of the complex plane to render and the output size.
struct TestParameters {
	// Six 64-bit values, handed to the tested function as a u32 pointer:
	//   [0],[1] top left corner (real, imag)
	//   [2],[3] step per pixel in X (real, imag)
	//   [4],[5] step per pixel in Y (real, imag)
	// Values look like signed fixed point with 0x1000000000000000 == 1.0 (see the coordinate
	// comment in the non-random setup of RunTest) - stored as u64, so negatives wrap.
	u64 rectangle[6];
	// Iteration limit passed to the tested function.
	int max_iteration;
	// Output width in pixels.
	int width;
	// Output height in pixels.
	int height;
};


template<MandelFunc testfunction>
class Compo1Tester : public TestNode {
public:
    Compo1Tester(char * testname) {
		TestTree::RegisterTest(this, testname, "Opti Compo 1: Mandelbrot Set");
    }
    ~Compo1Tester() {}

	
	// Seperate the actual running of the test out of the big RunTest function, in the hope that only this part of the code will be duplicated when the template is instantiated repeatedly.
	u64 TimeRunTest(u32 * rectangle, int max_iteration, int width, int height, u16 * output_array, void * workram)
	{
		u64 tickcount;
		DC_FlushAll();
		DC_InvalidateAll();
		IC_InvalidateAll(); // eliminate any bonuses due to cached memory
		StartTimer();
		testfunction(rectangle,max_iteration,width,height,output_array,workram);
		tickcount=StopTimer();
		return tickcount;
	}


    virtual std::string RunTest(bool & passed) {
		u64 testTime, totalTime = 0;
		passed=true;
		int testindex;
		u32 * rectangle;
		int max_iteration;
		int width;
		int height;
		u16 * output_array;
		u16 * output_array2;
		u32 workram;
		unsigned char * workram_base;
		int max_output_array_size=0;
		int output_size;
		int total_pixels;
		int diff_amount;
		int diff;
		u16 * gradient;
		char output[128];

		// allocate memory for tests
		TestParameters * tests = new TestParameters[NUM_TESTS];
		workram_base = new unsigned char[512*1024 + 32];
		workram = ((((u32)workram_base)+31)&(~31)); // kids, don't try this at home!
		gradient = new u16[128];
		
		videoSetMode(MODE_FB0); // set LCD Main to display VRAM A

		// Setup test parameters!
		// Note to users: feel free to modify the setup by either (1) changing the parameters near the top of the file or (2) forcing specific tests after the random generation - these can be used to your advantage!
		DrawStatusBar("Generating Tests...");
		srand(TEST_SEED);
		for(testindex=0;testindex<NUM_TESTS;testindex++)
		{
			tests[testindex].width = 128; // set size to be 1/2 of the top window :)
			tests[testindex].height = 192;
			if(RANDOM_TESTS)
			{
				tests[testindex].max_iteration=10 + rand()%63;


				// pick fully random values that are outside the set between -2 and +2 for initial coordinate
				do {
					tests[testindex].rectangle[0] = ( ((s64)rand())^(((s64)rand())<<10)^(((s64)rand())<<20)^(((s64)rand())<<30)^(((s64)rand())<<40)^(((s64)rand())<<50) ) & 0x1FFFFFFFFFFFFFFFll - 0x1C00000000000000ll;
					tests[testindex].rectangle[1] = ( ((s64)rand())^(((s64)rand())<<10)^(((s64)rand())<<20)^(((s64)rand())<<30)^(((s64)rand())<<40)^(((s64)rand())<<50) ) & 0x1FFFFFFFFFFFFFFFll - 0x1000000000000000ll;
				} while( Helper_Diverges(tests[testindex].rectangle[0],tests[testindex].rectangle[1],40) );

				s64 end_real, end_imag;
				do {
					// for step in X direction, try to pick a fairly arbitrary vector within a pretty wide range of values. The lowest values still won't be very well exercised, but they wouldn't have produced really great output anyway.
					// try to find this such that the endpoint is in the set
					int scale = (rand()&31)+5;
					tests[testindex].rectangle[2] = ((s64)((rand()^(rand()<<10))&0x1FFFFF - 0x100000)) << scale;
					tests[testindex].rectangle[3] = ((s64)((rand()^(rand()<<10))&0x1FFFFF - 0x100000)) << scale;

					// for step in Y direction, use X dir vector rotated 90 degrees.
					tests[testindex].rectangle[4] = -tests[testindex].rectangle[3];
					tests[testindex].rectangle[5] = tests[testindex].rectangle[2];

					end_real = tests[testindex].rectangle[0] + tests[testindex].rectangle[2] * tests[testindex].width + tests[testindex].rectangle[4]*tests[testindex].height;
					end_imag = tests[testindex].rectangle[1] + tests[testindex].rectangle[3] * tests[testindex].width + tests[testindex].rectangle[5]*tests[testindex].height;
				} while( !Helper_Diverges(end_real,end_imag,5) );

			}
			else
			{
/*
				tests[testindex].max_iteration=32;

				// top left corner real,imag
				tests[testindex].rectangle[0] = -0x1800000000000000ll;
				tests[testindex].rectangle[1] = -0x1800000000000000ll;

				// x step
				tests[testindex].rectangle[2] = 0x3000000000000000ll/128;
				tests[testindex].rectangle[3] = 0;

				// y step
				tests[testindex].rectangle[4] = 0;
				tests[testindex].rectangle[5] = 0x3000000000000000ll/192;
*/

				tests[testindex].max_iteration=200+240*testindex;

				// top left corner real,imag (0.001643721971153 + 0.822467633298876i)
				tests[testindex].rectangle[0] = 0x0006BB9141C1D159ll;
				tests[testindex].rectangle[1] = 0x0D28D3D2309857B4ll;

				// x step
				tests[testindex].rectangle[2] = 0x1000000000000000ll>>(24+testindex*4);
				tests[testindex].rectangle[3] = 0;

				// y step
				tests[testindex].rectangle[4] = 0;
				tests[testindex].rectangle[5] = 0x1000000000000000ll>>(24+testindex*4);
				
				tests[testindex].rectangle[0] -= (tests[testindex].rectangle[2] * tests[testindex].width/2) + (tests[testindex].rectangle[4] * tests[testindex].height/2);
				tests[testindex].rectangle[1] -= (tests[testindex].rectangle[3] * tests[testindex].width/2) + (tests[testindex].rectangle[5] * tests[testindex].height/2);
			}

			output_size = tests[testindex].width*tests[testindex].height;
			if(output_size > max_output_array_size) max_output_array_size = output_size;
		}


		output_array = new u16[max_output_array_size];
		output_array2 = new u16[max_output_array_size];

		total_pixels=0;
		diff_amount=0;

		// build gradient!
		// blue-cyan-green-yellow-red-magenta-blue (6 transitions 21.33 entries each (21,21,22,21,21,22)
		int i,z,x,y;
		for(i=0;i<21;i++) // blue-cyan
		{
			z=(i*31)/21;
			gradient[i] = 0x8000 | RGB15(0,z,31);
		}
		for(i=0;i<21;i++) // cyan-green
		{
			z=(i*31)/21;
			gradient[i+21] = 0x8000 | RGB15(0,31,31-z);
		}
		for(i=0;i<22;i++) // green-yellow
		{
			z=(i*31)/22;
			gradient[i+42] = 0x8000 | RGB15(z,31,0);
		}
		for(i=0;i<21;i++) // yellow-red
		{
			z=(i*31)/21;
			gradient[i+64] = 0x8000 | RGB15(31,31-z,0);
		}
		for(i=0;i<21;i++) // red-magenta
		{
			z=(i*31)/21;
			gradient[i+85] = 0x8000 | RGB15(31,0,z);
		}
		for(i=0;i<22;i++) // magenta-blue
		{
			z=(i*31)/22;
			gradient[i+106] = 0x8000 | RGB15(31-z,0,31);
		}

		for(i=0;i<256*192;i++) VRAM_A[i] = RGB15(0,0,0)|0x8000;

		// run tests!
		int x_offset = 0;
		for(testindex=0;testindex<NUM_TESTS;testindex++)
		{

			sprintf(output,"Running test %i",testindex+1);
			DrawStatusBar(output);

			// put data in local vars so as not to have anything cached when calling the function (except a tiny bit of code from this, and the system)
			width=tests[testindex].width;
			height=tests[testindex].height;
			max_iteration = tests[testindex].max_iteration;
			rectangle = (u32*)(u32)tests[testindex].rectangle;
			// clear memories!
			memset((void *)workram,0,512*1024);
			memset(output_array,0,2*max_output_array_size);

			// Ok, now run the test
			testTime = TimeRunTest(rectangle,max_iteration,width,height,output_array,(void *)workram);
			totalTime+=testTime;

			// plot the results!
			if(COMPARE_TO_EXAMPLE) for(i=0;i<256*192;i++) VRAM_A[i] = RGB15(7,7,7)|0x8000;

			for(y=0;y<height;y++)
			{
				for(x=0;x<width;x++)
				{
					if(x>=256 || y>=192) continue;
					if( output_array[ x+y*width ] == max_iteration ) VRAM_A[x+x_offset+y*256] = 0x8000;
					else VRAM_A[x+x_offset+y*256] = gradient[ output_array[ x+y*width ]&127 ];
				}
			}
			total_pixels += width*height;


			if(COMPARE_TO_EXAMPLE)
			{
				
				sprintf(output,"Checking test %i",testindex+1);
				DrawStatusBar(output);

				// run the example test for comparison
				Example_MandelFunc(rectangle,max_iteration,width,height,output_array2,(void *)workram);
				// plot results!
				for(y=0;y<height;y++)
				{
					for(x=0;x<width;x++)
					{
						if(x>=128 || y>=192) continue;
						if( output_array2[ x+y*width ] == max_iteration ) VRAM_A[x+128+y*256] = 0x8000;
						else VRAM_A[x+128+y*256] = gradient[ output_array2[ x+y*width ]&127 ];
					}
				}
				// compare results
				for(i=0;i<width*height;i++)
				{
					diff = (int)output_array2[i] - (int)output_array[i];
					if(diff<0) diff=-diff;
					diff_amount+=diff;
				}
			}
			else
			{
				x_offset = x_offset ^ 128;
			}
		}

		if(diff_amount>(total_pixels*ALLOWABLE_ERROR_PER_PIXEL)) passed=false;

		if(COMPARE_TO_EXAMPLE)
		{
			sprintf(output,"0x%04X%08X cycles; %i/%i diff",(unsigned int)(totalTime>>32), (unsigned int)totalTime, diff_amount,total_pixels);
		} 
		else 
		{
			sprintf(output,"0x%04X%08X cycles",(unsigned int)(totalTime>>32), (unsigned int)totalTime);
		}
	
		// really need to clean up allocated vars.
		delete[] output_array2;
		delete[] output_array;
		delete[] gradient;
		delete[] workram_base;
		delete[] tests;

		return std::string(output);
    }

};




#endif
