// Opti / Compo 1 Lower Bound code
// Compo1LowerBound.cpp - Test-related code for Opti Compo 1: Mandelbrot Set
//  Specifically, this is a modified version of the Compo1Example that intentionally loses the bottom 4 bits of accuracy in the multiply.
//  It will be the "lower bound" for how incorrect your results can be and still be accepted.

/* [MIT License segment]
Copyright (c) 2008 - Stephen Stair (sgstair@akkit.org)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include <nds.h>
#include "Compo1Tester.h"


// Fixed point multiply for signed 4:60 values, implemented outside of the other stuff so it's less cluttered.
//  _minus4 indicates that it intentionally loses the bottom 4 bits of the multiply's precision.
//  (technically loses 3.5 bits - the bottom 4 bits are stripped, but then the value is moved to the
//   center of the uncertain region by setting those 4 bits to binary 1000.)
//  a, b    : the two operands, in 4:60 fixed point.
//  returns : a*b in 4:60 fixed point, with the low 4 bits always equal to 8.
//            Overflow of the 4:60 range is not detected; the result simply wraps.
//  Relies on >> of a negative s64 being an arithmetic shift (implementation-defined before C++20).
inline s64 FixedMul_4_60_minus4(s64 a, s64 b)
{
	const u64 kLow32 = 0xFFFFFFFFull;

	// Split each operand into an unsigned low half and a signed (sign-carrying) high half.
	const s64 a_lo = static_cast<s64>(static_cast<u64>(a) & kLow32);
	const s64 b_lo = static_cast<s64>(static_cast<u64>(b) & kLow32);
	const s64 a_hi = a >> 32;
	const s64 b_hi = b >> 32;

	// lo*lo can be as large as (2^32-1)^2, which does not fit in s64, so it is multiplied as unsigned.
	const u64 low = static_cast<u64>(a_lo) * static_cast<u64>(b_lo);
	const s64 mid1 = a_lo * b_hi;
	const s64 mid2 = b_lo * a_hi;
	const s64 hi = a_hi * b_hi;
	// currently have a 128bit number from multiplying 2 64bit numbers, split across 4 parts
	// | top 64 bit | btm 64 bit |  All of these pieces need to be added to make the full number
	// |-----hi-----|               Hi covers bits 127..64
	//        |----mid1----|        Mid1 covers bits 95..32
	//        |----mid2----|        Mid2 covers bits 95..32
	//              |----low-----|  Low covers bits 63..0
	//  |---wanted---|              We want bits 123..60, which represents a 4:60 section of the 8:120 number we have.
	// need to preserve the carry from the mid+low intersection (bits 32..64)
	// then add the top parts of mid, and hi - to get the value we want.

	// Sum of everything that lands in bits 63..32; the total is below 3*2^32, so the carries sit in bits 65..64.
	const u64 middle = (low >> 32) + (static_cast<u64>(mid1) & kLow32) + (static_cast<u64>(mid2) & kLow32);

	// Dropping 28 bits leaves bits 63..60 (plus the carries) at the bottom of the value.
	u64 result = middle >> 28;

	// Add the parts that start at bit 64, scaled by 16 to line up with bit 60.
	// Done in unsigned arithmetic so a product outside the 4:60 range wraps instead of overflowing a signed type.
	result += (static_cast<u64>(mid1 >> 32) << 4) + (static_cast<u64>(mid2 >> 32) << 4) + (static_cast<u64>(hi) << 4);

	// lose some precision: clear the low 4 bits, then set them to 8.
	// These must be two separate steps - written as "result &= MASK + 8" the addition binds first,
	// which turns the mask into ...FFF8 and only clears 3 bits without re-centering.
	result = (result & ~static_cast<u64>(0xF)) | 8;
	return static_cast<s64>(result);
}
// Complex multiply in 4:60 fixed point: (destreal + i*destimag) *= (srcreal + i*srcimag).
//  The source operand is passed by value, so it is safe to pass the same variables as both
//  destination and source (e.g. to square a number in place).
//  Every partial product goes through FixedMul_4_60_minus4.
inline void ComplexMultiply_minus4(s64 & destreal, s64 & destimag, s64 srcreal, s64 srcimag)
{
	// Compute all four partial products before either destination is overwritten.
	const s64 real_by_real = FixedMul_4_60_minus4( destreal, srcreal );
	const s64 imag_by_imag = FixedMul_4_60_minus4( destimag, srcimag );
	const s64 imag_by_real = FixedMul_4_60_minus4( destimag, srcreal );
	const s64 real_by_imag = FixedMul_4_60_minus4( destreal, srcimag );

	destreal = real_by_real - imag_by_imag;
	destimag = imag_by_real + real_by_imag;
}

// IsOutOfBounds - Tests if a 4:60 fixed point vector is outside the radius-2 circle.
//  The real difficulty is overflow: the squared length can reach 8.0, which does not fit in 4:60,
//  so each component is range-checked first and the squares are halved before being summed.
//  returns : true if the point is outside, false otherwise.
inline bool IsOutOfBounds_minus4( s64 & real, s64 & imag )
{
	const s64 kTwo = 0x2000000000000000ll; // 2.0 in 4:60 fixed point

	// Quick reject: either component on its own is outside of -2..+2.
	const bool real_in_range = (real >= -kTwo) && (real <= kTwo);
	const bool imag_in_range = (imag >= -kTwo) && (imag <= kTwo);
	if( !(real_in_range && imag_in_range) )
	{
		return true;
	}

	// Half of the squared length; comparing it against 2.0 is the same as comparing the squared length against 4.0.
	s64 half_sq_length = FixedMul_4_60_minus4( real, real ) >> 1;
	half_sq_length += FixedMul_4_60_minus4( imag, imag ) >> 1;

	return (half_sq_length > kTwo) || (half_sq_length < -kTwo);
}

// Example (naive) implementation of the Mandelbrot rendering function, built on the _minus4 helpers.
//  rectangle     : read as six s64 values in 4:60 fixed point:
//                    [0],[1] real/imag of the first point,
//                    [2],[3] real/imag step added for each x,
//                    [4],[5] real/imag step added for each y.
//  max_iteration : upper limit on the number of iterations tried per point.
//  width, height : size of the output in points.
//  output_array  : receives one value per point at index x + y*width.
//                  Points that start outside the radius-2 circle get 1; all others get the
//                  iteration index at which the value left the circle, or max_iteration if it never did.
//  workram       : not used by this implementation.
void Example_MandelFunc_minus4(u32 * rectangle, int max_iteration, int width, int height, u16 * output_array, void * workram)
{
	(void)workram;
	s64 * coords = (s64 *)rectangle;

	for( int row = 0; row < height; row++ )
	{
		// The point being tested, starting at the left edge of this row.
		s64 c_real = coords[0] + coords[4]*row;
		s64 c_imag = coords[1] + coords[5]*row;

		for( int col = 0; col < width; col++, c_real += coords[2], c_imag += coords[3] )
		{
			const int out_index = col + row*width;

			// Points that are already out of bounds need no iteration at all.
			if( IsOutOfBounds_minus4( c_real, c_imag ) )
			{
				output_array[ out_index ] = 1;
				continue;
			}

			// Otherwise iterate P[n+1] = P[n]*P[n] + c from P = 0 until it escapes or the limit is hit.
			s64 z_real = 0;
			s64 z_imag = 0;
			int iteration = 0;
			while( iteration < max_iteration && !IsOutOfBounds_minus4( z_real, z_imag ) )
			{
				ComplexMultiply_minus4( z_real, z_imag, z_real, z_imag );
				z_real += c_real;
				z_imag += c_imag;
				iteration++;
			}
			output_array[ out_index ] = iteration;
		}
	}
}







// File-scope tester object for Example_MandelFunc_minus4, labelled "Lower Bound -4bits".
// NOTE(review): presumably the Compo1Tester constructor (declared in Compo1Tester.h) registers this
// function with the test harness at static-initialization time - confirm against that header.
static Compo1Tester<Example_MandelFunc_minus4> mandel_example_minus4("Lower Bound -4bits");
