
#include <stdlib.h>
////////////////////////////////////////////////////////////////////////////////
// export C interface
// Declares computeGold with C linkage. As used by the definition in this file,
// the arguments are, in order: the output matrix C (hB x wB, row-major), the
// 5x5 mask A (row-major), the input matrix B (hB x wB, row-major), the height
// hB and the width wB, both counted in elements.
extern "C"
void computeGold( float*, const float*, const float*, unsigned int, unsigned int);


// Reference (CPU) 2D filtering of matrix B with the 5x5 mask A.
//
// For every element (i, j) of B the mask is overlaid so that its centre
// (mask element [2][2]) sits on (i, j); each mask element that lands inside B
// is multiplied with the element of B it covers and the products are summed.
// Mask elements that fall outside B contribute nothing. The mask is NOT
// flipped: the sum is  A[m][n] * B[i + m - 2][j + n - 2].
//
//   C   output, hB x wB floats, row-major; every element is overwritten
//   A   mask, 5 x 5 floats, row-major
//   B   input, hB x wB floats, row-major
//   hB  number of rows of B and C
//   wB  number of columns of B and C
//
// Works for any dimensions, including matrices smaller than the mask and
// hB == 0 or wB == 0 (nothing is written in that case).
void
computeGold(float* C, const float* A, const float* B, unsigned int hB, unsigned int wB)
{
	enum { MASK_DIM = 5, MASK_RADIUS = 2 };

	for (unsigned int i = 0; i < hB; ++i) {
		// Clip the mask rows to the rows that exist in B. `hB - i` is at
		// least 1 here, so the comparison cannot wrap around the way
		// `i > hB - 3` does when hB < 3.
		const unsigned int mbegin = (i < MASK_RADIUS) ? MASK_RADIUS - i : 0;
		const unsigned int mend = (hB - i <= MASK_RADIUS)
			? (hB - i) + MASK_RADIUS
			: MASK_DIM;

		for (unsigned int j = 0; j < wB; ++j) {
			// Same clipping for the mask columns.
			const unsigned int nbegin = (j < MASK_RADIUS) ? MASK_RADIUS - j : 0;
			const unsigned int nend = (wB - j <= MASK_RADIUS)
				? (wB - j) + MASK_RADIUS
				: MASK_DIM;

			// Accumulate in double to limit rounding error in the reference.
			double sum = 0;

			for (unsigned int m = mbegin; m < mend; ++m) {
				// i + m >= MASK_RADIUS is guaranteed by mbegin, so the
				// row index below never underflows. size_t keeps the
				// flattened offset from wrapping for large matrices.
				const size_t row = (size_t)wB * (i + m - MASK_RADIUS);

				for (unsigned int n = nbegin; n < nend; ++n) {
					sum += A[m * MASK_DIM + n] * B[row + (j + n - MASK_RADIUS)];
				}
			}

			// store the result
			C[(size_t)i * wB + j] = (float)sum;
		}
	}
}

