//
// 2002 by Ted T. Yuan.   
//

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
//#define WITH_LOGGING
#include <ProCon.h>
//#undef WITH_LOGGING
#include <time.h>

// type of matrix elements 
typedef double ValueType; 

// Single-threaded reference multiply: dest = srcA * srcB.
//
// All matrices are dense and stored row by row:
//   srcA  n rows of m values (read as srcA[i*m + j])
//   srcB  m rows of p values (read as srcB[j*p + k])
//   dest  n rows of p values (written as dest[i*p + k])
// Every element of dest is overwritten. Each row of dest is zeroed before
// the inputs for that row are read, so dest must not alias srcA or srcB.
// When n or p is not positive nothing is written; when m is not positive
// dest is simply zero-filled.
void MatrixMultiply(
	const ValueType *srcA,
	const ValueType *srcB,
	ValueType *dest,
	int n, // srcA is n * m
	int m, // srcB is m * p
	int p)
{
  // An empty result (no rows or no columns) leaves dest untouched.
  if (n <= 0 || p <= 0)
    return;

  // Offsets are computed in size_t so that "row * width" cannot overflow
  // int for large matrices.
  const size_t rows = static_cast<size_t>(n);
  const size_t inner = m > 0 ? static_cast<size_t>(m) : 0;
  const size_t cols = static_cast<size_t>(p);

  // for every row in the first matrix
  for (size_t i = 0; i < rows; ++i)
  {
      ValueType *destRow = dest + i * cols;
      const ValueType *aRow = srcA + i * inner;

      for (size_t k = 0; k < cols; ++k)
        destRow[k] = 0;

      // for every column in the first matrix: add srcA[i][j] times row j of
      // srcB to row i of dest, so the innermost loop walks srcB and dest
      // contiguously.
      for (size_t j = 0; j < inner; ++j)
      {
        const ValueType srcVal = aRow[j];
        const ValueType *bRow = srcB + j * cols;

        for (size_t k = 0; k < cols; ++k)
          destRow[k] += srcVal * bRow[k];
      }
  }
}

// Stores the value i in element i for the first numEls elements of m
// (m[0] = 0, m[1] = 1, ...). A non-positive numEls writes nothing.
void FillMatrix(ValueType *m, int numEls)
{
  for (int i = 0; i < numEls; ++i)
    m[i] = static_cast<ValueType>(i);
}

//////////////////////////////////////////////////////
// Testing Producer-Consumer model

namespace sesame {

// data structs

// Inputs and output of one matrix product, shared by pointer with every
// Convoluter. The struct only stores the pointers and sizes: it has no
// constructor or destructor, so all members are indeterminate until assigned
// and the buffers are allocated and freed by the caller
// (see RunMatrixMultiplyTest).
typedef struct _InNOut{
	const ValueType *srcA; // left operand; read as srcA[row*m + j]
	const ValueType *srcB; // right operand; read as srcB[j*p + k]
	ValueType *dest;       // result; row `row` is written at dest[row*p + k]
	int n;                 // rows of srcA (set by the caller; Convoluter::consume never reads it)
	int m;                 // columns of srcA == rows of srcB
	int p;                 // columns of srcB == columns of dest
} InNOut;

// PC model related 

typedef int Slice; // unit of work: a row index into srcA and dest (see Convoluter::consume)

// Shorthand for the SPACE_YIN producer/consumer templates (presumably
// declared in ProCon.h -- confirm) instantiated for Slice work items.
typedef std::vector<Slice> Slices; 
typedef SPACE_YIN::Pool<Slice> SlicePool;
typedef SPACE_YIN::Consumer<Slice, SlicePool > SliceConsumer;
typedef SPACE_YIN::Producer<Slice, SlicePool > SliceProducer;

// Forward declarations: the model typedefs below name these types before
// they are defined.
struct Convoluter; // subclass of SliceConsumer
struct Scheduler;  // subclass of SliceProducer

// Model types that the two test drivers derive from.
typedef SPACE_YIN::Consuming<Slice, SlicePool, Convoluter > Convolute;
typedef SPACE_YIN::Production<Slice, SlicePool, Scheduler, Convoluter > Convolute2;

// Forward declarations of the two test drivers defined further down.
struct Convolution;  // subclass of Convolute, for consumer pool test
struct Convolution2; // subclass of Convolute2, for producer-consumer test

struct Convoluter : public SliceConsumer
{
	InNOut* ino;
	Convoluter( SlicePool& slices, 
		SPACE_YIN::Latch& lh) 
		: SliceConsumer(slices, lh) {}

protected:
	void consume(Slice slice_i) 
	{
#define srcA ino->srcA
#define srcB ino->srcB
#define dest ino->dest
#define n ino->n
#define m ino->m
#define p ino->p
      register int destIndex;
	  register int j,k;

      destIndex = slice_i*p;
	 
      for (j = 0; j < p; j++)
	   dest[destIndex+j] = 0;
	 
      // for every column in the first matrix 
      for (j = 0; j < m; j++)
	   {		
		ValueType srcVal;
		int srcBIndex;
		
		srcVal = srcA[slice_i*m+j];

		destIndex = slice_i*p;
		srcBIndex = j*p;
		
		for (k = 0; k < p; k++)
		    dest[destIndex++] += srcVal*srcB[srcBIndex++];
	   }
#undef srcA 
#undef srcB  
#undef dest  
#undef n  
#undef m  
#undef p  
	} 
	bool cancel() { 
		return !channel_.channel_.size(); // may stop if no more tasks
	} 
};

// Producer that hands out the row indices 0, 1, ..., slice_max - 1 in
// order, one per produce() call.
struct Scheduler : public SliceProducer
{
	// Next row index to hand out. volatile does not make the
	// read-then-increment in produce() atomic.
	volatile Slice slice_i;
	// One past the last row index to hand out. Zero (nothing to produce)
	// until the owning model assigns it.
	size_t slice_max;
	Scheduler( SlicePool& slices, 
		SPACE_YIN::Latch& lh) 
		: SliceProducer(slices, lh), slice_i(0), slice_max(0) {}

protected: 
	// Returns the next row index and advances slice_i.
	// Throws SPACE_YIN::pc_exception("producer done") once slice_i has
	// reached slice_max.
	virtual Slice produce() { 
		// slice_i starts at 0 and only grows, so the conversion to the
		// unsigned type of slice_max is value-preserving.
		if(static_cast<size_t>(slice_i) < slice_max) 
		{
			// needs locking if more than one schedulers
			const Slice next = slice_i;
			slice_i = next + 1;
			return next;
		}
		throw SPACE_YIN::pc_exception("producer done");
	} 
	// Returns true once every row index has been handed out.
	virtual bool cancel() { 
		return !(static_cast<size_t>(slice_i) < slice_max);
	} 
};

// consumer pool test
// Consumer-pool test model: nc Convoluter consumers working on a SlicePool
// that the caller has filled in advance.
struct Convolution : public Convolute
{
	// Problem description copied into every consumer. Null until the
	// caller assigns it.
	InNOut* ino;
	Convolution(SlicePool& channel, size_t nc = 1)
		: Convolute (channel, nc), ino(NULL) {} 

	// Gives the consumer access to the matrices.
	// NOTE(review): presumably a hook the Consuming base invokes for each
	// consumer it creates -- confirm in ProCon.h.
	void consumerModelCreated(Convoluter& consumer) 
	{
		consumer.ino = ino;
	}
};

//producer-consumer test
struct Convolution2 : public Convolute2
{
	size_t slice_max;
	InNOut* ino;
	explicit Convolution2(size_t np, size_t nc)
		: Convolute2 (np, nc) {} 

	virtual void pcModelCreated(Scheduler& producer, Convoluter& consumer) {
		producer.slice_max = slice_max;
		consumer.ino = ino;
	}
};

// Spot-check output: prints only the first element of matrix `a` to
// std::cout. The dimension arguments `b` and `c` are evaluated but otherwise
// unused. Wrapped in do/while(0) so the macro acts as a single statement.
#define PrintMatrix(a, b, c) do { (void)(b); (void)(c); std::cout << *(a) << std::endl; } while (0)

// Sizing of the models built in RunMatrixMultiplyTest().
const int nProducers = 1;   // producer count passed to Convolution2
const int nConsumers = 128; // cap on the consumer count; the tests use min(matrix size, nConsumers)
 
void RunMatrixMultiplyTest(int sizeN)
{
  int i;
  long time;

  ValueType *srcA;
  ValueType *srcB;
  ValueType *dest;
  ValueType *dest2;

  srcA = (ValueType *)malloc(sizeN*sizeN*sizeof(ValueType));
  srcB = (ValueType *)malloc(sizeN*sizeN*sizeof(ValueType));
  dest = (ValueType *)malloc(sizeN*sizeN*sizeof(ValueType));
  dest2 = (ValueType *)malloc(sizeN*sizeN*sizeof(ValueType));

  if((long)srcA*(long)srcB*(long)dest*(long)dest2 == 0)
  {
	  std::cout << "Out of memory" << std::endl; exit(-1);
  }

  FillMatrix(srcA, sizeN*sizeN);
  FillMatrix(srcB, sizeN*sizeN);

  PrintMatrix(srcA, sizeN, sizeN);
  PrintMatrix(srcB, sizeN, sizeN);  

	boost::xtime xt, xt2;
	{ // single thread computation...
	boost::xtime_get(&xt, boost::TIME_UTC);
	MatrixMultiply(srcA, srcB, dest2, sizeN, sizeN, sizeN);
	boost::xtime_get(&xt2, boost::TIME_UTC);
	time = (xt2.sec*1000000000 + xt2.nsec - xt.sec*1000000000 - xt.nsec) / 1000000;
	std::cout << *dest2 << std::endl;
    
  printf("single thread: %ld msec\n", time);
  }
 
	boost::xtime_get(&xt, boost::TIME_UTC);

  InNOut space;
  space.srcA = srcA;
  space.srcB = srcB;
  space.dest = dest;
  space.n = space.m = space.p = sizeN;

  { // consumer pool model...
  Slices indices;
  for(i = 0; i < sizeN; i++) indices.push_back(i);

	SlicePool pool(indices);
	sesame::Convolution convolution(pool, 
		indices.size() > nConsumers ? nConsumers : indices.size());
	convolution.ino = &space;

	boost::thread thrd(convolution);
	thrd.join();

	boost::xtime_get(&xt2, boost::TIME_UTC);
	time = (xt2.sec*1000000000 + xt2.nsec - xt.sec*1000000000 - xt.nsec) / 1000000;
  
  PrintMatrix(dest, sizeN, sizeN);  
    
  printf("max %d consumer pool: %ld msec\n", nConsumers, time);
  }

#define VERIFY_RESULTS
#ifdef VERIFY_RESULTS
  
  for (i = 0; i < sizeN*sizeN; i++)
    {
	 if (dest[i] != dest2[i])
	   {
		fprintf(stderr,"Error in calculations %d\n",i);
		std::cout << "    difference " << dest[i] << " - " << dest2[i] << std::endl;
		exit(-1);  
	   }
    }

  fprintf(stderr,"Calculation check successful.\n");
#endif
	boost::xtime_get(&xt, boost::TIME_UTC);

  { // producer-consumer model
	sesame::Convolution2 convolution2(nProducers, 
		sizeN > nConsumers ? nConsumers : sizeN);
	convolution2.ino = &space;
	convolution2.slice_max = sizeN;

	boost::thread thrd2(convolution2);
	thrd2.join();

	boost::xtime_get(&xt2, boost::TIME_UTC);
	time = (xt2.sec*1000000000 + xt2.nsec - xt.sec*1000000000 - xt.nsec) / 1000000;
  
  PrintMatrix(dest, sizeN, sizeN);  
    
  printf("%d producer, max %d consumers: %ld msec\n", nProducers, nConsumers, time);
  }

#ifdef VERIFY_RESULTS
  
  for (i = 0; i < sizeN*sizeN; i++)
    {
	 if (dest[i] != dest2[i])
	   {
		fprintf(stderr,"Error in calculations %d\n",i);
		std::cout << "    difference " << dest[i] << " - " << dest2[i] << std::endl;
		exit(-1);  
	   }
    }

  fprintf(stderr,"Calculation check successful.\n");
#endif

  free(srcA);
  free(srcB);
  free(dest);
  free(dest2);
}

}

// Entry point: program <size of matrix>
//
// Prints the usage line, then runs the matrix multiply test for the given
// size. Returns -1 when the size argument is missing, is not a number, is
// not positive, or does not fit in an int; returns 0 otherwise.
int main(int argc, char * argv[])
{
	printf("Usage: program <size of matrix>\n");
	if(argc < 2) return -1;

	// strtol reports "no digits at all" through `end`, which atoi cannot.
	// Characters after the number are tolerated, as they were with atoi.
	char* end = NULL;
	const long requested = strtol(argv[1], &end, 10);
	if(end == argv[1] || requested <= 0 || requested > INT_MAX)
	{
		fprintf(stderr, "Invalid matrix size: %s\n", argv[1]);
		return -1;
	}

	//for(int j = 0; j < 100000; j++) {
		sesame::RunMatrixMultiplyTest(static_cast<int>(requested));
		//new char[12345]; // intentional memory leak test, make sure the leak detecter finds it...
		//SPACE_YIN::sleep(0, 100); 
	//}
	return 0;
}

#undef SPACE_YIN
