//#include "chronometer.hpp"
#include "base.h"
#include <stdlib.h>
#include "listT.h"
#include "listBase.h"

//! apply the user's function on one input matrix
/*!
  The output matrix is m2 when the caller supplies one; otherwise a new
  DMatrix<T2> is built from the global header of m and the caller receives it.

  \param func user's function, called as func(m, output)
  \param m input matrix
  \param m2 optional output matrix (NULL means "allocate one")
  \param exchanges when true, m->getBorders() is called before func
  \return the output matrix handed to func
*/
//-------------------------------------------------------------------------------
template<class T, class T2>
DMatrix<T2> * apply(const _ApplyOneMat<T,T2>& func,
                    DMatrix<T> * m,
                    DMatrix<T2> * m2=NULL,
                    bool exchanges=true)
//-------------------------------------------------------------------------------
{
  // pick the caller's output matrix, or create one shaped by the input's global header
  DMatrix<T2> * result = m2;
  if(result==NULL)
    {
      result = new DMatrix<T2>(m->getGlobalHeader());
    }

  //-----exchange of data on the input, unless the caller disabled it
  if(exchanges)
    {
      m->getBorders();
    }
  //-----

  //-----call to f
  func(m,result);
  //-----

#ifdef _OPENMP
#pragma omp flush
#endif

  return result;
}
//------------------------------------------------------------------------------- 

//! apply the user's function on two input matrices
/*!
  The output matrix is m3 when the caller supplies one; otherwise a new
  DMatrix<T3> is built from the global header of m and the caller receives it.
  getBorders() is called on m only, never on m2.

  \param func user's function, called as func(m, m2, output)
  \param m first input matrix
  \param m2 second input matrix
  \param m3 optional output matrix (NULL means "allocate one")
  \return the output matrix handed to func
*/
//-------------------------------------------------------------------------------
template<class T, class T2, class T3>
DMatrix<T3> * apply(const _ApplyTwoMat<T,T2,T3>& func,
                    DMatrix<T> * m,
                    DMatrix<T2> * m2,
                    DMatrix<T3> * m3=NULL)
//-------------------------------------------------------------------------------
{
  // pick the caller's output matrix, or create one shaped by the first input's global header
  DMatrix<T3> * result = m3;
  if(result==NULL)
    {
      result = new DMatrix<T3>(m->getGlobalHeader());
    }

  //-----exchanges of data (first input only)
  m->getBorders();
  //-----

  //-----call to f
  func(m,m2,result);
  //-----

#ifdef _OPENMP
#pragma omp flush
#endif

  return result;
}
//-------------------------------------------------------------------------------

//-------------------------------------------------------------------------------
//!
/*!
  Generalisation of the Apply for a list of DMatrix as input and a list of DMatrix as output
  For now you have to give non NULL DMatrix in the list in input and output, the skeleton do not allocate the outputs
  A list of borders is the last argument, and associated to the input Dmatrix list
  No borders is getted from output martices
  You have a single input type T and a single output type T2
      
  \param func user's function
  \param inputs list of input DMatrix
  \param borders is the list of borders wanted associated to inputs
  \return a list of output DMatrix
*/
//-------------------------------------------------------------------------------
template<class T, class T2> void applyList(const _ApplyList<T,T2>& func, std::vector<DMatrix<T> * >& inputs, std::vector<DMatrix<T2> * >& outputs/*, std::vector<int> borders*/)
//-------------------------------------------------------------------------------
{
  //Time::Chronometer chrono1;
  //chrono1.start();

  //-----exchanges of data
  for(int i=0;i<inputs.size();i++)
   {
     //int border = borders[i];
     DMatrix<T> * m = inputs[i];
     if(NULL!=m/* && border>0*/)
       m->getBorders(/*border*/);
   }
  //-----

  //-----call to f
  func(inputs,outputs);
  //-----

  /*  chrono1.stop();
#ifdef _OPENMP
#pragma omp critical
    {
#endif
      std::cout<<" Time ApplyList CALCULATION : "<<chrono1<<" ";
#ifdef _OPENMP
    }
    #endif*/

#ifdef _OPENMP
#pragma omp flush
#endif
}

//-------------------------------------------------------------------------------
//!
/*!
  apply a user fonction with a recovery of data (border) on a distributed matrix m and returns a single value
  this apply version can be seen as a kind of reduce on matrix. It supposes that for a matrix M of the form
  M11 M12 f(M) = f(f(M11) f(M12)
  M21 M22          f(M21) f(M22))
      
  \param func user's function
  \param m input matrix
  \param m2 second input matrix
  \param border is the matrix with border?
  \return DMatrix matrix after appling the user's function
*/
//-------------------------------------------------------------------------------
template<class T> T apply(const _ApplyJustOneMat<T>& func, DMatrix<T> * m/*, int border=0*/)
//-------------------------------------------------------------------------------
  {
    T res;
    typename DMatrix<T>::iterator it;
    //Time::Chronometer chrono0;
    MPI_Status status;
    int MyOwnTag = Mpiomp::mpi_nb;    

    //chrono0.start();

    //-----exchanges of data
    //if(border>0)
      m->getBorders(/*border*/);
    //-----
    
    //-----call to f
    res=func(m);
    //-----

    //====================OMP
#ifdef _OPENMP
    //init of apply3 shared values if needed
#pragma omp master
    {
      if(Mpiomp::apply3_results==NULL)
	{
	  int v = -1;
	  ListT<T> * table = new ListT<T>((T)v,Mpiomp::omp_nb);
	  Mpiomp::InitApply3((ListBase *)table);
	}
    }
#pragma omp barrier
    
    //put res value in the apply3_results shared table
    ((ListT<T> *)(Mpiomp::apply3_results))->values[omp_get_thread_num()]=res;
    
#pragma omp flush
#pragma omp barrier
    //thread 0 do the Apply3 for the machine
#pragma omp master
    {
      HEADER h=m->getHeader();
      h.width=Mpiomp::omp_nb;
      h.height=1;

      DMatrix<T> * ompRes= new DMatrix<T>(h,1);
      typename DMatrix<T>::iterator itomp= ompRes->begin();
      for (int i=0; i < Mpiomp::omp_nb; i++)
	{
	  ompRes->setValue(((ListT<T> *)(Mpiomp::apply3_results))->values[i], itomp);
	  ++itomp;
	}

      res=func(ompRes);
      /*std::stringstream st3;
	st3<<"Result thread 0 apply3 "<<res<<"\n";
	Mpiomp::print(st3.str());*/
      delete ompRes;
    }
#endif
    //====================
    
    if(Mpiomp::mpi_nb>1)
      {
	if (Mpiomp::mpi_rank==0)
	  {
#ifdef _OPENMP
#pragma omp master
	    {
#endif
	      HEADER h=m->getHeader();
	      h.width=Mpiomp::mpi_nb;
	      h.height=1;
	      
	      DMatrix<T> * mRes= new DMatrix<T>(h,1);
	      //affectation of the proc 0 to the temporary result matrix
	      it= mRes->begin();
	      mRes->setValue(res, it);
	      ++it;
	      
	      T msg;
	       for (int i=1; i < Mpiomp::mpi_nb; i++)
		{
		  //receive result from other proc , other than proc0
		  MPI_Recv(&msg, 1, MPI_FLOAT, i, MyOwnTag, MPI_COMM_WORLD, &status);
		  
		  mRes->setValue(msg, it);
		  ++it;
		  }
	      //the temporary matrix now is analysed by the user function, to get only one output result
	      res=func(mRes);
	      
	      delete mRes;
#ifdef _OPENMP
	    }
#endif
	  }
	else
	  {
#ifdef _OPENMP
#pragma omp master
	    {
#endif
	      MPI_Send(&res, 1, MPI_FLOAT, 0, MyOwnTag, MPI_COMM_WORLD);
#ifdef _OPENMP
	    }
#endif
	  }
      }
    
    /*chrono0.stop();

#ifdef _OPENMP
#pragma omp critical
    {
#endif
    std::cout<<" Time Apply 3 CALCULATION : "<<chrono0<<" ";
#ifdef _OPENMP
    }
#endif*/

    //SEND TO EVERYONE ???

    /*
    m->print();
#ifdef _OPENMP
#pragma omp master
    {
#endif
    std::stringstream st;
    st<<"Value returned by Apply3 (Reduce)"<<res<<"\n";
    Mpiomp::print(st.str());
#ifdef _OPENMP
    }
    #endif*/

    return res;
  }

//-------------------------------------------------------------------------------
//! apply the user's function on an input matrix and compare the two working matrices
/*!
  Two working matrices m1 and m2 are built from the global header of m. When maxIt
  is greater than 0 the skeleton calls m->getBorders(), then func(m1,m,m2), then
  walks m1 and m2 together and compares abs(*it1 - *it2) with delta.
  m1 is deleted and m2 is returned; the caller receives m2.
  When maxIt is not greater than 0 nothing is computed and m2 is returned as built.

  NOTE(review): only one pass is ever executed. The guard
  "if(!pasFini || cptIt == maxIt)" that used to protect the final delete/return was
  commented out together with the timing code, so the loop body always ended with the
  return. That behaviour is kept here (explicit break). Before restoring the iteration,
  decide:
  - whether the working matrices must be swapped once per pass (today they are swapped
    once per element that is not within delta);
  - whether "finished" means every element is within delta (today one element is enough);
  - how the stop decision is shared between processes.
  NOTE(review): m2 is a DMatrix<T>* returned as DMatrix<T2>*, so this only
  instantiates when both types are the same.

  \param func user's function, called as func(m1,m,m2)
  \param m input matrix
  \param delta threshold compared with the absolute difference of the working matrices
  \param maxIt iteration bound
  \return working matrix m2
*/
//-------------------------------------------------------------------------------
template<class T, class T2> DMatrix<T2> * applyStar(const _ApplyStar<T,T2>& func, DMatrix<T> * m, T delta, T maxIt/*, int border=0*/)
//-------------------------------------------------------------------------------
{
 DMatrix<T> * m1 = new DMatrix<T>(m->getGlobalHeader(), -1);
 DMatrix<T> * m2 = new DMatrix<T>(m->getGlobalHeader(), -1);
 DMatrix<T> * swapTmp = NULL;

 bool notFinished=true;
 int iterations=0;

 while(iterations < maxIt && notFinished)
 {
  iterations++;

  //-----exchanges of data
  m->getBorders(/*border*/);

  //-----call to f
  func(m1,m,m2);

  //-----element by element comparison of the two working matrices
  typename DMatrix<T>::iterator itBeg = m1->begin();
  typename DMatrix<T>::iterator itEnd1 = m1->end();
  typename DMatrix<T>::iterator itBeg2 = m2->begin();
  typename DMatrix<T>::iterator itEnd2 = m2->end();
  for(; itBeg <= itEnd1 && itBeg2 <= itEnd2; ++itBeg, ++itBeg2){
    if(abs((*itBeg) - (*itBeg2)) < delta){
      notFinished = false;
    }
    else{
      swapTmp=m1;
      m1=m2;
      m2=swapTmp;
    }
  }

  // single pass, as before (see the review note in the header)
  break;
 }

 // reached in every case, including maxIt <= 0: the function used to fall off its end
 // there, without a return value and without releasing the working matrices
 delete m1;
 return m2;
}

//-------- convenience wrappers built on top of the apply skeletons

//! apply the user's function on one input matrix, hiding the matrix course
/*!
  Wraps func in a mapStruct and forwards it to apply<T,T2> without an output
  matrix, so the result matrix is the one allocated by apply.

  \param func user's function
  \param m input matrix
  \return DMatrix<T2> returned by apply
*/
template<class T, class T2> DMatrix<T2> * map(const _map<T, T2>& func, DMatrix<T> * m)
  //-------------------------------------------------------------------------------
  {
    mapStruct<T,T2> map_f;
    map_f.func = &func;

    // NULL = no preallocated output (the former literal 0 dated from the removed border argument)
    DMatrix<T2> * mat2 = apply<T, T2>(map_f, m, NULL);
    return mat2;
  }
// End map 
//-------------------------------------------------------------------------------

//! apply the user's function on two input matrices, hiding the matrix course
/*!
  Wraps func in a zipWithStruct and forwards it to apply<T,T2,T3> without an
  output matrix, so the result matrix is the one allocated by apply.
  The types follow the apply<T,T2,T3> call: m is a DMatrix<T>, m2 a DMatrix<T2>
  and the result a DMatrix<T3>.

  \param func user's function
  \param m first input matrix
  \param m2 second input matrix
  \return DMatrix<T3> returned by apply
*/
template<class T, class T2, class T3> DMatrix<T3> * zipWith(const _zipWith<T, T2, T3>& func, DMatrix<T> * m, DMatrix<T2> * m2)
  //-------------------------------------------------------------------------------
  {
    zipWithStruct<T, T2, T3> Zip_f;
    Zip_f.func = &func;

    // NULL = no preallocated output (the former literal 0 dated from the removed border argument)
    DMatrix<T3> * mat3 = apply<T, T2, T3>(Zip_f, m, m2, NULL);
    return mat3;
  }
// End zipWith
//-------------------------------------------------------------------------------

//! apply the user's function on one input matrix, hiding the matrix couse
/*!
  \param func user's function
  \param m input matrix
  \return DMatrix matrix after appling the user's function
*/
template<class T> T reduce(const _reduce<T>& func, DMatrix<T> * m)
//-------------------------------------------------------------------------------
  {
    reduceStruct<T> red_f;
    red_f.func = &func;

    T res = apply<T>(red_f, m);
    return res;
    }
// End reduce
//-------------------------------------------------------------------------------
