/*  _________________________________________________________________________
 *
 *  COLIN: A Common Optimization Library INterface
 *  Copyright (c) 2003, Sandia National Laboratories.
 *  This software is distributed under the GNU Lesser General Public License.
 *  For more information, see the README.html file in the top COLIN directory.
 *  _________________________________________________________________________
 */

#if !defined(DOXYGEN)
/** \file AnneColinMSApplication.h
 */

//
// ColinMSApplication.h
//
// Possible modes:
//  1.	Send k messages at a time, without waiting for a response from
//	the slaves.
//  2.  Send k messages at a time, waiting for the slave to respond to the
//	last messages. (unimplemented)
//  3.  Eval on master and slaves. (unimplemented)
//
// TODO - add eval-on-master capabilities:
//	run through size-1 batch sends, and then do a batch-eval on
//	the master.  This assumes that ... (this note was left unfinished)

#ifndef colin_ColinMSApplication_h
#define colin_ColinMSApplication_h

#include <acro_config.h>
#ifdef USING_MPI
#include <mpi.h>
#include <utilib/PackBuf.h>
#endif
#include <colin/ColinApplication.h>
#include <colin/ColinProblem.h>
#include <colin/ColinAnalysisCode.h>
#include <utilib/BasicArray.h>
#include <utilib/LinkedList.h>  // %%%% added 6/04 plsx

namespace anne {


PROBLEM_TEMPLATE
class OptProblem;

PROBLEM_TEMPLATE
class MSApplication;


/** \class ResponseStatus
  *
  * Bookkeeping record that \c MSApplication keeps for one message exchange
  * between the master and a slave process: the response object that is
  * being filled in, the raw receive buffer and, in MPI builds, the pack
  * buffer together with the request handles of the pending send and
  * receive.
  *
  * The constructor is protected, so instances are created through the
  * \c MSApplication friend.
  */
PROBLEM_TEMPLATE
class ResponseStatus
{

  friend class MSAPPLICATION ;

public:

  /// Raw storage that an incoming message is received into.
  string recvbuf;

  /// The response object tied to this exchange (null until one is attached).
  ResponseT * response;

#ifdef USING_MPI
  /// Buffer that holds the packed outgoing message.
  PackBuffer pack;

  /// MPI handle of the non-blocking send issued for this record.
  MPI_Request send_request;

  /// MPI handle of the non-blocking receive posted for this record.
  MPI_Request recv_request;
#endif

protected:

  /// Creates a record with no response attached.
  ResponseStatus()
    : response(0)
  { }
};



/** \class MSApplication
  *
  * Master-slave application.  The master queues points for evaluation and
  * hands them to slave processes with MPI messages; the slaves run
  * \c exec_server() and send the computed responses back.
  *
  * Two dispatch paths exist: the original one-at-a-time path (arrays
  * \c array / \c bufsize / \c offset) and a batch path added in 6/04 that
  * keeps a task list and a per-slave agent list.
  */
PROBLEM_TEMPLATE
class MSApplication : public OptApplication<DomainT,ResponseT>
{
public:
 
  /// Generic constructor.
  /// \param batch_size_      requested batch size (the constructor currently
  ///                         overrides it with 1).
  /// \param eval_on_master_  stored in \c eval_on_master.
  MSApplication(int batch_size_, 
						bool eval_on_master_);

  /// Releases the status records and agents allocated by this object.
  virtual ~MSApplication();

  /// Services slave requests with the \c finish flag set.
  void synchronize();

  // %%%% Added these two, 6/04 ---pls
  /// Fills \c agent_pri with one priority per server.
  void do_agent_prioritization();

  /// Attaches unassigned tasks from \c tasklist to idle agents.
  void map_tasks_to_agents();

  /// Returns the id of a completed evaluation, or -1 if none is available.
  int next_eval();

  /// Batch-mode counterpart of \c next_eval().
  int next_eval_batch();   // %%%% added 6/04 ---pls

  /// Returns the number of processes in MPI_COMM_WORLD; a build without
  /// MPI consists of a single process and therefore reports 1.
  /// NOTE(review): the MPI count includes the master (rank 0), while
  /// terminate() addresses the slaves as ranks 1..size-1 -- confirm
  /// whether size-1 is what callers actually want here.
  unsigned int num_evaluation_servers()
  {
  // Declared outside the MPI guard so the return statement is valid in
  // non-MPI builds as well.
  int size = 1;
#if defined(USING_MPI)
  MPI_Comm_size(MPI_COMM_WORLD,&size);    // Compute number of processors
#endif
  return static_cast<unsigned int>(size);
  }

  /// Always returns false in the current implementation.
  bool terminate_eval(const int id);

  /// Number of evaluations currently queued on the one-at-a-time path.
  unsigned int num_queued_evaluations()
	{ return ndx; }

  /// Slave-side loop: receives points, evaluates them with \a prob and
  /// sends the responses back until a TERMINATE message arrives.
  void exec_server( OPTPROBLEM & prob);


  /// Sends a TERMINATE message to every rank from 1 upward.
  void terminate();

  /// Non-zero enables diagnostic output on \c ucout.
  int debug;

  /// Problem used for synchronous evaluations on the master.
  OPTPROBLEM problem;

protected:

  /// Copy of the constructor argument (not read elsewhere in this file).
  bool eval_on_master;

  /// True once start_async_batch()/end_async_batch() selected the batch path.
  bool usingbatchversion;

  /// Batch size (forced to 1 by the constructor).
  int batch_size;

  /// Batch counter (only reset by the constructor in the active code).
  int batch_ctr;
  
  /// %%%% Structs added 6/04 pls
  /// One queued evaluation on the batch path.
  struct taskstruct{
    DomainT * pt;     // pointer to the point itself
    int * pri;        // pointer to the priority of the task
    int * pt_id;      // pointer to the id that came in with the point
    ResponseT * resp;  // response obj
    int agentrank;    // which agent is in charge of this task (-1 == none)
    bool evaluated;   // true if the point has been evaluated
  };
  typedef struct taskstruct task;

  /// Master-side record of one slave process on the batch path.
  struct agentstruct{
    int rank;        // rank, in MPI terms 
    int pri;         // priority of this agent
    bool idle;       // if true, has no task
    bool ready;      // if true, has requested work.
    task * t;        // points into the tasklist
    // ResponseStatus is a template, so the fully specified RESPONSESTATUS
    // type is required here (same type as the array/bufsize elements).
    RESPONSESTATUS * RS_data;        //  These take the place of the
    RESPONSESTATUS * RS_bufsize;     //     array and bufsize vectors
  };
  typedef struct agentstruct agent;

  /// %%%% Added linked list fields 6/04 pls
  /// Tasks gathered for the batch path.
  LinkedList<task> tasklist;

  /// Cursor into \c tasklist shared by the batch-path methods.
  typename LinkedList<task>::iterator t_curr;

#if defined(COUGAR) || defined(TFLOPS_SERVICE)
  /// Status records holding the packed point / response of each queue slot.
  vector< RESPONSESTATUS *> array;

  /// Status records whose send request carries the buffer-size message.
  vector< RESPONSESTATUS *> bufsize;
#else
  /// Status records holding the packed point / response of each queue slot.
  BasicArray< RESPONSESTATUS *> array;

  /// Status records whose send request carries the buffer-size message.
  BasicArray< RESPONSESTATUS *> bufsize;
#endif

  /// One agent per server, created by the constructor.
  BasicArray< agent *> agentlist;  // %%%% added 6/04 pls

  /// Indirection table: offset[i] is the index into array/bufsize that
  /// belongs to queue position i.
  vector<int> offset;

  // %%%% pls---Added 6/04
  /// For the priorities--by rank
  vector <int> agent_pri;

  /// The total number of queued evaluations
  unsigned int ndx;

  /// Ids of completed evaluations that next_eval() has not yet handed out.
  vector<int> id_flags;

  /// Number of valid entries in \c id_flags.
  unsigned int id_ndx;

  /// The next queued evaluation that needs to be sent out
  unsigned int next_pt;

  // if true, we are gathering points for later eval/prioritization
  bool gathering;

  /// Largest packed message size seen so far (starts at 256); this value
  /// is sent to a slave ahead of each point.
  int max_buf_size;

  // %%%% added pls 6/04
  /// Number of servers reported by num_evaluation_servers().
  int num_servers;
  /// Count of agents without a task.
  int idle_servers;
  /// Count of gathered tasks that have no agent yet.
  int unassigned_tasks;

#ifdef USING_MPI
  /// The MPI request object used to wait for requests from slaves
  MPI_Request feval_request;
#endif

  /// Switches to the batch path and starts gathering points.
  void start_async_batch(){
    usingbatchversion = true;
    gathering = true;
  }

  /// Stays on the batch path but stops gathering points.
  void end_async_batch(){
    usingbatchversion = true;
    gathering = false;
  }

  /// Routines that service the requests from the slaves
  void service_requests(bool finish=false);

  // %%%% added 6/04 pls
  /// Batch-path counterpart of \c service_requests().
  void service_requests_batch(bool finish=false);

  /// %%%% changed 6/04 pls
  /// Queues (or, with \a synch_flag, directly performs) one evaluation.
  void DoEval(DomainT& point, int& priority, ResponseT* response, 
	      bool synch_flag);

  // %%%% added 6/04 pls
  /// Processes the gathered task list.  This no-argument form is the one
  /// that is defined out of line and called from DoEval().
  void DoEval_batch();

  // %%%% added 6/04 pls
  /// Vector-based form of the batch evaluation.
  /// NOTE(review): no definition with this signature is visible in this
  /// file; it is kept so existing references keep compiling.
  void DoEval_batch(vector<DomainT *> & points, 
		    vector<int *> & task_priorities,
		    vector<ResponseT *> & response_vec, 
		    bool synch_flag = false);
};



// MPI message tags used between the master (rank 0) and the slaves.

// Zero-length message a slave sends to the master to ask for work.
#define FEVAL_REQUEST   1
// Packed response a slave sends back to the master after an evaluation.
#define FEVAL_RESPONSE  2
// Zero-length message the master sends to tell a slave to stop.
#define TERMINATE 	3
// Single int (the master's max_buf_size) sent ahead of each point.
#define BUF_SIZE_MSG 	4
// Packed point (check value, response, domain point) sent to a slave.
#define FEVAL_PT	5
// Not referenced anywhere in the code visible in this file.
#define NUM_BUFS	10


#ifdef USING_MPI
/*
static void errfn()
{ EXCEPTION_MNGR(runtime_error,"MPI Error function!") }

static MPI_Errhandler errhandle;
*/
#endif



//============================================================================
//
//
/// Sets up the master-side bookkeeping.
///
/// Initializes the queue counters, creates one agent record (with two
/// status records each) per server reported by num_evaluation_servers(),
/// forces the batch size to 1 and, in MPI builds, posts the first
/// non-blocking receive for a work request from any slave.
///
/// \param batch_size_      requested batch size; overridden with 1 below.
/// \param eval_on_master_  stored in \c eval_on_master.
PROBLEM_TEMPLATE
MSAPPLICATION
	::MSApplication(int batch_size_, bool eval_on_master_)
  // Listed in declaration order so the written order is the order in
  // which the members are really initialized.
  : debug(0),
    eval_on_master(eval_on_master_),
    usingbatchversion(false),
    batch_size(batch_size_),
    batch_ctr(0),
    ndx(0),
    id_ndx(0),
    next_pt(0),
    gathering(false),
    max_buf_size(256),
    unassigned_tasks(0)
{
  //app_mode = problem.app_mode();

  num_servers = num_evaluation_servers();
  idle_servers = num_servers;
  agentlist.resize(num_servers);
  for(int k = 0; k < agentlist.size(); k++){
    agentlist[k] = new agent;
    // Agent k stands for MPI rank k+1; rank 0 is the master.
    agentlist[k]->rank = (k+1); 
    agentlist[k]->pri = -1;
    agentlist[k]->idle = true; 
    agentlist[k]->ready = false;
    // The task pointer of an agent is the field 't'.
    agentlist[k]->t = NULL;
    // ResponseStatus is a template; RESPONSESTATUS is the complete type.
    agentlist[k]->RS_data = new RESPONSESTATUS;
    agentlist[k]->RS_bufsize = new RESPONSESTATUS;
  }
  //
  // batch sizing doesn't work right now, so set it to one.
  //
  batch_size=1;
#ifdef USING_MPI
  // Zero-length receive: only the arrival (and the source) of the work
  // request matters.
  MPI_Irecv(0,0,MPI_CHAR,MPI_ANY_SOURCE,FEVAL_REQUEST,MPI_COMM_WORLD,
	    &feval_request);
  /*
    MPI_Errhandler_create(errfn, &errhandle);
    MPI_Errhandler_set(MPI_COMM_WORLD,errhandle);
  */
#endif
}     


//============================================================================
//
//
/// Frees every status record and agent owned by this object and empties
/// the task list.
PROBLEM_TEMPLATE
MSAPPLICATION
	::~MSApplication()
{
  // Status records of the one-at-a-time path.
  unsigned int slot = 0;
  while (slot < array.size()) {
    delete array[slot];
    ++slot;
  }

  slot = 0;
  while (slot < bufsize.size()) {
    delete bufsize[slot];
    ++slot;
  }

  // Agents of the batch path: first their two status records, then the
  // agent record itself.
  for (int a = 0; a < agentlist.size(); ++a) {
    agent* worker = agentlist[a];
    delete worker->RS_data;
    delete worker->RS_bufsize;
    delete worker;
  }

  tasklist.clear();
}


//============================================================================
//
//
// %%%%% FIXME: change this so that it only queues the point in a
//    list of some sort.  A separate method (process_tasklist or
//    something similar) should then do the sending of the queued
//    work to the slaves.
//
//      
/// Requests the evaluation of one point.
///
/// - With \a synch_flag set, the point is evaluated immediately on the
///   master through \c problem.
/// - On the batch path while gathering, the request is appended to
///   \c tasklist; once gathering has ended, DoEval_batch() is run instead.
/// - Otherwise the point is packed into the next free status record and
///   service_requests() is called to hand work to waiting slaves.
///
/// The whole body is compiled only in MPI builds.
///
/// \param point      the point to evaluate; the batch path stores its
///                   address, so it presumably must outlive the task --
///                   TODO confirm with callers.
/// \param priority   task priority; the batch path stores its address.
/// \param response   response object to fill in.
/// \param synch_flag true to evaluate synchronously on the master.
PROBLEM_TEMPLATE
void MSAPPLICATION
	::DoEval(DomainT& point, int& priority,
		 ResponseT* response, bool synch_flag)
{
#ifdef USING_MPI
  //
  // Verify that the appropriate mode is used
  //
  verify(response->info->mode);
  
  //
  // Synchronous computation? Note: do not need to 'maintain' the 
  // response id value, since this is only set in the call to Eval.
  //
  if (synch_flag) {
     problem.Eval(point,response->active_set_vector(),*response);
     return;
     }
    
  //================================================
  // %%%% Added for batch stuff
  //      ---pls, 6/04
  //================================================
  
  if(usingbatchversion && gathering){    
    tasklist.push_back();
    t_curr = tasklist.end();
    t_curr--;
    t_curr->pt = &point;
    t_curr->pri= &priority;
    // The task keeps a POINTER to the id (field 'pt_id'), which is how
    // next_eval_batch() reads it back.  Assumes response->info->id is an
    // int lvalue -- TODO confirm.
    t_curr->pt_id = &(response->info->id);
    t_curr->resp = response;
    // (The mode was already verified at the top of this function.)
    t_curr->agentrank = -1;
    t_curr->evaluated = false;
    unassigned_tasks++;
  } // if batch and gathering
  else if (usingbatchversion && !gathering){
    DoEval_batch();
  } // else if batch and we have finished gathering
  else{
  //================================================
  
  //
  // Check to see if the point has been evaluated already.
  //
    if (response_exists(point,*response)) {
      // synch_flag is known to be false here (the synchronous case
      // returned above), so the response is always handed back.
      CachedAllocator<ResponseT> :: deallocate(response);
      return;
    }
    
    //
    // Update application counters.  Note: We assume that constraints and 
    // function evaluations are both being computed here.
    //
    nprob_ctr++;
    neval_ctr++;
    if ((num_eq_constr+num_ineq_constr)>0)
      nconstr_ctr++;
    
    //
    // Pack the parameters in the array
    //
    // ..Resize the array if needed.
    //
    if (offset.size() == ndx) {
      array.resize(ndx+10);
      bufsize.resize(ndx+10);
      offset.resize(ndx+10);
      //   task_pri.resize(ndx+10);  // %%%% added 6/4/04 ---pls
      id_flags.resize(ndx+10);
      for (unsigned int i=ndx; i<offset.size(); i++) {
	array[i] = new RESPONSESTATUS;
	bufsize[i] = new RESPONSESTATUS;
	offset[i] = i;
	// *(task_pri[i]) = -1; // %%%% added 6/4/04 ---pls
      }
    }
    if (debug)
      ucout << "OFFSET: " << offset << endl << Flush;
    int id=offset[ndx];
    //
    // ..Do the packing
    // 

    // use the current id for this evaluation
    response->info->id_generate=false; 
    array[id]->response = response;
    array[id]->pack.reset();
    // Check value that exec_server() looks for at the start of a message.
    array[id]->pack << -987654321;
    array[id]->pack << *response;
    // This should work for fevals and gradients
    array[id]->recvbuf.resize(array[id]->pack.size()*2*(response->info->mode));
    array[id]->pack << point;
    if (static_cast<int>(array[id]->pack.size()) > max_buf_size)
      max_buf_size = array[id]->pack.size();
    if (debug) {
      ucout << "Master packing the point" << endl;
      ucout << "Size " << array[id]->pack.size() << endl;
      ucout << point;
      ucout << endl << *response;
      ucout << "RecvBuf-Len=" << array[id]->recvbuf.size() << endl << Flush;
    }
    response->info->id_generate=true;
    //
    // ..Show this as an available packed buffer if the batch counter is
    // ..large enough
    //
    /*
      batch_ctr++;
      if (batch_ctr == batch_size) {
      ndx++;
      batch_ctr=0;
      }
    */
    ndx++;
    
    //
    // Handle requests from the slaves
    //
    service_requests();
  } // else use the one-at-a-time async eval
#endif
}


//============================================================================
//============================================================================
//
//  %%%% Batch version added 6/04 pls
//
//============================================================================
//============================================================================
// %%%%% FIXME: change this so that it only queues the point in a
//    list of some sort.  A separate method (process_tasklist or
//    something similar) should then do the sending of the queued
//    work to the slaves.

/// Processes the gathered task list (batch path).
///
/// Steps, in order:
///  1. drop tasks that are evaluated and no longer attached to an agent,
///     handing their response objects back to the allocator;
///  2. prioritize the agents and map unassigned tasks to idle agents;
///  3. pack the point of every non-idle agent into that agent's status
///     record;
///  4. service pending slave requests.
///
/// The whole body is compiled only in MPI builds.
///
/// NOTE(review): step 3 packs every non-idle agent on each call, which
/// includes agents whose point may already have been sent -- confirm
/// whether a "sent" state is needed before this path is relied upon.
PROBLEM_TEMPLATE
void MSAPPLICATION
	::DoEval_batch()
{
#ifdef USING_MPI

  // we need to clean up the task list---get rid of tasks that have 
  // been evaluated, incl. deallocating their request fields. 

  t_curr = tasklist.begin();
  while(t_curr != tasklist.end()){
    if (t_curr->evaluated && (t_curr->agentrank == -1)){
      CachedAllocator<ResponseT> :: deallocate(t_curr->resp); 
      // erase() hands back the position that follows the removed task
      t_curr = tasklist.erase(t_curr);
    }
    else {
      // Tasks that stay in the list must be stepped over explicitly,
      // otherwise this loop never terminates.
      t_curr++;
    }
  }

  // Now we prioritize the machines and do the mapping of tasks to
  //    processors.  Then we do the packing. Then we call 
  //    service_requests_batch(). 

  do_agent_prioritization();
  map_tasks_to_agents();

  // ..Do the packing

  for(int q = 0; q < agentlist.size(); q++){
    if(agentlist[q]->idle){
      continue;
    }
    // The point and the response come from the task attached to the agent.
    task * work = agentlist[q]->t;
    if (work == NULL){
      continue;		// nothing attached, nothing to pack
    }

    // use the current id for this evaluation
    // NOTE(review): DoEval() sets id_generate back to true after packing;
    // this path has never done so -- confirm which is intended.
    work->resp->info->id_generate=false;
    agentlist[q]->RS_data->response = work->resp;
    agentlist[q]->RS_data->pack.reset();
    // Check value that exec_server() looks for at the start of a message.
    agentlist[q]->RS_data->pack << -987654321;
    agentlist[q]->RS_data->pack << *(work->resp);
    // This should work for fevals and gradients
    agentlist[q]->RS_data->recvbuf.resize(agentlist[q]->RS_data->pack.size()*
					  2*(work->resp->info->mode));
    agentlist[q]->RS_data->pack << *(work->pt);
    if (static_cast<int>(agentlist[q]->RS_data->pack.size()) > max_buf_size)
      max_buf_size = agentlist[q]->RS_data->pack.size();
    if (debug) {
      ucout << "Master packing the point" << endl;
      ucout << "Size " << agentlist[q]->RS_data->pack.size() << endl;
      ucout << *(work->pt);
      ucout << endl << *(work->resp);
      ucout << "RecvBuf-Len=" << agentlist[q]->RS_data->recvbuf.size() << endl << Flush;
    }
  }
  //
  // Handle requests from the slaves
  //
  service_requests_batch();
#endif
}


//============================================================================
//
//
/// Services the slaves on the one-at-a-time path (the batch path is
/// delegated to service_requests_batch()).
///
/// Each pass (a) hands the next queued point to a slave if a work request
/// has arrived and (b) collects every response that has come back.  One
/// pass is always made; with \a finish set, passes repeat until the queue
/// is empty.
///
/// \param finish  true to keep going until \c ndx reaches 0.  In that mode
///                the ids of completed evaluations are not recorded in
///                \c id_flags.
PROBLEM_TEMPLATE
void MSAPPLICATION
	::service_requests(bool finish)
{
#ifdef USING_MPI
  if(usingbatchversion){
    // Forward the caller's flag rather than silently dropping it.
    service_requests_batch(finish);
    return;
  }

  bool flag=true;
  while (flag || (finish && (ndx > 0))) {
    flag=false;
    //
    // Test for requests for work from the slaves
    //
    if (next_pt < ndx) {
      int test_val=0;
      MPI_Status status;
      MPI_Test(&feval_request, &test_val, &status);
      if (test_val) {
	if (debug)
	  ucout << "OFFSET: " << offset << " next=" << next_pt << " ndx=" << ndx << endl << Flush;
	//
	// Receive the feval response
	//
	MPI_Irecv((void*)(array[offset[next_pt]]->recvbuf.data()),
		  array[offset[next_pt]]->recvbuf.size(),
		  MPI_PACKED,
		  status.MPI_SOURCE,
		  FEVAL_RESPONSE,
		  MPI_COMM_WORLD,
		  &(array[offset[next_pt]]->recv_request));
	//
	// Send off the feval request
	//
	int dest = status.MPI_SOURCE;

	MPI_Isend((void*)&max_buf_size,1,MPI_INT,dest,
		  BUF_SIZE_MSG,
		  MPI_COMM_WORLD,
		  &(bufsize[offset[next_pt]]->send_request));
	if (debug) {
	  ucout << "Master sending point to " << dest << 
	    " bufsize=" << array[offset[next_pt]]->pack.size() << endl << Flush;
	}
	MPI_Isend((void*)(array[offset[next_pt]]->pack.buf()),
		  array[offset[next_pt]]->pack.size(),
		  MPI_PACKED,
		  dest,
		  FEVAL_PT,
		  MPI_COMM_WORLD,
		  &(array[offset[next_pt]]->send_request));

	next_pt++;
	// Re-arm the receive for the next work request.
	MPI_Irecv(0,0,MPI_CHAR,MPI_ANY_SOURCE,FEVAL_REQUEST,
		  MPI_COMM_WORLD, &feval_request);
      }
    }
    //
    // Check for responses from the slaves
    //
    MPI_Status status;
    for (unsigned int i=0; i<next_pt; i++) {
      int id = offset[i];
      int test_val=0;
      MPI_Test(&(array[id]->recv_request), &test_val, &status);
      if (test_val) {
	int count=0;
	MPI_Get_count(&status,MPI_PACKED,&count);
	UnPackBuffer unpack(&(array[id]->recvbuf[0]),count);
	unpack >> *(array[id]->response);
	if (debug) {
	  ucout << "Master received response from " << status.MPI_SOURCE << endl;
	  ucout << *(array[id]->response);
	  ucout << Flush;
	}
	if (finish == false) {
	  // Completed ids pile up here until next_eval() hands them out,
	  // one per call, so the list can outgrow the size it was given
	  // when the queue was last enlarged.  Grow it instead of writing
	  // past the end.
	  if (id_ndx >= id_flags.size())
	    id_flags.resize(id_ndx+10);
	  id_flags[id_ndx++] = array[id]->response->info->id;
	}
	CachedAllocator<ResponseT> :: deallocate(array[id]->response); // What does this do to the pointers ??????
	// Retire slot i: move it behind the in-flight range, then behind
	// the queued range, shrinking both counters.
	swap(offset[i],offset[--next_pt]);
	swap(offset[next_pt],offset[--ndx]);
      }
    }
  }
#endif
}
//========================================================
//      %%%% added 6/04 pls
// Note that finish is meant to be true only when this is
// reached from the synchronize() method.
//========================================================
/// Batch-path servicing of the slaves.
///
/// First drains every pending work request: the requesting agent is marked
/// ready and, if it has a task attached (is not idle), a receive for its
/// response is posted and the buffer-size message and the packed point are
/// sent to it.  Afterwards the posted receives are tested and completed
/// responses are unpacked into their response objects.
///
/// The whole body is compiled only in MPI builds.
///
/// \param finish  not read anywhere in this function body.
PROBLEM_TEMPLATE
void MSAPPLICATION
      ::service_requests_batch(bool finish) // default is false,
{
#ifdef USING_MPI
  bool flag = true;
  while(flag){
    //
    // Test for requests for work from the slaves
    //
    int test_val=0;
    MPI_Status status;
    MPI_Test(&feval_request, &test_val, &status);
    // Keep looping for as long as a request was found on this pass.
    flag = (test_val > 0);
    if (test_val) {
      int dest = status.MPI_SOURCE;
      // The constructor gives agent k the rank k+1, hence the dest-1 index.
      // NOTE(review): in the code visible here 'ready' is never set back
      // to false -- confirm where it is supposed to be cleared.
      agentlist[dest-1]->ready = true;
      //	if (debug)
      // ucout << "OFFSET: " << offset << " next=" << next_pt << " ndx=" << ndx << endl << Flush;
      //
      // Receive the feval response.  We post this receive now to avoid a 
      //   race condition later. 
      //
      if(!(agentlist[dest-1]->idle)){
	MPI_Irecv((void*)(agentlist[dest-1]->RS_data->recvbuf.data()),
		  agentlist[dest-1]->RS_data->recvbuf.size(),
		  MPI_PACKED,
		  status.MPI_SOURCE,
		  FEVAL_RESPONSE,
		  MPI_COMM_WORLD,
		  &agentlist[dest-1]->RS_data->recv_request);
	//
	// If this procesor is due for a task, send off the 
	//  feval request
	//
	// %%%% moved this from DoEval (batch version) 6/04 ---pls
	nprob_ctr++;
	neval_ctr++;
	if ((num_eq_constr+num_ineq_constr)>0){
	  nconstr_ctr++;
	}
	
	MPI_Isend((void*)&max_buf_size,1,MPI_INT,dest,
		  BUF_SIZE_MSG,
		  MPI_COMM_WORLD,
		  &agentlist[dest-1]->RS_bufsize->send_request);
	if (debug) {
	  ucout << "Master sending point to " << dest << 
	    " bufsize=" << agentlist[dest-1]->RS_data->pack.size() << endl << Flush;
	}
	MPI_Isend((void*)(agentlist[dest-1]->RS_data->pack.buf()),
		  agentlist[dest-1]->RS_data->pack.size(),
		  MPI_PACKED,
		  dest,
		  FEVAL_PT,
		  MPI_COMM_WORLD,
		  &agentlist[dest-1]->RS_data->send_request);
      
      } // if agent has a task assigned    ---does the bracket go here?????????
      // Re-arm the receive for the next work request.  A request from an
      // idle agent is consumed here without any work being sent to it.
      MPI_Irecv(0,0,MPI_CHAR,MPI_ANY_SOURCE,FEVAL_REQUEST,
		MPI_COMM_WORLD, &feval_request);   // or here???????
    } // if test_val
  } // end while
  
  //
  // Check for responses from the slaves
  //
  // NOTE(review): this loop is bounded by next_pt, which in the code
  // visible here is only advanced by service_requests() on the
  // one-at-a-time path, while the index is applied to agentlist.  It
  // presumably should visit the agents that have a receive posted --
  // confirm before relying on the batch path.
  MPI_Status status;
  for (unsigned int i=0; i<next_pt; i++) {
    int test_val=0;
    MPI_Test(&(agentlist[i]->RS_data->recv_request), &test_val, &status);
    if (test_val) {
      int count=0;
      MPI_Get_count(&status,MPI_PACKED,&count);
      UnPackBuffer unpack(&(agentlist[i]->RS_data->recvbuf[0]),count);
      unpack >> *(agentlist[i]->RS_data->response);
      if (debug) {
	ucout << "Master received response from " << status.MPI_SOURCE << endl;
	ucout << *(agentlist[i]->RS_data->response) << Flush;
      }
      // next_eval_batch() looks for this flag to report the task as done.
      agentlist[i]->t->evaluated = true;
    }
  }
#endif
}
//============================================================================

/// Services slave requests in "finish" mode, i.e. asks service_requests()
/// to keep going until no queued evaluation is left.
PROBLEM_TEMPLATE
void MSAPPLICATION
	::synchronize()
{
  //
  // Finishing off a partially filled batch is disabled for now:
  //
  /*
  if (batch_ctr > 0) {
     ndx++;
     batch_ctr=0;
     }
  */

  // Service all requests until ndx==0
  const bool run_until_done = true;
  service_requests(run_until_done);
}

//============================================================================
//  %%%% Added two methods, 6/04 ---pls
//============================================================================

// For now, just put them in the order we get them.
void  MSAPPLICATION
        ::do_agent_prioritization()
{
  agent_pri.resize(num_servers);
  for(int i = 0; i < num_servers; i++){
    agent_pri[i] = (i + 1);
  }
  
}

// For now, all we do is give tasks to processors
// in order. 
void  MSAPPLICATION
        ::map_tasks_to_agents()
{
  int len = tasklist.size();
  t_curr = tasklist.begin();

  for(int i = 0; (i < len) && (t->curr != tasklist.end()); i++){
    while( (t_curr->agentrank > -1) && (t_curr != tasklist.end()) ){
      t_curr++;
    }
    if( (agentlist[i]->idle) && (t_curr != tasklist.end()) ){
      agentlist[i]->t = t_curr;
      agentlist[i]->idle = false;
      agentlist[i]->RS_data->response = t->resp;
      idle_servers--;
      unassigned_tasks--;
      t_curr->agentrank = agentlist[i]->rank;
      t_curr++;
    }
  }
}
//============================================================================
//
//
/// Reports one completed evaluation.
///
/// On the batch path the call is forwarded to next_eval_batch().
/// Otherwise pending slave traffic is serviced once and, if any completed
/// ids are waiting in \c id_flags, the first one is removed (its place is
/// taken by the last entry) and returned.
///
/// \return the id of a completed evaluation, or -1 when none is waiting.
PROBLEM_TEMPLATE
int MSAPPLICATION
        ::next_eval()
{
  if (usingbatchversion)
    return next_eval_batch();

  service_requests();

  // Nothing has completed yet.
  if (id_ndx == 0)
    return -1;

  const int finished_id = id_flags[0];
  --id_ndx;
  id_flags[0] = id_flags[id_ndx];
  return finished_id;
}

//  Go through the agentlist. Return the id of the task
//  of the first agent whose task is done.  Then 
//  mark the agent as idle and detach the task.
int MSAPPLICATION
        ::next_eval_batch()
{
  int id = -1;
  service_requests_batch();
  for(int i = 0; i < agentlist.size(); i++){
    if(agentlist[i]->t->evaluated == true){
      id = *(agentlist[i]->t->pt_id); // remember, this is a pointer.
      agentlist[i]->idle = true;
      agentlist[i]->t->agentrank = -1;
      agentlist[i]->t = NULL;
      agentlist[i]->RS_data->response = NULL; // do the deallocate at the task level.
      do_agent_prioritization(); // reprioritize here
      map_tasks_to_agents();
      break;
    }
  }
  return id;
}


//============================================================================
//
//
/// Always reports false, whatever \a id is passed in; the argument is
/// not used.
PROBLEM_TEMPLATE
bool MSAPPLICATION
	::terminate_eval(const int id)
{
  static_cast<void>(id);	// intentionally unused
  return false;
}


//============================================================================
//
//
/// Slave-side service loop.
///
/// The slave announces that it wants work (FEVAL_REQUEST) and then polls,
/// without blocking, for three kinds of message from rank 0:
///  - BUF_SIZE_MSG: grows the receive buffer if needed and posts the
///    receive for the point itself;
///  - FEVAL_PT: unpacks the check value, the response and the point,
///    evaluates the point with \a prob, sends the response back
///    (FEVAL_RESPONSE) and asks for more work;
///  - TERMINATE: ends the loop.
///
/// The whole body is compiled only in MPI builds.
///
/// \param prob  problem used to hold and evaluate the received point.
///
/// NOTE(review): requests that are still outstanding when the loop ends
/// are not cancelled or freed here.
PROBLEM_TEMPLATE
void MSAPPLICATION
	::exec_server( OPTPROBLEM & prob)
{
#ifdef USING_MPI
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);    // Compute processor id

  ResponseT response;
  MPI_Status status;
  PackBuffer pack;
  MPI_Request buf_request;
  MPI_Request pt_request;
  MPI_Request termination_request;
  MPI_Request send_request;
  MPI_Request ready_request;
  bool termination=false;

  string recvbuf;
  bool received_bufsize=false;	// true between BUF_SIZE_MSG and its point
  bool request_flag=false;	// true once a response send has been issued

  int buffersize;
  MPI_Irecv((void*)&buffersize,1,MPI_INT,0,BUF_SIZE_MSG,
	    MPI_COMM_WORLD, &buf_request);
  MPI_Irecv(0,0,MPI_CHAR,0,TERMINATE,
	    MPI_COMM_WORLD, &termination_request);
  MPI_Isend(0,0,MPI_CHAR,0, FEVAL_REQUEST,MPI_COMM_WORLD,&ready_request);

  while (!termination) {
    //
    // Get the buffer size msg
    //
    if (!received_bufsize) {
      int test_val=0;
      MPI_Test(&buf_request,&test_val,&status);
      if (test_val) {
	if (((unsigned int)buffersize) > recvbuf.size())
	  recvbuf.resize(buffersize);
	received_bufsize=true;
	if (debug)
	  ucout << "Processor " << rank << " received buffer size msg" << endl << Flush;
	// Re-arm the size receive and post the receive for the point.
	MPI_Irecv((void*)(&buffersize),1,MPI_INT,0,BUF_SIZE_MSG,
		  MPI_COMM_WORLD, &buf_request);
	MPI_Irecv((void*)(recvbuf.data()),recvbuf.size(),MPI_PACKED,0,FEVAL_PT,
		  MPI_COMM_WORLD, &pt_request);
      }
    }
    //
    // .. then get the optimization point
    //
    if (received_bufsize) {
      int test_val=0;
      MPI_Test(&pt_request,&test_val,&status);
      if (test_val) {
	//
	// Unpack the point
	//
	int count=0;
	MPI_Get_count(&status,MPI_PACKED,&count);
	UnPackBuffer unpack(&(recvbuf[0]),count);
	int tmp;
	unpack >> tmp;
	// Every point message starts with this check value.
	if (tmp != -987654321)
	  EXCEPTION_MNGR(runtime_error,"Corrupted message from the master: check="<<tmp)
	unpack >> response;
	unpack >> prob.get_point();
	if (debug) {
	  ucout << "Processor: " << rank << " Received parameters (test=OK)" <<endl;
	  ucout << "Count " << count << " bufsize " << recvbuf.size() << endl;
	  ucout << prob.get_point();
	  ucout << endl << response;
	  ucout << Flush;
	}
	//
	// Evaluate the point.  Keep the response.id in a temporary int,
	// since the call to Eval will rewrite it and we need to return
	// the _global_ response id, not the local value that is assigned
	// in the Eval call.
	//
	int tmp_id = response.info->id;
	if (debug) {
	  ucout << "Processor: " << rank << " starting evaluation." <<endl;
	  ucout << tmp_id << endl;
	  ucout << response;
	  ucout << Flush;
	}
	prob.Eval(response.request_vector(),response,response.info->mode);
	response.info->id = tmp_id;
	if (debug) {
	  ucout << "Processor: " << rank << " finished evaluation." <<endl;
	  ucout << response;
	  ucout << Flush;
	}
	//
	// Send the result.  The pack buffer is also the send buffer of the
	// previous response, so that send has to be complete BEFORE the
	// buffer is reset and refilled.
	//
	if (request_flag)
	  MPI_Wait(&send_request,&status);
	pack.reset();
	pack << response;
	MPI_Isend((void*)pack.buf(),pack.size(),MPI_PACKED,0,
		  FEVAL_RESPONSE,MPI_COMM_WORLD,&send_request);
	if (debug) {
	  ucout << "Processor: " << rank << " sent result." <<endl;
	  ucout << Flush;
	}
	//
	// Ask for more work.  The previous work request was answered (its
	// point has just been processed), so completing it first returns
	// at once and keeps the request handle from being overwritten
	// while still active.
	//
	MPI_Wait(&ready_request,&status);
	MPI_Isend(0,0,MPI_CHAR,0, FEVAL_REQUEST,MPI_COMM_WORLD,&ready_request);
	if (debug)
	  ucout << "Processor " << rank << " waiting for a message." << endl << Flush;

	request_flag=true;

	received_bufsize=false;
      }
    }

    //
    // Check for termination message
    //
    int test_val=0;
    MPI_Test(&termination_request,&test_val,&status);
    if (test_val)
      termination=true;
  } // while not terminated

#endif
} // end exec_server()


//============================================================================
//
//
/// Tells every slave to stop: a zero-length TERMINATE message is sent to
/// each rank from 1 up to the last rank in MPI_COMM_WORLD.  Nothing
/// happens in builds without MPI.
PROBLEM_TEMPLATE
void MSAPPLICATION :: terminate()
{
#ifdef USING_MPI
  int nprocs;
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);    // Compute # of processors

  //
  // A blocking send may be less efficient than an asynchronous one, but
  // it's more convenient for now.
  //
  int target = 1;
  while (target < nprocs) {
    MPI_Send(0,0,MPI_CHAR,target, TERMINATE,MPI_COMM_WORLD);
    ++target;
  }
#endif
}

};

#endif

#endif
