/*
   PLAPACK Release 3.0
   
   Copyright (C) 2000
   Robert A. van de Geijn and The University of Texas at Austin

   For GNU-license details see the file GNU_license in the 
   PLAPACK root directory.
*/

#include "PLA.h"

/* Values that FILL_METHOD (defined inside create_problem) may be set to:
   FILL_LOCAL_RANDOM - every process writes random numbers straight into
                       its local portions of the matrix and vector;
   FILL_THROUGH_API  - the data is submitted through the PLAPACK
                       application interface (PLA_API_* calls).          */
#define FILL_LOCAL_RANDOM 1
#define FILL_THROUGH_API 0

void create_problem( PLA_Obj A, PLA_Obj x, PLA_Obj b )
{
  PLA_Obj  zero = NULL,    one = NULL;
  int size, me, nprocs, i, j, fill_blocksize, this_fill_blocksize, type_size;
  double d_one = 1.0, time;

  void *locA;

  double *local_buf;
  int  local_m, local_n, local_ldim, local_stride, global_length;

  MPI_Datatype
    datatype;
  
  PLA_Obj_global_length( A, &size );

  PLA_Obj_datatype ( A, &datatype );
  MPI_Type_size ( datatype, &type_size);

  MPI_Comm_rank( MPI_COMM_WORLD, &me );
  MPI_Comm_size( MPI_COMM_WORLD, &nprocs );

  PLA_Create_constants_conf_to( A, NULL, &zero, &one );

  srand48( me * 1793 );

  PLA_Obj_local_length( A, &local_m );
  PLA_Obj_local_width(  A, &local_n );
  PLA_Obj_local_buffer( A, (void **) &local_buf );
  PLA_Obj_local_ldim(   A, &local_ldim );



/* PROBLEM FILL METHOD: FILL_LOCAL_RANDOM or FILL_THROUGH_API (see comment below) */
#define FILL_METHOD FILL_LOCAL_RANDOM

#if FILL_METHOD

  /************************************************************************
    Fill the matrices.

    NOTE:  There are two versions of the fill routine in this file.  The
    version directly below this comment simply fills the local portions of 
    the matrix and vector with random numbers.  To use this version, the 
    line directly above this comment should read "#define FILL_METHOD FILL_LOCAL_RANDOM".

    The other version of the fill algorithm uses the PLAPACK Application 
    Interface, which allows each processor to create a portion of the matrix
    or vector (regardless of the true location on the machine of that portion)
    and then submit the piece through a call to the PLAPACK API.  To use 
    this version of the algorithm, the line directly above this comment should 
    read "#define FILL_METHOD FILL_THROUGH_API".

    The API version of the algorithm has a parameter called "fill_blocksize"
    that determines the width of the column blocks to be submitted to the
    matrix.  The number of independent messages generated by the API is
    inversely proportional to the fill_blocksize, and significant network
    contention (or even deadlock on some systems) may occur if the
    fill_blocksize is taken too small.
    **********************************************************************/
  
  for (j=0; j<local_n; j++ )
    for (i=0; i<local_m; i++ )
      if ( datatype == MPI_DOUBLE )
	{
	  local_buf[ j*local_ldim + i ] = drand48() * 2.0 -1.0;
	}
      else if ( datatype == MPI_FLOAT )
	{
	  ((float *)local_buf)[ j*local_ldim + i ] = (float) (drand48() * 2.0 -1.0);
	}
      else if ( datatype == MPI_DOUBLE_COMPLEX )
	{
	  ((PLA_DOUBLE_COMPLEX *)local_buf)[ j*local_ldim + i ].real = drand48() * 2.0 -1.0;
	  ((PLA_DOUBLE_COMPLEX *)local_buf)[ j*local_ldim + i ].imaginary = drand48() * 2.0 -1.0;
	}
      else if ( 0 == me )
	printf("Unhandled datatype in create_problem()\n");
	
  
  PLA_Obj_local_length( x, &local_m );
  PLA_Obj_local_buffer( x, (void **) &local_buf );
  PLA_Obj_local_stride( x, &local_stride );
  
  for (i=0; i<local_m; i++ )
      if ( datatype == MPI_DOUBLE )
	{
	  local_buf[ i*local_stride ] = drand48() * 2.0 -1.0;
	}
      else if ( datatype == MPI_FLOAT )
	{
	  ((float *)local_buf)[ i*local_stride ] = (float) (drand48() * 2.0 -1.0);
	}
      else if ( datatype == MPI_DOUBLE_COMPLEX )
	{
	  ((PLA_DOUBLE_COMPLEX *)local_buf)[ i*local_stride ].real = drand48() * 2.0 -1.0;
	  ((PLA_DOUBLE_COMPLEX *)local_buf)[ i*local_stride ].imaginary = drand48() * 2.0 -1.0;
	}
      else if ( 0 == me )
	printf("Unhandled datatype in create_problem()\n");


#else
  /***********************************************************************************
    Alternate version of the problem creation using the PLAPACK Application interface.

    To use, edit the line marked "PROBLEM CREATION METHOD" above to read
    "#define FILL_METHOD FILL_THROUGH_API".
    **********************************************************************************/

  if ( 0 == me ) 
    printf("Using PLAPACK application interface to create problem\n");

  MPI_Barrier ( MPI_COMM_WORLD);
  time = MPI_Wtime ();

  PLA_API_begin();
  PLA_Obj_API_open(A);
  PLA_Obj_API_open(x);

  fill_blocksize = 10;

  locA = (void *) PLA_malloc( type_size * size * fill_blocksize  );
  
  for (j=me*fill_blocksize;j< size; j+=nprocs*fill_blocksize) {
    this_fill_blocksize = min( fill_blocksize, size - j);
    for (i=0; i < size*this_fill_blocksize; i++)  {   /* This loop determines the values to put into matrix */
      if ( MPI_DOUBLE == datatype )
	((double *)locA)[i]=drand48() * 2.0 - 1.0;      
      else if ( MPI_FLOAT == datatype )
	((float *)locA)[i]=drand48() * 2.0 - 1.0;      
      else if ( MPI_DOUBLE_COMPLEX == datatype ) {
	((double *)locA)[2*i]=drand48() * 2.0 - 1.0;      
	((double *)locA)[2*i+1]=drand48() * 2.0 - 1.0;      
      }
      else if ( 0 == me )
	printf("Unhandled datatype in create_problem()\n");
    }
    PLA_API_axpy_matrix_to_global(size, 
				  this_fill_blocksize, 
				  &d_one, 
				  locA, 
				  size, 
				  A, 
				  0, j );
  }
  
  if (0==me) {                                     /* processor zero alone fills the vector */
    for (i=0; i<size; i++)
      if ( MPI_DOUBLE == datatype )
	((double *)locA)[i]=drand48() * 2.0 - 1.0;      
      else if ( MPI_FLOAT == datatype )
	((float *)locA)[i]=drand48() * 2.0 - 1.0;      
      else if ( MPI_DOUBLE_COMPLEX == datatype ) {
	((double *)locA)[2*i]=drand48() * 2.0 - 1.0;      
	((double *)locA)[2*i+1]=drand48() * 2.0 - 1.0;      
      }
      else if ( 0 == me )
	printf("Unhandled datatype in create_problem()\n");

    PLA_API_axpy_vector_to_global( size, &d_one, locA, 1, x, 0);
  }
  
  PLA_free( locA );
  
  PLA_Obj_API_close(A);
  PLA_Obj_API_close(x);
  PLA_API_end(); 
  
  MPI_Barrier ( MPI_COMM_WORLD);
  time = MPI_Wtime () - time;

  if ( 0 == me ) {
    printf("time for problem creation: %e seconds\n", time);
  }

#endif

  PLA_Gemv( PLA_NO_TRANS, one, A, x, zero, b ); 

  PLA_Obj_free( &zero );         PLA_Obj_free( &one );
}

/* Remove the fill-method selector macros defined near the top of this
   file so they are not visible to anything that follows. */
#undef FILL_LOCAL_RANDOM
#undef FILL_THROUGH_API
