Index: /issm/trunk/src/c/toolkits/plapack/patches/CyclicalFactorization.cpp
===================================================================
--- /issm/trunk/src/c/toolkits/plapack/patches/CyclicalFactorization.cpp	(revision 831)
+++ /issm/trunk/src/c/toolkits/plapack/patches/CyclicalFactorization.cpp	(revision 831)
@@ -0,0 +1,101 @@
+/*!\file:  CyclicalFactorization.cpp
+ * \brief given num_procs, finds the integers nprows and npcols so that nprows*npcols=num_procs and nprows-npcols is the smallest possible
+*/
+#include <math.h>
+#include "../../../shared/shared.h"
+
+int SmallestPrimeFactor(int* output,int input);
+
+#undef __FUNCT__ 
+#define __FUNCT__ "CyclicalFactorization"
+
+int CyclicalFactorization(int* pnprows,int* pnpcols,int num_procs){
+	
+	int nprows,npcols;
+	int last_diff;
+	int i;
+	int diff;
+
+	nprows=1; npcols=num_procs; last_diff=npcols-nprows;
+	for (i=1;i<=num_procs;i++){
+		if ((num_procs % i)==0){
+			diff=(int)fabs((double)(i-(num_procs/i)));
+			if (diff<last_diff){
+				nprows=i;npcols=num_procs/i;
+				last_diff=(int)fabs((double)(nprows-npcols));
+			}
+		}
+	}
+	/*Order nprows > npcols*/
+	if (nprows<npcols){
+		int temp=nprows;
+		nprows=npcols;
+		npcols=temp;
+	}
+
+	*pnprows=nprows;
+	*pnpcols=npcols;
+	#ifdef _DEBUG_
+		_printf_("Decomposition: %i-%i\n",nprows,npcols);
+	#endif
+
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "PrimeDecomp"
+int PrimeDecomp(int** pdecomp,int* pdecomp_size,int input){
+
+	int* decomp=NULL;
+	int prime_factor;
+	int i;
+
+	decomp=xmalloc(input*sizeof(int));
+	*decomp=input;
+	for (i=0;i<input;i++){
+		SmallestPrimeFactor(&prime_factor,*(decomp+i));
+		#ifdef _DEBUG_
+			_printf_("Smallest prime factor for term %i : %i\n",i,prime_factor);
+		#endif
+		if (prime_factor==*(decomp+i)){
+			*pdecomp_size=i;
+			break;
+		}
+		else{
+			*(decomp+i+1)=*(decomp+i)/prime_factor;
+			*(decomp+i)=prime_factor;
+		}
+	}
+	#ifdef _DEBUG_
+		_printf_("Prime factor decomposition for integer %i: \n",input);
+		for(i=0;i<*pdecomp_size;i++){
+			_printf_("%i ",*(decomp+i));
+		}
+	#endif
+
+	*pdecomp=decomp;
+}
+
+/*SmallestPrimeFactor: 
+	seeks the smallest prime factor in the prime factor decomposition 
+	of integer input
+*/
+#undef __FUNCT__
+#define __FUNCT__  "SmallestPrimeFactor"
+int SmallestPrimeFactor(int* output,int input){
+	
+	int found=0;
+	int i;
+
+	for (i=2;i<input;i++){
+		if ((input %i)==0){
+			found=i;
+			break;
+		}
+	}
+	if(found){
+		*output=found;
+	}
+	else{
+		*output=input;
+	}
+}
Index: /issm/trunk/src/c/toolkits/plapack/patches/PlapackInvertMatrix.cpp
===================================================================
--- /issm/trunk/src/c/toolkits/plapack/patches/PlapackInvertMatrix.cpp	(revision 831)
+++ /issm/trunk/src/c/toolkits/plapack/patches/PlapackInvertMatrix.cpp	(revision 831)
@@ -0,0 +1,148 @@
+/*!\file PlapackInvertMatrix.cpp
+ * \brief invert petsc matrix using Plapack
+ */
+
+/* petsc: */
+#include "../../petsc/petscincludes.h"
+
+/*plapack: */
+#include "../plapackincludes.h"
+
+/* Some fortran routines: */
+#include "../../scalapack/FortranMapping.h"
+
+#undef __FUNCT__
+#define __FUNCT__ "PlapackInvertMatrix"
+void PlapackInvertMatrixLocalCleanup(PLA_Obj* pa,PLA_Template* ptempl,double** parrayA,
+		int** pidxnA,MPI_Comm* pcomm_2d);
+	
+int PlapackInvertMatrix(Mat* A,Mat* inv_A,int status,int con){ 
+	/*inv_A does not yet exist, inv_A was just xmalloced, that's all*/
+
+	/*Error management*/
+	int i,j;
+	
+
+	/*input*/
+	int mA,nA;
+	int local_mA,local_nA;
+	int lower_row,upper_row,range,this_range,this_lower_row;
+	MatType type;
+	
+	/*Plapack: */
+	MPI_Datatype   datatype;
+	MPI_Comm       comm_2d;
+	PLA_Obj a=NULL;
+	PLA_Template   templ;	
+	double one=1.0;		
+	int ierror;
+	int nb,nb_alg;
+	int nprows,npcols;
+	int initialized=0;
+
+	/*Petsc to Plapack: */
+	double    *arrayA=NULL;
+	int* idxnA=NULL;
+	int d_nz,o_nz;
+	
+	/*Feedback to client*/
+	int computation_status;  
+
+	/*Verify that A is square*/
+	MatGetSize(*A,&mA,&nA);
+	MatGetLocalSize(*A,&local_mA,&local_nA);
+
+	/*Some dimensions checks: */
+	if (mA!=nA) throw ErrorException(__FUNCT__," trying to take the invert of a non-square matrix!");
+
+	/* Set default Plapack parameters */
+	//First find nprows*npcols=num_procs;
+	CyclicalFactorization(&nprows,&npcols,num_procs); 
+	//nprows=num_procs;
+	//npcols=1;
+	ierror = 0;
+	nb     = nA/num_procs;
+	if(nA - nb*num_procs) nb++; /* without cyclic distribution */
+
+	if (ierror){
+		PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
+	}
+	else {
+		PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
+	}
+	nb_alg = 0;
+	if (nb_alg){
+		pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,nb_alg);
+	}
+
+	/*Verify that plapack is not already initialized: */
+	if(PLA_Initialized(NULL)==TRUE)PLA_Finalize();
+	/* Create a 2D communicator */
+	PLA_Comm_1D_to_2D(MPI_COMM_WORLD,nprows,npcols,&comm_2d); 
+
+	/*Initlialize plapack: */
+	PLA_Init(comm_2d);
+
+	templ = NULL;
+	PLA_Temp_create(nb, 0, &templ);
+
+	/* Use suggested nb_alg if it is not provided by user */
+	if (nb_alg == 0){
+		PLA_Environ_nb_alg(PLA_OP_PAN_PAN,templ,&nb_alg);
+		pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,nb_alg);
+	}
+
+	/* Set the datatype */
+	datatype = MPI_DOUBLE;
+
+	
+	/* Copy A into a*/
+	PLA_Matrix_create(datatype,mA,nA,templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&a);  
+	PLA_Obj_set_to_zero(a);
+	/*Take array from A: use MatGetValues, because we are sure this routine works with 
+	any matrix type.*/
+	MatGetOwnershipRange(*A,&lower_row,&upper_row);
+	upper_row--; 
+	range=upper_row-lower_row+1;
+	arrayA=xmalloc(nA*sizeof(double));
+	idxnA=xmalloc(nA*sizeof(int));
+	for (i=0;i<nA;i++){
+		*(idxnA+i)=i;
+	}
+	PLA_API_begin();
+	PLA_Obj_API_open(a);  	
+	for (i=lower_row;i<=upper_row;i++){
+		MatGetValues(*A,1,&i,nA,idxnA,arrayA); 
+		PLA_API_axpy_matrix_to_global(1,nA, &one,(void *)arrayA,1,a,i,0); 
+	}
+	PLA_Obj_API_close(a); 
+	PLA_API_end(); 
+
+	#ifdef _DEBUG_
+		PLA_Global_show("Matrix A",a," %lf","Done with A");
+		MatView(*A,PETSC_VIEWER_STDOUT_WORLD);
+	#endif
+
+	/*Call the plapack invert routine*/
+	PLA_General_invert(PLA_METHOD_INV,a);
+
+	/*Translate Plapack a into Petsc invA*/
+	MatGetType(*A,&type);
+	PlapackToPetsc(inv_A,local_mA,local_nA,mA,nA,type,a,templ,nprows,npcols,nb);
+
+	#ifdef _DEBUG_
+		PLA_Global_show("Inverse of A",a," %lf","Done...");
+		MatView(*inv_A,PETSC_VIEWER_STDOUT_WORLD);
+	#endif
+
+	/*Free ressources:*/
+	PLA_Obj_free(&a);
+	PLA_Temp_free(&templ);
+	xfree((void**)&arrayA);
+	xfree((void**)&idxnA);
+	
+	/*Finalize PLAPACK*/
+	PLA_Finalize();
+	MPI_Comm_free(&comm_2d);
+
+}
Index: /issm/trunk/src/c/toolkits/plapack/patches/PlapackToPetsc.cpp
===================================================================
--- /issm/trunk/src/c/toolkits/plapack/patches/PlapackToPetsc.cpp	(revision 831)
+++ /issm/trunk/src/c/toolkits/plapack/patches/PlapackToPetsc.cpp	(revision 831)
@@ -0,0 +1,100 @@
+/*!\file PlapackToPetsc.cpp
+ * \brief convert plapack matrix to petsc matrix
+ */
+
+#include "../../petsc/petscincludes.h"
+#include "../plapackincludes.h"
+
+#undef __FUNCT__ 
+#define __FUNCT__ "PlapackToPetsc"
+int PlapackToPetsc(Mat* A,int local_mA,int local_nA,int mA,int nA,MatType type,PLA_Obj a,PLA_Template templ,int nprows,int npcols,int nb){
+
+	
+	int i;
+
+	int lower_row,upper_row,range;
+	int* row_nodes=NULL;
+	int* col_nodes=NULL;
+	int* idxm=NULL;
+	int* idxn=NULL;
+	int myrow,mycol;
+	int count,idxm_count,idxn_count;
+	int d_nz,o_nz;
+	int i0,i1;
+	double* local_buffer=NULL;
+
+
+	/*Create matrix A (right now, we just have an allocated pointer A*/
+	if (strcasecmp_eq(type,MATMPIAIJ)){
+		/*See pretty large here, because we have no idea how many nnz per row*/
+		d_nz=nA/2;
+		o_nz=nA/2;
+		MatCreateMPIAIJ(MPI_COMM_WORLD,local_mA,local_nA, mA,nA,d_nz,PETSC_NULL,o_nz,PETSC_NULL,A);
+	}
+	else if(strcasecmp_eq(type,MATMPIDENSE)){
+		MatCreateMPIDense(MPI_COMM_WORLD,local_mA,local_nA, mA,nA,PETSC_NULL,A); 
+	}
+	
+	MatGetOwnershipRange(*A,&lower_row,&upper_row);
+	upper_row--;
+	range=upper_row-lower_row+1;
+	
+	/*Build the Plapack row and column indices corresponding to the local_buffer stored in a. 
+	  We need those indices to directly plug local_buffer into the Petsc matrix A. We do not 
+	  use PLA_axpy_global_to_matrix to extract a local matrix, because this routine hangs when the 
+	  problem size becomes big. We rely therefore on MatAssembly from Petsc to gather the plapack 
+	  matrix into a Petsc Matrix.*/
+	
+	/*Vector physically based block cyclic distribution: */
+	row_nodes=xmalloc(mA*sizeof(int));
+	col_nodes=xmalloc(nA*sizeof(int));
+	for (i=0;i<mA;i++){
+		i0=i/nb;
+		*(row_nodes+i)=i0%nprows;
+	}
+	for (i=0;i<nA;i++){
+		i0=i/nb;
+		i1=i0/nprows;
+		*(col_nodes+i)=i1%npcols;
+	}
+
+	PLA_Temp_comm_row_rank(templ,&mycol);
+	PLA_Temp_comm_col_rank(templ,&myrow);
+
+	idxm=xmalloc(mA*sizeof(int));
+	count=0;
+	for (i=0;i<mA;i++){
+		if(*(row_nodes+i)==myrow){
+			*(idxm+count)=i;
+			count++;
+		}
+	}
+	idxm_count=count;
+
+	idxn=xmalloc(nA*sizeof(int));
+	count=0;
+	for (i=0;i<nA;i++){
+		if(*(col_nodes+i)==mycol){
+			*(idxn+count)=i;
+			count++;
+		}
+	}
+	idxn_count=count;
+
+	/*Get local buffer: */
+	PLA_Obj_local_buffer(a,(void**)&local_buffer);
+	
+	/*Insert into invA matrix. Use col oriented insertion, for Plapack is column oriented*/
+	MatSetOption(*A,MAT_COLUMN_ORIENTED);
+	MatSetValues(*A,idxm_count,idxm,idxn_count,idxn,local_buffer,INSERT_VALUES);
+
+	/*assemble: */
+	MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
+	MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
+
+	/*Free ressources:*/
+	xfree((void**)&row_nodes);
+	xfree((void**)&col_nodes);
+	xfree((void**)&idxm);
+	xfree((void**)&idxn);
+}
Index: /issm/trunk/src/c/toolkits/plapack/patches/plapackpatches.h
===================================================================
--- /issm/trunk/src/c/toolkits/plapack/patches/plapackpatches.h	(revision 831)
+++ /issm/trunk/src/c/toolkits/plapack/patches/plapackpatches.h	(revision 831)
@@ -0,0 +1,12 @@
+/*\file plapackpatches.h
+ * \brief: our own patches for plapack use
+ */
+
+#ifndef _PLAPACK_PATCHES_H_
+#define _PLAPACK_PATCHES_H_
+
+int PlapackInvertMatrix(Mat* A,Mat* inv_A,int status,int con);
+int PlapackToPetsc(Mat* A,int local_mA,int local_nA,int mA,int nA,MatType type,PLA_Obj a,PLA_Template templ,int nprows,int npcols,int nb);
+int CyclicalFactorization(int* pnprows,int* pnpcols,int num_procs);
+
+#endif
Index: /issm/trunk/src/c/toolkits/plapack/plapackincludes.h
===================================================================
--- /issm/trunk/src/c/toolkits/plapack/plapackincludes.h	(revision 831)
+++ /issm/trunk/src/c/toolkits/plapack/plapackincludes.h	(revision 831)
@@ -0,0 +1,19 @@
+/* \file plapackincludes.h
+ * \brief all includes for Plapack + our own patches
+ */
+
+#ifndef _PLA_INCLUDES_H_
+#define _PLA_INCLUDES_H_
+
+#include "PLA.h"
+#include "PLA_prototypes.h"
+
+/* missing Plapack prototypes: */
+int PLA_General_invert( int method, PLA_Obj A );
+
+/*our own patches: */
+#include "patches/petscpatches.h"
+
+
+#endif
+
Index: /issm/trunk/src/c/toolkits/scalapack/FortranMapping.h
===================================================================
--- /issm/trunk/src/c/toolkits/scalapack/FortranMapping.h	(revision 831)
+++ /issm/trunk/src/c/toolkits/scalapack/FortranMapping.h	(revision 831)
@@ -0,0 +1,43 @@
+#ifndef _FORTRAN_MAPPING_H_
+#define _FORTRAN_MAPPING_H_
+/*
+ * Fortran_Mapping.h
+ *     Description:  Fortran to C define to use Scalapack in a C program
+ */
+
+
+/*We transform every call to Fortran functions into their real symbolic name in the Scalapack, Blacs, Lapack and Blas libraries. 
+ * We had to look for these exact symbols in these libraries using nm | grep FORTRAN_NAME. In fact, the symbol naming is not well defined: see for example 
+ * numroc_ and blacs_gridinit__ (one underscore, then two). */
+
+#define BLACS_GRIDINFO(...) blacs_gridinfo__(__VA_ARGS__)
+#define SL_INIT(...) sl_init__(__VA_ARGS__)
+#define BLACS_GRIDEXIT(...) blacs_gridexit__(__VA_ARGS__)
+#define BLACS_EXIT(...) blacs_exit__(__VA_ARGS__) 
+#define DESCINIT(...) descinit_(__VA_ARGS__)
+#define NUMROC(...) numroc_(__VA_ARGS__)
+#define BLACS_GET(...) blacs_get__(__VA_ARGS__) 
+#define BLACS_GRIDINIT(...) blacs_gridinit__(__VA_ARGS__)
+#define BLACS_GRIDMAP(...) blacs_gridmap__(__VA_ARGS__)
+#define PDGETRF(...) pdgetrf_(__VA_ARGS__)
+#define PDGETRI(...) pdgetri_(__VA_ARGS__)
+
+
+/*Here, we clobber the fortran definition of these routines. Remember, every variable in fortran is passed by a pointer, and the 
+ * ordering of matrices is column oriented*/
+
+void sl_init__(int*,int*,int*); 
+void blacs_exit__(int*);
+void blacs_gridinfo__(int*,int*,int*,int*,int*);
+void descinit_(int[9],int*,int*,int*,int*,int*,int*,int*,int*,int*);
+void blacs_gridexit__(int*);
+int numroc_(int*,int*,int*,int*,int*);
+void blacs_get__(int*,int*,int*);
+void blacs_gridinit__(int*,char*,int*,int*);
+void blacs_gridmap__(int*,int *,int*,int*,int*);
+
+/*LU factorization*/
+void pdgetrf_(int* M,int* N,double* A,int* IA,int* JA,int* DESCA,int* IPIV,int* INFOVAR);  
+void pdgetri_(int* N,double* A,int* IA,int* JA,int* DESCA,int* IPIV,double* WORK,int* LWORK,int* IWORK,int* LIWORK,int*	INFOVAR);
+
+#endif