Index: /issm/trunk-jpl/src/c/classes/objects/Bucket.h
===================================================================
--- /issm/trunk-jpl/src/c/classes/objects/Bucket.h	(revision 14708)
+++ /issm/trunk-jpl/src/c/classes/objects/Bucket.h	(revision 14709)
@@ -10,5 +10,9 @@
 #include "./Object.h"
 #include "../../shared/Alloc/alloc.h"
+#include "../../Container/DataSet.h"
+#include "../../toolkits/toolkitsenums.h"
 /*}}}*/
+
+#define BUCKETSIZEOFREQUESTS 6 /*maximum number of MPI_Isend requests posted by Bucket::Isend to transfer the contents of one bucket to another cpu (m, idxm, n, idxn, values, mode)*/
 
 template <class doubletype> class Bucket: public Object{
@@ -25,4 +29,12 @@
 	
 		/*constructors, destructors: */
+		Bucket(){ /*{{{*/
+			/*Default constructor: an empty bucket (no rows, no columns, no storage)
+			 *in INS_VAL mode, e.g. to be filled in afterwards by Recv().*/
+			this->mode=INS_VAL;
+			this->values=NULL;
+			this->idxm=this->idxn=NULL;
+			this->m=this->n=0;
+		} /*}}}*/
 		Bucket(int min,int* idxmin,int nin,int* idxnin,doubletype* valuesin,InsMode modein){ /*{{{*/
 			this->m=min;
@@ -79,4 +91,75 @@
 			_error_("Not implemented yet (similar to Elements)"); };
 		/*}}}*/
+		
+		/*specific routines of Bucket: */
+		void SpawnBucketsPerCpu(DataSet* bucketsofcpu_i,int rank_i,int* rowranks){ /*{{{*/
+
+			/*Split this bucket row by row: every row whose owner, as given by
+			 *rowranks[global row index], is cpu rank_i is repackaged as a one-row
+			 *bucket (same columns, same insertion mode) and added to bucketsofcpu_i.
+			 *Rows that belong to other cpus are skipped.*/
+			for(int row=0;row<m;row++){
+				int global_row=idxm[row];
+				if(rowranks[global_row]!=rank_i) continue;
+
+				/*values holds n consecutive entries per row, hence the n*row offset: */
+				bucketsofcpu_i->AddObject(new Bucket(1,idxm+row,n,idxn,values+n*row,mode));
+			}
+		};
+		/*}}}*/
+		void SetLocalMatrixValues(double* local_matrix,int lower_row,int global_N){ /*{{{*/
+			/*Copy the m x n block of values into local_matrix, addressed as rows of
+			 *global_N entries with global row lower_row stored first. Entries are
+			 *overwritten; the insertion mode of the bucket is not consulted here.*/
+			for(int row=0;row<m;row++){
+				double*     target_row=local_matrix+global_N*(idxm[row]-lower_row);
+				doubletype* source_row=values+n*row;
+				for(int col=0;col<n;col++) target_row[idxn[col]]=source_row[col];
+			}
+		};
+		/*}}}*/
+		void Isend(int receiver_rank,MPI_Request* requests,int* pcount,MPI_Comm comm){ /*{{{*/
+			/*Post the non-blocking sends (tags 2 to 7) that transfer this bucket to
+			 *receiver_rank; Recv() is the matching receive. The new requests are
+			 *stored from requests[*pcount] on, and *pcount is advanced by the number
+			 *posted (at most BUCKETSIZEOFREQUESTS). Every send buffer is a member of
+			 *this bucket, so the bucket must stay alive until the requests complete.*/
+			int count=*pcount;
+
+			/*mode is sent in place as one MPI_INT: refuse to compile if InsMode is not int sized.*/
+			(void)sizeof(char[(sizeof(InsMode)==sizeof(int))?1:-1]);
+
+			MPI_Isend(&m,1,MPI_INT,receiver_rank,2,comm,requests+count); count++;
+			if(m){ MPI_Isend(idxm,m,MPI_INT,receiver_rank,3,comm,requests+count); count++; }
+			MPI_Isend(&n,1,MPI_INT,receiver_rank,4,comm,requests+count); count++;
+			if(n){ MPI_Isend(idxn,n,MPI_INT,receiver_rank,5,comm,requests+count); count++; }
+			if(m*n){ MPI_Isend(values,m*n,MPI_DOUBLE,receiver_rank,6,comm,requests+count); count++; }
+			MPI_Isend(&mode,1,MPI_INT,receiver_rank,7,comm,requests+count); count++; /*not a local copy: it would be gone before the send completes*/
+
+			*pcount=count;
+		} /*}}}*/
+		void Recv(int sender_rank, MPI_Comm comm){ /*{{{*/
+			/*Blocking counterpart of Isend(): receive from sender_rank, in the same order and
+			 *with the same tags (2 to 7), the sizes, indices, values and insertion mode of a bucket.
+			 *Arrays are allocated here and previous ones are not freed: use on an empty bucket.*/
+			MPI_Status recv_status;
+			int        mode_as_int;
+			MPI_Recv(&m,1,MPI_INT,sender_rank,2,comm,&recv_status);
+			if(m!=0){
+				idxm=new int[m];
+				MPI_Recv(idxm,m,MPI_INT,sender_rank,3,comm,&recv_status);
+			}
+			MPI_Recv(&n,1,MPI_INT,sender_rank,4,comm,&recv_status);
+			if(n!=0){
+				idxn=new int[n];
+				MPI_Recv(idxn,n,MPI_INT,sender_rank,5,comm,&recv_status);
+			}
+			if(m*n!=0){
+				values=new doubletype[m*n];
+				MPI_Recv(values,m*n,MPI_DOUBLE,sender_rank,6,comm,&recv_status);
+			}
+			MPI_Recv(&mode_as_int,1,MPI_INT,sender_rank,7,comm,&recv_status);
+			mode=(InsMode)mode_as_int;
+		} /*}}}*/
 };
 
Index: /issm/trunk-jpl/src/c/toolkits/issm/IssmMpiDenseMat.h
===================================================================
--- /issm/trunk-jpl/src/c/toolkits/issm/IssmMpiDenseMat.h	(revision 14708)
+++ /issm/trunk-jpl/src/c/toolkits/issm/IssmMpiDenseMat.h	(revision 14709)
@@ -25,4 +25,5 @@
 #include "../../classes/IssmComm.h"
 #include "../../classes/objects/Bucket.h"
+#include "../../toolkits/toolkits.h"
 #include <math.h>
 
@@ -149,6 +150,122 @@
 		/*}}}*/
 		/*FUNCTION Assemble{{{*/
-		void Assemble(void){
-			_error_("not supported yet!");
+		void Assemble(){
+			/*Send every cached bucket row to the cpu that owns it, then write the rows owned by this cpu into
+			 *this->matrix. Each cpu receives from all the other cpus, so every cpu has to call this routine.*/
+			int           i;
+			int           j;
+			int           k;
+			int           my_rank;
+			int           num_procs;
+			int          *RowRank             = NULL;
+
+			DataSet     **bucketspercpu       = NULL;
+			int          *bucketspercpu_sizes = NULL;
+			MPI_Request  *requests            = NULL;
+			MPI_Status   *statuses            = NULL;
+			MPI_Status    status;
+			int           num_requests        = 0;
+			DataSet      *mybuckets           = NULL;
+			int           lower_row;
+			int           upper_row;
+			int           count               = 0;
+
+			int           size;
+
+
+			/*some communicator info: */
+			num_procs=IssmComm::GetSize();
+			my_rank=IssmComm::GetRank();
+			MPI_Comm comm=IssmComm::GetComm();
+
+			/*First, make a vector of size M, which for each row between 0 and M-1, tells which cpu this row belongs to: */
+			RowRank=DetermineRowRankFromLocalSize(M,m,comm);
+
+			/*Now, sort out our dataset of buckets according to cpu ownership of rows: */
+			bucketspercpu=xNew<DataSet*>(num_procs);
+			bucketspercpu_sizes=xNew<int>(num_procs);
+
+			for(i=0;i<num_procs;i++){
+				DataSet* bucketsofcpu_i=new DataSet();
+				for (j=0;j<buckets->Size();j++){
+					Bucket<doubletype>* bucket=(Bucket<doubletype>*)buckets->GetObjectByOffset(j);
+					bucket->SpawnBucketsPerCpu(bucketsofcpu_i,i,RowRank);
+				}
+				bucketspercpu[i]=bucketsofcpu_i;
+				bucketspercpu_sizes[i]=bucketsofcpu_i->Size();
+			}
+
+			/*Recap, each cpu has num_procs datasets of buckets. For a certain cpu j, for a given dataset i, the buckets this 
+			 * dataset owns correspond to rows that are owned by cpu i, not j!:*/
+
+			/*First, figure out an upper bound on how many requests are going to be posted by MPI_Isend (a bucket posts at most BUCKETSIZEOFREQUESTS): */
+			for(i=0;i<num_procs;i++){
+				if(i!=my_rank){
+					num_requests+=bucketspercpu[i]->Size()*BUCKETSIZEOFREQUESTS; //this is to take into account all the MPI_Isend calls in each bucket.
+					num_requests++; //this is to take into account the MPI_Isend of the number of buckets, posted below.
+				}
+			}
+
+			/*Initialize array to track requests and statuses: */
+			requests=new MPI_Request[num_requests];
+			statuses=new MPI_Status[num_requests];
+
+			/*Now, go through all our bucketspercpu datasets, and send them to the corresponding cpus. Do not send our own buckets though!: */
+			count=0; //count the requests that are actually posted
+			for(i=0;i<num_procs;i++){
+				if(my_rank==i){
+					for(j=0;j<num_procs;j++){
+						if(j!=i){//only send the buckets that this cpu does not own.
+
+							/*Go through the buckets belonging to cpu j, and send them accordingly. */
+							DataSet* outgoing=bucketspercpu[j]; //named so that it does not shadow the buckets member
+							MPI_Isend(bucketspercpu_sizes+j,1,MPI_INT,j,1,comm,requests+count); count++; //we use bucketspercpu_sizes because we need a permanent buffer for an asynchronous send
+							for(k=0;k<outgoing->Size();k++){
+								Bucket<doubletype>* bucket=(Bucket<doubletype>*)outgoing->GetObjectByOffset(k);
+								bucket->Isend(j,requests,&count,comm);
+							}
+						}
+					}
+				}
+				else{
+
+					/*Receive buckets from cpu i, and add them to my own my_rank bucket list: */
+					/*First, are we receiving anything from sender_rank? :*/
+					MPI_Recv(&size,1, MPI_INT,i,1, comm, &status);
+
+					/*If so, start receiving the extra buckets and plug them into our buckets: */
+					if(size){
+						for(j=0;j<size;j++){
+							Bucket<doubletype>* bucket=new Bucket<doubletype>();
+							bucket->Recv(i,comm);
+							bucketspercpu[my_rank]->AddObject(bucket);
+						}
+					}
+				}
+			}
+			/*Wait for the requests actually posted to complete: count can be smaller than num_requests, and the remaining entries are uninitialized: */
+			MPI_Waitall(count,requests,statuses);
+
+			/*Every cpu now has a dataset of buckets  in bucketspercpu[my_rank], which holds all the values 
+			 *local to this cpu that should be added to the global matrix. Just do that: */
+			GetOwnershipBoundariesFromRange(&lower_row,&upper_row,m,comm);
+			mybuckets=bucketspercpu[my_rank];
+
+			for(i=0;i<mybuckets->Size();i++){
+				Bucket<doubletype>* bucket=(Bucket<doubletype>*)mybuckets->GetObjectByOffset(i);
+				bucket->SetLocalMatrixValues(this->matrix,lower_row,N);
+			}
+
+			/*Free resources:{{{*/
+			xDelete<int>(RowRank);
+			for(i=0;i<num_procs;i++){
+				DataSet* bucketsofcpu_i=bucketspercpu[i];
+				delete bucketsofcpu_i;
+			}
+			xDelete<DataSet*>(bucketspercpu);
+			xDelete<int>(bucketspercpu_sizes);
+			xDelete<MPI_Request>(requests);
+			xDelete<MPI_Status>(statuses);
+			/*}}}*/
+		}
 		/*}}}*/
@@ -200,6 +317,6 @@
 		}
 		/*}}}*/		
-
 };
+							
 
 #endif //#ifndef _ISSM_MPI_DENSE_MAT_H_
