Context Navigation

← Previous Changeset
Next Changeset →

Changeset 14833

Timestamp:

05/01/13 12:30:08 (12 years ago)

Author:

Eric.Larour

Message:

CHG: committing new Assemble routine, still needs debugging

Location:

issm/trunk-jpl/src/c

Files:

: 2 edited

classes/objects/Bucket.h (modified) (1 diff)
toolkits/issm/IssmMpiDenseMat.h (modified) (3 diffs)

Legend:

: Unmodified
: Added
: Removed

TabularUnified issm/trunk-jpl/src/c/classes/objects/Bucket.h ¶

-              r14822
+              r14833
                 };
                 /*}}}*/
+                void Marshall(int** prow_indices_forcpu,int** pcol_indices_forcpu,doubletype** pvalues_forcpu,int** pmodes_forcpu){ /*{{{*/
+                        /*intermediary: */
+                        int         i;
+                        int         j;
+                        /*buffers: */
+                        int        *row_indices_forcpu = NULL;
+                        int        *col_indices_forcpu = NULL;
+                        doubletype *values_forcpu      = NULL;
+                        int        *modes_forcpu       = NULL;
+                        /*initialize buffers: */
+                        row_indices_forcpu=*prow_indices_forcpu;
+                        col_indices_forcpu=*pcol_indices_forcpu;
+                        values_forcpu=*pvalues_forcpu;
+                        modes_forcpu=*pmodes_forcpu;
+                        /*fill buffers with out values and indices and modes: */
+                        for(i=0;i<m;i++){
+                                for(j=0;j<n;j++){
+                                        row_indices_forcpu[i*n+j]=idxm[i];
+                                        col_indices_forcpu[i*n+j]=idxn[j];
+                                        values_forcpu[i*n+j]=values[i*n+j];
+                                        modes_forcpu[i*n+j]=mode;
+                                }
+                        }
+                        /*increment buffer for next Bucket who will marshall his data: */
+                        row_indices_forcpu+=m*n;
+                        col_indices_forcpu+=m*n;
+                        values_forcpu+=m*n;
+                        modes_forcpu+=m*n;
+                        /*output modified buffers: */
+                        *prow_indices_forcpu=row_indices_forcpu;
+                        *pcol_indices_forcpu=col_indices_forcpu;
+                        *pvalues_forcpu=values_forcpu;
+                        *pmodes_forcpu=modes_forcpu;
+                };
+                /*}}}*/
+                int MarshallSize(void){ /*{{{*/
+                        if(type=MATRIX_BUCKET){
+                                return m*n;
+                        }
+                        else{
+                                return m;
+                        }
+                };
+                /*}}}*/
 #ifdef _HAVE_MPI_
                         void Isend(int receiver_rank,MPI_Request* requests,int* pcount,MPI_Comm comm){ /*{{{*/

TabularUnified issm/trunk-jpl/src/c/toolkits/issm/IssmMpiDenseMat.h ¶

-              r14822
+              r14833
+                }
                 /*}}}*/
                 /*FUNCTION Assemble{{{*/
                 void Assemble(){
+                /*FUNCTION Assemble2{{{*/
+                void Assemble2(){
                         int           i;
 …
                         xDelete<MPI_Request>(requests);
                         /*}}}*/
+                }
+                /*}}}*/
+                /*FUNCTION Assemble{{{*/
+                void Assemble(){
+                        int           i,j;
+                        int         *RowRank            = NULL;
+                        int           num_procs;
+                        int        *row_indices_forcpu = NULL;
+                        int        *col_indices_forcpu = NULL;
+                        int        *modes_forcpu       = NULL;
+                        doubletype *values_forcpu      = NULL;
+                        int         *numvalues_forcpu   = NULL;
+                        DataSet     **bucketsforcpu       = NULL;
+                        int        **row_indices_fromcpu = NULL;
+                        int        **col_indices_fromcpu = NULL;
+                        int        **modes_fromcpu       = NULL;
+                        doubletype **values_fromcpu      = NULL;
+                        int         *numvalues_fromcpu   = NULL;
+                        int           lower_row;
+                        int           upper_row;
+                        int*          sendcnts            = NULL;
+                        int*          displs              = NULL;
+                        int           count               = 0;
+                        /*some communicator info: */
+                        num_procs=IssmComm::GetSize();
+                        MPI_Comm comm=IssmComm::GetComm();
+                        /*First, make a vector of size M, which for each row between 0 and M-1, tells which cpu this row belongs to: */
+                        RowRank=DetermineRowRankFromLocalSize(M,m,comm);
+                        /*Now, sort out our dataset of buckets according to cpu ownership of rows: */
+                        bucketsforcpu=xNew<DataSet*>(num_procs);
+                        for(i=0;i<num_procs;i++){
+                                DataSet* bucketsofcpu_i=new DataSet();
+                                for (j=0;j<buckets->Size();j++){
+                                        Bucket<doubletype>* bucket=(Bucket<doubletype>*)buckets->GetObjectByOffset(j);
+                                        bucket->SpawnBucketsPerCpu(bucketsofcpu_i,i,RowRank);
+                                }
+                                bucketsforcpu[i]=bucketsofcpu_i;
+                        }
+                        /*Recap, each cpu has num_procs datasets of buckets. For a certain cpu j, for a given dataset i, the buckets this
+                         * dataset owns correspond to rows that are owned by cpu i, not j!. Out of all the buckets we own, make row,col,value,insert_mode
+                         * vectors that will be shipped around the cluster: */
+                        this->BucketsBuildScatterBuffers(&numvalues_forcpu,&row_indices_forcpu,&col_indices_forcpu,&values_forcpu,&modes_forcpu,bucketsforcpu,num_procs);
+                        /*Now, we need to allocate on each cpu arrays to receive data from all the other cpus. To know what we need to allocate, we need
+                         *some scatter calls: */
+                        numvalues_fromcpu   = xNew<int>(num_procs);
+                        for(i=0;i<num_procs;i++){
+                                MPI_Scatter(numvalues_forcpu,num_procs,MPI_INT,numvalues_fromcpu+i,1,MPI_INT,i,comm);
+                        }
+                        for(i=0;i<num_procs;i++){
+                                row_indices_fromcpu[i]=xNew<int>(numvalues_fromcpu[i]);
+                                col_indices_fromcpu[i]=xNew<int>(numvalues_fromcpu[i]);
+                                values_fromcpu[i]=xNew<doubletype>(numvalues_fromcpu[i]);
+                                modes_fromcpu[i]=xNew<int>(numvalues_fromcpu[i]);
+                        }
+                        /*Now, to scatter values across the cluster, we need sendcnts and displs. Our sendbufs have been built by BucketsBuildScatterBuffers, with a stride given
+                         * by numvalues_forcpu. Get this ready to go before starting the scatter itslef. For reference, here is the MPI_Scatterv prototype:
+                         * int MPI_Scatterv( void *sendbuf, int *sendcnts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcnt, MPI_Datatype recvtype, int root, MPI_Comm comm) :*/
+                        sendcnts=xNew<int>(num_procs);
+                        displs=xNew<int>(num_procs);
+                        count=0;
+                        for(i=0;i<num_procs;i++){
+                                sendcnts[i]=numvalues_forcpu[i];
+                                displs[i]=count;
+                                count+=numvalues_forcpu[i];
+                        }
+                        /*Start the scattering: */
+                        for(i=0;i<num_procs;i++){
+                                MPI_Scatterv( row_indices_forcpu, sendcnts, displs, MPI_INT, row_indices_fromcpu[i], numvalues_fromcpu[i], MPI_INT, i, comm);
+                                MPI_Scatterv( col_indices_forcpu, sendcnts, displs, MPI_INT, col_indices_fromcpu[i], numvalues_fromcpu[i], MPI_INT, i, comm);
+                                MPI_Scatterv( values_forcpu, sendcnts, displs, MPI_DOUBLE, values_fromcpu[i], numvalues_fromcpu[i], MPI_DOUBLE, i, comm);
+                                MPI_Scatterv( modes_forcpu, sendcnts, displs, MPI_INT, modes_fromcpu[i], numvalues_fromcpu[i], MPI_INT, i, comm);
+                        }
+                        /*Plug values into global matrix: */
+                        GetOwnershipBoundariesFromRange(&lower_row,&upper_row,m,comm);
+                        for(i=0;i<num_procs;i++){
+                                int  numvalues=numvalues_fromcpu[i];
+                                int* rows=row_indices_fromcpu[i];
+                                int* cols=col_indices_fromcpu[i];
+                                doubletype* values=values_fromcpu[i];
+                                int* mods=modes_fromcpu[i];
+                                for(j=0;j<numvalues;j++){
+                                        if(mods[j]==ADD_VAL) *(matrix+N*(rows[j]-lower_row)+cols[j])+=values[j];
+                                        else *(matrix+N*(rows[j]-lower_row)+cols[j])=values[j];
+                                }
+                        }
+                        /*Free ressources:{{{*/
+                        xDelete<int>(RowRank);
+                        xDelete<int>(row_indices_forcpu);
+                        xDelete<int>(col_indices_forcpu);
+                        xDelete<int>(modes_forcpu);
+                        xDelete<doubletype>(values_forcpu);
+                        xDelete<int>(numvalues_forcpu);
+                        for(i=0;i<num_procs;i++){
+                                DataSet* buckets=bucketsforcpu[i];
+                                delete buckets;
+                        }
+                        xDelete<DataSet*>(bucketsforcpu);
+                        for(i=0;i<num_procs;i++){
+                                int* rows=row_indices_fromcpu[i];
+                                int* cols=col_indices_fromcpu[i];
+                                int* modes=modes_fromcpu[i];
+                                doubletype* values=values_fromcpu[i];
+                                xDelete<int>(rows);
+                                xDelete<int>(cols);
+                                xDelete<int>(modes);
+                                xDelete<doubletype>(values);
+                        }
+                        xDelete<int*>(row_indices_fromcpu);
+                        xDelete<int*>(col_indices_fromcpu);
+                        xDelete<int*>(modes_fromcpu);
+                        xDelete<doubletype*>(values_fromcpu);
+                        xDelete<int>(numvalues_fromcpu);
+                        xDelete<int>(sendcnts);
+                        xDelete<int>(displs);
+                        /*}}}*/
+                }
                 /*}}}*/
 …
+                }
                 /*}}}*/
+                /*FUNCTION BucketsBuildScatterBuffers{{{*/
+                void BucketsBuildScatterBuffers(int** pnumvalues_forcpu,int** prow_indices_forcpu,int** pcol_indices_forcpu,doubletype** pvalues_forcpu,int** pmodes_forcpu,DataSet** bucketsforcpu,int num_procs){
+                        /*intermediary: */
+                        int         i,j;
+                        int         count                   = 0;
+                        int         total_size              = 0;
+                        int        *temp_row_indices_forcpu = NULL;
+                        int        *temp_col_indices_forcpu = NULL;
+                        doubletype *temp_values_forcpu      = NULL;
+                        int        *temp_modes_forcpu       = NULL;
+                        /*output: */
+                        int        *numvalues_forcpu        = NULL;
+                        int        *row_indices_forcpu      = NULL;
+                        int        *col_indices_forcpu      = NULL;
+                        doubletype *values_forcpu           = NULL;
+                        int        *modes_forcpu            = NULL;
+                        /*figure out size of buffers per cpu: */
+                        for(i=0;i<num_procs;i++){
+                                DataSet    *buckets            = bucketsforcpu[i];
+                                count=0;
+                                for(j=0;j<buckets->Size();j++){
+                                        Bucket<doubletype>* bucket =(Bucket<doubletype>*)buckets->GetObjectByOffset(j);
+                                        count+=bucket->MarshallSize();
+                                }
+                                numvalues_forcpu[i]=count;
+                        }
+                        /*now, figure out size of  total buffers (for all cpus!): */
+                        count=0;
+                        for(i=0;i<num_procs;i++){
+                                count+=numvalues_forcpu[i];
+                        }
+                        total_size=count;
+                        /*Allocate buffers: */
+                        row_indices_forcpu = xNew<int>(total_size);
+                        col_indices_forcpu = xNew<int>(total_size);
+                        values_forcpu = xNew<doubletype>(total_size);
+                        modes_forcpu = xNew<int>(total_size);
+                        /*we are going to march through the buffers, and marshall data onto them, so in order to not
+                         *lose track of where these buffers are located in memory, we are going to work using copies
+                         of them: */
+                        temp_row_indices_forcpu=row_indices_forcpu;
+                        temp_col_indices_forcpu=col_indices_forcpu;
+                        temp_values_forcpu=values_forcpu;
+                        temp_modes_forcpu=modes_forcpu;
+                        /*Fill buffers: */
+                        for(i=0;i<num_procs;i++){
+                                DataSet    *buckets            = bucketsforcpu[i];
+                                for(j=0;j<buckets->Size();j++){
+                                        Bucket<doubletype>* bucket =(Bucket<doubletype>*)buckets->GetObjectByOffset(j);
+                                        bucket->Marshall(&temp_row_indices_forcpu,&temp_col_indices_forcpu,&temp_values_forcpu,&temp_modes_forcpu); //pass in the address of the buffers, so as to have the Marshall routine increment them.
+                                }
+                        }
+                        /*output buffers: */
+                        *pnumvalues_forcpu   = row_indices_forcpu;
+                        *prow_indices_forcpu = row_indices_forcpu;
+                        *pcol_indices_forcpu = col_indices_forcpu;
+                        *pvalues_forcpu      = values_forcpu;
+                        *pmodes_forcpu       = modes_forcpu;
+                }
+                /*}}}*/
 };

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 14833

Legend:

TabularUnified issm/trunk-jpl/src/c/classes/objects/Bucket.h ¶

TabularUnified issm/trunk-jpl/src/c/toolkits/issm/IssmMpiDenseMat.h ¶

Download in other formats: