Changeset 14833
- Timestamp: 05/01/13 12:30:08
- Location: issm/trunk-jpl/src/c
- Files: 2 edited
issm/trunk-jpl/src/c/classes/objects/Bucket.h
r14822 → r14833: two new methods, Marshall() and MarshallSize(), are added to the Bucket class:

   void Marshall(int** prow_indices_forcpu,int** pcol_indices_forcpu,doubletype** pvalues_forcpu,int** pmodes_forcpu){ /*{{{*/

      /*intermediary: */
      int i;
      int j;

      /*buffers: */
      int        *row_indices_forcpu = NULL;
      int        *col_indices_forcpu = NULL;
      doubletype *values_forcpu      = NULL;
      int        *modes_forcpu       = NULL;

      /*initialize buffers: */
      row_indices_forcpu=*prow_indices_forcpu;
      col_indices_forcpu=*pcol_indices_forcpu;
      values_forcpu=*pvalues_forcpu;
      modes_forcpu=*pmodes_forcpu;

      /*fill buffers with our values, indices and modes: */
      for(i=0;i<m;i++){
         for(j=0;j<n;j++){
            row_indices_forcpu[i*n+j]=idxm[i];
            col_indices_forcpu[i*n+j]=idxn[j];
            values_forcpu[i*n+j]=values[i*n+j];
            modes_forcpu[i*n+j]=mode;
         }
      }

      /*increment buffers for the next Bucket that will marshall its data: */
      row_indices_forcpu+=m*n;
      col_indices_forcpu+=m*n;
      values_forcpu+=m*n;
      modes_forcpu+=m*n;

      /*output modified buffers: */
      *prow_indices_forcpu=row_indices_forcpu;
      *pcol_indices_forcpu=col_indices_forcpu;
      *pvalues_forcpu=values_forcpu;
      *pmodes_forcpu=modes_forcpu;
   };
   /*}}}*/
   int MarshallSize(void){ /*{{{*/

      if(type==MATRIX_BUCKET){
         return m*n;
      }
      else{
         return m;
      }
   };
   /*}}}*/
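For context, Marshall() appends a bucket's rows, columns, values and insert modes into caller-owned flat buffers and then advances the caller's pointers, so that successive buckets write into consecutive slices of a single allocation. The following is a minimal standalone sketch of that advancing-pointer pattern; the SimpleBucket type and the fixed sizes are illustrative only and are not part of ISSM:

#include <cstdio>

/*Illustrative stand-in for a matrix bucket: an m x n block of values targeting rows idxm and columns idxn.*/
struct SimpleBucket{
   int    m,n;
   int    idxm[2],idxn[2];
   double values[4];

   /*Append this bucket's entries to the caller's buffers, then advance the caller's pointers so the
    *next bucket writes right after us (same contract as Bucket::Marshall above):*/
   void Marshall(int** prows,int** pcols,double** pvals){
      int* rows=*prows; int* cols=*pcols; double* vals=*pvals;
      for(int i=0;i<m;i++){
         for(int j=0;j<n;j++){
            rows[i*n+j]=idxm[i];
            cols[i*n+j]=idxn[j];
            vals[i*n+j]=values[i*n+j];
         }
      }
      *prows=rows+m*n; *pcols=cols+m*n; *pvals=vals+m*n;
   }
};

int main(){
   SimpleBucket b1={2,2,{0,1},{0,1},{1.,2.,3.,4.}};
   SimpleBucket b2={2,2,{2,3},{2,3},{5.,6.,7.,8.}};

   /*one flat allocation sized for both buckets (4 entries each):*/
   int rows[8],cols[8]; double vals[8];
   int* prows=rows; int* pcols=cols; double* pvals=vals;

   b1.Marshall(&prows,&pcols,&pvals);  /*fills slots 0..3 and advances the pointers by 4*/
   b2.Marshall(&prows,&pcols,&pvals);  /*fills slots 4..7*/

   for(int k=0;k<8;k++) printf("(%d,%d)=%g\n",rows[k],cols[k],vals[k]);
   return 0;
}

BucketsBuildScatterBuffers() in the second file relies on exactly this contract: it sizes one flat buffer per field with MarshallSize() and then lets each bucket marshall itself into the next slice.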
issm/trunk-jpl/src/c/toolkits/issm/IssmMpiDenseMat.h
r14822 → r14833: the existing Assemble() is renamed Assemble2() (its body is unchanged), a new Assemble() based on MPI scatter calls is added after it, and a helper BucketsBuildScatterBuffers() is added further down in the class.

   /*FUNCTION Assemble2{{{*/
   void Assemble2(){

      int i;
      …                                   /*body unchanged from the former Assemble()*/
      xDelete<MPI_Request>(requests);
      /*}}}*/
   }
   /*}}}*/
   /*FUNCTION Assemble{{{*/
   void Assemble(){

      int i,j;

      int        *RowRank              = NULL;
      int         num_procs;

      int        *row_indices_forcpu   = NULL;
      int        *col_indices_forcpu   = NULL;
      int        *modes_forcpu         = NULL;
      doubletype *values_forcpu        = NULL;
      int        *numvalues_forcpu     = NULL;
      DataSet   **bucketsforcpu        = NULL;

      int        **row_indices_fromcpu = NULL;
      int        **col_indices_fromcpu = NULL;
      int        **modes_fromcpu       = NULL;
      doubletype **values_fromcpu      = NULL;
      int         *numvalues_fromcpu   = NULL;

      int  lower_row;
      int  upper_row;
      int* sendcnts = NULL;
      int* displs   = NULL;
      int  count    = 0;

      /*some communicator info: */
      num_procs=IssmComm::GetSize();
      MPI_Comm comm=IssmComm::GetComm();

      /*First, make a vector of size M, which for each row between 0 and M-1, tells which cpu this row belongs to: */
      RowRank=DetermineRowRankFromLocalSize(M,m,comm);

      /*Now, sort our dataset of buckets according to cpu ownership of rows: */
      bucketsforcpu=xNew<DataSet*>(num_procs);

      for(i=0;i<num_procs;i++){
         DataSet* bucketsofcpu_i=new DataSet();
         for(j=0;j<buckets->Size();j++){
            Bucket<doubletype>* bucket=(Bucket<doubletype>*)buckets->GetObjectByOffset(j);
            bucket->SpawnBucketsPerCpu(bucketsofcpu_i,i,RowRank);
         }
         bucketsforcpu[i]=bucketsofcpu_i;
      }

      /*Recap: each cpu now holds num_procs datasets of buckets. For a given cpu j, dataset i holds the buckets whose
       * rows are owned by cpu i, not j. Out of all the buckets we own, build the row, col, value and insert_mode
       * vectors that will be shipped around the cluster: */
      this->BucketsBuildScatterBuffers(&numvalues_forcpu,&row_indices_forcpu,&col_indices_forcpu,&values_forcpu,&modes_forcpu,bucketsforcpu,num_procs);

      /*Now, we need to allocate on each cpu arrays to receive data from all the other cpus. To know how much to
       *allocate, we need some scatter calls: */
      numvalues_fromcpu=xNew<int>(num_procs);
      for(i=0;i<num_procs;i++){
         MPI_Scatter(numvalues_forcpu,1,MPI_INT,numvalues_fromcpu+i,1,MPI_INT,i,comm);
      }

      /*allocate the per-cpu receive buffers: */
      row_indices_fromcpu=xNew<int*>(num_procs);
      col_indices_fromcpu=xNew<int*>(num_procs);
      values_fromcpu=xNew<doubletype*>(num_procs);
      modes_fromcpu=xNew<int*>(num_procs);
      for(i=0;i<num_procs;i++){
         row_indices_fromcpu[i]=xNew<int>(numvalues_fromcpu[i]);
         col_indices_fromcpu[i]=xNew<int>(numvalues_fromcpu[i]);
         values_fromcpu[i]=xNew<doubletype>(numvalues_fromcpu[i]);
         modes_fromcpu[i]=xNew<int>(numvalues_fromcpu[i]);
      }

      /*Now, to scatter values across the cluster, we need sendcnts and displs. Our send buffers have been built by
       * BucketsBuildScatterBuffers, with a stride given by numvalues_forcpu. Get this ready before starting the
       * scatter itself. For reference, here is the MPI_Scatterv prototype:
       * int MPI_Scatterv(void *sendbuf, int *sendcnts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcnt, MPI_Datatype recvtype, int root, MPI_Comm comm); */
      sendcnts=xNew<int>(num_procs);
      displs=xNew<int>(num_procs);
      count=0;
      for(i=0;i<num_procs;i++){
         sendcnts[i]=numvalues_forcpu[i];
         displs[i]=count;
         count+=numvalues_forcpu[i];
      }

      /*Start the scattering: */
      for(i=0;i<num_procs;i++){
         MPI_Scatterv(row_indices_forcpu,sendcnts,displs,MPI_INT,row_indices_fromcpu[i],numvalues_fromcpu[i],MPI_INT,i,comm);
         MPI_Scatterv(col_indices_forcpu,sendcnts,displs,MPI_INT,col_indices_fromcpu[i],numvalues_fromcpu[i],MPI_INT,i,comm);
         MPI_Scatterv(values_forcpu,sendcnts,displs,MPI_DOUBLE,values_fromcpu[i],numvalues_fromcpu[i],MPI_DOUBLE,i,comm);
         MPI_Scatterv(modes_forcpu,sendcnts,displs,MPI_INT,modes_fromcpu[i],numvalues_fromcpu[i],MPI_INT,i,comm);
      }

      /*Plug values into global matrix: */
      GetOwnershipBoundariesFromRange(&lower_row,&upper_row,m,comm);
      for(i=0;i<num_procs;i++){
         int         numvalues=numvalues_fromcpu[i];
         int*        rows=row_indices_fromcpu[i];
         int*        cols=col_indices_fromcpu[i];
         doubletype* values=values_fromcpu[i];
         int*        mods=modes_fromcpu[i];

         for(j=0;j<numvalues;j++){
            if(mods[j]==ADD_VAL) *(matrix+N*(rows[j]-lower_row)+cols[j])+=values[j];
            else                 *(matrix+N*(rows[j]-lower_row)+cols[j]) =values[j];
         }
      }

      /*Free resources:{{{*/
      xDelete<int>(RowRank);
      xDelete<int>(row_indices_forcpu);
      xDelete<int>(col_indices_forcpu);
      xDelete<int>(modes_forcpu);
      xDelete<doubletype>(values_forcpu);
      xDelete<int>(numvalues_forcpu);

      for(i=0;i<num_procs;i++){
         DataSet* buckets=bucketsforcpu[i];
         delete buckets;
      }
      xDelete<DataSet*>(bucketsforcpu);

      for(i=0;i<num_procs;i++){
         int*        rows=row_indices_fromcpu[i];
         int*        cols=col_indices_fromcpu[i];
         int*        modes=modes_fromcpu[i];
         doubletype* values=values_fromcpu[i];

         xDelete<int>(rows);
         xDelete<int>(cols);
         xDelete<int>(modes);
         xDelete<doubletype>(values);
      }
      xDelete<int*>(row_indices_fromcpu);
      xDelete<int*>(col_indices_fromcpu);
      xDelete<int*>(modes_fromcpu);
      xDelete<doubletype*>(values_fromcpu);
      xDelete<int>(numvalues_fromcpu);

      xDelete<int>(sendcnts);
      xDelete<int>(displs);
      /*}}}*/
   }
   /*}}}*/
   …                                   /*unchanged methods elided*/
   /*FUNCTION BucketsBuildScatterBuffers{{{*/
   void BucketsBuildScatterBuffers(int** pnumvalues_forcpu,int** prow_indices_forcpu,int** pcol_indices_forcpu,doubletype** pvalues_forcpu,int** pmodes_forcpu,DataSet** bucketsforcpu,int num_procs){

      /*intermediary: */
      int         i,j;
      int         count      = 0;
      int         total_size = 0;
      int        *temp_row_indices_forcpu = NULL;
      int        *temp_col_indices_forcpu = NULL;
      doubletype *temp_values_forcpu      = NULL;
      int        *temp_modes_forcpu       = NULL;

      /*output: */
      int        *numvalues_forcpu   = NULL;
      int        *row_indices_forcpu = NULL;
      int        *col_indices_forcpu = NULL;
      doubletype *values_forcpu      = NULL;
      int        *modes_forcpu       = NULL;

      /*figure out size of buffers per cpu: */
      numvalues_forcpu=xNew<int>(num_procs);
      for(i=0;i<num_procs;i++){
         DataSet *buckets=bucketsforcpu[i];

         count=0;
         for(j=0;j<buckets->Size();j++){
            Bucket<doubletype>* bucket=(Bucket<doubletype>*)buckets->GetObjectByOffset(j);
            count+=bucket->MarshallSize();
         }

         numvalues_forcpu[i]=count;
      }

      /*now, figure out size of total buffers (for all cpus!): */
      count=0;
      for(i=0;i<num_procs;i++){
         count+=numvalues_forcpu[i];
      }
      total_size=count;

      /*Allocate buffers: */
      row_indices_forcpu = xNew<int>(total_size);
      col_indices_forcpu = xNew<int>(total_size);
      values_forcpu      = xNew<doubletype>(total_size);
      modes_forcpu       = xNew<int>(total_size);

      /*we are going to march through the buffers and marshall data onto them, so in order not to lose
       *track of where these buffers are located in memory, we work on copies of them: */
      temp_row_indices_forcpu=row_indices_forcpu;
      temp_col_indices_forcpu=col_indices_forcpu;
      temp_values_forcpu=values_forcpu;
      temp_modes_forcpu=modes_forcpu;

      /*Fill buffers: */
      for(i=0;i<num_procs;i++){
         DataSet *buckets=bucketsforcpu[i];
         for(j=0;j<buckets->Size();j++){
            Bucket<doubletype>* bucket=(Bucket<doubletype>*)buckets->GetObjectByOffset(j);
            bucket->Marshall(&temp_row_indices_forcpu,&temp_col_indices_forcpu,&temp_values_forcpu,&temp_modes_forcpu); //pass in the address of the buffers, so the Marshall routine can increment them
         }
      }

      /*output buffers: */
      *pnumvalues_forcpu   = numvalues_forcpu;
      *prow_indices_forcpu = row_indices_forcpu;
      *pcol_indices_forcpu = col_indices_forcpu;
      *pvalues_forcpu      = values_forcpu;
      *pmodes_forcpu       = modes_forcpu;
   }
   /*}}}*/
};
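The core of the new Assemble() is the counts-and-displacements exchange: each rank packs one contiguous send buffer ordered by destination, describes it with sendcnts/displs, and every rank takes a turn as the MPI_Scatterv root. Below is a minimal standalone sketch of that pattern, assuming plain MPI with int payloads; the buffer contents and sizes are made up for illustration and are not ISSM data:

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc,char** argv){
   MPI_Init(&argc,&argv);
   int rank,num_procs;
   MPI_Comm_rank(MPI_COMM_WORLD,&rank);
   MPI_Comm_size(MPI_COMM_WORLD,&num_procs);

   /*each rank pretends it has (rank+1) entries for every destination, packed destination by destination: */
   std::vector<int> sendcnts(num_procs),displs(num_procs);
   int count=0;
   for(int i=0;i<num_procs;i++){
      sendcnts[i]=rank+1;   /*how many entries we send to rank i*/
      displs[i]=count;      /*where rank i's slice starts in our packed buffer*/
      count+=sendcnts[i];
   }
   std::vector<int> sendbuf(count,rank);  /*payload: just our own rank id*/

   /*first exchange the counts (cf. the MPI_Scatter loop in Assemble()), then the payloads: */
   std::vector<int> recvcnts(num_procs);
   for(int i=0;i<num_procs;i++){
      MPI_Scatter(sendcnts.data(),1,MPI_INT,&recvcnts[i],1,MPI_INT,i,MPI_COMM_WORLD);
   }
   std::vector< std::vector<int> > recvbufs(num_procs);
   for(int i=0;i<num_procs;i++){
      recvbufs[i].resize(recvcnts[i]);
      MPI_Scatterv(sendbuf.data(),sendcnts.data(),displs.data(),MPI_INT,
                   recvbufs[i].data(),recvcnts[i],MPI_INT,i,MPI_COMM_WORLD);
   }

   for(int i=0;i<num_procs;i++) printf("rank %d received %d entries from rank %d\n",rank,recvcnts[i],i);

   MPI_Finalize();
   return 0;
}

Assemble() applies this same sendcnts/displs setup four times over (row indices, column indices, values and insert modes), which is why the counts are exchanged once and then reused for every MPI_Scatterv call.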