Index: /issm/trunk-jpl/src/c/classes/Node.cpp
===================================================================
--- /issm/trunk-jpl/src/c/classes/Node.cpp	(revision 23639)
+++ /issm/trunk-jpl/src/c/classes/Node.cpp	(revision 23640)
@@ -37,11 +37,11 @@
 }
 /*}}}*/
-Node::Node(int node_id,int node_sid,int node_lid,int node_pid,int io_index,bool node_clone,IoModel* iomodel,int node_analysis,int in_approximation,bool isamr){/*{{{*/
+Node::Node(int node_id,int node_sid,int io_index,bool node_clone,IoModel* iomodel,int node_analysis,int in_approximation,bool isamr){/*{{{*/
 
 	/*id: */
 	this->id            = node_id;
 	this->sid           = node_sid;
-	this->lid           = node_lid;
-	this->pid           = node_pid;
+	this->lid           = -1; /*Assigned by Finalize*/
+	this->pid           = -1; /*Assigned by Finalize*/
 	this->analysis_enum = node_analysis;
 	this->clone         = node_clone;
Index: /issm/trunk-jpl/src/c/classes/Node.h
===================================================================
--- /issm/trunk-jpl/src/c/classes/Node.h	(revision 23639)
+++ /issm/trunk-jpl/src/c/classes/Node.h	(revision 23640)
@@ -26,9 +26,12 @@
 		int  approximation; //For ice flow models, we need to know what ice flow approximation is employed on this node
 		bool clone;  //this node is replicated from another one
-
 		int  id;    // unique arbitrary id.
 		int  sid;   // "serial" id (rank of this node if the dataset was serial on 1 cpu)
 		int  lid;   // "local"  id (rank of this node in current partition)
 		int  pid;   // parallel id (specific to this partition)
+
+		/*Only this function can access these private fields*/
+		//friend void Nodes::Finalize();
+		friend class Nodes;
 
 	public: 
@@ -64,5 +67,5 @@
 		/*Node constructors, destructors*/
 		Node();
-		Node(int node_id,int node_sid,int node_lid,int node_pid,int io_index,bool isclone,IoModel* iomodel,int analysis_enum,int approximation_in,bool isamr);
+		Node(int node_id,int node_sid,int io_index,bool isclone,IoModel* iomodel,int analysis_enum,int approximation_in,bool isamr);
 		~Node();
 
Index: /issm/trunk-jpl/src/c/classes/Nodes.cpp
===================================================================
--- /issm/trunk-jpl/src/c/classes/Nodes.cpp	(revision 23639)
+++ /issm/trunk-jpl/src/c/classes/Nodes.cpp	(revision 23640)
@@ -20,9 +20,12 @@
 /*Object constructors and destructor*/
 Nodes::Nodes(){/*{{{*/
-	enum_type=NodesEnum;
-	this->common_recv     = NULL;
-	this->common_recv_ids = NULL;
-	this->common_send     = NULL;
-	this->common_send_ids = NULL;
+	this->enum_type             = NodesEnum;
+	this->common_recv           = NULL;
+	this->common_recv_ids       = NULL;
+	this->common_send           = NULL;
+	this->common_send_ids       = NULL;
+	this->numberofnodes         = -1;
+	this->numberofnodes_local   = -1;
+	this->numberofmasters_local = -1;
 	return;
 }
@@ -61,17 +64,19 @@
 	/*Build id_offsets and sorted_ids*/
 	int objsize = this->numsorted;
+	output->id_offsets=NULL;
+	output->sorted_ids=NULL;
 	if(this->sorted && objsize>0 && this->id_offsets){	
-		/*Allocate new ids*/
 		output->id_offsets=xNew<int>(objsize);
 		xMemCpy<int>(output->id_offsets,this->id_offsets,objsize);
 	}
-	else output->id_offsets=NULL;
 	if(this->sorted && objsize>0 && this->sorted_ids){
-		/*Allocate new ids*/
 		output->sorted_ids=xNew<int>(objsize);
 		xMemCpy<int>(output->sorted_ids,this->sorted_ids,objsize);
 	}
-	else output->sorted_ids=NULL;
-
+
+	/*Copy other fields*/
+	output->numberofnodes         = this->numberofnodes;
+	output->numberofnodes_local   = this->numberofnodes_local;
+	output->numberofmasters_local = this->numberofmasters_local;
 
 	if(this->common_recv){
@@ -106,4 +111,8 @@
 	int test = num_procs;
 	MARSHALLING_ENUM(NodesEnum);
+	MARSHALLING(numberofnodes);
+	MARSHALLING(numberofnodes_local);
+	MARSHALLING(numberofmasters_local);
+
 	MARSHALLING(test);
 	if(test!=num_procs) _error_("number of cores is not the same as before");
@@ -209,4 +218,81 @@
 }
 /*}}}*/
+void  Nodes::Finalize(){/*{{{*/
+
+	/*Here we do 4 things:
+	 * - count all nodes once for all so that we do not need to call MPI
+	 *   every time we need to know the total number of vertices
+	 * - Distribute lids (local ids): masters first, slaves second
+	 * - Distribute pids (parallel ids)
+	 * - Distribute Gset once for all
+	 */
+
+	/*recover my_rank:*/
+	ISSM_MPI_Status status;
+	int my_rank   = IssmComm::GetRank();
+	int num_procs = IssmComm::GetSize();
+
+	/*1. set number of nodes once for all*/
+	this->numberofnodes_local=this->Size();
+	this->numberofmasters_local=0;
+	for(int i=0;i<this->Size();i++){
+		Node* node=xDynamicCast<Node*>(this->GetObjectByOffset(i));
+		if(!node->clone) this->numberofmasters_local++;
+	}
+	ISSM_MPI_Allreduce((void*)&this->numberofmasters_local,(void*)&this->numberofnodes,1,ISSM_MPI_INT,ISSM_MPI_SUM,IssmComm::GetComm());
+
+	/*2. Distribute lids (First: masters, then clones)*/
+	int lid = 0;
+	for(int i=0;i<this->Size();i++){
+		Node* node=xDynamicCast<Node*>(this->GetObjectByOffset(i));
+		if(!node->clone) node->lid=lid++;
+	}
+	for(int i=0;i<this->Size();i++){
+		Node* node=xDynamicCast<Node*>(this->GetObjectByOffset(i));
+		if(node->clone) node->lid=lid++;
+	}
+
+	/*3. Distribute pids based on lids and offsets*/
+	int* all_num_masters=xNew<int>(num_procs);
+	ISSM_MPI_Gather(&this->numberofmasters_local,1,ISSM_MPI_INT,all_num_masters,1,ISSM_MPI_INT,0,IssmComm::GetComm());
+	ISSM_MPI_Bcast(all_num_masters,num_procs,ISSM_MPI_INT,0,IssmComm::GetComm());
+	int offset=0;
+	for(int i=0;i<my_rank;i++) offset+=all_num_masters[i];
+	xDelete<int>(all_num_masters);
+
+	for(int i=0;i<this->Size();i++){
+		Node* node=xDynamicCast<Node*>(this->GetObjectByOffset(i));
+		node->pid = node->lid+offset;
+	}
+
+	/* Share pids of masters and update pids of clones*/
+	int* truepids = xNew<int>(this->Size()); //only one alloc
+	for(int rank=0;rank<num_procs;rank++){
+		if(this->common_send[rank]){
+			int  numids = this->common_send[rank];
+			for(int i=0;i<numids;i++){
+				Node* node=xDynamicCast<Node*>(this->GetObjectByOffset(this->common_send_ids[rank][i]));
+				truepids[i] = node->pid;
+			}
+			ISSM_MPI_Send(truepids,numids,ISSM_MPI_INT,rank,0,IssmComm::GetComm());
+		}
+	}
+	for(int rank=0;rank<num_procs;rank++){
+		if(this->common_recv[rank]){
+			int  numids = this->common_recv[rank];
+			ISSM_MPI_Recv(truepids,numids,ISSM_MPI_INT,rank,0,IssmComm::GetComm(),&status);
+			for(int i=0;i<numids;i++){
+				Node* node=xDynamicCast<Node*>(this->GetObjectByOffset(this->common_recv_ids[rank][i]));
+				node->pid = truepids[i];
+			}
+		}
+	}
+	xDelete<int>(truepids);
+
+	/*4. Distribute G dofs once for all*/
+	//this->DistributeDofs(GsetEnum);
+
+	return;
+}/*}}}*/
 int   Nodes::MaxNumDofs(int setenum){/*{{{*/
 
@@ -271,21 +357,5 @@
 int   Nodes::NumberOfNodes(void){/*{{{*/
 
-	/*Careful! only use once all clones have been setup for all nodes!: */
-
-	int numnodes=0;
-	int allnumnodes=0;
-
-	/*Now go through all nodes, and get how many dofs they own, unless they are clone nodes: */
-	for(int i=0;i<this->Size();i++){
-		Node* node=xDynamicCast<Node*>(this->GetObjectByOffset(i));
-
-		/*Ok, this object is a node, ask it to plug values into partition: */
-		if (!node->IsClone()) numnodes++;
-	}
-
-	/*Gather from all cpus: */
-	ISSM_MPI_Allreduce((void*)&numnodes,(void*)&allnumnodes,1,ISSM_MPI_INT,ISSM_MPI_SUM,IssmComm::GetComm());
-
-	return allnumnodes;
+	return this->numberofnodes;
 }
 /*}}}*/
Index: /issm/trunk-jpl/src/c/classes/Nodes.h
===================================================================
--- /issm/trunk-jpl/src/c/classes/Nodes.h	(revision 23639)
+++ /issm/trunk-jpl/src/c/classes/Nodes.h	(revision 23640)
@@ -18,4 +18,8 @@
 class Nodes: public DataSet{
 
+	private:
+		int numberofnodes;
+		int numberofnodes_local;
+		int numberofmasters_local;
 	public:
 		int*  common_recv;
@@ -30,8 +34,9 @@
 		/*Objects virtual functions*/
 		Nodes* Copy();
-		void Marshall(char** pmarshalled_data,int* pmarshalled_data_size, int marshall_direction);
+		void   Marshall(char** pmarshalled_data,int* pmarshalled_data_size, int marshall_direction);
 
 		/*numerics*/
 		void  DistributeDofs(int SETENUM);
+		void  Finalize(void);
 		int   MaxNumDofs(int setenum);
 		int   NumberOfDofs(int setenum);
Index: /issm/trunk-jpl/src/c/classes/Vertex.cpp
===================================================================
--- /issm/trunk-jpl/src/c/classes/Vertex.cpp	(revision 23639)
+++ /issm/trunk-jpl/src/c/classes/Vertex.cpp	(revision 23640)
@@ -20,5 +20,5 @@
 }
 /*}}}*/
-Vertex::Vertex(int vertex_id, int vertex_sid,int vertex_pid,bool vertex_clone, IoModel* iomodel,bool isamr){/*{{{*/
+Vertex::Vertex(int vertex_id, int vertex_sid,bool vertex_clone, IoModel* iomodel,bool isamr){/*{{{*/
 
 	/*Checks in debugging mode*/
@@ -28,5 +28,5 @@
 	this->id    = vertex_id;
 	this->sid   = vertex_sid;
-	this->pid   = vertex_pid;
+	this->pid   = -1; /*Assigned later*/
 	this->lid   = -1; /*Assigned later*/
 	this->clone = vertex_clone;
Index: /issm/trunk-jpl/src/c/classes/Vertex.h
===================================================================
--- /issm/trunk-jpl/src/c/classes/Vertex.h	(revision 23639)
+++ /issm/trunk-jpl/src/c/classes/Vertex.h	(revision 23640)
@@ -37,5 +37,5 @@
 		/*Vertex constructors, destructors {{{*/
 		Vertex();
-		Vertex(int id, int sid,int pid,bool clone, IoModel* iomodel,bool isamr);
+		Vertex(int id, int sid,bool clone, IoModel* iomodel,bool isamr);
 		~Vertex();
 		/*}}}*/
Index: /issm/trunk-jpl/src/c/classes/Vertices.cpp
===================================================================
--- /issm/trunk-jpl/src/c/classes/Vertices.cpp	(revision 23639)
+++ /issm/trunk-jpl/src/c/classes/Vertices.cpp	(revision 23640)
@@ -173,4 +173,11 @@
 void Vertices::Finalize(){/*{{{*/
 
+	/*Here we do 3 things:
+	 * - count all vertices once for all so that we do not need to call MPI
+	 *   every time we need to know the total number of vertices
+	 * - Distribute lids (local ids): masters first, slaves second
+	 * - Distribute pids (parallel ids)
+	 *   */
+
 	/*recover my_rank:*/
 	ISSM_MPI_Status status;
@@ -178,5 +185,5 @@
 	int num_procs = IssmComm::GetSize();
 
-	/*1. set counters*/
+	/*1. set number of vertices once for all*/
 	this->numberofvertices_local=this->Size();
 	this->numberofmasters_local=0;
@@ -198,16 +205,12 @@
 	}
 
-	/* Now every object has distributed dofs, but locally, and with a dof count starting from 
-	 * 0. This means the dofs between all the cpus are not unique. We now offset the dofs of each
-	 * cpus by the total last (master) dofs of the previus cpu, starting from 0.
-	 * First: get number of dofs for each cpu*/
+	/*3. Distribute pids based on lids and offsets*/
 	int* all_num_masters=xNew<int>(num_procs);
 	ISSM_MPI_Gather(&this->numberofmasters_local,1,ISSM_MPI_INT,all_num_masters,1,ISSM_MPI_INT,0,IssmComm::GetComm());
 	ISSM_MPI_Bcast(all_num_masters,num_procs,ISSM_MPI_INT,0,IssmComm::GetComm());
-
-	/* Every cpu should start its own dof count at the end of the dofcount from cpu-1*/
 	int offset=0;
 	for(int i=0;i<my_rank;i++) offset+=all_num_masters[i];
 	xDelete<int>(all_num_masters);
+
 	for(int i=0;i<this->Size();i++){
 		Vertex* vertex=xDynamicCast<Vertex*>(this->GetObjectByOffset(i));
@@ -215,7 +218,5 @@
 	}
 
-	/* Finally, remember that cpus may have skipped some objects, because they were clones. For every 
-	 * object that is not a clone, tell them to show their dofs, so that later on, they can get picked 
-	 * up by their clones: */
+	/* Share pids of masters and update pids of clones*/
 	int* truepids = xNew<int>(this->Size()); //only one alloc
 	for(int rank=0;rank<num_procs;rank++){
Index: /issm/trunk-jpl/src/c/modules/ModelProcessorx/CreateElementsVerticesAndMaterials.cpp
===================================================================
--- /issm/trunk-jpl/src/c/modules/ModelProcessorx/CreateElementsVerticesAndMaterials.cpp	(revision 23639)
+++ /issm/trunk-jpl/src/c/modules/ModelProcessorx/CreateElementsVerticesAndMaterials.cpp	(revision 23640)
@@ -262,5 +262,4 @@
 void CreateVertices(Elements* elements,Vertices* vertices,IoModel* iomodel,int solution_type,bool isamr){/*{{{*/
 
-
 	/*Get element partitionning*/
 	int* epart = iomodel->epart;
@@ -296,12 +295,12 @@
 
 	/*Create vector of size total numnodes, initialized with -1, that will keep track of local ids*/
-	int  lid = 0;
-	int* vertices_lids  = xNew<int>(iomodel->numberofvertices);
+	int  offset = 0;
+	int* vertices_offsets  = xNew<int>(iomodel->numberofvertices);
 	for(int i=0;i<iomodel->numberofvertices;i++){
 		if(IsVertexInRank(vertices_ranks,vertices_proc_count,i,my_rank)){
-			vertices_lids[i] = lid++;
+			vertices_offsets[i] = offset++;
 		}
 		else{
-			vertices_lids[i] = -1;
+			vertices_offsets[i] = -1;
 		}
 	}
@@ -335,5 +334,5 @@
 
 			/*If we did not find this vertex in our current partition, go to next vertex*/
-			if(vertices_lids[i] == -1) continue;
+			if(vertices_offsets[i] == -1) continue;
 
 			/*Find in what column this rank belongs*/
@@ -358,5 +357,5 @@
 						int rank = vertices_ranks[MAXCONNECTIVITY*i+j];
 						if(step==1){
-							common_send_ids[rank][common_send[rank]] = vertices_lids[i];
+							common_send_ids[rank][common_send[rank]] = vertices_offsets[i];
 						}
 						common_send[rank]++;
@@ -368,5 +367,5 @@
 				int rank = vertices_ranks[MAXCONNECTIVITY*i+0];
 				if(step==1){
-					common_recv_ids[rank][common_recv[rank]] = vertices_lids[i];
+					common_recv_ids[rank][common_recv[rank]] = vertices_offsets[i];
 				}
 				common_recv[rank]++;
@@ -374,26 +373,4 @@
 		}
 	}
-
-	/*Final step: prepare pids (parallel ids), first count number of masters for each proc*/
-	int*  num_masters = xNewZeroInit<int>(num_procs);
-	for(int i=0;i<iomodel->numberofvertices;i++){
-		num_masters[vertices_ranks[MAXCONNECTIVITY*i+0]]++;
-	}
-	/*Now count offsets for each procs (by taking the sum of masters of procs<my_rank)*/
-	int* rank_offsets  = xNewZeroInit<int>(num_procs);
-	for(int i=0;i<num_procs;i++){
-		for(int j=i+1;j<num_procs;j++) rank_offsets[i]+=num_masters[j];
-	}
-	xDelete<int>(num_masters);
-	/*Now create pids*/
-	int* vertices_pids = xNew<int>(iomodel->numberofvertices);
-	int* rank_counters = xNewZeroInit<int>(iomodel->numberofvertices);
-	for(int i=0;i<iomodel->numberofvertices;i++){
-		int rank_master = vertices_ranks[MAXCONNECTIVITY*i+0];
-		vertices_pids[i] = rank_offsets[rank_master]+rank_counters[rank_master];
-		rank_counters[rank_master]++;
-	}
-	xDelete<int>(rank_counters);
-	xDelete<int>(rank_offsets);
 
 	/*Create Vertices, depending on the constructor type: */
@@ -409,7 +386,7 @@
 
 		for(int i=0;i<iomodel->numberofvertices;i++){
-			if(vertices_lids[i]!=-1){
+			if(vertices_offsets[i]!=-1){
 				bool isclone = (vertices_ranks[MAXCONNECTIVITY*i+0]!=my_rank);
-				vertices->AddObject(new Vertex(i+1,i,vertices_pids[i],isclone,iomodel,isamr));
+				vertices->AddObject(new Vertex(i+1,i,isclone,iomodel,isamr));
 			}
 		}
@@ -422,13 +399,11 @@
 	else{
 		for(int i=0;i<iomodel->numberofvertices;i++){
-			if(vertices_lids[i]!=-1){
+			if(vertices_offsets[i]!=-1){
 				bool isclone = (vertices_ranks[MAXCONNECTIVITY*i+0]!=my_rank);
-				vertices->AddObject(new Vertex(i+1,i,vertices_pids[i],isclone,iomodel,isamr));
-			}
-		}
-	}
-
-	xDelete<int>(vertices_lids);
-	xDelete<int>(vertices_pids);
+				vertices->AddObject(new Vertex(i+1,i,isclone,iomodel,isamr));
+			}
+		}
+	}
+	xDelete<int>(vertices_offsets);
 
 	/*Final step, create my_vertices*/
Index: /issm/trunk-jpl/src/c/modules/ModelProcessorx/CreateNodes.cpp
===================================================================
--- /issm/trunk-jpl/src/c/modules/ModelProcessorx/CreateNodes.cpp	(revision 23639)
+++ /issm/trunk-jpl/src/c/modules/ModelProcessorx/CreateNodes.cpp	(revision 23640)
@@ -46,5 +46,5 @@
 	int*     epart = iomodel->epart;
 
-	/*Determine how many nodes we have in total*/
+	/*Determine how many nodes we have in total (which depends on the type of finite element)*/
 	/*{{{*/
 	if(iomodel->meshelementtype==TriaEnum){
@@ -568,12 +568,12 @@
 
 	/*Create vector of size total numnodes, initialized with -1, that will keep track of local ids*/
-	int  lid = 0;
-	int* nodes_lids  = xNew<int>(numnodes);
+	int  offset = 0;
+	int* nodes_offsets  = xNew<int>(numnodes);
 	for(int i=0;i<numnodes;i++){
 		if(IsNodeInRank(nodes_ranks,nodes_proc_count,i,my_rank)){
-			nodes_lids[i] = lid++;
+			nodes_offsets[i] = offset++;
 		}
 		else{
-			nodes_lids[i] = -1;
+			nodes_offsets[i] = -1;
 		}
 	}
@@ -603,5 +603,5 @@
 		for(int i=0;i<numnodes;i++){
 			/*If we did not find this vertex in our current partition, go to next vertex*/
-			if(nodes_lids[i] == -1) continue;
+			if(nodes_offsets[i] == -1) continue;
 			/*Find in what column this rank belongs*/
 			int col = -1;
@@ -625,5 +625,5 @@
 						int rank = nodes_ranks[MAXCONNECTIVITY*i+j];
 						if(step==1){
-							common_send_ids[rank][common_send[rank]] = nodes_lids[i];
+							common_send_ids[rank][common_send[rank]] = nodes_offsets[i];
 						}
 						common_send[rank]++;
@@ -635,5 +635,5 @@
 				int rank = nodes_ranks[MAXCONNECTIVITY*i+0];
 				if(step==1){
-					common_recv_ids[rank][common_recv[rank]] = nodes_lids[i];
+					common_recv_ids[rank][common_recv[rank]] = nodes_offsets[i];
 				}
 				common_recv[rank]++;
@@ -643,33 +643,11 @@
 	xDelete<int>(nodes_proc_count);
 
-	/*Final step: prepare pids (parallel ids), first count number of masters for each proc*/
-	int*  num_masters = xNewZeroInit<int>(num_procs);
-	for(int i=0;i<numnodes;i++){
-		num_masters[nodes_ranks[MAXCONNECTIVITY*i+0]]++;
-	}
-	/*Now count offsets for each procs (by taking the sum of masters of procs<my_rank)*/
-	int* rank_offsets = xNewZeroInit<int>(num_procs);
-	for(int i=0;i<num_procs;i++){
-		for(int j=i+1;j<num_procs;j++) rank_offsets[i]+=num_masters[j];
-	}
-	xDelete<int>(num_masters);
-	/*Now create pids*/
-	int* nodes_pids = xNew<int>(numnodes);
-	int* rank_counters = xNewZeroInit<int>(numnodes);
-	for(int i=0;i<numnodes;i++){
-		int rank_master = nodes_ranks[MAXCONNECTIVITY*i+0];
-		nodes_pids[i] = rank_offsets[rank_master]+rank_counters[rank_master];
-		rank_counters[rank_master]++;
-	}
-	xDelete<int>(rank_counters);
-	xDelete<int>(rank_offsets);
-
 	/*Go ahead and create vertices now that we have all we need*/
 	for(int i=0;i<numnodes;i++){
-		if(nodes_lids[i]!=-1){
+		if(nodes_offsets[i]!=-1){
 			bool isclone = (nodes_ranks[MAXCONNECTIVITY*i+0]!=my_rank);
 			int io_index = 0;
 			if(i<iomodel->numberofvertices) io_index = i;
-			Node* node=new Node(i+1,i,nodes_lids[i],nodes_pids[i],io_index,isclone,iomodel,analysis,nodes_approx[i],isamr);
+			Node* node=new Node(i+1,i,io_index,isclone,iomodel,analysis,nodes_approx[i],isamr);
 			if(finite_element==MINIcondensedEnum || finite_element==P1bubblecondensedEnum){
 				/*Bubble function is collapsed, needs to constrain it, maybe this is not the best place to do this, but that's life!*/
@@ -681,13 +659,17 @@
 		}
 	}
+
+	/*Free data: */
 	xDelete<int>(nodes_approx);
 	xDelete<int>(nodes_ranks);
-	xDelete<int>(nodes_lids);
-	xDelete<int>(nodes_pids);
-
+	xDelete<int>(nodes_offsets);
+
+	/*Assign communicators*/
 	nodes->common_send=common_send;
 	nodes->common_recv=common_recv;
 	nodes->common_send_ids=common_send_ids;
 	nodes->common_recv_ids=common_recv_ids;
+
+	/*Finalize Initialization*/
+	nodes->Finalize();
 }
-
