/*!\file:  Partitioning.cpp
 * \brief: partition elements and nodes and vertices
 */ 

#ifdef HAVE_CONFIG_H
	#include "config.h"
#else
#error "Cannot compile with HAVE_CONFIG_H symbol! run configure first!"
#endif

#include <string.h>
#include "./IoModel.h"
#include "../shared/shared.h"
#include "../EnumDefinitions/EnumDefinitions.h"
#include "../include/macros.h"
#include "../include/typedefs.h"
#include "../MeshPartitionx/MeshPartitionx.h"

/*Forward declarations of the two partitioning routines defined further down in this file. 
 * Both take the same arguments as Partitioning, which dispatches to one of them: */
void  DiscontinuousGalerkinPartitioning(bool** pmy_elements, bool** pmy_vertices, bool** pmy_nodes, bool** pmy_bordervertices, IoModel* iomodel, ConstDataHandle iomodel_handle);
void  ContinuousGalerkinPartitioning(bool** pmy_elements, bool** pmy_vertices, bool** pmy_nodes, bool** pmy_bordervertices, IoModel* iomodel, ConstDataHandle iomodel_handle);

/*Partitioning: entry point of the partitioning. Picks the partitioning routine according 
 * to iomodel->analysis_type and forwards all of its arguments, untouched, to that routine. 
 * The four output pointers are filled in by the routine that ends up being called.*/
void  Partitioning(bool** pmy_elements, bool** pmy_vertices, bool** pmy_nodes, bool** pmy_bordervertices, IoModel* iomodel, ConstDataHandle iomodel_handle){

	/*Prognostic2 analysis is the only one routed to the discontinuous Galerkin partitioning: */
	if (iomodel->analysis_type==Prognostic2AnalysisEnum){
		DiscontinuousGalerkinPartitioning(pmy_elements, pmy_vertices, pmy_nodes, pmy_bordervertices, iomodel, iomodel_handle);
		return;
	}

	/*Any other analysis type goes through the continuous Galerkin partitioning: */
	ContinuousGalerkinPartitioning(pmy_elements, pmy_vertices, pmy_nodes, pmy_bordervertices, iomodel, iomodel_handle);
}

/*ContinuousGalerkinPartitioning: build, for the calling cpu (my_rank), boolean flag arrays 
 * describing which elements, vertices and nodes belong to its partition, and which vertices 
 * are shared with other partitions.
 *
 * Outputs (all allocated here and handed over to the caller through the p* arguments): 
 *   pmy_elements:       numberofelements flags, 1 if the element is in this partition
 *   pmy_vertices:       numberofvertices flags, 1 if the vertex is used by one of my elements
 *   pmy_nodes:          a copy of the vertices flags (as many nodes as there are vertices)
 *   pmy_bordervertices: numberofvertices flags, 1 if the vertex was flagged by more than one cpu 
 *                       (always 0 when _PARALLEL_ is not defined)
 *
 * Inputs: 
 *   iomodel:        mesh description. Its elements, elements2d and riftinfo fields are fetched 
 *                   from iomodel_handle as needed and freed again before returning.
 *   iomodel_handle: handle the data is fetched from.
 */
void  ContinuousGalerkinPartitioning(bool** pmy_elements, bool** pmy_vertices, bool** pmy_nodes, bool** pmy_bordervertices, IoModel* iomodel, ConstDataHandle iomodel_handle){

	/*as many nodes as there are vertices */

	int i,j;

	extern int my_rank;
	extern int num_procs;

	/*output: */
	bool* my_elements=NULL;
	bool* my_vertices=NULL;
	bool* my_nodes=NULL;
	bool* my_bordervertices=NULL;

	/*intermediary: */
	int* epart=NULL; //element partitioning.
	int* npart=NULL; //node partitioning.
	int  elements_width; //number of columns in elements (2d->3, 3d->6)
	Vec  bordervertices=NULL;
	double* serial_bordervertices=NULL;
	int  el1,el2;

	/*Number of vertices per elements, needed to correctly retrieve data: */
	if(strcmp(iomodel->meshtype,"2d")==0) elements_width=3; //tria elements
	else elements_width=6; //penta elements

	#ifdef _PARALLEL_
	/*Determine parallel partitioning of elements: we use Metis for now. First load the data, then partition*/
	if(strcmp(iomodel->meshtype,"2d")==0){
		/*load elements: */
		IoModelFetchData(&iomodel->elements,NULL,NULL,iomodel_handle,"elements");
	}
	else{
		/*load elements2d: */
		IoModelFetchData(&iomodel->elements2d,NULL,NULL,iomodel_handle,"elements2d");
	}

	MeshPartitionx(&epart, &npart,iomodel->numberofelements,iomodel->numberofvertices,iomodel->elements, iomodel->numberofelements2d,iomodel->numberofvertices2d,iomodel->elements2d,iomodel->numlayers,elements_width, iomodel->meshtype,num_procs);

	/*Free elements and elements2d: */
	xfree((void**)&iomodel->elements);
	xfree((void**)&iomodel->elements2d);

	#else
	/*In serial mode, epart is full of 0: all elements belong to cpu 0: */
	epart=(int*)xcalloc(iomodel->numberofelements,sizeof(int));
	#endif

	/*Deal with rifts, they have to be included into one partition only, not several: */
	IoModelFetchData(&iomodel->riftinfo,&iomodel->numrifts,NULL,iomodel_handle,"riftinfo");

	for(i=0;i<iomodel->numrifts;i++){
		/*columns 2 and 3 of each riftinfo row hold the pair of elements facing each other across the rift: */
		el1=(int)*(iomodel->riftinfo+RIFTINFOSIZE*i+2)-1; //matlab indexing to c indexing
		el2=(int)*(iomodel->riftinfo+RIFTINFOSIZE*i+3)-1; //matlab indexing to c indexing
		epart[el2]=epart[el1]; //ensures that this pair of elements will be in the same partition, as well as the corresponding grids;
	}

	/*Free rifts: */
	xfree((void**)&iomodel->riftinfo); 

	/*Used later on: */
	my_vertices=(bool*)xcalloc(iomodel->numberofvertices,sizeof(bool));
	my_elements=(bool*)xcalloc(iomodel->numberofelements,sizeof(bool));

	/*Start figuring out, out of the partition, which elements belong to this cpu: */
	IoModelFetchData(&iomodel->elements,NULL,NULL,iomodel_handle,"elements");
	for (i=0;i<iomodel->numberofelements;i++){

		/*!All elements have been partitioned above, only deal with elements for this cpu: */
		if(my_rank==epart[i]){ 

			my_elements[i]=1;
			
			/*Now that we are here, we can also start building the list of vertices belonging to this cpu partition: we use 
			 *the element index to do this. Row i of the index holds elements_width vertex ids (matlab indexing): 
			 *3 for a tria, 6 for a penta. Plugging 1 into my_vertices for each of these ids leaves my_vertices 
			 *holding which vertices belong to this partition*/
			for(j=0;j<elements_width;j++){
				my_vertices[(int)*(iomodel->elements+elements_width*i+j)-1]=1;
			}
		}
	}//for (i=0;i<numberofelements;i++)
	/*Free data : */
	xfree((void**)&iomodel->elements);

	#ifdef _PARALLEL_
		/*From the element partitioning, we can determine which grids are on the inside of this cpu's 
		 *element partition, and which are on its border with other nodes:*/
		bordervertices=NewVec(iomodel->numberofvertices);

		/*each cpu adds 1 for every vertex it owns: after assembly, an entry larger than 1 means 
		 *that several cpus flagged the same vertex*/
		for (i=0;i<iomodel->numberofvertices;i++){
			if(my_vertices[i])VecSetValue(bordervertices,i,1,ADD_VALUES);
		}
		VecAssemblyBegin(bordervertices);
		VecAssemblyEnd(bordervertices);

		VecToMPISerial(&serial_bordervertices,bordervertices);

		/*now go through serial_bordervertices, and booleanize it: */
		my_bordervertices=(bool*)xcalloc(iomodel->numberofvertices,sizeof(bool));
		for(i=0;i<iomodel->numberofvertices;i++){
			if(serial_bordervertices[i]>1)my_bordervertices[i]=1;
		}
	#else
		/*No border vertices: */
		my_bordervertices=(bool*)xcalloc(iomodel->numberofvertices,sizeof(bool));
	#endif

	/*Deal with my_nodes: */
	my_nodes=(bool*)xmalloc(iomodel->numberofvertices*sizeof(bool));
	memcpy(my_nodes,my_vertices,iomodel->numberofvertices*sizeof(bool));

	/*Free ressources:*/
	xfree((void**)&npart);
	xfree((void**)&epart);
	/*serial_bordervertices was only needed to build my_bordervertices: release it, otherwise it is 
	 *leaked on every call. NOTE(review): assumes VecToMPISerial allocates with the xmalloc family - confirm. 
	 *In serial builds the pointer is still NULL here, exactly like npart above.*/
	xfree((void**)&serial_bordervertices);
	VecFree(&bordervertices);

	/*Assign output pointers:*/
	*pmy_elements=my_elements;
	*pmy_vertices=my_vertices;
	*pmy_nodes=my_nodes;
	*pmy_bordervertices=my_bordervertices;
}


/*DiscontinuousGalerkinPartitioning: same outputs as ContinuousGalerkinPartitioning for the 
 * elements, vertices and border vertices (they are taken straight from it), but with a 
 * node partitioning specific to discontinuous Galerkin.
 *
 * Outputs (handed over to the caller through the p* arguments): 
 *   pmy_elements, pmy_vertices, pmy_bordervertices: as returned by ContinuousGalerkinPartitioning
 *   pmy_nodes: 3*numberofelements flags, node 3*e+k being the k-th node of element e. A node is 
 *              flagged if its element is in this partition, or if it sits on an edge shared 
 *              between one of my elements and an element of another partition.
 *
 * Inputs: 
 *   iomodel:        mesh description. Its edges and elements fields are fetched from 
 *                   iomodel_handle and freed again before returning.
 *   iomodel_handle: handle the data is fetched from.
 *
 * Only "2d" meshes are supported: any other meshtype raises ISSMERROR("not implemented yet"). 
 */
void  DiscontinuousGalerkinPartitioning(bool** pmy_elements, bool** pmy_vertices, bool** pmy_nodes, bool** pmy_bordervertices, IoModel* iomodel, ConstDataHandle iomodel_handle){

	/*each element has it own nodes (as many as vertices) + additional nodes from neighbouring elements for each edge. This yields to a very different partition for 
	 * the nodes and the vertices. The vertices are similar to continuous galerkin, but the nodes partitioning involves edges, which mess up sorting of 
	 * ids. */
	
	int i,j;

	/*output: */
	bool*   my_elements=NULL;
	bool*   my_vertices=NULL;
	bool*   my_nodes=NULL;
	bool*   my_nodescontinuous=NULL;
	bool*   my_bordervertices=NULL;

	int     i1;
	int     cols=0;
	double  e1,e2; //kept as doubles: e2 must be tested for NaN before being used as an index
	int     pos;

	/*First: get element and vertices partitioning from Continuous Galerkin: only the nodes are partitioned differently*/
	ContinuousGalerkinPartitioning(&my_elements,&my_vertices,&my_nodescontinuous,&my_bordervertices,iomodel,iomodel_handle);
	xfree((void**)&my_nodescontinuous);

	/*Now we must build node partitioning
	 *  - there are three nodes per element (discontinous)
	 *  - for each element present of each partition, its three nodes will be in this partition
	 *  - the edges require the dofs of the 2 nodes of each elements sharing the edge.
	 *    if the 2 elements sharing the edge are on 2 different cpus, we must duplicate
	 *    the two nodes that are not on the cpus so that the edge can access the dofs of
	 *    all its 4 nodes
	 */

	/*Allocate: one bool flag per node, 3 nodes per element*/
	my_nodes=(bool*)xcalloc(3*iomodel->numberofelements,sizeof(bool));

	/*First: add all the nodes of all the elements belonging to this cpu*/
	if (strcmp(iomodel->meshtype,"2d")==0){
		for (i=0;i<iomodel->numberofelements;i++){
			if (my_elements[i]){
				my_nodes[3*i+0]=1;
				my_nodes[3*i+1]=1;
				my_nodes[3*i+2]=1;
			}
		}
	}
	else{
		ISSMERROR("not implemented yet");
	}

	/*Second: add all missing nodes*/

	/*Get edges and elements*/
	IoModelFetchData(&iomodel->edges,&iomodel->numberofedges,&cols,iomodel_handle,"edges");
	IoModelFetchData(&iomodel->elements,NULL,NULL,iomodel_handle,"elements");
	if (cols!=4) ISSMERROR("field edges should have 4 columns");

	/*Go through all the edges, and clone on this cpu the nodes of neighbouring elements that live on another cpu: */
	for (i=0;i<iomodel->numberofedges;i++){

		/*Get left and right elements*/
		e1=iomodel->edges[4*i+2]-1; //edges are [node1 node2 elem1 elem2]
		e2=iomodel->edges[4*i+3]-1; //edges are [node1 node2 elem1 elem2]

		/* 1) If the element e1 is in the current partition
		 * 2) and if the edge of the element is shared by another element (internal edge, e2 is not NaN)
		 * 3) and if this element is not in the same partition:
		 * we must clone the nodes on this partition so that the loads (Numericalflux)
		 * will have access to their properties (dofs,...)
		 * The order of the tests matters: e2 is only converted to an index once we know it is not NaN*/
		if(my_elements[(int)e1] && !isnan(e2) && !my_elements[(int)e2]){ 

			/*1: Get the id (matlab indexing) of the first vertex of the edge*/
			i1=(int)iomodel->edges[4*i+0];

			/*2: Get the column where this id is located in the index of element e2*/
			pos=UNDEF;
			for(j=0;j<3;j++){
				if ((int)iomodel->elements[3*(int)e2+j]==i1) pos=j;
			}

			/*3: We have the id of the elements and the position of the vertices in the index
			 * we can now create the corresponding nodes: the one found at pos, and the one 
			 * located just before it (cyclically) in the element*/
			if (pos==0){
				my_nodes[(int)e2*3+0]=1;
				my_nodes[(int)e2*3+2]=1;
			}
			else if(pos==1){
				my_nodes[(int)e2*3+1]=1;
				my_nodes[(int)e2*3+0]=1;
			}
			else if (pos==2){
				my_nodes[(int)e2*3+2]=1;
				my_nodes[(int)e2*3+1]=1;
			}
			else{
				/*the first vertex of the edge was not found in element e2*/
				ISSMERROR("Problem in edges creation");
			}
		}
	}

	/*Free data: */
	xfree((void**)&iomodel->elements);
	xfree((void**)&iomodel->edges);

	/*Assign output pointers:*/
	*pmy_elements=my_elements;
	*pmy_vertices=my_vertices;
	*pmy_nodes=my_nodes;
	*pmy_bordervertices=my_bordervertices;
}
