function [u_g varargout]=diagnostic_core_nonlinear(m,analysis_type, varargin);
%DIAGNOSTIC_CORE_NONLINEAR - core solution of non linear problems
%
%   Core of the diagnostic solution, deals with the Direct Shooting Method
%   to linearize the non-linear material equations: at every pass the system
%   matrices are rebuilt from the latest velocity, the system is reduced from
%   the g-set to the f-set and solved, and the new solution is compared with
%   the previous one until the convergence criteria are met.
%
%   Inputs:
%      m             - fem model structure. The fields elements, grids,
%                      materials, loads, ys, gridset, params and Gmn are read.
%      analysis_type - analysis string forwarded to SystemMatrices and
%                      PenaltySystemMatrices. For 'diagnostic_stokes' the load
%                      flag (pflag) is set at every iteration, otherwise only
%                      at the first one.
%      varargin{1}   - optional extra inputs to the solution sequence (used only
%                      when exactly three arguments are given). If
%                      recover_input finds a 'velocity' entry in it, that
%                      velocity is used as the initial guess.
%
%   Outputs:
%      u_g           - last computed solution on the g-set (full vector).
%      varargout     - only when exactly three outputs are requested: K_ff and
%                      K_fs recomputed from the final velocity with
%                      inputs.drag set to zero.
%
%   Convergence (checked once two numeric iterates are available):
%      norm(du)/norm(u_old) < params.eps_rel, and, unless params.eps_abs is NaN,
%      max(abs(du))*params.yts < params.eps_abs.
%
%   Usage:
%      [u_g varargout]=diagnostic_core_nonlinear(m,analysis_type,varargin);
%
%   See also: ICEDIAGNOSTIC, ICEDIAGNOSTIC2D, ICEDIAGNOSTIC3D, ICEDIAGNOSTIC_CORE_LINEAR

%global variables (gridset, element_debug and element_debugid are written below)
global cluster gridset element_debug element_debugid

%recover fem model fields
elements=m.elements;
grids=m.grids;
materials=m.materials;
loads=m.loads;
ys=m.ys;
gridset=m.gridset;
params=m.params;
G_mn=m.Gmn;

%recover parameters
sparsity=params.sparsity;
solver_type=params.solver_type;
eps_abs=params.eps_abs;
eps_rel=params.eps_rel;
yts=params.yts;
debug=params.debug;
element_debug=params.element_debug;
element_debugid=params.element_debugid;

%recover existing velocity if given in input and initialize solution.
%Only the two most recent iterates are ever needed, so they are kept in
%u_current/u_previous instead of an array that grows at every iteration.
%An empty cell marks "no velocity available yet".
velocity_is_present=0;
u_current={};
if nargin==3,
	inputs=varargin{1};

	[velocity_param velocity_is_present]=recover_input(inputs,'velocity');
	if velocity_is_present
		u_current=velocity_param;
		u_current(4:6:gridset.gsize)=0; %no pressure in the velocity
	end
else
	inputs=struct();
end
u_previous=u_current;

%Initialization. count starts at 2 and is incremented after every solve, so
%count==2 identifies the first iteration and count>=4 means that at least two
%solves have been carried out.
count=2;
converged=0;
	
if debug,
	disp(sprintf('%s','   starting direct shooting method'));
end

while(~converged),

	% Generate system matrices (stiffness and load)
	%compute stiffness matrix flag
	kflag=1;
	%compute loads ? 
	if strcmp(analysis_type, 'diagnostic_stokes'),
		pflag=1;
	else
		if count==2, pflag=1; else pflag=0; end
	end

	%add velocity to inputs
	inputs.velocity=u_current;
	inputs.oldvelocity=u_previous;
	
	%generate stiffness and loads
	[K_gg,p_g]=SystemMatrices(elements,grids,loads,materials,kflag, pflag, sparsity,inputs,analysis_type);
	[K_gg,p_g]=PenaltySystemMatrices(grids,loads,materials,kflag, pflag, sparsity,inputs,analysis_type,K_gg,p_g);

	%Save loads of the first iteration and reuse them afterwards.
	%NOTE(review): for 'diagnostic_stokes' pflag is 1 at every iteration, yet the
	%freshly computed loads are still replaced by the first-iteration loads
	%here - confirm that this is intended.
	if count==2, 
		p_g_old=p_g;
	else
		p_g=p_g_old;
	end
	
	if cluster, 
		K_gg=distributed(gplus(K_gg),'convert');
		p_g=gplus(p_g);
	end
	
	% Reduce stiffness matrix from g set to f set (effectively rooting out the single point constraints), 
	% and compute modifications to loads from single point constraints.
	
	[K_ff K_fs]=Reducematrixfromgtof(K_gg,G_mn); 

	% Reduce load from g set to f set
	p_f=Reducerightside(p_g,G_mn,K_fs,ys);

	% Solve
	u_f=Solver(K_ff,p_f,solver_type);
	if debug,
		disp(sprintf('%s%g','      condition number of stiffness matrix: ',condest(K_ff)));
	end
   
	%increment index 
	count=count+1;

	% Add single point constraints back, ie increase f-set by s-set into the global g-set.
	u_previous=u_current;
	u_current=full(Mergesolution_g(u_f,G_mn, ys)); %make solution full

	%Figure out if convergence is reached.
	if((count>=4)| velocity_is_present), %make sure we already iterated at least once.

		%compute relative velocity difference for this step.
		du=u_current-u_previous;
		du_norm=norm(du,2);
		if du_norm==0,
			%identical iterates: avoid 0/0=NaN (which could never pass the test
			%below and would loop forever on an identically zero solution)
			relative_change=0;
		else
			relative_change=du_norm/norm(u_previous,2);
		end

		if relative_change<eps_rel, 
			if debug, disp(sprintf('%s %g %s %g','      convergence criterion: norm(du)/norm(u)=',relative_change,' < ',eps_rel)); end
			converged=1;
		else
			if debug, disp(sprintf('%s %g %s %g','      convergence criterion: norm(du)/norm(u)=',relative_change,' > ',eps_rel)); end
			converged=0;
		end

		%optional absolute criterion: it can only veto convergence, never grant it
		if ~isnan(eps_abs)
			%compute velocity difference for this step.
			change=max(abs(du))*yts;

			if change<eps_abs, 
				if debug, disp(sprintf('%s %g %s %g %s','      convergence criterion: max(du)=',change,' < ',eps_abs,'m/yr')); end
			else
				if debug, disp(sprintf('%s %g %s %g %s','      convergence criterion: max(du)=',change,' > ',eps_abs,'m/yr')); end
				converged=0;
			end
		end
	end
end

%prepare output 
u_g=u_current;

%more output might be needed, for ex when running in cielocontrol.m
nout=max(nargout,1)-1;
if nout==2,
	%K_ff and K_fs are requested, for the case where we have no loads (ie basal drag)
	inputs.drag=zeros(gridset.gsize,1); kflag=1;pflag=1; 
	inputs.velocity=u_current;

	[K_gg,p_g]=SystemMatrices(elements,grids,loads,materials,kflag, pflag, sparsity,inputs,analysis_type);
	[K_gg,p_g]=PenaltySystemMatrices(grids,loads,materials,kflag, pflag, sparsity,inputs,analysis_type,K_gg,p_g);
	[K_ff K_fs]=Reducematrixfromgtof(K_gg,G_mn); 
	varargout(1)={K_ff};
	varargout(2)={K_fs};
end
