/***********************************************************************
ESPRIT-Forest: Parallel Clustering of Massive Amplicon Sequence Data in Subquadratic Time 
by: Yunpeng Cai, Yijun Sun, Wei Zheng, Jin Yao and Yujie Yang  (C) 2016
Please kindly cite [Y.Cai et.al PLOS Comp. Biol. 2016]

THE LICENSED WORK IS PROVIDED UNDER THE TERMS OF THE ADAPTIVE PUBLIC LICENSE ("LICENSE") AS FIRST COMPLETED BY: _Yunpeng Cai, Yijun Sun, Wei Zheng, Jin Yao, Yujie Yang_ [Insert the name of the Initial Contributor here]. ANY USE, PUBLIC DISPLAY, PUBLIC PERFORMANCE, REPRODUCTION OR DISTRIBUTION OF, OR PREPARATION OF DERIVATIVE WORKS BASED ON, THE LICENSED WORK CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS LICENSE AND ITS TERMS, WHETHER OR NOT SUCH RECIPIENT READS THE TERMS OF THE LICENSE. "LICENSED WORK" AND "RECIPIENT" ARE DEFINED IN THE LICENSE. A COPY OF THE LICENSE IS LOCATED IN THE TEXT FILE ENTITLED "LICENSE.TXT" ACCOMPANYING THE CONTENTS OF THIS FILE. IF A COPY OF THE LICENSE DOES NOT ACCOMPANY THIS FILE, A COPY OF THE LICENSE MAY ALSO BE OBTAINED AT THE FOLLOWING WEB SITE: http://www.acsu.buffalo.edu/~yijunsun/lab/ESPRIT-Forest.html [Insert Initial Contributor's Designated Web Site here]

Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License.
*/

#include <stdio.h>
#include "global.h"

/* ---- Definitions (and default values) of the static members of Global ---- */

/* Gap penalties; Init() passes both to the Needle constructor. */
float	Global::gap_o=10;
float	Global::gap_e=0.5;
/* K-mer length; Init() passes it to the Kmer constructor. */
int	Global::Kmer_Len=5;
/* Table of num_krate coefficients, filled by LoadKList() and read by
   KdistBound() / Kdist2Ndist(). */
float *Global::krate=NULL;
int Global::num_krate=1;
/* Per-entry bounds, nbound[i]=(i+1)*krate_step*krate[i]; scanned by
   Kdist2Ndist(). */
float *Global::nbound=NULL;

/* Interval width: KdistBound() selects the krate entry (int)(dist/krate_step). */
float Global::krate_step=1.0;
/* Helper objects allocated in Init() and released in Finish(). */
Kmer *Global::kmer=NULL;
DistCache *Global::cache=NULL;

/* Not referenced anywhere else in this file; presumably output/reporting
   options consumed by other modules -- confirm against their users. */
bool Global::ShowAlign=false;
char *Global::ProbSeqOut=NULL;

		
Needle *Global::needle=NULL;
/* The settings below are not referenced in this file.
   NOTE(review): the level_* names suggest a range of distance levels
   (min / max / step / growth factor) -- confirm where they are used. */
float Global::level_min=0.01;
float Global::level_inc=1.25;
float Global::level_max=0.15;
float Global::level_step=0.005;
int Global::Needle_Diag=1;
float Global::DiagRate=0.1;
float Global::ErrorRate=0.0025;

/*
 * Record the alignment / k-mer settings.
 *   g_o   -> Global::gap_o
 *   g_e   -> Global::gap_e
 *   K_Len -> Global::Kmer_Len
 *   K_Br  is accepted but not stored anywhere by this function.
 */
void Global::SetParam(float g_o,	float g_e, int K_Len, int K_Br)
{
	(void)K_Br;	/* intentionally unused */

	Global::Kmer_Len=K_Len;
	Global::gap_e=g_e;
	Global::gap_o=g_o;
}

/*
 * Allocate the helper objects.
 *
 * The Kmer and Needle objects are created inside an OpenMP parallel region,
 * i.e. once by every thread of the team, using the current Kmer_Len, gap_o
 * and gap_e settings.  The distance cache is created once, after the region.
 * SeqNum is not used.
 *
 * NOTE(review): creating one object per thread only makes sense if kmer and
 * needle have thread-private storage -- confirm in global.h.
 */
void Global::Init(int SeqNum)
{
	(void)SeqNum;	/* intentionally unused */

	#pragma omp parallel
	{
		kmer=new Kmer(Kmer_Len);
		needle=new Needle(gap_o,gap_e);
	}

	cache=new DistCache(10000);
}

/*
 * Release everything allocated by Init() and LoadKList().
 *
 * The Kmer / Needle objects are deleted inside an OpenMP parallel region,
 * mirroring their per-thread creation in Init(); the rate tables and the
 * distance cache are released once afterwards.
 *
 * Every pointer is reset to NULL after it is released, so a repeated call to
 * Finish() (or a later LoadKList()) operates on NULL instead of on a dangling
 * pointer: delete and free() are both no-ops for NULL.
 *
 * Note that krate is released with free(), so a table handed over through
 * LoadKList(int, float, float *) must have been obtained from the matching
 * allocator.
 */
void Global::Finish()
{
#pragma omp parallel
{
	delete kmer;
	kmer=NULL;
	delete needle;
	needle=NULL;
}
	free(krate);
	krate=NULL;
	free(nbound);
	nbound=NULL;
	delete cache;
	cache=NULL;
}

void Global::LoadKList(char *filename)
{
	FILE *fp;
	if (filename==NULL)
	{
		num_krate=1;
		krate=(float *)Malloc(sizeof(float));
		krate[0]=6.0;
		krate_step=1.0;
		return;		
	}
	if ((fp=fopen(filename,"r"))==NULL)
	{
		fprintf(stderr,"Cannot open Kmer configure file.\n");
		num_krate=1;
		krate=(float *)Malloc(sizeof(float));
		nbound=(float *)Malloc(sizeof(float));
		krate[0]=6.0;
		krate_step=1.0;
		nbound[0]=1.0;
	}
	else
	{
		fscanf(fp,"%d",&num_krate);
		fscanf(fp,"%f",&krate_step);
		krate=(float *)Malloc(num_krate*sizeof(float));
		nbound=(float *)Malloc(num_krate*sizeof(float));
		for (int i=0;i<num_krate;i++)
		{
			fscanf(fp,"%f",krate+i);
			nbound[i]=(i+1)*krate_step*krate[i];
		}
		fclose(fp);
		printf("Load Kmer Conf Success\n");
		
		
	}
}

void Global::LoadKList(int num_k, float k_step, float *klist)
{
	num_krate=num_k;
	krate_step=k_step;
	krate=klist;
	for (int i=0;i<num_krate;i++)
		nbound[i]=(i+1)*krate_step*krate[i];
}

/*
 * Scale dist by the rate of the interval it falls into.
 *
 * The interval index is dist/krate_step truncated to an int, clamped to the
 * valid range [0, num_krate-1]; the result is krate[index] * dist.
 */
float KdistBound(float dist)
{
	int slot=(int)(dist/Global::krate_step);

	/* Clamp low first, then high (same order as the table lookup expects). */
	if (slot<0)
		slot=0;
	const int last=Global::num_krate-1;
	if (slot>last)
		slot=last;

	return dist*Global::krate[slot];
}

/*
 * Divide kdist by the rate of the first table entry whose bound is not
 * exceeded by kdist; if kdist exceeds every bound, the last entry is used.
 */
float Kdist2Ndist(float kdist)
{
	int slot=0;
	for (;slot<Global::num_krate;slot++)
	{
		/* Stop at the first bound that kdist does not exceed. */
		if (!(kdist>Global::nbound[slot]))
			break;
	}

	/* Ran past the end of the table: fall back to the last entry. */
	if (slot==Global::num_krate)
		slot--;

	return kdist/Global::krate[slot];
}

/*
 * Distance between two aligned sequences ('-' marks a gap).
 *
 * Only the region where both sequences have started and neither has ended is
 * compared: leading and trailing runs of '-' of either sequence are excluded.
 * Within that region, per column:
 *   - '-' in both sequences          : ignored (open gap runs stay open)
 *   - 'N' in both, or '.' in both    : ignored, closes any open gap run
 *   - '-' in exactly one sequence    : the first column of a run counts as
 *                                      one compared position and one
 *                                      mismatch; the rest of the run is free
 *   - anything else                  : one compared position, plus one
 *                                      mismatch if the characters differ
 *
 * Returns mismatches / compared positions, or 1.0 when nothing was compared
 * (including empty or all-gap input).
 *
 * The trailing-gap trim is bounded at index 0; the earlier unsigned
 * count-down wrapped around and read out of bounds for an empty or all-gap
 * sequence.
 */
float calcDistance(char *seq1, char *seq2) {

  /* First non-gap column of each sequence (the scan stops at the
     terminating '\0' at the latest). */
  size_t start1 = 0;
  while (seq1[start1] == '-') start1++;
  size_t start2 = 0;
  while (seq2[start2] == '-') start2++;

  /* One past the last non-gap column of each sequence; 0 if there is none. */
  size_t end1 = strlen(seq1);
  while (end1 > 0 && seq1[end1-1] == '-') end1--;
  size_t end2 = strlen(seq2);
  while (end2 > 0 && seq2[end2-1] == '-') end2--;

  /* Overlap: latest start .. earliest end. */
  const size_t start_seq = (start1 < start2) ? start2 : start1;
  const size_t end_seq = (end1 > end2) ? end2 : end1;

  float residuecount = 0.0;
  float distance = 0.0;
  bool in_gap1 = false;   /* inside a gap run of seq1 */
  bool in_gap2 = false;   /* inside a gap run of seq2 */

  for (size_t k = start_seq; k < end_seq; k++) {
    const char a = seq1[k];
    const char b = seq2[k];

    /* Gap in both: skip without touching the gap-run state. */
    if (a == '-' && b == '-')
      continue;

    /* Unknown ('N') or padding ('.') in both: skip, but end any gap run. */
    if (a == b && (a == 'N' || a == '.')) {
      in_gap1 = false;
      in_gap2 = false;
      continue;
    }

    /* Multiple consecutive gaps are counted as 1 mismatch,
       checked separately for each sequence. */
    if (a == '-') {
      if (!in_gap1) {
        residuecount += 1.0;
        distance += 1.0;
      }
      in_gap1 = true;
      in_gap2 = false;
      continue;
    }

    if (b == '-') {
      if (!in_gap2) {
        residuecount += 1.0;
        distance += 1.0;
      }
      in_gap1 = false;
      in_gap2 = true;
      continue;
    }

    /* No gaps: an ordinary compared position. */
    residuecount += 1.0;
    if (a != b)
      distance += 1.0;
    in_gap1 = false;
    in_gap2 = false;
  }

  if (residuecount > 0)
    return distance / residuecount;
  return 1.0;
}
