/***********************************************************************
ESPRIT-Forest: Parallel Clustering of Massive Amplicon Sequence Data in Subquadratic Time 
by: Yunpeng Cai, Yijun Sun, Wei Zheng, Jin Yao and Yujie Yang  (C) 2016
Please kindly cite [Y.Cai et.al PLOS Comp. Biol. 2016]

THE LICENSED WORK IS PROVIDED UNDER THE TERMS OF THE ADAPTIVE PUBLIC LICENSE ("LICENSE") AS FIRST COMPLETED BY: _Yunpeng Cai, Yijun Sun, Wei Zheng, Jin Yao, Yujie Yang_ [Insert the name of the Initial Contributor here]. ANY USE, PUBLIC DISPLAY, PUBLIC PERFORMANCE, REPRODUCTION OR DISTRIBUTION OF, OR PREPARATION OF DERIVATIVE WORKS BASED ON, THE LICENSED WORK CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS LICENSE AND ITS TERMS, WHETHER OR NOT SUCH RECIPIENT READS THE TERMS OF THE LICENSE. "LICENSED WORK" AND "RECIPIENT" ARE DEFINED IN THE LICENSE. A COPY OF THE LICENSE IS LOCATED IN THE TEXT FILE ENTITLED "LICENSE.TXT" ACCOMPANYING THE CONTENTS OF THIS FILE. IF A COPY OF THE LICENSE DOES NOT ACCOMPANY THIS FILE, A COPY OF THE LICENSE MAY ALSO BE OBTAINED AT THE FOLLOWING WEB SITE: http://www.acsu.buffalo.edu/~yijunsun/lab/ESPRIT-Forest.html [Insert Initial Contributor's Designated Web Site here]

Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License.
*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <math.h>
#include <omp.h>

#include <fstream>
#include <new>
#include <set>

#include "FASTA.h"
#include "util.h"
#include "global.h"
#include "ProbModel.h"
#include "TreeClust.h"
#include "MinHeap.h"

// Raw sequence strings, indexed by record id for ids below SeqNum.
extern char **SeqStrs;
// Per-sequence count; used as the initial NumSeqs of each sequence record
// and as the k-mer weight of a raw sequence when it is merged.
extern int *Freq;

// Number of input sequences. In this file, record ids below SeqNum are input
// sequences; ids at or above SeqNum are clusters whose profile is stored in
// ProbSeqs[id-SeqNum].
extern int SeqNum;
// K-mer table per record id; entries for new clusters are assigned in this file.
extern int **KmerTabs;
// Per-sequence k-mer data passed to KmerComp together with SeqLens.
extern int **KmerSeqs;
// Identifier written to the output files in place of the internal index.
extern int *SeqID;
// Per-sequence length; KmerDist uses it to decide the argument order of KmerComp.
extern int *SeqLens;

// Selects the detailed per-round progress output in DoCluster.
extern bool verbose;

// Profiles of merged clusters, allocated in DoCluster and indexed by (id-SeqNum).
ProbString *ProbSeqs;

// Next unused record id; set to SeqNum in DoPreCluster and advanced for each new cluster.
int ClsTop;
// Number of alignment-based distance evaluations (incremented in NeedleDist).
long int NumAl=0;
// Number of KmerDist calls.
long int NumKmer=0;

// Distance used when no neighbour is known; DoCluster resets it to
// Global::level_max+Global::level_step before clustering starts.
float MAX_DIST=0.15;
// Number of clusters currently remaining.
int NumCls;

// One record per id (input sequences and clusters); allocated in DoCluster.
ClustRec *ClRec;

// Pairs selected for merging, appended in merge order; allocated in DoCluster.
PairRec *NNRec;

// A record id paired with the distance value it was popped from the wait heap with.
struct PointRec
{
	int ID;
	float dist;
};

// Batch of records whose nearest neighbour is recomputed in one round of DoCluster.
PointRec *UpdRec;

/*
 * K-mer distance between two records, computed as 1 - KmerComp(...).
 *
 * Ids below SeqNum are raw sequences, ids at or above SeqNum are clusters;
 * the KmerComp overload is chosen from that combination. Every call is
 * counted in NumKmer.
 */
float KmerDist(int uid1, int uid2)
{
	#pragma omp atomic
		NumKmer++;

	const bool isSeq1 = uid1 < SeqNum;
	const bool isSeq2 = uid2 < SeqNum;

	// cluster vs. cluster
	if (!isSeq1 && !isSeq2)
		return 1 - Global::kmer->KmerComp(KmerTabs[uid1],KmerTabs[uid2],ClRec[uid1].NumSeqs,ClRec[uid2].NumSeqs);

	// sequence vs. sequence: the strictly shorter sequence goes first,
	// on equal lengths uid2 goes first
	if (isSeq1 && isSeq2)
	{
		const bool firstIsShorter = SeqLens[uid1] < SeqLens[uid2];
		const int lead = firstIsShorter ? uid1 : uid2;
		const int trail = firstIsShorter ? uid2 : uid1;
		return 1 - Global::kmer->KmerComp(KmerTabs[lead], KmerTabs[trail], KmerSeqs[lead],SeqLens[lead]);
	}

	// sequence vs. cluster: the raw sequence always goes first
	const int seqId = isSeq1 ? uid1 : uid2;
	const int clsId = isSeq1 ? uid2 : uid1;
	return 1 - Global::kmer->KmerComp(KmerTabs[seqId], KmerTabs[clsId], KmerSeqs[seqId],SeqLens[seqId],ProbSeqs[clsId-SeqNum].Len(),ClRec[clsId].NumSeqs);
}

/*
 * Tries to obtain the distance between records uid1 and uid2 without an alignment.
 *
 * Order of attempts:
 *   1. the pair is already in Global::cache;
 *   2. one (or both) of the records was built from two components
 *      (fromid1/fromid2 >= 0) and the component distances are cached: the
 *      result is the NumSeqs-weighted average of those distances;
 *   3. one component of a record clearly dominates the other (larger NumSeqs
 *      and a small error bound): the cached distance of the dominant
 *      component is reused and the record's distErr is raised accordingly.
 *
 * On success the value is written to dist, stored in the cache and true is
 * returned; otherwise false is returned and the caller has to compute the
 * distance itself.
 *
 * Every cache store is done inside the CRI_CACHE critical section.
 * NOTE(review): the distErr updates below are not synchronised; confirm that
 * concurrent callers never update the same record.
 */
bool Quickdist(int uid1, int uid2, float &dist)
{
	if (Global::cache->Get(uid1,uid2,dist))
		return true;
	float dist1,dist2,dist3,dist4;

	// Component ids of both records (-1 when a record has no components).
	const int c1a=ClRec[uid1].fromid1;
	const int c1b=ClRec[uid1].fromid2;
	const int c2a=ClRec[uid2].fromid1;
	const int c2b=ClRec[uid2].fromid2;

// If the NN dist for the component sequences are all known, we can use it to estimate the NN dist for current sequence
	if (c1a >=0)
	{
		if (Global::cache->Get(c1a,uid2,dist1) && Global::cache->Get(c1b,uid2,dist2))
		{
			dist=(dist1*ClRec[c1a].NumSeqs+dist2*ClRec[c1b].NumSeqs)/ClRec[uid1].NumSeqs;
			#pragma omp critical (CRI_CACHE)
			Global::cache->Store(uid1,uid2,dist);
			return true;
		}
	}
	if (c2a >=0)
	{
		if (Global::cache->Get(c2a,uid1,dist1) && Global::cache->Get(c2b,uid1,dist2))
		{
			dist=(dist1*ClRec[c2a].NumSeqs+dist2*ClRec[c2b].NumSeqs)/ClRec[uid2].NumSeqs;
			#pragma omp critical (CRI_CACHE)
			Global::cache->Store(uid1,uid2,dist);
			return true;
		}
	}
	if (c1a >=0 && c2a >=0)
	{
		if (Global::cache->Get(c1a,c2a,dist1) && Global::cache->Get(c1b,c2a,dist2) &&
		  Global::cache->Get(c1a,c2b,dist3) && Global::cache->Get(c1b,c2b,dist4))
		 {
			// The product of the two member counts is formed in double so it cannot
			// overflow an int for large clusters (NumSeqs appears to be an int here).
			const float numPairs=(float)((double)ClRec[uid1].NumSeqs*(double)ClRec[uid2].NumSeqs);
			dist=(dist1*ClRec[c1a].NumSeqs*ClRec[c2a].NumSeqs+dist2*ClRec[c1b].NumSeqs*ClRec[c2a].NumSeqs+
			  dist3*ClRec[c1a].NumSeqs*ClRec[c2b].NumSeqs+dist4*ClRec[c1b].NumSeqs*ClRec[c2b].NumSeqs)/
			  numPairs;
			// This store needs the same lock as all the other stores in this function.
			#pragma omp critical (CRI_CACHE)
			Global::cache->Store(uid1,uid2,dist);
			return true;
		 }
	}
	
	//When two component sequences are highly imbalanced, no exact NW computing is needed
	int id_big1=-1;
	int id_big2=-1;
	if (c1a >=0)
	{
		const int sq1=ClRec[c1a].NumSeqs;
		const int sq2=ClRec[c1b].NumSeqs;
		if (sq1 > sq2 && ClRec[c1a].distErr+ ClRec[c1a].NNdist*sq2/(sq1+sq2) < 0.1*Global::level_step)
		{
			id_big1=c1a;
		}
		if (sq2 > sq1 && ClRec[c1b].distErr+ ClRec[c1b].NNdist*sq1/(sq1+sq2) < 0.1*Global::level_step)
		{
			id_big1=c1b;
		}
	}
	if (c2a >=0)
	{
		const int sq1=ClRec[c2a].NumSeqs;
		const int sq2=ClRec[c2b].NumSeqs;
		if (sq1 > sq2 && ClRec[c2a].distErr+ ClRec[c2a].NNdist*sq2/(sq1+sq2) < 0.1*Global::level_step)
		{
			id_big2=c2a;
		}
		if (sq2 > sq1 && ClRec[c2b].distErr+ ClRec[c2b].NNdist*sq1/(sq1+sq2) < 0.1*Global::level_step)
		{
			id_big2=c2b;
		}
	}

	// In the error bounds below, (c?a+c?b-id_big?) is the id of the smaller component.

	// dominant component of uid1 against uid2 itself
	if (id_big1>=0 && Global::cache->Get(id_big1,uid2,dist))
	{
		ClRec[uid1].distErr=max(ClRec[uid1].distErr,ClRec[id_big1].distErr+ ClRec[id_big1].NNdist*ClRec[c1a+c1b-id_big1].NumSeqs/ClRec[uid1].NumSeqs);
		#pragma omp critical (CRI_CACHE)
		Global::cache->Store(uid1,uid2,dist);
		return true;		
	}
	// dominant component of uid1 against both components of uid2
	if (id_big1>=0 && Global::cache->Get(id_big1,c2a,dist1) && Global::cache->Get(id_big1,c2b,dist2))
	{
		dist=(dist1*ClRec[c2a].NumSeqs+dist2*ClRec[c2b].NumSeqs)/ClRec[uid2].NumSeqs;
		ClRec[uid1].distErr=max(ClRec[uid1].distErr,ClRec[id_big1].distErr+ ClRec[id_big1].NNdist*ClRec[c1a+c1b-id_big1].NumSeqs/ClRec[uid1].NumSeqs);
		#pragma omp critical (CRI_CACHE)
		Global::cache->Store(uid1,uid2,dist);
		return true;		
	}
	// dominant component of uid2 against uid1 itself
	if (id_big2>=0 && Global::cache->Get(id_big2,uid1,dist))
	{
		ClRec[uid2].distErr=max(ClRec[uid2].distErr,ClRec[id_big2].distErr+ ClRec[id_big2].NNdist*ClRec[c2a+c2b-id_big2].NumSeqs/ClRec[uid2].NumSeqs);
		#pragma omp critical (CRI_CACHE)
		Global::cache->Store(uid1,uid2,dist);
		return true;		
	}
	// dominant component of uid2 against both components of uid1
	if (id_big2>=0 && Global::cache->Get(id_big2,c1a,dist1) && Global::cache->Get(id_big2,c1b,dist2))
	{
		dist=(dist1*ClRec[c1a].NumSeqs+dist2*ClRec[c1b].NumSeqs)/ClRec[uid1].NumSeqs;
		ClRec[uid2].distErr=max(ClRec[uid2].distErr,ClRec[id_big2].distErr+ ClRec[id_big2].NNdist*ClRec[c2a+c2b-id_big2].NumSeqs/ClRec[uid2].NumSeqs);
		#pragma omp critical (CRI_CACHE)
		Global::cache->Store(uid1,uid2,dist);
		return true;		
	}
	// both dominant components against each other, accepted only if the combined error stays small
	if (id_big1 >=0 && id_big2>=0 && Global::cache->Get(id_big1,id_big2,dist))
	{
		float err1=max(ClRec[uid1].distErr,ClRec[id_big1].distErr+ ClRec[id_big1].NNdist*ClRec[c1a+c1b-id_big1].NumSeqs/ClRec[uid1].NumSeqs);
		float err2=max(ClRec[uid2].distErr,ClRec[id_big2].distErr+ ClRec[id_big2].NNdist*ClRec[c2a+c2b-id_big2].NumSeqs/ClRec[uid2].NumSeqs);
		if (err1 + err2 <0.1*Global::level_step)
		{
			ClRec[uid1].distErr=err1;
			ClRec[uid2].distErr=err2;
			#pragma omp critical (CRI_CACHE)
			Global::cache->Store(uid1,uid2,dist);
			return true;		
		}
	}
	return false;
}

float NeedleDist(int uid1, int uid2, float level)
{
	float dist;
	if (Quickdist(uid1,uid2,dist))
		return dist;
	#pragma omp atomic
		NumAl++;
	char buf1[65535];
	char buf2[65535];
	if (uid1 <SeqNum && uid2 < SeqNum)
	{
		Global::needle->Align(SeqStrs[uid1],SeqStrs[uid2],buf1,buf2,min(level,Global::DiagRate));
		dist=calcDistance(buf1,buf2);
	}
	else if (uid1 < SeqNum)
	{
		ProbString al;
		Global::needle->Align(SeqStrs[uid1],ProbSeqs[uid2-SeqNum],buf1,al,min(level,Global::DiagRate));
		dist=al.AveDist(buf1);
	}
	else if (uid2 < SeqNum)
	{
		ProbString al;
		Global::needle->Align(SeqStrs[uid2],ProbSeqs[uid1-SeqNum],buf1,al,min(level,Global::DiagRate));
		dist=al.AveDist(buf1);
	}
	else
	{
		ProbString al1,al2;
		Global::needle->Align(ProbSeqs[uid1-SeqNum],ProbSeqs[uid2-SeqNum],al1,al2,min(level,Global::DiagRate));
		dist=al1.AveDist(al2);
	}
	#pragma omp critical (CRI_CACHE)
		Global::cache->Store(uid1,uid2,dist);
	return dist;
}

/*
 * Resets the first tot entries of clRec to the "unclustered, no neighbour
 * known" state. NumSeqs is not touched here.
 */
void InitRecords(ClustRec *clRec,int tot)
{
	int uid=0;
	while (uid < tot)
	{
		ClustRec &rec=clRec[uid];

		// identity and tree position
		rec.ID=uid;
		rec.Node=NULL;
		rec.clsid=-1;
		rec.fromid1=-1;
		rec.fromid2=-1;

		// nearest / second nearest neighbour
		rec.NNseq=-1;
		rec.NNseq2=-1;
		rec.NNdist=MAX_DIST;
		rec.NNdist2=MAX_DIST;
		rec.distErr=0.0;

		// bookkeeping flags and lists
		rec.tagscan=0;
		rec.is_outlier=false;
		rec.Seqlist.clear();
		rec.NNlist.clear();

		++uid;
	}
}

void MergeSeqs(int id1, int id2, int mergeid, ClustRec *clRec)
{
	ProbString pr1,pr2,al1,al2;
	int *ktab;
	
	if (id1 < SeqNum) 
	{
		pr1.FromString(SeqStrs[id1]); 
		ktab=Global::kmer->KmerCopy(KmerTabs[id1],Freq[id1]);
	}
	else 
	{
		pr1=ProbSeqs[id1-SeqNum];
		ktab=Global::kmer->KmerCopy(KmerTabs[id1],1);
	}
	if (id2 < SeqNum) 
	{
		pr2.FromString(SeqStrs[id2]); 
		Global::kmer->KmerAdd(ktab,KmerTabs[id2],Freq[id2]);
	}
	else 
	{
		pr2=ProbSeqs[id2-SeqNum];
		Global::kmer->KmerAdd(ktab,KmerTabs[id2],1);
	}
	KmerTabs[mergeid]=ktab;
	clRec[mergeid].NumSeqs=clRec[id1].NumSeqs+clRec[id2].NumSeqs;
	clRec[mergeid].fromid1=id1;
	clRec[mergeid].fromid2=id2;
	
	Global::needle->Align(pr1,pr2,al1,al2,MAX_DIST);
	float wt1,wt2;
	wt1=(clRec[id1].NumSeqs+0.0)/clRec[mergeid].NumSeqs;
	wt2=(clRec[id2].NumSeqs+0.0)/clRec[mergeid].NumSeqs;
	al1*=wt1;
	al2*=wt2;
	al1 +=al2;
	
	if (clRec[mergeid].NumSeqs > 100.0/Global::ErrorRate) 
		al1.Rectify(Global::ErrorRate);
	ProbSeqs[mergeid-SeqNum]=al1;
}

// Creates the PBP tree and merges the bottom-layered nodes
//
// Phase 1: every input sequence is inserted into treeclust. The insert
//   position is searched in parallel (FindInsertParent); the insertion itself
//   runs inside an ordered section, so the tree is modified in sequence order.
// Phase 2: every bottom node of the tree that holds more than one leaf is
//   condensed into a new cluster record (id ClsTop, ClsTop+1, ...): the leaf
//   k-mer tables are summed, the leaves are aligned one after another into a
//   weighted profile, and the member sequences get clsid set to the new id.
//
// treeclust    - tree to fill
// clRec        - record array; entries [0,SeqNum) are the input sequences
// clusterlevel - distance level; the profile alignments use 2*clusterlevel
//
// Sets ClsTop and NumCls and prints the timing and the cluster count.
void DoPreCluster(TreeClust &treeclust,ClustRec *clRec, float clusterlevel)
{
	Tree *node;
	
	node=treeclust.AddSeq(0);
	clRec[0].Node=node;
	
	
	double w1=omp_get_wtime();
	int procseq=1;
	// Insert parent found for each sequence. A vector (elements start as null
	// pointers) so the buffer is released when the function returns.
	vector<Tree *> TParent(SeqNum);
	for (int i=0;i <SeqNum;i++)
	{
		clRec[i].NumSeqs=Freq[i];
		clRec[i].Seqlist.insert(i);
	}

	
	#pragma omp parallel for schedule(dynamic) ordered shared(procseq)
	for (int i=1;i <SeqNum;i++)
	{
		// Sequences up to myproc were already in the tree when the search for
		// i started; only those inserted afterwards are compared explicitly below.
		int myproc;
		#pragma omp flush(procseq)
		myproc=procseq;
		TParent[i]=treeclust.FindInsertParent(i);
		#pragma omp ordered
		{
			if (TParent[i]->BottomLevel())  // the node is merged with existing branch, no further comparison needed
			{
				node=treeclust.AddSeqAt(i,TParent[i]);
				clRec[i].Node=node;
			}
			else
			{
				bool hasCl=false;
				for (int j=myproc+1;j<i;j++)
				{
					if (TParent[i]==TParent[j])     // node i may be potentially merged with node j
						if (Kdist2Ndist(KmerDist(i,j)) < TParent[i]->FirstChild()->GetThres())
						{
							node=treeclust.AddSeqFrom(i,j,TParent[i]);
							clRec[i].Node=node;
							hasCl=true;
							// NOTE(review): par is walked up the tree but never used afterwards;
							// confirm whether TParent[i] was meant to be set to par.
							Tree *par=node->GetParent();
							while (par->NumChildren()==1 && par->GetParent() !=NULL)
								par=par->GetParent();
							TParent[i]=node->GetParent();
							break;
						}
				}
				if (!hasCl)
				{
					node=treeclust.AddSeqAt(i,TParent[i]);
					clRec[i].Node=node;
				}
			}
			procseq=i;
		}
	}
	
	
	double w2=omp_get_wtime();
	printf("PreCluster Tree Created with %.6lf secs AL %ld KM %ld\n", w2-w1,NumAl,NumKmer);
	// Merge bottom nodes and create prob-sequences

	ClsTop=SeqNum;
	NumCls=SeqNum;


	vector<Tree *> tvec;

	treeclust.ListBottom(tvec);
	const int tvecsize = (int)tvec.size();
	
	#pragma omp parallel
	{
		// per-thread scratch objects, reused for every bottom node
		ProbString TCenter;
		ProbString al1,al2;
		int *ktab;
		char buf[65535];
		vector<Tree *> tchild;
		vector<Tree *>::iterator it2;
		set<unsigned int> mylist;
		
		#pragma omp for schedule(dynamic) ordered
		for (int i=0;i<tvecsize;i++)
		{
			mylist.clear();
			
			Tree *cur=tvec[i];
			tchild.clear();
			cur->ListLeaf(tchild);
			int NumLeaf=tchild.size();
			if (NumLeaf >1)
			{
				// the first leaf seeds the profile and the k-mer table
				it2=tchild.begin();
				TCenter.FromString(SeqStrs[(*it2)->UID]);
				float wt1,wt2;
				int totseq=Freq[(*it2)->UID];
				
				ktab=Global::kmer->KmerCopy(KmerTabs[(*it2)->UID],Freq[(*it2)->UID]);
				//clRec[(*it2)->UID].clsid=ClsTop;
				//clRec[ClsTop].Seqlist.insert((*it2)->UID);
				mylist.insert((*it2)->UID);
				clRec[(*it2)->UID].Seqlist.clear();
				it2++;
				
				// every further leaf is aligned to the running profile and added
				// with a weight proportional to its Freq
				for (;it2!=tchild.end();it2++)
				{
					Tree *leaf=*it2;
					mylist.insert(leaf->UID);
					clRec[leaf->UID].Seqlist.clear();
					clRec[leaf->UID].Node=NULL;
					//clRec[leaf->UID].clsid=ClsTop;
					Global::kmer->KmerAdd(ktab,KmerTabs[leaf->UID],Freq[leaf->UID]);
					wt1=(totseq+0.0)/(totseq+Freq[leaf->UID]);
					wt2=(Freq[leaf->UID]+0.0)/(totseq+Freq[leaf->UID]);
					Global::needle->Align(SeqStrs[leaf->UID],TCenter,buf,al1,2*clusterlevel);
					al1*=wt1;
					al2.FromString(buf);
					al2*=wt2;
					al1+=al2;
					TCenter=al1;
					totseq+=Freq[leaf->UID];	
				}
				// new cluster ids are handed out in iteration order
				#pragma omp ordered
				{
					KmerTabs[ClsTop]=ktab;
					ProbSeqs[ClsTop-SeqNum]=TCenter;
					clRec[ClsTop].Node=treeclust.Condense(cur,ClsTop);
					clRec[ClsTop].NumSeqs=totseq;
					clRec[ClsTop].Seqlist.clear();
					clRec[ClsTop].Seqlist.insert(mylist.begin(),mylist.end());
					for (it2=tchild.begin();it2!=tchild.end();it2++)
						clRec[(*it2)->UID].clsid=ClsTop;
					ClsTop++;
					NumCls-=(NumLeaf-1);
				}
			}
		}
	}
	printf("%d Clusters Generated\n",NumCls);
	
}


int CompPntRec(const void * a, const void * b)
{
	PointRec *Pnt1=(PointRec *)a;
	PointRec *Pnt2=(PointRec *)b;
	if (Pnt1->dist < Pnt2->dist) return -1;
	if (Pnt1->dist > Pnt2->dist) return 1;
	
	return 0;	
}

int CompPairRec(const void * a, const void * b)
{
	PairRec *Pnt1=(PairRec *)a;
	PairRec *Pnt2=(PairRec *)b;
	if (Pnt1->dist < Pnt2->dist) return -1;
	if (Pnt1->dist > Pnt2->dist) return 1;
	
	return 0;	
}

void DoCluster(float clusterlevel,fstream &flist,fstream &fstat,fstream &fgroup,fstream &ftree)
{

	int num_proc;
	fstat.precision(5);
	
	#pragma omp parallel
	{
		num_proc=omp_get_num_threads();
	}
 
	ClRec=(ClustRec *)Malloc(2*SeqNum*sizeof(ClustRec));
	NNRec=(PairRec *)Malloc(SeqNum*sizeof(PairRec));
	UpdRec=(PointRec *)Malloc(SeqNum*sizeof(PointRec));
	int nnhead=0;
	int nntail=0;
	MAX_DIST=Global::level_max+Global::level_step;
	
	ProbSeqs= new ProbString[SeqNum];
	TreeClust treeclust(max(clusterlevel,Global::level_min/Global::level_inc),Global::level_max);
	InitRecords(ClRec,2*SeqNum);
	printf("Starting PreCluster\n");
	double w1=omp_get_wtime();
	DoPreCluster(treeclust,ClRec,clusterlevel);
	double w2=omp_get_wtime();
	printf("PreClustering Finished with %.6lf secs AL %ld KM %ld\n", w2-w1,NumAl,NumKmer);
	
	for (int i=0; i<SeqNum;i++)
	{
		if (ClRec[i].clsid >0)
			ftree << SeqID[i] << "\t -1 \t" << ClRec[i].clsid << "\t" << fixed << clusterlevel<<endl;
	}
	
	printf("Starting Find NN\n");
	
	w1=omp_get_wtime();

	#pragma omp parallel for schedule(dynamic)
	for (int i=0;i<ClsTop;i++)
	{
		if (ClRec[i].clsid <0)
			treeclust.EstimateNN(i,ClRec);       //pre-estimate NN distance using the PBP tree structure, to save NN searching time
	}
	
	#pragma omp parallel for schedule(dynamic)
	for (int i=0;i<ClsTop;i++)
	{
		if (ClRec[i].clsid <0)
		{
			treeclust.FindNN(i,ClRec,clusterlevel);
			ClRec[i].tagscan=1;
		}
	}
	w2=omp_get_wtime();
	printf("FindNN Finished with %.6lf secs AL %ld KM %ld\n", w2-w1,NumAl,NumKmer);
	
	set<unsigned int>::iterator itr;
	
	float currentlevel=clusterlevel+Global::level_step/2;   //restricts the distances of mergable pairs
	
	w1=omp_get_wtime();
	
	for (int i=0;i<ClsTop;i++)
	{
		if (ClRec[i].clsid<0 && ClRec[i].NNseq <0)            // remove isolated nodes to save search time
			{
				ClRec[i].is_outlier=true;
				treeclust.RemoveNode(ClRec[i].Node);
				ClRec[i].Node=NULL;
				continue;
			}
		if (ClRec[i].clsid <0 && ClRec[i].NNlist.find(ClRec[i].NNseq) == ClRec[i].NNlist.end())  //perform tie-breaking to gain more mergable pairs
		{
			for (itr=ClRec[i].NNlist.begin();itr != ClRec[i].NNlist.end();itr++)
			{
				if (ClRec[*itr].clsid >=0)
				{
					fprintf(stderr,"NN list error here\n");
				}
				if (ClRec[*itr].NNdist <= ClRec[i].NNdist)
				{
					if (ClRec[i].NNseq >=0)
						ClRec[ClRec[i].NNseq].NNlist.erase(i);
					ClRec[i].NNseq=*itr;
					ClRec[*itr].NNlist.insert(i);
					break;
				}
			}
		}
	}
	
	for (int i=0;i<ClsTop;i++)
	{
		if (ClRec[i].is_outlier || ClRec[i].clsid >=0) continue;
		if (ClRec[i].NNseq >=0  && ClRec[i].NNdist <= clusterlevel+Global::level_step)    // using a higher distance level for the first iteration
		{
			if (ClRec[ClRec[i].NNseq].NNseq ==i && ClRec[i].NNseq >i)     //enroll all mergable pairs
			{
				NNRec[nntail].id1=i;
				NNRec[nntail].id2=ClRec[i].NNseq;
				NNRec[nntail].dist=ClRec[i].NNdist;
				nntail++;
			}
		}
	}
	
	printf("Starting Clustering\n");
	fflush(stdout);
	
	MinHeap waitHeap;
	
	int roundcnt=0;
	float checkdist=0.0;
	
	do{
		//int updptr=0;
		int i;
		roundcnt++;
		
		for (i=nnhead;i<nntail;i++)
		{
			// merge sequences and create a new node
						
			int myid=ClsTop+(i-nnhead);
			MergeSeqs(NNRec[i].id1,NNRec[i].id2,myid, ClRec);
			if (!ClRec[NNRec[i].id1].is_outlier)
				treeclust.RemoveNode(ClRec[NNRec[i].id1].Node);
			if (!ClRec[NNRec[i].id2].is_outlier)
				treeclust.RemoveNode(ClRec[NNRec[i].id2].Node);
			ClRec[NNRec[i].id1].Node=NULL;
			ClRec[NNRec[i].id2].Node=NULL;
			ClRec[NNRec[i].id1].clsid=myid;
			ClRec[NNRec[i].id2].clsid=myid;
			
			Tree *node=treeclust.AddSeq(myid);
			ClRec[myid].Node=node;
			waitHeap.Add((void *)(&ClRec[myid]),NNRec[i].dist);
			
			//enroll the inverse NN list of id1 to the wait list
			for (itr=ClRec[NNRec[i].id1].NNlist.begin(); itr !=ClRec[NNRec[i].id1].NNlist.end();itr++)
			{
				if (*itr==NNRec[i].id2) continue;
				float olddist=ClRec[*itr].NNdist;
				int nseq=ClRec[*itr].NNseq2;
				if(nseq >=0 && ClRec[nseq].clsid <0)
				{
					ClRec[*itr].NNseq=nseq;
					ClRec[*itr].NNdist=ClRec[*itr].NNdist2;
					ClRec[nseq].NNlist.insert(*itr);
				}
				else
				{
					ClRec[*itr].NNseq=-1;
					ClRec[*itr].NNdist=MAX_DIST;
				}
				ClRec[*itr].NNseq2=-1;
				ClRec[*itr].NNdist2=MAX_DIST;
				ClRec[*itr].tagscan=0;
				waitHeap.Add((void *)(&ClRec[*itr]),olddist);
				
				// estimate possible NN dists for new node
				float rdist,cdist;
				if (Global::cache->Get(*itr,NNRec[i].id2,cdist))
				{
					rdist=(ClRec[NNRec[i].id1].NumSeqs*ClRec[*itr].NNdist+ClRec[NNRec[i].id2].NumSeqs*cdist)/(ClRec[NNRec[i].id1].NumSeqs+ClRec[NNRec[i].id2].NumSeqs);
					Global::cache->Store(myid,*itr,rdist);
					if (rdist < ClRec[*itr].NNdist)
					{
						if (ClRec[*itr].NNseq >=0)
						{
							ClRec[ClRec[*itr].NNseq].NNlist.erase(*itr);
						}
						ClRec[*itr].NNseq2=ClRec[*itr].NNseq;
						ClRec[*itr].NNdist2=ClRec[*itr].NNdist;
						ClRec[*itr].NNdist=rdist;
						ClRec[*itr].NNseq=myid;
						ClRec[myid].NNlist.insert(*itr);
					}
					else if (rdist <ClRec[*itr].NNdist2)
					{
						ClRec[*itr].NNdist2=rdist;
						ClRec[*itr].NNseq2=myid;
					}
				
					if (rdist <ClRec[myid].NNdist)
					{
						if (ClRec[myid].NNseq >=0)
						{
							ClRec[ClRec[myid].NNseq].NNlist.erase(myid);
						}
						ClRec[myid].NNseq2=ClRec[myid].NNseq;
						ClRec[myid].NNdist2=ClRec[myid].NNdist;
						ClRec[myid].NNdist=rdist;
						ClRec[myid].NNseq=*itr;
						ClRec[*itr].NNlist.insert(myid);
					}
					else if (rdist < ClRec[myid].NNdist2)
					{
						ClRec[myid].NNdist2=rdist;
						ClRec[myid].NNseq2=*itr;
					}
				}
			}
			
			//enroll the inverse NN list of id2 to the wait list
			for (itr=ClRec[NNRec[i].id2].NNlist.begin(); itr !=ClRec[NNRec[i].id2].NNlist.end();itr++)
			{
				if (*itr==NNRec[i].id1) continue;
				float olddist=ClRec[*itr].NNdist;
				int nseq=ClRec[*itr].NNseq2;
				if(nseq >=0 && ClRec[nseq].clsid <0)
				{
					ClRec[*itr].NNseq=nseq;
					ClRec[*itr].NNdist=ClRec[*itr].NNdist2;
					ClRec[nseq].NNlist.insert(*itr);
				}
				else
				{
					ClRec[*itr].NNseq=-1;
					ClRec[*itr].NNdist=MAX_DIST;
				}
				ClRec[*itr].NNseq2=-1;
				ClRec[*itr].NNdist2=MAX_DIST;
				ClRec[*itr].tagscan=0;
				waitHeap.Add((void *)(&ClRec[*itr]),olddist);
				
				// estimate possible NN dists for new node
				float rdist,cdist;
				if (Global::cache->Get(*itr,NNRec[i].id1,cdist))
				{
					rdist=(ClRec[NNRec[i].id2].NumSeqs*ClRec[*itr].NNdist+ClRec[NNRec[i].id1].NumSeqs*cdist)/(ClRec[NNRec[i].id1].NumSeqs+ClRec[NNRec[i].id2].NumSeqs);
					Global::cache->Store(myid,*itr,rdist);
					if (rdist < ClRec[*itr].NNdist)
					{
						if (ClRec[*itr].NNseq >=0)
						{
							ClRec[ClRec[*itr].NNseq].NNlist.erase(*itr);
						}
						ClRec[*itr].NNseq2=ClRec[*itr].NNseq;
						ClRec[*itr].NNdist2=ClRec[*itr].NNdist;
						ClRec[*itr].NNdist=rdist;
						ClRec[*itr].NNseq=myid;
						ClRec[myid].NNlist.insert(*itr);
					}
					else if (rdist <ClRec[*itr].NNdist2)
					{
						ClRec[*itr].NNdist2=rdist;
						ClRec[*itr].NNseq2=myid;
					}
				
					if (rdist <ClRec[myid].NNdist)
					{
						if (ClRec[myid].NNseq >=0)
						{
							ClRec[ClRec[myid].NNseq].NNlist.erase(myid);
						}
						ClRec[myid].NNseq2=ClRec[myid].NNseq;
						ClRec[myid].NNdist2=ClRec[myid].NNdist;
						ClRec[myid].NNdist=rdist;
						ClRec[myid].NNseq=*itr;
						ClRec[*itr].NNlist.insert(myid);
					}
					else if (rdist < ClRec[myid].NNdist2)
					{
						ClRec[myid].NNdist2=rdist;
						ClRec[myid].NNseq2=*itr;
					}
				}
			}
			
		}
		//printf("Merge %d Seqs Finished %d Pairs Queued\n", nntail-nnhead,waitHeap.Size());
		ClsTop+=nntail-nnhead;
		NumCls-=nntail-nnhead;
		nnhead=nntail;

		//printf("Start Merging Clusters\n");
		
		
		int updptr=0;
		ClustRec *curRec;
		bool volatile isEmpty=waitHeap.Empty();
		
		while (!isEmpty) // && waitHeap.Top() < currentlevel)    // pop-up a given number of pairs from wait list for NN finding
		{
			checkdist=waitHeap.Pop((void *&)curRec);            // the pop-up value for the last pair is a safe estimation of mergable distance
			if (curRec->clsid <0 && curRec->tagscan==0)
			{
				curRec->tagscan=1;
				UpdRec[updptr].ID=curRec->ID;
				UpdRec[updptr].dist=checkdist;
				updptr++;
			}
			isEmpty=waitHeap.Empty();
			if (updptr >=num_proc*100)                         //restrict the number of updating pairs
				break;
		}

		if (verbose)
			printf ("Finding NNs for %d Seqs\n",updptr);
		
		#pragma omp parallel for schedule(dynamic)
		for (int ptr=0;ptr<updptr;ptr++)
		{
			treeclust.FindNN(UpdRec[ptr].ID,ClRec,clusterlevel);
		}
		
		
		for (int i=0;i<ClsTop;i++)
		{
			if (ClRec[i].is_outlier || ClRec[i].clsid>=0 || ClRec[i].tagscan==0) continue;  // skip unqualified seqs
			if (ClRec[i].NNseq <0)   //remove isolated nodes
			{
				ClRec[i].is_outlier=true;
				treeclust.RemoveNode(ClRec[i].Node);
				ClRec[i].Node=NULL;
				continue;
			}
			if (ClRec[i].NNlist.find(ClRec[i].NNseq) == ClRec[i].NNlist.end())  //perform tie-breaking to gain more mergable pairs
			{
				for (itr=ClRec[i].NNlist.begin();itr != ClRec[i].NNlist.end();itr++)  
				{
					if (ClRec[*itr].clsid >=0)
					{
						fprintf(stderr,"NN list error here\n");
					}
					if (ClRec[*itr].NNdist <= ClRec[i].NNdist)
					{
						if (ClRec[i].NNseq >=0)
							ClRec[ClRec[i].NNseq].NNlist.erase(i);
						ClRec[i].NNseq=*itr;
						ClRec[*itr].NNlist.insert(i);
						break;
					}
				}
			}
		}
	
		for (int i=0;i<ClsTop;i++)
		{
			if (ClRec[i].clsid>=0) continue;
			if (ClRec[i].NNseq >=0 && (isEmpty || ClRec[i].NNdist <=checkdist*0.95f) && ClRec[i].NNdist <= currentlevel)
			// pick up the safely-mergable pairs and enroll them to the mergable list
			{
				if (ClRec[i].tagscan==0 || ClRec[ClRec[i].NNseq].tagscan==0)
					continue;
				if (ClRec[ClRec[i].NNseq].NNseq ==i && ClRec[i].NNseq >i && ClRec[i].clsid <0)
				{
					NNRec[nntail].id1=i;
					NNRec[nntail].id2=ClRec[i].NNseq;
					NNRec[nntail].dist=ClRec[i].NNdist;
					nntail++;
				}
			}
		}
		if (verbose) 
		{
			w2=omp_get_wtime();
			printf("Level= %.3lf; %d clusters left NumAL %ld NumKmer %ld time %.3lf\n",checkdist,NumCls,NumAl, NumKmer,w2-w1);
		}
		else
		{
			printf(".");
			if (roundcnt % 80==0) 
				printf("\n");
			fflush(stdout);
		}
		if (nntail - nnhead < num_proc)   // increase threshold if too few pairs are available
			currentlevel +=Global::level_step/2;
	}while (!waitHeap.Empty() || nntail >nnhead || currentlevel <=Global::level_max);
	
	printf("\nGenerating Outputs\n");
	fflush(stdout);
	
	float levelrep=Global::level_min;
	fgroup.precision(3);
	flist.precision(3);
	fstat.precision(3);
	int i;

	for (i=0;i<nntail;i++)
	{
		NNRec[i].clstag=0;
		int mu1=NNRec[i].id1 < SeqNum? SeqID[NNRec[i].id1] : NNRec[i].id1;
		int mu2=NNRec[i].id2 < SeqNum? SeqID[NNRec[i].id2] : NNRec[i].id2;
		ftree << mu1 << "\t" << mu2 << "\t" << ClRec[NNRec[i].id1].clsid << "\t" << fixed << NNRec[i].dist <<endl;

	}
	do{
		for (i=0;i<nntail;i++)
		{
			if (NNRec[i].clstag ==0 && NNRec[i].dist <=levelrep)
			{
				if (ClRec[NNRec[i].id1].clsid != ClRec[NNRec[i].id2].clsid)
					fprintf(stderr,"Warning: Wrong Cluster Hierarchy!\n");
				if (!(ClRec[NNRec[i].id1].Seqlist.empty() || ClRec[NNRec[i].id2].Seqlist.empty()))
				{
					int mergeid=ClRec[NNRec[i].id1].clsid;
					ClRec[mergeid].Seqlist.insert(ClRec[NNRec[i].id1].Seqlist.begin(),ClRec[NNRec[i].id1].Seqlist.end());
					ClRec[mergeid].Seqlist.insert(ClRec[NNRec[i].id2].Seqlist.begin(),ClRec[NNRec[i].id2].Seqlist.end());
					ClRec[NNRec[i].id1].Seqlist.clear();
					ClRec[NNRec[i].id2].Seqlist.clear();
					NNRec[i].clstag=1;
				}
			}
		}
		int Clscnt=0;
		fgroup << fixed << levelrep << " |";
		fstat << fixed << levelrep << " ";
		for (i=0;i<ClsTop;i++)
		{
			if (!ClRec[i].Seqlist.empty())
			{
				Clscnt++;
				fstat << fixed << ClRec[i].NumSeqs<<" ";
				for (itr=ClRec[i].Seqlist.begin();itr!=ClRec[i].Seqlist.end();itr++)
				{
					if (itr !=ClRec[i].Seqlist.begin())
						fgroup <<" ";
					fgroup << SeqID[*itr];
				}
				fgroup << "|";
			}
		}
		
		fstat << endl;
		fgroup <<endl;	

		flist << fixed << levelrep << " " << Clscnt << endl;
		printf("Level %.3f\t OTUs %d\n",levelrep,Clscnt);
		levelrep+=Global::level_step;
	}while (levelrep <=Global::level_max);
	printf("NumAl %ld NumKmer %ld\n",NumAl, NumKmer);
}



