#Syllabifies force-aligned segmented corpus data. Three loops are run here. In the first, any consonant in the
#list "onset" preceding a vowel in the list "nuclei" is combined with that vowel. This is an onset-vowel rule.
#In the second, any consonant in the list "coda", when preceded by a vowel and followed by another consonant, 
#is combined with its preceding (C)V unit. This is a coda rule. In the last, all consonants in the list "C1" 
#are combined with those (now syllables) which begin with a consonant in the list "C2." This is a complex
#onset rule. The user needs to supply five plain-text files, one label per line: the consonants that can be
#syllabified with a following vowel (onsets), the possible nuclei, the possible codas, the consonants that
#can be C1 of a C1C2 onset cluster, and the consonants that can be C2 of a C1C2 onset cluster.
#Right now, this script will resyllabify across word boundaries.

#Copyright Christian DiCanio, University at Buffalo, 2021.

#Settings dialog. Each field becomes a script variable (lower-cased first letter, "$" suffix for text fields):
#  Directory_name       folder holding the *.TextGrid files and the five list files. Keep the trailing
#                       slash: the list-file names are appended to this path.
#  Segment_tier_number  tier that holds the force-aligned segments; it is duplicated, not modified.
#  Onset/Nuclei/Coda/C1/C2_file_name  names of the plain-text list files read from Directory_name.
#  Silence_demarcator   interval label that the coda and complex-onset rules skip.
#  Target_tier_number   position at which the duplicated tier is inserted; the rules then edit that tier.
#  Target_tier_name     name given to the duplicated tier.
form Syllabify in Textgrid
   sentence Directory_name: /Users/cdicanio/Research_programs/Christians_praat_scripts/Syllabifier_for_Spanish_full/testing/
   positive Segment_tier_number 2
   sentence Onset_file_name: onsets.txt
   sentence Nuclei_file_name: nuclei.txt
   sentence Coda_file_name: codas.txt
   sentence C1_file_name: c1.txt
   sentence C2_file_name: c2.txt
   sentence Silence_demarcator sp
   positive Target_tier_number 3 
   sentence Target_tier_name syllables
endform

#Normalise the folder so that it always ends in a path separator. The original code joined the list-file
#names to directory_name$ without a separator, so a folder typed without its trailing slash found the
#TextGrids but then failed to open the list files.
dir$ = directory_name$
if dir$ <> "" and right$ (dir$, 1) <> "/" and right$ (dir$, 1) <> "\"
	dir$ = dir$ + "/"
endif

#Read the five external lists ONCE, before the file loop. They do not depend on the TextGrid being
#processed; re-reading them for every file left five extra Strings objects behind per TextGrid.
#Each variable holds the object ID of the list, each len* variable its number of entries.
onsets = Read Strings from raw text file: dir$ + onset_file_name$
lenons = Get number of strings
nuclei = Read Strings from raw text file: dir$ + nuclei_file_name$
lennuc = Get number of strings
codas = Read Strings from raw text file: dir$ + coda_file_name$
lencodas = Get number of strings
c1 = Read Strings from raw text file: dir$ + c1_file_name$
lenc1 = Get number of strings
c2 = Read Strings from raw text file: dir$ + c2_file_name$
lenc2 = Get number of strings

#List every TextGrid in the folder. The Strings object keeps the name "list".
#NOTE(review): the syllabified output is saved into this same folder with a .TextGrid extension, so a
#second run over the folder will also pick up the earlier output files as input.
fileList = Create Strings as file list: "list", dir$ + "*.TextGrid"
number_files = Get number of strings
for ifile to number_files
	selectObject: fileList
	fileName$ = Get string: ifile
	tgID = Read from file: dir$ + fileName$
	#Work on a copy of the segment tier so that the original segmentation is left untouched.
	Duplicate tier: segment_tier_number, target_tier_number, target_tier_name$
	num_labels = Get number of intervals: target_tier_number

	#Onset-nucleus rule: an interval whose label is in the onset list is merged with the interval that
	#follows it when that following label is in the nuclei list.
	#Differences from the earlier version of this loop:
	#  - the last interval is never tested, because it has no following interval to merge with
	#    (asking for the label of interval k+1 there made the script stop with an error);
	#  - each list is searched only until the first hit and at most one merge is done per interval, so
	#    a duplicated line in a list file can no longer remove a second boundary and overwrite the
	#    label of the next segment.
	selectObject: tgID
	num_labels = Get number of intervals: target_tier_number
	k = 1
	while k < num_labels
		selectObject: tgID
		label_p$ = Get label of interval: target_tier_number, k
		label_f$ = Get label of interval: target_tier_number, k + 1

		#Is the current interval a possible onset?
		is_onset = 0
		selectObject: onsets
		m = 1
		while m <= lenons and is_onset = 0
			entry$ = Get string: m
			if entry$ = label_p$
				is_onset = 1
			endif
			m = m + 1
		endwhile

		#Only if it is: is the following interval a possible nucleus?
		is_nucleus = 0
		if is_onset
			selectObject: nuclei
			n = 1
			while n <= lennuc and is_nucleus = 0
				entry$ = Get string: n
				if entry$ = label_f$
					is_nucleus = 1
				endif
				n = n + 1
			endwhile
		endif

		if is_nucleus
			selectObject: tgID
			Remove right boundary: target_tier_number, k
			Set interval text: target_tier_number, k, label_p$ + label_f$
			#The tier is now one interval shorter; refresh the count so that the loop bound and the
			#rules that follow see the real number of intervals.
			num_labels = Get number of intervals: target_tier_number
		endif

		k = k + 1
	endwhile

	#Coda rule: an interval whose label is in the coda list (and, as in the earlier version of this
	#loop, also in the onset list) is merged into the interval on its left. "Remove left boundary"
	#concatenates the two labels, so no label has to be set afterwards.
	#Differences from the earlier version of this loop:
	#  - after a merge the same index is examined again. Removing the left boundary of interval p moves
	#    the following interval into position p; advancing to p+1 skipped it, so the second consonant
	#    of a coda cluster (e.g. the "s" of "ins.tan.te") was never attached;
	#  - the loop starts at interval 2 (interval 1 has no left boundary to remove) and no longer reads
	#    interval p+1, which did not exist for the last interval and whose label was never used;
	#  - a consonant is not merged into an empty interval or into a silence interval;
	#  - each list is searched only until the first hit, so duplicated lines in a list file cannot
	#    trigger a second boundary removal.
	#NOTE(review): the header of this script describes the rule as "followed by another consonant", but
	#the code has always tested the coda label itself against the onset list. That test is kept here so
	#that codas before a pause are still attached - confirm that this is the intended rule.
	selectObject: tgID
	num_labels = Get number of intervals: target_tier_number
	p = 2
	while p <= num_labels
		selectObject: tgID
		label$ = Get label of interval: target_tier_number, p
		label_prev$ = Get label of interval: target_tier_number, p - 1

		can_attach = 0
		if label$ <> "" and label$ <> silence_demarcator$ and label_prev$ <> "" and label_prev$ <> silence_demarcator$
			#Is the current interval a possible coda?
			is_coda = 0
			selectObject: codas
			q = 1
			while q <= lencodas and is_coda = 0
				entry$ = Get string: q
				if entry$ = label$
					is_coda = 1
				endif
				q = q + 1
			endwhile

			#Only if it is: is the same label also in the onset list?
			if is_coda
				selectObject: onsets
				r = 1
				while r <= lenons and can_attach = 0
					entry$ = Get string: r
					if entry$ = label$
						can_attach = 1
					endif
					r = r + 1
				endwhile
			endif
		endif

		if can_attach
			selectObject: tgID
			Remove left boundary: target_tier_number, p
			#One interval fewer; p now points at the interval that followed the coda, so p is NOT
			#advanced. The loop ends because every merge shortens the tier.
			num_labels = Get number of intervals: target_tier_number
		else
			p = p + 1
		endif
	endwhile

	#Complex-onset rule: an interval whose label is in the C1 list is merged with the interval that
	#follows it when the first character of that following label is in the C2 list. "Remove right
	#boundary" concatenates the two labels. Silence intervals are never treated as C1.
	#Only the first character of the following label is compared, so a C2 written with more than one
	#character cannot match.
	#Differences from the earlier version of this loop:
	#  - the last interval is never tested, because it has no following interval (asking for the label
	#    of interval x+1 there made the script stop with an error);
	#  - each list is searched only until the first hit and at most one merge is done per interval, so
	#    a duplicated line in a list file can no longer remove a second boundary and swallow the
	#    following syllable.
	selectObject: tgID
	num_labels = Get number of intervals: target_tier_number
	x = 1
	while x < num_labels
		selectObject: tgID
		label$ = Get label of interval: target_tier_number, x
		label_f2$ = Get label of interval: target_tier_number, x + 1
		label_c2$ = left$ (label_f2$, 1)

		#Is the current interval a possible first member of a cluster?
		is_c1 = 0
		if label$ <> silence_demarcator$
			selectObject: c1
			y = 1
			while y <= lenc1 and is_c1 = 0
				entry$ = Get string: y
				if entry$ = label$
					is_c1 = 1
				endif
				y = y + 1
			endwhile
		endif

		#Only if it is: does the following interval begin with a possible second member?
		is_c2 = 0
		if is_c1
			selectObject: c2
			z = 1
			while z <= lenc2 and is_c2 = 0
				entry$ = Get string: z
				if entry$ = label_c2$
					is_c2 = 1
				endif
				z = z + 1
			endwhile
		endif

		if is_c2
			selectObject: tgID
			Remove right boundary: target_tier_number, x
			#The tier is now one interval shorter; refresh the count used as the loop bound.
			num_labels = Get number of intervals: target_tier_number
		endif

		x = x + 1
	endwhile

	#Save the result next to the input as <name>_syllabified.TextGrid. The last 9 characters of the file
	#name (".TextGrid") are stripped before the suffix is added.
	selectObject: tgID
	lengthFN = length (fileName$)
	newfilename$ = left$ (fileName$, lengthFN - 9)
	Save as text file: directory_name$ + "/" + newfilename$ + "_syllabified.TextGrid"
	#The TextGrid is on disk now; remove it so that objects do not pile up in the Objects list while
	#a large corpus is being processed.
	Remove
endfor

#The file list is no longer needed once every TextGrid has been processed.
select Strings list
Remove