#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ace-to-sam.py
# MacVector
#
# Created by B. Kevin Hardman on 12/28/11
# Copyright © 2011 MacVector, Inc. All rights reserved.

#
# aceToSam
# Copyright (C) 2009 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#

# Module metadata (author, copyright, license, version, contact, status).
__author__ = 'Plateforme bioinformatique Midi Pyrenees'
__copyright__ = 'Copyright (C) 2010 INRA'
__license__ = 'GNU General Public License'
__version__ = '1.0'
__email__ = 'support.genopole@toulouse.inra.fr'
__status__ = 'beta'

# Copyright 2004 by Frank Kauff and Cymon J. Cox.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""
Parser for ACE files output by PHRAP.

Written by Frank Kauff (fkauff@duke.edu) and
Cymon J. Cox (cymon@duke.edu)

Uses the Biopython Parser interface: ParserSupport.py

Usage:

There are two ways of reading an ace file:
1) The function 'read' reads the whole file at once;
2) The function 'parse' reads the file contig after contig.

1) Parse whole ace file at once:

		from Bio.Sequencing import Ace
		acefilerecord=Ace.read(open('my_ace_file.ace'))

This gives you:
		acefilerecord.ncontigs (the number of contigs in the ace file)
		acefilerecord.nreads (the number of reads in the ace file)
		acefilerecord.contigs[] (one instance of the Contig class for each contig)

The Contig class holds the info of the CO tag, CT and WA tags, and all the reads used
for this contig in a list of instances of the Read class, e.g.:

		contig3=acefilerecord.contigs[2]
		read4=contig3.reads[3]
		RD_of_read4=read4.rd
		DS_of_read4=read4.ds

CT, WA and RT tags, which usually sit at the end of the file but can appear
anywhere, are automatically sorted into the right place.

see _RecordConsumer for details.

2) Or you can iterate over the contigs of an ace file one by one in the usual way:

		from Bio.Sequencing import Ace
		contigs=Ace.parse(open('my_ace_file.ace'))
		for contig in contigs:
			print(contig.name)
			...

Please note that for memory efficiency, when using the iterator approach, only one
contig is kept in memory at once.  However, there can be a footer to the ACE file
containing WA, CT, RT or WR tags which contain additional meta-data on the contigs.
Because the parser doesn't see this data until the final record, it cannot be added to
the appropriate records.  Instead these tags will be returned with the last contig record.
Thus an ace file does not entirely suit the concept of iterating. If WA, CT, RT, WR tags
are needed, the 'read' function rather than the 'parse' function might be more appropriate.
"""


class rd(object):
	"""Container for one RD (read) record: the read's name and its sequence.

	All fields start empty (``''``) or unset (``None``); whoever parses the
	RD header line fills them in afterwards.  Where the read sits on the
	contig, and on which strand, is recorded in the AF lines instead.
	"""
	def __init__(self):
		# Identification and the three header counters.
		self.name, self.padded_bases = '', None
		self.info_items = self.read_tags = None
		# Sequence text, accumulated line by line by the parser.
		self.sequence = ''

class qa(object):
	"""QA (read quality) line: quality- and alignment-clipping coordinates.

	@param	line	string or None, a full "QA a b c d" line.  When given, the
					four values after the "QA" keyword are stored, in order, as
					qual_clipping_start, qual_clipping_end,
					align_clipping_start and align_clipping_end.  When omitted,
					all four attributes stay None.

	Raises ValueError if a field is not an integer literal and IndexError if
	fewer than four values follow the keyword.
	"""
	def __init__(self, line=None):
		self.qual_clipping_start=None
		self.qual_clipping_end=None
		self.align_clipping_start=None
		self.align_clipping_end=None
		if line:
			# The line comes straight from an input file, so parse the fields
			# with int() instead of eval(): eval() would execute arbitrary
			# expressions embedded in the file.
			header = [int(field) for field in line.split()[1:]]
			self.qual_clipping_start=header[0]
			self.qual_clipping_end=header[1]
			self.align_clipping_start=header[2]
			self.align_clipping_end=header[3]

class ds(object):
	"""DS line of a read: chromatogram/PHD file names and related fields.

	Each recognised keyword found in the line (CHROMAT_FILE, PHD_FILE, TIME,
	CHEM, DYE, TEMPLATE, DIRECTION) populates the attribute of the same name
	in lower case; keywords that are absent leave the attribute as ''.
	"""
	def __init__(self, line=None):
		self.chromat_file=''
		self.phd_file=''
		self.time=''
		self.chem=''
		self.dye=''
		self.template=''
		self.direction=''
		if line:
			# Map the offset of every keyword present in the line to the keyword.
			located = {}
			for keyword in ['CHROMAT_FILE','PHD_FILE','TIME','CHEM','DYE','TEMPLATE','DIRECTION']:
				offset = line.find(keyword)
				if offset != -1:
					located[offset] = keyword
			# A value runs from just past "KEYWORD:" up to the next keyword
			# (or past the end of the line for the last one).
			starts = sorted(located)
			stops = starts[1:] + [len(line) + 1]
			for begin, end in zip(starts, stops):
				keyword = located[begin]
				value = line[begin + len(keyword) + 1:end]
				setattr(self, keyword.lower(), value.strip())

class af(object):
	"""AF line: where a read is placed on the contig.

	``coru`` stores the strand character exactly as written in the ACE file:
	C for complemented or U for uncomplemented.
	"""
	def __init__(self, line=None):
		self.name=''
		self.coru=None
		self.padded_start=None
		if not line:
			return
		# Layout: AF <read name> <C|U> <padded start>
		fields = line.split()
		self.name, self.coru = fields[1], fields[2]
		self.padded_start = int(fields[3])

class bs(object):
	"""BS (base segment): which read was chosen as the consensus over a padded range."""
	def __init__(self, line=None):
		self.name=''
		self.padded_start=None
		self.padded_end=None
		if not line:
			return
		# Layout: BS <padded start> <padded end> <read name>
		fields = line.split()
		self.padded_start, self.padded_end = int(fields[1]), int(fields[2])
		self.name = fields[3]

class rt(object):
	"""RT (transient read tag), generated by crossmatch and phrap.

	``comment`` starts as an empty list; the constructor never fills it.
	"""
	def __init__(self, line=None):
		self.name=''
		self.tag_type=''
		self.program=''
		self.padded_start=None
		self.padded_end=None
		self.date=''
		self.comment=[]
		if not line:
			return
		# Layout: <read name> <tag type> <program> <padded start> <padded end> <date>
		fields = line.split()
		self.name, self.tag_type, self.program = fields[0], fields[1], fields[2]
		self.padded_start, self.padded_end = int(fields[3]), int(fields[4])
		self.date = fields[5]

class ct(object):
	"""CT (consensus tag).

	``info`` and ``comment`` start as empty lists; the constructor only
	decodes the tag's header line.
	"""
	def __init__(self, line=None):
		self.name=''
		self.tag_type=''
		self.program=''
		self.padded_start=None
		self.padded_end=None
		self.date=''
		self.notrans=''
		self.info=[]
		self.comment=[]
		if not line:
			return
		# Layout: <contig name> <tag type> <program> <padded start> <padded end> <date> [<notrans>]
		fields = line.split()
		self.name, self.tag_type, self.program = fields[0], fields[1], fields[2]
		self.padded_start, self.padded_end = int(fields[3]), int(fields[4])
		self.date = fields[5]
		# The seventh field is only stored when the header has exactly seven fields.
		if len(fields) == 7:
			self.notrans = fields[6]

class wa(object):
	"""WA (whole assembly tag): the assembly program's tag type, name and date.

	``info`` starts as an empty list; the constructor only decodes the
	tag's header line.
	"""
	def __init__(self, line=None):
		self.tag_type=''
		self.program=''
		self.date=''
		self.info=[]
		if not line:
			return
		# Layout: <tag type> <program> <date>
		fields = line.split()
		self.tag_type, self.program, self.date = fields[0], fields[1], fields[2]

class wr(object):
	"""WR line: read name, alignment status, program and date."""
	def __init__(self, line=None):
		self.name=''
		self.aligned=''
		self.program=''
		self.date=[]
		if not line:
			return
		# Layout: <read name> <aligned> <program> <date>
		fields = line.split()
		self.name, self.aligned = fields[0], fields[1]
		self.program, self.date = fields[2], fields[3]

class Reads(object):
	"""Everything known about one read supporting an ACE contig.

	When an RD header line is passed in, an ``rd`` object is created and
	filled from it; the remaining slots are left for the parser to populate.
	"""
	def __init__(self, line=None):
		self.rd=None	# one per read
		self.qa=None	# one per read
		self.ds=None	# none or one per read
		self.rt=None	# none or many per read
		self.wr=None	# none or many per read
		if not line:
			return
		# Layout: RD <read name> <padded bases> <info items> <read tags>
		fields = line.split()
		record = rd()
		self.rd = record
		record.name = fields[1]
		record.padded_bases = int(fields[2])
		record.info_items = int(fields[3])
		record.read_tags = int(fields[4])

class Contig(object):
	"""One contig of an ACE record.

	When a CO header line is passed in, the name, the three counters and the
	U/C flag are decoded from it.  Sequence, qualities, AF/BS entries, reads
	and tags start empty and are filled in by the parser.
	"""
	def __init__(self, line=None):
		self.name = ''
		self.nbases=None
		self.nreads=None
		self.nsegments=None
		self.uorc=None
		self.sequence=""
		self.quality=[]
		self.af=[]
		self.bs=[]
		self.reads=[]
		self.ct=None	# none or many
		self.wa=None	# none or many
		if not line:
			return
		# Layout: CO <name> <bases> <reads> <segments> <U|C>
		fields = line.split()
		self.name = fields[1]
		self.nbases, self.nreads, self.nsegments = int(fields[2]), int(fields[3]), int(fields[4])
		self.uorc = fields[5]

def parse(handle):
	"""parse(handle)

	where handle is a file-like object (any iterable of lines).

	This function returns an iterator that allows you to iterate
	over the ACE file record by record:

		records = parse(handle)
		for record in records:
			# do something with the record

	where each record is a Contig object.

	For every contig the generator reads, in this order: the CO header and
	consensus sequence, the BQ quality block, the AF lines, the BS lines and
	then each read (RD, QA, optional DS) together with any RT{ / WR{ / WA{ /
	CT{ tag blocks that follow it.  Tags are stored on the read or contig
	after which they appear.

	Raises ValueError when a mandatory section (BQ, RD, QA, ...) is missing
	or the file ends in the middle of a block.
	"""

	handle = iter(handle)

	line = ""
	while True:
		# at beginning, skip the AS and look for first CO command
		try:
			while True:
				if line.startswith('CO'):
					break
				line = next(handle)
		except StopIteration:
			# no further contig header: the iteration is finished
			return

		record = Contig(line)

		# consensus sequence: all lines up to the first blank line
		for line in handle:
			line = line.strip()
			if not line:
				break
			record.sequence += line

		# skip blank lines; the next non-blank line must open the BQ block
		for line in handle:
			if line.strip():
				break
		if not line.startswith("BQ"):
			raise ValueError("Failed to find BQ line")

		# consensus qualities: integers up to the next blank line
		for line in handle:
			if not line.strip():
				break
			record.quality.extend(list(map(int, line.split())))

		for line in handle:
			if line.strip():
				break

		# AF lines, one per read placed on the contig
		while True:
			if not line.startswith("AF "):
				break
			record.af.append(af(line))
			try:
				line = next(handle)
			except StopIteration:
				raise ValueError("Unexpected end of AF block")

		# skip blank lines between the AF and BS blocks
		while True:
			if line.strip():
				break
			try:
				line = next(handle)
			except StopIteration:
				raise ValueError("Unexpected end of file")

		# BS lines (base segments)
		while True:
			if not line.startswith("BS "):
				break
			record.bs.append(bs(line))
			try:
				line = next(handle)
			except StopIteration:
				raise ValueError("Failed to find end of BS block")

		# now read all the read data
		# it starts with a 'RD', and then a mandatory QA
		# then follows an optional DS
		# CT,RT,WA,WR may or may not be there in unlimited quantity. They might refer to the actual read or contig,
		# or, if encountered at the end of file, to any previous read or contig. the sort() method deals
		# with that later.
		while True:

			# each read must have a rd and qa
			try:
				while True:
					# If I've met the condition, then stop reading the line.
					if line.startswith("RD "):
						break
					line = next(handle)
			except StopIteration:
				raise ValueError("Failed to find RD line")

			record.reads.append(Reads(line))

			# read sequence: all lines up to the first blank line
			for line in handle:
				line = line.strip()
				if not line:
					break
				record.reads[-1].rd.sequence += line

			for line in handle:
				if line.strip():
					break
			if not line.startswith("QA "):
				raise ValueError("Failed to find QA line")
			record.reads[-1].qa = qa(line)

			# now one ds can follow
			for line in handle:
				if line.strip():
					break
			else:
				# file ends right after the QA line
				break

			if line.startswith("DS"):
				record.reads[-1].ds = ds(line)
				# The DS line is consumed; clearing it makes the tag loop
				# below fetch the next non-blank line.  When there is no DS
				# line, 'line' already holds the next record (RD, CO or a
				# tag) and must NOT be skipped, otherwise that read or
				# contig would be silently lost.
				line = ""

			# the file could just end, or there's some more stuff. In ace files, anything can happen.
			# the following tags are interspersed between reads and can appear multiple times.
			while True:
				# something left
				try:
					while True:
						if line.strip():
							break
						line = next(handle)
				except StopIteration:
					# file ends here
					break
				if line.startswith("RT{"):
					# now if we're at the end of the file, this rt could
					# belong to a previous read, not the actual one.
					# we store it here were it appears, the user can sort later.
					if record.reads[-1].rt is None:
						record.reads[-1].rt=[]
					for line in handle:
						line=line.strip()
						#if line=="COMMENT{":
						if line.startswith("COMMENT{"):
							if line[8:].strip():
								#MIRA 3.0.5 would miss the new line out :(
								record.reads[-1].rt[-1].comment.append(line[8:])
							for line in handle:
								line = line.strip()
								if line.endswith("C}"):
									break
								record.reads[-1].rt[-1].comment.append(line)
						elif line=='}':
							break
						else:
							record.reads[-1].rt.append(rt(line))
					line = ""
				elif line.startswith("WR{"):
					if record.reads[-1].wr is None:
						record.reads[-1].wr=[]
					for line in handle:
						line=line.strip()
						if line=='}': break
						record.reads[-1].wr.append(wr(line))
					line = ""
				elif line.startswith("WA{"):
					if record.wa is None:
						record.wa=[]
					try:
						line = next(handle)
					except StopIteration:
						raise ValueError("Failed to read WA block")
					# first line of the block is the header, the rest is free-form info
					record.wa.append(wa(line))
					for line in handle:
						line=line.strip()
						if line=='}': break
						record.wa[-1].info.append(line)
					line = ""
				elif line.startswith("CT{"):
					if record.ct is None:
						record.ct=[]
					try:
						line = next(handle)
					except StopIteration:
						raise ValueError("Failed to read CT block")
					record.ct.append(ct(line))
					for line in handle:
						line=line.strip()
						if line=="COMMENT{":
							for line in handle:
								line = line.strip()
								if line.endswith("C}"):
									break
								record.ct[-1].comment.append(line)
						elif line=='}':
							break
						else:
							record.ct[-1].info.append(line)
					line = ""
				else:
					# not a tag block: either the next RD or the next CO
					break

			if not line.startswith('RD'): # another read?
				break

		yield record

class ACEFileRecord(object):
	"""Holds the data of a whole ACE file: contig/read counts, the contigs
	themselves and any whole-assembly (WA) tags.
	"""
	def __init__(self):
		self.ncontigs=None
		self.nreads=None
		self.contigs=[]
		self.wa=None	# none or many

	def sort(self):
		"""Move ct, rt and wr tags onto the contig / read they name, if possible.

		WA tags found on any contig are additionally collected into ``self.wa``.
		Tags whose name matches no contig or read are dropped from where they
		were found and not re-attached anywhere.
		"""

		stray_ct, stray_rt, stray_wr = [], [], []

		# Pass 1: pull out every tag stored on a contig/read it does not name.
		for contig in self.contigs:
			if contig.wa:
				if not self.wa:
					self.wa=[]
				self.wa.extend(contig.wa)
			if contig.ct:
				misplaced = [tag for tag in contig.ct if tag.name != contig.name]
				for tag in misplaced:
					contig.ct.remove(tag)
				stray_ct.extend(misplaced)
			for read in contig.reads:
				if read.rt:
					misplaced = [tag for tag in read.rt if tag.name != read.rd.name]
					for tag in misplaced:
						read.rt.remove(tag)
					stray_rt.extend(misplaced)
				if read.wr:
					misplaced = [tag for tag in read.wr if tag.name != read.rd.name]
					for tag in misplaced:
						read.wr.remove(tag)
					stray_wr.extend(misplaced)

		# Pass 2: attach each collected tag to the contig/read carrying its name.
		for contig in self.contigs:
			for tag in stray_ct:
				if tag.name == contig.name:
					if contig.ct is None:
						contig.ct = []
					contig.ct.append(tag)
			if not (stray_rt or stray_wr):
				continue
			for read in contig.reads:
				for tag in stray_rt:
					if tag.name == read.rd.name:
						if read.rt is None:
							read.rt = []
						read.rt.append(tag)
				for tag in stray_wr:
					if tag.name == read.rd.name:
						if read.wr is None:
							read.wr = []
						read.wr.append(tag)

def readACEFile(handle):
	"""Read a complete ACE file and return it as an ACEFileRecord.

	The first line must be the AS header; its two counters become
	``ncontigs`` and ``nreads``.  Every contig is then parsed and the tags
	are re-sorted onto the contig or read they name.

	Raises ValueError when the input is empty or does not start with 'AS'.
	"""

	lines = iter(handle)

	record=ACEFileRecord()

	try:
		first = next(lines)
	except StopIteration:
		raise ValueError("Premature end of file")

	# check if the file starts correctly
	if not first.startswith('AS'):
		raise ValueError("File does not start with 'AS'.")

	counters = first.split()[1:3]
	record.ncontigs, record.nreads = [int(value) for value in counters]

	# now read all the records
	record.contigs = [contig for contig in parse(lines)]
	# wa, ct and rt tags are usually at the end of the file, but not necessarily.
	# When iterating, tags are returned with the contig or read after which they
	# appear, so tags placed at the end all land on the last contig.  Since the
	# whole file is in memory here, they can be moved to the contig/read
	# instance they actually belong to.
	record.sort()
	return record

def readQualityFile(handle, base):
	"""Parse a FASTA-style quality file into ASCII-encoded quality strings.

	@param	handle	iterable of lines; '>' lines start a record, every other
					non-blank line holds whitespace-separated integer scores
	@param	base	int, offset added to each score before converting it to a
					character with chr()
	@return	dict	record name -> quality string.  The name is the header
					text between '>' and the first space.  Records without any
					score line are not included.

	Blank lines are skipped rather than treated as the end of the file, so a
	stray empty line no longer silently drops all following records.

	Raises ValueError if a score is not an integer or if score lines appear
	before the first '>' header.
	"""

	# Collect the encoded pieces per record and join once at the end
	# instead of growing a string on every line.
	pieces = {}
	name = None

	for line in handle:
		line = line.strip()
		if not line:
			continue
		if line.startswith('>'):
			name = line.partition(' ')[0][1:]
		else:
			if name is None:
				raise ValueError("Quality values found before the first '>' header")
			encoded = "".join([chr(int(value) + base) for value in line.split()])
			pieces.setdefault(name, []).append(encoded)

	return {record: "".join(parts) for record, parts in pieces.items()}

#import os, re, sys, shutil
import re

def makeCigarAndStats(seq_reference, seq_read, clipped_start, clipped_end):
	"""
	Receive aligned read and reference hard clipped (both) and produce the cigar
		Contig  Read	cigar
		A		A		M (match)
		G		A		M (match)
		*		A		I (insertion)
		A		*		D (deletion)
		A		n/N/.	N (skipped)
		*		*		P (padded)
		G		a		S (soft clipped) ! edges !
		G				H (hard clipped) ! edges ! -> impossible in ACE
	Positions of the read that extend past the end of the reference are ignored.
	When no position can be compared (either sequence is empty) the alignment
	part of the cigar is empty instead of a malformed "0" token; only the
	soft-clip parts, if any, are returned.
	@param	seq_reference	string, alignment reference
	@param	seq_read		 string, to be aligned on the reference
	@param	clipped_start	int, number of bases soft clipped before the aligned part (0 for none)
	@param	clipped_end	  int, number of bases soft clipped after the aligned part (0 for none)
	@return   (cigar, stats)   string, the cigar line; list [matches, mismatches] counted over the M operations
	"""
	from itertools import groupby

	operations = [] # one letter per aligned column, like MMMPMMMM for a cigar of 3M1P4M
	matches = 0
	mismatches = 0

	# zip() stops at the shorter sequence: read overlap beyond the reference is ignored
	for char_ref, char_read in zip(seq_reference, seq_read):

		# reference is gapped: padded in both, or insertion of one base
		if char_ref == "*":
			operation = "P" if char_read == "*" else "I"

		# unknown base, skipped region
		elif char_read in ("n", "N", "."):
			operation = "N"

		# deletion
		elif char_read == "*":
			operation = "D"

		# both have a nucleotide: count match/mismatch (case-insensitive)
		else:
			operation = "M"
			if char_read.upper() == char_ref.upper():
				matches += 1 #perfect hit
			else:
				mismatches += 1

		operations.append(operation)

	# final cigar: zipping from MMMMIMM to 4M1I2M
	cigar = "".join(str(len(list(run))) + operation for operation, run in groupby(operations))

	# add the clipped parts (from read.qa info)
	clip_s = str(clipped_start) + "S" if clipped_start else ""
	clip_e = str(clipped_end) + "S" if clipped_end else ""
	cigar = clip_s + cigar + clip_e

	stats = [matches, mismatches]
	return cigar, stats

def editReadSequence(read_seq, cigar):
	"""
	Remove gap and skipped characters from a read so that it matches its cigar.
	The cigar is walked from its last operation to its first, so that deleting
	a character never shifts the positions still to be visited.
	@param	read_seq	string	read sequence as parsed in the ACE
	@param	cigar	   string	cigar of the alignment of the current read on the reference
	@return   read_seq	string	read sequence shortened (gapless and N-less, excepted for soft clipped and inserted parts (as will be in SAM file)
	"""
	list_read = list(read_seq)
	pos = len(list_read) - 1
	# raw string: "\d" in a plain literal is an invalid escape sequence
	for ops in reversed(re.findall(r"\d+[A-Z]", cigar)):
		number = int(ops[:-1])
		type_op = ops[-1]
		#do nothing on soft clipped sequences or inserted N
		if type_op in ("S", "I"):
			pos -= number
			continue
		for _ in range(number):
			#remove the elements given by the cigar
			if pos < len(list_read) and list_read[pos] in ("*", "N", "n", "."):
				list_read.pop(pos)
			pos -= 1
	return "".join(list_read)

def editQuality(quality, read_seq, cigar, base):
	"""
	Adjust a quality string to the cigar of its read, walking the cigar from
	its last operation to its first.
	@param	quality	 string	ASCII-encoded qualities of the read
	@param	read_seq	string	read sequence as parsed in the ACE
	@param	cigar	   string	cigar of the alignment of the current read on the reference
	@param	base		int	   ASCII offset of the quality encoding
	@return   quality	 string	in soft clipped parts a zero-quality character
								  (chr(base)) is inserted for every '*' of the read;
								  in the other non-insertion parts the quality at
								  every N/n/. position of the read is removed
	"""
	list_quality = list(quality)
	list_read = list(read_seq)
	pos = len(list_read) - 1
	# raw string: "\d" in a plain literal is an invalid escape sequence
	for ops in reversed(re.findall(r"\d+[A-Z]", cigar)):
		number = int(ops[:-1])
		type_op = ops[-1]
		# insertions are kept as they are
		if type_op == "I":
			pos -= number
			continue
		# soft clipped part: give every gap of the read a null quality
		if type_op == "S":
			for _ in range(number):
				if list_read[pos] == "*":
					list_quality.insert(pos, chr(base))
				pos -= 1
			continue
		for _ in range(number):
			#remove the elements given by the cigar
			if list_read[pos] in ("N", "n", "."):
				list_quality.pop(pos)
			pos -= 1
	return "".join(list_quality)

def editConsensusQuality(quality, consensus, base):
	"""
	Pad a consensus quality string so that it lines up with the padded consensus.
	@param	quality	  string	ASCII-encoded consensus qualities
	@param	consensus	string	consensus sequence, gaps written as '*'
	@param	base		 int	   ASCII offset of the quality encoding
	@return   quality	  string	a zero-quality character (chr(base)) is inserted
								   at the index of every '*' of the consensus
	"""
	padded = list(quality)
	null_quality = chr(0 + base)
	for index, letter in enumerate(list(consensus)):
		if letter == "*":
			padded.insert(index, null_quality)
	return "".join(padded)

def compute_quality(contig_seq, read_seq, mask, base):
	"""
	Convolution-like scoring of each read base from its agreement with the contig.
	For every position a window of the mask's width is centred on the base; the
	score is the sum of the mask weights at the window positions where read and
	contig carry the same letter (case-insensitive).  At both ends of the read the
	window and the mask are truncated to the existing positions.
	@param	contig_seq	string		contig sequence
	@param	read_seq	  string		read shortened sequence, as required in SAM
	@param	mask		  list of int   convolution mask used to compute quality base per base
	@param	base		  int		   ASCII-base for quality in SAM (current version is 33)
	@return   quality	   string		quality sequence in the ASCII-base format,
										  or "*" when the read is not longer than the mask
	"""
	read_length = len(read_seq)
	if read_length <= len(mask):
		return "*"

	reach = len(mask) // 2	#only for odd mask's size
	scores = []

	for index, letter in enumerate(read_seq):

		#gapped or skipped position: only in a clipped sequence, quality null
		if letter in ("*", "N", "n", "."):
			scores.append(0)
			continue

		weights = mask
		window_start = index - reach
		window_stop = index + reach + 1

		#too close to the beginning: drop the weights that have no letter
		if index < reach:
			window_start = 0
			weights = weights[reach - index:]

		#too close to the end: same on the right-hand side
		letters_after = read_length - (index + 1)
		if letters_after < reach:
			window_stop = index + letters_after + 1
			weights = weights[:reach + 1 + letters_after]

		contig_window = contig_seq[window_start:window_stop]
		read_window = read_seq[window_start:window_stop]

		#zip() stops at the shortest input, so a contig shorter than the window is ignored
		scores.append(sum(
			weight
			for weight, contig_letter, read_letter in zip(weights, contig_window, read_window)
			if contig_letter.upper() == read_letter.upper()
		))

	#conversion into ASCII-33 (default)
	return "".join([chr(score + base) for score in scores])

#if __name__ == "__main__":
def main():
	"""Command-line entry point: convert an ACE assembly into a SAM file.

	Options: -i ACE input (required), -o SAM output (required, "-" writes to
	stdout), -q quality file, -c consensus output (FASTQ-like), -s statistics
	output (name, read count, base count per contig), -m convolution mask,
	-b quality base, -v verbose (accepted but unused).

	Read qualities come from the quality file when one is given and holds
	the read; otherwise they are computed with compute_quality().

	Progress and error messages go to stderr.  On any error the function
	prints a message and calls sys.exit().
	NOTE(review): sys.exit() without argument exits with status 0 even on
	errors; kept unchanged in case callers rely on it.
	"""
	from optparse import OptionParser
	import os
	import sys

	parser = OptionParser(usage="usage: %prog -i filename -o filename")
	parser.description = "Ace to SAM converter"
	parser.add_option("-i", "--input", dest="inputAce",
					  help="path/filename of the ace to convert", metavar="FILE")
	parser.add_option("-o", "--output", dest="output",
					  help="path/filename of the output SAM file", metavar="FILE")
	parser.add_option("-q", "--quality-file", dest="inputQual",
					  help="path/filename of the PHRAP quality file", metavar="FILE")
	parser.add_option("-c", "--consensus", dest="consensus",
					  help="path/filename of the output consensus file", metavar="FILE")
	parser.add_option("-s", "--statistics", dest="statistics",
					  help="path/filename to write statistics", metavar="FILE")
	parser.add_option("-m", "--mask", dest="mask",
					  help="mask used for quality computation, odd succession of integer, separated by a comma", metavar="STRING")
	parser.add_option("-b", "--quality-base", dest="opt_qual",
					  help="ASCII conversion of quality", metavar="INT")
	parser.add_option("-v", "--verbose", dest="verbose",
					  action="store_true", default=False,
					  help="print all status messages to stdout")
	(options, _args) = parser.parse_args()
	inputAce = options.inputAce
	inputQual = options.inputQual
	output = options.output
	consensus = options.consensus
	statistics = options.statistics
	opt_qual = options.opt_qual
	mask = options.mask

	if mask: # the mask must hold an odd number of integers
		mask = [int(i) for i in re.findall(r"\d+", mask)]
		if len(mask) % 2 == 0:
			print("Do not use this option unless the mask's size is odd ! Examples: 1,2,1 or 5,10,15,10,5", file=sys.stderr)
			sys.exit()
		if sum(mask)>93:
			print("Your mask sum exceeds the maximum allowed value (decimal 93). Please use lower values", file=sys.stderr)
			sys.exit()
	if not mask:
		mask=[5, 10, 15, 10, 5]

	if not inputAce or not output or inputAce==output:
		parser.print_help()
		sys.exit()
	elif not os.path.exists(inputAce):
		print("The input file {0} doesn't exist!".format(inputAce), file=sys.stderr)
		parser.print_help()
		sys.exit()

	base = 33
	if opt_qual:
		# NOTE(review): the first character of the option value is dropped
		# before conversion; presumably a prefix is expected - confirm.
		base = int(opt_qual[1:])

	# parse quality file; without one every read falls back to compute_quality()
	qualities = {}
	if inputQual and os.path.exists(inputQual):
		print("Reading quality file..", file=sys.stderr)
		try:
			with open(inputQual, 'r') as quality_handle:
				qualities = readQualityFile(quality_handle, base)
		except Exception:
			# top-level boundary: any parse/IO failure means "not a quality file"
			print("The given file is not a quality file: {0}".format(inputQual), file=sys.stderr)
			parser.print_help()
			sys.exit()

	# parse Ace
	print("Reading ACE file..", file=sys.stderr)
	try:
		with open(inputAce, 'r') as ace_handle:
			acefilerecord = readACEFile(ace_handle)
	except Exception:
		# top-level boundary: any parse/IO failure means "not an Ace"
		print("The given file is not an Ace: {0}".format(inputAce), file=sys.stderr)
		parser.print_help()
		sys.exit()

	# HEADER: when several phrap_params tags exist, the last usable one wins
	CLI = ""
	pgr_version = ""
	aln_program = ""

	if acefilerecord.wa:
		for annot in acefilerecord.wa:
			#info is expected to be [ "CLI", "prog_name version" ]
			if annot.tag_type == "phrap_params" and len(annot.info) >= 2:
				program_fields = annot.info[1].split()
				if program_fields:
					CLI = annot.info[0]
					pgr_version = program_fields[-1]
					aln_program = program_fields[0]

	print("Processing contigs..", file=sys.stderr)

	if consensus:
		with open(consensus, 'w') as consensus_out:
			for contig in acefilerecord.contigs:
				consensus_out.write("@" + contig.name + "\n")
				consensus_out.write(contig.sequence + "\n")
				consensus_out.write("+\n")
				qual = "".join([ chr(i + base) for i in contig.quality])
				if len(qual) != len(contig.sequence):
					qual = editConsensusQuality(qual, contig.sequence, base)
				consensus_out.write(qual + "\n")

	if statistics:
		with open(statistics, 'w') as statistics_out:
			for contig in acefilerecord.contigs:
				statistics_out.write(contig.name + "\t" + str(contig.nreads) + "\t" + str(contig.nbases) + "\n")

	if output == "-":
		destination = sys.stdout
	else:
		destination = open(output,'w')

	numberOfContigs = 0
	numberOfReads = 0
	try:
		for contig in acefilerecord.contigs:
			destination.write("@SQ" + "\t" + "SN:" + contig.name + "\t" + "LN:" + str(len(contig.sequence)) + "\n")

		if aln_program and CLI and pgr_version:
			destination.write("@PG" + "\t" + "ID:" + aln_program + "" + "\t" + "CL:" + CLI + "" + "\t" + "VN:" + pgr_version + "\n") # VN:1.0
		destination.write("@CO" + "\t" + "Generated from " + inputAce + " quality generated using a convolution mask of " + str(mask) + " and coded in ASCII-" + str(base) + "\n")

		for contig in acefilerecord.contigs:
			numberOfContigs += 1

			# read name -> AF entry, for constant-time lookup per read
			read_locations = {}
			for ctg_af in contig.af:
				read_locations[ctg_af.name] = ctg_af

			# ALIGNMENT SECTION
			for r in contig.reads:

				read = r.rd
				clip_start = r.qa.align_clipping_start
				clip_end = r.qa.align_clipping_end

				# process all
				# NOTE(review): clip_start = 0 makes the slice below start at
				# index -1 (last base only); kept as in the original - confirm
				# whether 1 was intended.
				if clip_start == clip_end == 1:
					clip_start = 0 ; clip_end = len(read.sequence)

				#everything's clipped
				if clip_start == clip_end == -1:
					continue

				# look for the corresponding read declaration (AF)
				flag = 0 #forward
				ctg_af = read_locations[read.name]
				if ctg_af.coru == "C": #reverse
					flag = 16
				read_padded_start = ctg_af.padded_start

				#cut ends of read 's sequence (do hard clip)
				read_sequence = read.sequence[clip_start - 1: clip_end]
				offset = clip_start - 1 + read_padded_start - 1
				seq_reference = contig.sequence[ offset : offset + len(read_sequence)]
				read_start = offset

				#uncount gaps before the read
				read_start -= contig.sequence[:offset + 1].count("*")

				cigar, stats = makeCigarAndStats(seq_reference, read_sequence, clip_start-1, len(read.sequence)-clip_end)

				# Verify the alignment starts with an insertion : not counted gap, shift to the right
				tmp_cigar = cigar
				# cigar starts with a Soft clip, jump over it and look what's next
				leading_clip = re.match(r"\d+S", tmp_cigar)
				if leading_clip:
					tmp_cigar = tmp_cigar[len(leading_clip.group()):]
				# cigar starts with Insertions (sam alignment do not show gaps in reference)
				if re.match(r"\d+I", tmp_cigar):
					read_start += 1 #shift of 1 : count first insertion

				# read sequence as in the SAM
				seq_read = editReadSequence(read.sequence, cigar)

				# Unused fields
				mapq=255 #unavailable
				rmnm = "*" ; mpos=0 ; isize =0 #no paired ends

				# Quality: from the quality file when available, computed otherwise
				try:
					qual = qualities[read.name]
					if len(qual) != len(seq_read):
						qual = editQuality(qual, read.sequence, cigar, base)
				except (KeyError, IndexError):
					# read absent from the quality file, or its quality string
					# does not fit the read
					qual = compute_quality(seq_reference, seq_read, mask, base)

				# Controls
				sum_padded_cigar=0
				sum_unpadded_cigar=0
				for operation in re.findall(r"\d+[A-Z]", cigar):
					length = int(operation[:-1])
					if operation[-1] not in ("D", "N", "P"):
						sum_unpadded_cigar += length
					sum_padded_cigar += length

				if not len(read.sequence)==sum_padded_cigar:
					print("\nERROR PADDED {0} {1} {2} {3}".format(read.name, cigar, len(read.sequence), sum_padded_cigar), file=sys.stderr)
					sys.exit()

				if not len(seq_read)==len(qual)==sum_unpadded_cigar:
					print("\nERROR UNPADDED {0} {1} {2} {3} {4}".format(read.name, cigar, len(seq_read), len(qual), sum_unpadded_cigar), file=sys.stderr)
					print(read.sequence, file=sys.stderr)
					print(seq_read, file=sys.stderr)
					# second chance with a computed quality before giving up
					qual = compute_quality(seq_reference, seq_read, mask, base)
					if not len(seq_read)==len(qual)==sum_unpadded_cigar:
						print("\nERROR UNPADDED {0} {1} {2} {3} {4}".format(read.name, cigar, len(seq_read), len(qual), sum_unpadded_cigar), file=sys.stderr)
						sys.exit()

				# Optional fields
				optional_fields = ""
				if stats:
					perfect, mismatches = stats
					if perfect != -1:
						optional_fields += "HO:i:" + str(perfect)
					if mismatches != -1:
						optional_fields += "\tNM:i:" + str(mismatches)
				read_start += 1 # SAM positions are 1-based
				line = [str(i) for i in [read.name, flag, contig.name, read_start,
						mapq, cigar, rmnm, mpos, isize, seq_read, qual, optional_fields]]

				destination.write("\t".join(line) + "\n")
				numberOfReads += 1
	finally:
		# never close stdout; always close a real output file, even on sys.exit()
		if output != "-":
			destination.close()
	print("Written {0} contigs {1} reads {2}".format(destination.name, numberOfContigs, numberOfReads), file=sys.stderr)

#import profile
#import pstats

# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
	main()
#	profile.run("main()", "profile_tmp")
#	p = pstats.Stats('profile_tmp')
#	p.sort_stats('cumulative').print_stats(10)
