From 3b063504d73b9b22b0de14dd60e1dbf8f4bac851 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Mon, 25 Jun 2018 14:45:34 -0500
Subject: [PATCH] Update converstion step.

---
 workflow/main.nf                  |  8 ++--
 workflow/scripts/convert_reads.py | 74 ++++++++++++++++++++++++++-----
 2 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/workflow/main.nf b/workflow/main.nf
index 450f470..5ce28f7 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -214,7 +214,7 @@ process convertReads {
 
   output:
 
-  set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate into tagReads
+  set sampleId, file('*.tagAlign.gz'), experimentId, biosample, factor, treatment, replicate into tagReads
 
 
   script:
@@ -263,7 +263,7 @@ process crossReads {
 
   input:
 
-  set sampleId, seTagAlign, tagAlign, experimentId, biosample, factor, treatment, replicate from tagReads
+  set sampleId, tagAlign, experimentId, biosample, factor, treatment, replicate from tagReads
 
   output:
 
@@ -274,12 +274,12 @@ process crossReads {
 
   if (pairedEnd) {
     """
-    python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
+    python3 $baseDir/scripts/xcor.py -t $tagAlign -p
     """
   }
   else {
     """
-    python3 $baseDir/scripts/xcor.py -t $seTagAlign
+    python3 $baseDir/scripts/xcor.py -t $tagAlign
     """
   }
 
diff --git a/workflow/scripts/convert_reads.py b/workflow/scripts/convert_reads.py
index d099794..0d51566 100644
--- a/workflow/scripts/convert_reads.py
+++ b/workflow/scripts/convert_reads.py
@@ -40,6 +40,16 @@ def get_args():
                         default=False,
                         action='store_true')
 
+    parser.add_argument('-m', '--chrm',
+                        help="True/False if to remove ChrM.",
+                        default=True,
+                        action='store_true')
+
+     parser.add_argument('-t', '--tn5'
+                        help='Enable TN5 shifting for ATAC-Seq.'
+                        default=True,
+                        action='store_true')
+
     args = parser.parse_args()
     return args
 
@@ -66,16 +76,22 @@ def check_tools():
         raise Exception('Missing samtools')
 
 
-def convert_mapped(bam, tag_filename):
+def convert_mapped(bam, tag_filename, chrm):
     '''Use bedtools to convert to tagAlign.'''
 
-    out, err = utils.run_pipe([
+    steps = [
         "bamToBed -i %s" % (bam),
-        r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""",
-        "gzip -nc"], outfile=tag_filename)
+        r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""]
+
+    if chrm:
+        steps.extend["grep -v 'chrM'"]
 
+    steps.extend["gzip -nc"]
 
-def convert_mapped_pe(bam, bam_basename):
+    out, err = utils.run_pipe(steps, outfile=tag_filename)
+
+
+def convert_mapped_pe(bam, bam_basename, tag_filename, chrm):
     '''Use bedtools to convert to tagAlign PE data.'''
 
     bedpe_filename = bam_basename + ".bedpe.gz"
@@ -89,15 +105,42 @@ def convert_mapped_pe(bam, bam_basename):
     logger.info(samtools_sort_command)
     subprocess.check_output(shlex.split(samtools_sort_command))
 
-    out, err = utils.run_pipe([
-        "bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename),
-        "gzip -nc"], outfile=bedpe_filename)
+    steps = ["bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename)]
+
+    steps.extend["gzip -nc"]
+
+    out, err = utils.run_pipe(steps, outfile=bedpe_filename)
+    os.remove(nmsrt_bam_filename)
+
+    if chrm:
+        steps.extend["grep -v 'chrM'"]
+
+    # Convert read pairs to reads into standard tagAlign file
+    tag_steps = ["zcat -f %s" % (bedpe_filename)]
+    tag_steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""])
+    if chrm:
+        tag_steps.extend["grep -v 'chrM'"]
+    tag_steps.extend(['gzip -cn'])
+    out, err = utils.run_pipe(tag_steps, outfile=tag_filename)
+    os.remove(bedpe_filename)
+
+
+def tn5_shift_tagalign(tag_filename, shitfted_tag_filename):
+    '''Shift tagalign file for Tn5.'''
+
+    steps = [
+        "zcat -f %s" % (tag_filename),
+        r"""awk 'BEGIN{OFS="\t"}{{if ($6 == "+") {{$2 = $2 + 4}; else if ($6 == "-") {{$3 = $3 - 5}} print $0}}}'""",
+        "gzip -nc"]
+    out, err = utils.run_pipe(steps, outfile=shitfted_tag_filename)
 
 
 def main():
     args = get_args()
     paired = args.paired
     bam = args.bam
+    chrm = args.chrm
+    tn5 = args.tn5
 
     # Create a file handler
     handler = logging.FileHandler('convert_reads.log')
@@ -111,14 +154,23 @@ def main():
         utils.strip_extensions(bam, ['.bam']))
 
     tag_filename = bam_basename + '.tagAlign.gz'
-    convert_mapped(bam, tag_filename)
+
+    shitfted_tag_filename = bam_basename + ".tn5.tagAlign.gz"
+
 
     if paired:  # paired-end data
         convert_mapped_pe(bam, bam_basename)
     else:
-        bedse_filename = bam_basename + ".bedse.gz"
-        shutil.copy(tag_filename, bedse_filename)
+        convert_mapped(bam, tag_filename)
+
+    if tn5:
+        logger.info("TN5-shifting TAGALIGN...")
+        tn5_shift_ta(tag_filename, shitfted_tag_filename)
+    else:
+        tn5_shift = ["cp %s %s" % (tag_filename, shitfted_tag_filename)]
+        out, err = utils.run_pipe(tn5_shift)
 
+    os.remove(tag_filename)
 
 if __name__ == '__main__':
     main()
-- 
GitLab