From 84872f975ecf01f81525312257ae05adfba2edd9 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Sun, 1 Oct 2017 11:50:08 -0500
Subject: [PATCH] Add trimming of reads, remove fastqc step.

---
 workflow/main.nf               | 10 ++--
 workflow/scripts/trim_reads.py | 87 ++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 5 deletions(-)
 create mode 100644 workflow/scripts/trim_reads.py

diff --git a/workflow/main.nf b/workflow/main.nf
index ee90c3a..7648445 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -58,11 +58,11 @@ rawReads = designFilePaths
   .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read1], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
 }
 
-process fastQc {
+// Trim raw reads with trim_galore; output is published per process and sample
+process trimReads {
 
   tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/", mode: 'copy',
-    saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"}
+  publishDir "$baseDir/output/${task.process}/$sampleId-$replicate/", mode: 'copy'
 
   input:
 
@@ -70,11 +70,11 @@ process fastQc {
 
   output:
 
-  file '*_fastqc.{zip,html}' into fastqc_results
+  set sampleId, file('*.fq.gz'), biosample, factor, treatment, replicate, controlId into trimmedReads
 
   script:
 
   """
-  python $baseDir/scripts/qc_fastq.py -f $reads
+  python $baseDir/scripts/trim_reads.py -f $reads
   """
 }
diff --git a/workflow/scripts/trim_reads.py b/workflow/scripts/trim_reads.py
new file mode 100644
index 0000000..0ec719b
--- /dev/null
+++ b/workflow/scripts/trim_reads.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+
+'''Trim low quality reads and remove sequences less than 35 base pairs.'''
+
+import os
+import subprocess
+import argparse
+import shutil
+import logging
+import sys
+import json
+
+EPILOG = '''
+For more details:
+        %(prog)s --help
+'''
+
+# Module-level logger: INFO threshold, no propagation, NullHandler attached.
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+logger.propagate = False
+logger.addHandler(logging.NullHandler())
+
+
+def get_args():
+    '''Parse the command line and return the resulting namespace.'''
+    arg_parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=__doc__, epilog=EPILOG)
+
+    arg_parser.add_argument('-f', '--fastq',
+                            required=True,
+                            nargs='+',
+                            help="The fastq file to run triming on.")
+
+    # -f/--fastq is mandatory and collects one or more values into a list.
+    return arg_parser.parse_args()
+
+
+def check_tools():
+    '''Ensure trim_galore and cutadapt are on PATH; raise Exception otherwise.'''
+
+    logger.info('Checking for required libraries and components on this system')
+
+    trimgalore_path = shutil.which("trim_galore")
+    if trimgalore_path:
+        logger.info('Found trimgalore: %s', trimgalore_path)
+    else:
+        logger.error('Missing trimgalore')
+        raise Exception('Missing trimgalore')
+
+    cutadapt_path = shutil.which("cutadapt")
+    if cutadapt_path:
+        logger.info('Found cutadapt: %s', cutadapt_path)
+    else:
+        logger.error('Missing cutadapt')
+        raise Exception('Missing cutadapt')
+
+
+def trim_reads(fastq):
+    '''Run trim_galore on 1 (single-end) or 2 (paired-end) fastq files.'''
+    # --paired is dropped for a single file; a list avoids shell quoting issues.
+    qc_command = ["trim_galore"] + (["--paired"] if len(fastq) > 1 else []) \
+        + ["-q", "25", "--illumina", "--gzip", "--length", "35"] + list(fastq)
+
+    logger.info("Running trim_galore with %s", " ".join(qc_command))
+    # Raises subprocess.CalledProcessError when trim_galore exits non-zero.
+    subprocess.check_call(qc_command)
+
+
+def main():
+    '''Parse arguments, log to trim.log, check the tools, then trim the reads.'''
+    args = get_args()
+
+    # Attach a file handler so log records are written to trim.log
+    logger.addHandler(logging.FileHandler('trim.log'))
+
+    # Check if tools are present
+    check_tools()
+
+    # Run trim_reads
+    trim_reads(args.fastq)
+
+
+if __name__ == '__main__':
+    main()
-- 
GitLab