Skip to content
Snippets Groups Projects
Commit 84872f97 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add trimming of reads, remove fastqc step.

parent c9f0a2c4
Branches
Tags
1 merge request!5Resolve "Add mapping and trimming"
Pipeline #1021 passed with stage
in 35 seconds
......@@ -58,11 +58,11 @@ rawReads = designFilePaths
.map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read1], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
}
process fastQc {
// Trim raw reads using trimgalore
process trimReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/", mode: 'copy',
saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"}
publishDir "$baseDir/output/{task.process}/$sampleId-$replicate$/", mode: 'copy'
input:
......@@ -70,11 +70,11 @@ process fastQc {
output:
file '*_fastqc.{zip,html}' into fastqc_results
set sampleId, file('*.fq.gz'), biosample, factor, treatment, replicate, controlId into trimmedReads
script:
"""
python $baseDir/scripts/qc_fastq.py -f $reads
python $baseDir/scripts/trim_reads.py -f $reads
"""
}
#!/usr/bin/env python3
'''Trim low quality reads and remove sequences less than 35 base pairs.'''
import os
import subprocess
import argparse
import shutil
import logging
import sys
import json
# Epilog text passed to argparse; %(prog)s is expanded by argparse to the
# program name when the help message is rendered.
EPILOG = "\nFor more details:\n%(prog)s --help\n"

## SETTINGS

# Module-level logger used by every function in this script. It logs at INFO
# level, does not propagate records to ancestor loggers, and starts out with
# only a NullHandler attached.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.propagate = False
logger.addHandler(logging.NullHandler())
def get_args():
    '''Parse the command-line arguments of the trimming script.

    Returns:
        The argparse namespace. Its ``fastq`` attribute is the list of one
        or more paths supplied through the required -f/--fastq option.
    '''
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        epilog=EPILOG,
    )

    # nargs='+' collects every value after -f into a list.
    cli.add_argument(
        '-f', '--fastq',
        required=True,
        nargs='+',
        help="The fastq file to run triming on.",
    )

    return cli.parse_args()
def check_tools():
    '''Check that the required external programs are available.

    Looks up ``trim_galore`` and ``cutadapt`` with shutil.which and logs the
    resolved path of each one that is found.

    Raises:
        Exception: if either program cannot be located.
    '''
    logger.info('Checking for required libraries and components on this system')

    # Each lookup result is checked against its own variable; the previous
    # code tested an undefined name and failed with NameError on every call.
    trimgalore_path = shutil.which("trim_galore")
    if trimgalore_path:
        logger.info('Found trimgalore: %s', trimgalore_path)
    else:
        logger.error('Missing trimgalore')
        raise Exception('Missing trimgalore')

    cutadapt_path = shutil.which("cutadapt")
    if cutadapt_path:
        logger.info('Found cutadapt: %s', cutadapt_path)
    else:
        logger.error('Missing cutadapt')
        raise Exception('Missing cutadapt')
def trim_reads(fastq):
    '''Run trim_galore on 1 or 2 files.

    Reads are quality-trimmed with ``-q 25``, Illumina adapters are removed,
    output is gzip-compressed and reads shorter than 35 bp are discarded.

    Args:
        fastq: list of fastq paths; one file for single-end data, two files
            (a mate pair) for paired-end data.

    Raises:
        ValueError: if ``fastq`` is empty.
        subprocess.CalledProcessError: if trim_galore exits with a non-zero
            status.
    '''
    if not fastq:
        raise ValueError('No fastq files given to trim')

    trim_command = ["trim_galore"]

    # --paired makes trim_galore expect its inputs in mate pairs, so it must
    # not be passed when only a single file is being trimmed.
    if len(fastq) > 1:
        trim_command.append("--paired")

    trim_command += ["-q", "25", "--illumina", "--gzip", "--length", "35"]
    trim_command += list(fastq)

    logger.info("Running trim_galore with %s", " ".join(trim_command))

    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters are handed over verbatim, and fail loudly on a non-zero
    # exit status instead of silently continuing.
    subprocess.run(trim_command, check=True)
def main():
    '''Entry point: parse arguments, verify the tools and trim the reads.

    Log records are written to ``trim.log`` in the current working
    directory.
    '''
    args = get_args()

    # Create a file handler and attach it to the module-level logger
    # (the name is lowercase ``logger``; ``LOGGER`` is not defined).
    handler = logging.FileHandler('trim.log')
    logger.addHandler(handler)

    # Check if tools are present
    check_tools()

    # Run trim_reads
    trim_reads(args.fastq)


if __name__ == '__main__':
    main()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment