diff --git a/workflow/main.nf b/workflow/main.nf
index ee90c3a58b01b2016861705bf6b297af49f85b5b..7648445a121d8fe2d9a800a725b24d17188bd3fb 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -58,11 +58,11 @@ rawReads = designFilePaths
     .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read1], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
 }
 
-process fastQc {
+// Trim raw reads using trimgalore
+process trimReads {
 
   tag "$sampleId-$replicate"
-  publishDir "$baseDir/output/", mode: 'copy',
-    saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"}
+  publishDir "$baseDir/output/${task.process}/$sampleId-$replicate/", mode: 'copy'
 
   input:
 
@@ -70,11 +70,11 @@ process fastQc {
 
   output:
 
-  file '*_fastqc.{zip,html}' into fastqc_results
+  set sampleId, file('*.fq.gz'), biosample, factor, treatment, replicate, controlId into trimmedReads
 
   script:
 
   """
-  python $baseDir/scripts/qc_fastq.py -f $reads
+  python $baseDir/scripts/trim_reads.py -f $reads
   """
 }
diff --git a/workflow/scripts/trim_reads.py b/workflow/scripts/trim_reads.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ec719bdcbbd659dc562c91bab460092f890fc68
--- /dev/null
+++ b/workflow/scripts/trim_reads.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+
+'''Trim low quality reads and remove sequences less than 35 base pairs.'''
+
+import os
+import subprocess
+import argparse
+import shutil
+import logging
+import sys
+import json
+
+EPILOG = '''
+For more details:
+        %(prog)s --help
+'''
+
+## SETTINGS
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+logger.propagate = False
+logger.setLevel(logging.INFO)
+
+
+def get_args():
+    '''Parse the command line.
+
+    Returns the argparse namespace; its ``fastq`` attribute is the list of
+    one or more paths given with -f/--fastq.
+    '''
+    parser = argparse.ArgumentParser(
+        description=__doc__, epilog=EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument('-f', '--fastq',
+                        help="The fastq file to run trimming on.",
+                        nargs='+',
+                        required=True)
+
+    args = parser.parse_args()
+    return args
+
+
+def check_tools():
+    '''Check that trim_galore and cutadapt can be found on the PATH.
+
+    Raises Exception naming the first tool that is missing.
+    '''
+
+    logger.info('Checking for required libraries and components on this system')
+
+    # (name used in the messages, executable looked up on the PATH)
+    required = (('trimgalore', 'trim_galore'), ('cutadapt', 'cutadapt'))
+    for name, executable in required:
+        tool_path = shutil.which(executable)
+        if not tool_path:
+            logger.error('Missing %s', name)
+            raise Exception('Missing %s' % name)
+        logger.info('Found %s: %s', name, tool_path)
+
+
+def trim_reads(fastq):
+    '''Run trim_galore on 1 or 2 files.
+
+    fastq is the list of fastq paths. --paired is only passed when more than
+    one file is given, since trim_galore needs an even number of files in
+    paired mode. Raises subprocess.CalledProcessError when trim_galore exits
+    with a non-zero status.
+    '''
+    # Argument list rather than a shell string, so paths containing spaces or
+    # shell metacharacters reach trim_galore unchanged.
+    qc_command = ['trim_galore']
+    if len(fastq) > 1:
+        qc_command.append('--paired')
+    qc_command += ['-q', '25', '--illumina', '--gzip', '--length', '35']
+    qc_command.extend(fastq)
+
+    logger.info("Running trim_galore with %s", " ".join(qc_command))
+
+    subprocess.check_call(qc_command)
+
+
+def main():
+    '''Log to trim.log, check the required tools, then trim the reads.'''
+    args = get_args()
+
+    # Create a file handler
+    handler = logging.FileHandler('trim.log')
+    logger.addHandler(handler)
+
+    # Check if tools are present
+    check_tools()
+
+    # Run trim_reads
+    trim_reads(args.fastq)
+
+
+if __name__ == '__main__':
+    main()