From 3d73714a01f76ea8f11914ea791e8a7e622f5e97 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Sat, 30 Sep 2017 22:03:51 -0500 Subject: [PATCH] Add in fastqc step. --- workflow/main.nf | 36 +++++++++++++++++++++++++++++++++--- workflow/scripts/qc_fastq.py | 20 ++++++++++---------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/workflow/main.nf b/workflow/main.nf index c025d3b..ee90c3a 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -9,12 +9,11 @@ params.pairedEnd = false params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt" // Define List of Files -Channel +readsList = Channel .fromPath( params.reads ) .flatten() .map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")} .collectFile( name: 'fileList.tsv', newLine: true ) - .set { readsList } // Define regular variables pairedEnd = params.pairedEnd @@ -37,7 +36,6 @@ process checkDesignFile { if (pairedEnd) { """ - echo $designFile python $baseDir/scripts/check_design.py -d $designFile -f $readsList -p """ } @@ -48,3 +46,35 @@ process checkDesignFile { } } + +// Define channel for raw reads +if (pairedEnd) { + rawReads = designFilePaths + .splitCsv(sep: '\t', header: true) + .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] } +} else { +rawReads = designFilePaths + .splitCsv(sep: '\t', header: true) + .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read1], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] } +} + +process fastQc { + + tag "$sampleId-$replicate" + publishDir "$baseDir/output/", mode: 'copy', + saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"} + + input: + + set sampleId, reads, biosample, factor, treatment, replicate, controlId from rawReads + + output: + + file '*_fastqc.{zip,html}' into fastqc_results + + script: + + """ + python $baseDir/scripts/qc_fastq.py -f $reads + """ +} diff --git a/workflow/scripts/qc_fastq.py b/workflow/scripts/qc_fastq.py index 991d927..95d8172 100755 --- a/workflow/scripts/qc_fastq.py +++ b/workflow/scripts/qc_fastq.py @@ -17,10 +17,10 @@ For more details: ## SETTINGS -LOGGER = logging.getLogger(__name__) -LOGGER.addHandler(logging.NullHandler()) -LOGGER.propagate = False -LOGGER.setLevel(logging.INFO) +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) +logger.propagate = False +logger.setLevel(logging.INFO) def get_args(): @@ -41,24 +41,24 @@ def get_args(): def check_tools(): '''Checks for required componenets on user system''' - LOGGER.info('Checking for required libraries and components on this system') + logger.info('Checking for required libraries and components on this system') fastqc_path = shutil.which("fastqc") if fastqc_path: - LOGGER.info('Found fastqc: %s', fastqc_path) + logger.info('Found fastqc: %s', fastqc_path) else: - print("Please install 'fastqc' before using the tool") - sys.exit() + logger.error('Missing fastqc') + raise Exception('Missing fastqc') def check_qual_fastq(fastq): '''Run fastqc on 1 or 2 files.''' qc_command = "fastqc -t -f fastq " + " ".join(fastq) - LOGGER.info("Running fastqc with %s", qc_command) + logger.info("Running fastqc with %s", qc_command) qual_fastq = subprocess.Popen(qc_command, shell=True) - qual_fastq .communicate() + out, err = qual_fastq.communicate() def main(): -- GitLab