# Reviewer preamble (free text ahead of the first "diff --git" header).
#
# What this patch does, as shown by the hunks below:
#   workflow/main.nf
#     - assigns the file-list channel directly to `readsList` instead of
#       ending the chain with `.set { readsList }`;
#     - drops the `echo $designFile` line from the paired-end branch of
#       checkDesignFile;
#     - adds a `rawReads` channel built from `designFilePaths` via splitCsv,
#       and a new `fastQc` process that runs scripts/qc_fastq.py on `$reads`.
#   workflow/scripts/qc_fastq.py
#     - renames the module logger from LOGGER to logger;
#     - check_tools() now logs an error and raises Exception when fastqc is
#       not found, instead of printing a message and calling sys.exit();
#     - check_qual_fastq() binds the result of communicate() to `out, err`.
#
# NOTE(formatting): this patch had been flattened onto two lines. Line breaks
# were restored from the hunk headers' line counts; the amount of leading
# indentation and the exact position of blank context lines are reconstructed
# and may differ from the original files -- apply with whitespace tolerance
# (e.g. `git apply --ignore-whitespace`) and verify against the repository.
#
# NOTE(review), to confirm in a follow-up change rather than in this patch:
#   - the single-end branch builds [row.fastq_read1, row.fastq_read1], i.e.
#     the same file twice -- presumably a single-element list was intended;
#   - `out, err` are never used, and Popen is called without stdout/stderr
#     pipes, so communicate() yields (None, None); the exit status of fastqc
#     is not checked either;
#   - the command string is "fastqc -t -f fastq ..." -- it looks like `-t`
#     (threads) expects a value; confirm against the fastqc CLI help.
diff --git a/workflow/main.nf b/workflow/main.nf
index c025d3b4c79334161f7885b8705f3e544822dd2e..ee90c3a58b01b2016861705bf6b297af49f85b5b 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -9,12 +9,11 @@ params.pairedEnd = false
 params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
 
 // Define List of Files
-Channel
+readsList = Channel
   .fromPath( params.reads )
   .flatten()
   .map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
   .collectFile( name: 'fileList.tsv', newLine: true )
-  .set { readsList }
 
 // Define regular variables
 pairedEnd = params.pairedEnd
@@ -37,7 +36,6 @@ process checkDesignFile {
 
   if (pairedEnd) {
     """
-    echo $designFile
     python $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
     """
   }
@@ -48,3 +46,35 @@ process checkDesignFile {
   }
 
 }
+
+// Define channel for raw reads
+if (pairedEnd) {
+  rawReads = designFilePaths
+    .splitCsv(sep: '\t', header: true)
+    .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read2], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
+} else {
+rawReads = designFilePaths
+    .splitCsv(sep: '\t', header: true)
+    .map { row -> [ row.sample_id, [row.fastq_read1, row.fastq_read1], row.biosample, row.factor, row.treatment, row.replicate, row.control_id ] }
+}
+
+process fastQc {
+
+  tag "$sampleId-$replicate"
+  publishDir "$baseDir/output/", mode: 'copy',
+    saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"}
+
+  input:
+
+  set sampleId, reads, biosample, factor, treatment, replicate, controlId from rawReads
+
+  output:
+
+  file '*_fastqc.{zip,html}' into fastqc_results
+
+  script:
+
+  """
+  python $baseDir/scripts/qc_fastq.py -f $reads
+  """
+}
diff --git a/workflow/scripts/qc_fastq.py b/workflow/scripts/qc_fastq.py
index 991d9275c2890d16c38c8cf8f2456911aa97a040..95d817270d95a8bec029551ec4fa15e1f5031ff6 100755
--- a/workflow/scripts/qc_fastq.py
+++ b/workflow/scripts/qc_fastq.py
@@ -17,10 +17,10 @@ For more details:
 
 ## SETTINGS
 
-LOGGER = logging.getLogger(__name__)
-LOGGER.addHandler(logging.NullHandler())
-LOGGER.propagate = False
-LOGGER.setLevel(logging.INFO)
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+logger.propagate = False
+logger.setLevel(logging.INFO)
 
 
 def get_args():
@@ -41,24 +41,24 @@ def get_args():
 
 def check_tools():
     '''Checks for required componenets on user system'''
-    LOGGER.info('Checking for required libraries and components on this system')
+    logger.info('Checking for required libraries and components on this system')
 
     fastqc_path = shutil.which("fastqc")
     if fastqc_path:
-        LOGGER.info('Found fastqc: %s', fastqc_path)
+        logger.info('Found fastqc: %s', fastqc_path)
     else:
-        print("Please install 'fastqc' before using the tool")
-        sys.exit()
+        logger.error('Missing fastqc')
+        raise Exception('Missing fastqc')
 
 
 def check_qual_fastq(fastq):
     '''Run fastqc on 1 or 2 files.'''
     qc_command = "fastqc -t -f fastq " + " ".join(fastq)
 
-    LOGGER.info("Running fastqc with %s", qc_command)
+    logger.info("Running fastqc with %s", qc_command)
 
     qual_fastq = subprocess.Popen(qc_command, shell=True)
-    qual_fastq .communicate()
+    out, err = qual_fastq.communicate()
 
 
 def main():