Stage the deriva cookie file as a process input and flatten fetched fastqs.

rna-seq.nf:
  * params.deriva now defaults to a shared cookie path and is validated with
    checkIfExists (boolean true, not the string 'true').
  * splitData receives the cookie file as a staged input (cookies.txt) instead
    of interpolating the Groovy variable into the script; its publishDir is
    dropped.
  * getData publishes to tempOut/fastqs and collects only top-level
    *.R*.fastq.gz files (maxDepth is numeric 0, not the string '0').
  * The stray top-level println debug statements are removed and LOG echo
    messages are quoted.
bdbagFetch.sh:
  * Shebang fixed to /bin/bash; after a successful fetch, fastq files found in
    sub-directories are moved into the working directory with find -exec, so
    paths are never word-split (the script is run through `sh`, so only POSIX
    constructs are used).

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Blank context lines were placed to satisfy the hunk line counts and the
leading whitespace of context lines is inferred; apply with
`git apply --ignore-whitespace` (or `patch -l`) and confirm against the tree.
The `index` post-image hashes predate the corrections listed above.

diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index cb0ca674288b80a437ed35a5f4790a9a79bde122..4f1fd5f249b3bf7e25388a7389675fd100c9b18c 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -1,13 +1,13 @@
 #!/usr/bin/env nextflow
 
 // Define input variables
-params.deriva = "${baseDir}/../test_data/deriva-cookies.txt"
+params.deriva = "/project/BICF/BICF_Core/shared/gudmap/cookies/deriva-cookies.txt"
 params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
 
 params.outDir = "${baseDir}/../output"
 
 // Parse input variables
-deriva = file(params.deriva)
+deriva = file(params.deriva, checkIfExists: true)
 bdbag = Channel
   .fromPath(params.bdbag)
   .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
@@ -19,10 +19,10 @@ outDir = params.outDir
  */
 process splitData {
   tag "${bdbag.baseName}"
-  publishDir "${outDir}/temp/${task.process}", mode: "symlink"
 
   input:
     file bdbag
+    path cookies, stageAs: 'cookies.txt' from deriva
 
   output:
     file("Replicate_*.zip") into bdbagSplit mode flatten
@@ -34,48 +34,44 @@ process splitData {
     """
     hostname
     ulimit -a
-    ln -sf `readlink -e ${deriva}` ~/.bdbag/deriva-cookies.txt
-    echo LOG: deriva cookie linked
-    study=`echo "${bdbag}" | cut -d'.' -f1`
-    echo LOG: \${study}
+    ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt
+    echo "LOG: deriva cookie linked"
+    study=`echo "${bdbag}" | cut -d '.' -f1`
+    echo "LOG: \${study}"
     unzip ${bdbag}
-    echo LOG: bdgag unzipped
+    echo "LOG: bdbag unzipped"
     python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
-    echo LOG: fetch file filtered for only .fastq.gz
+    echo "LOG: fetch file filtered for only .fastq.gz"
     python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
-    echo LOG: fetch file split by replicates
+    echo "LOG: fetch file split by replicates"
     sh ${baseDir}/scripts/splitBag.sh \${study}
-    echo LOG: bag recreated with replicate split fetch file
+    echo "LOG: bag recreated with replicate split fetch file"
     """
 }
 
-println {${http_proxy}}
-println {${https_proxy}}
-
 /*
  * getData: fetch study files from consortium with downloaded bdbag.zip
  */
 process getData {
   tag "${rep.baseName}"
-  publishDir "${outDir}/temp/${task.process}", mode: "symlink"
+  publishDir "${outDir}/tempOut/fastqs", mode: "symlink"
 
   input:
     each rep from bdbagSplit
 
   output:
-    file("**/*.R*.fastq.gz") into fastq
+    path ("*.R*.fastq.gz", type: 'file', maxDepth: 0) into fastq
 
   script:
     """
     hostname
     ulimit -a
-    echo LOG:\${http_proxy}
     export https_proxy=\${http_proxy}
-    replicate=\$(echo "${rep}" | cut -d'.' -f1 | rev | cut -f1 -d '/' | rev)
-    echo LOG: \${replicate}
+    replicate=\$(basename "${rep}" | cut -d '.' -f1)
+    echo "LOG: \${replicate}"
     unzip ${rep}
-    echo LOG: replicate bdbag unzipped
+    echo "LOG: replicate bdbag unzipped"
     sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
-    echo LOG: replicate bdbag fetched
+    echo "LOG: replicate bdbag fetched"
     """
 }
diff --git a/workflow/scripts/bdbagFetch.sh b/workflow/scripts/bdbagFetch.sh
index 28dab3f5338b3b6371b2b8f4ee7ac6bf2e715fa6..9af4eb46c0e716e0e1db7cb66e9f027f63611218 100644
--- a/workflow/scripts/bdbagFetch.sh
+++ b/workflow/scripts/bdbagFetch.sh
@@ -1,3 +1,4 @@
-#!/bin
+#!/bin/bash
 
-bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz $1
\ No newline at end of file
+bdbag --resolve-fetch all --fetch-filter filename\$*fastq.gz "$1" &&
+find */ -name "*.R*.fastq.gz" -exec mv {} . \;