From ed35a88e4d8f476dd779798d4fa3cf9d91be83b0 Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Fri, 11 Oct 2019 21:26:41 -0500
Subject: [PATCH] Working version of getData process

---
Review notes (this section sits below the "---" separator, so it is not part
of the commit message):

 * This patch was amended in review and no longer matches commit ed35a88
   byte-for-byte. The "index" lines of the two new scripts were dropped
   because their blob hashes changed.
 * workflow/scripts/bdbagFetch.sh and workflow/scripts/renameFastq.sh:
   - "#!/bin" is not an interpreter path; both scripts now use "#!/bin/sh",
     matching the "sh <script>" invocation in rna-seq.nf.
   - "[ a == b ]" with unquoted operands was replaced by the POSIX form
     '[ "a" = "b" ]', which also survives empty fields.
   - "$1", the find pattern and the mv target are quoted.
   - "read" uses -r and keeps a last line that has no trailing newline.
   - The bdbag fetch filter is single-quoted instead of relying on an
     unmatched glob being passed through literally.
 * The rna-seq.nf hunk and the modifyFetch.sh deletion are unchanged in
   content. Leading whitespace of the rna-seq.nf lines was reconstructed;
   if the hunk does not apply, retry with "git apply --ignore-whitespace".

 workflow/rna-seq.nf             | 17 ++++++++++++++---
 workflow/scripts/bdbagFetch.sh  |  8 ++++++++
 workflow/scripts/modifyFetch.sh |  3 ---
 workflow/scripts/renameFastq.sh | 26 ++++++++++++++++++++++++++
 4 files changed, 48 insertions(+), 6 deletions(-)
 create mode 100644 workflow/scripts/bdbagFetch.sh
 delete mode 100644 workflow/scripts/modifyFetch.sh
 create mode 100644 workflow/scripts/renameFastq.sh

diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index b272392..d839044 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -25,14 +25,25 @@ outDir = params.outDir
     file bdbag
 
   output:
-    file("*") into dataPaths
+    file("**/*.R*.fastq.gz") into fastqPaths
+    file("**/File.csv") into filePaths
+    file("**/Experiment Settings.csv") into experimentSettingsPaths
+    file("**/Experiment.csv") into experimentPaths
 
   script:
     """
     hostname
     ulimit -a
+    study=\$(echo "${bdbag}" | cut -d'.' -f1)
+    echo LOG: \${study}
     unzip ${bdbag}
-    python3 ${baseDir}/scripts/modifyFetch.py -f \$(echo "${bdbag}" | cut -d'.' -f1)
-    bdbag --materialize "\$(echo "${bdbag}" | cut -d'.' -f1)"
+    echo LOG: bdgag unzipped
+    python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
+    echo LOG: fetch file filtered for only .fastq.gz
+    #bdbag --materialize "\$(echo "${bdbag}" | cut -d'.' -f1)"
+    sh ${baseDir}/scripts/bdbagFetch.sh \${study}
+    echo LOG: bdbag fetched
+    sh ${baseDir}/scripts/renameFastq.sh \${study}
+    echo LOG: fastq.gz files renamed to replicate RID
     """
 }
\ No newline at end of file
diff --git a/workflow/scripts/bdbagFetch.sh b/workflow/scripts/bdbagFetch.sh
new file mode 100644
--- /dev/null
+++ b/workflow/scripts/bdbagFetch.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+#
+# Fetch the remote payload of an unpacked bdbag, restricted to FASTQ files.
+# Usage: sh bdbagFetch.sh <bag-directory>
+
+# The filter is single-quoted so the shell hands it to bdbag verbatim; the
+# "$*" inside it is part of bdbag's filter syntax, not a shell expansion.
+bdbag --resolve-fetch all --fetch-filter 'filename$*fastq.gz' "$1"
diff --git a/workflow/scripts/modifyFetch.sh b/workflow/scripts/modifyFetch.sh
deleted file mode 100644
index f243f5c..0000000
--- a/workflow/scripts/modifyFetch.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin
-
-unzip $1
\ No newline at end of file
diff --git a/workflow/scripts/renameFastq.sh b/workflow/scripts/renameFastq.sh
new file mode 100644
--- /dev/null
+++ b/workflow/scripts/renameFastq.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+#
+# Rename each fetched FASTQ file after the replicate it belongs to.
+# Usage: sh renameFastq.sh <bag-directory>
+#
+# Every entry of <bag-directory>/fetch.txt is looked up by file name in
+# <bag-directory>/data/File.csv; on a match the file is renamed in place to
+# <Replicate_RID>.<suffix>, where <suffix> is the part of the name from "R"
+# onwards (for example R1.fastq.gz).
+
+# "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
+while read -r loc checksum fileLocation || [ -n "${loc}" ]
+do
+    file=${fileLocation##*/}
+    fileName=${file%.R*.fastq.gz}
+    fileExt=${file##"${fileName}".}
+    while IFS="," read -r RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID || [ -n "${RID}" ]
+    do
+        # "=" with quoted operands stays valid under POSIX sh and when a
+        # field is empty; the unquoted "==" form aborts the test there.
+        if [ "${file}" = "${File_Name}" ]
+        then
+            find . -type f -name "${file}" -execdir mv {} "${Replicate_RID}.${fileExt}" ';'
+        fi
+    done < "$1/data/File.csv"
+done < "$1/fetch.txt"
-- 
GitLab