diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index b2723926ff7aab5a97c0cc8a96f03c2f90006282..d839044791bc3aaccc701c9fb62099105c97205b 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -25,14 +25,25 @@ outDir = params.outDir
     file bdbag
 
   output:
-    file("*") into dataPaths
+    file("**/*.R*.fastq.gz") into fastqPaths
+    file("**/File.csv") into filePaths
+    file("**/Experiment Settings.csv") into experimentSettingsPaths
+    file("**/Experiment.csv") into experimentPaths
 
   script:
     """
     hostname
     ulimit -a
+    study=\$(echo "${bdbag}" | cut -d'.' -f1)
+    echo LOG: \${study}
     unzip ${bdbag}
-    python3 ${baseDir}/scripts/modifyFetch.py -f \$(echo "${bdbag}" | cut -d'.' -f1)
-    bdbag --materialize "\$(echo "${bdbag}" | cut -d'.' -f1)"
+    echo LOG: bdgag unzipped
+    python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
+    echo LOG: fetch file filtered for only .fastq.gz
+    #bdbag --materialize "\$(echo "${bdbag}" | cut -d'.' -f1)"
+    sh ${baseDir}/scripts/bdbagFetch.sh \${study}
+    echo LOG: bdbag fetched
+    sh ${baseDir}/scripts/renameFastq.sh \${study}
+    echo LOG: fastq.gz files renamed to replicate RID
     """
 }
\ No newline at end of file
diff --git a/workflow/scripts/bdbagFetch.sh b/workflow/scripts/bdbagFetch.sh
new file mode 100644
index 0000000000000000000000000000000000000000..28dab3f5338b3b6371b2b8f4ee7ac6bf2e715fa6
--- /dev/null
+++ b/workflow/scripts/bdbagFetch.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+#
+# Fetch the remote files of a bdbag, restricted by a fastq.gz filename filter.
+#
+# Usage: bdbagFetch.sh <bagDir>
+#   <bagDir>  bag path handed to bdbag as its final argument
+
+# Single quotes pass the filter to bdbag verbatim: neither the '$' nor the
+# '*' is touched by the shell (no expansion, no pathname globbing).
+# NOTE(review): '$*' is presumably bdbag's "ends with" operator - confirm.
+bdbag --resolve-fetch all --fetch-filter 'filename$*fastq.gz' "${1:?usage: bdbagFetch.sh <bagDir>}"
diff --git a/workflow/scripts/modifyFetch.sh b/workflow/scripts/modifyFetch.sh
deleted file mode 100644
index f243f5cc2ad6e72327e57e249260d662a0876238..0000000000000000000000000000000000000000
--- a/workflow/scripts/modifyFetch.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin
-
-unzip $1
\ No newline at end of file
diff --git a/workflow/scripts/renameFastq.sh b/workflow/scripts/renameFastq.sh
new file mode 100644
index 0000000000000000000000000000000000000000..f5593766b3a3bd645c3f2c8758d3a20fd354c9be
--- /dev/null
+++ b/workflow/scripts/renameFastq.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+#
+# Rename each file listed in a bag's fetch.txt to <Replicate_RID>.<suffix>.
+# <suffix> is the trailing "R*.fastq.gz" part of the file's base name, and
+# Replicate_RID is taken from the File.csv row whose File_Name column equals
+# that base name. Matching files are searched for below the current directory.
+#
+# Usage: renameFastq.sh <bagDir>
+#   <bagDir>/fetch.txt      whitespace-separated; third field is the file path
+#   <bagDir>/data/File.csv  comma-separated; columns as named in the read below
+#
+# NOTE(review): File.csv is split on every comma, so a quoted field that
+# contains a comma would shift the columns - confirm the export never does.
+
+bag=${1:?usage: renameFastq.sh <bagDir>}
+
+# "|| [ -n ... ]" keeps a final line that lacks a trailing newline.
+while read -r loc checksum fileLocation || [ -n "${loc}" ]
+do
+  file=${fileLocation##*/}
+  # Skip blank or short lines instead of comparing an empty name.
+  [ -n "${file}" ] || continue
+  fileName=${file%.R*.fastq.gz}
+  # Quoting fileName strips it as a literal prefix, not as a glob pattern.
+  fileExt=${file##"${fileName}".}
+  while IFS="," read -r RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID || [ -n "${RID}" ]
+  do
+    # "=" rather than "==": the pipeline runs this script with sh.
+    if [ "${file}" = "${File_Name}" ]
+    then
+      find . -type f -name "${file}" -execdir mv {} "${Replicate_RID}.${fileExt}" ';'
+    fi
+  done < "${bag}/data/File.csv"
+done < "${bag}/fetch.txt"