Skip to content
Snippets Groups Projects
Commit b89416b5 authored by Jonathan Gesell
Browse files

Temporary fix for file naming

parent 813bcb28
Branches
Tags
3 merge requests: !37 v0.0.1, !4 Develop, !2 Resolve "process_getData"
This commit is part of merge request !2. Comments created here will be created in the context of that merge request.
...@@ -39,11 +39,7 @@ singularity { ...@@ -39,11 +39,7 @@ singularity {
} }
env { env {
http_proxy = "http://proxy.swmed.edu:3128" http_proxy = 'http://proxy.swmed.edu:3128'
https_proxy = "http://proxy.swmed.edu:3128" https_proxy = 'http://proxy.swmed.edu:3128'
HTTP_PROXY = "http://proxy.swmed.edu:3128" all_proxy = 'http://proxy.swmed.edu:3128'
HTTPS_PROXY = "http://proxy.swmed.edu:3128"
all_proxy = "http://proxy.swmed.edu:3128"
ALL_PROXY = "http://proxy.swmed.edu:3128"
} }
...@@ -7,9 +7,11 @@ params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip" ...@@ -7,9 +7,11 @@ params.bdbag = "${baseDir}/../test_data/Study_Q-Y4H0.zip"
params.outDir = "${baseDir}/../output" params.outDir = "${baseDir}/../output"
// Parse input variables // Parse input variables
deriva = Channel deriva = file(params.deriva)
.fromPath(params.deriva) deriva.copyTo('~/.bdbag/deriva-cookies.txt')
.ifEmpty { exit 1, "deriva cookie file not found: ${params.deriva}" } //deriva = Channel
// .fromPath(params.deriva)
// .ifEmpty { exit 1, "deriva cookie file not found: ${params.deriva}" }
bdbag = Channel bdbag = Channel
.fromPath(params.bdbag) .fromPath(params.bdbag)
.ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" } .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
...@@ -20,62 +22,72 @@ outDir = params.outDir ...@@ -20,62 +22,72 @@ outDir = params.outDir
 * splitData: split bdbag files by replicate so fetch can occur in parallel * splitData: split bdbag files by replicate so fetch can occur in parallel
*/ */
process splitData { process splitData {
tag "${bdbag.baseName}" tag "${bdbag.baseName}"
publishDir "${outDir}/temp/${task.process}", mode: "symlink" publishDir "${outDir}/temp/${task.process}", mode: "symlink"
input: input:
file bdbag file bdbag
file deriva
output: output:
file("Replicate_*.zip") into bdbagSplit mode flatten file("Replicate_*.zip") into bdbagSplit mode flatten
file("${bdbag.baseName}/data/File.csv") into fileMeta file("${bdbag.baseName}/data/File.csv") into fileMeta
file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
script: script:
""" """
hostname hostname
ulimit -a ulimit -a
study=\$(echo "${bdbag}" | cut -d'.' -f1) ln -sf `readlink -e ${deriva}` ~/.bdbag/deriva-cookies.txt
echo LOG: \${study} study=`echo "${bdbag}" | cut -d'.' -f1`
unzip ${bdbag} echo LOG: \${study}
echo LOG: bdgag unzipped unzip ${bdbag}
python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study} echo LOG: bdgag unzipped
echo LOG: fetch file filtered for only .fastq.gz python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study} cd \${study}
echo LOG: fetch file split by replicates bash ${baseDir}/scripts/fixFetch.sh
sh ${baseDir}/scripts/splitBag.sh \${study} cd ..
echo LOG: bag recreated with replicate split fetch file echo LOG: fetch file filtered for only .fastq.gz
""" python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
echo LOG: fetch file split by replicates
sh ${baseDir}/scripts/splitBag.sh \${study}
echo LOG: bag recreated with replicate split fetch file
"""
} }
println {${http_proxy}}
println {${https_proxy}}
/* /*
* getData: fetch study files from consortium with downloaded bdbag.zip * getData: fetch study files from consortium with downloaded bdbag.zip
*/ */
process getData { process getData {
tag "${rep.baseName}" tag "${rep.baseName}"
publishDir "${outDir}/temp/${task.process}", mode: "symlink" publishDir "${outDir}/temp/${task.process}", mode: "symlink"
input: input:
file deriva file deriva
each rep from bdbagSplit each rep from bdbagSplit
output: output:
file("**/*.R*.fastq.gz") into fastq file("**/*.R*.fastq.gz") into fastq
script: script:
""" """
hostname hostname
ulimit -a ulimit -a
replicate=\$(echo "${rep}" | cut -d'.' -f1) echo LOG:\${http_proxy}
echo LOG: \${replicate} export https_proxy=\${http_proxy}
cp "${deriva}" ~/.bdbag/deriva-cookies.txt ln -sf `readlink -e ${deriva}` ~/.bdbag/deriva-cookies.txt
echo LOG: deriva cookie loaded replicate=\$(echo "${rep}" | cut -d'.' -f1 | rev | cut -f1 -d '/' | rev)
unzip ${rep} echo LOG: \${replicate}
echo LOG: replicate bdbag unzipped echo LOG: deriva cookie loaded
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} unzip ${rep}
echo LOG: replicate bdbag fetched echo LOG: replicate bdbag unzipped
sh ${baseDir}/scripts/renameFastq.sh \${replicate} sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
echo LOG: fastq.gz files renamed to replicate RID echo LOG: replicate bdbag fetched
""" sh ${baseDir}/scripts/renameFastq.sh \${replicate}
} echo LOG: fastq.gz files renamed to replicate RID
\ No newline at end of file """
}
#!/bin #!/bin
while read loc checksum fileLocation while read loc checksum fileLocation; do
do file=$(echo ${fileLocation##*/});
file=$(echo ${fileLocation##*/}) fileName=$(echo ${file%.R*.fastq.gz});
fileName=$(echo ${file%.R*.fastq.gz}) fileExt=$(echo ${file##${fileName}.});
fileExt=$(echo ${file##${fileName}.}) while IFS="," read RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID; do
while IFS="," read RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID if [ "${file}" == "${File_Name}" ]; then
do find . -type f -name "${file}" -execdir mv {} ${Replicate_RID}.${fileExt} ';';
if [ ${file} == ${File_Name} ] fi;
then done < $1/data/File.csv;
find . -type f -name ${file} -execdir mv {} ${Replicate_RID}.${fileExt} ';' done < $1/fetch.txt;
fi
done < $1/data/File.csv
done < $1/fetch.txt
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment