Skip to content
Snippets Groups Projects
Commit 92f9b500 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add file rename in fetch.txt to modifyFetch.py

parent b89416b5
Branches
Tags
3 merge requests: !37 v0.0.1, !4 Develop, !2 Resolve "process_getData"
......@@ -9,9 +9,6 @@ params.outDir = "${baseDir}/../output"
// Parse input variables
deriva = file(params.deriva)
deriva.copyTo('~/.bdbag/deriva-cookies.txt')
//deriva = Channel
// .fromPath(params.deriva)
// .ifEmpty { exit 1, "deriva cookie file not found: ${params.deriva}" }
bdbag = Channel
.fromPath(params.bdbag)
.ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
......@@ -19,7 +16,7 @@ bdbag = Channel
outDir = params.outDir
/*
* splitData: split bdbag files by replicate so fetch can occur in parallel
* splitData: split bdbag files by replicate so fetch can occur in parallel, and rename files to replicate RID
*/
process splitData {
tag "${bdbag.baseName}"
......@@ -27,7 +24,6 @@ process splitData {
input:
file bdbag
file deriva
output:
file("Replicate_*.zip") into bdbagSplit mode flatten
......@@ -39,15 +35,11 @@ process splitData {
"""
hostname
ulimit -a
ln -sf `readlink -e ${deriva}` ~/.bdbag/deriva-cookies.txt
study=`echo "${bdbag}" | cut -d'.' -f1`
echo LOG: \${study}
unzip ${bdbag}
echo LOG: bdgag unzipped
python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
cd \${study}
bash ${baseDir}/scripts/fixFetch.sh
cd ..
echo LOG: fetch file filtered for only .fastq.gz
python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
echo LOG: fetch file split by replicates
......@@ -67,7 +59,6 @@ process getData {
publishDir "${outDir}/temp/${task.process}", mode: "symlink"
input:
file deriva
each rep from bdbagSplit
output:
......@@ -79,15 +70,11 @@ process getData {
ulimit -a
echo LOG:\${http_proxy}
export https_proxy=\${http_proxy}
ln -sf `readlink -e ${deriva}` ~/.bdbag/deriva-cookies.txt
replicate=\$(echo "${rep}" | cut -d'.' -f1 | rev | cut -f1 -d '/' | rev)
echo LOG: \${replicate}
echo LOG: deriva cookie loaded
unzip ${rep}
echo LOG: replicate bdbag unzipped
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
echo LOG: replicate bdbag fetched
sh ${baseDir}/scripts/renameFastq.sh \${replicate}
echo LOG: fastq.gz files renamed to replicate RID
"""
}
......@@ -2,6 +2,7 @@
import argparse
import pandas as pd
import re
def get_args():
parser = argparse.ArgumentParser()
......@@ -12,8 +13,13 @@ def get_args():
def main():
    """Filter fetch.txt to FastQ entries and rename them by replicate RID.

    Reads ``<fetchFile>/fetch.txt`` (tab-separated, no header) and
    ``<fetchFile>/data/File.csv`` (comma-separated, with header), where
    ``fetchFile`` is the directory given by the ``--fetchFile`` argument.

    Only fetch rows whose third column ends in ``.fastq.gz`` are kept.  For
    every ``File.csv`` row with ``File_Type == "FastQ"``, each kept fetch row
    whose third column contains that ``File_Name`` has the file-name stem
    (``File_Name`` with the ``.R?.fastq.gz`` suffix stripped) replaced by the
    row's ``Replicate_RID``.  The result overwrites ``fetch.txt`` once, after
    all renames have been applied.
    """
    args = get_args()
    fetch_path = args.fetchFile + "/fetch.txt"
    fetchFile = pd.read_csv(fetch_path, sep="\t", header=None)
    fileFile = pd.read_csv(args.fetchFile + "/data/File.csv", sep=",", header=0)
    fileFile_filtered = fileFile[fileFile["File_Type"] == "FastQ"]

    # Explicit copy: assigning into a boolean-filtered slice of fetchFile
    # triggers SettingWithCopyWarning and is silently dropped when pandas
    # Copy-on-Write is enabled.
    fetchFile_filtered = fetchFile[fetchFile[2].str[-9:] == ".fastq.gz"].copy()

    # Raw string so the escaped dots are regex escapes, not (invalid) Python
    # string escapes; compiled once outside the loop.
    read_suffix = re.compile(r"\.R.\.fastq\.gz")

    for file_name, replicate_rid in zip(
        fileFile_filtered["File_Name"], fileFile_filtered["Replicate_RID"]
    ):
        matches = fetchFile_filtered[2].str.contains(file_name, regex=False)
        if not matches.any():
            # File listed in File.csv but absent from fetch.txt: nothing to
            # rename (previously this raised IndexError on ``.values[0]``).
            continue
        stem = read_suffix.sub("", file_name)
        # Rewrite each matching row from its own value so that rows in
        # different locations are not collapsed onto the first match's path.
        fetchFile_filtered.loc[matches, 2] = fetchFile_filtered.loc[
            matches, 2
        ].str.replace(stem, replicate_rid, regex=False)

    fetchFile_filtered.to_csv(fetch_path, sep="\t", header=False, index=False)


if __name__ == '__main__':
    main()
\ No newline at end of file
#!/bin/bash
# Rename fetched FastQ files to "<Replicate_RID>.<read suffix>".
#
# Usage: <this script> <bag_dir>
#   <bag_dir>/fetch.txt      whitespace-separated: loc, checksum, fileLocation
#   <bag_dir>/data/File.csv  comma-separated metadata; the 4th field is the
#                            Replicate_RID and the 7th is the File_Name
#
# For every fetch.txt entry whose base name equals a File_Name in File.csv,
# each regular file with that name under the current directory is renamed in
# place (same directory) to "<Replicate_RID>.<ext>", where <ext> is the part
# of the name from "R*.fastq.gz" onward.
#
# NOTE: File.csv is split on plain commas, so quoted fields that themselves
# contain commas will shift the columns.
#
# Only POSIX shell syntax is used apart from find's -execdir, so the script
# behaves the same whether it is run via "sh" or "bash".

if [ -z "${1:-}" ]; then
  echo "usage: $0 <bag_dir>" >&2
  exit 1
fi

while read -r loc checksum fileLocation; do
  # Base name of the fetched file (strip everything up to the last '/').
  file=${fileLocation##*/}
  # Stem: base name without the trailing ".R<x>.fastq.gz".
  fileName=${file%.R*.fastq.gz}
  # Read suffix to keep, e.g. "R1.fastq.gz"; the stem is quoted so it is
  # matched literally rather than as a glob pattern.
  fileExt=${file##"${fileName}".}
  while IFS="," read -r RID Study_RID Experiment_RID Replicate_RID Caption File_Type File_Name URI File_size MD5 GEO_Archival_URL dbGaP_Accession_ID Processed Notes Principal_Investigator Consortium Release_Date RCT RMT Legacy_File_RID GUDMAP_NGF_OID GUDMAP_NGS_OID; do
    # '=' is the portable string comparison for '['; '==' is a bashism.
    if [ "${file}" = "${File_Name}" ]; then
      find . -type f -name "${file}" -execdir mv {} "${Replicate_RID}.${fileExt}" ';'
      # Every file with this name has now been renamed; stop scanning.
      break
    fi
  done < "$1/data/File.csv"
done < "$1/fetch.txt"
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment