Skip to content
Snippets Groups Projects
Commit 43935dd7 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Make bdbag scripts an input

parent 0ecf1945
Branches
Tags
3 merge requests!37v0.0.1,!13Develop,!12Resolve "Make scripts input files for processes"
Pipeline #5721 passed with stages
in 33 minutes and 55 seconds
......@@ -23,6 +23,9 @@ logsDir = "${outDir}/Logs"
// Define fixed files
derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
// Define script files
script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
/*
* getData: get bagit file from consortium
*/
......@@ -57,6 +60,7 @@ process getData {
publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.getData.err"
input:
path script_bdbagFetch
path cookies, stageAs: 'deriva-cookies.txt' from bdbag
path bagit
......@@ -79,7 +83,7 @@ process getData {
echo "LOG: \${replicate}" >>${repRID}.getData.err
unzip ${bagit} 2>>${repRID}.getData.err
echo "LOG: replicate bdbag unzipped" >>${repRID}.getData.err
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err
sh bdbagFetch.sh \${replicate} ${repRID} 2>>${repRID}.getData.err
echo "LOG: replicate bdbag fetched" >>${repRID}.getData.err
"""
}
......
#!/usr/bin/env python3
import argparse
import pandas as pd
import re
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fetchFile',help="The fetch file from bdgap.zip.",required=True)
args = parser.parse_args()
return args
def main():
args = get_args()
fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None)
fileFile = pd.read_csv(args.fetchFile+"/data/File.csv",sep=",",header=0)
fileFile_filtered = fileFile[fileFile["File_Type"]=="FastQ"]
fetchFile_filtered = fetchFile[fetchFile[2].str[-9:]==".fastq.gz"]
fetchFile_filtered_renamed = fetchFile_filtered
for i in fileFile_filtered["File_Name"]:
fetchFile_filtered_renamed[2][fetchFile_filtered_renamed[2].str.contains(i,regex=False)] = fetchFile_filtered_renamed[2][fetchFile_filtered_renamed[2].str.contains(i,regex=False)].values[0].replace(re.sub("\.R.\.fastq\.gz","",i),fileFile_filtered["Replicate_RID"][fileFile_filtered["File_Name"]==i].values[0])
fetchFile_filtered_renamed.to_csv(args.fetchFile+"/fetch.txt",sep="\t",header=False,index=False)
if __name__ == '__main__':
main()
\ No newline at end of file
#!/usr/bin/env python3
import argparse
import pandas as pd
import os
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--fetchFile',help="The fetch file from bdgap.zip.",required=True)
args = parser.parse_args()
return args
def main():
args = get_args()
fetchFile = pd.read_csv(args.fetchFile+"/fetch.txt",sep="\t",header=None)
fileFile = pd.read_csv(args.fetchFile+"/data/File.csv",sep=",",header=0)
replicateRID = fileFile.Replicate_RID.unique()
fetchArray = {i:fileFile.URI[(fileFile.Replicate_RID == i) & (fileFile.File_Type == "FastQ")] for i in replicateRID}
for i in replicateRID:
if not os.path.exists(i):
os.mkdir("Replicate_"+i)
fetchFile[fetchFile[0].str.contains('|'.join(fetchArray[i]))].to_csv("Replicate_"+i+"/fetch.txt",sep="\t",header=False,index=False)
if __name__ == '__main__':
main()
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment