Skip to content
Snippets Groups Projects
Commit eb83dee4 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add uploadInputBag

parent dcda53fd
Branches
Tags
2 merge requests: !58 Develop, !53 Resolve "process_derivaUpload"
# v0.0.4 (in development)
# v1.0.0 (in development)
**User Facing**
* Add option to pull references from datahub
* Add option to send email on workflow error, with pipeline error message
* Add versions and paper references of software used to report
* Upload input bag
**Background**
* Remove (comment out) option to pull references from S3
......
......@@ -88,4 +88,8 @@ process {
cpus = 1
memory = '1 GB'
}
withName: uploadInputBag {
cpus = 1
memory = '1 GB'
}
}
......@@ -61,6 +61,9 @@ process {
withName: outputBag {
executor = 'local'
}
withName: uploadInputBag {
executor = 'local'
}
}
singularity {
......
......@@ -70,6 +70,9 @@ process {
withName:outputBag {
container = 'bicf/gudmaprbkfilexfer:2.0.1_indev'
}
withName:uploadInputBag {
container = 'gudmaprbk/deriva1.3:1.0.0'
}
}
trace {
......@@ -98,6 +101,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
mainScript = 'rna-seq.nf'
version = 'v0.0.4_indev'
version = 'v1.0.0_indev'
nextflowVersion = '>=19.09.0'
}
......@@ -36,6 +36,7 @@ deriva.into {
deriva_getBag
deriva_getRefInfer
deriva_getRef
deriva_uploadInputBag
}
bdbag = Channel
.fromPath(params.bdbag)
......@@ -82,7 +83,7 @@ script_refData = Channel.fromPath("${baseDir}/scripts/extractRefData.py")
script_calculateTPM = Channel.fromPath("${baseDir}/scripts/calculateTPM.R")
script_convertGeneSymbols = Channel.fromPath("${baseDir}/scripts/convertGeneSymbols.R")
script_tinHist = Channel.fromPath("${baseDir}/scripts/tinHist.py")
script_uploadInputBag = Channel.fromPath("${baseDir}/scripts/uploadInputBag.py")
/*
* trackStart: track start of pipeline
......@@ -177,6 +178,10 @@ if (inputBagForce != "") {
} else {
inputBag = bag
}
// Split the inputBag channel into two copies: one consumed by getData and
// one consumed by uploadInputBag.
inputBag.into {
inputBag_getData
inputBag_uploadInputBag
}
/*
* getData: fetch study files from consortium with downloaded bdbag.zip
......@@ -187,7 +192,7 @@ process getData {
input:
path script_bdbagFetch
path cookies, stageAs: "deriva-cookies.txt" from bdbag
path inputBag
path inputBag from inputBag_getData
output:
path ("*.R{1,2}.fastq.gz") into fastqs
......@@ -1267,6 +1272,67 @@ process outputBag {
"""
}
/*
 * uploadInputBag: uploads the input bag
 *
 * Steps performed by the script:
 *   1. make sure a dated folder (<year>_<month>_<day>) exists under
 *      /hatrac/resources/rnaseq/pipeline/input_bag/ on the source server
 *   2. compute the md5 sum and byte size of the input bag
 *   3. query RNASeq:Input_Bag on the source server for a record with that md5
 *   4. if none exists, upload the bag to hatrac and register it with
 *      uploadInputBag.py; otherwise reuse the RID of the existing record
 *   5. write the resulting RID to inputBagRID.csv
 */
process uploadInputBag {
  tag "${repRID}"

  input:
    path script_uploadInputBag
    path inputBag from inputBag_uploadInputBag
    path credential, stageAs: "credential.json" from deriva_uploadInputBag

  output:
    path ("inputBagRID.csv") into inputBagRIDfl

  script:
    """
    hostname > ${repRID}.uploadInputBag.log
    ulimit -a >> ${repRID}.uploadInputBag.log

    yr=\$(date +'%Y')
    mn=\$(date +'%m')
    dy=\$(date +'%d')

    # Look for the dated folder on the same server that the mkdir and put below target.
    # grep exits non-zero when nothing matches, so fall back to true: otherwise a
    # shell running with errexit stops here and the folder is never created.
    hatrac=\$(deriva-hatrac-cli --host ${source} ls /hatrac/resources/rnaseq/pipeline/input_bag/ | grep -o \${yr}_\${mn}_\${dy} || true)
    if [ -z "\${hatrac}" ]
    then
      deriva-hatrac-cli --host ${source} mkdir /hatrac/resources/rnaseq/pipeline/input_bag/\${yr}_\${mn}_\${dy}
      echo LOG: hatrac folder created - /hatrac/resources/rnaseq/pipeline/input_bag/\${yr}_\${mn}_\${dy} >> ${repRID}.uploadInputBag.log
    else
      echo LOG: hatrac folder already exists - /hatrac/resources/rnaseq/pipeline/input_bag/\${yr}_\${mn}_\${dy} >> ${repRID}.uploadInputBag.log
    fi

    file=\$(basename -a ${inputBag})
    md5=\$(md5sum ./\${file} | awk '{ print \$1 }')
    echo LOG: ${repRID} input bag md5 sum - \${md5} >> ${repRID}.uploadInputBag.log
    size=\$(wc -c < ./\${file})
    echo LOG: ${repRID} input bag size - \${size} bytes >> ${repRID}.uploadInputBag.log

    # An empty JSON array means no Input_Bag record carries this md5 yet.
    exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:Input_Bag/File_MD5=\${md5})
    if [ "\${exist}" == "[]" ]
    then
      # Pull the cookie value for the source host out of credential.json.
      cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"')
      cookie=\${cookie:11:-1}

      loc=\$(deriva-hatrac-cli --host ${source} put ./\${file} /hatrac/resources/rnaseq/pipeline/input_bag/\${yr}_\${mn}_\${dy}/\${file})
      inputBag_rid=\$(python3 uploadInputBag.py -f \${file} -l \${loc} -s \${md5} -b \${size} -o ${source} -c \${cookie})
      echo LOG: input bag RID uploaded - \${inputBag_rid} >> ${repRID}.uploadInputBag.log
      rid=\${inputBag_rid}
    else
      # Cut the RID value out of the returned JSON text.
      exist=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT')
      exist=\${exist:8:-6}
      echo LOG: input bag RID already exists - \${exist} >> ${repRID}.uploadInputBag.log
      rid=\${exist}
    fi

    echo \${rid} > inputBagRID.csv
    """
}
// Parse inputBagRID.csv (emitted by uploadInputBag through inputBagRIDfl)
// and route its single column into the inputBagRID channel.
inputBagRID = Channel.create()
inputBagRIDfl.splitCsv(sep: ",", header: false).separate(
inputBagRID
)
workflow.onError = {
subject = "$workflow.manifest.name FAILED: $params.repRID"
......
import argparse
from deriva.core import ErmrestCatalog, get_credential, BaseCLI
import sys
import csv
from datetime import datetime
def get_args():
    """Parse the command-line options describing the input bag to register.

    Returns:
        argparse.Namespace with the attributes ``file``, ``loc``, ``md5``,
        ``bytes``, ``host`` and ``cookie``. Every option is required and is
        kept as the string given on the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', help="input bag file name",
                        required=True)
    parser.add_argument('-l', '--loc', help="location (URL) of the uploaded bag",
                        required=True)
    parser.add_argument('-s', '--md5', help="md5 sum of the bag",
                        required=True)
    parser.add_argument('-b', '--bytes', help="size of the bag in bytes",
                        required=True)
    # These two options previously carried a copy-pasted "bytes" help text.
    parser.add_argument('-o', '--host', help="deriva server hostname",
                        required=True)
    parser.add_argument('-c', '--cookie', help="deriva authentication cookie",
                        required=True)
    args = parser.parse_args()
    return args
def main(hostname, catalog_number, credential):
    """Insert one Input_Bag row into the RNASeq schema and print its RID.

    The file-related fields of the row are taken from the module-level
    ``args`` namespace (assigned in the ``__main__`` block), not from this
    function's parameters.

    Args:
        hostname: server name handed to ``ErmrestCatalog``.
        catalog_number: catalog identifier handed to ``ErmrestCatalog``.
        credential: credential object handed to ``ErmrestCatalog``.
    """
    server = ErmrestCatalog('https', hostname, catalog_number, credential)
    table = server.getPathBuilder().RNASeq.Input_Bag

    row = dict(
        File_Name=args.file,
        File_URL=args.loc,
        File_MD5=args.md5,
        File_Bytes=args.bytes,
        # Current local time, truncated to whole seconds, ISO 8601 formatted.
        File_Creation_Time=datetime.now().replace(microsecond=0).isoformat(),
        Notes="TEST",
        Bag_Type="Replicate_Input_Seq",
    )

    inserted = table.insert([row])
    print(inserted[0]["RID"])
if __name__ == '__main__':
    # `args` has to stay a module-level name: main() reads it as a global.
    args = get_args()

    # The CLI object is created and has its --config-file option removed,
    # but nothing else in this block uses it.
    cli = BaseCLI("Custom RNASeq query", None, 1)
    cli.remove_options(["--config-file"])

    # Catalog number 2, authenticated with the cookie given on the command line.
    main(hostname=args.host, catalog_number=2, credential={"cookie": args.cookie})
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment