From 3aa23f2ae41c2b210ae71c9602db460755f26058 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Thu, 17 Dec 2020 15:00:15 -0600 Subject: [PATCH] Check for old mRNA QC and delete catalog entry --- .gitlab-ci.yml | 4 ++-- workflow/rna-seq.nf | 21 +++++++++++-------- workflow/scripts/deleteEntry.py | 37 +++++++++++++++++++++++++++++++++ workflow/tests/test_getBag.py | 2 +- workflow/tests/test_getData.py | 4 ++-- 5 files changed, 54 insertions(+), 14 deletions(-) create mode 100644 workflow/scripts/deleteEntry.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 29b3c61..a0b183b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -45,8 +45,8 @@ getData: script: - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag --version > version_bdbag.txt - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt - - unzip ./test_data/bag/staging/Replicate_Q-Y5F6.zip - - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST + - unzip ./test_data/bag/staging/Q-Y5F6_inputBag.zip + - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Q-Y5F6_inputBag Q-Y5F6_inputBag TEST - pytest -m getData artifacts: name: "$CI_JOB_NAME" diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index cfc9bd8..4529f8f 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -1459,19 +1459,22 @@ process uploadQC { cookie=\$(cat credential.json | grep -A 1 '\\"${source}\\": {' | grep -o '\\"cookie\\": \\".*\\"') cookie=\${cookie:11:-1} - +singularity run 'docker://gudmaprbk/deriva1.3:1.0.0' python3 ./workflow/scripts/ exist=\$(curl -s https://${source}/ermrest/catalog/2/entity/RNASeq:mRNA_QC/Replicate=${repRID}/Execution_Run=${executionRunRID}) - if [ "\${exist}" == "[]" ] + if [ "\${exist}" != "[]" ] then - qc_rid=\$(python3 uploadQC.py -r ${repRID} -e ${executionRunRID} -p "\${end}" -s ${stranded} -l ${length} -w ${rawCount} -f ${finalCount} -o ${source} -c \${cookie} -u F) - echo LOG: mRNA QC RID uploaded - \${qc_rid} >> ${repRID}.uploadQC.log - else - rid=\$(echo \${exist} | grep -o '\\"RID\\":\\".*\\",\\"RCT') - rid=\${rid:7:-6} - qc_rid=\$(python3 uploadQC.py -r ${repRID} -e ${executionRunRID} -p "\${end}" -s ${stranded} -l ${length} -w ${rawCount} -f ${finalCount} -o ${source} -c \${cookie} -u \${rid}) - echo LOG: mRNA QC RID updated - \${qc_rid} >> ${repRID}.uploadQC.log + rids=\$(echo $exist | grep -o '\\"RID\\":\\".\\{7\\}' | sed 's/^.\\{7\\}//') + for rid in \${rids} + do + python3 deleteEntry.py -r \${rid} -t mRNA_QC -o ${source} -c \${cookie} + echo LOG: old mRNA QC RID deleted - \${rid} >> ${repRID}.uploadQC.log + done + echo LOG: all old mRNA QC RIDs deleted >> ${repRID}.uploadQC.log fi + qc_rid=\$(python3 uploadQC.py -r ${repRID} -e ${executionRunRID} -p "\${end}" -s ${stranded} -l ${length} -w ${rawCount} -f ${finalCount} -o ${source} -c \${cookie} -u F) + echo LOG: mRNA QC RID uploaded - \${qc_rid} >> ${repRID}.uploadQC.log + echo \${qc_rid} > qcRID.csv """ } diff --git a/workflow/scripts/deleteEntry.py b/workflow/scripts/deleteEntry.py new file mode 100644 index 0000000..51d4816 --- /dev/null +++ b/workflow/scripts/deleteEntry.py @@ -0,0 +1,37 @@ +import argparse +from deriva.core import ErmrestCatalog, get_credential, BaseCLI +import sys +import csv + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('-r', '--RID', help="replicate RID", required=True) + parser.add_argument('-t', '--table', help="source table", required=True) + parser.add_argument('-o', '--host', help="datahub host", required=True) + parser.add_argument('-c', '--cookie', help="cookie token", required=True) + args = parser.parse_args() + return args + +def main(hostname, catalog_number, credential): + catalog = ErmrestCatalog('https', hostname, catalog_number, credential) + pb = catalog.getPathBuilder() + if args.table == 'mRNA_QC': + run_table = pb.RNASeq.mRNA_QC + elif args.table == "Processed_File": + run_table = pb.RNASeq.Processed_File + + path = run_table.filter(run_table.RID == args.RID) + path.delete() + rid = args.RID + + + print(rid + "deleted") + + +if __name__ == '__main__': + args = get_args() + cli = BaseCLI("Custom RNASeq query", None, 1) + cli.remove_options(["--config-file"]) + host = args.host + credentials = {"cookie": args.cookie} + main(host, 2, credentials) \ No newline at end of file diff --git a/workflow/tests/test_getBag.py b/workflow/tests/test_getBag.py index a99acc6..23bfc0e 100644 --- a/workflow/tests/test_getBag.py +++ b/workflow/tests/test_getBag.py @@ -12,4 +12,4 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.getBag def test_getBag(): assert os.path.exists(os.path.join( - test_output_path, 'Replicate_Q-Y5F6.zip')) + test_output_path, 'Q-Y5F6_inputBag.zip')) diff --git a/workflow/tests/test_getData.py b/workflow/tests/test_getData.py index 95e2018..596a120 100644 --- a/workflow/tests/test_getData.py +++ b/workflow/tests/test_getData.py @@ -12,6 +12,6 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.getData def test_getData(): assert os.path.exists(os.path.join( - test_output_path, 'Replicate_Q-Y5F6/bagit.txt')) + test_output_path, 'Q-Y5F6_inputBag/bagit.txt')) assert os.path.exists(os.path.join( - test_output_path, 'Replicate_Q-Y5F6/data/assets/Study/Q-Y4GY/Experiment/Q-Y4DP/Replicate/Q-Y5F6/mMARIS_Six2-#3.gene.rpkm.txt')) + test_output_path, 'Q-Y5F6_inputBag/data/assets/Study/Q-Y4GY/Experiment/Q-Y4DP/Replicate/Q-Y5F6/mMARIS_Six2-#3.gene.rpkm.txt')) -- GitLab