Merge branch '11-deriva.upload' into 'develop'

Resolve "process_derivaUpload". Closes #24, #75, and #11. See merge request !53.

Merge branch '11-deriva.upload' into 'develop'
Resolve "process_derivaUpload". Closes #24, #75, and #11. See merge request !53.
ad5bf09e · Gervaise Henry · 30143e2f · 5e6b9051 · ad5bf09e · ad5bf09e
Commit ad5bf09e authored 4 years ago by Gervaise Henry
--- a/workflow/scripts/inferMeta.sh
+++ b/workflow/scripts/inferMeta.sh
--- a/workflow/scripts/parseMeta.py
+++ b/workflow/scripts/parseMeta.py
--- a/workflow/scripts/splitStudy.py
+++ b/workflow/scripts/splitStudy.py
--- a/workflow/scripts/splitStudy.sh
+++ b/workflow/scripts/splitStudy.sh
--- a/workflow/scripts/tinHist.py
+++ b/workflow/scripts/tinHist.py
@@ -17,7 +17,7 @@ def get_args():

 def main():
    args = get_args()
-    tin = pd.read_csv(args.repRID + '.sorted.deduped.tin.xls',
+    tin = pd.read_csv(args.repRID + '_sorted.deduped.tin.xls',
                      sep="\t", header=0)

    hist = pd.cut(tin['TIN'], bins=pd.interval_range(
@@ -42,8 +42,8 @@ def main():
    hist = hist[['TOTAL'] + [i for i in hist.columns if i != 'TOTAL']]
    hist = hist.T.fillna(0.0).astype(int)
    #hist = hist.apply(lambda x: x/x.sum()*100, axis=1)
-    hist.to_csv(args.repRID + '.tin.hist.tsv', sep='\t')
-    medFile = open(args.repRID + '.tin.med.csv', "w")
+    hist.to_csv(args.repRID + '_tin.hist.tsv', sep='\t')
+    medFile = open(args.repRID + '_tin.med.csv', "w")
    medFile.write(str(round(tin['TIN'][(tin['TIN'] != 0)].median(), 2)))
    medFile.close()


--- a/workflow/scripts/upload_execution_run.py
+++ b/workflow/scripts/upload_execution_run.py
+import argparse
+from deriva.core import ErmrestCatalog, get_credential, BaseCLI
+import sys
+import csv
+
+def get_args():
+    """Parse the command-line options for an Execution_Run upload.
+
+    Returns:
+        argparse.Namespace with the attributes repRID, workflowRID,
+        referenceRID, inputBagRID, notes, status, statusDetail, host,
+        cookie and update.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-r', '--repRID', help="replicate RID", required=True)
+    parser.add_argument('-w', '--workflowRID', help="workflow RID", required=True)
+    parser.add_argument('-g', '--referenceRID', help="reference genome RID", required=True)
+    parser.add_argument('-i', '--inputBagRID', help="inputBag RID", required=True)
+    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
+    parser.add_argument('-s', '--status', help="run status", default="", required=False)
+    parser.add_argument('-d', '--statusDetail', help="status detail", default="", required=False)
+    parser.add_argument('-o', '--host', help="datahub host", required=True)
+    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
+    # main() inserts a new row when this is "F" and otherwise treats the
+    # value as the RID of the row to update. The option is optional so the
+    # declared default of "F" can actually take effect; callers that always
+    # pass -u are unaffected.
+    parser.add_argument('-u', '--update', help="update?", default="F", required=False)
+    return parser.parse_args()
+
+def main(hostname, catalog_number, credential):
+    """Insert or update one RNASeq.Execution_Run row and print its RID.
+
+    The column values are read from the module-level ``args`` namespace.
+    When ``args.update`` is "F" a new row is inserted and the RID reported
+    by the catalog is printed. Any other value is sent as the RID of the
+    row to update and is printed back unchanged.
+
+    Args:
+        hostname: host name passed to ErmrestCatalog.
+        catalog_number: catalog identifier passed to ErmrestCatalog.
+        credential: credential mapping passed to ErmrestCatalog.
+    """
+    execution_runs = ErmrestCatalog(
+        'https', hostname, catalog_number, credential
+    ).getPathBuilder().RNASeq.Execution_Run
+
+    # Column values common to both the insert and the update payload.
+    fields = {
+        "Replicate": args.repRID,
+        "Workflow": args.workflowRID,
+        "Reference_Genome": args.referenceRID,
+        "Input_Bag": args.inputBagRID,
+        "Notes": args.notes,
+        "Execution_Status": args.status,
+        "Execution_Status_Detail": args.statusDetail
+    }
+
+    if args.update != "F":
+        # Update path: the payload carries the target RID ahead of the
+        # shared columns, and that same RID is what gets printed.
+        execution_runs.update([{"RID": args.update, **fields}])
+        print(args.update)
+        return
+
+    # Insert path: the RID is taken from the first returned entity.
+    inserted = execution_runs.insert([fields])
+    print(inserted[0]["RID"])
+
+
+if __name__ == '__main__':
+    # `args` has to remain a module-level name: main() reads it as a global.
+    args = get_args()
+    # NOTE(review): `cli` is not used again after remove_options() --
+    # confirm whether constructing BaseCLI is still required here.
+    cli = BaseCLI("Custom RNASeq query", None, 1)
+    cli.remove_options(["--config-file"])
+    # Catalog number 2 is hard-coded; the cookie token is wrapped in the
+    # credential mapping handed to ErmrestCatalog.
+    main(args.host, 2, {"cookie": args.cookie})
\ No newline at end of file
--- a/workflow/scripts/upload_input_bag.py
+++ b/workflow/scripts/upload_input_bag.py
+import argparse
+from deriva.core import ErmrestCatalog, get_credential, BaseCLI
+import sys
+import csv
+from datetime import datetime
+
+def get_args():
+    """Parse the command-line options for an Input_Bag upload.
+
+    Returns:
+        argparse.Namespace with the attributes file, loc, md5, bytes,
+        notes, host and cookie.
+    """
+    # (short flag, long flag, help text, extra add_argument keywords),
+    # listed in the order the options are registered.
+    option_specs = [
+        ('-f', '--file', "file name", {"required": True}),
+        ('-l', '--loc', "datahub location", {"required": True}),
+        ('-s', '--md5', "md5 sum", {"required": True}),
+        ('-b', '--bytes', "size in bytes", {"required": True}),
+        ('-n', '--notes', "notes", {"default": "", "required": False}),
+        ('-o', '--host', "datahub host", {"required": True}),
+        ('-c', '--cookie', "cookie token", {"required": True}),
+    ]
+    parser = argparse.ArgumentParser()
+    for short_flag, long_flag, description, extra in option_specs:
+        parser.add_argument(short_flag, long_flag, help=description, **extra)
+    return parser.parse_args()
+
+def main(hostname, catalog_number, credential):
+    """Insert one RNASeq.Input_Bag row and print the RID of the new row.
+
+    File details and notes are read from the module-level ``args``
+    namespace. The creation time is ``datetime.now()`` at call time with
+    microseconds dropped, in ISO format.
+
+    Args:
+        hostname: host name passed to ErmrestCatalog.
+        catalog_number: catalog identifier passed to ErmrestCatalog.
+        credential: credential mapping passed to ErmrestCatalog.
+    """
+    input_bags = ErmrestCatalog(
+        'https', hostname, catalog_number, credential
+    ).getPathBuilder().RNASeq.Input_Bag
+
+    created_at = datetime.now().replace(microsecond=0).isoformat()
+    record = {
+        "File_Name": args.file,
+        "File_URL": args.loc,
+        "File_MD5": args.md5,
+        "File_Bytes": args.bytes,
+        "File_Creation_Time": created_at,
+        "Notes": args.notes,
+        "Bag_Type": "Replicate_Input_Seq"
+    }
+
+    # The RID is taken from the first entity returned by the insert.
+    inserted = input_bags.insert([record])
+    print(inserted[0]["RID"])
+
+
+if __name__ == '__main__':
+    # `args` has to remain a module-level name: main() reads it as a global.
+    args = get_args()
+    # NOTE(review): `cli` is not used again after remove_options() --
+    # confirm whether constructing BaseCLI is still required here.
+    cli = BaseCLI("Custom RNASeq query", None, 1)
+    cli.remove_options(["--config-file"])
+    # Catalog number 2 is hard-coded; the cookie token is wrapped in the
+    # credential mapping handed to ErmrestCatalog.
+    main(args.host, 2, {"cookie": args.cookie})
\ No newline at end of file
--- a/workflow/scripts/upload_output_bag.py
+++ b/workflow/scripts/upload_output_bag.py
+import argparse
+from deriva.core import ErmrestCatalog, get_credential, BaseCLI
+import sys
+import csv
+from datetime import datetime
+
+def get_args():
+    """Parse the command-line options for an Output_Bag upload.
+
+    Returns:
+        argparse.Namespace with the attributes executionRunRID, file, loc,
+        md5, bytes, notes, host and cookie.
+    """
+    parser = argparse.ArgumentParser()
+    # Help text previously read "exection run RID"; spelling corrected.
+    parser.add_argument('-e', '--executionRunRID', help="execution run RID", required=True)
+    parser.add_argument('-f', '--file', help="file name", required=True)
+    parser.add_argument('-l', '--loc', help="datahub location", required=True)
+    parser.add_argument('-s', '--md5', help="md5 sum", required=True)
+    parser.add_argument('-b', '--bytes', help="size in bytes", required=True)
+    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
+    parser.add_argument('-o', '--host', help="datahub host", required=True)
+    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
+    return parser.parse_args()
+
+def main(hostname, catalog_number, credential):
+    """Insert one RNASeq.Output_Bag row and print the RID of the new row.
+
+    The execution run RID, file details and notes are read from the
+    module-level ``args`` namespace. The creation time is
+    ``datetime.now()`` at call time with microseconds dropped, in ISO
+    format.
+
+    Args:
+        hostname: host name passed to ErmrestCatalog.
+        catalog_number: catalog identifier passed to ErmrestCatalog.
+        credential: credential mapping passed to ErmrestCatalog.
+    """
+    output_bags = ErmrestCatalog(
+        'https', hostname, catalog_number, credential
+    ).getPathBuilder().RNASeq.Output_Bag
+
+    created_at = datetime.now().replace(microsecond=0).isoformat()
+    record = {
+        "Execution_Run": args.executionRunRID,
+        "File_Name": args.file,
+        "File_URL": args.loc,
+        "File_MD5": args.md5,
+        "File_Bytes": args.bytes,
+        "File_Creation_Time": created_at,
+        "Notes": args.notes,
+        "Bag_Type": "mRNA_Replicate_Analysis"
+    }
+
+    # The RID is taken from the first entity returned by the insert.
+    inserted = output_bags.insert([record])
+    print(inserted[0]["RID"])
+
+
+if __name__ == '__main__':
+    # `args` has to remain a module-level name: main() reads it as a global.
+    args = get_args()
+    # NOTE(review): `cli` is not used again after remove_options() --
+    # confirm whether constructing BaseCLI is still required here.
+    cli = BaseCLI("Custom RNASeq query", None, 1)
+    cli.remove_options(["--config-file"])
+    # Catalog number 2 is hard-coded; the cookie token is wrapped in the
+    # credential mapping handed to ErmrestCatalog.
+    main(args.host, 2, {"cookie": args.cookie})
\ No newline at end of file
--- a/workflow/scripts/upload_qc.py
+++ b/workflow/scripts/upload_qc.py
+import argparse
+from deriva.core import ErmrestCatalog, get_credential, BaseCLI
+import sys
+import csv
+
+def get_args():
+    """Parse the command-line options for an mRNA_QC upload.
+
+    Returns:
+        argparse.Namespace with the attributes repRID, executionRunRID,
+        ends, stranded, length, rawCount, assignedCount, notes, host,
+        cookie and update.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-r', '--repRID', help="replicate RID", required=True)
+    # Help text previously read "exection run RID"; spelling corrected.
+    parser.add_argument('-e', '--executionRunRID', help="execution run RID", required=True)
+    parser.add_argument('-p', '--ends', help="single/paired ends", required=True)
+    parser.add_argument('-s', '--stranded', help="stranded?", required=True)
+    parser.add_argument('-l', '--length', help="median read length", required=True)
+    parser.add_argument('-w', '--rawCount', help="raw count", required=True)
+    parser.add_argument('-f', '--assignedCount', help="final assigned count", required=True)
+    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
+    parser.add_argument('-o', '--host', help="datahub host", required=True)
+    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
+    # main() inserts a new row when this is "F" and otherwise treats the
+    # value as the RID of the row to update. The option is optional so the
+    # declared default of "F" can actually take effect; callers that always
+    # pass -u are unaffected.
+    parser.add_argument('-u', '--update', help="update?", default="F", required=False)
+    return parser.parse_args()
+
+def main(hostname, catalog_number, credential):
+    """Insert or update one RNASeq.mRNA_QC row and print its RID.
+
+    The column values are read from the module-level ``args`` namespace.
+    When ``args.update`` is "F" a new row is inserted and the RID reported
+    by the catalog is printed. Any other value is sent as the RID of the
+    row to update and is printed back unchanged.
+
+    Args:
+        hostname: host name passed to ErmrestCatalog.
+        catalog_number: catalog identifier passed to ErmrestCatalog.
+        credential: credential mapping passed to ErmrestCatalog.
+    """
+    qc_rows = ErmrestCatalog(
+        'https', hostname, catalog_number, credential
+    ).getPathBuilder().RNASeq.mRNA_QC
+
+    # Column values common to both the insert and the update payload.
+    fields = {
+        "Execution_Run": args.executionRunRID,
+        "Replicate": args.repRID,
+        "Paired_End": args.ends,
+        "Strandedness": args.stranded,
+        "Median_Read_Length": args.length,
+        "Raw_Count": args.rawCount,
+        "Final_Count": args.assignedCount,
+        "Notes": args.notes
+    }
+
+    if args.update != "F":
+        # Update path: the payload carries the target RID ahead of the
+        # shared columns, and that same RID is what gets printed.
+        qc_rows.update([{"RID": args.update, **fields}])
+        print(args.update)
+        return
+
+    # Insert path: the RID is taken from the first returned entity.
+    inserted = qc_rows.insert([fields])
+    print(inserted[0]["RID"])
+
+
+if __name__ == '__main__':
+    # `args` has to remain a module-level name: main() reads it as a global.
+    args = get_args()
+    # NOTE(review): `cli` is not used again after remove_options() --
+    # confirm whether constructing BaseCLI is still required here.
+    cli = BaseCLI("Custom RNASeq query", None, 1)
+    cli.remove_options(["--config-file"])
+    # Catalog number 2 is hard-coded; the cookie token is wrapped in the
+    # credential mapping handed to ErmrestCatalog.
+    main(args.host, 2, {"cookie": args.cookie})
\ No newline at end of file
--- a/workflow/tests/test_consistency.py
+++ b/workflow/tests/test_consistency.py
@@ -18,8 +18,8 @@ def test_consistencySE():
    with open(os.path.join(
        test_output_path, 'SE_multiqc_data.json')) as f:
        assigned_reads_json = json.load(f)
-    assigned_reads = assigned_reads_json['report_general_stats_data'][4]['16-1ZX4']['Assigned']
-    assert  assigned_reads == 7742416
+    assigned_reads = assigned_reads_json['report_general_stats_data'][4]['16-1ZX4_sorted']['Assigned']
+    assert  assigned_reads == 7746121


 @pytest.mark.consistencyPE
@@ -30,5 +30,5 @@ def test_consistencyPE():
    with open(os.path.join(
        test_output_path, 'PE_multiqc_data.json')) as f:
        assigned_reads_json = json.load(f)
-    assigned_reads = assigned_reads_json['report_general_stats_data'][4]['Q-Y5JA']['Assigned']
-    assert  assigned_reads == 2599149
+    assigned_reads = assigned_reads_json['report_general_stats_data'][4]['Q-Y5JA_sorted']['Assigned']
+    assert  assigned_reads == 2596053
--- a/workflow/tests/test_getBag.py
+++ b/workflow/tests/test_getBag.py
@@ -12,4 +12,4 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.getBag
 def test_getBag():
    assert os.path.exists(os.path.join(
-        test_output_path, 'Replicate_Q-Y5F6.zip'))
+        test_output_path, 'Q-Y5F6_inputBag.zip'))
--- a/workflow/tests/test_getData.py
+++ b/workflow/tests/test_getData.py
@@ -12,6 +12,6 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.getData
 def test_getData():
    assert os.path.exists(os.path.join(
-        test_output_path, 'Replicate_Q-Y5F6/bagit.txt'))
+        test_output_path, 'Q-Y5F6_inputBag/bagit.txt'))
    assert os.path.exists(os.path.join(
-        test_output_path, 'Replicate_Q-Y5F6/data/assets/Study/Q-Y4GY/Experiment/Q-Y4DP/Replicate/Q-Y5F6/mMARIS_Six2-#3.gene.rpkm.txt'))
+        test_output_path, 'Q-Y5F6_inputBag/data/assets/Study/Q-Y4GY/Experiment/Q-Y4DP/Replicate/Q-Y5F6/mMARIS_Six2-#3.gene.rpkm.txt'))
--- a/workflow/tests/test_makeFeatureCounts.py
+++ b/workflow/tests/test_makeFeatureCounts.py
@@ -12,8 +12,8 @@ data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.makeFeatureCounts
 def test_makeFeatureCounts():
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.countData'))
+        data_output_path, 'Q-Y5F6_1M.se_countData'))
    assert os.path.exists(os.path.join(
        data_output_path, 'Q-Y5F6_1M.se.countTable.csv'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.tpmTable.csv'))
+        data_output_path, 'Q-Y5F6_1M.se_tpmTable.csv'))
--- a/workflow/tests/test_trimData.py
+++ b/workflow/tests/test_trimData.py
@@ -18,6 +18,6 @@ def test_trimData_se():
 @pytest.mark.trimData
 def test_trimData_pe():
    assert os.path.exists(os.path.join(
-        test_output_path, 'Q-Y5F6_1M.pe_R1_val_1.fq.gz'))
+        test_output_path, 'Q-Y5F6_1M.pe_val_1.fq.gz'))
    assert os.path.exists(os.path.join(
-        test_output_path, 'Q-Y5F6_1M.pe_R2_val_2.fq.gz'))
+        test_output_path, 'Q-Y5F6_1M.pe_val_2.fq.gz'))