Skip to content
Snippets Groups Projects
Commit ad5bf09e authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '11-deriva.upload' into 'develop'

Resolve "process_derivaUpload"

Closes #24, #75, and #11

See merge request !53
parents 30143e2f 5e6b9051
2 merge requests: !58 Develop, !53 Resolve "process_derivaUpload"
Pipeline #8733 failed with stages
in 8 minutes and 17 seconds
File moved
File moved
File moved
File moved
......@@ -17,7 +17,7 @@ def get_args():
def main():
args = get_args()
tin = pd.read_csv(args.repRID + '.sorted.deduped.tin.xls',
tin = pd.read_csv(args.repRID + '_sorted.deduped.tin.xls',
sep="\t", header=0)
hist = pd.cut(tin['TIN'], bins=pd.interval_range(
......@@ -42,8 +42,8 @@ def main():
hist = hist[['TOTAL'] + [i for i in hist.columns if i != 'TOTAL']]
hist = hist.T.fillna(0.0).astype(int)
#hist = hist.apply(lambda x: x/x.sum()*100, axis=1)
hist.to_csv(args.repRID + '.tin.hist.tsv', sep='\t')
medFile = open(args.repRID + '.tin.med.csv', "w")
hist.to_csv(args.repRID + '_tin.hist.tsv', sep='\t')
medFile = open(args.repRID + '_tin.med.csv', "w")
medFile.write(str(round(tin['TIN'][(tin['TIN'] != 0)].median(), 2)))
medFile.close()
......
import argparse
from deriva.core import ErmrestCatalog, get_credential, BaseCLI
import sys
import csv
def get_args(argv=None):
    """Parse the command-line options of the execution-run upload script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which is
            what a plain ``get_args()`` call did before.

    Returns:
        argparse.Namespace with the attributes ``repRID``, ``workflowRID``,
        ``referenceRID``, ``inputBagRID``, ``notes``, ``status``,
        ``statusDetail``, ``host``, ``cookie`` and ``update``.

    Raises:
        SystemExit: raised by argparse when a required option is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--repRID', help="replicate RID", required=True)
    parser.add_argument('-w', '--workflowRID', help="workflow RID", required=True)
    parser.add_argument('-g', '--referenceRID', help="reference genome RID", required=True)
    parser.add_argument('-i', '--inputBagRID', help="inputBag RID", required=True)
    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
    parser.add_argument('-s', '--status', help="run status", default="", required=False)
    parser.add_argument('-d', '--statusDetail', help="status detail", default="", required=False)
    parser.add_argument('-o', '--host', help="datahub host", required=True)
    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
    # The option used to be required, which made its default "F" unreachable.
    # It is optional now, so omitting it yields "F"; passing -u still works.
    parser.add_argument('-u', '--update', help="update?", default="F", required=False)
    return parser.parse_args(argv)
def main(hostname, catalog_number, credential):
    """Insert or update one row of ``RNASeq.Execution_Run`` and print its RID.

    The column values come from the module-level ``args`` namespace, not from
    the parameters. When ``args.update`` equals ``"F"`` a row is inserted and
    the RID of the first returned entity is printed; otherwise ``args.update``
    is sent as the ``RID`` of the row to update and is printed unchanged.

    Args:
        hostname: host passed to ``ErmrestCatalog``.
        catalog_number: catalog identifier passed to ``ErmrestCatalog``.
        credential: credential object passed to ``ErmrestCatalog``.
    """
    path_builder = ErmrestCatalog(
        'https', hostname, catalog_number, credential).getPathBuilder()
    execution_runs = path_builder.RNASeq.Execution_Run

    # Columns shared by the insert and the update payloads.
    columns = {
        "Replicate": args.repRID,
        "Workflow": args.workflowRID,
        "Reference_Genome": args.referenceRID,
        "Input_Bag": args.inputBagRID,
        "Notes": args.notes,
        "Execution_Status": args.status,
        "Execution_Status_Detail": args.statusDetail
    }

    if args.update != "F":
        # "RID" stays the first key, as in the original update payload.
        execution_runs.update([{"RID": args.update, **columns}])
        rid = args.update
    else:
        rid = execution_runs.insert([columns])[0]["RID"]
    print(rid)
if __name__ == '__main__':
    # Parsed options are stored in the module-level `args`, which main()
    # reads directly rather than receiving as a parameter.
    args = get_args()
    # NOTE(review): `cli` is not used after this point; presumably kept for
    # BaseCLI set-up side effects -- confirm whether it is still needed.
    cli = BaseCLI("Custom RNASeq query", None, 1)
    cli.remove_options(["--config-file"])
    host = args.host
    # The credential handed to main() is built from the --cookie token.
    credentials = {"cookie": args.cookie}
    # 2 is passed as main()'s catalog_number.
    main(host, 2, credentials)
\ No newline at end of file
import argparse
from deriva.core import ErmrestCatalog, get_credential, BaseCLI
import sys
import csv
from datetime import datetime
def get_args():
    """Parse the command-line options of the input-bag upload script.

    Returns:
        argparse.Namespace with the attributes ``file``, ``loc``, ``md5``,
        ``bytes``, ``notes``, ``host`` and ``cookie``. All values are strings,
        because no ``type`` is given for any option.
    """
    # (flags, keyword options) in declaration order, which is also the order
    # shown in the generated help.
    option_table = (
        (('-f', '--file'), {'help': "file name", 'required': True}),
        (('-l', '--loc'), {'help': "datahub location", 'required': True}),
        (('-s', '--md5'), {'help': "md5 sum", 'required': True}),
        (('-b', '--bytes'), {'help': "size in bytes", 'required': True}),
        (('-n', '--notes'), {'help': "notes", 'default': "", 'required': False}),
        (('-o', '--host'), {'help': "datahub host", 'required': True}),
        (('-c', '--cookie'), {'help': "cookie token", 'required': True}),
    )
    cli_parser = argparse.ArgumentParser()
    for flags, options in option_table:
        cli_parser.add_argument(*flags, **options)
    return cli_parser.parse_args()
def main(hostname, catalog_number, credential):
    """Insert one row into ``RNASeq.Input_Bag`` and print the new row's RID.

    The file attributes come from the module-level ``args`` namespace. The
    creation time is the current ``datetime.now()`` value (no timezone is
    passed, so it is a naive timestamp) truncated to whole seconds and
    formatted with ``isoformat()``.

    Args:
        hostname: host passed to ``ErmrestCatalog``.
        catalog_number: catalog identifier passed to ``ErmrestCatalog``.
        credential: credential object passed to ``ErmrestCatalog``.
    """
    bag_table = ErmrestCatalog(
        'https', hostname, catalog_number, credential
    ).getPathBuilder().RNASeq.Input_Bag

    created_at = datetime.now().replace(microsecond=0).isoformat()
    row = {
        "File_Name": args.file,
        "File_URL": args.loc,
        "File_MD5": args.md5,
        "File_Bytes": args.bytes,
        "File_Creation_Time": created_at,
        "Notes": args.notes,
        "Bag_Type": "Replicate_Input_Seq"
    }

    inserted = bag_table.insert([row])
    print(inserted[0]["RID"])
if __name__ == '__main__':
    # Parsed options are stored in the module-level `args`, which main()
    # reads directly rather than receiving as a parameter.
    args = get_args()
    # NOTE(review): `cli` is not used after this point; presumably kept for
    # BaseCLI set-up side effects -- confirm whether it is still needed.
    cli = BaseCLI("Custom RNASeq query", None, 1)
    cli.remove_options(["--config-file"])
    host = args.host
    # The credential handed to main() is built from the --cookie token.
    credential = {"cookie": args.cookie}
    # 2 is passed as main()'s catalog_number.
    main(host, 2, credential)
\ No newline at end of file
import argparse
from deriva.core import ErmrestCatalog, get_credential, BaseCLI
import sys
import csv
from datetime import datetime
def get_args(argv=None):
    """Parse the command-line options of the output-bag upload script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which is
            what a plain ``get_args()`` call did before.

    Returns:
        argparse.Namespace with the attributes ``executionRunRID``, ``file``,
        ``loc``, ``md5``, ``bytes``, ``notes``, ``host`` and ``cookie``. All
        values are strings, because no ``type`` is given for any option.

    Raises:
        SystemExit: raised by argparse when a required option is missing.
    """
    parser = argparse.ArgumentParser()
    # Help text previously read "exection run RID" (typo in user-facing help).
    parser.add_argument('-e', '--executionRunRID', help="execution run RID", required=True)
    parser.add_argument('-f', '--file', help="file name", required=True)
    parser.add_argument('-l', '--loc', help="datahub location", required=True)
    parser.add_argument('-s', '--md5', help="md5 sum", required=True)
    parser.add_argument('-b', '--bytes', help="size in bytes", required=True)
    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
    parser.add_argument('-o', '--host', help="datahub host", required=True)
    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
    return parser.parse_args(argv)
def main(hostname, catalog_number, credential):
    """Insert one row into ``RNASeq.Output_Bag`` and print the new row's RID.

    The execution-run RID and file attributes come from the module-level
    ``args`` namespace. The creation time is the current ``datetime.now()``
    value (no timezone is passed, so it is a naive timestamp) truncated to
    whole seconds and formatted with ``isoformat()``.

    Args:
        hostname: host passed to ``ErmrestCatalog``.
        catalog_number: catalog identifier passed to ``ErmrestCatalog``.
        credential: credential object passed to ``ErmrestCatalog``.
    """
    server = ErmrestCatalog('https', hostname, catalog_number, credential)
    output_bags = server.getPathBuilder().RNASeq.Output_Bag

    timestamp = datetime.now().replace(microsecond=0).isoformat()
    record = {
        "Execution_Run": args.executionRunRID,
        "File_Name": args.file,
        "File_URL": args.loc,
        "File_MD5": args.md5,
        "File_Bytes": args.bytes,
        "File_Creation_Time": timestamp,
        "Notes": args.notes,
        "Bag_Type": "mRNA_Replicate_Analysis"
    }

    first_entity = output_bags.insert([record])[0]
    print(first_entity["RID"])
if __name__ == '__main__':
    # Parsed options are stored in the module-level `args`, which main()
    # reads directly rather than receiving as a parameter.
    args = get_args()
    # NOTE(review): `cli` is not used after this point; presumably kept for
    # BaseCLI set-up side effects -- confirm whether it is still needed.
    cli = BaseCLI("Custom RNASeq query", None, 1)
    cli.remove_options(["--config-file"])
    host = args.host
    # The credential handed to main() is built from the --cookie token.
    credential = {"cookie": args.cookie}
    # 2 is passed as main()'s catalog_number.
    main(host, 2, credential)
\ No newline at end of file
import argparse
from deriva.core import ErmrestCatalog, get_credential, BaseCLI
import sys
import csv
def get_args(argv=None):
    """Parse the command-line options of the mRNA QC upload script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the process command line, which is
            what a plain ``get_args()`` call did before.

    Returns:
        argparse.Namespace with the attributes ``repRID``,
        ``executionRunRID``, ``ends``, ``stranded``, ``length``, ``rawCount``,
        ``assignedCount``, ``notes``, ``host``, ``cookie`` and ``update``.
        All values are strings, because no ``type`` is given for any option.

    Raises:
        SystemExit: raised by argparse when a required option is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--repRID', help="replicate RID", required=True)
    # Help text previously read "exection run RID" (typo in user-facing help).
    parser.add_argument('-e', '--executionRunRID', help="execution run RID", required=True)
    parser.add_argument('-p', '--ends', help="single/paired ends", required=True)
    parser.add_argument('-s', '--stranded', help="stranded?", required=True)
    parser.add_argument('-l', '--length', help="median read length", required=True)
    parser.add_argument('-w', '--rawCount', help="raw count", required=True)
    parser.add_argument('-f', '--assignedCount', help="final assigned count", required=True)
    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
    parser.add_argument('-o', '--host', help="datahub host", required=True)
    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
    # The option used to be required, which made its default "F" unreachable.
    # It is optional now, so omitting it yields "F"; passing -u still works.
    parser.add_argument('-u', '--update', help="update?", default="F", required=False)
    return parser.parse_args(argv)
def main(hostname, catalog_number, credential):
    """Insert or update one row of ``RNASeq.mRNA_QC`` and print its RID.

    The column values come from the module-level ``args`` namespace, not from
    the parameters. When ``args.update`` equals ``"F"`` a row is inserted and
    the RID of the first returned entity is printed; otherwise ``args.update``
    is sent as the ``RID`` of the row to update and is printed unchanged.

    Args:
        hostname: host passed to ``ErmrestCatalog``.
        catalog_number: catalog identifier passed to ``ErmrestCatalog``.
        credential: credential object passed to ``ErmrestCatalog``.
    """
    server = ErmrestCatalog('https', hostname, catalog_number, credential)
    qc_table = server.getPathBuilder().RNASeq.mRNA_QC

    # Columns shared by the insert and the update payloads.
    qc_columns = {
        "Execution_Run": args.executionRunRID,
        "Replicate": args.repRID,
        "Paired_End": args.ends,
        "Strandedness": args.stranded,
        "Median_Read_Length": args.length,
        "Raw_Count": args.rawCount,
        "Final_Count": args.assignedCount,
        "Notes": args.notes
    }

    inserting = args.update == "F"
    if inserting:
        created = qc_table.insert([qc_columns])
        rid = created[0]["RID"]
    else:
        # "RID" stays the first key, as in the original update payload.
        qc_table.update([{"RID": args.update, **qc_columns}])
        rid = args.update
    print(rid)
if __name__ == '__main__':
    # Parsed options are stored in the module-level `args`, which main()
    # reads directly rather than receiving as a parameter.
    args = get_args()
    # NOTE(review): `cli` is not used after this point; presumably kept for
    # BaseCLI set-up side effects -- confirm whether it is still needed.
    cli = BaseCLI("Custom RNASeq query", None, 1)
    cli.remove_options(["--config-file"])
    host = args.host
    # The credential handed to main() is built from the --cookie token.
    credentials = {"cookie": args.cookie}
    # 2 is passed as main()'s catalog_number.
    main(host, 2, credentials)
\ No newline at end of file
......@@ -18,8 +18,8 @@ def test_consistencySE():
with open(os.path.join(
test_output_path, 'SE_multiqc_data.json')) as f:
assigned_reads_json = json.load(f)
assigned_reads = assigned_reads_json['report_general_stats_data'][4]['16-1ZX4']['Assigned']
assert assigned_reads == 7742416
assigned_reads = assigned_reads_json['report_general_stats_data'][4]['16-1ZX4_sorted']['Assigned']
assert assigned_reads == 7746121
@pytest.mark.consistencyPE
......@@ -30,5 +30,5 @@ def test_consistencyPE():
with open(os.path.join(
test_output_path, 'PE_multiqc_data.json')) as f:
assigned_reads_json = json.load(f)
assigned_reads = assigned_reads_json['report_general_stats_data'][4]['Q-Y5JA']['Assigned']
assert assigned_reads == 2599149
assigned_reads = assigned_reads_json['report_general_stats_data'][4]['Q-Y5JA_sorted']['Assigned']
assert assigned_reads == 2596053
......@@ -12,4 +12,4 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.getBag
def test_getBag():
assert os.path.exists(os.path.join(
test_output_path, 'Replicate_Q-Y5F6.zip'))
test_output_path, 'Q-Y5F6_inputBag.zip'))
......@@ -12,6 +12,6 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.getData
def test_getData():
assert os.path.exists(os.path.join(
test_output_path, 'Replicate_Q-Y5F6/bagit.txt'))
test_output_path, 'Q-Y5F6_inputBag/bagit.txt'))
assert os.path.exists(os.path.join(
test_output_path, 'Replicate_Q-Y5F6/data/assets/Study/Q-Y4GY/Experiment/Q-Y4DP/Replicate/Q-Y5F6/mMARIS_Six2-#3.gene.rpkm.txt'))
test_output_path, 'Q-Y5F6_inputBag/data/assets/Study/Q-Y4GY/Experiment/Q-Y4DP/Replicate/Q-Y5F6/mMARIS_Six2-#3.gene.rpkm.txt'))
......@@ -12,8 +12,8 @@ data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.makeFeatureCounts
def test_makeFeatureCounts():
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.se.countData'))
data_output_path, 'Q-Y5F6_1M.se_countData'))
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.se.countTable.csv'))
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.se.tpmTable.csv'))
data_output_path, 'Q-Y5F6_1M.se_tpmTable.csv'))
......@@ -18,6 +18,6 @@ def test_trimData_se():
@pytest.mark.trimData
def test_trimData_pe():
assert os.path.exists(os.path.join(
test_output_path, 'Q-Y5F6_1M.pe_R1_val_1.fq.gz'))
test_output_path, 'Q-Y5F6_1M.pe_val_1.fq.gz'))
assert os.path.exists(os.path.join(
test_output_path, 'Q-Y5F6_1M.pe_R2_val_2.fq.gz'))
test_output_path, 'Q-Y5F6_1M.pe_val_2.fq.gz'))
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment