Merge branch 'develop' into 'master'

Develop See merge request !68

Merge branch 'develop' into 'master'
Develop See merge request !68
70e64daa · Gervaise Henry · 9c0e035b · fca601e4 · 70e64daa · 70e64daa
Commit 70e64daa authored 4 years ago by Gervaise Henry
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
 before_script:
  - module load python/3.6.4-anaconda
-  - pip install --user  attrs==19.1.0 pytest-pythonpath==0.7.1 pytest-cov==2.5.1
+  - pip install --user attrs==20.3.0 pytest==6.2.2 pytest-pythonpath==0.7.3 pytest-cov==2.11.1
  - module load singularity/3.5.3
  - module load nextflow/20.01.0
  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
@@ -389,7 +389,7 @@ uploadQC:
      done
      echo all old mRNA QC RIDs deleted
    fi
-      rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single Read" -s forward -l 35 -w 5 -f 1 -t 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
+      rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single End" -s forward -l 35 -w 5 -f 1 -t 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
      echo ${rid} test mRNA QC created

 uploadProcessedFile:
@@ -660,12 +660,13 @@ integration_se:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./SE_report.html
  - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
+  - pytest -m completionMultiqc --filename SE_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
@@ -684,12 +685,13 @@ integration_pe:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./PE_report.html
  - find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
+  - pytest -m completionMultiqc --filename PE_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
@@ -710,11 +712,11 @@ failAmbiguousSpecies:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failAmbiguousSpecies_report.html
  retry:
    max: 0
    when:
@@ -725,11 +727,11 @@ failTrunkation:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failTrunkation_report.html
  retry:
    max: 0
    when:
@@ -740,11 +742,11 @@ failMismatchR1R2:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failMismatchR1R2_report.html
  retry:
    max: 0
    when:
@@ -755,11 +757,11 @@ failUnexpectedMeta:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failUnexpectedMeta_report.html
  retry:
    max: 0
    when:
@@ -770,11 +772,11 @@ failFileStructure:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failFileStructure_report.html
  retry:
    max: 0
    when:
@@ -785,17 +787,20 @@ override_inputBag:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true --track false -with-report ./inputBagOverride_report.html
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_multiqc_data.json \;
+  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./inputBagOverride_multiqc.html \;
+  - pytest -m completionMultiqc --filename inputBagOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
-      - inputBagOverride_PE_multiqc_data.json
+      - inputBagOverride_multiqc_data.json
+      - inputBagOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0
@@ -807,17 +812,20 @@ override_fastq:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6  --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6  --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true --track false -with-report ./fastqOverride_report.html
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_multiqc_data.json \;
+  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./fastqOverride_multiqc.html \;
+  - pytest -m completionMultiqc --filename fastqOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
-      - fastqOverride_PE_multiqc_data.json
+      - fastqOverride_multiqc_data.json
+      - fastqOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0
@@ -829,17 +837,70 @@ override_species:
  only: [merge_requests]
  except:
    variables:
-        - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+  script:
+  - hostname
+  - ulimit -a
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EW --source staging --speciesForce 'Homo sapiens' --upload true --dev false --ci true --track false -with-report ./speciesOverride_report.html
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_multiqc_data.json \;
+  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./speciesOverride_multiqc.html \;
+  - pytest -m completionMultiqc --filename speciesOverride_multiqc_data.json
+  artifacts:
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - speciesOverride_multiqc_data.json
+      - speciesOverride_multiqc.html
+    expire_in: 7 days
+  retry:
+    max: 0
+    when:
+      - always
+
+override_stranded:
+  stage: integration
+  only: [merge_requests]
+  except:
+    variables:
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
+  script:
+  - hostname
+  - ulimit -a
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EY --source staging --strandedForce unstranded --upload true --dev false --ci true --track false -with-report ./strandedOverride_report.html
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./strandedOverride_multiqc_data.json \;
+  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./strandedOverride_multiqc.html \;
+  - pytest -m completionMultiqc --filename strandedOverride_multiqc_data.json
+  artifacts:
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - strandedOverride_multiqc_data.json
+      - strandedOverride_multiqc.html
+    expire_in: 7 days
+  retry:
+    max: 0
+    when:
+      - always
+
+override_spike:
+  stage: integration
+  only: [merge_requests]
+  except:
+    variables:
+      - $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
  script:
  - hostname
  - ulimit -a
-  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --speciesForce 'Homo sapiens' --upload false --dev false --ci true
-  - find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
+  - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F0 --source staging --spikeForce true --upload true --dev false --ci true --track false -with-report ./spikeOverride_report.html
+  - find . -type f -name "multiqc_data.json" -exec cp {} ./spikeOverride_multiqc_data.json \;
+  - find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./spikeOverride_multiqc.html \;
+  - pytest -m completionMultiqc --filename spikeOverride_multiqc_data.json
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
-      - speciesOverride_PE_multiqc_data.json
+      - spikeOverride_multiqc_data.json  # must match the file name produced by the find/cp step in script
+      - spikeOverride_multiqc.html
    expire_in: 7 days
  retry:
    max: 0

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
-# v1.0.3
+# v2.0.0rc01
 **User Facing**
+* Endness metadata "Single Read" changed to "Single End" in data-hub, pipeline updated to handle (#110) ("Single Read" still acceptable for backwards compatibility)
+* Strandedness metadata "yes"/"no" changed to boolean "t"/"f" in data-hub, pipeline updated to handle (#70) ("yes"/"no" still acceptable for backwards compatibility)
+* Upload empty mRNA_QC entry if data error (#111)
+* Allow forcing of strandedness and spike (#100)

 **Background**
 * Add memory limit (75%) per thread for samtools sort (#108)
@@ -15,10 +19,16 @@
 * Detect malformed fastq's (#107)
 * Restrict sampled alignment process to use >32GB nodes on BioHPC (#108)
 * Use nproc**-1** for alignment processes (#108)
+* Data-hub column title change from "Sequencing_Type" to "Experiment_Type" (#114)
+* Data-hub column title change from "Has_Strand_Specific_Information" to "Strandedness" (#115)
+* Merge data error pre-inference execution run upload/finalize to 1 process
+* Change uploadOutputBag logic to reuse the hatrac file if it already exists (re-uses Output_Bag entry by reassigning Execution_Run RID) (#112)
+* Add new CI py tests for override and integration

 *Known Bugs*
 * Override params (inputBag, fastq, species) aren't checked for integrity
 * Authentication files and tokens must be active (active auth client) for the duration of the pipeline run (until long-lived token utilization included)
+* Check for outputBag in hatrac doesn't check for any uploaded by chaise

 <hr>


--- a/README.md
+++ b/README.md
@@ -57,8 +57,12 @@ To Run:
    * eg: `--inputBagForce test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip` (must be the expected bag structure, this example will not work because it is a test bag)
  * `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input)
    * eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around fastq's which must me named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order)
-  * `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses ambiguous species error
+  * `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses a metadata mismatch or an ambiguous species error
    * eg: `--speciesForce 'Mus musculus'`
+  * `--strandedForce` forces the strandedness to be "forward", "reverse" or "unstranded", it bypasses a metadata mismatch error
+    * eg: `--strandedForce 'unstranded'`
+  * `--spikeForce` forces the spike-in to be "false" or "true", it bypasses a metadata mismatch error
+    * eg: `--spikeForce 'true'`
 * Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
  * `--ci` boolean (default = false)
  * `--dev` boolean (default = true)

--- a/conftest.py
+++ b/conftest.py
+#!/usr/bin/env python3
+import pytest
+
+
+def pytest_addoption(parser):
+    # Register a --filename command-line option; its value is stored as given.
+    parser.addoption("--filename", action="store")
+
+@pytest.fixture(scope='session')
+def filename(request):
+    # Session-scoped fixture that returns the value passed via --filename.
+    filename_value = request.config.option.filename
+    # Skip the requesting test when --filename was not supplied.
+    if filename_value is None:
+        pytest.skip()
+    return filename_value
\ No newline at end of file
--- a/docs/dag.png
+++ b/docs/dag.png
--- a/test_data/Replicate_For_Input_Bag(test).json
+++ b/test_data/Replicate_For_Input_Bag(test).json
@@ -19,7 +19,7 @@
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Sequencing_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
        }
      },
      {
@@ -40,7 +40,7 @@
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment Settings",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Has_Strand_Specific_Information,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
        }
      },
      {

--- a/test_data/createTestData.sh
+++ b/test_data/createTestData.sh
@@ -41,8 +41,8 @@ cp Q-Y5F6_1M.R1.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R1.f
 cp Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt

 touch metaTest.csv
-echo 'Replicate_RID,Experiment_RID,Study_RID,Paired_End,File_Type,Has_Strand_Specific_Information,Used_Spike_Ins,Species,Read_Length' > metaTest.csv
-echo 'Replicate_RID,Experiment_RID,Study_RID,uk,FastQ,no,no,Homo sapiens,75' >> metaTest.csv
+echo 'Replicate_RID,Experiment_RID,Study_RID,Paired_End,File_Type,Strandedness,Used_Spike_Ins,Species,Read_Length' > metaTest.csv
+echo 'Replicate_RID,Experiment_RID,Study_RID,uk,FastQ,unstranded,f,Homo sapiens,75' >> metaTest.csv
 cp metaTest.csv ./NEW_test_data/meta/metaTest.csv

 mkdir -p ./NEW_test_data/bam

--- a/workflow/conf/Replicate_For_Input_Bag.json
+++ b/workflow/conf/Replicate_For_Input_Bag.json
@@ -19,7 +19,7 @@
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Sequencing_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
        }
      },
      {
@@ -40,7 +40,7 @@
        "processor": "csv",
        "processor_params": {
          "output_path": "Experiment Settings",
-          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Has_Strand_Specific_Information,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
+          "query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
        }
      },
      {

--- a/workflow/conf/aws.config
+++ b/workflow/conf/aws.config
@@ -112,20 +112,15 @@ process {
    cpus = 1
    memory = '1 GB'
  }
-  withName:failPreExecutionRun_fastq {
+  withName:failPreExecutionRun {
    cpus = 1
    memory = '1 GB'
  }
-  withName:failPreExecutionRun_fastqFile {
-    cpus = 1
-    memory = '1 GB'
-  }
-  withName:failPreExecutionRun_species {
- {
+  withName:failExecutionRun {
    cpus = 1
    memory = '1 GB'
  }
-  withName:failExecutionRun {
+  withName:uploadQC_fail {
    cpus = 1
    memory = '1 GB'
  }

--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -82,16 +82,13 @@ process {
  withName:finalizeExecutionRun {
    executor = 'local'
  }
-  withName:failPreExecutionRun_fastq {
+  withName:failPreExecutionRun {
    executor = 'local'
  }
-  withName:failPreExecutionRun_fastqFile {
-    executor = 'local'
-  }
-  withName:failPreExecutionRun_species {
+  withName:failExecutionRun {
    executor = 'local'
  }
-  withName:failExecutionRun {
+  withName:uploadQC_fail {
    executor = 'local'
  }
 }

--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -88,16 +88,13 @@ process {
  withName:finalizeExecutionRun {
    container = 'gudmaprbk/deriva1.4:1.0.0'
  }
-  withName:failPreExecutionRun_fastq {
+  withName:failPreExecutionRun {
    container = 'gudmaprbk/deriva1.4:1.0.0'
  }
-  withName:failPreExecutionRun_fastqFile {
-    container = 'gudmaprbk/deriva1.4:1.0.0'
-  }
-  withName:failPreExecutionRun_species {
+  withName:failExecutionRun {
    container = 'gudmaprbk/deriva1.4:1.0.0'
  }
-  withName:failExecutionRun {
+  withName:uploadQC_fail {
    container = 'gudmaprbk/deriva1.4:1.0.0'
  }
 }
@@ -128,6 +125,6 @@ manifest {
  homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
  description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
  mainScript = 'rna-seq.nf'
-  version = 'v1.0.3'
+  version = 'v2.0.0rc01'
  nextflowVersion = '>=19.09.0'
 }
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
--- a/workflow/scripts/parse_meta.py
+++ b/workflow/scripts/parse_meta.py
@@ -63,32 +63,17 @@ def main():

    # Get strandedness metadata from 'Experiment Settings.csv'
    if (args.parameter == "stranded"):
-        if (metaFile.Has_Strand_Specific_Information.unique() == "yes"):
-            stranded = "stranded"
-        elif (metaFile.Has_Strand_Specific_Information.unique() == "no"):
-            stranded = "unstranded"
-        else:
-            stranded = metaFile.Has_Strand_Specific_Information.unique()[0]
+        stranded = metaFile.Strandedness.unique()[0]
        print(stranded)

    # Get spike-in metadata from 'Experiment Settings.csv'
    if (args.parameter == "spike"):
-        if (metaFile.Used_Spike_Ins.unique() == "yes"):
-            spike = "yes"
-        elif (metaFile.Used_Spike_Ins.unique() == "no"):
-            spike = "no"
-        else:
-            spike = metaFile.Used_Spike_Ins.unique()[0]
+        spike = metaFile.Used_Spike_Ins.unique()[0]
        print(spike)

    # Get species metadata from 'Experiment.csv'
    if (args.parameter == "species"):
-        if (metaFile.Species.unique() == "Mus musculus"):
-            species = "Mus musculus"
-        elif (metaFile.Species.unique() == "Homo sapiens"):
-            species = "Homo sapiens"
-        else:
-            species = metaFile.Species.unique()[0]
+        species = metaFile.Species.unique()[0]
        print(species)

    # Get read length metadata from 'Experiment Settings.csv'

--- a/workflow/scripts/upload_execution_run.py
+++ b/workflow/scripts/upload_execution_run.py
@@ -48,7 +48,6 @@ def main(hostname, catalog_number, credential):
        }
        entities = run_table.update([run_data])
        rid = args.update
-    

    print(rid)


--- a/workflow/scripts/upload_output_bag.py
+++ b/workflow/scripts/upload_output_bag.py
@@ -14,6 +14,7 @@ def get_args():
    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
    parser.add_argument('-o', '--host', help="datahub host", required=True)
    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
+    parser.add_argument('-u', '--update', help="update?", default="F", required=False)  # optional so the "F" (insert) default applies when -u is omitted
    args = parser.parse_args()
    return args

@@ -22,19 +23,27 @@ def main(hostname, catalog_number, credential):
    pb = catalog.getPathBuilder()
    outputBag_table = pb.RNASeq.Output_Bag

-    outputBag_data = {
-        "Execution_Run": args.executionRunRID,
-        "File_Name": args.file,
-        "File_URL": args.loc,
-        "File_MD5": args.md5,
-        "File_Bytes": args.bytes,
-        "File_Creation_Time": datetime.now().replace(microsecond=0).isoformat(),
-        "Notes": args.notes,
-        "Bag_Type": "mRNA_Replicate_Analysis"
+    if args.update == "F":
+        outputBag_data = {
+            "Execution_Run": args.executionRunRID,
+            "File_Name": args.file,
+            "File_URL": args.loc,
+            "File_MD5": args.md5,
+            "File_Bytes": args.bytes,
+            "File_Creation_Time": datetime.now().replace(microsecond=0).isoformat(),
+            "Notes": args.notes,
+            "Bag_Type": "mRNA_Replicate_Analysis"
        }
+        entities = outputBag_table.insert([outputBag_data])
+        rid = entities[0]["RID"]

-    entities = outputBag_table.insert([outputBag_data])
-    rid = entities[0]["RID"]
+    else:  # args.update holds the RID of an existing Output_Bag row to re-use
+        outputBag_data = {
+            "RID": args.update,
+            "Execution_Run": args.executionRunRID
+        }
+        entities = outputBag_table.update([outputBag_data])  # update, not insert: the row already exists
+        rid = args.update

    print(rid)


--- a/workflow/scripts/upload_qc.py
+++ b/workflow/scripts/upload_qc.py
@@ -7,12 +7,12 @@ def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--repRID', help="replicate RID", required=True)
    parser.add_argument('-e', '--executionRunRID', help="exection run RID", required=True)
-    parser.add_argument('-p', '--ends', help="single/paired ends", required=True)
-    parser.add_argument('-s', '--stranded', help="stranded?", required=True)
-    parser.add_argument('-l', '--length', help="median read length", required=True)
-    parser.add_argument('-w', '--rawCount', help="raw count", required=True)
-    parser.add_argument('-f', '--assignedCount', help="final assigned count", required=True)
-    parser.add_argument('-t', '--tin', help="median TIN", required=True)
+    parser.add_argument('-p', '--ends', help="single/paired ends", required=False)
+    parser.add_argument('-s', '--stranded', help="stranded?", required=False)
+    parser.add_argument('-l', '--length', help="median read length", required=False)
+    parser.add_argument('-w', '--rawCount', help="raw count", required=False)
+    parser.add_argument('-f', '--assignedCount', help="final assigned count", required=False)
+    parser.add_argument('-t', '--tin', help="median TIN", required=False)
    parser.add_argument('-n', '--notes', help="notes", default="", required=False)
    parser.add_argument('-o', '--host', help="datahub host", required=True)
    parser.add_argument('-c', '--cookie', help="cookie token", required=True)
@@ -39,6 +39,13 @@ def main(hostname, catalog_number, credential):
        }
        entities = run_table.insert([run_data])
        rid = entities[0]["RID"]
+    elif args.update == "E":
+        run_data = {
+            "Execution_Run": args.executionRunRID,
+            "Replicate": args.repRID
+        }
+        entities = run_table.insert([run_data])
+        rid = entities[0]["RID"]
    else:
        run_data = {
            "RID": args.update,

--- a/workflow/tests/test_alignReads.py
+++ b/workflow/tests/test_alignReads.py
@@ -5,25 +5,25 @@ import pandas as pd
 import os
 import utils

-data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
    '/../../'


 @pytest.mark.alignData
 def test_alignData_se():
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.unal.gz'))
+        test_output_path, 'Q-Y5F6_1M.se.unal.gz'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.bam'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.bam'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.bam.bai'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.bam.bai'))


 @pytest.mark.alignData
 def test_alignData_pe():
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.pe.unal.gz'))
+        test_output_path, 'Q-Y5F6_1M.pe.unal.gz'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.pe.sorted.bam'))
+        test_output_path, 'Q-Y5F6_1M.pe.sorted.bam'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.pe.sorted.bam.bai'))
+        test_output_path, 'Q-Y5F6_1M.pe.sorted.bam.bai'))
--- a/workflow/tests/test_completion.py
+++ b/workflow/tests/test_completion.py
+#!/usr/bin/env python3
+
+import pytest
+import pandas as pd
+from io import StringIO
+import os
+import json
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+    '/../../'
+
+@pytest.mark.completionMultiqc
+def test_multiqcExist(filename):
+    # Check that the file named by the `filename` fixture exists under
+    # test_output_path (two directories above this test module).
+    assert os.path.exists(os.path.join(
+        test_output_path, filename))
\ No newline at end of file
--- a/workflow/tests/test_dedupReads.py
+++ b/workflow/tests/test_dedupReads.py
@@ -5,25 +5,25 @@ import pandas as pd
 import os
 import utils

-data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
    '/../../'


 @pytest.mark.dedupData
 def test_dedupData():
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.bam'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.bam'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.bam.bai'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.bam.bai'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr8.bam'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr8.bam'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr8.bam.bai'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr8.bam.bai'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr4.bam'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr4.bam'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr4.bam.bai'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chr4.bam.bai'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chrY.bam'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chrY.bam'))
    assert os.path.exists(os.path.join(
-        data_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chrY.bam.bai'))
+        test_output_path, 'Q-Y5F6_1M.se.sorted.deduped.chrY.bam.bai'))
--- a/workflow/tests/test_makeBigWig.py
+++ b/workflow/tests/test_makeBigWig.py
@@ -5,10 +5,10 @@ import pandas as pd
 import os
 import utils

-data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
    '/../../'


 @pytest.mark.makeBigWig
 def test_makeBigWig():
-    assert os.path.exists(os.path.join(data_output_path, 'Q-Y5F6_1M.se.bw'))
+    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5F6_1M.se.bw'))