Commit fca601e4 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch '110-SE' into 'develop'

Resolve "Change Single Read to Single End for submitted endness"

Closes #95, #100, #112, #111, #70, #115, #114, and #110

See merge request !67
parents ef06aea5 9cccaef7
Pipeline #9206 passed with stages
in 41 seconds
before_script:
- module load python/3.6.4-anaconda
- pip install --user attrs==19.1.0 pytest-pythonpath==0.7.1 pytest-cov==2.5.1
- pip install --user attrs==20.3.0 pytest==6.2.2 pytest-pythonpath==0.7.3 pytest-cov==2.11.1
- module load singularity/3.5.3
- module load nextflow/20.01.0
- ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
......@@ -389,7 +389,7 @@ uploadQC:
done
echo all old mRNA QC RIDs deleted
fi
rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single Read" -s forward -l 35 -w 5 -f 1 -t 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
rid=$(singularity run 'docker://gudmaprbk/deriva1.4:1.0.0' python3 ./workflow/scripts/upload_qc.py -r 17-BTFJ -e 17-BVDJ -p "Single End" -s forward -l 35 -w 5 -f 1 -t 1 -n "This is a test mRNA QC" -o staging.gudmap.org -c ${cookie} -u F)
echo ${rid} test mRNA QC created
uploadProcessedFile:
......@@ -660,12 +660,13 @@ integration_se:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./SE_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
- pytest -m completionMultiqc --filename SE_multiqc_data.json
artifacts:
name: "$CI_JOB_NAME"
when: always
......@@ -684,12 +685,13 @@ integration_pe:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./PE_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
- pytest -m completionMultiqc --filename PE_multiqc_data.json
artifacts:
name: "$CI_JOB_NAME"
when: always
......@@ -710,11 +712,11 @@ failAmbiguousSpecies:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failAmbiguousSpecies_report.html
retry:
max: 0
when:
......@@ -725,11 +727,11 @@ failTrunkation:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ET --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failTrunkation_report.html
retry:
max: 0
when:
......@@ -740,11 +742,11 @@ failMismatchR1R2:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-CWH4 --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failMismatchR1R2_report.html
retry:
max: 0
when:
......@@ -755,11 +757,11 @@ failUnexpectedMeta:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 14-3R4R --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failUnexpectedMeta_report.html
retry:
max: 0
when:
......@@ -770,11 +772,11 @@ failFileStructure:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5HT --source staging --upload true -with-dag dag.png --dev false --ci true --track true -with-report ./failFileStructure_report.html
retry:
max: 0
when:
......@@ -785,17 +787,20 @@ override_inputBag:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_PE_multiqc_data.json \;
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --inputBagForce ./test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip --upload false --dev false --ci true --track false -with-report ./inputBagOverride_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./inputBagOverride_multiqc_data.json \;
- find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./inputBagOverride_multiqc.html \;
- pytest -m completionMultiqc --filename inputBagOverride_multiqc_data.json
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- inputBagOverride_PE_multiqc_data.json
- inputBagOverride_multiqc_data.json
- inputBagOverride_multiqc.html
expire_in: 7 days
retry:
max: 0
......@@ -807,17 +812,20 @@ override_fastq:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_PE_multiqc_data.json \;
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F6 --source staging --fastqsForce './test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz' --upload false --dev false --ci true --track false -with-report ./fastqOverride_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./fastqOverride_multiqc_data.json \;
- find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./fastqOverride_multiqc.html \;
- pytest -m completionMultiqc --filename fastqOverride_multiqc_data.json
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- fastqOverride_PE_multiqc_data.json
- fastqOverride_multiqc_data.json
- fastqOverride_multiqc.html
expire_in: 7 days
retry:
max: 0
......@@ -829,17 +837,70 @@ override_species:
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EW --source staging --speciesForce 'Homo sapiens' --upload true --dev false --ci true --track false -with-report ./speciesOverride_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_multiqc_data.json \;
- find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./speciesOverride_multiqc.html \;
- pytest -m completionMultiqc --filename speciesOverride_multiqc_data.json
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- speciesOverride_multiqc_data.json
- speciesOverride_multiqc.html
expire_in: 7 days
retry:
max: 0
when:
- always
# CI integration job: run the pipeline with strandedness forced to "unstranded".
override_stranded:
stage: integration
# Runs for merge requests only, except when the target branch name matches /master/.
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
# Run the workflow for replicate Q-Y5EY with --strandedForce unstranded (tracking disabled, Nextflow report written to ./strandedOverride_report.html).
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5EY --source staging --strandedForce unstranded --upload true --dev false --ci true --track false -with-report ./strandedOverride_report.html
# Copy the MultiQC JSON and HTML outputs to fixed names in the job directory.
- find . -type f -name "multiqc_data.json" -exec cp {} ./strandedOverride_multiqc_data.json \;
- find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./strandedOverride_multiqc.html \;
# Run the completionMultiqc-marked tests against the copied MultiQC JSON.
- pytest -m completionMultiqc --filename strandedOverride_multiqc_data.json
artifacts:
name: "$CI_JOB_NAME"
# Keep the artifacts whether the job passes or fails.
when: always
paths:
- strandedOverride_multiqc_data.json
- strandedOverride_multiqc.html
expire_in: 7 days
# No automatic retries for this job.
retry:
max: 0
when:
- always
override_spike:
stage: integration
only: [merge_requests]
except:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5ER --source staging --speciesForce 'Homo sapiens' --upload false --dev false --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./speciesOverride_PE_multiqc_data.json \;
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5F0 --source staging --spikeForce true --upload true --dev false --ci true --track false -with-report ./spikeOverride_report.html
- find . -type f -name "multiqc_data.json" -exec cp {} ./spikeOverride_multiqc_data.json \;
- find ./**/report/ -type f -name "*multiqc.html" -exec cp {} ./spikeOverride_multiqc.html \;
- pytest -m completionMultiqc --filename spikeOverride_multiqc_data.json
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- speciesOverride_PE_multiqc_data.json
# Must match the file name produced by the "find ... -exec cp" step and passed to pytest (spikeOverride_multiqc_data.json).
- spikeOverride_multiqc_data.json
- spikeOverride_multiqc.html
expire_in: 7 days
retry:
max: 0
......
# v1.0.3
# v2.0.0rc01
**User Facing**
* Endness metadata "Single Read" changed to "Single End" in data-hub, pipeline updated to handle (#110) ("Single Read" still acceptable for backwards compatibility)
* Strandedness metadata "yes"/"no" changed to boolean "t"/"f" in data-hub, pipeline updated to handle (#70) ("yes"/"no" still acceptable for backwards compatibility)
* Upload empty mRNA_QC entry if data error (#111)
* Allow forcing of strandedness and spike (#100)
**Background**
* Add memory limit (75%) per thread for samtools sort (#108)
......@@ -15,10 +19,16 @@
* Detect malformed fastq's (#107)
* Restrict sampled alignment process to use >32GB nodes on BioHPC (#108)
* Use nproc**-1** for alignment processes (#108)
* Data-hub column title change from "Sequencing_Type" to "Experiment_Type" (#114)
* Data-hub column title change from "Has_Strand_Specific_Information" to "Strandedness" (#115)
* Merge data error pre-inference execution run upload/finalize to 1 process
* Change uploadOutputBag logic to reuse the hatrac file if it already exists (re-uses Output_Bag entry by reassigning Execution_Run RID) (#112)
* Add new CI py tests for override and integration
*Known Bugs*
* Override params (inputBag, fastq, species) aren't checked for integrity
* Authentication files and tokens must be active (active auth client) for the duration of the pipeline run (until long-lived token utilization included)
* Check for outputBag in hatrac doesn't check for any uploaded by chaise
<hr>
......
......@@ -57,8 +57,12 @@ To Run:
* eg: `--inputBagForce test_data/bag/Q-Y5F6_inputBag_xxxxxxxx.zip` (must be the expected bag structure, this example will not work because it is a test bag)
* `--fastqsForce` utilizes local fastq's instead of downloading from the data-hub (still requires accurate repRID input)
* eg: `--fastqsForce 'test_data/fastq/small/Q-Y5F6_1M.R{1,2}.fastq.gz'` (note the quotes around the fastq's, which must be named in the correct standard [*\*.R1.fastq.gz and/or \*.R2.fastq.gz*] and in the correct order)
* `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses ambiguous species error
* `--speciesForce` forces the species to be "Mus musculus" or "Homo sapiens", it bypasses a metadata mismatch or an ambiguous species error
* eg: `--speciesForce 'Mus musculus'`
* `--strandedForce` forces the strandedness to be "forward", "reverse" or "unstranded", it bypasses a metadata mismatch error
* eg: `--strandedForce 'unstranded'`
* `--spikeForce` forces the spike-in to be "false" or "true", it bypasses a metadata mismatch error
* eg: `--spikeForce 'true'`
* Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
* `--ci` boolean (default = false)
* `--dev` boolean (default = true)
......
#!/usr/bin/env python3
import pytest
def pytest_addoption(parser):
    """Register the ``--filename`` command-line option with pytest.

    The value is stored as given on the command line and is read back by
    the ``filename`` fixture; it is ``None`` when the option is not passed.
    """
    parser.addoption(
        "--filename",
        action="store",
        default=None,
        help="name of the output file the selected tests should check",
    )
@pytest.fixture(scope='session')
def filename(request):
    """Session-scoped fixture returning the value passed via ``--filename``.

    Tests that request this fixture are skipped when the option was not
    supplied on the command line.
    """
    filename_value = request.config.getoption("--filename")
    if filename_value is None:
        # Nothing to check without a target file; give a reason so the
        # test report explains why the test was skipped.
        pytest.skip("no --filename option supplied")
    return filename_value
\ No newline at end of file
This image diff could not be displayed because it is too large. You can view the blob instead.
......@@ -19,7 +19,7 @@
"processor": "csv",
"processor_params": {
"output_path": "Experiment",
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Sequencing_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
}
},
{
......@@ -40,7 +40,7 @@
"processor": "csv",
"processor_params": {
"output_path": "Experiment Settings",
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Has_Strand_Specific_Information,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
}
},
{
......
......@@ -41,8 +41,8 @@ cp Q-Y5F6_1M.R1.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R1.f
cp Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt
touch metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,Paired_End,File_Type,Has_Strand_Specific_Information,Used_Spike_Ins,Species,Read_Length' > metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,uk,FastQ,no,no,Homo sapiens,75' >> metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,Paired_End,File_Type,Strandedness,Used_Spike_Ins,Species,Read_Length' > metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,uk,FastQ,unstranded,f,Homo sapiens,75' >> metaTest.csv
cp metaTest.csv ./NEW_test_data/meta/metaTest.csv
mkdir -p ./NEW_test_data/bam
......
......@@ -19,7 +19,7 @@
"processor": "csv",
"processor_params": {
"output_path": "Experiment",
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Sequencing_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment:RID)/Experiment_RID:=RID,Study_RID,Internal_ID,Name,Description,Experiment_Method,Experiment_Type,Species,Specimen_Type,Molecule_Type,Pooled_Sample,Pool_Size,Markers,Cell_Count,Treatment_Protocol,Treatment_Protocol_Reference,Isolation_Protocol,Isolation_Protocol_Reference,Growth_Protocol,Growth_Protocol_Reference,Label_Protocol,Label_Protocol_Reference,Hybridization_Protocol,Hybridization_Protocol_Reference,Scan_Protocol,Scan_Protocol_Reference,Data_Processing,Value_Definition,Notes,Principal_Investigator,Consortium,Release_Date,RCT,RMT?limit=none"
}
},
{
......@@ -40,7 +40,7 @@
"processor": "csv",
"processor_params": {
"output_path": "Experiment Settings",
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Has_Strand_Specific_Information,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
"query_path": "/attribute/M:=RNASeq:Replicate/RID={rid}/(Experiment_RID)=(RNASeq:Experiment_Settings:Experiment_RID)/RID,Experiment_RID,Alignment_Format,Aligner,Aligner_Version,Reference_Genome,Sequence_Trimming,Duplicate_Removal,Pre-alignment_Sequence_Removal,Junction_Reads,Library_Type,Protocol_Reference,Library_Selection,Quantification_Format,Quantification_Software,Expression_Metric,Transcriptome_Model,Sequencing_Platform,Paired_End,Read_Length,Strandedness,Used_Spike_Ins,Spike_Ins_Amount,Visualization_Format,Visualization_Software,Visualization_Version,Visualization_Setting,Notes,RCT,RMT?limit=none"
}
},
{
......
......@@ -112,20 +112,15 @@ process {
cpus = 1
memory = '1 GB'
}
withName:failPreExecutionRun_fastq {
withName:failPreExecutionRun {
cpus = 1
memory = '1 GB'
}
withName:failPreExecutionRun_fastqFile {
cpus = 1
memory = '1 GB'
}
withName:failPreExecutionRun_species {
{
withName:failExecutionRun {
cpus = 1
memory = '1 GB'
}
withName:failExecutionRun {
withName:uploadQC_fail {
cpus = 1
memory = '1 GB'
}
......
......@@ -82,16 +82,13 @@ process {
withName:finalizeExecutionRun {
executor = 'local'
}
withName:failPreExecutionRun_fastq {
withName:failPreExecutionRun {
executor = 'local'
}
withName:failPreExecutionRun_fastqFile {
executor = 'local'
}
withName:failPreExecutionRun_species {
withName:failExecutionRun {
executor = 'local'
}
withName:failExecutionRun {
withName:uploadQC_fail {
executor = 'local'
}
}
......
......@@ -88,16 +88,13 @@ process {
withName:finalizeExecutionRun {
container = 'gudmaprbk/deriva1.4:1.0.0'
}
withName:failPreExecutionRun_fastq {
withName:failPreExecutionRun {
container = 'gudmaprbk/deriva1.4:1.0.0'
}
withName:failPreExecutionRun_fastqFile {
container = 'gudmaprbk/deriva1.4:1.0.0'
}
withName:failPreExecutionRun_species {
withName:failExecutionRun {
container = 'gudmaprbk/deriva1.4:1.0.0'
}
withName:failExecutionRun {
withName:uploadQC_fail {
container = 'gudmaprbk/deriva1.4:1.0.0'
}
}
......@@ -128,6 +125,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
mainScript = 'rna-seq.nf'
version = 'v1.0.3'
version = 'v2.0.0rc01'
nextflowVersion = '>=19.09.0'
}
This diff is collapsed.
......@@ -63,32 +63,17 @@ def main():
# Get strandedness metadata from 'Experiment Settings.csv'
if (args.parameter == "stranded"):
if (metaFile.Has_Strand_Specific_Information.unique() == "yes"):
stranded = "stranded"
elif (metaFile.Has_Strand_Specific_Information.unique() == "no"):
stranded = "unstranded"
else:
stranded = metaFile.Has_Strand_Specific_Information.unique()[0]
stranded = metaFile.Strandedness.unique()[0]
print(stranded)
# Get spike-in metadata from 'Experiment Settings.csv'
if (args.parameter == "spike"):
if (metaFile.Used_Spike_Ins.unique() == "yes"):
spike = "yes"
elif (metaFile.Used_Spike_Ins.unique() == "no"):
spike = "no"
else:
spike = metaFile.Used_Spike_Ins.unique()[0]
spike = metaFile.Used_Spike_Ins.unique()[0]
print(spike)
# Get species metadata from 'Experiment.csv'
if (args.parameter == "species"):
if (metaFile.Species.unique() == "Mus musculus"):
species = "Mus musculus"
elif (metaFile.Species.unique() == "Homo sapiens"):
species = "Homo sapiens"
else:
species = metaFile.Species.unique()[0]
species = metaFile.Species.unique()[0]
print(species)
# Get read length metadata from 'Experiment Settings.csv'
......
......@@ -48,7 +48,6 @@ def main(hostname, catalog_number, credential):
}
entities = run_table.update([run_data])
rid = args.update
print(rid)
......
......@@ -14,6 +14,7 @@ def get_args():
parser.add_argument('-n', '--notes', help="notes", default="", required=False)
parser.add_argument('-o', '--host', help="datahub host", required=True)
parser.add_argument('-c', '--cookie', help="cookie token", required=True)
parser.add_argument('-u', '--update', help="update?", default="F", required=True)
args = parser.parse_args()
return args
......@@ -22,19 +23,27 @@ def main(hostname, catalog_number, credential):
pb = catalog.getPathBuilder()
outputBag_table = pb.RNASeq.Output_Bag
outputBag_data = {
"Execution_Run": args.executionRunRID,
"File_Name": args.file,
"File_URL": args.loc,
"File_MD5": args.md5,
"File_Bytes": args.bytes,
"File_Creation_Time": datetime.now().replace(microsecond=0).isoformat(),
"Notes": args.notes,
"Bag_Type": "mRNA_Replicate_Analysis"
if args.update == "F":
outputBag_data = {
"Execution_Run": args.executionRunRID,
"File_Name": args.file,
"File_URL": args.loc,
"File_MD5": args.md5,
"File_Bytes": args.bytes,
"File_Creation_Time": datetime.now().replace(microsecond=0).isoformat(),
"Notes": args.notes,
"Bag_Type": "mRNA_Replicate_Analysis"
}
entities = outputBag_table.insert([outputBag_data])
rid = entities[0]["RID"]
entities = outputBag_table.insert([outputBag_data])
rid = entities[0]["RID"]
else:
# Re-use the existing Output_Bag row: update it (identified by its RID) so it
# points at the new Execution_Run, instead of inserting another row.
outputBag_data = {
    "RID": args.update,
    "Execution_Run": args.executionRunRID
}
entities = outputBag_table.update([outputBag_data])
# The row keeps its RID, so report the one that was passed in.
rid = args.update
print(rid)
......
......@@ -7,12 +7,12 @@ def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('-r', '--repRID', help="replicate RID", required=True)
parser.add_argument('-e', '--executionRunRID', help="exection run RID", required=True)
parser.add_argument('-p', '--ends', help="single/paired ends", required=True)
parser.add_argument('-s', '--stranded', help="stranded?", required=True)
parser.add_argument('-l', '--length', help="median read length", required=True)
parser.add_argument('-w', '--rawCount', help="raw count", required=True)
parser.add_argument('-f', '--assignedCount', help="final assigned count", required=True)
parser.add_argument('-t', '--tin', help="median TIN", required=True)
parser.add_argument('-p', '--ends', help="single/paired ends", required=False)
parser.add_argument('-s', '--stranded', help="stranded?", required=False)
parser.add_argument('-l', '--length', help="median read length", required=False)
parser.add_argument('-w', '--rawCount', help="raw count", required=False)
parser.add_argument('-f', '--assignedCount', help="final assigned count", required=False)
parser.add_argument('-t', '--tin', help="median TIN", required=False)
parser.add_argument('-n', '--notes', help="notes", default="", required=False)
parser.add_argument('-o', '--host', help="datahub host", required=True)
parser.add_argument('-c', '--cookie', help="cookie token", required=True)
......@@ -39,6 +39,13 @@ def main(hostname, catalog_number, credential):
}
entities = run_table.insert([run_data])
rid = entities[0]["RID"]
elif args.update == "E":
run_data = {
"Execution_Run": args.executionRunRID,
"Replicate": args.repRID
}
entities = run_table.insert([run_data])
rid = entities[0]["RID"]
else:
run_data = {
"RID": args.update,
......
......@@ -5,25 +5,25 @@ import pandas as pd
import os
import utils
data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../'
@pytest.mark.alignData
def test_alignData_se():
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.se.unal.gz'))
test_output_path, 'Q-Y5F6_1M.se.unal.gz'))
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.se.sorted.bam'))
test_output_path, 'Q-Y5F6_1M.se.sorted.bam'))
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.se.sorted.bam.bai'))
test_output_path, 'Q-Y5F6_1M.se.sorted.bam.bai'))
@pytest.mark.alignData
def test_alignData_pe():
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.pe.unal.gz'))
test_output_path, 'Q-Y5F6_1M.pe.unal.gz'))
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.pe.sorted.bam'))
test_output_path, 'Q-Y5F6_1M.pe.sorted.bam'))
assert os.path.exists(os.path.join(
data_output_path, 'Q-Y5F6_1M.pe.sorted.bam.bai'))
test_output_path, 'Q-Y5F6_1M.pe.sorted.bam.bai'))
#!/usr/bin/env python3
import pytest
import pandas as pd
from io import StringIO
import os
import json
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../'
@pytest.mark.completionMultiqc
def test_multiqcExist(filename):
    """The file named by ``--filename`` must exist under ``test_output_path``."""
    expected_file = os.path.join(test_output_path, filename)
    assert os.path.exists(expected_file)
\ No newline at end of file
......@@ -5,25 +5,25 @@ import pandas as pd
import os
import utils
data_output_path = os.path.dirname(os.path.abspath(__file__)) + \
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../'
@pytest.mark.dedupData