Commit 0a0b51e4 authored by Gervaise Henry

Merge branch '78-tool_version' into develop

parents f232c58b df286d4f
Part of merge requests !58 (Develop) and !48 (Resolve "Output tool version to report in multiqc")
@@ -9,6 +9,7 @@ before_script:
stages:
  - unit
  - aggregation
  - reference
  - integration
  - consistency
@@ -22,8 +23,15 @@ getBag:
    - merge_requests
  script:
    - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli --version > version_deriva.txt
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
    - pytest -m getBag
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_deriva.txt
    expire_in: 7 days
getData:
  stage: unit
@@ -33,10 +41,17 @@ getData:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag --version > version_bdbag.txt
    - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
    - unzip ./test_data/bag/Replicate_Q-Y5F6.zip
    - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST
    - pytest -m getData
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_bdbag.txt
    expire_in: 7 days
parseMetadata:
  stage: unit
@@ -46,6 +61,7 @@ parseMetadata:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/python3:2.0.1_indev' python3 --version > version_python.txt
    - rep=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
    - exp=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
    - study=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
@@ -57,6 +73,12 @@ parseMetadata:
    - readLength=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.stageNew.csv" -p readLength)
    - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
    - pytest -m parseMetadata
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_python.txt
    expire_in: 7 days
inferMetadata:
  stage: unit
@@ -66,6 +88,7 @@ inferMetadata:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py --version > version_rseqc.txt
    - >
      align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
      if [[ ${align} == "" ]]; then exit 1; fi
@@ -74,6 +97,12 @@ inferMetadata:
      ended=`singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/inferMeta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
      if [[ ${ended} == "" ]]; then exit 1; fi
    - pytest -m inferMetadata
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_rseqc.txt
    expire_in: 7 days
trimData:
  stage: unit
@@ -83,11 +112,18 @@ trimData:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --version > version_trimgalore.txt
    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
    - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
    - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
    - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
    - pytest -m trimData
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_trimgalore.txt
    expire_in: 7 days
downsampleData:
  stage: unit
@@ -100,6 +136,7 @@ downsampleData:
    - singularity run 'docker://bicf/seqtk:2.0.1_indev' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
    - pytest -m downsampleData

alignData:
  stage: unit
  only:
@@ -108,6 +145,8 @@ alignData:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 --version > version_hisat2.txt
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools --version > version_samtools.txt
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
@@ -117,6 +156,14 @@ alignData:
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
    - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
    - pytest -m alignData
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_hisat2.txt
      - version_samtools.txt
    expire_in: 7 days
dedupData:
  stage: unit
@@ -126,6 +173,8 @@ dedupData:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools --version > version_samtools.txt
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates --version 2> version_markdups.txt&
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
    - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
@@ -134,6 +183,13 @@ dedupData:
      echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
      done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
    - pytest -m dedupData
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_markdups.txt
      - version_samtools.txt
    expire_in: 7 days
countData:
  stage: unit
@@ -149,7 +205,16 @@ countData:
    - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData
    - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
    - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
    - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -v &> version_featurecounts.txt
    - singularity run 'docker://bicf/subread2:2.0.0' R --version > version_r.txt
    - pytest -m makeFeatureCounts
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_featurecounts.txt
      - version_r.txt
    expire_in: 7 days
makeBigWig:
  stage: unit
@@ -159,8 +224,15 @@ makeBigWig:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' deeptools --version > version_deeptools.txt
    - singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
    - pytest -m makeBigWig
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_deeptools.txt
    expire_in: 7 days
fastqc:
  stage: unit
@@ -170,8 +242,16 @@ fastqc:
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc --version > version_fastqc.txt
    - singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
    - pytest -m fastqc
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - version_fastqc.txt
    expire_in: 7 days
dataQC:
  stage: unit
@@ -199,6 +279,26 @@ outputBag:
    - pytest -m outputBag

generateVersions:
  stage: aggregation
  only:
    - push
    - tags
  except:
    - merge_requests
  script:
    - singularity run 'docker://bicf/multiqc1.8:2.0.1_indev' multiqc --version > version_multiqc.txt
    - python ./workflow/scripts/generate_versions.py -o software_versions
    - python ./workflow/scripts/generate_references.py -r ./docs/references.md -o software_references
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - software_references_mqc.yaml
      - software_versions_mqc.yaml
    expire_in: 7 days
humanBioHPC:
  stage: reference
  only:
...
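For local debugging, the new generateVersions job above can be approximated outside CI. The following is a minimal sketch, assuming the repository root as the working directory, the scripts' dependencies available locally (numpy, plus pandoc on PATH for generate_references.py), and no particular set of version_*.txt files present; tools whose version file is missing are simply reported as "Not Run" by generate_versions.py.

# Hypothetical local smoke test; not part of the pipeline or the CI configuration.
import subprocess
from pathlib import Path

subprocess.check_call([
    "python", "./workflow/scripts/generate_references.py",
    "-r", "./docs/references.md", "-o", "software_references",
])
subprocess.check_call([
    "python", "./workflow/scripts/generate_versions.py",
    "-o", "software_versions",
])

# The CI job uploads these two files as artifacts for the MultiQC report to consume.
for name in ("software_references_mqc.yaml", "software_versions_mqc.yaml"):
    assert Path(name).is_file(), "{} was not produced".format(name)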
@@ -6,6 +6,8 @@ These are the most common things requested on pull requests.
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] Documentation in `docs` is updated
- [ ] Replace dag.png with the most recent CI pipeline integrated_pe artifact
- [ ] Replace software_versions_mqc.yaml with the most recent CI pipeline generateVersions artifact
- [ ] Replace software_references_mqc.yaml with the most recent CI pipeline generateVersions artifact
- [ ] `CHANGELOG.md` is updated
- [ ] `README.md` is updated
- [ ] `LICENSE.md` is updated with new contributors
...
@@ -2,6 +2,7 @@
**User Facing**
* Add option to pull references from datahub
* Add option to send email on workflow error, with pipeline error message
* Add versions and paper references of the software used to the report

**Background**
* Remove (comment out) option to pull references from S3
...
docs/dag.png: binary image updated (733 KiB → 767 KiB)
### References
1. **python**:
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
2. **DERIVA**:
* Bugacov, A., Czajkowski, K., Kesselman, C., Kumar, A., Schuler, R. E. and Tangmunarunkit, H. 2017 Experiences with DERIVA: An Asset Management Platform for Accelerating eScience. IEEE 13th International Conference on e-Science (e-Science), Auckland, 2017, pp. 79-88, doi:[10.1109/eScience.2017.20](https://doi.org/10.1109/eScience.2017.20).
3. **BDBag**:
* D'Arcy, M., Chard, K., Foster, I., Kesselman, C., Madduri, R., Saint, N., & Wagner, R.. 2019. Big Data Bags: A Scalable Packaging Format for Science. Zenodo. doi:[10.5281/zenodo.3338725](http://doi.org/10.5281/zenodo.3338725).
4. **RSeQC**:
* Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:[10.1093/bioinformatics/bts356](https://doi.org/10.1093/bioinformatics/bts356).
5. **trimgalore**:
* trimgalore [https://github.com/FelixKrueger/TrimGalore](https://github.com/FelixKrueger/TrimGalore)
6. **hisat2**:
* Kim, D., Paggi, J.M., Park, C., Bennett, C., Salzberg, S.L. 2019 Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. Aug;37(8):907-915. doi:[10.1038/s41587-019-0201-4](https://doi.org/10.1038/s41587-019-0201-4).
7. **samtools**:
* Li H., B. Handsaker, A. Wysoker, T. Fennell, J. Ruan, N. Homer, G. Marth, G. Abecasis, R. Durbin, and 1000 Genome Project Data Processing Subgroup. 2009. The Sequence alignment/map (SAM) format and SAMtools. Bioinformatics 25: 2078-9. doi:[10.1093/bioinformatics/btp352](http://dx.doi.org/10.1093/bioinformatics/btp352)
8. **picard**:
* “Picard Toolkit.” 2019. Broad Institute, GitHub Repository. [http://broadinstitute.github.io/picard/](http://broadinstitute.github.io/picard/); Broad Institute
9. **featureCounts**:
* Liao, Y., Smyth, G.K., Shi, W. 2014 featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. Apr 1;30(7):923-30. doi:[10.1093/bioinformatics/btt656](https://doi.org/10.1093/bioinformatics/btt656).
10. **R**:
* R Core Team 2014. R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL:[http://www.R-project.org/](http://www.R-project.org/).
11. **deeptools**:
* Ramírez, F., D. P. Ryan, B. Grüning, V. Bhardwaj, F. Kilpert, A. S. Richter, S. Heyne, F. Dündar, and T. Manke. 2016. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Research 44: W160-165. doi:[10.1093/nar/gkw257](http://dx.doi.org/10.1093/nar/gkw257)
12. **FastQC**
* FastQC [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
13. **MultiQC**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
14. **Nextflow**:
* Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., and Notredame, C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316.
id: 'software_references'
section_name: 'Software References'
description: 'This section describes references for the tools used.'
plot_type: 'html'
data: |
<h3 id="references">References</h3>
<ol style="list-style-type: decimal">
<li><strong>python</strong>:</li>
</ol>
<ul>
<li>Anaconda (Anaconda Software Distribution, <a href="https://anaconda.com" class="uri">https://anaconda.com</a>)</li>
</ul>
<ol start="2" style="list-style-type: decimal">
<li><strong>DERIVA</strong>:</li>
</ol>
<ul>
<li>Bugacov, A., Czajkowski, K., Kesselman, C., Kumar, A., Schuler, R. E. and Tangmunarunkit, H. 2017 Experiences with DERIVA: An Asset Management Platform for Accelerating eScience. IEEE 13th International Conference on e-Science (e-Science), Auckland, 2017, pp. 79-88, doi:<a href="https://doi.org/10.1109/eScience.2017.20">10.1109/eScience.2017.20</a>.</li>
</ul>
<ol start="3" style="list-style-type: decimal">
<li><strong>BDBag</strong>:<br />
</li>
</ol>
<ul>
<li>D'Arcy, M., Chard, K., Foster, I., Kesselman, C., Madduri, R., Saint, N., &amp; Wagner, R.. 2019. Big Data Bags: A Scalable Packaging Format for Science. Zenodo. doi:<a href="http://doi.org/10.5281/zenodo.3338725">10.5281/zenodo.3338725</a>.</li>
</ul>
<ol start="4" style="list-style-type: decimal">
<li><strong>RSeQC</strong>:</li>
</ol>
<ul>
<li>Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:<a href="https://doi.org/10.1093/bioinformatics/bts356">10.1093/bioinformatics/bts356</a>.</li>
</ul>
<ol start="5" style="list-style-type: decimal">
<li><strong>trimgalore</strong>:</li>
</ol>
<ul>
<li>trimgalore <a href="https://github.com/FelixKrueger/TrimGalore" class="uri">https://github.com/FelixKrueger/TrimGalore</a></li>
</ul>
<ol start="6" style="list-style-type: decimal">
<li><strong>hisat2</strong>:</li>
</ol>
<ul>
<li>Kim, D., Paggi, J.M., Park, C., Bennett, C., Salzberg, S.L. 2019 Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. Aug;37(8):907-915. doi:<a href="https://doi.org/10.1038/s41587-019-0201-4">10.1038/s41587-019-0201-4</a></li>
</ul>
<ol start="7" style="list-style-type: decimal">
<li><strong>samtools</strong>:</li>
</ol>
<ul>
<li>Li H., B. Handsaker, A. Wysoker, T. Fennell, J. Ruan, N. Homer, G. Marth, G. Abecasis, R. Durbin, and 1000 Genome Project Data Processing Subgroup. 2009. The Sequence alignment/map (SAM) format and SAMtools. Bioinformatics 25: 2078-9. doi:<a href="http://dx.doi.org/10.1093/bioinformatics/btp352">10.1093/bioinformatics/btp352</a></li>
</ul>
<ol start="8" style="list-style-type: decimal">
<li><strong>picard</strong>:</li>
</ol>
<ul>
<li>“Picard Toolkit.” 2019. Broad Institute, GitHub Repository. <a href="http://broadinstitute.github.io/picard/" class="uri">http://broadinstitute.github.io/picard/</a>; Broad Institute</li>
</ul>
<ol start="9" style="list-style-type: decimal">
<li><strong>featureCounts</strong>:</li>
</ol>
<ul>
<li>Liao, Y., Smyth, G.K., Shi, W. 2014 featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. Apr 1;30(7):923-30. doi:<a href="https://doi.org/10.1093/bioinformatics/btt656">10.1093/bioinformatics/btt656</a>.</li>
</ul>
<ol start="10" style="list-style-type: decimal">
<li><strong>R</strong>:</li>
</ol>
<ul>
<li>R Core Team 2014. R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL:<a href="http://www.R-project.org/" class="uri">http://www.R-project.org/</a>.</li>
</ul>
<ol start="11" style="list-style-type: decimal">
<li><strong>deeptools</strong>:</li>
</ol>
<ul>
<li>Ramírez, F., D. P. Ryan, B. Grüning, V. Bhardwaj, F. Kilpert, A. S. Richter, S. Heyne, F. Dündar, and T. Manke. 2016. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Research 44: W160-165. doi:<a href="http://dx.doi.org/10.1093/nar/gkw257">10.1093/nar/gkw257</a></li>
</ul>
<ol start="12" style="list-style-type: decimal">
<li><strong>FastQC</strong></li>
</ol>
<ul>
<li>FastQC <a href="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" class="uri">https://www.bioinformatics.babraham.ac.uk/projects/fastqc/</a></li>
</ul>
<ol start="13" style="list-style-type: decimal">
<li><strong>MultiQC</strong>:</li>
</ol>
<ul>
<li>Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:<a href="https://dx.doi.org/10.1093/bioinformatics/btw354">10.1093/bioinformatics/btw354</a></li>
</ul>
<ol start="14" style="list-style-type: decimal">
<li><strong>Nextflow</strong>:</li>
</ol>
<ul>
<li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., and Notredame, C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316.</li>
</ul>
id: 'software_versions'
section_name: 'Software Versions'
section_href: 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/blob/78-tool_version/docs/RNA-Seq%20Pipeline%20Design%20Process%20Table.pdf'
plot_type: 'html'
description: 'are collected for pipeline version.'
data: |
<dl class="dl-horizontal">
<dt>Python</dt><dd>v3.7.7</dd>
<dt>DERIVA</dt><dd>v1.0.0</dd>
<dt>BDBag</dt><dd>v1.5.6</dd>
<dt>RSeQC</dt><dd>v3.0.1</dd>
<dt>Trim Galore!</dt><dd>v0.6.4</dd>
<dt>HISAT2</dt><dd>v2.1.0</dd>
<dt>Samtools</dt><dd>v1.9</dd>
<dt>picard (MarkDuplicates)</dt><dd>v2.23.0-SNAPSHOT</dd>
<dt>featureCounts</dt><dd>v2.0.0</dd>
<dt>R</dt><dd>v3.6.3</dd>
<dt>deepTools</dt><dd>v3.3.2</dd>
<dt>FastQC</dt><dd>v0.11.9</dd>
<dt>MultiQC</dt><dd>v1.8</dd>
<dt>Pipeline Version</dt><dd>v0.0.4_indev</dd>
</dl>
@@ -56,6 +56,10 @@ report_section_order:
    order: 2000
  ref:
    order: 1000
  software_versions:
    order: -1000
  software_references:
    order: -2000
skip_generalstats: true
@@ -152,4 +156,4 @@ sp:
  ref:
    fn: 'reference.tsv'
  tin:
    fn: '*.tin.hist.tsv'
\ No newline at end of file
@@ -70,6 +70,8 @@ if (params.refSource == "biohpc") {
referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"])
multiqcConfig = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml")
bicfLogo = Channel.fromPath("${baseDir}/../docs/bicf_logo.png")
softwareReferences = Channel.fromPath("${baseDir}/../docs/software_references_mqc.yaml")
softwareVersions = Channel.fromPath("${baseDir}/../docs/software_versions_mqc.yaml")

// Define script files
script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
@@ -1126,6 +1128,8 @@ process aggrQC {
  input:
    path multiqcConfig
    path bicfLogo
    path softwareReferences
    path softwareVersions
    path fastqc
    path trimQC
    path alignQC
...
#!/usr/bin/env python3
#generate_references.py
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*

import argparse
import subprocess
import shlex
import logging

EPILOG = '''
For more details:
%(prog)s --help
'''

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)


def get_args():
    '''Define arguments.'''

    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-r', '--reference',
                        help="The reference file (markdown format).",
                        required=True)

    parser.add_argument('-o', '--output',
                        help="The out file name.",
                        default='references')

    args = parser.parse_args()
    return args


def main():
    args = get_args()
    reference = args.reference
    output = args.output

    out_filename = output + '_mqc.yaml'

    # Header for HTML
    print(
        '''
id: 'software_references'
section_name: 'Software References'
description: 'This section describes references for the tools used.'
plot_type: 'html'
data: |
'''
        , file = open(out_filename, "w")
    )

    # Turn Markdown into HTML
    references_html = 'bash -c "pandoc -p {} | sed \'s/^/ /\' >> {}"'
    references_html = references_html.format(reference, out_filename)
    subprocess.check_call(shlex.split(references_html))


if __name__ == '__main__':
    main()
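As an illustration of the output shape this script produces, the sketch below writes the same MultiQC custom-content header and nests a single hand-written HTML line under the data block; in the script itself the HTML comes from pandoc and the nesting comes from the sed step, so the indentation width used here is an assumption.

# Illustrative only; mimics the layout written by generate_references.py with a
# stand-in HTML line instead of pandoc's real output.
header = (
    "id: 'software_references'\n"
    "section_name: 'Software References'\n"
    "description: 'This section describes references for the tools used.'\n"
    "plot_type: 'html'\n"
    "data: |\n"
)
sample_html = ['<h3 id="references">References</h3>']

with open("software_references_mqc.yaml", "w") as out_file:
    out_file.write(header)
    for line in sample_html:
        # Indent so the HTML sits inside the 'data' block.
        out_file.write("    " + line + "\n")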
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# * --------------------------------------------------------------------------
# * Licensed under MIT (https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/blob/master/LICENSE)
# * --------------------------------------------------------------------------
#

'''Make YAML of software versions.'''

from __future__ import print_function
from collections import OrderedDict
import re
import os
import logging
import glob
import argparse
import numpy as np

EPILOG = '''
For more details:
%(prog)s --help
'''

# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)

SOFTWARE_REGEX = {
    'Python': ['version_python.txt', r"Python (\S+)"],
    'DERIVA': ['version_deriva.txt', r"(\S+)"],
    'BDBag': ['version_bdbag.txt', r"BDBag (\S+) \(Bagit \S+\)"],
    'RSeQC': ['version_rseqc.txt', r"infer_experiment.py (\S+)"],
    'Trim Galore!': ['version_trimgalore.txt', r"version (\S+)"],
    'HISAT2': ['version_hisat2.txt', r"version (\S+)"],
    'Samtools': ['version_samtools.txt', r"samtools (\S+)"],
    'picard (MarkDuplicates)': ['version_markdups.txt', r"(\S\.\S{2}\.\S+)"],
    'featureCounts': ['version_featurecounts.txt', r"featureCounts v(\S+)"],
    'R': ['version_r.txt', r"R version (\S+)"],
    'deepTools': ['version_deeptools.txt', r"deeptools (\S+)"],
    'FastQC': ['version_fastqc.txt', r"FastQC v(\S+)"],
    'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
    'Pipeline Version': ['./workflow/nextflow.config', r"version = 'v(\S+)'"]
}


def get_args():
    '''Define arguments.'''

    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-o', '--output',
                        help="The out file name.",
                        required=True)

    parser.add_argument('-t', '--test',
                        help='Used for testing purposes',
                        default=False,
                        action='store_true')

    args = parser.parse_args()
    return args


def check_files(files, test):
    '''Check if version files are found.'''

    logger.info("Running file check.")

    software_files = np.array(list(SOFTWARE_REGEX.values()))[:,0]

    extra_files = set(files) - set(software_files)

    if len(extra_files) > 0 and test:
        logger.error('Missing regex: %s', list(extra_files))
        raise Exception("Missing regex: %s" % list(extra_files))


def main():
    args = get_args()
    output = args.output
    test = args.test

    out_filename = output + '_mqc.yaml'

    results = OrderedDict()
    results['Python'] = '<span style="color:#999999;\">Not Run</span>'
    results['DERIVA'] = '<span style="color:#999999;\">Not Run</span>'
    results['BDBag'] = '<span style="color:#999999;\">Not Run</span>'
    results['RSeQC'] = '<span style="color:#999999;\">Not Run</span>'
    results['Trim Galore!'] = '<span style="color:#999999;\">Not Run</span>'
    results['HISAT2'] = '<span style="color:#999999;\">Not Run</span>'
    results['Samtools'] = '<span style="color:#999999;\">Not Run</span>'
    results['picard (MarkDuplicates)'] = '<span style="color:#999999;\">Not Run</span>'
    results['featureCounts'] = '<span style="color:#999999;\">Not Run</span>'
    results['R'] = '<span style="color:#999999;\">Not Run</span>'
    results['deepTools'] = '<span style="color:#999999;\">Not Run</span>'
    results['FastQC'] = '<span style="color:#999999;\">Not Run</span>'
    results['MultiQC'] = '<span style="color:#999999;\">Not Run</span>'
    results['Pipeline Version'] = '<span style="color:#999999;\">Not Run</span>'

    # list all files
    files = glob.glob('**/*.txt', recursive=True)

    # Check for version files:
    check_files(files, test)

    # Search each file using its regex
    for k, v in SOFTWARE_REGEX.items():
        if os.path.isfile(v[0]):
            with open(v[0]) as x:
                versions = x.read()
                match = re.search(v[1], versions)
                if match:
                    results[k] = "v{}".format(match.group(1))

    # Dump to YAML
    print(
        '''
id: 'software_versions'
section_name: 'Software Versions'
section_href: 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/-/blob/78-tool_version/docs/RNA-Seq%20Pipeline%20Design%20Process%20Table.pdf'
plot_type: 'html'
description: 'are collected for pipeline version.'
data: |
    <dl class="dl-horizontal">
'''
        , file = open(out_filename, "w"))

    for k, v in results.items():
        print(" <dt>{}</dt><dd>{}</dd>".format(k, v), file = open(out_filename, "a"))
    print(" </dl>", file = open(out_filename, "a"))


if __name__ == '__main__':
    main()
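To make the SOFTWARE_REGEX lookup concrete, the snippet below runs three of the patterns against sample version strings; the sample lines are assumptions modeled on the versions recorded in software_versions_mqc.yaml rather than captured tool output.

# Illustration of the regex matching performed in main(); the sample strings are assumed.
import re

samples = {
    'FastQC': ("FastQC v0.11.9", r"FastQC v(\S+)"),
    'MultiQC': ("multiqc, version 1.8", r"multiqc, version (\S+)"),
    'featureCounts': ("featureCounts v2.0.0", r"featureCounts v(\S+)"),
}

for tool, (text, pattern) in samples.items():
    match = re.search(pattern, text)
    # Mirrors the "Not Run" default used in main() above.
    print(tool, "v{}".format(match.group(1)) if match else "Not Run")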