From 36caf093e9dd7aebac77bc8760b8d866506c338f Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Tue, 2 Mar 2021 14:32:54 -0600 Subject: [PATCH] Add seqtk to references --- .gitlab-ci.yml | 7 +++++ CHANGELOG.md | 1 + docs/software_references_mqc.yaml | 36 +++++++++++++--------- workflow/scripts/generate_versions.py | 14 +++++---- workflow/scripts/get_updated_badge_info.sh | 14 +++++---- 5 files changed, 45 insertions(+), 27 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 031fdb1..391e512 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -194,8 +194,15 @@ downsampleData: - merge_requests - schedules script: + - singularity run 'docker://gudmaprbk/seqtk1.3:1.0.0' seqtk 2>&1 | grep -o Version.* > version_seqtk.txt - singularity run 'docker://gudmaprbk/seqtk1.3:1.0.0' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq - pytest -m downsampleData + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - version_seqtk.txt + expire_in: 7 days inferMetadata: stage: unit diff --git a/CHANGELOG.md b/CHANGELOG.md index 94f2098..69f9140 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * Add seqwho results to multiqc report * Modify repository structure to allow for use with XPACK-DNANEXUS * Add override for endness +* Add seqtk to references **Background** * Add memory limit (75%) per thread for samtools sort (#108) diff --git a/docs/software_references_mqc.yaml b/docs/software_references_mqc.yaml index 1456ff7..a1bd97e 100755 --- a/docs/software_references_mqc.yaml +++ b/docs/software_references_mqc.yaml @@ -26,52 +26,52 @@ <li>D'Arcy, M., Chard, K., Foster, I., Kesselman, C., Madduri, R., Saint, N., & Wagner, R.. 2019. Big Data Bags: A Scalable Packaging Format for Science. Zenodo. 
doi:<a href="http://doi.org/10.5281/zenodo.3338725">10.5281/zenodo.3338725</a>.</li> </ul> <ol start="4" style="list-style-type: decimal"> - <li><strong>RSeQC</strong>:</li> - </ol> - <ul> - <li>Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:<a href="https://doi.org/10.1093/bioinformatics/bts356">10.1093/bioinformatics/bts356</a>.</li> - </ul> - <ol start="5" style="list-style-type: decimal"> <li><strong>trimgalore</strong>:</li> </ol> <ul> <li>trimgalore <a href="https://github.com/FelixKrueger/TrimGalore" class="uri">https://github.com/FelixKrueger/TrimGalore</a></li> </ul> - <ol start="6" style="list-style-type: decimal"> + <ol start="5" style="list-style-type: decimal"> <li><strong>hisat2</strong>:</li> </ol> <ul> <li>Kim ,D.,Paggi, J.M., Park, C., Bennett, C., Salzberg, S.L. 2019 Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. Nat Biotechnol. Aug;37(8):907-915. doi:<a href="https://doi.org/10.1038/s41587-019-0201-4">10.1038/s41587-019-0201-4</a>.</li> </ul> - <ol start="7" style="list-style-type: decimal"> + <ol start="6" style="list-style-type: decimal"> <li><strong>samtools</strong>:</li> </ol> <ul> <li>Li H., B. Handsaker, A. Wysoker, T. Fennell, J. Ruan, N. Homer, G. Marth, G. Abecasis, R. Durbin, and 1000 Genome Project Data Processing Subgroup. 2009. The Sequence alignment/map (SAM) format and SAMtools. Bioinformatics 25: 2078-9. doi:<a href="http://dx.doi.org/10.1093/bioinformatics/btp352">10.1093/bioinformatics/btp352</a></li> </ul> - <ol start="8" style="list-style-type: decimal"> + <ol start="7" style="list-style-type: decimal"> <li><strong>picard</strong>:</li> </ol> <ul> <li>“Picard Toolkit.†2019. Broad Institute, GitHub Repository. 
<a href="http://broadinstitute.github.io/picard/" class="uri">http://broadinstitute.github.io/picard/</a>; Broad Institute</li> </ul> - <ol start="9" style="list-style-type: decimal"> + <ol start="8" style="list-style-type: decimal"> <li><strong>featureCounts</strong>:</li> </ol> <ul> <li>Liao, Y., Smyth, G.K., Shi, W. 2014 featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. Apr 1;30(7):923-30. doi:<a href="https://doi.org/10.1093/bioinformatics/btt656">10.1093/bioinformatics/btt656</a>.</li> </ul> + <ol start="9" style="list-style-type: decimal"> + <li><strong>deeptools</strong>:</li> + </ol> + <ul> + <li>RamÃrez, F., D. P. Ryan, B. Grüning, V. Bhardwaj, F. Kilpert, A. S. Richter, S. Heyne, F. Dündar, and T. Manke. 2016. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Research 44: W160-165. doi:<a href="http://dx.doi.org/10.1093/nar/gkw257">10.1093/nar/gkw257</a></li> + </ul> <ol start="10" style="list-style-type: decimal"> - <li><strong>R</strong>:</li> + <li><strong>Seqtk</strong>:</li> </ol> <ul> - <li>R Core Team 2014. R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. URL:<a href="http://www.R-project.org/" class="uri">http://www.R-project.org/</a>.</li> + <li>Seqtk URL:<a href="https://github.com/lh3/seqtk" class="uri">https://github.com/lh3/seqtk</a>.</li> </ul> <ol start="11" style="list-style-type: decimal"> - <li><strong>deeptools</strong>:</li> + <li><strong>R</strong>:</li> </ol> <ul> - <li>RamÃrez, F., D. P. Ryan, B. Grüning, V. Bhardwaj, F. Kilpert, A. S. Richter, S. Heyne, F. Dündar, and T. Manke. 2016. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Research 44: W160-165. doi:<a href="http://dx.doi.org/10.1093/nar/gkw257">10.1093/nar/gkw257</a></li> + <li>R Core Team 2014. R: A language and environment for statistical computing. 
R Foundation for Statistical Computing, Vienna, Austria. URL:<a href="http://www.R-project.org/" class="uri">http://www.R-project.org/</a>.</li> </ul> <ol start="12" style="list-style-type: decimal"> <li><strong>FastQC</strong></li> @@ -86,12 +86,18 @@ <li>SeqWho <a href="https://git.biohpc.swmed.edu/s181649/seqwho" class="uri">https://git.biohpc.swmed.edu/s181649/seqwho/</a></li> </ul> <ol start="14" style="list-style-type: decimal"> + <li><strong>RSeQC</strong>:</li> + </ol> + <ul> + <li>Wang, L., Wang, S., Li, W. 2012 RSeQC: quality control of RNA-seq experiments. Bioinformatics. Aug 15;28(16):2184-5. doi:<a href="https://doi.org/10.1093/bioinformatics/bts356">10.1093/bioinformatics/bts356</a>.</li> + </ul> + <ol start="15" style="list-style-type: decimal"> <li><strong>MultiQC</strong>:</li> </ol> <ul> <li>Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:<a href="https://dx.doi.org/10.1093/bioinformatics/btw354">10.1093/bioinformatics/btw354</a></li> </ul> - <ol start="15" style="list-style-type: decimal"> + <ol start="16" style="list-style-type: decimal"> <li><strong>Nextflow</strong>:</li> </ol> <ul> diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py index ecaeb7c..d9ea7b6 100644 --- a/workflow/scripts/generate_versions.py +++ b/workflow/scripts/generate_versions.py @@ -34,16 +34,17 @@ SOFTWARE_REGEX = { 'Python': ['version_python.txt', r"Python (\S+)"], 'DERIVA': ['version_deriva.txt', r"(\S+)"], 'BDBag': ['version_bdbag.txt', r"BDBag (\S+) \(Bagit \S+\)"], - 'SeqWho': ['version_seqwho.txt', r"Version: (\S+)"], - 'RSeQC': ['version_rseqc.txt', r"infer_experiment.py (\S+)"], 'Trim Galore!': ['version_trimgalore.txt', r"version (\S+)"], 'HISAT2': ['version_hisat2.txt', r"version (\S+)"], 'Samtools': ['version_samtools.txt', r"samtools (\S+)"], 'picard (MarkDuplicates)': ['version_markdups.txt', 
r"Version:(\S+)"], 'featureCounts': ['version_featurecounts.txt', r"featureCounts v(\S+)"], - 'R': ['version_r.txt', r"R version (\S+)"], 'deepTools': ['version_deeptools.txt', r"deeptools (\S+)"], + 'Seqtk': ['version_seqtk.txt', r"Version: (\S+)"], + 'R': ['version_r.txt', r"R version (\S+)"], 'FastQC': ['version_fastqc.txt', r"FastQC v(\S+)"], + 'SeqWho': ['version_seqwho.txt', r"Version: (\S+)"], + 'RSeQC': ['version_rseqc.txt', r"infer_experiment.py (\S+)"], 'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"], 'Pipeline Version': ['./workflow/nextflow.config', r"version = 'v(\S+)'"] } @@ -94,16 +95,17 @@ def main(): results['Python'] = '<span style="color:#999999;\">Not Run</span>' results['DERIVA'] = '<span style="color:#999999;\">Not Run</span>' results['BDBag'] = '<span style="color:#999999;\">Not Run</span>' - results['SeqWho'] = '<span style="color:#999999;\">Not Run</span>' - results['RSeQC'] = '<span style="color:#999999;\">Not Run</span>' results['Trim Galore!'] = '<span style="color:#999999;\">Not Run</span>' results['HISAT2'] = '<span style="color:#999999;\">Not Run</span>' results['Samtools'] = '<span style="color:#999999;\">Not Run</span>' results['picard (MarkDuplicates)'] = '<span style="color:#999999;\">Not Run</span>' results['featureCounts'] = '<span style="color:#999999;\">Not Run</span>' - results['R'] = '<span style="color:#999999;\">Not Run</span>' results['deepTools'] = '<span style="color:#999999;\">Not Run</span>' + results['Seqtk'] = '<span style="color:#999999;\">Not Run</span>' + results['R'] = '<span style="color:#999999;\">Not Run</span>' results['FastQC'] = '<span style="color:#999999;\">Not Run</span>' + results['SeqWho'] = '<span style="color:#999999;\">Not Run</span>' + results['RSeQC'] = '<span style="color:#999999;\">Not Run</span>' results['MultiQC'] = '<span style="color:#999999;\">Not Run</span>' results['Pipeline Version'] = '<span style="color:#999999;\">Not Run</span>' diff --git 
a/workflow/scripts/get_updated_badge_info.sh b/workflow/scripts/get_updated_badge_info.sh index a8c4033..b1889b3 100644 --- a/workflow/scripts/get_updated_badge_info.sh +++ b/workflow/scripts/get_updated_badge_info.sh @@ -13,16 +13,17 @@ echo "collecting tool version for badges" python_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o Python.* | grep -oP "(?<=d>).*(?=\<)") deriva_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o DERIVA.* | grep -oP "(?<=d>).*(?=\<)") bdbag_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o BDBag.* | grep -oP "(?<=d>).*(?=\<)") -seqwho_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o SeqWho.* | grep -oP "(?<=d>).*(?=\<)") -rseqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o RSeQC.* | grep -oP "(?<=d>).*(?=\<)") trimgalore_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o 'Trim Galore!'.* | grep -oP "(?<=d>).*(?=\<)") hisat2_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o HISAT2.* | grep -oP "(?<=d>).*(?=\<)") samtools_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o Samtools.* | grep -oP "(?<=d>).*(?=\<)") picard_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o 'picard (MarkDuplicates)'.* | grep -oP "(?<=d>).*(?=\<)") featurecounts_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o featureCounts.* | grep -oP "(?<=d>).*(?=\<)") -r_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o '>R<'.* | grep -oP "(?<=d>).*(?=\<)") deeptools_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o deepTools.* | grep -oP "(?<=d>).*(?=\<)") +seqtk_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o Seqtk.* | grep -oP 
"(?<=d>).*(?=\<)") +r_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o '>R<'.* | grep -oP "(?<=d>).*(?=\<)") fastqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o FastQC.* | grep -oP "(?<=d>).*(?=\<)") +seqwho_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o SeqWho.* | grep -oP "(?<=d>).*(?=\<)") +rseqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o RSeQC.* | grep -oP "(?<=d>).*(?=\<)") multiqc_version=$(git show ${latest_release_tag}:docs/software_versions_mqc.yaml | grep -o MultiQC.* | grep -oP "(?<=d>).*(?=\<)") echo "collecting badges" @@ -37,15 +38,16 @@ curl --request GET https://img.shields.io/badge/Nextflow%20Version-${develop_nex curl --request GET https://img.shields.io/badge/Python%20Version-${python_version}-blueviolet?style=flat > ./badges/tools/python.svg curl --request GET https://img.shields.io/badge/DERIVA%20Version-${deriva_version}-blueviolet?style=flat > ./badges/tools/deriva.svg -curl --request GET https://img.shields.io/badge/SeqWho%20Version-${seqwho_version}-blueviolet?style=flat > ./badges/tools/seqwho.svg curl --request GET https://img.shields.io/badge/BDBag%20Version-${bdbag_version}-blueviolet?style=flat > ./badges/tools/bdbag.svg -curl --request GET https://img.shields.io/badge/RSeQC%20Version-${rseqc_version}-blueviolet?style=flat > ./badges/tools/rseqc.svg curl --request GET https://img.shields.io/badge/Trim%20Galore%20Version-${trimgalore_version}-blueviolet?style=flat > ./badges/tools/trimgalore.svg curl --request GET https://img.shields.io/badge/HISAT2%20Version-${hisat2_version}-blueviolet?style=flat > ./badges/tools/hisat2.svg curl --request GET https://img.shields.io/badge/Samtools%20Version-${samtools_version}-blueviolet?style=flat > ./badges/tools/samtools.svg curl --request GET https://img.shields.io/badge/picard%20Version-${picard_version}-blueviolet?style=flat > 
./badges/tools/picard.svg curl --request GET https://img.shields.io/badge/featureCounts%20Version-${featurecounts_version}-blueviolet?style=flat > ./badges/tools/featurecounts.svg -curl --request GET https://img.shields.io/badge/R%20Version-${r_version}-blueviolet?style=flat > ./badges/tools/r.svg curl --request GET https://img.shields.io/badge/deepTools%20Version-${deeptools_version}-blueviolet?style=flat > ./badges/tools/deeptools.svg +curl --request GET https://img.shields.io/badge/Seqtk%20Version-${seqtk_version}-blueviolet?style=flat > ./badges/tools/seqtk.svg +curl --request GET https://img.shields.io/badge/R%20Version-${r_version}-blueviolet?style=flat > ./badges/tools/r.svg curl --request GET https://img.shields.io/badge/FastQC%20Version-${fastqc_version}-blueviolet?style=flat > ./badges/tools/fastqc.svg +curl --request GET https://img.shields.io/badge/SeqWho%20Version-${seqwho_version}-blueviolet?style=flat > ./badges/tools/seqwho.svg +curl --request GET https://img.shields.io/badge/RSeQC%20Version-${rseqc_version}-blueviolet?style=flat > ./badges/tools/rseqc.svg curl --request GET https://img.shields.io/badge/MultiQC%20Version-${multiqc_version}-blueviolet?style=flat > ./badges/tools/multiqc.svg \ No newline at end of file -- GitLab