From d907644c4b2519676346818032dcee0a17d4dd61 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Fri, 14 Aug 2020 11:06:26 -0500 Subject: [PATCH] Add Seurat version to report and fix Seurat iteration --- docs/references.md | 7 +++- workflow/configs/cluster.config | 6 +-- workflow/main.nf | 59 +++++++++++++++------------ workflow/scripts/generate_versions.py | 2 + workflow/scripts/versions_seurat.sh | 9 ++++ 5 files changed, 52 insertions(+), 31 deletions(-) create mode 100644 workflow/scripts/versions_seurat.sh diff --git a/docs/references.md b/docs/references.md index 37f42d8..ec7341a 100644 --- a/docs/references.md +++ b/docs/references.md @@ -1,7 +1,7 @@ ### References 1. **Nextflow**: - * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) + * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316-319. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) 2. **cellranger** * Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count) @@ -9,5 +9,8 @@ 3. **python**: * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com)) -4. **MultiQc**: +4. **Seurat**: + * Stuart, T., Butler, A., Hoffman, P., Hafemeister, C., Papalexi, E., Mauck III, W. M., ... & Satija, R. (2019). Comprehensive integration of single-cell data. Cell, 177(7), 1888-1902. doi:[10.1016/j.cell.2019.05.031](https://doi.org/10.1016/j.cell.2019.05.031) + +5. **MultiQc**: * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. 
MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) diff --git a/workflow/configs/cluster.config b/workflow/configs/cluster.config index 2043421..fe73734 100644 --- a/workflow/configs/cluster.config +++ b/workflow/configs/cluster.config @@ -24,13 +24,13 @@ process { withName: count400 { queue = '128GB,256GB,256GBv1,384GB' } + withName: downstreamViz { + queue = '32GB' + } withName: versions { executor = 'local' } withName: multiqc { executor = 'local' } - withName: downstreamViz { - executor = '32GB' - } } diff --git a/workflow/main.nf b/workflow/main.nf index b532aa1..b9d4fa0 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -88,6 +88,7 @@ filename_checkScript = Channel.fromPath("$baseDir/scripts/filename_check.sh") generate_versionsScript = Channel.fromPath("$baseDir/scripts/generate_versions.py") generate_referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py") versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh") +versions_seuratScript = Channel.fromPath("$baseDir/scripts/versions_seurat.sh") downstream_vizScript = Channel.fromPath("$baseDir/scripts/downstream_viz.r") // Define report files @@ -373,6 +374,37 @@ process count400 { } } +// Collect all outputs regardless of cellranger version +filteredOut = filteredOut211.mix(filteredOut302, filteredOut310, filteredOut400) +// Combine all inputs for downstreamViz +downstreamVizIn = downstream_vizScript.combine(versions_seuratScript).combine(filteredOut) + +/* + * downstreamViz: create files for downstream use (eg. 
R Seurat object) + */ +process downstreamViz { + tag "${sample}" + publishDir "${outDir}/${task.process}", mode: 'copy', pattern: "*.rds" + module 'seurat/3.0.0' + + input: + set file("*"), file("*"), sample, file("filtered/*"), file("clustering/graphclust/*"), file("clustering/kmeans_2_clusters/*"), file("clustering/kmeans_3_clusters/*"), file("clustering/kmeans_4_clusters/*"), file("clustering/kmeans_5_clusters/*"), file("clustering/kmeans_6_clusters/*"), file("clustering/kmeans_7_clusters/*"), file("clustering/kmeans_8_clusters/*"), file("clustering/kmeans_9_clusters/*"), file("clustering/kmeans_10_clusters/*"), file("pca/*"), file("tsne/*"), file("umap/*") from downstreamVizIn + //file downstream_vizScript + //file versions_seuratScript + + output: + file "*.rds" into seuratPaths + file "version_seurat.txt" into version_seurat + + script: + """ + hostname + ulimit -a + seurat-Rscript downstream_viz.r --sample ${sample} --cellrangerVersion ${version} + bash versions_seurat.sh > version_seurat.txt + """ +} + /* * versions: collect all versions into a single yml */ @@ -382,6 +414,7 @@ process versions { input: file versions_pythonScript + file version_seurat file generate_versionsScript file generate_referencesScript @@ -428,29 +461,3 @@ process multiqc { multiqc -c ${multiqcConf} . """ } - -// Collect all outputs reguardless of cellranger version -filteredOut = filteredOut211.mix(filteredOut302, filteredOut310, filteredOut400) - -/* - * downstreamViz: create files for downstream use (eg. 
R Seurat object) - */ -process downstreamViz { - tag "${sample}" - publishDir "${outDir}/${task.process}", mode: 'copy' - module 'seurat/3.0.0' - - input: - file downstream_vizScript - set sample, file("filtered/*"), file("clustering/graphclust/*"), file("clustering/kmeans_2_clusters/*"), file("clustering/kmeans_3_clusters/*"), file("clustering/kmeans_4_clusters/*"), file("clustering/kmeans_5_clusters/*"), file("clustering/kmeans_6_clusters/*"), file("clustering/kmeans_7_clusters/*"), file("clustering/kmeans_8_clusters/*"), file("clustering/kmeans_9_clusters/*"), file("clustering/kmeans_10_clusters/*"), file("pca/*"), file("tsne/*"), file("umap/*") from filteredOut - - output: - file "*.rds" into seuratPaths - - script: - """ - hostname - ulimit -a - seurat-Rscript downstream_viz.r --sample ${sample} --cellrangerVersion ${version} - """ -} diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py index 978aedc..8faa1fe 100755 --- a/workflow/scripts/generate_versions.py +++ b/workflow/scripts/generate_versions.py @@ -28,6 +28,7 @@ SOFTWARE_REGEX = { 'Nextflow': ['version_nextflow.txt', r"(\S+)"], 'cellranger count': ['version_cellranger.txt', r"(\S+)"], 'python': ['version_python.txt', r"(\S+)"], + 'seurat': ['version_seurat.txt', r"(\S+)"], } @@ -77,6 +78,7 @@ def main(): results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' results['cellranger count'] = '<span style="color:#999999;\">N/A</span>' results['python'] = '<span style="color:#999999;\">N/A</span>' + results['seurat'] = '<span style="color:#999999;\">N/A</span>' # Check for version files: check_files(files) diff --git a/workflow/scripts/versions_seurat.sh b/workflow/scripts/versions_seurat.sh new file mode 100644 index 0000000..e0a261a --- /dev/null +++ b/workflow/scripts/versions_seurat.sh @@ -0,0 +1,9 @@ +#!/bin/bash +#versions_seurat.sh +#* +#* -------------------------------------------------------------------------- +#* Licensed under MIT 
(https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE) +#* -------------------------------------------------------------------------- +#* + +seurat-Rscript -e 'packageVersion("Seurat")' |& grep '\[1\] ' | sed -n -e 's/^\[1\] ‘//p' | tr -d '’' -- GitLab