diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e7e7a07b8f13d737e841d098f775585ae8452035..926be68afcc15ffb2a3fa60cd4118b020deebbee 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,22 +3,25 @@ before_script: - module load python/3.6.1-2-anaconda - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1 - module load nextflow/20.01.0 - - module load singularity/3.0.2 + - module load singularity/3.5.3 - mkdir -p test_data/hu.v2s1r500 - mkdir -p test_data/hu.v3s1r500 - mkdir -p test_data/mu.v3s1r500 - mkdir -p test_data/hu.v3s2r10k - mkdir -p test_data/mu.v3s2r10k - mkdir -p test_data/hu.v2s2r10k + - mkdir -p test_data/output - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s1r500/* test_data/hu.v3s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s1r500/* test_data/mu.v3s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s2r10k/* test_data/mu.v3s2r10k/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s2r10k/* test_data/hu.v2s2r10k/ + - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/output/* test_data/output/ stages: - astrocyte_test + - module_test - container_test - reference_test - multiSample_test @@ -34,8 +37,8 @@ astrocyte_cli: when: - always -2.1.1_test: - stage: container_test +module_2.1.1_test: + stage: module_test only: - branches except: @@ -45,20 +48,20 @@ astrocyte_cli: - tags script: - module load cellranger/2.1.1 - - cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 
--sample=pbmc_1k_v2 --chemistry=SC3Pv2 + - cellranger count --id=module-211 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 artifacts: name: "$CI_JOB_NAME" when: always paths: - - test/outs/web_summary.html + - module-211/outs/web_summary.html expire_in: 2 days retry: max: 0 when: - always -3.0.2_test: - stage: container_test +module_3.0.2_test: + stage: module_test only: - branches except: @@ -68,20 +71,20 @@ astrocyte_cli: - tags script: - module load cellranger/3.0.2 - - cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + - cellranger count --id=module-302 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 artifacts: name: "$CI_JOB_NAME" when: always paths: - - test/outs/web_summary.html + - module-302/outs/web_summary.html expire_in: 2 days retry: max: 0 when: - always -3.1.0_test: - stage: container_test +module_3.1.0_test: + stage: module_test only: - branches except: @@ -91,14 +94,172 @@ astrocyte_cli: - tags script: - module load cellranger/3.1.0 - - cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + - cellranger count --id=module-310 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - module-310/outs/web_summary.html + expire_in: 2 days + retry: + max: 0 + when: + - always + +module_4.0.0_test: + stage: module_test + only: + - branches + except: + refs: + - develop + - master + - tags + script: + - module load cellranger/4.0.0 + - 
cellranger count --id=module-400 --transcriptome=/project/apps_database/cellranger/refdata-gex-GRCh38-2020-A --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - module-400/outs/web_summary.html + expire_in: 2 days + retry: + max: 0 + when: + - always + +module_seurat_test: + stage: module_test + only: + - branches + except: + refs: + - develop + - master + - tags + script: + - module load seurat/3.0.0 + - ln -sfn test_data/output/* . + - seurat-Rscript workflow/scripts/downstream_viz.r --sample sample1 --cellrangerVersion 4.0.0 + - bash workflow/scripts/versions_seurat.sh > version_seurat.txt artifacts: name: "$CI_JOB_NAME" when: always paths: + - version_seurat.txt + expire_in: 2 days + retry: + max: 0 + when: + - always + +container_2.1.1_test: + stage: container_test + only: + - branches + except: + refs: - develop - master - - test/outs/web_summary.html + - tags + script: + - singularity run 'docker://bicf/cellranger2.1.1:2.0.0' cellranger count --id=container-211 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - container-211/outs/web_summary.html + expire_in: 2 days + retry: + max: 0 + when: + - always + +container_3.0.2_test: + stage: container_test + only: + - branches + except: + refs: + - develop + - master + - tags + script: + - singularity run 'docker://bicf/cellranger3.0.2:2.0.0' cellranger count --id=container-302 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - container-302/outs/web_summary.html + expire_in: 2 days + retry: + max: 0 + when: + - always + +container_3.1.0_test: + stage: container_test + only: + - branches + except: + 
refs: + - develop + - master + - tags + script: + - singularity run 'docker://bicf/cellranger3.1.0:2.0.0' cellranger count --id=container-310 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - container-310/outs/web_summary.html + expire_in: 2 days + retry: + max: 0 + when: + - always + +container_4.0.0_test: + stage: container_test + only: + - branches + except: + refs: + - develop + - master + - tags + script: + - singularity run 'docker://bicf/cellranger4.0.0:2.0.0_indev' cellranger count --id=container-400 --transcriptome=/project/apps_database/cellranger/refdata-gex-GRCh38-2020-A --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - container-400/outs/web_summary.html + expire_in: 2 days + retry: + max: 0 + when: + - always + +GRCh38-2020A: + stage: reference_test + only: + refs: + - develop + - master + except: + - tags + script: + - nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s1r500/*.fastq.gz" --designFile "test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-2020-A' --kitVersion '3GEXv3' --version '4.0.0' --ci true + - pytest -m count400 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - .nextflow.log + - workflow/output/count400/sample1/outs/web_summary.html + - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: max: 0 @@ -129,6 +290,30 @@ GRCh38-3.0.0: when: - always +mm10-2020A: + stage: reference_test + only: + refs: + - develop + - master + except: + - tags + script: + - nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/mu.v3s1r500/*.fastq.gz" --designFile "test_data/mu.v3s1r500/design.csv" --genome 'mm10-2020-A' --kitVersion '3GEXv3' --version '4.0.0' --ci true + - pytest -m count400 + artifacts: + name: 
"$CI_JOB_NAME" + when: always + paths: + - .nextflow.log + - workflow/output/count400/sample1/outs/web_summary.html + - workflow/output/multiqc/run/multiqc_report.html + expire_in: 2 days + retry: + max: 0 + when: + - always + mm10-3.0.0: stage: reference_test only: @@ -160,15 +345,15 @@ mm10-3.0.0: - master - tags script: - - nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.1.0' --ci true - - pytest -m count310 + - nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-2020-A' --kitVersion 'auto' --version '4.0.0' --ci true + - pytest -m count400 artifacts: name: "$CI_JOB_NAME" when: always paths: - .nextflow.log - - workflow/output/count310/sample1/outs/web_summary.html - - workflow/output/count310/sample2/outs/web_summary.html + - workflow/output/count400/sample1/outs/web_summary.html + - workflow/output/count400/sample2/outs/web_summary.html - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: diff --git a/CHANGELOG.md b/CHANGELOG.md index e85dc433e6de36cc4d50004114f885aed8214dfb..0e068f6cb3f5ee5b55f98bd40b0cc828a4cbb40d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +# v2.2.0-indev +**User Facing** +* Add cellranger version 4.0.0 +* Add references version 2020-A (GRCh38, mm10, mix) +* Create option to create files for downstream viz and analysis (Seurat R-object) + +**Background** + +*Known Bugs* +* Vizapp does not yet work for Astrocyte +* Running in CLI: to set --fastq path of file/s needs to be in quotes + + # v2.1.1 **User Facing** * Check Design File for spaces in name and file contents diff --git a/LICENSE b/LICENSE index ce31415967a482f717d2e7eaea440fd34d4e9ba6..69a90437eb12478cccaf5838821e737d44e36268 100644 --- a/LICENSE +++ b/LICENSE @@ -2,7 +2,7 @@ MIT 
License Copyright (c) 2019 University of Texas Southwestern Medical Center. -Contributors: Gervaise H. Henry, Jeremy Mathews, and Venkat Malladi +Contributors: Gervaise H. Henry, Jeremy Mathews, Jon Gesell, and Venkat Malladi Department: Bioinformatic Core Facility, Department of Bioinformatics diff --git a/README.md b/README.md index ae33423a4719c29a91cb99d08fdf0dccd46308b0..f221d97744a0fb293e8c20d9dab4f8f57523954b 100755 --- a/README.md +++ b/README.md @@ -108,12 +108,17 @@ To Run: * *'3.0.2'* * *'2.1.1'* * eg: **--version '3.1.0'** + * **--vizFiles** + * create objects which can be used for downstream visualization and analysis of each sample outputs, currently creates: + * Seurat R-objects + * true/false + * eg: **--vizFiles true** * **--outDir** * optional output directory for run * eg: **--outDir 'test'** * FULL EXAMPLE: ``` - nextflow run workflow/main.nf -profile biohpc,cluster --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --outDir 'test' + nextflow run workflow/main.nf -profile biohpc,cluster --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --vizFiles true --outDir 'test' ``` * Design example: diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index e128e1ff61f118e1ab733268460d82bd14836654..81b430f1b693a9f3f5a3c0ff9ec87f44b6a409ac 100755 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -100,12 +100,15 @@ workflow_parameters: - id: genome type: select choices: + - ['GRCh38-2020-A', 'Human GRCh38 release 98'] - ['GRCh38-3.0.0', 'Human GRCh38 release 93'] - 
['GRCh38-1.2.0', 'Human GRCh38 release 84'] - ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87'] - ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84'] + - ['mm10-2020-A', 'Mouse GRCm38 (mm10) release 98'] - ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93'] - ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84'] + - ['GRCh38_and_mm10-2020-A', 'Human GRCh38 + Mouse GRCm38 (mm10) release 98'] - ['GRCh38_and_mm10-3.1.0', 'Human GRCh38 + Mouse GRCm38 (mm10) release 93'] - ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93'] - ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84'] @@ -146,8 +149,9 @@ workflow_parameters: - id: version type: select - default: '3.1.0' + default: '4.0.0' choices: + - ['4.0.0', '4.0.0'] - ['3.1.0', '3.1.0'] - ['3.0.2', '3.0.2'] - ['2.1.1', '2.1.1'] @@ -155,6 +159,17 @@ workflow_parameters: description: | 10x cellranger version. + - id: vizFiles + type: select + choices: + - [ 'true', 'Yes' ] + - [ 'false', 'No' ] + default: 'true' + required: true + description: | + Create objects which can be used for downstream visualization and analysis of each sample outputs. Currently created: Seurat R-objects. + + - id: astrocyte type: select choices: diff --git a/docs/references.md b/docs/references.md index 37f42d86af5542e91c79e088be69f4f10a386a57..ec7341a417b2562117d2d295257bbfa271672e1f 100644 --- a/docs/references.md +++ b/docs/references.md @@ -1,7 +1,7 @@ ### References 1. **Nextflow**: - * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) + * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316-319. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) 2. 
**cellranger** * Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count) @@ -9,5 +9,8 @@ 3. **python**: * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com)) -4. **MultiQc**: +4. **Seurat**: + * Stuart, T., Butler, A., Hoffman, P., Hafemeister, C., Papalexi, E., Mauck III, W. M., ... & Satija, R. (2019). Comprehensive integration of single-cell data. Cell, 177(7), 1888-1902. doi:[10.1016/j.cell.2019.05.031](https://doi.org/10.1016/j.cell.2019.05.031) + +5. **MultiQc**: * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) diff --git a/workflow/configs/aws.config b/workflow/configs/aws.config index 86a5b3187b7418a2bdc16c1c6cedb4473a204177..3fd7dc519c6c89e8acafcb952f1046e445fd49dc 100644 --- a/workflow/configs/aws.config +++ b/workflow/configs/aws.config @@ -12,27 +12,35 @@ process { cpus = 1 memory = '1 GB' - withLabel: checkDesignFile { + withName: checkDesignFile { cpus = 2 memory = '1 GB' } - withLabel: count211 { + withName: count211 { cpus = 2 memory = '30 GB' } - withLabel: count302 { + withName: count302 { cpus = 2 memory = '30 GB' } - withLabel: count310 { + withName: count310 { cpus = 2 memory = '30 GB' } - withLabel: versions { + withName: count400 { + cpus = 2 + memory = '30 GB' + } + withName: downstreamViz { + cpus = 2 + memory = '1 GB' + } + withName: versions { cpus = 3 memory = '1 GB' } - withLabel: multiqc { + withName: multiqc { cpus = 1 memory = '1 GB' } diff --git a/workflow/configs/biohpc.config b/workflow/configs/biohpc.config index 1da174a128ff30cb2a47790566792d5f4d267cee..983b458356f832bfe2cf0ce2ef2a5eeae919366c 100644 --- 
a/workflow/configs/biohpc.config +++ b/workflow/configs/biohpc.config @@ -1,6 +1,9 @@ params { // Reference file paths on BioHPC genomes { + 'GRCh38-2020-A' { + loc = '/project/apps_database/cellranger/refdata-gex-' + } 'GRCh38-3.0.0' { loc = '/project/apps_database/cellranger/refdata-cellranger-' } @@ -13,12 +16,18 @@ params { 'hg19-1.2.0' { loc = '/project/apps_database/cellranger/refdata-cellranger-' } + 'mm10-2020-A' { + loc = '/project/apps_database/cellranger/refdata-gex-' + } 'mm10-3.0.0' { loc = '/project/apps_database/cellranger/refdata-cellranger-' } 'mm10-1.2.0' { loc = '/project/apps_database/cellranger/refdata-cellranger-' } + 'GRCh38_and_mm10-2020-A' { + loc = '/project/apps_database/cellranger/refdata-gex-' + } 'GRCh38_and_mm10-3.1.0' { loc = '/project/apps_database/cellranger/refdata-cellranger-' } @@ -52,6 +61,11 @@ params { } } +singularity { + enabled = true + cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/' +} + env { http_proxy = 'http://proxy.swmed.edu:3128' https_proxy = 'http://proxy.swmed.edu:3128' diff --git a/workflow/configs/cluster.config b/workflow/configs/cluster.config index ed3da8d938bf96f44bb103ab83e75f4f9bbacd4b..d1f5293dd68aad0f2ceb3380c2aa4e05566de0e5 100644 --- a/workflow/configs/cluster.config +++ b/workflow/configs/cluster.config @@ -1,27 +1,33 @@ process { executor = 'slurm' - queue = 'super' + queue = '32GB' clusterOptions = '--hold' - withName:trackStart { + withName: trackStart { executor = 'local' } - withName:checkDesignFile { + withName: checkDesignFile { executor = 'local' } - withName:count211 { + withName: count211 { queue = '128GB,256GB,256GBv1,384GB' } - withName:count302 { + withName: count302 { queue = '128GB,256GB,256GBv1,384GB' } - withName:count310 { + withName: count310 { queue = '128GB,256GB,256GBv1,384GB' } - withName:versions { + withName: count400 { + queue = '128GB,256GB,256GBv1,384GB' + } + withName: downstreamViz { + queue = '32GB' + } + withName: versions { executor = 'local' } - 
withName:multiqc { + withName: multiqc { executor = 'local' } } diff --git a/workflow/main.nf b/workflow/main.nf index f110f31fe226abcc8ab8f44661727ad5f956e330..1fb8fca096757974b9b7d1f285cf29fbecfdf2c5 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -21,12 +21,12 @@ main.nf params.name = "run" params.fastq = "test_data/mu.v3s1r500/*.fastq.gz" params.designFile = "test_data/mu.v3s1r500/design.csv" -params.genome = 'mm10-3.0.0' -params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' +params.genome = 'mm10-2020-A' params.expectCells = 10000 params.forceCells = 0 params.kitVersion = '3GEXv3' -params.version = '3.1.0' +params.version = '4.0.0' +params.vizFiles = true params.astrocyte = false params.outDir = "${baseDir}/output" @@ -39,7 +39,11 @@ if (params.kitVersion == "3GEXv3" && params.version == '2.1.1') { // Define variables if astrocyte (or from config) if (params.astrocyte) { print("Running under astrocyte") - params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' + if (params.version == "4.0.0") { + params.genomeLocation = '/project/apps_database/cellranger/refdata-gex-' + } else { + params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' + } if (params.kitVersion == "3GEXv1") { params.chemistryParam ='SC3Pv1' } else if (params.kitVersion == "3GEXv2") { @@ -60,7 +64,7 @@ if (params.astrocyte) { params.genomeLocationFull = params.genomeLocation+params.genome // Define variables from input -pipelineVersion = "2.1.1" +pipelineVersion = "2.2.0-indev" name = params.name designLocation = Channel .fromPath(params.designFile) @@ -77,6 +81,7 @@ expectCells = params.expectCells forceCells = params.forceCells chemistryParam = params.chemistryParam version = params.version +vizFiles = params.vizFiles outDir = params.outDir // Define script files @@ -85,6 +90,8 @@ filename_checkScript = Channel.fromPath("$baseDir/scripts/filename_check.sh") generate_versionsScript = 
Channel.fromPath("$baseDir/scripts/generate_versions.py") generate_referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py") versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh") +versions_seuratScript = Channel.fromPath("$baseDir/scripts/versions_seurat.sh") +downstream_vizScript = Channel.fromPath("$baseDir/scripts/downstream_viz.r") // Define report files multiqcConf = "${baseDir}/configs/multiqc_config.yaml" @@ -158,21 +165,26 @@ samples.into { samples211 samples302 samples310 + samples400 } refLocation.into { refLocation211 refLocation302 refLocation310 + refLocation400 } expectCells211 = expectCells expectCells302 = expectCells expectCells310 = expectCells +expectCells400 = expectCells forceCells211 = forceCells forceCells302 = forceCells forceCells310 = forceCells +forceCells400 = forceCells chemistryParam211 = chemistryParam chemistryParam302 = chemistryParam chemistryParam310 = chemistryParam +chemistryParam400 = chemistryParam /* @@ -192,7 +204,8 @@ process count211 { chemistryParam211 output: - file("**/outs/**") into outPaths211 + set sample, file("**/outs/**") into outPaths211 + set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**") into filteredOut211 file("*_metrics_summary.tsv") into metricsSummary211 when: @@ -239,7 +252,8 @@ process count302 { chemistryParam302 output: - 
file("**/outs/**") into outPaths302 + set sample, file("**/outs/**") into outPaths302 + set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**") into filteredOut302 file("*_metrics_summary.tsv") into metricsSummary302 when: @@ -285,7 +299,8 @@ process count310 { chemistryParam310 output: - file("**/outs/**") into outPaths310 + set sample, file("**/outs/**") into outPaths310 + set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**"), file("**/outs/analysis/umap/**") into filteredOut310 file("*_metrics_summary.tsv") into metricsSummary310 when: @@ -314,6 +329,87 @@ process count310 { } } +/* + * count400: run cellranger count version 4.0.0 + */ +process count400 { + tag "${sample}" + publishDir "${outDir}/${task.process}", mode: 'copy' + 
queue '128GB,256GB,256GBv1,384GB' + module 'cellranger/4.0.0' + + input: + set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz"), file(script) from samples400 + file ref from refLocation400.first() + expectCells400 + forceCells400 + chemistryParam400 + + output: + set sample, file("**/outs/**") into outPaths400 + set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**"), file("**/outs/analysis/umap/**") into filteredOut400 + file("*_metrics_summary.tsv") into metricsSummary400 + + when: + version == '4.0.0' + + script: + if (forceCells400 == 0) { + """ + hostname + ulimit -u 16384 + ulimit -a + bash filename_check.sh -r ${ref} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells400} --chemistry=${chemistryParam400} + sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv + """ + } + else { + """ + hostname + ulimit -u 16384 + ulimit -a + bash filename_check.sh -r ${ref} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. 
--sample=${sample} --force-cells=${forceCells400} --chemistry=${chemistryParam400} + sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv + """ + } +} + +// Collect all outputs regardless of cellranger version +filteredOut = filteredOut211.mix(filteredOut302, filteredOut310, filteredOut400) +// Combine all inputs for downstreamViz +downstreamVizIn = downstream_vizScript.combine(versions_seuratScript).combine(filteredOut) + +/* + * downstreamViz: create files for downstream use (eg. R Seurat object) + */ +process downstreamViz { + tag "${sample}" + publishDir "${outDir}/seurat", mode: 'copy', pattern: "*.rds" + module 'seurat/3.0.0' + + input: + set file("*"), file("*"), sample, file("filtered/*"), file("clustering/graphclust/*"), file("clustering/kmeans_2_clusters/*"), file("clustering/kmeans_3_clusters/*"), file("clustering/kmeans_4_clusters/*"), file("clustering/kmeans_5_clusters/*"), file("clustering/kmeans_6_clusters/*"), file("clustering/kmeans_7_clusters/*"), file("clustering/kmeans_8_clusters/*"), file("clustering/kmeans_9_clusters/*"), file("clustering/kmeans_10_clusters/*"), file("pca/*"), file("tsne/*"), file("umap/*") from downstreamVizIn + //file downstream_vizScript + //file versions_seuratScript + + output: + file "*.rds" into seuratPaths + file "version_seurat.txt" into version_seurat + + when: + vizFiles + + script: + """ + hostname + ulimit -a + seurat-Rscript downstream_viz.r --sample ${sample} --cellrangerVersion ${version} + bash versions_seurat.sh > version_seurat.txt + """ +} + /* * versions: collect all versions into a single yml */ @@ -323,6 +419,7 @@ process versions { input: file versions_pythonScript + file version_seurat file generate_versionsScript file generate_referencesScript @@ -343,7 +440,7 @@ process versions { } // Collect all metrics summaries reguardless of cellranger version -metricsSummary = metricsSummary211.mix(metricsSummary302, 
metricsSummary310) +metricsSummary = metricsSummary211.mix(metricsSummary302, metricsSummary310, metricsSummary400) /* * multiqc: create multiqc report diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 5a622ec752d117228141cd00bb89c28a77fbc5a3..1ab9a50bd365b61631b3df6441a59dd34480855b 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -1,6 +1,7 @@ profiles { standard { includeConfig 'configs/biohpc.config' + includeConfig 'configs/cluster.config' } biohpc { includeConfig 'configs/biohpc.config' @@ -47,6 +48,6 @@ manifest { homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count' description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.' 
mainScript = 'main.nf' - version = '2.1.1' + version = '2.2.0-indev' nextflowVersion = '>=0.31.0' } diff --git a/workflow/scripts/downstream_viz.r b/workflow/scripts/downstream_viz.r new file mode 100644 index 0000000000000000000000000000000000000000..49bfe70484382106522ecf115e7fcbf5424417d5 --- /dev/null +++ b/workflow/scripts/downstream_viz.r @@ -0,0 +1,38 @@ +if (!require(optparse)) install.packages('optparse',repos='http://cran.us.r-project.org',quiet=TRUE) +library(optparse) +library(Seurat) + +option_list=list( + make_option("--sample",default="sample1",action="store",type='character',help="sample"), + make_option("--cellrangerVersion",default="4.0.0",action="store",type='character',help="cellranger Version") +) +opt=parse_args(OptionParser(option_list=option_list)) +rm(option_list) + +data <- Read10X(data.dir="filtered/") +data <- CreateSeuratObject(counts=data) + +dimReductions <- c("pca","tsne") +if (opt$cellrangerVersion!="2.1.1" && opt$cellrangerVersion!="3.0.2"){ + dimReductions <- c(dimReductions,"umap") +} +for (i in dimReductions){ + if (i=="tsne"){ + lab <- "tSNE" + } else { + lab <- toupper(i) + } + projection <- read.csv(paste0(i,"/projection.csv"),row.names=1) + rownames(projection) <- gsub("-.","",rownames(projection)) + data[[i]] <- CreateDimReducObject(embeddings=as.matrix(projection),key=paste0(lab,"_"),assay="RNA") +} + +clust <- c("graphclust",paste0("kmeans_",2:10,"_clusters")) +for (i in clust){ + clusters <- read.csv(paste0("clustering/",i,"/clusters.csv"),row.names=1) + rownames(clusters) <- gsub("-.","",rownames(clusters)) + data[[i]] <- clusters + data@meta.data <- data@meta.data[,colnames(data@meta.data)!="orig.ident"] +} + +saveRDS(data,paste0(opt$sample,".rds")) diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py index 978aedc56c4774b2e6a3556ff0c0f7dccb76eadf..8faa1fe9799f644906ea95fd6a693c1618b5cbf2 100755 --- a/workflow/scripts/generate_versions.py +++ b/workflow/scripts/generate_versions.py 
@@ -28,6 +28,7 @@ SOFTWARE_REGEX = { 'Nextflow': ['version_nextflow.txt', r"(\S+)"], 'cellranger count': ['version_cellranger.txt', r"(\S+)"], 'python': ['version_python.txt', r"(\S+)"], + 'seurat': ['version_seurat.txt', r"(\S+)"], } @@ -77,6 +78,7 @@ def main(): results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' results['cellranger count'] = '<span style="color:#999999;\">N/A</span>' results['python'] = '<span style="color:#999999;\">N/A</span>' + results['seurat'] = '<span style="color:#999999;\">N/A</span>' # Check for version files: check_files(files) diff --git a/workflow/scripts/versions_seurat.sh b/workflow/scripts/versions_seurat.sh new file mode 100644 index 0000000000000000000000000000000000000000..e0a261a99d98e0690cd5b0130fd541f8a7415828 --- /dev/null +++ b/workflow/scripts/versions_seurat.sh @@ -0,0 +1,9 @@ +#!/bin/bash +#versions_seurat.sh +#* +#* -------------------------------------------------------------------------- +#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE) +#* -------------------------------------------------------------------------- +#* + +seurat-Rscript -e 'packageVersion("Seurat")' |& grep '\[1\] ' | sed -n -e 's/^\[1\] ‘//p' | tr -d '’' diff --git a/workflow/tests/test_count.py b/workflow/tests/test_count.py index 86623ca6057dd298ae0294caa1b6ce91d32296ad..449e86fe8c6b5c13111b4863ae821db9a7640357 100644 --- a/workflow/tests/test_count.py +++ b/workflow/tests/test_count.py @@ -33,4 +33,9 @@ def test_count302_count(): @pytest.mark.count310 def test_count310_count(): assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1_metrics_summary.tsv')) - assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1', 'outs')) \ No newline at end of file + assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1', 'outs')) + +@pytest.mark.count400 +def test_count400_count(): + assert os.path.exists(os.path.join(test_output_path, 
'count400', 'sample1_metrics_summary.tsv')) + assert os.path.exists(os.path.join(test_output_path, 'count400', 'sample1', 'outs'))