Skip to content
Snippets Groups Projects
Commit 3944b9de authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '50-seurat.object' into 'develop'

Resolve "Generate Raw RDS file"

Closes #50

See merge request !76
parents 079739df 436b0259
Branches
Tags
2 merge requests!78Develop,!76Resolve "Generate Raw RDS file"
Pipeline #7882 failed with stages
in 12 seconds
...@@ -3,22 +3,25 @@ before_script: ...@@ -3,22 +3,25 @@ before_script:
- module load python/3.6.1-2-anaconda - module load python/3.6.1-2-anaconda
- pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1 - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
- module load nextflow/20.01.0 - module load nextflow/20.01.0
- module load singularity/3.0.2 - module load singularity/3.5.3
- mkdir -p test_data/hu.v2s1r500 - mkdir -p test_data/hu.v2s1r500
- mkdir -p test_data/hu.v3s1r500 - mkdir -p test_data/hu.v3s1r500
- mkdir -p test_data/mu.v3s1r500 - mkdir -p test_data/mu.v3s1r500
- mkdir -p test_data/hu.v3s2r10k - mkdir -p test_data/hu.v3s2r10k
- mkdir -p test_data/mu.v3s2r10k - mkdir -p test_data/mu.v3s2r10k
- mkdir -p test_data/hu.v2s2r10k - mkdir -p test_data/hu.v2s2r10k
- mkdir -p test_data/output
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s1r500/* test_data/hu.v3s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s1r500/* test_data/hu.v3s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s1r500/* test_data/mu.v3s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s1r500/* test_data/mu.v3s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s2r10k/* test_data/mu.v3s2r10k/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s2r10k/* test_data/mu.v3s2r10k/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s2r10k/* test_data/hu.v2s2r10k/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s2r10k/* test_data/hu.v2s2r10k/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/output/* test_data/output/
stages: stages:
- astrocyte_test - astrocyte_test
- module_test
- container_test - container_test
- reference_test - reference_test
- multiSample_test - multiSample_test
...@@ -34,8 +37,8 @@ astrocyte_cli: ...@@ -34,8 +37,8 @@ astrocyte_cli:
when: when:
- always - always
2.1.1_test: module_2.1.1_test:
stage: container_test stage: module_test
only: only:
- branches - branches
except: except:
...@@ -45,20 +48,20 @@ astrocyte_cli: ...@@ -45,20 +48,20 @@ astrocyte_cli:
- tags - tags
script: script:
- module load cellranger/2.1.1 - module load cellranger/2.1.1
- cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 - cellranger count --id=module-211 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts: artifacts:
name: "$CI_JOB_NAME" name: "$CI_JOB_NAME"
when: always when: always
paths: paths:
- test/outs/web_summary.html - module-211/outs/web_summary.html
expire_in: 2 days expire_in: 2 days
retry: retry:
max: 0 max: 0
when: when:
- always - always
3.0.2_test: module_3.0.2_test:
stage: container_test stage: module_test
only: only:
- branches - branches
except: except:
...@@ -68,20 +71,20 @@ astrocyte_cli: ...@@ -68,20 +71,20 @@ astrocyte_cli:
- tags - tags
script: script:
- module load cellranger/3.0.2 - module load cellranger/3.0.2
- cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 - cellranger count --id=module-302 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts: artifacts:
name: "$CI_JOB_NAME" name: "$CI_JOB_NAME"
when: always when: always
paths: paths:
- test/outs/web_summary.html - module-302/outs/web_summary.html
expire_in: 2 days expire_in: 2 days
retry: retry:
max: 0 max: 0
when: when:
- always - always
3.1.0_test: module_3.1.0_test:
stage: container_test stage: module_test
only: only:
- branches - branches
except: except:
...@@ -91,14 +94,172 @@ astrocyte_cli: ...@@ -91,14 +94,172 @@ astrocyte_cli:
- tags - tags
script: script:
- module load cellranger/3.1.0 - module load cellranger/3.1.0
- cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 - cellranger count --id=module-310 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- module-310/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
# CI job: smoke-test `cellranger count` using the cellranger/4.0.0 environment module.
# Selected for branch pipelines (`only`); develop, master and tags are listed under `except`.
module_4.0.0_test:
stage: module_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
- module load cellranger/4.0.0
# Uses the 2020-A "refdata-gex" human reference and writes its output under ./module-400
- cellranger count --id=module-400 --transcriptome=/project/apps_database/cellranger/refdata-gex-GRCh38-2020-A --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
# Artifacts are collected whether the job passes or fails
when: always
paths:
- module-400/outs/web_summary.html
expire_in: 2 days
# max: 0 -> the job is never retried automatically
retry:
max: 0
when:
- always
module_seurat_test:
stage: module_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
- module load seurat/3.0.0
- ln -sfn test_data/output/* .
- seurat-Rscript workflow/scripts/downstream_viz.r --sample sample1 --cellrangerVersion 4.0.0
- bash workflow/scripts/versions_seurat.sh > version_seurat.txt
artifacts: artifacts:
name: "$CI_JOB_NAME" name: "$CI_JOB_NAME"
when: always when: always
paths: paths:
- version_seurat.txt
expire_in: 2 days
retry:
max: 0
when:
- always
container_2.1.1_test:
stage: container_test
only:
- branches
except:
refs:
- develop - develop
- master - master
- test/outs/web_summary.html - tags
script:
- singularity run 'docker://bicf/cellranger2.1.1:2.0.0' cellranger count --id=container-211 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- container-211/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
# CI job: smoke-test `cellranger count` inside the bicf/cellranger3.0.2 container via Singularity.
# Selected for branch pipelines (`only`); develop, master and tags are listed under `except`.
container_3.0.2_test:
stage: container_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
# Image is pulled from a docker:// URI; output is written under ./container-302
- singularity run 'docker://bicf/cellranger3.0.2:2.0.0' cellranger count --id=container-302 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
# Artifacts are collected whether the job passes or fails
when: always
paths:
- container-302/outs/web_summary.html
expire_in: 2 days
# max: 0 -> the job is never retried automatically
retry:
max: 0
when:
- always
# CI job: smoke-test `cellranger count` inside the bicf/cellranger3.1.0 container via Singularity.
# Selected for branch pipelines (`only`); develop, master and tags are listed under `except`.
container_3.1.0_test:
stage: container_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
# Image is pulled from a docker:// URI; output is written under ./container-310
- singularity run 'docker://bicf/cellranger3.1.0:2.0.0' cellranger count --id=container-310 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
# Artifacts are collected whether the job passes or fails
when: always
paths:
- container-310/outs/web_summary.html
expire_in: 2 days
# max: 0 -> the job is never retried automatically
retry:
max: 0
when:
- always
# CI job: smoke-test `cellranger count` inside the bicf/cellranger4.0.0 container via Singularity.
# Selected for branch pipelines (`only`); develop, master and tags are listed under `except`.
container_4.0.0_test:
stage: container_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
# Uses the 2020-A "refdata-gex" human reference; output is written under ./container-400.
# NOTE(review): image tag is '2.0.0_indev' while the other container jobs use '2.0.0' - confirm this is intended.
- singularity run 'docker://bicf/cellranger4.0.0:2.0.0_indev' cellranger count --id=container-400 --transcriptome=/project/apps_database/cellranger/refdata-gex-GRCh38-2020-A --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
# Artifacts are collected whether the job passes or fails
when: always
paths:
- container-400/outs/web_summary.html
expire_in: 2 days
# max: 0 -> the job is never retried automatically
retry:
max: 0
when:
- always
GRCh38-2020A:
stage: reference_test
only:
refs:
- develop
- master
except:
- tags
script:
- nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s1r500/*.fastq.gz" --designFile "test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-2020-A' --kitVersion '3GEXv3' --version '4.0.0' --ci true
- pytest -m count400
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count400/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days expire_in: 2 days
retry: retry:
max: 0 max: 0
...@@ -129,6 +290,30 @@ GRCh38-3.0.0: ...@@ -129,6 +290,30 @@ GRCh38-3.0.0:
when: when:
- always - always
# CI job: end-to-end pipeline run against the mouse 2020-A reference with cellranger 4.0.0.
# Restricted to the develop and master refs (`only`); tags are excluded (`except`).
mm10-2020A:
stage: reference_test
only:
refs:
- develop
- master
except:
- tags
script:
# Run the full workflow on the mouse test data, then the pytest cases marked `count400`
- nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/mu.v3s1r500/*.fastq.gz" --designFile "test_data/mu.v3s1r500/design.csv" --genome 'mm10-2020-A' --kitVersion '3GEXv3' --version '4.0.0' --ci true
- pytest -m count400
artifacts:
name: "$CI_JOB_NAME"
# Artifacts are collected whether the job passes or fails
when: always
paths:
- .nextflow.log
- workflow/output/count400/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
# max: 0 -> the job is never retried automatically
retry:
max: 0
when:
- always
mm10-3.0.0: mm10-3.0.0:
stage: reference_test stage: reference_test
only: only:
...@@ -160,15 +345,15 @@ mm10-3.0.0: ...@@ -160,15 +345,15 @@ mm10-3.0.0:
- master - master
- tags - tags
script: script:
- nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.1.0' --ci true - nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-2020A' --kitVersion 'auto' --version '4.0.0' --ci true
- pytest -m count310 - pytest -m count400
artifacts: artifacts:
name: "$CI_JOB_NAME" name: "$CI_JOB_NAME"
when: always when: always
paths: paths:
- .nextflow.log - .nextflow.log
- workflow/output/count310/sample1/outs/web_summary.html - workflow/output/count400/sample1/outs/web_summary.html
- workflow/output/count310/sample2/outs/web_summary.html - workflow/output/count400/sample2/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html - workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days expire_in: 2 days
retry: retry:
......
# v2.2.0-indev
**User Facing**
* Add cellranger version 4.0.0
* Add references version 2020-A (GRCh38, mm10, mix)
* Create option to create files for downstream viz and analysis (Seurat R-object)
**Background**
*Known Bugs*
* Vizapp does not yet work for Astrocyte
* Running in CLI: the --fastq path to the file(s) needs to be in quotes
# v2.1.1 # v2.1.1
**User Facing** **User Facing**
* Check Design File for spaces in name and file contents * Check Design File for spaces in name and file contents
......
...@@ -2,7 +2,7 @@ MIT License ...@@ -2,7 +2,7 @@ MIT License
Copyright (c) 2019 University of Texas Southwestern Medical Center. Copyright (c) 2019 University of Texas Southwestern Medical Center.
Contributors: Gervaise H. Henry, Jeremy Mathews, and Venkat Malladi Contributors: Gervaise H. Henry, Jeremy Mathews, Jon Gesell, and Venkat Malladi
Department: Bioinformatic Core Facility, Department of Bioinformatics Department: Bioinformatic Core Facility, Department of Bioinformatics
......
...@@ -108,12 +108,17 @@ To Run: ...@@ -108,12 +108,17 @@ To Run:
* *'3.0.2'* * *'3.0.2'*
* *'2.1.1'* * *'2.1.1'*
* eg: **--version '3.1.0'** * eg: **--version '3.1.0'**
* **--vizFiles**
* create objects which can be used for downstream visualization and analysis of each sample outputs, currently creates:
* Seurat R-objects
* true/false
* eg: **--vizFiles true**
* **--outDir** * **--outDir**
* optional output directory for run * optional output directory for run
* eg: **--outDir 'test'** * eg: **--outDir 'test'**
* FULL EXAMPLE: * FULL EXAMPLE:
``` ```
nextflow run workflow/main.nf -profile biohpc,cluster --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --outDir 'test' nextflow run workflow/main.nf -profile biohpc,cluster --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --vizFiles true --outDir 'test'
``` ```
* Design example: * Design example:
......
...@@ -100,12 +100,15 @@ workflow_parameters: ...@@ -100,12 +100,15 @@ workflow_parameters:
- id: genome - id: genome
type: select type: select
choices: choices:
- ['GRCh38-2020-A', 'Human GRCh38 release 98']
- ['GRCh38-3.0.0', 'Human GRCh38 release 93'] - ['GRCh38-3.0.0', 'Human GRCh38 release 93']
- ['GRCh38-1.2.0', 'Human GRCh38 release 84'] - ['GRCh38-1.2.0', 'Human GRCh38 release 84']
- ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87'] - ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87']
- ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84'] - ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84']
- ['mm10-2020-A', 'Mouse GRCm38 (mm10) release 98']
- ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93'] - ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93']
- ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84'] - ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84']
- ['GRCh38_and_mm10-2020-A', 'Human GRCh38 + Mouse GRCm38 (mm10) release 98']
- ['GRCh38_and_mm10-3.1.0', 'Human GRCh38 + Mouse GRCm38 (mm10) release 93'] - ['GRCh38_and_mm10-3.1.0', 'Human GRCh38 + Mouse GRCm38 (mm10) release 93']
- ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93'] - ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93']
- ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84'] - ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84']
...@@ -146,8 +149,9 @@ workflow_parameters: ...@@ -146,8 +149,9 @@ workflow_parameters:
- id: version - id: version
type: select type: select
default: '3.1.0' default: '4.0.0'
choices: choices:
- ['4.0.0', '4.0.0']
- ['3.1.0', '3.1.0'] - ['3.1.0', '3.1.0']
- ['3.0.2', '3.0.2'] - ['3.0.2', '3.0.2']
- ['2.1.1', '2.1.1'] - ['2.1.1', '2.1.1']
...@@ -155,6 +159,17 @@ workflow_parameters: ...@@ -155,6 +159,17 @@ workflow_parameters:
description: | description: |
10x cellranger version. 10x cellranger version.
# Workflow parameter: toggle for generating downstream visualization/analysis objects.
# Choices are the quoted strings 'true'/'false' (labelled Yes/No); the default is 'true'.
- id: vizFiles
type: select
choices:
- [ 'true', 'Yes' ]
- [ 'false', 'No' ]
default: 'true'
required: true
description: |
Create objects which can be used for downstream visualization and analysis of each sample outputs. Currently created: Seurat R-objects.
- id: astrocyte - id: astrocyte
type: select type: select
choices: choices:
......
### References ### References
1. **Nextflow**: 1. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316-319. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
2. **cellranger** 2. **cellranger**
* Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count) * Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count)
...@@ -9,5 +9,8 @@ ...@@ -9,5 +9,8 @@
3. **python**: 3. **python**:
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com)) * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
4. **MultiQc**: 4. **Seurat**:
* Stuart, T., Butler, A., Hoffman, P., Hafemeister, C., Papalexi, E., Mauck III, W. M., ... & Satija, R. (2019). Comprehensive integration of single-cell data. Cell, 177(7), 1888-1902. doi:[10.1016/j.cell.2019.05.031](https://doi.org/10.1016/j.cell.2019.05.031)
5. **MultiQc**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
...@@ -12,27 +12,35 @@ process { ...@@ -12,27 +12,35 @@ process {
cpus = 1 cpus = 1
memory = '1 GB' memory = '1 GB'
withLabel: checkDesignFile { withName: checkDesignFile {
cpus = 2 cpus = 2
memory = '1 GB' memory = '1 GB'
} }
withLabel: count211 { withName: count211 {
cpus = 2 cpus = 2
memory = '30 GB' memory = '30 GB'
} }
withLabel: count302 { withName: count302 {
cpus = 2 cpus = 2
memory = '30 GB' memory = '30 GB'
} }
withLabel: count310 { withName: count310 {
cpus = 2 cpus = 2
memory = '30 GB' memory = '30 GB'
} }
withLabel: versions { withName: count400 {
cpus = 2
memory = '30 GB'
}
withName: downstreamViz {
cpus = 2
memory = '1 GB'
}
withName: versions {
cpus = 3 cpus = 3
memory = '1 GB' memory = '1 GB'
} }
withLabel: multiqc { withName: multiqc {
cpus = 1 cpus = 1
memory = '1 GB' memory = '1 GB'
} }
......
params { params {
// Reference file paths on BioHPC // Reference file paths on BioHPC
genomes { genomes {
'GRCh38-2020-A' {
loc = '/project/apps_database/cellranger/refdata-gex-'
}
'GRCh38-3.0.0' { 'GRCh38-3.0.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-' loc = '/project/apps_database/cellranger/refdata-cellranger-'
} }
...@@ -13,12 +16,18 @@ params { ...@@ -13,12 +16,18 @@ params {
'hg19-1.2.0' { 'hg19-1.2.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-' loc = '/project/apps_database/cellranger/refdata-cellranger-'
} }
'mm10-2020-A' {
loc = '/project/apps_database/cellranger/refdata-gex-'
}
'mm10-3.0.0' { 'mm10-3.0.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-' loc = '/project/apps_database/cellranger/refdata-cellranger-'
} }
'mm10-1.2.0' { 'mm10-1.2.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-' loc = '/project/apps_database/cellranger/refdata-cellranger-'
} }
'GRCh38_and_mm10-2020-A' {
loc = '/project/apps_database/cellranger/refdata-gex-'
}
'GRCh38_and_mm10-3.1.0' { 'GRCh38_and_mm10-3.1.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-' loc = '/project/apps_database/cellranger/refdata-cellranger-'
} }
...@@ -52,6 +61,11 @@ params { ...@@ -52,6 +61,11 @@ params {
} }
} }
// Turn on Singularity for the pipeline and set a fixed cache directory
// under /project for the images it uses.
singularity {
enabled = true
cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
}
env { env {
http_proxy = 'http://proxy.swmed.edu:3128' http_proxy = 'http://proxy.swmed.edu:3128'
https_proxy = 'http://proxy.swmed.edu:3128' https_proxy = 'http://proxy.swmed.edu:3128'
......
process { process {
executor = 'slurm' executor = 'slurm'
queue = 'super' queue = '32GB'
clusterOptions = '--hold' clusterOptions = '--hold'
withName:trackStart { withName: trackStart {
executor = 'local' executor = 'local'
} }
withName:checkDesignFile { withName: checkDesignFile {
executor = 'local' executor = 'local'
} }
withName:count211 { withName: count211 {
queue = '128GB,256GB,256GBv1,384GB' queue = '128GB,256GB,256GBv1,384GB'
} }
withName:count302 { withName: count302 {
queue = '128GB,256GB,256GBv1,384GB' queue = '128GB,256GB,256GBv1,384GB'
} }
withName:count310 { withName: count310 {
queue = '128GB,256GB,256GBv1,384GB' queue = '128GB,256GB,256GBv1,384GB'
} }
withName:versions { withName: count400 {
queue = '128GB,256GB,256GBv1,384GB'
}
withName: downstreamViz {
queue = '32GB'
}
withName: versions {
executor = 'local' executor = 'local'
} }
withName:multiqc { withName: multiqc {
executor = 'local' executor = 'local'
} }
} }
...@@ -21,12 +21,12 @@ main.nf ...@@ -21,12 +21,12 @@ main.nf
params.name = "run" params.name = "run"
params.fastq = "test_data/mu.v3s1r500/*.fastq.gz" params.fastq = "test_data/mu.v3s1r500/*.fastq.gz"
params.designFile = "test_data/mu.v3s1r500/design.csv" params.designFile = "test_data/mu.v3s1r500/design.csv"
params.genome = 'mm10-3.0.0' params.genome = 'mm10-2020-A'
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
params.expectCells = 10000 params.expectCells = 10000
params.forceCells = 0 params.forceCells = 0
params.kitVersion = '3GEXv3' params.kitVersion = '3GEXv3'
params.version = '3.1.0' params.version = '4.0.0'
params.vizFiles = true
params.astrocyte = false params.astrocyte = false
params.outDir = "${baseDir}/output" params.outDir = "${baseDir}/output"
...@@ -39,7 +39,11 @@ if (params.kitVersion == "3GEXv3" && params.version == '2.1.1') { ...@@ -39,7 +39,11 @@ if (params.kitVersion == "3GEXv3" && params.version == '2.1.1') {
// Define variables if astrocyte (or from config) // Define variables if astrocyte (or from config)
if (params.astrocyte) { if (params.astrocyte) {
print("Running under astrocyte") print("Running under astrocyte")
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' if (params.version == "4.0.0") {
params.genomeLocation = '/project/apps_database/cellranger/refdata-gex-'
} else {
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
}
if (params.kitVersion == "3GEXv1") { if (params.kitVersion == "3GEXv1") {
params.chemistryParam ='SC3Pv1' params.chemistryParam ='SC3Pv1'
} else if (params.kitVersion == "3GEXv2") { } else if (params.kitVersion == "3GEXv2") {
...@@ -60,7 +64,7 @@ if (params.astrocyte) { ...@@ -60,7 +64,7 @@ if (params.astrocyte) {
params.genomeLocationFull = params.genomeLocation+params.genome params.genomeLocationFull = params.genomeLocation+params.genome
// Define variables from input // Define variables from input
pipelineVersion = "2.1.1" pipelineVersion = "2.2.0-indev"
name = params.name name = params.name
designLocation = Channel designLocation = Channel
.fromPath(params.designFile) .fromPath(params.designFile)
...@@ -77,6 +81,7 @@ expectCells = params.expectCells ...@@ -77,6 +81,7 @@ expectCells = params.expectCells
forceCells = params.forceCells forceCells = params.forceCells
chemistryParam = params.chemistryParam chemistryParam = params.chemistryParam
version = params.version version = params.version
vizFiles = params.vizFiles
outDir = params.outDir outDir = params.outDir
// Define script files // Define script files
...@@ -85,6 +90,8 @@ filename_checkScript = Channel.fromPath("$baseDir/scripts/filename_check.sh") ...@@ -85,6 +90,8 @@ filename_checkScript = Channel.fromPath("$baseDir/scripts/filename_check.sh")
generate_versionsScript = Channel.fromPath("$baseDir/scripts/generate_versions.py") generate_versionsScript = Channel.fromPath("$baseDir/scripts/generate_versions.py")
generate_referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py") generate_referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py")
versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh") versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh")
versions_seuratScript = Channel.fromPath("$baseDir/scripts/versions_seurat.sh")
downstream_vizScript = Channel.fromPath("$baseDir/scripts/downstream_viz.r")
// Define report files // Define report files
multiqcConf = "${baseDir}/configs/multiqc_config.yaml" multiqcConf = "${baseDir}/configs/multiqc_config.yaml"
...@@ -158,21 +165,26 @@ samples.into { ...@@ -158,21 +165,26 @@ samples.into {
samples211 samples211
samples302 samples302
samples310 samples310
samples400
} }
refLocation.into { refLocation.into {
refLocation211 refLocation211
refLocation302 refLocation302
refLocation310 refLocation310
refLocation400
} }
expectCells211 = expectCells expectCells211 = expectCells
expectCells302 = expectCells expectCells302 = expectCells
expectCells310 = expectCells expectCells310 = expectCells
expectCells400 = expectCells
forceCells211 = forceCells forceCells211 = forceCells
forceCells302 = forceCells forceCells302 = forceCells
forceCells310 = forceCells forceCells310 = forceCells
forceCells400 = forceCells
chemistryParam211 = chemistryParam chemistryParam211 = chemistryParam
chemistryParam302 = chemistryParam chemistryParam302 = chemistryParam
chemistryParam310 = chemistryParam chemistryParam310 = chemistryParam
chemistryParam400 = chemistryParam
/* /*
...@@ -192,7 +204,8 @@ process count211 { ...@@ -192,7 +204,8 @@ process count211 {
chemistryParam211 chemistryParam211
output: output:
file("**/outs/**") into outPaths211 set sample, file("**/outs/**") into outPaths211
set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**") into filteredOut211
file("*_metrics_summary.tsv") into metricsSummary211 file("*_metrics_summary.tsv") into metricsSummary211
when: when:
...@@ -239,7 +252,8 @@ process count302 { ...@@ -239,7 +252,8 @@ process count302 {
chemistryParam302 chemistryParam302
output: output:
file("**/outs/**") into outPaths302 set sample, file("**/outs/**") into outPaths302
set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**") into filteredOut302
file("*_metrics_summary.tsv") into metricsSummary302 file("*_metrics_summary.tsv") into metricsSummary302
when: when:
...@@ -285,7 +299,8 @@ process count310 { ...@@ -285,7 +299,8 @@ process count310 {
chemistryParam310 chemistryParam310
output: output:
file("**/outs/**") into outPaths310 set sample, file("**/outs/**") into outPaths310
set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**"), file("**/outs/analysis/umap/**") into filteredOut310
file("*_metrics_summary.tsv") into metricsSummary310 file("*_metrics_summary.tsv") into metricsSummary310
when: when:
...@@ -314,6 +329,87 @@ process count310 { ...@@ -314,6 +329,87 @@ process count310 {
} }
} }
/*
* count400: run cellranger count version 4.0.0
*/
process count400 {
  // Runs `cellranger count` (cellranger/4.0.0 module) once per sample and
  // flattens the run's metrics_summary.csv into a tab-separated, sample-prefixed
  // file. Only executes when the requested cellranger version is '4.0.0'.
  tag "${sample}"
  publishDir "${outDir}/${task.process}", mode: 'copy'
  queue '128GB,256GB,256GBv1,384GB'
  module 'cellranger/4.0.0'

  input:
  // Per-sample tuple: sample name, R1/R2 fastq files staged under the
  // cellranger naming pattern, and the filename-check helper script.
  set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz"), file(script) from samples400
  // The same reference directory is reused for every sample.
  file ref from refLocation400.first()
  expectCells400
  forceCells400
  chemistryParam400

  output:
  // NOTE(review): count211/count302/count310 emit `set sample, file("**/outs/**")`
  // on their outPaths channel; this one omits `sample`. Left unchanged because the
  // consumer of outPaths400 is not visible here - confirm the intended shape.
  set file("**/outs/**") into outPaths400
  // Subset of outputs handed to downstreamViz (filtered matrices, clustering, pca, tsne, umap).
  set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**"), file("**/outs/analysis/umap/**") into filteredOut400
  file("*_metrics_summary.tsv") into metricsSummary400

  when:
  version == '4.0.0'

  script:
  // forceCells400 == 0 means "do not force": pass --expect-cells instead of --force-cells.
  // The script uses this process's own *400 values (previously it interpolated the
  // count310 variables, a copy-paste slip from the count310 process).
  if (forceCells400 == 0) {
    """
    hostname
    ulimit -u 16384
    ulimit -a
    bash filename_check.sh -r ${ref}
    cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells400} --chemistry=${chemistryParam400}
    sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
    """
  }
  else {
    """
    hostname
    ulimit -u 16384
    ulimit -a
    bash filename_check.sh -r ${ref}
    cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells400} --chemistry=${chemistryParam400}
    sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
    """
  }
}
// Collect all outputs regardless of cellranger version
filteredOut = filteredOut211.mix(filteredOut302, filteredOut310, filteredOut400)
// Combine all inputs for downstreamViz
downstreamVizIn = downstream_vizScript.combine(versions_seuratScript).combine(filteredOut)
/*
* downstreamViz: create files for downstream use (eg. R Seurat object)
*/
// Per-sample process: runs downstream_viz.r through seurat-Rscript (seurat/3.0.0 module)
// and records the Seurat version via versions_seurat.sh. Only the *.rds outputs are
// published, to <outDir>/seurat. Runs only when `vizFiles` is truthy.
process downstreamViz {
tag "${sample}"
publishDir "${outDir}/seurat", mode: 'copy', pattern: "*.rds"
module 'seurat/3.0.0'
input:
// Tuple layout follows downstreamVizIn: the two leading file("*") entries are the staged
// downstream_viz.r and versions_seurat.sh scripts, followed by the sample name and the
// cellranger output groups, each staged into its own sub-directory.
// NOTE(review): the tuple ends with file("umap/*"), but filteredOut211 and filteredOut302
// are declared without a umap element - confirm how those versions are expected to bind.
set file("*"), file("*"), sample, file("filtered/*"), file("clustering/graphclust/*"), file("clustering/kmeans_2_clusters/*"), file("clustering/kmeans_3_clusters/*"), file("clustering/kmeans_4_clusters/*"), file("clustering/kmeans_5_clusters/*"), file("clustering/kmeans_6_clusters/*"), file("clustering/kmeans_7_clusters/*"), file("clustering/kmeans_8_clusters/*"), file("clustering/kmeans_9_clusters/*"), file("clustering/kmeans_10_clusters/*"), file("pca/*"), file("tsne/*"), file("umap/*") from downstreamVizIn
// Superseded by the combined tuple above; kept commented out as in the original.
//file downstream_vizScript
//file versions_seuratScript
output:
file "*.rds" into seuratPaths
file "version_seurat.txt" into version_seurat
when:
vizFiles
script:
"""
hostname
ulimit -a
seurat-Rscript downstream_viz.r --sample ${sample} --cellrangerVersion ${version}
bash versions_seurat.sh > version_seurat.txt
"""
}
/* /*
* versions: collect all versions into a single yml * versions: collect all versions into a single yml
*/ */
...@@ -323,6 +419,7 @@ process versions { ...@@ -323,6 +419,7 @@ process versions {
input: input:
file versions_pythonScript file versions_pythonScript
file version_seurat
file generate_versionsScript file generate_versionsScript
file generate_referencesScript file generate_referencesScript
...@@ -343,7 +440,7 @@ process versions { ...@@ -343,7 +440,7 @@ process versions {
} }
// Collect all metrics summaries regardless of cellranger version // Collect all metrics summaries regardless of cellranger version
metricsSummary = metricsSummary211.mix(metricsSummary302, metricsSummary310) metricsSummary = metricsSummary211.mix(metricsSummary302, metricsSummary310, metricsSummary400)
/* /*
* multiqc: create multiqc report * multiqc: create multiqc report
......
profiles { profiles {
standard { standard {
includeConfig 'configs/biohpc.config' includeConfig 'configs/biohpc.config'
includeConfig 'configs/cluster.config'
} }
biohpc { biohpc {
includeConfig 'configs/biohpc.config' includeConfig 'configs/biohpc.config'
...@@ -47,6 +48,6 @@ manifest { ...@@ -47,6 +48,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count' homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count'
description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.' description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.'
mainScript = 'main.nf' mainScript = 'main.nf'
version = '2.1.1' version = '2.2.0-indev'
nextflowVersion = '>=0.31.0' nextflowVersion = '>=0.31.0'
} }
# Build a Seurat object from cellranger count outputs found in the current
# working directory and save it as "<sample>.rds".
#
# Expected inputs (relative paths):
#   filtered/                         10x filtered feature-barcode matrix
#   <reduction>/projection.csv        one per dimensional reduction (pca, tsne, umap)
#   clustering/<name>/clusters.csv    graphclust and kmeans_2..10_clusters
#
# Options:
#   --sample             name used for the output file (default "sample1")
#   --cellrangerVersion  cellranger version that produced the inputs (default "4.0.0");
#                        the umap projection is skipped for 2.1.1 and 3.0.2

if (!require(optparse)) install.packages('optparse',repos='http://cran.us.r-project.org',quiet=TRUE)
library(optparse)
library(Seurat)

option_list=list(
  make_option("--sample",default="sample1",action="store",type='character',help="sample"),
  make_option("--cellrangerVersion",default="4.0.0",action="store",type='character',help="cellranger Version")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)

# Remove the trailing "-<digits>" suffix from barcodes (e.g. "ACGT-1" -> "ACGT").
# The pattern is anchored to the end of the string so that only the suffix is
# removed; an unanchored "-." would match any dash followed by any character.
# NOTE(review): presumably needed so row names match the cell names produced by
# Read10X for the Seurat version in use - confirm.
stripBarcodeSuffix <- function(barcodes){
  sub("-[0-9]+$","",barcodes)
}

data <- Read10X(data.dir="filtered/")
data <- CreateSeuratObject(counts=data)

# Attach the cellranger projections as dimensional reductions
dimReductions <- c("pca","tsne")
if (opt$cellrangerVersion!="2.1.1" && opt$cellrangerVersion!="3.0.2"){
  dimReductions <- c(dimReductions,"umap")
}
for (reduction in dimReductions){
  # Key prefix: "tSNE" for tsne, otherwise the upper-cased reduction name
  if (reduction=="tsne"){
    lab <- "tSNE"
  } else {
    lab <- toupper(reduction)
  }
  projection <- read.csv(paste0(reduction,"/projection.csv"),row.names=1)
  rownames(projection) <- stripBarcodeSuffix(rownames(projection))
  data[[reduction]] <- CreateDimReducObject(embeddings=as.matrix(projection),key=paste0(lab,"_"),assay="RNA")
}

# Attach each cellranger clustering as a metadata column named after the clustering
clust <- c("graphclust",paste0("kmeans_",2:10,"_clusters"))
for (clustering in clust){
  clusters <- read.csv(paste0("clustering/",clustering,"/clusters.csv"),row.names=1)
  rownames(clusters) <- stripBarcodeSuffix(rownames(clusters))
  data[[clustering]] <- clusters
}

# Drop the orig.ident column once, after all clusterings are added (this does not
# depend on the loop variable); drop=FALSE keeps meta.data a data.frame even if
# only one column remains.
data@meta.data <- data@meta.data[,colnames(data@meta.data)!="orig.ident",drop=FALSE]

saveRDS(data,paste0(opt$sample,".rds"))
...@@ -28,6 +28,7 @@ SOFTWARE_REGEX = { ...@@ -28,6 +28,7 @@ SOFTWARE_REGEX = {
'Nextflow': ['version_nextflow.txt', r"(\S+)"], 'Nextflow': ['version_nextflow.txt', r"(\S+)"],
'cellranger count': ['version_cellranger.txt', r"(\S+)"], 'cellranger count': ['version_cellranger.txt', r"(\S+)"],
'python': ['version_python.txt', r"(\S+)"], 'python': ['version_python.txt', r"(\S+)"],
'seurat': ['version_seurat.txt', r"(\S+)"],
} }
...@@ -77,6 +78,7 @@ def main(): ...@@ -77,6 +78,7 @@ def main():
results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
results['cellranger count'] = '<span style="color:#999999;\">N/A</span>' results['cellranger count'] = '<span style="color:#999999;\">N/A</span>'
results['python'] = '<span style="color:#999999;\">N/A</span>' results['python'] = '<span style="color:#999999;\">N/A</span>'
results['seurat'] = '<span style="color:#999999;\">N/A</span>'
# Check for version files: # Check for version files:
check_files(files) check_files(files)
......
#!/bin/bash
#versions_seurat.sh
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*
# Print the installed Seurat package version on stdout.
# - seurat-Rscript evaluates packageVersion("Seurat"); |& merges stderr into the pipe
# - grep keeps only lines containing "[1] "
# - sed drops the leading "[1] ‘" and prints only lines where that prefix was found
# - tr removes the closing ’
# NOTE(review): the match relies on R printing typographic quotes (‘ ’); presumably
# a UTF-8 locale is in effect - confirm, otherwise nothing is printed.
seurat-Rscript -e 'packageVersion("Seurat")' |& grep '\[1\] ' | sed -n -e 's/^\[1\] ‘//p' | tr -d '’'
...@@ -33,4 +33,9 @@ def test_count302_count(): ...@@ -33,4 +33,9 @@ def test_count302_count():
@pytest.mark.count310 @pytest.mark.count310
def test_count310_count(): def test_count310_count():
assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1_metrics_summary.tsv')) assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1_metrics_summary.tsv'))
assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1', 'outs')) assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1', 'outs'))
\ No newline at end of file
@pytest.mark.count400
def test_count400_count():
    """Check that the cellranger 4.0.0 count run produced its expected outputs.

    Named test_count400_count (not test_count310_count) so it does not rebind
    the existing count310 test in this module, which would stop pytest from
    collecting that test.
    """
    # Per-sample metrics summary written by the count process
    assert os.path.exists(os.path.join(test_output_path, 'count400', 'sample1_metrics_summary.tsv'))
    # cellranger "outs" directory for the sample
    assert os.path.exists(os.path.join(test_output_path, 'count400', 'sample1', 'outs'))
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment