Skip to content
Snippets Groups Projects
Commit 3944b9de authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '50-seurat.object' into 'develop'

Resolve "Generate Raw RDS file"

Closes #50

See merge request !76
parents 079739df 436b0259
2 merge requests!78Develop,!76Resolve "Generate Raw RDS file"
Pipeline #7882 failed with stages
in 12 seconds
......@@ -3,22 +3,25 @@ before_script:
- module load python/3.6.1-2-anaconda
- pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
- module load nextflow/20.01.0
- module load singularity/3.0.2
- module load singularity/3.5.3
- mkdir -p test_data/hu.v2s1r500
- mkdir -p test_data/hu.v3s1r500
- mkdir -p test_data/mu.v3s1r500
- mkdir -p test_data/hu.v3s2r10k
- mkdir -p test_data/mu.v3s2r10k
- mkdir -p test_data/hu.v2s2r10k
- mkdir -p test_data/output
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s1r500/* test_data/hu.v3s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s1r500/* test_data/mu.v3s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s2r10k/* test_data/mu.v3s2r10k/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s2r10k/* test_data/hu.v2s2r10k/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/output/* test_data/output/
stages:
- astrocyte_test
- module_test
- container_test
- reference_test
- multiSample_test
......@@ -34,8 +37,8 @@ astrocyte_cli:
when:
- always
2.1.1_test:
stage: container_test
module_2.1.1_test:
stage: module_test
only:
- branches
except:
......@@ -45,20 +48,20 @@ astrocyte_cli:
- tags
script:
- module load cellranger/2.1.1
- cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
- cellranger count --id=module-211 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- test/outs/web_summary.html
- module-211/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
3.0.2_test:
stage: container_test
module_3.0.2_test:
stage: module_test
only:
- branches
except:
......@@ -68,20 +71,20 @@ astrocyte_cli:
- tags
script:
- module load cellranger/3.0.2
- cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
- cellranger count --id=module-302 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- test/outs/web_summary.html
- module-302/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
3.1.0_test:
stage: container_test
module_3.1.0_test:
stage: module_test
only:
- branches
except:
......@@ -91,14 +94,172 @@ astrocyte_cli:
- tags
script:
- module load cellranger/3.1.0
- cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
- cellranger count --id=module-310 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- module-310/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
# Smoke test: load the cellranger/4.0.0 environment module and run
# `cellranger count` on the hu.v2s1r500 test data against the
# refdata-gex-GRCh38-2020-A reference. The run's web_summary.html is kept
# as an artifact (even on failure) for 2 days; the job is never retried.
module_4.0.0_test:
stage: module_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
- module load cellranger/4.0.0
- cellranger count --id=module-400 --transcriptome=/project/apps_database/cellranger/refdata-gex-GRCh38-2020-A --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- module-400/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
# Smoke test for the Seurat step: load the seurat/3.0.0 module, link the
# contents of test_data/output into the working directory, run
# downstream_viz.r through seurat-Rscript for sample1, then write the Seurat
# version to version_seurat.txt, which is kept as the job artifact.
module_seurat_test:
stage: module_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
- module load seurat/3.0.0
- ln -sfn test_data/output/* .
- seurat-Rscript workflow/scripts/downstream_viz.r --sample sample1 --cellrangerVersion 4.0.0
- bash workflow/scripts/versions_seurat.sh > version_seurat.txt
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- version_seurat.txt
expire_in: 2 days
retry:
max: 0
when:
- always
# Smoke test: run `cellranger count` 2.1.1 from the bicf/cellranger2.1.1
# container (via singularity) on the hu.v2s1r500 test data. The run's
# web_summary.html is kept as an artifact (even on failure) for 2 days.
container_2.1.1_test:
  stage: container_test
  only:
    - branches
  # Only git refs belong here; a stray artifact path
  # (test/outs/web_summary.html) was removed from this list.
  except:
    refs:
      - develop
      - master
      - tags
  script:
    - singularity run 'docker://bicf/cellranger2.1.1:2.0.0' cellranger count --id=container-211 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - container-211/outs/web_summary.html
    expire_in: 2 days
  retry:
    max: 0
    when:
      - always
# Smoke test: run `cellranger count` 3.0.2 from the bicf/cellranger3.0.2
# container (via singularity) on the hu.v2s1r500 test data. The run's
# web_summary.html is kept as an artifact (even on failure) for 2 days.
container_3.0.2_test:
stage: container_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
- singularity run 'docker://bicf/cellranger3.0.2:2.0.0' cellranger count --id=container-302 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- container-302/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
# Smoke test: run `cellranger count` 3.1.0 from the bicf/cellranger3.1.0
# container (via singularity) on the hu.v2s1r500 test data. The run's
# web_summary.html is kept as an artifact (even on failure) for 2 days.
container_3.1.0_test:
stage: container_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
- singularity run 'docker://bicf/cellranger3.1.0:2.0.0' cellranger count --id=container-310 --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- container-310/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
# Smoke test: run `cellranger count` 4.0.0 from the bicf/cellranger4.0.0
# container (tag 2.0.0_indev, via singularity) on the hu.v2s1r500 test data
# against the refdata-gex-GRCh38-2020-A reference. The run's web_summary.html
# is kept as an artifact (even on failure) for 2 days.
container_4.0.0_test:
stage: container_test
only:
- branches
except:
refs:
- develop
- master
- tags
script:
- singularity run 'docker://bicf/cellranger4.0.0:2.0.0_indev' cellranger count --id=container-400 --transcriptome=/project/apps_database/cellranger/refdata-gex-GRCh38-2020-A --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- container-400/outs/web_summary.html
expire_in: 2 days
retry:
max: 0
when:
- always
GRCh38-2020A:
stage: reference_test
only:
refs:
- develop
- master
except:
- tags
script:
- nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s1r500/*.fastq.gz" --designFile "test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-2020-A' --kitVersion '3GEXv3' --version '4.0.0' --ci true
- pytest -m count400
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count400/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
max: 0
......@@ -129,6 +290,30 @@ GRCh38-3.0.0:
when:
- always
# Reference test: run the full nextflow workflow on the mu.v3s1r500 test data
# with genome 'mm10-2020-A' and cellranger version 4.0.0, then run the pytest
# tests marked count400. Listed under only/refs for develop and master, with
# tags excepted. The nextflow log and the cellranger and MultiQC HTML reports
# are kept as artifacts for 2 days.
mm10-2020A:
stage: reference_test
only:
refs:
- develop
- master
except:
- tags
script:
- nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/mu.v3s1r500/*.fastq.gz" --designFile "test_data/mu.v3s1r500/design.csv" --genome 'mm10-2020-A' --kitVersion '3GEXv3' --version '4.0.0' --ci true
- pytest -m count400
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count400/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
max: 0
when:
- always
mm10-3.0.0:
stage: reference_test
only:
......@@ -160,15 +345,15 @@ mm10-3.0.0:
- master
- tags
script:
- nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.1.0' --ci true
- pytest -m count310
# The genome key must match the 'GRCh38-2020-A' entry defined in the configs.
- nextflow run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-2020-A' --kitVersion 'auto' --version '4.0.0' --ci true
- pytest -m count400
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count310/sample1/outs/web_summary.html
- workflow/output/count310/sample2/outs/web_summary.html
- workflow/output/count400/sample1/outs/web_summary.html
- workflow/output/count400/sample2/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
......
# v2.2.0-indev
**User Facing**
* Add cellranger version 4.0.0
* Add references version 2020-A (GRCh38, mm10, mix)
* Create option to create files for downstream viz and analysis (Seurat R-object)
**Background**
*Known Bugs*
* Vizapp does not yet work for Astrocyte
* Running in CLI: the --fastq path to the file(s) needs to be in quotes
# v2.1.1
**User Facing**
* Check Design File for spaces in name and file contents
......
......@@ -2,7 +2,7 @@ MIT License
Copyright (c) 2019 University of Texas Southwestern Medical Center.
Contributors: Gervaise H. Henry, Jeremy Mathews, and Venkat Malladi
Contributors: Gervaise H. Henry, Jeremy Mathews, Jon Gesell, and Venkat Malladi
Department: Bioinformatic Core Facility, Department of Bioinformatics
......
......@@ -108,12 +108,17 @@ To Run:
* *'3.0.2'*
* *'2.1.1'*
* eg: **--version '3.1.0'**
* **--vizFiles**
* create objects which can be used for downstream visualization and analysis of each sample outputs, currently creates:
* Seurat R-objects
* true/false
* eg: **--vizFiles true**
* **--outDir**
* optional output directory for run
* eg: **--outDir 'test'**
* FULL EXAMPLE:
```
nextflow run workflow/main.nf -profile biohpc,cluster --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --outDir 'test'
nextflow run workflow/main.nf -profile biohpc,cluster --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --vizFiles true --outDir 'test'
```
* Design example:
......
......@@ -100,12 +100,15 @@ workflow_parameters:
- id: genome
type: select
choices:
- ['GRCh38-2020-A', 'Human GRCh38 release 98']
- ['GRCh38-3.0.0', 'Human GRCh38 release 93']
- ['GRCh38-1.2.0', 'Human GRCh38 release 84']
- ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87']
- ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84']
- ['mm10-2020-A', 'Mouse GRCm38 (mm10) release 98']
- ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93']
- ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84']
- ['GRCh38_and_mm10-2020-A', 'Human GRCh38 + Mouse GRCm38 (mm10) release 98']
- ['GRCh38_and_mm10-3.1.0', 'Human GRCh38 + Mouse GRCm38 (mm10) release 93']
- ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93']
- ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84']
......@@ -146,8 +149,9 @@ workflow_parameters:
- id: version
type: select
default: '3.1.0'
default: '4.0.0'
choices:
- ['4.0.0', '4.0.0']
- ['3.1.0', '3.1.0']
- ['3.0.2', '3.0.2']
- ['2.1.1', '2.1.1']
......@@ -155,6 +159,17 @@ workflow_parameters:
description: |
10x cellranger version.
- id: vizFiles
type: select
choices:
- [ 'true', 'Yes' ]
- [ 'false', 'No' ]
default: 'true'
required: true
description: |
Create objects which can be used for downstream visualization and analysis of each sample outputs. Currently created: Seurat R-objects.
- id: astrocyte
type: select
choices:
......
### References
1. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
* Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316-319. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
2. **cellranger**
* Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count)
......@@ -9,5 +9,8 @@
3. **python**:
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
4. **MultiQc**:
4. **Seurat**:
* Stuart, T., Butler, A., Hoffman, P., Hafemeister, C., Papalexi, E., Mauck III, W. M., ... & Satija, R. (2019). Comprehensive integration of single-cell data. Cell, 177(7), 1888-1902. doi:[10.1016/j.cell.2019.05.031](https://doi.org/10.1016/j.cell.2019.05.031)
5. **MultiQc**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
......@@ -12,27 +12,35 @@ process {
cpus = 1
memory = '1 GB'
withLabel: checkDesignFile {
withName: checkDesignFile {
cpus = 2
memory = '1 GB'
}
withLabel: count211 {
withName: count211 {
cpus = 2
memory = '30 GB'
}
withLabel: count302 {
withName: count302 {
cpus = 2
memory = '30 GB'
}
withLabel: count310 {
withName: count310 {
cpus = 2
memory = '30 GB'
}
withLabel: versions {
withName: count400 {
cpus = 2
memory = '30 GB'
}
withName: downstreamViz {
cpus = 2
memory = '1 GB'
}
withName: versions {
cpus = 3
memory = '1 GB'
}
withLabel: multiqc {
withName: multiqc {
cpus = 1
memory = '1 GB'
}
......
params {
// Reference file paths on BioHPC
genomes {
'GRCh38-2020-A' {
loc = '/project/apps_database/cellranger/refdata-gex-'
}
'GRCh38-3.0.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-'
}
......@@ -13,12 +16,18 @@ params {
'hg19-1.2.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-'
}
'mm10-2020-A' {
loc = '/project/apps_database/cellranger/refdata-gex-'
}
'mm10-3.0.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-'
}
'mm10-1.2.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-'
}
'GRCh38_and_mm10-2020-A' {
loc = '/project/apps_database/cellranger/refdata-gex-'
}
'GRCh38_and_mm10-3.1.0' {
loc = '/project/apps_database/cellranger/refdata-cellranger-'
}
......@@ -52,6 +61,11 @@ params {
}
}
singularity {
enabled = true
cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
}
env {
http_proxy = 'http://proxy.swmed.edu:3128'
https_proxy = 'http://proxy.swmed.edu:3128'
......
process {
executor = 'slurm'
queue = 'super'
queue = '32GB'
clusterOptions = '--hold'
withName:trackStart {
withName: trackStart {
executor = 'local'
}
withName:checkDesignFile {
withName: checkDesignFile {
executor = 'local'
}
withName:count211 {
withName: count211 {
queue = '128GB,256GB,256GBv1,384GB'
}
withName:count302 {
withName: count302 {
queue = '128GB,256GB,256GBv1,384GB'
}
withName:count310 {
withName: count310 {
queue = '128GB,256GB,256GBv1,384GB'
}
withName:versions {
withName: count400 {
queue = '128GB,256GB,256GBv1,384GB'
}
withName: downstreamViz {
queue = '32GB'
}
withName: versions {
executor = 'local'
}
withName:multiqc {
withName: multiqc {
executor = 'local'
}
}
......@@ -21,12 +21,12 @@ main.nf
params.name = "run"
params.fastq = "test_data/mu.v3s1r500/*.fastq.gz"
params.designFile = "test_data/mu.v3s1r500/design.csv"
params.genome = 'mm10-3.0.0'
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
params.genome = 'mm10-2020-A'
params.expectCells = 10000
params.forceCells = 0
params.kitVersion = '3GEXv3'
params.version = '3.1.0'
params.version = '4.0.0'
params.vizFiles = true
params.astrocyte = false
params.outDir = "${baseDir}/output"
......@@ -39,7 +39,11 @@ if (params.kitVersion == "3GEXv3" && params.version == '2.1.1') {
// Define variables if astrocyte (or from config)
if (params.astrocyte) {
print("Running under astrocyte")
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
if (params.version == "4.0.0") {
params.genomeLocation = '/project/apps_database/cellranger/refdata-gex-'
} else {
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
}
if (params.kitVersion == "3GEXv1") {
params.chemistryParam ='SC3Pv1'
} else if (params.kitVersion == "3GEXv2") {
......@@ -60,7 +64,7 @@ if (params.astrocyte) {
params.genomeLocationFull = params.genomeLocation+params.genome
// Define variables from input
pipelineVersion = "2.1.1"
pipelineVersion = "2.2.0-indev"
name = params.name
designLocation = Channel
.fromPath(params.designFile)
......@@ -77,6 +81,7 @@ expectCells = params.expectCells
forceCells = params.forceCells
chemistryParam = params.chemistryParam
version = params.version
vizFiles = params.vizFiles
outDir = params.outDir
// Define script files
......@@ -85,6 +90,8 @@ filename_checkScript = Channel.fromPath("$baseDir/scripts/filename_check.sh")
generate_versionsScript = Channel.fromPath("$baseDir/scripts/generate_versions.py")
generate_referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py")
versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh")
versions_seuratScript = Channel.fromPath("$baseDir/scripts/versions_seurat.sh")
downstream_vizScript = Channel.fromPath("$baseDir/scripts/downstream_viz.r")
// Define report files
multiqcConf = "${baseDir}/configs/multiqc_config.yaml"
......@@ -158,21 +165,26 @@ samples.into {
samples211
samples302
samples310
samples400
}
refLocation.into {
refLocation211
refLocation302
refLocation310
refLocation400
}
expectCells211 = expectCells
expectCells302 = expectCells
expectCells310 = expectCells
expectCells400 = expectCells
forceCells211 = forceCells
forceCells302 = forceCells
forceCells310 = forceCells
forceCells400 = forceCells
chemistryParam211 = chemistryParam
chemistryParam302 = chemistryParam
chemistryParam310 = chemistryParam
chemistryParam400 = chemistryParam
/*
......@@ -192,7 +204,8 @@ process count211 {
chemistryParam211
output:
file("**/outs/**") into outPaths211
set sample, file("**/outs/**") into outPaths211
set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**") into filteredOut211
file("*_metrics_summary.tsv") into metricsSummary211
when:
......@@ -239,7 +252,8 @@ process count302 {
chemistryParam302
output:
file("**/outs/**") into outPaths302
set sample, file("**/outs/**") into outPaths302
set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**") into filteredOut302
file("*_metrics_summary.tsv") into metricsSummary302
when:
......@@ -285,7 +299,8 @@ process count310 {
chemistryParam310
output:
file("**/outs/**") into outPaths310
set sample, file("**/outs/**") into outPaths310
set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**"), file("**/outs/analysis/umap/**") into filteredOut310
file("*_metrics_summary.tsv") into metricsSummary310
when:
......@@ -314,6 +329,87 @@ process count310 {
}
}
/*
 * count400: run cellranger count version 4.0.0
 *
 * Runs only when the requested version is '4.0.0'. For each sample it runs
 * `cellranger count` against the staged reference and converts the resulting
 * metrics_summary.csv into a tab-separated file prefixed with the sample name.
 * When forceCells400 is 0 the expected cell count is passed (--expect-cells),
 * otherwise the cell count is forced (--force-cells).
 */
process count400 {
  tag "${sample}"
  publishDir "${outDir}/${task.process}", mode: 'copy'
  queue '128GB,256GB,256GBv1,384GB'
  module 'cellranger/4.0.0'

  input:
    set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz"), file(script) from samples400
    file ref from refLocation400.first()
    expectCells400
    forceCells400
    chemistryParam400

  output:
    // NOTE(review): count211/count302/count310 emit `set sample, file("**/outs/**")`
    // here; this one omits `sample` -- confirm whether that is intended.
    set file("**/outs/**") into outPaths400
    // Per-sample files consumed by downstreamViz (filtered matrix, clustering, pca, tsne, umap)
    set sample, file("**/outs/filtered_*/**"), file("**/outs/analysis/clustering/graphclust/**"), file("**/outs/analysis/clustering/kmeans_2_clusters/**"), file("**/outs/analysis/clustering/kmeans_3_clusters/**"), file("**/outs/analysis/clustering/kmeans_4_clusters/**"), file("**/outs/analysis/clustering/kmeans_5_clusters/**"), file("**/outs/analysis/clustering/kmeans_6_clusters/**"), file("**/outs/analysis/clustering/kmeans_7_clusters/**"), file("**/outs/analysis/clustering/kmeans_8_clusters/**"), file("**/outs/analysis/clustering/kmeans_9_clusters/**"), file("**/outs/analysis/clustering/kmeans_10_clusters/**"), file("**/outs/analysis/pca/**"), file("**/outs/analysis/tsne/**"), file("**/outs/analysis/umap/**") into filteredOut400
    file("*_metrics_summary.tsv") into metricsSummary400

  when:
    version == '4.0.0'

  script:
    // Use this process's own *400 values rather than the count310 ones.
    if (forceCells400 == 0) {
      """
      hostname
      ulimit -u 16384
      ulimit -a
      bash filename_check.sh -r ${ref}
      cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells400} --chemistry=${chemistryParam400}
      sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
      """
    }
    else {
      """
      hostname
      ulimit -u 16384
      ulimit -a
      bash filename_check.sh -r ${ref}
      cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells400} --chemistry=${chemistryParam400}
      sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
      """
    }
}
// Collect all outputs regardless of cellranger version
filteredOut = filteredOut211.mix(filteredOut302, filteredOut310, filteredOut400)
// Combine all inputs for downstreamViz
downstreamVizIn = downstream_vizScript.combine(versions_seuratScript).combine(filteredOut)
/*
* downstreamViz: create files for downstream use (eg. R Seurat object)
*
* Runs downstream_viz.r under the seurat/3.0.0 module once per tuple received
* from downstreamVizIn, publishes the resulting *.rds files to ${outDir}/seurat,
* and writes the Seurat version to version_seurat.txt.
*/
process downstreamViz {
tag "${sample}"
publishDir "${outDir}/seurat", mode: 'copy', pattern: "*.rds"
module 'seurat/3.0.0'
input:
// Tuple layout: the two script files (staged under their own names), the
// sample name, then the cellranger outputs staged into the directories that
// downstream_viz.r reads (filtered/, clustering/<name>/, pca/, tsne/, umap/).
// NOTE(review): filteredOut211 and filteredOut302 carry no umap element while
// this tuple declares one -- confirm how the shorter tuples bind here.
set file("*"), file("*"), sample, file("filtered/*"), file("clustering/graphclust/*"), file("clustering/kmeans_2_clusters/*"), file("clustering/kmeans_3_clusters/*"), file("clustering/kmeans_4_clusters/*"), file("clustering/kmeans_5_clusters/*"), file("clustering/kmeans_6_clusters/*"), file("clustering/kmeans_7_clusters/*"), file("clustering/kmeans_8_clusters/*"), file("clustering/kmeans_9_clusters/*"), file("clustering/kmeans_10_clusters/*"), file("pca/*"), file("tsne/*"), file("umap/*") from downstreamVizIn
//file downstream_vizScript
//file versions_seuratScript
output:
file "*.rds" into seuratPaths
file "version_seurat.txt" into version_seurat
when:
// NOTE(review): when vizFiles is false nothing is emitted on version_seurat,
// which the versions process declares as an input -- confirm versions still runs.
vizFiles
script:
"""
hostname
ulimit -a
seurat-Rscript downstream_viz.r --sample ${sample} --cellrangerVersion ${version}
bash versions_seurat.sh > version_seurat.txt
"""
}
/*
* versions: collect all versions into a single yml
*/
......@@ -323,6 +419,7 @@ process versions {
input:
file versions_pythonScript
file version_seurat
file generate_versionsScript
file generate_referencesScript
......@@ -343,7 +440,7 @@ process versions {
}
// Collect all metrics summaries regardless of cellranger version
metricsSummary = metricsSummary211.mix(metricsSummary302, metricsSummary310)
metricsSummary = metricsSummary211.mix(metricsSummary302, metricsSummary310, metricsSummary400)
/*
* multiqc: create multiqc report
......
profiles {
standard {
includeConfig 'configs/biohpc.config'
includeConfig 'configs/cluster.config'
}
biohpc {
includeConfig 'configs/biohpc.config'
......@@ -47,6 +48,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count'
description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.'
mainScript = 'main.nf'
version = '2.1.1'
version = '2.2.0-indev'
nextflowVersion = '>=0.31.0'
}
if (!require(optparse)) install.packages('optparse',repos='http://cran.us.r-project.org',quiet=TRUE)
library(optparse)
library(Seurat)
# Command-line interface:
#   --sample             sample name, used as the stem of the output .rds file
#   --cellrangerVersion  cellranger version; decides whether a UMAP projection is read
cliOptions <- list(
  make_option("--sample", default="sample1", action="store", type='character', help="sample"),
  make_option("--cellrangerVersion", default="4.0.0", action="store", type='character', help="cellranger Version")
)
opt <- parse_args(OptionParser(option_list=cliOptions))
rm(cliOptions)

# Build the Seurat object from the 10x matrix found in ./filtered/
seuratObj <- CreateSeuratObject(counts=Read10X(data.dir="filtered/"))

# PCA and tSNE projections are always read; UMAP is read for every version
# other than 2.1.1 and 3.0.2.
reductionNames <- c("pca", "tsne")
if (!(opt$cellrangerVersion == "2.1.1" || opt$cellrangerVersion == "3.0.2")) {
  reductionNames <- c(reductionNames, "umap")
}

# Attach each projection (<reduction>/projection.csv) as a dimensional reduction.
for (reduction in reductionNames) {
  # Key prefix: "tSNE" for tsne, otherwise the upper-cased reduction name
  keyPrefix <- if (reduction == "tsne") "tSNE" else toupper(reduction)
  embedding <- read.csv(paste0(reduction, "/projection.csv"), row.names=1)
  # Remove every "-" plus the character following it from the barcode row names
  rownames(embedding) <- gsub("-.", "", rownames(embedding))
  seuratObj[[reduction]] <- CreateDimReducObject(
    embeddings=as.matrix(embedding),
    key=paste0(keyPrefix, "_"),
    assay="RNA"
  )
}

# Attach the graph-based and k-means (k = 2..10) cluster assignments as
# metadata, dropping the orig.ident column after each addition.
clusterSets <- c("graphclust", paste0("kmeans_", 2:10, "_clusters"))
for (clusterSet in clusterSets) {
  assignment <- read.csv(paste0("clustering/", clusterSet, "/clusters.csv"), row.names=1)
  rownames(assignment) <- gsub("-.", "", rownames(assignment))
  seuratObj[[clusterSet]] <- assignment
  keepColumns <- colnames(seuratObj@meta.data) != "orig.ident"
  seuratObj@meta.data <- seuratObj@meta.data[, keepColumns]
}

# Write the object to <sample>.rds in the working directory
saveRDS(seuratObj, paste0(opt$sample, ".rds"))
......@@ -28,6 +28,7 @@ SOFTWARE_REGEX = {
'Nextflow': ['version_nextflow.txt', r"(\S+)"],
'cellranger count': ['version_cellranger.txt', r"(\S+)"],
'python': ['version_python.txt', r"(\S+)"],
'seurat': ['version_seurat.txt', r"(\S+)"],
}
......@@ -77,6 +78,7 @@ def main():
results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
results['cellranger count'] = '<span style="color:#999999;\">N/A</span>'
results['python'] = '<span style="color:#999999;\">N/A</span>'
results['seurat'] = '<span style="color:#999999;\">N/A</span>'
# Check for version files:
check_files(files)
......
#!/bin/bash
#versions_seurat.sh
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*
# Print the Seurat package version reported by seurat-Rscript as a bare string.
# stdout and stderr are both piped (|&); grep keeps lines containing "[1] ",
# sed strips the leading "[1] ‘" and prints only the lines where that
# substitution happened, and tr removes the closing "’".
seurat-Rscript -e 'packageVersion("Seurat")' |& grep '\[1\] ' | sed -n -e 's/^\[1\] ‘//p' | tr -d '’'
......@@ -33,4 +33,9 @@ def test_count302_count():
@pytest.mark.count310
def test_count310_count():
assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1_metrics_summary.tsv'))
assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1', 'outs'))
\ No newline at end of file
assert os.path.exists(os.path.join(test_output_path, 'count310', 'sample1', 'outs'))
@pytest.mark.count400
def test_count400_count():
    """Check that the count400 run wrote sample1's metrics summary and outs directory.

    Named test_count400_count so it no longer redefines (and thereby hides)
    test_count310_count declared earlier in this module.
    """
    assert os.path.exists(os.path.join(test_output_path, 'count400', 'sample1_metrics_summary.tsv'))
    assert os.path.exists(os.path.join(test_output_path, 'count400', 'sample1', 'outs'))
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment