Skip to content
Snippets Groups Projects
Commit fa36fc6f authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch 'develop' into '10-count.features'

Develop

See merge request !29
parents 4f99d15c 8f536aab
Branches
Tags
1 merge request!29Develop
Pipeline #3331 passed with stages
in 21 minutes and 42 seconds
......@@ -300,9 +300,13 @@ $RECYCLE.BIN/
/workflow/.nextflow/*
/workflow/work/*
/workflow/output/*
/.nextflow/*
/data/*
/work/*
/output/*
pipeline_trace*.txt*
.nextflow*.log*
report.html*
report*.html*
timeline*.html*
*~
......
before_script:
- module load astrocyte
- module load python/3.6.1-2-anaconda
- module load nextflow/0.27.6
- ln -s /project/shared/bicf_workflow_ref/workflow_testdata/cellranger_count/*fastq.gz test_data/
- ln -s /project/shared/bicf_workflow_ref/workflow_testdata/cellranger_count/design.csv test_data/
- module load nextflow/0.31.1_Ignite
- mkdir test_data/hu.v2s1r500
- mkdir test_data/mu.v2s2r10k
- mkdir test_data/hu.v3s2r10k
- ln -s /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/
- ln -s /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v2s2r10k/* test_data/mu.v2s2r10k/
- ln -s /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/
stages:
- integration
- astrocyte
- simple
- detailed
simple_test:
stage: integration
astrocyte_check:
stage: astrocyte
script:
- nextflow run workflow/main.nf
- astrocyte_cli check ../cellranger_count
run_hu.cr3v2ref3.0.0:
stage: simple
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v2s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v2s1r500/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'two' --version '3.0.2'
run_mu.cr2v2ref1.2.0:
stage: detailed
except:
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v2s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v2s2r10k/design.csv" --genome 'mm10-1.2.0' --kitVersion 'auto' --version '2.1.1'
run_hu.cr3v3ref3.0.0:
stage: detailed
except:
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.0.2'
......@@ -10,4 +10,85 @@ The pipeline uses Nextflow, a bioinformatics workflow tool.
This pipeline is primarily used with a SLURM cluster on the BioHPC Cluster. However, the pipeline should be able to run on any system that Nextflow supports.
Additionally, the pipeline is designed to work with Astrocyte Workflow System using a simple web interface.
\ No newline at end of file
Additionally, the pipeline is designed to work with Astrocyte Workflow System using a simple web interface.
To Run:
-------
* Available parameters:
* **--fastq**
* path to the fastq location
* R1 and R2 only necessary but can include I2
* eg: **--fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/\*.fastq.gz'**
* **--designFile**
* path to design file (csv format) location
* column 1 = "Sample"
* column 2 = "fastq_R1"
* column 3 = "fastq_R2"
* can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples
* eg: **--designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/design.csv'**
* **--genome**
* reference genome
* requires workflow/conf/biohpc.config to work
* name of available 10x Genomics premade reference genomes:
* *'GRCh38-3.0.0'* = Human GRCh38 release 93
* *'GRCh38-1.2.0'* = Human GRCh38 release 84
* *'hg19-3.0.0'* = Human GRCh37 (hg19) release 87
* *'hg19-1.2.0'* = Human GRCh37 (hg19) release 84
* *'mm10-3.0.0'* = Mouse GRCm38 (mm10) release 93
* *'mm10-1.2.0'* = Mouse GRCm38 (mm10) release 84
* *'hg19_and_mm10-3.0.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93
* *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84
* *'ercc92-1.2.0'* = ERCC.92 Spike-In
* if --genome is used then --genomeLocationFull is not necessary
* eg: **--genome 'GRCh38-3.0.0'**
* **--genomeLocationFull**
* path to a custom genome
* if --genomeLocationFull is used --genome is not necessary and is overwritten
* eg. **--genomeLocationFull '/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0'**
* **--expectCells**
* expected number of cells to be detected
* guides cellranger in its cutoff for background/low quality cells
* as a guide it doesn't have to be exact
* 0-10000
* if --expectCells is used then --forceCells is not necessary
* only used if --forceCells is not entered or set to 0
* eg: **--expectCells 10000**
* **--forceCells**
* forces filtering of the top number of cells matching this parameter
* 0-10000
* if --forceCells is used then --expectCells is not necessary and is overwritten
* eg: **--forceCells 10000**
* **--kitVersion**
* the library chemistry version number for the 10x Genomics Gene Expression kit
* setting to auto will attempt to autodetect from the detected cycle strategy in the fastq's
* version numbers are spelled out
* --kitVersion is only used if --version (cellranger version) is > 2
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* options:
* *'auto'*
* *'three'*
* *'two'*
* eg: **--kitVersion 'three'**
* **--version**
* cellranger version
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* options:
* *'3.0.2'*
* *'3.0.1'*
* *'2.1.1'*
* eg: **--version '3.0.2'**
* **--outDir**
* optional output directory for run
* eg: **--outDir 'test'**
* FULL EXAMPLE:
**nextflow run workflow/main.nf --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/\*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.0.2' --outDir 'test'**
* Design example:
| Sample | fastq_R1 | fastq_R2 |
|---------|------------------------------------|------------------------------------|
| sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | pbmc_1k_v2_S1_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz |
\ No newline at end of file
......@@ -133,11 +133,11 @@ workflow_parameters:
default: 'auto'
choices:
- [ 'auto', 'Auto Detect']
- [ '3', '3']
- [ '2', '2']
- [ 'three', '3']
- [ 'two', '2']
required: true
description: |
10x single cell gene expression chemistry version (only used in cellranger version 2.x).
10x single cell gene expression chemistry version (only used in cellranger version 3.x).
- id: version
type: select
......@@ -160,6 +160,15 @@ workflow_parameters:
description: |
Additional features to count (only used in cellranger version 3+, ignored otherwise).
- id: astrocyte
type: select
choices:
- [ 'true', 'true' ]
required: true
default: 'true'
description: |
Ensure configuration for astrocyte.
# -----------------------------------------------------------------------------
# SHINY APP CONFIGURATION
# -----------------------------------------------------------------------------
......@@ -178,5 +187,4 @@ vizapp_cran_packages:
# List of any Bioconductor packages, not provided by the modules,
# that must be made available to the vizapp
vizapp_bioc_packages:
- chipseq
vizapp_bioc_packages: []
Sample,fastq_R1,fastq_R2
sample1,pbmc_1k_v2_S1_L001_R1_001.fastq.gz,pbmc_1k_v2_S1_L001_R2_001.fastq.gz
sample2,pbmc_1k_v2_S2_L001_R1_001.fastq.gz,pbmc_1k_v2_S2_L001_R2_001.fastq.gz
sample2,pbmc_1k_v2_S2_L002_R1_001.fastq.gz,pbmc_1k_v2_S2_L002_R2_001.fastq.gz
# Astrocyte CellRanger 10x Workflow Package
10x Genomics scRNA-Seq (cellranger) count Pipeline
========================================
## Workflow SOP
Introduction
------------
This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.
The pipeline uses Nextflow, a bioinformatics workflow tool.
To Run:
-------
* Workflow parameters:
* **fastq**
* Pairs (read1 and read2) of fastq.gz files from sequencing of a 10x single-cell experiment. Index fastq not required.
* REQUIRED
* R1 and R2 only necessary
* **design file**
* A design file listing sample, corresponding read1 filename, corresponding read2 filename. There can be multiple rows with the same sample name, if there are multiple fastq's for that sample.
* REQUIRED
* column 1 = "Sample"
* column 2 = "fastq_R1"
* column 3 = "fastq_R2"
* can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples
* eg: can be downloaded [HERE](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/8db3e25c13cb1463c2a50e510159c72380ae5826/docs/design.csv)
* **genome**
* Reference species and genome used for alignment and subsequent analysis.
* name of available 10x Genomics premade reference genomes:
* *'GRCh38-3.0.0'* = Human GRCh38 release 93
* *'GRCh38-1.2.0'* = Human GRCh38 release 84
* *'hg19-3.0.0'* = Human GRCh37 (hg19) release 87
* *'hg19-1.2.0'* = Human GRCh37 (hg19) release 84
* *'mm10-3.0.0'* = Mouse GRCm38 (mm10) release 93
* *'mm10-1.2.0'* = Mouse GRCm38 (mm10) release 84
* *'hg19_and_mm10-3.0.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93
* *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84
* *'ercc92-1.2.0'* = ERCC.92 Spike-In
* **expect cells**
* Expected number of recovered cells.
* guides cellranger in its cutoff for background/low quality cells
* as a guide it doesn't have to be exact
* 0-10000
* if expect cells is used then force cells is not necessary
* only used if force cells is not entered or set to 0
* **force cells**
* Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells.
* 0-10000
* if force cells is used then expected cells is not necessary and is ignored
* **chemistry version**
* 10x single cell gene expression chemistry version (only used in cellranger version 3.x).
* setting to auto will attempt to autodetect from the detected cycle strategy in the fastq's
* chemistry version is only used if cellranger version is > 2.x
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
* **cellranger version**
* 10x cellranger version.
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
* Design example:
| Sample | fastq_R1 | fastq_R2 |
|---------|------------------------------------|------------------------------------|
| sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | pbmc_1k_v2_S1_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz |
// Execution profiles. The `standard` profile includes the settings from
// workflow/conf/biohpc.config (path as written in the includeConfig below).
profiles {
standard {
includeConfig 'workflow/conf/biohpc.config'
}
}
......@@ -9,15 +9,15 @@ process {
}
$count211 {
module = ['cellranger/2.1.1']
memory = '120GB'
queue = '128GB,256GB,256GBv1,384GB'
}
$count301 {
module = ['cellranger/3.0.1']
memory = '120GB'
queue = '128GB,256GB,256GBv1,384GB'
}
$count302 {
module = ['cellranger/3.0.2']
memory = '120GB'
queue = '128GB,256GB,256GBv1,384GB'
}
}
......@@ -57,13 +57,13 @@ params {
'auto' {
param = 'auto'
}
'1' {
'one' {
param = 'SC3Pv1'
}
'2' {
'two' {
param = 'SC3Pv2'
}
'3' {
'three' {
param = 'SC3Pv3'
}
}
......
......@@ -7,16 +7,34 @@
params.fastq = "$baseDir/../test_data/*.fastq.gz"
params.designFile = "$baseDir/../test_data/design.csv"
params.genome = 'GRCh38-3.0.0'
params.genomes = []
params.genomeLocation = params.genome ? params.genomes[ params.genome ].loc ?: false : false
params.expectCells = 10000
params.forceCells = 0
params.kitVersion = '3'
params.chemistry = []
params.chemistryParam = params.kitVersion ? params.chemistry[ params.kitVersion ].param ?: false : false
params.kitVersion = 'three'
params.version = '3.0.2'
params.astrocyte = false
params.outDir = "$baseDir/output"
// Assign variables if astrocyte.
// Resolves params.genomeLocation and params.chemistryParam in one of two ways:
//  - params.astrocyte truthy: both values are hard-coded in this script;
//  - otherwise: both are looked up in the params.genomes / params.chemistry
//    collections, keyed by params.genome and params.kitVersion.
if (params.astrocyte) {
print("Running under astrocyte")
// Path prefix only; params.genome is appended to it right after this block
// to form params.genomeLocationFull.
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
// Translate the spelled-out kit version into the chemistry value handed to
// cellranger; any value other than "one"/"two"/"three" falls back to 'auto'.
if (params.kitVersion == "one") {
params.chemistryParam ='SC3Pv1'
} else if (params.kitVersion == "two") {
params.chemistryParam ='SC3Pv2'
} else if (params.kitVersion == "three") {
params.chemistryParam ='SC3Pv3'
} else {
params.chemistryParam = 'auto'
}
} else {
// NOTE(review): the empty-list defaults below are presumably superseded by
// maps supplied from an included config file (keyed by genome name / kit
// version) — confirm against the config before relying on this lookup.
params.genomes = []
// Evaluates to false when params.genome is falsy or the entry has no `loc`.
params.genomeLocation = params.genome ? params.genomes[ params.genome ].loc ?: false : false
params.chemistry = []
// Evaluates to false when params.kitVersion is falsy or the entry has no `param`.
params.chemistryParam = params.kitVersion ? params.chemistry[ params.kitVersion ].param ?: false : false
}
params.genomeLocationFull = params.genomeLocation+params.genome
// Define regular variables
designLocation = Channel
.fromPath(params.designFile)
......@@ -27,7 +45,7 @@ fastqList = Channel
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t") }
.collectFile(name: 'fileList.tsv', newLine: true)
refLocation = Channel
.fromPath(params.genomeLocation+params.genome)
.fromPath(params.genomeLocationFull)
.ifEmpty { exit 1, "referene not found: ${params.genome}" }
expectCells = params.expectCells
forceCells = params.forceCells
......@@ -51,6 +69,9 @@ process checkDesignFile {
script:
"""
hostname
ulimit -a
module load python/3.6.1-2-anaconda
python3 $baseDir/scripts/check_design.py -d $designLocation -f $fastqList
"""
}
......@@ -83,6 +104,7 @@ chemistryParam301 = chemistryParam
chemistryParam302 = chemistryParam
process count211 {
queue '128GB,256GB,256GBv1,384GB'
tag "count211-$sample"
publishDir "$outDir/${task.process}", mode: 'copy'
......@@ -103,17 +125,24 @@ process count211 {
script:
if (forceCells211 == 0){
"""
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells211
"""
"""
hostname
ulimit -a
module load cellranger/2.1.1
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells211
"""
} else {
"""
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells211
"""
"""
hostname
ulimit -a
module load cellranger/2.1.1
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells211
"""
}
}
process count301 {
queue '128GB,256GB,256GBv1,384GB'
tag "count301-$sample"
publishDir "$outDir/${task.process}", mode: 'copy'
......@@ -135,17 +164,24 @@ process count301 {
script:
if (forceCells301 == 0){
"""
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells301 --chemistry="$chemistryParam301"
"""
"""
hostname
ulimit -a
module load cellranger/3.0.1
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells301 --chemistry="$chemistryParam301"
"""
} else {
"""
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells301 --chemistry="$chemistryParam301"
"""
"""
hostname
ulimit -a
module load cellranger/3.0.1
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells301 --chemistry="$chemistryParam301"
"""
}
}
process count302 {
queue '128GB,256GB,256GBv1,384GB'
tag "count302-$sample"
publishDir "$outDir/${task.process}", mode: 'copy'
......@@ -167,12 +203,18 @@ process count302 {
script:
if (forceCells302 == 0){
"""
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells302 --chemistry="$chemistryParam302"
"""
"""
hostname
ulimit -a
module load cellranger/3.0.2
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells302 --chemistry="$chemistryParam302"
"""
} else {
"""
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells302 --chemistry="$chemistryParam302"
"""
"""
hostname
ulimit -a
module load cellranger/3.0.2
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells302 --chemistry="$chemistryParam302"
"""
}
}
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment