Commit 4b102a5e authored by Gervaise Henry's avatar Gervaise Henry 🤠
Browse files

Merge branch 'develop' into 'master'

Develop

See merge request !53
parents 4430629a 1019aefc
Pipeline #4507 passed with stages
in 12 minutes and 23 seconds
# Shared setup run before every job's script: load the cluster modules,
# install the pytest plugins, and expose the shared cellranger_count test
# datasets under test_data/ via symlinks.
before_script:
  - module load astrocyte
  - module load python/3.6.1-2-anaconda
  - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
  - module load nextflow/0.31.1_Ignite
  # mkdir -p succeeds when the directory already exists, so the setup can be
  # re-run in a workspace that still holds directories from an earlier attempt.
  - mkdir -p test_data/hu.v2s1r500
  - mkdir -p test_data/mu.v2s2r10k
  - mkdir -p test_data/hu.v3s1r500
  - mkdir -p test_data/mu.v3s1r500
  - mkdir -p test_data/hu.v3s2r10k
  - mkdir -p test_data/mu.v3s2r10k
  - mkdir -p test_data/hu.v2s2r10k
  # ln -sfn replaces links that are already present instead of failing with
  # "File exists"; every dataset is linked exactly once.
  - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/
  - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v2s2r10k/* test_data/mu.v2s2r10k/
  - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s1r500/* test_data/hu.v3s1r500/
  - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s1r500/* test_data/mu.v3s1r500/
  - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/
  - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s2r10k/* test_data/mu.v3s2r10k/
  - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s2r10k/* test_data/hu.v2s2r10k/
stages:
- astrocyte
......@@ -17,23 +22,117 @@ stages:
# Runs `astrocyte_cli check` against the workflow package directory.
# The check command is listed once; the job is retried once on any failure.
astrocyte_check:
  stage: astrocyte
  script:
    - astrocyte_cli check ../cellranger_count
  artifacts:
    expire_in: 2 days
  retry:
    max: 1
    when:
      - always
# Quick test on the small human datasets (hu.v2s1r500 with kitVersion 'two',
# hu.v3s1r500 with kitVersion 'three'), both with cellranger 3.0.2, followed
# by the pytest cases marked count302.
# Runs for branches and tags, except the develop and master branches.
simple_1:
  stage: simple
  only:
    - branches
    - tags
  except:
    - develop
    - master
  script:
    - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v2s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v2s1r500/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'two' --version '3.0.2'
    - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.0.2'
    - pytest -m count302
  # Keep the Nextflow log even when the job fails, so failures can be debugged.
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - .nextflow.log
    expire_in: 2 days
  retry:
    max: 1
    when:
      - always
# Quick test on the mouse datasets: mu.v2s2r10k with cellranger 2.1.1
# (mm10-1.2.0, kitVersion 'auto') and mu.v3s1r500 with cellranger 3.0.1
# (mm10-3.0.0, kitVersion 'three'), followed by the pytest cases marked
# count301. Runs for branches only; develop, master and tags are excluded.
# NOTE(review): the 2.1.1 run has no matching `pytest -m count211` step in
# this job — confirm that is intentional.
simple_2:
  stage: simple
  only:
    - branches
  except:
    - develop
    - master
    - tags
  script:
    - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v2s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v2s2r10k/design.csv" --genome 'mm10-1.2.0' --kitVersion 'auto' --version '2.1.1'
    - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v3s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v3s1r500/design.csv" --genome 'mm10-3.0.0' --kitVersion 'three' --version '3.0.1'
    - pytest -m count301
  # Keep the Nextflow log even when the job fails, so failures can be debugged.
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - .nextflow.log
    expire_in: 2 days
  retry:
    max: 1
    when:
      - always
# Detailed test on the hu.v3s2r10k dataset with cellranger 3.0.2 and
# kitVersion 'auto', followed by the pytest cases marked count302.
# Runs only for the develop and master branches (never for tags).
detailed_1:
  stage: detailed
  only:
    - develop
    - master
  except:
    - tags
  script:
    - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.0.2'
    - pytest -m count302
  # Keep the Nextflow log even when the job fails, so failures can be debugged.
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - .nextflow.log
    expire_in: 2 days
  retry:
    max: 1
    when:
      - always
# Detailed test on the mu.v3s2r10k dataset (mm10-3.0.0, kitVersion 'three',
# cellranger 3.0.2), then the pytest cases marked count302.
# Limited to the develop and master branches; tags are excluded.
detailed_2:
  stage: detailed
  only: [develop, master]
  except: [tags]
  retry:
    max: 1
    when: [always]
  # The Nextflow log is uploaded whether the job passes or fails.
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    expire_in: 2 days
    paths:
      - .nextflow.log
  script:
    - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v3s2r10k/design.csv" --genome 'mm10-3.0.0' --kitVersion 'three' --version '3.0.2'
    - pytest -m count302
# Detailed test on the hu.v2s2r10k dataset (GRCh38-1.2.0, kitVersion 'two',
# cellranger 2.1.1), then the pytest cases marked count211.
# Limited to the develop and master branches; tags are excluded.
detailed_3:
  stage: detailed
  only: [develop, master]
  except: [tags]
  retry:
    max: 1
    when: [always]
  # The Nextflow log is uploaded whether the job passes or fails.
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    expire_in: 2 days
    paths:
      - .nextflow.log
  script:
    - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v2s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v2s2r10k/design.csv" --genome 'GRCh38-1.2.0' --kitVersion 'two' --version '2.1.1'
    - pytest -m count211
# v1.1.0 (in development)
**User Facing**
* Make report (multiqc) for cellranger qc output, version, references
**Background**
* Add changelog
* Add check for space in genomeLocationFull (cellranger cannot handle) in bash script
* Move module loads to process setup level code
* Add Jeremy Mathews to author list
* Apply style guide
* Add pytests for outputs
* Test for incompatible params (kitVersion=3 AND version=2.1.1)
*Known Bugs*
......@@ -2,6 +2,9 @@
|:-:|:-:|
|[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/master)|[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/badges/develop/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/develop)|
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2652622.svg)](https://doi.org/10.5281/zenodo.2652622)
10x Genomics scRNA-Seq (cellranger) count Pipeline
==================================================
......@@ -21,75 +24,76 @@ To Run:
* Available parameters:
* **--fastq**
* path to the fastq location
* R1 and R2 only necessary but can include I2
* only fastq's in designFile (see below) are used, not present will be ignored
* eg: **--fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/\*.fastq.gz'**
* path to the fastq location
* R1 and R2 only necessary but can include I2
* only fastq's in designFile (see below) are used, not present will be ignored
* eg: **--fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r100k/\*.fastq.gz'**
* **--designFile**
* path to design file (csv format) location
* column 1 = "Sample"
* column 2 = "fastq_R1"
* column 3 = "fastq_R2"
* can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples
* eg: **--designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/design.csv'**
* **--genome**
* reference genome
* requires workflow/conf/biohpc.config to work
* name of available 10x Gemomics premade reference genomes:
* *'GRCh38-3.0.0'* = Human GRCh38 release 93
* *'GRCh38-1.2.0'* = Human GRCh38 release 84
* *'hg19-3.0.0'* = Human GRCh37 (hg19) release 87
* *'hg19-1.2.0'* = Human GRCh37 (hg19) release 84
* *'mm10-3.0.0'* = Human GRCm38 (mm10) release 93
* *'mm10-3.0.0'* = Human GRCm38 (mm10) release 84
* *'hg19_and_mm10-3.0.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm19) release 93
* *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm19) release 84
* *'ercc92-1.2.0'* = ERCC.92 Spike-In
* if --genome is used then --genomeLocationFull is not necessary
* eg: **--genome 'GRCh38-3.0.0'**
* **--genomeLocationFull**
* path to a custom genome
* if --genomeLocationFull is used --genome is not necessary and is ignored
* eg. **--genomeLocationFull '/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0'**
* **--expectCells**
* expected number of cells to be detected
* guides cellranger in it's cutoff for background/low quality cells
* as a guide it doesn't have to be exact
* 0-10000
* if --expextedCells is used then --forceCells is not necessary
* only used if --forceCells is not entered or set to 0
* eg: **--expectCells 10000**
* **--forceCells**
* forces filtering of the top number of cells matching this parameter
* 0-10000
* if --forceCells is used then --expectedCells is not necessary and is ignored
* eg: **--forceCells 10000**
* **--kitVersion**
* the library chemistry version number for the 10x Genomics Gene Expression kit
* setting to auto will attempt to autodetect from the detected sequencing strategy in the fastq's
* version numbers are spelled out
* --kitversion is only used if --version (cellranger version) is > 2
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* options:
* *'auto'*
* *'three'*
* *'two'*
* eg: **--kitVersion 'three'**
* **--version**
* cellranger version
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* options:
* *'3.0.2'*
* *'3.0.1'*
* *'2.1.1'*
* eg: **--version '3.0.2'**'
* **--outDir**
* optional output directory for run
* eg: **--outDir 'test'**
* FULL EXAMPLE:
**nextflow main.nf --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/\*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/v3s2r100k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.0.2' --outDir 'test'**
* path to design file (csv format) location
* column 1 = "Sample"
* column 2 = "fastq_R1"
* column 3 = "fastq_R2"
* can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples
* can be downloaded [HERE](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/master/docs/design.csv)
* eg: **--designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r100k/design.csv'**
* **--genome**
* reference genome
* requires workflow/conf/biohpc.config to work
* name of available 10x Genomics premade reference genomes:
* *'GRCh38-3.0.0'* = Human GRCh38 release 93
* *'GRCh38-1.2.0'* = Human GRCh38 release 84
* *'hg19-3.0.0'* = Human GRCh37 (hg19) release 87
* *'hg19-1.2.0'* = Human GRCh37 (hg19) release 84
* *'mm10-3.0.0'* = Mouse GRCm38 (mm10) release 93
* *'mm10-1.2.0'* = Mouse GRCm38 (mm10) release 84
* *'hg19_and_mm10-3.0.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93
* *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84
* *'ercc92-1.2.0'* = ERCC.92 Spike-In
* if --genome is used then --genomeLocationFull is not necessary
* eg: **--genome 'GRCh38-3.0.0'**
* **--genomeLocationFull**
* path to a custom genome
* if --genomeLocationFull is used --genome is not necessary and is ignored
* eg. **--genomeLocationFull '/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0'**
* **--expectCells**
* expected number of cells to be detected
* guides cellranger in its cutoff for background/low quality cells
* as a guide it doesn't have to be exact
* 0-10000
* if --expectCells is used then --forceCells is not necessary
* only used if --forceCells is not entered or set to 0
* eg: **--expectCells 10000**
* **--forceCells**
* forces filtering of the top number of cells matching this parameter
* 0-10000
* if --forceCells is used then --expectCells is not necessary and is ignored
* eg: **--forceCells 10000**
* **--kitVersion**
* the library chemistry version number for the 10x Genomics Gene Expression kit
* setting to auto will attempt to autodetect from the detected sequencing strategy in the fastq's
* version numbers are spelled out
* --kitVersion is only used if --version (cellranger version) is > 2
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* options:
* *'auto'*
* *'three'*
* *'two'*
* eg: **--kitVersion 'three'**
* **--version**
* cellranger version
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* options:
* *'3.0.2'*
* *'3.0.1'*
* *'2.1.1'*
* eg: **--version '3.0.2'**
* **--outDir**
* optional output directory for run
* eg: **--outDir 'test'**
* FULL EXAMPLE:
```
nextflow run workflow/main.nf --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r100k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r100k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.0.2' --outDir 'test'
```
* Design example:
| Sample | fastq_R1 | fastq_R2 |
......@@ -97,3 +101,12 @@ To Run:
| sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | pbmc_1k_v2_S1_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz |
[**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/CHANGELOG.md)
Credits
-------
This workflow was developed jointly with the [Bioinformatic Core Facility (BICF), Department of Bioinformatics](http://www.utsouthwestern.edu/labs/bioinformatics/)
Please cite in publications: Pipeline was developed by BICF from funding provided by **Cancer Prevention and Research Institute of Texas (RP150596)**.
......@@ -9,7 +9,7 @@
# A unique identifier for the workflow package, text/underscores only
name: 'cellranger_count'
# Who wrote this?
author: 'Gervaise Henry and Venkat Malladi'
author: 'Gervaise H. Henry, Jeremy Mathews, and Venkat Malladi'
# A contact email address for questions
email: 'bicf@utsouthwestern.edu'
# A more informative title for the workflow package
......@@ -44,6 +44,7 @@ workflow_modules:
- 'cellranger/3.0.1'
- 'cellranger/3.0.2'
- 'bcl2fastq/2.17.1.14'
- 'multiqc/1.7'
# A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter:
......@@ -84,28 +85,28 @@ workflow_parameters:
required: true
description: |
Pairs (read1 and read2) of fastq.gz files from a sequencing of 10x single-cell expereiment. Index fastq not required.
regex: ".*fastq.gz"
regex: ".*\\.fastq.gz"
min: 2
# Design file input (required). Only one `regex` key is kept: the pattern
# that requires a literal ".csv" suffix, instead of any name ending in "csv".
- id: designFile
  type: file
  required: true
  regex: ".*\\.csv"
  description: |
    A design file listing sample, corresponding read1 filename, corresponding read2 filename. There can be multiple rows with the same sample name, if there are multiple fastq's for that sample.
# Reference genome selector (required).
# Each choice is a two-element list — presumably [value passed to the
# workflow, label shown in the web interface]; confirm against the Astrocyte
# package schema. The mouse assembly label is mm10, matching the choice ids.
- id: genome
  type: select
  choices:
    - ['GRCh38-3.0.0', 'Human GRCh38 release 93']
    - ['GRCh38-1.2.0', 'Human GRCh38 release 84']
    - ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87']
    - ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84']
    - ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93']
    - ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84']
    - ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93']
    - ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84']
    - ['ercc92-1.2.0', 'ERCC.92 Spike-In']
  required: true
  description: |
    Reference species and genome used for alignment and subsequent analysis.
......@@ -132,9 +133,9 @@ workflow_parameters:
type: select
default: 'auto'
choices:
- [ 'auto', 'Auto Detect']
- [ 'three', '3']
- [ 'two', '2']
- ['auto', 'Auto Detect']
- ['three', '3']
- ['two', '2']
required: true
description: |
10x single cell gene expression chemistry version (only used in cellranger version 3.x).
......@@ -143,9 +144,9 @@ workflow_parameters:
type: select
default: '3.0.2'
choices:
- [ '3.0.2', '3.0.2']
- [ '3.0.1', '3.0.1']
- [ '2.1.1', '2.1.1']
- ['3.0.2', '3.0.2']
- ['3.0.1', '3.0.1']
- ['2.1.1', '2.1.1']
required: true
description: |
10x cellranger version.
......
......@@ -8,34 +8,37 @@ This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It t
The pipeline uses Nextflow, a bioinformatics workflow tool.
This pipeline is primarily used with a SLURM cluster on the BioHPC Cluster. However, the pipeline should be able to run on any system that Nextflow supports.
Additionally, the pipeline is designed to work with Astrocyte Workflow System using a simple web interface.
To Run:
-------
* Workflow parameters:
* **fastq**
* Pairs (read1 and read2) of fastq.gz files from a sequencing of 10x single-cell expereiment. Index fastq not required.
* REQUIRED
* R1 and R2 only necessary
* Pairs (read1 and read2) of fastq.gz files from a sequencing of 10x single-cell experiment. Index fastq not required.
* R1 and R2 only necessary
* **design file**
* A design file listing sample, corresponding read1 filename, corresponding read2 filename. There can be multiple rows with the same sample name, if there are multiple fastq's for that sample.
* REQUIRED
* column 1 = "Sample"
* column 2 = "fastq_R1"
* column 3 = "fastq_R2"
* can have repeated "Sample" if there are multiole fastq R1/R2 pairs for the samples
* eg: can be downloaded [HERE](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/8db3e25c13cb1463c2a50e510159c72380ae5826/docs/design.csv)
* A design file listing sample, corresponding read1 filename, corresponding read2 filename. There can be multiple rows with the same sample name, if there are multiple fastq's for that sample.
* REQUIRED
* column 1 = "Sample"
* column 2 = "fastq_R1"
* column 3 = "fastq_R2"
* can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples
* eg: can be downloaded [HERE](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/master/docs/design.csv)
* **genome**
* Reference species and genome used for alignment and subsequent analysis.
* name of available 10x Gemomics premade reference genomes:
* *'GRCh38-3.0.0'* = Human GRCh38 release 93
* *'GRCh38-1.2.0'* = Human GRCh38 release 84
* *'hg19-3.0.0'* = Human GRCh37 (hg19) release 87
* *'hg19-1.2.0'* = Human GRCh37 (hg19) release 84
* *'mm10-3.0.0'* = Human GRCm38 (mm10) release 93
* *'mm10-3.0.0'* = Human GRCm38 (mm10) release 84
* *'hg19_and_mm10-3.0.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm19) release 93
* *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm19) release 84
* *'ercc92-1.2.0'* = ERCC.92 Spike-In
* Reference species and genome used for alignment and subsequent analysis.
* name of available 10x Genomics premade reference genomes:
* *'GRCh38-3.0.0'* = Human GRCh38 release 93
* *'GRCh38-1.2.0'* = Human GRCh38 release 84
* *'hg19-3.0.0'* = Human GRCh37 (hg19) release 87
* *'hg19-1.2.0'* = Human GRCh37 (hg19) release 84
* *'mm10-3.0.0'* = Mouse GRCm38 (mm10) release 93
* *'mm10-1.2.0'* = Mouse GRCm38 (mm10) release 84
* *'hg19_and_mm10-3.0.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93
* *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84
* *'ercc92-1.2.0'* = ERCC.92 Spike-In
* **expect cells**
* Expected number of recovered cells.
* guides cellranger in its cutoff for background/low quality cells
......@@ -44,17 +47,18 @@ To Run:
* if --expectCells is used then --forceCells is not necessary
* only used if force cells is not entered or set to 0
* **force cells**
* Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells.
* 0-10000
* if force cells is used then expected cells is not necessary and is ignored
* Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells.
* 0-10000
* if force cells is used then expected cells is not necessary and is ignored
* **chemistry version**
* 10x single cell gene expression chemistry version (only used in cellranger version 3.x).
* setting to auto will attempt to autodetect from the detected cycle strategy in the fastq's
* chemistry version is only used if cellranger version is > 2.x
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
* 10x single cell gene expression chemistry version (only used in cellranger version 3.x).
* setting to auto will attempt to autodetect from the detected cycle strategy in the fastq's
* chemistry version is only used if cellranger version is > 2.x
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
* **cellranger version**
* 10x cellranger version.
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
* 10x cellranger version.
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
* Design example:
......@@ -66,6 +70,8 @@ To Run:
[**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/master/CHANGELOG.md)
Credits
-------
This workflow was developed jointly with the [Bioinformatic Core Facility (BICF), Department of Bioinformatics](http://www.utsouthwestern.edu/labs/bioinformatics/)
......
### References
1. **python**:
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
2. **cellranger**
* Cellranger mkfastq [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq)
3. **MultiQC**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
4. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
......@@ -2,23 +2,30 @@ process {
// Defaults applied to every process unless a selector below overrides them:
// jobs go to the 'super' queue through the SLURM executor.
queue = 'super'
executor = 'slurm'
// Process specific configuration
// Processes labelled 'checkDesignFile' load the Anaconda Python module and
// run on the local executor instead of the scope-wide default.
withLabel: checkDesignFile {
  module = ['python/3.6.1-2-anaconda']
  executor = 'local'
}
// Processes labelled 'count211' load cellranger 2.1.1.
// The queue list presumably names the larger-memory partitions — confirm
// against the cluster's partition names.
withLabel: count211 {
  module = ['cellranger/2.1.1']
  queue = '128GB,256GB,256GBv1,384GB'
}
// Processes labelled 'count301' load cellranger 3.0.1.
// The queue list presumably names the larger-memory partitions — confirm
// against the cluster's partition names.
withLabel: count301 {
  module = ['cellranger/3.0.1']
  queue = '128GB,256GB,256GBv1,384GB'
}
// Processes labelled 'count302' load cellranger 3.0.2.
// The queue list presumably names the larger-memory partitions — confirm
// against the cluster's partition names.
withLabel: count302 {
  module = ['cellranger/3.0.2']
  queue = '128GB,256GB,256GBv1,384GB'
}
// Processes labelled 'versions' run on the local executor with the Python,
// pandoc and MultiQC modules loaded.
withLabel: versions {
  executor = 'local'
  module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'multiqc/1.7']
}
// Processes labelled 'multiqc' run on the local executor with only the
// MultiQC module loaded.
withLabel: multiqc {
  executor = 'local'
  module = ['multiqc/1.7']
}
}
params {
......
# Report branding: logo image, the link it points to, and its hover title.
custom_logo: 'bicf_logo.png'
custom_logo_url: 'https://www.utsouthwestern.edu/labs/bioinformatics/'
custom_logo_title: 'Bioinformatics Core Facility'

# Key/value rows for the report header.
report_header_info:
  - Contact E-mail: 'bicf@utsouthwestern.edu'
  - Application Type: 'cellranger_count'
  - Department: 'Bioinformatic Core Facility, Department of Bioinformatics'

# Report title.
title: 'BICF CellRanger Count Analysis Report'

# Introductory HTML comment for the report.
report_comment: >
  This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count"
  target="_blank">BICF/cellranger_count</a> pipeline.

# Custom-content definition for the cellranger metrics summary table.
custom_data:
  metrics_summary:
    file_format: 'tsv'
    id: 'metrics_summary'
    contents: 'Estimated Number of Cells Mean Reads per Cell Median Genes per Cell Number of Reads Valid Barcodes Sequencing Saturation Q30 Bases in Barcode Q30 Bases in RNA Read Q30 Bases in UMI Reads Mapped to Genome Reads Mapped Confidently to Genome Reads Mapped Confidently to Intergenic Regions Reads Mapped Confidently to Intronic Regions Reads Mapped Confidently to Exonic Regions Reads Mapped Confidently to Transcriptome Reads Mapped Antisense to Gene Fraction Reads in Cells Total Genes Detected Median UMI Counts per Cell'
    section_name: 'Metrics Summary'
    plot_type: 'generalstats'

# File-name search pattern for the metrics summary file.
sp:
  metrics_summary:
    fn: 'metrics_summary_mqc.tsv'

# Numeric placement value assigned to each metrics_summary column.
table_columns_placement:
  metrics_summary:
    Estimated Number of Cells: 1
    Mean Reads per Cell: 2
    Median Genes per Cell: 3
    Number of Reads: 4
    Sequencing Saturation: 5
    Reads Mapped Confidently to Genome: 6
    Reads Mapped Confidently to Transcriptome: 7
    Fraction Reads in Cells: 8
    Total Genes Detected: 9
    Median UMI Counts per Cell: 10
    Valid Barcodes: 1100
    Reads Mapped Antisense to Gene: 1200

# Columns whose visibility flag is switched off.
table_columns_visible:
  metrics_summary:
    Q30 Bases in Barcode: false
    Q30 Bases in RNA Read: false
    Q30 Bases in UMI: false
    Reads Mapped to Genome: false
    Reads Mapped Confidently to Intergenic Regions: false
    Reads Mapped Confidently to Intronic Regions: false
    Reads Mapped Confidently to Exonic Regions: false

# Empty thousands separator.
thousandsSep_format: ''

# Order values for the software versions and software references sections.
report_section_order:
  software_versions:
    order: -1100
  software_references:
    order: -1200