diff --git a/.gitignore b/.gitignore index 5476422ef13e9df66e855db9e4d6bea34b1e3915..f153fc296faef39fd8ce3be544fdf32b414a32c5 100644 --- a/.gitignore +++ b/.gitignore @@ -301,11 +301,12 @@ $RECYCLE.BIN/ /workflow/work/* /workflow/output/* /.nextflow/* +/data/* /work/* /output/* pipeline_trace*.txt* .nextflow*.log* -report.html* +report*.html* timeline*.html* *~ diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index 7c34bde1e06c081a27fc30ffa010ad627b8efebf..8417664465fb8691f52e97960b6acda0b54594c8 100755 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -137,7 +137,7 @@ workflow_parameters: - [ 'two', '2'] required: true description: | - 10x single cell gene expression chemistry version (only used in cellranger version 2.x). + 10x single cell gene expression chemistry version (only used in cellranger version 3.x). - id: version type: select @@ -151,10 +151,11 @@ workflow_parameters: 10x cellranger version. - id: astrocyte - type: string + type: select + choices: + - [ 'true', 'true' ] required: true default: 'true' - regex: "true" description: | Ensure configuraton for astrocyte. 
@@ -176,5 +177,4 @@ vizapp_cran_packages: # List of any Bioconductor packages, not provided by the modules, # that must be made available to the vizapp -vizapp_bioc_packages: - - chipseq +vizapp_bioc_packages: [] diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/docs/design.csv b/docs/design.csv new file mode 100755 index 0000000000000000000000000000000000000000..df082a46726ff6b863ff945de21cf1bad419028f --- /dev/null +++ b/docs/design.csv @@ -0,0 +1,4 @@ +Sample,fastq_R1,fastq_R2 +sample1,pbmc_1k_v2_S1_L001_R1_001.fastq.gz,pbmc_1k_v2_S1_L001_R2_001.fastq.gz +sample2,pbmc_1k_v2_S2_L001_R1_001.fastq.gz,pbmc_1k_v2_S2_L001_R2_001.fastq.gz +sample2,pbmc_1k_v2_S2_L002_R1_001.fastq.gz,pbmc_1k_v2_S2_L002_R2_001.fastq.gz diff --git a/docs/index.md b/docs/index.md index 654bef8befdc01d28ebb410ae0fbf775d3c10dfb..13cc7d7acfadc63ed5543dac64d6f8725f9faaa6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,3 +1,65 @@ -# Astrocyte CellRanger 10x Workflow Package +10x Genomics scRNA-Seq (cellranger) count Pipeline +======================================== -## Workflow SOP +Introduction +------------ + +This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis. + +The pipeline uses Nextflow, a bioinformatics workflow tool. + +To Run: +------- + +* Workflow parameters: + * **fastq** + * Pairs (read1 and read2) of fastq.gz files from a sequencing of 10x single-cell experiment. Index fastq not required. + * REQUIRED + * R1 and R2 only necessary + * **design file** + * A design file listing sample, corresponding read1 filename, corresponding read2 filename. 
There can be multiple rows with the same sample name, if there are multiple fastq's for that sample. + * REQUIRED + * column 1 = "Sample" + * column 2 = "fastq_R1" + * column 3 = "fastq_R2" + * can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples + * eg: can be downloaded [HERE](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/8db3e25c13cb1463c2a50e510159c72380ae5826/docs/design.csv) + * **genome** + * Reference species and genome used for alignment and subsequent analysis. + * name of available 10x Genomics premade reference genomes: + * *'GRCh38-3.0.0'* = Human GRCh38 release 93 + * *'GRCh38-1.2.0'* = Human GRCh38 release 84 + * *'hg19-3.0.0'* = Human GRCh37 (hg19) release 87 + * *'hg19-1.2.0'* = Human GRCh37 (hg19) release 84 + * *'mm10-3.0.0'* = Mouse GRCm38 (mm10) release 93 + * *'mm10-1.2.0'* = Mouse GRCm38 (mm10) release 84 + * *'hg19_and_mm10-3.0.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93 + * *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84 + * *'ercc92-1.2.0'* = ERCC.92 Spike-In + * **expect cells** + * Expected number of recovered cells. + * guides cellranger in its cutoff for background/low quality cells + * as a guide it doesn't have to be exact + * 0-10000 + * if --expectCells is used then --forceCells is not necessary + * only used if force cells is not entered or set to 0 + * **force cells** + * Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells. + * 0-10000 + * if force cells is used then expected cells is not necessary and is ignored + * **chemistry version** + * 10x single cell gene expression chemistry version (only used in cellranger version 3.x). 
+ * setting to auto will attempt to autodetect from the detected cycle strategy in the fastq's + * chemistry version is only used if cellranger version is > 2.x + * cellranger version 2.1.1 can only read chemistry version less than or equal to two (2) + * **cellranger version** + * 10x cellranger version. + * cellranger version 2.1.1 can only read chemistry version less than or equal to two (2) + +* Design example: + +| Sample | fastq_R1 | fastq_R2 | +|---------|------------------------------------|------------------------------------| +| sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | pbmc_1k_v2_S1_L001_R2_001.fastq.gz | +| sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz | +| sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz | diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 71409aca77cfb01cb00e863855228276d98edb95..e69f0507b028c493d49bbbada40e051034dc0ac7 100755 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -9,15 +9,15 @@ process { } $count211 { module = ['cellranger/2.1.1'] - memory = '120GB' + queue = '128GB,256GB,256GBv1,384GB' } $count301 { module = ['cellranger/3.0.1'] - memory = '120GB' + queue = '128GB,256GB,256GBv1,384GB' } $count302 { module = ['cellranger/3.0.2'] - memory = '120GB' + queue = '128GB,256GB,256GBv1,384GB' } } diff --git a/workflow/main.nf b/workflow/main.nf index 90165b28bf3b318cf0c7f1786c47e75db1f6aa81..3649ddfcc70460d5142e3c39cc3b803396c4fe25 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -7,23 +7,16 @@ params.fastq = "$baseDir/../test_data/*.fastq.gz" params.designFile = "$baseDir/../test_data/design.csv" params.genome = 'GRCh38-3.0.0' - - params.expectCells = 10000 params.forceCells = 0 params.kitVersion = 'three' - params.version = '3.0.2' -params.astrocyte = 'false' +params.astrocyte = false params.outDir = "$baseDir/output" // Assign variables if astrocyte -if (params.astrocyte == 'false') { - params.genomes 
= [] - params.genomeLocation = params.genome ? params.genomes[ params.genome ].loc ?: false : false - params.chemistry = [] - params.chemistryParam = params.kitVersion ? params.chemistry[ params.kitVersion ].param ?: false : false -} else if (params.astrocyte == 'true') { +if (params.astrocyte) { + print("Running under astrocyte") params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' if (params.kitVersion == "one") { params.chemistryParam ='SC3Pv1' @@ -34,6 +27,11 @@ if (params.astrocyte == 'false') { } else { params.chemistryParam = 'auto' } +} else { + params.genomes = [] + params.genomeLocation = params.genome ? params.genomes[ params.genome ].loc ?: false : false + params.chemistry = [] + params.chemistryParam = params.kitVersion ? params.chemistry[ params.kitVersion ].param ?: false : false } params.genomeLocationFull = params.genomeLocation+params.genome @@ -71,6 +69,9 @@ process checkDesignFile { script: """ + hostname + ulimit -a + module load python/3.6.1-2-anaconda python3 $baseDir/scripts/check_design.py -d $designLocation -f $fastqList """ } @@ -103,7 +104,7 @@ chemistryParam301 = chemistryParam chemistryParam302 = chemistryParam process count211 { - memory '120 GB' + queue '128GB,256GB,256GBv1,384GB' tag "count211-$sample" publishDir "$outDir/${task.process}", mode: 'copy' @@ -124,18 +125,24 @@ process count211 { script: if (forceCells211 == 0){ - """ - cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells211 - """ + """ + hostname + ulimit -a + module load cellranger/2.1.1 + cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells211 + """ } else { - """ - cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells211 - """ + """ + hostname + ulimit -a + module load cellranger/2.1.1 + cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. 
--sample="$sample" --force-cells=$forceCells211 + """ } } process count301 { - memory '120 GB' + queue '128GB,256GB,256GBv1,384GB' tag "count301-$sample" publishDir "$outDir/${task.process}", mode: 'copy' @@ -157,18 +164,24 @@ process count301 { script: if (forceCells301 == 0){ - """ - cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells301 --chemistry="$chemistryParam301" - """ + """ + hostname + ulimit -a + module load cellranger/3.0.1 + cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells301 --chemistry="$chemistryParam301" + """ } else { - """ - cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells301 --chemistry="$chemistryParam301" - """ + """ + hostname + ulimit -a + module load cellranger/3.0.1 + cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells301 --chemistry="$chemistryParam301" + """ } } process count302 { - memory '120 GB' + queue '128GB,256GB,256GBv1,384GB' tag "count302-$sample" publishDir "$outDir/${task.process}", mode: 'copy' @@ -190,12 +203,18 @@ process count302 { script: if (forceCells302 == 0){ - """ - cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells302 --chemistry="$chemistryParam302" - """ + """ + hostname + ulimit -a + module load cellranger/3.0.2 + cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells302 --chemistry="$chemistryParam302" + """ } else { - """ - cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells302 --chemistry="$chemistryParam302" - """ + """ + hostname + ulimit -a + module load cellranger/3.0.2 + cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. 
--sample="$sample" --force-cells=$forceCells302 --chemistry="$chemistryParam302" + """ } -} +} \ No newline at end of file