diff --git a/CHANGELOG.md b/CHANGELOG.md index e85dc433e6de36cc4d50004114f885aed8214dfb..8c1414aced8a8373411f475de9e2671b63153c8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +# v2.2.0-indev +**User Facing** +* Add cellranger version 4.0.0 +* Add references version 2020-A (GRCh38, mm10, mix) + +**Background** + +*Known Bugs* +* Vizapp does not yet work for Astrocyte +* Running in CLI: the path given to --fastq (one or more files) needs to be in quotes + + # v2.1.1 **User Facing** * Check Design File for spaces in name and file contents diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index e128e1ff61f118e1ab733268460d82bd14836654..29f82cda5883ec0980adddbf909b90e81726d150 100755 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -100,12 +100,15 @@ workflow_parameters: - id: genome type: select choices: + - ['GRCh38-2020-A', 'Human GRCh38 release 98'] - ['GRCh38-3.0.0', 'Human GRCh38 release 93'] - ['GRCh38-1.2.0', 'Human GRCh38 release 84'] - ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87'] - ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84'] + - ['mm10-2020-A', 'Mouse GRCm38 (mm10) release 98'] - ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93'] - ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84'] + - ['GRCh38_and_mm10-2020-A', 'Human GRCh38 + Mouse GRCm38 (mm10) release 98'] - ['GRCh38_and_mm10-3.1.0', 'Human GRCh38 + Mouse GRCm38 (mm10) release 93'] - ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93'] - ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84'] @@ -146,8 +149,9 @@ workflow_parameters: - id: version type: select - default: '3.1.0' + default: '4.0.0' choices: + - ['4.0.0', '4.0.0'] - ['3.1.0', '3.1.0'] - ['3.0.2', '3.0.2'] - ['2.1.1', '2.1.1'] diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config new file mode 100644 index 0000000000000000000000000000000000000000..6caee145d38bebcca65b4cb71b99e7dc10e930e9 --- /dev/null +++ b/workflow/conf/aws.config @@ -0,0 +1,44 @@ +workDir = 's3://' 
+aws.client.storageEncryption = 'AES256' +aws { + region = '' + batch { + cliPath = '/home/ec2-user/miniconda/bin/aws' + } +} + +process { + executor = 'awsbatch' + queue = 'default-' + cpus = 1 + memory = '1 GB' + + withLabel: checkDesignFile { + cpus = 2 + memory = '1 GB' + } + withLabel: count211 { + cpus = 2 + memory = '30 GB' + } + withLabel: count220 { + cpus = 2 + memory = '30 GB' + } + withLabel: count302 { + cpus = 2 + memory = '30 GB' + } + withLabel: count310 { + cpus = 2 + memory = '30 GB' + } + withLabel: versions { + cpus = 3 + memory = '1 GB' + } + withLabel: multiqc { + cpus = 1 + memory = '1 GB' + } +} diff --git a/workflow/conf/bicf_logo.png b/workflow/conf/bicf_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..0d8015590c5a94f92c39ec2470bd02baa3d09077 Binary files /dev/null and b/workflow/conf/bicf_logo.png differ diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config new file mode 100644 index 0000000000000000000000000000000000000000..983b458356f832bfe2cf0ce2ef2a5eeae919366c --- /dev/null +++ b/workflow/conf/biohpc.config @@ -0,0 +1,73 @@ +params { + // Reference file paths on BioHPC + genomes { + 'GRCh38-2020-A' { + loc = '/project/apps_database/cellranger/refdata-gex-' + } + 'GRCh38-3.0.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'GRCh38-1.2.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'hg19-3.0.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'hg19-1.2.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'mm10-2020-A' { + loc = '/project/apps_database/cellranger/refdata-gex-' + } + 'mm10-3.0.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'mm10-1.2.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'GRCh38_and_mm10-2020-A' { + loc = '/project/apps_database/cellranger/refdata-gex-' + } + 'GRCh38_and_mm10-3.1.0' { + loc = 
'/project/apps_database/cellranger/refdata-cellranger-' + } + 'hg19_and_mm10-3.0.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'hg19_and_mm10-1.2.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + 'ercc92-1.2.0' { + loc = '/project/apps_database/cellranger/refdata-cellranger-' + } + } + // Chemistry mapping parameter + chemistry { + 'auto' { + param = 'auto' + } + '3GEXv1' { + param = 'SC3Pv1' + } + '3GEXv2' { + param = 'SC3Pv2' + } + '3GEXv3' { + param = 'SC3Pv3' + } + '5GEX' { + param = 'fiveprime' + } + } +} + +singularity { + enabled = true + cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/' +} + +env { + http_proxy = 'http://proxy.swmed.edu:3128' + https_proxy = 'http://proxy.swmed.edu:3128' + all_proxy = 'http://proxy.swmed.edu:3128' +} diff --git a/workflow/conf/cluster.config b/workflow/conf/cluster.config new file mode 100644 index 0000000000000000000000000000000000000000..7322376199a795c72f973f2bf14ae75c1c2c6e7b --- /dev/null +++ b/workflow/conf/cluster.config @@ -0,0 +1,30 @@ +process { + executor = 'slurm' + queue = '32GB' + clusterOptions = '--hold' + + withLabel: trackStart { + executor = 'local' + } + withLabel: checkDesignFile { + executor = 'local' + } + withLabel: count211 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: count220 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: count302 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: count310 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: versions { + executor = 'local' + } + withLabel: multiqc { + executor = 'local' + } +} diff --git a/workflow/conf/local.config b/workflow/conf/local.config new file mode 100755 index 0000000000000000000000000000000000000000..9ca703b1d7f806b19fde6bd88a4fe2e7caeed1af --- /dev/null +++ b/workflow/conf/local.config @@ -0,0 +1,3 @@ +process { + executor = 'local' +} \ No newline at end of file diff --git a/workflow/conf/multiqc_config.yaml 
b/workflow/conf/multiqc_config.yaml new file mode 100755 index 0000000000000000000000000000000000000000..2ff18a76db2868839cc4789d94bc08fdfa5df70b --- /dev/null +++ b/workflow/conf/multiqc_config.yaml @@ -0,0 +1,62 @@ +# Custom Logo +custom_logo: 'bicf_logo.png' +custom_logo_url: 'https://www.utsouthwestern.edu/labs/bioinformatics/' +custom_logo_title: 'Bioinformatics Core Facility' + +report_header_info: + - Contact E-mail: 'bicf@utsouthwestern.edu' + - Application Type: 'cellranger_count' + - Department: 'Bioinformatics Core Facility, Department of Bioinformatics' + + +# Title to use for the report. +title: BICF CellRanger Count Analysis Report + +report_comment: > + This report has been generated by the <a href="https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count" + target="_blank">BICF/cellranger_count</a> pipeline. + +custom_data: + metrics_summary: + file_format: 'tsv' + id: 'metrics_summary' + contents: 'Estimated Number of Cells Mean Reads per Cell Median Genes per Cell Number of Reads Valid Barcodes Sequencing Saturation Q30 Bases in Barcode Q30 Bases in RNA Read Q30 Bases in UMI Reads Mapped to Genome Reads Mapped Confidently to Genome Reads Mapped Confidently to Intergenic Regions Reads Mapped Confidently to Intronic Regions Reads Mapped Confidently to Exonic Regions Reads Mapped Confidently to Transcriptome Reads Mapped Antisense to Gene Fraction Reads in Cells Total Genes Detected Median UMI Counts per Cell' + section_name: 'Metrics Summary' + plot_type: 'generalstats' + +sp: + metrics_summary: + fn: 'metrics_summary_mqc.tsv' + +table_columns_placement: + metrics_summary: + Estimated Number of Cells: 1 + Mean Reads per Cell: 2 + Median Genes per Cell: 3 + Number of Reads: 4 + Sequencing Saturation: 5 + Reads Mapped Confidently to Genome: 6 + Reads Mapped Confidently to Transcriptome: 7 + Fraction Reads in Cells: 8 + Total Genes Detected: 9 + Median UMI Counts per Cell: 10 + Valid Barcodes: 1100 + Reads Mapped Antisense to Gene: 1200 + 
+table_columns_visible: + metrics_summary: + Q30 Bases in Barcode: False + Q30 Bases in RNA Read: False + Q30 Bases in UMI: False + Reads Mapped to Genome: False + Reads Mapped Confidently to Intergenic Regions: False + Reads Mapped Confidently to Intronic Regions: False + Reads Mapped Confidently to Exonic Regions: False + +thousandsSep_format: '' + +report_section_order: + software_versions: + order: -1100 + software_references: + order: -1200 diff --git a/workflow/main.nf b/workflow/main.nf index f110f31fe226abcc8ab8f44661727ad5f956e330..d727bb876521fc67bcd8c504c94f8b8af9308a8d 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -21,12 +21,12 @@ main.nf params.name = "run" params.fastq = "test_data/mu.v3s1r500/*.fastq.gz" params.designFile = "test_data/mu.v3s1r500/design.csv" -params.genome = 'mm10-3.0.0' -params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' +params.genome = 'mm10-2020-A' +params.genomeLocation = '/project/apps_database/cellranger/refdata-gex-' params.expectCells = 10000 params.forceCells = 0 params.kitVersion = '3GEXv3' -params.version = '3.1.0' +params.version = '4.0.0' params.astrocyte = false params.outDir = "${baseDir}/output" @@ -60,7 +60,7 @@ if (params.astrocyte) { params.genomeLocationFull = params.genomeLocation+params.genome // Define variables from input -pipelineVersion = "2.1.1" +pipelineVersion = "2.2.0-indev" name = params.name designLocation = Channel .fromPath(params.designFile) @@ -158,21 +158,26 @@ samples.into { samples211 samples302 samples310 + samples400 } refLocation.into { refLocation211 refLocation302 refLocation310 + refLocation400 } expectCells211 = expectCells expectCells302 = expectCells expectCells310 = expectCells +expectCells400 = expectCells forceCells211 = forceCells forceCells302 = forceCells forceCells310 = forceCells +forceCells400 = forceCells chemistryParam211 = chemistryParam chemistryParam302 = chemistryParam chemistryParam310 = chemistryParam +chemistryParam400 = 
chemistryParam /* @@ -314,6 +319,52 @@ process count310 { } } +/* + * count400: run cellranger count version 4.0.0 on the samples400/refLocation400 inputs; only runs when version == '4.0.0' + */ +process count400 { + tag "${sample}" + publishDir "${outDir}/${task.process}", mode: 'copy' + queue '128GB,256GB,256GBv1,384GB' + module 'cellranger/4.0.0' + + input: + set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz"), file(script) from samples400 + file ref from refLocation400.first() + expectCells400 + forceCells400 + chemistryParam400 + + output: + file("**/outs/**") into outPaths400 + file("*_metrics_summary.tsv") into metricsSummary400 + + when: + version == '4.0.0' + + script: + if (forceCells400 == 0) { + """ + hostname + ulimit -u 16384 + ulimit -a + bash filename_check.sh -r ${ref} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells400} --chemistry=${chemistryParam400} + sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv + """ + } + else { + """ + hostname + ulimit -u 16384 + ulimit -a + bash filename_check.sh -r ${ref} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. 
--sample=${sample} --force-cells=${forceCells400} --chemistry=${chemistryParam400} + sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv + """ + } +} + /* * versions: collect all versions into a single yml */ @@ -343,7 +394,7 @@ process versions { } // Collect all metrics summaries reguardless of cellranger version -metricsSummary = metricsSummary211.mix(metricsSummary302, metricsSummary310) +metricsSummary = metricsSummary211.mix(metricsSummary302, metricsSummary310, metricsSummary400) /* * multiqc: create multiqc report diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 5a622ec752d117228141cd00bb89c28a77fbc5a3..c402bf3f2ca76befef3ba984286493637520e3ad 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -47,6 +47,6 @@ manifest { homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count' description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.' mainScript = 'main.nf' - version = '2.1.1' + version = '2.2.0-indev' nextflowVersion = '>=0.31.0' }