diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 03a422fc78ecb76ac0bae7962cda596acec7e15b..a22e859f34169ebf0451cd6229f8756d9b9d2b6d 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,12 +2,15 @@ before_script: - module load astrocyte - module load python/3.6.1-2-anaconda - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1 - - module load nextflow/0.31.1_Ignite - - mkdir test_data/hu.v3s1r500 - - mkdir test_data/mu.v3s1r500 - - mkdir test_data/hu.v3s2r10k - - mkdir test_data/mu.v3s2r10k - - mkdir test_data/hu.v2s2r10k + - module load singularity/3.0.2 + - module load nextflow/19.09.0 + - mkdir -p test_data/hu.v2s1r500 + - mkdir -p test_data/hu.v3s1r500 + - mkdir -p test_data/mu.v3s1r500 + - mkdir -p test_data/hu.v3s2r10k + - mkdir -p test_data/mu.v3s2r10k + - mkdir -p test_data/hu.v2s2r10k + - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s1r500/* test_data/hu.v3s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s1r500/* test_data/mu.v3s1r500/ - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/ @@ -15,14 +18,15 @@ before_script: - ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s2r10k/* test_data/hu.v2s2r10k/ stages: - - astrocyte - - simple - - detailed + - astrocyte_test + - container_test + - reference_test + - multiSample_test -astrocyte_check: - stage: astrocyte +astrocyte_cli: + stage: astrocyte_test script: - - astrocyte_cli check ../cellranger_count + - astrocyte_cli check . 
artifacts: expire_in: 2 days retry: @@ -30,33 +34,30 @@ astrocyte_check: when: - always -simple_1: - stage: simple +2.1.1_test: + stage: container_test only: - branches - - tags except: refs: - develop - master + - tags script: - - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.1.0' - - pytest -m count310 + - singularity run 'docker://bicf/cellranger2.1.1:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 artifacts: name: "$CI_JOB_NAME" when: always paths: - - .nextflow.log - - workflow/output/count310/sample1/outs/web_summary.html - - workflow/output/multiqc/run/multiqc_report.html + - test/outs/web_summary.html expire_in: 2 days retry: max: 1 when: - always -simple_2: - stage: simple +2.2.2_test: + stage: container_test only: - branches except: @@ -65,63 +66,81 @@ simple_2: - master - tags script: - - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v3s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v3s1r500/design.csv" --genome 'mm10-3.0.0' --kitVersion 'three' --version '3.1.0' - - pytest -m count310 + - singularity run 'docker://bicf/cellranger2.2.0:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 artifacts: name: "$CI_JOB_NAME" when: always paths: - - .nextflow.log - - workflow/output/count310/sample1/outs/web_summary.html - - workflow/output/multiqc/run/multiqc_report.html + - test/outs/web_summary.html expire_in: 2 days retry: max: 1 when: - always -detailed_1: - stage: detailed +3.0.2_test: + stage: container_test only: - - develop - - master + - branches except: refs: + - develop + - master - 
tags script: - - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.1.0' - - pytest -m count310 + - singularity run 'docker://bicf/cellranger3.0.2:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 artifacts: name: "$CI_JOB_NAME" when: always paths: - - .nextflow.log - - workflow/output/count310/sample1/outs/web_summary.html - - workflow/output/multiqc/run/multiqc_report.html + - test/outs/web_summary.html expire_in: 2 days retry: max: 1 when: - always - -detailed_2: - stage: detailed +3.1.0_test: + stage: container_test only: - - develop - - master + - branches except: - refs: + refs: + - develop + - master - tags script: - - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.0.2' - - pytest -m count302 + - singularity run 'docker://bicf/cellranger3.1.0:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2 + artifacts: + name: "$CI_JOB_NAME" + when: always + paths: + - develop + - master + - test/outs/web_summary.html + expire_in: 2 days + retry: + max: 1 + when: + - always + +GRCh38-3.0.0: + stage: reference_test + only: + refs: + - develop + - master + except: + - tags + script: + - nextflow -q run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s1r500/*.fastq.gz" --designFile "test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' + - pytest -m count310 artifacts: name: "$CI_JOB_NAME" when: always paths: - .nextflow.log - - 
workflow/output/count302/sample1/outs/web_summary.html + - workflow/output/count310/sample1/outs/web_summary.html - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: @@ -129,23 +148,23 @@ detailed_2: when: - always -detailed_3: - stage: detailed +mm10-3.0.0: + stage: reference_test only: - - develop - - master - except: refs: - - tags + - develop + - master + except: + - tags script: - - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v3s2r10k/design.csv" --genome 'mm10-3.0.0' --kitVersion 'three' --version '3.0.1' - - pytest -m count301 + - nextflow -q run workflow/main.nf -profile biohpc,cluster --fastq "test_data/mu.v3s1r500/*.fastq.gz" --designFile "test_data/mu.v3s1r500/design.csv" --genome 'mm10-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' + - pytest -m count310 artifacts: name: "$CI_JOB_NAME" when: always paths: - .nextflow.log - - workflow/output/count301/sample1/outs/web_summary.html + - workflow/output/count310/sample1/outs/web_summary.html - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: @@ -153,23 +172,22 @@ detailed_3: when: - always -detailed_4: - stage: detailed +2Samples: + stage: multiSample_test only: - - develop - - master - except: - refs: + refs: + - master - tags script: - - nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v2s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v2s2r10k/design.csv" --genome 'GRCh38-1.2.0' --kitVersion 'two' --version '2.1.1' - - pytest -m count211 + - nextflow -q run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.1.0' + - pytest -m count310 artifacts: name: "$CI_JOB_NAME" when: always paths: - .nextflow.log - - workflow/output/count211/sample1/outs/web_summary.html + - 
workflow/output/count310/sample1/outs/web_summary.html + - workflow/output/count310/sample2/outs/web_summary.html - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: diff --git a/.gitlab/issue_templates/Bug.md b/.gitlab/issue_templates/Bug.md new file mode 100644 index 0000000000000000000000000000000000000000..2ffc8b3ca4ebacc6b452a57aebdaa59906ce9b78 --- /dev/null +++ b/.gitlab/issue_templates/Bug.md @@ -0,0 +1,21 @@ +# Summary + + +# Steps to reproduce + + +# Observed bug behavior + + +# Expected behavior + + +# Relevant logs and/or screenshots + + +# Potential fixes + + + +/label ~bug ~"To Do" +/cc @ghenry diff --git a/.gitlab/merge_request_templates/Merge_Request.md b/.gitlab/merge_request_templates/Merge_Request.md new file mode 100644 index 0000000000000000000000000000000000000000..153c0b6de9e6c69b31ff5f0a627b3fb7d5278649 --- /dev/null +++ b/.gitlab/merge_request_templates/Merge_Request.md @@ -0,0 +1,17 @@ +Please fill in the appropriate checklist below (delete those which are not relevant). +These are the most common things requested on pull requests. + +## PR checklist + - [ ] This comment contains a description of changes (with reason) + - [ ] If you've fixed a bug or added code that should be tested, add tests! 
+ - [ ] Documentation in `docs` is updated + - [ ] `CHANGELOG.md` is updated + - [ ] `README.md` is updated + - [ ] `LICENSE.md` is updated with new contributors + + +* [ ] **Close issue**\ +Closes # + +/cc @ghenry +/assign @ghenry diff --git a/CHANGELOG.md b/CHANGELOG.md index df4bdebb1180a5eb2ed19928d12262bf84bd551e..186cd6d02e9d8f26c9247b3fc6c90b7559564104 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,22 @@ +# v2.0.0 (in development) +**User Facing** +* Check Design File for spaces in name and file contents +* Attempt to prevent threading error (which appears to only happen on 256GBv1 nodes) +* Add option for 5' GEX chemistry +* Remove cellranger 3.0.1 as an option +* Add cellranger 2.2.0 as an option + +**Background** +* Add Nextflow Tower integration into CI (GHH's profile) +* Add new layered config folders, including prepare for awsifying +* Update param to new standard +* Use docker containers +* Update CI + +*Known Bugs* +* Vizapp does not yet work for Astrocyte +* Running in CLI: to set --fastq path of file/s needs to be in quotes + # v1.2.0 **User Facing** * Add Cellranger Version 3.1.0 diff --git a/README.md b/README.md index 6323ec5b79b16f933558813f4c745cba11be01ef..ab2141e93077c9523590335cb541c70794fb94a5 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ |*master*|*develop*| |:-:|:-:| -|[](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/master)|[](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/develop)| +|[](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/master)|[](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/develop)| [](https://doi.org/10.5281/zenodo.2652622) @@ -77,16 +77,17 @@ To Run: * --version (cellranger version) 2.1.1 can only read --kitVersion of two (2) * options: * *'auto'* - * *'three'* - * *'two'* - * eg: **--kitVersion 'three'** + * *'3GEXv3'* + * *'3GEXv2'* + * *'5GEX'* + * eg: **--kitVersion '3GEXv3'** * **--version** * cellranger 
version - * --version (cellranger version) 2.1.1 can only read --kitVersion of two (2) + * --version (cellranger version) 2.1.1 and 2.2.0 can only read --kitVersion of 3GEXv2 * options: * *'3.1.0'* * *'3.0.2'* - * *'3.0.1'* + * *'2.2.0'* * *'2.1.1'* * eg: **--version '3.1.0'** * **--outDir** @@ -94,7 +95,7 @@ To Run: * eg: **--outDir 'test'** * FULL EXAMPLE: ``` - nextflow run workflow/main.nf --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.1.0' --outDir 'test' + nextflow run workflow/main.nf --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --outDir 'test' ``` * Design example: diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index 1a7cb7a0cf81e44d4f67198cc57b5ad91fdd3820..24105a88d79683610430ccf86b5b84b3335f226d 100755 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -136,8 +136,9 @@ workflow_parameters: default: 'auto' choices: - ['auto', 'Auto Detect'] - - ['three', '3'] - - ['two', '2'] + - ['3GEXv3', '3prime GEX v3 (3prime Gene Expression)'] + - ['3GEXv2', '3prime GEX v2 (3prime Gene Expression)'] + - ['5GEX', '5prime GEX Auto (5prime Gene Expression)'] required: true description: | 10x single cell gene expression chemistry version (only used in cellranger version 3.x). 
diff --git a/cleanup.sh b/cleanup.sh new file mode 100644 index 0000000000000000000000000000000000000000..9569ff54fd71cd94bddde415af03a101820ab514 --- /dev/null +++ b/cleanup.sh @@ -0,0 +1,7 @@ +rm *.out +rm pipeline_trace*.txt* +rm report*.html* +rm timeline*.html* +rm .nextflow*.log* +rm -r .nextflow/ +rm -r work/ diff --git a/docs/index.md b/docs/index.md index 29cf0d399120909478055abf5538241312f28a40..2532ad93b5637d778c0804449c7f3bf1afe1214a 100644 --- a/docs/index.md +++ b/docs/index.md @@ -25,7 +25,7 @@ To Run: * column 1 = "Sample" * column 2 = "fastq_R1" * column 3 = "fastq_R2" - * can have repeated "Sample" if there are multiole fastq R1/R2 pairs for the samples + * can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples * eg: can be downloaded [HERE](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/master/docs/design.csv) * **genome** * Reference species and genome used for alignment and subsequent analysis. @@ -41,22 +41,22 @@ To Run: * *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84 * *'ercc92-1.2.0'* = ERCC.92 Spike-In * **expect cells** - * Expected number of recovered cells. - * guides cellranger in it's cutoff for background/low quality cells - * as a guide it doesn't have to be exact - * 0-10000 - * if --expextedCells is used then --forceCells is not necessary - * only used if force cells is not entered or set to 0 - * **force cells** - * Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells. + * Expected number of recovered cells. 
+ * guides cellranger in its cutoff for background/low quality cells + * as a guide it doesn't have to be exact * 0-10000 - * if force cells is used then expected cells is not necessary and is ignored + * if --expectCells is used then --forceCells is not necessary + * only used if force cells is not entered or set to 0 + * **force cells** + * Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells. + * 0-10000 + * if force cells is used then expected cells is not necessary and is ignored * **chemistry version** * 10x single cell gene expression chemistry version (only used in cellranger version 3.x). * setting to auto will attempt to autodetect from the detected cycle strategy in the fastq's * chemistry version is only used if cellranger version is > 2.x * cellranger version 2.1.1 can only read chemistry version less than or equal to two (2) - * **cellranger version** + * **cellranger version** * 10x cellranger version. 
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2) diff --git a/nextflow.config b/nextflow.config deleted file mode 100644 index 28777047bfa85b13d08a0df02a22e6eac6d66540..0000000000000000000000000000000000000000 --- a/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -profiles { - standard { - includeConfig 'workflow/conf/biohpc.config' - } -} diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config new file mode 100644 index 0000000000000000000000000000000000000000..e3612374c3997d368e808f7f9e0f31099a8dadda --- /dev/null +++ b/workflow/conf/aws.config @@ -0,0 +1,15 @@ +workDir = 's3://' +aws.client.storageEncryption = 'AES256' +aws { + region = '' + batch { + cliPath = '/home/ec2-user/miniconda/bin/aws' + } +} + +process { + executor = 'awsbatch' + queue = 'default-' + cpus = 1 + memory = '10 GB' +} diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config old mode 100755 new mode 100644 index a5ea037bbdc5558ff20dec18c238ffa4e3f4ed44..56d5b65c6196d6bf9a841561bd85330cd758f775 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -1,37 +1,3 @@ -process { - executor = 'slurm' - queue='super' - - withLabel: checkDesignFile { - module = ['python/3.6.1-2-anaconda'] - executor = 'local' - } - withLabel: count211 { - module = ['cellranger/2.1.1'] - queue = '128GB,256GB,256GBv1,384GB' - } - withLabel: count301 { - module = ['cellranger/3.0.1'] - queue = '128GB,256GB,256GBv1,384GB' - } - withLabel: count302 { - module = ['cellranger/3.0.2'] - queue = '128GB,256GB,256GBv1,384GB' - } - withLabel: count310 { - module = ['cellranger/3.1.0'] - queue = '128GB,256GB,256GBv1,384GB' - } - withLabel: versions { - module = ['python/3.6.1-2-anaconda','pandoc/2.7','multiqc/1.7'] - executor = 'local' - } - withLabel: multiqc { - module = ['multiqc/1.7'] - executor = 'local' - } -} - params { // Reference file paths on BioHPC genomes { @@ -71,30 +37,28 @@ params { 'auto' { param = 'auto' } - 'one' { + '3GEXv1' { param 
= 'SC3Pv1' } - 'two' { + '3GEXv2' { param = 'SC3Pv2' } - 'three' { + '3GEXv3' { param = 'SC3Pv3' } + '5GEX' { + param = 'fiveprime' + } } } -trace { +singularity { enabled = true - file = 'pipeline_trace.txt' - fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss' + cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/' } -timeline { - enabled = true - file = 'timeline.html' -} - -report { - enabled = true - file = 'report.html' +env { + http_proxy = 'http://proxy.swmed.edu:3128' + https_proxy = 'http://proxy.swmed.edu:3128' + all_proxy = 'http://proxy.swmed.edu:3128' } diff --git a/workflow/conf/cluster.config b/workflow/conf/cluster.config new file mode 100644 index 0000000000000000000000000000000000000000..6abbd5a6b727ce8ec62573e83dcd83ee80414025 --- /dev/null +++ b/workflow/conf/cluster.config @@ -0,0 +1,27 @@ +process { + executor = 'slurm' + queue = '32GB' + clusterOptions = '--hold' + + withLabel: checkDesignFile { + executor = 'local' + } + withLabel: count211 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: count220 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: count302 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: count310 { + queue = '128GB,256GB,256GBv1,384GB' + } + withLabel: versions { + executor = 'local' + } + withLabel: multiqc { + executor = 'local' + } +} diff --git a/workflow/conf/local.config b/workflow/conf/local.config new file mode 100755 index 0000000000000000000000000000000000000000..a1e4055c4636c8282f85c0327f9742017b060ee2 --- /dev/null +++ b/workflow/conf/local.config @@ -0,0 +1,3 @@ +process { + executor = 'local' +} diff --git a/workflow/main.nf b/workflow/main.nf index 8862991d57eec85dae8d55ecc7ac7001a187e671..1194038abc4c462c12397439f676af9d2334cf47 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -13,30 +13,36 @@ params.name = "run" params.fastq = "${baseDir}/../test_data/*.fastq.gz" params.designFile = 
"${baseDir}/../test_data/design.csv" params.genome = 'GRCh38-3.0.0' +params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' params.expectCells = 10000 params.forceCells = 0 -params.kitVersion = 'three' +params.kitVersion = '3GEXv3' params.version = '3.1.0' params.astrocyte = false params.outDir = "${baseDir}/output" -params.multiqcConf = "${baseDir}/conf/multiqc_config.yaml" -params.references = "${baseDir}/../docs/references.md" -if (params.kitVersion == "three" && params.version == '2.1.1') { +// Variable error test +if (params.kitVersion == "3GEXv3" && params.version == '2.1.1') { print("Cellranger Version 2.1.1 requires kitVersion 2") System.exit(32) } +if (params.kitVersion == "3GEXv3" && params.version == '2.2.0') { + print("Cellranger Version 2.2.0 requires kitVersion 2") + System.exit(32) +} -// Assign variables if astrocyte +// Define variables if astrocyte (or from config) if (params.astrocyte) { print("Running under astrocyte") params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-' - if (params.kitVersion == "one") { + if (params.kitVersion == "3GEXv1") { params.chemistryParam ='SC3Pv1' - } else if (params.kitVersion == "two") { + } else if (params.kitVersion == "3GEXv2") { params.chemistryParam ='SC3Pv2' - } else if (params.kitVersion == "three") { + } else if (params.kitVersion == "3GEXv3") { params.chemistryParam ='SC3Pv3' + } else if (params.kitVersion == "5GEX") { + params.chemistryParam ='fiveprime' } else { params.chemistryParam = 'auto' } @@ -48,7 +54,7 @@ if (params.astrocyte) { } params.genomeLocationFull = params.genomeLocation+params.genome -// Define regular variables +// Define variables from input name = params.name designLocation = Channel .fromPath(params.designFile) @@ -66,15 +72,17 @@ forceCells = params.forceCells chemistryParam = params.chemistryParam version = params.version outDir = params.outDir -multiqcConf = params.multiqcConf -references = params.references +// Define constant 
variables +multiqcConf = "${baseDir}/conf/multiqc_config.yaml" +references = "${baseDir}/../docs/references.md" +/* + * checkDesignFile: check design file for errors + */ process checkDesignFile { - tag "${name}" - publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy' - module 'python/3.6.1-2-anaconda' + container = 'bicf/python3:2.0.0' input: file designLocation @@ -87,9 +95,12 @@ process checkDesignFile { """ hostname ulimit -a - python3 ${baseDir}/scripts/check_design.py -d ${designLocation} -f ${fastqList} + noSpaceDesign=\$(echo "${designLocation}" | tr -d ' ') + if [[ "\${noSpaceDesign}" != "${designLocation}" ]]; then + mv "${designLocation}" "\${noSpaceDesign}" + fi + python3 ${baseDir}/scripts/check_design.py -d \${noSpaceDesign} -f ${fastqList} """ - } @@ -100,45 +111,47 @@ samples = designPaths .groupTuple() //.subscribe { println it } - // Duplicate variables samples.into { samples211 - samples301 + samples220 samples302 samples310 } refLocation.into { refLocation211 - refLocation301 + refLocation220 refLocation302 refLocation310 } expectCells211 = expectCells -expectCells301 = expectCells +expectCells220 = expectCells expectCells302 = expectCells expectCells310 = expectCells forceCells211 = forceCells -forceCells301 = forceCells +forceCells220 = forceCells forceCells302 = forceCells forceCells310 = forceCells -chemistryParam301 = chemistryParam +chemistryParam211 = chemistryParam +chemistryParam220 = chemistryParam chemistryParam302 = chemistryParam chemistryParam310 = chemistryParam - +/* + * count211: run cellranger count version 2.1.1 + */ process count211 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - module 'cellranger/2.1.1' + container 'bicf/cellranger2.1.1:2.0.0' input: set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211 file ref from refLocation211.first() expectCells211 forceCells211 + chemistryParam211 
output: file("**/outs/**") into outPaths211 @@ -153,7 +166,7 @@ process count211 { hostname ulimit -a bash ${baseDir}/scripts/filename_check.sh -r ${ref} - cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells211} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells211} --chemistry=${chemistryParam211} sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv """ } @@ -162,42 +175,42 @@ process count211 { hostname ulimit -a bash ${baseDir}/scripts/filename_check.sh -r ${ref} - cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells211} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells211} --chemistry=${chemistryParam211} sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv """ } - } - -process count301 { - +/* + * count220: run cellranger count version 2.2.0 + */ +process count220 { queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - module 'cellranger/3.0.1' + container 'bicf/cellranger2.2.0:2.0.0' input: - set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples301 - file ref from refLocation301.first() - expectCells301 - forceCells301 - chemistryParam301 + set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples220 + file ref from refLocation220.first() + expectCells220 + forceCells220 + chemistryParam220 output: - file("**/outs/**") into outPaths301 - file("*_metrics_summary.tsv") into metricsSummary301 + file("**/outs/**") into outPaths220 + file("*_metrics_summary.tsv") into 
metricsSummary220 when: - version == '3.0.1' + version == '2.2.0' script: - if (forceCells301 == 0) { + if (forceCells220 == 0) { """ hostname ulimit -a bash ${baseDir}/scripts/filename_check.sh -r ${ref} - cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells301} --chemistry=${chemistryParam301} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells220} --chemistry=${chemistryParam220} sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv """ } @@ -206,20 +219,20 @@ process count301 { hostname ulimit -a bash ${baseDir}/scripts/filename_check.sh -r ${ref} - cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells301} --chemistry=${chemistryParam301} + cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells220} --chemistry=${chemistryParam220} sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv """ } - } - +/* + * count302: run cellranger count version 3.0.2 + */ process count302 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - module 'cellranger/3.0.2' + container 'bicf/cellranger3.0.2:2.0.0' input: set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302 @@ -239,6 +252,7 @@ process count302 { if (forceCells302 == 0) { """ hostname + ulimit -u 16384 ulimit -a bash ${baseDir}/scripts/filename_check.sh -r ${ref} cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. 
--sample=${sample} --expect-cells=${expectCells302} --chemistry=${chemistryParam302} @@ -248,22 +262,23 @@ process count302 { else { """ hostname + ulimit -u 16384 ulimit -a bash ${baseDir}/scripts/filename_check.sh -r ${ref} cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells302} --chemistry=${chemistryParam302} sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv """ } - } - +/* + * count310: run cellranger count version 3.1.0 + */ process count310 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - module 'cellranger/3.1.0' + container 'bicf/cellranger3.1.0:2.0.0' input: set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples310 @@ -292,21 +307,21 @@ process count310 { else { """ hostname + ulimit -u 16384 ulimit -a bash ${baseDir}/scripts/filename_check.sh -r ${ref} cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. 
--sample=${sample} --force-cells=${forceCells310} --chemistry=${chemistryParam310} sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv """ } - } - +/* + * versions: collect tool versions into a single yml + */ process versions { - tag "${name}" - publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy' - module 'python/3.6.1-2-anaconda:pandoc/2.7:multiqc/1.7' + container 'bicf/python3:2.0.0' input: @@ -323,17 +338,16 @@ process versions { python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references """ - } +// Collect all metrics summaries regardless of cellranger version +metricsSummary = metricsSummary211.mix(metricsSummary220, metricsSummary302, metricsSummary310) -metricsSummary = metricsSummary211.mix(metricsSummary301, metricsSummary302, metricsSummary310) - - +/* + * multiqc: create multiqc report + */ process multiqc { - tag "${name}" - queue 'super' publishDir "${outDir}/${task.process}/${name}", mode: 'copy' module 'multiqc/1.7' @@ -352,5 +366,4 @@ process multiqc { sed -i '1s/^.*\tE/Sample\tE/' metrics_summary_mqc.tsv multiqc -c ${multiqcConf} . 
""" - -} \ No newline at end of file +} diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 30e47ea1aea37ed6550cc2944d69d26e69887489..3e15f5769a1da8e676852a504655cf8845fd6db1 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -1,5 +1,67 @@ profiles { - standard { + biohpc { includeConfig 'conf/biohpc.config' } + local { + includeConfig 'conf/local.config' + } + cluster { + includeConfig 'conf/cluster.config' + } + aws { + includeConfig 'conf/aws.config' + } +} + +process { + withName:checkDesignFile { + container = 'bicf/python3:2.0.0' + } + withName:count211 { + container = 'bicf/cellranger2.1.1:2.0.0' + } + withName:count220 { + container = 'bicf/cellranger2.2.0:2.0.0' + } + withName:count302 { + container = 'bicf/cellranger3.0.2:2.0.0' + } + withName:count310 { + container = 'bicf/cellranger3.1.0:2.0.0' + } + withName:versions { + container = 'bicf/python3:2.0.0' + } + withName:multiqc { + container = 'bicf/multiqc:2.0.0' + } +} + +trace { + enabled = true + file = 'pipeline_trace.txt' + fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss' +} + +timeline { + enabled = true + file = 'timeline.html' +} + +report { + enabled = true + file = 'report.html' +} + +tower { + accessToken = '3ade8f325d4855434b49aa387421a44c63e3360f' + enabled = true +} + +manifest { + homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count' + description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.' 
+ mainScript = 'main.nf' + version = 'v2.0.0_indev' + nextflowVersion = '>=0.31.0' } diff --git a/workflow/scripts/check_design.py b/workflow/scripts/check_design.py index c5c679e58b2819e6fafb4f34e6181cdd62f8cdbd..4a4d84c80f74098e709817794218a29b7cec6b60 100755 --- a/workflow/scripts/check_design.py +++ b/workflow/scripts/check_design.py @@ -97,7 +97,7 @@ def main(): logger.addHandler(handler) # Read files as dataframes - design_df = pd.read_csv(args.design, sep=',') + design_df = pd.read_csv(args.design, sep=',', converters={'Sample': str.strip, 'fastq_R1': str.strip, 'fastq_R2': str.strip}) fastq_df = pd.read_csv(args.fastq, sep='\t', names=['name', 'path']) # Check design file @@ -107,4 +107,4 @@ def main(): new_design_df.to_csv('design.checked.csv', header=True, sep=',', index=False) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/workflow/tests/test_check_design.py b/workflow/tests/test_check_design.py deleted file mode 100644 index f425d09ab3a19ba1e610f451435a657f55d954ae..0000000000000000000000000000000000000000 --- a/workflow/tests/test_check_design.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 -#test_check_design.py -#* -#* -------------------------------------------------------------------------- -#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE) -#* -------------------------------------------------------------------------- -#* - -import pytest -import pandas as pd -from io import StringIO -import os - -test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ - '/../output/misc/checkDesignFile/run/' - -@pytest.mark.count211 -def test_count211_design(): - assert os.path.exists(os.path.join(test_output_path, 'design.checked.csv')) - -@pytest.mark.count301 -def test_count301_design(): - assert os.path.exists(os.path.join(test_output_path, 'design.checked.csv')) - -@pytest.mark.count302 -def test_count302_design(): - assert 
os.path.exists(os.path.join(test_output_path, 'design.checked.csv')) - -@pytest.mark.count310 -def test_count310_design(): - assert os.path.exists(os.path.join(test_output_path, 'design.checked.csv')) \ No newline at end of file diff --git a/workflow/tests/test_versions.py b/workflow/tests/test_versions.py deleted file mode 100644 index 535e029ffc5f9a1d0eadc63b286397c81a2427c0..0000000000000000000000000000000000000000 --- a/workflow/tests/test_versions.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -#test_versions.py -#* -#* -------------------------------------------------------------------------- -#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE) -#* -------------------------------------------------------------------------- -#* - -import pytest -import pandas as pd -from io import StringIO -import os - -test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ - '/../output/misc/versions/run/' - -@pytest.mark.count211 -def test_count211_versions(): - assert os.path.exists(os.path.join(test_output_path, 'versions_mqc.yaml')) - assert os.path.exists(os.path.join(test_output_path, 'references_mqc.yaml')) - -@pytest.mark.count301 -def test_count301_versions(): - assert os.path.exists(os.path.join(test_output_path, 'versions_mqc.yaml')) - assert os.path.exists(os.path.join(test_output_path, 'references_mqc.yaml')) - -@pytest.mark.count302 -def test_count302_versions(): - assert os.path.exists(os.path.join(test_output_path, 'versions_mqc.yaml')) - assert os.path.exists(os.path.join(test_output_path, 'references_mqc.yaml')) - -@pytest.mark.count310 -def test_count310_versions(): - assert os.path.exists(os.path.join(test_output_path, 'versions_mqc.yaml')) - assert os.path.exists(os.path.join(test_output_path, 'references_mqc.yaml')) \ No newline at end of file