diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 19d907385821582ee042178815224b877eb8f20e..598e601822c15ba27b5a836992c6866b6a9c7705 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,7 @@ before_script: - module load python/3.6.1-2-anaconda - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1 - module load singularity/3.0.2 - - module load nextflow/19.09.0 + - module load nextflow/20.01.0 - mkdir -p test_data/hu.v2s1r500 - mkdir -p test_data/hu.v3s1r500 - mkdir -p test_data/mu.v3s1r500 @@ -30,7 +30,7 @@ astrocyte_cli: artifacts: expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -52,7 +52,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -74,7 +74,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -96,7 +96,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -120,7 +120,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -144,7 +144,7 @@ GRCh38-3.0.0: - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -168,7 +168,7 @@ mm10-3.0.0: - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -191,6 +191,6 @@ mm10-3.0.0: - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always diff --git a/README.md b/README.md index e8fe72a632ec289314bce36b0fdcae53f929ef66..1edd5a8db23b710ef8e58fc406ae15592295df6d 100755 --- a/README.md +++ b/README.md @@ -111,8 +111,8 @@ To Run: ``` * Design example: -| Sample | fastq_R1 | fastq_R2 | -|---------|------------------------------------|------------------------------------| +| Sample | fastq_R1 | fastq_R2 | +|--------|----------|----------| | sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | 
pbmc_1k_v2_S1_L001_R2_001.fastq.gz | | sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz | | sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz | diff --git a/docs/references.md b/docs/references.md index ea483c496889564a214e995e17e2b56e4991a557..37f42d86af5542e91c79e088be69f4f10a386a57 100644 --- a/docs/references.md +++ b/docs/references.md @@ -1,13 +1,13 @@ ### References -1. **python**: - * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com)) +1. **Nextflow**: + * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) 2. **cellranger** * Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count) -3. **MultiQc**: - * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) +3. **python**: + * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com)) -4. **Nextflow**: - * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) +4. **MultiQc**: + * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. 
doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config index 6caee145d38bebcca65b4cb71b99e7dc10e930e9..767a7d2c12c09898c3cb2309716edf89827279e4 100644 --- a/workflow/conf/aws.config +++ b/workflow/conf/aws.config @@ -1,4 +1,4 @@ -workDir = 's3://' +workDir = 's3://gudmap.rbk/work' aws.client.storageEncryption = 'AES256' aws { region = '' @@ -9,7 +9,6 @@ aws { process { executor = 'awsbatch' - queue = 'default-' cpus = 1 memory = '1 GB' diff --git a/workflow/conf/ondemand.config b/workflow/conf/ondemand.config new file mode 100644 index 0000000000000000000000000000000000000000..d89352bbf93ab79507d6ac6c28f471a63bed80a5 --- /dev/null +++ b/workflow/conf/ondemand.config @@ -0,0 +1,3 @@ +process { + queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' +} diff --git a/workflow/conf/spot.config b/workflow/conf/spot.config new file mode 100644 index 0000000000000000000000000000000000000000..6f1bfe0669f9052ac0bec7514b9628305560d682 --- /dev/null +++ b/workflow/conf/spot.config @@ -0,0 +1,3 @@ +process { + queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' +} diff --git a/workflow/main.nf b/workflow/main.nf index b796771930a3b3dbc908ffb145ebd887153c3e42..23b126d3e64941e7ffac07ee404c01b9ea59d373 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -8,6 +8,15 @@ main.nf * */ +// ######## #### ###### ######## +// ## ## ## ## ## ## +// ## ## ## ## ## +// ######## ## ## ###### +// ## ## ## ## ## +// ## ## ## ## ## ## +// ######## #### ###### ## + + // Define Input variables params.name = "run" params.fastq = "test_data/mu.v3s1r500/*.fastq.gz" @@ -55,6 +64,7 @@ if (params.astrocyte) { params.genomeLocationFull = params.genomeLocation+params.genome // Define variables from input +pipelineVersion = "2.x.x-indev" name = params.name designLocation = Channel .fromPath(params.designFile) @@ -81,6 +91,7 @@ references = "${baseDir}/../docs/references.md" * trackStart: track start 
of pipeline */ params.ci = false +params.dev = false process trackStart { script: """ @@ -91,11 +102,13 @@ process trackStart { curl -H 'Content-Type: application/json' -X PUT -d '{ \ "sessionId": "${workflow.sessionId}", \ "pipeline": "cellranger_count", \ + "pipelineVersion": "${pipelineVersion}", \ "start": "${workflow.start}", \ "astrocyte": ${params.astrocyte}, \ "status": "started", \ "nextflowVersion": "${workflow.nextflow.version}", - "ci": ${params.ci}}' \ + "ci": ${params.ci}, + "dev": ${params.dev}}' \ "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking" """ } @@ -105,7 +118,6 @@ process trackStart { */ process checkDesignFile { tag "${name}" - container = 'bicf/python3:2.0.0' input: file designLocation @@ -164,10 +176,9 @@ chemistryParam310 = chemistryParam * count211: run cellranger count version 2.1.1 */ process count211 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger2.1.1:2.0.0' + queue '128GB,256GB,256GBv1,384GB' input: set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211 @@ -213,7 +224,6 @@ process count220 { queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger2.2.0:2.0.0' input: set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples220 @@ -256,10 +266,9 @@ process count220 { * count302: run cellranger count version 3.0.2 */ process count302 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger3.0.2:2.0.0' + queue '128GB,256GB,256GBv1,384GB' input: set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302 @@ -302,10 +311,9 @@ process count302 { * count310: run cellranger count version 3.1.0 */ process 
count310 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger3.1.0:2.0.0' + queue '128GB,256GB,256GBv1,384GB' input: set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples310 @@ -345,13 +353,13 @@ process count310 { } /* - * versions: collect too versions into a single yml + * versions: collect all versions into a single yml */ process versions { tag "${name}" - container 'bicf/python3:2.0.0' input: + file versions_pythonScript output: file("*.yaml") into yamlPaths @@ -359,10 +367,12 @@ process versions { script: """ hostname + ulimit -u 16384 ulimit -a - echo ${workflow.nextflow.version} > version_nextflow.txt - echo ${version} > version_cellranger.txt - multiqc --version | tr -d 'multiqc, version ' > version_multiqc.txt + echo "${workflow.nextflow.version}" > version_nextflow.txt + echo "${pipelineVersion}" > version_pipeline.txt + echo "${version}" > version_cellranger.txt + bash versions_python.sh > version_python.txt python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references """ diff --git a/workflow/nextflow.config b/workflow/nextflow.config index b05454d76d7f73ef9e519d33530d746af19af762..4e2de2d5861c9f6788f86032f0183e71f6a53d71 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -1,15 +1,24 @@ profiles { + standard { + includeConfig 'conf/biohpc.config' + } biohpc { - includeConfig 'conf/biohpc.config' + includeConfig 'conf/biohpc.config' } local { - includeConfig 'conf/local.config' + includeConfig 'conf/local.config' } cluster { - includeConfig 'conf/cluster.config' + includeConfig 'conf/cluster.config' } aws { - includeConfig 'conf/aws.config' + includeConfig 'conf/aws.config' + } + ondemand { + includeConfig 'conf/ondemand.config' + } + spot { + includeConfig 
'conf/spot.config' } } @@ -62,6 +71,6 @@ manifest { homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count' description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.' mainScript = 'main.nf' - version = 'publish_2.0.4' + version = '2.x.x-indev' nextflowVersion = '>=0.31.0' } diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py index ddcda535f6bf1f8350f5aa148af517f2c421c272..978aedc56c4774b2e6a3556ff0c0f7dccb76eadf 100755 --- a/workflow/scripts/generate_versions.py +++ b/workflow/scripts/generate_versions.py @@ -24,9 +24,10 @@ logger.propagate = False logger.setLevel(logging.INFO) SOFTWARE_REGEX = { + 'Pipeline': ['version_pipeline.txt', r"(\S+)"], 'Nextflow': ['version_nextflow.txt', r"(\S+)"], - 'Cellranger Count': ['version_cellranger.txt', r"(\S+)"], - 'MultiQC': ['version_multiqc.txt', r"(\S+)"], + 'cellranger count': ['version_cellranger.txt', r"(\S+)"], + 'python': ['version_python.txt', r"(\S+)"], } @@ -72,9 +73,10 @@ def main(): out_filename = output + '_mqc.yaml' results = OrderedDict() + results['Pipeline'] = '<span style="color:#999999;\">N/A</span>' results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' - results['Cellranger Count'] = '<span style="color:#999999;\">N/A</span>' - results['MultiQC'] = '<span style="color:#999999;\">N/A</span>' + results['cellranger count'] = '<span style="color:#999999;\">N/A</span>' + results['python'] = '<span style="color:#999999;\">N/A</span>' # Check for version files: check_files(files) @@ -106,4 +108,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/workflow/scripts/versions_python.sh 
b/workflow/scripts/versions_python.sh new file mode 100644 index 0000000000000000000000000000000000000000..ff79391c3bec5b92eea8d8376c57682201c34271 --- /dev/null +++ b/workflow/scripts/versions_python.sh @@ -0,0 +1,9 @@ +#!/bin/bash +#versions_python.sh +#* +#* -------------------------------------------------------------------------- +#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE) +#* -------------------------------------------------------------------------- +#* + +python --version |& grep 'Python ' | sed -n -e 's/^Python //p'