From 5677b2e2c3440fc6d05b4c497eb97d5b90b41142 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Sun, 21 Jun 2020 13:50:56 -0500 Subject: [PATCH] Major cleanup update --- .gitlab-ci.yml | 18 ++++++------- README.md | 4 +-- docs/references.md | 12 ++++----- workflow/conf/aws.config | 3 +-- workflow/conf/ondemand.config | 3 +++ workflow/conf/spot.config | 3 +++ workflow/main.nf | 38 +++++++++++++++++---------- workflow/nextflow.config | 19 ++++++++++---- workflow/scripts/generate_versions.py | 12 +++++---- workflow/scripts/versions_python.sh | 9 +++++++ 10 files changed, 78 insertions(+), 43 deletions(-) create mode 100644 workflow/conf/ondemand.config create mode 100644 workflow/conf/spot.config create mode 100644 workflow/scripts/versions_python.sh diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 19d9073..598e601 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,7 @@ before_script: - module load python/3.6.1-2-anaconda - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1 - module load singularity/3.0.2 - - module load nextflow/19.09.0 + - module load nextflow/20.01.0 - mkdir -p test_data/hu.v2s1r500 - mkdir -p test_data/hu.v3s1r500 - mkdir -p test_data/mu.v3s1r500 @@ -30,7 +30,7 @@ astrocyte_cli: artifacts: expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -52,7 +52,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -74,7 +74,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -96,7 +96,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -120,7 +120,7 @@ astrocyte_cli: - test/outs/web_summary.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -144,7 +144,7 @@ GRCh38-3.0.0: - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -168,7 +168,7 @@ mm10-3.0.0: - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always @@ -191,6 +191,6 @@ mm10-3.0.0: - workflow/output/multiqc/run/multiqc_report.html expire_in: 2 days retry: - max: 1 + max: 0 when: - always diff --git a/README.md b/README.md index e8fe72a..1edd5a8 100755 --- a/README.md +++ b/README.md @@ -111,8 +111,8 @@ To Run: ``` * Design example: -| Sample | fastq_R1 | fastq_R2 | -|---------|------------------------------------|------------------------------------| +| Sample | fastq_R1 | fastq_R2 | +|--------|----------|----------| | sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | pbmc_1k_v2_S1_L001_R2_001.fastq.gz | | sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz | | sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz | diff --git a/docs/references.md b/docs/references.md index ea483c4..37f42d8 100644 --- a/docs/references.md +++ b/docs/references.md @@ -1,13 +1,13 @@ ### References -1. **python**: - * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com)) +1. **Nextflow**: + * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) 2. **cellranger** * Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count) -3. **MultiQc**: - * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) +3. **python**: + * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com)) -4. **Nextflow**: - * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820) +4. **MultiQc**: + * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354) diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config index 6caee14..767a7d2 100644 --- a/workflow/conf/aws.config +++ b/workflow/conf/aws.config @@ -1,4 +1,4 @@ -workDir = 's3://' +workDir = 's3://gudmap.rbk/work' aws.client.storageEncryption = 'AES256' aws { region = '' @@ -9,7 +9,6 @@ aws { process { executor = 'awsbatch' - queue = 'default-' cpus = 1 memory = '1 GB' diff --git a/workflow/conf/ondemand.config b/workflow/conf/ondemand.config new file mode 100644 index 0000000..d89352b --- /dev/null +++ b/workflow/conf/ondemand.config @@ -0,0 +1,3 @@ +process { + queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' +} diff --git a/workflow/conf/spot.config b/workflow/conf/spot.config new file mode 100644 index 0000000..6f1bfe0 --- /dev/null +++ b/workflow/conf/spot.config @@ -0,0 +1,3 @@ +process { + queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' +} diff --git a/workflow/main.nf b/workflow/main.nf index b796771..23b126d 100755 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -8,6 +8,15 @@ main.nf * */ +// ######## #### ###### ######## +// ## ## ## ## ## ## +// ## ## ## ## ## +// ######## ## ## ###### +// ## ## ## ## ## +// ## ## ## ## ## ## +// ######## #### ###### ## + + // Define Input variables params.name = "run" params.fastq = "test_data/mu.v3s1r500/*.fastq.gz" @@ -55,6 +64,7 @@ if (params.astrocyte) { params.genomeLocationFull = params.genomeLocation+params.genome // Define variables from input +pipelineVersion = "2.x.x-indev" name = params.name designLocation = Channel .fromPath(params.designFile) @@ -81,6 +91,7 @@ references = "${baseDir}/../docs/references.md" * trackStart: track start of pipeline */ params.ci = false +params.dev = false process trackStart { script: """ @@ -91,11 +102,13 @@ process trackStart { curl -H 'Content-Type: application/json' -X PUT -d '{ \ "sessionId": "${workflow.sessionId}", \ "pipeline": "cellranger_count", \ + "pipelineVersion": "${pipelineVersion}", \ "start": "${workflow.start}", \ "astrocyte": ${params.astrocyte}, \ "status": "started", \ "nextflowVersion": "${workflow.nextflow.version}", - "ci": ${params.ci}}' \ + "ci": ${params.ci}, + "dev": ${params.dev}}' \ "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking" """ } @@ -105,7 +118,6 @@ process trackStart { */ process checkDesignFile { tag "${name}" - container = 'bicf/python3:2.0.0' input: file designLocation @@ -164,10 +176,9 @@ chemistryParam310 = chemistryParam * count211: run cellranger count version 2.1.1 */ process count211 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger2.1.1:2.0.0' + queue '128GB,256GB,256GBv1,384GB' input: set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211 @@ -213,7 +224,6 @@ process count220 { queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger2.2.0:2.0.0' input: set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples220 @@ -256,10 +266,9 @@ process count220 { * count302: run cellranger count version 3.0.2 */ process count302 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger3.0.2:2.0.0' + queue '128GB,256GB,256GBv1,384GB' input: set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302 @@ -302,10 +311,9 @@ process count302 { * count310: run cellranger count version 3.1.0 */ process count310 { - queue '128GB,256GB,256GBv1,384GB' tag "${sample}" publishDir "${outDir}/${task.process}", mode: 'copy' - container 'bicf/cellranger3.1.0:2.0.0' + queue '128GB,256GB,256GBv1,384GB' input: set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples310 @@ -345,13 +353,13 @@ process count310 { } /* - * versions: collect too versions into a single yml + * versions: collect all versions into a single yml */ process versions { tag "${name}" - container 'bicf/python3:2.0.0' input: + file versions_pythonScript output: file("*.yaml") into yamlPaths @@ -359,10 +367,12 @@ process versions { script: """ hostname + ulimit -u 16384 ulimit -a - echo ${workflow.nextflow.version} > version_nextflow.txt - echo ${version} > version_cellranger.txt - multiqc --version | tr -d 'multiqc, version ' > version_multiqc.txt + echo "${workflow.nextflow.version}" > version_nextflow.txt + echo "${pipelineVersion}" > version_pipeline.txt + echo "${version}" > version_cellranger.txt + bash versions_python.sh > version_python.txt python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references """ diff --git a/workflow/nextflow.config b/workflow/nextflow.config index b05454d..4e2de2d 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -1,15 +1,24 @@ profiles { + standard { + includeConfig 'configs/biohpc.config' + } biohpc { - includeConfig 'conf/biohpc.config' + includeConfig 'configs/biohpc.config' } local { - includeConfig 'conf/local.config' + includeConfig 'configs/local.config' } cluster { - includeConfig 'conf/cluster.config' + includeConfig 'configs/cluster.config' } aws { - includeConfig 'conf/aws.config' + includeConfig 'configs/aws.config' + } + ondemand { + includeConfig 'configs/ondemand.config' + } + spot { + includeConfig 'configs/spot.config' } } @@ -62,6 +71,6 @@ manifest { homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count' description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.' mainScript = 'main.nf' - version = 'publish_2.0.4' + version = '2.x.x-indev' nextflowVersion = '>=0.31.0' } diff --git a/workflow/scripts/generate_versions.py b/workflow/scripts/generate_versions.py index ddcda53..978aedc 100755 --- a/workflow/scripts/generate_versions.py +++ b/workflow/scripts/generate_versions.py @@ -24,9 +24,10 @@ logger.propagate = False logger.setLevel(logging.INFO) SOFTWARE_REGEX = { + 'Pipeline': ['version_pipeline.txt', r"(\S+)"], 'Nextflow': ['version_nextflow.txt', r"(\S+)"], - 'Cellranger Count': ['version_cellranger.txt', r"(\S+)"], - 'MultiQC': ['version_multiqc.txt', r"(\S+)"], + 'cellranger count': ['version_cellranger.txt', r"(\S+)"], + 'python': ['version_python.txt', r"(\S+)"], } @@ -72,9 +73,10 @@ def main(): out_filename = output + '_mqc.yaml' results = OrderedDict() + results['Pipeline'] = '<span style="color:#999999;\">N/A</span>' results['Nextflow'] = '<span style="color:#999999;\">N/A</span>' - results['Cellranger Count'] = '<span style="color:#999999;\">N/A</span>' - results['MultiQC'] = '<span style="color:#999999;\">N/A</span>' + results['cellranger count'] = '<span style="color:#999999;\">N/A</span>' + results['python'] = '<span style="color:#999999;\">N/A</span>' # Check for version files: check_files(files) @@ -106,4 +108,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/workflow/scripts/versions_python.sh b/workflow/scripts/versions_python.sh new file mode 100644 index 0000000..ff79391 --- /dev/null +++ b/workflow/scripts/versions_python.sh @@ -0,0 +1,9 @@ +#!/bin/bash +#versions_python.sh +#* +#* -------------------------------------------------------------------------- +#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE) +#* -------------------------------------------------------------------------- +#* + +python --version |& grep 'Python ' | sed -n -e 's/^Python //p' -- GitLab