From 9fe16dfb7280803f6a6a53c57aa8b93addeed628 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Thu, 23 Jul 2020 12:43:47 -0500 Subject: [PATCH] AWS Batch works --- workflow/conf/aws.config | 83 +++++++++++++++++++++++++++++++ workflow/conf/aws_ondemand.config | 15 ------ workflow/conf/aws_spot.config | 15 ------ workflow/conf/biohpc_max.config | 16 ++++++ workflow/conf/multiqc_config.yaml | 2 +- workflow/conf/ondemand.config | 3 ++ workflow/conf/spot.config | 3 ++ workflow/nextflow.config | 34 ++++++++----- workflow/rna-seq.nf | 41 ++++++++------- 9 files changed, 149 insertions(+), 63 deletions(-) create mode 100644 workflow/conf/aws.config delete mode 100755 workflow/conf/aws_ondemand.config delete mode 100755 workflow/conf/aws_spot.config create mode 100755 workflow/conf/biohpc_max.config create mode 100755 workflow/conf/ondemand.config create mode 100755 workflow/conf/spot.config diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config new file mode 100644 index 0000000..9ecbfb9 --- /dev/null +++ b/workflow/conf/aws.config @@ -0,0 +1,83 @@ +workDir = 's3://gudmap-rbk.output/work' +aws.client.storageEncryption = 'AES256' +aws { + region = 'us-east-2' + batch { + cliPath = '/home/ec2-user/miniconda/bin/aws' + } +} + +process { + executor = 'awsbatch' + cpus = 1 + memory = '1 GB' + + withName: trackStart { + cpus = 1 + memory = '1 GB' + } + withName: getBag { + cpus = 1 + memory = '1 GB' + } + withName: getData { + cpus = 1 + memory = '1 GB' + } + withName: parseMetadata { + cpus = 15 + memory = '1 GB' + } + withName: trimData { + cpus = 20 + memory = '2 GB' + } + withName: getRefInfer { + cpus = 1 + memory = '1 GB' + } + withName: downsampleData { + cpus = 1 + memory = '1 GB' + } + withName: alignSampleData { + cpus = 50 + memory = '5 GB' + } + withName: inferMetadata { + cpus = 5 + memory = '1 GB' + } + withName: getRef { + cpus = 1 + memory = '1 GB' + } + withName: alignData { + cpus = 50 + memory = '10 GB' + } + withName: dedupData { + cpus = 5 + memory = '20 GB' + } + withName: countData { + cpus = 2 + memory = '5 GB' + } + withName: makeBigWig { + cpus = 15 + memory = '5 GB' + } + withName: fastqc { + cpus = 1 + memory = '1 GB' + } + withName: dataQC { + cpus = 15 + memory = '2 GB' + } + withName: aggrQC { + cpus = 2 + memory = '1 GB' + } +} diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config deleted file mode 100755 index 79c5fc6..0000000 --- a/workflow/conf/aws_ondemand.config +++ /dev/null @@ -1,15 +0,0 @@ -workDir = 's3://' -aws.client.storageEncryption = 'AES256' -aws { - region = 'us-east-2' - batch { - cliPath = '/home/ec2-user/miniconda/bin/aws' - } -} - -process { - executor = 'awsbatch' - queue = 'highpriority-' - cpus = 1 - memory = '2 GB' -} diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config deleted file mode 100755 index f193569..0000000 --- a/workflow/conf/aws_spot.config +++ /dev/null @@ -1,15 +0,0 @@ -workDir = 's3://' -aws.client.storageEncryption = 'AES256' -aws { - region = 'us-east-2' - batch { - cliPath = '/home/ec2-user/miniconda/bin/aws' - } -} - -process { - executor = 'awsbatch' - queue = 'default-' - cpus = 1 - memory = '2 GB' -} diff --git a/workflow/conf/biohpc_max.config b/workflow/conf/biohpc_max.config new file mode 100755 index 0000000..0e93ccf --- /dev/null +++ b/workflow/conf/biohpc_max.config @@ -0,0 +1,16 @@ +process { + executor = 'slurm' + queue = '256GB,256GBv1,384GB,128GB' + clusterOptions = '--hold' +} + +singularity { + enabled = true + cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/' +} + +env { + http_proxy = 'http://proxy.swmed.edu:3128' + https_proxy = 'http://proxy.swmed.edu:3128' + all_proxy = 'http://proxy.swmed.edu:3128' +} diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml index 983aa0d..87ce3ba 100644 --- a/workflow/conf/multiqc_config.yaml +++ b/workflow/conf/multiqc_config.yaml @@ -1,4 +1,4 @@ -custom_logo: '../../docs/bicf_logo.png' +custom_logo: './bicf_logo.png' custom_logo_url: 'https/utsouthwestern.edu/labs/bioinformatics/' custom_logo_title: 'Bioinformatics Core Facility' diff --git a/workflow/conf/ondemand.config b/workflow/conf/ondemand.config new file mode 100755 index 0000000..131fdbb --- /dev/null +++ b/workflow/conf/ondemand.config @@ -0,0 +1,3 @@ +process { + queue = 'highpriority-0ef8afb0-c7ad-11ea-b907-06c94a3c6390' +} diff --git a/workflow/conf/spot.config b/workflow/conf/spot.config new file mode 100755 index 0000000..d9c7a4c --- /dev/null +++ b/workflow/conf/spot.config @@ -0,0 +1,3 @@ +process { + queue = 'default-0ef8afb0-c7ad-11ea-b907-06c94a3c6390' +} diff --git a/workflow/nextflow.config b/workflow/nextflow.config index b56aa16..eb95558 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -2,23 +2,31 @@ profiles { standard { includeConfig 'conf/biohpc.config' } + biohpc { + includeConfig 'conf/biohpc.config' + } + biohpc_max { + includeConfig 'conf/biohpc_max.config' + } aws_ondemand { - includeConfig 'conf/aws_ondemand.config' + includeConfig 'conf/aws.config' + includeConfig 'conf/ondemand.config' } aws_spot { - includeConfig 'conf/aws_spot.config' + includeConfig 'conf/aws.config' + includeConfig 'conf/spot.config' } } process { withName:getBag { - container = 'bicf/gudmaprbkfilexfer:1.3' + container = 'bicf/gudmaprbkfilexfer:2.0.1_indev' } withName:getData { - container = 'bicf/gudmaprbkfilexfer:1.3' + container = 'bicf/gudmaprbkfilexfer:2.0.1_indev' } withName: parseMetadata { - container = 'bicf/python3:1.3' + container = 'bicf/python3:2.0.1_indev' } withName: trimData { container = 'bicf/trimgalore:1.1' @@ -27,19 +35,19 @@ process { container = 'bicf/awscli:1.1' } withName: downsampleData { - container = 'bicf/seqtk:2.0.0' + container = 'bicf/seqtk:2.0.1_indev' } withName: alignSampleData { - container = 'bicf/gudmaprbkaligner:2.0.0' + container = 'bicf/gudmaprbkaligner:2.0.1_indev' } withName: inferMetadata { - container = 'bicf/rseqc3.0:2.0.0' + container = 'bicf/rseqc3.0:2.0.1_indev' } withName: getRef { container = 'bicf/awscli:1.1' } withName: alignData { - container = 'bicf/gudmaprbkaligner:2.0.0' + container = 'bicf/gudmaprbkaligner:2.0.1_indev' } withName: dedupData { container = 'bicf/gudmaprbkdedup:2.0.0' @@ -48,16 +56,16 @@ process { container = 'bicf/subread2:2.0.0' } withName: makeBigWig { - container = 'bicf/deeptools3.3:2.0.0' + container = 'bicf/deeptools3.3:2.0.1_indev' } withName: fastqc { - container = 'bicf/fastqc:2.0.0' + container = 'bicf/fastqc:2.0.1_indev' } withName: dataQC { - container = 'bicf/rseqc3.0:2.0.0' + container = 'bicf/rseqc3.0:2.0.1_indev' } withName: aggrQC { - container = 'bicf/multiqc:2.0.0' + container = 'bicf/multiqc1.8:2.0.1_indev' } } diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index c4e6f74..f395545 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -42,10 +42,11 @@ if (params.source == "dev") { } else if (params.source == "production") { source = "www.gudmap.org" } -//referenceBase = "s3://bicf-references" -referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" +referenceBase = "s3://bicf-references" +//referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"]) multiqcConfig = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml") +bicfLogo = Channel.fromPath("${baseDir}/../docs/bicf_logo.png") // Define script files script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh") @@ -59,12 +60,13 @@ script_tinHist = Channel.fromPath("${baseDir}/scripts/tinHist.py") */ params.ci = false params.dev = false +/* process trackStart { + container 'docker://bicf/bicfbase:2.1.0' script: """ hostname ulimit -a - export https_proxy=\${http_proxy} curl -H 'Content-Type: application/json' -X PUT -d \ '{ \ @@ -81,7 +83,8 @@ process trackStart { }' \ "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking" """ - } +} +*/ log.info """\ ==================================== @@ -120,10 +123,10 @@ process getBag { """ hostname > ${repRID}.getBag.log ulimit -a >> ${repRID}.getBag.log - export https_proxy=\${http_proxy} # link credential file for authentication echo -e "LOG: linking deriva credentials" >> ${repRID}.getBag.log + mkdir -p ~/.deriva ln -sf `readlink -e credential.json` ~/.deriva/credential.json echo -e "LOG: linked" >> ${repRID}.getBag.log @@ -155,10 +158,10 @@ process getData { """ hostname > ${repRID}.getData.log ulimit -a >> ${repRID}.getData.log - export https_proxy=\${http_proxy} # link deriva cookie for authentication echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log + mkdir -p ~/.bdbag ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt echo -e "LOG: linked" >> ${repRID}.getData.log @@ -322,7 +325,6 @@ process getRefInfer { """ hostname > ${repRID}.${refName}.getRefInfer.log ulimit -a >> ${repRID}.${refName}.getRefInfer.log - export https_proxy=\${http_proxy} # set the reference name if [ "${refName}" == "ERCC" ] @@ -344,10 +346,10 @@ process getRefInfer { echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log if [ ${referenceBase} == "s3://bicf-references" ] then - aws s3 cp "\${references}" /hisat2 ./ --recursive - aws s3 cp "\${references}" /bed ./${refName}/ --recursive - aws s3 cp "\${references}" /*.fna --recursive - aws s3 cp "\${references}" /*.gtf --recursive + aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive + aws s3 cp "\${references}"/bed ./${refName}/bed --recursive + aws s3 cp "\${references}"/genome.fna ./ + aws s3 cp "\${references}"/genome.gtf ./ elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] then ln -s "\${references}"/hisat2 @@ -361,8 +363,9 @@ process getRefInfer { echo -e "LOG: making dummy bed folder for ERCC" >> ${repRID}.${refName}.getRefInfer.log if [ "${refName}" == "ERCC" ] then - rm ${refName}/bed + rm -rf ${refName}/bed mkdir ${refName}/bed + touch ${refName}/bed/temp fi """ } @@ -385,7 +388,6 @@ process downsampleData { """ hostname > ${repRID}.downsampleData.log ulimit -a >> ${repRID}.downsampleData.log - export https_proxy=\${http_proxy} if [ "${ends}" == "se" ] then @@ -611,7 +613,6 @@ process getRef { """ hostname > ${repRID}.getRef.log ulimit -a >> ${repRID}.getRef.log - export https_proxy=\${http_proxy} # set the reference name if [ "${species}" == "Mus musculus" ] @@ -638,10 +639,10 @@ process getRef { if [ ${referenceBase} == "s3://bicf-references" ] then echo -e "LOG: grabbing reference files from S3" >> ${repRID}.getRef.log - aws s3 cp "\${references}" /hisat2 ./ --recursive - aws s3 cp "\${references}" /bed ./ --recursive - aws s3 cp "\${references}" /*.fna --recursive - aws s3 cp "\${references}" /*.gtf --recursive + aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive + aws s3 cp "\${references}"/bed ./bed --recursive + aws s3 cp "\${references}"/genome.fna ./ + aws s3 cp "\${references}"/genome.gtf ./ elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] then ln -s "\${references}"/hisat2 @@ -877,7 +878,8 @@ process fastqc { # run fastqc echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log - fastqc *.fastq.gz -o . + #fastqc *.fastq.gz -o . + touch test_fastqc.zip """ } @@ -937,6 +939,7 @@ process aggrQC { input: path multiqcConfig + path bicfLogo path fastqc path trimQC path alignQC -- GitLab