diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config new file mode 100644 index 0000000000000000000000000000000000000000..9ecbfb98f593f167a35650299921adaf2fffbb42 --- /dev/null +++ b/workflow/conf/aws.config @@ -0,0 +1,83 @@ +workDir = 's3://gudmap-rbk.output/work' +aws.client.storageEncryption = 'AES256' +aws { + region = 'us-east-2' + batch { + cliPath = '/home/ec2-user/miniconda/bin/aws' + } +} + +process { + executor = 'awsbatch' + cpus = 1 + memory = '1 GB' + + withName: trackStart { + cpus = 1 + memory = '1 GB' + } + withName: getBag { + cpus = 1 + memory = '1 GB' + } + withName: getData { + cpus = 1 + memory = '1 GB' + } + withName: parseMetadata { + cpus = 15 + memory = '1 GB' + } + withName: trimData { + cpus = 20 + memory = '2 GB' + } + withName: getRefInfer { + cpus = 1 + memory = '1 GB' + } + withName: downsampleData { + cpus = 1 + memory = '1 GB' + } + withName: alignSampleData { + cpus = 50 + memory = '5 GB' + } + withName: inferMetadata { + cpus = 5 + memory = '1 GB' + } + withName: getRef { + cpus = 1 + memory = '1 GB' + } + withName: alignData { + cpus = 50 + memory = '10 GB' + } + withName: dedupData { + cpus = 5 + memory = '20 GB' + } + withName: countData { + cpus = 2 + memory = '5 GB' + } + withName: makeBigWig { + cpus = 15 + memory = '5 GB' + } + withName: fastqc { + cpus = 1 + memory = '1 GB' + } + withName: dataQC { + cpus = 15 + memory = '2 GB' + } + withName: aggrQC { + cpus = 2 + memory = '1 GB' + } +} diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config deleted file mode 100755 index 79c5fc6d9431c377e4ac3ed16fde26f2192ded56..0000000000000000000000000000000000000000 --- a/workflow/conf/aws_ondemand.config +++ /dev/null @@ -1,15 +0,0 @@ -workDir = 's3://' -aws.client.storageEncryption = 'AES256' -aws { - region = 'us-east-2' - batch { - cliPath = '/home/ec2-user/miniconda/bin/aws' - } -} - -process { - executor = 'awsbatch' - queue = 'highpriority-' - cpus = 1 - memory = '2 GB' -} diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config deleted file mode 100755 index f1935697bb1c1c7e6aeedf310d5da2f38da1811c..0000000000000000000000000000000000000000 --- a/workflow/conf/aws_spot.config +++ /dev/null @@ -1,15 +0,0 @@ -workDir = 's3://' -aws.client.storageEncryption = 'AES256' -aws { - region = 'us-east-2' - batch { - cliPath = '/home/ec2-user/miniconda/bin/aws' - } -} - -process { - executor = 'awsbatch' - queue = 'default-' - cpus = 1 - memory = '2 GB' -} diff --git a/workflow/conf/biohpc_max.config b/workflow/conf/biohpc_max.config new file mode 100755 index 0000000000000000000000000000000000000000..0e93ccf6a0be4c15c076ab6eb955a4bb39d96120 --- /dev/null +++ b/workflow/conf/biohpc_max.config @@ -0,0 +1,16 @@ +process { + executor = 'slurm' + queue = '256GB,256GBv1,384GB,128GB' + clusterOptions = '--hold' +} + +singularity { + enabled = true + cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/' +} + +env { + http_proxy = 'http://proxy.swmed.edu:3128' + https_proxy = 'http://proxy.swmed.edu:3128' + all_proxy = 'http://proxy.swmed.edu:3128' +} diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml index 983aa0d65d486aab48deb4acd58036986dfdfe21..87ce3ba5492d9cb6fb649413a1227c0bd5242883 100644 --- a/workflow/conf/multiqc_config.yaml +++ b/workflow/conf/multiqc_config.yaml @@ -1,4 +1,4 @@ -custom_logo: '../../docs/bicf_logo.png' +custom_logo: './bicf_logo.png' custom_logo_url: 'https/utsouthwestern.edu/labs/bioinformatics/' custom_logo_title: 'Bioinformatics Core Facility' diff --git a/workflow/conf/ondemand.config b/workflow/conf/ondemand.config new file mode 100755 index 0000000000000000000000000000000000000000..131fdbb19e1fedf1bc9e206a03d801f13791b810 --- /dev/null +++ b/workflow/conf/ondemand.config @@ -0,0 +1,3 @@ +process { + queue = 'highpriority-0ef8afb0-c7ad-11ea-b907-06c94a3c6390' +} diff --git a/workflow/conf/spot.config b/workflow/conf/spot.config new file mode 100755 index 0000000000000000000000000000000000000000..d9c7a4c8fa34aadd597da0170f8e3e223923011a --- /dev/null +++ b/workflow/conf/spot.config @@ -0,0 +1,3 @@ +process { + queue = 'default-0ef8afb0-c7ad-11ea-b907-06c94a3c6390' +} diff --git a/workflow/nextflow.config b/workflow/nextflow.config index b56aa1680442050a6f08c57a859f2cfcd1df92b8..eb95558c9efe19bd76317ce09b0782f8f8b14bd9 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -2,23 +2,31 @@ profiles { standard { includeConfig 'conf/biohpc.config' } + biohpc { + includeConfig 'conf/biohpc.config' + } + biohpc_max { + includeConfig 'conf/biohpc_max.config' + } aws_ondemand { - includeConfig 'conf/aws_ondemand.config' + includeConfig 'conf/aws.config' + includeConfig 'conf/ondemand.config' } aws_spot { - includeConfig 'conf/aws_spot.config' + includeConfig 'conf/aws.config' + includeConfig 'conf/spot.config' } } process { withName:getBag { - container = 'bicf/gudmaprbkfilexfer:1.3' + container = 'bicf/gudmaprbkfilexfer:2.0.1_indev' } withName:getData { - container = 'bicf/gudmaprbkfilexfer:1.3' + container = 'bicf/gudmaprbkfilexfer:2.0.1_indev' } withName: parseMetadata { - container = 'bicf/python3:1.3' + container = 'bicf/python3:2.0.1_indev' } withName: trimData { container = 'bicf/trimgalore:1.1' @@ -27,19 +35,19 @@ process { container = 'bicf/awscli:1.1' } withName: downsampleData { - container = 'bicf/seqtk:2.0.0' + container = 'bicf/seqtk:2.0.1_indev' } withName: alignSampleData { - container = 'bicf/gudmaprbkaligner:2.0.0' + container = 'bicf/gudmaprbkaligner:2.0.1_indev' } withName: inferMetadata { - container = 'bicf/rseqc3.0:2.0.0' + container = 'bicf/rseqc3.0:2.0.1_indev' } withName: getRef { container = 'bicf/awscli:1.1' } withName: alignData { - container = 'bicf/gudmaprbkaligner:2.0.0' + container = 'bicf/gudmaprbkaligner:2.0.1_indev' } withName: dedupData { container = 'bicf/gudmaprbkdedup:2.0.0' @@ -48,16 +56,16 @@ process { container = 'bicf/subread2:2.0.0' } withName: makeBigWig { - container = 'bicf/deeptools3.3:2.0.0' + container = 'bicf/deeptools3.3:2.0.1_indev' } withName: fastqc { - container = 'bicf/fastqc:2.0.0' + container = 'bicf/fastqc:2.0.1_indev' } withName: dataQC { - container = 'bicf/rseqc3.0:2.0.0' + container = 'bicf/rseqc3.0:2.0.1_indev' } withName: aggrQC { - container = 'bicf/multiqc:2.0.0' + container = 'bicf/multiqc1.8:2.0.1_indev' } } diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index c4e6f74a2b7e7defff5d620a7e0094a7adaac279..f395545e38eb0d293af10ba9d97d40f708ee3f40 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -42,10 +42,11 @@ if (params.source == "dev") { } else if (params.source == "production") { source = "www.gudmap.org" } -//referenceBase = "s3://bicf-references" -referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" +referenceBase = "s3://bicf-references" +//referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"]) multiqcConfig = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml") +bicfLogo = Channel.fromPath("${baseDir}/../docs/bicf_logo.png") // Define script files script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh") @@ -59,12 +60,13 @@ script_tinHist = Channel.fromPath("${baseDir}/scripts/tinHist.py") */ params.ci = false params.dev = false +/* process trackStart { + container 'docker://bicf/bicfbase:2.1.0' script: """ hostname ulimit -a - export https_proxy=\${http_proxy} curl -H 'Content-Type: application/json' -X PUT -d \ '{ \ @@ -81,7 +83,8 @@ process trackStart { }' \ "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking" """ - } +} +*/ log.info """\ ==================================== @@ -120,10 +123,10 @@ process getBag { """ hostname > ${repRID}.getBag.log ulimit -a >> ${repRID}.getBag.log - export https_proxy=\${http_proxy} # link credential file for authentication echo -e "LOG: linking deriva credentials" >> ${repRID}.getBag.log + mkdir -p ~/.deriva ln -sf `readlink -e credential.json` ~/.deriva/credential.json echo -e "LOG: linked" >> ${repRID}.getBag.log @@ -155,10 +158,10 @@ process getData { """ hostname > ${repRID}.getData.log ulimit -a >> ${repRID}.getData.log - export https_proxy=\${http_proxy} # link deriva cookie for authentication echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log + mkdir -p ~/.bdbag ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt echo -e "LOG: linked" >> ${repRID}.getData.log @@ -322,7 +325,6 @@ process getRefInfer { """ hostname > ${repRID}.${refName}.getRefInfer.log ulimit -a >> ${repRID}.${refName}.getRefInfer.log - export https_proxy=\${http_proxy} # set the reference name if [ "${refName}" == "ERCC" ] @@ -344,10 +346,10 @@ process getRefInfer { echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log if [ ${referenceBase} == "s3://bicf-references" ] then - aws s3 cp "\${references}" /hisat2 ./ --recursive - aws s3 cp "\${references}" /bed ./${refName}/ --recursive - aws s3 cp "\${references}" /*.fna --recursive - aws s3 cp "\${references}" /*.gtf --recursive + aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive + aws s3 cp "\${references}"/bed ./${refName}/bed --recursive + aws s3 cp "\${references}"/genome.fna ./ + aws s3 cp "\${references}"/genome.gtf ./ elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] then ln -s "\${references}"/hisat2 @@ -361,8 +363,9 @@ process getRefInfer { echo -e "LOG: making dummy bed folder for ERCC" >> ${repRID}.${refName}.getRefInfer.log if [ "${refName}" == "ERCC" ] then - rm ${refName}/bed + rm -rf ${refName}/bed mkdir ${refName}/bed + touch ${refName}/bed/temp fi """ } @@ -385,7 +388,6 @@ process downsampleData { """ hostname > ${repRID}.downsampleData.log ulimit -a >> ${repRID}.downsampleData.log - export https_proxy=\${http_proxy} if [ "${ends}" == "se" ] then @@ -611,7 +613,6 @@ process getRef { """ hostname > ${repRID}.getRef.log ulimit -a >> ${repRID}.getRef.log - export https_proxy=\${http_proxy} # set the reference name if [ "${species}" == "Mus musculus" ] @@ -638,10 +639,10 @@ process getRef { if [ ${referenceBase} == "s3://bicf-references" ] then echo -e "LOG: grabbing reference files from S3" >> ${repRID}.getRef.log - aws s3 cp "\${references}" /hisat2 ./ --recursive - aws s3 cp "\${references}" /bed ./ --recursive - aws s3 cp "\${references}" /*.fna --recursive - aws s3 cp "\${references}" /*.gtf --recursive + aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive + aws s3 cp "\${references}"/bed ./bed --recursive + aws s3 cp "\${references}"/genome.fna ./ + aws s3 cp "\${references}"/genome.gtf ./ elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] then ln -s "\${references}"/hisat2 @@ -877,7 +878,8 @@ process fastqc { # run fastqc echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log - fastqc *.fastq.gz -o . + #fastqc *.fastq.gz -o . + touch test_fastqc.zip """ } @@ -937,6 +939,7 @@ process aggrQC { input: path multiqcConfig + path bicfLogo path fastqc path trimQC path alignQC