diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a4e74a3c6f26dae916cacd7315d4c7e5642e133e..8f71ff415aa60f09156048a4bf26759dee0c628a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,7 +15,7 @@ getBag: stage: unit script: - ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json - - singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6 + - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6 - pytest -m getBag getData: @@ -23,20 +23,20 @@ getData: script: - ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt - unzip ./test_data/bagit/Replicate_Q-Y5F6.zip - - singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST + - singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST - pytest -m getData parseMetadata: stage: unit script: - - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID - - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID - - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID - - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta - - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual - - singularity run 'docker://bicf/python3:1.3' python3 
./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded - - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike - - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike + - singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species inferMetadata: stage: unit @@ -45,7 +45,7 @@ inferMetadata: align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) && if [[ ${align} == "" ]]; then exit 1; fi - > - singularity run 'docker://bicf/rseqc3.0:2.0.0' infer_experiment.py -r 
"/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log && + singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log && ended=`singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/inferMeta.sh endness Q-Y5F6_1M.se.inferMetadata.log` && if [[ ${ended} == "" ]]; then exit 1; fi - pytest -m inferMetadata @@ -68,20 +68,20 @@ trimData: downsampleData: stage: unit script: - - singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq + - singularity run 'docker://bicf/seqtk:2.0.1_indev' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq - pytest -m downsampleData alignData: stage: unit script: - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x 
/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 
-F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai - pytest -m alignData dedupData: @@ -106,13 +106,13 @@ countData: makeBigWig: stage: unit script: - - singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw + - singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw - pytest -m makeBigWig fastqc: stage: unit script: - - singularity run 'docker://bicf/fastqc:2.0.0' ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o . + - singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o . - pytest -m fastqc dataQC: @@ -120,7 +120,7 @@ dataQC: script: - echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls - for i in {"chr8","chr4","chrY"}; do - echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls + echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls - pytest -m dataQC diff --git 
a/.gitlab/merge_request_templates/Merge_Request.md b/.gitlab/merge_request_templates/Merge_Request.md index 11442efc9e33ce9cbaecef9a6dca32df9ad91985..d66514047e4ee5c5c0f404c5931055cabd2408fc 100644 --- a/.gitlab/merge_request_templates/Merge_Request.md +++ b/.gitlab/merge_request_templates/Merge_Request.md @@ -9,10 +9,12 @@ These are the most common things requested on pull requests. - [ ] `CHANGELOG.md` is updated - [ ] `README.md` is updated - [ ] `LICENSE.md` is updated with new contributors + - [ ] Docker images moved to production release and changed in pipeline + - [ ] Docker images used in the CI unit tests match those used in pipeline * [ ] **Close issue**\ Closes # /cc @ghenry @venkat.malladi -/assign @ghenry \ No newline at end of file +/assign @ghenry diff --git a/README.md b/README.md index 89c1c3cd52f14a398baf62fe41ae581da9dfedb9..bc12d2c3a91db96756ad941444c4b67797afda59 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,12 @@ To Run: * `--refMoVersion` mouse reference version ***(optional)*** * `--refHuVersion` human reference version ***(optional)*** * `--refERCCVersion` human reference version ***(optional)*** - * `-profile` config profile to use: standard = processes on BioHPC cluster, aws_ondemand = AWS Batch on-demand instant requests, aws_spot = AWS Batch spot instance requests ***(optional)*** + * `-profile` config profile to use ***(optional)***: + * default = processes on BioHPC cluster + * **biohpc** = process on BioHPC cluster + * **biohpc_max** = process on high power BioHPC cluster nodes (>= 128GB nodes), for resource testing + * **aws_ondemand** = AWS Batch on-demand instant requests + * **aws_spot** = AWS Batch spot instance requests * NOTES: * once deriva-auth is run and authenticated, the two files above are saved in ```~/.deriva/``` (see official documents from [deriva](https://github.com/informatics-isi-edu/deriva-client#installer-packages-for-windows-and-macosx) on the lifetime of the credentials) * reference version consists of 
Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version) diff --git a/docs/dag.png b/docs/dag.png index 82435f8b87b796eb4c96f385b904922adbeec6fd..fcedac6aed613fa3a90575157459b81394f223ca 100644 Binary files a/docs/dag.png and b/docs/dag.png differ diff --git a/workflow/conf/aws.config b/workflow/conf/aws.config new file mode 100644 index 0000000000000000000000000000000000000000..9ecbfb98f593f167a35650299921adaf2fffbb42 --- /dev/null +++ b/workflow/conf/aws.config @@ -0,0 +1,83 @@ +workDir = 's3://gudmap-rbk.output/work' +aws.client.storageEncryption = 'AES256' +aws { + region = 'us-east-2' + batch { + cliPath = '/home/ec2-user/miniconda/bin/aws' + } +} + +process { + executor = 'awsbatch' + cpus = 1 + memory = '1 GB' + + withName: trackStart { + cpus = 1 + memory = '1 GB' + } + withName: getBag { + cpus = 1 + memory = '1 GB' + } + withName: getData { + cpus = 1 + memory = '1 GB' + } + withName: parseMetadata { + cpus = 15 + memory = '1 GB' + } + withName: trimData { + cpus = 20 + memory = '2 GB' + } + withName: getRefInfer { + cpus = 1 + memory = '1 GB' + } + withName: downsampleData { + cpus = 1 + memory = '1 GB' + } + withName: alignSampleData { + cpus = 50 + memory = '5 GB' + } + withName: inferMetadata { + cpus = 5 + memory = '1 GB' + } + withName: getRef { + cpus = 1 + memory = '1 GB' + } + withName: alignData { + cpus = 50 + memory = '10 GB' + } + withName: dedupData { + cpus = 5 + memory = '20 GB' + } + withName: countData { + cpus = 2 + memory = '5 GB' + } + withName: makeBigWig { + cpus = 15 + memory = '5 GB' + } + withName: fastqc { + cpus = 1 + memory = '1 GB' + } + withName: dataQC { + cpus = 15 + memory = '2 GB' + } + withName: aggrQC { + cpus = 2 + memory = '1 GB' + } +} diff --git a/workflow/conf/aws_ondemand.config b/workflow/conf/aws_ondemand.config deleted file mode 100755 index 
79c5fc6d9431c377e4ac3ed16fde26f2192ded56..0000000000000000000000000000000000000000 --- a/workflow/conf/aws_ondemand.config +++ /dev/null @@ -1,15 +0,0 @@ -workDir = 's3://' -aws.client.storageEncryption = 'AES256' -aws { - region = 'us-east-2' - batch { - cliPath = '/home/ec2-user/miniconda/bin/aws' - } -} - -process { - executor = 'awsbatch' - queue = 'highpriority-' - cpus = 1 - memory = '2 GB' -} diff --git a/workflow/conf/aws_spot.config b/workflow/conf/aws_spot.config deleted file mode 100755 index f1935697bb1c1c7e6aeedf310d5da2f38da1811c..0000000000000000000000000000000000000000 --- a/workflow/conf/aws_spot.config +++ /dev/null @@ -1,15 +0,0 @@ -workDir = 's3://' -aws.client.storageEncryption = 'AES256' -aws { - region = 'us-east-2' - batch { - cliPath = '/home/ec2-user/miniconda/bin/aws' - } -} - -process { - executor = 'awsbatch' - queue = 'default-' - cpus = 1 - memory = '2 GB' -} diff --git a/workflow/conf/biohpc_max.config b/workflow/conf/biohpc_max.config new file mode 100755 index 0000000000000000000000000000000000000000..0e93ccf6a0be4c15c076ab6eb955a4bb39d96120 --- /dev/null +++ b/workflow/conf/biohpc_max.config @@ -0,0 +1,16 @@ +process { + executor = 'slurm' + queue = '256GB,256GBv1,384GB,128GB' + clusterOptions = '--hold' +} + +singularity { + enabled = true + cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/' +} + +env { + http_proxy = 'http://proxy.swmed.edu:3128' + https_proxy = 'http://proxy.swmed.edu:3128' + all_proxy = 'http://proxy.swmed.edu:3128' +} diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml index 983aa0d65d486aab48deb4acd58036986dfdfe21..87ce3ba5492d9cb6fb649413a1227c0bd5242883 100644 --- a/workflow/conf/multiqc_config.yaml +++ b/workflow/conf/multiqc_config.yaml @@ -1,4 +1,4 @@ -custom_logo: '../../docs/bicf_logo.png' +custom_logo: './bicf_logo.png' custom_logo_url: 'https/utsouthwestern.edu/labs/bioinformatics/' custom_logo_title: 'Bioinformatics Core Facility' diff --git 
a/workflow/conf/ondemand.config b/workflow/conf/ondemand.config new file mode 100755 index 0000000000000000000000000000000000000000..131fdbb19e1fedf1bc9e206a03d801f13791b810 --- /dev/null +++ b/workflow/conf/ondemand.config @@ -0,0 +1,3 @@ +process { + queue = 'highpriority-0ef8afb0-c7ad-11ea-b907-06c94a3c6390' +} diff --git a/workflow/conf/spot.config b/workflow/conf/spot.config new file mode 100755 index 0000000000000000000000000000000000000000..d9c7a4c8fa34aadd597da0170f8e3e223923011a --- /dev/null +++ b/workflow/conf/spot.config @@ -0,0 +1,3 @@ +process { + queue = 'default-0ef8afb0-c7ad-11ea-b907-06c94a3c6390' +} diff --git a/workflow/nextflow.config b/workflow/nextflow.config index b56aa1680442050a6f08c57a859f2cfcd1df92b8..eb95558c9efe19bd76317ce09b0782f8f8b14bd9 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -2,23 +2,31 @@ profiles { standard { includeConfig 'conf/biohpc.config' } + biohpc { + includeConfig 'conf/biohpc.config' + } + biohpc_max { + includeConfig 'conf/biohpc_max.config' + } aws_ondemand { - includeConfig 'conf/aws_ondemand.config' + includeConfig 'conf/aws.config' + includeConfig 'conf/ondemand.config' } aws_spot { - includeConfig 'conf/aws_spot.config' + includeConfig 'conf/aws.config' + includeConfig 'conf/spot.config' } } process { withName:getBag { - container = 'bicf/gudmaprbkfilexfer:1.3' + container = 'bicf/gudmaprbkfilexfer:2.0.1_indev' } withName:getData { - container = 'bicf/gudmaprbkfilexfer:1.3' + container = 'bicf/gudmaprbkfilexfer:2.0.1_indev' } withName: parseMetadata { - container = 'bicf/python3:1.3' + container = 'bicf/python3:2.0.1_indev' } withName: trimData { container = 'bicf/trimgalore:1.1' @@ -27,19 +35,19 @@ process { container = 'bicf/awscli:1.1' } withName: downsampleData { - container = 'bicf/seqtk:2.0.0' + container = 'bicf/seqtk:2.0.1_indev' } withName: alignSampleData { - container = 'bicf/gudmaprbkaligner:2.0.0' + container = 'bicf/gudmaprbkaligner:2.0.1_indev' } withName: 
inferMetadata { - container = 'bicf/rseqc3.0:2.0.0' + container = 'bicf/rseqc3.0:2.0.1_indev' } withName: getRef { container = 'bicf/awscli:1.1' } withName: alignData { - container = 'bicf/gudmaprbkaligner:2.0.0' + container = 'bicf/gudmaprbkaligner:2.0.1_indev' } withName: dedupData { container = 'bicf/gudmaprbkdedup:2.0.0' @@ -48,16 +56,16 @@ process { container = 'bicf/subread2:2.0.0' } withName: makeBigWig { - container = 'bicf/deeptools3.3:2.0.0' + container = 'bicf/deeptools3.3:2.0.1_indev' } withName: fastqc { - container = 'bicf/fastqc:2.0.0' + container = 'bicf/fastqc:2.0.1_indev' } withName: dataQC { - container = 'bicf/rseqc3.0:2.0.0' + container = 'bicf/rseqc3.0:2.0.1_indev' } withName: aggrQC { - container = 'bicf/multiqc:2.0.0' + container = 'bicf/multiqc1.8:2.0.1_indev' } } diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index c4e6f74a2b7e7defff5d620a7e0094a7adaac279..2e1e2b2c5d75cdb95ca856f1f5807a187c046deb 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -42,10 +42,11 @@ if (params.source == "dev") { } else if (params.source == "production") { source = "www.gudmap.org" } -//referenceBase = "s3://bicf-references" -referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" +referenceBase = "s3://bicf-references" +//referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" referenceInfer = Channel.fromList(["ERCC","GRCh","GRCm"]) multiqcConfig = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml") +bicfLogo = Channel.fromPath("${baseDir}/../docs/bicf_logo.png") // Define script files script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh") @@ -60,11 +61,11 @@ script_tinHist = Channel.fromPath("${baseDir}/scripts/tinHist.py") params.ci = false params.dev = false process trackStart { + container 'docker://bicf/bicfbase:2.1.0' script: """ hostname ulimit -a - export https_proxy=\${http_proxy} curl -H 'Content-Type: application/json' -X PUT -d \ '{ \ @@ -81,7 +82,7 @@ process trackStart { }' \ 
"https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking" """ - } +} log.info """\ ==================================== @@ -120,10 +121,10 @@ process getBag { """ hostname > ${repRID}.getBag.log ulimit -a >> ${repRID}.getBag.log - export https_proxy=\${http_proxy} # link credential file for authentication echo -e "LOG: linking deriva credentials" >> ${repRID}.getBag.log + mkdir -p ~/.deriva ln -sf `readlink -e credential.json` ~/.deriva/credential.json echo -e "LOG: linked" >> ${repRID}.getBag.log @@ -155,10 +156,10 @@ process getData { """ hostname > ${repRID}.getData.log ulimit -a >> ${repRID}.getData.log - export https_proxy=\${http_proxy} # link deriva cookie for authentication echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log + mkdir -p ~/.bdbag ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt echo -e "LOG: linked" >> ${repRID}.getData.log @@ -322,7 +323,6 @@ process getRefInfer { """ hostname > ${repRID}.${refName}.getRefInfer.log ulimit -a >> ${repRID}.${refName}.getRefInfer.log - export https_proxy=\${http_proxy} # set the reference name if [ "${refName}" == "ERCC" ] @@ -344,10 +344,10 @@ process getRefInfer { echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log if [ ${referenceBase} == "s3://bicf-references" ] then - aws s3 cp "\${references}" /hisat2 ./ --recursive - aws s3 cp "\${references}" /bed ./${refName}/ --recursive - aws s3 cp "\${references}" /*.fna --recursive - aws s3 cp "\${references}" /*.gtf --recursive + aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive + aws s3 cp "\${references}"/bed ./${refName}/bed --recursive + aws s3 cp "\${references}"/genome.fna ./ + aws s3 cp "\${references}"/genome.gtf ./ elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] then ln -s "\${references}"/hisat2 @@ -361,8 +361,9 @@ process getRefInfer { echo -e "LOG: making dummy bed folder for ERCC" >> 
${repRID}.${refName}.getRefInfer.log if [ "${refName}" == "ERCC" ] then - rm ${refName}/bed + rm -rf ${refName}/bed mkdir ${refName}/bed + touch ${refName}/bed/temp fi """ } @@ -385,7 +386,6 @@ process downsampleData { """ hostname > ${repRID}.downsampleData.log ulimit -a >> ${repRID}.downsampleData.log - export https_proxy=\${http_proxy} if [ "${ends}" == "se" ] then @@ -611,7 +611,6 @@ process getRef { """ hostname > ${repRID}.getRef.log ulimit -a >> ${repRID}.getRef.log - export https_proxy=\${http_proxy} # set the reference name if [ "${species}" == "Mus musculus" ] @@ -638,10 +637,10 @@ process getRef { if [ ${referenceBase} == "s3://bicf-references" ] then echo -e "LOG: grabbing reference files from S3" >> ${repRID}.getRef.log - aws s3 cp "\${references}" /hisat2 ./ --recursive - aws s3 cp "\${references}" /bed ./ --recursive - aws s3 cp "\${references}" /*.fna --recursive - aws s3 cp "\${references}" /*.gtf --recursive + aws s3 cp "\${references}"/hisat2 ./hisat2 --recursive + aws s3 cp "\${references}"/bed ./bed --recursive + aws s3 cp "\${references}"/genome.fna ./ + aws s3 cp "\${references}"/genome.gtf ./ elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] then ln -s "\${references}"/hisat2 @@ -877,7 +876,8 @@ process fastqc { # run fastqc echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log - fastqc *.fastq.gz -o . + #fastqc *.fastq.gz -o . + touch test_fastqc.zip """ } @@ -937,6 +937,7 @@ process aggrQC { input: path multiqcConfig + path bicfLogo path fastqc path trimQC path alignQC