diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 10b06f97a6a83b23bdb1ac01e2ce46e975def353..6e1028aba7a0d2969e4766b210f1c13c30d0c538 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -51,11 +51,11 @@ trimData: alignData: stage: unit script: - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.se.unal.gz -S Q-Y5JA_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.se.unal.gz -S Q-Y5JA_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o Q-Y5JA_1M.se.bam Q-Y5JA_1M.se.sam - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ `nproc` -O BAM -o Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.bam - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ `nproc` -b Q-Y5JA_1M.se.sorted.bam Q-Y5JA_1M.se.sorted.bai - - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.pe.unal.gz -S Q-Y5JA_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5JA_1M_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5JA_1M_R2_val_2.fq.gz + - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p `nproc` --add-chrname --un-gz Q-Y5JA_1M.pe.unal.gz -S Q-Y5JA_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5JA_1M_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5JA_1M_R2_val_2.fq.gz - singularity run 
'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o Q-Y5JA_1M.pe.bam Q-Y5JA_1M.pe.sam - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ `nproc` -O BAM -o Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.bam - singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ `nproc` -b Q-Y5JA_1M.pe.sorted.bam Q-Y5JA_1M.pe.sorted.bai @@ -64,8 +64,16 @@ alignData: dedupData: stage: unit script: - - singularity exec 'docker://bicf/picard2.21.7:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true + - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true + - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ `nproc` -O BAM -o Q-Y5JA_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5JA_1M.se.deduped.bam + - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ `nproc` -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam Q-Y5JA_1M.se.sorted.deduped.bai - pytest -m dedupData + +makeBigWig: + stage: unit + script: + - singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p `nproc` -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam -o Q-Y5JA_1M.se.bw + - pytest -m makeBigWig fastqc: stage: unit diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index 56e7e6aa9abb7b1ce87fbc6cf9857c0460f140d5..a6f5fba4f7f6600733c0accce8d853fefb1fce63 100755 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -27,6 +27,9 @@ process { withName: fastqc { queue = 'super' } + withName: makeBigWig { + queue = 'super' + } } singularity { diff --git a/workflow/nextflow.config b/workflow/nextflow.config index 
02c71af5c58998839c270f78aadfdfd84696dfa3..eee32fa6f754c55b6b8051eedee646eebf4066c2 100644 --- a/workflow/nextflow.config +++ b/workflow/nextflow.config @@ -30,11 +30,14 @@ process { container = 'bicf/gudmaprbkaligner:2.0.0' } withName: dedupData { - container = 'bicf/picard2.21.7:2.0.0' + container = 'bicf/gudmaprbkdedup:2.0.0' } withName: fastqc { container = 'bicf/fastqc:2.0.0' } + withName: makeBigWig { + container = 'bicf/deeptools3.3:2.0.0' + } } trace { @@ -64,4 +67,4 @@ manifest { mainScript = 'rna-seq.nf' version = 'v0.0.1_indev' nextflowVersion = '>=19.09.0' -} \ No newline at end of file +} diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 0fd3ee0d872768d1d7d80c0be6bcc0cbafd105e0..28770ca170ed49a1524122852898337286c6aecb 100755 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -1,13 +1,20 @@ #!/usr/bin/env nextflow +// ######## #### ###### ######## +// ## ## ## ## ## ## +// ## ## ## ## ## +// ######## ## ## ###### +// ## ## ## ## ## +// ## ## ## ## ## ## +// ######## #### ###### ## + // Define input variables params.deriva = "${baseDir}/../test_data/auth/credential.json" params.bdbag = "${baseDir}/../test_data/auth/cookies.txt" //params.repRID = "16-1ZX4" params.repRID = "Q-Y5JA" -params.refVersion = "0.0.1" -params.refMuVersion = "38.P6" -params.refHuVersion = "38.p12" +params.refMoVersion = "38.p6.vM22" +params.refHuVersion = "38.p12.v31" params.outDir = "${baseDir}/../output" // Parse input variables @@ -18,15 +25,15 @@ bdbag = Channel .fromPath(params.bdbag) .ifEmpty { exit 1, "deriva cookie file for bdbag not found: ${params.bdbag}" } repRID = params.repRID -refVersion = params.refVersion -refMuVersion = params.refMuVersion +refMoVersion = params.refMoVersion refHuVersion = params.refHuVersion outDir = params.outDir logsDir = "${outDir}/Logs" // Define fixed files derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json") -referenceBase = "s3://bicf-references" +//referenceBase = "s3://bicf-references" 
+referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references" // Define script files script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh") @@ -186,9 +193,11 @@ stranded.into { } spike.into{ spike_getRef + spike_rseqc } species.into { species_getRef + species_rseqc } /* @@ -199,10 +208,6 @@ process getRef { publishDir "${logsDir}", mode: "copy", pattern: "*.getRef.err" input: - val referenceBase - val refVersion - val refMuVersion - val refHuVersion val spike_getRef val species_getRef @@ -215,13 +220,13 @@ ulimit -a >>${repRID}.getRef.err export https_proxy=\${http_proxy} - # retreive appropriate reference from S3 bucket + # set the reference name if [ "${species_getRef}" == "Mus musculus" ] then - references=\$(echo ${referenceBase}/mouse/${refVersion}/GRCm${refMuVersion}) + references=\$(echo ${referenceBase}/GRCm${refMoVersion}) elif [ '${species_getRef}' == "Homo sapiens" ] then - references=\$(echo ${referenceBase}/human/${refVersion}/GRCh${refHuVersion}) + references=\$(echo ${referenceBase}/GRCh${refHuVersion}) else exit 1 fi @@ -232,7 +237,17 @@ then reference=\$(echo \${references}/) fi - aws s3 cp "\${references}" ./ --recursive >>${repRID}.getRef.err + + # retrieve the reference from the appropriate location + if [ ${referenceBase} == "s3://bicf-references" ] + then + aws s3 cp "\${references}"/hisat2 ./ --recursive >>${repRID}.getRef.err + aws s3 cp "\${references}"/bed ./ --recursive >>${repRID}.getRef.err + elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] + then + cp -R "\${references}"/hisat2 ./ >>${repRID}.getRef.err + cp -R "\${references}"/bed ./ >>${repRID}.getRef.err + fi """ } @@ -268,6 +283,11 @@ process trimData { """ } +reference.into { + reference_alignData + reference_rseqc +} + /* * alignData: aligns the reads to a reference database */ @@ -279,7 +299,7 @@ val endsManual_alignData val stranded_alignData path fastq from 
fastqs_trimmed - path reference + path reference_alignData output: path ("${repRID}.sorted.bam") into rawBam @@ -320,7 +340,7 @@ process dedupData { path rawBam output: - path ("${repRID}.deduped.bam") into dedupBam + tuple val ("${repRID}"), path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bai") into dedupBam path ("${repRID}.dedup.out") path ("${repRID}.dedup.err") @@ -331,6 +351,8 @@ # remove duplicated reads java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err + samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err + samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bai 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err """ } @@ -356,4 +378,25 @@ # run fastqc fastqc *.fastq.gz -o . >>${repRID}.fastqc.err """ -} \ No newline at end of file +} + +/* + *Make BigWig files for later processes +*/ +process makeBigWig { + tag "${repRID}" + publishDir "${logsDir}", mode: 'copy', pattern: "*.makeBigWig.err" + + input: + tuple val (repRID), path (inBam), path (inBai) from dedupBam + + output: + path ("${repRID}.bw") + path ("${repRID}.makeBigWig.out") + path ("${repRID}.makeBigWig.err") + + script: + """ + bamCoverage -p `nproc` -b ${inBam} -o ${repRID}.bw 1>>${repRID}.makeBigWig.out 2>>${repRID}.makeBigWig.err + """ +} diff --git a/workflow/tests/test_makeBigWig.py b/workflow/tests/test_makeBigWig.py new file mode 100644 index 0000000000000000000000000000000000000000..198b5d2d4cdecd9013e08fbe3eae9c9b7303ef1a --- /dev/null +++ b/workflow/tests/test_makeBigWig.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 + +import pytest +import pandas as pd +import os +import utils + +data_output_path = os.path.dirname(os.path.abspath(__file__)) + \ + '/../../' + + +@pytest.mark.makeBigWig +def test_makeBigWig(): + assert os.path.exists(os.path.join(data_output_path, 'Q-Y5JA_1M.se.bw'))