From b8e8774b9a3082cd90e06b39a93eddde0d69d1f0 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Sat, 31 Oct 2020 15:14:13 -0500 Subject: [PATCH] Test failure. --- workflow/rna-seq.nf | 82 ++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index a940778..3099e99 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -1,12 +1,12 @@ #!/usr/bin/env nextflow -// ######## #### ###### ######## -// ## ## ## ## ## ## -// ## ## ## ## ## -// ######## ## ## ###### -// ## ## ## ## ## -// ## ## ## ## ## ## -// ######## #### ###### ## +// ######## #### ###### ######## +// ## ## ## ## ## ## +// ## ## ## ## ## +// ######## ## ## ###### +// ## ## ## ## ## +// ## ## ## ## ## ## +// ######## #### ###### ## // Define input variables params.deriva = "${baseDir}/../test_data/auth/credential.json" @@ -89,7 +89,7 @@ process trackStart { """ hostname ulimit -a - + curl -H 'Content-Type: application/json' -X PUT -d \ '{ \ "sessionId": "${workflow.sessionId}", \ @@ -199,16 +199,16 @@ process getData { mkdir -p ~/.bdbag ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt echo -e "LOG: linked" >> ${repRID}.getData.log - + # get bag basename replicate=\$(basename "${inputBag}" | cut -d "." -f1) echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log - + # unzip bag echo -e "LOG: unzipping replicate bag" >> ${repRID}.getData.log unzip ${inputBag} echo -e "LOG: unzipped" >> ${repRID}.getData.log - + # bag fetch fastq's only and rename by repRID echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log sh ${script_bdbagFetch} \${replicate} ${repRID} @@ -259,7 +259,7 @@ process parseMetadata { # get experiment RID metadata exp=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p expRID) echo -e "LOG: experiment RID metadata parsed: \${exp}" >> ${repRID}.parseMetadata.log - + # get study RID metadata study=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p studyRID) echo -e "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log @@ -267,7 +267,7 @@ process parseMetadata { # get endedness metadata endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta) echo -e "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.log - + # ganually get endness endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p endsManual) echo -e "LOG: endedness manually detected: \${endsManual}" >> ${repRID}.parseMetadata.log @@ -275,11 +275,11 @@ process parseMetadata { # get strandedness metadata stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p stranded) echo -e "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log - + # get spike-in metadata spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p spike) echo -e "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log - + # get species metadata species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p species) echo -e "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log @@ -358,7 +358,7 @@ process trimData { fi echo -e "LOG: trimmed" >> ${repRID}.trimData.log echo -e "LOG: average trimmed read length: \${readLength}" >> ${repRID}.trimData.log - + # save read length file echo -e "\${readLength}" > readLength.csv """ @@ -381,7 +381,7 @@ getRefInferInput = 
referenceInfer.combine(deriva_getRefInfer.combine(script_refD /* * getRefInfer: dowloads appropriate reference for metadata inference -*/ +*/ process getRefInfer { tag "${refName}" @@ -391,7 +391,7 @@ process getRefInfer { output: tuple val (refName), path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf") into refInfer path ("${refName}", type: 'dir') into bedInfer - + script: """ hostname > ${repRID}.${refName}.getRefInfer.log @@ -532,14 +532,14 @@ process alignSampleData { echo -e "LOG: aligning ${ends}" >> ${repRID}.${ref}.alignSampleData.log if [ "${ends}" == "se" ] then - + hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${ref}.alignSampleSummary.txt --new-summary elif [ "${ends}" == "pe" ] then hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${ref}.alignSampleSummary.txt --new-summary fi echo -e "LOG: aliged" >> ${repRID}.${ref}.alignSampleData.log - + # convert the output sam file to a sorted bam file using Samtools echo -e "LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.log samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${ref}.sampled.bam ${ref}.sampled.sam @@ -639,7 +639,7 @@ process inferMetadata { ended=`bash inferMeta.sh endness ${repRID}.infer_experiment.txt` fail=`bash inferMeta.sh fail ${repRID}.infer_experiment.txt` - if [ \${ended} == "PairEnd" ] + if [ \${ended} == "PairEnd" ] then ends="pe" percentF=`bash inferMeta.sh pef ${repRID}.infer_experiment.txt` @@ -728,7 +728,7 @@ process getRef { output: tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv") into reference - + script: """ hostname > ${repRID}.getRef.log @@ -847,7 +847,7 @@ process alignData { strandedParam="--rna-strandness R" elif [ "${stranded}" == "reverse" ] && [ "${ends}" == "pe" ] then - strandedParam="--rna-strandness RF" + strandedParam="--rna-strandness RF" fi # align the reads with Hisat2 @@ -860,7 +860,7 @@ process alignData { hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary fi echo -e "LOG: alignined" >> ${repRID}.align.log - + # convert the output sam file to a sorted bam file using Samtools echo -e "LOG: converting from sam to bam" >> ${repRID}.align.log samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam @@ -892,7 +892,7 @@ process dedupData { output: tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam - tuple path ("${repRID}.sorted.deduped.*.bam"), path ("${repRID}.sorted.deduped.*.bam.bai") into dedupChrBam + tuple path ("${repRID}.sorted.deduped.*.bam"), path ("${repRID}.sorted.deduped.*.bam.bai") into dedupChrBam path ("*.deduped.Metrics.txt") into dedupQC script: @@ -908,7 +908,7 @@ process dedupData { # sort the bam file using Samtools echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam - + # index the sorted bam using Samtools echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai @@ -1004,7 +1004,7 @@ process countData { featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 
'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam fi echo -e "LOG: counted" >> ${repRID}.countData.log - + # extract assigned reads grep -m 1 'Assigned' *.countData.summary | grep -oe '\\([0-9.]*\\)' > assignedReads.csv @@ -1069,12 +1069,12 @@ process dataQC { tuple path (bam), path (bai) from dedupBam_dataQC tuple path (chrBam), path (chrBai) from dedupChrBam val ends from endsInfer_dataQC - + output: path "${repRID}.tin.hist.tsv" into tinHist path "${repRID}.tin.med.csv" into inferMetadata_tinMed path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance - + script: """ hostname > ${repRID}.dataQC.log @@ -1179,8 +1179,8 @@ process aggrQC { echo -e "LOG: creating run table" >> ${repRID}.aggrQC.log echo -e "Session\tSession ID\tStart Time\tPipeline Version\tInput" > run.tsv echo -e "Session\t${workflow.sessionId}\t${workflow.start}\t${workflow.manifest.version}\t\${input}" >> run.tsv - - + + # make RID table echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log echo -e "Replicate\tReplicate RID\tExperiment RID\tStudy RID" > rid.tsv @@ -1224,11 +1224,11 @@ process aggrQC { process outputBag { tag "${repRID}" publishDir "${outDir}/outputBag", mode: 'copy', pattern: "Replicate_${repRID}.outputBag.zip" - + input: path multiqc path multiqcJSON - + output: path ("Replicate_*.zip") into outputBag @@ -1239,4 +1239,18 @@ process outputBag { cp ${multiqcJSON} Replicate_${repRID}.outputBag bdbag Replicate_${repRID}.outputBag --archiver zip """ -} \ No newline at end of file +} + +workflow.onError = { + def msg = """\ + + Pipeline error summary + --------------------------- + RID : ${params.repRID} + Message : ${workflow.errorMessage} + exit status : ${workflow.exitStatus} + """ + .stripIndent() + + sendMail(to: 'venkat.malladi@utsouthwestern.edu', subject: 'GUDMAP RNA-seq error execution', body: msg) +} -- GitLab
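
Note on the workflow.onError handler added above: sendMail() still needs a delivery path. If no mail settings are configured, Nextflow falls back to the system sendmail/mail command; otherwise SMTP details are read from the mail scope in nextflow.config. The sketch below is a minimal, hypothetical example of that configuration -- it is not part of this patch, and the from address, host, user, and password are placeholders to be replaced with site-specific values.

// nextflow.config -- mail scope used by sendMail(); all values below are placeholders
mail {
    from = 'gudmap-rnaseq@example.edu'        // sender address shown on the notification
    smtp.host = 'smtp.example.edu'            // institutional SMTP relay (placeholder)
    smtp.port = 587
    smtp.user = 'smtp-user'                   // placeholder credentials
    smtp.password = 'smtp-password'
    smtp.auth = true
    smtp.starttls.enable = true
    smtp.starttls.required = true
}

With a mail scope like this in place (or a working local sendmail), the handler above emails the replicate RID, error message, and exit status whenever a run fails.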