Skip to content
Snippets Groups Projects
Commit c54dec24 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '63-on_error' into 'develop'

Test failure.

Closes #63

See merge request !47
parents c114a629 14154ff6
Branches
Tags
2 merge requests!58Develop,!47Test failure.
Pipeline #8331 passed with stages
in 1 minute and 53 seconds
...@@ -130,7 +130,7 @@ dedupData: ...@@ -130,7 +130,7 @@ dedupData:
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai - singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
- > - >
for i in {"chr8","chr4","chrY"}; do for i in {"chr8","chr4","chrY"}; do
echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;"; echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
- pytest -m dedupData - pytest -m dedupData
...@@ -145,7 +145,7 @@ countData: ...@@ -145,7 +145,7 @@ countData:
script: script:
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv - ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
- assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)') - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
...@@ -283,7 +283,7 @@ integration_se: ...@@ -283,7 +283,7 @@ integration_se:
script: script:
- hostname - hostname
- ulimit -a - ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 -with-dag dag.png --ci true - nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 -with-dag dag.png --ci true --email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'
- find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \; - find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
artifacts: artifacts:
name: "$CI_JOB_NAME" name: "$CI_JOB_NAME"
...@@ -366,7 +366,7 @@ override_fastq: ...@@ -366,7 +366,7 @@ override_fastq:
max: 1 max: 1
when: when:
- always - always
override_species: override_species:
stage: integration stage: integration
only: [merge_requests] only: [merge_requests]
...@@ -388,7 +388,7 @@ override_species: ...@@ -388,7 +388,7 @@ override_species:
max: 1 max: 1
when: when:
- always - always
consistency: consistency:
stage: consistency stage: consistency
...@@ -413,4 +413,4 @@ consistency: ...@@ -413,4 +413,4 @@ consistency:
- assignedPE.txt - assignedPE.txt
- assignedExpectSE.txt - assignedExpectSE.txt
- assignedExpectPE.txt - assignedExpectPE.txt
expire_in: 7 days expire_in: 7 days
\ No newline at end of file
# v0.0.4 (in development) # v0.0.4 (in development)
**User Facing** **User Facing**
* Add option to pull references from datahub * Add option to pull references from datahub
* Add option to send email on workflow error, with pipeline error message
**Background** **Background**
* Remove (comment out) option to pull references from S3 * Remove (comment out) option to pull references from S3
......
...@@ -43,6 +43,9 @@ To Run: ...@@ -43,6 +43,9 @@ To Run:
* **biohpc_max** = process on high power BioHPC cluster nodes (=> 128GB nodes), for resource testing * **biohpc_max** = process on high power BioHPC cluster nodes (=> 128GB nodes), for resource testing
* **aws_ondemand** = AWS Batch on-demand instant requests * **aws_ondemand** = AWS Batch on-demand instant requests
* **aws_spot** = AWS Batch spot instance requests * **aws_spot** = AWS Batch spot instance requests
* `--email` email address(es) to send failure notification (comma separated) ***(optional)***:
* e.g: `--email 'venkat.malladi@utsouthwestern.edu,Gervaise.Henry@UTSouthwestern.edu'`
* NOTES: * NOTES:
* once deriva-auth is run and authenticated, the two files above are saved in ```~/.deriva/``` (see official documents from [deriva](https://github.com/informatics-isi-edu/deriva-client#installer-packages-for-windows-and-macosx) on the lifetime of the credentials) * once deriva-auth is run and authenticated, the two files above are saved in ```~/.deriva/``` (see official documents from [deriva](https://github.com/informatics-isi-edu/deriva-client#installer-packages-for-windows-and-macosx) on the lifetime of the credentials)
* reference version consists of Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version) * reference version consists of Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version)
...@@ -126,4 +129,4 @@ Please cite in publications: Pipeline was developed by BICF from funding provide ...@@ -126,4 +129,4 @@ Please cite in publications: Pipeline was developed by BICF from funding provide
Pipeline Directed Acyclic Graph Pipeline Directed Acyclic Graph
------------------------------- -------------------------------
![dag](docs/dag.png "DAG") ![dag](docs/dag.png "DAG")
\ No newline at end of file
docs/dag.png

769 KiB | W: | H:

docs/dag.png

733 KiB | W: | H:

docs/dag.png
docs/dag.png
docs/dag.png
docs/dag.png
  • 2-up
  • Swipe
  • Onion skin
...@@ -82,7 +82,7 @@ timeline { ...@@ -82,7 +82,7 @@ timeline {
enabled = false enabled = false
file = 'timeline.html' file = 'timeline.html'
} }
report { report {
enabled = false enabled = false
file = 'report.html' file = 'report.html'
...@@ -94,6 +94,7 @@ tower { ...@@ -94,6 +94,7 @@ tower {
} }
manifest { manifest {
name = 'gudmap_rbk/rna-seq'
homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq' homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.' description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
mainScript = 'rna-seq.nf' mainScript = 'rna-seq.nf'
......
#!/usr/bin/env nextflow #!/usr/bin/env nextflow
// ######## #### ###### ######## // ######## #### ###### ########
// ## ## ## ## ## ## // ## ## ## ## ## ##
// ## ## ## ## ## // ## ## ## ## ##
// ######## ## ## ###### // ######## ## ## ######
// ## ## ## ## ## // ## ## ## ## ##
// ## ## ## ## ## ## // ## ## ## ## ## ##
// ######## #### ###### ## // ######## #### ###### ##
// Define input variables // Define input variables
params.deriva = "${baseDir}/../test_data/auth/credential.json" params.deriva = "${baseDir}/../test_data/auth/credential.json"
...@@ -18,6 +18,8 @@ params.refMoVersion = "38.p6.vM22" ...@@ -18,6 +18,8 @@ params.refMoVersion = "38.p6.vM22"
params.refHuVersion = "38.p12.v31" params.refHuVersion = "38.p12.v31"
params.refERCCVersion = "92" params.refERCCVersion = "92"
params.outDir = "${baseDir}/../output" params.outDir = "${baseDir}/../output"
params.email = ""
// Define override input variable // Define override input variable
params.refSource = "biohpc" params.refSource = "biohpc"
...@@ -25,6 +27,7 @@ params.inputBagForce = "" ...@@ -25,6 +27,7 @@ params.inputBagForce = ""
params.fastqsForce = "" params.fastqsForce = ""
params.speciesForce = "" params.speciesForce = ""
// Parse input variables // Parse input variables
deriva = Channel deriva = Channel
.fromPath(params.deriva) .fromPath(params.deriva)
...@@ -46,6 +49,7 @@ logsDir = "${outDir}/Logs" ...@@ -46,6 +49,7 @@ logsDir = "${outDir}/Logs"
inputBagForce = params.inputBagForce inputBagForce = params.inputBagForce
fastqsForce = params.fastqsForce fastqsForce = params.fastqsForce
speciesForce = params.speciesForce speciesForce = params.speciesForce
email = params.email
// Define fixed files // Define fixed files
derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json") derivaConfig = Channel.fromPath("${baseDir}/conf/replicate_export_config.json")
...@@ -89,7 +93,7 @@ process trackStart { ...@@ -89,7 +93,7 @@ process trackStart {
""" """
hostname hostname
ulimit -a ulimit -a
curl -H 'Content-Type: application/json' -X PUT -d \ curl -H 'Content-Type: application/json' -X PUT -d \
'{ \ '{ \
"sessionId": "${workflow.sessionId}", \ "sessionId": "${workflow.sessionId}", \
...@@ -199,16 +203,16 @@ process getData { ...@@ -199,16 +203,16 @@ process getData {
mkdir -p ~/.bdbag mkdir -p ~/.bdbag
ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt
echo -e "LOG: linked" >> ${repRID}.getData.log echo -e "LOG: linked" >> ${repRID}.getData.log
# get bag basename # get bag basename
replicate=\$(basename "${inputBag}" | cut -d "." -f1) replicate=\$(basename "${inputBag}" | cut -d "." -f1)
echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log echo -e "LOG: bag replicate name \${replicate}" >> ${repRID}.getData.log
# unzip bag # unzip bag
echo -e "LOG: unzipping replicate bag" >> ${repRID}.getData.log echo -e "LOG: unzipping replicate bag" >> ${repRID}.getData.log
unzip ${inputBag} unzip ${inputBag}
echo -e "LOG: unzipped" >> ${repRID}.getData.log echo -e "LOG: unzipped" >> ${repRID}.getData.log
# bag fetch fastq's only and rename by repRID # bag fetch fastq's only and rename by repRID
echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
sh ${script_bdbagFetch} \${replicate} ${repRID} sh ${script_bdbagFetch} \${replicate} ${repRID}
...@@ -259,7 +263,7 @@ process parseMetadata { ...@@ -259,7 +263,7 @@ process parseMetadata {
# get experiment RID metadata # get experiment RID metadata
exp=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p expRID) exp=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p expRID)
echo -e "LOG: experiment RID metadata parsed: \${exp}" >> ${repRID}.parseMetadata.log echo -e "LOG: experiment RID metadata parsed: \${exp}" >> ${repRID}.parseMetadata.log
# get study RID metadata # get study RID metadata
study=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p studyRID) study=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p studyRID)
echo -e "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log echo -e "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log
...@@ -267,7 +271,7 @@ process parseMetadata { ...@@ -267,7 +271,7 @@ process parseMetadata {
# get endedness metadata # get endedness metadata
endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta) endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p endsMeta)
echo -e "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.log echo -e "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.log
# ganually get endness # ganually get endness
endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p endsManual) endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${file}" -p endsManual)
echo -e "LOG: endedness manually detected: \${endsManual}" >> ${repRID}.parseMetadata.log echo -e "LOG: endedness manually detected: \${endsManual}" >> ${repRID}.parseMetadata.log
...@@ -275,11 +279,11 @@ process parseMetadata { ...@@ -275,11 +279,11 @@ process parseMetadata {
# get strandedness metadata # get strandedness metadata
stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p stranded) stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p stranded)
echo -e "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log echo -e "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log
# get spike-in metadata # get spike-in metadata
spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p spike) spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p spike)
echo -e "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log echo -e "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log
# get species metadata # get species metadata
species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p species) species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p species)
echo -e "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log echo -e "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log
...@@ -358,7 +362,7 @@ process trimData { ...@@ -358,7 +362,7 @@ process trimData {
fi fi
echo -e "LOG: trimmed" >> ${repRID}.trimData.log echo -e "LOG: trimmed" >> ${repRID}.trimData.log
echo -e "LOG: average trimmed read length: \${readLength}" >> ${repRID}.trimData.log echo -e "LOG: average trimmed read length: \${readLength}" >> ${repRID}.trimData.log
# save read length file # save read length file
echo -e "\${readLength}" > readLength.csv echo -e "\${readLength}" > readLength.csv
""" """
...@@ -381,7 +385,7 @@ getRefInferInput = referenceInfer.combine(deriva_getRefInfer.combine(script_refD ...@@ -381,7 +385,7 @@ getRefInferInput = referenceInfer.combine(deriva_getRefInfer.combine(script_refD
/* /*
* getRefInfer: dowloads appropriate reference for metadata inference * getRefInfer: dowloads appropriate reference for metadata inference
*/ */
process getRefInfer { process getRefInfer {
tag "${refName}" tag "${refName}"
...@@ -391,7 +395,7 @@ process getRefInfer { ...@@ -391,7 +395,7 @@ process getRefInfer {
output: output:
tuple val (refName), path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf") into refInfer tuple val (refName), path ("hisat2", type: 'dir'), path ("*.fna"), path ("*.gtf") into refInfer
path ("${refName}", type: 'dir') into bedInfer path ("${refName}", type: 'dir') into bedInfer
script: script:
""" """
hostname > ${repRID}.${refName}.getRefInfer.log hostname > ${repRID}.${refName}.getRefInfer.log
...@@ -532,14 +536,14 @@ process alignSampleData { ...@@ -532,14 +536,14 @@ process alignSampleData {
echo -e "LOG: aligning ${ends}" >> ${repRID}.${ref}.alignSampleData.log echo -e "LOG: aligning ${ends}" >> ${repRID}.${ref}.alignSampleData.log
if [ "${ends}" == "se" ] if [ "${ends}" == "se" ]
then then
hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${ref}.alignSampleSummary.txt --new-summary hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${ref}.alignSampleSummary.txt --new-summary
elif [ "${ends}" == "pe" ] elif [ "${ends}" == "pe" ]
then then
hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${ref}.alignSampleSummary.txt --new-summary hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${ref}.alignSampleSummary.txt --new-summary
fi fi
echo -e "LOG: aliged" >> ${repRID}.${ref}.alignSampleData.log echo -e "LOG: aliged" >> ${repRID}.${ref}.alignSampleData.log
# convert the output sam file to a sorted bam file using Samtools # convert the output sam file to a sorted bam file using Samtools
echo -e "LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.log echo -e "LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.log
samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${ref}.sampled.bam ${ref}.sampled.sam samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${ref}.sampled.bam ${ref}.sampled.sam
...@@ -639,7 +643,7 @@ process inferMetadata { ...@@ -639,7 +643,7 @@ process inferMetadata {
ended=`bash inferMeta.sh endness ${repRID}.infer_experiment.txt` ended=`bash inferMeta.sh endness ${repRID}.infer_experiment.txt`
fail=`bash inferMeta.sh fail ${repRID}.infer_experiment.txt` fail=`bash inferMeta.sh fail ${repRID}.infer_experiment.txt`
if [ \${ended} == "PairEnd" ] if [ \${ended} == "PairEnd" ]
then then
ends="pe" ends="pe"
percentF=`bash inferMeta.sh pef ${repRID}.infer_experiment.txt` percentF=`bash inferMeta.sh pef ${repRID}.infer_experiment.txt`
...@@ -728,7 +732,7 @@ process getRef { ...@@ -728,7 +732,7 @@ process getRef {
output: output:
tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv") into reference tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv") into reference
script: script:
""" """
hostname > ${repRID}.getRef.log hostname > ${repRID}.getRef.log
...@@ -847,7 +851,7 @@ process alignData { ...@@ -847,7 +851,7 @@ process alignData {
strandedParam="--rna-strandness R" strandedParam="--rna-strandness R"
elif [ "${stranded}" == "reverse" ] && [ "${ends}" == "pe" ] elif [ "${stranded}" == "reverse" ] && [ "${ends}" == "pe" ]
then then
strandedParam="--rna-strandness RF" strandedParam="--rna-strandness RF"
fi fi
# align the reads with Hisat2 # align the reads with Hisat2
...@@ -860,7 +864,7 @@ process alignData { ...@@ -860,7 +864,7 @@ process alignData {
hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary
fi fi
echo -e "LOG: alignined" >> ${repRID}.align.log echo -e "LOG: alignined" >> ${repRID}.align.log
# convert the output sam file to a sorted bam file using Samtools # convert the output sam file to a sorted bam file using Samtools
echo -e "LOG: converting from sam to bam" >> ${repRID}.align.log echo -e "LOG: converting from sam to bam" >> ${repRID}.align.log
samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam
...@@ -892,7 +896,7 @@ process dedupData { ...@@ -892,7 +896,7 @@ process dedupData {
output: output:
tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam
tuple path ("${repRID}.sorted.deduped.*.bam"), path ("${repRID}.sorted.deduped.*.bam.bai") into dedupChrBam tuple path ("${repRID}.sorted.deduped.*.bam"), path ("${repRID}.sorted.deduped.*.bam.bai") into dedupChrBam
path ("*.deduped.Metrics.txt") into dedupQC path ("*.deduped.Metrics.txt") into dedupQC
script: script:
...@@ -908,7 +912,7 @@ process dedupData { ...@@ -908,7 +912,7 @@ process dedupData {
# sort the bam file using Samtools # sort the bam file using Samtools
echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log
samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam
# index the sorted bam using Samtools # index the sorted bam using Samtools
echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log
samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai
...@@ -1004,7 +1008,7 @@ process countData { ...@@ -1004,7 +1008,7 @@ process countData {
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
fi fi
echo -e "LOG: counted" >> ${repRID}.countData.log echo -e "LOG: counted" >> ${repRID}.countData.log
# extract assigned reads # extract assigned reads
grep -m 1 'Assigned' *.countData.summary | grep -oe '\\([0-9.]*\\)' > assignedReads.csv grep -m 1 'Assigned' *.countData.summary | grep -oe '\\([0-9.]*\\)' > assignedReads.csv
...@@ -1069,12 +1073,12 @@ process dataQC { ...@@ -1069,12 +1073,12 @@ process dataQC {
tuple path (bam), path (bai) from dedupBam_dataQC tuple path (bam), path (bai) from dedupBam_dataQC
tuple path (chrBam), path (chrBai) from dedupChrBam tuple path (chrBam), path (chrBai) from dedupChrBam
val ends from endsInfer_dataQC val ends from endsInfer_dataQC
output: output:
path "${repRID}.tin.hist.tsv" into tinHist path "${repRID}.tin.hist.tsv" into tinHist
path "${repRID}.tin.med.csv" into inferMetadata_tinMed path "${repRID}.tin.med.csv" into inferMetadata_tinMed
path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance
script: script:
""" """
hostname > ${repRID}.dataQC.log hostname > ${repRID}.dataQC.log
...@@ -1179,8 +1183,8 @@ process aggrQC { ...@@ -1179,8 +1183,8 @@ process aggrQC {
echo -e "LOG: creating run table" >> ${repRID}.aggrQC.log echo -e "LOG: creating run table" >> ${repRID}.aggrQC.log
echo -e "Session\tSession ID\tStart Time\tPipeline Version\tInput" > run.tsv echo -e "Session\tSession ID\tStart Time\tPipeline Version\tInput" > run.tsv
echo -e "Session\t${workflow.sessionId}\t${workflow.start}\t${workflow.manifest.version}\t\${input}" >> run.tsv echo -e "Session\t${workflow.sessionId}\t${workflow.start}\t${workflow.manifest.version}\t\${input}" >> run.tsv
# make RID table # make RID table
echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log
echo -e "Replicate\tReplicate RID\tExperiment RID\tStudy RID" > rid.tsv echo -e "Replicate\tReplicate RID\tExperiment RID\tStudy RID" > rid.tsv
...@@ -1224,11 +1228,11 @@ process aggrQC { ...@@ -1224,11 +1228,11 @@ process aggrQC {
process outputBag { process outputBag {
tag "${repRID}" tag "${repRID}"
publishDir "${outDir}/outputBag", mode: 'copy', pattern: "Replicate_${repRID}.outputBag.zip" publishDir "${outDir}/outputBag", mode: 'copy', pattern: "Replicate_${repRID}.outputBag.zip"
input: input:
path multiqc path multiqc
path multiqcJSON path multiqcJSON
output: output:
path ("Replicate_*.zip") into outputBag path ("Replicate_*.zip") into outputBag
...@@ -1239,4 +1243,25 @@ process outputBag { ...@@ -1239,4 +1243,25 @@ process outputBag {
cp ${multiqcJSON} Replicate_${repRID}.outputBag cp ${multiqcJSON} Replicate_${repRID}.outputBag
bdbag Replicate_${repRID}.outputBag --archiver zip bdbag Replicate_${repRID}.outputBag --archiver zip
""" """
} }
\ No newline at end of file
// Nextflow error handler: runs once if the pipeline fails, emailing a short
// failure summary to the address(es) in --email (comma-separated; optional).
workflow.onError = {
  // Subject identifies the pipeline (from manifest) and the replicate RID being processed.
  subject = "$workflow.manifest.name FAILED: $params.repRID"
  def msg = """\
      Pipeline error summary
      ---------------------------
      RID : ${params.repRID}
      Version : ${workflow.manifest.version}
      Duration : ${workflow.duration}
      Nf Version : ${workflow.nextflow.version}
      Message : ${workflow.errorMessage}
      exit status : ${workflow.exitStatus}
      """
      .stripIndent()
  // `email` is the script-level copy of params.email (defaults to "" when
  // --email is not given), so no mail is sent unless an address was supplied.
  if (email != '') {
    sendMail(to: email, subject: subject , body: msg)
  }
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment