From a6562099e16d6c37741123b3ea4f48158db71f87 Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Thu, 17 Dec 2020 10:55:49 -0600 Subject: [PATCH] Change processed file outputs to use _ as separator for custom txt --- workflow/rna-seq.nf | 28 ++++++++++++++-------------- workflow/scripts/calculateTPM.R | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index d875803..67dfe6e 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -939,8 +939,8 @@ process dedupData { tuple path (bam), path (bai) from rawBam_dedupData output: - tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam - tuple path ("${repRID}.sorted.deduped.*.bam"), path ("${repRID}.sorted.deduped.*.bam.bai") into dedupChrBam + tuple path ("${repRID}_sorted.deduped.bam"), path ("${repRID}_sorted.deduped.bam.bai") into dedupBam + tuple path ("${repRID}_sorted.deduped.*.bam"), path ("${repRID}_sorted.deduped.*.bam.bai") into dedupChrBam path ("*.deduped.Metrics.txt") into dedupQC script: @@ -955,16 +955,16 @@ process dedupData { # sort the bam file using Samtools echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log - samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam + samtools sort -@ `nproc` -O BAM -o ${repRID}_sorted.deduped.bam ${repRID}.deduped.bam # index the sorted bam using Samtools echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log - samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai + samtools index -@ `nproc` -b ${repRID}_sorted.deduped.bam ${repRID}_sorted.deduped.bam.bai # split the deduped BAM file for multi-threaded tin calculation - for i in `samtools view ${repRID}.sorted.deduped.bam | cut -f3 | sort | uniq`; + for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | sort | uniq`; do - echo "echo \"LOG: splitting each chromosome into its
own BAM and BAI files with Samtools\"; samtools view -b ${repRID}.sorted.deduped.bam \${i} 1>> ${repRID}.sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}.sorted.deduped.\${i}.bam ${repRID}.sorted.deduped.\${i}.bam.bai" + echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}_sorted.deduped.bam \${i} 1>> ${repRID}_sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}_sorted.deduped.\${i}.bam ${repRID}_sorted.deduped.\${i}.bam.bai" done | parallel -j `nproc` -k """ } @@ -997,7 +997,7 @@ process makeBigWig { # create bigwig echo -e "LOG: creating bibWig" >> ${repRID}.makeBigWig.log - bamCoverage -p `nproc` -b ${bam} -o ${repRID}.bw + bamCoverage -p `nproc` -b ${bam} -o ${repRID}_sorted.deduped.bw echo -e "LOG: created" >> ${repRID}.makeBigWig.log """ } @@ -1007,7 +1007,7 @@ process makeBigWig { */ process countData { tag "${repRID}" - publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*.tpmTable.csv" + publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*_tpmTable.csv" input: path script_calculateTPM @@ -1018,7 +1018,7 @@ process countData { val stranded from strandedInfer_countData output: - path ("*.tpmTable.csv") into counts + path ("*_tpmTable.csv") into counts path ("*.countData.summary") into countsQC path ("assignedReads.csv") into assignedReadsInfer_fl @@ -1047,10 +1047,10 @@ process countData { echo -e "LOG: counting ${ends} features" >> ${repRID}.countData.log if [ "${ends}" == "se" ] then - featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam + featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam elif [ "${ends}" == "pe" ] then - featureCounts -T `nproc` -a 
./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam + featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam fi echo -e "LOG: counted" >> ${repRID}.countData.log @@ -1142,10 +1142,10 @@ process dataQC { ulimit -a >> ${repRID}.dataQC.log # calcualte TIN values per feature on each chromosome - echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}.sorted.deduped.tin.xls + echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}_sorted.deduped.tin.xls for i in `cat ./bed/genome.bed | cut -f1 | sort | uniq`; do - echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}.sorted.deduped.\${i}.bam -r ./bed/genome.bed; cat ${repRID}.sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";"; - done | parallel -j `nproc` -k 1>> ${repRID}.sorted.deduped.tin.xls + echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}_sorted.deduped.\${i}.bam -r ./bed/genome.bed; cat ${repRID}_sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";"; + done | parallel -j `nproc` -k 1>> ${repRID}_sorted.deduped.tin.xls # bin TIN values echo -e "LOG: binning TINs" >> ${repRID}.dataQC.log diff --git a/workflow/scripts/calculateTPM.R b/workflow/scripts/calculateTPM.R index a26bf94..9a163c4 100644 --- a/workflow/scripts/calculateTPM.R +++ b/workflow/scripts/calculateTPM.R @@ -30,4 +30,4 @@ tpm <- rpk/scale output <- cbind(count,tpm) colnames(output)[7] <- "count" -write.table(output,file=paste0(repRID,".countTable.csv"),sep=",",row.names=FALSE,quote=FALSE) +write.table(output,file=paste0(repRID,"_countTable.csv"),sep=",",row.names=FALSE,quote=FALSE) -- GitLab