Commit a6562099 authored by Gervaise Henry

Change processed file outputs to use _ as separator for custom txt

parent b0eb119b
Part of 2 merge requests: !58 Develop, !53 Resolve "process_derivaUpload"
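In most of the renamed outputs only the first separator after the replicate RID changes from "." to "_" (the bigWig also gains the sorted.deduped infix); log files and intermediate files keep the "." form. A minimal sketch of the new naming pattern, using a hypothetical RID that is not taken from the pipeline:

    repRID="16-1ZX4"                       # hypothetical replicate RID, for illustration only
    echo "${repRID}_sorted.deduped.bam"    # new: 16-1ZX4_sorted.deduped.bam (was 16-1ZX4.sorted.deduped.bam)
    echo "${repRID}_tpmTable.csv"          # new: 16-1ZX4_tpmTable.csv       (was 16-1ZX4.tpmTable.csv)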
@@ -939,8 +939,8 @@ process dedupData {
     tuple path (bam), path (bai) from rawBam_dedupData

   output:
-    tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam
-    tuple path ("${repRID}.sorted.deduped.*.bam"), path ("${repRID}.sorted.deduped.*.bam.bai") into dedupChrBam
+    tuple path ("${repRID}_sorted.deduped.bam"), path ("${repRID}_sorted.deduped.bam.bai") into dedupBam
+    tuple path ("${repRID}_sorted.deduped.*.bam"), path ("${repRID}_sorted.deduped.*.bam.bai") into dedupChrBam
     path ("*.deduped.Metrics.txt") into dedupQC

   script:
@@ -955,16 +955,16 @@ process dedupData {

   # sort the bam file using Samtools
   echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log
-  samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam
+  samtools sort -@ `nproc` -O BAM -o ${repRID}_sorted.deduped.bam ${repRID}.deduped.bam

   # index the sorted bam using Samtools
   echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log
-  samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai
+  samtools index -@ `nproc` -b ${repRID}_sorted.deduped.bam ${repRID}_sorted.deduped.bam.bai

   # split the deduped BAM file for multi-threaded tin calculation
-  for i in `samtools view ${repRID}.sorted.deduped.bam | cut -f3 | sort | uniq`;
+  for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | sort | uniq`;
   do
-    echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}.sorted.deduped.bam \${i} 1>> ${repRID}.sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}.sorted.deduped.\${i}.bam ${repRID}.sorted.deduped.\${i}.bam.bai"
+    echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}_sorted.deduped.bam \${i} 1>> ${repRID}_sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}_sorted.deduped.\${i}.bam ${repRID}_sorted.deduped.\${i}.bam.bai"
   done | parallel -j `nproc` -k
   """
 }
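For context on the splitting step above (this commit changes only the file names, not the logic): dedupData writes one BAM/BAI pair per chromosome so that the later TIN calculation can run one chromosome per core through GNU parallel. A minimal standalone sketch of the same pattern, assuming samtools and GNU parallel are on PATH and using a hypothetical input name:

    #!/bin/bash
    set -euo pipefail
    bam="sample_sorted.deduped.bam"   # hypothetical input; the pipeline uses ${repRID}_sorted.deduped.bam
    # list the reference names that carry reads, then emit one split+index command per chromosome
    for chr in $(samtools view "${bam}" | cut -f3 | sort -u); do
      echo "samtools view -b ${bam} ${chr} > ${bam%.bam}.${chr}.bam && samtools index ${bam%.bam}.${chr}.bam"
    done | parallel -j "$(nproc)" -k   # -k keeps output in input order; one job per available core

samtools idxstats would be a cheaper way to list reference names, but the sketch mirrors the pipeline's samtools view | cut -f3 approach.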
@@ -997,7 +997,7 @@ process makeBigWig {

   # create bigwig
   echo -e "LOG: creating bibWig" >> ${repRID}.makeBigWig.log
-  bamCoverage -p `nproc` -b ${bam} -o ${repRID}.bw
+  bamCoverage -p `nproc` -b ${bam} -o ${repRID}_sorted.deduped.bw
   echo -e "LOG: created" >> ${repRID}.makeBigWig.log
   """
 }
@@ -1007,7 +1007,7 @@ process makeBigWig {
 */
 process countData {
   tag "${repRID}"
-  publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*.tpmTable.csv"
+  publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*_tpmTable.csv"

   input:
     path script_calculateTPM
@@ -1018,7 +1018,7 @@ process countData {
     val stranded from strandedInfer_countData

   output:
-    path ("*.tpmTable.csv") into counts
+    path ("*_tpmTable.csv") into counts
     path ("*.countData.summary") into countsQC
     path ("assignedReads.csv") into assignedReadsInfer_fl
@@ -1047,10 +1047,10 @@ process countData {
   echo -e "LOG: counting ${ends} features" >> ${repRID}.countData.log
   if [ "${ends}" == "se" ]
   then
-    featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
+    featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam
   elif [ "${ends}" == "pe" ]
   then
-    featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
+    featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}_countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}_sorted.deduped.bam
   fi
   echo -e "LOG: counted" >> ${repRID}.countData.log
@@ -1142,10 +1142,10 @@ process dataQC {
   ulimit -a >> ${repRID}.dataQC.log

   # calcualte TIN values per feature on each chromosome
-  echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}.sorted.deduped.tin.xls
+  echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > ${repRID}_sorted.deduped.tin.xls
   for i in `cat ./bed/genome.bed | cut -f1 | sort | uniq`; do
-    echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}.sorted.deduped.\${i}.bam -r ./bed/genome.bed; cat ${repRID}.sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";";
-  done | parallel -j `nproc` -k 1>> ${repRID}.sorted.deduped.tin.xls
+    echo "echo \"LOG: running tin.py on \${i}\" >> ${repRID}.dataQC.log; tin.py -i ${repRID}_sorted.deduped.\${i}.bam -r ./bed/genome.bed; cat ${repRID}_sorted.deduped.\${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \\\"\\\\t\${i}\\\\t\\\";";
+  done | parallel -j `nproc` -k 1>> ${repRID}_sorted.deduped.tin.xls

   # bin TIN values
   echo -e "LOG: binning TINs" >> ${repRID}.dataQC.log
--- second changed file: the TPM-calculation R script (script_calculateTPM) ---
@@ -30,4 +30,4 @@ tpm <- rpk/scale
 output <- cbind(count,tpm)
 colnames(output)[7] <- "count"
-write.table(output,file=paste0(repRID,".countTable.csv"),sep=",",row.names=FALSE,quote=FALSE)
+write.table(output,file=paste0(repRID,"_countTable.csv"),sep=",",row.names=FALSE,quote=FALSE)
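For reference on the renamed table: if rpk and scale in the surrounding script carry their conventional meanings (reads per kilobase and the per-million scaling factor; the lines defining them are not part of this diff), then tpm <- rpk/scale amounts to

    rpk_g  = count_g / (gene_length_g / 1000)
    scale  = (sum over all genes of rpk_g) / 1e6
    tpm_g  = rpk_g / scale

so the table written here holds the raw count alongside the TPM value for each gene.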