-
Brandi Cantarel authorede6be1c74
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
markdups.sh 3.17 KiB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/bin/bash
#hisat.sh
usage() {
echo "-h --Help documentation for markdups.sh"
echo "-m --Mark Duplication Method: sambamba, samtools, picard, picard_umi, fgbio_umi, null; default is null"
echo "-b --BAM file"
echo "-p --Prefix for output file name"
echo "Example: bash markdups.sh -p prefix -b file.bam -a picard"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :a:b:r:p:h opt
do
case $opt in
a) algo=$OPTARG;;
b) sbam=$OPTARG;;
p) pair_id=$OPTARG;;
r) index_path=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]]; then
usage
fi
NPROC=$SLURM_CPUS_ON_NODE
if [[ -z $NPROC ]]
then
NPROC=`nproc`
fi
if [[ -z $index_path ]]
then
index_path="/project/shared/bicf_workflow_ref/human/grch38_cloud/dnaref"
fi
if [[ $index_path == *project* ]]
then
testexe='/project/shared/bicf_workflow_ref/seqprg/bin'
else
testexe='/usr/local/bin'
fi
baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module load picard/2.10.3
if [ $algo == 'sambamba' ]
then
module load speedseq/20160506
sambamba markdup -t $NPROC ${sbam} ${pair_id}.dedup.bam
touch ${pair_id}.dedup.stat.txt
elif [ $algo == 'samtools' ]
then
module load samtools/gcc/1.8
samtools markdup -s --output-fmt BAM -@ $NPROC sort.bam ${pair_id}.dedup.bam
touch ${pair_id}.dedup.stat.txt
elif [ $algo == 'picard' ]
then
java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
elif [ $algo == 'picard_umi' ]
then
java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
elif [ $algo == 'fgbio_umi' ]
then
module load fgbio bwakit/0.7.15 bwa/intel/0.7.17 samtools/gcc/1.8
samtools index -@ $NPROC ${sbam}
fgbio --tmp-dir ./ GroupReadsByUmi -s identity -i ${sbam} -o group.bam --family-size-histogram ${pair_id}.umihist.txt -e 0 -m 0
fgbio --tmp-dir ./ CallMolecularConsensusReads -i group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:'
samtools index -@ $NPROC ${pair_id}.consensus.bam
samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam
gzip ${pair_id}.consensus.R1.fastq
gzip ${pair_id}.consensus.R2.fastq
bwa mem -M -C -t $NPROC -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz > out.sam
if [[ ${index_path}/genome.fa.alt ]]
then
k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | samtools view -1 - > ${pair_id}.consensus.bam
else
samtools view -1 out.sam > ${pair_id}.consensus.bam
fi
samtools sort --threads $NPROC -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam
samtools sort --threads $NPROC -o ${pair_id}.group.bam group.bam
samtools index -@ $NPROC ${pair_id}.group.bam
else
cp ${sbam} ${pair_id}.dedup.bam
fi
module load samtools/gcc/1.8
samtools index -@ $NPROC ${pair_id}.dedup.bam