-
Brandi Cantarel authoredac7a88b4
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
cnvkit.sh 3.81 KiB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/bin/bash
#cnvkit.sh
usage() {
echo "-h --Help documentation for markdups.sh"
echo "-b --BAM file"
echo "-p --Prefix for output file name"
echo "-n --Panel of Normal cnn file"
echo "-t --Target and Antitarget prefix"
echo "Example: bash cnvkit.sh -p prefix -b file.bam"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:p:n:d:t:r:uqh opt
do
case $opt in
b) sbam=$OPTARG;;
d) paneldir=$OPTARG;;
p) pair_id=$OPTARG;;
r) index_path=$OPTARG;;
u) umi='umi';;
h) usage;;
esac
done
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
if [[ -z $index_path ]]
then
index_path='/project/shared/bicf_workflow_ref/human/grch38_cloud/dnaref'
fi
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]]
then
"missing pair_id or bam"
usage
fi
NPROC=$SLURM_CPUS_ON_NODE
if [[ -z $NPROC ]]
then
NPROC=`nproc`
fi
if [[ -z $paneldir ]]
then
echo "missing panel dir"
usage
fi
if [[ -s "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
if [[ -z $paneldir ]]
then
paneldir="UTSW_V3_pancancer"
fi
capture="$paneldir/targetpanel.bed"
targets="$paneldir/cnvkit."
normals="$paneldir/pon.cnn"
if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 java/oracle/jdk1.8.0_171 snpeff/4.3q
fi
if [[ -f "${paneldir}/pon.downsample.cnn" ]]
then
bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${capture} -b ${sbam} -hist > covhist.txt
grep ^all covhist.txt > genomecov.txt
sumdepth=`awk '{ sum+= $2*$3;} END {print sum;}' genomecov.txt`
total=`head -n 1 genomecov.txt |cut -f 4`
avgdepth=$((${sumdepth}/${total}))
if [[ "$avgdepth" -lt 1000 ]]
then
normals="${paneldir}/pon.downsample.cnn"
fi
fi
echo "${targets}targets.bed"
echo "${targets}antitargets.bed"
echo "${normals}"
numsnps=`grep -c -P "rs[0-9]+" ${targets}targets.bed`
panelsize=`awk '{ sum+=$3-$2} END {print sum}' ${targets}targets.bed`
unset DISPLAY
cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn
cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn
cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr
if [[ $panelsize -gt 4000000 ]]
then
cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
else
cnvkit.py segment -m haar ${pair_id}.cnr -o ${pair_id}.cns
fi
if [[ $numsnps -gt 100 ]]
then
bcftools mpileup -A -d 1000000 -C50 -Ou --gvcf 0 -f ${reffa} -a INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR -T ${index_path}/IDT_snps.hg38.bed ${sbam} | bcftools call -m --gvcf 0 -Ov | bcftools convert --gvcf2vcf -f ${reffa} -Ov -o common_variants.vcf
$baseDir/formatVcfCNV.pl cnvkit_common common_variants.vcf
echo -e "CHROM\tPOS\tAO\tRO\tDP\tMAF" > ${pair_id}.ballelefreq.txt
java -jar $SNPEFF_HOME/SnpSift.jar extractFields cnvkit_common.vcf CHROM POS GEN[0].AO GEN[0].RO GEN[0].DP |grep -v CHROM | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$3/$5}' >> ${pair_id}.ballelefreq.txt
cnvkit.py call --filter cn ${pair_id}.cns -v cnvkit_common.vcf -o ${pair_id}.call.cns
cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf -v cnvkit_common.vcf
else
cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns
cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf
fi
cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b ${index_path}/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed
perl $baseDir/filter_cnvkit.pl -s ${pair_id}.call.cns