diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 78365090c9676f5ae3d357aed73be54e8f30a259..23a40e557b0971b737e0173456425824515fff13 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -15,12 +15,10 @@ while getopts :b:p:n:t:r:uqh opt do case $opt in b) sbam=$OPTARG;; + d) paneldir=$OPTARG;; p) pair_id=$OPTARG;; - n) normals=$OPTARG;; r) index_path=$OPTARG;; - t) targets=$OPTARG;; u) umi='umi';; - q) idtsnp=1;; h) usage;; esac done @@ -41,7 +39,7 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi -if [[ -z $normals ]] || [[ -z $targets ]] +if [[ -z $paneldir ]] then usage fi @@ -53,14 +51,35 @@ else echo "Missing Fasta File: ${index_path}/genome.fa" usage fi +if [[ -z $paneldir ]] +then + paneldir="UTSW_V3_pancancer" +fi -echo "${targets}targets.bed" -echo "${targets}antitargets.bed" -echo "${normals}" +capture="$paneldir/targetpanel.bed" +targets="$paneldir/cnvkit." +normals="$paneldir/pon.cnn" source /etc/profile.d/modules.sh module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 java/oracle/jdk1.8.0_171 snpeff/4.3q +if [[ -f "${paneldir}/pon.downsample.cnn" ]] +then + bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${capture} -b ${sbam} -hist > covhist.txt + grep ^all covhist.txt > genomecov.txt + sumdepth=`awk '{ sum+= $2*$3;} END {print sum;}' genomecov.txt` + total=`head -n 1 genomecov.txt |cut -f 4` + avgdepth=$((${sumdepth}/${total})) + if [[ "$avgdepth" -lt 1000 ]] + then + normals="${paneldir}/pon.downsample.cnn" + fi +fi + +echo "${targets}targets.bed" +echo "${targets}antitargets.bed" +echo "${normals}" + unset DISPLAY cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn @@ -68,7 +87,9 @@ cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcov cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns -if [[ $idtsnp == 1 ]] +numsnps=`grep -c -P "rs[0-9]+" ${targets}targets.bed` + +if [[ $numsnps > 100 ]] then samtools index ${sbam} java -jar /cm/shared/apps/gatk/3.8/target/package/GenomeAnalysisTK.jar -T UnifiedGenotyper -R ${reffa} --output_mode EMIT_ALL_SITES -L ${index_path}/IDT_snps.hg38.bed -o common_variants.vcf -glm BOTH -dcov 10000 -I ${sbam} diff --git a/variants/itdseek.sh b/variants/itdseek.sh index f40a33053f79f732a8f89585d0d19a4a85360a81..0ea6ac83510bc896c371ce12edf884a78b48cf37 100755 --- a/variants/itdseek.sh +++ b/variants/itdseek.sh @@ -9,7 +9,7 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:b:l:p:h opt +while getopts :r:b:l:p:fh opt do case $opt in r) index_path=$OPTARG;; @@ -17,6 +17,7 @@ do p) pair_id=$OPTARG;; l) itdbed=$OPTARG;; g) snpeffgeno=$OPTARG;; + f) filter=1;; h) usage;; esac done @@ -58,3 +59,11 @@ stexe=`which samtools` samtools view -@ $SLURM_CPUS_ON_NODE -L ${itdbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz bcftools norm --fasta-ref $reffa -c w -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config $snpeffgeno - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz + +if [[ $filter == 1 ]] +then + perl $baseDir/filter_itdseeker.pl -t ${pair_id} -d ${pair_id}.itdseek_tandemdup.vcf.gz + mv ${pair_id}.itdseek_tandemdup.vcf.gz ${pair_id}.itdseek_tandemdup.unfilt.vcf.gz + mv ${pair_id}.itdseek_tandemdup.pass.vcf ${pair_id}.itdseek_tandemdup.vcf + bgzip ${pair_id}.itdseek_tandemdup.pass.vcf +fi