From d46959b9f3e7f27f0b64e49481ad56e2939ef564 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Wed, 22 Aug 2018 16:36:34 -0500
Subject: [PATCH] bug fixes new nodes multiple allelic sites

---
 alignment/bam2tdf.sh       | 36 ++++++++++++++++++++++++++++++++++++
 alignment/bamqc.sh         |  1 +
 diff_exp/geneabundance.sh  |  4 ++++
 variants/cnvkit.sh         |  5 ++++-
 variants/norm_annot.sh     |  9 ++++-----
 variants/uniform_vcf_gt.pl |  8 +++++---
 variants/union.sh          |  2 +-
 7 files changed, 55 insertions(+), 10 deletions(-)
 create mode 100644 alignment/bam2tdf.sh

diff --git a/alignment/bam2tdf.sh b/alignment/bam2tdf.sh
new file mode 100644
index 0000000..a91820d
--- /dev/null
+++ b/alignment/bam2tdf.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#indexbams.sh
+
+usage() {
+  echo "-h  --Help documentation for markdups.sh"
+  echo "Example: bash indexbams.sh"
+  echo "-r  --Reference Genome: GRCh38 or GRCm38"
+
+  exit 1
+}
+OPTIND=1 # Reset OPTIND
+while getopts :r:b:p:h opt
+do
+    case $opt in
+        h) usage;;
+        r) index_path=$OPTARG;;
+        p) pair_id=$OPTARG;;
+	b) bam=$OPTARG;;
+    esac
+done
+
+shift $(($OPTIND -1))
+
+# Check for mandatory options
+
+if [[ -z $SLURM_CPUS_ON_NODE ]]
+then
+    SLURM_CPUS_ON_NODE=1
+fi
+baseDir="`dirname \"$0\"`"
+
+source /etc/profile.d/modules.sh
+module load igvtools/2.3.71 samtools/1.6
+igvtools count -z 5 $bam ${pair_id}.tdf ${index_path}/igv/human.genome
+
+
diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh
index ad4ee6c..eefb332 100644
--- a/alignment/bamqc.sh
+++ b/alignment/bamqc.sh
@@ -46,6 +46,7 @@ if [[ $nuctype == 'dna' ]]; then
     module load bedtools/2.26.0 picard/2.10.3
     samtools view -@ $SLURM_CPUS_ON_NODE -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam}
     samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.ontarget.bam
+    samtools flagstat  ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
     java -Xmx64g -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt
     java -Xmx64g -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt
     java -Xmx64g -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt
diff --git a/diff_exp/geneabundance.sh b/diff_exp/geneabundance.sh
index 0bf9535..40a68ab 100644
--- a/diff_exp/geneabundance.sh
+++ b/diff_exp/geneabundance.sh
@@ -33,6 +33,10 @@ if [[ -z $SLURM_CPUS_ON_NODE ]]
 then
     SLURM_CPUS_ON_NODE=1
 fi
+if [[ $SLURM_CPUS_ON_NODE > 64 ]]
+then
+    SLURM_CPUS_ON_NODE=64
+fi
 source /etc/profile.d/modules.sh
 module load subread/1.6.1 stringtie/1.3.2d-intel
 featureCounts -s $stranded -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh
index b98711c..0548eb9 100755
--- a/variants/cnvkit.sh
+++ b/variants/cnvkit.sh
@@ -39,7 +39,10 @@ baseDir="`dirname \"$0\"`"
 
 if [[ -z $normals ]]
 then
-if [[ $umi == 'umi' ]]
+if [[ $targets == 'panel1385V2-2.cnvkit_' ]]
+then
+normals="${index_path}/panelofnormals.panel1385V2_2.cnn"
+elif [[ $umi == 'umi' ]]
 then
 normals="${index_path}/UTSWV2.uminormals.cnn"
 else
diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh
index 6134c3b..4f89845 100755
--- a/variants/norm_annot.sh
+++ b/variants/norm_annot.sh
@@ -26,9 +26,8 @@ baseDir="`dirname \"$0\"`"
 source /etc/profile.d/modules.sh
 module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q 
 
-prefix="${vcf%.vcf.gz}"
-perl $baseDir\/uniform_vcf_gt.pl $vcf
-bgzip -f ${prefix}.uniform.vcf
-j=${prefix}.uniform.vcf.gz
+perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
+bgzip -f ${pair_id}.uniform.vcf
+j=${pair_id}.uniform.vcf.gz
 tabix -f $j
-bcftools norm -m - -O z -o ${prefix}.norm.vcf.gz $j
+bcftools norm -m - -O z -o ${pair_id}.norm.vcf.gz $j
diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl
index 5f744a4..6c894c0 100755
--- a/variants/uniform_vcf_gt.pl
+++ b/variants/uniform_vcf_gt.pl
@@ -1,9 +1,9 @@
 #!/usr/bin/perl 
 #migrate_db.pl
 
+my $pair_id = shift @ARGV;
 my $vcf = shift @ARGV;
-my $outfile = $vcf;
-$outfile =~ s/vcf.gz/uniform.vcf/;
+my $outfile = $pair_id.".uniform.vcf";
 open OUT, ">$outfile" or die $!;
 open VCF, "gunzip -c $vcf|" or die $!;
 while (my $line = <VCF>) {
@@ -15,7 +15,9 @@ while (my $line = <VCF>) {
 	    print OUT "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">\n";
 	    print OUT "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">\n";
 	}
-	print OUT $line,"\n";
+	unless ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/) {
+	    print OUT $line,"\n";
+	}
 	next;
     }
     my ($chrom, $pos,$id,$ref,$alt,$score,
diff --git a/variants/union.sh b/variants/union.sh
index 1a85d4f..6c2edee 100755
--- a/variants/union.sh
+++ b/variants/union.sh
@@ -32,7 +32,7 @@ fi
 source /etc/profile.d/modules.sh
 module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q
 
-HS=*.hotspot.vcf.gz
+HS=${dir}/*.hotspot.vcf.gz
 list1=`ls ${dir}/*vcf.gz|grep -v hotspot`
 list2=`ls ${dir}/*vcf.gz|grep -v hotspot`
 varlist=''
-- 
GitLab