From d46959b9f3e7f27f0b64e49481ad56e2939ef564 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Wed, 22 Aug 2018 16:36:34 -0500 Subject: [PATCH] bug fixes new nodes multiple allelic sites --- alignment/bam2tdf.sh | 36 ++++++++++++++++++++++++++++++++++++ alignment/bamqc.sh | 1 + diff_exp/geneabundance.sh | 4 ++++ variants/cnvkit.sh | 5 ++++- variants/norm_annot.sh | 9 ++++----- variants/uniform_vcf_gt.pl | 8 +++++--- variants/union.sh | 2 +- 7 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 alignment/bam2tdf.sh diff --git a/alignment/bam2tdf.sh b/alignment/bam2tdf.sh new file mode 100644 index 0000000..a91820d --- /dev/null +++ b/alignment/bam2tdf.sh @@ -0,0 +1,36 @@ +#!/bin/bash +#indexbams.sh + +usage() { + echo "-h --Help documentation for markdups.sh" + echo "Example: bash indexbams.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:b:p:h opt +do + case $opt in + h) usage;; + r) index_path=$OPTARG;; + p) pair_id=$OPTARG;; + b) bam=$OPTARG;; + esac +done + +shift $(($OPTIND -1)) + +# Check for mandatory options + +if [[ -z $SLURM_CPUS_ON_NODE ]] +then + SLURM_CPUS_ON_NODE=1 +fi +baseDir="`dirname \"$0\"`" + +source /etc/profile.d/modules.sh +module load igvtools/2.3.71 samtools/1.6 +igvtools count -z 5 $bam ${pair_id}.tdf ${index_path}/igv/human.genome + + diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index ad4ee6c..eefb332 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -46,6 +46,7 @@ if [[ $nuctype == 'dna' ]]; then module load bedtools/2.26.0 picard/2.10.3 samtools view -@ $SLURM_CPUS_ON_NODE -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.ontarget.bam + samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt java -Xmx64g -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt java -Xmx64g -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt java -Xmx64g -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt diff --git a/diff_exp/geneabundance.sh b/diff_exp/geneabundance.sh index 0bf9535..40a68ab 100644 --- a/diff_exp/geneabundance.sh +++ b/diff_exp/geneabundance.sh @@ -33,6 +33,10 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi +if [[ $SLURM_CPUS_ON_NODE > 64 ]] +then + SLURM_CPUS_ON_NODE=64 +fi source /etc/profile.d/modules.sh module load subread/1.6.1 stringtie/1.3.2d-intel featureCounts -s $stranded -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam} diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index b98711c..0548eb9 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -39,7 +39,10 @@ baseDir="`dirname \"$0\"`" if [[ -z $normals ]] then -if [[ $umi == 'umi' ]] +if [[ $targets == 'panel1385V2-2.cnvkit_' ]] +then +normals="${index_path}/panelofnormals.panel1385V2_2.cnn" +elif [[ $umi == 'umi' ]] then normals="${index_path}/UTSWV2.uminormals.cnn" else diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh index 6134c3b..4f89845 100755 --- a/variants/norm_annot.sh +++ b/variants/norm_annot.sh @@ -26,9 +26,8 @@ baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q -prefix="${vcf%.vcf.gz}" -perl $baseDir\/uniform_vcf_gt.pl $vcf -bgzip -f ${prefix}.uniform.vcf -j=${prefix}.uniform.vcf.gz +perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf +bgzip -f ${pair_id}.uniform.vcf +j=${pair_id}.uniform.vcf.gz tabix -f $j -bcftools norm -m - -O z -o ${prefix}.norm.vcf.gz $j +bcftools norm -m - -O z -o ${pair_id}.norm.vcf.gz $j diff --git a/variants/uniform_vcf_gt.pl b/variants/uniform_vcf_gt.pl index 5f744a4..6c894c0 100755 --- a/variants/uniform_vcf_gt.pl +++ b/variants/uniform_vcf_gt.pl @@ -1,9 +1,9 @@ #!/usr/bin/perl #migrate_db.pl +my $pair_id = shift @ARGV; my $vcf = shift @ARGV; -my $outfile = $vcf; -$outfile =~ s/vcf.gz/uniform.vcf/; +my $outfile = $pair_id.".uniform.vcf"; open OUT, ">$outfile" or die $!; open VCF, "gunzip -c $vcf|" or die $!; while (my $line = <VCF>) { @@ -15,7 +15,9 @@ while (my $line = <VCF>) { print OUT "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">\n"; print OUT "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">\n"; } - print OUT $line,"\n"; + unless ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/) { + print OUT $line,"\n"; + } next; } my ($chrom, $pos,$id,$ref,$alt,$score, diff --git a/variants/union.sh b/variants/union.sh index 1a85d4f..6c2edee 100755 --- a/variants/union.sh +++ b/variants/union.sh @@ -32,7 +32,7 @@ fi source /etc/profile.d/modules.sh module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q -HS=*.hotspot.vcf.gz +HS=${dir}/*.hotspot.vcf.gz list1=`ls ${dir}/*vcf.gz|grep -v hotspot` list2=`ls ${dir}/*vcf.gz|grep -v hotspot` varlist='' -- GitLab