Skip to content
Snippets Groups Projects
Commit a4a2a4db authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

adding pindel

parent f7278ea1
Branches
Tags
No related merge requests found
......@@ -79,7 +79,7 @@ then
java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz
elif [[ $algo == 'speedseq' ]]
then
module load speedseq/20160506
module load speedseq/gcc/0.1.2
speedseq var -t $SLURM_CPUS_ON_NODE -o ssvar ${reffa} *.bam
vcf-annotate -n --fill-type ssvar.vcf.gz| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.ssvar.vcf.gz -
elif [[ $algo == 'gatk' ]]
......
#!/usr/bin/perl
#parse_pindel
my $pair_id = shift @ARGV;
my $tid = shift @ARGV;
my $vcf = shift @ARGV;
open SI, ">$pair_id.indel.vcf" or die $!;
open SV, ">$pair_id.sv.vcf" or die $!;
open DUP, ">$pair_id.dup.vcf" or die $!;
open VCF, "gunzip -c $vcf|" or die $!;
while (my $line = <VCF>) {
chomp($line);
if ($line =~ m/#/) {
print SI $line,"\n";
print SV $line,"\n";
print DUP $line,"\n";
if ($line =~ m/#CHROM/) {
my @header = split(/\t/,$line);
($chrom, $pos,$id,$ref,$alt,$score,
$filter,$info,$format,@subjacc) = split(/\t/, $line);
}
next;
}
my ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts) = split(/\t/, $line);
my %hash = ();
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val;
}
my @deschead = split(/:/,$format);
my $newformat = 'GT:DP:AD:AO:RO';
my @newgts = ();
my $missingGT = 0;
my %allele;
FG:foreach my $i (0..$#gts) {
my $sid = $subjacc[$i];
my @gtinfo = split(/:/,$gts[$i]);
my %gtdata;
if ($allele_info eq '.') {
push @newgts, '.:.:.:.:.';
$missingGT ++;
next FG;
}
foreach my $i (0..$#deschead) {
$gtdata{$deschead[$i]} = $gtinfo[$i];
}
if ($gtdata{AD}){
($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
$gtdata{AO} = join(",",@alts);
$gtdata{DP} = $gtdata{RO};
foreach (@alts) {
$gtdata{DP} += $_;
}
} elsif (exists $gtdata{NR} && exists $gtdata{NV}) {
$gtdata{DP} = $gtdata{NR};
$gtdata{AO} = $gtdata{NV};
$gtdata{RO} = $gtdata{DP} - $gtdata{AO};
} elsif (exists $gtdata{AO} && exists $gtdata{RO}) {
$gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO});
$gtdata{DP} = $gtdata{RO};
foreach (split(',',$gtdata{AO})) {
$gtdata{DP} += $_;
}
}
if (($gtdata{DP} && $gtdata{DP} < 10) || ()) {
$missingGT ++;
} if ($gtdata{DP} == 0 || $gtdata{GT} eq './.') {
push @newgts, '.:.:.:.:.';
$missingGT ++;
next FG;
}
$allele{$tid} = [sprintf("%.4f",$gtdata{AO}/$gtdata{DP}),$gtdata{DP}];
push @newgts, join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
}
next if ($missingGT == scalar(@gts));
next unless ($allele{$tid});
my ($taf,$tdp) = @{$allele{$tid}};
next if ($taf < 0.05 && $tdp < 10);
if ($hash{SVTYPE} eq 'DUP:TANDEM') {
print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'INS') {
print SV join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') {
if (abs($hash{SVLEN}) < 50) {
print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}else {
print SV join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}
}
}
close VCF;
#!/bin/bash
#svcalling.sh
usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Path to Reference Genome with the file genome.fa"
echo "-p --Prefix for output file name"
echo "Example: bash svcalling.sh -p prefix -r /path/GRCh38 -a gatk"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:p:h opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
done
function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $index_path ]]; then
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
if [[ -a "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
source /etc/profile.d/modules.sh
genomefiledate=`find ${reffa} -maxdepth 0 -printf "%TY%Tm%Td\n"`
module load samtools/1.6 pindel/0.2.5-intel snpeff/4.3q
touch ${pair_id}.pindel.config
for i in *.bam; do
sname="${i%.bam}"
echo -e "${i}\t400\t${sname}" >> ${pair_id}.pindel.config
done
pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP
pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf
cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz
perl $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz
perl $baseDir/parse_pindel.pl ${pair_id} $tid pindel.norm.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf |bgzip > ${pair_id}.pindel_tandemdup.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf |bgzip > ${pair_id}.pindel_sv.vcf.gz
......@@ -11,16 +11,15 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:p:b:t:i:k:n:m:h opt
while getopts :r:p:b:i:e:n:a:h opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
b) sbam=$OPTARG;;
k) tid=$OPTARG;;
i) nid=$OPTARG;;
i) tid=$OPTARG;;
n) normal=$OPTARG;;
m) method=$OPTARG;;
a) method=$OPTARG;;
h) usage;;
esac
done
......@@ -48,25 +47,11 @@ else
usage
fi
source /etc/profile.d/modules.sh
module load speedseq/20160506 novoBreak/v1.1.3 delly2/v0.7.7-multi samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14
module load samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14
mkdir temp
genomefiledate=`find ${reffa} -maxdepth 0 -printf "%TY%Tm%Td\n"`
if [[ $method == 'pindel' ]]
then
module load samtools/1.6 pindel/0.2.5-intel snpeff/4.3q
echo -e "${sbam}\t400\t${tid}" > ${pair_id}.pindel.config
if [[ -n ${normal} ]]
then
echo -e "${normal}\t400\t${nid}" >> ${pair_id}.pindel.config
fi
pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP
pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.pindel.vcf.gz
fi
if [[ $method == 'delly' ]]
then
module load delly2/v0.7.7-multi samtools/1.6 snpeff/4.3q
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment