Skip to content
Snippets Groups Projects
Commit 2b3b3cfe authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

adding cnv and umicode

parent 11a5dfe5
Branches
Tags
No related merge requests found
...@@ -48,10 +48,10 @@ if [[ $nuctype == 'dna' ]]; then ...@@ -48,10 +48,10 @@ if [[ $nuctype == 'dna' ]]; then
samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
samtools view -b -F 1024 ${pair_id}.ontarget.bam | bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b stdin -hist | grep ^all > ${pair_id}.dedupcov.txt samtools view -b -F 1024 ${pair_id}.ontarget.bam | bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b stdin -hist | grep ^all > ${pair_id}.dedupcov.txt
java -Xmx32g -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt java -Xmx64g -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt
java -Xmx64g -jar $PICARD/picard.jar EstimateLibraryComplexity I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.libcomplex.txt java -Xmx64g -jar $PICARD/picard.jar EstimateLibraryComplexity I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.libcomplex.txt
samtools view -F 1024 ${pair_id}.ontarget.bam | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt samtools view -F 1024 ${pair_id}.ontarget.bam | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
java -Xmx4g -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt java -Xmx64g -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt
samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt
perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt
......
...@@ -36,7 +36,7 @@ then ...@@ -36,7 +36,7 @@ then
SLURM_CPUS_ON_NODE=1 SLURM_CPUS_ON_NODE=1
fi fi
module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 bcftools/1.6 htslib/1.6 picard/2.10.3 module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 picard/2.10.3
baseDir="`dirname \"$0\"`" baseDir="`dirname \"$0\"`"
......
...@@ -48,10 +48,10 @@ then ...@@ -48,10 +48,10 @@ then
samtools markdup -s --output-fmt BAM -@ $SLURM_CPUS_ON_NODE sort.bam ${pair_id}.dedup.bam samtools markdup -s --output-fmt BAM -@ $SLURM_CPUS_ON_NODE sort.bam ${pair_id}.dedup.bam
elif [ $algo == 'picard' ] elif [ $algo == 'picard' ]
then then
java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt java -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
elif [ $algo == 'picard_umi' ] elif [ $algo == 'picard_umi' ]
then then
java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt java -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
elif [ $algo == 'fgbio_umi' ] elif [ $algo == 'fgbio_umi' ]
then then
module load fgbio bwa/intel/0.7.15 module load fgbio bwa/intel/0.7.15
......
...@@ -9,11 +9,12 @@ usage() { ...@@ -9,11 +9,12 @@ usage() {
exit 1 exit 1
} }
OPTIND=1 # Reset OPTIND OPTIND=1 # Reset OPTIND
while getopts :b:p:h opt while getopts :b:p:uh opt
do do
case $opt in case $opt in
b) sbam=$OPTARG;; b) sbam=$OPTARG;;
p) pair_id=$OPTARG;; p) pair_id=$OPTARG;;
u) umi='umi';;
h) usage;; h) usage;;
esac esac
done done
...@@ -32,7 +33,14 @@ fi ...@@ -32,7 +33,14 @@ fi
module load cnvkit/0.9.0 module load cnvkit/0.9.0
cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_targets.bed -o ${pair_id}.targetcoverage.cnn
cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_antitargets.bed -o ${pair_id}.antitargetcoverage.cnn cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_antitargets.bed -o ${pair_id}.antitargetcoverage.cnn
if [[ $umi == 'umi' ]]
then
cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.uminormals.cnn -o ${pair_id}.cnr
else
cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.normals.cnn -o ${pair_id}.cnr cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.normals.cnn -o ${pair_id}.cnr
fi
cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
cnvkit.py call ${pair_id}.cns -o ${pair_id}.call.cns cnvkit.py call ${pair_id}.cns -o ${pair_id}.call.cns
cnvkit.py diagram ${pair_id}.cnr -s ${pair_id}.cns -o ${pair_id}.cnv.pdf cnvkit.py diagram ${pair_id}.cnr -s ${pair_id}.cns -o ${pair_id}.cnv.pdf
perl $baseDir/filter_cnvkit.pl *.call.cns
#!/usr/bin/perl -w
#parse_cnvkit_table.pl
my $refdir = '/project/shared/bicf_workflow_ref/GRCh38/';
open OM, "<$refdir\/panel1385.genelist.txt" or die $!;
while (my $line = <OM>) {
chomp($line);
$keep{$line} = 1;
}
#my @keep = ("AKT2","ATM","AURKA","BAP1","BCL2L1","BCL6","BIRC3","BRCA2","CCND1","CCNE1","CD79B","CDK8","CDKN2A","CDKN2B","CEBPA","EGFR","ERBB2","FGF10","FGF14","FGF19","FGF3","FGF4","FLT1","FLT3","FOXP1","GATA6","GNA13","GNAS","IKZF1","IL7R","IRS2","KDM4C","KLHL6","KMT2A","KRAS","LYN","MCL1","MITF","MYC","NOTCH2","PIK3CA","PRKAR1A","PRKDC","PTEN","RB1","RICTOR","RUNX1T1","SDHA","TFDP1","TP53","ZNF217","CDK4","MDM4","MYCN","BTG2","BCL2L2","RAF1");
#my %keep = map {$_ => 1} @keep;
my @cvncalls = @ARGV;
foreach my $file (@ARGV) {
open IN, "<$file" or die $!;
my $out = $file;
$out =~ s/call.cns/cnvcalls.txt/;
open OUT, ">$out" or die $!;
print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n";
my $header = <IN>;
while (my $line = <IN>) {
chomp($line);
my ($chr,$start,$end,$geneids,$log2,$cn,$depth,
$probes,$weight) = split(/\t/,$line);
my %genes;
my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value};
}
}
my $newgeneids = join(";", keys %genes);
my $len = sprintf("%.1f",($end-$start)/1000);
next if ($cn == 2) || scalar(keys %genes) < 1;
my $abtype = 'amplification';
$abtype = 'loss' if ($cn < 2);
print OUT join("\t",$newgeneids,$chr,$start,$end,$abtype,$cn,$weight),"\n";
}
close IN;
close OUT;
}
...@@ -100,4 +100,17 @@ then ...@@ -100,4 +100,17 @@ then
vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
tabix platypus.vcf.gz tabix platypus.vcf.gz
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
elif [[ $algo == 'strelka2' ]]
then
module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14
mkdir manta strelka
gvcflist=''
for i in *.bam; do
gvcflist="$gvcflist --bam ${i}"
done
configManta.py $gvcflist --referenceFasta ${reffa} --exome --runDir manta
manta/runWorkflow.py -m local -j 8
configureStrelkaGermlineWorkflow.py $gvcflist --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
strelka/runWorkflow.py -m local -j 8
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz
fi fi
...@@ -53,7 +53,8 @@ if [ $algo == 'strelka2' ] ...@@ -53,7 +53,8 @@ if [ $algo == 'strelka2' ]
/cm/shared/apps/manta/1.2.0/bin/configManta.py --normalBam ${normal}.final.bam --tumorBam ${tumor}.final.bam --referenceFasta ${genome_reference} --runDir ${manta_analysisPath} /cm/shared/apps/manta/1.2.0/bin/configManta.py --normalBam ${normal}.final.bam --tumorBam ${tumor}.final.bam --referenceFasta ${genome_reference} --runDir ${manta_analysisPath}
${manta_analysisPath}/runWorkflow.py -m local -j 8 ${manta_analysisPath}/runWorkflow.py -m local -j 8
/cm/shared/apps/strelka/2.8.3/bin/configureStrelkaSomaticWorkflow.py --normalBam ${normal}.final.bam --tumorBam ${tumor}.final.bam --referenceFasta ${genome_reference} --targeted --indelCandidates ${manta_analysisPath}/results/variants/candidateSmallIndels.vcf.gz --runDir ${strelka_analysisPath} /cm/shared/apps/strelka/2.8.3/bin/configureStrelkaSomaticWorkflow.py --normalBam ${normal}.final.bam --tumorBam ${tumor}.final.bam --referenceFasta ${genome_reference} --targeted --indelCandidates ${manta_analysisPath}/results/variants/candidateSmallIndels.vcf.gz --runDir ${strelka_analysisPath}
${strelka_analysisPath}/runWorkflow.py -m local -j 8 ${strelka_analysisPath}/runWorkflow.py -m local -j 8
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz
fi fi
if [ $algo == 'virmid' ] if [ $algo == 'virmid' ]
......
...@@ -82,13 +82,12 @@ if [ $algo == 'strelka2' ] ...@@ -82,13 +82,12 @@ if [ $algo == 'strelka2' ]
then then
module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14 module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14
mkdir manta strelka mkdir manta strelka
configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --runDir manta configManta.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --runDir manta
manta/runWorkflow.py -m local -j 8 manta/runWorkflow.py -m local -j 8
configureStrelkaSomaticWorkflow.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
strelka/runWorkflow.py -m local -j 8 strelka/runWorkflow.py -m local -j 8
vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') & (GEN[*].DP >= 10))" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka.vcf.gz vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz
fi fi
if [ $algo == 'virmid' ] if [ $algo == 'virmid' ]
then then
module load snpeff/4.3q virmid/1.2 samtools/1.6 vcftools/0.1.14 module load snpeff/4.3q virmid/1.2 samtools/1.6 vcftools/0.1.14
...@@ -108,7 +107,12 @@ fi ...@@ -108,7 +107,12 @@ fi
if [ $algo == 'mutect2' ] if [ $algo == 'mutect2' ]
then then
module load parallel gatk/3.7 snpeff/4.3q samtools/1.6 vcftools/0.1.14 module load parallel gatk/3.7 snpeff/4.3q samtools/1.6 vcftools/0.1.14
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" if [ -z ${tbed} ]
then
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
else
awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
fi
vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.pmutect.vcf.gz vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.pmutect.vcf.gz
fi fi
......
...@@ -31,8 +31,8 @@ calllist='' ...@@ -31,8 +31,8 @@ calllist=''
for i in *.vcf.gz; do for i in *.vcf.gz; do
if [[ $i == $HS ]] if [[ $i == $HS ]]
then then
bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a $i -b stdin |bgzip > hotspot.nooverlap.vcf.gz bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a $i -b stdin |bgzip > nooverlap.hotspot.vcf.gz
list2="$list2 hotspot.nooverlap.vcf.gz" list2="$list2 nooverlap.hotspot.vcf.gz"
fi fi
done done
......
...@@ -7,99 +7,102 @@ my @vcffiles = @ARGV; ...@@ -7,99 +7,102 @@ my @vcffiles = @ARGV;
open HEADER, "<$headerfile" or die $!; open HEADER, "<$headerfile" or die $!;
open OUT, ">int.vcf" or die $!; open OUT, ">int.vcf" or die $!;
while (my $line = <HEADER>) { while (my $line = <HEADER>) {
chomp($line); chomp($line);
print OUT $line,"\n";; print OUT $line,"\n";;
} }
close HEADER; close HEADER;
my @sampleorder; my @sampleorder;
my %headerlines; my %headerlines;
foreach $vcf (@vcffiles) { foreach $vcf (@vcffiles) {
$caller = (split(/\./,$vcf))[-3]; $caller = (split(/\./,$vcf))[-3];
open VCF, "gunzip -c $vcf|" or die $!; open VCF, "gunzip -c $vcf|" or die $!;
my @sampleids; my @sampleids;
while (my $line = <VCF>) { while (my $line = <VCF>) {
chomp($line); chomp($line);
if ($line =~ m/#/) { if ($line =~ m/#/) {
if ($line =~ m/#CHROM/) { if ($line =~ m/#CHROM/) {
($chromhd, $posd,$idhd,$refhd,$althd,$scorehd, ($chromhd, $posd,$idhd,$refhd,$althd,$scorehd,
$filterhd,$annothd,$formathd,@sampleids) = split(/\t/, $line); $filterhd,$annothd,$formathd,@sampleids) = split(/\t/, $line);
foreach $j (0..$#sampleids) { foreach $j (0..$#sampleids) {
$sampleids[$j] =~ s/\.final//g; $sampleids[$j] =~ s/\.final//g;
}
unless (@sampleorder) {
@sampleorder = @sampleids;
print OUT $line,"\n";
}
next;
}
next;
} }
my ($chrom, $pos,$id,$ref,$alt,$score, unless (@sampleorder) {
$filter,$annot,$format,@gts) = split(/\t/, $line); @sampleorder = @sampleids;
my %hash = (); print OUT $line,"\n";
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val;
} }
my @deschead = split(/:/,$format); next;
my $newformat = 'GT:DP:AD:AO:RO'; }
my %newgts; next;
my $missingGT = 0; }
FG:foreach my $i (0..$#gts) { my ($chrom, $pos,$id,$ref,$alt,$score,
my $allele_info = $gts[$i]; $filter,$annot,$format,@gts) = split(/\t/, $line);
my $sid = $sampleids[$i]; if ($pos == 48303944) {
my @gtinfo = split(/:/,$allele_info); warn "Debugging\n";
my %gtdata; }
if ($allele_info eq '.') { my %hash = ();
$newgts{$sid} = '.:.:.:.:.'; foreach $a (split(/;/,$annot)) {
$missingGT ++; my ($key,$val) = split(/=/,$a);
next FG; $hash{$key} = $val;
} }
foreach my $i (0..$#deschead) { my @deschead = split(/:/,$format);
$gtdata{$deschead[$i]} = $gtinfo[$i]; my $newformat = 'GT:DP:AD:AO:RO';
} my %newgts;
if ($gtdata{GT} eq './.') { my $missingGT = 0;
$newgts{$sid} = '.:.:.:.:.'; FG:foreach my $i (0..$#gts) {
$missingGT ++; my $allele_info = $gts[$i];
next FG; my $sid = $sampleids[$i];
} my @gtinfo = split(/:/,$allele_info);
if ($gtdata{AD}){ my %gtdata;
($gtdata{RO},@alts) = split(/,/,$gtdata{AD}); if ($allele_info eq '.') {
$gtdata{AO} = join(",",@alts); $newgts{$sid} = '.:.:.:.:.';
$gtdata{DP} = $gtdata{RO}; $missingGT ++;
foreach (@alts) { next FG;
$gtdata{DP} += $_;
}
} elsif (exists $gtdata{NR} && exists $gtdata{NV}) {
$gtdata{DP} = $gtdata{NR};
$gtdata{AO} = $gtdata{NV};
$gtdata{RO} = $gtdata{DP} - $gtdata{AO};
$gtdata{AD} = join(",",$gtdata{RO},$gtdata{AO})
} elsif (exists $gtdata{AO} && exists $gtdata{RO}) {
$gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO});
$gtdata{DP} = $gtdata{RO};
foreach (split(',',$gtdata{AO})) {
$gtdata{DP} += $_;
}
}
if ($gtdata{DP} && $gtdata{DP} < 3) {
$missingGT ++;
}
$newgts{$sid} = join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
} }
next if ($missingGT == scalar(@gts)); foreach my $i (0..$#deschead) {
my @newgts; $gtdata{$deschead[$i]} = $gtinfo[$i];
foreach $id (@sampleorder) { }
push @newgts, $newgts{$id}; if ($gtdata{GT} eq './.') {
$newgts{$sid} = '.:.:.:.:.';
$missingGT ++;
next FG;
}
if ($gtdata{AD}){
($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
$gtdata{AO} = join(",",@alts);
$gtdata{DP} = $gtdata{RO};
foreach (@alts) {
$gtdata{DP} += $_;
}
} elsif (exists $gtdata{NR} && exists $gtdata{NV}) {
$gtdata{DP} = $gtdata{NR};
$gtdata{AO} = $gtdata{NV};
$gtdata{RO} = $gtdata{DP} - $gtdata{AO};
$gtdata{AD} = join(",",$gtdata{RO},$gtdata{AO})
} elsif (exists $gtdata{AO} && exists $gtdata{RO}) {
$gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO});
$gtdata{DP} = $gtdata{RO};
foreach (split(',',$gtdata{AO})) {
$gtdata{DP} += $_;
} }
$lines{$chrom}{$pos}{$caller} = join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; }
if ($gtdata{DP} && $gtdata{DP} < 3) {
$missingGT ++;
}
$newgts{$sid} = join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
}
next if ($missingGT == scalar(@gts));
my @newgts;
foreach $id (@sampleorder) {
push @newgts, $newgts{$id};
} }
close VCF; $lines{$chrom}{$pos}{$caller} = join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}
close VCF;
} }
my @callers = ('ssvar','platypus','sam','gatk','hotspot'); my @callers = ('ssvar','platypus','sam','gatk','strelka2','hotspot');
if (grep(/mutect/,@vcffiles)) { if (grep(/mutect/,@vcffiles)) {
@callers = ('sssom','pmutect','shimmer','stelka','varscan','virmid'); @callers = ('sssom','pmutect','shimmer','strelka2','varscan','virmid');
} }
F1:foreach $chr (sort {$a cmp $b} keys %lines) { F1:foreach $chr (sort {$a cmp $b} keys %lines) {
F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) { F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment