Skip to content
Snippets Groups Projects
Commit 2fa59e74 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

update CNVkit for SNPs using VCF

parent 9c238995
Branches
Tags
No related merge requests found
...@@ -59,15 +59,9 @@ echo "${targets}antitargets.bed" ...@@ -59,15 +59,9 @@ echo "${targets}antitargets.bed"
echo "${normals}" echo "${normals}"
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 java/oracle/jdk1.8.0_171 snpeff/4.3q
unset DISPLAY
if [[ $idtsnp == 1 ]] unset DISPLAY
then
samtools index ${sbam}
bcftools mpileup --threads 10 --gvcf 10 -A -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 1000000 -L 1000000 -C50 -f ${reffa} ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf -T ${index_path}/IDT_snps.hg38.bed
$baseDir/formatVcfCNV.pl cnvkit_common common_variants.vcf
fi
cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn
cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn
...@@ -76,11 +70,19 @@ cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns ...@@ -76,11 +70,19 @@ cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
if [[ $idtsnp == 1 ]] if [[ $idtsnp == 1 ]]
then then
samtools index ${sbam}
java -jar /cm/shared/apps/gatk/3.8/target/package/GenomeAnalysisTK.jar -T UnifiedGenotyper -R ${reffa} --output_mode EMIT_ALL_SITES -L ${index_path}/IDT_snps.hg38.bed -o common_variants.vcf -glm BOTH -dcov 10000 -I ${sbam}
$baseDir/formatVcfCNV.pl cnvkit_common common_variants.vcf
echo -e "CHROM\tPOS\tAO\tRO\tDP\tMAF" > ${pair_id}.ballelefreq.txt
java -jar $SNPEFF_HOME/SnpSift.jar extractFields cnvkit_common.vcf CHROM POS GEN[0].AO GEN[0].RO GEN[0].DP |grep -v CHROM | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$3/$5}' >> ${pair_id}.ballelefreq.txt
cnvkit.py call --filter cn ${pair_id}.cns -v cnvkit_common.vcf -o ${pair_id}.call.cns cnvkit.py call --filter cn ${pair_id}.cns -v cnvkit_common.vcf -o ${pair_id}.call.cns
cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf -v cnvkit_common.vcf
else else
cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns
cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf
fi fi
cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf
cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed
perl $baseDir/filter_cnvkit.pl ${pair_id}.call.cns perl $baseDir/filter_cnvkit.pl -s ${pair_id}.call.cns
#!/usr/bin/perl -w #!/usr/bin/perl -w
#parse_cnvkit_table.pl #parse_cnvkit_table.pl
my $refdir = '/project/shared/bicf_workflow_ref/human/GRCh38/'; use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
open OM, "<$refdir\/clinseq_prj/panelgenes.txt" or die $!; my %opt = ();
while (my $line = <OM>) { my $results = GetOptions (\%opt,'input|s=s','help|h');
chomp($line);
$keep{$line} = 1;
}
open ENT_ENS, "<$refdir/../gene2ensembl.human.txt" or die $!;
my %entrez;
my $ent_header = <ENT_ENS>;
while (my $line = <ENT_ENS>){
chomp $line;
my @row = split(/\t/, $line);
$entrez{$row[2]}=$row[1];
}
close ENT_ENS;
open ENT_SYM, "<$refdir/../gene_info.human.txt" or die $!;
$ent_header = <ENT_SYM>;
while (my $line = <ENT_SYM>){
chomp $line;
my @row = split(/\t/, $line);
$entrez{$row[2]}=$row[1];
}
close ENT_SYM;
my $file = shift @ARGV; my $file = $opt{input};
my $sname = (split(/\./,(split(/\//,$file))[-1]))[0]; my $sname = (split(/\./,(split(/\//,$file))[-1]))[0];
my $prefix = (split(/\./,$file))[0]; my $prefix = (split(/\./,$file))[0];
my %cyto; my %cyto;
open CYTO, "<$prefix\.cytoband.bed" or die $!; open CYTO, "<$prefix\.cytoband.bed" or die $!;
while (my $line = <CYTO>) { while (my $line = <CYTO>) {
...@@ -40,89 +20,97 @@ while (my $line = <CYTO>) { ...@@ -40,89 +20,97 @@ while (my $line = <CYTO>) {
push @{$cyto{$key}{$1}},$2; push @{$cyto{$key}{$1}},$2;
} }
open OUT, ">$prefix\.cnvcalls.txt" or die $!; open OUT, ">$prefix\.cnv.answer.txt" or die $!;
open OUT2, ">$prefix\.cnv.answer.txt" or die $!; open CNSO, ">$prefix\.answerplot.cns" or die $!;
open OUT3, ">$prefix\.answerplot.cns" or die $!;
open BIO, ">$prefix\.data_cna_discrete.cbioportal.txt" or die $!;
open BIO2, ">$prefix\.data_cna_continuous.cbioportal.txt" or die $!;
print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n"; print CNSO join("\t","Chromosome","Start","End","Log2","CN"),"\n";
print OUT3 join("\t","Chromosome","Start","End","Log2","CN"),"\n"; print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score","CytoBand"),"\n";
print OUT2 join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score","CytoBand"),"\n";
print BIO join("\t","Hugo_Symbol","Entrez_Gene_Id",$sname),"\n";
print BIO2 join("\t","Hugo_Symbol","Entrez_Gene_Id",$sname),"\n";
open CNR, "<$prefix\.cnr" or die $!; open CNR, "<$prefix\.cnr" or die $!;
open CNRO, ">$prefix\.answerplot.cnr" or die $!; open CNRO, ">$prefix\.answerplot.cnr" or die $!;
print CNRO join("\t","Gene","Chromosome","Start","End","Log2","Depth","Weight"),"\n"; print CNRO join("\t","Gene","Chromosome","Start","End","Log2","Depth","Weight"),"\n";
my $header = <CNR>; my $header = <CNR>;
chomp($header);
my @colnames = split(/\t/,$header);
while (my $line = <CNR>) { while (my $line = <CNR>) {
chomp($line); chomp($line);
my ($chr,$start,$end,$geneids,$log2,$depth,$weight) = split(/\t/,$line); my @row = split(/\t/,$line);
my $key = $chr.":".$start."-".$end; my %hash = ();
foreach my $j (0..$#row) {
$hash{$colnames[$j]} = $row[$j];
}
my $key = $hash{chromosome}.":".$hash{start}."-".$hash{end};
my $geneids = $hash{gene};
my %genes; my %genes;
if ($geneids =~ m/ensembl_gn/g) { if ($geneids =~ m/ensembl_gn/g) {
my @ids = split(/;|,/,$geneids); my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) { foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid); my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') { if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value}; $genes{$value} = 1;
} }
} }
} else { } else {
my @ids = split(/,/,$geneids); my @ids = split(/,/,$geneids);
foreach my $gid (@ids) { foreach my $gid (@ids) {
next if ($gid =~ /^SNP:rs\d+$/); next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion/);
my ($gene,@other) = split(/:/,$gid); my ($gene,@other) = split(/:/,$gid);
$genes{$gene} = 1 if $keep{$gene}; $genes{$gene} = 1;
} }
} }
foreach $gene (keys %genes) { foreach $gene (keys %genes) {
print CNRO join("\t",$gene,$chr,$start,$end,$log2,$depth,$weight),"\n"; print CNRO join("\t",$gene,$hash{chromosome},$hash{start},$hash{end},
$hash{log2},$hash{depth},$hash{weight}),"\n";
} }
} }
open IN, "<$file" or die $!; open IN, "<$file" or die $!;
$header = <IN>; $header = <IN>;
chomp($header);
@colnames = split(/\t/,$header);
while (my $line = <IN>) { while (my $line = <IN>) {
chomp($line); chomp($line);
my ($chr,$start,$end,$geneids,$log2,$cn,$depth, my @row = split(/\t/,$line);
$probes,$weight) = split(/\t/,$line); my %hash = ();
next if ($chr eq 'chrX' && $cn == 1); foreach my $j (0..$#row) {
my $key = $chr.":".$start."-".$end; $hash{$colnames[$j]} = $row[$j];
}
next if ($hash{chromosome} eq 'chrX' && $hash{cn} == 1);
my $key = $hash{chromosome}.":".$hash{start}."-".$hash{end};
my $geneids = $hash{gene};
my %genes; my %genes;
if ($geneids =~ m/ensembl_gn/g) { if ($geneids =~ m/ensembl_gn/g) {
my @ids = split(/;|,/,$geneids); my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) { foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid); my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') { if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value}; $genes{$value} = 1;
} }
} }
} else { } else {
my @ids = split(/,/,$geneids); my @ids = split(/,/,$geneids);
foreach my $gid (@ids) { foreach my $gid (@ids) {
next if ($gid =~ /^SNP:rs\d+$/); next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion/);
my ($gene,@other) = split(/:/,$gid); my ($gene,@other) = split(/:/,$gid);
$genes{$gene} = 1 if $keep{$gene}; $genes{$gene} = 1;
} }
} }
my $len = sprintf("%.1f",($end-$start)/1000); my $len = sprintf("%.1f",($hash{end}-$hash{start})/1000);
print OUT3 join("\t",$chr,$start,$end,$log2,$cn),"\n"; print CNSO join("\t",$hash{chromosome},$hash{start},$hash{end},
next if ($cn == 2) || scalar(keys %genes) < 1; $hash{log2},$hash{cn}),"\n";
next if ($hash{cn} == 2) || scalar(keys %genes) < 1;
my $abtype = 'amplification'; my $abtype = 'amplification';
$abtype = 'loss' if ($cn < 2); $abtype = 'loss' if ($hash{cn} < 2);
$abtype = 'gain' if ($cn > 2 && $cn < 6); $abtype = 'gain' if ($hash{cn} > 2 && $hash{cn} < 6);
foreach $gene (keys %genes) { foreach $gene (keys %genes) {
$cn_cbio = $cn -2;
$cn_cbio = 2 if ($cn > 4);
print BIO join("\t",$gene,$entrez{$gene},$cn_cbio),"\n";
print BIO2 join("\t",$gene,$entrez{$gene},$log2),"\n";
my @cytoband; my @cytoband;
if (@{$cyto{$key}{'p'}}) { if ($cyto{$key}{'p'}) {
@nums = sort {$b <=> $a} @{$cyto{$key}{'p'}}; @nums = sort {$b <=> $a} @{$cyto{$key}{'p'}};
push @cytoband, 'p'.$nums[0],'p'.$nums[-1]; push @cytoband, 'p'.$nums[0],'p'.$nums[-1];
} if (@{$cyto{$key}{'q'}}) { } if ($cyto{$key}{'q'}) {
@nums = sort {$a <=> $b} @{$cyto{$key}{'q'}}; @nums = sort {$a <=> $b} @{$cyto{$key}{'q'}};
push @cytoband, 'q'.$nums[0],'q'.$nums[-1]; push @cytoband, 'q'.$nums[0],'q'.$nums[-1];
} }
...@@ -131,11 +119,8 @@ while (my $line = <IN>) { ...@@ -131,11 +119,8 @@ while (my $line = <IN>) {
}else { }else {
$cband = join("-",$cytoband[0],$cytoband[-1]); $cband = join("-",$cytoband[0],$cytoband[-1]);
} }
print OUT2 join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight,$cband),"\n"; print OUT join("\t",$gene,$hash{chromosome},$hash{start},$hash{end},
print OUT join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight),"\n"; $abtype,$hash{cn},$hash{weight},$cband),"\n";
} }
} }
close IN; close IN;
close OUT;
close BIO;
close BIO2;
...@@ -43,7 +43,7 @@ while (my $line = <VCF>) { ...@@ -43,7 +43,7 @@ while (my $line = <VCF>) {
foreach my $i (0..$#deschead) { foreach my $i (0..$#deschead) {
$gtdata{$deschead[$i]} = $gtinfo[$i]; $gtdata{$deschead[$i]} = $gtinfo[$i];
} }
if ($alt eq '.') { if ($alt eq '.' || $alt eq '<NON_REF>') {
$gtdata{AO} = 0; $gtdata{AO} = 0;
$gtdata{RO} = $gtdata{DP}; $gtdata{RO} = $gtdata{DP};
$gtdata{AD} = join(",", $gtdata{RO},$gtdata{AO}); $gtdata{AD} = join(",", $gtdata{RO},$gtdata{AO});
...@@ -69,7 +69,7 @@ while (my $line = <VCF>) { ...@@ -69,7 +69,7 @@ while (my $line = <VCF>) {
next if ($missingGT == scalar(@gts)); next if ($missingGT == scalar(@gts));
if ($hash{END}) { if ($hash{END}) {
foreach $i ($pos..$hash{END}) { foreach $i ($pos..$hash{END}) {
print OUT join("\t",$chrom,$i,$id,'N','N',$score,$filter,$annot,$newformat,@newgts),"\n"; print OUT join("\t",$chrom,$i,$id,$ref,'.',$score,$filter,$annot,$newformat,@newgts),"\n";
} }
}else { }else {
print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment