Commits (11)
......@@ -91,6 +91,7 @@ while (my $line = <FUSION>) {
$leftexon = $exonnuminfo{$key}{leftgene};
$rightexon = $exonnuminfo{$key}{rightgene};
}
$hash{PROT_FUSION_TYPE} = 'in-frame' if ($hash{PROT_FUSION_TYPE} eq 'INFRAME');
my ($dna_support,$rna_support)=("no") x 2;
if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) {
$rna_support = "yes";
......@@ -99,11 +100,11 @@ while (my $line = <FUSION>) {
$hash{RightStrand},$hash{SumRNAReads},0),"\n";
print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand},
$hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand},
$hash{SumRNAReads},0,$hash{PROT_FUSION_TYPE},$hash{annots}),"\n";
$hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$hash{annots}),"\n";
print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion",
$dna_support,$rna_support,"STAR Fusion","N/A"),"\n";
$dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
$dna_support,$rna_support,"STAR Fusion","N/A"),"\n";
$dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n";
}
}
......
......@@ -39,8 +39,7 @@ then
fi
source /etc/profile.d/modules.sh
module load subread/1.6.1 stringtie/1.3.2d-intel
featureCounts -s $stranded -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
featureCounts -s $stranded -M --fraction -J --ignoreDup -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
mkdir ${pair_id}_stringtie
cd ${pair_id}_stringtie
stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ../${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
......
......@@ -15,7 +15,7 @@ while (my $line = <SYM>) {
chomp($line);
my ($chrom,$start,$end,$ensembl,$symbol,$type) = split(/\t/,$line);
$ensembl = (split(/\./,$ensembl))[0];
$names{$symbol} = {ensembl=>$ensembl,type=>$type};
$names{$ensembl} = {symbol=>$symbol,type=>$type};
}
my @files = @ARGV;
......@@ -32,7 +32,8 @@ foreach $file (@files) {
while (my $line = <IN>) {
chomp($line);
my ($ensid,$gene,$chr,$strand,$start,$end,$cov,$fpkm,$tmp) = split(/\t/,$line);
$cts{$gene}{$sample} = $fpkm;
my ($ens,$version) = split(/\./,$ensid);
$cts{$ens}{$sample} = $fpkm;
}
close IN;
}
......@@ -57,7 +58,7 @@ foreach $gene (keys %cts) {
#next unless ($sortct[0] >= 1);
next if ($names{$gene}{type} eq 'rRNA');
next if ($exc{$gene});
print OUT join("\t",$names{$gene}{ensembl},$gene,
print OUT join("\t",$gene,$names{$gene}{symbol},
$names{$gene}{type},@line),"\n";
}
close OUT;
......@@ -36,7 +36,7 @@ then
bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX -o ${pair_id}.gnomad.vcf.gz ${unionvcf}
tabix ${pair_id}.gnomad.vcf.gz
bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.gnomad.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz
tabix ${pair_id}.annot.vcf.gz
else
if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCm38' ]]
......
......@@ -49,7 +49,7 @@ then
elif [[ $capture == '/project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/UTSWV2_2.panelplus.bed' ]]
then
normals="${index_path}/panelofnormals.panel1385V2_2.cnn"
target='panel1385V2-2.cnvkit_'
targets="${index_path}/panel1385V2-2.cnvkit_"
fi
echo "${targets}targets.bed"
......
......@@ -60,11 +60,19 @@ while (my $line = <CNR>) {
my ($chr,$start,$end,$geneids,$log2,$depth,$weight) = split(/\t/,$line);
my $key = $chr.":".$start."-".$end;
my %genes;
my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value};
if ($geneids =~ m/ensembl_gn/g) {
my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value};
}
}
}else {
my @ids = split(/,/,$geneids);
foreach my $gid (@ids) {
my ($gene,$trxid,$exonnum,$strand) = split(/\|/,$gid);
$genes{$gene} = 1 if $keep{$gene};
}
}
foreach $gene (keys %genes) {
......@@ -73,7 +81,7 @@ while (my $line = <CNR>) {
}
open IN, "<$file" or die $!;
my $header = <IN>;
$header = <IN>;
while (my $line = <IN>) {
chomp($line);
my ($chr,$start,$end,$geneids,$log2,$cn,$depth,
......@@ -81,11 +89,19 @@ while (my $line = <IN>) {
next if ($chr eq 'chrX' && $cn == 1);
my $key = $chr.":".$start."-".$end;
my %genes;
my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value};
if ($geneids =~ m/ensembl_gn/g) {
my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value};
}
}
}else {
my @ids = split(/,/,$geneids);
foreach my $gid (@ids) {
my ($gene,$trxid,$exonnum,$strand) = split(/\|/,$gid);
$genes{$gene} = 1 if $keep{$gene};
}
}
my $len = sprintf("%.1f",($end-$start)/1000);
......
#!/usr/bin/perl -w
#integrate_datasets.pl
#module load vcftools/0.1.14 samtools/1.6 bedtools/2.26.0
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
my %opt = ();
my $results = GetOptions (\%opt,'td|d=s','indel|i=s','sv|s=s','tumor|t=s');
my @files = grep(/vcf.gz/,values %opt);
foreach $file (@files) {
chomp($file);
open IN, "gunzip -c $file |" or die $!;
my $outfile = $file;
$outfile =~ s/vcf.*/pass.vcf/;
my @gtheader;
open OUT, ">$outfile" or die $!;
W1:while (my $line = <IN>) {
chomp($line);
if ($line =~ m/^#/) {
print OUT $line,"\n";
if ($line =~ m/^#CHROM/) {
my @header = split(/\t/,$line);
($chrom, $pos,$id,$ref,$alt,$score,
$filter,$info,$format,@gtheader) = split(/\t/, $line);
unless ($opt{tumor}) {
if (grep(/T_DNA/,@gtheader)) {
my @tsamps = grep(/T_DNA/,@gtheader);
$opt{tumor} = $tsamps[0];
}else {
$opt{tumor} = $gtheader[0];
}
}
}
next;
}
my ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts) = split(/\t/, $line);
next if ($ref =~ m/\./ || $alt =~ m/\./ || $alt=~ m/,X/);
my %hash = ();
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val unless ($hash{$key});
}
next unless ($hash{ANN});
next unless ($hash{ANN} =~ m/HIGH|MODERATE|LOW/);
my %gtinfo = ();
my @deschead = split(/:/,$format);
F1:foreach my $k (0..$#gtheader) {
my $subjid = $gtheader[$k];
my $allele_info = $gts[$k];
my @ainfo = split(/:/, $allele_info);
my @mutallfreq = ();
foreach my $k (0..$#ainfo) {
$gtinfo{$subjid}{$deschead[$k]} = $ainfo[$k];
$hash{$deschead[$k]} = $ainfo[$k] if ($subjid eq $opt{tumor});
}
$gtinfo{$subjid}{DP} = (split(/,/,$gtinfo{$subjid}{DP}))[0] if ($gtinfo{$subjid}{DP});
next F1 unless ($gtinfo{$subjid}{DP} && $gtinfo{$subjid}{DP} ne '.' && $gtinfo{$subjid}{DP} >= 1);
my @altct = split(/,/,$gtinfo{$subjid}{AD});
my $refct = shift @altct;
@altct2 = split(/,/,$gtinfo{$subjid}{AO});
if (scalar(@altct) ne scalar(@altct2)) {
warn "Inconsistent Allele counts @ $chrom,$pos,$alt,$gtinfo{$subjid}{AD},$gtinfo{$subjid}{AO}\n";
}
my $total = $refct;
foreach my $act (@altct) {
next if ($act eq '.');
$total += $act;
push @mutallfreq, sprintf("%.4f",$act/$gtinfo{$subjid}{DP});
}
$gtinfo{$subjid}{MAF} = \@mutallfreq;
}
next unless ($gtinfo{$opt{tumor}}{DP} && $gtinfo{$opt{tumor}}{DP} ne '.' && $gtinfo{$opt{tumor}}{DP} >= 20);
unless ($gtinfo{$opt{tumor}}{AO} =~ m/\d+/ && $gtinfo{$opt{tumor}}{AD} =~ m/,/) {
warn "Missing Alt:$line\n";
}
@tumormaf = @{$gtinfo{$opt{tumor}}{MAF}};
@tumoraltct = split(/,/,$gtinfo{$opt{tumor}}{AO});
next if ($tumoraltct[0] eq '.');
$hash{AF} = join(",",@tumormaf);
next if ($tumoraltct[0] < 20);
next if ($tumormaf[0] < 0.05);
my $keepforvcf = 0;
foreach $trx (split(/,/,$hash{ANN})) {
my ($allele,$effect,$impact,$gene,$geneid,$feature,
$featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna,
$cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx);
next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i);
next if($effect eq 'sequence_feature');
if ($file eq $opt{sv}) {
next unless ($effect eq 'gene_fusion');
}
$keepforvcf = $gene;
}
next unless $keepforvcf;
my @fail = sort {$a cmp $b} keys %fail;
next if (scalar(@fail) > 0);
my @nannot;
foreach $info (sort {$a cmp $b} keys %hash) {
if (defined $hash{$info}) {
push @nannot, $info."=".$hash{$info};
}else {
push @nannot, $info;
}
}
my $newannot = join(";",@nannot);
print OUT join("\t",$chrom, $pos,$id,$ref,$alt,$score,$filter,$newannot,
$format,@gts),"\n";
}
close IN;
}
......@@ -77,14 +77,15 @@ while (my $line = <VCF>) {
next if ($missingGT == scalar(@gts));
if ($hash{SVTYPE} eq 'DUP:TANDEM') {
print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
print DUP join("\t",$chrom,$pos,$id,'N','<DUP>',$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'INS') {
print SV join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
print SV join("\t",$chrom,$pos,$id,'N','<INS>',$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') {
if (abs($hash{SVLEN}) < 50) {
print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}else {
print SV join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
$newalt = "<".$hash{SVTYPE}.">";
print SV join("\t",$chrom,$pos,$id,'N',$newalt,$score,$filter,$annot,$newformat,@newgts),"\n";
}
}
}
......