diff --git a/variants/annotvcf.sh b/variants/annotvcf.sh index f3d246289978d343b37ec7b4632e16021efb9b36..7bc785b9e608a0595fa454e065f809a4dca5a0b3 100755 --- a/variants/annotvcf.sh +++ b/variants/annotvcf.sh @@ -36,7 +36,7 @@ then bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX -o ${pair_id}.gnomad.vcf.gz ${unionvcf} tabix ${pair_id}.gnomad.vcf.gz bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.gnomad.vcf.gz - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz tabix ${pair_id}.annot.vcf.gz else if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCm38' ]] diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index 2f60eef3d84c6973f4568c831486e6a5f98c5e6b..b7504ad5823d209c647e3ae270f9e71df8f55f62 100755 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -60,11 +60,19 @@ while (my $line = <CNR>) { my ($chr,$start,$end,$geneids,$log2,$depth,$weight) = split(/\t/,$line); my $key = $chr.":".$start."-".$end; my %genes; - my @ids = split(/;|,/,$geneids); - foreach my $gid (@ids) { - my ($key,$value) = split(/=/,$gid); - if ($key eq 'ensembl_gn' || $key eq 'identifier') { - $genes{$value} = 1 if $keep{$value}; + if ($geneids =~ m/ensembl_gn/g) { + my @ids = split(/;|,/,$geneids); + foreach my $gid (@ids) { + my ($key,$value) = split(/=/,$gid); + if ($key eq 'ensembl_gn' || $key eq 'identifier') { + $genes{$value} = 1 if $keep{$value}; + } + } + }else { + my @ids = split(/,/,$geneids); + foreach my $gid (@ids) { + my ($gene,$trxid,$exonnum,$strand) = split(/\|/,$gid); + $genes{$gene} = 1 if $keep{$gene}; } } foreach $gene (keys %genes) { @@ -81,11 +89,19 @@ while (my $line = <IN>) { next if ($chr eq 'chrX' && $cn == 1); my $key = $chr.":".$start."-".$end; my %genes; - my @ids = split(/;|,/,$geneids); - foreach my $gid (@ids) { - my ($key,$value) = split(/=/,$gid); - if ($key eq 'ensembl_gn' || $key eq 'identifier') { - $genes{$value} = 1 if $keep{$value}; + if ($geneids =~ m/ensembl_gn/g) { + my @ids = split(/;|,/,$geneids); + foreach my $gid (@ids) { + my ($key,$value) = split(/=/,$gid); + if ($key eq 'ensembl_gn' || $key eq 'identifier') { + $genes{$value} = 1 if $keep{$value}; + } + } + }else { + my @ids = split(/,/,$geneids); + foreach my $gid (@ids) { + my ($gene,$trxid,$exonnum,$strand) = split(/\|/,$gid); + $genes{$gene} = 1 if $keep{$gene}; } } my $len = sprintf("%.1f",($end-$start)/1000);