From 4b6b270d528e317aa6bf3f200e711c56f58fe0df Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Wed, 25 Jul 2018 15:20:42 -0500 Subject: [PATCH] adding annot for cnv/starfusion --- alignment/bamqc.sh | 2 +- alignment/starfusion.sh | 5 +++++ variants/cnvkit.sh | 3 ++- variants/filter_cnvkit.pl | 15 +++++++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index 2e9f460..bc7894d 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -53,6 +53,6 @@ if [[ $nuctype == 'dna' ]]; then samtools view -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt - samtools view ${pair_id}.dedup.bam | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt + samtools view ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt fi diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index d739448..070e8d3 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -37,7 +37,12 @@ module add python/2.7.x-anaconda star/2.5.2b STAR-Fusion --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --output_dir star_fusion &> star_fusion.err mv star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt + if [[ $filter==1 ]] then +cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed +bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt +#cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint|perl -pe 's/:/\t/g' |awk '{print $1"\t"$2"\t"$4"\t"$5"\tAVG"}' > coords.txt +#java -Xmx1G -jar /project/shared/bicf_workflow_ref/seqprg/oncofuse-1.1.1/Oncofuse.jar -a hg38 coords.txt coord AVG oncofuse.out perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt fi diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 4b2513c..84903ed 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -55,11 +55,12 @@ echo "${targets}targets.bed" echo "${targets}antitargets.bed" source /etc/profile.d/modules.sh -module load cnvkit/0.9.0 +module load cnvkit/0.9.0 bedtools/2.26.0 cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns cnvkit.py call ${pair_id}.cns -o ${pair_id}.call.cns cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf +cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed perl $baseDir/filter_cnvkit.pl *.call.cns diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index ddcf170..cf905ef 100755 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -28,11 +28,22 @@ close ENT_SYM; my $file = shift @ARGV; my $prefix = (split(/\./,(split(/\//,$file))[0]))[0]; +my %cyto; +open CYTO, "<$prefix\.cytoband.bed" or die $!; +while (my $line = <CYTO>) { + chomp($line); + my ($chrom,$start,$end,$band) = split(/\t/,$line); + my $key = $chrom.":".$start."-".$end; + push @{$cyto{$key}}, $band; +} open OUT, ">$prefix\.cnvcalls.txt" or die $!; +open OUT2, ">$prefix\.cnv.answer.txt" or die $!; + open BIO, ">$prefix\.data_cna_discrete.cbioportal.txt" or die $!; open BIO2, ">$prefix\.data_cna_continuous.cbioportal.txt" or die $!; print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n"; +print OUT2 join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score","CytoBand"),"\n"; print BIO join("\t","Hugo_Symbol","Entrez_Gene_Id",$prefix),"\n"; print BIO2 join("\t","Hugo_Symbol","Entrez_Gene_Id",$prefix),"\n"; @@ -42,6 +53,8 @@ while (my $line = <IN>) { chomp($line); my ($chr,$start,$end,$geneids,$log2,$cn,$depth, $probes,$weight) = split(/\t/,$line); + next if ($chr eq 'chrX' && $cn == 1); + my $key = $chr.":".$start."-".$end; my %genes; my @ids = split(/;|,/,$geneids); foreach my $gid (@ids) { @@ -54,11 +67,13 @@ while (my $line = <IN>) { next if ($cn == 2) || scalar(keys %genes) < 1; my $abtype = 'amplification'; $abtype = 'loss' if ($cn < 2); + $abtype = 'gain' if ($cn > 2 && $cn < 6); foreach $gene (keys %genes) { $cn_cbio = $cn -2; $cn_cbio = 2 if ($cn > 4); print BIO join("\t",$gene,$entrez{$gene},$cn_cbio),"\n"; print BIO2 join("\t",$gene,$entrez{$gene},$log2),"\n"; + print OUT2 join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight,join(",",@{$cyto{$key}})),"\n"; print OUT join("\t",$gene,$chr,$start,$end,$abtype,$cn,$weight),"\n"; } } -- GitLab