diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index 07866703fd2f4a2dd3fe05d1e4e50dde1fca7654..6dca268dba98d92cbc5b992f04add77881515b51 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -20,18 +20,42 @@ while (my $line = <OM>) { $known{$line} = 1; } close OM; -open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1385.genelist.txt" or die $!; +open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!; while (my $line = <OM>) { chomp($line); $keep{$line} = 1; } +my @exonfiles = `ls */*.exons.txt`; +foreach $efile (@exonfiles) { + chomp($efile); + my ($dir,$pair,@etc) = split(/\/|\./,$efile); + open EFILE, "<$efile" or die $!; + my $header = <EFILE>; + while (my $line = <EFILE>) { + my ($leftgene,$rightgene,$lefttrx,$righttrx,$exonsrc, + $exonnum,$exon_chr,$exon_start,$exon_end) = split(/\t/,$line); + if ($exonsrc =~ m/5/) { + push @leftexons, $exonnum; + }else { + push @rightexons, $exonnum; + } + } + $exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]); + $exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]); +} + open OUT, ">$opt{prefix}\.translocations.txt" or die $!; +open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!; open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!; print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint", "RightBreakpoint","LeftStrand","RightStrand","RNAReads", "DNAReads"),"\n"; +print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand", + "RightGene","RightBreakpoint","RightGeneExons","RightStrand", + "RNAReads","DNAReads","FusionType","Annot"),"\n"; + print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode", "Fusion","DNA_support","RNA_support","Method","Frame"),"\n"; @@ -61,15 +85,24 @@ while (my $line = <FUSION>) { $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount}; my $fname = join("--",$hash{LeftGene},$hash{RightGene}); my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene}); + my $key = join("_",$hash{LeftGene},$left_pos,$hash{RightGene},$right_pos); + my ($leftexon,$rightexon); + if ($exonnuminfo{$key}) { + $leftexon = $exonnuminfo{$key}{leftgene}; + $rightexon = $exonnuminfo{$key}{rightgene}; + } my ($dna_support,$rna_support)=("no") x 2; if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) { $rna_support = "yes"; print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene}, $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand}, $hash{RightStrand},$hash{SumRNAReads},0),"\n"; + print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand}, + $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand}, + $hash{SumRNAReads},0,$hash{PROT_FUSION_TYPE},$hash{annots}),"\n"; print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion", $dna_support,$rna_support,"STAR Fusion","N/A"),"\n"; - print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", + print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", $dna_support,$rna_support,"STAR Fusion","N/A"),"\n"; } } diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index 8eaab5079f48981f9e2fdb6899d0435e47b56bd5..182a738385e8c8bf3159c151a498df76ec89c2d6 100644 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -63,7 +63,7 @@ else else hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/hisat_index/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt fi - if [[ $umi==1 ]] + if [[ $umi == 1 ]] then python ${baseDir}/add_umi_sam.py -s out.sam -o output.bam else diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index 27937e8ba46470a7e89448ff838e1d80ade898b5..619569cc91fa135abc7b01cca45ed00b687c7a00 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -51,19 +51,24 @@ then export TMP_HOME=$tmphome index_path=${refgeno}/CTAT_lib_trinity/ trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion - cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt - cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.coding_effect.txt + #cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt + cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt else module add star/2.5.2b index_path=${refgeno}/CTAT_lib/ STAR-Fusion --genome_lib_dir ${index_path} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err cp ${pair_id}_star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt fi -if [[ $filter==1 ]] + +module load singularity/2.6.0 +export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images" +cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh + +if [[ $filter == 1 ]] then cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt - #cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint|perl -pe 's/:/\t/g' |awk '{print $1"\t"$2"\t"$4"\t"$5"\tAVG"}' > coords.txt - #java -Xmx1G -jar /project/shared/bicf_workflow_ref/seqprg/oncofuse-1.1.1/Oncofuse.jar -a hg38 coords.txt coord AVG oncofuse.out perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt fi + +