From 0cb2a2fd8daa374dc95199d6c7155b3aabe6ea91 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Fri, 9 Nov 2018 08:12:09 -0600 Subject: [PATCH] updates to RNASEQ --- alignment/filter_genefusions.pl | 36 +++++++++++++++++++++++++++++++-- alignment/rnaseqalign.sh | 2 +- alignment/starfusion.sh | 15 +++++++++----- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index 0786670..73ff3bd 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -20,18 +20,41 @@ while (my $line = <OM>) { $known{$line} = 1; } close OM; -open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1385.genelist.txt" or die $!; +open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!; while (my $line = <OM>) { chomp($line); $keep{$line} = 1; } +my @exonfiles = `ls */*.exons.txt`; +foreach $efile (@exonfiles) { + chomp($efile); + my ($dir,$pair,@etc) = split(/\/|\./,$efile); + open EFILE, "<$efile" or die $!; + my $header = <EFILE>; + while (my $line = <EFILE>) { + my ($leftgene,$rightgene,$lefttrx,$righttrx,$exonsrc, + $exonnum,$exon_chr,$exon_start,$exon_end) = split(/\t/,$line); + if ($exonsrc =~ m/5/) { + push @leftexons, $exonnum; + }else { + push @rightexons, $exonnum; + } + } + $exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]); + $exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]); +} + open OUT, ">$opt{prefix}\.translocations.txt" or die $!; +open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!; open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!; print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint", "RightBreakpoint","LeftStrand","RightStrand","RNAReads", "DNAReads"),"\n"; +print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand", + "RightGene","RightBreakpoint","RightGeneExons","RightStrand","RNAReads","DNAReads"),"\n"; + print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode", "Fusion","DNA_support","RNA_support","Method","Frame"),"\n"; @@ -61,15 +84,24 @@ while (my $line = <FUSION>) { $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount}; my $fname = join("--",$hash{LeftGene},$hash{RightGene}); my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene}); + my $key = join("_",$hash{LeftGene},$left_pos,$hash{RightGene},$right_pos); + my ($leftexon,$rightexon); + if ($exonnuminfo{$key}) { + $leftexon = $exonnuminfo{$key}{leftgene}; + $rightexon = $exonnuminfo{$key}{rightgene}; + } my ($dna_support,$rna_support)=("no") x 2; if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) { $rna_support = "yes"; print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene}, $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand}, $hash{RightStrand},$hash{SumRNAReads},0),"\n"; + print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand}, + $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand}, + $hash{SumRNAReads},0,$hash{PROT_FUSION_TYPE},$hash{annots}),"\n"; print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion", $dna_support,$rna_support,"STAR Fusion","N/A"),"\n"; - print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", + print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", $dna_support,$rna_support,"STAR Fusion","N/A"),"\n"; } } diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index 8feea1b..4ef4299 100644 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -63,7 +63,7 @@ else else hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/hisat_index/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt fi - if [[ $umi==1 ]] + if [[ $umi == 1 ]] then python ${baseDir}/add_umi_sam.py -s out.sam -o output.bam else diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index 27937e8..619569c 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -51,19 +51,24 @@ then export TMP_HOME=$tmphome index_path=${refgeno}/CTAT_lib_trinity/ trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion - cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt - cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.coding_effect.txt + #cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt + cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt else module add star/2.5.2b index_path=${refgeno}/CTAT_lib/ STAR-Fusion --genome_lib_dir ${index_path} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err cp ${pair_id}_star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt fi -if [[ $filter==1 ]] + +module load singularity/2.6.0 +export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images" +cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh + +if [[ $filter == 1 ]] then cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt - #cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint|perl -pe 's/:/\t/g' |awk '{print $1"\t"$2"\t"$4"\t"$5"\tAVG"}' > coords.txt - #java -Xmx1G -jar /project/shared/bicf_workflow_ref/seqprg/oncofuse-1.1.1/Oncofuse.jar -a hg38 coords.txt coord AVG oncofuse.out perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt fi + + -- GitLab