Skip to content
Snippets Groups Projects
Commit 0002b743 authored by Brandi Cantarel
Browse files

adding cbioportal general code

parent 4b2e0039
Branches
Tags
No related merge requests found
#!/usr/bin/perl -w
#concat_cnvs.pl
#
# Merge the per-sample CNV tables found in the current directory into two
# gene-by-sample matrices:
#   *cnv_discreet.txt   -> discreet.cna.txt
#   *cnv_continuous.txt -> continuous.cna.txt
# Every input file is read as tab-delimited rows of
#   chrom, start, end, value, gene
# and the sample ID is the part of the file name before the first '.'.
use strict;
use warnings;

# Value written for a gene that a sample has no (or an empty) entry for.
my $DEFAULT_VALUE = 2;

concat_cnv_tables('*cnv_discreet.txt',   'discreet.cna.txt');
concat_cnv_tables('*cnv_continuous.txt', 'continuous.cna.txt');

# concat_cnv_tables($pattern, $outfile)
#   $pattern - glob pattern selecting the per-sample input files
#   $outfile - matrix file to (over)write
# Writes a header row ('Hugo_Symbol' followed by the sorted sample IDs) and
# one row per gene.  Dies when a file cannot be opened or the output cannot
# be flushed.
sub concat_cnv_tables {
    my ($pattern, $outfile) = @_;
    my %value_for;      # gene -> sample -> value
    my %seen_sample;

    for my $file (glob $pattern) {
        my ($sample) = split /\./, $file;
        $seen_sample{$sample} = 1;
        open my $in, '<', $file or die "Cannot read $file: $!";
        while (my $line = <$in>) {
            chomp $line;
            my ($chr, $start, $end, $value, $gene) = split /\t/, $line;
            # Rows without a gene column cannot be placed in the matrix.
            next unless defined $gene && length $gene;
            $value_for{$gene}{$sample} = $value;
        }
        close $in;
    }

    my @samples = sort keys %seen_sample;
    open my $out, '>', $outfile or die "Cannot write $outfile: $!";
    print {$out} join("\t", 'Hugo_Symbol', @samples), "\n";
    # Sorted so that repeated runs produce identical files.
    for my $gene (sort keys %value_for) {
        my @cells;
        for my $sid (@samples) {
            my $value = $value_for{$gene}{$sid};
            # Only a missing or empty entry gets the default; a plain
            # truth test would also overwrite a genuine value of 0.
            $value = $DEFAULT_VALUE unless defined $value && length $value;
            push @cells, $value;
        }
        print {$out} join("\t", $gene, @cells), "\n";
    }
    close $out or die "Cannot finish writing $outfile: $!";
    return;
}
#!/usr/bin/perl -w
#patient_sample_uuid.pl
#
# Usage: patient_sample_uuid.pl -p PROJECT_ID file1.maf [file2.maf ...]
#
# Concatenates the given MAF files into variants.maf, keeping only the
# columns listed in @mincols and only rows whose FILTER matches PASS and
# whose Variant_Classification is not one of the excluded classes.  The
# Tumor_Sample_Barcode of every kept row is then listed in
# case_lists/sequenced.txt.
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);

my %opt = ();
GetOptions(\%opt, 'prjid|p=s');
my $prjid = $opt{prjid};
unless (defined $prjid) {
    warn "No project id given (-p); case list identifiers will be incomplete\n";
    $prjid = '';
}

my @maffiles = @ARGV;
my @mincols = ('Hugo_Symbol','Entrez_Gene_Id','Variant_Classification',
               'Tumor_Sample_Barcode','HGVSp_Short','Protein_position',
               'SWISSPROT');
my @mafcols;             # column names of the MAF currently being read
my %mafids;              # sample barcodes that contributed at least one row
my $header_written = 0;  # the output gets exactly one column header

open my $mafout, '>', 'variants.maf' or die "Cannot write variants.maf: $!";
for my $maf (@maffiles) {
    open my $in, '<', $maf or die "Cannot read $maf: $!";
    while (my $line = <$in>) {
        chomp $line;
        if ($line =~ m/^#/) {
            # Only a leading '#' marks a comment; a '#' inside a data field
            # must not divert the row.  Comments after the header has been
            # written would land between data rows, so they are dropped.
            print {$mafout} $line, "\n" unless $header_written;
        }
        elsif ($line =~ m/Hugo_Symbol/i) {
            # Each input may order its columns differently, so re-read the
            # names for every file but emit the reduced header only once.
            @mafcols = split /\t/, $line;
            unless ($header_written) {
                print {$mafout} join("\t", @mincols), "\n";
                $header_written = 1;
            }
        }
        else {
            my @row = split /\t/, $line;
            my %hash;
            for my $i (0 .. $#mafcols) {
                # Blank only missing fields; a literal 0 is kept.
                $hash{$mafcols[$i]} = defined $row[$i] ? $row[$i] : '';
            }
            my $class  = defined $hash{Variant_Classification}
                       ? $hash{Variant_Classification} : '';
            my $filter = defined $hash{FILTER} ? $hash{FILTER} : '';
            next if $class =~ m/Silent|Intron|UTR|Flank|IGR|RNA|Splice_Region/;
            next unless $filter =~ m/PASS/;
            my $barcode = defined $hash{Tumor_Sample_Barcode}
                        ? $hash{Tumor_Sample_Barcode} : '';
            $mafids{$barcode} = 1;
            my @newline = map { defined $hash{$_} ? $hash{$_} : '' } @mincols;
            print {$mafout} join("\t", @newline), "\n";
        }
    }
    close $in;
}
close $mafout or die "Cannot finish writing variants.maf: $!";

# The case list lives in a sub-directory that may not exist yet.
unless (-d 'case_lists') {
    mkdir 'case_lists' or die "Cannot create case_lists: $!";
}
open my $seqd, '>', 'case_lists/sequenced.txt'
    or die "Cannot write case_lists/sequenced.txt: $!";
print {$seqd} join("\n","cancer_study_identifier: $prjid",
                   "stable_id: ${prjid}_sequenced",
                   "case_list_name: Sequenced",
                   "case_list_description: Sequenced Samples",
                   "case_list_ids:".join("\t", sort keys %mafids)),"\n";
close $seqd or die "Cannot finish writing case_lists/sequenced.txt: $!";
#!/bin/bash
# Driver run from the cBioPortal staging directory: builds the expression
# table (fpkm.txt), the per-sample CNV tables and, through concat_cnvs.pl,
# the combined CNV matrices.
module load bedtools/2.29.0

# Link the gene-name table once; a link or file left by an earlier run is kept.
if [ ! -e genenames.txt ] && [ ! -L genenames.txt ]; then
    ln -s /project/shared/bicf_workflow_ref/human/grch38_cloud/rnaref/genenames.txt .
fi

perl /project/PHG/PHG_Clinical/devel/clinseq_workflows/process_scripts/genect_rnaseq/concat_cts.pl -o ./ */*/*.cts
perl /project/PHG/PHG_Clinical/devel/clinseq_workflows/process_scripts/genect_rnaseq/concat_fpkm.pl -o ./ */*/*.fpkm.txt

# Plain -p rather than -pi: the data arrives on stdin, so there is no file to
# edit in place and -i only makes perl emit a warning.
cut -f 2,4- countTable.fpkm.txt | perl -pe 's/SYMBOL/Hugo_Symbol/g' > fpkm.txt

# One continuous (input column 5) and one discreet (input column 12) table per
# CNV file, named after the sample directory that sits directly below '..'.
for cnvfile in ../*/CNV/*.txt; do
    # When nothing matches, the pattern itself is returned; skip it.
    [ -e "$cnvfile" ] || continue
    sample=${cnvfile#../}
    sample=${sample%%/*}
    cut -f 1-3,5 "$cnvfile" \
        | bedtools intersect -wao -a stdin -b tempus.genes.hg19.bed \
        | cut -f 1-3,4,8 > "${sample}.cnv_continuous.txt"
    cut -f 1-3,12 "$cnvfile" \
        | bedtools intersect -wao -a stdin -b tempus.genes.hg19.bed \
        | cut -f 1-3,4,8 > "${sample}.cnv_discreet.txt"
done

perl /project/PHG/PHG_Clinical/devel/clinseq_workflows/process_scripts/cbioportal/concat_cnvs.pl
#!/usr/bin/perl -w
#translocation2cbioportal.pl
#
# Usage: translocation2cbioportal.pl -d DATADIR -g GBUILDDIR fusions1.txt [...]
#
# Reads DATADIR/gene_info.human.txt, GBUILDDIR/genenames.txt and
# DATADIR/gene2ensembl.human.txt to build an identifier -> Entrez ID table,
# then writes one row per input fusion row (for its LeftGene) to
# variants.fusion.txt.  The sample ID is the input file's base name up to
# the first '.'.
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
use File::Basename;

# Undefined values are written as empty fields.
sub blank_if_undef {
    my ($value) = @_;
    return defined $value ? $value : '';
}

my %opt;
GetOptions(\%opt, 'datadir|d=s', 'gbuilddir|g=s');
die "Usage: $0 -d DATADIR -g GBUILDDIR fusion_file [...]\n"
    unless defined $opt{datadir} && defined $opt{gbuilddir};

# 'chr' + column 7  ->  column 3  ->  column 2 of gene_info.human.txt
my %entgene;
my $gene_info_path = "$opt{datadir}/gene_info.human.txt";
open my $gene_info, '<', $gene_info_path or die "Cannot read $gene_info_path: $!";
my $ent_header = <$gene_info>;    # header line, not data
while (my $line = <$gene_info>) {
    chomp $line;
    my @row = split /\t/, $line;
    next if @row < 7;
    $entgene{'chr' . $row[6]}{$row[2]} = $row[1];
}
close $gene_info;

# NOTE(review): %entrez is keyed by column 4 of genenames.txt and column 3 of
# gene2ensembl.human.txt, and is looked up below by the LeftGene column --
# confirm that all three hold the same kind of identifier.
my %entrez;
my $names_path = "$opt{gbuilddir}/genenames.txt";
open my $names, '<', $names_path or die "Cannot read $names_path: $!";
my $gn_header = <$names>;         # header line, not data
while (my $line = <$names>) {
    chomp $line;
    my @row = split /\t/, $line;
    next if @row < 5;
    $entrez{$row[3]} = $entgene{$row[0]}{$row[4]};
}
close $names;

# Entries from gene2ensembl are read last and therefore take precedence.
my $g2e_path = "$opt{datadir}/gene2ensembl.human.txt";
open my $g2e, '<', $g2e_path or die "Cannot read $g2e_path: $!";
my $ens_header = <$g2e>;          # header line, not data
while (my $line = <$g2e>) {
    chomp $line;
    my @row = split /\t/, $line;
    next if @row < 3;
    $entrez{$row[2]} = $row[1];
}
close $g2e;

my @fusion_files = @ARGV;
open my $out, '>', 'variants.fusion.txt'
    or die "Cannot write variants.fusion.txt: $!";
# The header needs its own line terminator, otherwise the first data row is
# appended to the header line.
print {$out} join("\t",'Hugo_Symbol','Entrez_Gene_Id','Center',
                  'Tumor_Sample_Barcode','Fusion','DNA_support',
                  'RNA_support','Method','Frame','Fusion_Status'),"\n";
for my $ffile (@fusion_files) {
    open my $ff, '<', $ffile or die "Cannot read $ffile: $!";
    my $head = <$ff>;
    next unless defined $head;    # empty file: nothing to convert
    chomp $head;
    my @colnames = split /\t/, $head;
    my $sample = (split /\./, basename($ffile))[0];
    while (my $line = <$ff>) {
        chomp $line;
        my @row = split /\t/, $line;
        my %hash;
        for my $i (0 .. $#row) {
            last if $i > $#colnames;    # ignore cells beyond the header
            $hash{$colnames[$i]} = $row[$i];
        }
        my $gene = blank_if_undef($hash{LeftGene});
        print {$out} join("\t",
                          $gene,
                          blank_if_undef($entrez{$gene}),
                          '',
                          $sample,
                          blank_if_undef($hash{FusionName}),
                          blank_if_undef($hash{DNAReads}),
                          blank_if_undef($hash{RNAReads}),
                          'StarFusion',
                          blank_if_undef($hash{FusionType}),
                          uc(blank_if_undef($hash{SomaticStatus}))),"\n";
    }
    close $ff;
}
close $out or die "Cannot finish writing variants.fusion.txt: $!";
#!/usr/bin/perl -w
#translocation2cbioportal.pl
#
# Usage: translocation2cbioportal.pl -d DATADIR fusions1.txt [fusions2.txt ...]
#
# Maps column 3 of DATADIR/gene_info.human.txt to column 2 (used as the
# Entrez ID) and writes variants.fusion.txt: for every input row whose
# Filter column is PASS, one row for LeftGene and one for RightGene, each
# only when the gene has an entry in that map.  The sample ID is the input
# file's base name up to the first '.'.  -g/--gbuilddir is accepted but not
# used by this script.
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
use File::Basename;

# Undefined values are written as empty fields.
sub blank_if_undef {
    my ($value) = @_;
    return defined $value ? $value : '';
}

my %opt;
GetOptions(\%opt, 'datadir|d=s', 'gbuilddir|g=s');
die "Usage: $0 -d DATADIR fusion_file [...]\n" unless defined $opt{datadir};

my %entrez;
my $gene_info_path = "$opt{datadir}/gene_info.human.txt";
open my $gene_info, '<', $gene_info_path or die "Cannot read $gene_info_path: $!";
my $ent_header = <$gene_info>;    # header line, not data
while (my $line = <$gene_info>) {
    chomp $line;
    my @row = split /\t/, $line;
    next if @row < 3;
    $entrez{$row[2]} = $row[1];
}
close $gene_info;

my @fusion_files = @ARGV;
open my $out, '>', 'variants.fusion.txt'
    or die "Cannot write variants.fusion.txt: $!";
# The header needs its own line terminator, otherwise the first data row is
# appended to the header line.
print {$out} join("\t",'Hugo_Symbol','Entrez_Gene_Id','Center',
                  'Tumor_Sample_Barcode','Fusion','DNA_support',
                  'RNA_support','Method','Frame','Fusion_Status'),"\n";
for my $ffile (@fusion_files) {
    open my $ff, '<', $ffile or die "Cannot read $ffile: $!";
    my $head = <$ff>;
    next unless defined $head;    # empty file: nothing to convert
    chomp $head;
    my @colnames = split /\t/, $head;
    my $sample = (split /\./, basename($ffile))[0];
    while (my $line = <$ff>) {
        chomp $line;
        my @row = split /\t/, $line;
        my %hash;
        for my $i (0 .. $#row) {
            last if $i > $#colnames;    # ignore cells beyond the header
            $hash{$colnames[$i]} = $row[$i];
        }
        next unless defined $hash{Filter} && $hash{Filter} eq 'PASS';
        # Both partners share every field except gene symbol and Entrez ID.
        for my $side (qw(LeftGene RightGene)) {
            my $gene = $hash{$side};
            next unless defined $gene && $entrez{$gene};
            print {$out} join("\t",
                              $gene,
                              $entrez{$gene},
                              '',
                              $sample,
                              blank_if_undef($hash{FusionName}),
                              blank_if_undef($hash{DNAReads}),
                              blank_if_undef($hash{RNAReads}),
                              'StarFusion',
                              blank_if_undef($hash{FusionType}),
                              uc(blank_if_undef($hash{SomaticStatus}))),"\n";
        }
    }
    close $ff;
}
close $out or die "Cannot finish writing variants.fusion.txt: $!";
#!/usr/bin/perl -w
#parse_gencode.pl
#
# Usage: parse_gencode.pl annotation.gtf [keep_list.txt]
#
# Writes genenames.txt (chrom, start, end, ensembl, symbol, type) with one
# row per 'gene' feature of the GTF.  When a keep list is given (one gene
# name per line), only genes whose gene_name is listed are written.
use strict;
use warnings;

my $gtf = shift @ARGV;
my $keepfile = shift @ARGV;
die "Usage: $0 annotation.gtf [keep_list.txt]\n" unless defined $gtf;

my %keep;
if ($keepfile) {
    open my $kp, '<', $keepfile or die "Cannot read $keepfile: $!";
    while (my $line = <$kp>) {
        chomp $line;
        $keep{$line} = 1;
    }
    close $kp;
}

# Open the input first so that an unreadable GTF does not truncate an
# existing genenames.txt.
open my $gcode, '<', $gtf or die "Cannot read $gtf: $!";
open my $out, '>', 'genenames.txt' or die "Cannot write genenames.txt: $!";
print {$out} join("\t",'chrom','start','end','ensembl','symbol','type'),"\n";
while (my $line = <$gcode>) {
    chomp $line;
    next if $line =~ m/^#/;
    my ($chrom,$source,$feature,$start,$end,$filter,$phase,$frame,$info) =
        split /\t/, $line;
    next unless defined $feature && $feature eq 'gene';
    next unless defined $info;
    $info =~ s/\"//g;
    my %attr;
    for my $pair (split /;\s*/, $info) {
        # Limit of 2: everything after the first space is the value, so a
        # value that itself contains spaces is not cut short.
        my ($key, $val) = split / /, $pair, 2;
        next unless defined $key && length $key;
        $attr{$key} = $val;
    }
    # Remove the version suffix (".<digits>") from the gene ID.
    $attr{gene_id} =~ s/\.\d+// if defined $attr{gene_id};
    if ($keepfile) {
        next unless defined $attr{gene_name} && $keep{$attr{gene_name}};
    }
    print {$out} join("\t", $chrom, $start, $end,
                      map { defined $_ ? $_ : '' }
                          @attr{qw(gene_id gene_name gene_type)}),"\n";
}
close $gcode;
close $out or die "Cannot finish writing genenames.txt: $!";
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment