diff --git a/bamqc.sh b/alignment/bamqc.sh similarity index 100% rename from bamqc.sh rename to alignment/bamqc.sh diff --git a/calculate_depthcov.pl b/alignment/calculate_depthcov.pl similarity index 100% rename from calculate_depthcov.pl rename to alignment/calculate_depthcov.pl diff --git a/covstats.pl b/alignment/covstats.pl similarity index 100% rename from covstats.pl rename to alignment/covstats.pl diff --git a/hisat.sh b/alignment/hisat.sh similarity index 100% rename from hisat.sh rename to alignment/hisat.sh diff --git a/parse_flagstat.pl b/alignment/parse_flagstat.pl similarity index 100% rename from parse_flagstat.pl rename to alignment/parse_flagstat.pl diff --git a/parse_seqqc.pl b/alignment/parse_seqqc.pl similarity index 100% rename from parse_seqqc.pl rename to alignment/parse_seqqc.pl diff --git a/plot_hist_genocov.R b/alignment/plot_hist_genocov.R similarity index 100% rename from plot_hist_genocov.R rename to alignment/plot_hist_genocov.R diff --git a/star_aligner.sh b/alignment/star_aligner.sh similarity index 100% rename from star_aligner.sh rename to alignment/star_aligner.sh diff --git a/starfusion.sh b/alignment/starfusion.sh similarity index 100% rename from starfusion.sh rename to alignment/starfusion.sh diff --git a/subset_bam.pl b/alignment/subset_bam.pl similarity index 100% rename from subset_bam.pl rename to alignment/subset_bam.pl diff --git a/baysic_blc.pl b/baysic_blc.pl deleted file mode 100755 index 62823e670843dbb2b2e5e2f433d54bf205dbf65b..0000000000000000000000000000000000000000 --- a/baysic_blc.pl +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/perl -w -#count_venn.vcf - -use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); -my %opt = (); -my $results = GetOptions (\%opt,'fasta|f=s','help|h','prefix|p=s','complex|c=s','execdir|e=s'); -my @zipvcf = @ARGV; - -unless($opt{fasta}) { - $opt{fasta} = '/project/shared/bicf_workflow_ref/GRCh38/hs38DH.fa'; -} -unless($opt{prefix}) { - $opt{prefix} = 'baysic'; -} -my $total = 0; -open FASTA, "<$opt{fasta}\.fai" or die "unable to find fai index for $opt{fasta}, please index with samtools"; -while (my $line = <FASTA>) { - chomp($line); - my ($chr,$length,$offset,$linelen,$linebytes) = split(/\t/,$line); - $total += $length; -} - -my %ct = (); -my %snpbins = (); -open CTS, ">baysic.cts" or die $!; -print CTS "Estimated sum: ".$total,"\n"; -open VC, "<vcf_compare.out" or die $!; - -while (my $line = <VC>) { - my @key = (); - next if($line =~ m/#/); - if ($line =~ m/^VN\s+(.+)/) { - my ($ct,@flist) = split(/\s+/,$1); - foreach my $dset (@zipvcf) { - if (grep(/$dset/,@flist)) { - push @key, 1; - }else { - push @key, 0; - } - } - $key = join("",@key); - $ct{$key} = $ct; - $total -= $ct; - } -} -my @g = bits(scalar(@zipvcf)); -$ct{$g[0]} = $total; -foreach (@g) { - $ct{$_} = 0 unless ($ct{$_}); - print CTS join("\t",$_,$ct{$_}),"\n"; -} - -system("Rscript $opt{execdir}\/scripts/lca.R -c baysic.cts -s baysic.stats"); - -my @key1 = split(//,$g[-1]); -my @key2; -foreach $i (0..$#key1) { - push @key2, $i if ($key1[$i] > 0); -} -open STATS, "baysic.stats" or die $!; -my @keeppos; -while (my $line = <STATS>) { - chomp($line); - next unless ($line =~ m/postprobs\[(\d+)\]\s+(\S+)/); - my ($index,$prob) = ($1,$2); - next unless ($prob >= 0.8); - my $key = $g[$index-1]; - @key1 = split(//,$key); - @key2 = (); - foreach $i (0..$#key1) { - push @key2, $i if ($key1[$i] > 0); - } - my $subset = 'integrate'.join('_',@key2).'.vcf.gz'; - push @keeppos, $subset if (-e $subset); -} -push @keeppos, $opt{complex} if ($opt{complex}); -system("vcf-concat ".join(" ",@keeppos)." |vcf-sort |bgzip > final.integrated.vcf.gz"); - -sub bits { glob join "", map "{0,1}", 1..$_[0] } diff --git a/baysic_indel.pl b/baysic_indel.pl deleted file mode 100755 index 43c6a433262227abe779360b927774f50a85c2db..0000000000000000000000000000000000000000 --- a/baysic_indel.pl +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/perl -w -#count_venn.vcf - -use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); -my %opt = (); -my $results = GetOptions (\%opt,'fasta|f=s','capture|b=s','help|h','prefix|p=s','execdir|e=s'); - -unless($opt{prefix}) { - $opt{prefix} = 'baysic'; -} -my $total = 0; -if ($opt{fasta}) { - open FASTA, "<$opt{fasta}\.fai" or die "unable to find fai index for $opt{fasta}, please index with samtools"; - while (my $line = <FASTA>) { - chomp($line); - my ($chr,$length,$offset,$linelen,$linebytes) = split(/\t/,$line); - $total += $length; - } -}else { - open BED, "<$opt{capture}" or die "Please provide reference genome (-f) or capture bed(-b)"; - while (my $line = <BED>) { - chomp($line); - my ($chr,$start,$end) = split(/\t/,$line); - $total += $end-$start+1; - } -} - -my %ct = (); -my %snpbins = (); -open CTS, ">baysic.cts" or die $!; -print CTS "Estimated sum: ".$total,"\n"; - -#bedtools multiinter -header -i gatk1.indel.bed platypus1.indel.bed sam1.indel.bed ssvar1.indel.bed > indel_intersect.txt - -open VC, "<indel_intersect.txt" or die $!; -my $header = <VC>; -chomp($header); -my @header = split(/\t/,$header); -@callers = @header[5..$#header]; -my $numcallers = scalar(@callers); -while (my $line = <VC>) { - chomp($line); - my @row = split(/\t/,$line); - my $key = join("",@row[5..$#row]); - $ct{$key} ++; - push @{$chrpos{$key}}, join("\t",@row[0..2]); - $total --; -} - -my @g = bits($numcallers); -$ct{$g[0]} = $total; -foreach (@g) { - $ct{$_} = 0 unless ($ct{$_}); - print CTS join("\t",$_,$ct{$_}),"\n"; -} - -system("Rscript $opt{execdir}\/scripts/lca.R -c baysic.cts -s baysic.stats"); - -my @key1 = split(//,$g[-1]); -my @key2; -foreach $i (0..$#key1) { - push @key2, $i if ($key1[$i] > 0); -} -open STATS, "baysic.stats" or die $!; -open BEDU, ">indels.bed" or die $!; -my @keeppos; -while (my $line = <STATS>) { - chomp($line); - next unless ($line =~ m/postprobs\[(\d+)\]\s+(\S+)/); - my ($index,$prob) = ($1,$2); - next unless ($prob >= 0.8); - my $key = $g[$index-1]; - if ($chrpos{$key}) { - print BEDU join("\n",@{$chrpos{$key}}),"\n"; - } -} -close BEDU; -system("sort -k 1,1 -k 2,2n indels.bed > indels.sort.bed"); - -sub bits { glob join "", map "{0,1}", 1..$_[0] } diff --git a/baysic_snp.pl b/baysic_snp.pl deleted file mode 100755 index 96d32f3a16bdfeaa6d0d5c19e1d4fa2e5334517b..0000000000000000000000000000000000000000 --- a/baysic_snp.pl +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/perl -w -#count_venn.vcf - -use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); -my %opt = (); -my $results = GetOptions (\%opt,'fasta|f=s','capture|b=s','help|h','prefix|p=s','complex|c=s','execdir|e=s'); - -unless($opt{prefix}) { - $opt{prefix} = 'baysic'; -} -my $total = 0; -if ($opt{fasta}) { - open FASTA, "<$opt{fasta}\.fai" or die "unable to find fai index for $opt{fasta}, please index with samtools"; - while (my $line = <FASTA>) { - chomp($line); - my ($chr,$length,$offset,$linelen,$linebytes) = split(/\t/,$line); - $total += $length; - } -}else { - open BED, "<$opt{capture}" or die "Please provide reference genome (-f) or capture bed(-b)"; - while (my $line = <BED>) { - chomp($line); - my ($chr,$start,$end) = split(/\t/,$line); - $total += $end-$start+1; - } -} - -my %ct = (); -my %snpbins = (); -open CTS, ">baysic.cts" or die $!; -print CTS "Estimated sum: ".$total,"\n"; - -#bedtools multiinter -header -i gatk1.snp.vcf.gz platypus1.snp.vcf.gz sam1.snp.vcf.gz ssvar1.snp.vcf.gz > snp_intersect.txt - -open VC, "<snp_intersect.txt" or die $!; -my $header = <VC>; -chomp($header); -my @header = split(/\t/,$header); -@callers = @header[5..$#header]; -my $numcallers = scalar(@callers); -while (my $line = <VC>) { - chomp($line); - my @row = split(/\t/,$line); - my $key = join("",@row[5..$#row]); - $ct{$key} ++; - #push @{$chrpos{$key}}, join('\t',@row[0..2]); - $total --; -} - -my @g = bits($numcallers); -$ct{$g[0]} = $total; -foreach (@g) { - $ct{$_} = 0 unless ($ct{$_}); - print CTS join("\t",$_,$ct{$_}),"\n"; -} - -system("Rscript $opt{execdir}\/scripts/lca.R -c baysic.cts -s baysic.stats"); - -my @key1 = split(//,$g[-1]); -my @key2; -foreach $i (0..$#key1) { - push @key2, $i if ($key1[$i] > 0); -} -open STATS, "baysic.stats" or die $!; -my @keeppos; -while (my $line = <STATS>) { - chomp($line); - next unless ($line =~ m/postprobs\[(\d+)\]\s+(\S+)/); - my ($index,$prob) = ($1,$2); - next unless ($prob >= 0.8); - my $key = $g[$index-1]; - @key1 = split(//,$key); - @key2 = (); - foreach $i (0..$#key1) { - push @key2, $i if ($key1[$i] > 0); - } - my $subset = 'integrate'.join('_',@key2).'.vcf.gz'; - push @keeppos, $subset if (-e $subset); -} -push @keeppos, $opt{complex} if ($opt{complex}); -system("vcf-concat ".join(" ",@keeppos)." |vcf-sort |bgzip > final.integrated.vcf.gz"); - -sub bits { glob join "", map "{0,1}", 1..$_[0] } diff --git a/DEA_htseq.R b/diff_exp/DEA_htseq.R similarity index 100% rename from DEA_htseq.R rename to diff_exp/DEA_htseq.R diff --git a/build_ballgown.R b/diff_exp/build_ballgown.R similarity index 100% rename from build_ballgown.R rename to diff_exp/build_ballgown.R diff --git a/concat_edgeR.pl b/diff_exp/concat_edgeR.pl similarity index 100% rename from concat_edgeR.pl rename to diff_exp/concat_edgeR.pl diff --git a/dea.R b/diff_exp/dea.R similarity index 100% rename from dea.R rename to diff_exp/dea.R diff --git a/gencode_genename.pl b/diff_exp/gencode_genename.pl similarity index 100% rename from gencode_genename.pl rename to diff_exp/gencode_genename.pl diff --git a/concat_cts.pl b/genect_rnaseq/concat_cts.pl similarity index 100% rename from concat_cts.pl rename to genect_rnaseq/concat_cts.pl diff --git a/concat_fpkm.pl b/genect_rnaseq/concat_fpkm.pl similarity index 100% rename from concat_fpkm.pl rename to genect_rnaseq/concat_fpkm.pl diff --git a/check_designfile.pl b/preproc_fastq/check_designfile_rnaseq.pl similarity index 100% rename from check_designfile.pl rename to preproc_fastq/check_designfile_rnaseq.pl diff --git a/parse_trimreport.pl b/preproc_fastq/parse_trimreport.pl similarity index 100% rename from parse_trimreport.pl rename to preproc_fastq/parse_trimreport.pl diff --git a/trimgalore.sh b/preproc_fastq/trimgalore.sh similarity index 100% rename from trimgalore.sh rename to preproc_fastq/trimgalore.sh diff --git a/snp_annotator.pl b/snp_annotator.pl deleted file mode 100755 index 85d5030dd30b3a3124d4ac9c6019214c7809b92b..0000000000000000000000000000000000000000 --- a/snp_annotator.pl +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/perl -w -#snp_annotator.pl - -use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); -#use DBI; -require '/qbrc/home/bcantarel/seqprg/scripts/config.pl'; -my %opt = (); -my $results = GetOptions (\%opt,'help|h','input|i=s','tfam|t=s', - 'assembly|a=s','cancer|c'); - -my $vcf = $opt{input}; -$opt{gver} = 'GRCh38.82' unless ($opt{gver}); - -my $prefix = $vcf; -$prefix =~ s/\.vcf.*$//; - -my $runannot = "$java -Xmx10g -jar $snpeff -no-intergenic -lof -c $snpeff_config $opt{gver} $vcf"; -$runannot .= " | $java -Xmx10g -jar $snpsift annotate $refdir/dbSnp.vcf.gz -"; -$runannot .= " | $java -Xmx10g -jar $snpsift annotate $refdir/clinvar-latest.vcf.gz -"; -$runannot .= " | $java -Xmx10g -jar $snpsift annotate $refdir/ExAC.vcf.gz -"; -$runannot .= " | $java -Xmx10g -jar $snpsift caseControl -tfam $opt{tfam} -" if ($opt{tfam}); -$runannot .= " | $java -Xmx10g -jar $snpsift annotate $refdir/cosmic.vcf.gz -" if ($opt{cancer}); -$runannot .= " | $java -Xmx10g -jar $snpsift dbnsfp -v -db $refdir/dbNSFP.txt.gz -"; -$runannot .= " | $java -Xmx10g -jar $snpsift gwasCat -db $refdir/gwas_catalog.tsv -"; -$runannot .= " | bgzip > $prefix\.annot.vcf.gz\n"; -system($runannot) unless (-e "$prefix\.annot.vcf.gz"); - -# open VCF, "gunzip -c $prefix\.annot.vcf.gz |" or die $!; -# while (my $line = <VCF>) { -# chomp($line); -# if ($line =~ m/^#CHROM/) { -# my @header = split(/\t/,$line); -# ($chrom, $pos,$id,$ref,$alt,$score, -# $filter,$info,$format,@subjacc) = split(/\t/, $line); -# } -# next if ($line =~ m/^#/); -# my ($chrom, $pos,$id,$ref,$alt,$score, -# $filter,$annot,$format,@gts) = split(/\t/, $line); -# next if ($ref =~ m/\./ || $alt =~ m/\./ || $alt=~ m/,X/); -# next unless($chrom =~ m/chr\d+$|chrM|chrX/); -# my %hash = (); -# foreach $a (split(/;/,$annot)) { -# my ($key,$val) = split(/=/,$a); -# $hash{$key} = $val unless ($hash{$key}); -# } -# my %evsannot; -# if ($addannot{$chrom}{$pos}) { -# %evsannot = %{$addannot{$chrom}{$pos}}; -# $hash{MAF} = $addannot{$chrom}{$pos}{MAF}; -# $hash{LOF} =~ s/\(|\)// if ($hash{LOF}); -# $hash{MAF} = ',,' unless ($hash{MAF}); -# } -# my @af = (); -# my @afkeys = ('AMR_AF','AFR_AF','EUR_AF','SAS_AF','EAS_AF','MAF','dbNSFP_ExAC_AF','dbNSFP_ExAC_Adj_AF'); -# foreach $grp (@afkeys) { -# next unless ($hash{$grp}); -# if ($grp eq 'MAF') { -# push @af, split(/,/,$hash{$grp}); -# } -# else { -# if ($hash{$grp} =~ m/,/) { -# $hash{$grp} = 0.31; -# } -# push @af, $hash{$grp}*100; -# } -# } -# @af = sort {$b <=> $a} @af; -# next if ($af[0] && $af[0] > 25); -# #$present = $dbh->selectall_arrayref(qq{select locus from varannot where chrom='$chrom' and pos=$pos}); -# my $locusid; -# if (scalar(@{$present}) > 0) { -# $snp = shift @{$present}; -# $locusid = $snp->[0]; -# }else { -# %ins1 = (chrom=>$chrom,pos=>$pos,id=>$id,refal=>$ref,altal=>$alt, -# eur_af=>$hash{EUR_AF},eas_af=>$hash{EAS_AF},sas_af=>$hash{SAS_AF}, -# afr_af=>$hash{AFR_AF},amr_af=>$hash{AMR_AF}, -# evs_ea_af=>$hash{dbNSFP_ESP6500_EA_AF}, -# evs_aa_af=>$hash{dbNSFP_ESP6500_AA_AF},exac_af=>$hash{dbNSFP_ExAC_AF}, -# exac_adj_af=>$hash{dbNSFP_ExAC_Adj_AF},clinvar_disease=>$hash{CLNDBN}, -# clinvar_sig=>$hash{CLNSIG},clinical_assoc_evs=>$evsannot{CA}, -# gwascat=>$hash{GWASCAT_TRAIT}); -# #$locusid = insert('varannot',\%ins1); -# } -# my %trxdone; -# if ($hash{ANN}) { -# foreach $trx (split(/,/, $hash{ANN})) { -# my ($allele,$effect,$impact,$gene,$geneid,$feature, -# $featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna, -# $cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx); -# my $key = join("|",$effect,$gene,$aa); -# next if ($trxdone{$key}); -# if ($aa) { -# #$effpres = $dbh->selectall_arrayref(qq{select vareffid from vareff where chrom='$chrom' and pos=$pos and effect='$effect' and prot='$aa'}); -# }else { -# #$effpres = $dbh->selectall_arrayref(qq{select vareffid from vareff where chrom='$chrom' and pos=$pos and effect='$effect'}); -# } -# next if (scalar(@{$effpres}) > 0); -# next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ m/TF|regulatory/); -# next if ($impact =~ m/MODIFIER/ && length($info{REF}) + length($info{ALT}) > 2); -# my $exonnum = ''; -# ($exonnum, $exontot) = split(/\//,$rank) if ($rank); -# $trxdone{$key} = 1; -# my %ins2 = (locus=>$locusid,chrom=>$chrom,pos=>$pos,effect=>$effect,impact=>$impact, -# symbol=>$gene,ensembl=>$geneid,prot=>$aa,dna=>$codon,lof=>$hash{LOF}, -# nmd=>$hash{NMD},gerp_rs=>$hash{dbNSFP_GERP___RS},sift=>$hash{dbNSFP_SIFT_pred}, -# polyphen2_hvar=>$hash{dbNSFP_Polyphen2_HVAR_pred},exon_num=>$exonnum, -# phastcons100=>$hash{dbNSFP_phastCons100way_vertebrate}, -# mutationtaster=>$hash{dbNSFP_MutationTaster_pred}, -# interpro=>$hash{dbNSFP_Interpro_domain}); -# #$effid = insert('vareff',\%ins2); -# } -# } -# @posalls = ($ref,split(/,/,$alt)); -# $ac = 0; -# $an = 0; -# @deschead = split(":",$format); -# F1:foreach $subjid (@subjacc) { -# my $allele_info = shift @gts; -# @ainfo = split(/:/, $allele_info); -# my %gtinfo = (); -# foreach $k (0..$#deschead) { -# $gtinfo{$deschead[$k]} = $ainfo[$k]; -# } -# my ($x,$y) = (split(/\//, $gtinfo{GT})); -# next F1 if ($x eq '.' || $y eq '.'); -# if ($gtinfo{AD}) { -# @ar= split(/,/,$gtinfo{AD}); -# $gtinfo{DP} = sum(@ar) unless $gtinfo{DP}; -# $gtinfo{RO} = shift @ar; -# $gtinfo{AO} = join(",",@ar); -# } -# if ($gtinfo{DV} && $gtinfo{DP}) { -# $gtinfo{AO} = $gtinfo{DV}; -# $gtinfo{RO} = $gtinfo{DP} - $gtinfo{DV}; -# }elsif ($gtinfo{DPR}) { -# @ar= split(/,/,$gtinfo{DPR}); -# $gtinfo{DP} = sum(@ar) unless $gtinfo{DP}; -# $gtinfo{RO} = shift @ar; -# $gtinfo{AO} = join(",",@ar); -# } -# if ($gtinfo{VR} || $gtinfo{RR} || $gtinfo{NV} || $gtinfo{NR}) { -# $gtinfo{AO} = $gtinfo{VR} if ($gtinfo{VR}); -# $gtinfo{RO} = $gtinfo{RR} if ($gtinfo{RR}); -# $gtinfo{AO} = $gtinfo{NV} if ($gtinfo{NV}); -# $gtinfo{RO} = $gtinfo{NR} if ($gtinfo{NR}); -# } -# $gtinfo{AO} = 0 if ($gtinfo{DP} == $gtinfo{AO}); -# next F1 if ($gtinfo{DP} && $gtinfo{DP} < 10); -# next F1 if ($gtinfo{GQ} && $gtinfo{GQ} < 5); -# my %ins3 = (sample_name=>$subjid,locus=>$locusid,chrom=>$chrom,pos=>$pos, -# quality=>$score,filter=>$filter,gt=>$gtinfo{GT},dp=>$gtinfo{DP}, -# alt_obs=>$gtinfo{AO},ref_obs=>$gtinfo{RO}); -# #$samsnpid = insert('sample_snp',\%ins3); -# } -# } - diff --git a/lca.R b/vcf/lca.R similarity index 100% rename from lca.R rename to vcf/lca.R diff --git a/parse_svresults.pl b/vcf/parse_svresults.pl similarity index 100% rename from parse_svresults.pl rename to vcf/parse_svresults.pl diff --git a/transvar2bed.pl b/vcf/transvar2bed.pl similarity index 100% rename from transvar2bed.pl rename to vcf/transvar2bed.pl diff --git a/uniform_integrated_vcf.pl b/vcf/uniform_integrated_vcf.pl similarity index 100% rename from uniform_integrated_vcf.pl rename to vcf/uniform_integrated_vcf.pl