diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index 7c0787c180e36fedd753fc780ac9ad3aa6700eae..d6cfd359c5ef980aee969dfbb637cd86021b6111 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -31,6 +31,7 @@ shift $(($OPTIND -1)) # usage #fi +source /etc/profile.d/modules.sh module load samtools/1.6 fastqc/0.11.5 samtools flagstat ${sbam} > ${pair_id}.flagstat.txt fastqc -f bam ${sbam} diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index ce02965ba5e93c64cffaa4d0ee2b457dc113aefc..c5793fab889db52afe5d17c5a82ae3efc1632507 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -36,6 +36,7 @@ then SLURM_CPUS_ON_NODE=1 fi +source /etc/profile.d/modules.sh module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 picard/2.10.3 baseDir="`dirname \"$0\"`" @@ -56,7 +57,7 @@ then else samtools view -1 -o output.unsort.bam out.sam fi - +which samtools samtools sort -n --threads $SLURM_CPUS_ON_NODE -o output.dups.bam output.unsort.bam java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.bam diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl new file mode 100755 index 0000000000000000000000000000000000000000..468660cd1c6f2169eb773c26cdb140d8669739be --- /dev/null +++ b/alignment/filter_genefusions.pl @@ -0,0 +1,78 @@ +#!/usr/bin/perl -w +#svvcf2bed.pl + +use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); + +my %opt = (); +my $results = GetOptions (\%opt,'fusion|f=s','prefix|p=s','help|h'); + +my %entrez; +open ENT, "</project/shared/bicf_workflow_ref/gene_info.human.txt" or die $!; +my $headline = <ENT>; +while (my $line = <ENT>) { + chomp($line); + my @row = split(/\t/,$line);{ + $entrez{$row[2]} = $row[1]; + } +} +open OM, "</project/shared/bicf_workflow_ref/GRCh38/utswv2_known_genefusions.txt" or die $!; +while (my $line = <OM>) { + chomp($line); + $known{$line} = 1; +} +close OM; +open OM, "</project/shared/bicf_workflow_ref/GRCh38/panel1385.genelist.txt" or die $!; +while (my $line = <OM>) { + chomp($line); + $keep{$line} = 1; +} + +open OUT, ">$opt{prefix}\.translocations.txt" or die $!; +open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!; + +print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint", + "RightBreakpoint","LeftStrand","RightStrand","RNAReads", + "DNAReads"),"\n"; +print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode", + "Fusion","DNA_support","RNA_support","Method","Frame"),"\n"; + +my $sname = (split(/_DNA_panel1385/,$opt{prefix}))[0]; + +open FUSION, "<$opt{fusion}" or die $!; +my $header = <FUSION>; +chomp($header); +$header =~ s/^#//; +my @hline = split(/\t/,$header); +while (my $line = <FUSION>) { + chomp($line); + my @row = split(/\t/,$line); + my %hash; + foreach my $i (0..$#row) { + $hash{$hline[$i]} = $row[$i]; + } + my ($left_chr,$left_pos,$left_strand) = split(/:/,$hash{LeftBreakpoint}); + my ($right_chr,$right_pos,$right_strand) = split(/:/,$hash{RightBreakpoint}); + $hash{LeftBreakpoint} = join(":",$left_chr,$left_pos); + $hash{RightBreakpoint} = join(":",$right_chr,$right_pos); + $hash{LeftStrand} = $left_strand; + $hash{RightStrand} = $right_strand; + $hash{LeftGene} = (split(/\^/,$hash{LeftGene}))[0]; + $hash{RightGene} = (split(/\^/,$hash{RightGene}))[0]; + next unless ($keep{$hash{LeftGene}} || $keep{$hash{RightGene}}); + $hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount}; + my $fname = join("--",$hash{LeftGene},$hash{RightGene}); + my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene}); + my $ename = join("--",$entrez{$hash{LeftGene}},$entrez{$hash{RightGene}}); + my ($dna_support,$rna_support)=("no","no"); + if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) { + $rna_support = "yes"; + print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene}, + $hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand}, + $hash{RightStrand},$hash{SumRNAReads},0),"\n"; + print OUTIR join("\t",$fname,$ename,"UTSW NGS Clinical Sequencing Lab",$sname,$fname." fusion", + 0,$rna_support,"STAR 2.5.2b","N/A"),"\n"; + } +} + +close OUT; +close OUTIR; diff --git a/alignment/indexbams.sh b/alignment/indexbams.sh index f69697afd1c95a23c975b5ed8fd00eea85eead08..38238fb1b337c8051e225e9e0f410ccd3fe2bd8a 100644 --- a/alignment/indexbams.sh +++ b/alignment/indexbams.sh @@ -24,6 +24,7 @@ then fi baseDir="`dirname \"$0\"`" +source /etc/profile.d/modules.sh module load samtools/1.6 for i in *.bam; do samtools index -@ $SLURM_CPUS_ON_NODE ${i} diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 6758e23676277eb0ea6c1aaf310f097eae3be05c..13822b710ed15d1d87f109a8d77777698e487ea5 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -33,6 +33,7 @@ then fi baseDir="`dirname \"$0\"`" +source /etc/profile.d/modules.sh module load picard/2.10.3 samtools/1.6 if [ $algo == 'sambamba' ] diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index 81ff2c06c15c1ef8966b8bc06bb433cc878d98a5..de0e0e67af5fd686483e5c6926dd11137637d9e0 100644 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -33,6 +33,7 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi +source /etc/profile.d/modules.sh module load samtools/gcc/1.6 picard/2.10.3 baseDir="`dirname \"$0\"`" if [[ -z $SLURM_CPUS_ON_NODE ]] diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index 842894c7b154ede0b0936e97e21def4dc4dde9e5..d739448cba399a831aec53fac7b75e98405bf399 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -11,13 +11,14 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:a:b:p:h opt +while getopts :r:a:b:p:fh opt do case $opt in r) refgeno=$OPTARG;; a) fq1=$OPTARG;; b) fq2=$OPTARG;; p) pair_id=$OPTARG;; + f) filter=1;; h) usage;; esac done @@ -30,7 +31,13 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then fi index_path=${refgeno}/CTAT_lib/ - +baseDir="`dirname \"$0\"`" +source /etc/profile.d/modules.sh module add python/2.7.x-anaconda star/2.5.2b STAR-Fusion --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --output_dir star_fusion &> star_fusion.err mv star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt + +if [[ $filter==1 ]] +then +perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt +fi diff --git a/diff_exp/geneabundance.sh b/diff_exp/geneabundance.sh index 5385804a67d97c8f7ec3a7f1559afa248c4d8091..bc54feb223fbfdffb328f99f841095557d3e659f 100644 --- a/diff_exp/geneabundance.sh +++ b/diff_exp/geneabundance.sh @@ -33,10 +33,11 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi +source /etc/profile.d/modules.sh module load subread/1.5.0-intel stringtie/1.1.2-intel featureCounts -s $stranded -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam} mkdir ${pair_id}_stringtie cd ${pair_id}_stringtie stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ../${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt - \ No newline at end of file + diff --git a/genect_rnaseq/cBioPortal_documents.pl b/genect_rnaseq/cBioPortal_documents.pl new file mode 100644 index 0000000000000000000000000000000000000000..7f1fe0c87c9c6b1059e2ad8f0f094b3ae64972e3 --- /dev/null +++ b/genect_rnaseq/cBioPortal_documents.pl @@ -0,0 +1,95 @@ +#!/usr/bin/perl -w +#cbioPortal_documents.pl + +use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); +use File::Basename; + +my $results= GetOptions (\%opt,'fpkm|f=s','logcpm|l=s','cnv|c=s','prefix|p=s','help|h'); + +open ENT_ENS, "</project/shared/bicf_workflow_ref/gene_info.human.txt" or die $!; +my %entrez; +my $ent_header = <ENT_ENS>; +while (my $line = <ENT_ENS>){ + chomp $line; + my @row = split(/\t/, $line); + $entrez{$row[2]}=$row[1]; +} +close ENT_ENS; +open ENT_ENS, "</project/shared/bicf_workflow_ref/GRCh38/genenames.txt" or die $!; +my $gn_header = <ENT_ENS>; +my %ensym; +while (my $line = <ENT_ENS>){ + chomp $line; + my @row = split(/\t/, $line); + $entrez{$row[3]}=$entrez{$row[4]}; +} +close ENT_ENS; +open ENT_ENS, "</project/shared/bicf_workflow_ref/gene2ensembl.human.txt" or die $!; +my $ens_header = <ENT_ENS>; +while (my $line = <ENT_ENS>){ + chomp $line; + my @row = split(/\t/, $line); + $entrez{$row[2]}=$row[1]; +} +close ENT_ENS; + +if($opt{fpkm}){ + open FPKM, "<$opt{fpkm}" or die $!; + open OUTF, ">$opt{prefix}\.data_expression_median_fpkm.txt" or die $!; + print OUTF join("\t","Entrez_Gene_Id",$opt{prefix}),"\n"; + my %fpkm; + my $fpkm_header = <FPKM>; + while(my $line = <FPKM>){ + chomp $line; + my @row = split(/\t/,$line); + my $ensembl = (split(/\./,$row[0]))[0]; + if ($entrez{$ensembl}) { + $entrezid = $entrez{$ensembl}; + }else { + $entrezid = $entrez{$row[1]}; + } + next unless ($entrezid); + print OUTF join("\t",$entrezid,$row[7]),"\n"; + } + close OUTF; +} + +if($opt{logcpm}){ + open IN, "<$opt{logcpm}" or die $!; + open OUTL, ">$opt{prefix}\.cBioPortal.logCPM.txt" or die $!; + print OUTL join("\t","Entrez_Gene_Id",$opt{prefix}),"\n"; + $fname = basename($opt{logcpm}); + my $sample = (split(/\./,$fname))[0]; + my $command = <IN>; + my $head = <IN>; + chomp($head); + my $total = 0; + while (my $line = <IN>) { + chomp($line); + my @row = split(/\t/,$line); + my $gene = $row[0]; + my $ct = $row[-1]; + next if($gene =~ m/^__/); + $cts{$gene}{$sample} = $ct; + $total += $ct; + } + close IN; + foreach $ens (keys %cts) { + next unless $entrez{$ens}; + unless ($cts{$ens}) { + $cts{$ens} = 0; + } + $cpm = ($cts{$ens}/$total)*1e6; + print OUTL join("\t",$entrez{$ens},sprintf("%.2f",log2($cpm))),"\n"; + } + close OUTL; +} + +sub log2 { + $n = shift @_; + if ($n < 1) { + return 0; + }else { + return(log($n)/log(2)); + } +} diff --git a/preproc_fastq/trimgalore.sh b/preproc_fastq/trimgalore.sh index 2c27f5ac219ca4618e63309e2398b5b3e5758baf..b941ef1b9710831f9f8fbcf2af34949b84e6733c 100644 --- a/preproc_fastq/trimgalore.sh +++ b/preproc_fastq/trimgalore.sh @@ -29,8 +29,9 @@ fi r1base="${fq1%.fastq*}" r2base="${fq2%.fastq*}" - +source /etc/profile.d/modules.sh module load trimgalore/0.4.1 cutadapt/1.9.1 + if [ $fq1 == $fq2 ] then trim_galore -q 25 --illumina --gzip --length 35 ${fq1} diff --git a/variants/annotvcf.sh b/variants/annotvcf.sh index 2d2e121dc82df79500075b0cfe63442be56a56b2..9d7595e930f1a3d91adf2b0841c21fa2abbe7146 100644 --- a/variants/annotvcf.sh +++ b/variants/annotvcf.sh @@ -21,8 +21,16 @@ do done function join_by { local IFS="$1"; shift; echo "$*"; } shift $(($OPTIND -1)) + +source /etc/profile.d/modules.sh module load python/2.7.x-anaconda bedtools/2.26.0 samtools/1.6 snpeff/4.3q -if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38' ]] + +if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38/hisat_index' ]] +then + index_path='/project/shared/bicf_workflow_ref/GRCh38' +fi + +if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38' ]] then tabix ${unionvcf} bcftools annotate -Oz -a ${index_path}/ExAC.vcf.gz -o ${pair_id}.exac.vcf.gz --columns CHROM,POS,AC_Het,AC_Hom,AC_Hemi,AC_Adj,AN_Adj,AC_POPMAX,AN_POPMAX,POPMAX ${unionvcf} diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 3be8a88068fe530ff9f74235df45f14c994be6bf..d4ba922c136de31ea9d0b8440f558cca0d55e312 100644 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -30,6 +30,8 @@ then SLURM_CPUS_ON_NODE=1 fi baseDir="`dirname \"$0\"`" + +source /etc/profile.d/modules.sh module load cnvkit/0.9.0 cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_antitargets.bed -o ${pair_id}.antitargetcoverage.cnn diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index bff6663236f7629051760760ea8b39356f426ef6..ad18cc1a0c0937f40dfeae3297eee3a955071f65 100644 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -8,38 +8,59 @@ while (my $line = <OM>) { $keep{$line} = 1; } -#my @keep = ("AKT2","ATM","AURKA","BAP1","BCL2L1","BCL6","BIRC3","BRCA2","CCND1","CCNE1","CD79B","CDK8","CDKN2A","CDKN2B","CEBPA","EGFR","ERBB2","FGF10","FGF14","FGF19","FGF3","FGF4","FLT1","FLT3","FOXP1","GATA6","GNA13","GNAS","IKZF1","IL7R","IRS2","KDM4C","KLHL6","KMT2A","KRAS","LYN","MCL1","MITF","MYC","NOTCH2","PIK3CA","PRKAR1A","PRKDC","PTEN","RB1","RICTOR","RUNX1T1","SDHA","TFDP1","TP53","ZNF217","CDK4","MDM4","MYCN","BTG2","BCL2L2","RAF1"); +open ENT_ENS, "</project/shared/bicf_workflow_ref/gene2ensembl.human.txt" or die $!; +my %entrez; +my $ent_header = <ENT_ENS>; +while (my $line = <ENT_ENS>){ + chomp $line; + my @row = split(/\t/, $line); + $entrez{$row[2]}=$row[1]; +} +close ENT_ENS; +open ENT_SYM, "</project/shared/bicf_workflow_ref/gene_info.human.txt" or die $!; +my %entrez; +my $ent_header = <ENT_ENS>; +while (my $line = <ENT_ENS>){ + chomp $line; + my @row = split(/\t/, $line); + $entrez{$row[2]}=$row[1]; +} +close ENT_SYM; -#my %keep = map {$_ => 1} @keep; +my $file = shift @ARGV; +my $prefix = (split(/\./,(split(/\//,$file))[0]))[0]; +open OUT, ">$prefix\.cnvcalls.txt" or die $!; +open BIO, ">$prefix\.data_cna_cbioportal.txt" or die $!; +print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n"; +print BIO join("\t","Hugo_Symbol","Entrez_Gene_Id",$prefix),"\n"; -my @cvncalls = @ARGV; -foreach my $file (@ARGV) { - open IN, "<$file" or die $!; - my $out = $file; - $out =~ s/call.cns/cnvcalls.txt/; - open OUT, ">$out" or die $!; - print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n"; - my $header = <IN>; - while (my $line = <IN>) { - chomp($line); - my ($chr,$start,$end,$geneids,$log2,$cn,$depth, - $probes,$weight) = split(/\t/,$line); - my %genes; - my @ids = split(/;|,/,$geneids); - foreach my $gid (@ids) { - my ($key,$value) = split(/=/,$gid); - if ($key eq 'ensembl_gn' || $key eq 'identifier') { - $genes{$value} = 1 if $keep{$value}; - } +open IN, "<$file" or die $!; +my $header = <IN>; +while (my $line = <IN>) { + chomp($line); + my ($chr,$start,$end,$geneids,$log2,$cn,$depth, + $probes,$weight) = split(/\t/,$line); + my %genes; + my @ids = split(/;|,/,$geneids); + foreach my $gid (@ids) { + my ($key,$value) = split(/=/,$gid); + if ($key eq 'ensembl_gn' || $key eq 'identifier') { + $genes{$value} = 1 if $keep{$value}; } - my $newgeneids = join(";", keys %genes); - my $len = sprintf("%.1f",($end-$start)/1000); - next if ($cn == 2) || scalar(keys %genes) < 1; - my $abtype = 'amplification'; - $abtype = 'loss' if ($cn < 2); - print OUT join("\t",$newgeneids,$chr,$start,$end,$abtype,$cn,$weight),"\n"; } - close IN; - close OUT; + my $newgeneids = join(";", keys %genes); + my $len = sprintf("%.1f",($end-$start)/1000); + next if ($cn == 2) || scalar(keys %genes) < 1; + my $abtype = 'amplification'; + $abtype = 'loss' if ($cn < 2); + foreach $gene (keys %gene) { + $cn_cbio = $cn -2; + $cn_cbio = 2 if ($cn > 4); + print BIO join("\t",$gene,$entrez{$gene},$cn_cbio),"\n"; + } + print OUT join("\t",$newgeneids,$chr,$start,$end,$abtype,$cn,$weight),"\n"; } +close IN; +close OUT; +close BIO; diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index bcf723d55b5381ac1a926e7d1905d76c8487d8d2..e90ad2e38958aa19bcfa471659efac508c5ecf9c 100644 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -56,6 +56,7 @@ else usage fi +source /etc/profile.d/modules.sh module load gatk/3.7 samtools/1.6 samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 2bd6851349e72477ee02636316999c685ae2de1b..e8a8161fabaec9c4dc5f06a3845ce082cbeac2e3 100644 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -5,17 +5,19 @@ usage() { echo "-h Help documentation for gatkrunner.sh" echo "-r --Path to Reference Genome with the file genome.fa" echo "-p --Prefix for output file name" - echo "-a --Algorithm/Command: gatk, mpileup, speedseq, platypus " + echo "-a --Algorithm/Command: gatk, mpileup, speedseq, platypus" + echo "-t --RNASeq Data" echo "Example: bash hisat.sh -p prefix -r /path/GRCh38 -a gatk" exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:a:b:p:h opt +while getopts :r:a:b:p:th opt do case $opt in r) index_path=$OPTARG;; p) pair_id=$OPTARG;; a) algo=$OPTARG;; + t) rna=1;; h) usage;; esac done @@ -53,6 +55,8 @@ else echo "Missing Fasta File: ${index_path}/genome.fa" usage fi + +source /etc/profile.d/modules.sh module load python/2.7.x-anaconda picard/2.10.3 samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel for i in *.bam; do @@ -102,15 +106,21 @@ then bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz elif [[ $algo == 'strelka2' ]] then + if [[ $rna==1 ]] + then + mode="--rna" + else + mode="--exome" + fi module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14 mkdir manta strelka gvcflist='' for i in *.bam; do gvcflist="$gvcflist --bam ${i}" done - configManta.py $gvcflist --referenceFasta ${reffa} --exome --runDir manta + configManta.py $gvcflist --referenceFasta ${reffa} $mode --runDir manta manta/runWorkflow.py -m local -j 8 - configureStrelkaGermlineWorkflow.py $gvcflist --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka + configureStrelkaGermlineWorkflow.py $gvcflist --referenceFasta ${reffa} $mode --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka strelka/runWorkflow.py -m local -j 8 bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz fi diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 748fc353beeea43060d3ea3732be61ed23b50f6c..a130aab9fa99dd5e671745ed16cec1662c0d6633 100644 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -78,6 +78,9 @@ else fi baseDir="`dirname \"$0\"`" +source /etc/profile.d/modules.sh + + if [ $algo == 'strelka2' ] then module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14 @@ -113,7 +116,7 @@ then else awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" fi - vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.pmutect.vcf.gz + vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz fi if [ $algo == 'varscan' ] diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 8e715ca54c5b96618be17cf41b29c2b4c9266565..376676943ff4e8d6de0a6b61000efedebd70d5f5 100644 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -46,7 +46,7 @@ else usage fi - +source /etc/profile.d/modules.sh module load speedseq/20160506 novoBreak/v1.1.3 delly2/v0.7.7-multi samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 mkdir temp diff --git a/variants/union.sh b/variants/union.sh index 391259bb8fe6e899b021921fbb6150bde4819e71..09bae157dd8bb250e4af85564c6e0dcb6f30445c 100644 --- a/variants/union.sh +++ b/variants/union.sh @@ -21,6 +21,7 @@ function join_by { local IFS="$1"; shift; echo "$*"; } shift $(($OPTIND -1)) baseDir="`dirname \"$0\"`" +source /etc/profile.d/modules.sh module load bedtools/2.26.0 samtools/1.6 HS=*.hotspot.vcf.gz diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index c44815b1c5d24096d3f66cb5f35e5bbaabb3c447..c3377c2164df58a03b75254aad8827b7f47734a9 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -38,6 +38,9 @@ foreach $vcf (@vcffiles) { } my ($chrom, $pos,$id,$ref,$alt,$score, $filter,$annot,$format,@gts) = split(/\t/, $line); + if ($pos eq '90088702') { + warn "allele frequency\n"; + } my %hash = (); foreach $a (split(/;/,$annot)) { my ($key,$val) = split(/=/,$a); @@ -65,14 +68,20 @@ foreach $vcf (@vcffiles) { $missingGT ++; next FG; } - if ($gtdata{AD}){ + if ($gtdata{DP4}) { #varscan uses this + my ($ref_fwd,$ref_rev,$alt_fwd,$alt_rev) = split(',',$gtdata{DP4}); + $gtdata{AO} = $alt_fwd+$alt_rev; + $gtdata{RO} = $ref_fwd+$ref_rev; + $gtdata{DP} = $ref_fwd+$ref_rev+$alt_fwd+$alt_rev; + $gtdata{AD} = join(",",$gtdata{RO},$gtdata{AO}); + }elsif ($gtdata{AD} && $gtdata{AD} =~ m/,/){ ($gtdata{RO},@alts) = split(/,/,$gtdata{AD}); $gtdata{AO} = join(",",@alts); $gtdata{DP} = $gtdata{RO}; foreach (@alts) { $gtdata{DP} += $_; } - } elsif (exists $gtdata{NR} && exists $gtdata{NV}) { + } elsif (exists $gtdata{NR} && exists $gtdata{NV}) { #platypus uses this $gtdata{DP} = $gtdata{NR}; $gtdata{AO} = $gtdata{NV}; $gtdata{RO} = $gtdata{DP} - $gtdata{AO};