From 50154ca3d5801ae2913e598d31b5f4a34093a086 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Mon, 30 Mar 2020 14:35:19 -0500 Subject: [PATCH] update viral alignment and mutect filter bug --- alignment/combine_idxstats.pl | 61 +++++++++++++++++++++++++++++++++++ alignment/viralalign.sh | 2 +- variants/germline_vc.sh | 4 +-- variants/somatic_vc.sh | 4 +-- 4 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 alignment/combine_idxstats.pl diff --git a/alignment/combine_idxstats.pl b/alignment/combine_idxstats.pl new file mode 100644 index 0000000..e4bc42e --- /dev/null +++ b/alignment/combine_idxstats.pl @@ -0,0 +1,61 @@ +#!/usr/bin/perl + +use warnings; +use strict; +use diagnostics; +use Getopt::Long; +#use Cwd; + +#my $cwd = getcwd(); + +my @allFiles = @ARGV; +chomp(@allFiles); +my %data; +my @samples; +open OUT, ">viral.info" or die $!; + +my %virus; +$virus{'NC_001538.1'}="BK polyomavirus"; +$virus{'NC_007605.1'}="Human gammaherpesvirus 4"; +$virus{'NC_009334.1'}="Human herpesvirus 4"; +$virus{'NC_001488.1'}="Human T-lymphotropic virus 2"; +$virus{'D90400.1'}="Human papillomavirus type 58"; +$virus{'M14119.1'}="Human papillomavirus type 11 (HPV-11)"; +$virus{'J04353.1'}="Human papillomavirus type 31 (HPV-31)"; +$virus{'M12732.1'}="Human papillomavirus type 33"; +$virus{'M74117.1'}="Human papillomavirus type 35"; +$virus{'M62849.1'}="Human papillomavirus ORFs"; +$virus{'X74481.1'}="Human papillomavirus type 52"; +$virus{'X77858.1'}="Human papilloma virus type 59"; +$virus{'NC_001355.1'}="Human papillomavirus type 6b"; +$virus{'NC_001357.1'}="Human papillomavirus - 18"; +$virus{'NC_001436.1'}="Human T-lymphotropic virus 1"; +$virus{'NC_001699.1'}="JC polyomavirus"; +$virus{'EF177177.1'}="Human papillomavirus type 56 clone Qv26342"; +$virus{'NC_009333.1'}="Human herpesvirus 8"; +$virus{'EF202167.1'}="Human papillomavirus type 45 isolate Qv31748"; +$virus{'NC_014407.1'}="Human polyomavirus 7"; +$virus{'HM355825.1'}="Merkel cell polyomavirus isolate MCVw156"; +$virus{'NC_001526.4'}="Human papillomavirus type 16"; + + +print OUT join("\t","SampleName","VirusAcc","VirusName","Mapped","Unmapped"),"\n"; +foreach my $file_idx(@allFiles){ + print "File Included: ".$file_idx."\n"; + #my @filePath = split("/", $file_idx); + my $fileName = $file_idx; + $fileName =~ s/\.idxstats\.txt//g; + push @samples, $fileName; + + open INFILE, "<$file_idx" or die $!; + + foreach my $line(<INFILE>){ + chomp $line; + my @data_array = split("\t",$line); + if($data_array[2]!=0 and $data_array[0] ne '*'){ + $data{$data_array[0]}{$fileName} = $data_array[2]."\t".$data_array[3]; + print OUT join("\t",$fileName,$data_array[0],$virus{$data_array[0]},$data_array[2],$data_array[3]),"\n"; + } + } + close INFILE; +} diff --git a/alignment/viralalign.sh b/alignment/viralalign.sh index 4e72b31..d3608a5 100644 --- a/alignment/viralalign.sh +++ b/alignment/viralalign.sh @@ -33,7 +33,7 @@ then fi reffa=${ref}/idt_virus_reference.fa - +source /etc/profile.d/modules.sh module load bwa/intel/0.7.17 picard/2.10.3 samtools/1.6 samtools view -@ 8 -b -u -F 2 ${bam} |samtools sort -n - >unmapped.bam diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index f8b2eaa..9f598d9 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -126,8 +126,8 @@ then bamlist+="-I ${i} " done gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` - gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf - vcf-sort ${pair_id}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz + #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf + vcf-sort ${pair_id}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [[ $algo == 'strelka2' ]] then diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index f441cdf..36a09b0 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -121,8 +121,8 @@ then module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf - gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf - vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz + #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14 -- GitLab