Skip to content
Snippets Groups Projects
Commit 50154ca3 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

update viral alignment and mutect filter bug

parent f431810c
No related merge requests found
#!/usr/bin/perl
use warnings;
use strict;
use diagnostics;
use Getopt::Long;
#use Cwd;
#my $cwd = getcwd();
my @allFiles = @ARGV;
chomp(@allFiles);
my %data;
my @samples;
open OUT, ">viral.info" or die $!;
my %virus;
$virus{'NC_001538.1'}="BK polyomavirus";
$virus{'NC_007605.1'}="Human gammaherpesvirus 4";
$virus{'NC_009334.1'}="Human herpesvirus 4";
$virus{'NC_001488.1'}="Human T-lymphotropic virus 2";
$virus{'D90400.1'}="Human papillomavirus type 58";
$virus{'M14119.1'}="Human papillomavirus type 11 (HPV-11)";
$virus{'J04353.1'}="Human papillomavirus type 31 (HPV-31)";
$virus{'M12732.1'}="Human papillomavirus type 33";
$virus{'M74117.1'}="Human papillomavirus type 35";
$virus{'M62849.1'}="Human papillomavirus ORFs";
$virus{'X74481.1'}="Human papillomavirus type 52";
$virus{'X77858.1'}="Human papilloma virus type 59";
$virus{'NC_001355.1'}="Human papillomavirus type 6b";
$virus{'NC_001357.1'}="Human papillomavirus - 18";
$virus{'NC_001436.1'}="Human T-lymphotropic virus 1";
$virus{'NC_001699.1'}="JC polyomavirus";
$virus{'EF177177.1'}="Human papillomavirus type 56 clone Qv26342";
$virus{'NC_009333.1'}="Human herpesvirus 8";
$virus{'EF202167.1'}="Human papillomavirus type 45 isolate Qv31748";
$virus{'NC_014407.1'}="Human polyomavirus 7";
$virus{'HM355825.1'}="Merkel cell polyomavirus isolate MCVw156";
$virus{'NC_001526.4'}="Human papillomavirus type 16";
print OUT join("\t","SampleName","VirusAcc","VirusName","Mapped","Unmapped"),"\n";
foreach my $file_idx(@allFiles){
print "File Included: ".$file_idx."\n";
#my @filePath = split("/", $file_idx);
my $fileName = $file_idx;
$fileName =~ s/\.idxstats\.txt//g;
push @samples, $fileName;
open INFILE, "<$file_idx" or die $!;
foreach my $line(<INFILE>){
chomp $line;
my @data_array = split("\t",$line);
if($data_array[2]!=0 and $data_array[0] ne '*'){
$data{$data_array[0]}{$fileName} = $data_array[2]."\t".$data_array[3];
print OUT join("\t",$fileName,$data_array[0],$virus{$data_array[0]},$data_array[2],$data_array[3]),"\n";
}
}
close INFILE;
}
...@@ -33,7 +33,7 @@ then ...@@ -33,7 +33,7 @@ then
fi fi
reffa=${ref}/idt_virus_reference.fa reffa=${ref}/idt_virus_reference.fa
source /etc/profile.d/modules.sh
module load bwa/intel/0.7.17 picard/2.10.3 samtools/1.6 module load bwa/intel/0.7.17 picard/2.10.3 samtools/1.6
samtools view -@ 8 -b -u -F 2 ${bam} |samtools sort -n - >unmapped.bam samtools view -@ 8 -b -u -F 2 ${bam} |samtools sort -n - >unmapped.bam
......
...@@ -126,8 +126,8 @@ then ...@@ -126,8 +126,8 @@ then
bamlist+="-I ${i} " bamlist+="-I ${i} "
done done
gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd`
gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf
vcf-sort ${pair_id}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz vcf-sort ${pair_id}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
elif [[ $algo == 'strelka2' ]] elif [[ $algo == 'strelka2' ]]
then then
......
...@@ -121,8 +121,8 @@ then ...@@ -121,8 +121,8 @@ then
module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa}
gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf
gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf
vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
elif [ $algo == 'varscan' ] elif [ $algo == 'varscan' ]
then then
module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14 module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment