diff --git a/alignment/combine_idxstats.pl b/alignment/combine_idxstats.pl new file mode 100644 index 0000000000000000000000000000000000000000..e4bc42ea8bdc0788358c2c324c7f3cc8de8fd261 --- /dev/null +++ b/alignment/combine_idxstats.pl @@ -0,0 +1,61 @@ +#!/usr/bin/perl + +use warnings; +use strict; +use diagnostics; +use Getopt::Long; +#use Cwd; + +#my $cwd = getcwd(); + +my @allFiles = @ARGV; +chomp(@allFiles); +my %data; +my @samples; +open OUT, ">viral.info" or die $!; + +my %virus; +$virus{'NC_001538.1'}="BK polyomavirus"; +$virus{'NC_007605.1'}="Human gammaherpesvirus 4"; +$virus{'NC_009334.1'}="Human herpesvirus 4"; +$virus{'NC_001488.1'}="Human T-lymphotropic virus 2"; +$virus{'D90400.1'}="Human papillomavirus type 58"; +$virus{'M14119.1'}="Human papillomavirus type 11 (HPV-11)"; +$virus{'J04353.1'}="Human papillomavirus type 31 (HPV-31)"; +$virus{'M12732.1'}="Human papillomavirus type 33"; +$virus{'M74117.1'}="Human papillomavirus type 35"; +$virus{'M62849.1'}="Human papillomavirus ORFs"; +$virus{'X74481.1'}="Human papillomavirus type 52"; +$virus{'X77858.1'}="Human papilloma virus type 59"; +$virus{'NC_001355.1'}="Human papillomavirus type 6b"; +$virus{'NC_001357.1'}="Human papillomavirus - 18"; +$virus{'NC_001436.1'}="Human T-lymphotropic virus 1"; +$virus{'NC_001699.1'}="JC polyomavirus"; +$virus{'EF177177.1'}="Human papillomavirus type 56 clone Qv26342"; +$virus{'NC_009333.1'}="Human herpesvirus 8"; +$virus{'EF202167.1'}="Human papillomavirus type 45 isolate Qv31748"; +$virus{'NC_014407.1'}="Human polyomavirus 7"; +$virus{'HM355825.1'}="Merkel cell polyomavirus isolate MCVw156"; +$virus{'NC_001526.4'}="Human papillomavirus type 16"; + + +print OUT join("\t","SampleName","VirusAcc","VirusName","Mapped","Unmapped"),"\n"; +foreach my $file_idx(@allFiles){ + print "File Included: ".$file_idx."\n"; + #my @filePath = split("/", $file_idx); + my $fileName = $file_idx; + $fileName =~ s/\.idxstats\.txt//g; + push @samples, $fileName; + + open INFILE, "<$file_idx" or die $!; + + foreach my $line(<INFILE>){ + chomp $line; + my @data_array = split("\t",$line); + if($data_array[2]!=0 and $data_array[0] ne '*'){ + $data{$data_array[0]}{$fileName} = $data_array[2]."\t".$data_array[3]; + print OUT join("\t",$fileName,$data_array[0],$virus{$data_array[0]},$data_array[2],$data_array[3]),"\n"; + } + } + close INFILE; +} diff --git a/alignment/viralalign.sh b/alignment/viralalign.sh index 4e72b3122c137e82f0048ec0c0bf07cdf81ade0c..d3608a59c349e98f5c382d1a8a9430388116d933 100644 --- a/alignment/viralalign.sh +++ b/alignment/viralalign.sh @@ -33,7 +33,7 @@ then fi reffa=${ref}/idt_virus_reference.fa - +source /etc/profile.d/modules.sh module load bwa/intel/0.7.17 picard/2.10.3 samtools/1.6 samtools view -@ 8 -b -u -F 2 ${bam} |samtools sort -n - >unmapped.bam diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index f8b2eaa1f7412c751c0a289ca4bdca5a71a2ba98..9f598d9f68435b27b3759a5c8adffe361ce3c188 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -126,8 +126,8 @@ then bamlist+="-I ${i} " done gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` - gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf - vcf-sort ${pair_id}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz + #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf + vcf-sort ${pair_id}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [[ $algo == 'strelka2' ]] then diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index f441cdf44fbf8d912b8ecef9df99e3f373eaa2b5..36a09b0a1b2f85a6bfdc96e1a51c9848cbdaa8ca 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -121,8 +121,8 @@ then module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf - gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf - vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz + #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14