...
 
Commits (16)
......@@ -12,13 +12,14 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:x:y:p:uh opt
while getopts :r:x:y:g:p:uh opt
do
case $opt in
r) index_path=$OPTARG;;
x) fq1=$OPTARG;;
y) fq2=$OPTARG;;
u) umi='umi';;
g) read_group=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
......@@ -36,6 +37,11 @@ then
SLURM_CPUS_ON_NODE=1
fi
if [[ -z $read_group ]]
then
read_group=$pair_id
fi
source /etc/profile.d/modules.sh
module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 picard/2.10.3
......@@ -45,9 +51,9 @@ diff $fq1 $fq2 > difffile
if [ -s difffile ]
then
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam
else
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${fq1} > out.sam
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} > out.sam
fi
if [[ $umi == 'umi' ]] && [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38' ]]
......
......@@ -20,18 +20,42 @@ while (my $line = <OM>) {
$known{$line} = 1;
}
close OM;
open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1385.genelist.txt" or die $!;
open OM, "</project/shared/bicf_workflow_ref/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!;
while (my $line = <OM>) {
chomp($line);
$keep{$line} = 1;
}
my @exonfiles = `ls */*.exons.txt`;
foreach $efile (@exonfiles) {
chomp($efile);
my ($dir,$pair,@etc) = split(/\/|\./,$efile);
open EFILE, "<$efile" or die $!;
my $header = <EFILE>;
while (my $line = <EFILE>) {
my ($leftgene,$rightgene,$lefttrx,$righttrx,$exonsrc,
$exonnum,$exon_chr,$exon_start,$exon_end) = split(/\t/,$line);
if ($exonsrc =~ m/5/) {
push @leftexons, $exonnum;
}else {
push @rightexons, $exonnum;
}
}
$exonnuminfo{$dir}{leftgene} = join("-",$leftexons[0],$leftexons[-1]);
$exonnuminfo{$dir}{rightgene} = join("-",$rightexons[0],$rightexons[-1]);
}
open OUT, ">$opt{prefix}\.translocations.txt" or die $!;
open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!;
open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!;
print OUT join("\t","FusionName","LeftGene","RightGene","LefttBreakpoint",
"RightBreakpoint","LeftStrand","RightStrand","RNAReads",
"DNAReads"),"\n";
print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand",
"RightGene","RightBreakpoint","RightGeneExons","RightStrand",
"RNAReads","DNAReads","FusionType","Annot"),"\n";
print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode",
"Fusion","DNA_support","RNA_support","Method","Frame"),"\n";
......@@ -61,15 +85,24 @@ while (my $line = <FUSION>) {
$hash{SumRNAReads} += $hash{JunctionReadCount}+$hash{SpanningFragCount};
my $fname = join("--",$hash{LeftGene},$hash{RightGene});
my $fname2 = join("--",sort {$a cmp $b} $hash{LeftGene},$hash{RightGene});
my $key = join("_",$hash{LeftGene},$left_pos,$hash{RightGene},$right_pos);
my ($leftexon,$rightexon);
if ($exonnuminfo{$key}) {
$leftexon = $exonnuminfo{$key}{leftgene};
$rightexon = $exonnuminfo{$key}{rightgene};
}
my ($dna_support,$rna_support)=("no") x 2;
if ($known{$fname2} && ($hash{SumRNAReads} >= 3)|| ($hash{SumRNAReads} >= 5)) {
$rna_support = "yes";
print OUT join("\t",$fname,$hash{LeftGene},$hash{RightGene},
$hash{LeftBreakpoint},$hash{RightBreakpoint},$hash{LeftStrand},
$hash{RightStrand},$hash{SumRNAReads},0),"\n";
print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand},
$hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand},
$hash{SumRNAReads},0,$hash{PROT_FUSION_TYPE},$hash{annots}),"\n";
print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion",
$dna_support,$rna_support,"STAR Fusion","N/A"),"\n";
print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion",
$dna_support,$rna_support,"STAR Fusion","N/A"),"\n";
}
}
......
......@@ -63,14 +63,15 @@ else
else
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/hisat_index/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt
fi
if [[ $umi==1 ]]
if [[ $umi == 1 ]]
then
python ${baseDir}/add_umi_sam.py -s out.sam -o output.bam
else
samtools view -1 --threads $SLURM_CPUS_ON_NODE -o output.bam out.sam
fi
samtools sort -@ $SLURM_CPUS_ON_NODE -O BAM -n -o output.nsort.bam output.bam
samtools sort -@ $SLURM_CPUS_ON_NODE -O BAM -o ${pair_id}.bam output.bam
java -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.nsort.bam O=${pair_id}.bam
fi
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.bam
......@@ -11,13 +11,14 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:b:p:fh opt
while getopts :r:a:b:p:m:fh opt
do
case $opt in
r) refgeno=$OPTARG;;
a) fq1=$OPTARG;;
b) fq2=$OPTARG;;
p) pair_id=$OPTARG;;
m) method=$OPTARG;;
f) filter=1;;
h) usage;;
esac
......@@ -30,19 +31,44 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
usage
fi
index_path=${refgeno}/CTAT_lib/
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module add python/2.7.x-anaconda star/2.5.2b bedtools/2.26.0
STAR-Fusion --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --output_dir star_fusion &> star_fusion.err
mv star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt
if [[ $method == 'trinity' ]]
then
module load trinity/1.4.0
tmphome="/tmp/$USER"
if [[ -z $tmphome ]]
then
mkdir $tmphome
fi
export TMP_HOME=$tmphome
index_path=${refgeno}/CTAT_lib_trinity/
trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion
#cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt
cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt
else
module add star/2.5.2b
index_path=${refgeno}/CTAT_lib/
STAR-Fusion --genome_lib_dir ${index_path} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err
cp ${pair_id}_star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt
fi
module load singularity/2.6.0
export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images"
cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh
if [[ $filter==1 ]]
if [[ $filter == 1 ]]
then
cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed
bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt
#cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint|perl -pe 's/:/\t/g' |awk '{print $1"\t"$2"\t"$4"\t"$5"\tAVG"}' > coords.txt
#java -Xmx1G -jar /project/shared/bicf_workflow_ref/seqprg/oncofuse-1.1.1/Oncofuse.jar -a hg38 coords.txt coord AVG oncofuse.out
perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt
cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed
bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt
perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt
fi
......@@ -33,7 +33,7 @@ fi
if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38' ]]
then
tabix ${unionvcf}
bcftools annotate -Oz -a ${index_path}/gnomad.exomes.r2.0.2.HG38.vcf.gz -o ${pair_id}.gnomad.vcf.gz --columns CHROM,POS,AF,AF_raw,AF_POPMAX,POPMAX ${unionvcf}
bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX -o ${pair_id}.gnomad.vcf.gz ${unionvcf}
tabix ${pair_id}.gnomad.vcf.gz
bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.gnomad.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz
......
......@@ -57,7 +57,7 @@ else
fi
source /etc/profile.d/modules.sh
module load gatk/3.7 samtools/1.6
module load gatk/3.8 samtools/1.6
samtools index -@ $SLURM_CPUS_ON_NODE ${sbam}
if [[ $algo == 'gatkbam_rna' ]]
......@@ -68,16 +68,15 @@ then
java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id}
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.clean.bam
java -Xmx4g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T SplitNCigarReads -R ${reffa} -I ${pair_id}.sort.bam -o ${pair_id}.split.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS
java -Xmx32g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${pair_id}.split.bam -nt $SLURM_CPUS_ON_NODE -nct 1
java -Xmx32g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -I ${pair_id}.split.bam -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam
java -Xmx32g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct $SLURM_CPUS_ON_NODE
java -Xmx32g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct $SLURM_CPUS_ON_NODE
java -Xmx32g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${pair_id}.split.bam -nt 8 -nct 1
java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -I ${pair_id}.split.bam -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam
java -Xmx16g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 8
java -Xmx16g -jar $GATK_JAR -L ${index_path}/../gatk_regions.list -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8
elif [[ $algo == 'gatkbam' ]]
then
samtools index -@ $SLURM_CPUS_ON_NODE ${sbam}
java -Xmx32g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${sbam} -nt $SLURM_CPUS_ON_NODE -nct 1
java -Xmx32g -jar $GATK_JAR -I ${sbam} -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam
java -Xmx32g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct $SLURM_CPUS_ON_NODE
java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct $SLURM_CPUS_ON_NODE
java -Xmx16g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${sbam} -nt 8 -nct 1
java -Xmx16g -jar $GATK_JAR -I ${sbam} -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam
java -Xmx16g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 8
java -Xmx16g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8
fi
......@@ -79,12 +79,12 @@ then
java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz
elif [[ $algo == 'speedseq' ]]
then
module load speedseq/20160506
module load speedseq/gcc/0.1.2
speedseq var -t $SLURM_CPUS_ON_NODE -o ssvar ${reffa} *.bam
vcf-annotate -n --fill-type ssvar.vcf.gz| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.ssvar.vcf.gz -
elif [[ $algo == 'gatk' ]]
then
module load gatk/3.7
module load gatk/3.8
gvcflist=''
for i in *.bam; do
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T HaplotypeCaller -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -variant_index_type LINEAR -variant_index_parameter 128000 --emitRefConfidence GVCF -I $i -o ${i}.{}.chr.gatk.g.vcf -nct 2 -L {}"
......
#!/usr/bin/perl
#parse_pindel
my $pair_id = shift @ARGV;
my $vcf = shift @ARGV;
open SI, ">$pair_id.indel.vcf" or die $!;
open SV, ">$pair_id.sv.vcf" or die $!;
open DUP, ">$pair_id.dup.vcf" or die $!;
open VCF, "gunzip -c $vcf|" or die $!;
while (my $line = <VCF>) {
chomp($line);
if ($line =~ m/#/) {
print SI $line,"\n";
print SV $line,"\n";
print DUP $line,"\n";
if ($line =~ m/#CHROM/) {
my @header = split(/\t/,$line);
($chrom, $pos,$id,$ref,$alt,$score,
$filter,$info,$format,@subjacc) = split(/\t/, $line);
}
next;
}
my ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts) = split(/\t/, $line);
my %hash = ();
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val;
}
my @deschead = split(/:/,$format);
my $newformat = 'GT:DP:AD:AO:RO';
my @newgts = ();
my $missingGT = 0;
my @allele;
FG:foreach my $i (0..$#gts) {
my $sid = $subjacc[$i];
my @gtinfo = split(/:/,$gts[$i]);
my %gtdata;
if ($allele_info eq '.') {
push @newgts, '.:.:.:.:.';
$missingGT ++;
next FG;
}
foreach my $i (0..$#deschead) {
$gtdata{$deschead[$i]} = $gtinfo[$i];
}
if ($gtdata{AD}){
($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
$gtdata{AO} = join(",",@alts);
$gtdata{DP} = $gtdata{RO};
foreach (@alts) {
$gtdata{DP} += $_;
}
} elsif (exists $gtdata{NR} && exists $gtdata{NV}) {
$gtdata{DP} = $gtdata{NR};
$gtdata{AO} = $gtdata{NV};
$gtdata{RO} = $gtdata{DP} - $gtdata{AO};
} elsif (exists $gtdata{AO} && exists $gtdata{RO}) {
$gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO});
$gtdata{DP} = $gtdata{RO};
foreach (split(',',$gtdata{AO})) {
$gtdata{DP} += $_;
}
}
if (($gtdata{DP} && $gtdata{DP} < 10) || ()) {
$missingGT ++;
} if ($gtdata{DP} == 0 || $gtdata{GT} eq './.') {
push @newgts, '.:.:.:.:.';
$missingGT ++;
next FG;
}
push @allele, sprintf("%.4f",$gtdata{AO}/$gtdata{DP});
push @newgts, join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
}
next if ($missingGT == scalar(@gts));
if ($hash{SVTYPE} eq 'DUP:TANDEM') {
print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'INS') {
print SV join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') {
if (abs($hash{SVLEN}) < 50) {
print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}else {
print SV join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}
}
}
close VCF;
#!/bin/bash
#svcalling.sh
usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Path to Reference Genome with the file genome.fa"
echo "-p --Prefix for output file name"
echo "Example: bash svcalling.sh -p prefix -r /path/GRCh38 -a gatk"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:p:h opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
h) usage;;
esac
done
function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $index_path ]]; then
usage
fi
if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
if [[ -a "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
source /etc/profile.d/modules.sh
genomefiledate=`find ${reffa} -maxdepth 0 -printf "%TY%Tm%Td\n"`
module load samtools/1.6 pindel/0.2.5-intel snpeff/4.3q
touch ${pair_id}.pindel.config
for i in *.bam; do
sname="${i%.bam}"
echo -e "${i}\t400\t${sname}" >> ${pair_id}.pindel.config
done
pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP
pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf
cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz
perl $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz
perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf |bgzip > ${pair_id}.pindel_tandemdup.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf |bgzip > ${pair_id}.pindel_sv.vcf.gz
......@@ -75,7 +75,7 @@ fi
if [ $algo == 'mutect' ]
then
module load parallel python/2.7.x-anaconda gatk/3.5 bcftools/intel/1.3 bedtools/2.25.0 snpeff/4.2 vcftools/0.1.14
module load parallel python/2.7.x-anaconda gatk/3.8 bcftools/intel/1.3 bedtools/2.25.0 snpeff/4.2 vcftools/0.1.14
cut -f 1 /project/shared/bicf_workflow_ref/GRCh38/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar $GATK_JAR -R ${genome_reference} -D ${dbSnp_reference} -T MuTect2 -stand_call_conf 30 -stand_emit_conf 10.0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor}.final.bam -I:normal ${normal}.final.bam --cosmic ${cosmic} -o ${tumor}.{}.mutect.vcf -L {}"
vcf-concat ${tumor}*.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe 's/TUMOR/'${tumor}'/' | perl -pe 's/NORMAL/'${normal}'/g' |bgzip > ${tumor}_${normal}.mutect.vcf.gz
fi
......
......@@ -113,7 +113,7 @@ fi
if [ $algo == 'mutect2' ]
then
module load parallel gatk/3.7 snpeff/4.3q samtools/1.6 vcftools/0.1.14
module load parallel gatk/3.8 snpeff/4.3q samtools/1.6 vcftools/0.1.14
if [ -z ${tbed} ]
then
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
......@@ -125,13 +125,13 @@ fi
if [ $algo == 'varscan' ]
then
module load samtools/1.6 VarScan/2.4.2 speedseq/20160506 vcftools/0.1.14
sambamba mpileup --tmpdir=./ -t $SLURM_CPUS_ON_NODE ${tumor} --samtools "-C 50 -f ${reffa}" > t.mpileup
sambamba mpileup --tmpdir=./ -t $SLURM_CPUS_ON_NODE ${normal} --samtools "-C 50 -f ${reffa}" > n.mpileup
VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1
VarScan copynumber n.mpileup t.mpileup vscancnv
module load samtools/1.6 VarScan/2.4.2 vcftools/0.1.14
module rm java/oracle/jdk1.7.0_51
module load snpeff/4.3q
samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup
samtools mpileup -C 50 -f ${reffa} $normal > n.mpileup
VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1
VarScan copynumber n.mpileup t.mpileup vscancnv
vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz
fi
......
......@@ -11,14 +11,15 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:b:n:k:p:h opt
while getopts :r:p:b:i:e:n:a:h opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
b) sbam=$OPTARG;;
k) keepid=$OPTARG;;
i) tid=$OPTARG;;
n) normal=$OPTARG;;
a) method=$OPTARG;;
h) usage;;
esac
done
......@@ -46,96 +47,65 @@ else
usage
fi
source /etc/profile.d/modules.sh
module load speedseq/20160506 novoBreak/v1.1.3 delly2/v0.7.7-multi samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14
module load samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14
mkdir temp
#if [[ -n ${normal} ]]
#then
#run_novoBreak.sh /cm/shared/apps/novoBreak/novoBreak_distribution_v1.1.3rc ${reffa} ${sbam} ${normal} $SLURM_CPUS_ON_NODE
#perl $baseDir/vcf2bed.sv.pl novoBreak.pass.flt.vcf |sort -T temp -V -k 1,1 -k 2,2n > novobreak.bed
#mv novoBreak.pass.flt.vcf ${pair_id}.novobreak.vcf
#bgzip ${pair_id}.novobreak.vcf
#fi
if [[ -n ${normal} ]]
if [[ $method == 'delly' ]]
then
#RUN DELLY
delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 filter -t BND -o delly_tra.bcf -f somatic -s samples.tsv delly_translocations.bcf
delly2 filter -t DUP -o delly_dup.bcf -f somatic -s samples.tsv delly_duplications.bcf
delly2 filter -t INV -o delly_inv.bcf -f somatic -s samples.tsv delly_inversions.bcf
delly2 filter -t DEL -o delly_del.bcf -f somatic -s samples.tsv delly_deletion.bcf
delly2 filter -t INS -o delly_ins.bcf -f somatic -s samples.tsv delly_insertion.bcf
else
#RUN DELLY
delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam}
delly2 filter -t BND -o delly_tra.bcf -f germline delly_translocations.bcf
delly2 filter -t DUP -o delly_dup.bcf -f germline delly_duplications.bcf
delly2 filter -t INV -o delly_inv.bcf -f germline delly_inversions.bcf
delly2 filter -t DEL -o delly_del.bcf -f germline delly_deletion.bcf
delly2 filter -t INS -o delly_ins.bcf -f germline delly_insertion.bcf
module load delly2/v0.7.7-multi samtools/1.6 snpeff/4.3q
if [[ -n ${normal} ]]
then
#RUN DELLY
echo -e "${normal}\tcontrol"> samples.tsv
echo -e "${tumor}\ttumor" >> samples.tsv
delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 filter -t BND -o delly_tra.bcf -f somatic -s samples.tsv delly_translocations.bcf
delly2 filter -t DUP -o delly_dup.bcf -f somatic -s samples.tsv delly_duplications.bcf
delly2 filter -t INV -o delly_inv.bcf -f somatic -s samples.tsv delly_inversions.bcf
delly2 filter -t DEL -o delly_del.bcf -f somatic -s samples.tsv delly_deletion.bcf
delly2 filter -t INS -o delly_ins.bcf -f somatic -s samples.tsv delly_insertion.bcf
else
#RUN DELLY
delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t DEL -o delly_deletion.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t INS -o delly_insertion.bcf -q 30 -g ${reffa} ${sbam}
delly2 filter -t BND -o delly_tra.bcf -f germline delly_translocations.bcf
delly2 filter -t DUP -o delly_dup.bcf -f germline delly_duplications.bcf
delly2 filter -t INV -o delly_inv.bcf -f germline delly_inversions.bcf
delly2 filter -t DEL -o delly_del.bcf -f germline delly_deletion.bcf
delly2 filter -t INS -o delly_ins.bcf -f germline delly_insertion.bcf
fi
#MERGE DELLY AND MAKE BED
bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf
bgzip delly.vcf
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 delly.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.delly.vcf.gz
fi
#MERGE DELLY AND MAKE BED
bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf
perl $baseDir/vcf2bed.sv.pl delly.vcf | sort -V -k 1,1 -k 2,2n > delly.bed
bgzip delly.vcf
if [[ -n ${normal} ]]
if [[ $method == 'lumpy' ]]
then
tabix delly.vcf.gz
bcftools view -O z -o ${pair_id}.delly.vcf.gz -s ${keepid} delly.vcf.gz
else
mv delly.vcf.gz ${pair_id}.delly.vcf.gz
fi
tabix ${pair_id}.delly.vcf.gz
#MAKE FILES FOR LUMPY
samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${sbam}
samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o splitters.bam -
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o discordants.bam -
#RUN LUMPY
if [[ -n ${normal} ]]
then
samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${normal}
#MAKE FILES FOR LUMPY
samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${sbam}
samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o normal.splitters.bam -
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o normal.discordants.bam -
speedseq sv -t $SLURM_CPUS_ON_NODE -o sssv -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed
java -jar $SNPEFF_HOME/SnpSift.jar filter -n "GEN[0].SU > 1" sssv.sv.vcf.gz |bgzip > tumor.sssv.sv.vcf.gz
tabix tumor.sssv.sv.vcf.gz
bcftools view -O z -o ${pair_id}.sssv.sv.vcf.gz -s ${keepid} tumor.sssv.sv.vcf.gz
else
speedseq sv -t $SLURM_CPUS_ON_NODE -o ${pair_id}.sssv -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o splitters.bam -
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o discordants.bam -
#RUN LUMPY
if [[ -n ${normal} ]]
then
samtools sort -@ $SLURM_CPUS_ON_NODE -n -o namesort.bam ${normal}
samtools view -h namesort.bam | samblaster -M -a --excludeDups --addMateTags --maxSplitCount 2 --minNonOverlap 20 -d discordants.sam -s splitters.sam > temp.sam
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o normal.splitters.bam -
gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o normal.discordants.bam -
speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed
else
speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed
fi
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 lumpy.sv.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].DV >= 20 )" | bgzip > ${pair_id}.lumpy.vcf.gz
fi
java -jar $SNPEFF_HOME/SnpSift.jar filter "GEN[0].SU > 10" ${pair_id}.sssv.sv.vcf.gz > lumpy.vcf
perl $baseDir/vcf2bed.sv.pl lumpy.vcf > lumpy.bed
#COMPARE DELLY & LUMPY
#if [[ -n ${normal} ]]
#then
#bedtools multiinter -cluster -header -names novobreak delly lumpy -i novobreak.bed delly.bed lumpy.bed > sv.intersect.bed
#zcat ${pair_id}.novobreak.vcf.gz | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz
#grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${pair_id}.novobreak.vcf.gz | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz
#else
#fi
bedtools multiinter -cluster -header -names delly lumpy -i delly.bed lumpy.bed > sv.intersect.bed
grep delly sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v 'start' |grep -v 'novobreak' | bedtools intersect -header -b stdin -a ${pair_id}.delly.vcf.gz |bgzip > svt2.vcf.gz
grep lumpy sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v 'start' |grep -v 'delly' |grep -v 'novobreak' | bedtools intersect -header -b stdin -a ${pair_id}.sssv.sv.vcf.gz |bgzip > svt3.vcf.gz
vcf-concat svt*.vcf.gz | vcf-sort -t temp > ${pair_id}.sv.vcf
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf > ${pair_id}.sv.annot.vcf
perl $baseDir/svannot.pl -i ${pair_id}.sv.annot.vcf
bgzip ${pair_id}.sv.annot.vcf
tabix ${pair_id}.sv.annot.vcf.gz
......@@ -118,7 +118,7 @@ foreach $vcf (@vcffiles) {
push @gtdesc, join(":",$id,$afinfo{$id});
push @newgts, $newgts{$id};
}
$lines{$chrom}{$pos}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc];
$lines{$chrom}{$pos}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc];
}
close VCF;
}
......@@ -128,37 +128,39 @@ if (grep(/mutect/,@vcffiles)) {
}
F1:foreach $chr (sort {$a cmp $b} keys %lines) {
F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) {
my @callset;
my %csets;
F3:foreach $caller (sort {$a cmp $b} keys %{$lines{$chr}{$pos}}) {
my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot,
$format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$caller}};
@gtdesc = @{$gtdescref};
foreach $gtd (@gtdesc) {
my ($id,$dp,$maf) = split(/:/,$gtd);
push @{$csets{$id}}, [$caller,$dp,$maf];
}
push @callset, join("|",$caller,$alt,@gtdesc);
}
my $consistent = 1;
foreach $id (keys %csets) {
my @calls = @{$csets{$id}};
my @calls = sort {$a[2] <=> $b[2]} @calls;
$consistent = 0 if ($calls[0][2] < 0.25 && $calls[-1][2] - $calls[0][2] > 0.10 && $calls[-1][2]/($calls[0][2]+0.001) > 3);
}
F3:foreach $caller (@callers) {
if ($lines{$chr}{$pos}{$caller}) {
F4:foreach $alt (sort {$a <=> $b} keys %{$lines{$chr}{$pos}}) {
my @callset;
my %csets;
F3:foreach $caller (sort {$a cmp $b} keys %{$lines{$chr}{$pos}{$alt}}) {
my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot,
$format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$caller}};
@gts = @{$gtsref};
$format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$alt}{$caller}};
@gtdesc = @{$gtdescref};
$annot = $annot.";CallSet=".join(",",@callset);
unless ($consistent) {
$annot = $annot.";CallSetInconsistent=1";
foreach $gtd (@gtdesc) {
my ($id,$dp,$maf) = split(/:/,$gtd);
push @{$csets{$id}}, [$caller,$dp,$maf];
}
push @callset, join("|",$caller,$alt,@gtdesc);
}
my $consistent = 1;
foreach $id (keys %csets) {
my @calls = @{$csets{$id}};
my @calls = sort {$a[2] <=> $b[2]} @calls;
$consistent = 0 if ($calls[0][2] < 0.25 && $calls[-1][2] - $calls[0][2] > 0.10 && $calls[-1][2]/($calls[0][2]+0.001) > 3);
}
F3:foreach $caller (@callers) {
if ($lines{$chr}{$pos}{$alt}{$caller}) {
my ($chrom, $pos,$id,$ref,$alt,$score,$filter,$annot,
$format,$gtsref,$gtdescref) = @{$lines{$chr}{$pos}{$alt}{$caller}};
@gts = @{$gtsref};
@gtdesc = @{$gtdescref};
$annot = $annot.";CallSet=".join(",",@callset);
unless ($consistent) {
$annot = $annot.";CallSetInconsistent=1";
}
print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts),"\n";
last F3;
}
print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts),"\n";
last F3;
}
}
}
......