Skip to content
Snippets Groups Projects
Commit a39f0a5e authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

Merge branch 'master' of git.biohpc.swmed.edu:BICF/Astrocyte/process_scripts

parents 27d472fe 77c74abb
Branches
Tags
No related merge requests found
...@@ -30,7 +30,7 @@ baseDir="`dirname \"$0\"`" ...@@ -30,7 +30,7 @@ baseDir="`dirname \"$0\"`"
if [[ -z $index_path ]] if [[ -z $index_path ]]
then then
index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj' index_path='/project/shared/bicf_workflow_ref/human/GRCh38'
fi fi
# Check for mandatory options # Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]] if [[ -z $pair_id ]] || [[ -z $sbam ]]
...@@ -51,13 +51,18 @@ echo "${targets}antitargets.bed" ...@@ -51,13 +51,18 @@ echo "${targets}antitargets.bed"
echo "${normals}" echo "${normals}"
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load cnvkit/0.9.5 bedtools/2.26.0 module load cnvkit/0.9.5 bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8
unset DISPLAY unset DISPLAY
#samtools index ${sbam}
#bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/NGSCheckMate.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf
cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn
cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn
cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr
cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
cnvkit.py call ${pair_id}.cns -o ${pair_id}.call.cns cnvkit.py call ${pair_id}.cns -o ${pair_id}.call.cns
#cnvkit.py call ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.ballelecall.cns
cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf
cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed
perl $baseDir/filter_cnvkit.pl *.call.cns perl $baseDir/filter_cnvkit.pl ${pair_id}.call.cns
#!/usr/bin/perl -w
#integrate_datasets.pl
#module load vcftools/0.1.14 samtools/1.6 bedtools/2.26.0
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
my %opt = ();
my $results = GetOptions (\%opt,'td|d=s','indel|i=s','sv|s=s','tumor|t=s');
my @files = grep(/vcf.gz/,values %opt);
foreach $file (@files) {
chomp($file);
open IN, "gunzip -c $file |" or die $!;
my $outfile = $file;
$outfile =~ s/vcf.*/pass.vcf/;
my @gtheader;
open OUT, ">$outfile" or die $!;
W1:while (my $line = <IN>) {
chomp($line);
my $format = 'GT:MAF:AO';
if ($line =~ m/^#/) {
if ($line =~ m/^#CHROM/) {
print OUT "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">\n";
print OUT "##FORMAT=<ID=MAF,Number=1,Type=Integer,Description=\"Mutation Allele Frequency\">\n";
print OUT "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n";
my @header = split(/\t/,$line);
($chrom, $pos,$id,$ref,$alt,$score,
$filter,$info) = split(/\t/, $line);
print OUT join("\t",$chrom, $pos,$id,$ref,$alt,$score,
$filter,$info,'FORMAT',$opt{tumor}),"\n";
}else {
print OUT $line,"\n";
}
next;
}
my ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$annot) = split(/\t/, $line);
next if ($ref =~ m/\./ || $alt =~ m/\./ || $alt=~ m/,X/);
my %hash = ();
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val unless ($hash{$key});
}
next unless ($hash{ANN});
next unless ($hash{ANN} =~ m/HIGH|MODERATE|LOW/);
my %gtinfo;
foreach (split(/,/,$hash{DP2})) {
$gtinfo{AO} += $_;
}
$gtinfo{MAF} = $hash{VAF};
$hash{AF} = $hash{VAF};
next if $gtinfo{AO} < 10;
next if ($hash{VAF} < 0.01);
$gt = '0/0';
$gt = '0/1' if ($hash{VAF} > 0.3);
$gt = '1/1' if ($hash{VAF} > 0.8);
my $gtinfo = join(":",$gt,$gtinfo{MAF},$gtinfo{AO});
my @nannot;
foreach $info (sort {$a cmp $b} keys %hash) {
if (defined $hash{$info}) {
push @nannot, $info."=".$hash{$info};
}else {
push @nannot, $info;
}
}
my $newannot = join(";",@nannot);
print OUT join("\t",$chrom, $pos,$id,$ref,$alt,$score,$filter,$newannot,
$format,$gtinfo),"\n";
}
close IN;
}
...@@ -6,7 +6,6 @@ use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); ...@@ -6,7 +6,6 @@ use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
my %opt = (); my %opt = ();
my $results = GetOptions (\%opt,'td|d=s','indel|i=s','sv|s=s','tumor|t=s'); my $results = GetOptions (\%opt,'td|d=s','indel|i=s','sv|s=s','tumor|t=s');
my @files = grep(/vcf.gz/,values %opt); my @files = grep(/vcf.gz/,values %opt);
foreach $file (@files) { foreach $file (@files) {
chomp($file); chomp($file);
...@@ -18,8 +17,8 @@ foreach $file (@files) { ...@@ -18,8 +17,8 @@ foreach $file (@files) {
W1:while (my $line = <IN>) { W1:while (my $line = <IN>) {
chomp($line); chomp($line);
if ($line =~ m/^#/) { if ($line =~ m/^#/) {
print OUT $line,"\n";
if ($line =~ m/^#CHROM/) { if ($line =~ m/^#CHROM/) {
print OUT "##INFO=<ID=AF,Number=A,Type=Integer,Description=\"Alternate allele observation frequency\">\n";
my @header = split(/\t/,$line); my @header = split(/\t/,$line);
($chrom, $pos,$id,$ref,$alt,$score, ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$info,$format,@gtheader) = split(/\t/, $line); $filter,$info,$format,@gtheader) = split(/\t/, $line);
...@@ -32,6 +31,7 @@ foreach $file (@files) { ...@@ -32,6 +31,7 @@ foreach $file (@files) {
} }
} }
} }
print OUT $line,"\n";
next; next;
} }
my ($chrom, $pos,$id,$ref,$alt,$score, my ($chrom, $pos,$id,$ref,$alt,$score,
...@@ -53,7 +53,7 @@ foreach $file (@files) { ...@@ -53,7 +53,7 @@ foreach $file (@files) {
my @mutallfreq = (); my @mutallfreq = ();
foreach my $k (0..$#ainfo) { foreach my $k (0..$#ainfo) {
$gtinfo{$subjid}{$deschead[$k]} = $ainfo[$k]; $gtinfo{$subjid}{$deschead[$k]} = $ainfo[$k];
$hash{$deschead[$k]} = $ainfo[$k] if ($subjid eq $opt{tumor}); #$hash{$deschead[$k]} = $ainfo[$k] if ($subjid eq $opt{tumor});
} }
$gtinfo{$subjid}{DP} = (split(/,/,$gtinfo{$subjid}{DP}))[0] if ($gtinfo{$subjid}{DP}); $gtinfo{$subjid}{DP} = (split(/,/,$gtinfo{$subjid}{DP}))[0] if ($gtinfo{$subjid}{DP});
next F1 unless ($gtinfo{$subjid}{DP} && $gtinfo{$subjid}{DP} ne '.' && $gtinfo{$subjid}{DP} >= 1); next F1 unless ($gtinfo{$subjid}{DP} && $gtinfo{$subjid}{DP} ne '.' && $gtinfo{$subjid}{DP} >= 1);
...@@ -108,7 +108,7 @@ foreach $file (@files) { ...@@ -108,7 +108,7 @@ foreach $file (@files) {
} }
} }
my $newannot = join(";",@nannot); my $newannot = join(";",@nannot);
print OUT join("\t",$chrom, $pos,$id,$ref,$alt,$score,$filter,$newannot, print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$newannot,
$format,@gts),"\n"; $format,@gts),"\n";
} }
close IN; close IN;
......
...@@ -15,7 +15,7 @@ do ...@@ -15,7 +15,7 @@ do
r) index_path=$OPTARG;; r) index_path=$OPTARG;;
b) sbam=$OPTARG;; b) sbam=$OPTARG;;
p) pair_id=$OPTARG;; p) pair_id=$OPTARG;;
l) idtbed=$OPTARG;; l) itdbed=$OPTARG;;
h) usage;; h) usage;;
esac esac
done done
...@@ -47,11 +47,9 @@ fi ...@@ -47,11 +47,9 @@ fi
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load samtools/1.6 snpeff/4.3q vcftools/0.1.14 bcftools/gcc/1.8 bedtools/2.26.0 module load samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 htslib/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0
stexe=`which samtools` stexe=`which samtools`
#flt3='chr13:28003274-28100592' samtools view -@ $SLURM_CPUS_ON_NODE -L ${itdbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz
tabix ${pair_id}.itdseek.vcf.gz
samtools view -@ $SLURM_CPUS_ON_NODE -L ${idtbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort |bgzip > ${pair_id}.idtseek.vcf.gz bcftools norm --fasta-ref $reffa -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz
tabix ${pair_id}.idtseek.vcf.gz
bedtools intersect -header -b ${idtbed} -a ${pair_id}.idtseek.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 - |bgzip > ${pair_id}.idtseek_tandemdup.vcf.gz
...@@ -26,8 +26,18 @@ baseDir="`dirname \"$0\"`" ...@@ -26,8 +26,18 @@ baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q
if [[ -a "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
bgzip -f ${pair_id}.uniform.vcf bgzip -f ${pair_id}.uniform.vcf
j=${pair_id}.uniform.vcf.gz j=${pair_id}.uniform.vcf.gz
tabix -f $j tabix -f $j
bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz bcftools norm --fasta-ref $reffa -m - -Oz $j -o ${pair_id}.norm.vcf.gz
...@@ -75,16 +75,14 @@ while (my $line = <VCF>) { ...@@ -75,16 +75,14 @@ while (my $line = <VCF>) {
push @newgts, join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO}); push @newgts, join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
} }
next if ($missingGT == scalar(@gts)); next if ($missingGT == scalar(@gts));
if ($hash{SVTYPE} eq 'DUP:TANDEM') { if ($hash{SVTYPE} eq 'DUP:TANDEM') {
print DUP join("\t",$chrom,$pos,$id,'N','<DUP>',$score,$filter,$annot,$newformat,@newgts),"\n"; print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'INS') {
print SV join("\t",$chrom,$pos,$id,'N','<INS>',$score,$filter,$annot,$newformat,@newgts),"\n";
}elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') { }elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') {
if (abs($hash{SVLEN}) < 50) { if (abs($hash{SVLEN}) < 50) {
print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}else { }else {
$newalt = "<".$hash{SVTYPE}.">"; $newalt = "<".$hash{SVTYPE}.">";
print SV join("\t",$chrom,$pos,$id,'N',$newalt,$score,$filter,$annot,$newformat,@newgts),"\n"; print SV join("\t",$chrom,$pos,$id,'N',$newalt,$score,$filter,$annot,$newformat,@newgts),"\n";
} }
} }
......
...@@ -9,11 +9,12 @@ usage() { ...@@ -9,11 +9,12 @@ usage() {
exit 1 exit 1
} }
OPTIND=1 # Reset OPTIND OPTIND=1 # Reset OPTIND
while getopts :r:p:h opt while getopts :r:l:p:h opt
do do
case $opt in case $opt in
r) index_path=$OPTARG;; r) index_path=$OPTARG;;
p) pair_id=$OPTARG;; p) pair_id=$OPTARG;;
l) idtbed=$OPTARG;;
h) usage;; h) usage;;
esac esac
done done
...@@ -55,10 +56,10 @@ done ...@@ -55,10 +56,10 @@ done
pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP
pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf
cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz
perl $baseDir/parse_pindel.pl ${pair_id} pindel.vcf.gz tabix pindel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > indel.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz
perl $baseDir/norm_annot.sh -r ${index_path} -p pindel_indel -v indel.vcf.gz perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz
mv pindel_indel.norm.vcf.gz ${pair_id}.pindel_indel.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf |bgzip > ${pair_id}.pindel_tandemdup.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf | bedtools intersect -header -b ${idtbed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz
...@@ -15,7 +15,6 @@ do ...@@ -15,7 +15,6 @@ do
case $opt in case $opt in
r) index_path=$OPTARG;; r) index_path=$OPTARG;;
p) pair_id=$OPTARG;; p) pair_id=$OPTARG;;
v) vcf=$OPTARG;; v) vcf=$OPTARG;;
h) usage;; h) usage;;
esac esac
...@@ -24,6 +23,16 @@ function join_by { local IFS="$1"; shift; echo "$*"; } ...@@ -24,6 +23,16 @@ function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1)) shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`" baseDir="`dirname \"$0\"`"
if [[ -a "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q
...@@ -32,7 +41,7 @@ mv ${vcf} ${pair_id}.ori.vcf.gz ...@@ -32,7 +41,7 @@ mv ${vcf} ${pair_id}.ori.vcf.gz
bgzip -f ${pair_id}.uniform.vcf bgzip -f ${pair_id}.uniform.vcf
j=${pair_id}.uniform.vcf.gz j=${pair_id}.uniform.vcf.gz
tabix -f $j tabix -f $j
bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz bcftools norm --fasta-ref $reffa -m - -Oz $j -o ${pair_id}.norm.vcf.gz
bash $baseDir/annotvcf.sh -p ${pair_id} -r $index_path -v ${pair_id}.norm.vcf.gz bash $baseDir/annotvcf.sh -p ${pair_id} -r $index_path -v ${pair_id}.norm.vcf.gz
/project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub ${pair_id}.annot.vcf.gz -p -a -o ${pair_id}.vcf /project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub ${pair_id}.annot.vcf.gz -p -a -o ${pair_id}.vcf
bgzip -f ${pair_id}.vcf bgzip -f ${pair_id}.vcf
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment