Skip to content
Snippets Groups Projects
Commit b3782a7c authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

Merge branch 'master' of git.biohpc.swmed.edu:ngsclialab/process_scripts

parents 85a26a5e 34b23838
Branches
Tags
No related merge requests found
......@@ -13,7 +13,7 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:b:c:n:p:s:d:h opt
while getopts :r:b:c:n:p:e:s:d:h opt
do
case $opt in
r) index_path=$OPTARG;;
......@@ -22,6 +22,7 @@ do
n) nuctype=$OPTARG;;
p) pair_id=$OPTARG;;
d) dedup=$OPTARG;;
e) version=$OPTARG;;
s) skiplc=1;;
h) usage;;
esac
......@@ -63,9 +64,12 @@ if [[ $nuctype == 'dna' ]]; then
samtools index -@ $NPROC ${pair_id}.ontarget.bam
samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity BARCODE_TAG=RG I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir}
#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
#samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
fi
#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir}
perl $baseDir/sequenceqc_dna.pl -e ${version} -r $index_path ${pair_id}.genomecov.txt
else
perl $baseDir/sequenceqc_rna.pl -e ${version} -r $index_path ${pair_id}.flagstat.txt
fi
#!/usr/bin/perl -w
#sequenceqc_alignment.p
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
my %opt = ();
my $results = GetOptions (\%opt,'refdir|r=s','help|h','gitdir|e=s','user|u=s');
my @statfiles = @ARGV;
my $fileowner = $opt{user};
my $gittag = 'v5';
if ($opt{gitdir}) {
$gittag = $opt{gitdir};
}elsif($ENV{'gitv'}) {
$gittag = $ENV{'gitv'};
}
foreach $sfile (@statfiles) {
$sfile =~ m/(\S+)\.genomecov.txt/;
my $prefix = $1;
my %hash;
open FLAG, "<$prefix\.trimreport.txt";
while (my $line = <FLAG>) {
chomp($line);
my ($file,$raw,$trim) = split(/\t/,$line);
$hash{rawct} += $raw;
}
open FLAG, "<$prefix\.flagstat.txt" or die $!;
while (my $line = <FLAG>) {
chomp($line);
if ($line =~ m/(\d+) \+ \d+ in total/) {
$hash{total} = $1;
}elsif ($line =~ m/(\d+) \+ \d+ read1/) {
$hash{pairs} = $1;
}elsif ($line =~ m/(\d+) \+ \d+ mapped/) {
$hash{maprate} = 100*sprintf("%.4f",$1/$hash{total});
}elsif ($line =~ m/(\d+) \+ \d+ properly paired/) {
$hash{propair} = 100*sprintf("%.4f",$1/$hash{total});
}
}
close FLAG;
open FLAG, "<$prefix\.ontarget.flagstat.txt" or die $!;
while (my $line = <FLAG>) {
chomp($line);
if ($line =~ m/(\d+) \+ \d+ in total/) {
$hash{ontarget} = $1;
}
}
unless ($hash{rawct}) {
$hash{rawct} = $hash{total};
}
my %lc;
open DUP, "<$prefix.libcomplex.txt" or die $!;
while (my $line = <DUP>) {
chomp($line);
if ($line =~ m/## METRICS/) {
$header = <DUP>;
$nums = <DUP>;
chomp($header);
chomp($nums);
my @stats = split(/\t/,$header);
my @row = split(/\t/,$nums);
my %info;
foreach my $i (0..$#stats) {
$info{$stats[$i]} = $row[$i];
}
$lc{TOTREADSLC} += $info{UNPAIRED_READS_EXAMINED} + $info{READ_PAIRS_EXAMINED};
$hash{libsize} = $info{ESTIMATED_LIBRARY_SIZE};
$lc{TOTDUPLC} += $info{UNPAIRED_READ_DUPLICATES} + $info{READ_PAIR_DUPLICATES};
}
}
close DUP;
$hash{percdups} = sprintf("%.4f",$lc{TOTDUPLC}/$lc{TOTREADSLC});
my %cov;
open COV, "<$prefix\.genomecov.txt" or die $!;
my $sumdepth;
my $totalbases;
while (my $line = <COV>) {
chomp($line);
my ($all,$depth,$bp,$total,$percent) = split(/\t/,$line);
$cov{$depth} = $percent;
$sumdepth += $depth*$bp;
$totalbases = $total unless ($totalbases);
}
my $avgdepth = sprintf("%.0f",$sumdepth/$totalbases);
my @depths = sort {$a <=> $b} keys %cov;
my @perc = @cov{@depths};
my @cum_sum = cumsum(@perc);
my $median = 0;
foreach my $i (0..$#cum_sum) {
if ($cum_sum[$i] < 0.5) {
$median = $i;
}
}
$hash{'perc100x'} = 100*sprintf("%.4f",1-$cum_sum[100]);
$hash{'perc200x'} = 100*sprintf("%.4f",1-$cum_sum[200]);
$hash{'perc500x'} = 100*sprintf("%.4f",1-$cum_sum[500]);
#### Begin File Information ########
my $status = 'PASS';
$status = 'FAIL' if ($hash{maprate} < 0.90 && $hash{'perc100x'} < 0.90);
my @stats = stat("$prefix\.flagstat.txt");
my ($day,$month,$year) = (localtime($stats[9]))[3,4,5];
$year += 1900;
$month++;
$date = join("-",$year,sprintf("%02s",$month),sprintf("%02s",$day));
$fileowner = 's'.$stats[4] unless $fileowner;
$hash{status} = $status;
$hash{date}=$date;
$hash{fileowner} = $fileowner;
##### End File Information ########
##### START separateFilesPerSample ######
open OUT, ">".$prefix.".sequence.stats.txt" or die $!;
print OUT join("\n", "Sample\t".$prefix,"Total_Raw_Count\t".$hash{rawct},
"On_Target\t".$hash{ontarget},"Map_Rate\t".$hash{maprate},
"Properly_Paired\t".$hash{propair},
"Percent_Duplicates\t".sprintf("%.2f",100*$hash{percdups}),
"Percent_on_Target\t".sprintf("%.2f",100*$hash{ontarget}/$hash{rawct}),
"All_Average_Depth\t".$avgdepth,"All_Median_Depth\t".$median,
"Percent_over_100x\t".$hash{'perc100x'},
"Percent_over_200x\t".$hash{'perc200x'},
"Percent_over_500x\t".$hash{'perc500x'},
"Alignment_Status\t".$hash{status},"Alignment_Date\t".$hash{date},
"File_Owner\t".$hash{fileowner},"Workflow_Version\t".$gittag),"\n";
close OUT;
system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt});
##### END separateFilesPerSample ######
}
sub cumsum {
my @nums = @_;
my @cumsum = ();
my $mid = 0;
for my $i (0..$#nums) {
$mid += $nums[$i];
push(@cumsum,$mid);
}
return @cumsum;
}
#!/usr/bin/perl -w
#sequenceqc_rnaseq.pl
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
my %opt = ();
my $results = GetOptions (\%opt,'refdir|r=s','help|h','gitdir|e=s','user|u=s');
my @files = @ARGV;
chomp(@files);
my $fileowner = $opt{user};
my $gittag = 'v5';
if ($opt{gitdir}) {
$gittag = $opt{gitdir};
}elsif($ENV{'gitv'}) {
$gittag = $ENV{'gitv'};
}
foreach my $file (@files) {
chomp($file);
$file =~ m/(\S+)\.flagstat.txt/;
my @directory = split(/\//, $file);
$prefix = $directory[-1];
my $sample = (split(/\./,$prefix))[0];
open OUT, ">".$sample.".sequence.stats.txt" or die;#separateFilesPerSample
$hisatfile = $file;
$hisatfile =~ s/flagstat/alignerout/;
open HISAT, "<$hisatfile";
my ($total, $pairs,$read2ct,$maprate,$concorrate);
while (my $line = <HISAT>) {
chomp($line);
if ($line =~ m/(\d+) reads; of these:/) {
$total = $1;
}elsif ($line =~ m/Number of input reads\s+\|\s+(\d+)/) {
$total = $1;
}elsif ($line =~ m/(\d+) \S+ were paired; of these:/) {
$pairs = $1;
$total = $pairs*2 if ($total == $pairs);
}elsif ($line =~ m/(\d+) pairs aligned concordantly 0 times; of these:/) {
$concorrate = sprintf("%.4f",1-($1/$pairs));
}elsif ($line =~ m/(\S+)% overall alignment rate/) {
$maprate = $1/100;
}
}
close HISAT;
open IN, "<$file" or die $!;
while ($line = <IN>) {
chomp($line);
if ($line =~ m/(\d+) \+ \d+ in total/) {
$total = $1 unless $total;
}elsif ($line =~ m/(\d+) \+ \d+ read1/) {
$pairs = $1;
}elsif ($line =~ m/(\d+) \+ \d+ read2/) {
$read2ct = $1;
}elsif ($line =~ m/(\d+) \+ \d+ mapped\s+\((\S+)%.+\)/) {
$maprate = sprintf("%.2f",$2/100) unless ($maprate);
}elsif ($line =~ m/(\d+) \+ \d+ properly paired\s+\((\S+)%.+\)/) {
$concorrate = sprintf("%.2f",$2/100) unless ($concorrate);
}
}
close IN;
my @stats=stat($file);
my ($day,$month,$year) = (localtime($stats[9]))[3,4,5];
$year += 1900;
$month++;
$date = join("-",$year,sprintf("%02s",$month),sprintf("%02s",$day));
$fileowner = 's'.$stats[4] unless $fileowner;
$total = $pairs*2 if ($total == $pairs);
my $status = 'PASS';
$status = 'FAIL' if ($maprate < 0.90);
$status = 'FAIL' if ($total < 6000000);
print OUT join("\n","Sample\t".$sample,"Total_Raw_Count\t".$total, "Read1_Map\t".$pairs,"Read2_Map\t".$read2ct,
"Map_Rate\t".$maprate,"Concordant_Rate\t".$concorrate,"Alignment_Status\t".$status,"Alignment_Date\t".$date,
"File_Owner\t".$fileowner,"Workflow_Version\t".$gittag),"\n";
system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt});
}
../alignment/add_umi_sam.py
\ No newline at end of file
......@@ -26,11 +26,20 @@ done
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
fqs=("$@")
fqs=''
i=0
numfq=$#
while [[ $i -le $numfq ]]
do
fqs="$fqs $1"
i=$((i + 1))
shift 1
done
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
if [[ -z $pair_id ]]
then
usage
fi
NPROC=$SLURM_CPUS_ON_NODE
......@@ -50,17 +59,18 @@ else
fi
source /etc/profile.d/modules.sh
module load trimgalore/0.6.4 cutadapt/1.9.1 python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3
module load trimgalore/0.6.4 cutadapt/1.9.1 bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3
threads=`expr $NPROC / 2`
trim_galore --cores $threads --paired -q 25 -o trim --illumina --gzip --length 35 ${fqs}
if [[ $filter == 1 ]]
trim_galore --cores 4 --paired -q 25 --illumina --gzip --length 35 ${fqs}
if [[ ${filter} == 1 ]]
then
perl $baseDir/parse_trimreport.pl ${pair_id}.trimreport.txt *trimming_report.txt
fi
bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa trim/*.fq.gz > out.sam
bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa *.fq.gz > out.sam
if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]]
then
......
../preproc_fastq/parse_trimreport.pl
\ No newline at end of file
#!/bin/bin/perl
use strict;
use warnings;
use diagnostics;
use Getopt::Long;
my ($genelist,$fpkm_file) = ("") x 2;
GetOptions( 'g|genes=s' => \$genelist,
'f|fpkm=s' => \$fpkm_file);
my $fpkm_out = $fpkm_file;
$fpkm_out =~ s/\.fpkm.+txt/\.fpkm.capture.txt/;
open OUT, ">$fpkm_out" or die $!;
#gene_info.human.txt file is required for gene alias information
open GENEINFO, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!;
my %geneinfo;
while(my $gline = <GENEINFO>){
chomp $gline;
my ($taxid,$geneid,$symbol,$locus,$syn,@info) = split("\t",$gline);
if($syn ne '-'){
my @synonyms = split(/\|/,$syn);
$geneinfo{$symbol} = $syn;
foreach my $synonym(@synonyms){
$geneinfo{$synonym} = $symbol;
}
}
}
#genelist is the list of genes in panel. Not specifying genelist will print out all lines(e.g. wholernaseq)
my %genes;
if($genelist ne ""){
open GENES, "<$genelist" or die $!;
while (my $gene = <GENES>){
chomp $gene;
$genes{$gene} = 1;
}
close GENES;
}
my $genelist_size = keys %genes;
#FPKM data
open FPKM, "<$fpkm_file" or die $!;
my %fpkmdata;
my %found;
my $header = <FPKM>;
chomp $header;
print OUT $header."\n";
while(my $line = <FPKM>){
chomp $line;
my ($gid,$gname,$ref,$strand,$start,$end,$cov,$fpkm,$tpm) = split("\t",$line);
if($ref =~ /^chr/){
if(!exists $fpkmdata{$gname}){$fpkmdata{$gname} = $line;}
else{$fpkmdata{$gname} = $fpkmdata{$gname}."\n".$line;}
}
if ($genelist_size == 0){ #wholernaseq
print OUT $line."\n";
}
elsif($genes{$gname} and $ref =~ /^chr/){ #panel gene found in fpkm data
print OUT $line."\n";
$found{$gname} =1;
}
}
foreach my $gene_key (keys %genes){
if($found{$gene_key}){}
else { #gene from gene list is not in fpkm data or has a different name
if($geneinfo{$gene_key}){ #gene is in gene_info.human.txt
if($fpkmdata{$geneinfo{$gene_key}}){ #gene from gene list is an alias in gene_info.human.txt
print OUT $fpkmdata{$geneinfo{$gene_key}}."\n";
}
elsif($geneinfo{$gene_key} =~ /\|/){
my @alias_array = split(/\|/, $geneinfo{$gene_key});
foreach my $alias(@alias_array){
if($fpkmdata{$alias}){print OUT $fpkmdata{$alias}."\n";}#gene from genelist is a Gene Name in gene_info.human.txt but fpkm data has an alias
}
}
}
}
}
close FPKM;
close OUT;
......@@ -11,13 +11,14 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:g:p:s:h opt
while getopts :b:g:p:f:s:h opt
do
case $opt in
b) sbam=$OPTARG;;
g) gtf=$OPTARG;;
p) pair_id=$OPTARG;;
s) stranded=$OPTARG;;
f) filter=$OPTARG;;
h) usage;;
esac
done
......@@ -48,4 +49,11 @@ featureCounts -s $stranded -M --fraction -J --ignoreDup -T $NPROC -p -g gene_nam
mkdir ${pair_id}_stringtie
cd ${pair_id}_stringtie
stringtie ../${sbam} -p $NPROC -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
if [[ -f $filter ]]
then
cd ..
mv ${pair_id}.fpkm.txt ${prefix}.fpkm.ori.txt
perl ${baseDir}/fpkm_subset_panel.pl -f ${prefix}.fpkm.ori.txt -g $filter
mv ${prefix}.fpkm.capture.txt ${pair_id}.fpkm.txt
fi
......@@ -41,7 +41,8 @@ else
module load R/3.2.1-intel
Rscript $baseDir/dea.R
Rscript $baseDir/build_ballgown.R *_stringtie
if [[ -n `ls *.edgeR.txt` ]]
edgeR=`find ./ -name *.edgeR.txt`
if [[ -n $edgeR ]]
then
perl $baseDir/concat_edgeR.pl *.edgeR.txt
fi
......
......@@ -25,12 +25,20 @@ shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
if [[ -z $pair_id ]]; then
usage
fi
fqs=("$@")
fqs=''
i=0
numfq=$#
while [[ $i -le $numfq ]]
do
fqs="$fqs $1"
i=$((i + 1))
shift 1
done
if [[ -f $fq1 ]]
then
fqs="$fq1"
......@@ -50,11 +58,11 @@ module load trimgalore/0.6.4 cutadapt/2.5 perl/5.28.0
if [ $numfq > 1 ]
then
trim_galore --paired -q 25 --illumina --gzip --length 35 ${fqs}
mv ${r1base}_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz
mv ${r2base}_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz
mv *_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz
mv *_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz
else
trim_galore -q 25 --illumina --gzip --length 35 ${fqs}
mv ${r1base}_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz
mv *_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz
cp ${pair_id}.trim.R1.fastq.gz ${pair_id}.trim.R2.fastq.gz
fi
......
......@@ -54,7 +54,7 @@ while (my $line = <CNR>) {
} else {
my @ids = split(/,/,$geneids);
foreach my $gid (@ids) {
next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI/);
next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI:/);
my ($gene,@other) = split(/:/,$gid);
my ($gname,@loc) = split(/_/,$gene);
$genes{$gname} = 1;
......@@ -93,7 +93,7 @@ while (my $line = <IN>) {
} else {
my @ids = split(/,/,$geneids);
foreach my $gid (@ids) {
next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI/);
next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI:/);
my ($gene,@other) = split(/:/,$gid);
my ($gname,@loc) = split(/_/,$gene);
$genes{$gname} = 1;
......
......@@ -18,6 +18,7 @@ do
p) pair_id=$OPTARG;;
a) algo=$OPTARG;;
t) rna=1;;
b) tbed=$OPTARG;;
q) pon==$OPTARG;;
h) usage;;
esac
......@@ -131,6 +132,11 @@ then
elif [[ $algo == 'strelka2' ]]
then
opt=''
if [[ -n $tbed ]]
then
opt="--callRegions ${tbed}.gz"
fi
if [[ $rna == 1 ]]
then
mode="--rna"
......@@ -143,7 +149,7 @@ then
for i in *.bam; do
gvcflist="$gvcflist --bam ${i}"
done
configManta.py $gvcflist --referenceFasta ${reffa} $mode --runDir manta
configManta.py $gvcflist $opt --referenceFasta ${reffa} $mode --runDir manta
manta/runWorkflow.py -m local -j $NPROC
if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]]
then
......
......@@ -90,12 +90,18 @@ baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module load htslib/gcc/1.8
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
if [ $algo == 'strelka2' ]
then
then
module load strelka/2.9.10 manta/1.3.1 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14
mkdir manta strelka
configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --runDir manta
opt=''
if [[ -n $tbed ]]
then
opt="--callRegions ${tbed}.gz"
fi
mkdir manta
configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} $opt --runDir manta
manta/runWorkflow.py -m local -j 8
if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]]
then
......@@ -135,7 +141,7 @@ then
vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz
elif [ $algo == 'shimmer' ]
then
module load shimmer/0.1.1 samtools/gcc/1.8 vcftools/0.1.14
module load samtools/gcc/1.8 R/3.6.1-gccmkl vcftools/0.1.14
shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err
perl $baseDir/add_readct_shimmer.pl
module rm java/oracle/jdk1.7.0_51
......
......@@ -67,31 +67,20 @@ then
tid=`samtools view -H ${sbam} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'`
fi
bams=''
for i in *.bam; do
bams="$bams $i"
done
#RUN DELLY
if [[ $method == 'delly' ]]
then
#module load delly2/v0.7.7-multi
if [[ -n ${normal} ]]
then
#RUN DELLY
echo -e "${nid}\tcontrol"> samples.tsv
echo -e "${tid}\ttumor" >> samples.tsv
delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
#delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf
else
#RUN DELLY
delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam}
delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam}
#delly2 filter -o ${pair_id}.delly_tra.bcf -f germline ${pair_id}.delly_translocations.bcf
fi
delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${bams}
delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${bams}
delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${bams}
delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${bams}
delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${bams}
#MERGE DELLY AND MAKE BED
bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz
bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.sv -v ${pair_id}.delly.svar.vcf.gz -s
java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment