Merge branch 'master' of git.biohpc.swmed.edu:ngsclialab/process_scripts

b3782a7c · Brandi Cantarel · 85a26a5e · 34b23838 · b3782a7c · b3782a7c
Commit b3782a7c authored 4 years ago by Brandi Cantarel
--- a/alignment/bamqc.sh
+++ b/alignment/bamqc.sh
@@ -13,7 +13,7 @@ usage() {
  exit 1
 }
 OPTIND=1 # Reset OPTIND
-while getopts :r:b:c:n:p:s:d:h opt
+while getopts :r:b:c:n:p:e:s:d:h opt
 do
    case $opt in
        r) index_path=$OPTARG;;
@@ -22,6 +22,7 @@ do
        n) nuctype=$OPTARG;;
        p) pair_id=$OPTARG;;
 	d) dedup=$OPTARG;;
+	e) version=$OPTARG;;
 	s) skiplc=1;;
        h) usage;;
    esac
@@ -63,9 +64,12 @@ if [[ $nuctype == 'dna' ]]; then
 	samtools index -@ $NPROC ${pair_id}.ontarget.bam
 	samtools flagstat  ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
 	samtools view  -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
+	java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity BARCODE_TAG=RG I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
 	#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir}
-	#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
 	#samtools view  -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
    fi
    #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir}
+    perl $baseDir/sequenceqc_dna.pl -e ${version} -r $index_path ${pair_id}.genomecov.txt
+else
+    perl $baseDir/sequenceqc_rna.pl -e ${version} -r $index_path ${pair_id}.flagstat.txt
 fi
--- a/alignment/sequenceqc_dna.pl
+++ b/alignment/sequenceqc_dna.pl
+#!/usr/bin/perl -w
+#sequenceqc_alignment.p
+
+use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
+my %opt = ();
+my $results = GetOptions (\%opt,'refdir|r=s','help|h','gitdir|e=s','user|u=s');
+
+my @statfiles = @ARGV;
+
+my $fileowner = $opt{user};
+my $gittag = 'v5';
+if ($opt{gitdir}) {
+    $gittag = $opt{gitdir};
+}elsif($ENV{'gitv'}) {
+    $gittag = $ENV{'gitv'};
+}
+foreach $sfile (@statfiles) {
+  $sfile =~ m/(\S+)\.genomecov.txt/;
+  my $prefix = $1;
+  my %hash;
+  open FLAG, "<$prefix\.trimreport.txt";
+  while (my $line = <FLAG>) {
+    chomp($line);
+    my ($file,$raw,$trim) = split(/\t/,$line);
+    $hash{rawct} += $raw;
+  }
+  open FLAG, "<$prefix\.flagstat.txt" or die $!;
+  while (my $line = <FLAG>) {
+    chomp($line);
+    if ($line =~ m/(\d+) \+ \d+ in total/) {
+      $hash{total} = $1;
+    }elsif ($line =~ m/(\d+) \+ \d+ read1/) {
+      $hash{pairs} = $1;
+    }elsif ($line =~ m/(\d+) \+ \d+ mapped/) {
+      $hash{maprate} = 100*sprintf("%.4f",$1/$hash{total});
+    }elsif ($line =~ m/(\d+) \+ \d+ properly paired/) {
+      $hash{propair} = 100*sprintf("%.4f",$1/$hash{total});
+    }
+  }
+  close FLAG;
+  open FLAG, "<$prefix\.ontarget.flagstat.txt" or die $!;
+  while (my $line = <FLAG>) {
+    chomp($line);
+    if ($line =~ m/(\d+) \+ \d+ in total/) {
+      $hash{ontarget} = $1;
+    }
+  }
+  unless ($hash{rawct}) {
+    $hash{rawct} = $hash{total};
+  }
+  my %lc;
+  open DUP, "<$prefix.libcomplex.txt" or die $!;
+  while (my $line = <DUP>) {
+    chomp($line);
+    if ($line =~ m/## METRICS/) {
+      $header = <DUP>;
+      $nums = <DUP>;
+      chomp($header);
+      chomp($nums);
+      my @stats = split(/\t/,$header);
+      my @row = split(/\t/,$nums);
+      my %info;
+      foreach my $i (0..$#stats) {
+	$info{$stats[$i]} = $row[$i];
+      }
+      $lc{TOTREADSLC} += $info{UNPAIRED_READS_EXAMINED} + $info{READ_PAIRS_EXAMINED};
+      $hash{libsize} = $info{ESTIMATED_LIBRARY_SIZE};
+      $lc{TOTDUPLC} +=  $info{UNPAIRED_READ_DUPLICATES} + $info{READ_PAIR_DUPLICATES};
+    }
+  }
+  close DUP;
+  $hash{percdups} = sprintf("%.4f",$lc{TOTDUPLC}/$lc{TOTREADSLC});
+  my %cov;
+  open COV, "<$prefix\.genomecov.txt" or die $!;
+  my $sumdepth;
+  my $totalbases;
+  while (my $line = <COV>) {
+    chomp($line);
+    my ($all,$depth,$bp,$total,$percent) = split(/\t/,$line);
+    $cov{$depth} = $percent;
+    $sumdepth += $depth*$bp;
+    $totalbases = $total unless ($totalbases);
+  }
+  my $avgdepth = sprintf("%.0f",$sumdepth/$totalbases);
+  my @depths = sort {$a <=> $b} keys %cov;
+  my @perc = @cov{@depths};
+  my @cum_sum = cumsum(@perc);
+  my $median = 0;
+  foreach my $i (0..$#cum_sum) {
+    if ($cum_sum[$i] < 0.5) {
+      $median = $i;
+    }
+  }
+  $hash{'perc100x'} = 100*sprintf("%.4f",1-$cum_sum[100]);
+  $hash{'perc200x'} = 100*sprintf("%.4f",1-$cum_sum[200]);
+  $hash{'perc500x'} = 100*sprintf("%.4f",1-$cum_sum[500]);
+  
+  #### Begin File Information ########
+  my $status = 'PASS';
+  $status = 'FAIL' if ($hash{maprate} < 0.90 && $hash{'perc100x'} < 0.90);
+  my @stats = stat("$prefix\.flagstat.txt");
+  my ($day,$month,$year) = (localtime($stats[9]))[3,4,5];
+  $year += 1900;
+  $month++;
+  $date = join("-",$year,sprintf("%02s",$month),sprintf("%02s",$day));
+  $fileowner = 's'.$stats[4] unless $fileowner;
+  $hash{status} = $status;
+  $hash{date}=$date;
+  $hash{fileowner} = $fileowner;
+  ##### End File Information ########
+  ##### START separateFilesPerSample ######
+  open OUT, ">".$prefix.".sequence.stats.txt" or die $!;
+  print OUT join("\n", "Sample\t".$prefix,"Total_Raw_Count\t".$hash{rawct},
+		 "On_Target\t".$hash{ontarget},"Map_Rate\t".$hash{maprate},
+		 "Properly_Paired\t".$hash{propair},
+		 "Percent_Duplicates\t".sprintf("%.2f",100*$hash{percdups}),
+		 "Percent_on_Target\t".sprintf("%.2f",100*$hash{ontarget}/$hash{rawct}),
+		 "All_Average_Depth\t".$avgdepth,"All_Median_Depth\t".$median,
+		 "Percent_over_100x\t".$hash{'perc100x'},
+		 "Percent_over_200x\t".$hash{'perc200x'},
+		 "Percent_over_500x\t".$hash{'perc500x'},
+		 "Alignment_Status\t".$hash{status},"Alignment_Date\t".$hash{date},
+		 "File_Owner\t".$hash{fileowner},"Workflow_Version\t".$gittag),"\n";
+  close OUT;
+  system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt});
+  ##### END separateFilesPerSample ######
+}
+
+
+sub cumsum {
+  my @nums = @_;
+  my @cumsum = ();
+  my $mid = 0;
+  for my $i (0..$#nums) {
+    $mid += $nums[$i];
+    push(@cumsum,$mid);
+  }
+  return @cumsum;
+}
--- a/alignment/sequenceqc_rna.pl
+++ b/alignment/sequenceqc_rna.pl
+#!/usr/bin/perl -w
+#sequenceqc_rnaseq.pl
+
+use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
+my %opt = ();
+my $results = GetOptions (\%opt,'refdir|r=s','help|h','gitdir|e=s','user|u=s');
+
+my @files = @ARGV;
+chomp(@files);
+
+my $fileowner = $opt{user};
+my $gittag = 'v5';
+if ($opt{gitdir}) {
+    $gittag = $opt{gitdir};
+}elsif($ENV{'gitv'}) {
+    $gittag = $ENV{'gitv'};
+}
+
+foreach my $file (@files) {
+  chomp($file);
+  $file =~ m/(\S+)\.flagstat.txt/;
+  my @directory = split(/\//, $file);
+  $prefix = $directory[-1];
+  my $sample = (split(/\./,$prefix))[0];
+  open OUT, ">".$sample.".sequence.stats.txt" or die;#separateFilesPerSample
+  $hisatfile = $file;
+  $hisatfile =~ s/flagstat/alignerout/;
+  open HISAT, "<$hisatfile";
+  my ($total, $pairs,$read2ct,$maprate,$concorrate);
+  while (my $line = <HISAT>) {
+    chomp($line);
+    if ($line =~ m/(\d+) reads; of these:/) {
+      $total = $1;
+    }elsif ($line =~ m/Number of input reads\s+\|\s+(\d+)/) {
+	$total = $1;
+    }elsif ($line =~ m/(\d+) \S+ were paired; of these:/) {
+      $pairs = $1;
+      $total = $pairs*2 if ($total == $pairs);
+    }elsif ($line =~ m/(\d+) pairs aligned concordantly 0 times; of these:/) {
+      $concorrate = sprintf("%.4f",1-($1/$pairs));
+    }elsif ($line =~ m/(\S+)% overall alignment rate/) {
+      $maprate = $1/100;
+    }
+  }
+  close HISAT;
+  open IN, "<$file" or die $!;
+  while ($line = <IN>) {
+    chomp($line);
+    if ($line =~ m/(\d+) \+ \d+ in total/) {
+      $total = $1 unless $total;
+    }elsif ($line =~ m/(\d+) \+ \d+ read1/) {
+      $pairs = $1;
+    }elsif ($line =~ m/(\d+) \+ \d+ read2/) {
+      $read2ct = $1;
+    }elsif ($line =~ m/(\d+) \+ \d+ mapped\s+\((\S+)%.+\)/) {
+      $maprate = sprintf("%.2f",$2/100) unless ($maprate);
+    }elsif ($line =~ m/(\d+) \+ \d+ properly paired\s+\((\S+)%.+\)/) {
+      $concorrate = sprintf("%.2f",$2/100) unless ($concorrate);
+    }
+  }
+  close IN;
+  
+  my @stats=stat($file);
+  my ($day,$month,$year) = (localtime($stats[9]))[3,4,5];
+  $year += 1900;
+  $month++;
+  $date = join("-",$year,sprintf("%02s",$month),sprintf("%02s",$day));
+  $fileowner = 's'.$stats[4] unless $fileowner;
+  $total = $pairs*2 if ($total == $pairs);
+  my $status = 'PASS';
+  $status = 'FAIL' if ($maprate < 0.90);
+  $status = 'FAIL' if ($total < 6000000);  
+  
+  print OUT join("\n","Sample\t".$sample,"Total_Raw_Count\t".$total, "Read1_Map\t".$pairs,"Read2_Map\t".$read2ct,
+    "Map_Rate\t".$maprate,"Concordant_Rate\t".$concorrate,"Alignment_Status\t".$status,"Alignment_Date\t".$date,
+    "File_Owner\t".$fileowner,"Workflow_Version\t".$gittag),"\n";
+  system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt});
+}
--- a/cvc/add_umi_sam.py
+++ b/cvc/add_umi_sam.py
+../alignment/add_umi_sam.py
\ No newline at end of file
--- a/cvc/dna_trim_align.sh
+++ b/cvc/dna_trim_align.sh
@@ -26,11 +26,20 @@ done
 shift $(($OPTIND -1))
 baseDir="`dirname \"$0\"`"

-fqs=("$@")
+fqs=''
+i=0
 numfq=$#

+while [[ $i -le $numfq ]]
+do
+    fqs="$fqs $1"
+    i=$((i + 1))
+    shift 1
+done
+
 # Check for mandatory options
-if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
+if [[ -z $pair_id ]]
+then
    usage
 fi
 NPROC=$SLURM_CPUS_ON_NODE
@@ -50,17 +59,18 @@ else
 fi

 source /etc/profile.d/modules.sh
-module load trimgalore/0.6.4 cutadapt/1.9.1 python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3
+module load trimgalore/0.6.4 cutadapt/1.9.1 bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3

 threads=`expr $NPROC / 2`

-trim_galore --cores $threads --paired -q 25 -o trim --illumina --gzip --length 35 ${fqs}
-if [[ $filter == 1 ]]
+trim_galore --cores 4 --paired -q 25 --illumina --gzip --length 35 ${fqs}
+
+if [[ ${filter} == 1 ]]
 then
      perl $baseDir/parse_trimreport.pl ${pair_id}.trimreport.txt *trimming_report.txt
 fi

-bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa trim/*.fq.gz > out.sam
+bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa *.fq.gz > out.sam

 if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]]
 then

--- a/cvc/parse_trimreport.pl
+++ b/cvc/parse_trimreport.pl
+../preproc_fastq/parse_trimreport.pl
\ No newline at end of file
--- a/genect_rnaseq/fpkm_subset_panel.pl
+++ b/genect_rnaseq/fpkm_subset_panel.pl
+#!/bin/bin/perl
+use strict;
+use warnings;
+use diagnostics;
+use Getopt::Long;
+
+
+my ($genelist,$fpkm_file) = ("") x 2;
+
+GetOptions( 'g|genes=s' => \$genelist,
+            'f|fpkm=s' => \$fpkm_file);
+
+my $fpkm_out = $fpkm_file;
+$fpkm_out =~ s/\.fpkm.+txt/\.fpkm.capture.txt/;
+open OUT, ">$fpkm_out" or die $!;
+
+#gene_info.human.txt file is required for gene alias information
+open GENEINFO, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!;
+my %geneinfo;
+while(my $gline = <GENEINFO>){
+	chomp $gline;
+	my ($taxid,$geneid,$symbol,$locus,$syn,@info) = split("\t",$gline);
+	if($syn ne '-'){
+		my @synonyms = split(/\|/,$syn);
+		$geneinfo{$symbol} = $syn;
+		foreach my $synonym(@synonyms){
+			$geneinfo{$synonym} = $symbol;
+		}
+	}
+}
+
+#genelist is the list of genes in panel. Not specifying genelist will print out all lines(e.g. wholernaseq)
+my %genes;
+if($genelist ne ""){
+	open GENES, "<$genelist" or die $!;
+	while (my $gene = <GENES>){
+		chomp $gene;
+		$genes{$gene} = 1;
+	}
+	close GENES;
+}
+my $genelist_size = keys %genes;
+
+
+#FPKM data
+open FPKM, "<$fpkm_file" or die $!;
+my %fpkmdata;
+my %found;
+my $header = <FPKM>;
+chomp $header;
+print OUT $header."\n";
+while(my $line = <FPKM>){
+	chomp $line;
+	my ($gid,$gname,$ref,$strand,$start,$end,$cov,$fpkm,$tpm) = split("\t",$line);
+	if($ref =~ /^chr/){ 
+		if(!exists $fpkmdata{$gname}){$fpkmdata{$gname} = $line;}
+		else{$fpkmdata{$gname} = $fpkmdata{$gname}."\n".$line;}	
+	}
+	if ($genelist_size == 0){ #wholernaseq
+		print OUT $line."\n";
+	}
+	elsif($genes{$gname} and $ref =~ /^chr/){ #panel gene found in fpkm data
+		print OUT $line."\n";
+		$found{$gname} =1;
+	}
+}
+
+foreach my $gene_key (keys %genes){
+	if($found{$gene_key}){}
+	else { #gene from gene list is not in fpkm data or has a different name
+		if($geneinfo{$gene_key}){ #gene is in gene_info.human.txt
+			if($fpkmdata{$geneinfo{$gene_key}}){ #gene from gene list is an alias in gene_info.human.txt
+				print OUT $fpkmdata{$geneinfo{$gene_key}}."\n";
+			}
+			elsif($geneinfo{$gene_key} =~ /\|/){
+				my @alias_array = split(/\|/, $geneinfo{$gene_key});
+				foreach my $alias(@alias_array){
+					if($fpkmdata{$alias}){print OUT $fpkmdata{$alias}."\n";}#gene from genelist is a Gene Name in gene_info.human.txt but fpkm data has an alias
+				}
+			}
+		}
+	}
+}
+close FPKM;
+close OUT;
--- a/genect_rnaseq/geneabundance.sh
+++ b/genect_rnaseq/geneabundance.sh
@@ -11,13 +11,14 @@ usage() {
  exit 1
 }
 OPTIND=1 # Reset OPTIND
-while getopts :b:g:p:s:h opt
+while getopts :b:g:p:f:s:h opt
 do
    case $opt in
        b) sbam=$OPTARG;;
        g) gtf=$OPTARG;;
        p) pair_id=$OPTARG;;
        s) stranded=$OPTARG;;
+	f) filter=$OPTARG;;
        h) usage;;
    esac
 done
@@ -48,4 +49,11 @@ featureCounts -s $stranded -M --fraction -J --ignoreDup -T $NPROC -p -g gene_nam
 mkdir ${pair_id}_stringtie
 cd ${pair_id}_stringtie
 stringtie ../${sbam} -p $NPROC -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt
- 
+
+if [[ -f $filter ]]
+then
+    cd ..
+    mv ${pair_id}.fpkm.txt ${prefix}.fpkm.ori.txt
+    perl ${baseDir}/fpkm_subset_panel.pl -f ${prefix}.fpkm.ori.txt -g $filter
+    mv ${prefix}.fpkm.capture.txt ${pair_id}.fpkm.txt
+fi
--- a/genect_rnaseq/statanal.sh
+++ b/genect_rnaseq/statanal.sh
@@ -41,7 +41,8 @@ else
    module load R/3.2.1-intel
    Rscript  $baseDir/dea.R
    Rscript $baseDir/build_ballgown.R *_stringtie
-    if [[ -n `ls *.edgeR.txt` ]]
+    edgeR=`find ./ -name *.edgeR.txt`
+    if [[ -n $edgeR ]]
    then
 	perl $baseDir/concat_edgeR.pl *.edgeR.txt
    fi

--- a/preproc_fastq/trimgalore.sh
+++ b/preproc_fastq/trimgalore.sh
@@ -25,12 +25,20 @@ shift $(($OPTIND -1))
 baseDir="`dirname \"$0\"`"

 # Check for mandatory options
-if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then
+if [[ -z $pair_id ]]; then
    usage
 fi
-fqs=("$@")
+fqs=''
+i=0
 numfq=$#

+while [[ $i -le $numfq ]]
+do
+    fqs="$fqs $1"
+    i=$((i + 1))
+    shift 1
+done
+
 if [[ -f $fq1 ]]
 then
    fqs="$fq1"
@@ -50,11 +58,11 @@ module load trimgalore/0.6.4 cutadapt/2.5 perl/5.28.0
 if [ $numfq > 1 ]
 then
    trim_galore --paired -q 25 --illumina --gzip --length 35 ${fqs}
-    mv ${r1base}_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz
-    mv ${r2base}_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz
+    mv *_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz
+    mv *_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz
 else
    trim_galore -q 25 --illumina --gzip --length 35 ${fqs}
-    mv ${r1base}_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz
+    mv *_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz
    cp ${pair_id}.trim.R1.fastq.gz ${pair_id}.trim.R2.fastq.gz 
 fi


--- a/variants/filter_cnvkit.pl
+++ b/variants/filter_cnvkit.pl
@@ -54,7 +54,7 @@ while (my $line = <CNR>) {
    } else {
 	my @ids = split(/,/,$geneids);
 	foreach my $gid (@ids) {
-	    next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI/);
+	    next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI:/);
 	    my ($gene,@other) = split(/:/,$gid);
 	    my ($gname,@loc) = split(/_/,$gene);
 	    $genes{$gname} = 1;
@@ -93,7 +93,7 @@ while (my $line = <IN>) {
    } else {
 	my @ids = split(/,/,$geneids);
 	foreach my $gid (@ids) {
-	    next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI/);
+	    next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI:/);
 	    my ($gene,@other) = split(/:/,$gid);
 	    my ($gname,@loc) = split(/_/,$gene);
 	    $genes{$gname} = 1;

--- a/variants/germline_vc.sh
+++ b/variants/germline_vc.sh
@@ -18,6 +18,7 @@ do
        p) pair_id=$OPTARG;;
        a) algo=$OPTARG;;
 	t) rna=1;;
+	b) tbed=$OPTARG;;
 	q) pon==$OPTARG;; 
        h) usage;;
    esac
@@ -131,6 +132,11 @@ then

 elif [[ $algo == 'strelka2' ]]
 then
+    opt=''
+    if [[ -n $tbed ]]
+    then
+	opt="--callRegions ${tbed}.gz"
+    fi
    if [[ $rna == 1 ]]
    then
 	mode="--rna"
@@ -143,7 +149,7 @@ then
    for i in *.bam; do
 	gvcflist="$gvcflist --bam ${i}"
    done
-    configManta.py $gvcflist --referenceFasta ${reffa} $mode --runDir manta
+    configManta.py $gvcflist $opt --referenceFasta ${reffa} $mode --runDir manta
    manta/runWorkflow.py -m local -j $NPROC
    if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]]
    then

--- a/variants/somatic_vc.sh
+++ b/variants/somatic_vc.sh
@@ -90,12 +90,18 @@ baseDir="`dirname \"$0\"`"

 source /etc/profile.d/modules.sh
 module load htslib/gcc/1.8
+export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH

 if [ $algo == 'strelka2' ]
-  then
+then
    module load strelka/2.9.10 manta/1.3.1 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14
-    mkdir manta strelka
-    configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --runDir manta
+    opt=''
+    if [[ -n $tbed ]]
+    then
+	opt="--callRegions ${tbed}.gz"
+    fi
+    mkdir manta     
+    configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} $opt --runDir manta
    manta/runWorkflow.py -m local -j 8
    if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]]
    then
@@ -135,7 +141,7 @@ then
  vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip >  ${pair_id}.varscan.vcf.gz
 elif [ $algo == 'shimmer' ]
 then
-    module load shimmer/0.1.1 samtools/gcc/1.8  vcftools/0.1.14
+    module load samtools/gcc/1.8  R/3.6.1-gccmkl vcftools/0.1.14
    shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err
    perl $baseDir/add_readct_shimmer.pl
    module rm java/oracle/jdk1.7.0_51

--- a/variants/svcalling.sh
+++ b/variants/svcalling.sh
@@ -67,31 +67,20 @@ then
    tid=`samtools view -H ${sbam} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'`
 fi

+bams=''
+for i in *.bam; do
+    bams="$bams $i" 
+done
+
+#RUN DELLY
 if [[ $method == 'delly' ]]
 then
-    #module load  delly2/v0.7.7-multi
-    if [[ -n ${normal} ]]
-    then
-	#RUN DELLY
-	echo -e "${nid}\tcontrol"> samples.tsv
-	echo -e "${tid}\ttumor" >> samples.tsv
-	delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal}
-	delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal}
-	delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal}
-	delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
-	delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal}
-	#delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf
-    else
-	#RUN DELLY
-	delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam}
-	delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam}
-	delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam}
-	delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam}
-	delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam}
-	#delly2 filter -o ${pair_id}.delly_tra.bcf -f germline ${pair_id}.delly_translocations.bcf
-    fi
+    delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${bams} 
+    delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${bams}
+    delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${bams}
+    delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${bams}
+    delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${bams}
    #MERGE DELLY AND MAKE BED
-    
    bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz
    bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.sv -v ${pair_id}.delly.svar.vcf.gz -s
    java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz