diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index 52f904e385e2e536a3f8e8858d0e2dbb179536b1..79db6ad4b87fd88e9c63064e89c4fbb874710ebc 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -13,7 +13,7 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:b:c:n:p:s:d:h opt +while getopts :r:b:c:n:p:e:s:d:h opt do case $opt in r) index_path=$OPTARG;; @@ -22,6 +22,7 @@ do n) nuctype=$OPTARG;; p) pair_id=$OPTARG;; d) dedup=$OPTARG;; + e) version=$OPTARG;; s) skiplc=1;; h) usage;; esac @@ -63,9 +64,12 @@ if [[ $nuctype == 'dna' ]]; then samtools index -@ $NPROC ${pair_id}.ontarget.bam samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt + java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity BARCODE_TAG=RG I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir} #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir} - #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir} #samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt fi #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir} + perl $baseDir/sequenceqc_dna.pl -e ${version} -r $index_path ${pair_id}.genomecov.txt +else + perl $baseDir/sequenceqc_rna.pl -e ${version} -r $index_path ${pair_id}.flagstat.txt fi diff --git a/alignment/sequenceqc_dna.pl b/alignment/sequenceqc_dna.pl new file mode 100755 index 0000000000000000000000000000000000000000..9d573b89a2843cbf3e777b7779adddb509bcf333 --- /dev/null +++ b/alignment/sequenceqc_dna.pl @@ -0,0 +1,139 @@ +#!/usr/bin/perl -w +#sequenceqc_alignment.p + +use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); +my %opt = (); +my $results = GetOptions (\%opt,'refdir|r=s','help|h','gitdir|e=s','user|u=s'); + +my @statfiles = @ARGV; + +my $fileowner = $opt{user}; +my $gittag = 'v5'; +if ($opt{gitdir}) { + $gittag = $opt{gitdir}; +}elsif($ENV{'gitv'}) { + $gittag = $ENV{'gitv'}; +} +foreach $sfile (@statfiles) { + $sfile =~ m/(\S+)\.genomecov.txt/; + my $prefix = $1; + my %hash; + open FLAG, "<$prefix\.trimreport.txt"; + while (my $line = <FLAG>) { + chomp($line); + my ($file,$raw,$trim) = split(/\t/,$line); + $hash{rawct} += $raw; + } + open FLAG, "<$prefix\.flagstat.txt" or die $!; + while (my $line = <FLAG>) { + chomp($line); + if ($line =~ m/(\d+) \+ \d+ in total/) { + $hash{total} = $1; + }elsif ($line =~ m/(\d+) \+ \d+ read1/) { + $hash{pairs} = $1; + }elsif ($line =~ m/(\d+) \+ \d+ mapped/) { + $hash{maprate} = 100*sprintf("%.4f",$1/$hash{total}); + }elsif ($line =~ m/(\d+) \+ \d+ properly paired/) { + $hash{propair} = 100*sprintf("%.4f",$1/$hash{total}); + } + } + close FLAG; + open FLAG, "<$prefix\.ontarget.flagstat.txt" or die $!; + while (my $line = <FLAG>) { + chomp($line); + if ($line =~ m/(\d+) \+ \d+ in total/) { + $hash{ontarget} = $1; + } + } + unless ($hash{rawct}) { + $hash{rawct} = $hash{total}; + } + my %lc; + open DUP, "<$prefix.libcomplex.txt" or die $!; + while (my $line = <DUP>) { + chomp($line); + if ($line =~ m/## METRICS/) { + $header = <DUP>; + $nums = <DUP>; + chomp($header); + chomp($nums); + my @stats = split(/\t/,$header); + my @row = split(/\t/,$nums); + my %info; + foreach my $i (0..$#stats) { + $info{$stats[$i]} = $row[$i]; + } + $lc{TOTREADSLC} += $info{UNPAIRED_READS_EXAMINED} + $info{READ_PAIRS_EXAMINED}; + $hash{libsize} = $info{ESTIMATED_LIBRARY_SIZE}; + $lc{TOTDUPLC} += $info{UNPAIRED_READ_DUPLICATES} + $info{READ_PAIR_DUPLICATES}; + } + } + close DUP; + $hash{percdups} = sprintf("%.4f",$lc{TOTDUPLC}/$lc{TOTREADSLC}); + my %cov; + open COV, "<$prefix\.genomecov.txt" or die $!; + my $sumdepth; + my $totalbases; + while (my $line = <COV>) { + chomp($line); + my ($all,$depth,$bp,$total,$percent) = split(/\t/,$line); + $cov{$depth} = $percent; + $sumdepth += $depth*$bp; + $totalbases = $total unless ($totalbases); + } + my $avgdepth = sprintf("%.0f",$sumdepth/$totalbases); + my @depths = sort {$a <=> $b} keys %cov; + my @perc = @cov{@depths}; + my @cum_sum = cumsum(@perc); + my $median = 0; + foreach my $i (0..$#cum_sum) { + if ($cum_sum[$i] < 0.5) { + $median = $i; + } + } + $hash{'perc100x'} = 100*sprintf("%.4f",1-$cum_sum[100]); + $hash{'perc200x'} = 100*sprintf("%.4f",1-$cum_sum[200]); + $hash{'perc500x'} = 100*sprintf("%.4f",1-$cum_sum[500]); + + #### Begin File Information ######## + my $status = 'PASS'; + $status = 'FAIL' if ($hash{maprate} < 0.90 && $hash{'perc100x'} < 0.90); + my @stats = stat("$prefix\.flagstat.txt"); + my ($day,$month,$year) = (localtime($stats[9]))[3,4,5]; + $year += 1900; + $month++; + $date = join("-",$year,sprintf("%02s",$month),sprintf("%02s",$day)); + $fileowner = 's'.$stats[4] unless $fileowner; + $hash{status} = $status; + $hash{date}=$date; + $hash{fileowner} = $fileowner; + ##### End File Information ######## + ##### START separateFilesPerSample ###### + open OUT, ">".$prefix.".sequence.stats.txt" or die $!; + print OUT join("\n", "Sample\t".$prefix,"Total_Raw_Count\t".$hash{rawct}, + "On_Target\t".$hash{ontarget},"Map_Rate\t".$hash{maprate}, + "Properly_Paired\t".$hash{propair}, + "Percent_Duplicates\t".sprintf("%.2f",100*$hash{percdups}), + "Percent_on_Target\t".sprintf("%.2f",100*$hash{ontarget}/$hash{rawct}), + "All_Average_Depth\t".$avgdepth,"All_Median_Depth\t".$median, + "Percent_over_100x\t".$hash{'perc100x'}, + "Percent_over_200x\t".$hash{'perc200x'}, + "Percent_over_500x\t".$hash{'perc500x'}, + "Alignment_Status\t".$hash{status},"Alignment_Date\t".$hash{date}, + "File_Owner\t".$hash{fileowner},"Workflow_Version\t".$gittag),"\n"; + close OUT; + system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt}); + ##### END separateFilesPerSample ###### +} + + +sub cumsum { + my @nums = @_; + my @cumsum = (); + my $mid = 0; + for my $i (0..$#nums) { + $mid += $nums[$i]; + push(@cumsum,$mid); + } + return @cumsum; +} diff --git a/alignment/sequenceqc_rna.pl b/alignment/sequenceqc_rna.pl new file mode 100755 index 0000000000000000000000000000000000000000..906b802404a6ba82cd463ca09ebe92f1d4d6c931 --- /dev/null +++ b/alignment/sequenceqc_rna.pl @@ -0,0 +1,78 @@ +#!/usr/bin/perl -w +#sequenceqc_rnaseq.pl + +use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); +my %opt = (); +my $results = GetOptions (\%opt,'refdir|r=s','help|h','gitdir|e=s','user|u=s'); + +my @files = @ARGV; +chomp(@files); + +my $fileowner = $opt{user}; +my $gittag = 'v5'; +if ($opt{gitdir}) { + $gittag = $opt{gitdir}; +}elsif($ENV{'gitv'}) { + $gittag = $ENV{'gitv'}; +} + +foreach my $file (@files) { + chomp($file); + $file =~ m/(\S+)\.flagstat.txt/; + my @directory = split(/\//, $file); + $prefix = $directory[-1]; + my $sample = (split(/\./,$prefix))[0]; + open OUT, ">".$sample.".sequence.stats.txt" or die;#separateFilesPerSample + $hisatfile = $file; + $hisatfile =~ s/flagstat/alignerout/; + open HISAT, "<$hisatfile"; + my ($total, $pairs,$read2ct,$maprate,$concorrate); + while (my $line = <HISAT>) { + chomp($line); + if ($line =~ m/(\d+) reads; of these:/) { + $total = $1; + }elsif ($line =~ m/Number of input reads\s+\|\s+(\d+)/) { + $total = $1; + }elsif ($line =~ m/(\d+) \S+ were paired; of these:/) { + $pairs = $1; + $total = $pairs*2 if ($total == $pairs); + }elsif ($line =~ m/(\d+) pairs aligned concordantly 0 times; of these:/) { + $concorrate = sprintf("%.4f",1-($1/$pairs)); + }elsif ($line =~ m/(\S+)% overall alignment rate/) { + $maprate = $1/100; + } + } + close HISAT; + open IN, "<$file" or die $!; + while ($line = <IN>) { + chomp($line); + if ($line =~ m/(\d+) \+ \d+ in total/) { + $total = $1 unless $total; + }elsif ($line =~ m/(\d+) \+ \d+ read1/) { + $pairs = $1; + }elsif ($line =~ m/(\d+) \+ \d+ read2/) { + $read2ct = $1; + }elsif ($line =~ m/(\d+) \+ \d+ mapped\s+\((\S+)%.+\)/) { + $maprate = sprintf("%.2f",$2/100) unless ($maprate); + }elsif ($line =~ m/(\d+) \+ \d+ properly paired\s+\((\S+)%.+\)/) { + $concorrate = sprintf("%.2f",$2/100) unless ($concorrate); + } + } + close IN; + + my @stats=stat($file); + my ($day,$month,$year) = (localtime($stats[9]))[3,4,5]; + $year += 1900; + $month++; + $date = join("-",$year,sprintf("%02s",$month),sprintf("%02s",$day)); + $fileowner = 's'.$stats[4] unless $fileowner; + $total = $pairs*2 if ($total == $pairs); + my $status = 'PASS'; + $status = 'FAIL' if ($maprate < 0.90); + $status = 'FAIL' if ($total < 6000000); + + print OUT join("\n","Sample\t".$sample,"Total_Raw_Count\t".$total, "Read1_Map\t".$pairs,"Read2_Map\t".$read2ct, + "Map_Rate\t".$maprate,"Concordant_Rate\t".$concorrate,"Alignment_Status\t".$status,"Alignment_Date\t".$date, + "File_Owner\t".$fileowner,"Workflow_Version\t".$gittag),"\n"; + system(qq{cat $opt{refdir}\/reference_info.txt >> $prefix\.sequence.stats.txt}); +} diff --git a/cvc/add_umi_sam.py b/cvc/add_umi_sam.py new file mode 120000 index 0000000000000000000000000000000000000000..2a9843b161590c4fc9cda2cd95911f8b2de2d325 --- /dev/null +++ b/cvc/add_umi_sam.py @@ -0,0 +1 @@ +../alignment/add_umi_sam.py \ No newline at end of file diff --git a/cvc/dna_trim_align.sh b/cvc/dna_trim_align.sh index a44d414e66de09573d69548e3c1ee3ccf6c4bebd..ceaffa61289338aebb29f786514a7b24e14ce2f6 100644 --- a/cvc/dna_trim_align.sh +++ b/cvc/dna_trim_align.sh @@ -26,11 +26,20 @@ done shift $(($OPTIND -1)) baseDir="`dirname \"$0\"`" -fqs=("$@") +fqs='' +i=0 numfq=$# +while [[ $i -le $numfq ]] +do + fqs="$fqs $1" + i=$((i + 1)) + shift 1 +done + # Check for mandatory options -if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then +if [[ -z $pair_id ]] +then usage fi NPROC=$SLURM_CPUS_ON_NODE @@ -50,17 +59,18 @@ else fi source /etc/profile.d/modules.sh -module load trimgalore/0.6.4 cutadapt/1.9.1 python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 +module load trimgalore/0.6.4 cutadapt/1.9.1 bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 threads=`expr $NPROC / 2` -trim_galore --cores $threads --paired -q 25 -o trim --illumina --gzip --length 35 ${fqs} -if [[ $filter == 1 ]] +trim_galore --cores 4 --paired -q 25 --illumina --gzip --length 35 ${fqs} + +if [[ ${filter} == 1 ]] then perl $baseDir/parse_trimreport.pl ${pair_id}.trimreport.txt *trimming_report.txt fi -bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa trim/*.fq.gz > out.sam +bwa mem -M -t $threads -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa *.fq.gz > out.sam if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]] then diff --git a/cvc/parse_trimreport.pl b/cvc/parse_trimreport.pl new file mode 120000 index 0000000000000000000000000000000000000000..ede2fb3d09d606f1a3f033a39cbed122ff75f672 --- /dev/null +++ b/cvc/parse_trimreport.pl @@ -0,0 +1 @@ +../preproc_fastq/parse_trimreport.pl \ No newline at end of file diff --git a/genect_rnaseq/fpkm_subset_panel.pl b/genect_rnaseq/fpkm_subset_panel.pl new file mode 100755 index 0000000000000000000000000000000000000000..baa830a29069088bb559c3ec83d9324a9d7f62c7 --- /dev/null +++ b/genect_rnaseq/fpkm_subset_panel.pl @@ -0,0 +1,85 @@ +#!/bin/bin/perl +use strict; +use warnings; +use diagnostics; +use Getopt::Long; + + +my ($genelist,$fpkm_file) = ("") x 2; + +GetOptions( 'g|genes=s' => \$genelist, + 'f|fpkm=s' => \$fpkm_file); + +my $fpkm_out = $fpkm_file; +$fpkm_out =~ s/\.fpkm.+txt/\.fpkm.capture.txt/; +open OUT, ">$fpkm_out" or die $!; + +#gene_info.human.txt file is required for gene alias information +open GENEINFO, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!; +my %geneinfo; +while(my $gline = <GENEINFO>){ + chomp $gline; + my ($taxid,$geneid,$symbol,$locus,$syn,@info) = split("\t",$gline); + if($syn ne '-'){ + my @synonyms = split(/\|/,$syn); + $geneinfo{$symbol} = $syn; + foreach my $synonym(@synonyms){ + $geneinfo{$synonym} = $symbol; + } + } +} + +#genelist is the list of genes in panel. Not specifying genelist will print out all lines(e.g. wholernaseq) +my %genes; +if($genelist ne ""){ + open GENES, "<$genelist" or die $!; + while (my $gene = <GENES>){ + chomp $gene; + $genes{$gene} = 1; + } + close GENES; +} +my $genelist_size = keys %genes; + + +#FPKM data +open FPKM, "<$fpkm_file" or die $!; +my %fpkmdata; +my %found; +my $header = <FPKM>; +chomp $header; +print OUT $header."\n"; +while(my $line = <FPKM>){ + chomp $line; + my ($gid,$gname,$ref,$strand,$start,$end,$cov,$fpkm,$tpm) = split("\t",$line); + if($ref =~ /^chr/){ + if(!exists $fpkmdata{$gname}){$fpkmdata{$gname} = $line;} + else{$fpkmdata{$gname} = $fpkmdata{$gname}."\n".$line;} + } + if ($genelist_size == 0){ #wholernaseq + print OUT $line."\n"; + } + elsif($genes{$gname} and $ref =~ /^chr/){ #panel gene found in fpkm data + print OUT $line."\n"; + $found{$gname} =1; + } +} + +foreach my $gene_key (keys %genes){ + if($found{$gene_key}){} + else { #gene from gene list is not in fpkm data or has a different name + if($geneinfo{$gene_key}){ #gene is in gene_info.human.txt + if($fpkmdata{$geneinfo{$gene_key}}){ #gene from gene list is an alias in gene_info.human.txt + print OUT $fpkmdata{$geneinfo{$gene_key}}."\n"; + } + elsif($geneinfo{$gene_key} =~ /\|/){ + my @alias_array = split(/\|/, $geneinfo{$gene_key}); + foreach my $alias(@alias_array){ + if($fpkmdata{$alias}){print OUT $fpkmdata{$alias}."\n";}#gene from genelist is a Gene Name in gene_info.human.txt but fpkm data has an alias + } + } + } + } +} +close FPKM; +close OUT; diff --git a/genect_rnaseq/geneabundance.sh b/genect_rnaseq/geneabundance.sh index 5d1a362da7f790c8dafb40218f83f5d78f3463aa..35c1d000fc2ad58ff7963819facfd92cd0f6b006 100644 --- a/genect_rnaseq/geneabundance.sh +++ b/genect_rnaseq/geneabundance.sh @@ -11,13 +11,14 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :b:g:p:s:h opt +while getopts :b:g:p:f:s:h opt do case $opt in b) sbam=$OPTARG;; g) gtf=$OPTARG;; p) pair_id=$OPTARG;; s) stranded=$OPTARG;; + f) filter=$OPTARG;; h) usage;; esac done @@ -48,4 +49,11 @@ featureCounts -s $stranded -M --fraction -J --ignoreDup -T $NPROC -p -g gene_nam mkdir ${pair_id}_stringtie cd ${pair_id}_stringtie stringtie ../${sbam} -p $NPROC -G ${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt - + +if [[ -f $filter ]] +then + cd .. + mv ${pair_id}.fpkm.txt ${prefix}.fpkm.ori.txt + perl ${baseDir}/fpkm_subset_panel.pl -f ${prefix}.fpkm.ori.txt -g $filter + mv ${prefix}.fpkm.capture.txt ${pair_id}.fpkm.txt +fi diff --git a/genect_rnaseq/statanal.sh b/genect_rnaseq/statanal.sh index fd36f0f19d745ac77bb35cec936cbdb4a6100664..23efcf1a0d71687ffbee469dec947b63228cbaeb 100644 --- a/genect_rnaseq/statanal.sh +++ b/genect_rnaseq/statanal.sh @@ -41,7 +41,8 @@ else module load R/3.2.1-intel Rscript $baseDir/dea.R Rscript $baseDir/build_ballgown.R *_stringtie - if [[ -n `ls *.edgeR.txt` ]] + edgeR=`find ./ -name *.edgeR.txt` + if [[ -n $edgeR ]] then perl $baseDir/concat_edgeR.pl *.edgeR.txt fi diff --git a/preproc_fastq/trimgalore.sh b/preproc_fastq/trimgalore.sh index dcb275278d29a213ae9c83f95eda6b13ccfdae4c..537b2b4639d9fec7ea210ab8cbccc5ced5c360fe 100644 --- a/preproc_fastq/trimgalore.sh +++ b/preproc_fastq/trimgalore.sh @@ -25,12 +25,20 @@ shift $(($OPTIND -1)) baseDir="`dirname \"$0\"`" # Check for mandatory options -if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then +if [[ -z $pair_id ]]; then usage fi -fqs=("$@") +fqs='' +i=0 numfq=$# +while [[ $i -le $numfq ]] +do + fqs="$fqs $1" + i=$((i + 1)) + shift 1 +done + if [[ -f $fq1 ]] then fqs="$fq1" @@ -50,11 +58,11 @@ module load trimgalore/0.6.4 cutadapt/2.5 perl/5.28.0 if [ $numfq > 1 ] then trim_galore --paired -q 25 --illumina --gzip --length 35 ${fqs} - mv ${r1base}_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz - mv ${r2base}_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz + mv *_val_1.fq.gz ${pair_id}.trim.R1.fastq.gz + mv *_val_2.fq.gz ${pair_id}.trim.R2.fastq.gz else trim_galore -q 25 --illumina --gzip --length 35 ${fqs} - mv ${r1base}_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz + mv *_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz cp ${pair_id}.trim.R1.fastq.gz ${pair_id}.trim.R2.fastq.gz fi diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index 772feaf5159faa1cf9b276140ad0faa9e6268c8e..ef06a3e56e17a58ef415b98aa54a875c95476690 100755 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -54,7 +54,7 @@ while (my $line = <CNR>) { } else { my @ids = split(/,/,$geneids); foreach my $gid (@ids) { - next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI/); + next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI:/); my ($gene,@other) = split(/:/,$gid); my ($gname,@loc) = split(/_/,$gene); $genes{$gname} = 1; @@ -93,7 +93,7 @@ while (my $line = <IN>) { } else { my @ids = split(/,/,$geneids); foreach my $gid (@ids) { - next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI/); + next if ($gid =~ /^rs\d+$|^SNP:rs\d+$|^-$|Fusion|MSI:/); my ($gene,@other) = split(/:/,$gid); my ($gname,@loc) = split(/_/,$gene); $genes{$gname} = 1; diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 9f598d9f68435b27b3759a5c8adffe361ce3c188..38ce8eeb0d1bd0cc26d1108f71372a9c7f343885 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -18,6 +18,7 @@ do p) pair_id=$OPTARG;; a) algo=$OPTARG;; t) rna=1;; + b) tbed=$OPTARG;; q) pon==$OPTARG;; h) usage;; esac @@ -131,6 +132,11 @@ then elif [[ $algo == 'strelka2' ]] then + opt='' + if [[ -n $tbed ]] + then + opt="--callRegions ${tbed}.gz" + fi if [[ $rna == 1 ]] then mode="--rna" @@ -143,7 +149,7 @@ then for i in *.bam; do gvcflist="$gvcflist --bam ${i}" done - configManta.py $gvcflist --referenceFasta ${reffa} $mode --runDir manta + configManta.py $gvcflist $opt --referenceFasta ${reffa} $mode --runDir manta manta/runWorkflow.py -m local -j $NPROC if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]] then diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 36a09b0a1b2f85a6bfdc96e1a51c9848cbdaa8ca..4631780783270f6c1776f1b7f94788921553be51 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -90,12 +90,18 @@ baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh module load htslib/gcc/1.8 +export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH if [ $algo == 'strelka2' ] - then +then module load strelka/2.9.10 manta/1.3.1 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 - mkdir manta strelka - configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --runDir manta + opt='' + if [[ -n $tbed ]] + then + opt="--callRegions ${tbed}.gz" + fi + mkdir manta + configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} $opt --runDir manta manta/runWorkflow.py -m local -j 8 if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]] then @@ -135,7 +141,7 @@ then vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz elif [ $algo == 'shimmer' ] then - module load shimmer/0.1.1 samtools/gcc/1.8 vcftools/0.1.14 + module load samtools/gcc/1.8 R/3.6.1-gccmkl vcftools/0.1.14 shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err perl $baseDir/add_readct_shimmer.pl module rm java/oracle/jdk1.7.0_51 diff --git a/variants/svcalling.sh b/variants/svcalling.sh index b737b98abb0210a828a0ed67ab765ce23b03fa0c..28812c1394dde759fb6e4b779f1fb49305d7f237 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -67,31 +67,20 @@ then tid=`samtools view -H ${sbam} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'` fi +bams='' +for i in *.bam; do + bams="$bams $i" +done + +#RUN DELLY if [[ $method == 'delly' ]] then - #module load delly2/v0.7.7-multi - if [[ -n ${normal} ]] - then - #RUN DELLY - echo -e "${nid}\tcontrol"> samples.tsv - echo -e "${tid}\ttumor" >> samples.tsv - delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} ${normal} - delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} ${normal} - #delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf - else - #RUN DELLY - delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${sbam} - delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${sbam} - #delly2 filter -o ${pair_id}.delly_tra.bcf -f germline ${pair_id}.delly_translocations.bcf - fi + delly2 call -t BND -o ${pair_id}.delly_translocations.bcf -q 30 -g ${reffa} ${bams} + delly2 call -t DUP -o ${pair_id}.delly_duplications.bcf -q 30 -g ${reffa} ${bams} + delly2 call -t INV -o ${pair_id}.delly_inversions.bcf -q 30 -g ${reffa} ${bams} + delly2 call -t DEL -o ${pair_id}.delly_deletion.bcf -q 30 -g ${reffa} ${bams} + delly2 call -t INS -o ${pair_id}.delly_insertion.bcf -q 30 -g ${reffa} ${bams} #MERGE DELLY AND MAKE BED - bcftools concat -a -O v ${pair_id}.delly_duplications.bcf ${pair_id}.delly_inversions.bcf ${pair_id}.delly_translocations.bcf ${pair_id}.delly_deletion.bcf ${pair_id}.delly_insertion.bcf | vcf-sort -t temp | bgzip > ${pair_id}.delly.svar.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p ${pair_id}.delly.sv -v ${pair_id}.delly.svar.vcf.gz -s java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz