diff --git a/alignment/abra2.sh b/alignment/abra2.sh new file mode 100755 index 0000000000000000000000000000000000000000..173fdfa17ce6ee394e6a0a74e12eeb9eda187edc --- /dev/null +++ b/alignment/abra2.sh @@ -0,0 +1,58 @@ +#!/bin/bash +#gatkrunner.sh + +usage() { + echo "-h Help documentation for gatkrunner.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + echo "-b --BAM File" + echo "-p --Prefix for output file name" + echo "-a --Algorithm/Command" + echo "Example: bash hisat.sh -p prefix -r GRCh38 -b File.bam" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:a:c:b:p:h opt +do + case $opt in + r) index_path=$OPTARG;; + b) sbam=$OPTARG;; + p) pair_id=$OPTARG;; + c) tbed=$OPTARG;; + h) usage;; + esac +done + +shift $(($OPTIND -1)) + +# Check for mandatory options +if [[ -z $pair_id ]] || [[ -z $sbam ]] || [[ -z $index_path ]] +then + usage +fi + +NPROC=$SLURM_CPUS_ON_NODE +if [[ -z $NPROC ]] +then + NPROC=`nproc` +fi + +if [[ -z $isdocker ]] +then + source /etc/profile.d/modules.sh + module load abra2/2.18 samtools/gcc/1.8 + abrajar=/cm/shared/apps/abra2/lib/abra2.jar +else + abrajar=/usr/local/bin/abra2.jar +fi +ioopt="--in ${sbam} --out ${pair_id}.abra2.bam" +opt='' +if [ -n "$tbed" ] +then + opt="--targets $tbed" +fi + +which samtools +samtools index -@ $NPROC ${sbam} +mkdir tmpdir +java -Xmx16G -jar ${abrajar} ${ioopt} --ref ${index_path}/genome.fa --threads $NPROC $opt --tmpdir tmpdir --mbq 150 --mnf 5 --mer 0.05 > abra.log +samtools index -@ $NPROC ${pair_id}.abra2.bam diff --git a/alignment/bam2tdf.sh b/alignment/bam2tdf.sh index e6299d58137337fa2ded981c5239809c2ecf8e43..1834c773216b5d39c48dbe346a7f1ba16eaa51ac 100755 --- a/alignment/bam2tdf.sh +++ b/alignment/bam2tdf.sh @@ -30,8 +30,10 @@ then fi baseDir="`dirname \"$0\"`" - -source /etc/profile.d/modules.sh -module load igvtools/2.3.71 samtools/1.6 +if [[ -z $isdocker ]] +then + source /etc/profile.d/modules.sh + module load igvtools/2.3.71 samtools/1.6 +exit samtools index -@ $NPROC $bam igvtools count -z 5 $bam ${pair_id}.tdf ${index_path}/igv/human.genome diff --git a/alignment/hisat_genotype.sh b/alignment/hisat_genotype.sh index 4ee37c672dea6910e9f0a8e9b8516e55ed4d35b6..97e3f719ceb3f9cdfcec8461225220fd7a91a0eb 100755 --- a/alignment/hisat_genotype.sh +++ b/alignment/hisat_genotype.sh @@ -35,9 +35,11 @@ then NPROC=`nproc` fi -source /etc/profile.d/modules.sh -module load hisat-genotype/1.0.1 - +if [[ -z $isdocker ]] +then + source /etc/profile.d/modules.sh + module load hisat-genotype/1.0.1 +fi diff $fq1 $fq2 > difffile if [ -s difffile ] diff --git a/alignment/indexbams.sh b/alignment/indexbams.sh index 2af125dd027eac1ad1ae2cefd1f776e0a9bc17a5..ecdab135ba10b8bbe7e2b9dee4e4db59577f421d 100755 --- a/alignment/indexbams.sh +++ b/alignment/indexbams.sh @@ -26,8 +26,12 @@ fi baseDir="`dirname \"$0\"`" -source /etc/profile.d/modules.sh -module load samtools/1.6 +if [[ -z $isdocker ]] +then + source /etc/profile.d/modules.sh + module load samtools/1.6 +fi + for i in *.bam; do samtools index -@ $NPROC ${i} done diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index 9bc7050514e97f7443c57425a51cad3aac6491ef..e60b0b85dbaccf1df77a7a6c2177d082f3798c6a 100755 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -43,10 +43,6 @@ if [[ -z $isdocker ]] then source /etc/profile.d/modules.sh module add python/2.7.x-anaconda star/2.5.2b bedtools/2.26.0 -fi - -if [[ -n $method ]] && [[ $method == 'trinity' ]] -then module load trinity/1.6.0 tmphome="/tmp/$USER" if [[ -z $tmphome ]] diff --git a/genect_rnaseq/geneabundance.sh b/genect_rnaseq/geneabundance.sh index bbaf11f1e923f6d46f218987b7a12696e7b00411..b8e8fdb37bea875b3d87c086f9f172c598222a3b 100644 --- a/genect_rnaseq/geneabundance.sh +++ b/genect_rnaseq/geneabundance.sh @@ -44,9 +44,9 @@ if [[ -z $isdocker ]] then source /etc/profile.d/modules.sh module load subread/1.6.1 + export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH fi baseDir="`dirname \"$0\"`" -export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH featureCounts -s $stranded -M --fraction -J --ignoreDup -T $NPROC -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam} diff --git a/genect_rnaseq/statanal.sh b/genect_rnaseq/statanal.sh index 23efcf1a0d71687ffbee469dec947b63228cbaeb..cdf3b1cc3bff190c25dc9cd332e9caf2efad399e 100644 --- a/genect_rnaseq/statanal.sh +++ b/genect_rnaseq/statanal.sh @@ -25,8 +25,10 @@ if [[ -z $NPROC ]] then NPROC=`nproc` fi -source /etc/profile.d/modules.sh - +if [[ -z $isdocker ]] +then + source /etc/profile.d/modules.sh +fi perl $baseDir/concat_cts.pl -o ./ *.cts perl $baseDir/concat_fpkm.pl -o ./ *.fpkm.txt perl $baseDir/concat_ctsum.pl -o ./ *.cts.summary diff --git a/variants/itdseek.sh b/obsolete/itdseek.sh similarity index 100% rename from variants/itdseek.sh rename to obsolete/itdseek.sh diff --git a/variants/pindel.sh b/obsolete/pindel.sh similarity index 100% rename from variants/pindel.sh rename to obsolete/pindel.sh diff --git a/variants/checkmate.sh b/variants/checkmate.sh index 855b168723c48b383dd3e328c72a715a54959171..ddd70c2be2c69f34d99d18a0741d3c0594a38c61 100755 --- a/variants/checkmate.sh +++ b/variants/checkmate.sh @@ -33,9 +33,9 @@ then source /etc/profile.d/modules.sh module load samtools/gcc/1.8 bcftools/gcc/1.8 ncm=/project/shared/bicf_workflow_ref/seqprg/NGSCheckMate/ncm.py + export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH fi -export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:/usr/local/bin/:$PATH if [[ -f /usr/local/bin/ncm.py ]] then ncm=/usr/local/bin/ncm.py diff --git a/variants/filter_pindel.pl b/variants/filter_pindel.pl index 1802a79e5c8b8dd01f9f5ad1fe472afb96bf7784..0048d753d422539d514ce499826665feb87a2741 100755 --- a/variants/filter_pindel.pl +++ b/variants/filter_pindel.pl @@ -43,7 +43,7 @@ foreach $file (@files) { $hash{$key} = $val unless ($hash{$key}); } next unless ($hash{ANN}); - next unless ($hash{ANN} =~ m/HIGH|MODERATE|LOW/); + #next unless ($hash{ANN} =~ m/HIGH|MODERATE|LOW/); my %gtinfo = (); my @deschead = split(/:/,$format); F1:foreach my $k (0..$#gtheader) { @@ -79,15 +79,15 @@ foreach $file (@files) { @tumoraltct = split(/,/,$gtinfo{$opt{tumor}}{AO}); next if ($tumoraltct[0] eq '.'); $hash{AF} = join(",",@tumormaf); - next if ($tumoraltct[0] < 20); + next if ($tumoraltct[0] < 20 && $tumormaf[0] < 0.05); next if ($tumormaf[0] < 0.01); my $keepforvcf = 0; foreach $trx (split(/,/,$hash{ANN})) { my ($allele,$effect,$impact,$gene,$geneid,$feature, $featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna, $cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx); - next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i); - next if($effect eq 'sequence_feature'); + #next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i); + #next if($effect eq 'sequence_feature'); $keepforvcf = $gene; } next unless $keepforvcf; diff --git a/variants/filter_svaba.pl b/variants/filter_svaba.pl index e9625704e8fae361f70d8c1218f254c6ba5fffe6..72a2eb390ab8679f0a41e0bb4c46ee146db05c7f 100755 --- a/variants/filter_svaba.pl +++ b/variants/filter_svaba.pl @@ -41,11 +41,11 @@ W1:while (my $line = <IN>) { $hash{$key} = $val unless ($hash{$key}); } if (length($alt) > length($ref)) { - $hash{SVTYPE} = "INS"; - $hash{END} = $pos; + $hash{SVTYPE} = "INS"; + $hash{END} = $pos; }elsif (length($alt) < length($ref)) { - my $diff = substr($ref, length($alt)); - $hash{SVTYPE} = "DEL"; + my $diff = substr($ref, length($alt)); + $hash{SVTYPE} = "DEL"; $hash{END} = $pos + length($diff); } next unless ($hash{ANN}); @@ -92,7 +92,7 @@ W1:while (my $line = <IN>) { my ($allele,$effect,$impact,$gene,$geneid,$feature, $featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna, $cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx); - next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i); + next unless ($impact =~ m/HIGH|MODERATE|LOW/ || $effect =~ /splice/i); next if($effect eq 'sequence_feature'); $keeptrx = $trx; $keepforvcf = $gene; @@ -107,7 +107,6 @@ W1:while (my $line = <IN>) { push @nannot, $info; } } - my $newannot = join(";",@nannot); if ($hash{SVTYPE} eq 'INS' || ($hash{SVTYPE} eq 'DEL' && $keepforvcf !~ m/&/)) { if ($filter =~ m/LOWMAPQ|LowQual/i) { @@ -231,9 +230,6 @@ close IN; foreach my $id (keys %svpairs) { - if ($id =~ m/815016443/) { - warn "debugging\n"; - } my $alt1 = $svpairs{$id}{1}{alt}; my $alt2 = $svpairs{$id}{2}{alt}; my $svtype; @@ -242,14 +238,14 @@ foreach my $id (keys %svpairs) { }elsif ($alt2 =~ m/^\w\[/ && $alt1 =~ m/^\]/) { $svtype = 'INS'; }else { - $svtype = 'UNK'; + $svtype = 'UNK'; } - if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/ && $svpairs{$id}{1}{span} < 9999)) { - if ($filter =~ m/LOWMAPQ|LowQual/i) { + if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/ && $svpairs{$id}{1}{span} < 9999)) { + if ($filter =~ m/LOWMAPQ|LowQual/i) { $filter = 'FailedQC'.$filter; - } + } print VCFOUT $svpairs{$id}{1}{vcfline},"\n" - }elsif ($svtype eq 'DEL' && $svpairs{$id}{1}{span} && $svpairs{$id}{1}{span} > 9999) { - print DELFUS $svpairs{$id}{1}{fusionline},"\n"; - } + }elsif ($svtype eq 'DEL' && $svpairs{$id}{1}{span} && $svpairs{$id}{1}{span} > 9999) { + print DELFUS $svpairs{$id}{1}{fusionline},"\n"; + } } diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index a4ff47e47d4e3ec6ef953a04d4f1e41affc8c648..334d7d691a2cb1312d4454f270c1c757b4835284 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -107,6 +107,12 @@ then fi bamlist=`join_by , *.bam` Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$NPROC --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf + for i in *.bam + do + prefix="${i%.bam}" + sid=`samtools view -H ${i} |grep '^@RG' |perl -pe 's/\t/\n/g' |grep ID |cut -f 2 -d ':'` + perl -pi -e "s/$prefix/$sid/g" platypus.vcf + done vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz tabix platypus.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz diff --git a/variants/msisensor.sh b/variants/msisensor.sh index 1d27dc83ee69e9e9f586c93263c4511817470d6a..c2e59167eb7062a60adf5a1d9026e006a543bfdc 100755 --- a/variants/msisensor.sh +++ b/variants/msisensor.sh @@ -30,8 +30,10 @@ if [[ -z $sbam ]] || [[ -z $index_path ]]; then usage fi -export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH - +if [[ -z $isdocker ]] +then + export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH +fi bedopt='' if [[ -n $capture ]] then diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh index 776c306c6bae5c965e07f648b16ecf1554e14194..d282231782066cac93e8b0c7d3283cfc98718291 100755 --- a/variants/norm_annot.sh +++ b/variants/norm_annot.sh @@ -24,8 +24,11 @@ function join_by { local IFS="$1"; shift; echo "$*"; } shift $(($OPTIND -1)) baseDir="`dirname \"$0\"`" -source /etc/profile.d/modules.sh -module load bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 snpeff/4.3q +if [[ -z $isdocker ]] +then + source /etc/profile.d/modules.sh + module load bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 snpeff/4.3q +fi if [[ -a "${index_path}/genome.fa" ]] then diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 9923e359071c4e28521bc4e1b002bd5c7303685d..95deb660d21fbf1f74c83dbf15269eeba2be02b3 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -38,8 +38,8 @@ if [[ -z $isdocker ]] then source /etc/profile.d/modules.sh module load htslib/gcc/1.8 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 + export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH fi -export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH shift $(($OPTIND -1)) diff --git a/variants/svcalling.sh b/variants/svcalling.sh index b01363fa80861ac48be8728d8244b78cbaeeb4b6..562d2eefe3344a6bae29089cada98d1f378a1be6 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -62,18 +62,23 @@ if [[ -z $isdocker ]] then source /etc/profile.d/modules.sh module load htslib/gcc/1.8 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 + export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH fi -export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH mkdir -p temp -if [[ -z $tid ]] +if [[ -z $tid ]] && [[ -f ${sbam} ]] then tid=`samtools view -H ${sbam} |grep '^@RG' |perl -pe 's/\t/\n/g' |grep ID |cut -f 2 -d ':'` fi bams='' for i in *.bam; do - bams="$bams $i" + bams="$bams $i" + sid=`samtools view -H ${i} |grep '^@RG' |perl -pe 's/\t/\n/g' |grep ID |cut -f 2 -d ':'` + if [[ $sid =~ "_T_" ]] + then + tid=$sid + fi done #RUN DELLY @@ -90,8 +95,9 @@ then java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz if [[ $filter == 1 ]] then - zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u > ${pair_id}.dgf.txt + zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion|transcript_ablation' | sort -u > ${pair_id}.dgf.txt mv ${pair_id}.delly.vcf.gz ${pair_id}.delly.ori.vcf.gz + echo "perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz" perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz bgzip -f ${pair_id}.delly.vcf zgrep '#CHROM' ${pair_id}.delly.vcf.gz > ${pair_id}.delly.genefusion.txt @@ -120,7 +126,7 @@ then if [[ $filter == 1 ]] then - zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u > ${pair_id}.sgf.txt + zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion|transcript_ablation' | sort -u > ${pair_id}.sgf.txt mv ${pair_id}.svaba.vcf.gz ${pair_id}.svaba.ori.vcf.gz perl $baseDir/filter_svaba.pl -t $tid -p ${pair_id} -i ${pair_id}.svaba.ori.vcf.gz -s ${pair_id}.svaba.sv.vcf.gz bgzip ${pair_id}.svaba.vcf @@ -190,6 +196,7 @@ then elif [[ $method == 'itdseek' ]] then stexe=`which samtools` + echo $stexe samtools view -@ $NPROC -L ${itdbed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | java -Xmx30g -jar $SNPEFF_HOME/SnpSift.jar filter "( LEN < 10000 )" | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz diff --git a/variants/uni_norm_annot.sh b/variants/uni_norm_annot.sh index 502fa0e6d9dedda6cd8567f37eed9f6c801020f0..56b45e683d5ad3a3359dc4bb1383ddf4c8b2200c 100755 --- a/variants/uni_norm_annot.sh +++ b/variants/uni_norm_annot.sh @@ -41,8 +41,8 @@ if [[ -z $isdocker ]] then source /etc/profile.d/modules.sh module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q + export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH fi -export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf mv ${vcf} ${pair_id}.ori.vcf.gz