...
 
Commits (11)
#!/bin/bash
#gatkrunner.sh
usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-b --BAM File"
echo "-p --Prefix for output file name"
echo "-a --Algorithm/Command"
echo "Example: bash hisat.sh -p prefix -r GRCh38 -b File.bam"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:a:c:b:p:h opt
do
case $opt in
r) index_path=$OPTARG;;
b) sbam=$OPTARG;;
p) pair_id=$OPTARG;;
c) tbed=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]] || [[ -z $index_path ]]
then
usage
fi
NPROC=$SLURM_CPUS_ON_NODE
if [[ -z $NPROC ]]
then
NPROC=`nproc`
fi
if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load abra2/2.18 samtools/gcc/1.8
abrajar=/cm/shared/apps/abra2/lib/abra2.jar
else
abrajar=/usr/local/bin/abra2.jar
fi
ioopt="--in ${sbam} --out ${pair_id}.abra2.bam"
opt=''
if [ -n "$tbed" ]
then
opt="--targets $tbed"
fi
which samtools
samtools index -@ $NPROC ${sbam}
mkdir tmpdir
java -Xmx16G -jar ${abrajar} ${ioopt} --ref ${index_path}/genome.fa --threads $NPROC $opt --tmpdir tmpdir --mbq 150 --mnf 5 --mer 0.05 > abra.log
samtools index -@ $NPROC ${pair_id}.abra2.bam
......@@ -30,8 +30,10 @@ then
fi
baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module load igvtools/2.3.71 samtools/1.6
if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load igvtools/2.3.71 samtools/1.6
exit
samtools index -@ $NPROC $bam
igvtools count -z 5 $bam ${pair_id}.tdf ${index_path}/igv/human.genome
......@@ -35,9 +35,11 @@ then
NPROC=`nproc`
fi
source /etc/profile.d/modules.sh
module load hisat-genotype/1.0.1
if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load hisat-genotype/1.0.1
fi
diff $fq1 $fq2 > difffile
if [ -s difffile ]
......
......@@ -26,8 +26,12 @@ fi
baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module load samtools/1.6
if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load samtools/1.6
fi
for i in *.bam; do
samtools index -@ $NPROC ${i}
done
......@@ -43,10 +43,6 @@ if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module add python/2.7.x-anaconda star/2.5.2b bedtools/2.26.0
fi
if [[ -n $method ]] && [[ $method == 'trinity' ]]
then
module load trinity/1.6.0
tmphome="/tmp/$USER"
if [[ -z $tmphome ]]
......
File mode changed from 100644 to 100755
#!/usr/bin/perl -w
#check_designfile.pl
use strict;
use warnings;
my $pe = shift @ARGV;
my $dfile = shift @ARGV;
open OUT, ">design.valid.txt" or die $!;
open DFILE, "<$dfile" or die $!;
my $head = <DFILE>;
chomp($head);
$head =~ s/FullPathTo//g;
my @colnames = split(/\t/,$head);
my %newcols = map {$_=> 1} @colnames;
unless (grep(/FqR1/,@colnames)) {
die "Missing Sequence Files in Design File: FqR1\n";
}
unless (grep(/SampleID/,@colnames)) {
die "Missing SampleID in Design File\n";
}
if ($pe eq 'pe') {
unless (grep(/FqR2/,@colnames)) {
die "Missing Sequence Files in Design File: FqR2\n";
}
}else {
delete $newcols{FqR2};
}
my @cols = sort {$a cmp $b} keys %newcols;
print OUT join("\t",@cols),"\n";
my @grp = ('a','b');
my $lnct = 0;
while (my $line = <DFILE>) {
chomp($line);
$line =~ s/FullPathTo//g;
my @row = split(/\t/,$line);
my %hash;
foreach my $i (0..$#row) {
next unless ($newcols{$colnames[$i]});
$row[$i] =~ s/-//g unless ($colnames[$i] =~ m/Fq/);
$hash{$colnames[$i]} = $row[$i];
}
if ($hash{SampleID} =~ m/^\d/) {
$hash{SampleID} =~ s/^/S/;
}
$hash{SampleName} = $hash{SampleID} unless ($hash{SampleName});
$hash{SubjectID} = $hash{SampleID} unless ($hash{SubjectID});
unless ($hash{SampleGroup}) {
my $j = $lnct %% 2;
$hash{SampleGroup} = $grp[$j];
}
$lnct ++;
$hash{SampleGroup} =~ s/_//g;
next unless ( -f $hash{FqR1});
if ($hash{FqR1} =~ m/gz/) {
system(qq{mv $hash{FqR1} $hash{SampleID}.R1.fastq.gz});
}else {
system(qq{mv $hash{FqR1} $hash{SampleID}.R1.fastq});
system(qq{pigz -f $hash{SampleID}.R1.fastq});
}
$hash{FqR1} = "$hash{SampleID}.R1.fastq.gz";
if ($pe eq 'pe') {
next unless (-f $hash{FqR2});
if ($hash{FqR2} =~ m/gz/) {
system(qq{mv $hash{FqR2} $hash{SampleID}.R2.fastq.gz});
}else {
system(qq{mv $hash{FqR2} $hash{SampleID}.R2.fastq});
system(qq{pigz -f $hash{SampleID}.R2.fastq});
}
$hash{FqR2} = "$hash{SampleID}.R2.fastq.gz";
}
my @line;
foreach my $f (@cols) {
push @line, $hash{$f};
}
print OUT join("\t",@line),"\n";
print join(",",$hash{SampleID},"$hash{SampleID}.R1.fastq.gz","$hash{SampleID}.R2.fastq.gz"),"\n";
}
#!/bin/bash
#check_inputfiles.sh
fqs=`ls *.f*`
for i in $fqs;
do
if [[ ${i} == *.fq ]];
then
new_name=`echo ${i} | sed -e "s/.fq\$/_good.fastq/"`;
mv ${i} ${new_name};
`pigz -f ${new_name}`;
elif [[ ${i} == *.fastq ]];
then
new_name=`echo ${i} | sed -e "s/.fastq\$/_good.fastq/"`;
mv ${i} ${new_name};
`pigz -f ${new_name}`;
elif [[ ${i} == *.fq.gz ]];
then
new_name=`echo ${i} | sed -e "s/.fq.gz\$/_good.fastq.gz/"`;
mv ${i} ${new_name};
else
new_name=`echo ${i} | sed -e "s/.fastq.gz\$/_good.fastq.gz/"`;
mv ${i} ${new_name};
fi;
done
#!/bin/bash
#check_inputfiles.sh
baseDir="`dirname \"$0\"`"
rpair=$1
design=$2
perl -p -e 's/\\r\\n*/\\n/g' $design > design.fix.txt
perl $baseDir/check_designfile.pl ${rpair} design.fix.txt
File mode changed from 100644 to 100755
......@@ -44,9 +44,9 @@ if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load subread/1.6.1
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
fi
baseDir="`dirname \"$0\"`"
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
featureCounts -s $stranded -M --fraction -J --ignoreDup -T $NPROC -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam}
......
......@@ -25,8 +25,11 @@ if [[ -z $NPROC ]]
then
NPROC=`nproc`
fi
source /etc/profile.d/modules.sh
if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load R/3.2.1-intel
fi
perl $baseDir/concat_cts.pl -o ./ *.cts
perl $baseDir/concat_fpkm.pl -o ./ *.fpkm.txt
perl $baseDir/concat_ctsum.pl -o ./ *.cts.summary
......@@ -38,7 +41,6 @@ then
touch empty.png
touch bg.rda
else
module load R/3.2.1-intel
Rscript $baseDir/dea.R
Rscript $baseDir/build_ballgown.R *_stringtie
edgeR=`find ./ -name *.edgeR.txt`
......
......@@ -33,9 +33,9 @@ then
source /etc/profile.d/modules.sh
module load samtools/gcc/1.8 bcftools/gcc/1.8
ncm=/project/shared/bicf_workflow_ref/seqprg/NGSCheckMate/ncm.py
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
fi
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:/usr/local/bin/:$PATH
if [[ -f /usr/local/bin/ncm.py ]]
then
ncm=/usr/local/bin/ncm.py
......
......@@ -43,7 +43,7 @@ foreach $file (@files) {
$hash{$key} = $val unless ($hash{$key});
}
next unless ($hash{ANN});
next unless ($hash{ANN} =~ m/HIGH|MODERATE|LOW/);
#next unless ($hash{ANN} =~ m/HIGH|MODERATE|LOW/);
my %gtinfo = ();
my @deschead = split(/:/,$format);
F1:foreach my $k (0..$#gtheader) {
......@@ -79,15 +79,15 @@ foreach $file (@files) {
@tumoraltct = split(/,/,$gtinfo{$opt{tumor}}{AO});
next if ($tumoraltct[0] eq '.');
$hash{AF} = join(",",@tumormaf);
next if ($tumoraltct[0] < 20);
next if ($tumoraltct[0] < 20 && $tumormaf[0] < 0.05);
next if ($tumormaf[0] < 0.01);
my $keepforvcf = 0;
foreach $trx (split(/,/,$hash{ANN})) {
my ($allele,$effect,$impact,$gene,$geneid,$feature,
$featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna,
$cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx);
next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i);
next if($effect eq 'sequence_feature');
#next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i);
#next if($effect eq 'sequence_feature');
$keepforvcf = $gene;
}
next unless $keepforvcf;
......
......@@ -41,11 +41,11 @@ W1:while (my $line = <IN>) {
$hash{$key} = $val unless ($hash{$key});
}
if (length($alt) > length($ref)) {
$hash{SVTYPE} = "INS";
$hash{END} = $pos;
$hash{SVTYPE} = "INS";
$hash{END} = $pos;
}elsif (length($alt) < length($ref)) {
my $diff = substr($ref, length($alt));
$hash{SVTYPE} = "DEL";
my $diff = substr($ref, length($alt));
$hash{SVTYPE} = "DEL";
$hash{END} = $pos + length($diff);
}
next unless ($hash{ANN});
......@@ -92,7 +92,7 @@ W1:while (my $line = <IN>) {
my ($allele,$effect,$impact,$gene,$geneid,$feature,
$featureid,$biotype,$rank,$codon,$aa,$pos_dna,$len_cdna,
$cds_pos,$cds_len,$aapos,$aalen,$distance,$err) = split(/\|/,$trx);
next unless ($impact =~ m/HIGH|MODERATE/ || $effect =~ /splice/i);
next unless ($impact =~ m/HIGH|MODERATE|LOW/ || $effect =~ /splice/i);
next if($effect eq 'sequence_feature');
$keeptrx = $trx;
$keepforvcf = $gene;
......@@ -107,7 +107,6 @@ W1:while (my $line = <IN>) {
push @nannot, $info;
}
}
my $newannot = join(";",@nannot);
if ($hash{SVTYPE} eq 'INS' || ($hash{SVTYPE} eq 'DEL' && $keepforvcf !~ m/&/)) {
if ($filter =~ m/LOWMAPQ|LowQual/i) {
......@@ -231,9 +230,6 @@ close IN;
foreach my $id (keys %svpairs) {
if ($id =~ m/815016443/) {
warn "debugging\n";
}
my $alt1 = $svpairs{$id}{1}{alt};
my $alt2 = $svpairs{$id}{2}{alt};
my $svtype;
......@@ -242,14 +238,14 @@ foreach my $id (keys %svpairs) {
}elsif ($alt2 =~ m/^\w\[/ && $alt1 =~ m/^\]/) {
$svtype = 'INS';
}else {
$svtype = 'UNK';
$svtype = 'UNK';
}
if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/ && $svpairs{$id}{1}{span} < 9999)) {
if ($filter =~ m/LOWMAPQ|LowQual/i) {
if ($svtype eq 'INS' || ($svtype eq 'DEL' && $svpairs{$id}{1}{gene} !~ m/&/ && $svpairs{$id}{1}{span} < 9999)) {
if ($filter =~ m/LOWMAPQ|LowQual/i) {
$filter = 'FailedQC'.$filter;
}
}
print VCFOUT $svpairs{$id}{1}{vcfline},"\n"
}elsif ($svtype eq 'DEL' && $svpairs{$id}{1}{span} && $svpairs{$id}{1}{span} > 9999) {
print DELFUS $svpairs{$id}{1}{fusionline},"\n";
}
}elsif ($svtype eq 'DEL' && $svpairs{$id}{1}{span} && $svpairs{$id}{1}{span} > 9999) {
print DELFUS $svpairs{$id}{1}{fusionline},"\n";
}
}
......@@ -107,6 +107,12 @@ then
fi
bamlist=`join_by , *.bam`
Platypus.py callVariants --minMapQual=0 --minReads=3 --mergeClusteredVariants=1 --nCPU=$NPROC --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf
for i in *.bam
do
prefix="${i%.bam}"
sid=`samtools view -H ${i} |grep '^@RG' |perl -pe 's/\t/\n/g' |grep ID |cut -f 2 -d ':'`
perl -pi -e "s/$prefix/$sid/g" platypus.vcf
done
vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
tabix platypus.vcf.gz
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
......
......@@ -30,8 +30,10 @@ if [[ -z $sbam ]] || [[ -z $index_path ]]; then
usage
fi
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
if [[ -z $isdocker ]]
then
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
fi
bedopt=''
if [[ -n $capture ]]
then
......
......@@ -24,8 +24,11 @@ function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module load bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 snpeff/4.3q
if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 snpeff/4.3q
fi
if [[ -a "${index_path}/genome.fa" ]]
then
......
......@@ -38,8 +38,8 @@ if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load htslib/gcc/1.8 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
fi
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
shift $(($OPTIND -1))
......
......@@ -62,18 +62,23 @@ if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load htslib/gcc/1.8 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
fi
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
mkdir -p temp
if [[ -z $tid ]]
if [[ -z $tid ]] && [[ -f ${sbam} ]]
then
tid=`samtools view -H ${sbam} |grep '^@RG' |perl -pe 's/\t/\n/g' |grep ID |cut -f 2 -d ':'`
fi
bams=''
for i in *.bam; do
bams="$bams $i"
bams="$bams $i"
sid=`samtools view -H ${i} |grep '^@RG' |perl -pe 's/\t/\n/g' |grep ID |cut -f 2 -d ':'`
if [[ $sid =~ "_T_" ]]
then
tid=$sid
fi
done
#RUN DELLY
......@@ -90,8 +95,9 @@ then
java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].DP >= 20 )" ${pair_id}.delly.sv.norm.vcf.gz | java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - | bgzip > ${pair_id}.delly.vcf.gz
if [[ $filter == 1 ]]
then
zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u > ${pair_id}.dgf.txt
zcat ${pair_id}.delly.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion|transcript_ablation' | sort -u > ${pair_id}.dgf.txt
mv ${pair_id}.delly.vcf.gz ${pair_id}.delly.ori.vcf.gz
echo "perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz"
perl $baseDir/filter_delly.pl -t $tid -p $pair_id -i ${pair_id}.delly.ori.vcf.gz
bgzip -f ${pair_id}.delly.vcf
zgrep '#CHROM' ${pair_id}.delly.vcf.gz > ${pair_id}.delly.genefusion.txt
......@@ -120,7 +126,7 @@ then
if [[ $filter == 1 ]]
then
zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion' | sort -u > ${pair_id}.sgf.txt
zcat ${pair_id}.svaba.sv.vcf.gz | $SNPEFF_HOME/scripts/vcfEffOnePerLine.pl |java -jar $SNPEFF_HOME/SnpSift.jar extractFields - CHROM POS ALT ID ANN[*].EFFECT ANN[*].GENE ANN[*].BIOTYPE FILTER FORMAT GEN[*] |grep -E 'gene_fusion|feature_fusion|transcript_ablation' | sort -u > ${pair_id}.sgf.txt
mv ${pair_id}.svaba.vcf.gz ${pair_id}.svaba.ori.vcf.gz
perl $baseDir/filter_svaba.pl -t $tid -p ${pair_id} -i ${pair_id}.svaba.ori.vcf.gz -s ${pair_id}.svaba.sv.vcf.gz
bgzip ${pair_id}.svaba.vcf
......@@ -190,6 +196,7 @@ then
elif [[ $method == 'itdseek' ]]
then
stexe=`which samtools`
echo $stexe
samtools view -@ $NPROC -L ${itdbed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | java -Xmx30g -jar $SNPEFF_HOME/SnpSift.jar filter "( LEN < 10000 )" | bgzip > ${pair_id}.itdseek.vcf.gz
tabix ${pair_id}.itdseek.vcf.gz
......
......@@ -41,8 +41,8 @@ if [[ -z $isdocker ]]
then
source /etc/profile.d/modules.sh
module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
fi
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
mv ${vcf} ${pair_id}.ori.vcf.gz
......