Skip to content
Snippets Groups Projects
Commit 4eef99e5 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

Merge branch 'master' of git.biohpc.swmed.edu:BICF/Astrocyte/process_scripts

parents 5000ba2b 0270fd9c
Branches
Tags
No related merge requests found
...@@ -46,7 +46,7 @@ fi ...@@ -46,7 +46,7 @@ fi
testexe='/project/shared/bicf_workflow_ref/seqprg/bin' testexe='/project/shared/bicf_workflow_ref/seqprg/bin'
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
module load bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 module load python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3
baseDir="`dirname \"$0\"`" baseDir="`dirname \"$0\"`"
......
...@@ -26,7 +26,7 @@ done ...@@ -26,7 +26,7 @@ done
shift $(($OPTIND -1)) shift $(($OPTIND -1))
index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/' index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj'
# Check for mandatory options # Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]]; then if [[ -z $pair_id ]] || [[ -z $sbam ]]; then
...@@ -38,7 +38,7 @@ then ...@@ -38,7 +38,7 @@ then
fi fi
baseDir="`dirname \"$0\"`" baseDir="`dirname \"$0\"`"
if [[ $capture == '${index_path}/UTSWV2.bed' ]] if [[ $capture == "${index_path}/UTSWV2.bed" ]]
then then
normals="${index_path}/UTSWV2.normals.cnn" normals="${index_path}/UTSWV2.normals.cnn"
targets="${index_path}/UTSWV2.cnvkit_" targets="${index_path}/UTSWV2.cnvkit_"
...@@ -46,11 +46,11 @@ then ...@@ -46,11 +46,11 @@ then
then then
normals="${index_path}/UTSWV2.uminormals.cnn" normals="${index_path}/UTSWV2.uminormals.cnn"
fi fi
elif [[ $capture == '${index_path}/UTSWV2_2.panelplus.bed' ]] elif [[ $capture == "${index_path}/UTSWV2_2.panelplus.bed" ]]
then then
normals="${index_path}/panelofnormals.panel1385V2_2.cnn" normals="${index_path}/panelofnormals.panel1385V2_2.cnn"
targets="${index_path}/panel1385V2-2.cnvkit_" targets="${index_path}/panel1385V2-2.cnvkit_"
elif [[ $capture == '${index_path}/hemepanelV3.bed' ]] elif [[ $capture == "${index_path}/hemepanelV3.bed" ]]
then then
normals="${index_path}/hemepanelV3.flatreference.cnn" normals="${index_path}/hemepanelV3.flatreference.cnn"
targets="${index_path}/hemepanelV3.cnvkit_" targets="${index_path}/hemepanelV3.cnvkit_"
......
...@@ -57,11 +57,7 @@ else ...@@ -57,11 +57,7 @@ else
fi fi
source /etc/profile.d/modules.sh source /etc/profile.d/modules.sh
user=$USER module load gatk/4.1.2.0 samtools/gcc/1.8
module load gatk/4.x singularity/2.6.1 samtools/gcc/1.8
mkdir /tmp/${user}
export TMP_HOME=/tmp/${user}
samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} samtools index -@ $SLURM_CPUS_ON_NODE ${sbam}
if [[ $algo == 'gatkbam_rna' ]] if [[ $algo == 'gatkbam_rna' ]]
...@@ -71,14 +67,14 @@ then ...@@ -71,14 +67,14 @@ then
java -Xmx4g -jar $PICARD/picard.jar ReorderSam I=${pair_id}.clean.bam O=${pair_id}.sort.bam R=${reffa} CREATE_INDEX=TRUE java -Xmx4g -jar $PICARD/picard.jar ReorderSam I=${pair_id}.clean.bam O=${pair_id}.sort.bam R=${reffa} CREATE_INDEX=TRUE
java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id} java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id}
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.rg_added_sorted.bam samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.rg_added_sorted.bam
singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam
singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities
singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam
elif [[ $algo == 'gatkbam' ]] elif [[ $algo == 'gatkbam' ]]
then then
singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities
singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table
samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam
elif [[ $algo == 'abra2' ]] elif [[ $algo == 'abra2' ]]
......
...@@ -33,21 +33,21 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] ...@@ -33,21 +33,21 @@ if [[ -z $SLURM_CPUS_ON_NODE ]]
then then
SLURM_CPUS_ON_NODE=1 SLURM_CPUS_ON_NODE=1
fi fi
if [[ -a "${index_path}/dbSnp.vcf.gz" ]] if [[ -s "${index_path}/dbSnp.vcf.gz" ]]
then then
dbsnp="${index_path}/dbSnp.vcf.gz" dbsnp="${index_path}/dbSnp.vcf.gz"
else else
echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz" echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
usage usage
fi fi
if [[ -a "${index_path}/GoldIndels.vcf.gz" ]] if [[ -s "${index_path}/GoldIndels.vcf.gz" ]]
then then
knownindel="${index_path}/GoldIndels.vcf.gz" knownindel="${index_path}/GoldIndels.vcf.gz"
else else
echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz" echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz"
usage usage
fi fi
if [[ -a "${index_path}/genome.fa" ]] if [[ -s "${index_path}/genome.fa" ]]
then then
reffa="${index_path}/genome.fa" reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict" dict="${index_path}/genome.dict"
...@@ -124,8 +124,8 @@ then ...@@ -124,8 +124,8 @@ then
gvcflist="$gvcflist --bam ${i}" gvcflist="$gvcflist --bam ${i}"
done done
configManta.py $gvcflist --referenceFasta ${reffa} $mode --runDir manta configManta.py $gvcflist --referenceFasta ${reffa} $mode --runDir manta
manta/runWorkflow.py -m local -j 8 manta/runWorkflow.py -m local -j $SLURM_CPUS_ON_NODE
configureStrelkaGermlineWorkflow.py $gvcflist --referenceFasta ${reffa} $mode --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka configureStrelkaGermlineWorkflow.py $gvcflist --referenceFasta ${reffa} $mode --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
strelka/runWorkflow.py -m local -j 8 strelka/runWorkflow.py -m local -j $SLURM_CPUS_ON_NODE
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz
fi fi
...@@ -30,4 +30,4 @@ perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf ...@@ -30,4 +30,4 @@ perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
bgzip -f ${pair_id}.uniform.vcf bgzip -f ${pair_id}.uniform.vcf
j=${pair_id}.uniform.vcf.gz j=${pair_id}.uniform.vcf.gz
tabix -f $j tabix -f $j
bcftools norm -m - -O z -o ${pair_id}.norm.vcf.gz $j bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz
...@@ -56,8 +56,9 @@ done ...@@ -56,8 +56,9 @@ done
pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP
pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf
cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz
perl $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz perl $baseDir/parse_pindel.pl ${pair_id} pindel.vcf.gz
perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > indel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz perl $baseDir/norm_annot.sh -r ${index_path} -p pindel_indel -v indel.vcf.gz
mv pindel_indel.norm.vcf.gz ${pair_id}.pindel_indel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf |bgzip > ${pair_id}.pindel_tandemdup.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf |bgzip > ${pair_id}.pindel_tandemdup.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf |bgzip > ${pair_id}.pindel_sv.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf |bgzip > ${pair_id}.pindel_sv.vcf.gz
#!/bin/bash
#union.sh
usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-p --Prefix for output file name"
echo "-v --VCF File"
echo "Example: bash union.sh -p prefix -r /path/GRCh38"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:p:v:h opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
v) vcf=$OPTARG;;
h) usage;;
esac
done
function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q
perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
mv ${vcf} ${pair_id}.ori.vcf.gz
bgzip -f ${pair_id}.uniform.vcf
j=${pair_id}.uniform.vcf.gz
tabix -f $j
bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz
bash $baseDir/annotvcf.sh -p ${pair_id} -r $index_path -v ${pair_id}.norm.vcf.gz
/project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub ${pair_id}.annot.vcf.gz -p -a -o ${pair_id}.vcf
bgzip -f ${pair_id}.vcf
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
my $headerfile = shift @ARGV; my $headerfile = shift @ARGV;
my @vcffiles = @ARGV; my @vcffiles = @ARGV;
my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid');
%algos = map {$_=>1} @callers;
open HEADER, "<$headerfile" or die $!; open HEADER, "<$headerfile" or die $!;
open OUT, ">int.vcf" or die $!; open OUT, ">int.vcf" or die $!;
...@@ -15,7 +17,11 @@ my @sampleorder; ...@@ -15,7 +17,11 @@ my @sampleorder;
my %headerlines; my %headerlines;
foreach $vcf (@vcffiles) { foreach $vcf (@vcffiles) {
$caller = (split(/\./,$vcf))[-3]; my @filename = (split(/\./,$vcf));
my $caller;
foreach $fio (@filename) {
$caller = $fio if ($algos{$fio});
}
open VCF, "gunzip -c $vcf|" or die $!; open VCF, "gunzip -c $vcf|" or die $!;
my @sampleids; my @sampleids;
while (my $line = <VCF>) { while (my $line = <VCF>) {
...@@ -125,19 +131,18 @@ foreach $vcf (@vcffiles) { ...@@ -125,19 +131,18 @@ foreach $vcf (@vcffiles) {
my @filts = split(";",$filter); my @filts = split(";",$filter);
my %filterqc = map {$_ => 1} @filts; my %filterqc = map {$_ => 1} @filts;
delete $filterqc{'.'}; delete $filterqc{'.'};
if ($gtfilt{'StrandBias'} || ($hash{FS} && $hash{FS} > 60) || if ($gtfilt{'StrandBias'} || ($hash{FS} && $hash{FS} > 60)) { # ||($hash{SAP} && $hash{SAP} > 20)) {
($hash{SAP} && $hash{SAP} > 20)) {
$filterqc{strandBias} = 1; $filterqc{strandBias} = 1;
}if (scalar(keys %filterqc) > 1) { $annot .= ';strandBias=1';
}if (scalar(keys %filterqc) > 0) {
$filter = join(";",keys %filterqc); $filter = join(";",keys %filterqc);
}else { }else {
$filter = '.'; $filter = '.';
} }
$lines{$chrom}{$pos}{$ref}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc]; $lines{$chrom}{$pos}{$ref}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc] unless $lines{$chrom}{$pos}{$ref}{$alt}{$caller};
} }
close VCF; close VCF;
} }
my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid');
F1:foreach $chr (sort {$a cmp $b} keys %lines) { F1:foreach $chr (sort {$a cmp $b} keys %lines) {
F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) { F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment