Skip to content
Snippets Groups Projects
Commit 58c7a125 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

update options for tbed

parent 0002b743
Branches
Tags
No related merge requests found
......@@ -69,8 +69,11 @@ fi
if [[ -n $tbed ]]
then
interval=$tbed
awk '{print $1":"$2"-"$3}' $tbed > fbsplit.genomefile.txt
fbsplit=fbsplit.genomefile.txt
else
interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'`
fbsplit="${index_path}/genomefile.5M.txt"
fi
source /etc/profile.d/modules.sh
......@@ -97,7 +100,7 @@ then
for i in *.bam; do
bamlist="$bamlist --bam ${PWD}/${i}"
done
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $NPROC "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf"
cut -f 1 $fbsplit | parallel --delay 1 --jobs 0 "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf"
vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.fb.vcf.gz -
elif [[ $algo == 'platypus' ]]
then
......
#!/bin/bash
#run_somatic_caller.sh
usage(){
echo "-h --Help documentation for run_somatic_caller.sh"
echo "-a --Somatic Workflow Method: strelka2, virmid, speedseq, mutect2, varscan, shimmer, lancet"
echo "-r --Path to Reference Genome with the file genome.fa"
echo "-n --Normal"
echo "-t --Tumor"
echo "-p -- PairID"
echo "-x --NormalID"
echo "-y --TumorID"
echo "-i --NormalBAM used for Mantra in the case of UMI consensus"
echo "-j --TumorBAM used for Mantra in the case of UMI consensus"
echo "-b --TargetBed"
echo "Example: bash somatic_vc.sh -a strelka2 -p subj1 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam"
exit 1
echo "-h --Help documentation for run_somatic_caller.sh"
echo "-a --Somatic Workflow Method: strelka2, virmid, speedseq, mutect2, varscan, shimmer, lancet"
echo "-r --Path to Reference Genome with the file genome.fa"
echo "-n --Normal"
echo "-t --Tumor"
echo "-p -- PairID"
echo "-x --NormalID"
echo "-y --TumorID"
echo "-i --NormalBAM used for Mantra in the case of UMI consensus"
echo "-j --TumorBAM used for Mantra in the case of UMI consensus"
echo "-b --TargetBed"
echo "Example: bash somatic_vc.sh -a strelka2 -p subj1 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :n:t:p:r:x:y:i:j:q:a:b:h opt
do
case $opt in
r) index_path=$OPTARG;;
x) tid=$OPTARG;;
y) nid=$OPTARG;;
p) pair_id=$OPTARG;;
n) normal=$OPTARG;;
t) tumor=$OPTARG;;
i) mnormal=$OPTARG;;
j) mtumor=$OPTARG;;
a) algo=$OPTARG;;
b) tbed=$OPTARG;;
q) pon==$OPTARG;;
h) usage;;
r) index_path=$OPTARG;;
x) tid=$OPTARG;;
y) nid=$OPTARG;;
p) pair_id=$OPTARG;;
n) normal=$OPTARG;;
t) tumor=$OPTARG;;
a) algo=$OPTARG;;
b) tbed=$OPTARG;;
q) pon==$OPTARG;;
h) usage;;
esac
done
......@@ -47,20 +43,14 @@ if [[ -z $normal ]] || [[ -z $tumor ]] || [[ -z $algo ]]; then
fi
NPROC=$SLURM_CPUS_ON_NODE
if [[ -z $NPROC ]]
then
then
NPROC=`nproc`
fi
#pair_id=${tid}_${nid}
if [[ -z $mtumor ]]
then
mtumor=tumor
mnormal=normal
fi
ponopt='';
if [[ -f $pon ]]
then
ponopt="--pon $pon"
else
ponopt='';
fi
if [[ -a "${index_path}/genome.fa" ]]
......@@ -105,32 +95,40 @@ fi
source /etc/profile.d/modules.sh
module load htslib/gcc/1.8
module load htslib/gcc/1.8 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
if [ $algo == 'strelka2' ]
then
module load strelka/2.9.10 manta/1.3.1 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14
module load strelka/2.9.10 manta/1.3.1
opt=''
if [[ -n $tbed ]]
then
opt="--callRegions ${tbed}.gz"
if [[ -f $tbed ]]
then
opt="--callRegions ${tbed}.gz"
else
cp $tbed targetpanel.bed
bgzip targetpanel.bed
tabix targetpanel.bed.gz
opt="--callRegions targetpanel.bed.gz"
fi
fi
mkdir manta
configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} $opt --runDir manta
manta/runWorkflow.py -m local -j 8
mantaopt=''
if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]]
then
configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
else
configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates --runDir strelka
mantaopt="--indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz"
fi
configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --runDir strelka $mantaopt
strelka/runWorkflow.py -m local -j 8
vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz
fi
if [ $algo == 'virmid' ]
then
module load virmid/1.2 samtools/gcc/1.8 vcftools/0.1.14
elif [ $algo == 'virmid' ]
then
module load virmid/1.2
virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $NPROC -M 2000 -c1 10 -c2 10
perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf
perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf
......@@ -139,24 +137,23 @@ if [ $algo == 'virmid' ]
vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz
elif [ $algo == 'mutect' ]
then
gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa}
gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval
vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
module load gatk/4.1.4.0 picard/2.10.3
gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval
vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
elif [ $algo == 'varscan' ]
then
module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14
module rm java/oracle/jdk1.7.0_51
module load snpeff/4.3q
samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup
samtools mpileup -C 50 -f ${reffa} $normal > n.mpileup
VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1
VarScan copynumber n.mpileup t.mpileup vscancnv
vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz
module load bcftools/gcc/1.8 VarScan/2.4.2
module rm java/oracle/jdk1.7.0_51
module load snpeff/4.3q
samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup
samtools mpileup -C 50 -f ${reffa} $normal > n.mpileup
VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1
VarScan copynumber n.mpileup t.mpileup vscancnv
vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz
elif [ $algo == 'shimmer' ]
then
module load samtools/gcc/1.8 R/3.6.1-gccmkl vcftools/0.1.14
module load R/3.6.1-gccmkl
shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err
perl $baseDir/add_readct_shimmer.pl
module rm java/oracle/jdk1.7.0_51
......
......@@ -11,19 +11,20 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:p:b:i:x:y:n:l:a:hf opt
while getopts :r:p:b:t:x:c:y:n:l:a:hf opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
b) sbam=$OPTARG;;
i) tumor=$OPTARG;;
t) tumor=$OPTARG;;
n) normal=$OPTARG;;
a) method=$OPTARG;;
x) tid=$OPTARG;;
y) nid=$OPTARG;;
f) filter=1;;
l) bed=$OPTARG;;
b) sbam=$OPTARG;;
c) tbed=$OPTARG;;
l) itdbed=$OPTARG;;
h) usage;;
esac
done
......@@ -152,14 +153,19 @@ then
echo -e "${i}\t400\t${sname}" >> ${pair_id}.pindel.config
samtools index -@ $NPROC $i
done
pindel -T $NPROC -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP
bedopt=''
if [[ -f $tbed ]]
then
bedopt="-j $tbed"
fi
pindel -T $NPROC -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --report_inversions false $bedopt
pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf
cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz
tabix pindel.vcf.gz
bash $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz
perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel.sv.vcf.gz
if [[ $filter == 1 ]]
then
......@@ -178,7 +184,7 @@ then
elif [[ $method == 'itdseek' ]]
then
stexe=`which samtools`
samtools view -@ $NPROC -L ${bed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | java -Xmx30g -jar $SNPEFF_HOME/SnpSift.jar filter "( LEN < 10000 )" | bgzip > ${pair_id}.itdseek.vcf.gz
samtools view -@ $NPROC -L ${itdbed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | java -Xmx30g -jar $SNPEFF_HOME/SnpSift.jar filter "( LEN < 10000 )" | bgzip > ${pair_id}.itdseek.vcf.gz
tabix ${pair_id}.itdseek.vcf.gz
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment