Skip to content
Snippets Groups Projects
Commit 953795b2 authored by Brandi Cantarel
Browse files

starfusion DNANexus;GATK add with Intervals; bamqc add threads

parent 283f4815
Branches
Tags
No related merge requests found
......@@ -51,11 +51,12 @@ if [[ -z $NPROC ]]
then
NPROC=`nproc`
fi
# Leave 10 cores free, but never request fewer than one worker thread.
# On hosts with 10 or fewer cores the plain subtraction yields zero or a
# negative number, and that value is handed to samtools -@ and
# -XX:ParallelGCThreads below.
threads=$(( ${NPROC:-1} - 10 ))
if (( threads < 1 ))
then
    threads=1
fi
# When dedup is 1, keep the incoming BAM as ori.bam and rewrite ${sbam}
# without the reads that have SAM flag 1024 set (-F 1024 excludes them).
if [[ $dedup == 1 ]]
then
mv $sbam ori.bam
# NOTE(review): the same samtools call appears twice, once with -@ $NPROC and
# once with -@ $threads; presumably the $NPROC form is the pre-change line of
# this diff -- confirm before running both.
samtools view -@ $NPROC -F 1024 -b -o ${sbam} ori.bam
samtools view -@ $threads -F 1024 -b -o ${sbam} ori.bam
fi
tmpdir=`pwd`
if [[ $nuctype == 'dna' ]]; then
......@@ -65,13 +66,13 @@ if [[ $nuctype == 'dna' ]]; then
perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt
# On-target, mapping-quality coverage and library-complexity metrics; this
# section runs only when skiplc is empty.
if [[ -z $skiplc ]]
then
# NOTE(review): each threaded command below appears twice, once with $NPROC and
# once with $threads; presumably the $NPROC form is the pre-change line of this
# diff -- confirm.
# Restrict the BAM to the regions in ${bed}, index it, then record flagstat counts.
samtools view -@ $NPROC -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam}
samtools index -@ $NPROC ${pair_id}.ontarget.bam
samtools view -@ $threads -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam}
samtools index -@ $threads ${pair_id}.ontarget.bam
samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
# Coverage histogram over ${bed} using only reads with MAPQ >= 1 (-q 1),
# followed by Picard EstimateLibraryComplexity on the full BAM.
samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -hist -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity BARCODE_TAG=RG I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
samtools view -@ $threads -b -q 1 ${sbam} | bedtools coverage -hist -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$threads -jar $PICARD/picard.jar EstimateLibraryComplexity BARCODE_TAG=RG I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir}
#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir}
#samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
#samtools view -@ $threads ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
fi
#java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir}
if [[ $index_path/reference_info.pl ]]
......
......@@ -57,14 +57,10 @@ then
export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images"
cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh
else
#jeremy to change for DNANEXUS
module add star/2.5.2b
refgeno=${index_path}/CTAT_lib/
refgeno=${index_path}/CTAT_resource_lib
STAR-Fusion --genome_lib_dir ${refgeno} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err
cp ${pair_id}_star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt
module load singularity/3.0.2
export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images"
cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh
cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "agfusion annotate -db agfusion.homo_sapiens.87.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh
fi
if [[ $filter == 1 ]]
......@@ -73,5 +69,3 @@ then
bedtools intersect -wao -a temp.bed -b ${index_path}/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt
perl $baseDir/filter_genefusions.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt
fi
......@@ -66,6 +66,13 @@ else
ponopt='';
fi
# Choose the value later passed after -L: the target BED when one was given,
# otherwise every contig name in ${reffa}.fai that does not match
# decoy/HLA/alt/chrUn/random, joined with " -L " so that the unquoted
# "-L $interval" expands to one -L flag per contig.
if [[ -z $tbed ]]
then
    interval=$(cut -f 1 ${reffa}.fai \
        | grep -v -e decoy -e 'HLA' -e alt -e 'chrUn' -e 'random' \
        | perl -pe 's/\n/ -L /g' \
        | perl -pe 's/-L $//')
else
    interval=$tbed
fi
source /etc/profile.d/modules.sh
module load python/2.7.x-anaconda picard/2.10.3 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel
......@@ -108,15 +115,14 @@ then
# For every BAM in the working directory, run HaplotypeCaller with
# --emit-ref-confidence GVCF, sort the result with Picard, and append the
# sorted file to gvcflist as a "-V <file>" argument.
for i in *.bam; do
# Strip the .bam suffix to get the per-sample output prefix.
prefix="${i%.bam}"
echo ${prefix}
# NOTE(review): two HaplotypeCaller calls appear here and both write
# haplotypecaller.vcf.gz; the second adds the -G annotation groups and
# -L $interval. Presumably the first is the pre-change line of this diff -- confirm.
gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -O haplotypecaller.vcf.gz
gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -G StandardAnnotation -G AS_StandardAnnotation -G StandardHCAnnotation -O haplotypecaller.vcf.gz -L $interval
# Write the sorted calls to ${prefix}.gatk.g.vcf with an index (CREATE_INDEX=TRUE).
java -jar $PICARD/picard.jar SortVcf I=haplotypecaller.vcf.gz O=${prefix}.gatk.g.vcf R=${reffa} CREATE_INDEX=TRUE
gvcflist="$gvcflist -V ${prefix}.gatk.g.vcf"
done
interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'`
gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb -L $interval
gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf
bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz
gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb -L $interval --reader-threads $NPROC
gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf -L $interval
bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type | bgzip > ${pair_id}.gatk.vcf.gz
tabix ${pair_id}.gatk.vcf.gz
elif [ $algo == 'mutect' ]
then
......@@ -126,10 +132,8 @@ then
# Append one "-I <file> " argument per BAM in the working directory; the
# resulting string is expanded unquoted on the Mutect2 command line.
for i in *.bam
do
    bamlist="${bamlist}-I ${i} "
done
gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd`
#gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf
gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` -L $interval
vcf-sort ${pair_id}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
elif [[ $algo == 'strelka2' ]]
then
opt=''
......
......@@ -88,6 +88,13 @@ else
fi
# Directory containing this script.
baseDir="$(dirname "$0")"
# Choose the value later passed after -L: the target BED when one was given,
# otherwise the contig names from ${reffa}.fai with decoy/HLA/alt/chrUn/random
# entries removed, joined with " -L " so that the unquoted "-L $interval"
# expands to one -L flag per contig.
if [[ -z $tbed ]]
then
    interval=$(cut -f 1 ${reffa}.fai \
        | grep -v -e decoy -e 'HLA' -e alt -e 'chrUn' -e 'random' \
        | perl -pe 's/\n/ -L /g' \
        | perl -pe 's/-L $//')
else
    interval=$tbed
fi
source /etc/profile.d/modules.sh
module load htslib/gcc/1.8
export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH
......@@ -126,8 +133,7 @@ then
gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz
module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa}
gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf
#gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf
gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval
vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
elif [ $algo == 'varscan' ]
then
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment