diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index 80a32a2d5130572f8ba2db9d9f46fec6be225a7d..07ca1db67f8408985e013a729a30b6bc99a4766a 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -51,11 +51,12 @@ if [[ -z $NPROC ]] then NPROC=`nproc` fi +threads=`expr $NPROC - 10` if [[ $dedup == 1 ]] then mv $sbam ori.bam - samtools view -@ $NPROC -F 1024 -b -o ${sbam} ori.bam + samtools view -@ $threads -F 1024 -b -o ${sbam} ori.bam fi tmpdir=`pwd` if [[ $nuctype == 'dna' ]]; then @@ -65,13 +66,13 @@ if [[ $nuctype == 'dna' ]]; then perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt if [[ -z $skiplc ]] then - samtools view -@ $NPROC -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} - samtools index -@ $NPROC ${pair_id}.ontarget.bam + samtools view -@ $threads -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} + samtools index -@ $threads ${pair_id}.ontarget.bam samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt - samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -hist -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt - java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity BARCODE_TAG=RG I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir} + samtools view -@ $threads -b -q 1 ${sbam} | bedtools coverage -hist -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt + java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$threads -jar $PICARD/picard.jar EstimateLibraryComplexity BARCODE_TAG=RG I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir} #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir} - #samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt + #samtools view -@ $threads ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt fi #java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir} if [[ $index_path/reference_info.pl ]] diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index c5c206fa74037d86cea9f81159ba5547ed950540..c143ea31bb20bb6e1ee5c60fac3c7b52743228e2 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -57,14 +57,10 @@ then export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images" cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh else - #jeremy to change for DNANEXUS - module add star/2.5.2b - refgeno=${index_path}/CTAT_lib/ + refgeno=${index_path}/CTAT_resource_lib STAR-Fusion --genome_lib_dir ${refgeno} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err cp ${pair_id}_star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt - module load singularity/3.0.2 - export PYENSEMBL_CACHE_DIR="/project/shared/bicf_workflow_ref/singularity_images" - cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "singularity exec /project/shared/bicf_workflow_ref/singularity_images/agfusion.simg agfusion annotate -db /project/shared/bicf_workflow_ref/singularity_images/pyensembl/GRCh38/ensembl92/agfusion.homo_sapiens.92.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh + cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "agfusion annotate -db agfusion.homo_sapiens.87.db -g5", $1,"-j5",$4,"-g3",$6,"-j3",$9,"-o",$1"_"$4"_"$6"_"$9}' |grep -v 'LeftGene' |sh fi if [[ $filter == 1 ]] @@ -73,5 +69,3 @@ then bedtools intersect -wao -a temp.bed -b ${index_path}/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt perl $baseDir/filter_genefusions.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt fi - - diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 38ce8eeb0d1bd0cc26d1108f71372a9c7f343885..09e14265bc3d3c5b5464afdcd7152c1f4e1f19bb 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -66,6 +66,13 @@ else ponopt=''; fi +if [[ -n $tbed ]] +then + interval=$tbed +else + interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` +fi + source /etc/profile.d/modules.sh module load python/2.7.x-anaconda picard/2.10.3 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel @@ -108,15 +115,14 @@ then for i in *.bam; do prefix="${i%.bam}" echo ${prefix} - gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -O haplotypecaller.vcf.gz + gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -G StandardAnnotation -G AS_StandardAnnotation -G StandardHCAnnotation -O haplotypecaller.vcf.gz -L $interval java -jar $PICARD/picard.jar SortVcf I=haplotypecaller.vcf.gz O=${prefix}.gatk.g.vcf R=${reffa} CREATE_INDEX=TRUE gvcflist="$gvcflist -V ${prefix}.gatk.g.vcf" done - interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` - gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb -L $interval - gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf - bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz + gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb -L $interval --reader-threads $NPROC + gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf -L $interval + bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type | bgzip > ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz elif [ $algo == 'mutect' ] then @@ -126,10 +132,8 @@ then for i in *.bam; do bamlist+="-I ${i} " done - gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` - #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf + gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` -L $interval vcf-sort ${pair_id}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz - elif [[ $algo == 'strelka2' ]] then opt='' diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 4631780783270f6c1776f1b7f94788921553be51..90911047ab1f03e5f595c529859a6f401054f324 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -88,6 +88,13 @@ else fi baseDir="`dirname \"$0\"`" +if [[ -n $tbed ]] +then + interval=$tbed +else + interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` +fi + source /etc/profile.d/modules.sh module load htslib/gcc/1.8 export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH @@ -126,8 +133,7 @@ then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} - gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf - #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then