diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 5687a5d19c43811861e59b7b0c3552121046df23..565fac72b6a340d96d75242e0ff2d9eec27b7385 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -32,6 +32,7 @@ then SLURM_CPUS_ON_NODE=1 fi baseDir="`dirname \"$0\"`" +testexe='/project/shared/bicf_workflow_ref/seqprg/bin' source /etc/profile.d/modules.sh module load picard/2.10.3 samtools/gcc/1.8 @@ -40,9 +41,11 @@ if [ $algo == 'sambamba' ] then module load speedseq/20160506 sambamba markdup -t $SLURM_CPUS_ON_NODE ${sbam} ${pair_id}.dedup.bam + touch ${pair_id}.dedup.stat.txt elif [ $algo == 'samtools' ] then samtools markdup -s --output-fmt BAM -@ $SLURM_CPUS_ON_NODE sort.bam ${pair_id}.dedup.bam + touch ${pair_id}.dedup.stat.txt elif [ $algo == 'picard' ] then java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt @@ -53,13 +56,19 @@ elif [ $algo == 'fgbio_umi' ] then module load fgbio bwa/intel/0.7.15 samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} - fgbio GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam --family-size-histogram ${pair_id}.umihist.txt -e 0 -m 0 - fgbio CallMolecularConsensusReads -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:' + fgbio GroupReadsByUmi --tmp-dir ./ -s identity -i ${sbam} -o ${pair_id}.group.bam --family-size-histogram ${pair_id}.umihist.txt -e 0 -m 0 + fgbio CallMolecularConsensusReads --tmp-dir ./ -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:' samtools index ${pair_id}.consensus.bam samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam gzip ${pair_id}.consensus.R1.fastq gzip ${pair_id}.consensus.R2.fastq - bwa mem -M -C -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/human/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam + bwa mem -M -C -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/human/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz > out.sam + if [ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ] + then + k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | samtools view -1 - > ${pair_id}.consensus.bam + else + samtools view -1 out.sam > ${pair_id}.consensus.bam + fi samtools sort --threads $SLURM_CPUS_ON_NODE -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam else cp ${sbam} ${pair_id}.dedup.bam diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index f6c5e287ae5d6b0e175335b54ff711809fb71693..2ddc6f73a6ef978dceab4671fa7779d0aa15af61 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -80,7 +80,7 @@ then for i in *.bam; do bamlist="$bamlist --bam ${PWD}/${i}" done - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" + cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $SLURM_CPUS_ON_NODE "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.freebayes.vcf.gz - elif [[ $algo == 'gatk' ]] then