Skip to content
Snippets Groups Projects
Commit 2948ddca authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Update samtools version and synxtax to fix bugs in last push.

parent ea8a20e5
Branches
Tags
1 merge request!6Resolve "Bam Stats and Filter"
Pipeline #1058 passed with stage
in 1 hour, 36 minutes, and 29 seconds
...@@ -44,7 +44,7 @@ workflow_modules: ...@@ -44,7 +44,7 @@ workflow_modules:
- 'cutadapt/1.9.1' - 'cutadapt/1.9.1'
- 'fastqc/0.11.2' - 'fastqc/0.11.2'
- 'bwa/intel/0.7.12' - 'bwa/intel/0.7.12'
- 'samtools/intel/1.3' - 'samtools/1.4.1'
- 'sambamba/0.6.6' - 'sambamba/0.6.6'
- 'bedtools/2.26.0' - 'bedtools/2.26.0'
......
...@@ -16,7 +16,7 @@ process { ...@@ -16,7 +16,7 @@ process {
cpus = 32 cpus = 32
} }
$filterReads{ $filterReads{
module = ['python/3.6.1-2-anaconda', 'samtools/intel/1.3', 'sambamba/0.6.6', 'bedtools/2.26.0'] module = ['python/3.6.1-2-anaconda', 'samtools/1.4.1', 'sambamba/0.6.6', 'bedtools/2.26.0']
cpus = 32 cpus = 32
} }
} }
......
...@@ -9,7 +9,6 @@ import shutil ...@@ -9,7 +9,6 @@ import shutil
import shlex import shlex
import logging import logging
from multiprocessing import cpu_count from multiprocessing import cpu_count
import json
import utils import utils
EPILOG = ''' EPILOG = '''
...@@ -66,18 +65,18 @@ def check_tools(): ...@@ -66,18 +65,18 @@ def check_tools():
raise Exception('Missing samtools') raise Exception('Missing samtools')
sambamba_path = shutil.which("sambamba") sambamba_path = shutil.which("sambamba")
if bwa_path: if sambamba_path:
logger.info('Found sambamba: %s', bwa_path) logger.info('Found sambamba: %s', sambamba_path)
else: else:
logger.error('Missing sambamba') logger.error('Missing sambamba')
raise Exception('Missing sambamba') raise Exception('Missing sambamba')
bedtools_path = shutil.which("bedtools") bedtools_path = shutil.which("bedtools")
if bwa_path: if bedtools_path:
logger.info('Found sambamba: %s', bwa_path) logger.info('Found bedtools: %s', bedtools_path)
else: else:
logger.error('Missing sambamba') logger.error('Missing bedtools')
raise Exception('Missing sambamba') raise Exception('Missing bedtools')
def filter_mapped_pe(bam, bam_basename): def filter_mapped_pe(bam, bam_basename):
...@@ -93,7 +92,7 @@ def filter_mapped_pe(bam, bam_basename): ...@@ -93,7 +92,7 @@ def filter_mapped_pe(bam, bam_basename):
# Remove low MAPQ reads # Remove low MAPQ reads
# Only keep properly paired reads # Only keep properly paired reads
# Obtain name sorted BAM file # Obtain name sorted BAM file
out, err = common.run_pipe([ out, err = utils.run_pipe([
# filter: -F 1804 FlAG bits to exclude; -f 2 FLAG bits to reqire; # filter: -F 1804 FlAG bits to exclude; -f 2 FLAG bits to reqire;
# -q 30 exclude MAPQ < 30; -u uncompressed output # -q 30 exclude MAPQ < 30; -u uncompressed output
# exclude FLAG 1804: unmapped, next segment unmapped, secondary # exclude FLAG 1804: unmapped, next segment unmapped, secondary
...@@ -103,14 +102,14 @@ def filter_mapped_pe(bam, bam_basename): ...@@ -103,14 +102,14 @@ def filter_mapped_pe(bam, bam_basename):
# sort: -n sort by name; - take input from stdin; # sort: -n sort by name; - take input from stdin;
# out to specified filename # out to specified filename
# Will produce name sorted BAM # Will produce name sorted BAM
"samtools sort -n -@%d -o %s" % (cpu_count(), tmp_filt_bam_filename)]) "samtools sort -n -@ %d -o %s" % (cpu_count(), tmp_filt_bam_filename)])
if err: if err:
logger.error("samtools filter error: %s" % (err)) logger.error("samtools filter error: %s" % (err))
# Remove orphan reads (pair was removed) # Remove orphan reads (pair was removed)
# and read pairs mapping to different chromosomes # and read pairs mapping to different chromosomes
# Obtain position sorted BAM # Obtain position sorted BAM
out, err = common.run_pipe([ out, err = utils.run_pipe([
# fill in mate coordinates, ISIZE and mate-related flags # fill in mate coordinates, ISIZE and mate-related flags
# fixmate requires name-sorted alignment; -r removes secondary and # fixmate requires name-sorted alignment; -r removes secondary and
# unmapped (redundant here because already done above?) # unmapped (redundant here because already done above?)
...@@ -119,7 +118,7 @@ def filter_mapped_pe(bam, bam_basename): ...@@ -119,7 +118,7 @@ def filter_mapped_pe(bam, bam_basename):
# repeat filtering after mate repair # repeat filtering after mate repair
"samtools view -F 1804 -f 2 -u -", "samtools view -F 1804 -f 2 -u -",
# produce the coordinate-sorted BAM # produce the coordinate-sorted BAM
"samtools sort -@%d -o %s" % (cpu_count(), filt_bam_filename)]) "samtools sort -@ %d -o %s" % (cpu_count(), filt_bam_filename)])
os.remove(tmp_filt_bam_filename) os.remove(tmp_filt_bam_filename)
return filt_bam_filename return filt_bam_filename
...@@ -138,7 +137,7 @@ def filter_mapped_se(bam, bam_basename): ...@@ -138,7 +137,7 @@ def filter_mapped_se(bam, bam_basename):
with open(filt_bam_filename, 'w') as fh: with open(filt_bam_filename, 'w') as fh:
samtools_filter_command = ( samtools_filter_command = (
"samtools view -F 1804 -q 30 -b %s" "samtools view -F 1804 -q 30 -b %s"
% (samtools_params, input_bam) % (bam)
) )
logger.info(samtools_filter_command) logger.info(samtools_filter_command)
subprocess.check_call( subprocess.check_call(
...@@ -154,17 +153,17 @@ def dedup_mapped(bam, bam_basename, paired): ...@@ -154,17 +153,17 @@ def dedup_mapped(bam, bam_basename, paired):
# Markduplicates # Markduplicates
dup_file_qc_filename = bam_basename + ".dup.qc" dup_file_qc_filename = bam_basename + ".dup.qc"
tmp_dup_mark_filename = bam_basename + ".dupmark.bam" tmp_dup_mark_filename = bam_basename + ".dupmark.bam"
sambamba_parms = "--hash-table-size=17592186044416 \ sambamba_parms = "--hash-table-size=17592186044416" + \
--overflow-list-size=20000000 --io-buffer-size=256" " --overflow-list-size=20000000 --io-buffer-size=256"
with open(dup_file_qc_filename, 'w') as fh: with open(dup_file_qc_filename, 'w') as fh:
sambamba_markdup_command = ( sambamba_markdup_command = (
"sambamba markdup -t %d %s %s %s" "sambamba markdup -t %d %s %s %s"
% (cpu_count, sambamba_parms, bam, tmp_dup_mark_filename) % (cpu_count(), sambamba_parms, bam, tmp_dup_mark_filename)
) )
logger.info(sambamba_markdup_command) logger.info(sambamba_markdup_command)
subprocess.check_call( subprocess.check_call(
shlex.split(sambamba_markdup), shlex.split(sambamba_markdup_command),
stdout=fh) stderr=fh)
os.remove(tmp_dup_mark_filename + '.bai') os.remove(tmp_dup_mark_filename + '.bai')
...@@ -174,10 +173,10 @@ def dedup_mapped(bam, bam_basename, paired): ...@@ -174,10 +173,10 @@ def dedup_mapped(bam, bam_basename, paired):
if paired: # paired-end data if paired: # paired-end data
samtools_dedupe_command = \ samtools_dedupe_command = \
"samtools view -F 1804 -f 2 -b %s" % (filt_bam_filename) "samtools view -F 1804 -f 2 -b %s" % (bam)
else: else:
samtools_dedupe_command = \ samtools_dedupe_command = \
"samtools view -F 1804 -b %s" % (filt_bam_filename) "samtools view -F 1804 -b %s" % (bam)
with open(final_bam_filename, 'w') as fh: with open(final_bam_filename, 'w') as fh:
logger.info(samtools_dedupe_command) logger.info(samtools_dedupe_command)
...@@ -187,7 +186,7 @@ def dedup_mapped(bam, bam_basename, paired): ...@@ -187,7 +186,7 @@ def dedup_mapped(bam, bam_basename, paired):
# Index final bam file # Index final bam file
sambamba_index_command = \ sambamba_index_command = \
"samtools index -t %d %s" % (cpu_count(), final_bam_filename) "samtools index -@ %d %s" % (cpu_count(), final_bam_filename)
logger.info(sambamba_index_command) logger.info(sambamba_index_command)
subprocess.check_output(shlex.split(sambamba_index_command)) subprocess.check_output(shlex.split(sambamba_index_command))
...@@ -199,9 +198,10 @@ def dedup_mapped(bam, bam_basename, paired): ...@@ -199,9 +198,10 @@ def dedup_mapped(bam, bam_basename, paired):
logger.info(flagstat_command) logger.info(flagstat_command)
subprocess.check_call(shlex.split(flagstat_command), stdout=fh) subprocess.check_call(shlex.split(flagstat_command), stdout=fh)
os.remove(filt_bam_filename) os.remove(bam)
return final_bam_filename return final_bam_filename
def compute_complexity(bam, paired, bam_basename): def compute_complexity(bam, paired, bam_basename):
'''Calculate library complexity .''' '''Calculate library complexity .'''
...@@ -236,7 +236,7 @@ def compute_complexity(bam, paired, bam_basename): ...@@ -236,7 +236,7 @@ def compute_complexity(bam, paired, bam_basename):
"uniq -c", "uniq -c",
r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'""" r"""awk 'BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf "%d\t%d\t%d\t%d\t%f\t%f\t%f\n",mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}'"""
]) ])
out, err = common.run_pipe(steps, pbc_file_qc_filename) out, err = utils.run_pipe(steps, pbc_file_qc_filename)
if err: if err:
logger.error("PBC file error: %s" % (err)) logger.error("PBC file error: %s" % (err))
...@@ -247,13 +247,12 @@ def main(): ...@@ -247,13 +247,12 @@ def main():
bam = args.bam bam = args.bam
# Create a file handler # Create a file handler
handler = logging.FileHandler('filter_qc.log') handler = logging.FileHandler('map_qc.log')
logger.addHandler(handler) logger.addHandler(handler)
# Check if tools are present # Check if tools are present
check_tools() check_tools()
# Run filtering for either PE or SE # Run filtering for either PE or SE
bam_basename = os.path.basename( bam_basename = os.path.basename(
utils.strip_extensions(bam, STRIP_EXTENSIONS)) utils.strip_extensions(bam, STRIP_EXTENSIONS))
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment