Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
chipseq_analysis
Manage
Activity
Members
Labels
Plan
Issues
19
Issue boards
Milestones
Iterations
Requirements
Code
Merge requests
3
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
BICF
Astrocyte
chipseq_analysis
Commits
2948ddca
There was a problem fetching the pipeline stages.
Commit
2948ddca
authored
7 years ago
by
Venkat Malladi
Browse files
Options
Downloads
Patches
Plain Diff
Update samtools version and synxtax to fix bugs in last push.
parent
ea8a20e5
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!6
Resolve "Bam Stats and Filter"
Pipeline
#1058
passed with stage
in 1 hour, 36 minutes, and 29 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
astrocyte_pkg.yml
+1
-1
1 addition, 1 deletion
astrocyte_pkg.yml
workflow/conf/biohpc.config
+1
-1
1 addition, 1 deletion
workflow/conf/biohpc.config
workflow/scripts/map_qc.py
+23
-24
23 additions, 24 deletions
workflow/scripts/map_qc.py
with
25 additions
and
26 deletions
astrocyte_pkg.yml
+
1
−
1
View file @
2948ddca
...
@@ -44,7 +44,7 @@ workflow_modules:
...
@@ -44,7 +44,7 @@ workflow_modules:
-
'
cutadapt/1.9.1'
-
'
cutadapt/1.9.1'
-
'
fastqc/0.11.2'
-
'
fastqc/0.11.2'
-
'
bwa/intel/0.7.12'
-
'
bwa/intel/0.7.12'
-
'
samtools/
intel/1.3
'
-
'
samtools/
1.4.1
'
-
'
sambamba/0.6.6'
-
'
sambamba/0.6.6'
-
'
bedtools/2.26.0'
-
'
bedtools/2.26.0'
...
...
This diff is collapsed.
Click to expand it.
workflow/conf/biohpc.config
+
1
−
1
View file @
2948ddca
...
@@ -16,7 +16,7 @@ process {
...
@@ -16,7 +16,7 @@ process {
cpus
=
32
cpus
=
32
}
}
$
filterReads
{
$
filterReads
{
module
= [
'python/3.6.1-2-anaconda'
,
'samtools/
intel/1.3
'
,
'sambamba/0.6.6'
,
'bedtools/2.26.0'
]
module
= [
'python/3.6.1-2-anaconda'
,
'samtools/
1.4.1
'
,
'sambamba/0.6.6'
,
'bedtools/2.26.0'
]
cpus
=
32
cpus
=
32
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
workflow/scripts/map_qc.py
+
23
−
24
View file @
2948ddca
...
@@ -9,7 +9,6 @@ import shutil
...
@@ -9,7 +9,6 @@ import shutil
import
shlex
import
shlex
import
logging
import
logging
from
multiprocessing
import
cpu_count
from
multiprocessing
import
cpu_count
import
json
import
utils
import
utils
EPILOG
=
'''
EPILOG
=
'''
...
@@ -66,18 +65,18 @@ def check_tools():
...
@@ -66,18 +65,18 @@ def check_tools():
raise
Exception
(
'
Missing samtools
'
)
raise
Exception
(
'
Missing samtools
'
)
sambamba_path
=
shutil
.
which
(
"
sambamba
"
)
sambamba_path
=
shutil
.
which
(
"
sambamba
"
)
if
b
w
a_path
:
if
sambam
ba_path
:
logger
.
info
(
'
Found sambamba: %s
'
,
b
w
a_path
)
logger
.
info
(
'
Found sambamba: %s
'
,
sambam
ba_path
)
else
:
else
:
logger
.
error
(
'
Missing sambamba
'
)
logger
.
error
(
'
Missing sambamba
'
)
raise
Exception
(
'
Missing sambamba
'
)
raise
Exception
(
'
Missing sambamba
'
)
bedtools_path
=
shutil
.
which
(
"
bedtools
"
)
bedtools_path
=
shutil
.
which
(
"
bedtools
"
)
if
b
wa
_path
:
if
b
edtools
_path
:
logger
.
info
(
'
Found
sambamba
: %s
'
,
b
wa
_path
)
logger
.
info
(
'
Found
bedtools
: %s
'
,
b
edtools
_path
)
else
:
else
:
logger
.
error
(
'
Missing
sambamba
'
)
logger
.
error
(
'
Missing
bedtools
'
)
raise
Exception
(
'
Missing
sambamba
'
)
raise
Exception
(
'
Missing
bedtools
'
)
def
filter_mapped_pe
(
bam
,
bam_basename
):
def
filter_mapped_pe
(
bam
,
bam_basename
):
...
@@ -93,7 +92,7 @@ def filter_mapped_pe(bam, bam_basename):
...
@@ -93,7 +92,7 @@ def filter_mapped_pe(bam, bam_basename):
# Remove low MAPQ reads
# Remove low MAPQ reads
# Only keep properly paired reads
# Only keep properly paired reads
# Obtain name sorted BAM file
# Obtain name sorted BAM file
out
,
err
=
common
.
run_pipe
([
out
,
err
=
utils
.
run_pipe
([
# filter: -F 1804 FlAG bits to exclude; -f 2 FLAG bits to reqire;
# filter: -F 1804 FlAG bits to exclude; -f 2 FLAG bits to reqire;
# -q 30 exclude MAPQ < 30; -u uncompressed output
# -q 30 exclude MAPQ < 30; -u uncompressed output
# exclude FLAG 1804: unmapped, next segment unmapped, secondary
# exclude FLAG 1804: unmapped, next segment unmapped, secondary
...
@@ -103,14 +102,14 @@ def filter_mapped_pe(bam, bam_basename):
...
@@ -103,14 +102,14 @@ def filter_mapped_pe(bam, bam_basename):
# sort: -n sort by name; - take input from stdin;
# sort: -n sort by name; - take input from stdin;
# out to specified filename
# out to specified filename
# Will produce name sorted BAM
# Will produce name sorted BAM
"
samtools sort -n -@%d -o %s
"
%
(
cpu_count
(),
tmp_filt_bam_filename
)])
"
samtools sort -n -@
%d -o %s
"
%
(
cpu_count
(),
tmp_filt_bam_filename
)])
if
err
:
if
err
:
logger
.
error
(
"
samtools filter error: %s
"
%
(
err
))
logger
.
error
(
"
samtools filter error: %s
"
%
(
err
))
# Remove orphan reads (pair was removed)
# Remove orphan reads (pair was removed)
# and read pairs mapping to different chromosomes
# and read pairs mapping to different chromosomes
# Obtain position sorted BAM
# Obtain position sorted BAM
out
,
err
=
common
.
run_pipe
([
out
,
err
=
utils
.
run_pipe
([
# fill in mate coordinates, ISIZE and mate-related flags
# fill in mate coordinates, ISIZE and mate-related flags
# fixmate requires name-sorted alignment; -r removes secondary and
# fixmate requires name-sorted alignment; -r removes secondary and
# unmapped (redundant here because already done above?)
# unmapped (redundant here because already done above?)
...
@@ -119,7 +118,7 @@ def filter_mapped_pe(bam, bam_basename):
...
@@ -119,7 +118,7 @@ def filter_mapped_pe(bam, bam_basename):
# repeat filtering after mate repair
# repeat filtering after mate repair
"
samtools view -F 1804 -f 2 -u -
"
,
"
samtools view -F 1804 -f 2 -u -
"
,
# produce the coordinate-sorted BAM
# produce the coordinate-sorted BAM
"
samtools sort -@%d -o %s
"
%
(
cpu_count
(),
filt_bam_filename
)])
"
samtools sort -@
%d -o %s
"
%
(
cpu_count
(),
filt_bam_filename
)])
os
.
remove
(
tmp_filt_bam_filename
)
os
.
remove
(
tmp_filt_bam_filename
)
return
filt_bam_filename
return
filt_bam_filename
...
@@ -138,7 +137,7 @@ def filter_mapped_se(bam, bam_basename):
...
@@ -138,7 +137,7 @@ def filter_mapped_se(bam, bam_basename):
with
open
(
filt_bam_filename
,
'
w
'
)
as
fh
:
with
open
(
filt_bam_filename
,
'
w
'
)
as
fh
:
samtools_filter_command
=
(
samtools_filter_command
=
(
"
samtools view -F 1804 -q 30 -b %s
"
"
samtools view -F 1804 -q 30 -b %s
"
%
(
samtools_params
,
input_
bam
)
%
(
bam
)
)
)
logger
.
info
(
samtools_filter_command
)
logger
.
info
(
samtools_filter_command
)
subprocess
.
check_call
(
subprocess
.
check_call
(
...
@@ -154,17 +153,17 @@ def dedup_mapped(bam, bam_basename, paired):
...
@@ -154,17 +153,17 @@ def dedup_mapped(bam, bam_basename, paired):
# Markduplicates
# Markduplicates
dup_file_qc_filename
=
bam_basename
+
"
.dup.qc
"
dup_file_qc_filename
=
bam_basename
+
"
.dup.qc
"
tmp_dup_mark_filename
=
bam_basename
+
"
.dupmark.bam
"
tmp_dup_mark_filename
=
bam_basename
+
"
.dupmark.bam
"
sambamba_parms
=
"
--hash-table-size=17592186044416
\
sambamba_parms
=
"
--hash-table-size=17592186044416
"
+
\
--overflow-list-size=20000000 --io-buffer-size=256
"
"
--overflow-list-size=20000000 --io-buffer-size=256
"
with
open
(
dup_file_qc_filename
,
'
w
'
)
as
fh
:
with
open
(
dup_file_qc_filename
,
'
w
'
)
as
fh
:
sambamba_markdup_command
=
(
sambamba_markdup_command
=
(
"
sambamba markdup -t %d %s %s %s
"
"
sambamba markdup -t %d %s %s %s
"
%
(
cpu_count
,
sambamba_parms
,
bam
,
tmp_dup_mark_filename
)
%
(
cpu_count
()
,
sambamba_parms
,
bam
,
tmp_dup_mark_filename
)
)
)
logger
.
info
(
sambamba_markdup_command
)
logger
.
info
(
sambamba_markdup_command
)
subprocess
.
check_call
(
subprocess
.
check_call
(
shlex
.
split
(
sambamba_markdup
),
shlex
.
split
(
sambamba_markdup
_command
),
std
out
=
fh
)
std
err
=
fh
)
os
.
remove
(
tmp_dup_mark_filename
+
'
.bai
'
)
os
.
remove
(
tmp_dup_mark_filename
+
'
.bai
'
)
...
@@ -174,10 +173,10 @@ def dedup_mapped(bam, bam_basename, paired):
...
@@ -174,10 +173,10 @@ def dedup_mapped(bam, bam_basename, paired):
if
paired
:
# paired-end data
if
paired
:
# paired-end data
samtools_dedupe_command
=
\
samtools_dedupe_command
=
\
"
samtools view -F 1804 -f 2 -b %s
"
%
(
filt_bam_filename
)
"
samtools view -F 1804 -f 2 -b %s
"
%
(
bam
)
else
:
else
:
samtools_dedupe_command
=
\
samtools_dedupe_command
=
\
"
samtools view -F 1804 -b %s
"
%
(
filt_bam_filename
)
"
samtools view -F 1804 -b %s
"
%
(
bam
)
with
open
(
final_bam_filename
,
'
w
'
)
as
fh
:
with
open
(
final_bam_filename
,
'
w
'
)
as
fh
:
logger
.
info
(
samtools_dedupe_command
)
logger
.
info
(
samtools_dedupe_command
)
...
@@ -187,7 +186,7 @@ def dedup_mapped(bam, bam_basename, paired):
...
@@ -187,7 +186,7 @@ def dedup_mapped(bam, bam_basename, paired):
# Index final bam file
# Index final bam file
sambamba_index_command
=
\
sambamba_index_command
=
\
"
samtools index -
t
%d %s
"
%
(
cpu_count
(),
final_bam_filename
)
"
samtools index -
@
%d %s
"
%
(
cpu_count
(),
final_bam_filename
)
logger
.
info
(
sambamba_index_command
)
logger
.
info
(
sambamba_index_command
)
subprocess
.
check_output
(
shlex
.
split
(
sambamba_index_command
))
subprocess
.
check_output
(
shlex
.
split
(
sambamba_index_command
))
...
@@ -199,9 +198,10 @@ def dedup_mapped(bam, bam_basename, paired):
...
@@ -199,9 +198,10 @@ def dedup_mapped(bam, bam_basename, paired):
logger
.
info
(
flagstat_command
)
logger
.
info
(
flagstat_command
)
subprocess
.
check_call
(
shlex
.
split
(
flagstat_command
),
stdout
=
fh
)
subprocess
.
check_call
(
shlex
.
split
(
flagstat_command
),
stdout
=
fh
)
os
.
remove
(
filt_bam_filename
)
os
.
remove
(
bam
)
return
final_bam_filename
return
final_bam_filename
def
compute_complexity
(
bam
,
paired
,
bam_basename
):
def
compute_complexity
(
bam
,
paired
,
bam_basename
):
'''
Calculate library complexity .
'''
'''
Calculate library complexity .
'''
...
@@ -236,7 +236,7 @@ def compute_complexity(bam, paired, bam_basename):
...
@@ -236,7 +236,7 @@ def compute_complexity(bam, paired, bam_basename):
"
uniq -c
"
,
"
uniq -c
"
,
r
"""
awk
'
BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf
"
%d\t%d\t%d\t%d\t%f\t%f\t%f\n
"
,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}
'"""
r
"""
awk
'
BEGIN{mt=0;m0=0;m1=0;m2=0} ($1==1){m1=m1+1} ($1==2){m2=m2+1} {m0=m0+1} {mt=mt+$1} END{printf
"
%d\t%d\t%d\t%d\t%f\t%f\t%f\n
"
,mt,m0,m1,m2,m0/mt,m1/m0,m1/m2}
'"""
])
])
out
,
err
=
common
.
run_pipe
(
steps
,
pbc_file_qc_filename
)
out
,
err
=
utils
.
run_pipe
(
steps
,
pbc_file_qc_filename
)
if
err
:
if
err
:
logger
.
error
(
"
PBC file error: %s
"
%
(
err
))
logger
.
error
(
"
PBC file error: %s
"
%
(
err
))
...
@@ -247,13 +247,12 @@ def main():
...
@@ -247,13 +247,12 @@ def main():
bam
=
args
.
bam
bam
=
args
.
bam
# Create a file handler
# Create a file handler
handler
=
logging
.
FileHandler
(
'
filter
_qc.log
'
)
handler
=
logging
.
FileHandler
(
'
map
_qc.log
'
)
logger
.
addHandler
(
handler
)
logger
.
addHandler
(
handler
)
# Check if tools are present
# Check if tools are present
check_tools
()
check_tools
()
# Run filtering for either PE or SE
# Run filtering for either PE or SE
bam_basename
=
os
.
path
.
basename
(
bam_basename
=
os
.
path
.
basename
(
utils
.
strip_extensions
(
bam
,
STRIP_EXTENSIONS
))
utils
.
strip_extensions
(
bam
,
STRIP_EXTENSIONS
))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment