Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
BICF
Astrocyte
chipseq_analysis
Commits
54b9865d
Commit
54b9865d
authored
Apr 16, 2020
by
Venkat Malladi
Browse files
Merge branch 'master' into 75-rename_plotprofile
parents
01e0f56e
df7ed9bf
Pipeline
#6578
failed with stages
in 434 minutes and 14 seconds
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
54b9865d
...
...
@@ -10,6 +10,7 @@ stages:
-
single
-
multiple
-
skip
-
cleanup
user_configuration
:
stage
:
unit
...
...
@@ -26,19 +27,19 @@ bash_tests:
astrocyte
:
stage
:
astrocyte
script
:
-
module load astrocyte/0.
1
.0
-
module load astrocyte/0.
2
.0
-
module unload nextflow
-
cd ..
-
astrocyte_cli validate chipseq_analysis
a
rtifacts
:
expire_in
:
2 days
a
fter_script
:
-
rm -rf work/
single_end_mouse
:
stage
:
single
only
:
-
master
script
:
-
nextflow run workflow/main.nf --astrocyte
true
-resume
-
NXF_OPTS="-Dleveldb.mmap=false"
nextflow run workflow/main.nf --astrocyte
true
-
pytest -m singleend
paired_end_human
:
...
...
@@ -48,9 +49,19 @@ paired_end_human:
except
:
-
master
script
:
-
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd
true
--astrocyte
false
-resume
-
NXF_OPTS="-Dleveldb.mmap=false"
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd
true
--astrocyte
false
-
pytest -m pairedend
single_end_single_control
:
stage
:
single
only
:
-
branches
except
:
-
master
script
:
-
NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_single_contol_SE.txt" --genome 'GRCh38' --pairedEnd
false
--astrocyte
false
-
pytest -m singlecontrol
single_end_diff
:
stage
:
multiple
only
:
...
...
@@ -58,7 +69,7 @@ single_end_diff:
except
:
-
master
script
:
-
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte
false
-resume
-
NXF_OPTS="-Dleveldb.mmap=false"
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte
false
-
pytest -m singleend
-
pytest -m singlediff
...
...
@@ -67,7 +78,7 @@ paired_end_diff:
-
master
stage
:
multiple
script
:
-
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd
true
--astrocyte
false
-resume
-
NXF_OPTS="-Dleveldb.mmap=false"
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd
true
--astrocyte
false
-
pytest -m pairedend
-
pytest -m paireddiff
...
...
@@ -76,5 +87,12 @@ single_end_skip:
only
:
-
master
script
:
-
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff
true
--skipMotif
true
--skipPlotProfile
true
--astrocyte
false
-resume
-
NXF_OPTS="-Dleveldb.mmap=false"
nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff
true
--skipMotif
true
--skipPlotProfile
true
--astrocyte
false
-
pytest -m singleskip_true
cleanup_job
:
stage
:
cleanup
script
:
-
cd $CI_BUILDS_DIR/$CI_RUNNER_SHORT_TOKEN/$CI_PROJECT_NAME
-
rm -fr $CI_PIPELINE_ID/
CHANGELOG.md
View file @
54b9865d
...
...
@@ -14,10 +14,11 @@ All notable changes to this project will be documented in this file.
-
Make gtf and geneName files as param inputs
-
Fix xcor to increase file size for --random-source
-
Fix skip diff test for paired-end data
-
Add test data for single control and single replicate
-
Fix python version for MultiQC report
-
Fix xcor to get lowest non zero value above 50
-
Fix references to display in MultiQC report
-
Update astrocyte testing to 0.2.0
## [publish_1.0.6] - 2019-05-31
### Added
...
...
README.md
View file @
54b9865d
...
...
@@ -4,11 +4,13 @@
# BICF ChIP-seq Pipeline
[

](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[

](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[

](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[

](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[

](https://www.nextflow.io/)
[

](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
[

](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
[

](https://doi.org/10.5281/zenodo.2648845)
...
...
test_data/A_1.bedpe.gz
0 → 100644
View file @
54b9865d
File added
test_data/B_1.bedpe.gz
0 → 100644
View file @
54b9865d
File added
test_data/design_single_contol_SE.txt
0 → 100644
View file @
54b9865d
sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCLB497XZB ENCSR000DXB Panc1 H3K4me3 None 1 ENCLB304SBJ ENCFF001GBW.fastq.gz
ENCLB304SBJ ENCSR000DXC Panc1 Control None 1 ENCLB304SBJ ENCFF001HWJ.fastq.gz
test_data/fetch_test_data.sh
View file @
54b9865d
...
...
@@ -25,3 +25,9 @@ wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fast
wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz
echo
"Done with Paired-end"
echo
"Downloading Single-end data set Human ENCSR000DXB and ENCSR000DXC"
wget https://www.encodeproject.org/files/ENCFF001GBW/@@download/ENCFF001GBW.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001GBV/@@download/ENCFF001GBV.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001HWJ/@@download/ENCFF001HWJ.fastq.gz
echo
"Done with Single-end"
workflow/conf/biohpc.config
View file @
54b9865d
...
...
@@ -2,6 +2,7 @@ process {
executor
=
'slurm'
queue
=
'super'
clusterOptions
=
'--hold'
beforeScript
=
'ulimit -Ss unlimited'
//
Process
specific
configuration
withName
:
checkDesignFile
{
...
...
workflow/scripts/pool_and_psuedoreplicate.py
View file @
54b9865d
...
...
@@ -204,6 +204,7 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
pool_control_tmp
=
bedpe_to_tagalign
(
pool_control
,
"pool_control"
)
pool_control
=
pool_control_tmp
# Duplicate rows, update them for the pool and pseudoreplicates, and update tagAlign with single-end data
experiment_id
=
design_df
.
at
[
0
,
'experiment_id'
]
replicate_files
=
design_df
.
tag_align
.
unique
()
...
...
@@ -237,9 +238,9 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
else
:
pool_experiment_se
=
pool_experiment
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
if
not
single_control
:
path_to_pool_control
=
cwd
+
'/'
+
pool_control
if
control_df
.
values
.
max
()
>
cutoff_ratio
:
...
...
@@ -267,7 +268,10 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
path_to_control
else
:
path_to_pool_control
=
cwd
+
'/'
+
pool_control
if
paired
:
path_to_pool_control
=
cwd
+
'/'
+
pool_control
else
:
path_to_pool_control
=
pool_control
design_new_df
[
'control_tag_align'
]
=
path_to_pool_control
# Add in pseudo replicates
...
...
@@ -306,7 +310,7 @@ def main():
design_df
=
pd
.
read_csv
(
design
,
sep
=
'
\t
'
)
# Get current directory to build paths
cwd
=
os
.
getcwd
()
cwd
=
os
.
getcwd
()
# Check Number of replicates and replicates
no_reps
=
check_replicates
(
design_df
)
...
...
workflow/tests/test_overlap_peaks.py
View file @
54b9865d
...
...
@@ -45,3 +45,9 @@ def test_overlap_peaks_pairedend():
assert
os
.
path
.
exists
(
os
.
path
.
join
(
test_output_path
,
'ENCSR729LGA.rejected.narrowPeak'
))
peak_file
=
test_output_path
+
'ENCSR729LGA.replicated.narrowPeak'
assert
utils
.
count_lines
(
peak_file
)
>=
25657
@
pytest
.
mark
.
singlecontrol
def
test_overlap_peaks_singlecontrol
():
assert
os
.
path
.
exists
(
os
.
path
.
join
(
test_output_path
,
'ENCSR000DXB.rejected.narrowPeak'
))
peak_file
=
test_output_path
+
'ENCSR000DXB.replicated.narrowPeak'
assert
utils
.
count_lines
(
peak_file
)
>=
35097
workflow/tests/test_pool_and_psuedoreplicate.py
View file @
54b9865d
...
...
@@ -33,9 +33,12 @@ def design_experiment_2(design_experiment):
@
pytest
.
fixture
def
design_experiment_3
(
design_experiment
):
# Update second control to be same as first
design_experiment
.
loc
[
1
,
'control_tag_align'
]
=
'B_1.bedse.gz'
return
design_experiment
# Drop Replicate A_2
design_df
=
design_experiment
.
drop
(
design_experiment
.
index
[
1
])
# Update the first row to use the paired-end (bedpe) files
design_df
.
loc
[
0
,
'control_tag_align'
]
=
'B_1.bedpe.gz'
design_df
.
loc
[
0
,
'tag_align'
]
=
'A_1.bedpe.gz'
return
design_df
@
pytest
.
mark
.
unit
...
...
@@ -71,6 +74,19 @@ def test_single_rep(design_experiment_2):
shutil
.
copy
(
test_design_path
+
'B_1.tagAlign.gz'
,
cwd
)
single_rep
=
pool_and_psuedoreplicate
.
generate_design
(
'false'
,
1.2
,
design_experiment_2
,
cwd
,
1
,
1
)
assert
single_rep
.
shape
[
0
]
==
4
assert
len
(
single_rep
[
'control_tag_align'
].
unique
())
==
2
assert
'pool_control.tagAlign.gz'
in
single_rep
[
'control_tag_align'
].
unique
()[
1
]
@
pytest
.
mark
.
unit
def
test_single_control
(
design_experiment_3
):
cwd
=
os
.
getcwd
()
shutil
.
copy
(
test_design_path
+
'A_1.bedpe.gz'
,
cwd
)
shutil
.
copy
(
test_design_path
+
'B_1.bedpe.gz'
,
cwd
)
shutil
.
copy
(
test_design_path
+
'A_1.tagAlign.gz'
,
cwd
)
single_control
=
pool_and_psuedoreplicate
.
generate_design
(
'true'
,
1.2
,
design_experiment_3
,
cwd
,
1
,
1
)
assert
'pool_control.tagAlign.gz'
in
single_control
[
'control_tag_align'
].
unique
()[
0
]
@
pytest
.
mark
.
singleend
def
test_pool_and_psuedoreplicate_singleend
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment