Commit 54b9865d authored by Venkat Malladi's avatar Venkat Malladi

Merge branch 'master' into 75-rename_plotprofile

parents 01e0f56e df7ed9bf
Pipeline #6578 failed with stages
in 434 minutes and 14 seconds
......@@ -10,6 +10,7 @@ stages:
- single
- multiple
- skip
- cleanup
user_configuration:
stage: unit
......@@ -26,19 +27,19 @@ bash_tests:
astrocyte:
stage: astrocyte
script:
- module load astrocyte/0.1.0
- module load astrocyte/0.2.0
- module unload nextflow
- cd ..
- astrocyte_cli validate chipseq_analysis
artifacts:
expire_in: 2 days
after_script:
- rm -rf work/
single_end_mouse:
stage: single
only:
- master
script:
- nextflow run workflow/main.nf --astrocyte true -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --astrocyte true
- pytest -m singleend
paired_end_human:
......@@ -48,9 +49,19 @@ paired_end_human:
except:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false
- pytest -m pairedend
single_end_single_control:
stage: single
only:
- branches
except:
- master
script:
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_single_contol_SE.txt" --genome 'GRCh38' --pairedEnd false --astrocyte false
- pytest -m singlecontrol
single_end_diff:
stage: multiple
only:
......@@ -58,7 +69,7 @@ single_end_diff:
except:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false
- pytest -m singleend
- pytest -m singlediff
......@@ -67,7 +78,7 @@ paired_end_diff:
- master
stage: multiple
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false
- pytest -m pairedend
- pytest -m paireddiff
......@@ -76,5 +87,12 @@ single_end_skip:
only:
- master
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false -resume
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false
- pytest -m singleskip_true
cleanup_job:
stage: cleanup
script:
- cd $CI_BUILDS_DIR/$CI_RUNNER_SHORT_TOKEN/$CI_PROJECT_NAME
- rm -fr $CI_PIPELINE_ID/
......@@ -14,10 +14,11 @@ All notable changes to this project will be documented in this file.
- Make gtf and geneName files as param inputs
- Fix xcor to increase file size for --random-source
- Fix skip diff test for paired-end data
- Add test data for single control and single replicate
- Fix python version for MultiQC report
- Fix xcor to get lowest non zero value above 50
- Fix references to display in MultiQC report
- Update astrocyte testing to 0.2.0
## [publish_1.0.6] - 2019-05-31
### Added
......
......@@ -4,11 +4,13 @@
# BICF ChIP-seq Pipeline
[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![Coverage Report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![coverage report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.24.0-brightgreen.svg
)](https://www.nextflow.io/)
[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.1.0-blue.svg)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.2.0-blue)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648845.svg)](https://doi.org/10.5281/zenodo.2648845)
......
sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCLB497XZB ENCSR000DXB Panc1 H3K4me3 None 1 ENCLB304SBJ ENCFF001GBW.fastq.gz
ENCLB304SBJ ENCSR000DXC Panc1 Control None 1 ENCLB304SBJ ENCFF001HWJ.fastq.gz
......@@ -25,3 +25,9 @@ wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fast
wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz
echo "Done with Paired-end"
echo "Downloading Single-end data set Human ENCSR000DXB and ENCSR000DXC"
wget https://www.encodeproject.org/files/ENCFF001GBW/@@download/ENCFF001GBW.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001GBV/@@download/ENCFF001GBV.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001HWJ/@@download/ENCFF001HWJ.fastq.gz
echo "Done with Single-end"
......@@ -2,6 +2,7 @@ process {
executor = 'slurm'
queue = 'super'
clusterOptions = '--hold'
beforeScript= 'ulimit -Ss unlimited'
// Process specific configuration
withName: checkDesignFile {
......
......@@ -204,6 +204,7 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
pool_control = pool_control_tmp
# Duplicate rows and update for pool and pseudoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id']
replicate_files = design_df.tag_align.unique()
......@@ -237,9 +238,9 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
else:
pool_experiment_se = pool_experiment
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
if not single_control:
path_to_pool_control = cwd + '/' + pool_control
if control_df.values.max() > cutoff_ratio:
......@@ -267,7 +268,10 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
path_to_control
else:
path_to_pool_control = cwd + '/' + pool_control
if paired:
path_to_pool_control = cwd + '/' + pool_control
else:
path_to_pool_control = pool_control
design_new_df['control_tag_align'] = path_to_pool_control
# Add in pseudo replicates
......@@ -306,7 +310,7 @@ def main():
design_df = pd.read_csv(design, sep='\t')
# Get current directory to build paths
cwd = os.getcwd()
cwd = os.getcwd()
# Check Number of replicates and replicates
no_reps = check_replicates(design_df)
......
......@@ -45,3 +45,9 @@ def test_overlap_peaks_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak'))
peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak'
assert utils.count_lines(peak_file) >= 25657
@pytest.mark.singlecontrol
def test_overlap_peaks_singlecontrol():
    """Check the overlap-peaks outputs for the ENCSR000DXB single-control run.

    Asserts that the rejected narrowPeak file exists and that the replicated
    narrowPeak file holds at least 35097 lines.
    """
    rejected_peaks = os.path.join(test_output_path, 'ENCSR000DXB.rejected.narrowPeak')
    assert os.path.exists(rejected_peaks)
    # Build this path with os.path.join as well, so it does not silently
    # depend on test_output_path ending in a path separator.
    replicated_peaks = os.path.join(test_output_path, 'ENCSR000DXB.replicated.narrowPeak')
    assert utils.count_lines(replicated_peaks) >= 35097
......@@ -33,9 +33,12 @@ def design_experiment_2(design_experiment):
@pytest.fixture
def design_experiment_3(design_experiment):
# Fixture derived from design_experiment; consumed by test_single_control.
# NOTE(review): this hunk appears to show the removed and the added body of
# the fixture back to back. As written, everything after the first return is
# unreachable - confirm which body is current before relying on it.
# Update second control to be same as first
design_experiment.loc[1, 'control_tag_align'] = 'B_1.bedse.gz'
return design_experiment
# Drop Replicate A_2
design_df = design_experiment.drop(design_experiment.index[1])
# Switch the first row's control and experiment files to the paired (bedpe) versions
design_df.loc[0, 'control_tag_align'] = 'B_1.bedpe.gz'
design_df.loc[0, 'tag_align'] = 'A_1.bedpe.gz'
return design_df
@pytest.mark.unit
......@@ -71,6 +74,19 @@ def test_single_rep(design_experiment_2):
shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
assert single_rep.shape[0] == 4
assert len(single_rep['control_tag_align'].unique()) == 2
assert 'pool_control.tagAlign.gz' in single_rep['control_tag_align'].unique()[1]
@pytest.mark.unit
def test_single_control(design_experiment_3):
    """Check generate_design output for a paired design with a single control.

    Stages the bedpe/tagAlign fixture files in the current working directory,
    runs generate_design with paired='true' and a cutoff ratio of 1.2, and
    asserts that the first unique control_tag_align entry refers to
    pool_control.tagAlign.gz.
    """
    cwd = os.getcwd()
    # generate_design is handed cwd, so place the input files there first.
    for fixture_file in ('A_1.bedpe.gz', 'B_1.bedpe.gz', 'A_1.tagAlign.gz'):
        shutil.copy(test_design_path + fixture_file, cwd)
    single_control = pool_and_psuedoreplicate.generate_design(
        'true', 1.2, design_experiment_3, cwd, 1, 1)
    control_paths = single_control['control_tag_align'].unique()
    assert 'pool_control.tagAlign.gz' in control_paths[0]
@pytest.mark.singleend
def test_pool_and_psuedoreplicate_singleend():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment