Skip to content
Snippets Groups Projects
Commit 54b9865d authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch 'master' into 75-rename_plotprofile

parents 01e0f56e df7ed9bf
Branches
Tags
1 merge request: !67 Resolve "Rename Plot profile to be consistent with naming"
Pipeline #6578 failed with stages
in 7 hours, 14 minutes, and 14 seconds
...@@ -10,6 +10,7 @@ stages: ...@@ -10,6 +10,7 @@ stages:
- single - single
- multiple - multiple
- skip - skip
- cleanup
user_configuration: user_configuration:
stage: unit stage: unit
...@@ -26,19 +27,19 @@ bash_tests: ...@@ -26,19 +27,19 @@ bash_tests:
astrocyte: astrocyte:
stage: astrocyte stage: astrocyte
script: script:
- module load astrocyte/0.1.0 - module load astrocyte/0.2.0
- module unload nextflow - module unload nextflow
- cd .. - cd ..
- astrocyte_cli validate chipseq_analysis - astrocyte_cli validate chipseq_analysis
artifacts: after_script:
expire_in: 2 days - rm -rf work/
single_end_mouse: single_end_mouse:
stage: single stage: single
only: only:
- master - master
script: script:
- nextflow run workflow/main.nf --astrocyte true -resume - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --astrocyte true
- pytest -m singleend - pytest -m singleend
paired_end_human: paired_end_human:
...@@ -48,9 +49,19 @@ paired_end_human: ...@@ -48,9 +49,19 @@ paired_end_human:
except: except:
- master - master
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false
- pytest -m pairedend - pytest -m pairedend
single_end_single_control:
stage: single
only:
- branches
except:
- master
script:
- NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_single_contol_SE.txt" --genome 'GRCh38' --pairedEnd false --astrocyte false
- pytest -m singlecontrol
single_end_diff: single_end_diff:
stage: multiple stage: multiple
only: only:
...@@ -58,7 +69,7 @@ single_end_diff: ...@@ -58,7 +69,7 @@ single_end_diff:
except: except:
- master - master
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false -resume - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false
- pytest -m singleend - pytest -m singleend
- pytest -m singlediff - pytest -m singlediff
...@@ -67,7 +78,7 @@ paired_end_diff: ...@@ -67,7 +78,7 @@ paired_end_diff:
- master - master
stage: multiple stage: multiple
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false
- pytest -m pairedend - pytest -m pairedend
- pytest -m paireddiff - pytest -m paireddiff
...@@ -76,5 +87,12 @@ single_end_skip: ...@@ -76,5 +87,12 @@ single_end_skip:
only: only:
- master - master
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false -resume - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false
- pytest -m singleskip_true - pytest -m singleskip_true
cleanup_job:
stage: cleanup
script:
- cd $CI_BUILDS_DIR/$CI_RUNNER_SHORT_TOKEN/$CI_PROJECT_NAME
- rm -fr $CI_PIPELINE_ID/
...@@ -14,10 +14,11 @@ All notable changes to this project will be documented in this file. ...@@ -14,10 +14,11 @@ All notable changes to this project will be documented in this file.
- Make gtf and geneName files as param inputs - Make gtf and geneName files as param inputs
- Fix xcor to increase file size for --random-source - Fix xcor to increase file size for --random-source
- Fix skip diff test for paired-end data - Fix skip diff test for paired-end data
- Add test data for single control and single replicate
- Fix python version for MultiQC report - Fix python version for MultiQC report
- Fix xcor to get lowest non zero value above 50 - Fix xcor to get lowest non zero value above 50
- Fix references to display in MultiQC report - Fix references to display in MultiQC report
- Update astrocyte testing to 0.2.0
## [publish_1.0.6 ] - 2019-05-31 ## [publish_1.0.6 ] - 2019-05-31
### Added ### Added
......
...@@ -4,11 +4,13 @@ ...@@ -4,11 +4,13 @@
# BICF ChIP-seq Pipeline # BICF ChIP-seq Pipeline
[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![Coverage Report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![coverage report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.24.0-brightgreen.svg [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.24.0-brightgreen.svg
)](https://www.nextflow.io/) )](https://www.nextflow.io/)
[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.1.0-blue.svg)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html) [![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.2.0-blue)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648845.svg)](https://doi.org/10.5281/zenodo.2648845) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648845.svg)](https://doi.org/10.5281/zenodo.2648845)
......
File added
File added
sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCLB497XZB ENCSR000DXB Panc1 H3K4me3 None 1 ENCLB304SBJ ENCFF001GBW.fastq.gz
ENCLB304SBJ ENCSR000DXC Panc1 Control None 1 ENCLB304SBJ ENCFF001HWJ.fastq.gz
...@@ -25,3 +25,9 @@ wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fast ...@@ -25,3 +25,9 @@ wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fast
wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz
echo "Done with Paired-end" echo "Done with Paired-end"
echo "Downloading Single-end data set Human ENCSR000DXB and ENCSR000DXC"
wget https://www.encodeproject.org/files/ENCFF001GBW/@@download/ENCFF001GBW.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001GBV/@@download/ENCFF001GBV.fastq.gz
wget https://www.encodeproject.org/files/ENCFF001HWJ/@@download/ENCFF001HWJ.fastq.gz
echo "Done with Single-end"
...@@ -2,6 +2,7 @@ process { ...@@ -2,6 +2,7 @@ process {
executor = 'slurm' executor = 'slurm'
queue = 'super' queue = 'super'
clusterOptions = '--hold' clusterOptions = '--hold'
beforeScript= 'ulimit -Ss unlimited'
// Process specific configuration // Process specific configuration
withName: checkDesignFile { withName: checkDesignFile {
......
...@@ -204,6 +204,7 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con ...@@ -204,6 +204,7 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control") pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
pool_control = pool_control_tmp pool_control = pool_control_tmp
# Duplicate rows and update for pool and pseudoreplicates and update tagAlign with single end data # Duplicate rows and update for pool and pseudoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id'] experiment_id = design_df.at[0, 'experiment_id']
replicate_files = design_df.tag_align.unique() replicate_files = design_df.tag_align.unique()
...@@ -237,9 +238,9 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con ...@@ -237,9 +238,9 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
else: else:
pool_experiment_se = pool_experiment pool_experiment_se = pool_experiment
# Check controls against cutoff_ratio # Check controls against cutoff_ratio
# if so replace with pool_control # if so replace with pool_control
# unless single control was used # unless single control was used
if not single_control: if not single_control:
path_to_pool_control = cwd + '/' + pool_control path_to_pool_control = cwd + '/' + pool_control
if control_df.values.max() > cutoff_ratio: if control_df.values.max() > cutoff_ratio:
...@@ -267,7 +268,10 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con ...@@ -267,7 +268,10 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
path_to_control path_to_control
else: else:
path_to_pool_control = cwd + '/' + pool_control if paired:
path_to_pool_control = cwd + '/' + pool_control
else:
path_to_pool_control = pool_control
design_new_df['control_tag_align'] = path_to_pool_control design_new_df['control_tag_align'] = path_to_pool_control
# Add in pseudo replicates # Add in pseudo replicates
...@@ -306,7 +310,7 @@ def main(): ...@@ -306,7 +310,7 @@ def main():
design_df = pd.read_csv(design, sep='\t') design_df = pd.read_csv(design, sep='\t')
# Get current directory to build paths # Get current directory to build paths
cwd = os.getcwd() cwd = os.getcwd()
# Check Number of replicates and replicates # Check Number of replicates and replicates
no_reps = check_replicates(design_df) no_reps = check_replicates(design_df)
......
...@@ -45,3 +45,9 @@ def test_overlap_peaks_pairedend(): ...@@ -45,3 +45,9 @@ def test_overlap_peaks_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak'))
peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak' peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak'
assert utils.count_lines(peak_file) >= 25657 assert utils.count_lines(peak_file) >= 25657
@pytest.mark.singlecontrol
def test_overlap_peaks_singlecontrol():
    """Check the overlap-peaks outputs for experiment ENCSR000DXB.

    Runs under the ``singlecontrol`` pytest marker. Two things are verified:
    the rejected narrowPeak file exists, and the replicated narrowPeak file
    holds at least 35097 lines.
    """
    rejected_peaks = os.path.join(test_output_path, 'ENCSR000DXB.rejected.narrowPeak')
    assert os.path.exists(rejected_peaks)

    # NOTE(review): plain concatenation, so test_output_path presumably ends
    # with a path separator -- confirm against its definition.
    replicated_peaks = test_output_path + 'ENCSR000DXB.replicated.narrowPeak'
    replicated_line_count = utils.count_lines(replicated_peaks)
    assert replicated_line_count >= 35097
...@@ -33,9 +33,12 @@ def design_experiment_2(design_experiment): ...@@ -33,9 +33,12 @@ def design_experiment_2(design_experiment):
@pytest.fixture @pytest.fixture
def design_experiment_3(design_experiment): def design_experiment_3(design_experiment):
# Update second control to be same as first # Drop Replicate A_2
design_experiment.loc[1, 'control_tag_align'] = 'B_1.bedse.gz' design_df = design_experiment.drop(design_experiment.index[1])
return design_experiment # Update to be paired as first
design_df.loc[0, 'control_tag_align'] = 'B_1.bedpe.gz'
design_df.loc[0, 'tag_align'] = 'A_1.bedpe.gz'
return design_df
@pytest.mark.unit @pytest.mark.unit
...@@ -71,6 +74,19 @@ def test_single_rep(design_experiment_2): ...@@ -71,6 +74,19 @@ def test_single_rep(design_experiment_2):
shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd) shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1) single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
assert single_rep.shape[0] == 4 assert single_rep.shape[0] == 4
assert len(single_rep['control_tag_align'].unique()) == 2
assert 'pool_control.tagAlign.gz' in single_rep['control_tag_align'].unique()[1]
@pytest.mark.unit
def test_single_control(design_experiment_3):
    """Check that generate_design reports a pooled control tagAlign file.

    Copies three fixture files into the current working directory, calls
    ``generate_design`` with ``paired='true'``, a cutoff ratio of 1.2 and the
    ``design_experiment_3`` fixture, then asserts that the first unique
    ``control_tag_align`` value contains ``pool_control.tagAlign.gz``.
    """
    work_dir = os.getcwd()

    # Stage the inputs in the same order the files are needed on disk.
    for fixture_name in ('A_1.bedpe.gz', 'B_1.bedpe.gz', 'A_1.tagAlign.gz'):
        shutil.copy(test_design_path + fixture_name, work_dir)

    # The trailing 1, 1 are the replicate count and the unique-control count.
    generated_design = pool_and_psuedoreplicate.generate_design(
        'true', 1.2, design_experiment_3, work_dir, 1, 1)

    control_files = generated_design['control_tag_align'].unique()
    assert 'pool_control.tagAlign.gz' in control_files[0]
@pytest.mark.singleend @pytest.mark.singleend
def test_pool_and_psuedoreplicate_singleend(): def test_pool_and_psuedoreplicate_singleend():
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment