Merge branch 'master' into 75-rename_plotprofile

54b9865d · Venkat Malladi · 01e0f56e · df7ed9bf · 54b9865d · 54b9865d
Commit 54b9865d authored 4 years ago by Venkat Malladi
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -10,6 +10,7 @@ stages:
  - single
  - multiple
  - skip
+  - cleanup
 user_configuration:
  stage: unit
@@ -26,19 +27,19 @@ bash_tests:
 astrocyte:
  stage: astrocyte
  script:
-  - module load astrocyte/0.1.0
+  - module load astrocyte/0.2.0
  - module unload nextflow
  - cd ..
  - astrocyte_cli validate chipseq_analysis
-  artifacts:
+  after_script:
-    expire_in: 2 days
+    - rm -rf work/
 single_end_mouse:
  stage: single
  only:
    - master
  script:
-  - nextflow run workflow/main.nf --astrocyte true -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --astrocyte true
  - pytest -m singleend
 paired_end_human:
@@ -48,9 +49,19 @@ paired_end_human:
  except:
    - master
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false
  - pytest -m pairedend
+# Single-end, single-control test: runs the workflow on the
+# design_single_contol_SE.txt design file (GRCh38, --pairedEnd false) and then
+# the pytest tests marked "singlecontrol". Runs on every branch except master.
+single_end_single_control:
+  stage: single
+  only:
+    - branches
+  except:
+    - master
+  script:
+  # NXF_OPTS hands the JVM option -Dleveldb.mmap=false to Nextflow.
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_single_contol_SE.txt" --genome 'GRCh38' --pairedEnd false --astrocyte false
+  - pytest -m singlecontrol
 single_end_diff:
  stage: multiple
  only:
@@ -58,7 +69,7 @@ single_end_diff:
  except:
    - master
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false
  - pytest -m singleend
  - pytest -m singlediff
@@ -67,7 +78,7 @@ paired_end_diff:
    - master
  stage: multiple
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false
  - pytest -m pairedend
  - pytest -m paireddiff
@@ -76,5 +87,12 @@ single_end_skip:
  only:
    - master
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false
  - pytest -m singleskip_true
+# Removes this pipeline's build directory from the runner once all stages are done.
+cleanup_job:
+  stage: cleanup
+  # Run even when an earlier stage failed; otherwise the directories of
+  # failed pipelines would never be removed.
+  when: always
+  script:
+    - cd "$CI_BUILDS_DIR/$CI_RUNNER_SHORT_TOKEN/$CI_PROJECT_NAME"
+    # ${VAR:?} aborts the command if the variable is unset or empty, so the
+    # argument can never collapse to a bare "/".
+    - rm -fr "${CI_PIPELINE_ID:?}/"
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,10 +14,11 @@ All notable changes to this project will be documented in this file.
 - Make gtf and geneName files as param inputs
 - Fix xcor to increase file size for --random-source
 - Fix skip diff test for paired-end data
+- Add test data for single control and single replicate
 - Fix python version for MultiQC report
 - Fix xcor to get lowest non zero value above 50
 - Fix references to display in Multiqc report
+- Update astrocyte testing to 0.2.0
 ## [publish_1.0.6 ] - 2019-05-31
 ### Added

--- a/README.md
+++ b/README.md
@@ -4,11 +4,13 @@
 # BICF ChIP-seq Pipeline
-[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
-[![Coverage Report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
+[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
+[![coverage report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
 [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.24.0-brightgreen.svg
 )](https://www.nextflow.io/)
-[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.1.0-blue.svg)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
+[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.2.0-blue)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648845.svg)](https://doi.org/10.5281/zenodo.2648845)

--- a/test_data/A_1.bedpe.gz
+++ b/test_data/A_1.bedpe.gz
--- a/test_data/B_1.bedpe.gz
+++ b/test_data/B_1.bedpe.gz
--- a/test_data/design_single_contol_SE.txt
+++ b/test_data/design_single_contol_SE.txt
+sample_id	experiment_id	biosample	factor	treatment	replicate	control_id	fastq_read1
+ENCLB497XZB	ENCSR000DXB	Panc1	H3K4me3	None	1	ENCLB304SBJ	ENCFF001GBW.fastq.gz
+ENCLB304SBJ	ENCSR000DXC	Panc1	Control	None	1	ENCLB304SBJ	ENCFF001HWJ.fastq.gz
--- a/test_data/fetch_test_data.sh
+++ b/test_data/fetch_test_data.sh
@@ -25,3 +25,9 @@ wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fast
 wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz
 wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz
 echo "Done with Paired-end"
+echo "Downloading Single-end data set Human ENCSR000DXB and ENCSR000DXC"
+wget https://www.encodeproject.org/files/ENCFF001GBW/@@download/ENCFF001GBW.fastq.gz
+wget https://www.encodeproject.org/files/ENCFF001GBV/@@download/ENCFF001GBV.fastq.gz
+wget https://www.encodeproject.org/files/ENCFF001HWJ/@@download/ENCFF001HWJ.fastq.gz
+echo "Done with Single-end"
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -2,6 +2,7 @@ process {
  executor = 'slurm'
  queue = 'super'
  clusterOptions = '--hold'
+  beforeScript= 'ulimit -Ss unlimited'
  // Process specific configuration
  withName: checkDesignFile {

--- a/workflow/scripts/pool_and_psuedoreplicate.py
+++ b/workflow/scripts/pool_and_psuedoreplicate.py
@@ -204,6 +204,7 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
        pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
        pool_control = pool_control_tmp
    # Duplicate rows and update for pool and pseudoreplicates, and update tagAlign with single-end data
    experiment_id = design_df.at[0, 'experiment_id']
    replicate_files = design_df.tag_align.unique()
@@ -237,9 +238,9 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
    else:
        pool_experiment_se = pool_experiment
-        # Check controls against cutoff_ratio
+    # Check controls against cutoff_ratio
-        # if so replace with pool_control
+    # if so replace with pool_control
-        # unless single control was used
+    # unless single control was used
    if not single_control:
        path_to_pool_control = cwd + '/' + pool_control
        if control_df.values.max() > cutoff_ratio:
@@ -267,7 +268,10 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
                                                            path_to_control
    else:
-        path_to_pool_control = cwd + '/' +  pool_control
+        if paired:
+            path_to_pool_control = cwd + '/' + pool_control
+        else:
+            path_to_pool_control = pool_control
        design_new_df['control_tag_align'] = path_to_pool_control
    # Add in pseudo replicates
@@ -306,7 +310,7 @@ def main():
    design_df = pd.read_csv(design, sep='\t')
    # Get current directory to build paths
-    cwd = os.getcwd() 
+    cwd = os.getcwd()
    # Check Number of replicates and replicates
    no_reps = check_replicates(design_df)

--- a/workflow/tests/test_overlap_peaks.py
+++ b/workflow/tests/test_overlap_peaks.py
@@ -45,3 +45,9 @@ def test_overlap_peaks_pairedend():
    assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak'))
    peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak'
    assert utils.count_lines(peak_file) >= 25657
+@pytest.mark.singlecontrol
+def test_overlap_peaks_singlecontrol():
+    """Check the overlap-peaks outputs for experiment ENCSR000DXB.
+
+    The rejected narrowPeak file must exist, and utils.count_lines must
+    report at least 35097 for the replicated narrowPeak file.
+    """
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR000DXB.rejected.narrowPeak'))
+    # NOTE(review): built by plain concatenation, so test_output_path is
+    # assumed to end with a path separator - confirm.
+    peak_file = test_output_path + 'ENCSR000DXB.replicated.narrowPeak'
+    assert utils.count_lines(peak_file) >= 35097
--- a/workflow/tests/test_pool_and_psuedoreplicate.py
+++ b/workflow/tests/test_pool_and_psuedoreplicate.py
@@ -33,9 +33,12 @@ def design_experiment_2(design_experiment):
 @pytest.fixture
 def design_experiment_3(design_experiment):
+    """Return a copy of the design without its second row, using bedpe files in row 0."""
-    # Update second control to be same as first
+    # Drop the second row (replicate A_2)
-    design_experiment.loc[1, 'control_tag_align'] = 'B_1.bedse.gz'
+    design_df = design_experiment.drop(design_experiment.index[1])
-    return design_experiment
+    # Point the remaining row at the paired-end (bedpe) files
+    design_df.loc[0, 'control_tag_align'] = 'B_1.bedpe.gz'
+    design_df.loc[0, 'tag_align'] = 'A_1.bedpe.gz'
+    return design_df
 @pytest.mark.unit
@@ -71,6 +74,19 @@ def test_single_rep(design_experiment_2):
    shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
    single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
    assert single_rep.shape[0] == 4
+    assert len(single_rep['control_tag_align'].unique()) == 2
+    assert 'pool_control.tagAlign.gz' in single_rep['control_tag_align'].unique()[1]
+@pytest.mark.unit
+def test_single_control(design_experiment_3):
+    """Run generate_design on the single-row bedpe design and check the control path.
+
+    Copies the A_1/B_1 fixture files into the current working directory, then
+    asserts that the first unique control_tag_align value contains
+    'pool_control.tagAlign.gz'.
+    """
+    cwd = os.getcwd()
+    # generate_design builds paths relative to cwd, so the inputs are copied there first.
+    shutil.copy(test_design_path + 'A_1.bedpe.gz', cwd)
+    shutil.copy(test_design_path + 'B_1.bedpe.gz', cwd)
+    shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
+    # NOTE(review): paired is passed as the string 'true'. The visible part of
+    # generate_design tests `if paired:`, which is truthy for any non-empty
+    # string (including 'false') - confirm a real boolean is not intended.
+    single_control = pool_and_psuedoreplicate.generate_design('true', 1.2, design_experiment_3, cwd, 1, 1)
+    assert 'pool_control.tagAlign.gz' in single_control['control_tag_align'].unique()[0]
 @pytest.mark.singleend
 def test_pool_and_psuedoreplicate_singleend():