177d794f · a202eec2 · 39092668 · dce709b4 · 93a8086a · 38f8f83a
--- a/.gitignore
+++ b/.gitignore
@@ -108,5 +108,4 @@ report*.html*
 timeline*.html*
 /workflow/output/*
 /work/*
-/test_data/*
 /.nextflow/*
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -10,32 +10,37 @@ stages:
  - single
  - multiple
  - skip
+  - cleanup

 user_configuration:
  stage: unit
  script:
-  - pytest -m unit
  - pytest -m unit --cov=./workflow/scripts

+bash_tests:
+  stage: unit
+  script:
+    - module load singularity/3.0.2
+    - module load deeptools/2.5.0.1
+    - singularity run docker://bats/bats:v1.1.0 --tap workflow/tests/plot_profile.bats
+
 astrocyte:
  stage: astrocyte
  script:
-  - module load astrocyte/0.1.0
+  - module load astrocyte/0.3.1
  - module unload nextflow
  - cd ..
  - astrocyte_cli validate chipseq_analysis
-  artifacts:
-    expire_in: 2 days
+  after_script:
+    - rm -rf work/

 single_end_mouse:
  stage: single
  only:
    - master
  script:
-  - nextflow run workflow/main.nf --astrocyte true -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --astrocyte true --ci true --dev true
  - pytest -m singleend
-  artifacts:
-    expire_in: 2 days

 paired_end_human:
  stage: single
@@ -44,39 +49,55 @@ paired_end_human:
  except:
    - master
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false --ci true --dev true
  - pytest -m pairedend
-  artifacts:
-    expire_in: 2 days

-single_end_diff:
-  stage: multiple
+single_end_single_control:
+  stage: single
  only:
    - branches
  except:
    - master
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_single_contol_SE.txt" --genome 'GRCh38' --pairedEnd false --astrocyte false --ci true --dev true
+  - pytest -m singlecontrol
+
+single_end_diff:
+  stage: multiple
+  only:
+    - master
+  script:
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false --ci true --dev true
+  - pytest -m singleend
  - pytest -m singlediff
-  artifacts:
-    expire_in: 2 days

 paired_end_diff:
  only:
+    - branches
+  except:
    - master
  stage: multiple
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -with-dag flowchart.pdf --ci true --dev true
+  - pytest -m pairedend
  - pytest -m paireddiff
  artifacts:
-    expire_in: 2 days
+    name: "$CI_JOB_NAME"
+    when: always
+    paths:
+      - flowchart.pdf
+    expire_in: 7 days

 single_end_skip:
  stage: skip
  only:
    - master
  script:
-  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false -resume
+  - NXF_OPTS="-Dleveldb.mmap=false" nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true --skipPlotProfile true --astrocyte false --ci true --dev true
  - pytest -m singleskip_true
-  artifacts:
-    expire_in: 2 days
+
+cleanup_job:  # runs in the final 'cleanup' stage; removes the $CI_PIPELINE_ID directory under the runner's project build path
+  stage: cleanup
+  script:
+    - cd "$CI_BUILDS_DIR/$CI_RUNNER_SHORT_TOKEN/$CI_PROJECT_NAME"  # quoted so the path is passed as a single word
+    - rm -fr "${CI_PIPELINE_ID:?}/"  # ':?' makes the shell abort instead of expanding to "/" when the variable is unset or empty
--- a/.gitlab/merge_request_templates/merge_request.md
+++ b/.gitlab/merge_request_templates/merge_request.md
@@ -6,6 +6,7 @@ These are the most common things requested on pull requests (PRs).
 - [ ] This comment contains a description of changes (with reason)
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
 - [ ] Documentation in `docs` is updated
+ - [ ] Replace dag.png with the most recent CI pipeline integrated_pe artifact
 - [ ] `CHANGELOG.md` is updated
 - [ ] `README.md` is updated
 - [ ] `LICENSE.md` is updated with new contributors
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,14 +2,35 @@

 All notable changes to this project will be documented in this file.

-## [Unreleased]
- Fix references.md link in citation of README.md
+## [publish_1.1.3 ] - 2020-08-16
+### Updated
+- Updated astrocyte to 0.3.1
+
+### Fixed
+- Fixed missing gene names in annotation
+
+## [publish_1.1.2 ] - 2020-06-22
+- Add pipeline tracking
+
+## [publish_1.1.1 ] - 2020-04-23
+### Added
 - Add Nextflow to references.md
- Fix pool_and_psuedoreplicate.py to run single experiment
- Add test data
+- Add test data for test_pool_and_pseudoreplicate
 - Add PlotProfile Option
 - Add Python version to MultiQC
 - Add and Update tests
+- Use GTF files instead of TxDb and org libraries in Annotate Peaks
+- Make gtf and geneName files as param inputs
+- Add test data for single control and single replicate
+
+### Fixed
+- Fix references.md link in citation of README.md
+- Fix pool_and_psuedoreplicate.py to run single experiment
+- Fix xcor to increase file size for --random-source
+- Fix skip diff test for paired-end data
+- Fix xcor to get lowest non zero value above 50
+- Fix references to display in Multiqc report
+- Update astrocyte testing to 0.2.0

 ## [publish_1.0.6 ] - 2019-05-31
 ### Added

--- a/README.md
+++ b/README.md
-# **CHIPseq Manual**
-## Version 1.0.6
-## May 31, 2019
+# **ChIP-seq Manual**
+## Version 1.1.2
+## June 21, 2020

 # BICF ChIP-seq Pipeline

-[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
-[![Coverage Report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)
-[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.24.0-brightgreen.svg
-)](https://www.nextflow.io/)
-[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.1.0-blue.svg)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648845.svg)](https://doi.org/10.5281/zenodo.2648845)
+|*master*|*dev*|
+|:-:|:-:|
+|[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)|[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/dev/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/dev)|
+|[![coverage report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/master/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/master)|[![coverage report](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/badges/dev/coverage.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/commits/dev)|
+
+[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.31.0-brightgreen)](https://www.nextflow.io/)
+[![Astrocyte](https://img.shields.io/badge/astrocyte-%E2%89%A50.3.1-blue)](https://astrocyte-test.biohpc.swmed.edu/static/docs/index.html)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2648844.svg)](https://doi.org/10.5281/zenodo.2648844)


 ## Introduction
-BICF ChIPseq is a bioinformatics best-practice analysis pipeline used for ChIP-seq (chromatin immunoprecipitation sequencing) data analysis at [BICF](http://www.utsouthwestern.edu/labs/bioinformatics/) at [UT Southwestern Department of Bioinformatics](http://www.utsouthwestern.edu/departments/bioinformatics/).
+BICF ChIP-seq is a bioinformatics best-practice analysis pipeline used for ChIP-seq (chromatin immunoprecipitation sequencing) data analysis at [BICF](http://www.utsouthwestern.edu/labs/bioinformatics/) at [UT Southwestern Department of Bioinformatics](http://www.utsouthwestern.edu/departments/bioinformatics/).

 The pipeline uses [Nextflow](https://www.nextflow.io), a bioinformatics workflow tool. It pre-processes raw data from FastQ inputs, aligns the reads and performs extensive quality-control on the results.

@@ -58,7 +60,10 @@ $ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/chipseq_analysis.git
      - --designFile '/path/to/file/design.txt',
      - --genome 'GRCm38', 'GRCh38', or 'GRCh37' (if you need to use another genome contact the [BICF](mailto:BICF@UTSouthwestern.edu))
      - --pairedEnd 'true' or 'false' (where 'true' is PE and 'false' is SE; default 'false')
-      - --outDir (optional) path and folder name of the output data, example: /home2/s000000/Desktop/Chipseq_output (if not specficied will be under workflow/output/)
+      - --skipDiff 'true' or 'false' (where 'true' skips differential peak calling and 'false' runs it; default 'false')
+      - --skipMotif 'true' or 'false' (where 'true' skips motif calling and 'false' runs it; default 'false')
+      - --skipPlotProfile 'true' or 'false' (where 'true' skips the metagene plot around the TSS and 'false' generates it; default 'false')
+      - --outDir (optional) path and folder name of the output data, example: /home2/s000000/Desktop/Chipseq_output (if not specified will be under workflow/output/)

 ## Pipeline
  + There are 11 steps to the pipeline
@@ -73,6 +78,7 @@ $ git clone git@git.biohpc.swmed.edu:BICF/Astrocyte/chipseq_analysis.git
    9. Annotate all peaks using ChipSeeker
    10. Calculate Differential Binding Activity with DiffBind (If more than 1 rep in more than 1 experiment)
    11. Use MEME-ChIP to find motifs in original peaks
+    12. Plot enrichment of signal around TSS

 See [FLOWCHART](docs/flowchart.pdf)


--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -9,7 +9,7 @@
 # A unique identifier for the workflow package, text/underscores only
 name: 'chipseq_analysis_bicf'
 # Who wrote this?
-author: 'Holly Ruess, Spencer D. Barnes, Beibei Chen and Venkat Malladi'
+author: 'Holly Ruess, Spencer D. Barnes, Jeremy A. Mathews, Beibei Chen and Venkat Malladi'
 # A contact email address for questions
 email: 'bicf@utsouthwestern.edu'
 # A more informative title for the workflow package
@@ -52,6 +52,7 @@ workflow_modules:
  - 'R/3.3.2-gccmkl'
  - 'meme/4.11.1-gcc-openmpi'
  - 'pandoc/2.7'
+  - 'singularity/3.0.2'


 # A list of parameters used by the workflow, defining how to present them,

--- a/docs/flowchart.pdf
+++ b/docs/flowchart.pdf
--- a/docs/index.md
+++ b/docs/index.md
@@ -20,6 +20,7 @@ Report issues to the Bioinformatic Core Facility [BICF](mailto:BICF@UTSouthweste
    9. Annotate all peaks using ChipSeeker
    10. Calculate Differential Binding Activity with DiffBind (If more than 1 rep in more than 1 experiment)
    11. Use MEME-ChIP to find motifs in original peaks
+    12. Plot enrichment of signal around TSS


 ## Workflow Parameters

--- a/docs/references.md
+++ b/docs/references.md
@@ -52,7 +52,7 @@
  * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)

 17. **BICF ChIP-seq Analysis Workflow**:
-  * Spencer D. Barnes, Holly Ruess, Jeremy A. Mathews, Beibei Chen, and Venkat S. Malladi. 2019. BICF ChIP-seq Analysis Workflow (publish_1.0.5). Zenodo. doi:[10.5281/zenodo.2648844](https://doi.org/10.5281/zenodo.2648844)
+  * Spencer D. Barnes, Holly Ruess, Jeremy A. Mathews, Beibei Chen, and Venkat S. Malladi. 2020. BICF ChIP-seq Analysis Workflow (publish_1.1.3). Zenodo. doi:[10.5281/zenodo.3986942](https://doi.org/10.5281/zenodo.3986942)

 18. **Nextflow**:
  * Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., and Notredame, C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology, 35(4), 316.

--- a/test_data/A_1.bedpe.gz
+++ b/test_data/A_1.bedpe.gz
--- a/test_data/B_1.bedpe.gz
+++ b/test_data/B_1.bedpe.gz
--- a/test_data/design_single_contol_SE.txt
+++ b/test_data/design_single_contol_SE.txt
+sample_id	experiment_id	biosample	factor	treatment	replicate	control_id	fastq_read1
+ENCLB497XZB	ENCSR000DXB	Panc1	H3K4me3	None	1	ENCLB304SBJ	ENCFF001GBW.fastq.gz
+ENCLB304SBJ	ENCSR000DXC	Panc1	Control	None	1	ENCLB304SBJ	ENCFF001HWJ.fastq.gz
--- a/test_data/fetch_test_data.sh
+++ b/test_data/fetch_test_data.sh
@@ -25,3 +25,9 @@ wget https://www.encodeproject.org/files/ENCFF161HBP/@@download/ENCFF161HBP.fast
 wget https://www.encodeproject.org/files/ENCFF776KZU/@@download/ENCFF776KZU.fastq.gz
 wget https://www.encodeproject.org/files/ENCFF119KHM/@@download/ENCFF119KHM.fastq.gz
 echo "Done with Paired-end"
+
+echo "Downloading Single-end data set Human ENCSR000DXB and ENCSR000DXC"
+wget https://www.encodeproject.org/files/ENCFF001GBW/@@download/ENCFF001GBW.fastq.gz
+wget https://www.encodeproject.org/files/ENCFF001GBV/@@download/ENCFF001GBV.fastq.gz
+wget https://www.encodeproject.org/files/ENCFF001HWJ/@@download/ENCFF001HWJ.fastq.gz
+echo "Done with Single-end"
--- a/test_data/test_cross.qc
+++ b/test_data/test_cross.qc
+Test.20.tagAlign.gz	18588987	0,20,33	0.211525291335199,0.211232019956852,0.211139666755398	35	0.2123067	1500	0.209429	1.01001	0.7284536       0
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -2,6 +2,7 @@ process {
  executor = 'slurm'
  queue = 'super'
  clusterOptions = '--hold'
+  beforeScript= 'ulimit -Ss unlimited'

  // Process specific configuration
  withName: checkDesignFile {
@@ -65,7 +66,7 @@ process {
    cpus = 32
  }
  withName: multiqcReport {
-    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'multiqc/1.7']
+    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'singularity/3.0.2']
    executor = 'local'
  }
 }
@@ -74,25 +75,28 @@ params {
  // Reference file paths on BioHPC
  genomes {
    'GRCh38' {
-      bwa = '/project/shared/bicf_workflow_ref/GRCh38'
+      bwa = '/project/shared/bicf_workflow_ref/human/GRCh38'
      genomesize = 'hs'
-      chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/genomefile.txt'
-      fasta = '/project/shared/bicf_workflow_ref/GRCh38/genome.fa'
-      gtf = '/project/shared/bicf_workflow_ref/GRCh38/gencode.gtf'
+      chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh38/genomefile.txt'
+      fasta = '/project/shared/bicf_workflow_ref/human/GRCh38/genome.fa'
+      gtf = '/project/shared/bicf_workflow_ref/human/GRCh38/gencode.v25.chr_patch_hapl_scaff.annotation.gtf'
+      geneNames = '/project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt'
    }
    'GRCh37' {
-      bwa = '/project/shared/bicf_workflow_ref/GRCh37'
+      bwa = '/project/shared/bicf_workflow_ref/human/GRCh37'
      genomesize = 'hs'
-      chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/genomefile.txt'
-      fasta = '/project/shared/bicf_workflow_ref/GRCh37/genome.fa'
-      gtf = '/project/shared/bicf_workflow_ref/GRCh37/gencode.gtf'
+      chromsizes = '/project/shared/bicf_workflow_ref/human/GRCh37/genomefile.txt'
+      fasta = '/project/shared/bicf_workflow_ref/human/GRCh37/genome.fa'
+      gtf = '/project/shared/bicf_workflow_ref/human/GRCh37/gencode.v19.chr_patch_hapl_scaff.annotation.gtf'
+      geneNames = '/project/shared/bicf_workflow_ref/human/GRCh37/genenames.txt'
    }
    'GRCm38' {
-      bwa = '/project/shared/bicf_workflow_ref/GRCm38'
+      bwa = '/project/shared/bicf_workflow_ref/mouse/GRCm38'
      genomesize = 'mm'
-      chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/genomefile.txt'
-      fasta = '/project/shared/bicf_workflow_ref/GRCm38/genome.fa'
-      gtf = '/project/shared/bicf_workflow_ref/GRCm38/gencode.gtf'
+      chromsizes = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genomefile.txt'
+      fasta = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genome.fa'
+      gtf = '/project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf'
+      geneNames = '/project/shared/bicf_workflow_ref/mouse/GRCm38/genenames.txt'
    }
  }
 }

--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -28,26 +28,43 @@ params.skipMotif = false
 params.skipPlotProfile = false
 params.references = "$baseDir/../docs/references.md"
 params.multiqc =  "$baseDir/conf/multiqc_config.yaml"
+params.ci = false
+params.dev = false
+

 // Assign variables if astrocyte
 if (params.astrocyte) {
  print("Running under astrocyte")
  referenceLocation = "/project/shared/bicf_workflow_ref"
-  params.bwaIndex = "$referenceLocation/$params.genome"
-  params.chromSizes = "$referenceLocation/$params.genome/genomefile.txt"
-  params.fasta = "$referenceLocation/$params.genome/genome.fa"
-  params.gtf = "$referenceLocation/$params.genome/gencode.gtf"
-  if (params.genome == 'GRCh37' || params.genome == 'GRCh38') {
+  if (params.genome == 'GRCh37') {
+    params.bwaIndex = "$referenceLocation/human/$params.genome"
+    params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
+    params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
+    params.gtf = "$referenceLocation/human/$params.genome/gencode.v19.chr_patch_hapl_scaff.annotation.gtf"
+    params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
    params.genomeSize = 'hs'
  } else if (params.genome == 'GRCm38') {
+    params.bwaIndex = "$referenceLocation/mouse/$params.genome"
+    params.chromSizes = "$referenceLocation/mouse/$params.genome/genomefile.txt"
+    params.fasta = "$referenceLocation/mouse/$params.genome/genome.fa"
+    params.gtf = "$referenceLocation/mouse/$params.genome/gencode.vM20.annotation.gtf"
+    params.geneNames = "$referenceLocation/mouse/$params.genome/genenames.txt"
    params.genomeSize = 'mm'
+  } else if (params.genome == 'GRCh38') {
+    params.bwaIndex = "$referenceLocation/human/$params.genome"
+    params.chromSizes = "$referenceLocation/human/$params.genome/genomefile.txt"
+    params.fasta = "$referenceLocation/human/$params.genome/genome.fa"
+    params.gtf = "$referenceLocation/human/$params.genome/gencode.v25.chr_patch_hapl_scaff.annotation.gtf"
+    params.geneNames = "$referenceLocation/human/$params.genome/genenames.txt"
+    params.genomeSize = 'hs'
  }
 } else {
    params.bwaIndex = params.genome ? params.genomes[ params.genome ].bwa ?: false : false
    params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?: false : false
    params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
    params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
-    params.gtf = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
+    params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false
+    params.geneNames = params.genome ? params.genomes[ params.genome ].geneNames ?: false : false
 }


@@ -84,7 +101,33 @@ skipMotif = params.skipMotif
 skipPlotProfile = params.skipPlotProfile
 references = params.references
 multiqc = params.multiqc
-gtfFile = Channel.fromPath(params.gtf)
+gtfFile = params.gtf
+geneNames = params.geneNames
+
+/*
+ * trackStart: print host name and ulimits, then PUT run metadata (session id, start time, versions, astrocyte/ci/dev flags) as JSON to the pipeline-tracking endpoint
+ */
+
+process trackStart {  // NOTE(review): "pipelineVersion" below is hard-coded; keep it in sync with manifest.version in nextflow.config
+  script:
+  """
+  hostname
+  ulimit -a
+
+  curl -H 'Content-Type: application/json' -X PUT -d '{ \
+      "sessionId": "${workflow.sessionId}", \
+      "pipeline": "chipseq_analysis", \
+      "start": "${workflow.start}", \
+      "astrocyte": ${params.astrocyte}, \
+      "status": "started", \
+      "nextflowVersion": "${workflow.nextflow.version}", \
+      "pipelineVersion": "1.1.2", \
+      "ci": ${params.ci}, \
+      "dev": ${params.dev}}' \
+  "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
+  """
+}
+

 // Check design file for errors
 process checkDesignFile {
@@ -468,8 +511,7 @@ process plotProfile {

  input:

-  file ("*.pooled.fc_signal.bw") from bigwigs.collect()
-  file gtf from gtfFile
+  file bigWigList from bigwigs.collect()

  output:

@@ -482,7 +524,7 @@ process plotProfile {
  script:
  """
  module load deeptools/2.5.0.1
-  bash $baseDir/scripts/plotProfile.sh
+  bash $baseDir/scripts/plot_profile.sh -g $gtfFile
  """
 }

@@ -534,7 +576,7 @@ process peakAnnotation {

  """
  module load R/3.3.2-gccmkl
-  Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $genome
+  Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $gtfFile $geneNames
  """

 }
@@ -637,12 +679,12 @@ process multiqcReport {
  """
  module load python/3.6.1-2-anaconda
  module load pandoc/2.7
-  module load multiqc/1.7
+  module load singularity/3.0.2
  echo $workflow.nextflow.version > version_nextflow.txt
-  multiqc --version > version_multiqc.txt
+  singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc --version > version_multiqc.txt
  python --version &> version_python.txt
  python3 $baseDir/scripts/generate_references.py -r $references -o software_references
  python3 $baseDir/scripts/generate_versions.py -o software_versions
-  multiqc -c $multiqc .
+  singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc -c $multiqc .
  """
 }
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
@@ -4,11 +4,28 @@ profiles {
  }
 }

+trace {
+  enabled = true
+  file = 'pipeline_trace.txt'
+  fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
+}
+
+timeline {
+  enabled = true
+  file = 'timeline.html'
+}
+
+report {
+  enabled = true
+  file = 'report.html'
+}
+
+
 manifest {
  name = 'chipseq_analysis'
  description = 'BICF ChIP-seq Analysis Workflow.'
  homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis'
-  version = '1.0.6'
+  version = '1.1.2'
  mainScript = 'main.nf'
  nextflowVersion = '>=0.31.0'
 }
--- a/workflow/scripts/annotate_peaks.R
+++ b/workflow/scripts/annotate_peaks.R
@@ -6,40 +6,27 @@
 #* --------------------------------------------------------------------------
 #*

+#Currently Human or Mouse
+
 # Load libraries
 library("ChIPseeker")
-
-# Currently mouse or human
-
-library("TxDb.Hsapiens.UCSC.hg19.knownGene")
-library("TxDb.Mmusculus.UCSC.mm10.knownGene")
-library("TxDb.Hsapiens.UCSC.hg38.knownGene")
-library("org.Hs.eg.db")
-library("org.Mm.eg.db")
-
+library(GenomicFeatures)

 # Create parser object
 args <- commandArgs(trailingOnly=TRUE)

 # Check input args
-if (length(args) != 2) {
-  stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly", call.=FALSE)
+if (length(args) != 3) {
+  stop("Usage: annotate_peaks.R annotate_design.tsv gtf geneNames", call.=FALSE)
 }

 design_file <- args[1]
-genome_assembly <- args[2]
+gtf <- args[2]
+geneNames <- args[3]

 # Load UCSC Known Genes
-if(genome_assembly=='GRCh37') {
-    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
-    annodb <- 'org.Hs.eg.db'
-} else if(genome_assembly=='GRCm38')  {
-    txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
-    annodb <- 'org.Mm.eg.db'
-} else if(genome_assembly=='GRCh38')  {
-    txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
-    annodb <- 'org.Hs.eg.db'
-}
+txdb <- makeTxDbFromGFF(gtf)
+sym <- read.table(geneNames, header=T, sep='\t') [,4:5]

 # Output version of ChIPseeker
 chipseeker_version = packageVersion('ChIPseeker')
@@ -54,18 +41,19 @@ names(files) <- design$Condition
 # Granges of files

 peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE)
-peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE)
+peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)

-column_names <- c("chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
+column_names <- c("geneId","chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
                  "pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd",
-                  "geneLength" ,"geneStrand", "geneId", "transcriptId", "distanceToTSS",
-                  "ENSEMBL", "symbol", "geneName")
+                  "geneLength" ,"geneStrand", "transcriptId", "distanceToTSS", "symbol")

 for(index in c(1:length(peakAnnoList))) {
  filename <- paste(names(peaks)[index], ".chipseeker_annotation.tsv", sep="")
  df <- as.data.frame(peakAnnoList[[index]])
-  colnames(df) <- column_names
-  write.table(df[ , !(names(df) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
+  df$geneId <- sapply(strsplit(as.character(df$geneId), split = "\\."), "[[", 1)
+  df_final <- merge(df, sym, by.x="geneId", by.y="ensembl", all.x=T)
+  colnames(df_final) <- column_names
+  write.table(df_final[ , !(names(df_final) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)

  # Draw individual plots


--- a/workflow/scripts/call_peaks_macs.py
+++ b/workflow/scripts/call_peaks_macs.py
@@ -138,8 +138,20 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes)
    with open(xcor, 'r') as xcor_fh:
        firstline = xcor_fh.readline()
        frag_lengths = firstline.split()[2]  # third column
-        fragment_length = frag_lengths.split(',')[0]  # grab first value
-        logger.info("Fraglen %s", fragment_length)
+        frag_lengths_array = frag_lengths.split(',')
+        fragment_length = 0
+        fragment = False
+        # Loop through all values of fragment length
+        for f in frag_lengths.split(','):
+            fragment_length = f
+            logger.info("Fraglen %s", fragment_length)
+            if int(fragment_length) > 50:
+                fragment = True
+                break
+
+        if fragment == False:
+            logger.info('Error in cross-correlation analysis: %s', frag_lengths_array)
+            raise Exception("Error in cross-correlation analysis: %s" % frag_lengths_array)

    # Generate narrow peaks and preliminary signal tracks


--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -46,7 +46,7 @@ SOFTWARE_REGEX = {
    'MEME-ChIP': ['motifSearch_vf/version_memechip.txt', r"Version (\S+)"],
    'DiffBind': ['diffPeaks_vf/version_DiffBind.txt', r"Version (\S+)\""],
    'deepTools': ['experimentQC_vf/version_deeptools.txt', r"deeptools (\S+)"],
-    'Python': ['version_python.txt', r"python, version (\S+)"],
+    'Python': ['version_python.txt', r"Python (\S+)"],
    'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
 }
No results found