diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3e6393ed8d83c8b402f9b37d1713fe2244c49433..83ef6e5f6e80f646f93eff71bf9016c85e1f96fe 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -57,6 +57,7 @@ single_end_diff: - master script: - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --astrocyte false -resume + - pytest -m singleend - pytest -m singlediff artifacts: expire_in: 2 days @@ -67,6 +68,7 @@ paired_end_diff: stage: multiple script: - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true --astrocyte false -resume + - pytest -m pairedend - pytest -m paireddiff artifacts: expire_in: 2 days diff --git a/CHANGELOG.md b/CHANGELOG.md index bc28c53bfc58c5288b3b912d06152a1657e3db0a..90cc2d75a828b8e52e7753b986e0efa88c230366 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. - Add and Update tests - Use GTF files instead of TxDb and org libraries in Annotate Peaks - Make gtf and geneName files as param inputs +- Fix xcor to increase file size for --random-source ## [publish_1.0.6 ] - 2019-05-31 ### Added diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index b2135584edf920a7fc432589ec0ae99a76d573e0..037d1e43e37c7daeb21489a00dff85701c8f8fd0 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -65,7 +65,7 @@ process { cpus = 32 } withName: multiqcReport { - module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'multiqc/1.7'] + module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'singularity/3.0.2'] executor = 'local' } } diff --git a/workflow/main.nf b/workflow/main.nf index f654471c74632c7efdd23836df63c9aba78c2099..bf87941930e7a3737c8610703d224887c113a4d1 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -653,14 +653,11 @@ process multiqcReport { script: """ - module load python/3.6.1-2-anaconda - module load 
pandoc/2.7 - module load multiqc/1.7 echo $workflow.nextflow.version > version_nextflow.txt - multiqc --version > version_multiqc.txt + singularity exec /project/shared/bicf_workflow_ref/singularity_images/multiqc.sif multiqc --version > version_multiqc.txt python --version &> version_python.txt python3 $baseDir/scripts/generate_references.py -r $references -o software_references python3 $baseDir/scripts/generate_versions.py -o software_versions - multiqc -c $multiqc . + singularity exec /project/shared/bicf_workflow_ref/singularity_images/multiqc.sif multiqc -c $multiqc . """ } diff --git a/workflow/scripts/plotProfile.sh b/workflow/scripts/plotProfile.sh index 7f62dc5bdd9e13026315222b076b8b939f839d00..0f50501c410d2ab0a7549190ba830aa0a90d7602 100644 --- a/workflow/scripts/plotProfile.sh +++ b/workflow/scripts/plotProfile.sh @@ -10,6 +10,7 @@ computeMatrix reference-point \ -R $gtf \ --skipZeros \ - -o computeMatrix.gz + -o computeMatrix.gz \ + -p max/2 plotProfile -m computeMatrix.gz \ -out plotProfile.png \ diff --git a/workflow/scripts/xcor.py b/workflow/scripts/xcor.py index 3bf8dbfa1e7da470ec0eea413970a72b01a968c1..66fa9aec4d7030288b3bd57d7e156247d82c97ec 100644 --- a/workflow/scripts/xcor.py +++ b/workflow/scripts/xcor.py @@ -103,14 +103,20 @@ def xcor(tag, paired): uncompressed_tag_filename = tag_basename # Subsample tagAlign file - number_reads = 15000000 + number_reads = 20000000 subsampled_tag_filename = \ tag_basename + ".%d.tagAlign.gz" % (number_reads/1000000) + tag_extended = 'cat.tagAlign.gz' + out, err = utils.run_pipe([ + "zcat %s %s %s" % + (tag, tag, tag) + ], outfile=tag_extended) + steps = [ 'zcat %s' % (tag), 'grep -v "chrM"', - 'shuf -n %d --random-source=%s' % (number_reads, tag)] + 'shuf -n %d --random-source=%s' % (number_reads, tag_extended)] if paired: steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""]) diff --git a/workflow/tests/test_annotate_peaks.py b/workflow/tests/test_annotate_peaks.py index 
22e1d78ef149890c1b518225f6e01b2e259aabdb..73656357dc176169246c8196f20c60555e87dfac 100644 --- a/workflow/tests/test_annotate_peaks.py +++ b/workflow/tests/test_annotate_peaks.py @@ -41,4 +41,4 @@ def test_upsetplot_pairedend(): def test_annotation_pairedend(): annotation_file = test_output_path + 'ENCSR729LGA.chipseeker_annotation.tsv' assert os.path.exists(annotation_file) - assert utils.count_lines(annotation_file) >= 25466 + assert utils.count_lines(annotation_file) >= 25367 diff --git a/workflow/tests/test_call_peaks_macs.py b/workflow/tests/test_call_peaks_macs.py index 28881bd6e61a084ffa51a83647db9cd76ace6854..cd94e1783a25ffa953b3666f7cb3172c33f4f0bd 100644 --- a/workflow/tests/test_call_peaks_macs.py +++ b/workflow/tests/test_call_peaks_macs.py @@ -26,7 +26,7 @@ def test_peaks_xls_singleend(): @pytest.mark.singleend def test_peaks_bed_singleend(): peak_file = test_output_path + 'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak' - assert utils.count_lines(peak_file) == 227389 + assert utils.count_lines(peak_file) == 226738 @pytest.mark.pairedend @@ -47,4 +47,4 @@ def test_peaks_xls_pairedend(): @pytest.mark.pairedend def test_peaks_bed_pairedend(): peak_file = test_output_path + 'ENCSR729LGA/2/' + 'ENCLB568IYX.narrowPeak' - assert utils.count_lines(peak_file) == 113821 + assert utils.count_lines(peak_file) == 112631 diff --git a/workflow/tests/test_diff_peaks.py b/workflow/tests/test_diff_peaks.py index ed22fd83d5db04fac6c3c4924cafd81455a55c5a..93655d177a322b2d44056df79485909413444317 100644 --- a/workflow/tests/test_diff_peaks.py +++ b/workflow/tests/test_diff_peaks.py @@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed')) diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv' assert os.path.exists(diffbind_file) - assert utils.count_lines(diffbind_file) >= 66201 + assert utils.count_lines(diffbind_file) >= 65182 diff --git 
a/workflow/tests/test_overlap_peaks.py b/workflow/tests/test_overlap_peaks.py index 67435d9968bcf4c681c43685cb85a35475cb8b1e..c450551ab5ac582977163c5d59220f5e807615c6 100644 --- a/workflow/tests/test_overlap_peaks.py +++ b/workflow/tests/test_overlap_peaks.py @@ -44,4 +44,4 @@ def test_overlap_peaks_singleend(): def test_overlap_peaks_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak')) peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak' - assert utils.count_lines(peak_file) >= 25758 + assert utils.count_lines(peak_file) >= 25657 diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py index fd475943f3f60317c4e0d5dd1ef067cc23fd842d..19777a0be7f045cfb91ca3f1d96fdba7bd39322c 100644 --- a/workflow/tests/test_xcor.py +++ b/workflow/tests/test_xcor.py @@ -17,9 +17,9 @@ def test_cross_plot_singleend(): def test_cross_qc_singleend(): qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '190,200,210' - assert df_xcor[8].iloc[0] == 1.025906 - assert round(df_xcor[9].iloc[0], 6) == 1.139671 + assert df_xcor[2].iloc[0] == '185,195,205' + assert df_xcor[8].iloc[0] == 1.02454 + assert df_xcor[9].iloc[0] == 0.8098014 @pytest.mark.pairedend @@ -31,6 +31,6 @@ def test_cross_qc_pairedend(): def test_cross_plot_pairedend(): qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '220,430,475' - assert round(df_xcor[8].iloc[0],6) == 1.060018 - assert df_xcor[9].iloc[0] == 4.099357 + assert df_xcor[2].iloc[0] == '215,225,455' + assert round(df_xcor[8].iloc[0],6) == 1.056201 + assert df_xcor[9].iloc[0] == 3.599357