diff --git a/workflow/main.nf b/workflow/main.nf index ffcffde1c1d5a5932ee8bf13db3c60d206771083..6051ff7cf3d7409db201634a14814a220bab2cf2 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -389,6 +389,7 @@ peaksDesign = experimentPeaks process consensusPeaks { publishDir "$outDir/${task.process}", mode: 'copy' + publishDir "$outDir/design", mode: 'copy', pattern: '*.{csv,tsv}' input: diff --git a/workflow/scripts/call_peaks_macs.py b/workflow/scripts/call_peaks_macs.py index 69a5968efc0cd34d3429b0a0f2677b023c676f50..17b1414b6236ee5cfda0d1c0694a988ecea237c9 100644 --- a/workflow/scripts/call_peaks_macs.py +++ b/workflow/scripts/call_peaks_macs.py @@ -126,7 +126,7 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes) # Remove coordinates outside chromosome sizes int_narrowpeak_fn = '%s_peaks.narrowPeak' % (prefix) - narrowpeak_fn = '%s_peaks.narrowPeak' % (prefix) + narrowpeak_fn = '%s.narrowPeak' % (prefix) clipped_narrowpeak_fn = 'clipped-%s' % (narrowpeak_fn) @@ -239,6 +239,7 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes) # Remove temporary files os.remove(clipped_narrowpeak_fn) os.remove(rescaled_narrowpeak_fn) + os.remove(int_narrowpeak_fn) def main(): diff --git a/workflow/scripts/map_reads.py b/workflow/scripts/map_reads.py index 3c20ac81ddf051f11d5b7e4e37e15629991ffa31..a1f8161cb4ee71885719d954e9ddf25428499d11 100644 --- a/workflow/scripts/map_reads.py +++ b/workflow/scripts/map_reads.py @@ -104,7 +104,7 @@ def generate_sa(fastq, reference): def align_se(fastq, sai, reference, fastq_basename): '''Use BWA to align SE data.''' - bam_filename = '%s.srt.bam' % (fastq_basename) + bam_filename = '%s.bam' % (fastq_basename) steps = [ "bwa samse %s %s %s" @@ -125,7 +125,7 @@ def align_pe(fastq, sai, reference, fastq_basename): sam_filename = "%s.sam" % (fastq_basename) badcigar_filename = "%s.badReads" % (fastq_basename) - bam_filename = '%s.srt.bam' % (fastq_basename) + bam_filename = '%s.bam' % 
(fastq_basename) # Remove read pairs with bad CIGAR strings and sort by position steps = [ diff --git a/workflow/tests/test_annotate_peaks.py b/workflow/tests/test_annotate_peaks.py index 692ddced7762a9a9742c6bf51e652673b62bdd1f..ef42a9adc332c505715ad70be3b343de901898a8 100644 --- a/workflow/tests/test_annotate_peaks.py +++ b/workflow/tests/test_annotate_peaks.py @@ -11,18 +11,33 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend -def test_annotate_peaks_singleend(): +def test_pie_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_pie.pdf')) + + +@pytest.mark.singleend +def test_upsetplot_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_upsetplot.pdf')) + +@pytest.mark.singleend +def test_annotation_singleend(): annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.tsv' assert os.path.exists(annotation_file) assert utils.count_lines(annotation_file) == 152840 @pytest.mark.pairedend -def test_annotate_peaks_pairedend(): +def test_pie_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.chipseeker_pie.pdf')) + + +@pytest.mark.pairedend +def test_upsetplot_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.chipseeker_upsetplot.pdf')) + + +@pytest.mark.pairedend +def test_annotation_pairedend(): annotation_file = test_output_path + 'ENCSR729LGA.chipseeker_annotation.tsv' assert os.path.exists(annotation_file) assert utils.count_lines(annotation_file) == 25391 diff --git a/workflow/tests/test_call_peaks_macs.py b/workflow/tests/test_call_peaks_macs.py index dabfd7167f1e295735066db378028d17124e2654..91c1d0e43460ed663d1c59bf6caa8ad800e8fd61 100644 --- a/workflow/tests/test_call_peaks_macs.py +++ b/workflow/tests/test_call_peaks_macs.py @@ -9,18 +9,42 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend -def test_call_peaks_macs_singleend(): 
+def test_fc_signal_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.fc_signal.bw')) + + +@pytest.mark.singleend +def test_pvalue_signal_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.pvalue_signal.bw')) - assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.xls')) + + +@pytest.mark.singleend +def test_peaks_xls_singleend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT_peaks.xls')) + + +@pytest.mark.singleend +def test_peaks_bed_singleend(): peak_file = test_output_path + 'ENCLB144FDT.narrowPeak' assert utils.count_lines(peak_file) == 227389 @pytest.mark.pairedend -def test_call_peaks_macs_pairedend(): +def test_fc_signal_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.fc_signal.bw')) + + +@pytest.mark.pairedend +def test_pvalue_signal_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.pvalue_signal.bw')) - assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.xls')) + + +@pytest.mark.pairedend +def test_peaks_xls_pairedend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX_peaks.xls')) + + +@pytest.mark.pairedend +def test_peaks_bed_pairedend(): peak_file = test_output_path + 'ENCLB568IYX.narrowPeak' - assert utils.count_lines(peak_file) == 112652 + assert utils.count_lines(peak_file) == 113821 diff --git a/workflow/tests/test_convert_reads.py b/workflow/tests/test_convert_reads.py index 753b54b19a1bcbd6fd678a9592ae3be49f637ebf..8dda74678d42c0049eb74caa205ad8b662772bb5 100644 --- a/workflow/tests/test_convert_reads.py +++ b/workflow/tests/test_convert_reads.py @@ -8,12 +8,20 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend -def test_convert_reads_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.tagAlign.gz')) - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.bedse.gz')) +def 
test_tag_reads_singleend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.tagAlign.gz')) + + +@pytest.mark.singleend +def test_bed_reads_singleend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bedse.gz')) @pytest.mark.pairedend -def test_map_qc_pairedend(): +def test_tag_reads_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.tagAlign.gz')) + + +@pytest.mark.pairedend +def test_bed_reads_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.bedpe.gz')) diff --git a/workflow/tests/test_diff_peaks.py b/workflow/tests/test_diff_peaks.py index 59b1e3a27d774a1b1a2ef648369c2c3385cff97f..84c5179f021abd676a818ac24e698b54bd7b648d 100644 --- a/workflow/tests/test_diff_peaks.py +++ b/workflow/tests/test_diff_peaks.py @@ -15,14 +15,26 @@ def test_diff_peaks_singleend_single_rep(): assert os.path.isdir(test_output_path) == False @pytest.mark.pairedend -def test_annotate_peaks_pairedend_single_rep(): +def test_diff_peaks_pairedend_single_rep(): assert os.path.isdir(test_output_path) == False @pytest.mark.singlediff -def test_diff_peaks_singleend_multiple_rep(): +def test_heatmap_singleend_multiple_rep(): assert os.path.exists(os.path.join(test_output_path, 'heatmap.pdf')) + + +@pytest.mark.singlediff +def test_pca_singleend_multiple_rep(): assert os.path.exists(os.path.join(test_output_path, 'pca.pdf')) + + +@pytest.mark.singlediff +def test_normcount_singleend_multiple_rep(): assert os.path.exists(os.path.join(test_output_path, 'normcount_peaksets.txt')) + + +@pytest.mark.singlediff +def test_diffbind_singleend_multiple_rep(): if os.path.isfile(os.path.join(test_output_path, 'ENCSR272GNQ_vs_ENCSR238SGC_diffbind.bed')): assert os.path.exists(os.path.join(test_output_path, 'ENCSR272GNQ_vs_ENCSR238SGC_diffbind.bed')) diffbind_file = test_output_path + 'ENCSR272GNQ_vs_ENCSR238SGC_diffbind.csv' @@ -32,11 +44,24 @@ def test_diff_peaks_singleend_multiple_rep(): assert 
os.path.exists(diffbind_file) assert utils.count_lines(diffbind_file) == 201039 + @pytest.mark.paireddiff -def test_annotate_peaks_pairedend_single_rep(): +def test_heatmap_pairedend_multiple_rep(): assert os.path.exists(os.path.join(test_output_path, 'heatmap.pdf')) + + +@pytest.mark.paireddiff +def test_pca_pairedend_multiple_rep(): assert os.path.exists(os.path.join(test_output_path, 'pca.pdf')) + + +@pytest.mark.paireddiff +def test_normcount_pairedend_multiple_rep(): assert os.path.exists(os.path.join(test_output_path, 'normcount_peaksets.txt')) + + +@pytest.mark.paireddiff +def test_diffbind_pairedend_multiple_rep(): if os.path.isfile(os.path.join(test_output_path, 'ENCSR757EMK_vs_ENCSR729LGA_diffbind.bed')): assert os.path.exists(os.path.join(test_output_path, 'ENCSR757EMK_vs_ENCSR729LGA_diffbind.bed')) diffbind_file = test_output_path + 'ENCSR757EMK_vs_ENCSR729LGA_diffbind.csv' diff --git a/workflow/tests/test_experiment_qc.py b/workflow/tests/test_experiment_qc.py index 5256da5fbebc424ea29a1977d9257bfc95060ae3..98853b9d6c2eabc4e4b9887cfb51c07a6af08fa4 100644 --- a/workflow/tests/test_experiment_qc.py +++ b/workflow/tests/test_experiment_qc.py @@ -31,17 +31,34 @@ def test_check_update_controls(design_bam): @pytest.mark.singleend -def test_experiment_qc_singleend(): +def test_coverage_singleend(): assert os.path.exists(os.path.join(test_output_path, 'sample_mbs.npz')) - assert os.path.exists(os.path.join(test_output_path, 'heatmap_SpearmanCorr.png')) assert os.path.exists(os.path.join(test_output_path, 'coverage.png')) + + +@pytest.mark.singleend +def test_spearman_singleend(): + assert os.path.exists(os.path.join(test_output_path, 'heatmap_SpearmanCorr.png')) + + +@pytest.mark.singleend +def test_fingerprint_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT_fingerprint.png')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI_fingerprint.png')) + -@pytest.mark.pairdend +@pytest.mark.pairedend -def test_experiment_qc_pairedend(): +def 
test_coverage_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'sample_mbs.npz')) - assert os.path.exists(os.path.join(test_output_path, 'heatmap_SpearmanCorr.png')) assert os.path.exists(os.path.join(test_output_path, 'coverage.png')) + + +@pytest.mark.pairedend +def test_spearman_pairedend(): + assert os.path.exists(os.path.join(test_output_path, 'heatmap_SpearmanCorr.png')) + + +@pytest.mark.pairedend +def test_fingerprint_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX_fingerprint.png')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB637LZP_fingerprint.png')) diff --git a/workflow/tests/test_map_qc.py b/workflow/tests/test_map_qc.py index 5ae8218f33fb6a339aebffcb97598ff05793cf01..7bff0601d679dd5426e03c08c1670acbecf0ad22 100644 --- a/workflow/tests/test_map_qc.py +++ b/workflow/tests/test_map_qc.py @@ -9,14 +9,22 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend -def test_map_qc_singleend(): +def test_dedup_files_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam.bai')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.qc')) + + +@pytest.mark.singleend +def test_map_qc_singleend(): filtered_reads_report = test_output_path + 'ENCLB831RUI.dedup.flagstat.qc' samtools_report = open(filtered_reads_report).readlines() assert '64962570 + 0 in total' in samtools_report[0] assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4] + + +@pytest.mark.singleend +def test_library_complexity_singleend(): library_complexity = test_output_path + 'ENCLB831RUI.pbc.qc' df_library_complexity = pd.read_csv(library_complexity, sep='\t') assert df_library_complexity["NRF"].iloc[0] == 0.926192 @@ -25,16 +33,24 @@ def test_map_qc_singleend(): @pytest.mark.pairedend -def test_map_qc_pairedend(): +def test_dedup_files_pairedend(): assert 
os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam.bai')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.qc')) + + +@pytest.mark.pairedend +def test_map_qc_pairedend(): filtered_reads_report = test_output_path + 'ENCLB568IYX.dedup.flagstat.qc' samtools_report = open(filtered_reads_report).readlines() - assert '47389080 + 0 in total' in samtools_report[0] - assert '47389080 + 0 mapped (100.00%:N/A)' in samtools_report[4] + assert '47388510 + 0 in total' in samtools_report[0] + assert '47388510 + 0 mapped (100.00%:N/A)' in samtools_report[4] + + +@pytest.mark.pairedend +def test_library_complexity_pairedend(): library_complexity = test_output_path + 'ENCLB568IYX.pbc.qc' df_library_complexity = pd.read_csv(library_complexity, sep='\t') assert df_library_complexity["NRF"].iloc[0] == 0.947064 - assert df_library_complexity["PBC1"].iloc[0] == 0.946724 + assert round(df_library_complexity["PBC1"].iloc[0],6) == 0.946723 assert df_library_complexity["PBC2"].iloc[0] == 18.643039 diff --git a/workflow/tests/test_motif_search.py b/workflow/tests/test_motif_search.py index 09b4ca949df62603f68807bc016b076f9baff68f..8c1265211b87da7d006b9020087ce04994889fd0 100644 --- a/workflow/tests/test_motif_search.py +++ b/workflow/tests/test_motif_search.py @@ -11,17 +11,26 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend -def test_motif_search_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC_memechip', 'ENCSR238SGC.fa')) - assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC_memechip', 'index.html')) +def test_limited_peaks_singleend(): peak_file_ENCSR238SGC = test_output_path + 'ENCSR238SGC.600.narrowPeak' assert os.path.exists(peak_file_ENCSR238SGC) assert utils.count_lines(peak_file_ENCSR238SGC) == 600 + +@pytest.mark.singleend +def test_motif_search_singleend(): + assert 
os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC_memechip', 'ENCSR238SGC.fa')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC_memechip', 'index.html')) + + @pytest.mark.pairedend -def test_motif_search_pairedend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_memechip', 'ENCSR729LGA.fa')) - assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_memechip', 'index.html')) +def test_limited_peaks_pairedend(): peak_file_ENCSR729LGA= test_output_path + 'ENCSR729LGA.600.narrowPeak' assert os.path.exists(peak_file_ENCSR729LGA) assert utils.count_lines(peak_file_ENCSR729LGA) == 600 + + +@pytest.mark.pairedend +def test_motif_search_pairedend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_memechip', 'ENCSR729LGA.fa')) + assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_memechip', 'index.html')) diff --git a/workflow/tests/test_overlap_peaks.py b/workflow/tests/test_overlap_peaks.py index 99d43b87939617c801bad0389f14e2683cde0f82..9289d165ee8a7cf3bb613616a165e852ad80d462 100644 --- a/workflow/tests/test_overlap_peaks.py +++ b/workflow/tests/test_overlap_peaks.py @@ -44,4 +44,4 @@ def test_overlap_peaks_singleend(): def test_overlap_peaks_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak')) peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak' - assert utils.count_lines(peak_file) == 25655 + assert utils.count_lines(peak_file) == 26281 diff --git a/workflow/tests/test_trim_reads.py b/workflow/tests/test_trim_reads.py index aeb3eb3bbe2be77479b88fb82391efab687a0063..f929d9c20cb95c3c89fdda22096870120195972d 100644 --- a/workflow/tests/test_trim_reads.py +++ b/workflow/tests/test_trim_reads.py @@ -13,20 +13,28 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_trim_reads_singleend(): raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz' - trimmed_fastq = 
test_output_path + 'ENCLB144FDT_trimmed.fq.gz' - trimmed_fastq_report = test_output_path + \ - 'ENCLB144FDT.fastq.gz_trimming_report.txt' + trimmed_fastq = test_output_path + 'ENCLB144FDT_R1_trimmed.fq.gz' assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) assert os.path.getsize(trimmed_fastq) == 2512853101 + + +@pytest.mark.singleend +def test_trim_report_singleend(): + trimmed_fastq_report = test_output_path + \ + 'ENCLB144FDT_R1_trimmed.fq.gz_trimming_report.txt' assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4] @pytest.mark.pairedend def test_trim_reads_pairedend(): raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz' - trimmed_fastq = test_output_path + 'ENCFF582IOZ_val_2.fq.gz' - trimmed_fastq_report = test_output_path + \ - 'ENCLB637LZP.fastq.gz_trimming_report.txt' + trimmed_fastq = test_output_path + 'ENCLB637LZP_R2_val_2.fq.gz' assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) assert os.path.getsize(trimmed_fastq) == 2229312710 + + +@pytest.mark.pairedend +def test_trim_report_pairedend(): + trimmed_fastq_report = test_output_path + \ + 'ENCLB637LZP_R2.fastq.gz_trimming_report.txt' assert 'Trimming mode: paired-end' in open(trimmed_fastq_report).readlines()[4] diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py index 69006a0f03f4193035898e14376f01be463e3089..259ecfb1f82ce80e79ee14539c4bb603bb5ddfb4 100644 --- a/workflow/tests/test_xcor.py +++ b/workflow/tests/test_xcor.py @@ -9,9 +9,13 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend -def test_cross_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.cc.plot.pdf')) - qc_file = os.path.join(test_output_path,"ENCFF833BLU.cc.qc") +def test_cross_plot_singleend(): + assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.cc.plot.pdf')) + + +@pytest.mark.singleend +def test_cross_qc_singleend(): + qc_file = 
os.path.join(test_output_path,"ENCLB144FDT.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) assert df_xcor[2].iloc[0] == '190,200,210' assert df_xcor[8].iloc[0] == 1.025906 @@ -19,10 +23,14 @@ def test_cross_singleend(): @pytest.mark.pairedend -def test_cross_pairedend(): +def test_cross_plot_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.cc.plot.pdf')) + + +@pytest.mark.pairedend +def test_cross_qc_pairedend(): qc_file = os.path.join(test_output_path,"ENCLB568IYX.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '210,220,475' - assert round(df_xcor[8].iloc[0],6) == 1.062032 - assert df_xcor[9].iloc[0] == 3.737722 + assert df_xcor[2].iloc[0] == '220,430,475' + assert round(df_xcor[8].iloc[0],6) == 1.060018 + assert df_xcor[9].iloc[0] == 4.099357