diff --git a/workflow/main.nf b/workflow/main.nf index 1908dfdfb2724a07916d7a342a088c3becf7a12c..0f4b58f8b18c15ac1398f042587cac6c9e9fc2e4 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -38,6 +38,7 @@ readsList = Channel pairedEnd = params.pairedEnd designFile = params.designFile genomeSize = params.genomeSize +genome = params.genome chromSizes = params.chromSizes fasta = params.fasta cutoffRatio = params.cutoffRatio @@ -422,6 +423,7 @@ process motifSearch { output: file "*memechip" into motifSearch + file "sorted-*" into filteredPeaks script: diff --git a/workflow/scripts/annotate_peaks.R b/workflow/scripts/annotate_peaks.R index 98bb8bdc846442e22ff354165c4580fd7777e25c..3629c4acf85e87440e17f2846e3beb56ba2c43ce 100644 --- a/workflow/scripts/annotate_peaks.R +++ b/workflow/scripts/annotate_peaks.R @@ -21,16 +21,16 @@ if (length(args) != 2) { } design_file <- args[1] -genome <-args[2] +genome_assembly <- args[2] # Load UCSC Known Genes -if(genome=='GRCh37') { +if(genome_assembly=='GRCh37') { txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene annodb <- 'org.Hs.eg.db' -} else if(genome=='GRCm38') { +} else if(genome_assembly=='GRCm38') { txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene annodb <- 'org.Mm.eg.db' -} else if(genome=='GRCh38') { +} else if(genome_assembly=='GRCh38') { txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene annodb <- 'org.Hs.eg.db' } diff --git a/workflow/tests/test_annotate_peaks.py b/workflow/tests/test_annotate_peaks.py index 13813505403067eadce9d9c31f6c26b4ae210f2e..e3f5746c3087e2d0f8eb97dc7f0050994270000d 100644 --- a/workflow/tests/test_annotate_peaks.py +++ b/workflow/tests/test_annotate_peaks.py @@ -16,7 +16,7 @@ def test_annotate_peaks_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.chipseeker_upsetplot.pdf')) annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.csv' assert os.path.exists(annotation_file) - assert utils.count_lines(annotation_file) == 149820 + assert utils.count_lines(annotation_file) == 152840 @pytest.mark.pairedend diff --git a/workflow/tests/test_call_peaks_macs.py b/workflow/tests/test_call_peaks_macs.py index a0792c6d2919f0fdf255c19e5ffc330ef805b91b..71358bc045801d23d28b8f7385c5ef9f608347b6 100644 --- a/workflow/tests/test_call_peaks_macs.py +++ b/workflow/tests/test_call_peaks_macs.py @@ -13,7 +13,7 @@ def test_call_peaks_macs_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.fc_signal.bw')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.pvalue_signal.bw')) peak_file = test_output_path + 'ENCLB144FDT_peaks.narrowPeak' - assert utils.count_lines(peak_file) == 210349 + assert utils.count_lines(peak_file) == 227389 @pytest.mark.pairedend diff --git a/workflow/tests/test_diff_peaks.py b/workflow/tests/test_diff_peaks.py index 6b4a6ed272f2ef8ad530a600aee52745da9e2656..ad66c4a1a667110790e5de626dbdad7820751888 100644 --- a/workflow/tests/test_diff_peaks.py +++ b/workflow/tests/test_diff_peaks.py @@ -12,20 +12,11 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_diff_peaks_singleend_single_rep(): - assert os.path.exists(os.path.join(test_output_path, 'no_diffbind.bed')) - assert os.path.exists(os.path.join(test_output_path, 'no_diffbind.csv')) - assert os.path.exists(os.path.join(test_output_path, 'no_heatmap.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'no_pca.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'normcount_peaksets.pdf')) - + assert os.path.isdir(test_output_path) == False @pytest.mark.pairedend def test_annotate_peaks_pairedend_single_rep(): - assert os.path.exists(os.path.join(test_output_path, 'no_diffbind.bed')) - assert os.path.exists(os.path.join(test_output_path, 'no_diffbind.csv')) - assert os.path.exists(os.path.join(test_output_path, 'no_heatmap.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'no_pca.pdf')) - assert os.path.exists(os.path.join(test_output_path, 'normcount_peaksets.pdf')) + assert os.path.isdir(test_output_path) == False @pytest.mark.singlediff def test_diff_peaks_singleend_multiple_rep(): diff --git a/workflow/tests/test_overlap_peaks.py b/workflow/tests/test_overlap_peaks.py index a239b4144f0c780805ea07ac540c828e0154d5e5..ccbf186aa12798878db6b982a5a5d4574df01461 100644 --- a/workflow/tests/test_overlap_peaks.py +++ b/workflow/tests/test_overlap_peaks.py @@ -37,7 +37,7 @@ def test_check_update_design(design_diff): def test_overlap_peaks_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.rejected.narrowPeak')) peak_file = test_output_path + 'ENCSR238SGC.replicated.narrowPeak' - assert utils.count_lines(peak_file) == 150302 + assert utils.count_lines(peak_file) == 149828 @pytest.mark.pairedend diff --git a/workflow/tests/test_trim_reads.py b/workflow/tests/test_trim_reads.py index 502312f20092a03c688f25fd8e597fd0e79f1940..aeb3eb3bbe2be77479b88fb82391efab687a0063 100644 --- a/workflow/tests/test_trim_reads.py +++ b/workflow/tests/test_trim_reads.py @@ -24,7 +24,7 @@ def test_trim_reads_singleend(): @pytest.mark.pairedend def test_trim_reads_pairedend(): raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz' - trimmed_fastq = test_output_path + ' ENCLB637LZP_val_2.fq.gz' + trimmed_fastq = test_output_path + 'ENCFF582IOZ_val_2.fq.gz' trimmed_fastq_report = test_output_path + \ 'ENCLB637LZP.fastq.gz_trimming_report.txt' assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) diff --git a/workflow/tests/test_xcor.py b/workflow/tests/test_xcor.py index 0c27795253040544676835fce6109c6889539087..8492de444ddc8f17d5d4ad9d7ce2f19785859753 100644 --- a/workflow/tests/test_xcor.py +++ b/workflow/tests/test_xcor.py @@ -9,13 +9,13 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend -def test_convert_reads_singleend(): +def test_map_qc_singleend(): assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf')) qc_file = os.path.join(test_output_path,"ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '195,215,230' - assert df_xcor[8].iloc[0] == 1.024836 - assert df_xcor[9].iloc[0] == 1.266678 + assert df_xcor[2].iloc[0] == '190,200,210' + assert df_xcor[8].iloc[0] == 1.025906 + assert df_xcor[9].iloc[0] == 1.139671 @pytest.mark.pairedend @@ -23,6 +23,6 @@ def test_map_qc_pairedend(): assert os.path.exists(os.path.join(test_output_path, 'ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf')) qc_file = os.path.join(test_output_path,"ENCFF582IOZ_val_2ENCFF957SQS_val_1.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc") df_xcor = pd.read_csv(qc_file, sep="\t", header=None) - assert df_xcor[2].iloc[0] == '205,410,430' - assert df_xcor[8].iloc[0] == 1.060266 - assert df_xcor[9].iloc[0] == 4.308793 + assert df_xcor[2].iloc[0] == '210,220,475' + assert df_xcor[8].iloc[0] == 1.062032 + assert df_xcor[9].iloc[0] == 3.737722