Merge branch '33-organization' into 'master'

Resolve "New folders for all steps". Closes #33. See merge request !22.

Merge branch '33-organization' into 'master'
Resolve "New folders for all steps". Closes #33. See merge request !22.
5f8cb463 · Venkat Malladi · 0817965e · 867b9ec1 · 5f8cb463 · 5f8cb463
Commit 5f8cb463 authored 6 years ago by Venkat Malladi
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,9 +6,9 @@ before_script:
 stages:
  - unit
-  - skip
  - single
  - multiple
+  - skip
 user_configuration:
  stage: unit
@@ -20,8 +20,6 @@ single_end_mouse:
  stage: single
  only:
    - master
-  except:
-    - branches
  script:
  - nextflow run workflow/main.nf -resume
  - pytest -m singleend
@@ -32,8 +30,6 @@ paired_end_human:
  stage: single
  only:
    - branches
-  except:
-    - master
  script:
  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true -resume
  - pytest -m pairedend
@@ -44,8 +40,6 @@ single_end_diff:
  stage: multiple
  only:
    - branches
-  except:
-    - master
  script:
  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' -resume
  - pytest -m singlediff
@@ -55,8 +49,6 @@ single_end_diff:
 paired_end_diff:
  only:
    - master
-  except:
-    - branches
  stage: multiple
  script:
  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true -resume
@@ -67,8 +59,6 @@ paired_end_diff:
 single_end_skip:
  stage: skip
  only:
-    - branches
-  except:
    - master
  script:
  - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true -resume

--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -94,7 +94,7 @@ rawReads = designFilePaths
 process trimReads {
  tag "$sampleId-$replicate"
-  publishDir "$outDir/${task.process}", mode: 'copy'
+  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
  input:
@@ -124,7 +124,7 @@ process trimReads {
 process alignReads {
  tag "$sampleId-$replicate"
-  publishDir "$outDir/${task.process}", mode: 'copy'
+  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
  input:
@@ -155,7 +155,7 @@ process alignReads {
 process filterReads {
  tag "$sampleId-$replicate"
-  publishDir "$outDir/${task.process}", mode: 'copy'
+  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
  input:
@@ -216,7 +216,7 @@ process experimentQC {
 process convertReads {
  tag "$sampleId-$replicate"
-  publishDir "$outDir/${task.process}", mode: 'copy'
+  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
  input:
@@ -245,7 +245,7 @@ process convertReads {
 process crossReads {
  tag "$sampleId-$replicate"
-  publishDir "$outDir/${task.process}", mode: 'copy'
+  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
  input:
@@ -338,7 +338,7 @@ experimentRows = experimentPoolObjs
 process callPeaksMACS {
  tag "$sampleId-$replicate"
-  publishDir "$outDir/${task.process}", mode: 'copy'
+  publishDir "$outDir/${task.process}/${experimentId}/${replicate}", mode: 'copy'
  input:
  set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows

--- a/workflow/tests/test_call_peaks_macs.py
+++ b/workflow/tests/test_call_peaks_macs.py
@@ -10,41 +10,41 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_fc_signal_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.fc_signal.bw'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC/1/', 'ENCLB144FDT.fc_signal.bw'))
 @pytest.mark.singleend
 def test_pvalue_signal_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.pvalue_signal.bw'))
+    assert os.path.exists(os.path.join(test_output_path,  'ENCSR238SGC/1/', 'ENCLB144FDT.pvalue_signal.bw'))
 @pytest.mark.singleend
 def test_peaks_xls_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT_peaks.xls'))
+    assert os.path.exists(os.path.join(test_output_path,  'ENCSR238SGC/1/', 'ENCLB144FDT_peaks.xls'))
 @pytest.mark.singleend
 def test_peaks_bed_singleend():
-    peak_file = test_output_path + 'ENCLB144FDT.narrowPeak'
+    peak_file = test_output_path +  'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak'
    assert utils.count_lines(peak_file) == 227389
 @pytest.mark.pairedend
 def test_fc_signal_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.fc_signal.bw'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA/2/', 'ENCLB568IYX.fc_signal.bw'))
 @pytest.mark.pairedend
 def test_pvalue_signal_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.pvalue_signal.bw'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA/2/', 'ENCLB568IYX.pvalue_signal.bw'))
 @pytest.mark.pairedend
 def test_peaks_xls_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX_peaks.xls'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA/2/', 'ENCLB568IYX_peaks.xls'))
 @pytest.mark.pairedend
 def test_peaks_bed_pairedend():
-    peak_file = test_output_path + 'ENCLB568IYX.narrowPeak'
+    peak_file = test_output_path + 'ENCSR729LGA/2/' + 'ENCLB568IYX.narrowPeak'
    assert utils.count_lines(peak_file) == 113821
--- a/workflow/tests/test_convert_reads.py
+++ b/workflow/tests/test_convert_reads.py
@@ -9,19 +9,19 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_tag_reads_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.tagAlign.gz'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.tagAlign.gz'))
 @pytest.mark.singleend
 def test_bed_reads_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bedse.gz'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.bedse.gz'))
 @pytest.mark.pairedend
 def test_tag_reads_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.tagAlign.gz'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.tagAlign.gz'))
 @pytest.mark.pairedend
 def test_bed_reads_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.bedpe.gz'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.bedpe.gz'))
--- a/workflow/tests/test_map_qc.py
+++ b/workflow/tests/test_map_qc.py
@@ -10,14 +10,14 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_dedup_files_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.dedup.bam'))
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam.bai'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.dedup.bam.bai'))
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.qc'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.dedup.qc'))
 @pytest.mark.singleend
 def test_map_qc_singleend():
-    filtered_reads_report = test_output_path + 'ENCLB831RUI.dedup.flagstat.qc'
+    filtered_reads_report = test_output_path + 'ENCLB831RUI/ENCLB831RUI.dedup.flagstat.qc'
    samtools_report = open(filtered_reads_report).readlines()
    assert '64962570 + 0 in total' in samtools_report[0]
    assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4]
@@ -25,7 +25,7 @@ def test_map_qc_singleend():
 @pytest.mark.singleend
 def test_library_complexity_singleend():
-    library_complexity = test_output_path + 'ENCLB831RUI.pbc.qc'
+    library_complexity = test_output_path + 'ENCLB831RUI/ENCLB831RUI.pbc.qc'
    df_library_complexity = pd.read_csv(library_complexity, sep='\t')
    assert  df_library_complexity["NRF"].iloc[0] == 0.926192
    assert  df_library_complexity["PBC1"].iloc[0] == 0.926775
@@ -34,14 +34,14 @@ def test_library_complexity_singleend():
 @pytest.mark.pairedend
 def test_dedup_files_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.dedup.bam'))
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam.bai'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.dedup.bam.bai'))
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.qc'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.dedup.qc'))
 @pytest.mark.pairedend
 def test_map_qc_pairedend():
-    filtered_reads_report = test_output_path + 'ENCLB568IYX.dedup.flagstat.qc'
+    filtered_reads_report = test_output_path + 'ENCLB568IYX/ENCLB568IYX.dedup.flagstat.qc'
    samtools_report = open(filtered_reads_report).readlines()
    assert '47388510 + 0 in total' in samtools_report[0]
    assert '47388510 + 0 mapped (100.00%:N/A)' in samtools_report[4]
@@ -49,7 +49,7 @@ def test_map_qc_pairedend():
 @pytest.mark.pairedend
 def test_library_complexity_pairedend():
-    library_complexity = test_output_path + 'ENCLB568IYX.pbc.qc'
+    library_complexity = test_output_path + 'ENCLB568IYX/ENCLB568IYX.pbc.qc'
    df_library_complexity = pd.read_csv(library_complexity, sep='\t')
    assert  df_library_complexity["NRF"].iloc[0] == 0.947064
    assert  round(df_library_complexity["PBC1"].iloc[0],6) == 0.946723

--- a/workflow/tests/test_map_reads.py
+++ b/workflow/tests/test_map_reads.py
@@ -9,8 +9,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_map_reads_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bam'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.bam'))
-    aligned_reads_report = test_output_path + 'ENCLB831RUI.flagstat.qc'
+    aligned_reads_report = test_output_path + 'ENCLB831RUI/ENCLB831RUI.flagstat.qc'
    samtools_report = open(aligned_reads_report).readlines()
    assert '80795025 + 0 in total' in samtools_report[0]
    assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4]
@@ -18,8 +18,8 @@ def test_map_reads_singleend():
 @pytest.mark.pairedend
 def test_map_reads_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB678IDC.bam'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB678IDC/ENCLB678IDC.bam'))
-    aligned_reads_report = test_output_path + 'ENCLB678IDC.flagstat.qc'
+    aligned_reads_report = test_output_path + 'ENCLB678IDC/ENCLB678IDC.flagstat.qc'
    samtools_report = open(aligned_reads_report).readlines()
    assert '72660890 + 0 in total' in samtools_report[0]
    assert '72053925 + 0 mapped (99.16% : N/A)' in samtools_report[4]

--- a/workflow/tests/test_trim_reads.py
+++ b/workflow/tests/test_trim_reads.py
@@ -13,7 +13,7 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_trim_reads_singleend():
    raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz'
-    trimmed_fastq = test_output_path + 'ENCLB144FDT_R1_trimmed.fq.gz'
+    trimmed_fastq = test_output_path + 'ENCLB144FDT/ENCLB144FDT_R1_trimmed.fq.gz'
    assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
    assert os.path.getsize(trimmed_fastq) == 2512853101
@@ -21,14 +21,14 @@ def test_trim_reads_singleend():
 @pytest.mark.singleend
 def test_trim_report_singleend():
    trimmed_fastq_report = test_output_path + \
-                            'ENCLB144FDT_R1.fastq.gz_trimming_report.txt'
+                            'ENCLB144FDT/ENCLB144FDT_R1.fastq.gz_trimming_report.txt'
    assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4]
 @pytest.mark.pairedend
 def test_trim_reads_pairedend():
    raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz'
-    trimmed_fastq = test_output_path + 'ENCLB637LZP_R2_val_2.fq.gz'
+    trimmed_fastq = test_output_path + 'ENCLB637LZP/ENCLB637LZP_R2_val_2.fq.gz'
    assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
    assert os.path.getsize(trimmed_fastq) == 2229312710
@@ -36,5 +36,5 @@ def test_trim_reads_pairedend():
 @pytest.mark.pairedend
 def test_trim_report_pairedend():
    trimmed_fastq_report = test_output_path + \
-                            'ENCLB637LZP_R2.fastq.gz_trimming_report.txt'
+                            'ENCLB637LZP/ENCLB637LZP_R2.fastq.gz_trimming_report.txt'
    assert 'Trimming mode: paired-end' in open(trimmed_fastq_report).readlines()[4]
--- a/workflow/tests/test_xcor.py
+++ b/workflow/tests/test_xcor.py
@@ -10,12 +10,12 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_cross_plot_singleend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.cc.plot.pdf'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT/ENCLB144FDT.cc.plot.pdf'))
 @pytest.mark.singleend
 def test_cross_qc_singleend():
-    qc_file = os.path.join(test_output_path,"ENCLB144FDT.cc.qc")
+    qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc")
    df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
    assert df_xcor[2].iloc[0] == '190,200,210'
    assert df_xcor[8].iloc[0] == 1.025906
@@ -24,12 +24,12 @@ def test_cross_qc_singleend():
 @pytest.mark.pairedend
 def test_cross_qc_pairedend():
-    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.cc.plot.pdf'))
+    assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.cc.plot.pdf'))
 @pytest.mark.pairedend
 def test_cross_plot_pairedend():
-    qc_file = os.path.join(test_output_path,"ENCLB568IYX.cc.qc")
+    qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc")
    df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
    assert df_xcor[2].iloc[0] == '220,430,475'
    assert round(df_xcor[8].iloc[0],6) == 1.060018