Skip to content
Snippets Groups Projects
Commit 5f8cb463 authored by Venkat Malladi
Browse files

Merge branch '33-organization' into 'master'

Resolve "New folders for all steps"

Closes #33

See merge request !22
parents 0817965e 867b9ec1
Branches
Tags
1 merge request: !22 Resolve "New folders for all steps"
Pipeline #3264 failed with stages
in 8 hours, 35 minutes, and 53 seconds
...@@ -6,9 +6,9 @@ before_script: ...@@ -6,9 +6,9 @@ before_script:
stages: stages:
- unit - unit
- skip
- single - single
- multiple - multiple
- skip
user_configuration: user_configuration:
stage: unit stage: unit
...@@ -20,8 +20,6 @@ single_end_mouse: ...@@ -20,8 +20,6 @@ single_end_mouse:
stage: single stage: single
only: only:
- master - master
except:
- branches
script: script:
- nextflow run workflow/main.nf -resume - nextflow run workflow/main.nf -resume
- pytest -m singleend - pytest -m singleend
...@@ -32,8 +30,6 @@ paired_end_human: ...@@ -32,8 +30,6 @@ paired_end_human:
stage: single stage: single
only: only:
- branches - branches
except:
- master
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true -resume - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true -resume
- pytest -m pairedend - pytest -m pairedend
...@@ -44,8 +40,6 @@ single_end_diff: ...@@ -44,8 +40,6 @@ single_end_diff:
stage: multiple stage: multiple
only: only:
- branches - branches
except:
- master
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' -resume - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' -resume
- pytest -m singlediff - pytest -m singlediff
...@@ -55,8 +49,6 @@ single_end_diff: ...@@ -55,8 +49,6 @@ single_end_diff:
paired_end_diff: paired_end_diff:
only: only:
- master - master
except:
- branches
stage: multiple stage: multiple
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true -resume - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_PE.txt" --genome 'GRCh38' --pairedEnd true -resume
...@@ -67,8 +59,6 @@ paired_end_diff: ...@@ -67,8 +59,6 @@ paired_end_diff:
single_end_skip: single_end_skip:
stage: skip stage: skip
only: only:
- branches
except:
- master - master
script: script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true -resume - nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_diff_SE.txt" --genome 'GRCm38' --skipDiff true --skipMotif true -resume
......
...@@ -94,7 +94,7 @@ rawReads = designFilePaths ...@@ -94,7 +94,7 @@ rawReads = designFilePaths
process trimReads { process trimReads {
tag "$sampleId-$replicate" tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}", mode: 'copy' publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input: input:
...@@ -124,7 +124,7 @@ process trimReads { ...@@ -124,7 +124,7 @@ process trimReads {
process alignReads { process alignReads {
tag "$sampleId-$replicate" tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}", mode: 'copy' publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input: input:
...@@ -155,7 +155,7 @@ process alignReads { ...@@ -155,7 +155,7 @@ process alignReads {
process filterReads { process filterReads {
tag "$sampleId-$replicate" tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}", mode: 'copy' publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input: input:
...@@ -216,7 +216,7 @@ process experimentQC { ...@@ -216,7 +216,7 @@ process experimentQC {
process convertReads { process convertReads {
tag "$sampleId-$replicate" tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}", mode: 'copy' publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input: input:
...@@ -245,7 +245,7 @@ process convertReads { ...@@ -245,7 +245,7 @@ process convertReads {
process crossReads { process crossReads {
tag "$sampleId-$replicate" tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}", mode: 'copy' publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
input: input:
...@@ -338,7 +338,7 @@ experimentRows = experimentPoolObjs ...@@ -338,7 +338,7 @@ experimentRows = experimentPoolObjs
process callPeaksMACS { process callPeaksMACS {
tag "$sampleId-$replicate" tag "$sampleId-$replicate"
publishDir "$outDir/${task.process}", mode: 'copy' publishDir "$outDir/${task.process}/${experimentId}/${replicate}", mode: 'copy'
input: input:
set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
......
...@@ -10,41 +10,41 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -10,41 +10,41 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_fc_signal_singleend(): def test_fc_signal_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.fc_signal.bw')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC/1/', 'ENCLB144FDT.fc_signal.bw'))
@pytest.mark.singleend @pytest.mark.singleend
def test_pvalue_signal_singleend(): def test_pvalue_signal_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.pvalue_signal.bw')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC/1/', 'ENCLB144FDT.pvalue_signal.bw'))
@pytest.mark.singleend @pytest.mark.singleend
def test_peaks_xls_singleend(): def test_peaks_xls_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT_peaks.xls')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC/1/', 'ENCLB144FDT_peaks.xls'))
@pytest.mark.singleend @pytest.mark.singleend
def test_peaks_bed_singleend(): def test_peaks_bed_singleend():
peak_file = test_output_path + 'ENCLB144FDT.narrowPeak' peak_file = test_output_path + 'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak'
assert utils.count_lines(peak_file) == 227389 assert utils.count_lines(peak_file) == 227389
@pytest.mark.pairedend @pytest.mark.pairedend
def test_fc_signal_pairedend(): def test_fc_signal_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.fc_signal.bw')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA/2/', 'ENCLB568IYX.fc_signal.bw'))
@pytest.mark.pairedend @pytest.mark.pairedend
def test_pvalue_signal_pairedend(): def test_pvalue_signal_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.pvalue_signal.bw')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA/2/', 'ENCLB568IYX.pvalue_signal.bw'))
@pytest.mark.pairedend @pytest.mark.pairedend
def test_peaks_xls_pairedend(): def test_peaks_xls_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX_peaks.xls')) assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA/2/', 'ENCLB568IYX_peaks.xls'))
@pytest.mark.pairedend @pytest.mark.pairedend
def test_peaks_bed_pairedend(): def test_peaks_bed_pairedend():
peak_file = test_output_path + 'ENCLB568IYX.narrowPeak' peak_file = test_output_path + 'ENCSR729LGA/2/' + 'ENCLB568IYX.narrowPeak'
assert utils.count_lines(peak_file) == 113821 assert utils.count_lines(peak_file) == 113821
...@@ -9,19 +9,19 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -9,19 +9,19 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_tag_reads_singleend(): def test_tag_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.tagAlign.gz')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.tagAlign.gz'))
@pytest.mark.singleend @pytest.mark.singleend
def test_bed_reads_singleend(): def test_bed_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bedse.gz')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.bedse.gz'))
@pytest.mark.pairedend @pytest.mark.pairedend
def test_tag_reads_pairedend(): def test_tag_reads_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.tagAlign.gz')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.tagAlign.gz'))
@pytest.mark.pairedend @pytest.mark.pairedend
def test_bed_reads_pairedend(): def test_bed_reads_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.bedpe.gz')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.bedpe.gz'))
...@@ -10,14 +10,14 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -10,14 +10,14 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_dedup_files_singleend(): def test_dedup_files_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.dedup.bam'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.bam.bai')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.dedup.bam.bai'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.dedup.qc')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.dedup.qc'))
@pytest.mark.singleend @pytest.mark.singleend
def test_map_qc_singleend(): def test_map_qc_singleend():
filtered_reads_report = test_output_path + 'ENCLB831RUI.dedup.flagstat.qc' filtered_reads_report = test_output_path + 'ENCLB831RUI/ENCLB831RUI.dedup.flagstat.qc'
samtools_report = open(filtered_reads_report).readlines() samtools_report = open(filtered_reads_report).readlines()
assert '64962570 + 0 in total' in samtools_report[0] assert '64962570 + 0 in total' in samtools_report[0]
assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4] assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4]
...@@ -25,7 +25,7 @@ def test_map_qc_singleend(): ...@@ -25,7 +25,7 @@ def test_map_qc_singleend():
@pytest.mark.singleend @pytest.mark.singleend
def test_library_complexity_singleend(): def test_library_complexity_singleend():
library_complexity = test_output_path + 'ENCLB831RUI.pbc.qc' library_complexity = test_output_path + 'ENCLB831RUI/ENCLB831RUI.pbc.qc'
df_library_complexity = pd.read_csv(library_complexity, sep='\t') df_library_complexity = pd.read_csv(library_complexity, sep='\t')
assert df_library_complexity["NRF"].iloc[0] == 0.926192 assert df_library_complexity["NRF"].iloc[0] == 0.926192
assert df_library_complexity["PBC1"].iloc[0] == 0.926775 assert df_library_complexity["PBC1"].iloc[0] == 0.926775
...@@ -34,14 +34,14 @@ def test_library_complexity_singleend(): ...@@ -34,14 +34,14 @@ def test_library_complexity_singleend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_dedup_files_pairedend(): def test_dedup_files_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.dedup.bam'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.bam.bai')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.dedup.bam.bai'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.dedup.qc')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.dedup.qc'))
@pytest.mark.pairedend @pytest.mark.pairedend
def test_map_qc_pairedend(): def test_map_qc_pairedend():
filtered_reads_report = test_output_path + 'ENCLB568IYX.dedup.flagstat.qc' filtered_reads_report = test_output_path + 'ENCLB568IYX/ENCLB568IYX.dedup.flagstat.qc'
samtools_report = open(filtered_reads_report).readlines() samtools_report = open(filtered_reads_report).readlines()
assert '47388510 + 0 in total' in samtools_report[0] assert '47388510 + 0 in total' in samtools_report[0]
assert '47388510 + 0 mapped (100.00%:N/A)' in samtools_report[4] assert '47388510 + 0 mapped (100.00%:N/A)' in samtools_report[4]
...@@ -49,7 +49,7 @@ def test_map_qc_pairedend(): ...@@ -49,7 +49,7 @@ def test_map_qc_pairedend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_library_complexity_pairedend(): def test_library_complexity_pairedend():
library_complexity = test_output_path + 'ENCLB568IYX.pbc.qc' library_complexity = test_output_path + 'ENCLB568IYX/ENCLB568IYX.pbc.qc'
df_library_complexity = pd.read_csv(library_complexity, sep='\t') df_library_complexity = pd.read_csv(library_complexity, sep='\t')
assert df_library_complexity["NRF"].iloc[0] == 0.947064 assert df_library_complexity["NRF"].iloc[0] == 0.947064
assert round(df_library_complexity["PBC1"].iloc[0],6) == 0.946723 assert round(df_library_complexity["PBC1"].iloc[0],6) == 0.946723
......
...@@ -9,8 +9,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -9,8 +9,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_map_reads_singleend(): def test_map_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI/ENCLB831RUI.bam'))
aligned_reads_report = test_output_path + 'ENCLB831RUI.flagstat.qc' aligned_reads_report = test_output_path + 'ENCLB831RUIENCLB831RUI/ENCLB831RUI.flagstat.qc'
samtools_report = open(aligned_reads_report).readlines() samtools_report = open(aligned_reads_report).readlines()
assert '80795025 + 0 in total' in samtools_report[0] assert '80795025 + 0 in total' in samtools_report[0]
assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4] assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4]
...@@ -18,8 +18,8 @@ def test_map_reads_singleend(): ...@@ -18,8 +18,8 @@ def test_map_reads_singleend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_map_reads_pairedend(): def test_map_reads_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB678IDC.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB678IDC/ENCLB678IDC.bam'))
aligned_reads_report = test_output_path + 'ENCLB678IDC.flagstat.qc' aligned_reads_report = test_output_path + 'ENCLB678IDC/ENCLB678IDC.flagstat.qc'
samtools_report = open(aligned_reads_report).readlines() samtools_report = open(aligned_reads_report).readlines()
assert '72660890 + 0 in total' in samtools_report[0] assert '72660890 + 0 in total' in samtools_report[0]
assert '72053925 + 0 mapped (99.16% : N/A)' in samtools_report[4] assert '72053925 + 0 mapped (99.16% : N/A)' in samtools_report[4]
......
...@@ -13,7 +13,7 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -13,7 +13,7 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_trim_reads_singleend(): def test_trim_reads_singleend():
raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz' raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz'
trimmed_fastq = test_output_path + 'ENCLB144FDT_R1_trimmed.fq.gz' trimmed_fastq = test_output_path + 'ENCLB144FDT/ENCLB144FDT_R1_trimmed.fq.gz'
assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
assert os.path.getsize(trimmed_fastq) == 2512853101 assert os.path.getsize(trimmed_fastq) == 2512853101
...@@ -21,14 +21,14 @@ def test_trim_reads_singleend(): ...@@ -21,14 +21,14 @@ def test_trim_reads_singleend():
@pytest.mark.singleend @pytest.mark.singleend
def test_trim_report_singleend(): def test_trim_report_singleend():
trimmed_fastq_report = test_output_path + \ trimmed_fastq_report = test_output_path + \
'ENCLB144FDT_R1.fastq.gz_trimming_report.txt' 'ENCLB144FDT/ENCLB144FDT_R1.fastq.gz_trimming_report.txt'
assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4] assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4]
@pytest.mark.pairedend @pytest.mark.pairedend
def test_trim_reads_pairedend(): def test_trim_reads_pairedend():
raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz' raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz'
trimmed_fastq = test_output_path + 'ENCLB637LZP_R2_val_2.fq.gz' trimmed_fastq = test_output_path + 'ENCLB637LZP/ENCLB637LZP_R2_val_2.fq.gz'
assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
assert os.path.getsize(trimmed_fastq) == 2229312710 assert os.path.getsize(trimmed_fastq) == 2229312710
...@@ -36,5 +36,5 @@ def test_trim_reads_pairedend(): ...@@ -36,5 +36,5 @@ def test_trim_reads_pairedend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_trim_report_pairedend(): def test_trim_report_pairedend():
trimmed_fastq_report = test_output_path + \ trimmed_fastq_report = test_output_path + \
'ENCLB637LZP_R2.fastq.gz_trimming_report.txt' 'ENCLB637LZP/ENCLB637LZP_R2.fastq.gz_trimming_report.txt'
assert 'Trimming mode: paired-end' in open(trimmed_fastq_report).readlines()[4] assert 'Trimming mode: paired-end' in open(trimmed_fastq_report).readlines()[4]
...@@ -10,12 +10,12 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -10,12 +10,12 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_cross_plot_singleend(): def test_cross_plot_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.cc.plot.pdf')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT/ENCLB144FDT.cc.plot.pdf'))
@pytest.mark.singleend @pytest.mark.singleend
def test_cross_qc_singleend(): def test_cross_qc_singleend():
qc_file = os.path.join(test_output_path,"ENCLB144FDT.cc.qc") qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc")
df_xcor = pd.read_csv(qc_file, sep="\t", header=None) df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
assert df_xcor[2].iloc[0] == '190,200,210' assert df_xcor[2].iloc[0] == '190,200,210'
assert df_xcor[8].iloc[0] == 1.025906 assert df_xcor[8].iloc[0] == 1.025906
...@@ -24,12 +24,12 @@ def test_cross_qc_singleend(): ...@@ -24,12 +24,12 @@ def test_cross_qc_singleend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_cross_qc_pairedend(): def test_cross_qc_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX.cc.plot.pdf')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX/ENCLB568IYX.cc.plot.pdf'))
@pytest.mark.pairedend @pytest.mark.pairedend
def test_cross_plot_pairedend(): def test_cross_plot_pairedend():
qc_file = os.path.join(test_output_path,"ENCLB568IYX.cc.qc") qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc")
df_xcor = pd.read_csv(qc_file, sep="\t", header=None) df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
assert df_xcor[2].iloc[0] == '220,430,475' assert df_xcor[2].iloc[0] == '220,430,475'
assert round(df_xcor[8].iloc[0],6) == 1.060018 assert round(df_xcor[8].iloc[0],6) == 1.060018
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment