Skip to content
Snippets Groups Projects
Commit 624918a8 authored by Venkat Malladi
Browse files

Add in test data.

parent 722a8dce
Branches
Tags
No related merge requests found
Pipeline #2230 failed with stages
Showing
with 303 additions and 16 deletions
# GitLab CI configuration: one unit-test job followed by two Nextflow
# integration jobs.

# Commands run before each job's script: load Python and Nextflow modules,
# install the pytest plugins, and symlink the shared FASTQ test data into
# test_data/.
before_script:
- module add python/3.6.1-2-anaconda
- pip install --user pytest-pythonpath pytest-cov
- module load nextflow/0.27.6
- ln -s /project/shared/bicf_workflow_ref/workflow_testdata/atacseq/*fastq.gz test_data/
# Stages run in the order listed.
stages:
- unit
- integration
# Runs only the tests marked with @pytest.mark.unit.
user_configuration:
stage: unit
script:
- pytest -m unit
# Runs the workflow with no extra command-line parameters.
single_end_mouse:
stage: integration
script:
- nextflow run workflow/main.nf
# NOTE(review): no artifact paths are listed here - confirm which outputs
# should be kept.
artifacts:
expire_in: 2 days
# Runs the workflow on the ENCSR729LGA paired-end design file against GRCh38.
paired_end_human:
stage: integration
script:
- nextflow run workflow/main.nf --designFile "$CI_PROJECT_DIR/test_data/design_ENCSR729LGA_PE.txt" --genome 'GRCh38' --pairedEnd true
artifacts:
expire_in: 2 days
# Download the ENCODE FASTQ files used as workflow test data into the
# current directory.

# Fetch one FASTQ by ENCODE file accession ($1). The accession is used for
# both the URL path and the download filename so the two can never disagree
# (the previous hand-written URLs for ENCSR451NAE and ENCSR265ZXX reused the
# path accessions of other data sets).
download_fastq() {
    wget "https://www.encodeproject.org/files/$1/@@download/$1.fastq.gz"
}

echo "Downloading Single-end data set Mouse ENCSR238SGC and ENCSR687ALB"
for accession in ENCFF833BLU ENCFF646LXU ENCFF524CAC ENCFF163AJI; do
    download_fastq "$accession"
done
echo "Done with Single-end"

echo "Downloading Paired-end data set Human ENCSR729LGA and ENCSR217LRF"
for accession in ENCFF957SQS ENCFF582IOZ ENCFF330MCZ ENCFF293YFE \
                 ENCFF002DTU ENCFF002EFI ENCFF002EFG ENCFF002DTS; do
    download_fastq "$accession"
done

echo "Downloading Paired-end data set Mouse ENCSR451NAE"
for accession in ENCFF655OFT ENCFF999SZR ENCFF913PMS ENCFF483MKX; do
    download_fastq "$accession"
done
echo "Done with Paired-end"

echo "Downloading Single-end data set Human ENCSR265ZXX "
for accession in ENCFF115PAE ENCFF610JYD ENCFF124LBK; do
    download_fastq "$accession"
done
echo "Done with Single-end"
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
File added
#!/usr/bin/env python3
import pytest
import os
from python_utils import utils
# Directory (relative to this test module) where the callPeaksMACS step
# writes its output.
test_output_path = ''.join([
    os.path.dirname(os.path.abspath(__file__)),
    '/../output/callPeaksMACS/',
])
@pytest.mark.integration
def test_call_peaks_macs_singleend():
    """Placeholder for the single-end MACS peak-calling output checks.

    Every assertion is currently commented out, so this test always passes.
    """
    # Intended checks, disabled for now:
    #assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.fc_signal.bw'))
    #assert os.path.exists(os.path.join(test_output_path, 'ENCLB144FDT.pvalue_signal.bw'))
    #peak_file = test_output_path + 'ENCLB144FDT_peaks.narrowPeak'
    #assert utils.count_lines(peak_file) == 210349
    pass
@pytest.mark.integration
def test_call_peaks_macs_pairedend():
    """Placeholder for the paired-end MACS peak-calling output checks."""
    # Do the same thing for paired end data
    pass
#!/usr/bin/env python3
import pytest
import pandas as pd
from io import StringIO
from design_file import check_design
# Tab-separated design table used by the fixtures below: two experiments
# (A and B) with two replicates each. Every row must have one value per
# header column; the biosample and factor values are separated by "\t"
# (a bare "\N" is a malformed escape sequence and a SyntaxError).
DESIGN_STRING = """sample_id\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tfastq_read1
A_1\tA\tLiver\tNone\tNone\t1\tA_1.fastq.gz
A_2\tA\tLiver\tNone\tNone\t2\tA_2.fastq.gz
B_1\tB\tLiver\tNone\tNone\t1\tB_1.fastq.gz
B_2\tB\tLiver\tNone\tNone\t2\tB_2.fastq.gz
"""
# Tab-separated, header-less table mapping each FASTQ file name to its path.
# The literal starts with a newline, so the first line of the text is blank.
FASTQ_STRING = """
A_1.fastq.gz\t/path/to/file/A_1.fastq.gz
A_2.fastq.gz\t/path/to/file/A_2.fastq.gz
B_1.fastq.gz\t/path/to/file/B_1.fastq.gz
B_2.fastq.gz\t/path/to/file/B_2.fastq.gz
"""
@pytest.fixture
def design():
    """Return DESIGN_STRING parsed as a tab-separated DataFrame."""
    return pd.read_csv(StringIO(DESIGN_STRING), sep="\t")
@pytest.fixture
def fastq_files():
    """Return FASTQ_STRING parsed into a DataFrame with 'name' and 'path' columns."""
    column_names = ['name', 'path']
    return pd.read_csv(StringIO(FASTQ_STRING), sep='\t', names=column_names)
@pytest.fixture
def design_1(design):
    """Return the design table without its 'fastq_read1' column."""
    without_read1 = design.drop(['fastq_read1'], axis=1)
    return without_read1
@pytest.fixture
def design_2(design):
    """Return the design table with its third row (sample B_1) removed."""
    # Drop Control B_1
    third_row_label = design.index[2]
    return design.drop(third_row_label)
@pytest.fixture
def design_3(design):
    """Return a paired-end style design table.

    Rows A_2 and B_2 are removed and their 'fastq_read1' values become the
    'fastq_read2' column of the remaining rows.
    """
    # Capture the second-read files before dropping their rows.
    second_reads = design.loc[[1, 3], 'fastq_read1'].values
    paired_df = design.drop(design.index[[1, 3]])
    paired_df['fastq_read2'] = second_reads
    return paired_df
@pytest.fixture
def design_4(design):
    """Return the design table with sample B_2's replicate set to 1.

    The incoming frame is modified in place and returned.
    """
    # Update replicate 2 for experiment B to be 1
    is_b2 = design['sample_id'] == 'B_2'
    design.loc[is_b2, 'replicate'] = 1
    return design
@pytest.fixture
def fastq_files_1(fastq_files):
    """Return the FASTQ table with its fourth row (B_2.fastq.gz) removed."""
    # Drop B_2.fastq.gz
    fourth_row_label = fastq_files.index[3]
    return fastq_files.drop(fourth_row_label)
@pytest.mark.unit
def test_check_headers_singleend(design_1):
    """Single-end header check must report the missing 'fastq_read1' column."""
    is_paired = False
    expected = "Missing column headers: ['fastq_read1']"
    with pytest.raises(Exception) as raised:
        check_design.check_design_headers(design_1, is_paired)
    # Checked after the block: statements following the raising call inside
    # the `with` would never run.
    assert str(raised.value) == expected
@pytest.mark.unit
def test_check_headers_pairedend(design):
    """Paired-end header check must report the missing 'fastq_read2' column."""
    is_paired = True
    expected = "Missing column headers: ['fastq_read2']"
    with pytest.raises(Exception) as raised:
        check_design.check_design_headers(design, is_paired)
    # Checked after the block: statements following the raising call inside
    # the `with` would never run.
    assert str(raised.value) == expected
@pytest.mark.unit
def test_check_files_missing_files(design, fastq_files_1):
    """check_files must report a design-file FASTQ absent from the file list."""
    paired = False
    with pytest.raises(Exception) as excinfo:
        # The return value is not captured: the call is expected to raise,
        # so an assignment here would never be bound.
        check_design.check_files(design, fastq_files_1, paired)
    assert str(excinfo.value) == "Missing files from design file: ['B_2.fastq.gz']"
@pytest.mark.unit
def test_check_files_output_singleend(design, fastq_files):
    """check_files output must hold the full path in row 0's 'fastq_read1'."""
    result = check_design.check_files(design, fastq_files, False)
    first_read1 = result.loc[0, 'fastq_read1']
    assert first_read1 == "/path/to/file/A_1.fastq.gz"
@pytest.mark.unit
def test_check_files_output_pairedend(design_3, fastq_files):
    """check_files output must hold the full path in row 0's 'fastq_read2'."""
    result = check_design.check_files(design_3, fastq_files, True)
    first_read2 = result.loc[0, 'fastq_read2']
    assert first_read2 == "/path/to/file/A_2.fastq.gz"
@pytest.mark.unit
def test_check_replicates(design_4):
    """check_replicates must report an experiment with a repeated replicate number."""
    with pytest.raises(Exception) as excinfo:
        # The return value is not captured: the call is expected to raise.
        check_design.check_replicates(design_4)
    assert str(excinfo.value) == "Duplicate replicates in experiments: ['B']"
#!/usr/bin/env python3
import pytest
import os
# Directory (relative to this test module) where the convertReads step
# writes its output.
test_output_path = ''.join([
    os.path.dirname(os.path.abspath(__file__)),
    '/../output/convertReads/',
])
@pytest.mark.integration
def test_convert_reads_singleend():
    """The single-end run must produce the tagAlign and bedse files for ENCFF115PAE."""
    expected_outputs = (
        'ENCFF115PAE.filt.nodup.tagAlign.gz',
        'ENCFF115PAE.filt.nodup.bedse.gz',
    )
    for produced in expected_outputs:
        assert os.path.exists(os.path.join(test_output_path, produced))
@pytest.mark.integration
def test_map_qc_pairedend():
    """Placeholder for the paired-end convertReads output checks."""
    # NOTE(review): the name says "map_qc" but this sits beside the
    # convert-reads test - looks copy-pasted; confirm the intended name.
    # Do the same thing for paired end data
    # Also check that bedpe exists
    pass
#!/usr/bin/env python3
import pytest
import pandas as pd
from io import StringIO
from design_file import experiment_design
import os
# Directory (relative to this test module) where the design step writes
# its output.
test_output_path = ''.join([
    os.path.dirname(os.path.abspath(__file__)),
    '/../output/design/',
])
# Tab-separated design table with tagAlign files: two experiments with two
# replicates each. Every row must have one value per header column; the
# biosample and factor values are separated by "\t" (a bare "\N" is a
# malformed escape sequence and a SyntaxError).
DESIGN_STRING = """sample_id\ttag_align\txcor\tbiosample\tfactor\ttreatment\treplicate
A_1\tA_1.tagAlign.gz\tA\tLiver\tNone\tNone\t1
A_2\tA_2.tagAlign.gz\tA\tLiver\tNone\tNone\t2
B_1\tB_1.tagAlign.gz\tB\tLiver\tNone\tNone\t1
B_2\tB_2.tagAlign.gz\tB\tLiver\tNone\tNone\t2
"""
@pytest.fixture
def design_tag():
    """Return DESIGN_STRING parsed as a tab-separated DataFrame."""
    return pd.read_csv(StringIO(DESIGN_STRING), sep="\t")
@pytest.mark.integration
def test_experiment_design_single_end():
    """The single-end run must write ENCSR265ZXX.tsv containing three rows."""
    tsv_path = os.path.join(test_output_path, 'ENCSR265ZXX.tsv')
    assert os.path.exists(tsv_path)
    table = pd.read_csv(tsv_path, sep="\t")
    assert len(table.index) == 3
@pytest.mark.integration
def test_experiment_design_paired_end():
    """Placeholder for the paired-end experiment design output checks."""
    # Do the same thing for paired end data
    pass
#!/usr/bin/env python3
import pytest
import os
import pandas as pd
from io import StringIO
from quality_metrics import experiment_qc
# Directory (relative to this test module) where the experimentQC step
# writes its output.
test_output_path = ''.join([
    os.path.dirname(os.path.abspath(__file__)),
    '/../output/experimentQC/',
])
# Tab-separated design table with BAM files: two experiments (A and B) with
# two replicates each. Every row must have one value per header column; the
# biosample and factor values are separated by "\t" (a bare "\N" is a
# malformed escape sequence and a SyntaxError).
DESIGN_STRING = """sample_id\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tbam_reads
A_1\tA\tLiver\tNone\tNone\t1\tA_1.bam
A_2\tA\tLiver\tNone\tNone\t2\tA_2.bam
B_1\tB\tLiver\tNone\tNone\t1\tB_1.bam
B_2\tB\tLiver\tNone\tNone\t2\tB_2.bam
"""
@pytest.fixture
def design_bam():
    """Return DESIGN_STRING parsed as a tab-separated DataFrame."""
    return pd.read_csv(StringIO(DESIGN_STRING), sep="\t")
@pytest.mark.integration
def test_experiment_qc_singleend():
    """The single-end run must produce the three experiment QC output files."""
    expected_outputs = (
        'sample_mbs.npz',
        'heatmap_SpearmanCorr.png',
        'coverage.png',
    )
    for produced in expected_outputs:
        assert os.path.exists(os.path.join(test_output_path, produced))
@pytest.mark.integration
def test_experiment_qc_pairedend():
    """Placeholder for the paired-end experiment QC output checks."""
    # Do the same thing for paired end data
    pass
#!/usr/bin/env python3
import pytest
import os
import pandas as pd
# Directory (relative to this test module) where the filterReads step
# writes its output.
test_output_path = ''.join([
    os.path.dirname(os.path.abspath(__file__)),
    '/../output/filterReads/',
])
@pytest.mark.integration
def test_map_qc_singleend():
    """Placeholder for the single-end filtered-reads output checks.

    Every assertion is currently commented out, so this test always passes.
    """
    # Intended checks, disabled for now: output files exist, flagstat
    # totals match, and the library-complexity metrics match.
    #assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam'))
    #assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bam.bai'))
    #filtered_reads_report = test_output_path + 'ENCFF646LXU.filt.nodup.flagstat.qc'
    #samtools_report = open(filtered_reads_report).readlines()
    #assert '64962570 + 0 in total' in samtools_report[0]
    #assert '64962570 + 0 mapped (100.00%:N/A)' in samtools_report[4]
    #library_complexity = test_output_path + 'ENCFF646LXU.filt.nodup.pbc.qc'
    #df_library_complexity = pd.read_csv(library_complexity, sep='\t')
    #assert df_library_complexity["NRF"].iloc[0] == 0.926192
    #assert df_library_complexity["PBC1"].iloc[0] == 0.926775
    #assert df_library_complexity["PBC2"].iloc[0] == 13.706885
    pass
@pytest.mark.integration
def test_map_qc_pairedend():
    """Placeholder for the paired-end filtered-reads output checks."""
    # Do the same thing for paired end data
    pass
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment