Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • BICF/Astrocyte/chipseq_analysis
  • s190984/chipseq_analysis
  • astrocyte/workflows/bicf/chipseq_analysis
  • s219741/chipseq-analysis-containerized
Show changes
Showing
with 362 additions and 164 deletions
......@@ -4,11 +4,28 @@ profiles {
}
}
trace {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
timeline {
enabled = true
file = 'timeline.html'
}
report {
enabled = true
file = 'report.html'
}
manifest {
name = 'chipseq_analysis'
description = 'BICF ChIP-seq Analysis Workflow.'
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis'
version = '1.0.6'
version = '1.1.2'
mainScript = 'main.nf'
nextflowVersion = '>=0.31.0'
}
......@@ -6,40 +6,27 @@
#* --------------------------------------------------------------------------
#*
#Currently Human or Mouse
# Load libraries
library("ChIPseeker")
# Currently mouse or human
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("TxDb.Mmusculus.UCSC.mm10.knownGene")
library("TxDb.Hsapiens.UCSC.hg38.knownGene")
library("org.Hs.eg.db")
library("org.Mm.eg.db")
library(GenomicFeatures)
# Create parser object
args <- commandArgs(trailingOnly=TRUE)
# Check input args
if (length(args) != 2) {
stop("Usage: annotate_peaks.R annotate_design.tsv genome_assembly", call.=FALSE)
if (length(args) != 3) {
stop("Usage: annotate_peaks.R annotate_design.tsv gtf geneNames", call.=FALSE)
}
design_file <- args[1]
genome_assembly <- args[2]
gtf <- args[2]
geneNames <- args[3]
# Load UCSC Known Genes
if(genome_assembly=='GRCh37') {
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
annodb <- 'org.Hs.eg.db'
} else if(genome_assembly=='GRCm38') {
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
annodb <- 'org.Mm.eg.db'
} else if(genome_assembly=='GRCh38') {
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
annodb <- 'org.Hs.eg.db'
}
txdb <- makeTxDbFromGFF(gtf)
sym <- read.table(geneNames, header=T, sep='\t') [,4:5]
# Output version of ChIPseeker
chipseeker_version = packageVersion('ChIPseeker')
......@@ -54,18 +41,19 @@ names(files) <- design$Condition
# Granges of files
peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE)
peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, annoDb=annodb, tssRegion=c(-3000, 3000), verbose=FALSE)
peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
column_names <- c("chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
column_names <- c("geneId","chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
"pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd",
"geneLength" ,"geneStrand", "geneId", "transcriptId", "distanceToTSS",
"ENSEMBL", "symbol", "geneName")
"geneLength" ,"geneStrand", "transcriptId", "distanceToTSS", "symbol")
for(index in c(1:length(peakAnnoList))) {
filename <- paste(names(peaks)[index], ".chipseeker_annotation.tsv", sep="")
df <- as.data.frame(peakAnnoList[[index]])
colnames(df) <- column_names
write.table(df[ , !(names(df) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
df$geneId <- sapply(strsplit(as.character(df$geneId), split = "\\."), "[[", 1)
df_final <- merge(df, sym, by.x="geneId", by.y="ensembl", all.x=T)
colnames(df_final) <- column_names
write.table(df_final[ , !(names(df_final) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
# Draw individual plots
......
......@@ -138,8 +138,20 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes)
with open(xcor, 'r') as xcor_fh:
firstline = xcor_fh.readline()
frag_lengths = firstline.split()[2] # third column
fragment_length = frag_lengths.split(',')[0] # grab first value
logger.info("Fraglen %s", fragment_length)
frag_lengths_array = frag_lengths.split(',')
fragment_length = 0
fragment = False
# Loop through all values of fragment length
for f in frag_lengths.split(','):
fragment_length = f
logger.info("Fraglen %s", fragment_length)
if int(fragment_length) > 50:
fragment = True
break
if fragment == False:
logger.info('Error in cross-correlation analysis: %s', frag_lengths_array)
raise Exception("Error in cross-correlation analysis: %s" % frag_lengths_array)
# Generate narrow peaks and preliminary signal tracks
......
......@@ -46,7 +46,7 @@ SOFTWARE_REGEX = {
'MEME-ChIP': ['motifSearch_vf/version_memechip.txt', r"Version (\S+)"],
'DiffBind': ['diffPeaks_vf/version_DiffBind.txt', r"Version (\S+)\""],
'deepTools': ['experimentQC_vf/version_deeptools.txt', r"deeptools (\S+)"],
'Python': ['version_python.txt', r"python, version (\S+)"],
'Python': ['version_python.txt', r"Python (\S+)"],
'MultiQC': ['version_multiqc.txt', r"multiqc, version (\S+)"],
}
......
#!/bin/bash
#plotProfile.sh
bws=`ls *.bw`
gtf=`ls *.gtf *.bed`
computeMatrix reference-point \
--referencePoint TSS \
-S $bws \
-R $gtf \
--skipZeros \
-o computeMatrix.gz
plotProfile -m computeMatrix.gz \
-out plotProfile.png \
#!/bin/bash
#plot_profile.sh
script_name="plot_profile.sh"
#Help function
usage() {
echo "-h --Help documentation for $script_name"
echo "-g --File path to gtf/bed files"
echo "Example: $script_name -g 'genome.gtf'"
exit 1
}
raise()
{
echo "${1}" >&2
}
check_tools() {
raise "
Checking for required libraries and components on this system
"
deeptools --version &> version_deeptools.txt
if [ $? -gt 0 ]
then
raise "Missing deeptools"
return 1
fi
}
compute_matrix() {
raise "
Computing matrix on ${1} using ${2}
"
computeMatrix reference-point \
--referencePoint TSS \
-S ${1} \
-R ${2} \
--skipZeros \
-o computeMatrix.gz \
-p max/2
if [ $? -gt 0 ]
then
raise "Problem building matrix"
return 1
fi
}
plot_profile() {
raise "
Plotting profile
"
plotProfile -m computeMatrix.gz \
-out plotProfile.png
if [ $? -gt 0 ]
then
raise "Problem plotting"
return 1
fi
}
run_main() {
# Parsing options
OPTIND=1 # Reset OPTIND
while getopts :g:h opt
do
case $opt in
g) gtf=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $gtf ]]; then
usage
fi
bws=$(ls *pooled.fc_signal.bw)
check_tools || exit 1
compute_matrix "${bws}" "${gtf}" || return 1
plot_profile || return 1
raise "ALL COMPLETE"
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]
then
run_main "$@"
if [ $? -gt 0 ]
then
exit 1
fi
fi
......@@ -172,26 +172,7 @@ def self_psuedoreplication(tag_file, prefix, paired):
return pseudoreplicate_dict
def main():
args = get_args()
paired = args.paired
design = args.design
cutoff_ratio = args.cutoff
# Create a file handler
handler = logging.FileHandler('experiment_generation.log')
logger.addHandler(handler)
# Read files as dataframes
design_df = pd.read_csv(design, sep='\t')
# Get current directory to build paths
cwd = os.getcwd()
# Check Number of replicates and replicates
no_reps = check_replicates(design_df)
no_unique_controls = check_controls(design_df)
def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_controls):
if no_reps == 1:
logger.info("No other replicate specified "
"so processing as an unreplicated experiment.")
......@@ -223,83 +204,43 @@ def main():
pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
pool_control = pool_control_tmp
if not replicated:
# Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id']
replicate = design_df.at[0, 'replicate']
design_new_df = design_df.loc[np.repeat(design_df.index, 4)].reset_index()
# Update tagAlign with single end data
if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
design_new_df['replicate'] = design_new_df['replicate'].astype(str)
design_new_df.at[1, 'sample_id'] = experiment_id + '_pr1'
design_new_df.at[1, 'replicate'] = '1_pr'
design_new_df.at[1, 'xcor'] = 'Calculate'
design_new_df.at[2, 'sample_id'] = experiment_id + '_pr2'
design_new_df.at[2, 'replicate'] = '2_pr'
design_new_df.at[2, 'xcor'] = 'Calculate'
design_new_df.at[3, 'sample_id'] = experiment_id + '_pooled'
design_new_df.at[3, 'replicate'] = 'pooled'
design_new_df.at[3, 'xcor'] = 'Calculate'
design_new_df.at[3, 'tag_align'] = design_new_df.at[0, 'tag_align']
# Make 2 self psuedoreplicates
self_pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
self_pseudoreplicates_dict = \
self_psuedoreplication(tag_file, replicate_prefix, paired)
# Update design to include new self pseudo replicates
for rep, pseudorep_file in self_pseudoreplicates_dict.items():
path_to_file = cwd + '/' + pseudorep_file
replicate = rep + 1
design_new_df.loc[replicate, 'tag_align'] = path_to_file
# Drop index column
design_new_df.drop(labels='index', axis=1, inplace=True)
# Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id']
replicate_files = design_df.tag_align.unique()
pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
# Make 2 self psuedoreplicates
pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
pseudoreplicates_dict[rep] = pr_dict
# Update design to include new self pseudo replicates
pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
for index, row in pseudoreplicates_df.iterrows():
replicate_id = index + 1
pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
pool_replicate = pool(row, pr_filename, False)
pool_pseudoreplicates_dict[replicate_id] = pool_replicate
design_new_df = design_df #.loc[np.repeat(design_df.index, 4)].reset_index()
# Update tagAlign with single end data
if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
# If paired change to single End
if paired:
pool_experiment_se = bedpe_to_tagalign(pool_experiment, experiment_id + "_pooled")
else:
# Make pool of replicates
replicate_files = design_df.tag_align.unique()
experiment_id = design_df.at[0, 'experiment_id']
pool_experiment = pool(replicate_files, experiment_id + "_pooled", paired)
# If paired change to single End
if paired:
pool_experiment_se = bedpe_to_tagalign(pool_experiment, experiment_id + "_pooled")
else:
pool_experiment_se = pool_experiment
# Make self psuedoreplicates equivalent to number of replicates
pseudoreplicates_dict = {}
for rep, tag_file in zip(design_df['replicate'], design_df['tag_align']):
replicate_prefix = experiment_id + '_' + str(rep)
pr_dict = self_psuedoreplication(tag_file, replicate_prefix, paired)
pseudoreplicates_dict[rep] = pr_dict
# Merge self psuedoreplication for each true replicate
pseudoreplicates_df = pd.DataFrame.from_dict(pseudoreplicates_dict)
pool_pseudoreplicates_dict = {}
for index, row in pseudoreplicates_df.iterrows():
replicate_id = index + 1
pr_filename = experiment_id + ".pr" + str(replicate_id) + '.tagAlign.gz'
pool_replicate = pool(row, pr_filename, False)
pool_pseudoreplicates_dict[replicate_id] = pool_replicate
design_new_df = design_df
# Update tagAlign with single end data
if paired:
design_new_df['tag_align'] = design_new_df['se_tag_align']
design_new_df.drop(labels='se_tag_align', axis=1, inplace=True)
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
pool_experiment_se = pool_experiment
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
if not single_control:
path_to_pool_control = cwd + '/' + pool_control
if control_df.values.max() > cutoff_ratio:
......@@ -327,7 +268,10 @@ def main():
path_to_control
else:
path_to_pool_control = cwd + '/' + pool_control
if paired:
path_to_pool_control = cwd + '/' + pool_control
else:
path_to_pool_control = pool_control
design_new_df['control_tag_align'] = path_to_pool_control
# Add in pseudo replicates
......@@ -349,7 +293,34 @@ def main():
tmp_metadata['tag_align'] = path_to_file
design_new_df = design_new_df.append(tmp_metadata)
return design_new_df
def main():
args = get_args()
paired = args.paired
design = args.design
cutoff_ratio = args.cutoff
# Create a file handler
handler = logging.FileHandler('experiment_generation.log')
logger.addHandler(handler)
# Read files as dataframes
design_df = pd.read_csv(design, sep='\t')
# Get current directory to build paths
cwd = os.getcwd()
# Check Number of replicates and replicates
no_reps = check_replicates(design_df)
no_unique_controls = check_controls(design_df)
# Generate new design file
design_new_df = generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_controls)
# Write out new dataframe
experiment_id = design_df.at[0, 'experiment_id']
design_new_df.to_csv(experiment_id + '_ppr.tsv',
header=True, sep='\t', index=False)
......
......@@ -103,14 +103,20 @@ def xcor(tag, paired):
uncompressed_tag_filename = tag_basename
# Subsample tagAlign file
number_reads = 15000000
number_reads = 20000000
subsampled_tag_filename = \
tag_basename + ".%d.tagAlign.gz" % (number_reads/1000000)
tag_extended = 'cat.tagAlign.gz'
out, err = utils.run_pipe([
"zcat %s %s %s" %
(tag, tag, tag)
], outfile=tag_extended)
steps = [
'zcat %s' % (tag),
'grep -v "chrM"',
'shuf -n %d --random-source=%s' % (number_reads, tag)]
'shuf -n %d --random-source=%s' % (number_reads, tag_extended)]
if paired:
steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])
......
#!/opt/bats/libexec/bats-core/ bats
profile_script="./workflow/scripts/plot_profile.sh"
@test "Test deeptools present" {
source ${profile_script}
run check_tools
}
@test "Test deeptools computeMatrix" {
source ${profile_script}
run compute_matrix test_data/ENCSR238SGC_pooled.fc_signal.bw /project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf
FILE=computeMatrix.gz
if [[ -s "$FILE" ]]; then
echo "$FILE exists and not empty"
fi
}
@test "Test deeptools plotProfile" {
source ${profile_script}
run plot_profile computeMatrix.gz
FILE=plotProfile.png
if [[ -s "$FILE" ]]; then
echo "$FILE exists and not empty"
fi
}
......@@ -25,6 +25,10 @@ def test_annotation_singleend():
annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.tsv'
assert os.path.exists(annotation_file)
assert utils.count_lines(annotation_file) >= 149284
df = pd.read_csv(annotation_file, sep = "\t", header = 0)
print(df.head())
#assert df['symbol'].notna().all()
assert not(df['symbol'].isnull().values.any())
@pytest.mark.pairedend
......@@ -41,4 +45,8 @@ def test_upsetplot_pairedend():
def test_annotation_pairedend():
annotation_file = test_output_path + 'ENCSR729LGA.chipseeker_annotation.tsv'
assert os.path.exists(annotation_file)
assert utils.count_lines(annotation_file) >= 25494
assert utils.count_lines(annotation_file) >= 25367
df = pd.read_csv(annotation_file, sep = "\t", header = 0)
print(df.head())
#assert df['symbol'].notna().all()
assert not(df['symbol'].isnull().values.any())
......@@ -3,10 +3,29 @@
import pytest
import os
import utils
from io import StringIO
import call_peaks_macs
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/callPeaksMACS/'
test_data_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../test_data/'
@pytest.mark.unit
def test_fragment_length():
experiment = "experiment.tagAlign.gz"
control = "control.tagAlign.gz"
prefix = 'test'
genome_size = 'hs'
chrom_sizes = 'genomefile.txt'
cross_qc = os.path.join(test_data_path, 'test_cross.qc')
with pytest.raises(Exception) as excinfo:
call_peaks_macs.call_peaks_macs(experiment, cross_qc, control, prefix, genome_size, chrom_sizes)
assert str(excinfo.value) == "Error in cross-correlation analysis: ['0', '20', '33']"
@pytest.mark.singleend
def test_fc_signal_singleend():
......@@ -26,7 +45,7 @@ def test_peaks_xls_singleend():
@pytest.mark.singleend
def test_peaks_bed_singleend():
peak_file = test_output_path + 'ENCSR238SGC/1/' + 'ENCLB144FDT.narrowPeak'
assert utils.count_lines(peak_file) == 227389
assert utils.count_lines(peak_file) == 226738
@pytest.mark.pairedend
......@@ -47,4 +66,4 @@ def test_peaks_xls_pairedend():
@pytest.mark.pairedend
def test_peaks_bed_pairedend():
peak_file = test_output_path + 'ENCSR729LGA/2/' + 'ENCLB568IYX.narrowPeak'
assert utils.count_lines(peak_file) == 113821
assert utils.count_lines(peak_file) == 112631
......@@ -15,7 +15,7 @@ def test_diff_peaks_singleend_single_rep():
assert os.path.isdir(test_output_path) == False
@pytest.mark.pairedend
@pytest.mark.pairedendskip_true
def test_diff_peaks_pairedend_single_rep():
assert os.path.isdir(test_output_path) == False
......@@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed'))
diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv'
assert os.path.exists(diffbind_file)
assert utils.count_lines(diffbind_file) >= 66201
assert utils.count_lines(diffbind_file) >= 65124
......@@ -4,6 +4,7 @@ import pytest
import os
import utils
import yaml
from bs4 import BeautifulSoup
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/multiqcReport/'
......@@ -21,3 +22,14 @@ def test_software_references_output():
data_loaded = yaml.load(stream)
assert len(data_loaded['data'].split('<ul>')) == 19
@pytest.mark.singleend
def test_software_references_html():
multiqc_report = os.path.join(test_output_path, 'multiqc_report.html')
html_file = open(multiqc_report, 'r')
source_code = html_file.read()
multiqc_html = BeautifulSoup(source_code, 'html.parser')
references = multiqc_html.find(id="mqc-module-section-Software_References")
assert references is not None
assert len(references.find_all('ul')) == 18
......@@ -20,4 +20,5 @@ def test_software_versions_output():
with open(software_versions, 'r') as stream:
data_loaded = yaml.load(stream)
assert len(data_loaded['data'].split('<dt>')) == 17
assert len(data_loaded['data'].split('<dt>')) == 18
assert 'Not Run' not in data_loaded['data'].split('<dt>')[17]
......@@ -44,4 +44,10 @@ def test_overlap_peaks_singleend():
def test_overlap_peaks_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak'))
peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak'
assert utils.count_lines(peak_file) >= 25758
assert utils.count_lines(peak_file) >= 25657
@pytest.mark.singlecontrol
def test_overlap_peaks_singlecontrol():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR000DXB.rejected.narrowPeak'))
peak_file = test_output_path + 'ENCSR000DXB.replicated.narrowPeak'
assert utils.count_lines(peak_file) >= 35097
#!/usr/bin/env python3
import pytest
import os
import utils
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/experimentQC/'
@pytest.mark.singleend
def test_plot_singleend():
assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0
@pytest.mark.pairedend
def test_plot_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0
......@@ -5,16 +5,18 @@ import pandas as pd
from io import StringIO
import os
import pool_and_psuedoreplicate
import shutil
test_design_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../test_data/'
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/design/'
DESIGN_STRING = """sample_id\ttag_align\txcor\tbiosample\tfactor\ttreatment\treplicate\tcontrol_tag_align
A_1\tA_1.bedse.gz\tA_1.cc.qc\tLiver\tH3K27ac\tNone\t1\tB_1.bedse.gz
A_2\tA_2.bedse.gz\tA_2.cc.qc\tLiver\tH3K27ac\tNone\t2\tB_2.bedse.gz
DESIGN_STRING = """sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\tcontrol_tag_align
A_1\tA_1.tagAlign.gz\tA_1.bedse.gz\tA_1.cc.qc\tA\tLiver\tH3K27ac\tNone\t1\tB_1\tB_1.bedse.gz
A_2\tA_2.tagAlign.gz\tA_2.bedse.gz\tA_2.cc.qc\tA\tLiver\tH3K27ac\tNone\t2\tB_2\tB_2.bedse.gz
"""
@pytest.fixture
def design_experiment():
design_file = StringIO(DESIGN_STRING)
......@@ -31,9 +33,12 @@ def design_experiment_2(design_experiment):
@pytest.fixture
def design_experiment_3(design_experiment):
# Update second control to be same as first
design_experiment.loc[1, 'control_tag_align'] = 'B_1.bedse.gz'
return design_experiment
# Drop Replicate A_2
design_df = design_experiment.drop(design_experiment.index[1])
# Update to be paired as first
design_df.loc[0, 'control_tag_align'] = 'B_1.bedpe.gz'
design_df.loc[0, 'tag_align'] = 'A_1.bedpe.gz'
return design_df
@pytest.mark.unit
......@@ -62,7 +67,25 @@ def test_check_controls_single(design_experiment_3):
@pytest.mark.unit
def test_single_rep(design_experiment_2):
single_rep = pool_and_psuedoreplicate.main(design_experiment_2, "true", 1.2)
cwd = os.getcwd()
shutil.copy(test_design_path + 'A_1.bedse.gz', cwd)
shutil.copy(test_design_path + 'B_1.bedse.gz', cwd)
shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
assert single_rep.shape[0] == 4
assert len(single_rep['control_tag_align'].unique()) == 2
assert 'pool_control.tagAlign.gz' in single_rep['control_tag_align'].unique()[1]
@pytest.mark.unit
def test_single_control(design_experiment_3):
cwd = os.getcwd()
shutil.copy(test_design_path + 'A_1.bedpe.gz', cwd)
shutil.copy(test_design_path + 'B_1.bedpe.gz', cwd)
shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
single_control = pool_and_psuedoreplicate.generate_design('true', 1.2, design_experiment_3, cwd, 1, 1)
assert 'pool_control.tagAlign.gz' in single_control['control_tag_align'].unique()[0]
@pytest.mark.singleend
......
......@@ -17,9 +17,9 @@ def test_cross_plot_singleend():
def test_cross_qc_singleend():
qc_file = os.path.join(test_output_path,"ENCLB144FDT/ENCLB144FDT.cc.qc")
df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
assert df_xcor[2].iloc[0] == '190,200,210'
assert df_xcor[8].iloc[0] == 1.025906
assert round(df_xcor[9].iloc[0], 6) == 1.139671
assert df_xcor[2].iloc[0] == '185,195,205'
assert df_xcor[8].iloc[0] == 1.02454
assert df_xcor[9].iloc[0] == 0.8098014
@pytest.mark.pairedend
......@@ -31,6 +31,6 @@ def test_cross_qc_pairedend():
def test_cross_plot_pairedend():
qc_file = os.path.join(test_output_path,"ENCLB568IYX/ENCLB568IYX.cc.qc")
df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
assert df_xcor[2].iloc[0] == '220,430,475'
assert round(df_xcor[8].iloc[0],6) == 1.060018
assert df_xcor[9].iloc[0] == 4.099357
assert df_xcor[2].iloc[0] == '215,225,455'
assert round(df_xcor[8].iloc[0],6) == 1.056201
assert df_xcor[9].iloc[0] == 3.599357