Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • BICF/Astrocyte/chipseq_analysis
  • s190984/chipseq_analysis
  • astrocyte/workflows/bicf/chipseq_analysis
  • s219741/chipseq-analysis-containerized
Show changes
#!/bin/bash
#plotProfile.sh
bws=$(ls *.bw)
gtf=$(ls *.gtf *.bed)
computeMatrix reference-point \
--referencePoint TSS \
-S $bws \
-R $gtf \
--skipZeros \
-o computeMatrix.gz
-p max/2
plotProfile -m computeMatrix.gz \
-out plotProfile.png \
#!/bin/bash
#plot_profile.sh
script_name="plot_profile.sh"
#Help function
usage() {
echo "-h --Help documentation for $script_name"
echo "-g --File path to gtf/bed files"
echo "Example: $script_name -g 'genome.gtf'"
exit 1
}
raise()
{
echo "${1}" >&2
}
check_tools() {
raise "
Checking for required libraries and components on this system
"
deeptools --version &> version_deeptools.txt
if [ $? -gt 0 ]
then
raise "Missing deeptools"
return 1
fi
}
compute_matrix() {
raise "
Computing matrix on ${1} using ${2}
"
computeMatrix reference-point \
--referencePoint TSS \
-S ${1} \
-R ${2} \
--skipZeros \
-o computeMatrix.gz \
-p max/2
if [ $? -gt 0 ]
then
raise "Problem building matrix"
return 1
fi
}
plot_profile() {
raise "
Plotting profile
"
plotProfile -m computeMatrix.gz \
-out plotProfile.png
if [ $? -gt 0 ]
then
raise "Problem plotting"
return 1
fi
}
run_main() {
# Parsing options
OPTIND=1 # Reset OPTIND
while getopts :g:h opt
do
case $opt in
g) gtf=$OPTARG;;
h) usage;;
esac
done
shift $(($OPTIND -1))
# Check for mandatory options
if [[ -z $gtf ]]; then
usage
fi
bws=$(ls *pooled.fc_signal.bw)
check_tools || exit 1
compute_matrix "${bws}" "${gtf}" || return 1
plot_profile || return 1
raise "ALL COMPLETE"
}
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]
then
run_main "$@"
if [ $? -gt 0 ]
then
exit 1
fi
fi
......@@ -204,6 +204,7 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
pool_control_tmp = bedpe_to_tagalign(pool_control, "pool_control")
pool_control = pool_control_tmp
# Duplicate rows and update for pool and psuedoreplicates and update tagAlign with single end data
experiment_id = design_df.at[0, 'experiment_id']
replicate_files = design_df.tag_align.unique()
......@@ -237,9 +238,9 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
else:
pool_experiment_se = pool_experiment
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
# Check controls against cutoff_ratio
# if so replace with pool_control
# unless single control was used
if not single_control:
path_to_pool_control = cwd + '/' + pool_control
if control_df.values.max() > cutoff_ratio:
......@@ -267,7 +268,10 @@ def generate_design(paired, cutoff_ratio, design_df, cwd, no_reps, no_unique_con
path_to_control
else:
path_to_pool_control = cwd + '/' + pool_control
if paired:
path_to_pool_control = cwd + '/' + pool_control
else:
path_to_pool_control = pool_control
design_new_df['control_tag_align'] = path_to_pool_control
# Add in pseudo replicates
......@@ -306,7 +310,7 @@ def main():
design_df = pd.read_csv(design, sep='\t')
# Get current directory to build paths
cwd = os.getcwd()
cwd = os.getcwd()
# Check Number of replicates and replicates
no_reps = check_replicates(design_df)
......
#!/opt/bats/libexec/bats-core/ bats
profile_script="./workflow/scripts/plot_profile.sh"
@test "Test deeptools present" {
source ${profile_script}
run check_tools
}
@test "Test deeptools computeMatrix" {
source ${profile_script}
run compute_matrix test_data/ENCSR238SGC_pooled.fc_signal.bw /project/shared/bicf_workflow_ref/mouse/GRCm38/gencode.vM20.annotation.gtf
FILE=computeMatrix.gz
if [[ -s "$FILE" ]]; then
echo "$FILE exists and not empty"
fi
}
@test "Test deeptools plotProfile" {
source ${profile_script}
run plot_profile computeMatrix.gz
FILE=plotProfile.png
if [[ -s "$FILE" ]]; then
echo "$FILE exists and not empty"
fi
}
......@@ -25,6 +25,10 @@ def test_annotation_singleend():
annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.tsv'
assert os.path.exists(annotation_file)
assert utils.count_lines(annotation_file) >= 149284
df = pd.read_csv(annotation_file, sep = "\t", header = 0)
print(df.head())
#assert df['symbol'].notna().all()
assert not(df['symbol'].isnull().values.any())
@pytest.mark.pairedend
......@@ -42,3 +46,7 @@ def test_annotation_pairedend():
annotation_file = test_output_path + 'ENCSR729LGA.chipseeker_annotation.tsv'
assert os.path.exists(annotation_file)
assert utils.count_lines(annotation_file) >= 25367
df = pd.read_csv(annotation_file, sep = "\t", header = 0)
print(df.head())
#assert df['symbol'].notna().all()
assert not(df['symbol'].isnull().values.any())
......@@ -3,10 +3,29 @@
import pytest
import os
import utils
from io import StringIO
import call_peaks_macs
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/callPeaksMACS/'
test_data_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../test_data/'
@pytest.mark.unit
def test_fragment_length():
experiment = "experiment.tagAlign.gz"
control = "control.tagAlign.gz"
prefix = 'test'
genome_size = 'hs'
chrom_sizes = 'genomefile.txt'
cross_qc = os.path.join(test_data_path, 'test_cross.qc')
with pytest.raises(Exception) as excinfo:
call_peaks_macs.call_peaks_macs(experiment, cross_qc, control, prefix, genome_size, chrom_sizes)
assert str(excinfo.value) == "Error in cross-correlation analysis: ['0', '20', '33']"
@pytest.mark.singleend
def test_fc_signal_singleend():
......
......@@ -71,4 +71,4 @@ def test_diffbind_pairedend_single_rep():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.bed'))
diffbind_file = test_output_path + 'ENCSR729LGA_vs_ENCSR757EMK_diffbind.csv'
assert os.path.exists(diffbind_file)
assert utils.count_lines(diffbind_file) >= 65182
assert utils.count_lines(diffbind_file) >= 65124
......@@ -21,3 +21,4 @@ def test_software_versions_output():
data_loaded = yaml.load(stream)
assert len(data_loaded['data'].split('<dt>')) == 18
assert 'Not Run' not in data_loaded['data'].split('<dt>')[17]
......@@ -45,3 +45,9 @@ def test_overlap_peaks_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR729LGA.rejected.narrowPeak'))
peak_file = test_output_path + 'ENCSR729LGA.replicated.narrowPeak'
assert utils.count_lines(peak_file) >= 25657
@pytest.mark.singlecontrol
def test_overlap_peaks_singlecontrol():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR000DXB.rejected.narrowPeak'))
peak_file = test_output_path + 'ENCSR000DXB.replicated.narrowPeak'
assert utils.count_lines(peak_file) >= 35097
......@@ -11,8 +11,9 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend
def test_plot_singleend():
assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0
@pytest.mark.pairedend
def test_plot_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'computeMatrix.gz'))
assert os.path.exists(os.path.join(test_output_path, 'plotProfile.png'))
assert os.path.getsize(os.path.join(test_output_path, 'plotProfile.png')) > 0
......@@ -33,9 +33,12 @@ def design_experiment_2(design_experiment):
@pytest.fixture
def design_experiment_3(design_experiment):
# Update second control to be same as first
design_experiment.loc[1, 'control_tag_align'] = 'B_1.bedse.gz'
return design_experiment
# Drop Replicate A_2
design_df = design_experiment.drop(design_experiment.index[1])
# Update to be paired as first
design_df.loc[0, 'control_tag_align'] = 'B_1.bedpe.gz'
design_df.loc[0, 'tag_align'] = 'A_1.bedpe.gz'
return design_df
@pytest.mark.unit
......@@ -71,6 +74,19 @@ def test_single_rep(design_experiment_2):
shutil.copy(test_design_path + 'B_1.tagAlign.gz', cwd)
single_rep = pool_and_psuedoreplicate.generate_design('false', 1.2, design_experiment_2, cwd, 1, 1)
assert single_rep.shape[0] == 4
assert len(single_rep['control_tag_align'].unique()) == 2
assert 'pool_control.tagAlign.gz' in single_rep['control_tag_align'].unique()[1]
@pytest.mark.unit
def test_single_control(design_experiment_3):
cwd = os.getcwd()
shutil.copy(test_design_path + 'A_1.bedpe.gz', cwd)
shutil.copy(test_design_path + 'B_1.bedpe.gz', cwd)
shutil.copy(test_design_path + 'A_1.tagAlign.gz', cwd)
single_control = pool_and_psuedoreplicate.generate_design('true', 1.2, design_experiment_3, cwd, 1, 1)
assert 'pool_control.tagAlign.gz' in single_control['control_tag_align'].unique()[0]
@pytest.mark.singleend
def test_pool_and_psuedoreplicate_singleend():
......