Skip to content
Snippets Groups Projects

Resolve "Fix Experiment QC"

Merged Holly Ruess requested to merge 17-FixExperimentQC into master
Compare and
6 files
+ 71
75
Preferences
File browser
Compare changes
@@ -35,11 +35,6 @@ def get_args():
help="The design file to run QC (tsv format).",
required=True)
parser.add_argument('-a', '--atac',
help="True/False if ATAC-seq or ChIP-seq.",
default=False,
action='store_true')
args = parser.parse_args()
return args
@@ -52,6 +47,15 @@ def check_tools():
deeptools_path = shutil.which("deeptools")
if deeptools_path:
logger.info('Found deeptools: %s', deeptools_path)
# Get Version
deeptools_version_command = "deeptools --version"
deeptools_version = subprocess.check_output(deeptools_version_command, shell=True, stderr=subprocess.STDOUT)
# Write to file
deeptools_file = open("version_deeptools.txt", "wb")
deeptools_file.write(deeptools_version)
deeptools_file.close()
else:
logger.error('Missing deeptools')
raise Exception('Missing deeptools')
@@ -77,10 +81,10 @@ def generate_read_summary(design):
return mbs_filename
def check_correlation(mbs):
def check_spearman_correlation(mbs):
'''Check Spearman pairwise correlation of samples based on read counts.'''
spearman_filename = 'heatmap_SpearmanCorr.png'
spearman_filename = 'heatmap_SpearmanCorr.pdf'
spearman_params = "--corMethod spearman --skipZero" + \
" --plotTitle \"Spearman Correlation of Read Counts\"" + \
" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers"
@@ -96,12 +100,31 @@ def check_correlation(mbs):
out, err = spearman_correlation.communicate()
def check_pearson_correlation(mbs):
'''Check Pearson pairwise correlation of samples based on read counts.'''
pearson_filename = 'heatmap_PearsonCorr.pdf'
pearson_params = "--corMethod pearson --skipZero" + \
" --plotTitle \"Pearson Correlation of Read Counts\"" + \
" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers"
pearson_command = (
"plotCorrelation -in %s %s -o %s"
% (mbs, pearson_params, pearson_filename)
)
logger.info("Running deeptools with %s", pearson_command)
pearson_correlation = subprocess.Popen(pearson_command, shell=True)
out, err = pearson_correlation.communicate()
def check_coverage(design):
'''Asses the sequencing depth of samples.'''
bam_files = ' '.join(design['bam_reads'])
labels = ' '.join(design['sample_id'])
coverage_filename = 'coverage.png'
coverage_filename = 'coverage.pdf'
coverage_params = "-n 1000000 --plotTitle \"Sample Coverage\"" + \
" --ignoreDuplicates --minMappingQuality 10"
@@ -116,44 +139,9 @@ def check_coverage(design):
out, err = coverage_summary.communicate()
def update_controls(design):
'''Update design file to append controls list.'''
logger.info("Running control file update.")
file_dict = design[['sample_id', 'bam_reads']] \
.set_index('sample_id').T.to_dict()
design['control_reads'] = design['control_id'] \
.apply(lambda x: file_dict[x]['bam_reads'])
logger.info("Removing rows that are there own control.")
design = design[design['control_id'] != design['sample_id']]
return design
def check_enrichment(sample_id, control_id, sample_reads, control_reads):
'''Asses the enrichment per sample.'''
fingerprint_filename = sample_id + '_fingerprint.png'
fingerprint_command = (
"plotFingerprint -b %s %s --labels %s %s --plotFile %s"
% (sample_reads, control_reads, sample_id, control_id, fingerprint_filename)
)
logger.info("Running deeptools with %s", fingerprint_command)
fingerprint_summary = subprocess.Popen(fingerprint_command, shell=True)
out, err = fingerprint_summary.communicate()
def main():
args = get_args()
design = args.design
atac = args.atac
# Create a file handler
handler = logging.FileHandler('experiment_qc.log')
@@ -167,21 +155,11 @@ def main():
# Run correlation
mbs_filename = generate_read_summary(design_df)
check_correlation(mbs_filename)
check_spearman_correlation(mbs_filename)
check_pearson_correlation(mbs_filename)
# Run coverage
check_coverage(design_df)
# Run enrichment
if not atac:
new_design_df = update_controls(design_df)
for index, row in new_design_df.iterrows():
check_enrichment(
row['sample_id'],
row['control_id'],
row['bam_reads'],
row['control_reads'])
if __name__ == '__main__':
main()