Holly Ruess · ec9f9468
--- a/workflow/scripts/experiment_qc.py

+ 33

− 55
+++ b/workflow/scripts/experiment_qc.py

+ 33

− 55
 @@ -35,11 +35,6 @@ def get_args():
                        help="The design file to run QC (tsv format).",
                        required=True)

-    parser.add_argument('-a', '--atac',
-                        help="True/False if ATAC-seq or ChIP-seq.",
-                        default=False,
-                        action='store_true')
-
    args = parser.parse_args()
    return args

 @@ -52,6 +47,15 @@ def check_tools():
    deeptools_path = shutil.which("deeptools")
    if deeptools_path:
        logger.info('Found deeptools: %s', deeptools_path)
+
+        # Get Version
+        deeptools_version_command = "deeptools --version"
+        deeptools_version = subprocess.check_output(deeptools_version_command, shell=True, stderr=subprocess.STDOUT)
+
+        # Write to file
+        deeptools_file = open("version_deeptools.txt", "wb")
+        deeptools_file.write(deeptools_version)
+        deeptools_file.close()
    else:
        logger.error('Missing deeptools')
        raise Exception('Missing deeptools')
 @@ -77,10 +81,10 @@ def generate_read_summary(design):
    return mbs_filename


-def check_correlation(mbs):
+def check_spearman_correlation(mbs):
    '''Check Spearman pairwise correlation of samples based on read counts.'''

-    spearman_filename = 'heatmap_SpearmanCorr.png'
+    spearman_filename = 'heatmap_SpearmanCorr.pdf'
    spearman_params = "--corMethod spearman --skipZero" + \
                    " --plotTitle \"Spearman Correlation of Read Counts\"" + \
                    " --whatToPlot heatmap --colorMap RdYlBu --plotNumbers"
 @@ -96,12 +100,31 @@ def check_correlation(mbs):
    out, err = spearman_correlation.communicate()


+def check_pearson_correlation(mbs):
+    '''Check Pearson pairwise correlation of samples based on read counts.'''
+
+    pearson_filename = 'heatmap_PearsonCorr.pdf'
+    pearson_params = "--corMethod pearson --skipZero" + \
+                    " --plotTitle \"Pearson Correlation of Read Counts\"" + \
+                    " --whatToPlot heatmap --colorMap RdYlBu --plotNumbers"
+
+    pearson_command = (
+        "plotCorrelation -in %s %s -o %s"
+        % (mbs, pearson_params, pearson_filename)
+    )
+
+    logger.info("Running deeptools with %s", pearson_command)
+
+    pearson_correlation = subprocess.Popen(pearson_command, shell=True)
+    out, err = pearson_correlation.communicate()
+
+
 def check_coverage(design):
    '''Asses the sequencing depth of samples.'''

    bam_files = ' '.join(design['bam_reads'])
    labels = ' '.join(design['sample_id'])
-    coverage_filename = 'coverage.png'
+    coverage_filename = 'coverage.pdf'
    coverage_params = "-n 1000000 --plotTitle \"Sample Coverage\"" + \
                    " --ignoreDuplicates --minMappingQuality 10"

 @@ -116,44 +139,9 @@ def check_coverage(design):
    out, err = coverage_summary.communicate()


-def update_controls(design):
-    '''Update design file to append controls list.'''
-
-    logger.info("Running control file update.")
-
-    file_dict = design[['sample_id', 'bam_reads']] \
-                .set_index('sample_id').T.to_dict()
-
-    design['control_reads'] = design['control_id'] \
-                                .apply(lambda x: file_dict[x]['bam_reads'])
-
-    logger.info("Removing rows that are there own control.")
-
-    design = design[design['control_id'] != design['sample_id']]
-
-    return design
-
-
-def check_enrichment(sample_id, control_id, sample_reads, control_reads):
-    '''Asses the enrichment per sample.'''
-
-    fingerprint_filename = sample_id + '_fingerprint.png'
-
-    fingerprint_command = (
-        "plotFingerprint -b %s %s --labels %s %s --plotFile %s"
-        % (sample_reads, control_reads, sample_id, control_id, fingerprint_filename)
-        )
-
-    logger.info("Running deeptools with %s", fingerprint_command)
-
-    fingerprint_summary = subprocess.Popen(fingerprint_command, shell=True)
-    out, err = fingerprint_summary.communicate()
-
-
 def main():
    args = get_args()
    design = args.design
-    atac = args.atac

    # Create a file handler
    handler = logging.FileHandler('experiment_qc.log')
 @@ -167,21 +155,11 @@ def main():

    # Run correlation
    mbs_filename = generate_read_summary(design_df)
-    check_correlation(mbs_filename)
+    check_spearman_correlation(mbs_filename)
+    check_pearson_correlation(mbs_filename)

    # Run coverage
    check_coverage(design_df)

-    # Run enrichment
-    if not atac:
-        new_design_df = update_controls(design_df)
-        for index, row in new_design_df.iterrows():
-            check_enrichment(
-                row['sample_id'],
-                row['control_id'],
-                row['bam_reads'],
-                row['control_reads'])
-
-
 if __name__ == '__main__':
    main()