From 249284b7db13456f9f8ceb906c59c6080d9720d7 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Thu, 26 Oct 2017 14:31:56 -0500 Subject: [PATCH] Refactor code to have consistent use of arguments and referencing pandas. --- workflow/scripts/check_design.py | 21 ++++++++++++--------- workflow/scripts/experiment_design.py | 11 ++++++----- workflow/scripts/experiment_qc.py | 13 +++++++------ workflow/scripts/trim_reads.py | 6 ++++-- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/workflow/scripts/check_design.py b/workflow/scripts/check_design.py index 6bd822f..a900c3b 100644 --- a/workflow/scripts/check_design.py +++ b/workflow/scripts/check_design.py @@ -21,7 +21,7 @@ logger.setLevel(logging.INFO) def get_args(): '''Define arguments.''' - + parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -137,23 +137,26 @@ def check_files(design, fastq, paired): def main(): args = get_args() + design = args.design + fastq = args.fastq + paired = args.paired # Create a file handler handler = logging.FileHandler('design.log') logger.addHandler(handler) - # Read files - design_file = pd.read_csv(args.design, sep='\t') - fastq_file = pd.read_csv(args.fastq, sep='\t', names=['name', 'path']) + # Read files as dataframes + design_df = pd.read_csv(args.design, sep='\t') + fastq_df = pd.read_csv(args.fastq, sep='\t', names=['name', 'path']) # Check design file - check_design_headers(design_file, args.paired) - check_controls(design_file) - check_replicates(design_file) - new_design = check_files(design_file, fastq_file, args.paired) + check_design_headers(design_df, paired) + check_controls(design_df) + check_replicates(design_df) + new_design_df = check_files(design_df, fastq_df, paired) # Write out new design file - new_design.to_csv('design.tsv', header=True, sep='\t', index=False) + new_design_df.to_csv('design.tsv', header=True, sep='\t', index=False)
if __name__ == '__main__': diff --git a/workflow/scripts/experiment_design.py b/workflow/scripts/experiment_design.py index 3d6697a..f527b46 100644 --- a/workflow/scripts/experiment_design.py +++ b/workflow/scripts/experiment_design.py @@ -21,7 +21,7 @@ logger.setLevel(logging.INFO) def get_args(): '''Define arguments.''' - + parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -64,19 +64,20 @@ def make_experiment_design(design): def main(): args = get_args() + design = args.design # Create a file handler handler = logging.FileHandler('experiment_generation.log') logger.addHandler(handler) - # Read files - design_file = pd.read_csv(args.design, sep='\t') + # Read files as dataframes + design_df = pd.read_csv(design, sep='\t') # Update design file for check_controls - new_design = update_controls(design_file) + new_design_df = update_controls(design_df) # write out experiment design files - make_experiment_design(new_design) + make_experiment_design(new_design_df) if __name__ == '__main__': diff --git a/workflow/scripts/experiment_qc.py b/workflow/scripts/experiment_qc.py index 711da18..6f63b52 100644 --- a/workflow/scripts/experiment_qc.py +++ b/workflow/scripts/experiment_qc.py @@ -25,7 +25,7 @@ logger.setLevel(logging.INFO) def get_args(): '''Define arguments.''' - + parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -146,6 +146,7 @@ def check_enrichment(sample_id, control_id, sample_reads, control_reads): def main(): args = get_args() + design = args.design # Create a file handler handler = logging.FileHandler('experiment_qc.log') @@ -155,18 +156,18 @@ def main(): check_tools() # Read files - design_file = pd.read_csv(args.design, sep='\t') + design_df = pd.read_csv(design, sep='\t') # Run correlation - mbs_filename = generate_read_summary(design_file) + mbs_filename = generate_read_summary(design_df) 
check_correlation(mbs_filename) # Run coverage - check_coverage(design_file) + check_coverage(design_df) # Run enrichment - new_design = update_controls(design_file) - for index, row in new_design.iterrows(): + new_design_df = update_controls(design_df) + for index, row in new_design_df.iterrows(): check_enrichment( row['sample_id'], row['control_id'], diff --git a/workflow/scripts/trim_reads.py b/workflow/scripts/trim_reads.py index 2f7f3e5..036c5f7 100644 --- a/workflow/scripts/trim_reads.py +++ b/workflow/scripts/trim_reads.py @@ -22,7 +22,7 @@ logger.setLevel(logging.INFO) def get_args(): '''Define arguments.''' - + parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -81,6 +81,8 @@ def trim_reads(fastq, paired): def main(): args = get_args() + fastq = args.fastq + paired = args.paired # Create a file handler handler = logging.FileHandler('trim.log') @@ -90,7 +92,7 @@ def main(): check_tools() # Run trim_reads - trim_reads(args.fastq, args.paired) + trim_reads(fastq, paired) if __name__ == '__main__': -- GitLab