From 3cc1bd730822f8d72306f7493065db96cd158ae9 Mon Sep 17 00:00:00 2001 From: Beibei Chen <beibei.chen@utsouthwestern.edu> Date: Wed, 26 Apr 2017 10:19:22 -0500 Subject: [PATCH] remove unnecessary files --- workflow/main.nf | 5 +-- workflow/scripts/process.py | 77 ------------------------------------- 2 files changed, 1 insertion(+), 81 deletions(-) delete mode 100644 workflow/scripts/process.py diff --git a/workflow/main.nf b/workflow/main.nf index 8425afc..bb90254 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -1,10 +1,9 @@ #!/usr/bin/env nextflow params.design="$baseDir/../test_data/samplesheet.csv" params.bams = "$baseDir/../test_data/*.bam" -// params.bais = "$baseDir/../test_data/*.bai" params.peaks = "$baseDir/../test_data/*.broadPeak" params.genomepath="/project/shared/bicf_workflow_ref/GRCh37" - toppeakcount = 200 + toppeakcount = -1 design_file = file(params.design) deeptools_design = Channel.fromPath(params.design) diffbind_design = Channel.fromPath(params.design) @@ -17,8 +16,6 @@ diffbind_bams = Channel.fromPath(params.bams) diffbind_peaks = Channel.fromPath(params.peaks) meme_peaks = Channel.fromPath(params.peaks) -// deeptools_bamindex = Channel.fromPath(params.bais) -// diffbind_bamindex = Channel.fromPath(params.bais) process bamindex { publishDir "$baseDir/output/", mode: 'copy' diff --git a/workflow/scripts/process.py b/workflow/scripts/process.py deleted file mode 100644 index cc5ab14..0000000 --- a/workflow/scripts/process.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/python -# programmer : bbc -# usage: main function to call all the procedures for chip-seq analysis -import sys -import os -import argparse as ap -import logging -import pandas as pd -import glob -import subprocess -from multiprocessing import Pool -import runDeepTools -import runMemechip -logging.basicConfig(level=10) - - -def prepare_argparser(): - description = "Make wig file for given bed using bam" - epilog = "For command line options of each command, type %(prog)% 
COMMAND -h" - argparser = ap.ArgumentParser(description=description, epilog = epilog) - argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input design file") - argparser.add_argument("-g","--genome",dest = "genome",type=str,required=True, help="genome", default="hg19") - argparser.add_argument("--top-peak",dest="toppeak",type=int, default=-1, help = "Only use top peaks for motif call") - #argparser.add_argument("-s","--strandtype",dest="stranded",type=str,default="none", choices=["none","reverse","yes"]) - #argparser.add_argument("-n","--name",dest="trackName",type=str,default="UserTrack",help = "track name for bedgraph header") - return(argparser) - -def memechip_wrapper(args): - #print args - runMemechip.run(*args) - -def main(): - argparser = prepare_argparser() - args = argparser.parse_args() - #dfile = pd.read_csv(args.infile) - - #for testing, add testing path to all input files - test_path = "/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/" - designfile = pd.read_csv(args.infile) - designfile['Peaks'] = designfile['Peaks'].apply(lambda x: test_path+x) - designfile['bamReads'] = designfile['bamReads'].apply(lambda x: test_path+x) - designfile['bamControl'] = designfile['bamControl'].apply(lambda x: test_path+x) - designfile.to_csv(args.infile+"_new",index=False) - dfile = pd.read_csv(args.infile+"_new") - #call deeptools - runDeepTools.run(args.infile+"_new", args.genome) - #call diffbind - this_script = os.path.abspath(__file__).split("/") - folder = "/".join(this_script[0:len(this_script)-1]) - - diffbind_command = "Rscript "+folder+"/runDiffBind.R "+args.infile+"_new" - #logging.debug(diffbind_command) - p = subprocess.Popen(diffbind_command, shell=True) - p.communicate() - #call chipseeker on original peaks and overlapping peaks - chipseeker_command = "Rscript "+folder+"/runChipseeker.R "+",".join(dfile['Peaks'].tolist())+" "+",".join(dfile['SampleID']) -#BC## logging.debug(chipseeker_command) - p = 
subprocess.Popen(chipseeker_command, shell=True) - p.communicate() - overlapping_peaks = glob.glob('*diffbind.bed') - overlapping_peak_names = [] - for pn in overlapping_peaks: - overlapping_peak_names.append(pn.split("_diffbind")[0].replace("!","non")) - chipseeker_overlap_command = "Rscript "+folder+"/runChipseeker.R "+",".join(overlapping_peaks)+" "+",".join(overlapping_peak_names) - p = subprocess.Popen(chipseeker_overlap_command, shell=True) - p.communicate() - #MEME-chip on all peaks - meme_arglist = zip(dfile['Peaks'].tolist(),[test_path+"hg19.2bit"]*dfile.shape[0],[str(args.toppeak)]*dfile.shape[0],dfile['SampleID'].tolist()) -#BC# #print meme_arglist - work_pool = Pool(min(12,dfile.shape[0])) - resultList = work_pool.map(memechip_wrapper, meme_arglist) - work_pool.close() - work_pool.join() - - -if __name__=="__main__": - main() -- GitLab