diff --git a/astrocyte_package.yml b/astrocyte_package.yml index 9e99a113ca25306a06e095e5ba723fb6e5474790..6bc210a2bbdf06636b039605241f74f38d8a6c9e 100644 --- a/astrocyte_package.yml +++ b/astrocyte_package.yml @@ -40,7 +40,8 @@ documentation_files: # Specify versioned module names to ensure reproducability. workflow_modules: - 'deeptools/2.3.5' - - 'homer/4.7' + - 'meme/4.11.1-gcc-openmpi' + # A list of parameters used by the workflow, defining how to present them, # options etc in the web interface. For each parameter: # diff --git a/workflow/main.nf b/workflow/main.nf index ffdf4d8d5feedb6bf751ae3199574e3c520d9344..3865d90b5ab384f974670621b12da751abe12033 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -2,8 +2,8 @@ // Default parameter values to run tests // params.bams="$baseDir/../test/*.bam" -params.design="$baseDir/../test/samplesheet.csv" -// params.genome="/project/shared/bicf_workflow_ref/GRCh37/" + params.design="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/samplesheet.csv" + params.genome="/project/shared/bicf_workflow_ref/GRCh37/" // design_file = file(params.design) // bams=file(params.bams) @@ -13,21 +13,19 @@ params.design="$baseDir/../test/samplesheet.csv" -process peakanno { +process run_chipseq_analysis { // publishDir "$baseDir/output", mode: 'copy' // input: // file design_file from input // file annotation Tdx output: stdout result -// set peak_id, file("${pattern}_annotation.xls"), file("${pattern}_peakTssDistribution.png") into peakanno script: """ module load python/2.7.x-anaconda - module load R/3.2.1-intel - module load deeptools/2.5.3 - python $baseDir/scripts/process.py - #Rscript /project/BICF/BICF_Core/bchen4/chipseq_analysis/workflow/scripts/runchipseeker.R + module load meme/4.11.1-gcc-openmpi + cat $params.design + python $baseDir/scripts/process.py -i params.design -g params.genome --top-peak 100 """ } diff --git a/workflow/scripts/runDeepTools.py b/workflow/scripts/runDeepTools.py index 81e02b45127096b5a40e45655d8eab15f42575c0..54ee54de0cf873230140ad5d31838f46a2302754 100644 --- a/workflow/scripts/runDeepTools.py +++ b/workflow/scripts/runDeepTools.py @@ -24,22 +24,22 @@ def prepare_argparser(): def run_qc(files, controls, labels): mbs_command = "multiBamSummary bins --bamfiles "+' '.join(files)+" -out sample_mbs.npz" - #p = subprocess.Popen(mbs_command, shell=True) + p = subprocess.Popen(mbs_command, shell=True) #logging.debug(mbs_command) - #p.communicate() - pcor_command = "plotCorrelation -in sample_mbs.npz --corMethod spearman --skipZeros --plotTitle \"Spearman Correlation of Read Counts\" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers --outFileCorMatrix experiment.deeptools.spearmanCorr_readCounts.tab -o experiment.deeptools.heatmap_spearmanCorr_readCounts_v2.png --labels "+" ".join(labels) + p.communicate() + pcor_command = "plotCorrelation -in sample_mbs.npz --corMethod spearman --skipZeros --plotTitle \"Spearman Correlation of Read Counts\" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers -o experiment.deeptools.heatmap_spearmanCorr_readCounts_v2.png --labels "+" ".join(labels) #logging.debug(pcor_command) - #p = subprocess.Popen(pcor_command, shell=True) - #p.communicate() + p = subprocess.Popen(pcor_command, shell=True) + p.communicate() #plotCoverage pcov_command = "plotCoverage -b "+" ".join(files)+" --plotFile experiment.deeptools_coverage.png -n 1000000 --plotTitle \"sample coverage\" --ignoreDuplicates --minMappingQuality 10" p = subprocess.Popen(pcov_command, shell=True) p.communicate() #draw fingerprints plots - #for treat,ctrl,name in zip(files,controls,labels): - # fp_command = "plotFingerprint -b "+treat+" "+ctrl+" --labels "+name+" control --plotFile "+name+".deeptools_fingerprints.png" - # p = subprocess.Popen(fp_command, shell=True) - # p.communicate() + for treat,ctrl,name in zip(files,controls,labels): + fp_command = "plotFingerprint -b "+treat+" "+ctrl+" --labels "+name+" control --plotFile "+name+".deeptools_fingerprints.png" + p = subprocess.Popen(fp_command, shell=True) + p.communicate() def bam2bw_wrapper(command): p = subprocess.Popen(command, shell=True) @@ -57,8 +57,8 @@ def run_signal(files, labels, genome): work_pool.join() cm_command = "computeMatrix scale-regions -R "+gene_bed+" -a 3000 -b 3000 --regionBodyLength 5000 --skipZeros -S *.bw -o samples.deeptools_generegionscalematrix.gz" - #p = subprocess.Popen(cm_command, shell=True) - #p.communicate() + p = subprocess.Popen(cm_command, shell=True) + p.communicate() hm_command = "plotHeatmap -m samples.deeptools_generegionscalematrix.gz -out samples.deeptools_readsHeatmap.png" p = subprocess.Popen(hm_command, shell=True) p.communicate() @@ -67,7 +67,7 @@ def run(dfile,genome): #parse dfile, suppose data files are the same folder as design file dfile = pd.read_csv(dfile) #QC: multiBamSummary and plotCorrelation - #run_qc(dfile['bamReads'], dfile['bamControl'], dfile['SampleID']) + run_qc(dfile['bamReads'], dfile['bamControl'], dfile['SampleID']) #signal plots run_signal(dfile['bamReads'],dfile['SampleID'],genome)