Skip to content
Snippets Groups Projects
Commit a6ac0e3f authored by Beibei Chen's avatar Beibei Chen
Browse files

deeptools qc works

parent f14f792c
Branches
Tags
No related merge requests found
#!/usr/bin/env nextflow
// Default parameter values to run tests
params.bams="$baseDir/../test/*.bam"
// params.bams="$baseDir/../test/*.bam"
params.design="$baseDir/../test/samplesheet.csv"
#params.genome="/project/shared/bicf_workflow_ref/GRCh37/"
// params.genome="/project/shared/bicf_workflow_ref/GRCh37/"
design_file = file(params.design)
bams=file(params.bams)
#gtf_file = file("$params.genome/gencode.gtf")
#genenames = file("$params.genome/genenames.txt")
#geneset = file("$params.genome/gsea_gmt/$params.geneset")
// design_file = file(params.design)
// bams=file(params.bams)
//gtf_file = file("$params.genome/gencode.gtf")
//genenames = file("$params.genome/genenames.txt")
//geneset = file("$params.genome/gsea_gmt/$params.geneset")
// Pair handling, helper function taken from rnatoy
// which is covered by the GNU General Public License v3
// https://github.com/nextflow-io/rnatoy/blob/master/main.nf
def fileMap = [:]
fastqs.each {
final fileName = it.getFileName().toString()
prefix = fileName.lastIndexOf('/')
fileMap[fileName] = it
}
def prefix = []
new File(params.design).withReader { reader ->
def hline = reader.readLine()
def header = hline.split(",")
prefixidx = header.findIndexOf{it == 'Condition'};
peakidx = header.findIndexOf{it == 'Peaks'};
bamipidx = header.findIndexOf{it == 'bamReads'};
bamctrlidx = header.findIndexOf{it == 'bamControl'};
if (bamctrlidx == -1) {
error "Must provide control BAM file"
}
if (peakidx == -1) {
error "Must provide peak file"
}
while (line = reader.readLine()) {
def row = line.split(",")
if (fileMap.get(row[bamipidx]) != null) {
prefix << tuple(row[prefixidx],fileMap.get(row[peakidx]),fileMap.get(row[bamipidx]),fileMap.get(row[bamctrlidx]))
}
}
}
if( ! prefix) { error "Didn't match any input files with entries in the design file" }
if (params.pairs == 'pe') {
Channel
.from(prefix)
.set { read_pe }
Channel
.empty()
.set { read_se }
}
if (params.pairs == 'se') {
Channel
.from(prefix)
.into { read_se }
Channel
.empty()
.set { read_pe }
}
process peakanno {
publishDir "$baseDir/output", mode: 'copy'
input:
file peak_file from greencenter
file design_file from input
file annotation Tdx
output:
set peak_id, file("${pattern}_annotation.xls"), file("${pattern}_peakTssDistribution.png") into peakanno
"""
module load R/3.2.1-intel
Rscript $baseDir/scripts/runchipseeker.R
"""
}
//Run deeptools for QC and other plots
//Since this problem also need all input files, need to build another channel?
process deeptools {
publishDir "$baseDir/output", mode: 'copy'
input:
file peak_file from input
file bam_file from imput
// publishDir "$baseDir/output", mode: 'copy'
// input:
// file design_file from input
// file annotation Tdx
output:
set
stdout result
// set peak_id, file("${pattern}_annotation.xls"), file("${pattern}_peakTssDistribution.png") into peakanno
script:
"""
module load R/3.2.1-intel
python $baseDir/scripts/process.py
#Rscript /project/BICF/BICF_Core/bchen4/chipseq_analysis/workflow/scripts/runchipseeker.R
"""
}
//Need to do it with more than 1 condition
process diffbind {
publishDir "$baseDir/output", mode: 'copy'
input:
file peak_file from input
file design_file name 'design.txt'
file bam_file from input
output:
file "*.txt" into txtfiles
file "*.png" into psfiles
file "*"
"""
module load R/3.2.1-intel
#cp design.txt design.shiny.txt
#cp geneset.gmt geneset.shiny.gmt
Rscript $baseDir/scripts/runDiffBind.R
"""
}
process buildrda {
publishDir "$baseDir/output", mode: 'copy'
input:
file stringtie_dir from strcts.toList()
file design_file name 'design.txt'
output:
file "bg.rda" into rdafiles
"""
module load R/3.2.1-intel
Rscript $baseDir/scripts/build_ballgown.R *_stringtie
"""
}
#!/usr/bin/python
# programmer : bbc
# usage:
import sys
import argparse as ap
import logging
import subprocess
import pandas as pd
from multiprocessing import Pool
logging.basicConfig(level=10)
def prepare_argparser():
description = "Make wig file for given bed using bam"
epilog = "For command line options of each command, type %(prog)% COMMAND -h"
argparser = ap.ArgumentParser(description=description, epilog = epilog)
argparser.add_argument("-i","--input",dest = "infile",type=str,required=True, help="input BAM file")
argparser.add_argument("-g","--genome",dest = "genome",type=str,required=True, help="genome", default="hg19")
#argparser.add_argument("-b","--bed",dest="bedfile",type=str,required=True, help = "Gene locus in bed format")
#argparser.add_argument("-s","--strandtype",dest="stranded",type=str,default="none", choices=["none","reverse","yes"])
#argparser.add_argument("-n","--name",dest="trackName",type=str,default="UserTrack",help = "track name for bedgraph header")
return(argparser)
def run_qc(files, controls, labels):
mbs_command = "multiBamSummary bins --bamfiles "+' '.join(files)+" -out sample_mbs.npz"
#p = subprocess.Popen(mbs_command, shell=True)
#logging.debug(mbs_command)
#p.communicate()
pcor_command = "plotCorrelation -in sample_mbs.npz --corMethod spearman --skipZeros --plotTitle \"Spearman Correlation of Read Counts\" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers --outFileCorMatrix experiment.deeptools.spearmanCorr_readCounts.tab -o experiment.deeptools.heatmap_spearmanCorr_readCounts_v2.png --labels "+" ".join(labels)
#logging.debug(pcor_command)
#p = subprocess.Popen(pcor_command, shell=True)
#p.communicate()
#plotCoverage
pcov_command = "plotCoverage -b "+" ".join(files)+" --plotFile experiment.deeptools_coverage.png -n 1000000 --plotTitle \"sample coverage\" --ignoreDuplicates --minMappingQuality 10"
p = subprocess.Popen(pcov_command, shell=True)
p.communicate()
#draw fingerprints plots
#for treat,ctrl,name in zip(files,controls,labels):
# fp_command = "plotFingerprint -b "+treat+" "+ctrl+" --labels "+name+" control --plotFile "+name+".deeptools_fingerprints.png"
# p = subprocess.Popen(fp_command, shell=True)
# p.communicate()
def bam2bw_wrapper(command):
p = subprocess.Popen(command, shell=True)
p.communicate()
def run_signal(files, labels, genome):
#compute matrix and draw profile and heatmap
gene_bed = "/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/genome/"+genome+"/gene.bed"
bw_commands = []
for f in files:
bw_commands.append("bamCoverage -b "+f+" -o "+f.replace("bam","bw"))
work_pool = Pool(min(len(files), 12))
work_pool.map(bam2bw_wrapper, bw_commands)
work_pool.close()
work_pool.join()
#cm_command = "computeMatrix scale-regions "
def run(dfile,genome):
#Get annotation address
tss_bed = "/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/genome/"+genome+"/tss.bed"
#parse dfile, suppose data files are the same folder as design file
dfile = pd.read_csv(dfile)
#QC: multiBamSummary and plotCorrelation
run_qc(dfile['bamReads'], dfile['bamControl'], dfile['SampleID'])
def main():
argparser = prepare_argparser()
args = argparser.parse_args()
run(args.infile, args.genome)
if __name__=="__main__":
main()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment