Skip to content
Snippets Groups Projects
Commit b20526ff authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch 'master' into '22_output_dir'

# Conflicts:
#   workflow/main.nf
parents 6cbc074d b242be87
No related merge requests found
...@@ -14,6 +14,7 @@ params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ? ...@@ -14,6 +14,7 @@ params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.cutoffRatio = 1.2 params.cutoffRatio = 1.2
params.outDir= "$outDir" params.outDir= "$outDir"
params.extendReadsLen = 100
// Check inputs // Check inputs
if( params.bwaIndex ){ if( params.bwaIndex ){
...@@ -38,6 +39,7 @@ genomeSize = params.genomeSize ...@@ -38,6 +39,7 @@ genomeSize = params.genomeSize
chromSizes = params.chromSizes chromSizes = params.chromSizes
cutoffRatio = params.cutoffRatio cutoffRatio = params.cutoffRatio
outDir = params.outDir outDir = params.outDir
extendReadsLen = params.extendReadsLen
process checkDesignFile { process checkDesignFile {
...@@ -195,7 +197,7 @@ process experimentQC { ...@@ -195,7 +197,7 @@ process experimentQC {
script: script:
""" """
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign -e $extendReadsLen
""" """
} }
......
...@@ -34,6 +34,11 @@ def get_args(): ...@@ -34,6 +34,11 @@ def get_args():
help="The design file to run QC (tsv format).", help="The design file to run QC (tsv format).",
required=True) required=True)
parser.add_argument('-e', '--extension',
help="Number of base pairs to extend the reads",
type=int,
required=True)
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -51,7 +56,7 @@ def check_tools(): ...@@ -51,7 +56,7 @@ def check_tools():
raise Exception('Missing deeptools') raise Exception('Missing deeptools')
def generate_read_summary(design): def generate_read_summary(design, extension):
'''Generate summary of data based on read counts.''' '''Generate summary of data based on read counts.'''
bam_files = ' '.join(design['bam_reads']) bam_files = ' '.join(design['bam_reads'])
...@@ -59,8 +64,8 @@ def generate_read_summary(design): ...@@ -59,8 +64,8 @@ def generate_read_summary(design):
mbs_filename = 'sample_mbs.npz' mbs_filename = 'sample_mbs.npz'
mbs_command = ( mbs_command = (
"multiBamSummary bins -p %d --bamfiles %s --labels %s -out %s" "multiBamSummary bins -p %d --bamfiles %s --extendReads %d --labels %s -out %s"
% (cpu_count(), bam_files, labels, mbs_filename) % (cpu_count(), bam_files, extension, labels, mbs_filename)
) )
logger.info("Running deeptools with %s", mbs_command) logger.info("Running deeptools with %s", mbs_command)
...@@ -90,7 +95,7 @@ def check_correlation(mbs): ...@@ -90,7 +95,7 @@ def check_correlation(mbs):
out, err = spearman_correlation.communicate() out, err = spearman_correlation.communicate()
def check_coverage(design): def check_coverage(design, extension):
'''Assess the sequencing depth of samples.''' '''Assess the sequencing depth of samples.'''
bam_files = ' '.join(design['bam_reads']) bam_files = ' '.join(design['bam_reads'])
...@@ -100,8 +105,8 @@ def check_coverage(design): ...@@ -100,8 +105,8 @@ def check_coverage(design):
" --ignoreDuplicates --minMappingQuality 10" " --ignoreDuplicates --minMappingQuality 10"
coverage_command = ( coverage_command = (
"plotCoverage -b %s --labels %s %s --plotFile %s" "plotCoverage -b %s --extendReads %d --labels %s %s --plotFile %s"
% (bam_files, labels, coverage_params, coverage_filename) % (bam_files, extension, labels, coverage_params, coverage_filename)
) )
logger.info("Running deeptools with %s", coverage_command) logger.info("Running deeptools with %s", coverage_command)
...@@ -128,14 +133,14 @@ def update_controls(design): ...@@ -128,14 +133,14 @@ def update_controls(design):
return design return design
def check_enrichment(sample_id, control_id, sample_reads, control_reads): def check_enrichment(sample_id, control_id, sample_reads, control_reads, extension):
'''Assess the enrichment per sample.''' '''Assess the enrichment per sample.'''
fingerprint_filename = sample_id + '_fingerprint.png' fingerprint_filename = sample_id + '_fingerprint.png'
fingerprint_command = ( fingerprint_command = (
"plotFingerprint -b %s %s --labels %s %s --plotFile %s" "plotFingerprint -b %s %s --extendReads %d --labels %s %s --plotFile %s"
% (sample_reads, control_reads, sample_id, control_id, fingerprint_filename) % (sample_reads, control_reads, extension, sample_id, control_id, fingerprint_filename)
) )
logger.info("Running deeptools with %s", fingerprint_command) logger.info("Running deeptools with %s", fingerprint_command)
...@@ -147,6 +152,7 @@ def check_enrichment(sample_id, control_id, sample_reads, control_reads): ...@@ -147,6 +152,7 @@ def check_enrichment(sample_id, control_id, sample_reads, control_reads):
def main(): def main():
args = get_args() args = get_args()
design = args.design design = args.design
extension = args.extension
# Create a file handler # Create a file handler
handler = logging.FileHandler('experiment_qc.log') handler = logging.FileHandler('experiment_qc.log')
...@@ -159,11 +165,11 @@ def main(): ...@@ -159,11 +165,11 @@ def main():
design_df = pd.read_csv(design, sep='\t') design_df = pd.read_csv(design, sep='\t')
# Run correlation # Run correlation
mbs_filename = generate_read_summary(design_df) mbs_filename = generate_read_summary(design_df, extension)
check_correlation(mbs_filename) check_correlation(mbs_filename)
# Run coverage # Run coverage
check_coverage(design_df) check_coverage(design_df, extension)
# Run enrichment # Run enrichment
new_design_df = update_controls(design_df) new_design_df = update_controls(design_df)
...@@ -172,7 +178,8 @@ def main(): ...@@ -172,7 +178,8 @@ def main():
row['sample_id'], row['sample_id'],
row['control_id'], row['control_id'],
row['bam_reads'], row['bam_reads'],
row['control_reads']) row['control_reads'],
extension)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -61,20 +61,20 @@ def xcor(tag, paired): ...@@ -61,20 +61,20 @@ def xcor(tag, paired):
'''Use spp to calculate cross-correlation stats.''' '''Use spp to calculate cross-correlation stats.'''
tag_basename = os.path.basename(utils.strip_extensions(tag, ['.gz'])) tag_basename = os.path.basename(utils.strip_extensions(tag, ['.gz']))
uncompressed_tag_filename = tag_basename uncompressed_tag_filename = tag_basename
out, err = utils.run_pipe([
'gzip -dc %s > ' % (tag)], outfile=uncompressed_tag_filename)
# Subsample tagAlign file # Subsample tagAlign file
NREADS = 15000000 NREADS = 15000000
subsampled_tag_filename = \ subsampled_tag_filename = \
tag_basename + ".%d.tagAlign.gz" % (NREADS/1000000) tag_basename + ".%d.tagAlign.gz" % (NREADS/1000000)
steps = [ steps = [
'grep -v "chrM" %s' % (uncompressed_tag_filename), 'zcat %s' % (tag),
'shuf -n %d --random-source=%s' % (NREADS, uncompressed_tag_filename) 'grep -v "chrM"',
] 'shuf -n %d --random-source=%s' % (NREADS, tag)]
if paired: if paired:
steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""]) steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])
......
...@@ -40,5 +40,8 @@ def test_experiment_qc_singleend(): ...@@ -40,5 +40,8 @@ def test_experiment_qc_singleend():
@pytest.mark.integration @pytest.mark.integration
def test_experiment_qc_pairedend(): def test_experiment_qc_pairedend():
# Do the same thing for paired end data assert os.path.exists(os.path.join(test_output_path, 'sample_mbs.npz'))
pass assert os.path.exists(os.path.join(test_output_path, 'heatmap_SpearmanCorr.png'))
assert os.path.exists(os.path.join(test_output_path, 'coverage.png'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB568IYX_fingerprint.png'))
assert os.path.exists(os.path.join(test_output_path, 'ENCLB637LZP_fingerprint.png'))
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment