diff --git a/workflow/main.nf b/workflow/main.nf index 450f47047c44dabe8eae9f47ffed028c0f3340b1..5ce28f77a699d3c5bda853e5aa5848285b0b2451 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -214,7 +214,7 @@ process convertReads { output: - set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), experimentId, biosample, factor, treatment, replicate into tagReads + set sampleId, file('*.tagAlign.gz'), experimentId, biosample, factor, treatment, replicate into tagReads script: @@ -263,7 +263,7 @@ process crossReads { input: - set sampleId, seTagAlign, tagAlign, experimentId, biosample, factor, treatment, replicate from tagReads + set sampleId, tagAlign, experimentId, biosample, factor, treatment, replicate from tagReads output: @@ -274,12 +274,12 @@ process crossReads { if (pairedEnd) { """ - python3 $baseDir/scripts/xcor.py -t $seTagAlign -p + python3 $baseDir/scripts/xcor.py -t $tagAlign -p """ } else { """ - python3 $baseDir/scripts/xcor.py -t $seTagAlign + python3 $baseDir/scripts/xcor.py -t $tagAlign """ } diff --git a/workflow/scripts/convert_reads.py b/workflow/scripts/convert_reads.py index d0997944f3ff74419d9041259185376f0a7bf162..0d515666f356983bfc6e7cd3d561d6ea92ec9c55 100644 --- a/workflow/scripts/convert_reads.py +++ b/workflow/scripts/convert_reads.py @@ -40,6 +40,16 @@ def get_args(): default=False, action='store_true') + parser.add_argument('-m', '--chrm', + help="True/False if to remove ChrM.", + default=True, + action='store_true') + + parser.add_argument('-t', '--tn5' + help='Enable TN5 shifting for ATAC-Seq.' + default=True, + action='store_true') + args = parser.parse_args() return args @@ -66,16 +76,22 @@ def check_tools(): raise Exception('Missing samtools') -def convert_mapped(bam, tag_filename): +def convert_mapped(bam, tag_filename, chrm): '''Use bedtools to convert to tagAlign.''' - out, err = utils.run_pipe([ + steps = [ "bamToBed -i %s" % (bam), - r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""", - "gzip -nc"], outfile=tag_filename) + r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""] + + if chrm: + steps.extend["grep -v 'chrM'"] + steps.extend["gzip -nc"] -def convert_mapped_pe(bam, bam_basename): + out, err = utils.run_pipe(steps, outfile=tag_filename) + + +def convert_mapped_pe(bam, bam_basename, tag_filename, chrm): '''Use bedtools to convert to tagAlign PE data.''' bedpe_filename = bam_basename + ".bedpe.gz" @@ -89,15 +105,42 @@ def convert_mapped_pe(bam, bam_basename): logger.info(samtools_sort_command) subprocess.check_output(shlex.split(samtools_sort_command)) - out, err = utils.run_pipe([ - "bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename), - "gzip -nc"], outfile=bedpe_filename) + steps = ["bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename)] + + steps.extend["gzip -nc"] + + out, err = utils.run_pipe(steps, outfile=bedpe_filename) + os.remove(nmsrt_bam_filename) + + if chrm: + steps.extend["grep -v 'chrM'"] + + # Convert read pairs to reads into standard tagAlign file + tag_steps = ["zcat -f %s" % (bedpe_filename)] + tag_steps.extend([r"""awk 'BEGIN{OFS="\t"}{printf "%s\t%s\t%s\tN\t1000\t%s\n%s\t%s\t%s\tN\t1000\t%s\n",$1,$2,$3,$9,$4,$5,$6,$10}'"""]) + if chrm: + tag_steps.extend["grep -v 'chrM'"] + tag_steps.extend(['gzip -cn']) + out, err = utils.run_pipe(tag_steps, outfile=tag_filename) + os.remove(bedpe_filename) + + +def tn5_shift_tagalign(tag_filename, shitfted_tag_filename): + '''Shift tagalign file for Tn5.''' + + steps = [ + "zcat -f %s" % (tag_filename), + r"""awk 'BEGIN{OFS="\t"}{{if ($6 == "+") {{$2 = $2 + 4}; else if ($6 == "-") {{$3 = $3 - 5}} print $0}}}'""", + "gzip -nc"] + out, err = utils.run_pipe(steps, outfile=shitfted_tag_filename) def main(): args = get_args() paired = args.paired bam = args.bam + chrm = args.chrm + tn5 = args.tn5 # Create a file handler handler = logging.FileHandler('convert_reads.log') @@ -111,14 +154,23 @@ def main(): utils.strip_extensions(bam, ['.bam'])) tag_filename = bam_basename + '.tagAlign.gz' - convert_mapped(bam, tag_filename) + + shitfted_tag_filename = bam_basename + ".tn5.tagAlign.gz" + if paired: # paired-end data convert_mapped_pe(bam, bam_basename) else: - bedse_filename = bam_basename + ".bedse.gz" - shutil.copy(tag_filename, bedse_filename) + convert_mapped(bam, tag_filename) + + if tn5: + logger.info("TN5-shifting TAGALIGN...") + tn5_shift_ta(tag_filename, shitfted_tag_filename) + else: + tn5_shift = ["cp %s %s" % (tag_filename, shitfted_tag_filename)] + out, err = utils.run_pipe(tn5_shift) + os.remove(tag_filename) if __name__ == '__main__': main()