From 4c3add7579069e0064102068b11a040738ea81ef Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Sun, 6 Jan 2019 12:27:37 -0600 Subject: [PATCH] Update mapped reads with sample id. --- workflow/main.nf | 6 +++--- workflow/scripts/map_reads.py | 27 ++++++++++----------------- workflow/tests/test_map_reads.py | 8 ++++---- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/workflow/main.nf b/workflow/main.nf index 77f22be..1908dfd 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -129,18 +129,18 @@ process alignReads { output: set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads - file '*.srt.bam.flagstat.qc' into mappedReadsStats + file '*.flagstat.qc' into mappedReadsStats script: if (pairedEnd) { """ - python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -p + python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p """ } else { """ - python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa + python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId """ } diff --git a/workflow/scripts/map_reads.py b/workflow/scripts/map_reads.py index b7d3144..b8a2ec5 100644 --- a/workflow/scripts/map_reads.py +++ b/workflow/scripts/map_reads.py @@ -24,16 +24,10 @@ logger.addHandler(logging.NullHandler()) logger.propagate = False logger.setLevel(logging.INFO) -# the order of this list is important. -# strip_extensions strips from the right inward, so -# the expected right-most extensions should appear first (like .gz) -# Modified from J. Seth Strattan -STRIP_EXTENSIONS = ['.gz', '.fq', '.fastq', '_trimmed'] - def get_args(): '''Define arguments.''' - + parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -47,6 +41,10 @@ def get_args(): help="The bwa index of the reference genome.", required=True) + parser.add_argument('-s', '--sample', + help="The name of the sample.", + required=True) + parser.add_argument('-p', '--paired', help="True/False if paired-end or single end.", default=False, @@ -157,6 +155,7 @@ def main(): paired = args.paired fastq = args.fastq reference = args.reference + sample = args.sample # Create a file handler handler = logging.FileHandler('map.log') @@ -171,23 +170,17 @@ def main(): sai_filename = generate_sa(fq, reference) sai.append(sai_filename) + # Make file basename + fastq_basename = sample + # Run alignment for either PE or SE if paired: # paired-end data - fastq_r1_basename = os.path.basename( - utils.strip_extensions(fastq[0], STRIP_EXTENSIONS)) - fastq_r2_basename = os.path.basename( - utils.strip_extensions(fastq[1], STRIP_EXTENSIONS)) - fastq_basename = fastq_r1_basename + fastq_r2_basename - bam_filename = align_pe(fastq, sai, reference, fastq_basename) else: - fastq_basename = os.path.basename( - utils.strip_extensions(fastq[0], STRIP_EXTENSIONS)) - bam_filename = align_se(fastq, sai, reference, fastq_basename) - bam_mapstats_filename = '%s.srt.bam.flagstat.qc' % (fastq_basename) + bam_mapstats_filename = '%s.flagstat.qc' % (fastq_basename) with open(bam_mapstats_filename, 'w') as fh: subprocess.check_call( shlex.split("samtools flagstat %s" % (bam_filename)), diff --git a/workflow/tests/test_map_reads.py b/workflow/tests/test_map_reads.py index 3288582..60a2e39 100644 --- a/workflow/tests/test_map_reads.py +++ b/workflow/tests/test_map_reads.py @@ -9,8 +9,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_map_reads_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.srt.bam')) - aligned_reads_report = test_output_path + 'ENCFF646LXU.srt.bam.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bam')) + aligned_reads_report = test_output_path + 'ENCLB831RUI.flagstat.qc' samtools_report = open(aligned_reads_report).readlines() assert '80795025 + 0 in total' in samtools_report[0] assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4] @@ -18,8 +18,8 @@ def test_map_reads_singleend(): @pytest.mark.pairedend def test_map_reads_pairedend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF002DTU_val_1ENCFF002EFI_val_2.srt.bam')) - aligned_reads_report = test_output_path + 'ENCFF002DTU_val_1ENCFF002EFI_val_2.srt.bam.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB678IDC.bam')) + aligned_reads_report = test_output_path + 'ENCLB678IDC.flagstat.qc' samtools_report = open(aligned_reads_report).readlines() assert '72660890 + 0 in total' in samtools_report[0] assert '72053925 + 0 mapped (99.16% : N/A)' in samtools_report[4] -- GitLab