diff --git a/workflow/main.nf b/workflow/main.nf index 77f22becef0d5fd228ebf80b20936f9c831622e0..1908dfdfb2724a07916d7a342a088c3becf7a12c 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -129,18 +129,18 @@ process alignReads { output: set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads - file '*.srt.bam.flagstat.qc' into mappedReadsStats + file '*.flagstat.qc' into mappedReadsStats script: if (pairedEnd) { """ - python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -p + python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p """ } else { """ - python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa + python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId """ } diff --git a/workflow/scripts/map_reads.py b/workflow/scripts/map_reads.py index b7d3144fe8484f2d8939c773229c45b58e283315..b8a2ec54dc307949b374beec5f9af007af08685a 100644 --- a/workflow/scripts/map_reads.py +++ b/workflow/scripts/map_reads.py @@ -24,16 +24,10 @@ logger.addHandler(logging.NullHandler()) logger.propagate = False logger.setLevel(logging.INFO) -# the order of this list is important. -# strip_extensions strips from the right inward, so -# the expected right-most extensions should appear first (like .gz) -# Modified from J. Seth Strattan -STRIP_EXTENSIONS = ['.gz', '.fq', '.fastq', '_trimmed'] - def get_args(): '''Define arguments.''' - + parser = argparse.ArgumentParser( description=__doc__, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -47,6 +41,10 @@ def get_args(): help="The bwa index of the reference genome.", required=True) + parser.add_argument('-s', '--sample', + help="The name of the sample.", + required=True) + parser.add_argument('-p', '--paired', help="True/False if paired-end or single end.", default=False, @@ -157,6 +155,7 @@ def main(): paired = args.paired fastq = args.fastq reference = args.reference + sample = args.sample # Create a file handler handler = logging.FileHandler('map.log') @@ -171,23 +170,17 @@ def main(): sai_filename = generate_sa(fq, reference) sai.append(sai_filename) + # Make file basename + fastq_basename = sample + # Run alignment for either PE or SE if paired: # paired-end data - fastq_r1_basename = os.path.basename( - utils.strip_extensions(fastq[0], STRIP_EXTENSIONS)) - fastq_r2_basename = os.path.basename( - utils.strip_extensions(fastq[1], STRIP_EXTENSIONS)) - fastq_basename = fastq_r1_basename + fastq_r2_basename - bam_filename = align_pe(fastq, sai, reference, fastq_basename) else: - fastq_basename = os.path.basename( - utils.strip_extensions(fastq[0], STRIP_EXTENSIONS)) - bam_filename = align_se(fastq, sai, reference, fastq_basename) - bam_mapstats_filename = '%s.srt.bam.flagstat.qc' % (fastq_basename) + bam_mapstats_filename = '%s.flagstat.qc' % (fastq_basename) with open(bam_mapstats_filename, 'w') as fh: subprocess.check_call( shlex.split("samtools flagstat %s" % (bam_filename)), diff --git a/workflow/tests/test_map_reads.py b/workflow/tests/test_map_reads.py index 328858216abb88528c2d25e06bce20d7d469f1cb..60a2e39009f64f2a554d200ba41f1f4d28fe92a8 100644 --- a/workflow/tests/test_map_reads.py +++ b/workflow/tests/test_map_reads.py @@ -9,8 +9,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ @pytest.mark.singleend def test_map_reads_singleend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.srt.bam')) - aligned_reads_report = test_output_path + 'ENCFF646LXU.srt.bam.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bam')) + aligned_reads_report = test_output_path + 'ENCLB831RUI.flagstat.qc' samtools_report = open(aligned_reads_report).readlines() assert '80795025 + 0 in total' in samtools_report[0] assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4] @@ -18,8 +18,8 @@ def test_map_reads_singleend(): @pytest.mark.pairedend def test_map_reads_pairedend(): - assert os.path.exists(os.path.join(test_output_path, 'ENCFF002DTU_val_1ENCFF002EFI_val_2.srt.bam')) - aligned_reads_report = test_output_path + 'ENCFF002DTU_val_1ENCFF002EFI_val_2.srt.bam.flagstat.qc' + assert os.path.exists(os.path.join(test_output_path, 'ENCLB678IDC.bam')) + aligned_reads_report = test_output_path + 'ENCLB678IDC.flagstat.qc' samtools_report = open(aligned_reads_report).readlines() assert '72660890 + 0 in total' in samtools_report[0] assert '72053925 + 0 mapped (99.16% : N/A)' in samtools_report[4]