Skip to content
Snippets Groups Projects
Commit 4c3add75 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Update mapped reads with sample id.

parent b1c3fb87
1 merge request!20Resolve "Use SampleIds/ Experiment Id as file names throughtout pipeline"
...@@ -129,18 +129,18 @@ process alignReads { ...@@ -129,18 +129,18 @@ process alignReads {
output: output:
set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads set sampleId, file('*.bam'), experimentId, biosample, factor, treatment, replicate, controlId into mappedReads
file '*.srt.bam.flagstat.qc' into mappedReadsStats file '*.flagstat.qc' into mappedReadsStats
script: script:
if (pairedEnd) { if (pairedEnd) {
""" """
python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -p python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p
""" """
} }
else { else {
""" """
python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId
""" """
} }
......
...@@ -24,16 +24,10 @@ logger.addHandler(logging.NullHandler()) ...@@ -24,16 +24,10 @@ logger.addHandler(logging.NullHandler())
logger.propagate = False logger.propagate = False
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
# the order of this list is important.
# strip_extensions strips from the right inward, so
# the expected right-most extensions should appear first (like .gz)
# Modified from J. Seth Strattan
STRIP_EXTENSIONS = ['.gz', '.fq', '.fastq', '_trimmed']
def get_args(): def get_args():
'''Define arguments.''' '''Define arguments.'''
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG, description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter) formatter_class=argparse.RawDescriptionHelpFormatter)
...@@ -47,6 +41,10 @@ def get_args(): ...@@ -47,6 +41,10 @@ def get_args():
help="The bwa index of the reference genome.", help="The bwa index of the reference genome.",
required=True) required=True)
parser.add_argument('-s', '--sample',
help="The name of the sample.",
required=True)
parser.add_argument('-p', '--paired', parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.", help="True/False if paired-end or single end.",
default=False, default=False,
...@@ -157,6 +155,7 @@ def main(): ...@@ -157,6 +155,7 @@ def main():
paired = args.paired paired = args.paired
fastq = args.fastq fastq = args.fastq
reference = args.reference reference = args.reference
sample = args.sample
# Create a file handler # Create a file handler
handler = logging.FileHandler('map.log') handler = logging.FileHandler('map.log')
...@@ -171,23 +170,17 @@ def main(): ...@@ -171,23 +170,17 @@ def main():
sai_filename = generate_sa(fq, reference) sai_filename = generate_sa(fq, reference)
sai.append(sai_filename) sai.append(sai_filename)
# Make file basename
fastq_basename = sample
# Run alignment for either PE or SE # Run alignment for either PE or SE
if paired: # paired-end data if paired: # paired-end data
fastq_r1_basename = os.path.basename(
utils.strip_extensions(fastq[0], STRIP_EXTENSIONS))
fastq_r2_basename = os.path.basename(
utils.strip_extensions(fastq[1], STRIP_EXTENSIONS))
fastq_basename = fastq_r1_basename + fastq_r2_basename
bam_filename = align_pe(fastq, sai, reference, fastq_basename) bam_filename = align_pe(fastq, sai, reference, fastq_basename)
else: else:
fastq_basename = os.path.basename(
utils.strip_extensions(fastq[0], STRIP_EXTENSIONS))
bam_filename = align_se(fastq, sai, reference, fastq_basename) bam_filename = align_se(fastq, sai, reference, fastq_basename)
bam_mapstats_filename = '%s.srt.bam.flagstat.qc' % (fastq_basename) bam_mapstats_filename = '%s.flagstat.qc' % (fastq_basename)
with open(bam_mapstats_filename, 'w') as fh: with open(bam_mapstats_filename, 'w') as fh:
subprocess.check_call( subprocess.check_call(
shlex.split("samtools flagstat %s" % (bam_filename)), shlex.split("samtools flagstat %s" % (bam_filename)),
......
...@@ -9,8 +9,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -9,8 +9,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_map_reads_singleend(): def test_map_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.srt.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB831RUI.bam'))
aligned_reads_report = test_output_path + 'ENCFF646LXU.srt.bam.flagstat.qc' aligned_reads_report = test_output_path + 'ENCLB831RUI.flagstat.qc'
samtools_report = open(aligned_reads_report).readlines() samtools_report = open(aligned_reads_report).readlines()
assert '80795025 + 0 in total' in samtools_report[0] assert '80795025 + 0 in total' in samtools_report[0]
assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4] assert '80050072 + 0 mapped (99.08% : N/A)' in samtools_report[4]
...@@ -18,8 +18,8 @@ def test_map_reads_singleend(): ...@@ -18,8 +18,8 @@ def test_map_reads_singleend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_map_reads_pairedend(): def test_map_reads_pairedend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF002DTU_val_1ENCFF002EFI_val_2.srt.bam')) assert os.path.exists(os.path.join(test_output_path, 'ENCLB678IDC.bam'))
aligned_reads_report = test_output_path + 'ENCFF002DTU_val_1ENCFF002EFI_val_2.srt.bam.flagstat.qc' aligned_reads_report = test_output_path + 'ENCLB678IDC.flagstat.qc'
samtools_report = open(aligned_reads_report).readlines() samtools_report = open(aligned_reads_report).readlines()
assert '72660890 + 0 in total' in samtools_report[0] assert '72660890 + 0 in total' in samtools_report[0]
assert '72053925 + 0 mapped (99.16% : N/A)' in samtools_report[4] assert '72053925 + 0 mapped (99.16% : N/A)' in samtools_report[4]
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment