From cec86acdf5ef001319bd9a3da024025d4a337de9 Mon Sep 17 00:00:00 2001
From: Venkat Malladi <venkat.malladi@utsouthwestern.edu>
Date: Sun, 6 Jan 2019 10:56:41 -0600
Subject: [PATCH] Change fastq's to be new sample names as defined by the sample_id.

---
Review notes (free text in this section is ignored when the patch is applied).
Corrections made to the added lines of this patch:
 * rename_reads(): build renamed_fastq with append(); item assignment on an
   empty list raises IndexError.
 * rename_reads(): return renamed_fastq; the name fastq_rename was never
   defined inside the function.
 * main(): add the missing comma in rename_reads(fastq, sample, paired).
 * test_trim_reads_pairedend(): drop the stray leading space in the trimmed
   fastq file name.
 * Comment typo: "Great symbolic links" now reads "Create symbolic links".
All corrections keep the number of added lines unchanged, so the hunk headers
and the diffstat below are still accurate.

 workflow/main.nf                  |  4 ++--
 workflow/scripts/trim_reads.py    | 37 ++++++++++++++++++++++++++++++-
 workflow/tests/test_trim_reads.py |  8 +++----
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/workflow/main.nf b/workflow/main.nf
index 1103db0..77f22be 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -104,12 +104,12 @@ process trimReads {
 
   if (pairedEnd) {
     """
-    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -p
+    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
     """
   }
   else {
     """
-    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]}
+    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
     """
   }
 
diff --git a/workflow/scripts/trim_reads.py b/workflow/scripts/trim_reads.py
index 036c5f7..75e0b8c 100644
--- a/workflow/scripts/trim_reads.py
+++ b/workflow/scripts/trim_reads.py
@@ -5,6 +5,7 @@
 import subprocess
 import argparse
 import shutil
+import os
 import logging
 
 EPILOG = '''
@@ -32,6 +33,10 @@ def get_args():
                         nargs='+',
                         required=True)
 
+    parser.add_argument('-s', '--sample',
+                        help="The name of the sample.",
+                        required=True)
+
     parser.add_argument('-p', '--paired',
                         help="True/False if paired-end or single end.",
                         default=False,
@@ -61,6 +66,32 @@ def check_tools():
         raise Exception('Missing cutadapt')
 
 
+def rename_reads(fastq, sample, paired):
+    '''Symlink fastq files to sample-based names; return the new paths.'''
+
+    # Get current directory to build paths
+    cwd = os.getcwd()
+
+    renamed_fastq = []
+
+    if paired:  # paired-end data
+        # Set file names
+        renamed_fastq.append(cwd + '/' + sample + '_R1.fastq.gz')
+        renamed_fastq.append(cwd + '/' + sample + '_R2.fastq.gz')
+
+        # Create symbolic links
+        os.symlink(fastq[0], renamed_fastq[0])
+        os.symlink(fastq[1], renamed_fastq[1])
+    else:
+        # Set file names
+        renamed_fastq.append(cwd + '/' + sample + '_R1.fastq.gz')
+
+        # Create symbolic links
+        os.symlink(fastq[0], renamed_fastq[0])
+
+    return renamed_fastq
+
+
 def trim_reads(fastq, paired):
     '''Run trim_galore on 1 or 2 files.'''
 
@@ -82,6 +113,7 @@ def trim_reads(fastq, paired):
 def main():
     args = get_args()
     fastq = args.fastq
+    sample = args.sample
     paired = args.paired
 
     # Create a file handler
@@ -91,8 +123,11 @@ def main():
     # Check if tools are present
     check_tools()
 
+    # Rename fastq files by sample
+    fastq_rename = rename_reads(fastq, sample, paired)
+
     # Run trim_reads
-    trim_reads(fastq, paired)
+    trim_reads(fastq_rename, paired)
 
 
 if __name__ == '__main__':
diff --git a/workflow/tests/test_trim_reads.py b/workflow/tests/test_trim_reads.py
index b925627..502312f 100644
--- a/workflow/tests/test_trim_reads.py
+++ b/workflow/tests/test_trim_reads.py
@@ -13,9 +13,9 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 @pytest.mark.singleend
 def test_trim_reads_singleend():
     raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz'
-    trimmed_fastq = test_output_path + 'ENCFF833BLU_trimmed.fq.gz'
+    trimmed_fastq = test_output_path + 'ENCLB144FDT_trimmed.fq.gz'
     trimmed_fastq_report = test_output_path + \
-        'ENCFF833BLU.fastq.gz_trimming_report.txt'
+        'ENCLB144FDT.fastq.gz_trimming_report.txt'
     assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
     assert os.path.getsize(trimmed_fastq) == 2512853101
     assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4]
@@ -24,9 +24,9 @@ def test_trim_reads_singleend():
 @pytest.mark.pairedend
 def test_trim_reads_pairedend():
     raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz'
-    trimmed_fastq = test_output_path + ' ENCFF582IOZ_val_2.fq.gz'
+    trimmed_fastq = test_output_path + 'ENCLB637LZP_val_2.fq.gz'
     trimmed_fastq_report = test_output_path + \
-        'ENCFF582IOZ.fastq.gz_trimming_report.txt'
+        'ENCLB637LZP.fastq.gz_trimming_report.txt'
     assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
     assert os.path.getsize(trimmed_fastq) == 2229312710
     assert 'Trimming mode: paired-end' in open(trimmed_fastq_report).readlines()[4]
-- 
GitLab