Skip to content
Snippets Groups Projects
Commit cec86acd authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Rename the fastq files to use the new sample names defined by sample_id.

parent 78b410c7
Branches
Tags
1 merge request !20 Resolve "Use SampleIds/Experiment Id as file names throughout pipeline"
...@@ -104,12 +104,12 @@ process trimReads { ...@@ -104,12 +104,12 @@ process trimReads {
if (pairedEnd) { if (pairedEnd) {
""" """
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -p python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
""" """
} }
else { else {
""" """
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
""" """
} }
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
import subprocess import subprocess
import argparse import argparse
import shutil import shutil
import os
import logging import logging
EPILOG = ''' EPILOG = '''
...@@ -32,6 +33,10 @@ def get_args(): ...@@ -32,6 +33,10 @@ def get_args():
nargs='+', nargs='+',
required=True) required=True)
parser.add_argument('-s', '--sample',
help="The name of the sample.",
required=True)
parser.add_argument('-p', '--paired', parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.", help="True/False if paired-end or single end.",
default=False, default=False,
...@@ -61,6 +66,32 @@ def check_tools(): ...@@ -61,6 +66,32 @@ def check_tools():
raise Exception('Missing cutadapt') raise Exception('Missing cutadapt')
def rename_reads(fastq, sample, paired):
    '''Rename fastq files by sample name.

    Creates symbolic links in the current working directory that point at
    the given fastq files and are named after the sample:
    ``<sample>_R1.fastq.gz`` and, for paired-end data,
    ``<sample>_R2.fastq.gz``.

    Args:
        fastq: List of fastq file paths; the first entry is read 1 and,
            for paired-end data, the second entry is read 2.
        sample: The name of the sample used to build the new file names.
        paired: True for paired-end data, False for single-end data.

    Returns:
        List of the symbolic link paths, in the same order as ``fastq``.

    Raises:
        IndexError: If ``fastq`` holds fewer files than the mode requires.
        OSError: If a symbolic link cannot be created (e.g. it already
            exists).
    '''

    # Get current directory to build paths
    cwd = os.getcwd()

    # Paired-end data has two mates (R1, R2); single-end only has R1
    num_reads = 2 if paired else 1

    renamed_fastq = []
    for index in range(num_reads):
        # Set file name
        link_name = os.path.join(
            cwd, sample + '_R' + str(index + 1) + '.fastq.gz')

        # Create symbolic link
        os.symlink(fastq[index], link_name)

        # Append rather than assign by index: the list starts out empty
        renamed_fastq.append(link_name)

    return renamed_fastq
def trim_reads(fastq, paired): def trim_reads(fastq, paired):
'''Run trim_galore on 1 or 2 files.''' '''Run trim_galore on 1 or 2 files.'''
...@@ -82,6 +113,7 @@ def trim_reads(fastq, paired): ...@@ -82,6 +113,7 @@ def trim_reads(fastq, paired):
def main(): def main():
args = get_args() args = get_args()
fastq = args.fastq fastq = args.fastq
sample = args.sample
paired = args.paired paired = args.paired
# Create a file handler # Create a file handler
...@@ -91,8 +123,11 @@ def main(): ...@@ -91,8 +123,11 @@ def main():
# Check if tools are present # Check if tools are present
check_tools() check_tools()
# Rename fastq files by sample
fastq_rename = rename_reads(fastq, sample paired)
# Run trim_reads # Run trim_reads
trim_reads(fastq, paired) trim_reads(fastq_rename, paired)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -13,9 +13,9 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \ ...@@ -13,9 +13,9 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend @pytest.mark.singleend
def test_trim_reads_singleend(): def test_trim_reads_singleend():
raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz' raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz'
trimmed_fastq = test_output_path + 'ENCFF833BLU_trimmed.fq.gz' trimmed_fastq = test_output_path + 'ENCLB144FDT_trimmed.fq.gz'
trimmed_fastq_report = test_output_path + \ trimmed_fastq_report = test_output_path + \
'ENCFF833BLU.fastq.gz_trimming_report.txt' 'ENCLB144FDT.fastq.gz_trimming_report.txt'
assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
assert os.path.getsize(trimmed_fastq) == 2512853101 assert os.path.getsize(trimmed_fastq) == 2512853101
assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4] assert 'Trimming mode: single-end' in open(trimmed_fastq_report).readlines()[4]
...@@ -24,9 +24,9 @@ def test_trim_reads_singleend(): ...@@ -24,9 +24,9 @@ def test_trim_reads_singleend():
@pytest.mark.pairedend @pytest.mark.pairedend
def test_trim_reads_pairedend(): def test_trim_reads_pairedend():
raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz' raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz'
trimmed_fastq = test_output_path + ' ENCFF582IOZ_val_2.fq.gz' trimmed_fastq = test_output_path + ' ENCLB637LZP_val_2.fq.gz'
trimmed_fastq_report = test_output_path + \ trimmed_fastq_report = test_output_path + \
'ENCFF582IOZ.fastq.gz_trimming_report.txt' 'ENCLB637LZP.fastq.gz_trimming_report.txt'
assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq) assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
assert os.path.getsize(trimmed_fastq) == 2229312710 assert os.path.getsize(trimmed_fastq) == 2229312710
assert 'Trimming mode: paired-end' in open(trimmed_fastq_report).readlines()[4] assert 'Trimming mode: paired-end' in open(trimmed_fastq_report).readlines()[4]
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment