Skip to content
Snippets Groups Projects
Commit cec86acd authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Rename the fastq files to the new sample names defined by the sample_id.

parent 78b410c7
No related merge requests found
......@@ -104,12 +104,12 @@ process trimReads {
if (pairedEnd) {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -p
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
"""
}
else {
"""
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]}
python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
"""
}
......
......@@ -5,6 +5,7 @@
import subprocess
import argparse
import shutil
import os
import logging
EPILOG = '''
......@@ -32,6 +33,10 @@ def get_args():
nargs='+',
required=True)
parser.add_argument('-s', '--sample',
help="The name of the sample.",
required=True)
parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.",
default=False,
......@@ -61,6 +66,32 @@ def check_tools():
raise Exception('Missing cutadapt')
def rename_reads(fastq, sample, paired):
    '''Rename fastq files by sample name.

    Creates symbolic links in the current working directory that point at
    the given fastq files. Links are named <sample>_R1.fastq.gz and, for
    paired-end data, <sample>_R2.fastq.gz.

    Args:
        fastq: List of fastq file paths (one for single-end, two for
            paired-end data).
        sample: The name of the sample, used as the link name prefix.
        paired: True for paired-end data, False for single-end data.

    Returns:
        List of paths of the created symbolic links, in read order.

    Raises:
        ValueError: If fewer fastq files are given than the mode requires.
        OSError: If a symbolic link cannot be created (for example when
            the destination already exists).
    '''

    # Get current directory to build paths
    cwd = os.getcwd()

    # Paired-end data links both mates, single-end only the first read
    read_count = 2 if paired else 1
    if len(fastq) < read_count:
        raise ValueError(
            'Expected {} fastq file(s) for sample {}, got {}'.format(
                read_count, sample, len(fastq)))

    renamed_fastq = []
    for read_number, source in enumerate(fastq[:read_count], start=1):
        # Set file name
        link = os.path.join(
            cwd, '{}_R{}.fastq.gz'.format(sample, read_number))

        # Create symbolic link
        os.symlink(source, link)
        renamed_fastq.append(link)

    return renamed_fastq
def trim_reads(fastq, paired):
'''Run trim_galore on 1 or 2 files.'''
......@@ -82,6 +113,7 @@ def trim_reads(fastq, paired):
def main():
args = get_args()
fastq = args.fastq
sample = args.sample
paired = args.paired
# Create a file handler
......@@ -91,8 +123,11 @@ def main():
# Check if tools are present
check_tools()
# Rename fastq files by sample so trimmed outputs carry the sample name
fastq_rename = rename_reads(fastq, sample, paired)
# Run trim_reads
trim_reads(fastq, paired)
trim_reads(fastq_rename, paired)
if __name__ == '__main__':
......
......@@ -13,9 +13,9 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
@pytest.mark.singleend
def test_trim_reads_singleend():
    '''Check the single-end trimmed fastq and its trimming report.'''
    raw_fastq = test_data_path + 'ENCFF833BLU.fastq.gz'
    # Trimmed outputs are looked up by sample name rather than raw file name
    trimmed_fastq = test_output_path + 'ENCLB144FDT_trimmed.fq.gz'
    trimmed_fastq_report = test_output_path + \
        'ENCLB144FDT.fastq.gz_trimming_report.txt'
    assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
    assert os.path.getsize(trimmed_fastq) == 2512853101
    # Read the report inside a context manager so the handle is closed
    with open(trimmed_fastq_report) as report:
        report_lines = report.readlines()
    assert 'Trimming mode: single-end' in report_lines[4]
......@@ -24,9 +24,9 @@ def test_trim_reads_singleend():
@pytest.mark.pairedend
def test_trim_reads_pairedend():
    '''Check the paired-end trimmed fastq and its trimming report.'''
    raw_fastq = test_data_path + 'ENCFF582IOZ.fastq.gz'
    # No leading space in the file name: it is appended directly to the
    # output directory path, as in the single-end test
    trimmed_fastq = test_output_path + 'ENCLB637LZP_val_2.fq.gz'
    trimmed_fastq_report = test_output_path + \
        'ENCLB637LZP.fastq.gz_trimming_report.txt'
    assert os.path.getsize(raw_fastq) != os.path.getsize(trimmed_fastq)
    assert os.path.getsize(trimmed_fastq) == 2229312710
    # Read the report inside a context manager so the handle is closed
    with open(trimmed_fastq_report) as report:
        report_lines = report.readlines()
    assert 'Trimming mode: paired-end' in report_lines[4]
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment