Skip to content
Snippets Groups Projects
Commit 8ef6e674 authored by Venkat Malladi
Browse files

Pass nextflow test for design file.

parent ee536623
1 merge request: !4 Resolve "Add in Fastqc"
sample_id biosample factor treatment replicate control_id fastq_read1
ENCSR238SGC limb H3K4me1 None 1 ENCSR687ALB ENCFF833BLU.fastq.gz
ENCSR238SGC limb H3K4me1 None 2 ENCSR687ALB ENCFF646LXU.fastq.gz
ENCSR687ALB limb Control None 1 ENCSR687ALB ENCFF524CAC.fastq.gz
ENCSR687ALB limb Control None 2 ENCSR687ALB ENCFF163AJI.fastq.gz
sample_id biosample factor treatment replicate control_id fastq_read1 fastq_read2
ENCSR729LGA MCF-7 SP1 None 1 ENCSR217LRF ENCFF957SQS.fastq.gz ENCFF582IOZ.fastq.gz
ENCSR729LGA MCF-7 SP1 None 2 ENCSR217LRF ENCFF330MCZ.fastq.gz ENCFF293YFE.fastq.gz
ENCSR217LRF MCF-7 Control None 1 ENCSR217LRF ENCFF002DTU.fastq.gz ENCFF002EFI.fastq.gz
ENCSR217LRF MCF-7 Control None 1 ENCSR217LRF ENCFF002EFG.fastq.gz ENCFF002DTS.fastq.gz
echo "Downloading test set..." echo "Downloading Single-end data set Mouse ENCSR238SGC and ENCSR687ALB"
wget -O ENCLB904PZW_R1.fastq.gz https://www.encodeproject.org/files/ENCFF704XKC/@@download/ENCFF704XKC.fastq.gz wget https://www.encodeproject.org/files/ENCFF833BLU/@@download/ENCFF833BLU.fastq.gz
wget -O ENCLB904PZW_R2.fastq.gz https://www.encodeproject.org/files/ENCFF707CNX/@@download/ENCFF707CNX.fastq.gz wget https://www.encodeproject.org/files/ENCFF646LXU/@@download/ENCFF646LXU.fastq.gz
echo "Done" wget https://www.encodeproject.org/files/ENCFF524CAC/@@download/ENCFF524CAC.fastq.gz
wget https://www.encodeproject.org/files/ENCFF163AJI/@@download/ENCFF163AJI.fastq.gz
echo "Done with Single-end"
echo "Downloading Paired-end data set Human ENCSR729LGA and ENCSR217LRF"
wget https://www.encodeproject.org/files/ENCFF957SQS/@@download/ENCFF957SQS.fastq.gz
wget https://www.encodeproject.org/files/ENCFF582IOZ/@@download/ENCFF582IOZ.fastq.gz
wget https://www.encodeproject.org/files/ENCFF330MCZ/@@download/ENCFF330MCZ.fastq.gz
wget https://www.encodeproject.org/files/ENCFF293YFE/@@download/ENCFF293YFE.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002DTU/@@download/ENCFF002DTU.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002EFI/@@download/ENCFF002EFI.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002EFG/@@download/ENCFF002EFG.fastq.gz
wget https://www.encodeproject.org/files/ENCFF002DTS/@@download/ENCFF002DTS.fastq.gz
echo "Done with Paired-end"
...@@ -3,31 +3,48 @@ ...@@ -3,31 +3,48 @@
// Path to an input file, or a pattern for multiple inputs // Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf // Note - $baseDir is the location of this workflow file main.nf
params.reads = "$baseDir/../test_data/*_R{1,2}.fastq.gz" // Define Input variables
params.singleEnd = false params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
// Define List of Files
Channel Channel
.fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) .fromPath( params.reads )
.ifEmpty { error "Cannot find any reads matching: ${params.reads}\nIf this is single-end data, please specify."} .flatten()
.set { read_pairs } .map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
.set { readsList }
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
process checkDesignFile {
publishDir "$baseDir/output/design", mode: 'copy'
process qc_fastq { input:
tag "$name"
publishDir "$baseDir/output/$name/$task.process", mode: 'copy' designFile
file readsList
input: output:
set val(name), file(reads) from read_pairs
output: file("design.tsv") into designFilePaths
file "*_fastqc.{zip,html}" into qc_fastq_results
file "qc.log" into qc_fastq_log
script: script:
if (pairedEnd) {
"""
echo $designFile
python $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
""" """
module load python/3.6.1-2-anaconda }
module load fastqc/0.11.5 else {
$baseDir/scripts/qc_fastq.py -f $reads
""" """
python $baseDir/scripts/check_design.py -d $designFile -f $readsList
"""
}
} }
...@@ -35,7 +35,7 @@ def get_args(): ...@@ -35,7 +35,7 @@ def get_args():
parser.add_argument('-p', '--paired', parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.", help="True/False if paired-end or single end.",
default=True, default=False,
action='store_true') action='store_true')
args = parser.parse_args() args = parser.parse_args()
...@@ -119,7 +119,7 @@ def main(): ...@@ -119,7 +119,7 @@ def main():
# Read files # Read files
design_file = pd.read_csv(args.design, sep='\t') design_file = pd.read_csv(args.design, sep='\t')
fastq_file = pd.read_csv(args.design, sep='\t', names=['name', 'path']) fastq_file = pd.read_csv(args.fastq, sep='\t', names=['name', 'path'])
# Check design file # Check design file
check_design_headers(design_file, args.paired) check_design_headers(design_file, args.paired)
...@@ -127,7 +127,7 @@ def main(): ...@@ -127,7 +127,7 @@ def main():
new_design = check_files(design_file, fastq_file, args.paired) new_design = check_files(design_file, fastq_file, args.paired)
# Write out new design file # Write out new design file
new_design.to_csv('design.tsv', header=True, index=False) new_design.to_csv('design.tsv', header=True, sep='\t', index=False)
if __name__ == '__main__': if __name__ == '__main__':
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment