Skip to content
Snippets Groups Projects
Commit 8ef6e674 authored by Venkat Malladi
Browse files

Pass nextflow test for design file.

parent ee536623
Branches
Tags
No related merge requests found
sample_id biosample factor treatment replicate control_id fastq_read1
ENCSR238SGC limb H3K4me1 None 1 ENCSR687ALB ENCFF833BLU.fastq.gz
ENCSR238SGC limb H3K4me1 None 2 ENCSR687ALB ENCFF646LXU.fastq.gz
ENCSR687ALB limb Control None 1 ENCSR687ALB ENCFF524CAC.fastq.gz
ENCSR687ALB limb Control None 2 ENCSR687ALB ENCFF163AJI.fastq.gz
sample_id biosample factor treatment replicate control_id fastq_read1 fastq_read2
ENCSR729LGA MCF-7 SP1 None 1 ENCSR217LRF ENCFF957SQS.fastq.gz ENCFF582IOZ.fastq.gz
ENCSR729LGA MCF-7 SP1 None 2 ENCSR217LRF ENCFF330MCZ.fastq.gz ENCFF293YFE.fastq.gz
ENCSR217LRF MCF-7 Control None 1 ENCSR217LRF ENCFF002DTU.fastq.gz ENCFF002EFI.fastq.gz
ENCSR217LRF MCF-7 Control None 2 ENCSR217LRF ENCFF002EFG.fastq.gz ENCFF002DTS.fastq.gz
# Fetch the ENCODE FASTQ files used as pipeline test data.
# Every file is written to the current working directory.
encode_files="https://www.encodeproject.org/files"

# Original test set: two files saved under explicit ENCLB904PZW_R1/_R2 names.
echo "Downloading test set..."
wget -O ENCLB904PZW_R1.fastq.gz "${encode_files}/ENCFF704XKC/@@download/ENCFF704XKC.fastq.gz"
wget -O ENCLB904PZW_R2.fastq.gz "${encode_files}/ENCFF707CNX/@@download/ENCFF707CNX.fastq.gz"
echo "Done"

# Single-end set: no -O is given, so wget keeps the <accession>.fastq.gz
# name from the URL; the design file refers to the reads by those names.
echo "Downloading Single-end data set Mouse ENCSR238SGC and ENCSR687ALB"
for accession in ENCFF833BLU ENCFF646LXU ENCFF524CAC ENCFF163AJI; do
    wget "${encode_files}/${accession}/@@download/${accession}.fastq.gz"
done
echo "Done with Single-end"

# Paired-end set: accessions are listed in the original download order
# (read 1 followed by its read 2 mate for each row of the design file).
echo "Downloading Paired-end data set Human ENCSR729LGA and ENCSR217LRF"
for accession in \
    ENCFF957SQS ENCFF582IOZ \
    ENCFF330MCZ ENCFF293YFE \
    ENCFF002DTU ENCFF002EFI \
    ENCFF002EFG ENCFF002DTS; do
    wget "${encode_files}/${accession}/@@download/${accession}.fastq.gz"
done
echo "Done with Paired-end"
......@@ -3,31 +3,48 @@
// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf
params.reads = "$baseDir/../test_data/*_R{1,2}.fastq.gz"
params.singleEnd = false
// Define Input variables
params.reads = "$baseDir/../test_data/*.fastq.gz"
params.pairedEnd = false
params.designFile = "$baseDir/../test_data/design_ENCSR238SGC_SE.txt"
// Define List of Files
Channel
.fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 )
.ifEmpty { error "Cannot find any reads matching: ${params.reads}\nIf this is single-end data, please specify."}
.set { read_pairs }
.fromPath( params.reads )
.flatten()
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
.set { readsList }
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
process checkDesignFile {
publishDir "$baseDir/output/design", mode: 'copy'
process qc_fastq {
tag "$name"
input:
publishDir "$baseDir/output/$name/$task.process", mode: 'copy'
designFile
file readsList
input:
set val(name), file(reads) from read_pairs
output:
output:
file "*_fastqc.{zip,html}" into qc_fastq_results
file "qc.log" into qc_fastq_log
file("design.tsv") into designFilePaths
script:
script:
if (pairedEnd) {
"""
echo $designFile
python $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
"""
module load python/3.6.1-2-anaconda
module load fastqc/0.11.5
$baseDir/scripts/qc_fastq.py -f $reads
}
else {
"""
python $baseDir/scripts/check_design.py -d $designFile -f $readsList
"""
}
}
......@@ -35,7 +35,7 @@ def get_args():
parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.",
default=True,
default=False,
action='store_true')
args = parser.parse_args()
......@@ -119,7 +119,7 @@ def main():
# Read files
design_file = pd.read_csv(args.design, sep='\t')
fastq_file = pd.read_csv(args.design, sep='\t', names=['name', 'path'])
fastq_file = pd.read_csv(args.fastq, sep='\t', names=['name', 'path'])
# Check design file
check_design_headers(design_file, args.paired)
......@@ -127,7 +127,7 @@ def main():
new_design = check_files(design_file, fastq_file, args.paired)
# Write out new design file
new_design.to_csv('design.tsv', header=True, index=False)
new_design.to_csv('design.tsv', header=True, sep='\t', index=False)
if __name__ == '__main__':
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment