Skip to content
Snippets Groups Projects
Commit 3caba697 authored by Jeremy Mathews's avatar Jeremy Mathews
Browse files

Test File Input - Process Error

parent 5fc25b2c
Branches
Tags
1 merge request!7Resolve "Check file inputs"
Pipeline #4126 failed with stage
in 1 minute and 50 seconds
......@@ -3,39 +3,6 @@
// Default parameter values to run tests
params.input = "$baseDir"
params.output= "$baseDir/output"
params.files="$params.input/*.f*"
files=Channel.fromPath(params.files)
process checkinputfiles {
input:
file fqs from files
output:
file("*.fastq.gz") into fastqs
script:
"""
if [[ $fqs == *.fq ]];
then new_name=`echo ${fqs} | sed -e "s/.fq\$/.fastq/"`;
mv ${fqs} \${new_name};
gzip -f \${new_name};
elif [[ $fqs == *.fastq ]];
then gzip -f $fqs;
elif [[ $fqs == *.fq.gz ]];
then new_name=`echo ${fqs} | sed -e "s/.fq.gz\$/.fastq.gz/"`;
mv ${fqs} \${new_name};
else
zcat ${fqs} | gzip -c > ${fqs}.temp
mv ${fqs}.temp ${fqs}
fi;
"""
}
params.design="$params.input/design.txt"
params.genome="/project/shared/bicf_workflow_ref/human/grch38/"
params.markdups="picard"
......@@ -46,7 +13,6 @@ params.align = 'hisat'
params.fusion = 'skip'
params.dea = 'detect'
design_file = file(params.design)
design_file = file(params.design)
gtf_file = file("$params.genome/gencode.gtf")
genenames = file("$params.genome/genenames.txt")
......@@ -57,19 +23,61 @@ indel="$params.genome/goldindels.vcf.gz"
knownindel=file(indel)
dbsnp=file(dbsnp)
files = Channel
.fromPath("$params.input/*")
good = Channel.fromPath("$params.input/*.fastq.gz")
process checkinputfiles {
module 'parallel/20150122:pigz/2.4'
queue 'super'
input:
file ("*") from files.collect()
file design_file name "design.tsv"
output:
set file ("design.tsv"), file ("*.fastq.gz") into design
file ("*.fastq.gz") into fastqs
script:
"""
for fqs in `ls | grep -v "^design.tsv\$"`;
do if [[ \$fqs == *.fq ]];
then new_name=`echo \${fqs} | sed -e "s/.fq\$/.fastq/"`;
mv \${fqs} \${new_name};
echo "pigz -f \${new_name}";
elif [[ \$fqs == *.fastq ]];
then echo "pigz -f \$fqs";
elif [[ \$fqs == *.fq.gz ]];
then new_name=`echo \${fqs} | sed -e "s/.fq.gz\$/.fastq.gz/"`;
mv \${fqs} \${new_name};
fi;
done | shuf | parallel -j\${SLURM_CPUS_ON_NODE};
"""
}
// params genome is the directory
// base name for the index is always genome
index_path = file(params.genome)
process checkdesignfile {
executor 'local'
publishDir "$params.output", mode: 'copy'
input:
file design_file name 'design.ori.txt'
set file ("design.ori.txt"), file ("*") from design
output:
file("design.valid.txt") into newdesign
stdout spltnames
script:
"""
perl -p -e 's/\\r\\n*/\\n/g' design.ori.txt > design.fix.txt
perl $baseDir/scripts/check_designfile.pl ${params.pairs} design.fix.txt
......@@ -78,14 +86,17 @@ process checkdesignfile {
def fileMap = [:]
fastqs.each {
fastqs
.mix(good)
.flatten()
.each {
final fileName = it.getFileName().toString()
prefix = fileName.lastIndexOf('/')
fileMap[fileName] = it
}
if (params.pairs == 'pe') {
spltnames
spltnames
.splitCsv()
.filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
......@@ -94,7 +105,7 @@ spltnames
else {
spltnames
.splitCsv()
.filter { fleMap.get(it[1]) != null }
.filter { fileMap.get(it[1]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]),'') }
.set { read }
}
......@@ -103,14 +114,21 @@ if( ! read ) { error "Didn't match any input files with entries in the design fi
//
// Trim raw reads using trimgalore
//
process trim {
errorStrategy 'ignore'
input:
set pair_id, file(read1), file(read2) from read
output:
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
script:
"""
bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
"""
......@@ -126,13 +144,21 @@ process trim {
process starfusion {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from fusionfq
output:
file("${pair_id}.starfusion.txt") into fusionout
when:
params.fusion == 'detect' && params.pairs == 'pe'
script:
"""
bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
"""
......@@ -143,12 +169,17 @@ process align {
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from trimread
output:
set pair_id, file("${pair_id}.bam") into aligned
set pair_id, file("${pair_id}.bam") into aligned2
file("${pair_id}.alignerout.txt") into hsatout
script:
"""
bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
"""
......@@ -159,11 +190,16 @@ process alignqc {
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(bam) from aligned2
output:
file("${pair_id}.flagstat.txt") into alignstats
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
script:
"""
bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
"""
......@@ -175,10 +211,16 @@ process parse_alignstat {
publishDir "$params.output", mode: 'copy'
input:
file(txt) from alignstats.toList()
file(txt) from hsatout.toList()
output:
file('alignment.summary.txt')
script:
"""
perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
"""
......@@ -190,11 +232,16 @@ process markdups {
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from aligned
output:
set pair_id, file("${pair_id}.dedup.bam") into deduped1
set pair_id, file("${pair_id}.dedup.bam") into deduped2
script:
"""
bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
"""
......@@ -206,13 +253,20 @@ process markdups {
process geneabund {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from deduped1
output:
file("${pair_id}.cts") into counts
file("${pair_id}.cts.summary") into ctsum
file("${pair_id}_stringtie") into strcts
file("${pair_id}.fpkm.txt") into fpkm
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
"""
......@@ -221,7 +275,9 @@ process geneabund {
process statanal {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
file count_file from counts.toList()
file count_sum from ctsum.toList()
file newdesign name 'design.txt'
......@@ -229,29 +285,41 @@ process statanal {
file geneset name 'geneset.gmt'
file fpkm_file from fpkm.toList()
file stringtie_dir from strcts.toList()
output:
output:
file "*.txt" into txtfiles
file "*.png" into psfiles
file("*.rda") into rdafiles
file("geneset.shiny.gmt") into gmtfile
when:
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
"""
}
process gatkbam {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(rbam) from deduped2
output:
set file("${pair_id}.final.bam"),file("${pair_id}.final.bai") into gatkbam
when:
params.align == 'hisat' && $index_path == '/project/shared/bicf_workflow_ref/GRCh38/'
script:
"""
bash $baseDir/process_scripts/variants/gatkrunner.sh -a gatkbam_rna -b $rbam -r ${index_path}/hisat_index -p $pair_id
"""
......
#!/usr/bin/perl -w
#check_designfile.pl
use strict;
use warnings;
my $pe = shift @ARGV;
my $dfile = shift @ARGV;
open OUT, ">design.valid.txt" or die $!;
......@@ -11,7 +14,6 @@ $head =~ s/FullPathTo//g;
my @colnames = split(/\t/,$head);
my %newcols = map {$_=> 1} @colnames;
unless (grep(/FqR1/,@colnames)) {
die "Missing Sequence Files in Design File: FqR1\n";
}
......@@ -49,16 +51,36 @@ while (my $line = <DFILE>) {
$hash{FinalBam} = $hash{SampleID}.".final.bam";
$hash{Bam} = $hash{SampleID}.".bam";
unless ($hash{SampleGroup}) {
$j = $lnct %% 2;
my $j = $lnct %% 2;
$hash{SampleGroup} = $grp[$j];
}
$hash{SampleGroup} =~ s/_//g;
unless ($hash{FqR1} =~ m/.fastq.gz/) {
my $name = $hash{FqR1};
$name =~ s/.f.*/.fastq.gz/;
unless ($hash{FqR1} eq $name) {
$hash{FqR1} = $name;
unless (-e ($name)) {
die "Unable to find fastq read 1\n${name}\n";
}
}
}
$hash{FqR2} = 'na' unless ($hash{FqR2});
unless ($hash{FqR2} eq 'na') {
my $name = $hash{FqR2};
$name =~ s/.f.*/.fastq.gz/;
unless ($hash{FqR2} eq $name) {
$hash{FqR2} = $name;
unless (-e ($name)) {
die "Unable to find fastq read 2\n${name}\n";
}
}
}
my @line;
foreach $f (@cols) {
foreach my $f (@cols) {
push @line, $hash{$f};
}
print OUT join("\t",@line),"\n";
$hash{FqR2} = 'na' unless ($hash{FqR2});
print join(",",$hash{SampleID},$hash{FqR1},$hash{FqR2}),"\n";
$lnct ++;
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment