Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
rnaseq
Manage
Activity
Members
Labels
Plan
Issues
8
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
BICF
Astrocyte
rnaseq
Commits
3caba697
Commit
3caba697
authored
5 years ago
by
Jeremy Mathews
Browse files
Options
Downloads
Patches
Plain Diff
Test File Input - Process Error
parent
5fc25b2c
Branches
Branches containing commit
Tags
Tags containing commit
1 merge request
!7
Resolve "Check file inputs"
Pipeline
#4126
failed with stage
in 1 minute and 50 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
workflow/main.nf
+107
-39
107 additions, 39 deletions
workflow/main.nf
workflow/scripts/check_designfile.pl
+26
-4
26 additions, 4 deletions
workflow/scripts/check_designfile.pl
with
133 additions
and
43 deletions
workflow/main.nf
+
107
−
39
View file @
3caba697
...
...
@@ -3,39 +3,6 @@
// Default parameter values to run tests
params.input = "$baseDir"
params.output= "$baseDir/output"
params.files="$params.input/*.f*"
files=Channel.fromPath(params.files)
process checkinputfiles {
input:
file fqs from files
output:
file("*.fastq.gz") into fastqs
script:
"""
if [[ $fqs == *.fq ]];
then new_name=`echo ${fqs} | sed -e "s/.fq\$/.fastq/"`;
mv ${fqs} \${new_name};
gzip -f \${new_name};
elif [[ $fqs == *.fastq ]];
then gzip -f $fqs;
elif [[ $fqs == *.fq.gz ]];
then new_name=`echo ${fqs} | sed -e "s/.fq.gz\$/.fastq.gz/"`;
mv ${fqs} \${new_name};
else
zcat ${fqs} | gzip -c > ${fqs}.temp
mv ${fqs}.temp ${fqs}
fi;
"""
}
params.design="$params.input/design.txt"
params.genome="/project/shared/bicf_workflow_ref/human/grch38/"
params.markdups="picard"
...
...
@@ -46,7 +13,6 @@ params.align = 'hisat'
params.fusion = 'skip'
params.dea = 'detect'
design_file = file(params.design)
design_file = file(params.design)
gtf_file = file("$params.genome/gencode.gtf")
genenames = file("$params.genome/genenames.txt")
...
...
@@ -57,19 +23,61 @@ indel="$params.genome/goldindels.vcf.gz"
knownindel=file(indel)
dbsnp=file(dbsnp)
files = Channel
.fromPath("$params.input/*")
good = Channel.fromPath("$params.input/*.fastq.gz")
process checkinputfiles {
module 'parallel/20150122:pigz/2.4'
queue 'super'
input:
file ("*") from files.collect()
file design_file name "design.tsv"
output:
set file ("design.tsv"), file ("*.fastq.gz") into design
file ("*.fastq.gz") into fastqs
script:
"""
for fqs in `ls | grep -v "^design.tsv\$"`;
do if [[ \$fqs == *.fq ]];
then new_name=`echo \${fqs} | sed -e "s/.fq\$/.fastq/"`;
mv \${fqs} \${new_name};
echo "pigz -f \${new_name}";
elif [[ \$fqs == *.fastq ]];
then echo "pigz -f \$fqs";
elif [[ \$fqs == *.fq.gz ]];
then new_name=`echo \${fqs} | sed -e "s/.fq.gz\$/.fastq.gz/"`;
mv \${fqs} \${new_name};
fi;
done | shuf | parallel -j\${SLURM_CPUS_ON_NODE};
"""
}
// params genome is the directory
// base name for the index is always genome
index_path = file(params.genome)
process checkdesignfile {
executor 'local'
publishDir "$params.output", mode: 'copy'
input:
file design_file name 'design.ori.txt'
set file ("design.ori.txt"), file ("*") from design
output:
file("design.valid.txt") into newdesign
stdout spltnames
script:
"""
perl -p -e 's/\\r\\n*/\\n/g' design.ori.txt > design.fix.txt
perl $baseDir/scripts/check_designfile.pl ${params.pairs} design.fix.txt
...
...
@@ -78,14 +86,17 @@ process checkdesignfile {
def fileMap = [:]
fastqs.each {
fastqs
.mix(good)
.flatten()
.each {
final fileName = it.getFileName().toString()
prefix = fileName.lastIndexOf('/')
fileMap[fileName] = it
}
if (params.pairs == 'pe') {
spltnames
spltnames
.splitCsv()
.filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
...
...
@@ -94,7 +105,7 @@ spltnames
else {
spltnames
.splitCsv()
.filter { fleMap.get(it[1]) != null }
.filter { f
i
leMap.get(it[1]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]),'') }
.set { read }
}
...
...
@@ -103,14 +114,21 @@ if( ! read ) { error "Didn't match any input files with entries in the design fi
//
// Trim raw reads using trimgalore
//
process trim {
errorStrategy 'ignore'
input:
set pair_id, file(read1), file(read2) from read
output:
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
script:
"""
bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
"""
...
...
@@ -126,13 +144,21 @@ process trim {
process starfusion {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from fusionfq
output:
file("${pair_id}.starfusion.txt") into fusionout
when:
params.fusion == 'detect' && params.pairs == 'pe'
script:
"""
bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
"""
...
...
@@ -143,12 +169,17 @@ process align {
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from trimread
output:
set pair_id, file("${pair_id}.bam") into aligned
set pair_id, file("${pair_id}.bam") into aligned2
file("${pair_id}.alignerout.txt") into hsatout
script:
"""
bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
"""
...
...
@@ -159,11 +190,16 @@ process alignqc {
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(bam) from aligned2
output:
file("${pair_id}.flagstat.txt") into alignstats
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
script:
"""
bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
"""
...
...
@@ -175,10 +211,16 @@ process parse_alignstat {
publishDir "$params.output", mode: 'copy'
input:
file(txt) from alignstats.toList()
file(txt) from hsatout.toList()
output:
file('alignment.summary.txt')
script:
"""
perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
"""
...
...
@@ -190,11 +232,16 @@ process markdups {
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from aligned
output:
set pair_id, file("${pair_id}.dedup.bam") into deduped1
set pair_id, file("${pair_id}.dedup.bam") into deduped2
script:
"""
bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
"""
...
...
@@ -206,13 +253,20 @@ process markdups {
process geneabund {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from deduped1
output:
file("${pair_id}.cts") into counts
file("${pair_id}.cts.summary") into ctsum
file("${pair_id}_stringtie") into strcts
file("${pair_id}.fpkm.txt") into fpkm
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
"""
...
...
@@ -221,7 +275,9 @@ process geneabund {
process statanal {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
file count_file from counts.toList()
file count_sum from ctsum.toList()
file newdesign name 'design.txt'
...
...
@@ -229,29 +285,41 @@ process statanal {
file geneset name 'geneset.gmt'
file fpkm_file from fpkm.toList()
file stringtie_dir from strcts.toList()
output:
output:
file "*.txt" into txtfiles
file "*.png" into psfiles
file("*.rda") into rdafiles
file("geneset.shiny.gmt") into gmtfile
when:
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
"""
}
process gatkbam {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(rbam) from deduped2
output:
set file("${pair_id}.final.bam"),file("${pair_id}.final.bai") into gatkbam
when:
params.align == 'hisat' && $index_path == '/project/shared/bicf_workflow_ref/GRCh38/'
script:
"""
bash $baseDir/process_scripts/variants/gatkrunner.sh -a gatkbam_rna -b $rbam -r ${index_path}/hisat_index -p $pair_id
"""
...
...
This diff is collapsed.
Click to expand it.
workflow/scripts/check_designfile.pl
+
26
−
4
View file @
3caba697
#!/usr/bin/perl -w
#check_designfile.pl
use
strict
;
use
warnings
;
my
$pe
=
shift
@ARGV
;
my
$dfile
=
shift
@ARGV
;
open
OUT
,
"
>design.valid.txt
"
or
die
$!
;
...
...
@@ -11,7 +14,6 @@ $head =~ s/FullPathTo//g;
my
@colnames
=
split
(
/\t/
,
$head
);
my
%newcols
=
map
{
$_
=>
1
}
@colnames
;
unless
(
grep
(
/FqR1/
,
@colnames
))
{
die
"
Missing Sequence Files in Design File: FqR1
\n
";
}
...
...
@@ -49,16 +51,36 @@ while (my $line = <DFILE>) {
$hash
{
FinalBam
}
=
$hash
{
SampleID
}
.
"
.final.bam
";
$hash
{
Bam
}
=
$hash
{
SampleID
}
.
"
.bam
";
unless
(
$hash
{
SampleGroup
})
{
$j
=
$lnct
%%
2
;
my
$j
=
$lnct
%%
2
;
$hash
{
SampleGroup
}
=
$grp
[
$j
];
}
$hash
{
SampleGroup
}
=~
s/_//g
;
unless
(
$hash
{
FqR1
}
=~
m/.fastq.gz/
)
{
my
$name
=
$hash
{
FqR1
};
$name
=~
s/.f.*/.fastq.gz/
;
unless
(
$hash
{
FqR1
}
eq
$name
)
{
$hash
{
FqR1
}
=
$name
;
unless
(
-
e
(
$name
))
{
die
"
Unable to find fastq read 1
\n
${name}
\n
";
}
}
}
$hash
{
FqR2
}
=
'
na
'
unless
(
$hash
{
FqR2
});
unless
(
$hash
{
FqR2
}
eq
'
na
')
{
my
$name
=
$hash
{
FqR2
};
$name
=~
s/.f.*/.fastq.gz/
;
unless
(
$hash
{
FqR2
}
eq
$name
)
{
$hash
{
FqR2
}
=
$name
;
unless
(
-
e
(
$name
))
{
die
"
Unable to find fastq read 2
\n
${name}
\n
";
}
}
}
my
@line
;
foreach
$f
(
@cols
)
{
foreach
my
$f
(
@cols
)
{
push
@line
,
$hash
{
$f
};
}
print
OUT
join
("
\t
",
@line
),"
\n
";
$hash
{
FqR2
}
=
'
na
'
unless
(
$hash
{
FqR2
});
print
join
("
,
",
$hash
{
SampleID
},
$hash
{
FqR1
},
$hash
{
FqR2
}),"
\n
";
$lnct
++
;
}
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment