Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
BICF
Astrocyte
rnaseq
Commits
e7ad35bc
Commit
e7ad35bc
authored
Jun 20, 2019
by
Jeremy Mathews
Browse files
File Name Check
parent
3caba697
Pipeline
#4137
failed with stage
in 4 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
workflow/main.nf
View file @
e7ad35bc
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
params.input = "$baseDir"
params.input = "$baseDir"
params.output= "$baseDir/output"
params.output= "$baseDir/output"
params.design="$params.input/design.txt"
params.design="$params.input/design.txt"
params.genome="/project/shared/bicf_workflow_ref/human/
grc
h38/"
params.genome="/project/shared/bicf_workflow_ref/human/
GRC
h38/"
params.markdups="picard"
params.markdups="picard"
params.stranded="0"
params.stranded="0"
params.pairs="pe"
params.pairs="pe"
...
@@ -14,313 +14,231 @@ params.fusion = 'skip'
...
@@ -14,313 +14,231 @@ params.fusion = 'skip'
params.dea = 'detect'
params.dea = 'detect'
design_file = file(params.design)
design_file = file(params.design)
fqs = Channel.fromPath("$params.input/*")
gtf_file = file("$params.genome/gencode.gtf")
gtf_file = file("$params.genome/gencode.gtf")
genenames = file("$params.genome/genenames.txt")
genenames = file("$params.genome/genenames.txt")
geneset = file("$params.genome/../gsea_gmt/$params.geneset")
geneset = file("$params.genome/../gsea_gmt/$params.geneset")
dbsnp="$params.genome/db
s
np.vcf.gz"
dbsnp="$params.genome/db
S
np.vcf.gz"
indel="$params.genome/
g
old
i
ndels.vcf.gz"
indel="$params.genome/
G
old
I
ndels.vcf.gz"
knownindel=file(indel)
knownindel=file(indel)
dbsnp=file(dbsnp)
dbsnp=file(dbsnp)
files = Channel
.fromPath("$params.input/*")
good = Channel.fromPath("$params.input/*.fastq.gz")
// Stage every input file next to the design file, rename *.fq / *.fq.gz
// inputs to *.fastq / *.fastq.gz, and compress uncompressed FASTQ files,
// then emit the design file together with the resulting *.fastq.gz files.
process checkinputfiles {
// Modules named for this task: GNU parallel and pigz (both used by the script).
module 'parallel/20150122:pigz/2.4'
queue 'super'
input:
// All items of the `files` channel, gathered into one task invocation.
file ("*") from files.collect()
// The design file is staged under the fixed name design.tsv so the script
// can exclude it from the rename loop by name.
file design_file name "design.tsv"
output:
// Same *.fastq.gz glob is emitted twice: paired with the design file
// (`design`) and on its own (`fastqs`).
set file ("design.tsv"), file ("*.fastq.gz") into design
file ("*.fastq.gz") into fastqs
script:
// The loop below does the renames itself (mv) but only *prints* the
// "pigz -f ..." command lines; the printed lines are shuffled and executed
// by `parallel` with SLURM_CPUS_ON_NODE jobs. Files that already end in
// .fastq.gz match no branch and are left as they are.
// `\$` keeps the dollar sign for the shell instead of Groovy interpolation.
"""
for fqs in `ls | grep -v "^design.tsv\$"`;
do if [[ \$fqs == *.fq ]];
then new_name=`echo \${fqs} | sed -e "s/.fq\$/.fastq/"`;
mv \${fqs} \${new_name};
echo "pigz -f \${new_name}";
elif [[ \$fqs == *.fastq ]];
then echo "pigz -f \$fqs";
elif [[ \$fqs == *.fq.gz ]];
then new_name=`echo \${fqs} | sed -e "s/.fq.gz\$/.fastq.gz/"`;
mv \${fqs} \${new_name};
fi;
done | shuf | parallel -j\${SLURM_CPUS_ON_NODE};
"""
}
// params genome is the directory
// params genome is the directory
// base name for the index is always genome
// base name for the index is always genome
index_path = file(params.genome)
index_path = file(params.genome)
process checkdesignfile {
process checkdesignfile {
executor 'local'
queue 'super'
publishDir "$params.output", mode: 'copy'
module 'parallel/20150122:pigz/2.4'
publishDir "$params.output", mode: 'copy'
input:
input:
set file ("design.ori.txt"), file ("*") from design
file design_file name 'design.ori.txt'
file ("*") from fqs.collect()
output:
output:
file("design.valid.txt") into newdesign
file("design.valid.txt") into newdesign
stdout spltnames
file("*.fastq.gz") into fastqs mode flatten
stdout spltnames
script:
script:
"""
"""
perl -p -e 's/\\r\\n*/\\n/g' design.ori.txt > design.fix.txt
bash $baseDir/scripts/check_inputfiles.sh
perl $baseDir/scripts/check_designfile.pl ${params.pairs} design.fix.txt
perl -p -e 's/\\r\\n*/\\n/g' design.ori.txt > design.fix.txt
"""
perl $baseDir/scripts/check_designfile.pl ${params.pairs} design.fix.txt
"""
}
}
def fileMap = [:]
def fileMap = [:]
fastqs
fastqs
.mix(good)
.subscribe {
.flatten()
def fileName = it.getFileName()
.each {
fileMap."$fileName" = it
final fileName = it.getFileName().toString()
}
prefix = fileName.lastIndexOf('/')
fileMap[fileName] = it
}
if (params.pairs == 'pe') {
if (params.pairs == 'pe') {
spltnames
spltnames
.splitCsv()
.splitCsv()
.filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
.filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
.map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
.set { read }
.set { read }
}
} else {
else {
spltnames
spltnames
.splitCsv()
.splitCsv()
.filter { fileMap.get(it[1]) != null }
.filter { fileMap.get(it[1]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]),'') }
.map { it -> tuple(it[0], fileMap.get(it[1]),'') }
.set { read }
.set { read }
}
}
if( ! read ) { error "Didn't match any input files with entries in the design file" }
if( ! read ) { error "Didn't match any input files with entries in the design file" }
//
// Trim raw reads using trimgalore
// Trim raw reads using trimgalore
//
process trim {
process trim {
errorStrategy 'ignore'
input:
set pair_id, file(read1), file(read2) from read
input:
set pair_id, file(read1), file(read2) from read
output:
output:
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
script:
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
"""
bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
script:
"""
"""
bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
"""
}
}
//
// Align trimmed reads to genome index with hisat2
// Align trimmed reads to genome index with hisat2
// Sort and index with samtools
// Sort and index with samtools
// QC aligned reads with fastqc
// QC aligned reads with fastqc
// Alignment stats with samtools
// Alignment stats with samtools
//
process starfusion {
process starfusion {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from fusionfq
output:
file("${pair_id}.starfusion.txt") into fusionout
when:
input:
set pair_id, file(fq1), file(fq2) from fusionfq
params.fusion == 'detect' && params.pairs == 'pe'
output:
file("${pair_id}.starfusion.txt") into fusionout
script:
when:
params.fusion == 'detect' && params.pairs == 'pe'
"""
script:
bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
"""
"""
bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
"""
}
}
process align {
process align {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
input:
set pair_id, file(fq1), file(fq2) from trimread
set pair_id, file(fq1), file(fq2) from trimread
output:
set pair_id, file("${pair_id}.bam") into aligned
set pair_id, file("${pair_id}.bam") into aligned2
file("${pair_id}.alignerout.txt") into hsatout
output:
script:
"""
set pair_id, file("${pair_id}.bam") into aligned
bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
set pair_id, file("${pair_id}.bam") into aligned2
"""
file("${pair_id}.alignerout.txt") into hsatout
script:
"""
bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
"""
}
}
process alignqc {
process alignqc {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(bam) from aligned2
input:
set pair_id, file(bam) from aligned2
output:
output:
file("${pair_id}.flagstat.txt") into alignstats
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
file("${pair_id}.flagstat.txt") into alignstats
script:
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
"""
bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
script:
"""
"""
bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
"""
}
}
// Summarize all flagstat output
// Summarize all flagstat output
process parse_alignstat {
process parse_alignstat {
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
file(txt) from alignstats.toList()
input:
file(txt) from hsatout.toList()
file(txt) from alignstats.toList()
file(txt) from hsatout.toList()
output:
output:
file('alignment.summary.txt')
file('alignment.summary.txt')
script:
"""
script:
perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
"""
"""
perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
"""
}
}
// Identify duplicate reads with Picard
// Identify duplicate reads with Picard
process markdups {
process markdups {
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from aligned
input:
set pair_id, file(sbam) from aligned
output:
output:
set pair_id, file("${pair_id}.dedup.bam") into deduped1
set pair_id, file("${pair_id}.dedup.bam") into deduped2
set pair_id, file("${pair_id}.dedup.bam") into deduped1
script:
set pair_id, file("${pair_id}.dedup.bam") into deduped2
"""
bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
script:
"""
"""
bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
"""
}
}
// Read summarization with subread
// Read summarization with subread
// Assemble transcripts with stringtie
// Assemble transcripts with stringtie
process geneabund {
process geneabund {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from deduped1
input:
set pair_id, file(sbam) from deduped1
output:
output:
file("${pair_id}.cts") into counts
file("${pair_id}.cts.summary") into ctsum
file("${pair_id}_stringtie") into strcts
file("${pair_id}.fpkm.txt") into fpkm
file("${pair_id}.cts") into counts
script:
file("${pair_id}.cts.summary") into ctsum
"""
file("${pair_id}_stringtie") into strcts
bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
file("${pair_id}.fpkm.txt") into fpkm
"""
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
"""
}
}
process statanal {
process statanal {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
input:
file count_file from counts.toList()
file count_sum from ctsum.toList()
file count_file from counts.toList()
file newdesign name 'design.txt'
file count_sum from ctsum.toList()
file genenames
file newdesign name 'design.txt'
file geneset name 'geneset.gmt'
file genenames
file fpkm_file from fpkm.toList()
file geneset name 'geneset.gmt'
file stringtie_dir from strcts.toList()
file fpkm_file from fpkm.toList()
file stringtie_dir from strcts.toList()
output:
file "*.txt" into txtfiles
output:
file "*.png" into psfiles
file("*.rda") into rdafiles
file "*.txt" into txtfiles
file("geneset.shiny.gmt") into gmtfile
file "*.png" into psfiles
file("*.rda") into rdafiles
script:
file("geneset.shiny.gmt") into gmtfile
"""
bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
when:
"""
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
"""
}
}
process gatkbam {
process gatkbam {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(rbam) from deduped2
output:
set file("${pair_id}.final.bam"),file("${pair_id}.final.bai") into gatkbam
when:
input:
set pair_id, file(rbam) from deduped2
params.align == 'hisat' && $index_path == '/project/shared/bicf_workflow_ref/GRCh38/'
output:
set file("${pair_id}.final.bam"),file("${pair_id}.final.bai") into gatkbam
script:
when:
params.align == 'hisat' && $index_path == '/project/shared/bicf_workflow_ref/GRCh38/'
"""
script:
bash $baseDir/process_scripts/variants/gatkrunner.sh -a gatkbam_rna -b $rbam -r ${index_path}/hisat_index -p $pair_id
"""
"""
bash $baseDir/process_scripts/variants/gatkrunner.sh -a gatkbam_rna -b $rbam -r ${index_path}/hisat_index -p $pair_id
"""
}
}
workflow/scripts/check_designfile.pl
View file @
e7ad35bc
...
@@ -55,9 +55,9 @@ while (my $line = <DFILE>) {
...
@@ -55,9 +55,9 @@ while (my $line = <DFILE>) {
$hash
{
SampleGroup
}
=
$grp
[
$j
];
$hash
{
SampleGroup
}
=
$grp
[
$j
];
}
}
$hash
{
SampleGroup
}
=~
s/_//g
;
$hash
{
SampleGroup
}
=~
s/_//g
;
unless
(
$hash
{
FqR1
}
=~
m/.fastq.gz/
)
{
unless
(
$hash
{
FqR1
}
=~
m/
_good
.fastq.gz/
)
{
my
$name
=
$hash
{
FqR1
};
my
$name
=
$hash
{
FqR1
};
$name
=~
s/.f.*/.fastq.gz/
;
$name
=~
s/.f.*/
_good
.fastq.gz/
;
unless
(
$hash
{
FqR1
}
eq
$name
)
{
unless
(
$hash
{
FqR1
}
eq
$name
)
{
$hash
{
FqR1
}
=
$name
;
$hash
{
FqR1
}
=
$name
;
unless
(
-
e
(
$name
))
{
unless
(
-
e
(
$name
))
{
...
@@ -68,7 +68,7 @@ while (my $line = <DFILE>) {
...
@@ -68,7 +68,7 @@ while (my $line = <DFILE>) {
$hash
{
FqR2
}
=
'
na
'
unless
(
$hash
{
FqR2
});
$hash
{
FqR2
}
=
'
na
'
unless
(
$hash
{
FqR2
});
unless
(
$hash
{
FqR2
}
eq
'
na
')
{
unless
(
$hash
{
FqR2
}
eq
'
na
')
{
my
$name
=
$hash
{
FqR2
};
my
$name
=
$hash
{
FqR2
};
$name
=~
s/.f.*/.fastq.gz/
;
$name
=~
s/.f.*/
_good
.fastq.gz/
;
unless
(
$hash
{
FqR2
}
eq
$name
)
{
unless
(
$hash
{
FqR2
}
eq
$name
)
{
$hash
{
FqR2
}
=
$name
;
$hash
{
FqR2
}
=
$name
;
unless
(
-
e
(
$name
))
{
unless
(
-
e
(
$name
))
{
...
...
workflow/scripts/check_inputfiles.sh
0 → 100644
View file @
e7ad35bc
#!/bin/bash
#check_inputfiles.sh
#
# Normalise the FASTQ file names in the current directory so that every read
# file ends in "_good.fastq.gz":
#
#   NAME.fq        -> NAME_good.fastq      (then compressed by pigz)
#   NAME.fastq     -> NAME_good.fastq      (then compressed by pigz)
#   NAME.fq.gz     -> NAME_good.fastq.gz
#   NAME.fastq.gz  -> NAME_good.fastq.gz
#
# Files that already end in "_good.fastq.gz" are left untouched so that a
# second run does not produce "_good_good" names. Any other file that merely
# matches *.f* (for example design.fix.txt) is ignored instead of being passed
# to mv with identical source and destination.
#
# Takes no arguments. Exit status is 0 when every rename and compression
# succeeded and 1 if any of them failed.

# Without nullglob an empty directory would make the loop run once with the
# literal pattern "*.f*".
shopt -s nullglob

status=0

# Iterate over the glob directly rather than parsing `ls` output, so names
# containing whitespace or glob characters are handled as single items.
for i in *.f*; do
    # Only regular files (or links to them) are candidates.
    [[ -f ${i} ]] || continue

    compress=0
    case "${i}" in
        *_good.fastq.gz)
            # Already normalised; must be tested before the *.fastq.gz arm.
            continue
            ;;
        *.fq)
            new_name="${i%.fq}_good.fastq"
            compress=1
            ;;
        *.fastq)
            new_name="${i%.fastq}_good.fastq"
            compress=1
            ;;
        *.fq.gz)
            new_name="${i%.fq.gz}_good.fastq.gz"
            ;;
        *.fastq.gz)
            new_name="${i%.fastq.gz}_good.fastq.gz"
            ;;
        *)
            # Not a FASTQ file name; nothing to do.
            continue
            ;;
    esac

    if ! mv "${i}" "${new_name}"; then
        echo "check_inputfiles.sh: could not rename ${i} to ${new_name}" >&2
        status=1
        continue
    fi

    # pigz is invoked directly (not inside a command substitution, which would
    # try to execute whatever pigz printed). -f is kept from the original.
    if (( compress )) && ! pigz -f "${new_name}"; then
        echo "check_inputfiles.sh: could not compress ${new_name}" >&2
        status=1
    fi
done

exit "${status}"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment