Skip to content
Snippets Groups Projects
Commit 19918779 authored by Jonathan Gesell's avatar Jonathan Gesell
Browse files

Manually specifying reference directory, need to automate. Also added...

Manually specifying reference directory, need to automate.  Also added remaining flags to paired-end align.
parent ca261a6b
Branches
Tags
2 merge requests!37v0.0.1,!15Resolve "process_align"
Pipeline #5777 failed with stages
in 39 minutes and 13 seconds
...@@ -6,6 +6,7 @@ params.bdbag = "${baseDir}/../test_data/auth/cookies.txt" ...@@ -6,6 +6,7 @@ params.bdbag = "${baseDir}/../test_data/auth/cookies.txt"
//params.repRID = "16-1ZX4" //params.repRID = "16-1ZX4"
params.repRID = "Q-Y5JA" params.repRID = "Q-Y5JA"
params.outDir = "${baseDir}/../output" params.outDir = "${baseDir}/../output"
params.reference = "/project/BICF/BICF_Core/shared/gudmap/references"
// Parse input variables // Parse input variables
deriva = Channel deriva = Channel
...@@ -16,6 +17,7 @@ bdbag = Channel ...@@ -16,6 +17,7 @@ bdbag = Channel
.ifEmpty { exit 1, "deriva cookie file for bdbag not found: ${params.bdbag}" } .ifEmpty { exit 1, "deriva cookie file for bdbag not found: ${params.bdbag}" }
repRID = params.repRID repRID = params.repRID
outDir = params.outDir outDir = params.outDir
reference = params.reference
logsDir = "${outDir}/Logs" logsDir = "${outDir}/Logs"
// Define fixed files // Define fixed files
...@@ -144,8 +146,27 @@ process parseMetadata { ...@@ -144,8 +146,27 @@ process parseMetadata {
species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p species) species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p species)
echo "LOG: species metadata parsed: \${species}" >>${repRID}.parseMetadata.err echo "LOG: species metadata parsed: \${species}" >>${repRID}.parseMetadata.err
#Set the correct Reference Directory
if [ "\${spike}" == 'yes' ]; then
if [ "\${species}" == 'Homo sapiens' ]; then
referenceDir="/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12-S/hisat2/genome";
echo "LOG: Referece set to Homo sapiens with spike-in." >>${repRID}.parseMetadata.err;
elif [ "\${species}" == 'Mus musculus' ]; then
referenceDir="/project/BICF/BICF_Core/shared/gudmap/references/GRCm38.P6-S/hisat2/genome";
echo "LOG: Referece set to Mus musculus with spike-in." >>${repRID}.parseMetadata.err;
fi;
else
if [ "\${species}" == 'Homo sapiens' ]; then
referenceDir="/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12/hisat2/genome";
echo "LOG: Referece set to Homo sapiens without spike-in." >>${repRID}.parseMetadata.err;
elif [ "\${species}" == 'Mus musculus' ]; then
referenceDir="/project/BICF/BICF_Core/shared/gudmap/references/GRCm38.P6/hisat2/genome";
echo "LOG: Referece set to Mus musculus without spike-in." >>${repRID}.parseMetadata.err;
fi;
fi;
# Save design file # Save design file
echo "\${rep},\${endsMeta},\${endsManual},\${stranded},\${spike},\${species}" > design.csv echo "\${rep},\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${referenceDir}" > design.csv
""" """
} }
...@@ -156,17 +177,16 @@ endsManual = Channel.create() ...@@ -156,17 +177,16 @@ endsManual = Channel.create()
stranded = Channel.create() stranded = Channel.create()
spike = Channel.create() spike = Channel.create()
species = Channel.create() species = Channel.create()
reference = Channel.create() referenceDir = Channel.create()
metadata.splitCsv(sep: ',', header: false).separate( metadata.splitCsv(sep: ',', header: false).separate(
rep, rep,
endsMeta, endsMeta,
endsManual, endsManual,
stranded, stranded,
spike, spike,
species species,
referenceDir
) )
rep.subscribe { println "$it" }
endsManual.into { endsManual.into {
endsManual_trimData endsManual_trimData
endsManual_alignReads endsManual_alignReads
...@@ -174,30 +194,9 @@ endsManual.into { ...@@ -174,30 +194,9 @@ endsManual.into {
stranded.into { stranded.into {
stranded_alignReads stranded_alignReads
} }
referenceDir.into {
// Exit with no species referenceDir_alignReads
if (species == "") {
print ("ERROR: Reference genome not specified")
exit 1
}
spike.subscribe { println "$it"}
// Setting references
if (spike == 'yes') {
if (species == "Homo sapiens") {
reference = Channel.fromPath ("/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12-S/hisat2")
} else if (species == "Mus musculus") {
reference = Channel.fromPath ("/project/BICF/BICF_Core/shared/gudmap/references/GRCm38.P6-S/hisat2")
}
} else {
if (species == "Homo sapiens") {
reference = Channel.fromPath ("/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12/hisat2")
} else if (species == "Mus musculus") {
reference = Channel.fromPath ("/project/BICF/BICF_Core/shared/gudmap/references/GRCm38.P6/hisat2")
}
} }
//reference.subscribe { println "$it" }
/* /*
* trimData: trims any adapter or non-host sequences from the data * trimData: trims any adapter or non-host sequences from the data
...@@ -241,7 +240,7 @@ process alignReads { ...@@ -241,7 +240,7 @@ process alignReads {
val endsManual_alignReads val endsManual_alignReads
val stranded_alignReads val stranded_alignReads
path fqs from fastqs_trimmed path fqs from fastqs_trimmed
val reference val referenceDir_alignReads
output: output:
set repRID, file ("${repRID}.unal.gz"), file ("${repRID}.bam"), file ("${repRID}.bai") set repRID, file ("${repRID}.unal.gz"), file ("${repRID}.bam"), file ("${repRID}.bai")
...@@ -249,8 +248,8 @@ process alignReads { ...@@ -249,8 +248,8 @@ process alignReads {
script: script:
""" """
if [ "${endsManual_alignReads}" == 'pe' ]; then if [ "${endsManual_alignReads}" == 'pe' ]; then
hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${reference}/genome ${stranded_alignReads} -1 ${fqs[0]} -2 ${fqs[1]} 1>${repRID}.align.out 2>${repRID}.align.err; hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} --no-mixed --no-discordant -1 ${fqs[0]} -2 ${fqs[1]} 1>${repRID}.align.out 2>${repRID}.align.err;
else hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${reference}/genome ${stranded_alignReads} -U ${fqs[0]} 1>${repRID}.align.out 2>${repRID}.align.err; else hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x ${referenceDir_alignReads} ${stranded_alignReads} -U ${fqs[0]} 1>${repRID}.align.out 2>${repRID}.align.err;
fi; fi;
samtools view -1 --threads `nproc` -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err; samtools view -1 --threads `nproc` -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err;
samtools sort -@ `nproc` -O BAM -o ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err; samtools sort -@ `nproc` -O BAM -o ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment