Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
RNA-seq
Manage
Activity
Members
Labels
Plan
Issues
12
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
GUDMAP_RBK
RNA-seq
Commits
85392b62
There was an error fetching the commit references. Please try again later.
Commit
85392b62
authored
5 years ago
by
Gervaise Henry
🤠
Browse files
Options
Downloads
Patches
Plain Diff
Harmonize logs
parent
558e7e78
2 merge requests
!37
v0.0.1
,
!33
Resolve "process_qc"
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
workflow/rna-seq.nf
+88
-59
88 additions, 59 deletions
workflow/rna-seq.nf
with
88 additions
and
59 deletions
workflow/rna-seq.nf
+
88
−
59
View file @
85392b62
...
...
@@ -94,12 +94,14 @@ process getBag {
export https_proxy=\${http_proxy}
# link credential file for authentication
echo -e "LOG: linking deriva credentials" >> ${repRID}.getBag.log
ln -sf `readlink -e credential.json` ~/.deriva/credential.json
echo "LOG:
deriva credentials
linked" >> ${repRID}.getBag.log
echo
-e
"LOG: linked" >> ${repRID}.getBag.log
# deriva-download replicate RID
echo "LOG: fetching
deriva catalog for selected
RID in GUDMAP
.
" >> ${repRID}.getBag.log
echo
-e
"LOG: fetching
bagit for ${rep
RID
}
in GUDMAP" >> ${repRID}.getBag.log
deriva-download-cli dev.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID}
echo -e "LOG: fetched" >> ${repRID}.getBag.log
"""
}
...
...
@@ -127,20 +129,23 @@ process getData {
export https_proxy=\${http_proxy}
# link deriva cookie for authentication
echo -e "LOG: linking deriva cookie" >> ${repRID}.getData.log
ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt
echo "LOG:
deriva cookie
linked" >> ${repRID}.getData.log
echo
-e
"LOG: linked" >> ${repRID}.getData.log
# get bagit basename
replicate=\$(basename "${bagit}" | cut -d "." -f1)
echo "LOG: \${replicate}" >> ${repRID}.getData.log
echo
-e
"LOG:
bagit replicate name
\${replicate}" >> ${repRID}.getData.log
# unzip bagit
echo -e "LOG: unzipping replicate bagit" >> ${repRID}.getData.log
unzip ${bagit}
echo "LOG:
replicate bdbag
unzipped" >> ${repRID}.getData.log
echo
-e
"LOG: unzipped" >> ${repRID}.getData.log
# bagit fetch fastq"s only and rename by repRID
sh ${script_bdbagFetch} \${replicate} ${repRID}
echo "LOG: replicate bdbag fetched" >> ${repRID}.getData.log
# bagit fetch fastq's only and rename by repRID
echo -e "LOG: fetching replicate bdbag" >> ${repRID}.getData.log
sh ${script_bdbagFetch} ${repRID} ${repRID}
echo -e "LOG: fetched" >> ${repRID}.getData.log
"""
}
...
...
@@ -172,38 +177,38 @@ process parseMetadata {
# check replicate RID metadata
rep=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p repRID)
echo "LOG: replicate RID metadata parsed: \${rep}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: replicate RID metadata parsed: \${rep}" >> ${repRID}.parseMetadata.log
# get experiment RID metadata
exp=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p expRID)
echo "LOG: experiment RID metadata parsed: \${exp}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: experiment RID metadata parsed: \${exp}" >> ${repRID}.parseMetadata.log
# get study RID metadata
study=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p studyRID)
echo "LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: study RID metadata parsed: \${study}" >> ${repRID}.parseMetadata.log
# get endedness metadata
endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p endsMeta)
echo "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.log
# ganually get endness
endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p endsManual)
echo "LOG: endedness manually detected: \${endsManual}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: endedness manually detected: \${endsManual}" >> ${repRID}.parseMetadata.log
# get strandedness metadata
stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p stranded)
echo "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.log
# get spike-in metadata
spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p spike)
echo "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.log
# get species metadata
species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p species)
echo "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log
echo
-e
"LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log
# gave design file
echo "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${exp},\${study}" > design.csv
echo
-e
"\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${exp},\${study}" > design.csv
"""
}
...
...
@@ -253,15 +258,15 @@ process trimData {
ulimit -a >> ${repRID}.trimData.log
# trim fastq's using trim_galore
echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
if [ "${ends}" == "se" ]
then
echo "LOG: running trim_galore using single-end settings" >> ${repRID}.trimData.log
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]}
elif [ "${ends}" == "pe" ]
then
echo "LOG: running trim_galore using paired-end settings" >> ${repRID}.trimData.log
trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]}
fi
echo -e "LOG: trimmed" >> ${repRID}.trimData.log
"""
}
...
...
@@ -286,8 +291,8 @@ process getRefInfer {
script:
"""
hostname > ${repRID}.getRefInfer.log
ulimit -a >> ${repRID}.getRefInfer.log
hostname > ${repRID}.
${refName}.
getRefInfer.log
ulimit -a >> ${repRID}.
${refName}.
getRefInfer.log
export https_proxy=\${http_proxy}
# set the reference name
...
...
@@ -301,29 +306,30 @@ process getRefInfer {
then
references=\$(echo ${referenceBase}/GRCh${refHuVersion})
else
echo -e "LOG: ERROR - References could not be set!\nReference found: ${referenceBase}" >> ${repRID}.getRefInfer.log
echo -e "LOG: ERROR - References could not be set!\nReference found: ${referenceBase}" >> ${repRID}.
${refName}.
getRefInfer.log
exit 1
fi
mkdir ${refName}
# retreive appropriate reference appropriate location
echo -e "LOG: fetching ${refName} reference files from ${referenceBase}" >> ${repRID}.${refName}.getRefInfer.log
if [ ${referenceBase} == "s3://bicf-references" ]
then
echo "LOG: grabbing reference files from S3" >> ${repRID}.getRefInfer.log
aws s3 cp "\${references}" /hisat2 ./ --recursive
aws s3 cp "\${references}" /bed ./${refName}/ --recursive
aws s3 cp "\${references}" /*.fna --recursive
aws s3 cp "\${references}" /*.gtf --recursive
elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
then
echo "LOG: using pre-defined locations for reference files" >> ${repRID}.getRefInfer.log
ln -s "\${references}"/hisat2
ln -s "\${references}"/bed ${refName}/bed
ln -s "\${references}"/genome.fna
ln -s "\${references}"/genome.gtf
fi
echo -e "LOG: fetched" >> ${repRID}.${refName}.getRefInfer.log
# make blank bed folder for ERCC
echo -e "LOG: making dummy bed folder for ERCC" >> ${repRID}.${refName}.getRefInfer.log
if [ "${refName}" == "ERCC" ]
then
rm ${refName}/bed
...
...
@@ -354,16 +360,17 @@ process downsampleData {
if [ "${ends}" == "se" ]
then
echo "LOG: downsampling
single-end
trimmed fastq" >> ${repRID}.downsampleData.log
echo
-e
"LOG: downsampling
SE
trimmed fastq" >> ${repRID}.downsampleData.log
seqtk sample -s100 *trimmed.fq.gz 100000 1> sampled.1.fq
touch sampled.2.fq
elif [ "${ends}" == "pe" ]
then
echo "LOG: downsampling
read 1 of paired-end
trimmed fastq" >> ${repRID}.downsampleData.log
echo
-e
"LOG: downsampling
R1 of PE
trimmed fastq" >> ${repRID}.downsampleData.log
seqtk sample -s100 *1.fq.gz 1000000 1> sampled.1.fq
echo "LOG: downsampling
read 2 of paired-end
trimmed fastq" >> ${repRID}.downsampleData.log
echo
-e
"LOG: downsampling
R2 of PE
trimmed fastq" >> ${repRID}.downsampleData.log
seqtk sample -s100 *2.fq.gz 1000000 1> sampled.2.fq
fi
echo -e "LOG: downsampled" >> ${repRID}.downsampleData.log
"""
}
...
...
@@ -389,27 +396,28 @@ process alignSampleData {
hostname > ${repRID}.${ref}.alignSampleData.log
ulimit -a >> ${repRID}.${ref}.alignSampleData.log
# align the reads with Hisat 2
# align the reads with Hisat2
echo -e "LOG: aligning ${ends}" >> ${repRID}.${ref}.alignSampleData.log
if [ "${ends}" == "se" ]
then
echo "LOG: running Hisat2 with single-end settings" >> ${repRID}.${ref}.alignSampleData.log
hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${ref}.alignSampleSummary.txt --new-summary
elif [ "${ends}" == "pe" ]
then
echo "LOG: running Hisat2 with paired-end settings" >> ${repRID}.${ref}.alignSampleData.log
hisat2 -p `nproc` --add-chrname -S ${ref}.sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${ref}.alignSampleSummary.txt --new-summary
fi
echo -e "LOG: aliged" >> ${repRID}.${ref}.alignSampleData.log
# convert the output sam file to a sorted bam file using Samtools
echo "LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.log
echo
-e
"LOG: converting from sam to bam" >> ${repRID}.${ref}.alignSampleData.log
samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${ref}.sampled.bam ${ref}.sampled.sam
# sort the bam file using Samtools
echo "LOG: sorting the bam file" >> ${repRID}.${ref}.alignSampleData.log
echo
-e
"LOG: sorting the bam file" >> ${repRID}.${ref}.alignSampleData.log
samtools sort -@ `nproc` -O BAM -o ${ref}.sampled.sorted.bam ${ref}.sampled.bam
# index the sorted bam using Samtools
echo "LOG: indexing sorted bam file" >> ${repRID}.${ref}.alignSampleData.log
echo
-e
"LOG: indexing sorted bam file" >> ${repRID}.${ref}.alignSampleData.log
samtools index -@ `nproc` -b ${ref}.sampled.sorted.bam ${ref}.sampled.sorted.bam.bai
"""
}
...
...
@@ -441,10 +449,13 @@ process inferMetadata {
# collect alignment rates (round down to integers)
align_ercc=\$(echo \$(grep "Overall alignment rate" ERCC.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
align_ercc=\$(echo \${align_ercc%.*})
echo -e "LOG: alignment rate to ERCC: \${align_ercc}" >> ${repRID}.inferMetadata.log
align_hu=\$(echo \$(grep "Overall alignment rate" GRCh.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
align_hu=\$(echo \${align_hu%.*})
echo -e "LOG: alignment rate to GRCh: \${align_hu}" >> ${repRID}.inferMetadata.log
align_mo=\$(echo \$(grep "Overall alignment rate" GRCm.alignSampleSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
align_mo=\$(echo \${align_mo%.*})
echo -e "LOG: alignment rate to GRCm: \${align_mo}" >> ${repRID}.inferMetadata.log
# determine spike-in
if [ 1 -eq \$(echo \$(expr \${align_ercc} ">=" 10)) ]
...
...
@@ -453,7 +464,7 @@ process inferMetadata {
else
spike="no"
fi
echo -e "LOG:
I
nference of strandedness results is: \${spike}" >> ${repRID}.inferMetadata.log
echo -e "LOG:
i
nference of strandedness results is: \${spike}" >> ${repRID}.inferMetadata.log
# determine species
if [ 1 -eq \$(echo \$(expr \${align_hu} ">=" 25)) ] && [ 1 -eq \$(echo \$(expr \${align_mo} "<" 25)) ]
...
...
@@ -467,16 +478,16 @@ process inferMetadata {
bam="GRCm.sampled.sorted.bam"
bed="./GRCm/bed/genome.bed"
else
echo -e "LOG: ERROR -
I
nference of species returns an ambiguous result: hu=\${align_hu} mo=\${align_mo}" >> ${repRID}.inferMetadata.log
echo -e "LOG: ERROR -
i
nference of species returns an ambiguous result: hu=\${align_hu} mo=\${align_mo}" >> ${repRID}.inferMetadata.log
exit 1
fi
echo -e "LOG:
I
nference of species results in: \${species}" >> ${repRID}.inferMetadata.log
echo -e "LOG:
i
nference of species results in: \${species}" >> ${repRID}.inferMetadata.log
# infer experimental setting from dedup bam
echo "LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.log
echo
-e
"LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.log
infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.infer_experiment.txt
echo -e "LOG: infered" >> ${repRID}.inferMetadata.log
echo "LOG: determining endedness and strandedness from file" >> ${repRID}.inferMetadata.log
ended=`bash inferMeta.sh endness ${repRID}.infer_experiment.txt`
fail=`bash inferMeta.sh fail ${repRID}.infer_experiment.txt`
if [ \${ended} == "PairEnd" ]
...
...
@@ -490,6 +501,8 @@ process inferMetadata {
percentF=`bash inferMeta.sh sef ${repRID}.infer_experiment.txt`
percentR=`bash inferMeta.sh ser ${repRID}.infer_experiment.txt`
fi
echo -e "LOG: percentage reads in the same direction as gene: \${percentF}" >> ${repRID}.inferMetadata.log
echo -e "LOG: percentage reads in the opposite direction as gene: \${percentR}" >> ${repRID}.inferMetadata.log
if [ 1 -eq \$(echo \$(expr \${percentF#*.} ">" 2500)) ] && [ 1 -eq \$(echo \$(expr \${percentR#*.} "<" 2500)) ]
then
stranded="forward"
...
...
@@ -500,7 +513,7 @@ process inferMetadata {
else
stranded="unstranded"
fi
echo -e "LOG: stradedness set to \${stranded}" >> ${repRID}.inferMetadata.log
echo -e "LOG: stradedness set to
:
\${stranded}" >> ${repRID}.inferMetadata.log
# write infered metadata to file
echo "\${ends},\${stranded},\${spike},\${species},\${align_ercc},\${align_hu},\${align_mo},\${percentF},\${percentR},\${fail}" 1>> infer.csv
...
...
@@ -589,24 +602,25 @@ process getRef {
then
reference=\$(echo \${references}/)
fi
echo "LOG: species set to \${references}" >> ${repRID}.getRef.log
echo
-e
"LOG: species set to \${references}" >> ${repRID}.getRef.log
# retreive appropriate reference appropriate location
echo -e "LOG: fetching ${species} reference files from ${referenceBase}" >> ${repRID}.getRef.log
if [ ${referenceBase} == "s3://bicf-references" ]
then
echo "LOG: grabbing reference files from S3" >> ${repRID}.getRef.log
echo
-e
"LOG: grabbing reference files from S3" >> ${repRID}.getRef.log
aws s3 cp "\${references}" /hisat2 ./ --recursive
aws s3 cp "\${references}" /bed ./ --recursive
aws s3 cp "\${references}" /*.fna --recursive
aws s3 cp "\${references}" /*.gtf --recursive
elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
then
echo "LOG: using pre-defined locations for reference files" >> ${repRID}.getRef.log
ln -s "\${references}"/hisat2
ln -s "\${references}"/bed
ln -s "\${references}"/genome.fna
ln -s "\${references}"/genome.gtf
fi
echo -e "LOG: fetched" >> ${repRID}.getRef.log
"""
}
...
...
@@ -656,27 +670,27 @@ process alignData {
strandedParam="--rna-strandness RF"
fi
# align the reads with Hisat 2
# align the reads with Hisat2
echo -e "LOG: aligning ${ends}" >> ${repRID}.align.log
if [ "${ends}" == "se" ]
then
echo "LOG: running Hisat2 with single-end settings" >> ${repRID}.align.log
hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary
elif [ "${ends}" == "pe" ]
then
echo "LOG: running Hisat2 with paired-end settings" >> ${repRID}.align.log
hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome \${strandedParam} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary
fi
echo -e "LOG: alignined" >> ${repRID}.align.log
# convert the output sam file to a sorted bam file using Samtools
echo "LOG: converting from sam to bam" >> ${repRID}.align.log
echo
-e
"LOG: converting from sam to bam" >> ${repRID}.align.log
samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam
# sort the bam file using Samtools
echo "LOG: sorting the bam file" >> ${repRID}.align.log
echo
-e
"LOG: sorting the bam file" >> ${repRID}.align.log
samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam
# index the sorted bam using Samtools
echo "LOG: indexing sorted bam file" >> ${repRID}.align.log
echo
-e
"LOG: indexing sorted bam file" >> ${repRID}.align.log
samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bam.bai
"""
}
...
...
@@ -707,15 +721,18 @@ process dedupData {
ulimit -a >> ${repRID}.dedup.log
# remove duplicated reads using Picard's MarkDuplicates
echo "LOG:
running picard MarkDuplicates to remove
duplicat
e
reads" >> ${repRID}.dedup.log
echo
-e
"LOG:
de
duplicat
ing
reads" >> ${repRID}.dedup.log
java -jar /picard/build/libs/picard.jar MarkDuplicates I=${bam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true
echo -e "LOG: deduplicated" >> ${repRID}.dedup.log
# sort the bam file using Samtools
echo -e "LOG: sorting the bam file" >> ${repRID}.dedup.log
samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.deduped.bam ${repRID}.deduped.bam
# index the sorted bam using Samtools
echo -e "LOG: indexing sorted bam file" >> ${repRID}.dedup.log
samtools index -@ `nproc` -b ${repRID}.sorted.deduped.bam ${repRID}.sorted.deduped.bam.bai
# split the deduped BAM file for multi-threaded tin calculation
for i in `samtools view ${repRID}.sorted.deduped.bam | cut -f3 | sort | uniq`;
do
...
...
@@ -749,9 +766,10 @@ process makeBigWig {
hostname > ${repRID}.makeBigWig.log
ulimit -a >> ${repRID}.makeBigWig.log
#
run bamCoverage
echo "LOG:
Runn
ing bi
g
Wig
bamCoverage
" >> ${repRID}.makeBigWig.log
#
create bigwig
echo
-e
"LOG:
creat
ing bi
b
Wig" >> ${repRID}.makeBigWig.log
bamCoverage -p `nproc` -b ${bam} -o ${repRID}.bw
echo -e "LOG: created" >> ${repRID}.makeBigWig.log
"""
}
...
...
@@ -783,19 +801,19 @@ process countData {
if [ "${stranded}" == "unstranded" ]
then
stranding=0
echo "LOG: strandedness set to unstranded [0]" >> ${repRID}.countData.log
echo
-e
"LOG: strandedness set to unstranded [0]" >> ${repRID}.countData.log
elif [ "${stranded}" == "forward" ]
then
stranding=1
echo "LOG: strandedness set to forward stranded [1]" >> ${repRID}.countData.log
echo
-e
"LOG: strandedness set to forward stranded [1]" >> ${repRID}.countData.log
elif [ "${stranded}" == "reverse" ]
then
stranding=2
echo "LOG: strandedness set to forward stranded [2]" >> ${repRID}.countData.log
echo
-e
"LOG: strandedness set to forward stranded [2]" >> ${repRID}.countData.log
fi
# run featureCounts
echo "LOG:
r
un
n
ing
featureCounts on the data
" >> ${repRID}.countData.log
echo
-e
"LOG:
co
un
t
ing
${ends} features
" >> ${repRID}.countData.log
if [ "${ends}" == "se" ]
then
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
...
...
@@ -803,9 +821,10 @@ process countData {
then
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
fi
echo -e "LOG: counted" >> ${repRID}.countData.log
# calculate TPM from the resulting countData table
echo "LOG: calculating TPM with R" >> ${repRID}.countData.log
echo
-e
"LOG: calculating TPM with R" >> ${repRID}.countData.log
Rscript calculateTPM.R --count "${repRID}.countData"
"""
}
...
...
@@ -828,7 +847,7 @@ process fastqc {
ulimit -a >> ${repRID}.fastqc.log
# run fastqc
echo "LOG:
begi
nning
F
ast
QC analysis of the data
" >> ${repRID}.fastqc.log
echo
-e
"LOG:
ru
nning
f
ast
q on raw fastqs
" >> ${repRID}.fastqc.log
fastqc *.fastq.gz -o .
"""
}
...
...
@@ -861,13 +880,19 @@ process dataQC {
done | parallel -j `nproc` -k 1>> ${repRID}.sorted.deduped.tin.xls
# bin TIN values
echo -e "LOG: binning TINs" >> ${repRID}.dataQC.log
python3 ${script_tinHist} -r ${repRID}
echo -e "LOG: binned" >> ${repRID}.dataQC.log
# calculate inner-distances for PE data
if [ "${ends}" == "pe" ]
then
echo -e "LOG: calculating inner distances for ${ends}" >> ${repRID}.dataQC.log
inner_distance.py -i "${bam}" -o ${repRID}.insertSize -r ./bed/genome.bed
else
echo -e "LOG: calculated" >> ${repRID}.dataQC.log
elif [ "${ends}" == "se" ]
then
echo -e "LOG: creating dummy inner distance file for ${ends}" >> ${repRID}.dataQC.log
touch ${repRID}.insertSize.inner_distance_freq.txt
fi
"""
...
...
@@ -910,22 +935,26 @@ process aggrQC {
ulimit -a >> ${repRID}.aggrQC.log
# make RID table
echo -e "LOG: creating RID table" >> ${repRID}.aggrQC.log
echo -e "Replicate RID\tExperiment RID\tStudy RID" > rid.tsv
echo -e "${repRID}\t${expRID}\t${studyRID}" >> rid.tsv
# make metadata table
echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in" > metadata.tsv
echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}" >> metadata.tsv
echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}" >> metadata.tsv
echo -e "Manual\t-\t${endsManual}\t-\t-" >> metadata.tsv
# remove inner distance report if it is empty (SE repRID)
echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log
if [ wc -l ${innerDistance} | awk '{print\${1}}' -eq 0 ]
then
rm -f ${innerDistance}
fi
# run MultiQC
echo -e "LOG: running multiqc" >> ${repRID}.aggrQC.log
multiqc -c ${multiqcConfig} .
"""
}
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment