diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 0c56150ca6923ba7cea1c6da0699e92b83f2f1e6..eaeff79747ff0e111fbfccd07b2506b7b57842d6 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -46,7 +46,7 @@ script_inferMeta = Channel.fromPath("${baseDir}/scripts/inferMeta.sh")
 */
 process getBag {
   tag "${repRID}"
-  publishDir "${logsDir}", mode: "copy", pattern: "*.getBag.err"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.getBag.{out,err}"

   input:
     path credential, stageAs: "credential.json" from deriva
@@ -58,16 +58,17 @@ process getBag {

   script:
     """
-    hostname >>${repRID}.getBag.err
-    ulimit -a >>${repRID}.getBag.err
+    hostname > ${repRID}.getBag.err
+    ulimit -a >> ${repRID}.getBag.err
     export https_proxy=\${http_proxy}

     # link credential file for authentication
-    ln -sf `readlink -e credential.json` ~/.deriva/credential.json 2>>${repRID}.getBag.err
-    echo "LOG: deriva credentials linked" >>${repRID}.getBag.err
+    ln -sf `readlink -e credential.json` ~/.deriva/credential.json 1>> ${repRID}.getBag.out 2>> ${repRID}.getBag.err
+    echo "LOG: deriva credentials linked" >> ${repRID}.getBag.err

     # deriva-download replicate RID
-    deriva-download-cli dev.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID} 2>>${repRID}.getBag.err
+    echo "LOG: fetching deriva catalog for selected RID in GUDMAP." >> ${repRID}.getBag.err
+    deriva-download-cli dev.gudmap.org --catalog 2 ${derivaConfig} . rid=${repRID} 1>> ${repRID}.getBag.out 2>> ${repRID}.getBag.err
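+    # note: the download is expected to leave the replicate's zipped BDBag in the
+    # work directory; getData below unzips it and fetches the fastqs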
     """
 }

@@ -76,7 +77,7 @@ process getBag {
 */
 process getData {
   tag "${repRID}"
-  publishDir "${logsDir}", mode: "copy", pattern: "*.getData.err"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.getData.{out,err}"

   input:
     path script_bdbagFetch
@@ -88,29 +89,29 @@ process getData {
     path ("**/File.csv") into fileMeta
     path ("**/Experiment Settings.csv") into experimentSettingsMeta
     path ("**/Experiment.csv") into experimentMeta
-    path ("${repRID}.getData.err")
+    path ("${repRID}.getData.{out,err}")

   script:
     """
-    hostname >>${repRID}.getData.err
-    ulimit -a >>${repRID}.getData.err
+    hostname > ${repRID}.getData.err
+    ulimit -a >> ${repRID}.getData.err
     export https_proxy=\${http_proxy}

     # link deriva cookie for authentication
-    ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt >>${repRID}.getData.err
-    echo "LOG: deriva cookie linked" >>${repRID}.getData.err
+    ln -sf `readlink -e deriva-cookies.txt` ~/.bdbag/deriva-cookies.txt 1>> ${repRID}.getData.out 2>> ${repRID}.getData.err
+    echo "LOG: deriva cookie linked" >> ${repRID}.getData.err

     # get bagit basename
-    replicate=\$(basename "${bagit}" | cut -d "." -f1)
-    echo "LOG: \${replicate}" >>${repRID}.getData.err
+    replicate=\$(basename "${bagit}" | cut -d "." -f1) 1>> ${repRID}.getData.out 2>> ${repRID}.getData.err
+    echo "LOG: \${replicate}" >> ${repRID}.getData.err

     # unzip bagit
-    unzip ${bagit} 2>>${repRID}.getData.err
-    echo "LOG: replicate bdbag unzipped" >>${repRID}.getData.err
+    unzip ${bagit} 1>> ${repRID}.getData.out 2>> ${repRID}.getData.err
+    echo "LOG: replicate bdbag unzipped" >> ${repRID}.getData.err

     # bagit fetch fastq"s only and rename by repRID
-    sh ${script_bdbagFetch} \${replicate} ${repRID} 2>>${repRID}.getData.err
-    echo "LOG: replicate bdbag fetched" >>${repRID}.getData.err
+    sh ${script_bdbagFetch} \${replicate} ${repRID} 1>> ${repRID}.getData.out 2>> ${repRID}.getData.err
+    echo "LOG: replicate bdbag fetched" >> ${repRID}.getData.err
     """
 }

@@ -125,7 +126,7 @@ fastqs.into {
 */
 process parseMetadata {
   tag "${repRID}"
-  publishDir "${logsDir}", mode: "copy", pattern: "*.parseMetadata.err"
+  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.parseMetadata.{out,err}"

   input:
     path script_parseMeta
@@ -136,35 +137,36 @@ process parseMetadata {

   output:
     path "design.csv" into metadata
+    path "${repRID}.parseMetadata.{out,err}"

   script:
     """
-    hostname >>${repRID}.parseMetadata.err
-    ulimit -a >>${repRID}.parseMetadata.err
+    hostname > ${repRID}.parseMetadata.err
+    ulimit -a >> ${repRID}.parseMetadata.err

     # Check replicate RID metadata
     rep=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p repRID)
-    echo "LOG: replicate RID metadata parsed: \${rep}" >>${repRID}.parseMetadata.err
+    echo "LOG: replicate RID metadata parsed: \${rep}" >> ${repRID}.parseMetadata.err

     # Get endedness metadata
     endsMeta=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p endsMeta)
-    echo "LOG: endedness metadata parsed: \${endsMeta}" >>${repRID}.parseMetadata.err
+    echo "LOG: endedness metadata parsed: \${endsMeta}" >> ${repRID}.parseMetadata.err

     # Manually get endness
     endsManual=\$(python3 ${script_parseMeta} -r ${repRID} -m "${fileMeta}" -p endsManual)
-    echo "LOG: endedness manually detected: \${endsManual}" >>${repRID}.parseMetadata.err
+    echo "LOG: endedness manually detected: \${endsManual}" >> ${repRID}.parseMetadata.err

     # Get strandedness metadata
     stranded=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p stranded -e \${endsManual})
-    echo "LOG: strandedness metadata parsed: \${stranded}" >>${repRID}.parseMetadata.err
+    echo "LOG: strandedness metadata parsed: \${stranded}" >> ${repRID}.parseMetadata.err

     # Get spike-in metadata
     spike=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettingsMeta}" -p spike)
-    echo "LOG: spike-in metadata parsed: \${spike}" >>${repRID}.parseMetadata.err
+    echo "LOG: spike-in metadata parsed: \${spike}" >> ${repRID}.parseMetadata.err

     # Get species metadata
     species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentMeta}" -p species)
-    echo "LOG: species metadata parsed: \${species}" >>${repRID}.parseMetadata.err
+    echo "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.err
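+
+    # design.csv columns, written below: rep, endsMeta, endsManual, stranded, spike, species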
("*") into reference - + tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf") into reference + path ("${repRID}.getRef.{out,err}") + script: """ - hostname >>${repRID}.getRef.err - ulimit -a >>${repRID}.getRef.err + hostname > ${repRID}.getRef.err + ulimit -a >> ${repRID}.getRef.err export https_proxy=\${http_proxy} # run set the reference name @@ -230,6 +234,7 @@ process getRef { then references=\$(echo ${referenceBase}/GRCh${refHuVersion}) else + echo -e "LOG: ERROR - References could not be set!\nSpecies reference found: ${species_getRef}" >> ${repRID}.getRef.err exit 1 fi if [ "${spike_getRef}" == "yes" ] @@ -239,20 +244,23 @@ process getRef { then reference=\$(echo \${references}/) fi + echo "LOG: species set to \${references}" >> ${repRID}.getRef.err # retreive appropriate reference appropriate location if [ ${referenceBase} == "s3://bicf-references" ] then - aws s3 cp "\${references}" /hisat2 ./ --recursive >>${repRID}.getRef.err - aws s3 cp "\${references}" /bed ./ --recursive >>${repRID}.getRef.err - aws s3 cp "\${references}" /*.fna --recursive >>${repRID}.getRef.err - aws s3 cp "\${references}" /*.gtf --recursive >>${repRID}.getRef.err + echo "LOG: grabbing reference files from S3" >> ${repRID}.getRef.err + aws s3 cp "\${references}" /hisat2 ./ --recursive 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err + aws s3 cp "\${references}" /bed ./ --recursive 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err + aws s3 cp "\${references}" /*.fna --recursive 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err + aws s3 cp "\${references}" /*.gtf --recursive 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ] then - ln -s "\${references}"/hisat2 >>${repRID}.getRef.err - ln -s "\${references}"/bed >>${repRID}.getRef.err - ln -s "\${references}"/genome.fna >>${repRID}.getRef.err - ln -s "\${references}"/genome.gtf >>${repRID}.getRef.err + echo "LOG: using pre-defined locations for reference files" >> ${repRID}.getRef.err + ln -s "\${references}"/hisat2 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err + ln -s "\${references}"/bed 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err + ln -s "\${references}"/genome.fna 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err + ln -s "\${references}"/genome.gtf 1>> ${repRID}.getRef.out 2>> ${repRID}.getRef.err fi """ } @@ -269,7 +277,7 @@ reference.into { */ process trimData { tag "${repRID}" - publishDir "${logsDir}", mode: "copy", pattern: "*.trimData.*" + publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.trimData.{out,err}" input: val endsManual_trimData @@ -278,21 +286,22 @@ process trimData { output: path ("*.fq.gz") into fastqs_trimmed path ("*_trimming_report.txt") into trimQC - path ("${repRID}.trimData.log") - path ("${repRID}.trimData.err") + path ("${repRID}.trimData.{out,err}") script: """ - hostname >>${repRID}.trimData.err - ulimit -a >>${repRID}.trimData.err + hostname > ${repRID}.trimData.err + ulimit -a >> ${repRID}.trimData.err - # trim fastqs + #Trim fastqs using trim_galore if [ "${endsManual_trimData}" == "se" ] then - trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err + echo "LOG: running trim_galore using single-end settings" >> ${repRID}.trimData.err + trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]} 1>> ${repRID}.trimData.out 2>> ${repRID}.trimData.err elif [ 
"${endsManual_trimData}" == "pe" ] then - trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]} 1>>${repRID}.trimData.log 2>>${repRID}.trimData.err + echo "LOG: running trim_galore using paired-end settings" >> ${repRID}.trimData.err + trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]} 1>> ${repRID}.trimData.out 2>> ${repRID}.trimData.err fi """ } @@ -302,7 +311,7 @@ process trimData { */ process alignData { tag "${repRID}" - publishDir "${logsDir}", mode: "copy", pattern: "*.align.{out,err}" + publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.align.{out,err}" input: val endsManual_alignData @@ -313,27 +322,35 @@ process alignData { output: tuple val ("${repRID}"), path ("${repRID}.sorted.bam"), path ("${repRID}.sorted.bam.bai") into rawBam path ("*.alignSummary.txt") into alignQC - path ("${repRID}.align.out") - path ("${repRID}.align.err") + path ("${repRID}.align.{out,err}") script: """ - hostname >${repRID}.align.err - ulimit -a >>${repRID}.align.err + hostname > ${repRID}.align.err + ulimit -a >> ${repRID}.align.err - # align reads + #Align the reads with Hisat 2 if [ "${endsManual_alignData}" == "se" ] then - hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>${repRID}.align.out 2>${repRID}.align.err + echo "LOG: running Hisat2 with single-end settings" >> ${repRID}.align.err + hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} -U ${fastq[0]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>> ${repRID}.align.out 2>> ${repRID}.align.err elif [ "${endsManual_alignData}" == "pe" ] then - hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>${repRID}.align.out 2>${repRID}.align.err + echo "LOG: running Hisat2 with paired-end settings" >> ${repRID}.align.err + hisat2 -p `nproc` --add-chrname --un-gz ${repRID}.unal.gz -S ${repRID}.sam -x hisat2/genome ${stranded_alignData} --no-mixed --no-discordant -1 ${fastq[0]} -2 ${fastq[1]} --summary-file ${repRID}.alignSummary.txt --new-summary 1>> ${repRID}.align.out 2> ${repRID}.align.err fi - # convert sam to bam and sort and index - samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>>${repRID}.align.out 2>>${repRID}.align.err; - samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>>${repRID}.align.out 2>>${repRID}.align.err; - samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bam.bai 1>>${repRID}.align.out 2>>${repRID}.align.err; + #Convert the output sam file to a sorted bam file using Samtools + echo "LOG: converting from sam to bam" >> ${repRID}.align.err + samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>> ${repRID}.align.out 2>> ${repRID}.align.err; + + #Sort the bam file using Samtools + echo "LOG: sorting the bam file" >> ${repRID}.align.err + samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>> ${repRID}.align.out 2>> ${repRID}.align.err; + + #Index the sorted bam using Samtools + echo "LOG: indexing sorted bam file" >> ${repRID}.align.err + samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bam.bai 1>> 
+    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o ${repRID}.bam ${repRID}.sam 1>> ${repRID}.align.out 2>> ${repRID}.align.err;
+
+    #Sort the bam file using Samtools
+    echo "LOG: sorting the bam file" >> ${repRID}.align.err
+    samtools sort -@ `nproc` -O BAM -o ${repRID}.sorted.bam ${repRID}.bam 1>> ${repRID}.align.out 2>> ${repRID}.align.err;
+
+    #Index the sorted bam using Samtools
+    echo "LOG: indexing sorted bam file" >> ${repRID}.align.err
+    samtools index -@ `nproc` -b ${repRID}.sorted.bam ${repRID}.sorted.bam.bai 1>> ${repRID}.align.out 2>> ${repRID}.align.err;
     """
 }

@@ -348,26 +365,32 @@ rawBam.into {
 process dedupData {
   tag "${repRID}"
   publishDir "${outDir}/bam", mode: 'copy', pattern: "*.deduped.bam"
-  publishDir "${logsDir}", mode: 'copy', pattern: "*.dedup.{out,err}"
+  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.dedup.{out,err}"

   input:
     set val (repRID), path (inBam), path (inBai) from rawBam_dedupData

   output:
-    tuple val ("${repRID}"), path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam
+    tuple path ("${repRID}.sorted.deduped.bam"), path ("${repRID}.sorted.deduped.bam.bai") into dedupBam
     path ("*.deduped.Metrics.txt") into dedupQC
-    path ("${repRID}.dedup.out")
-    path ("${repRID}.dedup.err")
+    path ("${repRID}.dedup.{out,err}")

   script:
     """
-    hostname >${repRID}.dedup.err
-    ulimit -a >>${repRID}.dedup.err
+    hostname > ${repRID}.dedup.err
+    ulimit -a >> ${repRID}.dedup.err
+
+    # remove duplicated reads using Picard's MarkDuplicates
+    echo "LOG: running picard MarkDuplicates to remove duplicate reads" >> ${repRID}.dedup.err
+    java -jar /picard/build/libs/picard.jar MarkDuplicates I=${inBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>> ${repRID}.dedup.out 2>> ${repRID}.dedup.err
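+    # REMOVE_DUPLICATES=true drops duplicate reads from the output bam entirely
+    # rather than only flagging them; metrics go to ${repRID}.deduped.Metrics.txt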
("${repRID}.makeFeatureCounts.{out,err}") script: """ + hostname > ${repRID}.makeFeatureCounts.err + ulimit -a >> ${repRID}.makeFeatureCounts.err + + #Determine strandedness and setup strandig for featureCounts + stranding=0; + if [ "${stranded_featureCounts}" == "--rna-strandness F" ] || [ "${stranded_featureCounts}" == "--rna-strandness FR" ] + then + stranding=1 + echo "LOG: strandedness set to stranded [1]" >> ${repRID}.makeFeatureCounts.err + else + stranding=0 + echo "LOG: strandedness set to unstranded [0]" >> ${repRID}.makeFeatureCounts.err + fi; + #Run featureCounts + echo "LOG: running featureCounts on the data" >> ${repRID}.makeFeatureCounts.err if [ "${endsManual_featureCounts }" == "se" ] then - featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s 1 -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam; + featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err elif [ "${endsManual_featureCounts }" == "pe" ] then - featureCounts -R SAM -p -G ./genmome.fna -T `nproc` -s 1 -p -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam; + featureCounts -R SAM -p -G ./genmome.fna -T `nproc` -s \${stranding} -p -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup -B ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err fi - Rscript calculateTPM.R --count "${repRID}.featureCounts"; + + #Calculate TMP from the resulting featureCounts table + echo "LOG: calculating TMP with R" >> ${repRID}.makeFeatureCounts.err + Rscript calculateTPM.R --count "${repRID}.featureCounts" 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err """ } @@ -434,21 +483,23 @@ process makeFeatureCounts { process fastqc { tag "${repRID}" publishDir "${outDir}/fastqc", mode: 'copy', pattern: "*_fastqc.zip" - publishDir "${logsDir}", mode: 'copy', pattern: "*.fastq.err" + publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.fastqc.{out,err}" input: path (fastq) from fastqs_fastqc output: path ("*_fastqc.zip") into fastqc + path ("${repRID}.fastqc.{out,err}") script: """ - hostname >${repRID}.fastqc.err - ulimit -a >>${repRID}.fastqc.err + hostname > ${repRID}.fastqc.err + ulimit -a >> ${repRID}.fastqc.err # run fastqc - fastqc *.fastq.gz -o . >>${repRID}.fastqc.err + echo "LOG: beginning FastQC analysis of the data" >> ${repRID}.fastqc.err + fastqc *.fastq.gz -o . 
+    #Run featureCounts
+    echo "LOG: running featureCounts on the data" >> ${repRID}.makeFeatureCounts.err
     if [ "${endsManual_featureCounts }" == "se" ]
     then
-      featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s 1 -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam;
+      featureCounts -R SAM -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err
     elif [ "${endsManual_featureCounts }" == "pe" ]
     then
-      featureCounts -R SAM -p -G ./genmome.fna -T `nproc` -s 1 -p -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' ${repRID}.sorted.deduped.bam;
+      featureCounts -R SAM -p -G ./genome.fna -T `nproc` -s \${stranding} -a ./genome.gtf -o ${repRID}.featureCounts -g 'gene_name' --primary --ignoreDup -B ${repRID}.sorted.deduped.bam 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err
     fi

-    Rscript calculateTPM.R --count "${repRID}.featureCounts";
+
+    #Calculate TPM from the resulting featureCounts table
+    echo "LOG: calculating TPM with R" >> ${repRID}.makeFeatureCounts.err
+    Rscript calculateTPM.R --count "${repRID}.featureCounts" 1>> ${repRID}.makeFeatureCounts.out 2>> ${repRID}.makeFeatureCounts.err
     """
 }

@@ -434,21 +483,23 @@ process makeFeatureCounts {
 process fastqc {
   tag "${repRID}"
   publishDir "${outDir}/fastqc", mode: 'copy', pattern: "*_fastqc.zip"
-  publishDir "${logsDir}", mode: 'copy', pattern: "*.fastq.err"
+  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.fastqc.{out,err}"

   input:
     path (fastq) from fastqs_fastqc

   output:
     path ("*_fastqc.zip") into fastqc
+    path ("${repRID}.fastqc.{out,err}")

   script:
     """
-    hostname >${repRID}.fastqc.err
-    ulimit -a >>${repRID}.fastqc.err
+    hostname > ${repRID}.fastqc.err
+    ulimit -a >> ${repRID}.fastqc.err

     # run fastqc
-    fastqc *.fastq.gz -o . >>${repRID}.fastqc.err
+    echo "LOG: beginning FastQC analysis of the data" >> ${repRID}.fastqc.err
+    fastqc *.fastq.gz -o . 1>> ${repRID}.fastqc.out 2>> ${repRID}.fastqc.err
     """
 }

@@ -458,38 +509,40 @@ process fastqc {

 process inferMetadata {
   tag "${repRID}"
-  publishDir "${logsDir}", mode: 'copy', pattern: "*.rseqc.err"
+  publishDir "${logsDir}", mode: 'copy', pattern: "${repRID}.rseqc.{out,err}"

   input:
     path script_inferMeta
     path reference_inferMeta
-    set val (repRID), path (inBam), path (inBai) from dedupBam_inferMeta
+    set path (inBam), path (inBai) from dedupBam_inferMeta

   output:
     path "infer.csv" into inferedMetadata
     path "${inBam.baseName}.tin.xls" into tin
     path "${repRID}.insertSize.inner_distance_freq.txt" optional true into innerDistance
-
+    path "${repRID}.rseqc.{out,err}" optional true

   script:
     """
-    hostname >${repRID}.rseqc.err
-    ulimit -a >>${repRID}.rseqc.err
+    hostname > ${repRID}.rseqc.err
+    ulimit -a >> ${repRID}.rseqc.err

     # infer experimental setting from dedup bam
-    infer_experiment.py -r ./bed/genome.bed -i "${inBam}" >${repRID}.rseqc.log
+    echo "LOG: running inference from bed file" >> ${repRID}.rseqc.err
+    infer_experiment.py -r ./bed/genome.bed -i "${inBam}" > ${repRID}.rseqc.log 2>> ${repRID}.rseqc.err

-    endness=`bash inferMeta.sh endness ${repRID}.rseqc.log`
-    fail=`bash inferMeta.sh fail ${repRID}.rseqc.log`
+    echo "LOG: determining endedness and strandedness from file" >> ${repRID}.rseqc.err
+    endness=`bash inferMeta.sh endness ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
+    fail=`bash inferMeta.sh fail ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
     if [ \${endness} == "PairEnd" ]
     then
-      percentF=`bash inferMeta.sh pef ${repRID}.rseqc.log`
-      percentR=`bash inferMeta.sh per ${repRID}.rseqc.log`
-      inner_distance.py -i "${inBam}" -o ${repRID}.insertSize -r ./bed/genome.bed
+      percentF=`bash inferMeta.sh pef ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
+      percentR=`bash inferMeta.sh per ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
+      inner_distance.py -i "${inBam}" -o ${repRID}.insertSize -r ./bed/genome.bed 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
     elif [ \${endness} == "SingleEnd" ]
     then
-      percentF=`bash inferMeta.sh sef ${repRID}.rseqc.log`
-      percentR=`bash inferMeta.sh ser ${repRID}.rseqc.log`
+      percentF=`bash inferMeta.sh sef ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
+      percentR=`bash inferMeta.sh ser ${repRID}.rseqc.log` 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err
     fi
     if [ \$percentF -gt 0.25 ] && [ \$percentR -lt 0.25 ]
     then
@@ -513,11 +566,12 @@ process inferMetadata {
       stranded="unstranded"
       strategy="us"
     fi
+    echo -e "LOG: strategy set to \${strategy}\nstrandedness set to \${stranded}" >> ${repRID}.rseqc.err

     # calcualte TIN values per feature
-    tin.py -i "${inBam}" -r ./bed/genome.bed
+    tin.py -i "${inBam}" -r ./bed/genome.bed 1>> ${repRID}.rseqc.out 2>> ${repRID}.rseqc.err

     # write infered metadata to file
-    echo \${endness},\${stranded},\${strategy},\${percentF},\${percentR},\${fail} > infer.csv
+    echo \${endness},\${stranded},\${strategy},\${percentF},\${percentR},\${fail} > infer.csv 2>> ${repRID}.rseqc.err
     """
-}
\ No newline at end of file
+}