Commit 436e0836 authored by Spencer Barnes
Browse files

add mouse test back in

parent 4ebabd72
Pipeline #7210 failed with stages
in 29 minutes and 46 seconds
......@@ -22,7 +22,7 @@ astrocyte_cli:
when:
- always
GRCh38-3.0.0:
GRCh38-0.0.1:
stage: reference_test
except:
- tags
......@@ -38,3 +38,28 @@ GRCh38-3.0.0:
- .nextflow.log
- workflow/output/multiqcReport/multiqc_report.html
expire_in: 2 days
# Single-end reference test job. Runs the workflow on the bundled test data
# and then the single-end pytest markers. Restricted to the develop and
# master refs and never run for tags.
mm10-0.0.1:
  stage: reference_test
  only:
    refs:
      - develop
      - master
  except:
    - tags
  script:
    - nextflow run ./workflow/main.nf --designFile "./test_data/test_design_se.csv" --reads "./test_data/*fastq.gz"
    - pytest -m methData_se
    - pytest -m singleend
    - pytest -m trimData_se
  artifacts:
    name: "$CI_JOB_NAME"
    when: always
    paths:
      - .nextflow.log
      # The multiqcReport process publishes to "$outDir/${task.process}",
      # i.e. workflow/output/multiqcReport/ (same path the GRCh38 job uses).
      - workflow/output/multiqcReport/multiqc_report.html
    expire_in: 2 days
  retry:
    max: 1
    when:
      - always
#!/usr/bin/env nextflow
/*
BICF RRBS/WGBS Analysis Workflow
#### Homepage / Documentation
https://git.biohpc.swmed.edu/BICF/Astrocyte/methylation_analysis/
Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/methylation_analysis/LICENSE.md)
*/
// Pipeline tracking flags; both default to false.
// NOTE(review): presumably meant to mark CI and development runs in the
// tracking record sent by trackStart - confirm against that payload.
params.ci = false
params.dev = false
//
// Report the start of a pipeline run to the tracking endpoint.
// Prints the host name and resource limits, copies http_proxy into
// https_proxy, then PUTs a JSON record describing this session.
process trackStart {
    script:
    // "ci" and "dev" are taken from params.ci / params.dev (both default to
    // false) instead of being hard-coded, so the declared tracking flags
    // actually reach the payload. They are emitted unquoted and therefore
    // must be booleans.
    """
    hostname
    ulimit -a
    export https_proxy=\${http_proxy}
    curl -H 'Content-Type: application/json' -X PUT -d \
        '{ \
            "sessionId": "${workflow.sessionId}", \
            "pipeline": "methylation_analysis", \
            "start": "${workflow.start}", \
            "status": "started", \
            "nextflowVersion": "${workflow.nextflow.version}", \
            "astrocyte": false, \
            "ci": ${params.ci}, \
            "dev": ${params.dev} \
        }' \
        "https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
    """
}
// Default parameter values to run tests
params.output = "$baseDir"
params.designFile = "$baseDir/../test_data/test_design_se.csv"
params.pairedEnd = false
// NOTE(review): the active default below points at a "paired" directory under
// a user-specific /work path, while pairedEnd is false and the design file is
// the single-end one - confirm which default is intended.
//params.reads = "$baseDir/../test_data/single/*.fastq.gz"
params.reads = "/work/BICF/s181385/methylation_test_data/shortened/paired/*.fastq.gz"
//params.reads = "/work/BICF/s181385/methylation_test_data/shortened/single/*.fastq.gz"
//params.reads = "/work/BICF/s181385/test_again/*.fastq.gz"
//params.reads = "/work/BICF/s181385/methylation_test_data/paired/*.fastq.gz"
//params.reads = "/work/BICF/s181385/methylation_test_data/single/*.fastq.gz"
//params.reads = "/project/BICF/BICF_Core/shared/bicf_collaborations/effort240_MunshiNi/data/bennett_data/*.fastq.gz"
params.rrbs = true
params.enzyme = "Msp1"
params.genome = "GRCm38"
params.outDir= "$baseDir/output"
params.hisat2 = false
params.references = "$baseDir/../docs/references.md"
params.multiqc = "$baseDir/conf/multiqc_config.yaml"
runDir = params.output
// designFile is intentionally not assigned here: it is defined once, as a
// channel built from params.designFile, in the variable section further down.
// The plain-string assignment that used to sit here was overwritten before
// any use.
// Build fileList.tsv: one "<file name><TAB><full path>" line per file
// matched by params.reads.
readsList = Channel
    .fromPath( params.reads )
    .flatten()
    .map { fastq ->
        def baseName = fastq.getFileName().toString()
        def fullPath = fastq.toString()
        [ baseName, fullPath ].join("\t")
    }
    .collectFile( name: "fileList.tsv", newLine: true )
// Copy the parameters into plain script variables used by the processes
pairedEnd = params.pairedEnd
rrbs = params.rrbs
enzyme = params.enzyme
hisat2 = params.hisat2
reads = params.reads
outDir = params.outDir
references = params.references
designFile = Channel.fromPath(params.designFile)

// Map the two known genome names to their Bismark index directories;
// any other value of params.genome is passed through unchanged.
genome = params.genome
switch (params.genome) {
    case "GRCm38":
        genome = "/project/shared/bicf_workflow_ref/methylation_bismark_indices/GRCm38"
        break
    case "GRCh38":
        genome = "/project/shared/bicf_workflow_ref/methylation_bismark_indices/GRCh38"
        break
}
// Run check_design.py on the design file and the generated read-file list,
// publishing the resulting design.tsv under <outDir>/design.
process checkDesignFile {
    publishDir "$outDir/design", mode: "copy"

    input:
    file designFile
    file readsList

    output:
    file("design.tsv") into designFilePaths

    script:
    // The two commands differ only in the trailing -p, passed for paired-end runs.
    if (!pairedEnd) {
        """
        module load python/3.6.1-2-anaconda
        python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList
        """
    }
    else {
        """
        module load python/3.6.1-2-anaconda
        python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
        """
    }
}
// Turn each row of the checked design file into a tuple:
// [sample_id, [fastq paths], experiment_id, biosample, treatment, replicate].
// The fastq list holds both mates for paired-end runs, otherwise read 1 only.
rawReads = designFilePaths
    .splitCsv(sep: "\t", header: true)
    .map { row ->
        def fastqs = pairedEnd ? [row.fastq_read1, row.fastq_read2] : [row.fastq_read1]
        [ row.sample_id, fastqs, row.experiment_id, row.biosample, row.treatment, row.replicate ]
    }
// Trim each sample's reads by calling trim_reads_bismark.py with the
// trimgalore module loaded. Emits the trimmed *.fq.gz files together with the
// sample metadata, plus the trimming reports and version files.
process trimReads {
    queue "super"
    //queue "128GB,256GB,256GBv1"
    tag "$sampleId-$replicate"
    publishDir "$outDir/${task.process}/${sampleId}", mode: "copy"

    input:
    set sampleId, reads, experimentId, biosample, treatment, replicate from rawReads

    output:
    set sampleId, file("*.fq.gz"), experimentId, biosample, treatment, replicate into trimmedReads
    file("*trimming_report.txt") into trimgaloreReports
    file("version_*.txt") into trimReadsVersions

    script:
    // Single-end passes only the first read file; paired-end passes both and adds -p.
    if (!pairedEnd) {
        """
        module load python/3.6.1-2-anaconda
        module load trimgalore/0.4.1
        #module load trimgalore/0.6.4
        python3 $baseDir/scripts/trim_reads_bismark.py -e $enzyme -b -f ${reads[0]} -s $sampleId
        """
    }
    else {
        """
        module load python/3.6.1-2-anaconda
        module load trimgalore/0.4.1
        #module load trimgalore/0.6.4
        python3 $baseDir/scripts/trim_reads_bismark.py -e $enzyme -b -f ${reads[0]} ${reads[1]} -s $sampleId -p
        """
    }
}
// Align the trimmed reads by calling align_reads_bismark.py against the
// resolved genome path. The hisat2 flag selects which aligner module is
// loaded (hisat2 vs bowtie2) and adds -a to the command; pairedEnd selects
// the one-file or two-file form of -f and adds -p.
process alignReads {
    queue "super"
    //queue "128GB,256GB,256GBv1"
    tag "$sampleId-$replicate"
    publishDir "$outDir/${task.process}/${sampleId}", mode: "copy"

    input:
    set sampleId, reads, experimentId, biosample, treatment, replicate from trimmedReads
    //index from bismarkIndex ///make sure to fix this

    output:
    set sampleId, file("*.bam"), experimentId, biosample, treatment, replicate into mappedReads
    file("version_*.txt") into alignReadsVersions
    file("*report.txt") into alignReadsReports

    script:
    if (hisat2 && pairedEnd) {
        """
        module load python/3.6.1-2-anaconda
        module load samtools
        module load hisat2/2.1.0-intel
        module load bismark/0.21.0
        python3 $baseDir/scripts/align_reads_bismark.py -p -f ${reads[0]} ${reads[1]} -r $genome -a -s $sampleId
        """
    }
    else if (hisat2) {
        """
        module load python/3.6.1-2-anaconda
        module load samtools
        module load hisat2/2.1.0-intel
        module load bismark/0.21.0
        python3 $baseDir/scripts/align_reads_bismark.py -f $reads -r $genome -a -s $sampleId
        """
    }
    else if (pairedEnd) {
        """
        module load python/3.6.1-2-anaconda
        module load samtools
        module load bowtie2/gcc/2.3.4.3
        module load bismark/0.21.0
        python3 $baseDir/scripts/align_reads_bismark.py -p -f ${reads[0]} ${reads[1]} -r $genome -s $sampleId
        """
    }
    else {
        """
        module load python/3.6.1-2-anaconda
        module load samtools
        module load bowtie2/gcc/2.3.4.3
        module load bismark/0.21.0
        python3 $baseDir/scripts/align_reads_bismark.py -f $reads -r $genome -s $sampleId
        """
    }
}
// Run methylation_extract.py on each sample's aligned BAM output.
// Emits the *.cov.gz coverage file(s) with the sample metadata, and separately
// the C*_O*.txt / png / bedGraph.gz results, the reports and the version files.
process methylExtract {
    tag "$sampleId-$replicate"
    queue "super"
    //queue "128GB,256GB,256GBv1"
    publishDir "$outDir/${task.process}/${sampleId}", mode: "copy"
    //publishDir "$outDir/${task.process}/", mode: "copy"

    input:
    set sampleId, bams, experimentId, biosample, treatment, replicate from mappedReads

    output:
    //file("*.cov.gz") into coverageFiles
    set sampleId, file("*.cov.gz"), experimentId, biosample, treatment, replicate into extractedMeth
    set file("C*_O*.txt"), file("*png"), file("*bedGraph.gz") into extractMethResults
    file("*report.txt") into extractMethReports
    file("version_*.txt") into extractMethVersions

    script:
    """
    module load python/3.6.1-2-anaconda
    module load samtools
    module load bismark/0.21.0
    python3 ${baseDir}/scripts/methylation_extract.py -b ${bams}
    """
}
// Write one tab-separated line per extracted sample into design_diffmeth.tsv
// (stored under <outDir>/design, with a header row as seed) and expose that
// file as the diffmethDesign channel.
extractedMeth
    .map { id, covFile, expId, sample, condition, rep ->
        "$id\t$covFile\t$expId\t$sample\t$condition\t$rep\n"
    }
    .collectFile(
        name: "design_diffmeth.tsv",
        seed: "sampleId\tcoverage\texperimentId\tbiosample\ttreatment\treplicate\n",
        storeDir: "$outDir/design"
    )
    .set { diffmethDesign }
// Run diff_meth.R on the collected design file, passing the genome name
// from params.genome. Emits the *.txt / *.pdf / *html results and the
// recorded R version.
process diffMeth {
    //queue "super"
    queue "128GB,256GB,256GBv1"
    //tag "$sampleId-$replicate"
    publishDir "$outDir/${task.process}/", mode: "copy"

    input:
    file diffmethDesign

    output:
    set file("*.txt"), file("*.pdf"), file("*html") into diffMethResults
    file("version_r.txt") into diffMethVersions

    script:
    // NOTE(review): the script removes ~/R (a directory in the invoking
    // user's home) before running R, then lists the home directory. This is
    // destructive outside the task work dir - confirm it is intended.
    """
    module load R/3.5.1-gccmkl
    module load python/3.6.1-2-anaconda
    rm -rf ~/R
    ls ~
    R --version > version_r.txt
    Rscript ${baseDir}/scripts/diff_meth.R -f ${diffmethDesign} -a ${params.genome}
    """
}
// Gather every trimming, alignment and extraction report plus one version
// file from each upstream process, generate the software version/reference
// YAML files, and run MultiQC over the task directory.
process multiqcReport {
    queue "super"
    publishDir "$outDir/${task.process}", mode: "copy"

    input:
    // All reports are collected; only the first version file per process is used.
    file("trimReads/*") from trimgaloreReports.collect()
    file("alignReads/*") from alignReadsReports.collect()
    file("methylExtract/*") from extractMethReports.collect()
    file("trimReads_vf/*") from trimReadsVersions.first()
    file("alignReads_vf/*") from alignReadsVersions.first()
    file("methylExtract_vf/*") from extractMethVersions.first()
    file("r_vf/*") from diffMethVersions.first()

    output:
    file("software_versions_mqc.yaml") into softwareVersions
    file("software_references_mqc.yaml") into softwareReferences
    file("multiqc_report.html") into multiqcReport
    file("*_data") into multqcData

    script:
    """
    module load python/3.6.1-2-anaconda
    module load pandoc/2.7
    module load multiqc/1.7
    echo ${workflow.nextflow.version} > version_nextflow.txt
    python3 --version > version_python.txt
    multiqc --version > version_multiqc.txt
    python3 ${baseDir}/scripts/generate_references.py -r ${references} -o software_references
    python3 ${baseDir}/scripts/generate_versions.py -o software_versions
    multiqc .
    """
}
......@@ -3,6 +3,9 @@ process {
queue = 'super'
clusterOptions = '--hold'
withName: trackStart {
executor = 'local'
}
// Process specific configuration
withName: checkDesignFile {
module = ['python/3.6.1-2-anaconda']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment