main.nf 9.81 KB
Newer Older
Brandi Cantarel's avatar
Brandi Cantarel committed
1 2 3
#!/usr/bin/env nextflow

// Default parameter values to run tests
Brandi Cantarel's avatar
Brandi Cantarel committed
4 5
params.bamdna="$baseDir/../test_data/dna/safe/*.bam"
params.bamrna="$baseDir/../test_data/*.bam"
Brandi Cantarel's avatar
Brandi Cantarel committed
6 7
rnabams=file(params.bamrna)
dnabams=file(params.bamdna)
Brandi Cantarel's avatar
Brandi Cantarel committed
8
params.incdna = '0'
Brandi Cantarel's avatar
Brandi Cantarel committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87


params.design="$baseDir/../test_data/design.txt"
design_file = file(params.design)

params.genome="/project/shared/bicf_workflow_ref/GRCh38"
dbsnp="$params.genome/dbSnp.vcf.gz"
indel="$params.genome/GoldIndels.vcf.gz"
btoolsgenome = file("$params.genome/genomefile.txt")
knownindel=file(indel)
gatkref=file("$params.genome/genome.fa")
index_path = file(params.genome)
dbsnp=file(dbsnp)
genebed = file("$params.genome/gencode.genes.chr.bed")

snpeff_vers = 'GRCh38.82';
if (params.genome == '/project/shared/bicf_workflow_ref/GRCm38') {
   snpeff_vers = 'GRCm38.82';
}
if (params.genome == '/project/shared/bicf_workflow_ref/GRCh37') {
   snpeff_vers = 'GRCh37.75';
}

def fileMap = [:]

dnabams.each {
    final fileName = it.getFileName().toString()
    prefix = fileName.lastIndexOf('/')
    fileMap[fileName] = it
}
rnabams.each {
    final fileName = it.getFileName().toString()
    prefix = fileName.lastIndexOf('/')
    fileMap[fileName] = it
}


def bamset = []

new File(params.design).withReader { reader ->
    def hline = reader.readLine()
    def header = hline.split("\t")
    prefixidx = header.findIndexOf{it == 'SampleID'};
    dnaidx =  header.findIndexOf{it == 'DNAbam'};
    rnaidx = header.findIndexOf{it == 'RNAbam'};
    if (dnaidx == -1) {
       dnaidx = rnaidx
       }      
    while (line = reader.readLine()) {
    	   def row = line.split("\t")
	   if (fileMap.get(row[rnaidx]) != null) {
	      bamset << tuple(row[prefixidx],fileMap.get(row[rnaidx]),fileMap.get(row[dnaidx]))
	   }
}
}

if( ! bamset) { error "Didn't match any input files with entries in the design file" }

Channel
  .from(bamset)
  .set { initbam }

process gatkbam {

  publishDir "$baseDir/output", mode: 'copy'

  input:
  set pair_id, file(rbam), file(dbam) from initbam

  output:
  set pair_id,file("${pair_id}.rna.bam"),file("${pair_id}.rna.bai") into gatkbam
  set pair_id,file("${pair_id}.rg_added_sorted.bam"),file("${pair_id}.unmapped.bam"),file(dbam) into svbam
  file("${pair_id}.rna.bam") into sambam
  file("${pair_id}.rna.bai") into samidx
  file("${pair_id}.rna.bam") into ssbam
  file("${pair_id}.rna.bai") into ssidx
  script:
  """
  module load gatk/3.5 samtools/intel/1.3 speedseq/20160506 picard/1.127
Brandi Cantarel's avatar
Brandi Cantarel committed
88 89
  java -Xmx4g -jar \$PICARD/picard.jar CleanSam INPUT=${rbam} O=${pair_id}.clean.bam
  java -Xmx4g -jar \$PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id}
Brandi Cantarel's avatar
Brandi Cantarel committed
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
  sambamba markdup -t 20 -r ${pair_id}.rg_added_sorted.bam ${pair_id}.dedup.bam
  java -Xmx4g -jar \$GATK_JAR -T SplitNCigarReads -R ${gatkref} -I ${pair_id}.dedup.bam -o ${pair_id}.split.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS
  java -Xmx4g -jar \$GATK_JAR -l INFO -R ${gatkref} --knownSites ${dbsnp} -I ${pair_id}.split.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct 30
  java -Xmx4g -jar \$GATK_JAR -T PrintReads -R ${gatkref} -I ${pair_id}.split.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.rna.bam -nt 1 -nct 8
  sambamba view -t 30 -f bam -F "unmapped" -o ${pair_id}.unmapped.bam ${pair_id}.rg_added_sorted.bam
  """
}

process svcall {

  publishDir "$baseDir/output", mode: 'copy'
  cpus 32
  input:
  set pair_id,file(rbam),file(unmap),file(dnabam) from svbam

  output:
  set pair_id,file("${pair_id}.genefusions.txt") 

  script:
  if (params.incdna == '1')
  """
  module load integrate/0.2.6 samtools/intel/1.3 speedseq/20160506
  sambamba index ${dnabam}
  sambamba index ${rbam}
  sambamba index ${unmap}
  Integrate fusion -reads ${pair_id}.reads.txt -sum ${pair_id}.summary.tsv -ex ${pair_id}.exons.tsv -bk ${pair_id}.breakpoints.tsv -vcf ${pair_id}.bk_sv.vcf -bedpe ${pair_id}.fusions.bedpe ${index_path}/genome.fa ${index_path}/annot.ensembl.txt ${index_path}/bwts/ ${rbam} ${unmap} ${dnabam}
  perl $baseDir/scripts/compile_genefusion_results.pl -i ${pair_id} -r ${index_path}
  """
  else
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
120 121 122
  module load integrate/0.2.6 samtools/intel/1.3 speedseq/20160506
  sambamba index ${rbam}
  sambamba index ${unmap}
Brandi Cantarel's avatar
Brandi Cantarel committed
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
  Integrate fusion -reads ${pair_id}.reads.txt -sum ${pair_id}.summary.tsv -ex ${pair_id}.exons.tsv -bk ${pair_id}.breakpoints.tsv -vcf ${pair_id}.bk_sv.vcf -bedpe ${pair_id}.fusions.bedpe ${index_path}/genome.fa ${index_path}/annot.ensembl.txt ${index_path}/bwts/ ${rbam} ${unmap}
  perl $baseDir/scripts/compile_genefusion_results.pl -i ${pair_id} -r ${index_path}
  """
}

process gatkgvcf {

  //publishDir "$baseDir/output", mode: 'copy'
  cpus 30

  input:
  set pair_id,file(gbam),file(gidx) from gatkbam
  output:
  file("${pair_id}.gatk.g.vcf") into gatkgvcf
  script:
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
139
  module load gatk/3.5
Brandi Cantarel's avatar
Brandi Cantarel committed
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
  java -Xmx10g -jar \$GATK_JAR -R ${gatkref} -D ${dbsnp} -T HaplotypeCaller -stand_call_conf 20 -stand_emit_conf 20.0 -dontUseSoftClippedBases -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -variant_index_type LINEAR -variant_index_parameter 128000 --emitRefConfidence GVCF -I ${gbam} -o ${pair_id}.gatk.g.vcf -L ${genebed}
  """
}
process gatk {

  publishDir "$baseDir/output", mode: 'copy'
  cpus 4

  input:	
  file(gvcf) from gatkgvcf.toList()

  output:
  file("final.gatkpanel.vcf.gz") into gatkvcf
  script:
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
155
  module load gatk/3.5 bedtools/2.25.0 python/2.7.x-anaconda snpeff/4.2 vcftools/0.1.11 
Brandi Cantarel's avatar
Brandi Cantarel committed
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
  java -Xmx16g -jar \$GATK_JAR -R ${gatkref} -D ${dbsnp} -T GenotypeGVCFs -o final.gatk.vcf -nt 4 --variant ${(gvcf as List).join(' --variant ')}
  java -Xmx16g -jar \$GATK_JAR -R ${gatkref} -T VariantFiltration -V final.gatk.vcf -window 35 -cluster 3 -filterName FS -filter "FS > 30.0" -filterName QD -filter "QD < 2.0" -o final.filtgatk.vcf 
  vcf-annotate -n --fill-type final.filtgatk.vcf | java -jar \$SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (MQ > 40) & (DP >= 10))' |bgzip > final.gatkpanel.vcf.gz
  """
}

process mpileup {

  publishDir "$baseDir/output", mode: 'copy'
  cpus 32

  input:
  file(gbam) from sambam.toList()
  file(gidx) from samidx.toList()
  output:
  file("final.sampanel.vcf.gz") into samvcf
  script:
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
174
  module load python/2.7.x-anaconda samtools/intel/1.3 bedtools/2.25.0 bcftools/intel/1.3 snpeff/4.2 vcftools/0.1.11
Brandi Cantarel's avatar
Brandi Cantarel committed
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
  ls *.bam > final.bam.list
  samtools mpileup -t 'AD,ADF,ADR,INFO/AD,SP' -ug -Q20 -C50 -f ${index_path}/genome.fa -b final.bam.list | bcftools call --format-fields gq,gp -vmO z -o final.sam.vcf.gz
  vcf-annotate -n --fill-type final.sam.vcf.gz | java -jar \$SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (MQ >= 20) & (DP >= 10))' |bgzip > final.sampanel.vcf.gz
  """
}

process speedseq {

  publishDir "$baseDir/output", mode: 'copy'
  cpus 32
  input:
  file(gbam) from ssbam.toList()
  file(gidx) from ssidx.toList()
  output:
  file("final.sspanel.vcf.gz") into ssvcf
  file("final.complex.vcf.gz") into complexvcf
  script:
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
193
  module load python/2.7.x-anaconda samtools/intel/1.3 bedtools/2.25.0 bcftools/intel/1.3 snpeff/4.2 speedseq/20160506 vcftools/0.1.11
Brandi Cantarel's avatar
Brandi Cantarel committed
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
  speedseq var -q 10 -t 32 -o final.ssvar ${index_path}/genome.fa ${gbam}
  vcf-annotate -n --fill-type -n final.ssvar.vcf.gz | java -jar \$SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (DP >= 10))' |bgzip > final.sspanel.vcf.gz
  java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar filter "(TYPE='complex')" final.sspanel.vcf.gz |bgzip> final.complex.vcf.gz
  """
}

process integrate {
  publishDir "$baseDir/output", mode: 'copy'

  input:
  file(ssvcf)from ssvcf
  file(samvcf) from samvcf
  file(gvcf) from gatkvcf
  file(complex) from complexvcf

  output:
  file("final.integrate.vcf.gz") into finalvcf
  script:
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
213
  module load python/2.7.x-anaconda bedtools/2.25.0 snpeff/4.2 bcftools/intel/1.3 samtools/intel/1.3 jags/4.2.0
Brandi Cantarel's avatar
Brandi Cantarel committed
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
  module load vcftools/0.1.11
  zcat ${ssvcf} | bedtools intersect -v -header -a stdin -b ${complex} | bgzip > ss.shuff.vcf.gz
  vcf-shuffle-cols -t ${ssvcf} ${gvcf} | vcf-sort | bedtools intersect -v -header -a stdin -b ${complex} | bgzip > gatk.shuff.vcf.gz
  vcf-shuffle-cols -t ${ssvcf} ${samvcf} | bedtools intersect -v -header -a stdin -b ${complex} | bgzip > sam.shuff.vcf.gz
  tabix ss.shuff.vcf.gz
  tabix gatk.shuff.vcf.gz
  tabix sam.shuff.vcf.gz
  vcf-compare ss.shuff.vcf.gz sam.shuff.vcf.gz gatk.shuff.vcf.gz > vcf_compare.out
  vcf-isec -f --prefix integrate ss.shuff.vcf.gz sam.shuff.vcf.gz gatk.shuff.vcf.gz
  perl $baseDir/scripts/baysic_blc.pl -c ${complex} -e $baseDir -f ${index_path}/genome.fa ss.shuff.vcf.gz sam.shuff.vcf.gz gatk.shuff.vcf.gz  
  """
}

process annotate {
  publishDir "$baseDir/output", mode: 'copy'

  input:
  file(intvcf) from finalvcf

  output:
  file("annot.vcf.gz") into annotvcf
  script:
  if (params.genome == '/project/shared/bicf_workflow_ref/GRCh38')
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
238
  module load python/2.7.x-anaconda snpeff/4.2
Brandi Cantarel's avatar
Brandi Cantarel committed
239 240 241 242
  java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} final.integrate.vcf.gz | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar annotate ${index_path}/dbSnp.vcf.gz -  | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar annotate ${index_path}/clinvar.vcf.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar annotate ${index_path}/ExAC.vcf.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar gwasCat -db ${index_path}/gwas_catalog.tsv - |bgzip > annot.vcf.gz
  """
  else
  """
Brandi Cantarel's avatar
Brandi Cantarel committed
243
  module load python/2.7.x-anaconda snpeff/4.2
Brandi Cantarel's avatar
Brandi Cantarel committed
244
  java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} final.integrate.vcf.gz |bgzip > annot.vcf.gz
Brandi Cantarel's avatar
Brandi Cantarel committed
245 246
  """
}