Skip to content
Snippets Groups Projects
Commit 3cf7cfe8 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

add checkmate

parent 322c742e
Branches
Tags publish_0.1.2
No related merge requests found
......@@ -20,6 +20,7 @@ dbsnp=file(dbsnp)
strelkaconfig="/cm/shared/apps/strelka/1.0.15/etc/strelka_config_bwa_default.ini"
confstrelka=file(strelkaconfig)
ncmconf = file("$params.genome/ncm.conf")
snpeff_vers = 'GRCh38.82';
if (params.genome == '/project/shared/bicf_workflow_ref/GRCm38') {
......@@ -68,12 +69,33 @@ process indexbams {
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into shimmerbam
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into vscanbam
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into virmidbam
// set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into strelkabam
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into checkbams
set val("${tid}_${nid}"),tid,nid into pairnames
script:
"""
module load speedseq/20160506 samtools/intel/1.3
sambamba index -t 30 ${tumor}
sambamba index -t 30 ${normal}
module load speedseq/20160506 samtools/1.4.1
samtools index ${tumor} &
samtools index ${normal} &
wait
"""
}
process checkmates {
publishDir "$baseDir/output", mode: 'copy'
errorStrategy 'ignore'
input:
set tid,nid,file(tumor),file(normal),file(tidx),file(nidx) from checkbams
file(conf) from ncmconf
output:
file("${tid}_${nid}*") into checkmateout
when:
params.genome == '/project/shared/bicf_workflow_ref/GRCh38'
script:
"""
source /etc/profile.d/modules.sh
module load python/2.7.x-anaconda samtools/1.4.1 bcftools/1.4.1
python /project/shared/bicf_workflow_ref/seqprg/NGSCheckMate/ncm.py -B -d ./ -bed ${index_path}/NGSCheckMate.bed -O ./ -N ${tid}_${nid}
perl $baseDir/scripts/sequenceqc_somatic.pl -r ${index_path} -i ${tid}_${nid}_all.txt -o ${tid}_${nid}.sequence.stats.txt
"""
}
......@@ -86,8 +108,8 @@ process sstumor {
set val("${tid}_${nid}"), file("${tid}_${nid}.sspanel.vcf.gz") into ssvcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.25.0 snpeff/4.2 speedseq/20160506 bcftools/intel/1.3 vcftools/0.1.11
speedseq somatic -q 10 -w ${target_panel} -t 30 -o ${tid}.sssom ${reffa} ${normal} ${tumor}
module load python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.2 speedseq/20160506 bcftools/1.4.1 vcftools/0.1.14
speedseq somatic -q 10 -w ${target_panel} -t \$SLURM_CPUS_ON_NODE -o ${tid}.sssom ${reffa} ${normal} ${tumor}
vcf-annotate -H -n --fill-type ${tid}.sssom.vcf.gz | java -jar \$SNPEFF_HOME/SnpSift.jar filter --pass '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${tid}_${nid}.sspanel.vcf.gz
"""
}
......@@ -102,8 +124,8 @@ process mutect {
set val("${tid}_${nid}"),file("${tid}_${nid}.pmutect.vcf.gz") into mutectvcf
script:
"""
module load python/2.7.x-anaconda gatk/3.5 bcftools/intel/1.3 bedtools/2.25.0 snpeff/4.2 vcftools/0.1.11
cut -f 1 ${index_path}/genomefile.chr.txt | xargs -I {} -n 1 -P 10 sh -c "java -Xmx10g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 30 -stand_emit_conf 10.0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
module load parallel python/2.7.x-anaconda gatk/3.7 bcftools/1.4.1 bedtools/2.26.0 snpeff/4.2 vcftools/0.1.14
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
vcf-concat ${tid}*.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${tid}_${nid}.pmutect.vcf.gz
"""
}
......@@ -116,9 +138,9 @@ process varscan {
set val("${tid}_${nid}"),file("${tid}_${nid}.varscan.vcf.gz") into varscanvcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.25.0 snpeff/4.2 bcftools/intel/1.3 samtools/intel/1.3 VarScan/2.4.2 speedseq/20160506 vcftools/0.1.11
sambamba mpileup -L ${target_panel} -t 30 ${tumor} --samtools "-C 50 -f ${reffa}" > t.mpileup
sambamba mpileup -L ${target_panel} -t 30 ${normal} --samtools "-C 50 -f ${reffa}" > n.mpileup
module load python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.2 bcftools/1.4.1 samtools/1.4.1 VarScan/2.4.2 speedseq/20160506 vcftools/0.1.14
sambamba mpileup -L ${target_panel} -t \$SLURM_CPUS_ON_NODE ${tumor} --samtools "-C 50 -f ${reffa}" > t.mpileup
sambamba mpileup -L ${target_panel} -t \$SLURM_CPUS_ON_NODE ${normal} --samtools "-C 50 -f ${reffa}" > n.mpileup
VarScan somatic n.mpileup t.mpileup ${tid}.vscan --output-vcf 1
VarScan copynumber n.mpileup t.mpileup ${tid}.vscancnv
vcf-concat ${tid}.vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar \$SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bedtools intersect -header -a stdin -b ${target_panel} |bgzip > ${tid}_${nid}.varscan.vcf.gz
......@@ -134,7 +156,7 @@ process shimmer {
set val("${tid}_${nid}"), file("${tid}_${nid}.shimmer.vcf.gz") into shimmervcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.25.0 snpeff/4.2 bcftools/intel/1.3 shimmer/0.1.1 vcftools/0.1.11
module load python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.2 bcftools/1.4.1 shimmer/0.1.1 vcftools/0.1.14
shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err
perl $baseDir/scripts/add_readct_shimmer.pl
vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar \$SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' | bedtools intersect -header -a stdin -b ${target_panel} | bgzip > ${tid}_${nid}.shimmer.vcf.gz
......@@ -152,7 +174,7 @@ process shimmer {
// file("${tid}.strelka.vcf.gz") into strelkavcf
// script:
// """
// module load bedtools/2.25.0 snpeff/4.2 speedseq/20160506 vcftools/0.1.11 strelka/1.0.15
// module load bedtools/2.26.0 snpeff/4.2 speedseq/20160506 vcftools/0.1.14 strelka/1.0.15
// cp ${confstrelka} config.ini
// configureStrelkaWorkflow.pl --normal=${normal} --tumor=${tumor} --ref ${reffa} --config=./config.ini --output=strelka
// make -j 32 -C strelka/
......@@ -169,7 +191,7 @@ process virmid {
set val("${tid}_${nid}"), file("${tid}_${nid}.virmid.vcf.gz") into virmidvcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.25.0 snpeff/4.2 virmid/1.2 vcftools/0.1.11
module load python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.2 virmid/1.2 vcftools/0.1.14
virmid -R ${reffa} -D ${tumor} -N ${normal} -s $cosmic t 30 -M 2000 -c1 10 -c2 10
perl $baseDir/scripts/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf
perl $baseDir/scripts/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf
......@@ -200,7 +222,7 @@ process integrate {
set fname,file("${fname}.union.vcf.gz") into union
script:
"""
module load gatk/3.5 python/2.7.x-anaconda bedtools/2.25.0 snpeff/4.2 bcftools/intel/1.3 samtools/intel/1.3
module load gatk/3.7 python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.2 bcftools/1.4.1 samtools/1.4.1
module load vcftools/0.1.14
perl $baseDir/scripts/unionize_vcf.pl -r ${index_path} ${ss} ${mutect} ${shimmer} ${vscan} ${virmid}
sh integrate.sh
......@@ -209,30 +231,42 @@ process integrate {
"""
}
union .phase(pairnames)
.map {p,q -> [p[0],p[1],q[1],q[2]]}
.set { union2 }
process annot {
publishDir "$baseDir/output", mode: 'copy'
input:
set fname,unionvcf from union
set fname,unionvcf from union2
output:
file("${fname}.annot.vcf.gz") into annot
file("${fname}.somatic.vcf.gz") into passout
file("${fname}.stats.txt") into stats
file("${fname}.statplot*") into plotstats
script:
if (params.genome == '/project/shared/bicf_workflow_ref/GRCh38')
"""
module load python/2.7.x-anaconda bedtools/2.25.0 snpeff/4.2 bcftools/intel/1.3 samtools/intel/1.3
module load python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.2 bcftools/1.4.1 samtools/1.4.1
tabix ${unionvcf}
bcftools annotate -Oz -a ${index_path}/ExAC.vcf.gz -o ${fname}.exac.vcf.gz --columns CHROM,POS,AC_Het,AC_Hom,AC_Hemi,AC_Adj,AN_Adj,AC_POPMAX,AN_POPMAX,POPMAX ${unionvcf}
tabix ${fname}.exac.vcf.gz
bcftools annotate -Oz -a ${index_path}/dbSnp.vcf.gz -o ${fname}.dbsnp.vcf.gz --columns CHROM,POS,ID,RS ${fname}.exac.vcf.gz
tabix ${fname}.dbsnp.vcf.gz
bcftools annotate -Oz -a ${index_path}/clinvar.vcf.gz -o ${fname}.clinvar.vcf.gz --columns CHROM,POS,CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${fname}.dbsnp.vcf.gz
java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} ${fname}.clinvar.vcf.gz | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - |java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar gwasCat -db ${index_path}/gwas_catalog.tsv - |bgzip > ${fname}.annot.vcf.gz
tabix ${fname}.clinvar.vcf.gz
bcftools annotate -Oz -a ${index_path}/cosmic.vcf.gz -o ${fname}.cosmic.vcf.gz --collapse none --columns CHROM,POS,ID,CNT ${fname}.clinvar.vcf.gz
tabix ${fname}.cosmic.vcf.gz
java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} ${fname}.cosmic.vcf.gz | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar gwasCat -db ${index_path}/gwas_catalog.tsv - |bgzip > ${fname}.annot.vcf.gz
tabix ${fname}.annot.vcf.gz
bcftools stats ${fname}.annot.vcf.gz > ${fname}.stats.txt
perl $baseDir/scripts/filter_tumoronly.pl ${fname}.annot.vcf.gz ${fname}
bgzip ${fname}.annot.tumor.vcf
tabix ${fname}.annot.tumor.vcf.gz
bcftools stats ${fname}.annot.tumor.vcf.gz > ${fname}.stats.txt
perl $baseDir/scripts/calc_tmb.pl ${fname} ${fname}.stats.txt
plot-vcfstats -s -p ${fname}.statplot ${fname}.stats.txt
"""
else
......@@ -240,6 +274,7 @@ process annot {
module load snpeff/4.2
java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} ${unionvcf} |bgzip > ${fname}.annot.vcf.gz
tabix ${fname}.annot.vcf.gz
cp ${fname}.annot.vcf.gz ${fname}.somatic.vcf.gz
bcftools stats ${fname}.annot.vcf.gz > ${fname}.stats.txt
plot-vcfstats -s -p ${fname}.statplot ${fname}.stats.txt
"""
......
#!/usr/bin/perl -w
#uploadqc.pl
use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
my %opt = ();
my $results = GetOptions (\%opt,'refdir|r=s','input|i=s','output|o=s','help|h');
open MATE, "<$opt{input}" or die $!;
### Begin File Information ###
my @stats = stat("$opt{input}");
my ($day,$month,$year) = (localtime($stats[9]))[3,4,5];
$year += 1900;
$month++;
$date = join("-",$year,sprintf("%02s",$month),sprintf("%02s",$day));
$fileowner = 's'.$stats[4];
$fileowner = $fileowner;
### End File Information ###
### Begin Version Information ###
my $source = `zgrep '#' $opt{refdir}\/cosmic.vcf.gz |grep source`;
my $cosmic_ref = (split(/=/, $source))[1];
chomp($cosmic_ref);
my $dbsnp_source = `zgrep '#' $opt{refdir}\/dbSnp.vcf.gz |grep dbSNP_BUILD_ID`;
my $dbsnp_ref = (split(/=/, $dbsnp_source))[1];
chomp($dbsnp_ref);
my $gen_ref = (split(/\//,$opt{refdir}))[-1];
my $gittag = `cd /project/PHG/PHG_Clinical/clinseq_workflows;git describe --abbrev=0 --tags`;
chomp $gittag;
### End Version Information ###
while (my $line = <MATE>) {
chomp($line);
my ($sam1,$pf,$sam2,$corr,$depth) = split(/\t/,$line);
$sam1 =~ s/\.vcf//g;
$sam2 =~ s/\.vcf//g;
open OUT, ">$opt{output}" or die $!;
my $status= 'PASS';
$status='FAIL' if($pf eq 'unmatched');
print OUT join("\n","Sample_1\t".$sam1,"Sample_2\t".$sam2,"Correlation\t".$corr,
"Depth\t".$depth,"Status\t".$status,"Somatic_Date\t".$date,
"File_Owner\t".$fileowner,"Workflow_Version\t".$gittag,
"Cosmic_Reference\t".$cosmic_ref,"dbSnp_Reference\t".$dbsnp_ref,
"Genome_Reference\t".$gen_ref),"\n";
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment