Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
somatic_variant
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
BICF
Astrocyte
somatic_variant
Commits
3cf7cfe8
Commit
3cf7cfe8
authored
7 years ago
by
Brandi Cantarel
Browse files
Options
Downloads
Patches
Plain Diff
add checkmate
parent
322c742e
Branches
Branches containing commit
Tags
publish_0.1.2
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
workflow/main.nf
+54
-19
54 additions, 19 deletions
workflow/main.nf
workflow/scripts/sequenceqc_somatic.pl
+48
-0
48 additions, 0 deletions
workflow/scripts/sequenceqc_somatic.pl
with
102 additions
and
19 deletions
workflow/main.nf
+
54
−
19
View file @
3cf7cfe8
...
...
@@ -20,6 +20,7 @@ dbsnp=file(dbsnp)
strelkaconfig="/cm/shared/apps/strelka/1.0.15/etc/strelka_config_bwa_default.ini"
confstrelka=file(strelkaconfig)
ncmconf = file("$params.genome/ncm.conf")
snpeff_vers = 'GRCh38.82';
if (params.genome == '/project/shared/bicf_workflow_ref/GRCm38') {
...
...
@@ -68,12 +69,33 @@ process indexbams {
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into shimmerbam
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into vscanbam
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into virmidbam
// set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into strelkabam
set tid,nid,file(tumor),file(normal),file("${tumor}.bai"),file("${normal}.bai") into checkbams
set val("${tid}_${nid}"),tid,nid into pairnames
script:
"""
module load speedseq/20160506 samtools/intel/1.3
sambamba index -t 30 ${tumor}
sambamba index -t 30 ${normal}
module load speedseq/20160506 samtools/1.4.1
samtools index ${tumor} &
samtools index ${normal} &
wait
"""
}
process checkmates {
publishDir "$baseDir/output", mode: 'copy'
errorStrategy 'ignore'
input:
set tid,nid,file(tumor),file(normal),file(tidx),file(nidx) from checkbams
file(conf) from ncmconf
output:
file("${tid}_${nid}*") into checkmateout
when:
params.genome == '/project/shared/bicf_workflow_ref/GRCh38'
script:
"""
source /etc/profile.d/modules.sh
module load python/2.7.x-anaconda samtools/1.4.1 bcftools/1.4.1
python /project/shared/bicf_workflow_ref/seqprg/NGSCheckMate/ncm.py -B -d ./ -bed ${index_path}/NGSCheckMate.bed -O ./ -N ${tid}_${nid}
perl $baseDir/scripts/sequenceqc_somatic.pl -r ${index_path} -i ${tid}_${nid}_all.txt -o ${tid}_${nid}.sequence.stats.txt
"""
}
...
...
@@ -86,8 +108,8 @@ process sstumor {
set val("${tid}_${nid}"), file("${tid}_${nid}.sspanel.vcf.gz") into ssvcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.2
5
.0 snpeff/4.2 speedseq/20160506 bcftools/
intel/1.3
vcftools/0.1.1
1
speedseq somatic -q 10 -w ${target_panel} -t
30
-o ${tid}.sssom ${reffa} ${normal} ${tumor}
module load python/2.7.x-anaconda bedtools/2.2
6
.0 snpeff/4.2 speedseq/20160506 bcftools/
1.4.1
vcftools/0.1.1
4
speedseq somatic -q 10 -w ${target_panel} -t
\$SLURM_CPUS_ON_NODE
-o ${tid}.sssom ${reffa} ${normal} ${tumor}
vcf-annotate -H -n --fill-type ${tid}.sssom.vcf.gz | java -jar \$SNPEFF_HOME/SnpSift.jar filter --pass '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${tid}_${nid}.sspanel.vcf.gz
"""
}
...
...
@@ -102,8 +124,8 @@ process mutect {
set val("${tid}_${nid}"),file("${tid}_${nid}.pmutect.vcf.gz") into mutectvcf
script:
"""
module load python/2.7.x-anaconda gatk/3.
5
bcftools/
intel/1.3
bedtools/2.2
5
.0 snpeff/4.2 vcftools/0.1.1
1
cut -f 1 ${index_path}/genomefile.
chr
.txt |
x
ar
gs -I {} -n 1
-
P
10
sh -c
"java -Xmx
1
0g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf
30 -stand_emit_conf 10.
0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
module load
parallel
python/2.7.x-anaconda gatk/3.
7
bcftools/
1.4.1
bedtools/2.2
6
.0 snpeff/4.2 vcftools/0.1.1
4
cut -f 1 ${index_path}/genomefile.
5M
.txt |
p
ar
allel --delay 2
-
j
10 "java -Xmx
2
0g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf
1
0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
vcf-concat ${tid}*.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${tid}_${nid}.pmutect.vcf.gz
"""
}
...
...
@@ -116,9 +138,9 @@ process varscan {
set val("${tid}_${nid}"),file("${tid}_${nid}.varscan.vcf.gz") into varscanvcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.2
5
.0 snpeff/4.2 bcftools/
intel/1.3
samtools/
intel/1.3
VarScan/2.4.2 speedseq/20160506 vcftools/0.1.1
1
sambamba mpileup -L ${target_panel} -t
30
${tumor} --samtools "-C 50 -f ${reffa}" > t.mpileup
sambamba mpileup -L ${target_panel} -t
30
${normal} --samtools "-C 50 -f ${reffa}" > n.mpileup
module load python/2.7.x-anaconda bedtools/2.2
6
.0 snpeff/4.2 bcftools/
1.4.1
samtools/
1.4.1
VarScan/2.4.2 speedseq/20160506 vcftools/0.1.1
4
sambamba mpileup -L ${target_panel} -t
\$SLURM_CPUS_ON_NODE
${tumor} --samtools "-C 50 -f ${reffa}" > t.mpileup
sambamba mpileup -L ${target_panel} -t
\$SLURM_CPUS_ON_NODE
${normal} --samtools "-C 50 -f ${reffa}" > n.mpileup
VarScan somatic n.mpileup t.mpileup ${tid}.vscan --output-vcf 1
VarScan copynumber n.mpileup t.mpileup ${tid}.vscancnv
vcf-concat ${tid}.vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar \$SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bedtools intersect -header -a stdin -b ${target_panel} |bgzip > ${tid}_${nid}.varscan.vcf.gz
...
...
@@ -134,7 +156,7 @@ process shimmer {
set val("${tid}_${nid}"), file("${tid}_${nid}.shimmer.vcf.gz") into shimmervcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.2
5
.0 snpeff/4.2 bcftools/
intel/1.3
shimmer/0.1.1 vcftools/0.1.1
1
module load python/2.7.x-anaconda bedtools/2.2
6
.0 snpeff/4.2 bcftools/
1.4.1
shimmer/0.1.1 vcftools/0.1.1
4
shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err
perl $baseDir/scripts/add_readct_shimmer.pl
vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar \$SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' | bedtools intersect -header -a stdin -b ${target_panel} | bgzip > ${tid}_${nid}.shimmer.vcf.gz
...
...
@@ -152,7 +174,7 @@ process shimmer {
// file("${tid}.strelka.vcf.gz") into strelkavcf
// script:
// """
// module load bedtools/2.2
5
.0 snpeff/4.2 speedseq/20160506 vcftools/0.1.1
1
strelka/1.0.15
// module load bedtools/2.2
6
.0 snpeff/4.2 speedseq/20160506 vcftools/0.1.1
4
strelka/1.0.15
// cp ${confstrelka} config.ini
// configureStrelkaWorkflow.pl --normal=${normal} --tumor=${tumor} --ref ${reffa} --config=./config.ini --output=strelka
// make -j 32 -C strelka/
...
...
@@ -169,7 +191,7 @@ process virmid {
set val("${tid}_${nid}"), file("${tid}_${nid}.virmid.vcf.gz") into virmidvcf
script:
"""
module load python/2.7.x-anaconda bedtools/2.2
5
.0 snpeff/4.2 virmid/1.2 vcftools/0.1.1
1
module load python/2.7.x-anaconda bedtools/2.2
6
.0 snpeff/4.2 virmid/1.2 vcftools/0.1.1
4
virmid -R ${reffa} -D ${tumor} -N ${normal} -s $cosmic t 30 -M 2000 -c1 10 -c2 10
perl $baseDir/scripts/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf
perl $baseDir/scripts/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf
...
...
@@ -200,7 +222,7 @@ process integrate {
set fname,file("${fname}.union.vcf.gz") into union
script:
"""
module load gatk/3.
5
python/2.7.x-anaconda bedtools/2.2
5
.0 snpeff/4.2 bcftools/
intel/1.3
samtools/
intel/1.3
module load gatk/3.
7
python/2.7.x-anaconda bedtools/2.2
6
.0 snpeff/4.2 bcftools/
1.4.1
samtools/
1.4.1
module load vcftools/0.1.14
perl $baseDir/scripts/unionize_vcf.pl -r ${index_path} ${ss} ${mutect} ${shimmer} ${vscan} ${virmid}
sh integrate.sh
...
...
@@ -209,30 +231,42 @@ process integrate {
"""
}
union .phase(pairnames)
.map {p,q -> [p[0],p[1],q[1],q[2]]}
.set { union2 }
process annot {
publishDir "$baseDir/output", mode: 'copy'
input:
set fname,unionvcf from union
set fname,unionvcf from union
2
output:
file("${fname}.annot.vcf.gz") into annot
file("${fname}.somatic.vcf.gz") into passout
file("${fname}.stats.txt") into stats
file("${fname}.statplot*") into plotstats
script:
if (params.genome == '/project/shared/bicf_workflow_ref/GRCh38')
"""
module load python/2.7.x-anaconda bedtools/2.2
5
.0 snpeff/4.2 bcftools/
intel/1.3
samtools/
intel/1.3
module load python/2.7.x-anaconda bedtools/2.2
6
.0 snpeff/4.2 bcftools/
1.4.1
samtools/
1.4.1
tabix ${unionvcf}
bcftools annotate -Oz -a ${index_path}/ExAC.vcf.gz -o ${fname}.exac.vcf.gz --columns CHROM,POS,AC_Het,AC_Hom,AC_Hemi,AC_Adj,AN_Adj,AC_POPMAX,AN_POPMAX,POPMAX ${unionvcf}
tabix ${fname}.exac.vcf.gz
bcftools annotate -Oz -a ${index_path}/dbSnp.vcf.gz -o ${fname}.dbsnp.vcf.gz --columns CHROM,POS,ID,RS ${fname}.exac.vcf.gz
tabix ${fname}.dbsnp.vcf.gz
bcftools annotate -Oz -a ${index_path}/clinvar.vcf.gz -o ${fname}.clinvar.vcf.gz --columns CHROM,POS,CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${fname}.dbsnp.vcf.gz
java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} ${fname}.clinvar.vcf.gz | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - |java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar gwasCat -db ${index_path}/gwas_catalog.tsv - |bgzip > ${fname}.annot.vcf.gz
tabix ${fname}.clinvar.vcf.gz
bcftools annotate -Oz -a ${index_path}/cosmic.vcf.gz -o ${fname}.cosmic.vcf.gz --collapse none --columns CHROM,POS,ID,CNT ${fname}.clinvar.vcf.gz
tabix ${fname}.cosmic.vcf.gz
java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} ${fname}.cosmic.vcf.gz | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | java -Xmx10g -jar \$SNPEFF_HOME/SnpSift.jar gwasCat -db ${index_path}/gwas_catalog.tsv - |bgzip > ${fname}.annot.vcf.gz
tabix ${fname}.annot.vcf.gz
bcftools stats ${fname}.annot.vcf.gz > ${fname}.stats.txt
perl $baseDir/scripts/filter_tumoronly.pl ${fname}.annot.vcf.gz ${fname}
bgzip ${fname}.annot.tumor.vcf
tabix ${fname}.annot.tumor.vcf.gz
bcftools stats ${fname}.annot.tumor.vcf.gz > ${fname}.stats.txt
perl $baseDir/scripts/calc_tmb.pl ${fname} ${fname}.stats.txt
plot-vcfstats -s -p ${fname}.statplot ${fname}.stats.txt
"""
else
...
...
@@ -240,6 +274,7 @@ process annot {
module load snpeff/4.2
java -Xmx10g -jar \$SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c \$SNPEFF_HOME/snpEff.config ${snpeff_vers} ${unionvcf} |bgzip > ${fname}.annot.vcf.gz
tabix ${fname}.annot.vcf.gz
cp ${fname}.annot.vcf.gz ${fname}.somatic.vcf.gz
bcftools stats ${fname}.annot.vcf.gz > ${fname}.stats.txt
plot-vcfstats -s -p ${fname}.statplot ${fname}.stats.txt
"""
...
...
This diff is collapsed.
Click to expand it.
workflow/scripts/sequenceqc_somatic.pl
0 → 100755
+
48
−
0
View file @
3cf7cfe8
#!/usr/bin/perl -w
#uploadqc.pl
use
Getopt::
Long
qw(:config no_ignore_case no_auto_abbrev)
;
my
%opt
=
();
my
$results
=
GetOptions
(
\
%opt
,'
refdir|r=s
','
input|i=s
','
output|o=s
','
help|h
');
open
MATE
,
"
<
$opt
{input}
"
or
die
$!
;
### Begin File Information ###
my
@stats
=
stat
("
$opt
{input}
");
my
(
$day
,
$month
,
$year
)
=
(
localtime
(
$stats
[
9
]))[
3
,
4
,
5
];
$year
+=
1900
;
$month
++
;
$date
=
join
("
-
",
$year
,
sprintf
("
%02s
",
$month
),
sprintf
("
%02s
",
$day
));
$fileowner
=
'
s
'
.
$stats
[
4
];
$fileowner
=
$fileowner
;
### End File Information ###
### Begin Version Information ###
my
$source
=
`
zgrep '#'
$opt
{refdir}
\
/cosmic.vcf.gz |grep source
`;
my
$cosmic_ref
=
(
split
(
/=/
,
$source
))[
1
];
chomp
(
$cosmic_ref
);
my
$dbsnp_source
=
`
zgrep '#'
$opt
{refdir}
\
/dbSnp.vcf.gz |grep dbSNP_BUILD_ID
`;
my
$dbsnp_ref
=
(
split
(
/=/
,
$dbsnp_source
))[
1
];
chomp
(
$dbsnp_ref
);
my
$gen_ref
=
(
split
(
/\//
,
$opt
{
refdir
}))[
-
1
];
my
$gittag
=
`
cd /project/PHG/PHG_Clinical/clinseq_workflows;git describe --abbrev=0 --tags
`;
chomp
$gittag
;
### End Version Information ###
while
(
my
$line
=
<
MATE
>
)
{
chomp
(
$line
);
my
(
$sam1
,
$pf
,
$sam2
,
$corr
,
$depth
)
=
split
(
/\t/
,
$line
);
$sam1
=~
s/\.vcf//g
;
$sam2
=~
s/\.vcf//g
;
open
OUT
,
"
>
$opt
{output}
"
or
die
$!
;
my
$status
=
'
PASS
';
$status
=
'
FAIL
'
if
(
$pf
eq
'
unmatched
');
print
OUT
join
("
\n
","
Sample_1
\t
"
.
$sam1
,"
Sample_2
\t
"
.
$sam2
,"
Correlation
\t
"
.
$corr
,
"
Depth
\t
"
.
$depth
,"
Status
\t
"
.
$status
,"
Somatic_Date
\t
"
.
$date
,
"
File_Owner
\t
"
.
$fileowner
,"
Workflow_Version
\t
"
.
$gittag
,
"
Cosmic_Reference
\t
"
.
$cosmic_ref
,"
dbSnp_Reference
\t
"
.
$dbsnp_ref
,
"
Genome_Reference
\t
"
.
$gen_ref
),"
\n
";
}
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment