Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
process_scripts
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
NGS CLIA Lab
process_scripts
Commits
8d9c724f
Commit
8d9c724f
authored
5 years ago
by
Brandi Cantarel
Browse files
Options
Downloads
Plain Diff
Merge branch 'master' of git.biohpc.swmed.edu:BICF/Astrocyte/process_scripts
parents
2d64c494
66010ebb
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
variants/filter_delly.pl
+111
-0
111 additions, 0 deletions
variants/filter_delly.pl
variants/filter_svaba.pl
+120
-0
120 additions, 0 deletions
variants/filter_svaba.pl
variants/svcalling.sh
+9
-0
9 additions, 0 deletions
variants/svcalling.sh
with
240 additions
and
0 deletions
variants/filter_delly.pl
0 → 100755
+
111
−
0
View file @
8d9c724f
#!/usr/bin/perl -w
#integrate_datasets.pl
#module load vcftools/0.1.14 samtools/1.6 bedtools/2.26.0
use
Getopt::
Long
qw(:config no_ignore_case no_auto_abbrev)
;
my
%opt
=
();
my
$results
=
GetOptions
(
\
%opt
,'
in|i=s
','
pid|p=s
','
tumor|t=s
');
open
VCFOUT
,
"
>
$opt
{pid}
\
.delly.vcf
"
or
die
$!
;
open
DELFUS
,
"
>
$opt
{pid}
\
.delly.potentialfusion.txt
"
or
die
$!
;
open
IN
,
"
gunzip -c
$opt
{in} |
"
or
die
$!
;
W1:while
(
my
$line
=
<
IN
>
)
{
chomp
(
$line
);
if
(
$line
=~
m/^#/
)
{
if
(
$line
=~
m/^#CHROM/
)
{
print
OUT
"
##INFO=<ID=AF,Number=A,Type=Integer,Description=
\"
Alternate allele observation frequency
\"
>
\n
";
my
@header
=
split
(
/\t/
,
$line
);
(
$chrom
,
$pos
,
$id
,
$ref
,
$alt
,
$score
,
$filter
,
$info
,
$format
,
@gtheader
)
=
split
(
/\t/
,
$line
);
unless
(
$opt
{
tumor
})
{
if
(
grep
(
/T_DNA/
,
@gtheader
))
{
my
@tsamps
=
grep
(
/T_DNA/
,
@gtheader
);
$opt
{
tumor
}
=
$tsamps
[
0
];
}
else
{
$opt
{
tumor
}
=
$gtheader
[
0
];
}
}
}
print
VCFOUT
$line
,"
\n
";
}
my
(
$chrom
,
$pos
,
$id
,
$ref
,
$alt
,
$score
,
$filter
,
$annot
,
$format
,
@gts
)
=
split
(
/\t/
,
$line
);
next
if
(
$ref
=~
m/\./
||
$alt
=~
m/\./
||
$alt
=~
m/,X/
);
my
%hash
=
();
foreach
$a
(
split
(
/;/
,
$annot
))
{
my
(
$key
,
$val
)
=
split
(
/=/
,
$a
);
$hash
{
$key
}
=
$val
unless
(
$hash
{
$key
});
}
next
unless
(
$hash
{
ANN
});
next
unless
(
$hash
{
ANN
}
=~
m/HIGH|MODERATE|LOW/
);
my
%gtinfo
=
();
my
@deschead
=
split
(
/:/
,
$format
);
F1:foreach
my
$k
(
0
..
$#gtheader
)
{
my
$subjid
=
$gtheader
[
$k
];
my
$allele_info
=
$gts
[
$k
];
my
@ainfo
=
split
(
/:/
,
$allele_info
);
my
@mutallfreq
=
();
foreach
my
$k
(
0
..
$#ainfo
)
{
$gtinfo
{
$subjid
}{
$deschead
[
$k
]}
=
$ainfo
[
$k
];
}
$gtinfo
{
$subjid
}{
DP
}
=
(
split
(
/,/
,
$gtinfo
{
$subjid
}{
DP
}))[
0
]
if
(
$gtinfo
{
$subjid
}{
DP
});
next
F1
unless
(
$gtinfo
{
$subjid
}{
DP
}
&&
$gtinfo
{
$subjid
}{
DP
}
ne
'
.
'
&&
$gtinfo
{
$subjid
}{
DP
}
>=
1
);
my
@altct
=
split
(
/,/
,
$gtinfo
{
$subjid
}{
AD
});
my
$refct
=
shift
@altct
;
@altct2
=
split
(
/,/
,
$gtinfo
{
$subjid
}{
AO
});
if
(
scalar
(
@altct
)
ne
scalar
(
@altct2
))
{
warn
"
Inconsistent Allele counts @
$chrom
,
$pos
,
$alt
,
$gtinfo
{
$subjid
}{AD},
$gtinfo
{
$subjid
}{AO}
\n
";
}
my
$total
=
$refct
;
foreach
my
$act
(
@altct
)
{
next
if
(
$act
eq
'
.
');
$total
+=
$act
;
push
@mutallfreq
,
sprintf
("
%.4f
",
$act
/
$gtinfo
{
$subjid
}{
DP
});
}
$gtinfo
{
$subjid
}{
MAF
}
=
\
@mutallfreq
;
}
next
unless
(
$gtinfo
{
$opt
{
tumor
}}{
DP
}
&&
$gtinfo
{
$opt
{
tumor
}}{
DP
}
ne
'
.
'
&&
$gtinfo
{
$opt
{
tumor
}}{
DP
}
>=
20
);
unless
(
$gtinfo
{
$opt
{
tumor
}}{
AO
}
=~
m/\d+/
&&
$gtinfo
{
$opt
{
tumor
}}{
AD
}
=~
m/,/
)
{
warn
"
Missing Alt:
$line
\n
";
}
@tumormaf
=
@
{
$gtinfo
{
$opt
{
tumor
}}{
MAF
}};
@tumoraltct
=
split
(
/,/
,
$gtinfo
{
$opt
{
tumor
}}{
AO
});
next
if
(
$tumoraltct
[
0
]
eq
'
.
');
$hash
{
AF
}
=
join
("
,
",
@tumormaf
);
next
if
(
$tumoraltct
[
0
]
<
20
);
next
if
(
$tumormaf
[
0
]
<
0.01
);
my
$keepforvcf
=
0
;
my
$keeptrx
;
F1:foreach
$trx
(
split
(
/,/
,
$hash
{
ANN
}))
{
my
(
$allele
,
$effect
,
$impact
,
$gene
,
$geneid
,
$feature
,
$featureid
,
$biotype
,
$rank
,
$codon
,
$aa
,
$pos_dna
,
$len_cdna
,
$cds_pos
,
$cds_len
,
$aapos
,
$aalen
,
$distance
,
$err
)
=
split
(
/\|/
,
$trx
);
next
unless
(
$impact
=~
m/HIGH|MODERATE/
||
$effect
=~
/splice/i
);
next
if
(
$effect
eq
'
sequence_feature
');
$keeptrx
=
$trx
;
$keepforvcf
=
$gene
;
last
F1
;
}
next
unless
$keepforvcf
;
my
@nannot
;
foreach
$info
(
sort
{
$a
cmp
$b
}
keys
%hash
)
{
if
(
defined
$hash
{
$info
})
{
push
@nannot
,
$info
.
"
=
"
.
$hash
{
$info
};
}
else
{
push
@nannot
,
$info
;
}
}
my
$newannot
=
join
("
;
",
@nannot
);
if
(
$hash
{
SVTYPE
}
eq
'
INS
'
||
(
$hash
{
SVTYPE
}
eq
'
DEL
'
&&
$keepforvcf
!~
m/&/
))
{
print
VCFOUT
join
("
\t
",
$chrom
,
$pos
,
$id
,
$ref
,
$alt
,
$score
,
$filter
,
$newannot
,
$format
,
@gts
),"
\n
";
}
elsif
(
$hash
{
END
}
&&
$hash
{
END
}
-
$pos
>
9999
)
{
my
(
$allele
,
$effect
,
$impact
,
$gene
,
$geneid
,
$feature
,
$featureid
,
$biotype
,
$rank
,
$codon
,
$aa
,
$pos_dna
,
$len_cdna
,
$cds_pos
,
$cds_len
,
$aapos
,
$aalen
,
$distance
,
$err
)
=
split
(
/\|/
,
$keeptrx
);
print
DELFUS
join
("
\t
",
$chrom
,
$pos
,
$hash
{
CHR2
},
$hash
{
END
},
$effect
,
$gene
,
$biotype
,
$filter
,
$format
,
@gts
),"
\n
";
}
}
close
IN
;
This diff is collapsed.
Click to expand it.
variants/filter_svaba.pl
0 → 100755
+
120
−
0
View file @
8d9c724f
#!/usr/bin/perl -w
#integrate_datasets.pl
#module load vcftools/0.1.14 samtools/1.6 bedtools/2.26.0
use
Getopt::
Long
qw(:config no_ignore_case no_auto_abbrev)
;
my
%opt
=
();
my
$results
=
GetOptions
(
\
%opt
,'
in|i=s
','
pid|p=s
','
tumor|t=s
');
open
VCFOUT
,
"
>
$opt
{pid}
\
.svaba.vcf
"
or
die
$!
;
open
DELFUS
,
"
>
$opt
{pid}
\
.svaba.potentialfusion.txt
"
or
die
$!
;
open
IN
,
"
gunzip -c
$opt
{in} |
"
or
die
$!
;
W1:while
(
my
$line
=
<
IN
>
)
{
chomp
(
$line
);
if
(
$line
=~
m/^#/
)
{
if
(
$line
=~
m/^#CHROM/
)
{
print
OUT
"
##INFO=<ID=AF,Number=A,Type=Integer,Description=
\"
Alternate allele observation frequency
\"
>
\n
";
my
@header
=
split
(
/\t/
,
$line
);
(
$chrom
,
$pos
,
$id
,
$ref
,
$alt
,
$score
,
$filter
,
$info
,
$format
,
@gtheader
)
=
split
(
/\t/
,
$line
);
unless
(
$opt
{
tumor
})
{
if
(
grep
(
/T_DNA/
,
@gtheader
))
{
my
@tsamps
=
grep
(
/T_DNA/
,
@gtheader
);
$opt
{
tumor
}
=
$tsamps
[
0
];
}
else
{
$opt
{
tumor
}
=
$gtheader
[
0
];
}
}
}
print
VCFOUT
$line
,"
\n
";
}
my
(
$chrom
,
$pos
,
$id
,
$ref
,
$alt
,
$score
,
$filter
,
$annot
,
$format
,
@gts
)
=
split
(
/\t/
,
$line
);
next
if
(
$ref
=~
m/\./
||
$alt
=~
m/\./
||
$alt
=~
m/,X/
);
my
%hash
=
();
foreach
$a
(
split
(
/;/
,
$annot
))
{
my
(
$key
,
$val
)
=
split
(
/=/
,
$a
);
$hash
{
$key
}
=
$val
unless
(
$hash
{
$key
});
}
if
(
length
(
$alt
)
>
length
(
$ref
))
{
$hash
{
SVTYPE
}
=
"
INS
";
$hash
{
END
}
=
$pos
;
}
elsif
(
length
(
$alt
)
<
length
(
$ref
))
{
my
$diff
=
substr
(
$ref
,
length
(
$alt
));
$hash
{
SVTYPE
}
=
"
DEL
";
$hash
{
END
}
=
$pos
+
length
(
$diff
);
}
next
unless
(
$hash
{
ANN
});
next
unless
(
$hash
{
ANN
}
=~
m/HIGH|MODERATE|LOW/
);
my
%gtinfo
=
();
my
@deschead
=
split
(
/:/
,
$format
);
F1:foreach
my
$k
(
0
..
$#gtheader
)
{
my
$subjid
=
$gtheader
[
$k
];
my
$allele_info
=
$gts
[
$k
];
my
@ainfo
=
split
(
/:/
,
$allele_info
);
my
@mutallfreq
=
();
foreach
my
$k
(
0
..
$#ainfo
)
{
$gtinfo
{
$subjid
}{
$deschead
[
$k
]}
=
$ainfo
[
$k
];
}
$gtinfo
{
$subjid
}{
DP
}
=
(
split
(
/,/
,
$gtinfo
{
$subjid
}{
DP
}))[
0
]
if
(
$gtinfo
{
$subjid
}{
DP
});
next
F1
unless
(
$gtinfo
{
$subjid
}{
DP
}
&&
$gtinfo
{
$subjid
}{
DP
}
ne
'
.
'
&&
$gtinfo
{
$subjid
}{
DP
}
>=
1
);
my
@altct
=
split
(
/,/
,
$gtinfo
{
$subjid
}{
AD
});
my
$refct
=
shift
@altct
;
@altct2
=
split
(
/,/
,
$gtinfo
{
$subjid
}{
AO
});
if
(
scalar
(
@altct
)
ne
scalar
(
@altct2
))
{
warn
"
Inconsistent Allele counts @
$chrom
,
$pos
,
$alt
,
$gtinfo
{
$subjid
}{AD},
$gtinfo
{
$subjid
}{AO}
\n
";
}
my
$total
=
$refct
;
foreach
my
$act
(
@altct
)
{
next
if
(
$act
eq
'
.
');
$total
+=
$act
;
push
@mutallfreq
,
sprintf
("
%.4f
",
$act
/
$gtinfo
{
$subjid
}{
DP
});
}
$gtinfo
{
$subjid
}{
MAF
}
=
\
@mutallfreq
;
}
next
unless
(
$gtinfo
{
$opt
{
tumor
}}{
DP
}
&&
$gtinfo
{
$opt
{
tumor
}}{
DP
}
ne
'
.
'
&&
$gtinfo
{
$opt
{
tumor
}}{
DP
}
>=
20
);
unless
(
$gtinfo
{
$opt
{
tumor
}}{
AO
}
=~
m/\d+/
&&
$gtinfo
{
$opt
{
tumor
}}{
AD
}
=~
m/,/
)
{
warn
"
Missing Alt:
$line
\n
";
}
@tumormaf
=
@
{
$gtinfo
{
$opt
{
tumor
}}{
MAF
}};
@tumoraltct
=
split
(
/,/
,
$gtinfo
{
$opt
{
tumor
}}{
AO
});
next
if
(
$tumoraltct
[
0
]
eq
'
.
');
$hash
{
AF
}
=
join
("
,
",
@tumormaf
);
next
if
(
$tumoraltct
[
0
]
<
20
);
next
if
(
$tumormaf
[
0
]
<
0.01
);
my
$keepforvcf
=
0
;
my
$keeptrx
;
F1:foreach
$trx
(
split
(
/,/
,
$hash
{
ANN
}))
{
my
(
$allele
,
$effect
,
$impact
,
$gene
,
$geneid
,
$feature
,
$featureid
,
$biotype
,
$rank
,
$codon
,
$aa
,
$pos_dna
,
$len_cdna
,
$cds_pos
,
$cds_len
,
$aapos
,
$aalen
,
$distance
,
$err
)
=
split
(
/\|/
,
$trx
);
next
unless
(
$impact
=~
m/HIGH|MODERATE/
||
$effect
=~
/splice/i
);
next
if
(
$effect
eq
'
sequence_feature
');
$keeptrx
=
$trx
;
$keepforvcf
=
$gene
;
last
F1
;
}
next
unless
$keepforvcf
;
my
@nannot
;
foreach
$info
(
sort
{
$a
cmp
$b
}
keys
%hash
)
{
if
(
defined
$hash
{
$info
})
{
push
@nannot
,
$info
.
"
=
"
.
$hash
{
$info
};
}
else
{
push
@nannot
,
$info
;
}
}
my
$newannot
=
join
("
;
",
@nannot
);
if
(
$hash
{
SVTYPE
}
eq
'
INS
'
||
(
$hash
{
SVTYPE
}
eq
'
DEL
'
&&
$keepforvcf
!~
m/&/
))
{
print
VCFOUT
join
("
\t
",
$chrom
,
$pos
,
$id
,
$ref
,
$alt
,
$score
,
$filter
,
$newannot
,
$format
,
@gts
),"
\n
";
}
elsif
(
$hash
{
SPAN
}
&&
$hash
{
SPAN
}
>
9999
)
{
my
(
$allele
,
$effect
,
$impact
,
$gene
,
$geneid
,
$feature
,
$featureid
,
$biotype
,
$rank
,
$codon
,
$aa
,
$pos_dna
,
$len_cdna
,
$cds_pos
,
$cds_len
,
$aapos
,
$aalen
,
$distance
,
$err
)
=
split
(
/\|/
,
$keeptrx
);
print
DELFUS
join
("
\t
",
$chrom
,
$pos
,
$hash
{
CHR2
},
$hash
{
END
},
$effect
,
$gene
,
$biotype
,
$filter
,
$format
,
@gts
),"
\n
";
}
}
close
IN
;
This diff is collapsed.
Click to expand it.
variants/svcalling.sh
+
9
−
0
View file @
8d9c724f
...
@@ -77,6 +77,7 @@ then
...
@@ -77,6 +77,7 @@ then
delly2 call
-t
INS
-o
${
pair_id
}
.delly_insertion.bcf
-q
30
-g
${
reffa
}
${
sbam
}
${
normal
}
delly2 call
-t
INS
-o
${
pair_id
}
.delly_insertion.bcf
-q
30
-g
${
reffa
}
${
sbam
}
${
normal
}
#delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf
#delly2 filter -o ${pair_id}.delly_tra.bcf -f somatic -s samples.tsv ${pair_id}.delly_translocations.bcf
else
else
$tid
=
#RUN DELLY
#RUN DELLY
delly2 call
-t
BND
-o
${
pair_id
}
.delly_translocations.bcf
-q
30
-g
${
reffa
}
${
sbam
}
delly2 call
-t
BND
-o
${
pair_id
}
.delly_translocations.bcf
-q
30
-g
${
reffa
}
${
sbam
}
delly2 call
-t
DUP
-o
${
pair_id
}
.delly_duplications.bcf
-q
30
-g
${
reffa
}
${
sbam
}
delly2 call
-t
DUP
-o
${
pair_id
}
.delly_duplications.bcf
-q
30
-g
${
reffa
}
${
sbam
}
...
@@ -94,6 +95,10 @@ then
...
@@ -94,6 +95,10 @@ then
then
then
zgrep
'#CHROM'
${
pair_id
}
.delly.vcf.gz
>
${
pair_id
}
.delly.genefusion.txt
zgrep
'#CHROM'
${
pair_id
}
.delly.vcf.gz
>
${
pair_id
}
.delly.genefusion.txt
zcat
${
pair_id
}
.delly.vcf.gz |
$SNPEFF_HOME
/scripts/vcfEffOnePerLine.pl |java
-jar
$SNPEFF_HOME
/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[
*
]
.EFFECT ANN[
*
]
.GENE ANN[
*
]
.BIOTYPE FILTER FORMAT GEN[
*
]
|grep
-E
'gene_fusion|feature_fusion'
|
sort
-u
>>
${
pair_id
}
.delly.genefusion.txt
zcat
${
pair_id
}
.delly.vcf.gz |
$SNPEFF_HOME
/scripts/vcfEffOnePerLine.pl |java
-jar
$SNPEFF_HOME
/SnpSift.jar extractFields - CHROM POS CHR2 END ANN[
*
]
.EFFECT ANN[
*
]
.GENE ANN[
*
]
.BIOTYPE FILTER FORMAT GEN[
*
]
|grep
-E
'gene_fusion|feature_fusion'
|
sort
-u
>>
${
pair_id
}
.delly.genefusion.txt
mv
${
pair_id
}
.delly.vcf.gz
${
pair_id
}
.delly.ori.vcf.gz
bash
$baseDir
/filter_delly.pl
-t
$tid
-p
$pair_id
-i
${
pair_id
}
.delly.ori.vcf.gz
bgzip
${
pair_id
}
.delly.vcf
cat
${
pair_id
}
.potentialfusion.txt
>>
${
pair_id
}
.delly.genefusion.txt
fi
fi
elif
[[
$method
==
'svaba'
]]
elif
[[
$method
==
'svaba'
]]
then
then
...
@@ -116,6 +121,10 @@ then
...
@@ -116,6 +121,10 @@ then
then
then
zgrep
'#CHROM'
${
pair_id
}
.svaba.sv.vcf.gz
>
${
pair_id
}
.svaba.genefusion.txt
zgrep
'#CHROM'
${
pair_id
}
.svaba.sv.vcf.gz
>
${
pair_id
}
.svaba.genefusion.txt
zcat
${
pair_id
}
.svaba.sv.vcf.gz |
$SNPEFF_HOME
/scripts/vcfEffOnePerLine.pl |java
-jar
$SNPEFF_HOME
/SnpSift.jar extractFields - CHROM POS ALT ID ANN[
*
]
.EFFECT ANN[
*
]
.GENE ANN[
*
]
.BIOTYPE FILTER FORMAT GEN[
*
]
|grep
-E
'gene_fusion|feature_fusion'
|
sort
-u
>>
${
pair_id
}
.svaba.genefusion.txt
zcat
${
pair_id
}
.svaba.sv.vcf.gz |
$SNPEFF_HOME
/scripts/vcfEffOnePerLine.pl |java
-jar
$SNPEFF_HOME
/SnpSift.jar extractFields - CHROM POS ALT ID ANN[
*
]
.EFFECT ANN[
*
]
.GENE ANN[
*
]
.BIOTYPE FILTER FORMAT GEN[
*
]
|grep
-E
'gene_fusion|feature_fusion'
|
sort
-u
>>
${
pair_id
}
.svaba.genefusion.txt
mv
${
pair_id
}
.svaba.vcf.gz
${
pair_id
}
.svaba.ori.vcf.gz
bash
$baseDir
/filter_svaba.pl
-t
$tid
-p
$pair_id
-i
${
pair_id
}
.svaba.ori.vcf.gz
bgzip
${
pair_id
}
.svaba.vcf
cat
${
pair_id
}
.potentialfusion.txt
>>
${
pair_id
}
.svaba.genefusion.txt
fi
fi
elif
[[
$method
==
'lumpy'
]]
elif
[[
$method
==
'lumpy'
]]
then
then
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment