Skip to content
Snippets Groups Projects
Commit 2b3b3cfe authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

adding cnv and umicode

parent 11a5dfe5
No related merge requests found
......@@ -48,10 +48,10 @@ if [[ $nuctype == 'dna' ]]; then
samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt
samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
samtools view -b -F 1024 ${pair_id}.ontarget.bam | bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b stdin -hist | grep ^all > ${pair_id}.dedupcov.txt
java -Xmx32g -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt
java -Xmx64g -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt
java -Xmx64g -jar $PICARD/picard.jar EstimateLibraryComplexity I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.libcomplex.txt
samtools view -F 1024 ${pair_id}.ontarget.bam | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt
java -Xmx4g -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt
java -Xmx64g -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt
samtools view -b -q 1 ${pair_id}.ontarget.bam | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt
bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt
perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt
......
......@@ -36,7 +36,7 @@ then
SLURM_CPUS_ON_NODE=1
fi
module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 bcftools/1.6 htslib/1.6 picard/2.10.3
module load bwakit/0.7.15 bwa/intel/0.7.15 samtools/1.6 picard/2.10.3
baseDir="`dirname \"$0\"`"
......
......@@ -48,10 +48,10 @@ then
samtools markdup -s --output-fmt BAM -@ $SLURM_CPUS_ON_NODE sort.bam ${pair_id}.dedup.bam
elif [ $algo == 'picard' ]
then
java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
java -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
elif [ $algo == 'picard_umi' ]
then
java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
java -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt
elif [ $algo == 'fgbio_umi' ]
then
module load fgbio bwa/intel/0.7.15
......
......@@ -9,11 +9,12 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:p:h opt
while getopts :b:p:uh opt
do
case $opt in
b) sbam=$OPTARG;;
p) pair_id=$OPTARG;;
u) umi='umi';;
h) usage;;
esac
done
......@@ -32,7 +33,14 @@ fi
module load cnvkit/0.9.0
cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_targets.bed -o ${pair_id}.targetcoverage.cnn
cnvkit.py coverage ${sbam} /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.cnvkit_antitargets.bed -o ${pair_id}.antitargetcoverage.cnn
if [[ $umi == 'umi' ]]
then
cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.uminormals.cnn -o ${pair_id}.cnr
else
cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn /project/shared/bicf_workflow_ref/GRCh38/UTSWV2.normals.cnn -o ${pair_id}.cnr
fi
cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
cnvkit.py call ${pair_id}.cns -o ${pair_id}.call.cns
cnvkit.py diagram ${pair_id}.cnr -s ${pair_id}.cns -o ${pair_id}.cnv.pdf
perl $baseDir/filter_cnvkit.pl *.call.cns
#!/usr/bin/perl -w
#parse_cnvkit_table.pl
my $refdir = '/project/shared/bicf_workflow_ref/GRCh38/';
open OM, "<$refdir\/panel1385.genelist.txt" or die $!;
while (my $line = <OM>) {
chomp($line);
$keep{$line} = 1;
}
#my @keep = ("AKT2","ATM","AURKA","BAP1","BCL2L1","BCL6","BIRC3","BRCA2","CCND1","CCNE1","CD79B","CDK8","CDKN2A","CDKN2B","CEBPA","EGFR","ERBB2","FGF10","FGF14","FGF19","FGF3","FGF4","FLT1","FLT3","FOXP1","GATA6","GNA13","GNAS","IKZF1","IL7R","IRS2","KDM4C","KLHL6","KMT2A","KRAS","LYN","MCL1","MITF","MYC","NOTCH2","PIK3CA","PRKAR1A","PRKDC","PTEN","RB1","RICTOR","RUNX1T1","SDHA","TFDP1","TP53","ZNF217","CDK4","MDM4","MYCN","BTG2","BCL2L2","RAF1");
#my %keep = map {$_ => 1} @keep;
my @cvncalls = @ARGV;
foreach my $file (@ARGV) {
open IN, "<$file" or die $!;
my $out = $file;
$out =~ s/call.cns/cnvcalls.txt/;
open OUT, ">$out" or die $!;
print OUT join("\t","Gene","Chromosome","Start","End","Abberation Type","CN","Score"),"\n";
my $header = <IN>;
while (my $line = <IN>) {
chomp($line);
my ($chr,$start,$end,$geneids,$log2,$cn,$depth,
$probes,$weight) = split(/\t/,$line);
my %genes;
my @ids = split(/;|,/,$geneids);
foreach my $gid (@ids) {
my ($key,$value) = split(/=/,$gid);
if ($key eq 'ensembl_gn' || $key eq 'identifier') {
$genes{$value} = 1 if $keep{$value};
}
}
my $newgeneids = join(";", keys %genes);
my $len = sprintf("%.1f",($end-$start)/1000);
next if ($cn == 2) || scalar(keys %genes) < 1;
my $abtype = 'amplification';
$abtype = 'loss' if ($cn < 2);
print OUT join("\t",$newgeneids,$chr,$start,$end,$abtype,$cn,$weight),"\n";
}
close IN;
close OUT;
}
......@@ -100,4 +100,17 @@ then
vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
tabix platypus.vcf.gz
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
elif [[ $algo == 'strelka2' ]]
then
module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14
mkdir manta strelka
gvcflist=''
for i in *.bam; do
gvcflist="$gvcflist --bam ${i}"
done
configManta.py $gvcflist --referenceFasta ${reffa} --exome --runDir manta
manta/runWorkflow.py -m local -j 8
configureStrelkaGermlineWorkflow.py $gvcflist --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
strelka/runWorkflow.py -m local -j 8
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz
fi
......@@ -53,7 +53,8 @@ if [ $algo == 'strelka2' ]
/cm/shared/apps/manta/1.2.0/bin/configManta.py --normalBam ${normal}.final.bam --tumorBam ${tumor}.final.bam --referenceFasta ${genome_reference} --runDir ${manta_analysisPath}
${manta_analysisPath}/runWorkflow.py -m local -j 8
/cm/shared/apps/strelka/2.8.3/bin/configureStrelkaSomaticWorkflow.py --normalBam ${normal}.final.bam --tumorBam ${tumor}.final.bam --referenceFasta ${genome_reference} --targeted --indelCandidates ${manta_analysisPath}/results/variants/candidateSmallIndels.vcf.gz --runDir ${strelka_analysisPath}
${strelka_analysisPath}/runWorkflow.py -m local -j 8
${strelka_analysisPath}/runWorkflow.py -m local -j 8
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.strelka2.vcf.gz strelka/results/variants/variants.vcf.gz
fi
if [ $algo == 'virmid' ]
......
......@@ -82,13 +82,12 @@ if [ $algo == 'strelka2' ]
then
module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14
mkdir manta strelka
configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --runDir manta
configManta.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --runDir manta
manta/runWorkflow.py -m local -j 8
configureStrelkaSomaticWorkflow.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
strelka/runWorkflow.py -m local -j 8
vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') & (GEN[*].DP >= 10))" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka.vcf.gz
vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz
fi
if [ $algo == 'virmid' ]
then
module load snpeff/4.3q virmid/1.2 samtools/1.6 vcftools/0.1.14
......@@ -108,7 +107,12 @@ fi
if [ $algo == 'mutect2' ]
then
module load parallel gatk/3.7 snpeff/4.3q samtools/1.6 vcftools/0.1.14
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
if [ -z ${tbed} ]
then
cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
else
awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
fi
vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.pmutect.vcf.gz
fi
......
......@@ -31,8 +31,8 @@ calllist=''
for i in *.vcf.gz; do
if [[ $i == $HS ]]
then
bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a $i -b stdin |bgzip > hotspot.nooverlap.vcf.gz
list2="$list2 hotspot.nooverlap.vcf.gz"
bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a $i -b stdin |bgzip > nooverlap.hotspot.vcf.gz
list2="$list2 nooverlap.hotspot.vcf.gz"
fi
done
......
......@@ -7,99 +7,102 @@ my @vcffiles = @ARGV;
open HEADER, "<$headerfile" or die $!;
open OUT, ">int.vcf" or die $!;
while (my $line = <HEADER>) {
chomp($line);
print OUT $line,"\n";;
chomp($line);
print OUT $line,"\n";;
}
close HEADER;
my @sampleorder;
my %headerlines;
foreach $vcf (@vcffiles) {
$caller = (split(/\./,$vcf))[-3];
open VCF, "gunzip -c $vcf|" or die $!;
my @sampleids;
while (my $line = <VCF>) {
chomp($line);
if ($line =~ m/#/) {
if ($line =~ m/#CHROM/) {
($chromhd, $posd,$idhd,$refhd,$althd,$scorehd,
$filterhd,$annothd,$formathd,@sampleids) = split(/\t/, $line);
foreach $j (0..$#sampleids) {
$sampleids[$j] =~ s/\.final//g;
}
unless (@sampleorder) {
@sampleorder = @sampleids;
print OUT $line,"\n";
}
next;
}
next;
$caller = (split(/\./,$vcf))[-3];
open VCF, "gunzip -c $vcf|" or die $!;
my @sampleids;
while (my $line = <VCF>) {
chomp($line);
if ($line =~ m/#/) {
if ($line =~ m/#CHROM/) {
($chromhd, $posd,$idhd,$refhd,$althd,$scorehd,
$filterhd,$annothd,$formathd,@sampleids) = split(/\t/, $line);
foreach $j (0..$#sampleids) {
$sampleids[$j] =~ s/\.final//g;
}
my ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts) = split(/\t/, $line);
my %hash = ();
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val;
unless (@sampleorder) {
@sampleorder = @sampleids;
print OUT $line,"\n";
}
my @deschead = split(/:/,$format);
my $newformat = 'GT:DP:AD:AO:RO';
my %newgts;
my $missingGT = 0;
FG:foreach my $i (0..$#gts) {
my $allele_info = $gts[$i];
my $sid = $sampleids[$i];
my @gtinfo = split(/:/,$allele_info);
my %gtdata;
if ($allele_info eq '.') {
$newgts{$sid} = '.:.:.:.:.';
$missingGT ++;
next FG;
}
foreach my $i (0..$#deschead) {
$gtdata{$deschead[$i]} = $gtinfo[$i];
}
if ($gtdata{GT} eq './.') {
$newgts{$sid} = '.:.:.:.:.';
$missingGT ++;
next FG;
}
if ($gtdata{AD}){
($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
$gtdata{AO} = join(",",@alts);
$gtdata{DP} = $gtdata{RO};
foreach (@alts) {
$gtdata{DP} += $_;
}
} elsif (exists $gtdata{NR} && exists $gtdata{NV}) {
$gtdata{DP} = $gtdata{NR};
$gtdata{AO} = $gtdata{NV};
$gtdata{RO} = $gtdata{DP} - $gtdata{AO};
$gtdata{AD} = join(",",$gtdata{RO},$gtdata{AO})
} elsif (exists $gtdata{AO} && exists $gtdata{RO}) {
$gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO});
$gtdata{DP} = $gtdata{RO};
foreach (split(',',$gtdata{AO})) {
$gtdata{DP} += $_;
}
}
if ($gtdata{DP} && $gtdata{DP} < 3) {
$missingGT ++;
}
$newgts{$sid} = join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
next;
}
next;
}
my ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts) = split(/\t/, $line);
if ($pos == 48303944) {
warn "Debugging\n";
}
my %hash = ();
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val;
}
my @deschead = split(/:/,$format);
my $newformat = 'GT:DP:AD:AO:RO';
my %newgts;
my $missingGT = 0;
FG:foreach my $i (0..$#gts) {
my $allele_info = $gts[$i];
my $sid = $sampleids[$i];
my @gtinfo = split(/:/,$allele_info);
my %gtdata;
if ($allele_info eq '.') {
$newgts{$sid} = '.:.:.:.:.';
$missingGT ++;
next FG;
}
next if ($missingGT == scalar(@gts));
my @newgts;
foreach $id (@sampleorder) {
push @newgts, $newgts{$id};
foreach my $i (0..$#deschead) {
$gtdata{$deschead[$i]} = $gtinfo[$i];
}
if ($gtdata{GT} eq './.') {
$newgts{$sid} = '.:.:.:.:.';
$missingGT ++;
next FG;
}
if ($gtdata{AD}){
($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
$gtdata{AO} = join(",",@alts);
$gtdata{DP} = $gtdata{RO};
foreach (@alts) {
$gtdata{DP} += $_;
}
} elsif (exists $gtdata{NR} && exists $gtdata{NV}) {
$gtdata{DP} = $gtdata{NR};
$gtdata{AO} = $gtdata{NV};
$gtdata{RO} = $gtdata{DP} - $gtdata{AO};
$gtdata{AD} = join(",",$gtdata{RO},$gtdata{AO})
} elsif (exists $gtdata{AO} && exists $gtdata{RO}) {
$gtdata{AD} = join(',',$gtdata{RO},$gtdata{AO});
$gtdata{DP} = $gtdata{RO};
foreach (split(',',$gtdata{AO})) {
$gtdata{DP} += $_;
}
$lines{$chrom}{$pos}{$caller} = join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}
if ($gtdata{DP} && $gtdata{DP} < 3) {
$missingGT ++;
}
$newgts{$sid} = join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
}
next if ($missingGT == scalar(@gts));
my @newgts;
foreach $id (@sampleorder) {
push @newgts, $newgts{$id};
}
close VCF;
$lines{$chrom}{$pos}{$caller} = join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}
close VCF;
}
my @callers = ('ssvar','platypus','sam','gatk','hotspot');
my @callers = ('ssvar','platypus','sam','gatk','strelka2','hotspot');
if (grep(/mutect/,@vcffiles)) {
@callers = ('sssom','pmutect','shimmer','stelka','varscan','virmid');
@callers = ('sssom','pmutect','shimmer','strelka2','varscan','virmid');
}
F1:foreach $chr (sort {$a cmp $b} keys %lines) {
F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment