Skip to content
Snippets Groups Projects
Commit a0f3cd94 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

Merge branch 'master' of git.biohpc.swmed.edu:BICF/Astrocyte/process_scripts

parents 0398b847 9c238995
Branches
Tags
No related merge requests found
......@@ -51,21 +51,18 @@ module load python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3
baseDir="`dirname \"$0\"`"
diff $fq1 $fq2 > difffile
if [[ $aligner == 'bwa' ]]
then
module load bwa/intel/0.7.17
if [ -s difffile ]
then
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} ${fq2} > out.sam
else
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa ${fq1} > out.sam
fi
elif [[ $aligner == 'hisat2' ]]
module load bwa/intel/0.7.17
if [ -s difffile ]
then
module load hisat2/2.1.0-intel
hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal -x ${index_path}/hisat_index/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt
file_opt="${fq1} ${fq2}"
else
file_opt="${fq1}"
fi
bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa $file_opt > out.sam
if [[ $umi == 'umi' ]] && [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]]
then
k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam
......@@ -78,6 +75,7 @@ then
else
samtools view -1 -o output.unsort.bam out.sam
fi
which samtools
samtools sort -n --threads $SLURM_CPUS_ON_NODE -o output.dups.bam output.unsort.bam
java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar FixMateInformation ASSUME_SORTED=TRUE SORT_ORDER=coordinate ADD_MATE_CIGAR=TRUE I=output.dups.bam O=${pair_id}.bam
......
......@@ -11,7 +11,7 @@ usage() {
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :b:p:n:f:t:c:uqh opt
while getopts :b:p:n:t:r:uqh opt
do
case $opt in
b) sbam=$OPTARG;;
......@@ -19,7 +19,6 @@ do
n) normals=$OPTARG;;
r) index_path=$OPTARG;;
t) targets=$OPTARG;;
c) capture=$OPTARG;;
u) umi='umi';;
q) idtsnp=1;;
h) usage;;
......@@ -31,7 +30,7 @@ baseDir="`dirname \"$0\"`"
if [[ -z $index_path ]]
then
index_path='/project/shared/bicf_workflow_ref/human/GRCh38'
index_path='/project/shared/bicf_workflow_ref/human/grch38_cloud/dnaref'
fi
# Check for mandatory options
if [[ -z $pair_id ]] || [[ -z $sbam ]]
......@@ -46,6 +45,14 @@ if [[ -z $normals ]] || [[ -z $targets ]]
then
usage
fi
if [[ -s "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
echo "${targets}targets.bed"
echo "${targets}antitargets.bed"
......@@ -58,7 +65,8 @@ unset DISPLAY
if [[ $idtsnp == 1 ]]
then
samtools index ${sbam}
bcftools mpileup --threads 10 -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} -t ${index_path}/clinseq_prj/IDT_snps.120bp.bed ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf
bcftools mpileup --threads 10 --gvcf 10 -A -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 1000000 -L 1000000 -C50 -f ${reffa} ${sbam} | bcftools call --threads 10 -vmO v -o common_variants.vcf -T ${index_path}/IDT_snps.hg38.bed
$baseDir/formatVcfCNV.pl cnvkit_common common_variants.vcf
fi
cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn
......@@ -68,7 +76,7 @@ cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns
if [[ $idtsnp == 1 ]]
then
cnvkit.py call --filter cn ${pair_id}.cns -v common_variants.vcf -o ${pair_id}.call.cns
cnvkit.py call --filter cn ${pair_id}.cns -v cnvkit_common.vcf -o ${pair_id}.call.cns
else
cnvkit.py call --filter cn ${pair_id}.cns -o ${pair_id}.call.cns
fi
......
......@@ -69,13 +69,11 @@ while (my $line = <CNR>) {
$genes{$value} = 1 if $keep{$value};
}
}
}elsif ($geneids =~ m/:/) {
my ($gene,$chr,$pos) = split(/:/,$geneids);
$genes{$gene} = 1;
}else {
} else {
my @ids = split(/,/,$geneids);
foreach my $gid (@ids) {
my ($gene,$trxid,$exonnum,$strand) = split(/\|/,$gid);
next if ($gid =~ /^SNP:rs\d+$/);
my ($gene,@other) = split(/:/,$gid);
$genes{$gene} = 1 if $keep{$gene};
}
}
......@@ -101,13 +99,11 @@ while (my $line = <IN>) {
$genes{$value} = 1 if $keep{$value};
}
}
}elsif ($geneids =~ m/:/) {
my ($gene,$chr,$pos) = split(/:/,$geneids);
$genes{$gene} = 1;
}else {
} else {
my @ids = split(/,/,$geneids);
foreach my $gid (@ids) {
my ($gene,$trxid,$exonnum,$strand) = split(/\|/,$gid);
next if ($gid =~ /^SNP:rs\d+$/);
my ($gene,@other) = split(/:/,$gid);
$genes{$gene} = 1 if $keep{$gene};
}
}
......
#!/usr/bin/perl
#migrate_db.pl
my $pair_id = shift @ARGV;
my $vcf = shift @ARGV;
my $outfile = $pair_id.".vcf";
open OUT, ">$outfile" or die $!;
open VCF, "$vcf" or die $!;
while (my $line = <VCF>) {
chomp($line);
if ($line =~ m/#/) {
if ($line =~ m/#CHROM/) {
print OUT "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">\n";
print OUT "##FORMAT=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count\">\n";
print OUT "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed\">\n";
print OUT "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Approximate read depth (reads with MQ=255 or with bad mates are filtered)\">\n";
}
unless ($line =~ m/FORMAT=<ID=AO/ || $line =~ m/FORMAT=<ID=AD/ || $line =~ m/FORMAT=<ID=RO/ || $line =~ m/FORMAT=<ID=DP/) {
print OUT $line,"\n";
}
next;
}
my ($chrom, $pos,$id,$ref,$alt,$score,
$filter,$annot,$format,@gts) = split(/\t/, $line);
next if ($chrom =~ m/alt/);
my %hash = ();
foreach $a (split(/;/,$annot)) {
my ($key,$val) = split(/=/,$a);
$hash{$key} = $val;
}
my @deschead = split(/:/,$format);
my $newformat = 'GT:DP:AD:AO:RO';
my @newgts = ();
my $missingGT = 0;
FG:foreach my $allele_info (@gts) {
my @gtinfo = split(/:/,$allele_info);
my %gtdata;
if ($allele_info eq '.') {
push @newgts, '.:.:.:.:.';
$missingGT ++;
next FG;
}
foreach my $i (0..$#deschead) {
$gtdata{$deschead[$i]} = $gtinfo[$i];
}
if ($alt eq '.') {
$gtdata{AO} = 0;
$gtdata{RO} = $gtdata{DP};
$gtdata{AD} = join(",", $gtdata{RO},$gtdata{AO});
} elsif ($gtdata{AD}){
($gtdata{RO},@alts) = split(/,/,$gtdata{AD});
$gtdata{AO} = join(",",@alts);
$gtdata{DP} = $gtdata{RO};
foreach (@alts) {
$gtdata{DP} += $_;
}
}
if ($gtdata{DP} && $gtdata{DP} < 5) {
$missingGT ++;
}
if ($gtdata{DP} == 0 || $gtdata{GT} eq './.') {
push @newgts, '.:.:.:.:.';
$missingGT ++;
next FG;
}
push @newgts, join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
}
next if ($missingGT == scalar(@gts));
if ($hash{END}) {
foreach $i ($pos..$hash{END}) {
print OUT join("\t",$chrom,$i,$id,'N','N',$score,$filter,$annot,$newformat,@newgts),"\n";
}
}else {
print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n";
}
}
close VCF;
......@@ -34,11 +34,11 @@ if [[ -z $SLURM_CPUS_ON_NODE ]]
then
SLURM_CPUS_ON_NODE=1
fi
if [[ -a "${index_path}/dbSnp.vcf.gz" ]]
if [[ -a "${index_path}/dbSnp.gatk4.vcf.gz" ]]
then
dbsnp="${index_path}/dbSnp.vcf.gz"
dbsnp="${index_path}/dbSnp.gatk4.vcf.gz"
else
echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
echo "Missing dbSNP File: ${index_path}/dbSnp.gatk4.vcf.gz"
usage
fi
if [[ -a "${index_path}/GoldIndels.vcf.gz" ]]
......
......@@ -76,7 +76,7 @@ done
if [[ $algo == 'mpileup' ]]
then
threads=`expr $SLURM_CPUS_ON_NODE - 10`
bcftools mpileup --threads $threads -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -Q20 -d 99999 -C50 -f ${reffa} *.bam | bcftools call --threads 10 -vmO z -o ${pair_id}.vcf.gz
bcftools mpileup --threads $threads -a 'INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR' -Ou -A -d 1000000 -C50 -f ${reffa} *.bam | bcftools call -A --threads 10 -vmO z -o ${pair_id}.vcf.gz
vcf-annotate -n --fill-type ${pair_id}.vcf.gz | bcftools norm -c s -f ${reffa} -w 10 -O v -o sam.vcf
java -jar $PICARD/picard.jar SortVcf I=sam.vcf O=${pair_id}.sam.vcf R=${reffa} CREATE_INDEX=TRUE
bgzip ${pair_id}.sam.vcf
......
......@@ -52,4 +52,4 @@ stexe=`which samtools`
samtools view -@ $SLURM_CPUS_ON_NODE -L ${itdbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz
tabix ${pair_id}.itdseek.vcf.gz
bcftools norm --fasta-ref $reffa -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz
bcftools norm --fasta-ref $reffa -c w -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz
......@@ -13,7 +13,6 @@ OPTIND=1 # Reset OPTIND
while getopts :r:p:v:h opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
v) vcf=$OPTARG;;
h) usage;;
......@@ -24,17 +23,8 @@ shift $(($OPTIND -1))
baseDir="`dirname \"$0\"`"
source /etc/profile.d/modules.sh
module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q
module load bedtools/2.26.0 samtools/gcc/1.8 bcftools/gcc/1.8 snpeff/4.3q
if [[ -a "${index_path}/genome.fa" ]]
then
reffa="${index_path}/genome.fa"
dict="${index_path}/genome.dict"
else
echo "Missing Fasta File: ${index_path}/genome.fa"
usage
fi
perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf
bgzip -f ${pair_id}.uniform.vcf
......
......@@ -119,7 +119,7 @@ then
vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
elif [ $algo == 'varscan' ]
then
module load samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14
module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14
module rm java/oracle/jdk1.7.0_51
module load snpeff/4.3q
samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup
......
......@@ -136,7 +136,7 @@ then
pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf
cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz
tabix pindel.vcf.gz
bash $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz
bash $baseDir/norm_annot.sh -p pindel -v pindel.vcf.gz
perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment