diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index aba4e7247d541f7d8e2287ac9c9913652ae7af6f..139989f1033bf3fcb0b68d041802347da5914cd4 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -50,7 +50,7 @@ foreach $efile (@exonfiles) { open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!; -print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand", +print OAS join("\t","FusionName","LeftGene","LeftBreakpoint","LeftGeneExons","LeftStrand", "RightGene","RightBreakpoint","RightGeneExons","RightStrand", "RNAReads","DNAReads","FusionType","Annot",'Filter','ChrType','ChrDistance'),"\n"; diff --git a/alignment/viralalign.sh b/alignment/viralalign.sh new file mode 100644 index 0000000000000000000000000000000000000000..4e72b3122c137e82f0048ec0c0bf07cdf81ade0c --- /dev/null +++ b/alignment/viralalign.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#abra.sh + +usage(){ + echo "-h Help documentation for gatk4runner.sh" + echo "-b --BAM File" + echo "-r --Reference path e.g. GRCh38" + echo "-p --Sample/Project ID" +} + +OPTIND=1 #Reset OPTIN +while getopts :b:r:p:h opt +do + case $opt in + b) bam=$OPTARG;; + r) ref=$OPTARG;; + p) pairid=$OPTARG;; + h) usage;; + esac +done + +shift $(($OPTIND -1)) + +#Check for mandatory options +if [[ -z $bam ]] || [[ -z $pairid ]] +then + usage +fi + +if [[ -z $SLURM_CPUS_ON_NODE ]] +then + SLURM_CPUS_ON_NODE=1 +fi + +reffa=${ref}/idt_virus_reference.fa + +module load bwa/intel/0.7.17 picard/2.10.3 samtools/1.6 + +samtools view -@ 8 -b -u -F 2 ${bam} |samtools sort -n - >unmapped.bam +java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar SamToFastq I=unmapped.bam FASTQ=unmapped.R1.fastq SECOND_END_FASTQ=unmapped.R2.fastq UNPAIRED_FASTQ=unmapped.unpaired.fastq + +bwa mem -M -t 4 -R "@RG\tID:${pairid}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pairid}" ${reffa} unmapped.R1.fastq unmapped.R2.fastq > out.sam + +samtools view -h -F 256 -b out.sam -o out.bam +samtools sort out.bam -o ${pairid}.viral.bam +samtools index ${pairid}.viral.bam +samtools idxstats ${pairid}.viral.bam >${pairid}.viral.idxstats.txt +samtools flagstat ${pairid}.viral.bam >${pairid}.viral.flagstat.txt diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index c039906dcd21701313d13633bb7eeec54243e925..bb9ea279be9b84e3466415fa82f1cb7e00feacc1 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -83,24 +83,31 @@ echo "${targets}targets.bed" echo "${targets}antitargets.bed" echo "${normals}" +numsnps=`grep -c -P "rs[0-9]+" ${targets}targets.bed` +panelsize=`awk '{ sum+=$3-$2} END {print sum}' ${targets}targets.bed` + unset DISPLAY cnvkit.py coverage ${sbam} ${targets}targets.bed -o ${pair_id}.targetcoverage.cnn cnvkit.py coverage ${sbam} ${targets}antitargets.bed -o ${pair_id}.antitargetcoverage.cnn cnvkit.py fix ${pair_id}.targetcoverage.cnn ${pair_id}.antitargetcoverage.cnn ${normals} -o ${pair_id}.cnr -cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns - -numsnps=`grep -c -P "rs[0-9]+" ${targets}targets.bed` +if [[ $panelsize -gt 4000000 ]] +then + cnvkit.py segment ${pair_id}.cnr -o ${pair_id}.cns +else + cnvkit.py segment -m haar ${pair_id}.cnr -o ${pair_id}.cns +fi -if [[ $numsnps > 100 ]] +if [[ $numsnps -gt 100 ]] then - samtools index ${sbam} - java -jar /cm/shared/apps/gatk/3.8/target/package/GenomeAnalysisTK.jar -T UnifiedGenotyper -R ${reffa} --output_mode EMIT_ALL_SITES -L ${index_path}/IDT_snps.hg38.bed -o common_variants.vcf -glm BOTH -dcov 10000 -I ${sbam} + #samtools index ${sbam} + #java -jar /cm/shared/apps/gatk/3.8/target/package/GenomeAnalysisTK.jar -T UnifiedGenotyper -R ${reffa} --output_mode EMIT_ALL_SITES -L ${index_path}/IDT_snps.hg38.bed -o common_variants.vcf -glm BOTH -dcov 10000 -I ${sbam} + bcftools mpileup -A -d 1000000 -C50 -Ou --gvcf 0 -f ${reffa} -a INFO/AD,INFO/ADF,INFO/ADR,FORMAT/DP,FORMAT/SP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR -T ${index_path}/IDT_snps.hg38.bed ${sbam} | bcftools call -m --gvcf 0 -Ov | bcftools convert --gvcf2vcf -f ${reffa} -Ov -o common_variants.vcf $baseDir/formatVcfCNV.pl cnvkit_common common_variants.vcf echo -e "CHROM\tPOS\tAO\tRO\tDP\tMAF" > ${pair_id}.ballelefreq.txt java -jar $SNPEFF_HOME/SnpSift.jar extractFields cnvkit_common.vcf CHROM POS GEN[0].AO GEN[0].RO GEN[0].DP |grep -v CHROM | awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$3/$5}' >> ${pair_id}.ballelefreq.txt - cnvkit.py call --filter cn ${pair_id}.cns -v cnvkit_common.vcf -o ${pair_id}.call.cns + cnvkit.py call --filter cn ${pair_id}.cns -v cnvkit_common.vcf -o ${pair_id}.call.cns cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf -v cnvkit_common.vcf else diff --git a/variants/filter_delly.pl b/variants/filter_delly.pl index 8cb2ec921fa8a7ae7f95db49146c060b50bd16d1..859a7ba876f1a5a399c56464b2026ebd0c0bb261 100755 --- a/variants/filter_delly.pl +++ b/variants/filter_delly.pl @@ -4,7 +4,7 @@ #module load vcftools/0.1.14 samtools/1.6 bedtools/2.26.0 use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); my %opt = (); -my $results = GetOptions (\%opt,'in|i=s','pid|p=s','tumor|t=s'); +my $results = GetOptions (\%opt,'in|i=s','pid|p=s','tumor|t=s','normal|n'); open VCFOUT, ">$opt{pid}\.delly.vcf" or die $!; open DELFUS, ">$opt{pid}\.delly.potentialfusion.txt" or die $!; @@ -27,6 +27,12 @@ W1:while (my $line = <IN>) { $opt{tumor} = $gtheader[0]; } } + unless ($opt{normal}) { + if (grep(/N_DNA/,@gtheader)) { + my @tsamps = grep(/N_DNA/,@gtheader); + $opt{tumor} = $tsamps[0]; + } + } } print VCFOUT $line,"\n"; next; @@ -34,6 +40,7 @@ W1:while (my $line = <IN>) { my ($chrom, $pos,$id,$ref,$alt,$score, $filter,$annot,$format,@gts) = split(/\t/, $line); next if ($ref =~ m/\./ || $alt =~ m/\./ || $alt=~ m/,X/); + next unless ($chrom =~ m/^chr\d+$/ || $chrom eq 'chrX' || $chrom eq 'chrY'); my %hash = (); foreach $a (split(/;/,$annot)) { my ($key,$val) = split(/=/,$a); diff --git a/variants/filter_svaba.pl b/variants/filter_svaba.pl index 19565c226b71800dfccf58d3db2cfb85db931918..e9625704e8fae361f70d8c1218f254c6ba5fffe6 100755 --- a/variants/filter_svaba.pl +++ b/variants/filter_svaba.pl @@ -34,6 +34,7 @@ W1:while (my $line = <IN>) { my ($chrom, $pos,$id,$ref,$alt,$score, $filter,$annot,$format,@gts) = split(/\t/, $line); next if ($ref =~ m/\./ || $alt =~ m/\./ || $alt=~ m/,X/); + next unless ($chrom =~ m/^chr\d+$/ || $chrom eq 'chrX' || $chrom eq 'chrY'); my %hash = (); foreach $a (split(/;/,$annot)) { my ($key,$val) = split(/=/,$a); diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 9278d5d5064b3a104105bbe6c3353356c8c6f9c4..0c3b97d85cd3c35dc2a7c739e54aef0d8124755e 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -185,9 +185,10 @@ then elif [[ $method == 'itdseek' ]] then stexe=`which samtools` - samtools view -@ $NPROC -L ${bed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz + samtools view -@ $NPROC -L ${bed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | java -Xmx30g -jar $SNPEFF_HOME/SnpSift.jar filter "( LEN < 10000 )" | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz + bcftools norm --fasta-ref $reffa -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz if [[ $filter == 1 ]] then