diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index c08dcbe41e98a8964ede472573c5e90c16efb73d..604cfe4f133aeb6310d6c0c8f35aff4ed3aa8b32 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -27,7 +27,7 @@ done shift $(($OPTIND -1)) # Check for mandatory options -if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then +if [[ -z $pair_id ]] || [[ -z $sbam ]]; then usage fi diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index 320943492d353cc43025fff346980d55cacf9e34..d994edb11e68b3daa259f157e2856e6e3b761de7 100644 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -15,7 +15,7 @@ OPTIND=1 # Reset OPTIND while getopts :r:a:x:y:p:h opt do case $opt in - r) refgeno=$OPTARG;; + r) index_path=$OPTARG;; x) fq1=$OPTARG;; y) fq2=$OPTARG;; a) algo=$OPTARG;; @@ -31,12 +31,6 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi -if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then - index_path=/project/shared/bicf_workflow_ref/${refgeno} -else - usage -fi - module load samtools/gcc/1.6 picard/2.10.3 if [[ -z $SLURM_CPUS_ON_NODE ]] then @@ -47,9 +41,9 @@ then if ($fq1 != $fq2) then module load star/2.4.2a - STAR --genomeDir ${index_path}/star_index/ --readFilesIn ${fq1} ${fq2} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out + STAR --genomeDir ${index_path}/star_index/ --readFilesIn $fq1 $fq2 --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out else - STAR --genomeDir ${index_path}/${star_index} --readFilesIn ${fq1} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out + STAR --genomeDir ${index_path}/star_index/ --readFilesIn $fq1 --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out fi mv outLog.final.out ${pair_id}.alignerout.txt mv outAligned.sortedByCoord.out.bam output.bam @@ -57,9 +51,9 @@ else module load hisat2/2.1.0-intel if [ $fq1 == $fq2 ] then - hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -U ${fq1} -S out.sam --summary-file ${pair_id}.alignerout.txt + hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt else - hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -1 ${fq1} -2 ${fq2} -S out.sam --summary-file ${pair_id}.alignerout.txt + hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --add-chrname --no-unal --dta -x ${index_path}/hisat_index/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt fi samtools view -1 --threads $SLURM_CPUS_ON_NODE -o output.bam out.sam fi diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index b2b0acd2f58088ad22ed367a8b3c3febb1f26d9e..cad5ba902cbcc1e992cb3f06571f3c7a820332ae 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -29,11 +29,7 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi -if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ]; then - index_path=/project/shared/bicf_workflow_ref/${refgeno}/CTAT_lib/ -else - usage -fi +index_path=${refgeno}/CTAT_lib/ module add python/2.7.x-anaconda star/2.5.2b STAR-Fusion --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --output_dir star_fusion &> star_fusion.err diff --git a/diff_exp/geneabundance.sh b/diff_exp/geneabundance.sh new file mode 100644 index 0000000000000000000000000000000000000000..5385804a67d97c8f7ec3a7f1559afa248c4d8091 --- /dev/null +++ b/diff_exp/geneabundance.sh @@ -0,0 +1,42 @@ +#!/bin/bash +#rnaseqalign.sh + +usage() { + echo "-h Help documentation for hisat.sh" + echo "-b --BAM File" + echo "-g --GTF File" + echo "-s --stranded" + echo "-p --Prefix for output file name" + echo "Example: bash geneabudance.sh genect_rnaseq/geneabundance.sh -s stranded -g gtf_file -p pair_id -b bam" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :b:g:p:s:h opt +do + case $opt in + b) sbam=$OPTARG;; + g) gtf=$OPTARG;; + p) pair_id=$OPTARG;; + s) stranded=$OPTARG;; + h) usage;; + esac +done + +shift $(($OPTIND -1)) + +# Check for mandatory options +if [[ -z $pair_id ]] || [[ -z $sbam ]] +then + usage +fi +if [[ -z $SLURM_CPUS_ON_NODE ]] +then + SLURM_CPUS_ON_NODE=1 +fi +module load subread/1.5.0-intel stringtie/1.1.2-intel +featureCounts -s $stranded -T $SLURM_CPUS_ON_NODE -p -g gene_name -a ${gtf} -o ${pair_id}.cts ${sbam} + +mkdir ${pair_id}_stringtie +cd ${pair_id}_stringtie +stringtie ../${sbam} -p $SLURM_CPUS_ON_NODE -G ../${gtf} -B -e -o denovo.gtf -A ../${pair_id}.fpkm.txt + \ No newline at end of file diff --git a/preproc_fastq/trimgalore.sh b/preproc_fastq/trimgalore.sh index f53cfeb2b788fd68c0742f8262e9d4c8efc00567..2c27f5ac219ca4618e63309e2398b5b3e5758baf 100644 --- a/preproc_fastq/trimgalore.sh +++ b/preproc_fastq/trimgalore.sh @@ -27,11 +27,11 @@ if [[ -z $pair_id ]] || [[ -z $fq1 ]]; then usage fi -r1base="${r1%.fastq*}" -r2base="${r2%.fastq*}" +r1base="${fq1%.fastq*}" +r2base="${fq2%.fastq*}" module load trimgalore/0.4.1 cutadapt/1.9.1 -if [ $R1 == $R2 ] +if [ $fq1 == $fq2 ] then trim_galore -q 25 --illumina --gzip --length 35 ${fq1} mv ${r1base}_trimmed.fq.gz ${pair_id}.trim.R1.fastq.gz diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index ee7f07dee43770f0bcc5e78f4a1ba48aea17ecec..077ce71fc4d18407a9a225de4123d2d331aa8b28 100644 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -2,7 +2,7 @@ #gatkrunner.sh usage() { - echo "-h Help documentation for hisat.sh" + echo "-h Help documentation for gatkrunner.sh" echo "-r --Reference Genome: GRCh38 or GRCm38" echo "-b --BAM File" echo "-p --Prefix for output file name" @@ -14,7 +14,7 @@ OPTIND=1 # Reset OPTIND while getopts :r:a:b:p:h opt do case $opt in - r) refgeno=$OPTARG;; + r) index_path=$OPTARG;; b) sbam=$OPTARG;; p) pair_id=$OPTARG;; a) algo=$OPTARG;; @@ -25,13 +25,8 @@ done shift $(($OPTIND -1)) # Check for mandatory options -if [[ -z $pair_id ]] || [[ -z $bam ] || [[ -z $refgeno ]]]; then - usage -fi - -if [ $refgeno == 'GRCh38' ] || [ $refgeno == 'GRCm38' ] || [ $refgeno == 'GRCh37' ]; then - index_path=/project/shared/bicf_workflow_ref/${refgeno} -else +if [[ -z $pair_id ]] || [[ -z $sbam ]] || [[ -z $index_path ]] +then usage fi @@ -39,23 +34,47 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi - -dbsnp="${index_path}/Snp.vcf.gz" -knownindel="${index_path}/Snp.vcf.gz" -reffa="${index_path}/genome.fa" +if [[ -a "${index_path}/dbSnp.vcf.gz" ]] +then + dbsnp="${index_path}/dbSnp.vcf.gz" +else + echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz" + usage +fi +if [[ -a "${index_path}/GoldIndels.vcf.gz" ]] +then + knownindel="${index_path}/GoldIndels.vcf.gz" +else + echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz" + usage +fi +if [[ -a "${index_path}/genome.fa" ]] +then + reffa="${index_path}/genome.fa" +else + echo "Missing Fasta File: ${index_path}/genome.fa" + usage +fi module load gatk/3.7 samtools/1.6 +samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} -if [[ $algo == 'gatkbam' ]] +if [[ $algo == 'gatkbam_rna' ]] +then + module load picard/2.10.3 + java -Xmx4g -jar $PICARD/picard.jar CleanSam INPUT=${sbam} OUTPUT=${pair_id}.clean.bam + #java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id} + samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.clean.bam + java -Xmx4g -jar $GATK_JAR -T SplitNCigarReads -R ${reffa} -I ${pair_id}.clean.bam -o ${pair_id}.split.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS + java -Xmx32g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${pair_id}.split.bam -nt $SLURM_CPUS_ON_NODE -nct 1 + java -Xmx32g -jar $GATK_JAR -I ${pair_id}.split.bam -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam + java -Xmx32g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct $SLURM_CPUS_ON_NODE + java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct $SLURM_CPUS_ON_NODE +elif [[ $algo == 'gatkbam' ]] then - samtools index ${sbam} java -Xmx32g -jar $GATK_JAR -T RealignerTargetCreator -known ${knownindel} -R ${reffa} -o ${pair_id}.bam.list -I ${sbam} -nt $SLURM_CPUS_ON_NODE -nct 1 java -Xmx32g -jar $GATK_JAR -I ${sbam} -R ${reffa} --filter_mismatching_base_and_quals -T IndelRealigner -targetIntervals ${pair_id}.bam.list -o ${pair_id}.realigned.bam java -Xmx32g -jar $GATK_JAR -l INFO -R ${reffa} --knownSites ${dbsnp} -I ${pair_id}.realigned.bam -T BaseRecalibrator -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov ContextCovariate -o ${pair_id}.recal_data.grp -nt 1 -nct $SLURM_CPUS_ON_NODE - java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct 8 -elif -then - -else - + java -Xmx32g -jar $GATK_JAR -T PrintReads -R ${reffa} -I ${pair_id}.realigned.bam -BQSR ${pair_id}.recal_data.grp -o ${pair_id}.final.bam -nt 1 -nct $SLURM_CPUS_ON_NODE fi + diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh new file mode 100644 index 0000000000000000000000000000000000000000..f2094311585e6e1883157a342bece9a8f044cc4b --- /dev/null +++ b/variants/germline_vc.sh @@ -0,0 +1,95 @@ +#!/bin/bash +#germline_vc.sh + +usage() { + echo "-h Help documentation for gatkrunner.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + echo "-p --Prefix for output file name" + echo "-a --Algorithm/Command" + echo "Example: bash hisat.sh -p prefix -r /path/GRCh38" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:a:b:p:h opt +do + case $opt in + r) index_path=$OPTARG;; + p) pair_id=$OPTARG;; + a) algo=$OPTARG;; + h) usage;; + esac +done +function join_by { local IFS="$1"; shift; echo "$*"; } + +shift $(($OPTIND -1)) + +# Check for mandatory options +if [[ -z $pair_id ]] || [[ -z $index_path ]]; then + usage +fi +if [[ -z $SLURM_CPUS_ON_NODE ]] +then + SLURM_CPUS_ON_NODE=1 +fi +if [[ -a "${index_path}/dbSnp.vcf.gz" ]] +then + dbsnp="${index_path}/dbSnp.vcf.gz" +else + echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz" + usage +fi +if [[ -a "${index_path}/GoldIndels.vcf.gz" ]] +then + knownindel="${index_path}/GoldIndels.vcf.gz" +else + echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz" + usage +fi +if [[ -a "${index_path}/genome.fa" ]] +then + reffa="${index_path}/genome.fa" +else + echo "Missing Fasta File: ${index_path}/genome.fa" + usage +fi +module load python/2.7.x-anaconda samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 + +for i in *.bam; do + samtools index -@ $SLURM_CPUS_ON_NODE $i +done + +if [[ $algo == 'mpileup' ]] +then + samtools mpileup -t 'AD,DP,INFO/AD' -ug -Q20 -C50 -f ${reffa} *.bam | bcftools call -vmO z -o ${pair_id}.sam.ori.vcf.gz + vcf-sort ${pair_id}.sam.ori.vcf.gz | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.sam.vcf.gz - +elif [[ $algo == 'hotspot' ]] +then + samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi + bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz - + java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz +elif [[ $algo == 'speedseq' ]] +then + module load speedseq/20160506 + speedseq var -t $SLURM_CPUS_ON_NODE -o ssvar ${reffa} *.bam + vcf-annotate -n --fill-type ssvar.vcf.gz| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.ssvar.vcf.gz - +elif [[ $algo == 'gatk' ]] +then + module load gatk/3.7 + $gvcflist = '' + for i in *.bam; do + java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T HaplotypeCaller -stand_call_conf 30 -stand_emit_conf 10.0 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -variant_index_type LINEAR -variant_index_parameter 128000 --emitRefConfidence GVCF -I $i -o ${i}.gatk.g.vcf -nct 2 & + $gvcflist += "--variant ${i}.gatk.g.vcf " + done + wait + java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T GenotypeGVCFs -o gatk.vcf -nt $SLURM_CPUS_ON_NODE $gvcflist + vcf-annotate -n --fill-type gatk.vcf | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.gatk.vcf.gz - + tabix ${pair_id}.gatk.vcf.gz +elif [[ $algo == 'platypus' ]] +then + module load platypus/gcc/0.8.1 + $bamlist = join_by , *.bam + Platypus.py callVariants --minMapQual=10 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf + vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz + tabix platypus.vcf.gz + bcftools norm -c s -f ${reffa} -w 10 -O z -o ${i}.pl.vcf.gz platypus.vcf.gz +fi diff --git a/variants/union.sh b/variants/union.sh new file mode 100644 index 0000000000000000000000000000000000000000..640ddfc8162e71e70e161ba0a8fd79b491d8d394 --- /dev/null +++ b/variants/union.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#union.sh + +usage() { + echo "-h Help documentation for gatkrunner.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + echo "-p --Prefix for output file name" + echo "-a --Algorithm/Command" + echo "Example: bash hisat.sh -p prefix -r /path/GRCh38" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:p:h opt +do + case $opt in + r) index_path=$OPTARG;; + p) pair_id=$OPTARG;; + h) usage;; + esac +done +function join_by { local IFS="$1"; shift; echo "$*"; } +shift $(($OPTIND -1)) + +module load gatk/3.7 python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.3q samtools/1.6 vcftools/0.1.14 + +HS=*.hotspot.vcf.gz +list1=`ls *vcf.gz|grep -v hotspot` +list2=`ls *vcf.gz|grep -v hotspot` +calllist='' +for i in *.vcf.gz; do + EXT="${i#*.}" + CALL="${EXT%%.*}" + calllist+=" $CALL" + tabix $i + if [[ $i eq $HS ]] + then + bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a $i -b stdin |bgzip > hotspot.nooverlap.vcf.gz + tabix hotspot.nooverlap.vcf.gz + list2+=" hotspot.nooverlap.vcf.gz" + varlist+=" --variant:$CALL hotspot.nooverlap.vcf.gz" + else + varlist+=" --variant:$CALL $i" + fi +done + +bedtools multiinter -i $list2 -names $calllist | cut -f 1,2,3,5 | bedtools sort -i stdin | bedtools merge -c 4 -o distinct > ${pair_id}_integrate.bed + +priority='ssvar' +if [[ *.platypus.vcf.gz ]] +then + priority+=',platypus' +fi +priority+=',sam,gatk' +if [[ *.hotspot.vcf.gz ]] +then + priority+=',hotspot' +fi + +java -Xmx32g -jar $GATK_JAR -R ${index_path}/genome.fa -T CombineVariants --filteredrecordsmergetype KEEP_UNCONDITIONAL $varlist -genotypeMergeOptions PRIORITIZE -priority $priority -o ${pair_id}.int.vcf + +perl $baseDir/scripts/uniform_integrated_vcf.pl ${pair_id}.int.vcf +bgzip ${pair_id}_integrate.bed +tabix ${pair_id}_integrate.bed.gz +bgzip ${pair_id}.uniform.vcf +tabix ${pair_id}.uniform.vcf.gz +bcftools annotate -a ${pair_id}_integrate.bed.gz --columns CHROM,FROM,TO,CallSet -h ${index_path}/CallSet.header ${pair_id}.uniform.vcf.gz | bgzip > ${pair_id}.union.vcf.gz