#!/bin/bash

#SBATCH --job-name=PrepareGenome
#SBATCH --partition=super
#SBATCH --output=build_chromesizes.%j.out
#SBATCH --error=build_chromesizes.%j.err
# NOTE(review): sbatch appears to read these directive lines verbatim, without
# shell expansion, so ${USER} below is likely passed literally - confirm, and
# if so supply --mail-user on the sbatch command line instead.
#SBATCH --mail-user=${USER}@utsouthwestern.edu
#SBATCH --mail-type=ALL

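#Builds the genome files, aligner indexes (BWA, HISAT2, STAR, Bowtie2) and chromosome sizes used by our standard pipelines.  Assumes that the gzipped GTF (*.gtf.gz) and genome FASTA (*.dna.toplevel.fa.gz) files have already been downloaded into the directory the job runs in.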

#Run-wide settings: the physical working directory (pwd -P) and the
#processor count reported by nproc --all.
DIRECTORY="$(pwd -P)"
THREADS="$(nproc --all)"

#Create the required genome and gencode files
#Both archives are decompressed concurrently.  Each job is waited on by PID
#because a bare `wait` always returns 0 and would hide a failed zcat, letting
#the index builds below run on truncated or empty inputs.
zcat *.dna.toplevel.fa.gz > genome.fa &
genome_pid=$!
zcat *.gtf.gz > gencode.gtf &
gtf_pid=$!

#Reap both jobs before deciding, so neither is left running on failure.
decompress_failed=0
wait "${genome_pid}" || decompress_failed=1
wait "${gtf_pid}" || decompress_failed=1
if [[ "${decompress_failed}" -ne 0 ]]; then
  echo "ERROR: failed to decompress the genome FASTA and/or GTF input" >&2
  exit 1
fi

#Build the BWA index for genome.fa; the BWA module is loaded only for the
#duration of this step.
bwa_module="BWA/0.7.5"
module load "${bwa_module}"
bwa index -a bwtsw genome.fa
module rm "${bwa_module}"

#Run Hisat 2
#Builds the HISAT2 index from genome.fa under hisat_index/ with the basename
#"genome", using ${THREADS} threads.
module load hisat2/2.1.0-intel
#-p so a re-run does not error on a directory left by an earlier run.
mkdir -p hisat_index
hisat2-build -p "${THREADS}" genome.fa hisat_index/genome
module rm hisat2/2.1.0-intel

#Run Star
#Generates the STAR genome index in star_index/ from genome.fa, annotated
#with the splice junctions in gencode.gtf.
#Load the pinned STAR version so it matches the module removed at the end of
#this step (an unversioned load could pick up a different default version).
module load star/2.5.2b
#-p so a re-run does not error on a directory left by an earlier run.
mkdir -p star_index
STAR --runMode genomeGenerate --genomeDir star_index --genomeFastaFiles genome.fa --sjdbGTFfile gencode.gtf --runThreadN "${THREADS}"
#Remove the _STARtmp directory from the working directory.
rm -rf _STARtmp
module rm star/2.5.2b

#Get genome sizes
#Index genome.fa with samtools and write the first two columns of the
#resulting genome.fa.fai to sizes.genome.
#Runs in the foreground: the script waits for this step before continuing
#anyway, and a `module rm` issued from a background job runs in a subshell and
#never unloads the module from this shell.
module load samtools/gcc/1.6
samtools faidx genome.fa && cut -f1,2 genome.fa.fai > sizes.genome
module rm samtools/gcc/1.6

#Wait and build Bowtie2 Index
#Block until any background jobs started earlier in the script have finished.
wait
module load bowtie2/2.2.8-intel
#Build the index with basename "genome" using the same thread count as the
#other indexers (falls back to 1 if THREADS is unset).  A failed build ends
#the job with a non-zero status instead of being masked by the final exit 0.
bowtie2-build --threads "${THREADS:-1}" -f genome.fa genome || exit 1

#Exit
exit 0