#!/bin/bash

#SBATCH --job-name=PrepareGenome
#SBATCH --partition=super
#SBATCH --output=build_chromesizes.%j.out
#SBATCH --error=build_chromesizes.%j.err
#SBATCH --mail-user=${USER}@utsouthwestern.edu
#SBATCH --mail-type=ALL

# Program to create the genomes for use in our standard pipelines.
#
# Usage: run (or sbatch) from the directory that already contains the
# downloaded *.dna.toplevel.fa.gz and *.gtf.gz files. All outputs are written
# to the current directory:
#   genome.fa, gencode.gtf   decompressed inputs
#   genome.fa.* (BWA)        BWA index
#   hisat_index/genome*      HISAT2 index
#   star_index/              STAR index
#   genome.fa.fai            samtools FASTA index
#   sizes.genome             chromosome name / length table
#   genome.*.bt2             Bowtie2 index
#
# NOTE(review): sbatch parses the #SBATCH lines itself rather than through the
# shell, so ${USER} in --mail-user is most likely taken literally - confirm,
# then hard-code the address or pass --mail-user on the sbatch command line.

# Stop at the first failing step so the job is reported as FAILED instead of
# silently producing partial indexes. -u is deliberately left out: the site
# `module` function / modulefiles may expand unset variables.
set -eo pipefail

# Print a message to stderr and abort the job with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Setup universal variables
THREADS=$(nproc --all)

# Make sure the expected downloads are present before doing any work; without
# this an unmatched glob is handed to zcat literally and an empty genome.fa /
# gencode.gtf is left behind for the indexers to choke on.
shopt -s nullglob
fasta_archives=( *.dna.toplevel.fa.gz )
gtf_archives=( *.gtf.gz )
shopt -u nullglob
(( ${#fasta_archives[@]} > 0 )) || die "No *.dna.toplevel.fa.gz file found in $(pwd -P)"
(( ${#gtf_archives[@]} > 0 )) || die "No *.gtf.gz file found in $(pwd -P)"

# Create the required genome and gencode files (decompress both in parallel)
zcat -- "${fasta_archives[@]}" > genome.fa &
fasta_pid=$!
zcat -- "${gtf_archives[@]}" > gencode.gtf &
gtf_pid=$!

# A bare `wait` always succeeds; waiting on each PID surfaces zcat failures.
unpack_status=0
wait "${fasta_pid}" || unpack_status=1
wait "${gtf_pid}" || unpack_status=1
(( unpack_status == 0 )) || die "Failed to decompress the genome FASTA and/or GTF"

# Load and run BWA to build index
module load BWA/0.7.5
bwa index -a bwtsw genome.fa
module rm BWA/0.7.5

# Run Hisat 2
module load hisat2/2.1.0-intel
mkdir -p hisat_index
hisat2-build -p "${THREADS}" genome.fa hisat_index/genome
module rm hisat2/2.1.0-intel

# Run Star
module load star
mkdir -p star_index
STAR --runMode genomeGenerate --genomeDir star_index --genomeFastaFiles genome.fa --sjdbGTFfile gencode.gtf --runThreadN "${THREADS}"
rm -rf _STARtmp
# Unload by the same name that was loaded so this works whichever STAR version
# the site default resolves to.
module rm star

# Get genome sizes. Runs in the foreground: the module unload has to happen in
# this shell to take effect, and nothing ran concurrently with it anyway.
module load samtools/gcc/1.6
samtools faidx genome.fa
cut -f1,2 genome.fa.fai > sizes.genome
module rm samtools/gcc/1.6

# Build Bowtie2 Index
module load bowtie2/2.2.8-intel
bowtie2-build -f genome.fa genome
module rm bowtie2/2.2.8-intel

# Exit (only reached when every step above succeeded)
exit 0