From 1f6c3efe48468fa1ab106f2be3af7adf8b3e4b76 Mon Sep 17 00:00:00 2001
From: s181706 <jonathan.gesell@utsouthwestern.edu>
Date: Fri, 1 Feb 2019 11:10:20 -0600
Subject: [PATCH] Created branch and added the automated reference generator
 process to the programs

---
 generate_reference.sh | 84 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 generate_reference.sh

diff --git a/generate_reference.sh b/generate_reference.sh
new file mode 100644
--- /dev/null
+++ b/generate_reference.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+#SBATCH --job-name=PrepareGenome
+#SBATCH --partition=super
+#SBATCH --output=build_chromesizes.%j.out
+#SBATCH --error=build_chromesizes.%j.err
+#SBATCH --mail-type=ALL
+
+#Program to create the genomes for use in our standard pipelines. Assumes that
+#you have already downloaded the GTF (*.gtf.gz) and *.dna.toplevel.fa.gz files
+#into the directory the script runs in. Everything is written to that directory.
+#
+#Slurm does not expand shell variables inside #SBATCH directives, so a line such
+#as "--mail-user=${USER}@utsouthwestern.edu" cannot work here. Give the address
+#when submitting instead:
+#  sbatch --mail-user="${USER}@utsouthwestern.edu" generate_reference.sh
+
+#Stop at the first failing step rather than indexing partial or empty inputs.
+#NOTE(review): -u is left off because the site's "module" shell function may
+#read unset variables -- confirm before enabling it.
+set -e
+
+#Print an error message on stderr and abort the job
+die() {
+  printf 'ERROR: %s\n' "$*" >&2
+  exit 1
+}
+
+#Setup universal variables
+#nproc reports the processors available to this process; --all would count every
+#installed processor, including ones the job may not be allowed to use.
+THREADS=$(nproc)
+
+#Make sure the inputs exist; an unmatched glob would otherwise be handed to zcat
+#literally and leave an empty genome.fa/gencode.gtf behind
+shopt -s nullglob
+fasta_archives=(*.dna.toplevel.fa.gz)
+gtf_archives=(*.gtf.gz)
+shopt -u nullglob
+(( ${#fasta_archives[@]} > 0 )) || die "no *.dna.toplevel.fa.gz file in ${PWD}"
+(( ${#gtf_archives[@]} > 0 )) || die "no *.gtf.gz file in ${PWD}"
+
+#Create the required genome and gencode files, decompressing both in parallel
+zcat -- "${fasta_archives[@]}" > genome.fa &
+fasta_pid=$!
+zcat -- "${gtf_archives[@]}" > gencode.gtf &
+gtf_pid=$!
+#Wait on each PID: a bare "wait" discards the exit status of the jobs
+wait "${fasta_pid}" || die "could not decompress the genome FASTA"
+wait "${gtf_pid}" || die "could not decompress the GTF annotation"
+
+#Load and run BWA to build index
+module load BWA/0.7.5
+bwa index -a bwtsw genome.fa
+module rm BWA/0.7.5
+
+#Run Hisat 2 (mkdir -p so that a rerun does not fail on the existing directory)
+module load hisat2/2.1.0-intel
+mkdir -p hisat_index
+hisat2-build -p "${THREADS}" genome.fa hisat_index/genome
+module rm hisat2/2.1.0-intel
+
+#Run Star
+#NOTE(review): this used to unload star/2.5.2b although the unversioned default
+#"star" was loaded; unload by the name that was loaded. Consider pinning a version.
+module load star
+mkdir -p star_index
+STAR --runMode genomeGenerate --genomeDir star_index \
+  --genomeFastaFiles genome.fa --sjdbGTFfile gencode.gtf \
+  --runThreadN "${THREADS}"
+rm -rf _STARtmp
+module rm star
+
+#Get genome sizes. Runs in the foreground: in a background job a failure would
+#go unnoticed and "module rm" would only affect the subshell.
+module load samtools/gcc/1.6
+samtools faidx genome.fa
+cut -f1,2 genome.fa.fai > sizes.genome
+module rm samtools/gcc/1.6
+
+#Build Bowtie2 Index
+module load bowtie2/2.2.8-intel
+bowtie2-build -f genome.fa genome
+module rm bowtie2/2.2.8-intel
-- 
GitLab