Skip to content
Snippets Groups Projects

Created branch and added the automated reference generator process to the programs

Open Jonathan Gesell requested to merge 1-ReferenceGenerator into master
Compare and
1 file
+ 69
0
Preferences
File browser
Compare changes
+ 69
0
#!/bin/bash
#SBATCH --job-name=PrepareGenome
#SBATCH --partition=super
#SBATCH --output=build_references.%j.out
#SBATCH --error=build_references.%j.err
#SBATCH --mail-user=${USER}@utsouthwestern.edu
#SBATCH --mail-type=ALL
#
# Creates the reference genome files and aligner indexes used by our standard
# pipelines.
#
# Inputs (gzip compressed, in the directory the job is launched from):
#   *.dna.toplevel.fa.gz     genome sequence
#   *.gtf.gz or *.gff3.gz    annotation (a GTF is used in preference to a GFF3)
# These should be the only two files in the run directory at launch.
#
# Outputs: genome.fa, gencode.gtf (plus gencode.gff3 when converting from
#   GFF3), the BWA index built from genome.fa, hisat_index/, star_index/,
#   bowtie2_index/, genome.fa.fai and sizes.genome.
#
# Exit status:
#   0  everything was built
#   1  no genome archive found
#   2  no annotation archive found
#   3  a decompression, conversion or indexing step failed
#
# NOTE(review): sbatch parses the directive lines itself and, as far as I can
# tell, does not expand shell variables in them, so ${USER} in --mail-user is
# probably taken literally. Confirm, and if so pass --mail-user on the sbatch
# command line instead.

readonly STEP_FAILED=3

#######################################
# Print a message to stderr and terminate the script.
# Arguments: $1 - exit status; remaining arguments - message text
#######################################
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "${status}"
}

#######################################
# Decompress one or more gzip archives into a single file.
# Writes to "<target>.partial" first and renames on success, so an interrupted
# run never leaves a truncated file that a later run would treat as complete.
# Arguments: $1 - target file; remaining arguments - archives to decompress
# Returns:   0 on success, non-zero if zcat or the rename failed
#######################################
decompress_to() {
  local target=$1
  shift
  zcat -- "$@" > "${target}.partial" && mv -- "${target}.partial" "${target}"
}

#######################################
# Decompress GFF3 archives to gencode.gff3 and convert that to gencode.gtf
# with gffread. Intended to run as a background job: the cufflinks module is
# loaded inside that subshell, so it never needs unloading in the parent.
# Arguments: GFF3 archives to decompress
# Returns:   0 on success, non-zero if any step failed
#######################################
gff3_to_gtf() {
  module load cufflinks/2.2.1 || return
  zcat -- "$@" > gencode.gff3 || return
  gffread gencode.gff3 -T -o gencode.gtf.partial || return
  mv -- gencode.gtf.partial gencode.gtf
}

#######################################
# Index genome.fa with samtools and write the first two columns of the
# resulting genome.fa.fai to sizes.genome.
# Returns: 0 on success, non-zero if either command failed
#######################################
write_genome_sizes() {
  samtools faidx genome.fa && cut -f1,2 genome.fa.fai > sizes.genome
}

#######################################
# Load a module, run a command with it, then unload the module again.
# Exits the script with STEP_FAILED if the module cannot be loaded or the
# command returns non-zero.
# Arguments: $1 - module name; remaining arguments - command and its arguments
#######################################
run_with_module() {
  local module_name=$1
  local status=0
  shift
  module load "${module_name}" \
    || die "${STEP_FAILED}" "Error: unable to load module ${module_name}."
  "$@" || status=$?
  # Unload in the current shell even when the command failed.
  module rm "${module_name}"
  (( status == 0 )) \
    || die "${STEP_FAILED}" "Error: '$*' failed with exit status ${status}."
}

main() {
  local threads
  threads=$(nproc --all) \
    || die "${STEP_FAILED}" "Error: unable to determine the number of processors."

  # Collect candidate inputs with globs rather than parsing ls output.
  local -a genome_archives gtf_archives gff3_archives
  shopt -s nullglob
  genome_archives=(*.dna.toplevel.fa.gz)
  gtf_archives=(*.gtf.gz)
  gff3_archives=(*.gff3.gz)
  shopt -u nullglob

  # Validate every input before starting any work, so a missing annotation
  # cannot abort the job while the genome is still being decompressed.
  if [[ ! -f genome.fa ]] && (( ${#genome_archives[@]} == 0 )); then
    die 1 "Error: unable to locate genome files! Please download the dna.toplevel.fa.gz file from Ensembl.org and place it in this directory!"
  fi
  if [[ ! -f gencode.gtf ]] \
    && (( ${#gtf_archives[@]} == 0 && ${#gff3_archives[@]} == 0 )); then
    die 2 "Error: Unable to load a suitable gff3 or gtf gzipped file for annotations. Please download either the *.gtf.gz or *.gff3.gz file from Ensembl.org!"
  fi

  # Prepare genome and annotation concurrently; files that already exist are
  # reused as-is.
  local genome_pid='' annotation_pid=''
  if [[ ! -f genome.fa ]]; then
    decompress_to genome.fa "${genome_archives[@]}" &
    genome_pid=$!
  fi
  if [[ ! -f gencode.gtf ]]; then
    if (( ${#gtf_archives[@]} > 0 )); then
      decompress_to gencode.gtf "${gtf_archives[@]}" &
    else
      gff3_to_gtf "${gff3_archives[@]}" &
    fi
    annotation_pid=$!
  fi

  # Wait on each job by PID so its exit status is not lost.
  local prep_failed=0
  if [[ -n "${genome_pid}" ]] && ! wait "${genome_pid}"; then
    printf '%s\n' "Error: failed to create genome.fa." >&2
    prep_failed=1
  fi
  if [[ -n "${annotation_pid}" ]] && ! wait "${annotation_pid}"; then
    printf '%s\n' "Error: failed to create gencode.gtf." >&2
    prep_failed=1
  fi
  (( prep_failed == 0 )) || exit "${STEP_FAILED}"

  # BWA index
  run_with_module BWA/0.7.5 bwa index -a bwtsw genome.fa

  # HISAT2 index
  mkdir -p hisat_index \
    || die "${STEP_FAILED}" "Error: unable to create hisat_index."
  run_with_module hisat2/2.1.0-intel \
    hisat2-build -p "${threads}" genome.fa hisat_index/genome

  # STAR index. The version is pinned to the one the original script unloaded
  # (it loaded plain "star"), matching every other module in this file.
  mkdir -p star_index \
    || die "${STEP_FAILED}" "Error: unable to create star_index."
  run_with_module star/2.5.2b \
    STAR --runMode genomeGenerate --genomeDir star_index \
    --genomeFastaFiles genome.fa --sjdbGTFfile gencode.gtf \
    --runThreadN "${threads}"
  rm -rf _STARtmp

  # Genome sizes
  run_with_module samtools/gcc/1.6 write_genome_sizes

  # Bowtie2 index
  mkdir -p bowtie2_index \
    || die "${STEP_FAILED}" "Error: unable to create bowtie2_index."
  run_with_module bowtie2/2.2.8-intel \
    bowtie2-build -f genome.fa bowtie2_index/genome

  exit 0
}

main "$@"