Note: this patch was rebuilt from a copy whose line breaks had been collapsed.
Blank context lines were placed to satisfy the original hunk line counts, so
verify them against generate_reference.sh before applying. The index line is
carried over from the original patch and does not describe the revised result.

diff --git a/generate_reference.sh b/generate_reference.sh
index bf00d96e148d33c4226518c7a105372b0f935b90..d59dbf8451ab090e477d44fca1972a1d9549f055 100644
--- a/generate_reference.sh
+++ b/generate_reference.sh
@@ -2,20 +2,77 @@
 
 #SBATCH --job-name=PrepareGenome
 #SBATCH --partition=super
-#SBATCH --output=build_chromesizes.%j.out
-#SBATCH --error=build_chromesizes.%j.err
+#SBATCH --output=build_references.%j.out
+#SBATCH --error=build_references.%j.err
 #SBATCH --mail-user=${USER}@utsouthwestern.edu
 #SBATCH --mail-type=ALL
 
-#Program to create the genomes for use in our standard pipelines. Assumes that you have already downloaded the GTF and dna.toplevel.fa files
+#Program to create the genomes for use in our standard pipelines. Assumes that you have already downloaded the GTF or GFF3 and dna.toplevel.fa files, both gzip compressed. These should be the only two files in the run directory at launch.
 
 #Setup universal variables
 DIRECTORY=`pwd -P`;
 THREADS=`nproc --all`;
 
-#Create the required genome and gencode files
-zcat *.dna.toplevel.fa.gz > genome.fa &
-zcat *.gtf.gz > gencode.gtf &
+#Succeeds when its first argument names an existing file. An unmatched glob is
+#passed through as the literal pattern, so the -e test fails when nothing matched.
+have_file() { [ -e "$1" ]; }
+
+#Decide what has to be built before starting any background job, so an early
+#exit for a missing input never leaves a decompression running behind it.
+NEED_GENOME=0
+ANNOTATION_SOURCE=none
+if [ ! -f genome.fa ]; then
+  if ! have_file *.dna.toplevel.fa.gz; then
+    echo "Error: unable to locate genome files! Please download the dna.toplevel.fa.gz file from Ensembl.org and place it in this directory!" >&2
+    exit 1
+  fi
+  NEED_GENOME=1
+fi
+
+if [ ! -f gencode.gtf ]; then
+  if have_file *.gtf.gz; then
+    ANNOTATION_SOURCE=gtf
+  elif have_file *.gff3.gz; then
+    ANNOTATION_SOURCE=gff3
+  else
+    printf '%s\n' "Error: Unable to load a suitable gff3 or gtf gzipped file for annotations. " \
+      "Please download either the *.gtf.gz or *.gff3.gz file from Ensembl.org!" >&2
+    exit 2
+  fi
+fi
+
+#Decompress in the background, recording each PID so failures can be detected
+JOB_PIDS=""
+if [ "$NEED_GENOME" -eq 1 ]; then
+  zcat *.dna.toplevel.fa.gz > genome.fa &
+  JOB_PIDS="$JOB_PIDS $!"
+fi
+case "$ANNOTATION_SOURCE" in
+  gtf)
+    zcat *.gtf.gz > gencode.gtf &
+    JOB_PIDS="$JOB_PIDS $!"
+    ;;
+  gff3)
+    #gffread is expected to come from the cufflinks module. Loading it inside
+    #the subshell confines the module to this job, so no unload is needed.
+    (
+      module load cufflinks/2.2.1
+      zcat *.gff3.gz > gencode.gff3 && gffread gencode.gff3 -T -o gencode.gtf
+    ) &
+    JOB_PIDS="$JOB_PIDS $!"
+    ;;
+esac
+
+#A bare wait always returns 0, so wait on each PID to collect its exit status
+FAILED=0
+for PID in $JOB_PIDS; do
+  wait "$PID" || FAILED=1
+done
+if [ "$FAILED" -ne 0 ]; then
+  echo "Error: failed to decompress or convert the reference files!" >&2
+  exit 3
+fi
+
 wait;
 
 #Load and run BWA to build index
@@ -42,8 +99,11 @@ samtools faidx genome.fa && cut -f1,2 genome.fa.fai > sizes.genome && module rm
 
 #Wait and build Bowtie2 Index
 wait;
+#-p keeps a rerun from failing when the index directory already exists
+mkdir -p bowtie2_index;
 module load bowtie2/2.2.8-intel;
-bowtie2-build -f genome.fa genome;
+bowtie2-build -f genome.fa bowtie2_index/genome;
+module rm bowtie2/2.2.8-intel;
 
 #Exit
 exit 0;