From 9c0e413f7e5617a58b5205b6a3d7ba62048559d4 Mon Sep 17 00:00:00 2001
From: s181706 <jonathan.gesell@utsouthwestern.edu>
Date: Fri, 1 Feb 2019 11:48:56 -0600
Subject: [PATCH] Updated generate_reference.sh for more robust file checking.

---
 generate_reference.sh | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/generate_reference.sh b/generate_reference.sh
index bf00d96..d59dbf8 100644
--- a/generate_reference.sh
+++ b/generate_reference.sh
@@ -2,20 +2,38 @@
 
 #SBATCH --job-name=PrepareGenome
 #SBATCH --partition=super
-#SBATCH --output=build_chromesizes.%j.out
-#SBATCH --error=build_chromesizes.%j.err
+#SBATCH --output=build_references.%j.out
+#SBATCH --error=build_references.%j.err
 #SBATCH --mail-user=${USER}@utsouthwestern.edu
 #SBATCH --mail-type=ALL
 
-#Program to create the genomes for use in our standard pipelines.  Assumes that you have already downloaded the GTF and dna.toplevel.fa files
+#Program to create the genomes for use in our standard pipelines.  Assumes that you have already downloaded the annotation file (GTF or GFF3) and the dna.toplevel.fa file, both gzip-compressed.  These should be the only two files in the run directory at launch.
 
 #Setup universal variables
 DIRECTORY=`pwd -P`;
 THREADS=`nproc --all`;
 
-#Create the required genome and gencode files
-zcat *.dna.toplevel.fa.gz > genome.fa &
-zcat *.gtf.gz > gencode.gtf &
+#Build genome.fa from the gzipped toplevel FASTA unless the file is already present; decompression runs in the background and is collected by the wait below.
+if [ ! -f genome.fa ]; then
+  if ! ls *.dna.toplevel.fa.gz > /dev/null 2>&1; then
+    echo "Error: unable to locate genome files!  Please download the dna.toplevel.fa.gz file from Ensembl.org and place it in this directory!" >&2;
+    exit 1;
+  fi;
+  zcat *.dna.toplevel.fa.gz > genome.fa &
+fi;
+
+#Build gencode.gtf unless it is already present: a gzipped GTF is decompressed directly, otherwise a gzipped GFF3 is converted with gffread; exits 2 when neither exists.
+if [ ! -f gencode.gtf ]; then
+  if ls *.gtf.gz > /dev/null 2>&1; then
+    zcat *.gtf.gz > gencode.gtf &
+  elif ls *.gff3.gz > /dev/null 2>&1; then
+    module load cufflinks/2.2.1;
+    zcat *.gff3.gz > gencode.gff3 && gffread gencode.gff3 -T -o gencode.gtf && module rm cufflinks/2.2.1 &
+  else
+    echo "Error: Unable to load a suitable gff3 or gtf gzipped file for annotations.  Please download either the *.gtf.gz or *.gff3.gz file from Ensembl.org!" >&2; exit 2;
+  fi;
+fi;
+
 wait;
 
 #Load and run BWA to build index
@@ -42,8 +60,10 @@ samtools faidx genome.fa && cut -f1,2 genome.fa.fai > sizes.genome && module rm
 
 #Wait and build Bowtie2 Index
 wait;
+mkdir -p bowtie2_index;  #-p: no error when a previous run already created the directory
 module load bowtie2/2.2.8-intel;
-bowtie2-build -f genome.fa genome;
+bowtie2-build -f genome.fa bowtie2_index/genome;
+module rm bowtie2/2.2.8-intel;
 
 #Exit
 exit 0;
-- 
GitLab