From b76082eaf3c3ab7e7f1550d3b61582227880324b Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Wed, 25 Apr 2018 13:55:23 -0500
Subject: [PATCH] Downsample data for QuSaGE before converting to dataframe to
 handle large datasets

---
 bash.scripts/sc_FullAnalysis-DPrF-Lin.sh | 37 ++++++++++++++++++
 bash.scripts/sc_FullAnalysis-DPrF.sh     | 50 ++++++++++++++++++++++++
 r.scripts/sc_QuSAGE.Lineage.R            |  4 +-
 r.scripts/sc_QuSAGE_EpiSubClust.R        |  4 +-
 r.scripts/sc_QuSAGE_StSubClust.R         |  4 +-
 5 files changed, 93 insertions(+), 6 deletions(-)
 create mode 100644 bash.scripts/sc_FullAnalysis-DPrF-Lin.sh
 create mode 100755 bash.scripts/sc_FullAnalysis-DPrF.sh

diff --git a/bash.scripts/sc_FullAnalysis-DPrF-Lin.sh b/bash.scripts/sc_FullAnalysis-DPrF-Lin.sh
new file mode 100644
index 0000000..d9b089b
--- /dev/null
+++ b/bash.scripts/sc_FullAnalysis-DPrF-Lin.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# CREATED USING THE BIOHPC PORTAL on Thu Nov 30 2017 16:16:26 GMT-0600 (Central Standard Time)
+#
+# This file is a batch script used to run commands on the BioHPC cluster.
+# The script is submitted to the cluster using the SLURM `sbatch` command.
+# Lines starting with # are comments, and will not be run.
+# Lines starting with #SBATCH specify options for the scheduler.
+# Lines that do not start with # or #SBATCH are commands that will run.
+
+# Name for the job that will be visible in the job queue and accounting tools.
+#SBATCH --job-name R_FullAnalysis
+
+# Name of the SLURM partition that this job should run on.
+#SBATCH -p 256GB,256GBv1       # partition (queue)
+# Number of nodes required to run this job
+#SBATCH -N 1
+
+# Time limit for the job in the format Days-H:M:S
+# A job that reaches its time limit will be cancelled.
+# Specify an accurate time limit for efficient scheduling so your job runs promptly.
+#SBATCH -t 7-0:0:0
+
+# The standard output and errors from commands will be written to these files.
+# %j in the filename will be replaced with the job number when it is submitted.
+#SBATCH -o job_%j.out
+#SBATCH -e job_%j.err
+
+# Send an email when the job status changes, to the specified address.
+#SBATCH --mail-type ALL
+#SBATCH --mail-user gervaise.henry@utsouthwestern.edu
+
+module load R/3.4.1-gccmkl
+
+# COMMAND GROUP 1
+Rscript ../r.scripts/sc_QuSAGE.Lineage.R --p="DPrF"
+# END OF SCRIPT
diff --git a/bash.scripts/sc_FullAnalysis-DPrF.sh b/bash.scripts/sc_FullAnalysis-DPrF.sh
new file mode 100755
index 0000000..136c5fc
--- /dev/null
+++ b/bash.scripts/sc_FullAnalysis-DPrF.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# CREATED USING THE BIOHPC PORTAL on Thu Nov 30 2017 16:16:26 GMT-0600 (Central Standard Time)
+#
+# This file is a batch script used to run commands on the BioHPC cluster.
+# The script is submitted to the cluster using the SLURM `sbatch` command.
+# Lines starting with # are comments, and will not be run.
+# Lines starting with #SBATCH specify options for the scheduler.
+# Lines that do not start with # or #SBATCH are commands that will run.
+
+# Name for the job that will be visible in the job queue and accounting tools.
+#SBATCH --job-name R_FullAnalysis
+
+# Name of the SLURM partition that this job should run on.
+#SBATCH -p 256GB,256GBv1       # partition (queue)
+# Number of nodes required to run this job
+#SBATCH -N 1
+
+# Time limit for the job in the format Days-H:M:S
+# A job that reaches its time limit will be cancelled.
+# Specify an accurate time limit for efficient scheduling so your job runs promptly.
+#SBATCH -t 7-0:0:0
+
+# The standard output and errors from commands will be written to these files.
+# %j in the filename will be replaced with the job number when it is submitted.
+#SBATCH -o job_%j.out
+#SBATCH -e job_%j.err
+
+# Send an email when the job status changes, to the specified address.
+#SBATCH --mail-type ALL
+#SBATCH --mail-user gervaise.henry@utsouthwestern.edu
+
+module load R/3.4.1-gccmkl
+
+# COMMAND GROUP 1
+Rscript ../r.scripts/sc_Demultiplex.R --p="DPrF" --d=7
+#Rscript ../r.scripts/sc_D-SampleReorder.R
+#Rscript ../r.scripts/sc_Seurat.Score.CellCycle.R --p="DPrF"
+Rscript ../r.scripts/sc_QC.R --p="DPrF" --cc=FALSE
+Rscript ../r.scripts/sc_Cluster.R --p="DPrF"
+Rscript ../r.scripts/sc_PC.Score.Stress.R --p="DPrF"
+#Rscript ../r.scripts/sc_QuSAGE.Lineage.R --p="DPrF"
+#Rscript ../r.scripts/sc_LineageSubClust.R --p="DPrF"
+#Rscript ../r.scripts/sc_QuSAGE_EpiSubClust.R --p="DPrF"
+#Rscript ../r.scripts/sc_QuSAGE_StSubClust.R --p="DPrF"
+#Rscript ../r.scripts/sc_MergeSubClust.R --p="DPrF"
+#Rscript ../r.scripts/sc_PC.Score.NE.R --p="DPrF"
+#Rscript ../r.scripts/sc_DEG.R --p="DPrF"
+#Rscript ../r.scripts/sc_Tables.R --p="DPrF"
+# END OF SCRIPT
diff --git a/r.scripts/sc_QuSAGE.Lineage.R b/r.scripts/sc_QuSAGE.Lineage.R
index 9c28794..076e212 100755
--- a/r.scripts/sc_QuSAGE.Lineage.R
+++ b/r.scripts/sc_QuSAGE.Lineage.R
@@ -55,8 +55,8 @@ if (!is.na(opt$s)){
 } else {
   rnd <- 1:ncol(sc10x.Group@data)
 }
-eset <- as.data.frame(as.matrix(sc10x.Group@data))
-eset <- eset[,rnd]
+eset <- sc10x.Group@data[,rnd]  # select the rnd columns first, while still in the stored matrix form
+eset <- as.data.frame(as.matrix(eset))  # as.matrix() now only converts the selected columns
 labels <- paste0("Cluster_",as.vector(factor(sc10x.Group@ident)))
 labels <- labels[rnd]
 rm(rnd)
diff --git a/r.scripts/sc_QuSAGE_EpiSubClust.R b/r.scripts/sc_QuSAGE_EpiSubClust.R
index 91fd7ef..be4edef 100755
--- a/r.scripts/sc_QuSAGE_EpiSubClust.R
+++ b/r.scripts/sc_QuSAGE_EpiSubClust.R
@@ -62,8 +62,8 @@ if (!is.na(opt$s)){
 } else {
   rnd <- 1:ncol(sc10x.Group.Epi@data)
 }
-eset <- as.data.frame(as.matrix(sc10x.Group.Epi@data))
-eset <- eset[,rnd]
+eset <- sc10x.Group.Epi@data[,rnd]  # subset the Epi object (the one rnd and labels are built from) before converting
+eset <- as.data.frame(as.matrix(eset))  # as.matrix() now only converts the selected columns
 labels <- paste0("Cluster_",as.vector(factor(sc10x.Group.Epi@ident)))
 labels <- labels[rnd]
 rm(rnd)
diff --git a/r.scripts/sc_QuSAGE_StSubClust.R b/r.scripts/sc_QuSAGE_StSubClust.R
index 586f2bf..6409726 100755
--- a/r.scripts/sc_QuSAGE_StSubClust.R
+++ b/r.scripts/sc_QuSAGE_StSubClust.R
@@ -60,8 +60,8 @@ if (!is.na(opt$s)){
 } else {
   rnd <- 1:ncol(sc10x.Group.St@data)
 }
-eset <- as.data.frame(as.matrix(sc10x.Group.St@data))
-eset <- eset[,rnd]
+eset <- sc10x.Group.St@data[,rnd]  # subset the St object (the one rnd and labels are built from) before converting
+eset <- as.data.frame(as.matrix(eset))  # as.matrix() now only converts the selected columns
 labels <- paste0("Cluster_",as.vector(factor(sc10x.Group.St@ident)))
 labels <- labels[rnd]
 rm(rnd)
-- 
GitLab