From b76082eaf3c3ab7e7f1550d3b61582227880324b Mon Sep 17 00:00:00 2001 From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu> Date: Wed, 25 Apr 2018 13:55:23 -0500 Subject: [PATCH] Downsample data for QuSaGE before converting to dataframe to handle large datasets --- bash.scripts/sc_FullAnalysis-DPrF-Lin.sh | 37 ++++++++++++++++++ bash.scripts/sc_FullAnalysis-DPrF.sh | 50 ++++++++++++++++++++++++ r.scripts/sc_QuSAGE.Lineage.R | 4 +- r.scripts/sc_QuSAGE_EpiSubClust.R | 4 +- r.scripts/sc_QuSAGE_StSubClust.R | 4 +- 5 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 bash.scripts/sc_FullAnalysis-DPrF-Lin.sh create mode 100755 bash.scripts/sc_FullAnalysis-DPrF.sh diff --git a/bash.scripts/sc_FullAnalysis-DPrF-Lin.sh b/bash.scripts/sc_FullAnalysis-DPrF-Lin.sh new file mode 100644 index 0000000..d9b089b --- /dev/null +++ b/bash.scripts/sc_FullAnalysis-DPrF-Lin.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# CREATED USING THE BIOHPC PORTAL on Thu Nov 30 2017 16:16:26 GMT-0600 (Central Standard Time) +# +# This file is a batch script used to run commands on the BioHPC cluster. +# The script is submitted to the cluster using the SLURM `sbatch` command. +# Lines starting with # are comments, and will not be run. +# Lines starting with #SBATCH specify options for the scheduler. +# Lines that do not start with # or #SBATCH are commands that will run. + +# Name for the job that will be visible in the job queue and accounting tools. +#SBATCH --job-name R_FullAnalysis + +# Name of the SLURM partition that this job should run on. +#SBATCH -p 256GB,256GBv1 # partition (queue) +# Number of nodes required to run this job +#SBATCH -N 1 + +# Time limit for the job in the format Days-H:M:S +# A job that reaches its time limit will be cancelled. +# Specify an accurate time limit for efficient scheduling so your job runs promptly. +#SBATCH -t 7-0:0:0 + +# The standard output and errors from commands will be written to these files. 
+# %j in the filename will be replaced with the job number when it is submitted. +#SBATCH -o job_%j.out +#SBATCH -e job_%j.err + +# Send an email when the job status changes, to the specified address. +#SBATCH --mail-type ALL +#SBATCH --mail-user gervaise.henry@utsouthwestern.edu + +module load R/3.4.1-gccmkl + +# COMMAND GROUP 1 +Rscript ../r.scripts/sc_QuSAGE.Lineage.R --p="DPrF" +# END OF SCRIPT diff --git a/bash.scripts/sc_FullAnalysis-DPrF.sh b/bash.scripts/sc_FullAnalysis-DPrF.sh new file mode 100755 index 0000000..136c5fc --- /dev/null +++ b/bash.scripts/sc_FullAnalysis-DPrF.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# +# CREATED USING THE BIOHPC PORTAL on Thu Nov 30 2017 16:16:26 GMT-0600 (Central Standard Time) +# +# This file is a batch script used to run commands on the BioHPC cluster. +# The script is submitted to the cluster using the SLURM `sbatch` command. +# Lines starting with # are comments, and will not be run. +# Lines starting with #SBATCH specify options for the scheduler. +# Lines that do not start with # or #SBATCH are commands that will run. + +# Name for the job that will be visible in the job queue and accounting tools. +#SBATCH --job-name R_FullAnalysis + +# Name of the SLURM partition that this job should run on. +#SBATCH -p 256GB,256GBv1 # partition (queue) +# Number of nodes required to run this job +#SBATCH -N 1 + +# Time limit for the job in the format Days-H:M:S +# A job that reaches its time limit will be cancelled. +# Specify an accurate time limit for efficient scheduling so your job runs promptly. +#SBATCH -t 7-0:0:0 + +# The standard output and errors from commands will be written to these files. +# %j in the filename will be replaced with the job number when it is submitted. +#SBATCH -o job_%j.out +#SBATCH -e job_%j.err + +# Send an email when the job status changes, to the specified address. 
+#SBATCH --mail-type ALL +#SBATCH --mail-user gervaise.henry@utsouthwestern.edu + +module load R/3.4.1-gccmkl + +# COMMAND GROUP 1 +Rscript ../r.scripts/sc_Demultiplex.R --p="DPrF" --d=7 +#Rscript ../r.scripts/sc_D-SampleReorder.R +#Rscript ../r.scripts/sc_Seurat.Score.CellCycle.R --p="DPrF" +Rscript ../r.scripts/sc_QC.R --p="DPrF" --cc=FALSE +Rscript ../r.scripts/sc_Cluster.R --p="DPrF" +Rscript ../r.scripts/sc_PC.Score.Stress.R --p="DPrF" +#Rscript ../r.scripts/sc_QuSAGE.Lineage.R --p="DPrF" +#Rscript ../r.scripts/sc_LineageSubClust.R --p="DPrF" +#Rscript ../r.scripts/sc_QuSAGE_EpiSubClust.R --p="DPrF" +#Rscript ../r.scripts/sc_QuSAGE_StSubClust.R --p="DPrF" +#Rscript ../r.scripts/sc_MergeSubClust.R --p="DPrF" +#Rscript ../r.scripts/sc_PC.Score.NE.R --p="DPrF" +#Rscript ../r.scripts/sc_DEG.R --p="DPrF" +#Rscript ../r.scripts/sc_Tables.R --p="DPrF" +# END OF SCRIPT diff --git a/r.scripts/sc_QuSAGE.Lineage.R b/r.scripts/sc_QuSAGE.Lineage.R index 9c28794..076e212 100755 --- a/r.scripts/sc_QuSAGE.Lineage.R +++ b/r.scripts/sc_QuSAGE.Lineage.R @@ -55,8 +55,8 @@ if (!is.na(opt$s)){ } else { rnd <- 1:ncol(sc10x.Group@data) } -eset <- as.data.frame(as.matrix(sc10x.Group@data)) -eset <- eset[,rnd] +eset <- sc10x.Group@data[,rnd] +eset <- as.data.frame(as.matrix(eset)) labels <- paste0("Cluster_",as.vector(factor(sc10x.Group@ident))) labels <- labels[rnd] rm(rnd) diff --git a/r.scripts/sc_QuSAGE_EpiSubClust.R b/r.scripts/sc_QuSAGE_EpiSubClust.R index 91fd7ef..be4edef 100755 --- a/r.scripts/sc_QuSAGE_EpiSubClust.R +++ b/r.scripts/sc_QuSAGE_EpiSubClust.R @@ -62,8 +62,8 @@ if (!is.na(opt$s)){ } else { rnd <- 1:ncol(sc10x.Group.Epi@data) } -eset <- as.data.frame(as.matrix(sc10x.Group.Epi@data)) -eset <- eset[,rnd] +eset <- sc10x.Group.Epi@data[,rnd] +eset <- as.data.frame(as.matrix(eset)) labels <- paste0("Cluster_",as.vector(factor(sc10x.Group.Epi@ident))) labels <- labels[rnd] rm(rnd) diff --git a/r.scripts/sc_QuSAGE_StSubClust.R b/r.scripts/sc_QuSAGE_StSubClust.R index 
586f2bf..6409726 100755 --- a/r.scripts/sc_QuSAGE_StSubClust.R +++ b/r.scripts/sc_QuSAGE_StSubClust.R @@ -60,8 +60,8 @@ if (!is.na(opt$s)){ } else { rnd <- 1:ncol(sc10x.Group.St@data) } -eset <- as.data.frame(as.matrix(sc10x.Group.St@data)) -eset <- eset[,rnd] +eset <- sc10x.Group.St@data[,rnd] +eset <- as.data.frame(as.matrix(eset)) labels <- paste0("Cluster_",as.vector(factor(sc10x.Group.St@ident))) labels <- labels[rnd] rm(rnd) -- GitLab