Skip to content
Snippets Groups Projects
Commit 6e3b1de3 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

First pass at keeping track of analysis for PITTED.

parents
Branches
No related merge requests found
ChIP.Rmd 0 → 100644
Analysis of ChIP-seq Enhancer Prediction
=====================================
## Setup and Imports
```{r init}
# Install the Bioconductor packages used below only when they are missing,
# so that knitting the document does not download and reinstall them on
# every run. The installer script is fetched over HTTPS rather than plain
# HTTP because source() executes whatever code the URL returns.
# NOTE(review): groHMM and GenomicAlignments are loaded below but are not
# installed by this chunk -- presumably they are already present; confirm.
bioc_packages <- c("GenomicFeatures", "org.Hs.eg.db")
is_installed <- vapply(
  bioc_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  source("https://bioconductor.org/biocLite.R")
  biocLite(bioc_packages[!is_installed])
}

library(groHMM)
library(org.Hs.eg.db)
library(GenomicAlignments)
library(GenomicFeatures)
```
## Alignments
```{r alignments}
# Load the duplicate-filtered H3K27ac ChIP-seq alignments (two replicates
# per time point) as GRanges, pool the replicates, and record the pooled
# read count of each time point as its library size.
chip_seq_dir <- paste0(
  "/Volumes/project/GCRB/Lee_Lab/s163035/",
  "Matrix_analysis_PMIT_25842977/ChIP-seq"
)

# Read the BAM of one replicate and coerce the alignments to GRanges.
#   sample: time-point directory name, e.g. "ES_D10"
#   run:    run accession that prefixes the BAM file name, e.g. "SRR1145823"
read_h3k27ac <- function(sample, run) {
  bam <- file.path(
    chip_seq_dir, sample, "H3K27ac", "remove-duplicates.sh-1.1.0",
    paste0(run, ".fastq.gz.sorted.bam.filtered.no_dups.bam")
  )
  as(readGAlignments(bam), "GRanges")
}

ES_D10_R1 <- read_h3k27ac("ES_D10", "SRR1145823")
ES_D10_R2 <- read_h3k27ac("ES_D10", "SRR2130154")
ES_D2_R1 <- read_h3k27ac("ES_D2", "SRR1145802")
ES_D2_R2 <- read_h3k27ac("ES_D2", "SRR1145803")
ES_D0_R1 <- read_h3k27ac("ES_D0", "SRR1145796")
ES_D0_R2 <- read_h3k27ac("ES_D0", "SRR1145795")
ES_D7_R1 <- read_h3k27ac("ES_D7", "SRR1145817")
ES_D7_R2 <- read_h3k27ac("ES_D7", "SRR1145816")
ES_D5_R1 <- read_h3k27ac("ES_D5", "SRR1145810")
ES_D5_R2 <- read_h3k27ac("ES_D5", "SRR1145809")

# Combine replicates
ES_D10 <- c(ES_D10_R1, ES_D10_R2)
ES_D2 <- c(ES_D2_R1, ES_D2_R2)
# Fixed: this previously pooled replicate 1 with itself, which dropped
# replicate 2 and skewed the D0 library size and RPKM values.
ES_D0 <- c(ES_D0_R1, ES_D0_R2)
ES_D7 <- c(ES_D7_R1, ES_D7_R2)
ES_D5 <- c(ES_D5_R1, ES_D5_R2)

# Library sizes: number of pooled reads per time point
library_ES_D10 <- NROW(ES_D10)
library_ES_D2 <- NROW(ES_D2)
library_ES_D0 <- NROW(ES_D0)
library_ES_D7 <- NROW(ES_D7)
library_ES_D5 <- NROW(ES_D5)
```
## Read in bed files
```{r bed_files}
# Import the "true" and "false" H3K27ac region sets for every time point
# from BED files.
# NOTE(review): import() is presumably rtracklayer's, made available through
# the packages attached in the init chunk -- confirm.

# "true" regions
ES_D0_true <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D0_true.bed",
  format = "BED"
)
ES_D10_true <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D10_true.bed",
  format = "BED"
)
ES_D2_true <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D2_true.bed",
  format = "BED"
)
ES_D5_true <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D5_true.bed",
  format = "BED"
)
ES_D7_true <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D7_true.bed",
  format = "BED"
)

# "false" regions
ES_D0_false <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D0_false.bed",
  format = "BED"
)
ES_D10_false <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D10_false.bed",
  format = "BED"
)
ES_D2_false <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D2_false.bed",
  format = "BED"
)
ES_D5_false <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D5_false.bed",
  format = "BED"
)
ES_D7_false <- import(
  "/Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq/universe_h3k27ac/ES_D7_false.bed",
  format = "BED"
)
```
## RPKM = numReads / ( geneLength/1000 * totalNumReads/1,000,000 )
```{r RPKM}
# RPKM of every region in `regions`:
#   reads overlapping the region / (region width / 1000 * library size / 1e6)
#   regions:      ranges to score (query of countOverlaps())
#   reads:        aligned reads (subject of countOverlaps())
#   library_size: total number of reads for that time point
# Returns one numeric value per region.
region_rpkm <- function(regions, reads, library_size) {
  countOverlaps(regions, reads) /
    (width(regions) / 1000 * library_size / 1000000)
}

# True
rpkm_ES_D10_true <- region_rpkm(ES_D10_true, ES_D10, library_ES_D10)
rpkm_ES_D0_true <- region_rpkm(ES_D0_true, ES_D0, library_ES_D0)
rpkm_ES_D2_true <- region_rpkm(ES_D2_true, ES_D2, library_ES_D2)
rpkm_ES_D5_true <- region_rpkm(ES_D5_true, ES_D5, library_ES_D5)
rpkm_ES_D7_true <- region_rpkm(ES_D7_true, ES_D7, library_ES_D7)

# False
rpkm_ES_D10_false <- region_rpkm(ES_D10_false, ES_D10, library_ES_D10)
rpkm_ES_D0_false <- region_rpkm(ES_D0_false, ES_D0, library_ES_D0)
rpkm_ES_D2_false <- region_rpkm(ES_D2_false, ES_D2, library_ES_D2)
rpkm_ES_D5_false <- region_rpkm(ES_D5_false, ES_D5, library_ES_D5)
rpkm_ES_D7_false <- region_rpkm(ES_D7_false, ES_D7, library_ES_D7)

# Combine and plot
rpkm_true <- c(
  rpkm_ES_D10_true, rpkm_ES_D0_true, rpkm_ES_D2_true,
  rpkm_ES_D5_true, rpkm_ES_D7_true
)
rpkm_false <- c(
  rpkm_ES_D10_false, rpkm_ES_D0_false, rpkm_ES_D2_false,
  rpkm_ES_D5_false, rpkm_ES_D7_false
)
hist(rpkm_true, col = "red")
# Overlay the "false" distribution, half-transparent, on the same axes.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
hist(rpkm_false, add = TRUE, col = rgb(0, 1, 0, 0.5))
summary(rpkm_true)
summary(rpkm_false)
# Leads to an RPKM cutoff of 1
```
Source Name Comment[ENA_SAMPLE] Material Type Characteristics [organism] Characteristics [developmental stage] Characteristics [cell line] Characteristics [cell type] Protocol REF ParameterValue [timepoint] Protocol REF ParameterValue [CD antibody] Protocol REF ParameterValue [histone antibody] Protocol REF Extract Name Material Type Comment[LIBRARY_LAYOUT] Comment[LIBRARY_SOURCE] Comment[LIBRARY_STRATEGY] Comment[LIBRARY_SELECTION] Comment[quality_scoring_system] Comment[quality_encoding] Comment[ascii_offset] Protocol REF Performer ParameterValue [actual_read_length] ParameterValue [read_type] Assay Name Technology Type Comment[ENA_EXPERIMENT] Scan Name Comment[SUBMITTED_FILE_NAME] Comment[ENA_RUN] Comment[FASTQ_URI] Comment[SPOT_LENGTH] Protocol REF ParameterValue [version] ParameterValue [reference_genome] Protocol REF ParameterValue [peak_size] ParameterValue [minimum_distance_between_peaks] Derived Array Data File Comment [Derived ArrayExpress FTP file] FactorValue [TIMEPOINT] FactorValue [CD antibody] FactorValue [histone antibody] FactorValue [CELL_TYPE] FactorValue [CELL PROPERTY] DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 2 P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1050 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 40 single end FGC0119_s_7 sequencing assay ERX182699 FGC0119_s_7.fastq.gz FGC0119_s_7.fastq.gz ERR208014 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208014/ERR208014.fastq.gz 40 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 150 500 msrx_119_7_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 2 n/a H3K4me3 Millipore 04-745 Lot#: NG1643014 embryonic stem cell embryonic stem cell directed to definitive endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 2 P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: 
DAM1588246 P-MTAB-30020 1055 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 40 single end FGC0119_s_8 sequencing assay ERX182674 FGC0119_s_8.fastq.gz FGC0119_s_8.fastq.gz ERR208022 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208022/ERR208022.fastq.gz 40 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 500 1000 msrx_119_8_rd_500_1000.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 2 n/a H3K27me3 Millipore 07-449 Lot#: DAM1588246 embryonic stem cell embryonic stem cell directed to definitive endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 2 P-MTAB-30018 P-MTAB-30020 1060 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0120_s_4 sequencing assay ERX182697 FGC0120_s_4.fastq.gz FGC0120_s_4.fastq.gz ERR208012 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208012/ERR208012.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 day 2 n/a input embryonic stem cell embryonic stem cell directed to definitive endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 0 P-MTAB-30018 P-MTAB-30020 1059 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_1 sequencing assay ERX182671 FGC0121_s_1.fastq.gz FGC0121_s_1.fastq.gz ERR208001 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208001/ERR208001.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 day 0 n/a input embryonic stem cell n/a DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 0 P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1049 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_2 sequencing 
assay ERX182679 FGC0121_s_2.fastq.gz FGC0121_s_2.fastq.gz ERR208008 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208008/ERR208008.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 150 500 msrx_121_2_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 0 n/a H3K4me3 Millipore 04-745 Lot#: NG1643014 embryonic stem cell n/a DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 5 P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1051 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_3 sequencing assay ERX182667 FGC0121_s_3.fastq.gz FGC0121_s_3.fastq.gz ERR207998 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207998/ERR207998.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 150 500 msrx_121_3_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 5 n/a H3K4me3 Millipore 04-745 Lot#: NG1643014 embryonic stem cell embryonic stem cell directed to primitive gut tube DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 7 P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1052 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_4 sequencing assay ERX182662 FGC0121_s_4.fastq.gz FGC0121_s_4.fastq.gz ERR207987 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207987/ERR207987.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 150 500 msrx_121_4_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 7 n/a H3K4me3 Millipore 04-745 Lot#: NG1643014 embryonic stem cell embryonic stem cell directed to posterior foregut DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 0 
P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: DAM1588246 P-MTAB-30020 1054 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_5 sequencing assay ERX182653 FGC0121_s_5.fastq.gz FGC0121_s_5.fastq.gz ERR208019 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208019/ERR208019.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 500 1000 msrx_121_5_rd_500_1000.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 0 n/a H3K27me3 Millipore 07-449 Lot#: DAM1588246 embryonic stem cell n/a DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 5 P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: DAM1588246 P-MTAB-30020 1056 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_6 sequencing assay ERX182664 FGC0121_s_6.fastq.gz FGC0121_s_6.fastq.gz ERR208010 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208010/ERR208010.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 500 1000 msrx_121_6_rd_500_1000.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 5 n/a H3K27me3 Millipore 07-449 Lot#: DAM1588246 embryonic stem cell embryonic stem cell directed to primitive gut tube DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 7 P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: DAM1588246 P-MTAB-30020 1057 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_7 sequencing assay ERX182680 FGC0121_s_7.fastq.gz FGC0121_s_7.fastq.gz ERR208015 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208015/ERR208015.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 150 500 msrx_121_7_rd_500_1000.txt 
ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 7 n/a H3K27me3 Millipore 07-449 Lot#: DAM1588246 embryonic stem cell embryonic stem cell directed to posterior foregut DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 5 P-MTAB-30018 P-MTAB-30020 1061 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0121_s_8 sequencing assay ERX182672 FGC0121_s_8.fastq.gz FGC0121_s_8.fastq.gz ERR207984 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207984/ERR207984.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 day 5 n/a input embryonic stem cell embryonic stem cell directed to primitive gut tube DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 10 P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1053 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0123_s_1 sequencing assay ERX182695 FGC0123_s_1.fastq.gz FGC0123_s_1.fastq.gz ERR207999 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207999/ERR207999.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 150 500 msrx_123_1_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 10 n/a H3K4me3 Millipore 04-745 Lot#: NG1643014 embryonic stem cell embryonic stem cell directed to pancreatic endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 10 P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: DAM1588246 P-MTAB-30020 1058 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0123_s_2 sequencing assay ERX182673 FGC0123_s_2.fastq.gz FGC0123_s_2.fastq.gz ERR207994 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207994/ERR207994.fastq.gz 36 
P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 500 1000 msrx_123_2_rd_500_1000.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 10 n/a H3K27me3 Millipore 07-449 Lot#: DAM1588246 embryonic stem cell embryonic stem cell directed to pancreatic endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 7 P-MTAB-30018 P-MTAB-30020 1062 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0123_s_3 sequencing assay ERX182669 FGC0123_s_3.fastq.gz FGC0123_s_3.fastq.gz ERR208011 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208011/ERR208011.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 day 7 n/a input embryonic stem cell embryonic stem cell directed to posterior foregut DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 10 P-MTAB-30018 P-MTAB-30020 1063 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0123_s_4 sequencing assay ERX182698 FGC0123_s_4.fastq.gz FGC0123_s_4.fastq.gz ERR207986 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207986/ERR207986.fastq.gz 36 P-MTAB-30024 1.6.0 hg18 P-MTAB-30026 day 10 n/a input embryonic stem cell embryonic stem cell directed to pancreatic endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 0 P-MTAB-30017 P-MTAB-30019 1748 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 101 single end FGC0170_s_1 sequencing assay ERX182676 FGC0170_s_1.fastq.gz FGC0170_s_1.fastq.gz ERR207985 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207985/ERR207985.fastq.gz 101 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 
0 n/a n/a (rna-seq) embryonic stem cell n/a DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 2 P-MTAB-30017 P-MTAB-30019 1749 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 101 single end FGC0170_s_2 sequencing assay ERX182678 FGC0170_s_2.fastq.gz FGC0170_s_2.fastq.gz ERR207981 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207981/ERR207981.fastq.gz 101 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 2 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to definitive endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 5 P-MTAB-30017 P-MTAB-30019 1750 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 101 single end FGC0170_s_3 sequencing assay ERX182677 FGC0170_s_3.fastq.gz FGC0170_s_3.fastq.gz ERR266339 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266339/ERR266339.fastq.gz 101 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 5 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to primitive gut tube DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 7 P-MTAB-30017 P-MTAB-30019 1751 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 101 single end FGC0170_s_5 sequencing assay ERX182665 FGC0170_s_5.fastq.gz FGC0170_s_5.fastq.gz ERR208004 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208004/ERR208004.fastq.gz 101 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt 
ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 7 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to posterior foregut DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 10 P-MTAB-30017 P-MTAB-30019 1752 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 101 single end FGC0170_s_6 sequencing assay ERX182658 FGC0170_s_6.fastq.gz FGC0170_s_6.fastq.gz ERR207980 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207980/ERR207980.fastq.gz 101 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 10 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to pancreatic endoderm Cohort (E2147) ERS197534 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30016 P-MTAB-30017 P-MTAB-30019 1754 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 101 single end FGC0170_s_8 sequencing assay ERX182668 FGC0170_s_8.fastq.gz FGC0170_s_8.fastq.gz ERR266350 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266350/ERR266350.fastq.gz 101 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 n/a n/a (rna-seq) endocrine cell in vivo-matured endocrine cells Cohort (E2147) ERS197534 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30016 P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1765 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0172_s_6 sequencing assay ERX182700 FGC0172_s_6.fastq.gz FGC0172_s_6.fastq.gz 
ERR208013 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208013/ERR208013.fastq.gz 36 P-MTAB-30024 1.7.0 hg18 P-MTAB-30026 150 500 msrx_172_6_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 n/a H3K4me3 Millipore 04-745 Lot#: NG1643014 endocrine cell in vivo-matured endocrine cells Cohort (E2147) ERS197534 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30016 P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: DAM1588246 P-MTAB-30020 1767 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0172_s_8 sequencing assay ERX182685 FGC0172_s_8.fastq.gz FGC0172_s_8.fastq.gz ERR207979 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR207/ERR207979/ERR207979.fastq.gz 36 P-MTAB-30024 1.7.0 hg18 P-MTAB-30026 500 1000 msrx_172_8_rd_500_1000.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 n/a H3K27me3 Millipore 07-449 Lot#: DAM1588246 endocrine cell in vivo-matured endocrine cells Cohort (E2147) ERS197534 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30016 P-MTAB-30018 P-MTAB-30020 1768 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0173_s_8 sequencing assay ERX182701 FGC0173_s_8.fastq.gz FGC0173_s_8.fastq.gz ERR208029 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208029/ERR208029.fastq.gz 36 P-MTAB-30024 1.7.0 hg18 P-MTAB-30026 day 13 n/a input endocrine cell in vivo-matured endocrine cells DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 0 P-MTAB-30017 P-MTAB-30019 1748 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0175_s_1 sequencing assay ERX182659 FGC0175_s_1.fastq.gz 
FGC0175_s_1.fastq.gz ERR266342 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266342/ERR266342.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 0 n/a n/a (rna-seq) embryonic stem cell n/a DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 2 P-MTAB-30017 P-MTAB-30019 1749 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0175_s_2 sequencing assay ERX182681 FGC0175_s_2.fastq.gz FGC0175_s_2.fastq.gz ERR266333 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266333/ERR266333.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 2 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to definitive endoderm DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 5 P-MTAB-30017 P-MTAB-30019 1750 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0175_s_3 sequencing assay ERX182688 FGC0175_s_3.fastq.gz FGC0175_s_3.fastq.gz ERR266341 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266341/ERR266341.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 5 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to primitive gut tube DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 7 P-MTAB-30017 P-MTAB-30019 1751 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0175_s_5 
sequencing assay ERX182655 FGC0175_s_5.fastq.gz FGC0175_s_5.fastq.gz ERR266346 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266346/ERR266346.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 7 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to posterior foregut DP1 ERS197531 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 10 P-MTAB-30017 P-MTAB-30019 1752 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0175_s_6 sequencing assay ERX182690 FGC0175_s_6.fastq.gz FGC0175_s_6.fastq.gz ERR266344 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266344/ERR266344.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 10 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to pancreatic endoderm Cohort (E2147) ERS197534 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30016 P-MTAB-30017 P-MTAB-30019 1754 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0175_s_8 sequencing assay ERX182663 FGC0175_s_8.fastq.gz FGC0175_s_8.fastq.gz ERR266331 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266331/ERR266331.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 n/a n/a (rna-seq) endocrine cell in vivo-matured endocrine cells "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD200-APC, eBioscience 17-9200, lot# E12743-101" P-MTAB-30017 P-MTAB-30019 
1906 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0186_s_1 sequencing assay ERX182694 FGC0186_s_1.fastq.gz FGC0186_s_1.fastq.gz ERR266332 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266332/ERR266332.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD200-APC, eBioscience 17-9200, lot# E12743-101" n/a (rna-seq) CD200+ polyhormonal cell CD200+ polyhormonal cells from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD200-APC, eBioscience 17-9200, lot# E12743-101" P-MTAB-30017 P-MTAB-30019 1906 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0186_s_2 sequencing assay ERX182696 FGC0186_s_2.fastq.gz FGC0186_s_2.fastq.gz ERR266336 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266336/ERR266336.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD200-APC, eBioscience 17-9200, lot# E12743-101" n/a (rna-seq) CD200+ polyhormonal cell CD200+ polyhormonal cells from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD142-PE, BD 550312, lot#: 78463" P-MTAB-30017 P-MTAB-30019 1907 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0186_s_3 sequencing assay ERX182654 FGC0186_s_3.fastq.gz FGC0186_s_3.fastq.gz ERR266345 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266345/ERR266345.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 
msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD142-PE, BD 550312, lot#: 78463" n/a (rna-seq) CD142+ late pancreatic endoderm from embryonic stem cell CD142+ late pancreatic endoderm from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD142-PE, BD 550312, lot#: 78463" P-MTAB-30017 P-MTAB-30019 1907 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0186_s_4 sequencing assay ERX182656 FGC0186_s_4.fastq.gz FGC0186_s_4.fastq.gz ERR266330 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266330/ERR266330.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD142-PE, BD 550312, lot#: 78463" n/a (rna-seq) CD142+ late pancreatic endoderm from embryonic stem cell CD142+ late pancreatic endoderm from embryonic stem cell Cohort (E2182) ERS197536 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30016 P-MTAB-30017 P-MTAB-30019 1908 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0186_s_5 sequencing assay ERX182687 FGC0186_s_5.fastq.gz FGC0186_s_5.fastq.gz ERR266348 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266348/ERR266348.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 n/a n/a (rna-seq) endocrine cell in vivo-matured endocrine cells Cohort (E2182) ERS197536 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30016 P-MTAB-30017 P-MTAB-30019 1908 synthetic_DNA SINGLE 
TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0186_s_6 sequencing assay ERX182702 FGC0186_s_6.fastq.gz FGC0186_s_6.fastq.gz ERR266334 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266334/ERR266334.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 n/a n/a (rna-seq) endocrine cell in vivo-matured endocrine cells "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD200-APC, eBioscience 17-9200, lot# E12743-101" P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1946 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0190_s_5 sequencing assay ERX182684 FGC0190_s_5.fastq.gz FGC0190_s_5.fastq.gz ERR208002 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208002/ERR208002.fastq.gz 36 P-MTAB-30024 1.7.0 hg18 P-MTAB-30026 150 500 msrx_190_5_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD200-APC, eBioscience 17-9200, lot# E12743-101" H3K4me3 Millipore 04-745 Lot#: NG1643014 CD200+ polyhormonal cell CD200+ polyhormonal cells from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD200-APC, eBioscience 17-9200, lot# E12743-101" P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: DAM1588246 P-MTAB-30020 1947 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0190_s_6 sequencing assay ERX182689 FGC0190_s_6.fastq.gz FGC0190_s_6.fastq.gz ERR208023 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208023/ERR208023.fastq.gz 36 P-MTAB-30024 1.7.0 hg18 P-MTAB-30026 500 
1000 msrx_190_6_rd_500_1000.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD200-APC, eBioscience 17-9200, lot# E12743-101" H3K27me3 Millipore 07-449 Lot#: DAM1588246 CD200+ polyhormonal cell CD200+ polyhormonal cells from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD142-PE, BD 550312, lot#: 78463" P-MTAB-30018 H3K4me3 Millipore 04-745 Lot#: NG1643014 P-MTAB-30020 1948 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0190_s_7 sequencing assay ERX182691 FGC0190_s_7.fastq.gz FGC0190_s_7.fastq.gz ERR208024 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208024/ERR208024.fastq.gz 36 P-MTAB-30024 1.7.0 hg18 P-MTAB-30026 150 500 msrx_190_7_rd_150_500.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD142-PE, BD 550312, lot#: 78463" H3K4me3 Millipore 04-745 Lot#: NG1643014 CD142+ late pancreatic endoderm from embryonic stem cell CD142+ late pancreatic endoderm from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD142-PE, BD 550312, lot#: 78463" P-MTAB-30018 H3K27me3 Millipore 07-449 Lot#: DAM1588246 P-MTAB-30020 1949 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0190_s_8 sequencing assay ERX182683 FGC0190_s_8.fastq.gz FGC0190_s_8.fastq.gz ERR208017 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208017/ERR208017.fastq.gz 36 P-MTAB-30024 1.7.0 hg18 P-MTAB-30026 500 1000 msrx_190_8_rd_500_1000.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD142-PE, BD 550312, lot#: 78463" H3K27me3 Millipore 07-449 Lot#: DAM1588246 CD142+ 
late pancreatic endoderm from embryonic stem cell CD142+ late pancreatic endoderm from embryonic stem cell DP3 ERS197530 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 0 P-MTAB-30017 P-MTAB-30019 2010 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0198_s_1 sequencing assay ERX182652 FGC0198_s_1.fastq.gz FGC0198_s_1.fastq.gz ERR266335 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266335/ERR266335.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 0 n/a n/a (rna-seq) embryonic stem cell n/a DP3 ERS197530 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 2 P-MTAB-30017 P-MTAB-30019 2011 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0198_s_2 sequencing assay ERX182686 FGC0198_s_2.fastq.gz FGC0198_s_2.fastq.gz ERR266349 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266349/ERR266349.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 2 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to definitive endoderm DP3 ERS197530 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 5 P-MTAB-30017 P-MTAB-30019 2012 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0198_s_3 sequencing assay ERX182661 FGC0198_s_3.fastq.gz FGC0198_s_3.fastq.gz ERR266337 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266337/ERR266337.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt 
ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 5 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to primitive gut tube DP3 ERS197530 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 7 P-MTAB-30017 P-MTAB-30019 2013 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0198_s_4 sequencing assay ERX182670 FGC0198_s_4.fastq.gz FGC0198_s_4.fastq.gz ERR266351 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266351/ERR266351.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 7 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to posterior foregut DP3 ERS197530 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 10 P-MTAB-30017 P-MTAB-30019 2014 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0198_s_5 sequencing assay ERX182692 FGC0198_s_5.fastq.gz FGC0198_s_5.fastq.gz ERR266338 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266338/ERR266338.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 10 n/a n/a (rna-seq) embryonic stem cell embryonic stem cell directed to pancreatic endoderm "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD200-APC, eBioscience 17-9200, lot# E12743-101" P-MTAB-30017 P-MTAB-30019 2089 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0199_s_6 sequencing assay ERX182666 FGC0199_s_6.fastq.gz 
FGC0199_s_6.fastq.gz ERR266340 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266340/ERR266340.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD200-APC, eBioscience 17-9200, lot# E12743-101" n/a (rna-seq) CD200+ polyhormonal cell CD200+ polyhormonal cells from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 "CD142-PE, BD 550312, lot#: 78463" P-MTAB-30017 P-MTAB-30019 2090 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii B P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0199_s_7 sequencing assay ERX182660 FGC0199_s_7.fastq.gz FGC0199_s_7.fastq.gz ERR266329 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266329/ERR266329.fastq.gz 100 P-MTAB-30023 1.09 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip day 13 "CD142-PE, BD 550312, lot#: 78463" n/a (rna-seq) CD142+ late pancreatic endoderm from embryonic stem cell CD142+ late pancreatic endoderm from embryonic stem cell "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 P-MTAB-30018 P-MTAB-30020 1950 synthetic_DNA SINGLE GENOMIC ChIP-Seq ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0201_s_8 sequencing assay ERX182693 FGC0201_s_8.fastq.gz FGC0201_s_8.fastq.gz ERR208026 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208026/ERR208026.fastq.gz 36 P-MTAB-30024 1.8.0 hg18 P-MTAB-30026 day 13 none input embryonic stem cell embryonic stem cell directed to pancreatic endoderm "DP3, 4, 5" ERS197532 cell Homo sapiens embryo Cyt49 embryonic stem cell P-MTAB-30014 day 13 P-MTAB-30015 P-MTAB-30018 P-MTAB-30020 1950 synthetic_DNA SINGLE GENOMIC ChIP-Seq 
ChIP phred ascii B P-MTAB-30021 "Functional Genomics Core , University of Pennsylvania" 36 single end FGC0202_s_1 sequencing assay ERX182682 FGC0202_s_1.fastq.gz FGC0202_s_1.fastq.gz ERR208003 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR208/ERR208003/ERR208003.fastq.gz 36 P-MTAB-30024 1.8.0 hg18 P-MTAB-30026 day 13 none input embryonic stem cell embryonic stem cell directed to pancreatic endoderm HI40 ERS197533 cell Homo sapiens adult islet cell P-MTAB-30017 P-MTAB-30019 5691 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii # P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0310_s_1 sequencing assay ERX182657 FGC0310_s_1.fastq.gz FGC0310_s_1.fastq.gz ERR266347 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266347/ERR266347.fastq.gz 100 P-MTAB-30023 1.8.0 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip n/a n/a n/a (rna-seq) islet cell n/a HI41 ERS197535 cell Homo sapiens adult islet cell P-MTAB-30017 P-MTAB-30019 5692 synthetic_DNA SINGLE TRANSCRIPTOMIC RNA-Seq cDNA phred ascii # P-MTAB-30022 "Functional Genomics Core , University of Pennsylvania" 100 single end FGC0310_s_2 sequencing assay ERX182675 FGC0310_s_2.fastq.gz FGC0310_s_2.fastq.gz ERR266343 ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR266/ERR266343/ERR266343.fastq.gz 100 P-MTAB-30023 1.8.0 hg18 P-MTAB-30025 msrx_rnaseq_processed_rep.txt ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/experiment/MTAB/E-MTAB-1086/E-MTAB-1086.processed.1.zip n/a n/a n/a (rna-seq) islet cell n/a
\ No newline at end of file
Run,ReleaseDate,LoadDate,spots,bases,spots_with_mates,avgLength,size_MB,AssemblyName,download_path,Experiment,LibraryName,LibraryStrategy,LibrarySelection,LibrarySource,LibraryLayout,InsertSize,InsertDev,Platform,Model,SRAStudy,BioProject,Study_Pubmed_id,ProjectID,Sample,BioSample,SampleType,TaxID,ScientificName,SampleName,g1k_pop_code,source,g1k_analysis_group,Subject_ID,Sex,Disease,Tumor,Affection_Status,Analyte_Type,Histological_Type,Body_Site,CenterName,Submission,dbgap_study_accession,Consent,RunHash,ReadHash SRR1145795,7/22/15,12/31/15,69173059,3458652950,0,50,1875,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145795,SRX451040,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544377,SAMN02598631,simple,9606,Homo sapiens,GSM1316300,,,,,,,no,,,,,GEO,SRA129458,,public,E792FD7BAA8254AD0B27DF3D601146A3,620768D82D4DFC30D6B39B06547AA7B6 SRR1145796,7/22/15,12/31/15,38027117,1901355850,0,50,1204,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145796,SRX451041,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544376,SAMN02598633,simple,9606,Homo sapiens,GSM1316301,,,,,,,no,,,,,GEO,SRA129458,,public,2CDAABDC01FA4E37DBFB73B827404593,F6C1176D03B47A470D2738C799A6BD20 SRR1145797,7/22/15,12/31/15,38283300,1914165000,0,50,1210,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145797,SRX451042,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544378,SAMN02598632,simple,9606,Homo sapiens,GSM1316302,,,,,,,no,,,,,GEO,SRA129458,,public,4717DE44CECCD683558FA00ABDACCF11,FD433EC00E22E872D31DDAFEB8B8FC8D SRR1145798,7/22/15,1/2/16,37280446,1342096056,0,36,856,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145798,SRX451043,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544379,SAMN02598635,simple,9606,Homo 
sapiens,GSM1316303,,,,,,,no,,,,,GEO,SRA129458,,public,EE6A6F826172D53204A3B315BC93CEFB,C2698CECB8D238B037F4EE7E611A5EFD SRR1145799,7/22/15,1/2/16,27300359,982812924,0,36,561,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145799,SRX451044,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544380,SAMN02598634,simple,9606,Homo sapiens,GSM1316304,,,,,,,no,,,,,GEO,SRA129458,,public,05154ECD0DF182DFDF4CD51528B10297,C34667E8C82D06CA0F235AFFBDBDEC30 SRR1145800,7/22/15,12/31/15,38680140,1392485040,0,36,868,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145800,SRX451045,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544381,SAMN02598639,simple,9606,Homo sapiens,GSM1316305,,,,,,,no,,,,,GEO,SRA129458,,public,A413EC0B71C75D0994FC2A605EB143F8,698359EC9595CFFFCDF005424DE32A59 SRR1145801,3/31/15,1/29/14,298099247,14904962350,0,50,9462,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145801,SRX451046,,OTHER,other,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544383,SAMN02598638,simple,9606,Homo sapiens,GSM1316306,,,,,,,no,,,,,GEO,SRA129458,,public,4E7BC1D448C16F0B2FD7632A02C5FE02,252C35CEF6F82FC6DDC4A5F549894C59 SRR1145802,7/22/15,1/2/16,15875431,571515516,0,36,328,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145802,SRX451047,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544382,SAMN02598637,simple,9606,Homo sapiens,GSM1316307,,,,,,,no,,,,,GEO,SRA129458,,public,639882105A17EA78A13182FEB34030F9,43A4F55C4F61E0786D685000557DA6EA SRR1145803,7/22/15,12/31/15,147426442,5307351912,0,36,3289,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145803,SRX451048,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544384,SAMN02598641,simple,9606,Homo 
sapiens,GSM1316308,,,,,,,no,,,,,GEO,SRA129458,,public,23F3BDA2CAC4007D1F4A9BF471ECD255,771CDE63D2B9EF978DF6D0F19C2BF8B0 SRR1145804,7/22/15,1/2/16,33877753,1693887650,0,50,1061,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145804,SRX451049,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544385,SAMN02598644,simple,9606,Homo sapiens,GSM1316309,,,,,,,no,,,,,GEO,SRA129458,,public,FDEE862202A19151F2117BE171D010D2,7B2D90D94F8B623668B473C165DCC334 SRR1145805,7/22/15,12/31/15,39649956,1427398416,0,36,922,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145805,SRX451050,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544387,SAMN02598636,simple,9606,Homo sapiens,GSM1316310,,,,,,,no,,,,,GEO,SRA129458,,public,DB7D06B2018C7A01772CEBBF9AF2FFA0,44CAA58ACD7B9D580F69C9774AE7CC2F SRR1145806,7/22/15,1/2/16,25249600,908985600,0,36,615,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145806,SRX451051,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544386,SAMN02598640,simple,9606,Homo sapiens,GSM1316311,,,,,,,no,,,,,GEO,SRA129458,,public,9287DD19FABCFFADD926080B51101C77,D5B115F7F9742EA35064C78460F6F33A SRR1145807,7/22/15,1/2/16,37496013,1349856468,0,36,800,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145807,SRX451052,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544389,SAMN02598643,simple,9606,Homo sapiens,GSM1316312,,,,,,,no,,,,,GEO,SRA129458,,public,42D92B4DE624B2BDECA7BD7F32860BAC,5B784FF08B73AE191D33B6C9A0744339 SRR1145808,3/31/15,1/29/14,290076884,14503844200,0,50,8617,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145808,SRX451053,,OTHER,other,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544388,SAMN02598642,simple,9606,Homo sapiens,GSM1316313,,,,,,,no,,,,,GEO,SRA129458,,public,208C5F1FB758CF95B14D036EC74A01C7,AB9A202B32CF51A8D1A0217B5A6208DB 
SRR1145809,7/22/15,1/2/16,13590817,489269412,0,36,286,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145809,SRX451054,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544390,SAMN02598647,simple,9606,Homo sapiens,GSM1316314,,,,,,,no,,,,,GEO,SRA129458,,public,0423A935F07801FFB5363F9A1257D2D2,FE8AD9B1256F46D760E2ABAD0199E1CD SRR1145810,7/22/15,1/2/16,52160936,1877793696,0,36,1053,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145810,SRX451055,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544391,SAMN02598648,simple,9606,Homo sapiens,GSM1316315,,,,,,,no,,,,,GEO,SRA129458,,public,407151F9C50319336C8E7E344F4BA02F,BDA3EA3591C6F3A64BB6050D14576F11 SRR1145811,7/22/15,1/2/16,37652714,1882635700,0,50,1181,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145811,SRX451056,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544393,SAMN02598649,simple,9606,Homo sapiens,GSM1316316,,,,,,,no,,,,,GEO,SRA129458,,public,FF0566B2E11F84324459E652B7AEE2AE,1F42648F605EAD00D57E86F44C2C2637 SRR1145812,7/22/15,12/31/15,40883319,1471799484,0,36,901,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145812,SRX451057,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544392,SAMN02598645,simple,9606,Homo sapiens,GSM1316317,,,,,,,no,,,,,GEO,SRA129458,,public,5CA5E1910743240FC3AA175C7183A6AD,E54E40FF7C62769E98D4452A27C72D9B SRR1145813,7/22/15,1/2/16,28018121,1008652356,0,36,592,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145813,SRX451058,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544394,SAMN02598609,simple,9606,Homo sapiens,GSM1316318,,,,,,,no,,,,,GEO,SRA129458,,public,D701805C2C954F8D6A232AE5A09E782F,8BA70181324ACD88800F91538EF9A7F8 
SRR1145814,7/22/15,1/2/16,40670265,1464129540,0,36,892,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145814,SRX451059,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544395,SAMN02598610,simple,9606,Homo sapiens,GSM1316319,,,,,,,no,,,,,GEO,SRA129458,,public,23D2FE75F73F3D2AB394A3F8F7A37F14,A7B488A0C503220597FD4B7AB6284C3B SRR1145815,7/22/15,12/31/15,58828025,2941401250,0,50,1890,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145815,SRX451060,,OTHER,other,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544396,SAMN02598624,simple,9606,Homo sapiens,GSM1316320,,,,,,,no,,,,,GEO,SRA129458,,public,FA14B22E8188CC64E9E0B3C863FB7D20,C89B8F16965049BD1F96ED087423BA38 SRR1145816,7/22/15,1/2/16,23452640,844295040,0,36,486,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145816,SRX451061,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544397,SAMN02598618,simple,9606,Homo sapiens,GSM1316321,,,,,,,no,,,,,GEO,SRA129458,,public,0753FCBD14B1B497D3C0EF45394A6116,6B9B335AC1A1273B9727604CDF90DB77 SRR1145817,7/22/15,1/2/16,49739383,1790617788,0,36,983,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145817,SRX451062,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544398,SAMN02598650,simple,9606,Homo sapiens,GSM1316322,,,,,,,no,,,,,GEO,SRA129458,,public,78CD870EFA60CE374AAA32B5D82DEA26,591C716C82F8731F835EB44807F1526A SRR1145818,7/22/15,1/2/16,38044814,1902240700,0,50,1195,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145818,SRX451063,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544399,SAMN02598646,simple,9606,Homo sapiens,GSM1316323,,,,,,,no,,,,,GEO,SRA129458,,public,D4D320F0E36C70118FF759D65ACDAE31,3ACA8711C8E2846657A667D04BEB13BF 
SRR1145819,7/22/15,1/2/16,40142244,1445120784,0,36,916,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145819,SRX451064,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544401,SAMN02598608,simple,9606,Homo sapiens,GSM1316324,,,,,,,no,,,,,GEO,SRA129458,,public,D9A3714BF6DEF108350306FEF24F8FF8,3D3026F9496F6E197E3590B54914C520 SRR1145820,7/22/15,1/2/16,30243062,1088750232,0,36,655,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145820,SRX451065,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544400,SAMN02598615,simple,9606,Homo sapiens,GSM1316325,,,,,,,no,,,,,GEO,SRA129458,,public,629E943B1D96E046AFD04AA59FC3933F,A0A3CC648200DB0411ED22A22005349E SRR1145821,7/22/15,1/2/16,39285672,1414284192,0,36,918,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145821,SRX451066,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544402,SAMN02598620,simple,9606,Homo sapiens,GSM1316326,,,,,,,no,,,,,GEO,SRA129458,,public,7C249DE13FC267C2C9A53FB79BB77C45,10BBD976327DA7A8EF60F31D0B1823A5 SRR1145822,7/22/15,12/31/15,59380853,2969042650,0,50,1884,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145822,SRX451067,,OTHER,other,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544403,SAMN02598617,simple,9606,Homo sapiens,GSM1316327,,,,,,,no,,,,,GEO,SRA129458,,public,863C1BA347DBF8A9F3EE1F5AF36669AC,93FEA8DCC818C5428063171FB7229FF8 SRR1145823,7/22/15,1/2/16,20404023,734544828,0,36,421,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145823,SRX451068,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544404,SAMN02598619,simple,9606,Homo sapiens,GSM1316328,,,,,,,no,,,,,GEO,SRA129458,,public,D2788CE06001CDCB4FD38A862030A39B,D1ED4F8010C90EE3EFDE3113958438B8 SRR1145824,,1/29/14,160964728,5794730208,0,36,4056,,,SRX451069,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 
2500,SRP035929,PRJNA236597,2,236597,SRS544405,SAMN02598621,simple,9606,Homo sapiens,GSM1316329,,,,,,,no,,,,,GEO,SRA129458,,public,9B9598388394E47497D03B38E394104D,95E0856606FAD1830E0FDD2794CAACAB SRR1145825,7/22/15,1/2/16,40482392,2024119600,0,50,1269,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145825,SRX451070,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544406,SAMN02598611,simple,9606,Homo sapiens,GSM1316330,,,,,,,no,,,,,GEO,SRA129458,,public,B4437D174A432CF9E42B36D98F43070D,C0A7C55DFBC1110DF892770227AD9070 SRR1145826,7/22/15,1/2/16,39571969,1424590884,0,36,902,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145826,SRX451071,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544407,SAMN02598612,simple,9606,Homo sapiens,GSM1316331,,,,,,,no,,,,,GEO,SRA129458,,public,E41C27679C41B384084ACAA43D90AEF3,096039D2A4CFDAD70709CD4FA7678A43 SRR1145827,7/22/15,1/2/16,30408999,1094723964,0,36,631,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145827,SRX451072,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544408,SAMN02598613,simple,9606,Homo sapiens,GSM1316332,,,,,,,no,,,,,GEO,SRA129458,,public,A189FDF19662D1E5D12643CB3B666572,C5514CFCBCA48146890090BAD1449AF5 SRR1145828,7/22/15,1/2/16,40026963,1440970668,0,36,863,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145828,SRX451073,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544409,SAMN02598614,simple,9606,Homo sapiens,GSM1316333,,,,,,,no,,,,,GEO,SRA129458,,public,844E8AFB84E1A206607FC60C320A9170,4BEDDBEE138B2F3DD5BF3DA2576DC580 SRR1145829,7/22/15,1/2/16,54947852,2747392600,0,50,1746,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145829,SRX451074,,OTHER,other,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544410,SAMN02598616,simple,9606,Homo 
sapiens,GSM1316334,,,,,,,no,,,,,GEO,SRA129458,,public,0CC36A330514372BB135324CD9C038A9,D7C7C28791E34640E37A3F5D774A1156 SRR1145830,3/31/15,1/29/14,32373509,1165446324,0,36,770,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145830,SRX451075,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544411,SAMN02598623,simple,9606,Homo sapiens,GSM1316335,,,,,,,no,,,,,GEO,SRA129458,,public,45E757621CD8D869E87D18CD3FD95098,71A34A4C551F166C6764F2DB18C37859 SRR1145831,3/31/15,1/29/14,36370065,1309322340,0,36,811,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145831,SRX451076,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544412,SAMN02598626,simple,9606,Homo sapiens,GSM1316336,,,,,,,no,,,,,GEO,SRA129458,,public,5BECDDC504587C1D7E65C285608B0FF5,559528824F2E236003BA4890C4931F59 SRR1145832,3/31/15,1/29/14,79932329,3996616450,0,50,2508,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145832,SRX451077,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544413,SAMN02598622,simple,9606,Homo sapiens,GSM1316337,,,,,,,no,,,,,GEO,SRA129458,,public,AFB9D7E1E54CC39AFC6DD49C9EF393B9,8C413B76DD05F1CCAE67CEE900643F40 SRR1145833,3/31/15,1/29/14,43758320,2187916000,0,50,1418,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145833,SRX451078,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544414,SAMN02598625,simple,9606,Homo sapiens,GSM1316338,,,,,,,no,,,,,GEO,SRA129458,,public,B68FD3759540365F518F4C83BA1C00EF,3F07D1BD0514032B25DBE6C014F14A4D SRR1145834,3/31/15,1/29/14,216993840,10849692000,0,50,6597,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145834,SRX451079,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544415,SAMN02598628,simple,9606,Homo 
sapiens,GSM1316339,,,,,,,no,,,,,GEO,SRA129458,,public,F40A770DA8DC3681B3CB93D490032462,FC63D0B633C82DC506088814CFC604C8 SRR1145835,3/31/15,1/29/14,140099827,7004991350,0,50,4050,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145835,SRX451080,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544416,SAMN02598629,simple,9606,Homo sapiens,GSM1316340,,,,,,,no,,,,,GEO,SRA129458,,public,831A4D0F2F3A3ED778F6CB7DACBFC8BF,64D8A0313CCC81F30A9390358100B1F4 SRR1145836,7/22/15,1/2/16,28300544,1018819584,0,36,621,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145836,SRX451081,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544417,SAMN02598627,simple,9606,Homo sapiens,GSM1316341,,,,,,,no,,,,,GEO,SRA129458,,public,1CE831B11AC2A2D7E4A7009ACCA61AF7,ED54114C0DA641A03C98034EF1B19FA5 SRR1145837,3/31/15,1/29/14,188247636,9412381800,0,50,6521,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1145837,SRX451082,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544421,SAMN02598630,simple,9606,Homo sapiens,GSM1316342,,,,,,,no,,,,,GEO,SRA129458,,public,76B7679D1ADBFAF1A4870A059400D7A7,8C2B35948CA40CA5A2BF50499963374C SRR1658444,3/31/15,11/20/14,36024813,1296893268,0,36,856,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658444,SRX764805,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749164,SAMN03203054,simple,9606,Homo sapiens,GSM1551464,,,,,,,no,,,,,GEO,SRA129458,,public,29396E439BCEFB10FA486592F4FE0A12,1EF893076C5B9D84E45F244C5F0DE98B SRR1658445,3/31/15,11/20/14,29598732,1065554352,0,36,705,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658445,SRX764806,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749165,SAMN03203064,simple,9606,Homo 
sapiens,GSM1551465,,,,,,,no,,,,,GEO,SRA129458,,public,1F0EB064C8ACC6BE7953E57A65A0565C,C3B4B43D2181BB16FA18EDECF1DFB87B SRR1658446,3/31/15,11/20/14,35278758,1270035288,0,36,835,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658446,SRX764807,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749166,SAMN03203065,simple,9606,Homo sapiens,GSM1551466,,,,,,,no,,,,,GEO,SRA129458,,public,B98BD543703BA582432781238E57DBD3,DE9DF7954B2FB4B9CD119E357D06144A SRR1658447,3/31/15,11/20/14,33711796,1213624656,0,36,801,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658447,SRX764808,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749167,SAMN03203055,simple,9606,Homo sapiens,GSM1551467,,,,,,,no,,,,,GEO,SRA129458,,public,59EF519856EA1789D717A288FEC17509,804AFBFD4372970978F53BDB16D17FBE SRR1658448,3/31/15,11/20/14,32600517,1630025850,0,50,1061,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658448,SRX764809,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749168,SAMN03203056,simple,9606,Homo sapiens,GSM1551468,,,,,,,no,,,,,GEO,SRA129458,,public,951E96117FEAB5C2B5236B80B701221C,441C3CEA7CFC2223765BCB365BDE0374 SRR1658449,3/31/15,11/20/14,32697049,1634852450,0,50,1063,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658449,SRX764810,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749170,SAMN03203051,simple,9606,Homo sapiens,GSM1551469,,,,,,,no,,,,,GEO,SRA129458,,public,04220BEEE9CD48453B066F2D2D0B7C44,00121AE43895D7144928D00B928D318A SRR1658450,3/31/15,11/20/14,27698629,1384931450,0,50,904,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658450,SRX764811,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749169,SAMN03203060,simple,9606,Homo 
sapiens,GSM1551470,,,,,,,no,,,,,GEO,SRA129458,,public,ABC429B882370066B98CF6DDBC4B11BF,0327F4A3427C721B629AB07BFCC3DFDF SRR1658451,3/31/15,11/20/14,34484729,1724236450,0,50,1119,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658451,SRX764812,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749171,SAMN03203057,simple,9606,Homo sapiens,GSM1551471,,,,,,,no,,,,,GEO,SRA129458,,public,33EB98BE52D1DA33DF18A2F537B68AB0,25BDEF7931FA311E8C3BE273FE74F1DC SRR1658452,3/31/15,11/20/14,40255113,1449184068,0,36,1018,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658452,SRX764813,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749172,SAMN03203058,simple,9606,Homo sapiens,GSM1551472,,,,,,,no,,,,,GEO,SRA129458,,public,7318D22686287523B4FAEC98A36766F0,688455A40B9AD0674B719A30D0CCD729 SRR1658453,3/31/15,11/20/14,25045093,901623348,0,36,609,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR1658453,SRX764814,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS749173,SAMN03203052,simple,9606,Homo sapiens,GSM1551473,,,,,,,no,,,,,GEO,SRA129458,,public,724DD2B9A523605F21CBEC4B424A168A,BC8CABE49E559F653593F7FCCD766618 SRR2130154,7/27/15,7/27/15,43230390,1556294040,0,36,814,,http://sra-download.ncbi.nlm.nih.gov/srapub/SRR2130154,SRX451069,,ChIP-Seq,ChIP,GENOMIC,SINGLE,0,0,ILLUMINA,Illumina HiSeq 2500,SRP035929,PRJNA236597,2,236597,SRS544405,SAMN02598621,simple,9606,Homo sapiens,GSM1316329,,,,,,,no,,,,,GEO,SRA129458,,public,A15FDB7A8336BCA6B4A5ACED202ACB02,0F449BAF91A0AC72926FD0D7D22393BD
\ No newline at end of file
Random-forest-based enhancer prediction (RFECS)
http://enhancer.ucsd.edu/renlab/RFECS_enhancer_prediction/
#!/bin/bash
# SLURM batch job: build the STAR genome index for UCSC hg19 using the
# GENCODE v19 annotation for splice-junction information.
#SBATCH --job-name=star_index
#SBATCH --partition=super
#SBATCH --nodes=1
#SBATCH --time=0-24:00:00
#SBATCH --output=star_index.%j.out
#SBATCH --error=star_index.%j.err
#SBATCH --mail-user=venkat.malladi@utsouthwestern.edu
#SBATCH --mail-type=ALL

# Stop at the first failing command so a broken module load or an
# unwritable index directory is reported instead of cascading into STAR.
set -e

module load iGenomes/2013-03-25
module load star/2.4.2a

# The FASTA path is derived from iGENOMES_DB_DIR (presumably exported by the
# iGenomes module -- confirm). Abort with a clear message when it is missing
# rather than silently resolving to a path under "/".
: "${iGENOMES_DB_DIR:?iGENOMES_DB_DIR is not set; did 'module load iGenomes' succeed?}"

genome_fasta="${iGENOMES_DB_DIR}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
annotation_gtf=/project/GCRB/shared/Gencode_human/release_19/gencode.v19.annotation.gtf
genome_dir=/project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0

# STAR of this vintage expects the genome directory to exist before
# genomeGenerate runs; creating it here makes the job re-runnable from scratch.
mkdir -p "$genome_dir"

STAR --runMode genomeGenerate \
    --genomeFastaFiles "$genome_fasta" \
    --sjdbOverhang 100 \
    --sjdbGTFfile "$annotation_gtf" \
    --runThreadN 8 \
    --genomeDir "$genome_dir" \
    --outFileNamePrefix gencode.v19.annotation
#!/bin/bash
# SLURM batch job: prepare the RSEM reference for UCSC hg19 using the
# GENCODE v19 annotation.
#SBATCH --job-name=rsem_index
#SBATCH --partition=super
#SBATCH --nodes=1
#SBATCH --time=0-24:00:00
#SBATCH --output=rsem_index.%j.out
#SBATCH --error=rsem_index.%j.err
#SBATCH --mail-user=venkat.malladi@utsouthwestern.edu
#SBATCH --mail-type=ALL

# Stop at the first failing command so a broken module load is reported
# directly instead of surfacing later as a confusing RSEM error.
set -e

module load iGenomes/2013-03-25
module load RSEM/1.2.31

# The FASTA path is derived from iGENOMES_DB_DIR (presumably exported by the
# iGenomes module -- confirm). Abort with a clear message when it is missing
# rather than silently resolving to a path under "/".
: "${iGENOMES_DB_DIR:?iGENOMES_DB_DIR is not set; did 'module load iGenomes' succeed?}"

annotation_gtf=/project/GCRB/shared/Gencode_human/release_19/gencode.v19.annotation.gtf
genome_fasta="${iGENOMES_DB_DIR}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
# Passed as the reference name (last positional argument) to
# rsem-prepare-reference, exactly as in the original command.
reference_name=/project/GCRB/shared/Gencode_human/release_19/prep-rsem.sh-1.0.0

rsem-prepare-reference \
    --gtf "$annotation_gtf" \
    "$genome_fasta" \
    "$reference_name"
#!/bin/bash
# SLURM batch job: STAR alignment of the RNA-seq FASTQ files listed below.
#
# Scheduling: a single node on the "super" partition, 24-hour limit.
#SBATCH --job-name=star_align
#SBATCH --nodes=1
#SBATCH --partition=super
#SBATCH --time=0-24:00:00
#
# Logs: stderr and stdout go to separate files tagged with %j.
#SBATCH --error=star_align.%j.err
#SBATCH --output=star_align.%j.out
#
# Notifications: mail on every job event.
#SBATCH --mail-type=ALL
#SBATCH --mail-user=venkat.malladi@utsouthwestern.edu

# Tool environment, loaded in the same order as before.
module load iGenomes/2013-03-25 star/2.4.2a
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D10/ERR266338.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D10/align-star.sh-1.0.0/ERR266338
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D10/ERR266344.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D10/align-star.sh-1.0.0/ERR266344
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D2/ERR266349.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D2/align-star.sh-1.0.0/ERR266349
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D2/ERR266333.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D2/align-star.sh-1.0.0/ERR266333
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D0/ERR266335.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D0/align-star.sh-1.0.0/ERR266335
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D0/ERR266342.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D0/align-star.sh-1.0.0/ERR266342
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D7/ERR266351.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D7/align-star.sh-1.0.0/ERR266351
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D7/ERR266346.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D7/align-star.sh-1.0.0/ERR266346
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D5/ERR266341.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D5/align-star.sh-1.0.0/ERR266341
STAR --genomeDir /project/GCRB/shared/Gencode_human/release_19/prep-star.sh-1.0.0/ --readFilesIn /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D5/ERR266337.fastq.gz \
--readFilesCommand zcat --genomeLoad NoSharedMemory \
--outFilterMultimapNmax 1000 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 \
--outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 \
--alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 \
--outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate \
--outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD \
--outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate \
--quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq/ES_D5/align-star.sh-1.0.0/ERR266337
#!/bin/bash
#SBATCH --job-name=rsem_quant
#SBATCH --partition=super
#SBATCH --nodes=1
#SBATCH --time=0-24:00:00
#SBATCH --output=rsem_quant.%j.out
#SBATCH --error=rsem_quant.%j.err
#SBATCH --mail-user=venkat.malladi@utsouthwestern.edu
#SBATCH --mail-type=ALL

# Quantify expression with RSEM from the transcriptome BAMs written by the
# STAR alignment step (<run>Aligned.toTranscriptome.out.bam).

module load RSEM/1.2.31

rna_dir=/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/RNA-seq
rsem_reference=/project/GCRB/shared/Gencode_human/release_19/prep-rsem.sh-1.0.0/gencode.v19.annotation

# "<time point> <run accession>" pairs, processed in this order.
samples=(
  "ES_D10 ERR266344"
  "ES_D10 ERR266338"
  "ES_D2 ERR266349"
  "ES_D2 ERR266333"
  "ES_D0 ERR266335"
  "ES_D0 ERR266342"
  "ES_D7 ERR266346"
  "ES_D7 ERR266351"
  "ES_D5 ERR266337"
  "ES_D5 ERR266341"
)

for sample in "${samples[@]}"; do
  read -r timepoint run <<< "$sample"
  quant_dir=$rna_dir/$timepoint/quant-rsem.sh-1.0.0

  # The sample name below is a file prefix inside quant_dir, so the
  # directory has to exist before RSEM starts writing.
  mkdir -p "$quant_dir"

  rsem-calculate-expression -p 32 --bam --estimate-rspd --calc-ci \
    --seed 12345 --no-bam-output --ci-memory 30000 \
    "$rna_dir/$timepoint/align-star.sh-1.0.0/${run}Aligned.toTranscriptome.out.bam" \
    "$rsem_reference" \
    "$quant_dir/${run}Aligned_rsem"
done
#!/bin/Rscript
# Load libraries
if(!require("argparse")){
install.packages("argparse", repos="http://cran.r-project.org")
library(argparse)
}
if(!require("jsonlite")){
install.packages("jsonlite", repos="http://cran.r-project.org")
library("jsonlite")
}
source("http://bioconductor.org/biocLite.R")
if (!require("groHMM")){
biocLite("groHMM")
library("groHMM")
}
if (!require("GenomicFeatures")){
biocLite("GenomicFeatures")
library("GenomicFeatures")
}
if (!require("rtracklayer")){
biocLite("rtracklayer")
library("rtracklayer")
}
if(!require("jsonlite")){
install.packages("jsonlite", repos="http://cran.r-project.org")
library("jsonlite")
}
# Currently mouse or human
if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene")){
biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
}
if (!require("TxDb.Mmusculus.UCSC.mm10.knownGene")){
biocLite("TxDb.Mmusculus.UCSC.mm10.knownGene")
library("TxDb.Mmusculus.UCSC.mm10.knownGene")
}
if (!require("org.Hs.eg.db")){
biocLite("org.Hs.eg.db")
library("org.Hs.eg.db")
}
if (!require("org.Mm.eg.db")){
biocLite("org.Mm.eg.db")
library("org.Mm.eg.db")
}
# Create parser object
parser <- ArgumentParser()
# Specify our desired options
parser$add_argument("-r1", "--replicate1", nargs='*', help = "File paths to Replicate 1", required = TRUE)
parser$add_argument("-g", "--genome", help = "The ucsc genome", required = TRUE)
parser$add_argument("-e", "--experiment", nargs='*', help = "The experiment names", required = TRUE)
parser$add_argument("-o", "--out", help = "The output directory", required = TRUE)
parser$add_argument("-b", "--ltprob", type="integer", help = "The LtsProbB", required = TRUE)
parser$add_argument("-u", "--uts", type="integer", help = "The UTS", required = TRUE)
# Parse arguments
args <- parser$parse_args()

# One alignment file is expected per experiment name
replicate_paths <- as.character(unlist(args$replicate1))
if (length(replicate_paths) == 0) {
  stop("At least one alignment file must be given via --replicate1",
       call. = FALSE)
}
if (length(args$experiment) != length(replicate_paths)) {
  stop("--experiment has ", length(args$experiment), " name(s) but ",
       "--replicate1 has ", length(replicate_paths), " file(s)",
       call. = FALSE)
}
# Resolve the alignment paths against the launch directory *before* the
# working directory changes; otherwise relative paths would be looked up
# inside the output directory.
replicate_paths <- normalizePath(replicate_paths, mustWork = TRUE)

# Set the working directory to output directory
setwd(args$out)
# Set mc.cores via groHMM's getCores(4)
options(mc.cores=getCores(4))

# Load every alignment file as GRanges, pool them, and sort once at the end
replicate_reads <- lapply(replicate_paths, function(bam_path) {
  as(readGAlignments(bam_path), "GRanges")
})
alignments <- sort(do.call(c, unname(replicate_reads)))
# Load UCSC Genes: pick the transcript database and the organism annotation
# package that match the requested genome build
if (args$genome == "hg19") {
  kgdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  org <- org.Hs.eg.db
} else if (args$genome == "mm10") {
  kgdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
  org <- org.Mm.eg.db
} else {
  # Fail here with a clear message instead of later on an undefined `kgdb`
  stop("Unsupported genome '", args$genome, "': expected 'hg19' or 'mm10'",
       call. = FALSE)
}
kgtx <- transcripts(kgdb, columns=c("gene_id", "tx_id", "tx_name"))
# Collapse overlapping annotations
kgConsensus <- makeConsensusAnnotations(kgtx, keytype="gene_id",
                                        mc.cores=getOption("mc.cores"))
# Attach gene symbols looked up by Entrez ID
# NOTE(review): the assignment below assumes select() returns exactly one
# row per key, in key order -- confirm for the annotation package in use.
map <- select(org, keys=unlist(mcols(kgConsensus)$gene_id),
              columns=c("SYMBOL"), keytype=c("ENTREZID"))
mcols(kgConsensus)$symbol <- map$SYMBOL
mcols(kgConsensus)$type <- "gene"
# Call transcripts from the pooled reads using the tuning values given on the
# command line
hmmResult <- detectTranscripts(alignments, LtProbB = args$ltprob,
                               UTS = args$uts)
txHMM <- hmmResult$transcripts

# Return the annotations that overlap at least one read (counted on the
# limitToXkb()-truncated features) and whose width lies strictly between the
# 5% and 95% width quantiles of those overlapped annotations.
getExpressedAnnotations <- function(features, reads) {
  overlap_counts <- countOverlaps(limitToXkb(features), reads)
  expressed <- features[overlap_counts != 0, ]
  widths <- width(expressed)
  above_lower <- quantile(widths, .05) < widths
  below_upper <- widths < quantile(widths, .95)
  expressed[above_lower & below_upper, ]
}
conExpressed <- getExpressedAnnotations(features = kgConsensus,
                                        reads = alignments)

# Transcript density around the expressed annotations (plotting disabled)
#png('transcript-density-plot.png') # TODO: Fix plot output
td <- getTxDensity(txHMM, conExpressed, mc.cores = getOption("mc.cores"),
                   plot = FALSE)
#u <- par("usr")
#lines(c(u[1], 0, 0, 1000, 1000, u[2]), c(0,0,u[4]-.04,u[4]-.04,0,0),col="red")
#legend("topright", lty=c(1,1), col=c(2,1), c("ideal", "groHMM"))
#text(c(-500,500), c(.05,.5), c("FivePrimeFP", "TP"))
#dev.off()

# Save the density metrics as pretty-printed JSON in the working directory
density_json <- toJSON(td, null = 'null', auto_unbox = TRUE, pretty = TRUE)
write(density_json, "transcript-density-quality-metrics.json",
      ncolumns = 1, append = FALSE, sep = " ")

# Repair the called transcripts: break them on gene annotations for each
# strand, then combine them against the consensus annotations
txBroken <- c(
  breakTranscriptsOnGenes(txHMM, kgConsensus, strand = "+"),
  breakTranscriptsOnGenes(txHMM, kgConsensus, strand = "-")
)
txFinal <- combineTranscripts(txBroken, kgConsensus)
export(txFinal, con = "final-transcripts.bed")
#!/bin/bash
# call-transcripts-timecourse-norep.sh
#
# Wrapper around call-transcripts-timecourse-norep.R: validates the options,
# creates the versioned output directory, runs the R script and records the
# inputs, parameters and outputs in metadata.json.

script_name="call-transcripts-timecourse-norep.sh"
script_ver="1.0.0"

#Help function
usage() {
  echo "-h  Help documentation for $script_name"
  echo "-f  --File path to Replicate 1 alignments"
  echo "-r  --UCSC Reference genome (e.g. hg19, mm10)"
  echo "-o  --Path to output directory"
  echo "-e  --The experiment names."
  echo "-b  --The LtsProbB value"
  echo "-u  --The UTS value"
  echo "-v  --Version of script"
  echo "Example: $script_name -f 'foo_1.bam man_1.bam' -r 'hg19' -e 'foo man' -o '/path/to/output/dir/' -b -200 -u 5"
  exit 1
}

# Version function
version(){
    echo "$script_name $script_ver"
    exit 1
}

main(){

    # Load required modules
    module load python/2.7.x-anaconda
    module load R/3.1.0-intel

    # Parsing options
    OPTIND=1 # Reset OPTIND
    while getopts :f:r:e:b:u:o:vh opt
        do
            case $opt in
                f) aln1=$OPTARG;;
                r) ucsc_reference=$OPTARG;;
                e) exp=$OPTARG;;
                o) out=$OPTARG;;
                b) ltprob=$OPTARG;;
                u) uts=$OPTARG;;
                v) version;;
                h) usage;;
                # The leading ':' in the optstring silences getopts, so
                # report bad options here instead of ignoring them.
                \?) echo "Unknown option: -$OPTARG" >&2; usage;;
                :) echo "Option -$OPTARG requires an argument" >&2; usage;;
            esac
        done

    shift $((OPTIND - 1))

    # Check for mandatory options
    if [[ -z $aln1 ]] || [[ -z $ucsc_reference ]] || [[ -z $ltprob ]] || [[ -z $uts ]] || [[ -z $exp ]] || [[ -z $out ]]; then
        usage
    fi

    # Define the out directory
    out_dir=$out/$script_name-$script_ver

    # Make sure directories exist (-p also creates $out when missing)
    mkdir -p "$out_dir"

    # Run call-transcripts-timecourse-norep.R
    if [ ! -e "$out_dir/metadata.json" ]; then
        # -f and -e each carry a whitespace-separated list; split them so
        # every file / name reaches the R script as its own argument.
        read -r -a array_aln1 <<< "$aln1"
        read -r -a array_exp <<< "$exp"

        # Stop before writing metadata.json when the R script fails;
        # otherwise a failed run would be treated as finished next time.
        if ! Rscript call-transcripts-timecourse-norep.R --replicate1 "${array_aln1[@]}" --experiment "${array_exp[@]}" --genome "$ucsc_reference" --out "$out_dir" --ltprob "$ltprob" --uts "$uts"; then
            echo "* call-transcripts-timecourse-norep.R failed; metadata.json was not written" >&2
            exit 1
        fi

        # Get input and output files and then print out metadata.json file
        input_files=("${array_aln1[@]}")
        printf -v input "\"%s\"," "${input_files[@]}"
        input=${input%,}
        output_file=("$out_dir"/*)
        printf -v output "\"%s\"," "${output_file[@]}"
        output=${output%,}
        printf '{"script name":"%s","script version":"%s", "input files": [%s], "LtsProbB": [%s], "UTS": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$ltprob" "$uts" "$output" | python -m json.tool > "$out_dir/metadata.json"
    else
        echo "* HMM tune has been made "
    fi
}

main "$@"
#!/bin/Rscript
# Load libraries
if(!require("argparse")){
install.packages("argparse", repos="http://cran.r-project.org")
library(argparse)
}
if(!require("jsonlite")){
install.packages("jsonlite", repos="http://cran.r-project.org")
library("jsonlite")
}
source("http://bioconductor.org/biocLite.R")
if (!require("groHMM")){
biocLite("groHMM")
library("groHMM")
}
if (!require("GenomicFeatures")){
biocLite("GenomicFeatures")
library("GenomicFeatures")
}
if (!require("rtracklayer")){
biocLite("rtracklayer")
library("rtracklayer")
}
if(!require("jsonlite")){
install.packages("jsonlite", repos="http://cran.r-project.org")
library("jsonlite")
}
# Currently mouse or human
if (!require("TxDb.Hsapiens.UCSC.hg19.knownGene")){
biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
}
if (!require("TxDb.Mmusculus.UCSC.mm10.knownGene")){
biocLite("TxDb.Mmusculus.UCSC.mm10.knownGene")
library("TxDb.Mmusculus.UCSC.mm10.knownGene")
}
if (!require("org.Hs.eg.db")){
biocLite("org.Hs.eg.db")
library("org.Hs.eg.db")
}
if (!require("org.Mm.eg.db")){
biocLite("org.Mm.eg.db")
library("org.Mm.eg.db")
}
# Create parser object
parser <- ArgumentParser()
# Specify our desired options
parser$add_argument("-r1", "--replicate1", nargs=1, help = "File paths to Replicate 1", required = TRUE)
parser$add_argument("-g", "--genome", help = "The ucsc genome", required = TRUE)
parser$add_argument("-o", "--out", help = "The output directory", required = TRUE)
parser$add_argument("-b", "--ltprob", type="integer", help = "The LtsProbB", required = TRUE)
parser$add_argument("-u", "--uts", type="integer", help = "The UTS", required = TRUE)
# Parse arguments
args <- parser$parse_args()

# Resolve the alignment path against the launch directory *before* the
# working directory changes; otherwise a relative path would be looked up
# inside the output directory.
replicate_path <- normalizePath(as.character(unlist(args$replicate1)),
                                mustWork = TRUE)

# Set the working directory to output directory
setwd(args$out)
# Set mc.cores via groHMM's getCores(4)
options(mc.cores=getCores(4))

# Load the alignment file as GRanges and sort it
alignment_1 <- as(readGAlignments(replicate_path), "GRanges")
alignments <- sort(c(alignment_1))
# Load UCSC Genes: pick the transcript database and the organism annotation
# package that match the requested genome build
if (args$genome == "hg19") {
  kgdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  org <- org.Hs.eg.db
} else if (args$genome == "mm10") {
  kgdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
  org <- org.Mm.eg.db
} else {
  # Fail here with a clear message instead of later on an undefined `kgdb`
  stop("Unsupported genome '", args$genome, "': expected 'hg19' or 'mm10'",
       call. = FALSE)
}
kgtx <- transcripts(kgdb, columns=c("gene_id", "tx_id", "tx_name"))
# Collapse overlapping annotations
kgConsensus <- makeConsensusAnnotations(kgtx, keytype="gene_id",
                                        mc.cores=getOption("mc.cores"))
# Attach gene symbols looked up by Entrez ID
# NOTE(review): the assignment below assumes select() returns exactly one
# row per key, in key order -- confirm for the annotation package in use.
map <- select(org, keys=unlist(mcols(kgConsensus)$gene_id),
              columns=c("SYMBOL"), keytype=c("ENTREZID"))
mcols(kgConsensus)$symbol <- map$SYMBOL
mcols(kgConsensus)$type <- "gene"
# Call transcripts from the reads using the tuning values given on the
# command line
hmmResult <- detectTranscripts(reads = alignments, LtProbB = args$ltprob,
                               UTS = args$uts)
txHMM <- hmmResult$transcripts

# Return the annotations that overlap at least one read (counted on the
# limitToXkb()-truncated features) and whose width lies strictly between the
# 5% and 95% width quantiles of those overlapped annotations.
getExpressedAnnotations <- function(features, reads) {
  overlap_counts <- countOverlaps(limitToXkb(features), reads)
  expressed <- features[overlap_counts != 0, ]
  widths <- width(expressed)
  above_lower <- quantile(widths, .05) < widths
  below_upper <- widths < quantile(widths, .95)
  expressed[above_lower & below_upper, ]
}
conExpressed <- getExpressedAnnotations(features = kgConsensus,
                                        reads = alignments)

# Transcript density around the expressed annotations (plotting disabled)
#png('transcript-density-plot.png') # TODO: Fix plot output
td <- getTxDensity(txHMM, conExpressed, mc.cores = getOption("mc.cores"),
                   plot = FALSE)
#u <- par("usr")
#lines(c(u[1], 0, 0, 1000, 1000, u[2]), c(0,0,u[4]-.04,u[4]-.04,0,0),col="red")
#legend("topright", lty=c(1,1), col=c(2,1), c("ideal", "groHMM"))
#text(c(-500,500), c(.05,.5), c("FivePrimeFP", "TP"))
#dev.off()

# Save the density metrics as pretty-printed JSON in the working directory
density_json <- toJSON(td, null = 'null', auto_unbox = TRUE, pretty = TRUE)
write(density_json, "transcript-density-quality-metrics.json",
      ncolumns = 1, append = FALSE, sep = " ")

# Repair the called transcripts: break them on gene annotations for each
# strand, then combine them against the consensus annotations
txBroken <- c(
  breakTranscriptsOnGenes(txHMM, kgConsensus, strand = "+"),
  breakTranscriptsOnGenes(txHMM, kgConsensus, strand = "-")
)
txFinal <- combineTranscripts(txBroken, kgConsensus)
export(txFinal, con = "final-transcripts.bed")
#!/bin/bash
# call-transcripts.sh
#
# Wrapper around call-transcripts.R: validates the options, creates the
# versioned output directory, runs the R script and records the inputs,
# parameters and outputs in metadata.json.

script_name="call-transcripts.sh"
script_ver="1.0.0"

#Help function
usage() {
  echo "-h  Help documentation for $script_name"
  echo "-f  --File path to Replicate 1 alignments"
  echo "-r  --UCSC Reference genome (e.g. hg19, mm10)"
  echo "-o  --Path to output directory"
  echo "-b  --The LtsProbB value"
  echo "-u  --The UTS value"
  echo "-v  --Version of script"
  echo "Example: $script_name -f 'foo_1.bam' -r 'hg19' -o '/path/to/output/dir/' -b -200 -u 5"
  exit 1
}

# Version function
version(){
    echo "$script_name $script_ver"
    exit 1
}

main(){

    # Load required modules
    module load python/2.7.x-anaconda
    module load R/3.1.0-intel

    # Parsing options
    OPTIND=1 # Reset OPTIND
    while getopts :f:r:o:b:u:vh opt
        do
            case $opt in
                f) aln1=$OPTARG;;
                r) ucsc_reference=$OPTARG;;
                o) out=$OPTARG;;
                b) ltprob=$OPTARG;;
                u) uts=$OPTARG;;
                v) version;;
                h) usage;;
                # The leading ':' in the optstring silences getopts, so
                # report bad options here instead of ignoring them.
                \?) echo "Unknown option: -$OPTARG" >&2; usage;;
                :) echo "Option -$OPTARG requires an argument" >&2; usage;;
            esac
        done

    shift $((OPTIND - 1))

    # Check for mandatory options
    if [[ -z $aln1 ]] || [[ -z $ucsc_reference ]] || [[ -z $ltprob ]] || [[ -z $uts ]] || [[ -z $out ]]; then
        usage
    fi

    # Define the out directory
    out_dir=$out/$script_name-$script_ver

    # Make sure directories exist (-p also creates $out when missing)
    mkdir -p "$out_dir"

    # Run call-transcripts.R
    if [ ! -e "$out_dir/metadata.json" ]; then
        # Stop before writing metadata.json when the R script fails;
        # otherwise a failed run would be treated as finished next time.
        if ! Rscript call-transcripts.R --replicate1 "$aln1" --genome "$ucsc_reference" --out "$out_dir" --ltprob "$ltprob" --uts "$uts"; then
            echo "* call-transcripts.R failed; metadata.json was not written" >&2
            exit 1
        fi

        # Get input and output files and then print out metadata.json file
        input_files=("$aln1")
        printf -v input "\"%s\"," "${input_files[@]}"
        input=${input%,}
        output_file=("$out_dir"/*)
        printf -v output "\"%s\"," "${output_file[@]}"
        output=${output%,}
        printf '{"script name":"%s","script version":"%s", "input files": [%s], "LtsProbB": [%s], "UTS": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$ltprob" "$uts" "$output" | python -m json.tool > "$out_dir/metadata.json"
    else
        aln1_fn=$(basename "$aln1")
        echo "* Transcripts has been made from $aln1_fn"
    fi
}

main "$@"
# Make universe of transcript regions: pool the final-transcripts.bed calls of
# every GRO-seq time point into one file.
# NOTE(review): BEDOPS tools expect sort-bed-sorted input and these BED files
# are not sorted here -- confirm they are already sorted.
bedops --everything /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D10/call-transcripts.sh-1.0.0/final-transcripts.bed \
/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D2/call-transcripts.sh-1.0.0/final-transcripts.bed \
/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D0/call-transcripts.sh-1.0.0/final-transcripts.bed \
/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D7/call-transcripts.sh-1.0.0/final-transcripts.bed \
/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D5/call-transcripts.sh-1.0.0/final-transcripts.bed \
> /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.bed
# Separate into +/- strand (column 6 holds the strand)
awk '$6 == "+"' /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.bed > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.pos.bed
awk '$6 == "-"' /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.bed > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.neg.bed
# Find plus-strand transcripts that overlap another one by at least 80% in
# both directions: keep only bedmap result lines that list more than one
# element, split them one element per line, then sort and de-duplicate.
# NOTE(review): only the plus-strand file is processed and the result is
# piped to `head` rather than saved -- this looks like an exploratory check.
bedmap --echo-map --fraction-both 0.8 /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.pos.bed \
| awk '(split($0, a, ";") > 1)' - \
| sed 's/\;/\n/g' - \
| sort-bed - \
| uniq - \
| head
# Make universe of H3K27ac peak regions across the time course
chip_dir=/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq
universe_dir=$chip_dir/universe_h3k27ac

# Intermediate files (removed again at the end). The names are plain ASCII:
# no invisible characters after the extension.
overlap_peaks=$universe_dir/overlap_peaks.narrowPeak
unique_peaks=$universe_dir/unique_peaks.narrowPeak
unique_peaks_sorted=$universe_dir/unique_peaks_sorted.narrowPeak
merge_overlap_peaks=$universe_dir/merge_overlap_peaks.bed
universe_peaks=$universe_dir/universe_peaks.bed

# Sort the replicated peaks of every time point into the universe directory
for timepoint in ES_D10 ES_D2 ES_D0 ES_D7 ES_D5; do
  sort-bed "$chip_dir/$timepoint/H3K27ac/overlap-peaks.sh-1.0.0/experiment_pooled.sorted.narrowPeak.replicated.narrowPeak" > "$universe_dir/$timepoint.narrowPeak"
done

# Find peaks that overlap another peak by at least 50% in both directions:
# keep bedmap result lines listing more than one peak, split them one peak
# per line, then sort and de-duplicate.
bedops --everything "$universe_dir/ES_D0.narrowPeak" "$universe_dir/ES_D2.narrowPeak" "$universe_dir/ES_D5.narrowPeak" "$universe_dir/ES_D7.narrowPeak" "$universe_dir/ES_D10.narrowPeak" \
  | bedmap --echo-map --fraction-both 0.5 - \
  | awk '(split($0, a, ";") > 1)' - \
  | sed 's/\;/\n/g' - \
  | sort-bed - \
  | uniq - \
  > "$overlap_peaks"

# Get peaks that are not represented in overlap peaks.
# Start from an empty file so a leftover from an interrupted earlier run
# cannot leak into the result of the appends below.
: > "$unique_peaks"
# Iterate over the glob directly instead of parsing `ls` output
for peaks in "$universe_dir"/ES_D*narrowPeak; do
  [ -e "$peaks" ] || continue
  bedtools intersect -v -wa -f 1.0 -r -a "$peaks" -b "$overlap_peaks" >> "$unique_peaks"
done
sort-bed "$unique_peaks" | cut -f1,2,3 > "$unique_peaks_sorted"

# Merge the overlapping peaks, then take the union with the unique peaks
bedops -m --range -1 "$overlap_peaks" > "$merge_overlap_peaks"
bedops --everything "$merge_overlap_peaks" "$unique_peaks_sorted" > "$universe_peaks"

# Merge peaks that lie within 500 bp of each other, even when not adjoining
bedtools merge -i "$universe_peaks" -d 500 > "$universe_dir/universe_peaks.merge.bed"

# Remove extra files
rm "$overlap_peaks"
rm "$unique_peaks"
rm "$unique_peaks_sorted"
rm "$merge_overlap_peaks"
rm "$universe_peaks"
# Make universe of H3K4me1 peak regions.
# Produces $universe_dir/universe_peaks.merge.bed from the replicated peak
# calls of the five differentiation time points (D0, D2, D5, D7, D10).
chipseq_dir=/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/ChIP-seq
universe_dir=$chipseq_dir/universe_h3k4me1
replicated_peaks=overlap-peaks.sh-1.0.0/experiment_pooled.sorted.narrowPeak.replicated.narrowPeak

# Sort each time point's replicated peaks into the universe directory.
# Every time point is read from its H3K4me1 directory; ES_D5 used to be read
# from the H3K27ac calls, which mixed the two marks in this universe.
for day in ES_D10 ES_D2 ES_D0 ES_D7 ES_D5; do
    sort-bed "$chipseq_dir/$day/H3K4me1/$replicated_peaks" > "$universe_dir/$day.narrowPeak"
done

# Find overlap of peaks with at least 50% overlap:
#   bedmap --fraction-both 0.5 lists, per peak, the peaks sharing >= 50%
#   reciprocal overlap (";"-separated); awk keeps rows listing more than one
#   peak; sed puts each listed peak on its own line; sort-bed + uniq
#   de-duplicate the result.
bedops --everything \
    "$universe_dir/ES_D0.narrowPeak" \
    "$universe_dir/ES_D2.narrowPeak" \
    "$universe_dir/ES_D5.narrowPeak" \
    "$universe_dir/ES_D7.narrowPeak" \
    "$universe_dir/ES_D10.narrowPeak" \
    | bedmap --echo-map --fraction-both 0.5 - \
    | awk '(split($0, a, ";") > 1)' - \
    | sed 's/\;/\n/g' - \
    | sort-bed - \
    | uniq - \
    > "$universe_dir/overlap_peaks.narrowPeak"

# Get peaks that are not represented in overlap peaks.
# The target is truncated first so that leftovers from an aborted earlier run
# are not appended to; the glob is expanded by the shell itself instead of
# parsing the output of ls.
: > "$universe_dir/unique_peaks.narrowPeak"
for peaks in "$universe_dir"/ES_D*narrowPeak; do
    bedtools intersect -v -wa -f 1.0 -r -a "$peaks" -b "$universe_dir/overlap_peaks.narrowPeak" \
        >> "$universe_dir/unique_peaks.narrowPeak"
done
sort-bed "$universe_dir/unique_peaks.narrowPeak" | cut -f1,2,3 > "$universe_dir/unique_peaks_sorted.narrowPeak"

# Merge the overlapping peaks, then take the union with the unique peaks.
bedops -m --range -1 "$universe_dir/overlap_peaks.narrowPeak" > "$universe_dir/merge_overlap_peaks.bed"
bedops --everything "$universe_dir/merge_overlap_peaks.bed" "$universe_dir/unique_peaks_sorted.narrowPeak" > "$universe_dir/universe_peaks.bed"

# Merge peaks that overlap or lie within 500 bp of each other (-d 500).
bedtools merge -i "$universe_dir/universe_peaks.bed" -d 500 > "$universe_dir/universe_peaks.merge.bed"

# Remove the intermediate files.
rm "$universe_dir/overlap_peaks.narrowPeak" \
   "$universe_dir/unique_peaks.narrowPeak" \
   "$universe_dir/unique_peaks_sorted.narrowPeak" \
   "$universe_dir/merge_overlap_peaks.bed" \
   "$universe_dir/universe_peaks.bed"
# Keep the annotation records whose 14th whitespace-separated field is exactly
# "protein_coding"; (quotes and semicolon included) -- presumably the
# gene_type attribute value in the GENCODE v19 layout; confirm if the
# annotation release changes.
awk '$14 == "\"protein_coding\";"' gencode.v19.annotation.gtf > gencode.v19.annotation_protein_coding.gtf
#!/bin/Rscript
# Load libraries
# Every dependency is attached if it is already installed; otherwise it is
# installed first and then attached.

# Attach the CRAN package `pkg`, installing it from CRAN when it is missing.
load_cran_package <- function(pkg) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg, repos = "http://cran.r-project.org")
    library(pkg, character.only = TRUE)
  }
}

# Attach the Bioconductor package `pkg`, installing it through biocLite when
# it is missing. The biocLite installer is downloaded only at that point, so
# a fully provisioned machine needs no network access to start the script.
load_bioc_package <- function(pkg) {
  if (!require(pkg, character.only = TRUE)) {
    if (!exists("biocLite", mode = "function")) {
      source("http://bioconductor.org/biocLite.R")
    }
    biocLite(pkg)
    library(pkg, character.only = TRUE)
  }
}

load_cran_package("optparse")

bioc_packages <- c(
  "groHMM",
  "GenomicFeatures",
  # Currently mouse or human
  "TxDb.Hsapiens.UCSC.hg19.knownGene",
  "TxDb.Mmusculus.UCSC.mm10.knownGene",
  "org.Hs.eg.db",
  "org.Mm.eg.db"
)
for (pkg in bioc_packages) {
  load_bioc_package(pkg)
}
# Specify our desired options
# The alignment options are long-only: optparse short flags are a dash plus a
# single letter, so "-a1"/"-a2" cannot be registered as short flags.
# --alignment2 is accepted but not used by the tuning below.
option_list <- list(
  make_option("--alignment1", action = "store", type = "character",
              help = "File path to Replicate 1"),
  make_option("--alignment2", action = "store", type = "character",
              help = "File path to Replicate 2"),
  make_option(c("-g", "--genome"), action = "store", type = "character",
              help = "The UCSC genome to use"),
  make_option(c("-o", "--out"), action = "store", type = "character",
              help = "The output directory")
)

# Parse arguments
opts <- parse_args(OptionParser(option_list = option_list))

# Fail early, with a readable message, on missing or unsupported options.
required <- c("alignment1", "genome", "out")
is_missing <- vapply(required, function(nm) is.null(opts[[nm]]), logical(1))
if (any(is_missing)) {
  stop("Missing required option(s): ",
       paste0("--", required[is_missing], collapse = ", "), call. = FALSE)
}
if (!opts$genome %in% c("hg19", "mm10")) {
  stop("Unsupported genome '", opts$genome, "': expected 'hg19' or 'mm10'",
       call. = FALSE)
}

# Resolve both paths against the directory the script was started from.
# The working directory is left alone so that a relative --alignment1 keeps
# pointing at the intended file; the result is written into out_dir
# explicitly at the end of the script.
alignment_path <- normalizePath(opts$alignment1, mustWork = TRUE)
out_dir <- normalizePath(opts$out, mustWork = TRUE)

# Set mc.cores to 1
options(mc.cores = getCores(1))

# Load alignment files
alignment_1 <- as(readGAlignments(alignment_path), "GRanges")
alignments <- sort(alignment_1)

# Load gene models: Gencode v19 GTF for hg19, UCSC knownGene for mm10
if (opts$genome == "hg19") {
  gencode <- file.path("/project/GCRB/shared/Gencode_human/release_19/gencode.v19.annotation.gtf")
  # makeTranscriptDbFromGFF was renamed makeTxDbFromGFF in later
  # GenomicFeatures releases; use whichever the installed version provides.
  make_txdb <- if (exists("makeTxDbFromGFF", mode = "function")) {
    makeTxDbFromGFF
  } else {
    makeTranscriptDbFromGFF
  }
  kgdb <- make_txdb(gencode, format = "gtf")
  org <- org.Hs.eg.db
} else {
  kgdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
  org <- org.Mm.eg.db
}
kgtx <- transcripts(kgdb, columns = c("GENEID", "TXID", "TXNAME"))

# Collapse overlapping annotations
kgConsensus <- makeConsensusAnnotations(kgtx, keytype = "GENEID",
                                        mc.cores = getOption("mc.cores"))

# Tune HMM over a 5 x 7 grid: every LtProbB value is paired with every UTS
# value (35 combinations, LtProbB varying slowest).
tune <- data.frame(
  LtProbB = rep(c(-100, -150, -200, -250, -300), each = 7),
  UTS = rep(c(5, 10, 15, 20, 25, 30, 35), times = 5)
)
evals <- mclapply(seq_len(nrow(tune)), function(x) {
  hmm <- detectTranscripts(reads = alignments, LtProbB = tune$LtProbB[x],
                           UTS = tune$UTS[x])
  e <- evaluateHMMInAnnotations(hmm$transcripts, kgConsensus)
  e$eval
}, mc.cores = getOption("mc.cores"), mc.silent = TRUE)

# mclapply hands back "try-error" objects for failed workers instead of
# raising; stop here rather than binding them into the result table.
failed <- vapply(evals, inherits, logical(1), what = "try-error")
if (any(failed)) {
  stop("HMM evaluation failed for parameter set(s): ",
       paste(which(failed), collapse = ", "), call. = FALSE)
}

tune <- cbind(tune, do.call(rbind, evals))
write.table(tune, file = file.path(out_dir, "tune.tsv"), quote = FALSE,
            sep = "\t", row.names = FALSE, col.names = TRUE)
#!/bin/bash
# tune-hmm.sh
# Name and version of this wrapper. Both are printed by version() and are
# used to build the output sub-directory name and the metadata.json entries.
script_ver='1.0.0'
script_name='tune-hmm.sh'
# Help function: print the option summary plus an example invocation to
# stdout, then terminate the script with status 1.
usage() {
    printf '%s\n' \
        "-h Help documentation for $script_name" \
        "-f --File path to Replicate 1 alignments" \
        "-r --UCSC Reference genome (e.g. hg19, mm10)" \
        "-o --Path to output directory" \
        "-v --Version of script" \
        "Example: $script_name -f 'foo_1.bam' -r 'hg19' -o '/path/to/output/dir/'"
    exit 1
}
# Version function: print "<script name> <script version>" to stdout, then
# terminate the script with status 1.
version(){
    printf '%s %s\n' "$script_name" "$script_ver"
    exit 1
}
# Entry point: parse the command line, run tune-hmm.R on the alignment file
# and record the run in <out>/<script_name>-<script_ver>/metadata.json.
#   -f  alignment file of replicate 1 (mandatory)
#   -r  UCSC reference genome, e.g. hg19 or mm10 (mandatory)
#   -o  output directory (mandatory)
#   -v  print the version and exit
#   -h  print the help and exit
# When metadata.json already exists the R step is skipped.
main(){
    local opt aln1 ucsc_reference out out_dir
    local input output aln1_fn
    local -a input_files output_file

    # Load required modules
    module load python/2.7.x-anaconda
    module load R/3.1.0-intel

    # Parsing options
    OPTIND=1 # Reset OPTIND
    while getopts :f:r:o:vh opt
    do
        case $opt in
            f) aln1=$OPTARG;;
            r) ucsc_reference=$OPTARG;;
            o) out=$OPTARG;;
            v) version;;
            h) usage;;
            # getopts runs in silent mode (leading ':'), so problems are
            # reported here instead of being dropped.
            :) echo "Option -$OPTARG requires an argument." >&2; usage;;
            \?) echo "Invalid option: -$OPTARG" >&2; usage;;
        esac
    done
    shift $((OPTIND - 1))

    # Check for mandatory options
    if [[ -z $aln1 ]] || [[ -z $ucsc_reference ]] || [[ -z $out ]]; then
        usage
    fi

    # Define the out directory
    out_dir="$out/$script_name-$script_ver"

    # Make sure directories exist
    if ! mkdir -p "$out_dir"; then
        echo "* Could not create output directory $out_dir." >&2
        exit 1
    fi

    # Run tune-hmm.R
    if [ ! -e "$out_dir/metadata.json" ]; then
        # Stop before writing metadata.json when the R step fails; otherwise
        # the failed run would look finished and be skipped next time.
        if ! Rscript tune-hmm.R --alignment1 "$aln1" --genome "$ucsc_reference" --out "$out_dir"; then
            echo "* tune-hmm.R failed for $aln1; metadata.json was not written." >&2
            exit 1
        fi

        # Get input and output files and then print out metadata.json file
        input_files=("$aln1")
        printf -v input "\"%s\"," "${input_files[@]}"
        input=${input%,}
        # Everything present in out_dir at this point counts as output;
        # metadata.json itself does not exist yet.
        output_file=("$out_dir"/*)
        printf -v output "\"%s\"," "${output_file[@]}"
        output=${output%,}
        printf '{"script name":"%s","script version":"%s", "input files": [%s], "output files": [%s]}' "$script_name" "$script_ver" "$input" "$output" | python -m json.tool > "$out_dir/metadata.json"
    else
        aln1_fn=$(basename "$aln1")
        echo "* HMM tune has been made from $aln1_fn. "
    fi
}
main "$@"
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment