Commit 6667781d authored by Venkat Malladi's avatar Venkat Malladi

Updated analysis of RIP-seq data.

parent 4e4e7930
[submodule "RSEM"]
path = RSEM
url = https://github.com/deweylab/RSEM
experiment,file1,file2
ALK,ALK_RNA1_ACAGTG_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results,ALK_RNA2_GATCAG_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
ALKT,ALKT_RNA1_GCCAAT_L007_R1.rDNA.filtered.fastq.gz_filtered.genes.results,ALKT_RNA2_TAGCTT_L007_R1.rDNA.filtered.fastq.gz_filtered.genes.results
experiment,file1,file2
MLK,MLK_RNA1_AGTCAA_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results,MLK_RNA2_GTCCGC_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
MLKE,MLKE_RNA1_AGTTCC_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results,MLKE_RNA2_GTGAAA_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
Subproject commit 5c360eff9053a33fa0b086f8ca49db8fc40e5525
No preview for this file type
#!/bin/bash
#SBATCH --job-name=rsem_diff
#SBATCH --partition=super
#SBATCH --nodes=1
#SBATCH --time=0-24:00:00
#SBATCH --output=rsem_diff.%j.out
#SBATCH --error=rsem_diff.%j.err
#SBATCH --mail-user=venkat.malladi@utsouthwestern.edu
#SBATCH --mail-type=ALL

# Run EBSeq differential-expression testing (via RSEM's rsem-run-ebseq wrapper)
# on the gene-level data matrix of each experiment.

module load iGenomes/2013-03-25
module load RSEM/1.2.31

PROJECT_DIR=/project/GCRB/Lee_Lab/s163035/DK_RIP-seq
MATRIX_DIR="${PROJECT_DIR}/gene_matrix"
RESULT_DIR="${PROJECT_DIR}/diff_genes"

# Input is ${MATRIX_DIR}/<experiment>.txt, output is ${RESULT_DIR}/<experiment>.results.
# Building both paths from the experiment name keeps them consistent; the
# previous hand-written MLKE input path was missing the "." before "txt".
# "2,2" is the per-condition replicate count: two conditions with two
# replicates each, in the column order of the matrix.
for experiment in ALK ALKT MLK MLKE; do
    rsem-run-ebseq "${MATRIX_DIR}/${experiment}.txt" 2,2 "${RESULT_DIR}/${experiment}.results"
done
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env python
# -*- coding: latin-1 -*-
'''Take the average FPKM from a list of RSEM output files.'''
EPILOG = '''
For more details:
%(prog)s --help
'''
import numpy as np
import pandas as pd
import argparse
import csv
def get_args():
    '''Parse the command line and return the resulting namespace.

    Two options are defined, both required:
      -e / --experiments  stored as ``experiments``
      -f / --factor       stored as ``factor``

    The module docstring is used as the help description and EPILOG as
    the help epilog.
    '''
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        epilog=EPILOG,
    )
    cli.add_argument(
        '-e', '--experiments',
        required=True,
        help="Comma separated file of experiment name followed by file location.",
    )
    cli.add_argument(
        '-f', '--factor',
        required=True,
        help="Factor that is being analyzed.",
    )
    return cli.parse_args()
def _read_fpkm(path):
    '''Return the FPKM column of one tab-separated results file, indexed by gene_id.'''
    table = pd.read_csv(path, sep='\t')
    # Explicit column lookups raise KeyError when a column is absent instead
    # of silently producing an all-NaN column.
    return pd.Series(table['FPKM'].values, index=table['gene_id'].values)


def main():
    '''Average replicate FPKM values per experiment and write them to CSV.

    Reads the experiments file named by ``--experiments`` (CSV with the
    columns ``experiment``, ``file1`` and ``file2``).  For every row the FPKM
    values of the two replicate files are aligned on gene_id and their
    row-wise mean and standard deviation are computed.

    Writes two files in the current directory:
      <factor>.csv      mean FPKM, one column per experiment
      <factor>_err.csv  standard deviation of FPKM, one column per experiment

    Raises:
        KeyError: if the experiments file lacks one of the expected columns,
            or a replicate file lacks ``gene_id`` or ``FPKM``.
    '''
    args = get_args()

    names = []
    means = []
    stds = []

    # The context manager closes the experiments file once all rows are read.
    with open(args.experiments) as handle:
        for exp in csv.DictReader(handle):
            names.append(exp['experiment'])

            # Align both replicates on gene_id, one column per replicate.
            replicates = pd.concat(
                [_read_fpkm(exp['file1']), _read_fpkm(exp['file2'])],
                axis=1,
                keys=['Rep1_FPKM', 'Rep2_FPKM'],
            )
            means.append(replicates.mean(axis=1))
            stds.append(replicates.std(axis=1))

    if names:
        # Concatenating once with keys= labels each column with its experiment
        # and always yields a DataFrame, even for a single experiment.
        fpkm_all = pd.concat(means, axis=1, keys=names)
        fpkm_err = pd.concat(stds, axis=1, keys=names)
    else:
        # No experiment rows: pd.concat rejects an empty list, so fall back
        # to empty frames and still write both output files.
        fpkm_all = pd.DataFrame()
        fpkm_err = pd.DataFrame()

    fpkm_all.to_csv(args.factor + ".csv", index_label="gene_id")
    fpkm_err.to_csv(args.factor + "_err.csv", index_label="gene_id")


if __name__ == '__main__':
    main()
# RIP-seq analysis
#!/bin/bash
#SBATCH --job-name=rsem_matrix
#SBATCH --partition=super
#SBATCH --nodes=1
#SBATCH --time=0-24:00:00
#SBATCH --output=rsem_matrix.%j.out
#SBATCH --error=rsem_matrix.%j.err
#SBATCH --mail-user=venkat.malladi@utsouthwestern.edu
#SBATCH --mail-type=ALL

# Build one RSEM data matrix per experiment (ALK, ALKT, MLK, MLKE) from four
# genes.results files each: the two PI samples followed by the two PARP1 samples.

module load iGenomes/2013-03-25
module load RSEM/1.2.31

# write_matrix OUTPUT INPUT...
# Runs rsem-generate-data-matrix on the INPUT files, in the order given, and
# redirects its standard output to OUTPUT.
write_matrix() {
    local matrix_file=$1
    shift
    rsem-generate-data-matrix "$@" > "${matrix_file}"
}

# ALK
write_matrix /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/gene_matrix/ALK.txt \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALK_RIP1_PI/gene-counts.sh-1.0.0/ALK_RIP1_PI_AGTCAA_L006_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALK_RIP2_PI/gene-counts.sh-1.0.0/ALK_RIP2_PI_GTTTCG_L007_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALK_RIP1_PARP1/gene-counts.sh-1.0.0/ALK_RIP1_PARP1_AGTTCC_L006_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALK_RIP2_PARP1/gene-counts.sh-1.0.0/ALK_RIP2_PARP1_CGTACG_L006_R1.rDNA.filtered.fastq.gz_filtered.genes.results

# ALKT
write_matrix /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/gene_matrix/ALKT.txt \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALKT_RIP1_PI/gene-counts.sh-1.0.0/ALKT_RIP1_PI_ATGTCA_L003_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALKT_RIP2_PI/gene-counts.sh-1.0.0/ALKT_RIP2_PI_GAGTGG_L003_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALKT_RIP1_PARP1/gene-counts.sh-1.0.0/ALKT_RIP1_PARP1_CCGTCC_L006_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALKT_RIP2_PARP1/gene-counts.sh-1.0.0/ALKT_RIP2_PARP1_GGTAGC_L006_R1.rDNA.filtered.fastq.gz_filtered.genes.results

# MLK
write_matrix /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/gene_matrix/MLK.txt \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLK_RIP1_PI/gene-counts.sh-1.0.0/MLK_RIP1_PI_ACAGTG_L006_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLK_RIP2_PI/gene-counts.sh-1.0.0/MLK_RIP2_PI_GTTTCG_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLK_RIP1_PARP1/gene-counts.sh-1.0.0/MLK_RIP1_PARP1_GCCAAT_L005_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLK_RIP2_PARP1/gene-counts.sh-1.0.0/MLK_RIP2_PARP1_CGTACG_L005_R1.rDNA.filtered.fastq.gz_filtered.genes.results

# MLKE
write_matrix /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/gene_matrix/MLKE.txt \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLKE_RIP1_PI/gene-counts.sh-1.0.0/MLKE_RIP1_PI_CAGATC_L006_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLKE_RIP2_PI/gene-counts.sh-1.0.0/MLKE_RIP2_PI_GAGTGG_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLKE_RIP1_PARP1_SecondRun/gene-counts.sh-1.0.0/MLKE_RIP1_PARP1_ACTTGA_L004_R1.rDNA.filtered.fastq.gz_filtered.genes.results \
    /project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RIPseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLKE_RIP2_PARP1_16/gene-counts.sh-1.0.0/MLKE_RIP2_PARP1_16_CCGTCC_L007_R1.rDNA.filtered.fastq.gz_filtered.genes.results
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALK_RNA1/gene-counts.sh-1.0.0/ALK_RNA1_ACAGTG_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALK_RNA2/gene-counts.sh-1.0.0/ALK_RNA2_GATCAG_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALKT_RNA1/gene-counts.sh-1.0.0/ALKT_RNA1_GCCAAT_L007_R1.rDNA.filtered.fastq.gz_filtered.genes.results
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_ALKT_RNA2/gene-counts.sh-1.0.0/ALKT_RNA2_TAGCTT_L007_R1.rDNA.filtered.fastq.gz_filtered.genes.results
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLK_RNA1/gene-counts.sh-1.0.0/MLK_RNA1_AGTCAA_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLK_RNA2/gene-counts.sh-1.0.0/MLK_RNA2_GTCCGC_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLKE_RNA1/gene-counts.sh-1.0.0/MLKE_RNA1_AGTTCC_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
/Volumes/project/GCRB/Lee_Lab/s163035/DK_RIP-seq/PARP1_RNAseq_in_AC16_MCF7_withTNFa_E2/raw/Sample_MLKE_RNA2/gene-counts.sh-1.0.0/MLKE_RNA2_GTGAAA_L008_R1.rDNA.filtered.fastq.gz_filtered.genes.results
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment