# Make universe of enhancer transcripts bedops --everything /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D10/trimmed_analysis/call-transcripts.sh-4.0.0/final-transcripts.bed \ /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D2/trimmed_analysis/call-transcripts.sh-4.0.0/final-transcripts.bed \ /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D0/trimmed_analysis/call-transcripts.sh-4.0.0/final-transcripts.bed \ /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D7/trimmed_analysis/call-transcripts.sh-4.0.0/final-transcripts.bed \ /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/ES_D5/trimmed_analysis/call-transcripts.sh-4.0.0/final-transcripts.bed \ > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.bed # Define short transcripts ./define_small_transcripts.py -t /Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts.bed # Seperate into + and - strand grep '+' /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/short_transcripts.bed > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_plus.bed grep '-' /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/short_transcripts.bed > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_minus.bed # get transcripts that overlap at least 50% in both transcripts bedmap --echo-map --fraction-both 0.5 /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_plus.bed \ | awk '(split($0, a, ";") > 1)' - \ | sed 's/\;/\n/g' - \ | sort-bed - \ | uniq - \ > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_plus.bed bedmap --echo-map --fraction-both 0.5 /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_minus.bed \ | awk '(split($0, a, ";") > 1)' - \ | sed 's/\;/\n/g' - \ | sort-bed - \ | uniq - \ > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_minus.bed # get transcripts that are not represented in overlap bedtools intersect -v -wa -f 1.0 -r -a /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_plus.bed -b /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_plus.bed > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/unique_transcripts_plus.bed bedtools intersect -v -wa -f 1.0 -r -a /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_minus.bed -b /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_minus.bed > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/unique_transcripts_minus.bed # Merge transcripts bedtools merge -s -d -1 -i /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_plus.bed -c 4,5,6 -o distinct,mean,distinct \ | cut -f1,2,3,5,6,7 > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_overlap_transcripts_plus.bed bedtools merge -s -d -1 -i /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_minus.bed -c 4,5,6 -o distinct,mean,distinct \ | cut -f1,2,3,5,6,7 > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_overlap_transcripts_minus.bed bedops --everything /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_overlap_transcripts_plus.bed /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_overlap_transcripts_minus.bed \ /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/unique_transcripts_plus.bed /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/unique_transcripts_minus.bed > /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_universe-transcripts.bed # Remove extra files rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_plus.bed rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_final-transcripts_minus.bed rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_plus.bed rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/overlap_transcripts_minus.bed rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/unique_transcripts_plus.bed rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/unique_transcripts_minus.bed rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_overlap_transcripts_plus.bed rm /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_overlap_transcripts_minus.bed #Identifying intergenic/genic transcripts bedtools intersect -a /Volumes/project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/GRO-seq/universe_transcripts/merge_universe-transcripts.bed -b excluded_3kb_flanking.bed -v > universe_enhancer_transcripts.bed ./define_short_transcripts.py -i universe_enhancer_transcripts.bed # Get RPKM ./rpkm_gro.py --peaks short_short_paired.bed --experiments gro-seq_list.csv -f SSP --minimum 0.5 ./rpkm_gro.py --peaks short_unpaired.bed --experiments gro-seq_list.csv -f SUNP --minimum 1 ./cutoff_analysis_gro.py --rpkm SSP_sum.tsv --color '#951B22' --factor SSP -l 0.5 ./cutoff_analysis_gro.py --rpkm SUNP_sum.tsv --color '#951B22' --factor SUNP -l 1 # Filter SSP Paired and merge cut -f4 SSP_filtered_peaks.bed | sort | uniq > t for i in $(cat t); do grep $i SSP_filtered_peaks.bed > tt; sort-bed tt > ttt ; bedtools merge -c 4 -o distinct -i ttt >> tttt; grep -v ',' tttt | sort-bed - > SSP_filtered_peaks_merged.bed; done # Acutal Processing python gro-seq_enhancer_plots.py bedtools sort -i ES_D0_gro-seq_enhancers.bed > ES_D0_gro-seq_enhancers_1kb.bed bedtools sort -i ES_D2_gro-seq_enhancers.bed > ES_D2_gro-seq_enhancers_1kb.bed bedtools sort -i ES_D5_gro-seq_enhancers.bed > ES_D5_gro-seq_enhancers_1kb.bed bedtools sort -i ES_D7_gro-seq_enhancers.bed > ES_D7_gro-seq_enhancers_1kb.bed bedtools sort -i ES_D10_gro-seq_enhancers.bed > ES_D10_gro-seq_enhancers_1kb.bed # Meme processing #!/bin/bash #SBATCH --job-name=fasta_processing #SBATCH --partition=super #SBATCH --nodes=2 #SBATCH --ntasks=64 #SBATCH --time=0-36:00:00 #SBATCH --output=fasta_processing.%j.out #SBATCH --error=fasta_processing.%j.err #SBATCH --mail-user=venkat.malladi@utsouthwestern.edu #SBATCH --mail-type=ALL # Fasta file processing module load bedtools bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D0_gro-seq_enhancers_1kb.bed -fo ES_D0_gro-seq_enhancers.fasta -name bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D2_gro-seq_enhancers_1kb.bed -fo ES_D2_gro-seq_enhancers.fasta -name bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D5_gro-seq_enhancers_1kb.bed -fo ES_D5_gro-seq_enhancers.fasta -name bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D7_gro-seq_enhancers_1kb.bed -fo ES_D7_gro-seq_enhancers.fasta -name bedtools getfasta -fi /project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa -bed ES_D10_gro-seq_enhancers_1kb.bed -fo ES_D10_gro-seq_enhancers.fasta -name #!/bin/bash #SBATCH --job-name=meme_test_gro #SBATCH --output=meme_test.%j.out #SBATCH --error=meme_test_gro.%j.err #SBATCH --mail-user=venkat.malladi@utsouthwestern.edu # Use super partition #SBATCH --partition=super # Use 2 nodes #SBATCH -N 2 # Total of 96 tasks #SBATCH -n 96 #SBATCH -t 1:0:0 module add meme/4.11.1-gcc-openmpi meme --p 96 -dna -mod zoops -nmotifs 15 -minw 8 -maxw 15 -maxsize 20000000 -revcomp meme-chip -meme-p 96 -dna -meme-mod zoops -meme-nmotifs 15 -meme-minw 8 -meme-maxw 15 -meme-maxsize 20000000 -db /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016.meme ES_D0_gro-seq_enhancers.fasta -o MEME_op_ES_D0_gro-seq_enhancers_1kb_zoops meme-chip -meme-p 96 -dna -meme-mod zoops -meme-nmotifs 15 -meme-minw 8 -meme-maxw 15 -meme-maxsize 20000000 -db /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016.meme ES_D2_gro-seq_enhancers.fasta -o MEME_op_ES_D2_gro-seq_enhancers_1kb_zoops meme-chip -meme-p 96 -dna -meme-mod zoops -meme-nmotifs 15 -meme-minw 8 -meme-maxw 15 -meme-maxsize 20000000 -db /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016.meme ES_D5_gro-seq_enhancers.fasta -o MEME_op_ES_D5_gro-seq_enhancers_1kb_zoops meme-chip -meme-p 96 -dna -meme-mod zoops -meme-nmotifs 15 -meme-minw 8 -meme-maxw 15 -meme-maxsize 20000000 -db /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016.meme ES_D7_gro-seq_enhancers.fasta -o MEME_op_ES_D7_gro-seq_enhancers_1kb_zoops meme-chip -meme-p 96 -dna -meme-mod zoops -meme-nmotifs 15 -meme-minw 8 -meme-maxw 15 -meme-maxsize 20000000 -db /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016.meme ES_D10_gro-seq_enhancers.fasta -o MEME_op_ES_D10_gro-seq_enhancers_1kb_zoops meme -p 96 -dna -mod zoops -nmotifs 15 -minw 8 -maxw 15 -maxsize 20000000 -revcomp ES_D0_gro-seq_enhancers.fasta -o MEME_op_ES_D0_gro-seq_enhancers_1kb_zoops meme -p 96 -dna -mod zoops -nmotifs 15 -minw 8 -maxw 15 -maxsize 20000000 -revcomp ES_D2_gro-seq_enhancers.fasta -o MEME_op_ES_D2_gro-seq_enhancers_1kb_zoops meme -p 96 -dna -mod zoops -nmotifs 15 -minw 8 -maxw 15 -maxsize 20000000 -revcomp ES_D5_gro-seq_enhancers.fasta -o MEME_op_ES_D5_gro-seq_enhancers_1kb_zoops meme -p 96 -dna -mod zoops -nmotifs 15 -minw 8 -maxw 15 -maxsize 20000000 -revcomp ES_D7_gro-seq_enhancers.fasta -o MEME_op_ES_D7_gro-seq_enhancers_1kb_zoops meme -p 96 -dna -mod zoops -nmotifs 15 -minw 8 -maxw 15 -maxsize 20000000 -revcomp ES_D10_gro-seq_enhancers.fasta -o MEME_op_ES_D10_gro-seq_enhancers_1kb_zoops #!/bin/bash #SBATCH --job-name=tomtom_test_gro #SBATCH --output=tomtom_test_gro.%j.out #SBATCH --error=tomtom_test_gro.%j.err #SBATCH --mail-user=venkat.malladi@utsouthwestern.edu # Use super partition #SBATCH --partition=super # Use 2 nodes #SBATCH -N 2 module add meme/4.11.1-gcc-openmpi tomtom -evalue -thresh 10 -o tomtom_op_ES_D0_gro-seq_enhancers_1kb /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/Motif/MEME_op_ES_D0_gro-seq_enhancers_1kb_zoops/meme.txt /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016_vertebrates.meme tomtom -evalue -thresh 10 -o tomtom_op_ES_D10_gro-seq_enhancers_1kb /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/Motif/MEME_op_ES_D10_gro-seq_enhancers_1kb_zoops/meme.txt /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016_vertebrates.meme tomtom -evalue -thresh 10 -o tomtom_op_ES_D2_gro-seq_enhancers_1kb /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/Motif/MEME_op_ES_D2_gro-seq_enhancers_1kb_zoops/meme.txt /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016_vertebrates.meme tomtom -evalue -thresh 10 -o tomtom_op_ES_D5_gro-seq_enhancers_1kb /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/Motif/MEME_op_ES_D5_gro-seq_enhancers_1kb_zoops/meme.txt /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016_vertebrates.meme tomtom -evalue -thresh 10 -o tomtom_op_ES_D7_gro-seq_enhancers_1kb /project/GCRB/Lee_Lab/s163035/Matrix_analysis_PMIT_25842977/Motif/MEME_op_ES_D7_gro-seq_enhancers_1kb_zoops/meme.txt /project/GCRB/Lee_Lab/shared/For_CBs/databases/meme_motif_databases/JASPAR/JASPAR_CORE_2016_vertebrates.meme