# nc_overlap.sh
# Filter for bound vs. unbound snoRNA that overlap with genes

## Filter for unbound snRNA and snoRNA

#######################################
# Write the "unbound" subset of one RNA class: every record of
# mcf-7-basal-<class>_all.bed that has no overlap with
# mcf-7-basal-<class>_bound.bed (bedtools intersect -v).
# Arguments: $1 - RNA class used in the file names (snRNA or snoRNA)
# Outputs:   overwrites mcf-7-basal-<class>_unbound.bed
# Returns:   exit status of bedtools
#######################################
filter_unbound() {
  local rna_class=$1
  local prefix="mcf-7-basal-${rna_class}"

  bedtools intersect -a "${prefix}_all.bed" -b "${prefix}_bound.bed" -v \
    > "${prefix}_unbound.bed"
}

filter_unbound snRNA
filter_unbound snoRNA

# Intersect to find parent (host) gene

#######################################
# Reduce a GTF file to a de-duplicated list of version-stripped IDs.
#
# Keeps lines containing "transcript", takes column 9 (the attribute
# column), keeps the values of the 1st, 2nd, 5th and 8th ';'-separated
# attributes, drops records that contain no "ENST" value, and prints the
# first value truncated at its first '.'.
# NOTE(review): with the GENCODE v19 attribute order the first value is
# presumably gene_id (so the output is ENSG IDs) - confirm against the GTF.
#
# Arguments: $1 - path to the GTF file to read
# Outputs:   one ID per line on stdout
#######################################
extract_host_genes() {
  grep transcript "$1" \
    | cut -f9 \
    | cut -f1,2,5,8 -d ";" \
    | cut -f2,4,6,8 -d " " \
    | sed 's/"//g' \
    | sed 's/;//g' \
    | sort -u \
    | sed 's/ /,/g' \
    | grep ENST \
    | sort \
    | cut -f1 -d ',' \
    | cut -f1 -d '.' \
    | uniq
}

#######################################
# Find the annotated protein-coding features overlapping one snoRNA set and
# derive the host ID list from them.
# Arguments: $1 - snoRNA set name used in the file names (bound or unbound)
# Outputs:   overwrites mcf-7-basal-snoRNA_<set>_overlap.gtf and
#            mcf-7-basal-snoRNA_<set>_overlap_host.txt
#######################################
find_host_genes() {
  local binding=$1
  local prefix="mcf-7-basal-snoRNA_${binding}"

  # -wa: report the original GTF (-a) record for every overlap.
  bedtools intersect -wa \
    -a gencode.v19.annotation_protein_coding.gtf \
    -b "${prefix}.bed" \
    > "${prefix}_overlap.gtf"

  # Read back the file written just above; both names come from the same
  # prefix so the producer and consumer cannot drift apart.
  extract_host_genes "${prefix}_overlap.gtf" > "${prefix}_overlap_host.txt"
}

find_host_genes bound
find_host_genes unbound

# basal genes

#######################################
# Print the distinct values of column 6 of a tab-separated BED file, each
# truncated at its first '.'.
# NOTE(review): column 6 is presumably a versioned gene ID in these custom
# BED files - confirm against the input.
#
# Duplicates are removed wherever they occur (not only on adjacent lines,
# which is all plain `uniq` does on unsorted input); first-seen order is
# preserved.
#
# Arguments: $1 - path to the BED file to read
# Outputs:   one ID per line on stdout
#######################################
unique_gene_ids() {
  cut -f6 "$1" \
    | cut -f1 -d '.' \
    | awk '!seen[$0]++'
}

unique_gene_ids mcf-7-basal-snoRNA_unbound.bed > mcf-7-basal-snoRNA_unbound.txt
unique_gene_ids mcf-7-basal-snoRNA_bound.bed > mcf-7-basal-snoRNA_bound.txt