Commit 797eef45 authored by Venkat Malladi
Browse files

Update parsing annotations.sh.

parent 75f33e04
# Parse out lncRNA and Peptides
#
# Splits gencode.v19.annotation.gtf into one GTF per biotype class
# (protein_coding, lncRNA, snRNA, snoRNA), selecting on whitespace-separated
# field 20 of each record.
# NOTE(review): in the GENCODE v19 layout field 20 is presumably the quoted
# transcript_type value (field 14 being gene_type) -- confirm against the GTF.
# Earlier passes that filtered on field 14 wrote to these same output files
# and were overwritten immediately by the field-20 passes, so only the
# field-20 filter is kept here.

# extract_biotype IN_GTF OUT_GTF BIOTYPE...
#   Copy to OUT_GTF every line of IN_GTF whose field 20 is exactly
#   "BIOTYPE"; (double quotes and trailing semicolon included) for any of the
#   listed BIOTYPE names. Lines with fewer than 20 fields never match.
#   BIOTYPE names must not contain whitespace: they are handed to awk as one
#   space-joined string and split again there.
extract_biotype() {
  local in_gtf=$1 out_gtf=$2
  shift 2
  awk -v wanted="$*" '
    BEGIN {
      # Build the set of accepted field values in their on-disk form: "name";
      n = split(wanted, names, " ")
      for (i = 1; i <= n; i++) keep["\"" names[i] "\";"] = 1
    }
    ($20 in keep) { print $0 }
  ' "$in_gtf" > "$out_gtf"
}

extract_biotype gencode.v19.annotation.gtf gencode.v19.annotation_protein_coding.gtf \
  protein_coding
extract_biotype gencode.v19.annotation.gtf gencode.v19.annotation_lncRNA.gtf \
  3prime_overlapping_ncrna antisense lincRNA processed_transcript \
  sense_intronic sense_overlapping
extract_biotype gencode.v19.annotation.gtf gencode.v19.annotation_snRNA.gtf \
  snRNA
extract_biotype gencode.v19.annotation.gtf gencode.v19.annotation_snoRNA.gtf \
  snoRNA
# Make an index to map gene_id to gene_names
#
# For each per-class GTF, and for the full annotation, writes a sorted,
# de-duplicated, comma-separated table built from entries 1, 2 and 5 of the
# ';'-separated attribute column (tab-separated column 9).
# NOTE(review): in GENCODE v19 those three entries are presumably gene_id,
# transcript_id and gene_name -- confirm against the GTF.
# The earlier two-column passes (entries 1 and 5 only) wrote to the same
# output files and were overwritten by the three-column passes; one of them
# also read ".v19.annotation.gtf", which is not the annotation file used
# elsewhere in this script. Only the three-column passes are kept.

# build_id_mapping IN_GTF OUT_TXT
#   Write to OUT_TXT one "v1,v2,v5" line per distinct value triple in IN_GTF.
build_id_mapping() {
  local in_gtf=$1 out_txt=$2
  # -s on the first cut drops lines that contain no tab (e.g. '##' header
  # lines); without it cut echoes such lines whole and they end up as junk
  # rows in the table.
  # The second cut keeps attribute entries 1, 2 and 5; the third keeps only
  # the values (every second space-separated word), still quoted.
  # Sorting happens before spaces become commas, as in the original pipeline.
  cut -s -f9 "$in_gtf" \
    | cut -f1,2,5 -d ";" \
    | cut -f2,4,6 -d " " \
    | tr -d '";' \
    | sort -u \
    | sed 's/ /,/g' > "$out_txt"
}

for biotype_class in protein_coding lncRNA snRNA snoRNA; do
  build_id_mapping "gencode.v19.annotation_${biotype_class}.gtf" \
    "gencode.v19.annotation_${biotype_class}_mapping.txt"
done
build_id_mapping gencode.v19.annotation.gtf mapping.txt
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment