diff --git a/miscscripts b/miscscripts
new file mode 160000
index 0000000000000000000000000000000000000000..cdb3b452854a8862d5d071c1275fc00094c95a1f
--- /dev/null
+++ b/miscscripts
@@ -0,0 +1 @@
+Subproject commit cdb3b452854a8862d5d071c1275fc00094c95a1f
diff --git a/obsolete/get_clinicaltrials.sh b/obsolete/get_clinicaltrials.sh
new file mode 100755
index 0000000000000000000000000000000000000000..138b84b9408e624efe3902a9425ba155fc0f7c74
--- /dev/null
+++ b/obsolete/get_clinicaltrials.sh
@@ -0,0 +1,63 @@
+#!/bin/sh
+# get_clinicaltrials.sh
+#
+# Downloads the XML record of each ClinicalTrials.gov study listed below to
+# <NCT id>.xml in the current directory. A failed download is reported on
+# stderr and the remaining studies are still fetched; the exit status is 1 if
+# any download failed.
+
+trial_ids='
+NCT02674568 NCT01926197 NCT02176967 NCT02879643 NCT02601937
+NCT01349881 NCT03072238 NCT02477878 NCT02411448 NCT01744223
+NCT02554812 NCT02677922 NCT02445391 NCT02357836 NCT02289144
+NCT02201992 NCT02659020 NCT03038100 NCT02152982 NCT01818986
+NCT02065869 NCT02465060 NCT02154490 NCT01101451 NCT02562755
+NCT02524119 NCT01674140 NCT02163356 NCT00980954 NCT02677116
+NCT02424617 NCT02922777 NCT02064673 NCT02407054 NCT02514031
+NCT02477826 NCT02637687 NCT01878617 NCT02667587 NCT02811783
+NCT01602666 NCT02557854 NCT02667873 NCT02466971 NCT02516423
+NCT02193282 NCT02401542 NCT02773849 NCT02834013 NCT02617589
+NCT02513667 NCT02580448 NCT02688608 NCT00632853 NCT02781506
+NCT02194738 NCT02902484 NCT02098343 NCT01962896 NCT02155920
+NCT02562443 NCT02180867 NCT00887146 NCT02115282 NCT01503632
+NCT01953588 NCT02003222 NCT03067610 NCT02492711 NCT00392327
+NCT02228096 NCT03036488 NCT02240238 NCT00492778 NCT01979536
+NCT02306161 NCT02101853 NCT02502266 NCT02883049 NCT02112916
+NCT02595944 NCT02484443 NCT02166463 NCT02354586 NCT02285439
+NCT02559778 NCT03065179 NCT01553071 NCT01042522 NCT02725268
+NCT02655822 NCT02521493 NCT01614197 NCT02332668 NCT02576990
+NCT02855125 NCT02450331 NCT02679144 NCT02135042 NCT02598661
+NCT01803282 NCT02748135 NCT02013336 NCT02664961 NCT02303821
+NCT02495415 NCT01659658 NCT01956669 NCT02339740 NCT00601003
+NCT02178722 NCT02092324 NCT02568267 NCT01595061 NCT02728258
+NCT02632708 NCT03024996 NCT02596503 NCT02853305 NCT02853604
+NCT02631876 NCT02927769 NCT03029585 NCT00321685 NCT02851407
+NCT02867592 NCT02684461 NCT02734537 NCT02398773 NCT03245151
+NCT02706626 NCT02899195 NCT03155620 NCT02947347 NCT02989857
+NCT03155997 NCT02715284 NCT02584478 NCT02729298 NCT03055013
+NCT02703272 NCT03318497 NCT02043665 NCT02952534 NCT03065712
+NCT03181126 NCT03151304 NCT03033511 NCT03077698 NCT02741570
+NCT02828358 NCT03190915 NCT03303339 NCT03061812 NCT03268954
+NCT02817113 NCT02595424 NCT03312114 NCT02793583 NCT03070886
+NCT03091660 NCT03241550 NCT02567435 NCT03110562 NCT02913261
+NCT03361748 NCT03117309 NCT03426865 NCT03137771 NCT03101566
+NCT03164603 NCT03441360 NCT03138499 NCT03033576 NCT03343197
+NCT02684058 NCT03095612 NCT02981628 NCT02203695 NCT03013998
+NCT02724579 NCT03007147 NCT03365882 NCT02912559 NCT02717507
+NCT02893930 NCT03298451 NCT03067181 NCT03141034 NCT03519997
+NCT02924376 NCT03215511 NCT03289039 NCT02392429 NCT03142334
+NCT03533582 NCT02892201 NCT03382561 NCT02498951 NCT03126916
+NCT03336216 NCT02719574 NCT03306264 NCT02572453 NCT02549651
+'
+
+status=0
+for trial_id in $trial_ids; do
+    # Quote the URL: unquoted, its "?" is a shell glob character.
+    # --fail makes curl exit non-zero on an HTTP error instead of saving the
+    # server's error page as if it were the XML record.
+    if ! curl --fail "https://clinicaltrials.gov/ct2/show/${trial_id}?displayxml=true" > "${trial_id}.xml"; then
+        echo "get_clinicaltrials.sh: download failed for ${trial_id}" >&2
+        status=1
+    fi
+done
+exit "$status"
diff --git a/obsolete/parse_bamreadct.pl b/obsolete/parse_bamreadct.pl
new file mode 100755
index 0000000000000000000000000000000000000000..fd99ced90d0d61ece43a800263c9c193af12cdb7
--- /dev/null
+++ b/obsolete/parse_bamreadct.pl
@@ -0,0 +1,56 @@
+#!/usr/bin/perl
+# parse_bamreadct.pl
+#
+# Usage: parse_bamreadct.pl <read_count_table>
+#
+# Reads a tab-separated table whose columns are chromosome, position,
+# reference base, depth, followed by one "base:count[:...]" field per allele,
+# and writes <read_count_table>.cttable.txt with the tab-separated columns
+#   chromosome, position, depth, reference base, reference count,
+#   "alt:count" pairs joined by ";"
+# Rows with depth <= 10 are skipped; alleles with a zero or missing count are
+# left out. NOTE(review): the input looks like bam-readcount output -- confirm.
+
+use strict;
+use warnings;
+
+my $bamreadct = shift @ARGV;
+die "Usage: $0 <read_count_table>\n" unless defined $bamreadct;
+
+my $cttable = "$bamreadct.cttable.txt";
+open my $in,  '<', $bamreadct or die "Cannot read $bamreadct: $!\n";
+open my $out, '>', $cttable   or die "Cannot write $cttable: $!\n";
+
+while (my $line = <$in>) {
+    chomp $line;
+    my ($chrom, $pos, $ref, $depth, @reads) = split /\t/, $line;
+    # Blank or truncated rows have no depth column; skip them without warnings.
+    next unless defined $depth && $depth > 10;
+    $chrom = "chr$chrom" if $chrom !~ m/^chr/;
+
+    my $ref_count;
+    my %count_of;
+    foreach my $read (@reads) {
+        my ($base, $count) = split /:/, $read;
+        next unless defined $base;
+        if ($ref eq $base) {
+            $ref_count = $count;
+            next;
+        }
+        # Only the first "+" is rewritten to the reference base; alleles
+        # containing "-" are kept as they appear in the input.
+        $base =~ s/\+/$ref/;
+        $count_of{$base} = $count if $count;
+    }
+
+    # Sorted keys give the same column text on every run; hash order is not stable.
+    my @basecalls = map { join(':', $_, $count_of{$_}) } sort keys %count_of;
+    # No field matched the reference base: keep the column, but leave it empty.
+    $ref_count = '' unless defined $ref_count;
+    print {$out} join("\t", $chrom, $pos, $depth, $ref, $ref_count,
+        join(';', @basecalls)), "\n";
+}
+
+close $in;
+# Buffered write errors only surface when the handle is closed.
+close $out or die "Cannot finish writing $cttable: $!\n";
diff --git a/obsolete/subset_gencode.pl b/obsolete/subset_gencode.pl
new file mode 100755
index 0000000000000000000000000000000000000000..1e0309ba8215778953bf96cf5d6d3b4046a92f98
--- /dev/null
+++ b/obsolete/subset_gencode.pl
@@
-0,0 +1,63 @@
+#!/usr/bin/perl
+# subset_gencode.pl
+#
+# Usage: subset_gencode.pl <gene_list> [gencode.gtf]
+#
+# Writes <gene_list>.bed with one line per CDS record of the GTF whose
+# gene_name appears in the first tab-separated column of <gene_list>:
+#   chrom, start, end, gene_name|gene_id|exon_number
+# The gene_id is written without its first ".<digits>" run. When no GTF is
+# given, the path in $default_gtf is read.
+#
+# NOTE(review): start is copied from the GTF unchanged. GTF starts are 1-based
+# and BED starts are 0-based, so confirm what consumers of the .bed expect.
+
+use strict;
+use warnings;
+
+my $default_gtf = '/project/shared/bicf_workflow_ref/GRCh38/gencode.gtf';
+
+my $keep = shift @ARGV;
+die "Usage: $0 <gene_list> [gencode.gtf]\n" unless defined $keep;
+my $gtf = @ARGV ? shift @ARGV : $default_gtf;
+
+# Gene symbols to retain, taken from the first column of the list.
+my %wanted;
+open my $keep_fh, '<', $keep or die "Cannot read $keep: $!\n";
+while (my $line = <$keep_fh>) {
+    chomp $line;
+    my ($sym) = split /\t/, $line;
+    next unless defined $sym && length $sym;    # blank line
+    $wanted{$sym} = 1;
+}
+close $keep_fh;
+
+# Open the annotation first so a bad GTF path does not truncate an existing .bed.
+my $bed = "$keep.bed";
+open my $gcode, '<', $gtf or die "Cannot read $gtf: $!\n";
+open my $out,   '>', $bed or die "Cannot write $bed: $!\n";
+while (my $line = <$gcode>) {
+    chomp $line;
+    next if $line =~ m/^#/;
+    # Tab-separated fields used: 1 (chrom), 3 (feature), 4, 5 and 9 (attributes).
+    my ($chrom, $feature, $start, $end, $info) = (split /\t/, $line)[0, 2, 3, 4, 8];
+    next unless defined $info && $feature eq 'CDS';
+
+    # Attributes are parsed as ";"-separated "key value" pairs, quotes removed.
+    $info =~ s/\"//g;
+    my %attr;
+    foreach my $pair (split /;\s*/, $info) {
+        my ($key, $val) = split / /, $pair;
+        next unless defined $key;
+        $attr{$key} = $val;
+    }
+
+    my $name = $attr{gene_name};
+    next unless defined $name && $wanted{$name};
+    my $gene_id = defined $attr{gene_id}     ? $attr{gene_id}     : '';
+    my $exon    = defined $attr{exon_number} ? $attr{exon_number} : '';
+    $gene_id =~ s/\.\d+//;
+    print {$out} join("\t", $chrom, $start, $end, join('|', $name, $gene_id, $exon)), "\n";
+}
+
+close $gcode;
+close $out or die "Cannot finish writing $bed: $!\n";
diff --git a/obsolete/subset_gencode_gene.pl b/obsolete/subset_gencode_gene.pl
new file mode 100644
index 0000000000000000000000000000000000000000..ebe4381f20fe959e61c32375b539f8c15e5f3231
--- /dev/null
+++ b/obsolete/subset_gencode_gene.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl
+# subset_gencode_gene.pl
+#
+# Usage: subset_gencode_gene.pl <gene_list> [gencode.gtf]
+#
+# Writes <gene_list>.bed with one line per gene record of the GTF whose
+# gene_name appears in the first tab-separated column of <gene_list>:
+#   chrom, start, end, gene_name|gene_id
+# The gene_id is written without its first ".<digits>" run. When no GTF is
+# given, the path in $default_gtf is read.
+#
+# NOTE(review): start is copied from the GTF unchanged. GTF starts are 1-based
+# and BED starts are 0-based, so confirm what consumers of the .bed expect.
+
+use strict;
+use warnings;
+
+my $default_gtf = '/project/shared/bicf_workflow_ref/GRCh38/gencode.gtf';
+
+my $keep = shift @ARGV;
+die "Usage: $0 <gene_list> [gencode.gtf]\n" unless defined $keep;
+my $gtf = @ARGV ? shift @ARGV : $default_gtf;
+
+# Gene symbols to retain, taken from the first column of the list.
+my %wanted;
+open my $keep_fh, '<', $keep or die "Cannot read $keep: $!\n";
+while (my $line = <$keep_fh>) {
+    chomp $line;
+    my ($sym) = split /\t/, $line;
+    next unless defined $sym && length $sym;    # blank line
+    $wanted{$sym} = 1;
+}
+close $keep_fh;
+
+# Open the annotation first so a bad GTF path does not truncate an existing .bed.
+my $bed = "$keep.bed";
+open my $gcode, '<', $gtf or die "Cannot read $gtf: $!\n";
+open my $out,   '>', $bed or die "Cannot write $bed: $!\n";
+while (my $line = <$gcode>) {
+    chomp $line;
+    next if $line =~ m/^#/;
+    # Tab-separated fields used: 1 (chrom), 3 (feature), 4, 5 and 9 (attributes).
+    my ($chrom, $feature, $start, $end, $info) = (split /\t/, $line)[0, 2, 3, 4, 8];
+    next unless defined $info && $feature eq 'gene';
+
+    # Attributes are parsed as ";"-separated "key value" pairs, quotes removed.
+    $info =~ s/\"//g;
+    my %attr;
+    foreach my $pair (split /;\s*/, $info) {
+        my ($key, $val) = split / /, $pair;
+        next unless defined $key;
+        $attr{$key} = $val;
+    }
+
+    my $name = $attr{gene_name};
+    next unless defined $name && $wanted{$name};
+    my $gene_id = defined $attr{gene_id} ? $attr{gene_id} : '';
+    $gene_id =~ s/\.\d+//;
+    print {$out} join("\t", $chrom, $start, $end, join('|', $name, $gene_id)), "\n";
+}
+
+close $gcode;
+close $out or die "Cannot finish writing $bed: $!\n";