#!perl -w
# Summarise the genes of a GTF annotation file as a tab-separated table.
#
# Usage: perl <this script> annotation.gtf[.gz] > genes.tsv
#
# Input : one GTF file; a name ending in "gz" is decompressed through gzip,
#         and "-" reads from standard input.
# Output: a header line followed by one row per (gene_id, protein_id) pair
#         with the columns
#           gene_id, locus_tag, old_locus_tag, entry_id, protein_id,
#           gene_name, gene_biotype
#         Values that are not present in the input are printed as "-".
use strict;
use warnings;

use constant PLACEHOLDER => '-';

# Return the first capture group of $pattern matched against $text,
# or $default when the pattern does not match.
sub first_capture {
    my ($text, $pattern, $default) = @_;
    my ($value) = $text =~ $pattern;
    return defined $value ? $value : $default;
}

# Return the first argument that is defined, non-empty and not the
# placeholder; fall back to the placeholder when there is none.
sub first_informative {
    for my $candidate (@_) {
        next unless defined $candidate && length $candidate;
        next if $candidate eq PLACEHOLDER;
        return $candidate;
    }
    return PLACEHOLDER;
}

@ARGV or die "Usage: $0 <annotation.gtf[.gz]>\n";
my $gtf_path = $ARGV[0];

my $gtf_fh;
if ($gtf_path =~ /gz$/) {
    # List-form pipe open: the file name reaches gzip as one argument and is
    # never interpreted by a shell; "--" protects names starting with "-".
    open $gtf_fh, '-|', 'gzip', '-dc', '--', $gtf_path
        or die "Cannot start gzip for '$gtf_path': $!\n";
}
elsif ($gtf_path eq '-') {
    # The former two-argument open treated "-" as standard input; keep that.
    $gtf_fh = \*STDIN;
}
else {
    open $gtf_fh, '<', $gtf_path
        or die "Cannot open '$gtf_path': $!\n";
}

# gene_id => { type, name, locus, old_locus, entry, protein => { id => 1 } }
my %allgene;

LINE:
while (my $line = <$gtf_fh>) {
    chomp $line;

    # Every record we keep is keyed by gene_id; lines without a usable one
    # cannot be attributed to a gene and are skipped.
    my $gene_id = first_capture($line, qr/gene_id \"([^"]+)\"/, undef);
    next LINE unless defined $gene_id;

    # "gene" feature lines carry the gene-level annotation.
    if ($line =~ /\tgene\t/) {
        my $gene_type     = first_capture($line, qr/gene_biotype \"([^"]+)\"/,   PLACEHOLDER);
        # The leading space keeps "locus_tag" from matching inside "old_locus_tag".
        my $locus_tag     = first_capture($line, qr/ locus_tag \"([^"]+)\"/,     PLACEHOLDER);
        my $old_locus_tag = first_capture($line, qr/ old_locus_tag \"([^"]+)\"/, PLACEHOLDER);
        my $entry_id      = first_capture($line, qr/ \"GeneID:(\d+)/,            PLACEHOLDER);

        my $gene_name = first_capture($line, qr/gene_name \"([^"]+)\"/, undef);
        my $name_attr = first_capture($line, qr/Name \"([^\"]+)/,       undef);
        my $gene_attr = first_capture($line, qr/gene \"([^\"]+)/,       undef);

        # Preference order for the displayed name. Placeholders are skipped so
        # that the later fallbacks (old_locus_tag, gene_id) are reachable.
        my $display_name = first_informative(
            $gene_name, $name_attr, $gene_attr,
            $locus_tag, $old_locus_tag, $gene_id,
        );

        $allgene{$gene_id}{"type"}      = $gene_type;
        $allgene{$gene_id}{"name"}      = $display_name;
        $allgene{$gene_id}{"locus"}     = $locus_tag;
        $allgene{$gene_id}{"old_locus"} = $old_locus_tag;
        $allgene{$gene_id}{"entry"}     = $entry_id;
    }

    # Any line naming the gene contributes its protein_id; lines without one
    # register the placeholder so that every gene has at least one entry.
    my $protein_id = first_capture($line, qr/protein_id \"([^"]+)\"/, PLACEHOLDER);
    $allgene{$gene_id}{"protein"}{$protein_id} = 1;
}

# For a pipe, close() also reports a failing gzip (e.g. a corrupt archive).
close $gtf_fh
    or warn $! ? "Error closing '$gtf_path': $!\n"
               : "gzip exited with status $? while reading '$gtf_path'\n";

print "gene_id\tlocus_tag\told_locus_tag\tentry_id\tprotein_id\tgene_name\tgene_biotype\n";

for my $gene_id (sort keys %allgene) {
    my $record = $allgene{$gene_id};

    # A gene seen only on non-gene lines has no gene-level fields; print
    # placeholders for them instead of undefined values.
    my ($locus, $old_locus, $entry, $name, $type) =
        map { defined $record->{$_} ? $record->{$_} : PLACEHOLDER }
        qw(locus old_locus entry name type);

    my @proteins = sort keys %{ $record->{"protein"} };

    # When real protein IDs exist next to the placeholder, drop the
    # placeholder; a gene with a single entry is printed as it is.
    @proteins = grep { $_ ne PLACEHOLDER } @proteins if @proteins > 1;

    for my $protein_id (@proteins) {
        print join("\t", $gene_id, $locus, $old_locus, $entry, $protein_id, $name, $type), "\n";
    }
}