dat2tsv.pl 900 B

1234567891011121314151617181920212223242526272829
  #!/bin/perl -w
  #
  # dat2tsv.pl - flatten a gzip-compressed UniProtKB-style flat file into TSV.
  #
  # Usage: dat2tsv.pl <input.dat.gz> <output.tsv>
  #
  # The input is decompressed through `pigz -p 4 -dc` and read one record at
  # a time (records are terminated by a line holding only "//").  One TSV row
  # is written per record with the columns:
  #   ID, AC, Entry_ID, Gene_Name, ORF_Name, Organism_Species, NCBI_TaxID,
  #   GO_ID, KEGG_ID
  # A field that is absent from a record is written as "-".
  use strict;
  use warnings;

  # Run only when executed as a script, so parse_record() can also be loaded
  # (e.g. via require) without triggering any I/O.
  main(@ARGV) unless caller;

  # main($in_path, $out_path)
  # Streams records from the compressed input and writes the TSV output.
  # Dies with a message on bad usage or on any open/close failure.
  sub main {
      my ($in_path, $out_path) = @_;
      die "Usage: $0 <input.dat.gz> <output.tsv>\n"
          unless defined $in_path && defined $out_path;

      # List-form pipe open: the file name never passes through a shell.
      open my $in, '-|', 'pigz', '-p', '4', '-dc', $in_path
          or die "Cannot run pigz on '$in_path': $!\n";
      open my $out, '>', $out_path
          or die "Cannot open '$out_path' for writing: $!\n";

      print {$out} join("\t", qw(
          ID AC Entry_ID Gene_Name ORF_Name Organism_Species NCBI_TaxID
          GO_ID KEGG_ID
      )), "\n";

      # Split on the terminator *line* rather than on any "//" substring,
      # which may also occur inside a record (e.g. in a URL).
      local $/ = "\n//\n";
      while (my $record = <$in>) {
          chomp $record;
          # Skip trailing whitespace or fragments that are not real entries.
          next unless $record =~ /^ID\s/m;
          print {$out} join("\t", parse_record($record)), "\n";
      }

      close $in
          or die($! ? "Error closing pigz pipe: $!\n"
                    : 'pigz exited with status ' . ($? >> 8) . "\n");
      close $out or die "Error closing '$out_path': $!\n";
      return;
  }

  # parse_record($record)
  # Takes the text of one record (without its "//" terminator) and returns
  # the nine output fields as a list, in header order.  Single-valued fields
  # use the first matching line; GO IDs are de-duplicated in order of first
  # appearance and joined with ";".  Missing fields are returned as "-".
  sub parse_record {
      my ($record) = @_;

      my %seen;
      my @go_ids = grep { !$seen{$_}++ }
                   $record =~ /^DR\s+GO;\s*(GO:\d+)/mg;

      return (
          _first_match($record, qr/^ID\s+(\S+)/m),
          # Stop at ';' so the accession does not carry its separator.
          _first_match($record, qr/^AC\s+([^;\s]+)/m),
          _first_match($record, qr/^DR\s+GeneID;\s*(\d+)/m),
          _first_match($record, qr/^GN\s+Name=([^;]+);/m),
          # ORFNames may follow other tokens on the same GN line.
          _first_match($record, qr/^GN\s[^\n]*?\bORFNames=([^;]+);/m),
          _first_match($record, qr/^OS\s+([^\n]+)/m),
          _first_match($record, qr/^OX\s+NCBI_TaxID=(\d+)/m),
          (@go_ids ? join(';', @go_ids) : '-'),
          _first_match($record, qr/^DR\s+KEGG;\s*([^;\s]+)/m),
      );
  }

  # _first_match($text, $pattern)
  # Returns the first capture group of $pattern matched against $text, or
  # "-" when there is no match.  Whitespace runs are collapsed to a single
  # space so a captured value can never inject a tab or newline into a row.
  sub _first_match {
      my ($text, $pattern) = @_;
      # List-context match yields the captured text, not a success flag.
      my ($value) = $text =~ $pattern;
      return '-' unless defined $value && length $value;
      $value =~ s/\s+/ /g;
      return $value;
  }