#!/bin/perl -w
#
# Convert a compressed UniProtKB-style flat file into a tab-separated summary
# table with one row per entry.
#
# Usage:
#     <script> <input.dat.gz> <output.tsv>
#
# The input is decompressed by running `pigz -p 4 -dc <input>`; entries are
# expected to be terminated by a line containing only "//".
#
# Output columns (missing values are written as "-"):
#     ID                first token of the ID line
#     AC                first accession on the AC line
#     Entry_ID          first "DR GeneID" cross-reference
#     Gene_Name         first "GN Name=" value
#     ORF_Name          first "GN ... ORFNames=" value
#     Organism_Species  text of the first OS line
#     NCBI_TaxID        numeric taxon id from the OX line
#     GO_ID             unique GO ids, joined with ";" in order of appearance
#     KEGG_ID           first "DR KEGG" cross-reference
use strict;
use warnings;

# Extract the summary fields from a single flat-file entry.
#
# Parameters:
#     $entry - text of one entry (without the trailing "//" terminator).
#
# Returns the nine output columns as a list, in table order, with "-" in
# place of any field that is absent. Returns an empty list when the text has
# no ID line (e.g. trailing whitespace after the last entry), so callers can
# skip it.
sub parse_entry {
    my ($entry) = @_;
    return unless defined $entry;

    # Matches must be evaluated in list context to obtain the captured text;
    # in scalar context they only yield a true/false flag.  /m lets ^ anchor
    # at the start of every line inside the multi-line entry.
    my ($id) = $entry =~ /^ID[ \t]+(\S+)/m;
    return unless defined $id;

    # Stop at ';' so the accession does not carry its trailing separator.
    my ($accession) = $entry =~ /^AC[ \t]+([^;\s]+)/m;
    my ($gene_id)   = $entry =~ /^DR[ \t]+GeneID;[ \t]*(\d+)/m;
    my ($gene_name) = $entry =~ /^GN[ \t]+Name=([^;]+);/m;

    # ORFNames may follow other "key=value;" items on the same GN line.
    my ($orf_name) = $entry =~ /^GN[ \t]+(?:[^\n]*?;[ \t]*)?ORFNames=([^;]+);/m;
    my ($species)  = $entry =~ /^OS[ \t]+([^\n]+)/m;
    my ($tax_id)   = $entry =~ /^OX[ \t]+NCBI_TaxID=(\d+)/m;
    my ($kegg_id)  = $entry =~ /^DR[ \t]+KEGG;[ \t]*([^;\s]+)/m;

    # Collect every GO id, dropping repeats while keeping first-seen order.
    my %seen;
    my @go_ids = grep { !$seen{$_}++ } $entry =~ /^DR[ \t]+GO;[ \t]*(GO:\d+)/mg;
    my $go_field = @go_ids ? join(';', @go_ids) : undef;

    return map { defined $_ ? $_ : '-' } (
        $id,      $accession, $gene_id, $gene_name, $orf_name,
        $species, $tax_id,    $go_field, $kegg_id,
    );
}

# Read the compressed flat file named by the first argument and write the
# summary table to the file named by the second argument.
#
# Dies with a message on bad usage or on any failure to start pigz, open the
# output file, or cleanly close either handle. Returns 0 on success.
sub main {
    my @args = @_;
    die "Usage: $0 <input.dat.gz> <output.tsv>\n" unless @args == 2;
    my ($in_path, $out_path) = @args;

    # List-form pipe open: the file name is passed to pigz as an argument and
    # never interpreted by a shell.
    open my $in, '-|', 'pigz', '-p', '4', '-dc', $in_path
        or die "Cannot run pigz on '$in_path': $!\n";
    open my $out, '>', $out_path
        or die "Cannot open '$out_path' for writing: $!\n";

    # Split on the terminator *line* rather than on any "//", which also
    # occurs inside URLs embedded in entry text.
    local $/ = "\n//\n";

    print {$out} "ID\tAC\tEntry_ID\tGene_Name\tORF_Name\tOrganism_Species\tNCBI_TaxID\tGO_ID\tKEGG_ID\n";

    while (my $entry = <$in>) {
        chomp $entry;
        my @fields = parse_entry($entry);
        next unless @fields;
        print {$out} join("\t", @fields), "\n";
    }

    # close() on a pipe fails with $! == 0 when the command exited non-zero.
    close $in
        or die($! ? "Error closing pigz pipe: $!\n"
                  : "pigz exited with status " . ($? >> 8) . "\n");
    # Buffered write errors only surface when the output handle is closed.
    close $out
        or die "Error closing '$out_path': $!\n";

    return 0;
}

# Run only when executed as a script, so the file can also be loaded (e.g. by
# tests) without side effects.
exit main(@ARGV) unless caller;