# extract_fa_by_list.pl (658 B)

  1. #!perl -w
  2. die "Usage:\n\tperl $0 id_list input.fa output.fa\n" if(@ARGV !=3);
  3. my ($id_list,$fa,$out_fa)=@ARGV;
  4. my %ids;
  5. open IN,$id_list||die $!;
  6. while(<IN>){
  7. chomp;
  8. next if /^$/;
  9. $ids{$_}=1;
  10. }
  11. close IN;
  12. open OUT,">$out_fa" || die $!;
  13. if($fa=~/.gz$/){
  14. open IN,"pigz -p 4 -dc $fa"||die $!;
  15. }else{
  16. open IN,$fa||die $!;
  17. }
  18. # 改输入符;提取序列会有问题
  19. #$/=">";
  20. #while(<IN>){
  21. # chomp;
  22. # next if /^$/;
  23. # my ($id)=$_=~/^([^\s\t]+)/;
  24. # print OUT ">$_" if($ids{$id});
  25. #}
  26. my $seqname="";
  27. while (<IN>) {
  28. chomp;
  29. next if (/^$/);
  30. ($seqname)=$_=~/>(\S+)/ if($_=~/^>/);
  31. print OUT $_,"\n" if(exists $ids{$seqname});
  32. }
  33. close IN;
  34. close OUT;