abstract_lncRNA_gtf.pl 1.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. #!perl -w
  2. use strict;
  3. use warnings;
  4. use Getopt::Long;
  5. use ScriptManager;
  # Filter a GTF file down to the records annotated with an lncRNA biotype.
  #
  # Command line: -i <input GTF>  -o <output GTF>  [-h|--help]
  # A record is kept when its gene_biotype or its transcript_biotype attribute
  # is one of the values in @lncRNA_type; lines starting with '#' are dropped.
  my ($out, $in);
  GetOptions(
      "help|h" => \&USAGE,
      "o:s"    => \$out,
      "i:s"    => \$in,
  );

  # Both paths are mandatory: show the usage text (which exits) if either is
  # missing or empty.
  USAGE() unless ($in && $out);

  # Biotype values treated as lncRNA.
  my @lncRNA_type = qw(
      lncRNA 3prime_overlapping_ncRNA antisense bidirectional_promoter_lncRNA
      lincRNA macro_lncRNA non_coding processed_transcript sense_intronic
      sense_overlapping TEC lnc_RNA
  );

  # Lookup table: biotype => 1, for constant-time membership tests.
  my %type = map { $_ => 1 } @lncRNA_type;

  # Three-argument open with lexical handles: the file name can never be
  # interpreted as an open mode or a pipe, and both opens are checked so a bad
  # path fails loudly instead of silently producing no output.
  open my $in_fh,  '<', $in  or die "Cannot open input file '$in': $!\n";
  open my $out_fh, '>', $out or die "Cannot open output file '$out': $!\n";

  while (my $line = <$in_fh>) {
      next if $line =~ /^\#/;    # skip GTF header/comment lines

      # A single combined test and a single print: a record whose gene_biotype
      # AND transcript_biotype are both lncRNA types is written once, not twice.
      my $is_lncRNA =
             ($line =~ /gene_biotype \"([^\"]+)\"/       && $type{$1})
          || ($line =~ /transcript_biotype \"([^\"]+)\"/ && $type{$1});

      print {$out_fh} $line if $is_lncRNA;
  }

  close $in_fh;

  # Buffered write errors (e.g. disk full) only surface when the handle is
  # closed, so the close of the output handle is checked.
  close $out_fh or die "Cannot close output file '$out': $!\n";

  # remove $out if $out is empty
  #unlink $out if -f $out && -z $out;
  # USAGE - print the command-line help text to the default output handle and
  # terminate the program with exit status 0.
  #
  # Takes no meaningful arguments and never returns to the caller. It is
  # registered as the handler for -h/--help and is also invoked when a
  # required option is missing.
  sub USAGE {
      # $0 is interpolated so the help line shows the script name as invoked.
      # The heredoc body and terminator must stay at column 0.
      print <<"USAGE";
Usage:perl $0 -i reference.gtf -o lncRNA.gtf
Options:
-i <file> reference.gtf containing "gene_biotype" attribute, forced
-o <file> output file, forced
-h|help help
USAGE
      exit 0;
  }