uniprot_info.py 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. import os,sys
  2. import re
  3. import optparse
  4. from collections import defaultdict
  5. import time
  6. def Time():
  7. return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
  8. def make_dir(dir):
  9. if not os.path.exists(dir):
  10. dir=os.path.expanduser(dir)
  11. os.makedirs(dir)
  12. def uniport_info_get():
  13. uniprot_info={}
  14. with open("/Business/psn_company/t01/public/Database/Uniprot_info/swissprot.info.txt","r") as fi:
  15. for line in fi:
  16. uniprot_id=re.search(r"\|(.*?)\|",line).group(1)
  17. if re.search(r" GN=",line):
  18. gene_name=re.search(r" GN=(.*?) ",line).group(1)
  19. uniprot_info[uniprot_id]=gene_name
  20. else:
  21. uniprot_info[uniprot_id]="-"
  22. return uniprot_info
  23. def new_Annotation_get(uniprot_info,Annotation,out_file):
  24. with open(f'{Annotation}',"r") as fi,open(out_file,"w") as fw:
  25. for line in fi:
  26. lines=line.strip().split("\t")
  27. gene_id=lines[0]
  28. swiss_id=lines[1]
  29. if re.search(r"^tr\|",swiss_id) or re.search(r"^sp\|",swiss_id):
  30. if re.search(r' GN=(.*?) ', swiss_id):
  31. swiss_name = re.search(r' GN=(.*?) ', swiss_id).group(1)
  32. fw.write(gene_id+f"\t{swiss_name}\n")
  33. else:
  34. fw.write(gene_id+f"\t-\n")
  35. elif swiss_id != "-":
  36. swiss_info = re.split(r'[;,:]', swiss_id)
  37. swiss_name=[]
  38. #保留唯一名字,这个名字必须是uniprot对应的Gene Name ,避免插入的Name 为Swissprot ID(针对有多个uniprotId处理)
  39. name_insert_first=0
  40. for info in swiss_info:
  41. if info in uniprot_info :
  42. if uniprot_info[info] != "-":
  43. if name_insert_first==0:
  44. name_insert_first =1
  45. swiss_name.insert(0,uniprot_info[info])
  46. else:
  47. swiss_name.append(uniprot_info[info])
  48. else:
  49. swiss_name.append(info)
  50. else:
  51. swiss_name.append(info)
  52. swiss_all_name=";".join(swiss_name)
  53. if len(swiss_name) >0 :
  54. swiss_uniq_name=swiss_name[0]
  55. else:
  56. swiss_uniq_name="-"
  57. fw.write(gene_id+f"\t{swiss_uniq_name}\n")
  58. else:
  59. fw.write(gene_id+f"\t-\n")
  60. return 1
  61. if __name__ == "__main__":
  62. parser=optparse.OptionParser(usage='"usage:%prog [options] arg1,arg2"',version="%prog 1.2")
  63. parser.add_option('-s','--Swissprot',
  64. action='store',dest='Swissprot',
  65. help='Swissprot file')
  66. parser.add_option('-o','--outdir',
  67. action='store',dest='outdir',default = './',
  68. help='output directory [default:%default]')
  69. options,args=parser.parse_args()
  70. if not options.Swissprot:
  71. os.system("python3 "+sys.argv[0]+" -h")
  72. sys.exit(1)
  73. outdir=os.path.abspath(os.path.expanduser(options.outdir))
  74. make_dir(outdir)
  75. outfile=os.path.join(outdir, "SwissprotName")
  76. uniport_info=uniport_info_get()
  77. Annotation_get=new_Annotation_get(uniport_info,options.Swissprot,outfile)
  78. if Annotation_get == 0:
  79. os.system(f"cp {options.Swissprot} {outfile}")
  80. print("This Annotation file is not exists,please check it!")