12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
- import os,sys
- import re
- import optparse
- from collections import defaultdict
- import time
def Time():
    """Return the current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    now = time.localtime()
    stamp = time.strftime("%Y-%m-%d %H:%M:%S", now)
    return stamp
def make_dir(dir):
    """Create directory *dir*, including missing parents, if it is absent.

    A leading ``~`` is expanded to the user's home directory before the
    existence check, so ``~/results`` is handled correctly whether or not
    it already exists.

    Args:
        dir: Directory path to create; may start with ``~``.

    Raises:
        OSError: If the directory cannot be created (e.g. permissions).
    """
    # Expand first: checking the raw "~/..." string would look for a literal
    # "~" directory in the current working directory, report "missing", and
    # then makedirs would fail on the already-existing expanded path.
    path = os.path.expanduser(dir)
    if not os.path.exists(path):
        # exist_ok guards against another process creating the directory
        # between the check above and this call.
        os.makedirs(path, exist_ok=True)
-
-
def uniport_info_get(
    info_path="/Business/psn_company/t01/public/Database/Uniprot_info/swissprot.info.txt",
):
    """Build a mapping from UniProt identifier to gene name.

    Each line of the table is scanned for the text between its first two
    ``|`` characters (used as the identifier) and for a `` GN=<name>`` token
    (used as the gene name). Lines without a ``GN=`` token map to ``"-"``.
    Lines that contain no ``|...|`` section (blank lines, for instance) are
    skipped.

    Args:
        info_path: Path of the table to read. Defaults to the shared
            Swiss-Prot info file used by the pipeline.

    Returns:
        dict mapping identifier -> gene name, or ``"-"`` when the line has
        no gene name.

    Raises:
        OSError: If *info_path* cannot be opened.
    """
    id_pattern = re.compile(r"\|(.*?)\|")
    # One pattern for both detection and extraction. It stops at the next
    # space or at end of line, so a GN= token that is last on the line is
    # still captured.
    gene_pattern = re.compile(r" GN=([^ ]*)")

    uniprot_info = {}
    with open(info_path, "r") as fi:
        for raw_line in fi:
            line = raw_line.rstrip("\r\n")
            id_match = id_pattern.search(line)
            if id_match is None:
                # No identifier on this line; nothing to record.
                continue
            gene_match = gene_pattern.search(line)
            uniprot_info[id_match.group(1)] = (
                gene_match.group(1) if gene_match else "-"
            )
    return uniprot_info
def new_Annotation_get(uniprot_info, Annotation, out_file):
    """Write a two-column ``gene_id<TAB>name`` table from an annotation file.

    The input is read as tab-separated text; column 1 is the gene ID and
    column 2 is the Swiss-Prot annotation. The name written for each row is
    chosen as follows:

    * Column 2 starts with ``sp|`` or ``tr|``: the value of its `` GN=``
      token, or ``-`` if there is none.
    * Column 2 is missing, empty or ``-``: ``-``.
    * Otherwise column 2 is split on ``;``, ``,`` and ``:`` into IDs. The
      first ID that *uniprot_info* maps to a real gene name (not ``-``)
      supplies the name. If no ID maps to a gene name, the first ID itself
      is written.

    Blank input lines are skipped.

    Args:
        uniprot_info: Mapping of UniProt ID -> gene name (``"-"`` = none).
        Annotation: Path of the annotation file to read.
        out_file: Path of the file to write.

    Returns:
        Always ``1``.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Stops at the next space or at end of field, so a GN= token that is the
    # last item in the column (its trailing whitespace removed by strip())
    # is still recognised.
    gene_pattern = re.compile(r" GN=([^ ]*)")

    with open(str(Annotation), "r") as fi, open(out_file, "w") as fw:
        for raw_line in fi:
            stripped = raw_line.strip()
            if not stripped:
                continue
            fields = stripped.split("\t")
            gene_id = fields[0]
            # A row with only a gene ID has no annotation column.
            swiss_id = fields[1] if len(fields) > 1 else "-"

            if swiss_id.startswith(("tr|", "sp|")):
                gene_match = gene_pattern.search(swiss_id)
                name = gene_match.group(1) if gene_match else "-"
            elif swiss_id and swiss_id != "-":
                # Keep a single name. Prefer a real UniProt gene name so the
                # written value is not a bare Swiss-Prot ID when several IDs
                # are listed; fall back to the first ID otherwise.
                ids = re.split(r"[;,:]", swiss_id)
                name = next(
                    (
                        uniprot_info[one_id]
                        for one_id in ids
                        if uniprot_info.get(one_id, "-") != "-"
                    ),
                    ids[0],
                )
            else:
                name = "-"

            fw.write(f"{gene_id}\t{name}\n")
    return 1
if __name__ == "__main__":
    # Command-line entry point: read a Swiss-Prot annotation table and write
    # "<outdir>/SwissprotName" containing one gene name per gene ID.
    parser = optparse.OptionParser(
        usage='"usage:%prog [options] arg1,arg2"', version="%prog 1.2"
    )
    parser.add_option(
        '-s', '--Swissprot',
        action='store', dest='Swissprot',
        help='Swissprot file',
    )
    parser.add_option(
        '-o', '--outdir',
        action='store', dest='outdir', default='./',
        help='output directory [default:%default]',
    )

    options, args = parser.parse_args()
    if not options.Swissprot:
        # Print usage directly instead of re-running this script through a
        # shell, which depended on "python3" being on PATH and on the script
        # path containing no shell-special characters.
        parser.print_help()
        sys.exit(1)
    if not os.path.exists(options.Swissprot):
        # Fail early with a clear message, before the UniProt table is
        # loaded, rather than with a traceback from open().
        print("This Annotation file is not exists,please check it!")
        sys.exit(1)

    outdir = os.path.abspath(os.path.expanduser(options.outdir))
    make_dir(outdir)

    outfile = os.path.join(outdir, "SwissprotName")
    uniport_info = uniport_info_get()
    Annotation_get = new_Annotation_get(uniport_info, options.Swissprot, outfile)
    # NOTE(review): new_Annotation_get always returns 1 as written, so this
    # fallback only runs if that function is changed to signal failure with 0.
    if Annotation_get == 0:
        import shutil

        # Copy without a shell so paths with spaces or metacharacters are safe.
        shutil.copyfile(options.Swissprot, outfile)
        print("This Annotation file is not exists,please check it!")
|