"""Write a two-column ``gene_id<TAB>gene_name`` table from a Swissprot annotation.

The second column of the input annotation is either a Swissprot/TrEMBL
description (``sp|ACC|ENTRY ... GN=NAME ...``), a list of accessions
separated by ``;``, ``,`` or ``:``, or ``-`` when there is no hit.  The
output file is called ``SwissprotName`` and is written to the output
directory.
"""

import optparse
import os
import re
import sys
import time
from collections import defaultdict

# Default Swissprot header dump used to map accessions to gene names.
SWISSPROT_INFO_FILE = "/Business/psn_company/t01/public/Database/Uniprot_info/swissprot.info.txt"

# Text between the first pair of pipes, e.g. "P12345" in "sp|P12345|ABC_HUMAN".
_ACCESSION_RE = re.compile(r"\|(.*?)\|")
# Gene name: everything after " GN=" up to the next space *or* end of text,
# so a GN= field that is the last field of the description is still captured.
_GENE_NAME_RE = re.compile(r" GN=(.*?)(?: |$)")
# Separators allowed between accessions in the annotation column.
_ID_SEPARATOR_RE = re.compile(r"[;,:]")

# Placeholder written when no name is available.
_NO_NAME = "-"


def Time() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def make_dir(dir):
    """Create directory *dir* (and missing parents) if it does not exist.

    ``~`` is expanded *before* the existence check, so calling this on an
    already existing ``~/some/dir`` is a no-op instead of an error.

    Raises:
        FileExistsError: if the path exists but is not a directory.
    """
    path = os.path.expanduser(dir)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)


def _gene_name_from_header(header: str) -> str:
    """Return the ``GN=`` value found in *header*, or ``"-"`` if absent/empty."""
    match = _GENE_NAME_RE.search(header)
    if match is None:
        return _NO_NAME
    return match.group(1) or _NO_NAME


def uniport_info_get(info_file=SWISSPROT_INFO_FILE) -> dict:
    """Build an accession -> gene-name mapping from a Swissprot header file.

    Args:
        info_file: path of a text file with one Swissprot header per line
            (``...|ACCESSION|... GN=NAME ...``).  Defaults to the shared
            database dump.

    Returns:
        dict mapping each accession to its gene name, or to ``"-"`` when the
        header carries no ``GN=`` field.  Lines without a ``|ACCESSION|``
        part (for example blank lines) are ignored.  If an accession occurs
        more than once, the last occurrence wins.
    """
    uniprot_info = {}
    with open(info_file, "r") as fi:
        for line in fi:
            accession = _ACCESSION_RE.search(line)
            if accession is None:
                # Not a header line; nothing to record.
                continue
            uniprot_info[accession.group(1)] = _gene_name_from_header(line)
    return uniprot_info


def _name_from_accessions(accession_field: str, uniprot_info) -> str:
    """Pick one name for a ``;``/``,``/``:`` separated accession list.

    Keep a single name, and prefer a real UniProt gene name so that the
    emitted name is not a Swissprot ID when any of the (possibly several)
    accessions can be resolved.  If none resolves, fall back to the first
    token of the field.
    """
    tokens = _ID_SEPARATOR_RE.split(accession_field)
    for token in tokens:
        gene_name = uniprot_info.get(token, _NO_NAME)
        if gene_name != _NO_NAME:
            return gene_name
    return tokens[0]


def new_Annotation_get(uniprot_info, Annotation, out_file) -> int:
    """Write ``gene_id<TAB>name`` for every record of the annotation file.

    Args:
        uniprot_info: mapping of accession -> gene name (``"-"`` = unknown),
            as returned by :func:`uniport_info_get`.
        Annotation: path of the tab-separated annotation file; column 1 is
            the gene id and column 2 the Swissprot hit.
        out_file: path of the file to (over)write.

    Returns:
        Always ``1``.

    Raises:
        OSError: if *Annotation* cannot be read or *out_file* cannot be
            written.

    Name selection for column 2:
        * ``sp|...`` / ``tr|...`` description: its ``GN=`` value, else ``-``.
        * ``-``, empty or missing column: ``-``.
        * anything else: treated as an accession list, see
          ``_name_from_accessions``.
    Blank lines are skipped.
    """
    with open(Annotation, "r") as fi, open(out_file, "w") as fw:
        for line in fi:
            columns = line.strip().split("\t")
            gene_id = columns[0]
            if not gene_id and len(columns) == 1:
                # Blank line: there is no record to emit.
                continue
            # A record without a second column is treated as "no hit".
            swiss_id = columns[1] if len(columns) > 1 else _NO_NAME

            if swiss_id.startswith(("tr|", "sp|")):
                name = _gene_name_from_header(swiss_id)
            elif swiss_id and swiss_id != _NO_NAME:
                name = _name_from_accessions(swiss_id, uniprot_info)
            else:
                name = _NO_NAME
            fw.write(f"{gene_id}\t{name}\n")
    return 1


def main():
    """Parse the command line and produce ``<outdir>/SwissprotName``."""
    parser = optparse.OptionParser(
        usage='"usage:%prog [options] arg1,arg2"', version="%prog 1.2"
    )
    parser.add_option(
        '-s', '--Swissprot', action='store', dest='Swissprot',
        help='Swissprot file',
    )
    parser.add_option(
        '-o', '--outdir', action='store', dest='outdir', default='./',
        help='output directory [default:%default]',
    )
    options, args = parser.parse_args()

    if not options.Swissprot:
        # Print the help directly instead of re-running the script in a shell.
        parser.print_help()
        sys.exit(1)

    if not os.path.isfile(options.Swissprot):
        # Fail before creating any output when the input is missing.
        print("This Annotation file is not exists,please check it!", file=sys.stderr)
        sys.exit(1)

    outdir = os.path.abspath(os.path.expanduser(options.outdir))
    make_dir(outdir)
    outfile = os.path.join(outdir, "SwissprotName")

    uniport_info = uniport_info_get()
    new_Annotation_get(uniport_info, options.Swissprot, outfile)


if __name__ == "__main__":
    main()