※ 결과가 NCBI의 KOG와 완전히 일치하지는 않지만, 상당 부분 유사하게 나옵니다. NCBI에서 정확히 어떤 방식으로 처리하는지는 모르겠지만, 이 스크립트로도 유사한 결과를 도출할 수 있습니다. :)
Source Open
import glob
import os
import re
from itertools import groupby
# NOTE(review): the `sets` module exists only in Python 2 (the built-in `set`
# replaces `Set`); kept because code outside this view may still reference it.
from sets import Set
# KOG data -> memory.
# Builds kog_dic from the file 'kog' in the working directory. Only lines that
# contain the substring 'KOG' are used; on such a line the second
# whitespace-separated token becomes the key and the first token the value.
# NOTE(review): presumably these are header lines such as "[J] KOG0001 ..."
# (category first, KOG identifier second) -- confirm against the real file.
kog_dic = {}
with open('kog') as kog_file:  # 'with' closes the handle the original leaked
    for line in kog_file:
        if 'KOG' not in line:
            continue
        kls = line.split()
        cat, nam = kls[0], kls[1]
        kog_dic[nam] = cat
# fasta files -> list file.
# For every '*.fa' file in the working directory, write '<name>.list' holding
# one entry per line: the first whitespace-separated token of each line that
# contains '>', with its leading marker characters removed.
for seq_file in glob.glob('*.fa'):
    # Text before the first '.' of the file name, so 'a.b.fa' yields 'a'.
    name = seq_file.split('.')[0]
    # Both handles are managed by 'with' so they are closed even if a line
    # raises; the original never closed the input file. The output is opened
    # first, matching the original order of side effects.
    with open(name + '.list', 'w') as ow, open(seq_file) as fasta:
        for line in fasta:
            if '>' not in line:
                continue
            # Drop two leading characters when the line contains '>>',
            # otherwise one.
            skip = 2 if '>>' in line else 1
            ow.write(line.split()[0][skip:] + '\n')
# NOTE(review): the line breaks and indentation of this section were lost, so
# the intended nesting cannot be recovered with certainty, and the section
# appears to continue past the last visible statement (the '.result.txt'
# handle `ow` is opened but never written to or closed here, and `tmp_result`
# is never used). Restore the original layout before changing any logic.
#
# What the visible statements do:
#   * iterate over every '*.list' file, take the text before the first '.' of
#     its name, and open '<name>.result.txt' for writing;
#   * read entries from the list file, removing '\n' and '>' from each;
#   * iterate over the files matching '<name>*.fa.out.txt' and split each of
#     their lines on whitespace: token 0 is compared with the list entry,
#     token 1 supplies the text after its last ':', and the second-to-last
#     token is parsed as a float (presumably an E-value -- TODO confirm).
for list_file in glob.glob('*.list'): # list files name = list_file.split('.')[0] ow = open(name+'.result.txt','w') # save files
for c_name in open(list_file): c_name1 = (c_name.replace('\n','')).replace('>','')
check = 0 tmp_func,tmp_result = [],[]
for open_file in glob.glob(name+'*.fa.out.txt'): # blastpgp files for line in open(open_file): ls = line.split()
c_name2 = ls[0] c_func = ls[1] c_valu = ls[-2]
# `check` acts as a flag: a line is considered only when token 0 equals the
# list entry and check is 0, and check is then set to 1. The text after the
# last ':' of token 1 is appended to tmp_func only when the parsed float is
# below 1. NOTE(review): the inner `if` is presumably nested in the outer one.
if c_name1 == c_name2 and check == 0: check = 1 if float(c_valu) < float(1): tmp_func.append(c_func.split(':')[-1]) else: pass
# NOTE(review): presumably this reset runs once per scanned output file, so
# that each file can contribute its first matching line -- confirm nesting.
check = 0
# Sort the collected values and collapse equal neighbours into
# (count, value) pairs.
tmp_result = [(len(list(g)),k) for k, g in groupby(sorted(tmp_func))]