-
Notifications
You must be signed in to change notification settings - Fork 0
/
align-sh.py
52 lines (44 loc) · 1.02 KB
/
align-sh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from unidecode import unidecode
from cltk.corpus.greek.beta_to_unicode import Replacer
from fuzzywuzzy import fuzz
import re
# Tag-stripped line of mosc1.txt -> the same line with its markup intact
# (leading spaces and the newline already removed).
moscs = dict()
# Not used by any live code in this file; only the commented-out pairwise
# comparison at the bottom mentions it.
vs = dict()
# Target text -> highest fuzz.ratio score recorded for it so far.
bestmatch = dict()
# Target text -> "<score>\t<moscs value>" describing that highest-scoring entry.
bestmatches = dict()
def getbestmatch(targ):
    """Print the entry of ``moscs`` whose key is most similar to ``targ``.

    Every key of the module-level ``moscs`` dict is scored against ``targ``
    with ``fuzz.ratio``.  The highest score is stored in ``bestmatch[targ]``
    and the string "score, tab, moscs value" in ``bestmatches[targ]``.  On
    equal scores the key encountered first is kept, because only a strictly
    higher score replaces the current best.  The winner is then printed as
    ``final``, ``targ`` and that string, separated by spaces.

    If ``targ`` already has an entry from an earlier call, that entry is the
    starting point and is likewise only replaced by a strictly higher score.

    Args:
        targ: the text to look up among the keys of ``moscs``.

    Returns:
        None.  Results are recorded in ``bestmatches`` / ``bestmatch`` and
        written to standard output.  Nothing is recorded or printed when
        ``moscs`` is empty.
    """
    for foo in moscs:
        r = fuzz.ratio(foo, targ)
        # First candidate seen for this target, or a strictly better one.
        if targ not in bestmatches or r > bestmatch[targ]:
            bestmatches[targ] = str(r) + '\t' + moscs[foo]
            bestmatch[targ] = r
    if targ not in bestmatches:
        # moscs was empty, so there is nothing to report.  Previously this
        # fell through to the print below and raised KeyError.
        return
    print('final', targ, bestmatches[targ])
# Load the reference lines from mosc1.txt into moscs, keyed by their
# tag-stripped text and mapped to the line with its markup still in place.
# The context manager closes the file even if processing a line raises.
# NOTE(review): leading spaces are stripped *before* tags are replaced here,
# while the 10.txt loop strips them *after*; a line that starts with a tag
# therefore keeps a leading space in its key here - confirm this is intended.
tag_pattern = re.compile('<[^>]+>')
with open('mosc1.txt', 'r') as mosc_file:
    for raw_line in mosc_file:
        # Drop leading spaces (spaces only, not tabs) and the line terminator.
        marked_up = raw_line.lstrip(' ').replace('\n', '')
        # Each <...> tag becomes a single space in the lookup key.
        plain = tag_pattern.sub(' ', marked_up)
        if not plain:
            # Blank line: nothing to index.
            continue
        # A later line with the same tag-stripped text overwrites an earlier one.
        moscs[plain] = marked_up
# Look up every non-blank line of 10.txt among the reference lines and print
# its best match.  The context manager closes the file even if a lookup raises.
with open('10.txt', 'r') as target_file:
    for raw_line in target_file:
        # Each <...> tag becomes a single space, then leading spaces
        # (spaces only, not tabs) are removed.
        text = re.sub('<[^>]+>', ' ', raw_line).lstrip(' ')
        if text.startswith('\n'):
            # Nothing but spaces and/or tags before the newline: skip the line.
            continue
        getbestmatch(text.replace('\n', ''))
#for foo in moscs:
# for boo in vs:
# r = fuzz.ratio(foo,boo)
# print(r,moscs[foo],'v',boo,'\n')