169 lines
4.4 KiB
Python
169 lines
4.4 KiB
Python
import sys
|
|
import os
|
|
|
|
sys.path.append(os.getcwd()+'/content-engine')
|
|
import pluralize_en
|
|
import pattern.en
|
|
|
|
|
|
def noun_morphology(word):
    """Build the morphology dictionary for a noun.

    Returns a dict with a single key, 'PLR', holding whatever
    ``pluralize_en.pluralize(word)`` returns for *word*.
    """
    return {'PLR': pluralize_en.pluralize(word)}
|
|
|
|
def adjective_morphology(word):
    """Build the morphology dictionary for an adjective.

    Returns a dict with two keys:
      'SUP' -- result of ``pattern.en.superlative(word)``
      'CMP' -- result of ``pattern.en.comparative(word)``
    """
    # The superlative is computed first, then the comparative, matching the
    # key order callers see when iterating the result.
    return {
        'SUP': pattern.en.superlative(word),
        'CMP': pattern.en.comparative(word),
    }
|
|
|
|
def verb_morphology(word):
    """Build the morphology dictionary for a verb from ``pattern.en.lexeme``.

    The list returned by ``pattern.en.lexeme(word)`` is read positionally:
      element 0 -- initial value for both 'PAS' and 'PTP'
      element 1 -- '3PS&PRS' (only when present)
      element 2 -- 'GER' (only when present)
      element 3 -- overrides both 'PAS' and 'PTP' (only when present)
      element 4 -- overrides 'PTP' (only when present)

    NOTE(review): the keys presumably stand for past, past participle,
    third-person-singular present and gerund -- confirm against the rule
    file. An empty lexeme list raises IndexError on element 0.
    """
    forms = pattern.en.lexeme(word)
    available = len(forms)

    # Seed both past-like slots with the first form; longer lexemes refine
    # them below.
    first_form = forms[0]
    morph = {'PAS': first_form, 'PTP': first_form}

    if available >= 2:
        morph['3PS&PRS'] = forms[1]
    if available >= 3:
        morph['GER'] = forms[2]
    if available >= 4:
        morph['PAS'] = morph['PTP'] = forms[3]
    if available >= 5:
        morph['PTP'] = forms[4]

    return morph
|
|
|
|
def adverb_morphology(word):
    """Return the morphology dictionary for an adverb.

    No forms are generated for adverbs: the result is always a new empty
    dict and *word* is not inspected.
    """
    return {}
|
|
|
|
def morphology(name, pos):
    """Dispatch to the morphology builder for the given part of speech.

    Parameters:
      name -- the word to inflect
      pos  -- part-of-speech tag: 'n' (noun), 'v' (verb), 's' or 'a'
              (adjective), 'r' (adverb)

    Returns the dict produced by the matching ``*_morphology`` function.

    Raises ValueError for any other tag. ValueError is a subclass of
    Exception, so callers that catch Exception keep working, but the error
    now says which tag was rejected.
    """
    if pos == 'n':
        return noun_morphology(name)
    elif pos == 'v':
        return verb_morphology(name)
    elif pos in ('s', 'a'):
        return adjective_morphology(name)
    elif pos == 'r':
        return adverb_morphology(name)
    else:
        raise ValueError(
            "unsupported part of speech %r for word %r" % (pos, name)
        )
|
|
|
|
def getMorphRules():
    """Load the morphology rule table from 'content-engine/morphology.txt'.

    Each line is split on single spaces. The first field is the rule name
    and the third field is the parameter spec, a ';'-separated list of
    ``KEY:VALUE`` items. Anything from a '#' onward in the spec is dropped.

    Returns a dict mapping rule name -> {KEY: VALUE}. A rule whose line has
    no spec field (fewer than three fields) maps to an empty dict.

    Differences from the previous implementation:
      * the file is closed deterministically via ``with``;
      * lines with fewer than three fields no longer crash with
        AttributeError (a dict was being ``.split``);
      * lines whose first field is empty or starts with '#' (blank lines,
        lines with a leading space, full-line comments) are skipped
        instead of creating a nameless rule with no parameters, which
        would match every word.
    """
    rules = {}
    with open('content-engine/morphology.txt', 'r') as rule_file:
        for line in rule_file:
            fields = line.split(' ')
            rule_name = fields[0].strip()
            if not rule_name or rule_name.startswith('#'):
                continue

            # Only the third field carries parameters; later fields are
            # ignored, which is how space-separated trailing comments
            # are discarded.
            spec = fields[2].strip() if len(fields) >= 3 else ''
            spec = spec.split('#')[0].strip()

            params = {}
            for item in spec.split(';'):
                pieces = item.split(':')
                if len(pieces) >= 2:
                    params[pieces[0]] = pieces[1]
            rules[rule_name] = params
    return rules
|
|
|
|
def is_number(s):
    """Report whether *s* is accepted by ``int()``.

    Returns True when ``int(s)`` succeeds and False when it raises
    ValueError. Other exceptions (e.g. TypeError) propagate.
    """
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
|
|
|
|
def subRuleMatch(morph_dict_sub, sub_rule, word):
    """Check whether applying *sub_rule* to *word* yields *morph_dict_sub*.

    ``sub_rule`` has the form ``SELECTOR>REPLACEMENT``; the replacement
    defaults to the empty string when there is no '>'. The selector is
    interpreted as follows:

      * an integer N -- drop the last N characters of *word*, then append
        the replacement (0 drops nothing);
      * contains '^' -- the expected form is the replacement, one space,
        then *word*;
      * anything else -- *word* must end with the selector, and that
        suffix is swapped for the replacement.

    Parameters:
      morph_dict_sub -- the inflected form to compare against
      sub_rule       -- the rule string described above
      word           -- the base word

    Returns True when the constructed form equals *morph_dict_sub*,
    otherwise False.
    """
    parts = sub_rule.split('>')
    if len(parts) < 2:
        parts.append('')
    selector, replacement = parts[0], parts[1]

    # Integer selector: a character count to remove from the end.
    try:
        drop_count = int(selector)
    except ValueError:
        drop_count = None

    if drop_count is not None:
        # word[:-0] would be the empty string, so zero is handled apart.
        stem = word if drop_count == 0 else word[:-drop_count]
        return stem + replacement == morph_dict_sub

    # '^' selector: the replacement is placed in front of the word.
    if '^' in selector:
        return replacement + ' ' + word == morph_dict_sub

    # Suffix selector.
    if not word.endswith(selector):
        return False
    # Slice by explicit end index: with an empty selector the previous
    # word[:-0] form produced '' and could never match word + replacement.
    stem = word[:len(word) - len(selector)]
    return stem + replacement == morph_dict_sub
|
|
|
|
def ruleMatch(morph_dict, morph_rule, word):
    """Decide whether *word*'s forms satisfy every parameter of a rule.

    Parameters:
      morph_dict -- mapping of form key -> inflected form for *word*
      morph_rule -- mapping of form key -> sub-rule string
      word       -- the base word

    Returns True only if each key in *morph_rule* is also present in
    *morph_dict* and ``subRuleMatch`` accepts the corresponding form.
    A rule with no parameters therefore matches.
    """
    # all() stops at the first failing parameter, like an early return.
    return all(
        key in morph_dict
        and subRuleMatch(morph_dict[key], morph_rule[key], word)
        for key in morph_rule
    )
|
|
|
|
def morphRule(name, pos, morph_dict):
    """Pick the morphology rule that describes *name*'s inflected forms.

    Parameters:
      name       -- the base word
      pos        -- part-of-speech tag; not used by this function
      morph_dict -- mapping of form key -> inflected form

    The rule table is loaded with ``getMorphRules()``. Every rule other
    than 'M0' that ``ruleMatch`` accepts is a candidate; the candidate
    whose name sorts lowest wins. If nothing matches, 'M0' is used.

    Returns a tuple ``(rule_name, specified_params)`` where
    *specified_params* is a list of ``"KEY,form"`` strings for the forms in
    *morph_dict* that the chosen rule does not cover.
    """
    rule_table = getMorphRules()

    def uncovered_forms(rule_params):
        # Forms are only listed when the rule names fewer parameters than
        # morph_dict holds; otherwise the list stays empty.
        if len(rule_params) >= len(morph_dict):
            return []
        return [
            str(key) + ',' + morph_dict[key]
            for key in morph_dict
            if key not in rule_params
        ]

    candidates = [
        [rule_name, uncovered_forms(rule_table[rule_name])]
        for rule_name in rule_table
        if rule_name != 'M0'
        and ruleMatch(morph_dict, rule_table[rule_name], name)
    ]

    if candidates:
        # Smallest [name, params] pair, i.e. the first entry after sorting.
        best = min(candidates)
        return best[0], best[1]

    # Fallback: no specific rule matched.
    return 'M0', uncovered_forms(rule_table['M0'])
|
|
|
|
def get_morph_rule(word, pos, morph_dict):
    """Return the name of the morphology rule matching the given forms.

    Parameters:
      word       -- the base word; lower-cased before matching
      pos        -- part-of-speech tag; lower-cased before being passed on
      morph_dict -- mapping of form key -> inflected form

    Note: *morph_dict* is modified in place -- every value is replaced by
    its lower-cased version.

    Returns only the rule name from ``morphRule``; the list of uncovered
    forms is discarded.
    """
    for form_key, form in list(morph_dict.items()):
        morph_dict[form_key] = form.lower()

    rule_name, _unused_params = morphRule(word.lower(), pos.lower(), morph_dict)
    return rule_name
|
|
|
|
def get_morph(word, p):
    """Compute the morphology class and irregular forms for *word*.

    Parameters:
      word -- the base word
      p    -- part-of-speech code, case-insensitive: 'n', 'j', 'v' or 'a',
              mapped to the tags 'n', 'a', 'v' and 'r' respectively that
              ``morphology`` expects

    Returns a dict with key "morphclass" set to the rule name chosen by
    ``morphRule``, plus one entry per form the rule does not cover
    (form key -> lower-cased form).

    Raises KeyError if *p* is not one of the codes above.
    """
    syn_pos_map = {"n": "n", "j": "a", "v": "v", "a": "r"}
    pos = syn_pos_map[p.lower()]

    morph_dict = morphology(word, pos)
    for m in morph_dict:
        morph_dict[m] = morph_dict[m].lower()

    m, params = morphRule(word.lower(), pos, morph_dict)

    morphs = {"morphclass": m}
    for r in params:
        # Each entry is "KEY,form". Split on the first comma only, so a
        # form that itself contains a comma no longer breaks unpacking
        # with ValueError.
        k, v = r.split(",", 1)
        morphs[k] = v
    return morphs
|
|
|
|
if __name__ == '__main__':
    # Manual smoke checks. print(...) with one argument produces the same
    # output on Python 2 and Python 3, whereas the bare print statement is
    # a syntax error on Python 3.
    print(get_morph_rule("spell", "v", {'3PS&PRS': 'spells', 'PAS': 'spellt', 'GER': 'spelling', 'PTP': 'spellt'}))
    print(get_morph_rule("french fry", "n", {'PLR': 'french fries'}))
|