"""Derive inflected forms of English words and classify them by rule.

Inflected forms come from ``pluralize_en`` and ``pattern.en``; they are then
matched against the rules read from ``content-engine/morphology.txt``.
"""
import sys
import os

# The project-local modules imported below are looked up in the
# ``content-engine`` directory under the current working directory.
sys.path.append(os.getcwd()+'/content-engine')

import pluralize_en
import pattern.en


def noun_morphology(word):
    """Return ``{'PLR': <plural of word>}`` using ``pluralize_en``."""
    return {'PLR': pluralize_en.pluralize(word)}


def adjective_morphology(word):
    """Return the superlative ('SUP') and comparative ('CMP') of *word*."""
    return {
        'SUP': pattern.en.superlative(word),
        'CMP': pattern.en.comparative(word),
    }


def verb_morphology(word):
    """Return inflected verb forms taken from ``pattern.en.lexeme(word)``.

    The lexeme list is read positionally: 'PAS' and 'PTP' start as element 0
    and are overwritten by elements 3 and 4 when those exist; '3PS&PRS' is
    element 1 and 'GER' is element 2.
    NOTE(review): the keys presumably stand for past, past participle,
    third-person-singular present and gerund -- confirm against the rule file.

    Raises:
        IndexError: if the lexeme list is empty.
    """
    lex = pattern.en.lexeme(word)
    morph = {'PAS': lex[0], 'PTP': lex[0]}
    if len(lex) >= 2:
        morph['3PS&PRS'] = lex[1]
    if len(lex) >= 3:
        morph['GER'] = lex[2]
    if len(lex) >= 4:
        morph['PAS'] = lex[3]
        morph['PTP'] = lex[3]
    if len(lex) >= 5:
        morph['PTP'] = lex[4]
    return morph


def adverb_morphology(word):
    """Return an empty dict: no inflected forms are produced for adverbs."""
    return {}


def morphology(name, pos):
    """Return the inflected forms of *name* for the part of speech *pos*.

    Args:
        name: the word to inflect.
        pos: 'n' (noun), 'v' (verb), 's' or 'a' (adjective), 'r' (adverb).

    Raises:
        ValueError: if *pos* is not one of the codes above.
    """
    if pos == 'n':
        return noun_morphology(name)
    if pos == 'v':
        return verb_morphology(name)
    if pos in ('s', 'a'):
        return adjective_morphology(name)
    if pos == 'r':
        return adverb_morphology(name)
    # ValueError is still an Exception, so existing handlers keep working.
    raise ValueError('unsupported part of speech: {0!r}'.format(pos))


def _parse_rule_spec(spec):
    """Turn a 'KEY:sub;KEY:sub # comment' token into a ``{KEY: sub}`` dict."""
    spec = spec.strip()
    if '#' in spec:
        spec = spec.split('#')[0].strip()
    params = {}
    for entry in spec.split(';'):
        fields = entry.split(':')
        if len(fields) >= 2:
            params[fields[0]] = fields[1]
    return params


def getMorphRules():
    """Read ``content-engine/morphology.txt`` into ``{rule name: params}``.

    Each line is split on single spaces. The first token is the rule name and
    the third token is the rule specification; the second token is ignored.
    Lines with fewer than three tokens define a rule without parameters, and
    blank lines are skipped.

    Returns:
        dict mapping rule name to a ``{parameter: sub-rule}`` dict.
    """
    # The context manager closes the file handle the original left open.
    with open('content-engine/morphology.txt', 'r') as handle:
        lines = handle.readlines()

    rules = {}
    for line in lines:
        tokens = line.split(' ')
        if len(tokens) >= 3:
            rules[tokens[0]] = _parse_rule_spec(tokens[2])
            continue
        # Short lines used to crash with AttributeError ({} has no .split).
        # The last token still carries the newline, so strip the name.
        rule_name = tokens[0].strip()
        if rule_name:
            rules[rule_name] = {}
    return rules


def is_number(s):
    """Return True if ``int(s)`` succeeds, False if it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    return True


def subRuleMatch(morph_dict_sub,sub_rule,word):
    """Check whether applying *sub_rule* to *word* yields *morph_dict_sub*.

    *sub_rule* has the form 'A>B' (a missing '>B' means B is empty):

    * A is an integer: drop that many trailing characters, then append B.
    * A contains '^': the form must equal B + ' ' + word.
    * otherwise A is a suffix the word must end with; it is replaced by B.

    Args:
        morph_dict_sub: the inflected form to compare against.
        sub_rule: the sub-rule text.
        word: the base word.
    """
    parts = sub_rule.split('>')
    if len(parts) < 2:
        parts.append('')
    target, replacement = parts[0], parts[1]

    if is_number(target):
        count = int(target)
        # word[:-0] would be '', so zero needs its own branch.
        stem = word if count == 0 else word[:-count]
        return stem + replacement == morph_dict_sub

    if '^' in target:
        return replacement + ' ' + word == morph_dict_sub

    if not word.endswith(target):
        return False
    # Slice from the front so an empty suffix keeps the whole word; the
    # original word[:-len(target)] collapsed to '' when target was empty.
    stem = word[:len(word) - len(target)]
    return stem + replacement == morph_dict_sub


def ruleMatch(morph_dict,morph_rule,word):
    """Return True if every parameter of *morph_rule* matches *morph_dict*.

    A parameter matches when it is present in *morph_dict* and its sub-rule,
    applied to *word*, produces the stored form. A rule with no parameters
    matches everything.
    """
    return all(
        parameter in morph_dict
        and subRuleMatch(morph_dict[parameter], morph_rule[parameter], word)
        for parameter in morph_rule
    )


def _unmatched_params(rule_params, morph_dict):
    """List 'key,form' for every form in *morph_dict* the rule leaves out.

    As in the original code, nothing is listed unless the rule has fewer
    parameters than *morph_dict* has forms.
    """
    if len(rule_params) >= len(morph_dict):
        return []
    return [
        str(key) + ',' + morph_dict[key]
        for key in morph_dict
        if key not in rule_params
    ]


def morphRule(name, pos, morph_dict):
    """Pick the morphology rule that describes *name*'s inflected forms.

    Every rule except 'M0' is tried; among the matching rules the one whose
    name sorts first is chosen. When nothing matches, 'M0' is the fallback.

    Args:
        name: the base word.
        pos: part-of-speech code; accepted for interface compatibility but
            not used here.
        morph_dict: ``{parameter: inflected form}`` for the word.

    Returns:
        ``(rule name, leftovers)`` where *leftovers* is a list of 'key,form'
        strings for the forms the chosen rule does not cover.
    """
    morph_rules = getMorphRules()

    candidates = [
        (rule, _unmatched_params(params, morph_dict))
        for rule, params in morph_rules.items()
        if rule != 'M0' and ruleMatch(morph_dict, params, name)
    ]
    if candidates:
        # Rule names are unique, so min() picks what sort()[0] picked before.
        return min(candidates)

    # A rule file without an 'M0' line used to raise KeyError here.
    return 'M0', _unmatched_params(morph_rules.get('M0', {}), morph_dict)


def get_morph_rule(word,pos,morph_dict):
    """Return the name of the rule matching the supplied inflected forms.

    The word, the part of speech and every form are lowercased first. The
    lowercasing is done on a copy, so the caller's *morph_dict* is untouched.
    """
    lowered = {key: form.lower() for key, form in morph_dict.items()}
    rule_name, _ = morphRule(word.lower(), pos.lower(), lowered)
    return rule_name


def get_morph(word,p):
    """Inflect *word* and describe the result as a morphology record.

    Args:
        word: the base word.
        p: part-of-speech code, case-insensitive: 'n', 'j', 'v' or 'a'; these
            map to the internal codes 'n', 'a', 'v' and 'r'.

    Returns:
        dict with 'morphclass' set to the matching rule name, plus one entry
        for each inflected form the rule does not cover.

    Raises:
        KeyError: if *p* is not one of the codes above.
    """
    syn_pos_map = {"n":"n","j":"a","v":"v","a":"r"}
    pos = syn_pos_map[p.lower()]

    forms = morphology(word, pos)
    lowered = {key: form.lower() for key, form in forms.items()}
    rule_name, leftovers = morphRule(word.lower(), pos, lowered)

    morphs = {"morphclass": rule_name}
    for item in leftovers:
        # Split on the first comma only, so a form containing ',' survives.
        key, form = item.split(",", 1)
        morphs[key] = form
    return morphs


if __name__ == '__main__':
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(get_morph_rule("spell","v",{'3PS&PRS':'spells','PAS':'spellt','GER':'spelling','PTP':'spellt'}))
    print(get_morph_rule("french fry","n",{'PLR':'french fries'}))