2017-05-10 23:09:39 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
from functools import reduce
|
|
|
|
|
|
|
|
|
2017-05-11 17:54:02 +00:00
|
|
|
def make_template(knowledge_base, text, parsed):
    """Derive tokens, a matcher and a template from one training example.

    ``text`` is split on whitespace.  Each token that is also an element of
    ``parsed`` is treated as a variable slot:

    * in the template (a list copy of ``parsed``) the first remaining
      occurrence of the token is replaced by the token's position in the
      sentence;
    * in the matcher (a list copy of the tokens) the token is replaced by
      ``{'groups': set(...)}`` built from
      ``knowledge_base.knowledge[token]['groups']``.

    Args:
        knowledge_base: object exposing a ``knowledge`` mapping of
            ``word -> {'groups': iterable}``.
        text: the sentence to tokenize.
        parsed: iterable with the parsed form of the sentence.

    Returns:
        A ``(tokens, matcher, template)`` tuple of three lists.

    Raises:
        KeyError: if a token shared with ``parsed`` is missing from
            ``knowledge_base.knowledge`` or has no ``'groups'`` entry.
    """
    tokens = text.split()
    matcher = list(tokens)
    template = list(parsed)

    for index, word in enumerate(tokens):
        if word not in template:
            continue

        # Only the first occurrence still holding the literal word is
        # swapped; a repeated token can claim a later occurrence.
        template[template.index(word)] = index
        matcher[index] = {
            'groups': set(knowledge_base.knowledge[word]['groups']),
        }

    return tokens, matcher, template
|
2017-05-10 23:09:39 +00:00
|
|
|
|
|
|
|
|
2017-05-11 17:54:02 +00:00
|
|
|
def integrate_language(knowledge_base, example):
    """Turn one example into ``(tokens, matcher, result)`` and echo it.

    Reads ``example["text"]`` (lower-cased before use) and
    ``example["parsed"]``, hands both to ``make_template`` and prints the
    lower-cased text, the parsed form and an empty line to stdout.

    Returns:
        The ``(tokens, matcher, result)`` triple produced by
        ``make_template``.
    """
    lowered = example["text"].lower()
    parsed_form = example["parsed"]

    tokens, matcher, result = make_template(knowledge_base,
                                            lowered,
                                            parsed_form)

    # Debug trace: text, parsed form, then a blank separator line.
    print(lowered, parsed_form, "", sep="\n")

    return tokens, matcher, result
|
2017-05-10 23:09:39 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_matching(sample, other):
    """Generalize ``sample``'s matcher against the compatible ``other`` ones.

    ``sample`` and every entry of ``other`` are sequences whose first item
    is a matcher: a sequence whose elements are either plain strings or
    ``{'groups': set}`` dictionaries.

    Candidates are the entries of ``other`` whose matcher has the same
    length as the sample's.  For each position where the sample holds a
    groups dictionary, candidates are narrowed to those that also hold a
    dictionary there and share at least one group with the sample.

    Returns:
        ``[]`` when no candidate survives; otherwise a new matcher where
        string elements are copied from the sample and each groups
        dictionary holds the intersection of the sample's groups with those
        of every surviving candidate.
    """
    pattern = sample[0]
    length = len(pattern)
    candidates = [entry for entry in other if len(entry[0]) == length]

    for position, element in enumerate(pattern):
        if not candidates:
            return []

        if isinstance(element, str):
            continue

        candidates = [
            entry for entry in candidates
            if not isinstance(entry[0][position], str)
            and entry[0][position]['groups'] & element['groups']
        ]

    # The filter applied at the last position may have removed every
    # candidate; without this check the intersection below would have
    # nothing to fold over.
    if not candidates:
        return []

    matched = []
    for position, element in enumerate(pattern):
        if isinstance(element, str):
            matched.append(element)
            continue

        shared = element['groups']
        for entry in candidates:
            # Non-mutating ``&`` so neither the sample's nor the
            # candidates' sets are altered.
            shared = shared & entry[0][position]['groups']
        matched.append({'groups': shared})

    return matched
|
|
|
|
|
|
|
|
|
|
|
|
def reprocess_language_knowledge(knowledge_base, examples):
    """Recompute the pattern of every known and new example.

    The stored ``knowledge_base.examples`` are concatenated with
    ``examples``.  Each entry is compared, through ``get_matching``, against
    all the other entries; when a non-empty match comes back the entry's
    first item is replaced by it.  Progress is printed to stdout.

    Returns:
        A list with one ``(matcher, sample[1])`` pair per example.
    """
    examples = knowledge_base.examples + examples

    print('\n'.join(str(known) for known in knowledge_base.examples))
    print("--")

    pattern_examples = []
    for position, sample in enumerate(examples):
        rest = examples[:position] + examples[position + 1:]
        print(sample)

        match = get_matching(sample, rest)
        print("->", match)

        # NOTE(review): the source lost its indentation; the append and the
        # trailing print are assumed to run for every example, with only
        # the re-binding of the sample guarded by the match check.
        refined = (match, sample[1],) if match else sample
        pattern_examples.append(refined)
        print()

    return pattern_examples
|
|
|
|
|
2017-05-11 17:54:02 +00:00
|
|
|
|
|
|
|
def get_fit(knowledge, row):
    """Find the first trained pattern that fits the tokens in ``row``.

    Iterates ``knowledge.trained``, a sequence of ``(sample, ast)`` pairs.
    A sample fits when it has the same length as ``row`` and each of its
    string elements equals the element of ``row`` at the same position;
    non-string elements accept anything.

    Returns:
        The first fitting ``(sample, ast)`` pair, or ``None`` when no
        trained sample fits.
    """
    for sample, ast in knowledge.trained:
        if len(sample) != len(row):
            continue

        if all(not isinstance(expected, str) or expected == actual
               for expected, actual in zip(sample, row)):
            return sample, ast

    # Reached only after every trained sample has been tried.
    return None
|