refine manual parser (#131)

This commit is contained in:
KevinHuSh
2024-03-19 12:26:04 +08:00
committed by GitHub
parent d56c9e7630
commit 9da671b951
13 changed files with 145 additions and 52 deletions

View File

@ -38,7 +38,7 @@ class EsQueryer:
"",
txt)
return re.sub(
r"(what|who|how|which|where|why|(is|are|were|was) there) (is|are|were|was)*", "", txt, re.IGNORECASE)
r"(what|who|how|which|where|why|(is|are|were|was) there) (is|are|were|was|to)*", "", txt, re.IGNORECASE)
def question(self, txt, tbl="qa", min_match="60%"):
txt = re.sub(
@ -50,16 +50,16 @@ class EsQueryer:
txt = EsQueryer.rmWWW(txt)
if not self.isChinese(txt):
tks = txt.split(" ")
q = []
tks = [t for t in txt.split(" ") if t.strip()]
q = tks
for i in range(1, len(tks)):
q.append("\"%s %s\"~2" % (tks[i - 1], tks[i]))
q.append("\"%s %s\"^2" % (tks[i - 1], tks[i]))
if not q:
q.append(txt)
return Q("bool",
must=Q("query_string", fields=self.flds,
type="best_fields", query=" OR ".join(q),
boost=1, minimum_should_match="60%")
boost=1, minimum_should_match=min_match)
), txt.split(" ")
def needQieqie(tk):
@ -147,7 +147,7 @@ class EsQueryer:
atks = toDict(atks)
btkss = [toDict(tks) for tks in btkss]
tksim = [self.similarity(atks, btks) for btks in btkss]
return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight, sims[0], tksim
return np.array(sims[0]) * vtweight + np.array(tksim) * tkweight, tksim, sims[0]
def similarity(self, qtwt, dtwt):
if isinstance(dtwt, type("")):