fix jina adding issue and term weight refinement (#974)

### What problem does this PR solve?

#724 #162

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
KevinHuSh
2024-05-29 19:38:57 +08:00
committed by GitHub
parent e0d05a3895
commit 758eb03ccb
7 changed files with 13 additions and 11 deletions

View File

@@ -28,6 +28,7 @@ EmbeddingModel = {
"FastEmbed": FastEmbed,
"Youdao": YoudaoEmbed,
"BaiChuan": BaiChuanEmbed,
"Jina": JinaEmbed,
"BAAI": DefaultEmbedding
}

View File

@@ -291,7 +291,7 @@ class JinaEmbed(Base):
"input": texts,
'encoding_type': 'float'
}
res = requests.post(self.base_url, headers=self.headers, json=data)
res = requests.post(self.base_url, headers=self.headers, json=data).json()
return np.array([d["embedding"] for d in res["data"]]), res["usage"]["total_tokens"]
def encode_queries(self, text):

View File

@@ -91,7 +91,7 @@ class JinaRerank(Base):
"documents": texts,
"top_n": len(texts)
}
res = requests.post(self.base_url, headers=self.headers, json=data)
res = requests.post(self.base_url, headers=self.headers, json=data).json()
return np.array([d["relevance_score"] for d in res["results"]]), res["usage"]["total_tokens"]

View File

@@ -44,7 +44,7 @@ class EsQueryer:
def question(self, txt, tbl="qa", min_match="60%"):
txt = re.sub(
r"[ \r\n\t,,。??/`!&\^%%]+",
r"[ :\r\n\t,,。??/`!&\^%%]+",
" ",
rag_tokenizer.tradi2simp(
rag_tokenizer.strQ2B(

View File

@@ -104,7 +104,7 @@ class Dealer:
while i < len(tks):
j = i
if i == 0 and oneTerm(tks[i]) and len(
tks) > 1 and len(tks[i + 1]) > 1: # 多 工位
tks) > 1 and (len(tks[i + 1]) > 1 and not re.match(r"[0-9a-zA-Z]", tks[i + 1])): # 多 工位
res.append(" ".join(tks[0:2]))
i = 2
continue