Add doc meta data. (#4442)

### What problem does this PR solve?

#3690

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-01-10 19:06:59 +08:00
committed by GitHub
parent 4dde73f897
commit 7d909d4d1b
5 changed files with 56 additions and 13 deletions

View File

@ -116,8 +116,7 @@ def get():
@manager.route('/set', methods=['POST']) # noqa: F821
@login_required
@validate_request("doc_id", "chunk_id", "content_with_weight",
"important_kwd", "question_kwd")
@validate_request("doc_id", "chunk_id", "content_with_weight")
def set():
req = request.json
d = {
@ -125,14 +124,16 @@ def set():
"content_with_weight": req["content_with_weight"]}
d["content_ltks"] = rag_tokenizer.tokenize(req["content_with_weight"])
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
if req.get("important_kwd"):
if "important_kwd" in req:
d["important_kwd"] = req["important_kwd"]
d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_kwd"]))
if req.get("question_kwd"):
if "question_kwd" in req:
d["question_kwd"] = req["question_kwd"]
d["question_tks"] = rag_tokenizer.tokenize("\n".join(req["question_kwd"]))
if req.get("tag_kwd"):
if "tag_kwd" in req:
d["tag_kwd"] = req["tag_kwd"]
if "tag_feas" in req:
d["tag_feas"] = req["tag_feas"]
if "available_int" in req:
d["available_int"] = req["available_int"]
@ -157,7 +158,7 @@ def set():
d = beAdoc(d, arr[0], arr[1], not any(
[rag_tokenizer.is_chinese(t) for t in q + a]))
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d["question_kwd"] else "\n".join(d["question_kwd"])])
v, c = embd_mdl.encode([doc.name, req["content_with_weight"] if not d.get("question_kwd") else "\n".join(d["question_kwd"])])
v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
d["q_%d_vec" % len(v)] = v.tolist()
settings.docStoreConn.update({"id": req["chunk_id"]}, d, search.index_name(tenant_id), doc.kb_id)