mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Optimize graphrag cache get entity (#6018)
### What problem does this PR solve? Optimize graphrag cache get entity ### Type of change - [x] Performance Improvement
This commit is contained in:
@ -237,8 +237,33 @@ def is_float_regex(value):
|
||||
def chunk_id(chunk):
|
||||
return xxhash.xxh64((chunk["content_with_weight"] + chunk["kb_id"]).encode("utf-8")).hexdigest()
|
||||
|
||||
def get_entity_cache(tenant_id, kb_id, ent_name) -> str | list[str]:
|
||||
hasher = xxhash.xxh64()
|
||||
hasher.update(str(tenant_id).encode("utf-8"))
|
||||
hasher.update(str(kb_id).encode("utf-8"))
|
||||
hasher.update(str(ent_name).encode("utf-8"))
|
||||
|
||||
k = hasher.hexdigest()
|
||||
bin = REDIS_CONN.get(k)
|
||||
if not bin:
|
||||
return
|
||||
return json.loads(bin)
|
||||
|
||||
|
||||
def set_entity_cache(tenant_id, kb_id, ent_name, content_with_weight):
|
||||
hasher = xxhash.xxh64()
|
||||
hasher.update(str(tenant_id).encode("utf-8"))
|
||||
hasher.update(str(kb_id).encode("utf-8"))
|
||||
hasher.update(str(ent_name).encode("utf-8"))
|
||||
|
||||
k = hasher.hexdigest()
|
||||
REDIS_CONN.set(k, content_with_weight.encode("utf-8"), 3600)
|
||||
|
||||
|
||||
def get_entity(tenant_id, kb_id, ent_name):
|
||||
cache = get_entity_cache(tenant_id, kb_id, ent_name)
|
||||
if cache:
|
||||
return cache
|
||||
conds = {
|
||||
"fields": ["content_with_weight"],
|
||||
"entity_kwd": ent_name,
|
||||
@ -250,6 +275,7 @@ def get_entity(tenant_id, kb_id, ent_name):
|
||||
for id in es_res.ids:
|
||||
try:
|
||||
if isinstance(ent_name, str):
|
||||
set_entity_cache(tenant_id, kb_id, ent_name, es_res.field[id]["content_with_weight"])
|
||||
return json.loads(es_res.field[id]["content_with_weight"])
|
||||
res.append(json.loads(es_res.field[id]["content_with_weight"]))
|
||||
except Exception:
|
||||
@ -272,6 +298,7 @@ def set_entity(tenant_id, kb_id, embd_mdl, ent_name, meta):
|
||||
"available_int": 0
|
||||
}
|
||||
chunk["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(chunk["content_ltks"])
|
||||
set_entity_cache(tenant_id, kb_id, ent_name, chunk["content_with_weight"])
|
||||
res = settings.retrievaler.search({"entity_kwd": ent_name, "size": 1, "fields": []},
|
||||
search.index_name(tenant_id), [kb_id])
|
||||
if res.ids:
|
||||
|
||||
Reference in New Issue
Block a user