Light GraphRAG (#4585)

### What problem does this PR solve?

#4543

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2025-01-22 19:43:14 +08:00
committed by GitHub
parent 1a367664f1
commit dd0ebbea35
55 changed files with 5461 additions and 4000 deletions

View File

@ -123,6 +123,7 @@ class ESConnection(DocStoreConnection):
logger.exception("ESConnection.indexExist got exception")
if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
continue
break
return False
"""
@ -238,6 +239,7 @@ class ESConnection(DocStoreConnection):
for i in range(ATTEMPT_TIME):
try:
#print(json.dumps(q, ensure_ascii=False))
res = self.es.search(index=indexNames,
body=q,
timeout="600s",
@ -325,8 +327,9 @@ class ESConnection(DocStoreConnection):
except Exception as e:
logger.exception(
f"ESConnection.update(index={indexName}, id={id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception")
if str(e).find("Timeout") > 0:
if re.search(r"(timeout|connection)", str(e).lower()):
continue
break
return False
# update unspecific maybe-multiple documents
@ -364,9 +367,12 @@ class ESConnection(DocStoreConnection):
if (not isinstance(k, str) or not v) and k != "available_int":
continue
if isinstance(v, str):
scripts.append(f"ctx._source.{k} = '{v}'")
elif isinstance(v, int):
scripts.append(f"ctx._source.{k} = {v}")
v = re.sub(r"(['\n\r]|\\.)", " ", v)
scripts.append(f"ctx._source.{k}='{v}';")
elif isinstance(v, int) or isinstance(v, float):
scripts.append(f"ctx._source.{k}={v};")
elif isinstance(v, list):
scripts.append(f"ctx._source.{k}={json.dumps(v, ensure_ascii=False)};")
else:
raise Exception(
f"newValue `{str(k)}={str(v)}` value type is {str(type(v))}, expected to be int, str.")
@ -383,9 +389,10 @@ class ESConnection(DocStoreConnection):
_ = ubq.execute()
return True
except Exception as e:
logger.error("ESConnection.update got exception: " + str(e))
if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0:
logger.error("ESConnection.update got exception: " + str(e) + "\n".join(scripts))
if re.search(r"(timeout|connection|conflict)", str(e).lower()):
continue
break
return False
def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int:
@ -399,7 +406,16 @@ class ESConnection(DocStoreConnection):
else:
qry = Q("bool")
for k, v in condition.items():
if isinstance(v, list):
if k == "exists":
qry.filter.append(Q("exists", field=v))
elif k == "must_not":
if isinstance(v, dict):
for kk, vv in v.items():
if kk == "exists":
qry.must_not.append(Q("exists", field=vv))
elif isinstance(v, list):
qry.must.append(Q("terms", **{k: v}))
elif isinstance(v, str) or isinstance(v, int):
qry.must.append(Q("term", **{k: v}))
@ -408,6 +424,7 @@ class ESConnection(DocStoreConnection):
logger.debug("ESConnection.delete query: " + json.dumps(qry.to_dict()))
for _ in range(ATTEMPT_TIME):
try:
print(Search().query(qry).to_dict(), flush=True)
res = self.es.delete_by_query(
index=indexName,
body=Search().query(qry).to_dict(),
@ -415,7 +432,7 @@ class ESConnection(DocStoreConnection):
return res["deleted"]
except Exception as e:
logger.warning("ESConnection.delete got exception: " + str(e))
if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE):
if re.search(r"(timeout|connection)", str(e).lower()):
time.sleep(3)
continue
if re.search(r"(not_found)", str(e), re.IGNORECASE):