Feat: enhance metadata arranging. (#12745)

### What problem does this PR solve?
#11564

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu
2026-01-22 15:34:08 +08:00
committed by GitHub
parent bc7b864a6c
commit 3beb85efa0
10 changed files with 195 additions and 129 deletions

View File

@ -377,7 +377,7 @@ async def async_chat(dialog, messages, stream=True, **kwargs):
logging.debug("Proceeding with retrieval")
tenant_ids = list(set([kb.tenant_id for kb in kbs]))
knowledges = []
if prompt_config.get("reasoning", False):
if prompt_config.get("reasoning", False) or kwargs.get("reasoning"):
reasoner = DeepResearcher(
chat_mdl,
prompt_config,

View File

@ -776,10 +776,25 @@ class DocumentService(CommonService):
@classmethod
@DB.connection_context()
def get_metadata_summary(cls, kb_id):
def get_metadata_summary(cls, kb_id, document_ids=None):
def _meta_value_type(value):
if value is None:
return None
if isinstance(value, list):
return "list"
if isinstance(value, bool):
return "string"
if isinstance(value, (int, float)):
return "number"
return "string"
fields = [cls.model.id, cls.model.meta_fields]
summary = {}
for r in cls.model.select(*fields).where(cls.model.kb_id == kb_id):
type_counter = {}
query = cls.model.select(*fields).where(cls.model.kb_id == kb_id)
if document_ids:
query = query.where(cls.model.id.in_(document_ids))
for r in query:
meta_fields = r.meta_fields or {}
if isinstance(meta_fields, str):
try:
@ -789,6 +804,11 @@ class DocumentService(CommonService):
if not isinstance(meta_fields, dict):
continue
for k, v in meta_fields.items():
value_type = _meta_value_type(v)
if value_type:
if k not in type_counter:
type_counter[k] = {}
type_counter[k][value_type] = type_counter[k].get(value_type, 0) + 1
values = v if isinstance(v, list) else [v]
for vv in values:
if not vv:
@ -797,11 +817,19 @@ class DocumentService(CommonService):
if k not in summary:
summary[k] = {}
summary[k][sv] = summary[k].get(sv, 0) + 1
return {k: sorted([(val, cnt) for val, cnt in v.items()], key=lambda x: x[1], reverse=True) for k, v in summary.items()}
result = {}
for k, v in summary.items():
values = sorted([(val, cnt) for val, cnt in v.items()], key=lambda x: x[1], reverse=True)
type_counts = type_counter.get(k, {})
value_type = "string"
if type_counts:
value_type = max(type_counts.items(), key=lambda item: item[1])[0]
result[k] = {"type": value_type, "values": values}
return result
@classmethod
@DB.connection_context()
def batch_update_metadata(cls, kb_id, doc_ids, updates=None, deletes=None):
def batch_update_metadata(cls, kb_id, doc_ids, updates=None, deletes=None, adds=None):
updates = updates or []
deletes = deletes or []
if not doc_ids:
@ -826,6 +854,8 @@ class DocumentService(CommonService):
key = upd.get("key")
if not key:
continue
if key not in meta:
meta[key] = upd.get("value")
new_value = upd.get("value")
match_provided = "match" in upd
@ -895,7 +925,7 @@ class DocumentService(CommonService):
updated_docs = 0
with DB.atomic():
rows = cls.model.select(cls.model.id, cls.model.meta_fields).where(
(cls.model.id.in_(doc_ids)) & (cls.model.kb_id == kb_id)
cls.model.id.in_(doc_ids)
)
for r in rows:
meta = _normalize_meta(r.meta_fields or {})