mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Compare commits
32 Commits
v0.22.1
...
f0a14f5fce
| Author | SHA1 | Date | |
|---|---|---|---|
| f0a14f5fce | |||
| 174a2578e8 | |||
| a0959b9d38 | |||
| 13299197b8 | |||
| 249296e417 | |||
| db0f6840d9 | |||
| 1033a3ae26 | |||
| 1845daf41f | |||
| 4c8f9f0d77 | |||
| cc00c3ec93 | |||
| 653b785958 | |||
| 971c1bcba7 | |||
| 065917bf1c | |||
| 820934fc77 | |||
| d3d2ccc76c | |||
| c8ab9079b3 | |||
| 0d5589bfda | |||
| b846a0f547 | |||
| 69578ebfce | |||
| 06cef71ba6 | |||
| d2b1da0e26 | |||
| 7c6d30f4c8 | |||
| ea0352ee4a | |||
| fa5cf10f56 | |||
| 3fe71ab7dd | |||
| 9f715d6bc2 | |||
| 48de3b26ba | |||
| 273c4bc4d3 | |||
| 420c97199a | |||
| ecf0322165 | |||
| 38234aca53 | |||
| 1c06ec39ca |
@ -86,7 +86,7 @@ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).
|
||||
## 🔥 Latest Updates
|
||||
|
||||
- 2025-11-19 Supports Gemini 3 Pro.
|
||||
- 2025-11-12 Supports data synchronization from Confluence, AWS S3, Discord, Google Drive.
|
||||
- 2025-11-12 Supports data synchronization from Confluence, S3, Notion, Discord, Google Drive.
|
||||
- 2025-10-23 Supports MinerU & Docling as document parsing methods.
|
||||
- 2025-10-15 Supports orchestrable ingestion pipeline.
|
||||
- 2025-08-08 Supports OpenAI's latest GPT-5 series models.
|
||||
|
||||
@ -86,7 +86,7 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
|
||||
## 🔥 Pembaruan Terbaru
|
||||
|
||||
- 2025-11-19 Mendukung Gemini 3 Pro.
|
||||
- 2025-11-12 Mendukung sinkronisasi data dari Confluence, AWS S3, Discord, Google Drive.
|
||||
- 2025-11-12 Mendukung sinkronisasi data dari Confluence, S3, Notion, Discord, Google Drive.
|
||||
- 2025-10-23 Mendukung MinerU & Docling sebagai metode penguraian dokumen.
|
||||
- 2025-10-15 Dukungan untuk jalur data yang terorkestrasi.
|
||||
- 2025-08-08 Mendukung model seri GPT-5 terbaru dari OpenAI.
|
||||
|
||||
@ -67,7 +67,7 @@
|
||||
## 🔥 最新情報
|
||||
|
||||
- 2025-11-19 Gemini 3 Proをサポートしています。
|
||||
- 2025-11-12 Confluence、AWS S3、Discord、Google Drive からのデータ同期をサポートします。
|
||||
- 2025-11-12 Confluence、S3、Notion、Discord、Google Drive からのデータ同期をサポートします。
|
||||
- 2025-10-23 ドキュメント解析方法として MinerU と Docling をサポートします。
|
||||
- 2025-10-15 オーケストレーションされたデータパイプラインのサポート。
|
||||
- 2025-08-08 OpenAI の最新 GPT-5 シリーズモデルをサポートします。
|
||||
|
||||
@ -68,7 +68,7 @@
|
||||
## 🔥 업데이트
|
||||
|
||||
- 2025-11-19 Gemini 3 Pro를 지원합니다.
|
||||
- 2025-11-12 Confluence, AWS S3, Discord, Google Drive에서 데이터 동기화를 지원합니다.
|
||||
- 2025-11-12 Confluence, S3, Notion, Discord, Google Drive에서 데이터 동기화를 지원합니다.
|
||||
- 2025-10-23 문서 파싱 방법으로 MinerU 및 Docling을 지원합니다.
|
||||
- 2025-10-15 조정된 데이터 파이프라인 지원.
|
||||
- 2025-08-08 OpenAI의 최신 GPT-5 시리즈 모델을 지원합니다.
|
||||
|
||||
@ -87,7 +87,7 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
|
||||
## 🔥 Últimas Atualizações
|
||||
|
||||
- 19-11-2025 Suporta Gemini 3 Pro.
|
||||
- 12-11-2025 Suporta a sincronização de dados do Confluence, AWS S3, Discord e Google Drive.
|
||||
- 12-11-2025 Suporta a sincronização de dados do Confluence, S3, Notion, Discord e Google Drive.
|
||||
- 23-10-2025 Suporta MinerU e Docling como métodos de análise de documentos.
|
||||
- 15-10-2025 Suporte para pipelines de dados orquestrados.
|
||||
- 08-08-2025 Suporta a mais recente série GPT-5 da OpenAI.
|
||||
|
||||
@ -86,7 +86,7 @@
|
||||
## 🔥 近期更新
|
||||
|
||||
- 2025-11-19 支援 Gemini 3 Pro。
|
||||
- 2025-11-12 支援從 Confluence、AWS S3、Discord、Google Drive 進行資料同步。
|
||||
- 2025-11-12 支援從 Confluence、S3、Notion、Discord、Google Drive 進行資料同步。
|
||||
- 2025-10-23 支援 MinerU 和 Docling 作為文件解析方法。
|
||||
- 2025-10-15 支援可編排的資料管道。
|
||||
- 2025-08-08 支援 OpenAI 最新的 GPT-5 系列模型。
|
||||
|
||||
@ -86,7 +86,7 @@
|
||||
## 🔥 近期更新
|
||||
|
||||
- 2025-11-19 支持 Gemini 3 Pro。
|
||||
- 2025-11-12 支持从 Confluence、AWS S3、Discord、Google Drive 进行数据同步。
|
||||
- 2025-11-12 支持从 Confluence、S3、Notion、Discord、Google Drive 进行数据同步。
|
||||
- 2025-10-23 支持 MinerU 和 Docling 作为文档解析方法。
|
||||
- 2025-10-15 支持可编排的数据管道。
|
||||
- 2025-08-08 支持 OpenAI 最新的 GPT-5 系列模型。
|
||||
|
||||
@ -32,7 +32,7 @@ class IterationParam(ComponentParamBase):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.items_ref = ""
|
||||
self.veriable={}
|
||||
self.variable={}
|
||||
|
||||
def get_input_form(self) -> dict[str, dict]:
|
||||
return {
|
||||
|
||||
@ -132,12 +132,12 @@ class Retrieval(ToolBase, ABC):
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
if self._param.meta_data_filter.get("method") == "auto":
|
||||
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
|
||||
filters = gen_meta_filter(chat_mdl, metas, query)
|
||||
doc_ids.extend(meta_filter(metas, filters))
|
||||
filters: dict = gen_meta_filter(chat_mdl, metas, query)
|
||||
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
elif self._param.meta_data_filter.get("method") == "manual":
|
||||
filters=self._param.meta_data_filter["manual"]
|
||||
filters = self._param.meta_data_filter["manual"]
|
||||
for flt in filters:
|
||||
pat = re.compile(self.variable_ref_patt)
|
||||
s = flt["value"]
|
||||
@ -165,9 +165,9 @@ class Retrieval(ToolBase, ABC):
|
||||
|
||||
out_parts.append(s[last:])
|
||||
flt["value"] = "".join(out_parts)
|
||||
doc_ids.extend(meta_filter(metas, filters))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
doc_ids.extend(meta_filter(metas, filters, self._param.meta_data_filter.get("logic", "and")))
|
||||
if filters and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
|
||||
if self._param.cross_languages:
|
||||
query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages)
|
||||
|
||||
@ -24,7 +24,7 @@ from flasgger import Swagger
|
||||
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
||||
from quart_cors import cors
|
||||
from common.constants import StatusEnum
|
||||
from api.db.db_models import close_connection
|
||||
from api.db.db_models import close_connection, APIToken
|
||||
from api.db.services import UserService
|
||||
from api.utils.json_encode import CustomJSONEncoder
|
||||
from api.utils import commands
|
||||
@ -124,6 +124,10 @@ def _load_user():
|
||||
user = UserService.query(
|
||||
access_token=access_token, status=StatusEnum.VALID.value
|
||||
)
|
||||
if not user and len(authorization.split()) == 2:
|
||||
objs = APIToken.query(token=authorization.split()[1])
|
||||
if objs:
|
||||
user = UserService.query(id=objs[0].tenant_id, status=StatusEnum.VALID.value)
|
||||
if user:
|
||||
if not user[0].access_token or not user[0].access_token.strip():
|
||||
logging.warning(f"User {user[0].email} has empty access_token in database")
|
||||
|
||||
@ -305,14 +305,14 @@ async def retrieval_test():
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
if meta_data_filter.get("method") == "auto":
|
||||
chat_mdl = LLMBundle(current_user.id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
|
||||
filters = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters))
|
||||
filters: dict = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
elif meta_data_filter.get("method") == "manual":
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"]))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
|
||||
if meta_data_filter["manual"] and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
|
||||
try:
|
||||
tenants = UserTenantService.query(user_id=current_user.id)
|
||||
|
||||
@ -159,10 +159,10 @@ async def webhook(tenant_id: str, agent_id: str):
|
||||
data=False, message=str(e),
|
||||
code=RetCode.EXCEPTION_ERROR)
|
||||
|
||||
def sse():
|
||||
async def sse():
|
||||
nonlocal canvas
|
||||
try:
|
||||
for ans in canvas.run(query=req.get("query", ""), files=req.get("files", []), user_id=req.get("user_id", tenant_id), webhook_payload=req):
|
||||
async for ans in canvas.run(query=req.get("query", ""), files=req.get("files", []), user_id=req.get("user_id", tenant_id), webhook_payload=req):
|
||||
yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"
|
||||
|
||||
cvs.dsl = json.loads(str(canvas))
|
||||
|
||||
@ -120,7 +120,7 @@ async def retrieval(tenant_id):
|
||||
retrieval_setting = req.get("retrieval_setting", {})
|
||||
similarity_threshold = float(retrieval_setting.get("score_threshold", 0.0))
|
||||
top = int(retrieval_setting.get("top_k", 1024))
|
||||
metadata_condition = req.get("metadata_condition", {})
|
||||
metadata_condition = req.get("metadata_condition", {}) or {}
|
||||
metas = DocumentService.get_meta_by_kbs([kb_id])
|
||||
|
||||
doc_ids = []
|
||||
@ -132,7 +132,7 @@ async def retrieval(tenant_id):
|
||||
|
||||
embd_mdl = LLMBundle(kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
|
||||
if metadata_condition:
|
||||
doc_ids.extend(meta_filter(metas, convert_conditions(metadata_condition)))
|
||||
doc_ids.extend(meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and")))
|
||||
if not doc_ids and metadata_condition:
|
||||
doc_ids = ["-999"]
|
||||
ranks = settings.retriever.retrieval(
|
||||
|
||||
@ -1434,6 +1434,7 @@ async def retrieval_test(tenant_id):
|
||||
question = req["question"]
|
||||
doc_ids = req.get("document_ids", [])
|
||||
use_kg = req.get("use_kg", False)
|
||||
toc_enhance = req.get("toc_enhance", False)
|
||||
langs = req.get("cross_languages", [])
|
||||
if not isinstance(doc_ids, list):
|
||||
return get_error_data_result("`documents` should be a list")
|
||||
@ -1442,9 +1443,11 @@ async def retrieval_test(tenant_id):
|
||||
if doc_id not in doc_ids_list:
|
||||
return get_error_data_result(f"The datasets don't own the document {doc_id}")
|
||||
if not doc_ids:
|
||||
metadata_condition = req.get("metadata_condition", {})
|
||||
metadata_condition = req.get("metadata_condition", {}) or {}
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
doc_ids = meta_filter(metas, convert_conditions(metadata_condition))
|
||||
doc_ids = meta_filter(metas, convert_conditions(metadata_condition), metadata_condition.get("logic", "and"))
|
||||
if metadata_condition and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
similarity_threshold = float(req.get("similarity_threshold", 0.2))
|
||||
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
|
||||
top = int(req.get("top_k", 1024))
|
||||
@ -1485,6 +1488,11 @@ async def retrieval_test(tenant_id):
|
||||
highlight=highlight,
|
||||
rank_feature=label_question(question, kbs),
|
||||
)
|
||||
if toc_enhance:
|
||||
chat_mdl = LLMBundle(kb.tenant_id, LLMType.CHAT)
|
||||
cks = settings.retriever.retrieval_by_toc(question, ranks["chunks"], tenant_ids, chat_mdl, size)
|
||||
if cks:
|
||||
ranks["chunks"] = cks
|
||||
if use_kg:
|
||||
ck = settings.kg_retriever.retrieval(question, [k.tenant_id for k in kbs], kb_ids, embd_mdl, LLMBundle(kb.tenant_id, LLMType.CHAT))
|
||||
if ck["content_with_weight"]:
|
||||
|
||||
@ -428,17 +428,15 @@ async def agents_completion_openai_compatibility(tenant_id, agent_id):
|
||||
return resp
|
||||
else:
|
||||
# For non-streaming, just return the response directly
|
||||
response = next(
|
||||
completion_openai(
|
||||
async for response in completion_openai(
|
||||
tenant_id,
|
||||
agent_id,
|
||||
question,
|
||||
session_id=req.pop("session_id", req.get("id", "")) or req.get("metadata", {}).get("id", ""),
|
||||
stream=False,
|
||||
**req,
|
||||
)
|
||||
)
|
||||
return jsonify(response)
|
||||
):
|
||||
return jsonify(response)
|
||||
|
||||
|
||||
@manager.route("/agents/<agent_id>/completions", methods=["POST"]) # noqa: F821
|
||||
@ -448,8 +446,8 @@ async def agent_completions(tenant_id, agent_id):
|
||||
|
||||
if req.get("stream", True):
|
||||
|
||||
def generate():
|
||||
for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req):
|
||||
async def generate():
|
||||
async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req):
|
||||
if isinstance(answer, str):
|
||||
try:
|
||||
ans = json.loads(answer[5:]) # remove "data:"
|
||||
@ -473,7 +471,7 @@ async def agent_completions(tenant_id, agent_id):
|
||||
full_content = ""
|
||||
reference = {}
|
||||
final_ans = ""
|
||||
for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req):
|
||||
async for answer in agent_completion(tenant_id=tenant_id, agent_id=agent_id, **req):
|
||||
try:
|
||||
ans = json.loads(answer[5:])
|
||||
|
||||
@ -875,7 +873,7 @@ async def agent_bot_completions(agent_id):
|
||||
resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
|
||||
return resp
|
||||
|
||||
for answer in agent_completion(objs[0].tenant_id, agent_id, **req):
|
||||
async for answer in agent_completion(objs[0].tenant_id, agent_id, **req):
|
||||
return get_result(data=answer)
|
||||
|
||||
|
||||
@ -977,14 +975,14 @@ async def retrieval_test_embedded():
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
if meta_data_filter.get("method") == "auto":
|
||||
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, llm_name=search_config.get("chat_id", ""))
|
||||
filters = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters))
|
||||
filters: dict = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
elif meta_data_filter.get("method") == "manual":
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"]))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
|
||||
if meta_data_filter["manual"] and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
|
||||
try:
|
||||
tenants = UserTenantService.query(user_id=tenant_id)
|
||||
|
||||
@ -177,7 +177,7 @@ class UserCanvasService(CommonService):
|
||||
return True
|
||||
|
||||
|
||||
def completion(tenant_id, agent_id, session_id=None, **kwargs):
|
||||
async def completion(tenant_id, agent_id, session_id=None, **kwargs):
|
||||
query = kwargs.get("query", "") or kwargs.get("question", "")
|
||||
files = kwargs.get("files", [])
|
||||
inputs = kwargs.get("inputs", {})
|
||||
@ -219,10 +219,14 @@ def completion(tenant_id, agent_id, session_id=None, **kwargs):
|
||||
"id": message_id
|
||||
})
|
||||
txt = ""
|
||||
for ans in canvas.run(query=query, files=files, user_id=user_id, inputs=inputs):
|
||||
async for ans in canvas.run(query=query, files=files, user_id=user_id, inputs=inputs):
|
||||
ans["session_id"] = session_id
|
||||
if ans["event"] == "message":
|
||||
txt += ans["data"]["content"]
|
||||
if ans["data"].get("start_to_think", False):
|
||||
txt += "<think>"
|
||||
elif ans["data"].get("end_to_think", False):
|
||||
txt += "</think>"
|
||||
yield "data:" + json.dumps(ans, ensure_ascii=False) + "\n\n"
|
||||
|
||||
conv.message.append({"role": "assistant", "content": txt, "created_at": time.time(), "id": message_id})
|
||||
@ -233,7 +237,7 @@ def completion(tenant_id, agent_id, session_id=None, **kwargs):
|
||||
API4ConversationService.append_message(conv["id"], conv)
|
||||
|
||||
|
||||
def completion_openai(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
|
||||
async def completion_openai(tenant_id, agent_id, question, session_id=None, stream=True, **kwargs):
|
||||
tiktoken_encoder = tiktoken.get_encoding("cl100k_base")
|
||||
prompt_tokens = len(tiktoken_encoder.encode(str(question)))
|
||||
user_id = kwargs.get("user_id", "")
|
||||
@ -241,7 +245,7 @@ def completion_openai(tenant_id, agent_id, question, session_id=None, stream=Tru
|
||||
if stream:
|
||||
completion_tokens = 0
|
||||
try:
|
||||
for ans in completion(
|
||||
async for ans in completion(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
session_id=session_id,
|
||||
@ -300,7 +304,7 @@ def completion_openai(tenant_id, agent_id, question, session_id=None, stream=Tru
|
||||
try:
|
||||
all_content = ""
|
||||
reference = {}
|
||||
for ans in completion(
|
||||
async for ans in completion(
|
||||
tenant_id=tenant_id,
|
||||
agent_id=agent_id,
|
||||
session_id=session_id,
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
#
|
||||
import logging
|
||||
from datetime import datetime
|
||||
import os
|
||||
from typing import Tuple, List
|
||||
|
||||
from anthropic import BaseModel
|
||||
@ -103,7 +104,8 @@ class SyncLogsService(CommonService):
|
||||
Knowledgebase.avatar.alias("kb_avatar"),
|
||||
Connector2Kb.auto_parse,
|
||||
cls.model.from_beginning.alias("reindex"),
|
||||
cls.model.status
|
||||
cls.model.status,
|
||||
cls.model.update_time
|
||||
]
|
||||
if not connector_id:
|
||||
fields.append(Connector.config)
|
||||
@ -116,7 +118,11 @@ class SyncLogsService(CommonService):
|
||||
if connector_id:
|
||||
query = query.where(cls.model.connector_id == connector_id)
|
||||
else:
|
||||
interval_expr = SQL("INTERVAL `t2`.`refresh_freq` MINUTE")
|
||||
database_type = os.getenv("DB_TYPE", "mysql")
|
||||
if "postgres" in database_type.lower():
|
||||
interval_expr = SQL("make_interval(mins => t2.refresh_freq)")
|
||||
else:
|
||||
interval_expr = SQL("INTERVAL `t2`.`refresh_freq` MINUTE")
|
||||
query = query.where(
|
||||
Connector.input_type == InputType.POLL,
|
||||
Connector.status == TaskStatus.SCHEDULE,
|
||||
|
||||
@ -287,7 +287,7 @@ def convert_conditions(metadata_condition):
|
||||
]
|
||||
|
||||
|
||||
def meta_filter(metas: dict, filters: list[dict]):
|
||||
def meta_filter(metas: dict, filters: list[dict], logic: str = "and"):
|
||||
doc_ids = set([])
|
||||
|
||||
def filter_out(v2docs, operator, value):
|
||||
@ -331,7 +331,10 @@ def meta_filter(metas: dict, filters: list[dict]):
|
||||
if not doc_ids:
|
||||
doc_ids = set(ids)
|
||||
else:
|
||||
doc_ids = doc_ids & set(ids)
|
||||
if logic == "and":
|
||||
doc_ids = doc_ids & set(ids)
|
||||
else:
|
||||
doc_ids = doc_ids | set(ids)
|
||||
if not doc_ids:
|
||||
return []
|
||||
return list(doc_ids)
|
||||
@ -407,14 +410,15 @@ def chat(dialog, messages, stream=True, **kwargs):
|
||||
if dialog.meta_data_filter:
|
||||
metas = DocumentService.get_meta_by_kbs(dialog.kb_ids)
|
||||
if dialog.meta_data_filter.get("method") == "auto":
|
||||
filters = gen_meta_filter(chat_mdl, metas, questions[-1])
|
||||
attachments.extend(meta_filter(metas, filters))
|
||||
filters: dict = gen_meta_filter(chat_mdl, metas, questions[-1])
|
||||
attachments.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
|
||||
if not attachments:
|
||||
attachments = None
|
||||
elif dialog.meta_data_filter.get("method") == "manual":
|
||||
attachments.extend(meta_filter(metas, dialog.meta_data_filter["manual"]))
|
||||
if not attachments:
|
||||
attachments = None
|
||||
conds = dialog.meta_data_filter["manual"]
|
||||
attachments.extend(meta_filter(metas, conds, dialog.meta_data_filter.get("logic", "and")))
|
||||
if conds and not attachments:
|
||||
attachments = ["-999"]
|
||||
|
||||
if prompt_config.get("keyword", False):
|
||||
questions[-1] += keyword_extraction(chat_mdl, questions[-1])
|
||||
@ -778,14 +782,14 @@ def ask(question, kb_ids, tenant_id, chat_llm_name=None, search_config={}):
|
||||
if meta_data_filter:
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
if meta_data_filter.get("method") == "auto":
|
||||
filters = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters))
|
||||
filters: dict = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
elif meta_data_filter.get("method") == "manual":
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"]))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
|
||||
if meta_data_filter["manual"] and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
|
||||
kbinfos = retriever.retrieval(
|
||||
question=question,
|
||||
@ -853,14 +857,14 @@ def gen_mindmap(question, kb_ids, tenant_id, search_config={}):
|
||||
if meta_data_filter:
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
if meta_data_filter.get("method") == "auto":
|
||||
filters = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters))
|
||||
filters: dict = gen_meta_filter(chat_mdl, metas, question)
|
||||
doc_ids.extend(meta_filter(metas, filters["conditions"], filters.get("logic", "and")))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
elif meta_data_filter.get("method") == "manual":
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"]))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
doc_ids.extend(meta_filter(metas, meta_data_filter["manual"], meta_data_filter.get("logic", "and")))
|
||||
if meta_data_filter["manual"] and not doc_ids:
|
||||
doc_ids = ["-999"]
|
||||
|
||||
ranks = settings.retriever.retrieval(
|
||||
question=question,
|
||||
|
||||
@ -118,6 +118,7 @@ class FileSource(StrEnum):
|
||||
SHAREPOINT = "sharepoint"
|
||||
SLACK = "slack"
|
||||
TEAMS = "teams"
|
||||
MOODLE = "moodle"
|
||||
|
||||
|
||||
class PipelineTaskType(StrEnum):
|
||||
|
||||
@ -14,6 +14,7 @@ from .google_drive.connector import GoogleDriveConnector
|
||||
from .jira.connector import JiraConnector
|
||||
from .sharepoint_connector import SharePointConnector
|
||||
from .teams_connector import TeamsConnector
|
||||
from .moodle_connector import MoodleConnector
|
||||
from .config import BlobType, DocumentSource
|
||||
from .models import Document, TextSection, ImageSection, BasicExpertInfo
|
||||
from .exceptions import (
|
||||
@ -36,6 +37,7 @@ __all__ = [
|
||||
"JiraConnector",
|
||||
"SharePointConnector",
|
||||
"TeamsConnector",
|
||||
"MoodleConnector",
|
||||
"BlobType",
|
||||
"DocumentSource",
|
||||
"Document",
|
||||
|
||||
@ -48,6 +48,7 @@ class DocumentSource(str, Enum):
|
||||
GOOGLE_DRIVE = "google_drive"
|
||||
GMAIL = "gmail"
|
||||
DISCORD = "discord"
|
||||
MOODLE = "moodle"
|
||||
S3_COMPATIBLE = "s3_compatible"
|
||||
|
||||
|
||||
|
||||
378
common/data_source/moodle_connector.py
Normal file
378
common/data_source/moodle_connector.py
Normal file
@ -0,0 +1,378 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime, timezone
|
||||
from retry import retry
|
||||
from typing import Any, Optional
|
||||
|
||||
from markdownify import markdownify as md
|
||||
from moodle import Moodle as MoodleClient, MoodleException
|
||||
|
||||
from common.data_source.config import INDEX_BATCH_SIZE
|
||||
from common.data_source.exceptions import (
|
||||
ConnectorMissingCredentialError,
|
||||
CredentialExpiredError,
|
||||
InsufficientPermissionsError,
|
||||
ConnectorValidationError,
|
||||
)
|
||||
from common.data_source.interfaces import LoadConnector, PollConnector, SecondsSinceUnixEpoch
|
||||
from common.data_source.models import Document
|
||||
from common.data_source.utils import batch_generator, rl_requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MoodleConnector(LoadConnector, PollConnector):
|
||||
"""Moodle LMS connector for accessing course content"""
|
||||
|
||||
def __init__(self, moodle_url: str, batch_size: int = INDEX_BATCH_SIZE) -> None:
    """Create a connector for the Moodle site at *moodle_url*.

    Args:
        moodle_url: Base URL of the Moodle instance; a trailing slash is stripped.
        batch_size: Maximum number of documents yielded per batch.
    """
    # The client stays unset until load_credentials() succeeds.
    self.moodle_client: Optional[MoodleClient] = None
    self.batch_size = batch_size
    self.moodle_url = moodle_url.rstrip("/")
|
||||
|
||||
def _add_token_to_url(self, file_url: str) -> str:
|
||||
"""Append Moodle token to URL if missing"""
|
||||
if not self.moodle_client:
|
||||
return file_url
|
||||
token = getattr(self.moodle_client, "token", "")
|
||||
if "token=" in file_url.lower():
|
||||
return file_url
|
||||
delimiter = "&" if "?" in file_url else "?"
|
||||
return f"{file_url}{delimiter}token={token}"
|
||||
|
||||
def _log_error(self, context: str, error: Exception, level: str = "warning") -> None:
    """Log *error* prefixed by *context*, at warning or error severity."""
    # Pick the logger method once instead of branching around two calls.
    emit = logger.error if level == "error" else logger.warning
    emit(f"{context}: {error}")
|
||||
|
||||
def _get_latest_timestamp(self, *timestamps: int) -> int:
|
||||
"""Return latest valid timestamp"""
|
||||
return max((t for t in timestamps if t and t > 0), default=0)
|
||||
|
||||
def _yield_in_batches(
    self, generator: Generator[Document, None, None]
) -> Generator[list[Document], None, None]:
    """Re-chunk *generator* into lists of at most ``self.batch_size`` documents."""
    # Delegation is equivalent to the explicit for/yield loop here.
    yield from batch_generator(generator, self.batch_size)
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> None:
    """Initialise and verify the Moodle client from *credentials*.

    Args:
        credentials: Mapping that must contain a ``moodle_token`` entry.

    Raises:
        ConnectorMissingCredentialError: If the token is absent or the
            client cannot be initialised.
        CredentialExpiredError: If Moodle reports the token as invalid.
    """
    token = credentials.get("moodle_token")
    if not token:
        raise ConnectorMissingCredentialError("Moodle API token is required")

    try:
        self.moodle_client = MoodleClient(
            self.moodle_url + "/webservice/rest/server.php", token
        )
        # Round-trip call so a bad token fails here, not on first real use.
        self.moodle_client.core.webservice.get_site_info()
    except MoodleException as e:
        # Fix: previously a client that failed its verification call stayed
        # assigned, so later `if not self.moodle_client` guards passed with a
        # broken client. Reset it so the connector is honestly unconfigured.
        self.moodle_client = None
        if "invalidtoken" in str(e).lower():
            raise CredentialExpiredError("Moodle token is invalid or expired") from e
        raise ConnectorMissingCredentialError(f"Failed to initialize Moodle client: {e}") from e
|
||||
|
||||
def validate_connector_settings(self) -> None:
    """Verify the client is configured and the token grants API access.

    Raises:
        ConnectorMissingCredentialError: If load_credentials() has not run.
        CredentialExpiredError: If the token is invalid or expired.
        InsufficientPermissionsError: If permissions or the API response are bad.
        ConnectorValidationError: For any other validation failure.
    """
    if not self.moodle_client:
        raise ConnectorMissingCredentialError("Moodle client not initialized")

    try:
        site_info = self.moodle_client.core.webservice.get_site_info()
        if not site_info.sitename:
            raise InsufficientPermissionsError("Invalid Moodle API response")
    except MoodleException as e:
        msg = str(e).lower()
        if "invalidtoken" in msg:
            raise CredentialExpiredError("Moodle token is invalid or expired") from e
        if "accessexception" in msg:
            raise InsufficientPermissionsError(
                "Insufficient permissions. Ensure web services are enabled and permissions are correct."
            ) from e
        raise ConnectorValidationError(f"Moodle validation error: {e}") from e
    except InsufficientPermissionsError:
        # Bug fix: the generic handler below used to catch this and re-wrap it
        # as ConnectorValidationError, hiding the specific permission failure.
        raise
    except Exception as e:
        raise ConnectorValidationError(f"Unexpected validation error: {e}") from e
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Data loading & polling
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def load_from_state(self) -> Generator[list[Document], None, None]:
    """Yield every reachable course document in batches (full sync)."""
    if not self.moodle_client:
        raise ConnectorMissingCredentialError("Moodle client not initialized")

    logger.info("Starting full load from Moodle workspace")
    courses = self._get_enrolled_courses()
    if courses:
        yield from self._yield_in_batches(self._process_courses(courses))
    else:
        logger.warning("No courses found to process")
|
||||
|
||||
def poll_source(
    self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
) -> Generator[list[Document], None, None]:
    """Yield batches of documents whose modules changed within (start, end]."""
    if not self.moodle_client:
        raise ConnectorMissingCredentialError("Moodle client not initialized")

    window = f"{datetime.fromtimestamp(start)} and {datetime.fromtimestamp(end)}"
    logger.info(f"Polling Moodle updates between {window}")

    courses = self._get_enrolled_courses()
    if not courses:
        logger.warning("No courses found to poll")
        return

    yield from self._yield_in_batches(self._get_updated_content(courses, start, end))
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
def _get_enrolled_courses(self) -> list:
    """Fetch the courses visible to the configured token.

    NOTE(review): this calls core.course.get_courses(), which presumably
    returns all courses the token can see rather than only enrolled ones —
    confirm the name matches the intent.

    Raises:
        ConnectorMissingCredentialError: If no client is configured.
        ConnectorValidationError: If Moodle rejects the request (after retries).
    """
    if not self.moodle_client:
        raise ConnectorMissingCredentialError("Moodle client not initialized")

    try:
        return self.moodle_client.core.course.get_courses()
    except MoodleException as e:
        self._log_error("fetching courses", e, "error")
        # Chain the cause so the original Moodle error survives in tracebacks.
        raise ConnectorValidationError(f"Failed to fetch courses: {e}") from e
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
def _get_course_contents(self, course_id: int):
    """Return the section/module tree for one course; [] on API failure."""
    if not self.moodle_client:
        raise ConnectorMissingCredentialError("Moodle client not initialized")

    try:
        contents = self.moodle_client.core.course.get_contents(courseid=course_id)
    except MoodleException as e:
        # Best-effort: a single bad course should not abort the whole sync.
        self._log_error(f"fetching course contents for {course_id}", e)
        return []
    return contents
|
||||
|
||||
def _process_courses(self, courses) -> Generator[Document, None, None]:
|
||||
for course in courses:
|
||||
try:
|
||||
contents = self._get_course_contents(course.id)
|
||||
for section in contents:
|
||||
for module in section.modules:
|
||||
doc = self._process_module(course, section, module)
|
||||
if doc:
|
||||
yield doc
|
||||
except Exception as e:
|
||||
self._log_error(f"processing course {course.fullname}", e)
|
||||
|
||||
def _get_updated_content(
|
||||
self, courses, start: float, end: float
|
||||
) -> Generator[Document, None, None]:
|
||||
for course in courses:
|
||||
try:
|
||||
contents = self._get_course_contents(course.id)
|
||||
for section in contents:
|
||||
for module in section.modules:
|
||||
times = [
|
||||
getattr(module, "timecreated", 0),
|
||||
getattr(module, "timemodified", 0),
|
||||
]
|
||||
if hasattr(module, "contents"):
|
||||
times.extend(
|
||||
getattr(c, "timemodified", 0)
|
||||
for c in module.contents
|
||||
if c and getattr(c, "timemodified", 0)
|
||||
)
|
||||
last_mod = self._get_latest_timestamp(*times)
|
||||
if start < last_mod <= end:
|
||||
doc = self._process_module(course, section, module)
|
||||
if doc:
|
||||
yield doc
|
||||
except Exception as e:
|
||||
self._log_error(f"polling course {course.fullname}", e)
|
||||
|
||||
def _process_module(
|
||||
self, course, section, module
|
||||
) -> Optional[Document]:
|
||||
try:
|
||||
mtype = module.modname
|
||||
if mtype in ["label", "url"]:
|
||||
return None
|
||||
if mtype == "resource":
|
||||
return self._process_resource(course, section, module)
|
||||
if mtype == "forum":
|
||||
return self._process_forum(course, section, module)
|
||||
if mtype == "page":
|
||||
return self._process_page(course, section, module)
|
||||
if mtype in ["assign", "quiz"]:
|
||||
return self._process_activity(course, section, module)
|
||||
if mtype == "book":
|
||||
return self._process_book(course, section, module)
|
||||
except Exception as e:
|
||||
self._log_error(f"processing module {getattr(module, 'name', '?')}", e)
|
||||
return None
|
||||
|
||||
def _process_resource(self, course, section, module) -> Optional[Document]:
|
||||
if not getattr(module, "contents", None):
|
||||
return None
|
||||
|
||||
file_info = module.contents[0]
|
||||
if not getattr(file_info, "fileurl", None):
|
||||
return None
|
||||
|
||||
file_name = os.path.basename(file_info.filename)
|
||||
ts = self._get_latest_timestamp(
|
||||
getattr(module, "timecreated", 0),
|
||||
getattr(module, "timemodified", 0),
|
||||
getattr(file_info, "timemodified", 0),
|
||||
)
|
||||
|
||||
try:
|
||||
resp = rl_requests.get(self._add_token_to_url(file_info.fileurl), timeout=60)
|
||||
resp.raise_for_status()
|
||||
blob = resp.content
|
||||
ext = os.path.splitext(file_name)[1] or ".bin"
|
||||
semantic_id = f"{course.fullname} / {section.name} / {file_name}"
|
||||
return Document(
|
||||
id=f"moodle_resource_{module.id}",
|
||||
source="moodle",
|
||||
semantic_identifier=semantic_id,
|
||||
extension=ext,
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
)
|
||||
except Exception as e:
|
||||
self._log_error(f"downloading resource {file_name}", e, "error")
|
||||
return None
|
||||
|
||||
def _process_forum(self, course, section, module) -> Optional[Document]:
|
||||
if not self.moodle_client or not getattr(module, "instance", None):
|
||||
return None
|
||||
|
||||
try:
|
||||
result = self.moodle_client.mod.forum.get_forum_discussions(forumid=module.instance)
|
||||
disc_list = getattr(result, "discussions", [])
|
||||
if not disc_list:
|
||||
return None
|
||||
|
||||
markdown = [f"# {module.name}\n"]
|
||||
latest_ts = self._get_latest_timestamp(
|
||||
getattr(module, "timecreated", 0),
|
||||
getattr(module, "timemodified", 0),
|
||||
)
|
||||
|
||||
for d in disc_list:
|
||||
markdown.append(f"## {d.name}\n\n{md(d.message or '')}\n\n---\n")
|
||||
latest_ts = max(latest_ts, getattr(d, "timemodified", 0))
|
||||
|
||||
blob = "\n".join(markdown).encode("utf-8")
|
||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||
return Document(
|
||||
id=f"moodle_forum_{module.id}",
|
||||
source="moodle",
|
||||
semantic_identifier=semantic_id,
|
||||
extension=".md",
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
)
|
||||
except Exception as e:
|
||||
self._log_error(f"processing forum {module.name}", e)
|
||||
return None
|
||||
|
||||
def _process_page(self, course, section, module) -> Optional[Document]:
|
||||
if not getattr(module, "contents", None):
|
||||
return None
|
||||
|
||||
file_info = module.contents[0]
|
||||
if not getattr(file_info, "fileurl", None):
|
||||
return None
|
||||
|
||||
file_name = os.path.basename(file_info.filename)
|
||||
ts = self._get_latest_timestamp(
|
||||
getattr(module, "timecreated", 0),
|
||||
getattr(module, "timemodified", 0),
|
||||
getattr(file_info, "timemodified", 0),
|
||||
)
|
||||
|
||||
try:
|
||||
resp = rl_requests.get(self._add_token_to_url(file_info.fileurl), timeout=60)
|
||||
resp.raise_for_status()
|
||||
blob = resp.content
|
||||
ext = os.path.splitext(file_name)[1] or ".html"
|
||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||
return Document(
|
||||
id=f"moodle_page_{module.id}",
|
||||
source="moodle",
|
||||
semantic_identifier=semantic_id,
|
||||
extension=ext,
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
)
|
||||
except Exception as e:
|
||||
self._log_error(f"processing page {file_name}", e, "error")
|
||||
return None
|
||||
|
||||
def _process_activity(self, course, section, module) -> Optional[Document]:
|
||||
desc = getattr(module, "description", "")
|
||||
if not desc:
|
||||
return None
|
||||
|
||||
mtype, mname = module.modname, module.name
|
||||
markdown = f"# {mname}\n\n**Type:** {mtype.capitalize()}\n\n{md(desc)}"
|
||||
ts = self._get_latest_timestamp(
|
||||
getattr(module, "timecreated", 0),
|
||||
getattr(module, "timemodified", 0),
|
||||
getattr(module, "added", 0),
|
||||
)
|
||||
|
||||
semantic_id = f"{course.fullname} / {section.name} / {mname}"
|
||||
blob = markdown.encode("utf-8")
|
||||
return Document(
|
||||
id=f"moodle_{mtype}_{module.id}",
|
||||
source="moodle",
|
||||
semantic_identifier=semantic_id,
|
||||
extension=".md",
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
)
|
||||
|
||||
def _process_book(self, course, section, module) -> Optional[Document]:
|
||||
if not getattr(module, "contents", None):
|
||||
return None
|
||||
|
||||
contents = module.contents
|
||||
chapters = [
|
||||
c for c in contents
|
||||
if getattr(c, "fileurl", None) and os.path.basename(c.filename) == "index.html"
|
||||
]
|
||||
if not chapters:
|
||||
return None
|
||||
|
||||
latest_ts = self._get_latest_timestamp(
|
||||
getattr(module, "timecreated", 0),
|
||||
getattr(module, "timemodified", 0),
|
||||
*[getattr(c, "timecreated", 0) for c in contents],
|
||||
*[getattr(c, "timemodified", 0) for c in contents],
|
||||
)
|
||||
|
||||
markdown_parts = [f"# {module.name}\n"]
|
||||
for ch in chapters:
|
||||
try:
|
||||
resp = rl_requests.get(self._add_token_to_url(ch.fileurl), timeout=60)
|
||||
resp.raise_for_status()
|
||||
html = resp.content.decode("utf-8", errors="ignore")
|
||||
markdown_parts.append(md(html) + "\n\n---\n")
|
||||
except Exception as e:
|
||||
self._log_error(f"processing book chapter {ch.filename}", e)
|
||||
|
||||
blob = "\n".join(markdown_parts).encode("utf-8")
|
||||
semantic_id = f"{course.fullname} / {section.name} / {module.name}"
|
||||
return Document(
|
||||
id=f"moodle_book_{module.id}",
|
||||
source="moodle",
|
||||
semantic_identifier=semantic_id,
|
||||
extension=".md",
|
||||
blob=blob,
|
||||
doc_updated_at=datetime.fromtimestamp(latest_ts or 0, tz=timezone.utc),
|
||||
size_bytes=len(blob),
|
||||
)
|
||||
@ -1,38 +1,45 @@
|
||||
import html
|
||||
import logging
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from retry import retry
|
||||
|
||||
from common.data_source.config import (
|
||||
INDEX_BATCH_SIZE,
|
||||
DocumentSource, NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP
|
||||
NOTION_CONNECTOR_DISABLE_RECURSIVE_PAGE_LOOKUP,
|
||||
DocumentSource,
|
||||
)
|
||||
from common.data_source.exceptions import (
|
||||
ConnectorMissingCredentialError,
|
||||
ConnectorValidationError,
|
||||
CredentialExpiredError,
|
||||
InsufficientPermissionsError,
|
||||
UnexpectedValidationError,
|
||||
)
|
||||
from common.data_source.interfaces import (
|
||||
LoadConnector,
|
||||
PollConnector,
|
||||
SecondsSinceUnixEpoch
|
||||
SecondsSinceUnixEpoch,
|
||||
)
|
||||
from common.data_source.models import (
|
||||
Document,
|
||||
TextSection, GenerateDocumentsOutput
|
||||
)
|
||||
from common.data_source.exceptions import (
|
||||
ConnectorValidationError,
|
||||
CredentialExpiredError,
|
||||
InsufficientPermissionsError,
|
||||
UnexpectedValidationError, ConnectorMissingCredentialError
|
||||
)
|
||||
from common.data_source.models import (
|
||||
NotionPage,
|
||||
GenerateDocumentsOutput,
|
||||
NotionBlock,
|
||||
NotionSearchResponse
|
||||
NotionPage,
|
||||
NotionSearchResponse,
|
||||
TextSection,
|
||||
)
|
||||
from common.data_source.utils import (
|
||||
rl_requests,
|
||||
batch_generator,
|
||||
datetime_from_string,
|
||||
fetch_notion_data,
|
||||
filter_pages_by_time,
|
||||
properties_to_str,
|
||||
filter_pages_by_time, datetime_from_string
|
||||
rl_requests,
|
||||
)
|
||||
|
||||
|
||||
@ -61,11 +68,9 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
self.recursive_index_enabled = recursive_index_enabled or bool(root_page_id)
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
|
||||
def _fetch_child_blocks(
|
||||
self, block_id: str, cursor: Optional[str] = None
|
||||
) -> dict[str, Any] | None:
|
||||
def _fetch_child_blocks(self, block_id: str, cursor: Optional[str] = None) -> dict[str, Any] | None:
|
||||
"""Fetch all child blocks via the Notion API."""
|
||||
logging.debug(f"Fetching children of block with ID '{block_id}'")
|
||||
logging.debug(f"[Notion]: Fetching children of block with ID {block_id}")
|
||||
block_url = f"https://api.notion.com/v1/blocks/{block_id}/children"
|
||||
query_params = {"start_cursor": cursor} if cursor else None
|
||||
|
||||
@ -79,49 +84,42 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
except Exception as e:
|
||||
if hasattr(e, 'response') and e.response.status_code == 404:
|
||||
logging.error(
|
||||
f"Unable to access block with ID '{block_id}'. "
|
||||
f"This is likely due to the block not being shared with the integration."
|
||||
)
|
||||
if hasattr(e, "response") and e.response.status_code == 404:
|
||||
logging.error(f"[Notion]: Unable to access block with ID {block_id}. This is likely due to the block not being shared with the integration.")
|
||||
return None
|
||||
else:
|
||||
logging.exception(f"Error fetching blocks: {e}")
|
||||
logging.exception(f"[Notion]: Error fetching blocks: {e}")
|
||||
raise
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
|
||||
def _fetch_page(self, page_id: str) -> NotionPage:
|
||||
"""Fetch a page from its ID via the Notion API."""
|
||||
logging.debug(f"Fetching page for ID '{page_id}'")
|
||||
logging.debug(f"[Notion]: Fetching page for ID {page_id}")
|
||||
page_url = f"https://api.notion.com/v1/pages/{page_id}"
|
||||
|
||||
try:
|
||||
data = fetch_notion_data(page_url, self.headers, "GET")
|
||||
return NotionPage(**data)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to fetch page, trying database for ID '{page_id}': {e}")
|
||||
logging.warning(f"[Notion]: Failed to fetch page, trying database for ID {page_id}: {e}")
|
||||
return self._fetch_database_as_page(page_id)
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
|
||||
def _fetch_database_as_page(self, database_id: str) -> NotionPage:
|
||||
"""Attempt to fetch a database as a page."""
|
||||
logging.debug(f"Fetching database for ID '{database_id}' as a page")
|
||||
logging.debug(f"[Notion]: Fetching database for ID {database_id} as a page")
|
||||
database_url = f"https://api.notion.com/v1/databases/{database_id}"
|
||||
|
||||
data = fetch_notion_data(database_url, self.headers, "GET")
|
||||
database_name = data.get("title")
|
||||
database_name = (
|
||||
database_name[0].get("text", {}).get("content") if database_name else None
|
||||
)
|
||||
database_name = database_name[0].get("text", {}).get("content") if database_name else None
|
||||
|
||||
return NotionPage(**data, database_name=database_name)
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
|
||||
def _fetch_database(
|
||||
self, database_id: str, cursor: Optional[str] = None
|
||||
) -> dict[str, Any]:
|
||||
def _fetch_database(self, database_id: str, cursor: Optional[str] = None) -> dict[str, Any]:
|
||||
"""Fetch a database from its ID via the Notion API."""
|
||||
logging.debug(f"Fetching database for ID '{database_id}'")
|
||||
logging.debug(f"[Notion]: Fetching database for ID {database_id}")
|
||||
block_url = f"https://api.notion.com/v1/databases/{database_id}/query"
|
||||
body = {"start_cursor": cursor} if cursor else None
|
||||
|
||||
@ -129,17 +127,12 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
data = fetch_notion_data(block_url, self.headers, "POST", body)
|
||||
return data
|
||||
except Exception as e:
|
||||
if hasattr(e, 'response') and e.response.status_code in [404, 400]:
|
||||
logging.error(
|
||||
f"Unable to access database with ID '{database_id}'. "
|
||||
f"This is likely due to the database not being shared with the integration."
|
||||
)
|
||||
if hasattr(e, "response") and e.response.status_code in [404, 400]:
|
||||
logging.error(f"[Notion]: Unable to access database with ID {database_id}. This is likely due to the database not being shared with the integration.")
|
||||
return {"results": [], "next_cursor": None}
|
||||
raise
|
||||
|
||||
def _read_pages_from_database(
|
||||
self, database_id: str
|
||||
) -> tuple[list[NotionBlock], list[str]]:
|
||||
def _read_pages_from_database(self, database_id: str) -> tuple[list[NotionBlock], list[str]]:
|
||||
"""Returns a list of top level blocks and all page IDs in the database."""
|
||||
result_blocks: list[NotionBlock] = []
|
||||
result_pages: list[str] = []
|
||||
@ -158,10 +151,10 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
|
||||
if self.recursive_index_enabled:
|
||||
if obj_type == "page":
|
||||
logging.debug(f"Found page with ID '{obj_id}' in database '{database_id}'")
|
||||
logging.debug(f"[Notion]: Found page with ID {obj_id} in database {database_id}")
|
||||
result_pages.append(result["id"])
|
||||
elif obj_type == "database":
|
||||
logging.debug(f"Found database with ID '{obj_id}' in database '{database_id}'")
|
||||
logging.debug(f"[Notion]: Found database with ID {obj_id} in database {database_id}")
|
||||
_, child_pages = self._read_pages_from_database(obj_id)
|
||||
result_pages.extend(child_pages)
|
||||
|
||||
@ -172,44 +165,229 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
|
||||
return result_blocks, result_pages
|
||||
|
||||
def _read_blocks(self, base_block_id: str) -> tuple[list[NotionBlock], list[str]]:
|
||||
"""Reads all child blocks for the specified block, returns blocks and child page ids."""
|
||||
def _extract_rich_text(self, rich_text_array: list[dict[str, Any]]) -> str:
|
||||
collected_text: list[str] = []
|
||||
for rich_text in rich_text_array:
|
||||
content = ""
|
||||
r_type = rich_text.get("type")
|
||||
|
||||
if r_type == "equation":
|
||||
expr = rich_text.get("equation", {}).get("expression")
|
||||
if expr:
|
||||
content = expr
|
||||
elif r_type == "mention":
|
||||
mention = rich_text.get("mention", {}) or {}
|
||||
mention_type = mention.get("type")
|
||||
mention_value = mention.get(mention_type, {}) if mention_type else {}
|
||||
if mention_type == "date":
|
||||
start = mention_value.get("start")
|
||||
end = mention_value.get("end")
|
||||
if start and end:
|
||||
content = f"{start} - {end}"
|
||||
elif start:
|
||||
content = start
|
||||
elif mention_type in {"page", "database"}:
|
||||
content = mention_value.get("id", rich_text.get("plain_text", ""))
|
||||
elif mention_type == "link_preview":
|
||||
content = mention_value.get("url", rich_text.get("plain_text", ""))
|
||||
else:
|
||||
content = rich_text.get("plain_text", "") or str(mention_value)
|
||||
else:
|
||||
if rich_text.get("plain_text"):
|
||||
content = rich_text["plain_text"]
|
||||
elif "text" in rich_text and rich_text["text"].get("content"):
|
||||
content = rich_text["text"]["content"]
|
||||
|
||||
href = rich_text.get("href")
|
||||
if content and href:
|
||||
content = f"{content} ({href})"
|
||||
|
||||
if content:
|
||||
collected_text.append(content)
|
||||
|
||||
return "".join(collected_text).strip()
|
||||
|
||||
def _build_table_html(self, table_block_id: str) -> str | None:
|
||||
rows: list[str] = []
|
||||
cursor = None
|
||||
while True:
|
||||
data = self._fetch_child_blocks(table_block_id, cursor)
|
||||
if data is None:
|
||||
break
|
||||
|
||||
for result in data["results"]:
|
||||
if result.get("type") != "table_row":
|
||||
continue
|
||||
cells_html: list[str] = []
|
||||
for cell in result["table_row"].get("cells", []):
|
||||
cell_text = self._extract_rich_text(cell)
|
||||
cell_html = html.escape(cell_text) if cell_text else ""
|
||||
cells_html.append(f"<td>{cell_html}</td>")
|
||||
rows.append(f"<tr>{''.join(cells_html)}</tr>")
|
||||
|
||||
if data.get("next_cursor") is None:
|
||||
break
|
||||
cursor = data["next_cursor"]
|
||||
|
||||
if not rows:
|
||||
return None
|
||||
return "<table>\n" + "\n".join(rows) + "\n</table>"
|
||||
|
||||
def _download_file(self, url: str) -> bytes | None:
|
||||
try:
|
||||
response = rl_requests.get(url, timeout=60)
|
||||
response.raise_for_status()
|
||||
return response.content
|
||||
except Exception as exc:
|
||||
logging.warning(f"[Notion]: Failed to download Notion file from {url}: {exc}")
|
||||
return None
|
||||
|
||||
def _extract_file_metadata(self, result_obj: dict[str, Any], block_id: str) -> tuple[str | None, str, str | None]:
|
||||
file_source_type = result_obj.get("type")
|
||||
file_source = result_obj.get(file_source_type, {}) if file_source_type else {}
|
||||
url = file_source.get("url")
|
||||
|
||||
name = result_obj.get("name") or file_source.get("name")
|
||||
if url and not name:
|
||||
parsed_name = Path(urlparse(url).path).name
|
||||
name = parsed_name or f"notion_file_{block_id}"
|
||||
elif not name:
|
||||
name = f"notion_file_{block_id}"
|
||||
|
||||
caption = self._extract_rich_text(result_obj.get("caption", [])) if "caption" in result_obj else None
|
||||
|
||||
return url, name, caption
|
||||
|
||||
def _build_attachment_document(
|
||||
self,
|
||||
block_id: str,
|
||||
url: str,
|
||||
name: str,
|
||||
caption: Optional[str],
|
||||
page_last_edited_time: Optional[str],
|
||||
) -> Document | None:
|
||||
file_bytes = self._download_file(url)
|
||||
if file_bytes is None:
|
||||
return None
|
||||
|
||||
extension = Path(name).suffix or Path(urlparse(url).path).suffix or ".bin"
|
||||
if extension and not extension.startswith("."):
|
||||
extension = f".{extension}"
|
||||
if not extension:
|
||||
extension = ".bin"
|
||||
|
||||
updated_at = datetime_from_string(page_last_edited_time) if page_last_edited_time else datetime.now(timezone.utc)
|
||||
semantic_identifier = caption or name or f"Notion file {block_id}"
|
||||
|
||||
return Document(
|
||||
id=block_id,
|
||||
blob=file_bytes,
|
||||
source=DocumentSource.NOTION,
|
||||
semantic_identifier=semantic_identifier,
|
||||
extension=extension,
|
||||
size_bytes=len(file_bytes),
|
||||
doc_updated_at=updated_at,
|
||||
)
|
||||
|
||||
def _read_blocks(self, base_block_id: str, page_last_edited_time: Optional[str] = None) -> tuple[list[NotionBlock], list[str], list[Document]]:
|
||||
result_blocks: list[NotionBlock] = []
|
||||
child_pages: list[str] = []
|
||||
attachments: list[Document] = []
|
||||
cursor = None
|
||||
|
||||
while True:
|
||||
data = self._fetch_child_blocks(base_block_id, cursor)
|
||||
|
||||
if data is None:
|
||||
return result_blocks, child_pages
|
||||
return result_blocks, child_pages, attachments
|
||||
|
||||
for result in data["results"]:
|
||||
logging.debug(f"Found child block for block with ID '{base_block_id}': {result}")
|
||||
logging.debug(f"[Notion]: Found child block for block with ID {base_block_id}: {result}")
|
||||
result_block_id = result["id"]
|
||||
result_type = result["type"]
|
||||
result_obj = result[result_type]
|
||||
|
||||
if result_type in ["ai_block", "unsupported", "external_object_instance_page"]:
|
||||
logging.warning(f"Skipping unsupported block type '{result_type}'")
|
||||
logging.warning(f"[Notion]: Skipping unsupported block type {result_type}")
|
||||
continue
|
||||
|
||||
if result_type == "table":
|
||||
table_html = self._build_table_html(result_block_id)
|
||||
if table_html:
|
||||
result_blocks.append(
|
||||
NotionBlock(
|
||||
id=result_block_id,
|
||||
text=table_html,
|
||||
prefix="\n\n",
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
if result_type == "equation":
|
||||
expr = result_obj.get("expression")
|
||||
if expr:
|
||||
result_blocks.append(
|
||||
NotionBlock(
|
||||
id=result_block_id,
|
||||
text=expr,
|
||||
prefix="\n",
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
cur_result_text_arr = []
|
||||
if "rich_text" in result_obj:
|
||||
for rich_text in result_obj["rich_text"]:
|
||||
if "text" in rich_text:
|
||||
text = rich_text["text"]["content"]
|
||||
cur_result_text_arr.append(text)
|
||||
text = self._extract_rich_text(result_obj["rich_text"])
|
||||
if text:
|
||||
cur_result_text_arr.append(text)
|
||||
|
||||
if result_type == "bulleted_list_item":
|
||||
if cur_result_text_arr:
|
||||
cur_result_text_arr[0] = f"- {cur_result_text_arr[0]}"
|
||||
else:
|
||||
cur_result_text_arr = ["- "]
|
||||
|
||||
if result_type == "numbered_list_item":
|
||||
if cur_result_text_arr:
|
||||
cur_result_text_arr[0] = f"1. {cur_result_text_arr[0]}"
|
||||
else:
|
||||
cur_result_text_arr = ["1. "]
|
||||
|
||||
if result_type == "to_do":
|
||||
checked = result_obj.get("checked")
|
||||
checkbox_prefix = "[x]" if checked else "[ ]"
|
||||
if cur_result_text_arr:
|
||||
cur_result_text_arr = [f"{checkbox_prefix} {cur_result_text_arr[0]}"] + cur_result_text_arr[1:]
|
||||
else:
|
||||
cur_result_text_arr = [checkbox_prefix]
|
||||
|
||||
if result_type in {"file", "image", "pdf", "video", "audio"}:
|
||||
file_url, file_name, caption = self._extract_file_metadata(result_obj, result_block_id)
|
||||
if file_url:
|
||||
attachment_doc = self._build_attachment_document(
|
||||
block_id=result_block_id,
|
||||
url=file_url,
|
||||
name=file_name,
|
||||
caption=caption,
|
||||
page_last_edited_time=page_last_edited_time,
|
||||
)
|
||||
if attachment_doc:
|
||||
attachments.append(attachment_doc)
|
||||
|
||||
attachment_label = caption or file_name
|
||||
if attachment_label:
|
||||
cur_result_text_arr.append(f"{result_type.capitalize()}: {attachment_label}")
|
||||
|
||||
if result["has_children"]:
|
||||
if result_type == "child_page":
|
||||
child_pages.append(result_block_id)
|
||||
else:
|
||||
logging.debug(f"Entering sub-block: {result_block_id}")
|
||||
subblocks, subblock_child_pages = self._read_blocks(result_block_id)
|
||||
logging.debug(f"Finished sub-block: {result_block_id}")
|
||||
logging.debug(f"[Notion]: Entering sub-block: {result_block_id}")
|
||||
subblocks, subblock_child_pages, subblock_attachments = self._read_blocks(result_block_id, page_last_edited_time)
|
||||
logging.debug(f"[Notion]: Finished sub-block: {result_block_id}")
|
||||
result_blocks.extend(subblocks)
|
||||
child_pages.extend(subblock_child_pages)
|
||||
attachments.extend(subblock_attachments)
|
||||
|
||||
if result_type == "child_database":
|
||||
inner_blocks, inner_child_pages = self._read_pages_from_database(result_block_id)
|
||||
@ -231,7 +409,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
|
||||
cursor = data["next_cursor"]
|
||||
|
||||
return result_blocks, child_pages
|
||||
return result_blocks, child_pages, attachments
|
||||
|
||||
def _read_page_title(self, page: NotionPage) -> Optional[str]:
|
||||
"""Extracts the title from a Notion page."""
|
||||
@ -245,9 +423,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
|
||||
return None
|
||||
|
||||
def _read_pages(
|
||||
self, pages: list[NotionPage]
|
||||
) -> Generator[Document, None, None]:
|
||||
def _read_pages(self, pages: list[NotionPage], start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[Document, None, None]:
|
||||
"""Reads pages for rich text content and generates Documents."""
|
||||
all_child_page_ids: list[str] = []
|
||||
|
||||
@ -255,11 +431,17 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
if isinstance(page, dict):
|
||||
page = NotionPage(**page)
|
||||
if page.id in self.indexed_pages:
|
||||
logging.debug(f"Already indexed page with ID '{page.id}'. Skipping.")
|
||||
logging.debug(f"[Notion]: Already indexed page with ID {page.id}. Skipping.")
|
||||
continue
|
||||
|
||||
logging.info(f"Reading page with ID '{page.id}', with url {page.url}")
|
||||
page_blocks, child_page_ids = self._read_blocks(page.id)
|
||||
if start is not None and end is not None:
|
||||
page_ts = datetime_from_string(page.last_edited_time).timestamp()
|
||||
if not (page_ts > start and page_ts <= end):
|
||||
logging.debug(f"[Notion]: Skipping page {page.id} outside polling window.")
|
||||
continue
|
||||
|
||||
logging.info(f"[Notion]: Reading page with ID {page.id}, with url {page.url}")
|
||||
page_blocks, child_page_ids, attachment_docs = self._read_blocks(page.id, page.last_edited_time)
|
||||
all_child_page_ids.extend(child_page_ids)
|
||||
self.indexed_pages.add(page.id)
|
||||
|
||||
@ -268,14 +450,12 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
|
||||
if not page_blocks:
|
||||
if not raw_page_title:
|
||||
logging.warning(f"No blocks OR title found for page with ID '{page.id}'. Skipping.")
|
||||
logging.warning(f"[Notion]: No blocks OR title found for page with ID {page.id}. Skipping.")
|
||||
continue
|
||||
|
||||
text = page_title
|
||||
if page.properties:
|
||||
text += "\n\n" + "\n".join(
|
||||
[f"{key}: {value}" for key, value in page.properties.items()]
|
||||
)
|
||||
text += "\n\n" + "\n".join([f"{key}: {value}" for key, value in page.properties.items()])
|
||||
sections = [TextSection(link=page.url, text=text)]
|
||||
else:
|
||||
sections = [
|
||||
@ -286,45 +466,39 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
for block in page_blocks
|
||||
]
|
||||
|
||||
blob = ("\n".join([sec.text for sec in sections])).encode("utf-8")
|
||||
joined_text = "\n".join(sec.text for sec in sections)
|
||||
blob = joined_text.encode("utf-8")
|
||||
yield Document(
|
||||
id=page.id,
|
||||
blob=blob,
|
||||
source=DocumentSource.NOTION,
|
||||
semantic_identifier=page_title,
|
||||
extension=".txt",
|
||||
size_bytes=len(blob),
|
||||
doc_updated_at=datetime_from_string(page.last_edited_time)
|
||||
id=page.id, blob=blob, source=DocumentSource.NOTION, semantic_identifier=page_title, extension=".txt", size_bytes=len(blob), doc_updated_at=datetime_from_string(page.last_edited_time)
|
||||
)
|
||||
|
||||
for attachment_doc in attachment_docs:
|
||||
yield attachment_doc
|
||||
|
||||
if self.recursive_index_enabled and all_child_page_ids:
|
||||
for child_page_batch_ids in batch_generator(all_child_page_ids, INDEX_BATCH_SIZE):
|
||||
child_page_batch = [
|
||||
self._fetch_page(page_id)
|
||||
for page_id in child_page_batch_ids
|
||||
if page_id not in self.indexed_pages
|
||||
]
|
||||
yield from self._read_pages(child_page_batch)
|
||||
child_page_batch = [self._fetch_page(page_id) for page_id in child_page_batch_ids if page_id not in self.indexed_pages]
|
||||
yield from self._read_pages(child_page_batch, start, end)
|
||||
|
||||
@retry(tries=3, delay=1, backoff=2)
|
||||
def _search_notion(self, query_dict: dict[str, Any]) -> NotionSearchResponse:
|
||||
"""Search for pages from a Notion database."""
|
||||
logging.debug(f"Searching for pages in Notion with query_dict: {query_dict}")
|
||||
logging.debug(f"[Notion]: Searching for pages in Notion with query_dict: {query_dict}")
|
||||
data = fetch_notion_data("https://api.notion.com/v1/search", self.headers, "POST", query_dict)
|
||||
return NotionSearchResponse(**data)
|
||||
|
||||
def _recursive_load(self) -> Generator[list[Document], None, None]:
|
||||
def _recursive_load(self, start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None) -> Generator[list[Document], None, None]:
|
||||
"""Recursively load pages starting from root page ID."""
|
||||
if self.root_page_id is None or not self.recursive_index_enabled:
|
||||
raise RuntimeError("Recursive page lookup is not enabled")
|
||||
|
||||
logging.info(f"Recursively loading pages from Notion based on root page with ID: {self.root_page_id}")
|
||||
logging.info(f"[Notion]: Recursively loading pages from Notion based on root page with ID: {self.root_page_id}")
|
||||
pages = [self._fetch_page(page_id=self.root_page_id)]
|
||||
yield from batch_generator(self._read_pages(pages), self.batch_size)
|
||||
yield from batch_generator(self._read_pages(pages, start, end), self.batch_size)
|
||||
|
||||
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
|
||||
"""Applies integration token to headers."""
|
||||
self.headers["Authorization"] = f'Bearer {credentials["notion_integration_token"]}'
|
||||
self.headers["Authorization"] = f"Bearer {credentials['notion_integration_token']}"
|
||||
return None
|
||||
|
||||
def load_from_state(self) -> GenerateDocumentsOutput:
|
||||
@ -348,12 +522,10 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
else:
|
||||
break
|
||||
|
||||
def poll_source(
|
||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
||||
) -> GenerateDocumentsOutput:
|
||||
def poll_source(self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch) -> GenerateDocumentsOutput:
|
||||
"""Poll Notion for updated pages within a time period."""
|
||||
if self.recursive_index_enabled and self.root_page_id:
|
||||
yield from self._recursive_load()
|
||||
yield from self._recursive_load(start, end)
|
||||
return
|
||||
|
||||
query_dict = {
|
||||
@ -367,7 +539,7 @@ class NotionConnector(LoadConnector, PollConnector):
|
||||
pages = filter_pages_by_time(db_res.results, start, end, "last_edited_time")
|
||||
|
||||
if pages:
|
||||
yield from batch_generator(self._read_pages(pages), self.batch_size)
|
||||
yield from batch_generator(self._read_pages(pages, start, end), self.batch_size)
|
||||
if db_res.has_more:
|
||||
query_dict["start_cursor"] = db_res.next_cursor
|
||||
else:
|
||||
|
||||
@ -27,6 +27,7 @@ from common.constants import SVR_QUEUE_NAME, Storage
|
||||
import rag.utils
|
||||
import rag.utils.es_conn
|
||||
import rag.utils.infinity_conn
|
||||
import rag.utils.ob_conn
|
||||
import rag.utils.opensearch_conn
|
||||
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
|
||||
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
|
||||
@ -103,6 +104,7 @@ INFINITY = {}
|
||||
AZURE = {}
|
||||
S3 = {}
|
||||
MINIO = {}
|
||||
OB = {}
|
||||
OSS = {}
|
||||
OS = {}
|
||||
|
||||
@ -227,7 +229,7 @@ def init_settings():
|
||||
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
|
||||
OAUTH_CONFIG = get_base_config("oauth", {})
|
||||
|
||||
global DOC_ENGINE, docStoreConn, ES, OS, INFINITY
|
||||
global DOC_ENGINE, docStoreConn, ES, OB, OS, INFINITY
|
||||
DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
|
||||
# DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
|
||||
lower_case_doc_engine = DOC_ENGINE.lower()
|
||||
@ -240,6 +242,9 @@ def init_settings():
|
||||
elif lower_case_doc_engine == "opensearch":
|
||||
OS = get_base_config("os", {})
|
||||
docStoreConn = rag.utils.opensearch_conn.OSConnection()
|
||||
elif lower_case_doc_engine == "oceanbase":
|
||||
OB = get_base_config("oceanbase", {})
|
||||
docStoreConn = rag.utils.ob_conn.OBConnection()
|
||||
else:
|
||||
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")
|
||||
|
||||
|
||||
@ -35,6 +35,12 @@ def num_tokens_from_string(string: str) -> int:
|
||||
return 0
|
||||
|
||||
def total_token_count_from_response(resp):
|
||||
"""
|
||||
Extract token count from LLM response in various formats.
|
||||
|
||||
Handles None responses and different response structures from various LLM providers.
|
||||
Returns 0 if token count cannot be determined.
|
||||
"""
|
||||
if resp is None:
|
||||
return 0
|
||||
|
||||
@ -50,19 +56,19 @@ def total_token_count_from_response(resp):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if 'usage' in resp and 'total_tokens' in resp['usage']:
|
||||
if isinstance(resp, dict) and 'usage' in resp and 'total_tokens' in resp['usage']:
|
||||
try:
|
||||
return resp["usage"]["total_tokens"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if 'usage' in resp and 'input_tokens' in resp['usage'] and 'output_tokens' in resp['usage']:
|
||||
if isinstance(resp, dict) and 'usage' in resp and 'input_tokens' in resp['usage'] and 'output_tokens' in resp['usage']:
|
||||
try:
|
||||
return resp["usage"]["input_tokens"] + resp["usage"]["output_tokens"]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if 'meta' in resp and 'tokens' in resp['meta'] and 'input_tokens' in resp['meta']['tokens'] and 'output_tokens' in resp['meta']['tokens']:
|
||||
if isinstance(resp, dict) and 'meta' in resp and 'tokens' in resp['meta'] and 'input_tokens' in resp['meta']['tokens'] and 'output_tokens' in resp['meta']['tokens']:
|
||||
try:
|
||||
return resp["meta"]["tokens"]["input_tokens"] + resp["meta"]["tokens"]["output_tokens"]
|
||||
except Exception:
|
||||
|
||||
@ -28,6 +28,14 @@ os:
|
||||
infinity:
|
||||
uri: 'localhost:23817'
|
||||
db_name: 'default_db'
|
||||
oceanbase:
|
||||
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
|
||||
config:
|
||||
db_name: 'test'
|
||||
user: 'root@ragflow'
|
||||
password: 'infini_rag_flow'
|
||||
host: 'localhost'
|
||||
port: 2881
|
||||
redis:
|
||||
db: 1
|
||||
password: 'infini_rag_flow'
|
||||
@ -139,5 +147,3 @@ user_default_llm:
|
||||
# secret_id: 'tencent_secret_id'
|
||||
# secret_key: 'tencent_secret_key'
|
||||
# region: 'tencent_region'
|
||||
# table_result_type: '1'
|
||||
# markdown_image_response_type: '1'
|
||||
|
||||
@ -187,7 +187,7 @@ class DoclingParser(RAGFlowPdfParser):
|
||||
bbox = _BBox(int(pn), bb[0], bb[1], bb[2], bb[3])
|
||||
yield (DoclingContentType.EQUATION.value, text, bbox)
|
||||
|
||||
def _transfer_to_sections(self, doc) -> list[tuple[str, str]]:
|
||||
def _transfer_to_sections(self, doc, parse_method: str) -> list[tuple[str, str]]:
|
||||
sections: list[tuple[str, str]] = []
|
||||
for typ, payload, bbox in self._iter_doc_items(doc):
|
||||
if typ == DoclingContentType.TEXT.value:
|
||||
@ -200,7 +200,12 @@ class DoclingParser(RAGFlowPdfParser):
|
||||
continue
|
||||
|
||||
tag = self._make_line_tag(bbox) if isinstance(bbox,_BBox) else ""
|
||||
sections.append((section, tag))
|
||||
if parse_method == "manual":
|
||||
sections.append((section, typ, tag))
|
||||
elif parse_method == "paper":
|
||||
sections.append((section + tag, typ))
|
||||
else:
|
||||
sections.append((section, tag))
|
||||
return sections
|
||||
|
||||
def cropout_docling_table(self, page_no: int, bbox: tuple[float, float, float, float], zoomin: int = 1):
|
||||
@ -282,7 +287,8 @@ class DoclingParser(RAGFlowPdfParser):
|
||||
output_dir: Optional[str] = None,
|
||||
lang: Optional[str] = None,
|
||||
method: str = "auto",
|
||||
delete_output: bool = True,
|
||||
delete_output: bool = True,
|
||||
parse_method: str = "raw"
|
||||
):
|
||||
|
||||
if not self.check_installation():
|
||||
@ -318,7 +324,7 @@ class DoclingParser(RAGFlowPdfParser):
|
||||
if callback:
|
||||
callback(0.7, f"[Docling] Parsed doc: {getattr(doc, 'num_pages', 'n/a')} pages")
|
||||
|
||||
sections = self._transfer_to_sections(doc)
|
||||
sections = self._transfer_to_sections(doc, parse_method=parse_method)
|
||||
tables = self._transfer_to_tables(doc)
|
||||
|
||||
if callback:
|
||||
|
||||
@ -476,7 +476,7 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
item[key] = str((subdir / item[key]).resolve())
|
||||
return data
|
||||
|
||||
def _transfer_to_sections(self, outputs: list[dict[str, Any]]):
|
||||
def _transfer_to_sections(self, outputs: list[dict[str, Any]], parse_method: str = None):
|
||||
sections = []
|
||||
for output in outputs:
|
||||
match output["type"]:
|
||||
@ -497,7 +497,11 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
case MinerUContentType.DISCARDED:
|
||||
pass
|
||||
|
||||
if section:
|
||||
if section and parse_method == "manual":
|
||||
sections.append((section, output["type"], self._line_tag(output)))
|
||||
elif section and parse_method == "paper":
|
||||
sections.append((section + self._line_tag(output), output["type"]))
|
||||
else:
|
||||
sections.append((section, self._line_tag(output)))
|
||||
return sections
|
||||
|
||||
@ -516,6 +520,7 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
method: str = "auto",
|
||||
server_url: Optional[str] = None,
|
||||
delete_output: bool = True,
|
||||
parse_method: str = "raw"
|
||||
) -> tuple:
|
||||
import shutil
|
||||
|
||||
@ -565,7 +570,8 @@ class MinerUParser(RAGFlowPdfParser):
|
||||
self.logger.info(f"[MinerU] Parsed {len(outputs)} blocks from PDF.")
|
||||
if callback:
|
||||
callback(0.75, f"[MinerU] Parsed {len(outputs)} blocks from PDF.")
|
||||
return self._transfer_to_sections(outputs), self._transfer_to_tables(outputs)
|
||||
|
||||
return self._transfer_to_sections(outputs, parse_method), self._transfer_to_tables(outputs)
|
||||
finally:
|
||||
if temp_pdf and temp_pdf.exists():
|
||||
try:
|
||||
|
||||
@ -33,6 +33,8 @@ import xgboost as xgb
|
||||
from huggingface_hub import snapshot_download
|
||||
from PIL import Image
|
||||
from pypdf import PdfReader as pdf2_read
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.metrics import silhouette_score
|
||||
|
||||
from common.file_utils import get_project_base_directory
|
||||
from common.misc_utils import pip_install_torch
|
||||
@ -353,7 +355,6 @@ class RAGFlowPdfParser:
|
||||
def _assign_column(self, boxes, zoomin=3):
|
||||
if not boxes:
|
||||
return boxes
|
||||
|
||||
if all("col_id" in b for b in boxes):
|
||||
return boxes
|
||||
|
||||
@ -361,61 +362,80 @@ class RAGFlowPdfParser:
|
||||
for b in boxes:
|
||||
by_page[b["page_number"]].append(b)
|
||||
|
||||
page_info = {} # pg -> dict(page_w, left_edge, cand_cols)
|
||||
counter = Counter()
|
||||
page_cols = {}
|
||||
|
||||
for pg, bxs in by_page.items():
|
||||
if not bxs:
|
||||
page_info[pg] = {"page_w": 1.0, "left_edge": 0.0, "cand": 1}
|
||||
counter[1] += 1
|
||||
page_cols[pg] = 1
|
||||
continue
|
||||
|
||||
if hasattr(self, "page_images") and self.page_images and len(self.page_images) >= pg:
|
||||
page_w = self.page_images[pg - 1].size[0] / max(1, zoomin)
|
||||
left_edge = 0.0
|
||||
else:
|
||||
xs0 = [box["x0"] for box in bxs]
|
||||
xs1 = [box["x1"] for box in bxs]
|
||||
left_edge = float(min(xs0))
|
||||
page_w = max(1.0, float(max(xs1) - left_edge))
|
||||
x0s_raw = np.array([b["x0"] for b in bxs], dtype=float)
|
||||
|
||||
widths = [max(1.0, (box["x1"] - box["x0"])) for box in bxs]
|
||||
median_w = float(np.median(widths)) if widths else 1.0
|
||||
min_x0 = np.min(x0s_raw)
|
||||
max_x1 = np.max([b["x1"] for b in bxs])
|
||||
width = max_x1 - min_x0
|
||||
|
||||
raw_cols = int(page_w / max(1.0, median_w))
|
||||
INDENT_TOL = width * 0.12
|
||||
x0s = []
|
||||
for x in x0s_raw:
|
||||
if abs(x - min_x0) < INDENT_TOL:
|
||||
x0s.append([min_x0])
|
||||
else:
|
||||
x0s.append([x])
|
||||
x0s = np.array(x0s, dtype=float)
|
||||
|
||||
max_try = min(4, len(bxs))
|
||||
if max_try < 2:
|
||||
max_try = 1
|
||||
best_k = 1
|
||||
best_score = -1
|
||||
|
||||
# cand = raw_cols if (raw_cols >= 2 and median_w < page_w / raw_cols * 0.8) else 1
|
||||
cand = raw_cols
|
||||
for k in range(1, max_try + 1):
|
||||
km = KMeans(n_clusters=k, n_init="auto")
|
||||
labels = km.fit_predict(x0s)
|
||||
|
||||
page_info[pg] = {"page_w": page_w, "left_edge": left_edge, "cand": cand}
|
||||
counter[cand] += 1
|
||||
centers = np.sort(km.cluster_centers_.flatten())
|
||||
if len(centers) > 1:
|
||||
try:
|
||||
score = silhouette_score(x0s, labels)
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
score = 0
|
||||
print(f"{k=},{score=}",flush=True)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_k = k
|
||||
|
||||
logging.info(f"[Page {pg}] median_w={median_w:.2f}, page_w={page_w:.2f}, raw_cols={raw_cols}, cand={cand}")
|
||||
page_cols[pg] = best_k
|
||||
logging.info(f"[Page {pg}] best_score={best_score:.2f}, best_k={best_k}")
|
||||
|
||||
global_cols = counter.most_common(1)[0][0]
|
||||
|
||||
global_cols = Counter(page_cols.values()).most_common(1)[0][0]
|
||||
logging.info(f"Global column_num decided by majority: {global_cols}")
|
||||
|
||||
|
||||
for pg, bxs in by_page.items():
|
||||
if not bxs:
|
||||
continue
|
||||
k = page_cols[pg]
|
||||
if len(bxs) < k:
|
||||
k = 1
|
||||
x0s = np.array([[b["x0"]] for b in bxs], dtype=float)
|
||||
km = KMeans(n_clusters=k, n_init="auto")
|
||||
labels = km.fit_predict(x0s)
|
||||
|
||||
page_w = page_info[pg]["page_w"]
|
||||
left_edge = page_info[pg]["left_edge"]
|
||||
centers = km.cluster_centers_.flatten()
|
||||
order = np.argsort(centers)
|
||||
|
||||
if global_cols == 1:
|
||||
for box in bxs:
|
||||
box["col_id"] = 0
|
||||
continue
|
||||
remap = {orig: new for new, orig in enumerate(order)}
|
||||
|
||||
for box in bxs:
|
||||
w = box["x1"] - box["x0"]
|
||||
if w >= 0.8 * page_w:
|
||||
box["col_id"] = 0
|
||||
continue
|
||||
cx = 0.5 * (box["x0"] + box["x1"])
|
||||
norm_cx = (cx - left_edge) / page_w
|
||||
norm_cx = max(0.0, min(norm_cx, 0.999999))
|
||||
box["col_id"] = int(min(global_cols - 1, norm_cx * global_cols))
|
||||
for b, lb in zip(bxs, labels):
|
||||
b["col_id"] = remap[lb]
|
||||
|
||||
grouped = defaultdict(list)
|
||||
for b in bxs:
|
||||
grouped[b["col_id"]].append(b)
|
||||
|
||||
return boxes
|
||||
|
||||
@ -1071,7 +1091,7 @@ class RAGFlowPdfParser:
|
||||
|
||||
logging.debug("Images converted.")
|
||||
self.is_english = [
|
||||
re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i])))))
|
||||
re.search(r"[ a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i])))))
|
||||
for i in range(len(self.page_chars))
|
||||
]
|
||||
if sum([1 if e else 0 for e in self.is_english]) > len(self.page_images) / 2:
|
||||
@ -1128,7 +1148,7 @@ class RAGFlowPdfParser:
|
||||
|
||||
if not self.is_english and not any([c for c in self.page_chars]) and self.boxes:
|
||||
bxes = [b for bxs in self.boxes for b in bxs]
|
||||
self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))
|
||||
self.is_english = re.search(r"[ \na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))
|
||||
|
||||
logging.debug(f"Is it English: {self.is_english}")
|
||||
|
||||
@ -1303,7 +1323,10 @@ class RAGFlowPdfParser:
|
||||
|
||||
positions = []
|
||||
for ii, (pns, left, right, top, bottom) in enumerate(poss):
|
||||
right = left + max_width
|
||||
if 0 < ii < len(poss) - 1:
|
||||
right = max(left + 10, right)
|
||||
else:
|
||||
right = left + max_width
|
||||
bottom *= ZM
|
||||
for pn in pns[1:]:
|
||||
if 0 <= pn - 1 < page_count:
|
||||
|
||||
@ -192,12 +192,16 @@ class TencentCloudAPIClient:
|
||||
|
||||
|
||||
class TCADPParser(RAGFlowPdfParser):
|
||||
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou"):
|
||||
def __init__(self, secret_id: str = None, secret_key: str = None, region: str = "ap-guangzhou",
|
||||
table_result_type: str = None, markdown_image_response_type: str = None):
|
||||
super().__init__()
|
||||
|
||||
# First initialize logger
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
# Log received parameters
|
||||
self.logger.info(f"[TCADP] Initializing with parameters - table_result_type: {table_result_type}, markdown_image_response_type: {markdown_image_response_type}")
|
||||
|
||||
# Priority: read configuration from RAGFlow configuration system (service_conf.yaml)
|
||||
try:
|
||||
tcadp_parser = get_base_config("tcadp_config", {})
|
||||
@ -205,14 +209,30 @@ class TCADPParser(RAGFlowPdfParser):
|
||||
self.secret_id = secret_id or tcadp_parser.get("secret_id")
|
||||
self.secret_key = secret_key or tcadp_parser.get("secret_key")
|
||||
self.region = region or tcadp_parser.get("region", "ap-guangzhou")
|
||||
self.table_result_type = tcadp_parser.get("table_result_type", "1")
|
||||
self.markdown_image_response_type = tcadp_parser.get("markdown_image_response_type", "1")
|
||||
self.logger.info("[TCADP] Configuration read from service_conf.yaml")
|
||||
# Set table_result_type and markdown_image_response_type from config or parameters
|
||||
self.table_result_type = table_result_type if table_result_type is not None else tcadp_parser.get("table_result_type", "1")
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else tcadp_parser.get("markdown_image_response_type", "1")
|
||||
|
||||
else:
|
||||
self.logger.error("[TCADP] Please configure tcadp_config in service_conf.yaml first")
|
||||
# If config file is empty, use provided parameters or defaults
|
||||
self.secret_id = secret_id
|
||||
self.secret_key = secret_key
|
||||
self.region = region or "ap-guangzhou"
|
||||
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||
|
||||
except ImportError:
|
||||
self.logger.info("[TCADP] Configuration module import failed")
|
||||
# If config file is not available, use provided parameters or defaults
|
||||
self.secret_id = secret_id
|
||||
self.secret_key = secret_key
|
||||
self.region = region or "ap-guangzhou"
|
||||
self.table_result_type = table_result_type if table_result_type is not None else "1"
|
||||
self.markdown_image_response_type = markdown_image_response_type if markdown_image_response_type is not None else "1"
|
||||
|
||||
# Log final values
|
||||
self.logger.info(f"[TCADP] Final values - table_result_type: {self.table_result_type}, markdown_image_response_type: {self.markdown_image_response_type}")
|
||||
|
||||
if not self.secret_id or not self.secret_key:
|
||||
raise ValueError("[TCADP] Please set Tencent Cloud API keys, configure tcadp_config in service_conf.yaml")
|
||||
@ -400,6 +420,8 @@ class TCADPParser(RAGFlowPdfParser):
|
||||
"TableResultType": self.table_result_type,
|
||||
"MarkdownImageResponseType": self.markdown_image_response_type
|
||||
}
|
||||
|
||||
self.logger.info(f"[TCADP] API request config - TableResultType: {self.table_result_type}, MarkdownImageResponseType: {self.markdown_image_response_type}")
|
||||
|
||||
result = client.reconstruct_document_sse(
|
||||
file_type=file_type,
|
||||
|
||||
31
docker/.env
31
docker/.env
@ -7,6 +7,7 @@
|
||||
# Available options:
|
||||
# - `elasticsearch` (default)
|
||||
# - `infinity` (https://github.com/infiniflow/infinity)
|
||||
# - `oceanbase` (https://github.com/oceanbase/oceanbase)
|
||||
# - `opensearch` (https://github.com/opensearch-project/OpenSearch)
|
||||
DOC_ENGINE=${DOC_ENGINE:-elasticsearch}
|
||||
|
||||
@ -62,6 +63,27 @@ INFINITY_THRIFT_PORT=23817
|
||||
INFINITY_HTTP_PORT=23820
|
||||
INFINITY_PSQL_PORT=5432
|
||||
|
||||
# The hostname where the OceanBase service is exposed
|
||||
OCEANBASE_HOST=oceanbase
|
||||
# The port used to expose the OceanBase service
|
||||
OCEANBASE_PORT=2881
|
||||
# The username for OceanBase
|
||||
OCEANBASE_USER=root@ragflow
|
||||
# The password for OceanBase
|
||||
OCEANBASE_PASSWORD=infini_rag_flow
|
||||
# The doc database of the OceanBase service to use
|
||||
OCEANBASE_DOC_DBNAME=ragflow_doc
|
||||
|
||||
# OceanBase container configuration
|
||||
OB_CLUSTER_NAME=${OB_CLUSTER_NAME:-ragflow}
|
||||
OB_TENANT_NAME=${OB_TENANT_NAME:-ragflow}
|
||||
OB_SYS_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
|
||||
OB_TENANT_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
|
||||
OB_MEMORY_LIMIT=${OB_MEMORY_LIMIT:-10G}
|
||||
OB_SYSTEM_MEMORY=${OB_SYSTEM_MEMORY:-2G}
|
||||
OB_DATAFILE_SIZE=${OB_DATAFILE_SIZE:-20G}
|
||||
OB_LOG_DISK_SIZE=${OB_LOG_DISK_SIZE:-20G}
|
||||
|
||||
# The password for MySQL.
|
||||
MYSQL_PASSWORD=infini_rag_flow
|
||||
# The hostname where the MySQL service is exposed
|
||||
@ -208,9 +230,16 @@ REGISTER_ENABLED=1
|
||||
# SANDBOX_MAX_MEMORY=256m # b, k, m, g
|
||||
# SANDBOX_TIMEOUT=10s # s, m, 1m30s
|
||||
|
||||
# Enable DocLing and Mineru
|
||||
# Enable DocLing
|
||||
USE_DOCLING=false
|
||||
|
||||
# Enable Mineru
|
||||
USE_MINERU=false
|
||||
MINERU_EXECUTABLE="$HOME/uv_tools/.venv/bin/mineru"
|
||||
MINERU_DELETE_OUTPUT=0 # keep output directory
|
||||
MINERU_BACKEND=pipeline # or another backend you prefer
|
||||
|
||||
|
||||
|
||||
# pptx support
|
||||
DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
|
||||
@ -138,6 +138,15 @@ The [.env](./.env) file contains important environment variables for Docker.
|
||||
- `password`: The password for MinIO.
|
||||
- `host`: The MinIO serving IP *and* port inside the Docker container. Defaults to `minio:9000`.
|
||||
|
||||
- `oceanbase`
|
||||
- `scheme`: The connection scheme. Set to `mysql` to use mysql config, or other values to use config below.
|
||||
- `config`:
|
||||
- `db_name`: The OceanBase database name.
|
||||
- `user`: The username for OceanBase.
|
||||
- `password`: The password for OceanBase.
|
||||
- `host`: The hostname of the OceanBase service.
|
||||
- `port`: The port of OceanBase.
|
||||
|
||||
- `oss`
|
||||
- `access_key`: The access key ID used to authenticate requests to the OSS service.
|
||||
- `secret_key`: The secret access key used to authenticate requests to the OSS service.
|
||||
|
||||
@ -72,7 +72,7 @@ services:
|
||||
infinity:
|
||||
profiles:
|
||||
- infinity
|
||||
image: infiniflow/infinity:v0.6.5
|
||||
image: infiniflow/infinity:v0.6.6
|
||||
volumes:
|
||||
- infinity_data:/var/infinity
|
||||
- ./infinity_conf.toml:/infinity_conf.toml
|
||||
@ -96,6 +96,31 @@ services:
|
||||
retries: 120
|
||||
restart: on-failure
|
||||
|
||||
oceanbase:
|
||||
profiles:
|
||||
- oceanbase
|
||||
image: oceanbase/oceanbase-ce:4.4.1.0-100000032025101610
|
||||
volumes:
|
||||
- ./oceanbase/data:/root/ob
|
||||
- ./oceanbase/conf:/root/.obd/cluster
|
||||
- ./oceanbase/init.d:/root/boot/init.d
|
||||
ports:
|
||||
- ${OCEANBASE_PORT:-2881}:2881
|
||||
env_file: .env
|
||||
environment:
|
||||
- MODE=normal
|
||||
- OB_SERVER_IP=127.0.0.1
|
||||
mem_limit: ${MEM_LIMIT}
|
||||
healthcheck:
|
||||
test: [ 'CMD-SHELL', 'obclient -h127.0.0.1 -P2881 -uroot@${OB_TENANT_NAME:-ragflow} -p${OB_TENANT_PASSWORD:-infini_rag_flow} -e "CREATE DATABASE IF NOT EXISTS ${OCEANBASE_DOC_DBNAME:-ragflow_doc};"' ]
|
||||
interval: 10s
|
||||
retries: 30
|
||||
start_period: 30s
|
||||
timeout: 10s
|
||||
networks:
|
||||
- ragflow
|
||||
restart: on-failure
|
||||
|
||||
sandbox-executor-manager:
|
||||
profiles:
|
||||
- sandbox
|
||||
@ -154,7 +179,7 @@ services:
|
||||
|
||||
minio:
|
||||
image: quay.io/minio/minio:RELEASE.2025-06-13T11-33-47Z
|
||||
command: server --console-address ":9001" /data
|
||||
command: ["server", "--console-address", ":9001", "/data"]
|
||||
ports:
|
||||
- ${MINIO_PORT}:9000
|
||||
- ${MINIO_CONSOLE_PORT}:9001
|
||||
@ -176,7 +201,7 @@ services:
|
||||
redis:
|
||||
# swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/valkey/valkey:8
|
||||
image: valkey/valkey:8
|
||||
command: redis-server --requirepass ${REDIS_PASSWORD} --maxmemory 128mb --maxmemory-policy allkeys-lru
|
||||
command: ["redis-server", "--requirepass", "${REDIS_PASSWORD}", "--maxmemory", "128mb", "--maxmemory-policy", "allkeys-lru"]
|
||||
env_file: .env
|
||||
ports:
|
||||
- ${REDIS_PORT}:6379
|
||||
@ -256,6 +281,8 @@ volumes:
|
||||
driver: local
|
||||
infinity_data:
|
||||
driver: local
|
||||
ob_data:
|
||||
driver: local
|
||||
mysql_data:
|
||||
driver: local
|
||||
minio_data:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
[general]
|
||||
version = "0.6.5"
|
||||
version = "0.6.6"
|
||||
time_zone = "utc-8"
|
||||
|
||||
[network]
|
||||
|
||||
1
docker/oceanbase/init.d/vec_memory.sql
Normal file
1
docker/oceanbase/init.d/vec_memory.sql
Normal file
@ -0,0 +1 @@
|
||||
ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30;
|
||||
@ -28,6 +28,14 @@ os:
|
||||
infinity:
|
||||
uri: '${INFINITY_HOST:-infinity}:23817'
|
||||
db_name: 'default_db'
|
||||
oceanbase:
|
||||
scheme: 'oceanbase' # set 'mysql' to create connection using mysql config
|
||||
config:
|
||||
db_name: '${OCEANBASE_DOC_DBNAME:-test}'
|
||||
user: '${OCEANBASE_USER:-root@ragflow}'
|
||||
password: '${OCEANBASE_PASSWORD:-infini_rag_flow}'
|
||||
host: '${OCEANBASE_HOST:-oceanbase}'
|
||||
port: ${OCEANBASE_PORT:-2881}
|
||||
redis:
|
||||
db: 1
|
||||
password: '${REDIS_PASSWORD:-infini_rag_flow}'
|
||||
@ -142,5 +150,3 @@ user_default_llm:
|
||||
# secret_id: '${TENCENT_SECRET_ID}'
|
||||
# secret_key: '${TENCENT_SECRET_KEY}'
|
||||
# region: '${TENCENT_REGION}'
|
||||
# table_result_type: '1'
|
||||
# markdown_image_response_type: '1'
|
||||
|
||||
@ -2072,6 +2072,7 @@ Retrieves chunks from specified datasets.
|
||||
- `"cross_languages"`: `list[string]`
|
||||
- `"metadata_condition"`: `object`
|
||||
- `"use_kg"`: `boolean`
|
||||
- `"toc_enhance"`: `boolean`
|
||||
##### Request example
|
||||
|
||||
```bash
|
||||
@ -2085,6 +2086,7 @@ curl --request POST \
|
||||
"dataset_ids": ["b2a62730759d11ef987d0242ac120004"],
|
||||
"document_ids": ["77df9ef4759a11ef8bdd0242ac120004"],
|
||||
"metadata_condition": {
|
||||
"logic": "and",
|
||||
"conditions": [
|
||||
{
|
||||
"name": "author",
|
||||
@ -2121,6 +2123,8 @@ curl --request POST \
|
||||
The number of chunks engaged in vector cosine computation. Defaults to `1024`.
|
||||
- `"use_kg"`: (*Body parameter*), `boolean`
|
||||
The search includes text chunks related to the knowledge graph of the selected dataset to handle complex multi-hop queries. Defaults to `False`.
|
||||
- `"toc_enhance"`: (*Body parameter*), `boolean`
|
||||
The search includes table of content enhancement in order to boost rank of relevant chunks. Files parsed with `TOC Enhance` enabled is prerequisite. Defaults to `False`.
|
||||
- `"rerank_id"`: (*Body parameter*), `integer`
|
||||
The ID of the rerank model.
|
||||
- `"keyword"`: (*Body parameter*), `boolean`
|
||||
@ -2135,6 +2139,9 @@ curl --request POST \
|
||||
The languages that should be translated into, in order to achieve keywords retrievals in different languages.
|
||||
- `"metadata_condition"`: (*Body parameter*), `object`
|
||||
The metadata condition used for filtering chunks:
|
||||
- `"logic"`: (*Body parameter*), `string`
|
||||
- `"and"` Intersection of the result from each condition (default).
|
||||
- `"or"` union of the result from each condition.
|
||||
- `"conditions"`: (*Body parameter*), `array`
|
||||
A list of metadata filter conditions.
|
||||
- `"name"`: `string` - The metadata field name to filter by, e.g., `"author"`, `"company"`, `"url"`. Ensure this parameter before use. See [Set metadata](../guides/dataset/set_metadata.md) for details.
|
||||
|
||||
@ -96,7 +96,7 @@ ragflow:
|
||||
infinity:
|
||||
image:
|
||||
repository: infiniflow/infinity
|
||||
tag: v0.6.5
|
||||
tag: v0.6.6
|
||||
pullPolicy: IfNotPresent
|
||||
pullSecrets: []
|
||||
storage:
|
||||
|
||||
@ -49,7 +49,7 @@ dependencies = [
|
||||
"html-text==0.6.2",
|
||||
"httpx[socks]>=0.28.1,<0.29.0",
|
||||
"huggingface-hub>=0.25.0,<0.26.0",
|
||||
"infinity-sdk==0.6.5",
|
||||
"infinity-sdk==0.6.6",
|
||||
"infinity-emb>=0.0.66,<0.0.67",
|
||||
"itsdangerous==2.1.2",
|
||||
"json-repair==0.35.0",
|
||||
@ -133,7 +133,7 @@ dependencies = [
|
||||
"pyicu>=2.15.3,<3.0.0",
|
||||
"flasgger>=0.9.7.1,<0.10.0",
|
||||
"xxhash>=3.5.0,<4.0.0",
|
||||
"trio>=0.29.0",
|
||||
"trio>=0.17.0,<0.29.0",
|
||||
"langfuse>=2.60.0",
|
||||
"debugpy>=1.8.13",
|
||||
"mcp>=1.9.4",
|
||||
@ -148,7 +148,9 @@ dependencies = [
|
||||
"markdownify>=1.2.0",
|
||||
"captcha>=0.7.1",
|
||||
"pip>=25.2",
|
||||
"moodlepy>=0.23.0",
|
||||
"pypandoc>=1.16",
|
||||
"pyobvector==0.2.18",
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
|
||||
@ -113,6 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
lang = lang,
|
||||
callback = callback,
|
||||
pdf_cls = Pdf,
|
||||
layout_recognizer = layout_recognizer,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
@ -172,6 +172,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
lang = lang,
|
||||
callback = callback,
|
||||
pdf_cls = Pdf,
|
||||
layout_recognizer = layout_recognizer,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
@ -213,6 +213,8 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
lang = lang,
|
||||
callback = callback,
|
||||
pdf_cls = Pdf,
|
||||
layout_recognizer = layout_recognizer,
|
||||
parse_method = "manual",
|
||||
**kwargs
|
||||
)
|
||||
|
||||
@ -225,7 +227,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
elif len(section) != 3:
|
||||
raise ValueError(f"Unexpected section length: {len(section)} (value={section!r})")
|
||||
|
||||
txt, sec_id, poss = section
|
||||
txt, layoutno, poss = section
|
||||
if isinstance(poss, str):
|
||||
poss = pdf_parser.extract_positions(poss)
|
||||
first = poss[0] # tuple: ([pn], x1, x2, y1, y2)
|
||||
@ -235,7 +237,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
pn = pn[0] # [pn] -> pn
|
||||
poss[0] = (pn, *first[1:])
|
||||
|
||||
return (txt, sec_id, poss)
|
||||
return (txt, layoutno, poss)
|
||||
|
||||
|
||||
sections = [_normalize_section(sec) for sec in sections]
|
||||
|
||||
@ -59,6 +59,7 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese"
|
||||
mineru_executable = os.environ.get("MINERU_EXECUTABLE", "mineru")
|
||||
mineru_api = os.environ.get("MINERU_APISERVER", "http://host.docker.internal:9987")
|
||||
pdf_parser = MinerUParser(mineru_path=mineru_executable, mineru_api=mineru_api)
|
||||
parse_method = kwargs.get("parse_method", "raw")
|
||||
|
||||
if not pdf_parser.check_installation():
|
||||
callback(-1, "MinerU not found.")
|
||||
@ -72,12 +73,14 @@ def by_mineru(filename, binary=None, from_page=0, to_page=100000, lang="Chinese"
|
||||
backend=os.environ.get("MINERU_BACKEND", "pipeline"),
|
||||
server_url=os.environ.get("MINERU_SERVER_URL", ""),
|
||||
delete_output=bool(int(os.environ.get("MINERU_DELETE_OUTPUT", 1))),
|
||||
parse_method=parse_method
|
||||
)
|
||||
return sections, tables, pdf_parser
|
||||
|
||||
|
||||
def by_docling(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", callback=None, pdf_cls = None ,**kwargs):
|
||||
pdf_parser = DoclingParser()
|
||||
parse_method = kwargs.get("parse_method", "raw")
|
||||
|
||||
if not pdf_parser.check_installation():
|
||||
callback(-1, "Docling not found.")
|
||||
@ -89,6 +92,7 @@ def by_docling(filename, binary=None, from_page=0, to_page=100000, lang="Chinese
|
||||
callback=callback,
|
||||
output_dir=os.environ.get("MINERU_OUTPUT_DIR", ""),
|
||||
delete_output=bool(int(os.environ.get("MINERU_DELETE_OUTPUT", 1))),
|
||||
parse_method=parse_method
|
||||
)
|
||||
return sections, tables, pdf_parser
|
||||
|
||||
@ -116,7 +120,7 @@ def by_plaintext(filename, binary=None, from_page=0, to_page=100000, callback=No
|
||||
else:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT, llm_name=kwargs.get("layout_recognizer", ""), lang=kwargs.get("lang", "Chinese"))
|
||||
pdf_parser = VisionParser(vision_model=vision_model, **kwargs)
|
||||
|
||||
|
||||
sections, tables = pdf_parser(
|
||||
filename if not binary else binary,
|
||||
from_page=from_page,
|
||||
@ -504,7 +508,7 @@ class Markdown(MarkdownParser):
|
||||
|
||||
return images if images else None
|
||||
|
||||
def __call__(self, filename, binary=None, separate_tables=True,delimiter=None):
|
||||
def __call__(self, filename, binary=None, separate_tables=True, delimiter=None):
|
||||
if binary:
|
||||
encoding = find_codec(binary)
|
||||
txt = binary.decode(encoding, errors="ignore")
|
||||
@ -602,7 +606,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
_SerializedRelationships.load_from_xml = load_from_xml_v2
|
||||
sections, tables = Docx()(filename, binary)
|
||||
|
||||
tables=vision_figure_parser_docx_wrapper(sections=sections,tbls=tables,callback=callback,**kwargs)
|
||||
tables = vision_figure_parser_docx_wrapper(sections=sections, tbls=tables, callback=callback, **kwargs)
|
||||
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
@ -653,18 +657,47 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
|
||||
if name in ["tcadp", "docling", "mineru"]:
|
||||
parser_config["chunk_token_num"] = 0
|
||||
|
||||
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
||||
elif re.search(r"\.(csv|xlsx?)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
excel_parser = ExcelParser()
|
||||
if parser_config.get("html4excel"):
|
||||
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
||||
|
||||
# Check if tcadp_parser is selected for spreadsheet files
|
||||
layout_recognizer = parser_config.get("layout_recognize", "DeepDOC")
|
||||
if layout_recognizer == "TCADP Parser":
|
||||
table_result_type = parser_config.get("table_result_type", "1")
|
||||
markdown_image_response_type = parser_config.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
if not tcadp_parser.check_installation():
|
||||
callback(-1, "TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||
return res
|
||||
|
||||
# Determine file type based on extension
|
||||
file_type = "XLSX" if re.search(r"\.xlsx?$", filename, re.IGNORECASE) else "CSV"
|
||||
|
||||
sections, tables = tcadp_parser.parse_pdf(
|
||||
filepath=filename,
|
||||
binary=binary,
|
||||
callback=callback,
|
||||
output_dir=os.environ.get("TCADP_OUTPUT_DIR", ""),
|
||||
file_type=file_type
|
||||
)
|
||||
parser_config["chunk_token_num"] = 0
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
else:
|
||||
sections = [(_, "") for _ in excel_parser(binary) if _]
|
||||
parser_config["chunk_token_num"] = 12800
|
||||
# Default DeepDOC parser
|
||||
excel_parser = ExcelParser()
|
||||
if parser_config.get("html4excel"):
|
||||
sections = [(_, "") for _ in excel_parser.html(binary, 12) if _]
|
||||
else:
|
||||
sections = [(_, "") for _ in excel_parser(binary) if _]
|
||||
parser_config["chunk_token_num"] = 12800
|
||||
|
||||
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
@ -676,7 +709,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
elif re.search(r"\.(md|markdown)$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
markdown_parser = Markdown(int(parser_config.get("chunk_token_num", 128)))
|
||||
sections, tables = markdown_parser(filename, binary, separate_tables=False,delimiter=parser_config.get("delimiter", "\n!?;。;!?"))
|
||||
sections, tables = markdown_parser(filename, binary, separate_tables=False, delimiter=parser_config.get("delimiter", "\n!?;。;!?"))
|
||||
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
|
||||
@ -99,6 +99,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
lang = lang,
|
||||
callback = callback,
|
||||
pdf_cls = Pdf,
|
||||
layout_recognizer = layout_recognizer,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
@ -21,8 +21,10 @@ import re
|
||||
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper
|
||||
from common.constants import ParserType
|
||||
from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks
|
||||
from deepdoc.parser import PdfParser, PlainParser
|
||||
from deepdoc.parser import PdfParser
|
||||
import numpy as np
|
||||
from rag.app.naive import by_plaintext, PARSERS
|
||||
|
||||
|
||||
class Pdf(PdfParser):
|
||||
def __init__(self):
|
||||
@ -147,19 +149,40 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
"parser_config", {
|
||||
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": "DeepDOC"})
|
||||
if re.search(r"\.pdf$", filename, re.IGNORECASE):
|
||||
if parser_config.get("layout_recognize", "DeepDOC") == "Plain Text":
|
||||
pdf_parser = PlainParser()
|
||||
layout_recognizer = parser_config.get("layout_recognize", "DeepDOC")
|
||||
|
||||
if isinstance(layout_recognizer, bool):
|
||||
layout_recognizer = "DeepDOC" if layout_recognizer else "Plain Text"
|
||||
|
||||
name = layout_recognizer.strip().lower()
|
||||
pdf_parser = PARSERS.get(name, by_plaintext)
|
||||
callback(0.1, "Start to parse.")
|
||||
|
||||
if name == "deepdoc":
|
||||
pdf_parser = Pdf()
|
||||
paper = pdf_parser(filename if not binary else binary,
|
||||
from_page=from_page, to_page=to_page, callback=callback)
|
||||
else:
|
||||
sections, tables, pdf_parser = pdf_parser(
|
||||
filename=filename,
|
||||
binary=binary,
|
||||
from_page=from_page,
|
||||
to_page=to_page,
|
||||
lang=lang,
|
||||
callback=callback,
|
||||
pdf_cls=Pdf,
|
||||
parse_method="paper",
|
||||
**kwargs
|
||||
)
|
||||
|
||||
paper = {
|
||||
"title": filename,
|
||||
"authors": " ",
|
||||
"abstract": "",
|
||||
"sections": pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page)[0],
|
||||
"tables": []
|
||||
"sections": sections,
|
||||
"tables": tables
|
||||
}
|
||||
else:
|
||||
pdf_parser = Pdf()
|
||||
paper = pdf_parser(filename if not binary else binary,
|
||||
from_page=from_page, to_page=to_page, callback=callback)
|
||||
|
||||
tbls=paper["tables"]
|
||||
tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
|
||||
paper["tables"] = tbls
|
||||
|
||||
@ -142,6 +142,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
lang = lang,
|
||||
callback = callback,
|
||||
pdf_cls = Pdf,
|
||||
layout_recognizer = layout_recognizer,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@ import io
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
import trio
|
||||
@ -83,6 +84,7 @@ class ParserParam(ProcessParamBase):
|
||||
"output_format": "json",
|
||||
},
|
||||
"spreadsheet": {
|
||||
"parse_method": "deepdoc", # deepdoc/tcadp_parser
|
||||
"output_format": "html",
|
||||
"suffix": [
|
||||
"xls",
|
||||
@ -102,8 +104,10 @@ class ParserParam(ProcessParamBase):
|
||||
"output_format": "json",
|
||||
},
|
||||
"slides": {
|
||||
"parse_method": "deepdoc", # deepdoc/tcadp_parser
|
||||
"suffix": [
|
||||
"pptx",
|
||||
"ppt"
|
||||
],
|
||||
"output_format": "json",
|
||||
},
|
||||
@ -245,7 +249,12 @@ class Parser(ProcessBase):
|
||||
bboxes.append(box)
|
||||
elif conf.get("parse_method").lower() == "tcadp parser":
|
||||
# ADP is a document parsing tool using Tencent Cloud API
|
||||
tcadp_parser = TCADPParser()
|
||||
table_result_type = conf.get("table_result_type", "1")
|
||||
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
sections, _ = tcadp_parser.parse_pdf(
|
||||
filepath=name,
|
||||
binary=blob,
|
||||
@ -301,14 +310,86 @@ class Parser(ProcessBase):
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Spreadsheet.")
|
||||
conf = self._param.setups["spreadsheet"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
spreadsheet_parser = ExcelParser()
|
||||
if conf.get("output_format") == "html":
|
||||
htmls = spreadsheet_parser.html(blob, 1000000000)
|
||||
self.set_output("html", htmls[0])
|
||||
elif conf.get("output_format") == "json":
|
||||
self.set_output("json", [{"text": txt} for txt in spreadsheet_parser(blob) if txt])
|
||||
elif conf.get("output_format") == "markdown":
|
||||
self.set_output("markdown", spreadsheet_parser.markdown(blob))
|
||||
|
||||
parse_method = conf.get("parse_method", "deepdoc")
|
||||
|
||||
# Handle TCADP parser
|
||||
if parse_method.lower() == "tcadp parser":
|
||||
table_result_type = conf.get("table_result_type", "1")
|
||||
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
if not tcadp_parser.check_installation():
|
||||
raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||
|
||||
# Determine file type based on extension
|
||||
if re.search(r"\.xlsx?$", name, re.IGNORECASE):
|
||||
file_type = "XLSX"
|
||||
else:
|
||||
file_type = "CSV"
|
||||
|
||||
self.callback(0.2, f"Using TCADP parser for {file_type} file.")
|
||||
sections, tables = tcadp_parser.parse_pdf(
|
||||
filepath=name,
|
||||
binary=blob,
|
||||
callback=self.callback,
|
||||
file_type=file_type,
|
||||
file_start_page=1,
|
||||
file_end_page=1000
|
||||
)
|
||||
|
||||
# Process TCADP parser output based on configured output_format
|
||||
output_format = conf.get("output_format", "html")
|
||||
|
||||
if output_format == "html":
|
||||
# For HTML output, combine sections and tables into HTML
|
||||
html_content = ""
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
html_content += section + "\n"
|
||||
for table in tables:
|
||||
if table:
|
||||
html_content += table + "\n"
|
||||
|
||||
self.set_output("html", html_content)
|
||||
|
||||
elif output_format == "json":
|
||||
# For JSON output, create a list of text items
|
||||
result = []
|
||||
# Add sections as text
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
result.append({"text": section})
|
||||
# Add tables as text
|
||||
for table in tables:
|
||||
if table:
|
||||
result.append({"text": table})
|
||||
|
||||
self.set_output("json", result)
|
||||
|
||||
elif output_format == "markdown":
|
||||
# For markdown output, combine into markdown
|
||||
md_content = ""
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
md_content += section + "\n\n"
|
||||
for table in tables:
|
||||
if table:
|
||||
md_content += table + "\n\n"
|
||||
|
||||
self.set_output("markdown", md_content)
|
||||
else:
|
||||
# Default DeepDOC parser
|
||||
spreadsheet_parser = ExcelParser()
|
||||
if conf.get("output_format") == "html":
|
||||
htmls = spreadsheet_parser.html(blob, 1000000000)
|
||||
self.set_output("html", htmls[0])
|
||||
elif conf.get("output_format") == "json":
|
||||
self.set_output("json", [{"text": txt} for txt in spreadsheet_parser(blob) if txt])
|
||||
elif conf.get("output_format") == "markdown":
|
||||
self.set_output("markdown", spreadsheet_parser.markdown(blob))
|
||||
|
||||
def _word(self, name, blob):
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a Word Processor Document")
|
||||
@ -326,22 +407,69 @@ class Parser(ProcessBase):
|
||||
self.set_output("markdown", markdown_text)
|
||||
|
||||
def _slides(self, name, blob):
|
||||
from deepdoc.parser.ppt_parser import RAGFlowPptParser as ppt_parser
|
||||
|
||||
self.callback(random.randint(1, 5) / 100.0, "Start to work on a PowerPoint Document")
|
||||
|
||||
conf = self._param.setups["slides"]
|
||||
self.set_output("output_format", conf["output_format"])
|
||||
|
||||
ppt_parser = ppt_parser()
|
||||
txts = ppt_parser(blob, 0, 100000, None)
|
||||
parse_method = conf.get("parse_method", "deepdoc")
|
||||
|
||||
sections = [{"text": section} for section in txts if section.strip()]
|
||||
# Handle TCADP parser
|
||||
if parse_method.lower() == "tcadp parser":
|
||||
table_result_type = conf.get("table_result_type", "1")
|
||||
markdown_image_response_type = conf.get("markdown_image_response_type", "1")
|
||||
tcadp_parser = TCADPParser(
|
||||
table_result_type=table_result_type,
|
||||
markdown_image_response_type=markdown_image_response_type
|
||||
)
|
||||
if not tcadp_parser.check_installation():
|
||||
raise RuntimeError("TCADP parser not available. Please check Tencent Cloud API configuration.")
|
||||
|
||||
# json
|
||||
assert conf.get("output_format") == "json", "have to be json for ppt"
|
||||
if conf.get("output_format") == "json":
|
||||
self.set_output("json", sections)
|
||||
# Determine file type based on extension
|
||||
if re.search(r"\.pptx?$", name, re.IGNORECASE):
|
||||
file_type = "PPTX"
|
||||
else:
|
||||
file_type = "PPT"
|
||||
|
||||
self.callback(0.2, f"Using TCADP parser for {file_type} file.")
|
||||
|
||||
sections, tables = tcadp_parser.parse_pdf(
|
||||
filepath=name,
|
||||
binary=blob,
|
||||
callback=self.callback,
|
||||
file_type=file_type,
|
||||
file_start_page=1,
|
||||
file_end_page=1000
|
||||
)
|
||||
|
||||
# Process TCADP parser output - PPT only supports json format
|
||||
output_format = conf.get("output_format", "json")
|
||||
if output_format == "json":
|
||||
# For JSON output, create a list of text items
|
||||
result = []
|
||||
# Add sections as text
|
||||
for section, position_tag in sections:
|
||||
if section:
|
||||
result.append({"text": section})
|
||||
# Add tables as text
|
||||
for table in tables:
|
||||
if table:
|
||||
result.append({"text": table})
|
||||
|
||||
self.set_output("json", result)
|
||||
else:
|
||||
# Default DeepDOC parser (supports .pptx format)
|
||||
from deepdoc.parser.ppt_parser import RAGFlowPptParser as ppt_parser
|
||||
|
||||
ppt_parser = ppt_parser()
|
||||
txts = ppt_parser(blob, 0, 100000, None)
|
||||
|
||||
sections = [{"text": section} for section in txts if section.strip()]
|
||||
|
||||
# json
|
||||
assert conf.get("output_format") == "json", "have to be json for ppt"
|
||||
if conf.get("output_format") == "json":
|
||||
self.set_output("json", sections)
|
||||
|
||||
def _markdown(self, name, blob):
|
||||
from functools import reduce
|
||||
@ -579,6 +707,7 @@ class Parser(ProcessBase):
|
||||
"video": self._video,
|
||||
"email": self._email,
|
||||
}
|
||||
|
||||
try:
|
||||
from_upstream = ParserFromUpstream.model_validate(kwargs)
|
||||
except Exception as e:
|
||||
|
||||
@ -1635,6 +1635,15 @@ class LiteLLMBase(ABC):
|
||||
provider_cfg["allow_fallbacks"] = False
|
||||
extra_body["provider"] = provider_cfg
|
||||
completion_args.update({"extra_body": extra_body})
|
||||
|
||||
# Ollama deployments commonly sit behind a reverse proxy that enforces
|
||||
# Bearer auth. Ensure the Authorization header is set when an API key
|
||||
# is provided, while respecting any user-supplied headers. #11350
|
||||
extra_headers = deepcopy(completion_args.get("extra_headers") or {})
|
||||
if self.provider == SupportedLiteLLMProvider.Ollama and self.api_key and "Authorization" not in extra_headers:
|
||||
extra_headers["Authorization"] = f"Bearer {self.api_key}"
|
||||
if extra_headers:
|
||||
completion_args["extra_headers"] = extra_headers
|
||||
return completion_args
|
||||
|
||||
def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}):
|
||||
|
||||
@ -234,7 +234,11 @@ class CoHereRerank(Base):
|
||||
def __init__(self, key, model_name, base_url=None):
|
||||
from cohere import Client
|
||||
|
||||
self.client = Client(api_key=key, base_url=base_url)
|
||||
# Only pass base_url if it's a non-empty string, otherwise use default Cohere API endpoint
|
||||
client_kwargs = {"api_key": key}
|
||||
if base_url and base_url.strip():
|
||||
client_kwargs["base_url"] = base_url
|
||||
self.client = Client(**client_kwargs)
|
||||
self.model_name = model_name.split("___")[0]
|
||||
|
||||
def similarity(self, query: str, texts: list):
|
||||
|
||||
@ -437,16 +437,16 @@ def not_title(txt):
|
||||
return re.search(r"[,;,。;!!]", txt)
|
||||
|
||||
def tree_merge(bull, sections, depth):
|
||||
|
||||
|
||||
if not sections or bull < 0:
|
||||
return sections
|
||||
if isinstance(sections[0], type("")):
|
||||
sections = [(s, "") for s in sections]
|
||||
|
||||
|
||||
# filter out position information in pdf sections
|
||||
sections = [(t, o) for t, o in sections if
|
||||
t and len(t.split("@")[0].strip()) > 1 and not re.match(r"[0-9]+$", t.split("@")[0].strip())]
|
||||
|
||||
|
||||
def get_level(bull, section):
|
||||
text, layout = section
|
||||
text = re.sub(r"\u3000", " ", text).strip()
|
||||
@ -465,7 +465,7 @@ def tree_merge(bull, sections, depth):
|
||||
level, text = get_level(bull, section)
|
||||
if not text.strip("\n"):
|
||||
continue
|
||||
|
||||
|
||||
lines.append((level, text))
|
||||
level_set.add(level)
|
||||
|
||||
@ -608,6 +608,26 @@ def naive_merge(sections: str | list, chunk_token_num=128, delimiter="\n。;
|
||||
cks[-1] += t
|
||||
tk_nums[-1] += tnum
|
||||
|
||||
custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)]
|
||||
has_custom = bool(custom_delimiters)
|
||||
if has_custom:
|
||||
custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True))
|
||||
cks, tk_nums = [], []
|
||||
for sec, pos in sections:
|
||||
split_sec = re.split(r"(%s)" % custom_pattern, sec, flags=re.DOTALL)
|
||||
for sub_sec in split_sec:
|
||||
if re.fullmatch(custom_pattern, sub_sec or ""):
|
||||
continue
|
||||
text = "\n" + sub_sec
|
||||
local_pos = pos
|
||||
if num_tokens_from_string(text) < 8:
|
||||
local_pos = ""
|
||||
if local_pos and text.find(local_pos) < 0:
|
||||
text += local_pos
|
||||
cks.append(text)
|
||||
tk_nums.append(num_tokens_from_string(text))
|
||||
return cks
|
||||
|
||||
dels = get_delimiters(delimiter)
|
||||
for sec, pos in sections:
|
||||
if num_tokens_from_string(sec) < chunk_token_num:
|
||||
@ -657,6 +677,29 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
|
||||
result_images[-1] = concat_img(result_images[-1], image)
|
||||
tk_nums[-1] += tnum
|
||||
|
||||
custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)]
|
||||
has_custom = bool(custom_delimiters)
|
||||
if has_custom:
|
||||
custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True))
|
||||
cks, result_images, tk_nums = [], [], []
|
||||
for text, image in zip(texts, images):
|
||||
text_str = text[0] if isinstance(text, tuple) else text
|
||||
text_pos = text[1] if isinstance(text, tuple) and len(text) > 1 else ""
|
||||
split_sec = re.split(r"(%s)" % custom_pattern, text_str)
|
||||
for sub_sec in split_sec:
|
||||
if re.fullmatch(custom_pattern, sub_sec or ""):
|
||||
continue
|
||||
text_seg = "\n" + sub_sec
|
||||
local_pos = text_pos
|
||||
if num_tokens_from_string(text_seg) < 8:
|
||||
local_pos = ""
|
||||
if local_pos and text_seg.find(local_pos) < 0:
|
||||
text_seg += local_pos
|
||||
cks.append(text_seg)
|
||||
result_images.append(image)
|
||||
tk_nums.append(num_tokens_from_string(text_seg))
|
||||
return cks, result_images
|
||||
|
||||
dels = get_delimiters(delimiter)
|
||||
for text, image in zip(texts, images):
|
||||
# if text is tuple, unpack it
|
||||
@ -748,6 +791,23 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。;!?"):
|
||||
images[-1] = concat_img(images[-1], image)
|
||||
tk_nums[-1] += tnum
|
||||
|
||||
custom_delimiters = [m.group(1) for m in re.finditer(r"`([^`]+)`", delimiter)]
|
||||
has_custom = bool(custom_delimiters)
|
||||
if has_custom:
|
||||
custom_pattern = "|".join(re.escape(t) for t in sorted(set(custom_delimiters), key=len, reverse=True))
|
||||
cks, images, tk_nums = [], [], []
|
||||
pattern = r"(%s)" % custom_pattern
|
||||
for sec, image in sections:
|
||||
split_sec = re.split(pattern, sec)
|
||||
for sub_sec in split_sec:
|
||||
if not sub_sec or re.fullmatch(custom_pattern, sub_sec):
|
||||
continue
|
||||
text_seg = "\n" + sub_sec
|
||||
cks.append(text_seg)
|
||||
images.append(image)
|
||||
tk_nums.append(num_tokens_from_string(text_seg))
|
||||
return cks, images
|
||||
|
||||
dels = get_delimiters(delimiter)
|
||||
pattern = r"(%s)" % dels
|
||||
|
||||
@ -789,7 +849,7 @@ class Node:
|
||||
self.level = level
|
||||
self.depth = depth
|
||||
self.texts = texts or []
|
||||
self.children = []
|
||||
self.children = []
|
||||
|
||||
def add_child(self, child_node):
|
||||
self.children.append(child_node)
|
||||
@ -835,7 +895,7 @@ class Node:
|
||||
return self
|
||||
|
||||
def get_tree(self):
|
||||
tree_list = []
|
||||
tree_list = []
|
||||
self._dfs(self, tree_list, [])
|
||||
return tree_list
|
||||
|
||||
@ -860,7 +920,7 @@ class Node:
|
||||
# A leaf title within depth emits its title path as a chunk (header-only section)
|
||||
elif not child and (1 <= level <= self.depth):
|
||||
tree_list.append("\n".join(path_titles))
|
||||
|
||||
|
||||
# Recurse into children with the updated title path
|
||||
for c in child:
|
||||
self._dfs(c, tree_list, path_titles)
|
||||
self._dfs(c, tree_list, path_titles)
|
||||
|
||||
@ -83,6 +83,7 @@ class FulltextQueryer:
|
||||
return txt
|
||||
|
||||
def question(self, txt, tbl="qa", min_match: float = 0.6):
|
||||
original_query = txt
|
||||
txt = FulltextQueryer.add_space_between_eng_zh(txt)
|
||||
txt = re.sub(
|
||||
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+",
|
||||
@ -127,7 +128,7 @@ class FulltextQueryer:
|
||||
q.append(txt)
|
||||
query = " ".join(q)
|
||||
return MatchTextExpr(
|
||||
self.query_fields, query, 100
|
||||
self.query_fields, query, 100, {"original_query": original_query}
|
||||
), keywords
|
||||
|
||||
def need_fine_grained_tokenize(tk):
|
||||
@ -212,7 +213,7 @@ class FulltextQueryer:
|
||||
if not query:
|
||||
query = otxt
|
||||
return MatchTextExpr(
|
||||
self.query_fields, query, 100, {"minimum_should_match": min_match}
|
||||
self.query_fields, query, 100, {"minimum_should_match": min_match, "original_query": original_query}
|
||||
), keywords
|
||||
return None, keywords
|
||||
|
||||
@ -259,6 +260,7 @@ class FulltextQueryer:
|
||||
content_tks = [c.strip() for c in content_tks.strip() if c.strip()]
|
||||
tks_w = self.tw.weights(content_tks, preprocess=False)
|
||||
|
||||
origin_keywords = keywords.copy()
|
||||
keywords = [f'"{k.strip()}"' for k in keywords]
|
||||
for tk, w in sorted(tks_w, key=lambda x: x[1] * -1)[:keywords_topn]:
|
||||
tk_syns = self.syn.lookup(tk)
|
||||
@ -274,4 +276,4 @@ class FulltextQueryer:
|
||||
keywords.append(f"{tk}^{w}")
|
||||
|
||||
return MatchTextExpr(self.query_fields, " ".join(keywords), 100,
|
||||
{"minimum_should_match": min(3, len(keywords) // 10)})
|
||||
{"minimum_should_match": min(3, len(keywords) / 10), "original_query": " ".join(origin_keywords)})
|
||||
|
||||
@ -355,75 +355,102 @@ class Dealer:
|
||||
rag_tokenizer.tokenize(ans).split(),
|
||||
rag_tokenizer.tokenize(inst).split())
|
||||
|
||||
def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, similarity_threshold=0.2,
|
||||
vector_similarity_weight=0.3, top=1024, doc_ids=None, aggs=True,
|
||||
rerank_mdl=None, highlight=False,
|
||||
rank_feature: dict | None = {PAGERANK_FLD: 10}):
|
||||
def retrieval(
|
||||
self,
|
||||
question,
|
||||
embd_mdl,
|
||||
tenant_ids,
|
||||
kb_ids,
|
||||
page,
|
||||
page_size,
|
||||
similarity_threshold=0.2,
|
||||
vector_similarity_weight=0.3,
|
||||
top=1024,
|
||||
doc_ids=None,
|
||||
aggs=True,
|
||||
rerank_mdl=None,
|
||||
highlight=False,
|
||||
rank_feature: dict | None = {PAGERANK_FLD: 10},
|
||||
):
|
||||
ranks = {"total": 0, "chunks": [], "doc_aggs": {}}
|
||||
if not question:
|
||||
return ranks
|
||||
|
||||
# Ensure RERANK_LIMIT is multiple of page_size
|
||||
RERANK_LIMIT = math.ceil(64/page_size) * page_size if page_size>1 else 1
|
||||
req = {"kb_ids": kb_ids, "doc_ids": doc_ids, "page": math.ceil(page_size*page/RERANK_LIMIT), "size": RERANK_LIMIT,
|
||||
"question": question, "vector": True, "topk": top,
|
||||
"similarity": similarity_threshold,
|
||||
"available_int": 1}
|
||||
|
||||
RERANK_LIMIT = math.ceil(64 / page_size) * page_size if page_size > 1 else 1
|
||||
req = {
|
||||
"kb_ids": kb_ids,
|
||||
"doc_ids": doc_ids,
|
||||
"page": math.ceil(page_size * page / RERANK_LIMIT),
|
||||
"size": RERANK_LIMIT,
|
||||
"question": question,
|
||||
"vector": True,
|
||||
"topk": top,
|
||||
"similarity": similarity_threshold,
|
||||
"available_int": 1,
|
||||
}
|
||||
|
||||
if isinstance(tenant_ids, str):
|
||||
tenant_ids = tenant_ids.split(",")
|
||||
|
||||
sres = self.search(req, [index_name(tid) for tid in tenant_ids],
|
||||
kb_ids, embd_mdl, highlight, rank_feature=rank_feature)
|
||||
sres = self.search(req, [index_name(tid) for tid in tenant_ids], kb_ids, embd_mdl, highlight, rank_feature=rank_feature)
|
||||
|
||||
if rerank_mdl and sres.total > 0:
|
||||
sim, tsim, vsim = self.rerank_by_model(rerank_mdl,
|
||||
sres, question, 1 - vector_similarity_weight,
|
||||
vector_similarity_weight,
|
||||
rank_feature=rank_feature)
|
||||
sim, tsim, vsim = self.rerank_by_model(
|
||||
rerank_mdl,
|
||||
sres,
|
||||
question,
|
||||
1 - vector_similarity_weight,
|
||||
vector_similarity_weight,
|
||||
rank_feature=rank_feature,
|
||||
)
|
||||
else:
|
||||
lower_case_doc_engine = os.getenv('DOC_ENGINE', 'elasticsearch')
|
||||
if lower_case_doc_engine in ["elasticsearch","opensearch"]:
|
||||
lower_case_doc_engine = os.getenv("DOC_ENGINE", "elasticsearch")
|
||||
if lower_case_doc_engine in ["elasticsearch", "opensearch"]:
|
||||
# ElasticSearch doesn't normalize each way score before fusion.
|
||||
sim, tsim, vsim = self.rerank(
|
||||
sres, question, 1 - vector_similarity_weight, vector_similarity_weight,
|
||||
rank_feature=rank_feature)
|
||||
sres,
|
||||
question,
|
||||
1 - vector_similarity_weight,
|
||||
vector_similarity_weight,
|
||||
rank_feature=rank_feature,
|
||||
)
|
||||
else:
|
||||
# Don't need rerank here since Infinity normalizes each way score before fusion.
|
||||
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
|
||||
sim = [s if s is not None else 0. for s in sim]
|
||||
sim = [s if s is not None else 0.0 for s in sim]
|
||||
tsim = sim
|
||||
vsim = sim
|
||||
# Already paginated in search function
|
||||
max_pages = RERANK_LIMIT // page_size
|
||||
page_index = (page % max_pages) - 1
|
||||
begin = max(page_index * page_size, 0)
|
||||
sim = sim[begin : begin + page_size]
|
||||
|
||||
sim_np = np.array(sim, dtype=np.float64)
|
||||
idx = np.argsort(sim_np * -1)
|
||||
if sim_np.size == 0:
|
||||
return ranks
|
||||
|
||||
sorted_idx = np.argsort(sim_np * -1)
|
||||
|
||||
valid_idx = [int(i) for i in sorted_idx if sim_np[i] >= similarity_threshold]
|
||||
filtered_count = len(valid_idx)
|
||||
ranks["total"] = int(filtered_count)
|
||||
|
||||
if filtered_count == 0:
|
||||
return ranks
|
||||
|
||||
max_pages = max(RERANK_LIMIT // max(page_size, 1), 1)
|
||||
page_index = (page - 1) % max_pages
|
||||
begin = page_index * page_size
|
||||
end = begin + page_size
|
||||
page_idx = valid_idx[begin:end]
|
||||
|
||||
dim = len(sres.query_vector)
|
||||
vector_column = f"q_{dim}_vec"
|
||||
zero_vector = [0.0] * dim
|
||||
filtered_count = (sim_np >= similarity_threshold).sum()
|
||||
ranks["total"] = int(filtered_count) # Convert from np.int64 to Python int otherwise JSON serializable error
|
||||
for i in idx:
|
||||
if np.float64(sim[i]) < similarity_threshold:
|
||||
break
|
||||
|
||||
for i in page_idx:
|
||||
id = sres.ids[i]
|
||||
chunk = sres.field[id]
|
||||
dnm = chunk.get("docnm_kwd", "")
|
||||
did = chunk.get("doc_id", "")
|
||||
|
||||
if len(ranks["chunks"]) >= page_size:
|
||||
if aggs:
|
||||
if dnm not in ranks["doc_aggs"]:
|
||||
ranks["doc_aggs"][dnm] = {"doc_id": did, "count": 0}
|
||||
ranks["doc_aggs"][dnm]["count"] += 1
|
||||
continue
|
||||
break
|
||||
|
||||
position_int = chunk.get("position_int", [])
|
||||
d = {
|
||||
"chunk_id": id,
|
||||
@ -434,12 +461,12 @@ class Dealer:
|
||||
"kb_id": chunk["kb_id"],
|
||||
"important_kwd": chunk.get("important_kwd", []),
|
||||
"image_id": chunk.get("img_id", ""),
|
||||
"similarity": sim[i],
|
||||
"vector_similarity": vsim[i],
|
||||
"term_similarity": tsim[i],
|
||||
"similarity": float(sim_np[i]),
|
||||
"vector_similarity": float(vsim[i]),
|
||||
"term_similarity": float(tsim[i]),
|
||||
"vector": chunk.get(vector_column, zero_vector),
|
||||
"positions": position_int,
|
||||
"doc_type_kwd": chunk.get("doc_type_kwd", "")
|
||||
"doc_type_kwd": chunk.get("doc_type_kwd", ""),
|
||||
}
|
||||
if highlight and sres.highlight:
|
||||
if id in sres.highlight:
|
||||
@ -447,15 +474,30 @@ class Dealer:
|
||||
else:
|
||||
d["highlight"] = d["content_with_weight"]
|
||||
ranks["chunks"].append(d)
|
||||
if dnm not in ranks["doc_aggs"]:
|
||||
ranks["doc_aggs"][dnm] = {"doc_id": did, "count": 0}
|
||||
ranks["doc_aggs"][dnm]["count"] += 1
|
||||
ranks["doc_aggs"] = [{"doc_name": k,
|
||||
"doc_id": v["doc_id"],
|
||||
"count": v["count"]} for k,
|
||||
v in sorted(ranks["doc_aggs"].items(),
|
||||
key=lambda x: x[1]["count"] * -1)]
|
||||
ranks["chunks"] = ranks["chunks"][:page_size]
|
||||
|
||||
if aggs:
|
||||
for i in valid_idx:
|
||||
id = sres.ids[i]
|
||||
chunk = sres.field[id]
|
||||
dnm = chunk.get("docnm_kwd", "")
|
||||
did = chunk.get("doc_id", "")
|
||||
if dnm not in ranks["doc_aggs"]:
|
||||
ranks["doc_aggs"][dnm] = {"doc_id": did, "count": 0}
|
||||
ranks["doc_aggs"][dnm]["count"] += 1
|
||||
|
||||
ranks["doc_aggs"] = [
|
||||
{
|
||||
"doc_name": k,
|
||||
"doc_id": v["doc_id"],
|
||||
"count": v["count"],
|
||||
}
|
||||
for k, v in sorted(
|
||||
ranks["doc_aggs"].items(),
|
||||
key=lambda x: x[1]["count"] * -1,
|
||||
)
|
||||
]
|
||||
else:
|
||||
ranks["doc_aggs"] = []
|
||||
|
||||
return ranks
|
||||
|
||||
@ -564,7 +606,7 @@ class Dealer:
|
||||
ids = relevant_chunks_with_toc(query, toc, chat_mdl, topn*2)
|
||||
if not ids:
|
||||
return chunks
|
||||
|
||||
|
||||
vector_size = 1024
|
||||
id2idx = {ck["chunk_id"]: i for i, ck in enumerate(chunks)}
|
||||
for cid, sim in ids:
|
||||
|
||||
@ -429,7 +429,7 @@ def rank_memories(chat_mdl, goal:str, sub_goal:str, tool_call_summaries: list[st
|
||||
return re.sub(r"^.*</think>", "", ans, flags=re.DOTALL)
|
||||
|
||||
|
||||
def gen_meta_filter(chat_mdl, meta_data:dict, query: str) -> list:
|
||||
def gen_meta_filter(chat_mdl, meta_data:dict, query: str) -> dict:
|
||||
sys_prompt = PROMPT_JINJA_ENV.from_string(META_FILTER).render(
|
||||
current_date=datetime.datetime.today().strftime('%Y-%m-%d'),
|
||||
metadata_keys=json.dumps(meta_data),
|
||||
@ -440,11 +440,13 @@ def gen_meta_filter(chat_mdl, meta_data:dict, query: str) -> list:
|
||||
ans = re.sub(r"(^.*</think>|```json\n|```\n*$)", "", ans, flags=re.DOTALL)
|
||||
try:
|
||||
ans = json_repair.loads(ans)
|
||||
assert isinstance(ans, list), ans
|
||||
assert isinstance(ans, dict), ans
|
||||
assert "conditions" in ans and isinstance(ans["conditions"], list), ans
|
||||
return ans
|
||||
except Exception:
|
||||
logging.exception(f"Loading json failure: {ans}")
|
||||
return []
|
||||
|
||||
return {"conditions": []}
|
||||
|
||||
|
||||
def gen_json(system_prompt:str, user_prompt:str, chat_mdl, gen_conf = None):
|
||||
|
||||
@ -9,11 +9,13 @@ You are a metadata filtering condition generator. Analyze the user's question an
|
||||
}
|
||||
|
||||
2. **Output Requirements**:
|
||||
- Always output a JSON array of filter objects
|
||||
- Each object must have:
|
||||
- Always output a JSON dictionary with only 2 keys: 'conditions'(filter objects) and 'logic' between the conditions ('and' or 'or').
|
||||
- Each filter object in conditions must have:
|
||||
"key": (metadata attribute name),
|
||||
"value": (string value to compare),
|
||||
"op": (operator from allowed list)
|
||||
- Logic between all the conditions: 'and'(Intersection of results for each condition) / 'or' (union of results for all conditions)
|
||||
|
||||
|
||||
3. **Operator Guide**:
|
||||
- Use these operators only: ["contains", "not contains", "start with", "end with", "empty", "not empty", "=", "≠", ">", "<", "≥", "≤"]
|
||||
@ -32,22 +34,97 @@ You are a metadata filtering condition generator. Analyze the user's question an
|
||||
- Attribute doesn't exist in metadata
|
||||
- Value has no match in metadata
|
||||
|
||||
5. **Example**:
|
||||
5. **Example A**:
|
||||
- User query: "上市日期七月份的有哪些商品,不要蓝色的"
|
||||
- Metadata: { "color": {...}, "listing_date": {...} }
|
||||
- Output:
|
||||
[
|
||||
{
|
||||
"logic": "and",
|
||||
"conditions": [
|
||||
{"key": "listing_date", "value": "2025-07-01", "op": "≥"},
|
||||
{"key": "listing_date", "value": "2025-08-01", "op": "<"},
|
||||
{"key": "color", "value": "blue", "op": "≠"}
|
||||
]
|
||||
}
|
||||
|
||||
6. **Final Output**:
|
||||
- ONLY output valid JSON array
|
||||
6. **Example B**:
|
||||
- User query: "Both blue and red are acceptable."
|
||||
- Metadata: { "color": {...}, "listing_date": {...} }
|
||||
- Output:
|
||||
{
|
||||
"logic": "or",
|
||||
"conditions": [
|
||||
{"key": "color", "value": "blue", "op": "="},
|
||||
{"key": "color", "value": "red", "op": "="}
|
||||
]
|
||||
}
|
||||
|
||||
7. **Final Output**:
|
||||
- ONLY output valid JSON dictionary
|
||||
- NO additional text/explanations
|
||||
- Json schema is as following:
|
||||
```json
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"logic": {
|
||||
"type": "string",
|
||||
"description": "Logic relationship between all the conditions, the default is 'and'.",
|
||||
"enum": [
|
||||
"and",
|
||||
"or"
|
||||
]
|
||||
},
|
||||
"conditions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "Metadata attribute name."
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "Value to compare."
|
||||
},
|
||||
"op": {
|
||||
"type": "string",
|
||||
"description": "Operator from allowed list.",
|
||||
"enum": [
|
||||
"contains",
|
||||
"not contains",
|
||||
"start with",
|
||||
"end with",
|
||||
"empty",
|
||||
"not empty",
|
||||
"=",
|
||||
"≠",
|
||||
">",
|
||||
"<",
|
||||
"≥",
|
||||
"≤"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"key",
|
||||
"value",
|
||||
"op"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"conditions"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
```
|
||||
|
||||
**Current Task**:
|
||||
- Today's date: {{current_date}}
|
||||
- Available metadata keys: {{metadata_keys}}
|
||||
- User query: "{{user_question}}"
|
||||
- Today's date: {{ current_date }}
|
||||
- Available metadata keys: {{ metadata_keys }}
|
||||
- User query: "{{ user_question }}"
|
||||
|
||||
|
||||
@ -37,14 +37,8 @@ from api.db.services.connector_service import ConnectorService, SyncLogsService
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from common import settings
|
||||
from common.config_utils import show_configs
|
||||
from common.data_source import BlobStorageConnector, NotionConnector, DiscordConnector, GoogleDriveConnector, MoodleConnector, JiraConnector
|
||||
from common.constants import FileSource, TaskStatus
|
||||
from common.data_source import (
|
||||
BlobStorageConnector,
|
||||
DiscordConnector,
|
||||
GoogleDriveConnector,
|
||||
JiraConnector,
|
||||
NotionConnector,
|
||||
)
|
||||
from common.data_source.config import INDEX_BATCH_SIZE
|
||||
from common.data_source.confluence_connector import ConfluenceConnector
|
||||
from common.data_source.interfaces import CheckpointOutputWrapper
|
||||
@ -418,6 +412,37 @@ class Teams(SyncBase):
|
||||
pass
|
||||
|
||||
|
||||
class Moodle(SyncBase):
|
||||
SOURCE_NAME: str = FileSource.MOODLE
|
||||
|
||||
async def _generate(self, task: dict):
|
||||
self.connector = MoodleConnector(
|
||||
moodle_url=self.conf["moodle_url"],
|
||||
batch_size=self.conf.get("batch_size", INDEX_BATCH_SIZE)
|
||||
)
|
||||
|
||||
self.connector.load_credentials(self.conf["credentials"])
|
||||
|
||||
# Determine the time range for synchronization based on reindex or poll_range_start
|
||||
if task["reindex"] == "1" or not task.get("poll_range_start"):
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
else:
|
||||
poll_start = task["poll_range_start"]
|
||||
if poll_start is None:
|
||||
document_generator = self.connector.load_from_state()
|
||||
begin_info = "totally"
|
||||
else:
|
||||
document_generator = self.connector.poll_source(
|
||||
poll_start.timestamp(),
|
||||
datetime.now(timezone.utc).timestamp()
|
||||
)
|
||||
begin_info = "from {}".format(poll_start)
|
||||
|
||||
logging.info("Connect to Moodle: {} {}".format(self.conf["moodle_url"], begin_info))
|
||||
return document_generator
|
||||
|
||||
|
||||
func_factory = {
|
||||
FileSource.S3: S3,
|
||||
FileSource.NOTION: Notion,
|
||||
@ -429,6 +454,7 @@ func_factory = {
|
||||
FileSource.SHAREPOINT: SharePoint,
|
||||
FileSource.SLACK: Slack,
|
||||
FileSource.TEAMS: Teams,
|
||||
FileSource.MOODLE: Moodle
|
||||
}
|
||||
|
||||
|
||||
|
||||
1562
rag/utils/ob_conn.py
Normal file
1562
rag/utils/ob_conn.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -69,7 +69,7 @@ class Document(Base):
|
||||
response = res.json()
|
||||
actual_keys = set(response.keys())
|
||||
if actual_keys == error_keys:
|
||||
raise Exception(res.get("message"))
|
||||
raise Exception(response.get("message"))
|
||||
else:
|
||||
return res.content
|
||||
except json.JSONDecodeError:
|
||||
|
||||
@ -80,6 +80,7 @@ class Session(Base):
|
||||
|
||||
|
||||
def _structure_answer(self, json_data):
|
||||
answer = ""
|
||||
if self.__session_type == "agent":
|
||||
answer = json_data["data"]["content"]
|
||||
elif self.__session_type == "chat":
|
||||
|
||||
96
web/package-lock.json
generated
96
web/package-lock.json
generated
@ -66,6 +66,7 @@
|
||||
"input-otp": "^1.4.1",
|
||||
"js-base64": "^3.7.5",
|
||||
"jsencrypt": "^3.3.2",
|
||||
"jsoneditor": "^10.4.2",
|
||||
"lexical": "^0.23.1",
|
||||
"lodash": "^4.17.21",
|
||||
"lucide-react": "^0.546.0",
|
||||
@ -8998,6 +8999,12 @@
|
||||
"@sinonjs/commons": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@sphinxxxx/color-conversion": {
|
||||
"version": "2.2.2",
|
||||
"resolved": "https://registry.npmmirror.com/@sphinxxxx/color-conversion/-/color-conversion-2.2.2.tgz",
|
||||
"integrity": "sha512-XExJS3cLqgrmNBIP3bBw6+1oQ1ksGjFh0+oClDKFYpCCqx/hlqwWO5KO/S63fzUo67SxI9dMrF0y5T/Ey7h8Zw==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/@storybook/addon-docs": {
|
||||
"version": "9.1.4",
|
||||
"resolved": "https://registry.npmmirror.com/@storybook/addon-docs/-/addon-docs-9.1.4.tgz",
|
||||
@ -12962,6 +12969,12 @@
|
||||
"node": ">= 0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/ace-builds": {
|
||||
"version": "1.43.4",
|
||||
"resolved": "https://registry.npmmirror.com/ace-builds/-/ace-builds-1.43.4.tgz",
|
||||
"integrity": "sha512-8hAxVfo2ImICd69BWlZwZlxe9rxDGDjuUhh+WeWgGDvfBCE+r3lkynkQvIovDz4jcMi8O7bsEaFygaDT+h9sBA==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/acorn": {
|
||||
"version": "8.15.0",
|
||||
"resolved": "https://registry.npmmirror.com/acorn/-/acorn-8.15.0.tgz",
|
||||
@ -21894,6 +21907,12 @@
|
||||
"@pkgjs/parseargs": "^0.11.0"
|
||||
}
|
||||
},
|
||||
"node_modules/javascript-natural-sort": {
|
||||
"version": "0.7.1",
|
||||
"resolved": "https://registry.npmmirror.com/javascript-natural-sort/-/javascript-natural-sort-0.7.1.tgz",
|
||||
"integrity": "sha512-nO6jcEfZWQXDhOiBtG2KvKyEptz7RVbpGP4vTD2hLBdmNQSsCiicO2Ioinv6UI4y9ukqnBpy+XZ9H6uLNgJTlw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/javascript-stringify": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/javascript-stringify/-/javascript-stringify-2.1.0.tgz",
|
||||
@ -24253,6 +24272,15 @@
|
||||
"jiti": "bin/jiti.js"
|
||||
}
|
||||
},
|
||||
"node_modules/jmespath": {
|
||||
"version": "0.16.0",
|
||||
"resolved": "https://registry.npmmirror.com/jmespath/-/jmespath-0.16.0.tgz",
|
||||
"integrity": "sha512-9FzQjJ7MATs1tSpnco1K6ayiYE3figslrXA72G2HQ/n76RzvYlofyi5QM+iX4YRs/pu3yzxlVQSST23+dMDknw==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">= 0.6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/js-base64": {
|
||||
"version": "3.7.5",
|
||||
"resolved": "https://registry.npmmirror.com/js-base64/-/js-base64-3.7.5.tgz",
|
||||
@ -24357,6 +24385,12 @@
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/json-source-map": {
|
||||
"version": "0.6.1",
|
||||
"resolved": "https://registry.npmmirror.com/json-source-map/-/json-source-map-0.6.1.tgz",
|
||||
"integrity": "sha512-1QoztHPsMQqhDq0hlXY5ZqcEdUzxQEIxgFkKl4WUp2pgShObl+9ovi4kRh2TfvAfxAoHOJ9vIMEqk3k4iex7tg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/json-stable-stringify-without-jsonify": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz",
|
||||
@ -24393,6 +24427,44 @@
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/jsoneditor": {
|
||||
"version": "10.4.2",
|
||||
"resolved": "https://registry.npmmirror.com/jsoneditor/-/jsoneditor-10.4.2.tgz",
|
||||
"integrity": "sha512-SQPCXlanU4PqdVsYuj2X7yfbLiiJYjklbksGfMKPsuwLhAIPxDlG43jYfXieGXvxpuq1fkw08YoRbkKXKabcLA==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"ace-builds": "^1.36.2",
|
||||
"ajv": "^6.12.6",
|
||||
"javascript-natural-sort": "^0.7.1",
|
||||
"jmespath": "^0.16.0",
|
||||
"json-source-map": "^0.6.1",
|
||||
"jsonrepair": "^3.8.1",
|
||||
"picomodal": "^3.0.0",
|
||||
"vanilla-picker": "^2.12.3"
|
||||
}
|
||||
},
|
||||
"node_modules/jsoneditor/node_modules/ajv": {
|
||||
"version": "6.12.6",
|
||||
"resolved": "https://registry.npmmirror.com/ajv/-/ajv-6.12.6.tgz",
|
||||
"integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fast-deep-equal": "^3.1.1",
|
||||
"fast-json-stable-stringify": "^2.0.0",
|
||||
"json-schema-traverse": "^0.4.1",
|
||||
"uri-js": "^4.2.2"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/epoberezkin"
|
||||
}
|
||||
},
|
||||
"node_modules/jsoneditor/node_modules/json-schema-traverse": {
|
||||
"version": "0.4.1",
|
||||
"resolved": "https://registry.npmmirror.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
|
||||
"integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/jsonfile": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/jsonfile/-/jsonfile-6.1.0.tgz",
|
||||
@ -24404,6 +24476,15 @@
|
||||
"graceful-fs": "^4.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/jsonrepair": {
|
||||
"version": "3.13.1",
|
||||
"resolved": "https://registry.npmmirror.com/jsonrepair/-/jsonrepair-3.13.1.tgz",
|
||||
"integrity": "sha512-WJeiE0jGfxYmtLwBTEk8+y/mYcaleyLXWaqp5bJu0/ZTSeG0KQq/wWQ8pmnkKenEdN6pdnn6QtcoSUkbqDHWNw==",
|
||||
"license": "ISC",
|
||||
"bin": {
|
||||
"jsonrepair": "bin/cli.js"
|
||||
}
|
||||
},
|
||||
"node_modules/jsx-ast-utils": {
|
||||
"version": "3.3.5",
|
||||
"resolved": "https://registry.npmmirror.com/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz",
|
||||
@ -27499,6 +27580,12 @@
|
||||
"node": ">=8.6"
|
||||
}
|
||||
},
|
||||
"node_modules/picomodal": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/picomodal/-/picomodal-3.0.0.tgz",
|
||||
"integrity": "sha512-FoR3TDfuLlqUvcEeK5ifpKSVVns6B4BQvc8SDF6THVMuadya6LLtji0QgUDSStw0ZR2J7I6UGi5V2V23rnPWTw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/pidtree": {
|
||||
"version": "0.6.0",
|
||||
"resolved": "https://registry.npmmirror.com/pidtree/-/pidtree-0.6.0.tgz",
|
||||
@ -36235,6 +36322,15 @@
|
||||
"dev": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/vanilla-picker": {
|
||||
"version": "2.12.3",
|
||||
"resolved": "https://registry.npmmirror.com/vanilla-picker/-/vanilla-picker-2.12.3.tgz",
|
||||
"integrity": "sha512-qVkT1E7yMbUsB2mmJNFmaXMWE2hF8ffqzMMwe9zdAikd8u2VfnsVY2HQcOUi2F38bgbxzlJBEdS1UUhOXdF9GQ==",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@sphinxxxx/color-conversion": "^2.2.2"
|
||||
}
|
||||
},
|
||||
"node_modules/vary": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/vary/-/vary-1.1.2.tgz",
|
||||
|
||||
@ -79,6 +79,7 @@
|
||||
"input-otp": "^1.4.1",
|
||||
"js-base64": "^3.7.5",
|
||||
"jsencrypt": "^3.3.2",
|
||||
"jsoneditor": "^10.4.2",
|
||||
"lexical": "^0.23.1",
|
||||
"lodash": "^4.17.21",
|
||||
"lucide-react": "^0.546.0",
|
||||
|
||||
4
web/src/assets/svg/data-source/moodle.svg
Normal file
4
web/src/assets/svg/data-source/moodle.svg
Normal file
@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1230.87 315.18">
|
||||
<path fill="#f98012" d="M289.61 309.77V201.51q0-33.94-28-33.95t-28.06 33.95v108.26H178.4V201.51q0-33.94-27.57-33.95-28.05 0-28 33.95v108.26H67.67V195.12q0-35.43 24.6-53.63 21.66-16.25 58.56-16.25 37.41 0 55.12 19.19 15.26-19.19 55.62-19.19 36.9 0 58.54 16.25 24.6 18.19 24.61 53.63v114.65Zm675.49-.5V0h55.16v309.27Zm-70.3 0v-18.22q-7.39 9.84-25.11 15.76a92.81 92.81 0 0 1-30.05 5.41q-39.4 0-63.28-27.09t-23.89-67c0-26.25 7.76-48.3 23.4-66 13.85-15.65 36.35-26.59 62.29-26.59 29.22 0 46.28 11 56.64 23.63V0h53.68v309.27Zm0-102.92q0-14.78-14-28.33T852 164.47q-21.16 0-33.48 17.24-10.85 15.3-10.84 37.43 0 21.68 10.84 36.94 12.3 17.75 33.48 17.73 12.81 0 27.83-12.07t15-24.86ZM648.57 314.19q-41.87 0-69.19-26.59T552 219.14q0-41.83 27.34-68.45t69.19-26.59q41.85 0 69.44 26.59t27.58 68.45q0 41.88-27.58 68.46t-69.4 26.59Zm0-145.77q-19.94 0-30.65 15.1t-10.71 35.88q0 20.78 10 35.13 11.46 16.34 31.4 16.32T680 254.53q10.46-14.34 10.46-35.13t-10-35.13q-11.46-15.86-31.89-15.85ZM449.13 314.19q-41.86 0-69.2-26.59t-27.33-68.46q0-41.83 27.33-68.45t69.2-26.59q41.83 0 69.44 26.59t27.57 68.45q0 41.88-27.57 68.46t-69.44 26.59Zm0-145.77q-19.94 0-30.66 15.1t-10.71 35.88q0 20.78 10 35.13 11.46 16.34 31.41 16.32t31.39-16.32Q491 240.19 491 219.4t-10-35.13q-11.44-15.86-31.87-15.85Zm636.45 67.47c1.18 13.13 18.25 41.37 46.31 41.37 27.31 0 40.23-15.77 40.87-22.16l58.11-.5c-6.34 19.39-32.1 60.58-100 60.58-28.24 0-54.08-8.79-72.64-26.35s-27.82-40.45-27.82-68.7q0-43.83 27.82-69.68t72.16-25.85q48.25 0 75.34 32 25.13 29.53 25.12 79.28Zm90.13-34c-2.3-11.83-7.23-21.49-14.77-29.06q-12.82-12.3-29.55-12.31-17.25 0-28.82 11.82t-15.5 29.55Z"/>
|
||||
<path fill="#333" d="m174.74 116.9 54.74-40-.7-2.44C130 86.57 85.08 95.15 0 144.47l.79 2.24 6.76.07c-.62 6.81-1.7 23.64-.32 48.95-9.44 27.32-.24 45.88 8.4 66.07 1.37-21 1.23-44-5.22-66.89-1.35-25.14-.24-41.67.37-48.1l56.4.54a258 258 0 0 0 1.67 33.06c50.4 17.71 101.09-.06 128-43.72-7.47-8.37-22.11-19.79-22.11-19.79Z"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.0 KiB |
@ -29,7 +29,10 @@ const BackButton: React.FC<BackButtonProps> = ({
|
||||
return (
|
||||
<Button
|
||||
variant="ghost"
|
||||
className={cn('gap-2 bg-bg-card border border-border-default', className)}
|
||||
className={cn(
|
||||
'gap-2 bg-bg-card border border-border-default hover:bg-border-button hover:text-text-primary',
|
||||
className,
|
||||
)}
|
||||
onClick={handleClick}
|
||||
{...props}
|
||||
>
|
||||
|
||||
@ -44,6 +44,7 @@ export function ConfirmDeleteDialog({
|
||||
<AlertDialogContent
|
||||
onSelect={(e) => e.preventDefault()}
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="bg-bg-base"
|
||||
>
|
||||
<AlertDialogHeader>
|
||||
<AlertDialogTitle>
|
||||
@ -59,7 +60,7 @@ export function ConfirmDeleteDialog({
|
||||
{t('common.no')}
|
||||
</AlertDialogCancel>
|
||||
<AlertDialogAction
|
||||
className="bg-state-error text-text-primary"
|
||||
className="bg-state-error text-text-primary hover:text-text-primary hover:bg-state-error"
|
||||
onClick={onOk}
|
||||
>
|
||||
{t('common.yes')}
|
||||
|
||||
@ -68,6 +68,7 @@ export interface FormFieldConfig {
|
||||
dependencies?: string[];
|
||||
schema?: ZodSchema;
|
||||
shouldRender?: (formValues: any) => boolean;
|
||||
labelClassName?: string;
|
||||
}
|
||||
|
||||
// Component props interface
|
||||
@ -81,6 +82,7 @@ interface DynamicFormProps<T extends FieldValues> {
|
||||
fieldName: string,
|
||||
updatedField: Partial<FormFieldConfig>,
|
||||
) => void;
|
||||
labelClassName?: string;
|
||||
}
|
||||
|
||||
// Form ref interface
|
||||
@ -295,6 +297,7 @@ const DynamicForm = {
|
||||
children,
|
||||
defaultValues: formDefaultValues = {} as DefaultValues<T>,
|
||||
onFieldUpdate,
|
||||
labelClassName,
|
||||
}: DynamicFormProps<T>,
|
||||
ref: React.Ref<any>,
|
||||
) => {
|
||||
@ -353,6 +356,13 @@ const DynamicForm = {
|
||||
...combinedErrors,
|
||||
...fieldErrors,
|
||||
} as any;
|
||||
|
||||
console.log('combinedErrors', combinedErrors);
|
||||
for (const key in combinedErrors) {
|
||||
if (Array.isArray(combinedErrors[key])) {
|
||||
combinedErrors[key] = combinedErrors[key][0];
|
||||
}
|
||||
}
|
||||
console.log('combinedErrors', combinedErrors);
|
||||
return {
|
||||
values: Object.keys(combinedErrors).length ? {} : data,
|
||||
@ -456,6 +466,7 @@ const DynamicForm = {
|
||||
required={field.required}
|
||||
horizontal={field.horizontal}
|
||||
tooltip={field.tooltip}
|
||||
labelClassName={labelClassName || field.labelClassName}
|
||||
>
|
||||
{(fieldProps) => {
|
||||
const finalFieldProps = field.onChange
|
||||
@ -481,6 +492,7 @@ const DynamicForm = {
|
||||
required={field.required}
|
||||
horizontal={field.horizontal}
|
||||
tooltip={field.tooltip}
|
||||
labelClassName={labelClassName || field.labelClassName}
|
||||
>
|
||||
{(fieldProps) => {
|
||||
const finalFieldProps = field.onChange
|
||||
@ -511,6 +523,7 @@ const DynamicForm = {
|
||||
required={field.required}
|
||||
horizontal={field.horizontal}
|
||||
tooltip={field.tooltip}
|
||||
labelClassName={labelClassName || field.labelClassName}
|
||||
>
|
||||
{(fieldProps) => {
|
||||
const finalFieldProps = field.onChange
|
||||
@ -551,7 +564,10 @@ const DynamicForm = {
|
||||
{field.label && !field.horizontal && (
|
||||
<div className="space-y-1 leading-none">
|
||||
<FormLabel
|
||||
className="font-normal"
|
||||
className={cn(
|
||||
'font-medium',
|
||||
labelClassName || field.labelClassName,
|
||||
)}
|
||||
tooltip={field.tooltip}
|
||||
>
|
||||
{field.label}{' '}
|
||||
@ -564,7 +580,10 @@ const DynamicForm = {
|
||||
{field.label && field.horizontal && (
|
||||
<div className="space-y-1 leading-none w-1/4">
|
||||
<FormLabel
|
||||
className="font-normal"
|
||||
className={cn(
|
||||
'font-medium',
|
||||
labelClassName || field.labelClassName,
|
||||
)}
|
||||
tooltip={field.tooltip}
|
||||
>
|
||||
{field.label}{' '}
|
||||
@ -600,6 +619,7 @@ const DynamicForm = {
|
||||
required={field.required}
|
||||
horizontal={field.horizontal}
|
||||
tooltip={field.tooltip}
|
||||
labelClassName={labelClassName || field.labelClassName}
|
||||
>
|
||||
{(fieldProps) => {
|
||||
const finalFieldProps = field.onChange
|
||||
@ -629,6 +649,7 @@ const DynamicForm = {
|
||||
required={field.required}
|
||||
horizontal={field.horizontal}
|
||||
tooltip={field.tooltip}
|
||||
labelClassName={labelClassName || field.labelClassName}
|
||||
>
|
||||
{(fieldProps) => {
|
||||
const finalFieldProps = field.onChange
|
||||
@ -706,9 +727,7 @@ const DynamicForm = {
|
||||
type="button"
|
||||
disabled={submitLoading}
|
||||
onClick={() => {
|
||||
console.log('form submit');
|
||||
(async () => {
|
||||
console.log('form submit2');
|
||||
try {
|
||||
let beValid = await form.formControl.trigger();
|
||||
console.log('form valid', beValid, form, form.formControl);
|
||||
@ -748,7 +767,7 @@ const DynamicForm = {
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleCancel()}
|
||||
className="px-2 py-1 border border-input rounded-md hover:bg-muted"
|
||||
className="px-2 py-1 border border-border-button rounded-md text-text-secondary hover:bg-bg-card hover:text-primary"
|
||||
>
|
||||
{cancelText ?? t('modal.cancelText')}
|
||||
</button>
|
||||
|
||||
@ -102,8 +102,8 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
|
||||
{Array.isArray(tagChild) && tagChild.length > 0 && <>{tagChild}</>}
|
||||
{!inputVisible && (
|
||||
<Button
|
||||
variant="dashed"
|
||||
className="w-fit flex items-center justify-center gap-2 bg-bg-card"
|
||||
variant="ghost"
|
||||
className="w-fit flex items-center justify-center gap-2 bg-bg-card border-dashed border"
|
||||
onClick={showInput}
|
||||
style={tagPlusStyle}
|
||||
>
|
||||
|
||||
@ -272,7 +272,7 @@ export function FileUploader(props: FileUploaderProps) {
|
||||
<div
|
||||
{...getRootProps()}
|
||||
className={cn(
|
||||
'group relative grid h-72 w-full cursor-pointer place-items-center rounded-lg border-2 border-dashed border-border-default px-5 py-2.5 text-center transition hover:bg-muted/25 bg-accent-primary-5',
|
||||
'group relative grid h-72 w-full cursor-pointer place-items-center rounded-lg border border-dashed border-border-default px-5 py-2.5 text-center transition hover:bg-border-button bg-bg-card',
|
||||
'ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2',
|
||||
isDragActive && 'border-muted-foreground/50',
|
||||
isDisabled && 'pointer-events-none opacity-60',
|
||||
@ -285,11 +285,11 @@ export function FileUploader(props: FileUploaderProps) {
|
||||
<div className="flex flex-col items-center justify-center gap-4 sm:px-5">
|
||||
<div className="rounded-full border border-dashed p-3">
|
||||
<Upload
|
||||
className="size-7 text-muted-foreground"
|
||||
className="size-7 text-text-secondary"
|
||||
aria-hidden="true"
|
||||
/>
|
||||
</div>
|
||||
<p className="font-medium text-muted-foreground">
|
||||
<p className="font-medium text-text-secondary">
|
||||
Drop the files here
|
||||
</p>
|
||||
</div>
|
||||
@ -297,15 +297,15 @@ export function FileUploader(props: FileUploaderProps) {
|
||||
<div className="flex flex-col items-center justify-center gap-4 sm:px-5">
|
||||
<div className="rounded-full border border-dashed p-3">
|
||||
<Upload
|
||||
className="size-7 text-muted-foreground"
|
||||
className="size-7 text-text-secondary"
|
||||
aria-hidden="true"
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col gap-px">
|
||||
<p className="font-medium text-muted-foreground">
|
||||
<p className="font-medium text-text-secondary">
|
||||
{t('knowledgeDetails.uploadTitle')}
|
||||
</p>
|
||||
<p className="text-sm text-text-secondary">
|
||||
<p className="text-sm text-text-disabled">
|
||||
{description || t('knowledgeDetails.uploadDescription')}
|
||||
{/* You can upload
|
||||
{maxFileCount > 1
|
||||
|
||||
132
web/src/components/json-edit/css/cloud9_night.less
Normal file
132
web/src/components/json-edit/css/cloud9_night.less
Normal file
@ -0,0 +1,132 @@
|
||||
.ace-tomorrow-night .ace_gutter {
|
||||
background: var(--bg-card);
|
||||
color: rgb(var(--text-primary));
|
||||
}
|
||||
.ace-tomorrow-night .ace_print-margin {
|
||||
width: 1px;
|
||||
background: #25282c;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night {
|
||||
background: var(--bg-card);
|
||||
color: rgb(var(--text-primary));
|
||||
.ace_editor {
|
||||
background: var(--bg-card);
|
||||
}
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_cursor {
|
||||
color: #aeafad;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_marker-layer .ace_selection {
|
||||
background: #373b41;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night.ace_multiselect .ace_selection.ace_start {
|
||||
box-shadow: 0 0 3px 0px #1d1f21;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_marker-layer .ace_step {
|
||||
background: rgb(102, 82, 0);
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_marker-layer .ace_bracket {
|
||||
margin: -1px 0 0 -1px;
|
||||
border: 1px solid #4b4e55;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_marker-layer .ace_active-line {
|
||||
background: var(--bg-card);
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_gutter-active-line {
|
||||
background-color: var(--bg-card);
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_marker-layer .ace_selected-word {
|
||||
border: 1px solid #373b41;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_invisible {
|
||||
color: #4b4e55;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_keyword,
|
||||
.ace-tomorrow-night .ace_meta,
|
||||
.ace-tomorrow-night .ace_storage,
|
||||
.ace-tomorrow-night .ace_storage.ace_type,
|
||||
.ace-tomorrow-night .ace_support.ace_type {
|
||||
color: #b294bb;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_keyword.ace_operator {
|
||||
color: #8abeb7;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_constant.ace_character,
|
||||
.ace-tomorrow-night .ace_constant.ace_language,
|
||||
.ace-tomorrow-night .ace_constant.ace_numeric,
|
||||
.ace-tomorrow-night .ace_keyword.ace_other.ace_unit,
|
||||
.ace-tomorrow-night .ace_support.ace_constant,
|
||||
.ace-tomorrow-night .ace_variable.ace_parameter {
|
||||
color: #de935f;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_constant.ace_other {
|
||||
color: #ced1cf;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_invalid {
|
||||
color: #ced2cf;
|
||||
background-color: #df5f5f;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_invalid.ace_deprecated {
|
||||
color: #ced2cf;
|
||||
background-color: #b798bf;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_fold {
|
||||
background-color: #81a2be;
|
||||
border-color: #c5c8c6;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_entity.ace_name.ace_function,
|
||||
.ace-tomorrow-night .ace_support.ace_function,
|
||||
.ace-tomorrow-night .ace_variable {
|
||||
color: #81a2be;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_support.ace_class,
|
||||
.ace-tomorrow-night .ace_support.ace_type {
|
||||
color: #f0c674;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_heading,
|
||||
.ace-tomorrow-night .ace_markup.ace_heading,
|
||||
.ace-tomorrow-night .ace_string {
|
||||
color: #b5bd68;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_entity.ace_name.ace_tag,
|
||||
.ace-tomorrow-night .ace_entity.ace_other.ace_attribute-name,
|
||||
.ace-tomorrow-night .ace_meta.ace_tag,
|
||||
.ace-tomorrow-night .ace_string.ace_regexp,
|
||||
.ace-tomorrow-night .ace_variable {
|
||||
color: #cc6666;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_comment {
|
||||
color: #969896;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_indent-guide {
|
||||
background: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAACCAYAAACZgbYnAAAAEklEQVQImWNgYGBgYHB3d/8PAAOIAdULw8qMAAAAAElFTkSuQmCC)
|
||||
right repeat-y;
|
||||
}
|
||||
|
||||
.ace-tomorrow-night .ace_indent-guide-active {
|
||||
background: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAACCAYAAACZgbYnAAAAEklEQVQIW2PQ1dX9zzBz5sz/ABCcBFFentLlAAAAAElFTkSuQmCC)
|
||||
right repeat-y;
|
||||
}
|
||||
83
web/src/components/json-edit/css/index.less
Normal file
83
web/src/components/json-edit/css/index.less
Normal file
@ -0,0 +1,83 @@
|
||||
.jsoneditor {
|
||||
border: none;
|
||||
color: rgb(var(--text-primary));
|
||||
overflow: auto;
|
||||
scrollbar-width: none;
|
||||
background-color: var(--bg-base);
|
||||
.jsoneditor-menu {
|
||||
background-color: var(--bg-base);
|
||||
// border-color: var(--border-button);
|
||||
border-bottom: thin solid var(--border-button);
|
||||
}
|
||||
.jsoneditor-navigation-bar {
|
||||
border-bottom: 1px solid var(--border-button);
|
||||
background-color: var(--bg-input);
|
||||
}
|
||||
.jsoneditor-tree {
|
||||
background: var(--bg-base);
|
||||
}
|
||||
.jsoneditor-highlight {
|
||||
background-color: var(--bg-card);
|
||||
}
|
||||
}
|
||||
.jsoneditor-popover,
|
||||
.jsoneditor-schema-error,
|
||||
div.jsoneditor td,
|
||||
div.jsoneditor textarea,
|
||||
div.jsoneditor th,
|
||||
div.jsoneditor-field,
|
||||
div.jsoneditor-value,
|
||||
pre.jsoneditor-preview {
|
||||
font-family: consolas, menlo, monaco, 'Ubuntu Mono', source-code-pro,
|
||||
monospace;
|
||||
font-size: 14px;
|
||||
color: rgb(var(--text-primary));
|
||||
}
|
||||
|
||||
div.jsoneditor-field.jsoneditor-highlight,
|
||||
div.jsoneditor-field[contenteditable='true']:focus,
|
||||
div.jsoneditor-field[contenteditable='true']:hover,
|
||||
div.jsoneditor-value.jsoneditor-highlight,
|
||||
div.jsoneditor-value[contenteditable='true']:focus,
|
||||
div.jsoneditor-value[contenteditable='true']:hover {
|
||||
background-color: var(--bg-input);
|
||||
border: 1px solid var(--border-button);
|
||||
border-radius: 2px;
|
||||
}
|
||||
|
||||
.jsoneditor-selected,
|
||||
.jsoneditor-contextmenu .jsoneditor-menu li ul {
|
||||
background: var(--bg-base);
|
||||
}
|
||||
|
||||
.jsoneditor-contextmenu .jsoneditor-menu button {
|
||||
color: rgb(var(--text-secondary));
|
||||
}
|
||||
.jsoneditor-menu a.jsoneditor-poweredBy {
|
||||
display: none;
|
||||
}
|
||||
.ace-jsoneditor .ace_scroller {
|
||||
background-color: var(--bg-base);
|
||||
}
|
||||
.jsoneditor-statusbar {
|
||||
border-top: 1px solid var(--border-button);
|
||||
background-color: var(--bg-base);
|
||||
color: rgb(var(--text-primary));
|
||||
}
|
||||
.jsoneditor-menu > .jsoneditor-modes > button,
|
||||
.jsoneditor-menu > button {
|
||||
// color: rgb(var(--text-secondary));
|
||||
background-color: var(--text-disabled);
|
||||
}
|
||||
|
||||
.jsoneditor-menu > .jsoneditor-modes > button:active,
|
||||
.jsoneditor-menu > .jsoneditor-modes > button:focus,
|
||||
.jsoneditor-menu > button:active,
|
||||
.jsoneditor-menu > button:focus {
|
||||
background-color: rgb(var(--text-secondary));
|
||||
}
|
||||
.jsoneditor-menu > .jsoneditor-modes > button:hover,
|
||||
.jsoneditor-menu > button:hover {
|
||||
background-color: rgb(var(--text-secondary));
|
||||
border: 1px solid var(--border-button);
|
||||
}
|
||||
142
web/src/components/json-edit/index.tsx
Normal file
142
web/src/components/json-edit/index.tsx
Normal file
@ -0,0 +1,142 @@
|
||||
import React, { useEffect, useRef } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import './css/cloud9_night.less';
|
||||
import './css/index.less';
|
||||
import { JsonEditorOptions, JsonEditorProps } from './interface';
|
||||
const defaultConfig: JsonEditorOptions = {
|
||||
mode: 'code',
|
||||
modes: ['tree', 'code'],
|
||||
history: false,
|
||||
search: false,
|
||||
mainMenuBar: false,
|
||||
navigationBar: false,
|
||||
enableSort: false,
|
||||
enableTransform: false,
|
||||
indentation: 2,
|
||||
};
|
||||
const JsonEditor: React.FC<JsonEditorProps> = ({
|
||||
value,
|
||||
onChange,
|
||||
height = '400px',
|
||||
className = '',
|
||||
options = {},
|
||||
}) => {
|
||||
const containerRef = useRef<HTMLDivElement>(null);
|
||||
const editorRef = useRef<any>(null);
|
||||
const { i18n } = useTranslation();
|
||||
const currentLanguageRef = useRef<string>(i18n.language);
|
||||
|
||||
useEffect(() => {
|
||||
if (typeof window !== 'undefined') {
|
||||
const JSONEditor = require('jsoneditor');
|
||||
import('jsoneditor/dist/jsoneditor.min.css');
|
||||
|
||||
if (containerRef.current) {
|
||||
// Default configuration options
|
||||
const defaultOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
console.error(err);
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(
|
||||
containerRef.current,
|
||||
defaultOptions,
|
||||
);
|
||||
|
||||
if (value) {
|
||||
editorRef.current.set(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return () => {
|
||||
if (editorRef.current) {
|
||||
if (typeof editorRef.current.destroy === 'function') {
|
||||
editorRef.current.destroy();
|
||||
}
|
||||
editorRef.current = null;
|
||||
}
|
||||
};
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
// Update language when i18n language changes
|
||||
// Since JSONEditor doesn't have a setOptions method, we need to recreate the editor
|
||||
if (editorRef.current && currentLanguageRef.current !== i18n.language) {
|
||||
currentLanguageRef.current = i18n.language;
|
||||
|
||||
// Save current data
|
||||
let currentData;
|
||||
try {
|
||||
currentData = editorRef.current.get();
|
||||
} catch (e) {
|
||||
// If there's an error getting data, use the passed value or empty object
|
||||
currentData = value || {};
|
||||
}
|
||||
|
||||
// Destroy the current editor
|
||||
if (typeof editorRef.current.destroy === 'function') {
|
||||
editorRef.current.destroy();
|
||||
}
|
||||
|
||||
// Recreate the editor with new language
|
||||
const JSONEditor = require('jsoneditor');
|
||||
|
||||
const newOptions: JsonEditorOptions = {
|
||||
...defaultConfig,
|
||||
language: i18n.language === 'zh' ? 'zh-CN' : 'en',
|
||||
onChange: () => {
|
||||
if (editorRef.current && onChange) {
|
||||
try {
|
||||
const updatedJson = editorRef.current.get();
|
||||
onChange(updatedJson);
|
||||
} catch (err) {
|
||||
// Do not trigger onChange when parsing error occurs
|
||||
}
|
||||
}
|
||||
},
|
||||
...options, // Merge user provided options with defaults
|
||||
};
|
||||
|
||||
editorRef.current = new JSONEditor(containerRef.current, newOptions);
|
||||
editorRef.current.set(currentData);
|
||||
}
|
||||
}, [i18n.language, value, onChange, options]);
|
||||
|
||||
useEffect(() => {
|
||||
if (editorRef.current && value !== undefined) {
|
||||
try {
|
||||
// Only update the editor when the value actually changes
|
||||
const currentJson = editorRef.current.get();
|
||||
if (JSON.stringify(currentJson) !== JSON.stringify(value)) {
|
||||
editorRef.current.set(value);
|
||||
}
|
||||
} catch (err) {
|
||||
// Skip update if there is a syntax error in the current editor
|
||||
editorRef.current.set(value);
|
||||
}
|
||||
}
|
||||
}, [value]);
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={containerRef}
|
||||
style={{ height }}
|
||||
className={`ace-tomorrow-night w-full border border-border-button rounded-lg overflow-hidden bg-bg-input ${className} `}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
||||
export default JsonEditor;
|
||||
339
web/src/components/json-edit/interface.ts
Normal file
339
web/src/components/json-edit/interface.ts
Normal file
@ -0,0 +1,339 @@
|
||||
// JSONEditor configuration options interface see: https://github.com/josdejong/jsoneditor/blob/master/docs/api.md
|
||||
export interface JsonEditorOptions {
|
||||
/**
|
||||
* Editor mode. Available values: 'tree' (default), 'view', 'form', 'text', and 'code'.
|
||||
*/
|
||||
mode?: 'tree' | 'view' | 'form' | 'text' | 'code';
|
||||
|
||||
/**
|
||||
* Array of available modes
|
||||
*/
|
||||
modes?: Array<'tree' | 'view' | 'form' | 'text' | 'code'>;
|
||||
|
||||
/**
|
||||
* Field name for the root node. Only applicable for modes 'tree', 'view', and 'form'
|
||||
*/
|
||||
name?: string;
|
||||
|
||||
/**
|
||||
* Theme for the editor
|
||||
*/
|
||||
theme?: string;
|
||||
|
||||
/**
|
||||
* Enable history (undo/redo). True by default. Only applicable for modes 'tree', 'view', and 'form'
|
||||
*/
|
||||
history?: boolean;
|
||||
|
||||
/**
|
||||
* Enable search box. True by default. Only applicable for modes 'tree', 'view', and 'form'
|
||||
*/
|
||||
search?: boolean;
|
||||
|
||||
/**
|
||||
* Main menu bar visibility
|
||||
*/
|
||||
mainMenuBar?: boolean;
|
||||
|
||||
/**
|
||||
* Navigation bar visibility
|
||||
*/
|
||||
navigationBar?: boolean;
|
||||
|
||||
/**
|
||||
* Status bar visibility
|
||||
*/
|
||||
statusBar?: boolean;
|
||||
|
||||
/**
|
||||
* If true, object keys are sorted before display. false by default.
|
||||
*/
|
||||
sortObjectKeys?: boolean;
|
||||
|
||||
/**
|
||||
* Enable transform functionality
|
||||
*/
|
||||
enableTransform?: boolean;
|
||||
|
||||
/**
|
||||
* Enable sort functionality
|
||||
*/
|
||||
enableSort?: boolean;
|
||||
|
||||
/**
|
||||
* Limit dragging functionality
|
||||
*/
|
||||
limitDragging?: boolean;
|
||||
|
||||
/**
|
||||
* A JSON schema object
|
||||
*/
|
||||
schema?: any;
|
||||
|
||||
/**
|
||||
* Schemas that are referenced using the `$ref` property from the JSON schema
|
||||
*/
|
||||
schemaRefs?: Record<string, any>;
|
||||
|
||||
/**
|
||||
* Array of template objects
|
||||
*/
|
||||
templates?: Array<{
|
||||
text: string;
|
||||
title?: string;
|
||||
className?: string;
|
||||
field?: string;
|
||||
value: any;
|
||||
}>;
|
||||
|
||||
/**
|
||||
* Ace editor instance
|
||||
*/
|
||||
ace?: any;
|
||||
|
||||
/**
|
||||
* An instance of Ajv JSON schema validator
|
||||
*/
|
||||
ajv?: any;
|
||||
|
||||
/**
|
||||
* Switch to enable/disable autocomplete
|
||||
*/
|
||||
autocomplete?: {
|
||||
confirmKey?: string | string[];
|
||||
caseSensitive?: boolean;
|
||||
getOptions?: (
|
||||
text: string,
|
||||
path: Array<string | number>,
|
||||
input: string,
|
||||
editor: any,
|
||||
) => string[] | Promise<string[]> | null;
|
||||
};
|
||||
|
||||
/**
|
||||
* Number of indentation spaces. 4 by default. Only applicable for modes 'text' and 'code'
|
||||
*/
|
||||
indentation?: number;
|
||||
|
||||
/**
|
||||
* Available languages
|
||||
*/
|
||||
languages?: string[];
|
||||
|
||||
/**
|
||||
* Language of the editor
|
||||
*/
|
||||
language?: string;
|
||||
|
||||
/**
|
||||
* Callback method, triggered on change of contents. Does not pass the contents itself.
|
||||
* See also onChangeJSON and onChangeText.
|
||||
*/
|
||||
onChange?: () => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered in modes on change of contents, passing the changed contents as JSON.
|
||||
* Only applicable for modes 'tree', 'view', and 'form'.
|
||||
*/
|
||||
onChangeJSON?: (json: any) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered in modes on change of contents, passing the changed contents as stringified JSON.
|
||||
*/
|
||||
onChangeText?: (text: string) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when an error occurs
|
||||
*/
|
||||
onError?: (error: Error) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when node is expanded
|
||||
*/
|
||||
onExpand?: (node: any) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when node is collapsed
|
||||
*/
|
||||
onCollapse?: (node: any) => void;
|
||||
|
||||
/**
|
||||
* Callback method, determines if a node is editable
|
||||
*/
|
||||
onEditable?: (node: any) => boolean | { field: boolean; value: boolean };
|
||||
|
||||
/**
|
||||
* Callback method, triggered when an event occurs in a JSON field or value.
|
||||
* Only applicable for modes 'form', 'tree' and 'view'
|
||||
*/
|
||||
onEvent?: (node: any, event: Event) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when the editor comes into focus, passing an object {type, target}.
|
||||
* Applicable for all modes
|
||||
*/
|
||||
onFocus?: (node: any) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when the editor goes out of focus, passing an object {type, target}.
|
||||
* Applicable for all modes
|
||||
*/
|
||||
onBlur?: (node: any) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when creating menu items
|
||||
*/
|
||||
onCreateMenu?: (menuItems: any[], node: any) => any[];
|
||||
|
||||
/**
|
||||
* Callback method, triggered on node selection change. Only applicable for modes 'tree', 'view', and 'form'
|
||||
*/
|
||||
onSelectionChange?: (selection: any) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered on text selection change. Only applicable for modes 'text' and 'code'
|
||||
*/
|
||||
onTextSelectionChange?: (selection: any) => void;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when a Node DOM is rendered. Function returns a css class name to be set on a node.
|
||||
* Only applicable for modes 'form', 'tree' and 'view'
|
||||
*/
|
||||
onClassName?: (node: any) => string | undefined;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when validating nodes
|
||||
*/
|
||||
onValidate?: (
|
||||
json: any,
|
||||
) =>
|
||||
| Array<{ path: Array<string | number>; message: string }>
|
||||
| Promise<Array<{ path: Array<string | number>; message: string }>>;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when node name is determined
|
||||
*/
|
||||
onNodeName?: (parentNode: any, childNode: any, name: string) => string;
|
||||
|
||||
/**
|
||||
* Callback method, triggered when mode changes
|
||||
*/
|
||||
onModeChange?: (newMode: string, oldMode: string) => void;
|
||||
|
||||
/**
|
||||
* Color picker options
|
||||
*/
|
||||
colorPicker?: boolean;
|
||||
|
||||
/**
|
||||
* Callback method for color picker
|
||||
*/
|
||||
onColorPicker?: (
|
||||
callback: (color: string) => void,
|
||||
parent: HTMLElement,
|
||||
) => void;
|
||||
|
||||
/**
|
||||
* If true, shows timestamp tag
|
||||
*/
|
||||
timestampTag?: boolean;
|
||||
|
||||
/**
|
||||
* Format for timestamps
|
||||
*/
|
||||
timestampFormat?: string;
|
||||
|
||||
/**
|
||||
* If true, unicode characters are escaped. false by default.
|
||||
*/
|
||||
escapeUnicode?: boolean;
|
||||
|
||||
/**
|
||||
* Number of children allowed for a node in 'tree', 'view', or 'form' mode before
|
||||
* the "show more/show all" buttons appear. 100 by default.
|
||||
*/
|
||||
maxVisibleChilds?: number;
|
||||
|
||||
/**
|
||||
* Callback method for validation errors
|
||||
*/
|
||||
onValidationError?: (
|
||||
errors: Array<{ path: Array<string | number>; message: string }>,
|
||||
) => void;
|
||||
|
||||
/**
|
||||
* Callback method for validation warnings
|
||||
*/
|
||||
onValidationWarning?: (
|
||||
warnings: Array<{ path: Array<string | number>; message: string }>,
|
||||
) => void;
|
||||
|
||||
/**
|
||||
* The anchor element to apply an overlay and display the modals in a centered location. Defaults to document.body
|
||||
*/
|
||||
modalAnchor?: HTMLElement | null;
|
||||
|
||||
/**
|
||||
* Anchor element for popups
|
||||
*/
|
||||
popupAnchor?: HTMLElement | null;
|
||||
|
||||
/**
|
||||
* Function to create queries
|
||||
*/
|
||||
createQuery?: () => void;
|
||||
|
||||
/**
|
||||
* Function to execute queries
|
||||
*/
|
||||
executeQuery?: () => void;
|
||||
|
||||
/**
|
||||
* Query description
|
||||
*/
|
||||
queryDescription?: string;
|
||||
|
||||
/**
|
||||
* Allow schema suggestions
|
||||
*/
|
||||
allowSchemaSuggestions?: boolean;
|
||||
|
||||
/**
|
||||
* Show error table
|
||||
*/
|
||||
showErrorTable?: boolean;
|
||||
|
||||
/**
|
||||
* Validate current JSON object against the configured JSON schema
|
||||
* Must be implemented by tree mode and text mode
|
||||
*/
|
||||
validate?: () => Promise<any[]>;
|
||||
|
||||
/**
|
||||
* Refresh the rendered contents
|
||||
* Can be implemented by tree mode and text mode
|
||||
*/
|
||||
refresh?: () => void;
|
||||
|
||||
/**
|
||||
* Callback method triggered when schema changes
|
||||
*/
|
||||
_onSchemaChange?: (schema: any, schemaRefs: any) => void;
|
||||
}
|
||||
|
||||
export interface JsonEditorProps {
|
||||
// JSON data to be displayed in the editor
|
||||
value?: any;
|
||||
|
||||
// Callback function triggered when the JSON data changes
|
||||
onChange?: (value: any) => void;
|
||||
|
||||
// Height of the editor
|
||||
height?: string;
|
||||
|
||||
// Additional CSS class names
|
||||
className?: string;
|
||||
|
||||
// Configuration options for the JSONEditor
|
||||
options?: JsonEditorOptions;
|
||||
}
|
||||
@ -1,6 +1,9 @@
|
||||
import { ModelVariableType } from '@/constants/knowledge';
|
||||
import {
|
||||
ModelVariableType,
|
||||
settledModelVariableMap,
|
||||
} from '@/constants/knowledge';
|
||||
import { useTranslate } from '@/hooks/common-hooks';
|
||||
import { camelCase } from 'lodash';
|
||||
import { camelCase, isEqual } from 'lodash';
|
||||
import { useCallback } from 'react';
|
||||
import { useFormContext } from 'react-hook-form';
|
||||
import { z } from 'zod';
|
||||
@ -25,6 +28,13 @@ import { useHandleFreedomChange } from './use-watch-change';
|
||||
interface LlmSettingFieldItemsProps {
|
||||
prefix?: string;
|
||||
options?: any[];
|
||||
showFields?: Array<
|
||||
| 'temperature'
|
||||
| 'top_p'
|
||||
| 'presence_penalty'
|
||||
| 'frequency_penalty'
|
||||
| 'max_tokens'
|
||||
>;
|
||||
}
|
||||
|
||||
export const LLMIdFormField = {
|
||||
@ -56,6 +66,13 @@ export const LlmSettingSchema = {
|
||||
export function LlmSettingFieldItems({
|
||||
prefix,
|
||||
options,
|
||||
showFields = [
|
||||
'temperature',
|
||||
'top_p',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'max_tokens',
|
||||
],
|
||||
}: LlmSettingFieldItemsProps) {
|
||||
const form = useFormContext();
|
||||
const { t } = useTranslate('chat');
|
||||
@ -72,14 +89,53 @@ export function LlmSettingFieldItems({
|
||||
const parameterOptions = Object.values(ModelVariableType).map((x) => ({
|
||||
label: t(camelCase(x)),
|
||||
value: x,
|
||||
}));
|
||||
})) as { label: string; value: ModelVariableType | 'Custom' }[];
|
||||
|
||||
parameterOptions.push({
|
||||
label: t(camelCase('Custom')),
|
||||
value: 'Custom',
|
||||
});
|
||||
const checkParameterIsEqual = () => {
|
||||
const [
|
||||
parameter,
|
||||
topPValue,
|
||||
frequencyPenaltyValue,
|
||||
temperatureValue,
|
||||
presencePenaltyValue,
|
||||
maxTokensValue,
|
||||
] = form.getValues([
|
||||
getFieldWithPrefix('parameter'),
|
||||
getFieldWithPrefix('temperature'),
|
||||
getFieldWithPrefix('top_p'),
|
||||
getFieldWithPrefix('frequency_penalty'),
|
||||
getFieldWithPrefix('presence_penalty'),
|
||||
getFieldWithPrefix('max_tokens'),
|
||||
]);
|
||||
if (parameter && parameter !== 'Custom') {
|
||||
const parameterValue =
|
||||
settledModelVariableMap[parameter as keyof typeof ModelVariableType];
|
||||
const parameterRealValue = {
|
||||
top_p: topPValue,
|
||||
temperature: temperatureValue,
|
||||
frequency_penalty: frequencyPenaltyValue,
|
||||
presence_penalty: presencePenaltyValue,
|
||||
max_tokens: maxTokensValue,
|
||||
};
|
||||
if (!isEqual(parameterValue, parameterRealValue)) {
|
||||
form.setValue(getFieldWithPrefix('parameter'), 'Custom');
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="space-y-5">
|
||||
<LLMFormField options={options}></LLMFormField>
|
||||
<LLMFormField
|
||||
options={options}
|
||||
name={getFieldWithPrefix('llm_id')}
|
||||
></LLMFormField>
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={'parameter'}
|
||||
name={getFieldWithPrefix('parameter')}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex justify-between items-center">
|
||||
<FormLabel className="flex-1">{t('freedom')}</FormLabel>
|
||||
@ -107,45 +163,71 @@ export function LlmSettingFieldItems({
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('temperature')}
|
||||
checkName="temperatureEnabled"
|
||||
label="temperature"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
></SliderInputSwitchFormField>
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('top_p')}
|
||||
checkName="topPEnabled"
|
||||
label="topP"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
></SliderInputSwitchFormField>
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('presence_penalty')}
|
||||
checkName="presencePenaltyEnabled"
|
||||
label="presencePenalty"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
></SliderInputSwitchFormField>
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('frequency_penalty')}
|
||||
checkName="frequencyPenaltyEnabled"
|
||||
label="frequencyPenalty"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
></SliderInputSwitchFormField>
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('max_tokens')}
|
||||
checkName="maxTokensEnabled"
|
||||
label="maxTokens"
|
||||
max={128000}
|
||||
min={0}
|
||||
></SliderInputSwitchFormField>
|
||||
{showFields.some((item) => item === 'temperature') && (
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('temperature')}
|
||||
checkName="temperatureEnabled"
|
||||
label="temperature"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
onChange={() => {
|
||||
checkParameterIsEqual();
|
||||
}}
|
||||
></SliderInputSwitchFormField>
|
||||
)}
|
||||
{showFields.some((item) => item === 'top_p') && (
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('top_p')}
|
||||
checkName="topPEnabled"
|
||||
label="topP"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
onChange={() => {
|
||||
checkParameterIsEqual();
|
||||
}}
|
||||
></SliderInputSwitchFormField>
|
||||
)}
|
||||
{showFields.some((item) => item === 'presence_penalty') && (
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('presence_penalty')}
|
||||
checkName="presencePenaltyEnabled"
|
||||
label="presencePenalty"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
onChange={() => {
|
||||
checkParameterIsEqual();
|
||||
}}
|
||||
></SliderInputSwitchFormField>
|
||||
)}
|
||||
{showFields.some((item) => item === 'frequency_penalty') && (
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('frequency_penalty')}
|
||||
checkName="frequencyPenaltyEnabled"
|
||||
label="frequencyPenalty"
|
||||
max={1}
|
||||
step={0.01}
|
||||
min={0}
|
||||
onChange={() => {
|
||||
checkParameterIsEqual();
|
||||
}}
|
||||
></SliderInputSwitchFormField>
|
||||
)}
|
||||
{showFields.some((item) => item === 'max_tokens') && (
|
||||
<SliderInputSwitchFormField
|
||||
name={getFieldWithPrefix('max_tokens')}
|
||||
checkName="maxTokensEnabled"
|
||||
numberInputClassName="w-20"
|
||||
label="maxTokens"
|
||||
max={128000}
|
||||
min={0}
|
||||
onChange={() => {
|
||||
checkParameterIsEqual();
|
||||
}}
|
||||
></SliderInputSwitchFormField>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@ -22,6 +22,7 @@ type SliderInputSwitchFormFieldProps = {
|
||||
onChange?: (value: number) => void;
|
||||
className?: string;
|
||||
checkName: string;
|
||||
numberInputClassName?: string;
|
||||
};
|
||||
|
||||
export function SliderInputSwitchFormField({
|
||||
@ -34,6 +35,7 @@ export function SliderInputSwitchFormField({
|
||||
onChange,
|
||||
className,
|
||||
checkName,
|
||||
numberInputClassName,
|
||||
}: SliderInputSwitchFormFieldProps) {
|
||||
const form = useFormContext();
|
||||
const disabled = !form.watch(checkName);
|
||||
@ -81,7 +83,10 @@ export function SliderInputSwitchFormField({
|
||||
<FormControl>
|
||||
<NumberInput
|
||||
disabled={disabled}
|
||||
className="h-7 w-20"
|
||||
className={cn(
|
||||
'h-6 w-10 p-1 border border-border-button rounded-sm',
|
||||
numberInputClassName,
|
||||
)}
|
||||
max={max}
|
||||
min={min}
|
||||
step={step}
|
||||
|
||||
@ -14,6 +14,7 @@ type MetadataFilterProps = {
|
||||
export const MetadataFilterSchema = {
|
||||
meta_data_filter: z
|
||||
.object({
|
||||
logic: z.string().optional(),
|
||||
method: z.string().optional(),
|
||||
manual: z
|
||||
.array(
|
||||
|
||||
@ -15,14 +15,17 @@ import {
|
||||
} from '@/components/ui/form';
|
||||
import { Input } from '@/components/ui/input';
|
||||
import { Separator } from '@/components/ui/separator';
|
||||
import { SwitchOperatorOptions } from '@/constants/agent';
|
||||
import { SwitchLogicOperator, SwitchOperatorOptions } from '@/constants/agent';
|
||||
import { useBuildSwitchOperatorOptions } from '@/hooks/logic-hooks/use-build-operator-options';
|
||||
import { useBuildSwitchLogicOperatorOptions } from '@/hooks/logic-hooks/use-build-options';
|
||||
import { useFetchKnowledgeMetadata } from '@/hooks/use-knowledge-request';
|
||||
import { PromptEditor } from '@/pages/agent/form/components/prompt-editor';
|
||||
import { Plus, X } from 'lucide-react';
|
||||
import { useCallback } from 'react';
|
||||
import { useFieldArray, useFormContext } from 'react-hook-form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { RAGFlowFormItem } from '../ragflow-form';
|
||||
import { RAGFlowSelect } from '../ui/select';
|
||||
|
||||
export function MetadataFilterConditions({
|
||||
kbIds,
|
||||
@ -36,10 +39,13 @@ export function MetadataFilterConditions({
|
||||
const { t } = useTranslation();
|
||||
const form = useFormContext();
|
||||
const name = prefix + 'meta_data_filter.manual';
|
||||
const logic = prefix + 'meta_data_filter.logic';
|
||||
const metadata = useFetchKnowledgeMetadata(kbIds);
|
||||
|
||||
const switchOperatorOptions = useBuildSwitchOperatorOptions();
|
||||
|
||||
const switchLogicOperatorOptions = useBuildSwitchLogicOperatorOptions();
|
||||
|
||||
const { fields, remove, append } = useFieldArray({
|
||||
name,
|
||||
control: form.control,
|
||||
@ -47,13 +53,14 @@ export function MetadataFilterConditions({
|
||||
|
||||
const add = useCallback(
|
||||
(key: string) => () => {
|
||||
form.setValue(logic, SwitchLogicOperator.And);
|
||||
append({
|
||||
key,
|
||||
value: '',
|
||||
op: SwitchOperatorOptions[0].value,
|
||||
});
|
||||
},
|
||||
[append],
|
||||
[append, form, logic],
|
||||
);
|
||||
|
||||
return (
|
||||
@ -77,73 +84,92 @@ export function MetadataFilterConditions({
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
</div>
|
||||
<div className="space-y-5">
|
||||
{fields.map((field, index) => {
|
||||
const typeField = `${name}.${index}.key`;
|
||||
return (
|
||||
<div key={field.id} className="flex w-full items-center gap-2">
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={typeField}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex-1 overflow-hidden">
|
||||
<FormControl>
|
||||
<Input
|
||||
{...field}
|
||||
placeholder={t('common.pleaseInput')}
|
||||
></Input>
|
||||
</FormControl>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
<Separator className="w-3 text-text-secondary" />
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={`${name}.${index}.op`}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex-1 overflow-hidden">
|
||||
<FormControl>
|
||||
<SelectWithSearch
|
||||
{...field}
|
||||
options={switchOperatorOptions}
|
||||
></SelectWithSearch>
|
||||
</FormControl>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
<Separator className="w-3 text-text-secondary" />
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={`${name}.${index}.value`}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex-1 overflow-hidden">
|
||||
<FormControl>
|
||||
{canReference ? (
|
||||
<PromptEditor
|
||||
{...field}
|
||||
multiLine={false}
|
||||
showToolbar={false}
|
||||
></PromptEditor>
|
||||
) : (
|
||||
<Input
|
||||
placeholder={t('common.pleaseInput')}
|
||||
{...field}
|
||||
/>
|
||||
<section className="flex">
|
||||
{fields.length > 1 && (
|
||||
<div className="relative min-w-14">
|
||||
<RAGFlowFormItem
|
||||
name={logic}
|
||||
className="absolute top-1/2 -translate-y-1/2 right-1 left-0 z-10 bg-bg-base"
|
||||
>
|
||||
<RAGFlowSelect
|
||||
options={switchLogicOperatorOptions}
|
||||
triggerClassName="w-full text-xs px-1 py-0 h-6"
|
||||
></RAGFlowSelect>
|
||||
</RAGFlowFormItem>
|
||||
<div className="absolute border-l border-y w-5 right-0 top-4 bottom-4 rounded-l-lg"></div>
|
||||
</div>
|
||||
)}
|
||||
<div className="space-y-5 flex-1">
|
||||
{fields.map((field, index) => {
|
||||
const typeField = `${name}.${index}.key`;
|
||||
return (
|
||||
<section key={field.id} className="flex gap-2">
|
||||
<div className="w-full space-y-2">
|
||||
<div className="flex items-center gap-1">
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={typeField}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex-1 overflow-hidden">
|
||||
<FormControl>
|
||||
<Input
|
||||
{...field}
|
||||
placeholder={t('common.pleaseInput')}
|
||||
></Input>
|
||||
</FormControl>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
</FormControl>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
<Button variant={'ghost'} onClick={() => remove(index)}>
|
||||
<X className="text-text-sub-title-invert " />
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
/>
|
||||
<Separator className="w-1 text-text-secondary" />
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={`${name}.${index}.op`}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex-1 overflow-hidden">
|
||||
<FormControl>
|
||||
<SelectWithSearch
|
||||
{...field}
|
||||
options={switchOperatorOptions}
|
||||
></SelectWithSearch>
|
||||
</FormControl>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
<FormField
|
||||
control={form.control}
|
||||
name={`${name}.${index}.value`}
|
||||
render={({ field }) => (
|
||||
<FormItem className="flex-1 overflow-hidden">
|
||||
<FormControl>
|
||||
{canReference ? (
|
||||
<PromptEditor
|
||||
{...field}
|
||||
multiLine={false}
|
||||
showToolbar={false}
|
||||
></PromptEditor>
|
||||
) : (
|
||||
<Input
|
||||
placeholder={t('common.pleaseInput')}
|
||||
{...field}
|
||||
/>
|
||||
)}
|
||||
</FormControl>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
<Button variant={'ghost'} onClick={() => remove(index)}>
|
||||
<X className="text-text-sub-title-invert " />
|
||||
</Button>
|
||||
</section>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
);
|
||||
}
|
||||
|
||||
@ -32,13 +32,13 @@ const Input = function ({
|
||||
type={type}
|
||||
data-slot="input"
|
||||
className={cn(
|
||||
'border-input file:text-foreground placeholder:text-muted-foreground/70 flex h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-sm shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50',
|
||||
'focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px]',
|
||||
'border-border-button file:text-foreground placeholder:text-text-disabled flex h-9 w-full min-w-0 rounded-md border bg-transparent px-3 py-1 text-sm shadow-xs transition-[color,box-shadow] outline-none file:inline-flex file:h-7 file:border-0 file:bg-transparent file:text-sm file:font-medium disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50',
|
||||
'focus-visible:border-border-button focus-visible:ring-text-primary/50 focus-visible:ring-1',
|
||||
'aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive',
|
||||
type === 'search' &&
|
||||
'[&::-webkit-search-cancel-button]:appearance-none [&::-webkit-search-decoration]:appearance-none [&::-webkit-search-results-button]:appearance-none [&::-webkit-search-results-decoration]:appearance-none',
|
||||
type === 'file' &&
|
||||
'text-muted-foreground/70 file:border-input file:text-foreground p-0 pr-3 italic file:me-3 file:h-full file:border-0 file:border-r file:border-solid file:bg-transparent file:px-3 file:text-sm file:font-medium file:not-italic',
|
||||
'text-text-disabled file:border-input file:text-foreground p-0 pr-3 italic file:me-3 file:h-full file:border-0 file:border-r file:border-solid file:bg-transparent file:px-3 file:text-sm file:font-medium file:not-italic',
|
||||
icon && iconPosition === 'left' && 'pl-7',
|
||||
icon && iconPosition === 'right' && 'pr-7',
|
||||
className,
|
||||
|
||||
@ -24,20 +24,6 @@ export default React.forwardRef<HTMLInputElement, InputProps>(
|
||||
ref={ref}
|
||||
{...props}
|
||||
/>
|
||||
<button
|
||||
className="text-muted-foreground/80 hover:text-foreground focus-visible:border-ring focus-visible:ring-ring/50 absolute inset-y-0 end-0 flex h-full w-9 items-center justify-center rounded-e-md transition-[color,box-shadow] outline-none focus:z-10 focus-visible:ring-[3px] disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-50"
|
||||
type="button"
|
||||
onClick={toggleVisibility}
|
||||
aria-label={isVisible ? 'Hide password' : 'Show password'}
|
||||
aria-pressed={isVisible}
|
||||
aria-controls="password"
|
||||
>
|
||||
{/* {isVisible ? (
|
||||
<EyeOffIcon size={16} aria-hidden="true" />
|
||||
) : (
|
||||
<EyeIcon size={16} aria-hidden="true" />
|
||||
)} */}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
@ -140,7 +140,7 @@ export const SelectWithSearch = forwardRef<
|
||||
ref={ref}
|
||||
disabled={disabled}
|
||||
className={cn(
|
||||
'!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto',
|
||||
'!bg-bg-input hover:bg-background border-border-button w-full justify-between px-3 font-normal outline-offset-0 outline-none focus-visible:outline-[3px] [&_svg]:pointer-events-auto group',
|
||||
triggerClassName,
|
||||
)}
|
||||
>
|
||||
@ -155,12 +155,12 @@ export const SelectWithSearch = forwardRef<
|
||||
{value && allowClear && (
|
||||
<>
|
||||
<XIcon
|
||||
className="h-4 mx-2 cursor-pointer text-text-disabled"
|
||||
className="h-4 mx-2 cursor-pointer text-text-disabled hidden group-hover:block"
|
||||
onClick={handleClear}
|
||||
/>
|
||||
<Separator
|
||||
orientation="vertical"
|
||||
className="flex min-h-6 h-full"
|
||||
className=" min-h-6 h-full hidden group-hover:flex"
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
@ -173,12 +173,17 @@ export const SelectWithSearch = forwardRef<
|
||||
</Button>
|
||||
</PopoverTrigger>
|
||||
<PopoverContent
|
||||
className="border-input w-full min-w-[var(--radix-popper-anchor-width)] p-0"
|
||||
className="border-border-button w-full min-w-[var(--radix-popper-anchor-width)] p-0"
|
||||
align="start"
|
||||
>
|
||||
<Command>
|
||||
<CommandInput placeholder={t('common.search') + '...'} />
|
||||
<CommandList>
|
||||
<Command className="p-5">
|
||||
{options && options.length > 0 && (
|
||||
<CommandInput
|
||||
placeholder={t('common.search') + '...'}
|
||||
className=" placeholder:text-text-disabled"
|
||||
/>
|
||||
)}
|
||||
<CommandList className="mt-2">
|
||||
<CommandEmpty>{t('common.noDataFound')}</CommandEmpty>
|
||||
{options.map((group, idx) => {
|
||||
if (group.options) {
|
||||
@ -209,6 +214,7 @@ export const SelectWithSearch = forwardRef<
|
||||
value={group.value}
|
||||
disabled={group.disabled}
|
||||
onSelect={handleSelect}
|
||||
className="min-h-10"
|
||||
>
|
||||
<span className="leading-none">{group.label}</span>
|
||||
|
||||
|
||||
@ -221,10 +221,12 @@ const RaptorFormFields = ({
|
||||
defaultValue={0}
|
||||
type="number"
|
||||
suffix={
|
||||
<Shuffle
|
||||
className="size-3.5 cursor-pointer"
|
||||
onClick={handleGenerate}
|
||||
/>
|
||||
<div className="w-7 flex justify-center items-center">
|
||||
<Shuffle
|
||||
className="size-3.5 cursor-pointer"
|
||||
onClick={handleGenerate}
|
||||
/>
|
||||
</div>
|
||||
}
|
||||
/>
|
||||
</FormControl>
|
||||
|
||||
@ -59,6 +59,7 @@ interface SimilaritySliderFormFieldProps {
|
||||
similarityName?: string;
|
||||
vectorSimilarityWeightName?: string;
|
||||
isTooltipShown?: boolean;
|
||||
numberInputClassName?: string;
|
||||
}
|
||||
|
||||
export const initialSimilarityThresholdValue = {
|
||||
@ -86,6 +87,7 @@ export function SimilaritySliderFormField({
|
||||
similarityName = 'similarity_threshold',
|
||||
vectorSimilarityWeightName = 'vector_similarity_weight',
|
||||
isTooltipShown,
|
||||
numberInputClassName,
|
||||
}: SimilaritySliderFormFieldProps) {
|
||||
const { t } = useTranslate('knowledgeDetails');
|
||||
const form = useFormContext();
|
||||
@ -101,6 +103,7 @@ export function SimilaritySliderFormField({
|
||||
step={0.01}
|
||||
layout={FormLayout.Vertical}
|
||||
tooltip={isTooltipShown && t('similarityThresholdTip')}
|
||||
numberInputClassName={numberInputClassName}
|
||||
></SliderInputFormField>
|
||||
<FormField
|
||||
control={form.control}
|
||||
@ -124,7 +127,7 @@ export function SimilaritySliderFormField({
|
||||
isVector ? 'vectorSimilarityWeight' : 'keywordSimilarityWeight',
|
||||
)}
|
||||
</FormLabel>
|
||||
<div className={cn('flex items-end gap-14 justify-between')}>
|
||||
<div className={cn('flex items-end gap-4 justify-between')}>
|
||||
<FormControl>
|
||||
<div className="flex flex-col flex-1 gap-2">
|
||||
<div className="flex justify-between items-center">
|
||||
@ -158,6 +161,7 @@ export function SimilaritySliderFormField({
|
||||
className={cn(
|
||||
'h-6 w-10 p-0 text-center bg-bg-input border-border-default border text-text-secondary',
|
||||
'[appearance:textfield] [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none',
|
||||
numberInputClassName,
|
||||
)}
|
||||
max={1}
|
||||
min={0}
|
||||
|
||||
@ -25,6 +25,7 @@ type SliderInputFormFieldProps = {
|
||||
tooltip?: ReactNode;
|
||||
defaultValue?: number;
|
||||
className?: string;
|
||||
numberInputClassName?: string;
|
||||
} & FormLayoutType;
|
||||
|
||||
export function SliderInputFormField({
|
||||
@ -36,6 +37,7 @@ export function SliderInputFormField({
|
||||
tooltip,
|
||||
defaultValue,
|
||||
className,
|
||||
numberInputClassName,
|
||||
layout = FormLayout.Horizontal,
|
||||
}: SliderInputFormFieldProps) {
|
||||
const form = useFormContext();
|
||||
@ -61,7 +63,7 @@ export function SliderInputFormField({
|
||||
</FormLabel>
|
||||
<div
|
||||
className={cn(
|
||||
'flex items-center gap-14 justify-between',
|
||||
'flex items-center gap-4 justify-between',
|
||||
{ 'w-3/4': isHorizontal },
|
||||
className,
|
||||
)}
|
||||
@ -82,6 +84,7 @@ export function SliderInputFormField({
|
||||
className={cn(
|
||||
'h-6 w-10 p-0 text-center bg-bg-input border border-border-default text-text-secondary',
|
||||
'[appearance:textfield] [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none',
|
||||
numberInputClassName,
|
||||
)}
|
||||
max={max}
|
||||
min={min}
|
||||
|
||||
@ -39,12 +39,15 @@ const CommandInput = React.forwardRef<
|
||||
React.ElementRef<typeof CommandPrimitive.Input>,
|
||||
React.ComponentPropsWithoutRef<typeof CommandPrimitive.Input>
|
||||
>(({ className, ...props }, ref) => (
|
||||
<div className="flex items-center border-b px-3" data-cmdk-input-wrapper="">
|
||||
<div
|
||||
className="flex items-center border rounded-md border-border-default bg-bg-input px-3"
|
||||
data-cmdk-input-wrapper=""
|
||||
>
|
||||
<Search className="mr-2 h-4 w-4 shrink-0 opacity-50" />
|
||||
<CommandPrimitive.Input
|
||||
ref={ref}
|
||||
className={cn(
|
||||
'flex h-11 w-full rounded-md bg-transparent py-3 text-sm outline-none placeholder:text-muted-foreground disabled:cursor-not-allowed disabled:opacity-50',
|
||||
'flex min-h-8 w-full rounded-md bg-transparent py-2 text-sm outline-none placeholder:text-text-secondary disabled:cursor-not-allowed disabled:opacity-50',
|
||||
className,
|
||||
)}
|
||||
{...props}
|
||||
@ -66,7 +69,10 @@ const CommandList = React.forwardRef<
|
||||
*/
|
||||
<CommandPrimitive.List
|
||||
ref={ref}
|
||||
className={cn('max-h-[300px] overflow-y-auto overflow-x-hidden', className)}
|
||||
className={cn(
|
||||
'max-h-[300px] overflow-y-auto overflow-x-hidden scrollbar-auto',
|
||||
className,
|
||||
)}
|
||||
onWheel={(e) => e.stopPropagation()}
|
||||
onMouseEnter={(e) => e.currentTarget.focus()}
|
||||
tabIndex={-1}
|
||||
@ -96,7 +102,7 @@ const CommandGroup = React.forwardRef<
|
||||
<CommandPrimitive.Group
|
||||
ref={ref}
|
||||
className={cn(
|
||||
'overflow-hidden p-1 text-foreground [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1.5 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium [&_[cmdk-group-heading]]:text-muted-foreground',
|
||||
'overflow-hidden p-1 text-foreground [&_[cmdk-group-heading]]:px-2 [&_[cmdk-group-heading]]:py-1.5 [&_[cmdk-group-heading]]:text-xs [&_[cmdk-group-heading]]:font-medium [&_[cmdk-group-heading]]:text-text-secondary',
|
||||
className,
|
||||
)}
|
||||
{...props}
|
||||
|
||||
@ -17,7 +17,7 @@ const Divider: React.FC<DividerProps> = ({
|
||||
direction = 'horizontal',
|
||||
type = 'horizontal',
|
||||
text,
|
||||
color = 'border-muted-foreground/50',
|
||||
color = 'border-border-button',
|
||||
margin = 'my-4',
|
||||
className = '',
|
||||
}) => {
|
||||
|
||||
@ -2,8 +2,7 @@ import * as React from 'react';
|
||||
|
||||
import { cn } from '@/lib/utils';
|
||||
import { Eye, EyeOff, Search } from 'lucide-react';
|
||||
import { useState } from 'react';
|
||||
import { Button } from './button';
|
||||
import { useEffect, useMemo, useRef, useState } from 'react';
|
||||
|
||||
export interface InputProps
|
||||
extends Omit<React.InputHTMLAttributes<HTMLInputElement>, 'prefix'> {
|
||||
@ -18,6 +17,20 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
|
||||
const { defaultValue, ...restProps } = props;
|
||||
const inputValue = isControlled ? value : defaultValue;
|
||||
const [showPassword, setShowPassword] = useState(false);
|
||||
const [prefixWidth, setPrefixWidth] = useState(0);
|
||||
const [suffixWidth, setSuffixWidth] = useState(0);
|
||||
|
||||
const prefixRef = useRef<HTMLSpanElement>(null);
|
||||
const suffixRef = useRef<HTMLSpanElement>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (prefixRef.current) {
|
||||
setPrefixWidth(prefixRef.current.offsetWidth);
|
||||
}
|
||||
if (suffixRef.current) {
|
||||
setSuffixWidth(suffixRef.current.offsetWidth);
|
||||
}
|
||||
}, [prefix, suffix, prefixRef, suffixRef]);
|
||||
const handleChange: React.ChangeEventHandler<HTMLInputElement> = (e) => {
|
||||
if (type === 'number') {
|
||||
const numValue = e.target.value === '' ? '' : Number(e.target.value);
|
||||
@ -35,40 +48,60 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
|
||||
|
||||
const isPasswordInput = type === 'password';
|
||||
|
||||
const inputEl = (
|
||||
<input
|
||||
ref={ref}
|
||||
type={isPasswordInput && showPassword ? 'text' : type}
|
||||
className={cn(
|
||||
'peer/input',
|
||||
'flex h-8 w-full rounded-md border-0.5 border-border-button bg-bg-input px-3 py-2 outline-none text-sm text-text-primary',
|
||||
'file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-text-disabled',
|
||||
'focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-accent-primary',
|
||||
'disabled:cursor-not-allowed disabled:opacity-50 transition-colors',
|
||||
{
|
||||
'pl-12': !!prefix,
|
||||
'pr-12': !!suffix || isPasswordInput,
|
||||
'pr-24': !!suffix && isPasswordInput,
|
||||
},
|
||||
className,
|
||||
)}
|
||||
value={inputValue ?? ''}
|
||||
onChange={handleChange}
|
||||
{...restProps}
|
||||
/>
|
||||
const inputEl = useMemo(
|
||||
() => (
|
||||
<input
|
||||
ref={ref}
|
||||
type={isPasswordInput && showPassword ? 'text' : type}
|
||||
className={cn(
|
||||
'peer/input',
|
||||
'flex h-8 w-full rounded-md border-0.5 border-border-button bg-bg-input px-3 py-2 outline-none text-sm text-text-primary',
|
||||
'file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-text-disabled',
|
||||
'focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-accent-primary',
|
||||
'disabled:cursor-not-allowed disabled:opacity-50 transition-colors',
|
||||
type === 'number' &&
|
||||
'[appearance:textfield] [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none',
|
||||
className,
|
||||
)}
|
||||
style={{
|
||||
paddingLeft: !!prefix ? `${prefixWidth}px` : '',
|
||||
paddingRight: isPasswordInput
|
||||
? '40px'
|
||||
: !!suffix
|
||||
? `${suffixWidth}px`
|
||||
: '',
|
||||
}}
|
||||
value={inputValue ?? ''}
|
||||
onChange={handleChange}
|
||||
{...restProps}
|
||||
/>
|
||||
),
|
||||
[
|
||||
prefixWidth,
|
||||
suffixWidth,
|
||||
isPasswordInput,
|
||||
inputValue,
|
||||
className,
|
||||
handleChange,
|
||||
restProps,
|
||||
],
|
||||
);
|
||||
|
||||
if (prefix || suffix || isPasswordInput) {
|
||||
return (
|
||||
<div className="relative">
|
||||
{prefix && (
|
||||
<span className="absolute left-0 top-[50%] translate-y-[-50%]">
|
||||
<span
|
||||
ref={prefixRef}
|
||||
className="absolute left-0 top-[50%] translate-y-[-50%]"
|
||||
>
|
||||
{prefix}
|
||||
</span>
|
||||
)}
|
||||
{inputEl}
|
||||
{suffix && (
|
||||
<span
|
||||
ref={suffixRef}
|
||||
className={cn('absolute right-0 top-[50%] translate-y-[-50%]', {
|
||||
'right-14': isPasswordInput,
|
||||
})}
|
||||
@ -77,10 +110,10 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
|
||||
</span>
|
||||
)}
|
||||
{isPasswordInput && (
|
||||
<Button
|
||||
variant="transparent"
|
||||
<button
|
||||
type="button"
|
||||
className="
|
||||
p-2 text-text-secondary
|
||||
absolute border-0 right-1 top-[50%] translate-y-[-50%]
|
||||
dark:peer-autofill/input:text-text-secondary-inverse
|
||||
dark:peer-autofill/input:hover:text-text-primary-inverse
|
||||
@ -93,7 +126,7 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
|
||||
) : (
|
||||
<Eye className="size-[1em]" />
|
||||
)}
|
||||
</Button>
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
|
||||
@ -27,7 +27,10 @@ export interface ModalProps {
|
||||
okText?: ReactNode | string;
|
||||
onOk?: () => void;
|
||||
onCancel?: () => void;
|
||||
okButtonClassName?: string;
|
||||
cancelButtonClassName?: string;
|
||||
disabled?: boolean;
|
||||
style?: React.CSSProperties;
|
||||
}
|
||||
export interface ModalType extends FC<ModalProps> {
|
||||
show: typeof modalIns.show;
|
||||
@ -56,7 +59,10 @@ const Modal: ModalType = ({
|
||||
confirmLoading,
|
||||
cancelText,
|
||||
okText,
|
||||
okButtonClassName,
|
||||
cancelButtonClassName,
|
||||
disabled = false,
|
||||
style,
|
||||
}) => {
|
||||
const sizeClasses = {
|
||||
small: 'max-w-md',
|
||||
@ -111,7 +117,10 @@ const Modal: ModalType = ({
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleCancel()}
|
||||
className="px-2 py-1 border border-border-button rounded-md hover:bg-bg-card hover:text-text-primary "
|
||||
className={cn(
|
||||
'px-2 py-1 border border-border-button rounded-md hover:bg-bg-card hover:text-text-primary ',
|
||||
cancelButtonClassName,
|
||||
)}
|
||||
>
|
||||
{cancelText ?? t('modal.cancelText')}
|
||||
</button>
|
||||
@ -122,6 +131,7 @@ const Modal: ModalType = ({
|
||||
className={cn(
|
||||
'px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90',
|
||||
{ 'cursor-not-allowed': disabled },
|
||||
okButtonClassName,
|
||||
)}
|
||||
>
|
||||
{confirmLoading && (
|
||||
@ -153,23 +163,26 @@ const Modal: ModalType = ({
|
||||
handleOk,
|
||||
showfooter,
|
||||
footerClassName,
|
||||
okButtonClassName,
|
||||
cancelButtonClassName,
|
||||
]);
|
||||
return (
|
||||
<DialogPrimitive.Root open={open} onOpenChange={handleChange}>
|
||||
<DialogPrimitive.Portal>
|
||||
<DialogPrimitive.Overlay
|
||||
className="fixed inset-0 z-50 bg-colors-background-neutral-weak/50 backdrop-blur-sm flex items-center justify-center p-4"
|
||||
className="fixed inset-0 z-50 bg-bg-card backdrop-blur-sm flex items-center justify-center p-4"
|
||||
onClick={() => maskClosable && onOpenChange?.(false)}
|
||||
>
|
||||
<DialogPrimitive.Content
|
||||
className={`relative w-[700px] ${full ? 'max-w-full' : sizeClasses[size]} ${className} bg-bg-base rounded-lg shadow-lg border border-border-default transition-all focus-visible:!outline-none`}
|
||||
style={style}
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
{/* title */}
|
||||
{(title || closable) && (
|
||||
<div
|
||||
className={cn(
|
||||
'flex items-center px-6 py-4',
|
||||
'flex items-start px-6 py-4',
|
||||
{
|
||||
'justify-end': closable && !title,
|
||||
'justify-between': closable && title,
|
||||
@ -187,7 +200,7 @@ const Modal: ModalType = ({
|
||||
<DialogPrimitive.Close asChild>
|
||||
<button
|
||||
type="button"
|
||||
className="flex h-7 w-7 items-center justify-center rounded-full hover:bg-muted focus-visible:outline-none"
|
||||
className="flex h-7 w-7 items-center justify-center text-text-secondary rounded-full hover:bg-bg-card focus-visible:outline-none"
|
||||
onClick={handleCancel}
|
||||
>
|
||||
{closeIcon}
|
||||
@ -198,7 +211,7 @@ const Modal: ModalType = ({
|
||||
)}
|
||||
|
||||
{/* content */}
|
||||
<div className="py-2 px-6 overflow-y-auto scrollbar-auto max-h-[80vh] focus-visible:!outline-none">
|
||||
<div className="py-2 px-6 overflow-y-auto scrollbar-auto max-h-[calc(100vh-280px)] focus-visible:!outline-none">
|
||||
{destroyOnClose && !open ? null : children}
|
||||
</div>
|
||||
|
||||
|
||||
@ -289,12 +289,12 @@ export const MultiSelect = React.forwardRef<
|
||||
{...props}
|
||||
onClick={handleTogglePopover}
|
||||
className={cn(
|
||||
'flex w-full p-1 rounded-md border min-h-10 h-auto items-center justify-between bg-inherit hover:bg-inherit [&_svg]:pointer-events-auto',
|
||||
'flex w-full p-1 rounded-md border border-border-button min-h-10 h-auto placeholder:text-text-disabled items-center justify-between bg-bg-input hover:bg-bg-input [&_svg]:pointer-events-auto',
|
||||
className,
|
||||
)}
|
||||
>
|
||||
{selectedValues.length > 0 ? (
|
||||
<div className="flex justify-between items-center w-full">
|
||||
<div className="flex justify-between items-center w-full group">
|
||||
<div className="flex flex-wrap items-center">
|
||||
{selectedValues?.slice(0, maxCount)?.map((value) => {
|
||||
const option = flatOptions.find((o) => o.value === value);
|
||||
@ -348,9 +348,9 @@ export const MultiSelect = React.forwardRef<
|
||||
</Badge>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center justify-between ">
|
||||
<XIcon
|
||||
className="h-4 mx-2 cursor-pointer text-muted-foreground"
|
||||
className="h-4 mx-2 cursor-pointer text-text-secondary hidden group-hover:block"
|
||||
onClick={(event) => {
|
||||
event.stopPropagation();
|
||||
handleClear();
|
||||
@ -358,17 +358,17 @@ export const MultiSelect = React.forwardRef<
|
||||
/>
|
||||
<Separator
|
||||
orientation="vertical"
|
||||
className="flex min-h-6 h-full"
|
||||
className="min-h-6 h-full hidden group-hover:flex"
|
||||
/>
|
||||
<ChevronDown className="h-4 mx-2 cursor-pointer text-muted-foreground" />
|
||||
<ChevronDown className="h-4 mx-2 cursor-pointer text-text-secondary" />
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-center justify-between w-full mx-auto">
|
||||
<span className="text-sm text-muted-foreground mx-3">
|
||||
<span className="text-sm text-text-secondary mx-3">
|
||||
{placeholder}
|
||||
</span>
|
||||
<ChevronDown className="h-4 cursor-pointer text-muted-foreground mx-2" />
|
||||
<ChevronDown className="h-4 cursor-pointer text-text-secondary mx-2" />
|
||||
</div>
|
||||
)}
|
||||
</Button>
|
||||
@ -378,15 +378,17 @@ export const MultiSelect = React.forwardRef<
|
||||
align="start"
|
||||
onEscapeKeyDown={() => setIsPopoverOpen(false)}
|
||||
>
|
||||
<Command>
|
||||
<CommandInput
|
||||
placeholder={t('common.search') + '...'}
|
||||
onKeyDown={handleInputKeyDown}
|
||||
/>
|
||||
<CommandList>
|
||||
<Command className="p-5 pb-8">
|
||||
{options && options.length > 0 && (
|
||||
<CommandInput
|
||||
placeholder={t('common.search') + '...'}
|
||||
onKeyDown={handleInputKeyDown}
|
||||
/>
|
||||
)}
|
||||
<CommandList className="mt-2">
|
||||
<CommandEmpty>No results found.</CommandEmpty>
|
||||
<CommandGroup>
|
||||
{showSelectAll && (
|
||||
{showSelectAll && options && options.length > 0 && (
|
||||
<CommandItem
|
||||
key="all"
|
||||
onSelect={toggleAll}
|
||||
@ -437,9 +439,9 @@ export const MultiSelect = React.forwardRef<
|
||||
})}
|
||||
</CommandGroup>
|
||||
))}
|
||||
<CommandSeparator />
|
||||
<CommandGroup>
|
||||
<div className="flex items-center justify-between">
|
||||
<div className=" absolute bottom-1 left-1 right-1 flex items-center justify-between mx-5 bg-bg-base border-t border-border-button">
|
||||
<CommandSeparator />
|
||||
{selectedValues.length > 0 && (
|
||||
<>
|
||||
<CommandItem
|
||||
@ -454,12 +456,14 @@ export const MultiSelect = React.forwardRef<
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
<CommandItem
|
||||
onSelect={() => setIsPopoverOpen(false)}
|
||||
className="flex-1 justify-center cursor-pointer max-w-full"
|
||||
>
|
||||
{t('common.close')}
|
||||
</CommandItem>
|
||||
{options && options.length > 0 && (
|
||||
<CommandItem
|
||||
onSelect={() => setIsPopoverOpen(false)}
|
||||
className="flex-1 justify-center cursor-pointer max-w-full"
|
||||
>
|
||||
{t('common.close')}
|
||||
</CommandItem>
|
||||
)}
|
||||
</div>
|
||||
</CommandGroup>
|
||||
</CommandList>
|
||||
|
||||
@ -59,7 +59,7 @@ const SelectScrollUpButton = React.forwardRef<
|
||||
<SelectPrimitive.ScrollUpButton
|
||||
ref={ref}
|
||||
className={cn(
|
||||
'flex cursor-default items-center justify-center py-1',
|
||||
'flex cursor-default items-center justify-center py-1 text-text-secondary hover:text-text-primary',
|
||||
className,
|
||||
)}
|
||||
{...props}
|
||||
@ -76,7 +76,7 @@ const SelectScrollDownButton = React.forwardRef<
|
||||
<SelectPrimitive.ScrollDownButton
|
||||
ref={ref}
|
||||
className={cn(
|
||||
'flex cursor-default items-center justify-center py-1',
|
||||
'flex cursor-default items-center justify-center py-1 text-text-secondary hover:text-text-primary',
|
||||
className,
|
||||
)}
|
||||
{...props}
|
||||
|
||||
@ -54,7 +54,7 @@ const Textarea = forwardRef<HTMLTextAreaElement, TextareaProps>(
|
||||
return (
|
||||
<textarea
|
||||
className={cn(
|
||||
'flex min-h-[80px] w-full bg-bg-input rounded-md border border-input px-3 py-2 text-base ring-offset-background placeholder:text-text-disabled focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-accent-primary focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm overflow-hidden',
|
||||
'flex min-h-[80px] w-full bg-bg-input rounded-md border border-border-button px-3 py-2 text-base ring-offset-background placeholder:text-text-disabled focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-accent-primary focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm overflow-hidden',
|
||||
className,
|
||||
)}
|
||||
rows={autoSize?.minRows ?? props.rows ?? undefined}
|
||||
|
||||
@ -20,7 +20,7 @@ const TooltipContent = React.forwardRef<
|
||||
ref={ref}
|
||||
sideOffset={sideOffset}
|
||||
className={cn(
|
||||
'z-50 overflow-auto scrollbar-auto rounded-md whitespace-pre-wrap border bg-popover px-3 py-1.5 text-sm text-popover-foreground shadow-md animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 max-w-[30vw]',
|
||||
'z-50 overflow-auto scrollbar-auto rounded-md whitespace-pre-wrap border bg-bg-base px-3 py-1.5 text-sm text-text-primary shadow-md animate-in fade-in-0 zoom-in-95 data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=closed]:zoom-out-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 max-w-[30vw]',
|
||||
className,
|
||||
)}
|
||||
{...props}
|
||||
@ -39,7 +39,7 @@ export const FormTooltip = ({ tooltip }: { tooltip: React.ReactNode }) => {
|
||||
e.preventDefault(); // Prevent clicking the tooltip from triggering form save
|
||||
}}
|
||||
>
|
||||
<CircleQuestionMark className="size-3 ml-2" />
|
||||
<CircleQuestionMark className="size-3 ml-[2px] -translate-y-1" />
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>{tooltip}</TooltipContent>
|
||||
</Tooltip>
|
||||
|
||||
@ -179,3 +179,8 @@ export enum JsonSchemaDataType {
|
||||
Array = 'array',
|
||||
Object = 'object',
|
||||
}
|
||||
|
||||
export enum SwitchLogicOperator {
|
||||
And = 'and',
|
||||
Or = 'or',
|
||||
}
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import { ExclamationCircleFilled } from '@ant-design/icons';
|
||||
import { App } from 'antd';
|
||||
import { Modal } from '@/components/ui/modal/modal';
|
||||
import isEqual from 'lodash/isEqual';
|
||||
import { ReactNode, useCallback, useEffect, useRef, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
@ -84,20 +83,27 @@ interface IProps {
|
||||
}
|
||||
|
||||
export const useShowDeleteConfirm = () => {
|
||||
const { modal } = App.useApp();
|
||||
const { t } = useTranslation();
|
||||
|
||||
const showDeleteConfirm = useCallback(
|
||||
({ title, content, onOk, onCancel }: IProps): Promise<number> => {
|
||||
return new Promise((resolve, reject) => {
|
||||
modal.confirm({
|
||||
Modal.show({
|
||||
title: title ?? t('common.deleteModalTitle'),
|
||||
icon: <ExclamationCircleFilled />,
|
||||
content,
|
||||
visible: true,
|
||||
onVisibleChange: () => {
|
||||
Modal.hide();
|
||||
},
|
||||
footer: null,
|
||||
closable: true,
|
||||
maskClosable: false,
|
||||
okText: t('common.yes'),
|
||||
okType: 'danger',
|
||||
cancelText: t('common.no'),
|
||||
async onOk() {
|
||||
style: {
|
||||
width: '400px',
|
||||
},
|
||||
okButtonClassName:
|
||||
'bg-state-error text-white hover:bg-state-error hover:text-white',
|
||||
onOk: async () => {
|
||||
try {
|
||||
const ret = await onOk?.();
|
||||
resolve(ret);
|
||||
@ -106,13 +112,15 @@ export const useShowDeleteConfirm = () => {
|
||||
reject(error);
|
||||
}
|
||||
},
|
||||
onCancel() {
|
||||
onCancel: () => {
|
||||
onCancel?.();
|
||||
Modal.hide();
|
||||
},
|
||||
children: content,
|
||||
});
|
||||
});
|
||||
},
|
||||
[t, modal],
|
||||
[t],
|
||||
);
|
||||
|
||||
return showDeleteConfirm;
|
||||
|
||||
12
web/src/hooks/logic-hooks/use-build-options.ts
Normal file
12
web/src/hooks/logic-hooks/use-build-options.ts
Normal file
@ -0,0 +1,12 @@
|
||||
import { SwitchLogicOperator } from '@/constants/agent';
|
||||
import { buildOptions } from '@/utils/form';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
export function useBuildSwitchLogicOperatorOptions() {
|
||||
const { t } = useTranslation();
|
||||
return buildOptions(
|
||||
SwitchLogicOperator,
|
||||
t,
|
||||
'flow.switchLogicOperatorOptions',
|
||||
);
|
||||
}
|
||||
@ -25,6 +25,7 @@ export default {
|
||||
portugueseBr: 'Portuguese (Brazil)',
|
||||
chinese: 'Simplified Chinese',
|
||||
traditionalChinese: 'Traditional Chinese',
|
||||
russian: 'Russian',
|
||||
language: 'Language',
|
||||
languageMessage: 'Please input your language!',
|
||||
languagePlaceholder: 'select your language',
|
||||
@ -695,6 +696,8 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
|
||||
tocEnhanceTip: ` During the parsing of the document, table of contents information was generated (see the 'Enable Table of Contents Extraction' option in the General method). This allows the large model to return table of contents items relevant to the user's query, thereby using these items to retrieve related chunks and apply weighting to these chunks during the sorting process. This approach is derived from mimicking the behavioral logic of how humans search for knowledge in books.`,
|
||||
},
|
||||
setting: {
|
||||
seconds: 'seconds',
|
||||
minutes: 'minutes',
|
||||
edit: 'Edit',
|
||||
cropTip:
|
||||
'Drag the selection area to choose the cropping position of the image, and scroll to zoom in/out',
|
||||
@ -733,9 +736,15 @@ Example: https://fsn1.your-objectstorage.com`,
|
||||
google_drivePrimaryAdminTip:
|
||||
'Email address that has access to the Drive content being synced.',
|
||||
google_driveMyDriveEmailsTip:
|
||||
'Comma-separated emails whose “My Drive” contents should be indexed (include the primary admin).',
|
||||
'Comma-separated emails whose "My Drive" contents should be indexed (include the primary admin).',
|
||||
google_driveSharedFoldersTip:
|
||||
'Comma-separated Google Drive folder links to crawl.',
|
||||
moodleDescription:
|
||||
'Connect to your Moodle LMS to sync course content, forums, and resources.',
|
||||
moodleUrlTip:
|
||||
'The base URL of your Moodle instance (e.g., https://moodle.university.edu). Do not include /webservice or /login.',
|
||||
moodleTokenTip:
|
||||
'Generate a web service token in Moodle: Go to Site administration → Server → Web services → Manage tokens. The user must be enrolled in the courses you want to sync.',
|
||||
jiraDescription:
|
||||
'Connect your Jira workspace to sync issues, comments, and attachments.',
|
||||
jiraBaseUrlTip:
|
||||
@ -1043,7 +1052,7 @@ Example: https://fsn1.your-objectstorage.com`,
|
||||
downloadFileType: 'Download file type',
|
||||
formatTypeError: 'Format or type error',
|
||||
variableNameMessage:
|
||||
'Variable name can only contain letters and underscores',
|
||||
'Variable name can only contain letters and underscores and numbers',
|
||||
variableDescription: 'Variable Description',
|
||||
defaultValue: 'Default Value',
|
||||
conversationVariable: 'Conversation variable',
|
||||
@ -1752,6 +1761,8 @@ The variable aggregation node (originally the variable assignment node) is a cru
|
||||
The Indexer will store the content in the corresponding data structures for the selected methods.`,
|
||||
// file: 'File',
|
||||
parserMethod: 'PDF parser',
|
||||
tableResultType: 'Table Result Type',
|
||||
markdownImageResponseType: 'Markdown Image Response Type',
|
||||
// systemPrompt: 'System Prompt',
|
||||
systemPromptPlaceholder:
|
||||
'Enter system prompt for image analysis, if empty the system default value will be used',
|
||||
@ -1934,6 +1945,7 @@ Important structured information may include: names, dates, locations, events, k
|
||||
japanese: 'Japanese',
|
||||
korean: 'Korean',
|
||||
vietnamese: 'Vietnamese',
|
||||
russian: 'Russian',
|
||||
},
|
||||
pagination: {
|
||||
total: 'Total {{total}}',
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -685,6 +685,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
tocEnhanceTip: `解析文档时生成了目录信息(见General方法的‘启用目录抽取’),让大模型返回和用户问题相关的目录项,从而利用目录项拿到相关chunk,对这些chunk在排序中进行加权。这种方法来源于模仿人类查询书本中知识的行为逻辑`,
|
||||
},
|
||||
setting: {
|
||||
seconds: '秒',
|
||||
minutes: '分',
|
||||
edit: '编辑',
|
||||
cropTip: '拖动选区可以选择要图片的裁剪位置,滚动可以放大/缩小选区',
|
||||
cropImage: '剪裁图片',
|
||||
@ -978,7 +980,7 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
downloadFileTypeTip: '文件下载的类型',
|
||||
downloadFileType: '文件类型',
|
||||
formatTypeError: '格式或类型错误',
|
||||
variableNameMessage: '名称只能包含字母和下划线',
|
||||
variableNameMessage: '名称只能包含字母,数字和下划线',
|
||||
variableDescription: '变量的描述',
|
||||
defaultValue: '默认值',
|
||||
conversationVariable: '会话变量',
|
||||
@ -1629,6 +1631,8 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
||||
filenameEmbdWeight: '文件名嵌入权重',
|
||||
parserMethod: '解析方法',
|
||||
tableResultType: '表格返回形式',
|
||||
markdownImageResponseType: '图片返回形式',
|
||||
systemPromptPlaceholder:
|
||||
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
|
||||
exportJson: '导出 JSON',
|
||||
|
||||
@ -25,8 +25,8 @@ const ThemeSwitch = forwardRef<
|
||||
setTheme(value ? ThemeEnum.Dark : ThemeEnum.Light)
|
||||
}
|
||||
>
|
||||
<div className="self-center p-3 py-2 rounded-full bg-bg-card transition-[background-color] duration-300">
|
||||
<div className="h-full flex items-center justify-between gap-4 relative z-[1] text-text-disabled transition-[text-color] duration-300 delay-75">
|
||||
<div className="self-center p-3 py-2 rounded-full bg-bg-card transition-[background-color]">
|
||||
<div className="h-full flex items-center justify-between gap-4 relative z-[1] text-text-disabled transition-[text-color]">
|
||||
<LucideSun
|
||||
className={cn('size-[1em]', !isDark && 'text-text-primary')}
|
||||
/>
|
||||
@ -39,7 +39,7 @@ const ThemeSwitch = forwardRef<
|
||||
<Thumb
|
||||
className={cn(
|
||||
'absolute top-0 left-0 w-[calc(50%+.25rem)] p-0.5 h-full rounded-full overflow-hidden',
|
||||
'transition-all ease-out duration-300',
|
||||
'transition-all ease-out',
|
||||
'group-hover/theme-switch:w-[calc(50%+.66rem)] group-focus-visible/theme-switch:w-[calc(50%+.66rem)]',
|
||||
{
|
||||
'left-[calc(50%-.25rem)] group-hover/theme-switch:left-[calc(50%-.66rem)] group-focus-visible/theme-switch:left-[calc(50%-.66rem)]':
|
||||
@ -47,12 +47,7 @@ const ThemeSwitch = forwardRef<
|
||||
},
|
||||
)}
|
||||
>
|
||||
<div
|
||||
className="
|
||||
size-full rounded-full bg-bg-base shadow-md
|
||||
transition-colors ease-out duration-300 delay-75
|
||||
"
|
||||
/>
|
||||
<div className="size-full rounded-full bg-bg-base shadow-md transition-colors ease-out" />
|
||||
</Thumb>
|
||||
</Root>
|
||||
);
|
||||
|
||||
@ -8,4 +8,7 @@
|
||||
border: 0;
|
||||
background-color: transparent;
|
||||
}
|
||||
:global(.react-flow__node-group.selectable.selected) {
|
||||
box-shadow: none;
|
||||
}
|
||||
}
|
||||
|
||||
@ -8,7 +8,6 @@ import { memo } from 'react';
|
||||
import { NodeHandleId, Operator } from '../../constant';
|
||||
import OperatorIcon from '../../operator-icon';
|
||||
import { CommonHandle, LeftEndHandle } from './handle';
|
||||
import styles from './index.less';
|
||||
import NodeHeader from './node-header';
|
||||
import { NodeWrapper } from './node-wrapper';
|
||||
import { ResizeIcon, controlStyle } from './resize-icon';
|
||||
@ -23,9 +22,12 @@ export function InnerIterationNode({
|
||||
return (
|
||||
<ToolBar selected={selected} id={id} label={data.label} showRun={false}>
|
||||
<section
|
||||
className={cn('h-full bg-transparent rounded-b-md group', {
|
||||
[styles.selectedHeader]: selected,
|
||||
})}
|
||||
className={cn(
|
||||
'h-full bg-transparent rounded-b-md group border border-border-button border-t-0',
|
||||
{
|
||||
['border-x border-accent-primary']: selected,
|
||||
},
|
||||
)}
|
||||
>
|
||||
<NodeResizeControl style={controlStyle} minWidth={100} minHeight={50}>
|
||||
<ResizeIcon />
|
||||
@ -43,9 +45,9 @@ export function InnerIterationNode({
|
||||
name={data.name}
|
||||
label={data.label}
|
||||
wrapperClassName={cn(
|
||||
'bg-background-header-bar p-2 rounded-t-[10px] absolute w-full top-[-44px] left-[-0.3px]',
|
||||
'bg-background-header-bar p-2 rounded-t-[10px] absolute w-full top-[-38px] left-[-0.3px] border-x border-t border-border-button',
|
||||
{
|
||||
[styles.selectedHeader]: selected,
|
||||
['border-x border-t border-accent-primary']: selected,
|
||||
},
|
||||
)}
|
||||
></NodeHeader>
|
||||
|
||||
@ -10,6 +10,7 @@ import {
|
||||
JsonSchemaDataType,
|
||||
Operator,
|
||||
ProgrammingLanguage,
|
||||
SwitchLogicOperator,
|
||||
SwitchOperatorOptions,
|
||||
initialLlmBaseValues,
|
||||
} from '@/constants/agent';
|
||||
@ -51,8 +52,6 @@ import {
|
||||
|
||||
export const BeginId = 'begin';
|
||||
|
||||
export const SwitchLogicOperatorOptions = ['and', 'or'];
|
||||
|
||||
export const CommonOperatorList = Object.values(Operator).filter(
|
||||
(x) => x !== Operator.Note,
|
||||
);
|
||||
@ -308,7 +307,7 @@ export const initialExeSqlValues = {
|
||||
export const initialSwitchValues = {
|
||||
conditions: [
|
||||
{
|
||||
logical_operator: SwitchLogicOperatorOptions[0],
|
||||
logical_operator: SwitchLogicOperator.And,
|
||||
items: [
|
||||
{
|
||||
operator: SwitchOperatorOptions[0].value,
|
||||
|
||||
@ -169,6 +169,7 @@ export const initialParserValues = {
|
||||
{
|
||||
fileFormat: FileType.Spreadsheet,
|
||||
output_format: SpreadsheetOutputFormat.Html,
|
||||
parse_method: ParseDocumentType.DeepDOC,
|
||||
},
|
||||
{
|
||||
fileFormat: FileType.Image,
|
||||
@ -192,6 +193,7 @@ export const initialParserValues = {
|
||||
{
|
||||
fileFormat: FileType.PowerPoint,
|
||||
output_format: PptOutputFormat.Json,
|
||||
parse_method: ParseDocumentType.DeepDOC,
|
||||
},
|
||||
],
|
||||
};
|
||||
@ -243,7 +245,7 @@ export const FileTypeSuffixMap = {
|
||||
[FileType.Email]: ['eml', 'msg'],
|
||||
[FileType.TextMarkdown]: ['md', 'markdown', 'mdx', 'txt'],
|
||||
[FileType.Docx]: ['doc', 'docx'],
|
||||
[FileType.PowerPoint]: ['pptx'],
|
||||
[FileType.PowerPoint]: ['pptx', 'ppt'],
|
||||
[FileType.Video]: ['mp4', 'avi', 'mkv'],
|
||||
[FileType.Audio]: [
|
||||
'da',
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user