Compare commits

...

5 Commits

Author SHA1 Message Date
14273b4595 Fix: Optimized knowledge base file parsing and display #9869 (#10292)
### What problem does this PR solve?

Fix: Optimized knowledge base file parsing and display #9869

- Optimized the ChunkMethodDialog component logic and adjusted
FormSchema validation rules
- Updated the document information interface definition, adding
pipeline_id, pipeline_name, and suffix fields
- Refactored the ChunkResultBar component, removing filter-related logic
and simplifying the input box and chunk creation functionality
- Improved FormatPreserveEditor to support text mode switching
(full/omitted) display control
- Updated timeline node titles to more accurate semantic descriptions
(e.g., character splitters)
- Optimized the data flow result page structure and style, dynamically
adjusting height and content display
- Fixed the table sorting function on the dataset overview page and
enhanced the display of task type icons and status mapping.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-09-25 19:53:49 +08:00
abe7132630 Feat: Change the corresponding prompt word according to the value of fieldName #9869 (#10291)
### What problem does this PR solve?

Feat: Change the corresponding prompt word according to the value of
fieldName #9869
### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2025-09-25 19:53:37 +08:00
c1151519a0 Feat: add foundational support for RAPTOR dataset pipeline logs (#10277)
### What problem does this PR solve?

Add foundational support for RAPTOR dataset pipeline logs.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-09-25 16:46:24 +08:00
a1147ce609 Feat: Allows the extractor operator's prompt to reference the output of an upstream operator #9869 (#10279)
### What problem does this PR solve?

Feat: Allows the extractor operator's prompt to reference the output of
an upstream operator #9869

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2025-09-25 15:24:24 +08:00
d907e79893 Refa: fake doc ID. (#10276)
### What problem does this PR solve?
#10273
### Type of change

- [x] Refactoring
2025-09-25 13:52:50 +08:00
47 changed files with 1196 additions and 553 deletions

View File

@ -24,7 +24,7 @@ from api.db.services.document_service import DocumentService, queue_raptor_o_gra
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
from api.db.services.task_service import TaskService
from api.db.services.task_service import TaskService, GRAPH_RAPTOR_FAKE_DOC_ID
from api.db.services.user_service import TenantService, UserTenantService
from api.utils.api_utils import get_error_data_result, server_error_response, get_data_error_result, validate_request, not_allowed_parameters
from api.utils import get_uuid
@ -533,10 +533,6 @@ def run_graphrag():
if not kb_id:
return get_error_data_result(message='Lack of "KB ID"')
doc_ids = req.get("doc_ids", [])
if not doc_ids:
return get_error_data_result(message="Need to specify document IDs to run Graph RAG")
ok, kb = KnowledgebaseService.get_by_id(kb_id)
if not ok:
return get_error_data_result(message="Invalid Knowledgebase ID")
@ -547,18 +543,31 @@ def run_graphrag():
logging.warning(f"A valid GraphRAG task id is expected for kb {kb_id}")
if task and task.progress not in [-1, 1]:
return get_error_data_result(message=f"Task in progress with status {task.progress}. A Graph Task is already running.")
return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A Graph Task is already running.")
document_ids = set()
document_ids = []
sample_document = {}
for doc_id in doc_ids:
ok, document = DocumentService.get_by_id(doc_id)
if ok:
document_ids.add(document.id)
if not sample_document:
sample_document = document.to_dict()
task_id = queue_raptor_o_graphrag_tasks(doc=sample_document, ty="graphrag", priority=0, fake_doc_id="x", doc_ids=list(document_ids))
documents, _ = DocumentService.get_by_kb_id(
kb_id=kb_id,
page_number=0,
items_per_page=0,
orderby="create_time",
desc=False,
keywords="",
run_status=[],
types=[],
suffix=[],
)
for document in documents:
if not sample_document and document["parser_config"].get("graphrag", {}).get("use_graphrag", False):
sample_document = document
document_ids.insert(0, document["id"])
else:
document_ids.append(document["id"])
task_id = queue_raptor_o_graphrag_tasks(doc=sample_document, ty="graphrag", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
if not KnowledgebaseService.update_by_id(kb.id, {"graphrag_task_id": task_id}):
logging.warning(f"Cannot save graphrag_task_id for kb {kb_id}")
@ -584,6 +593,80 @@ def trace_graphrag():
ok, task = TaskService.get_by_id(task_id)
if not ok:
return get_json_result(data=False, message="GraphRAG Task Not Found or Error Occurred", code=settings.RetCode.ARGUMENT_ERROR)
return get_error_data_result(message="GraphRAG Task Not Found or Error Occurred")
return get_json_result(data=task.to_dict())
@manager.route("/run_raptor", methods=["POST"]) # noqa: F821
@login_required
def run_raptor():
req = request.json
kb_id = req.get("kb_id", "")
if not kb_id:
return get_error_data_result(message='Lack of "KB ID"')
ok, kb = KnowledgebaseService.get_by_id(kb_id)
if not ok:
return get_error_data_result(message="Invalid Knowledgebase ID")
task_id = kb.raptor_task_id
ok, task = TaskService.get_by_id(task_id)
if not ok:
logging.warning(f"A valid RAPTOR task id is expected for kb {kb_id}")
if task and task.progress not in [-1, 1]:
return get_error_data_result(message=f"Task {task_id} in progress with status {task.progress}. A RAPTOR Task is already running.")
document_ids = []
sample_document = {}
documents, _ = DocumentService.get_by_kb_id(
kb_id=kb_id,
page_number=0,
items_per_page=0,
orderby="create_time",
desc=False,
keywords="",
run_status=[],
types=[],
suffix=[],
)
for document in documents:
if not sample_document:
sample_document = document
document_ids.insert(0, document["id"])
else:
document_ids.append(document["id"])
task_id = queue_raptor_o_graphrag_tasks(doc=sample_document, ty="raptor", priority=0, fake_doc_id=GRAPH_RAPTOR_FAKE_DOC_ID, doc_ids=list(document_ids))
if not KnowledgebaseService.update_by_id(kb.id, {"raptor_task_id": task_id}):
logging.warning(f"Cannot save raptor_task_id for kb {kb_id}")
return get_json_result(data={"raptor_task_id": task_id})
@manager.route("/trace_raptor", methods=["GET"]) # noqa: F821
@login_required
def trace_raptor():
kb_id = request.args.get("kb_id", "")
if not kb_id:
return get_error_data_result(message='Lack of "KB ID"')
ok, kb = KnowledgebaseService.get_by_id(kb_id)
if not ok:
return get_error_data_result(message="Invalid Knowledgebase ID")
task_id = kb.raptor_task_id
if not task_id:
return get_error_data_result(message="RAPTOR Task ID Not Found")
ok, task = TaskService.get_by_id(task_id)
if not ok:
return get_error_data_result(message="RAPTOR Task Not Found or Error Occurred")
return get_json_result(data=task.to_dict())

View File

@ -651,6 +651,7 @@ class Knowledgebase(DataBaseModel):
pagerank = IntegerField(default=0, index=False)
graphrag_task_id = CharField(max_length=32, null=True, help_text="Graph RAG task ID", index=True)
raptor_task_id = CharField(max_length=32, null=True, help_text="RAPTOR task ID", index=True)
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
@ -1079,4 +1080,8 @@ def migrate_db():
migrate(migrator.add_column("knowledgebase", "graphrag_task_id", CharField(max_length=32, null=True, help_text="Gragh RAG task ID", index=True)))
except Exception:
pass
try:
migrate(migrator.add_column("knowledgebase", "raptor_task_id", CharField(max_length=32, null=True, help_text="RAPTOR task ID", index=True)))
except Exception:
pass
logging.disable(logging.NOTSET)

View File

@ -121,7 +121,7 @@ class DocumentService(CommonService):
orderby, desc, keywords, run_status, types, suffix):
fields = cls.get_cls_model_fields()
if keywords:
docs = cls.model.select(*[*fields, UserCanvas.title])\
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name")])\
.join(File2Document, on=(File2Document.document_id == cls.model.id))\
.join(File, on=(File.id == File2Document.file_id))\
.join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
@ -130,7 +130,7 @@ class DocumentService(CommonService):
(fn.LOWER(cls.model.name).contains(keywords.lower()))
)
else:
docs = cls.model.select(*[*fields, UserCanvas.title])\
docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name")])\
.join(File2Document, on=(File2Document.document_id == cls.model.id))\
.join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
.join(File, on=(File.id == File2Document.file_id))\
@ -342,8 +342,7 @@ class DocumentService(CommonService):
process_duration=cls.model.process_duration + duration).where(
cls.model.id == doc_id).execute()
if num == 0:
raise LookupError(
"Document not found which is supposed to be there")
logging.warning("Document not found which is supposed to be there")
num = Knowledgebase.update(
token_num=Knowledgebase.token_num +
token_num,
@ -781,8 +780,9 @@ def queue_raptor_o_graphrag_tasks(doc, ty, priority, fake_doc_id="", doc_ids=[])
task["digest"] = hasher.hexdigest()
bulk_insert_into_db(Task, [task], True)
if ty == "graphrag":
if ty in ["graphrag", "raptor"]:
task["doc_ids"] = doc_ids
DocumentService.begin2parse(doc["id"])
assert REDIS_CONN.queue_product(get_svr_queue_name(priority), message=task), "Can't access Redis. Please check the Redis' status."
return task["id"]

View File

@ -25,6 +25,7 @@ from api.db.services.canvas_service import UserCanvasService
from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID
from api.utils import current_timestamp, datetime_format, get_uuid
@ -88,7 +89,7 @@ class PipelineOperationLogService(CommonService):
dsl = ""
referred_document_id = document_id
if referred_document_id == "x" and fake_document_ids:
if referred_document_id == GRAPH_RAPTOR_FAKE_DOC_ID and fake_document_ids:
referred_document_id = fake_document_ids[0]
ok, document = DocumentService.get_by_id(referred_document_id)
if not ok:
@ -128,7 +129,7 @@ class PipelineOperationLogService(CommonService):
log = dict(
id=get_uuid(),
document_id=document_id, # "x" or real document_id
document_id=document_id, # GRAPH_RAPTOR_FAKE_DOC_ID or real document_id
tenant_id=tenant_id,
kb_id=document.kb_id,
pipeline_id=pipeline_id,
@ -168,7 +169,7 @@ class PipelineOperationLogService(CommonService):
else:
logs = cls.model.select(*fields).where(cls.model.kb_id == kb_id)
logs = logs.where(cls.model.document_id != "x")
logs = logs.where(cls.model.document_id != GRAPH_RAPTOR_FAKE_DOC_ID)
if operation_status:
logs = logs.where(cls.model.operation_status.in_(operation_status))
@ -206,7 +207,7 @@ class PipelineOperationLogService(CommonService):
@DB.connection_context()
def get_dataset_logs_by_kb_id(cls, kb_id, page_number, items_per_page, orderby, desc, operation_status):
fields = cls.get_dataset_logs_fields()
logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (cls.model.document_id == "x"))
logs = cls.model.select(*fields).where((cls.model.kb_id == kb_id), (cls.model.document_id == GRAPH_RAPTOR_FAKE_DOC_ID))
if operation_status:
logs = logs.where(cls.model.operation_status.in_(operation_status))

View File

@ -36,6 +36,7 @@ from api import settings
from rag.nlp import search
CANVAS_DEBUG_DOC_ID = "dataflow_x"
GRAPH_RAPTOR_FAKE_DOC_ID = "graph_raptor_x"
def trim_header_by_lines(text: str, max_length) -> str:
# Trim header text to maximum length while preserving line breaks

View File

@ -679,7 +679,9 @@ TimeoutException = Union[Type[BaseException], BaseException]
OnTimeoutCallback = Union[Callable[..., Any], Coroutine[Any, Any, Any]]
def timeout(seconds: float | int = None, attempts: int = 2, *, exception: Optional[TimeoutException] = None, on_timeout: Optional[OnTimeoutCallback] = None):
def timeout(seconds: float | int | str = None, attempts: int = 2, *, exception: Optional[TimeoutException] = None, on_timeout: Optional[OnTimeoutCallback] = None):
if isinstance(seconds, str):
seconds = float(seconds)
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):

View File

@ -50,7 +50,7 @@ from peewee import DoesNotExist
from api.db import LLMType, ParserType, PipelineTaskType
from api.db.services.document_service import DocumentService
from api.db.services.llm_service import LLMBundle
from api.db.services.task_service import TaskService, has_canceled, CANVAS_DEBUG_DOC_ID
from api.db.services.task_service import TaskService, has_canceled, CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID
from api.db.services.file2document_service import File2DocumentService
from api import settings
from api.versions import get_ragflow_version
@ -222,9 +222,9 @@ async def collect():
return None, None
canceled = False
if msg.get("doc_id", "") == "x":
if msg.get("doc_id", "") == GRAPH_RAPTOR_FAKE_DOC_ID:
task = msg
if task["task_type"] == "graphrag" and msg.get("doc_ids", []):
if task["task_type"] in ["graphrag", "raptor"] and msg.get("doc_ids", []):
print(f"hack {msg['doc_ids']=}=",flush=True)
task = TaskService.get_task(msg["id"], msg["doc_ids"])
task["doc_ids"] = msg["doc_ids"]
@ -537,8 +537,25 @@ async def run_dataflow(task: dict):
v = vects[i].tolist()
ck["q_%d_vec" % len(v)] = v
metadata = {}
def dict_update(meta):
nonlocal metadata
if not meta or not isinstance(meta, dict):
return
for k,v in meta.items():
if k not in metadata:
metadata[k] = v
continue
if isinstance(metadata[k], list):
if isinstance(v, list):
metadata[k].extend(v)
else:
metadata[k].append(v)
else:
metadata[k] = v
for ck in chunks:
ck["doc_id"] = task["doc_id"]
ck["doc_id"] = doc_id
ck["kb_id"] = [str(task["kb_id"])]
ck["docnm_kwd"] = task["name"]
ck["create_time"] = str(datetime.now()).replace("T", " ")[:19]
@ -550,8 +567,19 @@ async def run_dataflow(task: dict):
del ck["keywords"]
if "summary" in ck:
del ck["summary"]
if "metadata" in ck:
dict_update(ck["metadata"])
del ck["metadata"]
del ck["text"]
if metadata:
e, doc = DocumentService.get_by_id(doc_id)
if e:
if isinstance(doc.meta_fields, str):
doc.meta_fields = json.loads(doc.meta_fields)
dict_update(doc.meta_fields)
DocumentService.update_by_id(doc_id, {"meta_fields": metadata})
start_ts = timer()
set_progress(task_id, prog=0.82, msg="Start to index...")
e = await insert_es(task_id, task["tenant_id"], task["kb_id"], chunks, partial(set_progress, task_id, 0, 100000000))
@ -562,8 +590,7 @@ async def run_dataflow(task: dict):
time_cost = timer() - start_ts
task_time_cost = timer() - task_start_ts
set_progress(task_id, prog=1., msg="Indexing done ({:.2f}s). Task done ({:.2f}s)".format(time_cost, task_time_cost))
logging.info(
"[Done], chunks({}), token({}), elapsed:{:.2f}".format(len(chunks), embedding_token_consumption, task_time_cost))
logging.info("[Done], chunks({}), token({}), elapsed:{:.2f}".format(len(chunks), embedding_token_consumption, task_time_cost))
PipelineOperationLogService.create(document_id=doc_id, pipeline_id=dataflow_id, task_type=PipelineTaskType.PARSE)
@ -609,6 +636,52 @@ async def run_raptor(row, chat_mdl, embd_mdl, vector_size, callback=None):
return res, tk_count
@timeout(3600)
async def run_raptor_for_kb(row, chat_mdl, embd_mdl, vector_size, callback=None, doc_ids=[]):
fake_doc_id = GRAPH_RAPTOR_FAKE_DOC_ID
chunks = []
vctr_nm = "q_%d_vec"%vector_size
for doc_id in doc_ids:
for d in settings.retrievaler.chunk_list(doc_id, row["tenant_id"], [str(row["kb_id"])],
fields=["content_with_weight", vctr_nm],
sort_by_position=True):
chunks.append((d["content_with_weight"], np.array(d[vctr_nm])))
raptor = Raptor(
row["parser_config"]["raptor"].get("max_cluster", 64),
chat_mdl,
embd_mdl,
row["parser_config"]["raptor"]["prompt"],
row["parser_config"]["raptor"]["max_token"],
row["parser_config"]["raptor"]["threshold"]
)
original_length = len(chunks)
chunks = await raptor(chunks, row["parser_config"]["raptor"]["random_seed"], callback)
doc = {
"doc_id": fake_doc_id,
"kb_id": [str(row["kb_id"])],
"docnm_kwd": row["name"],
"title_tks": rag_tokenizer.tokenize(row["name"])
}
if row["pagerank"]:
doc[PAGERANK_FLD] = int(row["pagerank"])
res = []
tk_count = 0
for content, vctr in chunks[original_length:]:
d = copy.deepcopy(doc)
d["id"] = xxhash.xxh64((content + str(fake_doc_id)).encode("utf-8")).hexdigest()
d["create_time"] = str(datetime.now()).replace("T", " ")[:19]
d["create_timestamp_flt"] = datetime.now().timestamp()
d[vctr_nm] = vctr.tolist()
d["content_with_weight"] = content
d["content_ltks"] = rag_tokenizer.tokenize(content)
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
res.append(d)
tk_count += num_tokens_from_string(content)
return res, tk_count
async def delete_image(kb_id, chunk_id):
try:
async with minio_limiter:
@ -704,7 +777,15 @@ async def do_handle_task(task):
chat_model = LLMBundle(task_tenant_id, LLMType.CHAT, llm_name=task_llm_id, lang=task_language)
# run RAPTOR
async with kg_limiter:
chunks, token_count = await run_raptor(task, chat_model, embedding_model, vector_size, progress_callback)
# chunks, token_count = await run_raptor(task, chat_model, embedding_model, vector_size, progress_callback)
chunks, token_count = await run_raptor_for_kb(
row=task,
chat_mdl=chat_model,
embd_mdl=embedding_model,
vector_size=vector_size,
callback=progress_callback,
doc_ids=task.get("doc_ids", []),
)
# Either using graphrag or Standard chunking methods
elif task_type == "graphrag":
if not task_parser_config.get("graphrag", {}).get("use_graphrag", False):
@ -807,7 +888,7 @@ async def handle_task():
logging.exception(f"handle_task got exception for task {json.dumps(task)}")
finally:
task_document_ids = []
if task_type in ["graphrag"]:
if task_type in ["graphrag", "raptor"]:
task_document_ids = task["doc_ids"]
if task["doc_id"] != CANVAS_DEBUG_DOC_ID:
PipelineOperationLogService.record_pipeline_operation(document_id=task["doc_id"], pipeline_id=task.get("dataflow_id", "") or "", task_type=pipeline_task_type, fake_document_ids=task_document_ids)

View File

@ -51,7 +51,7 @@ import RaptorFormFields, {
import { ButtonLoading } from '../ui/button';
import { Input } from '../ui/input';
import { DynamicPageRange } from './dynamic-page-range';
import { useFetchParserListOnMount, useShowAutoKeywords } from './hooks';
import { useShowAutoKeywords } from './hooks';
import {
useDefaultParserValues,
useFillDefaultValueOnMount,
@ -93,8 +93,6 @@ export function ChunkMethodDialog({
}: IProps) {
const { t } = useTranslation();
const { parserList } = useFetchParserListOnMount(documentExtension);
const { data: knowledgeDetails } = useFetchKnowledgeBaseConfiguration();
const useGraphRag = useMemo(() => {
@ -105,48 +103,58 @@ export function ChunkMethodDialog({
const fillDefaultParserValue = useFillDefaultValueOnMount();
const FormSchema = z.object({
parseType: z.number(),
parser_id: z
.string()
.min(1, {
message: t('common.pleaseSelect'),
})
.trim(),
pipeline_id: z.string().optional(),
parser_config: z.object({
task_page_size: z.coerce.number().optional(),
layout_recognize: z.string().optional(),
chunk_token_num: z.coerce.number().optional(),
delimiter: z.string().optional(),
auto_keywords: z.coerce.number().optional(),
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional().optional(),
max_token: z.coerce.number().optional(),
threshold: z.coerce.number().optional(),
max_cluster: z.coerce.number().optional(),
random_seed: z.coerce.number().optional(),
const FormSchema = z
.object({
parseType: z.number(),
parser_id: z
.string()
.min(1, {
message: t('common.pleaseSelect'),
})
.optional(),
graphrag: z.object({
use_graphrag: z.boolean().optional(),
.trim(),
pipeline_id: z.string().optional(),
parser_config: z.object({
task_page_size: z.coerce.number().optional(),
layout_recognize: z.string().optional(),
chunk_token_num: z.coerce.number().optional(),
delimiter: z.string().optional(),
auto_keywords: z.coerce.number().optional(),
auto_questions: z.coerce.number().optional(),
html4excel: z.boolean().optional(),
raptor: z
.object({
use_raptor: z.boolean().optional(),
prompt: z.string().optional().optional(),
max_token: z.coerce.number().optional(),
threshold: z.coerce.number().optional(),
max_cluster: z.coerce.number().optional(),
random_seed: z.coerce.number().optional(),
})
.optional(),
graphrag: z.object({
use_graphrag: z.boolean().optional(),
}),
entity_types: z.array(z.string()).optional(),
pages: z
.array(z.object({ from: z.coerce.number(), to: z.coerce.number() }))
.optional(),
}),
entity_types: z.array(z.string()).optional(),
pages: z
.array(z.object({ from: z.coerce.number(), to: z.coerce.number() }))
.optional(),
}),
});
})
.superRefine((data, ctx) => {
if (data.parseType === 2 && !data.pipeline_id) {
ctx.addIssue({
path: ['pipeline_id'],
message: t('common.pleaseSelect'),
code: 'custom',
});
}
});
const form = useForm<z.infer<typeof FormSchema>>({
resolver: zodResolver(FormSchema),
defaultValues: {
parser_id: parserId,
pipeline_id: pipelineId,
parser_id: parserId || '',
pipeline_id: pipelineId || '',
parseType: pipelineId ? 2 : 1,
parser_config: defaultParserValues,
},
@ -209,8 +217,8 @@ export function ChunkMethodDialog({
const pages =
parserConfig?.pages?.map((x) => ({ from: x[0], to: x[1] })) ?? [];
form.reset({
parser_id: parserId,
pipeline_id: pipelineId,
parser_id: parserId || '',
pipeline_id: pipelineId || '',
parseType: pipelineId ? 2 : 1,
parser_config: fillDefaultParserValue({
pages: pages.length > 0 ? pages : [{ from: 1, to: 1024 }],
@ -231,13 +239,14 @@ export function ChunkMethodDialog({
knowledgeDetails.parser_config,
parserConfig,
parserId,
pipelineId,
useGraphRag,
visible,
]);
const parseType = useWatch({
control: form.control,
name: 'parseType',
defaultValue: 1,
defaultValue: pipelineId ? 2 : 1,
});
return (
<Dialog open onOpenChange={hideModal}>

View File

@ -49,7 +49,7 @@ function Radio({ value, checked, disabled, onChange, children }: RadioProps) {
>
<span
className={cn(
'flex h-4 w-4 items-center justify-center rounded-full border border-input transition-colors',
'flex h-4 w-4 items-center justify-center rounded-full border border-border transition-colors',
'peer ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2',
isChecked && 'border-primary bg-primary/10',
mergedDisabled && 'border-muted',

View File

@ -11,6 +11,8 @@ export interface IDocumentInfo {
name: string;
parser_config: IParserConfig;
parser_id: string;
pipeline_id: string;
pipeline_name: string;
process_begin_at?: string;
process_duration: number;
progress: number;
@ -19,6 +21,7 @@ export interface IDocumentInfo {
size: number;
source_type: string;
status: string;
suffix: string;
thumbnail: string;
token_num: number;
type: string;

View File

@ -102,6 +102,9 @@ export default {
noMoreData: `That's all. Nothing more.`,
},
knowledgeDetails: {
fileSize: 'File Size',
fileType: 'File Type',
uploadedBy: 'Uploaded by',
notGenerated: 'Not generated',
generatedOn: 'Generated on',
subbarFiles: 'Files',
@ -128,7 +131,7 @@ export default {
success: 'Success',
failed: 'Failed',
completed: 'Completed',
processLog: 'Process Log',
datasetLog: 'Dataset Log',
created: 'Created',
learnMore: 'Learn More',
general: 'General',
@ -1705,13 +1708,60 @@ This delimiter is used to split the input text into several text pieces echo of
exportJson: 'Export JSON',
viewResult: 'View Result',
running: 'Running',
context: 'Context Generator',
contextDescription: 'Context Generator',
summary: 'Summary',
extractor: 'Extractor',
extractorDescription: 'Extractor',
summary: 'Augmented Context',
keywords: 'Keywords',
questions: 'Questions',
metadata: 'Metadata',
fieldName: 'Result Destination',
prompts: {
system: {
keywords: `Role
You are a text analyzer.
Task
Extract the most important keywords/phrases of a given piece of text content.
Requirements
- Summarize the text content, and give the top 5 important keywords/phrases.
- The keywords MUST be in the same language as the given piece of text content.
- The keywords are delimited by ENGLISH COMMA.
- Output keywords ONLY.`,
questions: `Role
You are a text analyzer.
Task
Propose 3 questions about a given piece of text content.
Requirements
- Understand and summarize the text content, and propose the top 3 important questions.
- The questions SHOULD NOT have overlapping meanings.
- The questions SHOULD cover the main content of the text as much as possible.
- The questions MUST be in the same language as the given piece of text content.
- One question per line.
- Output questions ONLY.`,
summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.
Key Instructions:
1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
2. Language: Write the summary in the same language as the source text.
3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.
Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
},
user: {
keywords: `Text Content
[Insert text here]`,
questions: `Text Content
[Insert text here]`,
summary: `Text to Summarize:
[Insert text here]`,
metadata: `Content: [INSERT CONTENT HERE]`,
},
},
},
},
};

View File

@ -94,6 +94,9 @@ export default {
noMoreData: '没有更多数据了',
},
knowledgeDetails: {
fileSize: '文件大小',
fileType: '文件类型',
uploadedBy: '创建者',
notGenerated: '未生成',
generatedOn: '生成于',
subbarFiles: '文件列表',
@ -116,7 +119,7 @@ export default {
success: '成功',
failed: '失败',
completed: '已完成',
processLog: '处理进度日志',
datasetLog: '知识库日志',
created: '创建于',
learnMore: '了解更多',
general: '通用',
@ -1623,13 +1626,60 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
exportJson: '导出 JSON',
viewResult: '查看结果',
running: '运行中',
context: '上下文生成器',
contextDescription: '上下文生成器',
summary: '摘要',
extractor: '提取器',
extractorDescription: '提取器',
summary: '增强上下文',
keywords: '关键词',
questions: '问题',
metadata: '元数据',
fieldName: '结果目的地',
prompts: {
system: {
keywords: `角色
你是一名文本分析员。
任务
从给定的文本内容中提取最重要的关键词/短语。
要求
- 总结文本内容并给出最重要的5个关键词/短语。
- 关键词必须与给定的文本内容使用相同的语言。
- 关键词之间用英文逗号分隔。
- 仅输出关键词。`,
questions: `角色
你是一名文本分析员。
任务
针对给定的文本内容提出3个问题。
要求
- 理解并总结文本内容并提出最重要的3个问题。
- 问题的含义不应重叠。
- 问题应尽可能涵盖文本的主要内容。
- 问题必须与给定的文本内容使用相同的语言。
- 每行一个问题。
- 仅输出问题。`,
summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
关键说明:
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
2. 语言:摘要必须使用与原文相同的语言。
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。
重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
},
user: {
keywords: `文本内容
[在此处插入文本]`,
questions: `文本内容
[在此处插入文本]`,
summary: `要总结的文本:
[在此处插入文本]`,
metadata: `内容:[在此处插入内容]`,
},
},
},
},
};

View File

@ -55,7 +55,7 @@ type IProps = {
onChange?: (value?: string) => void;
placeholder?: ReactNode;
} & PromptContentProps &
Pick<VariablePickerMenuPluginProps, 'extraOptions'>;
Pick<VariablePickerMenuPluginProps, 'extraOptions' | 'baseOptions'>;
function PromptContent({
showToolbar = true,
@ -126,6 +126,7 @@ export function PromptEditor({
showToolbar,
multiLine = true,
extraOptions,
baseOptions,
}: IProps) {
const { t } = useTranslation();
const initialConfig: InitialConfigType = {
@ -177,6 +178,7 @@ export function PromptEditor({
<VariablePickerMenuPlugin
value={value}
extraOptions={extraOptions}
baseOptions={baseOptions}
></VariablePickerMenuPlugin>
<PasteHandlerPlugin />
<VariableOnChangePlugin

View File

@ -109,29 +109,42 @@ function VariablePickerMenuItem({
);
}
export type VariablePickerMenuOptionType = {
label: string;
title: string;
value?: string;
options: Array<{
label: string;
value: string;
icon: ReactNode;
}>;
};
export type VariablePickerMenuPluginProps = {
value?: string;
extraOptions?: Array<{
label: string;
title: string;
options: Array<{ label: string; value: string; icon?: ReactNode }>;
}>;
extraOptions?: VariablePickerMenuOptionType[];
baseOptions?: VariablePickerMenuOptionType[];
};
export default function VariablePickerMenuPlugin({
value,
extraOptions,
baseOptions,
}: VariablePickerMenuPluginProps): JSX.Element {
const [editor] = useLexicalComposerContext();
const isFirstRender = useRef(true);
const checkForTriggerMatch = useBasicTypeaheadTriggerMatch('/', {
minLength: 0,
});
const previousValue = useRef<string | undefined>();
const [queryString, setQueryString] = React.useState<string | null>('');
let options = useBuildQueryVariableOptions();
if (baseOptions) {
options = baseOptions as typeof options;
}
const buildNextOptions = useCallback(() => {
let filteredOptions = [...options, ...(extraOptions ?? [])];
if (queryString) {
@ -267,8 +280,8 @@ export default function VariablePickerMenuPlugin({
);
useEffect(() => {
if (editor && value && isFirstRender.current) {
isFirstRender.current = false;
if (editor && value && value !== previousValue.current) {
previousValue.current = value;
editor.update(
() => {
parseTextToVariableNodes(value);

View File

@ -1,7 +1,7 @@
import { buildOutputOptions } from '@/utils/canvas-util';
import { isEmpty } from 'lodash';
import { useMemo } from 'react';
import { Operator } from '../../constant';
import { buildOutputOptions } from '../../hooks/use-get-begin-query';
import useGraphStore from '../../store';
export function useBuildSubNodeOutputOptions(nodeId?: string) {

View File

@ -1,19 +1,11 @@
import { AgentGlobals } from '@/constants/agent';
import { useFetchAgent } from '@/hooks/use-agent-request';
import { RAGFlowNodeType } from '@/interfaces/database/flow';
import { Edge } from '@xyflow/react';
import { buildNodeOutputOptions } from '@/utils/canvas-util';
import { DefaultOptionType } from 'antd/es/select';
import { t } from 'i18next';
import { isEmpty } from 'lodash';
import get from 'lodash/get';
import {
ReactNode,
useCallback,
useContext,
useEffect,
useMemo,
useState,
} from 'react';
import { useCallback, useContext, useEffect, useMemo, useState } from 'react';
import {
AgentDialogueMode,
BeginId,
@ -83,72 +75,18 @@ export const useGetBeginNodeDataQueryIsSafe = () => {
return isBeginNodeDataQuerySafe;
};
function filterAllUpstreamNodeIds(edges: Edge[], nodeIds: string[]) {
return nodeIds.reduce<string[]>((pre, nodeId) => {
const currentEdges = edges.filter((x) => x.target === nodeId);
const upstreamNodeIds: string[] = currentEdges.map((x) => x.source);
const ids = upstreamNodeIds.concat(
filterAllUpstreamNodeIds(edges, upstreamNodeIds),
);
ids.forEach((x) => {
if (pre.every((y) => y !== x)) {
pre.push(x);
}
});
return pre;
}, []);
}
export function buildOutputOptions(
outputs: Record<string, any> = {},
nodeId?: string,
parentLabel?: string | ReactNode,
icon?: ReactNode,
) {
return Object.keys(outputs).map((x) => ({
label: x,
value: `${nodeId}@${x}`,
parentLabel,
icon,
type: outputs[x]?.type,
}));
}
export function useBuildNodeOutputOptions(nodeId?: string) {
const nodes = useGraphStore((state) => state.nodes);
const edges = useGraphStore((state) => state.edges);
const nodeOutputOptions = useMemo(() => {
if (!nodeId) {
return [];
}
const upstreamIds = filterAllUpstreamNodeIds(edges, [nodeId]);
const nodeWithOutputList = nodes.filter(
(x) =>
upstreamIds.some((y) => y === x.id) && !isEmpty(x.data?.form?.outputs),
);
return nodeWithOutputList
.filter((x) => x.id !== nodeId)
.map((x) => ({
label: x.data.name,
value: x.id,
title: x.data.name,
options: buildOutputOptions(
x.data.form.outputs,
x.id,
x.data.name,
<OperatorIcon name={x.data.label as Operator} />,
),
}));
return useMemo(() => {
return buildNodeOutputOptions({
nodes,
edges,
nodeId,
Icon: ({ name }) => <OperatorIcon name={name as Operator}></OperatorIcon>,
});
}, [edges, nodeId, nodes]);
return nodeOutputOptions;
}
// exclude nodes with branches

View File

@ -124,7 +124,7 @@ function AccordionOperators({
Operator.Tokenizer,
Operator.Splitter,
Operator.HierarchicalMerger,
Operator.Context,
Operator.Extractor,
]}
isCustomDropdown={isCustomDropdown}
mousePosition={mousePosition}

View File

@ -119,7 +119,7 @@ export enum Operator {
Tokenizer = 'Tokenizer',
Splitter = 'Splitter',
HierarchicalMerger = 'HierarchicalMerger',
Context = 'Context',
Extractor = 'Extractor',
}
export const SwitchLogicOperatorOptions = ['and', 'or'];
@ -256,6 +256,23 @@ export const initialParserValues = {
fields: Object.values(ParserFields),
output_format: EmailOutputFormat.Text,
},
{
fileFormat: FileType.TextMarkdown,
output_format: TextMarkdownOutputFormat.Text,
},
{
fileFormat: FileType.Docx,
output_format: DocxOutputFormat.Json,
},
{
fileFormat: FileType.PowerPoint,
output_format: PptOutputFormat.Json,
},
{
fileFormat: FileType.Audio,
llm_id: '',
output_format: AudioOutputFormat.Text,
},
],
};
@ -289,9 +306,9 @@ export const initialHierarchicalMergerValues = {
],
};
export const initialContextValues = {
export const initialExtractorValues = {
...initialLlmBaseValues,
field_name: [ContextGeneratorFieldName.Summary],
field_name: ContextGeneratorFieldName.Summary,
outputs: {},
};
@ -318,6 +335,7 @@ export const RestrictedUpstreamMap = {
[Operator.Splitter]: [Operator.Begin],
[Operator.HierarchicalMerger]: [Operator.Begin],
[Operator.Tokenizer]: [Operator.Begin],
[Operator.Extractor]: [Operator.Begin],
};
export const NodeMap = {
@ -327,7 +345,7 @@ export const NodeMap = {
[Operator.Tokenizer]: 'tokenizerNode',
[Operator.Splitter]: 'splitterNode',
[Operator.HierarchicalMerger]: 'hierarchicalMergerNode',
[Operator.Context]: 'contextNode',
[Operator.Extractor]: 'contextNode',
};
export enum BeginQueryType {

View File

@ -1,5 +1,5 @@
import { Operator } from '../constant';
import ContextForm from '../form/context-form';
import ExtractorForm from '../form/extractor-form';
import HierarchicalMergerForm from '../form/hierarchical-merger-form';
import ParserForm from '../form/parser-form';
import SplitterForm from '../form/splitter-form';
@ -24,7 +24,7 @@ export const FormConfigMap = {
[Operator.HierarchicalMerger]: {
component: HierarchicalMergerForm,
},
[Operator.Context]: {
component: ContextForm,
[Operator.Extractor]: {
component: ExtractorForm,
},
};

View File

@ -1,86 +1,103 @@
import { LargeModelFormField } from '@/components/large-model-form-field';
import { LlmSettingSchema } from '@/components/llm-setting-items/next';
import { SelectWithSearch } from '@/components/originui/select-with-search';
import { RAGFlowFormItem } from '@/components/ragflow-form';
import { Form } from '@/components/ui/form';
import { MultiSelect } from '@/components/ui/multi-select';
import { useBuildPromptExtraPromptOptions } from '@/pages/agent/form/agent-form/use-build-prompt-options';
import { PromptEditor } from '@/pages/agent/form/components/prompt-editor';
import { buildOptions } from '@/utils/form';
import { zodResolver } from '@hookform/resolvers/zod';
import { memo } from 'react';
import { memo, useCallback } from 'react';
import { useForm } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import {
ContextGeneratorFieldName,
initialContextValues,
initialExtractorValues,
} from '../../constant';
import { useBuildNodeOutputOptions } from '../../hooks/use-build-options';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
import { INextOperatorForm } from '../../interface';
import useGraphStore from '../../store';
import { buildOutputList } from '../../utils/build-output-list';
import { FormWrapper } from '../components/form-wrapper';
import { Output } from '../components/output';
const outputList = buildOutputList(initialContextValues.outputs);
export const FormSchema = z.object({
field_name: z.string(),
sys_prompt: z.string(),
prompts: z.string().optional(),
...LlmSettingSchema,
field_name: z.array(z.string()),
});
export type ContextFormSchemaType = z.infer<typeof FormSchema>;
export type ExtractorFormSchemaType = z.infer<typeof FormSchema>;
const ContextForm = ({ node }: INextOperatorForm) => {
const defaultValues = useFormValues(initialContextValues, node);
const ExtractorForm = ({ node }: INextOperatorForm) => {
const defaultValues = useFormValues(initialExtractorValues, node);
const { t } = useTranslation();
const form = useForm<ContextFormSchemaType>({
const form = useForm<ExtractorFormSchemaType>({
defaultValues,
resolver: zodResolver(FormSchema),
// mode: 'onChange',
});
const { edges } = useGraphStore((state) => state);
const { extraOptions } = useBuildPromptExtraPromptOptions(edges, node?.id);
const promptOptions = useBuildNodeOutputOptions(node?.id);
const options = buildOptions(ContextGeneratorFieldName, t, 'dataflow');
const setPromptValue = useCallback(
(field: keyof ExtractorFormSchemaType, key: string, value: string) => {
form.setValue(field, t(`dataflow.prompts.${key}.${value}`), {
shouldDirty: true,
shouldValidate: true,
});
},
[form, t],
);
const handleFieldNameChange = useCallback(
(value: string) => {
if (value) {
setPromptValue('sys_prompt', 'system', value);
setPromptValue('prompts', 'user', value);
}
},
[setPromptValue],
);
useWatchFormChange(node?.id, form);
return (
<Form {...form}>
<FormWrapper>
<LargeModelFormField></LargeModelFormField>
<RAGFlowFormItem label={t('dataflow.fieldName')} name="field_name">
{(field) => (
<SelectWithSearch
onChange={(value) => {
field.onChange(value);
handleFieldNameChange(value);
}}
value={field.value}
placeholder={t('dataFlowPlaceholder')}
options={options}
></SelectWithSearch>
)}
</RAGFlowFormItem>
<RAGFlowFormItem label={t('flow.systemPrompt')} name="sys_prompt">
<PromptEditor
placeholder={t('flow.messagePlaceholder')}
showToolbar={true}
extraOptions={extraOptions}
baseOptions={promptOptions}
></PromptEditor>
</RAGFlowFormItem>
<RAGFlowFormItem label={t('flow.userPrompt')} name="prompts">
<PromptEditor showToolbar={true}></PromptEditor>
</RAGFlowFormItem>
<RAGFlowFormItem label={t('dataflow.fieldName')} name="field_name">
{(field) => (
<MultiSelect
onValueChange={field.onChange}
placeholder={t('dataFlowPlaceholder')}
defaultValue={field.value}
options={options}
></MultiSelect>
)}
<PromptEditor
showToolbar={true}
baseOptions={promptOptions}
></PromptEditor>
</RAGFlowFormItem>
</FormWrapper>
<div className="p-5">
<Output list={outputList}></Output>
</div>
</Form>
);
};
export default memo(ContextForm);
export default memo(ExtractorForm);

View File

@ -116,7 +116,7 @@ function ParserItem({ name, index, fieldLength, remove }: ParserItemProps) {
>
<div className="flex justify-between items-center">
<span className="text-text-primary text-sm font-medium">
Parser {index}
Parser {index + 1}
</span>
{index > 0 && (
<Button variant={'ghost'} onClick={() => remove(index)} ref={ref}>

View File

@ -9,7 +9,7 @@ import {
NodeMap,
Operator,
initialBeginValues,
initialContextValues,
initialExtractorValues,
initialHierarchicalMergerValues,
initialNoteValues,
initialParserValues,
@ -24,6 +24,7 @@ import {
export const useInitializeOperatorParams = () => {
const llmId = useFetchModelId();
const { t } = useTranslation();
const initialFormValuesMap = useMemo(() => {
return {
@ -33,9 +34,14 @@ export const useInitializeOperatorParams = () => {
[Operator.Tokenizer]: initialTokenizerValues,
[Operator.Splitter]: initialSplitterValues,
[Operator.HierarchicalMerger]: initialHierarchicalMergerValues,
[Operator.Context]: { ...initialContextValues, llm_id: llmId },
[Operator.Extractor]: {
...initialExtractorValues,
llm_id: llmId,
sys_prompt: t('dataflow.prompts.system.summary'),
prompts: t('dataflow.prompts.user.summary'),
},
};
}, [llmId]);
}, [llmId, t]);
const initializeOperatorParams = useCallback(
(operatorName: Operator) => {

View File

@ -0,0 +1,19 @@
import { buildNodeOutputOptions } from '@/utils/canvas-util';
import { useMemo } from 'react';
import { Operator } from '../constant';
import OperatorIcon from '../operator-icon';
import useGraphStore from '../store';
/**
 * React hook that builds grouped select options for every output produced by
 * nodes upstream of `nodeId`, each decorated with its operator's icon.
 *
 * @param nodeId Id of the node whose upstream outputs should be listed.
 * @returns Memoized option list recomputed only when the graph or id changes.
 */
export function useBuildNodeOutputOptions(nodeId?: string) {
  // Narrow selectors: subscribe to nodes/edges only, not the whole store.
  const nodes = useGraphStore((state) => state.nodes);
  const edges = useGraphStore((state) => state.edges);

  const options = useMemo(
    () =>
      buildNodeOutputOptions({
        nodes,
        edges,
        nodeId,
        Icon: ({ name }) => <OperatorIcon name={name as Operator} />,
      }),
    [edges, nodeId, nodes],
  );

  return options;
}

View File

@ -25,7 +25,7 @@ export const SVGIconMap = {
[Operator.Tokenizer]: ListMinus,
[Operator.Splitter]: Blocks,
[Operator.HierarchicalMerger]: Heading,
[Operator.Context]: FileStack,
[Operator.Extractor]: FileStack,
};
const Empty = () => {

View File

@ -1,55 +1,34 @@
import { Input } from '@/components/originui/input';
import { Button } from '@/components/ui/button';
import {
Popover,
PopoverContent,
PopoverTrigger,
} from '@/components/ui/popover';
import { Radio } from '@/components/ui/radio';
import { useTranslate } from '@/hooks/common-hooks';
import { cn } from '@/lib/utils';
import { SearchOutlined } from '@ant-design/icons';
import { ListFilter, Plus } from 'lucide-react';
import { Plus } from 'lucide-react';
import { useState } from 'react';
import { ChunkTextMode } from '../../constant';
interface ChunkResultBarProps {
changeChunkTextMode: React.Dispatch<React.SetStateAction<string | number>>;
available: number | undefined;
selectAllChunk: (value: boolean) => void;
handleSetAvailable: (value: number | undefined) => void;
createChunk: () => void;
handleInputChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
searchString: string;
createChunk: (text: string) => void;
}
export default ({
changeChunkTextMode,
available,
selectAllChunk,
handleSetAvailable,
createChunk,
handleInputChange,
searchString,
}: ChunkResultBarProps) => {
export default ({ changeChunkTextMode, createChunk }: ChunkResultBarProps) => {
const { t } = useTranslate('chunk');
const [textSelectValue, setTextSelectValue] = useState<string | number>(
ChunkTextMode.Full,
);
const handleFilterChange = (e: string | number) => {
const value = e === -1 ? undefined : (e as number);
selectAllChunk(false);
handleSetAvailable(value);
};
const filterContent = (
<div className="w-[200px]">
<Radio.Group onChange={handleFilterChange} value={available}>
<div className="flex flex-col gap-2 p-4">
<Radio value={-1}>{t('all')}</Radio>
<Radio value={1}>{t('enabled')}</Radio>
<Radio value={0}>{t('disabled')}</Radio>
</div>
</Radio.Group>
</div>
);
// const handleFilterChange = (e: string | number) => {
// const value = e === -1 ? undefined : (e as number);
// selectAllChunk(false);
// handleSetAvailable(value);
// };
// const filterContent = (
// <div className="w-[200px]">
// <Radio.Group onChange={handleFilterChange} value={available}>
// <div className="flex flex-col gap-2 p-4">
// <Radio value={-1}>{t('all')}</Radio>
// <Radio value={1}>{t('enabled')}</Radio>
// <Radio value={0}>{t('disabled')}</Radio>
// </div>
// </Radio.Group>
// </div>
// );
const textSelectOptions = [
{ label: t(ChunkTextMode.Full), value: ChunkTextMode.Full },
{ label: t(ChunkTextMode.Ellipse), value: ChunkTextMode.Ellipse },
@ -78,7 +57,7 @@ export default ({
</div>
))}
</div>
<Input
{/* <Input
className="bg-bg-card text-muted-foreground"
style={{ width: 200 }}
placeholder={t('search')}
@ -95,9 +74,9 @@ export default ({
<PopoverContent className="p-0 w-[200px]">
{filterContent}
</PopoverContent>
</Popover>
</Popover> */}
<Button
onClick={() => createChunk()}
onClick={() => createChunk('')}
variant={'secondary'}
className="bg-bg-card text-muted-foreground hover:bg-card"
>

View File

@ -2,8 +2,9 @@ import { Checkbox } from '@/components/ui/checkbox';
import { Textarea } from '@/components/ui/textarea';
import { cn } from '@/lib/utils';
import { CheckedState } from '@radix-ui/react-checkbox';
import { useState } from 'react';
import { useEffect, useState } from 'react';
import { ChunkTextMode } from '../../constant';
import styles from '../../index.less';
interface FormatPreserveEditorProps {
initialValue: {
key: string;
@ -17,6 +18,7 @@ interface FormatPreserveEditorProps {
isChunck?: boolean;
handleCheckboxClick?: (id: string | number, checked: boolean) => void;
selectedChunkIds?: string[];
textMode?: ChunkTextMode;
}
const FormatPreserveEditor = ({
initialValue,
@ -25,6 +27,7 @@ const FormatPreserveEditor = ({
isChunck,
handleCheckboxClick,
selectedChunkIds,
textMode,
}: FormatPreserveEditorProps) => {
const [content, setContent] = useState(initialValue);
// const [isEditing, setIsEditing] = useState(false);
@ -32,6 +35,10 @@ const FormatPreserveEditor = ({
undefined,
);
console.log('initialValue', initialValue);
useEffect(() => {
setContent(initialValue);
}, [initialValue]);
const handleEdit = (e?: any, index?: number) => {
console.log(e, index, content);
if (content.key === 'json') {
@ -143,7 +150,12 @@ const FormatPreserveEditor = ({
)}
{activeEditIndex !== index && (
<div
className="text-text-secondary overflow-auto scrollbar-auto whitespace-pre-wrap"
className={cn(
'text-text-secondary overflow-auto scrollbar-auto whitespace-pre-wrap w-full',
{
[styles.contentEllipsis]: textMode === ChunkTextMode.Ellipse,
},
)}
key={index}
onClick={(e) => {
handleEdit(e, index);

View File

@ -37,11 +37,11 @@ export const TimelineNodeObj = {
icon: <Heading size={13} />,
},
[TimelineNodeType.characterSplitter]: {
title: 'Title Splitter',
title: 'Character Splitter',
icon: <Heading size={13} />,
},
[TimelineNodeType.splitter]: {
title: 'Character Splitter',
title: 'Splitter',
icon: <Blocks size={13} />,
},
[TimelineNodeType.tokenizer]: {
@ -50,40 +50,6 @@ export const TimelineNodeObj = {
clickable: false,
},
};
// export const TimelineNodeArr = [
// {
// id: 1,
// title: 'File',
// icon: <PlayIcon size={13} />,
// clickable: false,
// type: TimelineNodeType.begin,
// },
// {
// id: 2,
// title: 'Context Generator',
// icon: <PlayIcon size={13} />,
// type: TimelineNodeType.contextGenerator,
// },
// {
// id: 3,
// title: 'Title Splitter',
// icon: <PlayIcon size={13} />,
// type: TimelineNodeType.titleSplitter,
// },
// {
// id: 4,
// title: 'Character Splitter',
// icon: <PlayIcon size={13} />,
// type: TimelineNodeType.characterSplitter,
// },
// {
// id: 5,
// title: 'Tokenizer',
// icon: <CheckLine size={13} />,
// clickable: false,
// type: TimelineNodeType.tokenizer,
// },
// ]
export interface TimelineDataFlowProps {
activeId: number | string;
activeFunc: (id: number | string, step: TimelineNode) => void;

View File

@ -249,7 +249,6 @@ export const useTimelineDataFlow = (data: IPipelineFileLogDetail) => {
}
const timeNode = {
...TimelineNodeObj[name],
clickable: true,
id: index,
className: 'w-32',
completed: false,

View File

@ -82,15 +82,6 @@
}
}
.card {
:global {
.ant-card-body {
padding: 10px;
margin: 0;
}
margin-bottom: 10px;
}
cursor: pointer;
.contentEllipsis {
.multipleLineEllipsis(3);
}

View File

@ -42,7 +42,7 @@ const Chunk = () => {
data: { documentInfo },
} = useFetchNextChunkList();
const { selectedChunkId } = useHandleChunkCardClick();
const [activeStepId, setActiveStepId] = useState<number | string>(0);
const [activeStepId, setActiveStepId] = useState<number | string>(2);
const { data: dataset } = useFetchPipelineFileLogDetail();
const { t } = useTranslation();

View File

@ -1,14 +1,16 @@
import { TimelineNode } from '@/components/originui/timeline';
import Spotlight from '@/components/spotlight';
import { Spin } from '@/components/ui/spin';
import { cn } from '@/lib/utils';
import classNames from 'classnames';
import { useCallback, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import ChunkResultBar from './components/chunk-result-bar';
import CheckboxSets from './components/chunk-result-bar/checkbox-sets';
import FormatPreserEditor from './components/parse-editer';
import RerunButton from './components/rerun-button';
import { TimelineNodeType } from './constant';
import { useFetchParserList } from './hooks';
import { useChangeChunkTextMode, useFetchParserList } from './hooks';
import { IDslComponent } from './interface';
interface IProps {
isChange: boolean;
@ -23,6 +25,7 @@ const ParserContainer = (props: IProps) => {
const { t } = useTranslation();
const { loading } = useFetchParserList();
const [selectedChunkIds, setSelectedChunkIds] = useState<string[]>([]);
const { changeChunkTextMode, textMode } = useChangeChunkTextMode();
const initialValue = useMemo(() => {
const outputs = data?.value?.obj?.params?.outputs;
const key = outputs?.output_format?.value;
@ -108,6 +111,19 @@ const ParserContainer = (props: IProps) => {
step?.type === TimelineNodeType.characterSplitter ||
step?.type === TimelineNodeType.titleSplitter ||
step?.type === TimelineNodeType.splitter;
const handleCreateChunk = useCallback(
(text: string) => {
console.log('handleCreateChunk', text);
const newText = [...initialText.value, { text: text || ' ' }];
setInitialText({
...initialText,
value: newText,
});
console.log('newText', newText, initialText);
},
[initialText],
);
return (
<>
{isChange && (
@ -122,28 +138,50 @@ const ParserContainer = (props: IProps) => {
<div className={classNames('flex flex-col w-full')}>
<Spin spinning={loading} className="" size="large">
<div className="h-[50px] flex flex-col justify-end pb-[5px]">
<div>
<h2 className="text-[16px]">
{t('dataflowParser.parseSummary')}
</h2>
<div className="text-[12px] text-text-secondary italic ">
{t('dataflowParser.parseSummaryTip')}
{!isChunck && (
<div>
<h2 className="text-[16px]">
{t('dataflowParser.parseSummary')}
</h2>
<div className="text-[12px] text-text-secondary italic ">
{t('dataflowParser.parseSummaryTip')}
</div>
</div>
</div>
)}
{isChunck && (
<div>
<h2 className="text-[16px]">{t('chunk.chunkResult')}</h2>
<div className="text-[12px] text-text-secondary italic">
{t('chunk.chunkResultTip')}
</div>
</div>
)}
</div>
{isChunck && (
<div className="pt-[5px] pb-[5px]">
<div className="pt-[5px] pb-[5px] flex justify-between items-center">
<CheckboxSets
selectAllChunk={selectAllChunk}
removeChunk={handleRemoveChunk}
checked={selectedChunkIds.length === initialText.value.length}
selectedChunkIds={selectedChunkIds}
/>
<ChunkResultBar
changeChunkTextMode={changeChunkTextMode}
createChunk={handleCreateChunk}
/>
</div>
)}
<div className=" border rounded-lg p-[20px] box-border h-[calc(100vh-180px)] w-[calc(100%-20px)] overflow-auto scrollbar-none">
<div
className={cn(
' border rounded-lg p-[20px] box-border w-[calc(100%-20px)] overflow-auto scrollbar-none',
{
'h-[calc(100vh-240px)]': isChunck,
'h-[calc(100vh-180px)]': !isChunck,
},
)}
>
<FormatPreserEditor
initialValue={initialText}
onSave={handleSave}
@ -151,6 +189,7 @@ const ParserContainer = (props: IProps) => {
initialText.key !== 'json' ? '!h-[calc(100vh-220px)]' : ''
}
isChunck={isChunck}
textMode={textMode}
isDelete={
step?.type === TimelineNodeType.characterSplitter ||
step?.type === TimelineNodeType.titleSplitter ||

View File

@ -4,6 +4,11 @@ export enum LogTabs {
}
export enum ProcessingType {
knowledgeGraph = 'knowledgeGraph',
raptor = 'raptor',
knowledgeGraph = 'GraphRAG',
raptor = 'RAPTOR',
}
export const ProcessingTypeMap = {
[ProcessingType.knowledgeGraph]: 'Knowledge Graph',
[ProcessingType.raptor]: 'Raptor',
};

View File

@ -10,13 +10,8 @@ import { useQuery } from '@tanstack/react-query';
import { useCallback, useState } from 'react';
import { useParams, useSearchParams } from 'umi';
import { LogTabs } from './dataset-common';
import { IFileLogList, IOverviewTital } from './interface';
export interface IOverviewTital {
cancelled: number;
failed: number;
finished: number;
processing: number;
}
const useFetchOverviewTital = () => {
const [searchParams] = useSearchParams();
const { id } = useParams();
@ -33,40 +28,6 @@ const useFetchOverviewTital = () => {
return { data };
};
export interface IFileLogItem {
create_date: string;
create_time: number;
document_id: string;
document_name: string;
document_suffix: string;
document_type: string;
dsl: any;
path: string[];
task_id: string;
id: string;
name: string;
kb_id: string;
operation_status: string;
parser_id: string;
pipeline_id: string;
pipeline_title: string;
avatar: string;
process_begin_at: null | string;
process_duration: number;
progress: number;
progress_msg: string;
source_from: string;
status: string;
task_type: string;
tenant_id: string;
update_date: string;
update_time: number;
}
export interface IFileLogList {
logs: IFileLogItem[];
total: number;
}
const useFetchFileLogList = () => {
const [searchParams] = useSearchParams();
const { searchString, handleInputChange } = useHandleSearchChange();

View File

@ -124,7 +124,7 @@ const FileLogsPage: FC = () => {
};
});
}
}, [tableOriginData, active]);
}, [tableOriginData]);
const changeActiveLogs = (active: (typeof LogTabs)[keyof typeof LogTabs]) => {
setActive(active);

View File

@ -0,0 +1,62 @@
import { RunningStatus, RunningStatusMap } from '../dataset/constant';
import { LogTabs } from './dataset-common';
/** Display-oriented fields merged onto a raw log row for table rendering. */
export interface DocumentLog {
  fileName: string;
  status: RunningStatus;
  // NOTE(review): typed as the whole RunningStatusMap object rather than a
  // single mapped label — confirm callers really assign the full map here.
  statusName: typeof RunningStatusMap;
}
/** Props for the file/dataset logs table component. */
export interface FileLogsTableProps {
  /** Rows to render: raw backend log items enriched with display fields. */
  data: Array<IFileLogItem & DocumentLog>;
  /** Total number of pages (pagination is driven externally/server-side). */
  pageCount: number;
  /** Current pagination state shown by the table footer. */
  pagination: {
    current: number;
    pageSize: number;
    total: number;
  };
  /** Invoked when the user changes page or page size. */
  setPagination: (pagination: { page: number; pageSize: number }) => void;
  /** When true, the table shows its loading state. */
  loading?: boolean;
  /** Which log tab is active (file logs vs dataset logs). */
  active: (typeof LogTabs)[keyof typeof LogTabs];
}
/**
 * Aggregate task counts for the dataset overview header.
 * NOTE(review): "Tital" looks like a typo for "Total"; renaming would break
 * existing importers, so the name is kept as-is.
 */
export interface IOverviewTital {
  cancelled: number;
  failed: number;
  finished: number;
  processing: number;
}
/** One pipeline file-processing log entry as returned by the backend. */
export interface IFileLogItem {
  create_date: string;
  create_time: number;
  document_id: string;
  document_name: string;
  /** File extension of the processed document. */
  document_suffix: string;
  document_type: string;
  // NOTE(review): untyped pipeline DSL payload; consumers read dsl.task_id —
  // consider a dedicated type once the DSL schema stabilizes.
  dsl: any;
  path: string[];
  task_id: string;
  id: string;
  name: string;
  /** Owning knowledge-base id. */
  kb_id: string;
  operation_status: string;
  parser_id: string;
  pipeline_id: string;
  pipeline_title: string;
  avatar: string;
  /** Null until the task has actually started. */
  process_begin_at: null | string;
  /** Duration in seconds — TODO confirm unit against the backend. */
  process_duration: number;
  // Presumably a 0..1 completion fraction (negative on failure, as other
  // progress fields in this feature are interpreted) — confirm.
  progress: number;
  progress_msg: string;
  source_from: string;
  status: string;
  /** Task category, e.g. GraphRAG / RAPTOR generation vs file parsing. */
  task_type: string;
  tenant_id: string;
  update_date: string;
  update_time: number;
}
/** Paged response wrapper for file-log queries. */
export interface IFileLogList {
  logs: IFileLogItem[];
  /** Total row count across all pages. */
  total: number;
}

View File

@ -1,7 +1,6 @@
import FileStatusBadge from '@/components/file-status-badge';
import { FileIcon } from '@/components/icon-font';
import { FileIcon, IconFontFill } from '@/components/icon-font';
import { RAGFlowAvatar } from '@/components/ragflow-avatar';
import SvgIcon from '@/components/svg-icon';
import { Button } from '@/components/ui/button';
import { RAGFlowPagination } from '@/components/ui/ragflow-pagination';
import {
@ -15,7 +14,7 @@ import {
import { RunningStatusMap } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { formatDate } from '@/utils/date';
import { formatDate, formatSecondsToHumanReadable } from '@/utils/date';
import {
ColumnDef,
ColumnFiltersState,
@ -29,32 +28,13 @@ import {
useReactTable,
} from '@tanstack/react-table';
import { TFunction } from 'i18next';
import { ClipboardList, Eye } from 'lucide-react';
import { ArrowUpDown, ClipboardList, Eye } from 'lucide-react';
import { FC, useMemo, useState } from 'react';
import { useParams } from 'umi';
import { RunningStatus } from '../dataset/constant';
import ProcessLogModal from '../process-log-modal';
import { LogTabs, ProcessingType } from './dataset-common';
import { IFileLogItem } from './hook';
interface DocumentLog {
fileName: string;
status: RunningStatus;
statusName: typeof RunningStatusMap;
}
interface FileLogsTableProps {
data: DocumentLog[];
pageCount: number;
pagination: {
current: number;
pageSize: number;
total: number;
};
setPagination: (pagination: { page: number; pageSize: number }) => void;
loading?: boolean;
active: (typeof LogTabs)[keyof typeof LogTabs];
}
import { LogTabs, ProcessingType, ProcessingTypeMap } from './dataset-common';
import { DocumentLog, FileLogsTableProps, IFileLogItem } from './interface';
export const getFileLogsTableColumns = (
t: TFunction<'translation', string>,
@ -133,7 +113,18 @@ export const getFileLogsTableColumns = (
},
{
accessorKey: 'process_begin_at',
header: t('startDate'),
header: ({ column }) => {
return (
<Button
variant="transparent"
className="border-none"
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
>
{t('startDate')}
<ArrowUpDown />
</Button>
);
},
cell: ({ row }) => (
<div className="text-text-primary">
{formatDate(row.original.process_begin_at)}
@ -227,34 +218,60 @@ export const getDatasetLogsTableColumns = (
),
},
{
accessorKey: 'startDate',
header: t('startDate'),
accessorKey: 'process_begin_at',
header: ({ column }) => {
return (
<Button
variant="transparent"
className="border-none"
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
>
{t('startDate')}
<ArrowUpDown />
</Button>
);
},
cell: ({ row }) => (
<div className="text-text-primary">{row.original.startDate}</div>
),
},
{
accessorKey: 'processingType',
header: t('processingType'),
cell: ({ row }) => (
<div className="flex items-center gap-2 text-text-primary">
{ProcessingType.knowledgeGraph === row.original.processingType && (
<SvgIcon name={`data-flow/knowledgegraph`} width={24}></SvgIcon>
)}
{ProcessingType.raptor === row.original.processingType && (
<SvgIcon name={`data-flow/raptor`} width={24}></SvgIcon>
)}
{row.original.processingType}
<div className="text-text-primary">
{formatDate(row.original.process_begin_at)}
</div>
),
},
{
accessorKey: 'status',
accessorKey: 'task_type',
header: t('processingType'),
cell: ({ row }) => (
<div className="flex items-center gap-2 text-text-primary">
{ProcessingType.knowledgeGraph === row.original.task_type && (
<IconFontFill
name={`knowledgegraph`}
className="text-text-secondary"
></IconFontFill>
)}
{ProcessingType.raptor === row.original.task_type && (
<IconFontFill
name={`dataflow-01`}
className="text-text-secondary"
></IconFontFill>
)}
{ProcessingTypeMap[row.original.task_type as ProcessingType] ||
row.original.task_type}
</div>
),
},
{
accessorKey: 'operation_status',
header: t('status'),
cell: ({ row }) => (
// <FileStatusBadge
// status={row.original.status}
// name={row.original.statusName}
// />
<FileStatusBadge
status={row.original.status}
name={row.original.statusName}
status={row.original.operation_status as RunningStatus}
name={
RunningStatusMap[row.original.operation_status as RunningStatus]
}
/>
),
},
@ -294,17 +311,19 @@ const FileLogsTable: FC<FileLogsTableProps> = ({
const { t } = useTranslate('knowledgeDetails');
const [isModalVisible, setIsModalVisible] = useState(false);
const { navigateToDataflowResult } = useNavigatePage();
const [logInfo, setLogInfo] = useState<IFileLogItem>({});
const [logInfo, setLogInfo] = useState<IFileLogItem>();
const kowledgeId = useParams().id;
const showLog = (row: Row<IFileLogItem & DocumentLog>) => {
const logDetail = {
taskId: row.original.id,
taskId: row.original?.dsl?.task_id,
fileName: row.original.document_name,
source: row.original.source_from,
task: row.original.dsl.task_id,
task: row.original?.task_type,
status: row.original.statusName,
startDate: formatDate(row.original.process_begin_at),
duration: (row.original.process_duration || 0) + 's',
duration: formatSecondsToHumanReadable(
row.original.process_duration || 0,
),
details: row.original.progress_msg,
};
console.log('logDetail', logDetail);
@ -331,7 +350,7 @@ const FileLogsTable: FC<FileLogsTableProps> = ({
[pagination],
);
const table = useReactTable({
const table = useReactTable<IFileLogItem & DocumentLog>({
data: data || [],
columns,
manualPagination: true,
@ -405,11 +424,14 @@ const FileLogsTable: FC<FileLogsTableProps> = ({
/>
</div>
</div>
<ProcessLogModal
visible={isModalVisible}
onCancel={() => setIsModalVisible(false)}
logInfo={logInfo}
/>
{isModalVisible && (
<ProcessLogModal
title={active === LogTabs.FILE_LOGS ? t('fileLogs') : t('datasetLog')}
visible={isModalVisible}
onCancel={() => setIsModalVisible(false)}
logInfo={logInfo}
/>
)}
</div>
);
};

View File

@ -27,6 +27,7 @@ import {
import { UseRowSelectionType } from '@/hooks/logic-hooks/use-row-selection';
import { useFetchDocumentList } from '@/hooks/use-document-request';
import { getExtension } from '@/utils/document-util';
import { t } from 'i18next';
import { pick } from 'lodash';
import { useMemo } from 'react';
import ProcessLogModal from '../process-log-modal';
@ -184,6 +185,7 @@ export function DatasetTable({
<ChunkMethodDialog
documentId={changeParserRecord.id}
parserId={changeParserRecord.parser_id}
pipelineId={changeParserRecord.pipeline_id}
parserConfig={changeParserRecord.parser_config}
documentExtension={getExtension(changeParserRecord.name)}
onOk={onChangeParserOk}
@ -213,6 +215,7 @@ export function DatasetTable({
)}
{logVisible && (
<ProcessLogModal
title={t('knowledgeDetails.fileLogs')}
visible={logVisible}
onCancel={() => hideLog()}
logInfo={logInfo}

View File

@ -9,63 +9,148 @@ import {
import { Modal } from '@/components/ui/modal/modal';
import { cn } from '@/lib/utils';
import { toFixed } from '@/utils/common-util';
import { UseMutateAsyncFunction } from '@tanstack/react-query';
import { t } from 'i18next';
import { lowerFirst } from 'lodash';
import { CirclePause, Trash2, WandSparkles } from 'lucide-react';
import { useState } from 'react';
import { useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { generateStatus, useFetchGenerateData } from './hook';
import { replaceText } from '../../process-log-modal';
import {
ITraceInfo,
generateStatus,
useDatasetGenerate,
useTraceGenerate,
} from './hook';
export enum GenerateType {
KnowledgeGraph = 'KnowledgeGraph',
Raptor = 'Raptor',
}
const MenuItem: React.FC<{ name: GenerateType }> = ({ name }) => {
console.log(name, 'pppp');
const MenuItem: React.FC<{
name: GenerateType;
data: ITraceInfo;
pauseGenerate: () => void;
runGenerate: UseMutateAsyncFunction<
any,
Error,
{
type: GenerateType;
},
unknown
>;
}> = ({ name, runGenerate, data, pauseGenerate }) => {
console.log(name, 'pppp', data);
const iconKeyMap = {
KnowledgeGraph: 'knowledgegraph',
Raptor: 'dataflow-01',
};
const {
data: { percent, type },
pauseGenerate,
} = useFetchGenerateData();
const type = useMemo(() => {
if (!data) {
return generateStatus.start;
}
if (data.progress >= 1) {
return generateStatus.completed;
} else if (!data.progress && data.progress !== 0) {
return generateStatus.start;
} else if (data.progress < 0) {
return generateStatus.failed;
} else if (data.progress < 1) {
return generateStatus.running;
}
}, [data]);
const percent =
type === generateStatus.failed
? 100
: type === generateStatus.running
? data.progress * 100
: 0;
return (
<div className="flex items-start gap-2 flex-col w-full">
<div className="flex justify-start text-text-primary items-center gap-2">
<IconFontFill name={iconKeyMap[name]} className="text-accent-primary" />
{t(`knowledgeDetails.${lowerFirst(name)}`)}
</div>
{type === generateStatus.start && (
<div className="text-text-secondary text-sm">
{t(`knowledgeDetails.generate${name}`)}
</div>
<DropdownMenuItem
className={cn(
'border cursor-pointer p-2 rounded-md focus:bg-transparent',
{
'hover:border-accent-primary hover:bg-[rgba(59,160,92,0.1)]':
type === generateStatus.start,
'hover:border-border hover:bg-[rgba(59,160,92,0)]':
type !== generateStatus.start,
},
)}
{type === generateStatus.running && (
<div className="flex justify-between items-center w-full">
<div className="w-[calc(100%-100px)] bg-border-button h-1 rounded-full">
<div
className="h-1 bg-accent-primary rounded-full"
style={{ width: `${toFixed(percent)}%` }}
></div>
onSelect={(e) => {
e.preventDefault();
}}
onClick={(e) => {
e.stopPropagation();
}}
>
<div
className="flex items-start gap-2 flex-col w-full"
onClick={() => {
if (type === generateStatus.start) {
runGenerate({ type: name });
}
}}
>
<div className="flex justify-start text-text-primary items-center gap-2">
<IconFontFill
name={iconKeyMap[name]}
className="text-accent-primary"
/>
{t(`knowledgeDetails.${lowerFirst(name)}`)}
</div>
{type === generateStatus.start && (
<div className="text-text-secondary text-sm">
{t(`knowledgeDetails.generate${name}`)}
</div>
<span>{toFixed(percent) as string}%</span>
<span
className="text-state-error"
onClick={() => {
pauseGenerate();
}}
>
<CirclePause />
</span>
)}
{(type === generateStatus.running ||
type === generateStatus.failed) && (
<div className="flex justify-between items-center w-full px-2.5 py-1">
<div
className={cn(' bg-border-button h-1 rounded-full', {
'w-[calc(100%-100px)]': type === generateStatus.running,
'w-[calc(100%-50px)]': type === generateStatus.failed,
})}
>
<div
className={cn('h-1 rounded-full', {
'bg-state-error': type === generateStatus.failed,
'bg-accent-primary': type === generateStatus.running,
})}
style={{ width: `${toFixed(percent)}%` }}
></div>
</div>
{type === generateStatus.running && (
<span>{(toFixed(percent) as string) + '%'}</span>
)}
<span
className="text-state-error"
onClick={(e) => {
e.stopPropagation();
pauseGenerate();
}}
>
{type === generateStatus.failed ? (
<IconFontFill name="reparse" className="text-accent-primary" />
) : (
<CirclePause />
)}
</span>
</div>
)}
<div className="w-full whitespace-pre-line text-wrap rounded-lg h-fit max-h-[350px] overflow-y-auto scrollbar-auto px-2.5 py-1">
{replaceText(data?.progress_msg || '')}
</div>
)}
</div>
</div>
</DropdownMenuItem>
);
};
const Generate: React.FC = () => {
const [open, setOpen] = useState(false);
const { graphRunData, raptorRunData } = useTraceGenerate({ open });
const { runGenerate, pauseGenerate } = useDatasetGenerate();
const handleOpenChange = (isOpen: boolean) => {
setOpen(isOpen);
console.log('Dropdown is now', isOpen ? 'open' : 'closed');
@ -85,29 +170,30 @@ const Generate: React.FC = () => {
{t('knowledgeDetails.generate')}
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent className="w-[380px] p-5 ">
<DropdownMenuItem
className="border cursor-pointer p-2 rounded-md hover:border-accent-primary hover:bg-[rgba(59,160,92,0.1)]"
onSelect={(e) => {
e.preventDefault();
}}
onClick={(e) => {
e.stopPropagation();
}}
>
<MenuItem name="KnowledgeGraph" />
</DropdownMenuItem>
<DropdownMenuItem
className="border cursor-pointer p-2 rounded-md mt-3 hover:border-accent-primary hover:bg-[rgba(59,160,92,0.1)]"
onSelect={(e) => {
e.preventDefault();
}}
onClick={(e) => {
e.stopPropagation();
}}
>
<MenuItem name="Raptor" />
</DropdownMenuItem>
<DropdownMenuContent className="w-[380px] p-5 flex flex-col gap-2 ">
{Object.values(GenerateType).map((name) => {
const data = (
name === GenerateType.KnowledgeGraph
? graphRunData
: raptorRunData
) as ITraceInfo;
console.log(
name,
'data',
data,
!data || (!data.progress && data.progress !== 0),
);
return (
<div key={name}>
<MenuItem
name={name}
runGenerate={runGenerate}
data={data}
pauseGenerate={pauseGenerate}
/>
</div>
);
})}
</DropdownMenuContent>
</DropdownMenu>
</div>

View File

@ -1,25 +1,128 @@
import { useQuery } from '@tanstack/react-query';
import { useCallback } from 'react';
import message from '@/components/ui/message';
import kbService from '@/services/knowledge-service';
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { t } from 'i18next';
import { useCallback, useEffect, useState } from 'react';
import { useParams } from 'umi';
import { GenerateType } from './generate';
// Lifecycle states derived in the UI from a trace record's `progress` field:
// start (no progress yet), running (0 <= progress < 1),
// completed (progress >= 1), failed (progress < 0).
export const generateStatus = {
  running: 'running',
  completed: 'completed',
  start: 'start',
  failed: 'failed',
};
const useFetchGenerateData = () => {
let number = 10;
// TODO: 获取数据
const { data, isFetching: loading } = useQuery({
queryKey: ['generateData', 'id'],
initialData: { id: 0, percent: 0, type: 'running' },
gcTime: 0,
refetchInterval: 3000,
queryFn: async () => {
number += Math.random() * 10;
const data = {
id: Math.random(),
percent: number,
type: generateStatus.running,
};
// Mutation keys for dataset-level react-query mutations (see useDatasetGenerate).
enum DatasetKey {
  generate = 'generate',
}
/**
 * A single pipeline task trace record, as returned by the
 * `trace_graphrag` / `trace_raptor` endpoints.
 */
export interface ITraceInfo {
  begin_at: string; // when processing began — presumably a server datetime string; TODO confirm format
  chunk_ids: string;
  create_date: string;
  create_time: number;
  digest: string;
  doc_id: string;
  from_page: number;
  id: string;
  priority: number;
  process_duration: number; // presumably seconds — verify against backend
  // Task progress. UI semantics: < 0 failed, [0, 1) running, >= 1 completed.
  progress: number;
  progress_msg: string; // raw log text rendered in the dropdown (passed through replaceText)
  retry_count: number;
  task_type: string;
  to_page: number;
  update_date: string;
  update_time: number;
}
/**
 * Polls the knowledge-graph and RAPTOR trace endpoints for the current
 * knowledge base while the dropdown is open, and keeps refetching every 5s
 * as long as the corresponding task is still in progress.
 */
export const useTraceGenerate = ({ open }: { open: boolean }) => {
  const { id } = useParams();
  const [pollGraph, setPollGraph] = useState(false);
  const [pollRaptor, setPollRaptor] = useState(false);

  // A task is worth polling only while its progress is a number in [0, 1).
  const shouldPoll = (progress?: number) =>
    !!((progress || progress === 0) && progress < 1 && progress >= 0);

  const { data: graphRunData, isFetching: graphRunloading } =
    useQuery<ITraceInfo>({
      queryKey: [GenerateType.KnowledgeGraph, id, open],
      gcTime: 0,
      refetchInterval: pollGraph ? 5000 : false,
      retry: 3,
      retryDelay: 1000,
      enabled: open,
      queryFn: async () => {
        const { data } = await kbService.traceGraphRag({
          kb_id: id,
        });
        return data?.data || {};
      },
    });

  const { data: raptorRunData, isFetching: raptorRunloading } =
    useQuery<ITraceInfo>({
      queryKey: [GenerateType.Raptor, id, open],
      gcTime: 0,
      refetchInterval: pollRaptor ? 5000 : false,
      retry: 3,
      retryDelay: 1000,
      enabled: open,
      queryFn: async () => {
        const { data } = await kbService.traceRaptor({
          kb_id: id,
        });
        return data?.data || {};
      },
    });

  // Re-evaluate polling whenever either task's progress changes.
  useEffect(() => {
    setPollGraph(shouldPoll(graphRunData?.progress));
  }, [graphRunData?.progress]);

  useEffect(() => {
    setPollRaptor(shouldPoll(raptorRunData?.progress));
  }, [raptorRunData?.progress]);

  return {
    graphRunData,
    graphRunloading,
    raptorRunData,
    raptorRunloading,
  };
};
export const useDatasetGenerate = () => {
const queryClient = useQueryClient();
const { id } = useParams();
const {
data,
isPending: loading,
mutateAsync,
} = useMutation({
mutationKey: [DatasetKey.generate],
mutationFn: async ({ type }: { type: GenerateType }) => {
const func =
type === GenerateType.KnowledgeGraph
? kbService.runGraphRag
: kbService.runRaptor;
const { data } = await func({
kb_id: id,
});
if (data.code === 0) {
message.success(t('message.operated'));
queryClient.invalidateQueries({
queryKey: [type],
});
}
return data;
},
});
@ -27,6 +130,5 @@ const useFetchGenerateData = () => {
// TODO: pause generate
console.log('pause generate');
}, []);
return { data, loading, pauseGenerate };
return { runGenerate: mutateAsync, pauseGenerate, data, loading };
};
export { useFetchGenerateData };

View File

@ -2,9 +2,12 @@ import { useSetModalState } from '@/hooks/common-hooks';
import { useNextWebCrawl } from '@/hooks/document-hooks';
import { useGetKnowledgeSearchParams } from '@/hooks/route-hook';
import { IDocumentInfo } from '@/interfaces/database/document';
import { formatDate, formatSecondsToHumanReadable } from '@/utils/date';
import { formatBytes } from '@/utils/file-util';
import { useCallback, useMemo, useState } from 'react';
import { useNavigate } from 'umi';
import { ILogInfo } from '../process-log-modal';
import { RunningStatus } from './constant';
export const useNavigateToOtherPage = () => {
const navigate = useNavigate();
@ -75,15 +78,17 @@ export const useShowLog = (documents: IDocumentInfo[]) => {
};
if (findRecord) {
log = {
taskId: findRecord.id,
fileName: findRecord.name,
fileSize: findRecord.size + '',
source: findRecord.source_type,
task: findRecord.status,
status: findRecord.run,
startTime: findRecord.process_begin_at,
endTime: findRecord.process_begin_at,
duration: findRecord.process_duration + 's',
fileType: findRecord?.suffix,
uploadedBy: findRecord?.created_by,
fileName: findRecord?.name,
uploadDate: formatDate(findRecord.create_date),
fileSize: formatBytes(findRecord.size || 0),
processBeginAt: formatDate(findRecord.process_begin_at),
chunkNumber: findRecord.chunk_num,
duration: formatSecondsToHumanReadable(
findRecord.process_duration || 0,
),
status: findRecord.run as RunningStatus,
details: findRecord.progress_msg,
};
}

View File

@ -1,9 +1,4 @@
import { Button } from '@/components/ui/button';
import {
HoverCard,
HoverCardContent,
HoverCardTrigger,
} from '@/components/ui/hover-card';
import { IDocumentInfo } from '@/interfaces/database/document';
import { useTranslation } from 'react-i18next';
import reactStringReplace from 'react-string-replace';
@ -88,20 +83,13 @@ export const PopoverContent = ({ record }: IProps) => {
export function ParsingCard({ record, handleShowLog }: IProps) {
return (
<HoverCard>
<HoverCardTrigger asChild>
<Button
variant={'transparent'}
className="border-none"
size={'sm'}
onClick={() => handleShowLog?.(record)}
>
<Dot run={record.run}></Dot>
</Button>
</HoverCardTrigger>
<HoverCardContent className="w-[40vw]">
<PopoverContent record={record}></PopoverContent>
</HoverCardContent>
</HoverCard>
<Button
variant={'transparent'}
className="border-none"
size={'sm'}
onClick={() => handleShowLog?.(record)}
>
<Dot run={record.run}></Dot>
</Button>
);
}

View File

@ -46,7 +46,15 @@ export function ParsingStatusCell({
} & UseChangeDocumentParserShowType &
UseSaveMetaShowType) {
const { t } = useTranslation();
const { run, parser_id, progress, chunk_num, id } = record;
const {
run,
parser_id,
pipeline_id,
pipeline_name,
progress,
chunk_num,
id,
} = record;
const operationIcon = IconMap[run];
const p = Number((progress * 100).toFixed(2));
const { handleRunDocumentByIds } = useHandleRunDocumentByIds(id);
@ -80,7 +88,11 @@ export function ParsingStatusCell({
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button variant={'transparent'} className="border-none" size={'sm'}>
{parser_id === 'naive' ? 'general' : parser_id}
{pipeline_id
? pipeline_name || pipeline_id
: parser_id === 'naive'
? 'general'
: parser_id}
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent>

View File

@ -3,10 +3,16 @@ import { Button } from '@/components/ui/button';
import { Modal } from '@/components/ui/modal/modal';
import { RunningStatusMap } from '@/constants/knowledge';
import { useTranslate } from '@/hooks/common-hooks';
import React from 'react';
import React, { useMemo } from 'react';
import reactStringReplace from 'react-string-replace';
import { RunningStatus } from './dataset/constant';
export interface ILogInfo {
fileType?: string;
uploadedBy?: string;
uploadDate?: string;
processBeginAt?: string;
chunkNumber?: number;
taskId?: string;
fileName: string;
fileSize?: string;
@ -23,6 +29,7 @@ interface ProcessLogModalProps {
visible: boolean;
onCancel: () => void;
logInfo: ILogInfo;
title: string;
}
const InfoItem: React.FC<{
@ -37,35 +44,41 @@ const InfoItem: React.FC<{
</div>
);
};
export const replaceText = (text: string) => {
// Remove duplicate \n
const nextText = text.replace(/(\n)\1+/g, '$1');
const replacedText = reactStringReplace(
nextText,
/(\[ERROR\].+\s)/g,
(match, i) => {
return (
<span key={i} className={'text-red-600'}>
{match}
</span>
);
},
);
return replacedText;
};
const ProcessLogModal: React.FC<ProcessLogModalProps> = ({
visible,
onCancel,
logInfo,
logInfo: initData,
title,
}) => {
const { t } = useTranslate('knowledgeDetails');
const blackKeyList = [''];
const replaceText = (text: string) => {
// Remove duplicate \n
const nextText = text.replace(/(\n)\1+/g, '$1');
console.log('logInfo', initData);
const logInfo = useMemo(() => {
console.log('logInfo', initData);
return initData;
}, [initData]);
const replacedText = reactStringReplace(
nextText,
/(\[ERROR\].+\s)/g,
(match, i) => {
return (
<span key={i} className={'text-red-600'}>
{match}
</span>
);
},
);
return replacedText;
};
return (
<Modal
title={t('processLog')}
title={title || 'log'}
open={visible}
onCancel={onCancel}
footer={
@ -77,7 +90,7 @@ const ProcessLogModal: React.FC<ProcessLogModalProps> = ({
>
<div className=" rounded-lg">
<div className="flex flex-wrap ">
{Object.keys(logInfo).map((key) => {
{Object?.keys(logInfo).map((key) => {
if (
blackKeyList.includes(key) ||
!logInfo[key as keyof typeof logInfo]
@ -86,7 +99,7 @@ const ProcessLogModal: React.FC<ProcessLogModalProps> = ({
}
if (key === 'details') {
return (
<div className="w-full" key={key}>
<div className="w-full mt-2" key={key}>
<InfoItem
label={t(key)}
value={

View File

@ -42,6 +42,10 @@ const {
getKnowledgeBasicInfo,
fetchDataPipelineLog,
fetchPipelineDatasetLogs,
runGraphRag,
traceGraphRag,
runRaptor,
traceRaptor,
} = api;
const methods = {
@ -188,6 +192,23 @@ const methods = {
url: api.get_pipeline_detail,
method: 'get',
},
runGraphRag: {
url: runGraphRag,
method: 'post',
},
traceGraphRag: {
url: traceGraphRag,
method: 'get',
},
runRaptor: {
url: runRaptor,
method: 'post',
},
traceRaptor: {
url: traceRaptor,
method: 'get',
},
};
const kbService = registerServer<keyof typeof methods>(methods, request);

View File

@ -50,6 +50,10 @@ export default {
fetchDataPipelineLog: `${api_host}/kb/list_pipeline_logs`,
get_pipeline_detail: `${api_host}/kb/pipeline_log_detail`,
fetchPipelineDatasetLogs: `${api_host}/kb/list_pipeline_dataset_logs`,
runGraphRag: `${api_host}/kb/run_graphrag`,
traceGraphRag: `${api_host}/kb/trace_graphrag`,
runRaptor: `${api_host}/kb/run_raptor`,
traceRaptor: `${api_host}/kb/trace_raptor`,
// tags
listTag: (knowledgeId: string) => `${api_host}/kb/${knowledgeId}/tags`,

View File

@ -0,0 +1,75 @@
import { BaseNode } from '@/interfaces/database/agent';
import { Edge } from '@xyflow/react';
import { isEmpty } from 'lodash';
import { ComponentType, ReactNode } from 'react';
/**
 * Collects the ids of every node transitively upstream of `nodeIds`,
 * deduplicated in first-discovery (DFS) order.
 *
 * Bug fix: the original recursed without tracking visited nodes, so a cycle
 * in the graph (a -> b -> a) caused infinite recursion / stack overflow.
 * `visited` marks nodes whose upstreams have already been expanded; it is
 * defaulted so existing two-argument callers are unaffected, and for acyclic
 * graphs the returned order is identical to the original implementation.
 *
 * @param edges   graph edges; an edge's `source` is upstream of its `target`
 * @param nodeIds starting node ids (not themselves included unless reached)
 * @param visited internal: node ids already expanded (cycle guard)
 * @returns deduplicated upstream node ids
 */
export function filterAllUpstreamNodeIds(
  edges: Edge[],
  nodeIds: string[],
  visited: Set<string> = new Set<string>(),
): string[] {
  return nodeIds.reduce<string[]>((pre, nodeId) => {
    // Already expanded: its upstreams are in some ancestor's result.
    if (visited.has(nodeId)) {
      return pre;
    }
    visited.add(nodeId);
    const upstreamNodeIds = edges
      .filter((x) => x.target === nodeId)
      .map((x) => x.source);
    const ids = upstreamNodeIds.concat(
      filterAllUpstreamNodeIds(edges, upstreamNodeIds, visited),
    );
    ids.forEach((x) => {
      if (!pre.includes(x)) {
        pre.push(x);
      }
    });
    return pre;
  }, []);
}
/**
 * Flattens a node's `outputs` map into select-option descriptors whose
 * value encodes both the node id and the output key ("nodeId@key").
 */
export function buildOutputOptions(
  outputs: Record<string, any> = {},
  nodeId?: string,
  parentLabel?: string | ReactNode,
  icon?: ReactNode,
) {
  return Object.entries(outputs).map(([key, meta]) => ({
    label: key,
    value: `${nodeId}@${key}`,
    parentLabel,
    icon,
    type: meta?.type,
  }));
}
/**
 * Builds grouped select options from the outputs of every node upstream of
 * `nodeId`. Nodes without outputs (and the node itself) are excluded.
 */
export function buildNodeOutputOptions({
  nodes,
  edges,
  nodeId,
  Icon,
}: {
  nodes: BaseNode[];
  edges: Edge[];
  nodeId?: string;
  Icon: ComponentType<{ name: string }>;
}) {
  if (!nodeId) {
    return [];
  }
  // Membership set of everything transitively upstream of the current node.
  const upstreamIds = new Set(filterAllUpstreamNodeIds(edges, [nodeId]));
  return nodes
    .filter(
      (node) =>
        node.id !== nodeId &&
        upstreamIds.has(node.id) &&
        !isEmpty(node.data?.form?.outputs),
    )
    .map((node) => ({
      label: node.data.name,
      value: node.id,
      title: node.data.name,
      options: buildOutputOptions(
        node.data.form.outputs,
        node.id,
        node.data.name,
        <Icon name={node.data.name} />,
      ),
    }));
}

View File

@ -1,5 +1,4 @@
import dayjs from 'dayjs';
import { toFixed } from './common-util';
export function formatDate(date: any) {
if (!date) {
@ -52,12 +51,13 @@ export function formatSecondsToHumanReadable(seconds: number): string {
const h = Math.floor(seconds / 3600);
const m = Math.floor((seconds % 3600) / 60);
const s = toFixed(seconds % 60, 3);
// const s = toFixed(seconds % 60, 3);
const s = seconds % 60;
const formattedSeconds = s === 0 ? '0' : s.toFixed(3).replace(/\.?0+$/, '');
const parts = [];
if (h > 0) parts.push(`${h}h`);
if (m > 0) parts.push(`${m}m`);
if (s || parts.length === 0) parts.push(`${s}s`);
if (h > 0) parts.push(`${h}h `);
if (m > 0) parts.push(`${m}m `);
if (s || parts.length === 0) parts.push(`${formattedSeconds}s`);
return parts.join('');
}