mirror of https://github.com/infiniflow/ragflow.git
synced 2026-02-06 18:45:08 +08:00

Compare commits: 664bc0b961 ... 8bc8126848 (2 commits)

| Author | SHA1 | Date |
|---|---|---|
| | 8bc8126848 | |
| | 71f69cdb75 | |
```diff
@@ -19,17 +19,19 @@ import re
 import sys
 from functools import partial
 
+import flask
 import trio
 from flask import request, Response
 from flask_login import login_required, current_user
 
 from agent.component import LLM
+from api import settings
 from api.db import CanvasCategory, FileType
 from api.db.services.canvas_service import CanvasTemplateService, UserCanvasService, API4ConversationService
 from api.db.services.document_service import DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.pipeline_operation_log_service import PipelineOperationLogService
-from api.db.services.task_service import queue_dataflow, CANVAS_DEBUG_DOC_ID
+from api.db.services.task_service import queue_dataflow, CANVAS_DEBUG_DOC_ID, TaskService
 from api.db.services.user_service import TenantService
 from api.db.services.user_canvas_version import UserCanvasVersionService
 from api.settings import RetCode
```
```diff
@@ -37,11 +39,12 @@ from api.utils import get_uuid
 from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
 from agent.canvas import Canvas
 from peewee import MySQLDatabase, PostgresqlDatabase
-from api.db.db_models import APIToken
+from api.db.db_models import APIToken, Task
 import time
 
 from api.utils.file_utils import filename_type, read_potential_broken_pdf
 from rag.flow.pipeline import Pipeline
+from rag.nlp import search
 from rag.utils.redis_conn import REDIS_CONN
 
 
```
```diff
@@ -189,6 +192,15 @@ def rerun():
         if 0 < doc["progress"] < 1:
             return get_data_error_result(message=f"`{doc['name']}` is processing...")
 
+        if settings.docStoreConn.indexExist(search.index_name(current_user.id), doc["kb_id"]):
+            settings.docStoreConn.delete({"doc_id": doc["id"]}, search.index_name(current_user.id), doc["kb_id"])
+        doc["progress_msg"] = ""
+        doc["chunk_num"] = 0
+        doc["token_num"] = 0
+        DocumentService.clear_chunk_num_when_rerun(doc["id"])
+        DocumentService.update_by_id(id, doc)
+        TaskService.filter_delete([Task.doc_id == id])
+
     dsl = req["dsl"]
     dsl["path"] = [req["component_id"]]
     PipelineOperationLogService.update_by_id(req["id"], {"dsl": dsl})
```
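The added block resets a document before re-queueing: stale chunks are dropped from the doc store, progress counters are zeroed, and any queued tasks for the document are discarded. A minimal self-contained sketch of that reset sequence; the in-memory store and task list below are illustrative stand-ins, not RAGFlow services:

```python
# Illustrative stand-ins, not RAGFlow APIs.
class InMemoryDocStore:
    def __init__(self):
        self.chunks = []  # each chunk is a dict carrying a "doc_id"

    def delete(self, cond):
        self.chunks = [c for c in self.chunks if c["doc_id"] != cond["doc_id"]]

def reset_document_for_rerun(doc, doc_store, task_queue):
    """Bring a document back to a clean, never-parsed state before re-queueing."""
    doc_store.delete({"doc_id": doc["id"]})  # drop previously indexed chunks
    doc.update({"progress_msg": "", "chunk_num": 0, "token_num": 0})
    task_queue[:] = [t for t in task_queue if t["doc_id"] != doc["id"]]
    return doc

store = InMemoryDocStore()
store.chunks = [{"doc_id": "d1"}, {"doc_id": "d2"}]
tasks = [{"doc_id": "d1"}]
doc = {"id": "d1", "chunk_num": 7, "token_num": 420, "progress_msg": "done"}
reset_document_for_rerun(doc, store, tasks)
print(store.chunks, tasks, doc)  # only d2's chunk survives; d1's task is gone
```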
```diff
@@ -420,8 +432,8 @@ def getversion( version_id):
 @login_required
 def list_canvas():
     keywords = request.args.get("keywords", "")
-    page_number = int(request.args.get("page", 1))
-    items_per_page = int(request.args.get("page_size", 150))
+    page_number = int(request.args.get("page", 0))
+    items_per_page = int(request.args.get("page_size", 0))
     orderby = request.args.get("orderby", "create_time")
     canvas_category = request.args.get("canvas_category")
     if request.args.get("desc", "true").lower() == "false":
```
```diff
@@ -429,9 +441,12 @@ def list_canvas():
     else:
         desc = True
     owner_ids = request.args.get("owner_ids", [])
+    if owner_ids and isinstance(owner_ids, str):
+        owner_ids = [owner_ids]
     if not owner_ids:
         tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
         tenants = [m["tenant_id"] for m in tenants]
+        tenants.append(current_user.id)
         canvas, total = UserCanvasService.get_by_tenant_ids(
             tenants, current_user.id, page_number,
             items_per_page, orderby, desc, keywords, canvas_category)
```
```diff
@@ -525,3 +540,11 @@ def prompts():
         #"context_ranking": RANK_MEMORY,
         "citation_guidelines": CITATION_PROMPT_TEMPLATE
     })
+
+
+@manager.route('/download', methods=['GET'])  # noqa: F821
+def download():
+    id = request.args.get("id")
+    created_by = request.args.get("created_by")
+    blob = FileService.get_blob(created_by, id)
+    return flask.make_response(blob)
```
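The new `/download` route returns the raw stored blob. A hedged usage sketch; the host, route prefix, and cookie-based authentication below are assumptions about the deployment, not documented API:

```python
import requests

# Assumed base URL and blueprint prefix; adjust to where the route is mounted.
resp = requests.get(
    "http://localhost:9380/v1/canvas/download",
    params={"id": "<file-id>", "created_by": "<user-id>"},
    cookies={"session": "<session-cookie>"},  # auth may be required depending on deployment
)
resp.raise_for_status()
with open("download.bin", "wb") as f:
    f.write(resp.content)  # response body is the raw blob
```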
```diff
@@ -68,7 +68,34 @@ def create():
     e, t = TenantService.get_by_id(current_user.id)
     if not e:
         return get_data_error_result(message="Tenant not found.")
-    #req["embd_id"] = t.embd_id
+    req["parser_config"] = {
+        "layout_recognize": "DeepDOC",
+        "chunk_token_num": 512,
+        "delimiter": "\n",
+        "auto_keywords": 0,
+        "auto_questions": 0,
+        "html4excel": False,
+        "topn_tags": 3,
+        "raptor": {
+            "use_raptor": True,
+            "prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
+            "max_token": 256,
+            "threshold": 0.1,
+            "max_cluster": 64,
+            "random_seed": 0
+        },
+        "graphrag": {
+            "use_graphrag": True,
+            "entity_types": [
+                "organization",
+                "person",
+                "geo",
+                "event",
+                "category"
+            ],
+            "method": "light"
+        }
+    }
     if not KnowledgebaseService.save(**req):
         return get_data_error_result()
     return get_json_result(data={"kb_id": req["id"]})
```
```diff
@@ -729,19 +756,21 @@ def delete_kb_task():
     if not pipeline_task_type or pipeline_task_type not in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP]:
         return get_error_data_result(message="Invalid task type")
 
-    kb_task_id = ""
     match pipeline_task_type:
         case PipelineTaskType.GRAPH_RAG:
             settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
             kb_task_id = "graphrag_task_id"
+            kb_task_finish_at = "graphrag_task_finish_at"
         case PipelineTaskType.RAPTOR:
             kb_task_id = "raptor_task_id"
+            kb_task_finish_at = "raptor_task_finish_at"
         case PipelineTaskType.MINDMAP:
             kb_task_id = "mindmap_task_id"
+            kb_task_finish_at = "mindmap_task_finish_at"
         case _:
             return get_error_data_result(message="Internal Error: Invalid task type")
 
-    ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id: ""})
+    ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id: "", kb_task_finish_at: None})
     if not ok:
         return server_error_response(f"Internal error: cannot delete task {pipeline_task_type}")
 
```
```diff
@@ -18,7 +18,7 @@ import logging
 import time
 from uuid import uuid4
 from agent.canvas import Canvas
-from api.db import CanvasCategory, TenantPermission
+from api.db import CanvasCategory
 from api.db.db_models import DB, CanvasTemplate, User, UserCanvas, API4Conversation
 from api.db.services.api_service import API4ConversationService
 from api.db.services.common_service import CommonService
```
```diff
@@ -104,6 +104,7 @@ class UserCanvasService(CommonService):
             cls.model.dsl,
             cls.model.description,
             cls.model.permission,
+            cls.model.user_id.alias("tenant_id"),
             User.nickname,
             User.avatar.alias('tenant_avatar'),
             cls.model.update_time,
```
```diff
@@ -111,16 +112,15 @@ class UserCanvasService(CommonService):
         ]
         if keywords:
             agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
-                ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
-                                                              TenantPermission.TEAM.value)) | (
-                        cls.model.user_id == user_id)),
-                (fn.LOWER(cls.model.title).contains(keywords.lower()))
+                cls.model.user_id.in_(joined_tenant_ids),
+                fn.LOWER(cls.model.title).contains(keywords.lower())
+                #(((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id)),
+                #(fn.LOWER(cls.model.title).contains(keywords.lower()))
             )
         else:
             agents = cls.model.select(*fields).join(User, on=(cls.model.user_id == User.id)).where(
-                ((cls.model.user_id.in_(joined_tenant_ids) & (cls.model.permission ==
-                                                              TenantPermission.TEAM.value)) | (
-                        cls.model.user_id == user_id))
+                cls.model.user_id.in_(joined_tenant_ids)
+                #(((cls.model.user_id.in_(joined_tenant_ids)) & (cls.model.permission == TenantPermission.TEAM.value)) | (cls.model.user_id == user_id))
             )
         if canvas_category:
             agents = agents.where(cls.model.canvas_category == canvas_category)
```
```diff
@@ -128,7 +128,9 @@ class UserCanvasService(CommonService):
             agents = agents.order_by(cls.model.getter_by(orderby).desc())
         else:
             agents = agents.order_by(cls.model.getter_by(orderby).asc())
 
         count = agents.count()
-        agents = agents.paginate(page_number, items_per_page)
+        if page_number and items_per_page:
+            agents = agents.paginate(page_number, items_per_page)
         return list(agents.dicts()), count
+
```
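Together with `list_canvas` switching its `page`/`page_size` defaults from `1`/`150` to `0`/`0`, the guard above makes `0` mean "no pagination". A compact sketch of the convention, assuming peewee's 1-indexed `paginate(page, rows)` semantics:

```python
def fetch_page(query, page_number=0, items_per_page=0):
    """Return (rows, total). Zero for either paging argument returns everything."""
    total = query.count()
    if page_number and items_per_page:  # both truthy -> slice a single page
        query = query.paginate(page_number, items_per_page)
    return list(query.dicts()), total
```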
```diff
@@ -29,7 +29,8 @@ from peewee import fn, Case, JOIN
 from api import settings
 from api.constants import IMG_BASE64_PREFIX, FILE_NAME_LEN_LIMIT
 from api.db import FileType, LLMType, ParserType, StatusEnum, TaskStatus, UserTenantRole, CanvasCategory
-from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant, File2Document, File, UserCanvas
+from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTenant, File2Document, File, UserCanvas, \
+    User
 from api.db.db_utils import bulk_insert_into_db
 from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
```
```diff
@@ -121,19 +122,21 @@ class DocumentService(CommonService):
                           orderby, desc, keywords, run_status, types, suffix):
         fields = cls.get_cls_model_fields()
         if keywords:
-            docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name")])\
+            docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
                 .join(File2Document, on=(File2Document.document_id == cls.model.id))\
                 .join(File, on=(File.id == File2Document.file_id))\
                 .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
+                .join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
                 .where(
                     (cls.model.kb_id == kb_id),
                     (fn.LOWER(cls.model.name).contains(keywords.lower()))
                 )
         else:
-            docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name")])\
+            docs = cls.model.select(*[*fields, UserCanvas.title.alias("pipeline_name"), User.nickname])\
                 .join(File2Document, on=(File2Document.document_id == cls.model.id))\
                 .join(UserCanvas, on=(cls.model.pipeline_id == UserCanvas.id), join_type=JOIN.LEFT_OUTER)\
                 .join(File, on=(File.id == File2Document.file_id))\
+                .join(User, on=(cls.model.created_by == User.id), join_type=JOIN.LEFT_OUTER)\
                 .where(cls.model.kb_id == kb_id)
 
         if run_status:
```
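Both branches now LEFT OUTER join `User` on `created_by`, so each row can carry the uploader's `nickname` without dropping documents whose creator is unknown. A self-contained peewee illustration of that join shape, using a toy schema on an in-memory SQLite database (not RAGFlow's models):

```python
from peewee import SqliteDatabase, Model, CharField, ForeignKeyField, JOIN

db = SqliteDatabase(":memory:")

class User(Model):
    nickname = CharField()
    class Meta:
        database = db

class Document(Model):
    name = CharField()
    created_by = ForeignKeyField(User, null=True)  # creator may be missing
    class Meta:
        database = db

db.create_tables([User, Document])
alice = User.create(nickname="alice")
Document.create(name="a.pdf", created_by=alice)
Document.create(name="orphan.pdf", created_by=None)

# LEFT OUTER keeps documents without a creator; their nickname comes back as None.
query = (Document
         .select(Document.name, User.nickname)
         .join(User, JOIN.LEFT_OUTER, on=(Document.created_by == User.id)))
for row in query.dicts():
    print(row)  # {'name': 'a.pdf', 'nickname': 'alice'}, {'name': 'orphan.pdf', 'nickname': None}
```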
```diff
@@ -123,7 +123,7 @@ class PipelineOperationLogService(CommonService):
             raise RuntimeError(f"Cannot find knowledge base {document.kb_id} for referred_document {referred_document_id}")
 
         tenant_id = kb_info.tenant_id
-        title = document.name
+        title = document.parser_id
         avatar = document.thumbnail
 
         if task_type not in VALID_PIPELINE_TASK_TYPES:
```
```diff
@@ -228,14 +228,12 @@ class PipelineOperationLogService(CommonService):
     @classmethod
     @DB.connection_context()
     def get_documents_info(cls, id):
-        fields = [Document.id, Document.name, Document.progress]
+        fields = [Document.id, Document.name, Document.progress, Document.kb_id]
         return (
             cls.model.select(*fields)
             .join(Document, on=(cls.model.document_id == Document.id))
             .where(
-                cls.model.id == id,
-                Document.progress > 0,
-                Document.progress < 1,
+                cls.model.id == id
             )
             .dicts()
         )
```
```diff
@@ -358,7 +358,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
         page_size = doc["parser_config"].get("task_page_size") or 12
         if doc["parser_id"] == "paper":
             page_size = doc["parser_config"].get("task_page_size") or 22
-        if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC":
+        if doc["parser_id"] in ["one", "knowledge_graph"] or do_layout != "DeepDOC" or doc["parser_config"].get("toc", True):
             page_size = 10 ** 9
         page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
         for s, e in page_ranges:
```
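The widened condition now also sends any parser config with `toc` enabled (the default) down the single-task path by inflating `page_size`. A runnable sketch of the page-bucketing arithmetic behind it; the task dict shape here is simplified, not RAGFlow's actual task schema:

```python
# Each (start, end) range is cut into page_size buckets, one task per bucket.
# Forcing page_size to 10**9 collapses everything into a single whole-document task.
def bucket_pages(page_ranges, page_size, total_pages):
    tasks = []
    for s, e in page_ranges:                      # ranges arrive 1-based
        s, e = max(0, s - 1), min(e, total_pages)
        for p in range(s, e, page_size):
            tasks.append({"from_page": p, "to_page": min(p + page_size, e)})
    return tasks

print(bucket_pages([(1, 10 ** 5)], 12, 30))       # three tasks: 0-12, 12-24, 24-30
print(bucket_pages([(1, 10 ** 5)], 10 ** 9, 30))  # one task covering all 30 pages
```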
```diff
@@ -505,7 +505,6 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
 
     task["kb_id"] = DocumentService.get_knowledgebase_id(doc_id)
     task["tenant_id"] = tenant_id
-    task["task_type"] = "dataflow"
     task["dataflow_id"] = flow_id
     task["file"] = file
 
```
```diff
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import json
 import random
 import re
 from copy import deepcopy
```
```diff
@@ -68,9 +68,10 @@ class HierarchicalMerger(ProcessBase):
 
             lines = [ln for ln in payload.split("\n") if ln]
         else:
-            lines = [o.get("text", "") for o in from_upstream.json_result]
+            arr = from_upstream.chunks if from_upstream.output_format == "chunks" else from_upstream.json_result
+            lines = [o.get("text", "") for o in arr]
             sections, section_images = [], []
-            for o in from_upstream.json_result or []:
+            for o in arr or []:
                 sections.append((o.get("text", ""), o.get("position_tag", "")))
                 section_images.append(o.get("img_id"))
 
```
```diff
@@ -128,21 +129,26 @@ class HierarchicalMerger(ProcessBase):
         all_pathes = []
         def dfs(n, path, depth):
             nonlocal all_pathes
-            if depth < self._param.hierarchy:
-                path = deepcopy(path)
+            if not n["children"] and path:
+                all_pathes.append(path)
 
             for nn in n["children"]:
-                path.extend([nn["index"], *nn["texts"]])
-                dfs(nn, path, depth+1)
+                if depth < self._param.hierarchy:
+                    _path = deepcopy(path)
+                else:
+                    _path = path
+                _path.extend([nn["index"], *nn["texts"]])
+                dfs(nn, _path, depth+1)
 
             if depth == self._param.hierarchy:
-                all_pathes.append(path)
+                all_pathes.append(_path)
 
         for i in range(len(lines)):
             print(i, lines[i])
         dfs(root, [], 0)
-        print("sSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS", json.dumps(root, ensure_ascii=False, indent=2))
 
+        if root["texts"]:
+            all_pathes.insert(0, root["texts"])
         if from_upstream.output_format in ["markdown", "text", "html"]:
             cks = []
             for path in all_pathes:
```
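The reworked `dfs` copies the accumulated path only while above the `hierarchy` cutoff, so sibling branches above the cutoff no longer leak into each other, while everything at or below the cutoff keeps extending one shared path; leaf paths are collected, and any `root["texts"]` front matter is prepended. A simplified, self-contained variant of the copy-above-the-cutoff idea, using a toy node shape with a single `text` per node rather than the component's real structure:

```python
from copy import deepcopy

def collect_paths(root, hierarchy):
    """Flatten a heading tree into paths, one per branch above the cutoff;
    nodes at or below `hierarchy` merge into their ancestor's path."""
    all_paths = []

    def dfs(node, path, depth):
        if not node["children"]:
            # Leaf: record the path once (siblings below the cutoff share one list).
            if path and (not all_paths or all_paths[-1] is not path):
                all_paths.append(path)
            return
        for child in node["children"]:
            branch = deepcopy(path) if depth < hierarchy else path
            branch.append(child["text"])
            dfs(child, branch, depth + 1)

    dfs(root, [], 0)
    return all_paths

tree = {"text": "", "children": [
    {"text": "H1-A", "children": [
        {"text": "H2-1", "children": []},
        {"text": "H2-2", "children": []},
    ]},
    {"text": "H1-B", "children": []},
]}
print(collect_paths(tree, hierarchy=2))  # [['H1-A', 'H2-1'], ['H1-A', 'H2-2'], ['H1-B']]
print(collect_paths(tree, hierarchy=1))  # [['H1-A', 'H2-1', 'H2-2'], ['H1-B']]
```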
```diff
@@ -161,7 +167,7 @@ class HierarchicalMerger(ProcessBase):
                 for i in path:
                     txt += lines[i] + "\n"
                     concat_img(img, id2image(section_images[i], partial(STORAGE_IMPL.get)))
-                cks.append(cks)
+                cks.append(txt)
                 images.append(img)
 
         cks = [
```
```diff
@@ -175,5 +181,6 @@ class HierarchicalMerger(ProcessBase):
         async with trio.open_nursery() as nursery:
             for d in cks:
                 nursery.start_soon(image2id, d, partial(STORAGE_IMPL.put), get_uuid())
+        self.set_output("chunks", cks)
 
         self.callback(1, "Done.")
```
```diff
@@ -235,8 +235,8 @@ class Parser(ProcessBase):
         self.set_output("output_format", conf["output_format"])
         spreadsheet_parser = ExcelParser()
         if conf.get("output_format") == "html":
-            html = spreadsheet_parser.html(blob, 1000000000)
-            self.set_output("html", html)
+            htmls = spreadsheet_parser.html(blob, 1000000000)
+            self.set_output("html", htmls[0])
         elif conf.get("output_format") == "json":
             self.set_output("json", [{"text": txt} for txt in spreadsheet_parser(blob) if txt])
         elif conf.get("output_format") == "markdown":
```
```diff
@@ -75,7 +75,6 @@ class Pipeline(Graph):
                     "trace": [{"progress": progress, "message": message, "datetime": datetime.datetime.now().strftime("%H:%M:%S"), "timestamp": timestamp, "elapsed_time": 0}],
                 }
             ]
-            REDIS_CONN.set_obj(log_key, obj, 60 * 30)
             if component_name != "END" and self._doc_id and self.task_id:
                 percentage = 1.0 / len(self.components.items())
                 finished = 0.0
```
```diff
@@ -94,6 +93,10 @@ class Pipeline(Graph):
                     t = obj[-1]["trace"][-1]
                     msg += "%s: %s\n" % (t["datetime"], t["message"])
                 TaskService.update_progress(self.task_id, {"progress": finished, "progress_msg": msg})
+            elif component_name == "END" and not self._doc_id:
+                obj[-1]["trace"][-1]["dsl"] = json.loads(str(self))
+            REDIS_CONN.set_obj(log_key, obj, 60 * 30)
+
         except Exception as e:
             logging.exception(e)
 
```
```diff
@@ -102,7 +102,7 @@ class Splitter(ProcessBase):
                 "image": img,
                 "positions": [[pos[0][-1], *pos[1:]] for pos in RAGFlowPdfParser.extract_positions(c)],
             }
-            for c, img in zip(chunks, images)
+            for c, img in zip(chunks, images) if c.strip()
         ]
         async with trio.open_nursery() as nursery:
             for d in cks:
```
```diff
@@ -40,12 +40,14 @@ class TokenizerFromUpstream(BaseModel):
         if self.chunks:
             return self
 
-        if self.output_format in {"markdown", "text"}:
+        if self.output_format in {"markdown", "text", "html"}:
             if self.output_format == "markdown" and not self.markdown_result:
                 raise ValueError("output_format=markdown requires a markdown payload (field: 'markdown' or 'markdown_result').")
             if self.output_format == "text" and not self.text_result:
                 raise ValueError("output_format=text requires a text payload (field: 'text' or 'text_result').")
+            if self.output_format == "html" and not self.html_result:
+                raise ValueError("output_format=html requires an html payload (field: 'html' or 'html_result').")
         else:
-            if not self.json_result:
+            if not self.json_result and not self.chunks:
                 raise ValueError("When no chunks are provided and output_format is not markdown/text, a JSON list payload is required (field: 'json' or 'json_result').")
         return self
```
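The validator now accepts `html` alongside `markdown` and `text`, requiring a matching payload per format, while pre-chunked input short-circuits the checks. A condensed pydantic-v2 sketch of the same shape check; field names are simplified from the original model:

```python
from typing import Optional
from pydantic import BaseModel, model_validator

class UpstreamPayload(BaseModel):
    output_format: str
    chunks: Optional[list] = None
    markdown_result: Optional[str] = None
    text_result: Optional[str] = None
    html_result: Optional[str] = None
    json_result: Optional[list] = None

    @model_validator(mode="after")
    def check_payload(self):
        if self.chunks:  # pre-chunked input needs no further payload
            return self
        required = {"markdown": self.markdown_result,
                    "text": self.text_result,
                    "html": self.html_result}
        if self.output_format in required:
            if not required[self.output_format]:
                raise ValueError(f"output_format={self.output_format} requires a matching payload.")
        elif not self.json_result:
            raise ValueError("A JSON list payload is required.")
        return self

UpstreamPayload(output_format="html", html_result="<p>ok</p>")  # validates
```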
```diff
@@ -137,7 +137,7 @@ class Tokenizer(ProcessBase):
             payload = from_upstream.markdown_result
         elif from_upstream.output_format == "text":
             payload = from_upstream.text_result
-        else: # == "html"
+        else:
             payload = from_upstream.html_result
 
         if not payload:
```
```diff
@@ -245,7 +245,7 @@ async def collect():
 
     task_type = msg.get("task_type", "")
     task["task_type"] = task_type
-    if task_type == "dataflow":
+    if task_type[:8] == "dataflow":
         task["tenant_id"] = msg["tenant_id"]
         task["dataflow_id"] = msg["dataflow_id"]
         task["kb_id"] = msg.get("kb_id", "")
```
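`task_type[:8] == "dataflow"` is a prefix test: since `len("dataflow")` is 8, it behaves exactly like `str.startswith("dataflow")`, so dataflow subtypes (hypothetical names below) route through the same branch:

```python
for task_type in ("dataflow", "dataflow_rerun", "raptor"):  # subtype names are illustrative
    print(task_type, task_type[:8] == "dataflow", task_type.startswith("dataflow"))
# dataflow        True  True
# dataflow_rerun  True  True
# raptor          False False
```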
```diff
@@ -491,6 +491,7 @@ async def run_dataflow(task: dict):
         e, pipeline_log = PipelineOperationLogService.get_by_id(dataflow_id)
         assert e, "Pipeline log not found."
         dsl = pipeline_log.dsl
+        dataflow_id = pipeline_log.pipeline_id
     pipeline = Pipeline(dsl, tenant_id=task["tenant_id"], doc_id=doc_id, task_id=task_id, flow_id=dataflow_id)
     chunks = await pipeline.run(file=task["file"]) if task.get("file") else await pipeline.run()
     if doc_id == CANVAS_DEBUG_DOC_ID:
```
```diff
@@ -652,7 +653,7 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si
         raptor_config["threshold"],
     )
     original_length = len(chunks)
-    chunks = await raptor(chunks, row["parser_config"]["raptor"]["random_seed"], callback)
+    chunks = await raptor(chunks, row["kb_parser_config"]["raptor"]["random_seed"], callback)
     doc = {
         "doc_id": fake_doc_id,
         "kb_id": [str(row["kb_id"])],
```
```diff
@@ -52,6 +52,12 @@
     '<symbol id="icon-system" viewBox="0 0 1024 1024"><path d="M764.416 97.472H259.648a106.752 106.752 0 0 0-106.752 106.752v664a54.72 54.72 0 0 0 78.08 49.536l235.104-111.008a106.688 106.688 0 0 1 91.04-0.032l235.904 111.104a54.72 54.72 0 0 0 78.048-49.536V204.224a106.656 106.656 0 0 0-106.656-106.752zM671.68 577.856h-319.36a38.144 38.144 0 1 1 0-76.288h319.36a38.144 38.144 0 1 1 0 76.288z m0-174.144h-319.36a38.144 38.144 0 1 1 0-76.288h319.36a38.144 38.144 0 0 1 0 76.288z" fill="#2595E8" ></path><path d="M871.104 469.824v-265.6a106.752 106.752 0 0 0-106.752-106.752H259.648a106.752 106.752 0 0 0-106.752 106.752V838.4a635.296 635.296 0 0 0 229.824 7.68l83.36-39.36a106.784 106.784 0 0 1 74.88-5.952 635.84 635.84 0 0 0 330.144-330.944z m-199.424 108.032h-319.36a38.144 38.144 0 1 1 0-76.288h319.36a38.144 38.144 0 1 1 0 76.288z m0-174.144h-319.36a38.144 38.144 0 1 1 0-76.288h319.36a38.144 38.144 0 0 1 0 76.288z" fill="#3A9CED" ></path><path d="M362.592 577.856h-10.272a38.144 38.144 0 0 1 0-76.288h142.848a637.12 637.12 0 0 0 103.808-97.856H352.32a38.144 38.144 0 1 1 0-76.288h302.208a630.496 630.496 0 0 0 86.272-229.888H259.648a106.752 106.752 0 0 0-106.752 106.752v422.496a631.072 631.072 0 0 0 209.696-48.96z" fill="#59ADF8" ></path><path d="M498.496 97.472H259.648a106.752 106.752 0 0 0-106.752 106.752v168.064a635.488 635.488 0 0 0 345.6-274.816z" fill="#6BC2FC" ></path></symbol>' +
     '<symbol id="icon-word" viewBox="0 0 1024 1024"><path d="M894.08 863.616H525.44c-14.336 0-25.92-14.08-25.92-31.36V193.92c0-17.344 11.584-31.424 25.856-31.424h368.64c14.272 0 25.856 14.08 25.856 31.36v638.272c0 17.344-11.584 31.36-25.856 31.36v0.064z" fill="#E8E8E8" ></path><path d="M788.672 353.28H525.44c-14.272 0-25.856-14.08-25.856-31.424s11.584-31.424 25.856-31.424h263.296c14.336 0 25.856 14.08 25.856 31.36 0 16.32-11.52 31.424-25.856 31.424v0.064z m0 127.808H525.44c-14.272 0-25.856-14.08-25.856-31.36 0-17.344 11.584-31.488 25.856-31.488h263.296c14.336 0 25.856 14.08 25.856 31.424 0 17.408-11.52 31.424-25.856 31.424z m0 126.848H525.44c-14.272 0-25.856-14.08-25.856-31.488 0-17.28 11.584-31.36 25.856-31.36h263.296c14.336 0 25.856 14.08 25.856 31.36 0 17.344-11.52 31.424-25.856 31.424z m0 127.872H525.44c-14.272 0-25.856-14.08-25.856-31.488 0-17.28 11.584-31.36 25.856-31.36h263.296c14.336 0 25.856 14.08 25.856 31.36 0 17.344-11.52 31.424-25.856 31.424z" fill="#B2B2B2" ></path><path d="M595.008 1024l-490.88-113.792V113.792L595.008 0z" fill="#0D47A1" ></path><path d="M455.808 707.584h-62.464l-41.152-250.24a236.8 236.8 0 0 1-3.52-43.392h-0.896a413.44 413.44 0 0 1-4.48 43.328l-42.88 250.304H235.328L170.24 317.504h61.568l34.816 260.096c1.792 10.816 2.688 25.984 3.584 44.352h0.896c0-14.08 2.688-29.248 5.376-45.44l44.608-259.008h59.776l41.088 262.208a371.2 371.2 0 0 1 3.584 42.24h0.896c0.896-14.08 1.792-28.16 3.52-43.328l34.816-260.032h56.256l-65.152 388.992z" fill="#FFFFFF" ></path></symbol>' +
     '<symbol id="icon-file" viewBox="0 0 1024 1024"><path d="M181.12 493.664a127.168 127.168 0 0 1 114.56-71.04h548.352v-30.592a99.2 99.2 0 0 0-99.2-99.2H601.6c-23.36 0-45.952-8.256-63.84-23.264l-111.424-93.696a99.328 99.328 0 0 0-63.84-23.232H185.952a99.2 99.2 0 0 0-99.2 99.2v432l94.336-190.176z" fill="#2595E8" ></path><path d="M879.008 487.936H295.68c-23.744 0-45.504 13.44-56.064 34.752l-143.264 288.768a99.2 99.2 0 0 0 89.6 56.8h558.816a99.328 99.328 0 0 0 90.656-58.848l0.096 0.128 100.928-234.24a62.56 62.56 0 0 0-57.44-87.36z m-176.992 309.92h-186.56a37.984 37.984 0 1 1 0-76h186.56a37.984 37.984 0 1 1 0 76z" fill="#2595E8" ></path><path d="M800.352 333.728a633.6 633.6 0 0 0-0.512-24.288 98.752 98.752 0 0 0-55.072-16.672H601.6c-23.36 0-45.952-8.256-63.84-23.264l-111.424-93.632a99.328 99.328 0 0 0-63.84-23.232H185.952a99.2 99.2 0 0 0-99.2 99.2v432l94.336-190.208a127.168 127.168 0 0 1 114.592-71.072h498.24c4.224-28.992 6.432-58.688 6.432-88.832z" fill="#3A9CED" ></path><path d="M295.68 487.936c-23.744 0-45.504 13.44-56.064 34.752l-143.264 288.768a99.2 99.2 0 0 0 89.6 56.8h296.64a609.92 609.92 0 0 0 102.624-70.4H515.52a37.984 37.984 0 1 1 0-76h144.96a606.144 606.144 0 0 0 120.224-233.92H295.68z" fill="#3A9CED" ></path><path d="M608.416 292.768H601.6c-23.36 0-45.952-8.256-63.84-23.264l-111.424-93.632a99.328 99.328 0 0 0-63.84-23.232H185.952a99.2 99.2 0 0 0-99.2 99.2v432l94.336-190.208a127.168 127.168 0 0 1 114.592-71.072h256.512a607.136 607.136 0 0 0 56.224-129.792z" fill="#59ADF8" ></path><path d="M239.616 522.688l-95.072 191.616a608.256 608.256 0 0 0 363.84-226.432H295.68a62.656 62.656 0 0 0-56.064 34.816z" fill="#59ADF8" ></path><path d="M418.976 170.304a99.2 99.2 0 0 0-56.544-17.664H185.952a99.2 99.2 0 0 0-99.2 99.2v220.128a610.144 610.144 0 0 0 332.224-301.664z" fill="#6BC2FC" ></path></symbol>' +
+      `<symbol id="icon-a-DiscordIconSVGVectorIcon" viewBox="0 0 1024 1024">
+    <path d="M867.424 180.388A834.008 834.008 0 0 0 656.168 114c-9.1 16.452-19.732 38.58-27.064 56.184-78.768-11.844-156.812-11.844-234.132 0-7.328-17.6-18.2-39.732-27.384-56.184a831.236 831.236 0 0 0-211.42 66.552C22.472 382.588-13.772 579.6 4.348 773.824c88.676 66.22 174.612 106.448 259.1 132.772A644.376 644.376 0 0 0 318.94 815.2a545.652 545.652 0 0 1-87.384-42.528 434.544 434.544 0 0 0 21.424-16.948c168.488 78.808 351.56 78.808 518.04 0a526.64 526.64 0 0 0 21.42 16.948 544.28 544.28 0 0 1-87.544 42.612c16.024 32.08 34.552 62.68 55.492 91.392 84.568-26.32 170.584-66.548 259.26-132.852 21.264-225.152-36.32-420.36-152.224-593.44zM341.896 654.38c-50.58 0-92.06-47.22-92.06-104.72s40.596-104.8 92.06-104.8c51.468 0 92.944 47.216 92.06 104.8 0.08 57.5-40.592 104.72-92.06 104.72z m340.204 0c-50.58 0-92.056-47.22-92.056-104.72s40.592-104.8 92.056-104.8c51.468 0 92.944 47.216 92.06 104.8 0 57.5-40.592 104.72-92.06 104.72z"></path>
+    </symbol>` +
+      `<symbol id="icon-GitHub" viewBox="0 0 1024 1024">
+    <path d="M512 42.666667C252.714667 42.666667 42.666667 252.714667 42.666667 512c0 207.658667 134.357333 383.104 320.896 445.269333 23.466667 4.096 32.256-9.941333 32.256-22.272 0-11.178667-0.554667-48.128-0.554667-87.424-117.930667 21.717333-148.437333-28.757333-157.824-55.125333-5.290667-13.525333-28.16-55.168-48.085333-66.304-16.426667-8.832-39.936-30.506667-0.597334-31.104 36.949333-0.597333 63.36 34.005333 72.149334 48.128 42.24 70.954667 109.696 51.029333 136.704 38.698667 4.096-30.506667 16.426667-51.029333 29.909333-62.762667-104.448-11.733333-213.546667-52.224-213.546667-231.765333 0-51.029333 18.176-93.269333 48.128-126.122667-4.736-11.733333-21.162667-59.818667 4.693334-124.373333 0 0 39.296-12.288 129.024 48.128a434.901333 434.901333 0 0 1 117.333333-15.829334c39.936 0 79.829333 5.248 117.333333 15.829334 89.770667-61.013333 129.066667-48.128 129.066667-48.128 25.813333 64.554667 9.429333 112.64 4.736 124.373333 29.909333 32.853333 48.085333 74.538667 48.085333 126.122667 0 180.138667-109.696 220.032-214.144 231.765333 17.024 14.677333 31.701333 42.837333 31.701334 86.826667 0 62.762667-0.597333 113.237333-0.597334 129.066666 0 12.330667 8.789333 26.965333 32.256 22.272C846.976 895.104 981.333333 719.104 981.333333 512c0-259.285333-210.005333-469.333333-469.333333-469.333333z"></path>
+    </symbol>` +
     '</svg>'),
   ((h) => {
     var a = (l = (l = document.getElementsByTagName('script'))[
```
```diff
@@ -122,18 +122,16 @@ export const useFetchAgentListByPage = () => {
       : [];
   const owner = filterValue.owner;
 
-  const requestParams = {
+  const requestParams: Record<string, any> = {
     keywords: debouncedSearchString,
     page_size: pagination.pageSize,
     page: pagination.current,
     canvas_category:
       canvasCategory.length === 1 ? canvasCategory[0] : undefined,
-    owner_ids: '',
   };
 
   if (Array.isArray(owner) && owner.length > 0) {
-    requestParams.owner_ids =
-      `${owner[0]}` + owner.slice(1).map((id) => `&owner_ids=${id}`);
+    requestParams.owner_ids = owner.join(',');
   }
 
   const { data, isFetching: loading } = useQuery<{
```
```diff
@@ -1,3 +1,4 @@
+import { IconFontFill } from '@/components/icon-font';
 import { RAGFlowAvatar } from '@/components/ragflow-avatar';
 import { useTheme } from '@/components/theme-provider';
 import { Button } from '@/components/ui/button';
```
```diff
@@ -20,7 +21,6 @@ import {
   CircleHelp,
   Cpu,
   File,
-  Github,
   House,
   Library,
   MessageSquareText,
```
```diff
@@ -114,15 +114,6 @@ export function Header() {
           className="size-10 mr-[12] cursor-pointer"
           onClick={handleLogoClick}
         />
-        <a
-          className="flex items-center gap-1.5 text-text-secondary"
-          target="_blank"
-          href="https://github.com/infiniflow/ragflow"
-          rel="noreferrer"
-        >
-          <Github className="size-4" />
-          {/* <span className=" text-base">21.5k stars</span> */}
-        </a>
       </div>
       <Segmented
         options={options}
```
```diff
@@ -130,6 +121,20 @@ export function Header() {
         onChange={handleChange}
       ></Segmented>
       <div className="flex items-center gap-5 text-text-badge">
+        <a
+          target="_blank"
+          href="https://discord.com/invite/NjYzJD3GM3"
+          rel="noreferrer"
+        >
+          <IconFontFill name="a-DiscordIconSVGVectorIcon"></IconFontFill>
+        </a>
+        <a
+          target="_blank"
+          href="https://github.com/infiniflow/ragflow"
+          rel="noreferrer"
+        >
+          <IconFontFill name="GitHub"></IconFontFill>
+        </a>
         <DropdownMenu>
           <DropdownMenuTrigger>
             <div className="flex items-center gap-1">
```
```diff
@@ -1705,7 +1705,6 @@ This delimiter is used to split the input text into several text pieces echo of
     regularExpressions: 'Regular Expressions',
     overlappedPercent: 'Overlapped percent',
     searchMethod: 'Search method',
-    filenameEmbdWeight: 'Filename embd weight',
     begin: 'File',
     parserMethod: 'Parser method',
     systemPrompt: 'System Prompt',
```
```diff
@@ -1771,6 +1770,11 @@ Important structured information may include: names, dates, locations, events, k
     cancel: 'Cancel',
     swicthPromptMessage:
       'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
+    tokenizerSearchMethodOptions: {
+      full_text: 'Full-text',
+      embedding: 'Embedding',
+    },
+    filenameEmbeddingWeight: 'Filename embedding weight',
     tokenizerFieldsOptions: {
       text: 'Text',
       keywords: 'Keywords',
```
```diff
@@ -29,12 +29,15 @@ export const FormSchema = z.object({
   fields: z.string(),
 });
 
-const SearchMethodOptions = buildOptions(TokenizerSearchMethod);
-
 const TokenizerForm = ({ node }: INextOperatorForm) => {
   const { t } = useTranslation();
   const defaultValues = useFormValues(initialTokenizerValues, node);
 
+  const SearchMethodOptions = buildOptions(
+    TokenizerSearchMethod,
+    t,
+    `dataflow.tokenizerSearchMethodOptions`,
+  );
   const FieldsOptions = buildOptions(
     TokenizerFields,
     t,
```
```diff
@@ -67,7 +70,7 @@ const TokenizerForm = ({ node }: INextOperatorForm) => {
       </RAGFlowFormItem>
       <SliderInputFormField
         name="filename_embd_weight"
-        label={t('dataflow.filenameEmbdWeight')}
+        label={t('dataflow.filenameEmbeddingWeight')}
         max={0.5}
         step={0.01}
       ></SliderInputFormField>
```