Compare commits

...

15 Commits

Author SHA1 Message Date
4cdaa77545 Docs: refine MinerU part in FAQ (#11111)
### What problem does this PR solve?

Refine MinerU part in FAQ.

### Type of change

- [x] Documentation Update
2025-11-07 19:58:07 +08:00
9fcc4946e2 Feat: add kimi-k2-thinking and moonshot-v1-vision-preview (#11110)
### What problem does this PR solve?

Add kimi-k2-thinking and moonshot-v1-vision-preview.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-11-07 19:52:57 +08:00
98e9d68c75 Feat: Add Variable aggregator (#11114)
### What problem does this PR solve?
Feat: Add Variable aggregator

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-11-07 19:52:26 +08:00
8f34824aa4 Feat: Display the selected variables in the variable aggregation node. #10427 (#11113)
### What problem does this PR solve?
Feat: Display the selected variables in the variable aggregation node.
#10427

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2025-11-07 19:52:04 +08:00
9a6808230a Fix workflows 2025-11-07 17:14:04 +08:00
c7bd0a755c Fix: python api streaming structure (#11105)
### What problem does this PR solve?

Fix: python api streaming structure

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-11-07 16:50:58 +08:00
dd1c8c5779 Feat: add auto parse to connector. (#11099)
### What problem does this PR solve?

#10953

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
2025-11-07 16:49:29 +08:00
526ba3388f Feat: The output is derived based on the configuration of the variable aggregation operator. #10427 (#11109)
### What problem does this PR solve?

Feat: The output is derived based on the configuration of the variable
aggregation operator. #10427

### Type of change


- [x] New Feature (non-breaking change which adds functionality)
2025-11-07 16:35:32 +08:00
cb95072ecf Fix workflows 2025-11-07 15:57:33 +08:00
f6aeebc608 Fix: cannot write mode RGBA as JPEG (#11102)
### What problem does this PR solve?
Fix #11091 
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-11-07 15:45:10 +08:00
307f53dae8 Minor tweaks (#11106)
### What problem does this PR solve?

Refactor

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2025-11-07 15:44:57 +08:00
fa98cc2bb9 Fix: add huggingface model download functionality (#11101)
### What problem does this PR solve?

reverse #11048

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-11-07 15:12:47 +08:00
c58d95ed69 Bump infinity to 0.6.4 (#11104)
### What problem does this PR solve?

Bump infinity to 0.6.4

Fixed https://github.com/infiniflow/infinity/issues/3048

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-11-07 14:44:34 +08:00
edbc396bc6 Fix: Added some prompts and polling functionality to retrieve data source logs. #10703 (#11103)
### What problem does this PR solve?

Fix: Added some prompts and polling functionality to retrieve data
source logs.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-11-07 14:28:45 +08:00
b137de1def Fix: Plain parser is skipped (#11094)
### What problem does this PR solve?

The plain parser is skipped.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-11-07 13:39:29 +08:00
50 changed files with 4080 additions and 3571 deletions

View File

@ -19,7 +19,7 @@ jobs:
runs-on: [ "self-hosted", "ragflow-test" ]
steps:
- name: Ensure workspace ownership
run: echo "chown -R $USER $GITHUB_WORKSPACE" && sudo chown -R $USER $GITHUB_WORKSPACE
run: echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} ${GITHUB_WORKSPACE}
# https://github.com/actions/checkout/blob/v3/README.md
- name: Check out code
@ -31,37 +31,37 @@ jobs:
- name: Prepare release body
run: |
if [[ $GITHUB_EVENT_NAME == 'create' ]]; then
if [[ ${GITHUB_EVENT_NAME} == "create" ]]; then
RELEASE_TAG=${GITHUB_REF#refs/tags/}
if [[ $RELEASE_TAG == 'nightly' ]]; then
if [[ ${RELEASE_TAG} == "nightly" ]]; then
PRERELEASE=true
else
PRERELEASE=false
fi
echo "Workflow triggered by create tag: $RELEASE_TAG"
echo "Workflow triggered by create tag: ${RELEASE_TAG}"
else
RELEASE_TAG=nightly
PRERELEASE=true
echo "Workflow triggered by schedule"
fi
echo "RELEASE_TAG=$RELEASE_TAG" >> $GITHUB_ENV
echo "PRERELEASE=$PRERELEASE" >> $GITHUB_ENV
echo "RELEASE_TAG=${RELEASE_TAG}" >> ${GITHUB_ENV}
echo "PRERELEASE=${PRERELEASE}" >> ${GITHUB_ENV}
RELEASE_DATETIME=$(date --rfc-3339=seconds)
echo Release $RELEASE_TAG created from $GITHUB_SHA at $RELEASE_DATETIME > release_body.md
echo Release ${RELEASE_TAG} created from ${GITHUB_SHA} at ${RELEASE_DATETIME} > release_body.md
- name: Move the existing mutable tag
# https://github.com/softprops/action-gh-release/issues/171
run: |
git fetch --tags
if [[ $GITHUB_EVENT_NAME == 'schedule' ]]; then
if [[ ${GITHUB_EVENT_NAME} == "schedule" ]]; then
# Determine if a given tag exists and matches a specific Git commit.
# actions/checkout@v4 fetch-tags doesn't work when triggered by schedule
if [ "$(git rev-parse -q --verify "refs/tags/$RELEASE_TAG")" = "$GITHUB_SHA" ]; then
echo "mutable tag $RELEASE_TAG exists and matches $GITHUB_SHA"
if [ "$(git rev-parse -q --verify "refs/tags/${RELEASE_TAG}")" = "${GITHUB_SHA}" ]; then
echo "mutable tag ${RELEASE_TAG} exists and matches ${GITHUB_SHA}"
else
git tag -f $RELEASE_TAG $GITHUB_SHA
git push -f origin $RELEASE_TAG:refs/tags/$RELEASE_TAG
echo "created/moved mutable tag $RELEASE_TAG to $GITHUB_SHA"
git tag -f ${RELEASE_TAG} ${GITHUB_SHA}
git push -f origin ${RELEASE_TAG}:refs/tags/${RELEASE_TAG}
echo "created/moved mutable tag ${RELEASE_TAG} to ${GITHUB_SHA}"
fi
fi
@ -87,7 +87,7 @@ jobs:
- name: Build and push image
run: |
echo ${{ secrets.DOCKERHUB_TOKEN }} | sudo docker login --username infiniflow --password-stdin
sudo docker login --username infiniflow --password-stdin <<< ${{ secrets.DOCKERHUB_TOKEN }}
sudo docker build --build-arg NEED_MIRROR=1 -t infiniflow/ragflow:${RELEASE_TAG} -f Dockerfile .
sudo docker tag infiniflow/ragflow:${RELEASE_TAG} infiniflow/ragflow:latest
sudo docker push infiniflow/ragflow:${RELEASE_TAG}

View File

@ -9,8 +9,11 @@ on:
- 'docs/**'
- '*.md'
- '*.mdx'
pull_request:
types: [ labeled, synchronize, reopened ]
# The only difference between pull_request and pull_request_target is the context in which the workflow runs:
# — pull_request_target workflows use the workflow files from the default branch, and secrets are available.
# — pull_request workflows use the workflow files from the pull request branch, and secrets are unavailable.
pull_request_target:
types: [ synchronize, ready_for_review ]
paths-ignore:
- 'docs/**'
- '*.md'
@ -28,7 +31,7 @@ jobs:
name: ragflow_tests
# https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution
# https://github.com/orgs/community/discussions/26261
if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci') }}
if: ${{ github.event_name != 'pull_request_target' || contains(github.event.pull_request.labels.*.name, 'ci') }}
runs-on: [ "self-hosted", "ragflow-test" ]
steps:
# https://github.com/hmarr/debug-action
@ -37,19 +40,20 @@ jobs:
- name: Ensure workspace ownership
run: |
echo "Workflow triggered by ${{ github.event_name }}"
echo "chown -R $USER $GITHUB_WORKSPACE" && sudo chown -R $USER $GITHUB_WORKSPACE
echo "chown -R ${USER} ${GITHUB_WORKSPACE}" && sudo chown -R ${USER} ${GITHUB_WORKSPACE}
# https://github.com/actions/checkout/issues/1781
- name: Check out code
uses: actions/checkout@v4
with:
ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.sha }}
fetch-depth: 0
fetch-tags: true
- name: Check workflow duplication
if: ${{ !cancelled() && !failure() && (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'ci')) }}
if: ${{ !cancelled() && !failure() }}
run: |
if [[ "$GITHUB_EVENT_NAME" != "pull_request" && "$GITHUB_EVENT_NAME" != "schedule" ]]; then
if [[ ${GITHUB_EVENT_NAME} != "pull_request_target" && ${GITHUB_EVENT_NAME} != "schedule" ]]; then
HEAD=$(git rev-parse HEAD)
# Find a PR that introduced a given commit
gh auth login --with-token <<< "${{ secrets.GITHUB_TOKEN }}"
@ -67,14 +71,14 @@ jobs:
gh run cancel ${GITHUB_RUN_ID}
while true; do
status=$(gh run view ${GITHUB_RUN_ID} --json status -q .status)
[ "$status" = "completed" ] && break
[ "${status}" = "completed" ] && break
sleep 5
done
exit 1
fi
fi
fi
else
elif [[ ${GITHUB_EVENT_NAME} == "pull_request_target" ]]; then
PR_NUMBER=${{ github.event.pull_request.number }}
PR_SHA_FP=${RUNNER_WORKSPACE_PREFIX}/artifacts/${GITHUB_REPOSITORY}/PR_${PR_NUMBER}
# Calculate the hash of the current workspace content
@ -93,18 +97,18 @@ jobs:
- name: Build ragflow:nightly
run: |
RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-${HOME}}
RAGFLOW_IMAGE=infiniflow/ragflow:${GITHUB_RUN_ID}
echo "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" >> $GITHUB_ENV
echo "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" >> ${GITHUB_ENV}
sudo docker pull ubuntu:22.04
sudo DOCKER_BUILDKIT=1 docker build --build-arg NEED_MIRROR=1 -f Dockerfile -t ${RAGFLOW_IMAGE} .
if [[ "$GITHUB_EVENT_NAME" == "schedule" ]]; then
if [[ ${GITHUB_EVENT_NAME} == "schedule" ]]; then
export HTTP_API_TEST_LEVEL=p3
else
export HTTP_API_TEST_LEVEL=p2
fi
echo "HTTP_API_TEST_LEVEL=${HTTP_API_TEST_LEVEL}" >> $GITHUB_ENV
echo "RAGFLOW_CONTAINER=${GITHUB_RUN_ID}-ragflow-cpu-1" >> $GITHUB_ENV
echo "HTTP_API_TEST_LEVEL=${HTTP_API_TEST_LEVEL}" >> ${GITHUB_ENV}
echo "RAGFLOW_CONTAINER=${GITHUB_RUN_ID}-ragflow-cpu-1" >> ${GITHUB_ENV}
- name: Start ragflow:nightly
run: |
@ -154,7 +158,7 @@ jobs:
echo -e "COMPOSE_PROFILES=\${COMPOSE_PROFILES},tei-cpu" >> docker/.env
echo -e "TEI_MODEL=BAAI/bge-small-en-v1.5" >> docker/.env
echo -e "RAGFLOW_IMAGE=${RAGFLOW_IMAGE}" >> docker/.env
echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> $GITHUB_ENV
echo "HOST_ADDRESS=http://host.docker.internal:${SVR_HTTP_PORT}" >> ${GITHUB_ENV}
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} up -d
uv sync --python 3.10 --only-group test --no-default-groups --frozen && uv pip install sdk/python
@ -189,7 +193,8 @@ jobs:
- name: Stop ragflow:nightly
if: always() # always run this step even if previous steps failed
run: |
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} down -v
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} down -v || true
sudo docker ps -a --filter "label=com.docker.compose.project=${GITHUB_RUN_ID}" -q | xargs -r sudo docker rm -f
- name: Start ragflow:nightly
run: |
@ -226,5 +231,9 @@ jobs:
- name: Stop ragflow:nightly
if: always() # always run this step even if previous steps failed
run: |
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} down -v
sudo docker rmi -f ${RAGFLOW_IMAGE:-NO_IMAGE} || true
# Sometimes `docker compose down` fails due to a hung container, heavy load, etc. Such containers need to be removed to release resources (for example, listening ports).
sudo docker compose -f docker/docker-compose.yml -p ${GITHUB_RUN_ID} down -v || true
sudo docker ps -a --filter "label=com.docker.compose.project=${GITHUB_RUN_ID}" -q | xargs -r sudo docker rm -f
if [[ -n ${RAGFLOW_IMAGE} ]]; then
sudo docker rmi -f ${RAGFLOW_IMAGE}
fi

View File

@ -0,0 +1,84 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
import os
from common.connection_utils import timeout
from agent.component.base import ComponentBase, ComponentParamBase
class VariableAggregatorParam(ComponentParamBase):
    """
    Configuration holder for the VariableAggregator component.

    Attributes:
        groups: list of dicts shaped like
            {"group_name": str, "variables": [variable selectors]}.
            NOTE(review): VariableAggregator._invoke reads each selector via
            selector['value'], so selectors appear to be dicts rather than
            plain strings - confirm against the front-end payload.
    """

    def __init__(self):
        super().__init__()
        # Filled in from the canvas DSL; empty until configured.
        self.groups = []

    def check(self):
        """
        Validate the configured groups.

        Raises:
            ValueError: when a group has no name, has no variables, or its
                variables entry is not a list.
        """
        self.check_empty(self.groups, "[VariableAggregator] groups")
        for group in self.groups:
            name = group.get("group_name")
            if not name:
                raise ValueError("[VariableAggregator] group_name can not be empty!")

            variables = group.get("variables")
            if not variables:
                raise ValueError(
                    f"[VariableAggregator] variables of group `{name}` can not be empty"
                )
            # Reached only for truthy values, e.g. a non-empty string or dict.
            if not isinstance(variables, list):
                raise ValueError(
                    f"[VariableAggregator] variables of group `{name}` should be a list of strings"
                )

    def get_input_form(self) -> dict[str, dict]:
        """Describe the single list-typed "variables" input of this component."""
        variables_field = {
            "name": "Variables",
            "type": "list",
        }
        return {"variables": variables_field}
class VariableAggregator(ComponentBase):
    """Component that publishes, per configured group, the first selector whose value is truthy."""

    component_name = "VariableAggregator"

    @timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
    def _invoke(self, **kwargs):
        """
        Resolve every configured group to one output.

        For each group the candidate selectors are recorded as the input
        `<group_name>.variables`, then looked up on the canvas in order. The
        first truthy value becomes the output named after the group. If no
        candidate yields a truthy value, no output is set for that group.
        """
        for group in self._param.groups:
            group_name = group.get("group_name")
            candidates = group.get("variables", [])

            # Keep a copy of the candidate selectors as this group's input.
            self.set_input_value(f"{group_name}.variables", list(candidates))

            for selector in candidates:
                value = self._canvas.get_variable_value(selector['value'])
                if not value:
                    # Falsy values (None, "", 0, empty containers) are skipped.
                    continue
                self.set_output(group_name, value)
                break

    @staticmethod
    def _to_object(value: Any) -> Any:
        """Return `value.to_object()` when that call succeeds, otherwise `value` unchanged."""
        try:
            converted = value.to_object()  # type: ignore[attr-defined]
        except Exception:
            # No to_object() available, or it raised: fall back to the raw value.
            return value
        return converted

    def thoughts(self) -> str:
        """Short status text describing what this component is doing."""
        return "Aggregating variables from canvas and grouping as configured."

View File

@ -466,10 +466,7 @@ def upload():
if "run" in form_data.keys():
if request.form.get("run").strip() == "1":
try:
info = {"run": 1, "progress": 0}
info["progress_msg"] = ""
info["chunk_num"] = 0
info["token_num"] = 0
info = {"run": 1, "progress": 0, "progress_msg": "", "chunk_num": 0, "token_num": 0}
DocumentService.update_by_id(doc["id"], info)
# if str(req["run"]) == TaskStatus.CANCEL.value:
tenant_id = DocumentService.get_tenant_id(doc["id"])
@ -726,8 +723,7 @@ def completion_faq():
if "quote" not in req:
req["quote"] = True
msg = []
msg.append({"role": "user", "content": req["word"]})
msg = [{"role": "user", "content": req["word"]}]
if not msg[-1].get("id"):
msg[-1]["id"] = get_uuid()
message_id = msg[-1]["id"]

View File

@ -410,22 +410,22 @@ def test_db_connect():
ibm_db.close(conn)
return get_json_result(data="Database Connection Successful!")
elif req["db_type"] == 'trino':
def _parse_catalog_schema(db: str):
if not db:
def _parse_catalog_schema(db_name: str):
if not db_name:
return None, None
if "." in db:
c, s = db.split(".", 1)
elif "/" in db:
c, s = db.split("/", 1)
if "." in db_name:
catalog_name, schema_name = db_name.split(".", 1)
elif "/" in db_name:
catalog_name, schema_name = db_name.split("/", 1)
else:
c, s = db, "default"
return c, s
catalog_name, schema_name = db_name, "default"
return catalog_name, schema_name
try:
import trino
import os
from trino.auth import BasicAuthentication
except Exception:
return server_error_response("Missing dependency 'trino'. Please install: pip install trino")
except Exception as e:
return server_error_response(f"Missing dependency 'trino'. Please install: pip install trino, detail: {e}")
catalog, schema = _parse_catalog_schema(req["database"])
if not catalog:
@ -479,7 +479,6 @@ def getlistversion(canvas_id):
@login_required
def getversion( version_id):
try:
e, version = UserCanvasVersionService.get_by_id(version_id)
if version:
return get_json_result(data=version.to_dict())
@ -546,11 +545,11 @@ def trace():
cvs_id = request.args.get("canvas_id")
msg_id = request.args.get("message_id")
try:
bin = REDIS_CONN.get(f"{cvs_id}-{msg_id}-logs")
if not bin:
binary = REDIS_CONN.get(f"{cvs_id}-{msg_id}-logs")
if not binary:
return get_json_result(data={})
return get_json_result(data=json.loads(bin.encode("utf-8")))
return get_json_result(data=json.loads(binary.encode("utf-8")))
except Exception as e:
logging.exception(e)

View File

@ -122,7 +122,7 @@ def update():
if not e:
return get_data_error_result(
message="Database error (Knowledgebase rename)!")
errors = Connector2KbService.link_connectors(kb.id, [conn["id"] for conn in connectors], current_user.id)
errors = Connector2KbService.link_connectors(kb.id, [conn for conn in connectors], current_user.id)
if errors:
logging.error("Link KB errors: ", errors)
kb = kb.to_dict()

View File

@ -73,7 +73,9 @@ class ConnectorService(CommonService):
return
SyncLogsService.filter_delete([SyncLogs.connector_id==connector_id, SyncLogs.kb_id==kb_id])
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}")
return FileService.delete_docs([d.id for d in docs], tenant_id)
err = FileService.delete_docs([d.id for d in docs], tenant_id)
SyncLogsService.schedule(connector_id, kb_id, reindex=True)
return err
class SyncLogsService(CommonService):
@ -226,16 +228,20 @@ class Connector2KbService(CommonService):
model = Connector2Kb
@classmethod
def link_connectors(cls, kb_id:str, connector_ids: list[str], tenant_id:str):
def link_connectors(cls, kb_id:str, connectors: list[dict], tenant_id:str):
arr = cls.query(kb_id=kb_id)
old_conn_ids = [a.connector_id for a in arr]
for conn_id in connector_ids:
connector_ids = []
for conn in connectors:
conn_id = conn["id"]
connector_ids.append(conn_id)
if conn_id in old_conn_ids:
continue
cls.save(**{
"id": get_uuid(),
"connector_id": conn_id,
"kb_id": kb_id
"kb_id": kb_id,
"auto_parse": conn.get("auto_parse", "1")
})
SyncLogsService.schedule(conn_id, kb_id, reindex=True)

View File

@ -63,7 +63,7 @@ def _convert_message_to_document(
semantic_identifier=semantic_identifier,
doc_updated_at=doc_updated_at,
blob=message.content.encode("utf-8"),
extension="txt",
extension=".txt",
size_bytes=len(message.content.encode("utf-8")),
)
@ -275,7 +275,7 @@ class DiscordConnector(LoadConnector, PollConnector):
semantic_identifier=f"{min_updated_at} -> {max_updated_at}",
doc_updated_at=max_updated_at,
blob=blob,
extension="txt",
extension=".txt",
size_bytes=size_bytes,
)

View File

@ -1,6 +1,5 @@
import logging
from collections.abc import Generator
from datetime import datetime, timezone
from typing import Any, Optional
from retry import retry
@ -33,7 +32,7 @@ from common.data_source.utils import (
batch_generator,
fetch_notion_data,
properties_to_str,
filter_pages_by_time
filter_pages_by_time, datetime_from_string
)
@ -293,9 +292,9 @@ class NotionConnector(LoadConnector, PollConnector):
blob=blob,
source=DocumentSource.NOTION,
semantic_identifier=page_title,
extension="txt",
extension=".txt",
size_bytes=len(blob),
doc_updated_at=datetime.fromisoformat(page.last_edited_time).astimezone(timezone.utc)
doc_updated_at=datetime_from_string(page.last_edited_time)
)
if self.recursive_index_enabled and all_child_page_ids:

View File

@ -892,7 +892,7 @@
{
"name": "Moonshot",
"logo": "",
"tags": "LLM,TEXT EMBEDDING",
"tags": "LLM,TEXT EMBEDDING,IMAGE2TEXT",
"status": "1",
"llm": [
{
@ -916,6 +916,20 @@
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "kimi-k2-thinking",
"tags": "LLM,CHAT,256k",
"max_tokens": 262144,
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "kimi-k2-thinking-turbo",
"tags": "LLM,CHAT,256k",
"max_tokens": 262144,
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "kimi-k2-turbo-preview",
"tags": "LLM,CHAT,256k",
@ -932,25 +946,46 @@
},
{
"llm_name": "moonshot-v1-8k",
"tags": "LLM,CHAT,",
"max_tokens": 7900,
"tags": "LLM,CHAT,8k",
"max_tokens": 8192,
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "moonshot-v1-32k",
"tags": "LLM,CHAT,",
"tags": "LLM,CHAT,32k",
"max_tokens": 32768,
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "moonshot-v1-128k",
"tags": "LLM,CHAT",
"max_tokens": 128000,
"tags": "LLM,CHAT,128k",
"max_tokens": 131072,
"model_type": "chat",
"is_tools": true
},
{
"llm_name": "moonshot-v1-8k-vision-preview",
"tags": "LLM,IMAGE2TEXT,8k",
"max_tokens": 8192,
"model_type": "image2text",
"is_tools": true
},
{
"llm_name": "moonshot-v1-32k-vision-preview",
"tags": "LLM,IMAGE2TEXT,32k",
"max_tokens": 32768,
"model_type": "image2text",
"is_tools": true
},
{
"llm_name": "moonshot-v1-128k-vision-preview",
"tags": "LLM,IMAGE2TEXT,128k",
"max_tokens": 131072,
"model_type": "image2text",
"is_tools": true
},
{
"llm_name": "moonshot-v1-auto",
"tags": "LLM,CHAT,",

View File

@ -72,7 +72,7 @@ services:
infinity:
profiles:
- infinity
image: infiniflow/infinity:v0.6.2
image: infiniflow/infinity:v0.6.4
volumes:
- infinity_data:/var/infinity
- ./infinity_conf.toml:/infinity_conf.toml
@ -120,8 +120,8 @@ services:
healthcheck:
test: ["CMD", "curl", "http://localhost:9385/healthz"]
interval: 10s
timeout: 5s
retries: 5
timeout: 10s
retries: 120
restart: on-failure
mysql:
@ -149,7 +149,7 @@ services:
test: ["CMD", "mysqladmin" ,"ping", "-uroot", "-p${MYSQL_PASSWORD}"]
interval: 10s
timeout: 10s
retries: 3
retries: 120
restart: on-failure
minio:
@ -169,9 +169,9 @@ services:
restart: on-failure
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3
interval: 10s
timeout: 10s
retries: 120
redis:
# swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/valkey/valkey:8
@ -187,10 +187,9 @@ services:
restart: on-failure
healthcheck:
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
interval: 5s
timeout: 3s
retries: 3
start_period: 10s
interval: 10s
timeout: 10s
retries: 120
tei-cpu:

View File

@ -1,5 +1,5 @@
[general]
version = "0.6.2"
version = "0.6.4"
time_zone = "utc-8"
[network]

View File

@ -512,43 +512,85 @@ See [here](./guides/chat/best_practices/accelerate_question_answering.mdx).
See [here](./guides/agent/best_practices/accelerate_agent_question_answering.md).
---
### How to use MinerU to parse PDF documents?
MinerU PDF document parsing is available starting from v0.21.1. To use this feature, follow these steps:
MinerU PDF document parsing is available starting from v0.21.1. RAGFlow supports MinerU (>= 2.6.3) as an optional PDF parser with multiple backends. RAGFlow itself only acts as a client: it calls MinerU to parse documents, reads the output files, and ingests the parsed content into RAGFlow. To use this feature, follow these steps:
1. Before deploying ragflow-server, update your **docker/.env** file:
- Enable `HF_ENDPOINT=https://hf-mirror.com`
- Add a MinerU entry: `MINERU_EXECUTABLE=/ragflow/uv_tools/.venv/bin/mineru`
1. **Prepare MinerU**
2. Start the ragflow-server and run the following commands inside the container:
- **If you run RAGFlow from source**, install MinerU into an isolated virtual environment (recommended path: `$HOME/uv_tools`):
```bash
mkdir uv_tools
cd uv_tools
uv venv .venv
source .venv/bin/activate
uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple
```
```bash
mkdir -p "$HOME/uv_tools"
cd "$HOME/uv_tools"
uv venv .venv
source .venv/bin/activate
uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple
# or
# uv pip install -U "mineru[all]" -i https://mirrors.aliyun.com/pypi/simple
```
3. Restart the ragflow-server.
4. In the web UI, navigate to the **Configuration** page of your dataset. Click **Built-in** in the **Ingestion pipeline** section, select a chunking method from the **Built-in** dropdown, which supports PDF parsing, and slect **MinerU** in **PDF parser**.
5. If you use a custom ingestion pipeline instead, you must also complete the first three steps before selecting **MinerU** in the **Parsing method** section of the **Parser** component.
- **If you run RAGFlow with Docker**, you usually only need to turn on MinerU support in `docker/.env`:
```bash
# docker/.env
...
USE_MINERU=true
...
```
Enabling `USE_MINERU=true` will internally perform the same setup as the manual configuration (including setting the MinerU executable path and related environment variables). You only need the manual installation above if you are running from source or want full control over the MinerU installation.
2. **Start RAGFlow with MinerU enabled**
- **Source deployment**: in the RAGFlow repo, export the key MinerU-related variables and start the backend service:
```bash
# in RAGFlow repo
export MINERU_EXECUTABLE="$HOME/uv_tools/.venv/bin/mineru"
export MINERU_DELETE_OUTPUT=0 # keep output directory
export MINERU_BACKEND=pipeline # or another backend you prefer
source .venv/bin/activate
export PYTHONPATH=$(pwd)
bash docker/launch_backend_service.sh
```
- **Docker deployment**: after setting `USE_MINERU=true`, restart the containers so that the new settings take effect:
```bash
# in RAGFlow repo
docker compose -f docker/docker-compose.yml restart
```
3. In the web UI, navigate to the **Configuration** page of your dataset. Click **Built-in** in the **Ingestion pipeline** section, select a chunking method from the **Built-in** dropdown (which supports PDF parsing), and select **MinerU** in **PDF parser**.
4. If you use a custom ingestion pipeline instead, you must also complete the first two steps before selecting **MinerU** in the **Parsing method** section of the **Parser** component.
---
### How to configure MinerU-specific settings?
1. Set `MINERU_EXECUTABLE` (default: `mineru`) to the path to the MinerU executable.
2. Set `MINERU_DELETE_OUTPUT` to `0` to keep MinerU's output. (Default: `1`, which deletes temporary output)
3. Set `MINERU_OUTPUT_DIR` to specify the output directory for MinerU.
The table below summarizes the most commonly used MinerU-related environment variables:
| Environment variable | Description | Default | Example |
| ---------------------- | ---------------------------------- | ----------------------------------- | ----------------------------------------------------------------------------------------------- |
| `MINERU_EXECUTABLE` | Path to the local MinerU executable | `mineru` | `MINERU_EXECUTABLE=/home/ragflow/uv_tools/.venv/bin/mineru` |
| `MINERU_DELETE_OUTPUT` | Whether to delete MinerU output directory | `1` (do **not** keep the output directory) | `MINERU_DELETE_OUTPUT=0` |
| `MINERU_OUTPUT_DIR` | Directory for MinerU output files | System-defined temporary directory | `MINERU_OUTPUT_DIR=/home/ragflow/mineru/output` |
| `MINERU_BACKEND` | MinerU parsing backend | `pipeline` | `MINERU_BACKEND=pipeline\|vlm-transformers\|vlm-vllm-engine\|vlm-http-client` |
| `MINERU_SERVER_URL` | URL of remote vLLM server (only for `vlm-http-client` backend) | _unset_ | `MINERU_SERVER_URL=http://your-vllm-server-ip:30000` |
| `MINERU_APISERVER` | URL of remote MinerU service used as the parser (instead of local MinerU) | _unset_ | `MINERU_APISERVER=http://your-mineru-server:port` |
1. Set `MINERU_EXECUTABLE` to the path to the MinerU executable if the default `mineru` is not on `PATH`.
2. Set `MINERU_DELETE_OUTPUT` to `0` to keep MinerU's output. (Default: `1`, which deletes temporary output.)
3. Set `MINERU_OUTPUT_DIR` to specify the output directory for MinerU (otherwise a system temp directory is used).
4. Set `MINERU_BACKEND` to specify a parsing backend:
- `"pipeline"` (default): The traditional multimodel pipeline.
- `"vlm-transformers"`: A vision-language model using HuggingFace Transformers.
- `"vlm-vllm-engine"`: A vision-language model using local vLLM engine (requires a local GPU).
- `"vlm-http-client"`: A vision-language model via HTTP client to remote vLLM server (RAGFlow only requires CPU).
- `"vlm-vllm-engine"`: A vision-language model using a local vLLM engine (requires a local GPU).
- `"vlm-http-client"`: A vision-language model via HTTP client to a remote vLLM server (RAGFlow only requires CPU).
5. If using the `"vlm-http-client"` backend, you must also set `MINERU_SERVER_URL` to the URL of your vLLM server.
6. If you want RAGFlow to call a **remote MinerU service** (instead of a MinerU process running locally with RAGFlow), set `MINERU_APISERVER` to the URL of the remote MinerU server.
:::tip NOTE
For information about other environment variables natively supported by MinerU, see [here](https://opendatalab.github.io/MinerU/usage/cli_tools/#environment-variables-description).
@ -561,16 +603,18 @@ For information about other environment variables natively supported by MinerU,
RAGFlow supports MinerU's `vlm-http-client` backend, enabling you to delegate document parsing tasks to a remote vLLM server. With this configuration, RAGFlow will connect to your remote vLLM server as a client and use its powerful GPU resources for document parsing. This significantly improves performance for parsing complex documents while reducing the resources required on your RAGFlow server. To configure MinerU with a vLLM server:
1. Set up a vLLM server running MinerU:
```bash
mineru-vllm-server --port 30000
```
2. Configure the following environment variables in your **docker/.env** file:
- `MINERU_EXECUTABLE=/ragflow/uv_tools/.venv/bin/mineru` (or the path to your MinerU executable)
2. Configure the following environment variables in your **docker/.env** file (or your shell if running from source):
- `MINERU_EXECUTABLE=/home/ragflow/uv_tools/.venv/bin/mineru` (or the path to your MinerU executable)
- `MINERU_BACKEND="vlm-http-client"`
- `MINERU_SERVER_URL="http://your-vllm-server-ip:30000"`
3. Complete the rest standard MinerU setup steps as described [here](#how-to-configure-mineru-specific-settings).
3. Complete the rest of the standard MinerU setup steps as described [here](#how-to-configure-mineru-specific-settings).
:::tip NOTE
When using the `vlm-http-client` backend, the RAGFlow server requires no GPU, only network connectivity. This enables cost-effective distributed deployment with multiple RAGFlow instances sharing one remote vLLM server.

View File

@ -14,6 +14,7 @@ import urllib.request
from typing import Union
import nltk
from huggingface_hub import snapshot_download
def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
@ -39,6 +40,19 @@ def get_urls(use_china_mirrors=False) -> list[Union[str, list[str]]]:
]
repos = [
"InfiniFlow/text_concat_xgb_v1.0",
"InfiniFlow/deepdoc",
"InfiniFlow/huqie",
]
def download_model(repo_id):
    """
    Download a Hugging Face repository snapshot into ./huggingface.co/<repo_id>.

    Args:
        repo_id: repository identifier passed to `snapshot_download`,
            e.g. "InfiniFlow/deepdoc".
    """
    # The target mirrors the repo id under a local "huggingface.co" folder,
    # resolved against the current working directory.
    relative_target = os.path.join("huggingface.co", repo_id)
    local_dir = os.path.abspath(relative_target)
    os.makedirs(local_dir, exist_ok=True)
    snapshot_download(repo_id=repo_id, local_dir=local_dir)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Download dependencies with optional China mirror support")
parser.add_argument("--china-mirrors", action="store_true", help="Use China-accessible mirrors for downloads")
@ -57,3 +71,7 @@ if __name__ == "__main__":
for data in ["wordnet", "punkt", "punkt_tab"]:
print(f"Downloading nltk {data}...")
nltk.download(data, download_dir=local_dir)
for repo_id in repos:
print(f"Downloading huggingface repo {repo_id}...")
download_model(repo_id)

View File

@ -96,7 +96,7 @@ ragflow:
infinity:
image:
repository: infiniflow/infinity
tag: v0.6.2
tag: v0.6.4
pullPolicy: IfNotPresent
pullSecrets: []
storage:

View File

@ -48,7 +48,7 @@ dependencies = [
"html-text==0.6.2",
"httpx[socks]>=0.28.1,<0.29.0",
"huggingface-hub>=0.25.0,<0.26.0",
"infinity-sdk==0.6.2",
"infinity-sdk==0.6.4",
"infinity-emb>=0.0.66,<0.0.67",
"itsdangerous==2.1.2",
"json-repair==0.35.0",

View File

@ -641,6 +641,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
to_page = to_page,
lang = lang,
callback = callback,
layout_recognizer = layout_recognizer,
**kwargs
)

View File

@ -97,7 +97,13 @@ def vision_llm_chunk(binary, vision_model, prompt=None, callback=None):
try:
with io.BytesIO() as img_binary:
img.save(img_binary, format="JPEG")
try:
img.save(img_binary, format="JPEG")
except Exception:
img_binary.seek(0)
img_binary.truncate()
img.save(img_binary, format="PNG")
img_binary.seek(0)
ans = clean_markdown_block(vision_model.describe_with_prompt(img_binary.read(), prompt))
txt += "\n" + ans

View File

@ -464,7 +464,7 @@ class GPUStackCV(GptV4):
class LocalCV(Base):
_FACTORY_NAME = "Moonshot"
_FACTORY_NAME = "Local"
def __init__(self, key, model_name="glm-4v", lang="Chinese", **kwargs):
pass
@ -975,3 +975,12 @@ class GoogleCV(AnthropicCV, GeminiCV):
else:
for ans in GeminiCV.chat_streamly(self, system, history, gen_conf, images):
yield ans
class MoonshotCV(GptV4):
    """Vision model wrapper registered under the "Moonshot" factory name.

    All behaviour is inherited from ``GptV4``; this class only supplies the
    default model name and API endpoint.
    """

    _FACTORY_NAME = "Moonshot"

    def __init__(self, key, model_name="moonshot-v1-8k-vision-preview", lang="Chinese", base_url="https://api.moonshot.cn/v1", **kwargs):
        # A falsy base_url (None or "") falls back to the default endpoint.
        endpoint = base_url or "https://api.moonshot.cn/v1"
        super().__init__(key, model_name, lang=lang, base_url=endpoint, **kwargs)

View File

@ -63,6 +63,8 @@ class SyncBase:
if task["poll_range_start"]:
next_update = task["poll_range_start"]
for document_batch in document_batch_generator:
if not document_batch:
continue
min_update = min([doc.doc_updated_at for doc in document_batch])
max_update = max([doc.doc_updated_at for doc in document_batch])
next_update = max([next_update, max_update])

View File

@ -74,15 +74,15 @@ class Session(Base):
json_data = res.json()
except ValueError:
raise Exception(f"Invalid response {res}")
yield self._structure_answer(json_data)
yield self._structure_answer(json_data["data"])
def _structure_answer(self, json_data):
if self.__session_type == "agent":
answer = json_data["data"]["data"]["content"]
answer = json_data["data"]["content"]
elif self.__session_type == "chat":
answer =json_data["data"]["answer"]
reference = json_data["data"].get("reference", {})
answer = json_data["answer"]
reference = json_data.get("reference", {})
temp_dict = {
"content": answer,
"role": "assistant"

6634
uv.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -30,32 +30,47 @@ const Input = React.forwardRef<HTMLInputElement, InputProps>(
}
};
return (
<div className="relative w-full">
<input
type={type === 'password' && showPassword ? 'text' : type}
className={cn(
'flex h-8 w-full rounded-md border border-input bg-bg-input px-2 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-text-disabled focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 text-text-primary',
className,
)}
ref={ref}
value={inputValue ?? ''}
onChange={handleChange}
{...restProps}
/>
{type === 'password' && (
<button
type="button"
className="absolute inset-y-0 right-0 pr-3 flex items-center"
onClick={() => setShowPassword(!showPassword)}
>
{showPassword ? (
<EyeOff className="h-4 w-4 text-text-secondary" />
) : (
<Eye className="h-4 w-4 text-text-secondary" />
<>
{type !== 'password' && (
<input
type={type === 'password' && showPassword ? 'text' : type}
className={cn(
'flex h-8 w-full rounded-md border border-input bg-bg-input px-2 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-text-disabled focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 text-text-primary',
className,
)}
</button>
ref={ref}
value={inputValue ?? ''}
onChange={handleChange}
{...restProps}
/>
)}
</div>
{type === 'password' && (
<div className="relative w-full">
<input
type={type === 'password' && showPassword ? 'text' : type}
className={cn(
'flex h-8 w-full rounded-md border border-input bg-bg-input px-2 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-text-disabled focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 text-text-primary',
className,
)}
ref={ref}
value={inputValue ?? ''}
onChange={handleChange}
{...restProps}
/>
<button
type="button"
className="absolute inset-y-0 right-0 pr-3 flex items-center"
onClick={() => setShowPassword(!showPassword)}
>
{showPassword ? (
<EyeOff className="h-4 w-4 text-text-secondary" />
) : (
<Eye className="h-4 w-4 text-text-secondary" />
)}
</button>
</div>
)}
</>
);
},
);

View File

@ -274,6 +274,10 @@ export default {
reRankModelWaring: 'Re-rank model is very time consuming.',
},
knowledgeConfiguration: {
rebuildTip:
'Re-downloads files from the linked data source and parses them again.',
baseInfo: 'Basic Info',
gobalIndex: 'Global Index',
dataSource: 'Data Source',
linkSourceSetTip: 'Manage data source linkage with this dataset',
linkDataSource: 'Link Data Source',
@ -304,7 +308,7 @@ export default {
dataFlowPlaceholder: 'Please select a pipeline.',
buildItFromScratch: 'Build it from scratch',
dataFlow: 'Pipeline',
parseType: 'Ingestion pipeline',
parseType: 'Parse Type',
manualSetup: 'Choose pipeline',
builtIn: 'Built-in',
titleDescription:
@ -692,6 +696,8 @@ This auto-tagging feature enhances retrieval by adding another layer of domain-s
newDocs: 'New Docs',
timeStarted: 'Time started',
log: 'Log',
confluenceDescription:
'Integrate your Confluence workspace to search documentation.',
s3Description:
'Connect to your AWS S3 bucket to import and sync stored files.',
discordDescription:

View File

@ -260,6 +260,9 @@ export default {
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
},
knowledgeConfiguration: {
rebuildTip: '从所有已关联的数据源重新下载文件并再次解析。',
baseInfo: '基础信息',
gobalIndex: '全局索引',
dataSource: '数据源',
linkSourceSetTip: '管理与此数据集的数据源链接',
linkDataSource: '链接数据源',
@ -282,14 +285,14 @@ export default {
eidtLinkDataPipeline: '编辑pipeline',
linkPipelineSetTip: '管理与此数据集的数据管道链接',
default: '默认',
dataPipeline: 'pipeline',
dataPipeline: 'Ingestion pipeline',
linkDataPipeline: '关联pipeline',
enableAutoGenerate: '是否启用自动生成',
teamPlaceholder: '请选择团队',
dataFlowPlaceholder: '请选择pipeline',
buildItFromScratch: '去Scratch构建',
dataFlow: 'pipeline',
parseType: 'Ingestion pipeline',
parseType: '解析方法',
manualSetup: '选择pipeline',
builtIn: '内置',
titleDescription: '在这里更新您的知识库详细信息,尤其是切片方法。',
@ -683,6 +686,7 @@ General实体和关系提取提示来自 GitHub - microsoft/graphrag基于
newDocs: '新文档',
timeStarted: '开始时间',
log: '日志',
confluenceDescription: '连接你的 Confluence 工作区以搜索文档内容。',
s3Description: ' 连接你的 AWS S3 存储桶以导入和同步文件。',
discordDescription: ' 连接你的 Discord 服务器以访问和分析聊天数据。',
notionDescription: ' 同步 Notion 页面与数据库,用于知识检索。',

View File

@ -1,11 +1,22 @@
import { IRagNode } from '@/interfaces/database/agent';
import { BaseNode } from '@/interfaces/database/agent';
import { NodeProps } from '@xyflow/react';
import { camelCase } from 'lodash';
import { useTranslation } from 'react-i18next';
import { RagNode } from '.';
import { DataOperationsFormSchemaType } from '../../form/data-operations-form';
import { LabelCard } from './card';
export function DataOperationsNode({
...props
}: NodeProps<BaseNode<DataOperationsFormSchemaType>>) {
const { data } = props;
const { t } = useTranslation();
export function DataOperationsNode({ ...props }: NodeProps<IRagNode>) {
return (
<RagNode {...props}>
<section>select</section>
<LabelCard>
{t(`flow.operationsOptions.${camelCase(data.form?.operations)}`)}
</LabelCard>
</RagNode>
);
}

View File

@ -1,11 +1,36 @@
import { IRagNode } from '@/interfaces/database/agent';
import { NodeCollapsible } from '@/components/collapse';
import { BaseNode } from '@/interfaces/database/agent';
import { NodeProps } from '@xyflow/react';
import { RagNode } from '.';
import { VariableAggregatorFormSchemaType } from '../../form/variable-aggregator-form/schema';
import { useGetVariableLabelOrTypeByValue } from '../../hooks/use-get-begin-query';
import { LabelCard } from './card';
export function VariableAggregatorNode({
...props
}: NodeProps<BaseNode<VariableAggregatorFormSchemaType>>) {
const { data } = props;
const { getLabel } = useGetVariableLabelOrTypeByValue();
export function VariableAggregatorNode({ ...props }: NodeProps<IRagNode>) {
return (
<RagNode {...props}>
<section>VariableAggregatorNode</section>
<NodeCollapsible items={data.form?.groups}>
{(x, idx) => (
<section key={idx} className="space-y-1">
<div className="flex justify-between items-center gap-2">
<span className="flex-1 min-w-0 truncate"> {x.group_name}</span>
<span className="text-text-secondary">{x.type}</span>
</div>
<div className="space-y-1">
{x.variables.map((y, index) => (
<LabelCard key={index} className="truncate">
{getLabel(y.value)}
</LabelCard>
))}
</div>
</section>
)}
</NodeCollapsible>
</RagNode>
);
}

View File

@ -598,7 +598,7 @@ export const initialDataOperationsValues = {
export const initialVariableAssignerValues = {};
export const initialVariableAggregatorValues = {};
export const initialVariableAggregatorValues = { outputs: {}, groups: [] };
export const CategorizeAnchorPointPositions = [
{ top: 1, right: 34 },

View File

@ -36,6 +36,7 @@ import {
useShowSecondaryMenu,
} from '@/pages/agent/hooks/use-build-structured-output';
import { useFilterQueryVariableOptionsByTypes } from '@/pages/agent/hooks/use-get-begin-query';
import { get } from 'lodash';
import { PromptIdentity } from '../../agent-form/use-build-prompt-options';
import { StructuredOutputSecondaryMenu } from '../structured-output-secondary-menu';
import { ProgrammaticTag } from './constant';
@ -45,18 +46,21 @@ class VariableInnerOption extends MenuOption {
value: string;
parentLabel: string | JSX.Element;
icon?: ReactNode;
type?: string;
constructor(
label: string,
value: string,
parentLabel: string | JSX.Element,
icon?: ReactNode,
type?: string,
) {
super(value);
this.label = label;
this.value = value;
this.parentLabel = parentLabel;
this.icon = icon;
this.type = type;
}
}
@ -126,9 +130,10 @@ function VariablePickerMenuItem({
<li
key={x.value}
onClick={() => selectOptionAndCleanUp(x)}
className="hover:bg-bg-card p-1 text-text-primary rounded-sm"
className="hover:bg-bg-card p-1 text-text-primary rounded-sm flex justify-between items-center"
>
{x.label}
<span className="truncate flex-1 min-w-0">{x.label}</span>
<span className="text-text-secondary">{get(x, 'type')}</span>
</li>
);
})}
@ -146,6 +151,7 @@ export type VariablePickerMenuOptionType = {
label: string;
value: string;
icon: ReactNode;
type?: string;
}>;
};
@ -214,7 +220,13 @@ export default function VariablePickerMenuPlugin({
x.label,
x.title,
x.options.map((y) => {
return new VariableInnerOption(y.label, y.value, x.label, y.icon);
return new VariableInnerOption(
y.label,
y.value,
x.label,
y.icon,
y.type,
);
}),
),
);
@ -378,7 +390,7 @@ export default function VariablePickerMenuPlugin({
const nextOptions = buildNextOptions();
return anchorElementRef.current && nextOptions.length
? ReactDOM.createPortal(
<div className="typeahead-popover w-[200px] p-2 bg-bg-base">
<div className="typeahead-popover w-80 p-2 bg-bg-base">
<ul className="scroll-auto overflow-x-hidden">
{nextOptions.map((option, i: number) => (
<VariablePickerMenuItem

View File

@ -18,6 +18,7 @@ type QueryVariableProps = {
label?: ReactNode;
hideLabel?: boolean;
className?: string;
onChange?: (value: string) => void;
};
export function QueryVariable({
@ -26,6 +27,7 @@ export function QueryVariable({
label,
hideLabel = false,
className,
onChange,
}: QueryVariableProps) {
const { t } = useTranslation();
const form = useFormContext();
@ -46,7 +48,11 @@ export function QueryVariable({
<FormControl>
<GroupedSelectWithSecondaryMenu
options={finalOptions}
{...field}
value={field.value}
onChange={(val) => {
field.onChange(val);
onChange?.(val);
}}
// allowClear
types={types}
></GroupedSelectWithSecondaryMenu>

View File

@ -209,8 +209,12 @@ export function GroupedSelectWithSecondaryMenu({
onChange?.(option.value);
setOpen(false);
}}
className="flex items-center justify-between"
>
{option.label}
<span> {option.label}</span>
<span className="text-text-secondary">
{get(option, 'type')}
</span>
</CommandItem>
);
})}

View File

@ -112,9 +112,12 @@ export function StructuredOutputSecondaryMenu({
<HoverCardTrigger asChild>
<li
onClick={handleMenuClick}
className="hover:bg-bg-card py-1 px-2 text-text-primary rounded-sm text-sm flex justify-between items-center"
className="hover:bg-bg-card py-1 px-2 text-text-primary rounded-sm text-sm flex justify-between items-center gap-2"
>
{data.label} <ChevronRight className="size-3.5 text-text-secondary" />
<div className="flex justify-between flex-1">
{data.label} <span className="text-text-secondary">object</span>
</div>
<ChevronRight className="size-3.5 text-text-secondary" />
</li>
</HoverCardTrigger>
<HoverCardContent

View File

@ -5,6 +5,7 @@ import { Plus, Trash2 } from 'lucide-react';
import { useFieldArray, useFormContext } from 'react-hook-form';
import { useGetVariableLabelOrTypeByValue } from '../../hooks/use-get-begin-query';
import { QueryVariable } from '../components/query-variable';
import { NameInput } from './name-input';
type DynamicGroupVariableProps = {
name: string;
@ -36,9 +37,17 @@ export function DynamicGroupVariable({
<div className="flex items-center justify-between">
<div className="flex items-center gap-3">
<RAGFlowFormItem name={`${name}.group_name`} className="w-32">
{(field) => (
<NameInput
value={field.value}
onChange={field.onChange}
></NameInput>
)}
</RAGFlowFormItem>
{/* Use a hidden form to store data types; otherwise, data loss may occur. */}
<RAGFlowFormItem name={`${name}.type`} className="hidden">
<Input></Input>
</RAGFlowFormItem>
<Button
variant={'ghost'}
type="button"
@ -72,6 +81,14 @@ export function DynamicGroupVariable({
className="flex-1 min-w-0"
hideLabel
types={firstType && fields.length > 1 ? [firstType] : []}
onChange={(val) => {
const type = getType(val);
if (type && index === 0) {
form.setValue(`${name}.type`, type, {
shouldDirty: true,
});
}
}}
></QueryVariable>
<Button
variant={'ghost'}

View File

@ -2,41 +2,27 @@ import { BlockButton } from '@/components/ui/button';
import { Form } from '@/components/ui/form';
import { Separator } from '@/components/ui/separator';
import { zodResolver } from '@hookform/resolvers/zod';
import { memo } from 'react';
import { memo, useCallback } from 'react';
import { useFieldArray, useForm } from 'react-hook-form';
import { useTranslation } from 'react-i18next';
import { z } from 'zod';
import { initialDataOperationsValues } from '../../constant';
import { useFormValues } from '../../hooks/use-form-values';
import { useWatchFormChange } from '../../hooks/use-watch-form-change';
import { INextOperatorForm } from '../../interface';
import useGraphStore from '../../store';
import { buildOutputList } from '../../utils/build-output-list';
import { FormWrapper } from '../components/form-wrapper';
import { Output } from '../components/output';
import { DynamicGroupVariable } from './dynamic-group-variable';
export const RetrievalPartialSchema = {
groups: z.array(
z.object({
group_name: z.string(),
variables: z.array(z.object({ value: z.string().optional() })),
}),
),
operations: z.string(),
};
export const FormSchema = z.object(RetrievalPartialSchema);
export type DataOperationsFormSchemaType = z.infer<typeof FormSchema>;
const outputList = buildOutputList(initialDataOperationsValues.outputs);
import { FormSchema, VariableAggregatorFormSchemaType } from './schema';
import { useWatchFormChange } from './use-watch-change';
function VariableAggregatorForm({ node }: INextOperatorForm) {
const { t } = useTranslation();
const getNode = useGraphStore((state) => state.getNode);
const defaultValues = useFormValues(initialDataOperationsValues, node);
const form = useForm<DataOperationsFormSchemaType>({
const form = useForm<VariableAggregatorFormSchemaType>({
defaultValues: defaultValues,
mode: 'onChange',
resolver: zodResolver(FormSchema),
@ -48,7 +34,15 @@ function VariableAggregatorForm({ node }: INextOperatorForm) {
control: form.control,
});
useWatchFormChange(node?.id, form, true);
const appendItem = useCallback(() => {
append({ group_name: `Group ${fields.length}`, variables: [] });
}, [append, fields.length]);
const outputList = buildOutputList(
getNode(node?.id)?.data.form.outputs ?? {},
);
useWatchFormChange(node?.id, form);
return (
<Form {...form}>
@ -63,16 +57,10 @@ function VariableAggregatorForm({ node }: INextOperatorForm) {
></DynamicGroupVariable>
))}
</section>
<BlockButton
onClick={() =>
append({ group_name: `Group ${fields.length}`, variables: [] })
}
>
{t('common.add')}
</BlockButton>
<BlockButton onClick={appendItem}>{t('common.add')}</BlockButton>
<Separator />
<Output list={outputList} isFormRequired></Output>
<Output list={outputList}></Output>
</FormWrapper>
</Form>
);

View File

@ -0,0 +1,55 @@
import { Input } from '@/components/ui/input';
import { PenLine } from 'lucide-react';
import { useCallback, useEffect, useRef, useState } from 'react';
import { useHandleNameChange } from './use-handle-name-change';
type NameInputProps = {
value: string;
onChange: (value: string) => void;
};
/**
 * Inline-editable name label.
 *
 * Renders the current name as text next to a pen icon; clicking the icon
 * swaps the text for an input. When the input loses focus, editing mode is
 * left and whatever `handleNameBlur()` returns is forwarded to `onChange`.
 */
export function NameInput({ value, onChange }: NameInputProps) {
  const { name, handleNameBlur, handleNameChange } = useHandleNameChange(value);
  const inputRef = useRef<HTMLInputElement>(null);
  const [isEditingMode, setIsEditingMode] = useState(false);

  const switchIsEditingMode = useCallback(() => {
    setIsEditingMode((editing) => !editing);
  }, []);

  const handleBlur = useCallback(() => {
    const committedName = handleNameBlur();
    setIsEditingMode(false);
    onChange(committedName);
  }, [handleNameBlur, onChange]);

  // Focus the input once it has been mounted after entering editing mode.
  useEffect(() => {
    if (!isEditingMode || !inputRef.current) {
      return;
    }
    requestAnimationFrame(() => {
      inputRef.current?.focus();
    });
  }, [isEditingMode]);

  const content = isEditingMode ? (
    <Input
      ref={inputRef}
      value={name}
      onBlur={handleBlur}
      onChange={handleNameChange}
    />
  ) : (
    <div className="flex items-center justify-between gap-2 text-base w-full">
      <span className="truncate flex-1">{name}</span>
      {/* NOTE(review): `group-hover:block` presumably relies on an ancestor carrying the `group` class — confirm at the call site. */}
      <PenLine
        onClick={switchIsEditingMode}
        className="size-3.5 text-text-secondary cursor-pointer hidden group-hover:block"
      />
    </div>
  );

  return <div className="flex items-center gap-1 flex-1">{content}</div>;
}

View File

@ -0,0 +1,15 @@
import { z } from 'zod';
/** A single variable reference inside a group; `value` may be unset. */
const groupVariableSchema = z.object({ value: z.string().optional() });

/** One aggregation group: its name, its variables and an optional type. */
const groupSchema = z.object({
  group_name: z.string(),
  variables: z.array(groupVariableSchema),
  type: z.string().optional(),
});

/** Raw zod shape of the variable aggregator form. */
export const VariableAggregatorSchema = {
  groups: z.array(groupSchema),
};

/** Zod object schema built from {@link VariableAggregatorSchema}. */
export const FormSchema = z.object(VariableAggregatorSchema);

/** Form value type inferred from {@link FormSchema}. */
export type VariableAggregatorFormSchemaType = z.infer<typeof FormSchema>;

View File

@ -0,0 +1,37 @@
import message from '@/components/ui/message';
import { trim } from 'lodash';
import { ChangeEvent, useCallback, useEffect, useState } from 'react';
import { useFormContext } from 'react-hook-form';
import { VariableAggregatorFormSchemaType } from './schema';
/**
 * Local editing state for a group name, with validation on blur.
 *
 * The draft name is kept in component state and re-synchronised whenever
 * `previousName` changes.
 *
 * @param previousName - The name currently stored for this group.
 * @returns The draft `name`, a change handler for the input, and
 *   `handleNameBlur`, which returns the name that should be committed.
 */
export const useHandleNameChange = (previousName: string) => {
  const [name, setName] = useState<string>('');
  const form = useFormContext<VariableAggregatorFormSchemaType>();

  const handleNameBlur = useCallback(() => {
    // Compare and commit the trimmed name so that "Group 1 " cannot bypass
    // the uniqueness check against "Group 1".
    const nextName = trim(name);
    const isUnchanged = nextName === previousName;
    const groups = form.getValues('groups') ?? [];
    // The group's own entry still holds `previousName`, so an unchanged name
    // is not treated as a collision.
    const existsSameName =
      !isUnchanged && groups.some((x) => x.group_name === nextName);

    if (nextName === '' || isUnchanged || existsSameName) {
      if (existsSameName) {
        message.error('The name cannot be repeated');
      }
      // Rejected or unchanged: restore the stored name in the input.
      setName(previousName);
      return previousName;
    }

    return nextName;
  }, [form, name, previousName]);

  const handleNameChange = useCallback(
    (e: ChangeEvent<HTMLInputElement>) => {
      setName(e.target.value);
    },
    [],
  );

  // Keep the draft in sync when the stored name changes from outside.
  useEffect(() => {
    setName(previousName);
  }, [previousName]);

  return {
    name,
    handleNameBlur,
    handleNameChange,
  };
};

View File

@ -0,0 +1,31 @@
import { useEffect } from 'react';
import { UseFormReturn, useWatch } from 'react-hook-form';
import useGraphStore from '../../store';
import { VariableAggregatorFormSchemaType } from './schema';
/**
 * Mirrors the variable aggregator form into the graph store.
 *
 * Once the form is dirty, every change to the watched values is written to
 * the node via `replaceNodeForm`, together with an `outputs` map derived from
 * the groups: one entry per non-empty `group_name`, holding that group's
 * `type`.
 *
 * @param id - Id of the node whose form is being edited; nothing is written
 *   when it is missing.
 * @param form - The react-hook-form instance to watch.
 */
export function useWatchFormChange(
  id?: string,
  form?: UseFormReturn<VariableAggregatorFormSchemaType>,
) {
  const values = useWatch({ control: form?.control });
  // Select only the action so this hook does not re-render on every store
  // change (including the ones it triggers itself).
  const replaceNodeForm = useGraphStore((state) => state.replaceNodeForm);

  useEffect(() => {
    if (id && form?.formState.isDirty) {
      const outputs = values.groups?.reduce(
        (pre, cur) => {
          // Groups without a name cannot be used as an output key.
          if (cur.group_name) {
            pre[cur.group_name] = {
              type: cur.type,
            };
          }
          return pre;
        },
        {} as Record<string, Record<string, any>>,
      );

      replaceNodeForm(id, { ...values, outputs: outputs ?? {} });
    }
  }, [form?.formState.isDirty, id, replaceNodeForm, values]);
}

View File

@ -153,3 +153,24 @@ export function useFindAgentStructuredOutputTypeByValue() {
return findAgentStructuredOutputTypeByValue;
}
/**
 * Returns a callback that builds a display label for a variable value.
 *
 * The label is `"<operator name> / <segment after '@'>"`, where the operator
 * name is read from the node resolved via `getNodeId(value)`. When the value
 * has no segment after `@`, only the operator name is returned instead of
 * rendering the literal text "undefined". An empty string is returned when
 * the value is empty or the node has no name.
 *
 * NOTE(review): assumes values look like `<nodeId>@<path>` — confirm against
 * `getNodeId`.
 */
export function useFindAgentStructuredOutputLabelByValue() {
  const getNode = useGraphStore((state) => state.getNode);

  const findAgentStructuredOutputLabel = useCallback(
    (value?: string) => {
      if (!value) {
        return '';
      }

      const operatorName = getNode(getNodeId(value))?.data.name;
      if (!operatorName) {
        return '';
      }

      const path = value.split('@').at(1);
      return path ? operatorName + ' / ' + path : operatorName;
    },
    [getNode],
  );

  return findAgentStructuredOutputLabel;
}

View File

@ -20,7 +20,10 @@ import { buildBeginInputListFromObject } from '../form/begin-form/utils';
import { BeginQuery } from '../interface';
import OperatorIcon from '../operator-icon';
import useGraphStore from '../store';
import { useFindAgentStructuredOutputTypeByValue } from './use-build-structured-output';
import {
useFindAgentStructuredOutputLabelByValue,
useFindAgentStructuredOutputTypeByValue,
} from './use-build-structured-output';
export function useSelectBeginNodeDataInputs() {
const getNode = useGraphStore((state) => state.getNode);
@ -281,6 +284,8 @@ export function useGetVariableLabelOrTypeByValue(nodeId?: string) {
const flattenOptions = useFlattenQueryVariableOptions(nodeId);
const findAgentStructuredOutputTypeByValue =
useFindAgentStructuredOutputTypeByValue();
const findAgentStructuredOutputLabel =
useFindAgentStructuredOutputLabelByValue();
const getItem = useCallback(
(val?: string) => {
@ -291,9 +296,17 @@ export function useGetVariableLabelOrTypeByValue(nodeId?: string) {
const getLabel = useCallback(
(val?: string) => {
return getItem(val)?.label;
const item = getItem(val);
if (item) {
return (
<div>
{item.parentLabel} / {item.label}
</div>
);
}
return getItem(val)?.label || findAgentStructuredOutputLabel(val);
},
[getItem],
[findAgentStructuredOutputLabel, getItem],
);
const getType = useCallback(

View File

@ -49,7 +49,8 @@ export interface IFileLogItem {
process_duration: number;
progress: number;
progress_msg: string;
source_type: string;
source_type?: string;
source_from?: string;
status: string;
task_type: string;
tenant_id: string;

View File

@ -107,9 +107,9 @@ export const getFileLogsTableColumns = (
meta: { cellClassName: 'max-w-[10vw]' },
cell: ({ row }) => (
<div className="text-text-primary">
{row.original.source_type
{row.original.source_from
? DataSourceInfo[
row.original.source_type as keyof typeof DataSourceInfo
row.original.source_from as keyof typeof DataSourceInfo
].icon
: t('localUpload')}
</div>

View File

@ -1,9 +1,15 @@
import { IconFontFill } from '@/components/icon-font';
import { Button } from '@/components/ui/button';
import { Modal } from '@/components/ui/modal/modal';
import {
Tooltip,
TooltipContent,
TooltipTrigger,
} from '@/components/ui/tooltip';
import { useNavigatePage } from '@/hooks/logic-hooks/navigate-hooks';
import { IConnector } from '@/interfaces/database/knowledge';
import { delSourceModal } from '@/pages/user-setting/data-source/component/delete-source-modal';
import { DataSourceInfo } from '@/pages/user-setting/data-source/contant';
import { useDataSourceRebuild } from '@/pages/user-setting/data-source/hooks';
import { IDataSourceBase } from '@/pages/user-setting/data-source/interface';
import { Link, Settings, Unlink } from 'lucide-react';
import { useMemo, useState } from 'react';
@ -27,58 +33,45 @@ interface DataSourceItemProps extends IDataSourceNodeProps {
const DataSourceItem = (props: DataSourceItemProps) => {
const { t } = useTranslation();
const { id, name, icon, unbindFunc } = props;
const { id, name, icon, source, unbindFunc } = props;
const { navigateToDataSourceDetail } = useNavigatePage();
const { handleRebuild } = useDataSourceRebuild();
const toDetail = (id: string) => {
navigateToDataSourceDetail(id);
};
const openUnlinkModal = () => {
Modal.show({
visible: true,
className: '!w-[560px]',
title: t('dataflowParser.unlinkSourceModalTitle'),
children: (
<div
className="text-sm text-text-secondary"
dangerouslySetInnerHTML={{
__html: t('dataflowParser.unlinkSourceModalContent'),
}}
></div>
),
onVisibleChange: () => {
Modal.hide();
},
footer: (
<div className="flex justify-end gap-2">
<Button variant={'outline'} onClick={() => Modal.hide()}>
{t('dataflowParser.changeStepModalCancelText')}
</Button>
<Button
variant={'secondary'}
className="!bg-state-error text-bg-base"
onClick={() => {
unbindFunc?.(props);
Modal.hide();
}}
>
{t('dataflowParser.unlinkSourceModalConfirmText')}
</Button>
</div>
),
});
};
return (
<div className="flex items-center justify-between gap-1 px-2 rounded-md border ">
<div className="flex items-center justify-between gap-1 px-2 h-10 rounded-md border group hover:bg-bg-card">
<div className="flex items-center gap-1">
<div className="w-6 h-6 flex-shrink-0">{icon}</div>
<div className="text-base text-text-primary">
{DataSourceInfo[source].name}
</div>
<div>{name}</div>
</div>
<div className="flex gap-1 items-center">
<div className="flex items-center">
<Tooltip>
<TooltipTrigger>
<Button
variant={'transparent'}
className="border-none hidden group-hover:block"
type="button"
onClick={() => {
handleRebuild({ source_id: id });
}}
>
{/* <Settings /> */}
<IconFontFill name="reparse" className="text-text-primary" />
</Button>
</TooltipTrigger>
<TooltipContent>
{t('knowledgeConfiguration.rebuildTip')}
</TooltipContent>
</Tooltip>
<Button
variant={'transparent'}
className="border-none"
className="border-none hidden group-hover:block"
type="button"
onClick={() => {
toDetail(id);
@ -93,7 +86,7 @@ const DataSourceItem = (props: DataSourceItemProps) => {
<Button
type="button"
variant={'transparent'}
className="border-none"
className="border-none hidden group-hover:block"
onClick={() => {
// openUnlinkModal();
delSourceModal({
@ -151,9 +144,12 @@ const LinkDataSource = (props: ILinkDataSourceProps) => {
return (
<div className="flex flex-col gap-2">
<section className="flex flex-col">
<div className="flex items-center gap-1 text-text-primary text-sm">
<div className="text-base font-medium text-text-primary">
{t('knowledgeConfiguration.dataSource')}
</div>
{/* <div className="flex items-center gap-1 text-text-primary text-sm">
{t('knowledgeConfiguration.dataSource')}
</div> */}
<div className="flex justify-between items-center">
<div className="text-center text-xs text-text-secondary">
{t('knowledgeConfiguration.linkSourceSetTip')}

View File

@ -219,9 +219,14 @@ export default function DatasetSettings() {
<form onSubmit={form.handleSubmit(onSubmit)} className="space-y-6 ">
<div className="w-[768px] h-[calc(100vh-240px)] pr-1 overflow-y-auto scrollbar-auto">
<MainContainer className="text-text-secondary">
<div className="text-base font-medium text-text-primary">
{t('knowledgeConfiguration.baseInfo')}
</div>
<GeneralForm></GeneralForm>
<Divider />
<div className="text-base font-medium text-text-primary">
{t('knowledgeConfiguration.gobalIndex')}
</div>
<GraphRagItems
className="border-none p-0"
data={graphRagGenerateData as IGenerateLogButtonProps}
@ -235,6 +240,9 @@ export default function DatasetSettings() {
onDelete={() => handleDeletePipelineTask(GenerateType.Raptor)}
></RaptorFormFields>
<Divider />
<div className="text-base font-medium text-text-primary">
{t('knowledgeConfiguration.dataPipeline')}
</div>
<ParseTypeItem line={1} />
{parseType === 1 && (
<ChunkMethodItem line={1}></ChunkMethodItem>

View File

@ -66,7 +66,7 @@ const SourceDetailPage = () => {
render: (fieldProps: FormFieldConfig) => (
<div className="flex items-center gap-1 w-full relative">
<Input {...fieldProps} type={FormFieldType.Number} />
<span className="absolute right-0 -translate-x-12 text-text-secondary italic ">
<span className="absolute right-0 -translate-x-[58px] text-text-secondary italic ">
minutes
</span>
<button
@ -96,7 +96,7 @@ const SourceDetailPage = () => {
return (
<div className="flex items-center gap-1 w-full relative">
<Input {...fieldProps} type={FormFieldType.Number} />
<span className="absolute right-0 -translate-x-3 text-text-secondary italic ">
<span className="absolute right-0 -translate-x-6 text-text-secondary italic ">
hours
</span>
</div>
@ -111,7 +111,7 @@ const SourceDetailPage = () => {
render: (fieldProps: FormFieldConfig) => (
<div className="flex items-center gap-1 w-full relative">
<Input {...fieldProps} type={FormFieldType.Number} />
<span className="absolute right-0 -translate-x-3 text-text-secondary italic ">
<span className="absolute right-0 -translate-x-6 text-text-secondary italic ">
seconds
</span>
</div>
@ -183,7 +183,7 @@ const SourceDetailPage = () => {
</div>
<section className="flex flex-col gap-2 mt-6">
<div className="text-2xl text-text-primary">{t('setting.log')}</div>
<DataSourceLogsTable />
<DataSourceLogsTable refresh_freq={detail?.refresh_freq || false} />
</section>
</CardContent>
</Card>

View File

@ -130,9 +130,14 @@ const columns = ({
// pageSize: 10,
// total: 0,
// };
export const DataSourceLogsTable = () => {
export const DataSourceLogsTable = ({
refresh_freq,
}: {
refresh_freq: number | false;
}) => {
// const [pagination, setPagination] = useState(paginationInit);
const { data, pagination, setPagination } = useLogListDataSource();
const { data, pagination, setPagination } =
useLogListDataSource(refresh_freq);
const navigate = useNavigate();
const currentPagination = useMemo(
() => ({

View File

@ -2,6 +2,7 @@ import message from '@/components/ui/message';
import { useSetModalState } from '@/hooks/common-hooks';
import { useGetPaginationWithRouter } from '@/hooks/logic-hooks';
import dataSourceService, {
dataSourceRebuild,
dataSourceResume,
deleteDataSource,
featchDataSourceDetail,
@ -10,7 +11,7 @@ import dataSourceService, {
import { useQuery, useQueryClient } from '@tanstack/react-query';
import { t } from 'i18next';
import { useCallback, useMemo, useState } from 'react';
import { useSearchParams } from 'umi';
import { useParams, useSearchParams } from 'umi';
import { DataSourceInfo, DataSourceKey } from './contant';
import { IDataSorceInfo, IDataSource, IDataSourceBase } from './interface';
@ -109,14 +110,15 @@ export const useAddDataSource = () => {
};
};
export const useLogListDataSource = () => {
export const useLogListDataSource = (refresh_freq: number | false) => {
const { pagination, setPagination } = useGetPaginationWithRouter();
const [currentQueryParameters] = useSearchParams();
const id = currentQueryParameters.get('id');
const { data, isFetching } = useQuery<{ logs: IDataSource[]; total: number }>(
{
queryKey: ['data-source-logs', id, pagination],
queryKey: ['data-source-logs', id, pagination, refresh_freq],
refetchInterval: refresh_freq ? refresh_freq * 60 * 1000 : false,
queryFn: async () => {
const { data } = await getDataSourceLogs(id as string, {
page_size: pagination.pageSize,
@ -186,3 +188,22 @@ export const useDataSourceResume = () => {
);
return { handleResume };
};
/**
 * Provides `handleRebuild`, which requests a rebuild of a data source for the
 * dataset identified by the `id` route param and shows a success message when
 * the response code is 0.
 */
export const useDataSourceRebuild = () => {
  const { id } = useParams();

  const handleRebuild = useCallback(
    async ({ source_id }: { source_id: string }) => {
      const response = await dataSourceRebuild(source_id, {
        kb_id: id as string,
      });
      if (response.data.code !== 0) {
        return;
      }
      message.success(t(`message.operated`));
    },
    [id],
  );

  return { handleRebuild };
};

View File

@ -58,7 +58,17 @@ const DataSource = () => {
icon,
}: IDataSorceInfo) => {
return (
<div className="p-[10px] border border-border-button rounded-lg relative group hover:bg-bg-card">
<div
className="p-[10px] border border-border-button rounded-lg relative group hover:bg-bg-card"
onClick={() =>
showAddingModal({
id,
name,
description,
icon,
})
}
>
<div className="flex gap-2">
<div className="w-6 h-6">{icon}</div>
<div className="flex flex-1 flex-col items-start gap-2">
@ -67,17 +77,7 @@ const DataSource = () => {
</div>
</div>
<div className=" absolute top-2 right-2">
<Button
onClick={() =>
showAddingModal({
id,
name,
description,
icon,
})
}
className=" rounded-md px-1 text-bg-base gap-1 bg-text-primary text-xs py-0 h-6 items-center hidden group-hover:flex"
>
<Button className=" rounded-md px-1 text-bg-base gap-1 bg-text-primary text-xs py-0 h-6 items-center hidden group-hover:flex">
<Plus size={12} />
{t('setting.add')}
</Button>

View File

@ -21,10 +21,13 @@ const dataSourceService = registerServer<keyof typeof methods>(
/**
 * Sends a POST request to the delete endpoint of a data source.
 *
 * @param id - Identifier passed to `api.dataSourceDel` to build the URL.
 * @returns Whatever `request.post` returns for that URL.
 */
export const deleteDataSource = (id: string) => {
  const url = api.dataSourceDel(id);
  return request.post(url);
};
/**
 * Sends a PUT request to the resume endpoint of a data source.
 *
 * @param id - Identifier passed to `api.dataSourceResume` to build the URL.
 * @param data - Request payload; forwarded under the `data` option of
 * `request.put`.
 * @returns Whatever `request.put` returns for that URL and payload.
 */
export const dataSourceResume = (id: string, data: { resume: boolean }) => {
  // The leftover debug `console.log` was removed: it ran on every call and
  // its label did not match the value it printed.
  return request.put(api.dataSourceResume(id), { data });
};
/**
 * Sends a PUT request to the rebuild endpoint of a data source.
 *
 * @param id - Identifier passed to `api.dataSourceRebuild` to build the URL.
 * @param data - Request payload carrying `kb_id`; forwarded under the `data`
 * option of `request.put`.
 * @returns Whatever `request.put` returns for that URL and payload.
 */
export const dataSourceRebuild = (id: string, data: { kb_id: string }) =>
  request.put(api.dataSourceRebuild(id), { data });
/**
 * Sends a GET request to the logs endpoint of a data source.
 *
 * @param id - Identifier passed to `api.dataSourceLogs` to build the URL.
 * @param params - Optional query parameters; forwarded under the `params`
 * option of `request.get` (left `undefined` when omitted).
 * @returns Whatever `request.get` returns for that URL and parameters.
 */
export const getDataSourceLogs = (id: string, params?: any) => {
  const url = api.dataSourceLogs(id);
  return request.get(url, { params });
};
export const featchDataSourceDetail = (id: string) =>

View File

@ -39,6 +39,7 @@ export default {
dataSourceList: `${api_host}/connector/list`,
dataSourceDel: (id: string) => `${api_host}/connector/${id}/rm`,
dataSourceResume: (id: string) => `${api_host}/connector/${id}/resume`,
dataSourceRebuild: (id: string) => `${api_host}/connector/${id}/rebuild`,
dataSourceLogs: (id: string) => `${api_host}/connector/${id}/logs`,
dataSourceDetail: (id: string) => `${api_host}/connector/${id}`,