mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-04 03:25:30 +08:00
Compare commits
59 Commits
v0.21.0
...
cd77425b87
| Author | SHA1 | Date | |
|---|---|---|---|
| cd77425b87 | |||
| 544c9990e3 | |||
| 41a647fe32 | |||
| 594bf485d4 | |||
| 863c3e3d9c | |||
| 1767039be3 | |||
| cd75fa02b1 | |||
| cfdd37820a | |||
| 9d12380806 | |||
| 866098634b | |||
| 8013505daf | |||
| deb81810e9 | |||
| 6ab96287c9 | |||
| aaa4776657 | |||
| 5b2e5dd334 | |||
| de46b0d46e | |||
| cc703da747 | |||
| d956a442ce | |||
| 5fc59a3132 | |||
| 1d955507e9 | |||
| cf09c2260a | |||
| c9b18cbe18 | |||
| 8123942ec1 | |||
| 685114d253 | |||
| c9e56d20cf | |||
| 8ee0b6ea54 | |||
| f50b2461cb | |||
| 617faee718 | |||
| b15643bd80 | |||
| f12290f04b | |||
| 15838a6673 | |||
| 39ad9490ac | |||
| 387baf858f | |||
| 2dba858c84 | |||
| 43ea312144 | |||
| ce05696d95 | |||
| 0f62bfda21 | |||
| 70ffe2b4e8 | |||
| e76db6e222 | |||
| 7b664b5a84 | |||
| 8a41057236 | |||
| 447041d265 | |||
| f0375c4acd | |||
| 8af769de41 | |||
| f808bc32ba | |||
| e8cb1d8fc4 | |||
| 4e86ee4ff9 | |||
| c99034f717 | |||
| 86b254d214 | |||
| 1c38f4cefb | |||
| 74c195cd36 | |||
| e48bec1cbf | |||
| 205a5eb9f5 | |||
| 8844826208 | |||
| 8fe4281d81 | |||
| fb1bedbd3c | |||
| 6e55b9146c | |||
| 071ea9c493 | |||
| 5037a28e4d |
@ -135,7 +135,7 @@ releases! 🌟
|
||||
## 🔎 System Architecture
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
||||
<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
|
||||
</div>
|
||||
|
||||
## 🎬 Get Started
|
||||
|
||||
@ -129,7 +129,7 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).
|
||||
## 🔎 Arsitektur Sistem
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
||||
<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
|
||||
</div>
|
||||
|
||||
## 🎬 Mulai
|
||||
|
||||
@ -109,7 +109,7 @@
|
||||
## 🔎 システム構成
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
||||
<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
|
||||
</div>
|
||||
|
||||
## 🎬 初期設定
|
||||
|
||||
@ -109,7 +109,7 @@
|
||||
## 🔎 시스템 아키텍처
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
||||
<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
|
||||
</div>
|
||||
|
||||
## 🎬 시작하기
|
||||
|
||||
@ -129,7 +129,7 @@ Experimente nossa demo em [https://demo.ragflow.io](https://demo.ragflow.io).
|
||||
## 🔎 Arquitetura do Sistema
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
||||
<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
|
||||
</div>
|
||||
|
||||
## 🎬 Primeiros Passos
|
||||
|
||||
@ -132,7 +132,7 @@
|
||||
## 🔎 系統架構
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
||||
<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
|
||||
</div>
|
||||
|
||||
## 🎬 快速開始
|
||||
|
||||
@ -132,7 +132,7 @@
|
||||
## 🔎 系统架构
|
||||
|
||||
<div align="center" style="margin-top:20px;margin-bottom:20px;">
|
||||
<img src="https://github.com/infiniflow/ragflow/assets/12318111/d6ac5664-c237-4200-a7c2-a4a00691b485" width="1000"/>
|
||||
<img src="https://github.com/user-attachments/assets/31b0dd6f-ca4f-445a-9457-70cb44a381b2" width="1000"/>
|
||||
</div>
|
||||
|
||||
## 🎬 快速开始
|
||||
|
||||
@ -48,13 +48,20 @@ It consists of a server-side Service and a command-line client (CLI), both imple
|
||||
1. Ensure the Admin Service is running.
|
||||
2. Install ragflow-cli.
|
||||
```bash
|
||||
pip install ragflow-cli
|
||||
pip install ragflow-cli==0.21.0
|
||||
```
|
||||
3. Launch the CLI client:
|
||||
```bash
|
||||
ragflow-cli -h 0.0.0.0 -p 9381
|
||||
ragflow-cli -h 127.0.0.1 -p 9381
|
||||
```
|
||||
Enter superuser's password to login. Default password is `admin`.
|
||||
You will be prompted to enter the superuser's password to log in.
|
||||
The default password is admin.
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- -h: RAGFlow admin server host address
|
||||
|
||||
- -p: RAGFlow admin server port
|
||||
|
||||
|
||||
|
||||
|
||||
@ -21,9 +21,8 @@ from cmd import Cmd
|
||||
from Cryptodome.PublicKey import RSA
|
||||
from Cryptodome.Cipher import PKCS1_v1_5 as Cipher_pkcs1_v1_5
|
||||
from typing import Dict, List, Any
|
||||
from lark import Lark, Transformer, Tree, Token
|
||||
from lark import Lark, Transformer, Tree
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
|
||||
GRAMMAR = r"""
|
||||
start: command
|
||||
@ -43,6 +42,15 @@ sql_command: list_services
|
||||
| activate_user
|
||||
| list_datasets
|
||||
| list_agents
|
||||
| create_role
|
||||
| drop_role
|
||||
| alter_role
|
||||
| list_roles
|
||||
| show_role
|
||||
| grant_permission
|
||||
| revoke_permission
|
||||
| alter_user_role
|
||||
| show_user_permission
|
||||
|
||||
// meta command definition
|
||||
meta_command: "\\" meta_command_name [meta_args]
|
||||
@ -71,6 +79,19 @@ PASSWORD: "PASSWORD"i
|
||||
DATASETS: "DATASETS"i
|
||||
OF: "OF"i
|
||||
AGENTS: "AGENTS"i
|
||||
ROLE: "ROLE"i
|
||||
ROLES: "ROLES"i
|
||||
DESCRIPTION: "DESCRIPTION"i
|
||||
GRANT: "GRANT"i
|
||||
REVOKE: "REVOKE"i
|
||||
ALL: "ALL"i
|
||||
PERMISSION: "PERMISSION"i
|
||||
TO: "TO"i
|
||||
FROM: "FROM"i
|
||||
FOR: "FOR"i
|
||||
RESOURCES: "RESOURCES"i
|
||||
ON: "ON"i
|
||||
SET: "SET"i
|
||||
|
||||
list_services: LIST SERVICES ";"
|
||||
show_service: SHOW SERVICE NUMBER ";"
|
||||
@ -88,6 +109,19 @@ activate_user: ALTER USER ACTIVE quoted_string status ";"
|
||||
list_datasets: LIST DATASETS OF quoted_string ";"
|
||||
list_agents: LIST AGENTS OF quoted_string ";"
|
||||
|
||||
create_role: CREATE ROLE identifier [DESCRIPTION quoted_string] ";"
|
||||
drop_role: DROP ROLE identifier ";"
|
||||
alter_role: ALTER ROLE identifier SET DESCRIPTION quoted_string ";"
|
||||
list_roles: LIST ROLES ";"
|
||||
show_role: SHOW ROLE identifier ";"
|
||||
|
||||
grant_permission: GRANT action_list ON identifier TO ROLE identifier ";"
|
||||
revoke_permission: REVOKE action_list ON identifier FROM ROLE identifier ";"
|
||||
alter_user_role: ALTER USER quoted_string SET ROLE identifier ";"
|
||||
show_user_permission: SHOW USER PERMISSION quoted_string ";"
|
||||
|
||||
action_list: identifier ("," identifier)*
|
||||
|
||||
identifier: WORD
|
||||
quoted_string: QUOTED_STRING
|
||||
status: WORD
|
||||
@ -134,34 +168,86 @@ class AdminTransformer(Transformer):
|
||||
|
||||
def show_user(self, items):
|
||||
user_name = items[2]
|
||||
return {"type": "show_user", "username": user_name}
|
||||
return {"type": "show_user", "user_name": user_name}
|
||||
|
||||
def drop_user(self, items):
|
||||
user_name = items[2]
|
||||
return {"type": "drop_user", "username": user_name}
|
||||
return {"type": "drop_user", "user_name": user_name}
|
||||
|
||||
def alter_user(self, items):
|
||||
user_name = items[3]
|
||||
new_password = items[4]
|
||||
return {"type": "alter_user", "username": user_name, "password": new_password}
|
||||
return {"type": "alter_user", "user_name": user_name, "password": new_password}
|
||||
|
||||
def create_user(self, items):
|
||||
user_name = items[2]
|
||||
password = items[3]
|
||||
return {"type": "create_user", "username": user_name, "password": password, "role": "user"}
|
||||
return {"type": "create_user", "user_name": user_name, "password": password, "role": "user"}
|
||||
|
||||
def activate_user(self, items):
|
||||
user_name = items[3]
|
||||
activate_status = items[4]
|
||||
return {"type": "activate_user", "activate_status": activate_status, "username": user_name}
|
||||
return {"type": "activate_user", "activate_status": activate_status, "user_name": user_name}
|
||||
|
||||
def list_datasets(self, items):
|
||||
user_name = items[3]
|
||||
return {"type": "list_datasets", "username": user_name}
|
||||
return {"type": "list_datasets", "user_name": user_name}
|
||||
|
||||
def list_agents(self, items):
|
||||
user_name = items[3]
|
||||
return {"type": "list_agents", "username": user_name}
|
||||
return {"type": "list_agents", "user_name": user_name}
|
||||
|
||||
def create_role(self, items):
|
||||
role_name = items[2]
|
||||
if len(items) > 4:
|
||||
description = items[4]
|
||||
return {"type": "create_role", "role_name": role_name, "description": description}
|
||||
else:
|
||||
return {"type": "create_role", "role_name": role_name}
|
||||
|
||||
def drop_role(self, items):
|
||||
role_name = items[2]
|
||||
return {"type": "drop_role", "role_name": role_name}
|
||||
|
||||
def alter_role(self, items):
|
||||
role_name = items[2]
|
||||
description = items[5]
|
||||
return {"type": "alter_role", "role_name": role_name, "description": description}
|
||||
|
||||
def list_roles(self, items):
|
||||
return {"type": "list_roles"}
|
||||
|
||||
def show_role(self, items):
|
||||
role_name = items[2]
|
||||
return {"type": "show_role", "role_name": role_name}
|
||||
|
||||
def grant_permission(self, items):
|
||||
action_list = items[1]
|
||||
resource = items[3]
|
||||
role_name = items[6]
|
||||
return {"type": "grant_permission", "role_name": role_name, "resource": resource, "actions": action_list}
|
||||
|
||||
def revoke_permission(self, items):
|
||||
action_list = items[1]
|
||||
resource = items[3]
|
||||
role_name = items[6]
|
||||
return {
|
||||
"type": "revoke_permission",
|
||||
"role_name": role_name,
|
||||
"resource": resource, "actions": action_list
|
||||
}
|
||||
|
||||
def alter_user_role(self, items):
|
||||
user_name = items[2]
|
||||
role_name = items[5]
|
||||
return {"type": "alter_user_role", "user_name": user_name, "role_name": role_name}
|
||||
|
||||
def show_user_permission(self, items):
|
||||
user_name = items[3]
|
||||
return {"type": "show_user_permission", "user_name": user_name}
|
||||
|
||||
def action_list(self, items):
|
||||
return items
|
||||
|
||||
def meta_command(self, items):
|
||||
command_name = str(items[0]).lower()
|
||||
@ -205,6 +291,8 @@ class AdminCLI(Cmd):
|
||||
self.is_interactive = False
|
||||
self.admin_account = "admin@ragflow.io"
|
||||
self.admin_password: str = "admin"
|
||||
self.session = requests.Session()
|
||||
self.access_token: str = ""
|
||||
self.host: str = ""
|
||||
self.port: int = 0
|
||||
|
||||
@ -213,12 +301,8 @@ class AdminCLI(Cmd):
|
||||
|
||||
def onecmd(self, command: str) -> bool:
|
||||
try:
|
||||
# print(f"command: {command}")
|
||||
result = self.parse_command(command)
|
||||
|
||||
# if 'type' in result and result.get('type') == 'empty':
|
||||
# return False
|
||||
|
||||
if isinstance(result, dict):
|
||||
if 'type' in result and result.get('type') == 'empty':
|
||||
return False
|
||||
@ -244,7 +328,7 @@ class AdminCLI(Cmd):
|
||||
def default(self, line: str) -> bool:
|
||||
return self.onecmd(line)
|
||||
|
||||
def parse_command(self, command_str: str) -> dict[str, str] | Tree[Token]:
|
||||
def parse_command(self, command_str: str) -> dict[str, str]:
|
||||
if not command_str.strip():
|
||||
return {'type': 'empty'}
|
||||
|
||||
@ -256,32 +340,38 @@ class AdminCLI(Cmd):
|
||||
except Exception as e:
|
||||
return {'type': 'error', 'message': f'Parse error: {str(e)}'}
|
||||
|
||||
def verify_admin(self, args):
|
||||
|
||||
conn_info = self._parse_connection_args(args)
|
||||
if 'error' in conn_info:
|
||||
print(f"Error: {conn_info['error']}")
|
||||
return
|
||||
|
||||
self.host = conn_info['host']
|
||||
self.port = conn_info['port']
|
||||
def verify_admin(self, arguments: dict, single_command: bool):
|
||||
self.host = arguments['host']
|
||||
self.port = arguments['port']
|
||||
print(f"Attempt to access ip: {self.host}, port: {self.port}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/auth'
|
||||
url = f"http://{self.host}:{self.port}/api/v1/admin/login"
|
||||
|
||||
attempt_count = 3
|
||||
if single_command:
|
||||
attempt_count = 1
|
||||
|
||||
try_count = 0
|
||||
while True:
|
||||
try_count += 1
|
||||
if try_count > 3:
|
||||
if try_count > attempt_count:
|
||||
return False
|
||||
|
||||
admin_passwd = input(f"password for {self.admin_account}: ").strip()
|
||||
if single_command:
|
||||
admin_passwd = arguments['password']
|
||||
else:
|
||||
admin_passwd = input(f"password for {self.admin_account}: ").strip()
|
||||
try:
|
||||
self.admin_password = encode_to_base64(admin_passwd)
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
self.admin_password = encrypt(admin_passwd)
|
||||
response = self.session.post(url, json={'email': self.admin_account, 'password': self.admin_password})
|
||||
if response.status_code == 200:
|
||||
res_json = response.json()
|
||||
error_code = res_json.get('code', -1)
|
||||
if error_code == 0:
|
||||
self.session.headers.update({
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': response.headers['Authorization'],
|
||||
'User-Agent': 'RAGFlow-CLI/0.21.0'
|
||||
})
|
||||
print("Authentication successful.")
|
||||
return True
|
||||
else:
|
||||
@ -289,8 +379,9 @@ class AdminCLI(Cmd):
|
||||
print(f"Authentication failed: {error_message}, try again")
|
||||
continue
|
||||
else:
|
||||
print(f"Bad response,status: {response.status_code}, try again")
|
||||
except Exception:
|
||||
print(f"Bad response,status: {response.status_code}, password is wrong")
|
||||
except Exception as e:
|
||||
print(str(e))
|
||||
print(f"Can't access {self.host}, port: {self.port}")
|
||||
|
||||
def _print_table_simple(self, data):
|
||||
@ -341,9 +432,9 @@ class AdminCLI(Cmd):
|
||||
row = "|"
|
||||
for col in columns:
|
||||
value = str(item.get(col, ''))
|
||||
if len(value) > col_widths[col]:
|
||||
if get_string_width(value) > col_widths[col]:
|
||||
value = value[:col_widths[col] - 3] + "..."
|
||||
row += f" {value:<{col_widths[col]}} |"
|
||||
row += f" {value:<{col_widths[col] - (get_string_width(value) - len(value))}} |"
|
||||
print(row)
|
||||
|
||||
print(separator)
|
||||
@ -375,23 +466,31 @@ class AdminCLI(Cmd):
|
||||
print("\nGoodbye!")
|
||||
break
|
||||
|
||||
def run_single_command(self, args):
|
||||
conn_info = self._parse_connection_args(args)
|
||||
if 'error' in conn_info:
|
||||
print(f"Error: {conn_info['error']}")
|
||||
return
|
||||
def run_single_command(self, command: str):
|
||||
result = self.parse_command(command)
|
||||
self.execute_command(result)
|
||||
|
||||
def _parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
|
||||
def parse_connection_args(self, args: List[str]) -> Dict[str, Any]:
|
||||
parser = argparse.ArgumentParser(description='Admin CLI Client', add_help=False)
|
||||
parser.add_argument('-h', '--host', default='localhost', help='Admin service host')
|
||||
parser.add_argument('-p', '--port', type=int, default=8080, help='Admin service port')
|
||||
|
||||
parser.add_argument('-w', '--password', default='admin', type=str, help='Superuser password')
|
||||
parser.add_argument('command', nargs='?', help='Single command')
|
||||
try:
|
||||
parsed_args, remaining_args = parser.parse_known_args(args)
|
||||
return {
|
||||
'host': parsed_args.host,
|
||||
'port': parsed_args.port,
|
||||
}
|
||||
if remaining_args:
|
||||
command = remaining_args[0]
|
||||
return {
|
||||
'host': parsed_args.host,
|
||||
'port': parsed_args.port,
|
||||
'password': parsed_args.password,
|
||||
'command': command
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'host': parsed_args.host,
|
||||
'port': parsed_args.port,
|
||||
}
|
||||
except SystemExit:
|
||||
return {'error': 'Invalid connection arguments'}
|
||||
|
||||
@ -438,6 +537,24 @@ class AdminCLI(Cmd):
|
||||
self._handle_list_datasets(command_dict)
|
||||
case 'list_agents':
|
||||
self._handle_list_agents(command_dict)
|
||||
case 'create_role':
|
||||
self._create_role(command_dict)
|
||||
case 'drop_role':
|
||||
self._drop_role(command_dict)
|
||||
case 'alter_role':
|
||||
self._alter_role(command_dict)
|
||||
case 'list_roles':
|
||||
self._list_roles(command_dict)
|
||||
case 'show_role':
|
||||
self._show_role(command_dict)
|
||||
case 'grant_permission':
|
||||
self._grant_permission(command_dict)
|
||||
case 'revoke_permission':
|
||||
self._revoke_permission(command_dict)
|
||||
case 'alter_user_role':
|
||||
self._alter_user_role(command_dict)
|
||||
case 'show_user_permission':
|
||||
self._show_user_permission(command_dict)
|
||||
case 'meta':
|
||||
self._handle_meta_command(command_dict)
|
||||
case _:
|
||||
@ -447,30 +564,30 @@ class AdminCLI(Cmd):
|
||||
print("Listing all services")
|
||||
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/services'
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")
|
||||
print(f"Fail to get all services, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_show_service(self, command):
|
||||
service_id: int = command['number']
|
||||
print(f"Showing service: {service_id}")
|
||||
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/services/{service_id}'
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
res_data = res_json['data']
|
||||
if res_data['alive']:
|
||||
print(f"Service {res_data['service_name']} is alive. Detail:")
|
||||
if 'status' in res_data and res_data['status'] == 'alive':
|
||||
print(f"Service {res_data['service_name']} is alive, ")
|
||||
if isinstance(res_data['message'], str):
|
||||
print(res_data['message'])
|
||||
else:
|
||||
self._print_table_simple(res_data['message'])
|
||||
else:
|
||||
print(f"Service {res_data['service_name']} is down. Detail: {res_data['message']}")
|
||||
print(f"Service {res_data['service_name']} is down, {res_data['message']}")
|
||||
else:
|
||||
print(f"Fail to show service, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
@ -490,7 +607,7 @@ class AdminCLI(Cmd):
|
||||
print("Listing all users")
|
||||
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
@ -498,23 +615,23 @@ class AdminCLI(Cmd):
|
||||
print(f"Fail to get all users, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_show_user(self, command):
|
||||
username_tree: Tree = command['username']
|
||||
username: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Showing user: {username}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}'
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
username_tree: Tree = command['user_name']
|
||||
user_name: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Showing user: {user_name}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}'
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to get user {username}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
print(f"Fail to get user {user_name}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_drop_user(self, command):
|
||||
username_tree: Tree = command['username']
|
||||
username: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Drop user: {username}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}'
|
||||
response = requests.delete(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
username_tree: Tree = command['user_name']
|
||||
user_name: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Drop user: {user_name}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}'
|
||||
response = self.session.delete(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
print(res_json["message"])
|
||||
@ -522,14 +639,13 @@ class AdminCLI(Cmd):
|
||||
print(f"Fail to drop user, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_alter_user(self, command):
|
||||
username_tree: Tree = command['username']
|
||||
username: str = username_tree.children[0].strip("'\"")
|
||||
user_name_tree: Tree = command['user_name']
|
||||
user_name: str = user_name_tree.children[0].strip("'\"")
|
||||
password_tree: Tree = command['password']
|
||||
password: str = password_tree.children[0].strip("'\"")
|
||||
print(f"Alter user: {username}, password: {password}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/password'
|
||||
response = requests.put(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password),
|
||||
json={'new_password': encrypt(password)})
|
||||
print(f"Alter user: {user_name}, password: {password}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/password'
|
||||
response = self.session.put(url, json={'new_password': encrypt(password)})
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
print(res_json["message"])
|
||||
@ -537,34 +653,32 @@ class AdminCLI(Cmd):
|
||||
print(f"Fail to alter password, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_create_user(self, command):
|
||||
username_tree: Tree = command['username']
|
||||
username: str = username_tree.children[0].strip("'\"")
|
||||
user_name_tree: Tree = command['user_name']
|
||||
user_name: str = user_name_tree.children[0].strip("'\"")
|
||||
password_tree: Tree = command['password']
|
||||
password: str = password_tree.children[0].strip("'\"")
|
||||
role: str = command['role']
|
||||
print(f"Create user: {username}, password: {password}, role: {role}")
|
||||
print(f"Create user: {user_name}, password: {password}, role: {role}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users'
|
||||
response = requests.post(
|
||||
response = self.session.post(
|
||||
url,
|
||||
auth=HTTPBasicAuth(self.admin_account, self.admin_password),
|
||||
json={'username': username, 'password': encrypt(password), 'role': role}
|
||||
json={'user_name': user_name, 'password': encrypt(password), 'role': role}
|
||||
)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to create user {username}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
print(f"Fail to create user {user_name}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_activate_user(self, command):
|
||||
username_tree: Tree = command['username']
|
||||
username: str = username_tree.children[0].strip("'\"")
|
||||
user_name_tree: Tree = command['user_name']
|
||||
user_name: str = user_name_tree.children[0].strip("'\"")
|
||||
activate_tree: Tree = command['activate_status']
|
||||
activate_status: str = activate_tree.children[0].strip("'\"")
|
||||
if activate_status.lower() in ['on', 'off']:
|
||||
print(f"Alter user {username} activate status, turn {activate_status.lower()}.")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/activate'
|
||||
response = requests.put(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password),
|
||||
json={'activate_status': activate_status})
|
||||
print(f"Alter user {user_name} activate status, turn {activate_status.lower()}.")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/activate'
|
||||
response = self.session.put(url, json={'activate_status': activate_status})
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
print(res_json["message"])
|
||||
@ -574,28 +688,178 @@ class AdminCLI(Cmd):
|
||||
print(f"Unknown activate status: {activate_status}.")
|
||||
|
||||
def _handle_list_datasets(self, command):
|
||||
username_tree: Tree = command['username']
|
||||
username: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Listing all datasets of user: {username}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/datasets'
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
username_tree: Tree = command['user_name']
|
||||
user_name: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Listing all datasets of user: {user_name}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/datasets'
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to get all datasets of {username}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
print(f"Fail to get all datasets of {user_name}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_list_agents(self, command):
|
||||
username_tree: Tree = command['username']
|
||||
username: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Listing all agents of user: {username}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{username}/agents'
|
||||
response = requests.get(url, auth=HTTPBasicAuth(self.admin_account, self.admin_password))
|
||||
username_tree: Tree = command['user_name']
|
||||
user_name: str = username_tree.children[0].strip("'\"")
|
||||
print(f"Listing all agents of user: {user_name}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name}/agents'
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to get all agents of {username}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
print(f"Fail to get all agents of {user_name}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _create_role(self, command):
|
||||
role_name_tree: Tree = command['role_name']
|
||||
role_name: str = role_name_tree.children[0].strip("'\"")
|
||||
desc_str: str = ''
|
||||
if 'description' in command:
|
||||
desc_tree: Tree = command['description']
|
||||
desc_str = desc_tree.children[0].strip("'\"")
|
||||
|
||||
print(f"create role name: {role_name}, description: {desc_str}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/roles'
|
||||
response = self.session.post(
|
||||
url,
|
||||
json={'role_name': role_name, 'description': desc_str}
|
||||
)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to create role {role_name}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _drop_role(self, command):
|
||||
role_name_tree: Tree = command['role_name']
|
||||
role_name: str = role_name_tree.children[0].strip("'\"")
|
||||
print(f"drop role name: {role_name}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}'
|
||||
response = self.session.delete(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to drop role {role_name}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _alter_role(self, command):
|
||||
role_name_tree: Tree = command['role_name']
|
||||
role_name: str = role_name_tree.children[0].strip("'\"")
|
||||
desc_tree: Tree = command['description']
|
||||
desc_str: str = desc_tree.children[0].strip("'\"")
|
||||
|
||||
print(f"alter role name: {role_name}, description: {desc_str}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}'
|
||||
response = self.session.put(
|
||||
url,
|
||||
json={'description': desc_str}
|
||||
)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(
|
||||
f"Fail to update role {role_name} with description: {desc_str}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _list_roles(self, command):
|
||||
print("Listing all roles")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/roles'
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to list roles, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _show_role(self, command):
|
||||
role_name_tree: Tree = command['role_name']
|
||||
role_name: str = role_name_tree.children[0].strip("'\"")
|
||||
print(f"show role: {role_name}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name}/permission'
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(f"Fail to list roles, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _grant_permission(self, command):
|
||||
role_name_tree: Tree = command['role_name']
|
||||
role_name_str: str = role_name_tree.children[0].strip("'\"")
|
||||
resource_tree: Tree = command['resource']
|
||||
resource_str: str = resource_tree.children[0].strip("'\"")
|
||||
action_tree_list: list = command['actions']
|
||||
actions: list = []
|
||||
for action_tree in action_tree_list:
|
||||
action_str: str = action_tree.children[0].strip("'\"")
|
||||
actions.append(action_str)
|
||||
print(f"grant role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission'
|
||||
response = self.session.post(
|
||||
url,
|
||||
json={'actions': actions, 'resource': resource_str}
|
||||
)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(
|
||||
f"Fail to grant role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _revoke_permission(self, command):
|
||||
role_name_tree: Tree = command['role_name']
|
||||
role_name_str: str = role_name_tree.children[0].strip("'\"")
|
||||
resource_tree: Tree = command['resource']
|
||||
resource_str: str = resource_tree.children[0].strip("'\"")
|
||||
action_tree_list: list = command['actions']
|
||||
actions: list = []
|
||||
for action_tree in action_tree_list:
|
||||
action_str: str = action_tree.children[0].strip("'\"")
|
||||
actions.append(action_str)
|
||||
print(f"revoke role_name: {role_name_str}, resource: {resource_str}, actions: {actions}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/roles/{role_name_str}/permission'
|
||||
response = self.session.delete(
|
||||
url,
|
||||
json={'actions': actions, 'resource': resource_str}
|
||||
)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(
|
||||
f"Fail to revoke role {role_name_str} with {actions} on {resource_str}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _alter_user_role(self, command):
|
||||
role_name_tree: Tree = command['role_name']
|
||||
role_name_str: str = role_name_tree.children[0].strip("'\"")
|
||||
user_name_tree: Tree = command['user_name']
|
||||
user_name_str: str = user_name_tree.children[0].strip("'\"")
|
||||
print(f"alter_user_role user_name: {user_name_str}, role_name: {role_name_str}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/role'
|
||||
response = self.session.put(
|
||||
url,
|
||||
json={'role_name': role_name_str}
|
||||
)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(
|
||||
f"Fail to alter user: {user_name_str} to role {role_name_str}, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _show_user_permission(self, command):
|
||||
user_name_tree: Tree = command['user_name']
|
||||
user_name_str: str = user_name_tree.children[0].strip("'\"")
|
||||
print(f"show_user_permission user_name: {user_name_str}")
|
||||
url = f'http://{self.host}:{self.port}/api/v1/admin/users/{user_name_str}/permission'
|
||||
response = self.session.get(url)
|
||||
res_json = response.json()
|
||||
if response.status_code == 200:
|
||||
self._print_table_simple(res_json['data'])
|
||||
else:
|
||||
print(
|
||||
f"Fail to show user: {user_name_str} permission, code: {res_json['code']}, message: {res_json['message']}")
|
||||
|
||||
def _handle_meta_command(self, command):
|
||||
meta_command = command['command']
|
||||
@ -638,27 +902,29 @@ def main():
|
||||
|
||||
cli = AdminCLI()
|
||||
|
||||
if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] == '-'):
|
||||
print(r"""
|
||||
____ ___ ______________ ___ __ _
|
||||
/ __ \/ | / ____/ ____/ /___ _ __ / | ____/ /___ ___ (_)___
|
||||
/ /_/ / /| |/ / __/ /_ / / __ \ | /| / / / /| |/ __ / __ `__ \/ / __ \
|
||||
/ _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ / / ___ / /_/ / / / / / / / / / /
|
||||
/_/ |_/_/ |_\____/_/ /_/\____/|__/|__/ /_/ |_\__,_/_/ /_/ /_/_/_/ /_/
|
||||
""")
|
||||
if cli.verify_admin(sys.argv):
|
||||
cli.cmdloop()
|
||||
args = cli.parse_connection_args(sys.argv)
|
||||
if 'error' in args:
|
||||
print(f"Error: {args['error']}")
|
||||
return
|
||||
|
||||
if 'command' in args:
|
||||
if 'password' not in args:
|
||||
print("Error: password is missing")
|
||||
return
|
||||
if cli.verify_admin(args, single_command=True):
|
||||
command: str = args['command']
|
||||
print(f"Run single command: {command}")
|
||||
cli.run_single_command(command)
|
||||
else:
|
||||
print(r"""
|
||||
____ ___ ______________ ___ __ _
|
||||
/ __ \/ | / ____/ ____/ /___ _ __ / | ____/ /___ ___ (_)___
|
||||
/ /_/ / /| |/ / __/ /_ / / __ \ | /| / / / /| |/ __ / __ `__ \/ / __ \
|
||||
/ _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ / / ___ / /_/ / / / / / / / / / /
|
||||
/_/ |_/_/ |_\____/_/ /_/\____/|__/|__/ /_/ |_\__,_/_/ /_/ /_/_/_/ /_/
|
||||
""")
|
||||
if cli.verify_admin(sys.argv):
|
||||
if cli.verify_admin(args, single_command=False):
|
||||
print(r"""
|
||||
____ ___ ______________ ___ __ _
|
||||
/ __ \/ | / ____/ ____/ /___ _ __ / | ____/ /___ ___ (_)___
|
||||
/ /_/ / /| |/ / __/ /_ / / __ \ | /| / / / /| |/ __ / __ `__ \/ / __ \
|
||||
/ _, _/ ___ / /_/ / __/ / / /_/ / |/ |/ / / ___ / /_/ / / / / / / / / / /
|
||||
/_/ |_/_/ |_\____/_/ /_/\____/|__/|__/ /_/ |_\__,_/_/ /_/ /_/_/_/ /_/
|
||||
""")
|
||||
cli.cmdloop()
|
||||
# cli.run_single_command(sys.argv[1:])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "ragflow-cli"
|
||||
version = "0.21.0.dev5"
|
||||
version = "0.21.0"
|
||||
description = "Admin Service's client of [RAGFlow](https://github.com/infiniflow/ragflow). The Admin Service provides user management and system monitoring. "
|
||||
authors = [{ name = "Lynn", email = "lynn_inf@hotmail.com" }]
|
||||
license = { text = "Apache License, Version 2.0" }
|
||||
|
||||
@ -26,7 +26,10 @@ from routes import admin_bp
|
||||
from api.utils.log_utils import init_root_logger
|
||||
from api.constants import SERVICE_CONF
|
||||
from api import settings
|
||||
from admin.server.config import load_configurations, SERVICE_CONFIGS
|
||||
from config import load_configurations, SERVICE_CONFIGS
|
||||
from auth import init_default_admin, setup_auth
|
||||
from flask_session import Session
|
||||
from flask_login import LoginManager
|
||||
|
||||
stop_event = threading.Event()
|
||||
|
||||
@ -42,7 +45,17 @@ if __name__ == '__main__':
|
||||
|
||||
app = Flask(__name__)
|
||||
app.register_blueprint(admin_bp)
|
||||
app.config["SESSION_PERMANENT"] = False
|
||||
app.config["SESSION_TYPE"] = "filesystem"
|
||||
app.config["MAX_CONTENT_LENGTH"] = int(
|
||||
os.environ.get("MAX_CONTENT_LENGTH", 1024 * 1024 * 1024)
|
||||
)
|
||||
Session(app)
|
||||
login_manager = LoginManager()
|
||||
login_manager.init_app(app)
|
||||
settings.init_settings()
|
||||
setup_auth(login_manager)
|
||||
init_default_admin()
|
||||
SERVICE_CONFIGS.configs = load_configurations(SERVICE_CONF)
|
||||
|
||||
try:
|
||||
|
||||
@ -18,11 +18,122 @@
|
||||
import logging
|
||||
import uuid
|
||||
from functools import wraps
|
||||
from datetime import datetime
|
||||
from flask import request, jsonify
|
||||
from flask_login import current_user, login_user
|
||||
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
||||
|
||||
from api.common.exceptions import AdminException
|
||||
from api import settings
|
||||
from api.common.exceptions import AdminException, UserNotFoundError
|
||||
from api.db.init_data import encode_to_base64
|
||||
from api.db.services import UserService
|
||||
from api.db import ActiveEnum, StatusEnum
|
||||
from api.utils.crypt import decrypt
|
||||
from api.utils import (
|
||||
current_timestamp,
|
||||
datetime_format,
|
||||
get_uuid,
|
||||
)
|
||||
from api.utils.api_utils import (
|
||||
construct_response,
|
||||
)
|
||||
|
||||
|
||||
def setup_auth(login_manager):
    """Register a request loader on ``login_manager`` that authenticates from
    the ``Authorization`` header.

    The loader deserializes the header with a serializer keyed by
    ``settings.SECRET_KEY``, rejects empty or short tokens, and looks up a
    valid-status user whose ``access_token`` equals the decoded value. It
    returns that user, or ``None`` whenever the header is missing, the token
    is rejected, no user matches, or any exception is raised along the way.
    """

    @login_manager.request_loader
    def load_user(web_request):
        jwt = Serializer(secret_key=settings.SECRET_KEY)
        authorization = web_request.headers.get("Authorization")
        if not authorization:
            return None

        try:
            access_token = str(jwt.loads(authorization))

            if not access_token or not access_token.strip():
                logging.warning("Authentication attempt with empty access token")
                return None

            # Access tokens should be UUIDs (32 hex characters)
            if len(access_token.strip()) < 32:
                logging.warning(f"Authentication attempt with invalid token format: {len(access_token)} chars")
                return None

            matches = UserService.query(
                access_token=access_token, status=StatusEnum.VALID.value
            )
            if not matches:
                return None

            candidate = matches[0]
            stored_token = candidate.access_token
            # Refuse a row whose stored token is blank even though the query matched.
            if not stored_token or not stored_token.strip():
                logging.warning(f"User {candidate.email} has empty access_token in database")
                return None
            return candidate
        except Exception as e:
            # Any failure (bad signature, lookup error, ...) means "not authenticated".
            logging.warning(f"load_user got exception {e}")
            return None
|
||||
|
||||
|
||||
def init_default_admin():
    """Make sure the deployment has a usable superuser account.

    When no superuser exists, a default one is saved (``admin@ragflow.io``
    with the base64-encoded password ``admin``). When superusers exist but
    none of them is active, an ``AdminException`` with code 500 is raised so
    the operator fixes ``is_active`` by hand.

    Raises:
        AdminException: the default admin could not be saved, or no existing
            superuser is active.
    """
    # Verify that at least one active admin user exists. If not, create a default one.
    superusers = UserService.query(is_superuser=True)
    if superusers:
        has_active_admin = any(u.is_active == ActiveEnum.ACTIVE.value for u in superusers)
        if not has_active_admin:
            raise AdminException("No active admin. Please update 'is_active' in db manually.", 500)
        return

    default_admin = dict(
        id=uuid.uuid1().hex,
        password=encode_to_base64("admin"),
        nickname="admin",
        is_superuser=True,
        email="admin@ragflow.io",
        creator="system",
        status="1",
    )
    saved = UserService.save(**default_admin)
    if not saved:
        raise AdminException("Can't init admin.", 500)
|
||||
|
||||
|
||||
def check_admin_auth(func):
    """Decorator that lets ``func`` run only for an active superuser.

    On every call the current user is re-read through
    ``UserService.filter_by_id`` and checked before ``func`` is invoked.

    Raises (from the wrapper):
        UserNotFoundError: the current user's id no longer resolves to a user.
        AdminException: code 403 when the user is not a superuser or is inactive.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        account = UserService.filter_by_id(current_user.id)
        if not account:
            raise UserNotFoundError(current_user.email)

        if not account.is_superuser:
            raise AdminException("Not admin", 403)

        if account.is_active == ActiveEnum.INACTIVE.value:
            raise AdminException(f"User {current_user.email} inactive", 403)

        # All checks passed: hand over to the protected view.
        return func(*args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
def login_admin(email: str, password: str):
    """
    Authenticate an administrator, rotate the access token and log the user in.

    :param email: admin email
    :param password: string before decrypt
    :return: the result of ``construct_response`` carrying the user's JSON
        (captured before the token is rotated), ``user.get_id()`` as ``auth``
        and a welcome message
    :raises UserNotFoundError: no user is registered under ``email``
    :raises AdminException: the password does not match, or (code 403) the
        user is not a superuser or is inactive
    """
    users = UserService.query(email=email)
    if not users:
        raise UserNotFoundError(email)
    psw = decrypt(password)
    user = UserService.query_user(email, psw)
    if not user:
        raise AdminException("Email and password do not match!")
    if not user.is_superuser:
        raise AdminException("Not admin", 403)
    if user.is_active == ActiveEnum.INACTIVE.value:
        raise AdminException(f"User {email} inactive", 403)

    resp = user.to_json()
    user.access_token = get_uuid()
    login_user(user)
    # Store plain scalars: the previous trailing commas wrapped both values
    # in 1-tuples before they were assigned to the model fields.
    user.update_time = current_timestamp()
    user.update_date = datetime_format(datetime.now())
    user.save()
    msg = "Welcome back!"
    return construct_response(data=resp, auth=user.get_id(), message=msg)
|
||||
|
||||
|
||||
def check_admin(username: str, password: str):
|
||||
@ -61,12 +172,18 @@ def login_verify(f):
|
||||
|
||||
username = auth.parameters['username']
|
||||
password = auth.parameters['password']
|
||||
# TODO: to check the username and password from DB
|
||||
if check_admin(username, password) is False:
|
||||
try:
|
||||
if check_admin(username, password) is False:
|
||||
return jsonify({
|
||||
"code": 500,
|
||||
"message": "Access denied",
|
||||
"data": None
|
||||
}), 200
|
||||
except Exception as e:
|
||||
error_msg = str(e)
|
||||
return jsonify({
|
||||
"code": 403,
|
||||
"message": "Access denied",
|
||||
"data": None
|
||||
"code": 500,
|
||||
"message": error_msg
|
||||
}), 200
|
||||
|
||||
return f(*args, **kwargs)
|
||||
|
||||
@ -26,6 +26,8 @@ from urllib.parse import urlparse
|
||||
|
||||
|
||||
class ServiceConfigs:
|
||||
configs = dict
|
||||
|
||||
def __init__(self):
|
||||
self.configs = []
|
||||
self.lock = threading.Lock()
|
||||
@ -229,7 +231,8 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
|
||||
host: str = v['host']
|
||||
http_port: int = v['http_port']
|
||||
config = RAGFlowServerConfig(id=id_count, name=name, host=host, port=http_port,
|
||||
service_type="ragflow_server", detail_func_name="check_ragflow_server_alive")
|
||||
service_type="ragflow_server",
|
||||
detail_func_name="check_ragflow_server_alive")
|
||||
configurations.append(config)
|
||||
id_count += 1
|
||||
case "es":
|
||||
@ -254,7 +257,8 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
|
||||
host = parts[0]
|
||||
port = int(parts[1])
|
||||
database: str = v.get('db_name', 'default_db')
|
||||
config = InfinityConfig(id=id_count, name=name, host=host, port=port, service_type="retrieval", retrieval_type="infinity",
|
||||
config = InfinityConfig(id=id_count, name=name, host=host, port=port, service_type="retrieval",
|
||||
retrieval_type="infinity",
|
||||
db_name=database, detail_func_name="get_infinity_status")
|
||||
configurations.append(config)
|
||||
id_count += 1
|
||||
@ -266,7 +270,8 @@ def load_configurations(config_path: str) -> list[BaseConfig]:
|
||||
port = int(parts[1])
|
||||
user = v.get('user')
|
||||
password = v.get('password')
|
||||
config = MinioConfig(id=id_count, name=name, host=host, port=port, user=user, password=password, service_type="file_store",
|
||||
config = MinioConfig(id=id_count, name=name, host=host, port=port, user=user, password=password,
|
||||
service_type="file_store",
|
||||
store_type="minio", detail_func_name="check_minio_alive")
|
||||
configurations.append(config)
|
||||
id_count += 1
|
||||
|
||||
76
admin/server/roles.py
Normal file
76
admin/server/roles.py
Normal file
@ -0,0 +1,76 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import logging
|
||||
|
||||
from typing import Dict, Any
|
||||
|
||||
from api.common.exceptions import AdminException
|
||||
|
||||
|
||||
class RoleMgr:
    """Placeholder role/permission manager.

    Every operation currently logs a "not implement" error and raises
    ``AdminException`` with the same message; none of them returns a value.
    """

    @staticmethod
    def create_role(role_name: str, description: str):
        """Create a role (not implemented: always raises ``AdminException``)."""
        message = f"not implement: create role: {role_name}, description: {description}"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def update_role_description(role_name: str, description: str) -> Dict[str, Any]:
        """Change a role's description (not implemented: always raises ``AdminException``)."""
        message = f"not implement: update role: {role_name} with description: {description}"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def delete_role(role_name: str) -> Dict[str, Any]:
        """Drop a role (not implemented: always raises ``AdminException``)."""
        message = f"not implement: drop role: {role_name}"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def list_roles() -> Dict[str, Any]:
        """List all roles (not implemented: always raises ``AdminException``)."""
        message = "not implement: list roles"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def get_role_permission(role_name: str) -> Dict[str, Any]:
        """Show a role's permissions (not implemented: always raises ``AdminException``)."""
        message = f"not implement: show role {role_name}"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def grant_role_permission(role_name: str, actions: list, resource: str) -> Dict[str, Any]:
        """Grant actions on a resource to a role (not implemented: always raises ``AdminException``)."""
        message = f"not implement: grant role {role_name} actions: {actions} on {resource}"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def revoke_role_permission(role_name: str, actions: list, resource: str) -> Dict[str, Any]:
        """Revoke actions on a resource from a role (not implemented: always raises ``AdminException``)."""
        message = f"not implement: revoke role {role_name} actions: {actions} on {resource}"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def update_user_role(user_name: str, role_name: str) -> Dict[str, Any]:
        """Assign a role to a user (not implemented: always raises ``AdminException``)."""
        message = f"not implement: update user role: {user_name} to role {role_name}"
        logging.error(message)
        raise AdminException(message)

    @staticmethod
    def get_user_permission(user_name: str) -> Dict[str, Any]:
        """Show a user's permissions (not implemented: always raises ``AdminException``)."""
        message = f"not implement: get user permission: {user_name}"
        logging.error(message)
        raise AdminException(message)
|
||||
@ -14,17 +14,38 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import secrets
|
||||
|
||||
from flask import Blueprint, request
|
||||
from flask_login import current_user, logout_user, login_required
|
||||
|
||||
from admin.server.auth import login_verify
|
||||
from auth import login_verify, login_admin, check_admin_auth
|
||||
from responses import success_response, error_response
|
||||
from services import UserMgr, ServiceMgr, UserServiceMgr
|
||||
from roles import RoleMgr
|
||||
from api.common.exceptions import AdminException
|
||||
|
||||
admin_bp = Blueprint('admin', __name__, url_prefix='/api/v1/admin')
|
||||
|
||||
|
||||
@admin_bp.route('/login', methods=['POST'])
def login():
    """Log an administrator in from a JSON body with ``email`` and ``password``.

    Responds with a 400 error when the body is missing or empty; otherwise
    returns whatever ``login_admin`` produces. Missing fields default to an
    empty string.
    """
    credentials = request.json
    if not credentials:
        return error_response('Authorize admin failed.', 400)
    return login_admin(credentials.get("email", ""), credentials.get("password", ""))
|
||||
|
||||
|
||||
@admin_bp.route('/logout', methods=['GET'])
@login_required
def logout():
    """Log the current user out.

    Replaces the user's stored ``access_token`` with a random
    ``INVALID_``-prefixed value, saves it, ends the login session and
    returns a success response carrying ``True``.
    """
    revoked_token = f"INVALID_{secrets.token_hex(16)}"
    current_user.access_token = revoked_token
    current_user.save()
    logout_user()
    return success_response(True)
|
||||
|
||||
|
||||
@admin_bp.route('/auth', methods=['GET'])
|
||||
@login_verify
|
||||
def auth_admin():
|
||||
@ -35,7 +56,8 @@ def auth_admin():
|
||||
|
||||
|
||||
@admin_bp.route('/users', methods=['GET'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def list_users():
|
||||
try:
|
||||
users = UserMgr.get_all_users()
|
||||
@ -45,7 +67,8 @@ def list_users():
|
||||
|
||||
|
||||
@admin_bp.route('/users', methods=['POST'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def create_user():
|
||||
try:
|
||||
data = request.get_json()
|
||||
@ -71,7 +94,8 @@ def create_user():
|
||||
|
||||
|
||||
@admin_bp.route('/users/<username>', methods=['DELETE'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def delete_user(username):
|
||||
try:
|
||||
res = UserMgr.delete_user(username)
|
||||
@ -87,7 +111,8 @@ def delete_user(username):
|
||||
|
||||
|
||||
@admin_bp.route('/users/<username>/password', methods=['PUT'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def change_password(username):
|
||||
try:
|
||||
data = request.get_json()
|
||||
@ -105,7 +130,8 @@ def change_password(username):
|
||||
|
||||
|
||||
@admin_bp.route('/users/<username>/activate', methods=['PUT'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def alter_user_activate_status(username):
|
||||
try:
|
||||
data = request.get_json()
|
||||
@ -121,7 +147,8 @@ def alter_user_activate_status(username):
|
||||
|
||||
|
||||
@admin_bp.route('/users/<username>', methods=['GET'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def get_user_details(username):
|
||||
try:
|
||||
user_details = UserMgr.get_user_details(username)
|
||||
@ -134,7 +161,8 @@ def get_user_details(username):
|
||||
|
||||
|
||||
@admin_bp.route('/users/<username>/datasets', methods=['GET'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def get_user_datasets(username):
|
||||
try:
|
||||
datasets_list = UserServiceMgr.get_user_datasets(username)
|
||||
@ -147,7 +175,8 @@ def get_user_datasets(username):
|
||||
|
||||
|
||||
@admin_bp.route('/users/<username>/agents', methods=['GET'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def get_user_agents(username):
|
||||
try:
|
||||
agents_list = UserServiceMgr.get_user_agents(username)
|
||||
@ -160,7 +189,8 @@ def get_user_agents(username):
|
||||
|
||||
|
||||
@admin_bp.route('/services', methods=['GET'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def get_services():
|
||||
try:
|
||||
services = ServiceMgr.get_all_services()
|
||||
@ -170,7 +200,8 @@ def get_services():
|
||||
|
||||
|
||||
@admin_bp.route('/service_types/<service_type>', methods=['GET'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def get_services_by_type(service_type_str):
|
||||
try:
|
||||
services = ServiceMgr.get_services_by_type(service_type_str)
|
||||
@ -180,7 +211,8 @@ def get_services_by_type(service_type_str):
|
||||
|
||||
|
||||
@admin_bp.route('/services/<service_id>', methods=['GET'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def get_service(service_id):
|
||||
try:
|
||||
services = ServiceMgr.get_service_details(service_id)
|
||||
@ -190,7 +222,8 @@ def get_service(service_id):
|
||||
|
||||
|
||||
@admin_bp.route('/services/<service_id>', methods=['DELETE'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def shutdown_service(service_id):
|
||||
try:
|
||||
services = ServiceMgr.shutdown_service(service_id)
|
||||
@ -200,10 +233,133 @@ def shutdown_service(service_id):
|
||||
|
||||
|
||||
@admin_bp.route('/services/<service_id>', methods=['PUT'])
|
||||
@login_verify
|
||||
@login_required
|
||||
@check_admin_auth
|
||||
def restart_service(service_id):
|
||||
try:
|
||||
services = ServiceMgr.restart_service(service_id)
|
||||
return success_response(services)
|
||||
except Exception as e:
|
||||
return error_response(str(e), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/roles', methods=['POST'])
@login_required
@check_admin_auth
def create_role():
    """Create a role from a JSON body.

    Body fields:
        role_name: required; a 400 error response is returned without it.
        description: optional; defaults to an empty string.

    Returns a success response with the result of ``RoleMgr.create_role``,
    or a 500 error response carrying the message of any exception raised.
    """
    try:
        data = request.get_json()
        if not data or 'role_name' not in data:
            return error_response("Role name is required", 400)
        role_name: str = data['role_name']
        # Only role_name is validated above, so indexing data['description']
        # turned a missing description into a 500 whose message was "'description'".
        description: str = data.get('description', '')
        res = RoleMgr.create_role(role_name, description)
        return success_response(res)
    except Exception as e:
        return error_response(str(e), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/roles/<role_name>', methods=['PUT'])
@login_required
@check_admin_auth
def update_role(role_name: str):
    """Update the description of ``role_name`` from a JSON body.

    Returns a 400 error response when the body lacks ``description``, a
    success response with the result of ``RoleMgr.update_role_description``
    otherwise, or a 500 error response carrying any exception's message.
    """
    try:
        payload = request.get_json()
        if payload and 'description' in payload:
            updated = RoleMgr.update_role_description(role_name, payload['description'])
            return success_response(updated)
        return error_response("Role description is required", 400)
    except Exception as exc:
        return error_response(str(exc), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/roles/<role_name>', methods=['DELETE'])
@login_required
@check_admin_auth
def delete_role(role_name: str):
    """Delete ``role_name``.

    Returns a success response with the result of ``RoleMgr.delete_role``,
    or a 500 error response carrying any exception's message.
    """
    try:
        outcome = RoleMgr.delete_role(role_name)
    except Exception as exc:
        return error_response(str(exc), 500)
    else:
        # Kept outside the try body's first statement but still guarded below,
        # so a failure while building the response is reported the same way.
        try:
            return success_response(outcome)
        except Exception as exc:
            return error_response(str(exc), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/roles', methods=['GET'])
@login_required
@check_admin_auth
def list_roles():
    """List all roles.

    Returns a success response with the result of ``RoleMgr.list_roles``,
    or a 500 error response carrying any exception's message.
    """
    try:
        return success_response(RoleMgr.list_roles())
    except Exception as exc:
        return error_response(str(exc), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/roles/<role_name>/permission', methods=['GET'])
@login_required
@check_admin_auth
def get_role_permission(role_name: str):
    """Show the permissions of ``role_name``.

    Returns a success response with the result of
    ``RoleMgr.get_role_permission``, or a 500 error response carrying any
    exception's message.
    """
    try:
        return success_response(RoleMgr.get_role_permission(role_name))
    except Exception as exc:
        return error_response(str(exc), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/roles/<role_name>/permission', methods=['POST'])
@login_required
@check_admin_auth
def grant_role_permission(role_name: str):
    """Grant ``actions`` on ``resource`` to ``role_name`` from a JSON body.

    Returns a 400 error response when the body lacks ``actions`` or
    ``resource``, a success response with the result of
    ``RoleMgr.grant_role_permission`` otherwise, or a 500 error response
    carrying any exception's message.
    """
    try:
        payload = request.get_json()
        if payload and 'actions' in payload and 'resource' in payload:
            granted_actions: list = payload['actions']
            target_resource: str = payload['resource']
            outcome = RoleMgr.grant_role_permission(role_name, granted_actions, target_resource)
            return success_response(outcome)
        return error_response("Permission is required", 400)
    except Exception as exc:
        return error_response(str(exc), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/roles/<role_name>/permission', methods=['DELETE'])
@login_required
@check_admin_auth
def revoke_role_permission(role_name: str):
    """Revoke ``actions`` on ``resource`` from ``role_name`` from a JSON body.

    Returns a 400 error response when the body lacks ``actions`` or
    ``resource``, a success response with the result of
    ``RoleMgr.revoke_role_permission`` otherwise, or a 500 error response
    carrying any exception's message.
    """
    try:
        payload = request.get_json()
        if payload and 'actions' in payload and 'resource' in payload:
            revoked_actions: list = payload['actions']
            target_resource: str = payload['resource']
            outcome = RoleMgr.revoke_role_permission(role_name, revoked_actions, target_resource)
            return success_response(outcome)
        return error_response("Permission is required", 400)
    except Exception as exc:
        return error_response(str(exc), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/users/<user_name>/role', methods=['PUT'])
@login_required
@check_admin_auth
def update_user_role(user_name: str):
    """Assign the role named in the JSON body to ``user_name``.

    Returns a 400 error response when the body lacks ``role_name``, a
    success response with the result of ``RoleMgr.update_user_role``
    otherwise, or a 500 error response carrying any exception's message.
    """
    try:
        payload = request.get_json()
        if payload and 'role_name' in payload:
            outcome = RoleMgr.update_user_role(user_name, payload['role_name'])
            return success_response(outcome)
        return error_response("Role name is required", 400)
    except Exception as exc:
        return error_response(str(exc), 500)
|
||||
|
||||
|
||||
@admin_bp.route('/users/<user_name>/permission', methods=['GET'])
@login_required
@check_admin_auth
def get_user_permission(user_name: str):
    """Show the permissions of ``user_name``.

    Returns a success response with the result of
    ``RoleMgr.get_user_permission``, or a 500 error response carrying any
    exception's message.
    """
    try:
        return success_response(RoleMgr.get_user_permission(user_name))
    except Exception as exc:
        return error_response(str(exc), 500)
|
||||
|
||||
@ -27,7 +27,7 @@ from api.utils.crypt import decrypt
|
||||
from api.utils import health_utils
|
||||
|
||||
from api.common.exceptions import AdminException, UserAlreadyExistsError, UserNotFoundError
|
||||
from admin.server.config import SERVICE_CONFIGS
|
||||
from config import SERVICE_CONFIGS
|
||||
|
||||
|
||||
class UserMgr:
|
||||
@ -166,8 +166,7 @@ class UserServiceMgr:
|
||||
return [{
|
||||
'title': r['title'],
|
||||
'permission': r['permission'],
|
||||
'canvas_type': r['canvas_type'],
|
||||
'canvas_category': r['canvas_category']
|
||||
'canvas_category': r['canvas_category'].split('-')[0]
|
||||
} for r in res]
|
||||
|
||||
|
||||
@ -181,12 +180,12 @@ class ServiceMgr:
|
||||
config_dict = config.to_dict()
|
||||
try:
|
||||
service_detail = ServiceMgr.get_service_details(service_id)
|
||||
if service_detail['alive']:
|
||||
config_dict['status'] = 'Alive'
|
||||
if "status" in service_detail:
|
||||
config_dict['status'] = service_detail['status']
|
||||
else:
|
||||
config_dict['status'] = 'Timeout'
|
||||
config_dict['status'] = 'timeout'
|
||||
except Exception:
|
||||
config_dict['status'] = 'Timeout'
|
||||
config_dict['status'] = 'timeout'
|
||||
result.append(config_dict)
|
||||
return result
|
||||
|
||||
@ -206,7 +205,7 @@ class ServiceMgr:
|
||||
}
|
||||
service_info = service_config_mapping.get(service_id, {})
|
||||
if not service_info:
|
||||
raise AdminException(f"Invalid service_id: {service_id}")
|
||||
raise AdminException(f"invalid service_id: {service_id}")
|
||||
|
||||
detail_func = getattr(health_utils, service_info.get('detail_func_name'))
|
||||
res = detail_func()
|
||||
|
||||
@ -18,12 +18,14 @@ import re
|
||||
from abc import ABC
|
||||
from agent.tools.base import ToolParamBase, ToolBase, ToolMeta
|
||||
from api.db import LLMType
|
||||
from api.db.services.document_service import DocumentService
|
||||
from api.db.services.dialog_service import meta_filter
|
||||
from api.db.services.knowledgebase_service import KnowledgebaseService
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api import settings
|
||||
from api.utils.api_utils import timeout
|
||||
from rag.app.tag import label_question
|
||||
from rag.prompts.generator import cross_languages, kb_prompt
|
||||
from rag.prompts.generator import cross_languages, kb_prompt, gen_meta_filter
|
||||
|
||||
|
||||
class RetrievalParam(ToolParamBase):
|
||||
@ -58,6 +60,7 @@ class RetrievalParam(ToolParamBase):
|
||||
self.use_kg = False
|
||||
self.cross_languages = []
|
||||
self.toc_enhance = False
|
||||
self.meta_data_filter={}
|
||||
|
||||
def check(self):
|
||||
self.check_decimal_float(self.similarity_threshold, "[Retrieval] Similarity threshold")
|
||||
@ -117,6 +120,21 @@ class Retrieval(ToolBase, ABC):
|
||||
vars = self.get_input_elements_from_text(kwargs["query"])
|
||||
vars = {k:o["value"] for k,o in vars.items()}
|
||||
query = self.string_format(kwargs["query"], vars)
|
||||
|
||||
doc_ids=[]
|
||||
if self._param.meta_data_filter!={}:
|
||||
metas = DocumentService.get_meta_by_kbs(kb_ids)
|
||||
if self._param.meta_data_filter.get("method") == "auto":
|
||||
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT)
|
||||
filters = gen_meta_filter(chat_mdl, metas, query)
|
||||
doc_ids.extend(meta_filter(metas, filters))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
elif self._param.meta_data_filter.get("method") == "manual":
|
||||
doc_ids.extend(meta_filter(metas, self._param.meta_data_filter["manual"]))
|
||||
if not doc_ids:
|
||||
doc_ids = None
|
||||
|
||||
if self._param.cross_languages:
|
||||
query = cross_languages(kbs[0].tenant_id, None, query, self._param.cross_languages)
|
||||
|
||||
@ -131,6 +149,7 @@ class Retrieval(ToolBase, ABC):
|
||||
self._param.top_n,
|
||||
self._param.similarity_threshold,
|
||||
1 - self._param.keywords_similarity_weight,
|
||||
doc_ids=doc_ids,
|
||||
aggs=False,
|
||||
rerank_mdl=rerank_mdl,
|
||||
rank_feature=label_question(query, kbs),
|
||||
|
||||
@ -60,7 +60,7 @@ def list_chunk():
|
||||
}
|
||||
if "available_int" in req:
|
||||
query["available_int"] = int(req["available_int"])
|
||||
sres = settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=True)
|
||||
sres = settings.retriever.search(query, search.index_name(tenant_id), kb_ids, highlight=["content_ltks"])
|
||||
res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
|
||||
for id in sres.ids:
|
||||
d = {
|
||||
@ -350,7 +350,8 @@ def retrieval_test():
|
||||
float(req.get("similarity_threshold", 0.0)),
|
||||
float(req.get("vector_similarity_weight", 0.3)),
|
||||
top,
|
||||
doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"),
|
||||
doc_ids, rerank_mdl=rerank_mdl,
|
||||
highlight=req.get("highlight", False),
|
||||
rank_feature=labels
|
||||
)
|
||||
if use_kg:
|
||||
|
||||
@ -45,7 +45,7 @@ from api.utils.api_utils import (
|
||||
from api.utils.file_utils import filename_type, get_project_base_directory, thumbnail
|
||||
from api.utils.web_utils import CONTENT_TYPE_MAP, html2pdf, is_valid_url
|
||||
from deepdoc.parser.html_parser import RAGFlowHtmlParser
|
||||
from rag.nlp import search
|
||||
from rag.nlp import search, rag_tokenizer
|
||||
from rag.utils.storage_factory import STORAGE_IMPL
|
||||
|
||||
|
||||
@ -524,6 +524,21 @@ def rename():
|
||||
e, file = FileService.get_by_id(informs[0].file_id)
|
||||
FileService.update_by_id(file.id, {"name": req["name"]})
|
||||
|
||||
tenant_id = DocumentService.get_tenant_id(req["doc_id"])
|
||||
title_tks = rag_tokenizer.tokenize(req["name"])
|
||||
es_body = {
|
||||
"docnm_kwd": req["name"],
|
||||
"title_tks": title_tks,
|
||||
"title_sm_tks": rag_tokenizer.fine_grained_tokenize(title_tks),
|
||||
}
|
||||
if settings.docStoreConn.indexExist(search.index_name(tenant_id), doc.kb_id):
|
||||
settings.docStoreConn.update(
|
||||
{"doc_id": req["doc_id"]},
|
||||
es_body,
|
||||
search.index_name(tenant_id),
|
||||
doc.kb_id,
|
||||
)
|
||||
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License
|
||||
#
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
@ -234,54 +235,63 @@ def get_all_parent_folders():
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/rm', methods=['POST']) # noqa: F821
|
||||
@manager.route("/rm", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("file_ids")
|
||||
def rm():
|
||||
req = request.json
|
||||
file_ids = req["file_ids"]
|
||||
|
||||
def _delete_single_file(file):
|
||||
try:
|
||||
if file.location:
|
||||
STORAGE_IMPL.rm(file.parent_id, file.location)
|
||||
except Exception:
|
||||
logging.exception(f"Fail to remove object: {file.parent_id}/{file.location}")
|
||||
|
||||
informs = File2DocumentService.get_by_file_id(file.id)
|
||||
for inform in informs:
|
||||
doc_id = inform.document_id
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if e and doc:
|
||||
tenant_id = DocumentService.get_tenant_id(doc_id)
|
||||
if tenant_id:
|
||||
DocumentService.remove_document(doc, tenant_id)
|
||||
File2DocumentService.delete_by_file_id(file.id)
|
||||
|
||||
FileService.delete(file)
|
||||
|
||||
def _delete_folder_recursive(folder, tenant_id):
|
||||
sub_files = FileService.list_all_files_by_parent_id(folder.id)
|
||||
for sub_file in sub_files:
|
||||
if sub_file.type == FileType.FOLDER.value:
|
||||
_delete_folder_recursive(sub_file, tenant_id)
|
||||
else:
|
||||
_delete_single_file(sub_file)
|
||||
|
||||
FileService.delete(folder)
|
||||
|
||||
try:
|
||||
for file_id in file_ids:
|
||||
e, file = FileService.get_by_id(file_id)
|
||||
if not e:
|
||||
if not e or not file:
|
||||
return get_data_error_result(message="File or Folder not found!")
|
||||
if not file.tenant_id:
|
||||
return get_data_error_result(message="Tenant not found!")
|
||||
if not check_file_team_permission(file, current_user.id):
|
||||
return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
return get_json_result(data=False, message="No authorization.", code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
|
||||
if file.source_type == FileSource.KNOWLEDGEBASE:
|
||||
continue
|
||||
|
||||
if file.type == FileType.FOLDER.value:
|
||||
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])
|
||||
for inner_file_id in file_id_list:
|
||||
e, file = FileService.get_by_id(inner_file_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="File not found!")
|
||||
STORAGE_IMPL.rm(file.parent_id, file.location)
|
||||
FileService.delete_folder_by_pf_id(current_user.id, file_id)
|
||||
else:
|
||||
STORAGE_IMPL.rm(file.parent_id, file.location)
|
||||
if not FileService.delete(file):
|
||||
return get_data_error_result(
|
||||
message="Database error (File removal)!")
|
||||
_delete_folder_recursive(file, current_user.id)
|
||||
continue
|
||||
|
||||
# delete file2document
|
||||
informs = File2DocumentService.get_by_file_id(file_id)
|
||||
for inform in informs:
|
||||
doc_id = inform.document_id
|
||||
e, doc = DocumentService.get_by_id(doc_id)
|
||||
if not e:
|
||||
return get_data_error_result(message="Document not found!")
|
||||
tenant_id = DocumentService.get_tenant_id(doc_id)
|
||||
if not tenant_id:
|
||||
return get_data_error_result(message="Tenant not found!")
|
||||
if not DocumentService.remove_document(doc, tenant_id):
|
||||
return get_data_error_result(
|
||||
message="Database error (Document removal)!")
|
||||
File2DocumentService.delete_by_file_id(file_id)
|
||||
_delete_single_file(file)
|
||||
|
||||
return get_json_result(data=True)
|
||||
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@ -355,31 +365,89 @@ def get(file_id):
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route('/mv', methods=['POST']) # noqa: F821
|
||||
@manager.route("/mv", methods=["POST"]) # noqa: F821
|
||||
@login_required
|
||||
@validate_request("src_file_ids", "dest_file_id")
|
||||
def move():
|
||||
req = request.json
|
||||
try:
|
||||
file_ids = req["src_file_ids"]
|
||||
parent_id = req["dest_file_id"]
|
||||
dest_parent_id = req["dest_file_id"]
|
||||
|
||||
ok, dest_folder = FileService.get_by_id(dest_parent_id)
|
||||
if not ok or not dest_folder:
|
||||
return get_data_error_result(message="Parent Folder not found!")
|
||||
|
||||
files = FileService.get_by_ids(file_ids)
|
||||
files_dict = {}
|
||||
for file in files:
|
||||
files_dict[file.id] = file
|
||||
if not files:
|
||||
return get_data_error_result(message="Source files not found!")
|
||||
|
||||
files_dict = {f.id: f for f in files}
|
||||
|
||||
for file_id in file_ids:
|
||||
file = files_dict[file_id]
|
||||
file = files_dict.get(file_id)
|
||||
if not file:
|
||||
return get_data_error_result(message="File or Folder not found!")
|
||||
if not file.tenant_id:
|
||||
return get_data_error_result(message="Tenant not found!")
|
||||
if not check_file_team_permission(file, current_user.id):
|
||||
return get_json_result(data=False, message='No authorization.', code=settings.RetCode.AUTHENTICATION_ERROR)
|
||||
fe, _ = FileService.get_by_id(parent_id)
|
||||
if not fe:
|
||||
return get_data_error_result(message="Parent Folder not found!")
|
||||
FileService.move_file(file_ids, parent_id)
|
||||
return get_json_result(
|
||||
data=False,
|
||||
message="No authorization.",
|
||||
code=settings.RetCode.AUTHENTICATION_ERROR,
|
||||
)
|
||||
|
||||
def _move_entry_recursive(source_file_entry, dest_folder):
|
||||
if source_file_entry.type == FileType.FOLDER.value:
|
||||
existing_folder = FileService.query(name=source_file_entry.name, parent_id=dest_folder.id)
|
||||
if existing_folder:
|
||||
new_folder = existing_folder[0]
|
||||
else:
|
||||
new_folder = FileService.insert(
|
||||
{
|
||||
"id": get_uuid(),
|
||||
"parent_id": dest_folder.id,
|
||||
"tenant_id": source_file_entry.tenant_id,
|
||||
"created_by": current_user.id,
|
||||
"name": source_file_entry.name,
|
||||
"location": "",
|
||||
"size": 0,
|
||||
"type": FileType.FOLDER.value,
|
||||
}
|
||||
)
|
||||
|
||||
sub_files = FileService.list_all_files_by_parent_id(source_file_entry.id)
|
||||
for sub_file in sub_files:
|
||||
_move_entry_recursive(sub_file, new_folder)
|
||||
|
||||
FileService.delete_by_id(source_file_entry.id)
|
||||
return
|
||||
|
||||
old_parent_id = source_file_entry.parent_id
|
||||
old_location = source_file_entry.location
|
||||
filename = source_file_entry.name
|
||||
|
||||
new_location = filename
|
||||
while STORAGE_IMPL.obj_exist(dest_folder.id, new_location):
|
||||
new_location += "_"
|
||||
|
||||
try:
|
||||
STORAGE_IMPL.move(old_parent_id, old_location, dest_folder.id, new_location)
|
||||
except Exception as storage_err:
|
||||
raise RuntimeError(f"Move file failed at storage layer: {str(storage_err)}")
|
||||
|
||||
FileService.update_by_id(
|
||||
source_file_entry.id,
|
||||
{
|
||||
"parent_id": dest_folder.id,
|
||||
"location": new_location,
|
||||
},
|
||||
)
|
||||
|
||||
for file in files:
|
||||
_move_entry_recursive(file, dest_folder)
|
||||
|
||||
return get_json_result(data=True)
|
||||
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@ -194,6 +194,9 @@ def add_llm():
|
||||
elif factory == "Azure-OpenAI":
|
||||
api_key = apikey_json(["api_key", "api_version"])
|
||||
|
||||
elif factory == "OpenRouter":
|
||||
api_key = apikey_json(["api_key", "provider_order"])
|
||||
|
||||
llm = {
|
||||
"tenant_id": current_user.id,
|
||||
"llm_factory": factory,
|
||||
|
||||
@ -470,6 +470,20 @@ def list_docs(dataset_id, tenant_id):
|
||||
required: false
|
||||
default: 0
|
||||
description: Unix timestamp for filtering documents created before this time. 0 means no filter.
|
||||
- in: query
|
||||
name: suffix
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
required: false
|
||||
description: Filter by file suffix (e.g., ["pdf", "txt", "docx"]).
|
||||
- in: query
|
||||
name: run
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
required: false
|
||||
description: Filter by document run status. Supports both numeric ("0", "1", "2", "3", "4") and text formats ("UNSTART", "RUNNING", "CANCEL", "DONE", "FAIL").
|
||||
- in: header
|
||||
name: Authorization
|
||||
type: string
|
||||
@ -512,63 +526,62 @@ def list_docs(dataset_id, tenant_id):
|
||||
description: Processing status.
|
||||
"""
|
||||
if not KnowledgebaseService.accessible(kb_id=dataset_id, user_id=tenant_id):
|
||||
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
|
||||
id = request.args.get("id")
|
||||
name = request.args.get("name")
|
||||
return get_error_data_result(message=f"You don't own the dataset {dataset_id}. ")
|
||||
|
||||
if id and not DocumentService.query(id=id, kb_id=dataset_id):
|
||||
return get_error_data_result(message=f"You don't own the document {id}.")
|
||||
q = request.args
|
||||
document_id = q.get("id")
|
||||
name = q.get("name")
|
||||
|
||||
if document_id and not DocumentService.query(id=document_id, kb_id=dataset_id):
|
||||
return get_error_data_result(message=f"You don't own the document {document_id}.")
|
||||
if name and not DocumentService.query(name=name, kb_id=dataset_id):
|
||||
return get_error_data_result(message=f"You don't own the document {name}.")
|
||||
|
||||
page = int(request.args.get("page", 1))
|
||||
keywords = request.args.get("keywords", "")
|
||||
page_size = int(request.args.get("page_size", 30))
|
||||
orderby = request.args.get("orderby", "create_time")
|
||||
if request.args.get("desc") == "False":
|
||||
desc = False
|
||||
else:
|
||||
desc = True
|
||||
docs, tol = DocumentService.get_list(dataset_id, page, page_size, orderby, desc, keywords, id, name)
|
||||
page = int(q.get("page", 1))
|
||||
page_size = int(q.get("page_size", 30))
|
||||
orderby = q.get("orderby", "create_time")
|
||||
desc = str(q.get("desc", "true")).strip().lower() != "false"
|
||||
keywords = q.get("keywords", "")
|
||||
|
||||
create_time_from = int(request.args.get("create_time_from", 0))
|
||||
create_time_to = int(request.args.get("create_time_to", 0))
|
||||
# filters - align with OpenAPI parameter names
|
||||
suffix = q.getlist("suffix")
|
||||
run_status = q.getlist("run")
|
||||
create_time_from = int(q.get("create_time_from", 0))
|
||||
create_time_to = int(q.get("create_time_to", 0))
|
||||
|
||||
# map run status (accept text or numeric) - align with API parameter
|
||||
run_status_text_to_numeric = {"UNSTART": "0", "RUNNING": "1", "CANCEL": "2", "DONE": "3", "FAIL": "4"}
|
||||
run_status_converted = [run_status_text_to_numeric.get(v, v) for v in run_status]
|
||||
|
||||
docs, total = DocumentService.get_list(
|
||||
dataset_id, page, page_size, orderby, desc, keywords, document_id, name, suffix, run_status_converted
|
||||
)
|
||||
|
||||
# time range filter (0 means no bound)
|
||||
if create_time_from or create_time_to:
|
||||
filtered_docs = []
|
||||
for doc in docs:
|
||||
doc_create_time = doc.get("create_time", 0)
|
||||
if (create_time_from == 0 or doc_create_time >= create_time_from) and (create_time_to == 0 or doc_create_time <= create_time_to):
|
||||
filtered_docs.append(doc)
|
||||
docs = filtered_docs
|
||||
docs = [
|
||||
d for d in docs
|
||||
if (create_time_from == 0 or d.get("create_time", 0) >= create_time_from)
|
||||
and (create_time_to == 0 or d.get("create_time", 0) <= create_time_to)
|
||||
]
|
||||
|
||||
# rename key's name
|
||||
renamed_doc_list = []
|
||||
# rename keys + map run status back to text for output
|
||||
key_mapping = {
|
||||
"chunk_num": "chunk_count",
|
||||
"kb_id": "dataset_id",
|
||||
"kb_id": "dataset_id",
|
||||
"token_num": "token_count",
|
||||
"parser_id": "chunk_method",
|
||||
}
|
||||
run_mapping = {
|
||||
"0": "UNSTART",
|
||||
"1": "RUNNING",
|
||||
"2": "CANCEL",
|
||||
"3": "DONE",
|
||||
"4": "FAIL",
|
||||
}
|
||||
for doc in docs:
|
||||
renamed_doc = {}
|
||||
for key, value in doc.items():
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(str(value))
|
||||
new_key = key_mapping.get(key, key)
|
||||
renamed_doc[new_key] = value
|
||||
if key == "run":
|
||||
renamed_doc["run"] = run_mapping.get(value)
|
||||
renamed_doc_list.append(renamed_doc)
|
||||
return get_result(data={"total": tol, "docs": renamed_doc_list})
|
||||
run_status_numeric_to_text = {"0": "UNSTART", "1": "RUNNING", "2": "CANCEL", "3": "DONE", "4": "FAIL"}
|
||||
|
||||
output_docs = []
|
||||
for d in docs:
|
||||
renamed_doc = {key_mapping.get(k, k): v for k, v in d.items()}
|
||||
if "run" in d:
|
||||
renamed_doc["run"] = run_status_numeric_to_text.get(str(d["run"]), d["run"])
|
||||
output_docs.append(renamed_doc)
|
||||
|
||||
return get_result(data={"total": total, "docs": output_docs})
|
||||
|
||||
@manager.route("/datasets/<dataset_id>/documents", methods=["DELETE"]) # noqa: F821
|
||||
@token_required
|
||||
|
||||
@ -15,11 +15,14 @@
|
||||
#
|
||||
import json
|
||||
import logging
|
||||
import string
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from flask import redirect, request, session
|
||||
from flask import redirect, request, session, Response
|
||||
from flask_login import current_user, login_required, login_user, logout_user
|
||||
from werkzeug.security import check_password_hash, generate_password_hash
|
||||
|
||||
@ -46,6 +49,19 @@ from api.utils.api_utils import (
|
||||
validate_request,
|
||||
)
|
||||
from api.utils.crypt import decrypt
|
||||
from rag.utils.redis_conn import REDIS_CONN
|
||||
from api.apps import smtp_mail_server
|
||||
from api.utils.web_utils import (
|
||||
send_email_html,
|
||||
OTP_LENGTH,
|
||||
OTP_TTL_SECONDS,
|
||||
ATTEMPT_LIMIT,
|
||||
ATTEMPT_LOCK_SECONDS,
|
||||
RESEND_COOLDOWN_SECONDS,
|
||||
otp_keys,
|
||||
hash_code,
|
||||
captcha_key,
|
||||
)
|
||||
|
||||
|
||||
@manager.route("/login", methods=["POST", "GET"]) # noqa: F821
|
||||
@ -825,3 +841,170 @@ def set_tenant_info():
|
||||
return get_json_result(data=True)
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
|
||||
@manager.route("/forget/captcha", methods=["GET"])  # noqa: F821
def forget_get_captcha():
    """
    GET /forget/captcha?email=<email>

    Generate an image captcha for the password-reset flow.

    - Responds with ARGUMENT_ERROR when ``email`` is missing and with DATA_ERROR
      when ``UserService.query`` finds no user for it.
    - Stores the captcha text in Redis under ``captcha_key(email)`` for 60 seconds
      (a fixed value; this is not ``OTP_TTL_SECONDS``).
    - Returns the rendered captcha bytes with mimetype ``image/png``.
    """
    # How long a generated captcha stays valid in Redis.
    captcha_ttl_seconds = 60

    email = request.args.get("email") or ""
    if not email:
        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email is required")

    users = UserService.query(email=email)
    if not users:
        return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")

    # Captcha text: uppercase letters and digits, same length as the OTP.
    allowed = string.ascii_uppercase + string.digits
    captcha_text = "".join(secrets.choice(allowed) for _ in range(OTP_LENGTH))
    REDIS_CONN.set(captcha_key(email), captcha_text, captcha_ttl_seconds)

    # Imported lazily, as in the original implementation.
    from captcha.image import ImageCaptcha

    image = ImageCaptcha(width=300, height=120, font_sizes=[50, 60, 70])
    img_bytes = image.generate(captcha_text).read()
    return Response(img_bytes, mimetype="image/png")
|
||||
|
||||
|
||||
@manager.route("/forget/otp", methods=["POST"])  # noqa: F821
def forget_send_otp():
    """
    POST /forget/otp

    Request JSON: { email, captcha }

    - Verify the image captcha stored at captcha_key(email) (case-insensitive) and
      delete it so it cannot be reused.
    - Enforce the resend cooldown (RESEND_COOLDOWN_SECONDS) based on the last-sent
      timestamp kept in Redis.
    - Generate an email OTP (A-Z, length = OTP_LENGTH), store "<hash>:<salt hex>" in
      Redis with TTL = OTP_TTL_SECONDS, reset the attempt counter, record the send
      time, clear any lock, and send the OTP via email.
    - If sending the email raises, the failure is logged and the OTP state written by
      this request is removed so the user can immediately request a new code.
    """
    # get_json() may yield None or a non-object JSON value; treat both as "no fields".
    req = request.get_json()
    if not isinstance(req, dict):
        req = {}
    email = req.get("email") or ""
    captcha = (req.get("captcha") or "").strip()

    if not email or not captcha:
        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email and captcha required")

    users = UserService.query(email=email)
    if not users:
        return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")

    stored_captcha = REDIS_CONN.get(captcha_key(email))
    if not stored_captcha:
        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="invalid or expired captcha")
    if (stored_captcha or "").strip().lower() != captcha.lower():
        return get_json_result(data=False, code=settings.RetCode.AUTHENTICATION_ERROR, message="invalid or expired captcha")

    # Delete captcha to prevent reuse
    REDIS_CONN.delete(captcha_key(email))

    k_code, k_attempts, k_last, k_lock = otp_keys(email)
    now = int(time.time())
    last_ts = REDIS_CONN.get(k_last)
    if last_ts:
        try:
            elapsed = now - int(last_ts)
        except Exception:
            # Unparseable timestamp: treat the cooldown as already elapsed.
            elapsed = RESEND_COOLDOWN_SECONDS
        remaining = RESEND_COOLDOWN_SECONDS - elapsed
        if remaining > 0:
            return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message=f"you still have to wait {remaining} seconds")

    # Generate OTP (uppercase letters only) and store hashed
    otp = "".join(secrets.choice(string.ascii_uppercase) for _ in range(OTP_LENGTH))
    salt = os.urandom(16)
    code_hash = hash_code(otp, salt)
    REDIS_CONN.set(k_code, f"{code_hash}:{salt.hex()}", OTP_TTL_SECONDS)
    REDIS_CONN.set(k_attempts, 0, OTP_TTL_SECONDS)
    REDIS_CONN.set(k_last, now, OTP_TTL_SECONDS)
    REDIS_CONN.delete(k_lock)

    ttl_min = OTP_TTL_SECONDS // 60

    if not smtp_mail_server:
        # NOTE(review): the success response below is still returned in this case even
        # though no email is sent - confirm this is intended (e.g. for dev setups).
        logging.warning("SMTP mail server not initialized; skip sending email.")
    else:
        try:
            send_email_html(
                subject="Your Password Reset Code",
                to_email=email,
                template_key="reset_code",
                code=otp,
                ttl_min=ttl_min,
            )
        except Exception:
            logging.exception("Failed to send password reset email")
            # Roll back the state written above: the user never received this code,
            # so it must not count against the resend cooldown.
            for key in (k_code, k_attempts, k_last):
                REDIS_CONN.delete(key)
            return get_json_result(data=False, code=settings.RetCode.SERVER_ERROR, message="failed to send email")

    return get_json_result(data=True, code=settings.RetCode.SUCCESS, message="verification passed, email sent")
|
||||
|
||||
|
||||
@manager.route("/forget", methods=["POST"])  # noqa: F821
def forget():
    """
    POST: Verify email + OTP and reset password, then log the user in.

    Request JSON: { email, otp, new_password, confirm_new_password }

    Flow:
    - Validate that all fields are present and the two passwords match.
    - Reject when the lock key is set, or when no OTP record exists in Redis.
    - Recompute the hash of the upper-cased OTP with the stored salt and compare it
      (constant-time) with the stored hash. A mismatch increments the attempt
      counter and sets the lock key once ATTEMPT_LIMIT is reached.
    - On success, delete all OTP keys, update the password, issue a new access
      token and log the user in.
    """
    import hmac  # local import: only needed for the constant-time comparison below

    # get_json() may yield None or a non-object JSON value; treat both as "no fields".
    req = request.get_json()
    if not isinstance(req, dict):
        req = {}
    email = req.get("email") or ""
    otp = (req.get("otp") or "").strip()
    new_pwd = req.get("new_password")
    new_pwd2 = req.get("confirm_new_password")

    if not all([email, otp, new_pwd, new_pwd2]):
        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="email, otp and passwords are required")

    # For reset, passwords are provided as-is (no decrypt needed)
    if new_pwd != new_pwd2:
        return get_json_result(data=False, code=settings.RetCode.ARGUMENT_ERROR, message="passwords do not match")

    users = UserService.query(email=email)
    if not users:
        return get_json_result(data=False, code=settings.RetCode.DATA_ERROR, message="invalid email")

    user = users[0]
    # Verify OTP from Redis
    k_code, k_attempts, k_last, k_lock = otp_keys(email)
    if REDIS_CONN.get(k_lock):
        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="too many attempts, try later")

    stored = REDIS_CONN.get(k_code)
    if not stored:
        return get_json_result(data=False, code=settings.RetCode.NOT_EFFECTIVE, message="expired otp")

    try:
        # Stored format is "<hash>:<salt hex>".
        stored_hash, salt_hex = str(stored).split(":", 1)
        salt = bytes.fromhex(salt_hex)
    except Exception:
        return get_json_result(data=False, code=settings.RetCode.EXCEPTION_ERROR, message="otp storage corrupted")

    # Case-insensitive verification: OTP generated uppercase
    calc = hash_code(otp.upper(), salt)
    # Constant-time comparison so response timing does not leak how much of the hash matched.
    hashes_match = hmac.compare_digest(str(calc).encode("utf-8"), stored_hash.encode("utf-8"))
    if not hashes_match:
        # bump attempts
        try:
            attempts = int(REDIS_CONN.get(k_attempts) or 0) + 1
        except Exception:
            attempts = 1
        REDIS_CONN.set(k_attempts, attempts, OTP_TTL_SECONDS)
        if attempts >= ATTEMPT_LIMIT:
            REDIS_CONN.set(k_lock, int(time.time()), ATTEMPT_LOCK_SECONDS)
        # NOTE(review): the message says "expired" although the code did not match;
        # text kept unchanged in case clients depend on it.
        return get_json_result(data=False, code=settings.RetCode.AUTHENTICATION_ERROR, message="expired otp")

    # Success: consume OTP and reset password
    for key in (k_code, k_attempts, k_last, k_lock):
        REDIS_CONN.delete(key)

    try:
        UserService.update_user_password(user.id, new_pwd)
    except Exception as e:
        logging.exception(e)
        return get_json_result(data=False, code=settings.RetCode.EXCEPTION_ERROR, message="failed to reset password")

    # Auto login (reuse login flow)
    user.access_token = get_uuid()
    login_user(user)
    # Assign scalar values; the previous trailing commas turned these into 1-tuples.
    user.update_time = current_timestamp()
    user.update_date = datetime_format(datetime.now())
    user.save()
    msg = "Password reset successful. Logged in."
    return construct_response(data=user.to_json(), auth=user.get_id(), message=msg)
|
||||
|
||||
@ -36,3 +36,8 @@ class UserAlreadyExistsError(AdminException):
|
||||
class CannotDeleteAdminError(AdminException):
    """Error raised for an attempt to delete the admin account.

    Passes a fixed message and the value 403 to ``AdminException``
    (presumably an HTTP status code - confirm against ``AdminException``).
    """

    def __init__(self):
        message = "Cannot delete admin account"
        super().__init__(message, 403)
|
||||
|
||||
|
||||
class NotAdminError(AdminException):
    """Error raised when the named user is not an admin.

    Passes a message containing ``username`` and the value 403 to
    ``AdminException`` (presumably an HTTP status code - confirm against
    ``AdminException``).
    """

    def __init__(self, username):
        message = f"User '{username}' is not admin"
        super().__init__(message, 403)
|
||||
|
||||
@ -313,9 +313,75 @@ class RetryingPooledMySQLDatabase(PooledMySQLDatabase):
|
||||
raise
|
||||
|
||||
|
||||
class RetryingPooledPostgresqlDatabase(PooledPostgresqlDatabase):
    """Pooled PostgreSQL database that retries on connection-related errors.

    ``execute_sql`` and ``begin`` are retried up to ``max_retries`` times when an
    ``OperationalError`` / ``InterfaceError`` is raised whose text contains one of
    the fragments in ``_RETRYABLE_ERROR_FRAGMENTS``. Between attempts the
    connection is closed and reopened, and the call sleeps for
    ``retry_delay * 2 ** attempt`` seconds.

    Keyword arguments consumed here (the rest go to the parent class):
        max_retries: number of retries after the first attempt (default 5).
        retry_delay: base delay in seconds for the backoff (default 1).
    """

    # Matching is done on the lower-cased exception text, not on SQLSTATE codes.
    # The original author listed these codes as the situations of interest:
    # 57P01 admin_shutdown, 57P02 crash_shutdown, 57P03 cannot_connect_now,
    # 08006 connection_failure, 08003 connection_does_not_exist,
    # 08000 connection_exception.
    _RETRYABLE_ERROR_FRAGMENTS = (
        "connection",
        "server closed",
        "connection refused",
        "no connection to the server",
        "terminating connection",
    )

    def __init__(self, *args, **kwargs):
        self.max_retries = kwargs.pop("max_retries", 5)
        self.retry_delay = kwargs.pop("retry_delay", 1)
        super().__init__(*args, **kwargs)

    @classmethod
    def _looks_like_connection_error(cls, exc):
        """Return True when the exception text contains a retryable fragment."""
        text = str(exc).lower()
        return any(fragment in text for fragment in cls._RETRYABLE_ERROR_FRAGMENTS)

    def execute_sql(self, sql, params=None, commit=True):
        """Run ``sql`` via the parent class, retrying on connection-related errors."""
        for attempt in range(self.max_retries + 1):
            try:
                return super().execute_sql(sql, params, commit)
            except (OperationalError, InterfaceError) as e:
                out_of_retries = attempt >= self.max_retries
                if out_of_retries or not self._looks_like_connection_error(e):
                    logging.error(f"PostgreSQL execution failure: {e}")
                    raise
                logging.warning(
                    f"PostgreSQL connection issue (attempt {attempt+1}/{self.max_retries}): {e}"
                )
                self._handle_connection_loss()
                time.sleep(self.retry_delay * (2 ** attempt))
        return None

    def _handle_connection_loss(self):
        """Close the current connection (ignoring errors) and reconnect.

        If the first reconnect fails, the error is logged and a second attempt is
        made after 0.1 seconds; a failure of that second attempt propagates.
        """
        try:
            self.close()
        except Exception:
            pass
        try:
            self.connect()
        except Exception as e:
            logging.error(f"Failed to reconnect to PostgreSQL: {e}")
            time.sleep(0.1)
            self.connect()

    def begin(self):
        """Start a transaction via the parent class, retrying on connection-related errors."""
        for attempt in range(self.max_retries + 1):
            try:
                return super().begin()
            except (OperationalError, InterfaceError) as e:
                can_retry = self._looks_like_connection_error(e) and attempt < self.max_retries
                if not can_retry:
                    raise
                logging.warning(
                    f"PostgreSQL connection lost during transaction (attempt {attempt+1}/{self.max_retries})"
                )
                self._handle_connection_loss()
                time.sleep(self.retry_delay * (2 ** attempt))
|
||||
|
||||
|
||||
class PooledDatabase(Enum):
|
||||
MYSQL = RetryingPooledMySQLDatabase
|
||||
POSTGRES = PooledPostgresqlDatabase
|
||||
POSTGRES = RetryingPooledPostgresqlDatabase
|
||||
|
||||
|
||||
class DatabaseMigrator(Enum):
|
||||
|
||||
@ -79,7 +79,7 @@ class DocumentService(CommonService):
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_list(cls, kb_id, page_number, items_per_page,
|
||||
orderby, desc, keywords, id, name):
|
||||
orderby, desc, keywords, id, name, suffix=None, run = None):
|
||||
fields = cls.get_cls_model_fields()
|
||||
docs = cls.model.select(*[*fields, UserCanvas.title]).join(File2Document, on = (File2Document.document_id == cls.model.id))\
|
||||
.join(File, on = (File.id == File2Document.file_id))\
|
||||
@ -96,6 +96,10 @@ class DocumentService(CommonService):
|
||||
docs = docs.where(
|
||||
fn.LOWER(cls.model.name).contains(keywords.lower())
|
||||
)
|
||||
if suffix:
|
||||
docs = docs.where(cls.model.suffix.in_(suffix))
|
||||
if run:
|
||||
docs = docs.where(cls.model.run.in_(run))
|
||||
if desc:
|
||||
docs = docs.order_by(cls.model.getter_by(orderby).desc())
|
||||
else:
|
||||
|
||||
@ -476,6 +476,16 @@ class FileService(CommonService):
|
||||
|
||||
return err, files
|
||||
|
||||
@classmethod
@DB.connection_context()
def list_all_files_by_parent_id(cls, parent_id):
    """Return all rows whose ``parent_id`` equals the given id, as a list.

    The row whose own ``id`` equals ``parent_id`` is excluded. Any exception
    raised while querying is logged and re-raised as ``RuntimeError``.
    """
    try:
        is_child = cls.model.parent_id == parent_id
        is_not_self = cls.model.id != parent_id
        rows = cls.model.select().where(is_child & is_not_self)
        return list(rows)
    except Exception:
        logging.exception("list_by_parent_id failed")
        raise RuntimeError("Database error (list_by_parent_id)!")
|
||||
|
||||
@staticmethod
|
||||
def parse_docs(file_objs, user_id):
|
||||
exe = ThreadPoolExecutor(max_workers=12)
|
||||
|
||||
@ -205,7 +205,7 @@ class LLMBundle(LLM4Tenant):
|
||||
return txt
|
||||
|
||||
return txt[last_think_end + len("</think>") :]
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _clean_param(chat_partial, **kwargs):
|
||||
func = chat_partial.func
|
||||
@ -222,15 +222,15 @@ class LLMBundle(LLM4Tenant):
|
||||
if not support_var_args:
|
||||
use_kwargs = {k: v for k, v in kwargs.items() if k in keyword_args}
|
||||
return use_kwargs
|
||||
|
||||
|
||||
def chat(self, system: str, history: list, gen_conf: dict = {}, **kwargs) -> str:
|
||||
if self.langfuse:
|
||||
generation = self.langfuse.start_generation(trace_context=self.trace_context, name="chat", model=self.llm_name, input={"system": system, "history": history})
|
||||
|
||||
chat_partial = partial(self.mdl.chat, system, history, gen_conf)
|
||||
chat_partial = partial(self.mdl.chat, system, history, gen_conf, **kwargs)
|
||||
if self.is_tools and self.mdl.is_tools:
|
||||
chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf)
|
||||
|
||||
chat_partial = partial(self.mdl.chat_with_tools, system, history, gen_conf, **kwargs)
|
||||
|
||||
use_kwargs = self._clean_param(chat_partial, **kwargs)
|
||||
txt, used_tokens = chat_partial(**use_kwargs)
|
||||
txt = self._remove_reasoning_content(txt)
|
||||
@ -266,7 +266,7 @@ class LLMBundle(LLM4Tenant):
|
||||
break
|
||||
|
||||
if txt.endswith("</think>"):
|
||||
ans = ans.rstrip("</think>")
|
||||
ans = ans[: -len("</think>")]
|
||||
|
||||
if not self.verbose_tool_use:
|
||||
txt = re.sub(r"<tool_call>.*?</tool_call>", "", txt, flags=re.DOTALL)
|
||||
|
||||
@ -351,7 +351,7 @@ def queue_tasks(doc: dict, bucket: str, name: str, priority: int):
|
||||
"progress": 0.0,
|
||||
"from_page": 0,
|
||||
"to_page": 100000000,
|
||||
"begin_at": datetime.now(),
|
||||
"begin_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
}
|
||||
|
||||
parse_task_array = []
|
||||
@ -503,7 +503,7 @@ def queue_dataflow(tenant_id:str, flow_id:str, task_id:str, doc_id:str=CANVAS_DE
|
||||
to_page=100000000,
|
||||
task_type="dataflow" if not rerun else "dataflow_rerun",
|
||||
priority=priority,
|
||||
begin_at=datetime.now(),
|
||||
begin_at= datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
|
||||
)
|
||||
if doc_id not in [CANVAS_DEBUG_DOC_ID, GRAPH_RAPTOR_FAKE_DOC_ID]:
|
||||
TaskService.model.delete().where(TaskService.model.doc_id == doc_id).execute()
|
||||
|
||||
25
api/utils/email_templates.py
Normal file
25
api/utils/email_templates.py
Normal file
@ -0,0 +1,25 @@
|
||||
"""
|
||||
Reusable HTML email templates and registry.
|
||||
"""
|
||||
|
||||
# HTML body for team invitation emails.
# Placeholders: email, inviter, tenant_id, invite_url.
INVITE_EMAIL_TMPL = """
<p>Hi {{email}},</p>
<p>{{inviter}} has invited you to join their team (ID: {{tenant_id}}).</p>
<p>Click the link below to complete your registration:<br>
<a href="{{invite_url}}">{{invite_url}}</a></p>
<p>If you did not request this, please ignore this email.</p>
"""

# HTML body for password reset code emails.
# Placeholders: code, ttl_min.
RESET_CODE_EMAIL_TMPL = """
<p>Hello,</p>
<p>Your password reset code is: <b>{{ code }}</b></p>
<p>This code will expire in {{ ttl_min }} minutes.</p>
"""

# Registry mapping a template key to its HTML template string.
EMAIL_TEMPLATES = dict(
    invite=INVITE_EMAIL_TMPL,
    reset_code=RESET_CODE_EMAIL_TMPL,
)
|
||||
@ -13,7 +13,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
# Standard library imports
|
||||
import base64
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@ -22,13 +27,20 @@ import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import zipfile
|
||||
from io import BytesIO
|
||||
|
||||
# Typing
|
||||
from typing import List, Union, Tuple
|
||||
|
||||
# Third-party imports
|
||||
import olefile
|
||||
import pdfplumber
|
||||
from cachetools import LRUCache, cached
|
||||
from PIL import Image
|
||||
from ruamel.yaml import YAML
|
||||
|
||||
# Local imports
|
||||
from api.constants import IMG_BASE64_PREFIX
|
||||
from api.db import FileType
|
||||
|
||||
@ -284,3 +296,125 @@ def read_potential_broken_pdf(blob):
|
||||
return repaired
|
||||
|
||||
return blob
|
||||
|
||||
|
||||
|
||||
def _is_zip(h: bytes) -> bool:
|
||||
return h.startswith(b"PK\x03\x04") or h.startswith(b"PK\x05\x06") or h.startswith(b"PK\x07\x08")
|
||||
|
||||
def _is_pdf(h: bytes) -> bool:
|
||||
return h.startswith(b"%PDF-")
|
||||
|
||||
def _is_ole(h: bytes) -> bool:
|
||||
return h.startswith(b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")
|
||||
|
||||
def _sha10(b: bytes) -> str:
|
||||
return hashlib.sha256(b).hexdigest()[:10]
|
||||
|
||||
def _guess_ext(b: bytes) -> str:
    """Guess a file extension from the leading bytes (and, for ZIPs, member names) of ``b``.

    Returns:
        ".docx" / ".pptx" / ".xlsx" for a ZIP containing a member under
        "word/", "ppt/" or "xl/" (checked in that order); ".zip" for any other
        ZIP or when the archive cannot be read; ".pdf" for a PDF header;
        ".doc" for an OLE header; otherwise ".bin".
    """
    header = b[:8]

    if _is_zip(header):
        # Directory prefix -> extension, in the order the checks are applied.
        ooxml_markers = (("word/", ".docx"), ("ppt/", ".pptx"), ("xl/", ".xlsx"))
        try:
            with zipfile.ZipFile(io.BytesIO(b), "r") as archive:
                members = [member.lower() for member in archive.namelist()]
                for prefix, extension in ooxml_markers:
                    if any(member.startswith(prefix) for member in members):
                        return extension
        except Exception:
            # Unreadable archive: fall back to the generic ".zip" below.
            pass
        return ".zip"

    if _is_pdf(header):
        return ".pdf"

    if _is_ole(header):
        return ".doc"

    return ".bin"
|
||||
|
||||
# Try to extract the real embedded payload from OLE's Ole10Native
|
||||
def _extract_ole10native_payload(data: bytes) -> bytes:
|
||||
try:
|
||||
pos = 0
|
||||
if len(data) < 4:
|
||||
return data
|
||||
_ = int.from_bytes(data[pos:pos+4], "little")
|
||||
pos += 4
|
||||
# filename/src/tmp (NUL-terminated ANSI)
|
||||
for _ in range(3):
|
||||
z = data.index(b"\x00", pos)
|
||||
pos = z + 1
|
||||
# skip unknown 4 bytes
|
||||
pos += 4
|
||||
if pos + 4 > len(data):
|
||||
return data
|
||||
size = int.from_bytes(data[pos:pos+4], "little")
|
||||
pos += 4
|
||||
if pos + size <= len(data):
|
||||
return data[pos:pos+size]
|
||||
except Exception:
|
||||
pass
|
||||
return data
|
||||
|
||||
def extract_embed_file(target: Union[bytes, bytearray]) -> List[Tuple[str, bytes]]:
    """
    Only extract the 'first layer' of embedding, returning raw (filename, bytes).

    Args:
        target: Raw bytes of the container document.

    Returns:
        A list of ``(filename, payload)`` tuples, de-duplicated by the
        ``_sha10`` hash of the payload. For ZIP containers only members under
        the known embedding directories are returned; for OLE containers every
        non-empty stream is returned, with ``Ole10Native`` streams unwrapped.
        Anything else (or an unreadable container) yields an empty list.
    """
    top = bytes(target)
    head = top[:8]
    out: List[Tuple[str, bytes]] = []
    seen = set()

    def push(b: bytes, name_hint: str = ""):
        h10 = _sha10(b)
        if h10 in seen:
            return
        seen.add(h10)
        # Decide on the basename only, so a dot in a parent directory/storage
        # name is not mistaken for a file extension.
        base = name_hint.rsplit("/", 1)[-1]
        if "." in base:
            fname = base
        else:
            # Sniff the content only when the hint carries no extension.
            fname = f"{h10}{_guess_ext(b)}"
        out.append((fname, b))

    # OOXML/ZIP container (docx/xlsx/pptx)
    if _is_zip(head):
        try:
            with zipfile.ZipFile(io.BytesIO(top), "r") as z:
                embed_dirs = (
                    "word/embeddings/", "word/objects/", "word/activex/",
                    "xl/embeddings/", "ppt/embeddings/"
                )
                for name in z.namelist():
                    if not name.lower().startswith(embed_dirs):
                        continue
                    try:
                        b = z.read(name)
                        # Skip empty members (e.g. directory entries), matching
                        # the empty-stream check in the OLE branch below.
                        if not b:
                            continue
                        push(b, name)
                    except Exception:
                        pass
        except Exception:
            pass
        return out

    # OLE container (doc/ppt/xls)
    if _is_ole(head):
        try:
            with olefile.OleFileIO(io.BytesIO(top)) as ole:
                for entry in ole.listdir():
                    p = "/".join(entry)
                    try:
                        data = ole.openstream(entry).read()
                    except Exception:
                        continue
                    if not data:
                        continue
                    # Case-insensitive match; covers the exact-case spelling too.
                    if "ole10native" in p.lower():
                        data = _extract_ole10native_payload(data)
                    push(data, p)
        except Exception:
            pass
        return out

    return out
|
||||
@ -74,12 +74,12 @@ def get_es_cluster_stats() -> dict:
|
||||
raise Exception("Elasticsearch is not in use.")
|
||||
try:
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": ESConnection().get_cluster_stats()
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@ -90,12 +90,12 @@ def get_infinity_status():
|
||||
raise Exception("Infinity is not in use.")
|
||||
try:
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": InfinityConnection().health()
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@ -107,12 +107,12 @@ def get_mysql_status():
|
||||
headers = ['id', 'user', 'host', 'db', 'command', 'time', 'state', 'info']
|
||||
cursor.close()
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": [dict(zip(headers, r)) for r in res_rows]
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@ -122,12 +122,12 @@ def check_minio_alive():
|
||||
try:
|
||||
response = requests.get(f'http://{rag_settings.MINIO["host"]}/minio/health/live')
|
||||
if response.status_code == 200:
|
||||
return {'alive': True, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
else:
|
||||
return {'alive': False, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "timeout", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@ -135,12 +135,12 @@ def check_minio_alive():
|
||||
def get_redis_info():
|
||||
try:
|
||||
return {
|
||||
"alive": True,
|
||||
"status": "alive",
|
||||
"message": REDIS_CONN.info()
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@ -150,12 +150,12 @@ def check_ragflow_server_alive():
|
||||
try:
|
||||
response = requests.get(f'http://{settings.HOST_IP}:{settings.HOST_PORT}/v1/system/ping')
|
||||
if response.status_code == 200:
|
||||
return {'alive': True, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "alive", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
else:
|
||||
return {'alive': False, "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
return {"status": "timeout", "message": f"Confirm elapsed: {(timer() - start_time) * 1000.0:.1f} ms."}
|
||||
except Exception as e:
|
||||
return {
|
||||
"alive": False,
|
||||
"status": "timeout",
|
||||
"message": f"error: {str(e)}",
|
||||
}
|
||||
|
||||
@ -192,9 +192,7 @@ def run_health_checks() -> tuple[dict, bool]:
|
||||
except Exception:
|
||||
result["storage"] = "nok"
|
||||
|
||||
|
||||
all_ok = (result.get("db") == "ok") and (result.get("redis") == "ok") and (result.get("doc_engine") == "ok") and (result.get("storage") == "ok")
|
||||
all_ok = (result.get("db") == "ok") and (result.get("redis") == "ok") and (result.get("doc_engine") == "ok") and (
|
||||
result.get("storage") == "ok")
|
||||
result["status"] = "ok" if all_ok else "nok"
|
||||
return result, all_ok
|
||||
|
||||
|
||||
|
||||
@ -24,6 +24,7 @@ from urllib.parse import urlparse
|
||||
from api.apps import smtp_mail_server
|
||||
from flask_mail import Message
|
||||
from flask import render_template_string
|
||||
from api.utils.email_templates import EMAIL_TEMPLATES
|
||||
from selenium import webdriver
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
@ -34,6 +35,12 @@ from selenium.webdriver.support.ui import WebDriverWait
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
|
||||
|
||||
# One-time-password tuning constants.
# NOTE(review): the code that consumes these values is outside this view;
# the notes below restate the arithmetic only — confirm semantics with callers.
OTP_LENGTH = 8  # presumably the length of a generated code — TODO confirm
OTP_TTL_SECONDS = 5 * 60  # 300 seconds (5 minutes)
ATTEMPT_LIMIT = 5
ATTEMPT_LOCK_SECONDS = 30 * 60  # 1800 seconds (30 minutes)
RESEND_COOLDOWN_SECONDS = 60  # 60 seconds (1 minute)
|
||||
|
||||
|
||||
CONTENT_TYPE_MAP = {
|
||||
# Office
|
||||
@ -178,24 +185,49 @@ def get_float(req: dict, key: str, default: float | int = 10.0) -> float:
|
||||
return default
|
||||
|
||||
|
||||
INVITE_EMAIL_TMPL = """
|
||||
<p>Hi {{email}},</p>
|
||||
<p>{{inviter}} has invited you to join their team (ID: {{tenant_id}}).</p>
|
||||
<p>Click the link below to complete your registration:<br>
|
||||
<a href="{{invite_url}}">{{invite_url}}</a></p>
|
||||
<p>If you did not request this, please ignore this email.</p>
|
||||
"""
|
||||
def send_email_html(subject: str, to_email: str, template_key: str, **context):
    """Render a shared HTML template and send it to a single recipient.

    Args:
        subject: Subject line of the message.
        to_email: The only recipient address.
        template_key: Key looked up in ``EMAIL_TEMPLATES``.
        **context: Variables passed through to ``render_template_string``.

    Raises:
        ValueError: If ``template_key`` has no (non-empty) entry in
            ``EMAIL_TEMPLATES``.
    """
    from api.apps import app

    template = EMAIL_TEMPLATES.get(template_key)
    if template:
        # Rendering and sending both happen inside the application context.
        with app.app_context():
            message = Message(subject=subject, recipients=[to_email])
            message.html = render_template_string(template, **context)
            smtp_mail_server.send(message)
        return
    raise ValueError(f"Unknown email template: {template_key}")
|
||||
|
||||
|
||||
def send_invite_email(to_email, invite_url, tenant_id, inviter):
|
||||
from api.apps import app
|
||||
with app.app_context():
|
||||
msg = Message(subject="RAGFlow Invitation",
|
||||
recipients=[to_email])
|
||||
msg.html = render_template_string(
|
||||
INVITE_EMAIL_TMPL,
|
||||
email=to_email,
|
||||
invite_url=invite_url,
|
||||
tenant_id=tenant_id,
|
||||
inviter=inviter,
|
||||
)
|
||||
smtp_mail_server.send(msg)
|
||||
# Reuse the generic HTML sender with 'invite' template
|
||||
send_email_html(
|
||||
subject="RAGFlow Invitation",
|
||||
to_email=to_email,
|
||||
template_key="invite",
|
||||
email=to_email,
|
||||
invite_url=invite_url,
|
||||
tenant_id=tenant_id,
|
||||
inviter=inviter,
|
||||
)
|
||||
|
||||
|
||||
def otp_keys(email: str):
    """Build the four OTP-related key names for an email address.

    The address is stripped and lower-cased first; a falsy value (``None`` or
    an empty string) is treated as an empty address.

    Returns:
        A 4-tuple ``(otp, otp_attempts, otp_last_sent, otp_lock)`` of key
        strings, each of the form ``<prefix>:<normalized email>``.
    """
    addr = email.strip().lower() if email else ""
    code_key = f"otp:{addr}"
    attempts_key = f"otp_attempts:{addr}"
    last_sent_key = f"otp_last_sent:{addr}"
    lock_key = f"otp_lock:{addr}"
    return code_key, attempts_key, last_sent_key, lock_key
|
||||
|
||||
|
||||
def hash_code(code: str, salt: bytes) -> str:
    """Return the hex HMAC-SHA256 of ``code`` (UTF-8 encoded) keyed with ``salt``.

    A falsy ``code`` (``None`` or ``""``) is hashed as the empty message.
    """
    import hashlib
    import hmac

    message = code.encode("utf-8") if code else b""
    mac = hmac.new(salt, message, hashlib.sha256)
    return mac.hexdigest()
|
||||
|
||||
|
||||
def captcha_key(email: str) -> str:
    """Return the captcha key name for ``email``; the address is used exactly as given."""
    key = f"captcha:{email}"
    return key
|
||||
|
||||
|
||||
@ -31,7 +31,6 @@
|
||||
"entities_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"pagerank_fea": {"type": "integer", "default": 0},
|
||||
"tag_feas": {"type": "varchar", "default": "", "analyzer": "rankfeatures"},
|
||||
|
||||
"from_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"to_entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"entity_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
@ -39,6 +38,6 @@
|
||||
"source_id": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"n_hop_with_weight": {"type": "varchar", "default": ""},
|
||||
"removed_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
|
||||
"doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
|
||||
"doc_type_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"},
|
||||
"toc_kwd": {"type": "varchar", "default": "", "analyzer": "whitespace-#"}
|
||||
}
|
||||
|
||||
@ -971,31 +971,9 @@
|
||||
{
|
||||
"name": "VolcEngine",
|
||||
"logo": "",
|
||||
"tags": "LLM, TEXT EMBEDDING",
|
||||
"tags": "LLM, TEXT EMBEDDING, IMAGE2TEXT",
|
||||
"status": "1",
|
||||
"llm": [
|
||||
{
|
||||
"llm_name": "Doubao-pro-128k",
|
||||
"tags": "LLM,CHAT,128k",
|
||||
"max_tokens": 131072,
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "Doubao-pro-32k",
|
||||
"tags": "LLM,CHAT,32k",
|
||||
"max_tokens": 32768,
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "Doubao-pro-4k",
|
||||
"tags": "LLM,CHAT,4k",
|
||||
"max_tokens": 4096,
|
||||
"model_type": "chat",
|
||||
"is_tools": true
|
||||
}
|
||||
]
|
||||
"llm": []
|
||||
},
|
||||
{
|
||||
"name": "BaiChuan",
|
||||
@ -1367,35 +1345,35 @@
|
||||
"llm_name": "gemini-2.5-flash",
|
||||
"tags": "LLM,CHAT,1024K,IMAGE2TEXT",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.5-pro",
|
||||
"tags": "LLM,CHAT,IMAGE2TEXT,1024K",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.5-flash-lite",
|
||||
"tags": "LLM,CHAT,1024K,IMAGE2TEXT",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.0-flash",
|
||||
"tags": "LLM,CHAT,1024K",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
"llm_name": "gemini-2.0-flash-lite",
|
||||
"tags": "LLM,CHAT,1024K",
|
||||
"max_tokens": 1048576,
|
||||
"model_type": "chat",
|
||||
"model_type": "image2text",
|
||||
"is_tools": true
|
||||
},
|
||||
{
|
||||
|
||||
@ -54,8 +54,8 @@ class RAGFlowExcelParser:
|
||||
try:
|
||||
file_like_object.seek(0)
|
||||
try:
|
||||
df = pd.read_excel(file_like_object)
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(df)
|
||||
dfs = pd.read_excel(file_like_object, sheet_name=None)
|
||||
return RAGFlowExcelParser._dataframe_to_workbook(dfs)
|
||||
except Exception as ex:
|
||||
logging.info(f"pandas with default engine load error: {ex}, try calamine instead")
|
||||
file_like_object.seek(0)
|
||||
@ -75,6 +75,10 @@ class RAGFlowExcelParser:
|
||||
|
||||
@staticmethod
|
||||
def _dataframe_to_workbook(df):
|
||||
# if contains multiple sheets use _dataframes_to_workbook
|
||||
if isinstance(df, dict) and len(df) > 1:
|
||||
return RAGFlowExcelParser._dataframes_to_workbook(df)
|
||||
|
||||
df = RAGFlowExcelParser._clean_dataframe(df)
|
||||
wb = Workbook()
|
||||
ws = wb.active
|
||||
@ -88,6 +92,22 @@ class RAGFlowExcelParser:
|
||||
ws.cell(row=row_num, column=col_num, value=value)
|
||||
|
||||
return wb
|
||||
|
||||
@staticmethod
def _dataframes_to_workbook(dfs: dict):
    """Build a workbook with one worksheet per entry of ``dfs``.

    Args:
        dfs: Mapping of sheet title to DataFrame.

    Returns:
        A ``Workbook`` whose sheets each hold the cleaned frame's column
        names in row 1 and its values from row 2 onwards.
    """
    workbook = Workbook()
    # Drop the sheet a new Workbook starts with, so only the named sheets remain.
    workbook.remove(workbook.active)

    for title, frame in dfs.items():
        cleaned = RAGFlowExcelParser._clean_dataframe(frame)
        sheet = workbook.create_sheet(title=title)
        # Header row.
        for col_idx, header in enumerate(cleaned.columns, start=1):
            sheet.cell(row=1, column=col_idx, value=header)
        # Data rows start right below the header.
        for row_idx, record in enumerate(cleaned.values, start=2):
            for col_idx, cell_value in enumerate(record, start=1):
                sheet.cell(row=row_idx, column=col_idx, value=cell_value)
    return workbook
|
||||
|
||||
def html(self, fnm, chunk_rows=256):
|
||||
from html import escape
|
||||
|
||||
@ -17,6 +17,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.utils.api_utils import timeout
|
||||
from rag.app.picture import vision_llm_chunk as picture_vision_llm_chunk
|
||||
from rag.prompts.generator import vision_llm_figure_describe_prompt
|
||||
@ -32,6 +34,43 @@ def vision_figure_parser_figure_data_wrapper(figures_data_without_positions):
|
||||
if isinstance(figure_data[1], Image.Image)
|
||||
]
|
||||
|
||||
def vision_figure_parser_docx_wrapper(sections, tbls, callback=None, **kwargs):
    """Optionally enrich DOCX figures with a vision model and append them to ``tbls``.

    Args:
        sections: Parsed sections, passed to
            ``vision_figure_parser_figure_data_wrapper`` to collect figures.
        tbls: List that receives the enhanced figures (extended in place).
        callback: Optional progress callable ``callback(progress, message)``.
        **kwargs: Must contain ``tenant_id`` for the vision model lookup; all
            keyword arguments are forwarded to ``VisionFigureParser``.

    Returns:
        ``tbls``, extended with the enhanced figures when a vision model is
        available and parsing succeeds; otherwise unchanged.
    """
    try:
        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
        # Guard the callback: with callback=None an unguarded call would raise
        # inside this try and silently disable the vision model.
        if callback:
            callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
    except Exception:
        # Best effort: no usable vision model means the enhancement is skipped.
        vision_model = None

    if vision_model:
        figures_data = vision_figure_parser_figure_data_wrapper(sections)
        try:
            docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
            boosted_figures = docx_vision_parser(callback=callback)
            tbls.extend(boosted_figures)
        except Exception as e:
            if callback:
                callback(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.")
    return tbls
|
||||
|
||||
def vision_figure_parser_pdf_wrapper(tbls, callback=None, **kwargs):
    """Optionally replace the figure items in ``tbls`` with vision-enhanced versions.

    Args:
        tbls: List of table/figure items. An item counts as a figure when
            ``item[0][0]`` is a PIL image and ``item[0][1]`` is a list.
        callback: Optional progress callable ``callback(progress, message)``.
        **kwargs: Must contain ``tenant_id`` for the vision model lookup; all
            keyword arguments are forwarded to ``VisionFigureParser``.

    Returns:
        A new list with figure items replaced by their enhanced versions when
        a vision model is available and parsing succeeds; otherwise the
        original ``tbls`` object.
    """
    try:
        vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
        # Guard the callback: with callback=None an unguarded call would raise
        # inside this try and silently disable the vision model.
        if callback:
            callback(0.7, "Visual model detected. Attempting to enhance figure extraction...")
    except Exception:
        # Best effort: no usable vision model means the enhancement is skipped.
        vision_model = None

    if vision_model:
        def is_figure_item(item):
            return (
                isinstance(item[0][0], Image.Image) and
                isinstance(item[0][1], list)
            )

        figures_data = [item for item in tbls if is_figure_item(item)]
        try:
            pdf_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
            boosted_figures = pdf_vision_parser(callback=callback)
            # Only drop the raw figures once the enhanced ones are in hand.
            tbls = [item for item in tbls if not is_figure_item(item)]
            tbls.extend(boosted_figures)
        except Exception as e:
            if callback:
                callback(0.8, f"Visual model error: {e}. Skipping figure parsing enhancement.")
    return tbls
|
||||
|
||||
shared_executor = ThreadPoolExecutor(max_workers=10)
|
||||
|
||||
|
||||
344
deepdoc/parser/mineru_parser.py
Normal file
344
deepdoc/parser/mineru_parser.py
Normal file
@ -0,0 +1,344 @@
|
||||
#
|
||||
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import json
|
||||
import logging
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from io import BytesIO
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from queue import Empty, Queue
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
import numpy as np
|
||||
import pdfplumber
|
||||
from PIL import Image
|
||||
from strenum import StrEnum
|
||||
|
||||
from deepdoc.parser.pdf_parser import RAGFlowPdfParser
|
||||
|
||||
# Register a single threading.Lock in sys.modules under a fixed key; the
# membership test keeps a lock that is already registered under that key
# instead of replacing it.
# NOTE(review): nothing in this module acquires the lock — presumably it is
# shared with other pdfplumber users elsewhere in the project; confirm.
LOCK_KEY_pdfplumber = "global_shared_lock_pdfplumber"
if LOCK_KEY_pdfplumber not in sys.modules:
    sys.modules[LOCK_KEY_pdfplumber] = threading.Lock()
|
||||
|
||||
|
||||
class MinerUContentType(StrEnum):
    """String values compared against a parsed block's ``"type"`` field when blocks are turned into sections."""

    IMAGE = "image"
    TABLE = "table"
    TEXT = "text"
    EQUATION = "equation"
|
||||
|
||||
|
||||
class MinerUParser(RAGFlowPdfParser):
|
||||
def __init__(self, mineru_path: str = "mineru"):
|
||||
self.mineru_path = Path(mineru_path)
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
def check_installation(self) -> bool:
|
||||
subprocess_kwargs = {
|
||||
"capture_output": True,
|
||||
"text": True,
|
||||
"check": True,
|
||||
"encoding": "utf-8",
|
||||
"errors": "ignore",
|
||||
}
|
||||
|
||||
if platform.system() == "Windows":
|
||||
subprocess_kwargs["creationflags"] = getattr(subprocess, "CREATE_NO_WINDOW", 0)
|
||||
|
||||
try:
|
||||
result = subprocess.run([str(self.mineru_path), "--version"], **subprocess_kwargs)
|
||||
version_info = result.stdout.strip()
|
||||
if version_info:
|
||||
logging.info(f"[MinerU] Detected version: {version_info}")
|
||||
else:
|
||||
logging.info("[MinerU] Detected MinerU, but version info is empty.")
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
logging.warning(f"[MinerU] Execution failed (exit code {e.returncode}).")
|
||||
except FileNotFoundError:
|
||||
logging.warning("[MinerU] MinerU not found. Please install it via: pip install -U 'mineru[core]'")
|
||||
except Exception as e:
|
||||
logging.error(f"[MinerU] Unexpected error during installation check: {e}")
|
||||
return False
|
||||
|
||||
def _run_mineru(self, input_path: Path, output_dir: Path, method: str = "auto", lang: Optional[str] = None):
|
||||
cmd = [str(self.mineru_path), "-p", str(input_path), "-o", str(output_dir), "-m", method]
|
||||
if lang:
|
||||
cmd.extend(["-l", lang])
|
||||
|
||||
self.logger.info(f"[MinerU] Running command: {' '.join(cmd)}")
|
||||
|
||||
subprocess_kwargs = {
|
||||
"stdout": subprocess.PIPE,
|
||||
"stderr": subprocess.PIPE,
|
||||
"text": True,
|
||||
"encoding": "utf-8",
|
||||
"errors": "ignore",
|
||||
"bufsize": 1,
|
||||
}
|
||||
|
||||
if platform.system() == "Windows":
|
||||
subprocess_kwargs["creationflags"] = getattr(subprocess, "CREATE_NO_WINDOW", 0)
|
||||
|
||||
process = subprocess.Popen(cmd, **subprocess_kwargs)
|
||||
stdout_queue, stderr_queue = Queue(), Queue()
|
||||
|
||||
def enqueue_output(pipe, queue, prefix):
|
||||
for line in iter(pipe.readline, ""):
|
||||
if line.strip():
|
||||
queue.put((prefix, line.strip()))
|
||||
pipe.close()
|
||||
|
||||
threading.Thread(target=enqueue_output, args=(process.stdout, stdout_queue, "STDOUT"), daemon=True).start()
|
||||
threading.Thread(target=enqueue_output, args=(process.stderr, stderr_queue, "STDERR"), daemon=True).start()
|
||||
|
||||
while process.poll() is None:
|
||||
for q in (stdout_queue, stderr_queue):
|
||||
try:
|
||||
while True:
|
||||
prefix, line = q.get_nowait()
|
||||
if prefix == "STDOUT":
|
||||
self.logger.info(f"[MinerU] {line}")
|
||||
else:
|
||||
self.logger.warning(f"[MinerU] {line}")
|
||||
except Empty:
|
||||
pass
|
||||
time.sleep(0.1)
|
||||
|
||||
return_code = process.wait()
|
||||
if return_code != 0:
|
||||
raise RuntimeError(f"[MinerU] Process failed with exit code {return_code}")
|
||||
self.logger.info("[MinerU] Command completed successfully.")
|
||||
|
||||
def __images__(self, fnm, zoomin: int = 1, page_from=0, page_to=600, callback=None):
|
||||
self.page_from = page_from
|
||||
self.page_to = page_to
|
||||
try:
|
||||
with pdfplumber.open(fnm) if isinstance(fnm, (str, PathLike)) else pdfplumber.open(BytesIO(fnm)) as pdf:
|
||||
self.pdf = pdf
|
||||
self.page_images = [p.to_image(resolution=72 * zoomin, antialias=True).original for _, p in enumerate(self.pdf.pages[page_from:page_to])]
|
||||
except Exception as e:
|
||||
self.page_images = None
|
||||
self.total_page = 0
|
||||
logging.exception(e)
|
||||
|
||||
def _line_tag(self, bx):
|
||||
pn = [bx["page_idx"] + 1]
|
||||
positions = bx["bbox"]
|
||||
x0, top, x1, bott = positions
|
||||
|
||||
if hasattr(self, "page_images") and self.page_images and len(self.page_images) > bx["page_idx"]:
|
||||
page_width, page_height = self.page_images[bx["page_idx"]].size
|
||||
x0 = (x0 / 1000.0) * page_width
|
||||
x1 = (x1 / 1000.0) * page_width
|
||||
top = (top / 1000.0) * page_height
|
||||
bott = (bott / 1000.0) * page_height
|
||||
|
||||
return "@@{}\t{:.1f}\t{:.1f}\t{:.1f}\t{:.1f}##".format("-".join([str(p) for p in pn]), x0, x1, top, bott)
|
||||
|
||||
def crop(self, text, ZM=1, need_position=False):
|
||||
imgs = []
|
||||
poss = self.extract_positions(text)
|
||||
if not poss:
|
||||
if need_position:
|
||||
return None, None
|
||||
return
|
||||
|
||||
max_width = max(np.max([right - left for (_, left, right, _, _) in poss]), 6)
|
||||
GAP = 6
|
||||
pos = poss[0]
|
||||
poss.insert(0, ([pos[0][0]], pos[1], pos[2], max(0, pos[3] - 120), max(pos[3] - GAP, 0)))
|
||||
pos = poss[-1]
|
||||
poss.append(([pos[0][-1]], pos[1], pos[2], min(self.page_images[pos[0][-1]].size[1], pos[4] + GAP), min(self.page_images[pos[0][-1]].size[1], pos[4] + 120)))
|
||||
|
||||
positions = []
|
||||
for ii, (pns, left, right, top, bottom) in enumerate(poss):
|
||||
right = left + max_width
|
||||
|
||||
if bottom <= top:
|
||||
bottom = top + 2
|
||||
|
||||
for pn in pns[1:]:
|
||||
bottom += self.page_images[pn - 1].size[1]
|
||||
|
||||
img0 = self.page_images[pns[0]]
|
||||
x0, y0, x1, y1 = int(left), int(top), int(right), int(min(bottom, img0.size[1]))
|
||||
crop0 = img0.crop((x0, y0, x1, y1))
|
||||
imgs.append(crop0)
|
||||
if 0 < ii < len(poss) - 1:
|
||||
positions.append((pns[0] + self.page_from, x0, x1, y0, y1))
|
||||
|
||||
bottom -= img0.size[1]
|
||||
for pn in pns[1:]:
|
||||
page = self.page_images[pn]
|
||||
x0, y0, x1, y1 = int(left), 0, int(right), int(min(bottom, page.size[1]))
|
||||
cimgp = page.crop((x0, y0, x1, y1))
|
||||
imgs.append(cimgp)
|
||||
if 0 < ii < len(poss) - 1:
|
||||
positions.append((pn + self.page_from, x0, x1, y0, y1))
|
||||
bottom -= page.size[1]
|
||||
|
||||
if not imgs:
|
||||
if need_position:
|
||||
return None, None
|
||||
return
|
||||
|
||||
height = 0
|
||||
for img in imgs:
|
||||
height += img.size[1] + GAP
|
||||
height = int(height)
|
||||
width = int(np.max([i.size[0] for i in imgs]))
|
||||
pic = Image.new("RGB", (width, height), (245, 245, 245))
|
||||
height = 0
|
||||
for ii, img in enumerate(imgs):
|
||||
if ii == 0 or ii + 1 == len(imgs):
|
||||
img = img.convert("RGBA")
|
||||
overlay = Image.new("RGBA", img.size, (0, 0, 0, 0))
|
||||
overlay.putalpha(128)
|
||||
img = Image.alpha_composite(img, overlay).convert("RGB")
|
||||
pic.paste(img, (0, int(height)))
|
||||
height += img.size[1] + GAP
|
||||
|
||||
if need_position:
|
||||
return pic, positions
|
||||
return pic
|
||||
|
||||
@staticmethod
|
||||
def extract_positions(txt: str):
|
||||
poss = []
|
||||
for tag in re.findall(r"@@[0-9-]+\t[0-9.\t]+##", txt):
|
||||
pn, left, right, top, bottom = tag.strip("#").strip("@").split("\t")
|
||||
left, right, top, bottom = float(left), float(right), float(top), float(bottom)
|
||||
poss.append(([int(p) - 1 for p in pn.split("-")], left, right, top, bottom))
|
||||
return poss
|
||||
|
||||
def _read_output(self, output_dir: Path, file_stem: str, method: str = "auto") -> list[dict[str, Any]]:
|
||||
subdir = output_dir / file_stem / method
|
||||
json_file = subdir / f"{file_stem}_content_list.json"
|
||||
|
||||
if not json_file.exists():
|
||||
raise FileNotFoundError(f"[MinerU] Missing output file: {json_file}")
|
||||
|
||||
with open(json_file, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
for item in data:
|
||||
for key in ("img_path", "table_img_path", "equation_img_path"):
|
||||
if key in item and item[key]:
|
||||
item[key] = str((subdir / item[key]).resolve())
|
||||
return data
|
||||
|
||||
def _transfer_to_sections(self, outputs: list[dict[str, Any]]):
|
||||
sections = []
|
||||
for output in outputs:
|
||||
match output["type"]:
|
||||
case MinerUContentType.TEXT:
|
||||
section = output["text"]
|
||||
case MinerUContentType.TABLE:
|
||||
section = output["table_body"] + "\n".join(output["table_caption"]) + "\n".join(output["table_footnote"])
|
||||
case MinerUContentType.IMAGE:
|
||||
section = "".join(output["image_caption"]) + "\n" + "".join(output["image_footnote"])
|
||||
case MinerUContentType.EQUATION:
|
||||
section = output["text"]
|
||||
|
||||
if section:
|
||||
sections.append((section, self._line_tag(output)))
|
||||
return sections
|
||||
|
||||
def _transfer_to_tables(self, outputs: list[dict[str, Any]]):
|
||||
return []
|
||||
|
||||
def parse_pdf(
|
||||
self,
|
||||
filepath: str | PathLike[str],
|
||||
binary: BytesIO | bytes,
|
||||
callback: Optional[Callable] = None,
|
||||
*,
|
||||
output_dir: Optional[str] = None,
|
||||
lang: Optional[str] = None,
|
||||
method: str = "auto",
|
||||
delete_output: bool = True,
|
||||
) -> tuple:
|
||||
import shutil
|
||||
|
||||
temp_pdf = None
|
||||
created_tmp_dir = False
|
||||
|
||||
if binary:
|
||||
temp_dir = Path(tempfile.mkdtemp(prefix="mineru_bin_pdf_"))
|
||||
temp_pdf = temp_dir / Path(filepath).name
|
||||
with open(temp_pdf, "wb") as f:
|
||||
f.write(binary)
|
||||
pdf = temp_pdf
|
||||
self.logger.info(f"[MinerU] Received binary PDF -> {temp_pdf}")
|
||||
if callback:
|
||||
callback(0.15, f"[MinerU] Received binary PDF -> {temp_pdf}")
|
||||
else:
|
||||
pdf = Path(filepath)
|
||||
if not pdf.exists():
|
||||
if callback:
|
||||
callback(-1, f"[MinerU] PDF not found: {pdf}")
|
||||
raise FileNotFoundError(f"[MinerU] PDF not found: {pdf}")
|
||||
|
||||
if output_dir:
|
||||
out_dir = Path(output_dir)
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
else:
|
||||
out_dir = Path(tempfile.mkdtemp(prefix="mineru_pdf_"))
|
||||
created_tmp_dir = True
|
||||
|
||||
self.logger.info(f"[MinerU] Output directory: {out_dir}")
|
||||
if callback:
|
||||
callback(0.15, f"[MinerU] Output directory: {out_dir}")
|
||||
|
||||
self.__images__(pdf, zoomin=1)
|
||||
|
||||
try:
|
||||
self._run_mineru(pdf, out_dir, method=method, lang=lang)
|
||||
outputs = self._read_output(out_dir, pdf.stem, method=method)
|
||||
self.logger.info(f"[MinerU] Parsed {len(outputs)} blocks from PDF.")
|
||||
if callback:
|
||||
callback(0.75, f"[MinerU] Parsed {len(outputs)} blocks from PDF.")
|
||||
return self._transfer_to_sections(outputs), self._transfer_to_tables(outputs)
|
||||
finally:
|
||||
if temp_pdf and temp_pdf.exists():
|
||||
try:
|
||||
temp_pdf.unlink()
|
||||
temp_pdf.parent.rmdir()
|
||||
except Exception:
|
||||
pass
|
||||
if delete_output and created_tmp_dir and out_dir.exists():
|
||||
try:
|
||||
shutil.rmtree(out_dir)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: python mineru_parser.py <path-to-pdf>
    parser = MinerUParser("mineru")
    print("MinerU available:", parser.check_installation())

    # Take the PDF path from the command line; an empty hard-coded path would
    # make open() fail on every run.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <path-to-pdf>")

    filepath = sys.argv[1]
    with open(filepath, "rb") as file:
        # parse_pdf returns a (sections, tables) tuple; print each part.
        outputs = parser.parse_pdf(filepath=filepath, binary=file.read())
        for output in outputs:
            print(output)
|
||||
@ -9,7 +9,7 @@ The component equipped with reasoning, tool usage, and multi-agent collaboration
|
||||
|
||||
---
|
||||
|
||||
An **Agent** component fine-tunes the LLM and sets its prompt. From v0.21.0 onwards, an **Agent** component is able to work independently and with the following capabilities:
|
||||
An **Agent** component fine-tunes the LLM and sets its prompt. From v0.20.5 onwards, an **Agent** component is able to work independently and with the following capabilities:
|
||||
|
||||
- Autonomous reasoning with reflection and adjustment based on environmental feedback.
|
||||
- Use of tools or subagents to complete tasks.
|
||||
@ -24,7 +24,7 @@ An **Agent** component is essential when you need the LLM to assist with summari
|
||||
|
||||

|
||||
|
||||
2. If your Agent involves dataset retrieval, ensure you [have properly configured your target knowledge base(s)](../../dataset/configure_knowledge_base.md).
|
||||
2. If your Agent involves dataset retrieval, ensure you [have properly configured your target dataset(s)](../../dataset/configure_knowledge_base.md).
|
||||
|
||||
## Quickstart
|
||||
|
||||
@ -113,7 +113,7 @@ Click the dropdown menu of **Model** to show the model configuration window.
|
||||
- **Model**: The chat model to use.
|
||||
- Ensure you set the chat model correctly on the **Model providers** page.
|
||||
- You can use different models for different components to increase flexibility or improve overall performance.
|
||||
- **Freedom**: A shortcut to **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty** settings, indicating the freedom level of the model. From **Improvise**, **Precise**, to **Balance**, each preset configuration corresponds to a unique combination of **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**.
|
||||
- **Creativity**: A shortcut to **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty** settings, indicating the freedom level of the model. From **Improvise**, **Precise**, to **Balance**, each preset configuration corresponds to a unique combination of **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**.
|
||||
This parameter has three options:
|
||||
- **Improvise**: Produces more creative responses.
|
||||
- **Precise**: (Default) Produces more conservative responses.
|
||||
@ -132,11 +132,12 @@ Click the dropdown menu of **Model** to show the model configuration window.
|
||||
- **Frequency penalty**: Discourages the model from repeating the same words or phrases too frequently in the generated text.
|
||||
- A higher **frequency penalty** value results in the model being more conservative in its use of repeated tokens.
|
||||
- Defaults to 0.7.
|
||||
- **Max tokens**:
|
||||
- **Max tokens**:
|
||||
This sets the maximum length of the model's output, measured in the number of tokens (words or pieces of words). It is disabled by default, allowing the model to determine the number of tokens in its responses.
|
||||
|
||||
:::tip NOTE
|
||||
- It is not necessary to stick with the same model for all components. If a specific model is not performing well for a particular task, consider using a different one.
|
||||
- If you are uncertain about the mechanism behind **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**, simply choose one of the three options of **Preset configurations**.
|
||||
- If you are uncertain about the mechanism behind **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**, simply choose one of the three options of **Creativity**.
|
||||
:::
|
||||
|
||||
### System prompt
|
||||
@ -147,7 +148,7 @@ An **Agent** component relies on keys (variables) to specify its data inputs. It
|
||||
|
||||
#### Advanced usage
|
||||
|
||||
From v0.21.0 onwards, four framework-level prompt blocks are available in the **System prompt** field, enabling you to customize and *override* prompts at the framework level. Type `/` or click **(x)** to view them; they appear under the **Framework** entry in the dropdown menu.
|
||||
From v0.20.5 onwards, four framework-level prompt blocks are available in the **System prompt** field, enabling you to customize and *override* prompts at the framework level. Type `/` or click **(x)** to view them; they appear under the **Framework** entry in the dropdown menu.
|
||||
|
||||
- `task_analysis` prompt block
|
||||
- This block is responsible for analyzing tasks — either a user task or a task assigned by the lead Agent when the **Agent** component is acting as a Sub-Agent.
|
||||
|
||||
@ -42,7 +42,7 @@ Click the dropdown menu of **Model** to show the model configuration window.
|
||||
- **Model**: The chat model to use.
|
||||
- Ensure you set the chat model correctly on the **Model providers** page.
|
||||
- You can use different models for different components to increase flexibility or improve overall performance.
|
||||
- **Freedom**: A shortcut to **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty** settings, indicating the freedom level of the model. From **Improvise**, **Precise**, to **Balance**, each preset configuration corresponds to a unique combination of **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**.
|
||||
- **Creativity**: A shortcut to **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty** settings, indicating the freedom level of the model. From **Improvise**, **Precise**, to **Balance**, each preset configuration corresponds to a unique combination of **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**.
|
||||
This parameter has three options:
|
||||
- **Improvise**: Produces more creative responses.
|
||||
- **Precise**: (Default) Produces more conservative responses.
|
||||
@ -61,10 +61,12 @@ Click the dropdown menu of **Model** to show the model configuration window.
|
||||
- **Frequency penalty**: Discourages the model from repeating the same words or phrases too frequently in the generated text.
|
||||
- A higher **frequency penalty** value results in the model being more conservative in its use of repeated tokens.
|
||||
- Defaults to 0.7.
|
||||
- **Max tokens**:
|
||||
This sets the maximum length of the model's output, measured in the number of tokens (words or pieces of words). It is disabled by default, allowing the model to determine the number of tokens in its responses.
|
||||
|
||||
:::tip NOTE
|
||||
- It is not necessary to stick with the same model for all components. If a specific model is not performing well for a particular task, consider using a different one.
|
||||
- If you are uncertain about the mechanism behind **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**, simply choose one of the three options of **Preset configurations**.
|
||||
- If you are uncertain about the mechanism behind **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**, simply choose one of the three options of **Creativity**.
|
||||
:::
|
||||
|
||||
### Message window size
|
||||
|
||||
17
docs/guides/agent/agent_component_reference/chunker_token.md
Normal file
17
docs/guides/agent/agent_component_reference/chunker_token.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
sidebar_position: 32
|
||||
slug: /chunker_token_component
|
||||
---
|
||||
|
||||
# Parser component
|
||||
|
||||
A component that sets the parsing rules for your dataset.
|
||||
|
||||
---
|
||||
|
||||
A **Parser** component defines how various file types should be parsed, including parsing methods for PDFs, fields to parse for Emails, and OCR methods for images.
|
||||
|
||||
|
||||
## Scenario
|
||||
|
||||
A **Parser** component is auto-populated on the ingestion pipeline canvas and required in all ingestion pipeline workflows.
|
||||
29
docs/guides/agent/agent_component_reference/indexer.md
Normal file
29
docs/guides/agent/agent_component_reference/indexer.md
Normal file
@ -0,0 +1,29 @@
|
||||
---
|
||||
sidebar_position: 40
|
||||
slug: /indexer_component
|
||||
---
|
||||
|
||||
# Indexer component
|
||||
|
||||
A component that defines how chunks are indexed.
|
||||
|
||||
---
|
||||
|
||||
An **Indexer** component indexes chunks and configures their storage formats in the document engine.
|
||||
|
||||
## Scenario
|
||||
|
||||
An **Indexer** component is the mandatory ending component for all ingestion pipelines.
|
||||
|
||||
## Configurations
|
||||
|
||||
### Search method
|
||||
|
||||
This setting configures how chunks are stored in the document engine: as full-text, embeddings, or both.
|
||||
|
||||
### Filename embedding weight
|
||||
|
||||
This setting defines the filename's contribution to the final embedding, which is a weighted combination of both the chunk content and the filename. Essentially, a higher value gives the filename more influence in the final *composite* embedding.
|
||||
|
||||
- 0.1: Filename contributes 10% (chunk content 90%)
|
||||
- 0.5 (maximum): Filename contributes 50% (chunk content 50%)
|
||||
17
docs/guides/agent/agent_component_reference/parser.md
Normal file
17
docs/guides/agent/agent_component_reference/parser.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
sidebar_position: 30
|
||||
slug: /parser_component
|
||||
---
|
||||
|
||||
# Parser component
|
||||
|
||||
A component that sets the parsing rules for your dataset.
|
||||
|
||||
---
|
||||
|
||||
A **Parser** component defines how various file types should be parsed, including parsing methods for PDFs, fields to parse for Emails, and OCR methods for images.
|
||||
|
||||
|
||||
## Scenario
|
||||
|
||||
A **Parser** component is auto-populated on the ingestion pipeline canvas and required in all ingestion pipeline workflows.
|
||||
@ -87,9 +87,9 @@ RAGFlow employs a combination of weighted keyword similarity and weighted vector
|
||||
|
||||
Defaults to 0.2.
|
||||
|
||||
### Keyword similarity weight
|
||||
### Vector similarity weight
|
||||
|
||||
This parameter sets the weight of keyword similarity in the combined similarity score. The total of the two weights must equal 1.0. Its default value is 0.7, which means the weight of vector similarity in the combined search is 1 - 0.7 = 0.3.
|
||||
This parameter sets the weight of vector similarity in the composite similarity score. The total of the two weights must equal 1.0. Its default value is 0.3, which means the weight of keyword similarity in a combined search is 1 - 0.3 = 0.7.
|
||||
|
||||
### Top N
|
||||
|
||||
|
||||
80
docs/guides/agent/agent_component_reference/transformer.md
Normal file
80
docs/guides/agent/agent_component_reference/transformer.md
Normal file
@ -0,0 +1,80 @@
|
||||
---
|
||||
sidebar_position: 37
|
||||
slug: /transformer_component
|
||||
---
|
||||
|
||||
# Transformer component
|
||||
|
||||
A component that uses an LLM to extract insights from the chunks.
|
||||
|
||||
---
|
||||
|
||||
A **Transformer** component uses an LLM to extract new information, such as summaries, keywords, questions, and metadata, from the chunks it receives. It *typically* precedes the **Indexer** in the ingestion pipeline, but you can also chain multiple **Transformer** components in sequence.
|
||||
|
||||
## Scenario
|
||||
|
||||
A **Transformer** component is essential when you need the LLM to extract new information, such as keywords, questions, metadata, and summaries, from the original chunks.
|
||||
|
||||
## Configurations
|
||||
|
||||
### Model
|
||||
|
||||
Click the dropdown menu of **Model** to show the model configuration window.
|
||||
|
||||
- **Model**: The chat model to use.
|
||||
- Ensure you set the chat model correctly on the **Model providers** page.
|
||||
- You can use different models for different components to increase flexibility or improve overall performance.
|
||||
- **Creativity**: A shortcut to **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty** settings, indicating the freedom level of the model. From **Improvise**, **Precise**, to **Balance**, each preset configuration corresponds to a unique combination of **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**.
|
||||
This parameter has three options:
|
||||
- **Improvise**: Produces more creative responses.
|
||||
- **Precise**: (Default) Produces more conservative responses.
|
||||
- **Balance**: A middle ground between **Improvise** and **Precise**.
|
||||
- **Temperature**: The randomness level of the model's output.
|
||||
Defaults to 0.1.
|
||||
- Lower values lead to more deterministic and predictable outputs.
|
||||
- Higher values lead to more creative and varied outputs.
|
||||
- A temperature of zero results in the same output for the same prompt.
|
||||
- **Top P**: Nucleus sampling.
|
||||
- Reduces the likelihood of generating repetitive or unnatural text by setting a threshold *P* and restricting the sampling to tokens with a cumulative probability exceeding *P*.
|
||||
- Defaults to 0.3.
|
||||
- **Presence penalty**: Encourages the model to include a more diverse range of tokens in the response.
|
||||
- A higher **presence penalty** value results in the model being more likely to generate tokens that have not yet been included in the generated text.
|
||||
- Defaults to 0.4.
|
||||
- **Frequency penalty**: Discourages the model from repeating the same words or phrases too frequently in the generated text.
|
||||
- A higher **frequency penalty** value results in the model being more conservative in its use of repeated tokens.
|
||||
- Defaults to 0.7.
|
||||
- **Max tokens**:
|
||||
This sets the maximum length of the model's output, measured in the number of tokens (words or pieces of words). It is disabled by default, allowing the model to determine the number of tokens in its responses.
|
||||
|
||||
:::tip NOTE
|
||||
- It is not necessary to stick with the same model for all components. If a specific model is not performing well for a particular task, consider using a different one.
|
||||
- If you are uncertain about the mechanism behind **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**, simply choose one of the three options of **Creativity**.
|
||||
:::
|
||||
|
||||
### Result destination
|
||||
|
||||
Select the type of output to be generated by the LLM:
|
||||
|
||||
- Summary
|
||||
- Keywords
|
||||
- Questions
|
||||
- Metadata
|
||||
|
||||
### System prompt
|
||||
|
||||
Typically, you use the system prompt to describe the task for the LLM, specify how it should respond, and outline other miscellaneous requirements. We do not plan to elaborate on this topic, as it can be as extensive as prompt engineering.
|
||||
|
||||
:::tip NOTE
|
||||
The system prompt here automatically updates to match your selected **Result destination**.
|
||||
:::
|
||||
|
||||
### User prompt
|
||||
|
||||
The user-defined prompt. For example, you can type `/` or click **(x)** to insert variables of preceding components in the ingestion pipeline as the LLM's input.
|
||||
|
||||
### Output
|
||||
|
||||
The global variable name for the output of the **Transformer** component, which can be referenced by subsequent **Transformer** components in the ingestion pipeline.
|
||||
|
||||
- Default: `chunks`
|
||||
- Type: `Array<Object>`
|
||||
@ -19,7 +19,7 @@ You start an AI conversation by creating an assistant.
|
||||
|
||||
> RAGFlow offers you the flexibility of choosing a different chat model for each dialogue, while allowing you to set the default models in **System Model Settings**.
|
||||
|
||||
2. Update **Assistant settings**:
|
||||
2. Update Assistant-specific settings:
|
||||
|
||||
- **Assistant name** is the name of your chat assistant. Each assistant corresponds to a dialogue with a unique combination of datasets, prompts, hybrid search configurations, and large model settings.
|
||||
- **Empty response**:
|
||||
@ -28,12 +28,12 @@ You start an AI conversation by creating an assistant.
|
||||
- **Show quote**: This is a key feature of RAGFlow and enabled by default. RAGFlow does not work like a black box. Instead, it clearly shows the sources of information that its responses are based on.
|
||||
- Select the corresponding datasets. You can select one or multiple datasets, but ensure that they use the same embedding model, otherwise an error would occur.
|
||||
|
||||
3. Update **Prompt engine**:
|
||||
3. Update Prompt-specific settings:
|
||||
|
||||
- In **System**, you fill in the prompts for your LLM. You can also leave the default prompt as-is to begin with.
|
||||
- **Similarity threshold** sets the similarity "bar" for each chunk of text. The default is 0.2. Text chunks with lower similarity scores are filtered out of the final response.
|
||||
- **Keyword similarity weight** is set to 0.7 by default. RAGFlow uses a hybrid score system to evaluate the relevance of different text chunks. This value sets the weight assigned to the keyword similarity component in the hybrid score.
|
||||
- If **Rerank model** is left empty, the hybrid score system uses keyword similarity and vector similarity, and the default weight assigned to the vector similarity component is 1-0.7=0.3.
|
||||
- **Vector similarity weight** is set to 0.3 by default. RAGFlow uses a hybrid score system to evaluate the relevance of different text chunks. This value sets the weight assigned to the vector similarity component in the hybrid score.
|
||||
- If **Rerank model** is left empty, the hybrid score system uses keyword similarity and vector similarity, and the default weight assigned to the keyword similarity component is 1-0.3=0.7.
|
||||
- If **Rerank model** is selected, the hybrid score system uses keyword similarity and reranker score, and the default weight assigned to the reranker score is 1-0.7=0.3.
|
||||
- **Top N** determines the *maximum* number of chunks to feed to the LLM. In other words, even if more chunks are retrieved, only the top N chunks are provided as input.
|
||||
- **Multi-turn optimization** enhances user queries using existing context in a multi-round conversation. It is enabled by default. When enabled, it will consume additional LLM tokens and significantly increase the time to generate answers.
|
||||
@ -52,10 +52,10 @@ You start an AI conversation by creating an assistant.
|
||||
- HTTP method [Converse with chat assistant](../../references/http_api_reference.md#converse-with-chat-assistant), or
|
||||
- Python method [Converse with chat assistant](../../references/python_api_reference.md#converse-with-chat-assistant).
|
||||
|
||||
4. Update **Model Setting**:
|
||||
4. Update Model-specific Settings:
|
||||
|
||||
- In **Model**: you select the chat model. Though you have selected the default chat model in **System Model Settings**, RAGFlow allows you to choose an alternative chat model for your dialogue.
|
||||
- **Freedom**: A shortcut to **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty** settings, indicating the freedom level of the model. From **Improvise**, **Precise**, to **Balance**, each preset configuration corresponds to a unique combination of **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**.
|
||||
- **Creativity**: A shortcut to **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty** settings, indicating the freedom level of the model. From **Improvise**, **Precise**, to **Balance**, each preset configuration corresponds to a unique combination of **Temperature**, **Top P**, **Presence penalty**, and **Frequency penalty**.
|
||||
This parameter has three options:
|
||||
- **Improvise**: Produces more creative responses.
|
||||
- **Precise**: (Default) Produces more conservative responses.
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: -1
|
||||
sidebar_position: -10
|
||||
slug: /configure_knowledge_base
|
||||
---
|
||||
|
||||
@ -37,7 +37,7 @@ This section covers the following topics:
|
||||
|
||||
### Select chunking method
|
||||
|
||||
RAGFlow offers multiple chunking template to facilitate chunking files of different layouts and ensure semantic integrity. In **Chunking method**, you can choose the default template that suits the layouts and formats of your files. The following table shows the descriptions and the compatible file formats of each supported chunk template:
|
||||
RAGFlow offers multiple built-in chunking templates to facilitate chunking files of different layouts and ensure semantic integrity. From the **Built-in** chunking method dropdown under **Parse type**, you can choose the default template that suits the layouts and formats of your files. The following table shows the descriptions and the compatible file formats of each supported chunk template:
|
||||
|
||||
| **Template** | Description | File format |
|
||||
|--------------|-----------------------------------------------------------------------|-----------------------------------------------------------------------------------------------|
|
||||
@ -54,9 +54,23 @@ RAGFlow offers multiple chunking template to facilitate chunking files of differ
|
||||
| One | Each document is chunked in its entirety (as one). | DOCX, XLSX, XLS (Excel 97-2003), PDF, TXT |
|
||||
| Tag | The dataset functions as a tag set for the others. | XLSX, CSV/TXT |
|
||||
|
||||
You can also change a file's chunking method on the **Datasets** page.
|
||||
You can also change a file's chunking method on the **Files** page.
|
||||
|
||||

|
||||

|
||||
|
||||
<details>
|
||||
<summary>From v0.21.0 onward, RAGFlow supports ingestion pipelines for customized data ingestion and cleansing workflows.</summary>
|
||||
|
||||
To use a customized data pipeline:
|
||||
|
||||
1. On the **Agent** page, click **+ Create agent** > **Create from blank**.
|
||||
2. Select **Ingestion pipeline** and name your data pipeline in the popup, then click **Save** to show the data pipeline canvas.
|
||||
3. After updating your data pipeline, click **Save** on the top right of the canvas.
|
||||
4. Navigate to the **Configuration** page of your dataset, select **Choose pipeline** in **Ingestion pipeline**.
|
||||
|
||||
*Your saved data pipeline will appear in the dropdown menu below.*
|
||||
|
||||
</details>
|
||||
|
||||
### Select embedding model
|
||||
|
||||
|
||||
@ -53,25 +53,31 @@ Whether to enable entity resolution. You can think of this as an entity deduplic
|
||||
- (Default) Disable entity resolution.
|
||||
- Enable entity resolution. This option consumes more tokens.
|
||||
|
||||
### Community report generation
|
||||
### Community reports
|
||||
|
||||
In a knowledge graph, a community is a cluster of entities linked by relationships. You can have the LLM generate an abstract for each community, known as a community report. See [here](https://www.microsoft.com/en-us/research/blog/graphrag-improving-global-search-via-dynamic-community-selection/) for more information. This indicates whether to generate community reports:
|
||||
|
||||
- Generate community reports. This option consumes more tokens.
|
||||
- (Default) Do not generate community reports.
|
||||
|
||||
## Procedure
|
||||
## Quickstart
|
||||
|
||||
1. On the **Configuration** page of your dataset, switch on **Extract knowledge graph** or adjust its settings as needed, and click **Save** to confirm your changes.
|
||||
1. Navigate to the **Configuration** page of your dataset and update:
|
||||
|
||||
- Entity types: *Required* - Specifies the entity types in the knowledge graph to generate. You don't have to stick with the default, but you need to customize them for your documents.
|
||||
- Method: *Optional*
|
||||
- Entity resolution: *Optional*
|
||||
- Community reports: *Optional*
|
||||
*The default knowledge graph configurations for your dataset are now set.*
|
||||
|
||||
- *The default knowledge graph configurations for your dataset are now set and files uploaded from this point onward will automatically use these settings during parsing.*
|
||||
- *Files parsed before this update will retain their original knowledge graph settings.*
|
||||
2. Navigate to the **Files** page of your dataset, click the **Generate** button on the top right corner of the page, then select **Knowledge graph** from the dropdown to initiate the knowledge graph generation process.
|
||||
|
||||
2. The knowledge graph of your dataset does *not* automatically update *until* a newly uploaded file is parsed.
|
||||
*You can click the pause button in the dropdown to halt the build process when necessary.*
|
||||
|
||||
_A **Knowledge graph** entry appears under **Configuration** once a knowledge graph is created._
|
||||
3. Go back to the **Configuration** page:
|
||||
|
||||
*Once a knowledge graph is generated, the **Knowledge graph** field changes from `Not generated` to `Generated at a specific timestamp`. You can delete it by clicking the recycle bin button to the right of the field.*
|
||||
|
||||
3. Click **Knowledge graph** to view the details of the generated graph.
|
||||
4. To use the created knowledge graph, do either of the following:
|
||||
|
||||
- In the **Chat setting** panel of your chat app, switch on the **Use knowledge graph** toggle.
|
||||
@ -79,17 +85,13 @@ In a knowledge graph, a community is a cluster of entities linked by relationshi
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
### Can I have different knowledge graph settings for different files in my dataset?
|
||||
|
||||
Yes, you can. Just one graph is generated per dataset. The smaller graphs of your files will be *combined* into one big, unified graph at the end of the graph extraction process.
|
||||
|
||||
### Does the knowledge graph automatically update when I remove a related file?
|
||||
|
||||
Nope. The knowledge graph does *not* automatically update *until* a newly uploaded document is parsed.
|
||||
Nope. The knowledge graph does *not* update *until* you regenerate a knowledge graph for your dataset.
|
||||
|
||||
### How to remove a generated knowledge graph?
|
||||
|
||||
To remove the generated knowledge graph, delete all related files in your dataset. Although the **Knowledge graph** entry will still be visible, the graph has actually been deleted.
|
||||
On the **Configuration** page of your dataset, find the **Knowledge graph** field and click the recycle bin button to the right of the field.
|
||||
|
||||
### Where is the created knowledge graph stored?
|
||||
|
||||
|
||||
@ -72,3 +72,22 @@ The maximum number of clusters to create. Defaults to 64, with a maximum limit o
|
||||
### Random seed
|
||||
|
||||
A random seed. Click **+** to change the seed value.
|
||||
|
||||
## Quickstart
|
||||
|
||||
1. Navigate to the **Configuration** page of your dataset and update:
|
||||
|
||||
- Prompt: *Optional* - We recommend that you keep it as-is until you understand the mechanism behind it.
|
||||
- Max token: *Optional*
|
||||
- Threshold: *Optional*
|
||||
- Max cluster: *Optional*
|
||||
|
||||
2. Navigate to the **Files** page of your dataset, click the **Generate** button on the top right corner of the page, then select **RAPTOR** from the dropdown to initiate the RAPTOR build process.
|
||||
|
||||
*You can click the pause button in the dropdown to halt the build process when necessary.*
|
||||
|
||||
3. Go back to the **Configuration** page:
|
||||
|
||||
*The **RAPTOR** field changes from `Not generated` to `Generated at a specific timestamp` when a RAPTOR hierarchical tree structure is generated. You can delete it by clicking the recycle bin button to the right of the field.*
|
||||
|
||||
4. Once a RAPTOR hierarchical tree structure is generated, your chat assistant and **Retrieval** agent component will use it for retrieval by default.
|
||||
|
||||
39
docs/guides/dataset/extract_table_of_contents.md
Normal file
39
docs/guides/dataset/extract_table_of_contents.md
Normal file
@ -0,0 +1,39 @@
|
||||
---
|
||||
sidebar_position: 4
|
||||
slug: /enable_table_of_contents
|
||||
---
|
||||
|
||||
# Extract table of contents
|
||||
|
||||
Extract table of contents (TOC) from documents to provide long context RAG and improve retrieval.
|
||||
|
||||
---
|
||||
|
||||
During indexing, this technique uses an LLM to extract and generate chapter information, which is added to each chunk to provide sufficient global context. At the retrieval stage, it first uses the chunks matched by search, then supplements missing chunks based on the table of contents structure. This addresses issues caused by chunk fragmentation and insufficient context, improving answer quality.
|
||||
|
||||
:::danger WARNING
|
||||
Enabling TOC extraction requires significant memory, computational resources, and tokens.
|
||||
:::
|
||||
|
||||
## Prerequisites
|
||||
|
||||
The system's default chat model is used to summarize clustered content. Before proceeding, ensure that you have a chat model properly configured:
|
||||
|
||||

|
||||
|
||||
## Quickstart
|
||||
|
||||
1. Navigate to the **Configuration** page.
|
||||
|
||||
2. Enable **TOC Enhance**.
|
||||
|
||||
3. To use this technique during retrieval, do either of the following:
|
||||
|
||||
- In the **Chat setting** panel of your chat app, switch on the **TOC Enhance** toggle.
|
||||
- If you are using an agent, click the **Retrieval** agent component to specify the dataset(s) and switch on the **TOC Enhance** toggle.
|
||||
|
||||
## Frequently asked questions
|
||||
|
||||
### Will previously parsed files be searched using the TOC enhancement feature once I enable `TOC Enhance`?
|
||||
|
||||
No. Only files parsed after you enable **TOC Enhance** will be searched using the TOC enhancement feature. To apply this feature to files parsed before enabling **TOC Enhance**, you must reparse them.
|
||||
@ -29,9 +29,9 @@ In contrast, chunks created from [knowledge graph construction](./construct_know
|
||||
|
||||
This sets the bar for retrieving chunks: chunks with similarities below the threshold will be filtered out. By default, the threshold is set to 0.2. This means that only chunks with a hybrid similarity score of 0.2 (that is, 20%) or higher will be retrieved.
|
||||
|
||||
### Keyword similarity weight
|
||||
### Vector similarity weight
|
||||
|
||||
This sets the weight of keyword similarity in the combined similarity score, whether used with vector cosine similarity or a reranking score. By default, it is set to 0.7, making the weight of the other component 0.3 (1 - 0.7).
|
||||
This sets the weight of vector similarity in the composite similarity score, whether used with vector cosine similarity or a reranking score. By default, it is set to 0.3, making the weight of the other component 0.7 (1 - 0.3).
|
||||
|
||||
### Rerank model
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_position: -4
|
||||
slug: /select_pdf_parser
|
||||
---
|
||||
|
||||
@ -25,7 +25,7 @@ RAGFlow isn't one-size-fits-all. It is built for flexibility and supports deeper
|
||||
- **One**
|
||||
- To use a third-party visual model for parsing PDFs, ensure you have set a default img2txt model under **Set default models** on the **Model providers** page.
|
||||
|
||||
## Procedure
|
||||
## Quickstart
|
||||
|
||||
1. On your dataset's **Configuration** page, select a chunking method, say **General**.
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_position: -7
|
||||
slug: /set_metada
|
||||
---
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
---
|
||||
sidebar_position: 2
|
||||
sidebar_position: -2
|
||||
slug: /set_page_rank
|
||||
---
|
||||
|
||||
|
||||
@ -42,8 +42,8 @@ A tag set is *not* involved in document indexing or retrieval. Do not specify a
|
||||
:::
|
||||
|
||||
1. Click **+ Create dataset** to create a dataset.
|
||||
2. Navigate to the **Configuration** page of the created dataset and choose **Tag** as the default chunking method.
|
||||
3. Navigate to the **Dataset** page and upload and parse your table file in XLSX, CSV, or TXT formats.
|
||||
2. Navigate to the **Configuration** page of the created dataset, select **Built-in** in **Ingestion pipeline**, then choose **Tag** as the default chunking method from the **Built-in** drop-down menu.
|
||||
3. Go back to the **Files** page and upload and parse your table file in XLSX, CSV, or TXT formats.
|
||||
_A tag cloud appears under the **Tag view** section, indicating the tag set is created:_
|
||||

|
||||
4. Click the **Table** tab to view the tag frequency table:
|
||||
|
||||
@ -46,16 +46,23 @@ The Admin CLI and Admin Service form a client-server architectural suite for RAG
|
||||
2. Install ragflow-cli.
|
||||
|
||||
```bash
|
||||
pip install ragflow-cli
|
||||
pip install ragflow-cli==0.21.0
|
||||
```
|
||||
|
||||
3. Launch the CLI client:
|
||||
|
||||
```bash
|
||||
ragflow-cli -h 0.0.0.0 -p 9381
|
||||
ragflow-cli -h 127.0.0.1 -p 9381
|
||||
```
|
||||
|
||||
Enter superuser's password to login. Default password is `admin`.
|
||||
You will be prompted to enter the superuser's password to log in.
|
||||
The default password is admin.
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- -h: RAGFlow admin server host address
|
||||
|
||||
- -p: RAGFlow admin server port
|
||||
|
||||
|
||||
|
||||
|
||||
@ -343,19 +343,20 @@ You can add keywords or questions to a file chunk to improve its ranking for que
|
||||
|
||||
Conversations in RAGFlow are based on a particular dataset or multiple datasets. Once you have created your dataset and finished file parsing, you can go ahead and start an AI conversation.
|
||||
|
||||
1. Click the **Chat** tab in the middle top of the mage **>** **Create an assistant** to show the **Chat Configuration** dialogue *of your next dialogue*.
|
||||
1. Click the **Chat** tab in the middle top of the page **>** **Create chat** to create a chat assistant.
|
||||
2. Click the created chat app to enter its configuration page.
|
||||
> RAGFlow offers the flexibility of choosing a different chat model for each dialogue, while allowing you to set the default models in **System Model Settings**.
|
||||
|
||||
2. Update **Assistant settings**:
|
||||
2. Update **Chat setting** on the right of the configuration page:
|
||||
|
||||
- Name your assistant and specify your datasets.
|
||||
- **Empty response**:
|
||||
- If you wish to *confine* RAGFlow's answers to your datasets, leave a response here. Then when it doesn't retrieve an answer, it *uniformly* responds with what you set here.
|
||||
- If you wish RAGFlow to *improvise* when it doesn't retrieve an answer from your datasets, leave it blank, which may give rise to hallucinations.
|
||||
|
||||
3. Update **Prompt engine** or leave it as is for the beginning.
|
||||
3. Update **System prompt** or leave it as is for the beginning.
|
||||
|
||||
4. Update **Model settings**.
|
||||
4. Select a chat model in the **Model** dropdown list.
|
||||
|
||||
5. Now, let's start the show:
|
||||
|
||||
|
||||
@ -1198,23 +1198,24 @@ Failure:
|
||||
|
||||
### List documents
|
||||
|
||||
**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}`
|
||||
**GET** `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}`
|
||||
|
||||
Lists documents in a specified dataset.
|
||||
|
||||
#### Request
|
||||
|
||||
- Method: GET
|
||||
- URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}`
|
||||
- URL: `/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp}&suffix={file_suffix}&run={run_status}`
|
||||
- Headers:
|
||||
- `'content-Type: application/json'`
|
||||
- `'Authorization: Bearer <YOUR_API_KEY>'`
|
||||
|
||||
##### Request example
|
||||
##### Request examples
|
||||
|
||||
**A basic request with pagination:**
|
||||
```bash
|
||||
curl --request GET \
|
||||
--url http://{address}/api/v1/datasets/{dataset_id}/documents?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&keywords={keywords}&id={document_id}&name={document_name}&create_time_from={timestamp}&create_time_to={timestamp} \
|
||||
--url http://{address}/api/v1/datasets/{dataset_id}/documents?page=1&page_size=10 \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>'
|
||||
```
|
||||
|
||||
@ -1236,10 +1237,34 @@ curl --request GET \
|
||||
Indicates whether the retrieved documents should be sorted in descending order. Defaults to `true`.
|
||||
- `id`: (*Filter parameter*), `string`
|
||||
The ID of the document to retrieve.
|
||||
- `create_time_from`: (*Filter parameter*), `integer`
|
||||
- `create_time_from`: (*Filter parameter*), `integer`
|
||||
Unix timestamp for filtering documents created after this time. 0 means no filter. Defaults to `0`.
|
||||
- `create_time_to`: (*Filter parameter*), `integer`
|
||||
- `create_time_to`: (*Filter parameter*), `integer`
|
||||
Unix timestamp for filtering documents created before this time. 0 means no filter. Defaults to `0`.
|
||||
- `suffix`: (*Filter parameter*), `array[string]`
|
||||
Filter by file suffix. Supports multiple values, e.g., `pdf`, `txt`, and `docx`. Defaults to all suffixes.
|
||||
- `run`: (*Filter parameter*), `array[string]`
|
||||
Filter by document processing status. Supports numeric, text, and mixed formats:
|
||||
- Numeric format: `["0", "1", "2", "3", "4"]`
|
||||
- Text format: `[UNSTART, RUNNING, CANCEL, DONE, FAIL]`
|
||||
- Mixed format: `[UNSTART, 1, DONE]` (mixing numeric and text formats)
|
||||
- Status mapping:
|
||||
- `0` / `UNSTART`: Document not yet processed
|
||||
- `1` / `RUNNING`: Document is currently being processed
|
||||
- `2` / `CANCEL`: Document processing was cancelled
|
||||
- `3` / `DONE`: Document processing completed successfully
|
||||
- `4` / `FAIL`: Document processing failed
|
||||
Defaults to all statuses.
|
||||
|
||||
##### Usage examples
|
||||
|
||||
**A request with multiple filtering parameters:**
|
||||
|
||||
```bash
|
||||
curl --request GET \
|
||||
--url 'http://{address}/api/v1/datasets/{dataset_id}/documents?suffix=pdf&run=DONE&page=1&page_size=10' \
|
||||
--header 'Authorization: Bearer <YOUR_API_KEY>'
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
@ -1270,7 +1295,7 @@ Success:
|
||||
"process_duration": 0.0,
|
||||
"progress": 0.0,
|
||||
"progress_msg": "",
|
||||
"run": "0",
|
||||
"run": "UNSTART",
|
||||
"size": 7,
|
||||
"source_type": "local",
|
||||
"status": "1",
|
||||
|
||||
@ -30,7 +30,7 @@ Released on October 15, 2025.
|
||||
|
||||
- Orchestratable ingestion pipeline: Supports customized data ingestion and cleansing workflows, enabling users to flexibly design their data flows or directly apply the official data flow templates on the canvas.
|
||||
- GraphRAG & RAPTOR write process optimized: Replaces the automatic incremental build process with manual batch building, significantly reducing construction overhead.
|
||||
- Long-context RAG: Automatically generates document-level table of contents (TOC) structures to mitigate context loss caused by inaccurate or excessive chunking, substantially improving retrieval quality. This feature is now available via a TOC extraction template.
|
||||
- Long-context RAG: Automatically generates document-level table of contents (TOC) structures to mitigate context loss caused by inaccurate or excessive chunking, substantially improving retrieval quality. This feature is now available via a TOC extraction template. See [here](./guides/dataset/extract_table_of_contents.md).
|
||||
- Video file parsing: Expands the system's multimodal data processing capabilities by supporting video file parsing.
|
||||
- Admin CLI: Introduces a new command-line tool for system administration, allowing users to manage and monitor RAGFlow's service status via command line.
|
||||
|
||||
@ -45,7 +45,7 @@ Released on October 15, 2025.
|
||||
- Claude Sonnet 4.5
|
||||
- Meituan LongCat-Flash-Thinking
|
||||
|
||||
## New agent templates
|
||||
### New agent templates
|
||||
|
||||
- Company Research Report Deep Dive Agent: Designed for financial institutions to help analysts quickly organize information, generate research reports, and make investment decisions.
|
||||
- Orchestratable Ingestion Pipeline Template: Allows users to apply this template on the canvas to rapidly establish standardized data ingestion and cleansing processes.
|
||||
|
||||
@ -227,7 +227,7 @@ class Extractor:
|
||||
async def _handle_entity_relation_summary(self, entity_or_relation_name: str, description: str) -> str:
|
||||
summary_max_tokens = 512
|
||||
use_description = truncate(description, summary_max_tokens)
|
||||
description_list = (use_description.split(GRAPH_FIELD_SEP),)
|
||||
description_list = use_description.split(GRAPH_FIELD_SEP)
|
||||
if len(description_list) <= 12:
|
||||
return use_description
|
||||
prompt_template = SUMMARIZE_DESCRIPTIONS_PROMPT
|
||||
|
||||
@ -44,7 +44,7 @@ dependencies = [
|
||||
"groq==0.9.0",
|
||||
"hanziconv==0.3.2",
|
||||
"html-text==0.6.2",
|
||||
"httpx[socks]==0.27.2",
|
||||
"httpx[socks]>=0.28.1,<0.29.0",
|
||||
"huggingface-hub>=0.25.0,<0.26.0",
|
||||
"infinity-sdk==0.6.0",
|
||||
"infinity-emb>=0.0.66,<0.0.67",
|
||||
@ -56,7 +56,7 @@ dependencies = [
|
||||
"mistralai==0.4.2",
|
||||
"nltk==3.9.1",
|
||||
"numpy>=1.26.0,<2.0.0",
|
||||
"ollama==0.2.1",
|
||||
"ollama>=0.5.0",
|
||||
"onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
|
||||
"onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
|
||||
"openai>=1.45.0",
|
||||
@ -102,7 +102,8 @@ dependencies = [
|
||||
"tika==2.6.0",
|
||||
"tiktoken==0.7.0",
|
||||
"umap_learn==0.5.6",
|
||||
"vertexai==1.64.0",
|
||||
"vertexai==1.70.0",
|
||||
"google-genai>=1.41.0,<2.0.0",
|
||||
"volcengine==1.0.194",
|
||||
"voyageai==0.2.3",
|
||||
"webdriver-manager==4.0.1",
|
||||
@ -113,7 +114,7 @@ dependencies = [
|
||||
"xpinyin==0.7.6",
|
||||
"yfinance==0.2.65",
|
||||
"zhipuai==2.0.1",
|
||||
"google-generativeai>=0.8.1,<0.9.0",
|
||||
"google-generativeai>=0.8.1,<0.9.0", # Needed for cv_model and embedding_model
|
||||
"python-docx>=1.1.2,<2.0.0",
|
||||
"pypdf2>=3.0.1,<4.0.0",
|
||||
"graspologic>=3.4.1,<4.0.0",
|
||||
@ -135,6 +136,7 @@ dependencies = [
|
||||
"lark>=1.2.2",
|
||||
"mammoth>=1.11.0",
|
||||
"markdownify>=1.2.0",
|
||||
"captcha>=0.7.1",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
@ -20,11 +20,14 @@ import re
|
||||
from io import BytesIO
|
||||
|
||||
from deepdoc.parser.utils import get_text
|
||||
from rag.app import naive
|
||||
from rag.nlp import bullets_category, is_english,remove_contents_table, \
|
||||
hierarchical_merge, make_colon_as_title, naive_merge, random_choices, tokenize_table, \
|
||||
tokenize_chunks
|
||||
from rag.nlp import rag_tokenizer
|
||||
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
|
||||
from deepdoc.parser import PdfParser, PlainParser, HtmlParser
|
||||
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class Pdf(PdfParser):
|
||||
@ -81,13 +84,15 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
sections, tbls = [], []
|
||||
if re.search(r"\.docx$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
doc_parser = DocxParser()
|
||||
doc_parser = naive.Docx()
|
||||
# TODO: the table of contents needs to be removed
|
||||
sections, tbls = doc_parser(
|
||||
binary if binary else filename, from_page=from_page, to_page=to_page)
|
||||
filename, binary=binary, from_page=from_page, to_page=to_page)
|
||||
remove_contents_table(sections, eng=is_english(
|
||||
random_choices([t for t, _ in sections], k=200)))
|
||||
tbls = [((None, lns), None) for lns in tbls]
|
||||
tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
|
||||
# tbls = [((None, lns), None) for lns in tbls]
|
||||
sections=[(item[0],item[1] if item[1] is not None else "") for item in sections if not isinstance(item[1], Image.Image)]
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
||||
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
||||
@ -96,6 +101,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
pdf_parser = PlainParser()
|
||||
sections, tbls = pdf_parser(filename if not binary else binary,
|
||||
from_page=from_page, to_page=to_page, callback=callback)
|
||||
tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
|
||||
|
||||
elif re.search(r"\.txt$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
|
||||
@ -23,6 +23,7 @@ from io import BytesIO
|
||||
from rag.nlp import rag_tokenizer, tokenize, tokenize_table, bullets_category, title_frequency, tokenize_chunks, docx_question_level
|
||||
from rag.utils import num_tokens_from_string
|
||||
from deepdoc.parser import PdfParser, PlainParser, DocxParser
|
||||
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
|
||||
from docx import Document
|
||||
from PIL import Image
|
||||
|
||||
@ -252,7 +253,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
tk_cnt = num_tokens_from_string(txt)
|
||||
if sec_id > -1:
|
||||
last_sid = sec_id
|
||||
|
||||
tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
|
||||
res = tokenize_table(tbls, doc, eng)
|
||||
res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
|
||||
return res
|
||||
@ -261,6 +262,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
docx_parser = Docx()
|
||||
ti_list, tbls = docx_parser(filename, binary,
|
||||
from_page=0, to_page=10000, callback=callback)
|
||||
tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
|
||||
res = tokenize_table(tbls, doc, eng)
|
||||
for text, image in ti_list:
|
||||
d = copy.deepcopy(doc)
|
||||
|
||||
@ -16,10 +16,10 @@
|
||||
|
||||
import logging
|
||||
import re
|
||||
import os
|
||||
from functools import reduce
|
||||
from io import BytesIO
|
||||
from timeit import default_timer as timer
|
||||
|
||||
from docx import Document
|
||||
from docx.image.exceptions import InvalidImageStreamError, UnexpectedEndOfFileError, UnrecognizedImageError
|
||||
from docx.opc.pkgreader import _SerializedRelationships, _SerializedRelationship
|
||||
@ -30,9 +30,11 @@ from tika import parser
|
||||
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from api.utils.file_utils import extract_embed_file
|
||||
from deepdoc.parser import DocxParser, ExcelParser, HtmlParser, JsonParser, MarkdownElementExtractor, MarkdownParser, PdfParser, TxtParser
|
||||
from deepdoc.parser.figure_parser import VisionFigureParser, vision_figure_parser_figure_data_wrapper
|
||||
from deepdoc.parser.figure_parser import VisionFigureParser,vision_figure_parser_docx_wrapper,vision_figure_parser_pdf_wrapper
|
||||
from deepdoc.parser.pdf_parser import PlainParser, VisionParser
|
||||
from deepdoc.parser.mineru_parser import MinerUParser
|
||||
from rag.nlp import concat_img, find_codec, naive_merge, naive_merge_with_images, naive_merge_docx, rag_tokenizer, tokenize_chunks, tokenize_chunks_with_images, tokenize_table
|
||||
|
||||
|
||||
@ -435,6 +437,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
Successive text will be sliced into pieces using 'delimiter'.
|
||||
Next, these successive pieces are merged into chunks whose token number is no more than 'Max token number'.
|
||||
"""
|
||||
|
||||
|
||||
is_english = lang.lower() == "english" # is_english(cks)
|
||||
parser_config = kwargs.get(
|
||||
@ -448,27 +451,37 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
res = []
|
||||
pdf_parser = None
|
||||
section_images = None
|
||||
|
||||
is_root = kwargs.get("is_root", True)
|
||||
embed_res = []
|
||||
if is_root:
|
||||
# Only extract embedded files at the root call
|
||||
embeds = []
|
||||
if binary is not None:
|
||||
embeds = extract_embed_file(binary)
|
||||
else:
|
||||
raise Exception("Embedding extraction from file path is not supported.")
|
||||
|
||||
# Recursively chunk each embedded file and collect results
|
||||
for embed_filename, embed_bytes in embeds:
|
||||
try:
|
||||
sub_res = chunk(embed_filename, binary=embed_bytes, lang=lang, callback=callback, is_root=False, **kwargs) or []
|
||||
embed_res.extend(sub_res)
|
||||
except Exception as e:
|
||||
if callback:
|
||||
callback(0.05, f"Failed to chunk embed {embed_filename}: {e}")
|
||||
continue
|
||||
|
||||
if re.search(r"\.docx$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
callback(0.15, "Visual model detected. Attempting to enhance figure extraction...")
|
||||
except Exception:
|
||||
vision_model = None
|
||||
|
||||
|
||||
# fix "There is no item named 'word/NULL' in the archive", referring to https://github.com/python-openxml/python-docx/issues/1105#issuecomment-1298075246
|
||||
_SerializedRelationships.load_from_xml = load_from_xml_v2
|
||||
sections, tables = Docx()(filename, binary)
|
||||
|
||||
if vision_model:
|
||||
figures_data = vision_figure_parser_figure_data_wrapper(sections)
|
||||
try:
|
||||
docx_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures_data, **kwargs)
|
||||
boosted_figures = docx_vision_parser(callback=callback)
|
||||
tables.extend(boosted_figures)
|
||||
except Exception as e:
|
||||
callback(0.6, f"Visual model error: {e}. Skipping figure parsing enhancement.")
|
||||
tables=vision_figure_parser_docx_wrapper(sections=sections,tbls=tables,callback=callback,**kwargs)
|
||||
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
@ -481,10 +494,12 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
"delimiter", "\n!?。;!?"))
|
||||
|
||||
if kwargs.get("section_only", False):
|
||||
chunks.extend(embed_res)
|
||||
return chunks
|
||||
|
||||
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, images))
|
||||
logging.info("naive_merge({}): {}".format(filename, timer() - st))
|
||||
res.extend(embed_res)
|
||||
return res
|
||||
|
||||
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
||||
@ -495,29 +510,28 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
|
||||
if layout_recognizer == "DeepDOC":
|
||||
pdf_parser = Pdf()
|
||||
|
||||
try:
|
||||
vision_model = LLMBundle(kwargs["tenant_id"], LLMType.IMAGE2TEXT)
|
||||
callback(0.15, "Visual model detected. Attempting to enhance figure extraction...")
|
||||
except Exception:
|
||||
vision_model = None
|
||||
|
||||
if vision_model:
|
||||
sections, tables, figures = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback, separate_tables_figures=True)
|
||||
callback(0.5, "Basic parsing complete. Proceeding with figure enhancement...")
|
||||
try:
|
||||
pdf_vision_parser = VisionFigureParser(vision_model=vision_model, figures_data=figures, **kwargs)
|
||||
boosted_figures = pdf_vision_parser(callback=callback)
|
||||
tables.extend(boosted_figures)
|
||||
except Exception as e:
|
||||
callback(0.6, f"Visual model error: {e}. Skipping figure parsing enhancement.")
|
||||
tables.extend(figures)
|
||||
else:
|
||||
sections, tables = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback)
|
||||
sections, tables = pdf_parser(filename if not binary else binary, from_page=from_page, to_page=to_page, callback=callback)
|
||||
tables=vision_figure_parser_pdf_wrapper(tbls=tables,callback=callback,**kwargs)
|
||||
|
||||
res = tokenize_table(tables, doc, is_english)
|
||||
callback(0.8, "Finish parsing.")
|
||||
|
||||
elif layout_recognizer == "MinerU":
|
||||
mineru_executable = os.environ.get("MINERU_EXECUTABLE", "mineru")
|
||||
pdf_parser = MinerUParser(mineru_path=mineru_executable)
|
||||
if not pdf_parser.check_installation():
|
||||
callback(-1, "MinerU not found.")
|
||||
return res
|
||||
|
||||
sections, tables = pdf_parser.parse_pdf(
|
||||
filepath=filename,
|
||||
binary=binary,
|
||||
callback=callback,
|
||||
output_dir=os.environ.get("MINERU_OUTPUT_DIR", ""),
|
||||
delete_output=bool(int(os.environ.get("MINERU_DELETE_OUTPUT", 1))),
|
||||
)
|
||||
parser_config["chunk_token_num"] = 0
|
||||
callback(0.8, "Finish parsing.")
|
||||
else:
|
||||
if layout_recognizer == "Plain Text":
|
||||
pdf_parser = PlainParser()
|
||||
@ -604,7 +618,6 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
callback(0.8, f"tika.parser got empty content from {filename}.")
|
||||
logging.warning(f"tika.parser got empty content from {filename}.")
|
||||
return []
|
||||
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
"file type not supported yet(pdf, xlsx, doc, docx, txt supported)")
|
||||
@ -621,6 +634,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
"chunk_token_num", 128)), parser_config.get(
|
||||
"delimiter", "\n!?。;!?"))
|
||||
if kwargs.get("section_only", False):
|
||||
chunks.extend(embed_res)
|
||||
return chunks
|
||||
|
||||
res.extend(tokenize_chunks_with_images(chunks, doc, is_english, images))
|
||||
@ -630,11 +644,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
"chunk_token_num", 128)), parser_config.get(
|
||||
"delimiter", "\n!?。;!?"))
|
||||
if kwargs.get("section_only", False):
|
||||
chunks.extend(embed_res)
|
||||
return chunks
|
||||
|
||||
res.extend(tokenize_chunks(chunks, doc, is_english, pdf_parser))
|
||||
|
||||
logging.info("naive_merge({}): {}".format(filename, timer() - st))
|
||||
if embed_res:
|
||||
res.extend(embed_res)
|
||||
return res
|
||||
|
||||
|
||||
|
||||
@ -23,6 +23,7 @@ from deepdoc.parser.utils import get_text
|
||||
from rag.app import naive
|
||||
from rag.nlp import rag_tokenizer, tokenize
|
||||
from deepdoc.parser import PdfParser, ExcelParser, PlainParser, HtmlParser
|
||||
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper,vision_figure_parser_docx_wrapper
|
||||
|
||||
|
||||
class Pdf(PdfParser):
|
||||
@ -57,13 +58,8 @@ class Pdf(PdfParser):
|
||||
|
||||
sections = [(b["text"], self.get_position(b, zoomin))
|
||||
for i, b in enumerate(self.boxes)]
|
||||
for (img, rows), poss in tbls:
|
||||
if not rows:
|
||||
continue
|
||||
sections.append((rows if isinstance(rows, str) else rows[0],
|
||||
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
|
||||
return [(txt, "") for txt, _ in sorted(sections, key=lambda x: (
|
||||
x[-1][0][0], x[-1][0][3], x[-1][0][1]))], None
|
||||
x[-1][0][0], x[-1][0][3], x[-1][0][1]))], tbls
|
||||
|
||||
|
||||
def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
@ -80,6 +76,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
if re.search(r"\.docx$", filename, re.IGNORECASE):
|
||||
callback(0.1, "Start to parse.")
|
||||
sections, tbls = naive.Docx()(filename, binary)
|
||||
tbls=vision_figure_parser_docx_wrapper(sections=sections,tbls=tbls,callback=callback,**kwargs)
|
||||
sections = [s for s, _ in sections if s]
|
||||
for (_, html), _ in tbls:
|
||||
sections.append(html)
|
||||
@ -89,8 +86,14 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
pdf_parser = Pdf()
|
||||
if parser_config.get("layout_recognize", "DeepDOC") == "Plain Text":
|
||||
pdf_parser = PlainParser()
|
||||
sections, _ = pdf_parser(
|
||||
sections, tbls = pdf_parser(
|
||||
filename if not binary else binary, to_page=to_page, callback=callback)
|
||||
tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
|
||||
for (img, rows), poss in tbls:
|
||||
if not rows:
|
||||
continue
|
||||
sections.append((rows if isinstance(rows, str) else rows[0],
|
||||
[(p[0] + 1 - from_page, p[1], p[2], p[3], p[4]) for p in poss]))
|
||||
sections = [s for s, _ in sections if s]
|
||||
|
||||
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
|
||||
|
||||
@ -18,12 +18,12 @@ import logging
|
||||
import copy
|
||||
import re
|
||||
|
||||
from deepdoc.parser.figure_parser import vision_figure_parser_pdf_wrapper
|
||||
from api.db import ParserType
|
||||
from rag.nlp import rag_tokenizer, tokenize, tokenize_table, add_positions, bullets_category, title_frequency, tokenize_chunks
|
||||
from deepdoc.parser import PdfParser, PlainParser
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Pdf(PdfParser):
|
||||
def __init__(self):
|
||||
self.model_speciess = ParserType.PAPER.value
|
||||
@ -160,6 +160,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
pdf_parser = Pdf()
|
||||
paper = pdf_parser(filename if not binary else binary,
|
||||
from_page=from_page, to_page=to_page, callback=callback)
|
||||
tbls=paper["tables"]
|
||||
tbls=vision_figure_parser_pdf_wrapper(tbls=tbls,callback=callback,**kwargs)
|
||||
paper["tables"] = tbls
|
||||
else:
|
||||
raise NotImplementedError("file type not supported yet(pdf supported)")
|
||||
|
||||
|
||||
@ -23,44 +23,62 @@ from PIL import Image
|
||||
from api.db import LLMType
|
||||
from api.db.services.llm_service import LLMBundle
|
||||
from deepdoc.vision import OCR
|
||||
from rag.nlp import tokenize
|
||||
from rag.nlp import rag_tokenizer, tokenize
|
||||
from rag.utils import clean_markdown_block
|
||||
from rag.nlp import rag_tokenizer
|
||||
|
||||
|
||||
ocr = OCR()
|
||||
|
||||
# Gemini supported MIME types
|
||||
VIDEO_EXTS = [".mp4", ".mov", ".avi", ".flv", ".mpeg", ".mpg", ".webm", ".wmv", ".3gp", ".3gpp"]
|
||||
|
||||
|
||||
def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):
|
||||
img = Image.open(io.BytesIO(binary)).convert('RGB')
|
||||
doc = {
|
||||
"docnm_kwd": filename,
|
||||
"title_tks": rag_tokenizer.tokenize(re.sub(r"\.[a-zA-Z]+$", "", filename)),
|
||||
"image": img,
|
||||
"doc_type_kwd": "image"
|
||||
}
|
||||
bxs = ocr(np.array(img))
|
||||
txt = "\n".join([t[0] for _, t in bxs if t[0]])
|
||||
eng = lang.lower() == "english"
|
||||
callback(0.4, "Finish OCR: (%s ...)" % txt[:12])
|
||||
if (eng and len(txt.split()) > 32) or len(txt) > 32:
|
||||
tokenize(doc, txt, eng)
|
||||
callback(0.8, "OCR results is too long to use CV LLM.")
|
||||
return [doc]
|
||||
|
||||
try:
|
||||
callback(0.4, "Use CV LLM to describe the picture.")
|
||||
cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang)
|
||||
img_binary = io.BytesIO()
|
||||
img.save(img_binary, format='JPEG')
|
||||
img_binary.seek(0)
|
||||
ans = cv_mdl.describe(img_binary.read())
|
||||
callback(0.8, "CV LLM respond: %s ..." % ans[:32])
|
||||
txt += "\n" + ans
|
||||
tokenize(doc, txt, eng)
|
||||
return [doc]
|
||||
except Exception as e:
|
||||
callback(prog=-1, msg=str(e))
|
||||
if any(filename.lower().endswith(ext) for ext in VIDEO_EXTS):
|
||||
try:
|
||||
doc.update({"doc_type_kwd": "video"})
|
||||
cv_mdl = LLMBundle(tenant_id, llm_type=LLMType.IMAGE2TEXT, lang=lang)
|
||||
ans = cv_mdl.chat(system="", history=[], gen_conf={}, video_bytes=binary, filename=filename)
|
||||
callback(0.8, "CV LLM respond: %s ..." % ans[:32])
|
||||
ans += "\n" + ans
|
||||
tokenize(doc, ans, eng)
|
||||
return [doc]
|
||||
except Exception as e:
|
||||
callback(prog=-1, msg=str(e))
|
||||
else:
|
||||
img = Image.open(io.BytesIO(binary)).convert("RGB")
|
||||
doc.update(
|
||||
{
|
||||
"image": img,
|
||||
"doc_type_kwd": "image",
|
||||
}
|
||||
)
|
||||
bxs = ocr(np.array(img))
|
||||
txt = "\n".join([t[0] for _, t in bxs if t[0]])
|
||||
callback(0.4, "Finish OCR: (%s ...)" % txt[:12])
|
||||
if (eng and len(txt.split()) > 32) or len(txt) > 32:
|
||||
tokenize(doc, txt, eng)
|
||||
callback(0.8, "OCR results is too long to use CV LLM.")
|
||||
return [doc]
|
||||
|
||||
try:
|
||||
callback(0.4, "Use CV LLM to describe the picture.")
|
||||
cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang)
|
||||
img_binary = io.BytesIO()
|
||||
img.save(img_binary, format="JPEG")
|
||||
img_binary.seek(0)
|
||||
ans = cv_mdl.describe(img_binary.read())
|
||||
callback(0.8, "CV LLM respond: %s ..." % ans[:32])
|
||||
txt += "\n" + ans
|
||||
tokenize(doc, txt, eng)
|
||||
return [doc]
|
||||
except Exception as e:
|
||||
callback(prog=-1, msg=str(e))
|
||||
|
||||
return []
|
||||
|
||||
@ -79,7 +97,7 @@ def vision_llm_chunk(binary, vision_model, prompt=None, callback=None):
|
||||
|
||||
try:
|
||||
with io.BytesIO() as img_binary:
|
||||
img.save(img_binary, format='JPEG')
|
||||
img.save(img_binary, format="JPEG")
|
||||
img_binary.seek(0)
|
||||
ans = clean_markdown_block(vision_model.describe_with_prompt(img_binary.read(), prompt))
|
||||
txt += "\n" + ans
|
||||
|
||||
@ -1165,15 +1165,13 @@ class GoogleChat(Base):
|
||||
else:
|
||||
self.client = AnthropicVertex(region=region, project_id=project_id)
|
||||
else:
|
||||
import vertexai.generative_models as glm
|
||||
from google.cloud import aiplatform
|
||||
from google import genai
|
||||
|
||||
if access_token:
|
||||
credits = service_account.Credentials.from_service_account_info(access_token)
|
||||
aiplatform.init(credentials=credits, project=project_id, location=region)
|
||||
credits = service_account.Credentials.from_service_account_info(access_token, scopes=scopes)
|
||||
self.client = genai.Client(vertexai=True, project=project_id, location=region, credentials=credits)
|
||||
else:
|
||||
aiplatform.init(project=project_id, location=region)
|
||||
self.client = glm.GenerativeModel(model_name=self.model_name)
|
||||
self.client = genai.Client(vertexai=True, project=project_id, location=region)
|
||||
|
||||
def _clean_conf(self, gen_conf):
|
||||
if "claude" in self.model_name:
|
||||
@ -1188,38 +1186,11 @@ class GoogleChat(Base):
|
||||
del gen_conf[k]
|
||||
return gen_conf
|
||||
|
||||
def _get_thinking_config(self, gen_conf):
|
||||
"""Extract and create ThinkingConfig from gen_conf.
|
||||
|
||||
Default behavior for Vertex AI Generative Models: thinking_budget=0 (disabled)
|
||||
unless explicitly specified by the user. This does not apply to Claude models.
|
||||
|
||||
Users can override by setting thinking_budget in gen_conf/llm_setting:
|
||||
- 0: Disabled (default)
|
||||
- 1-24576: Manual budget
|
||||
- -1: Auto (model decides)
|
||||
"""
|
||||
# Claude models don't support ThinkingConfig
|
||||
if "claude" in self.model_name:
|
||||
gen_conf.pop("thinking_budget", None)
|
||||
return None
|
||||
|
||||
# For Vertex AI Generative Models, default to thinking disabled
|
||||
thinking_budget = gen_conf.pop("thinking_budget", 0)
|
||||
|
||||
if thinking_budget is not None:
|
||||
try:
|
||||
import vertexai.generative_models as glm # type: ignore
|
||||
return glm.ThinkingConfig(thinking_budget=thinking_budget)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
def _chat(self, history, gen_conf={}, **kwargs):
|
||||
system = history[0]["content"] if history and history[0]["role"] == "system" else ""
|
||||
thinking_config = self._get_thinking_config(gen_conf)
|
||||
gen_conf = self._clean_conf(gen_conf)
|
||||
|
||||
if "claude" in self.model_name:
|
||||
gen_conf = self._clean_conf(gen_conf)
|
||||
response = self.client.messages.create(
|
||||
model=self.model_name,
|
||||
messages=[h for h in history if h["role"] != "system"],
|
||||
@ -1235,28 +1206,63 @@ class GoogleChat(Base):
|
||||
response["usage"]["input_tokens"] + response["usage"]["output_tokens"],
|
||||
)
|
||||
|
||||
self.client._system_instruction = system
|
||||
hist = []
|
||||
# Gemini models with google-genai SDK
|
||||
# Set default thinking_budget=0 if not specified
|
||||
if "thinking_budget" not in gen_conf:
|
||||
gen_conf["thinking_budget"] = 0
|
||||
|
||||
thinking_budget = gen_conf.pop("thinking_budget", 0)
|
||||
gen_conf = self._clean_conf(gen_conf)
|
||||
|
||||
# Build GenerateContentConfig
|
||||
try:
|
||||
from google.genai.types import GenerateContentConfig, ThinkingConfig, Content, Part
|
||||
except ImportError as e:
|
||||
logging.error(f"[GoogleChat] Failed to import google-genai: {e}. Please install: pip install google-genai>=1.41.0")
|
||||
raise
|
||||
|
||||
config_dict = {}
|
||||
if system:
|
||||
config_dict["system_instruction"] = system
|
||||
if "temperature" in gen_conf:
|
||||
config_dict["temperature"] = gen_conf["temperature"]
|
||||
if "top_p" in gen_conf:
|
||||
config_dict["top_p"] = gen_conf["top_p"]
|
||||
if "max_output_tokens" in gen_conf:
|
||||
config_dict["max_output_tokens"] = gen_conf["max_output_tokens"]
|
||||
|
||||
# Add ThinkingConfig
|
||||
config_dict["thinking_config"] = ThinkingConfig(thinking_budget=thinking_budget)
|
||||
|
||||
config = GenerateContentConfig(**config_dict)
|
||||
|
||||
# Convert history to google-genai Content format
|
||||
contents = []
|
||||
for item in history:
|
||||
if item["role"] == "system":
|
||||
continue
|
||||
hist.append(deepcopy(item))
|
||||
item = hist[-1]
|
||||
if "role" in item and item["role"] == "assistant":
|
||||
item["role"] = "model"
|
||||
if "content" in item:
|
||||
item["parts"] = [
|
||||
{
|
||||
"text": item.pop("content"),
|
||||
}
|
||||
]
|
||||
# google-genai uses 'model' instead of 'assistant'
|
||||
role = "model" if item["role"] == "assistant" else item["role"]
|
||||
content = Content(
|
||||
role=role,
|
||||
parts=[Part(text=item["content"])]
|
||||
)
|
||||
contents.append(content)
|
||||
|
||||
response = self.client.models.generate_content(
|
||||
model=self.model_name,
|
||||
contents=contents,
|
||||
config=config
|
||||
)
|
||||
|
||||
if thinking_config:
|
||||
response = self.client.generate_content(hist, generation_config=gen_conf, thinking_config=thinking_config)
|
||||
else:
|
||||
response = self.client.generate_content(hist, generation_config=gen_conf)
|
||||
ans = response.text
|
||||
return ans, response.usage_metadata.total_token_count
|
||||
# Get token count from response
|
||||
try:
|
||||
total_tokens = response.usage_metadata.total_token_count
|
||||
except Exception:
|
||||
total_tokens = 0
|
||||
|
||||
return ans, total_tokens
|
||||
|
||||
def chat_streamly(self, system, history, gen_conf={}, **kwargs):
|
||||
if "claude" in self.model_name:
|
||||
@ -1283,34 +1289,59 @@ class GoogleChat(Base):
|
||||
|
||||
yield total_tokens
|
||||
else:
|
||||
response = None
|
||||
total_tokens = 0
|
||||
self.client._system_instruction = system
|
||||
thinking_config = self._get_thinking_config(gen_conf)
|
||||
if "max_tokens" in gen_conf:
|
||||
gen_conf["max_output_tokens"] = gen_conf["max_tokens"]
|
||||
del gen_conf["max_tokens"]
|
||||
for k in list(gen_conf.keys()):
|
||||
if k not in ["temperature", "top_p", "max_output_tokens"]:
|
||||
del gen_conf[k]
|
||||
for item in history:
|
||||
if "role" in item and item["role"] == "assistant":
|
||||
item["role"] = "model"
|
||||
if "content" in item:
|
||||
item["parts"] = [
|
||||
{
|
||||
"text": item.pop("content"),
|
||||
}
|
||||
]
|
||||
# Gemini models with google-genai SDK
|
||||
ans = ""
|
||||
total_tokens = 0
|
||||
|
||||
# Set default thinking_budget=0 if not specified
|
||||
if "thinking_budget" not in gen_conf:
|
||||
gen_conf["thinking_budget"] = 0
|
||||
|
||||
thinking_budget = gen_conf.pop("thinking_budget", 0)
|
||||
gen_conf = self._clean_conf(gen_conf)
|
||||
|
||||
# Build GenerateContentConfig
|
||||
try:
|
||||
if thinking_config:
|
||||
response = self.client.generate_content(history, generation_config=gen_conf, thinking_config=thinking_config, stream=True)
|
||||
else:
|
||||
response = self.client.generate_content(history, generation_config=gen_conf, stream=True)
|
||||
for resp in response:
|
||||
ans = resp.text
|
||||
total_tokens += num_tokens_from_string(ans)
|
||||
from google.genai.types import GenerateContentConfig, ThinkingConfig, Content, Part
|
||||
except ImportError as e:
|
||||
logging.error(f"[GoogleChat] Failed to import google-genai: {e}. Please install: pip install google-genai>=1.41.0")
|
||||
raise
|
||||
|
||||
config_dict = {}
|
||||
if system:
|
||||
config_dict["system_instruction"] = system
|
||||
if "temperature" in gen_conf:
|
||||
config_dict["temperature"] = gen_conf["temperature"]
|
||||
if "top_p" in gen_conf:
|
||||
config_dict["top_p"] = gen_conf["top_p"]
|
||||
if "max_output_tokens" in gen_conf:
|
||||
config_dict["max_output_tokens"] = gen_conf["max_output_tokens"]
|
||||
|
||||
# Add ThinkingConfig
|
||||
config_dict["thinking_config"] = ThinkingConfig(thinking_budget=thinking_budget)
|
||||
|
||||
config = GenerateContentConfig(**config_dict)
|
||||
|
||||
# Convert history to google-genai Content format
|
||||
contents = []
|
||||
for item in history:
|
||||
# google-genai uses 'model' instead of 'assistant'
|
||||
role = "model" if item["role"] == "assistant" else item["role"]
|
||||
content = Content(
|
||||
role=role,
|
||||
parts=[Part(text=item["content"])]
|
||||
)
|
||||
contents.append(content)
|
||||
|
||||
try:
|
||||
for chunk in self.client.models.generate_content_stream(
|
||||
model=self.model_name,
|
||||
contents=contents,
|
||||
config=config
|
||||
):
|
||||
text = chunk.text
|
||||
ans = text
|
||||
total_tokens += num_tokens_from_string(text)
|
||||
yield ans
|
||||
|
||||
except Exception as e:
|
||||
@ -1394,6 +1425,9 @@ class LiteLLMBase(ABC):
|
||||
self.bedrock_ak = json.loads(key).get("bedrock_ak", "")
|
||||
self.bedrock_sk = json.loads(key).get("bedrock_sk", "")
|
||||
self.bedrock_region = json.loads(key).get("bedrock_region", "")
|
||||
elif self.provider == SupportedLiteLLMProvider.OpenRouter:
|
||||
self.api_key = json.loads(key).get("api_key", "")
|
||||
self.provider_order = json.loads(key).get("provider_order", "")
|
||||
|
||||
def _get_delay(self):
|
||||
"""Calculate retry delay time"""
|
||||
@ -1438,7 +1472,6 @@ class LiteLLMBase(ABC):
|
||||
timeout=self.timeout,
|
||||
)
|
||||
# response = self.client.chat.completions.create(model=self.model_name, messages=history, **gen_conf, **kwargs)
|
||||
|
||||
if any([not response.choices, not response.choices[0].message, not response.choices[0].message.content]):
|
||||
return "", 0
|
||||
ans = response.choices[0].message.content.strip()
|
||||
@ -1589,6 +1622,24 @@ class LiteLLMBase(ABC):
|
||||
"aws_region_name": self.bedrock_region,
|
||||
}
|
||||
)
|
||||
|
||||
if self.provider == SupportedLiteLLMProvider.OpenRouter:
|
||||
if self.provider_order:
|
||||
def _to_order_list(x):
|
||||
if x is None:
|
||||
return []
|
||||
if isinstance(x, str):
|
||||
return [s.strip() for s in x.split(",") if s.strip()]
|
||||
if isinstance(x, (list, tuple)):
|
||||
return [str(s).strip() for s in x if str(s).strip()]
|
||||
return []
|
||||
extra_body = {}
|
||||
provider_cfg = {}
|
||||
provider_order = _to_order_list(self.provider_order)
|
||||
provider_cfg["order"] = provider_order
|
||||
provider_cfg["allow_fallbacks"] = False
|
||||
extra_body["provider"] = provider_cfg
|
||||
completion_args.update({"extra_body": extra_body})
|
||||
return completion_args
|
||||
|
||||
def chat_with_tools(self, system: str, history: list, gen_conf: dict = {}):
|
||||
|
||||
@ -13,12 +13,16 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import logging
|
||||
from abc import ABC
|
||||
from copy import deepcopy
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
from openai import OpenAI
|
||||
@ -38,6 +42,7 @@ class Base(ABC):
|
||||
self.is_tools = False
|
||||
self.tools = []
|
||||
self.toolcall_sessions = {}
|
||||
self.extra_body = None
|
||||
|
||||
def describe(self, image):
|
||||
raise NotImplementedError("Please implement encode method!")
|
||||
@ -77,7 +82,8 @@ class Base(ABC):
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=self._form_history(system, history, images)
|
||||
messages=self._form_history(system, history, images),
|
||||
extra_body=self.extra_body,
|
||||
)
|
||||
return response.choices[0].message.content.strip(), response.usage.total_tokens
|
||||
except Exception as e:
|
||||
@ -90,7 +96,8 @@ class Base(ABC):
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=self._form_history(system, history, images),
|
||||
stream=True
|
||||
stream=True,
|
||||
extra_body=self.extra_body,
|
||||
)
|
||||
for resp in response:
|
||||
if not resp.choices[0].delta.content:
|
||||
@ -167,6 +174,7 @@ class GptV4(Base):
|
||||
def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1", **kwargs):
|
||||
if not base_url:
|
||||
base_url = "https://api.openai.com/v1"
|
||||
self.api_key = key
|
||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||
self.model_name = model_name
|
||||
self.lang = lang
|
||||
@ -177,6 +185,7 @@ class GptV4(Base):
|
||||
res = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=self.prompt(b64),
|
||||
extra_body=self.extra_body,
|
||||
)
|
||||
return res.choices[0].message.content.strip(), total_token_count_from_response(res)
|
||||
|
||||
@ -185,6 +194,7 @@ class GptV4(Base):
|
||||
res = self.client.chat.completions.create(
|
||||
model=self.model_name,
|
||||
messages=self.vision_llm_prompt(b64, prompt),
|
||||
extra_body=self.extra_body,
|
||||
)
|
||||
return res.choices[0].message.content.strip(),total_token_count_from_response(res)
|
||||
|
||||
@ -218,6 +228,61 @@ class QWenCV(GptV4):
|
||||
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
|
||||
|
||||
def chat(self, system, history, gen_conf, images=[], video_bytes=None, filename=""):
|
||||
if video_bytes:
|
||||
try:
|
||||
summary, summary_num_tokens = self._process_video(video_bytes, filename)
|
||||
return summary, summary_num_tokens
|
||||
except Exception as e:
|
||||
return "**ERROR**: " + str(e), 0
|
||||
|
||||
return "**ERROR**: Method chat not supported yet.", 0
|
||||
|
||||
def _process_video(self, video_bytes, filename):
|
||||
from dashscope import MultiModalConversation
|
||||
|
||||
video_suffix = Path(filename).suffix or ".mp4"
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp:
|
||||
tmp.write(video_bytes)
|
||||
tmp_path = tmp.name
|
||||
|
||||
video_path = f"file://{tmp_path}"
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"video": video_path,
|
||||
"fps": 2,
|
||||
},
|
||||
{
|
||||
"text": "Please summarize this video in proper sentences.",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
def call_api():
|
||||
response = MultiModalConversation.call(
|
||||
api_key=self.api_key,
|
||||
model=self.model_name,
|
||||
messages=messages,
|
||||
)
|
||||
summary = response["output"]["choices"][0]["message"].content[0]["text"]
|
||||
return summary, num_tokens_from_string(summary)
|
||||
|
||||
try:
|
||||
return call_api()
|
||||
except Exception as e1:
|
||||
import dashscope
|
||||
|
||||
dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
|
||||
try:
|
||||
return call_api()
|
||||
except Exception as e2:
|
||||
raise RuntimeError(f"Both default and intl endpoint failed.\nFirst error: {e1}\nSecond error: {e2}")
|
||||
|
||||
|
||||
|
||||
class HunyuanCV(GptV4):
|
||||
_FACTORY_NAME = "Tencent Hunyuan"
|
||||
@ -249,6 +314,17 @@ class StepFunCV(GptV4):
|
||||
self.lang = lang
|
||||
Base.__init__(self, **kwargs)
|
||||
|
||||
class VolcEngineCV(GptV4):
|
||||
_FACTORY_NAME = "VolcEngine"
|
||||
|
||||
def __init__(self, key, model_name, lang="Chinese", base_url="https://ark.cn-beijing.volces.com/api/v3", **kwargs):
|
||||
if not base_url:
|
||||
base_url = "https://ark.cn-beijing.volces.com/api/v3"
|
||||
ark_api_key = json.loads(key).get("ark_api_key", "")
|
||||
self.client = OpenAI(api_key=ark_api_key, base_url=base_url)
|
||||
self.model_name = json.loads(key).get("ep_id", "") + json.loads(key).get("endpoint_id", "")
|
||||
self.lang = lang
|
||||
Base.__init__(self, **kwargs)
|
||||
|
||||
class LmStudioCV(GptV4):
|
||||
_FACTORY_NAME = "LM-Studio"
|
||||
@ -327,10 +403,27 @@ class OpenRouterCV(GptV4):
|
||||
):
|
||||
if not base_url:
|
||||
base_url = "https://openrouter.ai/api/v1"
|
||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||
api_key = json.loads(key).get("api_key", "")
|
||||
self.client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
self.model_name = model_name
|
||||
self.lang = lang
|
||||
Base.__init__(self, **kwargs)
|
||||
provider_order = json.loads(key).get("provider_order", "")
|
||||
self.extra_body = {}
|
||||
if provider_order:
|
||||
def _to_order_list(x):
|
||||
if x is None:
|
||||
return []
|
||||
if isinstance(x, str):
|
||||
return [s.strip() for s in x.split(",") if s.strip()]
|
||||
if isinstance(x, (list, tuple)):
|
||||
return [str(s).strip() for s in x if str(s).strip()]
|
||||
return []
|
||||
provider_cfg = {}
|
||||
provider_order = _to_order_list(provider_order)
|
||||
provider_cfg["order"] = provider_order
|
||||
provider_cfg["allow_fallbacks"] = False
|
||||
self.extra_body["provider"] = provider_cfg
|
||||
|
||||
|
||||
class LocalAICV(GptV4):
|
||||
@ -496,6 +589,7 @@ class GeminiCV(Base):
|
||||
|
||||
client.configure(api_key=key)
|
||||
_client = client.get_default_generative_client()
|
||||
self.api_key=key
|
||||
self.model_name = model_name
|
||||
self.model = GenerativeModel(model_name=self.model_name)
|
||||
self.model._client = _client
|
||||
@ -538,7 +632,15 @@ class GeminiCV(Base):
|
||||
res = self.model.generate_content(input)
|
||||
return res.text, total_token_count_from_response(res)
|
||||
|
||||
def chat(self, system, history, gen_conf, images=[]):
|
||||
|
||||
def chat(self, system, history, gen_conf, images=[], video_bytes=None, filename=""):
|
||||
if video_bytes:
|
||||
try:
|
||||
summary, summary_num_tokens = self._process_video(video_bytes, filename)
|
||||
return summary, summary_num_tokens
|
||||
except Exception as e:
|
||||
return "**ERROR**: " + str(e), 0
|
||||
|
||||
generation_config = dict(temperature=gen_conf.get("temperature", 0.3), top_p=gen_conf.get("top_p", 0.7))
|
||||
try:
|
||||
response = self.model.generate_content(
|
||||
@ -570,6 +672,46 @@ class GeminiCV(Base):
|
||||
|
||||
yield total_token_count_from_response(response)
|
||||
|
||||
def _process_video(self, video_bytes, filename):
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
video_size_mb = len(video_bytes) / (1024 * 1024)
|
||||
client = genai.Client(api_key=self.api_key)
|
||||
|
||||
tmp_path = None
|
||||
try:
|
||||
if video_size_mb <= 20:
|
||||
response = client.models.generate_content(
|
||||
model="models/gemini-2.5-flash",
|
||||
contents=types.Content(parts=[
|
||||
types.Part(inline_data=types.Blob(data=video_bytes, mime_type="video/mp4")),
|
||||
types.Part(text="Please summarize the video in proper sentences.")
|
||||
])
|
||||
)
|
||||
else:
|
||||
logging.info(f"Video size {video_size_mb:.2f}MB exceeds 20MB. Using Files API...")
|
||||
video_suffix = Path(filename).suffix or ".mp4"
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp:
|
||||
tmp.write(video_bytes)
|
||||
tmp_path = Path(tmp.name)
|
||||
uploaded_file = client.files.upload(file=tmp_path)
|
||||
|
||||
response = client.models.generate_content(
|
||||
model="gemini-2.5-flash",
|
||||
contents=[uploaded_file, "Please summarize this video in proper sentences."]
|
||||
)
|
||||
|
||||
summary = response.text or ""
|
||||
logging.info(f"Video summarized: {summary[:32]}...")
|
||||
return summary, num_tokens_from_string(summary)
|
||||
except Exception as e:
|
||||
logging.error(f"Video processing failed: {e}")
|
||||
raise
|
||||
finally:
|
||||
if tmp_path and tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
|
||||
|
||||
class NvidiaCV(Base):
|
||||
_FACTORY_NAME = "NVIDIA"
|
||||
|
||||
@ -459,12 +459,10 @@ def tree_merge(bull, sections, depth):
|
||||
return len(BULLET_PATTERN[bull])+1, text
|
||||
else:
|
||||
return len(BULLET_PATTERN[bull])+2, text
|
||||
|
||||
level_set = set()
|
||||
lines = []
|
||||
for section in sections:
|
||||
level, text = get_level(bull, section)
|
||||
|
||||
if not text.strip("\n"):
|
||||
continue
|
||||
|
||||
@ -797,8 +795,8 @@ class Node:
|
||||
def __init__(self, level, depth=-1, texts=None):
|
||||
self.level = level
|
||||
self.depth = depth
|
||||
self.texts = texts if texts is not None else [] # 存放内容
|
||||
self.children = [] # 子节点
|
||||
self.texts = texts or []
|
||||
self.children = []
|
||||
|
||||
def add_child(self, child_node):
|
||||
self.children.append(child_node)
|
||||
@ -825,35 +823,51 @@ class Node:
|
||||
return f"Node(level={self.level}, texts={self.texts}, children={len(self.children)})"
|
||||
|
||||
def build_tree(self, lines):
|
||||
stack = [self]
|
||||
for line in lines:
|
||||
level, text = line
|
||||
node = Node(level=level, texts=[text])
|
||||
|
||||
if level <= self.depth or self.depth == -1:
|
||||
while stack and level <= stack[-1].get_level():
|
||||
stack.pop()
|
||||
|
||||
stack[-1].add_child(node)
|
||||
stack.append(node)
|
||||
else:
|
||||
stack = [self]
|
||||
for level, text in lines:
|
||||
if self.depth != -1 and level > self.depth:
|
||||
# Beyond target depth: merge content into the current leaf instead of creating deeper nodes
|
||||
stack[-1].add_text(text)
|
||||
return self
|
||||
continue
|
||||
|
||||
# Move up until we find the proper parent whose level is strictly smaller than current
|
||||
while len(stack) > 1 and level <= stack[-1].get_level():
|
||||
stack.pop()
|
||||
|
||||
node = Node(level=level, texts=[text])
|
||||
# Attach as child of current parent and descend
|
||||
stack[-1].add_child(node)
|
||||
stack.append(node)
|
||||
|
||||
return self
|
||||
|
||||
def get_tree(self):
|
||||
tree_list = []
|
||||
self._dfs(self, tree_list, 0, [])
|
||||
self._dfs(self, tree_list, [])
|
||||
return tree_list
|
||||
|
||||
def _dfs(self, node, tree_list, current_depth, titles):
|
||||
def _dfs(self, node, tree_list, titles):
|
||||
level = node.get_level()
|
||||
texts = node.get_texts()
|
||||
child = node.get_children()
|
||||
|
||||
if node.get_texts():
|
||||
if 0 < node.get_level() < self.depth:
|
||||
titles.extend(node.get_texts())
|
||||
else:
|
||||
combined_text = ["\n".join(titles + node.get_texts())]
|
||||
tree_list.append(combined_text)
|
||||
if level == 0 and texts:
|
||||
tree_list.append("\n".join(titles+texts))
|
||||
|
||||
# Titles within configured depth are accumulated into the current path
|
||||
if 1 <= level <= self.depth:
|
||||
path_titles = titles + texts
|
||||
else:
|
||||
path_titles = titles
|
||||
|
||||
for child in node.get_children():
|
||||
self._dfs(child, tree_list, current_depth + 1, titles.copy())
|
||||
# Body outside the depth limit becomes its own chunk under the current title path
|
||||
if level > self.depth and texts:
|
||||
tree_list.append("\n".join(path_titles + texts))
|
||||
|
||||
# A leaf title within depth emits its title path as a chunk (header-only section)
|
||||
elif not child and (1 <= level <= self.depth):
|
||||
tree_list.append("\n".join(path_titles))
|
||||
|
||||
# Recurse into children with the updated title path
|
||||
for c in child:
|
||||
self._dfs(c, tree_list, path_titles)
|
||||
@ -17,6 +17,7 @@ import json
|
||||
import logging
|
||||
import re
|
||||
import math
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
from dataclasses import dataclass
|
||||
|
||||
@ -71,7 +72,7 @@ class Dealer:
|
||||
def search(self, req, idx_names: str | list[str],
|
||||
kb_ids: list[str],
|
||||
emb_mdl=None,
|
||||
highlight=False,
|
||||
highlight: bool | list = False,
|
||||
rank_feature: dict | None = None
|
||||
):
|
||||
filters = self.get_filters(req)
|
||||
@ -100,7 +101,11 @@ class Dealer:
|
||||
total = self.dataStore.getTotal(res)
|
||||
logging.debug("Dealer.search TOTAL: {}".format(total))
|
||||
else:
|
||||
highlightFields = ["content_ltks", "title_tks"] if highlight else []
|
||||
highlightFields = ["content_ltks", "title_tks"]
|
||||
if not highlight:
|
||||
highlightFields = []
|
||||
elif isinstance(highlight, list):
|
||||
highlightFields = highlight
|
||||
matchText, keywords = self.qryr.question(qst, min_match=0.3)
|
||||
if emb_mdl is None:
|
||||
matchExprs = [matchText]
|
||||
@ -154,7 +159,7 @@ class Dealer:
|
||||
query_vector=q_vec,
|
||||
aggregation=aggs,
|
||||
highlight=highlight,
|
||||
field=self.dataStore.getFields(res, src),
|
||||
field=self.dataStore.getFields(res, src + ["_score"]),
|
||||
keywords=keywords
|
||||
)
|
||||
|
||||
@ -354,10 +359,8 @@ class Dealer:
|
||||
if not question:
|
||||
return ranks
|
||||
|
||||
RERANK_LIMIT = 64
|
||||
RERANK_LIMIT = int(RERANK_LIMIT//page_size + ((RERANK_LIMIT%page_size)/(page_size*1.) + 0.5)) * page_size if page_size>1 else 1
|
||||
if RERANK_LIMIT < 1: ## when page_size is very large the RERANK_LIMIT will be 0.
|
||||
RERANK_LIMIT = 1
|
||||
# Ensure RERANK_LIMIT is multiple of page_size
|
||||
RERANK_LIMIT = math.ceil(64/page_size) * page_size if page_size>1 else 1
|
||||
req = {"kb_ids": kb_ids, "doc_ids": doc_ids, "page": math.ceil(page_size*page/RERANK_LIMIT), "size": RERANK_LIMIT,
|
||||
"question": question, "vector": True, "topk": top,
|
||||
"similarity": similarity_threshold,
|
||||
@ -376,15 +379,25 @@ class Dealer:
|
||||
vector_similarity_weight,
|
||||
rank_feature=rank_feature)
|
||||
else:
|
||||
sim, tsim, vsim = self.rerank(
|
||||
sres, question, 1 - vector_similarity_weight, vector_similarity_weight,
|
||||
rank_feature=rank_feature)
|
||||
lower_case_doc_engine = os.getenv('DOC_ENGINE', 'elasticsearch')
|
||||
if lower_case_doc_engine == "elasticsearch":
|
||||
# ElasticSearch doesn't normalize each way score before fusion.
|
||||
sim, tsim, vsim = self.rerank(
|
||||
sres, question, 1 - vector_similarity_weight, vector_similarity_weight,
|
||||
rank_feature=rank_feature)
|
||||
else:
|
||||
# Don't need rerank here since Infinity normalizes each way score before fusion.
|
||||
sim = [sres.field[id].get("_score", 0.0) for id in sres.ids]
|
||||
tsim = sim
|
||||
vsim = sim
|
||||
# Already paginated in search function
|
||||
idx = np.argsort(sim * -1)[(page - 1) * page_size:page * page_size]
|
||||
begin = ((page % (RERANK_LIMIT//page_size)) - 1) * page_size
|
||||
sim = sim[begin : begin + page_size]
|
||||
sim_np = np.array(sim)
|
||||
idx = np.argsort(sim_np * -1)
|
||||
dim = len(sres.query_vector)
|
||||
vector_column = f"q_{dim}_vec"
|
||||
zero_vector = [0.0] * dim
|
||||
sim_np = np.array(sim)
|
||||
filtered_count = (sim_np >= similarity_threshold).sum()
|
||||
ranks["total"] = int(filtered_count) # Convert from np.int64 to Python int otherwise JSON serializable error
|
||||
for i in idx:
|
||||
|
||||
@ -114,7 +114,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
|
||||
),
|
||||
}
|
||||
],
|
||||
{"max_tokens": self._max_token},
|
||||
{"max_tokens": max(self._max_token, 512)}, # fix issue: #10235
|
||||
)
|
||||
cnt = re.sub(
|
||||
"(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)",
|
||||
|
||||
@ -447,7 +447,7 @@ def build_TOC(task, docs, progress_callback):
|
||||
d["content_with_weight"] = json.dumps(toc, ensure_ascii=False)
|
||||
d["toc_kwd"] = "toc"
|
||||
d["available_int"] = 0
|
||||
d["page_num_int"] = 100000000
|
||||
d["page_num_int"] = [100000000]
|
||||
d["id"] = xxhash.xxh64((d["content_with_weight"] + str(d["doc_id"])).encode("utf-8", "surrogatepass")).hexdigest()
|
||||
return d
|
||||
|
||||
@ -1052,13 +1052,14 @@ async def task_manager():
|
||||
|
||||
async def main():
|
||||
logging.info(r"""
|
||||
______ __ ______ __
|
||||
/_ __/___ ______/ /__ / ____/ _____ _______ __/ /_____ _____
|
||||
/ / / __ `/ ___/ //_/ / __/ | |/_/ _ \/ ___/ / / / __/ __ \/ ___/
|
||||
/ / / /_/ (__ ) ,< / /____> </ __/ /__/ /_/ / /_/ /_/ / /
|
||||
/_/ \__,_/____/_/|_| /_____/_/|_|\___/\___/\__,_/\__/\____/_/
|
||||
____ __ _
|
||||
/ _/___ ____ ____ _____/ /_(_)___ ____ ________ ______ _____ _____
|
||||
/ // __ \/ __ `/ _ \/ ___/ __/ / __ \/ __ \ / ___/ _ \/ ___/ | / / _ \/ ___/
|
||||
_/ // / / / /_/ / __(__ ) /_/ / /_/ / / / / (__ ) __/ / | |/ / __/ /
|
||||
/___/_/ /_/\__, /\___/____/\__/_/\____/_/ /_/ /____/\___/_/ |___/\___/_/
|
||||
/____/
|
||||
""")
|
||||
logging.info(f'TaskExecutor: RAGFlow version: {get_ragflow_version()}')
|
||||
logging.info(f'RAGFlow version: {get_ragflow_version()}')
|
||||
settings.init_settings()
|
||||
print_rag_settings()
|
||||
if sys.platform != "win32":
|
||||
|
||||
@ -445,8 +445,8 @@ class InfinityConnection(DocStoreConnection):
|
||||
self.connPool.release_conn(inf_conn)
|
||||
res = concat_dataframes(df_list, output)
|
||||
if matchExprs:
|
||||
res["Sum"] = res[score_column] + res[PAGERANK_FLD]
|
||||
res = res.sort_values(by="Sum", ascending=False).reset_index(drop=True).drop(columns=["Sum"])
|
||||
res["_score"] = res[score_column] + res[PAGERANK_FLD]
|
||||
res = res.sort_values(by="_score", ascending=False).reset_index(drop=True)
|
||||
res = res.head(limit)
|
||||
logger.debug(f"INFINITY search final result: {str(res)}")
|
||||
return res, total_hits_count
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
import logging
|
||||
import time
|
||||
from minio import Minio
|
||||
from minio.commonconfig import CopySource
|
||||
from minio.error import S3Error
|
||||
from io import BytesIO
|
||||
from rag import settings
|
||||
@ -141,3 +142,36 @@ class RAGFlowMinio:
|
||||
except Exception:
|
||||
logging.exception(f"Fail to remove bucket {bucket}")
|
||||
|
||||
def copy(self, src_bucket, src_path, dest_bucket, dest_path):
|
||||
try:
|
||||
if not self.conn.bucket_exists(dest_bucket):
|
||||
self.conn.make_bucket(dest_bucket)
|
||||
|
||||
try:
|
||||
self.conn.stat_object(src_bucket, src_path)
|
||||
except Exception as e:
|
||||
logging.exception(f"Source object not found: {src_bucket}/{src_path}, {e}")
|
||||
return False
|
||||
|
||||
self.conn.copy_object(
|
||||
dest_bucket,
|
||||
dest_path,
|
||||
CopySource(src_bucket, src_path),
|
||||
)
|
||||
return True
|
||||
|
||||
except Exception:
|
||||
logging.exception(f"Fail to copy {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
|
||||
return False
|
||||
|
||||
def move(self, src_bucket, src_path, dest_bucket, dest_path):
|
||||
try:
|
||||
if self.copy(src_bucket, src_path, dest_bucket, dest_path):
|
||||
self.rm(src_bucket, src_path)
|
||||
return True
|
||||
else:
|
||||
logging.error(f"Copy failed, move aborted: {src_bucket}/{src_path}")
|
||||
return False
|
||||
except Exception:
|
||||
logging.exception(f"Fail to move {src_bucket}/{src_path} -> {dest_bucket}/{dest_path}")
|
||||
return False
|
||||
|
||||
@ -83,7 +83,7 @@ class TestChunksRetrieval:
|
||||
"ValueError('Search does not support negative slicing.')",
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
pytest.param({"page": 2, "page_size": 2}, 0, 2, "", marks=pytest.mark.skip(reason="issues/6646")),
|
||||
({"page": 2, "page_size": 2}, 0, 2, ""),
|
||||
({"page": 3, "page_size": 2}, 0, 0, ""),
|
||||
({"page": "3", "page_size": 2}, 0, 0, ""),
|
||||
pytest.param(
|
||||
@ -124,9 +124,9 @@ class TestChunksRetrieval:
|
||||
marks=pytest.mark.skip,
|
||||
),
|
||||
# ({"page_size": 0}, 0, 0, ""),
|
||||
({"page_size": 1}, 0, 1, ""),
|
||||
pytest.param({"page_size": 1}, 0, 1, "", marks=pytest.mark.skip(reason="issues/10692")),
|
||||
({"page_size": 5}, 0, 4, ""),
|
||||
({"page_size": "1"}, 0, 1, ""),
|
||||
pytest.param({"page_size": "1"}, 0, 1, "", marks=pytest.mark.skip(reason="issues/10692")),
|
||||
# ({"page_size": -1}, 0, 0, ""),
|
||||
pytest.param(
|
||||
{"page_size": "a"},
|
||||
|
||||
330
uv.lock
generated
330
uv.lock
generated
@ -31,6 +31,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/1c/a17fb513aeb684fb83bef5f395910f53103ab30308bbdd77fd66d6698c46/accelerate-1.9.0-py3-none-any.whl", hash = "sha256:c24739a97ade1d54af4549a65f8b6b046adc87e2b3e4d6c66516e32c53d5a8f1", size = 367073, upload-time = "2025-07-16T16:24:52.957Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "acres"
|
||||
version = "0.5.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ec/ba/94b63a9af588fbf7bde25ce44d55456199654a92fb7b2337767198a824b0/acres-0.5.0.tar.gz", hash = "sha256:128b6447bf5df3b6210264feccbfa018b4ac5bd337358319aec6563f99db8f3a", size = 57750, upload-time = "2025-06-04T12:40:30.329Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/39/e8/806475fe4cdfd8635535d3fa11bd61d19b7cc94b61b9147ebdd2ab4cbbee/acres-0.5.0-py3-none-any.whl", hash = "sha256:fcc32b974b510897de0f041609b4234f9ff03e2e960aea088f63973fb106c772", size = 12703, upload-time = "2025-06-04T12:40:28.745Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aiofiles"
|
||||
version = "24.1.0"
|
||||
@ -658,6 +667,18 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fb/2b/a64c2d25a37aeb921fddb929111413049fc5f8b9a4c1aefaffaafe768d54/cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945", size = 9325, upload-time = "2024-02-26T20:33:20.308Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "captcha"
|
||||
version = "0.7.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "pillow" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b4/65/8e186bb798f33ba390eab897c995b0fcee92bc030e0f40cb8ea01f34dd07/captcha-0.7.1.tar.gz", hash = "sha256:a1b462bcc633a64d8db5efa7754548a877c698d98f87716c620a707364cabd6b", size = 226561, upload-time = "2025-03-01T05:00:13.395Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/da/ff/3f0982ecd37c2d6a7266c22e7ea2e47d0773fe449984184c5316459d2776/captcha-0.7.1-py3-none-any.whl", hash = "sha256:8b73b5aba841ad1e5bdb856205bf5f09560b728ee890eb9dae42901219c8c599", size = 147606, upload-time = "2025-03-01T05:00:10.433Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cbor"
|
||||
version = "1.0.0"
|
||||
@ -806,6 +827,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ci-info"
|
||||
version = "0.3.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/27/938d6ef93df09c686dcee1c7334578274320e98e7bf912a6409cf2c8c3e5/ci-info-0.3.0.tar.gz", hash = "sha256:1fd50cbd401f29adffeeb18b0489e232d16ac1a7458ac6bc316deab6ae535fb0", size = 25169, upload-time = "2022-07-27T17:22:49.365Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/c3/8ac768b389d5b6dda1c3ce7992b3acd2b46401f9b71439123858b17b1a2c/ci_info-0.3.0-py3-none-any.whl", hash = "sha256:e9e05d262a6c48aa03cd904475de5ce8c4da8a5435e516631c795d0487dc9e07", size = 7764, upload-time = "2022-07-27T17:22:47.196Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "click"
|
||||
version = "8.2.1"
|
||||
@ -900,6 +930,24 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/1d/62f5bf92e12335eb63517f42671ed78512d48bbc69e02a942dd7b90f03f0/compressed_rtf-1.0.7-py3-none-any.whl", hash = "sha256:b7904921d78c67a0a4b7fff9fb361a00ae2b447b6edca010ce321cd98fa0fcc0", size = 7968, upload-time = "2025-03-24T23:03:57.433Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "configobj"
|
||||
version = "5.0.9"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f5/c4/c7f9e41bc2e5f8eeae4a08a01c91b2aea3dfab40a3e14b25e87e7db8d501/configobj-5.0.9.tar.gz", hash = "sha256:03c881bbf23aa07bccf1b837005975993c4ab4427ba57f959afdd9d1a2386848", size = 101518, upload-time = "2024-09-21T12:47:46.315Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a6/c4/0679472c60052c27efa612b4cd3ddd2a23e885dcdc73461781d2c802d39e/configobj-5.0.9-py2.py3-none-any.whl", hash = "sha256:1ba10c5b6ee16229c79a05047aeda2b55eb4e80d7c7d8ecf17ec1ca600c79882", size = 35615, upload-time = "2024-11-26T14:03:32.972Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "configparser"
|
||||
version = "7.2.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/ac/ea19242153b5e8be412a726a70e82c7b5c1537c83f61b20995b2eda3dcd7/configparser-7.2.0.tar.gz", hash = "sha256:b629cc8ae916e3afbd36d1b3d093f34193d851e11998920fdcfc4552218b7b70", size = 51273, upload-time = "2025-03-08T16:04:09.339Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/fe/f61e7129e9e689d9e40bbf8a36fb90f04eceb477f4617c02c6a18463e81f/configparser-7.2.0-py3-none-any.whl", hash = "sha256:fee5e1f3db4156dcd0ed95bc4edfa3580475537711f67a819c966b389d09ce62", size = 17232, upload-time = "2025-03-08T16:04:07.743Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "contourpy"
|
||||
version = "1.3.2"
|
||||
@ -1459,6 +1507,19 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "etelemetry"
|
||||
version = "0.3.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "ci-info" },
|
||||
{ name = "packaging" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/83/27/f997c9da0e179986fadd6c8474d16743f1b3697c129c2fcd1e739cd038c2/etelemetry-0.3.1-py3-none-any.whl", hash = "sha256:a64f09bcd55cbfa5684e4d9fb6d1d6a018ab99d2ea28e638435c4c26e6814a6b" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "events"
|
||||
version = "0.5"
|
||||
@ -2017,7 +2078,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "google-cloud-aiplatform"
|
||||
version = "1.64.0"
|
||||
version = "1.70.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "docstring-parser" },
|
||||
@ -2032,9 +2093,9 @@ dependencies = [
|
||||
{ name = "pydantic" },
|
||||
{ name = "shapely" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/e3/f86b429d000a9c25f25bcd122e4b6286aeef70a89acfd6ea088324af016c/google-cloud-aiplatform-1.64.0.tar.gz", hash = "sha256:475a612829b283eb8f783e773d37115c30db42e2e50065c8653db0c9bd18b0da", size = 6258492, upload-time = "2024-08-28T01:03:24.573Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/88/06/bc8028c03d4bedb85114c780a9f749b67ff06ce29d25dc7f1a99622f2692/google-cloud-aiplatform-1.70.0.tar.gz", hash = "sha256:e8edef6dbc7911380d0ea55c47544e799f62b891cb1a83b504ca1c09fff9884b", size = 6311624, upload-time = "2024-10-09T04:28:12.606Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/c5/cdf0eaeded413d5f6221f9c4f466a7714c79a1938c2f7221467d4a9b9859/google_cloud_aiplatform-1.64.0-py2.py3-none-any.whl", hash = "sha256:3a79ce2ec047868c348336624a60993464ca977fd258bcf609cc79309a8101c4", size = 5228409, upload-time = "2024-08-28T01:03:21.275Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/d9/280e5a9b5caf69322f64fa55f62bf447d76c5fe30e8df6e93373f22c4bd7/google_cloud_aiplatform-1.70.0-py2.py3-none-any.whl", hash = "sha256:690e6041f03d3aa85102ac3f316c958d6f43a99aefb7fb3f8938dee56d08abd9", size = 5267225, upload-time = "2024-10-09T04:28:09.271Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2129,6 +2190,25 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fd/3c/2a19a60a473de48717b4efb19398c3f914795b64a96cf3fbe82588044f78/google_crc32c-1.7.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6efb97eb4369d52593ad6f75e7e10d053cf00c48983f7a973105bc70b0ac4d82", size = 28048, upload-time = "2025-03-26T14:41:46.696Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-genai"
|
||||
version = "1.43.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "google-auth" },
|
||||
{ name = "httpx" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "requests" },
|
||||
{ name = "tenacity" },
|
||||
{ name = "typing-extensions" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/75/992ca4462682949750709678b8efbc865222c9a16cf34504b69c5459606c/google_genai-1.43.0.tar.gz", hash = "sha256:84eb219d320759c5882bc2cdb4e2ac84544d00f5d12c7892c79fb03d71bfc9a4", size = 236132, upload-time = "2025-10-10T23:16:40.131Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/61/85/e90dda488d5044e6e4cd1b49e7e7f0cc7f4a2a1c8004e88a5122d42ea024/google_genai-1.43.0-py3-none-any.whl", hash = "sha256:be1d4b1acab268125d536fd81b73c38694a70cb08266759089154718924434fd", size = 236733, upload-time = "2025-10-10T23:16:38.809Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "google-generativeai"
|
||||
version = "0.8.5"
|
||||
@ -2472,18 +2552,17 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "httpx"
|
||||
version = "0.27.2"
|
||||
version = "0.28.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "certifi" },
|
||||
{ name = "httpcore" },
|
||||
{ name = "idna" },
|
||||
{ name = "sniffio" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/82/08f8c936781f67d9e6b9eeb8a0c8b4e406136ea4c3d1f89a5db71d42e0e6/httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2", size = 144189, upload-time = "2024-08-27T12:54:01.334Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/56/95/9377bcb415797e44274b51d46e3249eba641711cf3348050f76ee7b15ffc/httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0", size = 76395, upload-time = "2024-08-27T12:53:59.653Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
@ -2663,6 +2742,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "importlib-resources"
|
||||
version = "6.5.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/8c/f834fbf984f691b4f7ff60f50b514cc3de5cc08abfc3295564dd89c5e2e7/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c", size = 44693, upload-time = "2025-01-03T18:51:56.698Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "infinity-emb"
|
||||
version = "0.0.66"
|
||||
@ -3045,6 +3133,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "looseversion"
|
||||
version = "1.3.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/64/7e/f13dc08e0712cc2eac8e56c7909ce2ac280dbffef2ffd87bd5277ce9d58b/looseversion-1.3.0.tar.gz", hash = "sha256:ebde65f3f6bb9531a81016c6fef3eb95a61181adc47b7f949e9c0ea47911669e", size = 8799, upload-time = "2023-07-05T16:07:51.173Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4e/74/d5405b9b3b12e9176dff223576d7090bc161092878f533fd0dc23dd6ae1d/looseversion-1.3.0-py2.py3-none-any.whl", hash = "sha256:781ef477b45946fc03dd4c84ea87734b21137ecda0e1e122bcb3c8d16d2a56e0", size = 8237, upload-time = "2023-07-05T16:07:49.782Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lxml"
|
||||
version = "5.3.0"
|
||||
@ -3651,6 +3748,50 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406, upload-time = "2025-05-29T11:35:04.961Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nibabel"
|
||||
version = "5.3.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "importlib-resources", marker = "python_full_version < '3.12'" },
|
||||
{ name = "numpy" },
|
||||
{ name = "packaging" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d9/61/33036cb89f1ec1fedbc4039602345d830b27cbd8a5c7bf28c2e5b5de3ea2/nibabel-5.3.2.tar.gz", hash = "sha256:0bdca6503b1c784b446c745a4542367de7756cfba0d72143b91f9ffb78be569b", size = 4504842, upload-time = "2024-10-23T14:19:55.866Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/b2/dc384197be44e2a640bb43311850e23c2c30f3b82ce7c8cdabbf0e53045e/nibabel-5.3.2-py3-none-any.whl", hash = "sha256:52970a5a8a53b1b55249cba4d9bcfaa8cc57e3e5af35a29d7352237e8680a6f8", size = 3293839, upload-time = "2024-10-23T14:19:52.65Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nipype"
|
||||
version = "1.10.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "acres" },
|
||||
{ name = "click" },
|
||||
{ name = "etelemetry" },
|
||||
{ name = "filelock" },
|
||||
{ name = "looseversion" },
|
||||
{ name = "networkx", version = "3.4.2", source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }, marker = "python_full_version < '3.11'" },
|
||||
{ name = "networkx", version = "3.5", source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }, marker = "python_full_version >= '3.11'" },
|
||||
{ name = "nibabel" },
|
||||
{ name = "numpy" },
|
||||
{ name = "packaging" },
|
||||
{ name = "prov" },
|
||||
{ name = "puremagic" },
|
||||
{ name = "pydot" },
|
||||
{ name = "python-dateutil" },
|
||||
{ name = "rdflib" },
|
||||
{ name = "scipy" },
|
||||
{ name = "simplejson" },
|
||||
{ name = "traits" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e1/1a/7ff53f5802d37085a55d7c6df7c6ebebbc8a044930628ca21f7e661c1983/nipype-1.10.0.tar.gz", hash = "sha256:19e5d6cefa70997198f78bc665ef4d3d3cb53325b5b98a72e51aefadaf6b3e0e", size = 2919807, upload-time = "2025-03-19T23:30:07.473Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/53/c5ad0140e2e4c4d92ae45558587e26b2ebc62e39eafa30b74cb052d9375b/nipype-1.10.0-py3-none-any.whl", hash = "sha256:56ced3272e77952e330f13e28328a8fe2e8a69587ca89bc34234f7d06f8319bb", size = 3200685, upload-time = "2025-03-19T23:30:05.357Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nltk"
|
||||
version = "3.9.1"
|
||||
@ -3883,14 +4024,15 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "ollama"
|
||||
version = "0.2.1"
|
||||
version = "0.6.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
{ name = "pydantic" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/aa/2b/bda3e59080b136e90367bebb67d5072922a912f0e0b6f49be1b4eb79c109/ollama-0.2.1.tar.gz", hash = "sha256:fa316baa9a81eac3beb4affb0a17deb3008fdd6ed05b123c26306cfbe4c349b6", size = 9918, upload-time = "2024-06-05T19:00:52.447Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/47/f9ee32467fe92744474a8c72e138113f3b529fc266eea76abfdec9a33f3b/ollama-0.6.0.tar.gz", hash = "sha256:da2b2d846b5944cfbcee1ca1e6ee0585f6c9d45a2fe9467cbcd096a37383da2f", size = 50811, upload-time = "2025-09-24T22:46:02.417Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7d/b7/8cc05807bfbc5b92da7fb94c525e1e56572a08eea7cdf3656e6c5dc6f9b1/ollama-0.2.1-py3-none-any.whl", hash = "sha256:b6e2414921c94f573a903d1069d682ba2fb2607070ea9e19ca4a7872f2a460ec", size = 9738, upload-time = "2024-06-05T19:00:47.437Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/c1/edc9f41b425ca40b26b7c104c5f6841a4537bb2552bfa6ca66e81405bb95/ollama-0.6.0-py3-none-any.whl", hash = "sha256:534511b3ccea2dff419ae06c3b58d7f217c55be7897c8ce5868dfb6b219cf7a0", size = 14130, upload-time = "2025-09-24T22:46:01.19Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -4331,6 +4473,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/00/2f/804f58f0b856ab3bf21617cccf5b39206e6c4c94c2cd227bde125ea6105f/parameterized-0.9.0-py2.py3-none-any.whl", hash = "sha256:4e0758e3d41bea3bbd05ec14fc2c24736723f243b28d702081aef438c9372b1b", size = 20475, upload-time = "2023-03-27T02:01:09.31Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pathlib"
|
||||
version = "1.0.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ac/aa/9b065a76b9af472437a0059f77e8f962fe350438b927cb80184c32f075eb/pathlib-1.0.1.tar.gz", hash = "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f", size = 49298, upload-time = "2014-09-03T15:41:57.18Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "patsy"
|
||||
version = "1.0.1"
|
||||
@ -4666,6 +4817,21 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3a/fa/4c3ac5527ed2e5f3577167ecd5f8180ffcdc8bdd59c9f143409c19706456/protobuf-5.27.2-py3-none-any.whl", hash = "sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470", size = 164772, upload-time = "2024-06-25T20:54:52.196Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prov"
|
||||
version = "2.1.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "networkx", version = "3.4.2", source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }, marker = "python_full_version < '3.11'" },
|
||||
{ name = "networkx", version = "3.5", source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }, marker = "python_full_version >= '3.11'" },
|
||||
{ name = "pydot" },
|
||||
{ name = "python-dateutil" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/bb/442f2e478061543c9c229f48c2d3a43cb0a77642584edecac126bc1ade99/prov-2.1.1.tar.gz", hash = "sha256:7d012b164f5bbb42e118ed9d25788ab012d09082b722bc9dd4e811a309ea57f5", size = 136802, upload-time = "2025-06-24T22:01:50.767Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/17/5703ad2380e57ecceb2700e30646ba0d856d9b90c9f33b01c68a3e298e3a/prov-2.1.1-py3-none-any.whl", hash = "sha256:04f74f9151b68f0bda68c943e111b1275207b19e197689043644a1b355a9d035", size = 425860, upload-time = "2025-06-24T22:01:49.485Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
version = "7.0.0"
|
||||
@ -4725,6 +4891,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7b/08/9c66c269b0d417a0af9fb969535f0371b8c538633535a7a6a5ca3f9231e2/psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab", size = 1163864, upload-time = "2023-10-28T09:37:28.155Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "puremagic"
|
||||
version = "1.30"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/7f/9998706bc516bdd664ccf929a1da6c6e5ee06e48f723ce45aae7cf3ff36e/puremagic-1.30.tar.gz", hash = "sha256:f9ff7ac157d54e9cf3bff1addfd97233548e75e685282d84ae11e7ffee1614c9", size = 314785, upload-time = "2025-07-04T18:48:36.061Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/ed/1e347d85d05b37a8b9a039ca832e5747e1e5248d0bd66042783ef48b4a37/puremagic-1.30-py3-none-any.whl", hash = "sha256:5eeeb2dd86f335b9cfe8e205346612197af3500c6872dffebf26929f56e9d3c1", size = 43304, upload-time = "2025-07-04T18:48:34.801Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "py"
|
||||
version = "1.11.0"
|
||||
@ -4981,6 +5156,18 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ca/8f/86d7931c62013a5a7ebf4e1642a87d4a6050c0f570e714f61b0df1984c62/pydivert-2.1.0-py2.py3-none-any.whl", hash = "sha256:382db488e3c37c03ec9ec94e061a0b24334d78dbaeebb7d4e4d32ce4355d9da1", size = 104718, upload-time = "2017-10-20T21:36:56.726Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pydot"
|
||||
version = "4.0.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "pyparsing" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/35/b17cb89ff865484c6a20ef46bf9d95a5f07328292578de0b295f4a6beec2/pydot-4.0.1.tar.gz", hash = "sha256:c2148f681c4a33e08bf0e26a9e5f8e4099a82e0e2a068098f32ce86577364ad5", size = 162594, upload-time = "2025-06-17T20:09:56.454Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/32/a7125fb28c4261a627f999d5fb4afff25b523800faed2c30979949d6facd/pydot-4.0.1-py3-none-any.whl", hash = "sha256:869c0efadd2708c0be1f916eb669f3d664ca684bc57ffb7ecc08e70d5e93fee6", size = 37087, upload-time = "2025-06-17T20:09:55.25Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyee"
|
||||
version = "13.0.0"
|
||||
@ -5373,6 +5560,20 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyxnat"
|
||||
version = "1.6.3"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "lxml" },
|
||||
{ name = "pathlib" },
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7f/24/c8737985e65d8adbbf51970b2a75cf54b5376d68d251159d9b7c5c9673b6/pyxnat-1.6.3.tar.gz", hash = "sha256:ddd074f35f7b35b5dccb6f713b20cf083c79d6e0d3d9cafbcaabb7c661b0cc68", size = 82466, upload-time = "2025-02-04T19:03:53.801Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1c/df/257c0f0af8e624daa924a3899f88e6465f162d72ada3fb0b96df9e61a2d6/pyxnat-1.6.3-py3-none-any.whl", hash = "sha256:a6d84dd24486eab9731a5de5df4fb486021b095665083c2fb1d33ac1e719d3c5", size = 95408, upload-time = "2025-02-04T19:03:51.707Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0.2"
|
||||
@ -5452,6 +5653,7 @@ dependencies = [
|
||||
{ name = "boto3" },
|
||||
{ name = "botocore" },
|
||||
{ name = "cachetools" },
|
||||
{ name = "captcha" },
|
||||
{ name = "chardet" },
|
||||
{ name = "click" },
|
||||
{ name = "cn2an" },
|
||||
@ -5476,6 +5678,7 @@ dependencies = [
|
||||
{ name = "flask-login" },
|
||||
{ name = "flask-mail" },
|
||||
{ name = "flask-session" },
|
||||
{ name = "google-genai" },
|
||||
{ name = "google-generativeai" },
|
||||
{ name = "google-search-results" },
|
||||
{ name = "graspologic" },
|
||||
@ -5608,6 +5811,7 @@ requires-dist = [
|
||||
{ name = "boto3", specifier = "==1.34.140" },
|
||||
{ name = "botocore", specifier = "==1.34.140" },
|
||||
{ name = "cachetools", specifier = "==5.3.3" },
|
||||
{ name = "captcha", specifier = ">=0.7.1" },
|
||||
{ name = "chardet", specifier = "==5.2.0" },
|
||||
{ name = "click", specifier = ">=8.1.8" },
|
||||
{ name = "cn2an", specifier = "==0.5.22" },
|
||||
@ -5635,13 +5839,14 @@ requires-dist = [
|
||||
{ name = "flask-login", specifier = "==0.6.3" },
|
||||
{ name = "flask-mail", specifier = ">=0.10.0" },
|
||||
{ name = "flask-session", specifier = "==0.8.0" },
|
||||
{ name = "google-genai", specifier = ">=1.41.0,<2.0.0" },
|
||||
{ name = "google-generativeai", specifier = ">=0.8.1,<0.9.0" },
|
||||
{ name = "google-search-results", specifier = "==2.4.2" },
|
||||
{ name = "graspologic", specifier = ">=3.4.1,<4.0.0" },
|
||||
{ name = "groq", specifier = "==0.9.0" },
|
||||
{ name = "hanziconv", specifier = "==0.3.2" },
|
||||
{ name = "html-text", specifier = "==0.6.2" },
|
||||
{ name = "httpx", extras = ["socks"], specifier = "==0.27.2" },
|
||||
{ name = "httpx", extras = ["socks"], specifier = ">=0.28.1,<0.29.0" },
|
||||
{ name = "huggingface-hub", specifier = ">=0.25.0,<0.26.0" },
|
||||
{ name = "infinity-emb", specifier = ">=0.0.66,<0.0.67" },
|
||||
{ name = "infinity-sdk", specifier = "==0.6.0" },
|
||||
@ -5660,7 +5865,7 @@ requires-dist = [
|
||||
{ name = "mistralai", specifier = "==0.4.2" },
|
||||
{ name = "nltk", specifier = "==3.9.1" },
|
||||
{ name = "numpy", specifier = ">=1.26.0,<2.0.0" },
|
||||
{ name = "ollama", specifier = "==0.2.1" },
|
||||
{ name = "ollama", specifier = ">=0.5.0" },
|
||||
{ name = "onnxruntime", marker = "platform_machine != 'x86_64' or sys_platform == 'darwin'", specifier = "==1.19.2" },
|
||||
{ name = "onnxruntime-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'", specifier = "==1.19.2" },
|
||||
{ name = "openai", specifier = ">=1.45.0" },
|
||||
@ -5716,7 +5921,7 @@ requires-dist = [
|
||||
{ name = "trio", specifier = ">=0.29.0" },
|
||||
{ name = "umap-learn", specifier = "==0.5.6" },
|
||||
{ name = "valkey", specifier = "==6.0.2" },
|
||||
{ name = "vertexai", specifier = "==1.64.0" },
|
||||
{ name = "vertexai", specifier = "==1.70.0" },
|
||||
{ name = "volcengine", specifier = "==1.0.194" },
|
||||
{ name = "voyageai", specifier = "==0.2.3" },
|
||||
{ name = "webdriver-manager", specifier = "==4.0.1" },
|
||||
@ -5780,6 +5985,19 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1e/30/53f41b7b728a48da8974075f56c57200d7b11e4e9fa93be3cabf8218dc0c/ranx-0.3.20-py3-none-any.whl", hash = "sha256:e056e4d5981b0328b045868cc7064fc57a545f36009fbe9bb602295ec33335de", size = 99318, upload-time = "2024-07-01T17:40:27.095Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rdflib"
|
||||
version = "7.2.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "isodate", marker = "python_full_version < '3.11'" },
|
||||
{ name = "pyparsing" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/99/d2fec85e5f6bdfe4367dea143119cb4469bf48710487939df0abf7e22003/rdflib-7.2.1.tar.gz", hash = "sha256:cf9b7fa25234e8925da8b1fb09700f8349b5f0f100e785fb4260e737308292ac", size = 4873802, upload-time = "2025-09-19T02:33:36.492Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/31/98/7fa830bb4b9da21905683a5352aa0a01a1f3082328ae976aad341e980c23/rdflib-7.2.1-py3-none-any.whl", hash = "sha256:1a175bc1386a167a42fbfaba003bfa05c164a2a3ca3cb9c0c97f9c9638ca6ac2", size = 565423, upload-time = "2025-09-19T02:33:30.889Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "readability-lxml"
|
||||
version = "0.8.1"
|
||||
@ -6377,6 +6595,54 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "simplejson"
|
||||
version = "3.20.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/f4/a1ac5ed32f7ed9a088d62a59d410d4c204b3b3815722e2ccfb491fa8251b/simplejson-3.20.2.tar.gz", hash = "sha256:5fe7a6ce14d1c300d80d08695b7f7e633de6cd72c80644021874d985b3393649", size = 85784, upload-time = "2025-09-26T16:29:36.64Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/78/09/2bf3761de89ea2d91bdce6cf107dcd858892d0adc22c995684878826cc6b/simplejson-3.20.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6d7286dc11af60a2f76eafb0c2acde2d997e87890e37e24590bb513bec9f1bc5", size = 94039, upload-time = "2025-09-26T16:27:29.283Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0f/33/c3277db8931f0ae9e54b9292668863365672d90fb0f632f4cf9829cb7d68/simplejson-3.20.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c01379b4861c3b0aa40cba8d44f2b448f5743999aa68aaa5d3ef7049d4a28a2d", size = 75894, upload-time = "2025-09-26T16:27:30.378Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/ea/ae47b04d03c7c8a7b7b1a8b39a6e27c3bd424e52f4988d70aca6293ff5e5/simplejson-3.20.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16b029ca25645b3bc44e84a4f941efa51bf93c180b31bd704ce6349d1fc77c1", size = 76116, upload-time = "2025-09-26T16:27:31.42Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4b/42/6c9af551e5a8d0f171d6dce3d9d1260068927f7b80f1f09834e07887c8c4/simplejson-3.20.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e22a5fb7b1437ffb057e02e1936a3bfb19084ae9d221ec5e9f4cf85f69946b6", size = 138827, upload-time = "2025-09-26T16:27:32.486Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2b/22/5e268bbcbe9f75577491e406ec0a5536f5b2fa91a3b52031fea51cd83e1d/simplejson-3.20.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b6ff02fc7b8555c906c24735908854819b0d0dc85883d453e23ca4c0445d01", size = 146772, upload-time = "2025-09-26T16:27:34.036Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/b4/800f14728e2ad666f420dfdb57697ca128aeae7f991b35759c09356b829a/simplejson-3.20.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2bfc1c396ad972ba4431130b42307b2321dba14d988580c1ac421ec6a6b7cee3", size = 134497, upload-time = "2025-09-26T16:27:35.211Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/b9/c54eef4226c6ac8e9a389bbe5b21fef116768f97a2dc1a683c716ffe66ef/simplejson-3.20.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a97249ee1aee005d891b5a211faf58092a309f3d9d440bc269043b08f662eda", size = 138172, upload-time = "2025-09-26T16:27:36.44Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/09/36/4e282f5211b34620f1b2e4b51d9ddaab5af82219b9b7b78360a33f7e5387/simplejson-3.20.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f1036be00b5edaddbddbb89c0f80ed229714a941cfd21e51386dc69c237201c2", size = 140272, upload-time = "2025-09-26T16:27:37.605Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/aa/b0/94ad2cf32f477c449e1f63c863d8a513e2408d651c4e58fe4b6a7434e168/simplejson-3.20.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5d6f5bacb8cdee64946b45f2680afa3f54cd38e62471ceda89f777693aeca4e4", size = 140468, upload-time = "2025-09-26T16:27:39.015Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/46/827731e4163be3f987cb8ee90f5d444161db8f540b5e735355faa098d9bc/simplejson-3.20.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8db6841fb796ec5af632f677abf21c6425a1ebea0d9ac3ef1a340b8dc69f52b8", size = 148700, upload-time = "2025-09-26T16:27:40.171Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c7/28/c32121064b1ec2fb7b5d872d9a1abda62df064d35e0160eddfa907118343/simplejson-3.20.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c0a341f7cc2aae82ee2b31f8a827fd2e51d09626f8b3accc441a6907c88aedb7", size = 141323, upload-time = "2025-09-26T16:27:41.324Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/b6/c897c54326fe86dd12d101981171a49361949f4728294f418c3b86a1af77/simplejson-3.20.2-cp310-cp310-win32.whl", hash = "sha256:27f9c01a6bc581d32ab026f515226864576da05ef322d7fc141cd8a15a95ce53", size = 74377, upload-time = "2025-09-26T16:27:42.533Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/87/a6e03d4d80cca99c1fee4e960f3440e2f21be9470e537970f960ca5547f1/simplejson-3.20.2-cp310-cp310-win_amd64.whl", hash = "sha256:c0a63ec98a4547ff366871bf832a7367ee43d047bcec0b07b66c794e2137b476", size = 76081, upload-time = "2025-09-26T16:27:43.945Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b9/3e/96898c6c66d9dca3f9bd14d7487bf783b4acc77471b42f979babbb68d4ca/simplejson-3.20.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:06190b33cd7849efc413a5738d3da00b90e4a5382fd3d584c841ac20fb828c6f", size = 92633, upload-time = "2025-09-26T16:27:45.028Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/6b/a2/cd2e10b880368305d89dd540685b8bdcc136df2b3c76b5ddd72596254539/simplejson-3.20.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4ad4eac7d858947a30d2c404e61f16b84d16be79eb6fb316341885bdde864fa8", size = 75309, upload-time = "2025-09-26T16:27:46.142Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5d/02/290f7282eaa6ebe945d35c47e6534348af97472446951dce0d144e013f4c/simplejson-3.20.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b392e11c6165d4a0fde41754a0e13e1d88a5ad782b245a973dd4b2bdb4e5076a", size = 75308, upload-time = "2025-09-26T16:27:47.542Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/91/43695f17b69e70c4b0b03247aa47fb3989d338a70c4b726bbdc2da184160/simplejson-3.20.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51eccc4e353eed3c50e0ea2326173acdc05e58f0c110405920b989d481287e51", size = 143733, upload-time = "2025-09-26T16:27:48.673Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9b/4b/fdcaf444ac1c3cbf1c52bf00320c499e1cf05d373a58a3731ae627ba5e2d/simplejson-3.20.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:306e83d7c331ad833d2d43c76a67f476c4b80c4a13334f6e34bb110e6105b3bd", size = 153397, upload-time = "2025-09-26T16:27:49.89Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c4/83/21550f81a50cd03599f048a2d588ffb7f4c4d8064ae091511e8e5848eeaa/simplejson-3.20.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f820a6ac2ef0bc338ae4963f4f82ccebdb0824fe9caf6d660670c578abe01013", size = 141654, upload-time = "2025-09-26T16:27:51.168Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/cf/54/d76c0e72ad02450a3e723b65b04f49001d0e73218ef6a220b158a64639cb/simplejson-3.20.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21e7a066528a5451433eb3418184f05682ea0493d14e9aae690499b7e1eb6b81", size = 144913, upload-time = "2025-09-26T16:27:52.331Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/49/976f59b42a6956d4aeb075ada16ad64448a985704bc69cd427a2245ce835/simplejson-3.20.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:438680ddde57ea87161a4824e8de04387b328ad51cfdf1eaf723623a3014b7aa", size = 144568, upload-time = "2025-09-26T16:27:53.41Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/60/c7/30bae30424ace8cd791ca660fed454ed9479233810fe25c3f3eab3d9dc7b/simplejson-3.20.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cac78470ae68b8d8c41b6fca97f5bf8e024ca80d5878c7724e024540f5cdaadb", size = 146239, upload-time = "2025-09-26T16:27:54.502Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/79/3e/7f3b7b97351c53746e7b996fcd106986cda1954ab556fd665314756618d2/simplejson-3.20.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7524e19c2da5ef281860a3d74668050c6986be15c9dd99966034ba47c68828c2", size = 154497, upload-time = "2025-09-26T16:27:55.885Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/1d/48/7241daa91d0bf19126589f6a8dcbe8287f4ed3d734e76fd4a092708947be/simplejson-3.20.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e9b6d845a603b2eef3394eb5e21edb8626cd9ae9a8361d14e267eb969dbe413", size = 148069, upload-time = "2025-09-26T16:27:57.039Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/f4/ef18d2962fe53e7be5123d3784e623859eec7ed97060c9c8536c69d34836/simplejson-3.20.2-cp311-cp311-win32.whl", hash = "sha256:47d8927e5ac927fdd34c99cc617938abb3624b06ff86e8e219740a86507eb961", size = 74158, upload-time = "2025-09-26T16:27:58.265Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/fd/3d1158ecdc573fdad81bf3cc78df04522bf3959758bba6597ba4c956c74d/simplejson-3.20.2-cp311-cp311-win_amd64.whl", hash = "sha256:ba4edf3be8e97e4713d06c3d302cba1ff5c49d16e9d24c209884ac1b8455520c", size = 75911, upload-time = "2025-09-26T16:27:59.292Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9d/9e/1a91e7614db0416885eab4136d49b7303de20528860ffdd798ce04d054db/simplejson-3.20.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:4376d5acae0d1e91e78baeba4ee3cf22fbf6509d81539d01b94e0951d28ec2b6", size = 93523, upload-time = "2025-09-26T16:28:00.356Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5e/2b/d2413f5218fc25608739e3d63fe321dfa85c5f097aa6648dbe72513a5f12/simplejson-3.20.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f8fe6de652fcddae6dec8f281cc1e77e4e8f3575249e1800090aab48f73b4259", size = 75844, upload-time = "2025-09-26T16:28:01.756Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ad/f1/efd09efcc1e26629e120fef59be059ce7841cc6e1f949a4db94f1ae8a918/simplejson-3.20.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25ca2663d99328d51e5a138f22018e54c9162438d831e26cfc3458688616eca8", size = 75655, upload-time = "2025-09-26T16:28:03.037Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/97/ec/5c6db08e42f380f005d03944be1af1a6bd501cc641175429a1cbe7fb23b9/simplejson-3.20.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:12a6b2816b6cab6c3fd273d43b1948bc9acf708272074c8858f579c394f4cbc9", size = 150335, upload-time = "2025-09-26T16:28:05.027Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/f5/808a907485876a9242ec67054da7cbebefe0ee1522ef1c0be3bfc90f96f6/simplejson-3.20.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac20dc3fcdfc7b8415bfc3d7d51beccd8695c3f4acb7f74e3a3b538e76672868", size = 158519, upload-time = "2025-09-26T16:28:06.5Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/66/af/b8a158246834645ea890c36136584b0cc1c0e4b83a73b11ebd9c2a12877c/simplejson-3.20.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db0804d04564e70862ef807f3e1ace2cc212ef0e22deb1b3d6f80c45e5882c6b", size = 148571, upload-time = "2025-09-26T16:28:07.715Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/20/05/ed9b2571bbf38f1a2425391f18e3ac11cb1e91482c22d644a1640dea9da7/simplejson-3.20.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:979ce23ea663895ae39106946ef3d78527822d918a136dbc77b9e2b7f006237e", size = 152367, upload-time = "2025-09-26T16:28:08.921Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/81/2c/bad68b05dd43e93f77994b920505634d31ed239418eb6a88997d06599983/simplejson-3.20.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a2ba921b047bb029805726800819675249ef25d2f65fd0edb90639c5b1c3033c", size = 150205, upload-time = "2025-09-26T16:28:10.086Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/69/46/90c7fc878061adafcf298ce60cecdee17a027486e9dce507e87396d68255/simplejson-3.20.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:12d3d4dc33770069b780cc8f5abef909fe4a3f071f18f55f6d896a370fd0f970", size = 151823, upload-time = "2025-09-26T16:28:11.329Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ab/27/b85b03349f825ae0f5d4f780cdde0bbccd4f06c3d8433f6a3882df887481/simplejson-3.20.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:aff032a59a201b3683a34be1169e71ddda683d9c3b43b261599c12055349251e", size = 158997, upload-time = "2025-09-26T16:28:12.917Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/71/ad/d7f3c331fb930638420ac6d236db68e9f4c28dab9c03164c3cd0e7967e15/simplejson-3.20.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:30e590e133b06773f0dc9c3f82e567463df40598b660b5adf53eb1c488202544", size = 154367, upload-time = "2025-09-26T16:28:14.393Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/46/5c67324addd40fa2966f6e886cacbbe0407c03a500db94fb8bb40333fcdf/simplejson-3.20.2-cp312-cp312-win32.whl", hash = "sha256:8d7be7c99939cc58e7c5bcf6bb52a842a58e6c65e1e9cdd2a94b697b24cddb54", size = 74285, upload-time = "2025-09-26T16:28:15.931Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fa/c9/5cc2189f4acd3a6e30ffa9775bf09b354302dbebab713ca914d7134d0f29/simplejson-3.20.2-cp312-cp312-win_amd64.whl", hash = "sha256:2c0b4a67e75b945489052af6590e7dca0ed473ead5d0f3aad61fa584afe814ab", size = 75969, upload-time = "2025-09-26T16:28:17.017Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/5b/83e1ff87eb60ca706972f7e02e15c0b33396e7bdbd080069a5d1b53cf0d8/simplejson-3.20.2-py3-none-any.whl", hash = "sha256:3b6bb7fb96efd673eac2e4235200bfffdc2353ad12c54117e1e4e2fc485ac017", size = 57309, upload-time = "2025-09-26T16:29:35.312Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "six"
|
||||
version = "1.16.0"
|
||||
@ -6982,6 +7248,38 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "traits"
|
||||
version = "7.0.2"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/ba/33e199bfae748e802f68a857035fb003089c176897bf43e2cf38ff167740/traits-7.0.2.tar.gz", hash = "sha256:a563515809cb3911975de5a54209855f0b6fdb7ca6912a5e81de26529f70428c", size = 9534785, upload-time = "2025-01-24T20:52:59.954Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/06/5c/6aa6aef1472a79accd4c077cc8eccf3c3a2acc4b42ece2c48f5651f2f915/traits-7.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb59a033260dfa3aacfe484307a91f318a1fa801f5e8c8293fe22834fa4b30a7", size = 5034452, upload-time = "2025-01-24T20:55:25.02Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/0a/8387ff6f32898c334b2a96b465a8790633cec3c2270893210946d43de0d3/traits-7.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f5c18d5f4aea2988b15bc10e2ac9f4eb49531d1ec380857f3046a7ba14509e4b", size = 5034825, upload-time = "2025-01-24T20:56:04.238Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8f/15/a04a5e1cd0c2e2979365e1ac3a674ec0f16a5af36d19809c869985e63f7a/traits-7.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11950d519b113e9a34d5a99fca112866d8c36aa8fce85edadf52995ad03de07e", size = 5110401, upload-time = "2025-01-24T20:57:19.172Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/b3/da/58d58c3495b2bfee03975d95799d5a8ac771a2f510d579935122c02d26dc/traits-7.0.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d50b42061cb8f34119b6b7abe703982c6fa157a2fe4e10a5b9ab9f93c340d5e3", size = 5121856, upload-time = "2025-01-24T20:57:20.949Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/fe/74/66ed1b2511c0a457f716f6c718abf807db58c76292cbd69ecf4390519fea/traits-7.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:53fbd8a0adf42d235e6a73bd3fbb3f7190a28302d151c9a25967ff6f12b918cd", size = 5109296, upload-time = "2025-01-24T20:57:23.835Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/9e/30/60efe8a3fe454fd7b939695d556cdee7943b1ced19fc40f9b4f2a240211c/traits-7.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0b48be9fb0b9e5a733e9fa5a542b0751237822e20b52fac80b5796cc606af509", size = 5117788, upload-time = "2025-01-24T20:57:27.096Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/62/ef/e884bd2c05d52415acb0344ed3847f1c3835d1651a4189a17e06fa2363fa/traits-7.0.2-cp310-cp310-win32.whl", hash = "sha256:5b98600b9f40e980e0cc5b1f0ade5fb1c1f1c19d25afc2b33ea30773015eb3e5", size = 5033760, upload-time = "2025-01-24T21:01:04.683Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/d2/71/a630ee815843e3d87484c9a0368f81eb993e862aa4cb9c20822deee7e9a3/traits-7.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:def3ab01e7d636aceda9dc6ca2abf71f2a992f9ec993c7ea200157c1ca983ae7", size = 5036225, upload-time = "2025-01-24T21:01:07.817Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/db/da628e34564a89f68d6b3ff5caee8a0a932858a4a3e1bf0d077d9f6d053c/traits-7.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:33fd20c3bc29fbb1f51ddb23f63173bf59a2fdafd300e5f4790352d76e4cf68e", size = 5034488, upload-time = "2025-01-24T20:55:26.853Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e9/4e/d64ad9fb725ff1b943432c5df32c64abb28ad17f66e976d6ce6aaa1b54d5/traits-7.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:018d4f7cbd5e18cb34bafc915134c29aa8568bccd35d9aa9102e2af9ef66cb80", size = 5034832, upload-time = "2025-01-24T20:56:06.125Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/3f/80/f32ade6b131c69d2a3451edfa5c9f23056c3c9889b1d7918890ff6dad273/traits-7.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa323b634abd9c7f049892d86296bc1c46bad6ad80121fefeaf12039002d58ff", size = 5119215, upload-time = "2025-01-24T20:57:31.594Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/be/d6/0c7c2c12a53698906e86a0076d13ee3d529a5c0a44468e89cb8a91186f22/traits-7.0.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:209bfb95c094cd315f77fc610ae65af86ec0de075be2d84e6e6290ff2f860715", size = 5130753, upload-time = "2025-01-24T20:57:34.737Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/8b/09/070aef46f818eaab7afdada8647b303facb14d4d5f931c1fb560cfc24e1b/traits-7.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4f38eee0b94f9fbab2f086200e35f835ad1563ba7e078a044cb268ce50542565", size = 5117762, upload-time = "2025-01-24T20:57:36.764Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/85/99/fb239d5fe1ac2931c284496995998abc72f6af0ca32cfdb70095b883fab9/traits-7.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:135dc11da393f5dec1ecaf6981f0608976354435f7be53b9e9175a9c8a118127", size = 5126325, upload-time = "2025-01-24T20:57:38.638Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/48/6c1484be7d5b322c57415c9b6d39c7419ad4ee1eb52b288ddfa3893caf31/traits-7.0.2-cp311-cp311-win32.whl", hash = "sha256:c588571d981d1254d9abf8bd2f8e449f82f31ebe8f951853290910ae2f03dc84", size = 5033773, upload-time = "2025-01-24T21:01:09.598Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/73/f4/d8cb863aaacfe1633d2b636647bcc70b1cd2e258e4a83e71eae995a34ed4/traits-7.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:98a880b6adab40d66ce0eda1c6f4fdcf178bb182d28d0fb71d3755c36065dd39", size = 5036235, upload-time = "2025-01-24T21:01:12.296Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/6c/9b3be8e459627267de56029a0c91e9a9c9a082353cd5b9ec1edd2f4738a5/traits-7.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bccfafbda22346f0278f010458e819f0a58a95f242f91e14014b055580a15cd8", size = 5035260, upload-time = "2025-01-24T20:55:28.536Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/35/0c/990486e972614dd0173ea647b80c30c30d3ad4819befa9ec94f4a8a421b6/traits-7.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9899ee203fd379fb0e07aebc178940d62d5790dc311263d5c3a577f3baf7dfa", size = 5035240, upload-time = "2025-01-24T20:56:08.856Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/7c/458041d4b345ddd351451303353acbc72a36cbc47649eedb29863a37f119/traits-7.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2938cccfea2da2fdce6cc7ec1e605c923e66610df1b223cf24a4b23ba97375de", size = 5121555, upload-time = "2025-01-24T20:57:41.688Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/77/f3/7736bf1bee46c6fd1c488e180236067c91490cf2aea235ed851bcf2151e2/traits-7.0.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f696c4d4d03b333e8f8beec206d80d4998ce6b4801deb74c258dbc4415f92345", size = 5135379, upload-time = "2025-01-24T20:57:45.797Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f0/07/e80f6663d460f80f09b443175cb8118b74ca3b7bd164f1ec5c44e1da2047/traits-7.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c49384b12ecaf39b9ab156e1c7d31960206e15071a9917596ab3c265d7bb99aa", size = 5120513, upload-time = "2025-01-24T20:57:49.354Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f2/8b/0716f7b8f34e1b57b39f81472460f4e02491dde02fbc114bac42cf0acd85/traits-7.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6932e5a784000368aa3948890bf55c4aba10494d4a45e9bb6c2b228644f2e67c", size = 5130509, upload-time = "2025-01-24T20:57:51.933Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/bf/e0135ce54d5604c57caad8866ac56a05265943a1b3a438277fb6ee10b0f6/traits-7.0.2-cp312-cp312-win32.whl", hash = "sha256:f434da460be8b3eb9f9f35143af116622cd313fa346c0df37b026d318c88ad29", size = 5034118, upload-time = "2025-01-24T21:01:14.04Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a7/2b/49423d5b269dfc095e09ecbb41b987b224f4154716d91da063cebaf963a0/traits-7.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:497463a437cb8cd4bb2ed27ae4e4491a8ed3d4d8515803476c94ce952a17af54", size = 5036464, upload-time = "2025-01-24T21:01:16.256Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "transformers"
|
||||
version = "4.36.2"
|
||||
@ -7217,14 +7515,14 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "vertexai"
|
||||
version = "1.64.0"
|
||||
version = "1.70.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "google-cloud-aiplatform" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0a/36/2dcb9e212bc1ccaff83c897702e74d01cac65c2a664818e9cb5577a8418e/vertexai-1.64.0.tar.gz", hash = "sha256:d8bb42b64fe294180104e9210819dce694b50b27daf64b8b7725878eac65986c", size = 9289, upload-time = "2024-08-28T01:03:34.903Z" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/01/17/04958e273962f420cb89573c6423f231e34a684769ef49c6fed2b12cd7b1/vertexai-1.70.0.tar.gz", hash = "sha256:3af16f63c462dfc77600773fba366a99575b9fe4303fc080bd1cf823066c66fa", size = 9294, upload-time = "2024-10-09T04:28:23.814Z" }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/f7/98/ce77d9111ffd3cd49154c44a9863b8507a0eb141058fb3fb6c04a65104c7/vertexai-1.64.0-py3-none-any.whl", hash = "sha256:967c17c09e28bc7d34ff6b2ef51a1953ded4750809bf174dd8b6c9c15017180e", size = 7274, upload-time = "2024-08-28T01:03:33.324Z" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/4a/e4/ec11c62ba6e17457b68e089b740075c23b894e801545979c0f9d01208a81/vertexai-1.70.0-py3-none-any.whl", hash = "sha256:9e0c85013efa5cad41e37e23e9fcca7e959b409288ca22832a1b7b9ae6abc393", size = 7268, upload-time = "2024-10-09T04:28:21.864Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@ -27,6 +27,21 @@ const config: StorybookConfig = {
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
test: /\.less$/,
|
||||
use: [
|
||||
'style-loader',
|
||||
'css-loader',
|
||||
{
|
||||
loader: 'postcss-loader',
|
||||
options: {
|
||||
postcssOptions: {
|
||||
plugins: [require('tailwindcss'), require('autoprefixer')],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
143
web/package-lock.json
generated
143
web/package-lock.json
generated
@ -49,7 +49,7 @@
|
||||
"@xyflow/react": "^12.3.6",
|
||||
"ahooks": "^3.7.10",
|
||||
"antd": "^5.12.7",
|
||||
"axios": "^1.6.3",
|
||||
"axios": "^1.12.0",
|
||||
"class-variance-authority": "^0.7.0",
|
||||
"classnames": "^2.5.1",
|
||||
"clsx": "^2.1.1",
|
||||
@ -13551,12 +13551,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.7.2",
|
||||
"resolved": "https://registry.npmmirror.com/axios/-/axios-1.7.2.tgz",
|
||||
"integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
|
||||
"version": "1.12.0",
|
||||
"resolved": "https://registry.npmmirror.com/axios/-/axios-1.12.0.tgz",
|
||||
"integrity": "sha512-oXTDccv8PcfjZmPGlWsPSwtOJCZ/b6W5jAMCNcfwJbCzDckwG0jrYJFaWH1yvivfCXjVzV/SPDEhMB3Q+DSurg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.0",
|
||||
"form-data": "^4.0.4",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
@ -14181,6 +14182,19 @@
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/call-bind-apply-helpers": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmmirror.com/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
|
||||
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0",
|
||||
"function-bind": "^1.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/call-me-maybe": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmmirror.com/call-me-maybe/-/call-me-maybe-1.0.2.tgz",
|
||||
@ -16795,6 +16809,20 @@
|
||||
"underscore": "^1.13.1"
|
||||
}
|
||||
},
|
||||
"node_modules/dunder-proto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
||||
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"call-bind-apply-helpers": "^1.0.1",
|
||||
"es-errors": "^1.3.0",
|
||||
"gopd": "^1.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/duplexer": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/duplexer/-/duplexer-0.1.2.tgz",
|
||||
@ -17364,12 +17392,10 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/es-define-property": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/es-define-property/-/es-define-property-1.0.0.tgz",
|
||||
"integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==",
|
||||
"dependencies": {
|
||||
"get-intrinsic": "^1.2.4"
|
||||
},
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/es-define-property/-/es-define-property-1.0.1.tgz",
|
||||
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
@ -17426,9 +17452,10 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/es-object-atoms": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/es-object-atoms/-/es-object-atoms-1.0.0.tgz",
|
||||
"integrity": "sha512-MZ4iQ6JwHOBQjahnjwaC1ZtIBH+2ohjamzAO3oaHcXYup7qxjF2fixyH+Q71voWHeOkI2q/TnJao/KfXYIZWbw==",
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmmirror.com/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
|
||||
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0"
|
||||
},
|
||||
@ -17437,13 +17464,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/es-set-tostringtag": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/es-set-tostringtag/-/es-set-tostringtag-2.0.3.tgz",
|
||||
"integrity": "sha512-3T8uNMC3OQTHkFUsFq8r/BwAXLHvU/9O9mE0fBc/MY5iq/8H7ncvO947LmYA6ldWw9Uh8Yhf25zu6n7nML5QWQ==",
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
|
||||
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"get-intrinsic": "^1.2.4",
|
||||
"es-errors": "^1.3.0",
|
||||
"get-intrinsic": "^1.2.6",
|
||||
"has-tostringtag": "^1.0.2",
|
||||
"hasown": "^2.0.1"
|
||||
"hasown": "^2.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
@ -19233,12 +19262,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/form-data/-/form-data-4.0.0.tgz",
|
||||
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmmirror.com/form-data/-/form-data-4.0.4.tgz",
|
||||
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"es-set-tostringtag": "^2.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
@ -19398,18 +19430,27 @@
|
||||
}
|
||||
},
|
||||
"node_modules/get-intrinsic": {
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmmirror.com/get-intrinsic/-/get-intrinsic-1.2.4.tgz",
|
||||
"integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==",
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://registry.npmmirror.com/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
|
||||
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"call-bind-apply-helpers": "^1.0.2",
|
||||
"es-define-property": "^1.0.1",
|
||||
"es-errors": "^1.3.0",
|
||||
"es-object-atoms": "^1.1.1",
|
||||
"function-bind": "^1.1.2",
|
||||
"has-proto": "^1.0.1",
|
||||
"has-symbols": "^1.0.3",
|
||||
"hasown": "^2.0.0"
|
||||
"get-proto": "^1.0.1",
|
||||
"gopd": "^1.2.0",
|
||||
"has-symbols": "^1.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"math-intrinsics": "^1.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/get-nonce": {
|
||||
@ -19428,6 +19469,19 @@
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-proto": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/get-proto/-/get-proto-1.0.1.tgz",
|
||||
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"dunder-proto": "^1.0.1",
|
||||
"es-object-atoms": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/get-stdin": {
|
||||
"version": "9.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/get-stdin/-/get-stdin-9.0.0.tgz",
|
||||
@ -19609,11 +19663,15 @@
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/gopd": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/gopd/-/gopd-1.0.1.tgz",
|
||||
"integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==",
|
||||
"dependencies": {
|
||||
"get-intrinsic": "^1.1.3"
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmmirror.com/gopd/-/gopd-1.2.0.tgz",
|
||||
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/graceful-fs": {
|
||||
@ -19746,11 +19804,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/has-symbols": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/has-symbols/-/has-symbols-1.0.3.tgz",
|
||||
"integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/has-symbols/-/has-symbols-1.1.0.tgz",
|
||||
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/has-tostringtag": {
|
||||
@ -25234,6 +25296,15 @@
|
||||
"resolved": "https://registry.npmmirror.com/markdown-table/-/markdown-table-3.0.3.tgz",
|
||||
"integrity": "sha512-Z1NL3Tb1M9wH4XESsCDEksWoKTdlUafKc4pt0GRwjUyXaCFZ+dc3g2erqB6zm3szA2IUSi7VnPI+o/9jnxh9hw=="
|
||||
},
|
||||
"node_modules/math-intrinsics": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
|
||||
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/mathml-tag-names": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmmirror.com/mathml-tag-names/-/mathml-tag-names-2.1.3.tgz",
|
||||
|
||||
@ -62,7 +62,7 @@
|
||||
"@xyflow/react": "^12.3.6",
|
||||
"ahooks": "^3.7.10",
|
||||
"antd": "^5.12.7",
|
||||
"axios": "^1.6.3",
|
||||
"axios": "^1.12.0",
|
||||
"class-variance-authority": "^0.7.0",
|
||||
"classnames": "^2.5.1",
|
||||
"clsx": "^2.1.1",
|
||||
|
||||
@ -340,7 +340,9 @@ export function ChunkMethodDialog({
|
||||
show={showAutoKeywords(selectedTag) || showExcelToHtml}
|
||||
className="space-y-3"
|
||||
>
|
||||
<EnableTocToggle />
|
||||
{selectedTag === DocumentParserType.Naive && (
|
||||
<EnableTocToggle />
|
||||
)}
|
||||
{showAutoKeywords(selectedTag) && (
|
||||
<>
|
||||
<AutoKeywordsFormField></AutoKeywordsFormField>
|
||||
|
||||
@ -5,6 +5,8 @@ import {
|
||||
} from '@/components/ui/collapsible';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { CollapsibleProps } from '@radix-ui/react-collapsible';
|
||||
import { ChevronDown, ChevronUp } from 'lucide-react';
|
||||
import * as React from 'react';
|
||||
import {
|
||||
PropsWithChildren,
|
||||
ReactNode,
|
||||
@ -67,3 +69,53 @@ export function Collapse({
|
||||
</Collapsible>
|
||||
);
|
||||
}
|
||||
|
||||
export type NodeCollapsibleProps<T extends any[]> = {
|
||||
items?: T;
|
||||
children: (item: T[0], idx: number) => ReactNode;
|
||||
className?: string;
|
||||
};
|
||||
export function NodeCollapsible<T extends any[]>({
|
||||
items = [] as unknown as T,
|
||||
children,
|
||||
className,
|
||||
}: NodeCollapsibleProps<T>) {
|
||||
const [isOpen, setIsOpen] = React.useState(false);
|
||||
|
||||
const nextClassName = cn('space-y-2', className);
|
||||
|
||||
const nextItems = items.every((x) => Array.isArray(x)) ? items.flat() : items;
|
||||
|
||||
return (
|
||||
<Collapsible
|
||||
open={isOpen}
|
||||
onOpenChange={setIsOpen}
|
||||
className={cn('relative', nextClassName)}
|
||||
>
|
||||
{nextItems.slice(0, 3).map(children)}
|
||||
<CollapsibleContent className={nextClassName}>
|
||||
{nextItems.slice(3).map(children)}
|
||||
</CollapsibleContent>
|
||||
{nextItems.length > 3 && (
|
||||
<CollapsibleTrigger
|
||||
asChild
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
className="absolute left-1/2 -translate-x-1/2 bottom-0 translate-y-1/2 cursor-pointer"
|
||||
>
|
||||
<div
|
||||
className={cn(
|
||||
'size-3 bg-text-secondary rounded-full flex items-center justify-center',
|
||||
{ 'bg-text-primary': isOpen },
|
||||
)}
|
||||
>
|
||||
{isOpen ? (
|
||||
<ChevronUp className="stroke-bg-component" />
|
||||
) : (
|
||||
<ChevronDown className="stroke-bg-component" />
|
||||
)}
|
||||
</div>
|
||||
</CollapsibleTrigger>
|
||||
)}
|
||||
</Collapsible>
|
||||
);
|
||||
}
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import { PlusOutlined } from '@ant-design/icons';
|
||||
import { TweenOneGroup } from 'rc-tween-one';
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
|
||||
import { X } from 'lucide-react';
|
||||
@ -16,7 +15,7 @@ interface EditTagsProps {
|
||||
}
|
||||
|
||||
const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
|
||||
({ value = [], onChange }: EditTagsProps, ref) => {
|
||||
({ value = [], onChange }: EditTagsProps) => {
|
||||
const [inputVisible, setInputVisible] = useState(false);
|
||||
const [inputValue, setInputValue] = useState('');
|
||||
const inputRef = useRef<HTMLInputElement>(null);
|
||||
@ -57,7 +56,7 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
|
||||
<HoverCard key={tag}>
|
||||
<HoverCardContent side="top">{tag}</HoverCardContent>
|
||||
<HoverCardTrigger asChild>
|
||||
<div className="w-fit flex items-center justify-center gap-2 border-dashed border px-1 rounded-sm bg-bg-card">
|
||||
<div className="w-fit flex items-center justify-center gap-2 border-dashed border px-2 py-1 rounded-sm bg-bg-card">
|
||||
<div className="flex gap-2 items-center">
|
||||
<div className="max-w-80 overflow-hidden text-ellipsis">
|
||||
{tag}
|
||||
@ -84,11 +83,11 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
|
||||
|
||||
return (
|
||||
<div>
|
||||
{inputVisible ? (
|
||||
{inputVisible && (
|
||||
<Input
|
||||
ref={inputRef}
|
||||
type="text"
|
||||
className="h-8 bg-bg-card"
|
||||
className="h-8 bg-bg-card mb-1"
|
||||
value={inputValue}
|
||||
onChange={handleInputChange}
|
||||
onBlur={handleInputConfirm}
|
||||
@ -98,36 +97,20 @@ const EditTag = React.forwardRef<HTMLDivElement, EditTagsProps>(
|
||||
}
|
||||
}}
|
||||
/>
|
||||
) : (
|
||||
<Button
|
||||
variant="dashed"
|
||||
className="w-fit flex items-center justify-center gap-2 bg-bg-card"
|
||||
onClick={showInput}
|
||||
style={tagPlusStyle}
|
||||
>
|
||||
<PlusOutlined />
|
||||
</Button>
|
||||
)}
|
||||
{Array.isArray(tagChild) && tagChild.length > 0 && (
|
||||
<TweenOneGroup
|
||||
className="flex gap-2 flex-wrap mt-2"
|
||||
enter={{
|
||||
scale: 0.8,
|
||||
opacity: 0,
|
||||
type: 'from',
|
||||
duration: 100,
|
||||
}}
|
||||
onEnd={(e) => {
|
||||
if (e.type === 'appear' || e.type === 'enter') {
|
||||
(e.target as any).style = 'display: inline-block';
|
||||
}
|
||||
}}
|
||||
leave={{ opacity: 0, width: 0, scale: 0, duration: 200 }}
|
||||
appear={false}
|
||||
>
|
||||
{tagChild}
|
||||
</TweenOneGroup>
|
||||
)}
|
||||
<div className="flex gap-2 py-1">
|
||||
{Array.isArray(tagChild) && tagChild.length > 0 && <>{tagChild}</>}
|
||||
{!inputVisible && (
|
||||
<Button
|
||||
variant="dashed"
|
||||
className="w-fit flex items-center justify-center gap-2 bg-bg-card"
|
||||
onClick={showInput}
|
||||
style={tagPlusStyle}
|
||||
>
|
||||
<PlusOutlined />
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
},
|
||||
|
||||
@ -1,22 +1,32 @@
|
||||
import Image from '@/components/image';
|
||||
import SvgIcon from '@/components/svg-icon';
|
||||
import { useFetchDocumentThumbnailsByIds, useGetDocumentUrl } from '@/hooks/document-hooks';
|
||||
import {
|
||||
useFetchDocumentThumbnailsByIds,
|
||||
useGetDocumentUrl,
|
||||
} from '@/hooks/document-hooks';
|
||||
import { IReference, IReferenceChunk } from '@/interfaces/database/chat';
|
||||
import { preprocessLaTeX, replaceThinkToSection, showImage } from '@/utils/chat';
|
||||
import {
|
||||
preprocessLaTeX,
|
||||
replaceThinkToSection,
|
||||
showImage,
|
||||
} from '@/utils/chat';
|
||||
import { getExtension } from '@/utils/document-util';
|
||||
import { InfoCircleOutlined } from '@ant-design/icons';
|
||||
import { Button, Flex, Popover, Tooltip } from 'antd';
|
||||
import classNames from 'classnames';
|
||||
import DOMPurify from 'dompurify';
|
||||
import 'katex/dist/katex.min.css';
|
||||
import { omit } from 'lodash';
|
||||
import { pipe } from 'lodash/fp';
|
||||
import 'katex/dist/katex.min.css';
|
||||
import { useCallback, useEffect, useMemo } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import Markdown from 'react-markdown';
|
||||
import reactStringReplace from 'react-string-replace';
|
||||
import SyntaxHighlighter from 'react-syntax-highlighter';
|
||||
import { oneDark, oneLight } from 'react-syntax-highlighter/dist/esm/styles/prism';
|
||||
import {
|
||||
oneDark,
|
||||
oneLight,
|
||||
} from 'react-syntax-highlighter/dist/esm/styles/prism';
|
||||
import rehypeKatex from 'rehype-katex';
|
||||
import rehypeRaw from 'rehype-raw';
|
||||
import remarkGfm from 'remark-gfm';
|
||||
@ -39,7 +49,8 @@ const FloatingChatWidgetMarkdown = ({
|
||||
clickDocumentButton?: (documentId: string, chunk: IReferenceChunk) => void;
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const { setDocumentIds, data: fileThumbnails } = useFetchDocumentThumbnailsByIds();
|
||||
const { setDocumentIds, data: fileThumbnails } =
|
||||
useFetchDocumentThumbnailsByIds();
|
||||
const getDocumentUrl = useGetDocumentUrl();
|
||||
const isDarkTheme = useIsDarkTheme();
|
||||
|
||||
@ -51,23 +62,37 @@ const FloatingChatWidgetMarkdown = ({
|
||||
|
||||
useEffect(() => {
|
||||
const docAggs = reference?.doc_aggs;
|
||||
const docList = Array.isArray(docAggs) ? docAggs : Object.values(docAggs ?? {});
|
||||
const docList = Array.isArray(docAggs)
|
||||
? docAggs
|
||||
: Object.values(docAggs ?? {});
|
||||
setDocumentIds(docList.map((x: any) => x.doc_id).filter(Boolean));
|
||||
}, [reference, setDocumentIds]);
|
||||
|
||||
const handleDocumentButtonClick = useCallback((documentId: string, chunk: IReferenceChunk, isPdf: boolean, documentUrl?: string) => () => {
|
||||
if (!documentId) return;
|
||||
if (!isPdf && documentUrl) {
|
||||
window.open(documentUrl, '_blank');
|
||||
} else if (clickDocumentButton) {
|
||||
clickDocumentButton(documentId, chunk);
|
||||
}
|
||||
}, [clickDocumentButton]);
|
||||
const handleDocumentButtonClick = useCallback(
|
||||
(
|
||||
documentId: string,
|
||||
chunk: IReferenceChunk,
|
||||
isPdf: boolean,
|
||||
documentUrl?: string,
|
||||
) =>
|
||||
() => {
|
||||
if (!documentId) return;
|
||||
if (!isPdf && documentUrl) {
|
||||
window.open(documentUrl, '_blank');
|
||||
} else if (clickDocumentButton) {
|
||||
clickDocumentButton(documentId, chunk);
|
||||
}
|
||||
},
|
||||
[clickDocumentButton],
|
||||
);
|
||||
|
||||
const rehypeWrapReference = () => (tree: any) => {
|
||||
visitParents(tree, 'text', (node, ancestors) => {
|
||||
const latestAncestor = ancestors[ancestors.length - 1];
|
||||
if (latestAncestor.tagName !== 'custom-typography' && latestAncestor.tagName !== 'code') {
|
||||
if (
|
||||
latestAncestor.tagName !== 'custom-typography' &&
|
||||
latestAncestor.tagName !== 'code'
|
||||
) {
|
||||
node.type = 'element';
|
||||
node.tagName = 'custom-typography';
|
||||
node.properties = {};
|
||||
@ -76,90 +101,173 @@ const FloatingChatWidgetMarkdown = ({
|
||||
});
|
||||
};
|
||||
|
||||
const getReferenceInfo = useCallback((chunkIndex: number) => {
|
||||
const chunkItem = reference?.chunks?.[chunkIndex];
|
||||
if (!chunkItem) return null;
|
||||
const docAggsArray = Array.isArray(reference?.doc_aggs) ? reference.doc_aggs : Object.values(reference?.doc_aggs ?? {});
|
||||
const document = docAggsArray.find((x: any) => x?.doc_id === chunkItem?.document_id) as any;
|
||||
const documentId = document?.doc_id;
|
||||
const documentUrl = document?.url ?? (documentId ? getDocumentUrl(documentId) : undefined);
|
||||
const fileThumbnail = documentId ? fileThumbnails[documentId] : '';
|
||||
const fileExtension = documentId ? getExtension(document?.doc_name ?? '') : '';
|
||||
return { documentUrl, fileThumbnail, fileExtension, imageId: chunkItem.image_id, chunkItem, documentId, document };
|
||||
}, [fileThumbnails, reference, getDocumentUrl]);
|
||||
const getReferenceInfo = useCallback(
|
||||
(chunkIndex: number) => {
|
||||
const chunkItem = reference?.chunks?.[chunkIndex];
|
||||
if (!chunkItem) return null;
|
||||
const docAggsArray = Array.isArray(reference?.doc_aggs)
|
||||
? reference.doc_aggs
|
||||
: Object.values(reference?.doc_aggs ?? {});
|
||||
const document = docAggsArray.find(
|
||||
(x: any) => x?.doc_id === chunkItem?.document_id,
|
||||
) as any;
|
||||
const documentId = document?.doc_id;
|
||||
const documentUrl =
|
||||
document?.url ?? (documentId ? getDocumentUrl(documentId) : undefined);
|
||||
const fileThumbnail = documentId ? fileThumbnails[documentId] : '';
|
||||
const fileExtension = documentId
|
||||
? getExtension(document?.doc_name ?? '')
|
||||
: '';
|
||||
return {
|
||||
documentUrl,
|
||||
fileThumbnail,
|
||||
fileExtension,
|
||||
imageId: chunkItem.image_id,
|
||||
chunkItem,
|
||||
documentId,
|
||||
document,
|
||||
};
|
||||
},
|
||||
[fileThumbnails, reference, getDocumentUrl],
|
||||
);
|
||||
|
||||
const getPopoverContent = useCallback((chunkIndex: number) => {
|
||||
const info = getReferenceInfo(chunkIndex);
|
||||
|
||||
if (!info) {
|
||||
return <div className="p-2 text-xs text-red-500">Error: Missing document information.</div>;
|
||||
}
|
||||
|
||||
const { documentUrl, fileThumbnail, fileExtension, imageId, chunkItem, documentId, document } = info;
|
||||
|
||||
return (
|
||||
<div key={`popover-content-${chunkItem.id}`} className="flex gap-2 widget-citation-content">
|
||||
{imageId && (
|
||||
<Popover placement="left" content={<Image id={imageId} className="max-w-[80vw] max-h-[60vh] rounded" />}>
|
||||
<Image id={imageId} className="w-24 h-24 object-contain rounded m-1 cursor-pointer" />
|
||||
</Popover>
|
||||
)}
|
||||
<div className="space-y-2 flex-1 min-w-0">
|
||||
<div
|
||||
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(chunkItem?.content ?? '') }}
|
||||
className="max-h-[250px] overflow-y-auto text-xs leading-relaxed p-2 bg-gray-50 dark:bg-gray-800 rounded prose-sm"
|
||||
></div>
|
||||
{documentId && (
|
||||
<Flex gap={'small'} align="center">
|
||||
{fileThumbnail ? (
|
||||
<img src={fileThumbnail} alt={document?.doc_name} className="w-6 h-6 rounded" />
|
||||
) : (
|
||||
<SvgIcon name={`file-icon/${fileExtension}`} width={20} />
|
||||
)}
|
||||
<Tooltip title={!documentUrl && fileExtension !== 'pdf' ? 'Document link unavailable' : document.doc_name}>
|
||||
<Button
|
||||
type="link"
|
||||
size="small"
|
||||
className="p-0 text-xs break-words h-auto text-left flex-1"
|
||||
onClick={handleDocumentButtonClick(documentId, chunkItem, fileExtension === 'pdf', documentUrl)}
|
||||
disabled={!documentUrl && fileExtension !== 'pdf'}
|
||||
style={{ whiteSpace: 'normal' }}
|
||||
>
|
||||
<span className="truncate">{document?.doc_name ?? 'Unnamed Document'}</span>
|
||||
</Button>
|
||||
</Tooltip>
|
||||
</Flex>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}, [getReferenceInfo, handleDocumentButtonClick]);
|
||||
|
||||
const renderReference = useCallback((text: string) => {
|
||||
return reactStringReplace(text, currentReg, (match, i) => {
|
||||
const chunkIndex = getChunkIndex(match);
|
||||
const getPopoverContent = useCallback(
|
||||
(chunkIndex: number) => {
|
||||
const info = getReferenceInfo(chunkIndex);
|
||||
|
||||
if (!info) {
|
||||
return <Tooltip key={`err-tooltip-${i}`} title="Reference unavailable"><InfoCircleOutlined className={styles.referenceIcon} /></Tooltip>;
|
||||
return (
|
||||
<div className="p-2 text-xs text-red-500">
|
||||
Error: Missing document information.
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
const { imageId, chunkItem, documentId, fileExtension, documentUrl } = info;
|
||||
|
||||
if (showImage(chunkItem?.doc_type)) {
|
||||
return <Image key={`img-${i}`} id={imageId} className="block object-contain max-w-full max-h-48 rounded my-2 cursor-pointer" onClick={handleDocumentButtonClick(documentId, chunkItem, fileExtension === 'pdf', documentUrl)} />;
|
||||
}
|
||||
const {
|
||||
documentUrl,
|
||||
fileThumbnail,
|
||||
fileExtension,
|
||||
imageId,
|
||||
chunkItem,
|
||||
documentId,
|
||||
document,
|
||||
} = info;
|
||||
|
||||
return (
|
||||
<Popover
|
||||
content={getPopoverContent(chunkIndex)}
|
||||
key={`popover-${i}`}
|
||||
<div
|
||||
key={`popover-content-${chunkItem.id}`}
|
||||
className="flex gap-2 widget-citation-content"
|
||||
>
|
||||
<InfoCircleOutlined className={styles.referenceIcon} />
|
||||
</Popover>
|
||||
{imageId && (
|
||||
<Popover
|
||||
placement="left"
|
||||
content={
|
||||
<Image
|
||||
id={imageId}
|
||||
className="max-w-[80vw] max-h-[60vh] rounded"
|
||||
/>
|
||||
}
|
||||
>
|
||||
<Image
|
||||
id={imageId}
|
||||
className="w-24 h-24 object-contain rounded m-1 cursor-pointer"
|
||||
/>
|
||||
</Popover>
|
||||
)}
|
||||
<div className="space-y-2 flex-1 min-w-0">
|
||||
<div
|
||||
dangerouslySetInnerHTML={{
|
||||
__html: DOMPurify.sanitize(chunkItem?.content ?? ''),
|
||||
}}
|
||||
className="max-h-[250px] overflow-y-auto text-xs leading-relaxed p-2 bg-gray-50 dark:bg-gray-800 rounded prose-sm"
|
||||
></div>
|
||||
{documentId && (
|
||||
<Flex gap={'small'} align="center">
|
||||
{fileThumbnail ? (
|
||||
<img
|
||||
src={fileThumbnail}
|
||||
alt={document?.doc_name}
|
||||
className="w-6 h-6 rounded"
|
||||
/>
|
||||
) : (
|
||||
<SvgIcon name={`file-icon/${fileExtension}`} width={20} />
|
||||
)}
|
||||
<Tooltip
|
||||
title={
|
||||
!documentUrl && fileExtension !== 'pdf'
|
||||
? 'Document link unavailable'
|
||||
: document.doc_name
|
||||
}
|
||||
>
|
||||
<Button
|
||||
type="link"
|
||||
size="small"
|
||||
className="p-0 text-xs break-words h-auto text-left flex-1"
|
||||
onClick={handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)}
|
||||
disabled={!documentUrl && fileExtension !== 'pdf'}
|
||||
style={{ whiteSpace: 'normal' }}
|
||||
>
|
||||
<span className="truncate">
|
||||
{document?.doc_name ?? 'Unnamed Document'}
|
||||
</span>
|
||||
</Button>
|
||||
</Tooltip>
|
||||
</Flex>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
});
|
||||
}, [getPopoverContent, getReferenceInfo, handleDocumentButtonClick]);
|
||||
},
|
||||
[getReferenceInfo, handleDocumentButtonClick],
|
||||
);
|
||||
|
||||
const renderReference = useCallback(
|
||||
(text: string) => {
|
||||
return reactStringReplace(text, currentReg, (match, i) => {
|
||||
const chunkIndex = getChunkIndex(match);
|
||||
const info = getReferenceInfo(chunkIndex);
|
||||
|
||||
if (!info) {
|
||||
return (
|
||||
<Tooltip key={`err-tooltip-${i}`} title="Reference unavailable">
|
||||
<InfoCircleOutlined className={styles.referenceIcon} />
|
||||
</Tooltip>
|
||||
);
|
||||
}
|
||||
|
||||
const { imageId, chunkItem, documentId, fileExtension, documentUrl } =
|
||||
info;
|
||||
|
||||
if (showImage(chunkItem?.doc_type)) {
|
||||
return (
|
||||
<Image
|
||||
key={`img-${i}`}
|
||||
id={imageId}
|
||||
className="block object-contain max-w-full max-h-48 rounded my-2 cursor-pointer"
|
||||
onClick={handleDocumentButtonClick(
|
||||
documentId,
|
||||
chunkItem,
|
||||
fileExtension === 'pdf',
|
||||
documentUrl,
|
||||
)}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<Popover content={getPopoverContent(chunkIndex)} key={`popover-${i}`}>
|
||||
<InfoCircleOutlined className={styles.referenceIcon} />
|
||||
</Popover>
|
||||
);
|
||||
});
|
||||
},
|
||||
[getPopoverContent, getReferenceInfo, handleDocumentButtonClick],
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="floating-chat-widget">
|
||||
@ -167,28 +275,38 @@ const FloatingChatWidgetMarkdown = ({
|
||||
rehypePlugins={[rehypeWrapReference, rehypeKatex, rehypeRaw]}
|
||||
remarkPlugins={[remarkGfm, remarkMath]}
|
||||
className="text-sm leading-relaxed space-y-2 prose-sm max-w-full"
|
||||
components={{
|
||||
'custom-typography': ({ children }: { children: string }) => renderReference(children),
|
||||
code(props: any) {
|
||||
const { children, className, node, ...rest } = props;
|
||||
const match = /language-(\w+)/.exec(className || '');
|
||||
return match ? (
|
||||
<SyntaxHighlighter
|
||||
{...omit(rest, 'inline')}
|
||||
PreTag="div"
|
||||
language={match[1]}
|
||||
style={isDarkTheme ? oneDark : oneLight}
|
||||
wrapLongLines
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code {...rest} className={classNames(className, 'text-wrap text-xs bg-gray-200 dark:bg-gray-700 px-1 py-0.5 rounded')}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
} as any}
|
||||
components={
|
||||
{
|
||||
'custom-typography': ({ children }: { children: string }) =>
|
||||
renderReference(children),
|
||||
code(props: any) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
||||
const { children, className, node, ...rest } = props;
|
||||
const match = /language-(\w+)/.exec(className || '');
|
||||
return match ? (
|
||||
<SyntaxHighlighter
|
||||
{...omit(rest, 'inline')}
|
||||
PreTag="div"
|
||||
language={match[1]}
|
||||
style={isDarkTheme ? oneDark : oneLight}
|
||||
wrapLongLines
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code
|
||||
{...rest}
|
||||
className={classNames(
|
||||
className,
|
||||
'text-wrap text-xs bg-gray-200 dark:bg-gray-700 px-1 py-0.5 rounded',
|
||||
)}
|
||||
>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
} as any
|
||||
}
|
||||
>
|
||||
{contentWithCursor}
|
||||
</Markdown>
|
||||
@ -196,4 +314,4 @@ const FloatingChatWidgetMarkdown = ({
|
||||
);
|
||||
};
|
||||
|
||||
export default FloatingChatWidgetMarkdown;
|
||||
export default FloatingChatWidgetMarkdown;
|
||||
|
||||
@ -1,18 +1,10 @@
|
||||
import { MessageType, SharedFrom } from '@/constants/chat';
|
||||
import { useFetchNextConversationSSE } from '@/hooks/chat-hooks';
|
||||
import { useFetchFlowSSE } from '@/hooks/flow-hooks';
|
||||
import { useFetchExternalChatInfo } from '@/hooks/use-chat-request';
|
||||
import PdfDrawer from '@/components/pdf-drawer';
|
||||
import { useClickDrawer } from '@/components/pdf-drawer/hooks';
|
||||
import { MessageType } from '@/constants/chat';
|
||||
import { useFetchExternalChatInfo } from '@/hooks/use-chat-request';
|
||||
import i18n from '@/locales/config';
|
||||
import { MessageCircle, Minimize2, Send, X } from 'lucide-react';
|
||||
import PdfDrawer from '@/components/pdf-drawer';
|
||||
import React, {
|
||||
useCallback,
|
||||
useEffect,
|
||||
useMemo,
|
||||
useRef,
|
||||
useState,
|
||||
} from 'react';
|
||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import {
|
||||
useGetSharedChatSearchParams,
|
||||
useSendSharedMessage,
|
||||
@ -28,12 +20,7 @@ const FloatingChatWidget = () => {
|
||||
const [isLoaded, setIsLoaded] = useState(false);
|
||||
const messagesEndRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
const {
|
||||
sharedId: conversationId,
|
||||
from,
|
||||
locale,
|
||||
visibleAvatar,
|
||||
} = useGetSharedChatSearchParams();
|
||||
const { sharedId: conversationId, locale } = useGetSharedChatSearchParams();
|
||||
|
||||
// Check if we're in button-only mode or window-only mode
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
@ -58,14 +45,6 @@ const FloatingChatWidget = () => {
|
||||
|
||||
const { data: chatInfo } = useFetchExternalChatInfo();
|
||||
|
||||
const useFetchAvatar = useMemo(() => {
|
||||
return from === SharedFrom.Agent
|
||||
? useFetchFlowSSE
|
||||
: useFetchNextConversationSSE;
|
||||
}, [from]);
|
||||
|
||||
const { data: avatarData } = useFetchAvatar();
|
||||
|
||||
const { visible, hideModal, documentId, selectedChunk, clickDocumentButton } =
|
||||
useClickDrawer();
|
||||
|
||||
@ -181,6 +160,40 @@ const FloatingChatWidget = () => {
|
||||
messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
|
||||
}, [displayMessages]);
|
||||
|
||||
// Render different content based on mode
|
||||
// Master mode - handles everything and creates second iframe dynamically
|
||||
useEffect(() => {
|
||||
if (mode !== 'master') return;
|
||||
// Create the chat window iframe dynamically when needed
|
||||
const createChatWindow = () => {
|
||||
// Check if iframe already exists in parent document
|
||||
window.parent.postMessage(
|
||||
{
|
||||
type: 'CREATE_CHAT_WINDOW',
|
||||
src: window.location.href.replace('mode=master', 'mode=window'),
|
||||
},
|
||||
'*',
|
||||
);
|
||||
};
|
||||
|
||||
createChatWindow();
|
||||
|
||||
// Listen for our own toggle events to show/hide the dynamic iframe
|
||||
const handleToggle = (e: MessageEvent) => {
|
||||
if (e.source === window) return; // Ignore our own messages
|
||||
|
||||
const chatWindow = document.getElementById(
|
||||
'dynamic-chat-window',
|
||||
) as HTMLIFrameElement;
|
||||
if (chatWindow && e.data.type === 'TOGGLE_CHAT') {
|
||||
chatWindow.style.display = e.data.isOpen ? 'block' : 'none';
|
||||
}
|
||||
};
|
||||
|
||||
window.addEventListener('message', handleToggle);
|
||||
return () => window.removeEventListener('message', handleToggle);
|
||||
}, [mode]);
|
||||
|
||||
// Play sound only when AI response is complete (not streaming chunks)
|
||||
useEffect(() => {
|
||||
if (derivedMessages && derivedMessages.length > 0 && !sendLoading) {
|
||||
@ -234,7 +247,7 @@ const FloatingChatWidget = () => {
|
||||
const syntheticEvent = {
|
||||
target: { value: inputValue },
|
||||
currentTarget: { value: inputValue },
|
||||
preventDefault: () => { },
|
||||
preventDefault: () => {},
|
||||
} as any;
|
||||
|
||||
handleInputChange(syntheticEvent);
|
||||
@ -271,46 +284,14 @@ const FloatingChatWidget = () => {
|
||||
|
||||
const messageCount = displayMessages?.length || 0;
|
||||
|
||||
// Render different content based on mode
|
||||
// Show just the button in master mode
|
||||
if (mode === 'master') {
|
||||
// Master mode - handles everything and creates second iframe dynamically
|
||||
useEffect(() => {
|
||||
// Create the chat window iframe dynamically when needed
|
||||
const createChatWindow = () => {
|
||||
// Check if iframe already exists in parent document
|
||||
window.parent.postMessage(
|
||||
{
|
||||
type: 'CREATE_CHAT_WINDOW',
|
||||
src: window.location.href.replace('mode=master', 'mode=window'),
|
||||
},
|
||||
'*',
|
||||
);
|
||||
};
|
||||
|
||||
createChatWindow();
|
||||
|
||||
// Listen for our own toggle events to show/hide the dynamic iframe
|
||||
const handleToggle = (e: MessageEvent) => {
|
||||
if (e.source === window) return; // Ignore our own messages
|
||||
|
||||
const chatWindow = document.getElementById(
|
||||
'dynamic-chat-window',
|
||||
) as HTMLIFrameElement;
|
||||
if (chatWindow && e.data.type === 'TOGGLE_CHAT') {
|
||||
chatWindow.style.display = e.data.isOpen ? 'block' : 'none';
|
||||
}
|
||||
};
|
||||
|
||||
window.addEventListener('message', handleToggle);
|
||||
return () => window.removeEventListener('message', handleToggle);
|
||||
}, []);
|
||||
|
||||
// Show just the button in master mode
|
||||
return (
|
||||
<div
|
||||
className={`fixed bottom-6 right-6 z-50 transition-opacity duration-300 ${isLoaded ? 'opacity-100' : 'opacity-0'}`}
|
||||
>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
const newIsOpen = !isOpen;
|
||||
setIsOpen(newIsOpen);
|
||||
@ -325,8 +306,9 @@ const FloatingChatWidget = () => {
|
||||
'*',
|
||||
);
|
||||
}}
|
||||
className={`w-14 h-14 bg-blue-600 hover:bg-blue-700 text-white rounded-full transition-all duration-300 flex items-center justify-center group ${isOpen ? 'scale-95' : 'scale-100 hover:scale-105'
|
||||
}`}
|
||||
className={`w-14 h-14 bg-blue-600 hover:bg-blue-700 text-white rounded-full transition-all duration-300 flex items-center justify-center group ${
|
||||
isOpen ? 'scale-95' : 'scale-100 hover:scale-105'
|
||||
}`}
|
||||
>
|
||||
<div
|
||||
className={`transition-transform duration-300 ${isOpen ? 'rotate-45' : 'rotate-0'}`}
|
||||
@ -352,9 +334,11 @@ const FloatingChatWidget = () => {
|
||||
className={`fixed bottom-6 right-6 z-50 transition-opacity duration-300 ${isLoaded ? 'opacity-100' : 'opacity-0'}`}
|
||||
>
|
||||
<button
|
||||
type="button"
|
||||
onClick={toggleChat}
|
||||
className={`w-14 h-14 bg-blue-600 hover:bg-blue-700 text-white rounded-full transition-all duration-300 flex items-center justify-center group ${isOpen ? 'scale-95' : 'scale-100 hover:scale-105'
|
||||
}`}
|
||||
className={`w-14 h-14 bg-blue-600 hover:bg-blue-700 text-white rounded-full transition-all duration-300 flex items-center justify-center group ${
|
||||
isOpen ? 'scale-95' : 'scale-100 hover:scale-105'
|
||||
}`}
|
||||
>
|
||||
<div
|
||||
className={`transition-transform duration-300 ${isOpen ? 'rotate-45' : 'rotate-0'}`}
|
||||
@ -431,10 +415,11 @@ const FloatingChatWidget = () => {
|
||||
className={`flex ${message.role === MessageType.User ? 'justify-end' : 'justify-start'}`}
|
||||
>
|
||||
<div
|
||||
className={`max-w-[280px] px-4 py-2 rounded-2xl ${message.role === MessageType.User
|
||||
? 'bg-blue-600 text-white rounded-br-md'
|
||||
: 'bg-gray-100 text-gray-800 rounded-bl-md'
|
||||
}`}
|
||||
className={`max-w-[280px] px-4 py-2 rounded-2xl ${
|
||||
message.role === MessageType.User
|
||||
? 'bg-blue-600 text-white rounded-br-md'
|
||||
: 'bg-gray-100 text-gray-800 rounded-bl-md'
|
||||
}`}
|
||||
>
|
||||
{message.role === MessageType.User ? (
|
||||
<p className="text-sm leading-relaxed whitespace-pre-wrap">
|
||||
@ -444,7 +429,13 @@ const FloatingChatWidget = () => {
|
||||
<FloatingChatWidgetMarkdown
|
||||
loading={false}
|
||||
content={message.content}
|
||||
reference={message.reference || { doc_aggs: [], chunks: [], total: 0 }}
|
||||
reference={
|
||||
message.reference || {
|
||||
doc_aggs: [],
|
||||
chunks: [],
|
||||
total: 0,
|
||||
}
|
||||
}
|
||||
clickDocumentButton={clickDocumentButton}
|
||||
/>
|
||||
)}
|
||||
@ -486,12 +477,13 @@ const FloatingChatWidget = () => {
|
||||
onKeyPress={handleKeyPress}
|
||||
placeholder="Type your message..."
|
||||
rows={1}
|
||||
className="w-full resize-none border border-gray-300 rounded-2xl px-4 py-3 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
|
||||
className="w-full resize-none border border-gray-300 rounded-2xl px-4 py-3 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent text-black"
|
||||
style={{ minHeight: '44px', maxHeight: '120px' }}
|
||||
disabled={hasError || sendLoading}
|
||||
/>
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleSendMessage}
|
||||
disabled={!inputValue.trim() || sendLoading}
|
||||
className="p-3 bg-blue-600 text-white rounded-full hover:bg-blue-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
|
||||
@ -512,7 +504,7 @@ const FloatingChatWidget = () => {
|
||||
/>
|
||||
</>
|
||||
);
|
||||
} // Full mode - render everything together (original behavior)
|
||||
} // Full mode - render everything together (original behavior)
|
||||
return (
|
||||
<div
|
||||
className={`transition-opacity duration-300 ${isLoaded ? 'opacity-100' : 'opacity-0'}`}
|
||||
@ -520,8 +512,9 @@ const FloatingChatWidget = () => {
|
||||
{/* Chat Widget Container */}
|
||||
{isOpen && (
|
||||
<div
|
||||
className={`fixed bottom-24 right-6 z-50 bg-blue-600 rounded-2xl transition-all duration-300 ease-out ${isMinimized ? 'h-16' : 'h-[500px]'
|
||||
} w-[380px] overflow-hidden`}
|
||||
className={`fixed bottom-24 right-6 z-50 bg-blue-600 rounded-2xl transition-all duration-300 ease-out ${
|
||||
isMinimized ? 'h-16' : 'h-[500px]'
|
||||
} w-[380px] overflow-hidden`}
|
||||
>
|
||||
{/* Header */}
|
||||
<div className="flex items-center justify-between p-4 bg-gradient-to-r from-blue-600 to-blue-700 text-white rounded-t-2xl">
|
||||
@ -540,12 +533,14 @@ const FloatingChatWidget = () => {
|
||||
</div>
|
||||
<div className="flex items-center space-x-1">
|
||||
<button
|
||||
type="button"
|
||||
onClick={minimizeChat}
|
||||
className="p-1.5 hover:bg-white hover:bg-opacity-20 rounded-full transition-colors"
|
||||
>
|
||||
<Minimize2 size={16} />
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onClick={toggleChat}
|
||||
className="p-1.5 hover:bg-white hover:bg-opacity-20 rounded-full transition-colors"
|
||||
>
|
||||
@ -592,10 +587,11 @@ const FloatingChatWidget = () => {
|
||||
className={`flex ${message.role === MessageType.User ? 'justify-end' : 'justify-start'}`}
|
||||
>
|
||||
<div
|
||||
className={`max-w-[280px] px-4 py-2 rounded-2xl ${message.role === MessageType.User
|
||||
? 'bg-blue-600 text-white rounded-br-md'
|
||||
: 'bg-gray-100 text-gray-800 rounded-bl-md'
|
||||
}`}
|
||||
className={`max-w-[280px] px-4 py-2 rounded-2xl ${
|
||||
message.role === MessageType.User
|
||||
? 'bg-blue-600 text-white rounded-br-md'
|
||||
: 'bg-gray-100 text-gray-800 rounded-bl-md'
|
||||
}`}
|
||||
>
|
||||
{message.role === MessageType.User ? (
|
||||
<p className="text-sm leading-relaxed whitespace-pre-wrap">
|
||||
@ -605,7 +601,13 @@ const FloatingChatWidget = () => {
|
||||
<FloatingChatWidgetMarkdown
|
||||
loading={false}
|
||||
content={message.content}
|
||||
reference={message.reference || { doc_aggs: [], chunks: [], total: 0 }}
|
||||
reference={
|
||||
message.reference || {
|
||||
doc_aggs: [],
|
||||
chunks: [],
|
||||
total: 0,
|
||||
}
|
||||
}
|
||||
clickDocumentButton={clickDocumentButton}
|
||||
/>
|
||||
)}
|
||||
@ -650,12 +652,13 @@ const FloatingChatWidget = () => {
|
||||
onKeyPress={handleKeyPress}
|
||||
placeholder="Type your message..."
|
||||
rows={1}
|
||||
className="w-full resize-none border border-gray-300 rounded-2xl px-4 py-3 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent"
|
||||
className="w-full resize-none border border-gray-300 rounded-2xl px-4 py-3 text-sm focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent text-black"
|
||||
style={{ minHeight: '44px', maxHeight: '120px' }}
|
||||
disabled={hasError || sendLoading}
|
||||
/>
|
||||
</div>
|
||||
<button
|
||||
type="button"
|
||||
onClick={handleSendMessage}
|
||||
disabled={!inputValue.trim() || sendLoading}
|
||||
className="p-3 bg-blue-600 text-white rounded-full hover:bg-blue-700 disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
|
||||
@ -672,9 +675,11 @@ const FloatingChatWidget = () => {
|
||||
{/* Floating Button */}
|
||||
<div className="fixed bottom-6 right-6 z-50">
|
||||
<button
|
||||
type="button"
|
||||
onClick={toggleChat}
|
||||
className={`w-14 h-14 bg-blue-600 hover:bg-blue-700 text-white rounded-full transition-all duration-300 flex items-center justify-center group ${isOpen ? 'scale-95' : 'scale-100 hover:scale-105'
|
||||
}`}
|
||||
className={`w-14 h-14 bg-blue-600 hover:bg-blue-700 text-white rounded-full transition-all duration-300 flex items-center justify-center group ${
|
||||
isOpen ? 'scale-95' : 'scale-100 hover:scale-105'
|
||||
}`}
|
||||
>
|
||||
<div
|
||||
className={`transition-transform duration-300 ${isOpen ? 'rotate-45' : 'rotate-0'}`}
|
||||
|
||||
@ -17,6 +17,7 @@ import {
|
||||
export const enum ParseDocumentType {
|
||||
DeepDOC = 'DeepDOC',
|
||||
PlainText = 'Plain Text',
|
||||
MinerU = 'MinerU',
|
||||
}
|
||||
|
||||
export function LayoutRecognizeFormField({
|
||||
@ -38,9 +39,12 @@ export function LayoutRecognizeFormField({
|
||||
const options = useMemo(() => {
|
||||
const list = optionsWithoutLLM
|
||||
? optionsWithoutLLM
|
||||
: [ParseDocumentType.DeepDOC, ParseDocumentType.PlainText].map((x) => ({
|
||||
label:
|
||||
x === ParseDocumentType.PlainText ? t(camelCase(x)) : 'DeepDoc',
|
||||
: [
|
||||
ParseDocumentType.DeepDOC,
|
||||
ParseDocumentType.PlainText,
|
||||
ParseDocumentType.MinerU,
|
||||
].map((x) => ({
|
||||
label: x === ParseDocumentType.PlainText ? t(camelCase(x)) : x,
|
||||
value: x,
|
||||
}));
|
||||
|
||||
|
||||
@ -70,7 +70,7 @@ export function UseGraphRagFormField({
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="parser_config.graphrag.use_graphrag"
|
||||
render={({ field }) => (
|
||||
render={() => (
|
||||
<FormItem defaultChecked={false} className=" items-center space-y-0 ">
|
||||
<div className="flex items-center gap-1">
|
||||
<FormLabel
|
||||
|
||||
@ -17,7 +17,7 @@ const buttonVariants = cva(
|
||||
outline:
|
||||
'border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:bg-input/30 dark:border-input dark:hover:bg-input/50',
|
||||
secondary:
|
||||
'bg-bg-input text-secondary-foreground shadow-xs hover:bg-bg-input/80',
|
||||
'bg-bg-input text-text-primary shadow-xs hover:bg-bg-input/80 border border-border-button',
|
||||
ghost:
|
||||
'hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50',
|
||||
link: 'text-primary underline-offset-4 hover:underline',
|
||||
|
||||
@ -27,6 +27,7 @@ export interface ModalProps {
|
||||
okText?: ReactNode | string;
|
||||
onOk?: () => void;
|
||||
onCancel?: () => void;
|
||||
disabled?: boolean;
|
||||
}
|
||||
export interface ModalType extends FC<ModalProps> {
|
||||
show: typeof modalIns.show;
|
||||
@ -55,6 +56,7 @@ const Modal: ModalType = ({
|
||||
confirmLoading,
|
||||
cancelText,
|
||||
okText,
|
||||
disabled = false,
|
||||
}) => {
|
||||
const sizeClasses = {
|
||||
small: 'max-w-md',
|
||||
@ -86,7 +88,7 @@ const Modal: ModalType = ({
|
||||
const handleChange = (open: boolean) => {
|
||||
onOpenChange?.(open);
|
||||
console.log('open', open, onOpenChange);
|
||||
if (open) {
|
||||
if (open && !disabled) {
|
||||
onOk?.();
|
||||
}
|
||||
if (!open) {
|
||||
@ -112,9 +114,12 @@ const Modal: ModalType = ({
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
disabled={confirmLoading}
|
||||
disabled={confirmLoading || disabled}
|
||||
onClick={() => handleOk()}
|
||||
className="px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90"
|
||||
className={cn(
|
||||
'px-2 py-1 bg-primary text-primary-foreground rounded-md hover:bg-primary/90',
|
||||
{ 'cursor-not-allowed': disabled },
|
||||
)}
|
||||
>
|
||||
{confirmLoading && (
|
||||
<Loader className="inline-block mr-2 h-4 w-4 animate-spin" />
|
||||
|
||||
@ -291,7 +291,7 @@ export const RAGFlowSelect = forwardRef<
|
||||
onReset={handleReset}
|
||||
allowClear={allowClear}
|
||||
ref={ref}
|
||||
className={cn(triggerClassName, 'bg-bg-base')}
|
||||
className={cn('bg-bg-base', triggerClassName)}
|
||||
>
|
||||
<SelectValue placeholder={placeholder}>{label}</SelectValue>
|
||||
</SelectTrigger>
|
||||
|
||||
@ -14,6 +14,12 @@ const sizeClasses = {
|
||||
large: 'w-8 h-8',
|
||||
};
|
||||
|
||||
const minSizeClasses = {
|
||||
small: 'min-w-4 min-h-4',
|
||||
default: 'min-w-6 min-h-6',
|
||||
large: 'min-w-8 min-h-8',
|
||||
};
|
||||
|
||||
export const Spin: React.FC<SpinProps> = ({
|
||||
spinning = true,
|
||||
size = 'default',
|
||||
@ -32,7 +38,12 @@ export const Spin: React.FC<SpinProps> = ({
|
||||
)}
|
||||
>
|
||||
{spinning && (
|
||||
<div className="absolute inset-0 z-10 flex items-center justify-center bg-text-primary/30 ">
|
||||
<div
|
||||
className={cn(
|
||||
'absolute inset-0 z-10 flex items-center justify-center bg-text-primary/30',
|
||||
minSizeClasses[size],
|
||||
)}
|
||||
>
|
||||
<div
|
||||
className={cn(
|
||||
'rounded-full border-muted-foreground border-2 border-t-transparent animate-spin',
|
||||
|
||||
@ -53,6 +53,10 @@ export enum AgentCategory {
|
||||
DataflowCanvas = 'dataflow_canvas',
|
||||
}
|
||||
|
||||
// Names of URL query-string parameters used on agent routes.
// `Category` is the key under which an AgentCategory value is placed in the
// URL when navigating to an agent page.
export enum AgentQuery {
|
||||
Category = 'category',
|
||||
}
|
||||
|
||||
export enum DataflowOperator {
|
||||
Begin = 'File',
|
||||
Note = 'Note',
|
||||
@ -62,3 +66,55 @@ export enum DataflowOperator {
|
||||
HierarchicalMerger = 'HierarchicalMerger',
|
||||
Extractor = 'Extractor',
|
||||
}
|
||||
|
||||
/**
 * String-valued identifiers for the operators referenced by the agent canvas
 * UI (for example, the lists of operators offered in the "next step" dropdown).
 * Most members use their own name as the string value; the exceptions are
 * called out inline below.
 */
export enum Operator {
|
||||
Begin = 'Begin',
|
||||
Retrieval = 'Retrieval',
|
||||
Categorize = 'Categorize',
|
||||
Message = 'Message',
|
||||
Relevant = 'Relevant',
|
||||
RewriteQuestion = 'RewriteQuestion',
|
||||
KeywordExtract = 'KeywordExtract',
|
||||
Baidu = 'Baidu',
|
||||
DuckDuckGo = 'DuckDuckGo',
|
||||
Wikipedia = 'Wikipedia',
|
||||
PubMed = 'PubMed',
|
||||
ArXiv = 'ArXiv',
|
||||
Google = 'Google',
|
||||
Bing = 'Bing',
|
||||
GoogleScholar = 'GoogleScholar',
|
||||
DeepL = 'DeepL',
|
||||
GitHub = 'GitHub',
|
||||
BaiduFanyi = 'BaiduFanyi',
|
||||
QWeather = 'QWeather',
|
||||
ExeSQL = 'ExeSQL',
|
||||
Switch = 'Switch',
|
||||
WenCai = 'WenCai',
|
||||
AkShare = 'AkShare',
|
||||
YahooFinance = 'YahooFinance',
|
||||
Jin10 = 'Jin10',
|
||||
TuShare = 'TuShare',
|
||||
Note = 'Note',
|
||||
Crawler = 'Crawler',
|
||||
Invoke = 'Invoke',
|
||||
Email = 'Email',
|
||||
Iteration = 'Iteration',
|
||||
// NOTE: the string value ('IterationItem') differs from the member name.
IterationStart = 'IterationItem',
|
||||
// NOTE: the string value ('CodeExec') differs from the member name.
Code = 'CodeExec',
|
||||
WaitingDialogue = 'WaitingDialogue',
|
||||
Agent = 'Agent',
|
||||
Tool = 'Tool',
|
||||
TavilySearch = 'TavilySearch',
|
||||
TavilyExtract = 'TavilyExtract',
|
||||
UserFillUp = 'UserFillUp',
|
||||
StringTransform = 'StringTransform',
|
||||
SearXNG = 'SearXNG',
|
||||
Placeholder = 'Placeholder',
|
||||
File = 'File', // pipeline
|
||||
Parser = 'Parser',
|
||||
Tokenizer = 'Tokenizer',
|
||||
Splitter = 'Splitter',
|
||||
HierarchicalMerger = 'HierarchicalMerger',
|
||||
Extractor = 'Extractor',
|
||||
Generate = 'Generate',
|
||||
}
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import { AgentCategory, AgentQuery } from '@/constants/agent';
|
||||
import { NavigateToDataflowResultProps } from '@/pages/dataflow-result/interface';
|
||||
import { Routes } from '@/routes';
|
||||
import { useCallback } from 'react';
|
||||
@ -70,8 +71,8 @@ export const useNavigatePage = () => {
|
||||
}, [navigate]);
|
||||
|
||||
// Builds a zero-argument callback that navigates to the agent page for `id`.
// `category` is optional: the `category` query parameter is appended only when
// a category is actually supplied, so callers that omit it get the plain
// `${Routes.Agent}/${id}` URL instead of one ending in `?category=undefined`.
const navigateToAgent = useCallback(
  (id: string, category?: AgentCategory) => () => {
    const query = category
      ? `?${AgentQuery.Category}=${encodeURIComponent(category)}`
      : '';
    navigate(`${Routes.Agent}/${id}${query}`);
  },
  [navigate],
);
|
||||
|
||||
@ -161,7 +161,7 @@ export type IIterationNode = BaseNode;
|
||||
export type IIterationStartNode = BaseNode;
|
||||
export type IKeywordNode = BaseNode;
|
||||
export type ICodeNode = BaseNode<ICodeForm>;
|
||||
export type IAgentNode = BaseNode;
|
||||
export type IAgentNode<T = any> = BaseNode<T>;
|
||||
|
||||
export type RAGFlowNodeType =
|
||||
| IBeginNode
|
||||
|
||||
@ -115,7 +115,7 @@ export default {
|
||||
generateKnowledgeGraph:
|
||||
'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
|
||||
generateRaptor:
|
||||
'This will extract entities and relationships from all your documents in this dataset. The process may take a while to complete.',
|
||||
'Performs recursive clustering and summarization of document chunks to build a hierarchical tree structure, enabling more context-aware retrieval across lengthy documents.',
|
||||
generate: 'Generate',
|
||||
raptor: 'RAPTOR',
|
||||
processingType: 'Processing Type',
|
||||
@ -1533,8 +1533,8 @@ This delimiter is used to split the input text into several text pieces echo of
|
||||
'Your users will see this welcome message at the beginning.',
|
||||
modeTip: 'The mode defines how the workflow is initiated.',
|
||||
mode: 'Mode',
|
||||
conversational: 'conversational',
|
||||
task: 'task',
|
||||
conversational: 'Conversational',
|
||||
task: 'Task',
|
||||
beginInputTip:
|
||||
'By defining input parameters, this content can be accessed by other components in subsequent processes.',
|
||||
query: 'Query variables',
|
||||
@ -1605,6 +1605,119 @@ This delimiter is used to split the input text into several text pieces echo of
|
||||
ceateAgent: 'Agent flow',
|
||||
createPipeline: 'Ingestion pipeline',
|
||||
chooseAgentType: 'Choose Agent Type',
|
||||
parser: 'Parser',
|
||||
parserDescription:
|
||||
'Extracts raw text and structure from files for downstream processing.',
|
||||
tokenizer: 'Indexer',
|
||||
tokenizerRequired: 'Please add the Indexer node first',
|
||||
tokenizerDescription:
|
||||
'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
|
||||
splitter: 'Token',
|
||||
splitterDescription:
|
||||
'Split text into chunks by token length with optional delimiters and overlap.',
|
||||
hierarchicalMergerDescription:
|
||||
'Split documents into sections by title hierarchy with regex rules for finer control.',
|
||||
hierarchicalMerger: 'Title',
|
||||
extractor: 'Transformer',
|
||||
extractorDescription:
|
||||
'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
|
||||
outputFormat: 'Output format',
|
||||
fileFormats: 'File format',
|
||||
fileFormatOptions: {
|
||||
pdf: 'PDF',
|
||||
spreadsheet: 'Spreadsheet',
|
||||
image: 'Image',
|
||||
email: 'Email',
|
||||
'text&markdown': 'Text & Markup',
|
||||
word: 'Word',
|
||||
slides: 'PPT',
|
||||
audio: 'Audio',
|
||||
},
|
||||
fields: 'Field',
|
||||
addParser: 'Add Parser',
|
||||
hierarchy: 'Hierarchy',
|
||||
regularExpressions: 'Regular Expressions',
|
||||
overlappedPercent: 'Overlapped percent (%)',
|
||||
searchMethod: 'Search method',
|
||||
searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
|
||||
The Indexer will store the content in the corresponding data structures for the selected methods.`,
|
||||
// file: 'File',
|
||||
parserMethod: 'Parsing method',
|
||||
// systemPrompt: 'System Prompt',
|
||||
systemPromptPlaceholder:
|
||||
'Enter system prompt for image analysis, if empty the system default value will be used',
|
||||
exportJson: 'Export JSON',
|
||||
viewResult: 'View result',
|
||||
running: 'Running',
|
||||
summary: 'Summary',
|
||||
keywords: 'Keywords',
|
||||
questions: 'Questions',
|
||||
metadata: 'Metadata',
|
||||
fieldName: 'Result destination',
|
||||
prompts: {
|
||||
system: {
|
||||
keywords: `Role
|
||||
You are a text analyzer.
|
||||
|
||||
Task
|
||||
Extract the most important keywords/phrases of a given piece of text content.
|
||||
|
||||
Requirements
|
||||
- Summarize the text content, and give the top 5 important keywords/phrases.
|
||||
- The keywords MUST be in the same language as the given piece of text content.
|
||||
- The keywords are delimited by ENGLISH COMMA.
|
||||
- Output keywords ONLY.`,
|
||||
questions: `Role
|
||||
You are a text analyzer.
|
||||
|
||||
Task
|
||||
Propose 3 questions about a given piece of text content.
|
||||
|
||||
Requirements
|
||||
- Understand and summarize the text content, and propose the top 3 important questions.
|
||||
- The questions SHOULD NOT have overlapping meanings.
|
||||
- The questions SHOULD cover the main content of the text as much as possible.
|
||||
- The questions MUST be in the same language as the given piece of text content.
|
||||
- One question per line.
|
||||
- Output questions ONLY.`,
|
||||
summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.
|
||||
|
||||
Key Instructions:
|
||||
1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
|
||||
2. Language: Write the summary in the same language as the source text.
|
||||
3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
|
||||
4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
|
||||
metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.
|
||||
|
||||
Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
|
||||
},
|
||||
user: {
|
||||
keywords: `Text Content
|
||||
[Insert text here]`,
|
||||
questions: `Text Content
|
||||
[Insert text here]`,
|
||||
summary: `Text to Summarize:
|
||||
[Insert text here]`,
|
||||
metadata: `Content: [INSERT CONTENT HERE]`,
|
||||
},
|
||||
},
|
||||
cancel: 'Cancel',
|
||||
swicthPromptMessage:
|
||||
'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
|
||||
tokenizerSearchMethodOptions: {
|
||||
full_text: 'Full-text',
|
||||
embedding: 'Embedding',
|
||||
},
|
||||
filenameEmbeddingWeight: 'Filename embedding weight',
|
||||
tokenizerFieldsOptions: {
|
||||
text: 'Processed Text',
|
||||
keywords: 'Keywords',
|
||||
questions: 'Questions',
|
||||
summary: 'Augmented Context',
|
||||
},
|
||||
imageParseMethodOptions: {
|
||||
ocr: 'OCR',
|
||||
},
|
||||
},
|
||||
llmTools: {
|
||||
bad_calculator: {
|
||||
@ -1705,125 +1818,6 @@ This delimiter is used to split the input text into several text pieces echo of
|
||||
<p>Are you sure you want to proceed?</p> `,
|
||||
unlinkPipelineModalConfirmText: 'Unlink',
|
||||
},
|
||||
dataflow: {
|
||||
parser: 'Parser',
|
||||
parserDescription:
|
||||
'Extracts raw text and structure from files for downstream processing.',
|
||||
tokenizer: 'Indexer',
|
||||
tokenizerRequired: 'Please add the Indexer node first',
|
||||
tokenizerDescription:
|
||||
'Transforms text into the required data structure (e.g., vector embeddings for Embedding Search) depending on the chosen search method.',
|
||||
splitter: 'Token',
|
||||
splitterDescription:
|
||||
'Split text into chunks by token length with optional delimiters and overlap.',
|
||||
hierarchicalMergerDescription:
|
||||
'Split documents into sections by title hierarchy with regex rules for finer control.',
|
||||
hierarchicalMerger: 'Title',
|
||||
extractor: 'Transformer',
|
||||
extractorDescription:
|
||||
'Use an LLM to extract structured insights from document chunks—such as summaries, classifications, etc.',
|
||||
outputFormat: 'Output format',
|
||||
lang: 'Language',
|
||||
fileFormats: 'File format',
|
||||
fileFormatOptions: {
|
||||
pdf: 'PDF',
|
||||
spreadsheet: 'Spreadsheet',
|
||||
image: 'Image',
|
||||
email: 'Email',
|
||||
'text&markdown': 'Text & Markup',
|
||||
word: 'Word',
|
||||
slides: 'PPT',
|
||||
audio: 'Audio',
|
||||
},
|
||||
fields: 'Field',
|
||||
addParser: 'Add Parser',
|
||||
hierarchy: 'Hierarchy',
|
||||
regularExpressions: 'Regular Expressions',
|
||||
overlappedPercent: 'Overlapped percent (%)',
|
||||
searchMethod: 'Search method',
|
||||
searchMethodTip: `Defines how the content can be searched — by full-text, embedding, or both.
|
||||
The Indexer will store the content in the corresponding data structures for the selected methods.`,
|
||||
begin: 'File',
|
||||
parserMethod: 'Parsing method',
|
||||
systemPrompt: 'System Prompt',
|
||||
systemPromptPlaceholder:
|
||||
'Enter system prompt for image analysis, if empty the system default value will be used',
|
||||
exportJson: 'Export JSON',
|
||||
viewResult: 'View result',
|
||||
running: 'Running',
|
||||
summary: 'Summary',
|
||||
keywords: 'Keywords',
|
||||
questions: 'Questions',
|
||||
metadata: 'Metadata',
|
||||
fieldName: 'Result destination',
|
||||
prompts: {
|
||||
system: {
|
||||
keywords: `Role
|
||||
You are a text analyzer.
|
||||
|
||||
Task
|
||||
Extract the most important keywords/phrases of a given piece of text content.
|
||||
|
||||
Requirements
|
||||
- Summarize the text content, and give the top 5 important keywords/phrases.
|
||||
- The keywords MUST be in the same language as the given piece of text content.
|
||||
- The keywords are delimited by ENGLISH COMMA.
|
||||
- Output keywords ONLY.`,
|
||||
questions: `Role
|
||||
You are a text analyzer.
|
||||
|
||||
Task
|
||||
Propose 3 questions about a given piece of text content.
|
||||
|
||||
Requirements
|
||||
- Understand and summarize the text content, and propose the top 3 important questions.
|
||||
- The questions SHOULD NOT have overlapping meanings.
|
||||
- The questions SHOULD cover the main content of the text as much as possible.
|
||||
- The questions MUST be in the same language as the given piece of text content.
|
||||
- One question per line.
|
||||
- Output questions ONLY.`,
|
||||
summary: `Act as a precise summarizer. Your task is to create a summary of the provided content that is both concise and faithful to the original.
|
||||
|
||||
Key Instructions:
|
||||
1. Accuracy: Strictly base the summary on the information given. Do not introduce any new facts, conclusions, or interpretations that are not explicitly stated.
|
||||
2. Language: Write the summary in the same language as the source text.
|
||||
3. Objectivity: Present the key points without bias, preserving the original intent and tone of the content. Do not editorialize.
|
||||
4. Conciseness: Focus on the most important ideas, omitting minor details and fluff.`,
|
||||
metadata: `Extract important structured information from the given content. Output ONLY a valid JSON string with no additional text. If no important structured information is found, output an empty JSON object: {}.
|
||||
|
||||
Important structured information may include: names, dates, locations, events, key facts, numerical data, or other extractable entities.`,
|
||||
},
|
||||
user: {
|
||||
keywords: `Text Content
|
||||
[Insert text here]`,
|
||||
questions: `Text Content
|
||||
[Insert text here]`,
|
||||
summary: `Text to Summarize:
|
||||
[Insert text here]`,
|
||||
metadata: `Content: [INSERT CONTENT HERE]`,
|
||||
},
|
||||
},
|
||||
cancel: 'Cancel',
|
||||
swicthPromptMessage:
|
||||
'The prompt word will change. Please confirm whether to abandon the existing prompt word?',
|
||||
tokenizerSearchMethodOptions: {
|
||||
full_text: 'Full-text',
|
||||
embedding: 'Embedding',
|
||||
},
|
||||
filenameEmbeddingWeight: 'Filename embedding weight',
|
||||
tokenizerFieldsOptions: {
|
||||
text: 'Processed Text',
|
||||
keywords: 'Keywords',
|
||||
questions: 'Questions',
|
||||
summary: 'Augmented Context',
|
||||
},
|
||||
imageParseMethodOptions: {
|
||||
ocr: 'OCR',
|
||||
},
|
||||
note: 'Note',
|
||||
noteDescription: 'Note',
|
||||
notePlaceholder: 'Please enter a note',
|
||||
},
|
||||
datasetOverview: {
|
||||
downloadTip: 'Files being downloaded from data sources. ',
|
||||
processingTip: 'Files being processed by Ingestion pipeline.',
|
||||
|
||||
@ -105,7 +105,7 @@ export default {
|
||||
generatedOn: '生成于',
|
||||
subbarFiles: '文件列表',
|
||||
generate: '生成',
|
||||
raptor: 'Raptor',
|
||||
raptor: 'RAPTOR',
|
||||
processingType: '处理类型',
|
||||
dataPipeline: '数据管道',
|
||||
operations: '操作',
|
||||
@ -258,7 +258,6 @@ export default {
|
||||
theDocumentBeingParsedCannotBeDeleted: '正在解析的文档不能被删除',
|
||||
},
|
||||
knowledgeConfiguration: {
|
||||
tocExtraction: '目录增强',
|
||||
tocExtractionTip:
|
||||
'对于已有的chunk生成层级结构的目录信息(每个文件一个目录)。在查询时,激活`目录增强`后,系统会用大模型去判断用户问题和哪些目录项相关,从而找到相关的chunk。',
|
||||
deleteGenerateModalContent: `
|
||||
@ -1512,6 +1511,93 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
createFromTemplate: '从模板创建',
|
||||
importJsonFile: '导入 JSON 文件',
|
||||
chooseAgentType: '选择智能体类型',
|
||||
parser: '解析器',
|
||||
parserDescription: '从文件中提取原始文本和结构以供下游处理。',
|
||||
tokenizer: '分词器',
|
||||
tokenizerRequired: '请先添加Tokenizer节点',
|
||||
tokenizerDescription:
|
||||
'根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。',
|
||||
splitter: '按字符分割',
|
||||
splitterDescription:
|
||||
'根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
|
||||
hierarchicalMergerDescription:
|
||||
'使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
|
||||
hierarchicalMerger: '按标题分割',
|
||||
extractor: '提取器',
|
||||
extractorDescription:
|
||||
'使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。',
|
||||
outputFormat: '输出格式',
|
||||
fileFormats: '文件格式',
|
||||
fields: '字段',
|
||||
addParser: '增加解析器',
|
||||
hierarchy: '层次结构',
|
||||
regularExpressions: '正则表达式',
|
||||
overlappedPercent: '重叠百分比(%)',
|
||||
searchMethod: '搜索方法',
|
||||
searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
|
||||
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
||||
filenameEmbdWeight: '文件名嵌入权重',
|
||||
parserMethod: '解析方法',
|
||||
systemPromptPlaceholder:
|
||||
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
|
||||
exportJson: '导出 JSON',
|
||||
viewResult: '查看结果',
|
||||
running: '运行中',
|
||||
summary: '增强上下文',
|
||||
keywords: '关键词',
|
||||
questions: '问题',
|
||||
metadata: '元数据',
|
||||
fieldName: '结果目的地',
|
||||
prompts: {
|
||||
system: {
|
||||
keywords: `角色
|
||||
你是一名文本分析员。
|
||||
|
||||
任务
|
||||
从给定的文本内容中提取最重要的关键词/短语。
|
||||
|
||||
要求
|
||||
- 总结文本内容,并给出最重要的5个关键词/短语。
|
||||
- 关键词必须与给定的文本内容使用相同的语言。
|
||||
- 关键词之间用英文逗号分隔。
|
||||
- 仅输出关键词。`,
|
||||
questions: `角色
|
||||
你是一名文本分析员。
|
||||
|
||||
任务
|
||||
针对给定的文本内容提出3个问题。
|
||||
|
||||
要求
|
||||
- 理解并总结文本内容,并提出最重要的3个问题。
|
||||
- 问题的含义不应重叠。
|
||||
- 问题应尽可能涵盖文本的主要内容。
|
||||
- 问题必须与给定的文本内容使用相同的语言。
|
||||
- 每行一个问题。
|
||||
- 仅输出问题。`,
|
||||
summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
|
||||
|
||||
关键说明:
|
||||
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
|
||||
2. 语言:摘要必须使用与原文相同的语言。
|
||||
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
|
||||
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
|
||||
metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。
|
||||
|
||||
重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
|
||||
},
|
||||
user: {
|
||||
keywords: `文本内容
|
||||
[在此处插入文本]`,
|
||||
questions: `文本内容
|
||||
[在此处插入文本]`,
|
||||
summary: `要总结的文本:
|
||||
[在此处插入文本]`,
|
||||
metadata: `内容:[在此处插入内容]`,
|
||||
},
|
||||
},
|
||||
cancel: '取消',
|
||||
filenameEmbeddingWeight: '文件名嵌入权重',
|
||||
switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
|
||||
},
|
||||
footer: {
|
||||
profile: 'All rights reserved @ React',
|
||||
@ -1619,101 +1705,6 @@ General:实体和关系提取提示来自 GitHub - microsoft/graphrag:基于
|
||||
<p>你确定要继续吗?</p> `,
|
||||
unlinkPipelineModalConfirmText: '解绑',
|
||||
},
|
||||
dataflow: {
|
||||
parser: '解析器',
|
||||
parserDescription: '从文件中提取原始文本和结构以供下游处理。',
|
||||
tokenizer: '分词器',
|
||||
tokenizerRequired: '请先添加Tokenizer节点',
|
||||
tokenizerDescription:
|
||||
'根据所选的搜索方法,将文本转换为所需的数据结构(例如,用于嵌入搜索的向量嵌入)。',
|
||||
splitter: '按字符分割',
|
||||
splitterDescription:
|
||||
'根据分词器长度将文本拆分成块,并带有可选的分隔符和重叠。',
|
||||
hierarchicalMergerDescription:
|
||||
'使用正则表达式规则按标题层次结构将文档拆分成多个部分,以实现更精细的控制。',
|
||||
hierarchicalMerger: '按标题分割',
|
||||
extractor: '提取器',
|
||||
extractorDescription:
|
||||
'使用 LLM 从文档块(例如摘要、分类等)中提取结构化见解。',
|
||||
outputFormat: '输出格式',
|
||||
lang: '语言',
|
||||
fileFormats: '文件格式',
|
||||
fields: '字段',
|
||||
addParser: '增加解析器',
|
||||
hierarchy: '层次结构',
|
||||
regularExpressions: '正则表达式',
|
||||
overlappedPercent: '重叠百分比(%)',
|
||||
searchMethod: '搜索方法',
|
||||
searchMethodTip: `决定该数据集启用的搜索方式,可选择全文、向量,或两者兼有。
|
||||
Tokenizer 会根据所选方式将内容存储为对应的数据结构。`,
|
||||
filenameEmbdWeight: '文件名嵌入权重',
|
||||
begin: '文件',
|
||||
parserMethod: '解析方法',
|
||||
systemPrompt: '系统提示词',
|
||||
systemPromptPlaceholder:
|
||||
'请输入用于图像分析的系统提示词,若为空则使用系统缺省值',
|
||||
exportJson: '导出 JSON',
|
||||
viewResult: '查看结果',
|
||||
running: '运行中',
|
||||
summary: '增强上下文',
|
||||
keywords: '关键词',
|
||||
questions: '问题',
|
||||
metadata: '元数据',
|
||||
fieldName: '结果目的地',
|
||||
prompts: {
|
||||
system: {
|
||||
keywords: `角色
|
||||
你是一名文本分析员。
|
||||
|
||||
任务
|
||||
从给定的文本内容中提取最重要的关键词/短语。
|
||||
|
||||
要求
|
||||
- 总结文本内容,并给出最重要的5个关键词/短语。
|
||||
- 关键词必须与给定的文本内容使用相同的语言。
|
||||
- 关键词之间用英文逗号分隔。
|
||||
- 仅输出关键词。`,
|
||||
questions: `角色
|
||||
你是一名文本分析员。
|
||||
|
||||
任务
|
||||
针对给定的文本内容提出3个问题。
|
||||
|
||||
要求
|
||||
- 理解并总结文本内容,并提出最重要的3个问题。
|
||||
- 问题的含义不应重叠。
|
||||
- 问题应尽可能涵盖文本的主要内容。
|
||||
- 问题必须与给定的文本内容使用相同的语言。
|
||||
- 每行一个问题。
|
||||
- 仅输出问题。`,
|
||||
summary: `扮演一个精准的摘要者。你的任务是为提供的内容创建一个简洁且忠实于原文的摘要。
|
||||
|
||||
关键说明:
|
||||
1. 准确性:摘要必须严格基于所提供的信息。请勿引入任何未明确说明的新事实、结论或解释。
|
||||
2. 语言:摘要必须使用与原文相同的语言。
|
||||
3. 客观性:不带偏见地呈现要点,保留内容的原始意图和语气。请勿进行编辑。
|
||||
4. 简洁性:专注于最重要的思想,省略细节和多余的内容。`,
|
||||
metadata: `从给定内容中提取重要的结构化信息。仅输出有效的 JSON 字符串,不包含任何附加文本。如果未找到重要的结构化信息,则输出一个空的 JSON 对象:{}。
|
||||
|
||||
重要的结构化信息可能包括:姓名、日期、地点、事件、关键事实、数字数据或其他可提取实体。`,
|
||||
},
|
||||
user: {
|
||||
keywords: `文本内容
|
||||
[在此处插入文本]`,
|
||||
questions: `文本内容
|
||||
[在此处插入文本]`,
|
||||
summary: `要总结的文本:
|
||||
[在此处插入文本]`,
|
||||
metadata: `内容:[在此处插入内容]`,
|
||||
},
|
||||
},
|
||||
cancel: '取消',
|
||||
filenameEmbeddingWeight: '文件名嵌入权重',
|
||||
switchPromptMessage: '提示词将发生变化,请确认是否放弃已有提示词?',
|
||||
note: '注释',
|
||||
noteDescription: '注释',
|
||||
notePlaceholder: '请输入注释',
|
||||
},
|
||||
datasetOverview: {
|
||||
downloadTip: '正在从数据源下载文件。',
|
||||
processingTip: '正在由pipeline处理文件。',
|
||||
|
||||
@ -56,19 +56,24 @@ import { RagNode } from './node';
|
||||
import { AgentNode } from './node/agent-node';
|
||||
import { BeginNode } from './node/begin-node';
|
||||
import { CategorizeNode } from './node/categorize-node';
|
||||
import { InnerNextStepDropdown } from './node/dropdown/next-step-dropdown';
|
||||
import { NextStepDropdown } from './node/dropdown/next-step-dropdown';
|
||||
import { ExtractorNode } from './node/extractor-node';
|
||||
import { FileNode } from './node/file-node';
|
||||
import { GenerateNode } from './node/generate-node';
|
||||
import { InvokeNode } from './node/invoke-node';
|
||||
import { IterationNode, IterationStartNode } from './node/iteration-node';
|
||||
import { KeywordNode } from './node/keyword-node';
|
||||
import { MessageNode } from './node/message-node';
|
||||
import NoteNode from './node/note-node';
|
||||
import ParserNode from './node/parser-node';
|
||||
import { PlaceholderNode } from './node/placeholder-node';
|
||||
import { RelevantNode } from './node/relevant-node';
|
||||
import { RetrievalNode } from './node/retrieval-node';
|
||||
import { RewriteNode } from './node/rewrite-node';
|
||||
import { SplitterNode } from './node/splitter-node';
|
||||
import { SwitchNode } from './node/switch-node';
|
||||
import { TemplateNode } from './node/template-node';
|
||||
import TokenizerNode from './node/tokenizer-node';
|
||||
import { ToolNode } from './node/tool-node';
|
||||
|
||||
export const nodeTypes: NodeTypes = {
|
||||
@ -91,6 +96,11 @@ export const nodeTypes: NodeTypes = {
|
||||
iterationStartNode: IterationStartNode,
|
||||
agentNode: AgentNode,
|
||||
toolNode: ToolNode,
|
||||
fileNode: FileNode,
|
||||
parserNode: ParserNode,
|
||||
tokenizerNode: TokenizerNode,
|
||||
splitterNode: SplitterNode,
|
||||
contextNode: ExtractorNode,
|
||||
};
|
||||
|
||||
const edgeTypes = {
|
||||
@ -194,6 +204,7 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
||||
getConnectionStartContext,
|
||||
shouldPreventClose,
|
||||
onMove,
|
||||
nodeId,
|
||||
} = useConnectionDrag(
|
||||
reactFlowInstance,
|
||||
originalOnConnect,
|
||||
@ -312,7 +323,7 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
||||
}
|
||||
}
|
||||
>
|
||||
<InnerNextStepDropdown
|
||||
<NextStepDropdown
|
||||
hideModal={() => {
|
||||
removePlaceholderNode();
|
||||
hideModal();
|
||||
@ -320,9 +331,10 @@ function AgentCanvas({ drawerVisible, hideDrawer }: IProps) {
|
||||
}}
|
||||
position={dropdownPosition}
|
||||
onNodeCreated={onNodeCreated}
|
||||
nodeId={nodeId}
|
||||
>
|
||||
<span></span>
|
||||
</InnerNextStepDropdown>
|
||||
</NextStepDropdown>
|
||||
</HandleContext.Provider>
|
||||
)}
|
||||
</AgentInstanceContext.Provider>
|
||||
|
||||
@ -1,12 +1,14 @@
|
||||
import LLMLabel from '@/components/llm-select/llm-label';
|
||||
import { IAgentNode } from '@/interfaces/database/flow';
|
||||
import { cn } from '@/lib/utils';
|
||||
import { Handle, NodeProps, Position } from '@xyflow/react';
|
||||
import { get } from 'lodash';
|
||||
import { memo, useMemo } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { AgentExceptionMethod, NodeHandleId } from '../../constant';
|
||||
import { AgentFormSchemaType } from '../../form/agent-form';
|
||||
import useGraphStore from '../../store';
|
||||
import { isBottomSubAgent } from '../../utils';
|
||||
import { hasSubAgent, isBottomSubAgent } from '../../utils';
|
||||
import { CommonHandle, LeftEndHandle } from './handle';
|
||||
import { RightHandleStyle } from './handle-icon';
|
||||
import NodeHeader from './node-header';
|
||||
@ -18,7 +20,7 @@ function InnerAgentNode({
|
||||
data,
|
||||
isConnectable = true,
|
||||
selected,
|
||||
}: NodeProps<IAgentNode>) {
|
||||
}: NodeProps<IAgentNode<AgentFormSchemaType>>) {
|
||||
const edges = useGraphStore((state) => state.edges);
|
||||
const { t } = useTranslation();
|
||||
|
||||
@ -30,6 +32,12 @@ function InnerAgentNode({
|
||||
return get(data, 'form.exception_method');
|
||||
}, [data]);
|
||||
|
||||
const hasTools = useMemo(() => {
|
||||
const tools = get(data, 'form.tools', []);
|
||||
const mcp = get(data, 'form.mcp', []);
|
||||
return tools.length > 0 || mcp.length > 0;
|
||||
}, [data]);
|
||||
|
||||
const isGotoMethod = useMemo(() => {
|
||||
return exceptionMethod === AgentExceptionMethod.Goto;
|
||||
}, [exceptionMethod]);
|
||||
@ -51,7 +59,6 @@ function InnerAgentNode({
|
||||
></CommonHandle>
|
||||
</>
|
||||
)}
|
||||
|
||||
{isHeadAgent || (
|
||||
<Handle
|
||||
type="target"
|
||||
@ -67,7 +74,9 @@ function InnerAgentNode({
|
||||
isConnectable={false}
|
||||
id={NodeHandleId.AgentBottom}
|
||||
style={{ left: 180 }}
|
||||
className="!bg-accent-primary !size-2"
|
||||
className={cn('!bg-accent-primary !size-2 invisible', {
|
||||
visible: hasSubAgent(edges, id),
|
||||
})}
|
||||
></Handle>
|
||||
<Handle
|
||||
type="source"
|
||||
@ -75,7 +84,9 @@ function InnerAgentNode({
|
||||
isConnectable={false}
|
||||
id={NodeHandleId.Tool}
|
||||
style={{ left: 20 }}
|
||||
className="!bg-accent-primary !size-2"
|
||||
className={cn('!bg-accent-primary !size-2 invisible', {
|
||||
visible: hasTools,
|
||||
})}
|
||||
></Handle>
|
||||
<NodeHeader id={id} name={data.name} label={data.label}></NodeHeader>
|
||||
<section className="flex flex-col gap-2">
|
||||
|
||||
@ -17,6 +17,9 @@ import {
|
||||
SelectValue,
|
||||
} from '@/components/ui/select';
|
||||
|
||||
import { cn } from '@/lib/utils';
|
||||
import { PropsWithChildren } from 'react';
|
||||
|
||||
export function CardWithForm() {
|
||||
return (
|
||||
<Card className="w-[350px]">
|
||||
@ -55,3 +58,13 @@ export function CardWithForm() {
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
||||
// Props for LabelCard: optional extra class names plus React children.
type LabelCardProps = PropsWithChildren<{ className?: string }>;

/**
 * Small card-style wrapper: renders its children inside a <div> carrying the
 * base card classes, merged (via `cn`) with any caller-supplied `className`.
 */
export function LabelCard(props: LabelCardProps) {
  const { className, children } = props;
  const mergedClassName = cn('bg-bg-card rounded-sm p-1', className);

  return <div className={mergedClassName}>{children}</div>;
}
|
||||
|
||||
196
web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx
Normal file
196
web/src/pages/agent/canvas/node/dropdown/accordion-operators.tsx
Normal file
@ -0,0 +1,196 @@
|
||||
import {
|
||||
Accordion,
|
||||
AccordionContent,
|
||||
AccordionItem,
|
||||
AccordionTrigger,
|
||||
} from '@/components/ui/accordion';
|
||||
import { Operator } from '@/constants/agent';
|
||||
import useGraphStore from '@/pages/agent/store';
|
||||
import { useCallback, useMemo } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { OperatorItemList } from './operator-item-list';
|
||||
|
||||
/**
 * Accordion listing the operators that can be added to an agent canvas,
 * grouped into five sections that are all expanded by default.
 *
 * `isCustomDropdown` and `mousePosition` are not used here directly; they are
 * forwarded unchanged to every OperatorItemList.
 */
export function AccordionOperators({
  isCustomDropdown = false,
  mousePosition,
}: {
  isCustomDropdown?: boolean;
  mousePosition?: { x: number; y: number };
}) {
  const { t } = useTranslation();

  // One entry per accordion section, in display order.
  const sections = [
    {
      value: 'item-1',
      title: t('flow.foundation'),
      operators: [Operator.Agent, Operator.Retrieval],
    },
    {
      value: 'item-2',
      title: t('flow.dialog'),
      operators: [Operator.Message, Operator.UserFillUp],
    },
    {
      value: 'item-3',
      title: t('flow.flow'),
      operators: [Operator.Switch, Operator.Iteration, Operator.Categorize],
    },
    {
      value: 'item-4',
      title: t('flow.dataManipulation'),
      operators: [Operator.Code, Operator.StringTransform],
    },
    {
      value: 'item-5',
      title: t('flow.tools'),
      operators: [
        Operator.TavilySearch,
        Operator.TavilyExtract,
        Operator.ExeSQL,
        Operator.Google,
        Operator.YahooFinance,
        Operator.Email,
        Operator.DuckDuckGo,
        Operator.Wikipedia,
        Operator.GoogleScholar,
        Operator.ArXiv,
        Operator.PubMed,
        Operator.GitHub,
        Operator.Invoke,
        Operator.WenCai,
        Operator.SearXNG,
      ],
    },
  ];

  return (
    <Accordion
      type="multiple"
      className="px-2 text-text-title max-h-[45vh] overflow-auto scrollbar-none"
      defaultValue={sections.map((section) => section.value)}
    >
      {sections.map((section) => (
        <AccordionItem key={section.value} value={section.value}>
          <AccordionTrigger className="text-xl">
            {section.title}
          </AccordionTrigger>
          <AccordionContent className="flex flex-col gap-4 text-balance">
            <OperatorItemList
              operators={section.operators}
              isCustomDropdown={isCustomDropdown}
              mousePosition={mousePosition}
            ></OperatorItemList>
          </AccordionContent>
        </AccordionItem>
      ))}
    </Accordion>
  );
}
|
||||
|
||||
// Hook used to keep certain operator types limited to one instance on the
// canvas. It returns a function that takes the candidate operators and hands
// back only those for which the graph store's `findNodeByName` lookup comes up
// empty, preserving the input order.
function useRestrictSingleOperatorOnCanvas() {
  const { findNodeByName } = useGraphStore((state) => state);

  return useCallback(
    (singleOperators: Operator[]) =>
      singleOperators.filter((operator) => !findNodeByName(operator)),
    [findNodeByName],
  );
}
|
||||
|
||||
/**
 * Operator picker for the pipeline canvas.
 *
 * Renders a flat list with Parser and Tokenizer (each only while
 * `useRestrictSingleOperatorOnCanvas` still allows it) followed by Extractor,
 * which is always offered. Below it, a collapsible "Chunker" section lists
 * Splitter and HierarchicalMerger under the same single-instance restriction.
 * The section is hidden when the node identified by `nodeId` is an Extractor
 * or when no chunker operator remains available.
 *
 * @param isCustomDropdown - Forwarded unchanged to every `OperatorItemList`.
 * @param mousePosition - Forwarded unchanged to every `OperatorItemList`.
 * @param nodeId - Id of the node whose operator type decides whether the
 *   Chunker section is shown.
 */
export function PipelineAccordionOperators({
  isCustomDropdown = false,
  mousePosition,
  nodeId,
}: {
  isCustomDropdown?: boolean;
  mousePosition?: { x: number; y: number };
  nodeId?: string;
}) {
  const restrictSingleOperatorOnCanvas = useRestrictSingleOperatorOnCanvas();
  const { getOperatorTypeFromId } = useGraphStore((state) => state);

  // Parser/Tokenizer are single-instance; Extractor is appended unconditionally.
  const operators = useMemo(
    () => [
      ...restrictSingleOperatorOnCanvas([Operator.Parser, Operator.Tokenizer]),
      Operator.Extractor,
    ],
    [restrictSingleOperatorOnCanvas],
  );

  // Chunker operators that are not yet present on the canvas.
  const chunkerOperators = useMemo(
    () =>
      restrictSingleOperatorOnCanvas([
        Operator.Splitter,
        Operator.HierarchicalMerger,
      ]),
    [restrictSingleOperatorOnCanvas],
  );

  const showChunker = useMemo(() => {
    const isExtractorNode =
      getOperatorTypeFromId(nodeId) === Operator.Extractor;
    return !isExtractorNode && chunkerOperators.length > 0;
  }, [chunkerOperators.length, getOperatorTypeFromId, nodeId]);

  // Props shared by both operator lists.
  const listProps = { isCustomDropdown, mousePosition };

  return (
    <>
      <OperatorItemList operators={operators} {...listProps} />
      {showChunker ? (
        <Accordion
          type="single"
          collapsible
          className="w-full px-4"
          defaultValue="item-1"
        >
          <AccordionItem value="item-1">
            <AccordionTrigger>Chunker</AccordionTrigger>
            <AccordionContent className="flex flex-col gap-4 text-balance">
              <OperatorItemList operators={chunkerOperators} {...listProps} />
            </AccordionContent>
          </AccordionItem>
        </Accordion>
      ) : null}
    </>
  );
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user