Compare commits

...

3 Commits

Author SHA1 Message Date
cc703da747 Fix: The agent dialogue sheet does not display the opening remarks. #10664 (#10665)
### What problem does this PR solve?

Fix: The agent dialogue sheet does not display the opening remarks.
#10664

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-10-20 13:46:05 +08:00
d956a442ce Fix: Remove pdf embed support, update based on #10635 (#10663)
### What problem does this PR solve?

Fix: Remove PDF embed support, update based on #10635

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-10-20 13:45:53 +08:00
5fc59a3132 Fix: retrieval test (#10662)
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
2025-10-20 11:37:18 +08:00
5 changed files with 27 additions and 79 deletions

View File

@ -350,7 +350,8 @@ def retrieval_test():
float(req.get("similarity_threshold", 0.0)), float(req.get("similarity_threshold", 0.0)),
float(req.get("vector_similarity_weight", 0.3)), float(req.get("vector_similarity_weight", 0.3)),
top, top,
doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"), doc_ids, rerank_mdl=rerank_mdl,
highlight=req.get("highlight", False),
rank_feature=labels rank_feature=labels
) )
if use_kg: if use_kg:

View File

@ -35,7 +35,6 @@ from typing import List, Union, Tuple
# Third-party imports # Third-party imports
import olefile import olefile
import fitz
import pdfplumber import pdfplumber
from cachetools import LRUCache, cached from cachetools import LRUCache, cached
from PIL import Image from PIL import Image
@ -299,6 +298,7 @@ def read_potential_broken_pdf(blob):
return blob return blob
def _is_zip(h: bytes) -> bool: def _is_zip(h: bytes) -> bool:
return h.startswith(b"PK\x03\x04") or h.startswith(b"PK\x05\x06") or h.startswith(b"PK\x07\x08") return h.startswith(b"PK\x03\x04") or h.startswith(b"PK\x05\x06") or h.startswith(b"PK\x07\x08")
@ -317,18 +317,18 @@ def _guess_ext(b: bytes) -> str:
try: try:
with zipfile.ZipFile(io.BytesIO(b), "r") as z: with zipfile.ZipFile(io.BytesIO(b), "r") as z:
names = [n.lower() for n in z.namelist()] names = [n.lower() for n in z.namelist()]
if any(n.startswith("word/") for n in names): if any(n.startswith("word/") for n in names):
return ".docx" return ".docx"
if any(n.startswith("ppt/") for n in names): if any(n.startswith("ppt/") for n in names):
return ".pptx" return ".pptx"
if any(n.startswith("xl/") for n in names): if any(n.startswith("xl/") for n in names):
return ".xlsx" return ".xlsx"
except Exception: except Exception:
pass pass
return ".zip" return ".zip"
if _is_pdf(h): if _is_pdf(h):
return ".pdf" return ".pdf"
if _is_ole(h): if _is_ole(h):
return ".doc" return ".doc"
return ".bin" return ".bin"
@ -336,19 +336,21 @@ def _guess_ext(b: bytes) -> str:
def _extract_ole10native_payload(data: bytes) -> bytes: def _extract_ole10native_payload(data: bytes) -> bytes:
try: try:
pos = 0 pos = 0
if len(data) < 4: if len(data) < 4:
return data return data
_ = int.from_bytes(data[pos:pos+4], "little") _ = int.from_bytes(data[pos:pos+4], "little")
pos += 4 pos += 4
for _ in range(3): # filename/src/tmp (NUL-terminated ANSI) # filename/src/tmp (NUL-terminated ANSI)
for _ in range(3):
z = data.index(b"\x00", pos) z = data.index(b"\x00", pos)
pos = z + 1 pos = z + 1
# skip unknown 4 bytes
pos += 4 pos += 4
if pos + 4 > len(data): if pos + 4 > len(data):
return data return data
size = int.from_bytes(data[pos:pos+4], "little") size = int.from_bytes(data[pos:pos+4], "little")
pos += 4 pos += 4
if pos + size <= len(data): if pos + size <= len(data):
return data[pos:pos+size] return data[pos:pos+size]
except Exception: except Exception:
pass pass
@ -356,22 +358,20 @@ def _extract_ole10native_payload(data: bytes) -> bytes:
def extract_embed_file(target: Union[bytes, bytearray]) -> List[Tuple[str, bytes]]: def extract_embed_file(target: Union[bytes, bytearray]) -> List[Tuple[str, bytes]]:
""" """
Only extract the "first layer" of embedding, returning raw (filename, bytes). Only extract the 'first layer' of embedding, returning raw (filename, bytes).
These bytes can be directly used for io.BytesIO and then passed to zipfile/fitz/olefile and other libraries for further parsing.
""" """
top = bytes(target) top = bytes(target)
head = top[:8] head = top[:8]
out: List[Tuple[str, bytes]] = [] out: List[Tuple[str, bytes]] = []
seen = set() seen = set()
def push(b: bytes, name_hint: str = ""): def push(b: bytes, name_hint: str = ""):
h10 = _sha10(b) h10 = _sha10(b)
if h10 in seen: if h10 in seen:
return return
seen.add(h10) seen.add(h10)
ext = _guess_ext(b) ext = _guess_ext(b)
# If name_hint does not have a clear extension, use the extension guessed from the content # If name_hint has an extension use its basename; else fallback to guessed ext
if "." in name_hint: if "." in name_hint:
fname = name_hint.split("/")[-1] fname = name_hint.split("/")[-1]
else: else:
@ -382,8 +382,10 @@ def extract_embed_file(target: Union[bytes, bytearray]) -> List[Tuple[str, bytes
if _is_zip(head): if _is_zip(head):
try: try:
with zipfile.ZipFile(io.BytesIO(top), "r") as z: with zipfile.ZipFile(io.BytesIO(top), "r") as z:
embed_dirs = ("word/embeddings/", "word/objects/", "word/activex/", embed_dirs = (
"xl/embeddings/", "ppt/embeddings/") "word/embeddings/", "word/objects/", "word/activex/",
"xl/embeddings/", "ppt/embeddings/"
)
for name in z.namelist(): for name in z.namelist():
low = name.lower() low = name.lower()
if any(low.startswith(d) for d in embed_dirs): if any(low.startswith(d) for d in embed_dirs):
@ -396,24 +398,7 @@ def extract_embed_file(target: Union[bytes, bytearray]) -> List[Tuple[str, bytes
pass pass
return out return out
# PDF attachments # OLE container (doc/ppt/xls)
if _is_pdf(head):
try:
doc = fitz.open(stream=top, filetype="pdf")
count = getattr(doc, "embfile_count", 0)
for i in range(count):
try:
data = doc.embfile_get(i)
if data:
push(bytes(data), f"EmbeddedFiles[{i}]")
except Exception:
pass
doc.close()
except Exception:
pass
return out
# Legacy OLE (.doc/.xls/.ppt)
if _is_ole(head): if _is_ole(head):
try: try:
with olefile.OleFileIO(io.BytesIO(top)) as ole: with olefile.OleFileIO(io.BytesIO(top)) as ole:

View File

@ -137,7 +137,6 @@ dependencies = [
"mammoth>=1.11.0", "mammoth>=1.11.0",
"markdownify>=1.2.0", "markdownify>=1.2.0",
"captcha>=0.7.1", "captcha>=0.7.1",
"fitz>=0.0.1.dev2",
] ]
[project.optional-dependencies] [project.optional-dependencies]

38
uv.lock generated
View File

@ -1708,25 +1708,6 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/f0/48285f0262fe47103a4a45972ed2f9b93e4c80b8fd609fa98da78b2a5706/filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7", size = 16159, upload-time = "2024-06-22T15:59:12.695Z" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ae/f0/48285f0262fe47103a4a45972ed2f9b93e4c80b8fd609fa98da78b2a5706/filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7", size = 16159, upload-time = "2024-06-22T15:59:12.695Z" },
] ]
[[package]]
name = "fitz"
version = "0.0.1.dev2"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "configobj" },
{ name = "configparser" },
{ name = "httplib2" },
{ name = "nibabel" },
{ name = "nipype" },
{ name = "numpy" },
{ name = "pandas" },
{ name = "pyxnat" },
{ name = "scipy" },
]
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/28/27f27d66eb82f24e6595deb26c0a875e62431878c416e38eac515023abb2/fitz-0.0.1.dev2-py2.py3-none-any.whl", hash = "sha256:3b75083d58068d9bd51695eb2f78c9c92094cd6c8dada839e93edcddf18c0c5c", size = 20003, upload-time = "2017-02-25T23:29:54.403Z" },
]
[[package]] [[package]]
name = "flagembedding" name = "flagembedding"
version = "1.2.10" version = "1.2.10"
@ -5237,21 +5218,6 @@ crypto = [
{ name = "cryptography" }, { name = "cryptography" },
] ]
[[package]]
name = "pymupdf"
version = "1.26.5"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8d/9a/e0a4e92a85fc17be7c54afdbb113f0ade2a8bca49856d510e28bd249e462/pymupdf-1.26.5.tar.gz", hash = "sha256:8ef335e07f648492df240f2247854d0e7c0467afb9c4dc2376ec30978ec158c3", size = 84319274, upload-time = "2025-10-10T14:04:51.826Z" }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/dd/3f/7fc927fd66922ce838d4c974ff9a685c5f5aba108a5d94914dc05c9371f5/pymupdf-1.26.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2bfb58f07ad631e5f71ad0bd6f1ff52700f7ba7ebb4973130e81e75b721beae1", size = 23065601, upload-time = "2025-10-10T13:58:43.98Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/e2/e87e62284ba98d59f1fd4fc7542ef2ed0002525754a485fa4077b3bbddae/pymupdf-1.26.5-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:d58599479bc471d3ae56c3d68d9160d0b7de8a3bd40221ddc3a4eaae2d281b86", size = 22412612, upload-time = "2025-10-10T13:59:04.846Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/df/c2/af93c6367f79e9b5435f803bde51c1dc8225f054f8238162dda80b44986d/pymupdf-1.26.5-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:7dfea81fdd73437a6a6ce83e1fcf556faee9327a6540571e58bf04fa362bb0cd", size = 23457410, upload-time = "2025-10-10T22:45:26.355Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/5b/5a/1292a0df4ff71fbc00dfa8c08759d17c97e1e8ea9277eb5bc5f079ca188d/pymupdf-1.26.5-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:caad0ffeb63dcc4a29ca40f3c68d7b78d32a932e834b0056b529cc0bdbaaffc9", size = 24064941, upload-time = "2025-10-10T13:59:48.544Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/28/90/87b7fdfc9cd6991a3eb69a5752f6343374c34f258c511c242f4d60791eea/pymupdf-1.26.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e24e7a7d696bd398543cc5c147869edb2026d5d5a21b7f8e35db2f20170b389e", size = 24268203, upload-time = "2025-10-10T14:00:28.791Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/2c/99/9d4b36485538e29df0a013fb02bbf6b5b0743a428fa07515e36631c43363/pymupdf-1.26.5-cp39-abi3-win32.whl", hash = "sha256:a2a42f5911d153a47bf5c3e162a0bfe8745eb9bec3e59fbaf87617b4003d8270", size = 17130722, upload-time = "2025-10-10T14:00:51.377Z" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c6/96/fd59c1532891762ea4815e73956c532053d5e26d56969e1e5d1e4ca4b207/pymupdf-1.26.5-cp39-abi3-win_amd64.whl", hash = "sha256:39a6fb58182b27b51ea8150a0cd2e4ee7e0cf71e9d6723978f28699b42ee61ae", size = 18747258, upload-time = "2025-10-10T14:01:37.346Z" },
]
[[package]] [[package]]
name = "pymysql" name = "pymysql"
version = "1.1.1" version = "1.1.1"
@ -5706,7 +5672,6 @@ dependencies = [
{ name = "elasticsearch-dsl" }, { name = "elasticsearch-dsl" },
{ name = "extract-msg" }, { name = "extract-msg" },
{ name = "filelock" }, { name = "filelock" },
{ name = "fitz" },
{ name = "flasgger" }, { name = "flasgger" },
{ name = "flask" }, { name = "flask" },
{ name = "flask-cors" }, { name = "flask-cors" },
@ -5759,7 +5724,6 @@ dependencies = [
{ name = "pyclipper" }, { name = "pyclipper" },
{ name = "pycryptodomex" }, { name = "pycryptodomex" },
{ name = "pyicu" }, { name = "pyicu" },
{ name = "pymupdf" },
{ name = "pymysql" }, { name = "pymysql" },
{ name = "pyodbc" }, { name = "pyodbc" },
{ name = "pypdf" }, { name = "pypdf" },
@ -5868,7 +5832,6 @@ requires-dist = [
{ name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" }, { name = "fastembed", marker = "(platform_machine != 'x86_64' and extra == 'full') or (sys_platform == 'darwin' and extra == 'full')", specifier = ">=0.3.6,<0.4.0" },
{ name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" }, { name = "fastembed-gpu", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin' and extra == 'full'", specifier = ">=0.3.6,<0.4.0" },
{ name = "filelock", specifier = "==3.15.4" }, { name = "filelock", specifier = "==3.15.4" },
{ name = "fitz", specifier = ">=0.0.1.dev2" },
{ name = "flagembedding", marker = "extra == 'full'", specifier = "==1.2.10" }, { name = "flagembedding", marker = "extra == 'full'", specifier = "==1.2.10" },
{ name = "flasgger", specifier = ">=0.9.7.1,<0.10.0" }, { name = "flasgger", specifier = ">=0.9.7.1,<0.10.0" },
{ name = "flask", specifier = "==3.0.3" }, { name = "flask", specifier = "==3.0.3" },
@ -5922,7 +5885,6 @@ requires-dist = [
{ name = "pyclipper", specifier = "==1.3.0.post5" }, { name = "pyclipper", specifier = "==1.3.0.post5" },
{ name = "pycryptodomex", specifier = "==3.20.0" }, { name = "pycryptodomex", specifier = "==3.20.0" },
{ name = "pyicu", specifier = ">=2.15.3,<3.0.0" }, { name = "pyicu", specifier = ">=2.15.3,<3.0.0" },
{ name = "pymupdf", specifier = ">=1.26.5" },
{ name = "pymysql", specifier = ">=1.1.1,<2.0.0" }, { name = "pymysql", specifier = ">=1.1.1,<2.0.0" },
{ name = "pyodbc", specifier = ">=5.2.0,<6.0.0" }, { name = "pyodbc", specifier = ">=5.2.0,<6.0.0" },
{ name = "pypdf", specifier = "==6.0.0" }, { name = "pypdf", specifier = "==6.0.0" },

View File

@ -101,13 +101,13 @@ export function getLatestError(eventList: IEventList) {
export const useGetBeginNodePrologue = () => { export const useGetBeginNodePrologue = () => {
const getNode = useGraphStore((state) => state.getNode); const getNode = useGraphStore((state) => state.getNode);
const formData = get(getNode(BeginId), 'data.form', {});
return useMemo(() => { return useMemo(() => {
const formData = get(getNode(BeginId), 'data.form', {});
if (formData?.enablePrologue) { if (formData?.enablePrologue) {
return formData?.prologue; return formData?.prologue;
} }
}, [getNode]); }, [formData?.enablePrologue, formData?.prologue]);
}; };
export function useFindMessageReference(answerList: IEventList) { export function useFindMessageReference(answerList: IEventList) {
@ -381,9 +381,10 @@ export const useSendAgentMessage = ({
useEffect(() => { useEffect(() => {
const { content, id } = findMessageFromList(answerList); const { content, id } = findMessageFromList(answerList);
const inputAnswer = findInputFromList(answerList); const inputAnswer = findInputFromList(answerList);
if (answerList.length > 0) { const answer = content || getLatestError(answerList);
if (answerList.length > 0 && answer) {
addNewestOneAnswer({ addNewestOneAnswer({
answer: content || getLatestError(answerList), answer: answer,
id: id, id: id,
...inputAnswer, ...inputAnswer,
}); });