diff --git a/Dockerfile b/Dockerfile
index d16834125..95a9d54b7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -78,12 +78,12 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
 
 # A modern version of cargo is needed for the latest version of the Rust compiler.
 RUN apt update && apt install -y curl build-essential \
     && if [ "$NEED_MIRROR" == "1" ]; then \
-        # Use TUNA mirrors for rustup/rust dist files
+        # Use TUNA mirrors for rustup/rust dist files \
         export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
         export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
         echo "Using TUNA mirrors for Rustup."; \
     fi; \
-    # Force curl to use HTTP/1.1
+    # Force curl to use HTTP/1.1 \
     curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \
     && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc
diff --git a/common/data_source/discord_connector.py b/common/data_source/discord_connector.py
index 46b23443c..4c19a6d5e 100644
--- a/common/data_source/discord_connector.py
+++ b/common/data_source/discord_connector.py
@@ -33,7 +33,7 @@ def _convert_message_to_document(
     metadata: dict[str, str | list[str]] = {}
     semantic_substring = ""
 
-    # Only messages from TextChannels will make it here but we have to check for it anyways
+    # Only messages from TextChannels will make it here, but we have to check for it anyway
     if isinstance(message.channel, TextChannel) and (channel_name := message.channel.name):
         metadata["Channel"] = channel_name
         semantic_substring += f" in Channel: #{channel_name}"
@@ -176,7 +176,7 @@ def _manage_async_retrieval(
     # parse requested_start_date_string to datetime
     pull_date: datetime | None = datetime.strptime(requested_start_date_string, "%Y-%m-%d").replace(tzinfo=timezone.utc) if requested_start_date_string else None
 
-    # Set start_time to the later of start and pull_date, or whichever is provided
+    # Set start_time to the most recent of start and pull_date, or whichever is provided
     start_time = max(filter(None, [start, pull_date])) if start or pull_date else None
 
     end_time: datetime | None = end
diff --git a/deepdoc/parser/html_parser.py b/deepdoc/parser/html_parser.py
index 7e4467c16..dcf33a8bb 100644
--- a/deepdoc/parser/html_parser.py
+++ b/deepdoc/parser/html_parser.py
@@ -151,7 +151,7 @@ class RAGFlowHtmlParser:
         block_content = []
         current_content = ""
         table_info_list = []
-        lask_block_id = None
+        last_block_id = None
         for item in parser_result:
             content = item.get("content")
             tag_name = item.get("tag_name")
@@ -160,11 +160,11 @@ class RAGFlowHtmlParser:
             if block_id:
                 if title_flag:
                     content = f"{TITLE_TAGS[tag_name]} {content}"
-                if lask_block_id != block_id:
-                    if lask_block_id is not None:
+                if last_block_id != block_id:
+                    if last_block_id is not None:
                         block_content.append(current_content)
                     current_content = content
-                    lask_block_id = block_id
+                    last_block_id = block_id
                 else:
                     current_content += (" " if current_content else "") + content
             else:
diff --git a/deepdoc/vision/ocr.py b/deepdoc/vision/ocr.py
index 207fb0e84..afa692127 100644
--- a/deepdoc/vision/ocr.py
+++ b/deepdoc/vision/ocr.py
@@ -582,7 +582,7 @@ class OCR:
         self.crop_image_res_index = 0
 
     def get_rotate_crop_image(self, img, points):
-        '''
+        """
        img_height, img_width = img.shape[0:2]
        left = int(np.min(points[:, 0]))
        right = int(np.max(points[:, 0]))
@@ -591,7 +591,7 @@ class OCR:
        img_crop = img[top:bottom, left:right, :].copy()
        points[:, 0] = points[:, 0] - left
        points[:, 1] = points[:, 1] - top
-        '''
+        """
         assert len(points) == 4, "shape of points must be 4*2"
         img_crop_width = int(
             max(
diff --git a/deepdoc/vision/postprocess.py b/deepdoc/vision/postprocess.py
index a61464382..7704bc582 100644
--- a/deepdoc/vision/postprocess.py
+++ b/deepdoc/vision/postprocess.py
@@ -67,10 +67,10 @@ class DBPostProcess:
                         [[1, 1], [1, 1]])
 
     def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
-        '''
+        """
         _bitmap: single map with shape (1, H, W),
             whose values are binarized as {0, 1}
-        '''
+        """
 
         bitmap = _bitmap
         height, width = bitmap.shape
@@ -114,10 +114,10 @@ class DBPostProcess:
         return boxes, scores
 
     def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
-        '''
+        """
         _bitmap: single map with shape (1, H, W),
             whose values are binarized as {0, 1}
-        '''
+        """
 
         bitmap = _bitmap
         height, width = bitmap.shape
@@ -192,9 +192,9 @@ class DBPostProcess:
         return box, min(bounding_box[1])
 
     def box_score_fast(self, bitmap, _box):
-        '''
+        """
         box_score_fast: use bbox mean score as the mean score
-        '''
+        """
         h, w = bitmap.shape[:2]
         box = _box.copy()
         xmin = np.clip(np.floor(box[:, 0].min()).astype("int32"), 0, w - 1)
@@ -209,9 +209,9 @@ class DBPostProcess:
         return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
 
     def box_score_slow(self, bitmap, contour):
-        '''
-        box_score_slow: use polyon mean score as the mean score
-        '''
+        """
+        box_score_slow: use polygon mean score as the mean score
+        """
         h, w = bitmap.shape[:2]
         contour = contour.copy()
         contour = np.reshape(contour, (-1, 2))
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index adb337511..b851687a5 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -25,7 +25,7 @@ services:
       # - --no-transport-streamable-http-enabled # Disable Streamable HTTP transport (/mcp endpoint)
       # - --no-json-response # Disable JSON response mode in Streamable HTTP transport (instead of SSE over HTTP)
 
-      # Example configration to start Admin server:
+      # Example configuration to start Admin server:
       # command:
       #   - --enable-adminserver
     ports:
@@ -74,7 +74,7 @@ services:
       # - --no-transport-streamable-http-enabled # Disable Streamable HTTP transport (/mcp endpoint)
      # - --no-json-response # Disable JSON response mode in Streamable HTTP transport (instead of SSE over HTTP)
 
-      # Example configration to start Admin server:
+      # Example configuration to start Admin server:
       # command:
       #   - --enable-adminserver
     ports:
diff --git a/docs/faq.mdx b/docs/faq.mdx
index 55997e1c3..10c6bc57c 100644
--- a/docs/faq.mdx
+++ b/docs/faq.mdx
@@ -151,7 +151,7 @@ See [Build a RAGFlow Docker image](./develop/build_docker_image.mdx).
 
 ### Cannot access https://huggingface.co
 
-A locally deployed RAGflow downloads OCR models from [Huggingface website](https://huggingface.co) by default. If your machine is unable to access this site, the following error occurs and PDF parsing fails:
+A locally deployed RAGFlow downloads OCR models from [Huggingface website](https://huggingface.co) by default. If your machine is unable to access this site, the following error occurs and PDF parsing fails:
 
 ```
 FileNotFoundError: [Errno 2] No such file or directory: '/root/.cache/huggingface/hub/models--InfiniFlow--deepdoc/snapshots/be0c1e50eef6047b412d1800aa89aba4d275f997/ocr.res'
diff --git a/docs/guides/agent/best_practices/accelerate_agent_question_answering.md b/docs/guides/agent/best_practices/accelerate_agent_question_answering.md
index 76de06068..1161588bd 100644
--- a/docs/guides/agent/best_practices/accelerate_agent_question_answering.md
+++ b/docs/guides/agent/best_practices/accelerate_agent_question_answering.md
@@ -45,13 +45,13 @@ Click the light bulb icon above the *current* dialogue and scroll down the popup
 
 | Item name | Description |
-| ----------------- | --------------------------------------------------------------------------------------------- |
+| ----------------- |-----------------------------------------------------------------------------------------------|
 | Total | Total time spent on this conversation round, including chunk retrieval and answer generation. |
 | Check LLM | Time to validate the specified LLM. |
 | Create retriever | Time to create a chunk retriever. |
 | Bind embedding | Time to initialize an embedding model instance. |
 | Bind LLM | Time to initialize an LLM instance. |
-| Tune question | Time to optimize the user query using the context of the mult-turn conversation. |
+| Tune question | Time to optimize the user query using the context of the multi-turn conversation. |
 | Bind reranker | Time to initialize an reranker model instance for chunk retrieval. |
 | Generate keywords | Time to extract keywords from the user query. |
 | Retrieval | Time to retrieve the chunks. |
diff --git a/docs/guides/chat/best_practices/accelerate_question_answering.mdx b/docs/guides/chat/best_practices/accelerate_question_answering.mdx
index e404c1c2a..af4d2521b 100644
--- a/docs/guides/chat/best_practices/accelerate_question_answering.mdx
+++ b/docs/guides/chat/best_practices/accelerate_question_answering.mdx
@@ -37,7 +37,7 @@ Please note that rerank models are essential in certain scenarios. There is alwa
 | Create retriever | Time to create a chunk retriever. |
 | Bind embedding | Time to initialize an embedding model instance. |
 | Bind LLM | Time to initialize an LLM instance. |
-| Tune question | Time to optimize the user query using the context of the mult-turn conversation. |
+| Tune question | Time to optimize the user query using the context of the multi-turn conversation. |
 | Bind reranker | Time to initialize an reranker model instance for chunk retrieval. |
 | Generate keywords | Time to extract keywords from the user query. |
 | Retrieval | Time to retrieve the chunks. |
diff --git a/docs/guides/manage_users_and_services.md b/docs/guides/manage_users_and_services.md
index 94b933ec2..6c06c40f8 100644
--- a/docs/guides/manage_users_and_services.md
+++ b/docs/guides/manage_users_and_services.md
@@ -8,7 +8,7 @@ slug: /manage_users_and_services
 
 
 
-The Admin CLI and Admin Service form a client-server architectural suite for RAGflow system administration. The Admin CLI serves as an interactive command-line interface that receives instructions and displays execution results from the Admin Service in real-time. This duo enables real-time monitoring of system operational status, supporting visibility into RAGflow Server services and dependent components including MySQL, Elasticsearch, Redis, and MinIO. In administrator mode, they provide user management capabilities that allow viewing users and performing critical operations—such as user creation, password updates, activation status changes, and comprehensive user data deletion—even when corresponding web interface functionalities are disabled.
+The Admin CLI and Admin Service form a client-server architectural suite for RAGFlow system administration. The Admin CLI serves as an interactive command-line interface that receives instructions and displays execution results from the Admin Service in real-time. This duo enables real-time monitoring of system operational status, supporting visibility into RAGFlow Server services and dependent components including MySQL, Elasticsearch, Redis, and MinIO. In administrator mode, they provide user management capabilities that allow viewing users and performing critical operations—such as user creation, password updates, activation status changes, and comprehensive user data deletion—even when corresponding web interface functionalities are disabled.
 
 
diff --git a/docs/guides/models/deploy_local_llm.mdx b/docs/guides/models/deploy_local_llm.mdx
index dfee3fc78..997e526f3 100644
--- a/docs/guides/models/deploy_local_llm.mdx
+++ b/docs/guides/models/deploy_local_llm.mdx
@@ -305,7 +305,7 @@ With the Ollama service running, open a new terminal and run `./ollama pull
 
-### 4. Configure RAGflow
+### 4. Configure RAGFlow
 
 To enable IPEX-LLM accelerated Ollama in RAGFlow, you must also complete the configurations in RAGFlow. The steps are identical to those outlined in the *Deploy a local model using Ollama* section:
 
diff --git a/example/sdk/dataset_example.py b/example/sdk/dataset_example.py
index 3a0504d8d..a3931f143 100644
--- a/example/sdk/dataset_example.py
+++ b/example/sdk/dataset_example.py
@@ -14,9 +14,9 @@
 # limitations under the License.
 #
 
-'''
+"""
 The example is about CRUD operations (Create, Read, Update, Delete) on a dataset.
-'''
+"""
 
 from ragflow_sdk import RAGFlow
 import sys
diff --git a/sandbox/executor_manager/core/container.py b/sandbox/executor_manager/core/container.py
index f953886c1..36cdded28 100644
--- a/sandbox/executor_manager/core/container.py
+++ b/sandbox/executor_manager/core/container.py
@@ -122,15 +122,15 @@ async def create_container(name: str, language: SupportLanguage) -> bool:
     logger.info(f"Sandbox config:\n\t {create_args}")
 
     try:
-        returncode, _, stderr = await async_run_command(*create_args, timeout=10)
-        if returncode != 0:
+        return_code, _, stderr = await async_run_command(*create_args, timeout=10)
+        if return_code != 0:
             logger.error(f"❌ Container creation failed {name}: {stderr}")
             return False
 
         if language == SupportLanguage.NODEJS:
             copy_cmd = ["docker", "exec", name, "bash", "-c", "cp -a /app/node_modules /workspace/"]
-            returncode, _, stderr = await async_run_command(*copy_cmd, timeout=10)
-            if returncode != 0:
+            return_code, _, stderr = await async_run_command(*copy_cmd, timeout=10)
+            if return_code != 0:
                 logger.error(f"❌ Failed to prepare dependencies for {name}: {stderr}")
                 return False
 
@@ -185,7 +185,7 @@ async def allocate_container_blocking(language: SupportLanguage, timeout=10) ->
 async def container_is_running(name: str) -> bool:
     """Asynchronously check the container status"""
    try:
-        returncode, stdout, _ = await async_run_command("docker", "inspect", "-f", "{{.State.Running}}", name, timeout=2)
-        return returncode == 0 and stdout.strip() == "true"
+        return_code, stdout, _ = await async_run_command("docker", "inspect", "-f", "{{.State.Running}}", name, timeout=2)
+        return return_code == 0 and stdout.strip() == "true"
     except Exception:
         return False
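
For reference, here is a minimal, self-contained sketch of the return-code pattern that the container.py hunks above settle on. It is illustrative only: `run_command` is a stand-in for the repository's `async_run_command` helper, which the call sites above suggest returns a `(return_code, stdout, stderr)` tuple, and the container name at the bottom is hypothetical.

```python
import asyncio


async def run_command(*args: str, timeout: float = 10) -> tuple[int, str, str]:
    """Stand-in for async_run_command: run a command and return (return_code, stdout, stderr)."""
    proc = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except asyncio.TimeoutError:
        proc.kill()
        # Treat a timeout as a non-zero return code so callers fall into their error branch.
        return 1, "", "timeout"
    return proc.returncode, stdout.decode(), stderr.decode()


async def container_is_running(name: str) -> bool:
    """Mirror of the patched check: non-zero return code or any exception means 'not running'."""
    try:
        return_code, stdout, _ = await run_command("docker", "inspect", "-f", "{{.State.Running}}", name, timeout=2)
        return return_code == 0 and stdout.strip() == "true"
    except Exception:
        return False


if __name__ == "__main__":
    # "sandbox_python_0" is a hypothetical container name used only for illustration.
    print(asyncio.run(container_is_running("sandbox_python_0")))
```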