Remove 'get_lan_ip' and add common misc_utils.py (#10880)

### What problem does this PR solve?

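Move `get_uuid`, `download_img`, and `hash_str2int` out of `api.utils` into the new `common/misc_utils.py`, update every import to the new location, remove the `get_lan_ip` helper, and add unit tests for the moved functions.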

### Type of change

- [x] Refactoring

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-10-31 16:42:01 +08:00
committed by GitHub
parent e9debfd74d
commit f52e56c2d6
39 changed files with 344 additions and 94 deletions

View File

@ -29,7 +29,7 @@ from api.db.init_data import encode_to_base64
from api.db.services import UserService from api.db.services import UserService
from api.db import ActiveEnum, StatusEnum from api.db import ActiveEnum, StatusEnum
from api.utils.crypt import decrypt from api.utils.crypt import decrypt
from api.utils import get_uuid from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, datetime_format, get_format_time from common.time_utils import current_timestamp, datetime_format, get_format_time
from api.utils.api_utils import ( from api.utils.api_utils import (
construct_response, construct_response,

View File

@ -26,7 +26,7 @@ from typing import Any, Union, Tuple
from agent.component import component_class from agent.component import component_class
from agent.component.base import ComponentBase from agent.component.base import ComponentBase
from api.db.services.file_service import FileService from api.db.services.file_service import FileService
from api.utils import get_uuid, hash_str2int from common.misc_utils import get_uuid, hash_str2int
from rag.prompts.generator import chunks_format from rag.prompts.generator import chunks_format
from rag.utils.redis_conn import REDIS_CONN from rag.utils.redis_conn import REDIS_CONN

View File

@ -20,7 +20,7 @@ from copy import deepcopy
from functools import partial from functools import partial
from typing import TypedDict, List, Any from typing import TypedDict, List, Any
from agent.component.base import ComponentParamBase, ComponentBase from agent.component.base import ComponentParamBase, ComponentBase
from api.utils import hash_str2int from common.misc_utils import hash_str2int
from rag.llm.chat_model import ToolCallSession from rag.llm.chat_model import ToolCallSession
from rag.prompts.generator import kb_prompt from rag.prompts.generator import kb_prompt
from rag.utils.mcp_tool_call_conn import MCPToolCallSession from rag.utils.mcp_tool_call_conn import MCPToolCallSession

View File

@ -33,7 +33,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.task_service import queue_tasks, TaskService from api.db.services.task_service import queue_tasks, TaskService
from api.db.services.user_service import UserTenantService from api.db.services.user_service import UserTenantService
from api import settings from api import settings
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request, \ from api.utils.api_utils import server_error_response, get_data_error_result, get_json_result, validate_request, \
generate_confirmation_token generate_confirmation_token

View File

@ -35,7 +35,7 @@ from api.db.services.task_service import queue_dataflow, CANVAS_DEBUG_DOC_ID, Ta
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService
from api.db.services.user_canvas_version import UserCanvasVersionService from api.db.services.user_canvas_version import UserCanvasVersionService
from api.settings import RetCode from api.settings import RetCode
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
from agent.canvas import Canvas from agent.canvas import Canvas
from peewee import MySQLDatabase, PostgresqlDatabase from peewee import MySQLDatabase, PostgresqlDatabase

View File

@ -24,7 +24,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import TenantService, UserTenantService from api.db.services.user_service import TenantService, UserTenantService
from api import settings from api import settings
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import get_json_result from api.utils.api_utils import get_json_result

View File

@ -35,7 +35,7 @@ from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks, queue_dataflow from api.db.services.task_service import TaskService, cancel_all_task_of, queue_tasks, queue_dataflow
from api.db.services.user_service import UserTenantService from api.db.services.user_service import UserTenantService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import ( from api.utils.api_utils import (
get_data_error_result, get_data_error_result,
get_json_result, get_json_result,

View File

@ -23,7 +23,7 @@ from flask import request
from flask_login import login_required, current_user from flask_login import login_required, current_user
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.db import FileType from api.db import FileType
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api import settings from api import settings

View File

@ -26,7 +26,7 @@ from api.common.check_team_permission import check_file_team_permission
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService from api.db.services.file2document_service import File2DocumentService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.db import FileType, FileSource from api.db import FileType, FileSource
from api.db.services import duplicate_name from api.db.services import duplicate_name
from api.db.services.file_service import FileService from api.db.services.file_service import FileService

View File

@ -31,7 +31,7 @@ from api.db.services.pipeline_operation_log_service import PipelineOperationLogS
from api.db.services.task_service import TaskService, GRAPH_RAPTOR_FAKE_DOC_ID from api.db.services.task_service import TaskService, GRAPH_RAPTOR_FAKE_DOC_ID
from api.db.services.user_service import TenantService, UserTenantService from api.db.services.user_service import TenantService, UserTenantService
from api.utils.api_utils import get_error_data_result, server_error_response, get_data_error_result, validate_request, not_allowed_parameters from api.utils.api_utils import get_error_data_result, server_error_response, get_data_error_result, validate_request, not_allowed_parameters
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.db import PipelineTaskType, StatusEnum, FileSource, VALID_FILE_TYPES, VALID_TASK_STATUS from api.db import PipelineTaskType, StatusEnum, FileSource, VALID_FILE_TYPES, VALID_TASK_STATUS
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.db_models import File from api.db.db_models import File

View File

@ -22,7 +22,7 @@ from api.db.services.mcp_server_service import MCPServerService
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService
from api.settings import RetCode from api.settings import RetCode
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, validate_request, \ from api.utils.api_utils import get_data_error_result, get_json_result, server_error_response, validate_request, \
get_mcp_tools get_mcp_tools
from api.utils.web_utils import get_float, safe_json_parse from api.utils.web_utils import get_float, safe_json_parse

View File

@ -20,7 +20,7 @@ from typing import Any, cast
from api.db.services.canvas_service import UserCanvasService from api.db.services.canvas_service import UserCanvasService
from api.db.services.user_canvas_version import UserCanvasVersionService from api.db.services.user_canvas_version import UserCanvasVersionService
from api.settings import RetCode from api.settings import RetCode
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import get_data_error_result, get_error_data_result, get_json_result, token_required from api.utils.api_utils import get_data_error_result, get_error_data_result, get_json_result, token_required
from api.utils.api_utils import get_result from api.utils.api_utils import get_result
from flask import request from flask import request

View File

@ -23,7 +23,7 @@ from api.db.services.dialog_service import DialogService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.tenant_llm_service import TenantLLMService from api.db.services.tenant_llm_service import TenantLLMService
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import check_duplicate_ids, get_error_data_result, get_result, token_required from api.utils.api_utils import check_duplicate_ids, get_error_data_result, get_result, token_required

View File

@ -28,7 +28,7 @@ from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import TenantService from api.db.services.user_service import TenantService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import ( from api.utils.api_utils import (
deep_merge, deep_merge,
get_error_argument_result, get_error_argument_result,

View File

@ -26,7 +26,7 @@ from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService from api.db.services.file2document_service import File2DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import server_error_response, token_required from api.utils.api_utils import server_error_response, token_required
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.db import FileType from api.db import FileType
from api.db.services import duplicate_name from api.db.services import duplicate_name
from api.db.services.file_service import FileService from api.db.services.file_service import FileService

View File

@ -35,7 +35,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import LLMBundle from api.db.services.llm_service import LLMBundle
from api.db.services.search_service import SearchService from api.db.services.search_service import SearchService
from api.db.services.user_service import UserTenantService from api.db.services.user_service import UserTenantService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import check_duplicate_ids, get_data_openai, get_error_data_result, get_json_result, \ from api.utils.api_utils import check_duplicate_ids, get_data_openai, get_error_data_result, get_json_result, \
get_result, server_error_response, token_required, validate_request get_result, server_error_response, token_required, validate_request
from rag.app.tag import label_question from rag.app.tag import label_question

View File

@ -24,7 +24,7 @@ from api.db.db_models import DB
from api.db.services import duplicate_name from api.db.services import duplicate_name
from api.db.services.search_service import SearchService from api.db.services.search_service import SearchService
from api.db.services.user_service import TenantService, UserTenantService from api.db.services.user_service import TenantService, UserTenantService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import get_data_error_result, get_json_result, not_allowed_parameters, server_error_response, validate_request from api.utils.api_utils import get_data_error_result, get_json_result, not_allowed_parameters, server_error_response, validate_request

View File

@ -23,7 +23,7 @@ from api.db import UserTenantRole, StatusEnum
from api.db.db_models import UserTenant from api.db.db_models import UserTenant
from api.db.services.user_service import UserTenantService, UserService from api.db.services.user_service import UserTenantService, UserService
from api.utils import get_uuid from common.misc_utils import get_uuid
from common.time_utils import delta_seconds from common.time_utils import delta_seconds
from api.utils.api_utils import get_json_result, validate_request, server_error_response, get_data_error_result from api.utils.api_utils import get_json_result, validate_request, server_error_response, get_data_error_result
from api.utils.web_utils import send_invite_email from api.utils.web_utils import send_invite_email

View File

@ -35,7 +35,7 @@ from api.db.services.llm_service import get_init_tenant_llm
from api.db.services.tenant_llm_service import TenantLLMService from api.db.services.tenant_llm_service import TenantLLMService
from api.db.services.user_service import TenantService, UserService, UserTenantService from api.db.services.user_service import TenantService, UserService, UserTenantService
from common.time_utils import current_timestamp, datetime_format, get_format_time from common.time_utils import current_timestamp, datetime_format, get_format_time
from api.utils import download_img, get_uuid from common.misc_utils import download_img, get_uuid
from api.utils.api_utils import ( from api.utils.api_utils import (
construct_response, construct_response,
get_data_error_result, get_data_error_result,

View File

@ -22,7 +22,7 @@ from api.db import CanvasCategory, TenantPermission
from api.db.db_models import DB, CanvasTemplate, User, UserCanvas, API4Conversation from api.db.db_models import DB, CanvasTemplate, User, UserCanvas, API4Conversation
from api.db.services.api_service import API4ConversationService from api.db.services.api_service import API4ConversationService
from api.db.services.common_service import CommonService from api.db.services.common_service import CommonService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import get_data_openai from api.utils.api_utils import get_data_openai
import tiktoken import tiktoken
from peewee import fn from peewee import fn

View File

@ -19,7 +19,7 @@ import peewee
from peewee import InterfaceError, OperationalError from peewee import InterfaceError, OperationalError
from api.db.db_models import DB from api.db.db_models import DB
from api.utils import get_uuid from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, datetime_format from common.time_utils import current_timestamp, datetime_format
def retry_db_operation(func): def retry_db_operation(func):

View File

@ -20,7 +20,7 @@ from api.db.db_models import Conversation, DB
from api.db.services.api_service import API4ConversationService from api.db.services.api_service import API4ConversationService
from api.db.services.common_service import CommonService from api.db.services.common_service import CommonService
from api.db.services.dialog_service import DialogService, chat from api.db.services.dialog_service import DialogService, chat
from api.utils import get_uuid from common.misc_utils import get_uuid
import json import json
from rag.prompts.generator import chunks_format from rag.prompts.generator import chunks_format

View File

@ -34,7 +34,7 @@ from api.db.db_models import DB, Document, Knowledgebase, Task, Tenant, UserTena
from api.db.db_utils import bulk_insert_into_db from api.db.db_utils import bulk_insert_into_db
from api.db.services.common_service import CommonService from api.db.services.common_service import CommonService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils import get_uuid from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, get_format_time from common.time_utils import current_timestamp, get_format_time
from rag.nlp import rag_tokenizer, search from rag.nlp import rag_tokenizer, search
from rag.settings import get_svr_queue_name, SVR_CONSUMER_GROUP_NAME from rag.settings import get_svr_queue_name, SVR_CONSUMER_GROUP_NAME

View File

@ -27,7 +27,7 @@ from api.db.services import duplicate_name
from api.db.services.common_service import CommonService from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService from api.db.services.file2document_service import File2DocumentService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img from api.utils.file_utils import filename_type, read_potential_broken_pdf, thumbnail_img
from rag.llm.cv_model import GptV4 from rag.llm.cv_model import GptV4
from rag.utils.storage_factory import STORAGE_IMPL from rag.utils.storage_factory import STORAGE_IMPL

View File

@ -27,7 +27,7 @@ from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID from api.db.services.task_service import GRAPH_RAPTOR_FAKE_DOC_ID
from api.utils import get_uuid from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, datetime_format from common.time_utils import current_timestamp, datetime_format

View File

@ -27,7 +27,7 @@ from api.db import StatusEnum, FileType, TaskStatus
from api.db.db_models import Task, Document, Knowledgebase, Tenant from api.db.db_models import Task, Document, Knowledgebase, Tenant
from api.db.services.common_service import CommonService from api.db.services.common_service import CommonService
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api.utils import get_uuid from common.misc_utils import get_uuid
from common.time_utils import current_timestamp from common.time_utils import current_timestamp
from deepdoc.parser.excel_parser import RAGFlowExcelParser from deepdoc.parser.excel_parser import RAGFlowExcelParser
from rag.settings import get_svr_queue_name from rag.settings import get_svr_queue_name

View File

@ -24,7 +24,7 @@ from api.db import UserTenantRole
from api.db.db_models import DB, UserTenant from api.db.db_models import DB, UserTenant
from api.db.db_models import User, Tenant from api.db.db_models import User, Tenant
from api.db.services.common_service import CommonService from api.db.services.common_service import CommonService
from api.utils import get_uuid from common.misc_utils import get_uuid
from common.time_utils import current_timestamp, datetime_format from common.time_utils import current_timestamp, datetime_format
from api.db import StatusEnum from api.db import StatusEnum
from rag.settings import MINIO from rag.settings import MINIO

View File

@ -13,49 +13,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
import base64
import hashlib
import os
import socket
import uuid
import requests
import importlib import importlib
from .common import string_to_bytes
def get_lan_ip():
    """Return an IPv4 address for this host, preferring a non-loopback one.

    The address is first resolved from the host's fully qualified domain
    name. If that yields a loopback address (``127.*``) on a non-Windows
    system, a fixed list of common interface names is probed and the first
    interface that reports an address wins.

    Returns:
        The address as a dotted-quad string, or ``''`` if resolution
        produced an empty value.
    """
    if os.name != "nt":
        # fcntl is unavailable on Windows, so import it only where it exists.
        import fcntl
        import struct

        def get_interface_ip(ifname):
            """Return the IPv4 address bound to the interface *ifname*."""
            # Close the probe socket deterministically; previously it was
            # left open whenever the ioctl succeeded or raised.
            with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
                # 0x8915 is SIOCGIFADDR on Linux; bytes 20..24 of the
                # returned ifreq structure hold the packed IPv4 address.
                # NOTE(review): string_to_bytes comes from .common and is
                # presumably a str -> bytes encoder — confirm.
                packed = fcntl.ioctl(
                    s.fileno(),
                    0x8915,
                    struct.pack('256s', string_to_bytes(ifname[:15])),
                )
            return socket.inet_ntoa(packed[20:24])

    ip = socket.gethostbyname(socket.getfqdn())
    if ip.startswith("127.") and os.name != "nt":
        interfaces = [
            "bond1",
            "eth0",
            "eth1",
            "eth2",
            "wlan0",
            "wlan1",
            "wifi0",
            "ath0",
            "ath1",
            "ppp0",
        ]
        for ifname in interfaces:
            try:
                ip = get_interface_ip(ifname)
                break
            except IOError:
                # Best effort: the interface does not exist or has no
                # address, so move on to the next candidate.
                continue
    return ip or ''
def from_dict_hook(in_dict: dict): def from_dict_hook(in_dict: dict):
if "type" in in_dict and "data" in in_dict: if "type" in in_dict and "data" in in_dict:
@ -66,20 +25,3 @@ def from_dict_hook(in_dict: dict):
in_dict["module"]), in_dict["type"])(**in_dict["data"]) in_dict["module"]), in_dict["type"])(**in_dict["data"])
else: else:
return in_dict return in_dict
def get_uuid():
    """Return a newly generated version-1 UUID as a 32-character hex string."""
    generated = uuid.uuid1()
    return generated.hex
def download_img(url, timeout=10):
    """Fetch an image over HTTP and return it as a base64 ``data:`` URI.

    Args:
        url: Address of the image. A falsy value (``None`` or ``""``)
            short-circuits and no request is made.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            (seconds). Defaults to 10.

    Returns:
        ``""`` when ``url`` is falsy; otherwise
        ``data:<content-type>;base64,<payload>``, where the content type is
        the response's ``Content-Type`` header (``image/jpg`` if absent).

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get``, including ``Timeout`` when the server does
            not answer within ``timeout``.
    """
    if not url:
        return ""

    # Without an explicit timeout requests waits indefinitely, so one
    # stalled server could block the caller forever.
    response = requests.get(url, timeout=timeout)
    # NOTE: the HTTP status is not checked; the body of an error response
    # is encoded just like a successful one.
    content_type = response.headers.get('Content-Type', 'image/jpg')
    payload = base64.b64encode(response.content).decode("utf-8")
    return "data:" + content_type + ";" + "base64," + payload
def hash_str2int(line: str, mod: int = 10 ** 8) -> int:
    """Hash *line* to a deterministic integer reduced modulo *mod*.

    The SHA-1 digest of the UTF-8 encoding of ``line`` is read as one
    big-endian integer and then taken modulo ``mod``.
    """
    digest = hashlib.sha1(line.encode("utf-8")).digest()
    as_integer = int.from_bytes(digest, "big")
    return as_integer % mod

36
common/misc_utils.py Normal file
View File

@ -0,0 +1,36 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
import hashlib
import uuid
import requests
def get_uuid():
    """Generate a version-1 UUID and return its dash-free hexadecimal form."""
    new_id = uuid.uuid1()
    return new_id.hex
def download_img(url, timeout=10):
    """Download an image and return it encoded as a base64 ``data:`` URI.

    Args:
        url: Location of the image. When falsy (``None`` or ``""``) nothing
            is requested.
        timeout: Forwarded to ``requests.get`` as ``timeout`` (seconds).
            Defaults to 10.

    Returns:
        An empty string for a falsy ``url``; otherwise a string shaped like
        ``data:<content-type>;base64,<payload>``. ``<content-type>`` comes
        from the response's ``Content-Type`` header and falls back to
        ``image/jpg`` when the header is missing.

    Raises:
        requests.exceptions.RequestException: Any error raised by
            ``requests.get``, including ``Timeout`` once ``timeout``
            elapses without a response.
    """
    if not url:
        return ""

    # requests has no default timeout; bounding the wait keeps a stalled
    # server from hanging the caller indefinitely.
    response = requests.get(url, timeout=timeout)
    # NOTE: the status code is not inspected, so an error page would be
    # encoded the same way as an image.
    mime_type = response.headers.get('Content-Type', 'image/jpg')
    encoded = base64.b64encode(response.content).decode("utf-8")
    return "data:" + mime_type + ";" + "base64," + encoded
def hash_str2int(line: str, mod: int = 10 ** 8) -> int:
    """Return a stable integer hash of *line*, reduced modulo *mod*.

    ``line`` is UTF-8 encoded and hashed with SHA-1; the digest is read as
    a big-endian integer before the modulo is applied.
    """
    raw_digest = hashlib.sha1(line.encode("utf-8")).digest()
    return int.from_bytes(raw_digest, "big") % mod

View File

@ -22,7 +22,7 @@ import trio
from api import settings from api import settings
from api.db.services.document_service import DocumentService from api.db.services.document_service import DocumentService
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import timeout from api.utils.api_utils import timeout
from graphrag.entity_resolution import EntityResolution from graphrag.entity_resolution import EntityResolution
from graphrag.general.community_reports_extractor import CommunityReportsExtractor from graphrag.general.community_reports_extractor import CommunityReportsExtractor

View File

@ -21,7 +21,7 @@ import json_repair
import pandas as pd import pandas as pd
import trio import trio
from api.utils import get_uuid from common.misc_utils import get_uuid
from graphrag.query_analyze_prompt import PROMPTS from graphrag.query_analyze_prompt import PROMPTS
from graphrag.utils import get_entity_type2samples, get_llm_cache, set_llm_cache, get_relation from graphrag.utils import get_entity_type2samples, get_llm_cache, set_llm_cache, get_relation
from rag.utils import num_tokens_from_string from rag.utils import num_tokens_from_string

View File

@ -24,7 +24,7 @@ import xxhash
from networkx.readwrite import json_graph from networkx.readwrite import json_graph
from api import settings from api import settings
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.api_utils import timeout from api.utils.api_utils import timeout
from rag.nlp import rag_tokenizer, search from rag.nlp import rag_tokenizer, search
from rag.utils.doc_store_conn import OrderByExpr from rag.utils.doc_store_conn import OrderByExpr

View File

@ -24,7 +24,7 @@ from api.db import LLMType
from api.db.services.llm_service import LLMBundle from api.db.services.llm_service import LLMBundle
from api.db.services.knowledgebase_service import KnowledgebaseService from api.db.services.knowledgebase_service import KnowledgebaseService
from api import settings from api import settings
from api.utils import get_uuid from common.misc_utils import get_uuid
from rag.nlp import tokenize, search from rag.nlp import tokenize, search
from ranx import evaluate from ranx import evaluate
from ranx import Qrels, Run from ranx import Qrels, Run

View File

@ -20,7 +20,7 @@ from functools import partial
import trio import trio
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.base64_image import id2image, image2id from api.utils.base64_image import id2image, image2id
from deepdoc.parser.pdf_parser import RAGFlowPdfParser from deepdoc.parser.pdf_parser import RAGFlowPdfParser
from rag.flow.base import ProcessBase, ProcessParamBase from rag.flow.base import ProcessBase, ProcessParamBase

View File

@ -26,7 +26,7 @@ from api.db import LLMType
from api.db.services.file2document_service import File2DocumentService from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService from api.db.services.file_service import FileService
from api.db.services.llm_service import LLMBundle from api.db.services.llm_service import LLMBundle
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.base64_image import image2id from api.utils.base64_image import image2id
from deepdoc.parser import ExcelParser from deepdoc.parser import ExcelParser
from deepdoc.parser.mineru_parser import MinerUParser from deepdoc.parser.mineru_parser import MinerUParser

View File

@ -17,7 +17,7 @@ from functools import partial
import trio import trio
from api.utils import get_uuid from common.misc_utils import get_uuid
from api.utils.base64_image import id2image, image2id from api.utils.base64_image import id2image, image2id
from deepdoc.parser.pdf_parser import RAGFlowPdfParser from deepdoc.parser.pdf_parser import RAGFlowPdfParser
from rag.flow.base import ProcessBase, ProcessParamBase from rag.flow.base import ProcessBase, ProcessParamBase

View File

@ -22,7 +22,7 @@ from typing import Tuple
import jinja2 import jinja2
import json_repair import json_repair
import trio import trio
from api.utils import hash_str2int from common.misc_utils import hash_str2int
from rag.nlp import rag_tokenizer from rag.nlp import rag_tokenizer
from rag.prompts.template import load_prompt from rag.prompts.template import load_prompt
from rag.settings import TAG_FLD from rag.settings import TAG_FLD

View File

@ -15,7 +15,7 @@
# #
import logging import logging
from tavily import TavilyClient from tavily import TavilyClient
from api.utils import get_uuid from common.misc_utils import get_uuid
from rag.nlp import rag_tokenizer from rag.nlp import rag_tokenizer

View File

@ -0,0 +1,272 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import uuid
import hashlib
from common.misc_utils import get_uuid, download_img, hash_str2int
class TestGetUuid:
    """Test cases for get_uuid function"""

    # Characters permitted in the lowercase hexadecimal form of a UUID.
    HEX_DIGITS = frozenset('0123456789abcdef')

    def test_returns_string(self):
        """Test that function returns a string"""
        assert isinstance(get_uuid(), str)

    def test_hex_format(self):
        """Test that returned string is in hex format"""
        result = get_uuid()
        # A UUID rendered as hex without dashes is 32 characters long.
        assert len(result) == 32
        assert set(result) <= self.HEX_DIGITS

    def test_no_dashes_in_result(self):
        """Test that result contains no dashes"""
        assert '-' not in get_uuid()

    def test_unique_results(self):
        """Test that multiple calls return different UUIDs"""
        results = [get_uuid() for _ in range(10)]
        # No value may repeat across calls.
        assert len(set(results)) == len(results)
        # Every value must still be a well-formed 32-character hex string.
        for result in results:
            assert len(result) == 32
            assert set(result) <= self.HEX_DIGITS

    def test_valid_uuid_structure(self):
        """Test that the hex string can be converted back to UUID"""
        result = get_uuid()
        reconstructed_uuid = uuid.UUID(hex=result)
        assert isinstance(reconstructed_uuid, uuid.UUID)
        # Round-tripping through uuid.UUID must not change the hex form.
        assert reconstructed_uuid.hex == result

    def test_uuid1_specific_characteristics(self):
        """Test that UUID v1 characteristics are present"""
        uuid_obj = uuid.UUID(hex=get_uuid())
        assert uuid_obj.version == 1
        # Compare against the stdlib constant rather than its literal text.
        assert uuid_obj.variant == uuid.RFC_4122

    def test_result_length_consistency(self):
        """Test that all generated UUIDs have consistent length"""
        for _ in range(100):
            assert len(get_uuid()) == 32

    def test_hex_characters_only(self):
        """Test that only valid lowercase hex characters are used"""
        for _ in range(100):
            result = get_uuid()
            # str.islower() is False for a string with no cased characters,
            # so an all-digit UUID would fail it; compare with lower().
            assert result == result.lower()
            assert set(result) <= self.HEX_DIGITS
class TestDownloadImg:
    """Test cases for download_img function"""

    def test_empty_url_returns_empty_string(self):
        """An empty-string URL yields an empty string."""
        assert download_img("") == ""

    def test_none_url_returns_empty_string(self):
        """A ``None`` URL yields an empty string."""
        assert download_img(None) == ""
class TestHashStr2Int:
"""Test cases for hash_str2int function"""
def test_basic_hashing(self):
"""Test basic string hashing functionality"""
result = hash_str2int("hello")
assert isinstance(result, int)
assert 0 <= result < 10 ** 8
def test_default_mod_value(self):
    """Test that default mod value is 10^8.

    A range check alone would also pass for any smaller default modulus,
    so the result is additionally compared with an explicit mod=10 ** 8 call.
    """
    result = hash_str2int("test")
    assert 0 <= result < 10 ** 8
    # Omitting `mod` must behave exactly like passing 10 ** 8 explicitly.
    assert result == hash_str2int("test", mod=10 ** 8)
def test_custom_mod_value(self):
    """Test that a caller-supplied mod bounds the result"""
    modulus = 1000
    hashed = hash_str2int("test", mod=modulus)
    assert isinstance(hashed, int)
    assert 0 <= hashed < modulus
def test_same_input_same_output(self):
    """Test that hashing the same text repeatedly gives one value"""
    text = "consistent"
    first = hash_str2int(text)
    second = hash_str2int(text)
    third = hash_str2int(text)
    assert first == second and second == third
def test_different_input_different_output(self):
    """Test that distinct inputs hash to distinct values (usually)"""
    samples = ("hello", "world", "hello world")
    hashes = [hash_str2int(text) for text in samples]
    # Collisions are possible in principle but very unlikely for these inputs,
    # so de-duplicating must not shrink the collection.
    assert len(set(hashes)) == len(hashes)
def test_empty_string(self):
    """Test that the empty string hashes to an int in [0, 10^8)"""
    upper_bound = 10 ** 8
    hashed = hash_str2int("")
    assert isinstance(hashed, int)
    assert 0 <= hashed < upper_bound
def test_unicode_string(self):
    """Test that non-ASCII text hashes to an int in [0, 10^8)"""
    upper_bound = 10 ** 8
    samples = ("中文", "🚀火箭", "café", "🎉", "Hello 世界")
    for sample in samples:
        hashed = hash_str2int(sample)
        assert isinstance(hashed, int)
        assert 0 <= hashed < upper_bound
def test_special_characters(self):
    """Test that punctuation and control characters hash to an int in [0, 10^8)"""
    upper_bound = 10 ** 8
    samples = (
        "hello@world.com",
        "test#123",
        "line\nwith\nnewlines",
        "tab\tcharacter",
        "space in string",
    )
    for sample in samples:
        hashed = hash_str2int(sample)
        assert isinstance(hashed, int)
        assert 0 <= hashed < upper_bound
def test_large_string(self):
    """Test that a 10000-character input hashes to an int in [0, 10^8)"""
    upper_bound = 10 ** 8
    hashed = hash_str2int("x" * 10000)
    assert isinstance(hashed, int)
    assert 0 <= hashed < upper_bound
def test_mod_value_1(self):
    """Test that mod=1 collapses every hash to 0"""
    assert hash_str2int("any string", mod=1) == 0
def test_mod_value_2(self):
    """Test that mod=2 leaves only 0 or 1 as possible results"""
    allowed = (0, 1)
    assert hash_str2int("test", mod=2) in allowed
def test_very_large_mod(self):
    """Test that a modulus of 10^12 still bounds the result"""
    modulus = 10 ** 12
    hashed = hash_str2int("test", mod=modulus)
    assert isinstance(hashed, int)
    assert 0 <= hashed < modulus
def test_hash_algorithm_sha1(self):
    """Test that the result equals the SHA1 digest reduced modulo 10^8"""
    text = "hello"
    digest_hex = hashlib.sha1(text.encode("utf-8")).hexdigest()
    assert hash_str2int(text) == int(digest_hex, 16) % (10 ** 8)
def test_utf8_encoding(self):
    """Test that UTF-8 encoding is used.

    Hashes a string containing non-ASCII characters and checks the result
    against a SHA1 digest of its UTF-8 bytes, so a different text encoding
    inside hash_str2int would be detected rather than silently accepted.
    """
    test_string = "café 🎉"
    # This should work without encoding errors
    result = hash_str2int(test_string)
    assert isinstance(result, int)
    # The bytes differ between encodings for this input, so matching the
    # UTF-8 based value pins the encoding actually used.
    expected = int(hashlib.sha1(test_string.encode("utf-8")).hexdigest(), 16) % (10 ** 8)
    assert result == expected
def test_range_with_different_mods(self):
    """Test that each input hashes into [0, mod) for its paired modulus"""
    texts = ("test1", "test2", "test3", "test4")
    moduli = (100, 1000, 10000, 999999)
    for text, modulus in zip(texts, moduli):
        hashed = hash_str2int(text, mod=modulus)
        assert 0 <= hashed < modulus
def test_hexdigest_conversion(self):
    """Test that the result is the base-16 SHA1 hexdigest reduced modulo 10^8"""
    text = "hello"
    modulus = 10 ** 8
    # Read the hexdigest as one base-16 integer before reducing it.
    digest_as_int = int(hashlib.sha1(text.encode("utf-8")).hexdigest(), 16)
    wanted = digest_as_int % modulus
    actual = hash_str2int(text)
    assert actual == wanted
def test_consistent_with_direct_calculation(self):
    """Test that several inputs match a direct hashlib computation"""

    def sha1_mod(text):
        # Reference value: SHA1 of the UTF-8 bytes, read as hex, modulo 10^8.
        return int(hashlib.sha1(text.encode("utf-8")).hexdigest(), 16) % (10 ** 8)

    for sample in ("a", "b", "abc", "hello world", "12345"):
        wanted = sha1_mod(sample)
        actual = hash_str2int(sample)
        assert actual == wanted
def test_numeric_strings(self):
    """Test that digit-like strings hash to an int in [0, 10^8)"""
    upper_bound = 10 ** 8
    for sample in ("123", "0", "999999", "3.14159", "-42"):
        hashed = hash_str2int(sample)
        assert isinstance(hashed, int)
        assert 0 <= hashed < upper_bound
def test_whitespace_strings(self):
    """Test that strings carrying assorted whitespace hash to an int in [0, 10^8)"""
    upper_bound = 10 ** 8
    samples = (
        " leading",
        "trailing ",
        " both ",
        "\ttab",
        "new\nline",
        "\r\nwindows",
    )
    for sample in samples:
        hashed = hash_str2int(sample)
        assert isinstance(hashed, int)
        assert 0 <= hashed < upper_bound