mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: wrap search app (#8320)
### What problem does this PR solve?

Wrap search app

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -13,16 +13,16 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import hashlib
|
||||
import inspect
|
||||
import logging
|
||||
import operator
|
||||
import os
|
||||
import sys
|
||||
import typing
|
||||
import time
|
||||
import typing
|
||||
from enum import Enum
|
||||
from functools import wraps
|
||||
import hashlib
|
||||
|
||||
from flask_login import UserMixin
|
||||
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
||||
@ -264,14 +264,15 @@ class BaseDataBase:
|
||||
|
||||
def with_retry(max_retries=3, retry_delay=1.0):
|
||||
"""Decorator: Add retry mechanism to database operations
|
||||
|
||||
|
||||
Args:
|
||||
max_retries (int): maximum number of retries
|
||||
retry_delay (float): initial retry delay (seconds), will increase exponentially
|
||||
|
||||
|
||||
Returns:
|
||||
decorated function
|
||||
"""
|
||||
|
||||
def decorator(func):
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
@ -284,26 +285,28 @@ def with_retry(max_retries=3, retry_delay=1.0):
|
||||
# get self and method name for logging
|
||||
self_obj = args[0] if args else None
|
||||
func_name = func.__name__
|
||||
lock_name = getattr(self_obj, 'lock_name', 'unknown') if self_obj else 'unknown'
|
||||
|
||||
lock_name = getattr(self_obj, "lock_name", "unknown") if self_obj else "unknown"
|
||||
|
||||
if retry < max_retries - 1:
|
||||
current_delay = retry_delay * (2 ** retry)
|
||||
logging.warning(f"{func_name} {lock_name} failed: {str(e)}, retrying ({retry+1}/{max_retries})")
|
||||
current_delay = retry_delay * (2**retry)
|
||||
logging.warning(f"{func_name} {lock_name} failed: {str(e)}, retrying ({retry + 1}/{max_retries})")
|
||||
time.sleep(current_delay)
|
||||
else:
|
||||
logging.error(f"{func_name} {lock_name} failed after all attempts: {str(e)}")
|
||||
|
||||
|
||||
if last_exception:
|
||||
raise last_exception
|
||||
return False
|
||||
|
||||
return wrapper
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
class PostgresDatabaseLock:
|
||||
def __init__(self, lock_name, timeout=10, db=None):
|
||||
self.lock_name = lock_name
|
||||
self.lock_id = int(hashlib.md5(lock_name.encode()).hexdigest(), 16) % (2**31-1)
|
||||
self.lock_id = int(hashlib.md5(lock_name.encode()).hexdigest(), 16) % (2**31 - 1)
|
||||
self.timeout = int(timeout)
|
||||
self.db = db if db else DB
|
||||
|
||||
@ -542,7 +545,7 @@ class LLM(DataBaseModel):
|
||||
max_tokens = IntegerField(default=0)
|
||||
|
||||
tags = CharField(max_length=255, null=False, help_text="LLM, Text Embedding, Image2Text, Chat, 32k...", index=True)
|
||||
is_tools = BooleanField(null=False, help_text="support tools", default=False)
|
||||
is_tools = BooleanField(null=False, help_text="support tools", default=False)
|
||||
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)
|
||||
|
||||
def __str__(self):
|
||||
@ -796,6 +799,50 @@ class UserCanvasVersion(DataBaseModel):
|
||||
db_table = "user_canvas_version"
|
||||
|
||||
|
||||
def _default_search_config():
    """Build a fresh default ``search_config`` payload.

    Used as a callable field default so every ``Search`` instance gets its own
    dict (and its own nested lists/dicts) instead of sharing one module-level
    object that an in-place mutation would silently change for everybody.
    """
    return {
        "kb_ids": [],
        "doc_ids": [],
        "similarity_threshold": 0.0,
        "vector_similarity_weight": 0.3,
        "use_kg": False,
        # rerank settings
        "rerank_id": "",
        "top_k": 1024,
        # chat settings
        "summary": False,
        "chat_id": "",
        "llm_setting": {
            "temperature": 0.1,
            "top_p": 0.3,
            "frequency_penalty": 0.7,
            "presence_penalty": 0.4,
        },
        # NOTE(review): this key looks like "chat_setting" and "cross_languages"
        # fused together; kept byte-identical because readers of the stored
        # config are not visible here -- confirm before renaming.
        "chat_settingcross_languages": [],
        "highlight": False,
        "keyword": False,
        "web_search": False,
        "related_search": False,
        "query_mindmap": False,
    }


class Search(DataBaseModel):
    """Persisted search app: identity, ownership, and its JSON configuration."""

    id = CharField(max_length=32, primary_key=True)
    avatar = TextField(null=True, help_text="avatar base64 string")
    tenant_id = CharField(max_length=32, null=False, index=True)
    name = CharField(max_length=128, null=False, help_text="Search name", index=True)
    description = TextField(null=True, help_text="KB description")
    created_by = CharField(max_length=32, null=False, index=True)
    # A callable default is evaluated per instance; a dict literal here would be
    # shared by reference across all instances that rely on the default.
    search_config = JSONField(null=False, default=_default_search_config)
    status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted, 1: validate)", default="1", index=True)

    def __str__(self):
        """Return the search app's name."""
        return self.name

    class Meta:
        db_table = "search"
|
||||
|
||||
|
||||
def migrate_db():
|
||||
migrator = DatabaseMigrator[settings.DATABASE_TYPE.upper()].value(DB)
|
||||
try:
|
||||
|
||||
@ -159,6 +159,7 @@ BAD_CITATION_PATTERNS = [
|
||||
re.compile(r"ref\s*(\d+)", flags=re.IGNORECASE), # ref12、REF 12
|
||||
]
|
||||
|
||||
|
||||
def repair_bad_citation_formats(answer: str, kbinfos: dict, idx: set):
|
||||
max_index = len(kbinfos["chunks"])
|
||||
|
||||
@ -555,7 +556,7 @@ def tts(tts_mdl, text):
|
||||
return binascii.hexlify(bin).decode("utf-8")
|
||||
|
||||
|
||||
def ask(question, kb_ids, tenant_id):
|
||||
def ask(question, kb_ids, tenant_id, chat_llm_name=None):
|
||||
kbs = KnowledgebaseService.get_by_ids(kb_ids)
|
||||
embedding_list = list(set([kb.embd_id for kb in kbs]))
|
||||
|
||||
@ -563,7 +564,7 @@ def ask(question, kb_ids, tenant_id):
|
||||
retriever = settings.retrievaler if not is_knowledge_graph else settings.kg_retrievaler
|
||||
|
||||
embd_mdl = LLMBundle(tenant_id, LLMType.EMBEDDING, embedding_list[0])
|
||||
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT)
|
||||
chat_mdl = LLMBundle(tenant_id, LLMType.CHAT, chat_llm_name)
|
||||
max_tokens = chat_mdl.max_length
|
||||
tenant_ids = list(set([kb.tenant_id for kb in kbs]))
|
||||
kbinfos = retriever.retrieval(question, embd_mdl, tenant_ids, kb_ids, 1, 12, 0.1, 0.3, aggs=False, rank_feature=label_question(question, kbs))
|
||||
|
||||
110
api/db/services/search_service.py
Normal file
110
api/db/services/search_service.py
Normal file
@ -0,0 +1,110 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
from datetime import datetime
|
||||
|
||||
from peewee import fn
|
||||
|
||||
from api.db import StatusEnum
|
||||
from api.db.db_models import DB, Search, User
|
||||
from api.db.services.common_service import CommonService
|
||||
from api.utils import current_timestamp, datetime_format
|
||||
|
||||
|
||||
class SearchService(CommonService):
    """Query and persistence helpers for the ``Search`` model."""

    model = Search

    @classmethod
    @DB.connection_context()
    def save(cls, **kwargs):
        """Create a ``Search`` row, stamping its create/update time fields.

        Any ``create_time``, ``create_date``, ``update_time`` or ``update_date``
        supplied by the caller is overwritten.

        Args:
            **kwargs: column values forwarded to ``cls.model.create``.

        Returns:
            The model instance returned by ``cls.model.create``.
        """
        # Read each clock once so the create_* and update_* values of a brand
        # new row are guaranteed to match.
        now_ts = current_timestamp()
        now_date = datetime_format(datetime.now())
        kwargs["create_time"] = now_ts
        kwargs["create_date"] = now_date
        kwargs["update_time"] = now_ts
        kwargs["update_date"] = now_date
        return cls.model.create(**kwargs)

    @classmethod
    @DB.connection_context()
    def accessible4deletion(cls, search_id, user_id) -> bool:
        """Tell whether ``user_id`` may delete the search ``search_id``.

        Returns:
            True when a row exists with that id, created by ``user_id`` and
            whose status equals ``StatusEnum.VALID.value``; False otherwise.
        """
        search = (
            cls.model.select(cls.model.id)
            .where(
                cls.model.id == search_id,
                cls.model.created_by == user_id,
                cls.model.status == StatusEnum.VALID.value,
            )
            .first()
        )
        return search is not None

    @classmethod
    @DB.connection_context()
    def get_detail(cls, search_id):
        """Fetch one valid search joined with its tenant's user record.

        The join requires a ``User`` whose id equals the search's ``tenant_id``
        and whose status is valid; the search itself must also be valid.

        Args:
            search_id: id of the search to look up.

        Returns:
            The result of the row's ``to_dict()``, or an empty dict when no
            row matches.
        """
        fields = [
            cls.model.id,
            cls.model.avatar,
            cls.model.tenant_id,
            cls.model.name,
            cls.model.description,
            cls.model.created_by,
            cls.model.search_config,
            cls.model.update_time,
            User.nickname,
            User.avatar.alias("tenant_avatar"),
        ]
        search = (
            cls.model.select(*fields)
            .join(User, on=((User.id == cls.model.tenant_id) & (User.status == StatusEnum.VALID.value)))
            .where((cls.model.id == search_id) & (cls.model.status == StatusEnum.VALID.value))
            .first()
        )
        # .first() yields None when nothing matches; chaining .to_dict() onto
        # it would raise AttributeError instead of reporting "not found".
        if search is None:
            return {}
        return search.to_dict()

    @classmethod
    @DB.connection_context()
    def get_by_tenant_ids(cls, joined_tenant_ids, user_id, page_number, items_per_page, orderby, desc, keywords):
        """List valid searches owned by the given tenants or by the user.

        Args:
            joined_tenant_ids: tenant ids whose searches should be included.
            user_id: searches whose ``tenant_id`` equals this id are included too.
            page_number: page to return; pagination is skipped when this or
                ``items_per_page`` is falsy.
            items_per_page: page size.
            orderby: sort key, resolved through ``cls.model.getter_by``.
            desc: sort descending when truthy, ascending otherwise.
            keywords: optional case-insensitive substring filter on the name.

        Returns:
            A ``(rows, count)`` tuple: ``rows`` is a list of dicts for the
            requested page and ``count`` is the number of matches before
            pagination is applied.
        """
        fields = [
            cls.model.id,
            cls.model.avatar,
            cls.model.tenant_id,
            cls.model.name,
            cls.model.description,
            cls.model.created_by,
            cls.model.status,
            cls.model.update_time,
            cls.model.create_time,
            User.nickname,
            User.avatar.alias("tenant_avatar"),
        ]
        query = (
            cls.model.select(*fields)
            .join(User, on=(cls.model.tenant_id == User.id))
            .where(((cls.model.tenant_id.in_(joined_tenant_ids)) | (cls.model.tenant_id == user_id)) & (cls.model.status == StatusEnum.VALID.value))
        )

        if keywords:
            query = query.where(fn.LOWER(cls.model.name).contains(keywords.lower()))

        order_field = cls.model.getter_by(orderby)
        query = query.order_by(order_field.desc() if desc else order_field.asc())

        # Count before paginating so callers receive the overall total.
        count = query.count()

        if page_number and items_per_page:
            query = query.paginate(page_number, items_per_page)

        return list(query.dicts()), count
|
||||
Reference in New Issue
Block a user