mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Remove unused code, separate layout detection out as a new API. Add new RAG method 'table' (#55)
This commit is contained in:
@ -77,3 +77,4 @@ class ParserType(StrEnum):
|
||||
RESUME = "resume"
|
||||
BOOK = "book"
|
||||
QA = "qa"
|
||||
TABLE = "table"
|
||||
|
||||
@ -29,7 +29,7 @@ from peewee import (
|
||||
)
|
||||
from playhouse.pool import PooledMySQLDatabase
|
||||
|
||||
from api.db import SerializedType
|
||||
from api.db import SerializedType, ParserType
|
||||
from api.settings import DATABASE, stat_logger, SECRET_KEY
|
||||
from api.utils.log_utils import getLogger
|
||||
from api import utils
|
||||
@ -381,7 +381,8 @@ class Tenant(DataBaseModel):
|
||||
embd_id = CharField(max_length=128, null=False, help_text="default embedding model ID")
|
||||
asr_id = CharField(max_length=128, null=False, help_text="default ASR model ID")
|
||||
img2txt_id = CharField(max_length=128, null=False, help_text="default image to text model ID")
|
||||
parser_ids = CharField(max_length=128, null=False, help_text="default image to text model ID")
|
||||
parser_ids = CharField(max_length=128, null=False, help_text="document processors")
|
||||
credit = IntegerField(default=512)
|
||||
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
|
||||
|
||||
class Meta:
|
||||
@ -472,7 +473,8 @@ class Knowledgebase(DataBaseModel):
|
||||
similarity_threshold = FloatField(default=0.2)
|
||||
vector_similarity_weight = FloatField(default=0.3)
|
||||
|
||||
parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
|
||||
parser_id = CharField(max_length=32, null=False, help_text="default parser ID", default=ParserType.GENERAL.value)
|
||||
parser_config = JSONField(null=False, default={"from_page":0, "to_page": 100000})
|
||||
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted,1: validate)", default="1")
|
||||
|
||||
def __str__(self):
|
||||
@ -487,6 +489,7 @@ class Document(DataBaseModel):
|
||||
thumbnail = TextField(null=True, help_text="thumbnail base64 string")
|
||||
kb_id = CharField(max_length=256, null=False, index=True)
|
||||
parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
|
||||
parser_config = JSONField(null=False, default={"from_page":0, "to_page": 100000})
|
||||
source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document from")
|
||||
type = CharField(max_length=32, null=False, help_text="file extension")
|
||||
created_by = CharField(max_length=32, null=False, help_text="who created it")
|
||||
|
||||
@ -1,157 +0,0 @@
|
||||
#
|
||||
# Copyright 2021 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import abc
|
||||
import json
|
||||
import time
|
||||
from functools import wraps
|
||||
from shortuuid import ShortUUID
|
||||
|
||||
from api.versions import get_rag_version
|
||||
|
||||
from api.errors.error_services import *
|
||||
from api.settings import (
|
||||
GRPC_PORT, HOST, HTTP_PORT,
|
||||
RANDOM_INSTANCE_ID, stat_logger,
|
||||
)
|
||||
|
||||
|
||||
# Identifier of this server process: `ShortUUID().random(length=8)` when
# RANDOM_INSTANCE_ID is truthy, otherwise a name built from HOST and HTTP_PORT.
if RANDOM_INSTANCE_ID:
    instance_id = ShortUUID().random(length=8)
else:
    instance_id = f'flow-{HOST}-{HTTP_PORT}'

# Two-element tuple: ("host:grpc_port" address, JSON-encoded metadata string).
# It is built once, when this module is imported.
server_instance = (
    f'{HOST}:{GRPC_PORT}',
    json.dumps({
        'instance_id': instance_id,
        # Import-time wall clock, converted from seconds to milliseconds.
        'timestamp': round(time.time() * 1000),
        # Falls back to an empty string when get_rag_version() is falsy.
        'version': get_rag_version() or '',
        'host': HOST,
        'grpc_port': GRPC_PORT,
        'http_port': HTTP_PORT,
    }),
)
|
||||
|
||||
|
||||
def check_service_supported(method):
    """Guard a service method so it only runs for supported service names.

    The owning class MUST define the attribute `supported_services`.
    The first and second arguments of `method` MUST be `self` and
    `service_name`.

    :param Callable method: The class method to guard.
    :return: The wrapping function, carrying the metadata of `method`.
    :rtype: Callable
    :raises ServiceNotSupported: Raised by the wrapper when it is called with
        a `service_name` that is not in `self.supported_services`.
    """
    @wraps(method)
    def guarded(self, service_name, *args, **kwargs):
        # Known service: delegate to the wrapped method unchanged.
        if service_name in self.supported_services:
            return method(self, service_name, *args, **kwargs)
        raise ServiceNotSupported(service_name=service_name)

    return guarded
|
||||
|
||||
|
||||
class ServicesDB(abc.ABC):
    """Database for storing service urls.

    Abstract base class for the real backends. Backends implement the
    underscore-prefixed hooks (`_get_serving`, `_insert`, `_delete`,
    `_get_urls`) and the `supported_services` property; the public methods
    wrap those hooks and log any `ServicesError` instead of propagating it.
    """

    @property
    @abc.abstractmethod
    def supported_services(self):
        """The names of supported services.

        The returned list SHOULD contain `ragflow` (model download) and
        `servings` (RAG-Serving). `register_flow` and `unregister_flow` use
        the name `flow-server`, so it must be listed as well for those calls
        to pass the support check on `insert` / `delete`.

        :return: The service names.
        :rtype: list
        """

    @abc.abstractmethod
    def _get_serving(self):
        """Backend hook called by `get_serving`."""

    def get_serving(self):
        """Return the result of `_get_serving`, or `[]` on `ServicesError`.

        A `ServicesError` raised by the backend is logged through
        `stat_logger` and replaced by an empty list.

        :return: Whatever the backend returns, or an empty list on failure.
        """
        try:
            serving = self._get_serving()
        except ServicesError as err:
            stat_logger.exception(err)
            serving = []
        return serving

    @abc.abstractmethod
    def _insert(self, service_name, service_url, value=''):
        """Backend hook called by `insert`."""

    @check_service_supported
    def insert(self, service_name, service_url, value=''):
        """Insert a service url into the database.

        A `ServicesError` raised by the backend is logged and swallowed.

        :param str service_name: The service name.
        :param str service_url: The service url.
        :param value: Passed through to `_insert` unchanged; `register_flow`
            passes a JSON string here. Exact semantics are backend-defined.
        :return: None
        """
        try:
            self._insert(service_name, service_url, value)
        except ServicesError as err:
            stat_logger.exception(err)

    @abc.abstractmethod
    def _delete(self, service_name, service_url):
        """Backend hook called by `delete`."""

    @check_service_supported
    def delete(self, service_name, service_url):
        """Delete a service url from the database.

        A `ServicesError` raised by the backend is logged and swallowed.

        :param str service_name: The service name.
        :param str service_url: The service url.
        :return: None
        """
        try:
            self._delete(service_name, service_url)
        except ServicesError as err:
            stat_logger.exception(err)

    def register_flow(self):
        """Insert this flow server's address into the database via `insert`.

        :return: None
        """
        # server_instance is an (address, JSON metadata) pair.
        flow_url, flow_value = server_instance
        self.insert('flow-server', flow_url, flow_value)

    def unregister_flow(self):
        """Delete this flow server's address from the database via `delete`.

        :return: None
        """
        flow_url = server_instance[0]
        self.delete('flow-server', flow_url)

    @abc.abstractmethod
    def _get_urls(self, service_name, with_values=False):
        """Backend hook called by `get_urls`."""

    @check_service_supported
    def get_urls(self, service_name, with_values=False):
        """Query service urls from the database. The urls may belong to other nodes.

        Currently, only `ragflow` (model download) urls and `servings`
        (RAG-Serving) urls are supported. `ragflow` is a url containing
        scheme, host, port and path, while `servings` only contains host and
        port.

        A `ServicesError` raised by the backend is logged and replaced by an
        empty list.

        :param str service_name: The service name.
        :param bool with_values: Passed through to `_get_urls` unchanged;
            presumably asks the backend to return stored values together with
            the urls (confirm against the backend implementation).
        :return: The service urls.
        :rtype: list
        """
        try:
            urls = self._get_urls(service_name, with_values)
        except ServicesError as err:
            stat_logger.exception(err)
            urls = []
        return urls
|
||||
@ -63,7 +63,7 @@ class DocumentService(CommonService):
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_newly_uploaded(cls, tm, mod=0, comm=1, items_per_page=64):
|
||||
fields = [cls.model.id, cls.model.kb_id, cls.model.parser_id, cls.model.name, cls.model.type, cls.model.location, cls.model.size, Knowledgebase.tenant_id, Tenant.embd_id, Tenant.img2txt_id, Tenant.asr_id, cls.model.update_time]
|
||||
fields = [cls.model.id, cls.model.kb_id, cls.model.parser_id, cls.model.parser_config, cls.model.name, cls.model.type, cls.model.location, cls.model.size, Knowledgebase.tenant_id, Tenant.embd_id, Tenant.img2txt_id, Tenant.asr_id, cls.model.update_time]
|
||||
docs = cls.model.select(*fields) \
|
||||
.join(Knowledgebase, on=(cls.model.kb_id == Knowledgebase.id)) \
|
||||
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))\
|
||||
|
||||
@ -52,7 +52,8 @@ class KnowledgebaseService(CommonService):
|
||||
cls.model.doc_num,
|
||||
cls.model.token_num,
|
||||
cls.model.chunk_num,
|
||||
cls.model.parser_id]
|
||||
cls.model.parser_id,
|
||||
cls.model.parser_config]
|
||||
kbs = cls.model.select(*fields).join(Tenant, on=((Tenant.id == cls.model.tenant_id)&(Tenant.status== StatusEnum.VALID.value))).where(
|
||||
(cls.model.id == kb_id),
|
||||
(cls.model.status == StatusEnum.VALID.value)
|
||||
|
||||
@ -27,7 +27,7 @@ class TaskService(CommonService):
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_tasks(cls, tm, mod=0, comm=1, items_per_page=64):
|
||||
fields = [cls.model.id, cls.model.doc_id, cls.model.from_page,cls.model.to_page, Document.kb_id, Document.parser_id, Document.name, Document.type, Document.location, Document.size, Knowledgebase.tenant_id, Tenant.embd_id, Tenant.img2txt_id, Tenant.asr_id, cls.model.update_time]
|
||||
fields = [cls.model.id, cls.model.doc_id, cls.model.from_page,cls.model.to_page, Document.kb_id, Document.parser_id, Document.parser_config, Document.name, Document.type, Document.location, Document.size, Knowledgebase.tenant_id, Tenant.embd_id, Tenant.img2txt_id, Tenant.asr_id, cls.model.update_time]
|
||||
docs = cls.model.select(*fields) \
|
||||
.join(Document, on=(cls.model.doc_id == Document.id)) \
|
||||
.join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
|
||||
@ -53,3 +53,13 @@ class TaskService(CommonService):
|
||||
except Exception as e:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def update_progress(cls, id, info):
    """Append a progress message to a row and optionally set its progress.

    Both changes are written in a single UPDATE statement so the row is never
    left with a new message but a stale progress value (or vice versa), and
    only one round trip is made.

    :param id: Value matched against `cls.model.id` to select the row.
    :param dict info: Update payload. If it contains `progress_msg`, that text
        is appended to the stored message after a newline. If it contains
        `progress`, the stored progress is replaced by that value. Keys that
        are absent leave the corresponding column untouched.
    :return: None
    """
    changes = {}
    if "progress_msg" in info:
        changes["progress_msg"] = cls.model.progress_msg + "\n" + info["progress_msg"]
    if "progress" in info:
        changes["progress"] = info["progress"]
    if not changes:
        # Nothing to write; an UPDATE with no assignments would be invalid.
        return
    cls.model.update(**changes).where(cls.model.id == id).execute()
|
||||
|
||||
@ -92,6 +92,12 @@ class TenantService(CommonService):
|
||||
.join(UserTenant, on=((cls.model.id == UserTenant.tenant_id) & (UserTenant.user_id==user_id) & (UserTenant.status == StatusEnum.VALID.value) & (UserTenant.role==UserTenantRole.NORMAL.value)))\
|
||||
.where(cls.model.status == StatusEnum.VALID.value).dicts())
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def decrease(cls, user_id, num):
    """Subtract `num` from the `credit` of the row whose id equals `user_id`.

    :param user_id: Value matched against `cls.model.id`.
        NOTE(review): the name suggests a user id, but it is compared with
        the model's own id -- confirm what callers pass.
    :param num: Amount subtracted from the stored `credit`.
    :return: None
    :raises LookupError: When the update's `execute()` result is 0.
    """
    update_query = cls.model.update(credit=cls.model.credit - num)
    affected = update_query.where(cls.model.id == user_id).execute()
    if affected == 0:
        raise LookupError("Tenant not found which is supposed to be there")
|
||||
|
||||
class UserTenantService(CommonService):
|
||||
model = UserTenant
|
||||
|
||||
Reference in New Issue
Block a user