Fix some issues in API (#2902)

### What problem does this PR solve?

Fix some issues in API

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
liuhua
2024-10-21 14:29:06 +08:00
committed by GitHub
parent 609cfa7b5f
commit 1935c3be1a
12 changed files with 210 additions and 162 deletions

View File

@ -22,7 +22,7 @@ class Base(object):
res = self.rag.post(path, json, stream=stream,files=files)
return res
def get(self, path, params):
def get(self, path, params=None):
res = self.rag.get(path, params)
return res

View File

@ -73,6 +73,3 @@ class Chat(Base):
res = res.json()
if res.get("code") != 0:
raise Exception(res.get("message"))
def get_prologue(self):
return self.prompt.opener

View File

@ -1,78 +1,78 @@
from typing import Optional, List

# NOTE(review): the original file also imported
# `transformers.models.bloom.modeling_bloom.bloom_gelu_back` here. It is never
# referenced anywhere in this module and drags in a very heavy dependency, so
# the import has been removed.
from .document import Document
from .base import Base


class DataSet(Base):
    """SDK handle for one RAGFlow dataset (knowledge base).

    Wraps the ``/dataset/<id>`` REST endpoints: metadata updates plus
    document upload / list / delete and chunk (parse) scheduling.
    """

    class ParserConfig(Base):
        """Default parser settings attached to a dataset at creation time."""

        def __init__(self, rag, res_dict):
            # Defaults for the "naive" parse method; res_dict overrides them
            # via Base.__init__.
            self.chunk_token_count = 128
            self.layout_recognize = True
            self.delimiter = '\n!?。;!?'
            self.task_page_size = 12
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        # Declare every known dataset field first; unknown keys in res_dict
        # are dropped below so Base.__init__ only sets recognized attributes.
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.parse_method = "naive"
        self.parser_config = None
        for k in list(res_dict.keys()):
            if k not in self.__dict__:
                res_dict.pop(k)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        """PUT the given fields to the server; raise on non-zero code."""
        res = self.put(f'/dataset/{self.id}', update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def upload_documents(self, document_list: List[dict]):
        """Upload files to this dataset.

        Each element of *document_list* must provide ``"name"`` and
        ``"blob"`` keys. Raises on a non-zero API code.
        """
        url = f"/dataset/{self.id}/document"
        files = [("file", (ele["name"], ele["blob"])) for ele in document_list]
        res = self.post(path=url, json=None, files=files)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def list_documents(self, id: str = None, keywords: str = None,
                       offset: int = 1, limit: int = 1024,
                       orderby: str = "create_time", desc: bool = True):
        """Return a page of ``Document`` objects; raise on API error."""
        res = self.get(f"/dataset/{self.id}/info",
                       params={"id": id, "keywords": keywords,
                               "offset": offset, "limit": limit,
                               "orderby": orderby, "desc": desc})
        res = res.json()
        documents = []
        if res.get("code") == 0:
            for document in res["data"].get("docs"):
                documents.append(Document(self.rag, document))
            return documents
        raise Exception(res["message"])

    def delete_documents(self, ids: List[str] = None):
        """Delete the given document ids (all selectable when ids is None)."""
        res = self.rm(f"/dataset/{self.id}/document", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def async_parse_documents(self, document_ids):
        """Queue the given documents for (re-)chunking; returns immediately."""
        res = self.post(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        """Cancel pending chunking tasks for the given documents."""
        res = self.rm(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
from typing import Optional, List

# NOTE(review): the original file also imported
# `transformers.models.bloom.modeling_bloom.bloom_gelu_back` here. It is never
# referenced anywhere in this module and drags in a very heavy dependency, so
# the import has been removed.
from .document import Document
from .base import Base


class DataSet(Base):
    """SDK handle for one RAGFlow dataset (knowledge base).

    Wraps the ``/dataset/<id>`` REST endpoints: metadata updates plus
    document upload / list / delete and chunk (parse) scheduling.
    """

    class ParserConfig(Base):
        """Default parser settings attached to a dataset at creation time."""

        def __init__(self, rag, res_dict):
            # Defaults for the "naive" chunk method; res_dict overrides them
            # via Base.__init__.
            self.chunk_token_count = 128
            self.layout_recognize = True
            self.delimiter = '\n!?。;!?'
            self.task_page_size = 12
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        # Declare every known dataset field first; unknown keys in res_dict
        # are dropped below so Base.__init__ only sets recognized attributes.
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.chunk_method = "naive"
        self.parser_config = None
        for k in list(res_dict.keys()):
            if k not in self.__dict__:
                res_dict.pop(k)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        """PUT the given fields to the server; raise on non-zero code."""
        res = self.put(f'/dataset/{self.id}', update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def upload_documents(self, document_list: List[dict]):
        """Upload files to this dataset.

        Each element of *document_list* must provide ``"name"`` and
        ``"blob"`` keys. Raises on a non-zero API code.
        """
        url = f"/dataset/{self.id}/document"
        files = [("file", (ele["name"], ele["blob"])) for ele in document_list]
        res = self.post(path=url, json=None, files=files)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def list_documents(self, id: str = None, keywords: str = None,
                       offset: int = 1, limit: int = 1024,
                       orderby: str = "create_time", desc: bool = True):
        """Return a page of ``Document`` objects; raise on API error."""
        res = self.get(f"/dataset/{self.id}/info",
                       params={"id": id, "keywords": keywords,
                               "offset": offset, "limit": limit,
                               "orderby": orderby, "desc": desc})
        res = res.json()
        documents = []
        if res.get("code") == 0:
            # Guard: a success payload may legitimately carry no "docs" list.
            for document in res["data"].get("docs") or []:
                documents.append(Document(self.rag, document))
            return documents
        raise Exception(res["message"])

    def delete_documents(self, ids: List[str] = None):
        """Delete the given document ids (all selectable when ids is None)."""
        res = self.rm(f"/dataset/{self.id}/document", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res["message"])

    def async_parse_documents(self, document_ids):
        """Queue the given documents for (re-)chunking; returns immediately."""
        res = self.post(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        """Cancel pending chunking tasks for the given documents."""
        res = self.rm(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

View File

@ -1,7 +1,4 @@
import time
from PIL.ImageFile import raise_oserror
import json
from .base import Base
from .chunk import Chunk
from typing import List
@ -13,7 +10,7 @@ class Document(Base):
self.name = ""
self.thumbnail = None
self.knowledgebase_id = None
self.parser_method = ""
self.chunk_method = ""
self.parser_config = {"pages": [[1, 1000000]]}
self.source_type = "local"
self.type = ""
@ -32,6 +29,23 @@ class Document(Base):
res_dict.pop(k)
super().__init__(rag, res_dict)
def update(self, update_message: dict):
    """Push the fields in *update_message* to this document on the server.

    Raises:
        Exception: when the API answers with a non-zero ``code``.
    """
    endpoint = f'/dataset/{self.knowledgebase_id}/info/{self.id}'
    payload = self.put(endpoint, update_message).json()
    if payload.get("code") != 0:
        raise Exception(payload["message"])
def download(self):
    """Return the raw file bytes of this document.

    A successful download is a non-JSON response whose ``content`` is the
    file itself. If the body parses as JSON it is an error envelope, and
    its ``message`` is raised instead.
    """
    res = self.get(f"/dataset/{self.knowledgebase_id}/document/{self.id}")
    try:
        body = res.json()
    except json.JSONDecodeError:
        # Not JSON -> this is the actual file payload.
        return res.content
    raise Exception(body.get("message"))
def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)

View File

@ -24,11 +24,11 @@ from .modules.document import Document
class RAGFlow:
def __init__(self, user_key, base_url, version='v1'):
def __init__(self, api_key, base_url, version='v1'):
"""
api_url: http://<host_address>/api/v1
"""
self.user_key = user_key
self.user_key = api_key
self.api_url = f"{base_url}/api/{version}"
self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}
@ -50,7 +50,7 @@ class RAGFlow:
def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
permission: str = "me",
document_count: int = 0, chunk_count: int = 0, parse_method: str = "naive",
document_count: int = 0, chunk_count: int = 0, chunk_method: str = "naive",
parser_config: DataSet.ParserConfig = None) -> DataSet:
if parser_config is None:
parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
@ -59,7 +59,7 @@ class RAGFlow:
res = self.post("/dataset",
{"name": name, "avatar": avatar, "description": description, "language": language,
"permission": permission,
"document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
"document_count": document_count, "chunk_count": chunk_count, "chunk_method": chunk_method,
"parser_config": parser_config
}
)
@ -93,7 +93,7 @@ class RAGFlow:
return result_list
raise Exception(res["message"])
def create_chat(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
def create_chat(self, name: str, avatar: str = "", knowledgebases: List[DataSet] = [],
llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
datasets = []
for dataset in knowledgebases:

View File

@ -35,7 +35,7 @@ class TestDocument(TestSdk):
def test_update_document_with_success(self):
"""
Test updating a document with success.
Update name or parser_method are supported
Update name or chunk_method are supported
"""
rag = RAGFlow(API_KEY, HOST_ADDRESS)
ds = rag.list_datasets(name="God")
@ -43,7 +43,7 @@ class TestDocument(TestSdk):
doc = ds.list_documents()
doc = doc[0]
if isinstance(doc, Document):
res = doc.update({"parser_method":"manual","name":"manual.txt"})
res = doc.update({"chunk_method":"manual","name":"manual.txt"})
assert res is None, f"Failed to update document, error: {res}"
else:
assert False, f"Failed to get document, error: {doc}"