mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix some issues in API (#2902)
### What problem does this PR solve?

Fix some issues in API

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
@ -22,7 +22,7 @@ class Base(object):
|
||||
res = self.rag.post(path, json, stream=stream,files=files)
|
||||
return res
|
||||
|
||||
def get(self, path, params):
|
||||
def get(self, path, params=None):
    """Forward a GET request to ``self.rag`` and hand back its result.

    Args:
        path: Request path, passed through unchanged.
        params: Optional query parameters, passed through unchanged.

    Returns:
        Whatever ``self.rag.get(path, params)`` returns.
    """
    return self.rag.get(path, params)
|
||||
|
||||
|
||||
@ -73,6 +73,3 @@ class Chat(Base):
|
||||
res = res.json()
|
||||
if res.get("code") != 0:
|
||||
raise Exception(res.get("message"))
|
||||
|
||||
def get_prologue(self):
    """Return the ``opener`` attribute of this chat's prompt."""
    prompt = self.prompt
    return prompt.opener
|
||||
|
||||
@ -1,78 +1,78 @@
|
||||
from typing import Optional, List
|
||||
|
||||
from transformers.models.bloom.modeling_bloom import bloom_gelu_back
|
||||
|
||||
from .document import Document
|
||||
|
||||
from .base import Base
|
||||
|
||||
|
||||
class DataSet(Base):
    """Client-side handle for one dataset, with document management calls.

    Every request goes through the HTTP helpers inherited from ``Base``
    (``get``/``post``/``put``/``rm``). Each call expects a JSON body with a
    ``"code"`` field and raises ``Exception`` carrying the ``"message"``
    field whenever ``code`` is not ``0``.
    """

    class ParserConfig(Base):
        """Parser settings for a dataset, pre-populated with defaults."""

        def __init__(self, rag, res_dict):
            # Defaults are assigned first; res_dict is then handed to
            # Base.__init__ (defined elsewhere — presumably it overlays the
            # supplied values; confirm against Base).
            self.chunk_token_count = 128
            self.layout_recognize = True
            self.delimiter = '\n!?。;!?'
            self.task_page_size = 12
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        """Set default attributes, then pass the recognised keys to ``Base``.

        Args:
            rag: Client object forwarded to ``Base.__init__``.
            res_dict: Mapping of dataset fields. Keys that do not match one
                of the default attributes below are ignored. The mapping
                itself is not modified.
        """
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.parse_method = "naive"
        self.parser_config = None
        # Build a filtered copy instead of popping keys in place, so the
        # caller's dictionary is left exactly as it was passed in.
        known = {k: v for k, v in res_dict.items() if k in self.__dict__}
        super().__init__(rag, known)

    def update(self, update_message: dict):
        """Send ``update_message`` via ``put`` to ``/dataset/{id}``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.put(f'/dataset/{self.id}', update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def upload_documents(self, document_list: List[dict]):
        """Upload documents to this dataset.

        Args:
            document_list: Dicts that each provide ``"name"`` and ``"blob"``;
                every entry is sent as one ``"file"`` multipart field.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        url = f"/dataset/{self.id}/document"
        files = [("file", (ele["name"], ele["blob"])) for ele in document_list]
        res = self.post(path=url, json=None, files=files)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def list_documents(self, id: Optional[str] = None, keywords: Optional[str] = None, offset: int = 1,
                       limit: int = 1024, orderby: str = "create_time", desc: bool = True) -> List[Document]:
        """Query ``/dataset/{id}/info`` and wrap each entry in a ``Document``.

        All arguments are forwarded unchanged as query parameters.

        Returns:
            A list of ``Document`` objects; empty when the response carries
            no ``"docs"`` entry.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.get(f"/dataset/{self.id}/info",
                       params={"id": id, "keywords": keywords, "offset": offset, "limit": limit,
                               "orderby": orderby, "desc": desc})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
        # "docs" may be absent or null; treat both as "no documents" rather
        # than failing with a TypeError while iterating None.
        return [Document(self.rag, document) for document in res["data"].get("docs") or []]

    def delete_documents(self, ids: Optional[List[str]] = None):
        """Send ``{"ids": ids}`` via ``rm`` to ``/dataset/{id}/document``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.rm(f"/dataset/{self.id}/document", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_parse_documents(self, document_ids):
        """Post ``{"document_ids": document_ids}`` to ``/dataset/{id}/chunk``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.post(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        """Send ``{"document_ids": document_ids}`` via ``rm`` to ``/dataset/{id}/chunk``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.rm(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
|
||||
from typing import Optional, List
|
||||
|
||||
from transformers.models.bloom.modeling_bloom import bloom_gelu_back
|
||||
|
||||
from .document import Document
|
||||
|
||||
from .base import Base
|
||||
|
||||
|
||||
class DataSet(Base):
    """Client-side handle for one dataset, with document management calls.

    Every request goes through the HTTP helpers inherited from ``Base``
    (``get``/``post``/``put``/``rm``). Each call expects a JSON body with a
    ``"code"`` field and raises ``Exception`` carrying the ``"message"``
    field whenever ``code`` is not ``0``.
    """

    class ParserConfig(Base):
        """Parser settings for a dataset, pre-populated with defaults."""

        def __init__(self, rag, res_dict):
            # Defaults are assigned first; res_dict is then handed to
            # Base.__init__ (defined elsewhere — presumably it overlays the
            # supplied values; confirm against Base).
            self.chunk_token_count = 128
            self.layout_recognize = True
            self.delimiter = '\n!?。;!?'
            self.task_page_size = 12
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        """Set default attributes, then pass the recognised keys to ``Base``.

        Args:
            rag: Client object forwarded to ``Base.__init__``.
            res_dict: Mapping of dataset fields. Keys that do not match one
                of the default attributes below are ignored. The mapping
                itself is not modified.
        """
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.chunk_method = "naive"
        self.parser_config = None
        # Build a filtered copy instead of popping keys in place, so the
        # caller's dictionary is left exactly as it was passed in.
        known = {k: v for k, v in res_dict.items() if k in self.__dict__}
        super().__init__(rag, known)

    def update(self, update_message: dict):
        """Send ``update_message`` via ``put`` to ``/dataset/{id}``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.put(f'/dataset/{self.id}', update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def upload_documents(self, document_list: List[dict]):
        """Upload documents to this dataset.

        Args:
            document_list: Dicts that each provide ``"name"`` and ``"blob"``;
                every entry is sent as one ``"file"`` multipart field.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        url = f"/dataset/{self.id}/document"
        files = [("file", (ele["name"], ele["blob"])) for ele in document_list]
        res = self.post(path=url, json=None, files=files)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def list_documents(self, id: Optional[str] = None, keywords: Optional[str] = None, offset: int = 1,
                       limit: int = 1024, orderby: str = "create_time", desc: bool = True) -> List[Document]:
        """Query ``/dataset/{id}/info`` and wrap each entry in a ``Document``.

        All arguments are forwarded unchanged as query parameters.

        Returns:
            A list of ``Document`` objects; empty when the response carries
            no ``"docs"`` entry.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.get(f"/dataset/{self.id}/info",
                       params={"id": id, "keywords": keywords, "offset": offset, "limit": limit,
                               "orderby": orderby, "desc": desc})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
        # "docs" may be absent or null; treat both as "no documents" rather
        # than failing with a TypeError while iterating None.
        return [Document(self.rag, document) for document in res["data"].get("docs") or []]

    def delete_documents(self, ids: Optional[List[str]] = None):
        """Send ``{"ids": ids}`` via ``rm`` to ``/dataset/{id}/document``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.rm(f"/dataset/{self.id}/document", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_parse_documents(self, document_ids):
        """Post ``{"document_ids": document_ids}`` to ``/dataset/{id}/chunk``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.post(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        """Send ``{"document_ids": document_ids}`` via ``rm`` to ``/dataset/{id}/chunk``.

        Raises:
            Exception: If the response ``code`` is not ``0``.
        """
        res = self.rm(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
|
||||
|
||||
@ -1,7 +1,4 @@
|
||||
import time
|
||||
|
||||
from PIL.ImageFile import raise_oserror
|
||||
|
||||
import json
|
||||
from .base import Base
|
||||
from .chunk import Chunk
|
||||
from typing import List
|
||||
@ -13,7 +10,7 @@ class Document(Base):
|
||||
self.name = ""
|
||||
self.thumbnail = None
|
||||
self.knowledgebase_id = None
|
||||
self.parser_method = ""
|
||||
self.chunk_method = ""
|
||||
self.parser_config = {"pages": [[1, 1000000]]}
|
||||
self.source_type = "local"
|
||||
self.type = ""
|
||||
@ -32,6 +29,23 @@ class Document(Base):
|
||||
res_dict.pop(k)
|
||||
super().__init__(rag, res_dict)
|
||||
|
||||
|
||||
def update(self, update_message: dict):
    """Send ``update_message`` via ``put`` to this document's info endpoint.

    Args:
        update_message: Fields to change, forwarded unchanged.

    Raises:
        Exception: With the response's ``"message"`` when ``code`` is not ``0``.
    """
    endpoint = f'/dataset/{self.knowledgebase_id}/info/{self.id}'
    payload = self.put(endpoint, update_message).json()
    if payload.get("code") == 0:
        return
    raise Exception(payload["message"])
|
||||
|
||||
def download(self):
    """Fetch this document's raw content.

    Issues a GET for ``/dataset/{knowledgebase_id}/document/{id}``. A body
    that cannot be decoded as JSON is taken to be the file itself and is
    returned as ``res.content``. A body that does decode as JSON is treated
    as an error report and its ``"message"`` field is raised.

    Returns:
        The ``content`` attribute of the response.

    Raises:
        Exception: If the response body is valid JSON.
    """
    res = self.get(f"/dataset/{self.knowledgebase_id}/document/{self.id}")
    try:
        # Only the decode attempt is guarded; the error raise lives outside.
        payload = res.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Catching the base
        # class also covers decode errors from other JSON backends, which
        # are ValueError subclasses but not json.JSONDecodeError.
        return res.content
    raise Exception(payload.get("message"))
|
||||
|
||||
|
||||
def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
|
||||
data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
|
||||
res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)
|
||||
|
||||
@ -24,11 +24,11 @@ from .modules.document import Document
|
||||
|
||||
|
||||
class RAGFlow:
|
||||
def __init__(self, user_key, base_url, version='v1'):
|
||||
def __init__(self, api_key, base_url, version='v1'):
|
||||
"""
|
||||
api_url: http://<host_address>/api/v1
|
||||
"""
|
||||
self.user_key = user_key
|
||||
self.user_key = api_key
|
||||
self.api_url = f"{base_url}/api/{version}"
|
||||
self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}
|
||||
|
||||
@ -50,7 +50,7 @@ class RAGFlow:
|
||||
|
||||
def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
|
||||
permission: str = "me",
|
||||
document_count: int = 0, chunk_count: int = 0, parse_method: str = "naive",
|
||||
document_count: int = 0, chunk_count: int = 0, chunk_method: str = "naive",
|
||||
parser_config: DataSet.ParserConfig = None) -> DataSet:
|
||||
if parser_config is None:
|
||||
parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
|
||||
@ -59,7 +59,7 @@ class RAGFlow:
|
||||
res = self.post("/dataset",
|
||||
{"name": name, "avatar": avatar, "description": description, "language": language,
|
||||
"permission": permission,
|
||||
"document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
|
||||
"document_count": document_count, "chunk_count": chunk_count, "chunk_method": chunk_method,
|
||||
"parser_config": parser_config
|
||||
}
|
||||
)
|
||||
@ -93,7 +93,7 @@ class RAGFlow:
|
||||
return result_list
|
||||
raise Exception(res["message"])
|
||||
|
||||
def create_chat(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
|
||||
def create_chat(self, name: str, avatar: str = "", knowledgebases: List[DataSet] = [],
|
||||
llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
|
||||
datasets = []
|
||||
for dataset in knowledgebases:
|
||||
|
||||
@ -35,7 +35,7 @@ class TestDocument(TestSdk):
|
||||
def test_update_document_with_success(self):
|
||||
"""
|
||||
Test updating a document with success.
|
||||
Update name or parser_method are supported
|
||||
Update name or chunk_method are supported
|
||||
"""
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
ds = rag.list_datasets(name="God")
|
||||
@ -43,7 +43,7 @@ class TestDocument(TestSdk):
|
||||
doc = ds.list_documents()
|
||||
doc = doc[0]
|
||||
if isinstance(doc, Document):
|
||||
res = doc.update({"parser_method":"manual","name":"manual.txt"})
|
||||
res = doc.update({"chunk_method":"manual","name":"manual.txt"})
|
||||
assert res is None, f"Failed to update document, error: {res}"
|
||||
else:
|
||||
assert False, f"Failed to get document, error: {doc}"
|
||||
|
||||
Reference in New Issue
Block a user