Refactor Dataset API (#2783)

### What problem does this PR solve?

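Refactor the Dataset API: the SDK now calls REST-style `/dataset` endpoints (POST to create, PUT `/dataset/{id}` to update, DELETE to remove, GET to list) and checks the `code`/`message` fields of the response instead of `retmsg`.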

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
liuhua
2024-10-11 09:55:27 +08:00
committed by GitHub
parent a2f9c03a95
commit cbd7cd7c4d
11 changed files with 449 additions and 393 deletions

View File

@ -30,5 +30,9 @@ class Base(object):
res = self.rag.delete(path, params)
return res
def put(self, path, json):
    """Forward a PUT request to the owning client.

    Delegates to ``self.rag.put(path, json)`` and hands back whatever that
    call returns, unchanged.
    """
    return self.rag.put(path, json)
def __str__(self):
    """Return the string form of the object's ``to_json()`` result."""
    as_json = self.to_json()
    return str(as_json)

View File

@ -32,24 +32,13 @@ class DataSet(Base):
res_dict.pop(k)
super().__init__(rag, res_dict)
def save(self) -> bool:
res = self.post('/dataset/save',
{"id": self.id, "name": self.name, "avatar": self.avatar, "tenant_id": self.tenant_id,
"description": self.description, "language": self.language, "embedding_model": self.embedding_model,
"permission": self.permission,
"document_count": self.document_count, "chunk_count": self.chunk_count, "parse_method": self.parse_method,
"parser_config": self.parser_config.to_json()
})
def update(self, update_message: dict):
res = self.put(f'/dataset/{self.id}',
update_message)
res = res.json()
if res.get("retmsg") == "success": return True
raise Exception(res["retmsg"])
if res.get("code") != 0:
raise Exception(res["message"])
def delete(self) -> bool:
    """Delete this dataset through the '/dataset/delete' endpoint.

    Sends the dataset id via ``self.rm`` and decodes the JSON reply.

    Returns:
        True when the reply's "retmsg" field equals "success".

    Raises:
        Exception: carrying the reply's "retmsg" for any other value.
    """
    payload = self.rm('/dataset/delete', {"id": self.id}).json()
    # Guard clause: anything other than an explicit success is an error.
    if payload.get("retmsg") != "success":
        raise Exception(payload["retmsg"])
    return True
def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
"""

View File

@ -18,9 +18,9 @@ from typing import List
import requests
from .modules.assistant import Assistant
from .modules.chunk import Chunk
from .modules.dataset import DataSet
from .modules.document import Document
from .modules.chunk import Chunk
class RAGFlow:
@ -41,7 +41,11 @@ class RAGFlow:
return res
def delete(self, path, params):
res = requests.delete(url=self.api_url + path, params=params, headers=self.authorization_header)
res = requests.delete(url=self.api_url + path, json=params, headers=self.authorization_header)
return res
def put(self, path, json):
    """Issue an HTTP PUT to ``self.api_url + path``.

    ``json`` is passed as the request's JSON body and
    ``self.authorization_header`` as the request headers. The
    ``requests`` response object is returned as-is.
    """
    endpoint = self.api_url + path
    return requests.put(url=endpoint, json=json, headers=self.authorization_header)
def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
@ -52,7 +56,7 @@ class RAGFlow:
parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
"delimiter": "\n!?。;!?", "task_page_size": 12})
parser_config = parser_config.to_json()
res = self.post("/dataset/save",
res = self.post("/dataset",
{"name": name, "avatar": avatar, "description": description, "language": language,
"permission": permission,
"document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
@ -60,27 +64,28 @@ class RAGFlow:
}
)
res = res.json()
if res.get("retmsg") == "success":
if res.get("code") == 0:
return DataSet(self, res["data"])
raise Exception(res["retmsg"])
raise Exception(res["message"])
def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True) -> \
def delete_dataset(self, ids: List[str] = None, names: List[str] = None):
    """Request deletion of datasets via a DELETE call to "/dataset".

    Both ``ids`` and ``names`` are sent in the request payload exactly as
    given (including ``None``); how the server combines them is not
    visible here — confirm against the server API.

    Returns:
        None when the decoded reply has ``code`` equal to 0.

    Raises:
        Exception: carrying the reply's "message" when ``code`` is not 0.
    """
    payload = {"ids": ids, "names": names}
    reply = self.delete("/dataset", payload).json()
    if reply.get("code") == 0:
        return
    raise Exception(reply["message"])
def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
id: str = None, name: str = None) -> \
List[DataSet]:
res = self.get("/dataset/list", {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc})
res = self.get("/dataset",
{"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
res = res.json()
result_list = []
if res.get("retmsg") == "success":
if res.get("code") == 0:
for data in res['data']:
result_list.append(DataSet(self, data))
return result_list
raise Exception(res["retmsg"])
def get_dataset(self, id: str = None, name: str = None) -> DataSet:
res = self.get("/dataset/detail", {"id": id, "name": name})
res = res.json()
if res.get("retmsg") == "success":
return DataSet(self, res['data'])
raise Exception(res["retmsg"])
raise Exception(res["message"])
def create_assistant(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
llm: Assistant.LLM = None, prompt: Assistant.Prompt = None) -> Assistant:
@ -272,4 +277,3 @@ class RAGFlow:
except Exception as e:
print(f"An error occurred during retrieval: {e}")
raise

View File

@ -1,4 +1,4 @@
API_KEY = 'ragflow-k0YzUxMGY4NjY5YTExZWY5MjI5MDI0Mm'
API_KEY = 'ragflow-NiYmZjNTVjODYwNzExZWZiODEwMDI0Mm'
HOST_ADDRESS = 'http://127.0.0.1:9380'

View File

@ -24,9 +24,8 @@ class TestDataset(TestSdk):
ds = rag.create_dataset("ABC")
if isinstance(ds, DataSet):
assert ds.name == "ABC", "Name does not match."
ds.name = 'DEF'
res = ds.save()
assert res is True, f"Failed to update dataset, error: {res}"
res = ds.update({"name":"DEF"})
assert res is None, f"Failed to update dataset, error: {res}"
else:
assert False, f"Failed to create dataset, error: {ds}"
@ -38,8 +37,8 @@ class TestDataset(TestSdk):
ds = rag.create_dataset("MA")
if isinstance(ds, DataSet):
assert ds.name == "MA", "Name does not match."
res = ds.delete()
assert res is True, f"Failed to delete dataset, error: {res}"
res = rag.delete_dataset(names=["MA"])
assert res is None, f"Failed to delete dataset, error: {res}"
else:
assert False, f"Failed to create dataset, error: {ds}"
@ -52,12 +51,3 @@ class TestDataset(TestSdk):
assert len(list_datasets) > 0, "Do not exist any dataset"
for ds in list_datasets:
assert isinstance(ds, DataSet), "Existence type is not dataset."
def test_get_detail_dataset_with_success(self):
"""
Test getting a dataset's detail with success
"""
rag = RAGFlow(API_KEY, HOST_ADDRESS)
ds = rag.get_dataset(name="God")
assert isinstance(ds, DataSet), f"Failed to get dataset, error: {ds}."
assert ds.name == "God", "Name does not match"