mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Refactor Dataset API (#2783)
### What problem does this PR solve?

Refactor Dataset API

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
@ -30,5 +30,9 @@ class Base(object):
|
||||
res = self.rag.delete(path, params)
|
||||
return res
|
||||
|
||||
def put(self,path, json):
|
||||
res = self.rag.put(path,json)
|
||||
return res
|
||||
|
||||
def __str__(self):
|
||||
return str(self.to_json())
|
||||
|
||||
@ -32,24 +32,13 @@ class DataSet(Base):
|
||||
res_dict.pop(k)
|
||||
super().__init__(rag, res_dict)
|
||||
|
||||
def save(self) -> bool:
|
||||
res = self.post('/dataset/save',
|
||||
{"id": self.id, "name": self.name, "avatar": self.avatar, "tenant_id": self.tenant_id,
|
||||
"description": self.description, "language": self.language, "embedding_model": self.embedding_model,
|
||||
"permission": self.permission,
|
||||
"document_count": self.document_count, "chunk_count": self.chunk_count, "parse_method": self.parse_method,
|
||||
"parser_config": self.parser_config.to_json()
|
||||
})
|
||||
def update(self, update_message: dict):
|
||||
res = self.put(f'/dataset/{self.id}',
|
||||
update_message)
|
||||
res = res.json()
|
||||
if res.get("retmsg") == "success": return True
|
||||
raise Exception(res["retmsg"])
|
||||
if res.get("code") != 0:
|
||||
raise Exception(res["message"])
|
||||
|
||||
def delete(self) -> bool:
|
||||
res = self.rm('/dataset/delete',
|
||||
{"id": self.id})
|
||||
res = res.json()
|
||||
if res.get("retmsg") == "success": return True
|
||||
raise Exception(res["retmsg"])
|
||||
|
||||
def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
|
||||
"""
|
||||
|
||||
@ -18,9 +18,9 @@ from typing import List
|
||||
import requests
|
||||
|
||||
from .modules.assistant import Assistant
|
||||
from .modules.chunk import Chunk
|
||||
from .modules.dataset import DataSet
|
||||
from .modules.document import Document
|
||||
from .modules.chunk import Chunk
|
||||
|
||||
|
||||
class RAGFlow:
|
||||
@ -41,7 +41,11 @@ class RAGFlow:
|
||||
return res
|
||||
|
||||
def delete(self, path, params):
|
||||
res = requests.delete(url=self.api_url + path, params=params, headers=self.authorization_header)
|
||||
res = requests.delete(url=self.api_url + path, json=params, headers=self.authorization_header)
|
||||
return res
|
||||
|
||||
def put(self, path, json):
|
||||
res = requests.put(url=self.api_url + path, json= json,headers=self.authorization_header)
|
||||
return res
|
||||
|
||||
def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
|
||||
@ -52,7 +56,7 @@ class RAGFlow:
|
||||
parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
|
||||
"delimiter": "\n!?。;!?", "task_page_size": 12})
|
||||
parser_config = parser_config.to_json()
|
||||
res = self.post("/dataset/save",
|
||||
res = self.post("/dataset",
|
||||
{"name": name, "avatar": avatar, "description": description, "language": language,
|
||||
"permission": permission,
|
||||
"document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
|
||||
@ -60,27 +64,28 @@ class RAGFlow:
|
||||
}
|
||||
)
|
||||
res = res.json()
|
||||
if res.get("retmsg") == "success":
|
||||
if res.get("code") == 0:
|
||||
return DataSet(self, res["data"])
|
||||
raise Exception(res["retmsg"])
|
||||
raise Exception(res["message"])
|
||||
|
||||
def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True) -> \
|
||||
def delete_dataset(self, ids: List[str] = None, names: List[str] = None):
|
||||
res = self.delete("/dataset",{"ids": ids, "names": names})
|
||||
res=res.json()
|
||||
if res.get("code") != 0:
|
||||
raise Exception(res["message"])
|
||||
|
||||
def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
|
||||
id: str = None, name: str = None) -> \
|
||||
List[DataSet]:
|
||||
res = self.get("/dataset/list", {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc})
|
||||
res = self.get("/dataset",
|
||||
{"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
|
||||
res = res.json()
|
||||
result_list = []
|
||||
if res.get("retmsg") == "success":
|
||||
if res.get("code") == 0:
|
||||
for data in res['data']:
|
||||
result_list.append(DataSet(self, data))
|
||||
return result_list
|
||||
raise Exception(res["retmsg"])
|
||||
|
||||
def get_dataset(self, id: str = None, name: str = None) -> DataSet:
|
||||
res = self.get("/dataset/detail", {"id": id, "name": name})
|
||||
res = res.json()
|
||||
if res.get("retmsg") == "success":
|
||||
return DataSet(self, res['data'])
|
||||
raise Exception(res["retmsg"])
|
||||
raise Exception(res["message"])
|
||||
|
||||
def create_assistant(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
|
||||
llm: Assistant.LLM = None, prompt: Assistant.Prompt = None) -> Assistant:
|
||||
@ -272,4 +277,3 @@ class RAGFlow:
|
||||
except Exception as e:
|
||||
print(f"An error occurred during retrieval: {e}")
|
||||
raise
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
|
||||
|
||||
API_KEY = 'ragflow-k0YzUxMGY4NjY5YTExZWY5MjI5MDI0Mm'
|
||||
API_KEY = 'ragflow-NiYmZjNTVjODYwNzExZWZiODEwMDI0Mm'
|
||||
HOST_ADDRESS = 'http://127.0.0.1:9380'
|
||||
@ -24,9 +24,8 @@ class TestDataset(TestSdk):
|
||||
ds = rag.create_dataset("ABC")
|
||||
if isinstance(ds, DataSet):
|
||||
assert ds.name == "ABC", "Name does not match."
|
||||
ds.name = 'DEF'
|
||||
res = ds.save()
|
||||
assert res is True, f"Failed to update dataset, error: {res}"
|
||||
res = ds.update({"name":"DEF"})
|
||||
assert res is None, f"Failed to update dataset, error: {res}"
|
||||
else:
|
||||
assert False, f"Failed to create dataset, error: {ds}"
|
||||
|
||||
@ -38,8 +37,8 @@ class TestDataset(TestSdk):
|
||||
ds = rag.create_dataset("MA")
|
||||
if isinstance(ds, DataSet):
|
||||
assert ds.name == "MA", "Name does not match."
|
||||
res = ds.delete()
|
||||
assert res is True, f"Failed to delete dataset, error: {res}"
|
||||
res = rag.delete_dataset(names=["MA"])
|
||||
assert res is None, f"Failed to delete dataset, error: {res}"
|
||||
else:
|
||||
assert False, f"Failed to create dataset, error: {ds}"
|
||||
|
||||
@ -52,12 +51,3 @@ class TestDataset(TestSdk):
|
||||
assert len(list_datasets) > 0, "Do not exist any dataset"
|
||||
for ds in list_datasets:
|
||||
assert isinstance(ds, DataSet), "Existence type is not dataset."
|
||||
|
||||
def test_get_detail_dataset_with_success(self):
|
||||
"""
|
||||
Test getting a dataset's detail with success
|
||||
"""
|
||||
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
||||
ds = rag.get_dataset(name="God")
|
||||
assert isinstance(ds, DataSet), f"Failed to get dataset, error: {ds}."
|
||||
assert ds.name == "God", "Name does not match"
|
||||
|
||||
Reference in New Issue
Block a user