diff --git a/sdk/python/ragflow/modules/__init__.py b/sdk/python/ragflow/modules/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/sdk/python/ragflow/modules/base.py b/sdk/python/ragflow/modules/base.py
new file mode 100644
index 000000000..fe22e5565
--- /dev/null
+++ b/sdk/python/ragflow/modules/base.py
@@ -0,0 +1,37 @@
+class Base(object):
+    """Attribute-style wrapper around a response dictionary.
+
+    Each key of ``res_dict`` becomes an instance attribute and nested
+    dictionaries are wrapped recursively. HTTP calls are delegated to the
+    client object stored in ``rag``.
+    """
+
+    def __init__(self, rag, res_dict):
+        self.rag = rag
+        for k, v in res_dict.items():
+            if isinstance(v, dict):
+                self.__dict__[k] = Base(rag, v)
+            else:
+                self.__dict__[k] = v
+
+    def to_json(self):
+        """Return the instance attributes as a plain, nested dictionary.
+
+        The ``rag`` reference, dunder names and callables are left out.
+        """
+        pr = {}
+        # Read the instance dictionary instead of dir(self): dir() also lists
+        # class-level names and getattr() would evaluate every property.
+        for name, value in vars(self).items():
+            if name.startswith('__') or name == "rag" or callable(value):
+                continue
+            pr[name] = value.to_json() if isinstance(value, Base) else value
+        return pr
+
+    def post(self, path, param):
+        """Send a POST request for ``path`` through the owning client."""
+        return self.rag.post(path, param)
+
+    def get(self, path, params=''):
+        """Send a GET request for ``path`` through the owning client."""
+        return self.rag.get(path, params)
diff --git a/sdk/python/ragflow/modules/dataset.py b/sdk/python/ragflow/modules/dataset.py
new file mode 100644
index 000000000..889f3703f
--- /dev/null
+++ b/sdk/python/ragflow/modules/dataset.py
@@ -0,0 +1,55 @@
+import requests
+
+from .base import Base
+
+
+class DataSet(Base):
+    """Client-side representation of a dataset (knowledge base)."""
+
+    class ParseConfig(Base):
+        """Parser settings; the defaults below are overridden by ``res_dict``."""
+
+        def __init__(self, rag, res_dict):
+            self.chunk_token_count = 128
+            self.layout_recognize = True
+            self.delimiter = '\n!?。;!?'
+            self.task_page_size = 12
+            super().__init__(rag, res_dict)
+
+    def __init__(self, rag, res_dict):
+        self.id = ""
+        self.name = ""
+        self.avatar = ""
+        self.tenant_id = None
+        self.description = ""
+        self.language = "English"
+        self.embedding_model = ""
+        self.permission = "me"
+        self.document_count = 0
+        self.chunk_count = 0
+        self.parse_method = 0
+        self.parser_config = None
+        super().__init__(rag, res_dict)
+        # Base wraps nested dictionaries in plain Base objects; re-wrap the
+        # parser settings so that the ParseConfig defaults fill missing keys.
+        raw_config = res_dict.get("parser_config")
+        if isinstance(raw_config, dict):
+            self.parser_config = DataSet.ParseConfig(rag, raw_config)
+
+    def delete(self):
+        """Delete this dataset on the server.
+
+        Returns True only when the server confirms the deletion. Transport
+        errors, HTTP error statuses, unparsable bodies and non-zero result
+        codes all yield False.
+        """
+        try:
+            res = self.post("/rm", {"kb_id": self.id})
+            body = res.json()
+        except (requests.RequestException, ValueError):
+            return False
+        if not res.ok or not isinstance(body, dict):
+            return False
+        # NOTE(review): assumes the response envelope signals success with
+        # retcode == 0 ("code" on newer servers) -- confirm against the API.
+        return body.get("retcode", body.get("code")) == 0
diff --git a/sdk/python/ragflow/ragflow.py b/sdk/python/ragflow/ragflow.py
index cc2927778..3a94ea95f 100644
--- a/sdk/python/ragflow/ragflow.py
+++ b/sdk/python/ragflow/ragflow.py
@@ -12,35 +12,56 @@
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-import json
-import os
 
 import requests
 
-from api.db.services.document_service import DocumentService
-from api.settings import RetCode
+from .modules.dataset import DataSet
 
 
 class RAGFlow:
     def __init__(self, user_key, base_url, version='v1'):
         """
-        api_url: http://<host_address>/api/v1
-        dataset_url: http://<host_address>/api/v1/dataset
-        document_url: http://<host_address>/api/v1/dataset/{dataset_id}/documents
+        api_url: http://<host_address>/v1
+        dataset_url: http://<host_address>/v1/kb
+        document_url: http://<host_address>/v1/dataset/{dataset_id}/documents
         """
         self.user_key = user_key
-        self.api_url = f"{base_url}/api/{version}"
-        self.dataset_url = f"{self.api_url}/dataset"
+        self.api_url = f"{base_url}/{version}"
+        self.dataset_url = f"{self.api_url}/kb"
         self.authorization_header = {"Authorization": "{}".format(self.user_key)}
+        self.base_url = base_url
+
+    def post(self, path, param):
+        res = requests.post(url=self.dataset_url + path, json=param, headers=self.authorization_header)
+        return res
+
+    def get(self, path, params=''):
+        res = requests.get(self.dataset_url + path, params=params, headers=self.authorization_header)
+        return res
 
     def create_dataset(self, dataset_name):
         """
         name: dataset name
         """
-        res = requests.post(url=self.dataset_url, json={"name": dataset_name}, headers=self.authorization_header)
-        result_dict = json.loads(res.text)
-        return result_dict
+        res_create = self.post("/create", {"name": dataset_name})
+        res_create_data = res_create.json()['data']
+        res_detail = self.get("/detail", {"kb_id": res_create_data["kb_id"]})
+        res_detail_data = res_detail.json()['data']
+        result = {}
+        result['id'] = res_detail_data['id']
+        result['name'] = res_detail_data['name']
+        result['avatar'] = res_detail_data['avatar']
+        result['description'] = res_detail_data['description']
+        result['language'] = res_detail_data['language']
+        result['embedding_model'] = res_detail_data['embd_id']
+        result['permission'] = res_detail_data['permission']
+        result['document_count'] = res_detail_data['doc_num']
+        result['chunk_count'] = res_detail_data['chunk_num']
+        result['parser_config'] = res_detail_data['parser_config']
+        dataset = DataSet(self, result)
+        return dataset
 
+    """
     def delete_dataset(self, dataset_name):
         dataset_id = self.find_dataset_id_by_name(dataset_name)
 
@@ -55,16 +76,6 @@ class RAGFlow:
                 return dataset["id"]
         return None
 
-    def list_dataset(self, offset=0, count=-1, orderby="create_time", desc=True):
-        params = {
-            "offset": offset,
-            "count": count,
-            "orderby": orderby,
-            "desc": desc
-        }
-        response = requests.get(url=self.dataset_url, params=params, headers=self.authorization_header)
-        return response.json()
-
     def get_dataset(self, dataset_name):
         dataset_id = self.find_dataset_id_by_name(dataset_name)
         endpoint = f"{self.dataset_url}/{dataset_id}"
@@ -78,7 +89,7 @@ class RAGFlow:
         response = requests.put(endpoint, json=params, headers=self.authorization_header)
         return response.json()
 
-# ------------------------------- CONTENT MANAGEMENT -----------------------------------------------------
+    # ------------------------------- CONTENT MANAGEMENT -----------------------------------------------------
 
     # ----------------------------upload local files-----------------------------------------------------
     def upload_local_file(self, dataset_id, file_paths):
@@ -186,4 +197,4 @@ class RAGFlow:
     # ----------------------------get a specific chunk-----------------------------------------------------
 
     # ----------------------------retrieval test-----------------------------------------------------
-
+"""
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index 1ce9854c4..a90fc1ccb 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -15,5 +15,7 @@
 import setuptools
 
 if __name__ == "__main__":
-    setuptools.setup(packages=['ragflow'])
+    setuptools.setup(name='ragflow',
+                     version="0.1",
+                     packages=setuptools.find_packages())
 
diff --git a/sdk/python/test/common.py b/sdk/python/test/common.py
index 94acbf48c..ac286db40 100644
--- a/sdk/python/test/common.py
+++ b/sdk/python/test/common.py
@@ -1,4 +1,4 @@
 
 
-API_KEY = 'IjJkOGQ4ZDE2MzkyMjExZWZhYTk0MzA0M2Q3ZWU1MzdlIg.ZoUfug.RmqcYyCrlAnLtkzk6bYXiXN3eEY'
+API_KEY = 'IjUxNGM0MmM4NWY5MzExZWY5MDhhMDI0MmFjMTIwMDA2Ig.ZsWebA.mV1NKdSPPllgowiH-7vz36tMWyI'
 HOST_ADDRESS = 'http://127.0.0.1:9380'
\ No newline at end of file
diff --git a/sdk/python/test/t_dataset.py b/sdk/python/test/t_dataset.py
new file mode 100644
index 000000000..35b1e4c97
--- /dev/null
+++ b/sdk/python/test/t_dataset.py
@@ -0,0 +1,23 @@
+from ragflow import RAGFlow
+
+from common import API_KEY, HOST_ADDRESS
+from test_sdkbase import TestSdk
+
+
+class TestDataset(TestSdk):
+    def test_create_dataset_with_success(self):
+        rag = RAGFlow(API_KEY, HOST_ADDRESS)
+        ds = rag.create_dataset("God")
+        assert ds is not None, "The dataset creation failed, returned None."
+        assert ds.name == "God", "Dataset name does not match."
+
+    def test_delete_one_file(self):
+        """
+        Test deleting one dataset with success.
+        """
+        rag = RAGFlow(API_KEY, HOST_ADDRESS)
+        ds = rag.create_dataset("ABC")
+        assert ds is not None, "Failed to create dataset"
+        assert ds.name == "ABC", "Dataset name mismatch"
+        delete_result = ds.delete()
+        assert delete_result is True, "Failed to delete dataset"