create list_dataset api and tests (#1138)

### What problem does this PR solve? This PR completes both the HTTP API and the Python SDK for `list_dataset`. In addition, it includes tests for it. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
2026-01-27 13:46:39 +08:00 · 2024-06-17 12:19:05 +08:00
parent f04fb36c26
commit 1eb4caf02a
7 changed files with 170 additions and 25 deletions
--- a/sdk/python/ragflow/init.py
+++ b/sdk/python/ragflow/init.py
@ -1,3 +1,5 @@
 import importlib.metadata

 __version__ = importlib.metadata.version("ragflow")
+
+from .ragflow import RAGFlow
--- a/sdk/python/ragflow/dataset.py
+++ b/sdk/python/ragflow/dataset.py
@ -18,4 +18,4 @@ class DataSet:
        self.user_key = user_key
        self.dataset_url = dataset_url
        self.uuid = uuid
-        self.name = name
+        self.name = name
--- a/sdk/python/ragflow/ragflow.py
+++ b/sdk/python/ragflow/ragflow.py
@ -17,7 +17,10 @@ import os
 import requests
 import json

-class RAGFLow:
+from httpx import HTTPError
+
+
+class RAGFlow:
    def __init__(self, user_key, base_url, version = 'v1'):
        '''
        api_url: http://<host_address>/api/v1
@ -36,16 +39,39 @@ class RAGFLow:
        result_dict = json.loads(res.text)
        return result_dict

-    def delete_dataset(self, dataset_name = None, dataset_id = None):
+    def delete_dataset(self, dataset_name=None, dataset_id=None):
        return dataset_name

-    def list_dataset(self):
-        response = requests.get(self.dataset_url)
-        print(response)
-        if response.status_code == 200:
-            return response.json()['datasets']
-        else:
-            return None
+    def list_dataset(self, offset=0, count=-1, orderby="create_time", desc=True):
+        params = {
+            "offset": offset,
+            "count": count,
+            "orderby": orderby,
+            "desc": desc
+        }
+        try:
+            response = requests.get(url=self.dataset_url, params=params, headers=self.authorization_header)
+            response.raise_for_status()  # if it is not 200
+            original_data = response.json()
+            # TODO: format the data
+            # print(original_data)
+            # # Process the original data into the desired format
+            # formatted_data = {
+            #     "datasets": [
+            #         {
+            #             "id": dataset["id"],
+            #             "created": dataset["create_time"],  # Adjust the key based on the actual response
+            #             "fileCount": dataset["doc_num"],  # Adjust the key based on the actual response
+            #             "name": dataset["name"]
+            #         }
+            #         for dataset in original_data
+            #     ]
+            # }
+            return response.status_code, original_data
+        except HTTPError as http_err:
+            print(f"HTTP error occurred: {http_err}")
+        except Exception as err:
+            print(f"An error occurred: {err}")

    def get_dataset(self, dataset_id):
        endpoint = f"{self.dataset_url}/{dataset_id}"
@ -61,4 +87,4 @@ class RAGFLow:
        if response.status_code == 200:
            return True
        else:
-            return False
+            return False