API: create dataset (#1106)

### What problem does this PR solve?

This PR implements 'create dataset' for both the HTTP API and the Python SDK.
HTTP API:
```
curl --request POST --url http://<HOST_ADDRESS>/api/v1/dataset   --header 'Content-Type: application/json' --header 'Authorization: <ACCESS_KEY>' --data-binary '{
  "name": "<DATASET_NAME>"
}'
```

Python SDK:
```
from ragflow.ragflow import RAGFLow
ragflow = RAGFLow('<ACCESS_KEY>', 'http://127.0.0.1:9380')
ragflow.create_dataset("dataset1")

```

TODO: 
- Currently, ACCESS_KEY is the login_token obtained when a user logs in to RAGFlow.
RAGFlow should provide a way for users to add and delete access keys.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update

---------

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2024-06-11 11:16:37 +08:00
committed by GitHub
parent 97ced2f667
commit cf2f6592dd
11 changed files with 355 additions and 62 deletions

View File

@ -1 +1,41 @@
# ragflow
# python-ragflow
# update python client
- Update the "version" field in the [project] section of `pyproject.toml`
- build new python SDK
- upload to pypi.org
- install new python SDK
# build python SDK
```shell
rm -f dist/* && python setup.py sdist bdist_wheel
```
# install python SDK
```shell
pip uninstall -y ragflow && pip install dist/*.whl
```
This will install ragflow-sdk and its dependencies.
# upload to pypi.org
```shell
twine upload dist/*.whl
```
Enter your PyPI API token when prompted.
Note that PyPI allows each version of a package to [be uploaded only once](https://pypi.org/help/#file-name-reuse). You need to change the `version` inside `pyproject.toml` before building and uploading.
# using
```python
```
# For developer
```shell
pip install -e .
```

View File

@ -0,0 +1,21 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class DataSet:
    """Plain record describing one dataset as seen by the SDK.

    The constructor stores its four arguments, unchanged, on attributes of
    the same names; the class has no other behavior.
    """

    def __init__(self, user_key, dataset_url, uuid, name):
        """Store the given values on the instance.

        user_key: kept as ``self.user_key``
        dataset_url: kept as ``self.dataset_url``
        uuid: kept as ``self.uuid``
        name: kept as ``self.name``
        """
        self.user_key = user_key
        self.dataset_url = dataset_url
        self.uuid = uuid
        self.name = name

View File

@ -12,33 +12,43 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
from abc import ABC
import requests
import json
class RAGFLow(ABC):
def __init__(self, user_key, base_url):
class RAGFLow:
def __init__(self, user_key, base_url, version = 'v1'):
'''
api_url: http://<host_address>/api/v1
dataset_url: http://<host_address>/api/v1/dataset
'''
self.user_key = user_key
self.base_url = base_url
self.api_url = f"{base_url}/api/{version}"
self.dataset_url = f"{self.api_url}/dataset"
self.authorization_header = {"Authorization": "{}".format(self.user_key)}
def create_dataset(self, name):
return name
def create_dataset(self, dataset_name):
    """Create a dataset by POSTing to ``self.dataset_url``.

    dataset_name: name of the dataset to create; sent as the "name" field
        of the JSON request body

    Returns the JSON response body decoded into Python objects.
    """
    res = requests.post(url=self.dataset_url, json={"name": dataset_name}, headers=self.authorization_header)
    # Decode with Response.json(), the same way list_dataset and get_dataset
    # do, instead of re-parsing the decoded text by hand.
    return res.json()
def delete_dataset(self, name):
return name
def delete_dataset(self, dataset_name=None, dataset_id=None):
    """Placeholder for dataset deletion; no request is sent.

    dataset_name: returned to the caller unchanged
    dataset_id: accepted but not used
    """
    return dataset_name
def list_dataset(self):
endpoint = f"{self.base_url}/api/v1/dataset"
response = requests.get(endpoint)
response = requests.get(self.dataset_url)
print(response)
if response.status_code == 200:
return response.json()['datasets']
else:
return None
def get_dataset(self, dataset_id):
endpoint = f"{self.base_url}/api/v1/dataset/{dataset_id}"
endpoint = f"{self.dataset_url}/{dataset_id}"
response = requests.get(endpoint)
if response.status_code == 200:
return response.json()
@ -46,7 +56,7 @@ class RAGFLow(ABC):
return None
def update_dataset(self, dataset_id, params):
endpoint = f"{self.base_url}/api/v1/dataset/{dataset_id}"
endpoint = f"{self.dataset_url}/{dataset_id}"
response = requests.put(endpoint, json=params)
if response.status_code == 200:
return True

View File

@ -0,0 +1,4 @@
import os

# Connection settings shared by the SDK tests.
# NOTE(review): a real login token should not be kept in version control.
# Set RAGFLOW_API_KEY / RAGFLOW_HOST_ADDRESS in the environment to override
# the defaults below; the defaults are kept so existing setups still run.
API_KEY = os.environ.get('RAGFLOW_API_KEY', 'IjJiMTVkZWNhMjU3MzExZWY4YzNiNjQ0OTdkMTllYjM3Ig.ZmQZrA.x9Z7c-1ErBUSL3m8SRtBRgGq5uE')
HOST_ADDRESS = os.environ.get('RAGFLOW_HOST_ADDRESS', 'http://127.0.0.1:9380')

View File

@ -3,49 +3,46 @@ import ragflow
from ragflow.ragflow import RAGFLow
import pytest
from unittest.mock import MagicMock
from common import API_KEY, HOST_ADDRESS
class TestCase(TestSdk):
@pytest.fixture
def ragflow_instance(self):
# Here we create a mock instance of RAGFlow for testing
return ragflow.ragflow.RAGFLow('123', 'url')
class TestBasic(TestSdk):
def test_version(self):
print(ragflow.__version__)
def test_create_dataset(self):
assert ragflow.ragflow.RAGFLow('123', 'url').create_dataset('abc') == 'abc'
def test_delete_dataset(self):
assert ragflow.ragflow.RAGFLow('123', 'url').delete_dataset('abc') == 'abc'
def test_list_dataset_success(self, ragflow_instance, monkeypatch):
# Mocking the response of requests.get method
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {'datasets': [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]}
# Patching requests.get to return the mock_response
monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
# Call the method under test
result = ragflow_instance.list_dataset()
# Assertion
assert result == [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]
def test_list_dataset_failure(self, ragflow_instance, monkeypatch):
# Mocking the response of requests.get method
mock_response = MagicMock()
mock_response.status_code = 404 # Simulating a failed request
# Patching requests.get to return the mock_response
monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
# Call the method under test
result = ragflow_instance.list_dataset()
# Assertion
assert result is None
# def test_create_dataset(self):
# res = RAGFLow(API_KEY, HOST_ADDRESS).create_dataset('abc')
# print(res)
#
# def test_delete_dataset(self):
# assert RAGFLow('123', 'url').delete_dataset('abc') == 'abc'
#
# def test_list_dataset_success(self, ragflow_instance, monkeypatch):
# # Mocking the response of requests.get method
# mock_response = MagicMock()
# mock_response.status_code = 200
# mock_response.json.return_value = {'datasets': [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]}
#
# # Patching requests.get to return the mock_response
# monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
#
# # Call the method under test
# result = ragflow_instance.list_dataset()
#
# # Assertion
# assert result == [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]
#
# def test_list_dataset_failure(self, ragflow_instance, monkeypatch):
# # Mocking the response of requests.get method
# mock_response = MagicMock()
# mock_response.status_code = 404 # Simulating a failed request
#
# # Patching requests.get to return the mock_response
# monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
#
# # Call the method under test
# result = ragflow_instance.list_dataset()
#
# # Assertion
# assert result is None

View File

@ -0,0 +1,26 @@
from test_sdkbase import TestSdk
import ragflow
from ragflow.ragflow import RAGFLow
import pytest
from unittest.mock import MagicMock
from common import API_KEY, HOST_ADDRESS
class TestDataset(TestSdk):
    """Dataset tests that call the server at HOST_ADDRESS using API_KEY."""

    def test_create_dataset(self):
        """
        Planned flow (only step 1 is implemented so far):
        1. create a kb
        2. list the kb
        3. get the detail info according to the kb id
        4. update the kb
        5. delete the kb
        """
        # Named `client` so the imported `ragflow` module is not shadowed.
        client = RAGFLow(API_KEY, HOST_ADDRESS)
        # create a kb
        res = client.create_dataset("kb1")
        # Separate asserts so a failure shows which field was wrong.
        assert res['code'] == 0
        assert res['message'] == 'success'
        dataset_id = res['data']['dataset_id']
        print(dataset_id)
        # TODO: list the kb