mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
API: create dataset (#1106)
### What problem does this PR solve?
This PR implements the 'create dataset' feature for both the HTTP API and the Python SDK.
HTTP API:
```
curl --request POST --url http://<HOST_ADDRESS>/api/v1/dataset --header 'Content-Type: application/json' --header 'Authorization: <ACCESS_KEY>' --data-binary '{
"name": "<DATASET_NAME>"
}'
```
Python SDK:
```
from ragflow.ragflow import RAGFLow
ragflow = RAGFLow('<ACCESS_KEY>', 'http://127.0.0.1:9380')
ragflow.create_dataset("dataset1")
```
TODO:
- ACCESS_KEY is currently the login token issued when a user logs in to RAGFlow.
RAGFlow should provide functionality for users to add and delete access keys.
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
---------
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -1 +1,41 @@
|
||||
# ragflow
|
||||
# python-ragflow
|
||||
|
||||
# update python client
|
||||
|
||||
- Update "version" field of [project] chapter
|
||||
- build new python SDK
|
||||
- upload to pypi.org
|
||||
- install new python SDK
|
||||
|
||||
# build python SDK
|
||||
|
||||
```shell
|
||||
rm -f dist/* && python setup.py sdist bdist_wheel
|
||||
```
|
||||
|
||||
# install python SDK
|
||||
```shell
|
||||
pip uninstall -y ragflow && pip install dist/*.whl
|
||||
```
|
||||
|
||||
This will install ragflow-sdk and its dependencies.
|
||||
|
||||
# upload to pypi.org
|
||||
```shell
|
||||
twine upload dist/*.whl
|
||||
```
|
||||
|
||||
Enter your pypi API token according to the prompt.
|
||||
|
||||
Note that pypi allow a version of a package [be uploaded only once](https://pypi.org/help/#file-name-reuse). You need to change the `version` inside the `pyproject.toml` before build and upload.
|
||||
|
||||
# using
|
||||
|
||||
```python
|
||||
|
||||
```
|
||||
|
||||
# For developer
|
||||
```shell
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
21
sdk/python/ragflow/dataset.py
Normal file
21
sdk/python/ragflow/dataset.py
Normal file
@ -0,0 +1,21 @@
|
||||
#
|
||||
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
class DataSet:
    """Client-side handle for a dataset stored on a RAGFlow server.

    Holds the credentials and identifiers needed to address one remote
    dataset; it performs no network I/O itself.
    """

    def __init__(self, user_key, dataset_url, uuid, name):
        """
        user_key: access key used to authorize requests for this dataset.
        dataset_url: dataset endpoint, e.g. http://<host_address>/api/v1/dataset
        uuid: server-assigned unique id of the dataset.
        name: human-readable dataset name.
        """
        self.user_key = user_key
        self.dataset_url = dataset_url
        self.uuid = uuid
        self.name = name

    def __repr__(self):
        # Aid debugging; deliberately omits user_key so credentials are not
        # leaked into logs or tracebacks.
        return f"{type(self).__name__}(uuid={self.uuid!r}, name={self.name!r})"
|
||||
@ -12,33 +12,43 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import os
|
||||
from abc import ABC
|
||||
import requests
|
||||
import json
|
||||
|
||||
|
||||
class RAGFLow:
    """Minimal HTTP client for the RAGFlow dataset API."""

    def __init__(self, user_key, base_url, version = 'v1'):
        """
        user_key: access key placed in the Authorization header of every request.
        base_url: server root, e.g. http://127.0.0.1:9380
        version: API version path segment; defaults to 'v1'.

        Derived URLs:
            api_url:     http://<host_address>/api/v1
            dataset_url: http://<host_address>/api/v1/dataset
        """
        self.user_key = user_key
        self.base_url = base_url
        self.api_url = f"{base_url}/api/{version}"
        self.dataset_url = f"{self.api_url}/dataset"
        self.authorization_header = {"Authorization": "{}".format(self.user_key)}

    def create_dataset(self, dataset_name):
        """Create a dataset named *dataset_name*; return the decoded JSON reply as a dict."""
        res = requests.post(url=self.dataset_url,
                            json={"name": dataset_name},
                            headers=self.authorization_header)
        # res.json() is equivalent to json.loads(res.text) but lets the
        # response object handle the encoding.
        return res.json()

    def delete_dataset(self, dataset_name = None, dataset_id = None):
        # TODO: deletion is not implemented on the client yet; this stub
        # simply echoes the name back (kept for interface compatibility).
        return dataset_name

    def list_dataset(self):
        """Return the list of datasets, or None on a non-200 reply."""
        # Send the Authorization header here as well, for consistency with
        # create_dataset — an authenticated server rejects bare requests.
        response = requests.get(self.dataset_url, headers=self.authorization_header)
        if response.status_code == 200:
            return response.json()['datasets']
        return None

    def get_dataset(self, dataset_id):
        """Return the detail dict for *dataset_id*, or None on a non-200 reply."""
        endpoint = f"{self.dataset_url}/{dataset_id}"
        response = requests.get(endpoint, headers=self.authorization_header)
        if response.status_code == 200:
            return response.json()
        return None
|
||||
|
||||
def update_dataset(self, dataset_id, params):
|
||||
endpoint = f"{self.base_url}/api/v1/dataset/{dataset_id}"
|
||||
endpoint = f"{self.dataset_url}/{dataset_id}"
|
||||
response = requests.put(endpoint, json=params)
|
||||
if response.status_code == 200:
|
||||
return True
|
||||
|
||||
4
sdk/python/test/common.py
Normal file
4
sdk/python/test/common.py
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
|
||||
# Shared configuration for the SDK tests.
# NOTE(review): this looks like a real credential hard-coded into the repo —
# consider loading it from an environment variable instead; verify it is a
# throwaway test token.
API_KEY = 'IjJiMTVkZWNhMjU3MzExZWY4YzNiNjQ0OTdkMTllYjM3Ig.ZmQZrA.x9Z7c-1ErBUSL3m8SRtBRgGq5uE'
# Address of the locally running RAGFlow server the tests talk to.
HOST_ADDRESS = 'http://127.0.0.1:9380'
|
||||
@ -3,49 +3,46 @@ import ragflow
|
||||
from ragflow.ragflow import RAGFLow
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
from common import API_KEY, HOST_ADDRESS
|
||||
|
||||
|
||||
class TestBasic(TestSdk):
    """Basic sanity checks for the ragflow SDK package."""

    def test_version(self):
        # Assert on the version instead of only printing it, so the test
        # can actually fail if the package metadata is broken.
        version = ragflow.__version__
        assert isinstance(version, str) and version

    # TODO: the dataset tests (create/delete/list against a live server)
    # were removed as dead commented-out code; re-add them once the
    # endpoints and access-key handling are stable.
|
||||
|
||||
26
sdk/python/test/test_dataset.py
Normal file
26
sdk/python/test/test_dataset.py
Normal file
@ -0,0 +1,26 @@
|
||||
from test_sdkbase import TestSdk
|
||||
import ragflow
|
||||
from ragflow.ragflow import RAGFLow
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
from common import API_KEY, HOST_ADDRESS
|
||||
|
||||
class TestDataset(TestSdk):
    """End-to-end dataset tests; requires a running RAGFlow server."""

    def test_create_dataset(self):
        """
        1. create a kb
        2. list the kb
        3. get the detail info according to the kb id
        4. update the kb
        5. delete the kb
        """
        # Named 'client' rather than 'ragflow' so the local variable does
        # not shadow the imported ragflow module.
        client = RAGFLow(API_KEY, HOST_ADDRESS)

        # create a kb
        res = client.create_dataset("kb1")
        assert res['code'] == 0 and res['message'] == 'success'
        dataset_id = res['data']['dataset_id']
        print(dataset_id)

        # TODO: list the kb
|
||||
Reference in New Issue
Block a user