Update comments (#4569)

### What problem does this PR solve?

Add the Apache License 2.0 header to source files (with incidental PEP 8 whitespace cleanup in the touched code).

### Type of change

- [x] Refactoring

Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2025-01-21 20:52:28 +08:00
committed by GitHub
parent 583050a876
commit 3894de895b
86 changed files with 1034 additions and 145 deletions

View File

@ -1,3 +1,19 @@
#
# Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from .document import Document
from .base import Base
@ -29,48 +45,51 @@ class DataSet(Base):
def update(self, update_message: dict) -> None:
    """Apply ``update_message`` to this dataset.

    The payload is handed to ``self.put`` for the ``/datasets/{self.id}``
    path and the JSON body of the reply is inspected.

    Args:
        update_message: Fields to change; forwarded unmodified as the
            request payload.

    Raises:
        Exception: If the reply's ``code`` is not ``0``. The exception
            argument is the reply's ``message`` (``None`` when absent).
    """
    res = self.put(f"/datasets/{self.id}", update_message).json()
    if res.get("code") != 0:
        # .get() keeps a malformed error reply from surfacing as a KeyError,
        # which is how upload_documents and the parse methods already behave.
        raise Exception(res.get("message"))
def upload_documents(self, document_list: list[dict]) -> list["Document"]:
    """Upload files to this dataset and wrap each returned record.

    Each entry of ``document_list`` is turned into a ``("file", (name, blob))``
    tuple and the whole list is passed as ``files=`` to ``self.post`` for the
    ``/datasets/{self.id}/documents`` path.

    Args:
        document_list: Dicts that each provide ``"display_name"`` and
            ``"blob"`` keys.

    Returns:
        One ``Document`` per item in the reply's ``data`` list, in order.

    Raises:
        Exception: If the reply's ``code`` is not ``0``; carries the
            reply's ``message``.
        KeyError: If an entry lacks ``"display_name"`` or ``"blob"``.
    """
    url = f"/datasets/{self.id}/documents"
    files = [("file", (ele["display_name"], ele["blob"])) for ele in document_list]
    res = self.post(path=url, json=None, files=files).json()
    if res.get("code") != 0:
        raise Exception(res.get("message"))
    return [Document(self.rag, doc) for doc in res["data"]]
def list_documents(self, id: str | None = None, keywords: str | None = None, page: int = 1, page_size: int = 30, orderby: str = "create_time", desc: bool = True):
res = self.get(f"/datasets/{self.id}/documents",params={"id": id,"keywords": keywords,"page": page,"page_size": page_size,"orderby": orderby,"desc": desc})
def list_documents(self, id: str | None = None, keywords: str | None = None, page: int = 1, page_size: int = 30,
orderby: str = "create_time", desc: bool = True):
res = self.get(f"/datasets/{self.id}/documents",
params={"id": id, "keywords": keywords, "page": page, "page_size": page_size, "orderby": orderby,
"desc": desc})
res = res.json()
documents = []
if res.get("code") == 0:
for document in res["data"].get("docs"):
documents.append(Document(self.rag,document))
documents.append(Document(self.rag, document))
return documents
raise Exception(res["message"])
def delete_documents(self,ids: list[str] | None = None):
res = self.rm(f"/datasets/{self.id}/documents",{"ids":ids})
def delete_documents(self, ids: list[str] | None = None):
res = self.rm(f"/datasets/{self.id}/documents", {"ids": ids})
res = res.json()
if res.get("code") != 0:
raise Exception(res["message"])
def async_parse_documents(self, document_ids) -> None:
    """Submit documents of this dataset for parsing.

    Passes ``{"document_ids": document_ids}`` to ``self.post`` for the
    ``/datasets/{self.id}/chunks`` path. The method returns once the reply
    has been checked; it does not wait for or report parsing progress.

    Args:
        document_ids: Forwarded unchanged as the ``document_ids`` payload
            value (presumably a list of document id strings; verify
            against callers).

    Raises:
        Exception: If the reply's ``code`` is not ``0``; carries the
            reply's ``message``.
    """
    res = self.post(f"/datasets/{self.id}/chunks", {"document_ids": document_ids}).json()
    if res.get("code") != 0:
        raise Exception(res.get("message"))
def async_cancel_parse_documents(self, document_ids) -> None:
    """Ask the server to cancel parsing for documents of this dataset.

    Passes ``{"document_ids": document_ids}`` to ``self.rm`` for the
    ``/datasets/{self.id}/chunks`` path.

    Args:
        document_ids: Forwarded unchanged as the ``document_ids`` payload
            value (presumably a list of document id strings; verify
            against callers).

    Raises:
        Exception: If the reply's ``code`` is not ``0``; carries the
            reply's ``message``.
    """
    res = self.rm(f"/datasets/{self.id}/chunks", {"document_ids": document_ids}).json()
    if res.get("code") != 0:
        raise Exception(res.get("message"))