Files
ragflow/sdk/python/ragflow/modules/document.py
liuhua dab92ac1e8 Refactor Chunk API (#2855)
### What problem does this PR solve?

Refactor Chunk API
#2846
### Type of change


- [x] Refactoring

---------

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2024-10-16 18:41:24 +08:00

59 lines
1.9 KiB
Python

import time
from PIL.ImageFile import raise_oserror
from .base import Base
from .chunk import Chunk
from typing import List
class Document(Base):
    """Client-side handle for one document that belongs to a dataset.

    Instances are built from a response dictionary and expose helpers for
    listing, adding and deleting the chunks of the document. The HTTP helpers
    used below (``self.get``, ``self.post``, ``self.rm``) and ``self.rag`` are
    not defined in this class; presumably they are provided by ``Base`` --
    verify against ``base.py``.
    """

    def __init__(self, rag, res_dict):
        """Initialise default attributes, then apply the values in *res_dict*.

        Args:
            rag: Client object forwarded unchanged to ``Base.__init__``.
            res_dict: Mapping of attribute names to values. Only keys that
                match one of the attributes declared below are forwarded to
                ``Base.__init__``; any other key is ignored. The mapping
                passed by the caller is left unmodified.
        """
        # Defaults; they also define the set of keys accepted from res_dict.
        self.id = ""
        self.name = ""
        self.thumbnail = None
        self.knowledgebase_id = None
        self.parser_method = ""
        self.parser_config = {"pages": [[1, 1000000]]}
        self.source_type = "local"
        self.type = ""
        self.created_by = ""
        self.size = 0
        self.token_count = 0
        self.chunk_count = 0
        self.progress = 0.0
        self.progress_msg = ""
        self.process_begin_at = None
        self.process_duration = 0.0
        self.run = "0"
        self.status = "1"
        # Build a filtered copy instead of popping keys from the caller's
        # dictionary, so the argument is not mutated as a side effect.
        known_fields = {k: v for k, v in res_dict.items() if k in self.__dict__}
        super().__init__(rag, known_fields)

    def list_chunks(self, offset=0, limit=30, keywords="", id: str = None):
        """Return the chunks of this document as a list of ``Chunk`` objects.

        Args:
            offset: Sent to the server as the ``offset`` parameter.
            limit: Sent to the server as the ``limit`` parameter.
            keywords: Sent to the server as the ``keywords`` parameter.
            id: Sent to the server as the ``id`` parameter.

        Returns:
            A list with one ``Chunk`` per entry of ``data["chunks"]`` in the
            response; an empty list when that entry is missing or empty.

        Raises:
            Exception: If the response ``code`` is not 0; the exception
                carries the response ``message``.
        """
        params = {
            "document_id": self.id,
            "keywords": keywords,
            "offset": offset,
            "limit": limit,
            "id": id,
        }
        res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', params)
        res = res.json()
        if res.get("code") == 0:
            # Tolerate a missing/null "chunks" entry rather than failing on
            # iteration over None.
            return [Chunk(self.rag, item) for item in res["data"].get("chunks") or []]
        raise Exception(res.get("message"))

    def add_chunk(self, content: str):
        """Create a new chunk with *content* in this document.

        Args:
            content: Text sent to the server as the ``content`` field.

        Returns:
            A ``Chunk`` built from ``data["chunk"]`` of the response.

        Raises:
            Exception: If the response ``code`` is not 0; the exception
                carries the response ``message``.
        """
        res = self.post(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', {"content": content})
        res = res.json()
        if res.get("code") == 0:
            return Chunk(self.rag, res["data"].get("chunk"))
        raise Exception(res.get("message"))

    def delete_chunks(self, ids: List[str]):
        """Delete the chunks identified by *ids* from this document.

        Args:
            ids: Chunk identifiers sent to the server as the ``ids`` field.

        Raises:
            Exception: If the response ``code`` is not 0; the exception
                carries the response ``message``.
        """
        # Leading "/" added so the path has the same form as the one used by
        # list_chunks and add_chunk for the same resource.
        res = self.rm(f"/dataset/{self.knowledgebase_id}/document/{self.id}/chunk", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))