mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
init README of deepdoc, add picture processer. (#71)
* init README of deepdoc, add picture processer. * add resume parsing
This commit is contained in:
@ -163,6 +163,7 @@ def completion():
|
||||
del req["conversation_id"]
|
||||
del req["messages"]
|
||||
ans = chat(dia, msg, **req)
|
||||
if not conv.reference: conv.reference = []
|
||||
conv.reference.append(ans["reference"])
|
||||
conv.message.append({"role": "assistant", "content": ans["answer"]})
|
||||
ConversationService.update_by_id(conv.id, conv.to_dict())
|
||||
|
||||
@ -32,7 +32,6 @@ def set_dialog():
|
||||
dialog_id = req.get("dialog_id")
|
||||
name = req.get("name", "New Dialog")
|
||||
description = req.get("description", "A helpful Dialog")
|
||||
language = req.get("language", "Chinese")
|
||||
top_n = req.get("top_n", 6)
|
||||
similarity_threshold = req.get("similarity_threshold", 0.1)
|
||||
vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
|
||||
@ -80,7 +79,6 @@ def set_dialog():
|
||||
"name": name,
|
||||
"kb_ids": req["kb_ids"],
|
||||
"description": description,
|
||||
"language": language,
|
||||
"llm_id": llm_id,
|
||||
"llm_setting": llm_setting,
|
||||
"prompt_config": prompt_config,
|
||||
|
||||
@ -272,7 +272,9 @@ def get(doc_id):
|
||||
response = flask.make_response(MINIO.get(doc.kb_id, doc.location))
|
||||
ext = re.search(r"\.([^.]+)$", doc.name)
|
||||
if ext:
|
||||
response.headers.set('Content-Type', 'application/%s'%ext.group(1))
|
||||
if doc.type == FileType.VISUAL.value:
|
||||
response.headers.set('Content-Type', 'image/%s'%ext.group(1))
|
||||
else: response.headers.set('Content-Type', 'application/%s'%ext.group(1))
|
||||
return response
|
||||
except Exception as e:
|
||||
return server_error_response(e)
|
||||
|
||||
@ -464,6 +464,7 @@ class Knowledgebase(DataBaseModel):
|
||||
avatar = TextField(null=True, help_text="avatar base64 string")
|
||||
tenant_id = CharField(max_length=32, null=False)
|
||||
name = CharField(max_length=128, null=False, help_text="KB name", index=True)
|
||||
language = CharField(max_length=32, null=True, default="Chinese", help_text="English|Chinese")
|
||||
description = TextField(null=True, help_text="KB description")
|
||||
embd_id = CharField(max_length=128, null=False, help_text="default embedding model ID")
|
||||
permission = CharField(max_length=16, null=False, help_text="me|team", default="me")
|
||||
|
||||
@ -57,7 +57,7 @@ class TenantLLMService(CommonService):
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def model_instance(cls, tenant_id, llm_type, llm_name=None):
|
||||
def model_instance(cls, tenant_id, llm_type, llm_name=None, lang="Chinese"):
|
||||
e, tenant = TenantService.get_by_id(tenant_id)
|
||||
if not e:
|
||||
raise LookupError("Tenant not found")
|
||||
@ -87,7 +87,7 @@ class TenantLLMService(CommonService):
|
||||
if model_config["llm_factory"] not in CvModel:
|
||||
return
|
||||
return CvModel[model_config["llm_factory"]](
|
||||
model_config["api_key"], model_config["llm_name"])
|
||||
model_config["api_key"], model_config["llm_name"], lang)
|
||||
|
||||
if llm_type == LLMType.CHAT.value:
|
||||
if model_config["llm_factory"] not in ChatModel:
|
||||
@ -120,11 +120,11 @@ class TenantLLMService(CommonService):
|
||||
|
||||
|
||||
class LLMBundle(object):
|
||||
def __init__(self, tenant_id, llm_type, llm_name=None):
|
||||
def __init__(self, tenant_id, llm_type, llm_name=None, lang="Chinese"):
|
||||
self.tenant_id = tenant_id
|
||||
self.llm_type = llm_type
|
||||
self.llm_name = llm_name
|
||||
self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name)
|
||||
self.mdl = TenantLLMService.model_instance(tenant_id, llm_type, llm_name, lang=lang)
|
||||
assert self.mdl, "Can't find mole for {}/{}/{}".format(tenant_id, llm_type, llm_name)
|
||||
|
||||
def encode(self, texts: list, batch_size=32):
|
||||
|
||||
@ -27,7 +27,24 @@ class TaskService(CommonService):
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def get_tasks(cls, tm, mod=0, comm=1, items_per_page=64):
|
||||
fields = [cls.model.id, cls.model.doc_id, cls.model.from_page,cls.model.to_page, Document.kb_id, Document.parser_id, Document.parser_config, Document.name, Document.type, Document.location, Document.size, Knowledgebase.tenant_id, Tenant.embd_id, Tenant.img2txt_id, Tenant.asr_id, cls.model.update_time]
|
||||
fields = [
|
||||
cls.model.id,
|
||||
cls.model.doc_id,
|
||||
cls.model.from_page,
|
||||
cls.model.to_page,
|
||||
Document.kb_id,
|
||||
Document.parser_id,
|
||||
Document.parser_config,
|
||||
Document.name,
|
||||
Document.type,
|
||||
Document.location,
|
||||
Document.size,
|
||||
Knowledgebase.tenant_id,
|
||||
Knowledgebase.language,
|
||||
Tenant.embd_id,
|
||||
Tenant.img2txt_id,
|
||||
Tenant.asr_id,
|
||||
cls.model.update_time]
|
||||
docs = cls.model.select(*fields) \
|
||||
.join(Document, on=(cls.model.doc_id == Document.id)) \
|
||||
.join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id)) \
|
||||
@ -42,7 +59,6 @@ class TaskService(CommonService):
|
||||
.paginate(1, items_per_page)
|
||||
return list(docs.dicts())
|
||||
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def do_cancel(cls, id):
|
||||
@ -54,12 +70,11 @@ class TaskService(CommonService):
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
@classmethod
|
||||
@DB.connection_context()
|
||||
def update_progress(cls, id, info):
|
||||
cls.model.update(progress_msg=cls.model.progress_msg + "\n"+info["progress_msg"]).where(
|
||||
cls.model.update(progress_msg=cls.model.progress_msg + "\n" + info["progress_msg"]).where(
|
||||
cls.model.id == id).execute()
|
||||
if "progress" in info:
|
||||
cls.model.update(progress=info["progress"]).where(
|
||||
cls.model.id == id).execute()
|
||||
cls.model.id == id).execute()
|
||||
|
||||
Binary file not shown.
@ -167,7 +167,11 @@ def thumbnail(filename, blob):
|
||||
return "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode("utf-8")
|
||||
|
||||
if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename):
|
||||
return ("data:image/%s;base64,"%filename.split(".")[-1]) + base64.b64encode(Image.open(BytesIO(blob)).thumbnail((30, 30)).tobytes()).decode("utf-8")
|
||||
image = Image.open(BytesIO(blob))
|
||||
image.thumbnail((30, 30))
|
||||
buffered = BytesIO()
|
||||
image.save(buffered, format="png")
|
||||
return "data:image/png;base64," + base64.b64encode(buffered.getvalue()).decode("utf-8")
|
||||
|
||||
if re.match(r".*\.(ppt|pptx)$", filename):
|
||||
import aspose.slides as slides
|
||||
|
||||
Reference in New Issue
Block a user