init README of deepdoc, add picture processor. (#71)

* init README of deepdoc, add picture processor.
* add resume parsing
@@ -13,12 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import io
 from abc import ABC

+from PIL import Image
 from openai import OpenAI
+import os
 import base64
 from io import BytesIO
+
+from api.utils import get_uuid
+from api.utils.file_utils import get_project_base_directory


 class Base(ABC):
     def __init__(self, key, model_name):
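The new imports (io, os, PIL's Image, get_uuid, get_project_base_directory) support the temp-file handling added to QWenCV further down, while base64 and BytesIO back the image2base64 helper that every describe() below calls. That helper sits outside this hunk; the following is only a minimal sketch of what such a helper can look like, assuming it accepts raw bytes, a BytesIO, or a PIL image (the real implementation in the Base class may differ):

import base64
from io import BytesIO


def image2base64(image):
    # Hypothetical sketch of the helper referenced by describe() below.
    if isinstance(image, bytes):
        return base64.b64encode(image).decode("utf-8")
    if isinstance(image, BytesIO):
        return base64.b64encode(image.getvalue()).decode("utf-8")
    buffered = BytesIO()                     # otherwise assume a PIL.Image.Image
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")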
@@ -44,25 +50,26 @@ class Base(ABC):
             {
                 "role": "user",
                 "content": [
-                    {
-                        "type": "text",
-                        "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等。",
-                    },
                     {
                         "type": "image_url",
                         "image_url": {
                             "url": f"data:image/jpeg;base64,{b64}"
                         },
                     },
+                    {
+                        "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。" if self.lang.lower() == "chinese" else \
+                            "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
+                    },
                 ],
             }
         ]


 class GptV4(Base):
-    def __init__(self, key, model_name="gpt-4-vision-preview"):
+    def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese"):
         self.client = OpenAI(api_key=key)
         self.model_name = model_name
+        self.lang = lang

     def describe(self, image, max_tokens=300):
         b64 = self.image2base64(image)
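This hunk makes Base.prompt language-aware and threads a lang="Chinese" argument through GptV4. Both Chinese strings ask, roughly, "Describe in detail what is in the picture: time, place, people, events, the people's mood, etc."; the new one adds "if there are numbers, extract them." The hunk cuts off right after b64 = self.image2base64(image), so the rest of describe() is not shown. As a sketch only, assuming the standard OpenAI Python SDK v1 chat interface, the call GptV4.describe presumably makes would look roughly like this (the function name and parameters here are illustrative, not taken from the diff):

from openai import OpenAI


def describe_with_gpt4v(client: OpenAI, model_name: str, messages: list,
                        max_tokens: int = 300):
    # Sketch of the request GptV4.describe presumably issues; `messages` is the
    # bilingual list built by Base.prompt above.
    res = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
    )
    return res.choices[0].message.content.strip(), res.usage.total_tokens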
@@ -76,18 +83,40 @@ class GptV4(Base):


 class QWenCV(Base):
-    def __init__(self, key, model_name="qwen-vl-chat-v1"):
+    def __init__(self, key, model_name="qwen-vl-chat-v1", lang="Chinese"):
         import dashscope
         dashscope.api_key = key
         self.model_name = model_name
+        self.lang = lang

+    def prompt(self, binary):
+        # stupid as hell
+        tmp_dir = get_project_base_directory("tmp")
+        if not os.path.exists(tmp_dir): os.mkdir(tmp_dir)
+        path = os.path.join(tmp_dir, "%s.jpg" % get_uuid())
+        Image.open(io.BytesIO(binary)).save(path)
+        return [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "image": f"file://{path}"
+                    },
+                    {
+                        "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。" if self.lang.lower() == "chinese" else \
+                            "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
+                    },
+                ],
+            }
+        ]
+
     def describe(self, image, max_tokens=300):
         from http import HTTPStatus
         from dashscope import MultiModalConversation
         response = MultiModalConversation.call(model=self.model_name,
-                                               messages=self.prompt(self.image2base64(image)))
+                                               messages=self.prompt(image))
         if response.status_code == HTTPStatus.OK:
-            return response.output.choices[0]['message']['content'], response.usage.output_tokens
+            return response.output.choices[0]['message']['content'][0]["text"], response.usage.output_tokens
         return response.message, 0
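QWenCV now overrides prompt() because DashScope's multimodal message format references images by URL (here a file:// URI) rather than by a base64 data URL, so the raw bytes are written to a temp file under the project tmp directory and describe() passes the original image instead of its base64 encoding. The return value is also indexed with [0]["text"], since DashScope returns the message content as a list of parts. A hypothetical usage sketch follows; the module path, API key, and file name are placeholders, not part of the diff:

# Feed raw image bytes to the DashScope wrapper; prompt() handles the temp file.
from rag.llm.cv_model import QWenCV  # assumed module path

with open("sample.jpg", "rb") as f:
    binary = f.read()

cv = QWenCV(key="sk-...", lang="English")
text, used_tokens = cv.describe(binary)
print(used_tokens, text)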
@@ -95,9 +124,10 @@ from zhipuai import ZhipuAI


 class Zhipu4V(Base):
-    def __init__(self, key, model_name="glm-4v"):
+    def __init__(self, key, model_name="glm-4v", lang="Chinese"):
         self.client = ZhipuAI(api_key=key)
         self.model_name = model_name
+        self.lang = lang

     def describe(self, image, max_tokens=1024):
         b64 = self.image2base64(image)
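The last hunk gives Zhipu4V the same lang treatment and is truncated at the same point as GptV4's describe(). Taken together, the three wrappers appear to share one interface: describe(image) returning a caption and a token count. The helper below is purely hypothetical (the factory name, provider labels, key, and file name are placeholders, and ragflow's own model registry may look different); it only illustrates that shared interface:

# Hypothetical convenience factory over the three wrappers from this commit.
from rag.llm.cv_model import GptV4, QWenCV, Zhipu4V  # assumed module path


def make_cv_model(provider: str, key: str, lang: str = "Chinese"):
    if provider == "openai":
        return GptV4(key, lang=lang)
    if provider == "qwen":
        return QWenCV(key, lang=lang)
    if provider == "zhipu":
        return Zhipu4V(key, lang=lang)
    raise ValueError(f"unknown provider: {provider}")


with open("chart.png", "rb") as f:
    img_bytes = f.read()
caption, tokens = make_cv_model("zhipu", "my-key", lang="English").describe(img_bytes)

Keeping the per-provider quirks inside prompt() and describe() is what lets callers switch vision backends without changing anything but the provider name and key.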