init README of deepdoc, add picture processor. (#71)

* init README of deepdoc, add picture processor.

* add resume parsing
This commit is contained in:
KevinHuSh
2024-02-23 18:28:12 +08:00
committed by GitHub
parent d32322c081
commit 7fd1eca582
42 changed files with 58319 additions and 350 deletions

View File

@@ -13,12 +13,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import io
from abc import ABC
from PIL import Image
from openai import OpenAI
import os
import base64
from io import BytesIO
from api.utils import get_uuid
from api.utils.file_utils import get_project_base_directory
class Base(ABC):
def __init__(self, key, model_name):
@@ -44,25 +50,26 @@ class Base(ABC):
{
"role": "user",
"content": [
{
"type": "text",
"text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等。",
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{b64}"
},
},
{
"text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。" if self.lang.lower() == "chinese" else \
"Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
},
],
}
]
class GptV4(Base):
def __init__(self, key, model_name="gpt-4-vision-preview"):
def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese"):
self.client = OpenAI(api_key=key)
self.model_name = model_name
self.lang = lang
def describe(self, image, max_tokens=300):
b64 = self.image2base64(image)
@@ -76,18 +83,40 @@ class GptV4(Base):
class QWenCV(Base):
    """Image captioner backed by Alibaba DashScope's Qwen-VL multimodal chat models.

    Produces a natural-language description of an image, in Chinese or
    English depending on ``lang`` (presumably set from user/tenant config
    by the caller — confirm against the factory that instantiates this).
    """

    def __init__(self, key, model_name="qwen-vl-chat-v1", lang="Chinese"):
        # Imported lazily so this module stays importable when the
        # optional dashscope SDK is not installed.
        import dashscope
        dashscope.api_key = key
        self.model_name = model_name
        # Selects the prompt language in prompt(): "chinese" (case-insensitive)
        # yields the Chinese instruction, anything else the English one.
        self.lang = lang

    def prompt(self, binary):
        """Build the DashScope multimodal message list for raw image bytes.

        DashScope's MultiModalConversation API expects a local ``file://``
        URL rather than inline bytes, so the image is first written to a
        temp file under the project tmp directory.  (Original author's
        note: "stupid as hell".)

        NOTE(review): the temp file is never deleted — this leaks one
        .jpg per call; consider cleanup in describe() after the API call.
        """
        tmp_dir = get_project_base_directory("tmp")
        # makedirs(exist_ok=True) avoids the exists()/mkdir() TOCTOU race
        # when several workers hit this concurrently.
        os.makedirs(tmp_dir, exist_ok=True)
        path = os.path.join(tmp_dir, "%s.jpg" % get_uuid())
        Image.open(io.BytesIO(binary)).save(path)
        return [
            {
                "role": "user",
                "content": [
                    {
                        "image": f"file://{path}"
                    },
                    {
                        "text": "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。" if self.lang.lower() == "chinese" else
                        "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out.",
                    },
                ],
            }
        ]

    def describe(self, image, max_tokens=300):
        """Describe ``image`` (raw bytes) via Qwen-VL.

        Returns a ``(text, token_count)`` tuple on success, or
        ``(error_message, 0)`` when the API call fails.  ``max_tokens``
        is accepted for interface parity with the other vision models
        but is not forwarded to DashScope.
        """
        from http import HTTPStatus
        from dashscope import MultiModalConversation
        # prompt() takes the raw bytes directly — no base64 round-trip,
        # since DashScope reads the image from a file:// URL.
        response = MultiModalConversation.call(model=self.model_name,
                                               messages=self.prompt(image))
        if response.status_code == HTTPStatus.OK:
            # content is a list of parts; the first part holds the text.
            return response.output.choices[0]['message']['content'][0]["text"], response.usage.output_tokens
        return response.message, 0
@@ -95,9 +124,10 @@ from zhipuai import ZhipuAI
class Zhipu4V(Base):
def __init__(self, key, model_name="glm-4v"):
def __init__(self, key, model_name="glm-4v", lang="Chinese"):
self.client = ZhipuAI(api_key=key)
self.model_name = model_name
self.lang = lang
def describe(self, image, max_tokens=1024):
b64 = self.image2base64(image)