mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Feat: Qwen-VL series supports video parsing (#10676)
### What problem does this PR solve? Qwen-VL series supports video parsing. #10617. ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -13,13 +13,16 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
#
|
#
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import tempfile
|
||||||
import logging
|
import logging
|
||||||
from abc import ABC
|
from abc import ABC
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
import requests
|
import requests
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
@ -171,6 +174,7 @@ class GptV4(Base):
|
|||||||
def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1", **kwargs):
|
def __init__(self, key, model_name="gpt-4-vision-preview", lang="Chinese", base_url="https://api.openai.com/v1", **kwargs):
|
||||||
if not base_url:
|
if not base_url:
|
||||||
base_url = "https://api.openai.com/v1"
|
base_url = "https://api.openai.com/v1"
|
||||||
|
self.api_key = key
|
||||||
self.client = OpenAI(api_key=key, base_url=base_url)
|
self.client = OpenAI(api_key=key, base_url=base_url)
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
@ -224,6 +228,61 @@ class QWenCV(GptV4):
|
|||||||
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||||
super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
|
super().__init__(key, model_name, lang=lang, base_url=base_url, **kwargs)
|
||||||
|
|
||||||
|
def chat(self, system, history, gen_conf, images=[], video_bytes=None, filename=""):
|
||||||
|
if video_bytes:
|
||||||
|
try:
|
||||||
|
summary, summary_num_tokens = self._process_video(video_bytes, filename)
|
||||||
|
return summary, summary_num_tokens
|
||||||
|
except Exception as e:
|
||||||
|
return "**ERROR**: " + str(e), 0
|
||||||
|
|
||||||
|
return "**ERROR**: Method chat not supported yet.", 0
|
||||||
|
|
||||||
|
def _process_video(self, video_bytes, filename):
|
||||||
|
from dashscope import MultiModalConversation
|
||||||
|
|
||||||
|
video_suffix = Path(filename).suffix or ".mp4"
|
||||||
|
with tempfile.NamedTemporaryFile(delete=False, suffix=video_suffix) as tmp:
|
||||||
|
tmp.write(video_bytes)
|
||||||
|
tmp_path = tmp.name
|
||||||
|
|
||||||
|
video_path = f"file://{tmp_path}"
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"video": video_path,
|
||||||
|
"fps": 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"text": "Please summarize this video in proper sentences.",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def call_api():
|
||||||
|
response = MultiModalConversation.call(
|
||||||
|
api_key=self.api_key,
|
||||||
|
model=self.model_name,
|
||||||
|
messages=messages,
|
||||||
|
)
|
||||||
|
summary = response["output"]["choices"][0]["message"].content[0]["text"]
|
||||||
|
return summary, num_tokens_from_string(summary)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return call_api()
|
||||||
|
except Exception as e1:
|
||||||
|
import dashscope
|
||||||
|
|
||||||
|
dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
|
||||||
|
try:
|
||||||
|
return call_api()
|
||||||
|
except Exception as e2:
|
||||||
|
raise RuntimeError(f"Both default and intl endpoint failed.\nFirst error: {e1}\nSecond error: {e2}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class HunyuanCV(GptV4):
|
class HunyuanCV(GptV4):
|
||||||
_FACTORY_NAME = "Tencent Hunyuan"
|
_FACTORY_NAME = "Tencent Hunyuan"
|
||||||
@ -616,8 +675,6 @@ class GeminiCV(Base):
|
|||||||
def _process_video(self, video_bytes, filename):
|
def _process_video(self, video_bytes, filename):
|
||||||
from google import genai
|
from google import genai
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
video_size_mb = len(video_bytes) / (1024 * 1024)
|
video_size_mb = len(video_bytes) / (1024 * 1024)
|
||||||
client = genai.Client(api_key=self.api_key)
|
client = genai.Client(api_key=self.api_key)
|
||||||
|
|||||||
Reference in New Issue
Block a user