Fix: video file suffix (#10740)

### What problem does this PR solve?


### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2025-10-23 11:13:09 +08:00
committed by GitHub
parent 484c536f2e
commit f24d464a53
3 changed files with 4 additions and 3 deletions

View File

@@ -173,7 +173,7 @@ def filename_type(filename):
if re.match(r".*\.(wav|flac|ape|alac|wavpack|wv|mp3|aac|ogg|vorbis|opus)$", filename):
return FileType.AURAL.value
-if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4)$", filename):
+if re.match(r".*\.(jpg|jpeg|png|tif|gif|pcx|tga|exif|fpx|svg|psd|cdr|pcd|dxf|ufo|eps|ai|raw|WMF|webp|avif|apng|icon|ico|mpg|mpeg|avi|rm|rmvb|mov|wmv|asf|dat|asx|wvx|mpe|mpa|mp4|avi|mkv)$", filename):
return FileType.VISUAL.value
return FileType.OTHER.value

View File

@@ -29,7 +29,7 @@ from rag.utils import clean_markdown_block
ocr = OCR()
# Gemini supported MIME types
-VIDEO_EXTS = [".mp4", ".mov", ".avi", ".flv", ".mpeg", ".mpg", ".webm", ".wmv", ".3gp", ".3gpp"]
+VIDEO_EXTS = [".mp4", ".mov", ".avi", ".flv", ".mpeg", ".mpg", ".webm", ".wmv", ".3gp", ".3gpp", ".mkv"]
def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs):

View File

@@ -144,7 +144,8 @@ class ParserParam(ProcessParamBase):
"video": {
"suffix":[
"mp4",
-"av"
+"avi",
+"mkv"
],
"output_format": "json",
},