mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
fix plainPdf bugs (#152)
This commit is contained in:
@ -14,9 +14,6 @@
|
||||
import copy
|
||||
import time
|
||||
import os
|
||||
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
from .operators import *
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
@ -24,7 +21,6 @@ import onnxruntime as ort
|
||||
from .postprocess import build_post_process
|
||||
from rag.settings import cron_logger
|
||||
|
||||
|
||||
def transform(data, ops=None):
|
||||
""" transform """
|
||||
if ops is None:
|
||||
@ -82,7 +78,7 @@ class TextRecognizer(object):
|
||||
self.rec_batch_num = 16
|
||||
postprocess_params = {
|
||||
'name': 'CTCLabelDecode',
|
||||
"character_dict_path": os.path.join(os.path.dirname(os.path.realpath(__file__)), "ocr.res"),
|
||||
"character_dict_path": os.path.join(model_dir, "ocr.res"),
|
||||
"use_space_char": True
|
||||
}
|
||||
self.postprocess_op = build_post_process(postprocess_params)
|
||||
|
||||
@ -16,6 +16,7 @@ import re
|
||||
from collections import Counter
|
||||
|
||||
import numpy as np
|
||||
from huggingface_hub import snapshot_download
|
||||
|
||||
from api.utils.file_utils import get_project_base_directory
|
||||
from rag.nlp import huqie
|
||||
@ -33,7 +34,8 @@ class TableStructureRecognizer(Recognizer):
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(self.labels, "tsr",os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
|
||||
model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc")
|
||||
super().__init__(self.labels, "tsr", model_dir)#os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
|
||||
|
||||
def __call__(self, images, thr=0.2):
|
||||
tbls = super().__call__(images, thr)
|
||||
|
||||
Reference in New Issue
Block a user