Add Redis to accelerate access to MinIO (#482)

### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
KevinHuSh
2024-04-22 14:11:09 +08:00
committed by GitHub
parent fc87c20bd8
commit b8e58fe27a
10 changed files with 101 additions and 16 deletions

View File

@ -66,6 +66,8 @@ class Docx(DocxParser):
class Pdf(PdfParser):
def __call__(self, filename, binary=None, from_page=0,
to_page=100000, zoomin=3, callback=None):
from timeit import default_timer as timer
start = timer()
callback(msg="OCR is running...")
self.__images__(
filename if not binary else binary,
@ -75,8 +77,8 @@ class Pdf(PdfParser):
callback
)
callback(msg="OCR finished")
cron_logger.info("OCR: {}".format(timer() - start))
from timeit import default_timer as timer
start = timer()
self._layouts_rec(zoomin)
callback(0.63, "Layout analysis finished.")
@ -90,7 +92,7 @@ class Pdf(PdfParser):
self._concat_downward()
#self._filter_forpages()
cron_logger.info("paddle layouts:".format(
cron_logger.info("paddle layouts: {}".format(
(timer() - start) / (self.total_page + 0.1)))
return [(b["text"], self._line_tag(b, zoomin))
for b in self.boxes], tbls