mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Add Redis to accelerate access to MinIO (#482)
### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
@ -66,6 +66,8 @@ class Docx(DocxParser):
|
||||
class Pdf(PdfParser):
|
||||
def __call__(self, filename, binary=None, from_page=0,
|
||||
to_page=100000, zoomin=3, callback=None):
|
||||
from timeit import default_timer as timer
|
||||
start = timer()
|
||||
callback(msg="OCR is running...")
|
||||
self.__images__(
|
||||
filename if not binary else binary,
|
||||
@ -75,8 +77,8 @@ class Pdf(PdfParser):
|
||||
callback
|
||||
)
|
||||
callback(msg="OCR finished")
|
||||
cron_logger.info("OCR: {}".format(timer() - start))
|
||||
|
||||
from timeit import default_timer as timer
|
||||
start = timer()
|
||||
self._layouts_rec(zoomin)
|
||||
callback(0.63, "Layout analysis finished.")
|
||||
@ -90,7 +92,7 @@ class Pdf(PdfParser):
|
||||
self._concat_downward()
|
||||
#self._filter_forpages()
|
||||
|
||||
cron_logger.info("paddle layouts:".format(
|
||||
cron_logger.info("paddle layouts: {}".format(
|
||||
(timer() - start) / (self.total_page + 0.1)))
|
||||
return [(b["text"], self._line_tag(b, zoomin))
|
||||
for b in self.boxes], tbls
|
||||
|
||||
Reference in New Issue
Block a user