Update progress info and start welcome info (#3768)

### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context
that will help reviewers understand the purpose of the PR._

### Type of change

- [x] Refactoring

---------

Signed-off-by: jinhai <haijin.chn@gmail.com>
This commit is contained in:
Jin Hai
2024-11-30 18:48:06 +08:00
committed by GitHub
parent d00297a763
commit e079656473
11 changed files with 107 additions and 59 deletions

View File

@ -73,7 +73,7 @@ class Pdf(PdfParser):
def __call__(self, filename, binary=None, from_page=0,
to_page=100000, zoomin=3, callback=None):
start = timer()
callback(msg="OCR is running...")
callback(msg="OCR started")
self.__images__(
filename if not binary else binary,
zoomin,
@ -81,15 +81,19 @@ class Pdf(PdfParser):
to_page,
callback
)
callback(msg="OCR finished")
logging.debug("OCR({}~{}): {}".format(from_page, to_page, timer() - start))
callback(msg="OCR finished ({:.2f}s)".format(timer() - start))
logging.debug("OCR({}~{}): {:.2f}s".format(from_page, to_page, timer() - start))
start = timer()
self._layouts_rec(zoomin, drop=False)
callback(0.63, "Layout analysis finished.")
callback(0.63, "Layout analysis ({:.2f}s)".format(timer() - start))
start = timer()
self._table_transformer_job(zoomin)
callback(0.65, "Table analysis finished.")
callback(0.65, "Table analysis ({:.2f}s)".format(timer() - start))
start = timer()
self._text_merge()
callback(0.67, "Text merging finished")
callback(0.67, "Text merged ({:.2f}s)".format(timer() - start))
tbls = self._extract_table_figure(True, zoomin, True, True)
#self._naive_vertical_merge()
# self._concat_downward()
@ -226,7 +230,7 @@ class Docx(DocxParser):
sum_question = '\n'.join(question_stack)
if sum_question:
qai_list.append((sum_question, last_answer, last_image))
tbls = []
for tb in self.doc.tables:
html= "<table>"