mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
README refined (#156)
This commit is contained in:
@ -42,7 +42,9 @@ class Pdf(PdfParser):
|
||||
self._text_merge()
|
||||
callback(0.67, "Text merging finished")
|
||||
tbls = self._extract_table_figure(True, zoomin, True, True)
|
||||
self._naive_vertical_merge()
|
||||
#self._naive_vertical_merge()
|
||||
self._concat_downward()
|
||||
#self._filter_forpages()
|
||||
|
||||
cron_logger.info("paddle layouts:".format(
|
||||
(timer() - start) / (self.total_page + 0.1)))
|
||||
@ -79,7 +81,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
|
||||
elif re.search(r"\.pdf$", filename, re.IGNORECASE):
|
||||
pdf_parser = Pdf(
|
||||
) if parser_config["layout_recognize"] else PlainParser()
|
||||
) if parser_config.get("layout_recognize", True) else PlainParser()
|
||||
sections, tbls = pdf_parser(filename if not binary else binary,
|
||||
from_page=from_page, to_page=to_page, callback=callback)
|
||||
res = tokenize_table(tbls, doc, eng)
|
||||
|
||||
@ -7,7 +7,6 @@ from elasticsearch_dsl import Q, Search
|
||||
from typing import List, Optional, Dict, Union
|
||||
from dataclasses import dataclass
|
||||
|
||||
from api.settings import chat_logger
|
||||
from rag.settings import es_logger
|
||||
from rag.utils import rmSpace
|
||||
from rag.nlp import huqie, query
|
||||
@ -365,6 +364,7 @@ class Dealer:
|
||||
return ranks
|
||||
|
||||
def sql_retrieval(self, sql, fetch_size=128, format="json"):
|
||||
from api.settings import chat_logger
|
||||
sql = re.sub(r"[ ]+", " ", sql)
|
||||
sql = sql.replace("%", "")
|
||||
es_logger.info(f"Get es sql: {sql}")
|
||||
|
||||
Reference in New Issue
Block a user