mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-19 03:56:42 +08:00
Fix error and format issue (#11975)
### What problem does this PR solve? 1. Fix error of book chunking. 2. Fix format issues. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] Refactoring --------- Signed-off-by: Jin Hai <haijin.chn@gmail.com>
This commit is contained in:
@ -105,10 +105,10 @@ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
|
||||
apt update && \
|
||||
arch="$(uname -m)"; \
|
||||
if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
|
||||
# ARM64 (macOS/Apple Silicon or Linux aarch64)
|
||||
# ARM64 (macOS/Apple Silicon or Linux aarch64) \
|
||||
ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
|
||||
else \
|
||||
# x86_64 or others
|
||||
# x86_64 or others \
|
||||
ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
|
||||
fi || \
|
||||
{ echo "Failed to install ODBC driver"; exit 1; }
|
||||
|
||||
@ -13,6 +13,3 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# from beartype.claw import beartype_this_package
|
||||
# beartype_this_package()
|
||||
|
||||
@ -27,7 +27,6 @@ import pandas as pd
|
||||
from agent import settings
|
||||
from common.connection_utils import timeout
|
||||
|
||||
|
||||
_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
|
||||
_DEPRECATED_PARAMS = "_deprecated_params"
|
||||
_USER_FEEDED_PARAMS = "_user_feeded_params"
|
||||
@ -253,96 +252,65 @@ class ComponentParamBase(ABC):
|
||||
self._validate_param(attr, validation_json)
|
||||
|
||||
@staticmethod
|
||||
def check_string(param, descr):
|
||||
def check_string(param, description):
|
||||
if type(param).__name__ not in ["str"]:
|
||||
raise ValueError(
|
||||
descr + " {} not supported, should be string type".format(param)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be string type".format(param))
|
||||
|
||||
@staticmethod
|
||||
def check_empty(param, descr):
|
||||
def check_empty(param, description):
|
||||
if not param:
|
||||
raise ValueError(
|
||||
descr + " does not support empty value."
|
||||
)
|
||||
raise ValueError(description + " does not support empty value.")
|
||||
|
||||
@staticmethod
|
||||
def check_positive_integer(param, descr):
|
||||
def check_positive_integer(param, description):
|
||||
if type(param).__name__ not in ["int", "long"] or param <= 0:
|
||||
raise ValueError(
|
||||
descr + " {} not supported, should be positive integer".format(param)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be positive integer".format(param))
|
||||
|
||||
@staticmethod
|
||||
def check_positive_number(param, descr):
|
||||
def check_positive_number(param, description):
|
||||
if type(param).__name__ not in ["float", "int", "long"] or param <= 0:
|
||||
raise ValueError(
|
||||
descr + " {} not supported, should be positive numeric".format(param)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be positive numeric".format(param))
|
||||
|
||||
@staticmethod
|
||||
def check_nonnegative_number(param, descr):
|
||||
def check_nonnegative_number(param, description):
|
||||
if type(param).__name__ not in ["float", "int", "long"] or param < 0:
|
||||
raise ValueError(
|
||||
descr
|
||||
+ " {} not supported, should be non-negative numeric".format(param)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be non-negative numeric".format(param))
|
||||
|
||||
@staticmethod
|
||||
def check_decimal_float(param, descr):
|
||||
def check_decimal_float(param, description):
|
||||
if type(param).__name__ not in ["float", "int"] or param < 0 or param > 1:
|
||||
raise ValueError(
|
||||
descr
|
||||
+ " {} not supported, should be a float number in range [0, 1]".format(
|
||||
param
|
||||
)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be a float number in range [0, 1]".format(param))
|
||||
|
||||
@staticmethod
|
||||
def check_boolean(param, descr):
|
||||
def check_boolean(param, description):
|
||||
if type(param).__name__ != "bool":
|
||||
raise ValueError(
|
||||
descr + " {} not supported, should be bool type".format(param)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be bool type".format(param))
|
||||
|
||||
@staticmethod
|
||||
def check_open_unit_interval(param, descr):
|
||||
def check_open_unit_interval(param, description):
|
||||
if type(param).__name__ not in ["float"] or param <= 0 or param >= 1:
|
||||
raise ValueError(
|
||||
descr + " should be a numeric number between 0 and 1 exclusively"
|
||||
)
|
||||
raise ValueError(description + " should be a numeric number between 0 and 1 exclusively")
|
||||
|
||||
@staticmethod
|
||||
def check_valid_value(param, descr, valid_values):
|
||||
def check_valid_value(param, description, valid_values):
|
||||
if param not in valid_values:
|
||||
raise ValueError(
|
||||
descr
|
||||
+ " {} is not supported, it should be in {}".format(param, valid_values)
|
||||
)
|
||||
raise ValueError(description + " {} is not supported, it should be in {}".format(param, valid_values))
|
||||
|
||||
@staticmethod
|
||||
def check_defined_type(param, descr, types):
|
||||
def check_defined_type(param, description, types):
|
||||
if type(param).__name__ not in types:
|
||||
raise ValueError(
|
||||
descr + " {} not supported, should be one of {}".format(param, types)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be one of {}".format(param, types))
|
||||
|
||||
@staticmethod
|
||||
def check_and_change_lower(param, valid_list, descr=""):
|
||||
def check_and_change_lower(param, valid_list, description=""):
|
||||
if type(param).__name__ != "str":
|
||||
raise ValueError(
|
||||
descr
|
||||
+ " {} not supported, should be one of {}".format(param, valid_list)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be one of {}".format(param, valid_list))
|
||||
|
||||
lower_param = param.lower()
|
||||
if lower_param in valid_list:
|
||||
return lower_param
|
||||
else:
|
||||
raise ValueError(
|
||||
descr
|
||||
+ " {} not supported, should be one of {}".format(param, valid_list)
|
||||
)
|
||||
raise ValueError(description + " {} not supported, should be one of {}".format(param, valid_list))
|
||||
|
||||
@staticmethod
|
||||
def _greater_equal_than(value, limit):
|
||||
@ -374,16 +342,16 @@ class ComponentParamBase(ABC):
|
||||
def _not_in(value, wrong_value_list):
|
||||
return value not in wrong_value_list
|
||||
|
||||
def _warn_deprecated_param(self, param_name, descr):
|
||||
def _warn_deprecated_param(self, param_name, description):
|
||||
if self._deprecated_params_set.get(param_name):
|
||||
logging.warning(
|
||||
f"{descr} {param_name} is deprecated and ignored in this version."
|
||||
f"{description} {param_name} is deprecated and ignored in this version."
|
||||
)
|
||||
|
||||
def _warn_to_deprecate_param(self, param_name, descr, new_param):
|
||||
def _warn_to_deprecate_param(self, param_name, description, new_param):
|
||||
if self._deprecated_params_set.get(param_name):
|
||||
logging.warning(
|
||||
f"{descr} {param_name} will be deprecated in future release; "
|
||||
f"{description} {param_name} will be deprecated in future release; "
|
||||
f"please use {new_param} instead."
|
||||
)
|
||||
return True
|
||||
@ -532,7 +500,7 @@ class ComponentBase(ABC):
|
||||
res[exp] = {
|
||||
"name": (self._canvas.get_component_name(cpn_id) + f"@{var_nm}") if cpn_id else exp,
|
||||
"value": self._canvas.get_variable_value(exp),
|
||||
"_retrival": self._canvas.get_variable_value(f"{cpn_id}@_references") if cpn_id else None,
|
||||
"_retrieval": self._canvas.get_variable_value(f"{cpn_id}@_references") if cpn_id else None,
|
||||
"_cpn_id": cpn_id
|
||||
}
|
||||
return res
|
||||
@ -583,6 +551,7 @@ class ComponentBase(ABC):
|
||||
for n, v in kv.items():
|
||||
def repl(_match, val=v):
|
||||
return str(val) if val is not None else ""
|
||||
|
||||
content = re.sub(
|
||||
r"\{%s\}" % re.escape(n),
|
||||
repl,
|
||||
|
||||
@ -75,7 +75,7 @@
|
||||
},
|
||||
"history": [],
|
||||
"path": [],
|
||||
"retrival": {"chunks": [], "doc_aggs": []},
|
||||
"retrieval": {"chunks": [], "doc_aggs": []},
|
||||
"globals": {
|
||||
"sys.query": "",
|
||||
"sys.user_id": "",
|
||||
|
||||
@ -82,7 +82,7 @@
|
||||
},
|
||||
"history": [],
|
||||
"path": [],
|
||||
"retrival": {"chunks": [], "doc_aggs": []},
|
||||
"retrieval": {"chunks": [], "doc_aggs": []},
|
||||
"globals": {
|
||||
"sys.query": "",
|
||||
"sys.user_id": "",
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
},
|
||||
"history": [],
|
||||
"path": [],
|
||||
"retrival": {"chunks": [], "doc_aggs": []},
|
||||
"retrieval": {"chunks": [], "doc_aggs": []},
|
||||
"globals": {
|
||||
"sys.query": "",
|
||||
"sys.user_id": "",
|
||||
|
||||
@ -85,7 +85,7 @@
|
||||
},
|
||||
"history": [],
|
||||
"path": [],
|
||||
"retrival": {"chunks": [], "doc_aggs": []},
|
||||
"retrieval": {"chunks": [], "doc_aggs": []},
|
||||
"globals": {
|
||||
"sys.query": "",
|
||||
"sys.user_id": "",
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
},
|
||||
"history": [],
|
||||
"path": [],
|
||||
"retrival": {"chunks": [], "doc_aggs": []},
|
||||
"retrieval": {"chunks": [], "doc_aggs": []},
|
||||
"globals": {
|
||||
"sys.query": "",
|
||||
"sys.user_id": "",
|
||||
|
||||
@ -166,9 +166,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
||||
sections = [s.split("@") for s, _ in sections]
|
||||
sections = [(pr[0], "@" + pr[1]) if len(pr) == 2 else (pr[0], '') for pr in sections ]
|
||||
chunks = naive_merge(
|
||||
sections, kwargs.get(
|
||||
"chunk_token_num", 256), kwargs.get(
|
||||
"delimer", "\n。;!?"))
|
||||
sections,
|
||||
parser_config.get("chunk_token_num", 256),
|
||||
parser_config.get("delimiter", "\n。;!?")
|
||||
)
|
||||
|
||||
# is it English
|
||||
# is_english(random_choices([t for t, _ in sections], k=218))
|
||||
|
||||
Reference in New Issue
Block a user