From d6a941ebf57a1a0a422d4b2c4a402db1098f3586 Mon Sep 17 00:00:00 2001 From: HaiyangP <46739135+HaiyangPeng@users.noreply.github.com> Date: Tue, 24 Jun 2025 18:18:30 +0800 Subject: [PATCH] Fix the bug of long type value overflow (#8313) ### What problem does this PR solve? This PR fixes #8271 by widening a column's inferred type from int to float when any value in the column is outside the long type range. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/app/table.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/rag/app/table.py b/rag/app/table.py index 450cd6280..90b4c2849 100644 --- a/rag/app/table.py +++ b/rag/app/table.py @@ -92,11 +92,15 @@ def column_data_type(arr): arr = list(arr) counts = {"int": 0, "float": 0, "text": 0, "datetime": 0, "bool": 0} trans = {t: f for f, t in [(int, "int"), (float, "float"), (trans_datatime, "datetime"), (trans_bool, "bool"), (str, "text")]} + float_flag = False for a in arr: if a is None: continue - if re.match(r"[+-]?[0-9]{,19}(\.0+)?$", str(a).replace("%%", "")): + if re.match(r"[+-]?[0-9]+$", str(a).replace("%%", "")): counts["int"] += 1 + if int(str(a)) > 2**63 - 1: + float_flag = True + break elif re.match(r"[+-]?[0-9.]{,19}$", str(a).replace("%%", "")): counts["float"] += 1 elif re.match(r"(true|yes|是|\*|✓|✔|☑|✅|√|false|no|否|⍻|×)$", str(a), flags=re.IGNORECASE): @@ -105,8 +109,11 @@ def column_data_type(arr): counts["datetime"] += 1 else: counts["text"] += 1 - counts = sorted(counts.items(), key=lambda x: x[1] * -1) - ty = counts[0][0] + if float_flag: + ty = "float" + else: + counts = sorted(counts.items(), key=lambda x: x[1] * -1) + ty = counts[0][0] for i in range(len(arr)): if arr[i] is None: continue