Fix some bugs in text2sql.(#4279)(#4281) (#4280)

Fix some bugs in text2sql.(#4279)(#4281)

### What problem does this PR solve?
- Fix incorrect results when parsing CSV files of the QA knowledge base in
the text2sql scenario: CSV files are now processed with the csv library,
and CSV parsing is decoupled from TXT parsing.
- Most LLMs return results in markdown format (```sql query ```). Fix the
execution error caused by SQL wrapped in markdown in the LLM output.

### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
TeslaZY
2024-12-30 10:32:19 +08:00
committed by GitHub
parent 8cdf10148d
commit dd13a5d05c
2 changed files with 42 additions and 14 deletions

View File

@ -65,20 +65,16 @@ class ExeSQL(ComponentBase, ABC):
self._loop += 1
ans = self.get_input()
ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
if self._param.db_type == 'mssql':
# improve the information extraction, most llm return results in markdown format ```sql query ```
match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
if match:
ans = match.group(1) # Query content
print(ans)
else:
print("no markdown")
ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
# improve the information extraction, most llm return results in markdown format ```sql query ```
match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
if match:
ans = match.group(1) # Query content
print(ans)
else:
ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE)
print("no markdown")
ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
ans = re.sub(r';[^;]*$', r';', ans)
if not ans: