Refine resume parts and fix bugs in retrival using sql (#66)

2026-01-31 23:55:06 +08:00 · 2024-02-19 19:22:17 +08:00
parent 452020d33a
commit a8294f2168
29 changed files with 302 additions and 158 deletions
--- a/rag/app/book.py
+++ b/rag/app/book.py
@ -39,6 +39,11 @@ class Pdf(HuParser):


 def chunk(filename, binary=None, from_page=0, to_page=100000, callback=None, **kwargs):
+    """
+        Supported file formats are docx, pdf, txt.
+        Since a book is long and not all the parts are useful, if it's a PDF,
+        please setup the page ranges for every book in order eliminate negative effects and save elapsed computing time.
+    """
    doc = {
        "docnm_kwd": filename,
        "title_tks": huqie.qie(re.sub(r"\.[a-zA-Z]+$", "", filename))