diff --git a/rag/app/manual.py b/rag/app/manual.py index 363c6e9e7..54a05f192 100644 --- a/rag/app/manual.py +++ b/rag/app/manual.py @@ -219,31 +219,26 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, ) def _normalize_section(section): - # Pad/normalize to (txt, layout, positions) - if not isinstance(section, (list, tuple)): - section = (section, "", []) - elif len(section) == 1: + # pad section to length 3: (txt, sec_id, poss) + if len(section) == 1: section = (section[0], "", []) elif len(section) == 2: section = (section[0], "", section[1]) - else: - section = (section[0], section[1], section[2]) + elif len(section) != 3: + raise ValueError(f"Unexpected section length: {len(section)} (value={section!r})") txt, layoutno, poss = section if isinstance(poss, str): poss = pdf_parser.extract_positions(poss) if poss: - first = poss[0] # tuple: ([pn], x1, x2, y1, y2) - pn = first[0] + first = poss[0] # tuple: ([pn], x1, x2, y1, y2) + pn = first[0] if isinstance(pn, list) and pn: - pn = pn[0] # [pn] -> pn - poss[0] = (pn, *first[1:]) - if not poss: - poss = [] + pn = pn[0] # [pn] -> pn + poss[0] = (pn, *first[1:]) return (txt, layoutno, poss) - sections = [_normalize_section(sec) for sec in sections] if not sections and not tbls: diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 8a2743866..648bbe8ce 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -146,7 +146,6 @@ class Base(ABC): request_kwargs["stop"] = stop response = await self.async_client.chat.completions.create(**request_kwargs) - async for resp in response: if not resp.choices: continue @@ -161,7 +160,6 @@ class Base(ABC): else: reasoning_start = False ans = resp.choices[0].delta.content - tol = total_token_count_from_response(resp) if not tol: tol = num_tokens_from_string(resp.choices[0].delta.content) @@ -193,7 +191,7 @@ class Base(ABC): except Exception as e: e = await self._exceptions_async(e, attempt) if e: - yield e + yield e yield total_tokens return