mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
fix bug about fetching knowledge graph (#3394)
### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -10,6 +10,8 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import re
|
||||
|
||||
from deepdoc.parser.utils import get_text
|
||||
from rag.nlp import num_tokens_from_string
|
||||
|
||||
@ -29,8 +31,6 @@ class RAGFlowTxtParser:
|
||||
def add_chunk(t):
|
||||
nonlocal cks, tk_nums, delimiter
|
||||
tnum = num_tokens_from_string(t)
|
||||
if tnum < 8:
|
||||
pos = ""
|
||||
if tk_nums[-1] > chunk_token_num:
|
||||
cks.append(t)
|
||||
tk_nums.append(tnum)
|
||||
@ -38,15 +38,19 @@ class RAGFlowTxtParser:
|
||||
cks[-1] += t
|
||||
tk_nums[-1] += tnum
|
||||
|
||||
s, e = 0, 1
|
||||
while e < len(txt):
|
||||
if txt[e] in delimiter:
|
||||
add_chunk(txt[s: e + 1])
|
||||
s = e + 1
|
||||
e = s + 1
|
||||
else:
|
||||
e += 1
|
||||
if s < e:
|
||||
add_chunk(txt[s: e + 1])
|
||||
dels = []
|
||||
s = 0
|
||||
for m in re.finditer(r"`([^`]+)`", delimiter, re.I):
|
||||
f, t = m.span()
|
||||
dels.append(m.group(1))
|
||||
dels.extend(list(delimiter[s: f]))
|
||||
s = t
|
||||
if s < len(delimiter):
|
||||
dels.extend(list(delimiter[s:]))
|
||||
dels = [re.escape(d) for d in delimiter if d]
|
||||
dels = [d for d in dels if d]
|
||||
dels = "|".join(dels)
|
||||
secs = re.split(r"(%s)" % dels, txt)
|
||||
for sec in secs: add_chunk(sec)
|
||||
|
||||
return [[c,""] for c in cks]
|
||||
return [[c, ""] for c in cks]
|
||||
|
||||
Reference in New Issue
Block a user