mirror of
https://github.com/infiniflow/ragflow.git
synced 2025-12-08 20:42:30 +08:00
Fix: tree merge (#10691)
### What problem does this PR solve?

Fix: tree merge. Resolves #10636.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
@ -459,12 +459,10 @@ def tree_merge(bull, sections, depth):
|
|||||||
return len(BULLET_PATTERN[bull])+1, text
|
return len(BULLET_PATTERN[bull])+1, text
|
||||||
else:
|
else:
|
||||||
return len(BULLET_PATTERN[bull])+2, text
|
return len(BULLET_PATTERN[bull])+2, text
|
||||||
|
|
||||||
level_set = set()
|
level_set = set()
|
||||||
lines = []
|
lines = []
|
||||||
for section in sections:
|
for section in sections:
|
||||||
level, text = get_level(bull, section)
|
level, text = get_level(bull, section)
|
||||||
|
|
||||||
if not text.strip("\n"):
|
if not text.strip("\n"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -797,8 +795,8 @@ class Node:
|
|||||||
def __init__(self, level, depth=-1, texts=None):
|
def __init__(self, level, depth=-1, texts=None):
|
||||||
self.level = level
|
self.level = level
|
||||||
self.depth = depth
|
self.depth = depth
|
||||||
self.texts = texts if texts is not None else [] # 存放内容
|
self.texts = texts or []
|
||||||
self.children = [] # 子节点
|
self.children = []
|
||||||
|
|
||||||
def add_child(self, child_node):
|
def add_child(self, child_node):
|
||||||
self.children.append(child_node)
|
self.children.append(child_node)
|
||||||
@ -826,34 +824,50 @@ class Node:
|
|||||||
|
|
||||||
def build_tree(self, lines):
|
def build_tree(self, lines):
|
||||||
stack = [self]
|
stack = [self]
|
||||||
for line in lines:
|
for level, text in lines:
|
||||||
level, text = line
|
if self.depth != -1 and level > self.depth:
|
||||||
node = Node(level=level, texts=[text])
|
# Beyond target depth: merge content into the current leaf instead of creating deeper nodes
|
||||||
|
stack[-1].add_text(text)
|
||||||
|
continue
|
||||||
|
|
||||||
if level <= self.depth or self.depth == -1:
|
# Move up until we find the proper parent whose level is strictly smaller than current
|
||||||
while stack and level <= stack[-1].get_level():
|
while len(stack) > 1 and level <= stack[-1].get_level():
|
||||||
stack.pop()
|
stack.pop()
|
||||||
|
|
||||||
|
node = Node(level=level, texts=[text])
|
||||||
|
# Attach as child of current parent and descend
|
||||||
stack[-1].add_child(node)
|
stack[-1].add_child(node)
|
||||||
stack.append(node)
|
stack.append(node)
|
||||||
else:
|
|
||||||
stack[-1].add_text(text)
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def get_tree(self):
|
def get_tree(self):
|
||||||
tree_list = []
|
tree_list = []
|
||||||
self._dfs(self, tree_list, 0, [])
|
self._dfs(self, tree_list, [])
|
||||||
return tree_list
|
return tree_list
|
||||||
|
|
||||||
def _dfs(self, node, tree_list, current_depth, titles):
|
def _dfs(self, node, tree_list, titles):
|
||||||
|
level = node.get_level()
|
||||||
|
texts = node.get_texts()
|
||||||
|
child = node.get_children()
|
||||||
|
|
||||||
if node.get_texts():
|
if level == 0 and texts:
|
||||||
if 0 < node.get_level() < self.depth:
|
tree_list.append("\n".join(titles+texts))
|
||||||
titles.extend(node.get_texts())
|
|
||||||
|
# Titles within configured depth are accumulated into the current path
|
||||||
|
if 1 <= level <= self.depth:
|
||||||
|
path_titles = titles + texts
|
||||||
else:
|
else:
|
||||||
combined_text = ["\n".join(titles + node.get_texts())]
|
path_titles = titles
|
||||||
tree_list.append(combined_text)
|
|
||||||
|
|
||||||
|
# Body outside the depth limit becomes its own chunk under the current title path
|
||||||
|
if level > self.depth and texts:
|
||||||
|
tree_list.append("\n".join(path_titles + texts))
|
||||||
|
|
||||||
for child in node.get_children():
|
# A leaf title within depth emits its title path as a chunk (header-only section)
|
||||||
self._dfs(child, tree_list, current_depth + 1, titles.copy())
|
elif not child and (1 <= level <= self.depth):
|
||||||
|
tree_list.append("\n".join(path_titles))
|
||||||
|
|
||||||
|
# Recurse into children with the updated title path
|
||||||
|
for c in child:
|
||||||
|
self._dfs(c, tree_list, path_titles)
|
||||||
Reference in New Issue
Block a user