mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-02-10 21:05:11 +08:00
Refactor: improve ppt shape order logic (#13054)
### What problem does this PR solve?

Improve the PPT shape ordering logic.

### Type of change

- [x] Refactoring
This commit is contained in:
@ -22,6 +22,16 @@ from pptx import Presentation
|
|||||||
class RAGFlowPptParser:
|
class RAGFlowPptParser:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
self._shape_cache = {}
|
||||||
|
|
||||||
|
def __sort_shapes(self, shapes):
    """Return the given shapes as a new list in reading order.

    Shapes are ordered top-to-bottom first and left-to-right second. The
    ``top`` coordinate is floor-divided by 10 so that shapes whose tops fall
    in the same 10-unit band are treated as one row and ordered by ``left``.
    A ``top`` or ``left`` of ``None`` is treated as 0.

    Args:
        shapes: An iterable of objects exposing ``top`` and ``left``
            attributes (e.g. ``slide.shapes`` or a group shape's ``shapes``).

    Returns:
        A new list holding the same shape objects, sorted. The input is not
        modified.
    """
    # NOTE: the result is deliberately NOT memoized by ``id(shapes)``.
    # ``id()`` is only unique among objects that are alive at the same time,
    # so once a shape collection is garbage-collected a later, unrelated
    # collection can receive the same id and would be handed the wrong
    # slide's/group's shapes. Such a cache would also go stale if the
    # collection changed and would keep every visited shape alive for the
    # lifetime of the parser. Sorting is cheap, so it is done on each call.
    def reading_order(shape):
        top = shape.top if shape.top is not None else 0
        left = shape.left if shape.left is not None else 0
        return (top // 10, left)

    return sorted(shapes, key=reading_order)
|
||||||
|
|
||||||
def __get_bulleted_text(self, paragraph):
|
def __get_bulleted_text(self, paragraph):
|
||||||
is_bulleted = bool(paragraph._p.xpath("./a:pPr/a:buChar")) or bool(paragraph._p.xpath("./a:pPr/a:buAutoNum")) or bool(paragraph._p.xpath("./a:pPr/a:buBlip"))
|
is_bulleted = bool(paragraph._p.xpath("./a:pPr/a:buChar")) or bool(paragraph._p.xpath("./a:pPr/a:buAutoNum")) or bool(paragraph._p.xpath("./a:pPr/a:buBlip"))
|
||||||
@ -62,7 +72,7 @@ class RAGFlowPptParser:
|
|||||||
# Handle group shape
|
# Handle group shape
|
||||||
if shape_type == 6:
|
if shape_type == 6:
|
||||||
texts = []
|
texts = []
|
||||||
for p in sorted(shape.shapes, key=lambda x: (x.top // 10, x.left)):
|
for p in self.__sort_shapes(shape.shapes):
|
||||||
t = self.__extract(p)
|
t = self.__extract(p)
|
||||||
if t:
|
if t:
|
||||||
texts.append(t)
|
texts.append(t)
|
||||||
@ -86,8 +96,7 @@ class RAGFlowPptParser:
|
|||||||
if i >= to_page:
|
if i >= to_page:
|
||||||
break
|
break
|
||||||
texts = []
|
texts = []
|
||||||
for shape in sorted(
|
for shape in self.__sort_shapes(slide.shapes):
|
||||||
slide.shapes, key=lambda x: ((x.top if x.top is not None else 0) // 10, x.left if x.left is not None else 0)):
|
|
||||||
txt = self.__extract(shape)
|
txt = self.__extract(shape)
|
||||||
if txt:
|
if txt:
|
||||||
texts.append(txt)
|
texts.append(txt)
|
||||||
|
|||||||
Reference in New Issue
Block a user