From 71201f4446c405b08255ef6881b4fd2d4a8562ac Mon Sep 17 00:00:00 2001 From: Alexey Nagaev Date: Tue, 25 Mar 2025 14:48:02 +0300 Subject: [PATCH] Add new lines groupping algo --- DocxRenderer/src/logic/Page.cpp | 149 +++++++++++-------- DocxRenderer/src/logic/Page.h | 14 +- DocxRenderer/src/logic/elements/BaseItem.h | 18 +++ DocxRenderer/src/logic/elements/TextLine.cpp | 6 + DocxRenderer/src/logic/elements/TextLine.h | 3 - DocxRenderer/src/resources/Constants.h | 2 +- DocxRenderer/src/resources/utils.h | 10 ++ 7 files changed, 127 insertions(+), 75 deletions(-) diff --git a/DocxRenderer/src/logic/Page.cpp b/DocxRenderer/src/logic/Page.cpp index 709b9e2cd8..622a4dc09a 100644 --- a/DocxRenderer/src/logic/Page.cpp +++ b/DocxRenderer/src/logic/Page.cpp @@ -45,12 +45,6 @@ namespace NSDocxRenderer else m_oClipVectorGraphics = std::move(m_oCurrVectorGraphics); } - - // write in a new cont if new text command - else if (lType == c_nTextType) - { - m_oContBuilder.NullCurrCont(); - } } void CPage::Clear() @@ -746,6 +740,10 @@ namespace NSDocxRenderer SplitLines(); AnalyzeOverlapLines(); + for (auto& line : m_arTextLines) + if (line && line->m_arConts.empty()) + line = nullptr; + auto right = MoveNullptr(m_arTextLines.begin(), m_arTextLines.end()); m_arTextLines.erase(right, m_arTextLines.end()); @@ -1410,72 +1408,95 @@ namespace NSDocxRenderer void CPage::BuildTextLineGroups() { - struct Group { - double left{}; - double right{}; - double top{}; - double bot{}; - bool closed{false}; - }; + if (m_arTextLines.empty()) + return; - std::vector groups; + double curr_bot = std::numeric_limits::max(); + std::vector> bot_aligned_text_lines; - for (auto& line : m_arTextLines) + for (const auto& line : m_arTextLines) { - bool is_found = false; - bool is_create_new = false; - size_t insert_index = 0; - - for (size_t index = 0; index < groups.size(); ++index) + if (fabs(line->m_dBotWithMaxDescent - curr_bot) < 4 * c_dTHE_SAME_STRING_Y_PRECISION_MM) { - auto& group = groups[index]; - bool is_crossing_h = !((line->m_dRight <= group.left) || (line->m_dLeft >= group.right)); - bool is_crossing_v = !((line->m_dBotWithMaxDescent <= group.top) || (line->m_dTopWithMaxAscent >= group.bot)); - - if (!group.closed && is_crossing_h) - { - if (is_crossing_v) - { - groups[index].closed = true; - continue; - } - if (!is_found && !is_create_new) - { - is_found = true; - insert_index = index; - } - else - { - groups[insert_index].closed = true; - groups[index].closed = true; - is_create_new = true; - is_found = false; - } - } - } - if (is_found) - { - groups[insert_index].left = std::min(groups[insert_index].left, line->m_dLeft); - groups[insert_index].right = std::max(groups[insert_index].right, line->m_dRight); - groups[insert_index].bot = std::max(groups[insert_index].bot, line->m_dBot); - groups[insert_index].top = std::min(groups[insert_index].top, line->m_dTop); - m_arTextLineGroups[insert_index].push_back(line); + bot_aligned_text_lines.back().push_back(line); } else { - Group new_group; - new_group.left = line->m_dLeft; - new_group.right = line->m_dRight; - new_group.top = line->m_dTop; - new_group.bot = line->m_dBot; - new_group.closed = false; - groups.push_back(new_group); - - std::vector line_group; - line_group.push_back(line); - m_arTextLineGroups.push_back(line_group); + bot_aligned_text_lines.push_back({}); + bot_aligned_text_lines.back().push_back(line); + curr_bot = line->m_dBotWithMaxDescent; } } + + std::vector ar_is_group_open; + for (const auto& text_lines : bot_aligned_text_lines) + { + // lines [i] belongs group [j] (like a matrix) + // only 1 [i] to 1 [j]. + std::vector> lines_x_groups(text_lines.size()); + for (auto& lxg : lines_x_groups) + lxg.resize(m_arTextLineGroups.size()); + + for (size_t i = 0; i < text_lines.size(); ++i) + { + for (size_t j = 0; j < m_arTextLineGroups.size(); ++j) + { + if (!ar_is_group_open[j]) + continue; + + // line inside of the group + if (CmpOrEqual(text_lines[i]->m_dLeft, m_arTextLineGroups[j]->m_dRight, c_dTHE_SAME_STRING_X_PRECISION_MM) && + CmpOrEqual(m_arTextLineGroups[j]->m_dLeft, text_lines[i]->m_dRight, c_dTHE_SAME_STRING_X_PRECISION_MM)) + { + lines_x_groups[i][j] = true; + } + } + } + + for (size_t j = 0; j < m_arTextLineGroups.size(); ++j) + { + size_t lines_counter = 0; + for (size_t i = 0; i < text_lines.size(); ++i) + if (lines_x_groups[i][j]) + lines_counter++; + + // if lines_counter > 1 then group is overloaded - 1 group 1 text line + if (lines_counter > 1) + ar_is_group_open[j] = false; + } + + std::vector groups_add_later; + + for (size_t i = 0; i < text_lines.size(); ++i) + { + std::vector groups_taken; + for (size_t j = 0; j < m_arTextLineGroups.size(); ++j) + { + if (!ar_is_group_open[j]) + continue; + + if (lines_x_groups[i][j]) + groups_taken.push_back(j); + } + if (groups_taken.size() != 1) + { + for (const auto& taken : groups_taken) + ar_is_group_open[taken] = false; + + auto group_new = std::make_shared(); + group_new->AddTextLine(text_lines[i]); + groups_add_later.push_back(std::move(group_new)); + ar_is_group_open.push_back(true); + } + else + { + m_arTextLineGroups[groups_taken[0]]->AddTextLine(text_lines[i]); + } + } + for (auto&& group : groups_add_later) + m_arTextLineGroups.push_back(std::move(group)); + } + return; } void CPage::AnalyzeOverlapLines() @@ -1873,7 +1894,7 @@ namespace NSDocxRenderer m_eTextAssociationType == TextAssociationType::tatParagraphToShape) { for (auto& g : m_arTextLineGroups) - build_paragraphs(g); + build_paragraphs(g->m_arTextLines); } std::sort(ar_paragraphs.begin(), ar_paragraphs.end(), [] (const paragraph_ptr_t& a, const paragraph_ptr_t& b) { diff --git a/DocxRenderer/src/logic/Page.h b/DocxRenderer/src/logic/Page.h index c27955ce4b..09030a4b12 100644 --- a/DocxRenderer/src/logic/Page.h +++ b/DocxRenderer/src/logic/Page.h @@ -87,6 +87,7 @@ namespace NSDocxRenderer using shape_ptr_t = std::shared_ptr; using cont_ptr_t = std::shared_ptr; using text_line_ptr_t = std::shared_ptr; + using group_text_line_ptr_t = std::shared_ptr; using base_item_ptr_t = std::shared_ptr; using ooxml_item_ptr_t = std::shared_ptr; using paragraph_ptr_t = std::shared_ptr; @@ -196,14 +197,13 @@ namespace NSDocxRenderer CContTextBuilder m_oContBuilder; CHorVerLinesCollector m_oHorVerLinesCollector; - std::vector m_arShapes; - std::vector m_arTextLines; - std::vector m_arParagraphs; - std::vector m_arTables; + std::vector m_arShapes; + std::vector m_arTextLines; + std::vector m_arTextLineGroups; + std::vector m_arParagraphs; + std::vector m_arTables; + std::vector m_arOutputObjects; - std::vector> m_arTextLineGroups; - - std::vector m_arOutputObjects; std::vector m_arCompleteObjectsXml; size_t m_nShapeOrder = 0; diff --git a/DocxRenderer/src/logic/elements/BaseItem.h b/DocxRenderer/src/logic/elements/BaseItem.h index 4d2823b2f1..aa7eebcf91 100644 --- a/DocxRenderer/src/logic/elements/BaseItem.h +++ b/DocxRenderer/src/logic/elements/BaseItem.h @@ -71,4 +71,22 @@ namespace NSDocxRenderer virtual void ToXml(NSStringUtils::CStringBuilder& oWriter) const = 0; virtual void ToXmlPptx(NSStringUtils::CStringBuilder& oWriter) const = 0; }; + + enum class eBaseItemCmpType + { + bictVertical, + bictHorizontal + }; + + template + struct CBaseItemCmp + { + bool operator() (const CBaseItem& item1, const CBaseItem& item2) const + { + if (CmpType == eBaseItemCmpType::bictVertical) + return item1.m_dBot < item2.m_dBot; + if (CmpType == eBaseItemCmpType::bictHorizontal) + return item1.m_dLeft < item2.m_dLeft; + } + }; } diff --git a/DocxRenderer/src/logic/elements/TextLine.cpp b/DocxRenderer/src/logic/elements/TextLine.cpp index 7c986d81d8..df6b9ffbd3 100644 --- a/DocxRenderer/src/logic/elements/TextLine.cpp +++ b/DocxRenderer/src/logic/elements/TextLine.cpp @@ -339,4 +339,10 @@ namespace NSDocxRenderer nSymPos = 0; } } + + void CTextLineGroup::AddTextLine(const std::shared_ptr& pTextLine) + { + this->CBaseItem::RecalcWithNewItem(pTextLine.get()); + m_arTextLines.push_back(pTextLine); + } } diff --git a/DocxRenderer/src/logic/elements/TextLine.h b/DocxRenderer/src/logic/elements/TextLine.h index 23b4507ca8..4db8b678be 100644 --- a/DocxRenderer/src/logic/elements/TextLine.h +++ b/DocxRenderer/src/logic/elements/TextLine.h @@ -59,10 +59,7 @@ namespace NSDocxRenderer class CTextLineGroup : public CBaseItem { public: - void AddTextLine(const std::shared_ptr& pTextLine); - - private: std::vector> m_arTextLines; }; } diff --git a/DocxRenderer/src/resources/Constants.h b/DocxRenderer/src/resources/Constants.h index b958626eea..9014eae4ff 100644 --- a/DocxRenderer/src/resources/Constants.h +++ b/DocxRenderer/src/resources/Constants.h @@ -23,7 +23,7 @@ constexpr double c_dPtToEMU = 12700.0; constexpr double c_dDegreeToAngle = 60000.0; const double c_dSTANDART_STRING_HEIGHT_MM = 4.2333333333333334; -const double c_dTHE_SAME_STRING_Y_PRECISION_MM = 0.05; +const double c_dTHE_SAME_STRING_Y_PRECISION_MM = 0.02; const double c_dTHE_SAME_STRING_X_PRECISION_MM = 0.02; const double c_dLINE_DISTANCE_ERROR_MM = 0.3; const double c_dERROR_OF_PARAGRAPH_BORDERS_MM = 1.0; diff --git a/DocxRenderer/src/resources/utils.h b/DocxRenderer/src/resources/utils.h index 96820a7ce4..46a897ff02 100644 --- a/DocxRenderer/src/resources/utils.h +++ b/DocxRenderer/src/resources/utils.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include "../../../DesktopEditor/common/Types.h" #include "../../../DesktopEditor/common/StringUTF32.h" @@ -27,3 +28,12 @@ It MoveNullptr(It start, It end) return right; } + +template > +bool CmpOrEqual(const T& val1, + const T& val2, + const T& eps = std::numeric_limits::epsilon(), + const Cmp& cmp = Cmp()) +{ + return std::abs(val1 - val2) < eps || cmp(val1, val2); +}