From 1241d411022f59fca0c26abb3ebbd35e6e5ab255 Mon Sep 17 00:00:00 2001
From: Alexey Nagaev
Date: Thu, 18 Sep 2025 20:07:30 +0300
Subject: [PATCH] Add PptxTxtConverter

---
 OOXML/Base/Base.h                          |   2 +
 TxtFile/Projects/Linux/TxtXmlFormatLib.pro |   2 +
 TxtFile/Source/PptxTxtConverter.cpp        | 128 +++++++++++++++++++++
 TxtFile/Source/PptxTxtConverter.h          |  23 ++++
 4 files changed, 155 insertions(+)
 create mode 100644 TxtFile/Source/PptxTxtConverter.cpp
 create mode 100644 TxtFile/Source/PptxTxtConverter.h

diff --git a/OOXML/Base/Base.h b/OOXML/Base/Base.h
index b4c359890d..6ea88f5201 100644
--- a/OOXML/Base/Base.h
+++ b/OOXML/Base/Base.h
@@ -48,6 +48,7 @@
 #ifndef FILE_SEPARATOR
 #define FILE_SEPARATOR
 #define FILE_SEPARATOR_CHAR '\\'
+#define FILE_SEPARATOR_WCHAR L'\\'
 #define FILE_SEPARATOR_STR L"\\"
 #endif
 #else
@@ -57,6 +58,7 @@
 #ifndef FILE_SEPARATOR
 #define FILE_SEPARATOR
 #define FILE_SEPARATOR_CHAR '/'
+#define FILE_SEPARATOR_WCHAR L'/'
 #define FILE_SEPARATOR_STR L"/"
 #endif
 #endif
diff --git a/TxtFile/Projects/Linux/TxtXmlFormatLib.pro b/TxtFile/Projects/Linux/TxtXmlFormatLib.pro
index 20dc543248..62c50eec95 100644
--- a/TxtFile/Projects/Linux/TxtXmlFormatLib.pro
+++ b/TxtFile/Projects/Linux/TxtXmlFormatLib.pro
@@ -24,6 +24,7 @@ DEFINES += UNICODE \
 include($$PWD/../../../Common/3dParty/boost/boost.pri)
 
 HEADERS += \
+    ../../Source/PptxTxtConverter.h \
     ../../Source/TxtFormat/File.h \
     ../../Source/TxtFormat/TxtFile.h \
     ../../Source/TxtRenderer.h \
@@ -32,6 +33,7 @@ HEADERS += \
     ../../Source/ConvertTxt2Docx.h
 
 SOURCES += \
+    ../../Source/PptxTxtConverter.cpp \
     ../../Source/TxtFormat/File.cpp \
     ../../Source/TxtFormat/TxtFile.cpp \
     ../../Source/TxtRenderer.cpp \
diff --git a/TxtFile/Source/PptxTxtConverter.cpp b/TxtFile/Source/PptxTxtConverter.cpp
new file mode 100644
index 0000000000..45dae8e931
--- /dev/null
+++ b/TxtFile/Source/PptxTxtConverter.cpp
@@ -0,0 +1,128 @@
+#include "PptxTxtConverter.h"
+
+#include "../../../OOXML/PPTXFormat/FileTypes.h"
+#include "../../../OOXML/PPTXFormat/Folder.h"
+#include "../../../OOXML/PPTXFormat/Slide.h"
+#include "../../../OOXML/PPTXFormat/Presentation.h"
+#include "../../../OOXML/PPTXFormat/Logic/Shape.h"
+#include "../../../OOXML/PPTXFormat/Logic/Csld.h"
+#include "../../../OOXML/PPTXFormat/Logic/SpTree.h"
+#include "../../../OOXML/PPTXFormat/Logic/SpTreeElem.h"
+
+#include "../../../OOXML/DocxFormat/WritingElement.h"
+
+#include "../../../Common/OfficeFileErrorDescription.h"
+#include "../../../DesktopEditor/common/File.h"
+
+// Private implementation: collects one text string per added slide and writes them to a file.
+class CPptxTxtConverter::CPptxTxtConverterImpl
+{
+public:
+    CPptxTxtConverterImpl() = default;
+    CPptxTxtConverterImpl(const CPptxTxtConverterImpl& other) = delete;
+    CPptxTxtConverterImpl(CPptxTxtConverterImpl&& other) = delete;
+    // Defaulted in-class so the destructor invoked through std::unique_ptr has a definition.
+    virtual ~CPptxTxtConverterImpl() = default;
+
+    // Appends the concatenated text of the slide's shapes as one new entry.
+    void AddSlide(const PPTX::Slide& oSlide);
+
+    // Writes every entry followed by a newline, then clears the entries; returns true if ok.
+    bool Save(const std::wstring& wsDstTxtFile);
+
+private:
+    // Concatenates the text of all paragraphs in the shape's text body (empty if there is none).
+    std::wstring GetTextFromShape(const PPTX::Logic::Shape& oShape) const;
+    std::vector<std::wstring> m_arTxtData;
+
+    const size_t knStringReserve = 1000;
+};
+
+CPptxTxtConverter::CPptxTxtConverter()
+    : m_pImpl(std::unique_ptr<CPptxTxtConverterImpl>(new CPptxTxtConverterImpl()))
+{
+}
+CPptxTxtConverter::~CPptxTxtConverter()
+{
+}
+int CPptxTxtConverter::Convert(const std::wstring& wsSrcPptxDir, const std::wstring& wsDstTxtFile)
+{
+    std::wstring norm_src_pptx_dir = CorrectPathW(wsSrcPptxDir);
+    // back() on an empty string is undefined behaviour, so reject an empty path first.
+    if (norm_src_pptx_dir.empty())
+        return AVS_FILEUTILS_ERROR_CONVERT;
+    if (norm_src_pptx_dir.back() != FILE_SEPARATOR_WCHAR)
+        norm_src_pptx_dir += FILE_SEPARATOR_WCHAR;
+
+    std::unique_ptr<PPTX::Document> pptx_document(new PPTX::Document());
+    bool is_read_ok = pptx_document->read(norm_src_pptx_dir);
+    if (!is_read_ok)
+        return AVS_FILEUTILS_ERROR_CONVERT;
+
+    const auto presentation_file_type = OOX::Presentation::FileTypes::Presentation;
+    smart_ptr<PPTX::Presentation> presentation = pptx_document->Get(presentation_file_type).smart_dynamic_cast<PPTX::Presentation>();
+    if (!presentation.is_init())
+        return AVS_FILEUTILS_ERROR_CONVERT;
+
+    for (size_t i = 0; i < presentation->sldIdLst.size(); ++i)
+    {
+        std::wstring rid = presentation->sldIdLst[i].rid.get();
+        smart_ptr<PPTX::Slide> slide = ((*presentation)[rid]).smart_dynamic_cast<PPTX::Slide>();
+
+        // Skip relationship ids that do not resolve to a slide.
+        if (slide.IsInit() == false)
+            continue;
+
+        m_pImpl->AddSlide(*slide);
+    }
+    // Report a failed write instead of returning success unconditionally.
+    if (!m_pImpl->Save(wsDstTxtFile))
+        return AVS_FILEUTILS_ERROR_CONVERT;
+    return S_OK;
+}
+void CPptxTxtConverter::CPptxTxtConverterImpl::AddSlide(const PPTX::Slide& oSlide)
+{
+    std::wstring slide_text;
+    slide_text.reserve(knStringReserve);
+
+    auto& sp_tree_elems = oSlide.cSld->spTree.SpTreeElems;
+    for (auto& elem : sp_tree_elems)
+    {
+        // Only elements of type et_p_Shape are read; every other element is ignored.
+        if (elem.getType() != OOX::et_p_Shape)
+            continue;
+
+        const auto& shape = *elem.GetElem().as<PPTX::Logic::Shape>();
+        slide_text += GetTextFromShape(shape);
+    }
+    m_arTxtData.push_back(std::move(slide_text));
+}
+bool CPptxTxtConverter::CPptxTxtConverterImpl::Save(const std::wstring& wsDstTxtFile)
+{
+    NSStringUtils::CStringBuilder str_builder;
+    str_builder.AddSize(1000);
+    for (const auto& txt_str : m_arTxtData)
+    {
+        str_builder.WriteString(txt_str);
+        str_builder.WriteString(L"\n");
+    }
+    const bool is_saved = NSFile::CFileBinary::SaveToFile(wsDstTxtFile, str_builder.GetData());
+    // The collected text is dropped whether or not the write succeeded.
+    m_arTxtData.clear();
+    return is_saved;
+}
+
+std::wstring CPptxTxtConverter::CPptxTxtConverterImpl::GetTextFromShape(const PPTX::Logic::Shape& oShape) const
+{
+    std::wstring text;
+    // A shape without a text body contributes no text; dereferencing txBody would be invalid.
+    if (!oShape.txBody.IsInit())
+        return text;
+
+    text.reserve(knStringReserve);
+
+    for (const auto& paragraph : oShape.txBody->Paragrs)
+        text += paragraph.GetText(true);
+
+    return text;
+}
diff --git a/TxtFile/Source/PptxTxtConverter.h b/TxtFile/Source/PptxTxtConverter.h
new file mode 100644
index 0000000000..a791d950f2
--- /dev/null
+++ b/TxtFile/Source/PptxTxtConverter.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <memory>
+#include <string>
+
+// Extracts the text of the slides of a PPTX presentation into a text file.
+class CPptxTxtConverter
+{
+public:
+    CPptxTxtConverter();
+    CPptxTxtConverter(const CPptxTxtConverter& other) = delete;
+    CPptxTxtConverter(CPptxTxtConverter&& other) = delete;
+    virtual ~CPptxTxtConverter();
+
+    // Reads the presentation from wsSrcPptxDir and writes the text of each slide, followed by a
+    // newline, to wsDstTxtFile. Returns S_OK if conversion was successful, an error code otherwise.
+    int Convert(const std::wstring& wsSrcPptxDir, const std::wstring& wsDstTxtFile = L"./output.txt");
+
+private:
+    class CPptxTxtConverterImpl;
+    std::unique_ptr<CPptxTxtConverterImpl> m_pImpl;
+};
+