From 5c7a285bb9b7f5d402ee27ee24adcf854501e334 Mon Sep 17 00:00:00 2001 From: RIMINMIR Date: Wed, 26 Apr 2023 17:11:03 +0600 Subject: [PATCH] add new realisation with map reading --- .../Reader/XMLReader/XML2TableConverter.cpp | 2 - .../Sheets/Reader/XMLReader/XMLConverter2.cpp | 33 ++++ .../Sheets/Reader/XMLReader/XMLConverter2.h | 33 ++++ .../Binary/Sheets/Reader/XMLReader/XMLMap.cpp | 167 ++++++++++++++++++ OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.h | 115 ++++++++++++ .../Sheets/Reader/XMLReader/XMLReader2.cpp | 70 ++++++++ .../Linux/BinDocument/BinDocument.pro | 9 +- X2tConverter/src/ASCConverters.cpp | 2 +- 8 files changed, 426 insertions(+), 5 deletions(-) create mode 100644 OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.cpp create mode 100644 OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.h create mode 100644 OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.cpp create mode 100644 OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.h create mode 100644 OOXML/Binary/Sheets/Reader/XMLReader/XMLReader2.cpp diff --git a/OOXML/Binary/Sheets/Reader/XMLReader/XML2TableConverter.cpp b/OOXML/Binary/Sheets/Reader/XMLReader/XML2TableConverter.cpp index 7503bc45e2..6ce6d735d8 100644 --- a/OOXML/Binary/Sheets/Reader/XMLReader/XML2TableConverter.cpp +++ b/OOXML/Binary/Sheets/Reader/XMLReader/XML2TableConverter.cpp @@ -127,8 +127,6 @@ void XML2TableConverter::insertEmptyNode (const std::wstring &key) { uniqueKey = getNodeName(key, parents_.at(0).second); } - - keyvalues_.emplace(uniqueKey, L""); } std::wstring XML2TableConverter::getNodeName(const std::wstring &name, std::set &names) diff --git a/OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.cpp b/OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.cpp new file mode 100644 index 0000000000..c2aad0b54e --- /dev/null +++ b/OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.cpp @@ -0,0 +1,33 @@ +/* + * (c) Copyright Ascensio System SIA 2010-2023 + * + * This program is a free software product. You can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License (AGPL) + * version 3 as published by the Free Software Foundation. In accordance with + * Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect + * that Ascensio System SIA expressly excludes the warranty of non-infringement + * of any third-party rights. + * + * This program is distributed WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For + * details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html + * + * You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish + * street, Riga, Latvia, EU, LV-1050. + * + * The interactive user interfaces in modified source and object code versions + * of the Program must display Appropriate Legal Notices, as required under + * Section 5 of the GNU AGPL version 3. + * + * Pursuant to Section 7(b) of the License you must retain the original Product + * logo when distributing the program. Pursuant to Section 7(e) we decline to + * grant you any rights under trademark law for use of our trademarks. + * + * All the Product's GUI elements, including illustrations and icon sets, as + * well as technical writing content are licensed under the terms of the + * Creative Commons Attribution-ShareAlike 4.0 International. See the License + * terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode + * + */ + +#include "XMLConverter2.h" \ No newline at end of file diff --git a/OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.h b/OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.h new file mode 100644 index 0000000000..e3c45bfca2 --- /dev/null +++ b/OOXML/Binary/Sheets/Reader/XMLReader/XMLConverter2.h @@ -0,0 +1,33 @@ +/* + * (c) Copyright Ascensio System SIA 2010-2023 + * + * This program is a free software product. You can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License (AGPL) + * version 3 as published by the Free Software Foundation. In accordance with + * Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect + * that Ascensio System SIA expressly excludes the warranty of non-infringement + * of any third-party rights. + * + * This program is distributed WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For + * details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html + * + * You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish + * street, Riga, Latvia, EU, LV-1050. + * + * The interactive user interfaces in modified source and object code versions + * of the Program must display Appropriate Legal Notices, as required under + * Section 5 of the GNU AGPL version 3. + * + * Pursuant to Section 7(b) of the License you must retain the original Product + * logo when distributing the program. Pursuant to Section 7(e) we decline to + * grant you any rights under trademark law for use of our trademarks. + * + * All the Product's GUI elements, including illustrations and icon sets, as + * well as technical writing content are licensed under the terms of the + * Creative Commons Attribution-ShareAlike 4.0 International. See the License + * terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode + * + */ +#pragma once + diff --git a/OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.cpp b/OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.cpp new file mode 100644 index 0000000000..60b6a7c5c2 --- /dev/null +++ b/OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.cpp @@ -0,0 +1,167 @@ +/* + * (c) Copyright Ascensio System SIA 2010-2023 + * + * This program is a free software product. You can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License (AGPL) + * version 3 as published by the Free Software Foundation. In accordance with + * Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect + * that Ascensio System SIA expressly excludes the warranty of non-infringement + * of any third-party rights. + * + * This program is distributed WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For + * details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html + * + * You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish + * street, Riga, Latvia, EU, LV-1050. + * + * The interactive user interfaces in modified source and object code versions + * of the Program must display Appropriate Legal Notices, as required under + * Section 5 of the GNU AGPL version 3. + * + * Pursuant to Section 7(b) of the License you must retain the original Product + * logo when distributing the program. Pursuant to Section 7(e) we decline to + * grant you any rights under trademark law for use of our trademarks. + * + * All the Product's GUI elements, including illustrations and icon sets, as + * well as technical writing content are licensed under the terms of the + * Creative Commons Attribution-ShareAlike 4.0 International. See the License + * terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode + * + */ + +#include "XMLMap.h" + +bool XMLMap::ReadXmlStructure(XmlUtils::CXmlLiteReader &reader, ColumnNameController &nameController, std::shared_ptr nodeTree) +{ + reader_ = &reader; + colNames_ = &nameController; + parents_.push_back(nodeTree); + + XmlUtils::XmlNodeType nodeType; + while(reader_->Read(nodeType)) + { + if(nodeType == XmlUtils::XmlNodeType::XmlNodeType_Element) + { + openNode(); + } + else if(nodeType == XmlUtils::XmlNodeType::XmlNodeType_Text || nodeType == XmlUtils::XmlNodeType::XmlNodeType_CDATA) + { + insertValue(parents_.back()->name); + } + else if(nodeType == XmlUtils::XmlNodeType::XmlNodeType_EndElement) + { + closeNode(); + } + prevType_ = nodeType; + } + + return true; +} + +void XMLMap::readAttributes() +{ + ///@todo проверять нет ли в parents нод с таким же именем для вставки в их столбец вместо создания нового + if(!reader_->GetAttributesCount()) + { + return; + } + reader_->MoveToFirstAttribute(); + + insertValue(reader_->GetName()); + + while(reader_->MoveToNextAttribute()) + { + insertValue(reader_->GetName()); + } + + reader_->MoveToElement(); +} + +void XMLMap::insertValue(const std::wstring &key) +{ + auto parent = parents_.at(parents_.size()-2); + parent->childColumns.emplace(getNodeName(key, parent->childColumns)); + parents_.back()->columns.insert(key); +} + +std::wstring XMLMap::getNodeName(const std::wstring &name, std::set &names) +{ + /// ищем среди использовавшихся имён нужное + for(auto i = names.begin(); i != names.end(); i++) + { + if(colNames_->GetXmlName(*i) == name) + { + return *i; + } + } + /// если не нашли, создаём его и вставляем + auto resultName = name; + colNames_->CreateColumnName(resultName); + names.insert(resultName); + return resultName; +} + + +std::shared_ptr XMLMap::searchSameNode(const std::wstring &name) +{ + _UINT32 nodeCount = 0; + for(auto i = parents_.back()->childs.begin(); i != parents_.back()->childs.end(); i++) + { + if(name == (*i)->name) + { + + return (*i); + } + } + return nullptr; +} + + +void XMLMap::openNode() +{ + auto nodeName = reader_->GetName(); + + auto newElem = searchSameNode(nodeName); + if(newElem) + { + newElem->counter++; + } + else + { + newElem = std::make_shared(); + newElem->name = reader_->GetName(); + newElem->parent = parents_.back(); + newElem->counter = 1; + parents_.back()->childs.emplace(newElem); + } + + if(!reader_->IsEmptyNode()) + { + parents_.push_back(newElem); + readAttributes(); + } + else if(reader_->GetAttributesCount() == 0) + { + insertValue(newElem->name); + } + else + { + parents_.push_back(newElem); + readAttributes(); + closeNode(); + } +} + +void XMLMap::closeNode() +{ + //вставка ноды типа + if(prevType_ == XmlUtils::XmlNodeType::XmlNodeType_Element) + { + insertValue(parents_.back()->name); + } + auto lastElem = parents_.back(); + parents_.pop_back(); + parents_.back()->childColumns.insert(lastElem->childColumns.begin(), lastElem->childColumns.end()); +} + diff --git a/OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.h b/OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.h new file mode 100644 index 0000000000..198b1a5eb2 --- /dev/null +++ b/OOXML/Binary/Sheets/Reader/XMLReader/XMLMap.h @@ -0,0 +1,115 @@ +/* + * (c) Copyright Ascensio System SIA 2010-2023 + * + * This program is a free software product. You can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License (AGPL) + * version 3 as published by the Free Software Foundation. In accordance with + * Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect + * that Ascensio System SIA expressly excludes the warranty of non-infringement + * of any third-party rights. + * + * This program is distributed WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For + * details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html + * + * You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish + * street, Riga, Latvia, EU, LV-1050. + * + * The interactive user interfaces in modified source and object code versions + * of the Program must display Appropriate Legal Notices, as required under + * Section 5 of the GNU AGPL version 3. + * + * Pursuant to Section 7(b) of the License you must retain the original Product + * logo when distributing the program. Pursuant to Section 7(e) we decline to + * grant you any rights under trademark law for use of our trademarks. + * + * All the Product's GUI elements, including illustrations and icon sets, as + * well as technical writing content are licensed under the terms of the + * Creative Commons Attribution-ShareAlike 4.0 International. See the License + * terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode + * + */ + +#pragma once + +#include "columnNamesController.h" + +#include "../../../../DesktopEditor/xml/include/xmlutils.h" +#include "../../../../Base/Base.h" + +#include +#include +#include +#include +#include +#include + +/// @brief узел xml дерева +struct XmlNode +{ + /// @brief имя ноды + std::wstring name; + /// @brief предок ноды + std::shared_ptr parent; + /// @brief столбцы ноды + std::set columns; + /// @brief наследуемые столбцы ноды + std::set childColumns; + /// @brief потомки ноды + std::set> childs; + /// @brief счетчик повторений ноды, чтобы расширять одну ноду, вместо использования многих с одинаковым именем + _UINT32 counter; +}; + + +/// @brief класс осуществляющий считывание xml файла и построение его табличной структуры для дальнейшей конвертации +class XMLMap +{ +public: + /// @brief метод, считывающий структуру xml файла + /// @param reader xmlLiteReader с загруженным в него xml документом + /// @param nameController контроллер имен в который будут загружаться имена столбцов + /// @param nodeTree указатель на корневой элемент дерева нод, которое будет заполнено этим методом + /// @return true в случае успеха, иначе false + bool ReadXmlStructure(XmlUtils::CXmlLiteReader &reader, ColumnNameController &nameController, std::shared_ptr nodeTree); + +private: + + /// @brief считывает аттрибуты текущей ноды + void readAttributes(); + + /// @brief обрабатывает ноду типа element + /// @param type тип обрабатываемой ноды + void openNode(); + + /// @brief обрабатывает ноду типа endelement + /// @param type тип обрабатываемой ноды + void closeNode(); + + /// @brief вставляет значение во временную внутреннюю структуру + /// @param key ключ, по которому будет вставлено значение + void insertValue(const std::wstring &key); + + /// @brief Получение уникального имени ноды, либо его поиск в переданном множестве + /// @param name имя ноды, прочитанное из xml + /// @param names set содержащий уникальные имена, среди которых будет осуществляться поиск + /// @return найденное или сгенерированное уникальное имя ноды + std::wstring getNodeName(const std::wstring &name, std::set &names); + + /// @brief ищет на верхнем уровне ноду с переданным именем, используется для подсчета строк таблицы + /// @param name имя ноды + std::shared_ptr searchSameNode(const std::wstring &name); + + /// @brief указатель на считавший xml данные reader + XmlUtils::CXmlLiteReader *reader_; + + /// @brief указатель на контроллер имен столбцов таблицы + ColumnNameController *colNames_; + + /// @brief вектор с родительскими нодами и используемыми на их уровнях именами + std::vector> parents_; + + /// @brief тип предыдущей ноды(для поиска нод вида ) + XmlUtils::XmlNodeType prevType_ = XmlUtils::XmlNodeType::XmlNodeType_None; + +}; \ No newline at end of file diff --git a/OOXML/Binary/Sheets/Reader/XMLReader/XMLReader2.cpp b/OOXML/Binary/Sheets/Reader/XMLReader/XMLReader2.cpp new file mode 100644 index 0000000000..0db51f2ac9 --- /dev/null +++ b/OOXML/Binary/Sheets/Reader/XMLReader/XMLReader2.cpp @@ -0,0 +1,70 @@ +/* + * (c) Copyright Ascensio System SIA 2010-2023 + * + * This program is a free software product. You can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License (AGPL) + * version 3 as published by the Free Software Foundation. In accordance with + * Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect + * that Ascensio System SIA expressly excludes the warranty of non-infringement + * of any third-party rights. + * + * This program is distributed WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For + * details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html + * + * You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish + * street, Riga, Latvia, EU, LV-1050. + * + * The interactive user interfaces in modified source and object code versions + * of the Program must display Appropriate Legal Notices, as required under + * Section 5 of the GNU AGPL version 3. + * + * Pursuant to Section 7(b) of the License you must retain the original Product + * logo when distributing the program. Pursuant to Section 7(e) we decline to + * grant you any rights under trademark law for use of our trademarks. + * + * All the Product's GUI elements, including illustrations and icon sets, as + * well as technical writing content are licensed under the terms of the + * Creative Commons Attribution-ShareAlike 4.0 International. See the License + * terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode + * + */ +#include "XMLReader.h" +#include "XLSXTableController.h" +#include "XMLConverter2.h" +#include "XMLMap.h" + +#include "../../../../DesktopEditor/common/File.h" +#include "../../../Base/unicode_util.h" +#include "../../../../Common/OfficeFileErrorDescription.h" +#include "../../../XlsxFormat/Workbook/Workbook.h" + + +#include "../../../../DesktopEditor/xml/include/xmlutils.h" + +#include + +_UINT32 XMLReader::Read2(const std::wstring &sFileName, OOX::Spreadsheet::CXlsx &oXlsx) +{ + XmlUtils::CXmlLiteReader reader = {}; + if(!reader.FromFile(sFileName)) + { + return AVS_FILEUTILS_ERROR_CONVERT_READ_FILE; + } + + XMLMap map{}; + + auto rootNode = std::make_shared(); + ColumnNameController columnNames{}; + map.ReadXmlStructure(reader, columnNames, rootNode); + + XLSXTableController table = {oXlsx}; + // map хранящий текущий номер колонки для записи + std::map<_UINT32, _UINT32> rowNumbers = {}; + std::map<_UINT32, std::wstring> stringData = {}; + + //table.FormBook(); + + return 0; + +} diff --git a/OOXML/Projects/Linux/BinDocument/BinDocument.pro b/OOXML/Projects/Linux/BinDocument/BinDocument.pro index cde1d57a72..881744f48a 100644 --- a/OOXML/Projects/Linux/BinDocument/BinDocument.pro +++ b/OOXML/Projects/Linux/BinDocument/BinDocument.pro @@ -63,7 +63,10 @@ SOURCES += \ ../../../Binary/Sheets/Reader/XMLReader/XMLReader.cpp \ ../../../Binary/Sheets/Reader/XMLReader/XML2TableConverter.cpp \ ../../../Binary/Sheets/Reader/XMLReader/XLSXTableController.cpp \ - ../../../Binary/Sheets/Reader/XMLReader/columnNameController.cpp + ../../../Binary/Sheets/Reader/XMLReader/columnNameController.cpp \ + ../../../Binary/Sheets/Reader/XMLReader/XMLConverter2.cpp \ + ../../../Binary/Sheets/Reader/XMLReader/XMLMap.cpp \ + ../../../Binary/Sheets/Reader/XMLReader/XMLReader2.cpp HEADERS += \ ../../../Binary/Document/DocWrapper/DocxSerializer.h \ @@ -104,4 +107,6 @@ HEADERS += \ ../../../Binary/Sheets/Reader/XMLReader/XMLReader.h \ ../../../Binary/Sheets/Reader/XMLReader/XML2TableConverter.h \ ../../../Binary/Sheets/Reader/XMLReader/XLSXTableController.h \ - ../../../Binary/Sheets/Reader/XMLReader/columnNameController.h + ../../../Binary/Sheets/Reader/XMLReader/columnNameController.h \ + ../../../Binary/Sheets/Reader/XMLReader/XMLConverter2.h \ + ../../../Binary/Sheets/Reader/XMLReader/XMLMap.h diff --git a/X2tConverter/src/ASCConverters.cpp b/X2tConverter/src/ASCConverters.cpp index 2ff1b54e3d..94c9064b3b 100644 --- a/X2tConverter/src/ASCConverters.cpp +++ b/X2tConverter/src/ASCConverters.cpp @@ -1499,7 +1499,7 @@ namespace NExtractTools XMLReader reader = {}; - reader.Read(sFrom, oXlsx); + reader.Read2(sFrom, oXlsx); oXlsx.PrepareToWrite();