#include #include #include #include #include #include #include "htmlfile2.h" #include "../Common/3dParty/html/htmltoxhtml.h" #include "../Common/3dParty/html/css/src/CCssCalculator.h" #include "../Common/3dParty/html/css/src/CDocumentStyle.h" #include "../Common/FileDownloader/FileDownloader.h" #include "../DesktopEditor/common/Base64.h" #include "../DesktopEditor/common/SystemUtils.h" #include "../DesktopEditor/common/StringBuilder.h" #include "../DesktopEditor/common/File.h" #include "../DesktopEditor/common/Directory.h" #include "../DesktopEditor/common/Path.h" #include "../DesktopEditor/xml/include/xmlutils.h" #include "../DesktopEditor/raster/BgraFrame.h" #include "../DesktopEditor/graphics/pro/Fonts.h" #include "../DesktopEditor/graphics/pro/Graphics.h" #ifndef VALUE2STR #define VALUE_TO_STRING(x) #x #define VALUE2STR(x) VALUE_TO_STRING(x) #endif struct CLi { bool bNeedLi; // Требуется ли маркер? int nLevelLi; // Уровень вложенности маркера bool bType; // Маркированный? нет - нумерованный }; class CHtmlFile2_Private { public: XmlUtils::CXmlLiteReader m_oLightReader; // SAX Reader NSCSS::CCssCalculator m_oStylesCalculator; // Css калькулятор std::wstring m_sTmp; // Temp папка для конфертации html в xhtml std::wstring m_sSrc; // Директория источника std::wstring m_sDst; // Директория назначения std::wstring m_sBase; // Полный базовый адрес std::map> m_sSrcs; // Имена обрабатываемых файлов (имя файла, имя перекрестной ссылки) std::map m_mStyles; // Стили в document.xml. Хранятся как (имя тэга, его стиль) private: int m_nImageId; // ID картинки int m_nFootnoteId; // ID сноски int m_nHyperlinkId; // ID ссылки int m_nStyleId; // ID стиля int m_nCrossId; // ID перекрестной ссылки NSStringUtils::CStringBuilder m_oStylesXml; // styles.xml NSStringUtils::CStringBuilder m_oDocXmlRels; // document.xml.rels NSStringUtils::CStringBuilder m_oDocXml; // document.xml NSStringUtils::CStringBuilder m_oNoteXml; // footnotes.xml public: CHtmlFile2_Private() { m_nImageId = 1; m_nFootnoteId = 1; m_nHyperlinkId = 1; m_nStyleId = 1; m_nCrossId = 1; m_sBase = L""; } ~CHtmlFile2_Private() { m_oLightReader.Clear(); m_mStyles.clear(); m_oStylesXml.Clear(); m_oDocXmlRels.Clear(); m_oDocXml.Clear(); m_oNoteXml.Clear(); } // Проверяет наличие тэга html bool isHtml() { if(!m_oLightReader.ReadNextNode()) return false; if(m_oLightReader.GetName() != L"html") return false; return true; } void CreateDocxEmpty(CHtmlParams* oParams) { // Создаем пустые папки std::wstring strDirectory = m_sDst; // rels std::wstring pathRels = strDirectory + L"/_rels"; NSDirectory::CreateDirectory(pathRels); // docProps std::wstring pathDocProps = strDirectory + L"/docProps"; NSDirectory::CreateDirectory(pathDocProps); // word std::wstring pathWord = strDirectory + L"/word"; NSDirectory::CreateDirectory(pathWord); // documentRels std::wstring pathWordRels = pathWord + L"/_rels"; NSDirectory::CreateDirectory(pathWordRels); // media std::wstring pathMedia = pathWord + L"/media"; NSDirectory::CreateDirectory(pathMedia); // theme std::wstring pathTheme = pathWord + L"/theme"; NSDirectory::CreateDirectory(pathTheme); // theme1.xml std::wstring sTheme = L""; NSFile::CFileBinary oThemeWriter; if (oThemeWriter.CreateFileW(pathTheme + L"/theme1.xml")) { oThemeWriter.WriteStringUTF8(sTheme); oThemeWriter.CloseFile(); } // app.xml std::wstring sApplication = NSSystemUtils::GetEnvVariable(NSSystemUtils::gc_EnvApplicationName); if (sApplication.empty()) sApplication = NSSystemUtils::gc_EnvApplicationNameDefault; #if defined(INTVER) std::string sVersion = VALUE2STR(INTVER); #endif sApplication += L"/"; sApplication += UTF8_TO_U(sVersion); std::wstring sApp = L""; sApp += sApplication; sApp += L"0falsefalsefalsefalse"; NSFile::CFileBinary oAppWriter; if (oAppWriter.CreateFileW(pathDocProps + L"/app.xml")) { oAppWriter.WriteStringUTF8(sApp); oAppWriter.CloseFile(); } // .rels std::wstring sRels = L""; NSFile::CFileBinary oRelsWriter; if (oRelsWriter.CreateFileW(pathRels + L"/.rels")) { oRelsWriter.WriteStringUTF8(sRels); oRelsWriter.CloseFile(); } // [Content_Types].xml std::wstring sContent = L""; NSFile::CFileBinary oContentWriter; if (oContentWriter.CreateFileW(strDirectory + L"/[Content_Types].xml")) { oContentWriter.WriteStringUTF8(sContent); oContentWriter.CloseFile(); } // footnotes.xml.rels NSFile::CFileBinary oFootRelsWriter; if (oFootRelsWriter.CreateFileW(pathWordRels + L"/footnotes.xml.rels")) { oFootRelsWriter.WriteStringUTF8(L""); oFootRelsWriter.CloseFile(); } // fontTable.xml std::wstring sFontTable = L""; NSFile::CFileBinary oFontTableWriter; if (oFontTableWriter.CreateFileW(pathWord + L"/fontTable.xml")) { oFontTableWriter.WriteStringUTF8(sFontTable); oFontTableWriter.CloseFile(); } // settings.xml std::wstring sSettings = L""; NSFile::CFileBinary oSettingsWriter; if (oSettingsWriter.CreateFileW(pathWord + L"/settings.xml")) { oSettingsWriter.WriteStringUTF8(sSettings); oSettingsWriter.CloseFile(); } // webSettings.xml std::wstring sWebSettings = L""; NSFile::CFileBinary oWebWriter; if (oWebWriter.CreateFileW(pathWord + L"/webSettings.xml")) { oWebWriter.WriteStringUTF8(sWebSettings); oWebWriter.CloseFile(); } // numbering.xml NSStringUtils::CStringBuilder oNumbering; // Маркированный список oNumbering += L""; // Нумерованный список oNumbering += L""; // Ссылки oNumbering += L""; oNumbering += L""; NSFile::CFileBinary oNumberingWriter; if (oNumberingWriter.CreateFileW(pathWord + L"/numbering.xml")) { oNumberingWriter.WriteStringUTF8(oNumbering.GetData()); oNumberingWriter.CloseFile(); } // core.xml std::wstring sCore = L""; if(oParams != NULL) { if(!oParams->m_sBookTitle.empty()) { sCore += L""; sCore += oParams->m_sBookTitle; sCore += L""; } if(!oParams->m_sAuthors.empty()) { sCore += L""; sCore += oParams->m_sAuthors; sCore += L""; } if(!oParams->m_sGenres.empty()) { sCore += L""; sCore += oParams->m_sGenres; sCore += L""; } if(!oParams->m_sDate.empty()) { sCore += L""; sCore += oParams->m_sDate; sCore += L""; } if(!oParams->m_sDescription.empty()) { sCore += L""; sCore += oParams->m_sDescription; sCore += L""; } } sCore += L""; NSFile::CFileBinary oCoreWriter; if (oCoreWriter.CreateFileW(pathDocProps + L"/core.xml")) { oCoreWriter.WriteStringUTF8(sCore); oCoreWriter.CloseFile(); } // Начала файлов m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXml += L""; m_oNoteXml += L""; m_oNoteXml += L""; m_oStylesXml += L""; // m_oStylesXml += L""; // m_oStylesXml += L""; // Стили по умолчанию // Нормальный стиль, от которого базируются m_oStylesXml += L""; // Маркированный список m_oStylesXml += L""; // Заголовки m_oStylesXml += L""; // Текст: p, div m_oStylesXml += L""; m_oStylesXml += L""; // Ссылка m_oStylesXml += L""; // Таблица m_oStylesXml += L""; } bool readSrc(const std::wstring& sFileName) { // Читаем html if(!isHtml()) return false; if(m_oLightReader.IsEmptyNode()) return true; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { std::wstring sName = m_oLightReader.GetName(); if(sName == L"head") readHead(); else if(sName == L"body") readBody(sFileName); } return true; } void write() { m_oDocXmlRels += L""; NSFile::CFileBinary oRelsWriter; if (oRelsWriter.CreateFileW(m_sDst + L"/word/_rels/document.xml.rels")) { oRelsWriter.WriteStringUTF8(m_oDocXmlRels.GetData()); oRelsWriter.CloseFile(); } m_oDocXml += L""; NSFile::CFileBinary oDocumentWriter; if (oDocumentWriter.CreateFileW(m_sDst + L"/word/document.xml")) { oDocumentWriter.WriteStringUTF8(m_oDocXml.GetData()); oDocumentWriter.CloseFile(); } m_oNoteXml += L""; NSFile::CFileBinary oFootnotesWriter; if (oFootnotesWriter.CreateFileW(m_sDst + L"/word/footnotes.xml")) { oFootnotesWriter.WriteStringUTF8(m_oNoteXml.GetData()); oFootnotesWriter.CloseFile(); } // styles.xml m_oStylesXml += L""; NSFile::CFileBinary oStylesWriter; if (oStylesWriter.CreateFileW(m_sDst + L"/word/styles.xml")) { oStylesWriter.WriteStringUTF8(m_oStylesXml.GetData()); oStylesWriter.CloseFile(); } } void htmlXhtml(const std::wstring& sSrc) { NSFile::CFileBinary oXhtmlWriter; if (oXhtmlWriter.CreateFileW(m_sTmp + L"/res.xhtml")) { // htmlToXhtml возвращает текст файла в кодировке UTF-8 oXhtmlWriter.WriteStringUTF8(htmlToXhtml(sSrc)); oXhtmlWriter.CloseFile(); } } void readStyle(std::vector sSelectors) { if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { std::wstring sName = m_oLightReader.GetName(); // Стиль по ссылке if(sName == L"link") { while(m_oLightReader.MoveToNextAttribute()) { if(m_oLightReader.GetName() == L"href") { std::wstring sRef = m_oLightReader.GetText(); // Если это css файл, то поведение аналогично тэгу style // Кроме функции получения стилей std::wstring sType = NSFile::GetFileExtention(sRef); if(sType == L"css") { bool bBad = false; // КОСТЫЛЬ std::ifstream in(m_sSrc + L"/" + sRef); if (in.is_open()) { std::string line; while (getline(in, line)) { if(line.find("@") != std::string::npos) { bBad = true; break; } } } if(!bBad) m_oStylesCalculator.AddStylesFromFile(m_sSrc + L"/" + sRef); } } } m_oLightReader.MoveToElement(); } // тэг style содержит стили для styles.xml else if(sName == L"style") m_oStylesCalculator.AddStyles(U_TO_UTF8(content())); std::string sClass = ""; std::string sStyle = ""; std::string sId = ""; // Стиль по атрибуту while(m_oLightReader.MoveToNextAttribute()) { std::wstring sAName = m_oLightReader.GetName(); if(sAName == L"style") sStyle = m_oLightReader.GetTextA(); else if(sAName == L"class") sClass = m_oLightReader.GetTextA(); else if(sAName == L"id") sId = m_oLightReader.GetTextA(); } m_oLightReader.MoveToElement(); if(!sStyle.empty()) { std::string sSelector = ""; if(!sId.empty()) sSelector += "#" + sId + " "; if(!sClass.empty()) sSelector += "." + sClass + " "; sSelector += m_oLightReader.GetNameA(); std::vector sSubClass(sSelectors); sSubClass.push_back(sSelector); m_oStylesCalculator.AddStyle(sSubClass, sStyle); readStyle(sSubClass); } else readStyle(sSelectors); } } private: std::vector GetSubClass(const std::vector& sSelectors) { std::vector sSubClass(sSelectors); NSCSS::CNode oNode; oNode.m_sName = m_oLightReader.GetName(); if(oNode.m_sName == L"#text") return sSubClass; // Стиль по атрибуту while(m_oLightReader.MoveToNextAttribute()) { std::wstring sAName = m_oLightReader.GetName(); if(sAName == L"class") oNode.m_sClass = m_oLightReader.GetText(); else if(sAName == L"id") oNode.m_sId = m_oLightReader.GetText(); else if(sAName == L"style") oNode.m_sStyle = m_oLightReader.GetText(); } m_oLightReader.MoveToElement(); sSubClass.push_back(oNode); return sSubClass; } std::wstring getStyle(std::vector& sSelectors) { NSCSS::CNode oChild = sSelectors.back(); sSelectors.pop_back(); NSCSS::CCompiledStyle oStyle = m_oStylesCalculator.GetCompiledStyle(oChild, sSelectors); NSCSS::CDocumentStyle oXmlStyle; oXmlStyle.WriteStyle(oStyle); m_oStylesXml += oXmlStyle.GetStyle(); return oXmlStyle.GetId(); } void readHead() { if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { std::wstring sName = m_oLightReader.GetName(); // Базовый адрес if(sName == L"base") { while(m_oLightReader.MoveToNextAttribute()) if(m_oLightReader.GetName() == L"href") m_sBase = m_oLightReader.GetText(); m_oLightReader.MoveToElement(); } } } void readBody(const std::wstring& sFileName) { bool bWasP = true; std::vector sSelectors; sSelectors = GetSubClass(sSelectors); CLi oLi; oLi.bNeedLi = false; oLi.nLevelLi = -1; oLi.bType = true; std::map>::iterator it = m_sSrcs.find(sFileName); if(it != m_sSrcs.end()) { for(const std::wstring& sId : it->second) { std::wstring sCrossId = std::to_wstring(m_nCrossId++); m_oDocXml += L""; } } m_oDocXml += L""; readStream(&m_oDocXml, sSelectors, L"", false, oLi, bWasP); m_oDocXml += L""; } void readStream(NSStringUtils::CStringBuilder* oXml, const std::vector& sSelectors, std::wstring sRStyle, bool bBdo, const CLi& oLi, bool& bWasP) { if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode2(nDeath)) { std::vector sSubClass = GetSubClass(sSelectors); std::wstring sName = m_oLightReader.GetName(); if(sName == L"#text") { if(bWasP) { *oXml += L""; } std::wstring sText = m_oLightReader.GetText(); if(bBdo) std::reverse(sText.begin(), sText.end()); *oXml += L""; *oXml += sRStyle; *oXml += L""; (*oXml).WriteEncodeXmlString(sText); *oXml += L""; bWasP = false; } // Ссылки else if(sName == L"a") readLink(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); // Абревиатура, реализована как сноски else if(sName == L"abbr") readAbbr(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); // Адрес else if(sName == L"address") { if(!bWasP) { *oXml += L""; bWasP = true; } readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); if(!bWasP) { *oXml += L""; bWasP = true; } } // Статья // Боковой блок // Выделенная цитата // Скрытая информация // Контейнер // Заголовок скрытой информации // ... else if(sName == L"article" || sName == L"header" || sName == L"div" || sName == L"blockquote" || sName == L"main" || sName == L"summary" || sName == L"footer" || sName == L"nav" || sName == L"figcaption" || sName == L"form" || sName == L"details" || sName == L"option" || sName == L"pre" || sName == L"fieldset" || sName == L"p" || sName == L"section" || sName == L"figure" || sName == L"dl" || sName == L"aside" ) { if(!bWasP) { *oXml += L""; bWasP = true; } readStream(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); if(!bWasP) { *oXml += L""; bWasP = true; } } // Полужирный текст // Акцентированный текст else if(sName == L"b" || sName == L"strong") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Направление текста else if(sName == L"bdo") { std::wstring sDir = L""; while(m_oLightReader.MoveToNextAttribute()) if(m_oLightReader.GetName() == L"dir") sDir = m_oLightReader.GetText(); m_oLightReader.MoveToElement(); if(sDir == L"rtl") readStream(oXml, sSubClass, sRStyle, true, oLi, bWasP); else readStream(oXml, sSubClass, sRStyle, false, oLi, bWasP); } // Отмена направления текста else if(sName == L"bdi") readStream(oXml, sSubClass, sRStyle, false, oLi, bWasP); // Увеличивает размер шрифта else if(sName == L"big") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Перенос строки else if(sName == L"br") { if(!bWasP) { *oXml += L""; bWasP = true; } } // Цитата, обычно выделяется курсивом // Новый термин, обычно выделяется курсивом // Акцентированный текст // Курсивный текст // Переменная, обычно выделяется курсивом else if(sName == L"cite" || sName == L"dfn" || sName == L"em" || sName == L"i" || sName == L"var") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Код // Моноширинный шрифт, например, Consolas // Результат скрипта else if(sName == L"code" || sName == L"kbd" || sName == L"samp") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Зачеркнутый текст else if(sName == L"del" || sName == L"s") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Заголовок else if(sName == L"h1" || sName == L"h2" || sName == L"h3" || sName == L"h4" || sName == L"h5" || sName == L"h6") { if(!bWasP) { *oXml += L""; bWasP = true; } readStream(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); if(!bWasP) { *oXml += L""; bWasP = true; } } // Горизонтальная линия else if(sName == L"hr") { if(!bWasP) { *oXml += L""; bWasP = true; } *oXml += L""; bWasP = true; } // Картинки else if(sName == L"img") { readImage(oXml); bWasP = false; } // Подчеркнутый else if(sName == L"ins") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Выделенный текст, обычно выделяется желтым else if(sName == L"mark") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Меню // Маркированный список else if(sName == L"menu" || sName == L"ul") readLi(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP, true); // Нумерованный список else if(sName == L"ol") readLi(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP, false); // Цитата, выделенная кавычками, обычно выделяется курсивом else if(sName == L"q") { *oXml += L"""; readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); *oXml += L"""; bWasP = false; } // Текст верхнего регистра else if(sName == L"rt" || sName == L"sup") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Уменьшает размер шрифта else if(sName == L"small") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Текст нижнего регистра else if(sName == L"sub") readStream(oXml, sSubClass, sRStyle + L"", bBdo, oLi, bWasP); // Векторная картинка else if(sName == L"svg") { readSVG(oXml); bWasP = false; } // Таблицы else if(sName == L"table") { *oXml += L""; readTable(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); *oXml += L""; bWasP = true; } // Текст с границами else if(sName == L"textarea") { if(!bWasP) { *oXml += L""; bWasP = true; } *oXml += L""; readStream(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); if(!bWasP) { *oXml += L""; bWasP = true; } } // Игнорируется else if(sName == L"template" || sName == L"canvas" || sName == L"video" || sName == L"math" || sName == L"rp" || sName == L"command" || sName == L"iframe" || sName == L"embed" || sName == L"area" || sName == L"map" || sName == L"keygen" || sName == L"script" || sName == L"audio" || sName == L"wbr" ) continue; // Без нового абзаца else if(sName == L"datalist" || sName == L"button" || sName == L"label" || sName == L"data" || sName == L"object" || sName == L"noscript" || sName == L"select" || sName == L"input" || sName == L"time" || sName == L"output" || sName == L"progress" || sName == L"hgroup" || sName == L"meter" || sName == L"span" || sName == L"audio" || sName == L"ruby") readStream(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); else { if(!bWasP) { *oXml += L""; bWasP = true; } readStream(oXml, sSubClass, sRStyle, bBdo, oLi, bWasP); if(!bWasP) { *oXml += L""; bWasP = true; } } } } int readTr (NSStringUtils::CStringBuilder* oXml, const std::vector& sSelectors, std::wstring sRStyle, bool bBdo, const CLi& oLi, bool& bWasP, std::map& mTable) { int nGridCol = 0; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { std::wstring sName = m_oLightReader.GetName(); // tr - строки в таблице if(sName != L"tr") continue; if(m_oLightReader.IsEmptyNode()) continue; int nTCol = 0; *oXml += L""; int nTrDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nTrDeath)) { std::wstring sColspan = L""; std::wstring sRowspan = L""; while(m_oLightReader.MoveToNextAttribute()) { if(m_oLightReader.GetName() == L"colspan") sColspan = m_oLightReader.GetText(); else if(m_oLightReader.GetName() == L"rowspan") sRowspan = m_oLightReader.GetText(); } m_oLightReader.MoveToElement(); *oXml += L""; if(!sColspan.empty()) { *oXml += L""; } *oXml += L""; if(++nTCol > nGridCol) nGridCol = nTCol; // Читаем th. Ячейка заголовка таблицы. Выравнивание посередине. Выделяется полужирным if(m_oLightReader.GetName() == L"th") { *oXml += L""; bWasP = true; readStream(oXml, sSelectors, sRStyle + L"", bBdo, oLi, bWasP); } // Читаем td. Ячейка таблицы. Выравнивание вправо else if(m_oLightReader.GetName() == L"td") { *oXml += L""; readStream(oXml, sSelectors, sRStyle, bBdo, oLi, bWasP); } *oXml += L""; } *oXml += L""; } return nGridCol; } void readTable (NSStringUtils::CStringBuilder* oXml, const std::vector& sSelectors, std::wstring sRStyle, bool bBdo, const CLi& oLi, bool& bWasP) { if(m_oLightReader.IsEmptyNode()) return; // Стиль таблицы *oXml += L""; NSStringUtils::CStringBuilder oHead; NSStringUtils::CStringBuilder oBody; NSStringUtils::CStringBuilder oFoot; int nGridCol = 0; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { int n = 0; std::map mTable; std::wstring sName = m_oLightReader.GetName(); if(sName == L"thead") n = readTr(&oHead, sSelectors, sRStyle, bBdo, oLi, bWasP, mTable); else if(sName == L"tbody") n = readTr(&oBody, sSelectors, sRStyle, bBdo, oLi, bWasP, mTable); else if(sName == L"tfoot") n = readTr(&oFoot, sSelectors, sRStyle, bBdo, oLi, bWasP, mTable); if(n > nGridCol) nGridCol = n; } // Размеры таблицы std::wstring sGridCol = L""; if(nGridCol != 0) sGridCol = std::to_wstring((int)(9570.0 / (double)nGridCol)); *oXml += L""; for(int i = 0; i < nGridCol; i++) { *oXml += L""; } *oXml += L""; // Конец таблицы *oXml += oHead.GetData(); *oXml += oBody.GetData(); *oXml += oFoot.GetData(); *oXml += L""; // Пустая строка после таблицы, чтобы следующий текст не приклеивался *oXml += L""; } void readLi (NSStringUtils::CStringBuilder* oXml, const std::vector& sSelectors, std::wstring sRStyle, bool bBdo, const CLi& oLi, bool& bWasP, bool bType) { if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { if(m_oLightReader.GetName() != L"li") continue; if(!bWasP) { *oXml += L""; bWasP = true; } CLi oSubLi; oSubLi.bNeedLi = true; oSubLi.nLevelLi = oLi.nLevelLi + 1; oSubLi.bType = bType; *oXml += L""; readStream(oXml, sSelectors, sRStyle, bBdo, oSubLi, bWasP); if(!bWasP) { *oXml += L""; bWasP = true; } } } void readAbbr (NSStringUtils::CStringBuilder* oXml, const std::vector& sSelectors, std::wstring sRStyle, bool bBdo, const CLi& oLi, bool& bWasP) { std::wstring sNote = L""; while(m_oLightReader.MoveToNextAttribute()) if(m_oLightReader.GetName() == L"title") sNote = m_oLightReader.GetText(); m_oLightReader.MoveToElement(); readStream(oXml, sSelectors, sRStyle, bBdo, oLi, bWasP); *oXml += L""; m_oNoteXml += L""; m_oNoteXml += sNote; m_oNoteXml += L""; } void readLink (NSStringUtils::CStringBuilder* oXml, const std::vector& sSelectors, std::wstring sRStyle, bool bBdo, const CLi& oLi, bool& bWasP) { std::wstring sRef = L""; std::wstring sTitle = L""; bool bCross = false; while(m_oLightReader.MoveToNextAttribute()) { std::wstring sName = m_oLightReader.GetName(); if(sName == L"href") { sRef = m_oLightReader.GetText(); size_t nSrc = sRef.rfind(L"/"); if(nSrc == std::wstring::npos) nSrc = 0; else nSrc++; size_t nLen = sRef.rfind(L"html"); if(nLen == std::wstring::npos) continue; else nLen += 4; std::wstring sFileName = sRef.substr(nSrc, nLen - nSrc); std::map>::iterator it = m_sSrcs.find(sFileName); if(it != m_sSrcs.end()) { bCross = true; it->second.push_back(L"cHyp" + std::to_wstring(m_nHyperlinkId)); } } else if(sName == L"title") sTitle = m_oLightReader.GetText(); } m_oLightReader.MoveToElement(); if(sRef.empty()) return; if(sTitle.empty()) sTitle = sRef; // Перекрестная ссылка внутри файла if(bCross) *oXml += L""; // Пишем в document.xml *oXml += L""; bWasP = false; readStream(oXml, sSelectors, sRStyle += L"", bBdo, oLi, bWasP); *oXml += L""; } void readImage (NSStringUtils::CStringBuilder* oXml) { while(m_oLightReader.MoveToNextAttribute()) { if(m_oLightReader.GetName() != L"src") continue; bool bRes = false; std::wstring sSrcM = m_oLightReader.GetText(); std::wstring sImageName = L""; std::wstring sImageId = std::to_wstring(m_nImageId); size_t nLen = (sSrcM.length() > 4 ? 4 : 0); // Картинка Base64 if(sSrcM.substr(0, nLen) == L"data") { size_t nBase = sSrcM.find(L"/", nLen) + 1; std::wstring sType = sSrcM.substr(nBase, sSrcM.find(L";", nBase) - nBase); sImageName = sImageId + L"." + sType; NSFile::CFileBinary oImageWriter; if(oImageWriter.CreateFileW(m_sDst + L"/word/media/i" + sImageName)) { bRes = true; size_t nBase = sSrcM.find(L"base64", nLen) + 7; std::string sBase64 = m_oLightReader.GetTextA().substr(nBase); int nSrcLen = (int)sBase64.length(); int nDecodeLen = NSBase64::Base64DecodeGetRequiredLength(nSrcLen); BYTE* pImageData = new BYTE[nDecodeLen]; if (TRUE == NSBase64::Base64Decode(sBase64.c_str(), nSrcLen, pImageData, &nDecodeLen)) oImageWriter.WriteFile(pImageData, (DWORD)nDecodeLen); RELEASEARRAYOBJECTS(pImageData); oImageWriter.CloseFile(); } } // Картинка в сети else if(sSrcM.substr(0, nLen) == L"http" || !m_sBase.empty()) { sImageName = NSFile::GetFileName(sSrcM); CFileDownloader oDownloadImg(m_sBase + sSrcM, false); oDownloadImg.SetFilePath(m_sDst + L"/word/media/i" + sImageName); bRes = oDownloadImg.DownloadSync(); } // Картинка по относительному пути else { size_t nSrcM = sSrcM.rfind(L"/") + 1; sImageName = sSrcM.substr(nSrcM); bRes = NSFile::CFileBinary::Copy(m_sSrc + L"/" + sSrcM, m_sDst + L"/word/media/i" + sImageName); if(!bRes) bRes = NSFile::CFileBinary::Copy(m_sSrc + L"/" + sImageName, m_sDst + L"/word/media/i" + sImageName); } if(bRes) ImageRels(oXml, sImageId, L"i" + sImageName); } m_oLightReader.MoveToElement(); } void ImageRels (NSStringUtils::CStringBuilder* oXml, const std::wstring& sImageId, const std::wstring& sImageName) { m_nImageId++; // Прописать рельсы m_oDocXmlRels += L""; // Получаем размеры картинки CBgraFrame oBgraFrame; oBgraFrame.OpenFile(m_sDst + L"/word/media/" + sImageName); int nHy = oBgraFrame.get_Height(); int nWx = oBgraFrame.get_Width(); if(nWx > nHy) { int nW = nWx * 9525; nW = (nW > 7000000 ? 7000000 : nW); nHy = (int)((double)nHy * (double)nW / (double)nWx); nWx = nW; } else { int nH = nHy * 9525; nH = (nH > 9000000 ? 9000000 : nH); int nW = (int)((double)nWx * (double)nH / (double)nHy); if(nW > 7000000) { nW = 7000000; nHy = (int)((double)nHy * (double)nW / (double)nWx); } else nHy = nH; nWx = nW; } // Пишем в document.xml *oXml += L""; } void readSVG (NSStringUtils::CStringBuilder* oXml) { // Сохранить как .svg картинку NSStringUtils::CStringBuilder oSVG; bool bNeedXmlns = true; oSVG += L""; std::wstring sSVG = m_oLightReader.GetInnerXml(); size_t nRef = sSVG.find(L"image"); while(nRef != std::wstring::npos) { size_t nHRef = sSVG.find(L"href", nRef); if(nHRef == std::wstring::npos) break; nHRef += 6; if(sSVG.compare(nHRef, 4, L"http") == 0) { nRef = sSVG.find(L"image", nRef + 5); continue; } size_t nHRefLen = sSVG.find(L"\"", nHRef); std::wstring sImageName = sSVG.substr(nHRef, nHRefLen - nHRef); bool bRes = NSFile::CFileBinary::Copy(m_sSrc + L"/" + sImageName, m_sDst + L"/word/media/" + NSFile::GetFileName(sImageName)); if(!bRes) bRes = NSFile::CFileBinary::Copy(m_sSrc + L"/" + NSFile::GetFileName(sImageName), m_sDst + L"/word/media/" + NSFile::GetFileName(sImageName)); if(bRes) sSVG.replace(nHRef, nHRefLen - nHRef, NSFile::GetFileName(sImageName)); nRef = sSVG.find(L"image", nRef + 5); } oSVG += sSVG; oSVG += L""; std::wstring sImageId = std::to_wstring(m_nImageId); NSFile::CFileBinary oSVGWriter; std::wstring sImageFile = m_sDst + L"/word/media/" + sImageId + L".svg"; if (oSVGWriter.CreateFileW(sImageFile)) { oSVGWriter.WriteStringUTF8(oSVG.GetData()); oSVGWriter.CloseFile(); } // Конвертация из svg в png NSFonts::IApplicationFonts* pFonts = NSFonts::NSApplication::Create(); MetaFile::IMetaFile* pMetafile = MetaFile::Create(pFonts); bool bLoad = pMetafile->LoadFromFile(sImageFile.data()); if(bLoad) { std::wstring sPngFile = m_sDst + L"/word/media/" + sImageId + L".png"; pMetafile->ConvertToRaster(sPngFile.data(), 4, 1000); } pMetafile->Release(); pFonts->Release(); ImageRels(oXml, sImageId, sImageId + L".png"); } std::wstring content() { std::wstring sRes = L""; if(m_oLightReader.IsEmptyNode()) return sRes; if(m_oLightReader.ReadNextSiblingNode2(m_oLightReader.GetDepth())) sRes = m_oLightReader.GetText(); return sRes; } }; CHtmlFile2::CHtmlFile2() { m_internal = new CHtmlFile2_Private(); } CHtmlFile2::~CHtmlFile2() { RELEASEOBJECT(m_internal); } bool CHtmlFile2::IsHtmlFile(const std::wstring& sFile) { m_internal->htmlXhtml(sFile); // Открывает файл на проверку if (!m_internal->m_oLightReader.FromFile(m_internal->m_sTmp + L"/res.xhtml")) return false; // Читаем html if(!m_internal->isHtml()) return false; return true; } void CHtmlFile2::SetTmpDirectory(const std::wstring& sFolder) { m_internal->m_sTmp = sFolder; } HRESULT CHtmlFile2::Open(const std::wstring& sSrc, const std::wstring& sDst, CHtmlParams* oParams) { if(!IsHtmlFile(sSrc)) return S_FALSE; m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sSrc); m_internal->m_sDst = sDst; m_internal->CreateDocxEmpty(oParams); std::vector sStyle; m_internal->readStyle(sStyle); // Переходим в начало if(!m_internal->m_oLightReader.MoveToStart()) return S_FALSE; if(!m_internal->readSrc(NSFile::GetFileName(sSrc))) return S_FALSE; m_internal->write(); NSFile::CFileBinary::Remove(m_internal->m_sTmp + L"/res.xhtml"); return S_OK; } HRESULT CHtmlFile2::OpenBatch(const std::vector& sSrc, const std::wstring& sDst, CHtmlParams* oParams) { m_internal->m_sDst = sDst; m_internal->CreateDocxEmpty(oParams); for(const std::wstring& sS : sSrc) m_internal->m_sSrcs.insert(std::make_pair(NSFile::GetFileName(sS), std::vector())); for(const std::wstring& sS : sSrc) { #ifdef _DEBUG std::wcout << NSFile::GetFileName(sS) << std::endl; #endif m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sS); if(!IsHtmlFile(sS)) return S_FALSE; std::vector sStyle; m_internal->readStyle(sStyle); // Переходим в начало if(!m_internal->m_oLightReader.MoveToStart()) return S_FALSE; if(!m_internal->readSrc(NSFile::GetFileName(sS))) return S_FALSE; NSFile::CFileBinary::Remove(m_internal->m_sTmp + L"/res.xhtml"); m_internal->m_oLightReader.Clear(); m_internal->m_oStylesCalculator.Clear(); m_internal->m_mStyles.clear(); m_internal->m_sBase = L""; } m_internal->write(); return S_OK; }