#include #include #include #include #include #include "htmlfile2.h" #include "../Common/3dParty/html/htmltoxhtml.h" #include "../Common/3dParty/html/css/src/CCssCalculator.h" #include "../Common/FileDownloader/FileDownloader.h" #include "../DesktopEditor/common/Base64.h" #include "../DesktopEditor/common/SystemUtils.h" #include "../DesktopEditor/common/StringBuilder.h" #include "../DesktopEditor/common/File.h" #include "../DesktopEditor/common/Directory.h" #include "../DesktopEditor/common/Path.h" #include "../DesktopEditor/xml/include/xmlutils.h" #include "../DesktopEditor/raster/BgraFrame.h" #ifndef VALUE2STR #define VALUE_TO_STRING(x) #x #define VALUE2STR(x) VALUE_TO_STRING(x) #endif class CHtmlFile2_Private { public: XmlUtils::CXmlLiteReader m_oLightReader; // SAX Reader NSCSS::CCssCalculator m_oStylesCalculator; // Css калькулятор std::wstring m_sTmp; // Temp папка для конфертации html в xhtml std::wstring m_sSrc; // Директория источника std::wstring m_sDst; // Директория назначения std::wstring m_sBase; // Полный базовый адрес std::map m_mStyles; // Стили в document.xml. Хранятся как (имя тэга, его стиль) private: int m_nImageId; // ID картинки int m_nFootnoteId; // ID сноски int m_nHyperlinkId; // ID ссылки int m_nStyleId; // ID стиля NSStringUtils::CStringBuilder m_oStylesXml; // styles.xml NSStringUtils::CStringBuilder m_oDocXmlRels; // document.xml.rels NSStringUtils::CStringBuilder m_oDocXml; // document.xml NSStringUtils::CStringBuilder m_oNoteXml; // footnotes.xml public: CHtmlFile2_Private() { m_nImageId = 1; m_nFootnoteId = 1; m_nHyperlinkId = 1; m_nStyleId = 1; m_sBase = L""; } ~CHtmlFile2_Private() { m_oLightReader.Clear(); m_mStyles.clear(); m_oStylesXml.Clear(); m_oDocXmlRels.Clear(); m_oDocXml.Clear(); m_oNoteXml.Clear(); } // Проверяет наличие тэга html bool isHtml() { if(!m_oLightReader.ReadNextNode()) return false; if(m_oLightReader.GetName() != L"html") return false; return true; } void CreateDocxEmpty(CHtmlParams* oParams) { // Создаем пустые папки std::wstring strDirectory = m_sDst; // rels std::wstring pathRels = strDirectory + L"/_rels"; NSDirectory::CreateDirectory(pathRels); // docProps std::wstring pathDocProps = strDirectory + L"/docProps"; NSDirectory::CreateDirectory(pathDocProps); // word std::wstring pathWord = strDirectory + L"/word"; NSDirectory::CreateDirectory(pathWord); // documentRels std::wstring pathWordRels = pathWord + L"/_rels"; NSDirectory::CreateDirectory(pathWordRels); // media std::wstring pathMedia = pathWord + L"/media"; NSDirectory::CreateDirectory(pathMedia); // theme std::wstring pathTheme = pathWord + L"/theme"; NSDirectory::CreateDirectory(pathTheme); // theme1.xml std::wstring sTheme = L""; NSFile::CFileBinary oThemeWriter; if (oThemeWriter.CreateFileW(pathTheme + L"/theme1.xml")) { oThemeWriter.WriteStringUTF8(sTheme); oThemeWriter.CloseFile(); } // app.xml std::wstring sApplication = NSSystemUtils::GetEnvVariable(NSSystemUtils::gc_EnvApplicationName); if (sApplication.empty()) sApplication = NSSystemUtils::gc_EnvApplicationNameDefault; #if defined(INTVER) std::string sVersion = VALUE2STR(INTVER); #endif sApplication += L"/"; sApplication += UTF8_TO_U(sVersion); std::wstring sApp = L""; sApp += sApplication; sApp += L"0falsefalsefalsefalse"; NSFile::CFileBinary oAppWriter; if (oAppWriter.CreateFileW(pathDocProps + L"/app.xml")) { oAppWriter.WriteStringUTF8(sApp); oAppWriter.CloseFile(); } // .rels std::wstring sRels = L""; NSFile::CFileBinary oRelsWriter; if (oRelsWriter.CreateFileW(pathRels + L"/.rels")) { oRelsWriter.WriteStringUTF8(sRels); oRelsWriter.CloseFile(); } // [Content_Types].xml std::wstring sContent = L""; NSFile::CFileBinary oContentWriter; if (oContentWriter.CreateFileW(strDirectory + L"/[Content_Types].xml")) { oContentWriter.WriteStringUTF8(sContent); oContentWriter.CloseFile(); } // footnotes.xml.rels NSFile::CFileBinary oFootRelsWriter; if (oFootRelsWriter.CreateFileW(pathWordRels + L"/footnotes.xml.rels")) { oFootRelsWriter.WriteStringUTF8(L""); oFootRelsWriter.CloseFile(); } // fontTable.xml std::wstring sFontTable = L""; NSFile::CFileBinary oFontTableWriter; if (oFontTableWriter.CreateFileW(pathWord + L"/fontTable.xml")) { oFontTableWriter.WriteStringUTF8(sFontTable); oFontTableWriter.CloseFile(); } // settings.xml std::wstring sSettings = L""; NSFile::CFileBinary oSettingsWriter; if (oSettingsWriter.CreateFileW(pathWord + L"/settings.xml")) { oSettingsWriter.WriteStringUTF8(sSettings); oSettingsWriter.CloseFile(); } // webSettings.xml std::wstring sWebSettings = L""; NSFile::CFileBinary oWebWriter; if (oWebWriter.CreateFileW(pathWord + L"/webSettings.xml")) { oWebWriter.WriteStringUTF8(sWebSettings); oWebWriter.CloseFile(); } // numbering.xml NSStringUtils::CStringBuilder oNumbering; oNumbering += L""; NSFile::CFileBinary oNumberingWriter; if (oNumberingWriter.CreateFileW(pathWord + L"/numbering.xml")) { oNumberingWriter.WriteStringUTF8(oNumbering.GetData()); oNumberingWriter.CloseFile(); } // core.xml std::wstring sCore = L""; if(oParams != NULL) { sCore += L""; sCore += oParams->m_sBookTitle; sCore += L""; sCore += oParams->GetAuthors(); sCore += L""; sCore += oParams->GetGenres(); sCore += L""; } sCore += L""; NSFile::CFileBinary oCoreWriter; if (oCoreWriter.CreateFileW(pathDocProps + L"/core.xml")) { oCoreWriter.WriteStringUTF8(sCore); oCoreWriter.CloseFile(); } // Начала файлов m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXmlRels += L""; m_oDocXml += L""; m_oNoteXml += L""; m_oNoteXml += L""; m_oStylesXml += L""; m_oStylesXml += L""; m_oStylesXml += L""; // Стили по умолчанию // Нормальный стиль, от которого базируются m_oStylesXml += L""; // Маркированный список m_oStylesXml += L""; } bool readSrc() { // Читаем html if(!isHtml()) return false; if(m_oLightReader.IsEmptyNode()) return true; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { std::wstring sName = m_oLightReader.GetName(); if(sName == L"head") readHead(); else if(sName == L"body") { std::vector sSelectors; readBody(sSelectors, L"", false, true, false, -1); } } return true; } void write() { m_oDocXmlRels += L""; NSFile::CFileBinary oRelsWriter; if (oRelsWriter.CreateFileW(m_sDst + L"/word/_rels/document.xml.rels")) { oRelsWriter.WriteStringUTF8(m_oDocXmlRels.GetData()); oRelsWriter.CloseFile(); } m_oDocXml += L""; NSFile::CFileBinary oDocumentWriter; if (oDocumentWriter.CreateFileW(m_sDst + L"/word/document.xml")) { oDocumentWriter.WriteStringUTF8(m_oDocXml.GetData()); oDocumentWriter.CloseFile(); } m_oNoteXml += L""; NSFile::CFileBinary oFootnotesWriter; if (oFootnotesWriter.CreateFileW(m_sDst + L"/word/footnotes.xml")) { oFootnotesWriter.WriteStringUTF8(m_oNoteXml.GetData()); oFootnotesWriter.CloseFile(); } // styles.xml m_oStylesXml += L""; NSFile::CFileBinary oStylesWriter; if (oStylesWriter.CreateFileW(m_sDst + L"/word/styles.xml")) { oStylesWriter.WriteStringUTF8(m_oStylesXml.GetData()); oStylesWriter.CloseFile(); } } void htmlXhtml(const std::wstring& sSrc) { NSFile::CFileBinary oXhtmlWriter; if (oXhtmlWriter.CreateFileW(m_sTmp + L"/res.xhtml")) { // htmlToXhtml возвращает текст файла в кодировке UTF-8 oXhtmlWriter.WriteStringUTF8(htmlToXhtml(sSrc)); oXhtmlWriter.CloseFile(); } } void readStyle(std::vector sSelectors) { if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { std::wstring sName = m_oLightReader.GetName(); // Стиль по ссылке if(sName == L"link") { while(m_oLightReader.MoveToNextAttribute()) { if(m_oLightReader.GetName() == L"href") { std::wstring sRef = m_oLightReader.GetText(); // Если это css файл, то поведение аналогично тэгу style // Кроме функции получения стилей std::wstring sType = NSFile::GetFileExtention(sRef); if(sType == L"css") m_oStylesCalculator.AddStylesFromFile(m_sSrc + L"/" + sRef); } } m_oLightReader.MoveToElement(); } // тэг style содержит стили для styles.xml else if(sName == L"style") m_oStylesCalculator.AddStyles(U_TO_UTF8(content())); std::string sClass = ""; std::string sStyle = ""; std::string sId = ""; // Стиль по атрибуту while(m_oLightReader.MoveToNextAttribute()) { std::wstring sAName = m_oLightReader.GetName(); if(sAName == L"style") sStyle = m_oLightReader.GetTextA(); else if(sAName == L"class") sClass = m_oLightReader.GetTextA(); else if(sAName == L"id") sId = m_oLightReader.GetTextA(); } m_oLightReader.MoveToElement(); if(!sStyle.empty()) { std::string sSelector = ""; if(!sId.empty()) sSelector += "#" + sId + " "; if(!sClass.empty()) sSelector += "." + sClass + " "; sSelector += m_oLightReader.GetNameA(); std::vector sSubClass(sSelectors); sSubClass.push_back(sSelector); m_oStylesCalculator.AddStyle(sSubClass, sStyle); readStyle(sSubClass); } else readStyle(sSelectors); } } private: /* std::vector getStyle(std::vector sSelectors) { std::string sClass = ""; std::string sStyle = ""; std::string sId = ""; while(m_oLightReader.MoveToNextAttribute()) { std::wstring sAName = m_oLightReader.GetName(); if(sAName == L"style") sStyle = m_oLightReader.GetTextA(); else if(sAName == L"class") sClass = m_oLightReader.GetTextA(); else if(sAName == L"id") sId = m_oLightReader.GetTextA(); } m_oLightReader.MoveToElement(); std::vector sSubClass(sSelectors); if(!sStyle.empty()) { std::string sSelector = ""; if(!sId.empty()) sSelector += "#" + sId + " "; if(!sClass.empty()) sSelector += "." + sClass + " "; sSelector += m_oLightReader.GetNameA(); sSubClass.push_back(sSelector); NSCSS::CCompiledStyle oStyle = m_oStylesCalculator.GetCompiledStyle(sSubClass); m_oStylesXml += L""; m_oStylesXml += oStyle.GetStyleW(); m_oStylesXml += L""; } return sSubClass; } */ void readHead() { if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { std::wstring sName = m_oLightReader.GetName(); // Базовый адрес if(sName == L"base") { while(m_oLightReader.MoveToNextAttribute()) if(m_oLightReader.GetName() == L"href") m_sBase = m_oLightReader.GetText(); m_oLightReader.MoveToElement(); } /* // Заголовок документа else if(sName == L"title") readTitle(); */ } } void neadLi(bool bNeedLi, int nLevelLi) { if(bNeedLi) { m_oDocXml += L""; } } void readBody(std::vector& sSelectors, std::wstring sRStyle, bool bBdo, bool bNeedP, bool bNeedLi, int nLevelLi) { // sSelectors = getStyle(sSelectors); if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode2(nDeath)) { std::vector sSubClass; // = getStyle(sSelectors); std::wstring sName = m_oLightReader.GetName(); if(sName == L"#text") { std::wstring sText = m_oLightReader.GetText(); if(bBdo) std::reverse(sText.begin(), sText.end()); if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } m_oDocXml += L""; m_oDocXml += sRStyle; m_oDocXml += L""; m_oDocXml.WriteEncodeXmlString(sText); m_oDocXml += L""; if(bNeedP) m_oDocXml += L""; } // Ссылки else if(sName == L"a") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readLink(sSubClass, sRStyle, bBdo, bNeedLi, nLevelLi); if(bNeedP) m_oDocXml += L""; } // Абревиатура, реализована как сноски else if(sName == L"abbr") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readAbbr(sSubClass, sRStyle, bBdo); if(bNeedP) m_oDocXml += L""; } // Адрес else if(sName == L"address") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readBody(sSubClass, sRStyle, bBdo, false, bNeedLi, nLevelLi); if(bNeedP) m_oDocXml += L""; } // Статья // Боковой блок // Выделенная цитата // Контейнер else if(sName == L"article" || sName == L"aside" || sName == L"blockquote" || sName == L"div") readBody(sSubClass, sRStyle, bBdo, bNeedP, bNeedLi, nLevelLi); // Полужирный текст else if(sName == L"b") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readP(sSubClass, sRStyle + L"", bBdo); if(bNeedP) m_oDocXml += L""; } // Направление текста else if(sName == L"bdo") { std::wstring sDir = L""; while(m_oLightReader.MoveToNextAttribute()) if(m_oLightReader.GetName() == L"dir") sDir = m_oLightReader.GetText(); m_oLightReader.MoveToElement(); if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } if(sDir == L"ltr") readP(sSubClass, sRStyle, false); else if(sDir == L"rtl") readP(sSubClass, sRStyle, true); else readP(sSubClass, sRStyle, !bBdo); if(bNeedP) m_oDocXml += L""; } // Отмена направления текста else if(sName == L"bdi") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readP(sSubClass, sRStyle, false); if(bNeedP) m_oDocXml += L""; } // Перенос строки else if(sName == L"br") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } m_oDocXml += L""; if(bNeedP) m_oDocXml += L""; } // Кнопка // Абзац текста. Содержит фразовый контент else if(sName == L"button"|| sName == L"details" || sName == L"p") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readP(sSubClass, sRStyle, bBdo); if(bNeedP) m_oDocXml += L""; } // Цитата, обычно выделяется курсивом // Новый термин, обычно выделяется курсивом else if(sName == L"cite" || sName == L"dfn") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readP(sSubClass, sRStyle + L"", bBdo); if(bNeedP) m_oDocXml += L""; } // Код else if(sName == L"code") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readP(sSubClass, sRStyle + L"", bBdo); if(bNeedP) m_oDocXml += L""; } // Зачеркнутый текст else if(sName == L"del") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readP(sSubClass, sRStyle + L"", bBdo); if(bNeedP) m_oDocXml += L""; } // Заголовок else if(sName == L"h1" || sName == L"h2" || sName == L"h3" || sName == L"h4" || sName == L"h5" || sName == L"h6") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readBody(sSubClass, sRStyle, bBdo, false, bNeedLi, nLevelLi); if(bNeedP) m_oDocXml += L""; } // Горизонтальная линия else if(sName == L"hr") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } m_oDocXml += L""; if(bNeedP) m_oDocXml += L""; } // Картинки else if(sName == L"img" || sName == L"image") { if(bNeedP) { m_oDocXml += L""; neadLi(bNeedLi, nLevelLi); } readImage(); if(bNeedP) m_oDocXml += L""; } // Маркированный список else if(sName == L"ul") readUl(sSubClass, sRStyle, bBdo, bNeedP, nLevelLi); else readBody(sSubClass, sRStyle, bBdo, bNeedP, bNeedLi, nLevelLi); } } void readUl(std::vector& sSelectors, std::wstring sRStyle, bool bBdo, bool bNeedP, int nLevelLi) { if(m_oLightReader.IsEmptyNode()) return; int nDeath = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode(nDeath)) { if(m_oLightReader.GetName() != L"li") continue; readBody(sSelectors, sRStyle, bBdo, bNeedP, true, nLevelLi + 1); } } void readAbbr(std::vector& sSelectors, std::wstring sRStyle, bool bBdo) { std::wstring sNote = L""; while(m_oLightReader.MoveToNextAttribute()) if(m_oLightReader.GetName() == L"title") sNote = m_oLightReader.GetText(); m_oLightReader.MoveToElement(); readP(sSelectors, sRStyle, bBdo); m_oDocXml += L""; m_oNoteXml += L""; m_oNoteXml += sNote; m_oNoteXml += L""; } void readLink(std::vector& sSelectors, std::wstring sRStyle, bool bBdo, bool bNeedLi, int nLevelLi) { std::wstring sRef = L""; while(m_oLightReader.MoveToNextAttribute()) { if(m_oLightReader.GetName() == L"href") { sRef = m_oLightReader.GetText(); size_t nLen = (sRef.length() > 4 ? 4 : 0); // Ссылка на сайт if(sRef.substr(0, nLen) == L"http") { } // Ссылка на документ, который нужно обработать else { } } } m_oLightReader.MoveToElement(); if(sRef.empty()) return; // Пишем рельсы m_oDocXmlRels += L""; // Пишем в document.xml m_oDocXml += L""; readBody(sSelectors, sRStyle += L"", bBdo, false, bNeedLi, nLevelLi); m_oDocXml += L""; } void readImage() { while(m_oLightReader.MoveToNextAttribute()) { std::wstring sAName = m_oLightReader.GetName(); if(sAName == L"src" || sAName == L"href") { bool bRes = false; std::wstring sSrcM = m_oLightReader.GetText(); std::wstring sImageName = L""; std::wstring sImageId = std::to_wstring(m_nImageId); size_t nLen = (sSrcM.length() > 4 ? 4 : 0); // Картинка Base64 if(sSrcM.substr(0, nLen) == L"data") { size_t nBase = sSrcM.find(L"/", nLen) + 1; std::wstring sType = sSrcM.substr(nBase, sSrcM.find(L";", nBase) - nBase); sImageName = sImageId + L"." + sType; NSFile::CFileBinary oImageWriter; if(oImageWriter.CreateFileW(m_sDst + L"/word/media/" + sImageName)) { bRes = true; size_t nBase = sSrcM.find(L"base64", nLen) + 7; std::string sBase64 = m_oLightReader.GetTextA().substr(nBase); int nSrcLen = (int)sBase64.length(); int nDecodeLen = NSBase64::Base64DecodeGetRequiredLength(nSrcLen); BYTE* pImageData = new BYTE[nDecodeLen]; if (TRUE == NSBase64::Base64Decode(sBase64.c_str(), nSrcLen, pImageData, &nDecodeLen)) oImageWriter.WriteFile(pImageData, (DWORD)nDecodeLen); RELEASEARRAYOBJECTS(pImageData); oImageWriter.CloseFile(); } } // Картинка в сети else if(sSrcM.substr(0, nLen) == L"http" || !m_sBase.empty()) { sImageName = NSFile::GetFileName(sSrcM); CFileDownloader oDownloadImg(m_sBase + sSrcM, false); oDownloadImg.SetFilePath(m_sDst + L"/word/media/" + sImageName); bRes = oDownloadImg.DownloadSync(); } // Картинка по относительному пути else { size_t nSrcM = sSrcM.rfind(L"/") + 1; sImageName = sSrcM.substr(nSrcM); bRes = NSFile::CFileBinary::Copy(m_sSrc + L"/" + sSrcM, m_sDst + L"/word/media/" + sImageName); } if(bRes) { m_nImageId++; // Прописать рельсы m_oDocXmlRels += L""; // Получаем размеры картинки CBgraFrame oBgraFrame; oBgraFrame.OpenFile(m_sDst + L"/word/media/" + sImageName); int nHy = oBgraFrame.get_Height(); int nWx = oBgraFrame.get_Width(); if(nWx > nHy) { int nW = nWx * 9525; nW = (nW > 7000000 ? 7000000 : nW); nHy = (int)((double)nHy * (double)nW / (double)nWx); nWx = nW; } else { int nH = nHy * 9525; nH = (nH > 9000000 ? 9000000 : nH); int nW = (int)((double)nWx * (double)nH / (double)nHy); if(nW > 7000000) { nW = 7000000; nHy = (int)((double)nHy * (double)nW / (double)nWx); } else nHy = nH; nWx = nW; } // Пишем в document.xml m_oDocXml += L""; } } } m_oLightReader.MoveToElement(); } void readP(std::vector& sSelectors, std::wstring sRStyle, bool bBdo) { if(m_oLightReader.IsEmptyNode()) return; int nDepth = m_oLightReader.GetDepth(); while(m_oLightReader.ReadNextSiblingNode2(nDepth)) { std::vector sSubClass; // = getStyle(sSelectors); std::wstring sName = m_oLightReader.GetName(); if(sName == L"#text") { std::wstring sText = m_oLightReader.GetText(); if(bBdo) std::reverse(sText.begin(), sText.end()); m_oDocXml += L""; m_oDocXml += sRStyle; m_oDocXml += L""; m_oDocXml.WriteEncodeXmlString(sText); m_oDocXml += L""; } // Ссылки else if(sName == L"a") readLink(sSubClass, sRStyle, bBdo, false, -1); // Абревиатура, реализована как сноски else if(sName == L"abbr") readAbbr(sSubClass, sRStyle, bBdo); // Полужирный текст // Акцентированный текст else if(sName == L"b" || sName == L"strong") readP(sSubClass, sRStyle + L"", bBdo); // Направление текста else if(sName == L"bdo") { std::wstring sDir = L""; while(m_oLightReader.MoveToNextAttribute()) if(m_oLightReader.GetName() == L"dir") sDir = m_oLightReader.GetText(); m_oLightReader.MoveToElement(); if(sDir == L"ltr") readP(sSubClass, sRStyle, false); else if(sDir == L"rtl") readP(sSubClass, sRStyle, true); else readP(sSubClass, sRStyle, !bBdo); } // Отмена направления текста else if(sName == L"bdi") readP(sSubClass, sRStyle, false); // Увеличивает размер шрифта else if(sName == L"big") readP(sSubClass, sRStyle + L"", bBdo); // Перенос строки else if(sName == L"br") m_oDocXml += L""; // Цитата, обычно выделяется курсивом // Новый термин, обычно выделяется курсивом // Акцентированный текст // Курсивный текст // Переменная, обычно выделяется курсивом else if(sName == L"cite" || sName == L"dfn" || sName == L"em" || sName == L"i" || sName == L"var") readP(sSubClass, sRStyle + L"", bBdo); // Код // Моноширинный шрифт, например, Consolas // Результат скрипта else if(sName == L"code" || sName == L"kbd" || sName == L"samp") readP(sSubClass, sRStyle + L"", bBdo); // Зачеркнутый текст else if(sName == L"del") readP(sSubClass, sRStyle + L"", bBdo); // Ссылка // Объект для обработки else if(sName == L"iframe" || sName == L"object") { } // Картинки else if(sName == L"img" || sName == L"image") readImage(); // Метка // Скрипты не поддерживаются // Выводится информация с помощью скриптов else if(sName == L"label" || sName == L"noscript" || sName == L"output") readP(sSubClass, sRStyle, bBdo); // Выделенный текст, обычно выделяется желтым else if(sName == L"mark") readP(sSubClass, sRStyle + L"", bBdo); // Математическая формула else if(sName == L"math") { } // Цитата, выделенная кавычками, обычно выделяется курсивом else if(sName == L"q") { m_oDocXml += L"""; readP(sSubClass, sRStyle + L"", bBdo); m_oDocXml += L"""; } // Текст верхнего регистра else if(sName == L"rt" || sName == L"sup") readP(sSubClass, sRStyle + L"", bBdo); // Текст при отсутствии поддержки rt игнорируется // Скрипт игнорируется else if(sName == L"rp" || sName == L"script") continue; // Уменьшает размер шрифта else if(sName == L"small") readP(sSubClass, sRStyle + L"", bBdo); // Текст нижнего регистра else if(sName == L"sub") readP(sSubClass, sRStyle + L"", bBdo); // Векторная картинка else if(sName == L"svg") readSVG(); // Текст с границами else if(sName == L"textarea") { m_oDocXml += L""; readP(sSubClass, sRStyle, bBdo); } else readP(sSubClass, sRStyle, bBdo); } } void readSVG() { // Сохранить как .svg картинку NSStringUtils::CStringBuilder oSVG; bool bNeedXmlns = true; oSVG += L""; oSVG += m_oLightReader.GetInnerXml(); oSVG += L""; NSFile::CFileBinary oSVGWriter; std::wstring sImageId = std::to_wstring(m_nImageId++); if (oSVGWriter.CreateFileW(m_sDst + L"/word/media/" + sImageId + L".svg")) { oSVGWriter.WriteStringUTF8(oSVG.GetData()); oSVGWriter.CloseFile(); } // Прописать рельсы // Прописать в document.xml } std::wstring content() { std::wstring sRes = L""; if(m_oLightReader.IsEmptyNode()) return sRes; if(m_oLightReader.ReadNextSiblingNode2(m_oLightReader.GetDepth())) sRes = m_oLightReader.GetText(); return sRes; } }; CHtmlFile2::CHtmlFile2() { m_internal = new CHtmlFile2_Private(); } CHtmlFile2::~CHtmlFile2() { RELEASEOBJECT(m_internal); } bool CHtmlFile2::IsHtmlFile(const std::wstring& sFile) { m_internal->htmlXhtml(sFile); // Открывает файл на проверку if (!m_internal->m_oLightReader.FromFile(m_internal->m_sTmp + L"/res.xhtml")) return false; // Читаем html if(!m_internal->isHtml()) return false; return true; } void CHtmlFile2::SetTmpDirectory(const std::wstring& sFolder) { m_internal->m_sTmp = sFolder; } HRESULT CHtmlFile2::Open(const std::wstring& sSrc, const std::wstring& sDst, CHtmlParams* oParams) { if(!IsHtmlFile(sSrc)) return S_FALSE; m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sSrc); m_internal->m_sDst = sDst; m_internal->CreateDocxEmpty(oParams); std::vector sStyle; m_internal->readStyle(sStyle); // Переходим в начало if(!m_internal->m_oLightReader.MoveToStart()) return S_FALSE; if(!m_internal->readSrc()) return S_FALSE; m_internal->write(); NSFile::CFileBinary::Remove(m_internal->m_sTmp + L"/res.xhtml"); return S_OK; } HRESULT CHtmlFile2::OpenBatch(const std::vector& sSrc, const std::wstring& sDst, CHtmlParams* oParams) { m_internal->m_sDst = sDst; m_internal->CreateDocxEmpty(oParams); for(std::wstring sS : sSrc) { #ifdef _DEBUG std::wcout << NSFile::GetFileName(sS) << std::endl; #endif m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sS); if(!IsHtmlFile(sS)) return S_FALSE; std::vector sStyle; m_internal->readStyle(sStyle); // Переходим в начало if(!m_internal->m_oLightReader.MoveToStart()) return S_FALSE; if(!m_internal->readSrc()) return S_FALSE; NSFile::CFileBinary::Remove(m_internal->m_sTmp + L"/res.xhtml"); m_internal->m_oLightReader.Clear(); m_internal->m_oStylesCalculator.Clear(); m_internal->m_mStyles.clear(); m_internal->m_sBase = L""; } m_internal->write(); return S_OK; }