mirror of
https://github.com/ONLYOFFICE/core.git
synced 2026-02-10 18:05:41 +08:00
Modification of the html converter
This commit is contained in:
@ -511,7 +511,6 @@ namespace NSCSS
|
||||
m_mDefaultStyleData[L"i"] = new CElement(L"i", {{L"font-style", L"italic"}});
|
||||
m_mDefaultStyleData[L"code"] = new CElement(L"code", {{L"font-family", L"Courier New"}});
|
||||
m_mDefaultStyleData[L"kbd"] = new CElement(L"kbd", {{L"font-family", L"Courier New"},
|
||||
{L"font-size", L"20pt"},
|
||||
{L"font_weight", L"bold"}});
|
||||
m_mDefaultStyleData[L"s"] = new CElement(L"s", {{L"text-decoration", L"line-through"}});
|
||||
m_mDefaultStyleData[L"u"] = new CElement(L"u", {{L"text-decoration", L"underline"}});
|
||||
@ -520,7 +519,6 @@ namespace NSCSS
|
||||
m_mDefaultStyleData[L"sub"] = new CElement(L"sub", {{L"vertical-align", L"bottom"}});
|
||||
m_mDefaultStyleData[L"dd"] = new CElement(L"dd", {{L"margin-left", L"720tw"}});
|
||||
m_mDefaultStyleData[L"pre"] = new CElement(L"pre", {{L"font-family", L"Courier New"},
|
||||
{L"font-size", L"20pt"},
|
||||
{L"margin-top", L"0"},
|
||||
{L"margin-bottom", L"0"}});
|
||||
m_mDefaultStyleData[L"blockquote"] = new CElement(L"blockquote", {{L"margin", L"0px"}});
|
||||
|
||||
@ -142,7 +142,7 @@ HRESULT CEpubFile::Convert(const std::wstring& sInputFile, const std::wstring& s
|
||||
*/
|
||||
|
||||
CHtmlFile2 oFile;
|
||||
CHtmlParams oFileParams;
|
||||
HTML::THTMLParameters oFileParams;
|
||||
|
||||
oFileParams.SetAuthors (m_oBookInfo.GetCreators());
|
||||
oFileParams.SetGenres (m_oBookInfo.GetSubjects());
|
||||
@ -155,7 +155,7 @@ HRESULT CEpubFile::Convert(const std::wstring& sInputFile, const std::wstring& s
|
||||
|
||||
std::wstring sDocxFileTempDir = m_sTempDir + L"/tmp";
|
||||
NSDirectory::CreateDirectory(sDocxFileTempDir);
|
||||
oFile.SetTmpDirectory(sDocxFileTempDir);
|
||||
oFile.SetTempDirectory(sDocxFileTempDir);
|
||||
oFile.SetCoreDirectory(NSFile::GetDirectoryName(sContent));
|
||||
|
||||
std::vector<std::wstring> arFiles;
|
||||
@ -182,7 +182,7 @@ HRESULT CEpubFile::Convert(const std::wstring& sInputFile, const std::wstring& s
|
||||
sOutputDir = sOutputFile;
|
||||
|
||||
NSDirectory::CreateDirectory(sOutputDir);
|
||||
HRESULT hRes = oFile.OpenBatchHtml(arFiles, sOutputDir, &oFileParams);
|
||||
HRESULT hRes = oFile.ConvertHTML2OOXML(arFiles, sOutputDir, &oFileParams);
|
||||
if (bIsOutCompress && S_OK == hRes)
|
||||
hRes = oOfficeUtils.CompressFileOrDirectory(sOutputDir, sOutputFile);
|
||||
|
||||
|
||||
@ -2170,7 +2170,7 @@ HRESULT CFb2File::FromHtml(const std::wstring& sHtmlFile, const std::wstring& sD
|
||||
RELEASEARRAYOBJECTS(pData);
|
||||
|
||||
//XmlUtils::CXmlLiteReader oIndexHtml;
|
||||
std::wstring xhtml = htmlToXhtml(sContent, bNeedConvert);
|
||||
std::wstring xhtml = HTML::htmlToXhtml(sContent, bNeedConvert);
|
||||
|
||||
if (!m_internal->m_oLightReader.FromString(xhtml))
|
||||
return S_FALSE;
|
||||
|
||||
@ -162,6 +162,96 @@ const static std::map<std::wstring, HtmlTag> m_HTML_TAGS
|
||||
ADD_TAG(L"svg", SVG)
|
||||
};
|
||||
|
||||
bool HTML2XHTML(const std::wstring& wsFileName, XmlUtils::CXmlLiteReader& oLiteReader)
|
||||
{
|
||||
BYTE* pData;
|
||||
DWORD nLength;
|
||||
if (!NSFile::CFileBinary::ReadAllBytes(wsFileName, &pData, nLength))
|
||||
return false;
|
||||
|
||||
std::string sFileContent = XmlUtils::GetUtf8FromFileContent(pData, nLength);
|
||||
|
||||
bool bNeedConvert = true;
|
||||
if (nLength > 4)
|
||||
{
|
||||
if (pData[0] == 0xFF && pData[1] == 0xFE && !(pData[2] == 0x00 && pData[3] == 0x00))
|
||||
bNeedConvert = false;
|
||||
if (pData[0] == 0xFE && pData[1] == 0xFF)
|
||||
bNeedConvert = false;
|
||||
|
||||
if (pData[0] == 0xFF && pData[1] == 0xFE && pData[2] == 0x00 && pData[3] == 0x00)
|
||||
bNeedConvert = false;
|
||||
if (pData[0] == 0 && pData[1] == 0 && pData[2] == 0xFE && pData[3] == 0xFF)
|
||||
bNeedConvert = false;
|
||||
}
|
||||
|
||||
RELEASEARRAYOBJECTS(pData);
|
||||
|
||||
size_t nFind = sFileContent.find("version=\"");
|
||||
if(nFind != std::string::npos)
|
||||
{
|
||||
nFind += 9;
|
||||
size_t nFindEnd = sFileContent.find("\"", nFind);
|
||||
if(nFindEnd != std::string::npos)
|
||||
sFileContent.replace(nFind, nFindEnd - nFind, "1.0");
|
||||
}
|
||||
|
||||
const std::wstring sRes{htmlToXhtml(sFileContent, bNeedConvert)};
|
||||
|
||||
#ifdef SAVE_NORMALIZED_HTML
|
||||
#if 1 == SAVE_NORMALIZED_HTML
|
||||
NSFile::CFileBinary oWriter;
|
||||
if (oWriter.CreateFileW(L"res.html"))
|
||||
{
|
||||
oWriter.WriteStringUTF8(sRes);
|
||||
oWriter.CloseFile();
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return oLiteReader.FromString(sRes);
|
||||
}
|
||||
|
||||
bool MHT2XHTML(const std::wstring& wsFileName, XmlUtils::CXmlLiteReader& oLiteReader)
|
||||
{
|
||||
NSFile::CFileBinary file;
|
||||
if (!file.OpenFile(wsFileName))
|
||||
return false;
|
||||
|
||||
unsigned char* buffer = new unsigned char[4096];
|
||||
if (!buffer)
|
||||
{
|
||||
file.CloseFile();
|
||||
return false;
|
||||
}
|
||||
|
||||
DWORD dwReadBytes = 0;
|
||||
file.ReadFile(buffer, 4096, dwReadBytes);
|
||||
file.CloseFile();
|
||||
std::string xml_string = XmlUtils::GetUtf8FromFileContent(buffer, dwReadBytes);
|
||||
|
||||
const std::string sContentType = NSStringFinder::FindProperty(xml_string, "content-type", ":", ";");
|
||||
bool bRes = false;
|
||||
|
||||
if(NSStringFinder::Equals(sContentType, "multipart/related"))
|
||||
{
|
||||
BYTE* pData;
|
||||
DWORD nLength;
|
||||
if (!NSFile::CFileBinary::ReadAllBytes(wsFileName, &pData, nLength))
|
||||
return false;
|
||||
|
||||
std::string sFileContent = XmlUtils::GetUtf8FromFileContent(pData, nLength);
|
||||
RELEASEARRAYOBJECTS(pData);
|
||||
const std::wstring sRes = mhtToXhtml(sFileContent);
|
||||
bRes = oLiteReader.FromString(sRes);
|
||||
}
|
||||
else
|
||||
bRes = HTML2XHTML(wsFileName, oLiteReader);
|
||||
|
||||
RELEASEARRAYOBJECTS(buffer);
|
||||
return bRes;
|
||||
}
|
||||
|
||||
inline std::wstring GetArgumentValue(XmlUtils::CXmlLiteReader& oLiteReader, const std::wstring& wsArgumentName, const std::wstring& wsDefaultValue = L"");
|
||||
inline bool CheckArgumentMath(const std::wstring& wsNodeName, const std::wstring& wsStyleName);
|
||||
inline HtmlTag GetHtmlTag(const std::wstring& wsStrTag);
|
||||
@ -190,17 +280,42 @@ void CHTMLReader::SetCoreDirectory(const std::wstring& wsPath)
|
||||
|
||||
HRESULT CHTMLReader::ConvertHTML2OOXML(const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters)
|
||||
{
|
||||
InitOOXMLTags(pParameters);
|
||||
|
||||
m_wsDstDirectory = wsDirectory;
|
||||
|
||||
return ConvertHTML(wsPath, wsDirectory);
|
||||
return InitAndConvert2OOXML({wsPath}, wsDirectory, HTML2XHTML, pParameters);
|
||||
}
|
||||
|
||||
HRESULT CHTMLReader::ConvertHTML2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
|
||||
{
|
||||
InitMDTags();
|
||||
return ConvertHTML(wsPath, wsFinalFile);
|
||||
return InitAndConvert2Markdown({wsPath}, wsFinalFile, HTML2XHTML, pParameters);
|
||||
}
|
||||
|
||||
// Batch overload: forwards every path in arPaths to InitAndConvert2OOXML(),
// using HTML2XHTML as the per-file loader, and returns its result.
HRESULT CHTMLReader::ConvertHTML2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters)
{
	const Convert_Func fnLoadFile = HTML2XHTML;

	return InitAndConvert2OOXML(arPaths, wsDirectory, fnLoadFile, pParameters);
}
|
||||
|
||||
// Batch overload: forwards every path in arPaths to InitAndConvert2Markdown(),
// using HTML2XHTML as the per-file loader, and returns its result.
HRESULT CHTMLReader::ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
{
	const Convert_Func fnLoadFile = HTML2XHTML;

	return InitAndConvert2Markdown(arPaths, wsFinalFile, fnLoadFile, pParameters);
}
|
||||
|
||||
// Single-file overload: wraps wsPath into a one-element list and forwards it to
// InitAndConvert2OOXML(), using MHT2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertMHT2OOXML(const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters)
{
	const std::vector<std::wstring> arSinglePath{wsPath};

	return InitAndConvert2OOXML(arSinglePath, wsDirectory, MHT2XHTML, pParameters);
}
|
||||
|
||||
// Single-file overload: wraps wsPath into a one-element list and forwards it to
// InitAndConvert2Markdown(), using MHT2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertMHT2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
{
	const std::vector<std::wstring> arSinglePath{wsPath};

	return InitAndConvert2Markdown(arSinglePath, wsFinalFile, MHT2XHTML, pParameters);
}
|
||||
|
||||
// Batch overload: forwards every path in arPaths to InitAndConvert2OOXML(),
// using MHT2XHTML as the per-file loader, and returns its result.
HRESULT CHTMLReader::ConvertMHT2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters)
{
	const Convert_Func fnLoadFile = MHT2XHTML;

	return InitAndConvert2OOXML(arPaths, wsDirectory, fnLoadFile, pParameters);
}
|
||||
|
||||
// Batch overload: forwards every path in arPaths to InitAndConvert2Markdown(),
// using MHT2XHTML as the per-file loader, and returns its result.
HRESULT CHTMLReader::ConvertMHT2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
{
	const Convert_Func fnLoadFile = MHT2XHTML;

	return InitAndConvert2Markdown(arPaths, wsFinalFile, fnLoadFile, pParameters);
}
|
||||
|
||||
void CHTMLReader::Clear()
|
||||
@ -221,14 +336,11 @@ void CHTMLReader::InitOOXMLTags(THTMLParameters* pParametrs)
|
||||
{
|
||||
Clear();
|
||||
|
||||
COOXMLWriter *pWriter = new COOXMLWriter();
|
||||
COOXMLWriter *pWriter = new COOXMLWriter(pParametrs, &m_oCSSCalculator);
|
||||
|
||||
if (nullptr == pWriter)
|
||||
return;
|
||||
|
||||
pWriter->SetCSSCalculator(&m_oCSSCalculator);
|
||||
pWriter->SetHTMLParameters(pParametrs);
|
||||
|
||||
pWriter->SetSrcDirectory (m_wsSrcDirectory);
|
||||
pWriter->SetDstDirectory (m_wsDstDirectory);
|
||||
pWriter->SetTempDirectory(m_wsTempDirectory);
|
||||
@ -270,12 +382,12 @@ void CHTMLReader::InitOOXMLTags(THTMLParameters* pParametrs)
|
||||
m_mTags[HTML_TAG(BDO)] = oIgnoredTag;
|
||||
m_mTags[HTML_TAG(SPAN)] = oIgnoredTag;
|
||||
m_mTags[HTML_TAG(H1)] = oIgnoredTag;
|
||||
m_mTags[HTML_TAG(CODE)] = oIgnoredTag;
|
||||
m_mTags[HTML_TAG(CODE)] = oIgnoredTag;
|
||||
}
|
||||
|
||||
void CHTMLReader::InitMDTags()
|
||||
void CHTMLReader::InitMDTags(TMarkdownParameters* pParametrs)
|
||||
{
|
||||
CMDWriter *pWriter = new CMDWriter({});
|
||||
CMDWriter *pWriter = new CMDWriter((nullptr != pParametrs) ? *pParametrs : TMarkdownParameters{});
|
||||
|
||||
if (nullptr == pWriter)
|
||||
return;
|
||||
@ -326,65 +438,56 @@ bool CHTMLReader::IsHTML()
|
||||
return ((m_oLightReader.MoveToStart() && m_oLightReader.ReadNextNode()) ? m_oLightReader.GetName() == L"html" : false);
|
||||
}
|
||||
|
||||
bool CHTMLReader::HTML2XHTML(const std::wstring& wsFileName)
|
||||
HRESULT CHTMLReader::InitAndConvert2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, Convert_Func Convertation, THTMLParameters* pParameters)
|
||||
{
|
||||
BYTE* pData;
|
||||
DWORD nLength;
|
||||
if (!NSFile::CFileBinary::ReadAllBytes(wsFileName, &pData, nLength))
|
||||
return false;
|
||||
InitOOXMLTags(pParameters);
|
||||
m_wsDstDirectory = wsDirectory;
|
||||
|
||||
std::string sFileContent = XmlUtils::GetUtf8FromFileContent(pData, nLength);
|
||||
|
||||
bool bNeedConvert = true;
|
||||
if (nLength > 4)
|
||||
{
|
||||
if (pData[0] == 0xFF && pData[1] == 0xFE && !(pData[2] == 0x00 && pData[3] == 0x00))
|
||||
bNeedConvert = false;
|
||||
if (pData[0] == 0xFE && pData[1] == 0xFF)
|
||||
bNeedConvert = false;
|
||||
|
||||
if (pData[0] == 0xFF && pData[1] == 0xFE && pData[2] == 0x00 && pData[3] == 0x00)
|
||||
bNeedConvert = false;
|
||||
if (pData[0] == 0 && pData[1] == 0 && pData[2] == 0xFE && pData[3] == 0xFF)
|
||||
bNeedConvert = false;
|
||||
}
|
||||
|
||||
RELEASEARRAYOBJECTS(pData);
|
||||
|
||||
size_t nFind = sFileContent.find("version=\"");
|
||||
if(nFind != std::string::npos)
|
||||
{
|
||||
nFind += 9;
|
||||
size_t nFindEnd = sFileContent.find("\"", nFind);
|
||||
if(nFindEnd != std::string::npos)
|
||||
sFileContent.replace(nFind, nFindEnd - nFind, "1.0");
|
||||
}
|
||||
|
||||
const std::wstring sRes{htmlToXhtml(sFileContent, bNeedConvert)};
|
||||
|
||||
#ifdef SAVE_NORMALIZED_HTML
|
||||
#if 1 == SAVE_NORMALIZED_HTML
|
||||
NSFile::CFileBinary oWriter;
|
||||
if (oWriter.CreateFileW(m_sTmp + L"/res.html"))
|
||||
{
|
||||
oWriter.WriteStringUTF8(sRes);
|
||||
oWriter.CloseFile();
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return m_oLightReader.FromString(sRes);
|
||||
}
|
||||
|
||||
HRESULT CHTMLReader::ConvertHTML(const std::wstring& wsPath, const std::wstring& wsDirectory)
|
||||
{
|
||||
if (nullptr == m_pWriter || !HTML2XHTML(wsPath) || !m_oLightReader.IsValid() || !IsHTML())
|
||||
return S_FALSE;
|
||||
HRESULT lResult{S_FALSE};
|
||||
|
||||
m_pWriter->Begin(wsDirectory);
|
||||
|
||||
for (const std::wstring& wsPath : arPaths)
|
||||
{
|
||||
if (Convert(wsPath, Convertation))
|
||||
{
|
||||
lResult = S_OK;
|
||||
|
||||
if (nullptr != pParameters && pParameters->m_bNeedPageBreakBefore)
|
||||
m_pWriter->PageBreak();
|
||||
}
|
||||
}
|
||||
|
||||
m_pWriter->End(wsDirectory);
|
||||
|
||||
return lResult;
|
||||
}
|
||||
|
||||
// Prepares the Markdown tag handlers and converts every file of arPaths into one output.
//
// arPaths       - source files, processed in the given order
// wsFinalFile   - value passed to the writer's End() call once all files are processed
// Convertation  - loader that reads one source file into m_oLightReader
// pParameters   - optional Markdown settings, forwarded to InitMDTags()
//
// Returns S_OK when at least one file was converted, S_FALSE otherwise
// (including the case where no writer is available).
HRESULT CHTMLReader::InitAndConvert2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, Convert_Func Convertation, TMarkdownParameters* pParameters)
{
	InitMDTags(pParameters);

	// InitMDTags() has an early-return path and Convert() already treats a null writer
	// as a possible state, so do not dereference m_pWriter without checking it first.
	if (nullptr == m_pWriter)
		return S_FALSE;

	HRESULT lResult{S_FALSE};

	m_pWriter->Begin(L"");

	for (const std::wstring& wsPath : arPaths)
	{
		// A file that fails to convert is skipped; the remaining files are still processed.
		if (Convert(wsPath, Convertation))
			lResult = S_OK;
	}

	m_pWriter->End(wsFinalFile);

	return lResult;
}
|
||||
|
||||
bool CHTMLReader::Convert(const std::wstring& wsPath, Convert_Func Convertation)
|
||||
{
|
||||
if (nullptr == m_pWriter || !Convertation(wsPath, m_oLightReader) || !m_oLightReader.IsValid() || !IsHTML())
|
||||
return false;
|
||||
|
||||
m_wsSrcDirectory = NSSystemPath::GetDirectoryName(wsPath);
|
||||
// m_sDst = sDst;
|
||||
|
||||
m_oLightReader.MoveToStart();
|
||||
m_oLightReader.ReadNextNode();
|
||||
@ -394,13 +497,9 @@ HRESULT CHTMLReader::ConvertHTML(const std::wstring& wsPath, const std::wstring&
|
||||
if(!m_oLightReader.MoveToStart())
|
||||
return S_FALSE;
|
||||
|
||||
// if(oParams && oParams->m_bNeedPageBreakBefore)
|
||||
// m_internal->PageBreakBefore();
|
||||
|
||||
ReadDocument();
|
||||
|
||||
m_pWriter->End(wsDirectory);
|
||||
return S_OK;
|
||||
return true;
|
||||
}
|
||||
|
||||
void CHTMLReader::ReadStyle()
|
||||
@ -438,16 +537,16 @@ void CHTMLReader::ReadStyle()
|
||||
|
||||
void CHTMLReader::ReadStyle2()
|
||||
{
|
||||
std::wstring sName = m_oLightReader.GetName();
|
||||
const std::wstring wsName = m_oLightReader.GetName();
|
||||
// Стиль по ссылке
|
||||
if(sName == L"link")
|
||||
if(wsName == L"link")
|
||||
{
|
||||
while(m_oLightReader.MoveToNextAttribute())
|
||||
ReadStyleFromNetwork();
|
||||
m_oLightReader.MoveToElement();
|
||||
}
|
||||
// тэг style содержит стили для styles.xml
|
||||
else if(sName == L"style")
|
||||
else if(wsName == L"style")
|
||||
m_oCSSCalculator.AddStyles(m_oLightReader.GetText2());
|
||||
|
||||
const int nDeath = m_oLightReader.GetDepth();
|
||||
@ -492,10 +591,10 @@ void CHTMLReader::ReadDocument()
|
||||
int nDeath = m_oLightReader.GetDepth();
|
||||
while(m_oLightReader.ReadNextSiblingNode(nDeath))
|
||||
{
|
||||
std::wstring sName = m_oLightReader.GetName();
|
||||
if(sName == L"head")
|
||||
const std::wstring wsName = m_oLightReader.GetName();
|
||||
if(wsName == L"head")
|
||||
ReadHead();
|
||||
else if(sName == L"body")
|
||||
else if(wsName == L"body")
|
||||
ReadBody();
|
||||
}
|
||||
}
|
||||
@ -524,12 +623,11 @@ void CHTMLReader::ReadBody()
|
||||
|
||||
GetSubClass(arSelectors);
|
||||
|
||||
/*
|
||||
if (!sSelectors.back().m_mAttributes.empty())
|
||||
if (!arSelectors.back().m_mAttributes.empty())
|
||||
{
|
||||
std::map<std::wstring, std::wstring>::iterator itFound = sSelectors.back().m_mAttributes.find(L"bgcolor");
|
||||
std::map<std::wstring, std::wstring>::iterator itFound = arSelectors.back().m_mAttributes.find(L"bgcolor");
|
||||
|
||||
if (sSelectors.back().m_mAttributes.end() != itFound)
|
||||
if (arSelectors.back().m_mAttributes.end() != itFound)
|
||||
{
|
||||
NSCSS::NSProperties::CColor oColor;
|
||||
oColor.SetValue(itFound->second);
|
||||
@ -539,15 +637,14 @@ void CHTMLReader::ReadBody()
|
||||
const std::wstring wsHEXColor{oColor.ToHEX()};
|
||||
|
||||
if (!wsHEXColor.empty())
|
||||
m_oDocXml.WriteString(L"<w:background w:color=\"" + wsHEXColor + L"\"/>");
|
||||
m_pWriter->GetCurrentDocument()->WriteString(L"<w:background w:color=\"" + wsHEXColor + L"\"/>");
|
||||
|
||||
sSelectors.back().m_mAttributes.erase(itFound);
|
||||
arSelectors.back().m_mAttributes.erase(itFound);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_oLightReader.MoveToElement();
|
||||
*/
|
||||
|
||||
ReadStream(arSelectors);
|
||||
}
|
||||
@ -557,42 +654,24 @@ bool CHTMLReader::ReadStream(std::vector<NSCSS::CNode>& arSelectors, bool bInser
|
||||
if (nullptr == m_pWriter)
|
||||
return false;
|
||||
|
||||
const int nDepth{m_oLightReader.GetDepth()};
|
||||
bool bResult = false;
|
||||
XmlUtils::XmlNodeType eNodeType = XmlUtils::XmlNodeType_EndElement;
|
||||
bool bResult{false};
|
||||
|
||||
while (m_oLightReader.Read(eNodeType) && m_oLightReader.GetDepth() >= nDepth && XmlUtils::XmlNodeType_EndElement != eNodeType)
|
||||
const int nDeath = m_oLightReader.GetDepth();
|
||||
if(m_oLightReader.IsEmptyNode() || !m_oLightReader.ReadNextSiblingNode2(nDeath))
|
||||
{
|
||||
if (eNodeType == XmlUtils::XmlNodeType_Text ||
|
||||
eNodeType == XmlUtils::XmlNodeType_Whitespace ||
|
||||
eNodeType == XmlUtils::XmlNodeType_SIGNIFICANT_WHITESPACE ||
|
||||
eNodeType == XmlUtils::XmlNodeType_CDATA)
|
||||
{
|
||||
const char* pValue = m_oLightReader.GetTextChar();
|
||||
if (!bInsertEmptyP)
|
||||
return false;
|
||||
|
||||
if('\0' != pValue[0])
|
||||
{
|
||||
std::wstring wsText;
|
||||
NSFile::CUtf8Converter::GetUnicodeStringFromUTF8((BYTE*)pValue, (LONG)strlen(pValue), wsText);
|
||||
|
||||
if (wsText.empty())
|
||||
continue;
|
||||
|
||||
arSelectors.push_back(NSCSS::CNode{L"#text", L"", L""});
|
||||
m_oCSSCalculator.CalculateCompiledStyle(arSelectors);
|
||||
|
||||
bResult = m_pWriter->WriteText(wsText, arSelectors);
|
||||
|
||||
arSelectors.pop_back();
|
||||
}
|
||||
}
|
||||
else if (eNodeType == XmlUtils::XmlNodeType_Element)
|
||||
{
|
||||
if (ReadInside(arSelectors))
|
||||
bResult = true;
|
||||
}
|
||||
m_pWriter->WriteEmptyParagraph();
|
||||
return true;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
if (ReadInside(arSelectors))
|
||||
bResult = true;
|
||||
} while(m_oLightReader.ReadNextSiblingNode2(nDeath));
|
||||
|
||||
if (!bResult && bInsertEmptyP)
|
||||
m_pWriter->WriteEmptyParagraph();
|
||||
|
||||
@ -603,10 +682,10 @@ bool CHTMLReader::ReadInside(std::vector<NSCSS::CNode>& arSelectors)
|
||||
{
|
||||
const std::wstring wsName{m_oLightReader.GetName()};
|
||||
|
||||
//TODO:: обработать все варианты return'а
|
||||
if(wsName == L"#text")
|
||||
return ReadText(arSelectors);
|
||||
|
||||
//TODO:: обработать все варианты return'а
|
||||
if (UnreadableNode(wsName) || TagIsUnprocessed(wsName))
|
||||
return false;
|
||||
|
||||
@ -621,7 +700,7 @@ bool CHTMLReader::ReadInside(std::vector<NSCSS::CNode>& arSelectors)
|
||||
case HTML_TAG(A):
|
||||
case HTML_TAG(AREA):
|
||||
{
|
||||
bResult = ReadAnchor(arSelectors);
|
||||
bResult = ReadDefaultTag(HTML_TAG(A), arSelectors);
|
||||
break;
|
||||
}
|
||||
case HTML_TAG(ABBR):
|
||||
@ -892,15 +971,6 @@ bool CHTMLReader::ReadInside(std::vector<NSCSS::CNode>& arSelectors)
|
||||
}
|
||||
}
|
||||
|
||||
// if (HTML_TAG(DIV) != eHtmlTag && HTML_TAG(ASIDE) != eHtmlTag)
|
||||
// {
|
||||
// if (bResult)
|
||||
// m_oState.m_eLastElement = eHtmlTag;
|
||||
|
||||
// m_oState.m_bBanUpdatePageData = true;
|
||||
// }
|
||||
|
||||
// readNote(oXml, sSelectors, sNote);
|
||||
arSelectors.pop_back();
|
||||
return bResult;
|
||||
}
|
||||
@ -919,25 +989,12 @@ bool CHTMLReader::ReadText(std::vector<NSCSS::CNode>& arSelectors)
|
||||
return bResult;
|
||||
}
|
||||
|
||||
bool CHTMLReader::ReadAnchor(std::vector<NSCSS::CNode>& arSelectors)
|
||||
{
|
||||
if (nullptr == m_pWriter || !m_mTags[HTML_TAG(A)]->Open(arSelectors))
|
||||
return false;
|
||||
|
||||
if (!ReadStream(arSelectors))
|
||||
m_pWriter->WriteEmptyParagraph(true);
|
||||
|
||||
m_mTags[HTML_TAG(A)]->Close(arSelectors);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CHTMLReader::ReadSVG(const std::vector<NSCSS::CNode>& arSelectors)
|
||||
{
|
||||
if (!m_mTags[HTML_TAG(IMAGE)]->Open(arSelectors, m_oLightReader.GetOuterXml()))
|
||||
if (!m_mTags[HTML_TAG(IMG)]->Open(arSelectors, m_oLightReader.GetOuterXml()))
|
||||
return false;
|
||||
|
||||
m_mTags[HTML_TAG(IMAGE)]->Close(arSelectors);
|
||||
m_mTags[HTML_TAG(IMG)]->Close(arSelectors);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -37,20 +37,29 @@ public:
|
||||
HRESULT ConvertHTML2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
|
||||
HRESULT ConvertHTML2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
|
||||
|
||||
HRESULT ConvertMHT2OOXML (const std::wstring& sPath, const std::wstring& sDirectory, THTMLParameters* pParameters = nullptr);
|
||||
HRESULT ConvertMHT2Markdown (const std::wstring& sPath, const std::wstring& sDirectory, TMarkdownParameters* pParameters = nullptr);
|
||||
HRESULT ConvertHTML2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
|
||||
HRESULT ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
|
||||
|
||||
HRESULT ConvertMHT2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
|
||||
HRESULT ConvertMHT2Markdown (const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
|
||||
|
||||
HRESULT ConvertMHT2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
|
||||
HRESULT ConvertMHT2Markdown (const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
|
||||
|
||||
NSCSS::CCssCalculator* GetCSSCalculator();
|
||||
private:
|
||||
void Clear();
|
||||
void InitOOXMLTags(THTMLParameters* pParametrs = nullptr);
|
||||
void InitMDTags();
|
||||
void InitMDTags(TMarkdownParameters* pParametrs = nullptr);
|
||||
|
||||
bool IsHTML();
|
||||
|
||||
bool HTML2XHTML(const std::wstring& wsFileName);
|
||||
typedef std::function<bool(const std::wstring&, XmlUtils::CXmlLiteReader&)> Convert_Func;
|
||||
|
||||
HRESULT ConvertHTML(const std::wstring& wsPath, const std::wstring& wsDirectory);
|
||||
HRESULT InitAndConvert2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, Convert_Func Convertation, THTMLParameters* pParameters = nullptr);
|
||||
HRESULT InitAndConvert2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, Convert_Func Convertation, TMarkdownParameters* pParameters = nullptr);
|
||||
|
||||
bool Convert(const std::wstring& wsPath, Convert_Func Convertation);
|
||||
|
||||
void ReadStyle();
|
||||
void ReadStyle2();
|
||||
@ -65,7 +74,6 @@ private:
|
||||
|
||||
bool ReadText(std::vector<NSCSS::CNode>& arSelectors);
|
||||
|
||||
bool ReadAnchor(std::vector<NSCSS::CNode>& arSelectors);
|
||||
bool ReadSVG(const std::vector<NSCSS::CNode>& arSelectors);
|
||||
bool ReadEmptyTag(UINT unTag, const std::vector<NSCSS::CNode>& arSelectors);
|
||||
bool ReadDefaultTag(UINT unTag, std::vector<NSCSS::CNode>& arSelectors);
|
||||
|
||||
@ -49,34 +49,10 @@ bool CAnchor<COOXMLWriter>::Open(const std::vector<NSCSS::CNode>& arSelectors, c
|
||||
bCross = true;
|
||||
|
||||
if (arSelectors.back().GetAttributeValue(L"name", wsName))
|
||||
m_pWriter->WriteBookmark(wsName);
|
||||
m_pWriter->WriteEmptyBookmark(wsName);
|
||||
|
||||
arSelectors.back().GetAttributeValue(L"alt", wsAlt);
|
||||
|
||||
if (!m_pWriter->OpenP())
|
||||
m_pWriter->CloseR();
|
||||
else
|
||||
m_pWriter->WritePPr(arSelectors);
|
||||
|
||||
if (bCross)
|
||||
m_pWriter->OpenCrossHyperlink(wsRef, arSelectors);
|
||||
else
|
||||
{
|
||||
std::wstring wsTooltip(wsRef);
|
||||
arSelectors.back().GetAttributeValue(L"title", wsTooltip);
|
||||
|
||||
m_pWriter->OpenExternalHyperlink(wsRef, wsTooltip, arSelectors);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void CAnchor<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
|
||||
{
|
||||
if (!ValidWriter())
|
||||
return;
|
||||
|
||||
bool bCross = false;
|
||||
std::wstring wsFootnote;
|
||||
|
||||
if (arSelectors.back().m_wsStyle.find(L"mso-footnote-id") != std::wstring::npos)
|
||||
@ -87,23 +63,32 @@ void CAnchor<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
|
||||
wsFootnote = L"href";
|
||||
}
|
||||
|
||||
std::wstring wsRef;
|
||||
|
||||
if (arSelectors.back().GetAttributeValue(L"href", wsRef))
|
||||
bool bFootnote = false;
|
||||
if (arSelectors.size() > 1)
|
||||
{
|
||||
if(wsRef.find('#') != std::wstring::npos)
|
||||
bCross = true;
|
||||
const NSCSS::CNode& oNode = arSelectors[arSelectors.size() - 2];
|
||||
bFootnote = oNode.m_wsName == L"p" && oNode.m_wsClass == L"MsoFootnoteText";
|
||||
}
|
||||
|
||||
if (bCross)
|
||||
{
|
||||
if (wsFootnote == L"href")
|
||||
wsFootnote = wsRef.substr(wsRef.find('#') + 1);
|
||||
|
||||
m_pWriter->CloseCrossHyperlink(arSelectors, wsFootnote, wsRef);
|
||||
}
|
||||
m_pWriter->SetHyperlinkData(wsRef, L"", true, wsFootnote, bFootnote);
|
||||
else
|
||||
m_pWriter->CloseExternalHyperlink();
|
||||
{
|
||||
std::wstring wsTooltip(wsRef);
|
||||
arSelectors.back().GetAttributeValue(L"title", wsTooltip);
|
||||
|
||||
m_pWriter->SetHyperlinkData(wsRef, wsTooltip, false, wsFootnote, bFootnote);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Ends anchor processing: resets the hyperlink data stored in the writer.
// Does nothing when the writer is not valid. arSelectors is not used.
void CAnchor<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
{
	if (ValidWriter())
		m_pWriter->ClearHyperlinkData();
}
|
||||
|
||||
CAbbr<COOXMLWriter>::CAbbr(COOXMLWriter* pWriter)
|
||||
@ -208,6 +193,7 @@ void CDivision<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors
|
||||
m_pWriter->RollBackState();
|
||||
}
|
||||
|
||||
m_pWriter->CloseP();
|
||||
m_arFootnoteIDs.pop();
|
||||
}
|
||||
|
||||
@ -330,6 +316,12 @@ bool CImage<COOXMLWriter>::Open(const std::vector<NSCSS::CNode>& arSelectors, co
|
||||
((!wsBasePath.empty() && wsBasePath.length() > 4 && wsBasePath.substr(0, 4) == L"http") ||
|
||||
(wsSrc.length() > 4 && wsSrc.substr(0, 4) == L"http")))
|
||||
{
|
||||
if (!wsExtention.empty() && NotValidExtension(wsExtention))
|
||||
{
|
||||
m_pWriter->WriteAlternativeImage(wsAlt, wsSrc, oImageData);
|
||||
return true;
|
||||
}
|
||||
|
||||
const std::wstring wsDst = wsImagePath + L'.' + ((!wsExtention.empty()) ? wsExtention : L"png");
|
||||
|
||||
// Проверка gc_allowNetworkRequest предполагается в kernel_network
|
||||
@ -404,11 +396,10 @@ bool CImage<COOXMLWriter>::Open(const std::vector<NSCSS::CNode>& arSelectors, co
|
||||
m_pWriter->WriteAlternativeImage(wsAlt, wsSrc, oImageData);
|
||||
else
|
||||
{
|
||||
m_arrImages.push_back(wsSrc);
|
||||
|
||||
m_pWriter->WritePPr(arSelectors);
|
||||
|
||||
const std::wstring wsImageID{std::to_wstring(m_arrImages.size())};
|
||||
m_arrImages.push_back(wsSrc);
|
||||
|
||||
if (nImageId < 0)
|
||||
{
|
||||
@ -1256,6 +1247,7 @@ void CTable<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
|
||||
return;
|
||||
|
||||
m_pWriter->GetCurrentDocument()->WriteNodeEnd(L"w:tbl");
|
||||
m_pWriter->WriteEmptyParagraph(true);
|
||||
}
|
||||
|
||||
CTableRow<COOXMLWriter>::CTableRow(COOXMLWriter* pWriter)
|
||||
|
||||
@ -19,6 +19,8 @@ public:
|
||||
|
||||
virtual void WriteEmptyParagraph(bool bVahish = false, bool bInP = false) = 0;
|
||||
|
||||
virtual void PageBreak() = 0;
|
||||
|
||||
virtual void BeginBlock() = 0;
|
||||
virtual void EndBlock(bool bAddBlock) = 0;
|
||||
|
||||
|
||||
@ -95,8 +95,10 @@ bool CMDWriter::WriteText(std::wstring wsText, const std::vector<NSCSS::CNode>&
|
||||
}
|
||||
|
||||
void CMDWriter::WriteEmptyParagraph(bool bVahish, bool bInP)
|
||||
{
|
||||
}
|
||||
{}
|
||||
|
||||
// Intentionally empty: the Markdown writer emits nothing for a page break.
void CMDWriter::PageBreak()
{
}
|
||||
|
||||
void CMDWriter::BeginBlock()
|
||||
{
|
||||
|
||||
@ -43,6 +43,8 @@ public:
|
||||
|
||||
void WriteEmptyParagraph(bool bVahish = false, bool bInP = false) override;
|
||||
|
||||
void PageBreak() override;
|
||||
|
||||
void BeginBlock() override;
|
||||
void EndBlock(bool bAddBlock) override;
|
||||
|
||||
|
||||
@ -40,11 +40,11 @@ inline UINT GetFontSizeLevel(UINT unFontSize);
|
||||
inline UINT GetFontSizeByLevel(UINT unLevel);
|
||||
inline void ReplaceSpaces(std::wstring& wsValue);
|
||||
|
||||
COOXMLWriter::COOXMLWriter()
|
||||
COOXMLWriter::COOXMLWriter(THTMLParameters* pHTMLParameters, NSCSS::CCssCalculator* pCSSCalculator)
|
||||
: m_pDstPath(nullptr), m_pTempDir(nullptr), m_pSrcPath(nullptr),
|
||||
m_pBasePath(nullptr), m_pCorePath(nullptr), m_pHTMLParameters(nullptr),
|
||||
m_nFootnoteId(1), m_nHyperlinkId(1), m_nListId(1), m_nElementId(1),
|
||||
m_bBanUpdatePageData(false), m_bWasDivs(false), m_pFonts(nullptr)
|
||||
m_pBasePath(nullptr), m_pCorePath(nullptr), m_pStylesCalculator(pCSSCalculator),
|
||||
m_pHTMLParameters(pHTMLParameters), m_nFootnoteId(1), m_nHyperlinkId(1), m_nListId(1),
|
||||
m_nElementId(1), m_bBanUpdatePageData(false), m_bWasDivs(false), m_pFonts(nullptr)
|
||||
{
|
||||
m_oPageData.SetWidth (DEFAULT_PAGE_WIDTH, NSCSS::UnitMeasure::Twips, 0, true);
|
||||
m_oPageData.SetHeight(DEFAULT_PAGE_HEIGHT, NSCSS::UnitMeasure::Twips, 0, true);
|
||||
@ -56,16 +56,6 @@ COOXMLWriter::COOXMLWriter()
|
||||
m_arStates.top().m_pCurrentDocument = &m_oDocXml;
|
||||
}
|
||||
|
||||
void COOXMLWriter::SetCSSCalculator(NSCSS::CCssCalculator* pCSSCalculator)
|
||||
{
|
||||
m_pStylesCalculator = pCSSCalculator;
|
||||
}
|
||||
|
||||
void COOXMLWriter::SetHTMLParameters(THTMLParameters* pHTMLParameters)
|
||||
{
|
||||
m_pHTMLParameters = pHTMLParameters;
|
||||
}
|
||||
|
||||
void COOXMLWriter::SetSrcDirectory(const std::wstring& wsPath)
|
||||
{
|
||||
m_pSrcPath = &wsPath;
|
||||
@ -401,6 +391,8 @@ bool COOXMLWriter::OpenR()
|
||||
if (m_arStates.top().m_bInR)
|
||||
return false;
|
||||
|
||||
OpenHyperlink();
|
||||
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:r>");
|
||||
m_arStates.top().m_bInR = true;
|
||||
return true;
|
||||
@ -416,6 +408,22 @@ bool COOXMLWriter::OpenT()
|
||||
return true;
|
||||
}
|
||||
|
||||
void COOXMLWriter::OpenHyperlink()
|
||||
{
|
||||
if (m_arStates.top().m_bInHyperlink)
|
||||
return;
|
||||
|
||||
if (!m_arStates.top().m_wsHref.empty())
|
||||
{
|
||||
if (m_arStates.top().m_bISCrossHyperlink)
|
||||
OpenCrossHyperlink(m_arStates.top().m_wsHref);
|
||||
else
|
||||
OpenExternalHyperlink(m_arStates.top().m_wsHref, m_arStates.top().m_wsTooltip);
|
||||
|
||||
m_arStates.top().m_bInHyperlink = true;
|
||||
}
|
||||
}
|
||||
|
||||
void COOXMLWriter::CloseP()
|
||||
{
|
||||
m_arStates.top().m_bWasSpace = true;
|
||||
@ -425,6 +433,7 @@ void COOXMLWriter::CloseP()
|
||||
|
||||
CloseT();
|
||||
CloseR();
|
||||
CloseHyperlink();
|
||||
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"</w:p>");
|
||||
m_arStates.top().m_bInP = false;
|
||||
@ -448,6 +457,36 @@ void COOXMLWriter::CloseT()
|
||||
m_arStates.top().m_bInT = false;
|
||||
}
|
||||
|
||||
void COOXMLWriter::CloseHyperlink()
|
||||
{
|
||||
if (!m_arStates.top().m_bInHyperlink)
|
||||
return;
|
||||
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"</w:hyperlink>");
|
||||
m_arStates.top().m_bInHyperlink = false;
|
||||
|
||||
// Сноска
|
||||
if (m_arStates.top().m_wsFootnote.empty())
|
||||
return;
|
||||
|
||||
if (!m_arStates.top().m_bIsFootnote)
|
||||
{
|
||||
std::wstring sFootnoteID = std::to_wstring(m_nFootnoteId++);
|
||||
OpenR();
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:rPr><w:rStyle w:val=\"footnote\"/></w:rPr><w:footnoteReference w:id=\"");
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(sFootnoteID);
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"\"/>");
|
||||
CloseR();
|
||||
m_mFootnotes.insert(std::make_pair(m_arStates.top().m_wsFootnote, sFootnoteID));
|
||||
}
|
||||
else
|
||||
{
|
||||
OpenR();
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:rPr><w:rStyle w:val=\"footnote\"/></w:rPr><w:footnoteRef/>");
|
||||
CloseR();
|
||||
}
|
||||
}
|
||||
|
||||
void COOXMLWriter::BeginBlock()
|
||||
{
|
||||
CloseP();
|
||||
@ -534,7 +573,35 @@ void COOXMLWriter::Break(const std::vector<NSCSS::CNode>& arSelectors)
|
||||
m_arStates.top().m_bWasSpace = true;
|
||||
}
|
||||
|
||||
void COOXMLWriter::OpenCrossHyperlink(const std::wstring& wsRef, const std::vector<NSCSS::CNode>& arSelectors)
|
||||
void COOXMLWriter::SetHyperlinkData(const std::wstring& wsRef, const std::wstring& wsTooltip, bool bIsCross, const std::wstring& wsFootnote, bool bIsFootnote)
|
||||
{
|
||||
CloseHyperlink();
|
||||
|
||||
//TODO:: подумать как лучше сделать работу с гиперссылками
|
||||
m_arStates.top().m_wsHref = wsRef;
|
||||
m_arStates.top().m_wsTooltip = wsTooltip;
|
||||
m_arStates.top().m_bISCrossHyperlink = bIsCross;
|
||||
m_arStates.top().m_wsFootnote = wsFootnote;
|
||||
m_arStates.top().m_bIsFootnote = bIsFootnote;
|
||||
}
|
||||
|
||||
void COOXMLWriter::ClearHyperlinkData()
|
||||
{
|
||||
m_arStates.top().m_wsHref.clear();
|
||||
m_arStates.top().m_wsTooltip.clear();
|
||||
m_arStates.top().m_bISCrossHyperlink = false;
|
||||
m_arStates.top().m_wsFootnote.clear();
|
||||
m_arStates.top().m_bIsFootnote = false;
|
||||
}
|
||||
|
||||
// Writes a page break as a paragraph: calls OpenP(), writes a <w:pPr> that
// contains <w:pageBreakBefore/> to the current document, then calls CloseP().
void COOXMLWriter::PageBreak()
|
||||
{
|
||||
OpenP();
|
||||
GetCurrentDocument()->WriteString(L"<w:pPr><w:pageBreakBefore/></w:pPr>");
|
||||
CloseP();
|
||||
}
|
||||
|
||||
void COOXMLWriter::OpenCrossHyperlink(const std::wstring& wsRef)
|
||||
{
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:hyperlink w:anchor=\"");
|
||||
const size_t nSharp = wsRef.find('#');
|
||||
@ -552,7 +619,7 @@ void COOXMLWriter::OpenCrossHyperlink(const std::wstring& wsRef, const std::vect
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"\">");
|
||||
}
|
||||
|
||||
void COOXMLWriter::OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip, const std::vector<NSCSS::CNode>& arSelectors)
|
||||
void COOXMLWriter::OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip)
|
||||
{
|
||||
XmlString& oRelationshipXml(m_oDocXmlRels);
|
||||
|
||||
@ -647,7 +714,6 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
if (sPStyle.empty() && m_arDivId.empty() && wsAnchor.empty())
|
||||
return L"";
|
||||
|
||||
@ -662,6 +728,7 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
|
||||
|
||||
int nLiLevel{-1};
|
||||
bool bNumberingLi{false};
|
||||
bool bInTable{false};
|
||||
|
||||
for (const NSCSS::CNode& oNode : arSelectors)
|
||||
{
|
||||
@ -669,6 +736,11 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
|
||||
bNumberingLi = true;
|
||||
else if (L"ul" == oNode.m_wsName)
|
||||
bNumberingLi = false;
|
||||
else if (L"table" == oNode.m_wsName)
|
||||
{
|
||||
bInTable = true;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
continue;
|
||||
|
||||
@ -679,20 +751,15 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:numPr><w:ilvl w:val=\"" + std::to_wstring(nLiLevel) + L"\"/><w:numId w:val=\"" +
|
||||
(!bNumberingLi ? L"1" : std::to_wstring(m_nListId)) + L"\"/></w:numPr>");
|
||||
|
||||
if (!m_arDivId.empty())
|
||||
if (!m_arDivId.empty() && !bInTable)
|
||||
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:divId w:val=\"" + m_arDivId.top() + L"\"/>");
|
||||
// m_pCurrentDocument->WriteString(oTS.sPStyle + sPSettings);
|
||||
|
||||
m_arStates.top().m_pCurrentDocument->WriteNodeEnd(L"w:pPr");
|
||||
|
||||
m_arStates.top().m_bWasPStyle = true;
|
||||
|
||||
if (!wsAnchor.empty())
|
||||
{
|
||||
// const anchors_map::const_iterator itAnchor{m_mAnchors.find(wsAnchor)};
|
||||
|
||||
// if (m_mAnchors.cend() != itAnchor)
|
||||
WriteEmptyBookmark(wsAnchor);
|
||||
}
|
||||
|
||||
return sPStyle;
|
||||
}
|
||||
@ -797,7 +864,6 @@ bool COOXMLWriter::WriteText(std::wstring wsText, const std::vector<NSCSS::CNode
|
||||
|
||||
const bool bInT = m_arStates.top().m_bInT;
|
||||
|
||||
//TODO:: сделать так, чтобы параграф (со своими стилями) открывался при чтении сооответствующей ноды, а не при чтении текста
|
||||
OpenP();
|
||||
|
||||
WritePPr(arSelectors);
|
||||
|
||||
@ -66,7 +66,13 @@ class COOXMLWriter : public IWriter
|
||||
bool m_bInT; // <w:t> открыт?
|
||||
bool m_bWasPStyle; // <w:pStyle> записан?
|
||||
bool m_bWasSpace; // Был пробел?
|
||||
|
||||
bool m_bInHyperlink; // <w:hyperlink> открыт?
|
||||
std::wstring m_wsTooltip;
|
||||
std::wstring m_wsHref;
|
||||
std::wstring m_wsFootnote;
|
||||
bool m_bIsFootnote;
|
||||
bool m_bISCrossHyperlink;
|
||||
|
||||
XmlString *m_pCurrentDocument; //Текущее место записи
|
||||
bool m_bRemoveCurrentDocument;
|
||||
@ -74,6 +80,7 @@ class COOXMLWriter : public IWriter
|
||||
// Creates a writer state bound to pCurrentDocument.
// All "element is open" flags start as false, m_bWasSpace starts as true,
// and the state does not own the document (m_bRemoveCurrentDocument = false).
TState(XmlString *pCurrentDocument)
	: m_bInP(false),
	  m_bInR(false),
	  m_bInT(false),
	  m_bWasPStyle(false),
	  m_bWasSpace(true),
	  m_bInHyperlink(false),
	  m_bIsFootnote(false),
	  m_bISCrossHyperlink(false),
	  m_pCurrentDocument(pCurrentDocument),
	  m_bRemoveCurrentDocument(false)
{
}
|
||||
|
||||
@ -109,10 +116,7 @@ class COOXMLWriter : public IWriter
|
||||
|
||||
NSFonts::IApplicationFonts* m_pFonts; // Необходимо для оптимизации работы со шрифтами
|
||||
public:
|
||||
COOXMLWriter();
|
||||
|
||||
void SetCSSCalculator(NSCSS::CCssCalculator* pCSSCalculator);
|
||||
void SetHTMLParameters(THTMLParameters* pHTMLParameters);
|
||||
COOXMLWriter(THTMLParameters* pHTMLParameters = nullptr, NSCSS::CCssCalculator* pCSSCalculator = nullptr);
|
||||
|
||||
void SetSrcDirectory (const std::wstring& wsPath);
|
||||
void SetDstDirectory (const std::wstring& wsPath);
|
||||
@ -126,10 +130,14 @@ public:
|
||||
bool OpenP();
|
||||
bool OpenR();
|
||||
bool OpenT();
|
||||
void OpenHyperlink();
|
||||
|
||||
void CloseP();
|
||||
void CloseR();
|
||||
void CloseT();
|
||||
void CloseHyperlink();
|
||||
|
||||
void PageBreak() override;
|
||||
|
||||
void BeginBlock() override;
|
||||
void EndBlock(bool bAddBlock) override;
|
||||
@ -144,8 +152,11 @@ public:
|
||||
|
||||
void Break(const std::vector<NSCSS::CNode>& arSelectors);
|
||||
|
||||
void OpenCrossHyperlink(const std::wstring& wsRef, const std::vector<NSCSS::CNode>& arSelectors);
|
||||
void OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip, const std::vector<NSCSS::CNode>& arSelectors);
|
||||
void SetHyperlinkData(const std::wstring& wsRef, const std::wstring& wsTooltip, bool bIsCross, const std::wstring& wsFootnote, bool bIsFootnote);
|
||||
void ClearHyperlinkData();
|
||||
|
||||
void OpenCrossHyperlink(const std::wstring& wsRef);
|
||||
void OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip);
|
||||
void CloseCrossHyperlink(const std::vector<NSCSS::CNode>& arSelectors, std::wstring wsFootnote, const std::wstring& wsRef);
|
||||
void CloseExternalHyperlink();
|
||||
|
||||
|
||||
@ -5106,15 +5106,15 @@ HRESULT CHtmlFile2::ConvertHTML2Markdown(const std::wstring& wsPath, const std::
|
||||
#endif
|
||||
}
|
||||
|
||||
HRESULT CHtmlFile2::OpenMht(const std::wstring& sSrc, const std::wstring& sDst)
|
||||
HRESULT CHtmlFile2::ConvertMHT2OOXML(const std::wstring& wsPath, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs)
|
||||
{
|
||||
#ifdef USE_OLD_HTML_CONVERTER
|
||||
if(!m_internal->m_oLightReader.IsValid())
|
||||
if(!IsMhtFile(sSrc))
|
||||
if(!IsMhtFile(wsPath))
|
||||
return S_FALSE;
|
||||
|
||||
m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sSrc);
|
||||
m_internal->m_sDst = sDst;
|
||||
m_internal->m_sSrc = NSSystemPath::GetDirectoryName(wsPath);
|
||||
m_internal->m_sDst = sDirectory;
|
||||
m_internal->CreateDocxEmpty(oParams);
|
||||
m_internal->readStyle();
|
||||
|
||||
@ -5128,22 +5128,37 @@ HRESULT CHtmlFile2::OpenMht(const std::wstring& sSrc, const std::wstring& sDst)
|
||||
m_internal->write();
|
||||
return S_OK;
|
||||
#else
|
||||
return S_FALSE;
|
||||
if (nullptr == m_pReader)
|
||||
return S_FALSE;
|
||||
|
||||
return m_pReader->ConvertMHT2OOXML(wsPath, wsDirectory, pParametrs);
|
||||
#endif
|
||||
}
|
||||
|
||||
HRESULT CHtmlFile2::OpenBatchHtml(const std::vector<std::wstring>& sSrc, const std::wstring& sDst)
|
||||
// Converts a single MHT file to Markdown.
//
// wsPath      - path of the source MHT file
// wsFinalFile - path of the Markdown file to produce
// pParametrs  - optional conversion parameters, forwarded unchanged
//
// Returns S_FALSE when built with USE_OLD_HTML_CONVERTER (the legacy
// converter has no Markdown output) or when no reader is available;
// otherwise returns the result of the reader.
HRESULT CHtmlFile2::ConvertMHT2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs)
{
	#ifdef USE_OLD_HTML_CONVERTER
	// The legacy branch must not touch m_internal->m_sDst: this function has
	// no sDst parameter, and nothing is converted in this configuration.
	return S_FALSE;
	#else
	if (nullptr == m_pReader)
		return S_FALSE;

	return m_pReader->ConvertMHT2Markdown(wsPath, wsFinalFile, pParametrs);
	#endif
}
|
||||
|
||||
HRESULT CHtmlFile2::ConvertHTML2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs)
|
||||
{
|
||||
#ifdef USE_OLD_HTML_CONVERTER
|
||||
m_internal->m_sDst = wsDirectory;
|
||||
m_internal->CreateDocxEmpty(oParams);
|
||||
bool bFirst = true;
|
||||
|
||||
for(const std::wstring& sS : sSrc)
|
||||
for(const std::wstring& sS : arPaths)
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
#ifdef _DEBUG
|
||||
std::wcout << NSFile::GetFileName(sS) << std::endl;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sS);
|
||||
if(!IsHtmlFile(sS))
|
||||
@ -5171,8 +5186,48 @@ HRESULT CHtmlFile2::OpenBatchHtml(const std::vector<std::wstring>& sSrc, const s
|
||||
|
||||
m_internal->write();
|
||||
return S_OK;
|
||||
#else
|
||||
if (nullptr == m_pReader)
|
||||
return S_FALSE;
|
||||
|
||||
return m_pReader->ConvertHTML2OOXML(arPaths, wsDirectory, pParametrs);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Converts a batch of HTML files into one Markdown file.
//
// arPaths     - paths of the source HTML files
// wsFinalFile - path of the Markdown file to produce
// pParametrs  - optional conversion parameters, forwarded unchanged
//
// The work is delegated to m_pReader. S_FALSE is returned when the reader is
// missing or when the build uses USE_OLD_HTML_CONVERTER.
HRESULT CHtmlFile2::ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs)
{
	#ifndef USE_OLD_HTML_CONVERTER
	if (nullptr != m_pReader)
		return m_pReader->ConvertHTML2Markdown(arPaths, wsFinalFile, pParametrs);
	#endif

	return S_FALSE;
}
|
||||
|
||||
// Converts a batch of MHT files to OOXML.
//
// arPaths     - paths of the source MHT files
// wsDirectory - destination directory, passed through to the reader
// pParametrs  - optional conversion parameters, forwarded unchanged
//
// The work is delegated to m_pReader. S_FALSE is returned when the reader is
// missing or when the build uses USE_OLD_HTML_CONVERTER.
HRESULT CHtmlFile2::ConvertMHT2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs)
{
	#ifndef USE_OLD_HTML_CONVERTER
	if (nullptr != m_pReader)
		return m_pReader->ConvertMHT2OOXML(arPaths, wsDirectory, pParametrs);
	#endif

	return S_FALSE;
}
|
||||
|
||||
// Converts a batch of MHT files into one Markdown file.
//
// arPaths     - paths of the source MHT files
// wsFinalFile - path of the Markdown file to produce
// pParametrs  - optional conversion parameters, forwarded unchanged
//
// The work is delegated to m_pReader. S_FALSE is returned when the reader is
// missing or when the build uses USE_OLD_HTML_CONVERTER.
HRESULT CHtmlFile2::ConvertMHT2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs)
{
	#ifndef USE_OLD_HTML_CONVERTER
	if (nullptr != m_pReader)
		return m_pReader->ConvertMHT2Markdown(arPaths, wsFinalFile, pParametrs);
	#endif

	return S_FALSE;
}
|
||||
|
||||
#ifdef USE_OLD_HTML_CONVERTER
|
||||
|
||||
@ -44,17 +44,14 @@ public:
|
||||
HRESULT ConvertHTML2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
|
||||
HRESULT ConvertHTML2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
|
||||
|
||||
HRESULT ConvertMHT2OOXML (const std::wstring& sPath, const std::wstring& sDirectory, HTML::THTMLParameters* pParametrs = nullptr);
|
||||
HRESULT ConvertMHT2Markdown (const std::wstring& sPath, const std::wstring& sDirectory, HTML::TMarkdownParameters* pParametrs = nullptr);
|
||||
HRESULT ConvertMHT2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
|
||||
HRESULT ConvertMHT2Markdown (const std::wstring& wsPath, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
|
||||
|
||||
HRESULT ConvertHTML2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
|
||||
HRESULT ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::TMarkdownParameters* pParametrs = nullptr);
|
||||
HRESULT ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
|
||||
|
||||
HRESULT ConvertMHT2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
|
||||
HRESULT ConvertMHT2Markdown (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::TMarkdownParameters* pParametrs = nullptr);
|
||||
|
||||
HRESULT OpenMht (const std::wstring& sPath, const std::wstring& sDirectory);
|
||||
HRESULT OpenBatchHtml(const std::vector<std::wstring>& sPath, const std::wstring& sDirectory);
|
||||
HRESULT ConvertMHT2Markdown (const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
|
||||
};
|
||||
|
||||
#endif // _HTMLFILE2_HTMLFILE2_H
|
||||
|
||||
@ -198,6 +198,8 @@ namespace NExtractTools
|
||||
DECLARE_CONVERT_FUNC(doct_bin2html);
|
||||
DECLARE_CONVERT_FUNC(doct_bin2html_zip);
|
||||
|
||||
DECLARE_CONVERT_FUNC(html2md);
|
||||
|
||||
// mht
|
||||
DECLARE_CONVERT_FUNC(mht2docx_dir);
|
||||
|
||||
|
||||
@ -83,7 +83,7 @@ namespace NExtractTools
|
||||
params.m_bMacro = false;
|
||||
|
||||
CHtmlFile2 oFile;
|
||||
oFile.SetTmpDirectory(convertParams.m_sTempDir);
|
||||
oFile.SetTempDirectory(convertParams.m_sTempDir);
|
||||
return (S_OK == oFile.OpenBatchHtml(arFiles, sTo)) ? 0 : AVS_FILEUTILS_ERROR_CONVERT;
|
||||
}
|
||||
_UINT32 html2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
|
||||
@ -129,7 +129,7 @@ namespace NExtractTools
|
||||
_UINT32 mht2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
|
||||
{
|
||||
CHtmlFile2 oFile;
|
||||
oFile.SetTmpDirectory(convertParams.m_sTempDir);
|
||||
oFile.SetTempDirectory(convertParams.m_sTempDir);
|
||||
return (S_OK == oFile.OpenMht(sFrom, sTo)) ? 0 : AVS_FILEUTILS_ERROR_CONVERT;
|
||||
}
|
||||
// epub
|
||||
@ -258,4 +258,12 @@ namespace NExtractTools
|
||||
{
|
||||
return Md::ConvertMdFileToHtml(sFrom, sTo) ? S_OK : AVS_FILEUTILS_ERROR_CONVERT;
|
||||
}
|
||||
|
||||
// html -> markdown
|
||||
_UINT32 html2md(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
|
||||
{
|
||||
CHtmlFile2 oFile;
|
||||
oFile.SetTempDirectory(convertParams.m_sTempDir);
|
||||
return (S_OK == oFile.ConvertHTML2Markdown(sFrom, sTo)) ? 0 : AVS_FILEUTILS_ERROR_CONVERT;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user