Modification of the html converter

This commit is contained in:
Kirill Polyakov
2026-02-10 02:18:38 +03:00
parent 56b8e41875
commit 098d40d44f
15 changed files with 445 additions and 245 deletions

View File

@ -511,7 +511,6 @@ namespace NSCSS
m_mDefaultStyleData[L"i"] = new CElement(L"i", {{L"font-style", L"italic"}});
m_mDefaultStyleData[L"code"] = new CElement(L"code", {{L"font-family", L"Courier New"}});
m_mDefaultStyleData[L"kbd"] = new CElement(L"kbd", {{L"font-family", L"Courier New"},
{L"font-size", L"20pt"},
// CSS property names are hyphenated ("font-weight"); the previous key "font_weight"
// is not a CSS property name, so the bold default for <kbd> presumably never matched.
{L"font-weight", L"bold"}});
m_mDefaultStyleData[L"s"] = new CElement(L"s", {{L"text-decoration", L"line-through"}});
m_mDefaultStyleData[L"u"] = new CElement(L"u", {{L"text-decoration", L"underline"}});
@ -520,7 +519,6 @@ namespace NSCSS
m_mDefaultStyleData[L"sub"] = new CElement(L"sub", {{L"vertical-align", L"bottom"}});
m_mDefaultStyleData[L"dd"] = new CElement(L"dd", {{L"margin-left", L"720tw"}});
m_mDefaultStyleData[L"pre"] = new CElement(L"pre", {{L"font-family", L"Courier New"},
{L"font-size", L"20pt"},
{L"margin-top", L"0"},
{L"margin-bottom", L"0"}});
m_mDefaultStyleData[L"blockquote"] = new CElement(L"blockquote", {{L"margin", L"0px"}});

View File

@ -142,7 +142,7 @@ HRESULT CEpubFile::Convert(const std::wstring& sInputFile, const std::wstring& s
*/
CHtmlFile2 oFile;
CHtmlParams oFileParams;
HTML::THTMLParameters oFileParams;
oFileParams.SetAuthors (m_oBookInfo.GetCreators());
oFileParams.SetGenres (m_oBookInfo.GetSubjects());
@ -155,7 +155,7 @@ HRESULT CEpubFile::Convert(const std::wstring& sInputFile, const std::wstring& s
std::wstring sDocxFileTempDir = m_sTempDir + L"/tmp";
NSDirectory::CreateDirectory(sDocxFileTempDir);
oFile.SetTmpDirectory(sDocxFileTempDir);
oFile.SetTempDirectory(sDocxFileTempDir);
oFile.SetCoreDirectory(NSFile::GetDirectoryName(sContent));
std::vector<std::wstring> arFiles;
@ -182,7 +182,7 @@ HRESULT CEpubFile::Convert(const std::wstring& sInputFile, const std::wstring& s
sOutputDir = sOutputFile;
NSDirectory::CreateDirectory(sOutputDir);
HRESULT hRes = oFile.OpenBatchHtml(arFiles, sOutputDir, &oFileParams);
HRESULT hRes = oFile.ConvertHTML2OOXML(arFiles, sOutputDir, &oFileParams);
if (bIsOutCompress && S_OK == hRes)
hRes = oOfficeUtils.CompressFileOrDirectory(sOutputDir, sOutputFile);

View File

@ -2170,7 +2170,7 @@ HRESULT CFb2File::FromHtml(const std::wstring& sHtmlFile, const std::wstring& sD
RELEASEARRAYOBJECTS(pData);
//XmlUtils::CXmlLiteReader oIndexHtml;
std::wstring xhtml = htmlToXhtml(sContent, bNeedConvert);
std::wstring xhtml = HTML::htmlToXhtml(sContent, bNeedConvert);
if (!m_internal->m_oLightReader.FromString(xhtml))
return S_FALSE;

View File

@ -162,6 +162,96 @@ const static std::map<std::wstring, HtmlTag> m_HTML_TAGS
ADD_TAG(L"svg", SVG)
};
bool HTML2XHTML(const std::wstring& wsFileName, XmlUtils::CXmlLiteReader& oLiteReader)
{
BYTE* pData;
DWORD nLength;
if (!NSFile::CFileBinary::ReadAllBytes(wsFileName, &pData, nLength))
return false;
std::string sFileContent = XmlUtils::GetUtf8FromFileContent(pData, nLength);
bool bNeedConvert = true;
if (nLength > 4)
{
if (pData[0] == 0xFF && pData[1] == 0xFE && !(pData[2] == 0x00 && pData[3] == 0x00))
bNeedConvert = false;
if (pData[0] == 0xFE && pData[1] == 0xFF)
bNeedConvert = false;
if (pData[0] == 0xFF && pData[1] == 0xFE && pData[2] == 0x00 && pData[3] == 0x00)
bNeedConvert = false;
if (pData[0] == 0 && pData[1] == 0 && pData[2] == 0xFE && pData[3] == 0xFF)
bNeedConvert = false;
}
RELEASEARRAYOBJECTS(pData);
size_t nFind = sFileContent.find("version=\"");
if(nFind != std::string::npos)
{
nFind += 9;
size_t nFindEnd = sFileContent.find("\"", nFind);
if(nFindEnd != std::string::npos)
sFileContent.replace(nFind, nFindEnd - nFind, "1.0");
}
const std::wstring sRes{htmlToXhtml(sFileContent, bNeedConvert)};
#ifdef SAVE_NORMALIZED_HTML
#if 1 == SAVE_NORMALIZED_HTML
NSFile::CFileBinary oWriter;
if (oWriter.CreateFileW(L"res.html"))
{
oWriter.WriteStringUTF8(sRes);
oWriter.CloseFile();
}
#endif
#endif
return oLiteReader.FromString(sRes);
}
bool MHT2XHTML(const std::wstring& wsFileName, XmlUtils::CXmlLiteReader& oLiteReader)
{
NSFile::CFileBinary file;
if (!file.OpenFile(wsFileName))
return false;
unsigned char* buffer = new unsigned char[4096];
if (!buffer)
{
file.CloseFile();
return false;
}
DWORD dwReadBytes = 0;
file.ReadFile(buffer, 4096, dwReadBytes);
file.CloseFile();
std::string xml_string = XmlUtils::GetUtf8FromFileContent(buffer, dwReadBytes);
const std::string sContentType = NSStringFinder::FindProperty(xml_string, "content-type", ":", ";");
bool bRes = false;
if(NSStringFinder::Equals(sContentType, "multipart/related"))
{
BYTE* pData;
DWORD nLength;
if (!NSFile::CFileBinary::ReadAllBytes(wsFileName, &pData, nLength))
return false;
std::string sFileContent = XmlUtils::GetUtf8FromFileContent(pData, nLength);
RELEASEARRAYOBJECTS(pData);
const std::wstring sRes = mhtToXhtml(sFileContent);
bRes = oLiteReader.FromString(sRes);
}
else
bRes = HTML2XHTML(wsFileName, oLiteReader);
RELEASEARRAYOBJECTS(buffer);
return bRes;
}
inline std::wstring GetArgumentValue(XmlUtils::CXmlLiteReader& oLiteReader, const std::wstring& wsArgumentName, const std::wstring& wsDefaultValue = L"");
inline bool CheckArgumentMath(const std::wstring& wsNodeName, const std::wstring& wsStyleName);
inline HtmlTag GetHtmlTag(const std::wstring& wsStrTag);
@ -190,17 +280,42 @@ void CHTMLReader::SetCoreDirectory(const std::wstring& wsPath)
HRESULT CHTMLReader::ConvertHTML2OOXML(const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters)
{
InitOOXMLTags(pParameters);
m_wsDstDirectory = wsDirectory;
return ConvertHTML(wsPath, wsDirectory);
return InitAndConvert2OOXML({wsPath}, wsDirectory, HTML2XHTML, pParameters);
}
HRESULT CHTMLReader::ConvertHTML2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
{
InitMDTags();
return ConvertHTML(wsPath, wsFinalFile);
return InitAndConvert2Markdown({wsPath}, wsFinalFile, HTML2XHTML, pParameters);
}
// Batch overload: converts the HTML files listed in arPaths to OOXML.
// Forwards to InitAndConvert2OOXML, passing HTML2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertHTML2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters)
{
return InitAndConvert2OOXML(arPaths, wsDirectory, HTML2XHTML, pParameters);
}
// Batch overload: converts the HTML files listed in arPaths to Markdown.
// Forwards to InitAndConvert2Markdown, passing HTML2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
{
return InitAndConvert2Markdown(arPaths, wsFinalFile, HTML2XHTML, pParameters);
}
// Converts a single MHT file to OOXML.
// Wraps wsPath into a one-element list and forwards to InitAndConvert2OOXML,
// passing MHT2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertMHT2OOXML(const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters)
{
return InitAndConvert2OOXML({wsPath}, wsDirectory, MHT2XHTML, pParameters);
}
// Converts a single MHT file to Markdown.
// Wraps wsPath into a one-element list and forwards to InitAndConvert2Markdown,
// passing MHT2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertMHT2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
{
return InitAndConvert2Markdown({wsPath}, wsFinalFile, MHT2XHTML, pParameters);
}
// Batch overload: converts the MHT files listed in arPaths to OOXML.
// Forwards to InitAndConvert2OOXML, passing MHT2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertMHT2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters)
{
return InitAndConvert2OOXML(arPaths, wsDirectory, MHT2XHTML, pParameters);
}
// Batch overload: converts the MHT files listed in arPaths to Markdown.
// Forwards to InitAndConvert2Markdown, passing MHT2XHTML as the per-file loader.
HRESULT CHTMLReader::ConvertMHT2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters)
{
return InitAndConvert2Markdown(arPaths, wsFinalFile, MHT2XHTML, pParameters);
}
void CHTMLReader::Clear()
@ -221,14 +336,11 @@ void CHTMLReader::InitOOXMLTags(THTMLParameters* pParametrs)
{
Clear();
COOXMLWriter *pWriter = new COOXMLWriter();
COOXMLWriter *pWriter = new COOXMLWriter(pParametrs, &m_oCSSCalculator);
if (nullptr == pWriter)
return;
pWriter->SetCSSCalculator(&m_oCSSCalculator);
pWriter->SetHTMLParameters(pParametrs);
pWriter->SetSrcDirectory (m_wsSrcDirectory);
pWriter->SetDstDirectory (m_wsDstDirectory);
pWriter->SetTempDirectory(m_wsTempDirectory);
@ -270,12 +382,12 @@ void CHTMLReader::InitOOXMLTags(THTMLParameters* pParametrs)
m_mTags[HTML_TAG(BDO)] = oIgnoredTag;
m_mTags[HTML_TAG(SPAN)] = oIgnoredTag;
m_mTags[HTML_TAG(H1)] = oIgnoredTag;
m_mTags[HTML_TAG(CODE)] = oIgnoredTag;
m_mTags[HTML_TAG(CODE)] = oIgnoredTag;
}
void CHTMLReader::InitMDTags()
void CHTMLReader::InitMDTags(TMarkdownParameters* pParametrs)
{
CMDWriter *pWriter = new CMDWriter({});
CMDWriter *pWriter = new CMDWriter((nullptr != pParametrs) ? *pParametrs : TMarkdownParameters{});
if (nullptr == pWriter)
return;
@ -326,65 +438,56 @@ bool CHTMLReader::IsHTML()
return ((m_oLightReader.MoveToStart() && m_oLightReader.ReadNextNode()) ? m_oLightReader.GetName() == L"html" : false);
}
bool CHTMLReader::HTML2XHTML(const std::wstring& wsFileName)
HRESULT CHTMLReader::InitAndConvert2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, Convert_Func Convertation, THTMLParameters* pParameters)
{
BYTE* pData;
DWORD nLength;
if (!NSFile::CFileBinary::ReadAllBytes(wsFileName, &pData, nLength))
return false;
InitOOXMLTags(pParameters);
m_wsDstDirectory = wsDirectory;
std::string sFileContent = XmlUtils::GetUtf8FromFileContent(pData, nLength);
bool bNeedConvert = true;
if (nLength > 4)
{
if (pData[0] == 0xFF && pData[1] == 0xFE && !(pData[2] == 0x00 && pData[3] == 0x00))
bNeedConvert = false;
if (pData[0] == 0xFE && pData[1] == 0xFF)
bNeedConvert = false;
if (pData[0] == 0xFF && pData[1] == 0xFE && pData[2] == 0x00 && pData[3] == 0x00)
bNeedConvert = false;
if (pData[0] == 0 && pData[1] == 0 && pData[2] == 0xFE && pData[3] == 0xFF)
bNeedConvert = false;
}
RELEASEARRAYOBJECTS(pData);
size_t nFind = sFileContent.find("version=\"");
if(nFind != std::string::npos)
{
nFind += 9;
size_t nFindEnd = sFileContent.find("\"", nFind);
if(nFindEnd != std::string::npos)
sFileContent.replace(nFind, nFindEnd - nFind, "1.0");
}
const std::wstring sRes{htmlToXhtml(sFileContent, bNeedConvert)};
#ifdef SAVE_NORMALIZED_HTML
#if 1 == SAVE_NORMALIZED_HTML
NSFile::CFileBinary oWriter;
if (oWriter.CreateFileW(m_sTmp + L"/res.html"))
{
oWriter.WriteStringUTF8(sRes);
oWriter.CloseFile();
}
#endif
#endif
return m_oLightReader.FromString(sRes);
}
HRESULT CHTMLReader::ConvertHTML(const std::wstring& wsPath, const std::wstring& wsDirectory)
{
if (nullptr == m_pWriter || !HTML2XHTML(wsPath) || !m_oLightReader.IsValid() || !IsHTML())
return S_FALSE;
HRESULT lResult{S_FALSE};
m_pWriter->Begin(wsDirectory);
for (const std::wstring& wsPath : arPaths)
{
if (Convert(wsPath, Convertation))
{
lResult = S_OK;
if (nullptr != pParameters && pParameters->m_bNeedPageBreakBefore)
m_pWriter->PageBreak();
}
}
m_pWriter->End(wsDirectory);
return lResult;
}
// Prepares the Markdown tag handlers and converts every file in arPaths into one
// Markdown output.
//   arPaths      - source files, processed in order
//   wsFinalFile  - passed to the writer's End() call
//   Convertation - per-file loader that fills the XML reader (e.g. HTML2XHTML, MHT2XHTML)
//   pParameters  - optional Markdown settings, forwarded to InitMDTags
// Returns S_OK when at least one file was converted, S_FALSE otherwise.
HRESULT CHTMLReader::InitAndConvert2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, Convert_Func Convertation, TMarkdownParameters* pParameters)
{
	InitMDTags(pParameters);

	// Convert() and ReadStream() both treat a missing writer as a failure; guard here
	// as well so Begin()/End() are never called through a null pointer.
	if (nullptr == m_pWriter)
		return S_FALSE;

	HRESULT lResult{S_FALSE};

	m_pWriter->Begin(L"");

	for (const std::wstring& wsPath : arPaths)
	{
		if (Convert(wsPath, Convertation))
			lResult = S_OK;
	}

	m_pWriter->End(wsFinalFile);

	return lResult;
}
bool CHTMLReader::Convert(const std::wstring& wsPath, Convert_Func Convertation)
{
if (nullptr == m_pWriter || !Convertation(wsPath, m_oLightReader) || !m_oLightReader.IsValid() || !IsHTML())
return false;
m_wsSrcDirectory = NSSystemPath::GetDirectoryName(wsPath);
// m_sDst = sDst;
m_oLightReader.MoveToStart();
m_oLightReader.ReadNextNode();
@ -394,13 +497,9 @@ HRESULT CHTMLReader::ConvertHTML(const std::wstring& wsPath, const std::wstring&
// Convert() returns bool: S_FALSE is a non-zero HRESULT and would be converted to
// true, reporting a failed rewind as success.
if(!m_oLightReader.MoveToStart())
return false;
// if(oParams && oParams->m_bNeedPageBreakBefore)
// m_internal->PageBreakBefore();
ReadDocument();
m_pWriter->End(wsDirectory);
return S_OK;
return true;
}
void CHTMLReader::ReadStyle()
@ -438,16 +537,16 @@ void CHTMLReader::ReadStyle()
void CHTMLReader::ReadStyle2()
{
std::wstring sName = m_oLightReader.GetName();
const std::wstring wsName = m_oLightReader.GetName();
// Стиль по ссылке
if(sName == L"link")
if(wsName == L"link")
{
while(m_oLightReader.MoveToNextAttribute())
ReadStyleFromNetwork();
m_oLightReader.MoveToElement();
}
// тэг style содержит стили для styles.xml
else if(sName == L"style")
else if(wsName == L"style")
m_oCSSCalculator.AddStyles(m_oLightReader.GetText2());
const int nDeath = m_oLightReader.GetDepth();
@ -492,10 +591,10 @@ void CHTMLReader::ReadDocument()
int nDeath = m_oLightReader.GetDepth();
while(m_oLightReader.ReadNextSiblingNode(nDeath))
{
std::wstring sName = m_oLightReader.GetName();
if(sName == L"head")
const std::wstring wsName = m_oLightReader.GetName();
if(wsName == L"head")
ReadHead();
else if(sName == L"body")
else if(wsName == L"body")
ReadBody();
}
}
@ -524,12 +623,11 @@ void CHTMLReader::ReadBody()
GetSubClass(arSelectors);
/*
if (!sSelectors.back().m_mAttributes.empty())
if (!arSelectors.back().m_mAttributes.empty())
{
std::map<std::wstring, std::wstring>::iterator itFound = sSelectors.back().m_mAttributes.find(L"bgcolor");
std::map<std::wstring, std::wstring>::iterator itFound = arSelectors.back().m_mAttributes.find(L"bgcolor");
if (sSelectors.back().m_mAttributes.end() != itFound)
if (arSelectors.back().m_mAttributes.end() != itFound)
{
NSCSS::NSProperties::CColor oColor;
oColor.SetValue(itFound->second);
@ -539,15 +637,14 @@ void CHTMLReader::ReadBody()
const std::wstring wsHEXColor{oColor.ToHEX()};
if (!wsHEXColor.empty())
m_oDocXml.WriteString(L"<w:background w:color=\"" + wsHEXColor + L"\"/>");
m_pWriter->GetCurrentDocument()->WriteString(L"<w:background w:color=\"" + wsHEXColor + L"\"/>");
sSelectors.back().m_mAttributes.erase(itFound);
arSelectors.back().m_mAttributes.erase(itFound);
}
}
}
m_oLightReader.MoveToElement();
*/
ReadStream(arSelectors);
}
@ -557,42 +654,24 @@ bool CHTMLReader::ReadStream(std::vector<NSCSS::CNode>& arSelectors, bool bInser
if (nullptr == m_pWriter)
return false;
const int nDepth{m_oLightReader.GetDepth()};
bool bResult = false;
XmlUtils::XmlNodeType eNodeType = XmlUtils::XmlNodeType_EndElement;
bool bResult{false};
while (m_oLightReader.Read(eNodeType) && m_oLightReader.GetDepth() >= nDepth && XmlUtils::XmlNodeType_EndElement != eNodeType)
const int nDeath = m_oLightReader.GetDepth();
if(m_oLightReader.IsEmptyNode() || !m_oLightReader.ReadNextSiblingNode2(nDeath))
{
if (eNodeType == XmlUtils::XmlNodeType_Text ||
eNodeType == XmlUtils::XmlNodeType_Whitespace ||
eNodeType == XmlUtils::XmlNodeType_SIGNIFICANT_WHITESPACE ||
eNodeType == XmlUtils::XmlNodeType_CDATA)
{
const char* pValue = m_oLightReader.GetTextChar();
if (!bInsertEmptyP)
return false;
if('\0' != pValue[0])
{
std::wstring wsText;
NSFile::CUtf8Converter::GetUnicodeStringFromUTF8((BYTE*)pValue, (LONG)strlen(pValue), wsText);
if (wsText.empty())
continue;
arSelectors.push_back(NSCSS::CNode{L"#text", L"", L""});
m_oCSSCalculator.CalculateCompiledStyle(arSelectors);
bResult = m_pWriter->WriteText(wsText, arSelectors);
arSelectors.pop_back();
}
}
else if (eNodeType == XmlUtils::XmlNodeType_Element)
{
if (ReadInside(arSelectors))
bResult = true;
}
m_pWriter->WriteEmptyParagraph();
return true;
}
do
{
if (ReadInside(arSelectors))
bResult = true;
} while(m_oLightReader.ReadNextSiblingNode2(nDeath));
if (!bResult && bInsertEmptyP)
m_pWriter->WriteEmptyParagraph();
@ -603,10 +682,10 @@ bool CHTMLReader::ReadInside(std::vector<NSCSS::CNode>& arSelectors)
{
const std::wstring wsName{m_oLightReader.GetName()};
//TODO:: обработать все варианты return'а
if(wsName == L"#text")
return ReadText(arSelectors);
//TODO:: обработать все варианты return'а
if (UnreadableNode(wsName) || TagIsUnprocessed(wsName))
return false;
@ -621,7 +700,7 @@ bool CHTMLReader::ReadInside(std::vector<NSCSS::CNode>& arSelectors)
case HTML_TAG(A):
case HTML_TAG(AREA):
{
bResult = ReadAnchor(arSelectors);
bResult = ReadDefaultTag(HTML_TAG(A), arSelectors);
break;
}
case HTML_TAG(ABBR):
@ -892,15 +971,6 @@ bool CHTMLReader::ReadInside(std::vector<NSCSS::CNode>& arSelectors)
}
}
// if (HTML_TAG(DIV) != eHtmlTag && HTML_TAG(ASIDE) != eHtmlTag)
// {
// if (bResult)
// m_oState.m_eLastElement = eHtmlTag;
// m_oState.m_bBanUpdatePageData = true;
// }
// readNote(oXml, sSelectors, sNote);
arSelectors.pop_back();
return bResult;
}
@ -919,25 +989,12 @@ bool CHTMLReader::ReadText(std::vector<NSCSS::CNode>& arSelectors)
return bResult;
}
bool CHTMLReader::ReadAnchor(std::vector<NSCSS::CNode>& arSelectors)
{
if (nullptr == m_pWriter || !m_mTags[HTML_TAG(A)]->Open(arSelectors))
return false;
if (!ReadStream(arSelectors))
m_pWriter->WriteEmptyParagraph(true);
m_mTags[HTML_TAG(A)]->Close(arSelectors);
return true;
}
bool CHTMLReader::ReadSVG(const std::vector<NSCSS::CNode>& arSelectors)
{
if (!m_mTags[HTML_TAG(IMAGE)]->Open(arSelectors, m_oLightReader.GetOuterXml()))
if (!m_mTags[HTML_TAG(IMG)]->Open(arSelectors, m_oLightReader.GetOuterXml()))
return false;
m_mTags[HTML_TAG(IMAGE)]->Close(arSelectors);
m_mTags[HTML_TAG(IMG)]->Close(arSelectors);
return true;
}

View File

@ -37,20 +37,29 @@ public:
HRESULT ConvertHTML2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
HRESULT ConvertHTML2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
HRESULT ConvertMHT2OOXML (const std::wstring& sPath, const std::wstring& sDirectory, THTMLParameters* pParameters = nullptr);
HRESULT ConvertMHT2Markdown (const std::wstring& sPath, const std::wstring& sDirectory, TMarkdownParameters* pParameters = nullptr);
HRESULT ConvertHTML2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
HRESULT ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
HRESULT ConvertMHT2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
HRESULT ConvertMHT2Markdown (const std::wstring& wsPath, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
HRESULT ConvertMHT2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, THTMLParameters* pParameters = nullptr);
HRESULT ConvertMHT2Markdown (const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, TMarkdownParameters* pParameters = nullptr);
NSCSS::CCssCalculator* GetCSSCalculator();
private:
void Clear();
void InitOOXMLTags(THTMLParameters* pParametrs = nullptr);
void InitMDTags();
void InitMDTags(TMarkdownParameters* pParametrs = nullptr);
bool IsHTML();
bool HTML2XHTML(const std::wstring& wsFileName);
typedef std::function<bool(const std::wstring&, XmlUtils::CXmlLiteReader&)> Convert_Func;
HRESULT ConvertHTML(const std::wstring& wsPath, const std::wstring& wsDirectory);
HRESULT InitAndConvert2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, Convert_Func Convertation, THTMLParameters* pParameters = nullptr);
HRESULT InitAndConvert2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, Convert_Func Convertation, TMarkdownParameters* pParameters = nullptr);
bool Convert(const std::wstring& wsPath, Convert_Func Convertation);
void ReadStyle();
void ReadStyle2();
@ -65,7 +74,6 @@ private:
bool ReadText(std::vector<NSCSS::CNode>& arSelectors);
bool ReadAnchor(std::vector<NSCSS::CNode>& arSelectors);
bool ReadSVG(const std::vector<NSCSS::CNode>& arSelectors);
bool ReadEmptyTag(UINT unTag, const std::vector<NSCSS::CNode>& arSelectors);
bool ReadDefaultTag(UINT unTag, std::vector<NSCSS::CNode>& arSelectors);

View File

@ -49,34 +49,10 @@ bool CAnchor<COOXMLWriter>::Open(const std::vector<NSCSS::CNode>& arSelectors, c
bCross = true;
if (arSelectors.back().GetAttributeValue(L"name", wsName))
m_pWriter->WriteBookmark(wsName);
m_pWriter->WriteEmptyBookmark(wsName);
arSelectors.back().GetAttributeValue(L"alt", wsAlt);
if (!m_pWriter->OpenP())
m_pWriter->CloseR();
else
m_pWriter->WritePPr(arSelectors);
if (bCross)
m_pWriter->OpenCrossHyperlink(wsRef, arSelectors);
else
{
std::wstring wsTooltip(wsRef);
arSelectors.back().GetAttributeValue(L"title", wsTooltip);
m_pWriter->OpenExternalHyperlink(wsRef, wsTooltip, arSelectors);
}
return true;
}
void CAnchor<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
{
if (!ValidWriter())
return;
bool bCross = false;
std::wstring wsFootnote;
if (arSelectors.back().m_wsStyle.find(L"mso-footnote-id") != std::wstring::npos)
@ -87,23 +63,32 @@ void CAnchor<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
wsFootnote = L"href";
}
std::wstring wsRef;
if (arSelectors.back().GetAttributeValue(L"href", wsRef))
bool bFootnote = false;
if (arSelectors.size() > 1)
{
if(wsRef.find('#') != std::wstring::npos)
bCross = true;
const NSCSS::CNode& oNode = arSelectors[arSelectors.size() - 2];
bFootnote = oNode.m_wsName == L"p" && oNode.m_wsClass == L"MsoFootnoteText";
}
if (bCross)
{
if (wsFootnote == L"href")
wsFootnote = wsRef.substr(wsRef.find('#') + 1);
m_pWriter->CloseCrossHyperlink(arSelectors, wsFootnote, wsRef);
}
m_pWriter->SetHyperlinkData(wsRef, L"", true, wsFootnote, bFootnote);
else
m_pWriter->CloseExternalHyperlink();
{
std::wstring wsTooltip(wsRef);
arSelectors.back().GetAttributeValue(L"title", wsTooltip);
m_pWriter->SetHyperlinkData(wsRef, wsTooltip, false, wsFootnote, bFootnote);
}
return true;
}
// Finishes an anchor: asks the writer to drop the hyperlink data stored for it.
// Does nothing when the writer is not valid.
void CAnchor<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
{
	if (ValidWriter())
		m_pWriter->ClearHyperlinkData();
}
CAbbr<COOXMLWriter>::CAbbr(COOXMLWriter* pWriter)
@ -208,6 +193,7 @@ void CDivision<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors
m_pWriter->RollBackState();
}
m_pWriter->CloseP();
m_arFootnoteIDs.pop();
}
@ -330,6 +316,12 @@ bool CImage<COOXMLWriter>::Open(const std::vector<NSCSS::CNode>& arSelectors, co
((!wsBasePath.empty() && wsBasePath.length() > 4 && wsBasePath.substr(0, 4) == L"http") ||
(wsSrc.length() > 4 && wsSrc.substr(0, 4) == L"http")))
{
if (!wsExtention.empty() && NotValidExtension(wsExtention))
{
m_pWriter->WriteAlternativeImage(wsAlt, wsSrc, oImageData);
return true;
}
const std::wstring wsDst = wsImagePath + L'.' + ((!wsExtention.empty()) ? wsExtention : L"png");
// Проверка gc_allowNetworkRequest предполагается в kernel_network
@ -404,11 +396,10 @@ bool CImage<COOXMLWriter>::Open(const std::vector<NSCSS::CNode>& arSelectors, co
m_pWriter->WriteAlternativeImage(wsAlt, wsSrc, oImageData);
else
{
m_arrImages.push_back(wsSrc);
m_pWriter->WritePPr(arSelectors);
const std::wstring wsImageID{std::to_wstring(m_arrImages.size())};
m_arrImages.push_back(wsSrc);
if (nImageId < 0)
{
@ -1256,6 +1247,7 @@ void CTable<COOXMLWriter>::Close(const std::vector<NSCSS::CNode>& arSelectors)
return;
m_pWriter->GetCurrentDocument()->WriteNodeEnd(L"w:tbl");
m_pWriter->WriteEmptyParagraph(true);
}
CTableRow<COOXMLWriter>::CTableRow(COOXMLWriter* pWriter)

View File

@ -19,6 +19,8 @@ public:
virtual void WriteEmptyParagraph(bool bVahish = false, bool bInP = false) = 0;
virtual void PageBreak() = 0;
virtual void BeginBlock() = 0;
virtual void EndBlock(bool bAddBlock) = 0;

View File

@ -95,8 +95,10 @@ bool CMDWriter::WriteText(std::wstring wsText, const std::vector<NSCSS::CNode>&
}
void CMDWriter::WriteEmptyParagraph(bool bVahish, bool bInP)
{
}
{}
// No-op: this writer emits nothing for a page break.
void CMDWriter::PageBreak()
{}
void CMDWriter::BeginBlock()
{

View File

@ -43,6 +43,8 @@ public:
void WriteEmptyParagraph(bool bVahish = false, bool bInP = false) override;
void PageBreak() override;
void BeginBlock() override;
void EndBlock(bool bAddBlock) override;

View File

@ -40,11 +40,11 @@ inline UINT GetFontSizeLevel(UINT unFontSize);
inline UINT GetFontSizeByLevel(UINT unLevel);
inline void ReplaceSpaces(std::wstring& wsValue);
COOXMLWriter::COOXMLWriter()
COOXMLWriter::COOXMLWriter(THTMLParameters* pHTMLParameters, NSCSS::CCssCalculator* pCSSCalculator)
: m_pDstPath(nullptr), m_pTempDir(nullptr), m_pSrcPath(nullptr),
m_pBasePath(nullptr), m_pCorePath(nullptr), m_pHTMLParameters(nullptr),
m_nFootnoteId(1), m_nHyperlinkId(1), m_nListId(1), m_nElementId(1),
m_bBanUpdatePageData(false), m_bWasDivs(false), m_pFonts(nullptr)
m_pBasePath(nullptr), m_pCorePath(nullptr), m_pStylesCalculator(pCSSCalculator),
m_pHTMLParameters(pHTMLParameters), m_nFootnoteId(1), m_nHyperlinkId(1), m_nListId(1),
m_nElementId(1), m_bBanUpdatePageData(false), m_bWasDivs(false), m_pFonts(nullptr)
{
m_oPageData.SetWidth (DEFAULT_PAGE_WIDTH, NSCSS::UnitMeasure::Twips, 0, true);
m_oPageData.SetHeight(DEFAULT_PAGE_HEIGHT, NSCSS::UnitMeasure::Twips, 0, true);
@ -56,16 +56,6 @@ COOXMLWriter::COOXMLWriter()
m_arStates.top().m_pCurrentDocument = &m_oDocXml;
}
void COOXMLWriter::SetCSSCalculator(NSCSS::CCssCalculator* pCSSCalculator)
{
m_pStylesCalculator = pCSSCalculator;
}
void COOXMLWriter::SetHTMLParameters(THTMLParameters* pHTMLParameters)
{
m_pHTMLParameters = pHTMLParameters;
}
void COOXMLWriter::SetSrcDirectory(const std::wstring& wsPath)
{
m_pSrcPath = &wsPath;
@ -401,6 +391,8 @@ bool COOXMLWriter::OpenR()
if (m_arStates.top().m_bInR)
return false;
OpenHyperlink();
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:r>");
m_arStates.top().m_bInR = true;
return true;
@ -416,6 +408,22 @@ bool COOXMLWriter::OpenT()
return true;
}
// Opens a hyperlink element for the current state if hyperlink data is pending.
// Nothing happens when a hyperlink is already open or no href has been stored.
void COOXMLWriter::OpenHyperlink()
{
	const auto& oState = m_arStates.top();

	if (oState.m_bInHyperlink || oState.m_wsHref.empty())
		return;

	// Cross hyperlinks and external hyperlinks are written by different helpers.
	if (oState.m_bISCrossHyperlink)
		OpenCrossHyperlink(oState.m_wsHref);
	else
		OpenExternalHyperlink(oState.m_wsHref, oState.m_wsTooltip);

	m_arStates.top().m_bInHyperlink = true;
}
void COOXMLWriter::CloseP()
{
m_arStates.top().m_bWasSpace = true;
@ -425,6 +433,7 @@ void COOXMLWriter::CloseP()
CloseT();
CloseR();
CloseHyperlink();
m_arStates.top().m_pCurrentDocument->WriteString(L"</w:p>");
m_arStates.top().m_bInP = false;
@ -448,6 +457,36 @@ void COOXMLWriter::CloseT()
m_arStates.top().m_bInT = false;
}
// Closes the currently open <w:hyperlink> (no-op when none is open) and, when footnote
// data is attached to the state, writes the footnote mark in its own run right after it:
//   - m_bIsFootnote == false: a <w:footnoteReference> with a freshly allocated id, which
//     is also registered in m_mFootnotes under the footnote name;
//   - m_bIsFootnote == true:  a <w:footnoteRef/>.
void COOXMLWriter::CloseHyperlink()
{
	if (!m_arStates.top().m_bInHyperlink)
		return;

	m_arStates.top().m_pCurrentDocument->WriteString(L"</w:hyperlink>");
	m_arStates.top().m_bInHyperlink = false;

	// Footnote
	if (m_arStates.top().m_wsFootnote.empty())
		return;

	// OpenR() calls OpenHyperlink(), which opens a new <w:hyperlink> whenever m_wsHref is
	// set. Without hiding the href the footnote run would re-open the hyperlink that was
	// just closed and leave it open. The href is restored once the run is written.
	std::wstring wsSavedHref;
	wsSavedHref.swap(m_arStates.top().m_wsHref);

	if (!m_arStates.top().m_bIsFootnote)
	{
		std::wstring sFootnoteID = std::to_wstring(m_nFootnoteId++);
		OpenR();
		m_arStates.top().m_pCurrentDocument->WriteString(L"<w:rPr><w:rStyle w:val=\"footnote\"/></w:rPr><w:footnoteReference w:id=\"");
		m_arStates.top().m_pCurrentDocument->WriteString(sFootnoteID);
		m_arStates.top().m_pCurrentDocument->WriteString(L"\"/>");
		CloseR();
		m_mFootnotes.insert(std::make_pair(m_arStates.top().m_wsFootnote, sFootnoteID));
	}
	else
	{
		OpenR();
		m_arStates.top().m_pCurrentDocument->WriteString(L"<w:rPr><w:rStyle w:val=\"footnote\"/></w:rPr><w:footnoteRef/>");
		CloseR();
	}

	m_arStates.top().m_wsHref.swap(wsSavedHref);
}
void COOXMLWriter::BeginBlock()
{
CloseP();
@ -534,7 +573,35 @@ void COOXMLWriter::Break(const std::vector<NSCSS::CNode>& arSelectors)
m_arStates.top().m_bWasSpace = true;
}
void COOXMLWriter::OpenCrossHyperlink(const std::wstring& wsRef, const std::vector<NSCSS::CNode>& arSelectors)
// Stores the data of the hyperlink that following runs should be wrapped in.
// Any hyperlink that is still open is closed first.
//   wsRef       - link target
//   wsTooltip   - tooltip text
//   bIsCross    - true for a cross hyperlink, false for an external one
//   wsFootnote  - footnote name attached to the link (may be empty)
//   bIsFootnote - selects which footnote mark CloseHyperlink writes
void COOXMLWriter::SetHyperlinkData(const std::wstring& wsRef, const std::wstring& wsTooltip, bool bIsCross, const std::wstring& wsFootnote, bool bIsFootnote)
{
	CloseHyperlink();

	//TODO:: think about a better way to organize hyperlink handling
	auto& oState = m_arStates.top();

	oState.m_wsHref            = wsRef;
	oState.m_wsTooltip         = wsTooltip;
	oState.m_bISCrossHyperlink = bIsCross;
	oState.m_wsFootnote        = wsFootnote;
	oState.m_bIsFootnote       = bIsFootnote;
}
// Resets the hyperlink data stored in the current state (href, tooltip, cross flag,
// footnote name and footnote flag).
// NOTE(review): m_bInHyperlink is not touched here, so a hyperlink that is already open
// stays open until CloseHyperlink() runs - confirm this is intended.
void COOXMLWriter::ClearHyperlinkData()
{
	auto& oState = m_arStates.top();

	oState.m_wsHref.clear();
	oState.m_wsTooltip.clear();
	oState.m_bISCrossHyperlink = false;
	oState.m_wsFootnote.clear();
	oState.m_bIsFootnote = false;
}
// Writes a paragraph whose only content is a <w:pPr> with <w:pageBreakBefore/>.
// NOTE(review): the result of OpenP() is ignored; presumably it reports whether a new
// paragraph was opened - if one was already open, the <w:pPr> would be written into it.
// Confirm that callers only invoke this between paragraphs.
void COOXMLWriter::PageBreak()
{
OpenP();
GetCurrentDocument()->WriteString(L"<w:pPr><w:pageBreakBefore/></w:pPr>");
CloseP();
}
void COOXMLWriter::OpenCrossHyperlink(const std::wstring& wsRef)
{
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:hyperlink w:anchor=\"");
const size_t nSharp = wsRef.find('#');
@ -552,7 +619,7 @@ void COOXMLWriter::OpenCrossHyperlink(const std::wstring& wsRef, const std::vect
m_arStates.top().m_pCurrentDocument->WriteString(L"\">");
}
void COOXMLWriter::OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip, const std::vector<NSCSS::CNode>& arSelectors)
void COOXMLWriter::OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip)
{
XmlString& oRelationshipXml(m_oDocXmlRels);
@ -647,7 +714,6 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
break;
}
if (sPStyle.empty() && m_arDivId.empty() && wsAnchor.empty())
return L"";
@ -662,6 +728,7 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
int nLiLevel{-1};
bool bNumberingLi{false};
bool bInTable{false};
for (const NSCSS::CNode& oNode : arSelectors)
{
@ -669,6 +736,11 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
bNumberingLi = true;
else if (L"ul" == oNode.m_wsName)
bNumberingLi = false;
else if (L"table" == oNode.m_wsName)
{
bInTable = true;
continue;
}
else
continue;
@ -679,20 +751,15 @@ std::wstring COOXMLWriter::WritePPr(const std::vector<NSCSS::CNode>& arSelectors
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:numPr><w:ilvl w:val=\"" + std::to_wstring(nLiLevel) + L"\"/><w:numId w:val=\"" +
(!bNumberingLi ? L"1" : std::to_wstring(m_nListId)) + L"\"/></w:numPr>");
if (!m_arDivId.empty())
if (!m_arDivId.empty() && !bInTable)
m_arStates.top().m_pCurrentDocument->WriteString(L"<w:divId w:val=\"" + m_arDivId.top() + L"\"/>");
// m_pCurrentDocument->WriteString(oTS.sPStyle + sPSettings);
m_arStates.top().m_pCurrentDocument->WriteNodeEnd(L"w:pPr");
m_arStates.top().m_bWasPStyle = true;
if (!wsAnchor.empty())
{
// const anchors_map::const_iterator itAnchor{m_mAnchors.find(wsAnchor)};
// if (m_mAnchors.cend() != itAnchor)
WriteEmptyBookmark(wsAnchor);
}
return sPStyle;
}
@ -797,7 +864,6 @@ bool COOXMLWriter::WriteText(std::wstring wsText, const std::vector<NSCSS::CNode
const bool bInT = m_arStates.top().m_bInT;
//TODO:: сделать так, чтобы параграф (со своими стилями) открывался при чтении сооответствующей ноды, а не при чтении текста
OpenP();
WritePPr(arSelectors);

View File

@ -66,7 +66,13 @@ class COOXMLWriter : public IWriter
bool m_bInT; // <w:t> открыт?
bool m_bWasPStyle; // <w:pStyle> записан?
bool m_bWasSpace; // Был пробел?
bool m_bInHyperlink; // <w:hyperlink> открыт?
std::wstring m_wsTooltip;
std::wstring m_wsHref;
std::wstring m_wsFootnote;
bool m_bIsFootnote;
bool m_bISCrossHyperlink;
XmlString *m_pCurrentDocument; //Текущее место записи
bool m_bRemoveCurrentDocument;
@ -74,6 +80,7 @@ class COOXMLWriter : public IWriter
TState(XmlString *pCurrentDocument)
: m_bInP(false), m_bInR(false), m_bInT(false),
m_bWasPStyle(false), m_bWasSpace(true), m_bInHyperlink(false),
m_bIsFootnote(false), m_bISCrossHyperlink(false),
m_pCurrentDocument(pCurrentDocument), m_bRemoveCurrentDocument(false)
{}
@ -109,10 +116,7 @@ class COOXMLWriter : public IWriter
NSFonts::IApplicationFonts* m_pFonts; // Необходимо для оптимизации работы со шрифтами
public:
COOXMLWriter();
void SetCSSCalculator(NSCSS::CCssCalculator* pCSSCalculator);
void SetHTMLParameters(THTMLParameters* pHTMLParameters);
COOXMLWriter(THTMLParameters* pHTMLParameters = nullptr, NSCSS::CCssCalculator* pCSSCalculator = nullptr);
void SetSrcDirectory (const std::wstring& wsPath);
void SetDstDirectory (const std::wstring& wsPath);
@ -126,10 +130,14 @@ public:
bool OpenP();
bool OpenR();
bool OpenT();
void OpenHyperlink();
void CloseP();
void CloseR();
void CloseT();
void CloseHyperlink();
void PageBreak() override;
void BeginBlock() override;
void EndBlock(bool bAddBlock) override;
@ -144,8 +152,11 @@ public:
void Break(const std::vector<NSCSS::CNode>& arSelectors);
void OpenCrossHyperlink(const std::wstring& wsRef, const std::vector<NSCSS::CNode>& arSelectors);
void OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip, const std::vector<NSCSS::CNode>& arSelectors);
void SetHyperlinkData(const std::wstring& wsRef, const std::wstring& wsTooltip, bool bIsCross, const std::wstring& wsFootnote, bool bIsFootnote);
void ClearHyperlinkData();
void OpenCrossHyperlink(const std::wstring& wsRef);
void OpenExternalHyperlink(const std::wstring& wsRef, const std::wstring& wsTooltip);
void CloseCrossHyperlink(const std::vector<NSCSS::CNode>& arSelectors, std::wstring wsFootnote, const std::wstring& wsRef);
void CloseExternalHyperlink();

View File

@ -5106,15 +5106,15 @@ HRESULT CHtmlFile2::ConvertHTML2Markdown(const std::wstring& wsPath, const std::
#endif
}
HRESULT CHtmlFile2::OpenMht(const std::wstring& sSrc, const std::wstring& sDst)
HRESULT CHtmlFile2::ConvertMHT2OOXML(const std::wstring& wsPath, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs)
{
#ifdef USE_OLD_HTML_CONVERTER
if(!m_internal->m_oLightReader.IsValid())
if(!IsMhtFile(sSrc))
if(!IsMhtFile(wsPath))
return S_FALSE;
m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sSrc);
m_internal->m_sDst = sDst;
m_internal->m_sSrc = NSSystemPath::GetDirectoryName(wsPath);
// Destination comes from the wsDirectory parameter of this function.
m_internal->m_sDst = wsDirectory;
m_internal->CreateDocxEmpty(oParams);
m_internal->readStyle();
@ -5128,22 +5128,37 @@ HRESULT CHtmlFile2::OpenMht(const std::wstring& sSrc, const std::wstring& sDst)
m_internal->write();
return S_OK;
#else
return S_FALSE;
if (nullptr == m_pReader)
return S_FALSE;
return m_pReader->ConvertMHT2OOXML(wsPath, wsDirectory, pParametrs);
#endif
}
HRESULT CHtmlFile2::OpenBatchHtml(const std::vector<std::wstring>& sSrc, const std::wstring& sDst)
// Forwards a single-path MHT -> Markdown request to m_pReader.
//   wsPath      - passed through as the source path
//   wsFinalFile - passed through as the resulting file
//   pParametrs  - passed through unchanged
// Returns S_FALSE when built with USE_OLD_HTML_CONVERTER or when m_pReader is null;
// otherwise returns whatever m_pReader->ConvertMHT2Markdown returns.
HRESULT CHtmlFile2::ConvertMHT2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs)
{
#ifdef USE_OLD_HTML_CONVERTER
	// This build configuration does not handle the request: always S_FALSE.
	return S_FALSE;
#else
	if (nullptr == m_pReader)
		return S_FALSE;

	return m_pReader->ConvertMHT2Markdown(wsPath, wsFinalFile, pParametrs);
#endif
}
HRESULT CHtmlFile2::ConvertHTML2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs)
{
#ifdef USE_OLD_HTML_CONVERTER
m_internal->m_sDst = wsDirectory;
m_internal->CreateDocxEmpty(oParams);
bool bFirst = true;
for(const std::wstring& sS : sSrc)
for(const std::wstring& sS : arPaths)
{
#ifdef _DEBUG
#ifdef _DEBUG
std::wcout << NSFile::GetFileName(sS) << std::endl;
#endif
#endif
m_internal->m_sSrc = NSSystemPath::GetDirectoryName(sS);
if(!IsHtmlFile(sS))
@ -5171,8 +5186,48 @@ HRESULT CHtmlFile2::OpenBatchHtml(const std::vector<std::wstring>& sSrc, const s
m_internal->write();
return S_OK;
#else
if (nullptr == m_pReader)
return S_FALSE;
return m_pReader->ConvertHTML2OOXML(arPaths, wsDirectory, pParametrs);
#endif
}
// Multi-path HTML -> Markdown entry point.
// Hands arPaths, wsFinalFile and pParametrs to m_pReader and returns its result.
// Yields S_FALSE when m_pReader is null or when built with USE_OLD_HTML_CONVERTER.
HRESULT CHtmlFile2::ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs)
{
#ifndef USE_OLD_HTML_CONVERTER
	if (nullptr != m_pReader)
		return m_pReader->ConvertHTML2Markdown(arPaths, wsFinalFile, pParametrs);

	return S_FALSE;
#else
	return S_FALSE;
#endif
}
// Multi-path MHT -> OOXML entry point.
// Hands arPaths, wsDirectory and pParametrs to m_pReader and returns its result.
// Yields S_FALSE when m_pReader is null or when built with USE_OLD_HTML_CONVERTER.
HRESULT CHtmlFile2::ConvertMHT2OOXML(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs)
{
#ifndef USE_OLD_HTML_CONVERTER
	if (nullptr != m_pReader)
		return m_pReader->ConvertMHT2OOXML(arPaths, wsDirectory, pParametrs);

	return S_FALSE;
#else
	return S_FALSE;
#endif
}
// Multi-path MHT -> Markdown entry point.
// Hands arPaths, wsFinalFile and pParametrs to m_pReader and returns its result.
// Yields S_FALSE when m_pReader is null or when built with USE_OLD_HTML_CONVERTER.
HRESULT CHtmlFile2::ConvertMHT2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs)
{
#ifndef USE_OLD_HTML_CONVERTER
	if (nullptr != m_pReader)
		return m_pReader->ConvertMHT2Markdown(arPaths, wsFinalFile, pParametrs);

	return S_FALSE;
#else
	return S_FALSE;
#endif
}
#ifdef USE_OLD_HTML_CONVERTER

View File

@ -44,17 +44,14 @@ public:
HRESULT ConvertHTML2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
HRESULT ConvertHTML2Markdown(const std::wstring& wsPath, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
HRESULT ConvertMHT2OOXML (const std::wstring& sPath, const std::wstring& sDirectory, HTML::THTMLParameters* pParametrs = nullptr);
HRESULT ConvertMHT2Markdown (const std::wstring& sPath, const std::wstring& sDirectory, HTML::TMarkdownParameters* pParametrs = nullptr);
HRESULT ConvertMHT2OOXML (const std::wstring& wsPath, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
HRESULT ConvertMHT2Markdown (const std::wstring& wsPath, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
HRESULT ConvertHTML2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
HRESULT ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::TMarkdownParameters* pParametrs = nullptr);
HRESULT ConvertHTML2Markdown(const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
HRESULT ConvertMHT2OOXML (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::THTMLParameters* pParametrs = nullptr);
HRESULT ConvertMHT2Markdown (const std::vector<std::wstring>& arPaths, const std::wstring& wsDirectory, HTML::TMarkdownParameters* pParametrs = nullptr);
HRESULT OpenMht (const std::wstring& sPath, const std::wstring& sDirectory);
HRESULT OpenBatchHtml(const std::vector<std::wstring>& sPath, const std::wstring& sDirectory);
HRESULT ConvertMHT2Markdown (const std::vector<std::wstring>& arPaths, const std::wstring& wsFinalFile, HTML::TMarkdownParameters* pParametrs = nullptr);
};
#endif // _HTMLFILE2_HTMLFILE2_H

View File

@ -198,6 +198,8 @@ namespace NExtractTools
DECLARE_CONVERT_FUNC(doct_bin2html);
DECLARE_CONVERT_FUNC(doct_bin2html_zip);
DECLARE_CONVERT_FUNC(html2md);
// mht
DECLARE_CONVERT_FUNC(mht2docx_dir);

View File

@ -83,7 +83,7 @@ namespace NExtractTools
params.m_bMacro = false;
CHtmlFile2 oFile;
oFile.SetTmpDirectory(convertParams.m_sTempDir);
oFile.SetTempDirectory(convertParams.m_sTempDir);
// CHtmlFile2::OpenBatchHtml is no longer declared; the multi-path ConvertHTML2OOXML overload is its renamed counterpart.
return (S_OK == oFile.ConvertHTML2OOXML(arFiles, sTo)) ? 0 : AVS_FILEUTILS_ERROR_CONVERT;
}
_UINT32 html2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
@ -129,7 +129,7 @@ namespace NExtractTools
// Runs CHtmlFile2::ConvertMHT2OOXML on sFrom, passing sTo as its directory argument.
//   sFrom         - source path handed to the converter
//   sTo           - directory argument handed to the converter
//   params        - not used here
//   convertParams - supplies m_sTempDir, which is set as the converter's temp directory
// Returns 0 when the converter reports S_OK, AVS_FILEUTILS_ERROR_CONVERT otherwise.
_UINT32 mht2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
	CHtmlFile2 oFile;
	oFile.SetTempDirectory(convertParams.m_sTempDir);
	return (S_OK == oFile.ConvertMHT2OOXML(sFrom, sTo)) ? 0 : AVS_FILEUTILS_ERROR_CONVERT;
}
// epub
@ -258,4 +258,12 @@ namespace NExtractTools
{
return Md::ConvertMdFileToHtml(sFrom, sTo) ? S_OK : AVS_FILEUTILS_ERROR_CONVERT;
}
// html -> markdown
// Sets the converter's temp directory from convertParams.m_sTempDir, then calls
// CHtmlFile2::ConvertHTML2Markdown(sFrom, sTo). `params` is not used here.
// Returns 0 when the call reports S_OK, AVS_FILEUTILS_ERROR_CONVERT otherwise.
_UINT32 html2md(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
	CHtmlFile2 oConverter;
	oConverter.SetTempDirectory(convertParams.m_sTempDir);

	if (S_OK == oConverter.ConvertHTML2Markdown(sFrom, sTo))
		return 0;

	return AVS_FILEUTILS_ERROR_CONVERT;
}
}