HWPML format added to x2t and refactoring

This commit is contained in:
Kirill Polyakov
2025-09-19 16:46:51 +03:00
parent e7a57c1584
commit 0ed11abb38
6 changed files with 106 additions and 17 deletions

View File

@ -69,6 +69,7 @@ public:
bool isOpenOfficeFormatFile(const std::wstring& fileName, std::wstring& documentID);
bool isOnlyOfficeFormatFile(const std::wstring& fileName);
bool isMacFormatFile(const std::wstring& fileName);
bool isHwpxFile(const std::wstring& fileName);
bool isDocFormatFile(const std::wstring& fileName);
bool isXlsFormatFile(const std::wstring& fileName);
@ -99,6 +100,7 @@ public:
bool isPdfFormatFile(unsigned char* pBuffer, int dwBytes, std::wstring& documentID);
bool isPdfOformFormatFile(unsigned char* pBuffer, int dwBytes);
bool isOpenOfficeFlatFormatFile(unsigned char* pBuffer, int dwBytes);
bool isHwpmlFile(unsigned char* pBuffer, int dwBytes);
bool isBinaryDoctFormatFile(unsigned char* pBuffer, int dwBytes);
bool isBinaryXlstFormatFile(unsigned char* pBuffer, int dwBytes);

View File

@ -467,15 +467,18 @@ bool COfficeFileFormatChecker::isHwpFile(POLE::Storage* storage)
if (storage == NULL)
return false;
unsigned char buffer[10];
unsigned char buffer[17];
POLE::Stream stream(storage, L"BodyText/Section0");
if (stream.read(buffer, 10) < 1)
{
return false;
}
return true;
POLE::Stream stream(storage, L"FileHeader");
// The signature is exactly 17 characters and fills the whole 17-byte buffer,
// so the buffer holds no terminating NUL. Compare with a bounded memcmp
// instead of strstr, which would keep scanning past the end of the array.
static constexpr const char* hwpFormatLine = "HWP Document File";
if (17 == stream.read(buffer, 17) && 0 == memcmp(buffer, hwpFormatLine, 17))
	return true;
return false;
}
bool COfficeFileFormatChecker::isXlsFormatFile(POLE::Storage *storage)
{
if (storage == NULL)
@ -811,6 +814,13 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring &_fileName)
bufferDetect = NULL;
return true;
}
else if (isHwpxFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
}
//-----------------------------------------------------------------------------------------------
@ -890,6 +900,10 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring &_fileName)
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT;
}
else if (isHwpmlFile(bufferDetect, sizeRead))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
}
//------------------------------------------------------------------------------------------------
file.CloseFile();
}
@ -959,6 +973,8 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring &_fileName)
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP;
else if (0 == sExt.compare(L".hwpx"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX;
else if (0 == sExt.compare(L".hml"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true;
@ -1403,6 +1419,31 @@ bool COfficeFileFormatChecker::isMacFormatFile(const std::wstring& fileName)
return true;
}
bool COfficeFileFormatChecker::isHwpxFile(const std::wstring &fileName)
{
COfficeUtils oOfficeUtils;
ULONG unSize = 0;
BYTE* pBuffer = NULL;
HRESULT hresult = oOfficeUtils.LoadFileFromArchive(fileName, L"mimetype", &pBuffer, unSize);
if (hresult != S_OK || NULL == pBuffer)
return false;
static constexpr const char* hwpxFormatLine = "application/hwp+zip";
bool bResult = false;
if (19 <= unSize && NULL != strstr((char *)pBuffer, hwpxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
bResult = true;
}
delete[] pBuffer;
return bResult;
}
bool COfficeFileFormatChecker::isOpenOfficeFormatFile(const std::wstring &fileName, std::wstring &documentID)
{
documentID.clear();
@ -1568,6 +1609,28 @@ bool COfficeFileFormatChecker::isOpenOfficeFlatFormatFile(unsigned char *pBuffer
return false;
}
// Detects an HWPML (XML flavour of HWP) document by scanning the probe buffer
// for either the "<!DOCTYPE HWPML" declaration or an opening "<HWPML" tag.
//
// pBuffer - first bytes of the file; may be NULL.
// dwBytes - number of valid bytes in pBuffer.
//
// Returns true when one of the markers is found anywhere in the buffer,
// false otherwise (including NULL or shorter-than-8-byte input).
bool COfficeFileFormatChecker::isHwpmlFile(unsigned char *pBuffer, int dwBytes)
{
	if (NULL == pBuffer || dwBytes < 8)
		return false;

	static constexpr char doctypeMarker[] = "<!DOCTYPE HWPML";
	static constexpr char rootMarker[] = "<HWPML";
	const int nDoctypeLength = sizeof(doctypeMarker) - 1; // 15
	const int nRootLength = sizeof(rootMarker) - 1;       // 6

	// Visit every offset at which the shorter marker still fits. The previous
	// bound (dwBytes - 8) skipped the tail of the buffer and never scanned an
	// 8-byte buffer at all.
	for (int nPos = 0; nPos + nRootLength <= dwBytes; ++nPos)
	{
		if ('<' != pBuffer[nPos])
			continue;

		const int nLeft = dwBytes - nPos;

		if (nLeft >= nDoctypeLength && 0 == memcmp(&pBuffer[nPos], doctypeMarker, nDoctypeLength))
			return true;

		// nLeft >= nRootLength is guaranteed by the loop condition.
		if (0 == memcmp(&pBuffer[nPos], rootMarker, nRootLength))
			return true;
	}

	return false;
}
bool COfficeFileFormatChecker::isOOXFlatFormatFile(unsigned char *pBuffer, int dwBytes)
{
if (dwBytes < 8)
@ -1676,6 +1739,8 @@ std::wstring COfficeFileFormatChecker::GetExtensionByType(int type)
return L".hwp";
case AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX:
return L".hwpx";
case AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML:
return L".hml";
case AVS_OFFICESTUDIO_FILE_DOCUMENT_MD:
return L".md";
@ -1861,6 +1926,8 @@ int COfficeFileFormatChecker::GetFormatByExtension(const std::wstring &sExt)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP;
if (L".hwpx" == ext)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX;
if (L".hml" == ext)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
if (L".md" == ext)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_MD;

View File

@ -59,7 +59,8 @@
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0018
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0019
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001a
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_MD AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001b
// HWPML is appended after MD so that MD keeps the numeric id it had before
// this format was added (+ 0x001b); renumbering an existing id would break
// any consumer that stored or exchanged the old value.
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_MD AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001b
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001c
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_XML AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0030

View File

@ -696,6 +696,10 @@ namespace NExtractTools
{
nRes = hwpx2docx_dir(sFrom, sDocxDir, params, convertParams);
}
else if (AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML == nFormatFrom)
{
nRes = hwpml2docx_dir(sFrom, sDocxDir, params, convertParams);
}
else
nRes = AVS_FILEUTILS_ERROR_CONVERT_PARAMS;

View File

@ -294,6 +294,10 @@ namespace NExtractTools
DECLARE_CONVERT_FUNC(hwpx2docx);
DECLARE_CONVERT_FUNC(hwpx2docx_dir);
//HWPML
DECLARE_CONVERT_FUNC(hwpml2docx);
DECLARE_CONVERT_FUNC(hwpml2docx_dir);
//-------------------------------------------------------------------------------------------------------------------------------------------------
_UINT32 convertmailmerge(const InputParamsMailMerge& oMailMergeSend, const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams);

View File

@ -6,17 +6,18 @@
namespace NExtractTools
{
_UINT32 hwp_file2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams, bool bIsXmlFormat, bool bConvertToDir)
template <typename OpenMethod>
_UINT32 hwp_file2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams, OpenMethod openMethod, bool bConvertToDir)
{
CHWPFile oFile;
oFile.SetTempDirectory(convertParams.m_sTempDir);
params.m_bMacro = false;
if (((bIsXmlFormat && !oFile.OpenHWPX(sFrom)) ||
(!bIsXmlFormat && !oFile.OpenHWP(sFrom))) ||
((bConvertToDir && !oFile.ConvertToOOXML_Dir(sTo)) ||
(!bConvertToDir && !oFile.ConvertToOOXML(sTo))))
if (!openMethod(oFile, sFrom) ||
(bConvertToDir && !oFile.ConvertToOOXML_Dir(sTo)) ||
(!bConvertToDir && !oFile.ConvertToOOXML(sTo)))
return AVS_FILEUTILS_ERROR_CONVERT;
return 0;
@ -24,22 +25,32 @@ namespace NExtractTools
_UINT32 hwp2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
return hwp_file2docx(sFrom, sTo, params, convertParams, false, false);
return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWP(sFrom); }, false);
}
_UINT32 hwp2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
return hwp_file2docx(sFrom, sTo, params, convertParams, false, true);
return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWP(sFrom); }, true);
}
_UINT32 hwpx2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
return hwp_file2docx(sFrom, sTo, params, convertParams, true, false);
return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWPX(sFrom); }, false);
}
_UINT32 hwpx2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
return hwp_file2docx(sFrom, sTo, params, convertParams, true, true);
return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWPX(sFrom); }, true);
}
// Converts the HWPML document at sFrom into a DOCX file at sTo.
// Opens the input with CHWPFile::OpenHWPML and delegates to hwp_file2docx
// with bConvertToDir == false; the result of hwp_file2docx is returned as is.
_UINT32 hwpml2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
	auto fnOpenHwpml = [](CHWPFile& oHwpFile, const std::wstring& sPath)
	{
		return oHwpFile.OpenHWPML(sPath);
	};

	return hwp_file2docx(sFrom, sTo, params, convertParams, fnOpenHwpml, false);
}
// Converts the HWPML document at sFrom into an unpacked DOCX directory at sTo.
// Opens the input with CHWPFile::OpenHWPML and delegates to hwp_file2docx
// with bConvertToDir == true; the result of hwp_file2docx is returned as is.
_UINT32 hwpml2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
	auto fnOpenHwpml = [](CHWPFile& oHwpFile, const std::wstring& sPath)
	{
		return oHwpFile.OpenHWPML(sPath);
	};

	return hwp_file2docx(sFrom, sTo, params, convertParams, fnOpenHwpml, true);
}
}