HWPML format added to x2t and refactoring

This commit is contained in:
Kirill Polyakov
2025-09-19 16:46:51 +03:00
parent e7a57c1584
commit 0ed11abb38
6 changed files with 106 additions and 17 deletions

View File

@ -69,6 +69,7 @@ public:
bool isOpenOfficeFormatFile(const std::wstring& fileName, std::wstring& documentID); bool isOpenOfficeFormatFile(const std::wstring& fileName, std::wstring& documentID);
bool isOnlyOfficeFormatFile(const std::wstring& fileName); bool isOnlyOfficeFormatFile(const std::wstring& fileName);
bool isMacFormatFile(const std::wstring& fileName); bool isMacFormatFile(const std::wstring& fileName);
bool isHwpxFile(const std::wstring& fileName);
bool isDocFormatFile(const std::wstring& fileName); bool isDocFormatFile(const std::wstring& fileName);
bool isXlsFormatFile(const std::wstring& fileName); bool isXlsFormatFile(const std::wstring& fileName);
@ -99,6 +100,7 @@ public:
bool isPdfFormatFile(unsigned char* pBuffer, int dwBytes, std::wstring& documentID); bool isPdfFormatFile(unsigned char* pBuffer, int dwBytes, std::wstring& documentID);
bool isPdfOformFormatFile(unsigned char* pBuffer, int dwBytes); bool isPdfOformFormatFile(unsigned char* pBuffer, int dwBytes);
bool isOpenOfficeFlatFormatFile(unsigned char* pBuffer, int dwBytes); bool isOpenOfficeFlatFormatFile(unsigned char* pBuffer, int dwBytes);
bool isHwpmlFile(unsigned char* pBuffer, int dwBytes);
bool isBinaryDoctFormatFile(unsigned char* pBuffer, int dwBytes); bool isBinaryDoctFormatFile(unsigned char* pBuffer, int dwBytes);
bool isBinaryXlstFormatFile(unsigned char* pBuffer, int dwBytes); bool isBinaryXlstFormatFile(unsigned char* pBuffer, int dwBytes);

View File

@ -467,15 +467,18 @@ bool COfficeFileFormatChecker::isHwpFile(POLE::Storage* storage)
if (storage == NULL) if (storage == NULL)
return false; return false;
unsigned char buffer[10]; unsigned char buffer[17];
POLE::Stream stream(storage, L"BodyText/Section0"); POLE::Stream stream(storage, L"FileHeader");
if (stream.read(buffer, 10) < 1)
{ static constexpr const char* hwpFormatLine = "HWP Document File";
return false;
} if (17 == stream.read(buffer, 17) && NULL != strstr((char*)buffer, hwpFormatLine))
return true; return true;
return false;
} }
bool COfficeFileFormatChecker::isXlsFormatFile(POLE::Storage *storage) bool COfficeFileFormatChecker::isXlsFormatFile(POLE::Storage *storage)
{ {
if (storage == NULL) if (storage == NULL)
@ -811,6 +814,13 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring &_fileName)
bufferDetect = NULL; bufferDetect = NULL;
return true; return true;
} }
else if (isHwpxFile(fileName))
{
if (bufferDetect)
delete[] bufferDetect;
bufferDetect = NULL;
return true;
}
} }
//----------------------------------------------------------------------------------------------- //-----------------------------------------------------------------------------------------------
@ -890,6 +900,10 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring &_fileName)
{ {
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT; nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_MHT;
} }
else if (isHwpmlFile(bufferDetect, sizeRead))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
}
//------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------
file.CloseFile(); file.CloseFile();
} }
@ -959,6 +973,8 @@ bool COfficeFileFormatChecker::isOfficeFile(const std::wstring &_fileName)
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP; nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP;
else if (0 == sExt.compare(L".hwpx")) else if (0 == sExt.compare(L".hwpx"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX; nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX;
else if (0 == sExt.compare(L".hml"))
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN) if (nFileType != AVS_OFFICESTUDIO_FILE_UNKNOWN)
return true; return true;
@ -1403,6 +1419,31 @@ bool COfficeFileFormatChecker::isMacFormatFile(const std::wstring& fileName)
return true; return true;
} }
bool COfficeFileFormatChecker::isHwpxFile(const std::wstring &fileName)
{
COfficeUtils oOfficeUtils;
ULONG unSize = 0;
BYTE* pBuffer = NULL;
HRESULT hresult = oOfficeUtils.LoadFileFromArchive(fileName, L"mimetype", &pBuffer, unSize);
if (hresult != S_OK || NULL == pBuffer)
return false;
static constexpr const char* hwpxFormatLine = "application/hwp+zip";
bool bResult = false;
if (19 <= unSize && NULL != strstr((char *)pBuffer, hwpxFormatLine))
{
nFileType = AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
bResult = true;
}
delete[] pBuffer;
return bResult;
}
bool COfficeFileFormatChecker::isOpenOfficeFormatFile(const std::wstring &fileName, std::wstring &documentID) bool COfficeFileFormatChecker::isOpenOfficeFormatFile(const std::wstring &fileName, std::wstring &documentID)
{ {
documentID.clear(); documentID.clear();
@ -1568,6 +1609,28 @@ bool COfficeFileFormatChecker::isOpenOfficeFlatFormatFile(unsigned char *pBuffer
return false; return false;
} }
// Scans a raw byte buffer for an HWPML signature: either the document type
// declaration "<!DOCTYPE HWPML" or the root element start "<HWPML".
//
// pBuffer - bytes to inspect (may be NULL).
// dwBytes - number of valid bytes in pBuffer.
// Returns true when either marker occurs anywhere in the buffer; false for a
// NULL buffer, for fewer than 8 bytes, or when no marker is found.
bool COfficeFileFormatChecker::isHwpmlFile(unsigned char *pBuffer, int dwBytes)
{
	if (NULL == pBuffer || dwBytes < 8)
		return false;

	static const char docTypeMarker[] = "<!DOCTYPE HWPML";
	static const char rootMarker[] = "<HWPML";

	const int nDocTypeLength = (int)(sizeof(docTypeMarker) - 1);
	const int nRootLength = (int)(sizeof(rootMarker) - 1);

	// Every position that can still hold the shorter marker is examined, so a
	// marker located in the tail of the buffer is not missed.
	for (int nPos = 0; nPos <= dwBytes - nRootLength; ++nPos)
	{
		if ('<' != pBuffer[nPos])
			continue;

		const int nRemaining = dwBytes - nPos;

		if (nRemaining >= nDocTypeLength && 0 == memcmp(&pBuffer[nPos], docTypeMarker, nDocTypeLength))
			return true;

		// nRemaining >= nRootLength is guaranteed by the loop bound.
		if (0 == memcmp(&pBuffer[nPos], rootMarker, nRootLength))
			return true;
	}

	return false;
}
bool COfficeFileFormatChecker::isOOXFlatFormatFile(unsigned char *pBuffer, int dwBytes) bool COfficeFileFormatChecker::isOOXFlatFormatFile(unsigned char *pBuffer, int dwBytes)
{ {
if (dwBytes < 8) if (dwBytes < 8)
@ -1676,6 +1739,8 @@ std::wstring COfficeFileFormatChecker::GetExtensionByType(int type)
return L".hwp"; return L".hwp";
case AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX: case AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX:
return L".hwpx"; return L".hwpx";
case AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML:
return L".hml";
case AVS_OFFICESTUDIO_FILE_DOCUMENT_MD: case AVS_OFFICESTUDIO_FILE_DOCUMENT_MD:
return L".md"; return L".md";
@ -1861,6 +1926,8 @@ int COfficeFileFormatChecker::GetFormatByExtension(const std::wstring &sExt)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP; return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP;
if (L".hwpx" == ext) if (L".hwpx" == ext)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX; return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX;
if (L".hml" == ext)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML;
if (L".md" == ext) if (L".md" == ext)
return AVS_OFFICESTUDIO_FILE_DOCUMENT_MD; return AVS_OFFICESTUDIO_FILE_DOCUMENT_MD;

View File

@ -59,7 +59,8 @@
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0018 #define AVS_OFFICESTUDIO_FILE_DOCUMENT_PAGES AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0018
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0019 #define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWP AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0019
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001a #define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPX AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001a
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_MD AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001b #define AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001b
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_MD AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x001c
#define AVS_OFFICESTUDIO_FILE_DOCUMENT_XML AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0030 #define AVS_OFFICESTUDIO_FILE_DOCUMENT_XML AVS_OFFICESTUDIO_FILE_DOCUMENT + 0x0030

View File

@ -696,6 +696,10 @@ namespace NExtractTools
{ {
nRes = hwpx2docx_dir(sFrom, sDocxDir, params, convertParams); nRes = hwpx2docx_dir(sFrom, sDocxDir, params, convertParams);
} }
else if (AVS_OFFICESTUDIO_FILE_DOCUMENT_HWPML == nFormatFrom)
{
nRes = hwpml2docx_dir(sFrom, sDocxDir, params, convertParams);
}
else else
nRes = AVS_FILEUTILS_ERROR_CONVERT_PARAMS; nRes = AVS_FILEUTILS_ERROR_CONVERT_PARAMS;

View File

@ -294,6 +294,10 @@ namespace NExtractTools
DECLARE_CONVERT_FUNC(hwpx2docx); DECLARE_CONVERT_FUNC(hwpx2docx);
DECLARE_CONVERT_FUNC(hwpx2docx_dir); DECLARE_CONVERT_FUNC(hwpx2docx_dir);
//HWPML
DECLARE_CONVERT_FUNC(hwpml2docx);
DECLARE_CONVERT_FUNC(hwpml2docx_dir);
//------------------------------------------------------------------------------------------------------------------------------------------------- //-------------------------------------------------------------------------------------------------------------------------------------------------
_UINT32 convertmailmerge(const InputParamsMailMerge& oMailMergeSend, const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams); _UINT32 convertmailmerge(const InputParamsMailMerge& oMailMergeSend, const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams);

View File

@ -6,17 +6,18 @@
namespace NExtractTools namespace NExtractTools
{ {
_UINT32 hwp_file2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams, bool bIsXmlFormat, bool bConvertToDir) template <typename OpenMethod>
_UINT32 hwp_file2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams, OpenMethod openMethod, bool bConvertToDir)
{ {
CHWPFile oFile; CHWPFile oFile;
oFile.SetTempDirectory(convertParams.m_sTempDir); oFile.SetTempDirectory(convertParams.m_sTempDir);
params.m_bMacro = false; params.m_bMacro = false;
if (((bIsXmlFormat && !oFile.OpenHWPX(sFrom)) ||
(!bIsXmlFormat && !oFile.OpenHWP(sFrom))) || if (!openMethod(oFile, sFrom) ||
((bConvertToDir && !oFile.ConvertToOOXML_Dir(sTo)) || (bConvertToDir && !oFile.ConvertToOOXML_Dir(sTo)) ||
(!bConvertToDir && !oFile.ConvertToOOXML(sTo)))) (!bConvertToDir && !oFile.ConvertToOOXML(sTo)))
return AVS_FILEUTILS_ERROR_CONVERT; return AVS_FILEUTILS_ERROR_CONVERT;
return 0; return 0;
@ -24,22 +25,32 @@ namespace NExtractTools
_UINT32 hwp2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams) _UINT32 hwp2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{ {
return hwp_file2docx(sFrom, sTo, params, convertParams, false, false); return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWP(sFrom); }, false);
} }
_UINT32 hwp2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams) _UINT32 hwp2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{ {
return hwp_file2docx(sFrom, sTo, params, convertParams, false, true); return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWP(sFrom); }, true);
} }
_UINT32 hwpx2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams) _UINT32 hwpx2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{ {
return hwp_file2docx(sFrom, sTo, params, convertParams, true, false); return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWPX(sFrom); }, false);
} }
_UINT32 hwpx2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams) _UINT32 hwpx2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{ {
return hwp_file2docx(sFrom, sTo, params, convertParams, true, true); return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWPX(sFrom); }, true);
}
// Converts the HWPML document at sFrom to sTo by delegating to hwp_file2docx
// with an opener that calls CHWPFile::OpenHWPML and with directory output
// disabled. The result of hwp_file2docx is returned unchanged.
_UINT32 hwpml2docx(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
	// Opener handed to the shared conversion routine: loads the source as HWPML.
	auto fnOpenHwpml = [](CHWPFile& oHwpFile, const std::wstring& sSourcePath)
	{
		return oHwpFile.OpenHWPML(sSourcePath);
	};

	const bool bConvertToDir = false;

	return hwp_file2docx(sFrom, sTo, params, convertParams, fnOpenHwpml, bConvertToDir);
}
// Directory-output variant of the HWPML conversion: delegates to hwp_file2docx
// with an opener that calls CHWPFile::OpenHWPML and returns its result unchanged.
_UINT32 hwpml2docx_dir(const std::wstring& sFrom, const std::wstring& sTo, InputParams& params, ConvertParams& convertParams)
{
// The trailing `true` is the bConvertToDir flag of hwp_file2docx, which selects
// ConvertToOOXML_Dir(sTo) instead of ConvertToOOXML(sTo).
return hwp_file2docx(sFrom, sTo, params, convertParams, [](CHWPFile& oFile, const std::wstring& sFrom){ return oFile.OpenHWPML(sFrom); }, true);
} }
} }