From ea9ef2349d750869f12eed769835fa088d31e997 Mon Sep 17 00:00:00 2001 From: "Elena.Subbotina" Date: Thu, 8 Feb 2024 15:09:27 +0300 Subject: [PATCH] fix bug #66401 remove template --- DesktopEditor/common/File.cpp | 2 +- MsBinaryFile/Common/Base/FormatUtils.cpp | 397 ++++++++++++++++++ MsBinaryFile/Common/Base/FormatUtils.h | 148 +------ .../DocFile/AnnotationReferenceDescriptor.cpp | 2 +- MsBinaryFile/DocFile/FontFamilyName.cpp | 4 +- MsBinaryFile/DocFile/ListData.cpp | 4 +- MsBinaryFile/DocFile/OleObject.cpp | 2 +- MsBinaryFile/DocFile/PictureDescriptor.cpp | 2 +- MsBinaryFile/DocFile/PieceTable.cpp | 12 +- MsBinaryFile/DocFile/StyleSheet.cpp | 2 +- .../DocFile/StyleSheetDescription.cpp | 2 +- MsBinaryFile/DocFile/VirtualStreamReader.cpp | 4 +- MsBinaryFile/DocFile/WideString.cpp | 4 +- MsBinaryFile/DocFile/WordDocument.cpp | 15 +- .../DocFormatLib/Linux/DocFormatLib.pro | 5 +- .../DocFormatLib/Windows/DocFormatLib.vcxproj | 2 + .../Windows/DocFormatLib.vcxproj.filters | 2 + 17 files changed, 431 insertions(+), 178 deletions(-) create mode 100644 MsBinaryFile/Common/Base/FormatUtils.cpp diff --git a/DesktopEditor/common/File.cpp b/DesktopEditor/common/File.cpp index 7cc6a7dbfa..87db1ec14c 100644 --- a/DesktopEditor/common/File.cpp +++ b/DesktopEditor/common/File.cpp @@ -256,7 +256,7 @@ namespace NSFile } pUnicodeString[lIndexUnicode++] = (WCHAR)(val); - lIndex += 5; + lIndex += 6; } } diff --git a/MsBinaryFile/Common/Base/FormatUtils.cpp b/MsBinaryFile/Common/Base/FormatUtils.cpp new file mode 100644 index 0000000000..7f7e4a7188 --- /dev/null +++ b/MsBinaryFile/Common/Base/FormatUtils.cpp @@ -0,0 +1,397 @@ +/* + * (c) Copyright Ascensio System SIA 2010-2023 + * + * This program is a free software product. You can redistribute it and/or + * modify it under the terms of the GNU Affero General Public License (AGPL) + * version 3 as published by the Free Software Foundation. In accordance with + * Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect + * that Ascensio System SIA expressly excludes the warranty of non-infringement + * of any third-party rights. + * + * This program is distributed WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For + * details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html + * + * You can contact Ascensio System SIA at 20A-6 Ernesta Birznieka-Upish + * street, Riga, Latvia, EU, LV-1050. + * + * The interactive user interfaces in modified source and object code versions + * of the Program must display Appropriate Legal Notices, as required under + * Section 5 of the GNU AGPL version 3. + * + * Pursuant to Section 7(b) of the License you must retain the original Product + * logo when distributing the program. Pursuant to Section 7(e) we decline to + * grant you any rights under trademark law for use of our trademarks. + * + * All the Product's GUI elements, including illustrations and icon sets, as + * well as technical writing content are licensed under the terms of the + * Creative Commons Attribution-ShareAlike 4.0 International. See the License + * terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode + * + */ +#include "FormatUtils.h" + +namespace DocFileFormat +{ +#define WriteWCharPadding(STLCollection, padding) \ +for (int i = 0; i < padding; ++i)\ +{\ +STLCollection->push_back((wchar_t)0);\ +} + static void WriteUtf16ToWChar(int code, std::vector* STLCollection, int padding) + { + int used = 0; + if (code < 0x10000) + { + STLCollection->push_back((wchar_t)code); + used = 1; + } + else + { + code -= 0x10000; + STLCollection->push_back((wchar_t)(0xD800 | ((code >> 10) & 0x03FF))); + STLCollection->push_back((wchar_t)(0xDC00 | (code & 0x03FF))); + used = 2; + } + WriteWCharPadding(STLCollection, padding - used); + + } + + static bool GetSTLCollectionFromUtf8(std::vector* STLCollection, unsigned char* pBuffer, int lCount) + { + if ((STLCollection == NULL) || (pBuffer == NULL)) + { + return false; + } + int lIndex = 0; + if (sizeof(wchar_t) == 2)//utf8 -> utf16 + { + while (lIndex < lCount) + { + BYTE byteMain = pBuffer[lIndex]; + if (0x00 == (byteMain & 0x80)) + { + // 1 byte + STLCollection->push_back((WCHAR)byteMain); + ++lIndex; + } + else if (0x00 == (byteMain & 0x20)) + { + // 2 byte + int val = 0; + if ((lIndex + 1) < lCount) + { + val = (int)(((byteMain & 0x1F) << 6) | + (pBuffer[lIndex + 1] & 0x3F)); + } + STLCollection->push_back((wchar_t)val); + STLCollection->push_back((wchar_t)0); + lIndex += 2; + } + else if (0x00 == (byteMain & 0x10)) + { + // 3 byte + int val = 0; + if ((lIndex + 2) < lCount) + { + val = (int)(((byteMain & 0x0F) << 12) | + ((pBuffer[lIndex + 1] & 0x3F) << 6) | + (pBuffer[lIndex + 2] & 0x3F)); + } + WriteUtf16ToWChar(val, STLCollection, 3); + lIndex += 3; + } + else if (0x00 == (byteMain & 0x0F)) + { + // 4 byte + int val = 0; + if ((lIndex + 3) < lCount) + { + val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + } + WriteUtf16ToWChar(val, STLCollection, 4); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x08)) + { + // 4 byte + int val = 0; + if ((lIndex + 3) < lCount) + { + val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + } + WriteUtf16ToWChar(val, STLCollection, 4); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x04)) + { + // 5 byte + int val = 0; + if ((lIndex + 4) < lCount) + { + val = (int)(((byteMain & 0x03) << 24) | + ((pBuffer[lIndex + 1] & 0x3F) << 18) | + ((pBuffer[lIndex + 2] & 0x3F) << 12) | + ((pBuffer[lIndex + 3] & 0x3F) << 6) | + (pBuffer[lIndex + 4] & 0x3F)); + } + WriteUtf16ToWChar(val, STLCollection, 5); + lIndex += 5; + } + else + { + // 6 byte + int val = 0; + if ((lIndex + 5) < lCount) + { + val = (int)(((byteMain & 0x01) << 30) | + ((pBuffer[lIndex + 1] & 0x3F) << 24) | + ((pBuffer[lIndex + 2] & 0x3F) << 18) | + ((pBuffer[lIndex + 3] & 0x3F) << 12) | + ((pBuffer[lIndex + 4] & 0x3F) << 6) | + (pBuffer[lIndex + 5] & 0x3F)); + } + WriteUtf16ToWChar(val, STLCollection, 6); + lIndex += 6; + } + } + } + else //utf8 -> utf32 + { + while (lIndex < lCount) + { + BYTE byteMain = pBuffer[lIndex]; + if (0x00 == (byteMain & 0x80)) + { + // 1 byte + STLCollection->push_back((WCHAR)byteMain); + ++lIndex; + } + else if (0x00 == (byteMain & 0x20)) + { + // 2 byte + int val = 0; + if ((lIndex + 1) < lCount) + { + val = (int)(((byteMain & 0x1F) << 6) | + (pBuffer[lIndex + 1] & 0x3F)); + } + + STLCollection->push_back((WCHAR)val); + STLCollection->push_back((WCHAR)0); + lIndex += 2; + } + else if (0x00 == (byteMain & 0x10)) + { + // 3 byte + int val = 0; + if ((lIndex + 2) < lCount) + { + val = (int)(((byteMain & 0x0F) << 12) | + ((pBuffer[lIndex + 1] & 0x3F) << 6) | + (pBuffer[lIndex + 2] & 0x3F)); + } + STLCollection->push_back((WCHAR)val); + WriteWCharPadding(STLCollection, 2); + lIndex += 3; + } + else if (0x00 == (byteMain & 0x0F)) + { + // 4 byte + int val = 0; + if ((lIndex + 3) < lCount) + { + val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + } + + STLCollection->push_back((WCHAR)val); + WriteWCharPadding(STLCollection, 3); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x08)) + { + // 4 byte + int val = 0; + if ((lIndex + 3) < lCount) + { + val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + } + STLCollection->push_back((WCHAR)val); + WriteWCharPadding(STLCollection, 3); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x04)) + { + // 5 byte + int val = 0; + if ((lIndex + 4) < lCount) + { + val = (int)(((byteMain & 0x03) << 24) | + ((pBuffer[lIndex + 1] & 0x3F) << 18) | + ((pBuffer[lIndex + 2] & 0x3F) << 12) | + ((pBuffer[lIndex + 3] & 0x3F) << 6) | + (pBuffer[lIndex + 4] & 0x3F)); + } + STLCollection->push_back((WCHAR)val); + WriteWCharPadding(STLCollection, 4); + lIndex += 5; + } + else + { + // 6 byte + int val = 0; + if ((lIndex + 5) < lCount) + { + val = (int)(((byteMain & 0x01) << 30) | + ((pBuffer[lIndex + 1] & 0x3F) << 24) | + ((pBuffer[lIndex + 2] & 0x3F) << 18) | + ((pBuffer[lIndex + 3] & 0x3F) << 12) | + ((pBuffer[lIndex + 4] & 0x3F) << 6) | + (pBuffer[lIndex + 5] & 0x3F)); + } + STLCollection->push_back((WCHAR)val); + WriteWCharPadding(STLCollection, 5); + lIndex += 6; + } + } + } + return true; + } +} + +bool DocFileFormat::FormatUtils::GetWStringFromBytes(std::wstring& string, unsigned char* bytes, int size, int code_page) +{ + if (bytes == NULL) + { + return false; + } + + if (code_page == ENCODING_UTF8) + { + string = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8(bytes, size); + return true; + } + else if (code_page == ENCODING_UTF16) + { + string = NSFile::CUtf8Converter::GetWStringFromUTF16((unsigned short*)bytes, size / 2); + } + else if (code_page == ENCODING_WINDOWS_1250) + { + wchar_t wch = 0; + int i = 0; + while (i < size) + { + wch = MapByteToWChar(bytes[i++]); + + string += (wch); + } + } + else + { + std::string sCodePage; + std::map::const_iterator pFind = NSUnicodeConverter::mapEncodingsICU.find(code_page); + if (pFind != NSUnicodeConverter::mapEncodingsICU.end()) + { + sCodePage = pFind->second; + } + + if (sCodePage.empty()) + sCodePage = "CP1250"/* + std::to_string(code_page)*/; + + NSUnicodeConverter::CUnicodeConverter oConverter; + string = oConverter.toUnicode((char*)bytes, (unsigned int)size, sCodePage.c_str()); + } + + return true; +} + +bool DocFileFormat::FormatUtils::GetSTLCollectionFromBytes(std::vector* STLCollection, unsigned char* bytes, int size, int code_page) +{ + if (bytes == NULL) + { + return false; + } + + if (code_page == ENCODING_UTF8) + { + return GetSTLCollectionFromUtf8(STLCollection, bytes, size); + } + else if (code_page == ENCODING_UTF16) + { + int i = 0; +#if !defined(_WIN32) && !defined(_WIN64) + int nCount = size / 2; + unsigned short* pShort = (unsigned short*)bytes; + + int nCurrent = 0; + while (nCurrent < nCount) + { + if (*pShort < 0xD800 || *pShort > 0xDBFF) + { + STLCollection->push_back((wchar_t)(*pShort)); + ++pShort; + ++nCurrent; + } + else + { + STLCollection->push_back((wchar_t)(((((pShort[0] - 0xD800) & 0x03FF) << 10) | ((pShort[1] - 0xDC00) & 0x03FF)) + 0x10000)); + STLCollection->push_back((wchar_t)0); + pShort += 2; + nCurrent += 2; + } + } +#else + while (i < size) + { + STLCollection->push_back(FormatUtils::BytesToUInt16(bytes, i, size)); + + i += 2; + } +#endif + } + else if (code_page == ENCODING_WINDOWS_1250) + { + wchar_t wch = 0; + int i = 0; + while (i < size) + { + wch = MapByteToWChar(bytes[i++]); + + STLCollection->push_back(wch); + } + } + else + { + std::string sCodePage; + std::map::const_iterator pFind = NSUnicodeConverter::mapEncodingsICU.find(code_page); + if (pFind != NSUnicodeConverter::mapEncodingsICU.end()) + { + sCodePage = pFind->second; + } + + if (sCodePage.empty()) + sCodePage = "CP1250"/* + std::to_string(code_page)*/; + + NSUnicodeConverter::CUnicodeConverter oConverter; + std::wstring unicode_string = oConverter.toUnicode((char*)bytes, (unsigned int)size, sCodePage.c_str()); + + for (size_t i = 0; i < unicode_string.size(); i++) + { + STLCollection->push_back(unicode_string[i]); + } + } + + return true; +} diff --git a/MsBinaryFile/Common/Base/FormatUtils.h b/MsBinaryFile/Common/Base/FormatUtils.h index 76b558a5f9..dcde3afd4c 100644 --- a/MsBinaryFile/Common/Base/FormatUtils.h +++ b/MsBinaryFile/Common/Base/FormatUtils.h @@ -494,152 +494,8 @@ namespace DocFileFormat } return true; } - - template static bool GetSTLCollectionFromUtf8( T *STLCollection, unsigned char *bytes, int size) - { - if ( ( STLCollection == NULL ) || ( bytes == NULL ) ) - { - return false; - } - if (sizeof(wchar_t) == 2)//utf8 -> utf16 - { - unsigned int nLength = size; - - UTF16 *pStrUtf16 = new UTF16 [nLength+1]; - memset ((void *) pStrUtf16, 0, sizeof (UTF16) * (nLength+1)); - - UTF8 *pStrUtf8 = (UTF8 *) bytes; - - // this values will be modificated - const UTF8 *pStrUtf8_Conv = pStrUtf8; - UTF16 *pStrUtf16_Conv = pStrUtf16; - - ConversionResult eUnicodeConversionResult = ConvertUTF8toUTF16 (&pStrUtf8_Conv, &pStrUtf8[nLength] - , &pStrUtf16_Conv, &pStrUtf16 [nLength] - , strictConversion); - - if (conversionOK != eUnicodeConversionResult) - { - delete [] pStrUtf16; - return GetSTLCollectionFromLocale(STLCollection, bytes,size); - } - for (unsigned int i = 0; i < nLength; i++) - { - STLCollection->push_back(pStrUtf16[i]); - } - delete [] pStrUtf16; - return true; - } - else //utf8 -> utf32 - { - unsigned int nLength = size; - - UTF32 *pStrUtf32 = new UTF32 [nLength+1]; - memset ((void *) pStrUtf32, 0, sizeof (UTF32) * (nLength+1)); - - - UTF8 *pStrUtf8 = (UTF8 *) bytes; - - // this values will be modificated - const UTF8 *pStrUtf8_Conv = pStrUtf8; - UTF32 *pStrUtf32_Conv = pStrUtf32; - - ConversionResult eUnicodeConversionResult = ConvertUTF8toUTF32 (&pStrUtf8_Conv, &pStrUtf8[nLength] - , &pStrUtf32_Conv, &pStrUtf32 [nLength] - , strictConversion); - - if (conversionOK != eUnicodeConversionResult) - { - delete [] pStrUtf32; - return GetSTLCollectionFromLocale(STLCollection, bytes, size); - } - for (unsigned int i = 0; i < nLength; i++) - { - STLCollection->push_back(pStrUtf32[i]); - } - delete [] pStrUtf32; - return true; - } - } - - template static bool GetSTLCollectionFromBytes( T *STLCollection, unsigned char *bytes, int size, int code_page ) - { - if ( ( STLCollection == NULL ) || ( bytes == NULL ) ) - { - return false; - } - - if (code_page == ENCODING_UTF8) - { - return GetSTLCollectionFromUtf8(STLCollection, bytes, size); - } - else if (code_page == ENCODING_UTF16) - { - int i = 0; -#if !defined(_WIN32) && !defined(_WIN64) - int nCount = size / 2; - unsigned short* pShort = (unsigned short*)bytes; - - int nCurrent = 0; - while (nCurrent < nCount) - { - if (*pShort < 0xD800 || *pShort > 0xDBFF) - { - STLCollection->push_back((wchar_t)(*pShort)); - ++pShort; - ++nCurrent; - } - else - { - STLCollection->push_back((wchar_t)(((((pShort[0] - 0xD800) & 0x03FF) << 10) | ((pShort[1] - 0xDC00) & 0x03FF)) + 0x10000)); - STLCollection->push_back((wchar_t)0); - pShort += 2; - nCurrent += 2; - } - } -#else - while ( i < size ) - { - STLCollection->push_back( FormatUtils::BytesToUInt16( bytes, i, size ) ); - - i += 2; - } -#endif - } - else if (code_page == ENCODING_WINDOWS_1250) - { - wchar_t wch = 0; - int i = 0; - while ( i < size ) - { - wch = MapByteToWChar( bytes[i++] ); - - STLCollection->push_back( wch ); - } - } - else - { - std::string sCodePage; - std::map::const_iterator pFind = NSUnicodeConverter::mapEncodingsICU.find(code_page); - if (pFind != NSUnicodeConverter::mapEncodingsICU.end()) - { - sCodePage = pFind->second; - } - - if (sCodePage.empty()) - sCodePage = "CP1250"/* + std::to_string(code_page)*/; - - NSUnicodeConverter::CUnicodeConverter oConverter; - std::wstring unicode_string = oConverter.toUnicode((char*)bytes, (unsigned int)size, sCodePage.c_str()); - - for (size_t i = 0; i < unicode_string.size(); i++) - { - STLCollection->push_back(unicode_string[i]); - } - } - - return true; - } + static bool GetWStringFromBytes(std::wstring & string, unsigned char* bytes, int size, int code_page); + static bool GetSTLCollectionFromBytes(std::vector* STLCollection, unsigned char* bytes, int size, int code_page); static int BitmaskToInt( int value, int mask ) { diff --git a/MsBinaryFile/DocFile/AnnotationReferenceDescriptor.cpp b/MsBinaryFile/DocFile/AnnotationReferenceDescriptor.cpp index fb17c1be41..03ce84359c 100644 --- a/MsBinaryFile/DocFile/AnnotationReferenceDescriptor.cpp +++ b/MsBinaryFile/DocFile/AnnotationReferenceDescriptor.cpp @@ -45,7 +45,7 @@ namespace DocFileFormat short cch = reader->ReadByte(); unsigned char *chars = reader->ReadBytes(cch, true); - FormatUtils::GetSTLCollectionFromBytes( &(newObject->m_UserInitials), chars, cch , ENCODING_WINDOWS_1250); + FormatUtils::GetWStringFromBytes(newObject->m_UserInitials, chars, cch , ENCODING_WINDOWS_1250); newObject->m_AuthorIndex = reader->ReadUInt16(); newObject->m_BookmarkId = reader->ReadInt16(); diff --git a/MsBinaryFile/DocFile/FontFamilyName.cpp b/MsBinaryFile/DocFile/FontFamilyName.cpp index de34ff169a..dddd952668 100644 --- a/MsBinaryFile/DocFile/FontFamilyName.cpp +++ b/MsBinaryFile/DocFile/FontFamilyName.cpp @@ -107,7 +107,7 @@ namespace DocFileFormat if (reader->nWordVersion > 0) { - FormatUtils::GetSTLCollectionFromBytes( &(newObject->xszFtn), bytes, (int)( strEnd - strStart ), ENCODING_WINDOWS_1250 ); + FormatUtils::GetWStringFromBytes(newObject->xszFtn, bytes, (int)( strEnd - strStart ), ENCODING_WINDOWS_1250 ); } else { @@ -141,7 +141,7 @@ namespace DocFileFormat if (reader->nWordVersion > 0) { - FormatUtils::GetSTLCollectionFromBytes( &(newObject->xszAlt), bytes, (int)( strEnd - strStart ), ENCODING_WINDOWS_1250); + FormatUtils::GetWStringFromBytes( newObject->xszAlt, bytes, (int)( strEnd - strStart ), ENCODING_WINDOWS_1250); } else { diff --git a/MsBinaryFile/DocFile/ListData.cpp b/MsBinaryFile/DocFile/ListData.cpp index 5c7fdb6399..a8bd7e351f 100644 --- a/MsBinaryFile/DocFile/ListData.cpp +++ b/MsBinaryFile/DocFile/ListData.cpp @@ -228,7 +228,7 @@ namespace DocFileFormat } if (strLen > 0) { - FormatUtils::GetSTLCollectionFromBytes( &(xst), data + 20, ( strLen ), ENCODING_WINDOWS_1250); + FormatUtils::GetWStringFromBytes( xst, data + 20, ( strLen ), ENCODING_WINDOWS_1250); } } OutlineListDescriptor::~OutlineListDescriptor() @@ -273,7 +273,7 @@ namespace DocFileFormat } if (strLen > 0) { - FormatUtils::GetSTLCollectionFromBytes( &(xst), data + 20, ( strLen ), ENCODING_WINDOWS_1250); + FormatUtils::GetWStringFromBytes( xst, data + 20, ( strLen ), ENCODING_WINDOWS_1250); } } } diff --git a/MsBinaryFile/DocFile/OleObject.cpp b/MsBinaryFile/DocFile/OleObject.cpp index 41a43c7879..7d039d1e87 100644 --- a/MsBinaryFile/DocFile/OleObject.cpp +++ b/MsBinaryFile/DocFile/OleObject.cpp @@ -356,7 +356,7 @@ void OleObject::processLinkInfoStream( VirtualStreamReader& reader ) { short cch = reader.ReadInt16(); unsigned char* str = reader.ReadBytes( cch, true ); - FormatUtils::GetSTLCollectionFromBytes( &this->Link, str, cch, ENCODING_WINDOWS_1250 ); + FormatUtils::GetWStringFromBytes( this->Link, str, cch, ENCODING_WINDOWS_1250 ); RELEASEARRAYOBJECTS( str ); //skip the terminating zero of the ANSI string diff --git a/MsBinaryFile/DocFile/PictureDescriptor.cpp b/MsBinaryFile/DocFile/PictureDescriptor.cpp index 457d860a84..af401e075d 100644 --- a/MsBinaryFile/DocFile/PictureDescriptor.cpp +++ b/MsBinaryFile/DocFile/PictureDescriptor.cpp @@ -310,7 +310,7 @@ namespace DocFileFormat if ( stPicName != NULL ) { std::wstring picName; - FormatUtils::GetSTLCollectionFromBytes( &picName, stPicName, cchPicName, ENCODING_WINDOWS_1250 ); + FormatUtils::GetWStringFromBytes( picName, stPicName, cchPicName, ENCODING_WINDOWS_1250 ); RELEASEARRAYOBJECTS(stPicName); } } diff --git a/MsBinaryFile/DocFile/PieceTable.cpp b/MsBinaryFile/DocFile/PieceTable.cpp index 5613baf16b..cf49adc537 100644 --- a/MsBinaryFile/DocFile/PieceTable.cpp +++ b/MsBinaryFile/DocFile/PieceTable.cpp @@ -188,7 +188,7 @@ namespace DocFileFormat stream->seek(pcd.fc); stream->read(bytes, cb); - FormatUtils::GetSTLCollectionFromBytes >(piecePairs, bytes, cb, pcd.code_page); + FormatUtils::GetSTLCollectionFromBytes(piecePairs, bytes, cb, pcd.code_page); RELEASEARRAYOBJECTS(bytes); } @@ -234,7 +234,7 @@ namespace DocFileFormat wordStream->read( bytes, cb); //get the chars - FormatUtils::GetSTLCollectionFromBytes>( encodingChars, bytes, cb, pcd.code_page ); + FormatUtils::GetSTLCollectionFromBytes( encodingChars, bytes, cb, pcd.code_page ); RELEASEARRAYOBJECTS( bytes ); } @@ -253,7 +253,7 @@ namespace DocFileFormat wordStream->read( bytes, cb); //get the chars - FormatUtils::GetSTLCollectionFromBytes>( encodingChars, bytes, cb, pcd.code_page ); + FormatUtils::GetSTLCollectionFromBytes( encodingChars, bytes, cb, pcd.code_page ); RELEASEARRAYOBJECTS( bytes ); } @@ -272,7 +272,7 @@ namespace DocFileFormat wordStream->read( bytes, cb); //get the chars - FormatUtils::GetSTLCollectionFromBytes>(encodingChars, bytes, cb, pcd.code_page); + FormatUtils::GetSTLCollectionFromBytes(encodingChars, bytes, cb, pcd.code_page); RELEASEARRAYOBJECTS(bytes); @@ -295,7 +295,7 @@ namespace DocFileFormat wordStream->read( bytes, cb ); //get the chars - FormatUtils::GetSTLCollectionFromBytes>( encodingChars, bytes, cb, pcd.code_page ); + FormatUtils::GetSTLCollectionFromBytes( encodingChars, bytes, cb, pcd.code_page ); RELEASEARRAYOBJECTS( bytes ); @@ -484,7 +484,7 @@ namespace DocFileFormat word->read(bytes, size); - FormatUtils::GetSTLCollectionFromBytes>(encodingChars, bytes, size, coding); + FormatUtils::GetSTLCollectionFromBytes(encodingChars, bytes, size, coding); RELEASEARRAYOBJECTS(bytes); diff --git a/MsBinaryFile/DocFile/StyleSheet.cpp b/MsBinaryFile/DocFile/StyleSheet.cpp index 48f22e5d28..4265df3d05 100644 --- a/MsBinaryFile/DocFile/StyleSheet.cpp +++ b/MsBinaryFile/DocFile/StyleSheet.cpp @@ -116,7 +116,7 @@ namespace DocFileFormat { //user style unsigned char *bytes = tableReader.ReadBytes( sz_name, true ); - FormatUtils::GetSTLCollectionFromBytes( &std->xstzName, bytes, sz_name, ENCODING_WINDOWS_1250 ); + FormatUtils::GetWStringFromBytes( std->xstzName, bytes, sz_name, ENCODING_WINDOWS_1250 ); RELEASEARRAYOBJECTS( bytes ); } // ms style diff --git a/MsBinaryFile/DocFile/StyleSheetDescription.cpp b/MsBinaryFile/DocFile/StyleSheetDescription.cpp index fb6dc874be..e408a449c1 100644 --- a/MsBinaryFile/DocFile/StyleSheetDescription.cpp +++ b/MsBinaryFile/DocFile/StyleSheetDescription.cpp @@ -138,7 +138,7 @@ namespace DocFileFormat { name = new unsigned char[characterCount];//characters are zero-terminated, so 1 char has 2 bytes: memcpy( name, ( bytes + cbStdBase + 1 ), ( characterCount ) ); - FormatUtils::GetSTLCollectionFromBytes( &(xstzName), name, ( characterCount ), ENCODING_WINDOWS_1250 ); + FormatUtils::GetWStringFromBytes( xstzName, name, ( characterCount ), ENCODING_WINDOWS_1250 ); upxOffset = cbStdBase + 1 + ( characterCount /** 2*/ ) + 1; } else diff --git a/MsBinaryFile/DocFile/VirtualStreamReader.cpp b/MsBinaryFile/DocFile/VirtualStreamReader.cpp index 65c561d1b7..a79170931d 100644 --- a/MsBinaryFile/DocFile/VirtualStreamReader.cpp +++ b/MsBinaryFile/DocFile/VirtualStreamReader.cpp @@ -194,7 +194,7 @@ std::wstring VirtualStreamReader::ReadXst() int xstzSize = DocFileFormat::FormatUtils::BytesToUChar( cch, 0, cchSize ) * 1; xstz = ReadBytes(xstzSize, true); - DocFileFormat::FormatUtils::GetSTLCollectionFromBytes( &wstrResult, xstz, xstzSize, ENCODING_WINDOWS_1250 ); + DocFileFormat::FormatUtils::GetWStringFromBytes( wstrResult, xstz, xstzSize, ENCODING_WINDOWS_1250 ); } else { @@ -272,7 +272,7 @@ std::wstring VirtualStreamReader::ReadLengthPrefixedAnsiString(unsigned int max_ //dont read the terminating zero stringBytes = ReadBytes( cch, true ); - DocFileFormat::FormatUtils::GetSTLCollectionFromBytes( &result, stringBytes, ( cch - 1 ), ENCODING_WINDOWS_1250); + DocFileFormat::FormatUtils::GetWStringFromBytes( result, stringBytes, ( cch - 1 ), ENCODING_WINDOWS_1250); } RELEASEARRAYOBJECTS( stringBytes ); diff --git a/MsBinaryFile/DocFile/WideString.cpp b/MsBinaryFile/DocFile/WideString.cpp index c91499ff50..314d87627f 100644 --- a/MsBinaryFile/DocFile/WideString.cpp +++ b/MsBinaryFile/DocFile/WideString.cpp @@ -54,11 +54,11 @@ namespace DocFileFormat //It's a real string table if (reader->nWordVersion > 0) { - FormatUtils::GetSTLCollectionFromBytes( newObject, bytes, length, ENCODING_WINDOWS_1250 ); + FormatUtils::GetWStringFromBytes( *newObject, bytes, length, ENCODING_WINDOWS_1250 ); } else { - FormatUtils::GetSTLCollectionFromBytes( newObject, bytes, length, ENCODING_UTF16 ); + FormatUtils::GetWStringFromBytes( *newObject, bytes, length, ENCODING_UTF16 ); } RELEASEARRAYOBJECTS( bytes ); diff --git a/MsBinaryFile/DocFile/WordDocument.cpp b/MsBinaryFile/DocFile/WordDocument.cpp index 18e8500ceb..8e7436ca1b 100644 --- a/MsBinaryFile/DocFile/WordDocument.cpp +++ b/MsBinaryFile/DocFile/WordDocument.cpp @@ -499,18 +499,13 @@ namespace DocFileFormat RELEASEOBJECT(Text); Text = new std::vector(); - int coding = nFontsCodePage != ENCODING_WINDOWS_1250 ? nFontsCodePage : nDocumentCodePage; + int coding = nFontsCodePage != ENCODING_WINDOWS_1250 ? nFontsCodePage : (nWordVersion < 1 ? nDocumentCodePage : ENCODING_WINDOWS_1250); - if (coding == ENCODING_UTF16) - { - std::wstring sText = NSFile::CUtf8Converter::GetWStringFromUTF16((unsigned short*)(bytes), cb / 2); - std::copy(sText.begin(), sText.end(), std::back_inserter(*Text)); - } - else - { - FormatUtils::GetSTLCollectionFromBytes>(Text, bytes, cb, coding); - } + FormatUtils::GetSTLCollectionFromBytes(Text, bytes, cb, coding); + //std::wstring strTest1 = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8(bytes, cb); + //std::wstring strTest2; + //FormatUtils::GetWStringFromBytes(strTest2, bytes, cb, ENCODING_WINDOWS_1250); RELEASEARRAYOBJECTS(bytes); } } diff --git a/MsBinaryFile/Projects/DocFormatLib/Linux/DocFormatLib.pro b/MsBinaryFile/Projects/DocFormatLib/Linux/DocFormatLib.pro index 1ec2f9ca83..e0b2b884cb 100644 --- a/MsBinaryFile/Projects/DocFormatLib/Linux/DocFormatLib.pro +++ b/MsBinaryFile/Projects/DocFormatLib/Linux/DocFormatLib.pro @@ -31,8 +31,9 @@ SOURCES += \ ../../../DocFile/EncryptionHeader.cpp \ ../../../DocFile/DrawingPrimitives.cpp \ ../../../DocFile/Spa.cpp \ - ../../../DocFile/OleObject.cpp \ - ../../../Common/Base/XmlTools.cpp + ../../../DocFile/OleObject.cpp \ + ../../../Common/Base/FormatUtils.cpp \ + ../../../Common/Base/XmlTools.cpp core_release { SOURCES += \ diff --git a/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj b/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj index 7ecd663c84..1d3bed4ce5 100644 --- a/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj +++ b/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj @@ -141,6 +141,7 @@ + @@ -308,6 +309,7 @@ + diff --git a/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj.filters b/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj.filters index a3f79cb0fd..5e1465669c 100644 --- a/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj.filters +++ b/MsBinaryFile/Projects/DocFormatLib/Windows/DocFormatLib.vcxproj.filters @@ -505,6 +505,7 @@ Converter + @@ -870,5 +871,6 @@ OfficeDrawing + \ No newline at end of file