From bf600c6eb08314a29e4b5570e292107ff3844cca Mon Sep 17 00:00:00 2001 From: Oleg Korshul Date: Fri, 21 Jun 2019 12:54:00 +0300 Subject: [PATCH 1/3] Core/mac build (#161) --- Common/3dParty/boost/build.sh | 4 +--- Common/3dParty/boost/fetch.sh | 20 +++----------------- Common/3dParty/cef/fetch.sh | 21 ++++++++++++++++----- Common/3dParty/icu/fetch.sh | 22 ++++++++++++++-------- Common/3dParty/icu/fetch_mobile.sh | 4 +--- Common/3dParty/make.sh | 3 +-- Common/3dParty/openssl/build.sh | 4 +--- Common/3dParty/openssl/fetch.sh | 4 +--- Common/3dParty/v8/build.sh | 3 +-- Common/3dParty/v8/fetch.sh | 4 +--- Common/3dParty/v8/fetch_linux_correct.sh | 4 +--- 11 files changed, 41 insertions(+), 52 deletions(-) diff --git a/Common/3dParty/boost/build.sh b/Common/3dParty/boost/build.sh index bb3b8f2248..946939c5dc 100755 --- a/Common/3dParty/boost/build.sh +++ b/Common/3dParty/boost/build.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" os=$(uname -s) platform="" diff --git a/Common/3dParty/boost/fetch.sh b/Common/3dParty/boost/fetch.sh index 4970c3199a..7e3415af54 100755 --- a/Common/3dParty/boost/fetch.sh +++ b/Common/3dParty/boost/fetch.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" os=$(uname -s) platform="" @@ -12,21 +10,11 @@ case "$os" in *) exit ;; esac -if [[ "$platform" == *"mac"* ]] -then -if [[ -f "$SCRIPTPATH/7zX_1.7.1.dmg" ]] -then -echo "7z already downloaded" -else -wget http://static.updatestar.net/dl/7zX/7zX_1.7.1.dmg -fi -fi - if [[ -f "$SCRIPTPATH/boost_1_58_0.7z" ]] then echo "boost already downloaded" else -wget http://freefr.dl.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z +wget http://freefr.dl.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z || 
curl -O http://freefr.dl.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z fi if [ -d "$SCRIPTPATH/boost_1_58_0" ]; then @@ -36,8 +24,6 @@ if [[ "$platform" == *"linux"* ]] then 7z x -y "$SCRIPTPATH/boost_1_58_0.7z" -o"$SCRIPTPATH/" else -hdiutil mount "$SCRIPTPATH/7zX_1.7.1.dmg" -/Volumes/7zX/7zX.app/Contents/Resources/7za x "$SCRIPTPATH/boost_1_58_0.7z" -o"$SCRIPTPATH/" -hdiutil unmount /Volumes/7zX +7za x "$SCRIPTPATH/boost_1_58_0.7z" -o"$SCRIPTPATH/" fi fi diff --git a/Common/3dParty/cef/fetch.sh b/Common/3dParty/cef/fetch.sh index 6a46be3d56..aa9f31b21b 100755 --- a/Common/3dParty/cef/fetch.sh +++ b/Common/3dParty/cef/fetch.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" os=$(uname -s) platform="" @@ -42,9 +40,10 @@ cef_version="3163" if [[ "$platform" == *"linux"* ]] then cef_version="3202" -fi - cef_url=http://d2ettrnqo7v976.cloudfront.net/cef/$cef_version/$platform$arch/$cef_arch +else +cef_url=http://d2ettrnqo7v976.cloudfront.net/cef/$cef_version/$platform/$cef_arch +fi if [[ "$platform" == *"linux"* ]] then @@ -75,3 +74,15 @@ then cp -r -t build/ ./$cef_binary/Release/* ./$cef_binary/Resources/* chmod a+xr build/locales fi + +if [[ "$platform" == *"mac"* ]] +then + if [ -d "build/Chromium Embedded Framework.framework" ] + then + echo "cef_binary already extracted" + else + wget $cef_url || curl -O $cef_url + 7za x $cef_arch + mv "$cef_binary" "build/Chromium Embedded Framework.framework" + fi +fi diff --git a/Common/3dParty/icu/fetch.sh b/Common/3dParty/icu/fetch.sh index 99ebd16ffc..613da85543 100755 --- a/Common/3dParty/icu/fetch.sh +++ b/Common/3dParty/icu/fetch.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" ICU_MAJOR_VER=58 ICU_MINOR_VER=2 @@ -59,11 +57,19 @@ 
else svn export http://source.icu-project.org/repos/icu/tags/release-$ICU_MAJOR_VER-$ICU_MINOR_VER/icu4c ./icu fi -# Workaround for building icu older than 60.0 -# on systems without xlocale.h (removed from glibc since 2.26) -# See https://sourceware.org/glibc/wiki/Release/2.26#Removal_of_.27xlocale.h.27 -# See https://bugs.archlinux.org/task/55246 -sed -i 's/xlocale/locale/' ./icu/source/i18n/digitlst.cpp +if [[ "$platform" == *"linux"* ]] +then + # Workaround for building icu older than 60.0 + # on systems without xlocale.h (removed from glibc since 2.26) + # See https://sourceware.org/glibc/wiki/Release/2.26#Removal_of_.27xlocale.h.27 + # See https://bugs.archlinux.org/task/55246 + sed -i 's/xlocale/locale/' ./icu/source/i18n/digitlst.cpp +fi + +if [[ "$platform" == *"mac"* ]] +then + sed -i -e 's/cmd\, \"%s %s -o %s%s %s %s%s %s %s\"\,/cmd\, \"%s %s -o %s%s %s %s %s %s %s\"\,/' ./icu/source/tools/pkgdata/pkgdata.cpp +fi cd ./icu/source/ diff --git a/Common/3dParty/icu/fetch_mobile.sh b/Common/3dParty/icu/fetch_mobile.sh index 588d7fd118..76e12253f8 100755 --- a/Common/3dParty/icu/fetch_mobile.sh +++ b/Common/3dParty/icu/fetch_mobile.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" ICU_MAJOR_VER=62 ICU_MINOR_VER=1 diff --git a/Common/3dParty/make.sh b/Common/3dParty/make.sh index 38fb02302f..f0f416d066 100755 --- a/Common/3dParty/make.sh +++ b/Common/3dParty/make.sh @@ -1,8 +1,7 @@ #!/bin/bash set -e -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd "$SCRIPTPATH" diff --git a/Common/3dParty/openssl/build.sh b/Common/3dParty/openssl/build.sh index 8497650d69..f557707980 100755 --- a/Common/3dParty/openssl/build.sh +++ b/Common/3dParty/openssl/build.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || 
grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd "$SCRIPTPATH"/openssl diff --git a/Common/3dParty/openssl/fetch.sh b/Common/3dParty/openssl/fetch.sh index f31e20b980..5433582a20 100755 --- a/Common/3dParty/openssl/fetch.sh +++ b/Common/3dParty/openssl/fetch.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd "$SCRIPTPATH" diff --git a/Common/3dParty/v8/build.sh b/Common/3dParty/v8/build.sh index 2885cff449..9dbd402fdc 100755 --- a/Common/3dParty/v8/build.sh +++ b/Common/3dParty/v8/build.sh @@ -2,8 +2,7 @@ export PATH=`pwd`/depot_tools:"$PATH" -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" os=$(uname -s) platform="" diff --git a/Common/3dParty/v8/fetch.sh b/Common/3dParty/v8/fetch.sh index d112514eb5..36a5ac6e00 100755 --- a/Common/3dParty/v8/fetch.sh +++ b/Common/3dParty/v8/fetch.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0" || grealpath "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd "$SCRIPTPATH" diff --git a/Common/3dParty/v8/fetch_linux_correct.sh b/Common/3dParty/v8/fetch_linux_correct.sh index a52e79228c..39365f9864 100755 --- a/Common/3dParty/v8/fetch_linux_correct.sh +++ b/Common/3dParty/v8/fetch_linux_correct.sh @@ -1,7 +1,5 @@ #!/bin/bash - -SCRIPT=$(readlink -f "$0") -SCRIPTPATH=$(dirname "$SCRIPT") +SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" export PATH=`pwd`/depot_tools:"$PATH" From 221436242607f5b656a77437acabe6b50f3ac5a2 Mon Sep 17 00:00:00 2001 From: Alexey Golubev Date: Mon, 24 Jun 2019 14:51:39 +0300 Subject: [PATCH 2/3] Fix boost download error --- Common/3dParty/boost/fetch.bat | 2 +- 
Common/3dParty/boost/fetch.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Common/3dParty/boost/fetch.bat b/Common/3dParty/boost/fetch.bat index 1bab4c0925..607fc23a73 100644 --- a/Common/3dParty/boost/fetch.bat +++ b/Common/3dParty/boost/fetch.bat @@ -4,7 +4,7 @@ CD /D %~dp0 if exist "%SCRIPTPATH%boost_1_58_0.7z" ( echo "boost already downloaded" ) else ( - Powershell.exe Invoke-WebRequest -OutFile boost_1_58_0.7z http://freefr.dl.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z + Powershell.exe Invoke-WebRequest -OutFile boost_1_58_0.7z https://downloads.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z ) SET UNSIP_PROGRAMM="C:\Program Files\7-Zip\7z.exe" diff --git a/Common/3dParty/boost/fetch.sh b/Common/3dParty/boost/fetch.sh index 7e3415af54..0cf8ea457e 100755 --- a/Common/3dParty/boost/fetch.sh +++ b/Common/3dParty/boost/fetch.sh @@ -1,5 +1,6 @@ #!/bin/bash SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +BOOST_URL=https://downloads.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z os=$(uname -s) platform="" @@ -14,7 +15,7 @@ if [[ -f "$SCRIPTPATH/boost_1_58_0.7z" ]] then echo "boost already downloaded" else -wget http://freefr.dl.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z || curl -O http://freefr.dl.sourceforge.net/project/boost/boost/1.58.0/boost_1_58_0.7z +wget "$BOOST_URL" || curl -L -O "$BOOST_URL" fi if [ -d "$SCRIPTPATH/boost_1_58_0" ]; then From c9330ee0f12b1745357d8cffb53d184a15aba1ca Mon Sep 17 00:00:00 2001 From: Sergey Konovalov Date: Mon, 24 Jun 2019 14:56:02 +0300 Subject: [PATCH 3/3] Feature/bug 41890 (#164) * [x2t] Decode xlsx escaped chars * [x2t] Fix bug #41890, #36575. 
Add CStringBuilder::WriteEncodeXmlStringHHHH --- .../Source/XlsxFormat/SharedStrings/Text.h | 20 +- DesktopEditor/common/File.cpp | 247 ++++++++++++++++++ DesktopEditor/common/File.h | 11 +- DesktopEditor/common/StringBuilder.cpp | 116 ++++++++ DesktopEditor/common/StringBuilder.h | 7 +- 5 files changed, 396 insertions(+), 5 deletions(-) diff --git a/Common/DocxFormat/Source/XlsxFormat/SharedStrings/Text.h b/Common/DocxFormat/Source/XlsxFormat/SharedStrings/Text.h index af318bea6b..f6e1761451 100644 --- a/Common/DocxFormat/Source/XlsxFormat/SharedStrings/Text.h +++ b/Common/DocxFormat/Source/XlsxFormat/SharedStrings/Text.h @@ -61,7 +61,7 @@ namespace OOX if(std::wstring::npos != m_sText.find(' ') || std::wstring::npos != m_sText.find('\n')) writer.WriteString(_T(" xml:space=\"preserve\"")); writer.WriteString(_T(">")); - writer.WriteEncodeXmlString(m_sText); + writer.WriteEncodeXmlStringHHHH(m_sText); writer.WriteString(_T("")); } virtual void toXML2(NSStringUtils::CStringBuilder& writer, const wchar_t* name) const @@ -71,7 +71,7 @@ namespace OOX if(std::wstring::npos != m_sText.find(' ') || std::wstring::npos != m_sText.find('\n')) writer.WriteString(_T(" xml:space=\"preserve\"")); writer.WriteString(_T(">")); - writer.WriteEncodeXmlString(m_sText); + writer.WriteEncodeXmlStringHHHH(m_sText); writer.WriteString(_T("")); @@ -83,7 +83,21 @@ namespace OOX if ( oReader.IsEmptyNode() ) return; - m_sText = oReader.GetText3(); + int nDepth = oReader.GetDepth(); + XmlUtils::XmlNodeType eNodeType = XmlUtils::XmlNodeType_EndElement; + while (oReader.Read(eNodeType) && oReader.GetDepth() >= nDepth && XmlUtils::XmlNodeType_EndElement != eNodeType) + { + if (eNodeType == XmlUtils::XmlNodeType_Text || eNodeType == XmlUtils::XmlNodeType_Whitespace || eNodeType == XmlUtils::XmlNodeType_SIGNIFICANT_WHITESPACE) + { + std::string sTemp = oReader.GetTextA(); + wchar_t* pUnicodes = NULL; + LONG lOutputCount = 0; + 
NSFile::CUtf8Converter::GetUnicodeStringFromUTF8WithHHHH((BYTE*)sTemp.c_str(), sTemp.length(), pUnicodes, lOutputCount); + m_sText.append(pUnicodes); + RELEASEARRAYOBJECTS(pUnicodes); + } + } + NSStringExt::Replace(m_sText, L"\t", L""); if(!(m_oSpace.IsInit() && SimpleTypes::xmlspacePreserve == m_oSpace->GetValue())) { diff --git a/DesktopEditor/common/File.cpp b/DesktopEditor/common/File.cpp index 7aa6745498..657cd46c48 100644 --- a/DesktopEditor/common/File.cpp +++ b/DesktopEditor/common/File.cpp @@ -121,6 +121,10 @@ namespace NSFile std::wstring CUtf8Converter::GetUnicodeFromCharPtr(const std::string& sParam, INT bIsUtf8) { return GetUnicodeFromCharPtr(sParam.c_str(), (LONG)sParam.length(), bIsUtf8); + } + LONG CUtf8Converter::GetUnicodeStringFromUTF8BufferSize(LONG lCount) + { + return lCount + 1; } std::wstring CUtf8Converter::GetUnicodeStringFromUTF8_4bytes( BYTE* pBuffer, LONG lCount ) { @@ -305,6 +309,249 @@ namespace NSFile return GetUnicodeStringFromUTF8_4bytes(pBuffer, lCount); } +#define CHECK_HHHH(pBuffer) \ + wchar_t code = 0; \ + if('_' == pBuffer[0] && 'x' == pBuffer[1] && 0 != pBuffer[2] && 0 != pBuffer[3] && 0 != pBuffer[4] && 0 != pBuffer[5] && '_' == pBuffer[6]) \ + { \ + int i = 2; \ + for(; i < 6; ++i) \ + { \ + code *= 16; \ + if('0' <= pBuffer[i] && pBuffer[i] <= '9') \ + { \ + code += pBuffer[i] - '0'; \ + } \ + else if('A' <= pBuffer[i] && pBuffer[i] <= 'F') \ + { \ + code += pBuffer[i] - 'A' + 10; \ + } \ + else if('a' <= pBuffer[i] && pBuffer[i] <= 'f') \ + { \ + code += pBuffer[i] - 'a' + 10; \ + } \ + else \ + { \ + break; \ + } \ + } \ + if(i == 6) \ + { \ + if(0x005F == code) \ + { \ + code = '_'; \ + } \ + return code; \ + } \ + } \ + return -1; + + long CUtf8Converter::CheckHHHHChar(const BYTE* pBuffer) + { + CHECK_HHHH(pBuffer); + } + long CUtf8Converter::CheckHHHHChar(const wchar_t* pBuffer) + { + CHECK_HHHH(pBuffer); + } + + void CUtf8Converter::GetUnicodeStringFromUTF8WithHHHH_4bytes( const BYTE* pBuffer, LONG lCount, wchar_t*& 
pUnicodes, LONG& lOutputCount ) + { + if (NULL == pUnicodes) + { + pUnicodes = new wchar_t[GetUnicodeStringFromUTF8BufferSize(lCount)]; + } + WCHAR* pUnicodeString = pUnicodes; + LONG lIndexUnicode = 0; + + LONG lIndex = 0; + while (lIndex < lCount) + { + BYTE byteMain = pBuffer[lIndex]; + if (0x00 == (byteMain & 0x80)) + { + // 1 byte + long code = CheckHHHHChar(pBuffer + lIndex); + if(code < 0) + { + pUnicodeString[lIndexUnicode++] = (WCHAR)byteMain; + ++lIndex; + } + else + { + pUnicodeString[lIndexUnicode++] = (WCHAR)code; + lIndex += 7; + } + } + else if (0x00 == (byteMain & 0x20)) + { + // 2 byte + int val = (int)(((byteMain & 0x1F) << 6) | + (pBuffer[lIndex + 1] & 0x3F)); + pUnicodeString[lIndexUnicode++] = (WCHAR)(val); + lIndex += 2; + } + else if (0x00 == (byteMain & 0x10)) + { + // 3 byte + int val = (int)(((byteMain & 0x0F) << 12) | + ((pBuffer[lIndex + 1] & 0x3F) << 6) | + (pBuffer[lIndex + 2] & 0x3F)); + pUnicodeString[lIndexUnicode++] = (WCHAR)(val); + lIndex += 3; + } + else if (0x00 == (byteMain & 0x0F)) + { + // 4 byte + int val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + pUnicodeString[lIndexUnicode++] = (WCHAR)(val); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x08)) + { + // 4 byte + int val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + pUnicodeString[lIndexUnicode++] = (WCHAR)(val); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x04)) + { + // 5 byte + int val = (int)(((byteMain & 0x03) << 24) | + ((pBuffer[lIndex + 1] & 0x3F) << 18) | + ((pBuffer[lIndex + 2] & 0x3F) << 12) | + ((pBuffer[lIndex + 3] & 0x3F) << 6) | + (pBuffer[lIndex + 4] & 0x3F)); + pUnicodeString[lIndexUnicode++] = (WCHAR)(val); + lIndex += 5; + } + else + { + // 6 byte + int val = (int)(((byteMain & 0x01) << 30) | + ((pBuffer[lIndex + 1] & 0x3F) << 24) | 
+ ((pBuffer[lIndex + 2] & 0x3F) << 18) | + ((pBuffer[lIndex + 3] & 0x3F) << 12) | + ((pBuffer[lIndex + 4] & 0x3F) << 6) | + (pBuffer[lIndex + 5] & 0x3F)); + pUnicodeString[lIndexUnicode++] = (WCHAR)(val); + lIndex += 6; /* six bytes (lIndex..lIndex+5) were consumed above */ + } + } + + pUnicodeString[lIndexUnicode] = 0; + lOutputCount = lIndexUnicode; + } + void CUtf8Converter::GetUnicodeStringFromUTF8WithHHHH_2bytes( const BYTE* pBuffer, LONG lCount, wchar_t*& pUnicodes, LONG& lOutputCount ) + { + if (NULL == pUnicodes) + { + pUnicodes = new wchar_t[GetUnicodeStringFromUTF8BufferSize(lCount)]; + } + WCHAR* pUnicodeString = pUnicodes; + WCHAR* pStart = pUnicodeString; + LONG lIndex = 0; + while (lIndex < lCount) + { + BYTE byteMain = pBuffer[lIndex]; + if (0x00 == (byteMain & 0x80)) + { + // 1 byte + long code = CheckHHHHChar(pBuffer + lIndex); + if(code < 0) + { + *pUnicodeString++ = (WCHAR)byteMain; + ++lIndex; + } + else + { + *pUnicodeString++ = (WCHAR)code; + lIndex += 7; + } + + } + else if (0x00 == (byteMain & 0x20)) + { + // 2 byte + int val = (int)(((byteMain & 0x1F) << 6) | + (pBuffer[lIndex + 1] & 0x3F)); + *pUnicodeString++ = (WCHAR)(val); + lIndex += 2; + } + else if (0x00 == (byteMain & 0x10)) + { + // 3 byte + int val = (int)(((byteMain & 0x0F) << 12) | + ((pBuffer[lIndex + 1] & 0x3F) << 6) | + (pBuffer[lIndex + 2] & 0x3F)); + + WriteUtf16_WCHAR(val, pUnicodeString); + lIndex += 3; + } + else if (0x00 == (byteMain & 0x0F)) + { + // 4 byte + int val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + + WriteUtf16_WCHAR(val, pUnicodeString); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x08)) + { + // 4 byte + int val = (int)(((byteMain & 0x07) << 18) | + ((pBuffer[lIndex + 1] & 0x3F) << 12) | + ((pBuffer[lIndex + 2] & 0x3F) << 6) | + (pBuffer[lIndex + 3] & 0x3F)); + + WriteUtf16_WCHAR(val, pUnicodeString); + lIndex += 4; + } + else if (0x00 == (byteMain & 0x04)) + { + // 5 byte + int val = 
(int)(((byteMain & 0x03) << 24) | + ((pBuffer[lIndex + 1] & 0x3F) << 18) | + ((pBuffer[lIndex + 2] & 0x3F) << 12) | + ((pBuffer[lIndex + 3] & 0x3F) << 6) | + (pBuffer[lIndex + 4] & 0x3F)); + + WriteUtf16_WCHAR(val, pUnicodeString); + lIndex += 5; + } + else + { + // 6 byte + int val = (int)(((byteMain & 0x01) << 30) | + ((pBuffer[lIndex + 1] & 0x3F) << 24) | + ((pBuffer[lIndex + 2] & 0x3F) << 18) | + ((pBuffer[lIndex + 3] & 0x3F) << 12) | + ((pBuffer[lIndex + 4] & 0x3F) << 6) | + (pBuffer[lIndex + 5] & 0x3F)); + + WriteUtf16_WCHAR(val, pUnicodeString); + lIndex += 6; /* six bytes (lIndex..lIndex+5) were consumed above */ + } + } + + *pUnicodeString = 0; /* terminate without advancing so the count below excludes the terminator, as in the _4bytes variant */ + lOutputCount = (LONG)(pUnicodeString - pStart); + } + void CUtf8Converter::GetUnicodeStringFromUTF8WithHHHH( const BYTE* pBuffer, LONG lCount, wchar_t*& pUnicodes, LONG& lOutputCount ) + { + if (sizeof(WCHAR) == 2) + return GetUnicodeStringFromUTF8WithHHHH_2bytes(pBuffer, lCount, pUnicodes, lOutputCount); + return GetUnicodeStringFromUTF8WithHHHH_4bytes(pBuffer, lCount, pUnicodes, lOutputCount); + } + void CUtf8Converter::GetUtf8StringFromUnicode_4bytes(const wchar_t* pUnicodes, LONG lCount, BYTE*& pData, LONG& lOutputCount, bool bIsBOM) { if (NULL == pData) diff --git a/DesktopEditor/common/File.h b/DesktopEditor/common/File.h index f8b8aa7f5b..c5993b7189 100644 --- a/DesktopEditor/common/File.h +++ b/DesktopEditor/common/File.h @@ -100,11 +100,17 @@ namespace NSFile static std::wstring GetUnicodeFromCharPtr(const char* pData, LONG lCount, INT bIsUtf8 = FALSE); static std::wstring GetUnicodeFromCharPtr(const std::string& sParam, INT bIsUtf8 = FALSE); + static std::wstring GetUnicodeStringFromUTF8_4bytes( BYTE* pBuffer, LONG lCount ); static std::wstring GetUnicodeStringFromUTF8_2bytes( BYTE* pBuffer, LONG lCount ); - static std::wstring GetUnicodeStringFromUTF8( BYTE* pBuffer, LONG lCount ); + static void GetUnicodeStringFromUTF8WithHHHH_4bytes( const BYTE* pBuffer, LONG lCount, wchar_t*& pUnicodes, LONG& lOutputCount ); + static void 
GetUnicodeStringFromUTF8WithHHHH_2bytes( const BYTE* pBuffer, LONG lCount, wchar_t*& pUnicodes, LONG& lOutputCount ); + static void GetUnicodeStringFromUTF8WithHHHH( const BYTE* pBuffer, LONG lCount, wchar_t*& pUnicodes, LONG& lOutputCount ); + + static LONG GetUnicodeStringFromUTF8BufferSize( LONG lCount ); /* not inline: the only definition lives in File.cpp */ + static void GetUtf8StringFromUnicode_4bytes(const wchar_t* pUnicodes, LONG lCount, BYTE*& pData, LONG& lOutputCount, bool bIsBOM = false); static void GetUtf8StringFromUnicode_2bytes(const wchar_t* pUnicodes, LONG lCount, BYTE*& pData, LONG& lOutputCount, bool bIsBOM = false); static void GetUtf8StringFromUnicode(const wchar_t* pUnicodes, LONG lCount, BYTE*& pData, LONG& lOutputCount, bool bIsBOM = false); @@ -118,6 +124,9 @@ namespace NSFile static std::wstring GetWStringFromUTF16(const CStringUtf16& data); static std::wstring GetWStringFromUTF16(const unsigned short* pUtf16, LONG lCount); + + static long CheckHHHHChar(const BYTE* pBuffer); /* not inline: defined in File.cpp, called from StringBuilder.cpp */ + static long CheckHHHHChar(const wchar_t* pBuffer); /* not inline: defined in File.cpp, called from StringBuilder.cpp */ }; class KERNEL_DECL CFileBinary diff --git a/DesktopEditor/common/StringBuilder.cpp b/DesktopEditor/common/StringBuilder.cpp index 4a6aeef1a2..e098f933eb 100644 --- a/DesktopEditor/common/StringBuilder.cpp +++ b/DesktopEditor/common/StringBuilder.cpp @@ -30,6 +30,7 @@ * */ #include "StringBuilder.h" +#include "File.h" namespace NSStringUtils { @@ -301,6 +302,18 @@ namespace NSStringUtils else WriteEncodeXmlString_4bytes(pString, nCount); } + void CStringBuilder::WriteEncodeXmlStringHHHH(const std::wstring& sString) + { + WriteEncodeXmlStringHHHH(sString.c_str(), (int)sString.length()); + } + + void CStringBuilder::WriteEncodeXmlStringHHHH(const wchar_t* pString, int nCount) + { + if (sizeof(wchar_t) == 2) + WriteEncodeXmlStringHHHH_2bytes(pString, nCount); + else + WriteEncodeXmlStringHHHH_4bytes(pString, nCount); + } inline void CStringBuilder::WriteEncodeXmlString_4bytes(const wchar_t* pString, int nCount) { const wchar_t* pData = pString; @@ 
-354,6 +367,59 @@ namespace NSStringUtils } } } + inline void CStringBuilder::WriteEncodeXmlStringHHHH_4bytes(const wchar_t* pString, int nCount) + { + const wchar_t* pData = pString; + int nCounter = 0; + unsigned int code; + while (*pData != 0) + { + code = (unsigned int)*pData; + WriteEncodeXmlChar(*pData, CheckXmlCodeHHHH(code, pData)); + + ++pData; + if (-1 != nCount) + { + ++nCounter; + if (nCounter >= nCount) + break; + } + } + } + inline void CStringBuilder::WriteEncodeXmlStringHHHH_2bytes(const wchar_t* pString, int nCount) + { + const wchar_t* pData = pString; + int nCounter = 0; + unsigned int code; + BYTE type; + while (*pData != 0) + { + code = (unsigned int)*pData; + if (code >= 0xD800 && code <= 0xDFFF && *(pData + 1) != 0) + { + code = 0x10000 + (((code & 0x3FF) << 10) | (0x03FF & *(pData + 1))); + type = CheckXmlCodeHHHH(code, pData); + if(0 != type) + { + WriteEncodeXmlChar(*pData, type); + ++pData; + } + } + else + { + type = CheckXmlCodeHHHH(code, pData); + } + WriteEncodeXmlChar(*pData, type); + + ++pData; + if (-1 != nCount) + { + ++nCounter; + if (nCounter >= nCount) + break; + } + } + } inline void CStringBuilder::WriteEncodeXmlChar(wchar_t code, BYTE type) { switch (type) @@ -436,6 +502,27 @@ namespace NSStringUtils *m_pDataCur++ = (wchar_t)(';'); m_lSizeCur += 5; break; + case 10: + AddSize(7); + *m_pDataCur++ = (wchar_t)('_'); + *m_pDataCur++ = (wchar_t)('x'); + *m_pDataCur++ = (wchar_t)('0'); + *m_pDataCur++ = (wchar_t)('0'); + *m_pDataCur++ = (wchar_t)('5'); + *m_pDataCur++ = (wchar_t)('F'); + *m_pDataCur++ = (wchar_t)('_'); + m_lSizeCur += 7; + break; + case 11: + AddSize(7); + *m_pDataCur++ = (wchar_t)('_'); + *m_pDataCur++ = (wchar_t)('x'); + m_lSizeCur += 2; + WriteHexByteNoSafe((code >> 8) & 0xFF); + WriteHexByteNoSafe(code & 0xFF); + *m_pDataCur++ = (wchar_t)('_'); + ++m_lSizeCur; + break; default: break; } @@ -751,6 +838,35 @@ namespace NSStringUtils return 0; } + unsigned char CStringBuilder::CheckXmlCodeHHHH(unsigned int c, 
const wchar_t* pData) + { + if ('&' == c) + return 2; + if ('\'' == c) + return 3; + if ('<' == c) + return 4; + if ('>' == c) + return 5; + if ('\"' == c) + return 6; + if ('\n' == c)//when reading from the attributes is replaced by a space. + return 7; + if ('\r' == c)//when reading from the attributes is replaced by a space. + return 8; + if ('\t' == c)//when reading from the attributes is replaced by a space. + return 9; + if (NSFile::CUtf8Converter::CheckHHHHChar(pData) >= 0) + return 10; + + //xml 1.0 Character Range https://www.w3.org/TR/xml/#charsets + if ((0x20 <= c && c <= 0xD7FF) || (0xE000 <= c && c <= 0xFFFD) || (0x10000 <= c && c <= 0x10FFFF)) + return 1; + else if(c <= 0xFFFF) + return 11; + + return 0; + } void string_replace(std::wstring& text, const std::wstring& replaceFrom, const std::wstring& replaceTo) { diff --git a/DesktopEditor/common/StringBuilder.h b/DesktopEditor/common/StringBuilder.h index 43bddc38fa..c45275ce4c 100644 --- a/DesktopEditor/common/StringBuilder.h +++ b/DesktopEditor/common/StringBuilder.h @@ -109,9 +109,11 @@ namespace NSStringUtils void AddChar2Safe(const wchar_t _c1, const wchar_t& _c2); void WriteEncodeXmlString(const std::wstring& sString); - void WriteEncodeXmlString(const wchar_t* pString, int nCount = -1); + void WriteEncodeXmlStringHHHH(const std::wstring& sString); + void WriteEncodeXmlStringHHHH(const wchar_t* pString, int nCount = -1); + size_t GetCurSize(); void SetCurSize(size_t lCurSize); size_t GetSize(); @@ -147,8 +149,11 @@ namespace NSStringUtils protected: inline void WriteEncodeXmlString_4bytes(const wchar_t* pString, int nCount); inline void WriteEncodeXmlString_2bytes(const wchar_t* pString, int nCount); + inline void WriteEncodeXmlStringHHHH_4bytes(const wchar_t* pString, int nCount); + inline void WriteEncodeXmlStringHHHH_2bytes(const wchar_t* pString, int nCount); inline void WriteEncodeXmlChar(wchar_t code, unsigned char type); inline unsigned char CheckXmlCode(unsigned int c); + inline unsigned 
char CheckXmlCodeHHHH(unsigned int c, const wchar_t* pData); }; KERNEL_DECL void string_replace(std::wstring& text, const std::wstring& replaceFrom, const std::wstring& replaceTo);