Files
core/DesktopEditor/common/String.h
Oleg.Korshul 53e35b5afa убрал min & max. У кого проблемы - тот и меняет у себя
git-svn-id: svn://fileserver/activex/AVS/Sources/TeamlabOffice/trunk/ServerComponents@63678 954022d7-b5bf-4e40-9824-e11837661b57
2016-05-21 00:17:56 +03:00

442 lines
13 KiB
C++

#ifndef _BUILD_STRING_CROSSPLATFORM_H_
#define _BUILD_STRING_CROSSPLATFORM_H_
#include "CPEncodings/CodePage.h"
#include <vector>
#include <sstream>
#include <algorithm>
#ifdef __linux__
#include <string.h>
#endif
namespace NSStringExt
{
#define NSSTRING_COMMON_CP(UnicodeMapCP, lCount, pData) \
for (long i = 0; i < lCount; ++i)\
{\
unsigned char unChar = (unsigned char)pData[i];\
if (unChar < MSCP_FIRST_CHAR || unChar > MSCP_LAST_CHAR)\
pUnicode[i] = (wchar_t)unChar;\
else\
pUnicode[i] = (wchar_t)(UnicodeMapCP[unChar - MSCP_FIRST_CHAR]);\
}
// end define
#define NSSTRING_WITHLEADBYTE_CP(LEAD_CHAR, UnicodeMapCP, UnicodeMapWithLeadByte, lCount, pData) \
{\
int nLeadByte = -1;\
for (long i = 0; i < lCount; i++)\
{\
unsigned char unCode = (unsigned char)pData[i];\
unsigned short ushUnicode = UnicodeMapCP[unCode];\
if (-1 == nLeadByte)\
{\
if (LEAD_CHAR != ushUnicode)\
{\
pUnicode[i] = ushUnicode;\
nLeadByte = -1;\
}\
else\
{\
nLeadByte = ushUnicode;\
}\
}\
else\
{\
unsigned short ushCode = (nLeadByte << 16) | ushUnicode;\
pUnicode[i] = 0x0020;\
TCodePagePair *pPair = (TCodePagePair*)UnicodeMapWithLeadByte;\
while(0xFFFF != pPair->ushCode)\
{\
if (ushCode == pPair->ushCode)\
{\
pUnicode[i] = pPair->ushUnicode;\
break;\
}\
pPair++;\
}\
}\
}\
}
// end define
class CConverter
{
public:
typedef enum
{
SINGLE_BYTE_ENCODING_DEFAULT = 0x01, // DEFAULT_CHARSET 1 (x01)
SINGLE_BYTE_ENCODING_SYMBOL = 0x02, // SYMBOL_CHARSET 2 (x02)
SINGLE_BYTE_ENCODING_CP1252 = 0x00, // ANSI_CHARSET 0 (x00)
SINGLE_BYTE_ENCODING_CP1251 = 0xCC, // RUSSIAN_CHARSET 204 (xCC)
SINGLE_BYTE_ENCODING_CP1250 = 0xEE, // EASTEUROPE_CHARSET 238 (xEE)
SINGLE_BYTE_ENCODING_CP1253 = 0xA1, // GREEK_CHARSET 161 (xA1)
SINGLE_BYTE_ENCODING_CP1254 = 0xA2, // TURKISH_CHARSET 162 (xA2)
SINGLE_BYTE_ENCODING_CP1257 = 0xBA, // BALTIC_CHARSET 186 (xBA)
SINGLE_BYTE_ENCODING_CP1255 = 0xB1, // HEBREW_CHARSET 177 (xB1)
SINGLE_BYTE_ENCODING_CP1256 = 0xB2, // ARABIC _CHARSET 178 (xB2)
SINGLE_BYTE_ENCODING_CP932 = 0x80, // SHIFTJIS_CHARSET 128 (x80)
SINGLE_BYTE_ENCODING_CP949 = 0x81, // HANGEUL_CHARSET 129 (x81)
SINGLE_BYTE_ENCODING_CP936 = 0x86, // GB2313_CHARSET 134 (x86)
SINGLE_BYTE_ENCODING_CP950 = 0x88, // CHINESEBIG5_CHARSET 136 (x88)
SINGLE_BYTE_ENCODING_CP874 = 0xDE, // THAI_CHARSET 222 (xDE)
SINGLE_BYTE_ENCODING_CP1361 = 0x82, // JOHAB_CHARSET 130 (x82)
SINGLE_BYTE_ENCODING_CP1258 = 0xA3, // VIETNAMESE_CHARSET 163 (xA3)
SINGLE_BYTE_ENCODING_CP866 = 0xFF // OEM_CHARSET 255 (xFF) // Проверить, что OEM соответствует CP866
} ESingleByteEncoding;
static std::wstring GetUnicodeFromSingleByteString(const unsigned char* pData, long lCount, ESingleByteEncoding eType = SINGLE_BYTE_ENCODING_DEFAULT)
{
wchar_t* pUnicode = new wchar_t[lCount + 1];
if (!pUnicode)
return std::wstring(L"");
switch (eType)
{
default:
case SINGLE_BYTE_ENCODING_DEFAULT:
{
for (long i = 0; i < lCount; ++i)
pUnicode[i] = (wchar_t)(unsigned char)pData[i];
break;
}
case SINGLE_BYTE_ENCODING_SYMBOL:
{
// Добавляем 0xF000 к кодам всех символов
for (long i = 0; i < lCount; ++i)
{
pUnicode[i] = (wchar_t)(0xF000 | (unsigned char)pData[i]);
}
break;
}
case SINGLE_BYTE_ENCODING_CP866: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP866, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP874: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP874, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1250: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1250, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1251: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1251, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1252: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1252, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1253: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1253, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1254: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1254, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1255: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1255, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1256: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1256, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1257: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1257, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1258: NSSTRING_COMMON_CP(NSStringExt::c_anUnicodeMapCP1258, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP932: NSSTRING_WITHLEADBYTE_CP(MSCP932_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP932, c_aoUnicodeMapCP932WithLeadByte, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP936: NSSTRING_WITHLEADBYTE_CP(MSCP936_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP936, NSStringExt::c_aoUnicodeMapCP936WithLeadByte, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP949: NSSTRING_WITHLEADBYTE_CP(MSCP949_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP949, NSStringExt::c_aoUnicodeMapCP949WithLeadByte, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP950: NSSTRING_WITHLEADBYTE_CP(MSCP950_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP950, NSStringExt::c_aoUnicodeMapCP950WithLeadByte, lCount, pData); break;
case SINGLE_BYTE_ENCODING_CP1361:NSSTRING_WITHLEADBYTE_CP(MSCP1361_LEAD_CHAR, NSStringExt::c_anUnicodeMapCP1361, NSStringExt::c_aoUnicodeMapCP1361WithLeadByte, lCount, pData); break;
}
pUnicode[lCount] = 0;
std::wstring s(pUnicode, lCount);
if (pUnicode)
delete[] pUnicode;
return s;
}
static std::wstring GetUnicodeFromUTF16(const unsigned short* pData, long lCount)
{
if (0 == lCount)
return L"";
if (2 == sizeof(wchar_t))
return std::wstring((wchar_t*)pData, lCount);
wchar_t* pUnicode = new wchar_t[lCount + 1];
if (!pUnicode)
return L"";
wchar_t* pCur = pUnicode;
int nCurPos = 0;
while (nCurPos < lCount)
{
int nLeading = pData[nCurPos]; nCurPos++;
if (nLeading < 0xD800 || nLeading > 0xDFFF)
{
*pCur = (wchar_t)nLeading;
pCur++;
}
else
{
if (nCurPos >= lCount)
break;
int nTrailing = pData[nCurPos]; nCurPos++;
if (nTrailing >= 0xDC00 && nTrailing <= 0xDFFF)
{
*pCur = (wchar_t)(((nLeading & 0x03FF) << 10) | (nTrailing & 0x03FF));
pCur++;
}
}
}
if (0 == pCur - pUnicode)
return L"";
std::wstring sRet(pUnicode, pCur - pUnicode);
if (pUnicode)
delete[] pUnicode;
return sRet;
}
static std::wstring GetUnicodeFromUTF32(const unsigned int* pData, long lCount)
{
if (0 == lCount)
return L"";
if (4 == sizeof(wchar_t))
return std::wstring((wchar_t*)pData, lCount);
wchar_t* pUnicode = new wchar_t[2 * lCount + 1];
if (!pUnicode)
return L"";
wchar_t* pCur = pUnicode;
memset(pUnicode, 0x00, sizeof(wchar_t) * (2 * lCount + 1));
for (long lIndex = 0; lIndex < lCount; lIndex++)
{
unsigned int unUnicode = pData[lIndex];
if (unUnicode < 0x10000)
{
*pCur = unUnicode;
pCur++;
}
else
{
unUnicode = unUnicode - 0x10000;
*pCur = 0xD800 | (unUnicode >> 10);
pCur++;
*pCur = 0xDC00 | (unUnicode & 0x3FF);
pCur++;
}
}
if (0 == pCur - pUnicode)
return L"";
std::wstring sRet(pUnicode, pCur - pUnicode);
if (pUnicode)
delete[] pUnicode;
return sRet;
}
static unsigned int* GetUtf32FromUnicode(const std::wstring& wsUnicodeText, unsigned int& unLen)
{
if (wsUnicodeText.size() <= 0)
return NULL;
unsigned int* pUnicodes = new unsigned int[wsUnicodeText.size()];
if (!pUnicodes)
return NULL;
unsigned int* pOutput = pUnicodes;
unLen = 0;
if (2 == sizeof(wchar_t))
{
const wchar_t* wsEnd = wsUnicodeText.c_str() + wsUnicodeText.size();
wchar_t* wsInput = (wchar_t*)wsUnicodeText.c_str();
wchar_t wLeading, wTrailing;
unsigned int unCode;
while (wsInput < wsEnd)
{
wLeading = *wsInput++;
if (wLeading < 0xD800 || wLeading > 0xDFFF)
{
pUnicodes[unLen++] = (unsigned int)wLeading;
}
else if (wLeading >= 0xDC00)
{
// Такого не должно быть
continue;
}
else
{
unCode = (wLeading & 0x3FF) << 10;
wTrailing = *wsInput++;
if (wTrailing < 0xDC00 || wTrailing > 0xDFFF)
{
// Такого не должно быть
continue;
}
else
{
pUnicodes[unLen++] = (unCode | (wTrailing & 0x3FF) + 0x10000);
}
}
}
}
else
{
unLen = wsUnicodeText.size();
for (unsigned int unIndex = 0; unIndex < unLen; unIndex++)
{
pUnicodes[unIndex] = (unsigned int)wsUnicodeText.at(unIndex);
}
}
return pUnicodes;
}
static unsigned short* GetUtf16FromUnicode(const std::wstring& wsUnicodeText, unsigned int& unLen)
{
unsigned int unTextLen = wsUnicodeText.size();
if (unTextLen <= 0)
return NULL;
unsigned short* pUtf16 = NULL;
unLen = 0;
if (2 == sizeof(wchar_t))
{
pUtf16 = new unsigned short[unTextLen];
if (!pUtf16)
return NULL;
unLen = unTextLen;
for (unsigned int unIndex = 0; unIndex < unLen; unIndex++)
{
pUtf16[unIndex] = (unsigned short)wsUnicodeText.at(unIndex);
}
}
else
{
pUtf16 = new unsigned short[2 * unTextLen + 1];
if (!pUtf16)
return NULL;
unsigned short* pCur = pUtf16;
memset(pUtf16, 0x00, sizeof(unsigned short) * (2 * unTextLen + 1));
for (long lIndex = 0; lIndex < unTextLen; lIndex++)
{
unsigned int unUnicode = wsUnicodeText.at(lIndex);
if (unUnicode < 0x10000)
{
*pCur = unUnicode;
pCur++;
}
else
{
unUnicode = unUnicode - 0x10000;
*pCur = 0xD800 | (unUnicode >> 10);
pCur++;
*pCur = 0xDC00 | (unUnicode & 0x3FF);
pCur++;
}
}
unLen = (unsigned int)(pCur - pUtf16);
if (!unLen)
{
delete[] pUtf16;
return NULL;
}
}
return pUtf16;
}
};
static std::vector<std::wstring>& Split(const std::wstring& wsString, wchar_t nDelim, std::vector<std::wstring> &arrElements)
{
std::wstringstream wStringStream(wsString);
std::wstring wsItem;
while (std::getline(wStringStream, wsItem, nDelim))
{
arrElements.push_back(wsItem);
}
return arrElements;
}
static std::vector<std::wstring> Split(const std::wstring& wsString, wchar_t nDelim)
{
std::vector<std::wstring> wsElements;
Split(wsString, nDelim, wsElements);
return wsElements;
}
static std::vector<std::wstring>& Split(const std::wstring& wsString, const std::wstring wsDelim, std::vector<std::wstring> &arrElements)
{
int nDelimLen = wsDelim.length();
int nPrevPos = 0;
if (nDelimLen > 0)
{
int nPos = wsString.find(wsDelim);
while (std::wstring::npos != nPos)
{
if (nPrevPos != nPos)
arrElements.push_back(wsString.substr(nPrevPos, nPos - nPrevPos));
nPrevPos = nPos + nDelimLen;
nPos = wsString.find(wsDelim, nPrevPos);
}
}
if (nPrevPos < wsString.length())
arrElements.push_back(wsString.substr(nPrevPos));
return arrElements;
}
static std::vector<std::wstring> Split(const std::wstring& wsString, const std::wstring& wsDelim, bool bWholeString = true)
{
std::vector<std::wstring> arrElements;
if (bWholeString)
{
int nDelimLen = wsDelim.length();
if (0 == nDelimLen)
arrElements.push_back(wsString);
else if (1 == nDelimLen)
Split(wsString, wchar_t(wsDelim[0]), arrElements);
else
Split(wsString, wsDelim, arrElements);
}
else
{
std::vector<std::wstring> arrCurrent;
arrCurrent.push_back(wsString);
arrElements.push_back(wsString);
int nPos = 0;
int nLen = wsDelim.length();
while (nPos < nLen)
{
wchar_t wChar = wsDelim.at(nPos++);
arrElements.clear();
for (int nIndex = 0, nCount = arrCurrent.size(); nIndex < nCount; nIndex++)
{
std::vector<std::wstring> arrTemp = Split(arrCurrent.at(nIndex), wChar);
arrElements.insert(arrElements.end(), arrTemp.begin(), arrTemp.end());
}
arrCurrent = arrElements;
}
}
return arrElements;
}
static inline void ToLower(std::wstring& wsString)
{
std::transform(wsString.begin(), wsString.end(), wsString.begin(), ::towlower);
}
static inline void ToUpper(std::wstring& wsString)
{
std::transform(wsString.begin(), wsString.end(), wsString.begin(), ::towupper);
}
static inline void Replace(std::wstring& wsString, const std::wstring& wsFrom, const std::wstring& wsTo)
{
int nFromLen = wsFrom.length();
int nToLen = wsTo.length();
int nPos = -nToLen;
while (std::wstring::npos != (nPos = wsString.find(wsFrom, nPos + nToLen)))
{
wsString.replace(nPos, nFromLen, wsTo);
}
}
};
#endif // _BUILD_STRING_CROSSPLATFORM_H_