#include "stdafx.h"

// C runtime headers: <stdio.h> supplies FILE/fopen/fgetc/fclose/sscanf/EOF,
// <string.h> supplies memset/memcpy/strcmp/strcpy/strncpy/strtok.
#include <stdio.h>
#include <string.h>

#include "MemoryUtils.h"
#include "File.h"
#include "StringExt.h"
#include "GlobalParams.h"
#include "PSLexer.h"
#include "CharCodeToUnicode.h"

//-------------------------------------------------------------------------------------------------------------------------------

// Maximum number of Unicode values stored for one character code.
#define MaxUnicodeString 8

// Largest source value accepted from a unicodeToUnicode file (the top of the
// Unicode code space). Bounding it keeps the direct lookup table small and
// stops the size-doubling loop in ParseUnicodeToUnicode from wrapping to zero.
#define MaxUnicodeToUnicodeSource 0x10FFFF

// One "code -> several Unicode values" entry; used when a code maps to more
// than one Unicode value and therefore cannot live in the direct table.
struct CharCodeToUnicodeString
{
    CharCode nCode;                            // source character code
    Unicode  pUnicodeString[MaxUnicodeString]; // mapped Unicode values
    int      nLen;                             // number of valid values in pUnicodeString
};

//-------------------------------------------------------------------------------------------------------------------------------

// Character source over a NUL-terminated string.
// pData points to a `char *` cursor, which is advanced by one on every
// successful read. Returns the next byte as a non-negative value, or EOF once
// the terminating NUL is reached (the cursor is then left untouched).
static int GetCharFromString(void *pData)
{
    char **ppCursor = (char **)pData;
    char  *sString  = *ppCursor;

    if ( !*sString )
    {
        return EOF;
    }

    // Convert through unsigned char, as fgetc does: with a signed `char`
    // the byte 0xFF would otherwise be returned as -1 and be mistaken for
    // EOF, and every other byte >= 0x80 would come back negative.
    int nChar = (unsigned char)*sString++;
    *ppCursor = sString;
    return nChar;
}

// Character source over a stdio stream; pData is the FILE* to read from.
static int GetCharFromFile(void *pData)
{
    return fgetc((FILE *)pData);
}

//-------------------------------------------------------------------------------------------------------------------------------

// Builds a mapping from a cidToUnicode file.
// The file is read line by line; the N-th line (counting from 0) supplies the
// hexadecimal Unicode value for index N. A line that does not start with a
// hexadecimal number is stored as 0.
// A copy of seCollection is handed to the new object as its tag.
// Returns NULL if the file cannot be opened.
CharCodeToUnicode *CharCodeToUnicode::ParseCIDToUnicode(StringExt *seFileName, StringExt *seCollection)
{
    FILE *pFile = NULL;
    char sBuffer[64];
    Unicode nUnicode = 0;

    if ( !( pFile = fopen(seFileName->GetBuffer(), "r") ) )
    {
        // TO DO: Error "Couldn't open cidToUnicode file"
        return NULL;
    }

    unsigned int nSize = 32768;
    Unicode *pMap = (Unicode *)MemUtilsMallocArray( nSize, sizeof(Unicode));
    unsigned int nMapLen = 0;

    while ( GetLine( sBuffer, sizeof(sBuffer), pFile ) )
    {
        // Double the table whenever it is full.
        if ( nMapLen == nSize )
        {
            nSize *= 2;
            pMap = (Unicode *)MemUtilsReallocArray( pMap, nSize, sizeof(Unicode));
        }

        if ( sscanf( sBuffer, "%x", &nUnicode ) == 1)
        {
            pMap[nMapLen] = nUnicode;
        }
        else
        {
            // TO DO: Error "Bad line in cidToUnicode file"
            pMap[nMapLen] = 0;
        }
        ++nMapLen;
    }
    fclose(pFile);

    // The constructor is asked to copy the table (bCopyMap == TRUE), so the
    // temporary one is released here.
    CharCodeToUnicode *pCharCodeToUnicode = new CharCodeToUnicode( seCollection->Copy(), pMap, nMapLen, TRUE, NULL, 0, 0);
    MemUtilsFree(pMap);

    return pCharCodeToUnicode;
}

// Builds a mapping from a unicodeToUnicode file.
// Every line consists of whitespace-separated hexadecimal numbers: the source
// value followed by 1..MaxUnicodeString replacement values. A single
// replacement goes into the direct table; longer replacements are stored as
// CharCodeToUnicodeString entries. Lines that cannot be parsed, that carry no
// replacement, or whose source value exceeds MaxUnicodeToUnicodeSource are
// skipped.
// A copy of seFileName is handed to the new object as its tag.
// Returns NULL if the file cannot be opened.
CharCodeToUnicode *CharCodeToUnicode::ParseUnicodeToUnicode(StringExt *seFileName)
{
    FILE *pFile = NULL;
    char sBuffer[256];
    char *sToken;
    Unicode nUnicode0;
    Unicode arrUnicodeBuffer[MaxUnicodeString];

    if ( !( pFile = fopen(seFileName->GetBuffer(), "r") ) )
    {
        // TO DO: Error "Couldn't open unicodeToUnicode file"
        return NULL;
    }

    unsigned int nSize = 4096;
    Unicode *pMap = (Unicode *)MemUtilsMallocArray( nSize, sizeof(Unicode));
    memset( pMap, 0, nSize * sizeof(Unicode));
    unsigned int nLen = 0;

    CharCodeToUnicodeString *pSMap = NULL;
    unsigned int nSMapSize = 0, nSMapLen = 0;

    int nLine = 0; // line counter, kept for the pending error messages

    while ( GetLine( sBuffer, sizeof(sBuffer), pFile ) )
    {
        ++nLine;

        // First token: the source value.
        if ( !( sToken = strtok(sBuffer, " \t\r\n") ) || sscanf( sToken, "%x", &nUnicode0) != 1 )
        {
            // TO DO: Error "Bad line in unicodeToUnicode file"
            continue;
        }

        // Reject out-of-range source values before they reach the growth
        // loop below: for values >= 0x80000000 the doubling of nSize would
        // wrap around to 0 and the loop would never terminate.
        if ( nUnicode0 > MaxUnicodeToUnicodeSource )
        {
            // TO DO: Error "Bad line in unicodeToUnicode file"
            continue;
        }

        // Remaining tokens: up to MaxUnicodeString replacement values.
        int nCount = 0;
        while ( nCount < MaxUnicodeString )
        {
            if ( !( sToken = strtok(NULL, " \t\r\n") ) )
            {
                break;
            }
            if ( sscanf( sToken, "%x", &arrUnicodeBuffer[nCount]) != 1 )
            {
                // TO DO: Error "Bad line in unicodeToUnicode file"
                break;
            }
            ++nCount;
        }

        if ( nCount < 1 )
        {
            // TO DO: Error "Bad line in unicodeToUnicode file"
            continue;
        }

        // Grow the direct table (zero-filling the new tail) until the source
        // value fits.
        if ( nUnicode0 >= nSize )
        {
            unsigned int nOldSize = nSize;
            while ( nUnicode0 >= nSize )
            {
                nSize *= 2;
            }
            pMap = (Unicode *)MemUtilsReallocArray( pMap, nSize, sizeof(Unicode));
            memset( pMap + nOldSize, 0, (nSize - nOldSize) * sizeof(Unicode));
        }

        if ( nCount == 1 )
        {
            pMap[nUnicode0] = arrUnicodeBuffer[0];
        }
        else
        {
            // A zero in the direct table sends lookups to the string table.
            pMap[nUnicode0] = 0;
            if ( nSMapLen == nSMapSize )
            {
                nSMapSize += 16;
                pSMap = (CharCodeToUnicodeString *) MemUtilsReallocArray( pSMap, nSMapSize, sizeof(CharCodeToUnicodeString));
            }
            pSMap[nSMapLen].nCode = nUnicode0;
            for ( int nIndex = 0; nIndex < nCount; ++nIndex )
            {
                pSMap[nSMapLen].pUnicodeString[nIndex] = arrUnicodeBuffer[nIndex];
            }
            pSMap[nSMapLen].nLen = nCount;
            ++nSMapLen;
        }

        // The used length of the direct table is one past the highest source value seen.
        if ( nUnicode0 >= nLen )
        {
            nLen = nUnicode0 + 1;
        }
    }
    fclose(pFile);

    // The direct table is copied by the constructor (bCopyMap == TRUE) and
    // released here; pSMap is passed through as-is and stored by the object.
    CharCodeToUnicode *pCharCodeToUnicode = new CharCodeToUnicode( seFileName->Copy(), pMap, nLen, TRUE, pSMap, nSMapLen, nSMapSize);
    MemUtilsFree(pMap);

    return pCharCodeToUnicode;
}
CharCodeToUnicode *CharCodeToUnicode::Make8BitToUnicode(Unicode *pToUnicode) { return new CharCodeToUnicode( NULL, pToUnicode, 256, TRUE, NULL, 0, 0); } CharCodeToUnicode *CharCodeToUnicode::ParseCMap(StringExt *seBuffer, int nBitCount, GlobalParams *pGlobalParams) { CharCodeToUnicode *pCharCodeToUnicode = new CharCodeToUnicode(NULL); char *pData = seBuffer->GetBuffer(); pCharCodeToUnicode->ParseCMap1( &GetCharFromString, &pData, nBitCount, pGlobalParams); return pCharCodeToUnicode; } void CharCodeToUnicode::MergeCMap(StringExt *seBuffer, int nBitCount, GlobalParams *pGlobalParams) { char *pData = seBuffer->GetBuffer(); ParseCMap1( &GetCharFromString, &pData, nBitCount, pGlobalParams); } void CharCodeToUnicode::ParseCMap1(int (*GetCharFunc)(void *), void *pData, int nBitCount, GlobalParams *pGlobalParams) { char sToken1[256], sToken2[256], sToken3[256]; int nLen1, nLen2, nLen3; CharCode nCode1, nCode2; int nDigitCount = nBitCount / 4; PSLexer *pLexer = new PSLexer( GetCharFunc, pData); pLexer->GetToken( sToken1, sizeof(sToken1), &nLen1); while ( pLexer->GetToken( sToken2, sizeof(sToken2), &nLen2) ) { if ( !strcmp( sToken2, "usecmap" ) ) { if ( sToken1[0] == '/' ) { StringExt *seName = new StringExt(sToken1 + 1); FILE *pFile = NULL; if ( pGlobalParams && ( pFile = pGlobalParams->FindToUnicodeFile(seName) ) ) { ParseCMap1( &GetCharFromFile, pFile, nBitCount, pGlobalParams ); fclose(pFile); } else { // TO DO: Error "Couldn't find ToUnicode CMap file" } delete seName; } pLexer->GetToken( sToken1, sizeof(sToken1), &nLen1); } else if ( !strcmp( sToken2, "beginbfchar") ) { while ( pLexer->GetToken( sToken1, sizeof(sToken1), &nLen1) ) { if ( !strcmp( sToken1, "endbfchar") ) { break; } if ( !pLexer->GetToken( sToken2, sizeof(sToken2), &nLen2 ) || !strcmp( sToken2, "endbfchar") ) { // TO DO: Error "Illegal entry in bfchar block in ToUnicode CMap" break; } if ( !( nLen1 == 2 + nDigitCount && sToken1[0] == '<' && sToken1[nLen1 - 1] == '>' && sToken2[0] == '<' && sToken2[nLen2 
- 1] == '>' ) ) { // TO DO: Error "Illegal entry in bfchar block in ToUnicode CMap" continue; } sToken1[nLen1 - 1] = sToken2[nLen2 - 1] = '\0'; if ( sscanf( sToken1 + 1, "%x", &nCode1) != 1 ) { // TO DO: Error "Illegal entry in bfchar block in ToUnicode CMap" continue; } AddMapping( nCode1, sToken2 + 1, nLen2 - 2, 0); } pLexer->GetToken( sToken1, sizeof(sToken1), &nLen1); } else if ( !strcmp( sToken2, "beginbfrange") ) { while ( pLexer->GetToken( sToken1, sizeof(sToken1), &nLen1) ) { if ( !strcmp( sToken1, "endbfrange") ) { break; } if ( !pLexer->GetToken( sToken2, sizeof(sToken2), &nLen2) || !strcmp( sToken2, "endbfrange") || !pLexer->GetToken( sToken3, sizeof(sToken3), &nLen3) || !strcmp( sToken3, "endbfrange") ) { // TO DO: Error "Illegal entry in bfrange block in ToUnicode CMap" break; } if ( !( nLen1 == 2 + nDigitCount && sToken1[0] == '<' && sToken1[nLen1 - 1] == '>' && nLen2 == 2 + nDigitCount && sToken2[0] == '<' && sToken2[nLen2 - 1] == '>' ) ) { // TO DO: Error "Illegal entry in bfrange block in ToUnicode CMap" continue; } sToken1[nLen1 - 1] = sToken2[nLen2 - 1] = '\0'; if ( sscanf( sToken1 + 1, "%x", &nCode1) != 1 || sscanf( sToken2 + 1, "%x", &nCode2) != 1 ) { // TO DO: Error "Illegal entry in bfrange block in ToUnicode CMap" continue; } if ( !strcmp( sToken3, "[") ) { int nIndex = 0; while ( pLexer->GetToken( sToken1, sizeof(sToken1), &nLen1) && nCode1 + nIndex <= nCode2 ) { if ( !strcmp(sToken1, "]") ) { break; } if ( sToken1[0] == '<' && sToken1[nLen1 - 1] == '>' ) { sToken1[nLen1 - 1] = '\0'; AddMapping( nCode1 + nIndex, sToken1 + 1, nLen1 - 2, 0); } else { // TO DO: Error "Illegal entry in bfrange block in ToUnicode CMap" } ++nIndex; } } else if ( sToken3[0] == '<' && sToken3[nLen3 - 1] == '>' ) { sToken3[nLen3 - 1] = '\0'; for ( int nIndex = 0; nCode1 <= nCode2; ++nCode1, ++nIndex ) { AddMapping( nCode1, sToken3 + 1, nLen3 - 2, nIndex); } } else { // TO DO: Error "Illegal entry in bfrange block in ToUnicode CMap" } } pLexer->GetToken( sToken1, 
sizeof(sToken1), &nLen1); } else { strcpy( sToken1, sToken2); } } delete pLexer; } void CharCodeToUnicode::AddMapping(CharCode nCode, char *sUnicodeString, int nLen, int nOffset) { if ( nCode >= m_nMapLen ) { unsigned int unOldLen = m_nMapLen; m_nMapLen = (nCode + 256) & ~255; m_pMap = (Unicode *)MemUtilsReallocArray( m_pMap, m_nMapLen, sizeof(Unicode)); for ( unsigned int unIndex = unOldLen; unIndex < m_nMapLen; ++unIndex ) { m_pMap[unIndex] = 0; } } if ( nLen <= 4 ) { Unicode nUnicode = 0; if ( sscanf( sUnicodeString, "%x", &nUnicode) != 1) { // TO DO: Error Illegal entry in ToUnicode CMap" return; } m_pMap[nCode] = nUnicode + nOffset; } else { if ( m_nSMapLen >= m_nSMapSize ) { m_nSMapSize = m_nSMapSize + 16; m_pSMap = (CharCodeToUnicodeString *) MemUtilsReallocArray( m_pSMap, m_nSMapSize, sizeof(CharCodeToUnicodeString)); } m_pMap[nCode] = 0; m_pSMap[m_nSMapLen].nCode = nCode; m_pSMap[m_nSMapLen].nLen = nLen / 4; for ( int nIndex = 0; nIndex < m_pSMap[m_nSMapLen].nLen && nIndex < MaxUnicodeString; ++nIndex ) { char pUnicodeHex[5]; strncpy( pUnicodeHex, sUnicodeString + nIndex * 4, 4); pUnicodeHex[4] = '\0'; if ( sscanf( pUnicodeHex, "%x", &m_pSMap[m_nSMapLen].pUnicodeString[nIndex] ) != 1 ) { // TO DO: Error "Illegal entry in ToUnicode CMap" } } m_pSMap[m_nSMapLen].pUnicodeString[m_pSMap[m_nSMapLen].nLen - 1] += nOffset; ++m_nSMapLen; } } CharCodeToUnicode::CharCodeToUnicode(StringExt *seTag) { m_seTag = seTag; m_nMapLen = 256; m_pMap = (Unicode *)MemUtilsMallocArray( m_nMapLen, sizeof(Unicode)); for ( unsigned int unIndex = 0; unIndex < m_nMapLen; ++unIndex ) { m_pMap[unIndex] = 0; } m_pSMap = NULL; m_nSMapLen = m_nSMapSize = 0; m_nRef = 1; InitializeCriticalSection( &m_oCS ); } CharCodeToUnicode::CharCodeToUnicode(StringExt *seTag, Unicode *pMap, CharCode unMapLen, BOOL bCopyMap, CharCodeToUnicodeString *pSMap, int nSMapLen, int nSMapSize) { m_seTag = seTag; m_nMapLen = unMapLen; if ( bCopyMap ) { m_pMap = (Unicode *)MemUtilsMallocArray(m_nMapLen, 
sizeof(Unicode)); memcpy( m_pMap, pMap, m_nMapLen * sizeof(Unicode)); } else { m_pMap = pMap; } m_pSMap = pSMap; m_nSMapLen = nSMapLen; m_nSMapSize = nSMapSize; m_nRef = 1; InitializeCriticalSection( &m_oCS ); } CharCodeToUnicode::~CharCodeToUnicode() { if ( m_seTag ) { delete m_seTag; } MemUtilsFree(m_pMap); MemUtilsFree(m_pSMap); DeleteCriticalSection( &m_oCS ); } void CharCodeToUnicode::AddRef() { CTemporaryCS *pCS = new CTemporaryCS( &m_oCS ); ++m_nRef; RELEASEOBJECT( pCS ); } void CharCodeToUnicode::Release() { CTemporaryCS *pCS = new CTemporaryCS( &m_oCS ); BOOL bDelete = ( --m_nRef == 0 ); RELEASEOBJECT( pCS ); if ( bDelete ) { delete this; } } BOOL CharCodeToUnicode::Match(StringExt *seTag) { return m_seTag && !m_seTag->Compare(seTag); } void CharCodeToUnicode::SetMapping(CharCode nCode, Unicode *pUnicode, int nLen) { if ( nLen == 1 ) { m_pMap[nCode] = pUnicode[0]; } else { int nIndex = 0; for ( nIndex = 0; nIndex < m_nSMapLen; ++nIndex ) { if ( m_pSMap[nIndex].nCode == nCode ) { break; } } if ( nIndex == m_nSMapLen ) { if (m_nSMapLen == m_nSMapSize) { m_nSMapSize += 8; m_pSMap = (CharCodeToUnicodeString *) MemUtilsReallocArray(m_pSMap, m_nSMapSize, sizeof(CharCodeToUnicodeString)); } ++m_nSMapLen; } m_pMap[nCode] = 0; m_pSMap[nIndex].nCode = nCode; m_pSMap[nIndex].nLen = nLen; for ( int nJ = 0; nJ < nLen && nJ < MaxUnicodeString; ++nJ ) { m_pSMap[nIndex].pUnicodeString[nJ] = pUnicode[nJ]; } } } int CharCodeToUnicode::MapToUnicode(CharCode nCode, Unicode *pUnicode, int size) { if ( nCode >= m_nMapLen ) { return 0; } if ( m_pMap[nCode] ) { pUnicode[0] = m_pMap[nCode]; return 1; } for ( int nIndex = 0; nIndex < m_nSMapLen; ++nIndex ) { if ( m_pSMap[nIndex].nCode == nCode ) { int nJ = 0; for ( nJ = 0; nJ < m_pSMap[nIndex].nLen && nJ < size; ++nJ ) { pUnicode[nJ] = m_pSMap[nIndex].pUnicodeString[nJ]; } return nJ; } } return 0; } //------------------------------------------------------------------------------------------------------------------------------- 
CharCodeToUnicodeCache::CharCodeToUnicodeCache(int nSize) { m_nSize = nSize; m_ppCache = (CharCodeToUnicode **)MemUtilsMallocArray( m_nSize, sizeof(CharCodeToUnicode *)); for ( int nIndex = 0; nIndex < m_nSize; ++nIndex ) { m_ppCache[nIndex] = NULL; } } CharCodeToUnicodeCache::~CharCodeToUnicodeCache() { for ( int nIndex = 0; nIndex < m_nSize; ++nIndex ) { if ( m_ppCache[nIndex] ) { m_ppCache[nIndex]->Release(); } } MemUtilsFree(m_ppCache); } CharCodeToUnicode *CharCodeToUnicodeCache::GetCharCodeToUnicode(StringExt *seTag) { if ( m_ppCache[0] && m_ppCache[0]->Match(seTag) ) { m_ppCache[0]->AddRef(); return m_ppCache[0]; } for ( int nIndex = 1; nIndex < m_nSize; ++nIndex ) { if ( m_ppCache[nIndex] && m_ppCache[nIndex]->Match(seTag) ) { CharCodeToUnicode *pCharCodeToUnicode = m_ppCache[nIndex]; for ( int nJ = nIndex; nJ >= 1; --nJ ) { m_ppCache[nJ] = m_ppCache[nJ - 1]; } m_ppCache[0] = pCharCodeToUnicode; pCharCodeToUnicode->AddRef(); return pCharCodeToUnicode; } } return NULL; } void CharCodeToUnicodeCache::Add(CharCodeToUnicode *pCharCodeToUnicode) { if ( m_ppCache[m_nSize - 1] ) { m_ppCache[m_nSize - 1]->Release(); } for ( int nIndex = m_nSize - 1; nIndex >= 1; --nIndex ) { m_ppCache[nIndex] = m_ppCache[nIndex - 1]; } m_ppCache[0] = pCharCodeToUnicode; pCharCodeToUnicode->AddRef(); }