DocFormatReader - дешифрование + поддержка формата 95 года

This commit is contained in:
ElenaSubbotina
2016-06-26 17:37:17 +03:00
parent 0d261b8769
commit de899687bb
118 changed files with 3963 additions and 3492 deletions

View File

@ -34,17 +34,22 @@
namespace DocFileFormat
{
PieceTable::PieceTable (FileInformationBlock *fib, POLE::Stream *tableStream)
PieceTable::PieceTable (FileInformationBlock *fib, POLE::Stream *tableStream, POLE::Stream* wordStream)
{
if (fib->m_FibWord97.lcbClx < 1/* || !fib->m_FibBase.fComplex*/) return;
// Read the bytes of complex file information
unsigned char* clx = new unsigned char[fib->m_FibWord97.lcbClx];
tableStream->seek(fib->m_FibWord97.fcClx);
tableStream->read(clx, (int)fib->m_FibWord97.lcbClx);
if (tableStream)
{
tableStream->seek(fib->m_FibWord97.fcClx);
tableStream->read(clx, (int)fib->m_FibWord97.lcbClx);
}
Pieces = list<PieceDescriptor>();
FileCharacterPositions = new map<int, int>();
CharacterPositions = new map<int, int>();
Pieces = std::list<PieceDescriptor>();
FileCharacterPositions = new std::map<int, int>();
CharacterPositions = new std::map<int, int>();
int pos = 0;
bool goon = true;
@ -87,7 +92,7 @@ namespace DocFileFormat
memcpy(pcdBytes, (piecetable + indexPcd), 8);
PieceDescriptor pcd(pcdBytes, 8);
PieceDescriptor pcd(pcdBytes, 8, fib->m_CodePage);
pcd.cpStart = cp;
pcd.cpEnd = cpNext;
@ -99,7 +104,7 @@ namespace DocFileFormat
int f = (int)pcd.fc;
int multi = 1;
if ( pcd.encoding == ENCODING_UNICODE )
if ( pcd.code_page == ENCODING_UTF16 )
{
multi = 2;
}
@ -144,7 +149,6 @@ namespace DocFileFormat
}
RELEASEARRAYOBJECTS(clx);
m_carriageIter = Pieces.begin();
}
@ -161,14 +165,14 @@ namespace DocFileFormat
{
std::vector<wchar_t> *piecePairs = new std::vector<wchar_t>();
for ( list<PieceDescriptor>::iterator iter = this->Pieces.begin(); iter != this->Pieces.end(); ++iter)
for ( std::list<PieceDescriptor>::iterator iter = this->Pieces.begin(); iter != this->Pieces.end(); ++iter)
{
//get the FC end of this piece
PieceDescriptor pcd = *iter;
int pcdFcEnd = pcd.cpEnd - pcd.cpStart;
if (pcd.encoding == ENCODING_UNICODE)
if (pcd.code_page == ENCODING_UTF16)
{
pcdFcEnd *= 2;
}
@ -184,7 +188,7 @@ namespace DocFileFormat
stream->seek(pcd.fc);
stream->read(bytes, cb);
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t> >(piecePairs, bytes, cb, pcd.encoding);
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t> >(piecePairs, bytes, cb, pcd.code_page);
RELEASEARRAYOBJECTS(bytes);
}
@ -196,14 +200,14 @@ namespace DocFileFormat
{
std::vector<wchar_t> *encodingChars = new std::vector<wchar_t>();
for (list<PieceDescriptor>::iterator iter = Pieces.begin(); iter != Pieces.end(); ++iter)
for (std::list<PieceDescriptor>::iterator iter = Pieces.begin(); iter != Pieces.end(); ++iter)
{
PieceDescriptor pcd = *iter;
//get the FC end of this piece
int pcdFcEnd = pcd.cpEnd - pcd.cpStart;
if ( pcd.encoding == ENCODING_UNICODE )
if ( pcd.code_page == ENCODING_UTF16 )
{
pcdFcEnd *= 2;
}
@ -230,7 +234,7 @@ namespace DocFileFormat
wordStream->read( bytes, cb);
//get the chars
FormatUtils::GetSTLCollectionFromBytes<vector<wchar_t>>( encodingChars, bytes, cb, pcd.encoding );
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>( encodingChars, bytes, cb, pcd.code_page );
RELEASEARRAYOBJECTS( bytes );
}
@ -249,7 +253,7 @@ namespace DocFileFormat
wordStream->read( bytes, cb);
//get the chars
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>( encodingChars, bytes, cb, pcd.encoding );
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>( encodingChars, bytes, cb, pcd.code_page );
RELEASEARRAYOBJECTS( bytes );
}
@ -268,7 +272,7 @@ namespace DocFileFormat
wordStream->read( bytes, cb);
//get the chars
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>(encodingChars, bytes, cb, pcd.encoding);
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>(encodingChars, bytes, cb, pcd.code_page);
RELEASEARRAYOBJECTS(bytes);
@ -291,7 +295,7 @@ namespace DocFileFormat
wordStream->read( bytes, cb );
//get the chars
FormatUtils::GetSTLCollectionFromBytes<vector<wchar_t>>( encodingChars, bytes, cb, pcd.encoding );
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>( encodingChars, bytes, cb, pcd.code_page );
RELEASEARRAYOBJECTS( bytes );
@ -318,21 +322,11 @@ namespace DocFileFormat
{
std::vector<wchar_t>* encodingChars = new std::vector<wchar_t>();
//if (fcStart >= fcEnd)
// return encodingChars;
#ifdef _DEBUG
//if (fcStart == 3296 && fcEnd == 3326)
//{
// int ccc = 0;
//}
#endif
int fcSize = fcEnd - fcStart;
bool read = true;
for (list<PieceDescriptor>::iterator iter = Pieces.begin(); iter != Pieces.end(); ++iter)
for (std::list<PieceDescriptor>::iterator iter = Pieces.begin(); iter != Pieces.end(); ++iter)
{
PieceDescriptor pcd = (*iter);
@ -341,7 +335,7 @@ namespace DocFileFormat
int pcdFcEnd = pcd.cpEnd - pcd.cpStart;
if (pcd.encoding == ENCODING_UNICODE)
if (pcd.code_page == ENCODING_UTF16)
{
pcdFcEnd *= 2;
}
@ -362,7 +356,7 @@ namespace DocFileFormat
if (cb < 0)
break;
if (!ReadSymbolsBuffer((int)fcStart, cb, pcd.encoding, word, encodingChars))
if (!ReadSymbolsBuffer((int)fcStart, cb, pcd.code_page, word, encodingChars))
break;
fcSize -= cb;
@ -382,7 +376,7 @@ namespace DocFileFormat
if (cb < 0)
break;
if (!ReadSymbolsBuffer((int)pcd.fc, cb, pcd.encoding, word, encodingChars))
if (!ReadSymbolsBuffer((int)pcd.fc, cb, pcd.code_page, word, encodingChars))
break;
fcSize -= cb;
@ -402,7 +396,7 @@ namespace DocFileFormat
if (cb <= 0)
break;
if (!ReadSymbolsBuffer((int)pcd.fc, cb, pcd.encoding, word, encodingChars))
if (!ReadSymbolsBuffer((int)pcd.fc, cb, pcd.code_page, word, encodingChars))
break;
if (read)
@ -423,7 +417,7 @@ namespace DocFileFormat
if (cb <= 0)
break;
if (!ReadSymbolsBuffer((int)fcStart, cb, pcd.encoding, word, encodingChars))
if (!ReadSymbolsBuffer((int)fcStart, cb, pcd.code_page, word, encodingChars))
break;
if (read)
@ -436,11 +430,6 @@ namespace DocFileFormat
}
else if (fcEnd < (int)pcd.fc) // this piece is beyond the requested range
{
#ifdef _DEBUG
//ATLTRACE(_T("PieceTable::GetChars() - fcEnd < (int)pcd.fc\n"));
#endif
// имеет место быть перескок по стриму, поэтому корректируем начальную позицию
//size_t count = encodingChars->size();
@ -461,11 +450,11 @@ namespace DocFileFormat
// int length = pcdFcEnd - pcd.fc;
// if (length > fcSize)
// {
// ReadSymbolsBuffer((int)pcd.fc, fcSize, pcd.encoding, word, encodingChars);
// ReadSymbolsBuffer((int)pcd.fc, fcSize, pcd.code_page, word, encodingChars);
// break;
// }
// ReadSymbolsBuffer((int)pcd.fc, length, pcd.encoding, word, encodingChars);
// ReadSymbolsBuffer((int)pcd.fc, length, pcd.code_page, word, encodingChars);
// fcSize -= length;
// continue;
@ -478,7 +467,7 @@ namespace DocFileFormat
return encodingChars;
}
inline bool PieceTable::ReadSymbolsBuffer(int pos, int size, Encoding encoding, POLE::Stream* word, std::vector<wchar_t>* encodingChars)
inline bool PieceTable::ReadSymbolsBuffer(int pos, int size, int coding, POLE::Stream* word, std::vector<wchar_t>* encodingChars)
{
unsigned char* bytes = new unsigned char[size];
if (NULL == bytes)
@ -488,7 +477,7 @@ namespace DocFileFormat
word->read(bytes, size);
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>(encodingChars, bytes, size, encoding);
FormatUtils::GetSTLCollectionFromBytes<std::vector<wchar_t>>(encodingChars, bytes, size, coding);
RELEASEARRAYOBJECTS(bytes);