Files
core/ASCOfficePDFReader/Lexer.cpp

557 lines
11 KiB
C++

#include "stdafx.h"
#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "Lexer.h"
//----------------------------------------------------------------------------------------------
// Character classification table, indexed by byte value (0x00..0xff).
//   0 - ordinary character;
//   1 - whitespace;
//   2 - special (delimiter) character.
// A value of 1 or 2 means that the character terminates a name or a command.
static char c_sSpecialChars[256] =
{
// Columns are the low nibble (x0 .. xf); the trailing comment names the high nibble of the row.
1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x: 0x00 NUL, 0x09 HT, 0x0a LF, 0x0c FF, 0x0d CR -> whitespace
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x: 0x20 space -> whitespace; '%' '(' ')' '/' -> special
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x: '<' '>' -> special
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x: '[' ']' -> special
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x: '{' '}' -> special
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx
};
//------------------------------------------------------------------------
// Lexer
//------------------------------------------------------------------------
// Builds a lexer that reads from a single stream.
// The stream is wrapped into m_oCurStream, and a copy of that object becomes the
// only element of a freshly allocated array (released in the destructor, since
// m_bFreeArray is set).
Lexer::Lexer(XRef *pXref, Stream *pStream)
{
	Object oStreamCopy;

	m_bFreeArray      = TRUE;
	m_nCurStreamIndex = 0;

	// Wrap the raw stream first: the array element is a copy of this object.
	m_oCurStream.InitStream( pStream );

	m_pStreams = new Array( pXref );
	m_pStreams->Add( m_oCurStream.Copy( &oStreamCopy ) );

	// Start reading from the beginning of the stream.
	m_oCurStream.StreamReset();
}
// Builds a lexer from an object that is assumed to hold either a single stream
// or an array of streams.
//  - single stream: a private one-element array is allocated (m_bFreeArray = TRUE);
//  - otherwise:     the object's own array is borrowed (m_bFreeArray = FALSE).
// If the resulting array is not empty, its first element becomes the current stream.
Lexer::Lexer(XRef *pXref, Object *pObject)
{
	Object oStreamCopy;

	if ( !pObject->IsStream() )
	{
		// Not a stream: use the array stored in the object, without taking ownership.
		m_bFreeArray = FALSE;
		m_pStreams   = pObject->GetArray();
	}
	else
	{
		// Single stream: put a copy of it into an array of our own.
		m_bFreeArray = TRUE;
		m_pStreams   = new Array( pXref );
		m_pStreams->Add( pObject->Copy( &oStreamCopy ) );
	}

	m_nCurStreamIndex = 0;

	if ( m_pStreams->GetCount() <= 0 )
	{
		return;
	}

	m_pStreams->Get( m_nCurStreamIndex, &m_oCurStream );
	m_oCurStream.StreamReset();
}
// Closes and frees the current stream object (if there is one) and deletes the
// stream array when it was allocated by this lexer.
Lexer::~Lexer()
{
	const bool bHasCurrentStream = !m_oCurStream.IsNone();
	if ( bHasCurrentStream )
	{
		m_oCurStream.StreamClose();
		m_oCurStream.Free();
	}

	if ( !m_bFreeArray )
	{
		return; // the array is borrowed, it is not ours to delete
	}
	delete m_pStreams;
}
// Reads and consumes one character.
// When the current stream reports EOF it is closed and freed, and reading
// continues with the next element of the stream array (if any).
// Returns the character, or EOF when there is no current stream left.
int Lexer::GetChar()
{
	for (;;)
	{
		if ( m_oCurStream.IsNone() )
		{
			return EOF;
		}

		const int nRead = m_oCurStream.StreamGetChar();
		if ( nRead != EOF )
		{
			return nRead;
		}

		// Current stream is exhausted: release it and try the next one.
		m_oCurStream.StreamClose();
		m_oCurStream.Free();
		++m_nCurStreamIndex;

		if ( m_nCurStreamIndex < m_pStreams->GetCount() )
		{
			m_pStreams->Get( m_nCurStreamIndex, &m_oCurStream );
			m_oCurStream.StreamReset();
		}
	}
}
// Returns the result of StreamLookChar() on the current stream, or EOF when
// there is no current stream. Unlike GetChar(), it never switches to the next
// stream of the array.
int Lexer::LookChar()
{
	return m_oCurStream.IsNone() ? EOF : m_oCurStream.StreamLookChar();
}
// Reads the next lexical token from the stream sequence and stores it in pObject.
//
// Leading whitespace and '%' comments (terminated by CR or LF) are skipped first.
// The first significant character then selects the token kind:
//   digit, '+', '-', '.'  -> integer or real number
//   '('                   -> literal string (balanced parentheses, backslash escapes)
//   '/'                   -> name ('#xx' hexadecimal escapes are decoded)
//   '[' , ']'             -> command
//   '<'                   -> "<<" command, or a hexadecimal string
//   '>'                   -> ">>" command, otherwise an error object
//   ')' , '{' , '}'       -> error object
//   anything else         -> keyword: true / false / null, otherwise a command
//
// Numbers: an integer whose magnitude does not fit into int is returned as a
// real instead of overflowing. Digits are recognised by explicit '0'..'9' range
// checks so that the result does not depend on the current C locale.
//
// pObject - receives the token.
// Returns pObject; at end of input returns the result of pObject->InitEOF().
Object *Lexer::GetObject(Object *pObject)
{
	// All locals are declared up front: the body jumps between case labels and
	// goto targets, which must not bypass initialisations.
	char *pCurPointer;
	int nChar, nTempChar;
	BOOL bNegative = FALSE, bDone = FALSE;
	BOOL bIntOverflow = FALSE;
	int nBracketCount = 0;
	int nInt = 0, nDigit = 0;
	double dFloat = 0, dScale = 0;
	StringExt *seString;
	int nCount = 0, nHexCharLen = 0;

	// Skip comments and whitespace
	BOOL bComment = FALSE;
	while (1)
	{
		if ( ( nChar = GetChar() ) == EOF )
		{
			return pObject->InitEOF();
		}
		if ( bComment )
		{
			// a comment runs up to the end of the line
			if ( nChar == '\r' || nChar == '\n' )
				bComment = FALSE;
		}
		else if ( nChar == '%' )
		{
			bComment = TRUE;
		}
		else if ( c_sSpecialChars[nChar] != 1 )
		{
			// first non-whitespace character outside of a comment
			break;
		}
	}

	// Start reading the object
	switch ( nChar )
	{
	// Number
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	case '+': case '-': case '.':
		bNegative    = FALSE;
		bIntOverflow = FALSE;
		nInt   = 0;
		dFloat = 0; // mirrors the integer part as a double (used on overflow and by doReal)
		if ( nChar == '-' )
		{
			bNegative = TRUE;
		}
		else if ( nChar == '+' )
		{
			// explicit plus sign: nothing to record
		}
		else if ( nChar == '.' )
		{
			goto doReal;
		}
		else
		{
			nInt   = nChar - '0';
			dFloat = nInt;
		}
		while (1)
		{
			nChar = LookChar();
			if ( nChar >= '0' && nChar <= '9' )
			{
				GetChar();
				nDigit = nChar - '0';
				dFloat = dFloat * 10 + nDigit;
				if ( !bIntOverflow )
				{
					// nInt * 10 + nDigit would exceed INT_MAX: stop updating the
					// int accumulator and keep only the double one.
					if ( nInt > ( INT_MAX - nDigit ) / 10 )
						bIntOverflow = TRUE;
					else
						nInt = nInt * 10 + nDigit;
				}
			}
			else if ( nChar == '.' )
			{
				GetChar();
				goto doReal;
			}
			else
			{
				break;
			}
		}
		if ( bIntOverflow )
		{
			// the value does not fit into an int: return it as a real
			pObject->InitReal( bNegative ? -dFloat : dFloat );
		}
		else
		{
			if ( bNegative )
				nInt = -nInt;
			pObject->InitInt( nInt );
		}
		break;
	doReal:
		// dFloat already holds the integer part; accumulate the fraction digits
		dScale = 0.1;
		while (1)
		{
			nChar = LookChar();
			if ( nChar == '-' )
			{
				// ignore minus signs that appear in the middle of a number
				// (Adobe ignores such cases)
				// TO DO: Error "Badly formatted number"
				GetChar();
				continue;
			}
			if ( nChar < '0' || nChar > '9' )
			{
				break;
			}
			GetChar();
			dFloat = dFloat + dScale * ( nChar - '0' );
			dScale *= 0.1;
		}
		if ( bNegative )
			dFloat = -dFloat;
		pObject->InitReal( dFloat );
		break;

	// String
	case '(':
		pCurPointer = m_sTempBuffer;
		nCount = 0;
		nBracketCount = 1; // nesting depth of parentheses
		bDone = FALSE;
		seString = NULL;
		do
		{
			// nTempChar == EOF means "no character to store on this iteration"
			nTempChar = EOF;
			switch ( nChar = GetChar() )
			{
			case EOF:
#if 0
			// This breaks some PDF files, e.g., ones from Photoshop.
			case '\r':
			case '\n':
#endif
				// TO DO: Error "Unterminated string"
				bDone = TRUE;
				break;
			case '(':
				++nBracketCount;
				nTempChar = nChar;
				break;
			case ')':
				if ( --nBracketCount == 0 )
				{
					// closing parenthesis of the string itself
					bDone = TRUE;
				}
				else
				{
					nTempChar = nChar;
				}
				break;
			case '\\':
				switch ( nChar = GetChar() )
				{
				case 'n': nTempChar = '\n'; break;
				case 'r': nTempChar = '\r'; break;
				case 't': nTempChar = '\t'; break;
				case 'b': nTempChar = '\b'; break;
				case 'f': nTempChar = '\f'; break;
				case '\\':
				case '(':
				case ')': nTempChar = nChar; break;
				// octal escape: one to three octal digits
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
					nTempChar = nChar - '0';
					nChar = LookChar();
					if ( nChar >= '0' && nChar <= '7' )
					{
						GetChar();
						nTempChar = (nTempChar << 3) + ( nChar - '0' );
						nChar = LookChar();
						if ( nChar >= '0' && nChar <= '7' )
						{
							GetChar();
							nTempChar = (nTempChar << 3) + ( nChar - '0' );
						}
					}
					break;
				// backslash followed by a line break: nothing is stored
				case '\r':
					nChar = LookChar();
					if ( nChar == '\n' )
					{
						GetChar();
					}
					break;
				case '\n':
					break;
				case EOF:
					// TO DO: Error "Unterminated string"
					bDone = TRUE;
					break;
				default:
					// unknown escape: the backslash is dropped
					nTempChar = nChar;
					break;
				}
				break;
			default:
				nTempChar = nChar;
				break;
			}
			if ( nTempChar != EOF )
			{
				if ( nCount == TokenBufferSize )
				{
					// temporary buffer is full: move its contents into the string
					if ( !seString )
						seString = new StringExt( m_sTempBuffer, TokenBufferSize );
					else
						seString->Append( m_sTempBuffer, TokenBufferSize );
					pCurPointer = m_sTempBuffer;
					nCount = 0;
				}
				*pCurPointer++ = (char)nTempChar;
				++nCount;
			}
		} while ( !bDone );
		// move the remaining buffered characters into the string
		if ( !seString )
			seString = new StringExt( m_sTempBuffer, nCount );
		else
			seString->Append( m_sTempBuffer, nCount );
		pObject->InitString( seString );
		break;

	// Name
	case '/':
		pCurPointer = m_sTempBuffer;
		nCount = 0;
		while ( ( nChar = LookChar() ) != EOF && !c_sSpecialChars[nChar] )
		{
			GetChar();
			if ( nChar == '#' )
			{
				// '#' followed by a hex digit starts a two-digit character code;
				// otherwise the '#' is stored literally
				nTempChar = LookChar();
				if ( nTempChar >= '0' && nTempChar <= '9' )
				{
					nChar = nTempChar - '0';
				}
				else if ( nTempChar >= 'A' && nTempChar <= 'F' )
				{
					nChar = nTempChar - 'A' + 10;
				}
				else if ( nTempChar >= 'a' && nTempChar <= 'f' )
				{
					nChar = nTempChar - 'a' + 10;
				}
				else
				{
					goto notEscChar;
				}
				GetChar();
				nChar <<= 4;
				nTempChar = GetChar();
				if ( nTempChar >= '0' && nTempChar <= '9' )
				{
					nChar += nTempChar - '0';
				}
				else if ( nTempChar >= 'A' && nTempChar <= 'F' )
				{
					nChar += nTempChar - 'A' + 10;
				}
				else if ( nTempChar >= 'a' && nTempChar <= 'f' )
				{
					nChar += nTempChar - 'a' + 10;
				}
				else
				{
					// TO DO: Error "Illegal digit in hex char in name"
				}
			}
		notEscChar:
			if ( ++nCount == TokenBufferSize )
			{
				// TO DO: Error "Name token too long"
				// stop reading; the character just consumed is not stored
				break;
			}
			*pCurPointer++ = nChar;
		}
		*pCurPointer = '\0';
		pObject->InitName( m_sTempBuffer );
		break;

	// Array
	case '[':
	case ']':
		m_sTempBuffer[0] = nChar;
		m_sTempBuffer[1] = '\0';
		pObject->InitCommand( m_sTempBuffer );
		break;

	// Hex string or Dictionary
	case '<':
		nChar = LookChar();
		// Dictionary
		if ( nChar == '<' )
		{
			GetChar();
			m_sTempBuffer[0] = m_sTempBuffer[1] = '<';
			m_sTempBuffer[2] = '\0';
			pObject->InitCommand( m_sTempBuffer );
		}
		else // Hex string
		{
			pCurPointer = m_sTempBuffer;
			nHexCharLen = nCount = 0;
			nTempChar = 0;
			seString = NULL;
			while (1)
			{
				nChar = GetChar();
				if ( nChar == '>' )
				{
					break;
				}
				else if ( nChar == EOF )
				{
					// TO DO: Error "Unterminated hex string"
					break;
				}
				else if ( c_sSpecialChars[nChar] != 1 )
				{
					// whitespace between digits is skipped; any other character
					// counts as a digit (an illegal one contributes 0)
					nTempChar = nTempChar << 4;
					if ( nChar >= '0' && nChar <= '9' )
						nTempChar += nChar - '0';
					else if ( nChar >= 'A' && nChar <= 'F' )
						nTempChar += nChar - 'A' + 10;
					else if ( nChar >= 'a' && nChar <= 'f' )
						nTempChar += nChar - 'a' + 10;
					else
					{
						// TO DO: Error "Illegal character <nChar> in hex string"
					}
					if ( ++nHexCharLen == 2 )
					{
						// two digits collected: store one byte
						if ( nCount == TokenBufferSize )
						{
							if ( !seString )
								seString = new StringExt( m_sTempBuffer, TokenBufferSize );
							else
								seString->Append( m_sTempBuffer, TokenBufferSize );
							pCurPointer = m_sTempBuffer;
							nCount = 0;
						}
						*pCurPointer++ = (char)nTempChar;
						++nCount;
						nTempChar = 0;
						nHexCharLen = 0;
					}
				}
			}
			if ( !seString )
				seString = new StringExt( m_sTempBuffer, nCount );
			else
				seString->Append( m_sTempBuffer, nCount );
			// odd number of digits: the missing last digit is taken as 0
			if ( nHexCharLen == 1 )
				seString->Append( (char)(nTempChar << 4) );
			pObject->InitString( seString );
		}
		break;

	// Dictionary
	case '>':
		nChar = LookChar();
		if ( nChar == '>' )
		{
			GetChar();
			m_sTempBuffer[0] = m_sTempBuffer[1] = '>';
			m_sTempBuffer[2] = '\0';
			pObject->InitCommand( m_sTempBuffer );
		}
		else
		{
			// TO DO: Error "Illegal character '>'"
			pObject->InitError();
		}
		break;

	// Error
	case ')':
	case '{':
	case '}':
		// TO DO: Error "Illegal character <nChar>"
		pObject->InitError();
		break;

	// Command (simply some reserved word, for example 'obj' or 'stream')
	default:
		pCurPointer = m_sTempBuffer;
		*pCurPointer++ = nChar;
		nCount = 1;
		while ( ( nChar = LookChar() ) != EOF && !c_sSpecialChars[nChar] )
		{
			GetChar();
			if ( ++nCount == TokenBufferSize )
			{
				// TO DO: Error "Command token too long"
				// stop reading; the character just consumed is not stored
				break;
			}
			*pCurPointer++ = nChar;
		}
		*pCurPointer = '\0';
		// the first-character test avoids a strcmp() call for most commands
		if ( m_sTempBuffer[0] == 't' && !strcmp( m_sTempBuffer, "true" ) )
		{
			pObject->InitBool( TRUE );
		}
		else if ( m_sTempBuffer[0] == 'f' && !strcmp( m_sTempBuffer, "false" ) )
		{
			pObject->InitBool( FALSE );
		}
		else if ( m_sTempBuffer[0] == 'n' && !strcmp( m_sTempBuffer, "null" ) )
		{
			pObject->InitNull();
		}
		else
		{
			pObject->InitCommand( m_sTempBuffer );
		}
		break;
	}
	return pObject;
}
// Consumes characters up to and including the next end-of-line marker
// (LF, CR, or CR followed by LF), or until EOF.
void Lexer::SkipToNextLine()
{
	for (;;)
	{
		switch ( GetChar() )
		{
		case EOF:
		case '\n':
			return;

		case '\r':
			// swallow the LF of a CR LF pair
			if ( LookChar() == '\n' )
			{
				GetChar();
			}
			return;

		default:
			break; // ordinary character: keep skipping
		}
	}
}
// Tells whether nChar is a whitespace character, i.e. a value in 0..0xff that
// is marked with 1 in c_sSpecialChars. Values outside that range are never whitespace.
BOOL Lexer::IsSpace(int nChar)
{
	if ( nChar < 0 || nChar > 0xff )
	{
		return FALSE;
	}
	return c_sSpecialChars[ nChar ] == 1;
}