/*
 * (c) Copyright Ascensio System SIA 2010-2016
 *
 * This program is a free software product. You can redistribute it and/or
 * modify it under the terms of the GNU Affero General Public License (AGPL)
 * version 3 as published by the Free Software Foundation. In accordance with
 * Section 7(a) of the GNU AGPL its Section 15 shall be amended to the effect
 * that Ascensio System SIA expressly excludes the warranty of non-infringement
 * of any third-party rights.
 *
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. For
 * details, see the GNU AGPL at: http://www.gnu.org/licenses/agpl-3.0.html
 *
 * You can contact Ascensio System SIA at Lubanas st. 125a-25, Riga, Latvia,
 * EU, LV-1021.
 *
 * The interactive user interfaces in modified source and object code versions
 * of the Program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU AGPL version 3.
 *
 * Pursuant to Section 7(b) of the License you must retain the original Product
 * logo when distributing the program. Pursuant to Section 7(e) we decline to
 * grant you any rights under trademark law for use of our trademarks.
 *
 * All the Product's GUI elements, including illustrations and icon sets, as
 * well as technical writing content are licensed under the terms of the
 * Creative Commons Attribution-ShareAlike 4.0 International. See the License
 * terms at http://creativecommons.org/licenses/by-sa/4.0/legalcode
 *
 */

#include "WordDocument.h"
#include "../../../Common/OfficeFileErrorDescription.h"

namespace DocFileFormat
{
	// Sends one progress value through both callback entry points
	// (OnProgress, then OnProgressEx).
	// Returns true when OnProgressEx set its cancel flag; returns false when
	// no callback is installed.
	static bool ReportProgressAndCheckCancel(const ProgressCallback* pCallFunc, int nProgress)
	{
		if (NULL == pCallFunc)
			return false;

		pCallFunc->OnProgress(pCallFunc->caller, DOC_ONPROGRESSEVENT_ID, nProgress);

		SHORT bCancel = 0;
		pCallFunc->OnProgressEx(pCallFunc->caller, DOC_ONPROGRESSEVENT_ID, nProgress, &bCancel);

		return (0 != bCancel);
	}

	// Stores the file name and puts every owned pointer into the "not loaded"
	// (NULL) state. Nothing is opened or read here; see LoadDocument().
	WordDocument::WordDocument (CString sFileName) :
		m_sFileName (sFileName), m_PieceTable(NULL),
		WordDocumentStream(NULL), TableStream(NULL), DataStream(NULL), FIB(NULL),
		Text(NULL), RevisionAuthorTable(NULL), FontTable(NULL), BookmarkNames(NULL), AutoTextNames(NULL),
		IndividualFootnotesPlex(NULL), FootnoteReferenceCharactersPlex(NULL),
		IndividualEndnotesPlex(NULL), EndnoteReferenceCharactersPlex(NULL),
		FieldsPlex(NULL), FootnoteDocumentFieldsPlex(NULL), EndnoteDocumentFieldsPlex(NULL),
		HeadersAndFootersDocumentFieldsPlex(NULL), HeaderStoriesPlex(NULL),
		AnnotationsReferencePlex(NULL), IndividualCommentsPlex(NULL),
		TextboxBreakPlex(NULL), TextboxBreakPlexHeader(NULL),
		OfficeDrawingPlex(NULL), OfficeDrawingPlexHeader(NULL),
		SectionPlex(NULL), BookmarkStartPlex(NULL), BookmarkEndPlex(NULL), AutoTextPlex(NULL),
		AllPapxFkps(NULL), AllChpxFkps(NULL), AllPapx(NULL), AllPapxVector(NULL), AllSepx(NULL),
		Styles(NULL), listTable(NULL), AnnotationOwners(NULL), DocProperties(NULL),
		listFormatOverrideTable(NULL), headerAndFooterTable(NULL)
	{
		m_pCallFunc         = NULL;
		m_pStorage          = NULL;
		m_pOfficeArtContent = NULL;
		fibFC               = 0;
	}

	// Releases everything the document owns (see Clear()).
	WordDocument::~WordDocument()
	{
		Clear();
	}

	// Remembers the progress callback (may be NULL) and parses the file.
	// Returns the result of Parse(): 0 on success, otherwise one of the
	// AVS_* error codes returned there.
	long WordDocument::LoadDocument(const ProgressCallback* pCallFunc)
	{
		m_pCallFunc = pCallFunc;

		// The result is passed through an HRESULT exactly as before.
		const HRESULT hr = Parse (fibFC);
		return hr;
	}

	// Releases every object created by Parse().
	// The FKP lists and the SEPX map own their elements, so the elements are
	// released first and the container afterwards. AllPapx and AllPapxVector
	// are released as containers only: their entries are filled from the PAPX
	// FKPs in Parse() and are not released here.
	void WordDocument::Clear()
	{
		if (AllPapxFkps)
		{
			for (list<FormattedDiskPagePAPX*>::iterator iter = AllPapxFkps->begin(); iter != AllPapxFkps->end(); ++iter)
			{
				RELEASEOBJECT(*iter);
			}

			RELEASEOBJECT(AllPapxFkps);
		}

		if (AllChpxFkps)
		{
			for (list<FormattedDiskPageCHPX*>::iterator iter = AllChpxFkps->begin(); iter != AllChpxFkps->end(); ++iter)
			{
				RELEASEOBJECT(*iter);
			}

			RELEASEOBJECT(AllChpxFkps);
		}

		if (AllSepx)
		{
			for (map<int, SectionPropertyExceptions*>::iterator iter = AllSepx->begin(); iter != AllSepx->end(); ++iter)
			{
				RELEASEOBJECT(iter->second);
			}

			RELEASEOBJECT(AllSepx);
		}

		RELEASEOBJECT(Text);
		RELEASEOBJECT(m_PieceTable);
		RELEASEOBJECT(FIB);

		RELEASEOBJECT(RevisionAuthorTable);
		RELEASEOBJECT(FontTable);
		RELEASEOBJECT(BookmarkNames);
		RELEASEOBJECT(AutoTextNames);

		RELEASEOBJECT(IndividualFootnotesPlex);
		RELEASEOBJECT(FootnoteReferenceCharactersPlex);
		RELEASEOBJECT(IndividualEndnotesPlex);
		RELEASEOBJECT(EndnoteReferenceCharactersPlex);
		RELEASEOBJECT(FieldsPlex);
		RELEASEOBJECT(FootnoteDocumentFieldsPlex);
		RELEASEOBJECT(EndnoteDocumentFieldsPlex);
		RELEASEOBJECT(HeadersAndFootersDocumentFieldsPlex);
		RELEASEOBJECT(HeaderStoriesPlex);
		RELEASEOBJECT(IndividualCommentsPlex);
		RELEASEOBJECT(AnnotationsReferencePlex);
		RELEASEOBJECT(TextboxBreakPlex);
		RELEASEOBJECT(TextboxBreakPlexHeader);
		RELEASEOBJECT(OfficeDrawingPlex);
		RELEASEOBJECT(OfficeDrawingPlexHeader);
		RELEASEOBJECT(SectionPlex);
		RELEASEOBJECT(BookmarkStartPlex);
		RELEASEOBJECT(BookmarkEndPlex);
		RELEASEOBJECT(AutoTextPlex);

		RELEASEOBJECT(Styles);
		RELEASEOBJECT(listTable);
		RELEASEOBJECT(AllPapx);
		RELEASEOBJECT(AllPapxVector);
		RELEASEOBJECT(AnnotationOwners);
		RELEASEOBJECT(DocProperties);
		RELEASEOBJECT(listFormatOverrideTable);
		RELEASEOBJECT(headerAndFooterTable);

		RELEASEOBJECT(m_pStorage);
		RELEASEOBJECT(m_pOfficeArtContent);

		RELEASEOBJECT(DataStream);
		RELEASEOBJECT(TableStream);
		RELEASEOBJECT(WordDocumentStream);
	}

	// Opens the compound file named by m_sFileName and reads the FIB, the
	// string tables, the PLCs, the FKPs, the custom tables and the text.
	//
	// fibFC is accepted for interface compatibility; this function does not
	// read it.
	//
	// Returns 0 on success. On failure Clear() is called and one of the
	// following is returned:
	//   AVS_ERROR_FILEFORMAT   - storage or required stream cannot be opened,
	//                            or the progress callback requested a cancel
	//   AVS_MSFILE_ERROR_OLDER - the FIB version is older than Fib1997
	//   AVS_ERROR_DRM          - the FIB marks the document as encrypted
	long WordDocument::Parse(int fibFC)
	{
		m_pStorage = new StructuredStorageReader();

		// One guard for both "no reader" and "file cannot be opened", so the
		// reader is never dereferenced when it is NULL.
		if ((NULL == m_pStorage) || (m_pStorage->SetFile (m_sFileName.GetBuffer()) == false))
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		if (m_pStorage->GetStream ("WordDocument", &WordDocumentStream) == false)
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		// Parse FIB

		FIB = new FileInformationBlock(VirtualStreamReader(WordDocumentStream,0));

#ifdef _DEBUG
		//if (Fib2007 == FIB->m_FibBase.nFib)		ATLTRACE (_T("NOTE: OFFICE 2007 file format\n"));
		//else if (Fib2003 == FIB->m_FibBase.nFib)	ATLTRACE (_T("NOTE: OFFICE 2003 file format\n"));
		//else if (Fib2002 == FIB->m_FibBase.nFib)	ATLTRACE (_T("NOTE: OFFICE 2002 file format\n"));
		//else if (Fib2000 == FIB->m_FibBase.nFib)	ATLTRACE (_T("NOTE: OFFICE 2000 file format\n"));
		//else if (Fib1997 == FIB->m_FibBase.nFib)	ATLTRACE (_T("NOTE: OFFICE 1997 file format\n"));
		//else if (Fib1997 < FIB->m_FibBase.nFib)	ATLTRACE (_T("ERROR: OFFICE file format - UNSUPPORTED\n"));
#endif

		// Check the file version

		if (FIB->m_FibBase.nFib)
		{
			// nFib (2 bytes): An unsigned integer that specifies the version number of the file format used.
			// Superseded by FibRgCswNew.nFibNew if it is present. This value SHOULD<12> be 0x00C1.
			// <12> Section 2.5.2: A special empty document is installed with Word 97, Word 2000, Word 2002,
			// and Office Word 2003 to allow "Create New Word Document" from the operating system. This
			// document has an nFib of 0x00C0. In addition the BiDi build of Word 97 differentiates its documents
			// by saving 0x00C2 as the nFib. In both cases treat them as if they were 0x00C1.

			// FIB version written. This will be >= 101 for all Word 6.0 for Windows and after documents

			if (FIB->m_FibBase.nFib < Fib1997)
			{
				Clear();
				return AVS_MSFILE_ERROR_OLDER;
			}
		}
		else
		{
			if (FIB->m_FibNew.nFibNew < Fib1997)
			{
				Clear();
				return AVS_MSFILE_ERROR_OLDER;
			}
		}

		if (FIB->m_FibBase.fEncrypted)
		{
			Clear();
			return AVS_ERROR_DRM;
		}

		// Get the streams: the FIB flag selects which of the two table streams is used.

		const bool bTableStreamFound = FIB->m_FibBase.fWhichTblStm
			? m_pStorage->GetStream ("1Table", &TableStream)
			: m_pStorage->GetStream ("0Table", &TableStream);

		if (bTableStreamFound == false)
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		// The "Data" stream is optional: a failure to open it is not an error.
		try
		{
			m_pStorage->GetStream ("Data", &DataStream);
		}
		catch (...)
		{
			DataStream = NULL;
		}

		// Read all needed STTBs

		RevisionAuthorTable = new StringTable<WideString>     (TableStream, FIB->m_FibWord97.fcSttbfRMark, FIB->m_FibWord97.lcbSttbfRMark);
		FontTable           = new StringTable<FontFamilyName> (TableStream, FIB->m_FibWord97.fcSttbfFfn,   FIB->m_FibWord97.lcbSttbfFfn);
		BookmarkNames       = new StringTable<WideString>     (TableStream, FIB->m_FibWord97.fcSttbfBkmk,  FIB->m_FibWord97.lcbSttbfBkmk);
		AutoTextNames       = new StringTable<WideString>     (TableStream, FIB->m_FibWord97.fcSttbfGlsy,  FIB->m_FibWord97.lcbSttbfGlsy);

		// A cancel request is reported with the same code as a format error.
		if (ReportProgressAndCheckCancel(m_pCallFunc, 100000))
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		// Read all needed PLCFs

		if (FIB->m_RgLw97.ccpFtn > 0)
		{
			IndividualFootnotesPlex         = new Plex<EmptyStructure>     (EmptyStructure::STRUCTURE_SIZE,     TableStream, FIB->m_FibWord97.fcPlcffndTxt, FIB->m_FibWord97.lcbPlcffndTxt);
			FootnoteReferenceCharactersPlex = new Plex<FootnoteDescriptor> (FootnoteDescriptor::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcffndRef, FIB->m_FibWord97.lcbPlcffndRef);
		}

		if (FIB->m_RgLw97.ccpEdn > 0)
		{
			IndividualEndnotesPlex         = new Plex<EmptyStructure>    (EmptyStructure::STRUCTURE_SIZE,    TableStream, FIB->m_FibWord97.fcPlcfendTxt, FIB->m_FibWord97.lcbPlcfendTxt);
			EndnoteReferenceCharactersPlex = new Plex<EndnoteDescriptor> (EndnoteDescriptor::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfendRef, FIB->m_FibWord97.lcbPlcfendRef);
		}

		if (FIB->m_RgLw97.ccpHdr > 0)
		{
			HeaderStoriesPlex = new Plex<EmptyStructure> (EmptyStructure::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfHdd, FIB->m_FibWord97.lcbPlcfHdd);
		}

		if (FIB->m_RgLw97.ccpAtn > 0)
		{
			IndividualCommentsPlex   = new Plex<EmptyStructure>                (EmptyStructure::STRUCTURE_SIZE,                TableStream, FIB->m_FibWord97.fcPlcfandTxt, FIB->m_FibWord97.lcbPlcfandTxt);
			AnnotationsReferencePlex = new Plex<AnnotationReferenceDescriptor> (AnnotationReferenceDescriptor::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfandRef, FIB->m_FibWord97.lcbPlcfandRef);
		}

		TextboxBreakPlex        = new Plex<Tbkd>              (Tbkd::STRUCTURE_SIZE,              TableStream, FIB->m_FibWord97.fcPlcfTxbxBkd,    FIB->m_FibWord97.lcbPlcfTxbxBkd);
		TextboxBreakPlexHeader  = new Plex<Tbkd>              (Tbkd::STRUCTURE_SIZE,              TableStream, FIB->m_FibWord97.fcPlcfTxbxHdrBkd, FIB->m_FibWord97.lcbPlcfTxbxHdrBkd);
		OfficeDrawingPlex       = new Plex<Spa>               (Spa::STRUCTURE_SIZE,               TableStream, FIB->m_FibWord97.fcPlcSpaMom,      FIB->m_FibWord97.lcbPlcSpaMom);
		OfficeDrawingPlexHeader = new Plex<Spa>               (Spa::STRUCTURE_SIZE,               TableStream, FIB->m_FibWord97.fcPlcSpaHdr,      FIB->m_FibWord97.lcbPlcSpaHdr);
		SectionPlex             = new Plex<SectionDescriptor> (SectionDescriptor::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfSed,        FIB->m_FibWord97.lcbPlcfSed);
		BookmarkStartPlex       = new Plex<BookmarkFirst>     (BookmarkFirst::STRUCTURE_SIZE,     TableStream, FIB->m_FibWord97.fcPlcfBkf,        FIB->m_FibWord97.lcbPlcfBkf);
		BookmarkEndPlex         = new Plex<EmptyStructure>    (EmptyStructure::STRUCTURE_SIZE,    TableStream, FIB->m_FibWord97.fcPlcfBkl,        FIB->m_FibWord97.lcbPlcfBkl);

		// for tests only
		//m_arFTBXS = new Plex<FTXBXS> (FTXBXS::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcftxbxTxt, FIB->m_FibWord97.lcbPlcftxbxTxt);

		//#ifdef _DEBUG
		//	DumpBuffer(L"C:\\office_spa.dat", VirtualStreamReader(TableStream), FIB->m_FibWord97.fcPlcSpaMom, FIB->m_FibWord97.lcbPlcSpaMom);
		//#endif

		// Pair every bookmark start CP with its end CP. The end index is read
		// from the file, so it is range-checked before it is used as an index;
		// a bookmark with an out-of-range index is skipped.
		for (size_t i = 0; i < BookmarkStartPlex->Elements.size(); ++i)
		{
			BookmarkFirst* pBookmark = static_cast<BookmarkFirst*>(BookmarkStartPlex->Elements[i]);
			if (NULL == pBookmark)
				continue;

			const int nEndIndex = pBookmark->GetIndex();
			if ((nEndIndex < 0) || (static_cast<size_t>(nEndIndex) >= BookmarkEndPlex->CharacterPositions.size()))
				continue;

			BookmarkStartEndCPs.push_back(make_pair(BookmarkStartPlex->CharacterPositions[i], BookmarkEndPlex->CharacterPositions[nEndIndex]));
		}

		AutoTextPlex                        = new Plex<EmptyStructure> (EmptyStructure::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfGlsy,   FIB->m_FibWord97.lcbPlcfGlsy);
		FieldsPlex                          = new Plex<FieldCharacter> (FieldCharacter::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfFldMom, FIB->m_FibWord97.lcbPlcfFldMom);
		FootnoteDocumentFieldsPlex          = new Plex<FieldCharacter> (FieldCharacter::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfFldFtn, FIB->m_FibWord97.lcbPlcfFldFtn);
		EndnoteDocumentFieldsPlex           = new Plex<FieldCharacter> (FieldCharacter::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfFldEdn, FIB->m_FibWord97.lcbPlcfFldEdn);
		HeadersAndFootersDocumentFieldsPlex = new Plex<FieldCharacter> (FieldCharacter::STRUCTURE_SIZE, TableStream, FIB->m_FibWord97.fcPlcfFldHdr, FIB->m_FibWord97.lcbPlcfFldHdr);

		if (ReportProgressAndCheckCancel(m_pCallFunc, 200000))
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		// Read the FKPs

		AllPapxFkps = FormattedDiskPagePAPX::GetAllPAPXFKPs (FIB, WordDocumentStream, TableStream, DataStream);
		AllChpxFkps = FormattedDiskPageCHPX::GetAllCHPXFKPs (FIB, WordDocumentStream, TableStream);

		// Read custom tables

		DocProperties           = new WordDocumentProperties  (FIB, TableStream);
		Styles                  = new StyleSheet              (FIB, TableStream, DataStream);
		listTable               = new ListTable               (FIB, TableStream);
		listFormatOverrideTable = new ListFormatOverrideTable (FIB, TableStream);
		m_pOfficeArtContent     = new OfficeArtContent        (FIB, TableStream);
		headerAndFooterTable    = new HeaderAndFooterTable    (FIB, TableStream);
		AnnotationOwners        = new AnnotationOwnerList     (FIB, TableStream);

#ifdef _DEBUG
		//if (FIB->m_FibWord97.lcbDop)
		//	DumpBuffer(L"C:\\dop.dat", VirtualStreamReader(TableStream), FIB->m_FibWord97.fcDop, FIB->m_FibWord97.lcbDop);
#endif

#ifdef _DEBUG
		//DumpBuffer(L"d:\\office_art.dat", VirtualStreamReader(TableStream), FIB->m_FibWord97.fcDggInfo, FIB->m_FibWord97.lcbDggInfo);
#endif

		if (ReportProgressAndCheckCancel(m_pCallFunc, 300000))
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		// Parse the piece table and construct a list that contains all chars

		m_PieceTable = new PieceTable (FIB, TableStream);
		Text         = m_PieceTable->GetAllEncodingText (WordDocumentStream);

#ifdef _DEBUG
		if (0)
		{
			//ATLTRACE (L"\n=========================================================\n");

			for (size_t j = 0; j < Text->size(); ++j)
			{
				//ATLTRACE (L"%c", Text->operator [](j));
			}

			//ATLTRACE (L"\n=======================================================\n");
		}
#endif

		// Locate the "_PictureBullets" bookmark and record, for every character
		// with the value 1 from the bookmark start to the end of the text, the
		// pair (offset from the bookmark start, absolute position).
		if (BookmarkNames && Text)
		{
			for (unsigned int i = 0; i < BookmarkStartEndCPs.size(); ++i)
			{
				WideString* bookmarkName = static_cast<WideString*>(BookmarkNames->operator[]( i ));

				if ( ( bookmarkName != NULL ) && ( *bookmarkName == _T( "_PictureBullets" ) ) )
				{
					for (unsigned int j = BookmarkStartEndCPs[i].first, k = 0; j < this->Text->size(); ++j, ++k )
					{
						if ( this->Text->at( j ) == 1 )
						{
							PictureBulletsCPsMap.insert (make_pair( k, j ));
						}
					}
				}
			}
		}

		// Build a dictionary of all PAPX, keyed by file character position,
		// plus a sorted vector of the same keys.

		AllPapx       = new map<int, ParagraphPropertyExceptions*>();
		AllPapxVector = new vector<int>();

		if (AllPapxFkps)
		{
			for (list<FormattedDiskPagePAPX*>::iterator iter = AllPapxFkps->begin(); iter != AllPapxFkps->end(); ++iter)
			{
				for (unsigned int j = 0; j < (*iter)->grppapxSize; ++j)
				{
					int nVal = (*iter)->rgfc[j];

					AllPapx->insert( pair<int, ParagraphPropertyExceptions*>( nVal, (*iter)->grppapx[j] ) );
					AllPapxVector->push_back(nVal);
				}
			}
		}

		std::sort (AllPapxVector->begin(), AllPapxVector->end());

		if (ReportProgressAndCheckCancel(m_pCallFunc, 400000))
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		// Build a dictionary of all SEPX, keyed by the CP that follows each section descriptor.

		if ( !SectionPlex->Elements.empty() )
		{
			AllSepx = new map<int, SectionPropertyExceptions*>();

			for (unsigned int i = 0; i < SectionPlex->Elements.size(); ++i)
			{
				// Read the SED
				SectionDescriptor* sed = static_cast<SectionDescriptor*>(SectionPlex->Elements.at(i));

				// Skip entries that cannot be resolved instead of dereferencing
				// NULL or indexing past the end of the CP array.
				if ( ( NULL == sed ) || ( ( i + 1 ) >= SectionPlex->CharacterPositions.size() ) )
					continue;

				int cp = SectionPlex->CharacterPositions[i + 1];

				// Get the SEPX
				VirtualStreamReader wordReader( this->WordDocumentStream, sed->fcSepx );

				//!!!TODO: cbSepx is the size in bytes of the rest properties part!!!
				short cbSepx = wordReader.ReadInt16();
				unsigned char* bytes = wordReader.ReadBytes( ( cbSepx /*- 2*/ ), true );

				SectionPropertyExceptions* sepx = new SectionPropertyExceptions( bytes, ( cbSepx /*- 2*/ ) );

				// map::insert keeps the existing entry when the key is already
				// present; release the new object in that case so it is not leaked.
				if ( !AllSepx->insert( pair<int, SectionPropertyExceptions*>( cp, sepx ) ).second )
				{
					RELEASEOBJECT( sepx );
				}

				RELEASEARRAYOBJECTS( bytes );
			}
		}

		if (ReportProgressAndCheckCancel(m_pCallFunc, 500000))
		{
			Clear();
			return AVS_ERROR_FILEFORMAT;
		}

		return 0;
	}

	// Returns the start FCs of all CHPX runs which are valid for the given FC
	// range: every run that starts inside [fcMin, fcMax) plus a run that
	// starts before fcMin and ends after it.
	//
	// The vector is allocated here and handed to the caller. When no position
	// matches, the vector is released and the (released) pointer is returned.
	// NOTE(review): this relies on RELEASEOBJECT resetting the pointer to NULL
	// - confirm against the macro definition.
	std::vector<int>* WordDocument::GetFileCharacterPositions(int fcMin, int fcMax)
	{
		vector<int>* intVector = new vector<int>();

		if (AllChpxFkps)
		{
			// NOTE(review): this counter advances only for FKPs that reach the
			// "last fkp?" test below, so after any FKP was skipped it no longer
			// equals the FKP's index in the list. Kept as is, because callers
			// may rely on the current output.
			size_t visited = 0;

			for (std::list<FormattedDiskPageCHPX*>::iterator iter = AllChpxFkps->begin(); iter != AllChpxFkps->end(); ++iter)
			{
				FormattedDiskPageCHPX *fkp = (*iter);

				// an FKP without any FC has nothing to contribute; skipping it
				// also keeps rgfc[rgfcSize - 1] below inside the array
				if ( fkp->rgfcSize == 0 )
				{
					continue;
				}

				// if the last fc of this fkp is smaller than fcMin
				// this fkp is before the requested range
				if ( fkp->rgfc[fkp->rgfcSize - 1] < fcMin )
				{
					continue;
				}

				// if the first fc of this fkp is larger than fcMax
				// this fkp is beyond the requested range
				if ( fkp->rgfc[0] > fcMax )
				{
					break;
				}

				// don't add the duplicated values of the FKP boundaries (Length-1)
				int count = fkp->rgfcSize - 1;

				// last fkp?
				// use full table
				if ( visited++ == ( this->AllChpxFkps->size() - 1 ) )
				{
					count = fkp->rgfcSize;
				}

				for (int j = 0; j < count; ++j)
				{
					// When the full table is used, rgfc[j + 1] is not read for the
					// last entry: the check above guarantees the last fc is >= fcMin,
					// so the first operand is false and evaluation stops there.
					if ( ( fkp->rgfc[j] < fcMin ) && ( fkp->rgfc[j + 1] > fcMin ) )
					{
						// this chpx starts before fcMin
						intVector->push_back( fkp->rgfc[j] );
					}
					else if ( ( fkp->rgfc[j] >= fcMin ) && ( fkp->rgfc[j] < fcMax ) )
					{
						// this chpx is in the range
						intVector->push_back( fkp->rgfc[j] );
					}
				}
			}
		}

		if ( intVector->empty() )
		{
			RELEASEOBJECT( intVector );
		}

		return intVector;
	}

	// Returns the CharacterPropertyExceptions of all CHPX runs which are valid
	// for the given FC range; the selection rule is the same as in
	// GetFileCharacterPositions().
	//
	// The list is allocated here and handed to the caller; its elements are
	// the pointers stored in the CHPX FKPs, which Clear() releases together
	// with the FKPs. When nothing matches, the list is released and the
	// (released) pointer is returned.
	// NOTE(review): this relies on RELEASEOBJECT resetting the pointer to NULL
	// - confirm against the macro definition.
	std::list<CharacterPropertyExceptions*>* WordDocument::GetCharacterPropertyExceptions(int fcMin, int fcMax)
	{
		std::list<CharacterPropertyExceptions*>* cpeList = new list<CharacterPropertyExceptions*>();

		if (AllChpxFkps)
		{
			for (std::list<FormattedDiskPageCHPX*>::iterator iter = AllChpxFkps->begin(); iter != AllChpxFkps->end(); ++iter)
			{
				FormattedDiskPageCHPX *fkp = (*iter);

				// get the CHPX
				for (unsigned int j = 0; j < fkp->grpchpxSize; ++j)
				{
					if ( ( fkp->rgfc[j] < fcMin ) && ( fkp->rgfc[j + 1] > fcMin ) )
					{
						// this chpx starts before fcMin
						cpeList->push_back( fkp->grpchpx[j] );
					}
					else if ( ( fkp->rgfc[j] >= fcMin ) && ( fkp->rgfc[j] < fcMax ) )
					{
						// this chpx is in the range
						cpeList->push_back( fkp->grpchpx[j] );
					}
				}
			}
		}

		if (cpeList->empty())
		{
			RELEASEOBJECT(cpeList);
		}

		return cpeList;
	}
}