Fix MergePages

This commit is contained in:
Svetlana Kulikova
2025-02-19 17:51:10 +03:00
parent e1b13913ac
commit edcd907749
6 changed files with 157 additions and 142 deletions

View File

@ -1253,49 +1253,125 @@ bool CPdfEditor::EditPage(int nPageIndex, bool bSet)
RELEASEOBJECT(pXref); RELEASEOBJECT(pXref);
return false; return false;
} }
bool CPdfEditor::SplitPages(const int* arrPageIndex, unsigned int unLength) bool CPdfEditor::SplitPages(const int* arrPageIndex, unsigned int unLength, CPdfReader* _pReader, int nMergePos)
{ {
if (m_nMode >= 0) if (m_nMode == 1 || (m_nMode == 0 && !_pReader))
return false; return false;
m_nMode = 1; if (m_nMode < 0)
PDFDoc* pPDFDocument = m_pReader->GetPDFDocument(); m_nMode = 1;
PDFDoc* pPDFDocument = _pReader ? _pReader->GetPDFDocument() : m_pReader->GetPDFDocument();
XRef* xref = pPDFDocument->getXRef(); XRef* xref = pPDFDocument->getXRef();
PdfWriter::CDocument* pDoc = m_pWriter->GetDocument(); PdfWriter::CDocument* pDoc = m_pWriter->GetDocument();
// Страницы должны быть созданы заранее для ссылки if (unLength == 0)
unLength = pPDFDocument->getNumPages();
// Страницы должны быть созданы заранее для ссылки на них
Catalog* pCatalog = pPDFDocument->getCatalog(); Catalog* pCatalog = pPDFDocument->getCatalog();
for (unsigned int i = 0; i < unLength; ++i) for (unsigned int i = 0; i < unLength; ++i)
{ {
Ref* pPageRef = pCatalog->getPageRef(arrPageIndex[i] + 1); Ref* pPageRef = pCatalog->getPageRef((arrPageIndex ? arrPageIndex[i] : i) + 1);
if (pPageRef->num == 0) if (pPageRef->num == 0)
{
m_nError = 3;
return false; return false;
}
PdfWriter::CPage* pPage = new PdfWriter::CPage(pDoc); PdfWriter::CPage* pPage = new PdfWriter::CPage(pDoc);
pDoc->AddObject(pPage); pDoc->AddObject(pPage);
pDoc->AddPage(pPage); pDoc->AddPage(nMergePos == -1 ? pDoc->GetPagesCount() : (nMergePos + i), pPage);
// Получение объекта страницы // Получение объекта страницы
Object pageRefObj, pageObj; Object pageRefObj, pageObj;
pageRefObj.initRef(pPageRef->num, pPageRef->gen); pageRefObj.initRef(pPageRef->num, pPageRef->gen);
if (!pageRefObj.fetch(xref, &pageObj)->isDict()) if (!pageRefObj.fetch(xref, &pageObj)->isDict())
{ {
pageObj.free(); pageObj.free(); pageRefObj.free();
pageRefObj.free();
m_nError = 3;
return false; return false;
} }
m_mSplitUniqueRef[pPageRef->num] = pPage; m_mSplitUniqueRef[pPageRef->num] = pPage;
pageRefObj.free(); pageObj.free(); pageRefObj.free();
} }
bool bRes = true;
for (unsigned int i = 0; i < unLength; ++i) for (unsigned int i = 0; i < unLength; ++i)
bRes &= SplitPage(arrPageIndex[i]); {
if (!bRes) Ref* pPageRef = pCatalog->getPageRef((arrPageIndex ? arrPageIndex[i] : i) + 1);
return false; if (pPageRef->num == 0)
return false;
// Получение объекта страницы
PdfWriter::CPage* pPage = (PdfWriter::CPage*)m_mSplitUniqueRef[pPageRef->num];
Object pageRefObj, pageObj;
pageRefObj.initRef(pPageRef->num, pPageRef->gen);
if (!pageRefObj.fetch(xref, &pageObj)->isDict())
{
pageObj.free();
pageRefObj.free();
return false;
}
pageRefObj.free();
// Копирование страницы со всеми ресурсами из reader для writer
for (int nIndex = 0; nIndex < pageObj.dictGetLength(); ++nIndex)
{
Object oTemp;
char* chKey = pageObj.dictGetKey(nIndex);
if (strcmp("Resources", chKey) == 0)
{
Ref oResourcesRef = { -1, -1 };
if (pageObj.dictGetValNF(nIndex, &oTemp)->isRef())
oResourcesRef = oTemp.getRef();
oTemp.free();
std::map<int, PdfWriter::CObjectBase*>::iterator it = m_mSplitUniqueRef.find(oResourcesRef.num);
if (oResourcesRef.num > 0 && it != m_mSplitUniqueRef.end())
{
pPage->Add(chKey, it->second);
continue;
}
if (pageObj.dictGetVal(nIndex, &oTemp)->isDict())
{
PdfWriter::CResourcesDict* pDict = pDoc->CreateResourcesDict(oResourcesRef.num < 0, false);
if (oResourcesRef.num > 0)
m_mSplitUniqueRef[oResourcesRef.num] = pDict;
pPage->Add(chKey, pDict);
for (int nIndex = 0; nIndex < oTemp.dictGetLength(); ++nIndex)
{
Object oRes;
char* chKey2 = oTemp.dictGetKey(nIndex);
oTemp.dictGetValNF(nIndex, &oRes);
PdfWriter::CObjectBase* pBase = DictToCDictObject2(&oRes, pDoc, xref, m_mSplitUniqueRef);
pDict->Add(chKey2, pBase);
oRes.free();
}
oTemp.free();
continue;
}
else
{
oTemp.free();
pageObj.dictGetValNF(nIndex, &oTemp);
}
}
else if (strcmp("Parent", chKey) == 0)
{
oTemp.free();
continue;
}
else
pageObj.dictGetValNF(nIndex, &oTemp);
PdfWriter::CObjectBase* pBase = DictToCDictObject2(&oTemp, pDoc, xref, m_mSplitUniqueRef);
pPage->Add(chKey, pBase);
oTemp.free();
}
pPage->Fix();
if (m_nMode == 0)
{
// pPage->AddContents(); // TODO pPage->AddContents чтобы можно было дописать изменения, если понадобится
}
else
m_pWriter->SetNeedAddHelvetica(false); // TODO дописывает шрифт для адекватного редактирования Adobe pdf без текст. Убрать при реализации map шрифтов
pageObj.free();
}
Object oCatalog; Object oCatalog;
if (!xref->getCatalog(&oCatalog)->isDict()) if (!xref->getCatalog(&oCatalog)->isDict())
@ -1307,10 +1383,17 @@ bool CPdfEditor::SplitPages(const int* arrPageIndex, unsigned int unLength)
Object oAcroForm; Object oAcroForm;
if (oCatalog.dictLookupNF("AcroForm", &oAcroForm)->isRef() || oAcroForm.isDict()) if (oCatalog.dictLookupNF("AcroForm", &oAcroForm)->isRef() || oAcroForm.isDict())
{ {
PdfWriter::CDictObject* pAcroForm = new PdfWriter::CDictObject(); PdfWriter::CDictObject* pAcroForm = pDoc->GetAcroForm();
if (!pAcroForm)
{
pAcroForm = new PdfWriter::CDictObject();
if (oAcroForm.isRef())
pDoc->AddObject(pAcroForm);
pDoc->SetAcroForm(pAcroForm);
}
if (oAcroForm.isRef()) if (oAcroForm.isRef())
{ {
pDoc->AddObject(pAcroForm);
oAcroForm.free(); oAcroForm.free();
if (!oCatalog.dictLookup("AcroForm", &oAcroForm)->isDict()) if (!oCatalog.dictLookup("AcroForm", &oAcroForm)->isDict())
{ {
@ -1318,7 +1401,6 @@ bool CPdfEditor::SplitPages(const int* arrPageIndex, unsigned int unLength)
return false; return false;
} }
} }
pDoc->SetAcroForm(pAcroForm);
for (int nIndex = 0; nIndex < oAcroForm.dictGetLength(); ++nIndex) for (int nIndex = 0; nIndex < oAcroForm.dictGetLength(); ++nIndex)
{ {
@ -1331,41 +1413,64 @@ bool CPdfEditor::SplitPages(const int* arrPageIndex, unsigned int unLength)
oFieldsRef = oTemp.getRef(); oFieldsRef = oTemp.getRef();
oTemp.free(); oTemp.free();
std::map<int, PdfWriter::CObjectBase*>::iterator it = m_mSplitUniqueRef.find(oFieldsRef.num); PdfWriter::CArrayObject* pFields = dynamic_cast<PdfWriter::CArrayObject*>(pAcroForm->Get("Fields"));
if (oFieldsRef.num > 0 && it != m_mSplitUniqueRef.end()) if (!pFields)
{ {
pAcroForm->Add(chKey, it->second); std::map<int, PdfWriter::CObjectBase*>::iterator it = m_mSplitUniqueRef.find(oFieldsRef.num);
continue; if (oFieldsRef.num > 0 && it != m_mSplitUniqueRef.end())
{
pAcroForm->Add(chKey, it->second);
continue;
}
} }
if (oAcroForm.dictGetVal(nIndex, &oTemp)->isArray()) if (oAcroForm.dictGetVal(nIndex, &oTemp)->isArray())
{ {
PdfWriter::CArrayObject* pArray = new PdfWriter::CArrayObject(); if (!pFields)
if (oFieldsRef.num > 0)
{ {
pDoc->AddObject(pArray); pFields = new PdfWriter::CArrayObject();
m_mSplitUniqueRef[oFieldsRef.num] = pArray; if (oFieldsRef.num > 0)
{
pDoc->AddObject(pFields);
m_mSplitUniqueRef[oFieldsRef.num] = pFields;
}
pAcroForm->Add(chKey, pFields);
} }
pAcroForm->Add(chKey, pArray);
for (int nIndex = 0; nIndex < oTemp.arrayGetLength(); ++nIndex) for (int nIndex = 0; nIndex < oTemp.arrayGetLength(); ++nIndex)
{ {
Object oRes; Object oRes;
if (oTemp.arrayGetNF(nIndex, &oRes)->isRef()) if (oTemp.arrayGetNF(nIndex, &oRes)->isRef())
{ {
it = m_mSplitUniqueRef.find(oRes.getRefNum()); std::map<int, PdfWriter::CObjectBase*>::iterator it = m_mSplitUniqueRef.find(oRes.getRefNum());
if (it != m_mSplitUniqueRef.end()) if (it != m_mSplitUniqueRef.end())
pArray->Add(it->second); pFields->Add(it->second);
} }
oRes.free(); oRes.free();
} }
oTemp.free(); oTemp.free();
continue; continue;
} }
else else if (!pFields)
{ {
oTemp.free(); oTemp.free();
oAcroForm.dictGetValNF(nIndex, &oTemp); oAcroForm.dictGetValNF(nIndex, &oTemp);
} }
else
{
oTemp.free();
continue;
}
}
else if (strcmp("SigFlags", chKey) == 0 || strcmp("XFA", chKey) == 0 || (strcmp("DA", chKey) == 0 && pAcroForm->Get("DA")))
{
oTemp.free();
continue;
}
else if (strcmp("DR", chKey) == 0)
{
// TODO объединение ресурсов >(0o0)<
oAcroForm.dictGetValNF(nIndex, &oTemp);
} }
else else
oAcroForm.dictGetValNF(nIndex, &oTemp); oAcroForm.dictGetValNF(nIndex, &oTemp);
@ -1376,97 +1481,14 @@ bool CPdfEditor::SplitPages(const int* arrPageIndex, unsigned int unLength)
} }
oAcroForm.free(); oCatalog.free(); oAcroForm.free(); oCatalog.free();
return bRes;
}
bool CPdfEditor::SplitPage(int nPageIndex)
{
PDFDoc* pPDFDocument = m_pReader->GetPDFDocument();
PdfWriter::CDocument* pDoc = m_pWriter->GetDocument();
if (!pPDFDocument || !pDoc)
return false;
XRef* xref = pPDFDocument->getXRef();
Catalog* pCatalog = pPDFDocument->getCatalog();
if (!xref || !pCatalog)
return false;
Ref* pPageRef = pCatalog->getPageRef(nPageIndex + 1);
if (pPageRef->num == 0)
return false;
// Получение объекта страницы
PdfWriter::CPage* pPage = (PdfWriter::CPage*)m_mSplitUniqueRef[pPageRef->num];
Object pageRefObj, pageObj;
pageRefObj.initRef(pPageRef->num, pPageRef->gen);
if (!pageRefObj.fetch(xref, &pageObj)->isDict())
{
pageObj.free();
pageRefObj.free();
return false;
}
pageRefObj.free();
// Копирование страницы со всеми ресурсами из reader для writer
for (int nIndex = 0; nIndex < pageObj.dictGetLength(); ++nIndex)
{
Object oTemp;
char* chKey = pageObj.dictGetKey(nIndex);
if (strcmp("Resources", chKey) == 0)
{
Ref oResourcesRef = { -1, -1 };
if (pageObj.dictGetValNF(nIndex, &oTemp)->isRef())
oResourcesRef = oTemp.getRef();
oTemp.free();
std::map<int, PdfWriter::CObjectBase*>::iterator it = m_mSplitUniqueRef.find(oResourcesRef.num);
if (oResourcesRef.num > 0 && it != m_mSplitUniqueRef.end())
{
pPage->Add(chKey, it->second);
continue;
}
if (pageObj.dictGetVal(nIndex, &oTemp)->isDict())
{
PdfWriter::CResourcesDict* pDict = pDoc->CreateResourcesDict(oResourcesRef.num < 0, false);
if (oResourcesRef.num > 0)
m_mSplitUniqueRef[oResourcesRef.num] = pDict;
pPage->Add(chKey, pDict);
for (int nIndex = 0; nIndex < oTemp.dictGetLength(); ++nIndex)
{
Object oRes;
char* chKey2 = oTemp.dictGetKey(nIndex);
oTemp.dictGetValNF(nIndex, &oRes);
PdfWriter::CObjectBase* pBase = DictToCDictObject2(&oRes, pDoc, xref, m_mSplitUniqueRef);
pDict->Add(chKey2, pBase);
oRes.free();
}
oTemp.free();
continue;
}
else
{
oTemp.free();
pageObj.dictGetValNF(nIndex, &oTemp);
}
}
else if (strcmp("Parent", chKey) == 0)
{
oTemp.free();
continue;
}
else
pageObj.dictGetValNF(nIndex, &oTemp);
PdfWriter::CObjectBase* pBase = DictToCDictObject2(&oTemp, pDoc, xref, m_mSplitUniqueRef);
pPage->Add(chKey, pBase);
oTemp.free();
}
pPage->Fix();
// TODO pPage->AddContents чтобы можно было дописать изменения, если понадобится
m_pWriter->SetNeedAddHelvetica(false); // TODO дописывает шрифт для адекватного редактирования Adobe pdf без текст. Убрать при реализации map шрифтов
pageObj.free();
return true; return true;
} }
// Copies pages of another document (_pReader) into the document being edited.
// arrPageIndex / unLength select the source pages and nMergePos the insertion
// position; all of them are forwarded unchanged to SplitPages, which does the copying.
// Returns false without copying when the editor is not in mode 0 and
// IncrementalUpdates() fails; otherwise returns the result of SplitPages.
bool CPdfEditor::MergePages(CPdfReader* _pReader, const int* arrPageIndex, unsigned int unLength, int nMergePos)
{
	// IncrementalUpdates() is only attempted when m_nMode is not already 0
	const bool bCanEdit = (m_nMode == 0) || IncrementalUpdates();
	if (!bCanEdit)
		return false;

	return SplitPages(arrPageIndex, unLength, _pReader, nMergePos);
}
bool CPdfEditor::DeletePage(int nPageIndex) bool CPdfEditor::DeletePage(int nPageIndex)
{ {
if (m_nMode != 0 && !IncrementalUpdates()) if (m_nMode != 0 && !IncrementalUpdates())

View File

@ -61,10 +61,10 @@ public:
void EndMarkedContent(); void EndMarkedContent();
bool IsBase14(const std::wstring& wsFontName, bool& bBold, bool& bItalic, std::wstring& wsFontPath); bool IsBase14(const std::wstring& wsFontName, bool& bBold, bool& bItalic, std::wstring& wsFontPath);
bool SplitPages(const int* arrPageIndex, unsigned int unLength); bool SplitPages(const int* arrPageIndex, unsigned int unLength, CPdfReader* _pReader = NULL, int nMergePos = -1);
bool MergePages(CPdfReader* _pReader, const int* arrPageIndex, unsigned int unLength, int nMergePos);
private: private:
bool SplitPage(int nPageIndex);
void GetPageTree(XRef* xref, Object* pPagesRefObj, PdfWriter::CPageTree* pPageParent = NULL); void GetPageTree(XRef* xref, Object* pPagesRefObj, PdfWriter::CPageTree* pPageParent = NULL);
std::wstring m_wsSrcFile; std::wstring m_wsSrcFile;

View File

@ -181,7 +181,7 @@ bool CPdfFile::MergePages(const std::wstring& wsPath, const std::wstring& wsPass
return false; return false;
} }
bool bRes = m_pInternal->pEditor->SplitPages(arrPageIndex, unLength); bool bRes = m_pInternal->pEditor->MergePages(pMergeFile->m_pInternal->pReader, arrPageIndex, unLength, nMergePos);
RELEASEOBJECT(pMergeFile); RELEASEOBJECT(pMergeFile);
return bRes; return bRes;
} }

View File

@ -335,12 +335,6 @@ namespace PdfWriter
m_pTrailer->Add("Encrypt", m_pEncryptDict); m_pTrailer->Add("Encrypt", m_pEncryptDict);
m_bEncrypt = true; m_bEncrypt = true;
} }
// Attaches an already constructed page to the document: stores the page tree
// under the page's "Parent" key, registers the page in the page tree and
// makes it the current page (m_pCurPage).
void CDocument::AddPage(CPage* pPage)
{
pPage->Add("Parent", m_pPageTree);
m_pPageTree->AddPage(pPage);
m_pCurPage = pPage;
}
CPage* CDocument::AddPage() CPage* CDocument::AddPage()
{ {
CPage* pPage = new CPage(m_pXref, m_pPageTree, this); CPage* pPage = new CPage(m_pXref, m_pPageTree, this);
@ -1569,22 +1563,20 @@ namespace PdfWriter
return true; return true;
} }
CPage* CDocument::AddPage(int nPageIndex) CPage* CDocument::AddPage(int nPageIndex, CPage* _pNewPage)
{ {
if (!m_pPageTree) if (!m_pPageTree)
return NULL; return NULL;
CPage* pNewPage = new CPage(m_pXref, NULL, this); CPage* pNewPage = _pNewPage ? _pNewPage : new CPage(m_pXref, NULL, this);
if (!pNewPage) if (!pNewPage)
return NULL; return NULL;
bool bRes = m_pPageTree->InsertPage(nPageIndex, pNewPage); bool bRes = m_pPageTree->InsertPage(nPageIndex, pNewPage);
if (!bRes) if (!bRes)
return NULL; return NULL;
#ifndef FILTER_FLATE_DECODE_DISABLED if (!_pNewPage)
if (m_unCompressMode & COMP_TEXT)
pNewPage->SetFilter(STREAM_FILTER_FLATE_DECODE); pNewPage->SetFilter(STREAM_FILTER_FLATE_DECODE);
#endif
m_pCurPage = pNewPage; m_pCurPage = pNewPage;
return pNewPage; return pNewPage;
} }

View File

@ -121,7 +121,6 @@ namespace PdfWriter
void SetPDFAConformanceMode(bool isPDFA); void SetPDFAConformanceMode(bool isPDFA);
bool IsPDFA() const; bool IsPDFA() const;
void AddPage(CPage* pPage);
CPage* AddPage(); CPage* AddPage();
CPage* GetPage (const unsigned int& unPage); CPage* GetPage (const unsigned int& unPage);
CPage* GetEditPage(const unsigned int& unPage); CPage* GetEditPage(const unsigned int& unPage);
@ -194,7 +193,7 @@ namespace PdfWriter
bool EditResources(CXref* pXref, CResourcesDict* pResources); bool EditResources(CXref* pXref, CResourcesDict* pResources);
std::pair<int, int> GetPageRef(int nPageIndex); std::pair<int, int> GetPageRef(int nPageIndex);
bool EditPage(CXref* pXref, CPage* pPage, int nPageIndex); bool EditPage(CXref* pXref, CPage* pPage, int nPageIndex);
CPage* AddPage(int nPageIndex); CPage* AddPage(int nPageIndex, CPage* _pNewPage = NULL);
bool DeletePage(int nPageIndex); bool DeletePage(int nPageIndex);
bool AddToFile(const std::wstring& wsPath, CXref* pXref, CDictObject* pTrailer, CXref* pInfoXref, CInfoDict* pInfo); bool AddToFile(const std::wstring& wsPath, CXref* pXref, CDictObject* pTrailer, CXref* pInfoXref, CInfoDict* pInfo);
void AddObject(CObjectBase* pObj); void AddObject(CObjectBase* pObj);
@ -214,6 +213,7 @@ namespace PdfWriter
void ClearPage(); void ClearPage();
bool EditXref(CXref* pXref); bool EditXref(CXref* pXref);
void SetAcroForm(CDictObject* pObj); void SetAcroForm(CDictObject* pObj);
// Returns the AcroForm dictionary held in m_pAcroForm.
// NOTE(review): callers in SplitPages test the result for null before use — presumably it is null until SetAcroForm is called; confirm.
CDictObject* GetAcroForm()
{
	return m_pAcroForm;
}
CResourcesDict* CreateResourcesDict(bool bInline, bool bProcSet); CResourcesDict* CreateResourcesDict(bool bInline, bool bProcSet);
private: private:

View File

@ -352,7 +352,7 @@ TEST_F(CPdfFileTest, VerifySign)
TEST_F(CPdfFileTest, SplitPdf) TEST_F(CPdfFileTest, SplitPdf)
{ {
//GTEST_SKIP(); GTEST_SKIP();
LoadFromFile(); LoadFromFile();
ASSERT_TRUE(pdfFile->EditPdf(wsDstFile)); ASSERT_TRUE(pdfFile->EditPdf(wsDstFile));
@ -365,13 +365,14 @@ TEST_F(CPdfFileTest, SplitPdf)
TEST_F(CPdfFileTest, MergePdf) TEST_F(CPdfFileTest, MergePdf)
{ {
GTEST_SKIP(); //GTEST_SKIP();
LoadFromFile(); LoadFromFile();
ASSERT_TRUE(pdfFile->EditPdf(NSFile::GetProcessDirectory() + L"/test3.pdf"));
pdfFile->MergePages(wsSrcFile); pdfFile->MergePages(wsDstFile);
pdfFile->SaveToFile(wsDstFile); pdfFile->Close();
} }
TEST_F(CPdfFileTest, CopyAnotherPdf) TEST_F(CPdfFileTest, CopyAnotherPdf)