mirror of
https://github.com/ONLYOFFICE/core.git
synced 2026-02-10 18:05:41 +08:00
Add extract mode in x2ttester
This commit is contained in:
@ -1,7 +1,9 @@
|
||||
CONFIGURATION
|
||||
=============
|
||||
|
||||
You need to create an xml configuration file. It must contain:
|
||||
## Default conversion
|
||||
|
||||
You need to create an xml configuration file. It contains:
|
||||
|
||||
# root of xml
|
||||
<settings> </settings>
|
||||
@ -95,8 +97,23 @@ You need to create an xml configuration file. It must contain:
|
||||
<input> docx txt pptx xlsx</input>
|
||||
<output> txt doc pdf</output>
|
||||
|
||||
## Extraction
|
||||
Instead of running the default x2t conversion, x2ttester can extract the files that have the required output extensions from the input files. To enable extraction mode:
|
||||
|
||||
You can use the following templates:
|
||||
(non-required) sets extraction mode (default - "0")
|
||||
<extract> </extract>
|
||||
|
||||
When `extract` is "1", you can set the `output` parameter to choose which extensions will be extracted. The default `output` is `emf wmf`.
|
||||
The `input`, `inputDirectory`, `outputDirectory` and `cores` parameters work the same as in the default conversion.
|
||||
|
||||
Extract mode has additional options:
|
||||
|
||||
(non-required) converts non-zip office files (e.g. pdf) into docx before extraction (default - "0").
|
||||
<convertBeforeExtract> </convertBeforeExtract>
|
||||
|
||||
The conversion params in `convertBeforeExtract` are the same as the default conversion.
|
||||
|
||||
## Templates
|
||||
|
||||
# main xml config
|
||||
|
||||
|
||||
@ -119,22 +119,23 @@ std::vector<std::wstring> CFormatsList::GetAllExts() const
|
||||
{
|
||||
std::vector<std::wstring> all_formats;
|
||||
|
||||
for(auto& val : m_documents)
|
||||
for (const auto& val : m_documents)
|
||||
all_formats.push_back(val);
|
||||
|
||||
for(auto& val : m_presentations)
|
||||
for (const auto& val : m_presentations)
|
||||
all_formats.push_back(val);
|
||||
|
||||
for(auto& val : m_spreadsheets)
|
||||
for (const auto& val : m_spreadsheets)
|
||||
all_formats.push_back(val);
|
||||
|
||||
for(auto& val : m_images)
|
||||
for (const auto& val : m_images)
|
||||
all_formats.push_back(val);
|
||||
|
||||
for(auto& val : m_crossplatform)
|
||||
for (const auto& val : m_crossplatform)
|
||||
all_formats.push_back(val);
|
||||
|
||||
all_formats.push_back(m_pdf);
|
||||
if (!m_pdf.empty())
|
||||
all_formats.push_back(m_pdf);
|
||||
|
||||
return all_formats;
|
||||
}
|
||||
@ -259,6 +260,16 @@ CFormatsList CFormatsList::GetOutputExts()
|
||||
return list;
|
||||
}
|
||||
|
||||
// Returns the default list of extensions for extract mode.
// Only the image group is filled: "emf" and "wmf", in that order.
CFormatsList CFormatsList::GetExtractExts()
{
	CFormatsList extract_list;

	const wchar_t* const default_exts[] = { L"emf", L"wmf" };
	for (const wchar_t* ext : default_exts)
		extract_list.m_images.push_back(ext);

	return extract_list;
}
|
||||
|
||||
Cx2tTester::Cx2tTester(const std::wstring& configPath)
|
||||
{
|
||||
m_bIsUseSystemFonts = true;
|
||||
@ -269,14 +280,22 @@ Cx2tTester::Cx2tTester(const std::wstring& configPath)
|
||||
m_bIsFilenamePassword = true;
|
||||
m_bTroughConversion = false;
|
||||
m_bSaveEnvironment = false;
|
||||
|
||||
m_bExtract = false;
|
||||
m_bConvertBeforeExtract = false;
|
||||
|
||||
m_defaultCsvDelimiter = L";";
|
||||
m_defaultCsvTxtEndcoding = L"UTF-8";
|
||||
m_inputFormatsList = CFormatsList::GetDefaultExts();
|
||||
m_outputFormatsList = CFormatsList::GetOutputExts();
|
||||
m_extractFormatsList = CFormatsList::GetExtractExts();
|
||||
m_timeout = 5 * 60; // 5 min
|
||||
|
||||
SetConfig(configPath);
|
||||
|
||||
m_errorsXmlDirectory = m_outputDirectory + FILE_SEPARATOR_STR + L"_errors";
|
||||
m_troughConversionDirectory = m_outputDirectory + FILE_SEPARATOR_STR + L"_t";
|
||||
m_tempDirectory = m_outputDirectory + FILE_SEPARATOR_STR + L"_temp";
|
||||
m_fontsDirectory = NSFile::GetProcessDirectory() + FILE_SEPARATOR_STR + L"fonts";
|
||||
|
||||
|
||||
@ -335,6 +354,12 @@ Cx2tTester::~Cx2tTester()
|
||||
m_reportCS.DeleteCriticalSection();
|
||||
m_outputCS.DeleteCriticalSection();
|
||||
m_reportStream.CloseFile();
|
||||
|
||||
for(auto&& val : m_deleteLaterFiles)
|
||||
NSFile::CFileBinary::Remove(val);
|
||||
|
||||
for(auto&& val : m_deleteLaterDirectories)
|
||||
NSDirectory::DeleteDirectory(val);
|
||||
}
|
||||
|
||||
void Cx2tTester::SetConfig(const std::wstring& configPath)
|
||||
@ -366,6 +391,8 @@ void Cx2tTester::SetConfig(const std::wstring& configPath)
|
||||
else if(name == L"troughConversion" && !node.GetText().empty()) m_bTroughConversion = std::stoi(node.GetText());
|
||||
else if(name == L"saveEnvironment" && !node.GetText().empty()) m_bSaveEnvironment = std::stoi(node.GetText());
|
||||
else if(name == L"defaultCsvTxtEncoding" && !node.GetText().empty()) m_defaultCsvTxtEndcoding = node.GetText();
|
||||
else if(name == L"extract" && !node.GetText().empty()) m_bExtract = std::stoi(node.GetText());
|
||||
else if(name == L"convertBeforeExtract" && !node.GetText().empty()) m_bConvertBeforeExtract = std::stoi(node.GetText());
|
||||
else if(name == L"defaultCsvDelimiter" && !node.GetText().empty()) m_defaultCsvDelimiter = (wchar_t)std::stoi(node.GetText(), nullptr, 16);
|
||||
else if(name == L"inputFilesList" && !node.GetText().empty())
|
||||
{
|
||||
@ -418,17 +445,39 @@ void Cx2tTester::SetConfig(const std::wstring& configPath)
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if(default_input_formats)
|
||||
if (default_input_formats)
|
||||
m_inputExts = m_inputFormatsList.GetAllExts();
|
||||
|
||||
if(default_output_formats)
|
||||
m_outputExts = m_outputFormatsList.GetAllExts();
|
||||
if (default_output_formats)
|
||||
{
|
||||
if (m_bExtract)
|
||||
m_outputExts = m_extractFormatsList.GetAllExts();
|
||||
else
|
||||
m_outputExts = m_outputFormatsList.GetAllExts();
|
||||
}
|
||||
|
||||
}
|
||||
void Cx2tTester::Start()
|
||||
{
|
||||
// setup timer
|
||||
m_timeStart = NSTimers::GetTickCount();
|
||||
|
||||
m_outputDirectory = CorrectPathW(m_outputDirectory);
|
||||
m_errorsXmlDirectory = CorrectPathW(m_errorsXmlDirectory);
|
||||
m_troughConversionDirectory = CorrectPathW(m_troughConversionDirectory);
|
||||
|
||||
// setup & clear output folder
|
||||
if(NSDirectory::Exists(m_outputDirectory))
|
||||
NSDirectory::DeleteDirectory(m_outputDirectory);
|
||||
|
||||
NSDirectory::CreateDirectory(m_outputDirectory);
|
||||
|
||||
// setup & clear errors folder
|
||||
if(NSDirectory::Exists(m_errorsXmlDirectory))
|
||||
NSDirectory::DeleteDirectory(m_errorsXmlDirectory);
|
||||
|
||||
NSDirectory::CreateDirectory(m_errorsXmlDirectory);
|
||||
|
||||
// check fonts
|
||||
CApplicationFontsWorker fonts_worker;
|
||||
fonts_worker.m_sDirectory = m_fontsDirectory;
|
||||
@ -449,23 +498,6 @@ void Cx2tTester::Start()
|
||||
NSFonts::IApplicationFonts* pFonts = fonts_worker.Check();
|
||||
RELEASEINTERFACE(pFonts);
|
||||
|
||||
m_outputDirectory = CorrectPathW(m_outputDirectory);
|
||||
m_errorsXmlDirectory = CorrectPathW(m_errorsXmlDirectory);
|
||||
m_troughConversionDirectory = CorrectPathW(m_troughConversionDirectory);
|
||||
|
||||
// setup & clear output folder
|
||||
if(NSDirectory::Exists(m_outputDirectory))
|
||||
NSDirectory::DeleteDirectory(m_outputDirectory);
|
||||
|
||||
NSDirectory::CreateDirectory(m_outputDirectory);
|
||||
|
||||
// setup & clear errors folder
|
||||
if(NSDirectory::Exists(m_errorsXmlDirectory))
|
||||
NSDirectory::DeleteDirectory(m_errorsXmlDirectory);
|
||||
|
||||
NSDirectory::CreateDirectory(m_errorsXmlDirectory);
|
||||
|
||||
|
||||
std::vector<std::wstring> files = NSDirectory::GetFiles(m_inputDirectory, true);
|
||||
for(int i = 0; i < files.size(); i++)
|
||||
{
|
||||
@ -486,6 +518,54 @@ void Cx2tTester::Start()
|
||||
if(files.size() < m_maxProc)
|
||||
m_maxProc = files.size();
|
||||
|
||||
if (m_bExtract)
|
||||
{
|
||||
COfficeFileFormatChecker checker;
|
||||
COfficeUtils utils;
|
||||
std::vector<std::wstring> files_to_convert;
|
||||
|
||||
for (size_t i = 0; i < files.size(); i++)
|
||||
if (utils.IsArchive(files[i]) == S_FALSE && checker.isOfficeFile(files[i]))
|
||||
{
|
||||
if (m_bConvertBeforeExtract)
|
||||
files_to_convert.push_back(files[i]);
|
||||
files.erase(files.begin() + i);
|
||||
}
|
||||
|
||||
if (!files_to_convert.empty())
|
||||
{
|
||||
if(NSDirectory::Exists(m_tempDirectory))
|
||||
NSDirectory::DeleteDirectory(m_tempDirectory);
|
||||
|
||||
NSDirectory::CreateDirectories(m_tempDirectory);
|
||||
|
||||
auto copy_inputDirectory = m_inputDirectory;
|
||||
auto copy_outputDirectory = m_outputDirectory;
|
||||
auto copy_outputExts = m_outputExts;
|
||||
|
||||
m_outputDirectory = m_tempDirectory;
|
||||
m_outputExts = {L"docx"};
|
||||
|
||||
Convert(files_to_convert, true, true);
|
||||
|
||||
m_outputDirectory = copy_outputDirectory;
|
||||
m_outputExts = copy_outputExts;
|
||||
|
||||
m_inputDirectory = m_tempDirectory;
|
||||
std::vector<std::wstring> temp_files = NSDirectory::GetFiles(m_tempDirectory, true);
|
||||
Extract(temp_files);
|
||||
|
||||
m_inputDirectory = copy_inputDirectory;
|
||||
}
|
||||
|
||||
Extract(files);
|
||||
|
||||
if(NSDirectory::Exists(m_tempDirectory))
|
||||
NSDirectory::DeleteDirectory(m_tempDirectory);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// conversion in _t directory -> _t directory to output
|
||||
if(m_bTroughConversion)
|
||||
{
|
||||
@ -512,12 +592,6 @@ void Cx2tTester::Start()
|
||||
|
||||
Convert(files);
|
||||
WriteTime();
|
||||
|
||||
for(auto&& val : m_deleteLaterFiles)
|
||||
NSFile::CFileBinary::Remove(val);
|
||||
|
||||
for(auto&& val : m_deleteLaterDirectories)
|
||||
NSDirectory::DeleteDirectory(val);
|
||||
}
|
||||
|
||||
void Cx2tTester::Convert(const std::vector<std::wstring>& files, bool bNoDirectory, bool bTrough)
|
||||
@ -652,6 +726,47 @@ void Cx2tTester::Convert(const std::vector<std::wstring>& files, bool bNoDirecto
|
||||
while(!IsAllFree())
|
||||
NSThreads::Sleep(150);
|
||||
}
|
||||
void Cx2tTester::Extract(const std::vector<std::wstring>& files)
|
||||
{
|
||||
for (int i = 0; i < files.size(); i++)
|
||||
{
|
||||
const std::wstring& input_file = files[i];
|
||||
std::wstring input_filename = NSFile::GetFileName(input_file);
|
||||
std::wstring input_file_directory = NSFile::GetDirectoryName(input_file);
|
||||
std::wstring input_subfolders = input_file_directory.substr(m_inputDirectory.size(),
|
||||
input_file_directory.size() - m_inputDirectory.size());
|
||||
std::wstring output_files_directory = m_outputDirectory + input_subfolders + FILE_SEPARATOR_STR + input_filename;
|
||||
|
||||
if(!NSDirectory::Exists(output_files_directory))
|
||||
NSDirectory::CreateDirectories(output_files_directory);
|
||||
|
||||
// waiting...
|
||||
do
|
||||
{
|
||||
NSThreads::Sleep(50);
|
||||
} while(IsAllBusy());
|
||||
|
||||
m_coresCS.Enter();
|
||||
|
||||
// setup & start new extractor
|
||||
CExtractor *extractor = new CExtractor(this);
|
||||
extractor->SetInputFile(input_file);
|
||||
extractor->SetOutputFilesDirectory(output_files_directory);
|
||||
extractor->SetExtractExts(m_outputExts);
|
||||
extractor->SetFilesCount(files.size(), i + 1);
|
||||
extractor->DestroyOnFinish();
|
||||
m_currentProc++;
|
||||
|
||||
m_coresCS.Leave();
|
||||
|
||||
extractor->Start(0);
|
||||
}
|
||||
|
||||
// waiting all procs end
|
||||
while(!IsAllFree())
|
||||
NSThreads::Sleep(150);
|
||||
}
|
||||
|
||||
void Cx2tTester::WriteReportHeader()
|
||||
{
|
||||
CTemporaryCS CS(&m_reportCS);
|
||||
@ -844,7 +959,7 @@ DWORD CConverter::ThreadProc()
|
||||
for(int i = 0; i < m_outputExts.size(); i++)
|
||||
{
|
||||
std::wstring output_ext = L"."+ m_outputExts[i];
|
||||
int output_format = checker.GetFormatByExtension(output_ext);
|
||||
int output_format = m_checker.GetFormatByExtension(output_ext);
|
||||
|
||||
std::wstring xml_params_filename = input_filename + L"_" + output_ext + L".xml";
|
||||
std::wstring xml_params_file = m_outputFilesDirectory + FILE_SEPARATOR_STR + xml_params_filename;
|
||||
@ -1077,4 +1192,68 @@ DWORD CConverter::ThreadProc()
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Stores the owning tester. The file counters start at 0 so they hold a
// defined value even when SetFilesCount() has not been called yet.
CExtractor::CExtractor(Cx2tTester* internal)
	: m_internal(internal)
	, m_totalFiles(0)
	, m_currFile(0)
{
}
|
||||
// Calls Stop() on the thread object before it is destroyed.
CExtractor::~CExtractor()
{
	this->Stop();
}
|
||||
|
||||
void CExtractor::SetInputFile(const std::wstring& inputFile)
|
||||
{
|
||||
m_inputFile = inputFile;
|
||||
}
|
||||
void CExtractor::SetOutputFilesDirectory(const std::wstring& outputFilesDirectory)
|
||||
{
|
||||
m_outputFilesDirectory = outputFilesDirectory;
|
||||
}
|
||||
void CExtractor::SetExtractExts(const std::vector<std::wstring>& extractExts)
|
||||
{
|
||||
m_extractExts = extractExts;
|
||||
}
|
||||
void CExtractor::SetFilesCount(int totalFiles, int currFile)
|
||||
{
|
||||
m_totalFiles = totalFiles;
|
||||
m_currFile = currFile;
|
||||
}
|
||||
|
||||
// Thread body: for every requested extension, unpacks m_inputFile into a
// temporary folder and moves the files with that extension into
//   m_outputFilesDirectory + FILE_SEPARATOR_STR + <extension>
// Folders that end up without any file are removed again. Before returning,
// the owner's running-process counter is decremented. Always returns 0.
DWORD CExtractor::ThreadProc()
{
	for (const std::wstring& extract_ext : m_extractExts)
	{
		const std::wstring output_folder = m_outputFilesDirectory + FILE_SEPARATOR_STR + extract_ext;

		// start from a clean per-extension folder
		if (NSDirectory::Exists(output_folder))
			NSDirectory::DeleteDirectory(output_folder);

		NSDirectory::CreateDirectories(output_folder);

		// the archive is unpacked into a uniquely named folder inside output_folder
		const std::wstring temp_folder = NSDirectory::CreateDirectoryWithUniqueName(output_folder);
		m_utils.ExtractToDirectory(m_inputFile, temp_folder, nullptr, false);

		const auto unzip_files = NSDirectory::GetFiles(temp_folder, true);
		bool found_any = false;
		for (const auto& file : unzip_files)
		{
			if (NSFile::GetFileExtention(file) != extract_ext)
				continue;

			found_any = true;
			// NOTE(review): only the file name is kept, so two unpacked files with
			// the same name in different sub-folders target the same path - confirm
			// that this is acceptable.
			NSFile::CFileBinary::Move(file, output_folder + FILE_SEPARATOR_STR + NSFile::GetFileName(file));
		}

		// the unpacked tree is no longer needed; drop the extension folder too
		// when nothing matched
		NSDirectory::DeleteDirectory(temp_folder);
		if (!found_any)
			NSDirectory::DeleteDirectory(output_folder);
	}

	// nothing was extracted for this input at all -> remove its output folder
	if (NSDirectory::GetFilesCount(m_outputFilesDirectory, true) == 0)
		NSDirectory::DeleteDirectory(m_outputFilesDirectory);

	// NOTE(review): Cx2tTester::Extract() increments m_currentProc while holding
	// m_coresCS; this decrement takes no lock here - verify that this is safe.
	m_internal->m_currentProc--;
	return 0;
}
|
||||
|
||||
@ -55,7 +55,10 @@ public:
|
||||
static CFormatsList GetDefaultExts();
|
||||
|
||||
// all writable exts
|
||||
static CFormatsList GetOutputExts();
|
||||
static CFormatsList GetOutputExts();
|
||||
|
||||
// default exts to extract
|
||||
static CFormatsList GetExtractExts();
|
||||
|
||||
private:
|
||||
std::vector<std::wstring> m_documents;
|
||||
@ -111,6 +114,7 @@ private:
|
||||
// parse string like "docx txt" into vector
|
||||
std::vector<std::wstring> ParseExtensionsString(std::wstring extensions, const CFormatsList& fl);
|
||||
void Convert(const std::vector<std::wstring>& files, bool bNoDirectory = false, bool bTrough = false);
|
||||
void Extract(const std::vector<std::wstring>& files);
|
||||
|
||||
// takes from config
|
||||
std::wstring m_reportFile;
|
||||
@ -121,6 +125,7 @@ private:
|
||||
std::wstring m_errorsXmlDirectory;
|
||||
std::wstring m_troughConversionDirectory;
|
||||
std::wstring m_fontsDirectory;
|
||||
std::wstring m_tempDirectory;
|
||||
|
||||
// fonts
|
||||
bool m_bIsUseSystemFonts;
|
||||
@ -137,6 +142,7 @@ private:
|
||||
// lists
|
||||
CFormatsList m_inputFormatsList;
|
||||
CFormatsList m_outputFormatsList;
|
||||
CFormatsList m_extractFormatsList;
|
||||
|
||||
bool m_bIsErrorsOnly;
|
||||
bool m_bIsTimestamp;
|
||||
@ -157,6 +163,12 @@ private:
|
||||
|
||||
std::vector<std::wstring> m_deleteLaterFiles;
|
||||
std::vector<std::wstring> m_deleteLaterDirectories;
|
||||
|
||||
// extract files with output_ext from input_files
|
||||
bool m_bExtract;
|
||||
|
||||
// convert to docx before extract
|
||||
bool m_bConvertBeforeExtract;
|
||||
};
|
||||
|
||||
// generates temp xml, convert, calls m_internal->writeReport
|
||||
@ -194,7 +206,7 @@ private:
|
||||
std::wstring m_inputExt;
|
||||
|
||||
std::wstring m_fontsDirectory;
|
||||
COfficeFileFormatChecker checker;
|
||||
COfficeFileFormatChecker m_checker;
|
||||
|
||||
std::wstring m_x2tPath;
|
||||
std::wstring m_errorsXmlDirectory;
|
||||
@ -214,4 +226,30 @@ private:
|
||||
unsigned long m_timeout;
|
||||
};
|
||||
|
||||
// extracts files from office files
|
||||
class CExtractor : public NSThreads::CBaseThread
|
||||
{
|
||||
public:
|
||||
CExtractor(Cx2tTester* internal);
|
||||
virtual ~CExtractor();
|
||||
|
||||
void SetInputFile(const std::wstring& inputFile);
|
||||
void SetOutputFilesDirectory(const std::wstring& outputFilesDirectory);
|
||||
void SetExtractExts(const std::vector<std::wstring>& extractExts);
|
||||
void SetFilesCount(int totalFiles, int currFile);
|
||||
|
||||
virtual DWORD ThreadProc();
|
||||
|
||||
private:
|
||||
Cx2tTester* m_internal;
|
||||
std::wstring m_inputFile;
|
||||
std::wstring m_outputFilesDirectory;
|
||||
std::vector<std::wstring> m_extractExts;
|
||||
COfficeUtils m_utils;
|
||||
|
||||
int m_totalFiles;
|
||||
int m_currFile;
|
||||
|
||||
};
|
||||
|
||||
#endif // X2T_TESTER_H
|
||||
|
||||
Reference in New Issue
Block a user