diff --git a/Common/3dParty/hyphen/hyphen_test/main.cpp b/Common/3dParty/hyphen/hyphen_test/main.cpp index c98c37472d..b66767e39f 100644 --- a/Common/3dParty/hyphen/hyphen_test/main.cpp +++ b/Common/3dParty/hyphen/hyphen_test/main.cpp @@ -1,110 +1,149 @@ -#include -#include +#include +#include -#include "./../js/src/ExportedFunctions.h" +#include "../../../../DesktopEditor/common/Directory.h" +#include "../../../../DesktopEditor/fontengine/TextHyphen.h" +#include "../../../../DesktopEditor/common/StringUTF32.h" + +#define USE_DICTIONARIES_FROM_MEMORY + +std::vector ReadWords(const std::wstring& file) +{ + std::vector words; + + NSFile::CFileBinary oFile; + if (!oFile.OpenFile(file)) + return words; + + int nInputDataLen = (int)oFile.GetFileSize(); + char* pInputData = new char[nInputDataLen]; + + oFile.ReadFile((BYTE*)pInputData, (DWORD)nInputDataLen); + + int nPos = 0; + if (nInputDataLen > 3 && + ((BYTE)pInputData[0] == 0xEF) && ((BYTE)pInputData[1] == 0xBB) && ((BYTE)pInputData[2]) == 0xBF) + { + nPos = 3; + } + + while (nPos < nInputDataLen) + { + while (nPos < nInputDataLen && (pInputData[nPos] == '\n' || pInputData[nPos] == '\r')) + ++nPos; + + int nStart = nPos; + + while (nPos < nInputDataLen && (pInputData[nPos] != '\n' && pInputData[nPos] != '\r')) + ++nPos; + + if (nPos > nStart) + { + std::string s(pInputData + nStart, nPos - nStart); + words.push_back(UTF8_TO_U(s)); + } + } + + oFile.CloseFile(); + RELEASEARRAYOBJECTS(pInputData); + return words; +} + +std::wstring HyphenWord(NSHyphen::CEngine& engine, const int& lang, const std::wstring& word) +{ + std::string worda = U_TO_UTF8(word); + char* result = engine.Process(lang, worda.c_str(), (int)worda.length()); + + NSStringUtils::CStringUTF32 oInput = word; + NSStringUtils::CStringUTF32 oOutput; + + int nCurrentIndex = 0; + int nPosHyphen = 0; + while (result[nPosHyphen] != 0) + { + if (1 == (result[nPosHyphen] & 1)) + { + int nLenChunk = nPosHyphen - nCurrentIndex + 1; + oOutput += oInput.substr(nCurrentIndex, nLenChunk); + oOutput += '='; + nCurrentIndex += nLenChunk; + } + ++nPosHyphen; + } + + if (nCurrentIndex < oInput.length()) + oOutput += oInput.substr(nCurrentIndex, oInput.length() - nCurrentIndex); + + return oOutput.ToStdWString(); +} + +int GetLanguage(const std::wstring& sLanguage) +{ + std::string sLang = U_TO_UTF8(sLanguage); + int nLang = 0; + for (int j = 0; j < NSTextLanguages::DictionaryRec_count; ++j) + { + if (std::string(NSTextLanguages::Dictionaries[j].m_name) == sLang) + { + nLang = NSTextLanguages::Dictionaries[j].m_lang; + break; + } + } + return nLang; +} int main(int argc, char *argv[]) { - HyphenDict *dict; - - std::string dict_filename = PRO_DIR; - std::string words_filename = PRO_DIR; - std::string result_filename = PRO_DIR; - std::string dict_name = "en_US"; - - // set your filenames here - dict_filename += ("../../../../../dictionaries/" + dict_name + "/hyph_" + dict_name + ".dic"); - words_filename += "words.txt"; - result_filename += "result.txt"; - - // load the hyphenation dictionary - dict = hnj_hyphen_load(dict_filename.c_str()); - - std::ifstream fin(words_filename); - if(!fin.is_open()) - { - std::cerr << "could not open " << words_filename << "!" << std::endl; - return -1; - } - - std::ofstream fout(result_filename); - if(!fout.is_open()) - { - std::cerr << "could not open " << result_filename << "!" << std::endl; - return -1; - } - - while(!fin.eof()) - { - char **rep = NULL; - int *pos = NULL; - int *cut = NULL; - - std::string word; - - fin >> word; - int n = word.size(); - char *hword = new char[n * 2]; - char *hyphens = new char[n + 5]; - - /** - * @brief - * input data: - * - * word: input word - * word_size: byte length of the input word - * hyphens: allocated character buffer (size = word_size + 5) - * hyphenated_word: allocated character buffer (size ~ word_size * 2) or NULL - * rep, pos, cut: pointers (point to the allocated and _zeroed_ buffers - * (size=word_size) or with NULL value) or NULL - * - * output data: - * - * hyphens: hyphenation vector (hyphenation points signed with odd numbers). - * hyphenated_word: hyphenated input word (hyphens signed with `='). - * optional (NULL input). - * rep: NULL (only standard hyph.), or replacements (hyphenation points - * signed with `=' in replacements). - * pos: NULL, or difference of the actual position and the beginning - * positions of the change in input words. - * cut: NULL, or counts of the removed characters of the original words - * at hyphenation. - * - * Note: rep, pos, cut are complementary arrays to the hyphens, indexed with the - * character positions of the input word. - */ - hnj_hyphen_hyphenate2(dict, word.c_str(), n, hyphens, hword, &rep, &pos, &cut); - - fout << hword << ' '; - - delete[] hword; - delete[] hyphens; - } - fin.close(); - fout.close(); - -#if 1 - - CHyphenApplication* pApplication = hyphenCreateApplication(); - - FILE* fDictionary = fopen(dict_filename.c_str(), "rb"); - fseek(fDictionary, 0, SEEK_END); - long lDictSize = ftell(fDictionary); - fseek(fDictionary, 0, SEEK_SET); /* same as rewind(f); */ - - char* pDictData = (char*)malloc(lDictSize); - fread(pDictData, (size_t)lDictSize, 1, fDictionary); - fclose(fDictionary); - - int nResult = hyphenLoadDictionary(pApplication, pDictData, (unsigned int)lDictSize, dict_name.c_str()); - - free(pDictData); - - char* pHyphenVector = hyphenWord(pApplication, "expedition", dict_name.c_str()); - - hyphenDestroyApplication(pApplication); + NSHyphen::CEngine engine; + std::wstring dictionaries_dir = NSFile::GetProcessDirectory() + L"/../../../../../../dictionaries"; + engine.Init(dictionaries_dir); +#if 0 + std::wstring sOneWord = HyphenWord(engine, 1033, L"expedition"); #endif + + std::wstring input_dir = NSFile::GetProcessDirectory() + L"/input"; + std::wstring output_dir = NSFile::GetProcessDirectory() + L"/output"; + std::vector langs = NSDirectory::GetFiles(input_dir); + + for (std::wstring& lang : langs) + { + std::wstring sLang = NSFile::GetFileName(lang); + int nLang = GetLanguage(sLang); + if (nLang == 0) + continue; + + std::vector words = ReadWords(lang); + if (words.empty()) + continue; + +#ifdef USE_DICTIONARIES_FROM_MEMORY + BYTE* pData = NULL; + DWORD dwDataLen = 0; + std::wstring sFileDict = dictionaries_dir + L"/" + sLang + L"/hyph_" + sLang + L".dic"; + if (NSFile::CFileBinary::ReadAllBytes(sFileDict, &pData, dwDataLen)) + { + engine.LoadDictionary(nLang, pData, (unsigned int)dwDataLen); + RELEASEARRAYOBJECTS(pData); + } +#endif + + std::wstring sResult; + for (std::wstring& word : words) + { + sResult += HyphenWord(engine, nLang, word); + sResult += L"\r\n"; + } + + std::wstring sOutputFile = output_dir + L"/" + sLang; + + if (NSFile::CFileBinary::Exists(sOutputFile)) + NSFile::CFileBinary::Remove(sOutputFile); + + NSFile::CFileBinary::SaveToFile(sOutputFile, sResult, true); + } + + return 0; } diff --git a/Common/3dParty/hyphen/hyphen_test/test.pro b/Common/3dParty/hyphen/hyphen_test/test.pro index 0cc0f126ba..564cce9b83 100644 --- a/Common/3dParty/hyphen/hyphen_test/test.pro +++ b/Common/3dParty/hyphen/hyphen_test/test.pro @@ -11,20 +11,7 @@ CORE_ROOT_DIR = $$PWD/../../../../../core PWD_ROOT_DIR = $$PWD include($$CORE_ROOT_DIR/Common/base.pri) - -INCLUDEPATH += $$PWD_ROOT_DIR/../hyphen - -DEFINES += PRO_DIR=\\\"$$PWD/\\\" - -HEADERS += $$PWD_ROOT_DIR/../hyphen/hyphen.h -HEADERS += $$PWD_ROOT_DIR/../hyphen/hnjalloc.h - -#SOURCES += $$PWD_ROOT_DIR/../hyphen/hyphen.c -SOURCES += $$PWD_ROOT_DIR/../hyphen/hnjalloc.c - -SOURCES += \ - ../js/src/ExportedFunctions.cpp \ - ../js/src/HyphenApplication.cpp +ADD_DEPENDENCY(UnicodeConverter, kernel, graphics) SOURCES += main.cpp diff --git a/DesktopEditor/doctrenderer/embed/TextMeasurerEmbed.cpp b/DesktopEditor/doctrenderer/embed/TextMeasurerEmbed.cpp index 8a3674a235..269a6b9e3b 100644 --- a/DesktopEditor/doctrenderer/embed/TextMeasurerEmbed.cpp +++ b/DesktopEditor/doctrenderer/embed/TextMeasurerEmbed.cpp @@ -237,37 +237,27 @@ JSSmart CTextMeasurerEmbed::Hyphen_Word(JSSmart lang, JSSmar if (!result) return CJSContext::createNull(); - int nCount = 0; - char* tmp = result; - - while (*tmp != 0) - { - if (1 == (*tmp & 1)) - ++nCount; - ++tmp; - } - - if (0 == nCount) - return CJSContext::createNull(); - - CJSArray* ret = CJSContext::createArray(nCount); - nCount = 0; + int count = 0; int pos = 0; - int posUnicode = 0; - int posUtf8 = 0; - while (result[pos] != 0) { if (1 == (result[pos] & 1)) - { - while (posUtf8 < (pos + 1)) - { - ++posUnicode; - posUtf8 += GetUtf8SymbolLen(curUnicode[posUtf8]); - } - ret->set(nCount++, CJSContext::createInt(posUnicode)); - } + ++count; + ++pos; + } + + if (0 == count) + return CJSContext::createNull(); + + CJSArray* ret = CJSContext::createArray(count); + + pos = 0; + count = 0; + while (result[pos] != 0) + { + if (1 == (result[pos] & 1)) + ret->set(count++, CJSContext::createInt(pos + 1)); pos++; } diff --git a/DesktopEditor/fontengine/TextHyphen.cpp b/DesktopEditor/fontengine/TextHyphen.cpp index eb53a72e6b..8e4c91c02e 100644 --- a/DesktopEditor/fontengine/TextHyphen.cpp +++ b/DesktopEditor/fontengine/TextHyphen.cpp @@ -85,7 +85,7 @@ HyphenDict* hnj_hyphen_load_stream(std::istream &in) for (i = 0; i < MAX_NAME; i++) dict[k]->cset[i]= 0; - if (in >> buf) + if (in >> dict[k]->cset) { for (i = 0; i < MAX_NAME; i++) if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) diff --git a/DesktopEditor/fontengine/js/engine/module_js.js b/DesktopEditor/fontengine/js/engine/module_js.js index 40aba938da..1745f2118d 100644 --- a/DesktopEditor/fontengine/js/engine/module_js.js +++ b/DesktopEditor/fontengine/js/engine/module_js.js @@ -571,52 +571,22 @@ AscFonts.Hyphen_LoadDictionary = function(lang, data) return (result === 0) ? true : false; }; -function GetUtf8SymbolLen(c) -{ - if (0x00 == (c & 0x80)) - return 1; - else if (0x00 == (c & 0x20)) - return 2; - else if (0x00 == (c & 0x10)) - return 3; - else if (0x00 == (c & 0x0F)) - return 4; - else if (0x00 == (c & 0x08)) - return 4; - else if (0x00 == (c & 0x04)) - return 5; - return 6; -} - AscFonts.Hyphen_Word = function(lang, word) { - let wordPointer = word.toUtf8Pointer(); + let wordPointer = word.toUtf8Pointer(true); let wordLen = wordPointer.length; let hyphens = []; if (wordPointer) { - const ptr = Module._hyphenWord(hyphenApplication, lang, wordPointer.ptr, wordLen); - - let curUnicode = new Uint8ClampedArray(Module["HEAP8"].buffer, wordPointer.ptr, wordLen); - let posUnicode = 0; - let posUtf8 = 0; - + let ptr = Module._hyphenWord(hyphenApplication, lang, wordPointer.ptr, wordLen); let vector = new Uint8ClampedArray(Module["HEAP8"].buffer, ptr, wordLen + 5); let pos = 0; while (vector[pos] != 0) { if (1 === (vector[pos] & 1)) - { - while (posUtf8 < (pos + 1)) - { - ++posUnicode; - posUtf8 += GetUtf8SymbolLen(curUnicode[posUtf8]); - } - hyphens.push(posUnicode); - } - + hyphens.push(pos+1); pos++; } diff --git a/DesktopEditor/fontengine/js/libfont.json b/DesktopEditor/fontengine/js/libfont.json index efc85d4831..200fbfdd26 100644 --- a/DesktopEditor/fontengine/js/libfont.json +++ b/DesktopEditor/fontengine/js/libfont.json @@ -273,7 +273,14 @@ }, { "folder": "../../raster/Metafile/", - "files": ["MetaFile.cpp", "Common/MetaFileTypes.cpp", "Common/MetaFileUtils.cpp", "Common/CPathConverter.cpp"] + "files": [ + "MetaFile.cpp", + "Common/MetaFileTypes.cpp", + "Common/MetaFileUtils.cpp", + "Common/CPathConverter.cpp", + "Common/CPath.cpp", + "Common/CClip.cpp" + ] }, { "folder": "../../raster/Metafile/Emf/", @@ -286,7 +293,7 @@ "EmfParser/CEmfParserBase.cpp", "EmfParser/CEmfPlusParser.cpp", "EmfObjects.cpp", - "EmfPath.cpp", + "EmfPlusObjects.cpp", "EmfPlayer.cpp", "EmfFile.cpp" ]