Add module for hyphenation

This commit is contained in:
Oleg Korshul
2023-08-23 22:11:00 +03:00
parent 1a95501cbb
commit b88b7db389
10 changed files with 372 additions and 29 deletions

View File

@ -54,6 +54,10 @@ WASM_EXPORT int hyphenLoadDictionary(NSHyphen::CEngine* app, const int lang, con
{
return app->LoadDictionary(lang, (const unsigned char*)dict, dict_size);
}
// Exported wrapper: reports whether the engine knows a dictionary for `lang`.
// Returns 1 when CEngine::IsDictionaryExist(lang) is true, otherwise 0.
WASM_EXPORT int hyphenCheckDictionary(NSHyphen::CEngine* app, const int lang)
{
	const bool isKnown = app->IsDictionaryExist(lang);
	if (isKnown)
		return 1;
	return 0;
}
WASM_EXPORT char* hyphenWord(NSHyphen::CEngine* app, const int lang, const char *word, const int word_len)
{
return app->Process(lang, word, word_len);

View File

@ -85,11 +85,12 @@
request.onload = function ()
{
var dict = request.response;
window.hyphen.loadDictionary(lang, dict);
var langInt = langs[lang];
window.hyphen.loadDictionary(langInt, dict);
for (var i = 0; i < text.length; i++)
{
var hyphens = window.hyphen.hyphenWord(lang, text[i].toLowerCase());
var hyphens = window.hyphen.hyphenWord(langInt, text[i].toLowerCase());
let itemUtf8 = text[i].toUtf8(true);
let start = 0;

View File

@ -1,6 +1,7 @@
#include "./TextMeasurerEmbed.h"
#include "./PointerEmbed.h"
#include "./../../fontengine/TextShaper.h"
#include "./../../fontengine/TextHyphen.h"
#define RAW_POINTER(value) ((CPointerEmbedObject*)value->toObject()->getNative())->Data
#define POINTER_OBJECT(value) ((CPointerEmbedObject*)value->toObject()->getNative())
@ -29,6 +30,17 @@ public:
}
};
// Creates the hyphenation engine owned by this embed object.
// The engine is stored behind the untyped m_hyphen_engine pointer.
CTextMeasurerEmbed::CTextMeasurerEmbed()
	: m_hyphen_engine(new NSHyphen::CEngine())
{
}
// Destroys the hyphenation engine created in the constructor.
CTextMeasurerEmbed::~CTextMeasurerEmbed()
{
	// m_hyphen_engine is stored as void*, so restore the real type before deleting.
	delete static_cast<NSHyphen::CEngine*>(m_hyphen_engine);
	m_hyphen_engine = NULL;
}
JSSmart<CJSValue> CTextMeasurerEmbed::FT_Malloc(JSSmart<CJSValue> typed_array_or_len)
{
void* pData = NULL;
@ -192,3 +204,77 @@ JSSmart<CJSValue> CTextMeasurerEmbed::HB_FontFree(JSSmart<CJSValue> font)
return CJSContext::createUndefined();
}
#endif
// Script binding: forwards `size` (converted with toInt32) to CEngine::SetCacheSize.
// Always returns the JS `undefined` value.
JSSmart<CJSValue> CTextMeasurerEmbed::Hyphen_SetCacheSize(JSSmart<CJSValue> size)
{
	NSHyphen::CEngine* pEngine = (NSHyphen::CEngine*)m_hyphen_engine;
	const int nCacheSize = size->toInt32();
	pEngine->SetCacheSize(nCacheSize);
	return CJSContext::createUndefined();
}
// Returns the length in bytes (1..6) of a UTF-8 sequence, judged from its first byte `c`.
// The bits 0x80, 0x20, 0x10, 0x08, 0x04 are tested in that order; the first one that is
// clear decides the length. Bit 0x40 is never inspected, so continuation bytes
// (10xxxxxx) are not rejected and simply fall into one of the multi-byte lengths.
inline int GetUtf8SymbolLen(const unsigned char& c)
{
	// arMasks[i] being clear in `c` means the sequence is (i + 1) bytes long.
	static const unsigned char arMasks[] = { 0x80, 0x20, 0x10, 0x08, 0x04 };
	const int nMasksCount = (int)(sizeof(arMasks) / sizeof(arMasks[0]));

	for (int nIndex = 0; nIndex < nMasksCount; ++nIndex)
	{
		if (0x00 == (c & arMasks[nIndex]))
			return nIndex + 1;
	}

	return 6;
}
// Script binding: hyphenates `word` for language `lang`.
//
// The word is converted with toStringA() and handed to CEngine::Process together with
// its byte length. The engine answers with a zero-terminated buffer; every entry whose
// lowest bit is set is reported as a hyphenation point.
//
// Returns JS null when the engine returns no buffer or when no entry has the low bit
// set; otherwise a JS array with one integer per marked entry. Each integer is the
// number of UTF-8 symbols counted (via GetUtf8SymbolLen) while the running byte offset
// is <= the index of the marked entry.
// NOTE(review): this assumes the buffer is indexed by byte offset into the word - confirm
// against CEngine::Process.
JSSmart<CJSValue> CTextMeasurerEmbed::Hyphen_Word(JSSmart<CJSValue> lang, JSSmart<CJSValue> word)
{
	std::string sUtf8 = word->toStringA();
	const char* pUtf8 = sUtf8.c_str();

	NSHyphen::CEngine* pEngine = (NSHyphen::CEngine*)m_hyphen_engine;
	char* pMarks = pEngine->Process(lang->toInt32(), pUtf8, (int)sUtf8.length());
	if (!pMarks)
		return CJSContext::createNull();

	// First pass: the array has to be created with its final size.
	int nMarkedCount = 0;
	for (const char* pCur = pMarks; 0 != *pCur; ++pCur)
	{
		if (0 != (*pCur & 1))
			++nMarkedCount;
	}

	if (nMarkedCount == 0)
		return CJSContext::createNull();

	CJSArray* pPositions = CJSContext::createArray(nMarkedCount);

	// Second pass: both counters only move forward, so the word is scanned once.
	int nWriteIndex = 0;
	int nSymbols = 0;
	int nByteOffset = 0;
	for (int nMark = 0; 0 != pMarks[nMark]; ++nMark)
	{
		if (0 == (pMarks[nMark] & 1))
			continue;

		while (nByteOffset <= nMark)
		{
			++nSymbols;
			nByteOffset += GetUtf8SymbolLen(pUtf8[nByteOffset]);
		}

		pPositions->set(nWriteIndex++, CJSContext::createInt(nSymbols));
	}

	return pPositions;
}
// Script binding: returns a JS boolean with the result of
// CEngine::IsDictionaryExist for `lang` (converted with toInt32).
JSSmart<CJSValue> CTextMeasurerEmbed::Hyphen_IsDictionaryExist(JSSmart<CJSValue> lang)
{
	NSHyphen::CEngine* pEngine = (NSHyphen::CEngine*)m_hyphen_engine;
	const int nLang = lang->toInt32();
	return CJSContext::createBool(pEngine->IsDictionaryExist(nLang));
}

View File

@ -10,13 +10,12 @@
using namespace NSJSBase;
class JS_DECL CTextMeasurerEmbed : public CJSEmbedObject
{
private:
void* m_hyphen_engine;
public:
CTextMeasurerEmbed()
{
}
~CTextMeasurerEmbed()
{
}
CTextMeasurerEmbed();
~CTextMeasurerEmbed();
public:
JSSmart<CJSValue> FT_Malloc(JSSmart<CJSValue> typed_array_or_len);
@ -50,6 +49,10 @@ public:
JSSmart<CJSValue> HB_FontFree(JSSmart<CJSValue> font);
#endif
JSSmart<CJSValue> Hyphen_SetCacheSize(JSSmart<CJSValue> size);
JSSmart<CJSValue> Hyphen_Word(JSSmart<CJSValue> lang, JSSmart<CJSValue> word);
JSSmart<CJSValue> Hyphen_IsDictionaryExist(JSSmart<CJSValue> lang);
DECLARE_EMBED_METHODS
};

View File

@ -285,29 +285,18 @@ namespace NSHyphen
m_pLastDict = NULL;
#ifndef HYPHEN_ENGINE_DISABLE_FILESYSTEM
std::wstring sFilePath = GetDictionaryPath(m_nLastLang);
if (m_sDirectory.empty())
m_sDirectory = NSFile::GetProcessDirectory() + L"/dictionaries";
for (int i = 0; i < NSTextLanguages::DictionaryRec_count; ++i)
if (NSFile::CFileBinary::Exists(sFilePath))
{
if (m_nLastLang == NSTextLanguages::Dictionaries[i].m_lang)
{
const char* sNameStr = NSTextLanguages::Dictionaries[i].m_name;
std::wstring sNameU = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8((BYTE*)sNameStr, (LONG)(strlen(sNameStr)));
std::wstring sFilePath = m_sDirectory + L"/" + sNameU + L"/hyph_" + sNameU + L".dic";
FILE* f = NSFile::CFileBinary::OpenFileNative(sFilePath, L"r");
if (f == NULL)
return 1;
if (NSFile::CFileBinary::Exists(sFilePath))
{
FILE* f = NSFile::CFileBinary::OpenFileNative(sFilePath, L"r");
if (f == NULL)
return 1;
m_pLastDict = hnj_hyphen_load_file(f);
fclose(f);
}
break;
}
m_pLastDict = hnj_hyphen_load_file(f);
fclose(f);
}
#endif
@ -328,6 +317,26 @@ namespace NSHyphen
return (NULL == m_pLastDict) ? 1 : 0;
}
#ifndef HYPHEN_ENGINE_DISABLE_FILESYSTEM
// Builds the path of the hyphenation dictionary file for `lang`:
//   <m_sDirectory>/<name>/hyph_<name>.dic
// where <name> is the m_name of the NSTextLanguages::Dictionaries entry whose m_lang
// equals `lang`.
//
// When m_sDirectory is empty it is first set to "<process directory>/dictionaries".
// Returns an empty string when `lang` has no entry in the table. The file itself is
// not checked for existence here.
std::wstring GetDictionaryPath(const int& lang)
{
	if (m_sDirectory.empty())
		m_sDirectory = NSFile::GetProcessDirectory() + L"/dictionaries";

	for (int i = 0; i < NSTextLanguages::DictionaryRec_count; ++i)
	{
		// Match against the requested language; the previous code compared m_nLastLang
		// and silently ignored the parameter.
		if (lang != NSTextLanguages::Dictionaries[i].m_lang)
			continue;

		const char* sNameStr = NSTextLanguages::Dictionaries[i].m_name;
		std::wstring sNameU = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8((BYTE*)sNameStr, (LONG)(strlen(sNameStr)));
		return m_sDirectory + L"/" + sNameU + L"/hyph_" + sNameU + L".dic";
	}

	return L"";
}
#endif
char* Process(const int& lang, const char* word, const int& len)
{
// resize 2x
@ -416,4 +425,13 @@ namespace NSHyphen
{
return m_internal->Process(lang, word, (len == -1) ? strlen(word) : len);
}
// Returns true when `lang` equals the m_lang of some entry in
// NSTextLanguages::Dictionaries. Only the table is consulted: nothing here checks
// that a dictionary file is present or that one has been loaded.
bool CEngine::IsDictionaryExist(const int& lang)
{
	int nIndex = 0;
	while (nIndex < NSTextLanguages::DictionaryRec_count)
	{
		if (NSTextLanguages::Dictionaries[nIndex].m_lang == lang)
			break;
		++nIndex;
	}

	// The scan stops early only on a match.
	return nIndex < NSTextLanguages::DictionaryRec_count;
}
}

View File

@ -52,6 +52,7 @@ namespace NSHyphen
void SetCacheSize(const int& size);
int LoadDictionary(const int& lang);
int LoadDictionary(const int& lang, const unsigned char* data, const unsigned int& data_len);
bool IsDictionaryExist(const int& lang);
char* Process(const int& lang, const char* word, const int& len = -1);

View File

@ -193,6 +193,86 @@ AscFonts.HB_ShapeText = function(fontFile, text, features, script, direction, la
return g_return_obj_count;
};
// Handle returned by the module's "_hyphenCreateApplication"; 0 until Hyphen_Init runs.
var hyphenApplication = 0;

/**
 * Creates the hyphenation application inside the module and stores its handle
 * in hyphenApplication for the other Hyphen_* functions.
 */
AscFonts.Hyphen_Init = function()
{
	const createdApplication = Module["_hyphenCreateApplication"]();
	hyphenApplication = createdApplication;
};
/**
 * Destroys the hyphenation application created by Hyphen_Init.
 * The stored handle is reset to 0 afterwards, so a repeated call does nothing
 * instead of passing an already destroyed handle to the module again.
 */
AscFonts.Hyphen_Destroy = function()
{
	if (!hyphenApplication)
		return;

	Module["_hyphenDestroyApplication"](hyphenApplication);
	hyphenApplication = 0;
};
/**
 * Dictionary check for this binding: always returns false and ignores `lang`.
 * NOTE(review): the module function "_hyphenCheckDictionary" is not called here -
 * confirm that this is intentional.
 */
AscFonts.Hyphen_CheckDictionary = function(lang)
{
return false;
};
/**
 * Copies a dictionary into the module heap and loads it for `lang`.
 * @param {number} lang language code passed through to "_hyphenLoadDictionary"
 * @param {ArrayBuffer} data raw dictionary bytes
 * @returns {boolean} true when "_hyphenLoadDictionary" returns 0; false otherwise,
 * including when `data` is missing/empty or the heap allocation fails
 */
AscFonts.Hyphen_LoadDictionary = function(lang, data)
{
	// A failed or empty download must not reach the heap copy below.
	if (!data || !data.byteLength)
		return false;

	let dictSize = data.byteLength;
	let dictPointer = Module["_malloc"](dictSize);
	if (!dictPointer)
		return false;

	Module["HEAP8"].set(new Uint8ClampedArray(data), dictPointer);

	let result = Module["_hyphenLoadDictionary"](hyphenApplication, lang, dictPointer, dictSize);

	// The temporary heap copy is released right after the call.
	Module["_free"](dictPointer);

	return result === 0;
};
/**
 * Returns the length in bytes (1..6) of a UTF-8 sequence, judged from its first byte.
 * The bits 0x80, 0x20, 0x10, 0x08, 0x04 are tested in that order; the first clear one
 * decides the length. Bit 0x40 is never inspected, so continuation bytes are not rejected.
 * @param {number} c first byte of the sequence
 * @returns {number}
 */
function GetUtf8SymbolLen(c)
{
	// masks[i] being clear in `c` means the sequence is (i + 1) bytes long.
	const masks = [0x80, 0x20, 0x10, 0x08, 0x04];

	for (let index = 0; index < masks.length; ++index)
	{
		if (0x00 == (c & masks[index]))
			return index + 1;
	}

	return 6;
}
/**
 * Hyphenates `word` for language `lang` through the module's "_hyphenWord".
 * @param {number} lang language code
 * @param {string} word word to hyphenate; converted with toUtf8Pointer()
 * @returns {number[]} one entry per result byte whose lowest bit is set; empty when the
 * word could not be converted or the module returned no result buffer
 */
AscFonts.Hyphen_Word = function(lang, word)
{
	let hyphens = [];

	let wordPointer = word.toUtf8Pointer();
	// Check before touching .length: the pointer object may be missing.
	if (!wordPointer)
		return hyphens;

	let wordLen = wordPointer.length;

	// Bracket access, like every other call into the module in this file.
	const ptr = Module["_hyphenWord"](hyphenApplication, lang, wordPointer.ptr, wordLen);

	// A zero pointer means there is no result; do not read the heap at address 0.
	if (ptr)
	{
		// Views are created after the call, on the current heap buffer.
		let curUnicode = new Uint8ClampedArray(Module["HEAP8"].buffer, wordPointer.ptr, wordLen);
		let vector = new Uint8ClampedArray(Module["HEAP8"].buffer, ptr, wordLen + 5);

		let posUnicode = 0;
		let posUtf8 = 0;

		for (let pos = 0; vector[pos] != 0; pos++)
		{
			if (1 !== (vector[pos] & 1))
				continue;

			// NOTE(review): CTextMeasurerEmbed::Hyphen_Word runs this loop with `<=`
			// instead of `<`; confirm which boundary is intended.
			while (posUtf8 < pos)
			{
				++posUnicode;
				posUtf8 += GetUtf8SymbolLen(curUnicode[posUtf8]);
			}

			hyphens.push(posUnicode);
		}
	}

	wordPointer.free();
	return hyphens;
};
AscFonts.onLoadModule();
})(window, undefined);

View File

@ -35,7 +35,7 @@
window['AscFonts'] = window['AscFonts'] || {};
var AscFonts = window['AscFonts'];
var g_native_engine = CreateNativeTextMeasurer();
var g_native_engine = CreateEmbedObject("CTextMeasurerEmbed");
function CReturnObject()
{
@ -206,6 +206,28 @@ AscFonts.HB_ShapeText = function(fontFile, text, features, script, direction, la
return g_return_obj_count;
};
/**
 * Hyphenation initialization for this binding: intentionally empty.
 */
AscFonts.Hyphen_Init = function()
{
// nothing to initialize here
};
/**
 * Hyphenation teardown for this binding: intentionally empty.
 */
AscFonts.Hyphen_Destroy = function()
{
// nothing is released explicitly; presumably left to garbage collection (TODO confirm)
};
/**
 * Asks the native engine object whether a dictionary exists for `lang`.
 * @param {number} lang language code
 * @returns {*} whatever the engine's "Hyphen_IsDictionaryExist" returns
 */
AscFonts.Hyphen_CheckDictionary = function(lang)
{
	const isDictionaryKnown = g_native_engine["Hyphen_IsDictionaryExist"](lang);
	return isDictionaryKnown;
};
/**
 * Dictionary loading for this binding: both arguments are ignored and the
 * function always returns false.
 */
AscFonts.Hyphen_LoadDictionary = function(lang, data)
{
return false;
};
/**
 * Hyphenates `word` for `lang` through the native engine object.
 * @param {number} lang language code
 * @param {string} word word to hyphenate
 * @returns {*} the engine's "Hyphen_Word" result, or an empty array when that result is falsy
 */
AscFonts.Hyphen_Word = function(lang, word)
{
	const positions = g_native_engine["Hyphen_Word"](lang, word);
	return positions || [];
};
AscFonts.onLoadModule();
AscFonts.onLoadModule();

View File

@ -44,7 +44,13 @@
"_ASC_HB_LanguageFromString",
"_ASC_HB_ShapeText",
"_ASC_HB_FontFree"
"_ASC_HB_FontFree",
"_hyphenCreateApplication",
"_hyphenDestroyApplication",
"_hyphenLoadDictionary",
"_hyphenCheckDictionary",
"_hyphenWord"
],
"include_path": [
"./../../graphics/pro/js/freetype-2.10.4/include",
@ -55,7 +61,7 @@
"./../../../OfficeUtils/src",
"./../../../OfficeUtils/src/zlib-1.2.11"
],
"define": ["__linux__", "_LINUX", "UNIX", "NDEBUG", "_LIB", "_CRT_SECURE_NO_WARNINGS", "FT2_BUILD_LIBRARY", "HAVE_FREETYPE", "FT_CONFIG_OPTION_SYSTEM_ZLIB"],
"define": ["__linux__", "_LINUX", "UNIX", "NDEBUG", "_LIB", "_CRT_SECURE_NO_WARNINGS", "FT2_BUILD_LIBRARY", "HAVE_FREETYPE", "FT_CONFIG_OPTION_SYSTEM_ZLIB", "GRAPHICS_NO_USE_DYNAMIC_LIBRARY", "HYPHEN_ENGINE_DISABLE_FILESYSTEM"],
"compile_files_array": [
{
"name": "f",
@ -186,6 +192,16 @@
"name": "w",
"folder": "./../../graphics/pro/js/wasm/src/lib",
"files": ["wasm_jmp.cpp"]
},
{
"name": "h_e",
"folder": "./..",
"files": ["TextHyphen.cpp"]
},
{
"name": "h_m",
"folder": "./../../../Common/3dParty/hyphen/js/src",
"files": ["hyphen.cpp"]
}
]
}

View File

@ -73,6 +73,13 @@ function onLoadFontsModule(window, undefined)
AscFonts.HB_FontFree = AscFonts["HB_FontFree"];
AscFonts.HB_ShapeText = AscFonts["HB_ShapeText"];
AscFonts["Hyphen_Init"]();
AscFonts.Hyphen_Destroy = AscFonts["Hyphen_Destroy"];
AscFonts.Hyphen_LoadDictionary = AscFonts["Hyphen_LoadDictionary"];
AscFonts.Hyphen_CheckDictionary = AscFonts["Hyphen_CheckDictionary"];
AscFonts.Hyphen_Word = AscFonts["Hyphen_Word"];
AscFonts.CreateNativeStreamByIndex = function(stream_index)
{
let stream = AscFonts.g_fonts_streams[stream_index];
@ -664,4 +671,109 @@ function onLoadFontsModule(window, undefined)
retObj["free"]();
return glyphs;
};
/**
 * Collects the code points of a word and asks AscFonts for its hyphenation points.
 * It also tracks, per language code, whether a hyphenation dictionary is available
 * and downloads dictionaries on request.
 * @constructor
 */
function Hyphenation()
{
	// Word accumulated through addCodePoint().
	this._value = "";
	// Language code handed to AscFonts.Hyphen_Word / Hyphen_CheckDictionary.
	this._lang = 0;
	// ("" + langCode) -> true/false; a missing key means "not resolved yet".
	this._dictionaries = {};
	// Filled on first use from AscCommon.spellcheckGetLanguages().
	this._mapToNames = null;

	/** Appends one code point to the current word. */
	this.addCodePoint = function(codePoint)
	{
		this._value += String.fromCodePoint(codePoint);
	};

	/** Drops the current word. */
	this.clear = function()
	{
		this._value = "";
	};

	/**
	 * Selects the language for subsequent hyphenate() calls.
	 * @returns {boolean} the cached dictionary state for this language when known; with
	 * window["NATIVE_EDITOR_ENJINE"] set, the state is queried from
	 * AscFonts.Hyphen_CheckDictionary and cached; otherwise false.
	 */
	this.setLang = function(langCode)
	{
		this._lang = langCode;

		let _langKey = "" + langCode;
		if (this._dictionaries[_langKey] !== undefined)
			return this._dictionaries[_langKey];

		if (window["NATIVE_EDITOR_ENJINE"])
		{
			this._dictionaries[_langKey] = AscFonts.Hyphen_CheckDictionary(this._lang);
			return this._dictionaries[_langKey];
		}

		return false;
	};

	/**
	 * @returns {Array} result of AscFonts.Hyphen_Word for the current word and language,
	 * or an empty array when no code points were added.
	 */
	this.hyphenate = function()
	{
		if ("" === this._value)
			return [];

		return AscFonts.Hyphen_Word(this._lang, this._value);
	};

	/**
	 * Makes sure a load attempt for `lang` has finished, then calls `callback`.
	 * With window["NATIVE_EDITOR_ENJINE"] set nothing is downloaded. A language without
	 * a name in the map is recorded as unavailable.
	 */
	this.loadDictionary = function(lang, callback)
	{
		if (window["NATIVE_EDITOR_ENJINE"])
		{
			callback();
			return;
		}

		if (!this._mapToNames)
			this._mapToNames = AscCommon.spellcheckGetLanguages();

		let _langKey = "" + lang;
		let _langName = this._mapToNames[_langKey];

		if (_langName === undefined)
		{
			this._dictionaries[_langKey] = false;
			callback();
			return;
		}

		this._loadDictionaryAttemt(_langKey, _langName, callback);
	};

	/**
	 * Downloads "hyph_<langName>.dic" and passes it to AscFonts.Hyphen_LoadDictionary.
	 * A request error is retried; after the retries are used up the language is
	 * recorded as unavailable. `callback` is invoked exactly once on every final outcome.
	 */
	this._loadDictionaryAttemt = function(langKey, langName, callback, currentAttempt)
	{
		var xhr = new XMLHttpRequest();
		let urlDictionaries = "../../../../dictionaries/";
		let url = urlDictionaries + langName + "/hyph_" + langName + ".dic";

		xhr.open('GET', url, true);
		xhr.responseType = 'arraybuffer';
		xhr.currentAttempt = currentAttempt || 0;

		if (xhr.overrideMimeType)
			xhr.overrideMimeType('text/plain; charset=x-user-defined');
		else
			xhr.setRequestHeader('Accept-Charset', 'x-user-defined');

		var _t = this;
		xhr.onload = function()
		{
			let isLoaded = false;

			// Same acceptance rule as before: HTTP 200, or any status for a file: page.
			let isAccepted = (this.status === 200 || location.href.indexOf("file:") === 0);
			if (isAccepted && this.response)
				isLoaded = (true === AscFonts.Hyphen_LoadDictionary(parseInt(langKey, 10), this.response));

			// Record the real outcome and always notify the caller: an HTTP error
			// (for example 404) also arrives here and previously left the caller
			// waiting forever.
			_t._dictionaries[langKey] = isLoaded;
			callback();
		};
		xhr.onerror = function()
		{
			let _currentAttempt = xhr.currentAttempt + 1;
			if (_currentAttempt > 3)
			{
				_t._dictionaries[langKey] = false;
				callback();
				return;
			}

			_t._loadDictionaryAttemt(langKey, langName, callback, _currentAttempt);
		};

		xhr.send(null);
	};
}
window["AscHyphenation"] = new Hyphenation();
}