mirror of
https://github.com/ONLYOFFICE/core.git
synced 2026-02-10 18:05:41 +08:00
Add module for hyphenation
This commit is contained in:
@ -54,6 +54,10 @@ WASM_EXPORT int hyphenLoadDictionary(NSHyphen::CEngine* app, const int lang, con
|
||||
{
|
||||
return app->LoadDictionary(lang, (const unsigned char*)dict, dict_size);
|
||||
}
|
||||
// Exported query: asks the engine whether `lang` has a dictionary entry.
// Returns 1 when app->IsDictionaryExist(lang) is true, otherwise 0.
WASM_EXPORT int hyphenCheckDictionary(NSHyphen::CEngine* app, const int lang)
{
	if (app->IsDictionaryExist(lang))
		return 1;
	return 0;
}
|
||||
WASM_EXPORT char* hyphenWord(NSHyphen::CEngine* app, const int lang, const char *word, const int word_len)
|
||||
{
|
||||
return app->Process(lang, word, word_len);
|
||||
|
||||
@ -85,11 +85,12 @@
|
||||
request.onload = function ()
|
||||
{
|
||||
var dict = request.response;
|
||||
window.hyphen.loadDictionary(lang, dict);
|
||||
var langInt = langs[lang];
|
||||
window.hyphen.loadDictionary(langInt, dict);
|
||||
|
||||
for (var i = 0; i < text.length; i++)
|
||||
{
|
||||
var hyphens = window.hyphen.hyphenWord(lang, text[i].toLowerCase());
|
||||
var hyphens = window.hyphen.hyphenWord(langInt, text[i].toLowerCase());
|
||||
|
||||
let itemUtf8 = text[i].toUtf8(true);
|
||||
let start = 0;
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
#include "./TextMeasurerEmbed.h"
|
||||
#include "./PointerEmbed.h"
|
||||
#include "./../../fontengine/TextShaper.h"
|
||||
#include "./../../fontengine/TextHyphen.h"
|
||||
|
||||
#define RAW_POINTER(value) ((CPointerEmbedObject*)value->toObject()->getNative())->Data
|
||||
#define POINTER_OBJECT(value) ((CPointerEmbedObject*)value->toObject()->getNative())
|
||||
@ -29,6 +30,17 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Allocates the hyphenation engine; the pointer is kept type-erased in m_hyphen_engine.
CTextMeasurerEmbed::CTextMeasurerEmbed()
	: m_hyphen_engine(new NSHyphen::CEngine())
{
}
|
||||
// Destroys the hyphenation engine and clears the stored pointer.
CTextMeasurerEmbed::~CTextMeasurerEmbed()
{
	// m_hyphen_engine is a void*, so the real type must be restored before delete.
	delete static_cast<NSHyphen::CEngine*>(m_hyphen_engine);
	m_hyphen_engine = NULL;
}
|
||||
|
||||
JSSmart<CJSValue> CTextMeasurerEmbed::FT_Malloc(JSSmart<CJSValue> typed_array_or_len)
|
||||
{
|
||||
void* pData = NULL;
|
||||
@ -192,3 +204,77 @@ JSSmart<CJSValue> CTextMeasurerEmbed::HB_FontFree(JSSmart<CJSValue> font)
|
||||
return CJSContext::createUndefined();
|
||||
}
|
||||
#endif
|
||||
|
||||
// JS binding: converts `size` with toInt32() and passes it to CEngine::SetCacheSize.
// Always returns the JS `undefined` value.
JSSmart<CJSValue> CTextMeasurerEmbed::Hyphen_SetCacheSize(JSSmart<CJSValue> size)
{
	NSHyphen::CEngine* pEngine = (NSHyphen::CEngine*)m_hyphen_engine;
	const int nCacheSize = size->toInt32();
	pEngine->SetCacheSize(nCacheSize);
	return CJSContext::createUndefined();
}
|
||||
|
||||
// Returns the length in bytes (1..6) of a UTF-8 sequence, judged from its first byte `c`.
// The length is the position of the first clear bit among 0x80, 0x20, 0x10, 0x08, 0x04
// (legacy 5- and 6-byte forms included). Bit 0x40 is never examined, so a continuation
// byte (10xxxxxx) is not rejected and simply yields a length from its lower bits.
//
// The former `(c & 0x0F) == 0` test was dropped: whenever it held, the `(c & 0x08) == 0`
// test right after it held as well and produced the same answer (4).
inline int GetUtf8SymbolLen(const unsigned char& c)
{
	if (0x00 == (c & 0x80))
		return 1; // 0xxxxxxx
	if (0x00 == (c & 0x20))
		return 2; // 110xxxxx
	if (0x00 == (c & 0x10))
		return 3; // 1110xxxx
	if (0x00 == (c & 0x08))
		return 4; // 11110xxx
	if (0x00 == (c & 0x04))
		return 5; // 111110xx
	return 6;
}
|
||||
|
||||
// JS binding: runs the hyphenation engine on `word` for language `lang`.
// The buffer returned by CEngine::Process is read as a zero-terminated array in which an
// odd value marks a break. Returns JS null when Process yields NULL or no odd entry is
// present; otherwise returns a JS array of break positions counted in UTF-8 symbols of the
// input word rather than in bytes.
JSSmart<CJSValue> CTextMeasurerEmbed::Hyphen_Word(JSSmart<CJSValue> lang, JSSmart<CJSValue> word)
{
	std::string sUtf8 = word->toStringA();
	const char* pUtf8 = sUtf8.c_str();

	NSHyphen::CEngine* pEngine = (NSHyphen::CEngine*)m_hyphen_engine;
	char* pMarks = pEngine->Process(lang->toInt32(), pUtf8, (int)sUtf8.length());
	if (NULL == pMarks)
		return CJSContext::createNull();

	// First pass: count the marks so the JS array can be created with its final size.
	int nBreaks = 0;
	for (const char* pCur = pMarks; 0 != *pCur; ++pCur)
	{
		if (0 != (*pCur & 1))
			++nBreaks;
	}

	if (nBreaks == 0)
		return CJSContext::createNull();

	CJSArray* pBreaks = CJSContext::createArray(nBreaks);

	int nSymbols = 0; // UTF-8 symbols consumed so far
	int nBytes   = 0; // bytes consumed so far
	int nStored  = 0; // next free slot in pBreaks

	// Second pass: translate each marked index into a symbol count.
	for (int nPos = 0; 0 != pMarks[nPos]; ++nPos)
	{
		if (0 == (pMarks[nPos] & 1))
			continue;

		// Consume whole symbols until the byte cursor has moved past the marked index.
		while (nBytes <= nPos)
		{
			++nSymbols;
			nBytes += GetUtf8SymbolLen(pUtf8[nBytes]);
		}
		pBreaks->set(nStored++, CJSContext::createInt(nSymbols));
	}

	return pBreaks;
}
|
||||
|
||||
// JS binding: wraps CEngine::IsDictionaryExist(lang) and returns the answer as a JS boolean.
JSSmart<CJSValue> CTextMeasurerEmbed::Hyphen_IsDictionaryExist(JSSmart<CJSValue> lang)
{
	NSHyphen::CEngine* pEngine = (NSHyphen::CEngine*)m_hyphen_engine;
	const bool bExists = pEngine->IsDictionaryExist(lang->toInt32());
	return CJSContext::createBool(bExists);
}
|
||||
|
||||
@ -10,13 +10,12 @@
|
||||
using namespace NSJSBase;
|
||||
class JS_DECL CTextMeasurerEmbed : public CJSEmbedObject
|
||||
{
|
||||
private:
|
||||
void* m_hyphen_engine;
|
||||
|
||||
public:
|
||||
CTextMeasurerEmbed()
|
||||
{
|
||||
}
|
||||
~CTextMeasurerEmbed()
|
||||
{
|
||||
}
|
||||
CTextMeasurerEmbed();
|
||||
~CTextMeasurerEmbed();
|
||||
|
||||
public:
|
||||
JSSmart<CJSValue> FT_Malloc(JSSmart<CJSValue> typed_array_or_len);
|
||||
@ -50,6 +49,10 @@ public:
|
||||
JSSmart<CJSValue> HB_FontFree(JSSmart<CJSValue> font);
|
||||
#endif
|
||||
|
||||
JSSmart<CJSValue> Hyphen_SetCacheSize(JSSmart<CJSValue> size);
|
||||
JSSmart<CJSValue> Hyphen_Word(JSSmart<CJSValue> lang, JSSmart<CJSValue> word);
|
||||
JSSmart<CJSValue> Hyphen_IsDictionaryExist(JSSmart<CJSValue> lang);
|
||||
|
||||
DECLARE_EMBED_METHODS
|
||||
};
|
||||
|
||||
|
||||
@ -285,29 +285,18 @@ namespace NSHyphen
|
||||
m_pLastDict = NULL;
|
||||
|
||||
#ifndef HYPHEN_ENGINE_DISABLE_FILESYSTEM
|
||||
std::wstring sFilePath = GetDictionaryPath(m_nLastLang);
|
||||
if (m_sDirectory.empty())
|
||||
m_sDirectory = NSFile::GetProcessDirectory() + L"/dictionaries";
|
||||
|
||||
for (int i = 0; i < NSTextLanguages::DictionaryRec_count; ++i)
|
||||
if (NSFile::CFileBinary::Exists(sFilePath))
|
||||
{
|
||||
if (m_nLastLang == NSTextLanguages::Dictionaries[i].m_lang)
|
||||
{
|
||||
const char* sNameStr = NSTextLanguages::Dictionaries[i].m_name;
|
||||
std::wstring sNameU = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8((BYTE*)sNameStr, (LONG)(strlen(sNameStr)));
|
||||
std::wstring sFilePath = m_sDirectory + L"/" + sNameU + L"/hyph_" + sNameU + L".dic";
|
||||
FILE* f = NSFile::CFileBinary::OpenFileNative(sFilePath, L"r");
|
||||
if (f == NULL)
|
||||
return 1;
|
||||
|
||||
if (NSFile::CFileBinary::Exists(sFilePath))
|
||||
{
|
||||
FILE* f = NSFile::CFileBinary::OpenFileNative(sFilePath, L"r");
|
||||
if (f == NULL)
|
||||
return 1;
|
||||
|
||||
m_pLastDict = hnj_hyphen_load_file(f);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
m_pLastDict = hnj_hyphen_load_file(f);
|
||||
fclose(f);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -328,6 +317,26 @@ namespace NSHyphen
|
||||
|
||||
return (NULL == m_pLastDict) ? 1 : 0;
|
||||
}
|
||||
#ifndef HYPHEN_ENGINE_DISABLE_FILESYSTEM
|
||||
std::wstring GetDictionaryPath(const int& lang)
|
||||
{
|
||||
if (m_sDirectory.empty())
|
||||
m_sDirectory = NSFile::GetProcessDirectory() + L"/dictionaries";
|
||||
|
||||
for (int i = 0; i < NSTextLanguages::DictionaryRec_count; ++i)
|
||||
{
|
||||
if (m_nLastLang == NSTextLanguages::Dictionaries[i].m_lang)
|
||||
{
|
||||
const char* sNameStr = NSTextLanguages::Dictionaries[i].m_name;
|
||||
std::wstring sNameU = NSFile::CUtf8Converter::GetUnicodeStringFromUTF8((BYTE*)sNameStr, (LONG)(strlen(sNameStr)));
|
||||
std::wstring sFilePath = m_sDirectory + L"/" + sNameU + L"/hyph_" + sNameU + L".dic";
|
||||
return sFilePath;
|
||||
}
|
||||
}
|
||||
|
||||
return L"";
|
||||
}
|
||||
#endif
|
||||
char* Process(const int& lang, const char* word, const int& len)
|
||||
{
|
||||
// resize 2x
|
||||
@ -416,4 +425,13 @@ namespace NSHyphen
|
||||
{
|
||||
return m_internal->Process(lang, word, (len == -1) ? strlen(word) : len);
|
||||
}
|
||||
bool CEngine::IsDictionaryExist(const int& lang)
|
||||
{
|
||||
for (int i = 0; i < NSTextLanguages::DictionaryRec_count; ++i)
|
||||
{
|
||||
if (lang == NSTextLanguages::Dictionaries[i].m_lang)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,6 +52,7 @@ namespace NSHyphen
|
||||
void SetCacheSize(const int& size);
|
||||
int LoadDictionary(const int& lang);
|
||||
int LoadDictionary(const int& lang, const unsigned char* data, const unsigned int& data_len);
|
||||
bool IsDictionaryExist(const int& lang);
|
||||
|
||||
char* Process(const int& lang, const char* word, const int& len = -1);
|
||||
|
||||
|
||||
@ -193,6 +193,86 @@ AscFonts.HB_ShapeText = function(fontFile, text, features, script, direction, la
|
||||
return g_return_obj_count;
|
||||
};
|
||||
|
||||
var hyphenApplication = 0;
|
||||
/**
 * Creates the hyphenation engine inside the wasm module and stores the returned
 * handle in `hyphenApplication` for the other Hyphen_* wrappers.
 */
AscFonts.Hyphen_Init = function()
{
	let engineHandle = Module["_hyphenCreateApplication"]();
	hyphenApplication = engineHandle;
};
|
||||
/**
 * Destroys the hyphenation engine created by Hyphen_Init.
 * The stored handle is reset to 0 afterwards so that a repeated call (or a call
 * before Hyphen_Init) does not hand a stale pointer to the module.
 */
AscFonts.Hyphen_Destroy = function()
{
	if (0 === hyphenApplication)
		return;

	Module["_hyphenDestroyApplication"](hyphenApplication);
	hyphenApplication = 0;
};
|
||||
/**
 * Wasm build: always reports that no dictionary is available; `lang` is not used.
 * NOTE(review): the module also exports `_hyphenCheckDictionary`, which is not
 * called here - confirm that this is intentional.
 * @param {number} lang language code (ignored)
 * @returns {boolean} always false
 */
AscFonts.Hyphen_CheckDictionary = function(lang)
{
	return false;
};
|
||||
/**
 * Copies a dictionary into wasm memory and hands it to the hyphenation engine.
 * The temporary buffer is always released, even if copying or loading throws.
 * @param {number} lang language code the dictionary belongs to
 * @param {ArrayBuffer} data raw dictionary bytes
 * @returns {boolean} true when `_hyphenLoadDictionary` returns 0, false otherwise
 *                    (including when the temporary buffer cannot be allocated)
 */
AscFonts.Hyphen_LoadDictionary = function(lang, data)
{
	let dictSize = data.byteLength;
	let dictPointer = Module["_malloc"](dictSize);

	// A null pointer means the allocation failed; writing to it would corrupt the heap.
	if (0 === dictPointer)
		return false;

	let result;
	try
	{
		Module["HEAP8"].set(new Uint8ClampedArray(data), dictPointer);
		result = Module["_hyphenLoadDictionary"](hyphenApplication, lang, dictPointer, dictSize);
	}
	finally
	{
		Module["_free"](dictPointer);
	}

	return result === 0;
};
|
||||
|
||||
/**
 * Returns the length in bytes (1..6) of a UTF-8 sequence, judged from its first byte.
 * The length is the position of the first clear bit among 0x80, 0x20, 0x10, 0x08, 0x04.
 * Bit 0x40 is never examined, so continuation bytes are not rejected.
 *
 * The former `(c & 0x0F) == 0` test was dropped: whenever it held, the `(c & 0x08) == 0`
 * test right after it held as well and produced the same answer (4).
 * @param {number} c first byte of the sequence
 * @returns {number} sequence length in bytes
 */
function GetUtf8SymbolLen(c)
{
	if (0x00 == (c & 0x80))
		return 1; // 0xxxxxxx
	if (0x00 == (c & 0x20))
		return 2; // 110xxxxx
	if (0x00 == (c & 0x10))
		return 3; // 1110xxxx
	if (0x00 == (c & 0x08))
		return 4; // 11110xxx
	if (0x00 == (c & 0x04))
		return 5; // 111110xx
	return 6;
}
|
||||
|
||||
/**
 * Hyphenates `word` with the wasm engine.
 * The engine result is read as a zero-terminated byte array in which an odd value
 * marks a break; each marked index is converted from a byte offset into a count of
 * UTF-8 symbols of the word.
 * NOTE(review): the native (C++) embed advances its byte cursor while
 * `posUtf8 <= pos`, whereas this code uses `posUtf8 < pos` - confirm which bound
 * is intended, the two give different positions.
 * @param {number} lang language code
 * @param {string} word word to hyphenate
 * @returns {number[]} break positions; empty when nothing was found
 */
AscFonts.Hyphen_Word = function(lang, word)
{
	let hyphens = [];

	let wordPointer = word.toUtf8Pointer();
	// Check before touching any property: the pointer object may be missing.
	if (!wordPointer)
		return hyphens;

	try
	{
		let wordLen = wordPointer.length;
		const ptr = Module["_hyphenWord"](hyphenApplication, lang, wordPointer.ptr, wordLen);

		// A null result means there is nothing to read.
		if (ptr)
		{
			let curUnicode = new Uint8ClampedArray(Module["HEAP8"].buffer, wordPointer.ptr, wordLen);
			let posUnicode = 0;
			let posUtf8 = 0;

			let vector = new Uint8ClampedArray(Module["HEAP8"].buffer, ptr, wordLen + 5);

			// Stop at the terminator, and never run past the view if it is missing.
			for (let pos = 0; pos < vector.length && vector[pos] != 0; pos++)
			{
				if (1 !== (vector[pos] & 1))
					continue;

				while (posUtf8 < pos)
				{
					++posUnicode;
					posUtf8 += GetUtf8SymbolLen(curUnicode[posUtf8]);
				}
				hyphens.push(posUnicode);
			}
		}
	}
	finally
	{
		// Release the UTF-8 copy of the word on every path.
		wordPointer.free();
	}

	return hyphens;
};
|
||||
|
||||
AscFonts.onLoadModule();
|
||||
|
||||
})(window, undefined);
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
window['AscFonts'] = window['AscFonts'] || {};
|
||||
var AscFonts = window['AscFonts'];
|
||||
|
||||
var g_native_engine = CreateNativeTextMeasurer();
|
||||
var g_native_engine = CreateEmbedObject("CTextMeasurerEmbed");
|
||||
|
||||
function CReturnObject()
|
||||
{
|
||||
@ -206,6 +206,28 @@ AscFonts.HB_ShapeText = function(fontFile, text, features, script, direction, la
|
||||
return g_return_obj_count;
|
||||
};
|
||||
|
||||
/**
 * Native build: intentionally empty, no initialization is performed here.
 */
AscFonts.Hyphen_Init = function()
{
};
|
||||
/**
 * Native build: intentionally empty.
 * NOTE(review): the original marker here was "GC" - presumably the native engine is
 * reclaimed together with its embed object by the garbage collector; confirm.
 */
AscFonts.Hyphen_Destroy = function()
{
};
|
||||
/**
 * Native build: asks the embedded engine whether a dictionary exists for `lang`.
 * @param {number} lang language code
 * @returns {*} whatever the native "Hyphen_IsDictionaryExist" method returns
 */
AscFonts.Hyphen_CheckDictionary = function(lang)
{
	let isExist = g_native_engine["Hyphen_IsDictionaryExist"](lang);
	return isExist;
};
|
||||
/**
 * Native build: does nothing with its arguments and always reports failure.
 * @param {number} lang language code (ignored)
 * @param {ArrayBuffer} data dictionary bytes (ignored)
 * @returns {boolean} always false
 */
AscFonts.Hyphen_LoadDictionary = function(lang, data)
{
	return false;
};
|
||||
/**
 * Native build: hyphenates `word` through the embedded engine.
 * @param {number} lang language code
 * @param {string} word word to hyphenate
 * @returns {Array} the native result, or an empty array when the native call
 *                  returns a falsy value (e.g. null)
 */
AscFonts.Hyphen_Word = function(lang, word)
{
	let positions = g_native_engine["Hyphen_Word"](lang, word);
	if (positions)
		return positions;
	return [];
};
|
||||
|
||||
AscFonts.onLoadModule();
|
||||
AscFonts.onLoadModule();
|
||||
|
||||
|
||||
@ -44,7 +44,13 @@
|
||||
|
||||
"_ASC_HB_LanguageFromString",
|
||||
"_ASC_HB_ShapeText",
|
||||
"_ASC_HB_FontFree"
|
||||
"_ASC_HB_FontFree",
|
||||
|
||||
"_hyphenCreateApplication",
|
||||
"_hyphenDestroyApplication",
|
||||
"_hyphenLoadDictionary",
|
||||
"_hyphenCheckDictionary",
|
||||
"_hyphenWord"
|
||||
],
|
||||
"include_path": [
|
||||
"./../../graphics/pro/js/freetype-2.10.4/include",
|
||||
@ -55,7 +61,7 @@
|
||||
"./../../../OfficeUtils/src",
|
||||
"./../../../OfficeUtils/src/zlib-1.2.11"
|
||||
],
|
||||
"define": ["__linux__", "_LINUX", "UNIX", "NDEBUG", "_LIB", "_CRT_SECURE_NO_WARNINGS", "FT2_BUILD_LIBRARY", "HAVE_FREETYPE", "FT_CONFIG_OPTION_SYSTEM_ZLIB"],
|
||||
"define": ["__linux__", "_LINUX", "UNIX", "NDEBUG", "_LIB", "_CRT_SECURE_NO_WARNINGS", "FT2_BUILD_LIBRARY", "HAVE_FREETYPE", "FT_CONFIG_OPTION_SYSTEM_ZLIB", "GRAPHICS_NO_USE_DYNAMIC_LIBRARY", "HYPHEN_ENGINE_DISABLE_FILESYSTEM"],
|
||||
"compile_files_array": [
|
||||
{
|
||||
"name": "f",
|
||||
@ -186,6 +192,16 @@
|
||||
"name": "w",
|
||||
"folder": "./../../graphics/pro/js/wasm/src/lib",
|
||||
"files": ["wasm_jmp.cpp"]
|
||||
},
|
||||
{
|
||||
"name": "h_e",
|
||||
"folder": "./..",
|
||||
"files": ["TextHyphen.cpp"]
|
||||
},
|
||||
{
|
||||
"name": "h_m",
|
||||
"folder": "./../../../Common/3dParty/hyphen/js/src",
|
||||
"files": ["hyphen.cpp"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@ -73,6 +73,13 @@ function onLoadFontsModule(window, undefined)
|
||||
AscFonts.HB_FontFree = AscFonts["HB_FontFree"];
|
||||
AscFonts.HB_ShapeText = AscFonts["HB_ShapeText"];
|
||||
|
||||
AscFonts["Hyphen_Init"]();
|
||||
AscFonts.Hyphen_Destroy = AscFonts["Hyphen_Destroy"];
|
||||
AscFonts.Hyphen_LoadDictionary = AscFonts["Hyphen_LoadDictionary"];
|
||||
AscFonts.Hyphen_CheckDictionary = AscFonts["Hyphen_CheckDictionary"];
|
||||
AscFonts.Hyphen_Word = AscFonts["Hyphen_Word"];
|
||||
|
||||
|
||||
AscFonts.CreateNativeStreamByIndex = function(stream_index)
|
||||
{
|
||||
let stream = AscFonts.g_fonts_streams[stream_index];
|
||||
@ -664,4 +671,109 @@ function onLoadFontsModule(window, undefined)
|
||||
retObj["free"]();
|
||||
return glyphs;
|
||||
};
|
||||
|
||||
/**
 * Accumulates a word code point by code point and hyphenates it with AscFonts,
 * keeping a per-language cache of dictionary availability.
 *
 * Fields:
 *   _value        - the word collected so far
 *   _lang         - language code used by hyphenate()
 *   _dictionaries - map "<langCode>" -> boolean (dictionary usable or not)
 *   _mapToNames   - map "<langCode>" -> dictionary name, filled lazily from
 *                   AscCommon.spellcheckGetLanguages()
 */
function Hyphenation()
{
	this._value = "";
	this._lang = 0;
	this._dictionaries = {};
	this._mapToNames = null;

	/** Appends one Unicode code point to the current word. */
	this.addCodePoint = function(codePoint)
	{
		this._value += String.fromCodePoint(codePoint);
	};

	/** Drops the current word. */
	this.clear = function()
	{
		this._value = "";
	};

	/**
	 * Selects the language for the following hyphenate() calls.
	 * @returns {boolean} the cached availability when known; in the native editor the
	 *   engine is asked and the answer cached; otherwise false (nothing loaded yet).
	 */
	this.setLang = function(langCode)
	{
		this._lang = langCode;

		let _langKey = "" + langCode;
		if (this._dictionaries[_langKey] !== undefined)
			return this._dictionaries[_langKey];

		if (window["NATIVE_EDITOR_ENJINE"])
		{
			this._dictionaries[_langKey] = AscFonts.Hyphen_CheckDictionary(this._lang);
			return this._dictionaries[_langKey];
		}

		return false;
	};

	/**
	 * Hyphenates the current word with the current language.
	 * @returns {Array} break positions; empty for an empty word
	 */
	this.hyphenate = function()
	{
		if ("" === this._value)
			return [];
		return AscFonts.Hyphen_Word(this._lang, this._value);
	};

	/**
	 * Makes sure the dictionary for `lang` is requested, then calls `callback`.
	 * In the native editor nothing is downloaded. A language without a known
	 * dictionary name is cached as unavailable.
	 */
	this.loadDictionary = function(lang, callback)
	{
		if (window["NATIVE_EDITOR_ENJINE"])
		{
			callback();
			return;
		}

		if (!this._mapToNames)
			this._mapToNames = AscCommon.spellcheckGetLanguages();

		let _langKey = "" + lang;
		let _langName = this._mapToNames[_langKey];
		if (_langName === undefined)
		{
			this._dictionaries[_langKey] = false;
			callback();
			return;
		}

		this._loadDictionaryAttemt(_langKey, _langName, callback);
	};

	/**
	 * Downloads "<dictionaries>/<name>/hyph_<name>.dic" and loads it into the engine.
	 * Network errors are retried; after more than 3 failed attempts the language is
	 * cached as unavailable. `callback` is invoked exactly once on every final outcome.
	 */
	this._loadDictionaryAttemt = function(langKey, langName, callback, currentAttempt)
	{
		var xhr = new XMLHttpRequest();
		let urlDictionaries = "../../../../dictionaries/";
		let url = urlDictionaries + langName + "/hyph_" + langName + ".dic";

		xhr.open('GET', url, true);
		xhr.responseType = 'arraybuffer';
		xhr.currentAttempt = currentAttempt || 0;

		if (xhr.overrideMimeType)
			xhr.overrideMimeType('text/plain; charset=x-user-defined');
		else
			xhr.setRequestHeader('Accept-Charset', 'x-user-defined');

		var _t = this;
		xhr.onload = function()
		{
			let isLoaded = false;
			if ((this.status === 200 || location.href.indexOf("file:") === 0) && this.response)
			{
				// Trust the engine's verdict instead of assuming the load succeeded.
				isLoaded = !!AscFonts.Hyphen_LoadDictionary(parseInt(langKey, 10), this.response);
			}

			// `load` also fires for HTTP errors (404, 500, ...): record the failure and
			// still notify the caller, otherwise it would wait forever.
			_t._dictionaries[langKey] = isLoaded;
			callback();
		};
		xhr.onerror = function()
		{
			let _currentAttempt = xhr.currentAttempt + 1;
			if (_currentAttempt > 3)
			{
				_t._dictionaries[langKey] = false;
				callback();
				return;
			}

			_t._loadDictionaryAttemt(langKey, langName, callback, _currentAttempt);
		};

		xhr.send(null);
	};
}
|
||||
|
||||
window["AscHyphenation"] = new Hyphenation();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user