correction for surrogate symbols in the text

This commit is contained in:
Artur
2026-01-21 13:45:34 +03:00
parent 7a7ab31327
commit d5e0c13c2d
7 changed files with 60 additions and 10 deletions

View File

@ -96,7 +96,7 @@ AssistantHint.prototype.annotateParagraph = async function(paraId, recalcId, tex
while (searchStart < text.length)
{
const index = text.indexOf(origin, searchStart);
const index = _t.simpleGraphemeIndexOf(text, origin, searchStart);
if (index === -1) break;
count++;
@ -104,7 +104,7 @@ AssistantHint.prototype.annotateParagraph = async function(paraId, recalcId, tex
{
ranges.push({
"start": index,
"length": origin.length,
"length": [...origin].length,
"id": rangeId
});
_t.paragraphs[paraId][rangeId] = {

View File

@ -96,7 +96,7 @@ AssistantReplaceHint.prototype.annotateParagraph = async function(paraId, recalc
while (searchStart < text.length)
{
const index = text.indexOf(origin, searchStart);
const index = _t.simpleGraphemeIndexOf(text, origin, searchStart);
if (index === -1) break;
count++;
@ -104,7 +104,7 @@ AssistantReplaceHint.prototype.annotateParagraph = async function(paraId, recalc
{
ranges.push({
"start": index,
"length": origin.length,
"length": [...origin].length,
"id": rangeId
});
_t.paragraphs[paraId][rangeId] = {

View File

@ -96,7 +96,7 @@ AssistantReplace.prototype.annotateParagraph = async function(paraId, recalcId,
while (searchStart < text.length)
{
const index = text.indexOf(origin, searchStart);
const index = _t.simpleGraphemeIndexOf(text, origin, searchStart);
if (index === -1) break;
count++;
@ -104,7 +104,7 @@ AssistantReplace.prototype.annotateParagraph = async function(paraId, recalcId,
{
ranges.push({
"start": index,
"length": origin.length,
"length": [...origin].length,
"id": rangeId
});
_t.paragraphs[paraId][rangeId] = {

View File

@ -164,3 +164,28 @@ CustomAnnotator.prototype._handleNewRanges = function(ranges, paraId, text)
CustomAnnotator.prototype._handleNewRangePositions = function(range, paraId, text)
{
};
/**
 * Finds `searchStr` inside `str` and returns the position of the match counted
 * in Unicode code points instead of UTF-16 code units.
 *
 * A surrogate pair takes two code units but is a single code point, so the
 * code-unit index reported by `String.prototype.indexOf` is reduced by one for
 * every surrogate pair that starts before the match. Despite the method name,
 * no grapheme-cluster segmentation is performed (combining marks, ZWJ
 * sequences, etc. are still counted as separate positions).
 *
 * @param {string} str - Text to search in.
 * @param {string} searchStr - Substring to look for.
 * @param {number} [fromIndex=0] - Offset in UTF-16 code units at which the
 *   search starts; it is handed to `String.prototype.indexOf` unchanged.
 * @returns {number} Code-point index of the first match, or -1 when
 *   `searchStr` does not occur at or after `fromIndex`.
 */
CustomAnnotator.prototype.simpleGraphemeIndexOf = function(str, searchStr, fromIndex = 0) {
	const codeUnitIndex = str.indexOf(searchStr, fromIndex);
	// -1 (not found), 0 and 1 cannot have a complete surrogate pair in front
	// of them, so the code-unit index is already the answer.
	if (codeUnitIndex < 2) {
		return codeUnitIndex;
	}

	let pairCount = 0;
	for (let i = 0; i < codeUnitIndex; i++) {
		const lead = str.charCodeAt(i);
		if (lead < 0xD800 || lead > 0xDBFF) {
			continue;
		}
		// Only a high surrogate that is immediately followed by a low surrogate
		// collapses into one code point. An unpaired high surrogate is a code
		// point on its own and must not shrink the index.
		const trail = str.charCodeAt(i + 1);
		if (trail >= 0xDC00 && trail <= 0xDFFF) {
			pairCount++;
		}
	}
	return codeUnitIndex - pairCount;
};

View File

@ -181,7 +181,7 @@ Text to check:`;
while (searchStart < text.length)
{
const index = text.indexOf(origin, searchStart);
const index = _t.simpleGraphemeIndexOf(text, origin, searchStart);
if (index === -1) break;
count++;
@ -189,7 +189,7 @@ Text to check:`;
{
ranges.push({
"start": index,
"length": origin.length,
"length": [...origin].length,
"id": rangeId
});
_t.paragraphs[paraId][rangeId] = {

View File

@ -171,7 +171,7 @@ Text to check:`;
while (searchStart < text.length)
{
const index = text.indexOf(wrong, searchStart);
const index = _t.simpleGraphemeIndexOf(text, wrong, searchStart);
if (index === -1) break;
const isStartBoundary = index === 0 || _t._isWordBoundary(text[index - 1]);
@ -184,7 +184,7 @@ Text to check:`;
{
ranges.push({
"start": index,
"length": wrong.length,
"length": [...wrong].length,
"id": rangeId
});
_t.paragraphs[paraId][rangeId] = {

View File

@ -173,3 +173,28 @@ TextAnnotator.prototype._handleNewRanges = function(ranges, paraId, text)
TextAnnotator.prototype._handleNewRangePositions = function(range, paraId, text)
{
};
/**
 * Finds `searchStr` inside `str` and returns the position of the match counted
 * in Unicode code points instead of UTF-16 code units.
 *
 * A surrogate pair takes two code units but is a single code point, so the
 * code-unit index reported by `String.prototype.indexOf` is reduced by one for
 * every surrogate pair that starts before the match. Despite the method name,
 * no grapheme-cluster segmentation is performed (combining marks, ZWJ
 * sequences, etc. are still counted as separate positions).
 *
 * @param {string} str - Text to search in.
 * @param {string} searchStr - Substring to look for.
 * @param {number} [fromIndex=0] - Offset in UTF-16 code units at which the
 *   search starts; it is handed to `String.prototype.indexOf` unchanged.
 * @returns {number} Code-point index of the first match, or -1 when
 *   `searchStr` does not occur at or after `fromIndex`.
 */
TextAnnotator.prototype.simpleGraphemeIndexOf = function(str, searchStr, fromIndex = 0) {
	const codeUnitIndex = str.indexOf(searchStr, fromIndex);
	// -1 (not found), 0 and 1 cannot have a complete surrogate pair in front
	// of them, so the code-unit index is already the answer.
	if (codeUnitIndex < 2) {
		return codeUnitIndex;
	}

	let pairCount = 0;
	for (let i = 0; i < codeUnitIndex; i++) {
		const lead = str.charCodeAt(i);
		if (lead < 0xD800 || lead > 0xDBFF) {
			continue;
		}
		// Only a high surrogate that is immediately followed by a low surrogate
		// collapses into one code point. An unpaired high surrogate is a code
		// point on its own and must not shrink the index.
		const trail = str.charCodeAt(i + 1);
		if (trail >= 0xDC00 && trail <= 0xDFFF) {
			pairCount++;
		}
	}
	return codeUnitIndex - pairCount;
};