diff --git a/Common/3dParty/html/htmltoxhtml.h b/Common/3dParty/html/htmltoxhtml.h
index 3fdf63a5d1..68a505e59b 100644
--- a/Common/3dParty/html/htmltoxhtml.h
+++ b/Common/3dParty/html/htmltoxhtml.h
@@ -19,7 +19,6 @@ static std::string nonbreaking_inline = "|a|abbr|acronym|b|bdo|big|cite|code|df
static std::string empty_tags = "|area|base|basefont|bgsound|br|command|col|embed|event-source|frame|hr|image|img|input|keygen|link|menuitem|meta|param|source|spacer|track|wbr|"; // "|tag|" keys that prettyprint() flags as is_empty_tag
static std::string preserve_whitespace = "|pre|textarea|script|style|"; // "|tag|" keys that set keep_whitespace in prettyprint_contents()
static std::string special_handling = "|html|body|"; // NOTE(review): not consulted anywhere in this excerpt - confirm its users
-static std::string no_entity_sub = ""; //"|style|";
static std::string treat_like_inline = "|p|"; // "|tag|" keys that set is_like_inline in prettyprint_contents()
static std::vector html_tags = {"div","span","a","img","p","h1","h2","h3","h4","h5","h6",
@@ -436,9 +435,25 @@ static void substitute_xml_entities_into_text(std::string& text)
replace_all(text, ">", ">");
}
+// Removes every byte in the range 0x00-0x1F from text, in place.
+// After Gumbo parsing, references to control characters arrive as the raw
+// control bytes themselves (e.g. 0x01); XmlUtils::CXmlLiteReader crashes on
+// them in attribute values, so they are simply dropped. bug#73486
+// NOTE(review): this also drops TAB/LF/CR, which XML 1.0 permits - kept as before.
+static void remove_control_symbols(std::string& text)
+{
+	// Compare as unsigned char: where plain char is signed, bytes >= 0x80 (all
+	// UTF-8 lead/continuation bytes) are negative, would also satisfy
+	// "<= 0x1F", and every non-ASCII character would be stripped.
+	const auto is_control = [](unsigned char chValue){ return chValue <= 0x1F; };
+	// Erase-remove: one pass instead of one erase() per removed byte.
+	text.erase(std::remove_if(text.begin(), text.end(), is_control), text.end());
+}
+
// Prepares text for a double-quoted attribute value: drops control bytes, applies the element-text entity substitution, then escapes '"' as &quot;.
static void substitute_xml_entities_into_attributes(std::string& text)
{
+	remove_control_symbols(text); // bytes 0x00-0x1F crash the XML reader (bug#73486)
	substitute_xml_entities_into_text(text);
	replace_all(text, "\"", "&quot;");
}
@@ -486,7 +501,7 @@ static void build_doctype(GumboNode* node, NSStringUtils::CStringBuilderA& oBuil
}
}
-static void build_attributes(const GumboVector* attribs, bool no_entities, NSStringUtils::CStringBuilderA& atts)
+static void build_attributes(const GumboVector* attribs, NSStringUtils::CStringBuilderA& atts)
{
std::vector arrRepeat;
for (size_t i = 0; i < attribs->length; ++i)
@@ -532,8 +547,7 @@ static void build_attributes(const GumboVector* attribs, bool no_entities, NSStr
std::string qs ="\"";
atts.WriteString("=");
atts.WriteString(qs);
- if(!no_entities)
- substitute_xml_entities_into_attributes(sVal);
+ substitute_xml_entities_into_attributes(sVal);
atts.WriteString(sVal);
atts.WriteString(qs);
}
@@ -542,7 +556,6 @@ static void build_attributes(const GumboVector* attribs, bool no_entities, NSStr
static void prettyprint_contents(GumboNode* node, NSStringUtils::CStringBuilderA& contents, bool bCheckValidNode)
{
std::string key = "|" + get_tag_name(node) + "|";
- bool no_entity_substitution = no_entity_sub.find(key) != std::string::npos;
bool keep_whitespace = preserve_whitespace.find(key) != std::string::npos;
bool is_inline = nonbreaking_inline.find(key) != std::string::npos;
bool is_like_inline = treat_like_inline.find(key) != std::string::npos;
@@ -556,8 +569,7 @@ static void prettyprint_contents(GumboNode* node, NSStringUtils::CStringBuilderA
if (child->type == GUMBO_NODE_TEXT)
{
std::string val(child->v.text.text);
- if(!no_entity_substitution)
- substitute_xml_entities_into_text(val);
+ substitute_xml_entities_into_text(val);
// Strip form feed (FF, \014) characters
size_t found = val.find_first_of("\014");
@@ -613,7 +625,6 @@ static void prettyprint(GumboNode* node, NSStringUtils::CStringBuilderA& oBuilde
std::string closeTag = "";
std::string key = "|" + tagname + "|";
bool is_empty_tag = empty_tags.find(key) != std::string::npos;
- bool no_entity_substitution = no_entity_sub.find(key) != std::string::npos;
// determine closing tag type
if (is_empty_tag)
@@ -626,7 +637,7 @@ static void prettyprint(GumboNode* node, NSStringUtils::CStringBuilderA& oBuilde
// build attr string
const GumboVector* attribs = &node->v.element.attributes;
- build_attributes(attribs, no_entity_substitution, oBuilder);
+ build_attributes(attribs, oBuilder);
oBuilder.WriteString(close + ">");
// prettyprint your contents