Added merging of extra cells in html to ooxml conversion

This commit is contained in:
Kirill Polyakov
2024-03-20 13:51:18 +03:00
parent fb4c8ebf32
commit c9fbe4189c
4 changed files with 249 additions and 97 deletions

View File

@ -46,67 +46,68 @@ namespace NSCSS
{
// Keys of the style-level ("basic") properties of a generated style.
// Enumerators are numbered implicitly, starting at 0 in declaration order
// (B_CustomStyle == 0 ... B_SemiHidden == 10); append new entries at the end
// to keep the existing values stable.
typedef enum
{
	B_CustomStyle,
	B_StyleId,
	B_Type,
	B_Default,

	B_Name,
	B_BasedOn,
	B_QFormat,
	B_Link,
	B_UnhideWhenUsed,
	B_UiPriority,
	B_SemiHidden
} BasicProperties;
// Keys of the paragraph-level properties of a generated style.
// Enumerators are numbered implicitly, starting at 0 in declaration order
// (P_Jc == 0 ... P_KeepNext == 11).
typedef enum
{
	P_Jc,
	P_Spacing,
	P_ContextualSpacing,
	P_Ind,
	P_OutlineLvl,
	P_Shd,
	// <pBdr>
	P_TopBorder,
	P_LeftBorder,
	P_BottomBorder,
	P_RightBorder,
	// </pBdr>
	P_KeepLines,
	P_KeepNext,
} ParagraphProperties;
// Keys of the run-level (character) properties of a generated style.
// Enumerators are numbered implicitly, starting at 0 in declaration order
// (R_RFonts == 0 ... R_Kern == 8).
typedef enum
{
	R_RFonts,
	R_Sz,
	R_B,
	R_I,
	R_Color,
	R_U,
	R_Highlight,
	R_SmallCaps,
	R_Kern
} RunnerProperties;
// Keys of the table-level properties of a generated style.
// Enumerators are numbered implicitly, starting at 0 in declaration order
// (T_TblInd == 0 ... T_BorderInsideV == 10).
typedef enum
{
	T_TblInd,
	// <tblCellMar>
	T_CellTop,
	T_CellLeft,
	T_CellBottom,
	T_CellRight,
	// </tblCellMar>
	// <tblBorders>
	T_BorderTop,
	T_BorderLeft,
	T_BorderBottom,
	T_BorderRight,
	T_BorderInsideH,
	T_BorderInsideV
	// </tblBorders>
} TableProperties;
}

View File

@ -509,10 +509,14 @@ namespace NSCSS
CXmlElement oXmlElement;
SetPStyle(oStyle, oXmlElement);
if (!oXmlElement.Empty())
if (oXmlElement.Empty())
return false;
structStyle.setId(oXmlElement.GetStyleId());
if (structStyle.getId().empty())
structStyle.setId(m_sId);
m_arStyleUsed.push_back(structStyle);
m_sStyle += oXmlElement.GetPStyle();

View File

@ -27,7 +27,7 @@ CXmlElement::CXmlElement(const std::wstring& sNameDefaultElement)
// Reports whether the element has nothing to contribute to a style:
// no paragraph properties, no run properties and no B_BasedOn entry among
// the basic properties. An element that only names a parent style (basedOn)
// is therefore NOT considered empty.
bool CXmlElement::Empty() const
{
	return m_mPStyleValues.empty() &&
	       m_mRStyleValues.empty() &&
	       m_mBasicValues.find(CSSProperties::BasicProperties::B_BasedOn) == m_mBasicValues.end();
}
void CXmlElement::CreateDefaultElement(const std::wstring& sNameDefaultElement)
@ -35,7 +35,19 @@ void CXmlElement::CreateDefaultElement(const std::wstring& sNameDefaultElement)
if (!Empty())
Clear();
if (sNameDefaultElement == L"li")
if (sNameDefaultElement == L"p")
{
AddBasicProperties(CSSProperties::BasicProperties::B_Type, L"paragraph");
AddBasicProperties(CSSProperties::BasicProperties::B_StyleId, L"p");
AddBasicProperties(CSSProperties::BasicProperties::B_Name, L"Normal (Web)");
AddBasicProperties(CSSProperties::BasicProperties::B_BasedOn, L"normal");
AddBasicProperties(CSSProperties::BasicProperties::B_UiPriority, L"99");
AddBasicProperties(CSSProperties::BasicProperties::B_UnhideWhenUsed, L"true");
AddBasicProperties(CSSProperties::BasicProperties::B_SemiHidden, L"true");
AddPropertiesInP(CSSProperties::ParagraphProperties::P_Spacing, L"w:before=\"100\" w:beforeAutospacing=\"1\" w:after=\"100\" w:afterAutospacing=\"1\"");
}
else if (sNameDefaultElement == L"li")
{
AddBasicProperties(CSSProperties::BasicProperties::B_Type, L"paragraph");
AddBasicProperties(CSSProperties::BasicProperties::B_StyleId, L"li");
@ -191,23 +203,6 @@ void CXmlElement::CreateDefaultElement(const std::wstring& sNameDefaultElement)
AddPropertiesInR(CSSProperties::RunnerProperties::R_Sz, L"15");
AddPropertiesInR(CSSProperties::RunnerProperties::R_B, L"bold");
}
else if (sNameDefaultElement == L"p-c")
{
AddBasicProperties(CSSProperties::BasicProperties::B_Type, L"character");
AddBasicProperties(CSSProperties::BasicProperties::B_StyleId, L"p-c");
AddBasicProperties(CSSProperties::BasicProperties::B_CustomStyle, L"1");
AddBasicProperties(CSSProperties::BasicProperties::B_Name, L"Paragraph character");
AddBasicProperties(CSSProperties::BasicProperties::B_Link, L"p");
}
else if (sNameDefaultElement == L"p")
{
AddBasicProperties(CSSProperties::BasicProperties::B_Type, L"paragraph");
AddBasicProperties(CSSProperties::BasicProperties::B_StyleId, L"p");
AddBasicProperties(CSSProperties::BasicProperties::B_CustomStyle, L"1");
AddBasicProperties(CSSProperties::BasicProperties::B_Name, L"Paragraph");
AddBasicProperties(CSSProperties::BasicProperties::B_BasedOn, L"normal");
AddBasicProperties(CSSProperties::BasicProperties::B_Link, L"p-c");
}
else if (sNameDefaultElement == L"div-c")
{
AddBasicProperties(CSSProperties::BasicProperties::B_Type, L"character");
@ -515,7 +510,8 @@ std::wstring CXmlElement::ConvertBasicInfoStyle() const
}
case CSSProperties::BasicProperties::B_UnhideWhenUsed:
{
sBasicInfo += L"<w:unhideWhenUsed/>";
if (L"true" == oItem.second)
sBasicInfo += L"<w:unhideWhenUsed/>";
break;
}
case CSSProperties::BasicProperties::B_UiPriority:
@ -523,6 +519,12 @@ std::wstring CXmlElement::ConvertBasicInfoStyle() const
sBasicInfo += L"<w:uiPriority w:val=\"" + oItem.second + L"\"/>";
break;
}
case CSSProperties::BasicProperties::B_SemiHidden:
{
if (L"true" == oItem.second)
sBasicInfo += L"<w:semiHidden/>";
break;
}
default:
break;
}

View File

@ -71,16 +71,19 @@ struct CTextSettings
{
	bool bBdo;       // Reverse the text
	bool bPre;       // Preserve formatting (spaces, tabs and line breaks are kept)
	bool bAddSpaces; // Add a space before the text?
	bool bMergeText; // Merge consecutive text into one?
	int nLi;         // List level

	std::wstring sRStyle; // w:rStyle
	std::wstring sPStyle; // w:pStyle

	// Six-argument form kept for callers that do not specify bMergeText;
	// the flag is explicitly set to false so it is never left uninitialized.
	CTextSettings(bool _bBdo, bool _bPre, bool _bAddSpaces, int _nLi, const std::wstring& _sRStyle, const std::wstring& _sPStyle) :
		bBdo(_bBdo), bPre(_bPre), bAddSpaces(_bAddSpaces), bMergeText(false), nLi(_nLi), sRStyle(_sRStyle), sPStyle(_sPStyle)
	{}

	// Full form: every setting is supplied by the caller.
	CTextSettings(bool _bBdo, bool _bPre, bool _bAddSpaces, bool _bMergeText, int _nLi, const std::wstring& _sRStyle, const std::wstring& _sPStyle) :
		bBdo(_bBdo), bPre(_bPre), bAddSpaces(_bAddSpaces), bMergeText(_bMergeText), nLi(_nLi), sRStyle(_sRStyle), sPStyle(_sPStyle)
	{}

	// Member-wise copy, including bMergeText.
	CTextSettings(const CTextSettings& oTS) :
		bBdo(oTS.bBdo), bPre(oTS.bPre), bAddSpaces(oTS.bAddSpaces), bMergeText(oTS.bMergeText), nLi(oTS.nLi), sRStyle(oTS.sRStyle), sPStyle(oTS.sPStyle)
	{}
};
//Необходимые стили таблицы
@ -182,6 +185,8 @@ private:
NSStringUtils::CStringBuilder m_oNumberXml; // numbering.xml
bool m_bInP; // <w:p> открыт?
bool m_bInR; // <w:r> открыт?
bool m_bInT; // <w:t> открыт?
bool m_bWasPStyle; // <w:pStyle> записан?
bool m_bWasSpace; // Был пробел?
@ -191,7 +196,7 @@ public:
CHtmlFile2_Private()
: m_nFootnoteId(1), m_nHyperlinkId(1), m_nCrossId(1), m_nNumberingId(1),
m_bInP(false), m_bWasPStyle(false), m_bWasSpace(false)
m_bInP(false), m_bInR(false), m_bInT(false), m_bWasPStyle(false), m_bWasSpace(false)
{
m_oPageData.SetSize (std::to_wstring(DEFAULT_PAGE_WIDTH) + L"tw " + std::to_wstring(DEFAULT_PAGE_HEIGHT) + L"tw", 0, true);
m_oPageData.SetMargin(L"1440tw 1440tw 1440tw 1440tw", 0, true);
@ -773,6 +778,44 @@ private:
pXml->WriteString(L"</w:rPr></w:pPr></w:p>");
}
// Writes the opening <w:r> tag unless a run is already open (m_bInR).
// Returns true when this call wrote the tag, false when a run was already open.
bool OpenR(NSStringUtils::CStringBuilder* pXml)
{
	const bool bOpenedNow = !m_bInR;

	if (bOpenedNow)
	{
		pXml->WriteString(L"<w:r>");
		m_bInR = true;
	}

	return bOpenedNow;
}
// Writes the closing </w:r> tag and clears m_bInR; does nothing when no run is open.
void CloseR(NSStringUtils::CStringBuilder* pXml)
{
	if (m_bInR)
	{
		pXml->WriteString(L"</w:r>");
		m_bInR = false;
	}
}
// Writes the opening <w:t xml:space="preserve"> tag unless a text element is
// already open (m_bInT).
// Returns true when this call wrote the tag, false when one was already open.
bool OpenT(NSStringUtils::CStringBuilder* pXml)
{
	const bool bOpenedNow = !m_bInT;

	if (bOpenedNow)
	{
		pXml->WriteString(L"<w:t xml:space=\"preserve\">");
		m_bInT = true;
	}

	return bOpenedNow;
}
// Writes the closing </w:t> tag and clears m_bInT; does nothing when no text
// element is open.
void CloseT(NSStringUtils::CStringBuilder* pXml)
{
	if (m_bInT)
	{
		pXml->WriteString(L"</w:t>");
		m_bInT = false;
	}
}
void CloseP(NSStringUtils::CStringBuilder* pXml, const std::vector<NSCSS::CNode>& arSelectors)
{
m_bWasSpace = true;
@ -783,11 +826,19 @@ private:
for (const NSCSS::CNode& item : arSelectors)
if (item.m_wsName == L"a")
pXml->WriteString(L"</w:hyperlink>");
CloseT(pXml);
CloseR(pXml);
pXml->WriteString(L"</w:p>");
m_bInP = false;
m_bInP = false;
}
// Returns the text of the reader's current node, passed through ToUnicode.
std::wstring GetText()
{
	std::wstring wsText = ToUnicode(m_oLightReader.GetTextA());
	return wsText;
}
std::wstring GetSubClass(NSStringUtils::CStringBuilder* oXml, std::vector<NSCSS::CNode>& sSelectors)
{
NSCSS::CNode oNode;
@ -879,26 +930,41 @@ private:
m_oDocXml.WriteString(L"\"/>");
*/
readStream(&m_oDocXml, sSelectors, { false, false, true, -1, L"", L"" });
readStream(&m_oDocXml, sSelectors, { false, false, true, false, -1, L"", L"" });
}
void readInside (NSStringUtils::CStringBuilder* oXml, std::vector<NSCSS::CNode>& sSelectors, const CTextSettings& oTS, const std::wstring& sName)
{
if(sName == L"#text")
{
std::wstring sText = ToUnicode(m_oLightReader.GetTextA());
std::wstring sText = GetText();
size_t find = sText.find_first_not_of(L" \n\t\r");
if (find == std::wstring::npos)
return;
if (oTS.bAddSpaces && m_bInP && !iswspace(sText.front()) && !m_bWasSpace)
bool bInT = m_bInT;
if (!oTS.sRStyle.empty() || oTS.bPre)
{
CloseT(oXml);
CloseR(oXml);
}
if (oTS.bAddSpaces && m_bInP && !m_bInR && !iswspace(sText.front()) && !m_bWasSpace)
{
oXml->WriteString(L"<w:r><w:rPr><w:rFonts w:eastAsia=\"Times New Roman\"/></w:rPr><w:t xml:space=\"preserve\"> </w:t></w:r>");
m_bWasSpace = true;
}
std::wstring sPStyle = wrP(oXml, sSelectors, oTS);
oXml->WriteString(L"<w:r>");
std::wstring sRStyle = wrR(oXml, sSelectors, oTS);
oXml->WriteString(L"<w:t xml:space=\"preserve\">");
std::wstring sRStyle;
if (OpenR(oXml))
{
sRStyle = wrR(oXml, sSelectors, oTS);
OpenT(oXml);
}
if(oTS.bBdo)
std::reverse(sText.begin(), sText.end());
@ -938,11 +1004,19 @@ private:
if (std::iswspace(sText.front()) && m_bWasSpace)
sText.erase(0, 1);
if (oTS.bMergeText && !m_bWasSpace && bInT)
oXml->WriteEncodeXmlString(L" ");
if (!sText.empty())
m_bWasSpace = std::iswspace(sText.back());
oXml->WriteEncodeXmlString(sText);
oXml->WriteString(L"</w:t></w:r>");
if (!oTS.bMergeText)
{
CloseT(oXml);
CloseR(oXml);
}
return;
}
@ -958,7 +1032,7 @@ private:
else if(sName == L"b" || sName == L"strong")
{
CTextSettings oTSR(oTS);
oTSR.sRStyle += L"<w:b/>";
oTSR.sRStyle += L"<w:b/><w:bCs/>";
readStream(oXml, sSelectors, oTSR);
}
// Направление текста
@ -1167,6 +1241,12 @@ private:
}
readStream(oXml, sSelectors, oTS);
}
else if (sName == L"nobr")
{
CTextSettings oTSPre(oTS);
oTSPre.bPre = true;
readStream(oXml, sSelectors, oTSPre);
}
// Без нового абзаца
else if(sName == L"basefont" || sName == L"button" || sName == L"label" || sName == L"data" || sName == L"object" ||
sName == L"noscript" || sName == L"output" || sName == L"abbr" || sName == L"time" || sName == L"ruby" ||
@ -1265,12 +1345,6 @@ private:
oTSPre.bPre = true;
readStream(oXml, sSelectors, oTSPre);
}
else if (sName == L"nobr")
{
CTextSettings oTSPre(oTS);
oTSPre.bPre = true;
readStream(oXml, sSelectors, oTSPre);
}
// Таблицы
else if(sName == L"table")
readTable(oXml, sSelectors, oTS);
@ -1345,6 +1419,71 @@ private:
return L"";
}
// State shared between the recursive steps of plain-text reading.
struct TTextReadingSettings
{
	bool m_bOpenedRT = false;     // a "<w:r><w:t>" pair is currently open
	bool m_bAddSpace = true;      // append a space after each text fragment
	std::wstring m_wsLastElement; // starts out empty

	// Defaults: nothing opened, spaces are added, no last element.
	TTextReadingSettings() = default;

	// Sets every field explicitly.
	TTextReadingSettings(bool bOpenedRT, bool bAddSpace, const std::wstring& wsLastElement)
	{
		m_bOpenedRT     = bOpenedRT;
		m_bAddSpace     = bAddSpace;
		m_wsLastElement = wsLastElement;
	}
};
// Reads the nodes returned by ReadNextSiblingNode2(nDepth) and writes their
// content into pXml:
//  - "#text" nodes are appended to a single "<w:r><w:t>" pair, which is opened
//    on demand and tracked through oTextReadingSettings.m_bOpenedRT; a space
//    follows each fragment when m_bAddSpace is set;
//  - nested "td" nodes are descended into recursively (nDepth + 1);
//  - any other node closes the open run, if any, and is handed to readStream
//    with its selector pushed by GetSubClass and popped afterwards.
// The run may still be open on return; the caller is expected to close it
// when m_bOpenedRT is true.
void ReadOnlyText(NSStringUtils::CStringBuilder* pXml, TTextReadingSettings& oTextReadingSettings, std::vector<NSCSS::CNode>& sSelectors, const CTextSettings& oTS, int nDepth)
{
	if (!m_oLightReader.IsValid() || m_oLightReader.IsEmptyNode())
		return;

	while (m_oLightReader.ReadNextSiblingNode2(nDepth))
	{
		const std::wstring wsNodeName = m_oLightReader.GetName();

		if (L"#text" == wsNodeName)
		{
			if (!oTextReadingSettings.m_bOpenedRT)
			{
				pXml->WriteString(L"<w:r><w:t>");
				oTextReadingSettings.m_bOpenedRT = true;
			}

			// Text taken from the HTML may contain '&', '<' or '>', so it must be
			// XML-escaped before being placed inside <w:t>.
			const std::wstring wsText = GetText();
			pXml->WriteEncodeXmlString(wsText);

			if (oTextReadingSettings.m_bAddSpace)
				pXml->WriteString(L" ");
		}
		else if (L"td" == wsNodeName)
			ReadOnlyText(pXml, oTextReadingSettings, sSelectors, oTS, nDepth + 1);
		else
		{
			GetSubClass(pXml, sSelectors);

			// The pending text run has to be closed before any other element is written.
			if (oTextReadingSettings.m_bOpenedRT)
			{
				pXml->WriteString(L"</w:t></w:r>");
				oTextReadingSettings.m_bOpenedRT = false;
			}

			readStream(pXml, sSelectors, oTS);
			sSelectors.pop_back();
		}
	}
}
// Writes the content of the current "td" node into pXml through ReadOnlyText.
// Does nothing unless the reader is valid and positioned on a non-empty "td".
// Any run left open by ReadOnlyText is closed before returning.
void MergeCells(NSStringUtils::CStringBuilder* pXml)
{
	const bool bOnFilledCell = m_oLightReader.IsValid() &&
	                           !m_oLightReader.IsEmptyNode() &&
	                           L"td" == m_oLightReader.GetName();
	if (!bOnFilledCell)
		return;

	std::vector<NSCSS::CNode> arSelectors;
	TTextReadingSettings oSettings;

	// bBdo = false, bPre = false, bAddSpaces = true, bMergeText = false, nLi = -1, no run/paragraph style
	const CTextSettings oDefaultTS{ false, false, true, false, -1, L"", L"" };
	const int nStartDepth = m_oLightReader.GetDepth() - 1;

	ReadOnlyText(pXml, oSettings, arSelectors, oDefaultTS, nStartDepth);

	if (!oSettings.m_bOpenedRT)
		return;

	pXml->WriteString(L"</w:t></w:r>");
}
void readTr (NSStringUtils::CStringBuilder* oXml, std::vector<NSCSS::CNode>& sSelectors, const CTextSettings& oTS, const TTableStyles& oTableStyles)
{
const std::wstring wsName = m_oLightReader.GetName();
@ -1543,19 +1682,20 @@ private:
if (j - 1 == MAXCOLUMNSINTABLE)
{
// MergeCells(&oTrBody);
CTextSettings oTrTS{oTS};
oTrTS.bMergeText = true;
oTrTS.bAddSpaces = true;
m_bWasSpace = true;
while (m_oLightReader.ReadNextSiblingNode(nTrDeath) && L"td" == m_oLightReader.GetName())
{
GetSubClass(&oTrBody, sSelectors);
CTextSettings oTSTd{oTS};
oTSTd.bAddSpaces = false;
readStream(&oTrBody, sSelectors, oTSTd);
readStream(&oTrBody, sSelectors, oTrTS);
sSelectors.pop_back();
}
}
CloseP(&oTrBody, sSelectors);
oTrBody.WriteString(L"</w:tc>");
@ -1748,7 +1888,8 @@ private:
m_bWasPStyle = false;
}
// Заголовок таблицы выравнивание посередине
CTextSettings oTSP { oTS.bBdo, oTS.bPre, oTS.bAddSpaces, oTS.nLi, oTS.sRStyle, oTS.sPStyle + L"<w:jc w:val=\"center\"/>" };
CTextSettings oTSP(oTS);
oTSP.sPStyle += L"<w:jc w:val=\"center\"/>";
readStream(oXml, sSelectors, oTSP);
if (m_bInP)
m_bWasPStyle = false;
@ -2280,12 +2421,16 @@ private:
const std::wstring sRSettings = m_oXmlStyle.GetStyle();
m_oXmlStyle.Clear();
if (!sRStyle.empty())
if (!sRStyle.empty() || !oTS.sRStyle.empty())
{
oXml->WriteString(L"<w:rPr><w:rStyle w:val=\"");
oXml->WriteString(sRStyle);
oXml->WriteString(L"\"/>");
oXml->WriteString(L"<w:rPr>");
if (!sRStyle.empty())
{
oXml->WriteString(L"<w:rStyle w:val=\"");
oXml->WriteString(sRStyle);
oXml->WriteString(L"\"/>");
}
oXml->WriteString(oTS.sRStyle + L' ' + sRSettings);
oXml->WriteString(L"</w:rPr>");
}